[Pkg-ceph-commits] [ceph] 01/06: Imported Upstream version 10.0.5

James Downing Page jamespage at moszumanska.debian.org
Wed Mar 23 13:56:44 UTC 2016


This is an automated email from the git hooks/post-receive script.

jamespage pushed a commit to branch ubuntu-xenial
in repository ceph.

commit c2576e4dc0ef48de5d90445cb278ec7fea1c98ce
Author: James Page <james.page at ubuntu.com>
Date:   Tue Mar 22 10:54:19 2016 +0000

    Imported Upstream version 10.0.5
---
 AUTHORS                                            |   23 +
 ChangeLog                                          |  900 +++-
 Makefile.in                                        |   11 +-
 README                                             |    2 +-
 ceph.spec                                          |  446 +-
 ceph.spec.in                                       |  444 +-
 configure                                          |  436 +-
 configure.ac                                       |  141 +-
 doc/Makefile.am                                    |    1 +
 doc/Makefile.in                                    |   10 +-
 doc/man/8/ceph-authtool.rst                        |    2 +-
 doc/man/8/ceph.rst                                 |    2 +-
 doc/man/8/monmaptool.rst                           |    6 +-
 doc/man/8/rados.rst                                |    4 +-
 doc/man/8/radosgw-admin.rst                        |   18 +
 doc/man/8/rbd-mirror.rst                           |   75 +
 man/Makefile-client.am                             |    1 +
 man/Makefile.in                                    |   10 +-
 man/ceph-authtool.8                                |    4 +-
 man/ceph-clsinfo.8                                 |    2 +-
 man/ceph-conf.8                                    |    2 +-
 man/ceph-create-keys.8                             |    2 +-
 man/ceph-debugpack.8                               |    2 +-
 man/ceph-dencoder.8                                |    2 +-
 man/ceph-deploy.8                                  |    2 +-
 man/ceph-detect-init.8                             |    2 +-
 man/ceph-disk.8                                    |    2 +-
 man/ceph-fuse.8                                    |    2 +-
 man/ceph-mds.8                                     |    2 +-
 man/ceph-mon.8                                     |    2 +-
 man/ceph-osd.8                                     |    2 +-
 man/ceph-post-file.8                               |    2 +-
 man/ceph-rbdnamer.8                                |    2 +-
 man/ceph-rest-api.8                                |    2 +-
 man/ceph-run.8                                     |    2 +-
 man/ceph-syn.8                                     |    2 +-
 man/ceph.8                                         |    4 +-
 man/cephfs.8                                       |    2 +-
 man/crushtool.8                                    |    2 +-
 man/librados-config.8                              |    2 +-
 man/monmaptool.8                                   |    8 +-
 man/mount.ceph.8                                   |    2 +-
 man/osdmaptool.8                                   |    2 +-
 man/rados.8                                        |    6 +-
 man/radosgw-admin.8                                |   19 +-
 man/radosgw.8                                      |    2 +-
 man/rbd-fuse.8                                     |    2 +-
 man/rbd-mirror.8                                   |   99 +
 man/rbd-nbd.8                                      |    2 +-
 man/rbd-replay-many.8                              |    2 +-
 man/rbd-replay-prep.8                              |    2 +-
 man/rbd-replay.8                                   |    2 +-
 man/rbd.8                                          |    2 +-
 selinux/Makefile.in                                |    9 +-
 src/.git_version                                   |    4 +-
 src/Makefile-env.am                                |   13 +-
 src/Makefile-server.am                             |    1 -
 src/Makefile-spdk.am                               |   48 +
 src/Makefile.am                                    |   26 +-
 src/Makefile.in                                    | 4782 ++++++++++++--------
 src/acconfig.h.in                                  |   14 +
 src/auth/AuthClientHandler.h                       |    2 +-
 src/auth/AuthServiceHandler.h                      |    2 +-
 src/auth/AuthSessionHandler.h                      |    2 +-
 src/auth/Crypto.cc                                 |    1 +
 src/auth/cephx/CephxKeyServer.h                    |    2 +-
 src/auth/cephx/CephxProtocol.h                     |    4 +-
 src/auth/none/AuthNoneServiceHandler.h             |    2 +-
 src/ceph-crush-location                            |    8 +-
 src/ceph-debugpack.in                              |    6 +-
 src/ceph-detect-init/Makefile.am                   |    8 +-
 src/ceph-detect-init/run-tox.sh                    |   18 +-
 src/ceph-disk/AUTHORS.rst                          |   28 +
 src/ceph-disk/MANIFEST.in                          |    1 +
 src/ceph-disk/Makefile.am                          |   54 +
 src/ceph-disk/README.rst                           |    4 +
 src/ceph-disk/ceph_disk/__init__.py                |    0
 src/{ceph-disk => ceph-disk/ceph_disk/main.py}     | 2791 +++++++-----
 src/ceph-disk/requirements.txt                     |    1 +
 src/ceph-disk/run-tox.sh                           |   24 +
 src/ceph-disk/setup.py                             |   74 +
 src/ceph-disk/test-requirements.txt                |   11 +
 .../tests/test_main.py}                            |  889 ++--
 src/ceph-disk/tox.ini                              |   23 +
 src/ceph_fuse.cc                                   |    2 +-
 src/ceph_mds.cc                                    |    3 +-
 src/ceph_mon.cc                                    |    5 +-
 src/ceph_osd.cc                                    |   11 +-
 src/civetweb/src/civetweb.c                        |    2 +
 src/client/Client.cc                               |  237 +-
 src/client/Client.h                                |    6 +-
 src/client/ClientSnapRealm.h                       |    2 +-
 src/client/Dir.h                                   |    2 +-
 src/client/Inode.h                                 |    4 +-
 src/client/Makefile.am                             |    2 +-
 src/client/MetaRequest.h                           |    2 +-
 src/client/ObjecterWriteback.h                     |    1 +
 src/client/SyntheticClient.cc                      |   29 +-
 src/client/Trace.h                                 |    2 +-
 src/cls/journal/cls_journal.cc                     |  534 ++-
 src/cls/journal/cls_journal_client.cc              |  200 +-
 src/cls/journal/cls_journal_client.h               |   54 +-
 src/cls/journal/cls_journal_types.cc               |  124 +-
 src/cls/journal/cls_journal_types.h                |   71 +-
 src/cls/log/cls_log_client.cc                      |    2 +-
 src/cls/rbd/cls_rbd.cc                             |    4 +-
 src/cls/replica_log/cls_replica_log_ops.h          |    4 +-
 src/cls/rgw/cls_rgw_client.cc                      |    2 +-
 src/cls/version/cls_version_client.cc              |    2 +-
 src/common/BackTrace.h                             |    2 +-
 src/common/DecayCounter.h                          |    3 +-
 src/common/Finisher.h                              |    4 +-
 src/common/Formatter.cc                            |   30 +-
 src/common/Formatter.h                             |   21 +-
 src/common/Graylog.cc                              |  175 +
 src/common/Graylog.h                               |   81 +
 src/common/HTMLFormatter.cc                        |  158 +
 src/common/HTMLFormatter.h                         |   50 +
 src/common/HeartbeatMap.h                          |    4 +-
 src/common/Initialize.h                            |    2 +-
 src/common/LogClient.cc                            |   72 +-
 src/common/LogClient.h                             |   29 +-
 src/common/Makefile.am                             |   17 +-
 src/common/MemoryModel.h                           |    2 +-
 src/common/Mutex.h                                 |    2 +-
 src/common/OpQueue.h                               |   63 +
 src/common/PluginRegistry.h                        |    4 +-
 src/common/PrioritizedQueue.h                      |   86 +-
 src/common/RWLock.h                                |   34 +-
 src/common/SloppyCRCMap.h                          |    4 +-
 src/common/Timer.cc                                |    2 +-
 src/common/TracepointProvider.h                    |    2 +-
 src/common/TrackedOp.cc                            |   12 +-
 src/common/TrackedOp.h                             |    6 +-
 src/common/WeightedPriorityQueue.h                 |  359 ++
 src/common/WorkQueue.h                             |   20 +-
 src/common/admin_socket.cc                         |    4 +-
 src/common/align.h                                 |   30 +
 src/common/buffer.cc                               |   76 +-
 src/common/ceph_argparse.cc                        |    2 +
 src/common/ceph_argparse.h                         |    2 +-
 src/common/ceph_context.cc                         |   46 +-
 src/common/ceph_time.cc                            |   18 +-
 src/common/ceph_time.h                             |   37 +-
 src/common/common_init.cc                          |    6 +-
 src/common/common_init.h                           |    3 +-
 src/common/config.cc                               |   40 +-
 src/common/config.h                                |    1 +
 src/common/config_opts.h                           |   68 +-
 src/common/hobject.cc                              |  171 +-
 src/common/hobject.h                               |   10 +-
 src/common/lockdep.cc                              |    9 +-
 src/common/mutex_debug.cc                          |   97 +
 src/common/mutex_debug.h                           |  191 +
 src/common/obj_bencher.cc                          |  232 +-
 src/common/obj_bencher.h                           |   15 +-
 src/common/pick_address.cc                         |    2 +-
 src/common/shunique_lock.h                         |  395 ++
 src/common/str_map.cc                              |   14 +-
 src/common/strtol.cc                               |   50 +-
 src/common/strtol.h                                |   18 +-
 src/compressor/AsyncCompressor.h                   |    4 +-
 src/compressor/CompressionPlugin.h                 |    2 +-
 src/compressor/Compressor.h                        |    4 +-
 src/compressor/Makefile.am                         |    1 +
 src/compressor/snappy/CompressionPluginSnappy.cc   |    2 +-
 src/compressor/snappy/SnappyCompressor.h           |   12 +-
 .../CompressionPluginZlib.cc}                      |   14 +-
 src/compressor/zlib/CompressionZlib.cc             |  152 +
 src/compressor/zlib/CompressionZlib.h              |   45 +
 src/compressor/zlib/Makefile.am                    |   21 +
 src/crush/CrushTester.cc                           |    7 -
 src/crush/CrushTreeDumper.h                        |    4 +-
 src/crush/CrushWrapper.cc                          |   17 +-
 src/crush/builder.c                                |    2 +-
 src/erasure-code/ErasureCode.cc                    |    4 +-
 src/erasure-code/isa/Makefile.am                   |   15 +-
 src/erasure-code/jerasure/ErasureCodeJerasure.h    |    6 +-
 src/erasure-code/lrc/ErasureCodeLrc.h              |    4 +-
 src/global/Makefile.am                             |    3 +
 src/global/global_init.cc                          |   36 +-
 src/global/global_init.h                           |    8 +-
 src/global/pidfile.cc                              |  216 +-
 src/global/pidfile.h                               |    2 +-
 src/include/Makefile.am                            |    5 +
 .../{interval_set.h => btree_interval_set.h}       |  217 +-
 src/include/buffer.h                               |   35 +-
 src/include/ceph_features.h                        |    4 +
 src/include/cephfs/libcephfs.h                     |    6 +-
 src/include/cpp-btree/btree.h                      | 2394 ++++++++++
 src/include/cpp-btree/btree_container.h            |  349 ++
 src/include/cpp-btree/btree_map.h                  |  130 +
 src/include/encoding_btree.h                       |   60 +
 src/include/interval_set.h                         |   70 +-
 src/include/object.h                               |    3 +
 src/include/rados/buffer.h                         |   35 +-
 src/include/rados/librados.hpp                     |   28 +-
 src/include/rbd/librbd.h                           |   13 +-
 src/include/rbd/librbd.hpp                         |    5 +
 src/include/str_map.h                              |   10 +-
 src/include/types.h                                |    8 +
 src/include/uuid.h                                 |    6 +-
 src/init-ceph.in                                   |   14 +-
 src/java/Makefile.in                               |    9 +-
 src/journal/Entry.cc                               |   36 +-
 src/journal/Entry.h                                |   18 +-
 src/journal/FutureImpl.cc                          |   10 +-
 src/journal/FutureImpl.h                           |   14 +-
 src/journal/JournalMetadata.cc                     |  311 +-
 src/journal/JournalMetadata.h                      |   37 +-
 src/journal/JournalPlayer.cc                       |   38 +-
 src/journal/JournalPlayer.h                        |    6 +-
 src/journal/JournalRecorder.cc                     |   49 +-
 src/journal/JournalRecorder.h                      |   28 +-
 src/journal/Journaler.cc                           |   29 +-
 src/journal/Journaler.h                            |   17 +-
 src/journal/ObjectPlayer.cc                        |    3 +-
 src/journal/ObjectPlayer.h                         |    2 +-
 src/journal/Utils.h                                |    7 +
 src/kv/KineticStore.cc                             |    2 +-
 src/kv/KineticStore.h                              |    7 +-
 src/kv/LevelDBStore.cc                             |    2 +-
 src/kv/LevelDBStore.h                              |    6 +-
 src/kv/RocksDBStore.cc                             |   22 +-
 src/kv/RocksDBStore.h                              |    6 +-
 src/libcephfs.cc                                   |    2 +-
 src/librados/AioCompletionImpl.h                   |    6 +-
 src/librados/IoCtxImpl.cc                          |   20 +-
 src/librados/IoCtxImpl.h                           |    8 +-
 src/librados/PoolAsyncCompletionImpl.h             |    2 +-
 src/librados/RadosClient.cc                        |  148 +-
 src/librados/RadosClient.h                         |    2 +-
 src/librados/librados.cc                           |   39 +-
 src/librbd/AioCompletion.cc                        |    3 +
 src/librbd/AioCompletion.h                         |   22 +-
 src/librbd/AioImageRequest.cc                      |    2 +-
 src/librbd/AsyncOperation.cc                       |    2 +-
 src/librbd/ExclusiveLock.cc                        |    2 +-
 src/librbd/ImageCtx.cc                             |    7 +-
 src/librbd/ImageCtx.h                              |    1 +
 src/librbd/ImageWatcher.cc                         |    5 +-
 src/librbd/Journal.cc                              |   50 +-
 src/librbd/LibrbdAdminSocketHook.cc                |    6 +-
 src/librbd/LibrbdWriteback.h                       |    1 +
 src/librbd/Makefile.am                             |    4 +-
 src/librbd/ObjectMap.cc                            |   15 +-
 src/librbd/ObjectMap.h                             |    3 +-
 src/librbd/Operations.cc                           |    3 +-
 src/librbd/TaskFinisher.h                          |    9 +-
 src/librbd/WatchNotifyTypes.cc                     |    4 +-
 src/librbd/exclusive_lock/AcquireRequest.cc        |   34 +-
 src/librbd/exclusive_lock/AcquireRequest.h         |   24 +-
 src/librbd/exclusive_lock/ReleaseRequest.cc        |   12 +-
 src/librbd/exclusive_lock/ReleaseRequest.h         |    6 +-
 src/librbd/image/RefreshRequest.cc                 |  128 +-
 src/librbd/image/RefreshRequest.h                  |   14 +-
 src/librbd/internal.cc                             |    4 +-
 src/librbd/journal/Replay.h                        |    2 +-
 src/librbd/journal/{Entries.cc => Types.cc}        |  211 +-
 src/librbd/journal/{Entries.h => Types.h}          |  144 +-
 src/librbd/librbd.cc                               |  161 +-
 src/librbd/object_map/LockRequest.h                |    3 +
 src/librbd/object_map/RefreshRequest.cc            |   32 +-
 src/librbd/object_map/RefreshRequest.h             |   15 +-
 src/librbd/operation/Request.h                     |    2 +-
 src/log/Log.cc                                     |   39 +-
 src/log/Log.h                                      |   18 +-
 src/log/Makefile.am                                |    2 +-
 src/mds/Beacon.h                                   |    2 +-
 src/mds/CDir.cc                                    |    6 +-
 src/mds/CInode.cc                                  |    2 +-
 src/mds/InoTable.h                                 |    2 +-
 src/mds/Locker.cc                                  |    2 +-
 src/mds/LogEvent.h                                 |    2 +-
 src/mds/MDBalancer.cc                              |    2 +-
 src/mds/MDCache.cc                                 |   24 +-
 src/mds/MDCache.h                                  |    5 +-
 src/mds/MDLog.cc                                   |    2 +-
 src/mds/MDLog.h                                    |    8 +-
 src/mds/MDSAuthCaps.h                              |    6 +-
 src/mds/MDSContext.h                               |    4 +-
 src/mds/MDSDaemon.cc                               |  128 +-
 src/mds/MDSDaemon.h                                |    2 +-
 src/mds/MDSRank.cc                                 |  153 +-
 src/mds/MDSRank.h                                  |    2 +-
 src/mds/MDSTable.cc                                |    2 +-
 src/mds/Migrator.cc                                |    2 +-
 src/mds/RecoveryQueue.h                            |    2 +-
 src/mds/ScatterLock.h                              |    2 +-
 src/mds/Server.cc                                  |  275 +-
 src/mds/Server.h                                   |    4 +-
 src/mds/SessionMap.cc                              |    4 +-
 src/mds/SessionMap.h                               |    2 +-
 src/mds/SimpleLock.h                               |    2 +-
 src/mds/SnapClient.h                               |    2 +-
 src/mds/SnapServer.cc                              |   72 +-
 src/mds/StrayManager.cc                            |   61 +-
 src/mds/StrayManager.h                             |   15 +-
 src/mds/events/ECommitted.h                        |    2 +-
 src/mds/events/EMetaBlob.h                         |    8 +-
 src/mds/events/ENoOp.h                             |    2 +-
 src/mds/events/EOpen.h                             |    2 +-
 src/mds/flock.h                                    |    2 +-
 src/mds/mdstypes.h                                 |    8 +-
 src/messages/MMonMap.h                             |    2 +-
 src/messages/MOSDOpReply.h                         |   65 +-
 src/messages/MOSDSubOp.h                           |   22 +-
 src/mon/ConfigKeyService.cc                        |    6 +-
 src/mon/ConfigKeyService.h                         |    2 +-
 src/mon/Elector.cc                                 |    2 +-
 src/mon/Elector.h                                  |    4 +-
 src/mon/LogMonitor.cc                              |   43 +-
 src/mon/LogMonitor.h                               |   44 +
 src/mon/MDSMonitor.cc                              |    5 +-
 src/mon/MDSMonitor.h                               |    3 +-
 src/mon/MonCap.h                                   |    5 +-
 src/mon/MonClient.cc                               |    3 +
 src/mon/MonClient.h                                |    6 +-
 src/mon/MonMap.cc                                  |    4 +
 src/mon/Monitor.cc                                 |   54 +-
 src/mon/Monitor.h                                  |   14 +-
 src/mon/MonitorDBStore.h                           |    2 +-
 src/mon/MonmapMonitor.cc                           |   20 +-
 src/mon/MonmapMonitor.h                            |    4 +-
 src/mon/OSDMonitor.cc                              |   18 +-
 src/mon/OSDMonitor.h                               |    6 +-
 src/mon/PGMap.cc                                   |   53 +-
 src/mon/PGMap.h                                    |    3 +-
 src/mon/PGMonitor.cc                               |  179 +-
 src/mon/PGMonitor.h                                |    3 +-
 src/mon/Paxos.cc                                   |    2 +-
 src/mon/Paxos.h                                    |   12 +-
 src/mon/PaxosService.cc                            |    3 +
 src/mon/PaxosService.h                             |    9 +-
 src/mon/mon_types.h                                |    2 +-
 src/msg/Dispatcher.h                               |    3 +-
 src/msg/Message.h                                  |    2 +-
 src/msg/async/AsyncConnection.cc                   |   84 +-
 src/msg/async/AsyncConnection.h                    |    8 +-
 src/msg/async/AsyncMessenger.cc                    |    3 +-
 src/msg/async/AsyncMessenger.h                     |   10 +-
 src/msg/async/Event.cc                             |   69 +-
 src/msg/async/Event.h                              |    6 +-
 src/msg/async/EventEpoll.h                         |    2 +-
 src/msg/async/EventKqueue.h                        |    2 +-
 src/msg/async/EventSelect.h                        |    2 +-
 src/msg/async/net_handler.h                        |    2 +-
 src/msg/msg_types.h                                |    5 +-
 src/msg/simple/DispatchQueue.h                     |    6 +-
 src/msg/simple/Pipe.cc                             |   23 +-
 src/msg/simple/Pipe.h                              |    6 +-
 src/msg/simple/SimpleMessenger.h                   |    2 +-
 src/msg/xio/QueueStrategy.cc                       |    5 +-
 src/msg/xio/QueueStrategy.h                        |    4 +-
 src/msg/xio/XioConnection.h                        |   10 +-
 src/msg/xio/XioMsg.h                               |    4 +-
 src/msg/xio/XioPool.h                              |    2 +-
 src/msg/xio/XioPortal.h                            |    6 +-
 src/ocf/Makefile.in                                |    9 +-
 src/os/FuseStore.cc                                | 1102 +++++
 src/os/FuseStore.h                                 |   35 +
 src/os/Makefile.am                                 |   37 +-
 src/os/ObjectStore.cc                              |   14 +-
 src/os/ObjectStore.h                               |  433 +-
 src/os/bluestore/BlockDevice.cc                    |  554 +--
 src/os/bluestore/BlockDevice.h                     |  119 +-
 src/os/bluestore/BlueFS.cc                         |  156 +-
 src/os/bluestore/BlueFS.h                          |   66 +-
 src/os/bluestore/BlueRocksEnv.cc                   |    4 +-
 src/os/bluestore/BlueRocksEnv.h                    |    2 +-
 src/os/bluestore/BlueStore.cc                      | 1394 +++---
 src/os/bluestore/BlueStore.h                       |  291 +-
 src/os/bluestore/FreelistManager.cc                |   14 +-
 src/os/bluestore/FreelistManager.h                 |    9 +-
 .../bluestore/{BlockDevice.cc => KernelDevice.cc}  |  122 +-
 src/os/bluestore/KernelDevice.h                    |   95 +
 src/os/bluestore/NVMEDevice.cc                     |  914 ++++
 src/os/bluestore/NVMEDevice.h                      |  260 ++
 src/os/bluestore/StupidAllocator.cc                |   41 +-
 src/os/bluestore/StupidAllocator.h                 |   15 +-
 src/os/bluestore/bluefs_types.cc                   |   18 +-
 src/os/bluestore/bluestore_types.h                 |    4 +-
 src/os/filestore/BtrfsFileStoreBackend.h           |    2 +-
 src/os/filestore/CollectionIndex.h                 |   12 +-
 src/os/filestore/DBObjectMap.h                     |   12 +-
 src/os/filestore/FDCache.h                         |    4 +-
 src/os/filestore/FileJournal.cc                    |   20 +-
 src/os/filestore/FileJournal.h                     |    6 +-
 src/os/filestore/FileStore.cc                      |  239 +-
 src/os/filestore/FileStore.h                       |  164 +-
 src/os/filestore/GenericFileStoreBackend.cc        |    2 +-
 src/os/filestore/GenericFileStoreBackend.h         |    2 +-
 src/os/filestore/HashIndex.h                       |    2 +-
 src/os/filestore/IndexManager.h                    |    6 +-
 src/os/filestore/Journal.h                         |    2 +-
 src/os/filestore/JournalingObjectStore.cc          |   11 +-
 src/os/filestore/JournalingObjectStore.h           |    4 +-
 src/os/filestore/LFNIndex.cc                       |    2 +-
 src/os/filestore/WBThrottle.h                      |    2 +-
 src/os/filestore/XfsFileStoreBackend.h             |    2 +-
 src/os/filestore/ZFSFileStoreBackend.h             |    2 +-
 src/os/fs/FS.cc                                    |    4 +-
 src/os/fs/FS.h                                     |    6 +-
 src/os/keyvaluestore/GenericObjectMap.cc           | 1127 -----
 src/os/keyvaluestore/GenericObjectMap.h            |  429 --
 src/os/keyvaluestore/KeyValueStore.cc              | 3015 ------------
 src/os/keyvaluestore/KeyValueStore.h               |  700 ---
 src/os/kstore/KStore.cc                            |  182 +-
 src/os/kstore/KStore.h                             |   56 +-
 src/os/kstore/kstore_types.h                       |    2 +-
 src/os/memstore/MemStore.cc                        |  180 +-
 src/os/memstore/MemStore.h                         |  135 +-
 src/os/memstore/PageSet.h                          |    2 +-
 src/osd/ClassHandler.h                             |    2 +-
 src/osd/ECBackend.cc                               |   91 +-
 src/osd/ECBackend.h                                |    5 +-
 src/osd/ECTransaction.cc                           |    2 +-
 src/osd/ECTransaction.h                            |    5 +-
 src/osd/ECUtil.h                                   |    2 +-
 src/osd/HitSet.h                                   |   12 +-
 src/osd/OSD.cc                                     |  508 ++-
 src/osd/OSD.h                                      |  106 +-
 src/osd/OSDCap.h                                   |    7 +-
 src/osd/OSDMap.h                                   |   15 +-
 src/osd/PG.cc                                      |   67 +-
 src/osd/PG.h                                       |   76 +-
 src/osd/PGBackend.cc                               |   18 +-
 src/osd/PGBackend.h                                |   24 +-
 src/osd/PGLog.h                                    |    1 +
 src/osd/ReplicatedBackend.cc                       |  156 +-
 src/osd/ReplicatedBackend.h                        |    9 +-
 src/osd/ReplicatedPG.cc                            |  140 +-
 src/osd/ReplicatedPG.h                             |   26 +-
 src/osd/SnapMapper.cc                              |    2 +-
 src/osd/Watch.cc                                   |    6 +-
 src/osd/osd_types.cc                               |    8 +-
 src/osd/osd_types.h                                |   18 +-
 src/osdc/Filer.cc                                  |   61 +-
 src/osdc/Filer.h                                   |   14 +-
 src/osdc/Journaler.cc                              |   73 +-
 src/osdc/Journaler.h                               |   21 +-
 src/osdc/ObjectCacher.cc                           |   10 +-
 src/osdc/ObjectCacher.h                            |    4 +-
 src/osdc/Objecter.cc                               |  634 +--
 src/osdc/Objecter.h                                |  162 +-
 src/osdc/Striper.cc                                |    4 +-
 src/perfglue/disabled_heap_profiler.cc             |   10 +
 src/perfglue/heap_profiler.cc                      |   16 +
 src/perfglue/heap_profiler.h                       |    4 +
 src/pybind/rados.py                                |   16 +
 src/rbd_fuse/rbd-fuse.cc                           |   35 +-
 src/rbd_replay/ActionTypes.cc                      |   49 +-
 src/rbd_replay/ActionTypes.h                       |   53 +-
 src/rbd_replay/BufferReader.cc                     |    7 +-
 src/rbd_replay/BufferReader.h                      |    1 +
 src/rbd_replay/Replayer.cc                         |    5 +-
 src/rbd_replay/Replayer.hpp                        |    2 +-
 src/rbd_replay/actions.cc                          |   40 +
 src/rbd_replay/actions.hpp                         |   43 +-
 src/rbd_replay/ios.cc                              |   24 +
 src/rbd_replay/ios.hpp                             |   55 +
 src/rbd_replay/rbd-replay-prep.cc                  |   27 +-
 src/rgw/Makefile.am                                |    6 +-
 src/rgw/librgw.cc                                  |    3 +-
 src/rgw/rgw_acl.cc                                 |    2 +-
 src/rgw/rgw_acl.h                                  |    4 +-
 src/rgw/rgw_acl_s3.h                               |    8 +-
 src/rgw/rgw_acl_swift.cc                           |    8 +-
 src/rgw/rgw_acl_swift.h                            |    2 +-
 src/rgw/rgw_admin.cc                               |    7 +-
 src/rgw/rgw_basic_types.h                          |    1 +
 src/rgw/rgw_bucket.cc                              |   21 +-
 src/rgw/rgw_bucket.h                               |    3 +-
 src/rgw/rgw_client_io.cc                           |    4 +
 src/rgw/rgw_client_io.h                            |    2 +-
 src/rgw/rgw_common.cc                              |    1 +
 src/rgw/rgw_common.h                               |   77 +-
 src/rgw/rgw_cors_s3.h                              |    2 +-
 src/rgw/rgw_fcgi.h                                 |    2 +-
 src/rgw/rgw_formats.h                              |    7 +-
 src/rgw/rgw_http_client.h                          |    2 +-
 src/rgw/rgw_http_errors.h                          |    4 +
 src/rgw/rgw_json_enc.cc                            |   84 +
 src/rgw/rgw_loadgen.h                              |    2 +-
 src/rgw/rgw_log.cc                                 |    2 +-
 src/rgw/rgw_main.cc                                |   85 +-
 src/rgw/rgw_metadata.cc                            |    2 +-
 src/rgw/rgw_object_expirer.cc                      |    4 +-
 src/rgw/rgw_object_expirer_core.h                  |    2 +-
 src/rgw/rgw_op.cc                                  |  952 ++--
 src/rgw/rgw_op.h                                   |  183 +-
 src/rgw/rgw_orphan.h                               |    6 +-
 src/rgw/rgw_quota.cc                               |    2 +-
 src/rgw/rgw_rados.cc                               |   12 +-
 src/rgw/rgw_rados.h                                |   54 +-
 src/rgw/rgw_replica_log.h                          |    4 +-
 src/rgw/rgw_rest.cc                                |  259 +-
 src/rgw/rgw_rest.h                                 |   12 +-
 src/rgw/rgw_rest_client.cc                         |    2 +-
 src/rgw/rgw_rest_conn.cc                           |    2 +-
 src/rgw/rgw_rest_s3.cc                             |  594 ++-
 src/rgw/rgw_rest_s3.h                              |   42 +-
 src/rgw/rgw_rest_s3website.h                       |   96 +
 src/rgw/rgw_rest_swift.cc                          |  212 +-
 src/rgw/rgw_rest_swift.h                           |    3 +-
 src/rgw/rgw_rest_user.cc                           |    2 +-
 src/rgw/rgw_swift.cc                               |   18 +-
 src/rgw/rgw_swift.h                                |    6 +-
 src/rgw/rgw_swift_auth.h                           |    1 +
 src/rgw/rgw_user.cc                                |    7 +-
 src/rgw/rgw_user.h                                 |    6 +-
 src/rgw/rgw_website.cc                             |  119 +
 src/rgw/rgw_website.h                              |  200 +
 src/rgw/rgw_xml.cc                                 |  263 +-
 src/rgw/rgw_xml.h                                  |  199 +-
 src/rgw/rgw_xml_enc.cc                             |  131 +
 src/spdk/CONFIG                                    |   51 +
 src/spdk/LICENSE                                   |   30 +
 src/spdk/Makefile                                  |   47 +
 src/spdk/PORTING.md                                |   23 +
 src/spdk/README.md                                 |   91 +
 src/spdk/autobuild.sh                              |   70 +
 src/spdk/autopackage.sh                            |   45 +
 src/spdk/autotest.sh                               |   77 +
 src/spdk/include/spdk/assert.h                     |   55 +
 src/spdk/include/spdk/barrier.h                    |   40 +
 src/spdk/include/spdk/file.h                       |   42 +
 src/spdk/include/spdk/ioat.h                       |  103 +
 src/spdk/include/spdk/ioat_spec.h                  |  308 ++
 src/spdk/include/spdk/mmio.h                       |   91 +
 src/spdk/include/spdk/nvme.h                       |  634 +++
 src/spdk/include/spdk/nvme_intel.h                 |  196 +
 src/spdk/include/spdk/nvme_spec.h                  | 1114 +++++
 src/spdk/include/spdk/pci.h                        |   55 +
 src/spdk/include/spdk/pci_ids.h                    |   39 +
 src/spdk/include/spdk/queue.h                      |   49 +
 src/spdk/include/spdk/queue_extras.h               |  341 ++
 src/spdk/include/spdk/string.h                     |   46 +
 src/spdk/include/spdk/vtophys.h                    |   51 +
 src/spdk/lib/Makefile                              |   44 +
 src/spdk/lib/ioat/Makefile                         |   51 +
 src/spdk/lib/ioat/ioat.c                           |  673 +++
 src/spdk/lib/ioat/ioat_impl.h                      |   93 +
 src/spdk/lib/ioat/ioat_internal.h                  |  103 +
 src/spdk/lib/ioat/ioat_pci.h                       |   94 +
 src/spdk/lib/memory/Makefile                       |   51 +
 src/spdk/lib/memory/vtophys.c                      |  183 +
 src/spdk/lib/nvme/Makefile                         |   51 +
 src/spdk/lib/nvme/nvme.c                           |  249 +
 src/spdk/lib/nvme/nvme_ctrlr.c                     |  915 ++++
 src/spdk/lib/nvme/nvme_ctrlr_cmd.c                 |  294 ++
 src/spdk/lib/nvme/nvme_impl.h                      |  171 +
 src/spdk/lib/nvme/nvme_internal.h                  |  444 ++
 src/spdk/lib/nvme/nvme_ns.c                        |  139 +
 src/spdk/lib/nvme/nvme_ns_cmd.c                    |  439 ++
 src/spdk/lib/nvme/nvme_qpair.c                     | 1011 +++++
 src/spdk/lib/util/Makefile                         |   51 +
 src/spdk/lib/util/file.c                           |  108 +
 src/spdk/lib/util/pci.c                            |  360 ++
 src/spdk/lib/util/string.c                         |   82 +
 src/spdk/mk/spdk.common.mk                         |  139 +
 src/spdk/mk/spdk.deps.mk                           |    3 +
 src/spdk/mk/spdk.subdirs.mk                        |    3 +
 src/test/Makefile-client.am                        |   47 +
 src/test/Makefile-server.am                        |    5 +
 src/test/Makefile.am                               |   28 +-
 src/test/ObjectMap/KeyValueDBMemory.cc             |    4 +-
 src/test/ObjectMap/KeyValueDBMemory.h              |    5 +-
 src/test/TestTimers.cc                             |    4 +-
 src/test/admin_socket.cc                           |    2 +-
 src/test/bench/bencher.cc                          |    6 +-
 src/test/bench/distribution.h                      |    2 +-
 src/test/bench/dumb_backend.h                      |    5 +-
 src/test/bench/rados_backend.h                     |    2 +-
 src/test/bench/rbd_backend.h                       |    2 +-
 src/test/bench/small_io_bench_fs.cc                |    6 +-
 src/test/bench/testfilestore_backend.cc            |   16 +-
 src/test/bench/tp_bench.cc                         |    3 +-
 src/test/bench_log.cc                              |    2 +-
 src/test/bufferlist.cc                             |  117 +-
 src/test/centos-6/ceph.spec.in                     |  444 +-
 src/test/centos-7/ceph.spec.in                     |  444 +-
 src/test/ceph-disk.sh                              |  363 --
 src/test/ceph_argparse.cc                          |    2 +-
 src/test/ceph_objectstore_tool.py                  |   12 +-
 src/test/cli/radosgw-admin/help.t                  |    6 +
 src/test/cli/rbd/help.t                            |    2 +
 src/test/cls_journal/test_cls_journal.cc           |  217 +-
 src/test/cls_log/test_cls_log.cc                   |    9 +
 src/test/common/ObjectContents.h                   |   10 +-
 src/test/common/test_async_compressor.cc           |    2 +-
 src/test/common/test_config.cc                     |    6 +-
 src/test/common/test_crc32c.cc                     |   14 +-
 src/test/common/test_interval_set.cc               |  564 +++
 src/test/common/test_lru.cc                        |    2 +-
 src/test/common/test_mutex_debug.cc                |  101 +
 src/test/common/test_prioritized_queue.cc          |    6 +-
 src/test/common/test_shunique_lock.cc              |  576 +++
 src/test/common/test_str_map.cc                    |    2 +-
 src/test/common/test_time.cc                       |    3 +-
 src/test/common/test_weighted_priority_queue.cc    |  287 ++
 src/test/compressor/Makefile.am                    |   22 +
 src/test/compressor/compressor_example.h           |    4 +-
 src/test/compressor/compressor_plugin_example.cc   |    2 +-
 ...n_snappy.cc => test_compression_plugin_zlib.cc} |   29 +-
 src/test/compressor/test_compression_snappy.cc     |   34 +-
 ...pression_snappy.cc => test_compression_zlib.cc} |   31 +-
 src/test/daemon_config.cc                          |    8 -
 src/test/encoding.cc                               |    2 +-
 src/test/encoding/ceph_dencoder.cc                 |    6 +-
 src/test/encoding/test_ceph_time.h                 |   30 +
 src/test/encoding/types.h                          |    7 +-
 src/test/erasure-code/Makefile.am                  |   74 +-
 src/test/fedora-21/ceph.spec.in                    |  444 +-
 src/test/formatter.cc                              |  149 +-
 src/test/journal/RadosTestFixture.cc               |    4 +-
 src/test/journal/test_Entry.cc                     |   16 +-
 src/test/journal/test_FutureImpl.cc                |   46 +-
 src/test/journal/test_JournalMetadata.cc           |    2 +-
 src/test/journal/test_JournalPlayer.cc             |  116 +-
 src/test/journal/test_JournalRecorder.cc           |   20 +-
 src/test/journal/test_JournalTrimmer.cc            |    2 +-
 src/test/journal/test_Journaler.cc                 |   51 +-
 src/test/journal/test_ObjectPlayer.cc              |   28 +-
 src/test/journal/test_ObjectRecorder.cc            |   37 +-
 src/test/librados/TestCase.cc                      |   56 +-
 src/test/librados/aio.cc                           |   16 +-
 src/test/librados/list.cc                          |    2 +-
 src/test/librados/test.cc                          |   45 +-
 src/test/librados/tier.cc                          |   19 +-
 src/test/librados_test_stub/TestRadosClient.cc     |    4 +-
 src/test/librados_test_stub/TestWatchNotify.cc     |  226 +-
 src/test/librados_test_stub/TestWatchNotify.h      |   33 +-
 .../exclusive_lock/test_mock_AcquireRequest.cc     |   15 +-
 .../exclusive_lock/test_mock_ReleaseRequest.cc     |   10 +-
 src/test/librbd/fsx.cc                             |   10 +-
 src/test/librbd/image/test_mock_RefreshRequest.cc  |  711 +++
 src/test/librbd/journal/test_Entries.cc            |    4 +-
 src/test/librbd/journal/test_Replay.cc             |    2 +-
 src/test/librbd/journal/test_mock_Replay.cc        |    2 +-
 src/test/librbd/mock/MockExclusiveLock.h           |    1 +
 src/test/librbd/mock/MockImageCtx.h                |   28 +
 src/test/librbd/mock/MockJournal.h                 |    2 +-
 src/test/librbd/mock/MockObjectMap.h               |    4 +-
 .../librbd/object_map/test_mock_RefreshRequest.cc  |  125 +-
 .../operation/test_mock_SnapshotCreateRequest.cc   |   14 +-
 .../operation/test_mock_SnapshotRollbackRequest.cc |    6 -
 src/test/librbd/test_ImageWatcher.cc               |    2 +-
 src/test/librbd/test_librbd.cc                     |  136 +-
 src/test/librbd/test_mock_Journal.cc               |   37 +-
 src/test/librbd/test_support.cc                    |   15 +-
 src/test/librbd/test_support.h                     |    8 +
 src/test/messenger/simple_dispatcher.h             |    2 +-
 src/test/messenger/xio_dispatcher.h                |    2 +-
 src/test/mon/mon-created-time.sh                   |   54 +
 src/test/mon/test-mon-msg.cc                       |    2 +-
 src/test/mon/test_mon_workloadgen.cc               |    6 +-
 src/test/msgr/perf_msgr_server.cc                  |    2 +-
 src/test/msgr/test_async_driver.cc                 |    4 +-
 src/test/msgr/test_msgr.cc                         |    6 +-
 src/test/objectstore/DeterministicOpSequence.cc    |   18 +-
 src/test/objectstore/FileStoreTracker.cc           |   12 +-
 .../objectstore/ObjectStoreTransactionBenchmark.cc |    4 -
 src/test/objectstore/TestObjectStoreState.cc       |   14 +-
 src/test/objectstore/TestObjectStoreState.h        |    7 +-
 src/test/objectstore/store_test.cc                 |  406 +-
 src/test/objectstore/test_idempotent.cc            |    2 +-
 src/test/objectstore/test_transaction.cc           |   75 +
 src/test/objectstore/workload_generator.cc         |    9 +-
 src/test/objectstore/workload_generator.h          |   12 +-
 src/test/objectstore_bench.cc                      |   20 +-
 src/test/opensuse-13.2/ceph.spec.in                |  444 +-
 src/test/osd/Object.cc                             |  195 +-
 src/test/osd/Object.h                              |  238 +-
 src/test/osd/RadosModel.h                          |   32 +-
 src/test/osd/TestPGLog.cc                          |    2 +-
 src/test/osd/hitset.cc                             |    2 +-
 src/test/osd/osd-scrub-snaps.sh                    |   44 +-
 src/test/osd/types.cc                              |   66 +-
 src/test/osdc/FakeWriteback.h                      |    2 +
 src/test/perf_local.cc                             |   10 +-
 src/test/python/ceph-disk/setup.py                 |   27 -
 src/test/python/ceph-disk/tox.ini                  |   19 -
 src/test/rbd_mirror/test_ClusterWatcher.cc         |  190 +
 src/test/rbd_mirror/test_PoolWatcher.cc            |  260 ++
 src/test/rbd_mirror/test_main.cc                   |   36 +
 src/test/system/cross_process_sem.h                |    2 +-
 src/test/test_filejournal.cc                       |   22 +-
 src/test/test_ipaddr.cc                            |   39 +-
 src/test/test_pidfile.sh                           |  102 +
 src/test/test_snap_mapper.cc                       |    8 +-
 src/test/test_stress_watch.cc                      |    2 +-
 src/test/test_trans.cc                             |    2 +-
 src/test/test_xlist.cc                             |    2 +-
 src/test/xattr_bench.cc                            |   13 +-
 src/tools/Makefile-client.am                       |   33 +
 src/tools/Makefile.am                              |    2 +
 src/tools/RadosDump.h                              |   10 +-
 src/tools/ceph_monstore_tool.cc                    |    2 +-
 src/tools/ceph_objectstore_tool.cc                 |  123 +-
 src/tools/cephfs/DataScan.cc                       |    2 +-
 src/tools/cephfs/TableTool.cc                      |    2 +-
 src/tools/rados/PoolDump.h                         |    2 +-
 src/tools/rados/RadosImport.cc                     |   18 +-
 src/tools/rados/rados.cc                           |   55 +-
 src/tools/rbd/ArgumentTypes.cc                     |    6 +-
 src/tools/rbd/ArgumentTypes.h                      |    1 +
 src/tools/rbd/Utils.cc                             |   13 +-
 src/tools/rbd/Utils.h                              |    2 +-
 src/tools/rbd/action/BenchWrite.cc                 |    2 +-
 src/tools/rbd/action/Journal.cc                    |   50 +-
 src/tools/rbd/action/Kernel.cc                     |    4 +-
 src/tools/rbd/action/Lock.cc                       |    2 +-
 src/tools/rbd/action/MergeDiff.cc                  |    2 +-
 src/tools/rbd/action/Nbd.cc                        |    2 +-
 src/tools/rbd/action/Snap.cc                       |    2 +-
 src/tools/rbd_mirror/ClusterWatcher.cc             |  130 +
 src/tools/rbd_mirror/ClusterWatcher.h              |   48 +
 src/tools/rbd_mirror/ImageReplayer.cc              |   62 +
 src/tools/rbd_mirror/ImageReplayer.h               |   45 +
 src/tools/rbd_mirror/Mirror.cc                     |  101 +
 src/tools/rbd_mirror/Mirror.h                      |   56 +
 src/tools/rbd_mirror/PoolWatcher.cc                |  152 +
 src/tools/rbd_mirror/PoolWatcher.h                 |   50 +
 src/tools/rbd_mirror/Replayer.cc                   |  146 +
 src/tools/rbd_mirror/Replayer.h                    |   69 +
 src/tools/rbd_mirror/main.cc                       |   80 +
 src/tools/rbd_mirror/types.cc                      |   11 +
 src/tools/rbd_mirror/types.h                       |   53 +
 src/tools/rbd_nbd/rbd-nbd.cc                       |   11 +-
 .../run-tox.sh => tools/setup-virtualenv.sh}       |   25 +-
 src/tracing/Makefile.am                            |    8 +-
 src/tracing/librbd.tp                              |   47 +
 src/upstart/ceph-osd.conf                          |    8 +-
 src/vstart.sh                                      |   35 +-
 src/yasm-wrapper                                   |    4 +-
 systemd/Makefile.am                                |    3 +-
 systemd/Makefile.in                                |   12 +-
 systemd/ceph-mds@.service                          |    4 +
 systemd/ceph-mon@.service                          |    4 +
 systemd/ceph-osd@.service                          |    5 +-
 systemd/ceph-radosgw-prestart.sh                   |  100 -
 systemd/ceph-radosgw@.service                      |    4 +
 udev/95-ceph-osd.rules                             |   39 +
 744 files changed, 42917 insertions(+), 18178 deletions(-)

diff --git a/AUTHORS b/AUTHORS
index 8469180..4c014a1 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -19,6 +19,7 @@ Alexandre Marangone <amarango at redhat.com>
 Alexandre Oliva <oliva at gnu.org>
 Alex Elder <elder at inktank.com>
 Alexey Lapitsky <lex at realisticgroup.com>
+Alexey Sheplyakov <asheplyakov at mirantis.com>
 Alexis Normand <n.al3xis at gmail.com>
 Alfredo Deza <adeza at redhat.com>
 Alfredo Deza <alfredo.deza at inktank.com>
@@ -52,10 +53,12 @@ Ashish Chandra <ashish.a.chandra at ril.com>
 atwardowski <adam.twardowski at gmail.com>
 Babu Shanmugam <anbu at enovance.com>
 Baptiste Veuillez <baptiste.veuillez--mainard at telecom-bretagne.eu>
+Barbora Ančincová <bancinco at redhat.com>
 Bastian Blank <waldi at debian.org>
 Benjamin Kerensa <bkerensa at gmail.com>
 Benoît Knecht <benoit.knecht at fsfe.org>
 Billy Olsen <billy.olsen at canonical.com>
+Bin Zheng <zhengbin.08747 at h3c.com>
 BJ Lougee <almightybeeij at gmail.com>
 Bjørnar Ness <bjornar.ness at gmail.com>
 Blaine Gardner <blaine.gardner at hp.com>
@@ -97,6 +100,7 @@ Christophe Courtaut <christophe.courtaut at gmail.com>
 Christopher O'Connell <jwriteclub at gmail.com>
 Christoph Hellwig <hch at infradead.org>
 Christos Stavrakakis <stavr.chris at gmail.com>
+chuanhong-wang <chuanhong.wang at 163.com>
 Chuanhong Wang <wang.chuanhong at zte.com.cn>
 Cilang Zhao <zhao.cilang at h3c.com>
 Claire Massot <claire.massot93 at gmail.com>
@@ -135,9 +139,11 @@ Drunkard Zhang <gongfan193 at gmail.com>
 Dunrong Huang <dunrong.huang at eayun.com>
 Dunrong Huang <riegamaths at gmail.com>
 Eleanor Cawthon <eleanor.cawthon at inktank.com>
+Emile Snyder <emsnyder at ebay.com>
 Emily Popper <emily.popper at dreamhost.com>
 Eric Mourgaya <eric.mourgaya at arkea.com>
 Erik Logtenberg <erik at logtenberg.eu>
+Erwan Velu <erwan at redhat.com>
 Erwin, Brock A <Brock.Erwin at pnl.gov>
 Esteban Molina-Estolano <eestolan at lanl.gov>
 Evan Felix <evan.felix at pnnl.gov>
@@ -174,6 +180,7 @@ Gregory Meno <gmeno at redhat.com>
 Guangliang Zhao <guangliang at unitedstack.com>
 Guang Yang <yguang at yahoo-inc.com>
 Guilhem Lettron <guilhem at lettron.fr>
+Gu Zhongyan <guzhongyan at 360.cn>
 Haifeng Liu <haifeng at yahoo-inc.com>
 Hannes Reinecke <hare at suse.de>
 Hannu Valtonen <hannu.valtonen at ormod.com>
@@ -199,6 +206,7 @@ Ilya Dryomov <ilya.dryomov at inktank.com>
 Ilya Shipitsin <ilia at localhost.localdomain>
 Ira Cooper <ira at samba.org>
 Ismael Serrano <ismael.serrano at gmail.com>
+Ivan Grcic <igrcic at gmail.com>
 Jacek J. Lakis <jacek.lakis at intel.com>
 Jacek J. Łakis <jacek.lakis at intel.com>
 James Page <james.page at ubuntu.com>
@@ -211,6 +219,7 @@ Javier Guerra <javier at guerrag.com>
 Javier M. Mellid <jmunhoz at igalia.com>
 Jean-Rémi Deveaux <jeanremi.deveaux at gmail.com>
 Jeff Epstein <jepst79 at gmail.com>
+Jeffrey Lu <lzhng2000 at hotmail.com>
 Jeff Weber <jweber at cofront.net>
 Jenkins <jenkins at ceph.com>
 Jens-Christian Fischer <jens-christian.fischer at switch.ch>
@@ -218,16 +227,19 @@ Jeremy Qian <vanpire110 at 163.com>
 Jevon Qiao <qiaojianfeng at unitedstack.com>
 Jiang Heng <jiangheng0511 at gmail.com>
 Jianhui Yuan <zuiwanyuan at gmail.com>
+Jianjian Huo <samuel.huo at gmail.com>
 Jiantao He <hejiantao5 at gmail.com>
 Jian Wen <wenjian at letv.com>
 Jiaying Ren <mikulely at gmail.com>
 Ji Chen <insomnia at 139.com>
 Jie Wang <jie.wang at kylin-cloud.com>
 Jim Schutt <jaschut at sandia.gov>
+jingkai <jingkai.yuan at intel.com>
 João Eduardo Luís <joao.luis at inktank.com>
 João Eduardo Luís <joao at redhat.com>
 Joao Eduardo Luis <joao at suse.de>
 Joaquim Rocha <joaquim.rocha at cern.ch>
+joconcepts <jonas at jonas-keidel.de>
 Joe Buck <jbbuck at gmail.com>
 Joe Handzik <joseph.t.handzik at hp.com>
 Joe Julian <jjulian at io.com>
@@ -293,6 +305,8 @@ Luis Pabón <lpabon at redhat.com>
 Lukasz Jagiello <lukasz at wikia-inc.com>
 Lu Shi <shi.lu at h3c.com>
 Ma Jianpeng <jianpeng.ma at intel.com>
+Marcel Lauhoff <lauhoff at uni-mainz.de>
+Marcel Lauhoff <ml at irq0.org>
 Marc Koderer <marc at koderer.com>
 Marco Garcês <marco.garces at bci.co.mz>
 Marcus Sorensen <shadowsor at gmail.com>
@@ -307,6 +321,7 @@ Matt Benjamin <mbenjamin at redhat.com>
 Matthew Roy <matthew at royhousehold.net>
 Matthew Wodrich <matthew.wodrich at dreamhost.com>
 Matt Richards <mattjrichards at gmail.com>
+Mauricio Garavaglia <mauriciogaravaglia at gmail.com>
 Maxime Robert <maxime.robert1992 at gmail.com>
 Mehdi Abaakouk <sileht at sileht.net>
 Michael McThrow <mmcthrow at gmail.com>
@@ -342,6 +357,7 @@ Owen Synge <osynge at suse.com>
 Padraig O'Sullivan <posulliv at umd.edu>
 Pascal de Bruijn <pascal at unilogicnetworks.net>
 Patience Warnick <patience at cranium.pelton.net>
+Patrick Donnelly <batrick at batbytes.com>
 Patrick McGarry <patrick at inktank.com>
 Patrick McGarry <pmcgarry at redhat.com>
 Paul Chiang <paul_chiang at tcloudcomputing.com>
@@ -357,6 +373,7 @@ Pierre Chaumont <pierre.chaumont31 at gmail.com>
 Pierre Rognant <prognant at oodrive.com>
 Piotr Dałek <piotr.dalek at ts.fujitsu.com>
 Qiankun Zheng <zheng.qiankun at h3c.com>
+Rachana Patel <rachana83.patel at gmail.com>
 Radoslaw Zarzynski <rzarzynski at mirantis.com>
 Rahul Aggarwal <rahul.1aggarwal at gmail.com>
 Rajesh Nambiar <rajesh.n at msystechnologies.com>
@@ -369,15 +386,18 @@ Riccardo Ferretti <rferrett at soe.ucsc.edu>
 ritz303 <ritz_303 at yahoo.com>
 Roald J. van Loon <roald at roaldvanloon.nl>
 Robert Jansen <r.jansen at fairbanks.nl>
+Robert LeBlanc <robert.leblanc at endurance.com>
 Robin Dehu <robindehu at gmail.com>
 Robin H. Johnson <robbat2 at gentoo.org>
 Robin H. Johnson <robin.johnson at dreamhost.com>
 Robin Tang <robintang974 at gmail.com>
 Rohan Mars <code at rohanmars.com>
+Roi Dayan <roid at mellanox.com>
 Roman Haritonov <reclosedev at gmail.com>
 Ron Allred <rallred at itrefined.com>
 Rongze Zhu <zrzhit at gmail.com>
 root <liu.peiyang at h3c.com>
+root <root at clove83.zte.com.cn>
 root <root at phenom.dyweni.com>
 Ross Turk <ross.turk at inktank.com>
 Ross Turk <rturk at redhat.com>
@@ -388,6 +408,7 @@ Rutger ter Borg <rutger at terborg.net>
 Sage Weil <sage at inktank.com>
 Sage Weil <sweil at redhat.com>
 Sahid Orentino Ferdjaoui <sahid.ferdjaoui at cloudwatt.com>
+Sahithi R V <tansy.rv at gmail.com>
 Sam Lang <sam.lang at inktank.com>
 Samuel Just <sam.just at inktank.com>
 Samuel Just <sjust at redhat.com>
@@ -407,6 +428,7 @@ Sharif Olorin <sio at tesser.org>
 Shawn Edwards <lesser.evil at gmail.com>
 shishir gowda <shishir.gowda at sandisk.com>
 Shotaro Kawaguchi <kawaguchi.s at jp.fujitsu.com>
+shun-s <songshun134 at 126.com>
 Shu, Xinxin <xinxin.shu at intel.com>
 Shylesh Kumar <shmohan at redhat.com>
 Siddharth Sharma <siddharth at redhat.com>
@@ -470,6 +492,7 @@ Walter Huf <hufman at gmail.com>
 Wang, Yaguang <yaguang.wang at intel.com>
 Warren Usui <warren.usui at inktank.com>
 Wei Feng <feng.wei at h3c.com>
+Wei Jin <wjin.cn at gmail.com>
 Weijun Duan <duanweijun at h3c.com>
 Wei Luo <luowei at yahoo-inc.com>
 Wei Luo <weilluo at tencent.com>
diff --git a/ChangeLog b/ChangeLog
index c6f79ed..ee94175 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,29 +1,612 @@
-efc8134 (HEAD -> 10.0.3, tag: v10.0.3) v10.0.3
+5acb265 (HEAD, tag: v10.0.5) v10.0.5
+10ca7f3 debian/changelog: Remove stray 'v' in version
+ea45099 (tag: v10.0.4) v10.0.4
+13a5aac RPM: move scriptlets from ceph to ceph-base
+b87f4ef packaging: lsb_release build and runtime dependency
+4de86bf (origin/wip-move-requires) RPM: drop duplicate /var/lib/ceph/* directories
+275b5f2 RPM: drop duplicate udev rules from ceph-base
+5083980 packaging: rados-classes libraries in ceph-base
+cba211d packaging: pkg_resources.py runtime dependency
+45ede09 ceph.spec.in: move ceph-disk runtime dependency to ceph-osd
+8b04c2b ceph.spec.in: drop support for ancient SUSE versions
+1a0bb0f packaging: move python-flask runtime dependency to ceph-mon, radosgw
+608dd28 RPM: align runtime dependencies with Debian packaging
+d4dd0f4 debian: remove ceph-dbg package and any traces of it
+3fc7fdb debian: remove ceph-base's dependency on -mon and -osd
+dc54a85 debian: fix logrotate conf handling
+9a66bf0 debian: put libexec files in /usr/lib
+0cbe3de debian/rpm: split mon/osd/mds server packages
+dff71a5 mon/MDSMonitor.cc: properly note beacon when health metrics changes
+30bddc4 Makefile: workaround an automake bug for "make check"
+b2ae384 RPM: refrain from packaging EC testing plugins
+1b6faf6 Refrain from versioning EC testing plugins
+3c5bc07 Makefile: workaround an automake bug for "make check"
+9d25202 Makefile: workaround an automake bug for "make check"
+c7eba2a src/yasm-wrapper: ignore parameters starting with ggc-min
+69291f8 packaging: move ceph_common.sh and ceph-osd-prestart.sh to /usr/lib/ceph
+fe14a26 ceph.spec.in: declare /usr/share/ceph properly
+a6cc8ea rpm: drop systemd_libexec_dir template variable
+5c09a3e rpm: drop user_rgw and group_rgw template variables
+efc8134 (tag: v10.0.3) v10.0.3
 db9408b ceph.spec.in: add license declaration
+51a1572 (origin/wip-cmake-2-8-16) cmake: Added new unittests to make check
+a0b0ae0 Revert "osd: do not keep ref of old osdmap in pg"
 23ec516 ceph.spec.in: add copyright notice
+d267aaf tests: ceph-disk.sh should compare with the resolved path
+4d38302 libcephfs: Update LIBCEPHFS_VERSION to indicate the interface was changed
+08364c2 ceph-disk/test: fix test_prepare.py::TestPrepare tests
+8569d38 AsyncConnection: avoid debug log in cleanup_handler
+f4b083a osdc: Update to use C++11 concurrency
+21438a6 cmake: add libboost_system to EXTRALIBS
+2669e0c cmake: don't try to install src/ceph-disk directory
+33bc2ba cmake: don't add_test cephtool_test_osd twice
+38b9be2 librados_test_stub: protect against notify/unwatch race
+c1b3138 concurrency: Add shunique_lock
+9418f05 librbd: partial implementation of journal client / tag allocation
+8443e50 librbd: initial version of journal client and tag metadata
+608947c journal: added tag support methods
+5eda285 cls_journal: new get_client / client_update methods
+3f29e7b cls_journal: client registration should hold opaque data structure
+fa6d0ba cls_journal: new tag management methods and handling
+243d91d journal: switched entry tags to use id instead of string
+48c542a concurrency: make C++11 style debugging mutices
+672a694 time: Remove constexpr from non-literal type in test
+0fcb307 time: Have skewing-now call non-skewing now
+47c9b62 tests: mon: "chmod +x mon-created-time.sh"
+0851757 os/bluestore/KernelDevice: use _exit(1) for failure injection
+a297261 os/bluestore: fix block device file creation
+4c13f9d os/bluestore/BlueFS: initialize super block_size earlier in mkfs
+23d39cb os/kstore: kill dead code
+5d5e420 os/kstore: fix race condition
+1e8719f os/bluestore: fix race condition
+d5cdc7a os/bluestore: remove unused intrusive member hook in enode
+9828d49 global: do not start two daemons with a single pid-file (part 2)
+dca8111 os/kstore: assign nid for newly created onode
+d5dfb00 os/bluestore: assign nid for newly created onode
+9da41fe systemd/ceph-radosgw-prestart.sh: remove
+b9ce1df unittest_config: cope with $data_dir not being expanded on non-daemons
+78592fd common: add $data_dir/config to config search path
+2592909 common/config: expand $data_dir in config path
+3944d56 global: add data_dir_option for all daemons
+28dcf0b .gitignore: add rbd-mirror
+aedb928 os/bluestore/BlueStore: fix enode uniqueness
+dada290 admin/build-doc: depend on zlib1g-dev and graphviz
+8497099 os/bluestore/BlueStore: default Onode::true to false
+70f7b1e os/memstore/MemStore: set Collection::cid on create
+ab7d3de CacheTier: set cache_min_evict_age is effective
+2591f63 ceph-disk: use the type file for bluestore
+36656c8 tests: ceph-disk tests pid files must exist
+c50f3b9 tests: ceph-disk qa workunit for bluestore
+7b9a804 ceph-disk: bluestore deactivate / destroy
+cfea72e ceph-disk: bluestore list
+fbc0984 ceph-disk: bluestore trigger
+170dca3 ceph-disk: bluestore activate
+65bc36e ceph-disk: bluestore prepare
+a547bd2 ceph-disk: refactor prepare
+501ddff librados/RadosClient: stop client's finisher in a more graceful way
+be9c0ef os/filestore/JournalingObjectStore: stop journal in a more graceful way
+8f01667 packaging: add an rbd-mirror package
+b35061a man: stub manpage for rbd-mirror
+3d26384 rbd-mirror: skeleton of a mirroring daemon
+ec4a288 tests: generic ceph-disk qa check_osd_status
+76606c9 tests: pytest must not truncate data structures
+3d9c4a8 tests: ceph-disk no longer test separate journals
+c9e3e8a tests: workaround ceph-disk global side effects
+53014bf ceph-disk: move check_journal_reqs (no change)
+ef702ef ceph-disk: cleanup unused argument in list_format
+7f6295d ceph-disk: make all must setup.py install
+17c5287 tests: fix ceph-disk unit tests
+b030d8f ceph-detect-init: make all must setup.py install
+c7fbe21 tests: instructions to collect ceph-disk coverage
+ca97b67 tests: remove ceph-disk dead code
+e3b0395 ceph-disk: flake8 fixes
+2e05e47 tests: run ceph-disk tests via tox
+5151063 ceph-disk: refactor into a proper python module
+9429c04 tests: ceph-disk deactivate takes the activate lock
+f4dd1c6 tests: ceph-disk command returns stderr
+43dce13 ceph-osd: don't mention journal on mkfs
+7ab19fd makefile: include spdk source in dist tar ball.
+09e55d0 autotools: enable NVMEDevice if SPDK is found
+ecb1256 cmake: enable NVMEDevice if SPDK is found
+fd630dc spdk: add submodule spdk for the nvme support
+8c2bb7e cmake: add Find{dpdk,pciaccess}.cmake
+ff56154 NVMEDevice: fix the build error with latest spdk
+ec162f0 doc: add orphans commands to radosgw-admin(8)
+c77e8fd config: set default bluestore wal size to 96 MB
+316810d os/bluestore: change block file mkfs behavior
+31649e5 test/osd/: rework ceph_test_rados buffer generation and verification
+67b6fac ceph_test_rados: switch to std::minstd_rand0
+abe31b1 msg/simple: fix warning
+15c0998 os/bluestore/KernelDevice: crash even when there is no io
+b3903b8 os/bluestore/KernelDevice: make flush delay on inject crash tunable
+dbf6979 os/bluestore/BlueStore: tweak whitespace in _txc_finalize
+96f23eb os/bluestore/BlueStore: fix wal tail block padding on zero
+9a6e661 os/bluestore/BlueStore: load OnodeRef in _txc_add_transaction
+dd6fdc7 os/bluestore/BlueStore: dump txn in _txc_add_transaction at high debug
+f1bf983 os/bluestore/BlueStore: fix wal tail block padding
+8eac9c6 os/bluestore/BlueStore: wr lock collection in _txc_add_transaction
+82637be cmake: add KernelDevice.cc to libos_srcs
+457f023 CMake: For CMake version <= 2.8.11, use LINK_{PRIVATE,PUBLIC} instead of PRIVATE,PUBLIC for backward compatibility
+669b932 rados: Add units to rados bench output
+06af9f0 os/kstore: insert new onode to the front position of onode LRU
+e85ffac scripts/run-coverity: fix upload process
+41c3dc2 test_bufferlist: add move tests for bufferlist
+caed882 os/bluestore: insert new onode to the front position of onode LRU
+7c86775 bluestore/bluefs_types: fix imcomplete output message
+1c9c49e Crush when get immediate parent quickly reply.
+627535a KernelDevice: add aio_log_finish for write
+cdbea72 KernelDevice: remove unused local variables
+b675b2e KernelDevice: put a guard for sync read
+edf4e2e KernelDevice: add error handling for sync_file_range
+4d535f0 KernelDevice: use standard errno for better tracing
+f0534bb KernelDevice: use local aio config for start/stop logic
+6325dea BlockDevice: fix result code overflow
+3278968 BlueFS: add a guard for log space allocation during mkfs
+881ac7a BlueFS: fix race condition
+d8eb57d BlueFS: fix typo
+baf10b9 BlueFS: remove unused local variable
+b593e1c BlueFS: remove redundant intermediate buffer pad
+65e1a21 BlueFS: stop alloc if unable to replay
+a3789e8 BlueFS: fix unprecise calculation of sync interval
+09d2644 BlueFS: add log after updating prefer_bdev field.
+93413b5 qa/workunits/rados/test.sh: bash
+669f143 buffer: add operator= for ptr/bufferlist rvalue
+0305cee misc: using move construct to avoid extra atomic inc/dec
+dd4e6e1 buffer: add ptr move construct
+06cecb4 ceph_test_objectstore: fix warning
+9a34933 os/bluestore/KernelDevice: explicit on ctor
+dd3c87d doc: update PG doc with the backfill change
+4c0d1d7 mon: PG Monitor should report backfill_wait
+9c95bb0 mon: reduce CPU and memory manager pressure of pg health check
+a682e77 script/run-coverity: fix user
+4f49c6b journal: reset commit_position_task_ctx pointer after task complete
+122f406 xio: fix compilation
+2445b5e osdc/objecter: update crush location in a safer way
+2573840 test/librados: fix hash position overflow issue
+48dc0bf rados: fix result code overflow
+a3360b5 osdc/objecter: fix race condition
+636b8f1 rgw: do not include "expat.h" in heade file
+f5f3282 BlueStore: fix wrong action scope of enode
+41ff579 BlueStore: add error handling for clone
+e10490a BlueStore: add error handling for clone_range
+d1d6014 BlueStore: fix wrong decode logic of nid_max
+2d52438 BlueStore: fix unhandled return code from blockdevice APIs
+9bc0c31 BlueStore: try to scan as many errors as possible
+3e61e3b BlueStore: fix duplicated counting of errors
+559a81b BlueStore: fix enode verification logic
+10bd8ad BlueStore: add a guard for collection loading
+f761113 BlueStore: add a guard for overlay loading
+105a76b rgw-admin: document orphans commands in usage
 820a760 ceph-disk: Fix trivial typo
+3cf2972 msg: ignore request_redirect_t encode/decode when we not need
+15ea140 qa/workunits/objectstore/test_fuse.sh: fix root check
+eb5db37 os/memstore: disable lockdep on Collection::lock
+18d3a39 os/kstore: disable lockdep on Collection::lock
+fe649a3 os/bluestore: disable lockdep on Collection::lock
+08df77a os/filestore/CollectionIndex: disable lockdep on access_lock
+622ed5a common/RWLock: allow lockdep to be disabled
+ada41f4 lockdep: dump lock names if we run out of IDs
+300b15d os/bluestore: fix typo (again)
+9858626 os/bluestore: fix typo
 62906d8 osd/PG: fix scrub start object
+0819509 qa/workunits/rados/test.sh: run tests in parallel by default
+4d2c0f5 osd/ECTransaction: Removing unused local
+ccebf7a osd/PGBackend: PGBackend interface cleanup - make some interface methods pure virtual instead of assertion usage. Signed-off-by: Igor Fedotov <ifedotov at mirantis.com>
+7fcfc28 Compressor: add zlib unittests Signed-off-by: Alyona Kiseleva <akiselyova at mirantis.com>
+7bde5c4 Compressor: add Zlib compression plugin Signed-off-by: Kiseleva Alyona <akiselyova at mirantis.com>
+7d02b82 configure: remove unused lines
+55c9955 BlueStore: use special symbol to distinguish backend type
+86cd2f8 NVMEDevice: use the only aio thread to process task
+81defd1 NVMEDevice: add buffer write support
+5572064 BlueStore: add logger to record state transition latency
+dd5c36d NVMEDevice: add perf counter for queue time
+5497e2f BlueStore: allow nvme driver read from path
+619c08c NVMEDevice: no need to detach nvme device
+79f9d1e NVMEDevice: add polling time perf counter
+584f8f9 NVMEDevice: add flush perf counter calculate
+bb5d909 RocksDB: add block cache size option to RocksDB
+6b637df NVMEDevice: let perf counter do early
+91bf3bb NVMEDevice: make flush wait for all write completed
+b561d44 NVMEDevice: delay nvme aio thread start in case of logger is null
+4733510 NVMEDevice: let aio_write submit request FIFO
+23fd140 NVMEDevice: impl read_buffered method
+83c8261 NVMEDevice: adjust indent
+30342ab BlockDevice: Move KernelDevice reap ioc to BlockDevice
+b365d2c NVMEDevice: add log message to reminder potential failed reason
+705fd5b NVMEDevice: use stringify instead of std::to_string
+cbe42e1 NVMEDevice: disable flush command now
+8e258ff NVMEDevice: add perf counter
+263353b NVMEDevice: adjust request/task memory pool size
+997716c NVMEDevice: don't try to acquire lock always
+19a3c7e NVMEDevice: make aio thread pause when idle
+8740256 NVMEDevice: add retry support
+3776af0 NVMEDevice: add flush support
+8a59bd8 NVMEDevice: fix multi ops in one IOContext bug
+6081a58 NVMEDevice: Fix typo
+3bdd068 KernelDevice: Remove redundancy lines
+69e5a6d NVMEDevice: make controller global shared
+39c1318 NVMEDevice: reorder signal in case of signaled thread modify IOContext
+53c8b77 BlueFS: Use num_pending instead of pending_aios
+ac654f0 BlockDevice: initialize backend_priv
+a5748cb NVMEDevice: make read/write all async
+4689bd2 NVMEDevice: Fix static method
+2ad7c18 NVMEDevice: SPDK only permit submit/poll within one thread
+4e85f5e NVMEDevice: fix unknown serial number problem
+0525aae NVMEDevice: wrap C header file
+5aa1c86 Makefile: let spdk/dpdk libraries linked in upper layer
+f911e2b NVMEDevice: add spdk/pci.h
+bb1adcf NVMEDevice: nvme.h use _Static_assert which from c11
+b899452 NVMEDevice: let IOContext can use NVMEDevice pointer
+abb19d6 NVMEDevice: remove unused variables
+ba5973f configure.ac: fix spdk external header location
+fbdcb12 BlueFS: fix comparison warning
+153660c BlockDevice: Add override to explicit
+e64d16d BlockDevice: Fix compile error
+3a60406 NVMEDevice: fix compile error
+415111f NVMEDevice: use device name instead of path
+ee4951d makefile: add SPDK detect and NVMEDevice to Makefile
+b771c02 BlueStore: Add BlockDevice ability to create different backend
+1d9448e BlueStore: abstract BlockDevice based KernelDevice and NVMEDevice
+3557615 bluestore: rename BlockDevice to KernelDevice matching NVMEDevice
+e76f716 bluestore: add NVMEDevice backend
+2e2eb4c AsyncConnection: let ack reply use MSG_MORE flag
+6055d96 AsyncConnection: add a debug option to test noninline way
+86883b2 AsyncMessenger: add perf counter for inline message sent
+f6b87d3 AsyncConnection: use move instead of bufferlist copy construct
+e9872ad civetweb: update submodule to silence a compile warning
+493619a common,osd: remove _process(T *t) to silence warnings
+127e7c0 mon: monmap created is 0.000000 use ceph-deploy installed
+3e469f6 rgw: fix the condition of authenticated user's permit check
+a37b98a Event: fix array boundary
+b8bd9cb cmake: fix "jobserver unavailable" warning
+a17ea39 OSD:memory leak in ReplicatedPG.cc
+2292dd9 Event: don't wakeup if caller is thread self
+f8adf40 cmake: disable unused-variable warning for rocksdb
+2699c47 rgw: fix the signed/unsigned comparison warning
+d50bda5 rados: bench: fix the signed/unsigned comparison warning
+dc289fe AsyncConnection: avoid dispatch event to a closed connection
+4633e8d cmake: remove duplicated HAVE_EXECINFO_H macro
+88a95b7 os/*stores: fix hidden warnings
+72d1984 os/bluestore: fix unsigned/signed comparison warning
+96cfa48 ceph_test_rados_api_*: use unique ec profiles for each pool
+9da2fff AsyncConnection: add log for reap_dead and clean_handler
+cdf3c90 os/memstore: return empty list if offset overflows
+7d232d1 os/memstore: fix wrong blocksize for statfs
+579fbc8 add some cppcheck-suppress noExplicitConstructor comments
+bbf0582 make ctors with one argument explicit
+65ed192 rgw/rgw_rest.cc: fix -Wsign-compare
+b811760 test/librados/list.cc: fix -Wsign-compare
+4b550e0 tools/rbd/action/Nbd.cc: prefer ++operator for non-primitive iterators
+15e4ef9 tools/rbd/action/Journal.cc: prefer ++operator for non-primitive iterators
+adfe0f3 osd/osd_types.cc: prefer ++operator for non-primitive iterators
+e91f072 os/bluestore/BlueStore.cc: prefer ++operator for non-primitive iterators
+65f694f librbd/internal.cc: prefer ++operator for non-primitive iterators
+7981f9b librbd/LibrbdAdminSocketHook.cc: prefer ++operator for non-primitive iterators
+bc8bd27 journal/JournalMetadata.cc: prefer ++operator for non-primitive iterators
+a442f5a test/osd/RadosModel.h: fix class member hiding member from base
+10f5a46 rgw/rgw_rados.h: init member in ctor list
+9e4722b rgw/rgw_orphan.h: init member in ctor init list instead of body
+41e95e7 rgw/rgw_common.h: init members in ctor init list instead of body
+5230806 common/RWLock.h: init member in ctor init list
+2738368 src/ceph_fuse.cc: init members in ctor
+0df5a00 test/librbd/fsx.cc: clarify calculation precedence for '&' and '?'
+5128ac2 MergeDiff.cc: close 'sd' correctly at the end of do_merge_diff()
+dec345b test/encoding/ceph_dencoder.cc: fix null pointer deref
+cbbe79a src/mds/MDSRank.cc: remove delete calls from handle_asok_command()
+bffcb2c rgw/rgw_rados.cc: close resource leak in error case
+b43497c auth/Crypto.cc: close theoretical memory leak
+e0a6653 os/bluestore/BlueStore.cc: fix UNINIT_CTOR in BlueStore
+0695629 mon/OSDMonitor.h: fix UNINIT_CTOR
+94cd338 bluestore/bluestore_types.h: fix UNINIT_CTOR warning
+6fac5a9 msg/simple/Pipe.cc: silence cppcheck warning about reassign before use
+21ac978 mon/PGMonitor.cc: return error instead of '0' from dump_stuck_pg_stats()
+dddc21b mon/PGMonitor.cc: assign result directly
+91279df test_crc32c.cc: close memory leak, free 'b' after malloc
+3c89932 global_init.cc: add tag for cppcheck to suppress intentional memleak
+8d131ba tools/cephfs/DataScan.cc: fix expression
+922d0b2 osd/OSD.cc: fix UNINTENDED_INTEGER_DIVISION
+b838300 tools/rados/rados.cc: fix UNINTENDED_INTEGER_DIVISION
+e89cc6a ECBackend.cc: fix dynamic_cast error handling
+e794126 man/8/ceph.rst: fix man page --admin-daemon
+cc83e80 crush/CrushTester.cc: remove unused variable
+e892f9a rbd-replay-prep.cc: replace inefficient string::find() w/ compare()
+5d14351 crush/CrushTester.cc: remove unused variable
+0cb4457 mon/Monitor.cc: replace inefficient usage of string::find() with compare()
+1f262a1 mds/Server.cc: use string::compare() instead of ::find()
+fd8f18a ceph_mon.cc: replace string::find() with compare()
+09ca147 tools/rados/rados.cc: fix race condition in load-gen complete callback
+136433d rgw: support admin credentials in S3-related Keystone authentication.
+882672d cmake: Add Graylog logging backend build support
+3968852 graylog: Fix ptr to bool conversion
+c4892d9 Graylog: Integrate with Log{Client,LogMonitor}
+f88fcf0 debian: Add build dep 'libboost-iostreams-dev'
+92bb542 graylog: Move graylog logging to Ceph::Log::Graylog class
+a483168 Graylog2 logging
+4ca6bfd ceph_osd.cc: fix unreachable flush call
+6d860ba cmake: musl libc does not implement backtrace. Added feature check and fallback to libexecinfo.
+152f4eb doc: Updated the RPM section for RHEL and CentOS
+d3fd52a BlueStore: fix null pointer access
+2d602dd qa/workunits/objectstore/test_fuse.sh: no bashism
+8102d23 include/str_map.h removed unnecessary function overloading of get_str_map()
+da614b8 BlueStore: fix typo
+fa7470a rbd_fuse:replaced pthread_mutex*() functions with ceph Mutex wrapper
+a19aa22 kstore: flush before we really start a clone
+fb307c2 kstore: flush before we really start a truncate
+b0bed15 BlueStore: add fast check against empty list
+06ad784 kstore: add fast check against empty list
+ddca0dc tools/rados: fix wrong op/object sizes in rand/seq bench
+4ad0858 kstore: simplify open_collection logic a bit
+21b6fe6 kstore: fix unmatched type of decode
+b99b61e ceph_osd.cc/ceph_mon.cc: cleanup unreachable exit call
+994ac29 qa: Add test for #13829
+8b777a0 common: Allow config set with negative value
 b0c2bcf os/bluestore: fix bluestore_wal_transaction_t encoding test
+c8c5d0a mon:some cleanup in MonmapMonitor.h
+698e4a1 mon:some cleanup in MonmapMonitor.cc
+96563c1 librbd: use task finisher per CephContext
+0a3822f librados_test_stub: watch/notify now behaves similar to librados
+6f2de31 FuseStore: fix mountpoint leak
+6eb4aa4 FuseStore: return error if encounter collection_list error
+87b7e53 FuseStore: fix unhandled error cases for open
+97639f6 FuseStore: fix memory leak
+5e564ea tests: simulate writeback flush during snap create
+3ba54cc test/common/test_weighted_priority_queue.cc: Add unit tests for the new Weighted Priority Queue.
+2061496 test/common/test_prioritized_queue.cc: Fix random shuffle and remove unneeded include.
+795fd8f osd: Add runtime config option to select which queue to use and the priority of the cutoff between the strict queue and the normal queue.
+57db464 rgw: cosmetic improvements in dump_content_length().
+5b8c044 rados: make rados bench metadata backward compatible
+b65d9c5 systemd: Add systemd sandboxing to services.
+157bf3e ceph-objectstore-tool: disable fuse support if !HAVE_LIBFUSE
+b2e5c55 osd: disable FuseStore if !HAVE_LIBFUSE
+563206c make: disable FuseStore if !WITH_FUSE
+63d3d2b cmake: link against libfuse if HAVE_LIBFUSE
+fd8ab0b cmake: remove the copyright of FindSnappy.cmake
+93d633a mon: compact full epochs also
+1e4dfae Script and Guidelines for mirroring Ceph
+38e2f7a rbd: simplify snap rename arguments
+18fe93c osd: Pass coll_t by reference
+5e4eb3f OSD: Deleting transaction object right after applying transaction
+464a0ad gitignore: ignore backup files from editors etc.
+e942051 remove unused source file
+bd2fdf8 OSD::consume_map: correctly remove pg shards which are no longer acting
+ab4b373 journal: flush commit position on metadata shutdown
+17901f7 osd: use collection handle where possible
+6560b14 osd: associate a CollectionHandle& with PGBackend
+f3de2b8 osd: associate a CollectionHandle with each PG
+8e172a9 os/memstore: implement handle-based read methods
+34fe2e8 os/memstore: implement open_collection
+765eb9f os/bluestore: implement CollectionHandle based read methods
+e81e3c0 os/bluestore: implement open_collection
+414d09b os/ObjectStore: add CollectionHandle wrappers for all read ops
+b6df3e1 os/ObjectStore: add CollectionHandle
+7d3579d Revert "kstore: fix dest onode process logic of clone_range"
+dc3faed Revert "kstore: fix dest onode process logic of clone"
+493afb4 Revert "kstore: fix nid overwritten of write/zero"
+d7e21cb Revert "BlueStore: fix nid overwritten of write/zero"
+fbd5cd4 Revert "BlueStore: fix dest onode process logic of clone"
+d1d88ba Revert "BlueStore: fix dest onode process logic of clone_range"
+2c353a8 librbd: block maintenance until after journal is ready
+4db22fd os/bluestore/BlueStore: use std::atomic
+bd694d4 os/bluestore/BlueFS: use std::atomic
+0613765 os/FuseStore: improve readdir for omap and attr
+a47f8ff os/FuseStore: wait for commit and flush in all updates
+f84384a os/FuseStore: use offset for readdir
+c3954e6 qa/workunits/objectstore/test_fuse.sh: simple workunit test
+3e36065 os/FuseStore: add 'type' file in root dir
+0638b11 os/ObjectStore: add get_type()
+e1a371d ceph-objectstore-tool: drop filestore-specific sanity check
+c4f5a08 ceph-objectstore-tool: add --op mkfs
+0ed0ec5 ceph-objectstore-tool: simplify error message
+614f165 ceph-objectstore-tool: 'whoami' is not required for an ObjectStore
+b2a1c86 os/FuseStore: do not call exists() on an out-of-bounds oid
+ca4f077 os/FuseStore: no-op chmod
+53a3336 os/FuseStore: allow object and collection creates
+57289de os/FuseStore: more ENOENT on getattr for non-existent items
+2aa6532 os/FuseStore: hide bitwise_hash_end if bits unknown; add bitwise_hash_bits
+27bae89 os/FuseStore: present bitwise_hash_end
+27dc220 os/bluestore: implement collection_bits
+17d3c9f os/ObjectStore: add collection_bits
+f8de017 os/FuseStore: respect fuse_debug
+aa8f2c9 ceph-objectstore-tool: no need to check for fsck here
+0e371d2 ceph-objectstore-tool: use myexit(1) for fsck
+d01967d osd: osd_objectstore_fuse
+96643e9 ceph-objectstore-tool: mount an ObjectStore via fuse
+f5d106f vstart.sh: put the OSD journal in the normal location
+efa88a4 [g]hobject_t: normalize operator<< result, and implement parse
+df53b16 os/bluestore/BlueStore: use std::mutex et al
+b62318e os/bluestore/BlueFS: use std::mutex et al
+8e5fbea os/bluestore/StupidAllocator: use std::mutex
+e9721b8 os/bluestore: fix FS creation by fs
+20c80b9 tools/rbd: fix error message typo
+130e8c6 Add UT for interval_set implementations
+5e2fc70 msg/async: bunch of fixes
+84175ba rgw: fix dangerous usage of strtok in RGWAccessControlPolicy_SWIFT.
+f176664 os/FileStore:clean up error message
+12649ef test: add unitest test_pidfile.sh
+4da5052 Mon: When set pool crush rule should consider the pool size.
+39e7fcd rbd: Add an argument-check for nbds_max in rbd-nbd
+a45f484 Build: Allow jemalloc enabled build with rocksdb-static
+d1160a0 time: Fix encode/decode for real_time values
+0f6bfbe test: add ceph-dencoder test for ceph::real_time
+5efc5dc cmake: check for libsnappy in default path also
+db3dc4d osd: build_past_intervals_parallel() add diagnostics before assert
+a95a3d3 upstart/ceph-osd.conf: pass TCMALLOC_MAX_TOTAL_THREAD_CACHE_BYTES through
+b77cd31 mon: add mon_config_key prefix when sync full
+f2c0ef4 global/pidfile: do not start two daemons with a single pid-file
+845f3f7 mds:judgment added to avoid the risk of visiting the NULL pointer
+508013d mon: drop useless rank init assignment
+54aef92 Crush: add some safe judgment
+45f338d mon: fix locking in preinit error paths
+3cf8465 bluefs_types: fix test instance of fs transaction
+07af5f6 bluefs_types: fix device index overflow
+8395059 time: Make ceph_time clocks work under BSD
+e71057d os/fs: fix io_getevents argument
+68a571b doc: small edits according to review
+2f3774b doc: remove old install-ceph-gateway_old.rst file
+1037d2e doc: Add Ubuntu-specific steps to install RGW
+f66f9d7 doc: Port changes from downstream (install-ceph-gateway)
+46484e9 doc: Adding headers to RGW quick.
+d7eae8c doc: Edited RGW quick start according to peer review.
+3ce648d rados: bench: add --max-objects
+c2b5a68 BlueFS: fix space leak during write
+5088dfd BlueFS: flush_wait log before update super
+aba6746 PG::activate(): handle unexpected cached_removed_snaps more gracefully
 68c7877 objecter: avoid recursive lock of Objecter::rwlock
+4d30e7c os/bluestore: fix bluestore_wal_transaction_t encoding test
+3d87312 mon/PGMonitor.cc:warning if pg not scrubbed
+ab01c0e Removing duplicate definitions for rados pool_stat_t and cluster_stat_t
+da6d696 osd: delete useless bytes_written defined in RepModify
+2aa0f31 rbd: remove canceled tasks from timer thread
+5003770 doc: Fixes a CRUSH map step take argument
+7f44acf os: add custom move ctor/assignment for Transaction
+fd33668 os: add custom move ctor/assignment for TransactionData
+496605c os: Transaction sets default values to avoid duplication in ctors
+78cfe67 os: move Transaction ctors closer to member variables
+9e37a87 test: add unit test for Transaction move/copy
+c00945e cleanup: remove obsolete option "filestore_xattr_use_omap"
+fe4e180 rbd-replay: async open/close calls support
+19c39fa KeyValueStore: Kill this
+4524708 test/cls :clean useless pools
+82f28a8 rgw: fix up the use of tenant before it's available
+056a10c rgw: add a configurable to enable/disable static website
+f55fe9f rgw: fix null dereference
+d71b700 rgw: don't abort on op_ret < 0
+269c61a configure.ac: make "--with-librocksdb-static" default to 'check'
+f27e9c4 rgw: the big op_ret change
+458d8e5 librbd: API: async open and close
+3ebb584 librbd: fixed "hidden" virtual methods
+674fe18 cmake: let ceph-client-debug link with tcmalloc
+c74eeb5 test/objectstore: fix unused variable warnings
+b86c40b include/[btree_]interval_set: drop useless assert
+9cafe60 include/[btree_]interval_set: use std::swap
+7ccd1d2 os/bluestore/FreelistManager: use btree_map::erase return value
+c59b84c rbd-replay: handle EOF gracefully
+e430f8e mds: fix locking around handle_conf_change
+448bea4 client: don't hold client_lock around objecter init
+94d1aa2 client: take lock in handle_conf_change
+f4b0b2f cmake: fix libcephfs linking
+26d1032 mds: Fix lock ordering around asok registration
+fb6dea3 vstart: enable lockdep
+5793df4 doc: vstart update a few commandline switches
+4400b81 vstart.sh: If use '-n' delete related source rather than consider '-k'.
+4d00a14 SubmittingPatches: rebase to resolve conflicts
+a2c1b53 SubmittingPatches: sending kernel patches to mailing list
+18409a8 SubmittingPatches: add doc for commit title
+dbabbd0 SubmittingPatches: add doc for "Fixes:" tag
+2b5588a SubmittingPatches: update with reST syntax
+03e01ae init-ceph: pass TCMALLOC_MAX_TOTAL_THREAD_CACHE_BYTES through
+3772006 osd/: make heap properties settable via admin socket
+99b054f osd/: make heap properties gettable from admin_socket
+e1da085 test: race condition in ReplayOnDiskPostFlushError test case
+1442621 journal: avoid race between in-flight notifications and flush
+bbc06e7 rgw: fix subuser rm command failure
+eeae702 rgw: fix create bucket error handling
 9183d43 cmake: test_build_libcephfs needs ${ALLOC_LIBS}
+f7d59c3 test: new mock test cases for librbd image refresh state machine
+f425ed9 librbd: close object map when dynamically disabled
+5a42183 test: correct v1 image format mock expectations
+d2c145e doc :- fixing image in section ERASURE CODING
+946af50 librbd: open object map before journal
+5b54ed3 librbd: correct handling of enabling/disabling dynamic features
+6c7ab5f rgw: client io shouldn't try to write zero length buffer
+bb9343e include/interval_set: clean up std usage
+432fa9d os/bluestore/StupidAllocator: use btree_map<> instead of map<>
+d06d645 os/bluestore/FreelistManager: use btree_map instead of map
+2439bb6 ceph_test_rados: clean up std usage
+23257c3 include: add btree_interval_map
+87dbf0b include/cpp-btree: add google's cpp-btree library
 d74a7c1 client: properly trim unlinked inode
+9588efe mds: Fix lock ordering around asok registration
+cbffc3a vstart: enable lockdep
+b041b53 BlueStore/StupidAllocator: fix assert
 f526716 doc: fix "mon osd down out subtree limit" option name
 137d839 pybind/ceph_argparse: update test for "ceph mds setmap"
+0cbc76b BlueStore: fix rename logic
+7971552 kstore: fix rename logic
+a9cd257 doc: rename SubmittingPatches to SubmittingPatches.rst
 8af0758 qa/workunits/cephtool/test.sh: update test for "ceph mds setmap"
 02d15bb mon/MDSMonitor: Add confirmation to "ceph mds rmfailed"
+0880e5c rgw: a few minor cleanups
+11913b9 WIP x-amz-website-redirect-location.
+9bdbb1b S3Website: AmazonS3 changed RedirectAll HTTP response.
+195ceed rgw: init_permissions() shouldn't return ENOENT in a specific case
+4ed95cc test: use RBD v1 format when features not specified
+5e8325c librbd: hide object map lock/unlock behind open/close
+2c1576b ReplicatedPG::process_copy_chunk: fix invalid list traversal for proxy_read
+97ed425 ReplicatedPG::process_copy_chunk: fix invalid list traversal for proxy_write
+1178271 rgw: remove split formatter code
 a51fbf2 rgw: Parse --subuser better
 e153623 rgw: Drop a debugging message
+f5da159 makefile: remove dependency on libedit
+a9addc6 mon: Go into ERR state if multiple PGs are stuck inactive
+eb17b8f BlueStore: fix typo
+696584c BlueStore: fix dest onode process logic of clone_range
+1c70f86 BlueStore: fix dest onode process logic of clone
+fde0f02 BlueStore: fix nid overwritten of write/zero
+1fc9edf kstore: fix typo
+6375870 kstore: fix fsid validation logic during mkfs
+9ab7998 kstore: fix dest onode process logic of clone_range
+7ce6d06 kstore: fix dest onode process logic of clone
+8065b8a kstore: fix nid overwritten of write/zero
+b3f7d02 osd/OSD: Simplifying 'r' usage
+6a23b23 os/filestore/FileJournal: Removing dead code
+d03146d os/bluestore/BlueStore: Removing dead code
+fcac028 os/bluestore/BlueStore: Simplifying default return value
+1f21322 mon/ConfigKeyService: Removing dead code
+39a5d97 mon/MDSMonitor: Removing dead code
+858f606 mon/PGMonitor: Removing dead code
+dece3be Add error check for object_map->sync()
+3995b1c mon/MonClient: avoid null pointer error when configured incorrectly
 bd56e17 OSDService: Fix typo in osdmap comment
+326eb4c tests: fix pybind path for RBD unit tests
 1aa674c librbd: missing lock on state transition
 1215a1a helgrind: annotate false-positive race conditions
 bb4d7a7 qa: disable rbd/qemu-iotests test case 055 on RHEL/CentOS
 4c9345b cmake: add missing check for HAVE_EXECINFO_H
+887fbe1 Fixes #14400 + some refactoring to avoid input buffer modification in future.
+ccd65d2 os/memstore: no magic len=0 to fiemap
+3c4eddb os/kstore: no magic len=0 to fiemap
+d794be4 os/bluestore: no magic len=0 to fiemap
+f9019a4 ceph_test_objectstore: do not pass len=0 to fiemap
 aed8577 osd: disable filestore_xfs_extsize by default
 3286106 os/kstore: fix u64 decode checks
 1590b08 os/bluestore: fix _key_decode_u64 error checks
 92a5d1d os/bluestore/FreelistManager: switch to std::mutex
+507774a common/buffer: replace RWLock with spinlocks
+b4bd72f BlueStore: drain wal_wq first if we exit from mounting due to wal_replay error
+aa28b58 vstart: update help with a few more switches
+5183ff3 BlueStore: verify fsid in a more proper way during mkfs
+29a7d68 tests: update pool alignment API
+f021f5a tools: fix pool alignment API overflow issue
+8aa068d librados: fix rados_ioctx_pool_requires_alignment2()
+5ff452b BlueStore: negative return code if we unable to open path
 fc3554e mon/MDSMonitor: Add confirmation to "ceph mds setmap"
 b4023a4 mon/MDSMonitor: handle invalid input for "ceph mds setmap"
+ce85582 configure: detect new gperftool
+3b5e940 os/bluestore: fix signed/unsigned comparison warnings
+d54a747 kv/RocksDBStore: replace deprecated calls
+1e426be fsx: fix signed/unsigned comparison warnings
+055fcaa erasure-code: create an internal isa lib for testing
+865ee53 unittest_ipaddr: fix compile warnings
+a671448 BlueStore: improve wal_replay logic a little bit
+167bea5 kstore: add metadata "type" to disk
+dea2d92 BlueStore: fix fd leak during error fsck scan
+b04f1e2 BlueStore: update obsolete content of comment
+475d97f BlueStore: remove reap_cond
+cb56320 kstore: remove reap_cond
+e773794 MemStore: remove apply_lock
+8b30f9d MemStore: drain finisher first during umount
+9cacdf1 AsyncConnection: Dont send message when local connection is closed
+0771efa AsyncConnection: Fix leak event
+a9c5f99 organizationmap: modify org mail info.
+e7631ef common: change the type of counter total/unhealthy_workers
+1668796 s3website: update cmake.
+44c5d0c Copyright: add copyright blocks to new files, and update files with existing ones for website changes.
+981e108 cleanup: remove dead function.
+e5607c0 undo: clean up dead enum RGWEndpointType.
+d74d964 Undo constant renaming: RGW_REST_*->RGW_PROTO_*.
+c145650 Revert "common/*Formatters: Split Formatters"
+4a2351e doc: amend the rados.8
 7bcc960 librbd: handle additional journal replay states
 41987c8 librbd: avoid recursive callback when replaying journal
 d093d83 tests: correct googlemock warnings
@@ -51,11 +634,16 @@ a1f7006 librbd: convert journal classes to templates for unit testing
 eb057e1 tests: compile RBD and RGW types into ceph-dencoder when enabled
 588ccfb librbd: move journal support classes to journal namespace
 10335c5 cmake: fix the build on trusty
+275aaf4 Fixing both the subissues from #14383 by using intrusive_ptr instead of Dir* in BlueFS::dir_map
+0a4d1a4 mon: add an independent option for max election time
 838b16a ceph_test_libcephfs: tolerate duplicated entries in readdir
+70cb0f1 doc: remove redundant space in monmaptool doc
+24d10e6 doc: remove redundant space in ceph-authtool doc
 cbaee84 osd:osdmap some clean up
 19dc272 mds: add config option to suspend logging
 5d8d666 mds: fix completed cap flush handling
 dafb46b mds: delay handling client caps until corresponding inode is created
+0187a59 cls/cls_rbd: pass string by reference
 300c2f7 Compressor: compressor plugins unit tests
 2cadc41 Compressor: compressor code extention: plugin system added
 7d52372 fsx: checkout old version until it compiles properly on miras
@@ -149,6 +737,7 @@ b430484 mailmap: Florent Manens affiliation
 5265302 mailmap: Evgeniy Firsov affiliation
 ebdb0a9 packaging: add build dependency on python devel package
 e3d45f0 os/bluestore: don't include when building without libaio
+0b369e1 rados: allow both object size and op size to be specified
 e59293d librbd: add additional granularity to lock states
 94b7d23 librbd: initialize object map before replaying journal
 4fa53ea librbd: do not accept RPC requests while transitioning lock state
@@ -180,12 +769,14 @@ b84cf3e test_libcephfs: add POSIX ACL tests
 5064671 client: POSIX ACL support
 e063edd CrushTool: fix typo and improve indention
 db2e21c CrushTool: exit if fail to add a bucket
+c69a2aa os/FileStore: print error to log on eio
 d94cf5b os/memstore: fix fiemap
 fa1c64a os/kstore: fix split_collection to persist cnode
 00ec724 os/bluestore: fix split_collection to persist cnode
 092c5ea os/bluestore: simplify rebalance_freespace
 cec5069 os/bluestore/BlueFS: add reclaim_blocks interface
 d50ddba os/bluestore: base initial bluefs allocation on min_alloc_ratio
+474c989 rados: fix bench output column separation
 95e885e Reduce string use in coll_t::calc_str()
 8d97df0 mailmap: Dongmao Zhang affiliation
 2c914e2 mailmap: sort files
@@ -310,6 +901,7 @@ bf932d3 include: define a new REJECT session message
 a4db99e client: report mount root in session metadata
 4932cb9 mds: validate client 'root' metadata field
 a327dd8 mds: add MDSAuthCaps.maybe_capable(path)
+4f4dc88 fix rescuing while activating
 f11ad56 mon/OSDMonitor: osdmap laggy set a maximum limit for interval
 d225522 os: fix overflow of collection_empty method
 c07dcd5 os: put a guard when read the entire object
@@ -455,7 +1047,7 @@ a684a59 os/bluestore/StupidAllocator: bluestore_debug_small_allocations
 4aa5f02 os/bluestore/BlueStore: fix _zero when previous extent partially unwritten
 b758d9f os/bluestore: support copy-on-write clones
 8ebb390 os/bluestore/BlueStore: only allow clone if hash matches
-64b4e2f os/bluestore: Enode infrastructure
+64b4e2f6 os/bluestore: Enode infrastructure
 fd75b4e os/bluestore/bluestore_types: add extent FLAG_COW_{HEAD,TAIL}
 e93e926 unittest_bluefs, unittest_bluestore_types
 45801ed os/bluestore/bluestore_types: add contains(), clear(), empty() to extent_ref_map
@@ -582,6 +1174,7 @@ be0528f os/newstore: recycle rocksdb log files
 feb2d3f rocksdb: latest master
 c25ff99 osd: clear pg_stat_queue after stopping pgs
 c01a314 mon/PGMap: show rd/wr iops separately in status reports
+7404f85 Remove unused SnapContext member snapc from MOSDSubOp message.
 6a59aae config: complains when a setting is not tracked
 2fd3f43 osd: remove repop_map in osd
 e29f55e osd: fix wip (l_osd_op_wip) perf counter
@@ -629,8 +1222,13 @@ fff4941 mds: we should wait messenger when MDSDaemon suicide
 6a560b5 async: don't use shared_ptr to manage EventCallback
 80c770e ceph-detect-init: fix py3 test
 54b12f5 rbd:must specify both of stripe-unit and stripe-count when specify striping features
+29f9d10 doc: add missing semicolon
+be55a27 doc: s/Bucket/BucketName
+dd065d8 doc: update SetACL API
+d46297e doc: s/ListBucketResponse/ListBucketsResponse
 d821aca doc: document "readforward" and "readproxy" cache mode
 777564c doc:adding "--allow-shrink" in decreasing the size of the rbd block to distinguish from the increasing option
+fd6fca7 doc: `Amazon.AWSClientFactory' is obsolete
 c2e3913 man: document listwatchers cmd in "rados" manpage
 877f332 ceph doc fix slip of pen
 eb8057c ceph-fuse: fix double free of args
@@ -654,6 +1252,7 @@ fd7fe8c tests: ceph-disk workunit increase verbosity
 b271a06 ceph-disk: fix typo
 f5d36b9 ceph-disk: log parted output
 5fa35ba ceph-disk: do not discard stderr
+0b951fd Doc: fix misleading configuration guide on cache tiering
 845f4a1 cache-tier: Allow to config eviction check max size
 cd2947d Dispatcher.h: include assert.h
 c2a2499 xio: avoid conversion warning w/xio_queue_depth
@@ -669,8 +1268,10 @@ a752649 librbd: fix wrong tip message
 c97b1a6 librbd: fix snap_exists API overflow issue
 0c31a6e librbd: fix readahead counter update logic
 70b0962 osd: use atomic to generate ceph_tid
+253d45b common/OpQueue: Add base class for Op Queues. Introduce the new Weighted Priority Queue and update Prioritized Queue to inherit the base class.
 96563fb msg/Message.h:remove unneeded inline
 6d9da6a librbd: uninitialized state in snap remove state machine
+d790a60 common/PrioritizedQueue.h: Style clean up and remove unneeded includes.
 ecf2beb time: Update OSDC for C++11 Time
 ac96425 osdc: Whitespace
 4367eb1 perfcounters: Allow C++11 time increment/decrement
@@ -713,6 +1314,7 @@ fbd5959 doc/release-notes: v10.0.1
 4238974 mon: add `osd blacklist clear`
 4276fb3 Delete rbd'order define
 16d5045 Delete rbd'order setup process
+51e956b fix OSD utilization is abnormal after data disk lost
 ec42514 osd: use unordered_map for repop_map With a prediction of expected per pg maximum ops to initialize the number of hash bucket
 d13f602 rbd:add destination image name validation for rbd-fuse mv operation
 8284165 rbd: process crushed, rbd creating with striping parameters
@@ -725,6 +1327,7 @@ d465830 mds: fix setvxattr (broken in a536d114)
 a8b5920 makefiles: remove bz2-dev from dependencies
 9409e96 ceph-fuse: add process to ceph-fuse --help
 228aedf rbd: implement mv operation for rbd-fuse
+f4a077c doc: Use 'ceph auth get-or-create' for creating RGW keyring
 eadc771 doc: Update list of admin/build-doc dependencies
 b1429aa mds: tear down connections from `tell` commands
 8d68d02 test/encoding/readable.sh: add non-whole type skip
@@ -786,6 +1389,7 @@ df0c1f5 journal: add iohint flags for journal replayer.
 5440334 doc: dev: introduction to tests
 74c249d common: improve cache efficiency if (weak_refs.count(key))   weak_refs[key] it traverses the b-tree twice replace it with weak_refs.find(key), which traverses b-tree once
 c3a383c common: improve cache, replace to unordered map with initializing hash bucket size Based on the PR https://github.com/ceph/ceph/pull/4441
+ef8fa8b pybind: Implementation of Ioctx.set_read to allow read from snapshots
 09e21b4 init-ceph: do umount when the path exists.
 14e9d29 mount/mtab.cc: memory leaks the free() should be called to free resources, in order to avoid memory leaks
 58bad43 client: modify a word in log
@@ -827,15 +1431,16 @@ ad47b41 CodingStyle: Fix coding-style on PG related Monitor files
 7f5ee33 librbd: fix test case race condition for journaling ops
 9162bd2 Revert "LifeCycle feature"
 82c40b6 signal_handler: added support for using reentrant strsignal() implementations vs. sys_siglist[]
+ae114ae cmake: support ccache via a WITH_CCACHE build option
 99bcc7c tests: --osd-scrub-load-threshold=2000 for more consistency
-26008e8 (origin/wip/debian-infernalis-patches) ceph-detect-init: Ubuntu >= 15.04 uses systemd
+26008e8 ceph-detect-init: Ubuntu >= 15.04 uses systemd
 93cdc98 pybind/rbd.pyx: misc typo bugfixes
 8fabcbb ceph-fuse:print usage information when no parameter specified
 469b55a pybind/rbd.pyx: PyString -> PyBytes
 50022d3 os: skip checking pg_meta object existance in FileStore pg_meta object in FileStore is actually a logical object without any significant information. All it data writes to omap (leveldb), and actually no competition condition for the real object in FileStore Based on the optimazation, we further reduce _omap_setkeys() execution time (123.784us to 108.444us, about 15%), and save cpu usage 0.5% globally
 1df9705 test/librbd/fsx: musl libc doesn't implement random_r. Use c++11 std::mt19937 generator instead.
 6f1fb16 doc: osd: s/schedued/scheduled/
-7d93cf4 (upstream/wip-buffer-header) buffer: make usable outside of ceph source again
+7d93cf4 (origin/wip-buffer-header) buffer: make usable outside of ceph source again
 93d3dfe Fixing NULL pointer dereference
 72785ee rgw: enforce SLO part's ETag match during GET on SLO of Swift.
 1cf149d rgw: append X-Static-Large-Object attribute during PUT on SLO.
@@ -897,7 +1502,7 @@ c83d6db cmake: update for recent rbd changes
 25c70cf Update Jiaying Ren affinity
 c11ca42 Update Rongze Zhu affinity
 593c124 doc: rst style fix for pools document
-e62954e (upstream/wip-fix-buffer) deb,rpm: package buffer_fwd.h
+e62954e (origin/wip-fix-buffer) deb,rpm: package buffer_fwd.h
 b23b92d AsyncConnection: Fix potential return code overflow
 3443b6d Pipe: Fix potential return code overflow
 1adf306 SubmittingPatches: there is no next; only jewel
@@ -959,7 +1564,7 @@ fb120d7 osd: call on_new_interval on newly split child PG
 60519e2 librbd: correct lock ordering issues discovered by lockdep
 1997144 doc: remove unnecessary period in headline
 2a0263f journal: correct lock ordering issues discovered by lockdep
-4230504 (upstream/wip-5073) rgw: remove comments
+4230504 (origin/wip-5073) rgw: remove comments
 eda44cd rgw: a minor cleanup
 bf0a7b4 rgw: don't re-set bucket tenant and name when selecting location
 ddb4caa rgw: fix a typo
@@ -968,7 +1573,7 @@ e5bfd94 rgw: avoid calling rgw_make_bucket_entry_name() when not needed
 9425b04 rgw: objexp hint name backward compatibility
 e8de349 rgw: inherit bucket tenant from user if not specified
 acda806 rgw: add a missing cap type
-bc091ed (upstream/wip-cython-rbd) pybind/rbd.pyx: only set self.closed after a successful close
+bc091ed (origin/wip-cython-rbd) pybind/rbd.pyx: only set self.closed after a successful close
 d10c61a pybind/rbd.pyx: remove redundant RBD.__init__ method
 8931875 ceph.spec: Cython is spelled python-Cython in OpenSuSE
 07ae545 pybind: Improvements to Cython build
@@ -1049,6 +1654,7 @@ e7a1506 tests: Add tests for user io event notify
 c3a1edb librbd: Add event notify interfaces
 169cd4d osd: dump number of missing objects for each peer with pg query
 9b0e359 librbd: automatically flush IO after blocking write operations
+9ced26c rgw/s3website: errordoc conditional handling
 3c6e692 include/rados/librados.h: fix typo
 6f6fd2f librbd: utilize common flush helper when closing parent images
 39c5b70 librbd: only enqueue flush completion if a flush is pending
@@ -1099,6 +1705,7 @@ f5e0cce osd: don't update rollback_info for replicated pool rollback_info is jus
 2b390fc osd: don't update unneccessary epoch for pg epoch always remains unless state of cluster changes. Therefore, avoid update epoch for every Op in order to same cpu cost and disk bandwidth.
 75f1412 DiskUsage: close formatter session on error exit
 573151f doc: Fixes a spelling error
+2f330f6 src/*/Makefile.am: test fixup for as-needed compiling.
 b96c7e6 aix shared library build
 3680dc3 mon/OSDMonitor: block 'ceph osd pg-temp ...' if update is pending
 7a8fd0e tools: add cephfs-table-tool 'take_inos'
@@ -1115,6 +1722,7 @@ fdb3f66 crush: add chooseleaf_stable tunable
 f7ca00a rgw: make APIs to work with tenants
 1f19b60 rgw: buckets within tenant namespace
 788477a rgw: user has a tenant property
+61bf1bd rgw/Makefile.am: declare rgw_rest_s3website.h as well for bugfixing.
 13a12a5 rgw: add an inspection to the field of type when assigning user caps
 3369a83 librbd: simplify IO method signatures for 32bit environments
 ba3c64c Fix mon routed_request_tids leak
@@ -1122,6 +1730,7 @@ e242d84 mds: remove MDCache::cap_import_paths
 15b2aca tests: ceph-helpers assert success getting backfills
 3c87598 MOSDOp/MOSDOpReply: Move MOSDOp and MOSDOpReply newest version decoding to the front of decoding function.
 889158a WBThrottle: fix incorrect throttle
+b913cc2 rgw/Makefile.am: declare rgw_website.h for buildfix.
 922fea7 client: s/close_sessions/_close_sessions/
 a4924d4 ceph.spec.in: add BuildRequires: systemd
 07a7483 client: close mds sessions in shutdown()
@@ -1130,7 +1739,7 @@ a4924d4 ceph.spec.in: add BuildRequires: systemd
 917d85f osbench: Adds handling for the lack of required folders ( data & journal ) and adds checking for previous data presence to avoid assertion
 2902030 osbench: Fix race condition that may cause Sequencer::dtor assertion on benchmark completion
 daae180 Doubled marking from line 1151
-ada6e32 (upstream/bp-smaller-pglog-2) osd: slightly reduce actual size of pg_log_entry_t
+ada6e32 (origin/bp-smaller-pglog-2) osd: slightly reduce actual size of pg_log_entry_t
 d1c9bf6 journal: support replay passed skipped splay objects
 56100ef tests: verify that journal player can handle skipped journal objects
 1509ada mailmap: Jenkins affiliation
@@ -1223,6 +1832,7 @@ fda3f7e add rbd-nbd test case
 c44ab62 release-notes: draft v10.0.0 release notes
 9359847 librbd: commit journal op events immediately
 18713e6 mon/PGMonitor: MAX AVAIL is 0 if some OSDs' weight is 0
+5e6fcd4 rgw: compile fixup for JSONFormatter move
 1420a1f doc: add v0.80.11 to the release timeline
 9e9b03e doc/releases: add v0.80.11 to release table
 4b5afe5 doc/release-notes: final v0.80.11 notes
@@ -1370,7 +1980,7 @@ ef011da Update .organizationmap
 0fd8de3 msg/async: support of non-block connect in async messenger
 f7f55e3 scrub: compare omap_digest with each other
 785e58e scrub: clarify the result report
-a3aa565 (upstream/wip-11287-rebased) journal: avoid holding lock while marking ops are complete
+a3aa565 (origin/wip-11287-rebased) journal: avoid holding lock while marking ops are complete
 4719696 cmake: updates for refactored librbd IO path
 10deea8 librbd: flush journal entries prior to releasing lock
 b515314 librbd: only erase IO events after they are marked safe
@@ -1489,10 +2099,11 @@ f018928 revise organization
 d290b27 osd: trivial optimization
 d28698b osd: fix trivial bug
 1ace4d0 auth: keyring without mon entity type should return -EACCES             test:                          see test.sh:test_mon_caps                          before modify:                          when we first exec ../qa/workunits/cephtool/test.sh -t mon_caps --asok-does-not-need-root , it stuck.                          after modify:                          exec again, return Permission denied.
+ae79f1d osd: update recovery stats when the recovery completes
 f7f5a08 internal: remove unused local variables
 c8fe5ae librados: cast oid to object explicitly before call ioctx methods Cast oid to object explicitly before call ioctx methods. Signed-off-by: xie xingguo <xie.xingguo at zte.com.cn>
 e986ade IoCtxImpl: remove unused variable sName
-a5651b8 (upstream/revert-6419-wip-openssl) Revert 0374bb4a2f5054d606e4aba2d97b5e6765e781b0
+a5651b8 (origin/revert-6419-wip-openssl) Revert 0374bb4a2f5054d606e4aba2d97b5e6765e781b0
 7496741 rgw: fix modification to index attrs when setting acls
 9689fe0 kv: fix string ctor usage
 bfeb90e librbd: fixed deadlock while attempting to flush AIO requests
@@ -1600,7 +2211,7 @@ da6825d test/test_rados_tool.sh: Add tests for the new bench's write options
 9259e6e tools/rados/rados.cc: Add options to choose the benchmark's write destination
 7524e16 tools/rados/rados.cc: Write to different destinations
 00c6fa9 Objecter: pool_op callback may hang forever.
-400b0f4 (upstream/javacruft-wip-ec-modules, origin/wip/ec-modules) Build internal plugins and classes as modules
+400b0f4 (origin/javacruft-wip-ec-modules) Build internal plugins and classes as modules
 d457fc2 mds: apply validate_disk_state to dirs too
 6ba5bef mds: tidy up cdir scrub_initialize in scrubstack
 1930083 mds: write scrub tag during validation
@@ -1670,12 +2281,12 @@ b3f8d56 osd: reoder fields in ObjectRecoveryProgress struct
 bf3c30c osd: reorder and trim fields SnapSetContext
 e0fd540 rgw:swift use Civetweb ssl can not get right url
 b698a76 rgw: Fix typo in RGWHTTPClient::process error message
-173bfd0 (origin/wip-openssl) rgw: link against system openssl (instead of dlopen at runtime)
+173bfd0 rgw: link against system openssl (instead of dlopen at runtime)
 8160af6 tools: ceph-monstore-update-crush: add "--test" to crushtool
 83afe15 test: ceph-disk: coverage list_format_dev_plain() new behavior.
 6253aea FileJournal:_fdump wrongly returns if journal is currently unreadable.
 39fb7f1 messages/MOSDOp: Cast in assert to eliminate warnings
-3047b56 (origin/wip-12997) rgw: Add default quota config
+3047b56 rgw: Add default quota config
 570285b ceph-disk: get Nonetype when ceph-disk list with --format plain on single device.
 f22f4ac mailmap: Xie Xingguo affiliation
 93ec538 crush/mapper: ensure take bucket value is valid
@@ -1714,7 +2325,7 @@ f4906a1 tests: ceph-disk workunit uses configobj
 163de5b tests: ceph-disk workunit uses the ceph task
 c4fdbdd cmake: Use uname instead of arch. arch is deprecated in linux-utils and coreutils does not install it by default.
 03e556b doc: Removed the NOTE section about non-LTS supported distributions
-58414c5 (origin/wip-13559-infernalis) librbd: potential assertion failure during cache read
+58414c5 librbd: potential assertion failure during cache read
 011e9e5 tests: reproduce crash during read-induced CoW
 2a6b90f doc/release-notes.rst: recovery isn't in the unified queue yet
 9bf21ee doc: Updated the OS recommendations for newer Ceph releases
@@ -1763,7 +2374,7 @@ ffd4f2a mailmap: Daniel Gryniewicz affiliation
 7b2e9fc ceph.in: Remove unused variable
 113d727 ceph.in: Don't drop out of command mode on certain kinds of errors
 bb5bcab makefile: For ceph command generation don't append another copy of ceph.in
-d4869a6 (upstream/wip-13441) test: add test for pg list_missing on EC pool
+d4869a6 (origin/wip-13441) test: add test for pg list_missing on EC pool
 531dd77 osd: list_missing should query missing_loc.needs_recovery_map
 597c43e tracing: add tracepoints for cache pin/unpin
 4783899 osd: return ENOENT when object doesn't exist for cache pin/unpin
@@ -1845,7 +2456,7 @@ b15c541 libcephfs: only check file offset on glibc platforms
 e4b8600 rgw: Handle x-amz-request-payer in pre-signed urls
 f9c44ef osd: drop the interim set from load_pgs()
 1fb9fc9 librbd: fix rebuild_object_map() when no object map exists
-fb62c78 (origin/wip-13274-infernalis) ceph_context: remove unsafe cast for singletons
+fb62c78 ceph_context: remove unsafe cast for singletons
 24740a7 client: drop prefix from int types
 65d0fc4 doc: fix outdated content in cache tier
 477bb06 ceph.spec.in: only run systemd-tmpfiles on ceph run directory
@@ -1866,26 +2477,86 @@ e26469e mailmap: Alexander Chuzhoy affiliation
 fee7144 rgw: fix response of delete expired objects
 2cf8d20 update radosgw-admin command
 4a3f375 vstart: set cephfs root uid/gid to caller
-7060a3b (upstream/loic-infernalis, origin/loic-infernalis) doc/infernalis: hate hate
+7060a3b (origin/loic-infernalis) doc/infernalis: hate hate
 e6a9e62 doc/release-notes: i hate rst
 e98408d doc/release-notes: final infernalis notes
+f6adeda rgw: more style fixes
+916f4bf rgw: style fixes
 b105449 doc/release-notes: fix some attributions
 e9f200c doc/release-notes: infernalis notable changes
 638738f Revert "common, global: use lttng ust functions for handling fork-like calls"
 fca97db rgw, doc: remove remark for lack of custom account metadata of Swift.
 b4c5620 doc: remove toctree items under Create CephFS
 3be81ae (tag: v9.1.0) 9.1.0
+74180ad doc: Fixed links in RGW start.
 036d36f debian/control: python-setuptools is a build dependency
+24a8e71 doc: wrapped the lines to 80 characters in start-RGW.
 8e59595 doc/release-notes: 9.1.0
+91d51b9 doc: More edits in RGW quick start.
 1deb31d Init crush_location in Objecter from config file.
 303263d os: add a field indicate xattr only one chunk for set xattr.
 65064ca OSD:shall reset primary and up_primary fields when beginning a new past_interval.
 8855e60 ReplicatedPG::maybe_handle_cache_detail: always populate missing_oid
 da4803e ReplicatedPG::_rollback_to: handle block on full correctly
+45870f0 (origin/wip-static-website) rgw: more style fixes
+43a568d rgw: style fixes
+34b90b0 minor fixes following rebase
+47d4266 Fixup.
+068d5a7 WIP: static-site errordoc work.
+61b71c6 Use more specific error codes for retarget of website requests
+85e761c s3website: Prepwork for x-amz-website-redirect-location header.
+ccd1698 S3Website: "good" error status is actually not zero, use the function instead.
+69edf0c S3Website: Ensure redirect_all has a 302 response, just like AmazonS3
+517336c RGWWebsite: first pass at error redirect and error page handling.
+5210ec5 rgw: prepare for handlers to have custom error handler routines
+6a1e6aa rgw: add 301 http text.
+9944c32 Mark where to add some functionality in future.
+1d02421 Clean up headers.
+8d28b65 Remove config rgw_s3website_mode, only hostname mode remains.
+0f38940 Improve RGWHandler for S3Website.
+556f227 FIXUP: Shuffle auth order back down.
+ed7a1c3 Add error codes.
+f323ac1 Optimize website detection differently.
+4538a0e DEBUG: Ensure we always go past the debug output for now.
+94aee4a WIP-FIXUP: Muck with bucket detection again.
+2701572 Avoid dupe reset.
+f9b43b4 rgw: tweak dns-based s3website detection for other changes FIXME-REBASE
+a4bd79f More s3website json region data.
+61a668c rgw: pretty-print s3website error output.
+4981ecb rgw: More fields in error output to match main S3.
+f39694a rgw: use new formatter header/footer ability.
+80b4668 FIXUP: Fix comment.
+e0bca28 WIP-FIXME: Static fetch works without breaking anything else, uses bucketmode&&auth to decide.
+32f7661 WIP-FIXME-TODO: Planning for hostname/endpoints specific to APIs.
+dc0c7a6 rgw: Document rgw_dns_name
+6df1e3a RGWRegion: AmazonS3 detects website endpoint storage
+fbd9269 rgw: Add note about future rgw performance optimization for regions with many endpoint hostnames.
+1e2294f rgw-website: handle redirect_all via RGWBWRoutingRule.
+b700def rgw-website: append the key, not overwrite.
+0273d79 rgw-website: Use default hostname/protocol if redirect does not specify.
+d07ce03 rgw-website: Do not apply retarget for an authenticated request.
+dc7ef71 rgw-website: cleanup unused variable: rules
+8ad4c80 rgw-website: ListBucket not valid for websites
+39283c4 rgw-website: do not truncate nuke key if valid.
+78b68a8 rgw: apply redirect rules in static website
+c1ba1ad rgw: add remove website api
+6123d81 rgw: fix xml encoding for certain website operations
+7b3bdce rgw: no need to explicitly allocate XMLObj
+a78def8 rgw: set bucket website operation
+e9b9d52 rgw: add xml encoders for website conf
+7d9e66a rgw: xml encoder / decoder
+43cb089 rgw: enable website as subresource
+c1a34f4 rgw: initial implementation of website REST api
+e34668c rgw: retarget requests
+916db09 rgw: define data structures for static website config
+f1137a7 common/*Formatters: handling of headers & footers.
 be35ea9 release-notes: draft v0.94.4 release notes
 c1d48ff osd: use pg id (without shard) when referring the PG
+98e7239 common/HTMLFormatter: add new formatter
+2b21e3c common/*Formatters: Split Formatters
 2b7ddde osd: Correct the object_info_t::decode() version
 03078ba rgw: location constraints should return api name
+0f488ac doc: Edited RGW quick start.
 a077301 mon/OSDMonitor: put crushtool error in log
 0bf2a79 messages/MOSDOp: fix reqid encoding/decoding
 6f6fe39 messages/MOSDOp: decode complete message for v6, too.
@@ -1937,7 +2608,7 @@ d258bf5 ceph.spec.in: drop MY_CONF_OPTS
 e675400 librbd: invalidate object map on error even w/o holding lock
 bc48ef0 selinux: Fix man page location
 378d56d man/Makefile-server.am: conditionalize make ceph_selinux manpage
-fb50ff6 (upstream/wip-13379, origin/wip-13379) mon: do not remove proxied sessions
+fb50ff6 (origin/wip-13379) mon: do not remove proxied sessions
 0d1cab4 test: add TestSessionFilter
 be3c4a8 mds: implement filtered "session ls" tell command
 47a1816 mds: call through to MDSRank in handle_command
@@ -2104,14 +2775,14 @@ a965378 ReplicatedPG: clearing a whiteout should create the object
 47f4a03 ceph-objectstore-tool: delete ObjectStore::Sequencer after umount
 f20f67e pybind/cephfs: fix DirEntry helpers
 7b1882f ceph.spec.in: correctly declare systemd dependency for SLE/openSUSE
-3f00042 (upstream/wip-13239-infernalis, origin/wip-13239-infernalis) rgw: set default value for env->get() call
+3f00042 (origin/wip-13239-infernalis) rgw: set default value for env->get() call
 469d35f osd: init started to 0
 bba3ab3 mon: combine _ms_dispatch and dispatch
 612480b test/test_rados_tool.sh: implement regression test for bench verify crash
 0c8faf7 common/obj_bencher.cc: fix verification crashing when there's no objects
 e42c9aa ceph.spec.in: re-re-drop fdupes
 566c872 os/fs: fix aio submit method
-d7b620f (origin/wip-12983) ECBackend::handle_recovery_read_complete: do not expose the hash_info when getting the obc
+d7b620f ECBackend::handle_recovery_read_complete: do not expose the hash_info when getting the obc
 892800b ECBackend::handle_sub_read: restructure hash check and fix part of 12983
 80b7237 qa/workunits/cephtool/test.sh: don't assume crash_replay_interval=45
 c5a9275 osd/ReplicatedPG: preserve (some) flags when proxying reads
@@ -2140,7 +2811,7 @@ bf7e937 osdc/Objecter: set FULL_FORCE flag when honor_full is false
 95055e7 osd: add FULL_TRY and FULL_FORCE rados op flags
 7757342 qa: https://ceph.com/git -> https://git.ceph.com
 d4d65fb qa: http://ceph.com/qa -> http://download.ceph.com/qa
-cdccf11 (upstream/wip-scrub-fix, origin/wip-scrub-fix) osd/PG: compensate for sloppy hobject scrub bounds from hammer
+cdccf11 (origin/wip-scrub-fix) osd/PG: compensate for sloppy hobject scrub bounds from hammer
 acda626 osd: avoid duplicate MMonGetOSDMap requests
 f4bf14d Update Xinze affinity
 eb28eef Update Chen Min affinity
@@ -2335,7 +3006,7 @@ af39f98 .gitignore: ignore src/ceph.tmpe
 c57e868 rocksdb: ignore m4
 51abff1 ceph.spec: respect CEPH_EXTRA_CONFIGURE_ARGS
 4a5a5b3 qa/workunits/cephtool/test.sh: make mds epoch check more tolerant
-d33fea5 (upstream/wip-10617-again, origin/wip-10617-again) sd/PG: tolerate missing pgmeta object
+d33fea5 (origin/wip-10617-again) sd/PG: tolerate missing pgmeta object
 f15d958 osd: allow peek_map_epoch to return an error
 ff9600a osd/ReplicatedPG: remove stray debug line
 6e85433 AsyncMessenger: Kepp file_lock hold when accessing its event field
@@ -2425,8 +3096,8 @@ d5650c9 tests: new test case for librbd diff_iterate over discard extents
 d32a3be qa/workunits/rados/test_alloc_hint.sh: sudo to ls files
 ab4232b rgw: init_rados failed leads to repeated delete
 e48cec3 mon: disable gmt_hitset if not supported
-02f4461 (upstream/wip-leveldb-hang, origin/wip-leveldb-hang) test: mon: mon-scrub.sh: test 'mon scrub'
-8c2dfad (upstream/wonzhq-tmap-update, origin/wonzhq-tmap-update) osd: force promote for ops which ec base pool can't handle
+02f4461 (origin/wip-leveldb-hang) test: mon: mon-scrub.sh: test 'mon scrub'
+8c2dfad (origin/wonzhq-tmap-update) osd: force promote for ops which ec base pool can't handle
 8c4323c PerfCounter: Make l_os_queue_lat contains the complete queue latency
 834842c OSD: Add perf counter to count osd thread prepare latency
 70d3108 mon: MonitorDBStore: make get_next_key() work properly
@@ -2443,7 +3114,7 @@ e6fbe53 improve error handle of rbd metadata operation & format output
 bfe359a osd: dump full map bl at 20 when crc doesn't match
 351d957 doc: fix the typo in command example
 7080e0f Thread.h: disable copy constr and assignment op
-7d781f7 (upstream/wip-12966, origin/wip-12966) doc: 'ceph --admin-daemon ...' -> 'ceph daemon ...'
+7d781f7 (origin/wip-12966) doc: 'ceph --admin-daemon ...' -> 'ceph daemon ...'
 404dd16 tests: base gmock class support for librbd
 e8749b2 librbd: support templating of ImageCtx for async state machines
 1c522be ceph.spec.in: put distro conditional around Group:
@@ -2498,11 +3169,11 @@ af8b3da Messenger: Make fast dispatch message set dispatch timestamp
 929ca5b ceph.spec.in: drop lsb-release dependency from ceph-common
 557e581   mon/MonClient: fix error in 'ceph ping mon.id'   Fixes: #12442
 f65267c rgw : setting max number of buckets for users via ceph.conf option
-64962aa (upstream/wip-5785, origin/wip-5785) qa/workunits/rados/test_alloc_hint.sh: sudo to list files
+64962aa (origin/wip-5785) qa/workunits/rados/test_alloc_hint.sh: sudo to list files
 75d9f58 osd/ReplicatedPG: use apply_ctx_stats() everywhere
 eb2993a osd/ReplicatedPG: create apply_ctx_stats() helper
 9bf103c osd/ReplicatedPG: snaptimmer: adjust stats through ctx->delta_stats
-3626db4 (origin/wip-rgw-swift-expiration) rgw: don't copy delete_at attr, unless it's intra region copy
+3626db4 rgw: don't copy delete_at attr, unless it's intra region copy
 a69a989 rgw: objexp shards index by key
 fa347d8 rgw: delete-at and delete-after also on obj put / copy
 14c400f add test for python binding
@@ -2521,7 +3192,9 @@ c938d1f rocksdb: fix 32-bit build
 67f5f52 memstore: fix the build on i386
 89aacaf doc: add the doc for min_write_recency_for_promote
 b02cc06 AsyncConnection: Don't use unsafe feature as message encode feature
-7bfb7f9 (upstream/wonzhq-rbd-write-full, origin/wonzhq-rbd-write-full) librbd: do write_full for whole object write
+2a5d384 mds: add dump_blocked_ops asok command.
+7bfb7f9 (origin/wonzhq-rbd-write-full) librbd: do write_full for whole object write
+cf4e8c6 osd: add dump_blocked_ops asok command.
 b199c49 ceph-osd-prestart.sh: fix osd data dir ownership check
 023c517 vstart.sh: enable all experimental features for vstart
 3a41ef4 ms/async: log message tx/rx at level 1
@@ -2810,7 +3483,7 @@ f2f23c2 rgw: implement object_is_expired function.
 aa5f1b8 rgw: a few fixes, guard bufferlist decodes
 4f9a843 rgw: add basic support for X-Delete-At header of Swift API.
 2bc5a48 osd: Decode use_gmt_hitset with a unique version
-38465f0 (upstream/liewegas-wip-hammer-feature, origin/liewegas-wip-hammer-feature) osd: refuse to boot if any pre-hammer or old hammer (<v0.94.4) are running
+38465f0 (origin/liewegas-wip-hammer-feature) osd: refuse to boot if any pre-hammer or old hammer (<v0.94.4) are running
 f668c6c mon: use HAMMER_0_94_4 feature to require sufficiently new hammer
 470f970 include/ceph_features: define HAMMER_0_94_4 feature
 14e02bc PG::handle_advance_map: on_pool_change after handling the map change
@@ -2885,7 +3558,7 @@ b0882fb memstore: replace apply_lock with sequencer
 5d8307a memstore: add Object interface to hide bufferlist
 26f716e memstore: use intrusive_ptr instead of shared_ptr
 01a9a79 osbench: add multithreaded objectstore benchmark
-d7bf8cb (upstream/wip-11455, origin/wip-11455) rgw: init some manifest fields when handling explicit objs
+d7bf8cb (origin/wip-11455) rgw: init some manifest fields when handling explicit objs
 b610588 ceph.spec.in: remove obsolete SUSE-specific code
 df21a6e osd: expose PGLSFilter in objclass interface
 c318129 ceph.spec.in: Restart services only if they are running
@@ -2897,7 +3570,7 @@ fb1b6dd common: fix insert empty ptr when bufferlist rebuild
 347ac0f ceph_test_rados_api_tier: make PromoteOn2ndRead tolerate thrashing
 8a08acc common/hobject_t: fix is_temp() off-by-one
 7cc8d86 ceph_test_msgr: parse CEPH_ARGS
-dfd142f (upstream/wip-memcpy, origin/wip-memcpy) include/inline_memcpy: use __builtin_memcpy instead of explicit ptr copies
+dfd142f (origin/wip-memcpy) include/inline_memcpy: use __builtin_memcpy instead of explicit ptr copies
 98c0606 include/inline_memcpy: make prototype resemble memcpy's
 fc02a8a added boost timegm impl for cross platform support
 da6d5cf osd: bug fix hit_set_map size for tier pool
@@ -2912,6 +3585,59 @@ d0386d2 ceph.spec.in: drop sysvinit-specific macros that run only on openSUSE/SL
 178d4d5 osd: remove unused parameter of start_recovery_ops
 f77949f bug fix: osd: requeue_scrub when kick_object_context_blocked
 4152269 config: skip lockdep for intentionally recursive md_config_t lock
+a8db298 Fixup.
+a32f768 WIP: static-site errordoc work.
+8613b69 Use more specific error codes for retarget of website requests
+731c73b s3website: Prepwork for x-amz-website-redirect-location header.
+f7d04dd S3Website: "good" error status is actually not zero, use the function instead.
+61b0ce6 S3Website: Ensure redirect_all has a 302 response, just like AmazonS3
+1dc9ac6 RGWWebsite: first pass at error redirect and error page handling.
+3bdbe59 rgw: prepare for handlers to have custom error handler routines
+4a6f968 rgw: add 301 http text.
+f7e85c3 Mark where to add some functionality in future.
+ab90f5a Clean up headers.
+0719274 Remove config rgw_s3website_mode, only hostname mode remains.
+ca9db61 Improve RGWHandler for S3Website.
+5715456 FIXUP: Shuffle auth order back down.
+473f927 Add error codes.
+54aff3e Optimize website detection differently.
+a82267e DEBUG: Ensure we always go past the debug output for now.
+5dd7bdc WIP-FIXUP: Muck with bucket detection again.
+76f9f8f Avoid dupe reset.
+8f0d0bd Do not automatically output header; make the dev do it
+ec16828 rgw: tweak dns-based s3website detection for other changes FIXME-REBASE
+4651b21 More s3website json region data.
+96f9a01 rgw: pretty-print s3website error output.
+b3682b5 rgw: More fields in error output to match main S3.
+12e0063 rgw: use new formatter header/footer ability.
+fa02d4a common: teach formatters about headers & footers.
+2e4b1ba FIXUP: Fix comment.
+f378b5b WIP-FIXME: Static fetch works without breaking anything else, uses bucketmode&&auth to decide. WIP-FIXME rgw-website: refactor to have RGW*S3Website classes for website-specific stuff
+eb27d6f WIP-FIXME-TODO: Planning for hostname/endpoints specific to APIs.
+2608983 rgw: Document rgw_dns_name
+1fb5662 RGWRegion: AmazonS3 detects website endpoint storage
+5e72900 rgw: Add note about future rgw performance optimization for regions with many endpoint hostnames.
+e06b46a rgw-website: handle redirect_all via RGWBWRoutingRule.
+4716992 rgw-website: append the key, not overwrite.
+7482acf rgw-website: Use default hostname/protocol if redirect does not specify.
+84dfb4e rgw-website: Do not apply retarget for an authenticated request.
+a55cb56 rgw-website: cleanup unused variable: rules
+dbadaa9 rgw-website: ListBucket not valid for websites
+7bb0dda rgw-website: do not truncate nuke key if valid.
+dcb66f1 rgw: apply redirect rules in static website
+2d50cbb rgw: add remove website api
+17ce9de rgw: fix xml encoding for certain website operations
+dc1a457 rgw: no need to explicitly allocate XMLObj
+33c5710 rgw: set bucket website operation
+0fb16bd rgw: add xml encoders for website conf
+34ba437 rgw: xml encoder / decoder
+b31e6ef rgw: enable website as subresource
+d2b48f9 rgw: initial implementation of website REST api
+6d791d6 rgw: retarget requests
+e45469a rgw: define data structures for static website config
+2f8075c common/*Formatters: handling of headers & footers.
+eb14de2 common/HTMLFormatter: add new formatter
+f3c6ac7 common/*Formatters: Split Formatters
 8ef2c96 buffer: modify inline memory ops to use packed structs
 dbcaa54 uuid: use boost::random:random_device
 136242b rgw: be more flexible with iso8601 timestamps
@@ -2936,7 +3662,7 @@ ea8609b mon/OSDMonitor: debug why pool creation fails
 88bfd79 test/erasure-code: drop directory from profile
 5df1271 do not include directory in ec profiles
 660ae5b osd: always load erasure plugins from the configured directory
-7295612 (tag: v9.0.3, upstream/loic-v9.0.3, upstream/last, origin/loic-v9.0.3, origin/last) 9.0.3
+7295612 (tag: v9.0.3, origin/loic-v9.0.3, origin/last) 9.0.3
 271513f erasure-code: shec plugin feature
 5e99a57 mon: add a cache layer over MonitorDBStore
 2d13a47 rbd: fix the FTBFS on old boost introduced by 2050d08
@@ -3027,7 +3753,7 @@ aa50321 cmake: add DiffIterate.cc to librbd
 de0b66a test: add test for the perf counter of CephContext
 5d109e9 common: support perf counter (for unhealthy workers) on CephContext
 5fa03e9 osd: expose the number of unhealthy threads from heartbeat map
-9b23392 (origin/wip-sam-working) ReplicatedPG::cancel_pull: also finish_degraded_object
+9b23392 ReplicatedPG::cancel_pull: also finish_degraded_object
 79f310a ReplicatedPG: treat object as degraded until on_global_recover
 5390072 ReplicatedPG: block writes on promote of rollback snap promotion
 35af63b ReplicatedPG: enforce write ordering on rollback
@@ -3081,7 +3807,7 @@ c8a83cd client: track flush TIDs for all pending flushing caps
 d742e79 tests: Add unit tests for CLS numops class
 d17f158 cls_numops: Add cls_numops client
 87f6b73 Add new cls_numops class for numeric operations
-0ba2e14 (upstream/revert-4927-snapset-obc, origin/revert-4927-snapset-obc) Revert "osd/ReplicatedPG: snapset is not persisted"
+0ba2e14 (origin/revert-4927-snapset-obc) Revert "osd/ReplicatedPG: snapset is not persisted"
 b18558b osd/OSDMap: test_flag returns bool
 3540fb9 osdc/Objecter: restart listing this PG if sort order changes
 35c1970 osd/ReplicatedPG: fix missing set sort order on [N]PGLS
@@ -3089,7 +3815,7 @@ fc61fd7 osd/osd_types: add pg_missing_t::resort() method
 517921f osd/osd_types: make pg_missing_t sort order dynamic
 a5e27de osd: refuse to boot if SORTBITWISE not set but backend cannot sort nibblewise
 97c66e3 erasure-code: Update ISA-L to 2.14
-0bb57f10 configure: Fix checking for yasm compability
+0bb57f1 configure: Fix checking for yasm compability
 2743cc4 java: add libcommon to deps
 5afa21d java: search for JNI bits in common dirs
 af0ebee rbd:improve the error handle of rbd,check the return value.
@@ -3124,7 +3850,7 @@ d33ad15 Adding statfs api to KeyValueDB
 d57d36d osd: add flush/evict mode in pg stats
 af2a38b mon: fix the output of cache_io_rate_summary
 b78883b tests: be more generous with mon tests timeouts
-7e6f819 (upstream/wip-5072, origin/wip-5072) doc: update rgw configuration on multiple rgw rados handlers feature
+7e6f819 (origin/wip-5072) doc: update rgw configuration on multiple rgw rados handlers feature
 efc8969 Doc: Correcting the default number of copies.
 6f768a7 doc: Removed reference to RAID-4
 c6cf558 CMake: cut down unnecessary linkage on rados tests
@@ -3318,7 +4044,7 @@ f68553e osd/osd_types.cc: get rid of str concat when making hash key
 111ecf8 radosgw-admin: use cout not cerr to print help message.
 145364b logrotate: fix log rotation with systemd
 85cb86d doc: change "--keyfile" description in man page of rbd help
-1ca6bf6 (upstream/wip-12536, origin/wip-12536) common/hobject_t: correctly decode pre-infernalis hobject_t min
+1ca6bf6 (origin/wip-12536) common/hobject_t: correctly decode pre-infernalis hobject_t min
 bc0d942 CMake: add crushtool
 fe970bc CMake: fix librados build
 2355c45 CMake: fix rbd build
@@ -3402,7 +4128,7 @@ cb51b17 mon: reject over-large values of max_mds
 258cb34 rbd: add "--keyring" option to help message
 5c395ff doc: add bucket object version description. bucket object version has been supported, but do not have description in the docs, so add this part.
 6ab9efe osd: copy the RecoveryCtx::handle when creating a new RecoveryCtx instance from another one
-1320e29 (upstream/wip-12410, origin/wip-12410) OSDMonitor::preprocess_get_osdmap: send the last map as well
+1320e29 (origin/wip-12410) OSDMonitor::preprocess_get_osdmap: send the last map as well
 f217865 test_librbd_fsx: invalidate before discard in krbd mode
 c4872dd Log::reopen_log_file: take m_flush_mutex
 0559fd3 tools/rados: change the first op id to 0
@@ -3453,7 +4179,7 @@ bbc5c71 rbd: import doesn't require image-spec arg, ditto for export and path
 0d2467a Compressor: Remove thread affinity options
 3482e68 AsyncConnection: Exit process loop if entering fault
 554c982 test/perf_local: disable tests on unsupported archs
-8778ab3 (upstream/wip-12465, origin/wip-12465) Log::reopen_log_file: take m_flush_mutex
+8778ab3 (origin/wip-12465) Log::reopen_log_file: take m_flush_mutex
 6f54c61 debian: Update maintainers and uploaders
 824c541   common: add nested-name-specifier ThreadPool before WorkQueueVal   Fixes: #12459
 992d959 mds: fix val used in inode->last_journaled
@@ -3816,7 +4542,7 @@ e819a3c client: return EINVAL if iovcnt < 0 for p{read,write}v()
 db16353 mds: change mds_log_max_segments type from int to unsigned.
 8a91daa mds: fix mds crash when mds_max_log_events smaller.
 5614ec6 tests: fix segfault issue in preadv/pwritev tests
-7cf1f37 (origin/wip-rgw-orphans-2) rgw: api adjustment following a rebase
+7cf1f37 rgw: api adjustment following a rebase
 47edec3 rgw: orphans, fix check on number of shards
 5528f21 rgw: orphans, change default number of shards
 cac57ca rgw: change error output related to orphans
@@ -4024,7 +4750,7 @@ adfa2e0 librbd: flush operations need to acquire owner lock
 d3bd27f rgw: fix reset_loc()
 9298f93 mon/OSDMonitor: fix get_bucket_utilization return value
 e41d97c rgw: fix assignment of copy obj attributes
-4030774 (upstream/wip-12064, origin/wip-12064) mon: only send MMonMetadata to peer mons that support it
+4030774 (origin/wip-12064) mon: only send MMonMetadata to peer mons that support it
 54a516f configure.ac: Fix JUnit 4 detection on Fedora 22.
 7fbac49 doc: Change the type of list in doc Fixes: #12061. Signed-off-by: Maxime ROBERT <maxime.robert1992 at gmail.com>
 05c56b7 doc: ceph-deploy man page: typo #12063 Replace is initial monitor hostname with is the initial monitor hostname Fixes : #12063 Signed-off-by: CARADANT Kevin <kevin.caradant at gmail.com>
@@ -4364,7 +5090,7 @@ aa62dcb osdmaptool: dump 'osd tree' in specified format
 acdfd98 doc: fix typo in placement-groups.rst
 9c8f8d2 doc: fix a wrong quote in release.rst
 2cc7aee mon: MonitorDBStore: get_next_key() only if prefix matches
-2934909 (upstream/wip-mon-scrub, origin/wip-mon-scrub) mon: Monitor: allow updating scrub interval on demand
+2934909 (origin/wip-mon-scrub) mon: Monitor: allow updating scrub interval on demand
 e77b3f4 mon: Monitor: allow scrub to timeout after a while
 80ce9b0 mon: Monitor: inject missing key failures during scrub
 ba4a2c1 mon: Monitor: inject scrub failures
@@ -4487,7 +5213,7 @@ b2cd80c os/chain_xattr: s/ENODATA/ENOATTR/
 c6cdb40 os/chain_xattr: stripe shortish xattrs over small chunks for XFS
 8614dce os/chain_xattr: handle read on chnk-aligned xattr
 584ed2e ceph.spec.in: SUSE/openSUSE builds need libbz2-devel
-11b7801 (origin/wip-11493) OSDMonitor: disallow ec pools as tiers
+11b7801 OSDMonitor: disallow ec pools as tiers
 13c0fca src/test/librados/tier.cc: remove OmapOperation test
 363d957 ceph.spec.in: tweak ceph-common for SUSE/openSUSE
 46404dd ceph.spec.in: consolidate centos/rhel macros
@@ -4500,7 +5226,7 @@ f11de85 mds: fix handle_mds_map in standby_replay
 c199b7b KeyValueStore: Initialize the iterator
 ab30ff2 KeyValueStore: optimize the object header writes
 a46b333 do not return non-exist extents when doing sparse read
-389ae67 (upstream/wip-11622, origin/wip-11622) rgw: merge manifests correctly when there's prefix override
+389ae67 (origin/wip-11622) rgw: merge manifests correctly when there's prefix override
 9d8c115 init-radosgw.sysv: remove
 1c45f51 init-radosgw: unify init-radosgw[.sysv]
 a4bb992 init-radosgw: look in /var/lib/ceph/radosgw
@@ -4635,7 +5361,7 @@ e9300cb mailmap: Sergey Arkhipov affiliation
 f76bf6c mailmap: Xingyi Wu affiliation
 0f44127 mailmap: Ning Yao affiliation
 95a881f mailmap: Joao Eduardo Luis affiliation
-2738d02 (upstream/wip-txn-noappend-sam-rebased, origin/wip-txn-noappend-sam-rebased) ECBackend: eliminate transaction append, ECSubWrite copy
+2738d02 (origin/wip-txn-noappend-sam-rebased) ECBackend: eliminate transaction append, ECSubWrite copy
 3699a73 mon: fix the FTBFS
 07cf4f7 doc: release notes for hammer v0.94.2
 e1f1c56 mon/PGMap: add more constness
@@ -4787,7 +5513,7 @@ a0f96de mds: in damaged() call flush_log before ending
 8803776 mon: add MonClient::flush_log
 a5e88fc librbd: invoking RBD::open twice will leak memory
 585bc2b mds: send FLUSHSNAP_ACK even if FLUSHSNAP message is unexpected
-fbfd50d (upstream/wip-11429, origin/wip-11429) OSD: handle the case where we resurrected an old, deleted pg
+fbfd50d (origin/wip-11429) OSD: handle the case where we resurrected an old, deleted pg
 32b8bf5 test: update CMakefile to sync with c44f8e7
 b7f4328 systest_runnable: adjust argument to suite Preforker
 9699246 mds: handle missing mydir dirfrag
@@ -4838,7 +5564,7 @@ d62f80d common/config: detect overflow of int values
 ab51130 Event: Delete driver after cleanup
 caa9f0e rgw: fix ListParts response
 04b0002 qa/workunits/post-file: pick a dir that's readable by world
-999dcc8 (upstream/wip-11464, origin/wip-11464) Revert "osd: For object op, first check object whether unfound."
+999dcc8 (origin/wip-11464) Revert "osd: For object op, first check object whether unfound."
 8e20240 librbd: TaskFinisher should finish all queued tasks
 ea5107c librbd: librados completions are not properly released
 ed5472a tests: fix valgrind errors with librbd unit test
@@ -4860,7 +5586,7 @@ fd7723a librbd: update ref count when queueing AioCompletion
 f141e02 librbd: flatten should return -EROFS if image is read-only
 594a661 librbd: allow snapshots to be created when snapshot is active
 32c41f8 cls_rbd: get_features needs to support legacy negative tests
-e97fd50 (upstream/wip-rgw-content-length, origin/wip-rgw-content-length) rgw: simplify content length handling
+e97fd50 (origin/wip-rgw-content-length) rgw: simplify content length handling
 79d17af rgw: make compatability deconfliction optional.
 06d67d9 rgw_admin: add --remove-bad flag to bucket check
 8a7e58e AsyncMessenger: Don't need to join thread if not started
@@ -5112,7 +5838,7 @@ e3d62a9 common: make rados bench return correctly errno.
 0498b6a mds: add perf counters descriptions
 8ff8c57 doc/release-notes: note about SHEC
 1cc0181 doc: Corrects rgw.conf file path for Debian-based and RPM-based distros in radosgw man page.
-fb51175 (upstream/wip-2862, origin/wip-2862) TestCase: Change in testcase output
+fb51175 (origin/wip-2862) TestCase: Change in testcase output
 b15f6d0  Fix to some of the command line parsing (including rbd)
 b0172d8 rbd: create command throws inappropriate error messages
 d1cb94f RBD: update expunge set for latest test, parameterize test script
@@ -5292,7 +6018,7 @@ c4d8e65 Librbd: Add existing rbd configs to aware table
 cf715bd Librbd: Add tests for aware metadata config
 ccdeaf8 mds: fix out-of-order messages
 364e15b Librbd: Add basic metadata aware method
-59aa670 (upstream/zhouyuan/isal_2.13, origin/zhouyuan/isal_2.13) erasure-code: Update ISA-L to 2.13
+59aa670 (origin/zhouyuan/isal_2.13) erasure-code: Update ISA-L to 2.13
 ad15f7d osdc/Striper.cc fix stripe_count == 1 && stripe_unit != object_size
 eaf6e0c Always provide summary for non-healthy cluster.
 c6f1c07 Conditional-compile against minimal tcmalloc.
@@ -5318,7 +6044,7 @@ f9b98c9 ceph-objectstore-tool: Fix message and make it debug only to stderr
 d6acc6a Doc: Incomplete example in erasure-coded-pool.rst
 90c38b5 rocksdb: fix 32-bit build
 ddad2d4 Makefile-rocksdb.am: update for latest rocks
-c176ebf (upstream/wip-move-code, origin/wip-move-code) osd/: Move ReplicatedBackend methods into ReplicatedBackend.cc
+c176ebf (origin/wip-move-code) osd/: Move ReplicatedBackend methods into ReplicatedBackend.cc
 e9d6096 ReplicatedPG: remove unused C_OnPushCommit
 6413209 mds: include damaged in MDSMap::dump
 3b2a091 mds: update peer failure response to account for damaged
@@ -5580,7 +6306,7 @@ d9ea168 Some sanitization work on .mailmap, .organizationmap, .peoplemap : Sorti
 90a0393 PendingReleaseNotes: warn about lttng LD_PRELOAD for daemons
 53cc492 ceph_test_rados_tier: add test case for delete+create compound ops
 c0e6227 mds: give up replicas of a stopping mds's stuff
-d47e622 (upstream/wip-mon-doc, origin/wip-mon-doc) doc/rados/operations/add-or-rm-mons: revise doc a bit to be less confusing
+d47e622 (origin/wip-mon-doc) doc/rados/operations/add-or-rm-mons: revise doc a bit to be less confusing
 8a05092 debian: move /var/lib/ceph/mds to ceph-mds package
 353a325 ceph.spec.in: rm EOL Fedoras; add OBS RHEL5 instead
 703ba37 librbd: acquire cache_lock before refreshing parent
@@ -6263,7 +6989,7 @@ adebf22 rbd_recover_tool: move rbd_recover_tool directory to src/tools subdirect
 2f49de5 ReplicatedPG: block writes on degraded objects unless all peers support it
 2a83ef3 include/encoding: fix an compile warning
 71c6d98 msg: fixup for 2ffacbe (crc configuration in messenger)
-2598fc5 (upstream/wip-10734, origin/wip-10734) ObjectStore: fix Transaction encoding version number
+2598fc5 (origin/wip-10734) ObjectStore: fix Transaction encoding version number
 46f9ca4 pybind: fixed runtime errors with librbdpy
 9124a76 test/vstart_wrapper.sh: set PATH before calling vstart.sh
 189ef38 init-ceph.in: add $PWD to PATH if running as ./init-ceph
@@ -6342,13 +7068,13 @@ e0f12d9 Fix do_autogen.sh so that -L is allowed
 cfab01e rgw: move perf cleanup before context cleanup
 4074a91 pybind: fix error hiding and inconsistency on librados load.
 cfcfafc Objecter::_op_submit_with_budget: add timeout before call
-00a3ac3 (tag: v0.92, upstream/wip-sam-v0.92, origin/wip-sam-v0.92) 0.92
+00a3ac3 (tag: v0.92, origin/wip-sam-v0.92) 0.92
 c656bce PGLog: improve PGLog::check() debugging
 05ce2aa qa: use correct binary path on rpm-based systems
 eb526af rbd: watch command should unwatch before exiting
 2a0e9b7 encoding: ignore uninitialized instantiation in boost::optional decode
 f40ee8c do_autogen.sh: default to --with-lttng, -L to build without
-7590387 (upstream/wip-assert-version, origin/wip-assert-version) librados: add missing tracepoints
+7590387 (origin/wip-assert-version) librados: add missing tracepoints
 57bac8e osd:  change pg_stat plain to display CRUSH_ITEM_NONE in pgmap output section.
 4aa9f3f man: add rbd status to doc/man/8/rbd.rst
 a007c52 doc: add cephfs disaster recovery guidance
@@ -6412,7 +7138,7 @@ fc76c89 osdc: add new filed dontneed in BufferHead.
 c83a288 Rework ceph-disk to allow LUKS for encrypted partitions
 707c78b Only create a key of 256 bits length, not 256 bytes
 6a45b8e add all possible ceph-disk run-time requirements to build time deps
-4c50f6a (upstream/wip-rgw-versioning-4, origin/wip-rgw-versioning-4) rgw: more merge related fixes
+4c50f6a (origin/wip-rgw-versioning-4) rgw: more merge related fixes
 01cc9d5 rgw: fix merge artifact
 e26023e PG: set scrubber.start = scrubber.end after scrub_compare_maps
 4f9e6ed PG: remove block_writes from scrubber
@@ -6469,7 +7195,7 @@ c4a6eab rgw: fixing rebase casualties
 b6d6f90 mon/MDSMonitor: fix gid/rank/state parsing
 9b9a682 msg/Pipe: set dscp as CS6 for heartbeat socket
 1e236a3 mds: don't join on thread which has not been runned.
-6939e8c (upstream/zhouyuan-submodule_https_git, origin/zhouyuan-submodule_https_git) Update git submodule to use the same https protocol
+6939e8c (origin/zhouyuan-submodule_https_git) Update git submodule to use the same https protocol
 e393810 librbd: make librbd cache send read op with fadvise_flags.
 a23676b librbd: Don't do readahead for random read.
 8d0295c rgw: extend replica log api (purge-all)
@@ -6507,7 +7233,7 @@ e6f1280 librados: Expose RadosClient instance id through librados
 87ef462 rgw: format mtime of radosgw-admin bucket stats
 dc1630e librbd: trim would not complete if exclusive lock is lost
 3347e0d bug: error when installing ceph dependencies with install-deps.sh
-4e90a31 (upstream/wip-10617, origin/wip-10617) osd: add failure injection on pg removals
+4e90a31 (origin/wip-10617) osd: add failure injection on pg removals
 9b220bd ceph.spec.in: use wildcards to capture man pages
 51e3ffa rgw: reorder bucket cleanup on bucket overwrite
 313d6a5 rgw: access appropriate shard on bi_get(), bi_put()
@@ -6571,7 +7297,7 @@ b04f698 Doc: Fix the extra blank space in doc/start/quick-rbd.rst
 9ad9ba8 doc: Fix a typo in radosgw-admin doc
 008698b doc: Change Availability text in all of the man pages
 6f44f7a Revert "Revert "Merge remote-tracking branch 'origin/wip-bi-sharding-3' into next""
-90a90bb (upstream/wip-rgw-versioning-3, origin/wip-rgw-versioning-3) rgw: set default value for swift versioning extension
+90a90bb (origin/wip-rgw-versioning-3) rgw: set default value for swift versioning extension
 dc11ef1 PGBackend: fix and clarify be_select_auth_object
 26656e3 rgw: fix bucket removal with data purge
 b18b14b ObjectStore::_update_op: treat CLONERANGE2 like CLONE
@@ -6822,7 +7548,7 @@ b383b52 rgw: enable s3 get/set versioning ops
 0d97b40 rgw: get bucket versioning status op
 8ed79d6 rgw: add versioning_enabled field to bucket info
 50547dc mon: PGMonitor: fix division by zero on stats dump
-dbaa142 (upstream/wip-bi-sharding-3, origin/wip-bi-sharding-3) rgw: bilog marker related fixes
+dbaa142 (origin/wip-bi-sharding-3) rgw: bilog marker related fixes
 c4548f6 pybind: ceph_argparse: validate incorrectly formed targets
 80a9d99 mon: Monitor: return 'required_features' on get_required_features()
 ab996c1 mon: Elector: output features in handle_propose()
@@ -6926,7 +7652,7 @@ d80ded9 mailmap: David Zhang affiliation
 33ba23f common/shared_cache.hpp: empty() iff weak_refs is empty
 d532f3e remove unused hold_map_lock in _open_lock_pg
 9748655 man: add help for rbd merge-diff command
-6986ec1 (upstream/wip-10477, origin/wip-10477) osd/PG: populate blocked_by with peers we are trying to activate
+6986ec1 (origin/wip-10477) osd/PG: populate blocked_by with peers we are trying to activate
 5b0e8ae mailmap: Yehuda Sadeh name normalization
 3f03a7b doc/release-notes: v0.91
 4ca6931 doc/release-notes: typo
@@ -7015,7 +7741,7 @@ f9b280e Adjust bi log listing to work with multiple bucket shards. Signed-off-by
 67a90dd mon: accumulate a single pending transaction and propose it all at once
 d159586 PendingReleaseNotes: make a note about librados flag changes
 725d660 (tag: v0.91) 0.91
-9264d25 (upstream/wip-formatter, origin/wip-formatter) common/Formatter: new_formatter -> Formatter::create
+9264d25 (origin/wip-formatter) common/Formatter: new_formatter -> Formatter::create
 617ad5d common/Formatter: improve json-pretty whitespace
 83c3b13 common/Formatter: add newline to flushed output if m_pretty
 e2a7b17 osd/PG: remove unnecessary publish_stats_to_osd() in all_activated_and_committted()
@@ -7321,7 +8047,7 @@ a302c44 ceph-disk: Fix wrong string formatting
 9783a5c test/msgr/test_msgr: Fix potential unsafe cond wakeup and wrap check
 bba4d35 librados: init last_objver
 2cd9dc0 messages/MClientCaps: init peer.flags
-679652a (upstream/wip-osdmap-leak, origin/wip-osdmap-leak) osd: fix leaked OSDMap
+679652a (origin/wip-osdmap-leak) osd: fix leaked OSDMap
 18f545b librados: Avoid copy data from librados to caller buff when using rados_read_op_read.
 001ea29 Messenger: Create an Messenger implementation by name.
 3a2cb71 mds: fix asok on rank 0
@@ -7531,7 +8257,7 @@ b34e545 os/FileStore.cc: insert not empty list<Context*> to op_finisher/ondisk_f
 7ab4a39 ceph.conf: update sample
 efd9d8d tests: Minor cleanup to librbd test
 78a15ee Fix libstriprados::remove, use strtoll insdead of strtol
-2d4dca7 (upstream/wip-10029, origin/wip-10029) SimpleMessenger: Retry binding on addresses if binding fails
+2d4dca7 (origin/wip-10029) SimpleMessenger: Retry binding on addresses if binding fails
 e8063a1 test: modify cephfs quota test case
 31a0cdc mds: fix parse_quota_vxattr for invalid data
 bab7122 OSD: FileJournal: call writeq_cond.Signal if necessary in submit_entry
@@ -7540,7 +8266,7 @@ bab7122 OSD: FileJournal: call writeq_cond.Signal if necessary in submit_entry
 6b51a9f mds: set dirfrag version when fetching dirfrag is skipped
 17c72f5 ceph-osd: remove extra close of stderr
 5836899 Revert "client: support listxattr for quota attributes"
-89b2fee (upstream/wip-crush-straw, origin/wip-crush-straw) mon: 'osd crush reweight-all'
+89b2fee (origin/wip-crush-straw) mon: 'osd crush reweight-all'
 dd7b58f crush: set straw_calc_version=1 for default+optimal; do not touch for presets
 adf5c6d crush/builder: a note about the original crush_calc_straw()
 9000068 mon: add 'osd crush {get,set}-tunable <name> [value]' commands
@@ -7549,7 +8275,7 @@ bf0d8d3 osd: Remove dead code related to old pg removal mechanism
 0827bb7 client: use remount to trim kernel dcache
 dfcb1c9 client: cleanup client callback registration
 2f52202 Revert "client: invalidate kernel dentries one by one"
-9902383 (upstream/wip-9998, origin/wip-9998) crush/CrushWrapper: fix create_or_move_item when name exists but item does not
+9902383 (origin/wip-9998) crush/CrushWrapper: fix create_or_move_item when name exists but item does not
 8c87e95 crush/builder: prevent bucket weight underflow on item removal
 eeadd60 crush/CrushWrapper: fix _search_item_exists
 a198dee Modifying the docs to add the Get pool commands to match the CLI. Signed-off-by: Chris Holcombe <chris.holcombe at nebula.com>
@@ -7740,7 +8466,7 @@ b8f6b5f doc: Added rbd-replay-many and restructured index.
 6862891 doc: Adds man page for ceph disk in TOC.
 491da51 client: invalidate kernel dentries one by one
 2fa4884 mds: fix race of trimming log segments
-70e1a5d (upstream/wip-doc-rbd-replay, origin/wip-doc-rbd-replay) doc: Document RBD Replay
+70e1a5d (origin/wip-doc-rbd-replay) doc: Document RBD Replay
 131f092 mds: don't blindly create empty object when dirfrag is missing
 9b9e3ed mds: allow choosing action for wirte error
 dafef3c mds: add asok command to force MDS readonly
@@ -7753,7 +8479,7 @@ cfef515 mds: disallow slave requests when MDS is readonly
 4aed047 mds: keep locks in sync state when MDS is readonly
 2d4a746 mds: don't trim log when MDS is readonly
 4f6474f mds: disallow write operations when MDS is readonly
-01df222 (upstream/wip-10209, origin/wip-10209) osd: tolerate sessionless con in fast dispatch path
+01df222 (origin/wip-10209) osd: tolerate sessionless con in fast dispatch path
 0f1c9fd msg: do not const methods that return a mutable pointer
 0d6c803 osd/osd_types: drop category from object_info_t
 5ecdce3 osdc/Objecter: drop category from copy-get
@@ -8114,7 +8840,7 @@ fe7bf06 rgw: RGWRados::get_obj() returns wrong len if len == 0
 f4ee949 osd: cache pool: flush object ignoring cache min flush age when cache pool is full Signed-off-by: Xinze Chi <xmdxcxz at gmail.com>
 6da9405 doc: Edited Key/Value store config reference.
 03be944 doc: Added Key/Value store config reference to index.
-72fc262 (upstream/wip-doc-openstack-juno, origin/wip-doc-openstack-juno) doc: Update for OpenStack Juno.
+72fc262 (origin/wip-doc-openstack-juno) doc: Update for OpenStack Juno.
 65c3350 tools: skip up to expire_pos in journal-tool
 e0166a2 osdc/Objecter: Fix a bug of dead looping in Objecter::handle_osd_map
 31c584c osdc/Objecter: e shouldn't be zero in Objecter::handle_osd_map
@@ -8260,7 +8986,7 @@ f76f83c osdc/Objecter: fix tick_event handling in shutdown vs tick race
 60eaeca .gitmodules: ignoring changes in rocksdb submodule
 a9dd4af rgw: send http status reason explicitly in fastcgi
 44a8d59 java: fill in stat structure correctly
-cb9262a (upstream/wip-9806-giant, origin/wip-9806-giant) Objecter: resend linger ops on any interval change
+cb9262a Objecter: resend linger ops on any interval change
 1a3ad30 ReplicatedPG: writeout hit_set object with correct prior_version
 8ae942a Remove unnecessary expressions about conf_journal_sz
 024efeb EC: document the LRC per layer plugin configuration
@@ -8378,7 +9104,7 @@ c9f9e72 Revert "Objecter: disable fast dispatch of CEPH_MSG_OSD_OPREPLY messages
 c4bac3e mds: fix inotable initialization/reset
 c95bb59 mds: fix inotable initialization/reset
 f1fccb1 rpm: 95-ceph-osd-alt.rules is not needed for centos7 / rhel7
-b73fe1a (upstream/wip-9730, origin/wip-9730) doc: remove node requirement from 'mon create-initial'
+b73fe1a (origin/wip-9730) doc: remove node requirement from 'mon create-initial'
 264f0fc doc: remove whitespace
 20b2766 Update vstart to setup users for s3-tests
 0969945 client: use finisher to abort MDS request
@@ -8395,7 +9121,7 @@ d947050 osd/osd_types: consider CRUSH_ITEM_NONE in check_new_interval() min_size
 50987ec libcephfs.h libcephfs.cc : Defined error codes for the mount function Used new error codes from libcephfs.h to replace the magic numbers in the mount functon found in libcephfs.cc.
 7bab093 return value of handle_message for MSG_OSD_SUBOP/MSG_OSD_SUBOPREPLY should be true
 d955676 rados: Use strict_strtoll instead of strtoll
-809ddd2 (upstream/wip-9706, origin/wip-9706) osdc/Objecter: fix use-after-frees in close_session, shutdown
+809ddd2 (origin/wip-9706) osdc/Objecter: fix use-after-frees in close_session, shutdown
 72a2ab1 osdc/Objecter: fix tick() session locking
 d98b755 librados: Fix function prototypes in librados.h
 d458b4f PGLog::IndexedLog::trim(): rollback_info_trimmed_to_riter may be log.rend()
@@ -8531,7 +9257,7 @@ b2e4bd5 msg: move SimpleMessenger to msg/simple/
 5eff0ee msg: use localized cct for derr
 06aef6f doc/release-notes: v0.86
 10fe7cf ceph_objectstore_tool: Accept CEPH_ARGS environment arguments
-6aba0ab (upstream/wip-9128, origin/wip-9128) Add reset_tp_timeout in long loop in add_source_info for suicide timeout
+6aba0ab (origin/wip-9128) Add reset_tp_timeout in long loop in add_source_info for suicide timeout
 52ac520 tools: remove old ceph.cc
 63c7e16 test/osd/Object: don't generate length of 0
 abe4c35 doc: update kernel recommendations, add tunables link
@@ -8577,7 +9303,7 @@ fa0bd06 ceph-disk: bootstrap-osd keyring ignores --statedir
 19be358 PG::actingset should be used when checking the number of acting OSDs for a given PG. Signed-off-by: Guang Yang <yguang at yahoo-inc.com>
 8253ead osdc/Objecter: use SafeTimer; make callbacks race-tolerant
 6c37984 mailmap: Yehuda Sadeh name normalization
-beff616 ceph-disk: set guid if reusing a journal partition
+beff616f ceph-disk: set guid if reusing a journal partition
 50e8040 tools: rados put /dev/null should write() and not create()
 0b0a373 mailmap: update email address
 188370a doc/release-notes: fix attributions for 8702 fix
@@ -8676,7 +9402,7 @@ ed77178 erasure-code: run isa tests via libtool and valgrind
 6886224 mailmap: Yan Zheng affiliation
 fc1380b mailmap: Thorsten Glaser affiliation
 7973280 osd: Remove unused PG functions queue_notify(), queue_info(), queue_log()
-0f884fd (upstream/wip-9008, origin/wip-9008) For pgls OP, get/put budget on per list session basis, instead of per OP basis, which could lead to deadlock.
+0f884fd (origin/wip-9008) For pgls OP, get/put budget on per list session basis, instead of per OP basis, which could lead to deadlock.
 7f87cf1 ReplicatedPG: clean out completed trimmed objects as we go
 2cd9b5f tests: use memcmp to compare binary buffers
 c17ac03 ReplicatedPG: don't move on to the next snap immediately
@@ -8803,7 +9529,7 @@ bb49547 KeyValueStore: Reduce redundancy set_header call
 baf7be9 osdc/Objecter: cancel timeout before clearing op->session
 1149639 ceph-disk: mount xfs with inode64 by default
 ded1b30 erasure-code: preload fails if < 0
-27208db (upstream/wip-doc-preflight, origin/wip-doc-preflight) doc: Added feedback.
+27208db (origin/wip-doc-preflight) doc: Added feedback.
 a140439 mds: limit number of caps inspected in caps_tick
 bf590f8 mds: keep per-client revoking caps list
 a6a0fd8 xlist: implement copy constructor
@@ -8977,7 +9703,7 @@ a754ce5 ErasureCodeLrc.cc: fix -Wmaybe-uninitialized compiler warning
 16cbaba osd/PGLog.h: prefer ++operator for non-primitive iterators
 8f368c5 mailmap: Ashish Chandra affiliation
 5fd50c9 mailmap: Boris Ranto affiliation
-a5b4c58 (upstream/wip-9309, origin/wip-9309) lockdep: increase max locks (1000 -> 2000)
+a5b4c58 (origin/wip-9309) lockdep: increase max locks (1000 -> 2000)
 9fac072 documentation: add the mark_unfound_lost delete option
 bec3032 osd: MissingLoc::get_all_missing is const
 e13ddc7 tests: qa/workunits/cephtool/test.sh early fail
@@ -9290,7 +10016,7 @@ f7c0001 common: remove spurious uint32_t in buffer.c
 b0cc869 mds: rename a bunch of metrics
 31ef1a9 mds: set l_mds_req on client request
 06682c4 vstart.sh: debug rgw = 20 on -d
-00c677b (upstream/wip-civetweb-log, origin/wip-civetweb-log) rgw: use a separate callback for civetweb access log
+00c677b (origin/wip-civetweb-log) rgw: use a separate callback for civetweb access log
 850242c rgw: separate civetweb log from rgw log
 f246b56 common/shared_cache: dump weak refs on shutdown
 6cf583c common/shared_cache: take a cct
@@ -9462,7 +10188,7 @@ dd11042 os/FileStore: fix mount/remount force_sync race
 c83c90c rgw: update civetweb submodule
 0d6d1aa init-ceph: don't use bashism
 7df67a5 Fix -Wno-format and -Werror=format-security options clash
-ae0b9f1 (upstream/wip-osd-mon-feature, origin/wip-osd-mon-feature) osd: fix feature requirement for mons
+ae0b9f1 (origin/wip-osd-mon-feature) osd: fix feature requirement for mons
 0db3e51 ReplicatedPG::maybe_handle_cache: do not forward RWORDERED reads
 5040413 ReplicatedPG::cancel_copy: clear cop->obc
 2f0e295 unittest_osdmap: test EC rule and pool features
@@ -9492,7 +10218,7 @@ a1e79db rgw_admin: add --min-rewrite-stripe-size for object rewrite
 46d8c97 doc: Add documentation about Wireshark dissector.
 6a55543 rgw: fix compilation
 f6771f2 shared_cache: use a single lookup for lookup() too
-cec40da (upstream/historic/old-wireshark-dissectors, origin/historic/old-wireshark-dissectors) qa/workunits/cephtool: verify setmaxosd doesn't let you clobber osds
+cec40da (origin/historic/old-wireshark-dissectors) qa/workunits/cephtool: verify setmaxosd doesn't let you clobber osds
 a1c3afb OSDMonitor: Do not allow OSD removal using setmaxosd
 16a4360 rgw: pass set_mtime to copy_obj_data()
 800eff2 rgw: copy_obj_data() uses atomic processor
@@ -9824,7 +10550,7 @@ eb697dd librbd: make rbd_get_parent_info() accept NULL out params
 04d0526 PGMonitor: fix bug in caculating pool avail space
 b08470f configure.ac: link libboost_thread only with json-spirit
 9d23cc6 configure: don't link blkid, udev to everything
-de9cfca (upstream/wip-flush-set, origin/wip-flush-set) Only write bufferhead when it's dirty
+de9cfca (origin/wip-flush-set) Only write bufferhead when it's dirty
 1c26266 ObjectCacher: fix bh_{add,remove} dirty_or_tx_bh accounting
 727ac1d ObjectCacher: fix dirty_or_tx_bh logic in bh_set_state()
 5283cfe Wait tx state buffer in flush_set
@@ -10096,7 +10822,7 @@ c0ffa01 mon: Set crash_replay_interval automatically
 917ef15 test: use 0U with gtest to avoid spurious warnings
 522174b qa: support running under non privileged user
 8697d6a OSD: await_reserved_maps() prior to calling mark_down
-6f97206 (upstream/wip-osd-map-cache-size, origin/wip-osd-map-cache-size) osd: allow osd map cache size to be adjusted at runtime
+6f97206 (origin/wip-osd-map-cache-size) osd: allow osd map cache size to be adjusted at runtime
 bcc09f9 qa/workunits/cephtool/test.sh: sudo ceph daemon
 959f2b2 PGLog: fix clear() to avoid the IndexLog::zero() asserts
 e0d3b78 rgw: fix uninit ofs in RGWObjManifect::obj_iterator
@@ -10873,7 +11599,7 @@ e97b56e doc: New Admin Guide for Ceph Object Storage.
 24c5ea8 osd: check blacklisted clients in ReplicatedPG::do_op()
 f92677c osd: check blacklisted clients in ReplicatedPG::do_op()
 c64b67b ceph-object-corpus: rebase onto firefly corpus
-077e6f8 ceph-object-corpus: v0.80-rc1-35-g4812150
+077e6f86 ceph-object-corpus: v0.80-rc1-35-g4812150
 8bd4e58 Fix out of source builds
 3aee1e0 Fix clone problem
 fd970bb mon: OSDMonitor: disallow nonsensical cache-mode transitions
@@ -23771,7 +24497,7 @@ a53a017 ReplicatedPG: pull() should return PULL_NONE, not false
 f9b7529 osd_types.h: Add constructors for ObjectRecovery*
 7b1c144 test_filestore_idempotent: fix test to create initial object
 6b30cd3 libcephfs: define CEPH_SETATTR_*
-b54bac3 test/encoding/readable.sh: drop bashisms
+b54bac30 test/encoding/readable.sh: drop bashisms
 ffa1de3 filejournal: drop unused variable
 ccf8867 filejournal: aio off by default
 9fded38 test/encoding/readable.sh: skip old version with known incompatibilities
@@ -28243,7 +28969,7 @@ cae43fc Makefile: drop libradosgw_a LDFLAGS
 32fce3c rados_create: correctly handle null id
 f06f4ee librados: always call keyring_init in connect
 586fc66 librados: don't call keyring_init in init_internal
-9e1828af objecter: make response_data bufferlist static
+9e1828a objecter: make response_data bufferlist static
 251fd50 rados_create_internal calls keyring_init
 c548976 rados_create: set id based on parameter
 b1c3321 librados: add rados_create_internal
@@ -32577,7 +33303,7 @@ e439bd3 config: add kill arguments for mds import/export
 b709a72 mds: bracket mds journal events with {start,submit}_entry
 95ee211 todo
 adbd7d8 dropped old aleung mds branch code
-714a9af (upstream/historic/aleung_mds_security, origin/historic/aleung_mds_security) mon: fix allocation of low global_ids after mon restart
+714a9af (origin/historic/aleung_mds_security) mon: fix allocation of low global_ids after mon restart
 89603b6 test_ioctls: print preferred_osd value from GET_LAYOUT ioctl
 54b8537 hash: fix whitespace
 fd0195a mds: set mdr->in[n] in rdlock_path_xlock_dentry
@@ -33152,7 +33878,7 @@ bc9b863 kclient: include fs/{Kconfig,Makefile} in patchset
 3a3ccd8 kclient: checkpatch cleanups
 522f570 mds: fix default layout settings
 38dbaa5 (tag: v0.16) v0.16
-e678fbc msgr: authorizer get/verify callbacks
+e678fbc1 msgr: authorizer get/verify callbacks
 faa5fb5 msgr: get_authorizer hook?
 56f45b4 objecter: Session type
 8b04f9a auth: authorizer for osd
diff --git a/Makefile.in b/Makefile.in
index a611e93..1946b65 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -377,7 +377,7 @@ DIST_SUBDIRS = $(SUBDIRS)
 am__DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/ceph.spec.in AUTHORS \
 	COPYING ChangeLog INSTALL NEWS README ar-lib compile \
 	config.guess config.sub install-sh ltmain.sh missing \
-	py-compile test-driver
+	test-driver
 DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
 distdir = $(PACKAGE)-$(VERSION)
 top_distdir = $(distdir)
@@ -491,12 +491,14 @@ JDK_CPPFLAGS = @JDK_CPPFLAGS@
 KEYUTILS_LIB = @KEYUTILS_LIB@
 LD = @LD@
 LDFLAGS = @LDFLAGS@
-LIBEDIT_CFLAGS = @LIBEDIT_CFLAGS@
-LIBEDIT_LIBS = @LIBEDIT_LIBS@
+LIBDPDK_CFLAGS = @LIBDPDK_CFLAGS@
+LIBDPDK_LIBS = @LIBDPDK_LIBS@
 LIBFUSE_CFLAGS = @LIBFUSE_CFLAGS@
 LIBFUSE_LIBS = @LIBFUSE_LIBS@
 LIBJEMALLOC = @LIBJEMALLOC@
 LIBOBJS = @LIBOBJS@
+LIBPCIACCESS_CFLAGS = @LIBPCIACCESS_CFLAGS@
+LIBPCIACCESS_LIBS = @LIBPCIACCESS_LIBS@
 LIBROCKSDB_CFLAGS = @LIBROCKSDB_CFLAGS@
 LIBROCKSDB_LIBS = @LIBROCKSDB_LIBS@
 LIBS = @LIBS@
@@ -581,7 +583,6 @@ datarootdir = @datarootdir@
 docdir = @docdir@
 dvidir = @dvidir@
 exec_prefix = @exec_prefix@
-group_rgw = @group_rgw@
 host = @host@
 host_alias = @host_alias@
 host_cpu = @host_cpu@
@@ -612,7 +613,6 @@ sharedstatedir = @sharedstatedir@
 srcdir = @srcdir@
 subdirs = @subdirs@
 sysconfdir = @sysconfdir@
-systemd_libexec_dir = @systemd_libexec_dir@
 systemd_unit_dir = @systemd_unit_dir@
 target = @target@
 target_alias = @target_alias@
@@ -622,7 +622,6 @@ target_vendor = @target_vendor@
 top_build_prefix = @top_build_prefix@
 top_builddir = @top_builddir@
 top_srcdir = @top_srcdir@
-user_rgw = @user_rgw@
 AUTOMAKE_OPTIONS = gnu
 ACLOCAL_AMFLAGS = -I m4
 EXTRA_DIST = autogen.sh ceph.spec.in ceph.spec install-deps.sh \
diff --git a/README b/README
index a3684e4..96fd53c 100644
--- a/README
+++ b/README
@@ -16,7 +16,7 @@ COPYING for a full inventory of licenses by file.
 
 Code contributions must include a valid "Signed-off-by" acknowledging
 the license for the modified or contributed file.  Please see the file
-SubmittingPatches for details on what that means and on how to
+SubmittingPatches.rst for details on what that means and on how to
 generate and submit patches.
 
 We do not require assignment of copyright to contribute code; code is
diff --git a/ceph.spec b/ceph.spec
index 51811b5..65c51c4 100644
--- a/ceph.spec
+++ b/ceph.spec
@@ -56,7 +56,7 @@ restorecon -R /var/log/ceph > /dev/null 2>&1;
 # the _with_systemd variable only implies that we'll install
 # /etc/tmpfiles.d/ceph.conf in order to set up the socket directory in
 # /var/run/ceph.
-%if 0%{?fedora} || 0%{?rhel} >= 7 || 0%{?suse_version} >= 1210
+%if 0%{?fedora} || 0%{?rhel} >= 7 || 0%{?suse_version}
 %global _with_systemd 1
 %{!?tmpfiles_create: %global tmpfiles_create systemd-tmpfiles --create}
 %endif
@@ -66,11 +66,15 @@ restorecon -R /var/log/ceph > /dev/null 2>&1;
 %global _with_lttng 1
 %endif
 
+# unify libexec for all targets
+%global _libexecdir %{_exec_prefix}/lib
+
+
 #################################################################################
 # common
 #################################################################################
 Name:		ceph
-Version:	10.0.3
+Version:	10.0.5
 Release:	0%{?dist}
 Epoch:		1
 Summary:	User space components of the Ceph file system
@@ -86,27 +90,9 @@ Patch0:		init-ceph.in-fedora.patch
 #################################################################################
 # dependencies that apply across all distro families
 #################################################################################
-Requires:	librbd1 = %{epoch}:%{version}-%{release}
-Requires:	librados2 = %{epoch}:%{version}-%{release}
-Requires:	libcephfs1 = %{epoch}:%{version}-%{release}
-Requires:	ceph-common = %{epoch}:%{version}-%{release}
-%if 0%{with selinux}
-Requires:	ceph-selinux = %{epoch}:%{version}-%{release}
-%endif
-Requires:	python-rados = %{epoch}:%{version}-%{release}
-Requires:	python-rbd = %{epoch}:%{version}-%{release}
-Requires:	python-cephfs = %{epoch}:%{version}-%{release}
-Requires:	python
-Requires:	python-requests
-Requires:	grep
-Requires:	xfsprogs
-Requires:	logrotate
-Requires:	parted
-Requires:	util-linux
-Requires:	hdparm
-Requires:	cryptsetup
-Requires:	findutils
-Requires:	which
+Requires:       ceph-osd = %{epoch}:%{version}-%{release}
+Requires:       ceph-mds = %{epoch}:%{version}-%{release}
+Requires:       ceph-mon = %{epoch}:%{version}-%{release}
 Requires(post):	binutils
 %if 0%{with cephfs_java}
 BuildRequires:	java-devel
@@ -132,7 +118,6 @@ BuildRequires:	hdparm
 BuildRequires:	leveldb-devel > 1.2
 BuildRequires:	libaio-devel
 BuildRequires:	libcurl-devel
-BuildRequires:	libedit-devel
 BuildRequires:	libxml2-devel
 BuildRequires:	libblkid-devel >= 2.17
 BuildRequires:	libudev-devel
@@ -165,41 +150,35 @@ BuildRequires:	systemd
 %{?systemd_requires}
 %endif
 PreReq:		%fillup_prereq
-Requires:	python-Flask
 BuildRequires:	net-tools
 BuildRequires:	libbz2-devel
-%if 0%{?suse_version} > 1210
-Requires:	gptfdisk
 %if 0%{with tcmalloc}
 BuildRequires:	gperftools-devel
 %endif
-%else
-Requires:	scsirastools
-BuildRequires:	google-perftools-devel
-%endif
 BuildRequires:	mozilla-nss-devel
 BuildRequires:	keyutils-devel
 BuildRequires:	libatomic-ops-devel
-%else
+BuildRequires:  lsb-release
+%endif
+%if 0%{?fedora} || 0%{?rhel} 
 %if 0%{?_with_systemd}
 Requires:	systemd
 %endif
 BuildRequires:	nss-devel
 BuildRequires:	keyutils-libs-devel
 BuildRequires:	libatomic_ops-devel
-Requires:	gdisk
 Requires(post):	chkconfig
 Requires(preun):	chkconfig
 Requires(preun):	initscripts
 BuildRequires:	gperftools-devel
-Requires:	python-flask
+BuildRequires:  redhat-lsb-core
 %endif
 # boost
 %if 0%{?fedora} || 0%{?rhel} 
 BuildRequires:  boost-random
 %endif
 # python-argparse for distros with Python 2.6 or lower
-%if (0%{?rhel} && 0%{?rhel} <= 6) || (0%{?suse_version} && 0%{?suse_version} <= 1110)
+%if (0%{?rhel} && 0%{?rhel} <= 6)
 BuildRequires:	python-argparse
 %endif
 # lttng and babeltrace for rbd-replay-prep
@@ -238,6 +217,37 @@ on commodity hardware and delivers object, block and file system storage.
 #################################################################################
 # packages
 #################################################################################
+%package base
+Summary:       Ceph Base Package
+Group:         System Environment/Base
+Requires:      ceph-common = %{epoch}:%{version}-%{release}
+Requires:      librbd1 = %{epoch}:%{version}-%{release}
+Requires:      librados2 = %{epoch}:%{version}-%{release}
+Requires:      libcephfs1 = %{epoch}:%{version}-%{release}
+%if 0%{with selinux}
+Requires:      ceph-selinux = %{epoch}:%{version}-%{release}
+%endif
+Requires:      python
+Requires:      python-requests
+Requires:      python-setuptools
+Requires:      grep
+Requires:      xfsprogs
+Requires:      logrotate
+Requires:      parted
+Requires:      util-linux
+Requires:      hdparm
+Requires:      cryptsetup
+Requires:      findutils
+Requires:      which
+%if 0%{?suse_version}
+Requires:      lsb-release
+%endif
+%if 0%{?fedora} || 0%{?rhel}
+Requires:      redhat-lsb-core
+%endif
+%description base
+Base is the package that includes all the files shared amongst ceph servers
+
 %package -n ceph-common
 Summary:	Ceph Common
 Group:		System Environment/Base
@@ -254,11 +264,38 @@ Requires:	python-requests
 Requires(pre):	pwdutils
 %endif
 # python-argparse is only needed in distros with Python 2.6 or lower
-%if (0%{?rhel} && 0%{?rhel} <= 6) || (0%{?suse_version} && 0%{?suse_version} <= 1110)
+%if (0%{?rhel} && 0%{?rhel} <= 6)
 Requires:	python-argparse
 %endif
 %description -n ceph-common
 Common utilities to mount and interact with a ceph storage cluster.
+Comprised of files that are common to Ceph clients and servers.
+
+%package mds
+Summary:	Ceph Metadata Server Daemon
+Group:		System Environment/Base
+Requires:	ceph-base = %{epoch}:%{version}-%{release}
+%description mds
+ceph-mds is the metadata server daemon for the Ceph distributed file system.
+One or more instances of ceph-mds collectively manage the file system
+namespace, coordinating access to the shared OSD cluster.
+
+%package mon
+Summary:	Ceph Monitor Daemon
+Group:		System Environment/Base
+Requires:	ceph-base = %{epoch}:%{version}-%{release}
+# For ceph-rest-api
+%if 0%{?fedora} || 0%{?rhel}
+Requires:      python-flask
+%endif
+%if 0%{?suse_version}
+Requires:      python-Flask
+%endif
+%description mon
+ceph-mon is the cluster monitor daemon for the Ceph distributed file
+system. One or more instances of ceph-mon form a Paxos part-time
+parliament cluster that provides extremely reliable and durable storage
+of cluster membership, configuration, and state.
 
 %package fuse
 Summary:	Ceph fuse-based client
@@ -276,6 +313,16 @@ Requires:	librbd1 = %{epoch}:%{version}-%{release}
 %description -n rbd-fuse
 FUSE based client to map Ceph rbd images to files
 
+%package -n rbd-mirror
+Summary:	Ceph daemon for mirroring RBD images
+Group:		System Environment/Base
+Requires:	%{name}
+Requires:	ceph-common = %{epoch}:%{version}-%{release}
+Requires:	librados2 = %{epoch}:%{version}-%{release}
+%description -n rbd-mirror
+Daemon for mirroring RBD images between Ceph clusters, streaming
+changes asynchronously.
+
 %package -n rbd-nbd
 Summary:	Ceph RBD client base on NBD
 Group:		System Environment/Base
@@ -295,6 +342,12 @@ Requires:	ceph-selinux = %{epoch}:%{version}-%{release}
 Requires:	librados2 = %{epoch}:%{version}-%{release}
 %if 0%{?rhel} || 0%{?fedora}
 Requires:	mailcap
+# python-flask for powerdns
+Requires:	python-flask
+%endif
+%if 0%{?suse_version}
+# python-Flask for powerdns
+Requires:      python-Flask
 %endif
 %description radosgw
 This package is an S3 HTTP REST gateway for the RADOS object store. It
@@ -314,6 +367,22 @@ under Open Cluster Framework (OCF) compliant resource
 managers such as Pacemaker.
 %endif
 
+%package osd
+Summary:	Ceph Object Storage Daemon
+Group:		System Environment/Base
+Requires:	ceph-base = %{epoch}:%{version}-%{release}
+# for sgdisk, used by ceph-disk
+%if 0%{?fedora} || 0%{?rhel}
+Requires:	gdisk
+%endif
+%if 0%{?suse_version}
+Requires:	gptfdisk
+%endif
+%description osd
+ceph-osd is the object storage daemon for the Ceph distributed file
+system.  It is responsible for storing objects on a local file system
+and providing access to them over the network.
+
 %package -n librados2
 Summary:	RADOS distributed object store client library
 Group:		System Environment/Libraries
@@ -588,6 +657,7 @@ export RPM_OPT_FLAGS=`echo $RPM_OPT_FLAGS | sed -e 's/i386/i486/'`
 
 %{configure}	CPPFLAGS="$java_inc" \
 		--prefix=/usr \
+                --libexecdir=%{_libexecdir} \
 		--localstatedir=/var \
 		--sysconfdir=/etc \
 %if 0%{?_with_systemd}
@@ -606,16 +676,6 @@ export RPM_OPT_FLAGS=`echo $RPM_OPT_FLAGS | sed -e 's/i386/i486/'`
 		--with-selinux \
 %endif
 		--with-librocksdb-static=check \
-%if 0%{?rhel} || 0%{?fedora}
-		--with-systemd-libexec-dir=/usr/libexec/ceph \
-		--with-rgw-user=root \
-		--with-rgw-group=root \
-%endif
-%if 0%{?suse_version}
-		--with-systemd-libexec-dir=/usr/lib/ceph/ \
-		--with-rgw-user=wwwrun \
-		--with-rgw-group=www \
-%endif
 		--with-radosgw \
 		$CEPH_EXTRA_CONFIGURE_ARGS \
 		%{?_with_ocf} \
@@ -642,6 +702,20 @@ make %{?_smp_mflags} check-local
 
 %install
 make DESTDIR=$RPM_BUILD_ROOT install
+rm -f $RPM_BUILD_ROOT%{_libdir}/ceph/erasure-code/libec_example.so
+rm -f $RPM_BUILD_ROOT%{_libdir}/ceph/erasure-code/libec_fail_to_initialize.so
+rm -f $RPM_BUILD_ROOT%{_libdir}/ceph/erasure-code/libec_fail_to_register.so
+rm -f $RPM_BUILD_ROOT%{_libdir}/ceph/erasure-code/libec_hangs.so
+rm -f $RPM_BUILD_ROOT%{_libdir}/ceph/erasure-code/libec_missing_entry_point.so
+rm -f $RPM_BUILD_ROOT%{_libdir}/ceph/erasure-code/libec_missing_version.so
+rm -f $RPM_BUILD_ROOT%{_libdir}/ceph/erasure-code/libec_test_jerasure_generic.so
+rm -f $RPM_BUILD_ROOT%{_libdir}/ceph/erasure-code/libec_test_jerasure_neon.so
+rm -f $RPM_BUILD_ROOT%{_libdir}/ceph/erasure-code/libec_test_jerasure_sse3.so
+rm -f $RPM_BUILD_ROOT%{_libdir}/ceph/erasure-code/libec_test_jerasure_sse4.so
+rm -f $RPM_BUILD_ROOT%{_libdir}/ceph/erasure-code/libec_test_shec_generic.so
+rm -f $RPM_BUILD_ROOT%{_libdir}/ceph/erasure-code/libec_test_shec_neon.so
+rm -f $RPM_BUILD_ROOT%{_libdir}/ceph/erasure-code/libec_test_shec_sse3.so
+rm -f $RPM_BUILD_ROOT%{_libdir}/ceph/erasure-code/libec_test_shec_sse4.so
 find $RPM_BUILD_ROOT -type f -name "*.la" -exec rm -f {} ';'
 find $RPM_BUILD_ROOT -type f -name "*.a" -exec rm -f {} ';'
 install -D src/etc-rbdmap $RPM_BUILD_ROOT%{_sysconfdir}/ceph/rbdmap
@@ -718,120 +792,29 @@ mkdir -p $RPM_BUILD_ROOT%{_localstatedir}/lib/ceph/bootstrap-rgw
 %clean
 rm -rf $RPM_BUILD_ROOT
 
-%pre
-%if 0%{?_with_systemd}
-  %if 0%{?suse_version}
-    # service_add_pre and friends don't work with parameterized systemd service
-    # instances, only with single services or targets, so we always pass
-    # ceph.target to these macros
-    %service_add_pre ceph.target
-  %endif
-%endif
-
-
-%post
-/sbin/ldconfig
-%if 0%{?_with_systemd}
-  %if 0%{?suse_version}
-    %fillup_only
-    %service_add_post ceph.target
-  %endif
-%else
-  /sbin/chkconfig --add ceph
-%endif
-
-%preun
-%if 0%{?_with_systemd}
-  %if 0%{?suse_version}
-    %service_del_preun ceph.target
-  %endif
-  # Disable and stop on removal.
-  if [ $1 = 0 ] ; then
-    SERVICE_LIST=$(systemctl | grep -E '^ceph-mon@|^ceph-create-keys@|^ceph-osd@|^ceph-mds@|^ceph-disk-'  | cut -d' ' -f1)
-    if [ -n "$SERVICE_LIST" ]; then
-      for SERVICE in $SERVICE_LIST; do
-        /usr/bin/systemctl --no-reload disable $SERVICE > /dev/null 2>&1 || :
-        /usr/bin/systemctl stop $SERVICE > /dev/null 2>&1 || :
-      done
-    fi
-  fi
-%else
-  %if 0%{?rhel} || 0%{?fedora}
-    if [ $1 = 0 ] ; then
-      /sbin/service ceph stop >/dev/null 2>&1
-      /sbin/chkconfig --del ceph
-    fi
-  %endif
-%endif
-
-%postun
-/sbin/ldconfig
-%if 0%{?_with_systemd}
-  if [ $1 = 1 ] ; then
-    # Restart on upgrade, but only if "CEPH_AUTO_RESTART_ON_UPGRADE" is set to
-    # "yes". In any case: if units are not running, do not touch them.
-    SYSCONF_CEPH=/etc/sysconfig/ceph
-    if [ -f $SYSCONF_CEPH -a -r $SYSCONF_CEPH ] ; then
-      source $SYSCONF_CEPH
-    fi
-    if [ "X$CEPH_AUTO_RESTART_ON_UPGRADE" = "Xyes" ] ; then
-      SERVICE_LIST=$(systemctl | grep -E '^ceph-mon@|^ceph-create-keys@|^ceph-osd@|^ceph-mds@|^ceph-disk-'  | cut -d' ' -f1)
-      if [ -n "$SERVICE_LIST" ]; then
-        for SERVICE in $SERVICE_LIST; do
-          /usr/bin/systemctl try-restart $SERVICE > /dev/null 2>&1 || :
-        done
-      fi
-    fi
-  fi
-%endif
-
 #################################################################################
-# files
+# files and systemd scriptlets
 #################################################################################
 %files
+
+%files base
 %defattr(-,root,root,-)
 %docdir %{_docdir}
 %dir %{_docdir}/ceph
 %{_docdir}/ceph/sample.ceph.conf
 %{_docdir}/ceph/sample.fetch_config
-%{_bindir}/cephfs
-%{_bindir}/ceph-clsinfo
-%{_bindir}/ceph-rest-api
-%{python_sitelib}/ceph_rest_api.py*
 %{_bindir}/crushtool
 %{_bindir}/monmaptool
 %{_bindir}/osdmaptool
 %{_bindir}/ceph-run
-%{_bindir}/ceph-mon
-%{_bindir}/ceph-mds
-%{_bindir}/ceph-objectstore-tool
-%{_bindir}/ceph-bluefs-tool
-%{_bindir}/ceph-osd
 %{_bindir}/ceph-detect-init
-%{_bindir}/librados-config
 %{_bindir}/ceph-client-debug
-%{_bindir}/cephfs-journal-tool
-%{_bindir}/cephfs-table-tool
-%{_bindir}/cephfs-data-scan
-%{_bindir}/ceph-debugpack
-%{_bindir}/ceph-coverage
+%{_bindir}/cephfs
 %if 0%{?_with_systemd}
-%{_unitdir}/ceph-mds@.service
-%{_unitdir}/ceph-mon@.service
 %{_unitdir}/ceph-create-keys@.service
-%{_unitdir}/ceph-osd@.service
-%{_unitdir}/ceph-radosgw@.service
-%{_unitdir}/ceph-disk@.service
-%{_unitdir}/ceph.target
-%{_unitdir}/ceph-osd.target
-%{_unitdir}/ceph-mon.target
-%{_unitdir}/ceph-mds.target
-%{_unitdir}/ceph-radosgw.target
 %else
 %{_initrddir}/ceph
 %endif
-%{_sbindir}/ceph-disk
-%{_sbindir}/ceph-disk-udev
 %{_sbindir}/ceph-create-keys
 %{_sbindir}/rcceph
 %if 0%{?rhel} >= 7 || 0%{?fedora} || 0%{?suse_version}
@@ -839,25 +822,11 @@ rm -rf $RPM_BUILD_ROOT
 %else
 /sbin/mount.ceph
 %endif
-%dir %{_libdir}/ceph
-%{_libdir}/ceph/ceph_common.sh
-%{_libexecdir}/ceph/ceph-osd-prestart.sh
+%dir %{_libexecdir}/ceph
+%{_libexecdir}/ceph/ceph_common.sh
 %dir %{_libdir}/rados-classes
-%{_libdir}/rados-classes/libcls_cephfs.so*
-%{_libdir}/rados-classes/libcls_rbd.so*
-%{_libdir}/rados-classes/libcls_hello.so*
-%{_libdir}/rados-classes/libcls_numops.so*
-%{_libdir}/rados-classes/libcls_rgw.so*
-%{_libdir}/rados-classes/libcls_lock.so*
-%{_libdir}/rados-classes/libcls_kvs.so*
-%{_libdir}/rados-classes/libcls_refcount.so*
-%{_libdir}/rados-classes/libcls_log.so*
-%{_libdir}/rados-classes/libcls_replica_log.so*
-%{_libdir}/rados-classes/libcls_statelog.so*
-%{_libdir}/rados-classes/libcls_timeindex.so*
-%{_libdir}/rados-classes/libcls_user.so*
-%{_libdir}/rados-classes/libcls_version.so*
-%{_libdir}/rados-classes/libcls_journal.so*
+%{_libdir}/rados-classes/*
+%dir %{_libdir}/ceph
 %dir %{_libdir}/ceph/erasure-code
 %{_libdir}/ceph/erasure-code/libec_*.so*
 %dir %{_libdir}/ceph/compressor
@@ -866,8 +835,6 @@ rm -rf $RPM_BUILD_ROOT
 %{_libdir}/libos_tp.so*
 %{_libdir}/libosd_tp.so*
 %endif
-%{_udevrulesdir}/60-ceph-partuuid-workaround.rules
-%{_udevrulesdir}/95-ceph-osd.rules
 %config %{_sysconfdir}/bash_completion.d/ceph
 %config(noreplace) %{_sysconfdir}/logrotate.d/ceph
 %if 0%{?fedora} || 0%{?rhel}
@@ -878,29 +845,20 @@ rm -rf $RPM_BUILD_ROOT
 %config %{_sysconfdir}/sysconfig/SuSEfirewall2.d/services/ceph-mon
 %config %{_sysconfdir}/sysconfig/SuSEfirewall2.d/services/ceph-osd-mds
 %endif
+%{_unitdir}/ceph.target
 %{python_sitelib}/ceph_detect_init*
+%{python_sitelib}/ceph_disk*
 %{_mandir}/man8/ceph-deploy.8*
 %{_mandir}/man8/ceph-detect-init.8*
-%{_mandir}/man8/ceph-disk.8*
 %{_mandir}/man8/ceph-create-keys.8*
-%{_mandir}/man8/ceph-mon.8*
-%{_mandir}/man8/ceph-mds.8*
-%{_mandir}/man8/ceph-osd.8*
 %{_mandir}/man8/ceph-run.8*
-%{_mandir}/man8/ceph-rest-api.8*
 %{_mandir}/man8/crushtool.8*
 %{_mandir}/man8/osdmaptool.8*
 %{_mandir}/man8/monmaptool.8*
 %{_mandir}/man8/cephfs.8*
 %{_mandir}/man8/mount.ceph.8*
-%{_mandir}/man8/ceph-debugpack.8*
-%{_mandir}/man8/ceph-clsinfo.8*
-%{_mandir}/man8/librados-config.8*
 #set up placeholder directories
 %attr(750,ceph,ceph) %dir %{_localstatedir}/lib/ceph/tmp
-%attr(750,ceph,ceph) %dir %{_localstatedir}/lib/ceph/mon
-%attr(750,ceph,ceph) %dir %{_localstatedir}/lib/ceph/osd
-%attr(750,ceph,ceph) %dir %{_localstatedir}/lib/ceph/mds
 %attr(750,ceph,ceph) %dir %{_localstatedir}/lib/ceph/bootstrap-osd
 %attr(750,ceph,ceph) %dir %{_localstatedir}/lib/ceph/bootstrap-mds
 %attr(750,ceph,ceph) %dir %{_localstatedir}/lib/ceph/bootstrap-rgw
@@ -908,8 +866,74 @@ rm -rf $RPM_BUILD_ROOT
 %attr(770,ceph,ceph) %dir %{_localstatedir}/run/ceph
 %endif
 
+%pre base
+%if 0%{?_with_systemd}
+  %if 0%{?suse_version}
+    # service_add_pre and friends don't work with parameterized systemd service
+    # instances, only with single services or targets, so we always pass
+    # ceph.target to these macros
+    %service_add_pre ceph.target
+  %endif
+%endif
+
+%post base
+/sbin/ldconfig
+%if 0%{?_with_systemd}
+  %if 0%{?suse_version}
+    %fillup_only
+    %service_add_post ceph.target
+  %endif
+%else
+  /sbin/chkconfig --add ceph
+%endif
+
+%preun base
+%if 0%{?_with_systemd}
+  %if 0%{?suse_version}
+    %service_del_preun ceph.target
+  %endif
+  # Disable and stop on removal.
+  if [ $1 = 0 ] ; then
+    SERVICE_LIST=$(systemctl | grep -E '^ceph-mon@|^ceph-create-keys@|^ceph-osd@|^ceph-mds@|^ceph-disk-'  | cut -d' ' -f1)
+    if [ -n "$SERVICE_LIST" ]; then
+      for SERVICE in $SERVICE_LIST; do
+        /usr/bin/systemctl --no-reload disable $SERVICE > /dev/null 2>&1 || :
+        /usr/bin/systemctl stop $SERVICE > /dev/null 2>&1 || :
+      done
+    fi
+  fi
+%else
+  %if 0%{?rhel} || 0%{?fedora}
+    if [ $1 = 0 ] ; then
+      /sbin/service ceph stop >/dev/null 2>&1
+      /sbin/chkconfig --del ceph
+    fi
+  %endif
+%endif
+
+%postun base
+/sbin/ldconfig
+%if 0%{?_with_systemd}
+  if [ $1 = 1 ] ; then
+    # Restart on upgrade, but only if "CEPH_AUTO_RESTART_ON_UPGRADE" is set to
+    # "yes". In any case: if units are not running, do not touch them.
+    SYSCONF_CEPH=/etc/sysconfig/ceph
+    if [ -f $SYSCONF_CEPH -a -r $SYSCONF_CEPH ] ; then
+      source $SYSCONF_CEPH
+    fi
+    if [ "X$CEPH_AUTO_RESTART_ON_UPGRADE" = "Xyes" ] ; then
+      SERVICE_LIST=$(systemctl | grep -E '^ceph-mon@|^ceph-create-keys@|^ceph-osd@|^ceph-mds@|^ceph-disk-'  | cut -d' ' -f1)
+      if [ -n "$SERVICE_LIST" ]; then
+        for SERVICE in $SERVICE_LIST; do
+          /usr/bin/systemctl try-restart $SERVICE > /dev/null 2>&1 || :
+        done
+      fi
+    fi
+  fi
+%endif
+
 #################################################################################
-%files -n ceph-common
+%files common
 %defattr(-,root,root,-)
 %{_bindir}/ceph
 %{_bindir}/ceph-authtool
@@ -943,12 +967,11 @@ rm -rf $RPM_BUILD_ROOT
 %{_mandir}/man8/rbd-replay.8*
 %{_mandir}/man8/rbd-replay-many.8*
 %{_mandir}/man8/rbd-replay-prep.8*
+%dir %{_datadir}/ceph/
 %{_datadir}/ceph/known_hosts_drop.ceph.com
 %{_datadir}/ceph/id_dsa_drop.ceph.com
 %{_datadir}/ceph/id_dsa_drop.ceph.com.pub
 %dir %{_sysconfdir}/ceph/
-%dir %{_datarootdir}/ceph/
-%dir %{_libexecdir}/ceph/
 %config %{_sysconfdir}/bash_completion.d/rados
 %config %{_sysconfdir}/bash_completion.d/rbd
 %config(noreplace) %{_sysconfdir}/ceph/rbdmap
@@ -963,7 +986,7 @@ rm -rf $RPM_BUILD_ROOT
 %attr(3770,ceph,ceph) %dir %{_localstatedir}/log/ceph/
 %attr(750,ceph,ceph) %dir %{_localstatedir}/lib/ceph/
 
-%pre -n ceph-common
+%pre common
 CEPH_GROUP_ID=""
 CEPH_USER_ID=""
 %if 0%{?rhel} || 0%{?fedora}
@@ -980,12 +1003,12 @@ getent passwd ceph >/dev/null || useradd -r -g ceph -d %{_localstatedir}/lib/cep
 %endif
 exit 0
 
-%post -n ceph-common
+%post common
 %if 0%{?_with_systemd}
 %tmpfiles_create %{_tmpfilesdir}/ceph-common.conf
 %endif
 
-%postun -n ceph-common
+%postun common
 # Package removal cleanup
 if [ "$1" -eq "0" ] ; then
     rm -rf /var/log/ceph
@@ -993,6 +1016,36 @@ if [ "$1" -eq "0" ] ; then
 fi
 
 #################################################################################
+%files mds
+%{_bindir}/ceph-mds
+%{_bindir}/cephfs-journal-tool
+%{_bindir}/cephfs-table-tool
+%{_bindir}/cephfs-data-scan
+%{_mandir}/man8/ceph-mds.8*
+%if 0%{?_with_systemd}
+%{_unitdir}/ceph-mds@.service
+%{_unitdir}/ceph-mds.target
+%else
+%{_initrddir}/ceph
+%endif
+%attr(750,ceph,ceph) %dir %{_localstatedir}/lib/ceph/mds
+
+#################################################################################
+%files mon
+%{_bindir}/ceph-mon
+%{_bindir}/ceph-rest-api
+%{_mandir}/man8/ceph-mon.8*
+%{_mandir}/man8/ceph-rest-api.8*
+%{python_sitelib}/ceph_rest_api.py*
+%if 0%{?_with_systemd}
+%{_unitdir}/ceph-mon@.service
+%{_unitdir}/ceph-mon.target
+%else
+%{_initrddir}/ceph
+%endif
+%attr(750,ceph,ceph) %dir %{_localstatedir}/lib/ceph/mon
+
+#################################################################################
 %files fuse
 %defattr(-,root,root,-)
 %{_bindir}/ceph-fuse
@@ -1010,6 +1063,12 @@ fi
 %{_mandir}/man8/rbd-fuse.8*
 
 #################################################################################
+%files -n rbd-mirror
+%defattr(-,root,root,-)
+%{_bindir}/rbd-mirror
+%{_mandir}/man8/rbd-mirror.8*
+
+#################################################################################
 %files -n rbd-nbd
 %defattr(-,root,root,-)
 %{_bindir}/rbd-nbd
@@ -1026,6 +1085,8 @@ fi
 %config %{_sysconfdir}/bash_completion.d/radosgw-admin
 %dir %{_localstatedir}/lib/ceph/radosgw
 %if 0%{?_with_systemd}
+%{_unitdir}/ceph-radosgw@.service
+%{_unitdir}/ceph-radosgw.target
 %else
 %{_initrddir}/ceph-radosgw
 %{_sbindir}/rcceph-radosgw
@@ -1076,6 +1137,29 @@ fi
 %endif
 
 #################################################################################
+%files osd
+%{_bindir}/ceph-clsinfo
+%{_bindir}/ceph-bluefs-tool
+%{_bindir}/ceph-objectstore-tool
+%{_bindir}/ceph-osd
+%{_sbindir}/ceph-disk
+%{_sbindir}/ceph-disk-udev
+%{_libexecdir}/ceph/ceph-osd-prestart.sh
+%{_udevrulesdir}/60-ceph-partuuid-workaround.rules
+%{_udevrulesdir}/95-ceph-osd.rules
+%{_mandir}/man8/ceph-clsinfo.8*
+%{_mandir}/man8/ceph-disk.8*
+%{_mandir}/man8/ceph-osd.8*
+%if 0%{?_with_systemd}
+%{_unitdir}/ceph-osd@.service
+%{_unitdir}/ceph-osd.target
+%{_unitdir}/ceph-disk@.service
+%else
+%{_initrddir}/ceph
+%endif
+%attr(750,ceph,ceph) %dir %{_localstatedir}/lib/ceph/osd
+
+#################################################################################
 %if %{with ocf}
 %files resource-agents
 %defattr(0755,root,root,-)
@@ -1116,6 +1200,8 @@ fi
 %if 0%{?_with_lttng}
 %{_libdir}/librados_tp.so
 %endif
+%{_bindir}/librados-config
+%{_mandir}/man8/librados-config.8*
 
 #################################################################################
 %files -n python-rados
@@ -1226,9 +1312,12 @@ ln -sf %{_libdir}/librbd.so.1 /usr/lib64/qemu/librbd.so.1
 %{_bindir}/ceph_test_*
 %{_bindir}/ceph_tpbench
 %{_bindir}/ceph_xattr_bench
+%{_bindir}/ceph-coverage
 %{_bindir}/ceph-monstore-tool
 %{_bindir}/ceph-osdomap-tool
 %{_bindir}/ceph-kvstore-tool
+%{_bindir}/ceph-debugpack
+%{_mandir}/man8/ceph-debugpack.8*
 %dir %{_libdir}/ceph
 %{_libdir}/ceph/ceph-monstore-update-crush.sh
 
@@ -1377,4 +1466,5 @@ exit 0
 # We need an empty %%files list for python-ceph-compat, to tell rpmbuild to
 # actually build this meta package.
 
+
 %changelog
diff --git a/ceph.spec.in b/ceph.spec.in
index 487232c..498eac4 100644
--- a/ceph.spec.in
+++ b/ceph.spec.in
@@ -56,7 +56,7 @@ restorecon -R /var/log/ceph > /dev/null 2>&1;
 # the _with_systemd variable only implies that we'll install
 # /etc/tmpfiles.d/ceph.conf in order to set up the socket directory in
 # /var/run/ceph.
-%if 0%{?fedora} || 0%{?rhel} >= 7 || 0%{?suse_version} >= 1210
+%if 0%{?fedora} || 0%{?rhel} >= 7 || 0%{?suse_version}
 %global _with_systemd 1
 %{!?tmpfiles_create: %global tmpfiles_create systemd-tmpfiles --create}
 %endif
@@ -66,6 +66,10 @@ restorecon -R /var/log/ceph > /dev/null 2>&1;
 %global _with_lttng 1
 %endif
 
+# unify libexec for all targets
+%global _libexecdir %{_exec_prefix}/lib
+
+
 #################################################################################
 # common
 #################################################################################
@@ -86,27 +90,9 @@ Patch0:		init-ceph.in-fedora.patch
 #################################################################################
 # dependencies that apply across all distro families
 #################################################################################
-Requires:	librbd1 = %{epoch}:%{version}-%{release}
-Requires:	librados2 = %{epoch}:%{version}-%{release}
-Requires:	libcephfs1 = %{epoch}:%{version}-%{release}
-Requires:	ceph-common = %{epoch}:%{version}-%{release}
-%if 0%{with selinux}
-Requires:	ceph-selinux = %{epoch}:%{version}-%{release}
-%endif
-Requires:	python-rados = %{epoch}:%{version}-%{release}
-Requires:	python-rbd = %{epoch}:%{version}-%{release}
-Requires:	python-cephfs = %{epoch}:%{version}-%{release}
-Requires:	python
-Requires:	python-requests
-Requires:	grep
-Requires:	xfsprogs
-Requires:	logrotate
-Requires:	parted
-Requires:	util-linux
-Requires:	hdparm
-Requires:	cryptsetup
-Requires:	findutils
-Requires:	which
+Requires:       ceph-osd = %{epoch}:%{version}-%{release}
+Requires:       ceph-mds = %{epoch}:%{version}-%{release}
+Requires:       ceph-mon = %{epoch}:%{version}-%{release}
 Requires(post):	binutils
 %if 0%{with cephfs_java}
 BuildRequires:	java-devel
@@ -132,7 +118,6 @@ BuildRequires:	hdparm
 BuildRequires:	leveldb-devel > 1.2
 BuildRequires:	libaio-devel
 BuildRequires:	libcurl-devel
-BuildRequires:	libedit-devel
 BuildRequires:	libxml2-devel
 BuildRequires:	libblkid-devel >= 2.17
 BuildRequires:	libudev-devel
@@ -165,41 +150,35 @@ BuildRequires:	systemd
 %{?systemd_requires}
 %endif
 PreReq:		%fillup_prereq
-Requires:	python-Flask
 BuildRequires:	net-tools
 BuildRequires:	libbz2-devel
-%if 0%{?suse_version} > 1210
-Requires:	gptfdisk
 %if 0%{with tcmalloc}
 BuildRequires:	gperftools-devel
 %endif
-%else
-Requires:	scsirastools
-BuildRequires:	google-perftools-devel
-%endif
 BuildRequires:	mozilla-nss-devel
 BuildRequires:	keyutils-devel
 BuildRequires:	libatomic-ops-devel
-%else
+BuildRequires:  lsb-release
+%endif
+%if 0%{?fedora} || 0%{?rhel} 
 %if 0%{?_with_systemd}
 Requires:	systemd
 %endif
 BuildRequires:	nss-devel
 BuildRequires:	keyutils-libs-devel
 BuildRequires:	libatomic_ops-devel
-Requires:	gdisk
 Requires(post):	chkconfig
 Requires(preun):	chkconfig
 Requires(preun):	initscripts
 BuildRequires:	gperftools-devel
-Requires:	python-flask
+BuildRequires:  redhat-lsb-core
 %endif
 # boost
 %if 0%{?fedora} || 0%{?rhel} 
 BuildRequires:  boost-random
 %endif
 # python-argparse for distros with Python 2.6 or lower
-%if (0%{?rhel} && 0%{?rhel} <= 6) || (0%{?suse_version} && 0%{?suse_version} <= 1110)
+%if (0%{?rhel} && 0%{?rhel} <= 6)
 BuildRequires:	python-argparse
 %endif
 # lttng and babeltrace for rbd-replay-prep
@@ -238,6 +217,37 @@ on commodity hardware and delivers object, block and file system storage.
 #################################################################################
 # packages
 #################################################################################
+%package base
+Summary:       Ceph Base Package
+Group:         System Environment/Base
+Requires:      ceph-common = %{epoch}:%{version}-%{release}
+Requires:      librbd1 = %{epoch}:%{version}-%{release}
+Requires:      librados2 = %{epoch}:%{version}-%{release}
+Requires:      libcephfs1 = %{epoch}:%{version}-%{release}
+%if 0%{with selinux}
+Requires:      ceph-selinux = %{epoch}:%{version}-%{release}
+%endif
+Requires:      python
+Requires:      python-requests
+Requires:      python-setuptools
+Requires:      grep
+Requires:      xfsprogs
+Requires:      logrotate
+Requires:      parted
+Requires:      util-linux
+Requires:      hdparm
+Requires:      cryptsetup
+Requires:      findutils
+Requires:      which
+%if 0%{?suse_version}
+Requires:      lsb-release
+%endif
+%if 0%{?fedora} || 0%{?rhel}
+Requires:      redhat-lsb-core
+%endif
+%description base
+Base is the package that includes all the files shared amongst ceph servers
+
 %package -n ceph-common
 Summary:	Ceph Common
 Group:		System Environment/Base
@@ -254,11 +264,38 @@ Requires:	python-requests
 Requires(pre):	pwdutils
 %endif
 # python-argparse is only needed in distros with Python 2.6 or lower
-%if (0%{?rhel} && 0%{?rhel} <= 6) || (0%{?suse_version} && 0%{?suse_version} <= 1110)
+%if (0%{?rhel} && 0%{?rhel} <= 6)
 Requires:	python-argparse
 %endif
 %description -n ceph-common
 Common utilities to mount and interact with a ceph storage cluster.
+Comprised of files that are common to Ceph clients and servers.
+
+%package mds
+Summary:	Ceph Metadata Server Daemon
+Group:		System Environment/Base
+Requires:	ceph-base = %{epoch}:%{version}-%{release}
+%description mds
+ceph-mds is the metadata server daemon for the Ceph distributed file system.
+One or more instances of ceph-mds collectively manage the file system
+namespace, coordinating access to the shared OSD cluster.
+
+%package mon
+Summary:	Ceph Monitor Daemon
+Group:		System Environment/Base
+Requires:	ceph-base = %{epoch}:%{version}-%{release}
+# For ceph-rest-api
+%if 0%{?fedora} || 0%{?rhel}
+Requires:      python-flask
+%endif
+%if 0%{?suse_version}
+Requires:      python-Flask
+%endif
+%description mon
+ceph-mon is the cluster monitor daemon for the Ceph distributed file
+system. One or more instances of ceph-mon form a Paxos part-time
+parliament cluster that provides extremely reliable and durable storage
+of cluster membership, configuration, and state.
 
 %package fuse
 Summary:	Ceph fuse-based client
@@ -276,6 +313,16 @@ Requires:	librbd1 = %{epoch}:%{version}-%{release}
 %description -n rbd-fuse
 FUSE based client to map Ceph rbd images to files
 
+%package -n rbd-mirror
+Summary:	Ceph daemon for mirroring RBD images
+Group:		System Environment/Base
+Requires:	%{name}
+Requires:	ceph-common = %{epoch}:%{version}-%{release}
+Requires:	librados2 = %{epoch}:%{version}-%{release}
+%description -n rbd-mirror
+Daemon for mirroring RBD images between Ceph clusters, streaming
+changes asynchronously.
+
 %package -n rbd-nbd
 Summary:	Ceph RBD client base on NBD
 Group:		System Environment/Base
@@ -295,6 +342,12 @@ Requires:	ceph-selinux = %{epoch}:%{version}-%{release}
 Requires:	librados2 = %{epoch}:%{version}-%{release}
 %if 0%{?rhel} || 0%{?fedora}
 Requires:	mailcap
+# python-flask for powerdns
+Requires:	python-flask
+%endif
+%if 0%{?suse_version}
+# python-Flask for powerdns
+Requires:      python-Flask
 %endif
 %description radosgw
 This package is an S3 HTTP REST gateway for the RADOS object store. It
@@ -314,6 +367,22 @@ under Open Cluster Framework (OCF) compliant resource
 managers such as Pacemaker.
 %endif
 
+%package osd
+Summary:	Ceph Object Storage Daemon
+Group:		System Environment/Base
+Requires:	ceph-base = %{epoch}:%{version}-%{release}
+# for sgdisk, used by ceph-disk
+%if 0%{?fedora} || 0%{?rhel}
+Requires:	gdisk
+%endif
+%if 0%{?suse_version}
+Requires:	gptfdisk
+%endif
+%description osd
+ceph-osd is the object storage daemon for the Ceph distributed file
+system.  It is responsible for storing objects on a local file system
+and providing access to them over the network.
+
 %package -n librados2
 Summary:	RADOS distributed object store client library
 Group:		System Environment/Libraries
@@ -588,6 +657,7 @@ export RPM_OPT_FLAGS=`echo $RPM_OPT_FLAGS | sed -e 's/i386/i486/'`
 
 %{configure}	CPPFLAGS="$java_inc" \
 		--prefix=/usr \
+                --libexecdir=%{_libexecdir} \
 		--localstatedir=/var \
 		--sysconfdir=/etc \
 %if 0%{?_with_systemd}
@@ -606,16 +676,6 @@ export RPM_OPT_FLAGS=`echo $RPM_OPT_FLAGS | sed -e 's/i386/i486/'`
 		--with-selinux \
 %endif
 		--with-librocksdb-static=check \
-%if 0%{?rhel} || 0%{?fedora}
-		--with-systemd-libexec-dir=/usr/libexec/ceph \
-		--with-rgw-user=root \
-		--with-rgw-group=root \
-%endif
-%if 0%{?suse_version}
-		--with-systemd-libexec-dir=/usr/lib/ceph/ \
-		--with-rgw-user=wwwrun \
-		--with-rgw-group=www \
-%endif
 		--with-radosgw \
 		$CEPH_EXTRA_CONFIGURE_ARGS \
 		%{?_with_ocf} \
@@ -642,6 +702,20 @@ make %{?_smp_mflags} check-local
 
 %install
 make DESTDIR=$RPM_BUILD_ROOT install
+rm -f $RPM_BUILD_ROOT%{_libdir}/ceph/erasure-code/libec_example.so
+rm -f $RPM_BUILD_ROOT%{_libdir}/ceph/erasure-code/libec_fail_to_initialize.so
+rm -f $RPM_BUILD_ROOT%{_libdir}/ceph/erasure-code/libec_fail_to_register.so
+rm -f $RPM_BUILD_ROOT%{_libdir}/ceph/erasure-code/libec_hangs.so
+rm -f $RPM_BUILD_ROOT%{_libdir}/ceph/erasure-code/libec_missing_entry_point.so
+rm -f $RPM_BUILD_ROOT%{_libdir}/ceph/erasure-code/libec_missing_version.so
+rm -f $RPM_BUILD_ROOT%{_libdir}/ceph/erasure-code/libec_test_jerasure_generic.so
+rm -f $RPM_BUILD_ROOT%{_libdir}/ceph/erasure-code/libec_test_jerasure_neon.so
+rm -f $RPM_BUILD_ROOT%{_libdir}/ceph/erasure-code/libec_test_jerasure_sse3.so
+rm -f $RPM_BUILD_ROOT%{_libdir}/ceph/erasure-code/libec_test_jerasure_sse4.so
+rm -f $RPM_BUILD_ROOT%{_libdir}/ceph/erasure-code/libec_test_shec_generic.so
+rm -f $RPM_BUILD_ROOT%{_libdir}/ceph/erasure-code/libec_test_shec_neon.so
+rm -f $RPM_BUILD_ROOT%{_libdir}/ceph/erasure-code/libec_test_shec_sse3.so
+rm -f $RPM_BUILD_ROOT%{_libdir}/ceph/erasure-code/libec_test_shec_sse4.so
 find $RPM_BUILD_ROOT -type f -name "*.la" -exec rm -f {} ';'
 find $RPM_BUILD_ROOT -type f -name "*.a" -exec rm -f {} ';'
 install -D src/etc-rbdmap $RPM_BUILD_ROOT%{_sysconfdir}/ceph/rbdmap
@@ -718,120 +792,29 @@ mkdir -p $RPM_BUILD_ROOT%{_localstatedir}/lib/ceph/bootstrap-rgw
 %clean
 rm -rf $RPM_BUILD_ROOT
 
-%pre
-%if 0%{?_with_systemd}
-  %if 0%{?suse_version}
-    # service_add_pre and friends don't work with parameterized systemd service
-    # instances, only with single services or targets, so we always pass
-    # ceph.target to these macros
-    %service_add_pre ceph.target
-  %endif
-%endif
-
-
-%post
-/sbin/ldconfig
-%if 0%{?_with_systemd}
-  %if 0%{?suse_version}
-    %fillup_only
-    %service_add_post ceph.target
-  %endif
-%else
-  /sbin/chkconfig --add ceph
-%endif
-
-%preun
-%if 0%{?_with_systemd}
-  %if 0%{?suse_version}
-    %service_del_preun ceph.target
-  %endif
-  # Disable and stop on removal.
-  if [ $1 = 0 ] ; then
-    SERVICE_LIST=$(systemctl | grep -E '^ceph-mon@|^ceph-create-keys@|^ceph-osd@|^ceph-mds@|^ceph-disk-'  | cut -d' ' -f1)
-    if [ -n "$SERVICE_LIST" ]; then
-      for SERVICE in $SERVICE_LIST; do
-        /usr/bin/systemctl --no-reload disable $SERVICE > /dev/null 2>&1 || :
-        /usr/bin/systemctl stop $SERVICE > /dev/null 2>&1 || :
-      done
-    fi
-  fi
-%else
-  %if 0%{?rhel} || 0%{?fedora}
-    if [ $1 = 0 ] ; then
-      /sbin/service ceph stop >/dev/null 2>&1
-      /sbin/chkconfig --del ceph
-    fi
-  %endif
-%endif
-
-%postun
-/sbin/ldconfig
-%if 0%{?_with_systemd}
-  if [ $1 = 1 ] ; then
-    # Restart on upgrade, but only if "CEPH_AUTO_RESTART_ON_UPGRADE" is set to
-    # "yes". In any case: if units are not running, do not touch them.
-    SYSCONF_CEPH=/etc/sysconfig/ceph
-    if [ -f $SYSCONF_CEPH -a -r $SYSCONF_CEPH ] ; then
-      source $SYSCONF_CEPH
-    fi
-    if [ "X$CEPH_AUTO_RESTART_ON_UPGRADE" = "Xyes" ] ; then
-      SERVICE_LIST=$(systemctl | grep -E '^ceph-mon@|^ceph-create-keys@|^ceph-osd@|^ceph-mds@|^ceph-disk-'  | cut -d' ' -f1)
-      if [ -n "$SERVICE_LIST" ]; then
-        for SERVICE in $SERVICE_LIST; do
-          /usr/bin/systemctl try-restart $SERVICE > /dev/null 2>&1 || :
-        done
-      fi
-    fi
-  fi
-%endif
-
 #################################################################################
-# files
+# files and systemd scriptlets
 #################################################################################
 %files
+
+%files base
 %defattr(-,root,root,-)
 %docdir %{_docdir}
 %dir %{_docdir}/ceph
 %{_docdir}/ceph/sample.ceph.conf
 %{_docdir}/ceph/sample.fetch_config
-%{_bindir}/cephfs
-%{_bindir}/ceph-clsinfo
-%{_bindir}/ceph-rest-api
-%{python_sitelib}/ceph_rest_api.py*
 %{_bindir}/crushtool
 %{_bindir}/monmaptool
 %{_bindir}/osdmaptool
 %{_bindir}/ceph-run
-%{_bindir}/ceph-mon
-%{_bindir}/ceph-mds
-%{_bindir}/ceph-objectstore-tool
-%{_bindir}/ceph-bluefs-tool
-%{_bindir}/ceph-osd
 %{_bindir}/ceph-detect-init
-%{_bindir}/librados-config
 %{_bindir}/ceph-client-debug
-%{_bindir}/cephfs-journal-tool
-%{_bindir}/cephfs-table-tool
-%{_bindir}/cephfs-data-scan
-%{_bindir}/ceph-debugpack
-%{_bindir}/ceph-coverage
+%{_bindir}/cephfs
 %if 0%{?_with_systemd}
-%{_unitdir}/ceph-mds@.service
-%{_unitdir}/ceph-mon@.service
 %{_unitdir}/ceph-create-keys@.service
-%{_unitdir}/ceph-osd@.service
-%{_unitdir}/ceph-radosgw@.service
-%{_unitdir}/ceph-disk@.service
-%{_unitdir}/ceph.target
-%{_unitdir}/ceph-osd.target
-%{_unitdir}/ceph-mon.target
-%{_unitdir}/ceph-mds.target
-%{_unitdir}/ceph-radosgw.target
 %else
 %{_initrddir}/ceph
 %endif
-%{_sbindir}/ceph-disk
-%{_sbindir}/ceph-disk-udev
 %{_sbindir}/ceph-create-keys
 %{_sbindir}/rcceph
 %if 0%{?rhel} >= 7 || 0%{?fedora} || 0%{?suse_version}
@@ -839,25 +822,11 @@ rm -rf $RPM_BUILD_ROOT
 %else
 /sbin/mount.ceph
 %endif
-%dir %{_libdir}/ceph
-%{_libdir}/ceph/ceph_common.sh
-%{_libexecdir}/ceph/ceph-osd-prestart.sh
+%dir %{_libexecdir}/ceph
+%{_libexecdir}/ceph/ceph_common.sh
 %dir %{_libdir}/rados-classes
-%{_libdir}/rados-classes/libcls_cephfs.so*
-%{_libdir}/rados-classes/libcls_rbd.so*
-%{_libdir}/rados-classes/libcls_hello.so*
-%{_libdir}/rados-classes/libcls_numops.so*
-%{_libdir}/rados-classes/libcls_rgw.so*
-%{_libdir}/rados-classes/libcls_lock.so*
-%{_libdir}/rados-classes/libcls_kvs.so*
-%{_libdir}/rados-classes/libcls_refcount.so*
-%{_libdir}/rados-classes/libcls_log.so*
-%{_libdir}/rados-classes/libcls_replica_log.so*
-%{_libdir}/rados-classes/libcls_statelog.so*
-%{_libdir}/rados-classes/libcls_timeindex.so*
-%{_libdir}/rados-classes/libcls_user.so*
-%{_libdir}/rados-classes/libcls_version.so*
-%{_libdir}/rados-classes/libcls_journal.so*
+%{_libdir}/rados-classes/*
+%dir %{_libdir}/ceph
 %dir %{_libdir}/ceph/erasure-code
 %{_libdir}/ceph/erasure-code/libec_*.so*
 %dir %{_libdir}/ceph/compressor
@@ -866,8 +835,6 @@ rm -rf $RPM_BUILD_ROOT
 %{_libdir}/libos_tp.so*
 %{_libdir}/libosd_tp.so*
 %endif
-%{_udevrulesdir}/60-ceph-partuuid-workaround.rules
-%{_udevrulesdir}/95-ceph-osd.rules
 %config %{_sysconfdir}/bash_completion.d/ceph
 %config(noreplace) %{_sysconfdir}/logrotate.d/ceph
 %if 0%{?fedora} || 0%{?rhel}
@@ -878,29 +845,20 @@ rm -rf $RPM_BUILD_ROOT
 %config %{_sysconfdir}/sysconfig/SuSEfirewall2.d/services/ceph-mon
 %config %{_sysconfdir}/sysconfig/SuSEfirewall2.d/services/ceph-osd-mds
 %endif
+%{_unitdir}/ceph.target
 %{python_sitelib}/ceph_detect_init*
+%{python_sitelib}/ceph_disk*
 %{_mandir}/man8/ceph-deploy.8*
 %{_mandir}/man8/ceph-detect-init.8*
-%{_mandir}/man8/ceph-disk.8*
 %{_mandir}/man8/ceph-create-keys.8*
-%{_mandir}/man8/ceph-mon.8*
-%{_mandir}/man8/ceph-mds.8*
-%{_mandir}/man8/ceph-osd.8*
 %{_mandir}/man8/ceph-run.8*
-%{_mandir}/man8/ceph-rest-api.8*
 %{_mandir}/man8/crushtool.8*
 %{_mandir}/man8/osdmaptool.8*
 %{_mandir}/man8/monmaptool.8*
 %{_mandir}/man8/cephfs.8*
 %{_mandir}/man8/mount.ceph.8*
-%{_mandir}/man8/ceph-debugpack.8*
-%{_mandir}/man8/ceph-clsinfo.8*
-%{_mandir}/man8/librados-config.8*
 #set up placeholder directories
 %attr(750,ceph,ceph) %dir %{_localstatedir}/lib/ceph/tmp
-%attr(750,ceph,ceph) %dir %{_localstatedir}/lib/ceph/mon
-%attr(750,ceph,ceph) %dir %{_localstatedir}/lib/ceph/osd
-%attr(750,ceph,ceph) %dir %{_localstatedir}/lib/ceph/mds
 %attr(750,ceph,ceph) %dir %{_localstatedir}/lib/ceph/bootstrap-osd
 %attr(750,ceph,ceph) %dir %{_localstatedir}/lib/ceph/bootstrap-mds
 %attr(750,ceph,ceph) %dir %{_localstatedir}/lib/ceph/bootstrap-rgw
@@ -908,8 +866,74 @@ rm -rf $RPM_BUILD_ROOT
 %attr(770,ceph,ceph) %dir %{_localstatedir}/run/ceph
 %endif
 
+%pre base
+%if 0%{?_with_systemd}
+  %if 0%{?suse_version}
+    # service_add_pre and friends don't work with parameterized systemd service
+    # instances, only with single services or targets, so we always pass
+    # ceph.target to these macros
+    %service_add_pre ceph.target
+  %endif
+%endif
+
+%post base
+/sbin/ldconfig
+%if 0%{?_with_systemd}
+  %if 0%{?suse_version}
+    %fillup_only
+    %service_add_post ceph.target
+  %endif
+%else
+  /sbin/chkconfig --add ceph
+%endif
+
+%preun base
+%if 0%{?_with_systemd}
+  %if 0%{?suse_version}
+    %service_del_preun ceph.target
+  %endif
+  # Disable and stop on removal.
+  if [ $1 = 0 ] ; then
+    SERVICE_LIST=$(systemctl | grep -E '^ceph-mon@|^ceph-create-keys@|^ceph-osd@|^ceph-mds@|^ceph-disk-'  | cut -d' ' -f1)
+    if [ -n "$SERVICE_LIST" ]; then
+      for SERVICE in $SERVICE_LIST; do
+        /usr/bin/systemctl --no-reload disable $SERVICE > /dev/null 2>&1 || :
+        /usr/bin/systemctl stop $SERVICE > /dev/null 2>&1 || :
+      done
+    fi
+  fi
+%else
+  %if 0%{?rhel} || 0%{?fedora}
+    if [ $1 = 0 ] ; then
+      /sbin/service ceph stop >/dev/null 2>&1
+      /sbin/chkconfig --del ceph
+    fi
+  %endif
+%endif
+
+%postun base
+/sbin/ldconfig
+%if 0%{?_with_systemd}
+  if [ $1 = 1 ] ; then
+    # Restart on upgrade, but only if "CEPH_AUTO_RESTART_ON_UPGRADE" is set to
+    # "yes". In any case: if units are not running, do not touch them.
+    SYSCONF_CEPH=/etc/sysconfig/ceph
+    if [ -f $SYSCONF_CEPH -a -r $SYSCONF_CEPH ] ; then
+      source $SYSCONF_CEPH
+    fi
+    if [ "X$CEPH_AUTO_RESTART_ON_UPGRADE" = "Xyes" ] ; then
+      SERVICE_LIST=$(systemctl | grep -E '^ceph-mon@|^ceph-create-keys@|^ceph-osd@|^ceph-mds@|^ceph-disk-'  | cut -d' ' -f1)
+      if [ -n "$SERVICE_LIST" ]; then
+        for SERVICE in $SERVICE_LIST; do
+          /usr/bin/systemctl try-restart $SERVICE > /dev/null 2>&1 || :
+        done
+      fi
+    fi
+  fi
+%endif
+
 #################################################################################
-%files -n ceph-common
+%files common
 %defattr(-,root,root,-)
 %{_bindir}/ceph
 %{_bindir}/ceph-authtool
@@ -943,12 +967,11 @@ rm -rf $RPM_BUILD_ROOT
 %{_mandir}/man8/rbd-replay.8*
 %{_mandir}/man8/rbd-replay-many.8*
 %{_mandir}/man8/rbd-replay-prep.8*
+%dir %{_datadir}/ceph/
 %{_datadir}/ceph/known_hosts_drop.ceph.com
 %{_datadir}/ceph/id_dsa_drop.ceph.com
 %{_datadir}/ceph/id_dsa_drop.ceph.com.pub
 %dir %{_sysconfdir}/ceph/
-%dir %{_datarootdir}/ceph/
-%dir %{_libexecdir}/ceph/
 %config %{_sysconfdir}/bash_completion.d/rados
 %config %{_sysconfdir}/bash_completion.d/rbd
 %config(noreplace) %{_sysconfdir}/ceph/rbdmap
@@ -963,7 +986,7 @@ rm -rf $RPM_BUILD_ROOT
 %attr(3770,ceph,ceph) %dir %{_localstatedir}/log/ceph/
 %attr(750,ceph,ceph) %dir %{_localstatedir}/lib/ceph/
 
-%pre -n ceph-common
+%pre common
 CEPH_GROUP_ID=""
 CEPH_USER_ID=""
 %if 0%{?rhel} || 0%{?fedora}
@@ -980,12 +1003,12 @@ getent passwd ceph >/dev/null || useradd -r -g ceph -d %{_localstatedir}/lib/cep
 %endif
 exit 0
 
-%post -n ceph-common
+%post common
 %if 0%{?_with_systemd}
 %tmpfiles_create %{_tmpfilesdir}/ceph-common.conf
 %endif
 
-%postun -n ceph-common
+%postun common
 # Package removal cleanup
 if [ "$1" -eq "0" ] ; then
     rm -rf /var/log/ceph
@@ -993,6 +1016,36 @@ if [ "$1" -eq "0" ] ; then
 fi
 
 #################################################################################
+%files mds
+%{_bindir}/ceph-mds
+%{_bindir}/cephfs-journal-tool
+%{_bindir}/cephfs-table-tool
+%{_bindir}/cephfs-data-scan
+%{_mandir}/man8/ceph-mds.8*
+%if 0%{?_with_systemd}
+%{_unitdir}/ceph-mds@.service
+%{_unitdir}/ceph-mds.target
+%else
+%{_initrddir}/ceph
+%endif
+%attr(750,ceph,ceph) %dir %{_localstatedir}/lib/ceph/mds
+
+#################################################################################
+%files mon
+%{_bindir}/ceph-mon
+%{_bindir}/ceph-rest-api
+%{_mandir}/man8/ceph-mon.8*
+%{_mandir}/man8/ceph-rest-api.8*
+%{python_sitelib}/ceph_rest_api.py*
+%if 0%{?_with_systemd}
+%{_unitdir}/ceph-mon@.service
+%{_unitdir}/ceph-mon.target
+%else
+%{_initrddir}/ceph
+%endif
+%attr(750,ceph,ceph) %dir %{_localstatedir}/lib/ceph/mon
+
+#################################################################################
 %files fuse
 %defattr(-,root,root,-)
 %{_bindir}/ceph-fuse
@@ -1010,6 +1063,12 @@ fi
 %{_mandir}/man8/rbd-fuse.8*
 
 #################################################################################
+%files -n rbd-mirror
+%defattr(-,root,root,-)
+%{_bindir}/rbd-mirror
+%{_mandir}/man8/rbd-mirror.8*
+
+#################################################################################
 %files -n rbd-nbd
 %defattr(-,root,root,-)
 %{_bindir}/rbd-nbd
@@ -1026,6 +1085,8 @@ fi
 %config %{_sysconfdir}/bash_completion.d/radosgw-admin
 %dir %{_localstatedir}/lib/ceph/radosgw
 %if 0%{?_with_systemd}
+%{_unitdir}/ceph-radosgw@.service
+%{_unitdir}/ceph-radosgw.target
 %else
 %{_initrddir}/ceph-radosgw
 %{_sbindir}/rcceph-radosgw
@@ -1076,6 +1137,29 @@ fi
 %endif
 
 #################################################################################
+%files osd
+%{_bindir}/ceph-clsinfo
+%{_bindir}/ceph-bluefs-tool
+%{_bindir}/ceph-objectstore-tool
+%{_bindir}/ceph-osd
+%{_sbindir}/ceph-disk
+%{_sbindir}/ceph-disk-udev
+%{_libexecdir}/ceph/ceph-osd-prestart.sh
+%{_udevrulesdir}/60-ceph-partuuid-workaround.rules
+%{_udevrulesdir}/95-ceph-osd.rules
+%{_mandir}/man8/ceph-clsinfo.8*
+%{_mandir}/man8/ceph-disk.8*
+%{_mandir}/man8/ceph-osd.8*
+%if 0%{?_with_systemd}
+%{_unitdir}/ceph-osd@.service
+%{_unitdir}/ceph-osd.target
+%{_unitdir}/ceph-disk@.service
+%else
+%{_initrddir}/ceph
+%endif
+%attr(750,ceph,ceph) %dir %{_localstatedir}/lib/ceph/osd
+
+#################################################################################
 %if %{with ocf}
 %files resource-agents
 %defattr(0755,root,root,-)
@@ -1116,6 +1200,8 @@ fi
 %if 0%{?_with_lttng}
 %{_libdir}/librados_tp.so
 %endif
+%{_bindir}/librados-config
+%{_mandir}/man8/librados-config.8*
 
 #################################################################################
 %files -n python-rados
@@ -1226,9 +1312,12 @@ ln -sf %{_libdir}/librbd.so.1 /usr/lib64/qemu/librbd.so.1
 %{_bindir}/ceph_test_*
 %{_bindir}/ceph_tpbench
 %{_bindir}/ceph_xattr_bench
+%{_bindir}/ceph-coverage
 %{_bindir}/ceph-monstore-tool
 %{_bindir}/ceph-osdomap-tool
 %{_bindir}/ceph-kvstore-tool
+%{_bindir}/ceph-debugpack
+%{_mandir}/man8/ceph-debugpack.8*
 %dir %{_libdir}/ceph
 %{_libdir}/ceph/ceph-monstore-update-crush.sh
 
@@ -1377,4 +1466,5 @@ exit 0
 # We need an empty %%files list for python-ceph-compat, to tell rpmbuild to
 # actually build this meta package.
 
+
 %changelog
diff --git a/configure b/configure
index b5a8482..5b3143e 100755
--- a/configure
+++ b/configure
@@ -1,6 +1,6 @@
 #! /bin/sh
 # Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.69 for ceph 10.0.3.
+# Generated by GNU Autoconf 2.69 for ceph 10.0.5.
 #
 # Report bugs to <ceph-devel@vger.kernel.org>.
 #
@@ -590,8 +590,8 @@ MAKEFLAGS=
 # Identity of this package.
 PACKAGE_NAME='ceph'
 PACKAGE_TARNAME='ceph'
-PACKAGE_VERSION='10.0.3'
-PACKAGE_STRING='ceph 10.0.3'
+PACKAGE_VERSION='10.0.5'
+PACKAGE_STRING='ceph 10.0.5'
 PACKAGE_BUGREPORT='ceph-devel@vger.kernel.org'
 PACKAGE_URL=''
 
@@ -650,9 +650,6 @@ WITH_BUILD_TESTS_TRUE
 WITH_EVENTFD_FALSE
 WITH_EVENTFD_TRUE
 systemd_unit_dir
-group_rgw
-user_rgw
-systemd_libexec_dir
 VALGRIND_ENABLED_FALSE
 VALGRIND_ENABLED_TRUE
 HAVE_VALGRIND
@@ -713,8 +710,12 @@ WITH_OCF_FALSE
 WITH_OCF_TRUE
 WITH_LIBATOMIC_FALSE
 WITH_LIBATOMIC_TRUE
-LIBEDIT_LIBS
-LIBEDIT_CFLAGS
+LIBDPDK_LIBS
+LIBDPDK_CFLAGS
+LIBPCIACCESS_LIBS
+LIBPCIACCESS_CFLAGS
+WITH_SPDK_FALSE
+WITH_SPDK_TRUE
 XIO_LIBS
 ENABLE_XIO_FALSE
 ENABLE_XIO_TRUE
@@ -989,6 +990,7 @@ enable_pgrefdebugging
 enable_cephfs_java
 with_jdk_dir
 enable_xio
+with_spdk
 with_libatomic_ops
 with_ocf
 with_kinetic
@@ -1000,9 +1002,6 @@ with_libzfs
 with_lttng
 with_babeltrace
 enable_valgrind
-with_systemd_libexec_dir
-with_rgw_user
-with_rgw_group
 with_systemd_unit_dir
 with_eventfd
 '
@@ -1031,8 +1030,8 @@ NSS_CFLAGS
 NSS_LIBS
 LIBFUSE_CFLAGS
 LIBFUSE_LIBS
-LIBEDIT_CFLAGS
-LIBEDIT_LIBS
+LIBPCIACCESS_CFLAGS
+LIBPCIACCESS_LIBS
 LIBROCKSDB_CFLAGS
 LIBROCKSDB_LIBS
 LIBZFS_CFLAGS
@@ -1588,7 +1587,7 @@ if test "$ac_init_help" = "long"; then
   # Omit some internal or obsolete options to make the list less imposing.
   # This message is too long to be a string in the A/UX 3.1 sh.
   cat <<_ACEOF
-\`configure' configures ceph 10.0.3 to adapt to many kinds of systems.
+\`configure' configures ceph 10.0.5 to adapt to many kinds of systems.
 
 Usage: $0 [OPTION]... [VAR=VALUE]...
 
@@ -1660,7 +1659,7 @@ fi
 
 if test -n "$ac_init_help"; then
   case $ac_init_help in
-     short | recursive ) echo "Configuration of ceph 10.0.3:";;
+     short | recursive ) echo "Configuration of ceph 10.0.5:";;
    esac
   cat <<\_ACEOF
 
@@ -1720,6 +1719,7 @@ Optional Packages:
                           allocations
   --without-tcmalloc      disable tcmalloc for memory allocations
   --with-jdk-dir(=DIR)    Path to JDK directory
+  --with-spdk             build Ceph SPDK Support
   --without-libatomic-ops disable libatomic-ops for the atomic_t type
   --with-ocf              build OCF-compliant cluster resource agent
   --with-kinetic          build kinetic support
@@ -1731,13 +1731,6 @@ Optional Packages:
   --with-libzfs           build ZFS support
   --with-lttng            Trace with LTTng
   --with-babeltrace       Enable Babeltrace
-  --with-systemd-libexec-dir=DIR
-                          systemd libexec directory [SYSTEMD_LIBEXEC_DIR]
-                          defaults to --libexecdir=DIR
-  --with-rgw-user=USER    systemd unit directory [USER_RGW] Defaults to
-                          "www-data"
-  --with-rgw-group=GROUP  systemd unit directory [GROUP_RGW] Defaults to
-                          "www-data"
   --with-systemdsystemunitdir=DIR
                           systemd unit directory [SYSTEMD_UNIT_DIR] Defaults
                           to the correct value for debian /etc/systemd/system/
@@ -1774,10 +1767,10 @@ Some influential environment variables:
               C compiler flags for LIBFUSE, overriding pkg-config
   LIBFUSE_LIBS
               linker flags for LIBFUSE, overriding pkg-config
-  LIBEDIT_CFLAGS
-              C compiler flags for LIBEDIT, overriding pkg-config
-  LIBEDIT_LIBS
-              linker flags for LIBEDIT, overriding pkg-config
+  LIBPCIACCESS_CFLAGS
+              C compiler flags for LIBPCIACCESS, overriding pkg-config
+  LIBPCIACCESS_LIBS
+              linker flags for LIBPCIACCESS, overriding pkg-config
   LIBROCKSDB_CFLAGS
               C compiler flags for LIBROCKSDB, overriding pkg-config
   LIBROCKSDB_LIBS
@@ -1853,7 +1846,7 @@ fi
 test -n "$ac_init_help" && exit $ac_status
 if $ac_init_version; then
   cat <<\_ACEOF
-ceph configure 10.0.3
+ceph configure 10.0.5
 generated by GNU Autoconf 2.69
 
 Copyright (C) 2012 Free Software Foundation, Inc.
@@ -2929,7 +2922,7 @@ cat >config.log <<_ACEOF
 This file contains any messages produced by compilers while
 running configure, to aid debugging if configure makes a mistake.
 
-It was created by ceph $as_me 10.0.3, which was
+It was created by ceph $as_me 10.0.5, which was
 generated by GNU Autoconf 2.69.  Invocation command line was
 
   $ $0 $@
@@ -17042,7 +17035,7 @@ fi
 
 # Define the identity of the package.
  PACKAGE='ceph'
- VERSION='10.0.3'
+ VERSION='10.0.5'
 
 
 cat >>confdefs.h <<_ACEOF
@@ -20424,7 +20417,6 @@ else
   with_profiler=no
 fi
 
-
 if test "x$with_profiler" = xyes; then :
   { $as_echo "$as_me:${as_lineno-$LINENO}: checking for ProfilerFlush in -lprofiler" >&5
 $as_echo_n "checking for ProfilerFlush in -lprofiler... " >&6; }
@@ -20476,33 +20468,6 @@ as_fn_error $? "--with-profiler was given but libprofiler (libgoogle-perftools-d
 See \`config.log' for more details" "$LINENO" 5; }
 fi
 
-             ac_ext=cpp
-ac_cpp='$CXXCPP $CPPFLAGS'
-ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5'
-ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
-ac_compiler_gnu=$ac_cv_cxx_compiler_gnu
-
-             for ac_header in gperftools/heap-profiler.h \
-               gperftools/malloc_extension.h \
-               gperftools/profiler.h
-do :
-  as_ac_Header=`$as_echo "ac_cv_header_$ac_header" | $as_tr_sh`
-ac_fn_cxx_check_header_mongrel "$LINENO" "$ac_header" "$as_ac_Header" "$ac_includes_default"
-if eval test \"x\$"$as_ac_Header"\" = x"yes"; then :
-  cat >>confdefs.h <<_ACEOF
-#define `$as_echo "HAVE_$ac_header" | $as_tr_cpp` 1
-_ACEOF
-
-fi
-
-done
-
-             ac_ext=c
-ac_cpp='$CPP $CPPFLAGS'
-ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
-ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
-ac_compiler_gnu=$ac_cv_c_compiler_gnu
-
 
 fi
  if test "$with_profiler" = "yes"; then
@@ -20872,6 +20837,9 @@ else
         { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
 $as_echo "yes" >&6; }
 	HAVE_LIBFUSE=1
+
+$as_echo "#define HAVE_LIBFUSE 1" >>confdefs.h
+
 fi
 
 fi
@@ -21113,6 +21081,39 @@ else
 fi
 
 
+
+if test "$with_profiler" = yes -o \
+            "$HAVE_LIBTCMALLOC" = "1" -o \
+            "$HAVE_LIBTCMALLOC_MINIMAL" = "1"; then :
+  ac_ext=cpp
+ac_cpp='$CXXCPP $CPPFLAGS'
+ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_cxx_compiler_gnu
+
+       for ac_header in gperftools/heap-profiler.h \
+                         gperftools/malloc_extension.h \
+                         gperftools/profiler.h
+do :
+  as_ac_Header=`$as_echo "ac_cv_header_$ac_header" | $as_tr_sh`
+ac_fn_cxx_check_header_mongrel "$LINENO" "$ac_header" "$as_ac_Header" "$ac_includes_default"
+if eval test \"x\$"$as_ac_Header"\" = x"yes"; then :
+  cat >>confdefs.h <<_ACEOF
+#define `$as_echo "HAVE_$ac_header" | $as_tr_cpp` 1
+_ACEOF
+
+fi
+
+done
+
+       ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+fi
+
 # error out if --with-jemalloc and ! --without-tcmalloc
 if test "x$with_jemalloc" = "xyes"; then
 	if test "x$with_tcmalloc" != "xno"; then
@@ -21339,7 +21340,7 @@ else
 JAVA_TEST=Test.java
 CLASS_TEST=Test.class
 cat << \EOF > $JAVA_TEST
-/* #line 21342 "configure" */
+/* #line 21343 "configure" */
 public class Test {
 }
 EOF
@@ -21797,24 +21798,42 @@ $as_echo "#define HAVE_XIO 1" >>confdefs.h
 fi
 
 #
-# FreeBSD has it in base.
+# SPDK
 #
-if test x"$freebsd" != x"yes" -a x"$with_radosgw" = x"yes"; then
+
+# Check whether --with-spdk was given.
+if test "${with_spdk+set}" = set; then :
+  withval=$with_spdk;
+else
+  with_spdk=no
+fi
+
+
+ if test "x$with_spdk" != "xno"; then
+  WITH_SPDK_TRUE=
+  WITH_SPDK_FALSE='#'
+else
+  WITH_SPDK_TRUE='#'
+  WITH_SPDK_FALSE=
+fi
+
+
+if test "x$with_spdk" != x"no"; then
 
 pkg_failed=no
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for LIBEDIT" >&5
-$as_echo_n "checking for LIBEDIT... " >&6; }
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for LIBPCIACCESS" >&5
+$as_echo_n "checking for LIBPCIACCESS... " >&6; }
 
-if test -n "$LIBEDIT_CFLAGS"; then
-    pkg_cv_LIBEDIT_CFLAGS="$LIBEDIT_CFLAGS"
+if test -n "$LIBPCIACCESS_CFLAGS"; then
+    pkg_cv_LIBPCIACCESS_CFLAGS="$LIBPCIACCESS_CFLAGS"
  elif test -n "$PKG_CONFIG"; then
     if test -n "$PKG_CONFIG" && \
-    { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"libedit >= 2.11\""; } >&5
-  ($PKG_CONFIG --exists --print-errors "libedit >= 2.11") 2>&5
+    { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"pciaccess\""; } >&5
+  ($PKG_CONFIG --exists --print-errors "pciaccess") 2>&5
   ac_status=$?
   $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
   test $ac_status = 0; }; then
-  pkg_cv_LIBEDIT_CFLAGS=`$PKG_CONFIG --cflags "libedit >= 2.11" 2>/dev/null`
+  pkg_cv_LIBPCIACCESS_CFLAGS=`$PKG_CONFIG --cflags "pciaccess" 2>/dev/null`
 		      test "x$?" != "x0" && pkg_failed=yes
 else
   pkg_failed=yes
@@ -21822,16 +21841,16 @@ fi
  else
     pkg_failed=untried
 fi
-if test -n "$LIBEDIT_LIBS"; then
-    pkg_cv_LIBEDIT_LIBS="$LIBEDIT_LIBS"
+if test -n "$LIBPCIACCESS_LIBS"; then
+    pkg_cv_LIBPCIACCESS_LIBS="$LIBPCIACCESS_LIBS"
  elif test -n "$PKG_CONFIG"; then
     if test -n "$PKG_CONFIG" && \
-    { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"libedit >= 2.11\""; } >&5
-  ($PKG_CONFIG --exists --print-errors "libedit >= 2.11") 2>&5
+    { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"pciaccess\""; } >&5
+  ($PKG_CONFIG --exists --print-errors "pciaccess") 2>&5
   ac_status=$?
   $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
   test $ac_status = 0; }; then
-  pkg_cv_LIBEDIT_LIBS=`$PKG_CONFIG --libs "libedit >= 2.11" 2>/dev/null`
+  pkg_cv_LIBPCIACCESS_LIBS=`$PKG_CONFIG --libs "pciaccess" 2>/dev/null`
 		      test "x$?" != "x0" && pkg_failed=yes
 else
   pkg_failed=yes
@@ -21852,33 +21871,106 @@ else
         _pkg_short_errors_supported=no
 fi
         if test $_pkg_short_errors_supported = yes; then
-	        LIBEDIT_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "libedit >= 2.11" 2>&1`
+	        LIBPCIACCESS_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "pciaccess" 2>&1`
         else
-	        LIBEDIT_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "libedit >= 2.11" 2>&1`
+	        LIBPCIACCESS_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "pciaccess" 2>&1`
         fi
 	# Put the nasty error message in config.log where it belongs
-	echo "$LIBEDIT_PKG_ERRORS" >&5
+	echo "$LIBPCIACCESS_PKG_ERRORS" >&5
 
-	{ { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
-$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-as_fn_error $? "No usable version of libedit found.
-See \`config.log' for more details" "$LINENO" 5; }
+	as_fn_error $? "Package requirements (pciaccess) were not met:
+
+$LIBPCIACCESS_PKG_ERRORS
+
+Consider adjusting the PKG_CONFIG_PATH environment variable if you
+installed software in a non-standard prefix.
+
+Alternatively, you may set the environment variables LIBPCIACCESS_CFLAGS
+and LIBPCIACCESS_LIBS to avoid the need to call pkg-config.
+See the pkg-config man page for more details." "$LINENO" 5
 elif test $pkg_failed = untried; then
      	{ $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
 $as_echo "no" >&6; }
 	{ { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
 $as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-as_fn_error $? "No usable version of libedit found.
+as_fn_error $? "The pkg-config script could not be found or is too old.  Make sure it
+is in your PATH or set the PKG_CONFIG environment variable to the full
+path to pkg-config.
+
+Alternatively, you may set the environment variables LIBPCIACCESS_CFLAGS
+and LIBPCIACCESS_LIBS to avoid the need to call pkg-config.
+See the pkg-config man page for more details.
+
+To get pkg-config, see <http://pkg-config.freedesktop.org/>.
 See \`config.log' for more details" "$LINENO" 5; }
 else
-	LIBEDIT_CFLAGS=$pkg_cv_LIBEDIT_CFLAGS
-	LIBEDIT_LIBS=$pkg_cv_LIBEDIT_LIBS
+	LIBPCIACCESS_CFLAGS=$pkg_cv_LIBPCIACCESS_CFLAGS
+	LIBPCIACCESS_LIBS=$pkg_cv_LIBPCIACCESS_LIBS
         { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
 $as_echo "yes" >&6; }
 
 fi
+   ac_fn_c_check_header_mongrel "$LINENO" "dpdk/rte_config.h" "ac_cv_header_dpdk_rte_config_h" "$ac_includes_default"
+if test "x$ac_cv_header_dpdk_rte_config_h" = xyes; then :
+
+else
+  as_fn_error $? "dpdk/rte_config.h not found (libdpdk-dev, libdpdk-devel)" "$LINENO" 5
+fi
+
+
+   { $as_echo "$as_me:${as_lineno-$LINENO}: checking for rte_eal_init in -lrte_eal" >&5
+$as_echo_n "checking for rte_eal_init in -lrte_eal... " >&6; }
+if ${ac_cv_lib_rte_eal_rte_eal_init+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  ac_check_lib_save_LIBS=$LIBS
+LIBS="-lrte_eal -lrte_mempool -lrte_ring -lpthread -ldl $LIBS"
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+/* Override any GCC internal prototype to avoid an error.
+   Use char because int might match the return type of a GCC
+   builtin and then its argument prototype would still apply.  */
+#ifdef __cplusplus
+extern "C"
+#endif
+char rte_eal_init ();
+int
+main ()
+{
+return rte_eal_init ();
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+  ac_cv_lib_rte_eal_rte_eal_init=yes
+else
+  ac_cv_lib_rte_eal_rte_eal_init=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_rte_eal_rte_eal_init" >&5
+$as_echo "$ac_cv_lib_rte_eal_rte_eal_init" >&6; }
+if test "x$ac_cv_lib_rte_eal_rte_eal_init" = xyes; then :
+  true
 else
-	LIBEDIT_LIBS="-ledit"
+  { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error $? "DPDK rte_eal_init not found
+See \`config.log' for more details" "$LINENO" 5; }
+fi
+
+   LIBDPDK_CFLAGS=-I/usr/include/dpdk
+
+   LIBDPDK_LIBS='-lrte_eal -lrte_mempool -lrte_ring -lpthread'
+
+
+
+$as_echo "#define HAVE_SPDK 1" >>confdefs.h
+
 fi
 
 #libatomic-ops? You want it!
@@ -23007,7 +23099,7 @@ fi
 if test "${with_librocksdb_static+set}" = set; then :
   withval=$with_librocksdb_static;
 else
-  with_librocksdb_static=no
+  with_librocksdb_static=check
 fi
 
 if test "x$with_librocksdb_static" = "xcheck" -a "x$HAVE_CXX11" = "x1" ; then :
@@ -23068,17 +23160,6 @@ else
 fi
 
 
-# error out if --with-jemalloc and --with-librocksdb_static as rocksdb uses tcmalloc
-if test "x$with_jemalloc" = "xyes"; then
-	if test "x$with_librocksdb_static" != "xno"; then
-		{ { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
-$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-as_fn_error $? "--with-jemalloc called with --with-librocksdb_static, turn off
-			        --with-librocksdb-static or --with-jemalloc
-See \`config.log' for more details" "$LINENO" 5; }
-	fi
-fi
-
 # needs libcurl and libxml2
 if test "x$with_rest_bench" = xyes && test "x$with_system_libs3" = xno; then
    { $as_echo "$as_me:${as_lineno-$LINENO}: checking for curl_easy_init in -lcurl" >&5
@@ -23888,6 +23969,89 @@ fi
 fi
 
 
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for main in -lboost_iostreams-mt" >&5
+$as_echo_n "checking for main in -lboost_iostreams-mt... " >&6; }
+if ${ac_cv_lib_boost_iostreams_mt_main+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  ac_check_lib_save_LIBS=$LIBS
+LIBS="-lboost_iostreams-mt  $LIBS"
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+
+int
+main ()
+{
+return main ();
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_cxx_try_link "$LINENO"; then :
+  ac_cv_lib_boost_iostreams_mt_main=yes
+else
+  ac_cv_lib_boost_iostreams_mt_main=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_boost_iostreams_mt_main" >&5
+$as_echo "$ac_cv_lib_boost_iostreams_mt_main" >&6; }
+if test "x$ac_cv_lib_boost_iostreams_mt_main" = xyes; then :
+  cat >>confdefs.h <<_ACEOF
+#define HAVE_LIBBOOST_IOSTREAMS_MT 1
+_ACEOF
+
+  LIBS="-lboost_iostreams-mt $LIBS"
+
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for main in -lboost_iostreams" >&5
+$as_echo_n "checking for main in -lboost_iostreams... " >&6; }
+if ${ac_cv_lib_boost_iostreams_main+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  ac_check_lib_save_LIBS=$LIBS
+LIBS="-lboost_iostreams  $LIBS"
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+
+int
+main ()
+{
+return main ();
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_cxx_try_link "$LINENO"; then :
+  ac_cv_lib_boost_iostreams_main=yes
+else
+  ac_cv_lib_boost_iostreams_main=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_boost_iostreams_main" >&5
+$as_echo "$ac_cv_lib_boost_iostreams_main" >&6; }
+if test "x$ac_cv_lib_boost_iostreams_main" = xyes; then :
+  cat >>confdefs.h <<_ACEOF
+#define HAVE_LIBBOOST_IOSTREAMS 1
+_ACEOF
+
+  LIBS="-lboost_iostreams $LIBS"
+
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: \"Boost iostreams library not found.\"" >&5
+$as_echo "$as_me: \"Boost iostreams library not found.\"" >&6;}
+fi
+
+fi
+
+
 # Find the right boost_thread library.
 BOOST_THREAD_LIBS=""
 saved_LIBS="${LIBS}"
@@ -25155,80 +25319,6 @@ fi
 
 
 
-# Check whether --with-systemd-libexec-dir was given.
-if test "${with_systemd_libexec_dir+set}" = set; then :
-  withval=$with_systemd_libexec_dir;
-	    systemd_libexec_dir="$withval"
-
-else
-
-        if test "x$SYSTEMD_LIBEXEC_DIR" = "x"; then
-
-            prefix_save=$prefix
-            exec_prefix_save=$exec_prefix
-
-                        if test "x$prefix" = "xNONE"; then
-                prefix="$ac_default_prefix"
-            fi
-                        if test "x$exec_prefix" = "xNONE"; then
-                exec_prefix=$prefix
-            fi
-
-                        systemd_libexec_dir="`eval exec_prefix=$exec_prefix prefix=$prefix echo $libexecdir`"
-
-
-            prefix=$prefix_save
-            exec_prefix=$exec_prefix_save
-        else
-            systemd_libexec_dir="$SYSTEMD_LIBEXEC_DIR"
-        fi
-
-
-fi
-
-
-
-
-
-# Check whether --with-rgw-user was given.
-if test "${with_rgw_user+set}" = set; then :
-  withval=$with_rgw_user;
-        user_rgw="$withval"
-
-else
-
-        if test "x$USER_RGW" = "x"; then
-            user_rgw=www-data
-        else
-            user_rgw="$USER_RGW"
-        fi
-
-
-fi
-
-
-
-
-# Check whether --with-rgw-group was given.
-if test "${with_rgw_group+set}" = set; then :
-  withval=$with_rgw_group;
-        group_rgw="$withval"
-
-else
-
-        if test "x$GROUP_RGW" = "x"; then
-            group_rgw=www-data
-        else
-            group_rgw="$GROUP_RGW"
-        fi
-
-
-fi
-
-
-
-
-
 # Check whether --with-systemd-unit-dir was given.
 if test "${with_systemd_unit_dir+set}" = set; then :
   withval=$with_systemd_unit_dir;
@@ -25982,6 +26072,10 @@ if test -z "${ENABLE_XIO_TRUE}" && test -z "${ENABLE_XIO_FALSE}"; then
   as_fn_error $? "conditional \"ENABLE_XIO\" was never defined.
 Usually this means the macro was only invoked conditionally." "$LINENO" 5
 fi
+if test -z "${WITH_SPDK_TRUE}" && test -z "${WITH_SPDK_FALSE}"; then
+  as_fn_error $? "conditional \"WITH_SPDK\" was never defined.
+Usually this means the macro was only invoked conditionally." "$LINENO" 5
+fi
 if test -z "${WITH_LIBATOMIC_TRUE}" && test -z "${WITH_LIBATOMIC_FALSE}"; then
   as_fn_error $? "conditional \"WITH_LIBATOMIC\" was never defined.
 Usually this means the macro was only invoked conditionally." "$LINENO" 5
@@ -26463,7 +26557,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
 # report actual input values of CONFIG_FILES etc. instead of their
 # values after options handling.
 ac_log="
-This file was extended by ceph $as_me 10.0.3, which was
+This file was extended by ceph $as_me 10.0.5, which was
 generated by GNU Autoconf 2.69.  Invocation command line was
 
   CONFIG_FILES    = $CONFIG_FILES
@@ -26529,7 +26623,7 @@ _ACEOF
 cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
 ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
 ac_cs_version="\\
-ceph config.status 10.0.3
+ceph config.status 10.0.5
 configured by $0, generated by GNU Autoconf 2.69,
   with options \\"\$ac_cs_config\\"
 
diff --git a/configure.ac b/configure.ac
index 047076e..be669b5 100644
--- a/configure.ac
+++ b/configure.ac
@@ -8,7 +8,7 @@ AC_PREREQ(2.59)
 # VERSION define is not used by the code.  It gets a version string
 # from 'git describe'; see src/ceph_ver.[ch]
 
-AC_INIT([ceph], [10.0.3], [ceph-devel@vger.kernel.org])
+AC_INIT([ceph], [10.0.5], [ceph-devel@vger.kernel.org])
 
 AX_CXX_COMPILE_STDCXX_11(, mandatory)
 
@@ -454,11 +454,6 @@ AC_ARG_WITH([profiler],
 AS_IF([test "x$with_profiler" = xyes],
 	    [AC_CHECK_LIB([profiler], [ProfilerFlush], [],
 	        	  [AC_MSG_FAILURE([--with-profiler was given but libprofiler (libgoogle-perftools-dev on debian) not found])])
-             AC_LANG_PUSH([C++])
-             AC_CHECK_HEADERS([gperftools/heap-profiler.h \
-               gperftools/malloc_extension.h \
-               gperftools/profiler.h])
-             AC_LANG_POP([C++])
             ],
 	    [])
 AM_CONDITIONAL(WITH_PROFILER, test "$with_profiler" = "yes")
@@ -528,7 +523,8 @@ AC_ARG_WITH([fuse],
 LIBFUSE=
 AS_IF([test "x$with_fuse" != xno], [
   PKG_CHECK_MODULES([LIBFUSE], [fuse],
-    [HAVE_LIBFUSE=1],
+    [HAVE_LIBFUSE=1
+     AC_DEFINE([HAVE_LIBFUSE], [1], [Define if you have fuse])],
     [AC_MSG_FAILURE([no FUSE found (use --without-fuse to disable)])])
 ])
 AM_CONDITIONAL(WITH_FUSE, [test "$HAVE_LIBFUSE" = "1"])
@@ -591,6 +587,15 @@ AS_IF([test "x$with_tcmalloc" != xno],
 		  [no tcmalloc found (use --without-tcmalloc to disable)])])])
 AM_CONDITIONAL(WITH_TCMALLOC, [test "$HAVE_LIBTCMALLOC" = "1"])
 
+AS_IF([test "$with_profiler" = yes -o \
+            "$HAVE_LIBTCMALLOC" = "1" -o \
+            "$HAVE_LIBTCMALLOC_MINIMAL" = "1"],
+      [AC_LANG_PUSH([C++])
+       AC_CHECK_HEADERS([gperftools/heap-profiler.h \
+                         gperftools/malloc_extension.h \
+                         gperftools/profiler.h])
+       AC_LANG_POP([C++])])
+
 # error out if --with-jemalloc and ! --without-tcmalloc
 if test "x$with_jemalloc" = "xyes"; then
 	if test "x$with_tcmalloc" != "xno"; then
@@ -717,13 +722,24 @@ if test "x$enable_xio" = x"yes"; then
 fi
 
 #
-# FreeBSD has it in base.
+# SPDK
 #
-if test x"$freebsd" != x"yes" -a x"$with_radosgw" = x"yes"; then
-PKG_CHECK_MODULES([LIBEDIT], [libedit >= 2.11],
-                [], AC_MSG_FAILURE([No usable version of libedit found.]))
-else
-	LIBEDIT_LIBS="-ledit"
+AC_ARG_WITH(spdk,
+    [AC_HELP_STRING([--with-spdk], [build Ceph SPDK Support])], [], [with_spdk=no])
+
+AM_CONDITIONAL(WITH_SPDK, [test "x$with_spdk" != "xno"])
+
+if test "x$with_spdk" != x"no"; then
+   PKG_CHECK_MODULES([LIBPCIACCESS], [pciaccess])
+   AC_CHECK_HEADER([dpdk/rte_config.h], [],
+     AC_MSG_ERROR([dpdk/rte_config.h not found (libdpdk-dev, libdpdk-devel)]))
+   AC_CHECK_LIB([rte_eal], [rte_eal_init], [true],
+     AC_MSG_FAILURE([DPDK rte_eal_init not found]),
+                [-lrte_mempool -lrte_ring -lpthread -ldl])
+   AC_SUBST([LIBDPDK_CFLAGS], [-I/usr/include/dpdk])
+   AC_SUBST([LIBDPDK_LIBS], ['-lrte_eal -lrte_mempool -lrte_ring -lpthread'])
+   AC_SUBST([LIBPCIACCESS_LIBS])
+   AC_DEFINE([HAVE_SPDK], [1], [SPDK conditional compilation])
 fi
 
 #libatomic-ops? You want it!
@@ -811,7 +827,7 @@ AM_CONDITIONAL(WITH_DLIBROCKSDB, [ test "$with_librocksdb" = "yes" ])
 AC_ARG_WITH([librocksdb-static],
             [AS_HELP_STRING([--with-librocksdb-static], [build rocksdb support])],
             [],
-            [with_librocksdb_static=no])
+            [with_librocksdb_static=check])
 AS_IF([test "x$with_librocksdb_static" = "xcheck" -a "x$HAVE_CXX11" = "x1" ],
             [with_librocksdb_static="yes"])
 AS_IF([test "x$with_librocksdb_static" = "xyes"],
@@ -826,14 +842,6 @@ AS_IF([test "x$with_librocksdb_static" = "xyes"], [
 AM_CONDITIONAL(HAVE_BZLIB, [test "x$have_bzlib" = "xyes"])
 AM_CONDITIONAL(HAVE_LZ4, [test "x$have_lz4" = "xyes"])
 
-# error out if --with-jemalloc and --with-librocksdb_static as rocksdb uses tcmalloc
-if test "x$with_jemalloc" = "xyes"; then
-	if test "x$with_librocksdb_static" != "xno"; then
-		AC_MSG_FAILURE([--with-jemalloc called with --with-librocksdb_static, turn off
-			        --with-librocksdb-static or --with-jemalloc])
-	fi
-fi
-
 # needs libcurl and libxml2
 if test "x$with_rest_bench" = xyes && test "x$with_system_libs3" = xno; then
    AC_CHECK_LIB([curl], [curl_easy_init], [], AC_MSG_ERROR([libcurl not found]))
@@ -920,6 +928,10 @@ AC_CHECK_LIB(boost_system-mt, main, [],
     [AC_CHECK_LIB(boost_system, main, [],
         AC_MSG_NOTICE(["Boost system library not found."]))])
 
+AC_CHECK_LIB(boost_iostreams-mt, main, [],
+    [AC_CHECK_LIB(boost_iostreams, main, [],
+        AC_MSG_NOTICE(["Boost iostreams library not found."]))])
+
 # Find the right boost_thread library.
 BOOST_THREAD_LIBS=""
 saved_LIBS="${LIBS}"
@@ -1222,91 +1234,6 @@ if test "x$enable_valgrind" = "xyes"; then
   AC_CHECK_HEADERS([valgrind/helgrind.h])
 fi
 
-dnl systemd-libexec-dir
-AC_SUBST(systemd_libexec_dir)
-AC_ARG_WITH(
-    systemd-libexec-dir,
-    AS_HELP_STRING(
-	    [--with-systemd-libexec-dir=DIR],
-	    [systemd libexec directory @<:@SYSTEMD_LIBEXEC_DIR@:>@
-        defaults to --libexecdir=DIR]
-    ),
-    [
-	    systemd_libexec_dir="$withval"
-    ],
-    [
-        if test "x$SYSTEMD_LIBEXEC_DIR" = "x"; then
-            dnl store old values
-
-            prefix_save=$prefix
-            exec_prefix_save=$exec_prefix
-
-            dnl if no prefix given, then use /usr/local, the default prefix
-            if test "x$prefix" = "xNONE"; then
-                prefix="$ac_default_prefix"
-            fi
-            dnl if no exec_prefix given, then use prefix
-            if test "x$exec_prefix" = "xNONE"; then
-                exec_prefix=$prefix
-            fi
-
-            dnl now get the expanded default
-            systemd_libexec_dir="`eval exec_prefix=$exec_prefix prefix=$prefix echo $libexecdir`"
-
-            dnl now cleanup prefix and exec_prefix
-
-            prefix=$prefix_save
-            exec_prefix=$exec_prefix_save
-        else
-            systemd_libexec_dir="$SYSTEMD_LIBEXEC_DIR"
-        fi
-    ]
-)
-
-
-dnl rgw-user
-AC_SUBST(user_rgw)
-AC_ARG_WITH(
-    rgw-user,
-    AS_HELP_STRING(
-        [--with-rgw-user=USER],
-        [systemd unit directory @<:@USER_RGW@:>@
-        Defaults to "www-data"]
-    ),
-    [
-        user_rgw="$withval"
-    ],
-    [
-        if test "x$USER_RGW" = "x"; then
-            user_rgw=www-data
-        else
-            user_rgw="$USER_RGW"
-        fi
-    ]
-)
-
-dnl rgw-group
-AC_SUBST(group_rgw)
-AC_ARG_WITH(
-    rgw-group,
-    AS_HELP_STRING(
-        [--with-rgw-group=GROUP],
-        [systemd unit directory @<:@GROUP_RGW@:>@
-        Defaults to "www-data"]
-    ),
-    [
-        group_rgw="$withval"
-    ],
-    [
-        if test "x$GROUP_RGW" = "x"; then
-            group_rgw=www-data
-        else
-            group_rgw="$GROUP_RGW"
-        fi
-    ]
-)
-
-
 AC_SUBST(systemd_unit_dir)
 AC_ARG_WITH(
     systemd-unit-dir,
diff --git a/doc/Makefile.am b/doc/Makefile.am
index 4b15b9d..332b323 100644
--- a/doc/Makefile.am
+++ b/doc/Makefile.am
@@ -27,6 +27,7 @@ EXTRA_DIST = \
 	man/8/radosgw.rst	\
 	man/8/rados.rst	\
 	man/8/rbd-fuse.rst	\
+	man/8/rbd-mirror.rst	\
 	man/8/rbd-nbd.rst	\
 	man/8/rbd-replay-many.rst	\
 	man/8/rbd-replay-prep.rst	\
diff --git a/doc/Makefile.in b/doc/Makefile.in
index f590595..e55b78b 100644
--- a/doc/Makefile.in
+++ b/doc/Makefile.in
@@ -205,12 +205,14 @@ JDK_CPPFLAGS = @JDK_CPPFLAGS@
 KEYUTILS_LIB = @KEYUTILS_LIB@
 LD = @LD@
 LDFLAGS = @LDFLAGS@
-LIBEDIT_CFLAGS = @LIBEDIT_CFLAGS@
-LIBEDIT_LIBS = @LIBEDIT_LIBS@
+LIBDPDK_CFLAGS = @LIBDPDK_CFLAGS@
+LIBDPDK_LIBS = @LIBDPDK_LIBS@
 LIBFUSE_CFLAGS = @LIBFUSE_CFLAGS@
 LIBFUSE_LIBS = @LIBFUSE_LIBS@
 LIBJEMALLOC = @LIBJEMALLOC@
 LIBOBJS = @LIBOBJS@
+LIBPCIACCESS_CFLAGS = @LIBPCIACCESS_CFLAGS@
+LIBPCIACCESS_LIBS = @LIBPCIACCESS_LIBS@
 LIBROCKSDB_CFLAGS = @LIBROCKSDB_CFLAGS@
 LIBROCKSDB_LIBS = @LIBROCKSDB_LIBS@
 LIBS = @LIBS@
@@ -295,7 +297,6 @@ datarootdir = @datarootdir@
 docdir = @docdir@
 dvidir = @dvidir@
 exec_prefix = @exec_prefix@
-group_rgw = @group_rgw@
 host = @host@
 host_alias = @host_alias@
 host_cpu = @host_cpu@
@@ -326,7 +327,6 @@ sharedstatedir = @sharedstatedir@
 srcdir = @srcdir@
 subdirs = @subdirs@
 sysconfdir = @sysconfdir@
-systemd_libexec_dir = @systemd_libexec_dir@
 systemd_unit_dir = @systemd_unit_dir@
 target = @target@
 target_alias = @target_alias@
@@ -336,7 +336,6 @@ target_vendor = @target_vendor@
 top_build_prefix = @top_build_prefix@
 top_builddir = @top_builddir@
 top_srcdir = @top_srcdir@
-user_rgw = @user_rgw@
 EXTRA_DIST = \
 	man/8/ceph-authtool.rst	\
 	man/8/ceph-clsinfo.rst	\
@@ -366,6 +365,7 @@ EXTRA_DIST = \
 	man/8/radosgw.rst	\
 	man/8/rados.rst	\
 	man/8/rbd-fuse.rst	\
+	man/8/rbd-mirror.rst	\
 	man/8/rbd-nbd.rst	\
 	man/8/rbd-replay-many.rst	\
 	man/8/rbd-replay-prep.rst	\
diff --git a/doc/man/8/ceph-authtool.rst b/doc/man/8/ceph-authtool.rst
index 523d14d..8565eba 100644
--- a/doc/man/8/ceph-authtool.rst
+++ b/doc/man/8/ceph-authtool.rst
@@ -26,7 +26,7 @@ associated with an entity name, of the form
 **WARNING** Ceph provides authentication and protection against
 man-in-the-middle attacks once secret keys are in place.  However,
 data over the wire is not encrypted, which may include the messages
-used to configure said keys.  The system is primarily intended to be
+used to configure said keys. The system is primarily intended to be
 used in trusted environments.
 
 Options
diff --git a/doc/man/8/ceph.rst b/doc/man/8/ceph.rst
index 1f7cbfe..7a43198 100644
--- a/doc/man/8/ceph.rst
+++ b/doc/man/8/ceph.rst
@@ -1360,7 +1360,7 @@ Options
 
 	Name of the Ceph cluster.
 
-.. option:: daemon ADMIN_SOCKET, daemon DAEMON_NAME, --admin-socket ADMIN_SOCKET, --admin-socket DAEMON_NAME
+.. option:: --admin-daemon ADMIN_SOCKET, daemon DAEMON_NAME
 
 	Submit admin-socket commands via admin sockets in /var/run/ceph.
 
diff --git a/doc/man/8/monmaptool.rst b/doc/man/8/monmaptool.rst
index 97d5d40..bed0c94 100644
--- a/doc/man/8/monmaptool.rst
+++ b/doc/man/8/monmaptool.rst
@@ -95,9 +95,9 @@ To replace one monitor::
 Availability
 ============
 
-**monmaptool** is part of Ceph, a massively scalable, open-source, distributed storage system.  Please
-refer to the Ceph documentation at http://ceph.com/docs for more
-information.
+**monmaptool** is part of Ceph, a massively scalable, open-source, distributed 
+storage system. Please refer to the Ceph documentation at http://ceph.com/docs 
+for more information.
 
 
 See also
diff --git a/doc/man/8/rados.rst b/doc/man/8/rados.rst
index fadfb72..2243a5e 100644
--- a/doc/man/8/rados.rst
+++ b/doc/man/8/rados.rst
@@ -118,7 +118,9 @@ Pool specific commands
   object size is 4 MB, and the default number of simulated threads
   (parallel writes) is 16. The *--run-name <label>* option is useful
   for benchmarking a workload test from multiple clients. The *<label>*
-  is an arbitrary object name.
+  is an arbitrary object name. It is "benchmark_last_metadata" by
+  default, and is used as the underlying object name for "read" and
+  "write" ops.
   Note: -b *objsize* option is valid only in *write* mode.
 
 :command:`cleanup`
diff --git a/doc/man/8/radosgw-admin.rst b/doc/man/8/radosgw-admin.rst
index a523a63..54d690e 100644
--- a/doc/man/8/radosgw-admin.rst
+++ b/doc/man/8/radosgw-admin.rst
@@ -218,6 +218,12 @@ which are as follows:
 :command:`replicalog delete`
   Delete replica metadata log entry.
 
+:command:`orphans find`
+  Init and run search for leaked rados objects
+
+:command:`orphans finish`
+  Clean up search for leaked rados objects
+
 
 Options
 =======
@@ -401,6 +407,18 @@ Quota Options
 	Scope of quota (bucket, user).
 
 
+Orphans Search Options
+======================
+
+.. option:: --pool
+
+	Data pool to scan for leaked rados objects
+
+.. option:: --num-shards
+
+	Number of shards to use for keeping the temporary scan info
+
+
 Examples
 ========
 
diff --git a/doc/man/8/rbd-mirror.rst b/doc/man/8/rbd-mirror.rst
new file mode 100644
index 0000000..cda8347
--- /dev/null
+++ b/doc/man/8/rbd-mirror.rst
@@ -0,0 +1,75 @@
+:orphan:
+
+===================================================
+ rbd-mirror -- Ceph daemon for mirroring RBD images
+===================================================
+
+.. program:: rbd-mirror
+
+Synopsis
+========
+
+| **rbd-mirror**
+
+
+Description
+===========
+
+:program:`rbd-mirror` is a daemon for asynchronous mirroring of RADOS
+block device (rbd) images among Ceph clusters. It replays changes to
+images in remote clusters in a local cluster, for disaster recovery.
+
+It connects to remote clusters via the RADOS protocol, relying on
+default search paths to find ceph.conf files, monitor addresses and
+authentication information for them, i.e. ``/etc/ceph/$cluster.conf``,
+``/etc/ceph/$cluster.keyring``, and
+``/etc/ceph/$cluster.$name.keyring``, where ``$cluster`` is the
+human-friendly name of the cluster, and ``$name`` is the rados user to
+connect as, e.g. ``client.rbd-mirror``.
+
+
+Options
+=======
+
+.. option:: -c ceph.conf, --conf=ceph.conf
+
+   Use ``ceph.conf`` configuration file instead of the default
+   ``/etc/ceph/ceph.conf`` to determine monitor addresses during startup.
+
+.. option:: -m monaddress[:port]
+
+   Connect to specified monitor (instead of looking through ``ceph.conf``).
+
+.. option:: -i ID, --id ID
+
+   Set the ID portion of name for rbd-mirror
+
+.. option:: -n TYPE.ID, --name TYPE.ID
+
+   Set the rados user name for the daemon (e.g. client.rbd-mirror)
+
+.. option:: --cluster NAME
+
+   Set the cluster name (default: ceph)
+
+.. option:: -d
+
+   Run in foreground, log to stderr
+
+.. option:: -f
+
+   Run in foreground, log to usual location
+
+
+Availability
+============
+
+:program:`rbd-mirror` is part of Ceph, a massively scalable, open-source, distributed
+storage system. Please refer to the Ceph documentation at http://ceph.com/docs for
+more information.
+
+
+See also
+========
+
+:doc:`rbd <rbd>`\(8)
diff --git a/man/Makefile-client.am b/man/Makefile-client.am
index f5e9063..b03c9bf 100644
--- a/man/Makefile-client.am
+++ b/man/Makefile-client.am
@@ -16,6 +16,7 @@ endif
 if WITH_RBD
 dist_man_MANS += \
 	ceph-rbdnamer.8 \
+	rbd-mirror.8 \
 	rbd-nbd.8 \
 	rbd-replay.8 \
 	rbd-replay-many.8 \
diff --git a/man/Makefile.in b/man/Makefile.in
index 648e8c5..ec60df3 100644
--- a/man/Makefile.in
+++ b/man/Makefile.in
@@ -103,6 +103,7 @@ target_triplet = @target@
 
 @ENABLE_CLIENT_TRUE@@WITH_MAN_PAGES_TRUE@@WITH_RBD_TRUE@am__append_3 = \
 @ENABLE_CLIENT_TRUE@@WITH_MAN_PAGES_TRUE@@WITH_RBD_TRUE@	ceph-rbdnamer.8 \
+@ENABLE_CLIENT_TRUE@@WITH_MAN_PAGES_TRUE@@WITH_RBD_TRUE@	rbd-mirror.8 \
 @ENABLE_CLIENT_TRUE@@WITH_MAN_PAGES_TRUE@@WITH_RBD_TRUE@	rbd-nbd.8 \
 @ENABLE_CLIENT_TRUE@@WITH_MAN_PAGES_TRUE@@WITH_RBD_TRUE@	rbd-replay.8 \
 @ENABLE_CLIENT_TRUE@@WITH_MAN_PAGES_TRUE@@WITH_RBD_TRUE@	rbd-replay-many.8 \
@@ -294,12 +295,14 @@ JDK_CPPFLAGS = @JDK_CPPFLAGS@
 KEYUTILS_LIB = @KEYUTILS_LIB@
 LD = @LD@
 LDFLAGS = @LDFLAGS@
-LIBEDIT_CFLAGS = @LIBEDIT_CFLAGS@
-LIBEDIT_LIBS = @LIBEDIT_LIBS@
+LIBDPDK_CFLAGS = @LIBDPDK_CFLAGS@
+LIBDPDK_LIBS = @LIBDPDK_LIBS@
 LIBFUSE_CFLAGS = @LIBFUSE_CFLAGS@
 LIBFUSE_LIBS = @LIBFUSE_LIBS@
 LIBJEMALLOC = @LIBJEMALLOC@
 LIBOBJS = @LIBOBJS@
+LIBPCIACCESS_CFLAGS = @LIBPCIACCESS_CFLAGS@
+LIBPCIACCESS_LIBS = @LIBPCIACCESS_LIBS@
 LIBROCKSDB_CFLAGS = @LIBROCKSDB_CFLAGS@
 LIBROCKSDB_LIBS = @LIBROCKSDB_LIBS@
 LIBS = @LIBS@
@@ -384,7 +387,6 @@ datarootdir = @datarootdir@
 docdir = @docdir@
 dvidir = @dvidir@
 exec_prefix = @exec_prefix@
-group_rgw = @group_rgw@
 host = @host@
 host_alias = @host_alias@
 host_cpu = @host_cpu@
@@ -415,7 +417,6 @@ sharedstatedir = @sharedstatedir@
 srcdir = @srcdir@
 subdirs = @subdirs@
 sysconfdir = @sysconfdir@
-systemd_libexec_dir = @systemd_libexec_dir@
 systemd_unit_dir = @systemd_unit_dir@
 target = @target@
 target_alias = @target_alias@
@@ -425,7 +426,6 @@ target_vendor = @target_vendor@
 top_build_prefix = @top_build_prefix@
 top_builddir = @top_builddir@
 top_srcdir = @top_srcdir@
-user_rgw = @user_rgw@
 AUTOMAKE_OPTIONS = gnu
 EXTRA_DIST = conf.py ceph_selinux.8
 dist_man_MANS = $(am__append_1) $(am__append_2) $(am__append_3) \
diff --git a/man/ceph-authtool.8 b/man/ceph-authtool.8
index 47451cb..f396fe4 100644
--- a/man/ceph-authtool.8
+++ b/man/ceph-authtool.8
@@ -1,6 +1,6 @@
 .\" Man page generated from reStructuredText.
 .
-.TH "CEPH-AUTHTOOL" "8" "February 24, 2016" "dev" "Ceph"
+.TH "CEPH-AUTHTOOL" "8" "March 22, 2016" "dev" "Ceph"
 .SH NAME
 ceph-authtool \- ceph keyring manipulation tool
 .
@@ -48,7 +48,7 @@ associated with an entity name, of the form
 \fBWARNING\fP Ceph provides authentication and protection against
 man\-in\-the\-middle attacks once secret keys are in place.  However,
 data over the wire is not encrypted, which may include the messages
-used to configure said keys.  The system is primarily intended to be
+used to configure said keys. The system is primarily intended to be
 used in trusted environments.
 .SH OPTIONS
 .INDENT 0.0
diff --git a/man/ceph-clsinfo.8 b/man/ceph-clsinfo.8
index 3cf58a4..5c7bce7 100644
--- a/man/ceph-clsinfo.8
+++ b/man/ceph-clsinfo.8
@@ -1,6 +1,6 @@
 .\" Man page generated from reStructuredText.
 .
-.TH "CEPH-CLSINFO" "8" "February 24, 2016" "dev" "Ceph"
+.TH "CEPH-CLSINFO" "8" "March 22, 2016" "dev" "Ceph"
 .SH NAME
 ceph-clsinfo \- show class object information
 .
diff --git a/man/ceph-conf.8 b/man/ceph-conf.8
index 948e43f..eac2c9e 100644
--- a/man/ceph-conf.8
+++ b/man/ceph-conf.8
@@ -1,6 +1,6 @@
 .\" Man page generated from reStructuredText.
 .
-.TH "CEPH-CONF" "8" "February 24, 2016" "dev" "Ceph"
+.TH "CEPH-CONF" "8" "March 22, 2016" "dev" "Ceph"
 .SH NAME
 ceph-conf \- ceph conf file tool
 .
diff --git a/man/ceph-create-keys.8 b/man/ceph-create-keys.8
index 530c258..dc50e72 100644
--- a/man/ceph-create-keys.8
+++ b/man/ceph-create-keys.8
@@ -1,6 +1,6 @@
 .\" Man page generated from reStructuredText.
 .
-.TH "CEPH-CREATE-KEYS" "8" "February 24, 2016" "dev" "Ceph"
+.TH "CEPH-CREATE-KEYS" "8" "March 22, 2016" "dev" "Ceph"
 .SH NAME
 ceph-create-keys \- ceph keyring generate tool
 .
diff --git a/man/ceph-debugpack.8 b/man/ceph-debugpack.8
index 9f1aa42..cd2afcb 100644
--- a/man/ceph-debugpack.8
+++ b/man/ceph-debugpack.8
@@ -1,6 +1,6 @@
 .\" Man page generated from reStructuredText.
 .
-.TH "CEPH-DEBUGPACK" "8" "February 24, 2016" "dev" "Ceph"
+.TH "CEPH-DEBUGPACK" "8" "March 22, 2016" "dev" "Ceph"
 .SH NAME
 ceph-debugpack \- ceph debug packer utility
 .
diff --git a/man/ceph-dencoder.8 b/man/ceph-dencoder.8
index 8fe95f0..7da1978 100644
--- a/man/ceph-dencoder.8
+++ b/man/ceph-dencoder.8
@@ -1,6 +1,6 @@
 .\" Man page generated from reStructuredText.
 .
-.TH "CEPH-DENCODER" "8" "February 24, 2016" "dev" "Ceph"
+.TH "CEPH-DENCODER" "8" "March 22, 2016" "dev" "Ceph"
 .SH NAME
 ceph-dencoder \- ceph encoder/decoder utility
 .
diff --git a/man/ceph-deploy.8 b/man/ceph-deploy.8
index f9fee42..355da7e 100644
--- a/man/ceph-deploy.8
+++ b/man/ceph-deploy.8
@@ -1,6 +1,6 @@
 .\" Man page generated from reStructuredText.
 .
-.TH "CEPH-DEPLOY" "8" "February 24, 2016" "dev" "Ceph"
+.TH "CEPH-DEPLOY" "8" "March 22, 2016" "dev" "Ceph"
 .SH NAME
 ceph-deploy \- Ceph deployment tool
 .
diff --git a/man/ceph-detect-init.8 b/man/ceph-detect-init.8
index fbff1de..853064a 100644
--- a/man/ceph-detect-init.8
+++ b/man/ceph-detect-init.8
@@ -1,6 +1,6 @@
 .\" Man page generated from reStructuredText.
 .
-.TH "CEPH-DETECT-INIT" "8" "February 24, 2016" "dev" "Ceph"
+.TH "CEPH-DETECT-INIT" "8" "March 22, 2016" "dev" "Ceph"
 .SH NAME
 ceph-detect-init \- display the init system Ceph should use
 .
diff --git a/man/ceph-disk.8 b/man/ceph-disk.8
index f882a19..5687ca4 100644
--- a/man/ceph-disk.8
+++ b/man/ceph-disk.8
@@ -1,6 +1,6 @@
 .\" Man page generated from reStructuredText.
 .
-.TH "CEPH-DISK" "8" "February 24, 2016" "dev" "Ceph"
+.TH "CEPH-DISK" "8" "March 22, 2016" "dev" "Ceph"
 .SH NAME
 ceph-disk \- Ceph disk utility for OSD
 .
diff --git a/man/ceph-fuse.8 b/man/ceph-fuse.8
index ab15785..cafb304 100644
--- a/man/ceph-fuse.8
+++ b/man/ceph-fuse.8
@@ -1,6 +1,6 @@
 .\" Man page generated from reStructuredText.
 .
-.TH "CEPH-FUSE" "8" "February 24, 2016" "dev" "Ceph"
+.TH "CEPH-FUSE" "8" "March 22, 2016" "dev" "Ceph"
 .SH NAME
 ceph-fuse \- FUSE-based client for ceph
 .
diff --git a/man/ceph-mds.8 b/man/ceph-mds.8
index 7886f3a..3ae6814 100644
--- a/man/ceph-mds.8
+++ b/man/ceph-mds.8
@@ -1,6 +1,6 @@
 .\" Man page generated from reStructuredText.
 .
-.TH "CEPH-MDS" "8" "February 24, 2016" "dev" "Ceph"
+.TH "CEPH-MDS" "8" "March 22, 2016" "dev" "Ceph"
 .SH NAME
 ceph-mds \- ceph metadata server daemon
 .
diff --git a/man/ceph-mon.8 b/man/ceph-mon.8
index 49ca773..1949b98 100644
--- a/man/ceph-mon.8
+++ b/man/ceph-mon.8
@@ -1,6 +1,6 @@
 .\" Man page generated from reStructuredText.
 .
-.TH "CEPH-MON" "8" "February 24, 2016" "dev" "Ceph"
+.TH "CEPH-MON" "8" "March 22, 2016" "dev" "Ceph"
 .SH NAME
 ceph-mon \- ceph monitor daemon
 .
diff --git a/man/ceph-osd.8 b/man/ceph-osd.8
index e2f9502..8a8693b 100644
--- a/man/ceph-osd.8
+++ b/man/ceph-osd.8
@@ -1,6 +1,6 @@
 .\" Man page generated from reStructuredText.
 .
-.TH "CEPH-OSD" "8" "February 24, 2016" "dev" "Ceph"
+.TH "CEPH-OSD" "8" "March 22, 2016" "dev" "Ceph"
 .SH NAME
 ceph-osd \- ceph object storage daemon
 .
diff --git a/man/ceph-post-file.8 b/man/ceph-post-file.8
index 760d6b6..e9ea346 100644
--- a/man/ceph-post-file.8
+++ b/man/ceph-post-file.8
@@ -1,6 +1,6 @@
 .\" Man page generated from reStructuredText.
 .
-.TH "CEPH-POST-FILE" "8" "February 24, 2016" "dev" "Ceph"
+.TH "CEPH-POST-FILE" "8" "March 22, 2016" "dev" "Ceph"
 .SH NAME
 ceph-post-file \- post files for ceph developers
 .
diff --git a/man/ceph-rbdnamer.8 b/man/ceph-rbdnamer.8
index 05d15a3..7dc6e21 100644
--- a/man/ceph-rbdnamer.8
+++ b/man/ceph-rbdnamer.8
@@ -1,6 +1,6 @@
 .\" Man page generated from reStructuredText.
 .
-.TH "CEPH-RBDNAMER" "8" "February 24, 2016" "dev" "Ceph"
+.TH "CEPH-RBDNAMER" "8" "March 22, 2016" "dev" "Ceph"
 .SH NAME
 ceph-rbdnamer \- udev helper to name RBD devices
 .
diff --git a/man/ceph-rest-api.8 b/man/ceph-rest-api.8
index 4ae0ef7..9c409e8 100644
--- a/man/ceph-rest-api.8
+++ b/man/ceph-rest-api.8
@@ -1,6 +1,6 @@
 .\" Man page generated from reStructuredText.
 .
-.TH "CEPH-REST-API" "8" "February 24, 2016" "dev" "Ceph"
+.TH "CEPH-REST-API" "8" "March 22, 2016" "dev" "Ceph"
 .SH NAME
 ceph-rest-api \- ceph RESTlike administration server
 .
diff --git a/man/ceph-run.8 b/man/ceph-run.8
index d9a475b..a04c048 100644
--- a/man/ceph-run.8
+++ b/man/ceph-run.8
@@ -1,6 +1,6 @@
 .\" Man page generated from reStructuredText.
 .
-.TH "CEPH-RUN" "8" "February 24, 2016" "dev" "Ceph"
+.TH "CEPH-RUN" "8" "March 22, 2016" "dev" "Ceph"
 .SH NAME
 ceph-run \- restart daemon on core dump
 .
diff --git a/man/ceph-syn.8 b/man/ceph-syn.8
index acc7760..df35cbe 100644
--- a/man/ceph-syn.8
+++ b/man/ceph-syn.8
@@ -1,6 +1,6 @@
 .\" Man page generated from reStructuredText.
 .
-.TH "CEPH-SYN" "8" "February 24, 2016" "dev" "Ceph"
+.TH "CEPH-SYN" "8" "March 22, 2016" "dev" "Ceph"
 .SH NAME
 ceph-syn \- ceph synthetic workload generator
 .
diff --git a/man/ceph.8 b/man/ceph.8
index d75ca14..885b5da 100644
--- a/man/ceph.8
+++ b/man/ceph.8
@@ -1,6 +1,6 @@
 .\" Man page generated from reStructuredText.
 .
-.TH "CEPH" "8" "February 24, 2016" "dev" "Ceph"
+.TH "CEPH" "8" "March 22, 2016" "dev" "Ceph"
 .SH NAME
 ceph \- ceph administration tool
 .
@@ -2754,7 +2754,7 @@ Name of the Ceph cluster.
 .UNINDENT
 .INDENT 0.0
 .TP
-.B daemon ADMIN_SOCKET, daemon DAEMON_NAME, \-\-admin\-socket ADMIN_SOCKET, \-\-admin\-socket DAEMON_NAME
+.B \-\-admin\-daemon ADMIN_SOCKET, daemon DAEMON_NAME
 Submit admin\-socket commands via admin sockets in /var/run/ceph.
 .UNINDENT
 .INDENT 0.0
diff --git a/man/cephfs.8 b/man/cephfs.8
index 2e41859..df29f95 100644
--- a/man/cephfs.8
+++ b/man/cephfs.8
@@ -1,6 +1,6 @@
 .\" Man page generated from reStructuredText.
 .
-.TH "CEPHFS" "8" "February 24, 2016" "dev" "Ceph"
+.TH "CEPHFS" "8" "March 22, 2016" "dev" "Ceph"
 .SH NAME
 cephfs \- ceph file system options utility
 .
diff --git a/man/crushtool.8 b/man/crushtool.8
index c3e3155..bd04f1b 100644
--- a/man/crushtool.8
+++ b/man/crushtool.8
@@ -1,6 +1,6 @@
 .\" Man page generated from reStructuredText.
 .
-.TH "CRUSHTOOL" "8" "February 24, 2016" "dev" "Ceph"
+.TH "CRUSHTOOL" "8" "March 22, 2016" "dev" "Ceph"
 .SH NAME
 crushtool \- CRUSH map manipulation tool
 .
diff --git a/man/librados-config.8 b/man/librados-config.8
index 65c5ebc..59961c8 100644
--- a/man/librados-config.8
+++ b/man/librados-config.8
@@ -1,6 +1,6 @@
 .\" Man page generated from reStructuredText.
 .
-.TH "LIBRADOS-CONFIG" "8" "February 24, 2016" "dev" "Ceph"
+.TH "LIBRADOS-CONFIG" "8" "March 22, 2016" "dev" "Ceph"
 .SH NAME
 librados-config \- display information about librados
 .
diff --git a/man/monmaptool.8 b/man/monmaptool.8
index 3d06cae..ad2b12d 100644
--- a/man/monmaptool.8
+++ b/man/monmaptool.8
@@ -1,6 +1,6 @@
 .\" Man page generated from reStructuredText.
 .
-.TH "MONMAPTOOL" "8" "February 24, 2016" "dev" "Ceph"
+.TH "MONMAPTOOL" "8" "March 22, 2016" "dev" "Ceph"
 .SH NAME
 monmaptool \- ceph monitor cluster map manipulation tool
 .
@@ -148,9 +148,9 @@ monmaptool \-\-rm mon.a \-\-add mon.a 192.168.0.9:6789 \-\-clobber monmap
 .UNINDENT
 .SH AVAILABILITY
 .sp
-\fBmonmaptool\fP is part of Ceph, a massively scalable, open\-source, distributed storage system.  Please
-refer to the Ceph documentation at \fI\%http://ceph.com/docs\fP for more
-information.
+\fBmonmaptool\fP is part of Ceph, a massively scalable, open\-source, distributed
+storage system. Please refer to the Ceph documentation at \fI\%http://ceph.com/docs\fP
+for more information.
 .SH SEE ALSO
 .sp
 \fBceph\fP(8),
diff --git a/man/mount.ceph.8 b/man/mount.ceph.8
index a91af66..3c4e624 100644
--- a/man/mount.ceph.8
+++ b/man/mount.ceph.8
@@ -1,6 +1,6 @@
 .\" Man page generated from reStructuredText.
 .
-.TH "MOUNT.CEPH" "8" "February 24, 2016" "dev" "Ceph"
+.TH "MOUNT.CEPH" "8" "March 22, 2016" "dev" "Ceph"
 .SH NAME
 mount.ceph \- mount a ceph file system
 .
diff --git a/man/osdmaptool.8 b/man/osdmaptool.8
index 99b6cfb..89664a1 100644
--- a/man/osdmaptool.8
+++ b/man/osdmaptool.8
@@ -1,6 +1,6 @@
 .\" Man page generated from reStructuredText.
 .
-.TH "OSDMAPTOOL" "8" "February 24, 2016" "dev" "Ceph"
+.TH "OSDMAPTOOL" "8" "March 22, 2016" "dev" "Ceph"
 .SH NAME
 osdmaptool \- ceph osd cluster map manipulation tool
 .
diff --git a/man/rados.8 b/man/rados.8
index 60747c5..f23f2ae 100644
--- a/man/rados.8
+++ b/man/rados.8
@@ -1,6 +1,6 @@
 .\" Man page generated from reStructuredText.
 .
-.TH "RADOS" "8" "February 24, 2016" "dev" "Ceph"
+.TH "RADOS" "8" "March 22, 2016" "dev" "Ceph"
 .SH NAME
 rados \- rados object storage utility
 .
@@ -142,7 +142,9 @@ run a write benchmark with the \fI\-\-no\-cleanup\fP option. The default
 object size is 4 MB, and the default number of simulated threads
 (parallel writes) is 16. The \fI\-\-run\-name <label>\fP option is useful
 for benchmarking a workload test from multiple clients. The \fI<label>\fP
-is an arbitrary object name.
+is an arbitrary object name. It is "benchmark_last_metadata" by
+default, and is used as the underlying object name for "read" and
+"write" ops.
 Note: \-b \fIobjsize\fP option is valid only in \fIwrite\fP mode.
 .UNINDENT
 .sp
diff --git a/man/radosgw-admin.8 b/man/radosgw-admin.8
index 98215ed..e9df855 100644
--- a/man/radosgw-admin.8
+++ b/man/radosgw-admin.8
@@ -1,6 +1,6 @@
 .\" Man page generated from reStructuredText.
 .
-.TH "RADOSGW-ADMIN" "8" "February 24, 2016" "dev" "Ceph"
+.TH "RADOSGW-ADMIN" "8" "March 22, 2016" "dev" "Ceph"
 .SH NAME
 radosgw-admin \- rados REST gateway user administration utility
 .
@@ -237,6 +237,12 @@ Get replica metadata log entry.
 .TP
 .B \fBreplicalog delete\fP
 Delete replica metadata log entry.
+.TP
+.B \fBorphans find\fP
+Init and run search for leaked rados objects
+.TP
+.B \fBorphans finish\fP
+Clean up search for leaked rados objects
 .UNINDENT
 .SH OPTIONS
 .INDENT 0.0
@@ -456,6 +462,17 @@ Specify max size (in bytes, negative value to disable).
 .B \-\-quota\-scope
 Scope of quota (bucket, user).
 .UNINDENT
+.SH ORPHANS SEARCH OPTIONS
+.INDENT 0.0
+.TP
+.B \-\-pool
+Data pool to scan for leaked rados objects
+.UNINDENT
+.INDENT 0.0
+.TP
+.B \-\-num\-shards
+Number of shards to use for keeping the temporary scan info
+.UNINDENT
 .SH EXAMPLES
 .sp
 Generate a new user:
diff --git a/man/radosgw.8 b/man/radosgw.8
index 7c288a7..886c078 100644
--- a/man/radosgw.8
+++ b/man/radosgw.8
@@ -1,6 +1,6 @@
 .\" Man page generated from reStructuredText.
 .
-.TH "RADOSGW" "8" "February 24, 2016" "dev" "Ceph"
+.TH "RADOSGW" "8" "March 22, 2016" "dev" "Ceph"
 .SH NAME
 radosgw \- rados REST gateway
 .
diff --git a/man/rbd-fuse.8 b/man/rbd-fuse.8
index 38858bc..bcf7adc 100644
--- a/man/rbd-fuse.8
+++ b/man/rbd-fuse.8
@@ -1,6 +1,6 @@
 .\" Man page generated from reStructuredText.
 .
-.TH "RBD-FUSE" "8" "February 24, 2016" "dev" "Ceph"
+.TH "RBD-FUSE" "8" "March 22, 2016" "dev" "Ceph"
 .SH NAME
 rbd-fuse \- expose rbd images as files
 .
diff --git a/man/rbd-mirror.8 b/man/rbd-mirror.8
new file mode 100644
index 0000000..9b3ffdd
--- /dev/null
+++ b/man/rbd-mirror.8
@@ -0,0 +1,99 @@
+.\" Man page generated from reStructuredText.
+.
+.TH "RBD-MIRROR" "8" "March 22, 2016" "dev" "Ceph"
+.SH NAME
+rbd-mirror \- Ceph daemon for mirroring RBD images
+.
+.nr rst2man-indent-level 0
+.
+.de1 rstReportMargin
+\\$1 \\n[an-margin]
+level \\n[rst2man-indent-level]
+level margin: \\n[rst2man-indent\\n[rst2man-indent-level]]
+-
+\\n[rst2man-indent0]
+\\n[rst2man-indent1]
+\\n[rst2man-indent2]
+..
+.de1 INDENT
+.\" .rstReportMargin pre:
+. RS \\$1
+. nr rst2man-indent\\n[rst2man-indent-level] \\n[an-margin]
+. nr rst2man-indent-level +1
+.\" .rstReportMargin post:
+..
+.de UNINDENT
+. RE
+.\" indent \\n[an-margin]
+.\" old: \\n[rst2man-indent\\n[rst2man-indent-level]]
+.nr rst2man-indent-level -1
+.\" new: \\n[rst2man-indent\\n[rst2man-indent-level]]
+.in \\n[rst2man-indent\\n[rst2man-indent-level]]u
+..
+.SH SYNOPSIS
+.nf
+\fBrbd\-mirror\fP
+.fi
+.sp
+.SH DESCRIPTION
+.sp
+\fBrbd\-mirror\fP is a daemon for asynchronous mirroring of RADOS
+block device (rbd) images among Ceph clusters. It replays changes to
+images in remote clusters in a local cluster, for disaster recovery.
+.sp
+It connects to remote clusters via the RADOS protocol, relying on
+default search paths to find ceph.conf files, monitor addresses and
+authentication information for them, i.e. \fB/etc/ceph/$cluster.conf\fP,
+\fB/etc/ceph/$cluster.keyring\fP, and
+\fB/etc/ceph/$cluster.$name.keyring\fP, where \fB$cluster\fP is the
+human\-friendly name of the cluster, and \fB$name\fP is the rados user to
+connect as, e.g. \fBclient.rbd\-mirror\fP\&.
+.SH OPTIONS
+.INDENT 0.0
+.TP
+.B \-c ceph.conf, \-\-conf=ceph.conf
+Use \fBceph.conf\fP configuration file instead of the default
+\fB/etc/ceph/ceph.conf\fP to determine monitor addresses during startup.
+.UNINDENT
+.INDENT 0.0
+.TP
+.B \-m monaddress[:port]
+Connect to specified monitor (instead of looking through \fBceph.conf\fP).
+.UNINDENT
+.INDENT 0.0
+.TP
+.B \-i ID, \-\-id ID
+Set the ID portion of name for rbd\-mirror
+.UNINDENT
+.INDENT 0.0
+.TP
+.B \-n TYPE.ID, \-\-name TYPE.ID
+Set the rados user name for the daemon (e.g. client.rbd\-mirror)
+.UNINDENT
+.INDENT 0.0
+.TP
+.B \-\-cluster NAME
+Set the cluster name (default: ceph)
+.UNINDENT
+.INDENT 0.0
+.TP
+.B \-d
+Run in foreground, log to stderr
+.UNINDENT
+.INDENT 0.0
+.TP
+.B \-f
+Run in foreground, log to usual location
+.UNINDENT
+.SH AVAILABILITY
+.sp
+\fBrbd\-mirror\fP is part of Ceph, a massively scalable, open\-source, distributed
+storage system. Please refer to the Ceph documentation at \fI\%http://ceph.com/docs\fP for
+more information.
+.SH SEE ALSO
+.sp
+\fBrbd\fP(8)
+.SH COPYRIGHT
+2010-2014, Inktank Storage, Inc. and contributors. Licensed under Creative Commons BY-SA
+.\" Generated by docutils manpage writer.
+.
diff --git a/man/rbd-nbd.8 b/man/rbd-nbd.8
index c9f8f28..83c3198 100644
--- a/man/rbd-nbd.8
+++ b/man/rbd-nbd.8
@@ -1,6 +1,6 @@
 .\" Man page generated from reStructuredText.
 .
-.TH "RBD-NBD" "8" "February 24, 2016" "dev" "Ceph"
+.TH "RBD-NBD" "8" "March 22, 2016" "dev" "Ceph"
 .SH NAME
 rbd-nbd \- map rbd images to nbd device
 .
diff --git a/man/rbd-replay-many.8 b/man/rbd-replay-many.8
index c851542..b8f6d2f 100644
--- a/man/rbd-replay-many.8
+++ b/man/rbd-replay-many.8
@@ -1,6 +1,6 @@
 .\" Man page generated from reStructuredText.
 .
-.TH "RBD-REPLAY-MANY" "8" "February 24, 2016" "dev" "Ceph"
+.TH "RBD-REPLAY-MANY" "8" "March 22, 2016" "dev" "Ceph"
 .SH NAME
 rbd-replay-many \- replay a rados block device (RBD) workload on several clients
 .
diff --git a/man/rbd-replay-prep.8 b/man/rbd-replay-prep.8
index 231a5a0..58458c8 100644
--- a/man/rbd-replay-prep.8
+++ b/man/rbd-replay-prep.8
@@ -1,6 +1,6 @@
 .\" Man page generated from reStructuredText.
 .
-.TH "RBD-REPLAY-PREP" "8" "February 24, 2016" "dev" "Ceph"
+.TH "RBD-REPLAY-PREP" "8" "March 22, 2016" "dev" "Ceph"
 .SH NAME
 rbd-replay-prep \- prepare captured rados block device (RBD) workloads for replay
 .
diff --git a/man/rbd-replay.8 b/man/rbd-replay.8
index 29674df..24c4d38 100644
--- a/man/rbd-replay.8
+++ b/man/rbd-replay.8
@@ -1,6 +1,6 @@
 .\" Man page generated from reStructuredText.
 .
-.TH "RBD-REPLAY" "8" "February 24, 2016" "dev" "Ceph"
+.TH "RBD-REPLAY" "8" "March 22, 2016" "dev" "Ceph"
 .SH NAME
 rbd-replay \- replay rados block device (RBD) workloads
 .
diff --git a/man/rbd.8 b/man/rbd.8
index 4388a8d..b689909 100644
--- a/man/rbd.8
+++ b/man/rbd.8
@@ -1,6 +1,6 @@
 .\" Man page generated from reStructuredText.
 .
-.TH "RBD" "8" "February 24, 2016" "dev" "Ceph"
+.TH "RBD" "8" "March 22, 2016" "dev" "Ceph"
 .SH NAME
 rbd \- manage rados block device (RBD) images
 .
diff --git a/selinux/Makefile.in b/selinux/Makefile.in
index 482ac30..fab874f 100644
--- a/selinux/Makefile.in
+++ b/selinux/Makefile.in
@@ -205,12 +205,14 @@ JDK_CPPFLAGS = @JDK_CPPFLAGS@
 KEYUTILS_LIB = @KEYUTILS_LIB@
 LD = @LD@
 LDFLAGS = @LDFLAGS@
-LIBEDIT_CFLAGS = @LIBEDIT_CFLAGS@
-LIBEDIT_LIBS = @LIBEDIT_LIBS@
+LIBDPDK_CFLAGS = @LIBDPDK_CFLAGS@
+LIBDPDK_LIBS = @LIBDPDK_LIBS@
 LIBFUSE_CFLAGS = @LIBFUSE_CFLAGS@
 LIBFUSE_LIBS = @LIBFUSE_LIBS@
 LIBJEMALLOC = @LIBJEMALLOC@
 LIBOBJS = @LIBOBJS@
+LIBPCIACCESS_CFLAGS = @LIBPCIACCESS_CFLAGS@
+LIBPCIACCESS_LIBS = @LIBPCIACCESS_LIBS@
 LIBROCKSDB_CFLAGS = @LIBROCKSDB_CFLAGS@
 LIBROCKSDB_LIBS = @LIBROCKSDB_LIBS@
 LIBS = @LIBS@
@@ -295,7 +297,6 @@ datarootdir = @datarootdir@
 docdir = @docdir@
 dvidir = @dvidir@
 exec_prefix = @exec_prefix@
-group_rgw = @group_rgw@
 host = @host@
 host_alias = @host_alias@
 host_cpu = @host_cpu@
@@ -326,7 +327,6 @@ sharedstatedir = @sharedstatedir@
 srcdir = @srcdir@
 subdirs = @subdirs@
 sysconfdir = @sysconfdir@
-systemd_libexec_dir = @systemd_libexec_dir@
 systemd_unit_dir = @systemd_unit_dir@
 target = @target@
 target_alias = @target_alias@
@@ -336,7 +336,6 @@ target_vendor = @target_vendor@
 top_build_prefix = @top_build_prefix@
 top_builddir = @top_builddir@
 top_srcdir = @top_srcdir@
-user_rgw = @user_rgw@
 EXTRA_DIST = \
 	ceph.te \
 	ceph.fc \
diff --git a/src/.git_version b/src/.git_version
index b76b127..640de07 100644
--- a/src/.git_version
+++ b/src/.git_version
@@ -1,2 +1,2 @@
-efc8134f669743f4946297eac89aa0fd46a19dae
-v10.0.3
+5acb2655ec55c904e4c11137971c78bac5b76e4e
+v10.0.5
diff --git a/src/Makefile-env.am b/src/Makefile-env.am
index 7dd3967..a347537 100644
--- a/src/Makefile-env.am
+++ b/src/Makefile-env.am
@@ -16,8 +16,8 @@ sbin_SCRIPTS =
 su_sbin_PROGRAMS =
 su_sbin_SCRIPTS =
 dist_bin_SCRIPTS =
-lib_LTLIBRARIES = 
-noinst_LTLIBRARIES = 
+lib_LTLIBRARIES =
+noinst_LTLIBRARIES =
 noinst_LIBRARIES =
 radoslib_LTLIBRARIES =
 
@@ -233,6 +233,10 @@ if WITH_LIBZFS
 LIBOS += libos_zfs.a -lzfs
 endif # WITH_LIBZFS
 
+if WITH_SPDK
+LIBOS += ${LIBSPDK_LIBS} ${LIBDPDK_LIBS} ${LIBPCIACCESS_LIBS}
+endif # WITH_SPDK
+
 if WITH_TCMALLOC_MINIMAL
 LIBPERFGLUE += -ltcmalloc_minimal
 endif # WITH_TCMALLOC_MINIMAL
@@ -252,6 +256,8 @@ if ENABLE_COVERAGE
 EXTRALIBS += -lgcov
 endif # ENABLE_COVERAGE
 
+LIBCOMMON += -luuid
+
 # Libosd always needs osdc and os
 LIBOSD += $(LIBOSDC) $(LIBOS)
 
@@ -274,7 +280,7 @@ LIBKV += -llz4
 endif
 endif # WITH_SLIBROCKSDB
 LIBKV += -lz -lleveldb -lsnappy
-LIBOS += $(LIBOS_TYPES) $(LIBKV)
+LIBOS += $(LIBOS_TYPES) $(LIBKV) $(LIBFUSE_LIBS)
 
 LIBMON += $(LIBMON_TYPES)
 
@@ -292,4 +298,3 @@ DENCODER_DEPS =
 
 
 radoslibdir = $(libdir)/rados-classes
-
diff --git a/src/Makefile-server.am b/src/Makefile-server.am
index 01ef492..1ea73b1 100644
--- a/src/Makefile-server.am
+++ b/src/Makefile-server.am
@@ -36,7 +36,6 @@ endif # WITH_MON
 if WITH_OSD
 
 ceph_sbin_SCRIPTS += \
-	ceph-disk \
 	ceph-disk-udev
 
 bin_SCRIPTS += \
diff --git a/src/Makefile-spdk.am b/src/Makefile-spdk.am
new file mode 100644
index 0000000..ce08667
--- /dev/null
+++ b/src/Makefile-spdk.am
@@ -0,0 +1,48 @@
+EXTRA_DIST += \
+  spdk/PORTING.md \
+  spdk/README.md \
+  spdk/LICENSE \
+  spdk/CONFIG \
+  spdk/autotest.sh \
+  spdk/autopackage.sh \
+  spdk/Makefile \
+  spdk/autobuild.sh \
+  spdk/lib/nvme/nvme_ns.c \
+  spdk/lib/nvme/nvme_qpair.c \
+  spdk/lib/nvme/nvme_impl.h \
+  spdk/lib/nvme/nvme.c \
+  spdk/lib/nvme/nvme_ns_cmd.c \
+  spdk/lib/nvme/nvme_ctrlr.c \
+  spdk/lib/nvme/Makefile \
+  spdk/lib/nvme/nvme_internal.h \
+  spdk/lib/nvme/nvme_ctrlr_cmd.c \
+  spdk/lib/memory/vtophys.c \
+  spdk/lib/memory/Makefile \
+  spdk/lib/util/file.c \
+  spdk/lib/util/string.c \
+  spdk/lib/util/Makefile \
+  spdk/lib/util/pci.c \
+  spdk/lib/Makefile \
+  spdk/lib/ioat/ioat.c \
+  spdk/lib/ioat/ioat_pci.h \
+  spdk/lib/ioat/ioat_impl.h \
+  spdk/lib/ioat/Makefile \
+  spdk/lib/ioat/ioat_internal.h \
+  spdk/mk/spdk.deps.mk \
+  spdk/mk/spdk.subdirs.mk \
+  spdk/mk/spdk.common.mk \
+  spdk/include/spdk/string.h \
+  spdk/include/spdk/ioat.h \
+  spdk/include/spdk/queue.h \
+  spdk/include/spdk/pci_ids.h \
+  spdk/include/spdk/nvme.h \
+  spdk/include/spdk/ioat_spec.h \
+  spdk/include/spdk/nvme_spec.h \
+  spdk/include/spdk/nvme_intel.h \
+  spdk/include/spdk/pci.h \
+  spdk/include/spdk/vtophys.h \
+  spdk/include/spdk/queue_extras.h \
+  spdk/include/spdk/file.h \
+  spdk/include/spdk/assert.h \
+  spdk/include/spdk/barrier.h \
+  spdk/include/spdk/mmio.h
diff --git a/src/Makefile.am b/src/Makefile.am
index 1b651c1..d2f99b8 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -1,5 +1,20 @@
 include Makefile-env.am
 
+# A workaround for http://debbugs.gnu.org/cgi/bugreport.cgi?bug=18744. This
+# bug was fixed in automake 1.15, but we still support automake 1.13, so
+# we cannot simply require 1.15 via `AM_INIT_AUTOMAKE`.
+am__is_gnu_make = { \
+  if test -z '$(MAKELEVEL)'; then \
+    false; \
+  elif test -n '$(MAKE_HOST)'; then \
+    true; \
+  elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \
+    true; \
+  else \
+    false; \
+  fi; \
+}
+
 SUBDIRS += ocf java
 DIST_SUBDIRS += gmock ocf java
 
@@ -18,6 +33,7 @@ include arch/Makefile.am
 include auth/Makefile.am
 include brag/Makefile.am
 include ceph-detect-init/Makefile.am
+include ceph-disk/Makefile.am
 include crush/Makefile.am
 include kv/Makefile.am
 include mon/Makefile.am
@@ -47,6 +63,7 @@ include rbd_replay/Makefile.am
 include test/Makefile.am
 include tools/Makefile.am
 include Makefile-rocksdb.am
+include Makefile-spdk.am
 include tracing/Makefile.am
 include pybind/Makefile.am
 
@@ -60,6 +77,7 @@ editpaths = sed \
 	-e 's|@sysconfdir[@]|$(sysconfdir)|g' \
 	-e 's|@datadir[@]|$(pkgdatadir)|g' \
 	-e 's|@prefix[@]|$(prefix)|g' \
+	-e 's|@libexecdir[@]|$(libexecdir)|g' \
 	-e 's|@@GCOV_PREFIX_STRIP[@][@]|$(GCOV_PREFIX_STRIP)|g'
 shell_scripts = ceph-debugpack ceph-post-file ceph-crush-location
 $(shell_scripts): Makefile
@@ -106,7 +124,6 @@ EXTRA_DIST += \
 	$(srcdir)/upstart/radosgw-all-starter.conf \
 	$(srcdir)/upstart/rbdmap.conf \
 	ceph.in \
-	ceph-disk \
 	ceph-disk-udev \
 	ceph-create-keys \
 	ceph-rest-api \
@@ -127,13 +144,10 @@ docdir ?= ${datadir}/doc/ceph
 doc_DATA = $(srcdir)/sample.ceph.conf sample.fetch_config
 
 
-# various scripts
-
-shell_commondir = $(libdir)/ceph
-shell_common_SCRIPTS = ceph_common.sh
+# various scripts in $(libexecdir)
 
 ceph_libexecdir = $(libexecdir)/ceph
-ceph_libexec_SCRIPTS = ceph-osd-prestart.sh
+ceph_libexec_SCRIPTS = ceph_common.sh ceph-osd-prestart.sh
 
 
 # tests to actually run on "make check"; if you need extra, non-test,
diff --git a/src/Makefile.in b/src/Makefile.in
index 8671178..df3217e 100644
--- a/src/Makefile.in
+++ b/src/Makefile.in
@@ -20,17 +20,6 @@
 
 
 VPATH = @srcdir@
-am__is_gnu_make = { \
-  if test -z '$(MAKELEVEL)'; then \
-    false; \
-  elif test -n '$(MAKE_HOST)'; then \
-    true; \
-  elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \
-    true; \
-  else \
-    false; \
-  fi; \
-}
 am__make_running_with_option = \
   case $${target_option-} in \
       ?) ;; \
@@ -97,15 +86,15 @@ target_triplet = @target@
 bin_PROGRAMS = $(am__EXEEXT_28) $(am__EXEEXT_29) $(am__EXEEXT_30) \
 	$(am__EXEEXT_31) $(am__EXEEXT_32) $(am__EXEEXT_33) \
 	$(am__EXEEXT_34) $(am__EXEEXT_35) $(am__EXEEXT_36) \
-	$(am__EXEEXT_37) monmaptool$(EXEEXT) crushtool$(EXEEXT) \
-	osdmaptool$(EXEEXT) ceph-conf$(EXEEXT) ceph-authtool$(EXEEXT) \
-	$(am__EXEEXT_38) $(am__EXEEXT_39) $(am__EXEEXT_40) \
+	$(am__EXEEXT_37) $(am__EXEEXT_38) monmaptool$(EXEEXT) \
+	crushtool$(EXEEXT) osdmaptool$(EXEEXT) ceph-conf$(EXEEXT) \
+	ceph-authtool$(EXEEXT) $(am__EXEEXT_39) $(am__EXEEXT_40) \
 	$(am__EXEEXT_41) $(am__EXEEXT_42) $(am__EXEEXT_43) \
-	$(am__EXEEXT_44) $(am__EXEEXT_45)
-noinst_PROGRAMS = $(am__EXEEXT_62) $(am__EXEEXT_63) $(am__EXEEXT_64)
+	$(am__EXEEXT_44) $(am__EXEEXT_45) $(am__EXEEXT_46)
+noinst_PROGRAMS = $(am__EXEEXT_64) $(am__EXEEXT_65) $(am__EXEEXT_66)
 sbin_PROGRAMS =
-su_sbin_PROGRAMS = $(am__EXEEXT_65)
-check_PROGRAMS = $(am__EXEEXT_60) $(am__EXEEXT_61) \
+su_sbin_PROGRAMS = $(am__EXEEXT_67)
+check_PROGRAMS = $(am__EXEEXT_62) $(am__EXEEXT_63) \
 	unittest_subprocess$(EXEEXT) \
 	unittest_async_compressor$(EXEEXT)
 
@@ -137,39 +126,40 @@ check_PROGRAMS = $(am__EXEEXT_60) $(am__EXEEXT_61) \
 @WITH_PROFILER_TRUE@am__append_16 = -lprofiler
 @WITH_LIBAIO_TRUE@am__append_17 = -laio
 @WITH_LIBZFS_TRUE@am__append_18 = libos_zfs.a -lzfs
-@WITH_TCMALLOC_MINIMAL_TRUE@am__append_19 = -ltcmalloc_minimal
-@WITH_TCMALLOC_TRUE@am__append_20 = -ltcmalloc
-@WITH_JEMALLOC_TRUE@am__append_21 = -ljemalloc
+@WITH_SPDK_TRUE@am__append_19 = ${LIBSPDK_LIBS} ${LIBDPDK_LIBS} ${LIBPCIACCESS_LIBS}
+@WITH_TCMALLOC_MINIMAL_TRUE@am__append_20 = -ltcmalloc_minimal
+@WITH_TCMALLOC_TRUE@am__append_21 = -ltcmalloc
 @WITH_JEMALLOC_TRUE@am__append_22 = -ljemalloc
 @WITH_JEMALLOC_TRUE@am__append_23 = -ljemalloc
 @WITH_JEMALLOC_TRUE@am__append_24 = -ljemalloc
-@ENABLE_COVERAGE_TRUE@am__append_25 = -lgcov
+@WITH_JEMALLOC_TRUE@am__append_25 = -ljemalloc
+@ENABLE_COVERAGE_TRUE@am__append_26 = -lgcov
 
 # libkv/libos linking order is ornery
- at WITH_SLIBROCKSDB_TRUE@am__append_26 = rocksdb/librocksdb.a
- at HAVE_BZLIB_TRUE@@WITH_SLIBROCKSDB_TRUE at am__append_27 = -lbz2
- at HAVE_LZ4_TRUE@@WITH_SLIBROCKSDB_TRUE at am__append_28 = -llz4
- at ENABLE_CLIENT_TRUE@am__append_29 = brag/client/ceph-brag ceph \
+ at WITH_SLIBROCKSDB_TRUE@am__append_27 = rocksdb/librocksdb.a
+ at HAVE_BZLIB_TRUE@@WITH_SLIBROCKSDB_TRUE at am__append_28 = -lbz2
+ at HAVE_LZ4_TRUE@@WITH_SLIBROCKSDB_TRUE at am__append_29 = -llz4
+ at ENABLE_CLIENT_TRUE@am__append_30 = brag/client/ceph-brag ceph \
 @ENABLE_CLIENT_TRUE@	ceph-post-file
- at ENABLE_CLIENT_TRUE@am__append_30 = brag/server brag/README.md brag/client
- at ENABLE_SERVER_TRUE@am__append_31 = libkv.a
- at ENABLE_SERVER_TRUE@am__append_32 = \
+ at ENABLE_CLIENT_TRUE@am__append_31 = brag/server brag/README.md brag/client
+ at ENABLE_SERVER_TRUE@am__append_32 = libkv.a
+ at ENABLE_SERVER_TRUE@am__append_33 = \
 @ENABLE_SERVER_TRUE@	kv/KeyValueDB.h \
 @ENABLE_SERVER_TRUE@	kv/LevelDBStore.h
 
- at ENABLE_SERVER_TRUE@@WITH_SLIBROCKSDB_TRUE at am__append_33 = -I rocksdb/include -fPIC
- at ENABLE_SERVER_TRUE@@WITH_SLIBROCKSDB_TRUE at am__append_34 = kv/RocksDBStore.cc
- at ENABLE_SERVER_TRUE@@WITH_SLIBROCKSDB_TRUE at am__append_35 = rocksdb/librocksdb.a
- at ENABLE_SERVER_TRUE@@WITH_SLIBROCKSDB_TRUE at am__append_36 = kv/RocksDBStore.h
- at ENABLE_SERVER_TRUE@@WITH_DLIBROCKSDB_TRUE at am__append_37 = kv/RocksDBStore.cc
- at ENABLE_SERVER_TRUE@@WITH_DLIBROCKSDB_TRUE at am__append_38 = -lrocksdb
- at ENABLE_SERVER_TRUE@@WITH_DLIBROCKSDB_TRUE at am__append_39 = kv/RocksDBStore.h
- at ENABLE_SERVER_TRUE@@WITH_KINETIC_TRUE at am__append_40 = kv/KineticStore.cc
- at ENABLE_SERVER_TRUE@@WITH_KINETIC_TRUE at am__append_41 = -std=gnu++11
- at ENABLE_SERVER_TRUE@@WITH_KINETIC_TRUE at am__append_42 = -lkinetic_client -lprotobuf -lglog -lgflags libcrypto.a
- at ENABLE_SERVER_TRUE@@WITH_KINETIC_TRUE at am__append_43 = kv/KineticStore.h
- at ENABLE_SERVER_TRUE@@WITH_MON_TRUE at am__append_44 = libmon.a
- at ENABLE_SERVER_TRUE@@WITH_MON_TRUE at am__append_45 = \
+ at ENABLE_SERVER_TRUE@@WITH_SLIBROCKSDB_TRUE at am__append_34 = -I rocksdb/include -fPIC
+ at ENABLE_SERVER_TRUE@@WITH_SLIBROCKSDB_TRUE at am__append_35 = kv/RocksDBStore.cc
+ at ENABLE_SERVER_TRUE@@WITH_SLIBROCKSDB_TRUE at am__append_36 = rocksdb/librocksdb.a
+ at ENABLE_SERVER_TRUE@@WITH_SLIBROCKSDB_TRUE at am__append_37 = kv/RocksDBStore.h
+ at ENABLE_SERVER_TRUE@@WITH_DLIBROCKSDB_TRUE at am__append_38 = kv/RocksDBStore.cc
+ at ENABLE_SERVER_TRUE@@WITH_DLIBROCKSDB_TRUE at am__append_39 = -lrocksdb
+ at ENABLE_SERVER_TRUE@@WITH_DLIBROCKSDB_TRUE at am__append_40 = kv/RocksDBStore.h
+ at ENABLE_SERVER_TRUE@@WITH_KINETIC_TRUE at am__append_41 = kv/KineticStore.cc
+ at ENABLE_SERVER_TRUE@@WITH_KINETIC_TRUE at am__append_42 = -std=gnu++11
+ at ENABLE_SERVER_TRUE@@WITH_KINETIC_TRUE at am__append_43 = -lkinetic_client -lprotobuf -lglog -lgflags libcrypto.a
+ at ENABLE_SERVER_TRUE@@WITH_KINETIC_TRUE at am__append_44 = kv/KineticStore.h
+ at ENABLE_SERVER_TRUE@@WITH_MON_TRUE at am__append_45 = libmon.a
+ at ENABLE_SERVER_TRUE@@WITH_MON_TRUE at am__append_46 = \
 @ENABLE_SERVER_TRUE@@WITH_MON_TRUE@	mon/AuthMonitor.h \
 @ENABLE_SERVER_TRUE@@WITH_MON_TRUE@	mon/DataHealthService.h \
 @ENABLE_SERVER_TRUE@@WITH_MON_TRUE@	mon/Elector.h \
@@ -198,10 +188,10 @@ check_PROGRAMS = $(am__EXEEXT_60) $(am__EXEEXT_61) \
 
 
 # There are no libmds_types so use the full mds library for dencoder for now
- at ENABLE_CLIENT_TRUE@am__append_46 = $(LIBMDS_SOURCES)
- at ENABLE_CLIENT_TRUE@am__append_47 = $(LIBMDS_DEPS)
- at ENABLE_SERVER_TRUE@@WITH_MDS_TRUE at am__append_48 = libmds.la
- at ENABLE_SERVER_TRUE@@WITH_MDS_TRUE at am__append_49 =  \
+ at ENABLE_CLIENT_TRUE@am__append_47 = $(LIBMDS_SOURCES)
+ at ENABLE_CLIENT_TRUE@am__append_48 = $(LIBMDS_DEPS)
+ at ENABLE_SERVER_TRUE@@WITH_MDS_TRUE at am__append_49 = libmds.la
+ at ENABLE_SERVER_TRUE@@WITH_MDS_TRUE at am__append_50 =  \
 @ENABLE_SERVER_TRUE@@WITH_MDS_TRUE@	mds/inode_backtrace.h \
 @ENABLE_SERVER_TRUE@@WITH_MDS_TRUE@	mds/flock.h mds/locks.c \
 @ENABLE_SERVER_TRUE@@WITH_MDS_TRUE@	mds/locks.h mds/CDentry.h \
@@ -255,11 +245,12 @@ check_PROGRAMS = $(am__EXEEXT_60) $(am__EXEEXT_61) \
 @ENABLE_SERVER_TRUE@@WITH_MDS_TRUE@	mds/events/ETableClient.h \
 @ENABLE_SERVER_TRUE@@WITH_MDS_TRUE@	mds/events/ETableServer.h \
 @ENABLE_SERVER_TRUE@@WITH_MDS_TRUE@	mds/events/EUpdate.h
- at WITH_LIBAIO_TRUE@am__append_50 = \
+ at WITH_LIBAIO_TRUE@am__append_51 = \
 @WITH_LIBAIO_TRUE@	os/bluestore/bluestore_types.cc \
 @WITH_LIBAIO_TRUE@	os/bluestore/bluefs_types.cc
 
- at ENABLE_SERVER_TRUE@@WITH_LIBAIO_TRUE at am__append_51 = \
+ at ENABLE_SERVER_TRUE@@WITH_FUSE_TRUE at am__append_52 = os/FuseStore.cc
+ at ENABLE_SERVER_TRUE@@WITH_LIBAIO_TRUE at am__append_53 = \
 @ENABLE_SERVER_TRUE@@WITH_LIBAIO_TRUE@	os/bluestore/kv.cc \
 @ENABLE_SERVER_TRUE@@WITH_LIBAIO_TRUE@	os/bluestore/Allocator.cc \
 @ENABLE_SERVER_TRUE@@WITH_LIBAIO_TRUE@	os/bluestore/BlockDevice.cc \
@@ -267,17 +258,18 @@ check_PROGRAMS = $(am__EXEEXT_60) $(am__EXEEXT_61) \
 @ENABLE_SERVER_TRUE@@WITH_LIBAIO_TRUE@	os/bluestore/BlueRocksEnv.cc \
 @ENABLE_SERVER_TRUE@@WITH_LIBAIO_TRUE@	os/bluestore/BlueStore.cc \
 @ENABLE_SERVER_TRUE@@WITH_LIBAIO_TRUE@	os/bluestore/FreelistManager.cc \
+ at ENABLE_SERVER_TRUE@@WITH_LIBAIO_TRUE@	os/bluestore/KernelDevice.cc \
 @ENABLE_SERVER_TRUE@@WITH_LIBAIO_TRUE@	os/bluestore/StupidAllocator.cc
 
- at ENABLE_SERVER_TRUE@@LINUX_TRUE at am__append_52 = os/filestore/BtrfsFileStoreBackend.cc
- at ENABLE_SERVER_TRUE@@WITH_LIBXFS_TRUE at am__append_53 = \
+ at ENABLE_SERVER_TRUE@@LINUX_TRUE at am__append_54 = os/filestore/BtrfsFileStoreBackend.cc
+ at ENABLE_SERVER_TRUE@@WITH_LIBXFS_TRUE at am__append_55 = \
 @ENABLE_SERVER_TRUE@@WITH_LIBXFS_TRUE@    os/filestore/XfsFileStoreBackend.cc \
 @ENABLE_SERVER_TRUE@@WITH_LIBXFS_TRUE@    os/fs/XFS.cc
 
- at ENABLE_SERVER_TRUE@@WITH_LIBZFS_TRUE at am__append_54 = os/filestore/ZFSFileStoreBackend.cc
- at ENABLE_SERVER_TRUE@am__append_55 = libos.a
- at ENABLE_SERVER_TRUE@@WITH_LTTNG_TRUE at am__append_56 = $(LIBOS_TP)
- at ENABLE_SERVER_TRUE@am__append_57 = \
+ at ENABLE_SERVER_TRUE@@WITH_LIBZFS_TRUE at am__append_56 = os/filestore/ZFSFileStoreBackend.cc
+ at ENABLE_SERVER_TRUE@am__append_57 = libos.a
+ at ENABLE_SERVER_TRUE@@WITH_LTTNG_TRUE at am__append_58 = $(LIBOS_TP)
+ at ENABLE_SERVER_TRUE@am__append_59 = \
 @ENABLE_SERVER_TRUE@	os/filestore/chain_xattr.h \
 @ENABLE_SERVER_TRUE@	os/filestore/BtrfsFileStoreBackend.h \
 @ENABLE_SERVER_TRUE@	os/filestore/CollectionIndex.h \
@@ -298,17 +290,16 @@ check_PROGRAMS = $(am__EXEEXT_60) $(am__EXEEXT_61) \
 @ENABLE_SERVER_TRUE@	os/fs/btrfs_ioctl.h \
 @ENABLE_SERVER_TRUE@	os/fs/FS.h \
 @ENABLE_SERVER_TRUE@	os/fs/XFS.h \
- at ENABLE_SERVER_TRUE@	os/keyvaluestore/GenericObjectMap.h \
- at ENABLE_SERVER_TRUE@	os/keyvaluestore/KeyValueStore.h \
 @ENABLE_SERVER_TRUE@	os/kstore/kstore_types.h \
 @ENABLE_SERVER_TRUE@	os/kstore/KStore.h \
 @ENABLE_SERVER_TRUE@	os/kstore/kv.h \
 @ENABLE_SERVER_TRUE@	os/memstore/MemStore.h \
 @ENABLE_SERVER_TRUE@	os/memstore/PageSet.h \
+ at ENABLE_SERVER_TRUE@	os/FuseStore.h \
 @ENABLE_SERVER_TRUE@	os/ObjectMap.h \
 @ENABLE_SERVER_TRUE@	os/ObjectStore.h
 
- at ENABLE_SERVER_TRUE@@WITH_LIBAIO_TRUE at am__append_58 = \
+ at ENABLE_SERVER_TRUE@@WITH_LIBAIO_TRUE at am__append_60 = \
 @ENABLE_SERVER_TRUE@@WITH_LIBAIO_TRUE@	os/bluestore/bluefs_types.h \
 @ENABLE_SERVER_TRUE@@WITH_LIBAIO_TRUE@	os/bluestore/bluestore_types.h \
 @ENABLE_SERVER_TRUE@@WITH_LIBAIO_TRUE@	os/bluestore/kv.h \
@@ -317,14 +308,22 @@ check_PROGRAMS = $(am__EXEEXT_60) $(am__EXEEXT_61) \
 @ENABLE_SERVER_TRUE@@WITH_LIBAIO_TRUE@	os/bluestore/BlueFS.h \
 @ENABLE_SERVER_TRUE@@WITH_LIBAIO_TRUE@	os/bluestore/BlueRocksEnv.h \
 @ENABLE_SERVER_TRUE@@WITH_LIBAIO_TRUE@	os/bluestore/BlueStore.h \
+ at ENABLE_SERVER_TRUE@@WITH_LIBAIO_TRUE@	os/bluestore/KernelDevice.h \
 @ENABLE_SERVER_TRUE@@WITH_LIBAIO_TRUE@	os/bluestore/FreelistManager.h \
 @ENABLE_SERVER_TRUE@@WITH_LIBAIO_TRUE@	os/bluestore/StupidAllocator.h
 
-@ENABLE_SERVER_TRUE@@WITH_LIBZFS_TRUE@am__append_59 = libos_zfs.a
-@ENABLE_SERVER_TRUE@@WITH_LIBZFS_TRUE@am__append_60 = os/fs/ZFS.h
-@ENABLE_SERVER_TRUE@@WITH_LIBAIO_TRUE@am__append_61 = ceph-bluefs-tool
-@ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@am__append_62 = libosd.a
-@ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@am__append_63 = \
+@ENABLE_SERVER_TRUE@@WITH_LIBZFS_TRUE@am__append_61 = libos_zfs.a
+@ENABLE_SERVER_TRUE@@WITH_LIBZFS_TRUE@am__append_62 = os/fs/ZFS.h
+@ENABLE_SERVER_TRUE@@WITH_SPDK_TRUE@am__append_63 = \
+@ENABLE_SERVER_TRUE@@WITH_SPDK_TRUE@	${LIBSPDK_CFLAGS} \
+@ENABLE_SERVER_TRUE@@WITH_SPDK_TRUE@	${LIBDPDK_CFLAGS} \
+@ENABLE_SERVER_TRUE@@WITH_SPDK_TRUE@	${LIBPCIACCESS_CFLAGS}
+
+@ENABLE_SERVER_TRUE@@WITH_SPDK_TRUE@am__append_64 = os/bluestore/NVMEDevice.cc
+@ENABLE_SERVER_TRUE@@WITH_SPDK_TRUE@am__append_65 = os/bluestore/NVMEDevice.h
+@ENABLE_SERVER_TRUE@@WITH_LIBAIO_TRUE@am__append_66 = ceph-bluefs-tool
+@ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@am__append_67 = libosd.a
+@ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@am__append_68 = \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	osd/ClassHandler.h \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	osd/HitSet.h \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	osd/OSD.h \
@@ -346,26 +345,26 @@ check_PROGRAMS = $(am__EXEEXT_60) $(am__EXEEXT_61) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	osd/Watch.h \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	osd/osd_types.h
 
- at LINUX_TRUE@am__append_64 = -export-symbols-regex '.*__erasure_code_.*'
- at LINUX_TRUE@am__append_65 = -export-symbols-regex '.*__erasure_code_.*'
- at HAVE_NEON_TRUE@am__append_66 = libec_jerasure_neon.la
- at LINUX_TRUE@am__append_67 = -export-symbols-regex '.*__erasure_code_.*'
- at HAVE_SSSE3_TRUE@am__append_68 = libec_jerasure_sse3.la
 @LINUX_TRUE at am__append_69 = -export-symbols-regex '.*__erasure_code_.*'
- at HAVE_SSE4_PCLMUL_TRUE@am__append_70 = libec_jerasure_sse4.la
- at LINUX_TRUE@am__append_71 = -export-symbols-regex '.*__erasure_code_.*'
+ at LINUX_TRUE@am__append_70 = -export-symbols-regex '.*__erasure_code_.*'
+ at HAVE_NEON_TRUE@am__append_71 = libec_jerasure_neon.la
 @LINUX_TRUE at am__append_72 = -export-symbols-regex '.*__erasure_code_.*'
- at LINUX_TRUE@am__append_73 = -export-symbols-regex '.*__erasure_code_.*'
+ at HAVE_SSSE3_TRUE@am__append_73 = libec_jerasure_sse3.la
 @LINUX_TRUE at am__append_74 = -export-symbols-regex '.*__erasure_code_.*'
- at HAVE_NEON_TRUE@am__append_75 = libec_shec_neon.la
+ at HAVE_SSE4_PCLMUL_TRUE@am__append_75 = libec_jerasure_sse4.la
 @LINUX_TRUE at am__append_76 = -export-symbols-regex '.*__erasure_code_.*'
- at HAVE_SSSE3_TRUE@am__append_77 = libec_shec_sse3.la
+ at LINUX_TRUE@am__append_77 = -export-symbols-regex '.*__erasure_code_.*'
 @LINUX_TRUE at am__append_78 = -export-symbols-regex '.*__erasure_code_.*'
- at HAVE_SSE4_PCLMUL_TRUE@am__append_79 = libec_shec_sse4.la
- at LINUX_TRUE@am__append_80 = -export-symbols-regex '.*__erasure_code_.*'
+ at LINUX_TRUE@am__append_79 = -export-symbols-regex '.*__erasure_code_.*'
+ at HAVE_NEON_TRUE@am__append_80 = libec_shec_neon.la
+ at LINUX_TRUE@am__append_81 = -export-symbols-regex '.*__erasure_code_.*'
+ at HAVE_SSSE3_TRUE@am__append_82 = libec_shec_sse3.la
+ at LINUX_TRUE@am__append_83 = -export-symbols-regex '.*__erasure_code_.*'
+ at HAVE_SSE4_PCLMUL_TRUE@am__append_84 = libec_shec_sse4.la
+ at LINUX_TRUE@am__append_85 = -export-symbols-regex '.*__erasure_code_.*'
 
 # ISA
- at WITH_BETTER_YASM_ELF64_TRUE@am__append_81 = \
+ at WITH_BETTER_YASM_ELF64_TRUE@am__append_86 = \
 @WITH_BETTER_YASM_ELF64_TRUE@	erasure-code/isa/ErasureCodeIsa.h \
 @WITH_BETTER_YASM_ELF64_TRUE@	erasure-code/isa/ErasureCodeIsaTableCache.h \
 @WITH_BETTER_YASM_ELF64_TRUE@	erasure-code/isa/xor_op.h \
@@ -376,11 +375,13 @@ check_PROGRAMS = $(am__EXEEXT_60) $(am__EXEEXT_61) \
 @WITH_BETTER_YASM_ELF64_TRUE@	erasure-code/isa/isa-l/include/gf_vect_mul.h \
 @WITH_BETTER_YASM_ELF64_TRUE@	erasure-code/isa/isa-l/include/types.h
 
- at LINUX_TRUE@@WITH_BETTER_YASM_ELF64_TRUE at am__append_82 = -export-symbols-regex '.*__erasure_code_.*'
- at WITH_BETTER_YASM_ELF64_TRUE@am__append_83 = libec_isa.la
- at LINUX_TRUE@am__append_84 = -export-symbols-regex '.*__compressor_.*'
- at ENABLE_CLIENT_TRUE@am__append_85 = libclient.la
- at ENABLE_CLIENT_TRUE@am__append_86 = \
+ at WITH_BETTER_YASM_ELF64_TRUE@am__append_87 = libisa.la
+ at LINUX_TRUE@@WITH_BETTER_YASM_ELF64_TRUE at am__append_88 = -export-symbols-regex '.*__erasure_code_.*'
+ at WITH_BETTER_YASM_ELF64_TRUE@am__append_89 = libec_isa.la
+ at LINUX_TRUE@am__append_90 = -export-symbols-regex '.*__compressor_.*'
+ at LINUX_TRUE@am__append_91 = -export-symbols-regex '.*__compressor_.*'
+ at ENABLE_CLIENT_TRUE@am__append_92 = libclient.la
+ at ENABLE_CLIENT_TRUE@am__append_93 = \
 @ENABLE_CLIENT_TRUE@	client/Client.h \
 @ENABLE_CLIENT_TRUE@	client/Dentry.h \
 @ENABLE_CLIENT_TRUE@	client/Dir.h \
@@ -397,57 +398,58 @@ check_PROGRAMS = $(am__EXEEXT_60) $(am__EXEEXT_61) \
 @ENABLE_CLIENT_TRUE@	client/posix_acl.h \
 @ENABLE_CLIENT_TRUE@	client/UserGroups.h
 
- at ENABLE_CLIENT_TRUE@@WITH_FUSE_TRUE at am__append_87 = libclient_fuse.la
- at ENABLE_CLIENT_TRUE@@WITH_FUSE_TRUE at am__append_88 = client/fuse_ll.h
- at ENABLE_CLIENT_TRUE@am__append_89 = ceph_test_ioctls
- at WITH_TCMALLOC_TRUE@am__append_90 = perfglue/heap_profiler.cc
- at WITH_TCMALLOC_TRUE@am__append_91 = -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-free
- at WITH_TCMALLOC_TRUE@am__append_92 = -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-free
- at WITH_TCMALLOC_FALSE@@WITH_TCMALLOC_MINIMAL_TRUE at am__append_93 = perfglue/heap_profiler.cc
- at WITH_TCMALLOC_FALSE@@WITH_TCMALLOC_MINIMAL_TRUE at am__append_94 = -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-free
- at WITH_TCMALLOC_FALSE@@WITH_TCMALLOC_MINIMAL_TRUE at am__append_95 = -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-free
- at WITH_TCMALLOC_FALSE@@WITH_TCMALLOC_MINIMAL_FALSE at am__append_96 = perfglue/disabled_heap_profiler.cc
- at WITH_PROFILER_TRUE@am__append_97 = perfglue/cpu_profiler.cc
- at WITH_PROFILER_FALSE@am__append_98 = perfglue/disabled_stubs.cc
- at ENABLE_SERVER_TRUE@am__append_99 = \
+ at ENABLE_CLIENT_TRUE@@WITH_FUSE_TRUE at am__append_94 = libclient_fuse.la
+ at ENABLE_CLIENT_TRUE@@WITH_FUSE_TRUE at am__append_95 = client/fuse_ll.h
+ at ENABLE_CLIENT_TRUE@am__append_96 = ceph_test_ioctls
+ at WITH_LTTNG_TRUE@am__append_97 = -ldl -llttng-ust
+ at WITH_TCMALLOC_TRUE@am__append_98 = perfglue/heap_profiler.cc
+ at WITH_TCMALLOC_TRUE@am__append_99 = -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-free
+ at WITH_TCMALLOC_TRUE@am__append_100 = -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-free
+ at WITH_TCMALLOC_FALSE@@WITH_TCMALLOC_MINIMAL_TRUE at am__append_101 = perfglue/heap_profiler.cc
+ at WITH_TCMALLOC_FALSE@@WITH_TCMALLOC_MINIMAL_TRUE at am__append_102 = -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-free
+ at WITH_TCMALLOC_FALSE@@WITH_TCMALLOC_MINIMAL_TRUE at am__append_103 = -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-free
+ at WITH_TCMALLOC_FALSE@@WITH_TCMALLOC_MINIMAL_FALSE at am__append_104 = perfglue/disabled_heap_profiler.cc
+ at WITH_PROFILER_TRUE@am__append_105 = perfglue/cpu_profiler.cc
+ at WITH_PROFILER_FALSE@am__append_106 = perfglue/disabled_stubs.cc
+ at ENABLE_SERVER_TRUE@am__append_107 = \
 @ENABLE_SERVER_TRUE@	common/xattr.c \
 @ENABLE_SERVER_TRUE@	common/ipaddr.cc \
 @ENABLE_SERVER_TRUE@	common/ceph_json.cc \
 @ENABLE_SERVER_TRUE@	common/util.cc \
 @ENABLE_SERVER_TRUE@	common/pick_address.cc
 
- at LINUX_TRUE@am__append_100 = \
- at LINUX_TRUE@	common/linux_version.c 
+ at LINUX_TRUE@am__append_108 = \
+ at LINUX_TRUE@	common/linux_version.c
 
- at SOLARIS_TRUE@am__append_101 = \
+ at SOLARIS_TRUE@am__append_109 = \
 @SOLARIS_TRUE@        common/solaris_errno.cc
 
- at AIX_TRUE@am__append_102 = \
+ at AIX_TRUE@am__append_110 = \
 @AIX_TRUE@        common/aix_errno.cc
 
 
 # used by RBD and FileStore
- at LINUX_TRUE@am__append_103 = \
+ at LINUX_TRUE@am__append_111 = \
 @LINUX_TRUE@	common/blkdev.cc
 
- at ENABLE_XIO_TRUE@am__append_104 = \
+ at ENABLE_XIO_TRUE@am__append_112 = \
 @ENABLE_XIO_TRUE@	common/address_helper.cc
 
- at WITH_GOOD_YASM_ELF64_TRUE@am__append_105 = common/crc32c_intel_fast_asm.S common/crc32c_intel_fast_zero_asm.S
- at HAVE_ARMV8_CRC_TRUE@am__append_106 = libcommon_crc_aarch64.la
- at HAVE_ARMV8_CRC_TRUE@am__append_107 = libcommon_crc_aarch64.la
- at LINUX_TRUE@am__append_108 = -lrt -lblkid
- at ENABLE_XIO_TRUE@am__append_109 = \
+ at WITH_GOOD_YASM_ELF64_TRUE@am__append_113 = common/crc32c_intel_fast_asm.S common/crc32c_intel_fast_zero_asm.S
+ at HAVE_ARMV8_CRC_TRUE@am__append_114 = libcommon_crc_aarch64.la
+ at HAVE_ARMV8_CRC_TRUE@am__append_115 = libcommon_crc_aarch64.la
+ at LINUX_TRUE@am__append_116 = -lrt -lblkid
+ at ENABLE_XIO_TRUE@am__append_117 = \
 @ENABLE_XIO_TRUE@	common/address_helper.h
 
- at LINUX_TRUE@am__append_110 = libsecret.la
- at LINUX_TRUE@am__append_111 = msg/async/EventEpoll.cc
- at DARWIN_TRUE@am__append_112 = msg/async/EventKqueue.cc
- at FREEBSD_TRUE@am__append_113 = msg/async/EventKqueue.cc
- at LINUX_TRUE@am__append_114 = msg/async/EventEpoll.h
- at DARWIN_TRUE@am__append_115 = msg/async/EventKqueue.h
- at FREEBSD_TRUE@am__append_116 = msg/async/EventKqueue.h
- at ENABLE_XIO_TRUE@am__append_117 = \
+ at LINUX_TRUE@am__append_118 = libsecret.la
+ at LINUX_TRUE@am__append_119 = msg/async/EventEpoll.cc
+ at DARWIN_TRUE@am__append_120 = msg/async/EventKqueue.cc
+ at FREEBSD_TRUE@am__append_121 = msg/async/EventKqueue.cc
+ at LINUX_TRUE@am__append_122 = msg/async/EventEpoll.h
+ at DARWIN_TRUE@am__append_123 = msg/async/EventKqueue.h
+ at FREEBSD_TRUE@am__append_124 = msg/async/EventKqueue.h
+ at ENABLE_XIO_TRUE@am__append_125 = \
 @ENABLE_XIO_TRUE@	msg/xio/QueueStrategy.cc \
 @ENABLE_XIO_TRUE@	msg/xio/XioConnection.cc \
 @ENABLE_XIO_TRUE@	msg/xio/XioMessenger.cc \
@@ -455,7 +457,7 @@ check_PROGRAMS = $(am__EXEEXT_60) $(am__EXEEXT_61) \
 @ENABLE_XIO_TRUE@	msg/xio/XioPortal.cc \
 @ENABLE_XIO_TRUE@	msg/xio/XioPool.cc
 
- at ENABLE_XIO_TRUE@am__append_118 = \
+ at ENABLE_XIO_TRUE@am__append_126 = \
 @ENABLE_XIO_TRUE@	msg/xio/DispatchStrategy.h \
 @ENABLE_XIO_TRUE@	msg/xio/FastStrategy.h \
 @ENABLE_XIO_TRUE@	msg/xio/QueueStrategy.h \
@@ -467,18 +469,18 @@ check_PROGRAMS = $(am__EXEEXT_60) $(am__EXEEXT_61) \
 @ENABLE_XIO_TRUE@	msg/xio/XioPortal.h \
 @ENABLE_XIO_TRUE@	msg/xio/XioSubmit.h
 
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at am__append_119 =  \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at am__append_127 =  \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	librados_internal.la \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	librados_api.la \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	libjournal.la
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at am__append_120 = \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at am__append_128 = \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	librados_internal.la libcls_lock_client.la \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(LIBOSDC) $(LIBCOMMON_DEPS)
 
- at ENABLE_CLIENT_TRUE@@LINUX_TRUE@@WITH_RADOS_TRUE at am__append_121 = -fvisibility=hidden -fvisibility-inlines-hidden
- at ENABLE_CLIENT_TRUE@@LINUX_TRUE@@WITH_RADOS_TRUE at am__append_122 = -Xcompiler -Xlinker -Xcompiler '--exclude-libs=ALL'
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at am__append_123 = librados.la
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at am__append_124 = \
+ at ENABLE_CLIENT_TRUE@@LINUX_TRUE@@WITH_RADOS_TRUE at am__append_129 = -fvisibility=hidden -fvisibility-inlines-hidden
+ at ENABLE_CLIENT_TRUE@@LINUX_TRUE@@WITH_RADOS_TRUE at am__append_130 = -Xcompiler -Xlinker -Xcompiler '--exclude-libs=ALL'
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at am__append_131 = librados.la
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at am__append_132 = \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	librados/snap_set_diff.h \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	librados/AioCompletionImpl.h \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	librados/IoCtxImpl.h \
@@ -487,13 +489,13 @@ check_PROGRAMS = $(am__EXEEXT_60) $(am__EXEEXT_61) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	librados/RadosXattrIter.h \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	librados/ListObjectImpl.h
 
- at ENABLE_CLIENT_TRUE@@LINUX_TRUE@@WITH_RADOSSTRIPER_TRUE@@WITH_RADOS_TRUE at am__append_125 = -export-symbols-regex '^radosstriper_.*'
- at ENABLE_CLIENT_TRUE@@WITH_RADOSSTRIPER_TRUE@@WITH_RADOS_TRUE at am__append_126 = libradosstriper.la
- at ENABLE_CLIENT_TRUE@@WITH_RADOSSTRIPER_TRUE@@WITH_RADOS_TRUE at am__append_127 = \
+ at ENABLE_CLIENT_TRUE@@LINUX_TRUE@@WITH_RADOSSTRIPER_TRUE@@WITH_RADOS_TRUE at am__append_133 = -export-symbols-regex '^radosstriper_.*'
+ at ENABLE_CLIENT_TRUE@@WITH_RADOSSTRIPER_TRUE@@WITH_RADOS_TRUE at am__append_134 = libradosstriper.la
+ at ENABLE_CLIENT_TRUE@@WITH_RADOSSTRIPER_TRUE@@WITH_RADOS_TRUE at am__append_135 = \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSSTRIPER_TRUE@@WITH_RADOS_TRUE@	libradosstriper/RadosStriperImpl.h \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSSTRIPER_TRUE@@WITH_RADOS_TRUE@	libradosstriper/MultiAioCompletionImpl.h
 
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at am__append_128 = \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at am__append_136 = \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	journal/AsyncOpTracker.h \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	journal/Entry.h \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	journal/Future.h \
@@ -509,12 +511,12 @@ check_PROGRAMS = $(am__EXEEXT_60) $(am__EXEEXT_61) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	journal/ReplayHandler.h \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	journal/Utils.h
 
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at am__append_129 = libjournal.la
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__append_130 = librbd_internal.la \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at am__append_137 = libjournal.la
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__append_138 = librbd_internal.la \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	librbd_api.la
- at ENABLE_CLIENT_TRUE@@LINUX_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__append_131 = -Xcompiler -Xlinker -Xcompiler '--exclude-libs=ALL'
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__append_132 = librbd.la
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__append_133 = \
+ at ENABLE_CLIENT_TRUE@@LINUX_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__append_139 = -Xcompiler -Xlinker -Xcompiler '--exclude-libs=ALL'
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__append_140 = librbd.la
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__append_141 = \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	librbd/AioCompletion.h \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	librbd/AioImageRequest.h \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	librbd/AioImageRequestWQ.h \
@@ -547,7 +549,7 @@ check_PROGRAMS = $(am__EXEEXT_60) $(am__EXEEXT_61) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	librbd/image/RefreshRequest.h \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	librbd/image/SetSnapRequest.h \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	librbd/journal/Replay.h \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	librbd/journal/Entries.h \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	librbd/journal/Types.h \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	librbd/object_map/InvalidateRequest.h \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	librbd/object_map/LockRequest.h \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	librbd/object_map/Request.h \
@@ -573,7 +575,7 @@ check_PROGRAMS = $(am__EXEEXT_60) $(am__EXEEXT_61) \
 
 
 # inject rgw stuff in the decoder testcase
- at ENABLE_CLIENT_TRUE@am__append_134 = \
+ at ENABLE_CLIENT_TRUE@am__append_142 = \
 @ENABLE_CLIENT_TRUE@	rgw/rgw_dencoder.cc \
 @ENABLE_CLIENT_TRUE@	rgw/rgw_acl.cc \
 @ENABLE_CLIENT_TRUE@	rgw/rgw_basic_types.cc \
@@ -581,9 +583,9 @@ check_PROGRAMS = $(am__EXEEXT_60) $(am__EXEEXT_61) \
 @ENABLE_CLIENT_TRUE@	rgw/rgw_env.cc \
 @ENABLE_CLIENT_TRUE@	rgw/rgw_json_enc.cc
 
- at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE at am__append_135 = librgw.la \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE at am__append_143 = librgw.la \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	libcivetweb.la
- at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE at am__append_136 = \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE at am__append_144 = \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(LIBRADOS) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	libcls_rgw_client.la \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	libcls_log_client.a \
@@ -600,12 +602,12 @@ check_PROGRAMS = $(am__EXEEXT_60) $(am__EXEEXT_61) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	-lfcgi \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	-ldl
 
- at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE at am__append_137 = radosgw \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE at am__append_145 = radosgw \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	radosgw-admin \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	radosgw-object-expirer
- at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE at am__append_138 = ceph_rgw_multiparser \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE at am__append_146 = ceph_rgw_multiparser \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	ceph_rgw_jsonparser
- at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE at am__append_139 = \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE at am__append_147 = \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	rgw/rgw_acl.h \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	rgw/rgw_acl_s3.h \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	rgw/rgw_acl_swift.h \
@@ -660,31 +662,33 @@ check_PROGRAMS = $(am__EXEEXT_60) $(am__EXEEXT_61) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	rgw/rgw_keystone.h \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	rgw/rgw_civetweb.h \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	rgw/rgw_civetweb_log.h \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	rgw/rgw_website.h \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	rgw/rgw_rest_s3website.h \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	civetweb/civetweb.h \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	civetweb/include/civetweb.h \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	civetweb/include/civetweb_conf.h \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	civetweb/src/md5.h
 
- at ENABLE_CLIENT_TRUE@am__append_140 = libcls_lock_client.la \
+ at ENABLE_CLIENT_TRUE@am__append_148 = libcls_lock_client.la \
 @ENABLE_CLIENT_TRUE@	libcls_refcount_client.la \
 @ENABLE_CLIENT_TRUE@	libcls_rgw_client.la libcls_rbd_client.la \
 @ENABLE_CLIENT_TRUE@	libcls_cephfs_client.la \
 @ENABLE_CLIENT_TRUE@	libcls_numops_client.la \
 @ENABLE_CLIENT_TRUE@	libcls_journal_client.la
- at ENABLE_CLIENT_TRUE@am__append_141 = libcls_lock_client.la \
+ at ENABLE_CLIENT_TRUE@am__append_149 = libcls_lock_client.la \
 @ENABLE_CLIENT_TRUE@	libcls_refcount_client.la \
 @ENABLE_CLIENT_TRUE@	libcls_replica_log_client.a \
 @ENABLE_CLIENT_TRUE@	libcls_rgw_client.la libcls_rbd_client.la \
 @ENABLE_CLIENT_TRUE@	libcls_user_client.a \
 @ENABLE_CLIENT_TRUE@	libcls_numops_client.la \
 @ENABLE_CLIENT_TRUE@	libcls_journal_client.la
- at ENABLE_CLIENT_TRUE@am__append_142 = libcls_version_client.a \
+ at ENABLE_CLIENT_TRUE@am__append_150 = libcls_version_client.a \
 @ENABLE_CLIENT_TRUE@	libcls_log_client.a \
 @ENABLE_CLIENT_TRUE@	libcls_statelog_client.a \
 @ENABLE_CLIENT_TRUE@	libcls_timeindex_client.a \
 @ENABLE_CLIENT_TRUE@	libcls_replica_log_client.a \
 @ENABLE_CLIENT_TRUE@	libcls_user_client.a
- at ENABLE_CLIENT_TRUE@am__append_143 = \
+ at ENABLE_CLIENT_TRUE@am__append_151 = \
 @ENABLE_CLIENT_TRUE@	cls/lock/cls_lock_types.h \
 @ENABLE_CLIENT_TRUE@	cls/lock/cls_lock_ops.h \
 @ENABLE_CLIENT_TRUE@	cls/lock/cls_lock_client.h \
@@ -720,7 +724,7 @@ check_PROGRAMS = $(am__EXEEXT_60) $(am__EXEEXT_61) \
 @ENABLE_CLIENT_TRUE@	cls/journal/cls_journal_client.h \
 @ENABLE_CLIENT_TRUE@	cls/journal/cls_journal_types.h
 
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at am__append_144 = libcls_hello.la \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at am__append_152 = libcls_hello.la \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	libcls_numops.la \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	libcls_rbd.la \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	libcls_lock.la \
@@ -734,13 +738,13 @@ check_PROGRAMS = $(am__EXEEXT_60) $(am__EXEEXT_61) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	libcls_rgw.la \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	libcls_cephfs.la \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	libcls_journal.la
- at ENABLE_SERVER_TRUE@@LINUX_TRUE@@WITH_OSD_TRUE at am__append_145 = libcls_kvs.la
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at am__append_146 = \
+ at ENABLE_SERVER_TRUE@@LINUX_TRUE@@WITH_OSD_TRUE at am__append_153 = libcls_kvs.la
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at am__append_154 = \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	key_value_store/key_value_structure.h \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	key_value_store/kv_flat_btree_async.h \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	key_value_store/kvs_arg_types.h
 
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__append_147 = rbd_replay/ActionTypes.h \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__append_155 = rbd_replay/ActionTypes.h \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	rbd_replay/actions.hpp \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	rbd_replay/BoundedBuffer.hpp \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	rbd_replay/BufferReader.h \
@@ -750,26 +754,27 @@ check_PROGRAMS = $(am__EXEEXT_60) $(am__EXEEXT_61) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	rbd_replay/rbd_loc.hpp \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	rbd_replay/rbd_replay_debug.hpp \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	rbd_replay/Replayer.hpp
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__append_148 = librbd_replay_types.la \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__append_156 = librbd_replay_types.la \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	librbd_replay.la \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	librbd_replay_ios.la
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__append_149 = librbd_replay_types.la
- at ENABLE_CLIENT_TRUE@@LINUX_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__append_150 = rbd-replay
- at ENABLE_CLIENT_TRUE@@WITH_BABELTRACE_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__append_151 = rbd-replay-prep
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at am__append_152 = \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__append_157 = librbd_replay_types.la
+ at ENABLE_CLIENT_TRUE@@LINUX_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__append_158 = rbd-replay
+ at ENABLE_CLIENT_TRUE@@WITH_BABELTRACE_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__append_159 = rbd-replay-prep
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at am__append_160 = \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	test/erasure-code/test-erasure-code.sh \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	test/erasure-code/test-erasure-eio.sh
 
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at am__append_153 = test/erasure-code/ceph_erasure_code_benchmark.h \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at am__append_161 = test/erasure-code/ceph_erasure_code_benchmark.h \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	test/erasure-code/ceph_erasure_code_benchmark.h \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	test/erasure-code/ErasureCodeExample.h
- at ENABLE_SERVER_TRUE@@LINUX_TRUE@@WITH_OSD_TRUE at am__append_154 = -ldl
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at am__append_155 = ceph_erasure_code_benchmark \
+ at ENABLE_SERVER_TRUE@@LINUX_TRUE@@WITH_OSD_TRUE at am__append_162 = -ldl
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at am__append_163 = ceph_erasure_code_benchmark \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	ceph_erasure_code
- at ENABLE_SERVER_TRUE@@LINUX_TRUE@@WITH_OSD_TRUE at am__append_156 = -ldl
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at am__append_157 = ceph_erasure_code_non_regression
- at ENABLE_SERVER_TRUE@@LINUX_TRUE@@WITH_OSD_TRUE at am__append_158 = -ldl
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at am__append_159 = libec_example.la \
+ at ENABLE_SERVER_TRUE@@LINUX_TRUE@@WITH_OSD_TRUE at am__append_164 = -ldl
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at am__append_165 = ceph_erasure_code_non_regression
+ at ENABLE_SERVER_TRUE@@LINUX_TRUE@@WITH_OSD_TRUE at am__append_166 = -ldl
+ at ENABLE_SERVER_TRUE@@LINUX_TRUE@@WITH_OSD_TRUE at am__append_167 = -export-symbols-regex '.*__erasure_code_.*'
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at am__append_168 = libec_example.la \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	libec_missing_entry_point.la \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	libec_missing_version.la \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	libec_hangs.la \
@@ -783,19 +788,28 @@ check_PROGRAMS = $(am__EXEEXT_60) $(am__EXEEXT_61) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	libec_test_shec_sse4.la \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	libec_test_shec_sse3.la \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	libec_test_shec_generic.la
- at ENABLE_SERVER_TRUE@@LINUX_TRUE@@WITH_OSD_TRUE at am__append_160 = -ldl
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at am__append_161 = unittest_erasure_code_plugin \
+ at ENABLE_SERVER_TRUE@@LINUX_TRUE@@WITH_OSD_TRUE at am__append_169 = -export-symbols-regex '.*__erasure_code_.*'
+ at ENABLE_SERVER_TRUE@@LINUX_TRUE@@WITH_OSD_TRUE at am__append_170 = -export-symbols-regex '.*__erasure_code_.*'
+ at ENABLE_SERVER_TRUE@@LINUX_TRUE@@WITH_OSD_TRUE at am__append_171 = -export-symbols-regex '.*__erasure_code_.*'
+ at ENABLE_SERVER_TRUE@@LINUX_TRUE@@WITH_OSD_TRUE at am__append_172 = -export-symbols-regex '.*__erasure_code_.*'
+ at ENABLE_SERVER_TRUE@@LINUX_TRUE@@WITH_OSD_TRUE at am__append_173 = -export-symbols-regex '.*__erasure_code_.*'
+ at ENABLE_SERVER_TRUE@@LINUX_TRUE@@WITH_OSD_TRUE at am__append_174 = -export-symbols-regex '.*__erasure_code_.*'
+ at ENABLE_SERVER_TRUE@@LINUX_TRUE@@WITH_OSD_TRUE at am__append_175 = -export-symbols-regex '.*__erasure_code_.*'
+ at ENABLE_SERVER_TRUE@@LINUX_TRUE@@WITH_OSD_TRUE at am__append_176 = -export-symbols-regex '.*__erasure_code_.*'
+ at ENABLE_SERVER_TRUE@@LINUX_TRUE@@WITH_OSD_TRUE at am__append_177 = -export-symbols-regex '.*__erasure_code_.*'
+ at ENABLE_SERVER_TRUE@@LINUX_TRUE@@WITH_OSD_TRUE at am__append_178 = -ldl
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at am__append_179 = unittest_erasure_code_plugin \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	unittest_erasure_code \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	unittest_erasure_code_jerasure \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	unittest_erasure_code_plugin_jerasure
- at ENABLE_SERVER_TRUE@@LINUX_TRUE@@WITH_OSD_TRUE at am__append_162 = -ldl
- at ENABLE_SERVER_TRUE@@LINUX_TRUE@@WITH_OSD_TRUE at am__append_163 = -ldl
- at ENABLE_SERVER_TRUE@@LINUX_TRUE@@WITH_BETTER_YASM_ELF64_TRUE@@WITH_OSD_TRUE at am__append_164 = -ldl
- at ENABLE_SERVER_TRUE@@WITH_BETTER_YASM_ELF64_TRUE@@WITH_OSD_TRUE at am__append_165 = unittest_erasure_code_isa \
+ at ENABLE_SERVER_TRUE@@LINUX_TRUE@@WITH_OSD_TRUE at am__append_180 = -ldl
+ at ENABLE_SERVER_TRUE@@LINUX_TRUE@@WITH_OSD_TRUE at am__append_181 = -ldl
+ at ENABLE_SERVER_TRUE@@LINUX_TRUE@@WITH_BETTER_YASM_ELF64_TRUE@@WITH_OSD_TRUE at am__append_182 = -ldl
+ at ENABLE_SERVER_TRUE@@WITH_BETTER_YASM_ELF64_TRUE@@WITH_OSD_TRUE at am__append_183 = unittest_erasure_code_isa \
 @ENABLE_SERVER_TRUE@@WITH_BETTER_YASM_ELF64_TRUE@@WITH_OSD_TRUE@	unittest_erasure_code_plugin_isa
- at ENABLE_SERVER_TRUE@@LINUX_TRUE@@WITH_BETTER_YASM_ELF64_TRUE@@WITH_OSD_TRUE at am__append_166 = -ldl
- at ENABLE_SERVER_TRUE@@LINUX_TRUE@@WITH_OSD_TRUE at am__append_167 = -ldl
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at am__append_168 =  \
+ at ENABLE_SERVER_TRUE@@LINUX_TRUE@@WITH_BETTER_YASM_ELF64_TRUE@@WITH_OSD_TRUE at am__append_184 = -ldl
+ at ENABLE_SERVER_TRUE@@LINUX_TRUE@@WITH_OSD_TRUE at am__append_185 = -ldl
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at am__append_186 =  \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	unittest_erasure_code_lrc \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	unittest_erasure_code_plugin_lrc \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	unittest_erasure_code_shec \
@@ -806,52 +820,63 @@ check_PROGRAMS = $(am__EXEEXT_60) $(am__EXEEXT_61) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	unittest_erasure_code_example \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	unittest_compression_plugin \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	unittest_compression_snappy \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	unittest_compression_plugin_snappy
- at ENABLE_SERVER_TRUE@@LINUX_TRUE@@WITH_OSD_TRUE at am__append_169 = -ldl
- at ENABLE_SERVER_TRUE@@LINUX_TRUE@@WITH_OSD_TRUE at am__append_170 = -ldl
- at ENABLE_SERVER_TRUE@@LINUX_TRUE@@WITH_OSD_TRUE at am__append_171 = -ldl
- at ENABLE_SERVER_TRUE@@LINUX_TRUE@@WITH_OSD_TRUE at am__append_172 = -ldl
- at ENABLE_SERVER_TRUE@@LINUX_TRUE@@WITH_OSD_TRUE at am__append_173 = -ldl
- at ENABLE_SERVER_TRUE@@LINUX_TRUE@@WITH_OSD_TRUE at am__append_174 = -ldl
- at ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE at am__append_175 = test/messenger/message_helper.h \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	unittest_compression_plugin_snappy \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	unittest_compression_zlib \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	unittest_compression_plugin_zlib
+ at ENABLE_SERVER_TRUE@@LINUX_TRUE@@WITH_OSD_TRUE at am__append_187 = -ldl
+ at ENABLE_SERVER_TRUE@@LINUX_TRUE@@WITH_OSD_TRUE at am__append_188 = -ldl
+ at ENABLE_SERVER_TRUE@@LINUX_TRUE@@WITH_OSD_TRUE at am__append_189 = -ldl
+ at ENABLE_SERVER_TRUE@@LINUX_TRUE@@WITH_OSD_TRUE at am__append_190 = -ldl
+ at ENABLE_SERVER_TRUE@@LINUX_TRUE@@WITH_OSD_TRUE at am__append_191 = -ldl
+ at ENABLE_SERVER_TRUE@@LINUX_TRUE@@WITH_OSD_TRUE at am__append_192 = -ldl
+ at ENABLE_SERVER_TRUE@@LINUX_TRUE@@WITH_OSD_TRUE at am__append_193 = -export-symbols-regex '.*__erasure_code_.*'
+ at ENABLE_SERVER_TRUE@@LINUX_TRUE@@WITH_OSD_TRUE at am__append_194 = -export-symbols-regex '.*__erasure_code_.*'
+ at ENABLE_SERVER_TRUE@@LINUX_TRUE@@WITH_OSD_TRUE at am__append_195 = -export-symbols-regex '.*__erasure_code_.*'
+ at ENABLE_SERVER_TRUE@@LINUX_TRUE@@WITH_OSD_TRUE at am__append_196 = -export-symbols-regex '.*__erasure_code_.*'
+ at ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE at am__append_197 = test/messenger/message_helper.h \
 @ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@	test/messenger/simple_dispatcher.h \
 @ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@	test/messenger/xio_dispatcher.h
- at ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@@LINUX_TRUE at am__append_176 = -ldl
- at ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@@LINUX_TRUE at am__append_177 = -ldl
- at ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE at am__append_178 = simple_server \
+ at ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@@LINUX_TRUE at am__append_198 = -ldl
+ at ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@@LINUX_TRUE at am__append_199 = -ldl
+ at ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE at am__append_200 = simple_server \
 @ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@	simple_client xio_server \
 @ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@	xio_client
- at ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@@LINUX_TRUE at am__append_179 = -ldl
- at ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@@LINUX_TRUE at am__append_180 = -ldl
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at am__append_181 = test/compressor/compressor_example.h
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at am__append_182 = libceph_example.la
- at ENABLE_SERVER_TRUE@@LINUX_TRUE@@WITH_OSD_TRUE at am__append_183 = -ldl
- at ENABLE_SERVER_TRUE@@LINUX_TRUE@@WITH_OSD_TRUE at am__append_184 = -ldl
- at ENABLE_SERVER_TRUE@@LINUX_TRUE@@WITH_OSD_TRUE at am__append_185 = -ldl
- at COMPILER_HAS_VTA_TRUE@@ENABLE_CLIENT_TRUE at am__append_186 = -fno-var-tracking-assignments
- at COMPILER_HAS_VTA_TRUE@@ENABLE_CLIENT_TRUE at am__append_187 = -fno-var-tracking-assignments
- at ENABLE_CLIENT_TRUE@@WITH_RBD_TRUE at am__append_188 = -DWITH_RBD
- at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE at am__append_189 = -DWITH_RADOSGW
- at ENABLE_CLIENT_TRUE@am__append_190 = ceph-dencoder
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at am__append_191 = libradostest.la \
+ at ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@@LINUX_TRUE at am__append_201 = -ldl
+ at ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@@LINUX_TRUE at am__append_202 = -ldl
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at am__append_203 = test/compressor/compressor_example.h
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at am__append_204 = libceph_example.la
+ at ENABLE_SERVER_TRUE@@LINUX_TRUE@@WITH_OSD_TRUE at am__append_205 = -ldl
+ at ENABLE_SERVER_TRUE@@LINUX_TRUE@@WITH_OSD_TRUE at am__append_206 = -ldl
+ at ENABLE_SERVER_TRUE@@LINUX_TRUE@@WITH_OSD_TRUE at am__append_207 = -ldl
+ at ENABLE_SERVER_TRUE@@LINUX_TRUE@@WITH_OSD_TRUE at am__append_208 = -ldl
+ at ENABLE_SERVER_TRUE@@LINUX_TRUE@@WITH_OSD_TRUE at am__append_209 = -ldl
+ at COMPILER_HAS_VTA_TRUE@@ENABLE_CLIENT_TRUE at am__append_210 = -fno-var-tracking-assignments
+ at COMPILER_HAS_VTA_TRUE@@ENABLE_CLIENT_TRUE at am__append_211 = -fno-var-tracking-assignments
+ at ENABLE_CLIENT_TRUE@@WITH_RBD_TRUE at am__append_212 = -DWITH_RBD
+ at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE at am__append_213 = -DWITH_RADOSGW
+ at ENABLE_CLIENT_TRUE@am__append_214 = ceph-dencoder
+ at ENABLE_CLIENT_TRUE@am__append_215 = \
+ at ENABLE_CLIENT_TRUE@	test/encoding/test_ceph_time.h
+
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at am__append_216 = libradostest.la \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	librados_test_stub.la
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at am__append_192 = ceph_test_rados \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at am__append_217 = ceph_test_rados \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	ceph_test_mutate
- at ENABLE_CLIENT_TRUE@@WITH_BUILD_TESTS_TRUE@@WITH_RADOS_TRUE at am__append_193 = test_build_librados
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at am__append_194 =  \
+ at ENABLE_CLIENT_TRUE@@WITH_BUILD_TESTS_TRUE@@WITH_RADOS_TRUE at am__append_218 = test_build_librados
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at am__append_219 =  \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	ceph_smalliobench \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	ceph_omapbench \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	ceph_objectstore_bench
- at ENABLE_CLIENT_TRUE@@LINUX_TRUE@@WITH_RADOS_TRUE at am__append_195 = ceph_kvstorebench \
+ at ENABLE_CLIENT_TRUE@@LINUX_TRUE@@WITH_RADOS_TRUE at am__append_220 = ceph_kvstorebench \
 @ENABLE_CLIENT_TRUE@@LINUX_TRUE@@WITH_RADOS_TRUE@	ceph_test_rados_list_parallel \
 @ENABLE_CLIENT_TRUE@@LINUX_TRUE@@WITH_RADOS_TRUE@	ceph_test_rados_open_pools_parallel \
 @ENABLE_CLIENT_TRUE@@LINUX_TRUE@@WITH_RADOS_TRUE@	ceph_test_rados_delete_pools_parallel \
 @ENABLE_CLIENT_TRUE@@LINUX_TRUE@@WITH_RADOS_TRUE@	ceph_test_rados_watch_notify
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at am__append_196 =  \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at am__append_221 =  \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	unittest_librados \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	unittest_librados_config \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	unittest_journal
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at am__append_197 =  \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at am__append_222 =  \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	ceph_multi_stress_watch \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	ceph_test_cls_rbd \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	ceph_test_cls_refcount \
@@ -879,7 +904,7 @@ check_PROGRAMS = $(am__EXEEXT_60) $(am__EXEEXT_61) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	ceph_test_rados_api_tier \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	ceph_test_rados_api_lock \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	ceph_test_stress_watch
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at am__append_198 = \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at am__append_223 = \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	test/librados_test_stub/LibradosTestStub.h \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	test/librados_test_stub/MockTestMemIoCtxImpl.h \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	test/librados_test_stub/MockTestMemRadosClient.h \
@@ -890,14 +915,17 @@ check_PROGRAMS = $(am__EXEEXT_60) $(am__EXEEXT_61) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	test/librados_test_stub/TestMemIoCtxImpl.h \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	test/librados_test_stub/TestIoCtxImpl.h
 
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__append_199 = ceph_smalliobenchrbd \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__append_224 = ceph_smalliobenchrbd \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	ceph_test_librbd \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	ceph_test_librbd_api
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__append_200 = unittest_rbd_replay
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__append_201 = librbd_test.la
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__append_202 = unittest_librbd
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__append_203 = test/run-rbd-unit-tests.sh
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__append_204 = \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	ceph_test_librbd_api \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	ceph_test_rbd_mirror
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__append_225 = unittest_rbd_replay
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__append_226 = librbd_test.la \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	librbd_mirror_test.la
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__append_227 = unittest_librbd \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	unittest_rbd_mirror
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__append_228 = test/run-rbd-unit-tests.sh
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__append_229 = \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	test/librbd/test_fixture.h \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	test/librbd/test_mock_fixture.h \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	test/librbd/test_support.h \
@@ -912,43 +940,43 @@ check_PROGRAMS = $(am__EXEEXT_60) $(am__EXEEXT_61) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	test/librbd/mock/MockReadahead.h \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	test/librbd/object_map/mock/MockInvalidateRequest.h
 
- at ENABLE_CLIENT_TRUE@@LINUX_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__append_205 = ceph_test_librbd_fsx
- at ENABLE_CLIENT_TRUE@@WITH_RADOSSTRIPER_TRUE@@WITH_RADOS_TRUE at am__append_206 = libradosstripertest.la
- at ENABLE_CLIENT_TRUE@@WITH_RADOSSTRIPER_TRUE@@WITH_RADOS_TRUE at am__append_207 = ceph_test_rados_striper_api_io \
+ at ENABLE_CLIENT_TRUE@@LINUX_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__append_230 = ceph_test_librbd_fsx
+ at ENABLE_CLIENT_TRUE@@WITH_RADOSSTRIPER_TRUE@@WITH_RADOS_TRUE at am__append_231 = libradosstripertest.la
+ at ENABLE_CLIENT_TRUE@@WITH_RADOSSTRIPER_TRUE@@WITH_RADOS_TRUE at am__append_232 = ceph_test_rados_striper_api_io \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSSTRIPER_TRUE@@WITH_RADOS_TRUE@	ceph_test_rados_striper_api_aio \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSSTRIPER_TRUE@@WITH_RADOS_TRUE@	ceph_test_rados_striper_api_striping
- at ENABLE_CLIENT_TRUE@@WITH_BUILD_TESTS_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE at am__append_208 = test_build_libcephfs
- at ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE at am__append_209 = unittest_encoding \
+ at ENABLE_CLIENT_TRUE@@WITH_BUILD_TESTS_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE at am__append_233 = test_build_libcephfs
+ at ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE at am__append_234 = unittest_encoding \
 @ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE@	unittest_base64 \
 @ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE@	unittest_run_cmd \
 @ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE@	unittest_simple_spin \
 @ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE@	unittest_libcephfs_config
- at ENABLE_CLIENT_TRUE@@LINUX_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE at am__append_210 = test/libcephfs/flock.cc
- at ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE at am__append_211 = ceph_test_libcephfs \
+ at ENABLE_CLIENT_TRUE@@LINUX_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE at am__append_235 = test/libcephfs/flock.cc
+ at ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE at am__append_236 = ceph_test_libcephfs \
 @ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE@	ceph_test_c_headers
- at CLANG_FALSE@@ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE at am__append_212 = -Werror -Wold-style-declaration
- at ENABLE_CLIENT_TRUE@@WITH_BUILD_TESTS_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE at am__append_213 = test_build_librgw
- at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE at am__append_214 = ceph_test_cors \
+ at CLANG_FALSE@@ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE at am__append_237 = -Werror -Wold-style-declaration
+ at ENABLE_CLIENT_TRUE@@WITH_BUILD_TESTS_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE at am__append_238 = test_build_librgw
+ at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE at am__append_239 = ceph_test_cors \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	ceph_test_rgw_manifest \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	ceph_test_rgw_obj \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	ceph_test_cls_rgw_meta \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	ceph_test_cls_rgw_log \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	ceph_test_cls_rgw_opstate \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	ceph_test_cls_rgw
- at ENABLE_SERVER_TRUE@am__append_215 = ceph_test_async_driver \
+ at ENABLE_SERVER_TRUE@am__append_240 = ceph_test_async_driver \
 @ENABLE_SERVER_TRUE@	ceph_test_msgr ceph_test_trans \
 @ENABLE_SERVER_TRUE@	ceph_test_mon_workloadgen \
 @ENABLE_SERVER_TRUE@	ceph_test_mon_msg ceph_perf_objectstore \
 @ENABLE_SERVER_TRUE@	ceph_perf_local ceph_perf_msgr_server \
 @ENABLE_SERVER_TRUE@	ceph_perf_msgr_client
- at ENABLE_SERVER_TRUE@am__append_216 = test/perf_helper.h
- at ENABLE_SERVER_TRUE@@LINUX_TRUE at am__append_217 =  \
+ at ENABLE_SERVER_TRUE@am__append_241 = test/perf_helper.h
+ at ENABLE_SERVER_TRUE@@LINUX_TRUE at am__append_242 =  \
 @ENABLE_SERVER_TRUE@@LINUX_TRUE@	ceph_test_objectstore \
 @ENABLE_SERVER_TRUE@@LINUX_TRUE@	ceph_test_keyvaluedb \
 @ENABLE_SERVER_TRUE@@LINUX_TRUE@	ceph_test_filestore
- at ENABLE_SERVER_TRUE@@LINUX_TRUE at am__append_218 = unittest_bluefs \
+ at ENABLE_SERVER_TRUE@@LINUX_TRUE at am__append_243 = unittest_bluefs \
 @ENABLE_SERVER_TRUE@@LINUX_TRUE@	unittest_bluestore_types
- at ENABLE_SERVER_TRUE@am__append_219 =  \
+ at ENABLE_SERVER_TRUE@am__append_244 =  \
 @ENABLE_SERVER_TRUE@	ceph_test_objectstore_workloadgen \
 @ENABLE_SERVER_TRUE@	ceph_test_filestore_idempotent \
 @ENABLE_SERVER_TRUE@	ceph_test_filestore_idempotent_sequence \
@@ -956,65 +984,72 @@ check_PROGRAMS = $(am__EXEEXT_60) $(am__EXEEXT_61) \
 @ENABLE_SERVER_TRUE@	ceph_test_object_map \
 @ENABLE_SERVER_TRUE@	ceph_test_keyvaluedb_atomicity \
 @ENABLE_SERVER_TRUE@	ceph_test_keyvaluedb_iterators
- at ENABLE_CLIENT_TRUE@@ENABLE_SERVER_TRUE@@WITH_RADOS_TRUE at am__append_220 = ceph_smalliobenchfs \
+ at ENABLE_SERVER_TRUE@am__append_245 = unittest_transaction
+ at ENABLE_CLIENT_TRUE@@ENABLE_SERVER_TRUE@@WITH_RADOS_TRUE at am__append_246 = ceph_smalliobenchfs \
 @ENABLE_CLIENT_TRUE@@ENABLE_SERVER_TRUE@@WITH_RADOS_TRUE@	ceph_smalliobenchdumb \
 @ENABLE_CLIENT_TRUE@@ENABLE_SERVER_TRUE@@WITH_RADOS_TRUE@	ceph_tpbench
- at ENABLE_SERVER_TRUE@@WITH_MON_TRUE at am__append_221 = ceph_test_keys
- at ENABLE_SERVER_TRUE@@WITH_MON_TRUE at am__append_222 = get_command_descriptions
- at ENABLE_SERVER_TRUE@@WITH_MON_TRUE at am__append_223 =  \
+ at ENABLE_SERVER_TRUE@@WITH_MON_TRUE at am__append_247 = ceph_test_keys
+ at ENABLE_SERVER_TRUE@@WITH_MON_TRUE at am__append_248 = get_command_descriptions
+ at ENABLE_SERVER_TRUE@@WITH_MON_TRUE at am__append_249 =  \
 @ENABLE_SERVER_TRUE@@WITH_MON_TRUE@	unittest_mon_moncap \
 @ENABLE_SERVER_TRUE@@WITH_MON_TRUE@	unittest_mon_pgmap
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at am__append_224 =  \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at am__append_250 =  \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	unittest_ecbackend \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	unittest_osdscrub \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	unittest_pglog \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	unittest_hitset \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	unittest_osd_osdcap \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	unittest_pageset
- at ENABLE_SERVER_TRUE@@LINUX_TRUE@@WITH_OSD_TRUE at am__append_225 = -ldl
- at ENABLE_SERVER_TRUE@@LINUX_TRUE@@WITH_OSD_TRUE at am__append_226 = -ldl
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at am__append_227 = ceph_test_snap_mapper
- at ENABLE_SERVER_TRUE@@WITH_SLIBROCKSDB_TRUE at am__append_228 = unittest_rocksdb_option_static
- at ENABLE_SERVER_TRUE@@WITH_DLIBROCKSDB_TRUE at am__append_229 = unittest_rocksdb_option
- at ENABLE_SERVER_TRUE@am__append_230 = unittest_chain_xattr \
+ at ENABLE_SERVER_TRUE@@LINUX_TRUE@@WITH_OSD_TRUE at am__append_251 = -ldl
+ at ENABLE_SERVER_TRUE@@LINUX_TRUE@@WITH_OSD_TRUE at am__append_252 = -ldl
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at am__append_253 = ceph_test_snap_mapper
+ at ENABLE_SERVER_TRUE@@WITH_SLIBROCKSDB_TRUE at am__append_254 = unittest_rocksdb_option_static
+ at ENABLE_SERVER_TRUE@@WITH_DLIBROCKSDB_TRUE at am__append_255 = unittest_rocksdb_option
+ at ENABLE_SERVER_TRUE@am__append_256 = unittest_chain_xattr \
 @ENABLE_SERVER_TRUE@	unittest_lfnindex
- at ENABLE_SERVER_TRUE@@WITH_MDS_TRUE at am__append_231 = unittest_mds_authcap
- at WITH_BUILD_TESTS_TRUE@am__append_232 = test_build_libcommon
- at LINUX_TRUE@am__append_233 = libsystest.la
- at SOLARIS_TRUE@am__append_234 = \
+ at ENABLE_SERVER_TRUE@@WITH_MDS_TRUE at am__append_257 = unittest_mds_authcap
+ at WITH_BUILD_TESTS_TRUE@am__append_258 = test_build_libcommon
+ at LINUX_TRUE@am__append_259 = libsystest.la
+ at SOLARIS_TRUE@am__append_260 = \
 @SOLARIS_TRUE@	-lsocket -lnsl
 
- at LINUX_TRUE@am__append_235 = unittest_blkdev
- at LINUX_TRUE@am__append_236 = ceph_test_get_blkdev_size
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at am__append_237 =  \
+ at LINUX_TRUE@am__append_261 = unittest_blkdev
+ at LINUX_TRUE@am__append_262 = ceph_test_get_blkdev_size
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at am__append_263 =  \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	ceph_scratchtool \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	ceph_scratchtoolpp \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	ceph_radosacl
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at am__append_238 = rados
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__append_239 = \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	tools/rbd/ArgumentTypes.h \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at am__append_264 = rados
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__append_265 = tools/rbd/ArgumentTypes.h \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	tools/rbd/IndentStream.h \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	tools/rbd/OptionPrinter.h \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	tools/rbd/Shell.h \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	tools/rbd/Utils.h
-
- at ENABLE_CLIENT_TRUE@@LINUX_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__append_240 = rbd \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	tools/rbd/Utils.h \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	tools/rbd_mirror/ClusterWatcher.h \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	tools/rbd_mirror/ImageReplayer.h \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	tools/rbd_mirror/Mirror.h \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	tools/rbd_mirror/PoolWatcher.h \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	tools/rbd_mirror/Replayer.h \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	tools/rbd_mirror/types.h
+ at ENABLE_CLIENT_TRUE@@LINUX_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__append_266 = rbd \
 @ENABLE_CLIENT_TRUE@@LINUX_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	rbd-nbd
- at ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE at am__append_241 = ceph-client-debug
- at ENABLE_SERVER_TRUE@am__append_242 = ceph-osdomap-tool \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__append_267 = librbd_mirror_internal.la
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__append_268 = rbd-mirror
+ at ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE at am__append_269 = ceph-client-debug
+ at ENABLE_SERVER_TRUE@am__append_270 = ceph-osdomap-tool \
 @ENABLE_SERVER_TRUE@	ceph-monstore-tool ceph-kvstore-tool
- at ENABLE_SERVER_TRUE@@LINUX_TRUE@@WITH_OSD_TRUE at am__append_243 = -ldl
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at am__append_244 = ceph-objectstore-tool
- at ENABLE_CLIENT_TRUE@@ENABLE_SERVER_TRUE@@WITH_MDS_TRUE@@WITH_RADOS_TRUE at am__append_245 = cephfs-journal-tool \
+ at ENABLE_SERVER_TRUE@@LINUX_TRUE@@WITH_OSD_TRUE at am__append_271 = -ldl
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at am__append_272 = ceph-objectstore-tool
+ at ENABLE_CLIENT_TRUE@@ENABLE_SERVER_TRUE@@WITH_MDS_TRUE@@WITH_RADOS_TRUE at am__append_273 = cephfs-journal-tool \
 @ENABLE_CLIENT_TRUE@@ENABLE_SERVER_TRUE@@WITH_MDS_TRUE@@WITH_RADOS_TRUE@	cephfs-table-tool \
 @ENABLE_CLIENT_TRUE@@ENABLE_SERVER_TRUE@@WITH_MDS_TRUE@@WITH_RADOS_TRUE@	cephfs-data-scan
- at WITH_LTTNG_TRUE@am__append_246 = \
+ at WITH_LTTNG_TRUE@am__append_274 = \
 @WITH_LTTNG_TRUE@	libosd_tp.la \
 @WITH_LTTNG_TRUE@	libos_tp.la \
 @WITH_LTTNG_TRUE@	librados_tp.la \
 @WITH_LTTNG_TRUE@	librbd_tp.la
 
- at WITH_LTTNG_TRUE@am__append_247 = \
+ at WITH_LTTNG_TRUE@am__append_275 = \
 @WITH_LTTNG_TRUE@	tracing/librados.h \
 @WITH_LTTNG_TRUE@	tracing/librbd.h \
 @WITH_LTTNG_TRUE@	tracing/objectstore.h \
@@ -1022,57 +1057,56 @@ check_PROGRAMS = $(am__EXEEXT_60) $(am__EXEEXT_61) \
 @WITH_LTTNG_TRUE@	tracing/osd.h \
 @WITH_LTTNG_TRUE@	tracing/pg.h
 
- at ENABLE_CLIENT_TRUE@@WITH_CYTHON_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__append_248 = pybind-all
- at ENABLE_CLIENT_TRUE@@WITH_CYTHON_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__append_249 = pybind-clean
- at ENABLE_CLIENT_TRUE@@WITH_CYTHON_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__append_250 = pybind-install-exec
-TESTS = $(am__EXEEXT_60) $(check_SCRIPTS)
- at ENABLE_CLIENT_TRUE@am__append_251 = \
+ at ENABLE_CLIENT_TRUE@@WITH_CYTHON_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__append_276 = pybind-all
+ at ENABLE_CLIENT_TRUE@@WITH_CYTHON_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__append_277 = pybind-clean
+ at ENABLE_CLIENT_TRUE@@WITH_CYTHON_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__append_278 = pybind-install-exec
+TESTS = $(am__EXEEXT_62) $(check_SCRIPTS)
+ at ENABLE_CLIENT_TRUE@am__append_279 = \
 @ENABLE_CLIENT_TRUE@	pybind/ceph_argparse.py \
 @ENABLE_CLIENT_TRUE@	pybind/ceph_daemon.py
 
- at ENABLE_CLIENT_TRUE@am__append_252 = ceph-syn
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at am__append_253 = \
+ at ENABLE_CLIENT_TRUE@am__append_280 = ceph-syn
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at am__append_281 = \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(srcdir)/bash_completion/rados \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(srcdir)/bash_completion/radosgw-admin
 
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at am__append_254 = pybind/rados.py
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at am__append_255 = librados-config
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__append_256 = \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at am__append_282 = pybind/rados.py
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at am__append_283 = librados-config
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__append_284 = \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(srcdir)/bash_completion/rbd
 
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__append_257 = \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__append_285 = \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	ceph-rbdnamer \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	rbd-replay-many \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@        rbdmap
 
- at ENABLE_CLIENT_TRUE@@LINUX_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__append_258 = libkrbd.la
- at ENABLE_CLIENT_TRUE@@WITH_FUSE_TRUE@@WITH_RADOS_TRUE at am__append_259 = ceph-fuse
- at ENABLE_CLIENT_TRUE@@WITH_FUSE_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__append_260 = rbd-fuse
- at ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE at am__append_261 = cephfs
- at ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE at am__append_262 = pybind/cephfs.py \
+ at ENABLE_CLIENT_TRUE@@LINUX_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__append_286 = libkrbd.la
+ at ENABLE_CLIENT_TRUE@@WITH_FUSE_TRUE@@WITH_RADOS_TRUE at am__append_287 = ceph-fuse
+ at ENABLE_CLIENT_TRUE@@WITH_FUSE_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__append_288 = rbd-fuse
+ at ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE at am__append_289 = cephfs
+ at ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE at am__append_290 = pybind/cephfs.py \
 @ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE@	pybind/ceph_volume_client.py
- at ENABLE_CLIENT_TRUE@@LINUX_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE at am__append_263 = -Xcompiler -Xlinker -Xcompiler '--exclude-libs=libcommon.a'
- at ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE at am__append_264 = libcephfs.la
- at ENABLE_CEPHFS_JAVA_TRUE@@ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE at am__append_265 = libcephfs_jni.la
- at ENABLE_SERVER_TRUE@am__append_266 = ceph-run ceph-rest-api \
+ at ENABLE_CLIENT_TRUE@@LINUX_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE at am__append_291 = -Xcompiler -Xlinker -Xcompiler '--exclude-libs=libcommon.a'
+ at ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE at am__append_292 = libcephfs.la
+ at ENABLE_CEPHFS_JAVA_TRUE@@ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE at am__append_293 = libcephfs_jni.la
+ at ENABLE_SERVER_TRUE@am__append_294 = ceph-run ceph-rest-api \
 @ENABLE_SERVER_TRUE@	ceph-debugpack ceph-crush-location \
 @ENABLE_SERVER_TRUE@	ceph-coverage
- at ENABLE_SERVER_TRUE@am__append_267 = pybind/ceph_rest_api.py
- at ENABLE_SERVER_TRUE@am__append_268 = ceph-coverage init-ceph
- at ENABLE_SERVER_TRUE@am__append_269 = init-ceph
- at ENABLE_SERVER_TRUE@@LINUX_TRUE at am__append_270 = mount.ceph
- at ENABLE_SERVER_TRUE@am__append_271 = mount.fuse.ceph
- at ENABLE_SERVER_TRUE@@WITH_MON_TRUE at am__append_272 = ceph-mon
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at am__append_273 = \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	ceph-disk \
+ at ENABLE_SERVER_TRUE@am__append_295 = pybind/ceph_rest_api.py
+ at ENABLE_SERVER_TRUE@am__append_296 = ceph-coverage init-ceph
+ at ENABLE_SERVER_TRUE@am__append_297 = init-ceph
+ at ENABLE_SERVER_TRUE@@LINUX_TRUE at am__append_298 = mount.ceph
+ at ENABLE_SERVER_TRUE@am__append_299 = mount.fuse.ceph
+ at ENABLE_SERVER_TRUE@@WITH_MON_TRUE at am__append_300 = ceph-mon
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at am__append_301 = \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	ceph-disk-udev
 
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at am__append_274 = \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at am__append_302 = \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	ceph-clsinfo
 
- at ENABLE_SERVER_TRUE@@WITH_LTTNG_TRUE@@WITH_OSD_TRUE at am__append_275 = $(LIBOSD_TP)
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at am__append_276 = ceph-osd
- at ENABLE_SERVER_TRUE@@WITH_MDS_TRUE at am__append_277 = ceph-mds
+ at ENABLE_SERVER_TRUE@@WITH_LTTNG_TRUE@@WITH_OSD_TRUE at am__append_303 = $(LIBOSD_TP)
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at am__append_304 = ceph-osd
+ at ENABLE_SERVER_TRUE@@WITH_MDS_TRUE at am__append_305 = ceph-mds
 subdir = src
 ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
 am__aclocal_m4_deps = $(top_srcdir)/m4/ac_check_classpath.m4 \
@@ -1160,7 +1194,7 @@ libkv_a_AR = $(AR) $(ARFLAGS)
 am__DEPENDENCIES_1 =
 @ENABLE_SERVER_TRUE@@WITH_KINETIC_TRUE at am__DEPENDENCIES_2 =  \
 @ENABLE_SERVER_TRUE@@WITH_KINETIC_TRUE@	libcrypto.a
- at ENABLE_SERVER_TRUE@libkv_a_DEPENDENCIES = $(am__append_35) \
+ at ENABLE_SERVER_TRUE@libkv_a_DEPENDENCIES = $(am__append_36) \
 @ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_1) \
 @ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_2)
 am__libkv_a_SOURCES_DIST = kv/KeyValueDB.cc kv/LevelDBStore.cc \
@@ -1205,30 +1239,31 @@ am__libos_a_SOURCES_DIST = os/filestore/chain_xattr.cc \
 	os/filestore/GenericFileStoreBackend.cc \
 	os/filestore/HashIndex.cc os/filestore/IndexManager.cc \
 	os/filestore/JournalingObjectStore.cc os/filestore/LFNIndex.cc \
-	os/filestore/WBThrottle.cc os/fs/FS.cc \
-	os/keyvaluestore/GenericObjectMap.cc \
-	os/keyvaluestore/KeyValueStore.cc os/kstore/kv.cc \
+	os/filestore/WBThrottle.cc os/fs/FS.cc os/kstore/kv.cc \
 	os/kstore/KStore.cc os/memstore/MemStore.cc os/ObjectStore.cc \
-	os/bluestore/kv.cc os/bluestore/Allocator.cc \
+	os/FuseStore.cc os/bluestore/kv.cc os/bluestore/Allocator.cc \
 	os/bluestore/BlockDevice.cc os/bluestore/BlueFS.cc \
 	os/bluestore/BlueRocksEnv.cc os/bluestore/BlueStore.cc \
-	os/bluestore/FreelistManager.cc \
+	os/bluestore/FreelistManager.cc os/bluestore/KernelDevice.cc \
 	os/bluestore/StupidAllocator.cc \
 	os/filestore/BtrfsFileStoreBackend.cc \
 	os/filestore/XfsFileStoreBackend.cc os/fs/XFS.cc \
-	os/filestore/ZFSFileStoreBackend.cc
- at ENABLE_SERVER_TRUE@@WITH_LIBAIO_TRUE at am__objects_4 = os/bluestore/libos_a-kv.$(OBJEXT) \
+	os/filestore/ZFSFileStoreBackend.cc os/bluestore/NVMEDevice.cc
+ at ENABLE_SERVER_TRUE@@WITH_FUSE_TRUE at am__objects_4 = os/libos_a-FuseStore.$(OBJEXT)
+ at ENABLE_SERVER_TRUE@@WITH_LIBAIO_TRUE at am__objects_5 = os/bluestore/libos_a-kv.$(OBJEXT) \
 @ENABLE_SERVER_TRUE@@WITH_LIBAIO_TRUE@	os/bluestore/libos_a-Allocator.$(OBJEXT) \
 @ENABLE_SERVER_TRUE@@WITH_LIBAIO_TRUE@	os/bluestore/libos_a-BlockDevice.$(OBJEXT) \
 @ENABLE_SERVER_TRUE@@WITH_LIBAIO_TRUE@	os/bluestore/libos_a-BlueFS.$(OBJEXT) \
 @ENABLE_SERVER_TRUE@@WITH_LIBAIO_TRUE@	os/bluestore/libos_a-BlueRocksEnv.$(OBJEXT) \
 @ENABLE_SERVER_TRUE@@WITH_LIBAIO_TRUE@	os/bluestore/libos_a-BlueStore.$(OBJEXT) \
 @ENABLE_SERVER_TRUE@@WITH_LIBAIO_TRUE@	os/bluestore/libos_a-FreelistManager.$(OBJEXT) \
+ at ENABLE_SERVER_TRUE@@WITH_LIBAIO_TRUE@	os/bluestore/libos_a-KernelDevice.$(OBJEXT) \
 @ENABLE_SERVER_TRUE@@WITH_LIBAIO_TRUE@	os/bluestore/libos_a-StupidAllocator.$(OBJEXT)
- at ENABLE_SERVER_TRUE@@LINUX_TRUE at am__objects_5 = os/filestore/libos_a-BtrfsFileStoreBackend.$(OBJEXT)
- at ENABLE_SERVER_TRUE@@WITH_LIBXFS_TRUE at am__objects_6 = os/filestore/libos_a-XfsFileStoreBackend.$(OBJEXT) \
+ at ENABLE_SERVER_TRUE@@LINUX_TRUE at am__objects_6 = os/filestore/libos_a-BtrfsFileStoreBackend.$(OBJEXT)
+ at ENABLE_SERVER_TRUE@@WITH_LIBXFS_TRUE at am__objects_7 = os/filestore/libos_a-XfsFileStoreBackend.$(OBJEXT) \
 @ENABLE_SERVER_TRUE@@WITH_LIBXFS_TRUE@	os/fs/libos_a-XFS.$(OBJEXT)
- at ENABLE_SERVER_TRUE@@WITH_LIBZFS_TRUE at am__objects_7 = os/filestore/libos_a-ZFSFileStoreBackend.$(OBJEXT)
+ at ENABLE_SERVER_TRUE@@WITH_LIBZFS_TRUE at am__objects_8 = os/filestore/libos_a-ZFSFileStoreBackend.$(OBJEXT)
+ at ENABLE_SERVER_TRUE@@WITH_SPDK_TRUE at am__objects_9 = os/bluestore/libos_a-NVMEDevice.$(OBJEXT)
 @ENABLE_SERVER_TRUE at am_libos_a_OBJECTS = os/filestore/libos_a-chain_xattr.$(OBJEXT) \
 @ENABLE_SERVER_TRUE@	os/filestore/libos_a-DBObjectMap.$(OBJEXT) \
 @ENABLE_SERVER_TRUE@	os/filestore/libos_a-FileJournal.$(OBJEXT) \
@@ -1240,25 +1275,24 @@ am__libos_a_SOURCES_DIST = os/filestore/chain_xattr.cc \
 @ENABLE_SERVER_TRUE@	os/filestore/libos_a-LFNIndex.$(OBJEXT) \
 @ENABLE_SERVER_TRUE@	os/filestore/libos_a-WBThrottle.$(OBJEXT) \
 @ENABLE_SERVER_TRUE@	os/fs/libos_a-FS.$(OBJEXT) \
- at ENABLE_SERVER_TRUE@	os/keyvaluestore/libos_a-GenericObjectMap.$(OBJEXT) \
- at ENABLE_SERVER_TRUE@	os/keyvaluestore/libos_a-KeyValueStore.$(OBJEXT) \
 @ENABLE_SERVER_TRUE@	os/kstore/libos_a-kv.$(OBJEXT) \
 @ENABLE_SERVER_TRUE@	os/kstore/libos_a-KStore.$(OBJEXT) \
 @ENABLE_SERVER_TRUE@	os/memstore/libos_a-MemStore.$(OBJEXT) \
 @ENABLE_SERVER_TRUE@	os/libos_a-ObjectStore.$(OBJEXT) \
 @ENABLE_SERVER_TRUE@	$(am__objects_4) $(am__objects_5) \
- at ENABLE_SERVER_TRUE@	$(am__objects_6) $(am__objects_7)
+ at ENABLE_SERVER_TRUE@	$(am__objects_6) $(am__objects_7) \
+ at ENABLE_SERVER_TRUE@	$(am__objects_8) $(am__objects_9)
 libos_a_OBJECTS = $(am_libos_a_OBJECTS)
 libos_types_a_AR = $(AR) $(ARFLAGS)
 libos_types_a_LIBADD =
 am__libos_types_a_SOURCES_DIST = os/kstore/kstore_types.cc \
 	os/Transaction.cc os/bluestore/bluestore_types.cc \
 	os/bluestore/bluefs_types.cc
- at WITH_LIBAIO_TRUE@am__objects_8 = os/bluestore/libos_types_a-bluestore_types.$(OBJEXT) \
+ at WITH_LIBAIO_TRUE@am__objects_10 = os/bluestore/libos_types_a-bluestore_types.$(OBJEXT) \
 @WITH_LIBAIO_TRUE@	os/bluestore/libos_types_a-bluefs_types.$(OBJEXT)
 am_libos_types_a_OBJECTS =  \
 	os/kstore/libos_types_a-kstore_types.$(OBJEXT) \
-	os/libos_types_a-Transaction.$(OBJEXT) $(am__objects_8)
+	os/libos_types_a-Transaction.$(OBJEXT) $(am__objects_10)
 libos_types_a_OBJECTS = $(am_libos_types_a_OBJECTS)
 libos_zfs_a_AR = $(AR) $(ARFLAGS)
 libos_zfs_a_LIBADD =
@@ -1323,10 +1357,9 @@ am__installdirs = "$(DESTDIR)$(compressorlibdir)" \
 	"$(DESTDIR)$(bindir)" "$(DESTDIR)$(ceph_libexecdir)" \
 	"$(DESTDIR)$(ceph_monstore_update_crushdir)" \
 	"$(DESTDIR)$(ceph_sbindir)" "$(DESTDIR)$(bindir)" \
-	"$(DESTDIR)$(sbindir)" "$(DESTDIR)$(shell_commondir)" \
-	"$(DESTDIR)$(su_sbindir)" "$(DESTDIR)$(pythondir)" \
-	"$(DESTDIR)$(bash_completiondir)" "$(DESTDIR)$(docdir)" \
-	"$(DESTDIR)$(libcephfs_includedir)" \
+	"$(DESTDIR)$(sbindir)" "$(DESTDIR)$(su_sbindir)" \
+	"$(DESTDIR)$(pythondir)" "$(DESTDIR)$(bash_completiondir)" \
+	"$(DESTDIR)$(docdir)" "$(DESTDIR)$(libcephfs_includedir)" \
 	"$(DESTDIR)$(librbd_includedir)" \
 	"$(DESTDIR)$(rados_includedir)" \
 	"$(DESTDIR)$(radosstriper_includedir)"
@@ -1371,16 +1404,28 @@ libceph_example_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	-rpath $(compressorlibdir)
 libceph_snappy_la_DEPENDENCIES = $(LIBCRUSH) $(am__DEPENDENCIES_1) \
 	$(am__DEPENDENCIES_3)
-am__objects_9 = compressor/libceph_snappy_la-Compressor.lo \
+am__objects_11 = compressor/libceph_snappy_la-Compressor.lo \
 	compressor/snappy/libceph_snappy_la-CompressionPluginSnappy.lo
-am_libceph_snappy_la_OBJECTS = $(am__objects_9)
+am_libceph_snappy_la_OBJECTS = $(am__objects_11)
 libceph_snappy_la_OBJECTS = $(am_libceph_snappy_la_OBJECTS)
 libceph_snappy_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(libceph_snappy_la_CXXFLAGS) $(CXXFLAGS) \
 	$(libceph_snappy_la_LDFLAGS) $(LDFLAGS) -o $@
+libceph_zlib_la_DEPENDENCIES = $(LIBCRUSH) $(am__DEPENDENCIES_1) \
+	$(am__DEPENDENCIES_3)
+am__objects_12 = compressor/libceph_zlib_la-Compressor.lo \
+	compressor/zlib/libceph_zlib_la-CompressionPluginZlib.lo \
+	compressor/zlib/libceph_zlib_la-CompressionZlib.lo
+am_libceph_zlib_la_OBJECTS = $(am__objects_12)
+libceph_zlib_la_OBJECTS = $(am_libceph_zlib_la_OBJECTS)
+libceph_zlib_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
+	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
+	$(libceph_zlib_la_CXXFLAGS) $(CXXFLAGS) \
+	$(libceph_zlib_la_LDFLAGS) $(LDFLAGS) -o $@
+am__DEPENDENCIES_4 = libcommon.la
 @ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE at libcephfs_la_DEPENDENCIES = $(LIBCLIENT) \
- at ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE@	$(LIBCOMMON) \
+ at ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_4) \
 @ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_1) \
 @ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_1) \
 @ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_3)
@@ -1394,7 +1439,7 @@ libcephfs_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) \
 @ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE@	-rpath \
 @ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE@	$(libdir)
 @ENABLE_CEPHFS_JAVA_TRUE@@ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE at libcephfs_jni_la_DEPENDENCIES = $(LIBCEPHFS) \
- at ENABLE_CEPHFS_JAVA_TRUE@@ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE@	$(LIBCOMMON) \
+ at ENABLE_CEPHFS_JAVA_TRUE@@ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_4) \
 @ENABLE_CEPHFS_JAVA_TRUE@@ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_3)
 am__libcephfs_jni_la_SOURCES_DIST = java/native/libcephfs_jni.cc \
 	java/native/ScopedLocalRef.h java/native/JniConstants.cpp \
@@ -1420,8 +1465,7 @@ libcivetweb_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(libcivetweb_la_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
 	$(LDFLAGS) -o $@
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE at am_libcivetweb_la_rpath =
- at ENABLE_CLIENT_TRUE@libclient_la_DEPENDENCIES = $(LIBOSDC) \
- at ENABLE_CLIENT_TRUE@	$(am__DEPENDENCIES_1)
+ at ENABLE_CLIENT_TRUE@libclient_la_DEPENDENCIES = $(LIBOSDC)
 am__libclient_la_SOURCES_DIST = client/Client.cc client/Inode.cc \
 	client/Dentry.cc client/MetaRequest.cc \
 	client/ClientSnapRealm.cc client/MetaSession.cc \
@@ -1710,11 +1754,11 @@ libcls_version_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(LDFLAGS) -o $@
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at am_libcls_version_la_rpath =  \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	-rpath $(radoslibdir)
-am__DEPENDENCIES_4 = libcommon_internal.la libcommon_crc.la \
-	$(am__append_106) $(LIBERASURE_CODE) $(LIBCOMPRESSOR) \
+am__DEPENDENCIES_5 = libcommon_internal.la libcommon_crc.la \
+	$(am__append_114) $(LIBERASURE_CODE) $(LIBCOMPRESSOR) \
 	$(LIBMSG) $(LIBAUTH) $(LIBCRUSH) $(LIBJSON_SPIRIT) $(LIBLOG) \
 	$(LIBARCH) $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1)
-libcommon_la_DEPENDENCIES = $(am__DEPENDENCIES_4)
+libcommon_la_DEPENDENCIES = $(am__DEPENDENCIES_5)
 am_libcommon_la_OBJECTS = common/buffer.lo
 libcommon_la_OBJECTS = $(am_libcommon_la_OBJECTS)
 libcommon_crc_la_LIBADD =
@@ -1722,12 +1766,12 @@ am__libcommon_crc_la_SOURCES_DIST = common/sctp_crc32.c \
 	common/crc32c.cc common/crc32c_intel_baseline.c \
 	common/crc32c_intel_fast.c common/crc32c_intel_fast_asm.S \
 	common/crc32c_intel_fast_zero_asm.S
- at WITH_GOOD_YASM_ELF64_TRUE@am__objects_10 = common/libcommon_crc_la-crc32c_intel_fast_asm.lo \
+ at WITH_GOOD_YASM_ELF64_TRUE@am__objects_13 = common/libcommon_crc_la-crc32c_intel_fast_asm.lo \
 @WITH_GOOD_YASM_ELF64_TRUE@	common/libcommon_crc_la-crc32c_intel_fast_zero_asm.lo
 am_libcommon_crc_la_OBJECTS = common/libcommon_crc_la-sctp_crc32.lo \
 	common/libcommon_crc_la-crc32c.lo \
 	common/libcommon_crc_la-crc32c_intel_baseline.lo \
-	common/libcommon_crc_la-crc32c_intel_fast.lo $(am__objects_10)
+	common/libcommon_crc_la-crc32c_intel_fast.lo $(am__objects_13)
 libcommon_crc_la_OBJECTS = $(am_libcommon_crc_la_OBJECTS)
 libcommon_crc_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(libcommon_crc_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link \
@@ -1746,8 +1790,9 @@ libcommon_crc_aarch64_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \
 libcommon_internal_la_LIBADD =
 am__libcommon_internal_la_SOURCES_DIST = ceph_ver.c \
 	common/DecayCounter.cc common/LogClient.cc common/LogEntry.cc \
-	common/PrebufferedStreambuf.cc common/SloppyCRCMap.cc \
-	common/BackTrace.cc common/perf_counters.cc common/Mutex.cc \
+	common/Graylog.cc common/PrebufferedStreambuf.cc \
+	common/SloppyCRCMap.cc common/BackTrace.cc \
+	common/perf_counters.cc common/mutex_debug.cc common/Mutex.cc \
 	common/OutputDataSocket.cc common/admin_socket.cc \
 	common/admin_socket_client.cc common/cmdparse.cc \
 	common/escape.c common/io_priority.cc common/ceph_time.cc \
@@ -1761,8 +1806,9 @@ am__libcommon_internal_la_SOURCES_DIST = ceph_ver.c \
 	common/ceph_context.cc common/types.cc \
 	common/code_environment.cc common/dout.cc common/histogram.cc \
 	common/signal.cc common/simple_spin.cc common/Thread.cc \
-	common/Formatter.cc common/HeartbeatMap.cc common/config.cc \
-	common/utf8.c common/mime.c common/strtol.cc common/page.cc \
+	common/Formatter.cc common/HTMLFormatter.cc \
+	common/HeartbeatMap.cc common/config.cc common/utf8.c \
+	common/mime.c common/strtol.cc common/page.cc \
 	common/lockdep.cc common/version.cc common/hex.cc \
 	common/entity_name.cc common/ceph_crypto.cc \
 	common/ceph_crypto_cms.cc common/TextTable.cc \
@@ -1778,18 +1824,19 @@ am__libcommon_internal_la_SOURCES_DIST = ceph_ver.c \
 	mon/MonMap.cc osd/OSDMap.cc osd/osd_types.cc osd/ECMsgTypes.cc \
 	osd/HitSet.cc mds/MDSMap.cc mds/inode_backtrace.cc \
 	mds/mdstypes.cc mds/flock.cc
-@ENABLE_SERVER_TRUE@am__objects_11 = common/xattr.lo common/ipaddr.lo \
+@ENABLE_SERVER_TRUE@am__objects_14 = common/xattr.lo common/ipaddr.lo \
 @ENABLE_SERVER_TRUE@	common/ceph_json.lo common/util.lo \
 @ENABLE_SERVER_TRUE@	common/pick_address.lo
-@LINUX_TRUE@am__objects_12 = common/linux_version.lo
-@SOLARIS_TRUE@am__objects_13 = common/solaris_errno.lo
-@AIX_TRUE@am__objects_14 = common/aix_errno.lo
-@LINUX_TRUE@am__objects_15 = common/blkdev.lo
-@ENABLE_XIO_TRUE@am__objects_16 = common/address_helper.lo
+@LINUX_TRUE@am__objects_15 = common/linux_version.lo
+@SOLARIS_TRUE@am__objects_16 = common/solaris_errno.lo
+@AIX_TRUE@am__objects_17 = common/aix_errno.lo
+@LINUX_TRUE@am__objects_18 = common/blkdev.lo
+@ENABLE_XIO_TRUE@am__objects_19 = common/address_helper.lo
 am_libcommon_internal_la_OBJECTS = ceph_ver.lo common/DecayCounter.lo \
-	common/LogClient.lo common/LogEntry.lo \
+	common/LogClient.lo common/LogEntry.lo common/Graylog.lo \
 	common/PrebufferedStreambuf.lo common/SloppyCRCMap.lo \
-	common/BackTrace.lo common/perf_counters.lo common/Mutex.lo \
+	common/BackTrace.lo common/perf_counters.lo \
+	common/mutex_debug.lo common/Mutex.lo \
 	common/OutputDataSocket.lo common/admin_socket.lo \
 	common/admin_socket_client.lo common/cmdparse.lo \
 	common/escape.lo common/io_priority.lo common/ceph_time.lo \
@@ -1803,8 +1850,9 @@ am_libcommon_internal_la_OBJECTS = ceph_ver.lo common/DecayCounter.lo \
 	common/ceph_context.lo common/types.lo \
 	common/code_environment.lo common/dout.lo common/histogram.lo \
 	common/signal.lo common/simple_spin.lo common/Thread.lo \
-	common/Formatter.lo common/HeartbeatMap.lo common/config.lo \
-	common/utf8.lo common/mime.lo common/strtol.lo common/page.lo \
+	common/Formatter.lo common/HTMLFormatter.lo \
+	common/HeartbeatMap.lo common/config.lo common/utf8.lo \
+	common/mime.lo common/strtol.lo common/page.lo \
 	common/lockdep.lo common/version.lo common/hex.lo \
 	common/entity_name.lo common/ceph_crypto.lo \
 	common/ceph_crypto_cms.lo common/TextTable.lo \
@@ -1813,8 +1861,8 @@ am_libcommon_internal_la_OBJECTS = ceph_ver.lo common/DecayCounter.lo \
 	common/bloom_filter.lo common/module.lo common/Readahead.lo \
 	common/Cycles.lo common/ContextCompletion.lo \
 	common/TracepointProvider.lo common/PluginRegistry.lo \
-	$(am__objects_11) $(am__objects_12) $(am__objects_13) \
 	$(am__objects_14) $(am__objects_15) $(am__objects_16) \
+	$(am__objects_17) $(am__objects_18) $(am__objects_19) \
 	mon/MonCap.lo mon/MonClient.lo mon/MonMap.lo osd/OSDMap.lo \
 	osd/osd_types.lo osd/ECMsgTypes.lo osd/HitSet.lo mds/MDSMap.lo \
 	mds/inode_backtrace.lo mds/mdstypes.lo mds/flock.lo
@@ -1885,107 +1933,13 @@ libec_hangs_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(libec_hangs_la_LDFLAGS) $(LDFLAGS) -o $@
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@am_libec_hangs_la_rpath = -rpath \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(erasure_codelibdir)
-@WITH_BETTER_YASM_ELF64_TRUE@libec_isa_la_DEPENDENCIES = $(LIBCRUSH) \
-@WITH_BETTER_YASM_ELF64_TRUE@	$(am__DEPENDENCIES_1) \
+@WITH_BETTER_YASM_ELF64_TRUE@libec_isa_la_DEPENDENCIES = libisa.la \
+@WITH_BETTER_YASM_ELF64_TRUE@	$(LIBCRUSH) $(am__DEPENDENCIES_1) \
 @WITH_BETTER_YASM_ELF64_TRUE@	$(am__DEPENDENCIES_3)
-am__libec_isa_la_SOURCES_DIST = erasure-code/ErasureCode.cc \
-	erasure-code/isa/isa-l/erasure_code/ec_base.c \
-	erasure-code/isa/isa-l/erasure_code/ec_highlevel_func.c \
-	erasure-code/isa/isa-l/erasure_code/ec_multibinary.asm.s \
-	erasure-code/isa/isa-l/erasure_code/gf_2vect_dot_prod_avx2.asm.s \
-	erasure-code/isa/isa-l/erasure_code/gf_2vect_dot_prod_avx.asm.s \
-	erasure-code/isa/isa-l/erasure_code/gf_2vect_dot_prod_sse.asm.s \
-	erasure-code/isa/isa-l/erasure_code/gf_3vect_dot_prod_avx2.asm.s \
-	erasure-code/isa/isa-l/erasure_code/gf_3vect_dot_prod_avx.asm.s \
-	erasure-code/isa/isa-l/erasure_code/gf_3vect_dot_prod_sse.asm.s \
-	erasure-code/isa/isa-l/erasure_code/gf_4vect_dot_prod_avx2.asm.s \
-	erasure-code/isa/isa-l/erasure_code/gf_4vect_dot_prod_avx.asm.s \
-	erasure-code/isa/isa-l/erasure_code/gf_4vect_dot_prod_sse.asm.s \
-	erasure-code/isa/isa-l/erasure_code/gf_5vect_dot_prod_avx2.asm.s \
-	erasure-code/isa/isa-l/erasure_code/gf_5vect_dot_prod_avx.asm.s \
-	erasure-code/isa/isa-l/erasure_code/gf_5vect_dot_prod_sse.asm.s \
-	erasure-code/isa/isa-l/erasure_code/gf_6vect_dot_prod_avx2.asm.s \
-	erasure-code/isa/isa-l/erasure_code/gf_6vect_dot_prod_avx.asm.s \
-	erasure-code/isa/isa-l/erasure_code/gf_6vect_dot_prod_sse.asm.s \
-	erasure-code/isa/isa-l/erasure_code/gf_vect_dot_prod_avx2.asm.s \
-	erasure-code/isa/isa-l/erasure_code/gf_vect_dot_prod_avx.asm.s \
-	erasure-code/isa/isa-l/erasure_code/gf_vect_dot_prod_sse.asm.s \
-	erasure-code/isa/isa-l/erasure_code/gf_2vect_mad_avx2.asm.s \
-	erasure-code/isa/isa-l/erasure_code/gf_2vect_mad_avx.asm.s \
-	erasure-code/isa/isa-l/erasure_code/gf_2vect_mad_sse.asm.s \
-	erasure-code/isa/isa-l/erasure_code/gf_3vect_mad_avx2.asm.s \
-	erasure-code/isa/isa-l/erasure_code/gf_3vect_mad_avx.asm.s \
-	erasure-code/isa/isa-l/erasure_code/gf_3vect_mad_sse.asm.s \
-	erasure-code/isa/isa-l/erasure_code/gf_4vect_mad_avx2.asm.s \
-	erasure-code/isa/isa-l/erasure_code/gf_4vect_mad_avx.asm.s \
-	erasure-code/isa/isa-l/erasure_code/gf_4vect_mad_sse.asm.s \
-	erasure-code/isa/isa-l/erasure_code/gf_5vect_mad_avx2.asm.s \
-	erasure-code/isa/isa-l/erasure_code/gf_5vect_mad_avx.asm.s \
-	erasure-code/isa/isa-l/erasure_code/gf_5vect_mad_sse.asm.s \
-	erasure-code/isa/isa-l/erasure_code/gf_6vect_mad_avx2.asm.s \
-	erasure-code/isa/isa-l/erasure_code/gf_6vect_mad_avx.asm.s \
-	erasure-code/isa/isa-l/erasure_code/gf_6vect_mad_sse.asm.s \
-	erasure-code/isa/isa-l/erasure_code/gf_vect_mad_avx2.asm.s \
-	erasure-code/isa/isa-l/erasure_code/gf_vect_mad_avx.asm.s \
-	erasure-code/isa/isa-l/erasure_code/gf_vect_mad_sse.asm.s \
-	erasure-code/isa/isa-l/erasure_code/gf_vect_mul_avx.asm.s \
-	erasure-code/isa/isa-l/erasure_code/gf_vect_mul_sse.asm.s \
-	erasure-code/isa/ErasureCodeIsa.cc \
-	erasure-code/isa/ErasureCodeIsaTableCache.cc \
-	erasure-code/isa/ErasureCodePluginIsa.cc \
-	erasure-code/isa/xor_op.cc
-@WITH_BETTER_YASM_ELF64_TRUE@am__objects_17 = erasure-code/libec_isa_la-ErasureCode.lo \
-@WITH_BETTER_YASM_ELF64_TRUE@	erasure-code/isa/isa-l/erasure_code/libec_isa_la-ec_base.lo \
-@WITH_BETTER_YASM_ELF64_TRUE@	erasure-code/isa/isa-l/erasure_code/libec_isa_la-ec_highlevel_func.lo \
-@WITH_BETTER_YASM_ELF64_TRUE@	erasure-code/isa/isa-l/erasure_code/libec_isa_la-ec_multibinary.asm.lo \
-@WITH_BETTER_YASM_ELF64_TRUE@	erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_2vect_dot_prod_avx2.asm.lo \
-@WITH_BETTER_YASM_ELF64_TRUE@	erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_2vect_dot_prod_avx.asm.lo \
-@WITH_BETTER_YASM_ELF64_TRUE@	erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_2vect_dot_prod_sse.asm.lo \
-@WITH_BETTER_YASM_ELF64_TRUE@	erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_3vect_dot_prod_avx2.asm.lo \
-@WITH_BETTER_YASM_ELF64_TRUE@	erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_3vect_dot_prod_avx.asm.lo \
-@WITH_BETTER_YASM_ELF64_TRUE@	erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_3vect_dot_prod_sse.asm.lo \
-@WITH_BETTER_YASM_ELF64_TRUE@	erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_4vect_dot_prod_avx2.asm.lo \
-@WITH_BETTER_YASM_ELF64_TRUE@	erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_4vect_dot_prod_avx.asm.lo \
-@WITH_BETTER_YASM_ELF64_TRUE@	erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_4vect_dot_prod_sse.asm.lo \
-@WITH_BETTER_YASM_ELF64_TRUE@	erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_5vect_dot_prod_avx2.asm.lo \
-@WITH_BETTER_YASM_ELF64_TRUE@	erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_5vect_dot_prod_avx.asm.lo \
-@WITH_BETTER_YASM_ELF64_TRUE@	erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_5vect_dot_prod_sse.asm.lo \
-@WITH_BETTER_YASM_ELF64_TRUE@	erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_6vect_dot_prod_avx2.asm.lo \
-@WITH_BETTER_YASM_ELF64_TRUE@	erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_6vect_dot_prod_avx.asm.lo \
-@WITH_BETTER_YASM_ELF64_TRUE@	erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_6vect_dot_prod_sse.asm.lo \
-@WITH_BETTER_YASM_ELF64_TRUE@	erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_vect_dot_prod_avx2.asm.lo \
-@WITH_BETTER_YASM_ELF64_TRUE@	erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_vect_dot_prod_avx.asm.lo \
-@WITH_BETTER_YASM_ELF64_TRUE@	erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_vect_dot_prod_sse.asm.lo \
-@WITH_BETTER_YASM_ELF64_TRUE@	erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_2vect_mad_avx2.asm.lo \
-@WITH_BETTER_YASM_ELF64_TRUE@	erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_2vect_mad_avx.asm.lo \
-@WITH_BETTER_YASM_ELF64_TRUE@	erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_2vect_mad_sse.asm.lo \
-@WITH_BETTER_YASM_ELF64_TRUE@	erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_3vect_mad_avx2.asm.lo \
-@WITH_BETTER_YASM_ELF64_TRUE@	erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_3vect_mad_avx.asm.lo \
-@WITH_BETTER_YASM_ELF64_TRUE@	erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_3vect_mad_sse.asm.lo \
-@WITH_BETTER_YASM_ELF64_TRUE@	erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_4vect_mad_avx2.asm.lo \
-@WITH_BETTER_YASM_ELF64_TRUE@	erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_4vect_mad_avx.asm.lo \
-@WITH_BETTER_YASM_ELF64_TRUE@	erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_4vect_mad_sse.asm.lo \
-@WITH_BETTER_YASM_ELF64_TRUE@	erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_5vect_mad_avx2.asm.lo \
-@WITH_BETTER_YASM_ELF64_TRUE@	erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_5vect_mad_avx.asm.lo \
-@WITH_BETTER_YASM_ELF64_TRUE@	erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_5vect_mad_sse.asm.lo \
-@WITH_BETTER_YASM_ELF64_TRUE@	erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_6vect_mad_avx2.asm.lo \
-@WITH_BETTER_YASM_ELF64_TRUE@	erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_6vect_mad_avx.asm.lo \
-@WITH_BETTER_YASM_ELF64_TRUE@	erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_6vect_mad_sse.asm.lo \
-@WITH_BETTER_YASM_ELF64_TRUE@	erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_vect_mad_avx2.asm.lo \
-@WITH_BETTER_YASM_ELF64_TRUE@	erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_vect_mad_avx.asm.lo \
-@WITH_BETTER_YASM_ELF64_TRUE@	erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_vect_mad_sse.asm.lo \
-@WITH_BETTER_YASM_ELF64_TRUE@	erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_vect_mul_avx.asm.lo \
-@WITH_BETTER_YASM_ELF64_TRUE@	erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_vect_mul_sse.asm.lo \
-@WITH_BETTER_YASM_ELF64_TRUE@	erasure-code/isa/libec_isa_la-ErasureCodeIsa.lo \
-@WITH_BETTER_YASM_ELF64_TRUE@	erasure-code/isa/libec_isa_la-ErasureCodeIsaTableCache.lo \
-@WITH_BETTER_YASM_ELF64_TRUE@	erasure-code/isa/libec_isa_la-ErasureCodePluginIsa.lo \
-@WITH_BETTER_YASM_ELF64_TRUE@	erasure-code/isa/libec_isa_la-xor_op.lo
-@WITH_BETTER_YASM_ELF64_TRUE@am_libec_isa_la_OBJECTS =  \
-@WITH_BETTER_YASM_ELF64_TRUE@	$(am__objects_17)
+am_libec_isa_la_OBJECTS =
 libec_isa_la_OBJECTS = $(am_libec_isa_la_OBJECTS)
-libec_isa_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
-	$(libec_isa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link \
-	$(CXXLD) $(libec_isa_la_CXXFLAGS) $(CXXFLAGS) \
+libec_isa_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
+	$(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
 	$(libec_isa_la_LDFLAGS) $(LDFLAGS) -o $@
 @WITH_BETTER_YASM_ELF64_TRUE@am_libec_isa_la_rpath = -rpath \
 @WITH_BETTER_YASM_ELF64_TRUE@	$(erasure_codelibdir)
@@ -1999,7 +1953,7 @@ libec_jerasure_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(libec_jerasure_la_LDFLAGS) $(LDFLAGS) -o $@
 libec_jerasure_generic_la_DEPENDENCIES = $(LIBCRUSH) \
 	$(am__DEPENDENCIES_1) $(am__DEPENDENCIES_3)
-am__objects_18 =  \
+am__objects_20 =  \
 	erasure-code/libec_jerasure_generic_la-ErasureCode.lo \
 	erasure-code/jerasure/jerasure/src/libec_jerasure_generic_la-cauchy.lo \
 	erasure-code/jerasure/jerasure/src/libec_jerasure_generic_la-galois.lo \
@@ -2019,7 +1973,7 @@ am__objects_18 =  \
 	erasure-code/jerasure/gf-complete/src/libec_jerasure_generic_la-gf_w8.lo \
 	erasure-code/jerasure/libec_jerasure_generic_la-ErasureCodePluginJerasure.lo \
 	erasure-code/jerasure/libec_jerasure_generic_la-ErasureCodeJerasure.lo
-am_libec_jerasure_generic_la_OBJECTS = $(am__objects_18)
+am_libec_jerasure_generic_la_OBJECTS = $(am__objects_20)
 libec_jerasure_generic_la_OBJECTS =  \
 	$(am_libec_jerasure_generic_la_OBJECTS)
 libec_jerasure_generic_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
@@ -2028,7 +1982,7 @@ libec_jerasure_generic_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(libec_jerasure_generic_la_LDFLAGS) $(LDFLAGS) -o $@
 libec_jerasure_neon_la_DEPENDENCIES = $(LIBCRUSH) \
 	$(am__DEPENDENCIES_1) $(am__DEPENDENCIES_3)
-am__objects_19 = erasure-code/libec_jerasure_neon_la-ErasureCode.lo \
+am__objects_21 = erasure-code/libec_jerasure_neon_la-ErasureCode.lo \
 	erasure-code/jerasure/jerasure/src/libec_jerasure_neon_la-cauchy.lo \
 	erasure-code/jerasure/jerasure/src/libec_jerasure_neon_la-galois.lo \
 	erasure-code/jerasure/jerasure/src/libec_jerasure_neon_la-jerasure.lo \
@@ -2047,7 +2001,7 @@ am__objects_19 = erasure-code/libec_jerasure_neon_la-ErasureCode.lo \
 	erasure-code/jerasure/gf-complete/src/libec_jerasure_neon_la-gf_w8.lo \
 	erasure-code/jerasure/libec_jerasure_neon_la-ErasureCodePluginJerasure.lo \
 	erasure-code/jerasure/libec_jerasure_neon_la-ErasureCodeJerasure.lo
-am_libec_jerasure_neon_la_OBJECTS = $(am__objects_19) \
+am_libec_jerasure_neon_la_OBJECTS = $(am__objects_21) \
 	erasure-code/jerasure/gf-complete/src/neon/libec_jerasure_neon_la-gf_w4_neon.lo \
 	erasure-code/jerasure/gf-complete/src/neon/libec_jerasure_neon_la-gf_w8_neon.lo \
 	erasure-code/jerasure/gf-complete/src/neon/libec_jerasure_neon_la-gf_w16_neon.lo \
@@ -2062,7 +2016,7 @@ libec_jerasure_neon_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 @HAVE_NEON_TRUE@	$(erasure_codelibdir)
 libec_jerasure_sse3_la_DEPENDENCIES = $(LIBCRUSH) \
 	$(am__DEPENDENCIES_1) $(am__DEPENDENCIES_3)
-am__objects_20 = erasure-code/libec_jerasure_sse3_la-ErasureCode.lo \
+am__objects_22 = erasure-code/libec_jerasure_sse3_la-ErasureCode.lo \
 	erasure-code/jerasure/jerasure/src/libec_jerasure_sse3_la-cauchy.lo \
 	erasure-code/jerasure/jerasure/src/libec_jerasure_sse3_la-galois.lo \
 	erasure-code/jerasure/jerasure/src/libec_jerasure_sse3_la-jerasure.lo \
@@ -2081,7 +2035,7 @@ am__objects_20 = erasure-code/libec_jerasure_sse3_la-ErasureCode.lo \
 	erasure-code/jerasure/gf-complete/src/libec_jerasure_sse3_la-gf_w8.lo \
 	erasure-code/jerasure/libec_jerasure_sse3_la-ErasureCodePluginJerasure.lo \
 	erasure-code/jerasure/libec_jerasure_sse3_la-ErasureCodeJerasure.lo
-am_libec_jerasure_sse3_la_OBJECTS = $(am__objects_20)
+am_libec_jerasure_sse3_la_OBJECTS = $(am__objects_22)
 libec_jerasure_sse3_la_OBJECTS = $(am_libec_jerasure_sse3_la_OBJECTS)
 libec_jerasure_sse3_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
@@ -2091,7 +2045,7 @@ libec_jerasure_sse3_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 @HAVE_SSSE3_TRUE@	$(erasure_codelibdir)
 libec_jerasure_sse4_la_DEPENDENCIES = $(LIBCRUSH) \
 	$(am__DEPENDENCIES_1) $(am__DEPENDENCIES_3)
-am__objects_21 = erasure-code/libec_jerasure_sse4_la-ErasureCode.lo \
+am__objects_23 = erasure-code/libec_jerasure_sse4_la-ErasureCode.lo \
 	erasure-code/jerasure/jerasure/src/libec_jerasure_sse4_la-cauchy.lo \
 	erasure-code/jerasure/jerasure/src/libec_jerasure_sse4_la-galois.lo \
 	erasure-code/jerasure/jerasure/src/libec_jerasure_sse4_la-jerasure.lo \
@@ -2110,7 +2064,7 @@ am__objects_21 = erasure-code/libec_jerasure_sse4_la-ErasureCode.lo \
 	erasure-code/jerasure/gf-complete/src/libec_jerasure_sse4_la-gf_w8.lo \
 	erasure-code/jerasure/libec_jerasure_sse4_la-ErasureCodePluginJerasure.lo \
 	erasure-code/jerasure/libec_jerasure_sse4_la-ErasureCodeJerasure.lo
-am_libec_jerasure_sse4_la_OBJECTS = $(am__objects_21)
+am_libec_jerasure_sse4_la_OBJECTS = $(am__objects_23)
 libec_jerasure_sse4_la_OBJECTS = $(am_libec_jerasure_sse4_la_OBJECTS)
 libec_jerasure_sse4_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
@@ -2120,10 +2074,10 @@ libec_jerasure_sse4_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 @HAVE_SSE4_PCLMUL_TRUE@	$(erasure_codelibdir)
 libec_lrc_la_DEPENDENCIES = $(LIBCRUSH) $(am__DEPENDENCIES_1) \
 	$(LIBJSON_SPIRIT)
-am__objects_22 = erasure-code/libec_lrc_la-ErasureCode.lo \
+am__objects_24 = erasure-code/libec_lrc_la-ErasureCode.lo \
 	erasure-code/lrc/libec_lrc_la-ErasureCodePluginLrc.lo \
 	erasure-code/lrc/libec_lrc_la-ErasureCodeLrc.lo
-am_libec_lrc_la_OBJECTS = $(am__objects_22) \
+am_libec_lrc_la_OBJECTS = $(am__objects_24) \
 	common/libec_lrc_la-str_map.lo
 libec_lrc_la_OBJECTS = $(am_libec_lrc_la_OBJECTS)
 libec_lrc_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) \
@@ -2169,7 +2123,7 @@ libec_shec_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(LDFLAGS) -o $@
 libec_shec_generic_la_DEPENDENCIES = $(LIBCRUSH) $(am__DEPENDENCIES_1) \
 	$(am__DEPENDENCIES_3)
-am__objects_23 = erasure-code/libec_shec_generic_la-ErasureCode.lo \
+am__objects_25 = erasure-code/libec_shec_generic_la-ErasureCode.lo \
 	erasure-code/shec/libec_shec_generic_la-ErasureCodePluginShec.lo \
 	erasure-code/shec/libec_shec_generic_la-ErasureCodeShec.lo \
 	erasure-code/shec/libec_shec_generic_la-ErasureCodeShecTableCache.lo \
@@ -2190,7 +2144,7 @@ am__objects_23 = erasure-code/libec_shec_generic_la-ErasureCode.lo \
 	erasure-code/jerasure/gf-complete/src/libec_shec_generic_la-gf_w4.lo \
 	erasure-code/jerasure/gf-complete/src/libec_shec_generic_la-gf_rand.lo \
 	erasure-code/jerasure/gf-complete/src/libec_shec_generic_la-gf_w8.lo
-am_libec_shec_generic_la_OBJECTS = $(am__objects_23)
+am_libec_shec_generic_la_OBJECTS = $(am__objects_25)
 libec_shec_generic_la_OBJECTS = $(am_libec_shec_generic_la_OBJECTS)
 libec_shec_generic_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
@@ -2198,7 +2152,7 @@ libec_shec_generic_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(libec_shec_generic_la_LDFLAGS) $(LDFLAGS) -o $@
 libec_shec_neon_la_DEPENDENCIES = $(LIBCRUSH) $(am__DEPENDENCIES_1) \
 	$(am__DEPENDENCIES_3)
-am__objects_24 = erasure-code/libec_shec_neon_la-ErasureCode.lo \
+am__objects_26 = erasure-code/libec_shec_neon_la-ErasureCode.lo \
 	erasure-code/shec/libec_shec_neon_la-ErasureCodePluginShec.lo \
 	erasure-code/shec/libec_shec_neon_la-ErasureCodeShec.lo \
 	erasure-code/shec/libec_shec_neon_la-ErasureCodeShecTableCache.lo \
@@ -2219,7 +2173,7 @@ am__objects_24 = erasure-code/libec_shec_neon_la-ErasureCode.lo \
 	erasure-code/jerasure/gf-complete/src/libec_shec_neon_la-gf_w4.lo \
 	erasure-code/jerasure/gf-complete/src/libec_shec_neon_la-gf_rand.lo \
 	erasure-code/jerasure/gf-complete/src/libec_shec_neon_la-gf_w8.lo
-am_libec_shec_neon_la_OBJECTS = $(am__objects_24) \
+am_libec_shec_neon_la_OBJECTS = $(am__objects_26) \
 	erasure-code/jerasure/gf-complete/src/neon/libec_shec_neon_la-gf_w4_neon.lo \
 	erasure-code/jerasure/gf-complete/src/neon/libec_shec_neon_la-gf_w8_neon.lo \
 	erasure-code/jerasure/gf-complete/src/neon/libec_shec_neon_la-gf_w16_neon.lo \
@@ -2234,7 +2188,7 @@ libec_shec_neon_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 @HAVE_NEON_TRUE@	$(erasure_codelibdir)
 libec_shec_sse3_la_DEPENDENCIES = $(LIBCRUSH) $(am__DEPENDENCIES_1) \
 	$(am__DEPENDENCIES_3)
-am__objects_25 = erasure-code/libec_shec_sse3_la-ErasureCode.lo \
+am__objects_27 = erasure-code/libec_shec_sse3_la-ErasureCode.lo \
 	erasure-code/shec/libec_shec_sse3_la-ErasureCodePluginShec.lo \
 	erasure-code/shec/libec_shec_sse3_la-ErasureCodeShec.lo \
 	erasure-code/shec/libec_shec_sse3_la-ErasureCodeShecTableCache.lo \
@@ -2255,7 +2209,7 @@ am__objects_25 = erasure-code/libec_shec_sse3_la-ErasureCode.lo \
 	erasure-code/jerasure/gf-complete/src/libec_shec_sse3_la-gf_w4.lo \
 	erasure-code/jerasure/gf-complete/src/libec_shec_sse3_la-gf_rand.lo \
 	erasure-code/jerasure/gf-complete/src/libec_shec_sse3_la-gf_w8.lo
-am_libec_shec_sse3_la_OBJECTS = $(am__objects_25)
+am_libec_shec_sse3_la_OBJECTS = $(am__objects_27)
 libec_shec_sse3_la_OBJECTS = $(am_libec_shec_sse3_la_OBJECTS)
 libec_shec_sse3_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
@@ -2265,7 +2219,7 @@ libec_shec_sse3_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 @HAVE_SSSE3_TRUE@	$(erasure_codelibdir)
 libec_shec_sse4_la_DEPENDENCIES = $(LIBCRUSH) $(am__DEPENDENCIES_1) \
 	$(am__DEPENDENCIES_3)
-am__objects_26 = erasure-code/libec_shec_sse4_la-ErasureCode.lo \
+am__objects_28 = erasure-code/libec_shec_sse4_la-ErasureCode.lo \
 	erasure-code/shec/libec_shec_sse4_la-ErasureCodePluginShec.lo \
 	erasure-code/shec/libec_shec_sse4_la-ErasureCodeShec.lo \
 	erasure-code/shec/libec_shec_sse4_la-ErasureCodeShecTableCache.lo \
@@ -2286,7 +2240,7 @@ am__objects_26 = erasure-code/libec_shec_sse4_la-ErasureCode.lo \
 	erasure-code/jerasure/gf-complete/src/libec_shec_sse4_la-gf_w4.lo \
 	erasure-code/jerasure/gf-complete/src/libec_shec_sse4_la-gf_rand.lo \
 	erasure-code/jerasure/gf-complete/src/libec_shec_sse4_la-gf_w8.lo
-am_libec_shec_sse4_la_OBJECTS = $(am__objects_26)
+am_libec_shec_sse4_la_OBJECTS = $(am__objects_28)
 libec_shec_sse4_la_OBJECTS = $(am_libec_shec_sse4_la_OBJECTS)
 libec_shec_sse4_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
@@ -2416,11 +2370,112 @@ libec_test_shec_sse4_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(erasure_codelibdir)
 am_liberasure_code_la_OBJECTS = erasure-code/ErasureCodePlugin.lo
 liberasure_code_la_OBJECTS = $(am_liberasure_code_la_OBJECTS)
-libglobal_la_DEPENDENCIES = $(LIBCOMMON)
+libglobal_la_DEPENDENCIES = $(am__DEPENDENCIES_4) \
+	$(am__DEPENDENCIES_1)
 am_libglobal_la_OBJECTS = global/global_context.lo \
 	global/global_init.lo global/pidfile.lo \
 	global/signal_handler.lo common/TrackedOp.lo
 libglobal_la_OBJECTS = $(am_libglobal_la_OBJECTS)
+libisa_la_LIBADD =
+am__libisa_la_SOURCES_DIST = erasure-code/ErasureCode.cc \
+	erasure-code/isa/isa-l/erasure_code/ec_base.c \
+	erasure-code/isa/isa-l/erasure_code/ec_highlevel_func.c \
+	erasure-code/isa/isa-l/erasure_code/ec_multibinary.asm.s \
+	erasure-code/isa/isa-l/erasure_code/gf_2vect_dot_prod_avx2.asm.s \
+	erasure-code/isa/isa-l/erasure_code/gf_2vect_dot_prod_avx.asm.s \
+	erasure-code/isa/isa-l/erasure_code/gf_2vect_dot_prod_sse.asm.s \
+	erasure-code/isa/isa-l/erasure_code/gf_3vect_dot_prod_avx2.asm.s \
+	erasure-code/isa/isa-l/erasure_code/gf_3vect_dot_prod_avx.asm.s \
+	erasure-code/isa/isa-l/erasure_code/gf_3vect_dot_prod_sse.asm.s \
+	erasure-code/isa/isa-l/erasure_code/gf_4vect_dot_prod_avx2.asm.s \
+	erasure-code/isa/isa-l/erasure_code/gf_4vect_dot_prod_avx.asm.s \
+	erasure-code/isa/isa-l/erasure_code/gf_4vect_dot_prod_sse.asm.s \
+	erasure-code/isa/isa-l/erasure_code/gf_5vect_dot_prod_avx2.asm.s \
+	erasure-code/isa/isa-l/erasure_code/gf_5vect_dot_prod_avx.asm.s \
+	erasure-code/isa/isa-l/erasure_code/gf_5vect_dot_prod_sse.asm.s \
+	erasure-code/isa/isa-l/erasure_code/gf_6vect_dot_prod_avx2.asm.s \
+	erasure-code/isa/isa-l/erasure_code/gf_6vect_dot_prod_avx.asm.s \
+	erasure-code/isa/isa-l/erasure_code/gf_6vect_dot_prod_sse.asm.s \
+	erasure-code/isa/isa-l/erasure_code/gf_vect_dot_prod_avx2.asm.s \
+	erasure-code/isa/isa-l/erasure_code/gf_vect_dot_prod_avx.asm.s \
+	erasure-code/isa/isa-l/erasure_code/gf_vect_dot_prod_sse.asm.s \
+	erasure-code/isa/isa-l/erasure_code/gf_2vect_mad_avx2.asm.s \
+	erasure-code/isa/isa-l/erasure_code/gf_2vect_mad_avx.asm.s \
+	erasure-code/isa/isa-l/erasure_code/gf_2vect_mad_sse.asm.s \
+	erasure-code/isa/isa-l/erasure_code/gf_3vect_mad_avx2.asm.s \
+	erasure-code/isa/isa-l/erasure_code/gf_3vect_mad_avx.asm.s \
+	erasure-code/isa/isa-l/erasure_code/gf_3vect_mad_sse.asm.s \
+	erasure-code/isa/isa-l/erasure_code/gf_4vect_mad_avx2.asm.s \
+	erasure-code/isa/isa-l/erasure_code/gf_4vect_mad_avx.asm.s \
+	erasure-code/isa/isa-l/erasure_code/gf_4vect_mad_sse.asm.s \
+	erasure-code/isa/isa-l/erasure_code/gf_5vect_mad_avx2.asm.s \
+	erasure-code/isa/isa-l/erasure_code/gf_5vect_mad_avx.asm.s \
+	erasure-code/isa/isa-l/erasure_code/gf_5vect_mad_sse.asm.s \
+	erasure-code/isa/isa-l/erasure_code/gf_6vect_mad_avx2.asm.s \
+	erasure-code/isa/isa-l/erasure_code/gf_6vect_mad_avx.asm.s \
+	erasure-code/isa/isa-l/erasure_code/gf_6vect_mad_sse.asm.s \
+	erasure-code/isa/isa-l/erasure_code/gf_vect_mad_avx2.asm.s \
+	erasure-code/isa/isa-l/erasure_code/gf_vect_mad_avx.asm.s \
+	erasure-code/isa/isa-l/erasure_code/gf_vect_mad_sse.asm.s \
+	erasure-code/isa/isa-l/erasure_code/gf_vect_mul_avx.asm.s \
+	erasure-code/isa/isa-l/erasure_code/gf_vect_mul_sse.asm.s \
+	erasure-code/isa/ErasureCodeIsa.cc \
+	erasure-code/isa/ErasureCodeIsaTableCache.cc \
+	erasure-code/isa/ErasureCodePluginIsa.cc \
+	erasure-code/isa/xor_op.cc
+@WITH_BETTER_YASM_ELF64_TRUE@am__objects_29 = erasure-code/libisa_la-ErasureCode.lo \
+@WITH_BETTER_YASM_ELF64_TRUE@	erasure-code/isa/isa-l/erasure_code/libisa_la-ec_base.lo \
+@WITH_BETTER_YASM_ELF64_TRUE@	erasure-code/isa/isa-l/erasure_code/libisa_la-ec_highlevel_func.lo \
+@WITH_BETTER_YASM_ELF64_TRUE@	erasure-code/isa/isa-l/erasure_code/libisa_la-ec_multibinary.asm.lo \
+@WITH_BETTER_YASM_ELF64_TRUE@	erasure-code/isa/isa-l/erasure_code/libisa_la-gf_2vect_dot_prod_avx2.asm.lo \
+@WITH_BETTER_YASM_ELF64_TRUE@	erasure-code/isa/isa-l/erasure_code/libisa_la-gf_2vect_dot_prod_avx.asm.lo \
+@WITH_BETTER_YASM_ELF64_TRUE@	erasure-code/isa/isa-l/erasure_code/libisa_la-gf_2vect_dot_prod_sse.asm.lo \
+@WITH_BETTER_YASM_ELF64_TRUE@	erasure-code/isa/isa-l/erasure_code/libisa_la-gf_3vect_dot_prod_avx2.asm.lo \
+@WITH_BETTER_YASM_ELF64_TRUE@	erasure-code/isa/isa-l/erasure_code/libisa_la-gf_3vect_dot_prod_avx.asm.lo \
+@WITH_BETTER_YASM_ELF64_TRUE@	erasure-code/isa/isa-l/erasure_code/libisa_la-gf_3vect_dot_prod_sse.asm.lo \
+@WITH_BETTER_YASM_ELF64_TRUE@	erasure-code/isa/isa-l/erasure_code/libisa_la-gf_4vect_dot_prod_avx2.asm.lo \
+@WITH_BETTER_YASM_ELF64_TRUE@	erasure-code/isa/isa-l/erasure_code/libisa_la-gf_4vect_dot_prod_avx.asm.lo \
+@WITH_BETTER_YASM_ELF64_TRUE@	erasure-code/isa/isa-l/erasure_code/libisa_la-gf_4vect_dot_prod_sse.asm.lo \
+@WITH_BETTER_YASM_ELF64_TRUE@	erasure-code/isa/isa-l/erasure_code/libisa_la-gf_5vect_dot_prod_avx2.asm.lo \
+@WITH_BETTER_YASM_ELF64_TRUE@	erasure-code/isa/isa-l/erasure_code/libisa_la-gf_5vect_dot_prod_avx.asm.lo \
+@WITH_BETTER_YASM_ELF64_TRUE@	erasure-code/isa/isa-l/erasure_code/libisa_la-gf_5vect_dot_prod_sse.asm.lo \
+@WITH_BETTER_YASM_ELF64_TRUE@	erasure-code/isa/isa-l/erasure_code/libisa_la-gf_6vect_dot_prod_avx2.asm.lo \
+@WITH_BETTER_YASM_ELF64_TRUE@	erasure-code/isa/isa-l/erasure_code/libisa_la-gf_6vect_dot_prod_avx.asm.lo \
+@WITH_BETTER_YASM_ELF64_TRUE@	erasure-code/isa/isa-l/erasure_code/libisa_la-gf_6vect_dot_prod_sse.asm.lo \
+@WITH_BETTER_YASM_ELF64_TRUE@	erasure-code/isa/isa-l/erasure_code/libisa_la-gf_vect_dot_prod_avx2.asm.lo \
+@WITH_BETTER_YASM_ELF64_TRUE@	erasure-code/isa/isa-l/erasure_code/libisa_la-gf_vect_dot_prod_avx.asm.lo \
+@WITH_BETTER_YASM_ELF64_TRUE@	erasure-code/isa/isa-l/erasure_code/libisa_la-gf_vect_dot_prod_sse.asm.lo \
+@WITH_BETTER_YASM_ELF64_TRUE@	erasure-code/isa/isa-l/erasure_code/libisa_la-gf_2vect_mad_avx2.asm.lo \
+@WITH_BETTER_YASM_ELF64_TRUE@	erasure-code/isa/isa-l/erasure_code/libisa_la-gf_2vect_mad_avx.asm.lo \
+@WITH_BETTER_YASM_ELF64_TRUE@	erasure-code/isa/isa-l/erasure_code/libisa_la-gf_2vect_mad_sse.asm.lo \
+@WITH_BETTER_YASM_ELF64_TRUE@	erasure-code/isa/isa-l/erasure_code/libisa_la-gf_3vect_mad_avx2.asm.lo \
+@WITH_BETTER_YASM_ELF64_TRUE@	erasure-code/isa/isa-l/erasure_code/libisa_la-gf_3vect_mad_avx.asm.lo \
+@WITH_BETTER_YASM_ELF64_TRUE@	erasure-code/isa/isa-l/erasure_code/libisa_la-gf_3vect_mad_sse.asm.lo \
+@WITH_BETTER_YASM_ELF64_TRUE@	erasure-code/isa/isa-l/erasure_code/libisa_la-gf_4vect_mad_avx2.asm.lo \
+@WITH_BETTER_YASM_ELF64_TRUE@	erasure-code/isa/isa-l/erasure_code/libisa_la-gf_4vect_mad_avx.asm.lo \
+@WITH_BETTER_YASM_ELF64_TRUE@	erasure-code/isa/isa-l/erasure_code/libisa_la-gf_4vect_mad_sse.asm.lo \
+@WITH_BETTER_YASM_ELF64_TRUE@	erasure-code/isa/isa-l/erasure_code/libisa_la-gf_5vect_mad_avx2.asm.lo \
+@WITH_BETTER_YASM_ELF64_TRUE@	erasure-code/isa/isa-l/erasure_code/libisa_la-gf_5vect_mad_avx.asm.lo \
+@WITH_BETTER_YASM_ELF64_TRUE@	erasure-code/isa/isa-l/erasure_code/libisa_la-gf_5vect_mad_sse.asm.lo \
+@WITH_BETTER_YASM_ELF64_TRUE@	erasure-code/isa/isa-l/erasure_code/libisa_la-gf_6vect_mad_avx2.asm.lo \
+@WITH_BETTER_YASM_ELF64_TRUE@	erasure-code/isa/isa-l/erasure_code/libisa_la-gf_6vect_mad_avx.asm.lo \
+@WITH_BETTER_YASM_ELF64_TRUE@	erasure-code/isa/isa-l/erasure_code/libisa_la-gf_6vect_mad_sse.asm.lo \
+@WITH_BETTER_YASM_ELF64_TRUE@	erasure-code/isa/isa-l/erasure_code/libisa_la-gf_vect_mad_avx2.asm.lo \
+@WITH_BETTER_YASM_ELF64_TRUE@	erasure-code/isa/isa-l/erasure_code/libisa_la-gf_vect_mad_avx.asm.lo \
+@WITH_BETTER_YASM_ELF64_TRUE@	erasure-code/isa/isa-l/erasure_code/libisa_la-gf_vect_mad_sse.asm.lo \
+@WITH_BETTER_YASM_ELF64_TRUE@	erasure-code/isa/isa-l/erasure_code/libisa_la-gf_vect_mul_avx.asm.lo \
+@WITH_BETTER_YASM_ELF64_TRUE@	erasure-code/isa/isa-l/erasure_code/libisa_la-gf_vect_mul_sse.asm.lo \
+@WITH_BETTER_YASM_ELF64_TRUE@	erasure-code/isa/libisa_la-ErasureCodeIsa.lo \
+@WITH_BETTER_YASM_ELF64_TRUE@	erasure-code/isa/libisa_la-ErasureCodeIsaTableCache.lo \
+@WITH_BETTER_YASM_ELF64_TRUE@	erasure-code/isa/libisa_la-ErasureCodePluginIsa.lo \
+@WITH_BETTER_YASM_ELF64_TRUE@	erasure-code/isa/libisa_la-xor_op.lo
+@WITH_BETTER_YASM_ELF64_TRUE@am_libisa_la_OBJECTS = $(am__objects_29)
+libisa_la_OBJECTS = $(am_libisa_la_OBJECTS)
+libisa_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
+	$(libisa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
+	$(libisa_la_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o \
+	$@
+@WITH_BETTER_YASM_ELF64_TRUE@am_libisa_la_rpath =
 libjournal_la_LIBADD =
 am__libjournal_la_SOURCES_DIST = journal/AsyncOpTracker.cc \
 	journal/Entry.cc journal/Future.cc journal/FutureImpl.cc \
@@ -2447,7 +2502,7 @@ am_libjson_spirit_la_OBJECTS = json_spirit/json_spirit_reader.lo \
 	json_spirit/json_spirit_writer.lo
 libjson_spirit_la_OBJECTS = $(am_libjson_spirit_la_OBJECTS)
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@libkrbd_la_DEPENDENCIES = $(LIBSECRET) \
-@ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(LIBCOMMON)
+@ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(am__DEPENDENCIES_4)
 am__libkrbd_la_SOURCES_DIST = krbd.cc
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@am_libkrbd_la_OBJECTS =  \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	krbd.lo
@@ -2469,7 +2524,7 @@ am__libmds_la_SOURCES_DIST = mds/Capability.cc mds/MDSDaemon.cc \
 	mds/SnapRealm.cc mds/SnapServer.cc mds/snap.cc \
 	mds/SessionMap.cc mds/MDSContext.cc mds/MDSAuthCaps.cc \
 	mds/MDLog.cc
-am__objects_27 = mds/Capability.lo mds/MDSDaemon.lo mds/MDSRank.lo \
+am__objects_30 = mds/Capability.lo mds/MDSDaemon.lo mds/MDSRank.lo \
 	mds/Beacon.lo mds/locks.lo mds/journal.lo mds/Server.lo \
 	mds/Mutation.lo mds/MDCache.lo mds/RecoveryQueue.lo \
 	mds/StrayManager.lo mds/Locker.lo mds/Migrator.lo \
@@ -2481,7 +2536,7 @@ am__objects_27 = mds/Capability.lo mds/MDSDaemon.lo mds/MDSRank.lo \
 	mds/SessionMap.lo mds/MDSContext.lo mds/MDSAuthCaps.lo \
 	mds/MDLog.lo
 @ENABLE_SERVER_TRUE@@WITH_MDS_TRUE at am_libmds_la_OBJECTS =  \
- at ENABLE_SERVER_TRUE@@WITH_MDS_TRUE@	$(am__objects_27)
+ at ENABLE_SERVER_TRUE@@WITH_MDS_TRUE@	$(am__objects_30)
 libmds_la_OBJECTS = $(am_libmds_la_OBJECTS)
 @ENABLE_SERVER_TRUE@@WITH_MDS_TRUE at am_libmds_la_rpath =
 libmon_types_la_LIBADD =
@@ -2499,11 +2554,11 @@ am__libmsg_la_SOURCES_DIST = msg/Message.cc msg/Messenger.cc \
 	msg/async/EventKqueue.h msg/xio/QueueStrategy.cc \
 	msg/xio/XioConnection.cc msg/xio/XioMessenger.cc \
 	msg/xio/XioMsg.cc msg/xio/XioPortal.cc msg/xio/XioPool.cc
- at LINUX_TRUE@am__objects_28 = msg/async/EventEpoll.lo
- at DARWIN_TRUE@am__objects_29 = msg/async/EventKqueue.lo
- at FREEBSD_TRUE@am__objects_30 = msg/async/EventKqueue.lo
-am__objects_31 =
- at ENABLE_XIO_TRUE@am__objects_32 = msg/xio/QueueStrategy.lo \
+ at LINUX_TRUE@am__objects_31 = msg/async/EventEpoll.lo
+ at DARWIN_TRUE@am__objects_32 = msg/async/EventKqueue.lo
+ at FREEBSD_TRUE@am__objects_33 = msg/async/EventKqueue.lo
+am__objects_34 =
+ at ENABLE_XIO_TRUE@am__objects_35 = msg/xio/QueueStrategy.lo \
 @ENABLE_XIO_TRUE@	msg/xio/XioConnection.lo \
 @ENABLE_XIO_TRUE@	msg/xio/XioMessenger.lo msg/xio/XioMsg.lo \
 @ENABLE_XIO_TRUE@	msg/xio/XioPortal.lo msg/xio/XioPool.lo
@@ -2513,9 +2568,9 @@ am_libmsg_la_OBJECTS = msg/Message.lo msg/Messenger.lo \
 	msg/simple/PipeConnection.lo msg/simple/SimpleMessenger.lo \
 	msg/async/AsyncConnection.lo msg/async/AsyncMessenger.lo \
 	msg/async/Event.lo msg/async/net_handler.lo \
-	msg/async/EventSelect.lo $(am__objects_28) $(am__objects_29) \
-	$(am__objects_30) $(am__objects_31) $(am__objects_31) \
-	$(am__objects_31) $(am__objects_32)
+	msg/async/EventSelect.lo $(am__objects_31) $(am__objects_32) \
+	$(am__objects_33) $(am__objects_34) $(am__objects_34) \
+	$(am__objects_34) $(am__objects_35)
 libmsg_la_OBJECTS = $(am_libmsg_la_OBJECTS)
 libos_tp_la_DEPENDENCIES =
 am__libos_tp_la_SOURCES_DIST = tracing/objectstore.c
@@ -2558,22 +2613,22 @@ libperfglue_la_DEPENDENCIES =
 am__libperfglue_la_SOURCES_DIST = perfglue/heap_profiler.cc \
 	perfglue/disabled_heap_profiler.cc perfglue/cpu_profiler.cc \
 	perfglue/disabled_stubs.cc
- at WITH_TCMALLOC_TRUE@am__objects_33 = perfglue/heap_profiler.lo
- at WITH_TCMALLOC_FALSE@@WITH_TCMALLOC_MINIMAL_TRUE at am__objects_34 = perfglue/heap_profiler.lo
- at WITH_TCMALLOC_FALSE@@WITH_TCMALLOC_MINIMAL_FALSE at am__objects_35 = perfglue/disabled_heap_profiler.lo
- at WITH_PROFILER_TRUE@am__objects_36 = perfglue/cpu_profiler.lo
- at WITH_PROFILER_FALSE@am__objects_37 = perfglue/disabled_stubs.lo
-am_libperfglue_la_OBJECTS = $(am__objects_33) $(am__objects_34) \
-	$(am__objects_35) $(am__objects_36) $(am__objects_37)
+ at WITH_TCMALLOC_TRUE@am__objects_36 = perfglue/heap_profiler.lo
+ at WITH_TCMALLOC_FALSE@@WITH_TCMALLOC_MINIMAL_TRUE at am__objects_37 = perfglue/heap_profiler.lo
+ at WITH_TCMALLOC_FALSE@@WITH_TCMALLOC_MINIMAL_FALSE at am__objects_38 = perfglue/disabled_heap_profiler.lo
+ at WITH_PROFILER_TRUE@am__objects_39 = perfglue/cpu_profiler.lo
+ at WITH_PROFILER_FALSE@am__objects_40 = perfglue/disabled_stubs.lo
+am_libperfglue_la_OBJECTS = $(am__objects_36) $(am__objects_37) \
+	$(am__objects_38) $(am__objects_39) $(am__objects_40)
 libperfglue_la_OBJECTS = $(am_libperfglue_la_OBJECTS)
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at am__DEPENDENCIES_5 =  \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at am__DEPENDENCIES_6 =  \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	librados_internal.la \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	libcls_lock_client.la \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(LIBOSDC) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_4)
-am__DEPENDENCIES_6 = $(am__DEPENDENCIES_5)
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_5)
+am__DEPENDENCIES_7 = $(am__DEPENDENCIES_6)
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at librados_la_DEPENDENCIES =  \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_6) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_7) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_1) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_1) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_3)
@@ -2635,11 +2690,11 @@ librados_tp_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \
 	$(librados_tp_la_CFLAGS) $(CFLAGS) $(librados_tp_la_LDFLAGS) \
 	$(LDFLAGS) -o $@
 @WITH_LTTNG_TRUE at am_librados_tp_la_rpath = -rpath $(libdir)
- at ENABLE_CLIENT_TRUE@@WITH_RADOSSTRIPER_TRUE@@WITH_RADOS_TRUE at am__DEPENDENCIES_7 = librados_internal.la \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOSSTRIPER_TRUE@@WITH_RADOS_TRUE at am__DEPENDENCIES_8 = librados_internal.la \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSSTRIPER_TRUE@@WITH_RADOS_TRUE@	libcls_lock_client.la \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSSTRIPER_TRUE@@WITH_RADOS_TRUE@	$(LIBOSDC) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOSSTRIPER_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_4)
- at ENABLE_CLIENT_TRUE@@WITH_RADOSSTRIPER_TRUE@@WITH_RADOS_TRUE at libradosstriper_la_DEPENDENCIES = $(am__DEPENDENCIES_7) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOSSTRIPER_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_5)
+ at ENABLE_CLIENT_TRUE@@WITH_RADOSSTRIPER_TRUE@@WITH_RADOS_TRUE at libradosstriper_la_DEPENDENCIES = $(am__DEPENDENCIES_8) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSSTRIPER_TRUE@@WITH_RADOS_TRUE@	$(LIBRADOS) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSSTRIPER_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_1) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSSTRIPER_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_1) \
@@ -2658,11 +2713,11 @@ libradosstriper_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(libradosstriper_la_LDFLAGS) $(LDFLAGS) -o $@
 @ENABLE_CLIENT_TRUE@@WITH_RADOSSTRIPER_TRUE@@WITH_RADOS_TRUE at am_libradosstriper_la_rpath = -rpath \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSSTRIPER_TRUE@@WITH_RADOS_TRUE@	$(libdir)
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at am__DEPENDENCIES_8 =  \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at am__DEPENDENCIES_9 =  \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	libradostest.la \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(LIBCOMMON) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_4) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_1)
- at ENABLE_CLIENT_TRUE@@WITH_RADOSSTRIPER_TRUE@@WITH_RADOS_TRUE at libradosstripertest_la_DEPENDENCIES = $(am__DEPENDENCIES_8)
+ at ENABLE_CLIENT_TRUE@@WITH_RADOSSTRIPER_TRUE@@WITH_RADOS_TRUE at libradosstripertest_la_DEPENDENCIES = $(am__DEPENDENCIES_9)
 am__libradosstripertest_la_SOURCES_DIST =  \
 	test/libradosstriper/TestCase.cc
 @ENABLE_CLIENT_TRUE@@WITH_RADOSSTRIPER_TRUE@@WITH_RADOS_TRUE at am_libradosstripertest_la_OBJECTS = test/libradosstriper/libradosstripertest_la-TestCase.lo
@@ -2687,7 +2742,7 @@ libradostest_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(LIBRBD_TYPES) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	libjournal.la \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(LIBRADOS) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(LIBCOMMON) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(am__DEPENDENCIES_4) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(LIBOSDC) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	librados_internal.la \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	libcls_rbd_client.la \
@@ -2798,12 +2853,40 @@ am__librbd_internal_la_SOURCES_DIST = librbd/AioCompletion.cc \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	librbd/operation/TrimRequest.lo
 librbd_internal_la_OBJECTS = $(am_librbd_internal_la_OBJECTS)
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am_librbd_internal_la_rpath =
-am__DEPENDENCIES_9 = $(LIBGLOBAL) $(LIBCOMMON) $(am__DEPENDENCIES_1) \
-	$(am__DEPENDENCIES_1) $(am__DEPENDENCIES_3)
+librbd_mirror_internal_la_LIBADD =
+am__librbd_mirror_internal_la_SOURCES_DIST =  \
+	tools/rbd_mirror/ClusterWatcher.cc \
+	tools/rbd_mirror/ImageReplayer.cc tools/rbd_mirror/Mirror.cc \
+	tools/rbd_mirror/PoolWatcher.cc tools/rbd_mirror/Replayer.cc \
+	tools/rbd_mirror/types.cc
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am_librbd_mirror_internal_la_OBJECTS = tools/rbd_mirror/ClusterWatcher.lo \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	tools/rbd_mirror/ImageReplayer.lo \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	tools/rbd_mirror/Mirror.lo \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	tools/rbd_mirror/PoolWatcher.lo \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	tools/rbd_mirror/Replayer.lo \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	tools/rbd_mirror/types.lo
+librbd_mirror_internal_la_OBJECTS =  \
+	$(am_librbd_mirror_internal_la_OBJECTS)
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am_librbd_mirror_internal_la_rpath =
+librbd_mirror_test_la_LIBADD =
+am__librbd_mirror_test_la_SOURCES_DIST =  \
+	test/rbd_mirror/test_ClusterWatcher.cc \
+	test/rbd_mirror/test_PoolWatcher.cc
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am_librbd_mirror_test_la_OBJECTS = test/rbd_mirror/librbd_mirror_test_la-test_ClusterWatcher.lo \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	test/rbd_mirror/librbd_mirror_test_la-test_PoolWatcher.lo
+librbd_mirror_test_la_OBJECTS = $(am_librbd_mirror_test_la_OBJECTS)
+librbd_mirror_test_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
+	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
+	$(librbd_mirror_test_la_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
+	$(LDFLAGS) -o $@
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am_librbd_mirror_test_la_rpath =
+am__DEPENDENCIES_10 = $(LIBGLOBAL) $(am__DEPENDENCIES_4) \
+	$(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) \
+	$(am__DEPENDENCIES_3)
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at librbd_replay_la_DEPENDENCIES =  \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(LIBRBD) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(LIBRADOS) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(am__DEPENDENCIES_9)
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(am__DEPENDENCIES_10)
 am__librbd_replay_la_SOURCES_DIST = rbd_replay/actions.cc \
 	rbd_replay/BufferReader.cc rbd_replay/ImageNameMap.cc \
 	rbd_replay/PendingIO.cc rbd_replay/rbd_loc.cc \
@@ -2819,7 +2902,7 @@ librbd_replay_la_OBJECTS = $(am_librbd_replay_la_OBJECTS)
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at librbd_replay_ios_la_DEPENDENCIES =  \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(LIBRBD) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(LIBRADOS) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(am__DEPENDENCIES_9) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(am__DEPENDENCIES_10) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	librbd_replay.la
 am__librbd_replay_ios_la_SOURCES_DIST = rbd_replay/ios.cc
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am_librbd_replay_ios_la_OBJECTS = rbd_replay/ios.lo
@@ -2863,23 +2946,24 @@ librbd_tp_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
 	$(CFLAGS) $(librbd_tp_la_LDFLAGS) $(LDFLAGS) -o $@
 @WITH_LTTNG_TRUE at am_librbd_tp_la_rpath = -rpath $(libdir)
 librbd_types_la_LIBADD =
-am_librbd_types_la_OBJECTS = librbd/journal/Entries.lo \
+am_librbd_types_la_OBJECTS = librbd/journal/Types.lo \
 	librbd/WatchNotifyTypes.lo
 librbd_types_la_OBJECTS = $(am_librbd_types_la_OBJECTS)
 librgw_la_LIBADD =
 am__librgw_la_SOURCES_DIST = rgw/librgw.cc rgw/rgw_acl.cc \
 	rgw/rgw_acl_s3.cc rgw/rgw_acl_swift.cc rgw/rgw_client_io.cc \
 	rgw/rgw_fcgi.cc rgw/rgw_xml.cc rgw/rgw_usage.cc \
-	rgw/rgw_json_enc.cc rgw/rgw_user.cc rgw/rgw_bucket.cc \
-	rgw/rgw_tools.cc rgw/rgw_rados.cc rgw/rgw_http_client.cc \
-	rgw/rgw_rest_client.cc rgw/rgw_rest_conn.cc rgw/rgw_op.cc \
-	rgw/rgw_basic_types.cc rgw/rgw_common.cc rgw/rgw_cache.cc \
-	rgw/rgw_formats.cc rgw/rgw_log.cc rgw/rgw_multi.cc \
-	rgw/rgw_policy_s3.cc rgw/rgw_gc.cc rgw/rgw_multi_del.cc \
-	rgw/rgw_env.cc rgw/rgw_cors.cc rgw/rgw_cors_s3.cc \
-	rgw/rgw_auth_s3.cc rgw/rgw_metadata.cc rgw/rgw_replica_log.cc \
-	rgw/rgw_keystone.cc rgw/rgw_quota.cc rgw/rgw_dencoder.cc \
-	rgw/rgw_object_expirer_core.cc
+	rgw/rgw_json_enc.cc rgw/rgw_xml_enc.cc rgw/rgw_user.cc \
+	rgw/rgw_bucket.cc rgw/rgw_tools.cc rgw/rgw_rados.cc \
+	rgw/rgw_http_client.cc rgw/rgw_rest_client.cc \
+	rgw/rgw_rest_conn.cc rgw/rgw_op.cc rgw/rgw_basic_types.cc \
+	rgw/rgw_common.cc rgw/rgw_cache.cc rgw/rgw_formats.cc \
+	rgw/rgw_log.cc rgw/rgw_multi.cc rgw/rgw_policy_s3.cc \
+	rgw/rgw_gc.cc rgw/rgw_multi_del.cc rgw/rgw_env.cc \
+	rgw/rgw_cors.cc rgw/rgw_cors_s3.cc rgw/rgw_auth_s3.cc \
+	rgw/rgw_metadata.cc rgw/rgw_replica_log.cc rgw/rgw_keystone.cc \
+	rgw/rgw_quota.cc rgw/rgw_dencoder.cc \
+	rgw/rgw_object_expirer_core.cc rgw/rgw_website.cc
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE at am_librgw_la_OBJECTS = rgw/librgw_la-librgw.lo \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	rgw/librgw_la-rgw_acl.lo \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	rgw/librgw_la-rgw_acl_s3.lo \
@@ -2889,6 +2973,7 @@ am__librgw_la_SOURCES_DIST = rgw/librgw.cc rgw/rgw_acl.cc \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	rgw/librgw_la-rgw_xml.lo \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	rgw/librgw_la-rgw_usage.lo \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	rgw/librgw_la-rgw_json_enc.lo \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	rgw/librgw_la-rgw_xml_enc.lo \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	rgw/librgw_la-rgw_user.lo \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	rgw/librgw_la-rgw_bucket.lo \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	rgw/librgw_la-rgw_tools.lo \
@@ -2915,7 +3000,8 @@ am__librgw_la_SOURCES_DIST = rgw/librgw.cc rgw/rgw_acl.cc \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	rgw/librgw_la-rgw_keystone.lo \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	rgw/librgw_la-rgw_quota.lo \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	rgw/librgw_la-rgw_dencoder.lo \
- at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	rgw/librgw_la-rgw_object_expirer_core.lo
+ at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	rgw/librgw_la-rgw_object_expirer_core.lo \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	rgw/librgw_la-rgw_website.lo
 librgw_la_OBJECTS = $(am_librgw_la_OBJECTS)
 librgw_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) \
 	$(LIBTOOLFLAGS) --mode=link $(CXXLD) $(librgw_la_CXXFLAGS) \
@@ -2925,7 +3011,7 @@ libsecret_la_DEPENDENCIES = $(am__DEPENDENCIES_1)
 am_libsecret_la_OBJECTS = common/secret.lo
 libsecret_la_OBJECTS = $(am_libsecret_la_OBJECTS)
 @LINUX_TRUE at am_libsecret_la_rpath =
- at LINUX_TRUE@libsystest_la_DEPENDENCIES = $(am__DEPENDENCIES_9)
+ at LINUX_TRUE@libsystest_la_DEPENDENCIES = $(am__DEPENDENCIES_10)
 am__libsystest_la_SOURCES_DIST = test/system/cross_process_sem.cc \
 	test/system/systest_runnable.cc \
 	test/system/systest_settings.cc
@@ -2981,7 +3067,8 @@ libsystest_la_OBJECTS = $(am_libsystest_la_OBJECTS)
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	ceph_test_stress_watch$(EXEEXT)
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__EXEEXT_9 = ceph_smalliobenchrbd$(EXEEXT) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	ceph_test_librbd$(EXEEXT) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	ceph_test_librbd_api$(EXEEXT)
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	ceph_test_librbd_api$(EXEEXT) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	ceph_test_rbd_mirror$(EXEEXT)
 @ENABLE_CLIENT_TRUE@@LINUX_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__EXEEXT_10 = ceph_test_librbd_fsx$(EXEEXT)
 @ENABLE_CLIENT_TRUE@@WITH_RADOSSTRIPER_TRUE@@WITH_RADOS_TRUE at am__EXEEXT_11 = ceph_test_rados_striper_api_io$(EXEEXT) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSSTRIPER_TRUE@@WITH_RADOS_TRUE@	ceph_test_rados_striper_api_aio$(EXEEXT) \
@@ -3057,26 +3144,27 @@ am__EXEEXT_27 = $(am__EXEEXT_1) $(am__EXEEXT_2) $(am__EXEEXT_3) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at am__EXEEXT_34 = rados$(EXEEXT)
 @ENABLE_CLIENT_TRUE@@LINUX_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__EXEEXT_35 = rbd$(EXEEXT) \
 @ENABLE_CLIENT_TRUE@@LINUX_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	rbd-nbd$(EXEEXT)
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at am__EXEEXT_36 = ceph-objectstore-tool$(EXEEXT)
- at ENABLE_CLIENT_TRUE@@ENABLE_SERVER_TRUE@@WITH_MDS_TRUE@@WITH_RADOS_TRUE at am__EXEEXT_37 = cephfs-journal-tool$(EXEEXT) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__EXEEXT_36 = rbd-mirror$(EXEEXT)
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at am__EXEEXT_37 = ceph-objectstore-tool$(EXEEXT)
+ at ENABLE_CLIENT_TRUE@@ENABLE_SERVER_TRUE@@WITH_MDS_TRUE@@WITH_RADOS_TRUE at am__EXEEXT_38 = cephfs-journal-tool$(EXEEXT) \
 @ENABLE_CLIENT_TRUE@@ENABLE_SERVER_TRUE@@WITH_MDS_TRUE@@WITH_RADOS_TRUE@	cephfs-table-tool$(EXEEXT) \
 @ENABLE_CLIENT_TRUE@@ENABLE_SERVER_TRUE@@WITH_MDS_TRUE@@WITH_RADOS_TRUE@	cephfs-data-scan$(EXEEXT)
- at ENABLE_CLIENT_TRUE@am__EXEEXT_38 = ceph-syn$(EXEEXT)
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at am__EXEEXT_39 =  \
+ at ENABLE_CLIENT_TRUE@am__EXEEXT_39 = ceph-syn$(EXEEXT)
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at am__EXEEXT_40 =  \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	librados-config$(EXEEXT)
- at ENABLE_CLIENT_TRUE@@WITH_FUSE_TRUE@@WITH_RADOS_TRUE at am__EXEEXT_40 = ceph-fuse$(EXEEXT)
- at ENABLE_CLIENT_TRUE@@WITH_FUSE_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__EXEEXT_41 = rbd-fuse$(EXEEXT)
- at ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE at am__EXEEXT_42 = cephfs$(EXEEXT)
- at ENABLE_SERVER_TRUE@@WITH_MON_TRUE at am__EXEEXT_43 = ceph-mon$(EXEEXT)
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at am__EXEEXT_44 = ceph-osd$(EXEEXT)
- at ENABLE_SERVER_TRUE@@WITH_MDS_TRUE at am__EXEEXT_45 = ceph-mds$(EXEEXT)
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at am__EXEEXT_46 = unittest_erasure_code_plugin$(EXEEXT) \
+ at ENABLE_CLIENT_TRUE@@WITH_FUSE_TRUE@@WITH_RADOS_TRUE at am__EXEEXT_41 = ceph-fuse$(EXEEXT)
+ at ENABLE_CLIENT_TRUE@@WITH_FUSE_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__EXEEXT_42 = rbd-fuse$(EXEEXT)
+ at ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE at am__EXEEXT_43 = cephfs$(EXEEXT)
+ at ENABLE_SERVER_TRUE@@WITH_MON_TRUE at am__EXEEXT_44 = ceph-mon$(EXEEXT)
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at am__EXEEXT_45 = ceph-osd$(EXEEXT)
+ at ENABLE_SERVER_TRUE@@WITH_MDS_TRUE at am__EXEEXT_46 = ceph-mds$(EXEEXT)
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at am__EXEEXT_47 = unittest_erasure_code_plugin$(EXEEXT) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	unittest_erasure_code$(EXEEXT) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	unittest_erasure_code_jerasure$(EXEEXT) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	unittest_erasure_code_plugin_jerasure$(EXEEXT)
- at ENABLE_SERVER_TRUE@@WITH_BETTER_YASM_ELF64_TRUE@@WITH_OSD_TRUE at am__EXEEXT_47 = unittest_erasure_code_isa$(EXEEXT) \
+ at ENABLE_SERVER_TRUE@@WITH_BETTER_YASM_ELF64_TRUE@@WITH_OSD_TRUE at am__EXEEXT_48 = unittest_erasure_code_isa$(EXEEXT) \
 @ENABLE_SERVER_TRUE@@WITH_BETTER_YASM_ELF64_TRUE@@WITH_OSD_TRUE@	unittest_erasure_code_plugin_isa$(EXEEXT)
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at am__EXEEXT_48 = unittest_erasure_code_lrc$(EXEEXT) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at am__EXEEXT_49 = unittest_erasure_code_lrc$(EXEEXT) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	unittest_erasure_code_plugin_lrc$(EXEEXT) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	unittest_erasure_code_shec$(EXEEXT) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	unittest_erasure_code_shec_all$(EXEEXT) \
@@ -3086,40 +3174,47 @@ am__EXEEXT_27 = $(am__EXEEXT_1) $(am__EXEEXT_2) $(am__EXEEXT_3) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	unittest_erasure_code_example$(EXEEXT) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	unittest_compression_plugin$(EXEEXT) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	unittest_compression_snappy$(EXEEXT) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	unittest_compression_plugin_snappy$(EXEEXT)
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at am__EXEEXT_49 = unittest_librados$(EXEEXT) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	unittest_compression_plugin_snappy$(EXEEXT) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	unittest_compression_zlib$(EXEEXT) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	unittest_compression_plugin_zlib$(EXEEXT)
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at am__EXEEXT_50 = unittest_librados$(EXEEXT) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	unittest_librados_config$(EXEEXT) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	unittest_journal$(EXEEXT)
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__EXEEXT_50 = unittest_rbd_replay$(EXEEXT)
- at ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE at am__EXEEXT_51 = unittest_encoding$(EXEEXT) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__EXEEXT_51 = unittest_rbd_replay$(EXEEXT)
+ at ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE at am__EXEEXT_52 = unittest_encoding$(EXEEXT) \
 @ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE@	unittest_base64$(EXEEXT) \
 @ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE@	unittest_run_cmd$(EXEEXT) \
 @ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE@	unittest_simple_spin$(EXEEXT) \
 @ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE@	unittest_libcephfs_config$(EXEEXT)
- at ENABLE_SERVER_TRUE@@LINUX_TRUE at am__EXEEXT_52 =  \
+ at ENABLE_SERVER_TRUE@@LINUX_TRUE at am__EXEEXT_53 =  \
 @ENABLE_SERVER_TRUE@@LINUX_TRUE@	unittest_bluefs$(EXEEXT) \
 @ENABLE_SERVER_TRUE@@LINUX_TRUE@	unittest_bluestore_types$(EXEEXT)
- at ENABLE_SERVER_TRUE@@WITH_MON_TRUE at am__EXEEXT_53 = unittest_mon_moncap$(EXEEXT) \
+ at ENABLE_SERVER_TRUE@am__EXEEXT_54 = unittest_transaction$(EXEEXT)
+ at ENABLE_SERVER_TRUE@@WITH_MON_TRUE at am__EXEEXT_55 = unittest_mon_moncap$(EXEEXT) \
 @ENABLE_SERVER_TRUE@@WITH_MON_TRUE@	unittest_mon_pgmap$(EXEEXT)
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at am__EXEEXT_54 = unittest_ecbackend$(EXEEXT) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at am__EXEEXT_56 = unittest_ecbackend$(EXEEXT) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	unittest_osdscrub$(EXEEXT) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	unittest_pglog$(EXEEXT) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	unittest_hitset$(EXEEXT) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	unittest_osd_osdcap$(EXEEXT) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	unittest_pageset$(EXEEXT)
- at ENABLE_SERVER_TRUE@@WITH_SLIBROCKSDB_TRUE at am__EXEEXT_55 = unittest_rocksdb_option_static$(EXEEXT)
- at ENABLE_SERVER_TRUE@@WITH_DLIBROCKSDB_TRUE at am__EXEEXT_56 = unittest_rocksdb_option$(EXEEXT)
- at ENABLE_SERVER_TRUE@am__EXEEXT_57 = unittest_chain_xattr$(EXEEXT) \
+ at ENABLE_SERVER_TRUE@@WITH_SLIBROCKSDB_TRUE at am__EXEEXT_57 = unittest_rocksdb_option_static$(EXEEXT)
+ at ENABLE_SERVER_TRUE@@WITH_DLIBROCKSDB_TRUE at am__EXEEXT_58 = unittest_rocksdb_option$(EXEEXT)
+ at ENABLE_SERVER_TRUE@am__EXEEXT_59 = unittest_chain_xattr$(EXEEXT) \
 @ENABLE_SERVER_TRUE@	unittest_lfnindex$(EXEEXT)
- at ENABLE_SERVER_TRUE@@WITH_MDS_TRUE at am__EXEEXT_58 = unittest_mds_authcap$(EXEEXT)
- at LINUX_TRUE@am__EXEEXT_59 = unittest_blkdev$(EXEEXT)
-am__EXEEXT_60 = $(am__EXEEXT_46) $(am__EXEEXT_47) $(am__EXEEXT_48) \
-	$(am__EXEEXT_49) $(am__EXEEXT_50) $(am__EXEEXT_51) \
-	$(am__EXEEXT_52) $(am__EXEEXT_53) $(am__EXEEXT_54) \
-	$(am__EXEEXT_55) $(am__EXEEXT_56) $(am__EXEEXT_57) \
-	$(am__EXEEXT_58) unittest_addrs$(EXEEXT) $(am__EXEEXT_59) \
-	unittest_bloom_filter$(EXEEXT) unittest_histogram$(EXEEXT) \
-	unittest_prioritized_queue$(EXEEXT) unittest_str_map$(EXEEXT) \
+ at ENABLE_SERVER_TRUE@@WITH_MDS_TRUE at am__EXEEXT_60 = unittest_mds_authcap$(EXEEXT)
+ at LINUX_TRUE@am__EXEEXT_61 = unittest_blkdev$(EXEEXT)
+am__EXEEXT_62 = $(am__EXEEXT_47) $(am__EXEEXT_48) $(am__EXEEXT_49) \
+	$(am__EXEEXT_50) $(am__EXEEXT_51) $(am__EXEEXT_52) \
+	$(am__EXEEXT_53) $(am__EXEEXT_54) $(am__EXEEXT_55) \
+	$(am__EXEEXT_56) $(am__EXEEXT_57) $(am__EXEEXT_58) \
+	$(am__EXEEXT_59) $(am__EXEEXT_60) unittest_addrs$(EXEEXT) \
+	$(am__EXEEXT_61) unittest_bloom_filter$(EXEEXT) \
+	unittest_histogram$(EXEEXT) \
+	unittest_prioritized_queue$(EXEEXT) \
+	unittest_weighted_priority_queue$(EXEEXT) \
+	unittest_str_map$(EXEEXT) unittest_mutex_debug$(EXEEXT) \
+	unittest_shunique_lock$(EXEEXT) \
 	unittest_sharedptr_registry$(EXEEXT) \
 	unittest_shared_cache$(EXEEXT) \
 	unittest_sloppy_crc_map$(EXEEXT) unittest_time$(EXEEXT) \
@@ -3145,42 +3240,46 @@ am__EXEEXT_60 = $(am__EXEEXT_46) $(am__EXEEXT_47) $(am__EXEEXT_48) \
 	unittest_daemon_config$(EXEEXT) unittest_ipaddr$(EXEEXT) \
 	unittest_texttable$(EXEEXT) unittest_on_exit$(EXEEXT) \
 	unittest_readahead$(EXEEXT) unittest_tableformatter$(EXEEXT) \
-	unittest_bit_vector$(EXEEXT)
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__EXEEXT_61 = unittest_librbd$(EXEEXT)
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at am__EXEEXT_62 = ceph_erasure_code_non_regression$(EXEEXT)
- at ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE at am__EXEEXT_63 =  \
+	unittest_bit_vector$(EXEEXT) unittest_interval_set$(EXEEXT)
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__EXEEXT_63 = unittest_librbd$(EXEEXT) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	unittest_rbd_mirror$(EXEEXT)
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at am__EXEEXT_64 = ceph_erasure_code_non_regression$(EXEEXT)
+ at ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE at am__EXEEXT_65 =  \
 @ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@	simple_server$(EXEEXT) \
 @ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@	simple_client$(EXEEXT) \
 @ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@	xio_server$(EXEEXT) \
 @ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@	xio_client$(EXEEXT)
- at ENABLE_SERVER_TRUE@@WITH_MON_TRUE at am__EXEEXT_64 = get_command_descriptions$(EXEEXT)
- at ENABLE_SERVER_TRUE@@LINUX_TRUE at am__EXEEXT_65 = mount.ceph$(EXEEXT)
+ at ENABLE_SERVER_TRUE@@WITH_MON_TRUE at am__EXEEXT_66 = get_command_descriptions$(EXEEXT)
+ at ENABLE_SERVER_TRUE@@LINUX_TRUE at am__EXEEXT_67 = mount.ceph$(EXEEXT)
 PROGRAMS = $(bin_PROGRAMS) $(noinst_PROGRAMS) $(sbin_PROGRAMS) \
 	$(su_sbin_PROGRAMS)
 am_ceph_authtool_OBJECTS = tools/ceph_authtool.$(OBJEXT)
 ceph_authtool_OBJECTS = $(am_ceph_authtool_OBJECTS)
-ceph_authtool_DEPENDENCIES = $(am__DEPENDENCIES_9)
+ceph_authtool_DEPENDENCIES = $(am__DEPENDENCIES_10)
 am__ceph_bluefs_tool_SOURCES_DIST = os/bluestore/bluefs_tool.cc
 @ENABLE_SERVER_TRUE@@WITH_LIBAIO_TRUE at am_ceph_bluefs_tool_OBJECTS = os/bluestore/bluefs_tool.$(OBJEXT)
 ceph_bluefs_tool_OBJECTS = $(am_ceph_bluefs_tool_OBJECTS)
- at WITH_LIBZFS_TRUE@am__DEPENDENCIES_10 = libos_zfs.a
-am__DEPENDENCIES_11 = libkv.a $(am__append_26) $(am__DEPENDENCIES_1) \
+ at WITH_LIBZFS_TRUE@am__DEPENDENCIES_11 = libos_zfs.a
+ at WITH_SPDK_TRUE@am__DEPENDENCIES_12 = $(LIBSPDK_LIBS) \
+ at WITH_SPDK_TRUE@	$(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1)
+am__DEPENDENCIES_13 = libkv.a $(am__append_27) $(am__DEPENDENCIES_1) \
 	$(am__DEPENDENCIES_1)
-am__DEPENDENCIES_12 = libos.a $(am__DEPENDENCIES_1) \
-	$(am__DEPENDENCIES_10) $(LIBOS_TYPES) $(am__DEPENDENCIES_11)
+am__DEPENDENCIES_14 = libos.a $(am__DEPENDENCIES_1) \
+	$(am__DEPENDENCIES_11) $(am__DEPENDENCIES_12) $(LIBOS_TYPES) \
+	$(am__DEPENDENCIES_13) $(am__DEPENDENCIES_1)
 @ENABLE_SERVER_TRUE@@WITH_LIBAIO_TRUE at ceph_bluefs_tool_DEPENDENCIES =  \
- at ENABLE_SERVER_TRUE@@WITH_LIBAIO_TRUE@	$(am__DEPENDENCIES_12) \
- at ENABLE_SERVER_TRUE@@WITH_LIBAIO_TRUE@	$(am__DEPENDENCIES_9)
+ at ENABLE_SERVER_TRUE@@WITH_LIBAIO_TRUE@	$(am__DEPENDENCIES_14) \
+ at ENABLE_SERVER_TRUE@@WITH_LIBAIO_TRUE@	$(am__DEPENDENCIES_10)
 am__ceph_client_debug_SOURCES_DIST = tools/ceph-client-debug.cc
 @ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE at am_ceph_client_debug_OBJECTS = tools/ceph-client-debug.$(OBJEXT)
 ceph_client_debug_OBJECTS = $(am_ceph_client_debug_OBJECTS)
 @ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE at ceph_client_debug_DEPENDENCIES = $(LIBCEPHFS) \
 @ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE@	$(LIBCLIENT) \
- at ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_9) \
- at ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE@	$(LIBCOMMON)
+ at ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_10) \
+ at ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_4)
 am_ceph_conf_OBJECTS = tools/ceph_conf.$(OBJEXT)
 ceph_conf_OBJECTS = $(am_ceph_conf_OBJECTS)
-ceph_conf_DEPENDENCIES = $(am__DEPENDENCIES_9)
+ceph_conf_DEPENDENCIES = $(am__DEPENDENCIES_10)
 am__ceph_dencoder_SOURCES_DIST = test/encoding/ceph_dencoder.cc \
 	mds/Capability.cc mds/MDSDaemon.cc mds/MDSRank.cc \
 	mds/Beacon.cc mds/locks.c mds/journal.cc mds/Server.cc \
@@ -3196,7 +3295,7 @@ am__ceph_dencoder_SOURCES_DIST = test/encoding/ceph_dencoder.cc \
 	perfglue/disabled_stubs.cc rgw/rgw_dencoder.cc rgw/rgw_acl.cc \
 	rgw/rgw_basic_types.cc rgw/rgw_common.cc rgw/rgw_env.cc \
 	rgw/rgw_json_enc.cc
-am__objects_38 = mds/ceph_dencoder-Capability.$(OBJEXT) \
+am__objects_41 = mds/ceph_dencoder-Capability.$(OBJEXT) \
 	mds/ceph_dencoder-MDSDaemon.$(OBJEXT) \
 	mds/ceph_dencoder-MDSRank.$(OBJEXT) \
 	mds/ceph_dencoder-Beacon.$(OBJEXT) \
@@ -3228,25 +3327,25 @@ am__objects_38 = mds/ceph_dencoder-Capability.$(OBJEXT) \
 	mds/ceph_dencoder-MDSContext.$(OBJEXT) \
 	mds/ceph_dencoder-MDSAuthCaps.$(OBJEXT) \
 	mds/ceph_dencoder-MDLog.$(OBJEXT)
- at ENABLE_CLIENT_TRUE@am__objects_39 = $(am__objects_38)
- at ENABLE_CLIENT_TRUE@am__objects_40 =  \
+ at ENABLE_CLIENT_TRUE@am__objects_42 = $(am__objects_41)
+ at ENABLE_CLIENT_TRUE@am__objects_43 =  \
 @ENABLE_CLIENT_TRUE@	rgw/ceph_dencoder-rgw_dencoder.$(OBJEXT) \
 @ENABLE_CLIENT_TRUE@	rgw/ceph_dencoder-rgw_acl.$(OBJEXT) \
 @ENABLE_CLIENT_TRUE@	rgw/ceph_dencoder-rgw_basic_types.$(OBJEXT) \
 @ENABLE_CLIENT_TRUE@	rgw/ceph_dencoder-rgw_common.$(OBJEXT) \
 @ENABLE_CLIENT_TRUE@	rgw/ceph_dencoder-rgw_env.$(OBJEXT) \
 @ENABLE_CLIENT_TRUE@	rgw/ceph_dencoder-rgw_json_enc.$(OBJEXT)
-am__objects_41 = $(am__objects_39) \
+am__objects_44 = $(am__objects_42) \
 	perfglue/ceph_dencoder-disabled_heap_profiler.$(OBJEXT) \
 	perfglue/ceph_dencoder-disabled_stubs.$(OBJEXT) \
-	$(am__objects_40)
+	$(am__objects_43)
 @ENABLE_CLIENT_TRUE at am_ceph_dencoder_OBJECTS = test/encoding/ceph_dencoder-ceph_dencoder.$(OBJEXT) \
- at ENABLE_CLIENT_TRUE@	$(am__objects_41)
+ at ENABLE_CLIENT_TRUE@	$(am__objects_44)
 ceph_dencoder_OBJECTS = $(am_ceph_dencoder_OBJECTS)
 @ENABLE_CLIENT_TRUE at ceph_dencoder_DEPENDENCIES = $(LIBRBD_TYPES) \
 @ENABLE_CLIENT_TRUE@	$(LIBOSD_TYPES) $(LIBOS_TYPES) \
 @ENABLE_CLIENT_TRUE@	$(LIBMON_TYPES) $(DENCODER_DEPS) \
- at ENABLE_CLIENT_TRUE@	$(am__DEPENDENCIES_9)
+ at ENABLE_CLIENT_TRUE@	$(am__DEPENDENCIES_10)
 ceph_dencoder_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(ceph_dencoder_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) $(LDFLAGS) \
@@ -3255,13 +3354,13 @@ am__ceph_fuse_SOURCES_DIST = ceph_fuse.cc
 @ENABLE_CLIENT_TRUE@@WITH_FUSE_TRUE@@WITH_RADOS_TRUE at am_ceph_fuse_OBJECTS = ceph_fuse.$(OBJEXT)
 ceph_fuse_OBJECTS = $(am_ceph_fuse_OBJECTS)
 @ENABLE_CLIENT_TRUE@@WITH_FUSE_TRUE@@WITH_RADOS_TRUE at ceph_fuse_DEPENDENCIES = $(LIBCLIENT_FUSE) \
- at ENABLE_CLIENT_TRUE@@WITH_FUSE_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_9)
+ at ENABLE_CLIENT_TRUE@@WITH_FUSE_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_10)
 am__ceph_kvstore_tool_SOURCES_DIST = tools/ceph_kvstore_tool.cc
 @ENABLE_SERVER_TRUE at am_ceph_kvstore_tool_OBJECTS = tools/ceph_kvstore_tool-ceph_kvstore_tool.$(OBJEXT)
 ceph_kvstore_tool_OBJECTS = $(am_ceph_kvstore_tool_OBJECTS)
 @ENABLE_SERVER_TRUE at ceph_kvstore_tool_DEPENDENCIES =  \
- at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_12) \
- at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_9)
+ at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_14) \
+ at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_10)
 ceph_kvstore_tool_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(ceph_kvstore_tool_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
@@ -3270,48 +3369,49 @@ am__ceph_mds_SOURCES_DIST = ceph_mds.cc
 @ENABLE_SERVER_TRUE@@WITH_MDS_TRUE at am_ceph_mds_OBJECTS =  \
 @ENABLE_SERVER_TRUE@@WITH_MDS_TRUE@	ceph_mds.$(OBJEXT)
 ceph_mds_OBJECTS = $(am_ceph_mds_OBJECTS)
-am__DEPENDENCIES_13 = libperfglue.la $(am__DEPENDENCIES_1) \
+am__DEPENDENCIES_15 = libperfglue.la $(am__DEPENDENCIES_1) \
 	$(am__DEPENDENCIES_1)
-am__DEPENDENCIES_14 = libmds.la $(am__DEPENDENCIES_1) \
-	$(am__DEPENDENCIES_13)
+am__DEPENDENCIES_16 = libmds.la $(am__DEPENDENCIES_1) \
+	$(am__DEPENDENCIES_15)
 @ENABLE_SERVER_TRUE@@WITH_MDS_TRUE at ceph_mds_DEPENDENCIES =  \
- at ENABLE_SERVER_TRUE@@WITH_MDS_TRUE@	$(am__DEPENDENCIES_14) \
+ at ENABLE_SERVER_TRUE@@WITH_MDS_TRUE@	$(am__DEPENDENCIES_16) \
 @ENABLE_SERVER_TRUE@@WITH_MDS_TRUE@	$(LIBOSDC) \
- at ENABLE_SERVER_TRUE@@WITH_MDS_TRUE@	$(am__DEPENDENCIES_9) \
- at ENABLE_SERVER_TRUE@@WITH_MDS_TRUE@	$(LIBCOMMON)
+ at ENABLE_SERVER_TRUE@@WITH_MDS_TRUE@	$(am__DEPENDENCIES_10) \
+ at ENABLE_SERVER_TRUE@@WITH_MDS_TRUE@	$(am__DEPENDENCIES_4)
 am__ceph_mon_SOURCES_DIST = ceph_mon.cc
 @ENABLE_SERVER_TRUE@@WITH_MON_TRUE at am_ceph_mon_OBJECTS =  \
 @ENABLE_SERVER_TRUE@@WITH_MON_TRUE@	ceph_mon.$(OBJEXT)
 ceph_mon_OBJECTS = $(am_ceph_mon_OBJECTS)
-am__DEPENDENCIES_15 = libmon.a $(am__DEPENDENCIES_1) \
-	$(am__DEPENDENCIES_13) $(LIBMON_TYPES)
+am__DEPENDENCIES_17 = libmon.a $(am__DEPENDENCIES_1) \
+	$(am__DEPENDENCIES_15) $(LIBMON_TYPES)
 @ENABLE_SERVER_TRUE@@WITH_MON_TRUE at ceph_mon_DEPENDENCIES =  \
- at ENABLE_SERVER_TRUE@@WITH_MON_TRUE@	$(am__DEPENDENCIES_15) \
- at ENABLE_SERVER_TRUE@@WITH_MON_TRUE@	$(am__DEPENDENCIES_12) \
- at ENABLE_SERVER_TRUE@@WITH_MON_TRUE@	$(am__DEPENDENCIES_9) \
- at ENABLE_SERVER_TRUE@@WITH_MON_TRUE@	$(LIBCOMMON) $(LIBAUTH) \
- at ENABLE_SERVER_TRUE@@WITH_MON_TRUE@	$(LIBCOMMON) \
+ at ENABLE_SERVER_TRUE@@WITH_MON_TRUE@	$(am__DEPENDENCIES_17) \
+ at ENABLE_SERVER_TRUE@@WITH_MON_TRUE@	$(am__DEPENDENCIES_14) \
+ at ENABLE_SERVER_TRUE@@WITH_MON_TRUE@	$(am__DEPENDENCIES_10) \
+ at ENABLE_SERVER_TRUE@@WITH_MON_TRUE@	$(am__DEPENDENCIES_4) \
+ at ENABLE_SERVER_TRUE@@WITH_MON_TRUE@	$(LIBAUTH) \
+ at ENABLE_SERVER_TRUE@@WITH_MON_TRUE@	$(am__DEPENDENCIES_4) \
 @ENABLE_SERVER_TRUE@@WITH_MON_TRUE@	$(LIBMON_TYPES)
 am__ceph_monstore_tool_SOURCES_DIST = tools/ceph_monstore_tool.cc
 @ENABLE_SERVER_TRUE at am_ceph_monstore_tool_OBJECTS =  \
 @ENABLE_SERVER_TRUE@	tools/ceph_monstore_tool.$(OBJEXT)
 ceph_monstore_tool_OBJECTS = $(am_ceph_monstore_tool_OBJECTS)
 @ENABLE_SERVER_TRUE at ceph_monstore_tool_DEPENDENCIES =  \
- at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_12) \
- at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_9) \
+ at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_14) \
+ at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_10) \
 @ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_1)
 am__ceph_objectstore_tool_SOURCES_DIST =  \
 	tools/ceph_objectstore_tool.cc tools/RadosDump.cc
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at am_ceph_objectstore_tool_OBJECTS = tools/ceph_objectstore_tool.$(OBJEXT) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	tools/RadosDump.$(OBJEXT)
 ceph_objectstore_tool_OBJECTS = $(am_ceph_objectstore_tool_OBJECTS)
-am__DEPENDENCIES_16 = libosd.a $(am__DEPENDENCIES_1) $(LIBOSDC) \
-	$(am__DEPENDENCIES_12) $(am__DEPENDENCIES_13) $(LIBOSD_TYPES) \
+am__DEPENDENCIES_18 = libosd.a $(am__DEPENDENCIES_1) $(LIBOSDC) \
+	$(am__DEPENDENCIES_14) $(am__DEPENDENCIES_15) $(LIBOSD_TYPES) \
 	$(LIBOS_TYPES)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at ceph_objectstore_tool_DEPENDENCIES =  \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_16) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_12) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_9) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_18) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_14) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_10) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_1) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_1)
 am__ceph_osd_SOURCES_DIST = ceph_osd.cc
@@ -3319,39 +3419,39 @@ am__ceph_osd_SOURCES_DIST = ceph_osd.cc
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	ceph_osd.$(OBJEXT)
 ceph_osd_OBJECTS = $(am_ceph_osd_OBJECTS)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at ceph_osd_DEPENDENCIES = $(LIBOSDC) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_16) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_18) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(LIBOSD_TYPES) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(LIBOS_TYPES) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_12) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_9) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(LIBCOMMON) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_14) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_10) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_4) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_1)
 am__ceph_osdomap_tool_SOURCES_DIST = tools/ceph_osdomap_tool.cc
 @ENABLE_SERVER_TRUE at am_ceph_osdomap_tool_OBJECTS =  \
 @ENABLE_SERVER_TRUE@	tools/ceph_osdomap_tool.$(OBJEXT)
 ceph_osdomap_tool_OBJECTS = $(am_ceph_osdomap_tool_OBJECTS)
 @ENABLE_SERVER_TRUE at ceph_osdomap_tool_DEPENDENCIES =  \
- at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_12) \
- at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_9) \
+ at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_14) \
+ at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_10) \
 @ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_1)
 am__ceph_syn_SOURCES_DIST = ceph_syn.cc client/SyntheticClient.cc
 @ENABLE_CLIENT_TRUE at am_ceph_syn_OBJECTS = ceph_syn.$(OBJEXT) \
 @ENABLE_CLIENT_TRUE@	client/SyntheticClient.$(OBJEXT)
 ceph_syn_OBJECTS = $(am_ceph_syn_OBJECTS)
 @ENABLE_CLIENT_TRUE at ceph_syn_DEPENDENCIES = $(LIBCLIENT) \
- at ENABLE_CLIENT_TRUE@	$(am__DEPENDENCIES_9)
+ at ENABLE_CLIENT_TRUE@	$(am__DEPENDENCIES_10)
 am_ceph_bench_log_OBJECTS = test/bench_log.$(OBJEXT)
 ceph_bench_log_OBJECTS = $(am_ceph_bench_log_OBJECTS)
-ceph_bench_log_DEPENDENCIES = $(am__DEPENDENCIES_9)
+ceph_bench_log_DEPENDENCIES = $(am__DEPENDENCIES_10)
 am__ceph_erasure_code_SOURCES_DIST =  \
 	test/erasure-code/ceph_erasure_code.cc
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at am_ceph_erasure_code_OBJECTS = test/erasure-code/ceph_erasure_code.$(OBJEXT)
 ceph_erasure_code_OBJECTS = $(am_ceph_erasure_code_OBJECTS)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at ceph_erasure_code_DEPENDENCIES =  \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_16) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(LIBCOMMON) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_18) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_4) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_1) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_9) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_10) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_1)
 am__ceph_erasure_code_benchmark_SOURCES_DIST =  \
 	erasure-code/ErasureCode.cc \
@@ -3361,10 +3461,10 @@ am__ceph_erasure_code_benchmark_SOURCES_DIST =  \
 ceph_erasure_code_benchmark_OBJECTS =  \
 	$(am_ceph_erasure_code_benchmark_OBJECTS)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at ceph_erasure_code_benchmark_DEPENDENCIES =  \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_16) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(LIBCOMMON) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_18) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_4) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_1) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_9) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_10) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_1)
 am__ceph_erasure_code_non_regression_SOURCES_DIST =  \
 	test/erasure-code/ceph_erasure_code_non_regression.cc
@@ -3372,10 +3472,10 @@ am__ceph_erasure_code_non_regression_SOURCES_DIST =  \
 ceph_erasure_code_non_regression_OBJECTS =  \
 	$(am_ceph_erasure_code_non_regression_OBJECTS)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at ceph_erasure_code_non_regression_DEPENDENCIES =  \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_16) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(LIBCOMMON) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_18) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_4) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_1) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_9) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_10) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_1)
 am__ceph_kvstorebench_SOURCES_DIST = test/kv_store_bench.cc \
 	key_value_store/kv_flat_btree_async.cc
@@ -3384,27 +3484,27 @@ am__ceph_kvstorebench_SOURCES_DIST = test/kv_store_bench.cc \
 ceph_kvstorebench_OBJECTS = $(am_ceph_kvstorebench_OBJECTS)
 @ENABLE_CLIENT_TRUE@@LINUX_TRUE@@WITH_RADOS_TRUE at ceph_kvstorebench_DEPENDENCIES =  \
 @ENABLE_CLIENT_TRUE@@LINUX_TRUE@@WITH_RADOS_TRUE@	$(LIBRADOS) \
- at ENABLE_CLIENT_TRUE@@LINUX_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_9)
+ at ENABLE_CLIENT_TRUE@@LINUX_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_10)
 am__ceph_multi_stress_watch_SOURCES_DIST = test/multi_stress_watch.cc
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at am_ceph_multi_stress_watch_OBJECTS = test/multi_stress_watch.$(OBJEXT)
 ceph_multi_stress_watch_OBJECTS =  \
 	$(am_ceph_multi_stress_watch_OBJECTS)
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at ceph_multi_stress_watch_DEPENDENCIES =  \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(LIBRADOS) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_9) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_8)
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_10) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_9)
 am__ceph_objectstore_bench_SOURCES_DIST = test/objectstore_bench.cc
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at am_ceph_objectstore_bench_OBJECTS = test/objectstore_bench.$(OBJEXT)
 ceph_objectstore_bench_OBJECTS = $(am_ceph_objectstore_bench_OBJECTS)
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at ceph_objectstore_bench_DEPENDENCIES =  \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_12) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_9)
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_14) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_10)
 am__ceph_omapbench_SOURCES_DIST = test/omap_bench.cc
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at am_ceph_omapbench_OBJECTS = test/omap_bench.$(OBJEXT)
 ceph_omapbench_OBJECTS = $(am_ceph_omapbench_OBJECTS)
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at ceph_omapbench_DEPENDENCIES =  \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(LIBRADOS) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_9)
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_10)
 am__ceph_perf_local_SOURCES_DIST = test/perf_local.cc \
 	test/perf_helper.cc
 @ENABLE_SERVER_TRUE at am_ceph_perf_local_OBJECTS =  \
@@ -3412,8 +3512,8 @@ am__ceph_perf_local_SOURCES_DIST = test/perf_local.cc \
 @ENABLE_SERVER_TRUE@	test/ceph_perf_local-perf_helper.$(OBJEXT)
 ceph_perf_local_OBJECTS = $(am_ceph_perf_local_OBJECTS)
 @ENABLE_SERVER_TRUE at ceph_perf_local_DEPENDENCIES =  \
- at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_12) \
- at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_9)
+ at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_14) \
+ at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_10)
 ceph_perf_local_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(ceph_perf_local_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
@@ -3422,14 +3522,14 @@ am__ceph_perf_msgr_client_SOURCES_DIST =  \
 	test/msgr/perf_msgr_client.cc
 @ENABLE_SERVER_TRUE at am_ceph_perf_msgr_client_OBJECTS = test/msgr/ceph_perf_msgr_client-perf_msgr_client.$(OBJEXT)
 ceph_perf_msgr_client_OBJECTS = $(am_ceph_perf_msgr_client_OBJECTS)
-am__DEPENDENCIES_17 = $(top_builddir)/src/gmock/lib/libgmock_main.la \
+am__DEPENDENCIES_19 = $(top_builddir)/src/gmock/lib/libgmock_main.la \
 	$(top_builddir)/src/gmock/lib/libgmock.la \
 	$(top_builddir)/src/gmock/gtest/lib/libgtest.la \
 	$(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1)
 @ENABLE_SERVER_TRUE at ceph_perf_msgr_client_DEPENDENCIES =  \
- at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_12) \
- at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_17) \
- at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_9)
+ at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_14) \
+ at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_19) \
+ at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_10)
 ceph_perf_msgr_client_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(ceph_perf_msgr_client_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
@@ -3439,9 +3539,9 @@ am__ceph_perf_msgr_server_SOURCES_DIST =  \
 @ENABLE_SERVER_TRUE at am_ceph_perf_msgr_server_OBJECTS = test/msgr/ceph_perf_msgr_server-perf_msgr_server.$(OBJEXT)
 ceph_perf_msgr_server_OBJECTS = $(am_ceph_perf_msgr_server_OBJECTS)
 @ENABLE_SERVER_TRUE at ceph_perf_msgr_server_DEPENDENCIES =  \
- at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_12) \
- at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_17) \
- at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_9)
+ at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_14) \
+ at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_19) \
+ at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_10)
 ceph_perf_msgr_server_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(ceph_perf_msgr_server_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
@@ -3451,23 +3551,23 @@ am__ceph_perf_objectstore_SOURCES_DIST =  \
 @ENABLE_SERVER_TRUE at am_ceph_perf_objectstore_OBJECTS = test/objectstore/ceph_perf_objectstore-ObjectStoreTransactionBenchmark.$(OBJEXT)
 ceph_perf_objectstore_OBJECTS = $(am_ceph_perf_objectstore_OBJECTS)
 @ENABLE_SERVER_TRUE at ceph_perf_objectstore_DEPENDENCIES =  \
- at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_12) \
- at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_17) \
- at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_9)
+ at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_14) \
+ at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_19) \
+ at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_10)
 ceph_perf_objectstore_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(ceph_perf_objectstore_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
 	$(LDFLAGS) -o $@
 am_ceph_psim_OBJECTS = tools/psim.$(OBJEXT)
 ceph_psim_OBJECTS = $(am_ceph_psim_OBJECTS)
-ceph_psim_DEPENDENCIES = $(am__DEPENDENCIES_9)
+ceph_psim_DEPENDENCIES = $(am__DEPENDENCIES_10)
 am__ceph_radosacl_SOURCES_DIST = tools/radosacl.cc
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at am_ceph_radosacl_OBJECTS =  \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	tools/radosacl.$(OBJEXT)
 ceph_radosacl_OBJECTS = $(am_ceph_radosacl_OBJECTS)
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at ceph_radosacl_DEPENDENCIES =  \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(LIBRADOS) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_9)
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_10)
 am__ceph_rgw_jsonparser_SOURCES_DIST = rgw/rgw_jsonparser.cc \
 	rgw/rgw_common.cc rgw/rgw_env.cc rgw/rgw_json_enc.cc
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE at am_ceph_rgw_jsonparser_OBJECTS = rgw/rgw_jsonparser.$(OBJEXT) \
@@ -3475,8 +3575,8 @@ am__ceph_rgw_jsonparser_SOURCES_DIST = rgw/rgw_jsonparser.cc \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	rgw/rgw_env.$(OBJEXT) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	rgw/rgw_json_enc.$(OBJEXT)
 ceph_rgw_jsonparser_OBJECTS = $(am_ceph_rgw_jsonparser_OBJECTS)
-am__DEPENDENCIES_18 = librgw.la $(am__DEPENDENCIES_1)
- at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE at am__DEPENDENCIES_19 = $(LIBRADOS) \
+am__DEPENDENCIES_20 = librgw.la $(am__DEPENDENCIES_1)
+ at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE at am__DEPENDENCIES_21 = $(LIBRADOS) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	libcls_rgw_client.la \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	libcls_log_client.a \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	libcls_statelog_client.a \
@@ -3486,28 +3586,28 @@ am__DEPENDENCIES_18 = librgw.la $(am__DEPENDENCIES_1)
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	libcls_lock_client.la \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	libcls_refcount_client.la \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	libcls_version_client.a
-am__DEPENDENCIES_20 = $(am__DEPENDENCIES_19)
- at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE at ceph_rgw_jsonparser_DEPENDENCIES = $(am__DEPENDENCIES_18) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_20) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_9)
+am__DEPENDENCIES_22 = $(am__DEPENDENCIES_21)
+ at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE at ceph_rgw_jsonparser_DEPENDENCIES = $(am__DEPENDENCIES_20) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_22) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_10)
 am__ceph_rgw_multiparser_SOURCES_DIST = rgw/rgw_multiparser.cc
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE at am_ceph_rgw_multiparser_OBJECTS = rgw/rgw_multiparser.$(OBJEXT)
 ceph_rgw_multiparser_OBJECTS = $(am_ceph_rgw_multiparser_OBJECTS)
- at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE at ceph_rgw_multiparser_DEPENDENCIES = $(am__DEPENDENCIES_18) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_20) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_9)
+ at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE at ceph_rgw_multiparser_DEPENDENCIES = $(am__DEPENDENCIES_20) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_22) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_10)
 am__ceph_scratchtool_SOURCES_DIST = tools/scratchtool.c
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at am_ceph_scratchtool_OBJECTS = tools/scratchtool.$(OBJEXT)
 ceph_scratchtool_OBJECTS = $(am_ceph_scratchtool_OBJECTS)
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at ceph_scratchtool_DEPENDENCIES =  \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(LIBRADOS) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_9)
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_10)
 am__ceph_scratchtoolpp_SOURCES_DIST = tools/scratchtoolpp.cc
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at am_ceph_scratchtoolpp_OBJECTS = tools/scratchtoolpp.$(OBJEXT)
 ceph_scratchtoolpp_OBJECTS = $(am_ceph_scratchtoolpp_OBJECTS)
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at ceph_scratchtoolpp_DEPENDENCIES =  \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(LIBRADOS) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_9)
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_10)
 am__ceph_smalliobench_SOURCES_DIST = test/bench/small_io_bench.cc \
 	test/bench/rados_backend.cc \
 	test/bench/detailed_stat_collector.cc test/bench/bencher.cc
@@ -3519,7 +3619,7 @@ ceph_smalliobench_OBJECTS = $(am_ceph_smalliobench_OBJECTS)
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at ceph_smalliobench_DEPENDENCIES =  \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(LIBRADOS) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_1) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_9)
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_10)
 am__ceph_smalliobenchdumb_SOURCES_DIST =  \
 	test/bench/small_io_bench_dumb.cc test/bench/dumb_backend.cc \
 	test/bench/detailed_stat_collector.cc test/bench/bencher.cc
@@ -3530,8 +3630,8 @@ am__ceph_smalliobenchdumb_SOURCES_DIST =  \
 ceph_smalliobenchdumb_OBJECTS = $(am_ceph_smalliobenchdumb_OBJECTS)
 @ENABLE_CLIENT_TRUE@@ENABLE_SERVER_TRUE@@WITH_RADOS_TRUE at ceph_smalliobenchdumb_DEPENDENCIES = $(LIBRADOS) \
 @ENABLE_CLIENT_TRUE@@ENABLE_SERVER_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_1) \
- at ENABLE_CLIENT_TRUE@@ENABLE_SERVER_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_12) \
- at ENABLE_CLIENT_TRUE@@ENABLE_SERVER_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_9)
+ at ENABLE_CLIENT_TRUE@@ENABLE_SERVER_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_14) \
+ at ENABLE_CLIENT_TRUE@@ENABLE_SERVER_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_10)
 am__ceph_smalliobenchfs_SOURCES_DIST =  \
 	test/bench/small_io_bench_fs.cc \
 	test/bench/testfilestore_backend.cc \
@@ -3543,8 +3643,8 @@ am__ceph_smalliobenchfs_SOURCES_DIST =  \
 ceph_smalliobenchfs_OBJECTS = $(am_ceph_smalliobenchfs_OBJECTS)
 @ENABLE_CLIENT_TRUE@@ENABLE_SERVER_TRUE@@WITH_RADOS_TRUE at ceph_smalliobenchfs_DEPENDENCIES = $(LIBRADOS) \
 @ENABLE_CLIENT_TRUE@@ENABLE_SERVER_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_1) \
- at ENABLE_CLIENT_TRUE@@ENABLE_SERVER_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_12) \
- at ENABLE_CLIENT_TRUE@@ENABLE_SERVER_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_9)
+ at ENABLE_CLIENT_TRUE@@ENABLE_SERVER_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_14) \
+ at ENABLE_CLIENT_TRUE@@ENABLE_SERVER_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_10)
 am__ceph_smalliobenchrbd_SOURCES_DIST =  \
 	test/bench/small_io_bench_rbd.cc test/bench/rbd_backend.cc \
 	test/bench/detailed_stat_collector.cc test/bench/bencher.cc
@@ -3557,15 +3657,15 @@ ceph_smalliobenchrbd_OBJECTS = $(am_ceph_smalliobenchrbd_OBJECTS)
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(LIBRBD) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(LIBRADOS) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(am__DEPENDENCIES_1) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(am__DEPENDENCIES_9)
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(am__DEPENDENCIES_10)
 am__ceph_test_async_driver_SOURCES_DIST =  \
 	test/msgr/test_async_driver.cc
 @ENABLE_SERVER_TRUE at am_ceph_test_async_driver_OBJECTS = test/msgr/ceph_test_async_driver-test_async_driver.$(OBJEXT)
 ceph_test_async_driver_OBJECTS = $(am_ceph_test_async_driver_OBJECTS)
 @ENABLE_SERVER_TRUE at ceph_test_async_driver_DEPENDENCIES =  \
- at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_12) \
- at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_17) \
- at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_9)
+ at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_14) \
+ at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_19) \
+ at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_10)
 ceph_test_async_driver_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(ceph_test_async_driver_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
@@ -3591,9 +3691,9 @@ ceph_test_cls_hello_OBJECTS = $(am_ceph_test_cls_hello_OBJECTS)
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at ceph_test_cls_hello_DEPENDENCIES =  \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(LIBRADOS) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_1) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_17) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_9) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_8)
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_19) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_10) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_9)
 ceph_test_cls_hello_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(ceph_test_cls_hello_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
@@ -3604,10 +3704,11 @@ am__ceph_test_cls_journal_SOURCES_DIST =  \
 ceph_test_cls_journal_OBJECTS = $(am_ceph_test_cls_journal_OBJECTS)
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at ceph_test_cls_journal_DEPENDENCIES =  \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	libcls_journal_client.la \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(LIBRADOS) $(LIBCOMMON) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(LIBRADOS) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_4) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_1) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_17) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_8)
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_19) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_9)
 ceph_test_cls_journal_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(ceph_test_cls_journal_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
@@ -3618,9 +3719,9 @@ ceph_test_cls_lock_OBJECTS = $(am_ceph_test_cls_lock_OBJECTS)
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at ceph_test_cls_lock_DEPENDENCIES =  \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(LIBRADOS) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	libcls_lock_client.la \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(LIBCOMMON) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_17) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_8)
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_4) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_19) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_9)
 ceph_test_cls_lock_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(ceph_test_cls_lock_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
@@ -3631,9 +3732,9 @@ ceph_test_cls_log_OBJECTS = $(am_ceph_test_cls_log_OBJECTS)
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at ceph_test_cls_log_DEPENDENCIES =  \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(LIBRADOS) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	libcls_log_client.a \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_17) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_9) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_8)
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_19) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_10) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_9)
 ceph_test_cls_log_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(ceph_test_cls_log_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
@@ -3645,9 +3746,9 @@ ceph_test_cls_numops_OBJECTS = $(am_ceph_test_cls_numops_OBJECTS)
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at ceph_test_cls_numops_DEPENDENCIES =  \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(LIBRADOS) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	libcls_numops_client.la \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_17) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_9) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_8)
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_19) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_10) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_9)
 ceph_test_cls_numops_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(ceph_test_cls_numops_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
@@ -3659,9 +3760,9 @@ ceph_test_cls_rbd_OBJECTS = $(am_ceph_test_cls_rbd_OBJECTS)
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(LIBRADOS) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	libcls_rbd_client.la \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	libcls_lock_client.la \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(LIBCOMMON) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_17) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_8) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_4) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_19) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_9) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_1) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_3)
 ceph_test_cls_rbd_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
@@ -3675,8 +3776,8 @@ ceph_test_cls_refcount_OBJECTS = $(am_ceph_test_cls_refcount_OBJECTS)
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at ceph_test_cls_refcount_DEPENDENCIES =  \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(LIBRADOS) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	libcls_refcount_client.la \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_17) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_8)
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_19) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_9)
 ceph_test_cls_refcount_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(ceph_test_cls_refcount_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
@@ -3689,9 +3790,9 @@ ceph_test_cls_replica_log_OBJECTS =  \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at ceph_test_cls_replica_log_DEPENDENCIES =  \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(LIBRADOS) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	libcls_replica_log_client.a \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_17) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_9) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_8)
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_19) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_10) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_9)
 ceph_test_cls_replica_log_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(ceph_test_cls_replica_log_CXXFLAGS) $(CXXFLAGS) \
@@ -3702,10 +3803,10 @@ ceph_test_cls_rgw_OBJECTS = $(am_ceph_test_cls_rgw_OBJECTS)
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE at ceph_test_cls_rgw_DEPENDENCIES = $(LIBRADOS) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_1) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	libcls_rgw_client.la \
- at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(LIBCOMMON) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_17) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_9) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_8)
+ at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_4) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_19) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_10) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_9)
 ceph_test_cls_rgw_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(ceph_test_cls_rgw_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
@@ -3714,9 +3815,9 @@ am__ceph_test_cls_rgw_log_SOURCES_DIST = test/test_rgw_admin_log.cc
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE at am_ceph_test_cls_rgw_log_OBJECTS = test/ceph_test_cls_rgw_log-test_rgw_admin_log.$(OBJEXT)
 ceph_test_cls_rgw_log_OBJECTS = $(am_ceph_test_cls_rgw_log_OBJECTS)
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE at ceph_test_cls_rgw_log_DEPENDENCIES = $(LIBRADOS) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_18) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_9) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_17) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_20) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_10) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_19) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_1) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	libcls_version_client.a \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	libcls_log_client.a \
@@ -3733,9 +3834,9 @@ am__ceph_test_cls_rgw_meta_SOURCES_DIST = test/test_rgw_admin_meta.cc
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE at am_ceph_test_cls_rgw_meta_OBJECTS = test/ceph_test_cls_rgw_meta-test_rgw_admin_meta.$(OBJEXT)
 ceph_test_cls_rgw_meta_OBJECTS = $(am_ceph_test_cls_rgw_meta_OBJECTS)
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE at ceph_test_cls_rgw_meta_DEPENDENCIES = $(LIBRADOS) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_18) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_9) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_17) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_20) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_10) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_19) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_1) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	libcls_version_client.a \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	libcls_log_client.a \
@@ -3754,9 +3855,9 @@ am__ceph_test_cls_rgw_opstate_SOURCES_DIST =  \
 ceph_test_cls_rgw_opstate_OBJECTS =  \
 	$(am_ceph_test_cls_rgw_opstate_OBJECTS)
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE at ceph_test_cls_rgw_opstate_DEPENDENCIES = $(LIBRADOS) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_18) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_9) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_17) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_20) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_10) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_19) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_1) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	libcls_version_client.a \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	libcls_log_client.a \
@@ -3778,9 +3879,9 @@ ceph_test_cls_statelog_OBJECTS = $(am_ceph_test_cls_statelog_OBJECTS)
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at ceph_test_cls_statelog_DEPENDENCIES =  \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(LIBRADOS) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	libcls_statelog_client.a \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_17) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_9) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_8)
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_19) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_10) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_9)
 ceph_test_cls_statelog_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(ceph_test_cls_statelog_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
@@ -3792,8 +3893,8 @@ ceph_test_cls_version_OBJECTS = $(am_ceph_test_cls_version_OBJECTS)
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at ceph_test_cls_version_DEPENDENCIES =  \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(LIBRADOS) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	libcls_version_client.a \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_17) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_8)
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_19) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_9)
 ceph_test_cls_version_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(ceph_test_cls_version_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
@@ -3802,23 +3903,23 @@ am__ceph_test_cors_SOURCES_DIST = test/test_cors.cc
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE at am_ceph_test_cors_OBJECTS = test/ceph_test_cors-test_cors.$(OBJEXT)
 ceph_test_cors_OBJECTS = $(am_ceph_test_cors_OBJECTS)
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE at ceph_test_cors_DEPENDENCIES = $(LIBRADOS) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_18) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_9) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_17)
+ at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_20) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_10) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_19)
 ceph_test_cors_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(ceph_test_cors_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
 	$(LDFLAGS) -o $@
 am_ceph_test_crypto_OBJECTS = test/testcrypto.$(OBJEXT)
 ceph_test_crypto_OBJECTS = $(am_ceph_test_crypto_OBJECTS)
-ceph_test_crypto_DEPENDENCIES = $(am__DEPENDENCIES_9)
+ceph_test_crypto_DEPENDENCIES = $(am__DEPENDENCIES_10)
 am__ceph_test_filejournal_SOURCES_DIST = test/test_filejournal.cc
 @ENABLE_SERVER_TRUE at am_ceph_test_filejournal_OBJECTS = test/ceph_test_filejournal-test_filejournal.$(OBJEXT)
 ceph_test_filejournal_OBJECTS = $(am_ceph_test_filejournal_OBJECTS)
 @ENABLE_SERVER_TRUE at ceph_test_filejournal_DEPENDENCIES =  \
- at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_12) \
- at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_17) \
- at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_9)
+ at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_14) \
+ at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_19) \
+ at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_10)
 ceph_test_filejournal_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(ceph_test_filejournal_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
@@ -3828,9 +3929,9 @@ am__ceph_test_filestore_SOURCES_DIST =  \
 @ENABLE_SERVER_TRUE@@LINUX_TRUE at am_ceph_test_filestore_OBJECTS = test/filestore/ceph_test_filestore-TestFileStore.$(OBJEXT)
 ceph_test_filestore_OBJECTS = $(am_ceph_test_filestore_OBJECTS)
 @ENABLE_SERVER_TRUE@@LINUX_TRUE at ceph_test_filestore_DEPENDENCIES =  \
- at ENABLE_SERVER_TRUE@@LINUX_TRUE@	$(am__DEPENDENCIES_12) \
- at ENABLE_SERVER_TRUE@@LINUX_TRUE@	$(am__DEPENDENCIES_17) \
- at ENABLE_SERVER_TRUE@@LINUX_TRUE@	$(am__DEPENDENCIES_9)
+ at ENABLE_SERVER_TRUE@@LINUX_TRUE@	$(am__DEPENDENCIES_14) \
+ at ENABLE_SERVER_TRUE@@LINUX_TRUE@	$(am__DEPENDENCIES_19) \
+ at ENABLE_SERVER_TRUE@@LINUX_TRUE@	$(am__DEPENDENCIES_10)
 ceph_test_filestore_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(ceph_test_filestore_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
@@ -3845,8 +3946,8 @@ am__ceph_test_filestore_idempotent_SOURCES_DIST =  \
 ceph_test_filestore_idempotent_OBJECTS =  \
 	$(am_ceph_test_filestore_idempotent_OBJECTS)
 @ENABLE_SERVER_TRUE at ceph_test_filestore_idempotent_DEPENDENCIES =  \
- at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_12) \
- at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_9)
+ at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_14) \
+ at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_10)
 am__ceph_test_filestore_idempotent_sequence_SOURCES_DIST =  \
 	test/objectstore/test_idempotent_sequence.cc \
 	test/objectstore/DeterministicOpSequence.cc \
@@ -3859,15 +3960,16 @@ am__ceph_test_filestore_idempotent_sequence_SOURCES_DIST =  \
 ceph_test_filestore_idempotent_sequence_OBJECTS =  \
 	$(am_ceph_test_filestore_idempotent_sequence_OBJECTS)
 @ENABLE_SERVER_TRUE at ceph_test_filestore_idempotent_sequence_DEPENDENCIES =  \
- at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_12) \
- at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_9)
+ at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_14) \
+ at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_10)
 am__ceph_test_get_blkdev_size_SOURCES_DIST =  \
 	test/test_get_blkdev_size.cc
 @LINUX_TRUE at am_ceph_test_get_blkdev_size_OBJECTS =  \
 @LINUX_TRUE@	test/test_get_blkdev_size.$(OBJEXT)
 ceph_test_get_blkdev_size_OBJECTS =  \
 	$(am_ceph_test_get_blkdev_size_OBJECTS)
- at LINUX_TRUE@ceph_test_get_blkdev_size_DEPENDENCIES = $(LIBCOMMON)
+ at LINUX_TRUE@ceph_test_get_blkdev_size_DEPENDENCIES =  \
+ at LINUX_TRUE@	$(am__DEPENDENCIES_4)
 am__ceph_test_ioctls_SOURCES_DIST = client/test_ioctls.c
 @ENABLE_CLIENT_TRUE at am_ceph_test_ioctls_OBJECTS =  \
 @ENABLE_CLIENT_TRUE@	client/test_ioctls.$(OBJEXT)
@@ -3878,15 +3980,15 @@ am__ceph_test_keys_SOURCES_DIST = test/testkeys.cc
 @ENABLE_SERVER_TRUE@@WITH_MON_TRUE@	test/testkeys.$(OBJEXT)
 ceph_test_keys_OBJECTS = $(am_ceph_test_keys_OBJECTS)
 @ENABLE_SERVER_TRUE@@WITH_MON_TRUE at ceph_test_keys_DEPENDENCIES =  \
- at ENABLE_SERVER_TRUE@@WITH_MON_TRUE@	$(am__DEPENDENCIES_15) \
- at ENABLE_SERVER_TRUE@@WITH_MON_TRUE@	$(am__DEPENDENCIES_9)
+ at ENABLE_SERVER_TRUE@@WITH_MON_TRUE@	$(am__DEPENDENCIES_17) \
+ at ENABLE_SERVER_TRUE@@WITH_MON_TRUE@	$(am__DEPENDENCIES_10)
 am__ceph_test_keyvaluedb_SOURCES_DIST = test/objectstore/test_kv.cc
 @ENABLE_SERVER_TRUE@@LINUX_TRUE at am_ceph_test_keyvaluedb_OBJECTS = test/objectstore/ceph_test_keyvaluedb-test_kv.$(OBJEXT)
 ceph_test_keyvaluedb_OBJECTS = $(am_ceph_test_keyvaluedb_OBJECTS)
 @ENABLE_SERVER_TRUE@@LINUX_TRUE at ceph_test_keyvaluedb_DEPENDENCIES =  \
- at ENABLE_SERVER_TRUE@@LINUX_TRUE@	$(am__DEPENDENCIES_12) \
- at ENABLE_SERVER_TRUE@@LINUX_TRUE@	$(am__DEPENDENCIES_17) \
- at ENABLE_SERVER_TRUE@@LINUX_TRUE@	$(am__DEPENDENCIES_9)
+ at ENABLE_SERVER_TRUE@@LINUX_TRUE@	$(am__DEPENDENCIES_14) \
+ at ENABLE_SERVER_TRUE@@LINUX_TRUE@	$(am__DEPENDENCIES_19) \
+ at ENABLE_SERVER_TRUE@@LINUX_TRUE@	$(am__DEPENDENCIES_10)
 ceph_test_keyvaluedb_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(ceph_test_keyvaluedb_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
@@ -3897,9 +3999,9 @@ am__ceph_test_keyvaluedb_atomicity_SOURCES_DIST =  \
 ceph_test_keyvaluedb_atomicity_OBJECTS =  \
 	$(am_ceph_test_keyvaluedb_atomicity_OBJECTS)
 @ENABLE_SERVER_TRUE at ceph_test_keyvaluedb_atomicity_DEPENDENCIES =  \
- at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_12) \
- at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_17) \
- at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_9)
+ at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_14) \
+ at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_19) \
+ at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_10)
 ceph_test_keyvaluedb_atomicity_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(ceph_test_keyvaluedb_atomicity_CXXFLAGS) $(CXXFLAGS) \
@@ -3912,9 +4014,9 @@ am__ceph_test_keyvaluedb_iterators_SOURCES_DIST =  \
 ceph_test_keyvaluedb_iterators_OBJECTS =  \
 	$(am_ceph_test_keyvaluedb_iterators_OBJECTS)
 @ENABLE_SERVER_TRUE at ceph_test_keyvaluedb_iterators_DEPENDENCIES =  \
- at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_12) \
- at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_17) \
- at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_9)
+ at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_14) \
+ at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_19) \
+ at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_10)
 ceph_test_keyvaluedb_iterators_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(ceph_test_keyvaluedb_iterators_CXXFLAGS) $(CXXFLAGS) \
@@ -3923,19 +4025,19 @@ am__ceph_test_libcephfs_SOURCES_DIST = test/libcephfs/test.cc \
 	test/libcephfs/readdir_r_cb.cc test/libcephfs/caps.cc \
 	test/libcephfs/multiclient.cc test/libcephfs/access.cc \
 	test/libcephfs/acl.cc test/libcephfs/flock.cc
- at ENABLE_CLIENT_TRUE@@LINUX_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE at am__objects_42 = test/libcephfs/ceph_test_libcephfs-flock.$(OBJEXT)
+ at ENABLE_CLIENT_TRUE@@LINUX_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE at am__objects_45 = test/libcephfs/ceph_test_libcephfs-flock.$(OBJEXT)
 @ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE at am_ceph_test_libcephfs_OBJECTS = test/libcephfs/ceph_test_libcephfs-test.$(OBJEXT) \
 @ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE@	test/libcephfs/ceph_test_libcephfs-readdir_r_cb.$(OBJEXT) \
 @ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE@	test/libcephfs/ceph_test_libcephfs-caps.$(OBJEXT) \
 @ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE@	test/libcephfs/ceph_test_libcephfs-multiclient.$(OBJEXT) \
 @ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE@	test/libcephfs/ceph_test_libcephfs-access.$(OBJEXT) \
 @ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE@	test/libcephfs/ceph_test_libcephfs-acl.$(OBJEXT) \
- at ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE@	$(am__objects_42)
+ at ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE@	$(am__objects_45)
 ceph_test_libcephfs_OBJECTS = $(am_ceph_test_libcephfs_OBJECTS)
 @ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE at ceph_test_libcephfs_DEPENDENCIES = $(LIBRADOS) \
 @ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE@	$(LIBCEPHFS) \
- at ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE@	$(LIBCOMMON) \
- at ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_17)
+ at ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_4) \
+ at ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_19)
 ceph_test_libcephfs_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(ceph_test_libcephfs_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
@@ -3952,10 +4054,10 @@ ceph_test_librbd_OBJECTS = $(am_ceph_test_librbd_OBJECTS)
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	libjournal.la \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	libcls_journal_client.la \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	librados_api.la \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(am__DEPENDENCIES_6) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(am__DEPENDENCIES_17) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(am__DEPENDENCIES_9) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(am__DEPENDENCIES_8)
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(am__DEPENDENCIES_7) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(am__DEPENDENCIES_19) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(am__DEPENDENCIES_10) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(am__DEPENDENCIES_9)
 ceph_test_librbd_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(ceph_test_librbd_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
@@ -3969,9 +4071,9 @@ ceph_test_librbd_api_OBJECTS = $(am_ceph_test_librbd_api_OBJECTS)
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at ceph_test_librbd_api_DEPENDENCIES =  \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(LIBRBD) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(LIBRADOS) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(LIBCOMMON) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(am__DEPENDENCIES_17) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(am__DEPENDENCIES_8)
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(am__DEPENDENCIES_4) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(am__DEPENDENCIES_19) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(am__DEPENDENCIES_9)
 ceph_test_librbd_api_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(ceph_test_librbd_api_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
@@ -3992,9 +4094,9 @@ am__ceph_test_mon_msg_SOURCES_DIST = test/mon/test-mon-msg.cc
 @ENABLE_SERVER_TRUE at am_ceph_test_mon_msg_OBJECTS = test/mon/ceph_test_mon_msg-test-mon-msg.$(OBJEXT)
 ceph_test_mon_msg_OBJECTS = $(am_ceph_test_mon_msg_OBJECTS)
 @ENABLE_SERVER_TRUE at ceph_test_mon_msg_DEPENDENCIES =  \
- at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_12) $(LIBOSDC) \
- at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_9) \
- at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_17)
+ at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_14) $(LIBOSDC) \
+ at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_10) \
+ at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_19)
 ceph_test_mon_msg_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(ceph_test_mon_msg_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
@@ -4006,15 +4108,15 @@ am__ceph_test_mon_workloadgen_SOURCES_DIST =  \
 ceph_test_mon_workloadgen_OBJECTS =  \
 	$(am_ceph_test_mon_workloadgen_OBJECTS)
 @ENABLE_SERVER_TRUE at ceph_test_mon_workloadgen_DEPENDENCIES =  \
- at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_12) $(LIBOSDC) \
- at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_9)
+ at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_14) $(LIBOSDC) \
+ at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_10)
 am__ceph_test_msgr_SOURCES_DIST = test/msgr/test_msgr.cc
 @ENABLE_SERVER_TRUE at am_ceph_test_msgr_OBJECTS = test/msgr/ceph_test_msgr-test_msgr.$(OBJEXT)
 ceph_test_msgr_OBJECTS = $(am_ceph_test_msgr_OBJECTS)
 @ENABLE_SERVER_TRUE at ceph_test_msgr_DEPENDENCIES =  \
- at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_12) \
- at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_17) \
- at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_9)
+ at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_14) \
+ at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_19) \
+ at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_10)
 ceph_test_msgr_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(ceph_test_msgr_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
@@ -4024,7 +4126,7 @@ am__ceph_test_mutate_SOURCES_DIST = test/test_mutate.cc
 ceph_test_mutate_OBJECTS = $(am_ceph_test_mutate_OBJECTS)
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at ceph_test_mutate_DEPENDENCIES =  \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(LIBRADOS) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_9)
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_10)
 am__ceph_test_object_map_SOURCES_DIST =  \
 	test/ObjectMap/test_object_map.cc \
 	test/ObjectMap/KeyValueDBMemory.cc
@@ -4032,9 +4134,9 @@ am__ceph_test_object_map_SOURCES_DIST =  \
 @ENABLE_SERVER_TRUE@	test/ObjectMap/ceph_test_object_map-KeyValueDBMemory.$(OBJEXT)
 ceph_test_object_map_OBJECTS = $(am_ceph_test_object_map_OBJECTS)
 @ENABLE_SERVER_TRUE at ceph_test_object_map_DEPENDENCIES =  \
- at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_12) \
- at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_17) \
- at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_9)
+ at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_14) \
+ at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_19) \
+ at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_10)
 ceph_test_object_map_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(ceph_test_object_map_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
@@ -4045,15 +4147,15 @@ am_ceph_test_objectcacher_stress_OBJECTS =  \
 ceph_test_objectcacher_stress_OBJECTS =  \
 	$(am_ceph_test_objectcacher_stress_OBJECTS)
 ceph_test_objectcacher_stress_DEPENDENCIES = $(LIBOSDC) \
-	$(am__DEPENDENCIES_9)
+	$(am__DEPENDENCIES_10)
 am__ceph_test_objectstore_SOURCES_DIST =  \
 	test/objectstore/store_test.cc
 @ENABLE_SERVER_TRUE@@LINUX_TRUE at am_ceph_test_objectstore_OBJECTS = test/objectstore/ceph_test_objectstore-store_test.$(OBJEXT)
 ceph_test_objectstore_OBJECTS = $(am_ceph_test_objectstore_OBJECTS)
 @ENABLE_SERVER_TRUE@@LINUX_TRUE at ceph_test_objectstore_DEPENDENCIES =  \
- at ENABLE_SERVER_TRUE@@LINUX_TRUE@	$(am__DEPENDENCIES_12) \
- at ENABLE_SERVER_TRUE@@LINUX_TRUE@	$(am__DEPENDENCIES_17) \
- at ENABLE_SERVER_TRUE@@LINUX_TRUE@	$(am__DEPENDENCIES_9)
+ at ENABLE_SERVER_TRUE@@LINUX_TRUE@	$(am__DEPENDENCIES_14) \
+ at ENABLE_SERVER_TRUE@@LINUX_TRUE@	$(am__DEPENDENCIES_19) \
+ at ENABLE_SERVER_TRUE@@LINUX_TRUE@	$(am__DEPENDENCIES_10)
 ceph_test_objectstore_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(ceph_test_objectstore_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
@@ -4066,8 +4168,8 @@ am__ceph_test_objectstore_workloadgen_SOURCES_DIST =  \
 ceph_test_objectstore_workloadgen_OBJECTS =  \
 	$(am_ceph_test_objectstore_workloadgen_OBJECTS)
 @ENABLE_SERVER_TRUE at ceph_test_objectstore_workloadgen_DEPENDENCIES =  \
- at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_12) \
- at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_9)
+ at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_14) \
+ at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_10)
 am__ceph_test_rados_SOURCES_DIST = test/osd/TestRados.cc \
 	test/osd/TestOpStat.cc test/osd/Object.cc \
 	test/osd/RadosModel.cc
@@ -4078,15 +4180,16 @@ am__ceph_test_rados_SOURCES_DIST = test/osd/TestRados.cc \
 ceph_test_rados_OBJECTS = $(am_ceph_test_rados_OBJECTS)
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at ceph_test_rados_DEPENDENCIES =  \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(LIBRADOS) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_9)
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_10)
 am__ceph_test_rados_api_aio_SOURCES_DIST = test/librados/aio.cc
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at am_ceph_test_rados_api_aio_OBJECTS = test/librados/ceph_test_rados_api_aio-aio.$(OBJEXT)
 ceph_test_rados_api_aio_OBJECTS =  \
 	$(am_ceph_test_rados_api_aio_OBJECTS)
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at ceph_test_rados_api_aio_DEPENDENCIES =  \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(LIBRADOS) $(LIBCOMMON) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_17) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_8)
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(LIBRADOS) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_4) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_19) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_9)
 ceph_test_rados_api_aio_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(ceph_test_rados_api_aio_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
@@ -4098,8 +4201,8 @@ ceph_test_rados_api_c_read_operations_OBJECTS =  \
 	$(am_ceph_test_rados_api_c_read_operations_OBJECTS)
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at ceph_test_rados_api_c_read_operations_DEPENDENCIES =  \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(LIBRADOS) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_17) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_8)
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_19) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_9)
 ceph_test_rados_api_c_read_operations_LINK = $(LIBTOOL) $(AM_V_lt) \
 	--tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link \
 	$(CXXLD) $(ceph_test_rados_api_c_read_operations_CXXFLAGS) \
@@ -4111,8 +4214,8 @@ ceph_test_rados_api_c_write_operations_OBJECTS =  \
 	$(am_ceph_test_rados_api_c_write_operations_OBJECTS)
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at ceph_test_rados_api_c_write_operations_DEPENDENCIES =  \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(LIBRADOS) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_17) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_8)
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_19) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_9)
 ceph_test_rados_api_c_write_operations_LINK = $(LIBTOOL) $(AM_V_lt) \
 	--tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link \
 	$(CXXLD) $(ceph_test_rados_api_c_write_operations_CXXFLAGS) \
@@ -4123,8 +4226,8 @@ ceph_test_rados_api_cls_OBJECTS =  \
 	$(am_ceph_test_rados_api_cls_OBJECTS)
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at ceph_test_rados_api_cls_DEPENDENCIES =  \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(LIBRADOS) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_17) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_8)
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_19) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_9)
 ceph_test_rados_api_cls_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(ceph_test_rados_api_cls_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
@@ -4134,10 +4237,11 @@ am__ceph_test_rados_api_cmd_SOURCES_DIST = test/librados/cmd.cc
 ceph_test_rados_api_cmd_OBJECTS =  \
 	$(am_ceph_test_rados_api_cmd_OBJECTS)
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at ceph_test_rados_api_cmd_DEPENDENCIES =  \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(LIBCOMMON) $(LIBRADOS) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_4) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(LIBRADOS) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_1) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_17) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_8)
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_19) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_9)
 ceph_test_rados_api_cmd_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(ceph_test_rados_api_cmd_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
@@ -4147,8 +4251,8 @@ am__ceph_test_rados_api_io_SOURCES_DIST = test/librados/io.cc
 ceph_test_rados_api_io_OBJECTS = $(am_ceph_test_rados_api_io_OBJECTS)
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at ceph_test_rados_api_io_DEPENDENCIES =  \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(LIBRADOS) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_17) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_8)
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_19) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_9)
 ceph_test_rados_api_io_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(ceph_test_rados_api_io_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
@@ -4159,8 +4263,8 @@ ceph_test_rados_api_list_OBJECTS =  \
 	$(am_ceph_test_rados_api_list_OBJECTS)
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at ceph_test_rados_api_list_DEPENDENCIES =  \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(LIBRADOS) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_17) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_8)
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_19) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_9)
 ceph_test_rados_api_list_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(ceph_test_rados_api_list_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
@@ -4171,8 +4275,8 @@ ceph_test_rados_api_lock_OBJECTS =  \
 	$(am_ceph_test_rados_api_lock_OBJECTS)
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at ceph_test_rados_api_lock_DEPENDENCIES =  \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(LIBRADOS) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_17) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_8)
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_19) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_9)
 ceph_test_rados_api_lock_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(ceph_test_rados_api_lock_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
@@ -4183,9 +4287,9 @@ ceph_test_rados_api_misc_OBJECTS =  \
 	$(am_ceph_test_rados_api_misc_OBJECTS)
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at ceph_test_rados_api_misc_DEPENDENCIES =  \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(LIBRADOS) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_17) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_9) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_8)
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_19) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_10) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_9)
 ceph_test_rados_api_misc_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(ceph_test_rados_api_misc_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
@@ -4196,8 +4300,8 @@ ceph_test_rados_api_nlist_OBJECTS =  \
 	$(am_ceph_test_rados_api_nlist_OBJECTS)
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at ceph_test_rados_api_nlist_DEPENDENCIES =  \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(LIBRADOS) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_17) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_8)
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_19) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_9)
 ceph_test_rados_api_nlist_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(ceph_test_rados_api_nlist_CXXFLAGS) $(CXXFLAGS) \
@@ -4208,8 +4312,8 @@ ceph_test_rados_api_pool_OBJECTS =  \
 	$(am_ceph_test_rados_api_pool_OBJECTS)
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at ceph_test_rados_api_pool_DEPENDENCIES =  \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(LIBRADOS) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_17) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_8)
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_19) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_9)
 ceph_test_rados_api_pool_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(ceph_test_rados_api_pool_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
@@ -4221,8 +4325,8 @@ ceph_test_rados_api_snapshots_OBJECTS =  \
 	$(am_ceph_test_rados_api_snapshots_OBJECTS)
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at ceph_test_rados_api_snapshots_DEPENDENCIES =  \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(LIBRADOS) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_17) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_8)
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_19) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_9)
 ceph_test_rados_api_snapshots_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(ceph_test_rados_api_snapshots_CXXFLAGS) $(CXXFLAGS) \
@@ -4233,8 +4337,8 @@ ceph_test_rados_api_stat_OBJECTS =  \
 	$(am_ceph_test_rados_api_stat_OBJECTS)
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at ceph_test_rados_api_stat_DEPENDENCIES =  \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(LIBRADOS) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_17) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_8)
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_19) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_9)
 ceph_test_rados_api_stat_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(ceph_test_rados_api_stat_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
@@ -4247,9 +4351,9 @@ ceph_test_rados_api_tier_OBJECTS =  \
 	$(am_ceph_test_rados_api_tier_OBJECTS)
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at ceph_test_rados_api_tier_DEPENDENCIES =  \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(LIBRADOS) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_17) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_9) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_8)
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_19) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_10) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_9)
 ceph_test_rados_api_tier_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(ceph_test_rados_api_tier_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
@@ -4261,8 +4365,8 @@ ceph_test_rados_api_watch_notify_OBJECTS =  \
 	$(am_ceph_test_rados_api_watch_notify_OBJECTS)
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at ceph_test_rados_api_watch_notify_DEPENDENCIES =  \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(LIBRADOS) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_17) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_8)
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_19) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_9)
 ceph_test_rados_api_watch_notify_LINK = $(LIBTOOL) $(AM_V_lt) \
 	--tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link \
 	$(CXXLD) $(ceph_test_rados_api_watch_notify_CXXFLAGS) \
@@ -4313,7 +4417,7 @@ ceph_test_rados_striper_api_aio_OBJECTS =  \
 	$(am_ceph_test_rados_striper_api_aio_OBJECTS)
 @ENABLE_CLIENT_TRUE@@WITH_RADOSSTRIPER_TRUE@@WITH_RADOS_TRUE at ceph_test_rados_striper_api_aio_DEPENDENCIES = $(LIBRADOS) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSSTRIPER_TRUE@@WITH_RADOS_TRUE@	$(LIBRADOSSTRIPER) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOSSTRIPER_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_17) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOSSTRIPER_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_19) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSSTRIPER_TRUE@@WITH_RADOS_TRUE@	$(RADOS_STRIPER_TEST_LDADD)
 ceph_test_rados_striper_api_aio_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
@@ -4326,8 +4430,8 @@ ceph_test_rados_striper_api_io_OBJECTS =  \
 	$(am_ceph_test_rados_striper_api_io_OBJECTS)
 @ENABLE_CLIENT_TRUE@@WITH_RADOSSTRIPER_TRUE@@WITH_RADOS_TRUE at ceph_test_rados_striper_api_io_DEPENDENCIES = $(LIBRADOS) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSSTRIPER_TRUE@@WITH_RADOS_TRUE@	$(LIBRADOSSTRIPER) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOSSTRIPER_TRUE@@WITH_RADOS_TRUE@	$(LIBCOMMON) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOSSTRIPER_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_17) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOSSTRIPER_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_4) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOSSTRIPER_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_19) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSSTRIPER_TRUE@@WITH_RADOS_TRUE@	$(RADOS_STRIPER_TEST_LDADD)
 ceph_test_rados_striper_api_io_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
@@ -4340,7 +4444,7 @@ ceph_test_rados_striper_api_striping_OBJECTS =  \
 	$(am_ceph_test_rados_striper_api_striping_OBJECTS)
 @ENABLE_CLIENT_TRUE@@WITH_RADOSSTRIPER_TRUE@@WITH_RADOS_TRUE at ceph_test_rados_striper_api_striping_DEPENDENCIES = $(LIBRADOS) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSSTRIPER_TRUE@@WITH_RADOS_TRUE@	$(LIBRADOSSTRIPER) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOSSTRIPER_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_17) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOSSTRIPER_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_19) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSSTRIPER_TRUE@@WITH_RADOS_TRUE@	$(RADOS_STRIPER_TEST_LDADD)
 ceph_test_rados_striper_api_striping_LINK = $(LIBTOOL) $(AM_V_lt) \
 	--tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link \
@@ -4364,11 +4468,34 @@ ceph_test_rados_watch_notify_OBJECTS =  \
 @ENABLE_CLIENT_TRUE@@LINUX_TRUE@@WITH_RADOS_TRUE@	$(LIBRADOS) \
 @ENABLE_CLIENT_TRUE@@LINUX_TRUE@@WITH_RADOS_TRUE@	libsystest.la \
 @ENABLE_CLIENT_TRUE@@LINUX_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_1)
+am__ceph_test_rbd_mirror_SOURCES_DIST = test/rbd_mirror/test_main.cc
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am_ceph_test_rbd_mirror_OBJECTS = test/rbd_mirror/ceph_test_rbd_mirror-test_main.$(OBJEXT)
+ceph_test_rbd_mirror_OBJECTS = $(am_ceph_test_rbd_mirror_OBJECTS)
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at ceph_test_rbd_mirror_DEPENDENCIES = librbd_mirror_test.la \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	librbd_mirror_internal.la \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	librbd_internal.la \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	librbd_api.la \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	libjournal.la \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	librados_internal.la \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	libcls_rbd_client.la \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	libcls_lock_client.la \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	libcls_journal_client.la \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(LIBRBD_TYPES) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	librados_api.la \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(am__DEPENDENCIES_7) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(LIBOSDC) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(am__DEPENDENCIES_19) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(am__DEPENDENCIES_10) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(am__DEPENDENCIES_9)
+ceph_test_rbd_mirror_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
+	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
+	$(ceph_test_rbd_mirror_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
+	$(LDFLAGS) -o $@
 am_ceph_test_rewrite_latency_OBJECTS =  \
 	test/test_rewrite_latency.$(OBJEXT)
 ceph_test_rewrite_latency_OBJECTS =  \
 	$(am_ceph_test_rewrite_latency_OBJECTS)
-ceph_test_rewrite_latency_DEPENDENCIES = $(LIBCOMMON) \
+ceph_test_rewrite_latency_DEPENDENCIES = $(am__DEPENDENCIES_4) \
 	$(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) \
 	$(am__DEPENDENCIES_3)
 am__ceph_test_rgw_manifest_SOURCES_DIST =  \
@@ -4376,10 +4503,10 @@ am__ceph_test_rgw_manifest_SOURCES_DIST =  \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE at am_ceph_test_rgw_manifest_OBJECTS = test/rgw/ceph_test_rgw_manifest-test_rgw_manifest.$(OBJEXT)
 ceph_test_rgw_manifest_OBJECTS = $(am_ceph_test_rgw_manifest_OBJECTS)
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE at ceph_test_rgw_manifest_DEPENDENCIES = $(LIBRADOS) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_18) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_20) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_9) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_17) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_22) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_10) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_19) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_1)
 ceph_test_rgw_manifest_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
@@ -4389,10 +4516,10 @@ am__ceph_test_rgw_obj_SOURCES_DIST = test/rgw/test_rgw_obj.cc
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE at am_ceph_test_rgw_obj_OBJECTS = test/rgw/ceph_test_rgw_obj-test_rgw_obj.$(OBJEXT)
 ceph_test_rgw_obj_OBJECTS = $(am_ceph_test_rgw_obj_OBJECTS)
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE at ceph_test_rgw_obj_DEPENDENCIES = $(LIBRADOS) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_18) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_20) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_9) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_17) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_22) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_10) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_19) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_1)
 ceph_test_rgw_obj_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
@@ -4402,14 +4529,14 @@ am_ceph_test_signal_handlers_OBJECTS =  \
 	test/TestSignalHandlers.$(OBJEXT)
 ceph_test_signal_handlers_OBJECTS =  \
 	$(am_ceph_test_signal_handlers_OBJECTS)
-ceph_test_signal_handlers_DEPENDENCIES = $(am__DEPENDENCIES_9)
+ceph_test_signal_handlers_DEPENDENCIES = $(am__DEPENDENCIES_10)
 am__ceph_test_snap_mapper_SOURCES_DIST = test/test_snap_mapper.cc
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at am_ceph_test_snap_mapper_OBJECTS = test/ceph_test_snap_mapper-test_snap_mapper.$(OBJEXT)
 ceph_test_snap_mapper_OBJECTS = $(am_ceph_test_snap_mapper_OBJECTS)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at ceph_test_snap_mapper_DEPENDENCIES =  \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_16) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_17) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_9)
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_18) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_19) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_10)
 ceph_test_snap_mapper_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(ceph_test_snap_mapper_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
@@ -4418,23 +4545,24 @@ am__ceph_test_stress_watch_SOURCES_DIST = test/test_stress_watch.cc
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at am_ceph_test_stress_watch_OBJECTS = test/ceph_test_stress_watch-test_stress_watch.$(OBJEXT)
 ceph_test_stress_watch_OBJECTS = $(am_ceph_test_stress_watch_OBJECTS)
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at ceph_test_stress_watch_DEPENDENCIES =  \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(LIBRADOS) $(LIBCOMMON) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_17) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_8)
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(LIBRADOS) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_4) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_19) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_9)
 ceph_test_stress_watch_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(ceph_test_stress_watch_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
 	$(LDFLAGS) -o $@
 am_ceph_test_timers_OBJECTS = test/TestTimers.$(OBJEXT)
 ceph_test_timers_OBJECTS = $(am_ceph_test_timers_OBJECTS)
-ceph_test_timers_DEPENDENCIES = $(am__DEPENDENCIES_9)
+ceph_test_timers_DEPENDENCIES = $(am__DEPENDENCIES_10)
 am__ceph_test_trans_SOURCES_DIST = test/test_trans.cc
 @ENABLE_SERVER_TRUE at am_ceph_test_trans_OBJECTS =  \
 @ENABLE_SERVER_TRUE@	test/test_trans.$(OBJEXT)
 ceph_test_trans_OBJECTS = $(am_ceph_test_trans_OBJECTS)
 @ENABLE_SERVER_TRUE at ceph_test_trans_DEPENDENCIES =  \
- at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_12) \
- at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_9)
+ at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_14) \
+ at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_10)
 am__ceph_tpbench_SOURCES_DIST = test/bench/tp_bench.cc \
 	test/bench/detailed_stat_collector.cc
 @ENABLE_CLIENT_TRUE@@ENABLE_SERVER_TRUE@@WITH_RADOS_TRUE at am_ceph_tpbench_OBJECTS = test/bench/tp_bench.$(OBJEXT) \
@@ -4442,15 +4570,15 @@ am__ceph_tpbench_SOURCES_DIST = test/bench/tp_bench.cc \
 ceph_tpbench_OBJECTS = $(am_ceph_tpbench_OBJECTS)
 @ENABLE_CLIENT_TRUE@@ENABLE_SERVER_TRUE@@WITH_RADOS_TRUE at ceph_tpbench_DEPENDENCIES = $(LIBRADOS) \
 @ENABLE_CLIENT_TRUE@@ENABLE_SERVER_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_1) \
- at ENABLE_CLIENT_TRUE@@ENABLE_SERVER_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_12) \
- at ENABLE_CLIENT_TRUE@@ENABLE_SERVER_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_9)
+ at ENABLE_CLIENT_TRUE@@ENABLE_SERVER_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_14) \
+ at ENABLE_CLIENT_TRUE@@ENABLE_SERVER_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_10)
 am__ceph_xattr_bench_SOURCES_DIST = test/xattr_bench.cc
 @ENABLE_SERVER_TRUE at am_ceph_xattr_bench_OBJECTS = test/ceph_xattr_bench-xattr_bench.$(OBJEXT)
 ceph_xattr_bench_OBJECTS = $(am_ceph_xattr_bench_OBJECTS)
 @ENABLE_SERVER_TRUE at ceph_xattr_bench_DEPENDENCIES =  \
- at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_12) \
- at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_17) \
- at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_9)
+ at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_14) \
+ at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_19) \
+ at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_10)
 ceph_xattr_bench_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(ceph_xattr_bench_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
@@ -4458,17 +4586,17 @@ ceph_xattr_bench_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 am__cephfs_SOURCES_DIST = cephfs.cc
 @ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE at am_cephfs_OBJECTS = cephfs.$(OBJEXT)
 cephfs_OBJECTS = $(am_cephfs_OBJECTS)
- at ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE at cephfs_DEPENDENCIES = $(LIBCOMMON)
+ at ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE at cephfs_DEPENDENCIES = $(am__DEPENDENCIES_4)
 am__cephfs_data_scan_SOURCES_DIST = tools/cephfs/cephfs-data-scan.cc \
 	tools/cephfs/DataScan.cc tools/cephfs/MDSUtility.cc
 @ENABLE_CLIENT_TRUE@@ENABLE_SERVER_TRUE@@WITH_MDS_TRUE@@WITH_RADOS_TRUE at am_cephfs_data_scan_OBJECTS = tools/cephfs/cephfs-data-scan.$(OBJEXT) \
 @ENABLE_CLIENT_TRUE@@ENABLE_SERVER_TRUE@@WITH_MDS_TRUE@@WITH_RADOS_TRUE@	tools/cephfs/DataScan.$(OBJEXT) \
 @ENABLE_CLIENT_TRUE@@ENABLE_SERVER_TRUE@@WITH_MDS_TRUE@@WITH_RADOS_TRUE@	tools/cephfs/MDSUtility.$(OBJEXT)
 cephfs_data_scan_OBJECTS = $(am_cephfs_data_scan_OBJECTS)
- at ENABLE_CLIENT_TRUE@@ENABLE_SERVER_TRUE@@WITH_MDS_TRUE@@WITH_RADOS_TRUE at cephfs_data_scan_DEPENDENCIES = $(am__DEPENDENCIES_14) \
+ at ENABLE_CLIENT_TRUE@@ENABLE_SERVER_TRUE@@WITH_MDS_TRUE@@WITH_RADOS_TRUE at cephfs_data_scan_DEPENDENCIES = $(am__DEPENDENCIES_16) \
 @ENABLE_CLIENT_TRUE@@ENABLE_SERVER_TRUE@@WITH_MDS_TRUE@@WITH_RADOS_TRUE@	libcls_cephfs_client.la \
 @ENABLE_CLIENT_TRUE@@ENABLE_SERVER_TRUE@@WITH_MDS_TRUE@@WITH_RADOS_TRUE@	$(LIBRADOS) \
- at ENABLE_CLIENT_TRUE@@ENABLE_SERVER_TRUE@@WITH_MDS_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_9)
+ at ENABLE_CLIENT_TRUE@@ENABLE_SERVER_TRUE@@WITH_MDS_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_10)
 am__cephfs_journal_tool_SOURCES_DIST =  \
 	tools/cephfs/cephfs-journal-tool.cc \
 	tools/cephfs/JournalTool.cc tools/cephfs/JournalFilter.cc \
@@ -4484,9 +4612,9 @@ am__cephfs_journal_tool_SOURCES_DIST =  \
 @ENABLE_CLIENT_TRUE@@ENABLE_SERVER_TRUE@@WITH_MDS_TRUE@@WITH_RADOS_TRUE@	tools/cephfs/Resetter.$(OBJEXT) \
 @ENABLE_CLIENT_TRUE@@ENABLE_SERVER_TRUE@@WITH_MDS_TRUE@@WITH_RADOS_TRUE@	tools/cephfs/MDSUtility.$(OBJEXT)
 cephfs_journal_tool_OBJECTS = $(am_cephfs_journal_tool_OBJECTS)
- at ENABLE_CLIENT_TRUE@@ENABLE_SERVER_TRUE@@WITH_MDS_TRUE@@WITH_RADOS_TRUE at cephfs_journal_tool_DEPENDENCIES = $(am__DEPENDENCIES_14) \
+ at ENABLE_CLIENT_TRUE@@ENABLE_SERVER_TRUE@@WITH_MDS_TRUE@@WITH_RADOS_TRUE at cephfs_journal_tool_DEPENDENCIES = $(am__DEPENDENCIES_16) \
 @ENABLE_CLIENT_TRUE@@ENABLE_SERVER_TRUE@@WITH_MDS_TRUE@@WITH_RADOS_TRUE@	$(LIBRADOS) \
- at ENABLE_CLIENT_TRUE@@ENABLE_SERVER_TRUE@@WITH_MDS_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_9)
+ at ENABLE_CLIENT_TRUE@@ENABLE_SERVER_TRUE@@WITH_MDS_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_10)
 am__cephfs_table_tool_SOURCES_DIST =  \
 	tools/cephfs/cephfs-table-tool.cc tools/cephfs/TableTool.cc \
 	tools/cephfs/MDSUtility.cc
@@ -4494,41 +4622,41 @@ am__cephfs_table_tool_SOURCES_DIST =  \
 @ENABLE_CLIENT_TRUE@@ENABLE_SERVER_TRUE@@WITH_MDS_TRUE@@WITH_RADOS_TRUE@	tools/cephfs/TableTool.$(OBJEXT) \
 @ENABLE_CLIENT_TRUE@@ENABLE_SERVER_TRUE@@WITH_MDS_TRUE@@WITH_RADOS_TRUE@	tools/cephfs/MDSUtility.$(OBJEXT)
 cephfs_table_tool_OBJECTS = $(am_cephfs_table_tool_OBJECTS)
- at ENABLE_CLIENT_TRUE@@ENABLE_SERVER_TRUE@@WITH_MDS_TRUE@@WITH_RADOS_TRUE at cephfs_table_tool_DEPENDENCIES = $(am__DEPENDENCIES_14) \
+ at ENABLE_CLIENT_TRUE@@ENABLE_SERVER_TRUE@@WITH_MDS_TRUE@@WITH_RADOS_TRUE at cephfs_table_tool_DEPENDENCIES = $(am__DEPENDENCIES_16) \
 @ENABLE_CLIENT_TRUE@@ENABLE_SERVER_TRUE@@WITH_MDS_TRUE@@WITH_RADOS_TRUE@	$(LIBRADOS) \
- at ENABLE_CLIENT_TRUE@@ENABLE_SERVER_TRUE@@WITH_MDS_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_9)
+ at ENABLE_CLIENT_TRUE@@ENABLE_SERVER_TRUE@@WITH_MDS_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_10)
 am_crushtool_OBJECTS = tools/crushtool.$(OBJEXT)
 crushtool_OBJECTS = $(am_crushtool_OBJECTS)
-crushtool_DEPENDENCIES = $(am__DEPENDENCIES_9)
+crushtool_DEPENDENCIES = $(am__DEPENDENCIES_10)
 am__get_command_descriptions_SOURCES_DIST =  \
 	test/common/get_command_descriptions.cc
 @ENABLE_SERVER_TRUE@@WITH_MON_TRUE at am_get_command_descriptions_OBJECTS = test/common/get_command_descriptions.$(OBJEXT)
 get_command_descriptions_OBJECTS =  \
 	$(am_get_command_descriptions_OBJECTS)
 @ENABLE_SERVER_TRUE@@WITH_MON_TRUE at get_command_descriptions_DEPENDENCIES =  \
- at ENABLE_SERVER_TRUE@@WITH_MON_TRUE@	$(am__DEPENDENCIES_15) \
+ at ENABLE_SERVER_TRUE@@WITH_MON_TRUE@	$(am__DEPENDENCIES_17) \
 @ENABLE_SERVER_TRUE@@WITH_MON_TRUE@	$(LIBMON_TYPES) \
- at ENABLE_SERVER_TRUE@@WITH_MON_TRUE@	$(am__DEPENDENCIES_12) \
- at ENABLE_SERVER_TRUE@@WITH_MON_TRUE@	$(LIBCOMMON) \
- at ENABLE_SERVER_TRUE@@WITH_MON_TRUE@	$(am__DEPENDENCIES_9)
+ at ENABLE_SERVER_TRUE@@WITH_MON_TRUE@	$(am__DEPENDENCIES_14) \
+ at ENABLE_SERVER_TRUE@@WITH_MON_TRUE@	$(am__DEPENDENCIES_4) \
+ at ENABLE_SERVER_TRUE@@WITH_MON_TRUE@	$(am__DEPENDENCIES_10)
 am__librados_config_SOURCES_DIST = librados-config.cc
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at am_librados_config_OBJECTS = librados-config.$(OBJEXT)
 librados_config_OBJECTS = $(am_librados_config_OBJECTS)
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at librados_config_DEPENDENCIES =  \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(LIBRADOS) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_9)
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_10)
 am_monmaptool_OBJECTS = tools/monmaptool.$(OBJEXT)
 monmaptool_OBJECTS = $(am_monmaptool_OBJECTS)
-monmaptool_DEPENDENCIES = $(am__DEPENDENCIES_9)
+monmaptool_DEPENDENCIES = $(am__DEPENDENCIES_10)
 am__mount_ceph_SOURCES_DIST = mount/mount.ceph.c
 @ENABLE_SERVER_TRUE at am_mount_ceph_OBJECTS =  \
 @ENABLE_SERVER_TRUE@	mount/mount.ceph.$(OBJEXT)
 mount_ceph_OBJECTS = $(am_mount_ceph_OBJECTS)
 @ENABLE_SERVER_TRUE at mount_ceph_DEPENDENCIES = $(LIBSECRET) \
- at ENABLE_SERVER_TRUE@	$(LIBCOMMON)
+ at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_4)
 am_osdmaptool_OBJECTS = tools/osdmaptool.$(OBJEXT)
 osdmaptool_OBJECTS = $(am_osdmaptool_OBJECTS)
-osdmaptool_DEPENDENCIES = $(am__DEPENDENCIES_9)
+osdmaptool_DEPENDENCIES = $(am__DEPENDENCIES_10)
 am__rados_SOURCES_DIST = tools/rados/rados.cc tools/RadosDump.cc \
 	tools/rados/RadosImport.cc tools/rados/PoolDump.cc \
 	common/obj_bencher.cc
@@ -4542,7 +4670,7 @@ rados_OBJECTS = $(am_rados_OBJECTS)
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	libcls_lock_client.la \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(LIBRADOS) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(LIBRADOSSTRIPER) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_9)
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_10)
 am__radosgw_SOURCES_DIST = rgw/rgw_resolve.cc rgw/rgw_rest.cc \
 	rgw/rgw_rest_swift.cc rgw/rgw_rest_s3.cc rgw/rgw_rest_usage.cc \
 	rgw/rgw_rest_user.cc rgw/rgw_rest_bucket.cc \
@@ -4570,24 +4698,24 @@ am__radosgw_SOURCES_DIST = rgw/rgw_resolve.cc rgw/rgw_rest.cc \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	rgw/rgw_loadgen.$(OBJEXT) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	rgw/rgw_main.$(OBJEXT)
 radosgw_OBJECTS = $(am_radosgw_OBJECTS)
- at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE at radosgw_DEPENDENCIES = $(am__DEPENDENCIES_18) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE at radosgw_DEPENDENCIES = $(am__DEPENDENCIES_20) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(LIBCIVETWEB) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_20) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_22) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_1) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_9)
+ at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_10)
 am__radosgw_admin_SOURCES_DIST = rgw/rgw_admin.cc rgw/rgw_orphan.cc
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE at am_radosgw_admin_OBJECTS = rgw/rgw_admin.$(OBJEXT) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	rgw/rgw_orphan.$(OBJEXT)
 radosgw_admin_OBJECTS = $(am_radosgw_admin_OBJECTS)
- at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE at radosgw_admin_DEPENDENCIES = $(am__DEPENDENCIES_18) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_20) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_9)
+ at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE at radosgw_admin_DEPENDENCIES = $(am__DEPENDENCIES_20) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_22) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_10)
 am__radosgw_object_expirer_SOURCES_DIST = rgw/rgw_object_expirer.cc
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE at am_radosgw_object_expirer_OBJECTS = rgw/rgw_object_expirer.$(OBJEXT)
 radosgw_object_expirer_OBJECTS = $(am_radosgw_object_expirer_OBJECTS)
- at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE at radosgw_object_expirer_DEPENDENCIES = $(am__DEPENDENCIES_18) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_20) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_9)
+ at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE at radosgw_object_expirer_DEPENDENCIES = $(am__DEPENDENCIES_20) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_22) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_10)
 am__rbd_SOURCES_DIST = tools/rbd/rbd.cc tools/rbd/ArgumentTypes.cc \
 	tools/rbd/IndentStream.cc tools/rbd/OptionPrinter.cc \
 	tools/rbd/Shell.cc tools/rbd/Utils.cc \
@@ -4648,7 +4776,7 @@ rbd_OBJECTS = $(am_rbd_OBJECTS)
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(LIBRBD) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(LIBRBD_TYPES) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(LIBRADOS) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(am__DEPENDENCIES_9) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(am__DEPENDENCIES_10) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(am__DEPENDENCIES_1) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(am__DEPENDENCIES_1)
 am__rbd_fuse_SOURCES_DIST = rbd_fuse/rbd-fuse.cc
@@ -4657,16 +4785,31 @@ rbd_fuse_OBJECTS = $(am_rbd_fuse_OBJECTS)
 @ENABLE_CLIENT_TRUE@@WITH_FUSE_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at rbd_fuse_DEPENDENCIES = $(am__DEPENDENCIES_1) \
 @ENABLE_CLIENT_TRUE@@WITH_FUSE_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(LIBRBD) \
 @ENABLE_CLIENT_TRUE@@WITH_FUSE_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(LIBRADOS) \
- at ENABLE_CLIENT_TRUE@@WITH_FUSE_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(am__DEPENDENCIES_9)
+ at ENABLE_CLIENT_TRUE@@WITH_FUSE_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(am__DEPENDENCIES_10)
 rbd_fuse_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) \
 	$(LIBTOOLFLAGS) --mode=link $(CXXLD) $(rbd_fuse_CXXFLAGS) \
 	$(CXXFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@
+am__rbd_mirror_SOURCES_DIST = tools/rbd_mirror/main.cc
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am_rbd_mirror_OBJECTS = tools/rbd_mirror/main.$(OBJEXT)
+rbd_mirror_OBJECTS = $(am_rbd_mirror_OBJECTS)
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at rbd_mirror_DEPENDENCIES = librbd_mirror_internal.la \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	librbd_internal.la \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	librbd_api.la \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(LIBRBD_TYPES) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	libjournal.la \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(LIBRADOS) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(LIBOSDC) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	librados_internal.la \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	libcls_rbd_client.la \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	libcls_lock_client.la \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	libcls_journal_client.la \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(am__DEPENDENCIES_10)
 am__rbd_nbd_SOURCES_DIST = tools/rbd_nbd/rbd-nbd.cc
 @ENABLE_CLIENT_TRUE@@LINUX_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am_rbd_nbd_OBJECTS = tools/rbd_nbd/rbd_nbd-rbd-nbd.$(OBJEXT)
 rbd_nbd_OBJECTS = $(am_rbd_nbd_OBJECTS)
 @ENABLE_CLIENT_TRUE@@LINUX_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at rbd_nbd_DEPENDENCIES = $(LIBRBD) \
 @ENABLE_CLIENT_TRUE@@LINUX_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(LIBRADOS) \
- at ENABLE_CLIENT_TRUE@@LINUX_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(am__DEPENDENCIES_9) \
+ at ENABLE_CLIENT_TRUE@@LINUX_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(am__DEPENDENCIES_10) \
 @ENABLE_CLIENT_TRUE@@LINUX_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(am__DEPENDENCIES_1)
 rbd_nbd_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) \
 	$(LIBTOOLFLAGS) --mode=link $(CXXLD) $(rbd_nbd_CXXFLAGS) \
@@ -4678,8 +4821,8 @@ rbd_replay_OBJECTS = $(am_rbd_replay_OBJECTS)
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	librbd_replay_types.la \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(LIBRBD) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(LIBRADOS) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(am__DEPENDENCIES_9) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(LIBCOMMON)
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(am__DEPENDENCIES_10) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(am__DEPENDENCIES_4)
 am__rbd_replay_prep_SOURCES_DIST = rbd_replay/rbd-replay-prep.cc
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am_rbd_replay_prep_OBJECTS = rbd_replay/rbd-replay-prep.$(OBJEXT)
 rbd_replay_prep_OBJECTS = $(am_rbd_replay_prep_OBJECTS)
@@ -4688,17 +4831,17 @@ rbd_replay_prep_OBJECTS = $(am_rbd_replay_prep_OBJECTS)
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	librbd_replay_types.la \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(LIBRBD) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(LIBRADOS) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(am__DEPENDENCIES_9) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(LIBCOMMON)
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(am__DEPENDENCIES_10) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(am__DEPENDENCIES_4)
 am__simple_client_SOURCES_DIST = test/messenger/simple_client.cc \
 	test/messenger/simple_dispatcher.cc
 @ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE at am_simple_client_OBJECTS = test/messenger/simple_client-simple_client.$(OBJEXT) \
 @ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@	test/messenger/simple_client-simple_dispatcher.$(OBJEXT)
 simple_client_OBJECTS = $(am_simple_client_OBJECTS)
 @ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE at simple_client_DEPENDENCIES =  \
- at ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@	$(am__DEPENDENCIES_12) \
- at ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@	$(LIBCOMMON) \
- at ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@	$(am__DEPENDENCIES_9) \
+ at ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@	$(am__DEPENDENCIES_14) \
+ at ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@	$(am__DEPENDENCIES_4) \
+ at ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@	$(am__DEPENDENCIES_10) \
 @ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@	$(am__DEPENDENCIES_1) \
 @ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@	$(am__DEPENDENCIES_3) \
 @ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@	$(am__DEPENDENCIES_1)
@@ -4712,9 +4855,9 @@ am__simple_server_SOURCES_DIST = test/messenger/simple_server.cc \
 @ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@	test/messenger/simple_server-simple_dispatcher.$(OBJEXT)
 simple_server_OBJECTS = $(am_simple_server_OBJECTS)
 @ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE at simple_server_DEPENDENCIES =  \
- at ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@	$(am__DEPENDENCIES_12) \
- at ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@	$(LIBCOMMON) \
- at ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@	$(am__DEPENDENCIES_9) \
+ at ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@	$(am__DEPENDENCIES_14) \
+ at ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@	$(am__DEPENDENCIES_4) \
+ at ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@	$(am__DEPENDENCIES_10) \
 @ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@	$(am__DEPENDENCIES_1) \
 @ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@	$(am__DEPENDENCIES_3) \
 @ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@	$(am__DEPENDENCIES_1)
@@ -4725,13 +4868,13 @@ simple_server_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 am__test_build_libcephfs_SOURCES_DIST = test/buildtest_skeleton.cc \
 	osdc/Objecter.cc osdc/ObjectCacher.cc osdc/Filer.cc \
 	osdc/Striper.cc osdc/Journaler.cc
-am__objects_43 = osdc/test_build_libcephfs-Objecter.$(OBJEXT) \
+am__objects_46 = osdc/test_build_libcephfs-Objecter.$(OBJEXT) \
 	osdc/test_build_libcephfs-ObjectCacher.$(OBJEXT) \
 	osdc/test_build_libcephfs-Filer.$(OBJEXT) \
 	osdc/test_build_libcephfs-Striper.$(OBJEXT) \
 	osdc/test_build_libcephfs-Journaler.$(OBJEXT)
 @ENABLE_CLIENT_TRUE@@WITH_BUILD_TESTS_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE at am_test_build_libcephfs_OBJECTS = test/test_build_libcephfs-buildtest_skeleton.$(OBJEXT) \
- at ENABLE_CLIENT_TRUE@@WITH_BUILD_TESTS_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE@	$(am__objects_43)
+ at ENABLE_CLIENT_TRUE@@WITH_BUILD_TESTS_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE@	$(am__objects_46)
 test_build_libcephfs_OBJECTS = $(am_test_build_libcephfs_OBJECTS)
 @ENABLE_CLIENT_TRUE@@WITH_BUILD_TESTS_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE at test_build_libcephfs_DEPENDENCIES = $(LIBCEPHFS) \
 @ENABLE_CLIENT_TRUE@@WITH_BUILD_TESTS_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_1) \
@@ -4743,12 +4886,12 @@ test_build_libcephfs_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(test_build_libcephfs_LDFLAGS) $(LDFLAGS) -o $@
 am__test_build_libcommon_SOURCES_DIST = test/buildtest_skeleton.cc \
 	common/buffer.cc
-am__objects_44 = common/test_build_libcommon-buffer.$(OBJEXT)
+am__objects_47 = common/test_build_libcommon-buffer.$(OBJEXT)
 @WITH_BUILD_TESTS_TRUE at am_test_build_libcommon_OBJECTS = test/test_build_libcommon-buildtest_skeleton.$(OBJEXT) \
- at WITH_BUILD_TESTS_TRUE@	$(am__objects_44)
+ at WITH_BUILD_TESTS_TRUE@	$(am__objects_47)
 test_build_libcommon_OBJECTS = $(am_test_build_libcommon_OBJECTS)
 @WITH_BUILD_TESTS_TRUE at test_build_libcommon_DEPENDENCIES =  \
- at WITH_BUILD_TESTS_TRUE@	$(am__DEPENDENCIES_4) \
+ at WITH_BUILD_TESTS_TRUE@	$(am__DEPENDENCIES_5) \
 @WITH_BUILD_TESTS_TRUE@	$(am__DEPENDENCIES_1) \
 @WITH_BUILD_TESTS_TRUE@	$(am__DEPENDENCIES_1) \
 @WITH_BUILD_TESTS_TRUE@	$(am__DEPENDENCIES_3)
@@ -4758,12 +4901,12 @@ test_build_libcommon_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(test_build_libcommon_LDFLAGS) $(LDFLAGS) -o $@
 am__test_build_librados_SOURCES_DIST = test/buildtest_skeleton.cc \
 	common/buffer.cc librados/librados.cc
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at am__objects_45 = common/test_build_librados-buffer.$(OBJEXT) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at am__objects_48 = common/test_build_librados-buffer.$(OBJEXT) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	librados/test_build_librados-librados.$(OBJEXT)
 @ENABLE_CLIENT_TRUE@@WITH_BUILD_TESTS_TRUE@@WITH_RADOS_TRUE at am_test_build_librados_OBJECTS = test/test_build_librados-buildtest_skeleton.$(OBJEXT) \
- at ENABLE_CLIENT_TRUE@@WITH_BUILD_TESTS_TRUE@@WITH_RADOS_TRUE@	$(am__objects_45)
+ at ENABLE_CLIENT_TRUE@@WITH_BUILD_TESTS_TRUE@@WITH_RADOS_TRUE@	$(am__objects_48)
 test_build_librados_OBJECTS = $(am_test_build_librados_OBJECTS)
- at ENABLE_CLIENT_TRUE@@WITH_BUILD_TESTS_TRUE@@WITH_RADOS_TRUE at test_build_librados_DEPENDENCIES = $(am__DEPENDENCIES_6) \
+ at ENABLE_CLIENT_TRUE@@WITH_BUILD_TESTS_TRUE@@WITH_RADOS_TRUE at test_build_librados_DEPENDENCIES = $(am__DEPENDENCIES_7) \
 @ENABLE_CLIENT_TRUE@@WITH_BUILD_TESTS_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_1) \
 @ENABLE_CLIENT_TRUE@@WITH_BUILD_TESTS_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_1) \
 @ENABLE_CLIENT_TRUE@@WITH_BUILD_TESTS_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_3)
@@ -4775,17 +4918,17 @@ am__test_build_librgw_SOURCES_DIST = test/buildtest_skeleton.cc \
 	rgw/librgw.cc rgw/rgw_acl.cc rgw/rgw_acl_s3.cc \
 	rgw/rgw_acl_swift.cc rgw/rgw_client_io.cc rgw/rgw_fcgi.cc \
 	rgw/rgw_xml.cc rgw/rgw_usage.cc rgw/rgw_json_enc.cc \
-	rgw/rgw_user.cc rgw/rgw_bucket.cc rgw/rgw_tools.cc \
-	rgw/rgw_rados.cc rgw/rgw_http_client.cc rgw/rgw_rest_client.cc \
-	rgw/rgw_rest_conn.cc rgw/rgw_op.cc rgw/rgw_basic_types.cc \
-	rgw/rgw_common.cc rgw/rgw_cache.cc rgw/rgw_formats.cc \
-	rgw/rgw_log.cc rgw/rgw_multi.cc rgw/rgw_policy_s3.cc \
-	rgw/rgw_gc.cc rgw/rgw_multi_del.cc rgw/rgw_env.cc \
-	rgw/rgw_cors.cc rgw/rgw_cors_s3.cc rgw/rgw_auth_s3.cc \
-	rgw/rgw_metadata.cc rgw/rgw_replica_log.cc rgw/rgw_keystone.cc \
-	rgw/rgw_quota.cc rgw/rgw_dencoder.cc \
-	rgw/rgw_object_expirer_core.cc
- at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE at am__objects_46 = rgw/test_build_librgw-librgw.$(OBJEXT) \
+	rgw/rgw_xml_enc.cc rgw/rgw_user.cc rgw/rgw_bucket.cc \
+	rgw/rgw_tools.cc rgw/rgw_rados.cc rgw/rgw_http_client.cc \
+	rgw/rgw_rest_client.cc rgw/rgw_rest_conn.cc rgw/rgw_op.cc \
+	rgw/rgw_basic_types.cc rgw/rgw_common.cc rgw/rgw_cache.cc \
+	rgw/rgw_formats.cc rgw/rgw_log.cc rgw/rgw_multi.cc \
+	rgw/rgw_policy_s3.cc rgw/rgw_gc.cc rgw/rgw_multi_del.cc \
+	rgw/rgw_env.cc rgw/rgw_cors.cc rgw/rgw_cors_s3.cc \
+	rgw/rgw_auth_s3.cc rgw/rgw_metadata.cc rgw/rgw_replica_log.cc \
+	rgw/rgw_keystone.cc rgw/rgw_quota.cc rgw/rgw_dencoder.cc \
+	rgw/rgw_object_expirer_core.cc rgw/rgw_website.cc
+ at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE at am__objects_49 = rgw/test_build_librgw-librgw.$(OBJEXT) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	rgw/test_build_librgw-rgw_acl.$(OBJEXT) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	rgw/test_build_librgw-rgw_acl_s3.$(OBJEXT) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	rgw/test_build_librgw-rgw_acl_swift.$(OBJEXT) \
@@ -4794,6 +4937,7 @@ am__test_build_librgw_SOURCES_DIST = test/buildtest_skeleton.cc \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	rgw/test_build_librgw-rgw_xml.$(OBJEXT) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	rgw/test_build_librgw-rgw_usage.$(OBJEXT) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	rgw/test_build_librgw-rgw_json_enc.$(OBJEXT) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	rgw/test_build_librgw-rgw_xml_enc.$(OBJEXT) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	rgw/test_build_librgw-rgw_user.$(OBJEXT) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	rgw/test_build_librgw-rgw_bucket.$(OBJEXT) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	rgw/test_build_librgw-rgw_tools.$(OBJEXT) \
@@ -4820,23 +4964,24 @@ am__test_build_librgw_SOURCES_DIST = test/buildtest_skeleton.cc \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	rgw/test_build_librgw-rgw_keystone.$(OBJEXT) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	rgw/test_build_librgw-rgw_quota.$(OBJEXT) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	rgw/test_build_librgw-rgw_dencoder.$(OBJEXT) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	rgw/test_build_librgw-rgw_object_expirer_core.$(OBJEXT)
+ at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	rgw/test_build_librgw-rgw_object_expirer_core.$(OBJEXT) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	rgw/test_build_librgw-rgw_website.$(OBJEXT)
 @ENABLE_CLIENT_TRUE@@WITH_BUILD_TESTS_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE at am_test_build_librgw_OBJECTS = test/test_build_librgw-buildtest_skeleton.$(OBJEXT) \
- at ENABLE_CLIENT_TRUE@@WITH_BUILD_TESTS_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__objects_46)
+ at ENABLE_CLIENT_TRUE@@WITH_BUILD_TESTS_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__objects_49)
 test_build_librgw_OBJECTS = $(am_test_build_librgw_OBJECTS)
- at ENABLE_CLIENT_TRUE@@WITH_BUILD_TESTS_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE at test_build_librgw_DEPENDENCIES = $(am__DEPENDENCIES_20) \
+ at ENABLE_CLIENT_TRUE@@WITH_BUILD_TESTS_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE at test_build_librgw_DEPENDENCIES = $(am__DEPENDENCIES_22) \
 @ENABLE_CLIENT_TRUE@@WITH_BUILD_TESTS_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_1) \
 @ENABLE_CLIENT_TRUE@@WITH_BUILD_TESTS_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_1) \
 @ENABLE_CLIENT_TRUE@@WITH_BUILD_TESTS_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_3) \
- at ENABLE_CLIENT_TRUE@@WITH_BUILD_TESTS_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_9)
+ at ENABLE_CLIENT_TRUE@@WITH_BUILD_TESTS_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_10)
 test_build_librgw_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(test_build_librgw_CXXFLAGS) $(CXXFLAGS) \
 	$(test_build_librgw_LDFLAGS) $(LDFLAGS) -o $@
 am_unittest_addrs_OBJECTS = test/unittest_addrs-test_addrs.$(OBJEXT)
 unittest_addrs_OBJECTS = $(am_unittest_addrs_OBJECTS)
-unittest_addrs_DEPENDENCIES = $(am__DEPENDENCIES_17) \
-	$(am__DEPENDENCIES_9)
+unittest_addrs_DEPENDENCIES = $(am__DEPENDENCIES_19) \
+	$(am__DEPENDENCIES_10)
 unittest_addrs_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(unittest_addrs_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
@@ -4844,16 +4989,16 @@ unittest_addrs_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 am_unittest_admin_socket_OBJECTS =  \
 	test/unittest_admin_socket-admin_socket.$(OBJEXT)
 unittest_admin_socket_OBJECTS = $(am_unittest_admin_socket_OBJECTS)
-unittest_admin_socket_DEPENDENCIES = $(am__DEPENDENCIES_17) \
-	$(am__DEPENDENCIES_9)
+unittest_admin_socket_DEPENDENCIES = $(am__DEPENDENCIES_19) \
+	$(am__DEPENDENCIES_10)
 unittest_admin_socket_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(unittest_admin_socket_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
 	$(LDFLAGS) -o $@
 am_unittest_arch_OBJECTS = test/unittest_arch-test_arch.$(OBJEXT)
 unittest_arch_OBJECTS = $(am_unittest_arch_OBJECTS)
-unittest_arch_DEPENDENCIES = $(am__DEPENDENCIES_17) \
-	$(am__DEPENDENCIES_9)
+unittest_arch_DEPENDENCIES = $(am__DEPENDENCIES_19) \
+	$(am__DEPENDENCIES_10)
 unittest_arch_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(unittest_arch_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) $(LDFLAGS) \
@@ -4861,8 +5006,8 @@ unittest_arch_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 am_unittest_async_compressor_OBJECTS = test/common/unittest_async_compressor-test_async_compressor.$(OBJEXT)
 unittest_async_compressor_OBJECTS =  \
 	$(am_unittest_async_compressor_OBJECTS)
-unittest_async_compressor_DEPENDENCIES = $(am__DEPENDENCIES_17) \
-	$(am__DEPENDENCIES_9) $(LIBCOMPRESSOR) $(LIBCOMMON)
+unittest_async_compressor_DEPENDENCIES = $(am__DEPENDENCIES_19) \
+	$(am__DEPENDENCIES_10) $(LIBCOMPRESSOR) $(am__DEPENDENCIES_4)
 unittest_async_compressor_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(unittest_async_compressor_CXXFLAGS) $(CXXFLAGS) \
@@ -4871,8 +5016,8 @@ am__unittest_base64_SOURCES_DIST = test/base64.cc
 @ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE at am_unittest_base64_OBJECTS = test/unittest_base64-base64.$(OBJEXT)
 unittest_base64_OBJECTS = $(am_unittest_base64_OBJECTS)
 @ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE at unittest_base64_DEPENDENCIES = $(LIBCEPHFS) \
- at ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_9) \
- at ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_17)
+ at ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_10) \
+ at ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_19)
 unittest_base64_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(unittest_base64_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
@@ -4880,8 +5025,8 @@ unittest_base64_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 am_unittest_bit_vector_OBJECTS =  \
 	test/common/unittest_bit_vector-test_bit_vector.$(OBJEXT)
 unittest_bit_vector_OBJECTS = $(am_unittest_bit_vector_OBJECTS)
-unittest_bit_vector_DEPENDENCIES = $(am__DEPENDENCIES_17) \
-	$(am__DEPENDENCIES_9)
+unittest_bit_vector_DEPENDENCIES = $(am__DEPENDENCIES_19) \
+	$(am__DEPENDENCIES_10)
 unittest_bit_vector_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(unittest_bit_vector_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
@@ -4889,8 +5034,8 @@ unittest_bit_vector_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 am_unittest_blkdev_OBJECTS =  \
 	test/common/unittest_blkdev-test_blkdev.$(OBJEXT)
 unittest_blkdev_OBJECTS = $(am_unittest_blkdev_OBJECTS)
-unittest_blkdev_DEPENDENCIES = $(am__DEPENDENCIES_17) \
-	$(am__DEPENDENCIES_9)
+unittest_blkdev_DEPENDENCIES = $(am__DEPENDENCIES_19) \
+	$(am__DEPENDENCIES_10)
 unittest_blkdev_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(unittest_blkdev_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
@@ -4898,8 +5043,8 @@ unittest_blkdev_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 am_unittest_bloom_filter_OBJECTS =  \
 	test/common/unittest_bloom_filter-test_bloom_filter.$(OBJEXT)
 unittest_bloom_filter_OBJECTS = $(am_unittest_bloom_filter_OBJECTS)
-unittest_bloom_filter_DEPENDENCIES = $(am__DEPENDENCIES_17) \
-	$(am__DEPENDENCIES_9)
+unittest_bloom_filter_DEPENDENCIES = $(am__DEPENDENCIES_19) \
+	$(am__DEPENDENCIES_10)
 unittest_bloom_filter_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(unittest_bloom_filter_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
@@ -4908,9 +5053,9 @@ am__unittest_bluefs_SOURCES_DIST = test/objectstore/test_bluefs.cc
 @ENABLE_SERVER_TRUE@@LINUX_TRUE at am_unittest_bluefs_OBJECTS = test/objectstore/unittest_bluefs-test_bluefs.$(OBJEXT)
 unittest_bluefs_OBJECTS = $(am_unittest_bluefs_OBJECTS)
 @ENABLE_SERVER_TRUE@@LINUX_TRUE at unittest_bluefs_DEPENDENCIES =  \
- at ENABLE_SERVER_TRUE@@LINUX_TRUE@	$(am__DEPENDENCIES_12) \
- at ENABLE_SERVER_TRUE@@LINUX_TRUE@	$(am__DEPENDENCIES_17) \
- at ENABLE_SERVER_TRUE@@LINUX_TRUE@	$(am__DEPENDENCIES_9)
+ at ENABLE_SERVER_TRUE@@LINUX_TRUE@	$(am__DEPENDENCIES_14) \
+ at ENABLE_SERVER_TRUE@@LINUX_TRUE@	$(am__DEPENDENCIES_19) \
+ at ENABLE_SERVER_TRUE@@LINUX_TRUE@	$(am__DEPENDENCIES_10)
 unittest_bluefs_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(unittest_bluefs_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
@@ -4921,9 +5066,9 @@ am__unittest_bluestore_types_SOURCES_DIST =  \
 unittest_bluestore_types_OBJECTS =  \
 	$(am_unittest_bluestore_types_OBJECTS)
 @ENABLE_SERVER_TRUE@@LINUX_TRUE at unittest_bluestore_types_DEPENDENCIES =  \
- at ENABLE_SERVER_TRUE@@LINUX_TRUE@	$(am__DEPENDENCIES_12) \
- at ENABLE_SERVER_TRUE@@LINUX_TRUE@	$(am__DEPENDENCIES_17) \
- at ENABLE_SERVER_TRUE@@LINUX_TRUE@	$(am__DEPENDENCIES_9)
+ at ENABLE_SERVER_TRUE@@LINUX_TRUE@	$(am__DEPENDENCIES_14) \
+ at ENABLE_SERVER_TRUE@@LINUX_TRUE@	$(am__DEPENDENCIES_19) \
+ at ENABLE_SERVER_TRUE@@LINUX_TRUE@	$(am__DEPENDENCIES_10)
 unittest_bluestore_types_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(unittest_bluestore_types_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
@@ -4931,8 +5076,8 @@ unittest_bluestore_types_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 am_unittest_bufferlist_OBJECTS =  \
 	test/unittest_bufferlist-bufferlist.$(OBJEXT)
 unittest_bufferlist_OBJECTS = $(am_unittest_bufferlist_OBJECTS)
-unittest_bufferlist_DEPENDENCIES = $(am__DEPENDENCIES_17) \
-	$(am__DEPENDENCIES_9)
+unittest_bufferlist_DEPENDENCIES = $(am__DEPENDENCIES_19) \
+	$(am__DEPENDENCIES_10)
 unittest_bufferlist_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(unittest_bufferlist_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
@@ -4940,8 +5085,8 @@ unittest_bufferlist_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 am_unittest_ceph_argparse_OBJECTS =  \
 	test/unittest_ceph_argparse-ceph_argparse.$(OBJEXT)
 unittest_ceph_argparse_OBJECTS = $(am_unittest_ceph_argparse_OBJECTS)
-unittest_ceph_argparse_DEPENDENCIES = $(am__DEPENDENCIES_17) \
-	$(am__DEPENDENCIES_9)
+unittest_ceph_argparse_DEPENDENCIES = $(am__DEPENDENCIES_19) \
+	$(am__DEPENDENCIES_10)
 unittest_ceph_argparse_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(unittest_ceph_argparse_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
@@ -4950,8 +5095,8 @@ am_unittest_ceph_compatset_OBJECTS =  \
 	test/unittest_ceph_compatset-ceph_compatset.$(OBJEXT)
 unittest_ceph_compatset_OBJECTS =  \
 	$(am_unittest_ceph_compatset_OBJECTS)
-unittest_ceph_compatset_DEPENDENCIES = $(am__DEPENDENCIES_17) \
-	$(am__DEPENDENCIES_9)
+unittest_ceph_compatset_DEPENDENCIES = $(am__DEPENDENCIES_19) \
+	$(am__DEPENDENCIES_10)
 unittest_ceph_compatset_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(unittest_ceph_compatset_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
@@ -4959,8 +5104,8 @@ unittest_ceph_compatset_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 am_unittest_ceph_crypto_OBJECTS =  \
 	test/unittest_ceph_crypto-ceph_crypto.$(OBJEXT)
 unittest_ceph_crypto_OBJECTS = $(am_unittest_ceph_crypto_OBJECTS)
-unittest_ceph_crypto_DEPENDENCIES = $(am__DEPENDENCIES_17) \
-	$(am__DEPENDENCIES_9)
+unittest_ceph_crypto_DEPENDENCIES = $(am__DEPENDENCIES_19) \
+	$(am__DEPENDENCIES_10)
 unittest_ceph_crypto_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(unittest_ceph_crypto_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
@@ -4970,9 +5115,9 @@ am__unittest_chain_xattr_SOURCES_DIST =  \
 @ENABLE_SERVER_TRUE at am_unittest_chain_xattr_OBJECTS = test/objectstore/unittest_chain_xattr-chain_xattr.$(OBJEXT)
 unittest_chain_xattr_OBJECTS = $(am_unittest_chain_xattr_OBJECTS)
 @ENABLE_SERVER_TRUE at unittest_chain_xattr_DEPENDENCIES =  \
- at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_12) \
- at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_17) \
- at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_9)
+ at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_14) \
+ at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_19) \
+ at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_10)
 unittest_chain_xattr_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(unittest_chain_xattr_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
@@ -4985,10 +5130,10 @@ am__unittest_compression_plugin_SOURCES_DIST =  \
 unittest_compression_plugin_OBJECTS =  \
 	$(am_unittest_compression_plugin_OBJECTS)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at unittest_compression_plugin_DEPENDENCIES =  \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_16) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(LIBCOMMON) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_17) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_9) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_18) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_4) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_19) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_10) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_1)
 unittest_compression_plugin_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
@@ -4998,17 +5143,17 @@ am__unittest_compression_plugin_snappy_SOURCES_DIST =  \
 	test/compressor/test_compression_plugin_snappy.cc \
 	compressor/Compressor.cc \
 	compressor/snappy/CompressionPluginSnappy.cc
-am__objects_47 = compressor/unittest_compression_plugin_snappy-Compressor.$(OBJEXT) \
+am__objects_50 = compressor/unittest_compression_plugin_snappy-Compressor.$(OBJEXT) \
 	compressor/snappy/unittest_compression_plugin_snappy-CompressionPluginSnappy.$(OBJEXT)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at am_unittest_compression_plugin_snappy_OBJECTS = test/compressor/unittest_compression_plugin_snappy-test_compression_plugin_snappy.$(OBJEXT) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__objects_47)
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__objects_50)
 unittest_compression_plugin_snappy_OBJECTS =  \
 	$(am_unittest_compression_plugin_snappy_OBJECTS)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at unittest_compression_plugin_snappy_DEPENDENCIES =  \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_16) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(LIBCOMMON) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_17) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_9) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_18) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_4) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_19) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_10) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(LIBCOMPRESSOR) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_1)
 unittest_compression_plugin_snappy_LINK = $(LIBTOOL) $(AM_V_lt) \
@@ -5016,32 +5161,79 @@ unittest_compression_plugin_snappy_LINK = $(LIBTOOL) $(AM_V_lt) \
 	$(CXXLD) $(unittest_compression_plugin_snappy_CXXFLAGS) \
 	$(CXXFLAGS) $(unittest_compression_plugin_snappy_LDFLAGS) \
 	$(LDFLAGS) -o $@
-am__unittest_compression_snappy_SOURCES_DIST =  \
-	test/compressor/test_compression_snappy.cc \
+am__unittest_compression_plugin_zlib_SOURCES_DIST =  \
+	test/compressor/test_compression_plugin_zlib.cc \
 	compressor/Compressor.cc \
-	compressor/snappy/CompressionPluginSnappy.cc
-am__objects_48 =  \
+	compressor/zlib/CompressionPluginZlib.cc \
+	compressor/zlib/CompressionZlib.cc
+am__objects_51 = compressor/unittest_compression_plugin_zlib-Compressor.$(OBJEXT) \
+	compressor/zlib/unittest_compression_plugin_zlib-CompressionPluginZlib.$(OBJEXT) \
+	compressor/zlib/unittest_compression_plugin_zlib-CompressionZlib.$(OBJEXT)
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at am_unittest_compression_plugin_zlib_OBJECTS = test/compressor/unittest_compression_plugin_zlib-test_compression_plugin_zlib.$(OBJEXT) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__objects_51)
+unittest_compression_plugin_zlib_OBJECTS =  \
+	$(am_unittest_compression_plugin_zlib_OBJECTS)
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at unittest_compression_plugin_zlib_DEPENDENCIES =  \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_18) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_4) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_19) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_10) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(LIBCOMPRESSOR) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_1)
+unittest_compression_plugin_zlib_LINK = $(LIBTOOL) $(AM_V_lt) \
+	--tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link \
+	$(CXXLD) $(unittest_compression_plugin_zlib_CXXFLAGS) \
+	$(CXXFLAGS) $(unittest_compression_plugin_zlib_LDFLAGS) \
+	$(LDFLAGS) -o $@
+am__unittest_compression_snappy_SOURCES_DIST =  \
+	test/compressor/test_compression_snappy.cc \
+	compressor/Compressor.cc \
+	compressor/snappy/CompressionPluginSnappy.cc
+am__objects_52 =  \
 	compressor/unittest_compression_snappy-Compressor.$(OBJEXT) \
 	compressor/snappy/unittest_compression_snappy-CompressionPluginSnappy.$(OBJEXT)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at am_unittest_compression_snappy_OBJECTS = test/compressor/unittest_compression_snappy-test_compression_snappy.$(OBJEXT) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__objects_48)
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__objects_52)
 unittest_compression_snappy_OBJECTS =  \
 	$(am_unittest_compression_snappy_OBJECTS)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at unittest_compression_snappy_DEPENDENCIES =  \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_16) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(LIBCOMMON) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_17) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_9) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_18) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_4) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_19) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_10) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_1)
 unittest_compression_snappy_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(unittest_compression_snappy_CXXFLAGS) $(CXXFLAGS) \
 	$(unittest_compression_snappy_LDFLAGS) $(LDFLAGS) -o $@
+am__unittest_compression_zlib_SOURCES_DIST =  \
+	test/compressor/test_compression_zlib.cc \
+	compressor/Compressor.cc \
+	compressor/zlib/CompressionPluginZlib.cc \
+	compressor/zlib/CompressionZlib.cc
+am__objects_53 =  \
+	compressor/unittest_compression_zlib-Compressor.$(OBJEXT) \
+	compressor/zlib/unittest_compression_zlib-CompressionPluginZlib.$(OBJEXT) \
+	compressor/zlib/unittest_compression_zlib-CompressionZlib.$(OBJEXT)
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at am_unittest_compression_zlib_OBJECTS = test/compressor/unittest_compression_zlib-test_compression_zlib.$(OBJEXT) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__objects_53)
+unittest_compression_zlib_OBJECTS =  \
+	$(am_unittest_compression_zlib_OBJECTS)
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at unittest_compression_zlib_DEPENDENCIES =  \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_18) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_4) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_19) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_10) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_1)
+unittest_compression_zlib_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
+	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
+	$(unittest_compression_zlib_CXXFLAGS) $(CXXFLAGS) \
+	$(unittest_compression_zlib_LDFLAGS) $(LDFLAGS) -o $@
 am_unittest_config_OBJECTS =  \
 	test/common/unittest_config-test_config.$(OBJEXT)
 unittest_config_OBJECTS = $(am_unittest_config_OBJECTS)
-unittest_config_DEPENDENCIES = $(am__DEPENDENCIES_17) \
-	$(am__DEPENDENCIES_9)
+unittest_config_DEPENDENCIES = $(am__DEPENDENCIES_19) \
+	$(am__DEPENDENCIES_10)
 unittest_config_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(unittest_config_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
@@ -5049,8 +5241,8 @@ unittest_config_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 am_unittest_confutils_OBJECTS =  \
 	test/unittest_confutils-confutils.$(OBJEXT)
 unittest_confutils_OBJECTS = $(am_unittest_confutils_OBJECTS)
-unittest_confutils_DEPENDENCIES = $(am__DEPENDENCIES_17) \
-	$(am__DEPENDENCIES_9)
+unittest_confutils_DEPENDENCIES = $(am__DEPENDENCIES_19) \
+	$(am__DEPENDENCIES_10)
 unittest_confutils_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(unittest_confutils_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
@@ -5058,8 +5250,8 @@ unittest_confutils_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 am_unittest_context_OBJECTS =  \
 	test/common/unittest_context-test_context.$(OBJEXT)
 unittest_context_OBJECTS = $(am_unittest_context_OBJECTS)
-unittest_context_DEPENDENCIES = $(am__DEPENDENCIES_17) \
-	$(am__DEPENDENCIES_9)
+unittest_context_DEPENDENCIES = $(am__DEPENDENCIES_19) \
+	$(am__DEPENDENCIES_10)
 unittest_context_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(unittest_context_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
@@ -5067,16 +5259,17 @@ unittest_context_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 am_unittest_crc32c_OBJECTS =  \
 	test/common/unittest_crc32c-test_crc32c.$(OBJEXT)
 unittest_crc32c_OBJECTS = $(am_unittest_crc32c_OBJECTS)
-unittest_crc32c_DEPENDENCIES = $(am__DEPENDENCIES_17) \
-	$(am__DEPENDENCIES_9)
+unittest_crc32c_DEPENDENCIES = $(am__DEPENDENCIES_19) \
+	$(am__DEPENDENCIES_10)
 unittest_crc32c_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(unittest_crc32c_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
 	$(LDFLAGS) -o $@
 am_unittest_crush_OBJECTS = test/crush/unittest_crush-crush.$(OBJEXT)
 unittest_crush_OBJECTS = $(am_unittest_crush_OBJECTS)
-unittest_crush_DEPENDENCIES = $(LIBCOMMON) $(am__DEPENDENCIES_17) \
-	$(am__DEPENDENCIES_3) $(am__DEPENDENCIES_9)
+unittest_crush_DEPENDENCIES = $(am__DEPENDENCIES_4) \
+	$(am__DEPENDENCIES_19) $(am__DEPENDENCIES_3) \
+	$(am__DEPENDENCIES_10)
 unittest_crush_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(unittest_crush_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
@@ -5084,16 +5277,16 @@ unittest_crush_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 am_unittest_crush_wrapper_OBJECTS =  \
 	test/crush/unittest_crush_wrapper-CrushWrapper.$(OBJEXT)
 unittest_crush_wrapper_OBJECTS = $(am_unittest_crush_wrapper_OBJECTS)
-unittest_crush_wrapper_DEPENDENCIES = $(am__DEPENDENCIES_17) \
-	$(am__DEPENDENCIES_9) $(LIBCRUSH)
+unittest_crush_wrapper_DEPENDENCIES = $(am__DEPENDENCIES_19) \
+	$(am__DEPENDENCIES_10) $(LIBCRUSH)
 unittest_crush_wrapper_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(unittest_crush_wrapper_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
 	$(LDFLAGS) -o $@
 am_unittest_crypto_OBJECTS = test/unittest_crypto-crypto.$(OBJEXT)
 unittest_crypto_OBJECTS = $(am_unittest_crypto_OBJECTS)
-unittest_crypto_DEPENDENCIES = $(am__DEPENDENCIES_17) \
-	$(am__DEPENDENCIES_9)
+unittest_crypto_DEPENDENCIES = $(am__DEPENDENCIES_19) \
+	$(am__DEPENDENCIES_10)
 unittest_crypto_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(unittest_crypto_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
@@ -5101,8 +5294,8 @@ unittest_crypto_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 am_unittest_crypto_init_OBJECTS =  \
 	test/unittest_crypto_init-crypto_init.$(OBJEXT)
 unittest_crypto_init_OBJECTS = $(am_unittest_crypto_init_OBJECTS)
-unittest_crypto_init_DEPENDENCIES = $(am__DEPENDENCIES_17) \
-	$(am__DEPENDENCIES_9)
+unittest_crypto_init_DEPENDENCIES = $(am__DEPENDENCIES_19) \
+	$(am__DEPENDENCIES_10)
 unittest_crypto_init_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(unittest_crypto_init_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
@@ -5110,8 +5303,8 @@ unittest_crypto_init_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 am_unittest_daemon_config_OBJECTS =  \
 	test/unittest_daemon_config-daemon_config.$(OBJEXT)
 unittest_daemon_config_OBJECTS = $(am_unittest_daemon_config_OBJECTS)
-unittest_daemon_config_DEPENDENCIES = $(am__DEPENDENCIES_17) \
-	$(am__DEPENDENCIES_9)
+unittest_daemon_config_DEPENDENCIES = $(am__DEPENDENCIES_19) \
+	$(am__DEPENDENCIES_10)
 unittest_daemon_config_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(unittest_daemon_config_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
@@ -5120,9 +5313,9 @@ am__unittest_ecbackend_SOURCES_DIST = test/osd/TestECBackend.cc
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at am_unittest_ecbackend_OBJECTS = test/osd/unittest_ecbackend-TestECBackend.$(OBJEXT)
 unittest_ecbackend_OBJECTS = $(am_unittest_ecbackend_OBJECTS)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at unittest_ecbackend_DEPENDENCIES =  \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_16) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_17) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_9)
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_18) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_19) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_10)
 unittest_ecbackend_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(unittest_ecbackend_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
@@ -5132,8 +5325,8 @@ am__unittest_encoding_SOURCES_DIST = test/encoding.cc
 unittest_encoding_OBJECTS = $(am_unittest_encoding_OBJECTS)
 @ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE at unittest_encoding_DEPENDENCIES = $(LIBCEPHFS) \
 @ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE@	$(LIBRADOS) \
- at ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_9) \
- at ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_17)
+ at ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_10) \
+ at ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_19)
 unittest_encoding_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(unittest_encoding_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
@@ -5144,10 +5337,10 @@ am__unittest_erasure_code_SOURCES_DIST = erasure-code/ErasureCode.cc \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	test/erasure-code/unittest_erasure_code-TestErasureCode.$(OBJEXT)
 unittest_erasure_code_OBJECTS = $(am_unittest_erasure_code_OBJECTS)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at unittest_erasure_code_DEPENDENCIES =  \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_16) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(LIBCOMMON) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_17) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_9)
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_18) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_4) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_19) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_10)
 unittest_erasure_code_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(unittest_erasure_code_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
@@ -5160,10 +5353,10 @@ am__unittest_erasure_code_example_SOURCES_DIST =  \
 unittest_erasure_code_example_OBJECTS =  \
 	$(am_unittest_erasure_code_example_OBJECTS)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at unittest_erasure_code_example_DEPENDENCIES =  \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_16) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(LIBCOMMON) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_17) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_9)
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_18) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_4) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_19) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_10)
 unittest_erasure_code_example_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(unittest_erasure_code_example_CXXFLAGS) $(CXXFLAGS) \
@@ -5175,11 +5368,11 @@ am__unittest_erasure_code_isa_SOURCES_DIST =  \
 @ENABLE_SERVER_TRUE@@WITH_BETTER_YASM_ELF64_TRUE@@WITH_OSD_TRUE@	test/erasure-code/unittest_erasure_code_isa-TestErasureCodeIsa.$(OBJEXT)
 unittest_erasure_code_isa_OBJECTS =  \
 	$(am_unittest_erasure_code_isa_OBJECTS)
- at ENABLE_SERVER_TRUE@@WITH_BETTER_YASM_ELF64_TRUE@@WITH_OSD_TRUE at unittest_erasure_code_isa_DEPENDENCIES = $(am__DEPENDENCIES_16) \
- at ENABLE_SERVER_TRUE@@WITH_BETTER_YASM_ELF64_TRUE@@WITH_OSD_TRUE@	$(LIBCOMMON) \
- at ENABLE_SERVER_TRUE@@WITH_BETTER_YASM_ELF64_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_17) \
- at ENABLE_SERVER_TRUE@@WITH_BETTER_YASM_ELF64_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_9) \
- at ENABLE_SERVER_TRUE@@WITH_BETTER_YASM_ELF64_TRUE@@WITH_OSD_TRUE@	.libs/libec_isa.la \
+ at ENABLE_SERVER_TRUE@@WITH_BETTER_YASM_ELF64_TRUE@@WITH_OSD_TRUE at unittest_erasure_code_isa_DEPENDENCIES = $(am__DEPENDENCIES_18) \
+ at ENABLE_SERVER_TRUE@@WITH_BETTER_YASM_ELF64_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_4) \
+ at ENABLE_SERVER_TRUE@@WITH_BETTER_YASM_ELF64_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_19) \
+ at ENABLE_SERVER_TRUE@@WITH_BETTER_YASM_ELF64_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_10) \
+ at ENABLE_SERVER_TRUE@@WITH_BETTER_YASM_ELF64_TRUE@@WITH_OSD_TRUE@	libisa.la \
 @ENABLE_SERVER_TRUE@@WITH_BETTER_YASM_ELF64_TRUE@@WITH_OSD_TRUE@	$(LIBERASURE_CODE) \
 @ENABLE_SERVER_TRUE@@WITH_BETTER_YASM_ELF64_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_1)
 unittest_erasure_code_isa_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
@@ -5207,7 +5400,7 @@ am__unittest_erasure_code_jerasure_SOURCES_DIST =  \
 	erasure-code/jerasure/gf-complete/src/gf_w8.c \
 	erasure-code/jerasure/ErasureCodePluginJerasure.cc \
 	erasure-code/jerasure/ErasureCodeJerasure.cc
-am__objects_49 = erasure-code/unittest_erasure_code_jerasure-ErasureCode.$(OBJEXT) \
+am__objects_54 = erasure-code/unittest_erasure_code_jerasure-ErasureCode.$(OBJEXT) \
 	erasure-code/jerasure/jerasure/src/unittest_erasure_code_jerasure-cauchy.$(OBJEXT) \
 	erasure-code/jerasure/jerasure/src/unittest_erasure_code_jerasure-galois.$(OBJEXT) \
 	erasure-code/jerasure/jerasure/src/unittest_erasure_code_jerasure-jerasure.$(OBJEXT) \
@@ -5227,14 +5420,14 @@ am__objects_49 = erasure-code/unittest_erasure_code_jerasure-ErasureCode.$(OBJEX
 	erasure-code/jerasure/unittest_erasure_code_jerasure-ErasureCodePluginJerasure.$(OBJEXT) \
 	erasure-code/jerasure/unittest_erasure_code_jerasure-ErasureCodeJerasure.$(OBJEXT)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at am_unittest_erasure_code_jerasure_OBJECTS = test/erasure-code/unittest_erasure_code_jerasure-TestErasureCodeJerasure.$(OBJEXT) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__objects_49)
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__objects_54)
 unittest_erasure_code_jerasure_OBJECTS =  \
 	$(am_unittest_erasure_code_jerasure_OBJECTS)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at unittest_erasure_code_jerasure_DEPENDENCIES =  \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_16) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(LIBCOMMON) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_17) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_9) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_18) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_4) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_19) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_10) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_1)
 unittest_erasure_code_jerasure_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
@@ -5245,19 +5438,19 @@ am__unittest_erasure_code_lrc_SOURCES_DIST =  \
 	erasure-code/ErasureCode.cc \
 	erasure-code/lrc/ErasureCodePluginLrc.cc \
 	erasure-code/lrc/ErasureCodeLrc.cc
-am__objects_50 =  \
+am__objects_55 =  \
 	erasure-code/unittest_erasure_code_lrc-ErasureCode.$(OBJEXT) \
 	erasure-code/lrc/unittest_erasure_code_lrc-ErasureCodePluginLrc.$(OBJEXT) \
 	erasure-code/lrc/unittest_erasure_code_lrc-ErasureCodeLrc.$(OBJEXT)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at am_unittest_erasure_code_lrc_OBJECTS = test/erasure-code/unittest_erasure_code_lrc-TestErasureCodeLrc.$(OBJEXT) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__objects_50)
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__objects_55)
 unittest_erasure_code_lrc_OBJECTS =  \
 	$(am_unittest_erasure_code_lrc_OBJECTS)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at unittest_erasure_code_lrc_DEPENDENCIES =  \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_16) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(LIBCOMMON) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_17) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_9) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_18) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_4) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_19) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_10) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_1)
 unittest_erasure_code_lrc_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
@@ -5271,10 +5464,10 @@ am__unittest_erasure_code_plugin_SOURCES_DIST =  \
 unittest_erasure_code_plugin_OBJECTS =  \
 	$(am_unittest_erasure_code_plugin_OBJECTS)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at unittest_erasure_code_plugin_DEPENDENCIES =  \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_16) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(LIBCOMMON) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_17) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_9) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_18) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_4) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_19) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_10) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_1)
 unittest_erasure_code_plugin_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
@@ -5287,11 +5480,10 @@ am__unittest_erasure_code_plugin_isa_SOURCES_DIST =  \
 @ENABLE_SERVER_TRUE@@WITH_BETTER_YASM_ELF64_TRUE@@WITH_OSD_TRUE@	test/erasure-code/unittest_erasure_code_plugin_isa-TestErasureCodePluginIsa.$(OBJEXT)
 unittest_erasure_code_plugin_isa_OBJECTS =  \
 	$(am_unittest_erasure_code_plugin_isa_OBJECTS)
- at ENABLE_SERVER_TRUE@@WITH_BETTER_YASM_ELF64_TRUE@@WITH_OSD_TRUE at unittest_erasure_code_plugin_isa_DEPENDENCIES = $(am__DEPENDENCIES_16) \
- at ENABLE_SERVER_TRUE@@WITH_BETTER_YASM_ELF64_TRUE@@WITH_OSD_TRUE@	$(LIBCOMMON) \
- at ENABLE_SERVER_TRUE@@WITH_BETTER_YASM_ELF64_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_17) \
- at ENABLE_SERVER_TRUE@@WITH_BETTER_YASM_ELF64_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_9) \
- at ENABLE_SERVER_TRUE@@WITH_BETTER_YASM_ELF64_TRUE@@WITH_OSD_TRUE@	.libs/libec_isa.la \
+ at ENABLE_SERVER_TRUE@@WITH_BETTER_YASM_ELF64_TRUE@@WITH_OSD_TRUE at unittest_erasure_code_plugin_isa_DEPENDENCIES = $(am__DEPENDENCIES_18) \
+ at ENABLE_SERVER_TRUE@@WITH_BETTER_YASM_ELF64_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_4) \
+ at ENABLE_SERVER_TRUE@@WITH_BETTER_YASM_ELF64_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_19) \
+ at ENABLE_SERVER_TRUE@@WITH_BETTER_YASM_ELF64_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_10) \
 @ENABLE_SERVER_TRUE@@WITH_BETTER_YASM_ELF64_TRUE@@WITH_OSD_TRUE@	$(LIBERASURE_CODE) \
 @ENABLE_SERVER_TRUE@@WITH_BETTER_YASM_ELF64_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_1)
 unittest_erasure_code_plugin_isa_LINK = $(LIBTOOL) $(AM_V_lt) \
@@ -5304,10 +5496,10 @@ am__unittest_erasure_code_plugin_jerasure_SOURCES_DIST =  \
 unittest_erasure_code_plugin_jerasure_OBJECTS =  \
 	$(am_unittest_erasure_code_plugin_jerasure_OBJECTS)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at unittest_erasure_code_plugin_jerasure_DEPENDENCIES =  \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_16) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(LIBCOMMON) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_17) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_9) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_18) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_4) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_19) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_10) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_1)
 unittest_erasure_code_plugin_jerasure_LINK = $(LIBTOOL) $(AM_V_lt) \
 	--tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link \
@@ -5319,10 +5511,10 @@ am__unittest_erasure_code_plugin_lrc_SOURCES_DIST =  \
 unittest_erasure_code_plugin_lrc_OBJECTS =  \
 	$(am_unittest_erasure_code_plugin_lrc_OBJECTS)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at unittest_erasure_code_plugin_lrc_DEPENDENCIES =  \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_16) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(LIBCOMMON) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_17) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_9) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_18) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_4) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_19) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_10) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_1)
 unittest_erasure_code_plugin_lrc_LINK = $(LIBTOOL) $(AM_V_lt) \
 	--tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link \
@@ -5334,10 +5526,10 @@ am__unittest_erasure_code_plugin_shec_SOURCES_DIST =  \
 unittest_erasure_code_plugin_shec_OBJECTS =  \
 	$(am_unittest_erasure_code_plugin_shec_OBJECTS)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at unittest_erasure_code_plugin_shec_DEPENDENCIES =  \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_16) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(LIBCOMMON) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_17) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_9) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_18) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_4) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_19) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_10) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_1)
 unittest_erasure_code_plugin_shec_LINK = $(LIBTOOL) $(AM_V_lt) \
 	--tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link \
@@ -5366,7 +5558,7 @@ am__unittest_erasure_code_shec_SOURCES_DIST =  \
 	erasure-code/jerasure/gf-complete/src/gf_w4.c \
 	erasure-code/jerasure/gf-complete/src/gf_rand.c \
 	erasure-code/jerasure/gf-complete/src/gf_w8.c
-am__objects_51 =  \
+am__objects_56 =  \
 	erasure-code/unittest_erasure_code_shec-ErasureCode.$(OBJEXT) \
 	erasure-code/shec/unittest_erasure_code_shec-ErasureCodePluginShec.$(OBJEXT) \
 	erasure-code/shec/unittest_erasure_code_shec-ErasureCodeShec.$(OBJEXT) \
@@ -5389,14 +5581,14 @@ am__objects_51 =  \
 	erasure-code/jerasure/gf-complete/src/unittest_erasure_code_shec-gf_rand.$(OBJEXT) \
 	erasure-code/jerasure/gf-complete/src/unittest_erasure_code_shec-gf_w8.$(OBJEXT)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at am_unittest_erasure_code_shec_OBJECTS = test/erasure-code/unittest_erasure_code_shec-TestErasureCodeShec.$(OBJEXT) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__objects_51)
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__objects_56)
 unittest_erasure_code_shec_OBJECTS =  \
 	$(am_unittest_erasure_code_shec_OBJECTS)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at unittest_erasure_code_shec_DEPENDENCIES =  \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_16) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(LIBCOMMON) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_17) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_9) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_18) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_4) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_19) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_10) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_1)
 unittest_erasure_code_shec_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
@@ -5425,7 +5617,7 @@ am__unittest_erasure_code_shec_all_SOURCES_DIST =  \
 	erasure-code/jerasure/gf-complete/src/gf_w4.c \
 	erasure-code/jerasure/gf-complete/src/gf_rand.c \
 	erasure-code/jerasure/gf-complete/src/gf_w8.c
-am__objects_52 = erasure-code/unittest_erasure_code_shec_all-ErasureCode.$(OBJEXT) \
+am__objects_57 = erasure-code/unittest_erasure_code_shec_all-ErasureCode.$(OBJEXT) \
 	erasure-code/shec/unittest_erasure_code_shec_all-ErasureCodePluginShec.$(OBJEXT) \
 	erasure-code/shec/unittest_erasure_code_shec_all-ErasureCodeShec.$(OBJEXT) \
 	erasure-code/shec/unittest_erasure_code_shec_all-ErasureCodeShecTableCache.$(OBJEXT) \
@@ -5447,14 +5639,14 @@ am__objects_52 = erasure-code/unittest_erasure_code_shec_all-ErasureCode.$(OBJEX
 	erasure-code/jerasure/gf-complete/src/unittest_erasure_code_shec_all-gf_rand.$(OBJEXT) \
 	erasure-code/jerasure/gf-complete/src/unittest_erasure_code_shec_all-gf_w8.$(OBJEXT)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at am_unittest_erasure_code_shec_all_OBJECTS = test/erasure-code/unittest_erasure_code_shec_all-TestErasureCodeShec_all.$(OBJEXT) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__objects_52)
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__objects_57)
 unittest_erasure_code_shec_all_OBJECTS =  \
 	$(am_unittest_erasure_code_shec_all_OBJECTS)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at unittest_erasure_code_shec_all_DEPENDENCIES =  \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_16) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(LIBCOMMON) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_17) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_9) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_18) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_4) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_19) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_10) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_1)
 unittest_erasure_code_shec_all_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
@@ -5483,7 +5675,7 @@ am__unittest_erasure_code_shec_arguments_SOURCES_DIST =  \
 	erasure-code/jerasure/gf-complete/src/gf_w4.c \
 	erasure-code/jerasure/gf-complete/src/gf_rand.c \
 	erasure-code/jerasure/gf-complete/src/gf_w8.c
-am__objects_53 = erasure-code/unittest_erasure_code_shec_arguments-ErasureCode.$(OBJEXT) \
+am__objects_58 = erasure-code/unittest_erasure_code_shec_arguments-ErasureCode.$(OBJEXT) \
 	erasure-code/shec/unittest_erasure_code_shec_arguments-ErasureCodePluginShec.$(OBJEXT) \
 	erasure-code/shec/unittest_erasure_code_shec_arguments-ErasureCodeShec.$(OBJEXT) \
 	erasure-code/shec/unittest_erasure_code_shec_arguments-ErasureCodeShecTableCache.$(OBJEXT) \
@@ -5505,14 +5697,14 @@ am__objects_53 = erasure-code/unittest_erasure_code_shec_arguments-ErasureCode.$
 	erasure-code/jerasure/gf-complete/src/unittest_erasure_code_shec_arguments-gf_rand.$(OBJEXT) \
 	erasure-code/jerasure/gf-complete/src/unittest_erasure_code_shec_arguments-gf_w8.$(OBJEXT)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at am_unittest_erasure_code_shec_arguments_OBJECTS = test/erasure-code/unittest_erasure_code_shec_arguments-TestErasureCodeShec_arguments.$(OBJEXT) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__objects_53)
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__objects_58)
 unittest_erasure_code_shec_arguments_OBJECTS =  \
 	$(am_unittest_erasure_code_shec_arguments_OBJECTS)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at unittest_erasure_code_shec_arguments_DEPENDENCIES =  \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_16) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(LIBCOMMON) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_17) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_9) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_18) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_4) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_19) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_10) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_1)
 unittest_erasure_code_shec_arguments_LINK = $(LIBTOOL) $(AM_V_lt) \
 	--tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link \
@@ -5541,7 +5733,7 @@ am__unittest_erasure_code_shec_thread_SOURCES_DIST =  \
 	erasure-code/jerasure/gf-complete/src/gf_w4.c \
 	erasure-code/jerasure/gf-complete/src/gf_rand.c \
 	erasure-code/jerasure/gf-complete/src/gf_w8.c
-am__objects_54 = erasure-code/unittest_erasure_code_shec_thread-ErasureCode.$(OBJEXT) \
+am__objects_59 = erasure-code/unittest_erasure_code_shec_thread-ErasureCode.$(OBJEXT) \
 	erasure-code/shec/unittest_erasure_code_shec_thread-ErasureCodePluginShec.$(OBJEXT) \
 	erasure-code/shec/unittest_erasure_code_shec_thread-ErasureCodeShec.$(OBJEXT) \
 	erasure-code/shec/unittest_erasure_code_shec_thread-ErasureCodeShecTableCache.$(OBJEXT) \
@@ -5563,14 +5755,14 @@ am__objects_54 = erasure-code/unittest_erasure_code_shec_thread-ErasureCode.$(OB
 	erasure-code/jerasure/gf-complete/src/unittest_erasure_code_shec_thread-gf_rand.$(OBJEXT) \
 	erasure-code/jerasure/gf-complete/src/unittest_erasure_code_shec_thread-gf_w8.$(OBJEXT)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at am_unittest_erasure_code_shec_thread_OBJECTS = test/erasure-code/unittest_erasure_code_shec_thread-TestErasureCodeShec_thread.$(OBJEXT) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__objects_54)
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__objects_59)
 unittest_erasure_code_shec_thread_OBJECTS =  \
 	$(am_unittest_erasure_code_shec_thread_OBJECTS)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at unittest_erasure_code_shec_thread_DEPENDENCIES =  \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_16) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(LIBCOMMON) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_17) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_9) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_18) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_4) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_19) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_10) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_1)
 unittest_erasure_code_shec_thread_LINK = $(LIBTOOL) $(AM_V_lt) \
 	--tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link \
@@ -5578,8 +5770,8 @@ unittest_erasure_code_shec_thread_LINK = $(LIBTOOL) $(AM_V_lt) \
 	$(CXXFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@
 am_unittest_escape_OBJECTS = test/unittest_escape-escape.$(OBJEXT)
 unittest_escape_OBJECTS = $(am_unittest_escape_OBJECTS)
-unittest_escape_DEPENDENCIES = $(am__DEPENDENCIES_17) \
-	$(am__DEPENDENCIES_9)
+unittest_escape_DEPENDENCIES = $(am__DEPENDENCIES_19) \
+	$(am__DEPENDENCIES_10)
 unittest_escape_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(unittest_escape_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
@@ -5588,16 +5780,16 @@ am_unittest_formatter_OBJECTS =  \
 	test/unittest_formatter-formatter.$(OBJEXT) \
 	rgw/unittest_formatter-rgw_formats.$(OBJEXT)
 unittest_formatter_OBJECTS = $(am_unittest_formatter_OBJECTS)
-unittest_formatter_DEPENDENCIES = $(am__DEPENDENCIES_17) \
-	$(am__DEPENDENCIES_9)
+unittest_formatter_DEPENDENCIES = $(am__DEPENDENCIES_19) \
+	$(am__DEPENDENCIES_10)
 unittest_formatter_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(unittest_formatter_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
 	$(LDFLAGS) -o $@
 am_unittest_gather_OBJECTS = test/unittest_gather-gather.$(OBJEXT)
 unittest_gather_OBJECTS = $(am_unittest_gather_OBJECTS)
-unittest_gather_DEPENDENCIES = $(am__DEPENDENCIES_17) \
-	$(am__DEPENDENCIES_9)
+unittest_gather_DEPENDENCIES = $(am__DEPENDENCIES_19) \
+	$(am__DEPENDENCIES_10)
 unittest_gather_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(unittest_gather_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
@@ -5605,8 +5797,8 @@ unittest_gather_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 am_unittest_heartbeatmap_OBJECTS =  \
 	test/unittest_heartbeatmap-heartbeat_map.$(OBJEXT)
 unittest_heartbeatmap_OBJECTS = $(am_unittest_heartbeatmap_OBJECTS)
-unittest_heartbeatmap_DEPENDENCIES = $(LIBCOMMON) \
-	$(am__DEPENDENCIES_17) $(am__DEPENDENCIES_9)
+unittest_heartbeatmap_DEPENDENCIES = $(am__DEPENDENCIES_4) \
+	$(am__DEPENDENCIES_19) $(am__DEPENDENCIES_10)
 unittest_heartbeatmap_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(unittest_heartbeatmap_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
@@ -5614,8 +5806,8 @@ unittest_heartbeatmap_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 am_unittest_histogram_OBJECTS =  \
 	test/common/unittest_histogram-histogram.$(OBJEXT)
 unittest_histogram_OBJECTS = $(am_unittest_histogram_OBJECTS)
-unittest_histogram_DEPENDENCIES = $(am__DEPENDENCIES_17) \
-	$(am__DEPENDENCIES_9)
+unittest_histogram_DEPENDENCIES = $(am__DEPENDENCIES_19) \
+	$(am__DEPENDENCIES_10)
 unittest_histogram_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(unittest_histogram_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
@@ -5624,18 +5816,27 @@ am__unittest_hitset_SOURCES_DIST = test/osd/hitset.cc
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at am_unittest_hitset_OBJECTS = test/osd/unittest_hitset-hitset.$(OBJEXT)
 unittest_hitset_OBJECTS = $(am_unittest_hitset_OBJECTS)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at unittest_hitset_DEPENDENCIES =  \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_16) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_17) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_9)
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_18) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_19) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_10)
 unittest_hitset_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(unittest_hitset_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
 	$(LDFLAGS) -o $@
+am_unittest_interval_set_OBJECTS =  \
+	test/common/unittest_interval_set-test_interval_set.$(OBJEXT)
+unittest_interval_set_OBJECTS = $(am_unittest_interval_set_OBJECTS)
+unittest_interval_set_DEPENDENCIES = $(am__DEPENDENCIES_19) \
+	$(am__DEPENDENCIES_10)
+unittest_interval_set_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
+	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
+	$(unittest_interval_set_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
+	$(LDFLAGS) -o $@
 am_unittest_io_priority_OBJECTS =  \
 	test/common/unittest_io_priority-test_io_priority.$(OBJEXT)
 unittest_io_priority_OBJECTS = $(am_unittest_io_priority_OBJECTS)
-unittest_io_priority_DEPENDENCIES = $(am__DEPENDENCIES_17) \
-	$(am__DEPENDENCIES_9)
+unittest_io_priority_DEPENDENCIES = $(am__DEPENDENCIES_19) \
+	$(am__DEPENDENCIES_10)
 unittest_io_priority_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(unittest_io_priority_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
@@ -5643,8 +5844,8 @@ unittest_io_priority_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 am_unittest_ipaddr_OBJECTS =  \
 	test/unittest_ipaddr-test_ipaddr.$(OBJEXT)
 unittest_ipaddr_OBJECTS = $(am_unittest_ipaddr_OBJECTS)
-unittest_ipaddr_DEPENDENCIES = $(am__DEPENDENCIES_17) \
-	$(am__DEPENDENCIES_9)
+unittest_ipaddr_DEPENDENCIES = $(am__DEPENDENCIES_19) \
+	$(am__DEPENDENCIES_10)
 unittest_ipaddr_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(unittest_ipaddr_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
@@ -5676,9 +5877,9 @@ unittest_journal_OBJECTS = $(am_unittest_journal_OBJECTS)
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	libcls_journal_client.la \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	librados_test_stub.la \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	librados_internal.la \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_17) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_19) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_10) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_9) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_8) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(LIBRADOS)
 unittest_journal_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
@@ -5688,9 +5889,9 @@ am__unittest_lfnindex_SOURCES_DIST = test/os/TestLFNIndex.cc
 @ENABLE_SERVER_TRUE at am_unittest_lfnindex_OBJECTS = test/os/unittest_lfnindex-TestLFNIndex.$(OBJEXT)
 unittest_lfnindex_OBJECTS = $(am_unittest_lfnindex_OBJECTS)
 @ENABLE_SERVER_TRUE at unittest_lfnindex_DEPENDENCIES =  \
- at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_12) \
- at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_17) \
- at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_9)
+ at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_14) \
+ at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_19) \
+ at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_10)
 unittest_lfnindex_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(unittest_lfnindex_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
@@ -5700,8 +5901,8 @@ am__unittest_libcephfs_config_SOURCES_DIST = test/libcephfs_config.cc
 unittest_libcephfs_config_OBJECTS =  \
 	$(am_unittest_libcephfs_config_OBJECTS)
 @ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE at unittest_libcephfs_config_DEPENDENCIES = $(LIBCEPHFS) \
- at ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_9) \
- at ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_17)
+ at ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_10) \
+ at ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_19)
 unittest_libcephfs_config_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(unittest_libcephfs_config_CXXFLAGS) $(CXXFLAGS) \
@@ -5711,8 +5912,8 @@ am__unittest_librados_SOURCES_DIST = test/librados/librados.cc
 unittest_librados_OBJECTS = $(am_unittest_librados_OBJECTS)
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at unittest_librados_DEPENDENCIES =  \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(LIBRADOS) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_9) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_17)
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_10) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_19)
 unittest_librados_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(unittest_librados_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
@@ -5724,8 +5925,8 @@ unittest_librados_config_OBJECTS =  \
 	$(am_unittest_librados_config_OBJECTS)
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at unittest_librados_config_DEPENDENCIES =  \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(LIBRADOS) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_9) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_17)
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_10) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_19)
 unittest_librados_config_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(unittest_librados_config_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
@@ -5736,6 +5937,7 @@ am__unittest_librbd_SOURCES_DIST = test/librbd/test_main.cc \
 	test/librbd/test_mock_Journal.cc \
 	test/librbd/exclusive_lock/test_mock_AcquireRequest.cc \
 	test/librbd/exclusive_lock/test_mock_ReleaseRequest.cc \
+	test/librbd/image/test_mock_RefreshRequest.cc \
 	test/librbd/journal/test_mock_Replay.cc \
 	test/librbd/object_map/test_mock_InvalidateRequest.cc \
 	test/librbd/object_map/test_mock_LockRequest.cc \
@@ -5758,6 +5960,7 @@ am__unittest_librbd_SOURCES_DIST = test/librbd/test_main.cc \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	test/librbd/unittest_librbd-test_mock_Journal.$(OBJEXT) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	test/librbd/exclusive_lock/unittest_librbd-test_mock_AcquireRequest.$(OBJEXT) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	test/librbd/exclusive_lock/unittest_librbd-test_mock_ReleaseRequest.$(OBJEXT) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	test/librbd/image/unittest_librbd-test_mock_RefreshRequest.$(OBJEXT) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	test/librbd/journal/unittest_librbd-test_mock_Replay.$(OBJEXT) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	test/librbd/object_map/unittest_librbd-test_mock_InvalidateRequest.$(OBJEXT) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	test/librbd/object_map/unittest_librbd-test_mock_LockRequest.$(OBJEXT) \
@@ -5787,23 +5990,24 @@ unittest_librbd_OBJECTS = $(am_unittest_librbd_OBJECTS)
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	librados_internal.la \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(LIBRADOS) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(LIBOSDC) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(am__DEPENDENCIES_17) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(am__DEPENDENCIES_9) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(am__DEPENDENCIES_8)
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(am__DEPENDENCIES_19) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(am__DEPENDENCIES_10) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(am__DEPENDENCIES_9)
 unittest_librbd_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(unittest_librbd_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
 	$(LDFLAGS) -o $@
 am_unittest_log_OBJECTS = log/unittest_log-test.$(OBJEXT)
 unittest_log_OBJECTS = $(am_unittest_log_OBJECTS)
-unittest_log_DEPENDENCIES = $(LIBCOMMON) $(am__DEPENDENCIES_17)
+unittest_log_DEPENDENCIES = $(am__DEPENDENCIES_4) \
+	$(am__DEPENDENCIES_19)
 unittest_log_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) \
 	$(LIBTOOLFLAGS) --mode=link $(CXXLD) $(unittest_log_CXXFLAGS) \
 	$(CXXFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@
 am_unittest_lru_OBJECTS = test/common/unittest_lru-test_lru.$(OBJEXT)
 unittest_lru_OBJECTS = $(am_unittest_lru_OBJECTS)
-unittest_lru_DEPENDENCIES = $(am__DEPENDENCIES_17) \
-	$(am__DEPENDENCIES_9)
+unittest_lru_DEPENDENCIES = $(am__DEPENDENCIES_19) \
+	$(am__DEPENDENCIES_10)
 unittest_lru_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) \
 	$(LIBTOOLFLAGS) --mode=link $(CXXLD) $(unittest_lru_CXXFLAGS) \
 	$(CXXFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@
@@ -5811,9 +6015,9 @@ am__unittest_mds_authcap_SOURCES_DIST = test/mds/TestMDSAuthCaps.cc
 @ENABLE_SERVER_TRUE@@WITH_MDS_TRUE at am_unittest_mds_authcap_OBJECTS = test/mds/unittest_mds_authcap-TestMDSAuthCaps.$(OBJEXT)
 unittest_mds_authcap_OBJECTS = $(am_unittest_mds_authcap_OBJECTS)
 @ENABLE_SERVER_TRUE@@WITH_MDS_TRUE at unittest_mds_authcap_DEPENDENCIES =  \
- at ENABLE_SERVER_TRUE@@WITH_MDS_TRUE@	$(am__DEPENDENCIES_14) \
- at ENABLE_SERVER_TRUE@@WITH_MDS_TRUE@	$(am__DEPENDENCIES_17) \
- at ENABLE_SERVER_TRUE@@WITH_MDS_TRUE@	$(am__DEPENDENCIES_9)
+ at ENABLE_SERVER_TRUE@@WITH_MDS_TRUE@	$(am__DEPENDENCIES_16) \
+ at ENABLE_SERVER_TRUE@@WITH_MDS_TRUE@	$(am__DEPENDENCIES_19) \
+ at ENABLE_SERVER_TRUE@@WITH_MDS_TRUE@	$(am__DEPENDENCIES_10)
 unittest_mds_authcap_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(unittest_mds_authcap_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
@@ -5821,16 +6025,16 @@ unittest_mds_authcap_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 am_unittest_mds_types_OBJECTS =  \
 	test/fs/unittest_mds_types-mds_types.$(OBJEXT)
 unittest_mds_types_OBJECTS = $(am_unittest_mds_types_OBJECTS)
-unittest_mds_types_DEPENDENCIES = $(am__DEPENDENCIES_17) \
-	$(am__DEPENDENCIES_9)
+unittest_mds_types_DEPENDENCIES = $(am__DEPENDENCIES_19) \
+	$(am__DEPENDENCIES_10)
 unittest_mds_types_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(unittest_mds_types_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
 	$(LDFLAGS) -o $@
 am_unittest_mime_OBJECTS = test/unittest_mime-mime.$(OBJEXT)
 unittest_mime_OBJECTS = $(am_unittest_mime_OBJECTS)
-unittest_mime_DEPENDENCIES = $(am__DEPENDENCIES_17) \
-	$(am__DEPENDENCIES_9)
+unittest_mime_DEPENDENCIES = $(am__DEPENDENCIES_19) \
+	$(am__DEPENDENCIES_10)
 unittest_mime_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(unittest_mime_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) $(LDFLAGS) \
@@ -5839,9 +6043,9 @@ am__unittest_mon_moncap_SOURCES_DIST = test/mon/moncap.cc
 @ENABLE_SERVER_TRUE@@WITH_MON_TRUE at am_unittest_mon_moncap_OBJECTS = test/mon/unittest_mon_moncap-moncap.$(OBJEXT)
 unittest_mon_moncap_OBJECTS = $(am_unittest_mon_moncap_OBJECTS)
 @ENABLE_SERVER_TRUE@@WITH_MON_TRUE at unittest_mon_moncap_DEPENDENCIES =  \
- at ENABLE_SERVER_TRUE@@WITH_MON_TRUE@	$(am__DEPENDENCIES_15) \
 @ENABLE_SERVER_TRUE@@WITH_MON_TRUE@	$(am__DEPENDENCIES_17) \
- at ENABLE_SERVER_TRUE@@WITH_MON_TRUE@	$(am__DEPENDENCIES_9)
+ at ENABLE_SERVER_TRUE@@WITH_MON_TRUE@	$(am__DEPENDENCIES_19) \
+ at ENABLE_SERVER_TRUE@@WITH_MON_TRUE@	$(am__DEPENDENCIES_10)
 unittest_mon_moncap_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(unittest_mon_moncap_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
@@ -5850,13 +6054,22 @@ am__unittest_mon_pgmap_SOURCES_DIST = test/mon/PGMap.cc
 @ENABLE_SERVER_TRUE@@WITH_MON_TRUE at am_unittest_mon_pgmap_OBJECTS = test/mon/unittest_mon_pgmap-PGMap.$(OBJEXT)
 unittest_mon_pgmap_OBJECTS = $(am_unittest_mon_pgmap_OBJECTS)
 @ENABLE_SERVER_TRUE@@WITH_MON_TRUE at unittest_mon_pgmap_DEPENDENCIES =  \
- at ENABLE_SERVER_TRUE@@WITH_MON_TRUE@	$(am__DEPENDENCIES_15) \
 @ENABLE_SERVER_TRUE@@WITH_MON_TRUE@	$(am__DEPENDENCIES_17) \
- at ENABLE_SERVER_TRUE@@WITH_MON_TRUE@	$(am__DEPENDENCIES_9)
+ at ENABLE_SERVER_TRUE@@WITH_MON_TRUE@	$(am__DEPENDENCIES_19) \
+ at ENABLE_SERVER_TRUE@@WITH_MON_TRUE@	$(am__DEPENDENCIES_10)
 unittest_mon_pgmap_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(unittest_mon_pgmap_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
 	$(LDFLAGS) -o $@
+am_unittest_mutex_debug_OBJECTS =  \
+	test/common/unittest_mutex_debug-test_mutex_debug.$(OBJEXT)
+unittest_mutex_debug_OBJECTS = $(am_unittest_mutex_debug_OBJECTS)
+unittest_mutex_debug_DEPENDENCIES = $(am__DEPENDENCIES_19) \
+	$(am__DEPENDENCIES_10) $(am__DEPENDENCIES_3)
+unittest_mutex_debug_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
+	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
+	$(unittest_mutex_debug_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
+	$(LDFLAGS) -o $@
 am_unittest_on_exit_OBJECTS = test/on_exit.$(OBJEXT)
 unittest_on_exit_OBJECTS = $(am_unittest_on_exit_OBJECTS)
 unittest_on_exit_DEPENDENCIES = $(am__DEPENDENCIES_1)
@@ -5864,9 +6077,9 @@ am__unittest_osd_osdcap_SOURCES_DIST = test/osd/osdcap.cc
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at am_unittest_osd_osdcap_OBJECTS = test/osd/unittest_osd_osdcap-osdcap.$(OBJEXT)
 unittest_osd_osdcap_OBJECTS = $(am_unittest_osd_osdcap_OBJECTS)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at unittest_osd_osdcap_DEPENDENCIES =  \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_16) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_17) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_9)
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_18) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_19) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_10)
 unittest_osd_osdcap_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(unittest_osd_osdcap_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
@@ -5874,8 +6087,8 @@ unittest_osd_osdcap_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 am_unittest_osd_types_OBJECTS =  \
 	test/osd/unittest_osd_types-types.$(OBJEXT)
 unittest_osd_types_OBJECTS = $(am_unittest_osd_types_OBJECTS)
-unittest_osd_types_DEPENDENCIES = $(am__DEPENDENCIES_17) \
-	$(am__DEPENDENCIES_9)
+unittest_osd_types_DEPENDENCIES = $(am__DEPENDENCIES_19) \
+	$(am__DEPENDENCIES_10)
 unittest_osd_types_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(unittest_osd_types_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
@@ -5883,8 +6096,8 @@ unittest_osd_types_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 am_unittest_osdmap_OBJECTS =  \
 	test/osd/unittest_osdmap-TestOSDMap.$(OBJEXT)
 unittest_osdmap_OBJECTS = $(am_unittest_osdmap_OBJECTS)
-unittest_osdmap_DEPENDENCIES = $(am__DEPENDENCIES_17) $(LIBCOMMON) \
-	$(am__DEPENDENCIES_9)
+unittest_osdmap_DEPENDENCIES = $(am__DEPENDENCIES_19) \
+	$(am__DEPENDENCIES_4) $(am__DEPENDENCIES_10)
 unittest_osdmap_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(unittest_osdmap_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
@@ -5893,9 +6106,9 @@ am__unittest_osdscrub_SOURCES_DIST = test/osd/TestOSDScrub.cc
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at am_unittest_osdscrub_OBJECTS = test/osd/unittest_osdscrub-TestOSDScrub.$(OBJEXT)
 unittest_osdscrub_OBJECTS = $(am_unittest_osdscrub_OBJECTS)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at unittest_osdscrub_DEPENDENCIES =  \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_16) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_17) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_9) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_18) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_19) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_10) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_1)
 unittest_osdscrub_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
@@ -5905,7 +6118,7 @@ am__unittest_pageset_SOURCES_DIST = test/test_pageset.cc
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at am_unittest_pageset_OBJECTS = test/unittest_pageset-test_pageset.$(OBJEXT)
 unittest_pageset_OBJECTS = $(am_unittest_pageset_OBJECTS)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at unittest_pageset_DEPENDENCIES =  \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_17)
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_19)
 unittest_pageset_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(unittest_pageset_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
@@ -5913,8 +6126,8 @@ unittest_pageset_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 am_unittest_perf_counters_OBJECTS =  \
 	test/unittest_perf_counters-perf_counters.$(OBJEXT)
 unittest_perf_counters_OBJECTS = $(am_unittest_perf_counters_OBJECTS)
-unittest_perf_counters_DEPENDENCIES = $(am__DEPENDENCIES_17) \
-	$(am__DEPENDENCIES_9)
+unittest_perf_counters_DEPENDENCIES = $(am__DEPENDENCIES_19) \
+	$(am__DEPENDENCIES_10)
 unittest_perf_counters_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(unittest_perf_counters_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
@@ -5923,9 +6136,9 @@ am__unittest_pglog_SOURCES_DIST = test/osd/TestPGLog.cc
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at am_unittest_pglog_OBJECTS = test/osd/unittest_pglog-TestPGLog.$(OBJEXT)
 unittest_pglog_OBJECTS = $(am_unittest_pglog_OBJECTS)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at unittest_pglog_DEPENDENCIES =  \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_16) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_17) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_9) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_18) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_19) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_10) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_1)
 unittest_pglog_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
@@ -5934,8 +6147,8 @@ unittest_pglog_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 am_unittest_prebufferedstreambuf_OBJECTS = test/unittest_prebufferedstreambuf-test_prebufferedstreambuf.$(OBJEXT)
 unittest_prebufferedstreambuf_OBJECTS =  \
 	$(am_unittest_prebufferedstreambuf_OBJECTS)
-unittest_prebufferedstreambuf_DEPENDENCIES = $(LIBCOMMON) \
-	$(am__DEPENDENCIES_17) $(am__DEPENDENCIES_3)
+unittest_prebufferedstreambuf_DEPENDENCIES = $(am__DEPENDENCIES_4) \
+	$(am__DEPENDENCIES_19) $(am__DEPENDENCIES_3)
 unittest_prebufferedstreambuf_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(unittest_prebufferedstreambuf_CXXFLAGS) $(CXXFLAGS) \
@@ -5943,22 +6156,45 @@ unittest_prebufferedstreambuf_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 am_unittest_prioritized_queue_OBJECTS = test/common/unittest_prioritized_queue-test_prioritized_queue.$(OBJEXT)
 unittest_prioritized_queue_OBJECTS =  \
 	$(am_unittest_prioritized_queue_OBJECTS)
-unittest_prioritized_queue_DEPENDENCIES = $(am__DEPENDENCIES_17) \
-	$(am__DEPENDENCIES_9)
+unittest_prioritized_queue_DEPENDENCIES = $(am__DEPENDENCIES_19) \
+	$(am__DEPENDENCIES_10)
 unittest_prioritized_queue_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(unittest_prioritized_queue_CXXFLAGS) $(CXXFLAGS) \
 	$(AM_LDFLAGS) $(LDFLAGS) -o $@
+am__unittest_rbd_mirror_SOURCES_DIST = test/rbd_mirror/test_main.cc
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am_unittest_rbd_mirror_OBJECTS = test/rbd_mirror/unittest_rbd_mirror-test_main.$(OBJEXT)
+unittest_rbd_mirror_OBJECTS = $(am_unittest_rbd_mirror_OBJECTS)
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at unittest_rbd_mirror_DEPENDENCIES = librbd_mirror_test.la \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	librados_test_stub.la \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	librbd_mirror_internal.la \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	librbd_internal.la \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	librbd_api.la \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	libjournal.la \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	librados_internal.la \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	libcls_rbd_client.la \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	libcls_lock_client.la \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	libcls_journal_client.la \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(LIBRBD_TYPES) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(LIBRADOS) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(LIBOSDC) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(am__DEPENDENCIES_19) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(am__DEPENDENCIES_10) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(am__DEPENDENCIES_9)
+unittest_rbd_mirror_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
+	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
+	$(unittest_rbd_mirror_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
+	$(LDFLAGS) -o $@
 am__unittest_rbd_replay_SOURCES_DIST = test/test_rbd_replay.cc
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am_unittest_rbd_replay_OBJECTS = test/unittest_rbd_replay-test_rbd_replay.$(OBJEXT)
 unittest_rbd_replay_OBJECTS = $(am_unittest_rbd_replay_OBJECTS)
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at unittest_rbd_replay_DEPENDENCIES =  \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(LIBRBD) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(LIBRADOS) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(am__DEPENDENCIES_9) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(am__DEPENDENCIES_10) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	librbd_replay.la \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	librbd_replay_ios.la \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(am__DEPENDENCIES_17)
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(am__DEPENDENCIES_19)
 unittest_rbd_replay_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(unittest_rbd_replay_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
@@ -5966,8 +6202,8 @@ unittest_rbd_replay_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 am_unittest_readahead_OBJECTS =  \
 	test/common/unittest_readahead-Readahead.$(OBJEXT)
 unittest_readahead_OBJECTS = $(am_unittest_readahead_OBJECTS)
-unittest_readahead_DEPENDENCIES = $(am__DEPENDENCIES_17) \
-	$(am__DEPENDENCIES_9)
+unittest_readahead_DEPENDENCIES = $(am__DEPENDENCIES_19) \
+	$(am__DEPENDENCIES_10)
 unittest_readahead_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(unittest_readahead_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
@@ -5977,9 +6213,9 @@ am__unittest_rocksdb_option_SOURCES_DIST =  \
 @ENABLE_SERVER_TRUE@@WITH_DLIBROCKSDB_TRUE at am_unittest_rocksdb_option_OBJECTS = test/objectstore/unittest_rocksdb_option-TestRocksdbOptionParse.$(OBJEXT)
 unittest_rocksdb_option_OBJECTS =  \
 	$(am_unittest_rocksdb_option_OBJECTS)
- at ENABLE_SERVER_TRUE@@WITH_DLIBROCKSDB_TRUE at unittest_rocksdb_option_DEPENDENCIES = $(am__DEPENDENCIES_12) \
- at ENABLE_SERVER_TRUE@@WITH_DLIBROCKSDB_TRUE@	$(am__DEPENDENCIES_17) \
- at ENABLE_SERVER_TRUE@@WITH_DLIBROCKSDB_TRUE@	$(am__DEPENDENCIES_9)
+ at ENABLE_SERVER_TRUE@@WITH_DLIBROCKSDB_TRUE at unittest_rocksdb_option_DEPENDENCIES = $(am__DEPENDENCIES_14) \
+ at ENABLE_SERVER_TRUE@@WITH_DLIBROCKSDB_TRUE@	$(am__DEPENDENCIES_19) \
+ at ENABLE_SERVER_TRUE@@WITH_DLIBROCKSDB_TRUE@	$(am__DEPENDENCIES_10)
 unittest_rocksdb_option_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(unittest_rocksdb_option_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
@@ -5989,9 +6225,9 @@ am__unittest_rocksdb_option_static_SOURCES_DIST =  \
 @ENABLE_SERVER_TRUE@@WITH_SLIBROCKSDB_TRUE at am_unittest_rocksdb_option_static_OBJECTS = test/objectstore/unittest_rocksdb_option_static-TestRocksdbOptionParse.$(OBJEXT)
 unittest_rocksdb_option_static_OBJECTS =  \
 	$(am_unittest_rocksdb_option_static_OBJECTS)
- at ENABLE_SERVER_TRUE@@WITH_SLIBROCKSDB_TRUE at unittest_rocksdb_option_static_DEPENDENCIES = $(am__DEPENDENCIES_12) \
- at ENABLE_SERVER_TRUE@@WITH_SLIBROCKSDB_TRUE@	$(am__DEPENDENCIES_17) \
- at ENABLE_SERVER_TRUE@@WITH_SLIBROCKSDB_TRUE@	$(am__DEPENDENCIES_9)
+ at ENABLE_SERVER_TRUE@@WITH_SLIBROCKSDB_TRUE at unittest_rocksdb_option_static_DEPENDENCIES = $(am__DEPENDENCIES_14) \
+ at ENABLE_SERVER_TRUE@@WITH_SLIBROCKSDB_TRUE@	$(am__DEPENDENCIES_19) \
+ at ENABLE_SERVER_TRUE@@WITH_SLIBROCKSDB_TRUE@	$(am__DEPENDENCIES_10)
 unittest_rocksdb_option_static_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(unittest_rocksdb_option_static_CXXFLAGS) $(CXXFLAGS) \
@@ -6000,8 +6236,8 @@ am__unittest_run_cmd_SOURCES_DIST = test/run_cmd.cc
 @ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE at am_unittest_run_cmd_OBJECTS = test/unittest_run_cmd-run_cmd.$(OBJEXT)
 unittest_run_cmd_OBJECTS = $(am_unittest_run_cmd_OBJECTS)
 @ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE at unittest_run_cmd_DEPENDENCIES = $(LIBCEPHFS) \
- at ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_9) \
- at ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_17)
+ at ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_10) \
+ at ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_19)
 unittest_run_cmd_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(unittest_run_cmd_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
@@ -6009,8 +6245,8 @@ unittest_run_cmd_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 am_unittest_safe_io_OBJECTS =  \
 	test/common/unittest_safe_io-test_safe_io.$(OBJEXT)
 unittest_safe_io_OBJECTS = $(am_unittest_safe_io_OBJECTS)
-unittest_safe_io_DEPENDENCIES = $(am__DEPENDENCIES_17) \
-	$(am__DEPENDENCIES_9)
+unittest_safe_io_DEPENDENCIES = $(am__DEPENDENCIES_19) \
+	$(am__DEPENDENCIES_10)
 unittest_safe_io_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(unittest_safe_io_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
@@ -6018,8 +6254,8 @@ unittest_safe_io_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 am_unittest_shared_cache_OBJECTS =  \
 	test/common/unittest_shared_cache-test_shared_cache.$(OBJEXT)
 unittest_shared_cache_OBJECTS = $(am_unittest_shared_cache_OBJECTS)
-unittest_shared_cache_DEPENDENCIES = $(am__DEPENDENCIES_17) \
-	$(am__DEPENDENCIES_9)
+unittest_shared_cache_DEPENDENCIES = $(am__DEPENDENCIES_19) \
+	$(am__DEPENDENCIES_10)
 unittest_shared_cache_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(unittest_shared_cache_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
@@ -6027,16 +6263,24 @@ unittest_shared_cache_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 am_unittest_sharedptr_registry_OBJECTS = test/common/unittest_sharedptr_registry-test_sharedptr_registry.$(OBJEXT)
 unittest_sharedptr_registry_OBJECTS =  \
 	$(am_unittest_sharedptr_registry_OBJECTS)
-unittest_sharedptr_registry_DEPENDENCIES = $(am__DEPENDENCIES_17) \
-	$(am__DEPENDENCIES_9)
+unittest_sharedptr_registry_DEPENDENCIES = $(am__DEPENDENCIES_19) \
+	$(am__DEPENDENCIES_10)
 unittest_sharedptr_registry_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(unittest_sharedptr_registry_CXXFLAGS) $(CXXFLAGS) \
 	$(AM_LDFLAGS) $(LDFLAGS) -o $@
+am_unittest_shunique_lock_OBJECTS = test/common/unittest_shunique_lock-test_shunique_lock.$(OBJEXT)
+unittest_shunique_lock_OBJECTS = $(am_unittest_shunique_lock_OBJECTS)
+unittest_shunique_lock_DEPENDENCIES = $(am__DEPENDENCIES_19) \
+	$(am__DEPENDENCIES_10) $(am__DEPENDENCIES_3)
+unittest_shunique_lock_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
+	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
+	$(unittest_shunique_lock_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
+	$(LDFLAGS) -o $@
 am_unittest_signals_OBJECTS = test/unittest_signals-signals.$(OBJEXT)
 unittest_signals_OBJECTS = $(am_unittest_signals_OBJECTS)
-unittest_signals_DEPENDENCIES = $(am__DEPENDENCIES_17) \
-	$(am__DEPENDENCIES_9)
+unittest_signals_DEPENDENCIES = $(am__DEPENDENCIES_19) \
+	$(am__DEPENDENCIES_10)
 unittest_signals_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(unittest_signals_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
@@ -6045,8 +6289,8 @@ am__unittest_simple_spin_SOURCES_DIST = test/simple_spin.cc
 @ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE at am_unittest_simple_spin_OBJECTS = test/unittest_simple_spin-simple_spin.$(OBJEXT)
 unittest_simple_spin_OBJECTS = $(am_unittest_simple_spin_OBJECTS)
 @ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE at unittest_simple_spin_DEPENDENCIES = $(LIBCEPHFS) \
- at ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_9) \
- at ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_17)
+ at ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_10) \
+ at ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_19)
 unittest_simple_spin_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(unittest_simple_spin_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
@@ -6054,8 +6298,8 @@ unittest_simple_spin_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 am_unittest_sloppy_crc_map_OBJECTS = test/common/unittest_sloppy_crc_map-test_sloppy_crc_map.$(OBJEXT)
 unittest_sloppy_crc_map_OBJECTS =  \
 	$(am_unittest_sloppy_crc_map_OBJECTS)
-unittest_sloppy_crc_map_DEPENDENCIES = $(am__DEPENDENCIES_17) \
-	$(am__DEPENDENCIES_9)
+unittest_sloppy_crc_map_DEPENDENCIES = $(am__DEPENDENCIES_19) \
+	$(am__DEPENDENCIES_10)
 unittest_sloppy_crc_map_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(unittest_sloppy_crc_map_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
@@ -6063,8 +6307,8 @@ unittest_sloppy_crc_map_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 am_unittest_str_list_OBJECTS =  \
 	test/unittest_str_list-test_str_list.$(OBJEXT)
 unittest_str_list_OBJECTS = $(am_unittest_str_list_OBJECTS)
-unittest_str_list_DEPENDENCIES = $(am__DEPENDENCIES_17) \
-	$(am__DEPENDENCIES_9)
+unittest_str_list_DEPENDENCIES = $(am__DEPENDENCIES_19) \
+	$(am__DEPENDENCIES_10)
 unittest_str_list_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(unittest_str_list_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
@@ -6072,8 +6316,8 @@ unittest_str_list_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 am_unittest_str_map_OBJECTS =  \
 	test/common/unittest_str_map-test_str_map.$(OBJEXT)
 unittest_str_map_OBJECTS = $(am_unittest_str_map_OBJECTS)
-unittest_str_map_DEPENDENCIES = $(am__DEPENDENCIES_17) \
-	$(am__DEPENDENCIES_9)
+unittest_str_map_DEPENDENCIES = $(am__DEPENDENCIES_19) \
+	$(am__DEPENDENCIES_10)
 unittest_str_map_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(unittest_str_map_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
@@ -6081,16 +6325,16 @@ unittest_str_map_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 am_unittest_striper_OBJECTS =  \
 	test/unittest_striper-test_striper.$(OBJEXT)
 unittest_striper_OBJECTS = $(am_unittest_striper_OBJECTS)
-unittest_striper_DEPENDENCIES = $(LIBOSDC) $(am__DEPENDENCIES_17) \
-	$(am__DEPENDENCIES_9)
+unittest_striper_DEPENDENCIES = $(LIBOSDC) $(am__DEPENDENCIES_19) \
+	$(am__DEPENDENCIES_10)
 unittest_striper_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(unittest_striper_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
 	$(LDFLAGS) -o $@
 am_unittest_strtol_OBJECTS = test/unittest_strtol-strtol.$(OBJEXT)
 unittest_strtol_OBJECTS = $(am_unittest_strtol_OBJECTS)
-unittest_strtol_DEPENDENCIES = $(am__DEPENDENCIES_17) \
-	$(am__DEPENDENCIES_9)
+unittest_strtol_DEPENDENCIES = $(am__DEPENDENCIES_19) \
+	$(am__DEPENDENCIES_10)
 unittest_strtol_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(unittest_strtol_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
@@ -6098,7 +6342,8 @@ unittest_strtol_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 am_unittest_subprocess_OBJECTS =  \
 	test/unittest_subprocess-test_subprocess.$(OBJEXT)
 unittest_subprocess_OBJECTS = $(am_unittest_subprocess_OBJECTS)
-unittest_subprocess_DEPENDENCIES = $(LIBCOMMON) $(am__DEPENDENCIES_17)
+unittest_subprocess_DEPENDENCIES = $(am__DEPENDENCIES_4) \
+	$(am__DEPENDENCIES_19)
 unittest_subprocess_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(unittest_subprocess_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
@@ -6106,8 +6351,8 @@ unittest_subprocess_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 am_unittest_tableformatter_OBJECTS = test/common/unittest_tableformatter-test_tableformatter.$(OBJEXT)
 unittest_tableformatter_OBJECTS =  \
 	$(am_unittest_tableformatter_OBJECTS)
-unittest_tableformatter_DEPENDENCIES = $(am__DEPENDENCIES_17) \
-	$(am__DEPENDENCIES_9)
+unittest_tableformatter_DEPENDENCIES = $(am__DEPENDENCIES_19) \
+	$(am__DEPENDENCIES_10)
 unittest_tableformatter_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(unittest_tableformatter_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
@@ -6115,7 +6360,8 @@ unittest_tableformatter_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 am_unittest_texttable_OBJECTS =  \
 	test/unittest_texttable-test_texttable.$(OBJEXT)
 unittest_texttable_OBJECTS = $(am_unittest_texttable_OBJECTS)
-unittest_texttable_DEPENDENCIES = $(LIBCOMMON) $(am__DEPENDENCIES_17)
+unittest_texttable_DEPENDENCIES = $(am__DEPENDENCIES_4) \
+	$(am__DEPENDENCIES_19)
 unittest_texttable_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(unittest_texttable_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
@@ -6123,8 +6369,8 @@ unittest_texttable_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 am_unittest_throttle_OBJECTS =  \
 	test/common/unittest_throttle-Throttle.$(OBJEXT)
 unittest_throttle_OBJECTS = $(am_unittest_throttle_OBJECTS)
-unittest_throttle_DEPENDENCIES = $(am__DEPENDENCIES_17) \
-	$(am__DEPENDENCIES_9)
+unittest_throttle_DEPENDENCIES = $(am__DEPENDENCIES_19) \
+	$(am__DEPENDENCIES_10)
 unittest_throttle_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(unittest_throttle_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
@@ -6132,16 +6378,29 @@ unittest_throttle_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 am_unittest_time_OBJECTS =  \
 	test/common/unittest_time-test_time.$(OBJEXT)
 unittest_time_OBJECTS = $(am_unittest_time_OBJECTS)
-unittest_time_DEPENDENCIES = $(LIBCOMMON) $(am__DEPENDENCIES_17) \
-	$(am__DEPENDENCIES_1) $(am__DEPENDENCIES_3)
+unittest_time_DEPENDENCIES = $(am__DEPENDENCIES_4) \
+	$(am__DEPENDENCIES_19) $(am__DEPENDENCIES_1) \
+	$(am__DEPENDENCIES_3)
 unittest_time_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(unittest_time_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) $(LDFLAGS) \
 	-o $@
+am__unittest_transaction_SOURCES_DIST =  \
+	test/objectstore/test_transaction.cc
+ at ENABLE_SERVER_TRUE@am_unittest_transaction_OBJECTS = test/objectstore/unittest_transaction-test_transaction.$(OBJEXT)
+unittest_transaction_OBJECTS = $(am_unittest_transaction_OBJECTS)
+ at ENABLE_SERVER_TRUE@unittest_transaction_DEPENDENCIES =  \
+ at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_14) \
+ at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_19) \
+ at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_10)
+unittest_transaction_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
+	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
+	$(unittest_transaction_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
+	$(LDFLAGS) -o $@
 am_unittest_utf8_OBJECTS = test/unittest_utf8-utf8.$(OBJEXT)
 unittest_utf8_OBJECTS = $(am_unittest_utf8_OBJECTS)
-unittest_utf8_DEPENDENCIES = $(am__DEPENDENCIES_17) \
-	$(am__DEPENDENCIES_9)
+unittest_utf8_DEPENDENCIES = $(am__DEPENDENCIES_19) \
+	$(am__DEPENDENCIES_10)
 unittest_utf8_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(unittest_utf8_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) $(LDFLAGS) \
@@ -6149,24 +6408,35 @@ unittest_utf8_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 am_unittest_util_OBJECTS =  \
 	test/common/unittest_util-test_util.$(OBJEXT)
 unittest_util_OBJECTS = $(am_unittest_util_OBJECTS)
-unittest_util_DEPENDENCIES = $(LIBCOMMON) $(am__DEPENDENCIES_17) \
-	$(am__DEPENDENCIES_1) $(am__DEPENDENCIES_3)
+unittest_util_DEPENDENCIES = $(am__DEPENDENCIES_4) \
+	$(am__DEPENDENCIES_19) $(am__DEPENDENCIES_1) \
+	$(am__DEPENDENCIES_3)
 unittest_util_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(unittest_util_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) $(LDFLAGS) \
 	-o $@
+am_unittest_weighted_priority_queue_OBJECTS = test/common/unittest_weighted_priority_queue-test_weighted_priority_queue.$(OBJEXT)
+unittest_weighted_priority_queue_OBJECTS =  \
+	$(am_unittest_weighted_priority_queue_OBJECTS)
+unittest_weighted_priority_queue_DEPENDENCIES =  \
+	$(am__DEPENDENCIES_19) $(am__DEPENDENCIES_10)
+unittest_weighted_priority_queue_LINK = $(LIBTOOL) $(AM_V_lt) \
+	--tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link \
+	$(CXXLD) $(unittest_weighted_priority_queue_CXXFLAGS) \
+	$(CXXFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@
 am_unittest_workqueue_OBJECTS =  \
 	test/unittest_workqueue-test_workqueue.$(OBJEXT)
 unittest_workqueue_OBJECTS = $(am_unittest_workqueue_OBJECTS)
-unittest_workqueue_DEPENDENCIES = $(am__DEPENDENCIES_17) \
-	$(am__DEPENDENCIES_9)
+unittest_workqueue_DEPENDENCIES = $(am__DEPENDENCIES_19) \
+	$(am__DEPENDENCIES_10)
 unittest_workqueue_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(unittest_workqueue_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
 	$(LDFLAGS) -o $@
 am_unittest_xlist_OBJECTS = test/unittest_xlist-test_xlist.$(OBJEXT)
 unittest_xlist_OBJECTS = $(am_unittest_xlist_OBJECTS)
-unittest_xlist_DEPENDENCIES = $(am__DEPENDENCIES_17) $(LIBCOMMON)
+unittest_xlist_DEPENDENCIES = $(am__DEPENDENCIES_19) \
+	$(am__DEPENDENCIES_4)
 unittest_xlist_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(unittest_xlist_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
@@ -6177,9 +6447,9 @@ am__xio_client_SOURCES_DIST = test/messenger/xio_client.cc \
 @ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@	test/messenger/xio_client-xio_dispatcher.$(OBJEXT)
 xio_client_OBJECTS = $(am_xio_client_OBJECTS)
 @ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE at xio_client_DEPENDENCIES =  \
- at ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@	$(am__DEPENDENCIES_12) \
- at ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@	$(am__DEPENDENCIES_9) \
- at ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@	$(LIBCOMMON) \
+ at ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@	$(am__DEPENDENCIES_14) \
+ at ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@	$(am__DEPENDENCIES_10) \
+ at ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@	$(am__DEPENDENCIES_4) \
 @ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@	$(am__DEPENDENCIES_1) \
 @ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@	$(am__DEPENDENCIES_3) \
 @ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@	$(am__DEPENDENCIES_1)
@@ -6192,9 +6462,9 @@ am__xio_server_SOURCES_DIST = test/messenger/xio_server.cc \
 @ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@	test/messenger/xio_server-xio_dispatcher.$(OBJEXT)
 xio_server_OBJECTS = $(am_xio_server_OBJECTS)
 @ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE at xio_server_DEPENDENCIES =  \
- at ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@	$(am__DEPENDENCIES_12) \
- at ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@	$(am__DEPENDENCIES_9) \
- at ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@	$(LIBCOMMON) \
+ at ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@	$(am__DEPENDENCIES_14) \
+ at ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@	$(am__DEPENDENCIES_10) \
+ at ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@	$(am__DEPENDENCIES_4) \
 @ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@	$(am__DEPENDENCIES_1) \
 @ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@	$(am__DEPENDENCIES_3) \
 @ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@	$(am__DEPENDENCIES_1)
@@ -6203,8 +6473,7 @@ xio_server_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) \
 	$(CXXFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@
 SCRIPTS = $(bin_SCRIPTS) $(ceph_libexec_SCRIPTS) \
 	$(ceph_monstore_update_crush_SCRIPTS) $(ceph_sbin_SCRIPTS) \
-	$(dist_bin_SCRIPTS) $(sbin_SCRIPTS) $(shell_common_SCRIPTS) \
-	$(su_sbin_SCRIPTS)
+	$(dist_bin_SCRIPTS) $(sbin_SCRIPTS) $(su_sbin_SCRIPTS)
 AM_V_P = $(am__v_P_ at AM_V@)
 am__v_P_ = $(am__v_P_ at AM_DEFAULT_V@)
 am__v_P_0 = false
@@ -6285,11 +6554,12 @@ SOURCES = $(libcls_log_client_a_SOURCES) \
 	$(libos_types_a_SOURCES) $(libos_zfs_a_SOURCES) \
 	$(libosd_a_SOURCES) $(libarch_la_SOURCES) \
 	$(libauth_la_SOURCES) $(libceph_example_la_SOURCES) \
-	$(libceph_snappy_la_SOURCES) $(libcephfs_la_SOURCES) \
-	$(libcephfs_jni_la_SOURCES) $(libcivetweb_la_SOURCES) \
-	$(libclient_la_SOURCES) $(libclient_fuse_la_SOURCES) \
-	$(libcls_cephfs_la_SOURCES) $(libcls_cephfs_client_la_SOURCES) \
-	$(libcls_hello_la_SOURCES) $(libcls_journal_la_SOURCES) \
+	$(libceph_snappy_la_SOURCES) $(libceph_zlib_la_SOURCES) \
+	$(libcephfs_la_SOURCES) $(libcephfs_jni_la_SOURCES) \
+	$(libcivetweb_la_SOURCES) $(libclient_la_SOURCES) \
+	$(libclient_fuse_la_SOURCES) $(libcls_cephfs_la_SOURCES) \
+	$(libcls_cephfs_client_la_SOURCES) $(libcls_hello_la_SOURCES) \
+	$(libcls_journal_la_SOURCES) \
 	$(libcls_journal_client_la_SOURCES) $(libcls_kvs_la_SOURCES) \
 	$(libcls_lock_la_SOURCES) $(libcls_lock_client_la_SOURCES) \
 	$(libcls_log_la_SOURCES) $(libcls_numops_la_SOURCES) \
@@ -6324,20 +6594,22 @@ SOURCES = $(libcls_log_client_a_SOURCES) \
 	$(libec_test_shec_sse3_la_SOURCES) \
 	$(libec_test_shec_sse4_la_SOURCES) \
 	$(liberasure_code_la_SOURCES) $(libglobal_la_SOURCES) \
-	$(libjournal_la_SOURCES) $(libjson_spirit_la_SOURCES) \
-	$(libkrbd_la_SOURCES) $(liblog_la_SOURCES) \
-	$(libmds_la_SOURCES) $(libmon_types_la_SOURCES) \
-	$(libmsg_la_SOURCES) $(libos_tp_la_SOURCES) \
-	$(nodist_libos_tp_la_SOURCES) $(libosd_tp_la_SOURCES) \
-	$(nodist_libosd_tp_la_SOURCES) $(libosd_types_la_SOURCES) \
-	$(libosdc_la_SOURCES) $(libperfglue_la_SOURCES) \
-	$(librados_la_SOURCES) $(librados_api_la_SOURCES) \
-	$(librados_internal_la_SOURCES) \
+	$(libisa_la_SOURCES) $(libjournal_la_SOURCES) \
+	$(libjson_spirit_la_SOURCES) $(libkrbd_la_SOURCES) \
+	$(liblog_la_SOURCES) $(libmds_la_SOURCES) \
+	$(libmon_types_la_SOURCES) $(libmsg_la_SOURCES) \
+	$(libos_tp_la_SOURCES) $(nodist_libos_tp_la_SOURCES) \
+	$(libosd_tp_la_SOURCES) $(nodist_libosd_tp_la_SOURCES) \
+	$(libosd_types_la_SOURCES) $(libosdc_la_SOURCES) \
+	$(libperfglue_la_SOURCES) $(librados_la_SOURCES) \
+	$(librados_api_la_SOURCES) $(librados_internal_la_SOURCES) \
 	$(librados_test_stub_la_SOURCES) $(librados_tp_la_SOURCES) \
 	$(nodist_librados_tp_la_SOURCES) $(libradosstriper_la_SOURCES) \
 	$(libradosstripertest_la_SOURCES) $(libradostest_la_SOURCES) \
 	$(librbd_la_SOURCES) $(librbd_api_la_SOURCES) \
-	$(librbd_internal_la_SOURCES) $(librbd_replay_la_SOURCES) \
+	$(librbd_internal_la_SOURCES) \
+	$(librbd_mirror_internal_la_SOURCES) \
+	$(librbd_mirror_test_la_SOURCES) $(librbd_replay_la_SOURCES) \
 	$(librbd_replay_ios_la_SOURCES) \
 	$(librbd_replay_types_la_SOURCES) $(librbd_test_la_SOURCES) \
 	$(librbd_tp_la_SOURCES) $(nodist_librbd_tp_la_SOURCES) \
@@ -6416,6 +6688,7 @@ SOURCES = $(libcls_log_client_a_SOURCES) \
 	$(ceph_test_rados_striper_api_io_SOURCES) \
 	$(ceph_test_rados_striper_api_striping_SOURCES) \
 	$(ceph_test_rados_watch_notify_SOURCES) \
+	$(ceph_test_rbd_mirror_SOURCES) \
 	$(ceph_test_rewrite_latency_SOURCES) \
 	$(ceph_test_rgw_manifest_SOURCES) $(ceph_test_rgw_obj_SOURCES) \
 	$(ceph_test_signal_handlers_SOURCES) \
@@ -6429,10 +6702,10 @@ SOURCES = $(libcls_log_client_a_SOURCES) \
 	$(monmaptool_SOURCES) $(mount_ceph_SOURCES) \
 	$(osdmaptool_SOURCES) $(rados_SOURCES) $(radosgw_SOURCES) \
 	$(radosgw_admin_SOURCES) $(radosgw_object_expirer_SOURCES) \
-	$(rbd_SOURCES) $(rbd_fuse_SOURCES) $(rbd_nbd_SOURCES) \
-	$(rbd_replay_SOURCES) $(rbd_replay_prep_SOURCES) \
-	$(simple_client_SOURCES) $(simple_server_SOURCES) \
-	$(test_build_libcephfs_SOURCES) \
+	$(rbd_SOURCES) $(rbd_fuse_SOURCES) $(rbd_mirror_SOURCES) \
+	$(rbd_nbd_SOURCES) $(rbd_replay_SOURCES) \
+	$(rbd_replay_prep_SOURCES) $(simple_client_SOURCES) \
+	$(simple_server_SOURCES) $(test_build_libcephfs_SOURCES) \
 	$(test_build_libcommon_SOURCES) $(test_build_librados_SOURCES) \
 	$(test_build_librgw_SOURCES) $(unittest_addrs_SOURCES) \
 	$(unittest_admin_socket_SOURCES) $(unittest_arch_SOURCES) \
@@ -6447,7 +6720,9 @@ SOURCES = $(libcls_log_client_a_SOURCES) \
 	$(unittest_chain_xattr_SOURCES) \
 	$(unittest_compression_plugin_SOURCES) \
 	$(unittest_compression_plugin_snappy_SOURCES) \
+	$(unittest_compression_plugin_zlib_SOURCES) \
 	$(unittest_compression_snappy_SOURCES) \
+	$(unittest_compression_zlib_SOURCES) \
 	$(unittest_config_SOURCES) $(unittest_confutils_SOURCES) \
 	$(unittest_context_SOURCES) $(unittest_crc32c_SOURCES) \
 	$(unittest_crush_SOURCES) $(unittest_crush_wrapper_SOURCES) \
@@ -6471,6 +6746,7 @@ SOURCES = $(libcls_log_client_a_SOURCES) \
 	$(unittest_escape_SOURCES) $(unittest_formatter_SOURCES) \
 	$(unittest_gather_SOURCES) $(unittest_heartbeatmap_SOURCES) \
 	$(unittest_histogram_SOURCES) $(unittest_hitset_SOURCES) \
+	$(unittest_interval_set_SOURCES) \
 	$(unittest_io_priority_SOURCES) $(unittest_ipaddr_SOURCES) \
 	$(unittest_journal_SOURCES) $(unittest_lfnindex_SOURCES) \
 	$(unittest_libcephfs_config_SOURCES) \
@@ -6479,30 +6755,33 @@ SOURCES = $(libcls_log_client_a_SOURCES) \
 	$(unittest_log_SOURCES) $(unittest_lru_SOURCES) \
 	$(unittest_mds_authcap_SOURCES) $(unittest_mds_types_SOURCES) \
 	$(unittest_mime_SOURCES) $(unittest_mon_moncap_SOURCES) \
-	$(unittest_mon_pgmap_SOURCES) $(unittest_on_exit_SOURCES) \
-	$(unittest_osd_osdcap_SOURCES) $(unittest_osd_types_SOURCES) \
-	$(unittest_osdmap_SOURCES) $(unittest_osdscrub_SOURCES) \
-	$(unittest_pageset_SOURCES) $(unittest_perf_counters_SOURCES) \
-	$(unittest_pglog_SOURCES) \
+	$(unittest_mon_pgmap_SOURCES) $(unittest_mutex_debug_SOURCES) \
+	$(unittest_on_exit_SOURCES) $(unittest_osd_osdcap_SOURCES) \
+	$(unittest_osd_types_SOURCES) $(unittest_osdmap_SOURCES) \
+	$(unittest_osdscrub_SOURCES) $(unittest_pageset_SOURCES) \
+	$(unittest_perf_counters_SOURCES) $(unittest_pglog_SOURCES) \
 	$(unittest_prebufferedstreambuf_SOURCES) \
 	$(unittest_prioritized_queue_SOURCES) \
-	$(unittest_rbd_replay_SOURCES) $(unittest_readahead_SOURCES) \
+	$(unittest_rbd_mirror_SOURCES) $(unittest_rbd_replay_SOURCES) \
+	$(unittest_readahead_SOURCES) \
 	$(unittest_rocksdb_option_SOURCES) \
 	$(unittest_rocksdb_option_static_SOURCES) \
 	$(unittest_run_cmd_SOURCES) $(unittest_safe_io_SOURCES) \
 	$(unittest_shared_cache_SOURCES) \
 	$(unittest_sharedptr_registry_SOURCES) \
-	$(unittest_signals_SOURCES) $(unittest_simple_spin_SOURCES) \
+	$(unittest_shunique_lock_SOURCES) $(unittest_signals_SOURCES) \
+	$(unittest_simple_spin_SOURCES) \
 	$(unittest_sloppy_crc_map_SOURCES) \
 	$(unittest_str_list_SOURCES) $(unittest_str_map_SOURCES) \
 	$(unittest_striper_SOURCES) $(unittest_strtol_SOURCES) \
 	$(unittest_subprocess_SOURCES) \
 	$(unittest_tableformatter_SOURCES) \
 	$(unittest_texttable_SOURCES) $(unittest_throttle_SOURCES) \
-	$(unittest_time_SOURCES) $(unittest_utf8_SOURCES) \
-	$(unittest_util_SOURCES) $(unittest_workqueue_SOURCES) \
-	$(unittest_xlist_SOURCES) $(xio_client_SOURCES) \
-	$(xio_server_SOURCES)
+	$(unittest_time_SOURCES) $(unittest_transaction_SOURCES) \
+	$(unittest_utf8_SOURCES) $(unittest_util_SOURCES) \
+	$(unittest_weighted_priority_queue_SOURCES) \
+	$(unittest_workqueue_SOURCES) $(unittest_xlist_SOURCES) \
+	$(xio_client_SOURCES) $(xio_server_SOURCES)
 DIST_SOURCES = $(am__libcls_log_client_a_SOURCES_DIST) \
 	$(am__libcls_replica_log_client_a_SOURCES_DIST) \
 	$(am__libcls_statelog_client_a_SOURCES_DIST) \
@@ -6514,7 +6793,8 @@ DIST_SOURCES = $(am__libcls_log_client_a_SOURCES_DIST) \
 	$(am__libos_zfs_a_SOURCES_DIST) $(am__libosd_a_SOURCES_DIST) \
 	$(libarch_la_SOURCES) $(libauth_la_SOURCES) \
 	$(am__libceph_example_la_SOURCES_DIST) \
-	$(libceph_snappy_la_SOURCES) $(am__libcephfs_la_SOURCES_DIST) \
+	$(libceph_snappy_la_SOURCES) $(libceph_zlib_la_SOURCES) \
+	$(am__libcephfs_la_SOURCES_DIST) \
 	$(am__libcephfs_jni_la_SOURCES_DIST) \
 	$(am__libcivetweb_la_SOURCES_DIST) \
 	$(am__libclient_la_SOURCES_DIST) \
@@ -6548,8 +6828,8 @@ DIST_SOURCES = $(am__libcls_log_client_a_SOURCES_DIST) \
 	$(am__libec_example_la_SOURCES_DIST) \
 	$(am__libec_fail_to_initialize_la_SOURCES_DIST) \
 	$(am__libec_fail_to_register_la_SOURCES_DIST) \
-	$(am__libec_hangs_la_SOURCES_DIST) \
-	$(am__libec_isa_la_SOURCES_DIST) $(libec_jerasure_la_SOURCES) \
+	$(am__libec_hangs_la_SOURCES_DIST) $(libec_isa_la_SOURCES) \
+	$(libec_jerasure_la_SOURCES) \
 	$(libec_jerasure_generic_la_SOURCES) \
 	$(libec_jerasure_neon_la_SOURCES) \
 	$(libec_jerasure_sse3_la_SOURCES) \
@@ -6568,6 +6848,7 @@ DIST_SOURCES = $(am__libcls_log_client_a_SOURCES_DIST) \
 	$(am__libec_test_shec_sse3_la_SOURCES_DIST) \
 	$(am__libec_test_shec_sse4_la_SOURCES_DIST) \
 	$(liberasure_code_la_SOURCES) $(libglobal_la_SOURCES) \
+	$(am__libisa_la_SOURCES_DIST) \
 	$(am__libjournal_la_SOURCES_DIST) $(libjson_spirit_la_SOURCES) \
 	$(am__libkrbd_la_SOURCES_DIST) $(liblog_la_SOURCES) \
 	$(am__libmds_la_SOURCES_DIST) $(libmon_types_la_SOURCES) \
@@ -6585,6 +6866,8 @@ DIST_SOURCES = $(am__libcls_log_client_a_SOURCES_DIST) \
 	$(am__librbd_la_SOURCES_DIST) \
 	$(am__librbd_api_la_SOURCES_DIST) \
 	$(am__librbd_internal_la_SOURCES_DIST) \
+	$(am__librbd_mirror_internal_la_SOURCES_DIST) \
+	$(am__librbd_mirror_test_la_SOURCES_DIST) \
 	$(am__librbd_replay_la_SOURCES_DIST) \
 	$(am__librbd_replay_ios_la_SOURCES_DIST) \
 	$(am__librbd_replay_types_la_SOURCES_DIST) \
@@ -6686,6 +6969,7 @@ DIST_SOURCES = $(am__libcls_log_client_a_SOURCES_DIST) \
 	$(am__ceph_test_rados_striper_api_io_SOURCES_DIST) \
 	$(am__ceph_test_rados_striper_api_striping_SOURCES_DIST) \
 	$(am__ceph_test_rados_watch_notify_SOURCES_DIST) \
+	$(am__ceph_test_rbd_mirror_SOURCES_DIST) \
 	$(ceph_test_rewrite_latency_SOURCES) \
 	$(am__ceph_test_rgw_manifest_SOURCES_DIST) \
 	$(am__ceph_test_rgw_obj_SOURCES_DIST) \
@@ -6707,7 +6991,8 @@ DIST_SOURCES = $(am__libcls_log_client_a_SOURCES_DIST) \
 	$(am__radosgw_admin_SOURCES_DIST) \
 	$(am__radosgw_object_expirer_SOURCES_DIST) \
 	$(am__rbd_SOURCES_DIST) $(am__rbd_fuse_SOURCES_DIST) \
-	$(am__rbd_nbd_SOURCES_DIST) $(am__rbd_replay_SOURCES_DIST) \
+	$(am__rbd_mirror_SOURCES_DIST) $(am__rbd_nbd_SOURCES_DIST) \
+	$(am__rbd_replay_SOURCES_DIST) \
 	$(am__rbd_replay_prep_SOURCES_DIST) \
 	$(am__simple_client_SOURCES_DIST) \
 	$(am__simple_server_SOURCES_DIST) \
@@ -6729,7 +7014,9 @@ DIST_SOURCES = $(am__libcls_log_client_a_SOURCES_DIST) \
 	$(am__unittest_chain_xattr_SOURCES_DIST) \
 	$(am__unittest_compression_plugin_SOURCES_DIST) \
 	$(am__unittest_compression_plugin_snappy_SOURCES_DIST) \
+	$(am__unittest_compression_plugin_zlib_SOURCES_DIST) \
 	$(am__unittest_compression_snappy_SOURCES_DIST) \
+	$(am__unittest_compression_zlib_SOURCES_DIST) \
 	$(unittest_config_SOURCES) $(unittest_confutils_SOURCES) \
 	$(unittest_context_SOURCES) $(unittest_crc32c_SOURCES) \
 	$(unittest_crush_SOURCES) $(unittest_crush_wrapper_SOURCES) \
@@ -6755,6 +7042,7 @@ DIST_SOURCES = $(am__libcls_log_client_a_SOURCES_DIST) \
 	$(unittest_gather_SOURCES) $(unittest_heartbeatmap_SOURCES) \
 	$(unittest_histogram_SOURCES) \
 	$(am__unittest_hitset_SOURCES_DIST) \
+	$(unittest_interval_set_SOURCES) \
 	$(unittest_io_priority_SOURCES) $(unittest_ipaddr_SOURCES) \
 	$(am__unittest_journal_SOURCES_DIST) \
 	$(am__unittest_lfnindex_SOURCES_DIST) \
@@ -6767,7 +7055,7 @@ DIST_SOURCES = $(am__libcls_log_client_a_SOURCES_DIST) \
 	$(unittest_mds_types_SOURCES) $(unittest_mime_SOURCES) \
 	$(am__unittest_mon_moncap_SOURCES_DIST) \
 	$(am__unittest_mon_pgmap_SOURCES_DIST) \
-	$(unittest_on_exit_SOURCES) \
+	$(unittest_mutex_debug_SOURCES) $(unittest_on_exit_SOURCES) \
 	$(am__unittest_osd_osdcap_SOURCES_DIST) \
 	$(unittest_osd_types_SOURCES) $(unittest_osdmap_SOURCES) \
 	$(am__unittest_osdscrub_SOURCES_DIST) \
@@ -6776,6 +7064,7 @@ DIST_SOURCES = $(am__libcls_log_client_a_SOURCES_DIST) \
 	$(am__unittest_pglog_SOURCES_DIST) \
 	$(unittest_prebufferedstreambuf_SOURCES) \
 	$(unittest_prioritized_queue_SOURCES) \
+	$(am__unittest_rbd_mirror_SOURCES_DIST) \
 	$(am__unittest_rbd_replay_SOURCES_DIST) \
 	$(unittest_readahead_SOURCES) \
 	$(am__unittest_rocksdb_option_SOURCES_DIST) \
@@ -6783,7 +7072,7 @@ DIST_SOURCES = $(am__libcls_log_client_a_SOURCES_DIST) \
 	$(am__unittest_run_cmd_SOURCES_DIST) \
 	$(unittest_safe_io_SOURCES) $(unittest_shared_cache_SOURCES) \
 	$(unittest_sharedptr_registry_SOURCES) \
-	$(unittest_signals_SOURCES) \
+	$(unittest_shunique_lock_SOURCES) $(unittest_signals_SOURCES) \
 	$(am__unittest_simple_spin_SOURCES_DIST) \
 	$(unittest_sloppy_crc_map_SOURCES) \
 	$(unittest_str_list_SOURCES) $(unittest_str_map_SOURCES) \
@@ -6791,10 +7080,12 @@ DIST_SOURCES = $(am__libcls_log_client_a_SOURCES_DIST) \
 	$(unittest_subprocess_SOURCES) \
 	$(unittest_tableformatter_SOURCES) \
 	$(unittest_texttable_SOURCES) $(unittest_throttle_SOURCES) \
-	$(unittest_time_SOURCES) $(unittest_utf8_SOURCES) \
-	$(unittest_util_SOURCES) $(unittest_workqueue_SOURCES) \
-	$(unittest_xlist_SOURCES) $(am__xio_client_SOURCES_DIST) \
-	$(am__xio_server_SOURCES_DIST)
+	$(unittest_time_SOURCES) \
+	$(am__unittest_transaction_SOURCES_DIST) \
+	$(unittest_utf8_SOURCES) $(unittest_util_SOURCES) \
+	$(unittest_weighted_priority_queue_SOURCES) \
+	$(unittest_workqueue_SOURCES) $(unittest_xlist_SOURCES) \
+	$(am__xio_client_SOURCES_DIST) $(am__xio_server_SOURCES_DIST)
 RECURSIVE_TARGETS = all-recursive check-recursive cscopelist-recursive \
 	ctags-recursive dvi-recursive html-recursive info-recursive \
 	install-data-recursive install-dvi-recursive \
@@ -6881,21 +7172,21 @@ am__noinst_HEADERS_DIST = arch/intel.h arch/arm.h arch/probe.h \
 	os/filestore/LFNIndex.h os/filestore/SequencerPosition.h \
 	os/filestore/WBThrottle.h os/filestore/XfsFileStoreBackend.h \
 	os/filestore/ZFSFileStoreBackend.h os/fs/btrfs_ioctl.h \
-	os/fs/FS.h os/fs/XFS.h os/keyvaluestore/GenericObjectMap.h \
-	os/keyvaluestore/KeyValueStore.h os/kstore/kstore_types.h \
+	os/fs/FS.h os/fs/XFS.h os/kstore/kstore_types.h \
 	os/kstore/KStore.h os/kstore/kv.h os/memstore/MemStore.h \
-	os/memstore/PageSet.h os/ObjectMap.h os/ObjectStore.h \
-	os/bluestore/bluefs_types.h os/bluestore/bluestore_types.h \
-	os/bluestore/kv.h os/bluestore/Allocator.h \
-	os/bluestore/BlockDevice.h os/bluestore/BlueFS.h \
-	os/bluestore/BlueRocksEnv.h os/bluestore/BlueStore.h \
+	os/memstore/PageSet.h os/FuseStore.h os/ObjectMap.h \
+	os/ObjectStore.h os/bluestore/bluefs_types.h \
+	os/bluestore/bluestore_types.h os/bluestore/kv.h \
+	os/bluestore/Allocator.h os/bluestore/BlockDevice.h \
+	os/bluestore/BlueFS.h os/bluestore/BlueRocksEnv.h \
+	os/bluestore/BlueStore.h os/bluestore/KernelDevice.h \
 	os/bluestore/FreelistManager.h os/bluestore/StupidAllocator.h \
-	os/fs/ZFS.h osd/ClassHandler.h osd/HitSet.h osd/OSD.h \
-	osd/OSDCap.h osd/OSDMap.h osd/ObjectVersioner.h \
-	osd/OpRequest.h osd/SnapMapper.h osd/PG.h osd/PGLog.h \
-	osd/ReplicatedPG.h osd/PGBackend.h osd/ReplicatedBackend.h \
-	osd/TierAgentState.h osd/ECBackend.h osd/ECUtil.h \
-	osd/ECMsgTypes.h osd/ECTransaction.h osd/Watch.h \
+	os/fs/ZFS.h os/bluestore/NVMEDevice.h osd/ClassHandler.h \
+	osd/HitSet.h osd/OSD.h osd/OSDCap.h osd/OSDMap.h \
+	osd/ObjectVersioner.h osd/OpRequest.h osd/SnapMapper.h \
+	osd/PG.h osd/PGLog.h osd/ReplicatedPG.h osd/PGBackend.h \
+	osd/ReplicatedBackend.h osd/TierAgentState.h osd/ECBackend.h \
+	osd/ECUtil.h osd/ECMsgTypes.h osd/ECTransaction.h osd/Watch.h \
 	osd/osd_types.h \
 	erasure-code/jerasure/gf-complete/include/gf_complete.h \
 	erasure-code/jerasure/gf-complete/include/gf_general.h \
@@ -6926,6 +7217,7 @@ am__noinst_HEADERS_DIST = arch/intel.h arch/arm.h arch/probe.h \
 	erasure-code/isa/isa-l/include/types.h \
 	erasure-code/ErasureCode.h erasure-code/ErasureCodeInterface.h \
 	erasure-code/ErasureCodePlugin.h \
+	compressor/zlib/CompressionZlib.h \
 	compressor/snappy/SnappyCompressor.h compressor/Compressor.h \
 	compressor/AsyncCompressor.h compressor/CompressionPlugin.h \
 	osdc/Filer.h osdc/Journaler.h osdc/ObjectCacher.h \
@@ -6953,9 +7245,10 @@ am__noinst_HEADERS_DIST = arch/intel.h arch/arm.h arch/probe.h \
 	common/crc32c_intel_baseline.h common/crc32c_intel_fast.h \
 	common/crc32c_aarch64.h common/BackTrace.h \
 	common/RefCountedObj.h common/HeartbeatMap.h \
-	common/LogClient.h common/LogEntry.h common/Preforker.h \
-	common/SloppyCRCMap.h common/WorkQueue.h \
-	common/PrioritizedQueue.h common/ceph_argparse.h \
+	common/LogClient.h common/LogEntry.h common/Graylog.h \
+	common/Preforker.h common/SloppyCRCMap.h common/WorkQueue.h \
+	common/OpQueue.h common/PrioritizedQueue.h \
+	common/WeightedPriorityQueue.h common/ceph_argparse.h \
 	common/ceph_context.h common/xattr.h common/blkdev.h \
 	common/compiler_extensions.h common/debug.h common/dout.h \
 	common/escape.h common/fd.h common/version.h common/hex.h \
@@ -6963,16 +7256,17 @@ am__noinst_HEADERS_DIST = arch/intel.h arch/arm.h arch/probe.h \
 	common/environment.h common/likely.h common/lockdep.h \
 	common/obj_bencher.h common/snap_types.h common/Clock.h \
 	common/Cond.h common/ConfUtils.h common/DecayCounter.h \
-	common/Finisher.h common/Formatter.h common/perf_counters.h \
-	common/OutputDataSocket.h common/admin_socket.h \
-	common/admin_socket_client.h common/random_cache.hpp \
-	common/shared_cache.hpp common/tracked_int_ptr.hpp \
-	common/simple_cache.hpp common/sharedptr_registry.hpp \
-	common/map_cacher.hpp common/MemoryModel.h common/Mutex.h \
-	common/QueueRing.h common/PrebufferedStreambuf.h \
-	common/RWLock.h common/Semaphore.h common/SimpleRNG.h \
-	common/TextTable.h common/Thread.h common/Throttle.h \
-	common/Timer.h common/TrackedOp.h common/arch.h common/armor.h \
+	common/Finisher.h common/Formatter.h common/HTMLFormatter.h \
+	common/perf_counters.h common/OutputDataSocket.h \
+	common/admin_socket.h common/admin_socket_client.h \
+	common/random_cache.hpp common/shared_cache.hpp \
+	common/tracked_int_ptr.hpp common/simple_cache.hpp \
+	common/sharedptr_registry.hpp common/map_cacher.hpp \
+	common/MemoryModel.h common/Mutex.h common/QueueRing.h \
+	common/PrebufferedStreambuf.h common/RWLock.h \
+	common/Semaphore.h common/SimpleRNG.h common/TextTable.h \
+	common/Thread.h common/Throttle.h common/Timer.h \
+	common/TrackedOp.h common/arch.h common/armor.h \
 	common/common_init.h common/io_priority.h common/pipe.h \
 	common/code_environment.h common/signal.h common/simple_spin.h \
 	common/run_cmd.h common/safe_io.h common/config.h \
@@ -6987,6 +7281,7 @@ am__noinst_HEADERS_DIST = arch/intel.h arch/arm.h arch/probe.h \
 	common/SubProcess.h common/valgrind.h \
 	common/TracepointProvider.h common/event_socket.h \
 	common/PluginRegistry.h common/ceph_time.h common/ceph_timer.h \
+	common/align.h common/mutex_debug.h common/shunique_lock.h \
 	common/address_helper.h common/secret.h msg/Connection.h \
 	msg/Dispatcher.h msg/Message.h msg/Messenger.h \
 	msg/SimplePolicyMessenger.h msg/msg_types.h \
@@ -7060,11 +7355,14 @@ am__noinst_HEADERS_DIST = arch/intel.h arch/arm.h arch/probe.h \
 	include/Context.h include/CompatSet.h include/Distribution.h \
 	include/Spinlock.h include/addr_parsing.h include/assert.h \
 	include/atomic.h include/bitmapper.h include/blobhash.h \
-	include/buffer.h include/buffer_fwd.h include/byteorder.h \
+	include/btree_interval_set.h include/buffer.h \
+	include/buffer_fwd.h include/byteorder.h \
 	include/cephfs/libcephfs.h include/ceph_features.h \
 	include/ceph_frag.h include/ceph_fs.h include/ceph_hash.h \
 	include/cmp.h include/color.h include/compat.h \
-	include/sock_compat.h include/crc32c.h include/encoding.h \
+	include/cpp-btree/btree.h include/cpp-btree/btree_container.h \
+	include/cpp-btree/btree_map.h include/sock_compat.h \
+	include/crc32c.h include/encoding.h include/encoding_btree.h \
 	include/err.h include/error.h include/filepath.h \
 	include/frag.h include/hash.h include/inline_memory.h \
 	include/intarith.h include/interval_set.h include/int_types.h \
@@ -7114,7 +7412,7 @@ am__noinst_HEADERS_DIST = arch/intel.h arch/arm.h arch/probe.h \
 	librbd/image/CloseRequest.h librbd/image/OpenRequest.h \
 	librbd/image/RefreshParentRequest.h \
 	librbd/image/RefreshRequest.h librbd/image/SetSnapRequest.h \
-	librbd/journal/Replay.h librbd/journal/Entries.h \
+	librbd/journal/Replay.h librbd/journal/Types.h \
 	librbd/object_map/InvalidateRequest.h \
 	librbd/object_map/LockRequest.h librbd/object_map/Request.h \
 	librbd/object_map/RefreshRequest.h \
@@ -7153,7 +7451,8 @@ am__noinst_HEADERS_DIST = arch/intel.h arch/arm.h arch/probe.h \
 	rgw/rgw_rest_opstate.h rgw/rgw_rest_replica_log.h \
 	rgw/rgw_rest_config.h rgw/rgw_usage.h rgw/rgw_user.h \
 	rgw/rgw_bucket.h rgw/rgw_keystone.h rgw/rgw_civetweb.h \
-	rgw/rgw_civetweb_log.h civetweb/civetweb.h \
+	rgw/rgw_civetweb_log.h rgw/rgw_website.h \
+	rgw/rgw_rest_s3website.h civetweb/civetweb.h \
 	civetweb/include/civetweb.h civetweb/include/civetweb_conf.h \
 	civetweb/src/md5.h cls/lock/cls_lock_types.h \
 	cls/lock/cls_lock_ops.h cls/lock/cls_lock_client.h \
@@ -7193,6 +7492,7 @@ am__noinst_HEADERS_DIST = arch/intel.h arch/arm.h arch/probe.h \
 	test/messenger/simple_dispatcher.h \
 	test/messenger/xio_dispatcher.h \
 	test/compressor/compressor_example.h \
+	test/encoding/test_ceph_time.h \
 	test/librados_test_stub/LibradosTestStub.h \
 	test/librados_test_stub/MockTestMemIoCtxImpl.h \
 	test/librados_test_stub/MockTestMemRadosClient.h \
@@ -7240,15 +7540,18 @@ am__noinst_HEADERS_DIST = arch/intel.h arch/arm.h arch/probe.h \
 	test/unit.h test/journal/RadosTestFixture.h \
 	tools/rbd/ArgumentTypes.h tools/rbd/IndentStream.h \
 	tools/rbd/OptionPrinter.h tools/rbd/Shell.h tools/rbd/Utils.h \
-	tools/cephfs/JournalTool.h tools/cephfs/JournalScanner.h \
-	tools/cephfs/JournalFilter.h tools/cephfs/EventOutput.h \
-	tools/cephfs/Resetter.h tools/cephfs/Dumper.h \
-	tools/cephfs/TableTool.h tools/cephfs/MDSUtility.h \
-	tools/RadosDump.h tools/rados/RadosImport.h \
-	tools/ceph_objectstore_tool.h tools/rados/PoolDump.h \
-	tools/cephfs/DataScan.h cls_acl.cc cls_crypto.cc fetch_config \
-	logrotate.conf sample.ceph.conf bash_completion/ceph \
-	bash_completion/rados bash_completion/rbd \
+	tools/rbd_mirror/ClusterWatcher.h \
+	tools/rbd_mirror/ImageReplayer.h tools/rbd_mirror/Mirror.h \
+	tools/rbd_mirror/PoolWatcher.h tools/rbd_mirror/Replayer.h \
+	tools/rbd_mirror/types.h tools/cephfs/JournalTool.h \
+	tools/cephfs/JournalScanner.h tools/cephfs/JournalFilter.h \
+	tools/cephfs/EventOutput.h tools/cephfs/Resetter.h \
+	tools/cephfs/Dumper.h tools/cephfs/TableTool.h \
+	tools/cephfs/MDSUtility.h tools/RadosDump.h \
+	tools/rados/RadosImport.h tools/ceph_objectstore_tool.h \
+	tools/rados/PoolDump.h tools/cephfs/DataScan.h cls_acl.cc \
+	cls_crypto.cc fetch_config logrotate.conf sample.ceph.conf \
+	bash_completion/ceph bash_completion/rados bash_completion/rbd \
 	bash_completion/radosgw-admin mount/canonicalize.c \
 	mount/mtab.c objclass/objclass.h
 HEADERS = $(noinst_HEADERS)
@@ -7458,14 +7761,17 @@ TEST_LOG_COMPILE = $(TEST_LOG_COMPILER) $(AM_TEST_LOG_FLAGS) \
 	$(TEST_LOG_FLAGS)
 am__DIST_COMMON = $(srcdir)/Makefile-client.am \
 	$(srcdir)/Makefile-env.am $(srcdir)/Makefile-rocksdb.am \
-	$(srcdir)/Makefile-server.am $(srcdir)/Makefile.in \
-	$(srcdir)/acconfig.h.in $(srcdir)/arch/Makefile.am \
-	$(srcdir)/auth/Makefile.am $(srcdir)/brag/Makefile.am \
+	$(srcdir)/Makefile-server.am $(srcdir)/Makefile-spdk.am \
+	$(srcdir)/Makefile.in $(srcdir)/acconfig.h.in \
+	$(srcdir)/arch/Makefile.am $(srcdir)/auth/Makefile.am \
+	$(srcdir)/brag/Makefile.am \
 	$(srcdir)/ceph-detect-init/Makefile.am \
-	$(srcdir)/client/Makefile.am $(srcdir)/cls/Makefile-client.am \
+	$(srcdir)/ceph-disk/Makefile.am $(srcdir)/client/Makefile.am \
+	$(srcdir)/cls/Makefile-client.am \
 	$(srcdir)/cls/Makefile-server.am $(srcdir)/cls/Makefile.am \
 	$(srcdir)/common/Makefile.am $(srcdir)/compressor/Makefile.am \
 	$(srcdir)/compressor/snappy/Makefile.am \
+	$(srcdir)/compressor/zlib/Makefile.am \
 	$(srcdir)/crush/Makefile.am $(srcdir)/erasure-code/Makefile.am \
 	$(srcdir)/erasure-code/isa/Makefile.am \
 	$(srcdir)/erasure-code/jerasure/Makefile.am \
@@ -7520,12 +7826,28 @@ am__relativize = \
     dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \
   done; \
   reldir="$$dir2"
+
+# a workaround for http://debbugs.gnu.org/cgi/bugreport.cgi?bug=18744, this
+# bug was fixed in automake 1.15, but automake 1.13 is supported by us.  so
+# we can not just require 1.15 using `AM_INIT_AUTOMAKE`
+am__is_gnu_make = { \
+  if test -z '$(MAKELEVEL)'; then \
+    false; \
+  elif test -n '$(MAKE_HOST)'; then \
+    true; \
+  elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \
+    true; \
+  else \
+    false; \
+  fi; \
+}
+
 ACLOCAL = @ACLOCAL@
 AMTAR = @AMTAR@
 AM_CXXFLAGS = @AM_CXXFLAGS@ $(AM_COMMON_CFLAGS) -ftemplate-depth-1024 \
 	-Wnon-virtual-dtor -Wno-invalid-offsetof $(am__append_7) \
-	$(am__append_10) $(am__append_13) $(am__append_92) \
-	$(am__append_95)
+	$(am__append_10) $(am__append_13) $(am__append_100) \
+	$(am__append_103)
 AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@
 AR = @AR@
 ARM_CRC_FLAGS = @ARM_CRC_FLAGS@
@@ -7594,12 +7916,14 @@ JDK_CPPFLAGS = @JDK_CPPFLAGS@
 KEYUTILS_LIB = @KEYUTILS_LIB@
 LD = @LD@
 LDFLAGS = @LDFLAGS@
-LIBEDIT_CFLAGS = @LIBEDIT_CFLAGS@
-LIBEDIT_LIBS = @LIBEDIT_LIBS@
+LIBDPDK_CFLAGS = @LIBDPDK_CFLAGS@
+LIBDPDK_LIBS = @LIBDPDK_LIBS@
 LIBFUSE_CFLAGS = @LIBFUSE_CFLAGS@
 LIBFUSE_LIBS = @LIBFUSE_LIBS@
 LIBJEMALLOC = @LIBJEMALLOC@
 LIBOBJS = @LIBOBJS@
+LIBPCIACCESS_CFLAGS = @LIBPCIACCESS_CFLAGS@
+LIBPCIACCESS_LIBS = @LIBPCIACCESS_LIBS@
 LIBROCKSDB_CFLAGS = @LIBROCKSDB_CFLAGS@
 LIBROCKSDB_LIBS = @LIBROCKSDB_LIBS@
 LIBS = @LIBS@
@@ -7684,7 +8008,6 @@ datarootdir = @datarootdir@
 docdir = @docdir@
 dvidir = @dvidir@
 exec_prefix = @exec_prefix@
-group_rgw = @group_rgw@
 host = @host@
 host_alias = @host_alias@
 host_cpu = @host_cpu@
@@ -7715,7 +8038,6 @@ sharedstatedir = @sharedstatedir@
 srcdir = @srcdir@
 subdirs = @subdirs@
 sysconfdir = @sysconfdir@
-systemd_libexec_dir = @systemd_libexec_dir@
 systemd_unit_dir = @systemd_unit_dir@
 target = @target@
 target_alias = @target_alias@
@@ -7725,14 +8047,13 @@ target_vendor = @target_vendor@
 top_build_prefix = @top_build_prefix@
 top_builddir = @top_builddir@
 top_srcdir = @top_srcdir@
-user_rgw = @user_rgw@
 AUTOMAKE_OPTIONS = gnu subdir-objects
 SUBDIRS = ocf java
 DIST_SUBDIRS = gmock ocf java
-BUILT_SOURCES = $(am__append_247) $(am__append_269)
+BUILT_SOURCES = $(am__append_275) $(am__append_297)
 
 # extra bits
-EXTRA_DIST = $(am__append_30) ceph-detect-init/AUTHORS.rst \
+EXTRA_DIST = $(am__append_31) ceph-detect-init/AUTHORS.rst \
 	ceph-detect-init/ceph_detect_init/centos/__init__.py \
 	ceph-detect-init/ceph_detect_init/exc.py \
 	ceph-detect-init/ceph_detect_init/main.py \
@@ -7759,9 +8080,12 @@ EXTRA_DIST = $(am__append_30) ceph-detect-init/AUTHORS.rst \
 	ceph-detect-init/run-tox.sh ceph-detect-init/setup.py \
 	ceph-detect-init/test-requirements.txt \
 	ceph-detect-init/tests/test_all.py ceph-detect-init/tox.ini \
-	$(srcdir)/test/python/ceph-disk/setup.py \
-	$(srcdir)/test/python/ceph-disk/tox.ini \
-	$(srcdir)/test/python/ceph-disk/tests/test_ceph_disk.py \
+	ceph-disk/AUTHORS.rst ceph-disk/ceph_disk/__init__.py \
+	ceph-disk/ceph_disk/main.py ceph-disk/Makefile.am \
+	ceph-disk/MANIFEST.in ceph-disk/README.rst \
+	ceph-disk/requirements.txt ceph-disk/run-tox.sh \
+	ceph-disk/setup.py ceph-disk/test-requirements.txt \
+	ceph-disk/tests/test_main.py ceph-disk/tox.ini \
 	$(srcdir)/test/python/brag-client/setup.py \
 	$(srcdir)/test/python/brag-client/tox.ini \
 	$(srcdir)/test/python/brag-client/tests/test_ceph_brag.py \
@@ -7787,7 +8111,8 @@ EXTRA_DIST = $(am__append_30) ceph-detect-init/AUTHORS.rst \
 	$(srcdir)/test/opensuse-13.2/install-deps.sh \
 	$(srcdir)/test/opensuse-13.2/ceph.spec.in \
 	$(srcdir)/test/coverage.sh $(patsubst \
-	%,$(srcdir)/%,$(check_SCRIPTS)) rocksdb/appveyor.yml \
+	%,$(srcdir)/%,$(check_SCRIPTS)) tools/setup-virtualenv.sh \
+	rocksdb/appveyor.yml \
 	rocksdb/arcanist_util/config/FacebookArcanistConfiguration.php \
 	rocksdb/arcanist_util/cpp_linter/ArcanistCpplintLinter.php \
 	rocksdb/arcanist_util/cpp_linter/BaseDirectoryScopedFormatLinter.php \
@@ -8395,13 +8720,35 @@ EXTRA_DIST = $(am__append_30) ceph-detect-init/AUTHORS.rst \
 	rocksdb/util/xfunc.cc rocksdb/util/xfunc.h \
 	rocksdb/util/xxhash.cc rocksdb/util/xxhash.h \
 	rocksdb/Vagrantfile rocksdb/WINDOWS_PORT.md rocksdb/AUTHORS \
-	tracing/tracing-common.h $(srcdir)/pybind/setup.py \
-	$(srcdir)/pybind/rbd.pyx $(srcdir)/$(shell_scripts:%=%.in) \
-	$(srcdir)/vstart.sh $(srcdir)/stop.sh ceph-run \
-	$(srcdir)/ceph-osd-prestart.sh $(srcdir)/ceph_common.sh \
-	$(srcdir)/init-radosgw $(srcdir)/init-rbdmap \
-	$(srcdir)/ceph-clsinfo $(srcdir)/make_version \
-	$(srcdir)/.git_version $(srcdir)/ceph-rbdnamer \
+	spdk/PORTING.md spdk/README.md spdk/LICENSE spdk/CONFIG \
+	spdk/autotest.sh spdk/autopackage.sh spdk/Makefile \
+	spdk/autobuild.sh spdk/lib/nvme/nvme_ns.c \
+	spdk/lib/nvme/nvme_qpair.c spdk/lib/nvme/nvme_impl.h \
+	spdk/lib/nvme/nvme.c spdk/lib/nvme/nvme_ns_cmd.c \
+	spdk/lib/nvme/nvme_ctrlr.c spdk/lib/nvme/Makefile \
+	spdk/lib/nvme/nvme_internal.h spdk/lib/nvme/nvme_ctrlr_cmd.c \
+	spdk/lib/memory/vtophys.c spdk/lib/memory/Makefile \
+	spdk/lib/util/file.c spdk/lib/util/string.c \
+	spdk/lib/util/Makefile spdk/lib/util/pci.c spdk/lib/Makefile \
+	spdk/lib/ioat/ioat.c spdk/lib/ioat/ioat_pci.h \
+	spdk/lib/ioat/ioat_impl.h spdk/lib/ioat/Makefile \
+	spdk/lib/ioat/ioat_internal.h spdk/mk/spdk.deps.mk \
+	spdk/mk/spdk.subdirs.mk spdk/mk/spdk.common.mk \
+	spdk/include/spdk/string.h spdk/include/spdk/ioat.h \
+	spdk/include/spdk/queue.h spdk/include/spdk/pci_ids.h \
+	spdk/include/spdk/nvme.h spdk/include/spdk/ioat_spec.h \
+	spdk/include/spdk/nvme_spec.h spdk/include/spdk/nvme_intel.h \
+	spdk/include/spdk/pci.h spdk/include/spdk/vtophys.h \
+	spdk/include/spdk/queue_extras.h spdk/include/spdk/file.h \
+	spdk/include/spdk/assert.h spdk/include/spdk/barrier.h \
+	spdk/include/spdk/mmio.h tracing/tracing-common.h \
+	$(srcdir)/pybind/setup.py $(srcdir)/pybind/rbd.pyx \
+	$(srcdir)/$(shell_scripts:%=%.in) $(srcdir)/vstart.sh \
+	$(srcdir)/stop.sh ceph-run $(srcdir)/ceph-osd-prestart.sh \
+	$(srcdir)/ceph_common.sh $(srcdir)/init-radosgw \
+	$(srcdir)/init-rbdmap $(srcdir)/ceph-clsinfo \
+	$(srcdir)/make_version $(srcdir)/.git_version \
+	$(srcdir)/ceph-rbdnamer \
 	$(srcdir)/tools/ceph-monstore-update-crush.sh \
 	$(srcdir)/upstart/ceph-all.conf \
 	$(srcdir)/upstart/ceph-disk.conf \
@@ -8418,7 +8765,7 @@ EXTRA_DIST = $(am__append_30) ceph-detect-init/AUTHORS.rst \
 	$(srcdir)/upstart/radosgw.conf \
 	$(srcdir)/upstart/radosgw-all.conf \
 	$(srcdir)/upstart/radosgw-all-starter.conf \
-	$(srcdir)/upstart/rbdmap.conf ceph.in ceph-disk ceph-disk-udev \
+	$(srcdir)/upstart/rbdmap.conf ceph.in ceph-disk-udev \
 	ceph-create-keys ceph-rest-api ceph-crush-location \
 	mount.fuse.ceph rbd-replay-many rbdmap etc-rbdmap yasm-wrapper \
 	unittest_bufferlist.sh
@@ -8432,6 +8779,8 @@ dist_noinst_DATA = tracing/librados.tp tracing/librbd.tp \
 
 # lrc plugin
 
+# zlib plugin
+
 # snappy plugin
 
 # everything else we want to include in a 'make dist'
@@ -8458,10 +8807,10 @@ noinst_HEADERS = arch/intel.h arch/arm.h arch/probe.h \
 	crush/CrushWrapper.h crush/CrushWrapper.i crush/builder.h \
 	crush/crush.h crush/crush_compat.h crush/crush_ln_table.h \
 	crush/grammar.h crush/hash.h crush/mapper.h crush/sample.txt \
-	crush/types.h $(am__append_32) $(am__append_36) \
-	$(am__append_39) $(am__append_43) $(am__append_45) \
-	$(am__append_49) $(am__append_57) $(am__append_58) \
-	$(am__append_60) $(am__append_63) \
+	crush/types.h $(am__append_33) $(am__append_37) \
+	$(am__append_40) $(am__append_44) $(am__append_46) \
+	$(am__append_50) $(am__append_59) $(am__append_60) \
+	$(am__append_62) $(am__append_65) $(am__append_68) \
 	erasure-code/jerasure/gf-complete/include/gf_complete.h \
 	erasure-code/jerasure/gf-complete/include/gf_general.h \
 	erasure-code/jerasure/gf-complete/include/gf_int.h \
@@ -8491,14 +8840,15 @@ noinst_HEADERS = arch/intel.h arch/arm.h arch/probe.h \
 	erasure-code/jerasure/gf-complete/include/gf_rand.h \
 	erasure-code/jerasure/gf-complete/include/gf_method.h \
 	erasure-code/jerasure/gf-complete/include/gf_general.h \
-	$(am__append_81) erasure-code/ErasureCode.h \
+	$(am__append_86) erasure-code/ErasureCode.h \
 	erasure-code/ErasureCodeInterface.h \
 	erasure-code/ErasureCodePlugin.h \
+	compressor/zlib/CompressionZlib.h \
 	compressor/snappy/SnappyCompressor.h compressor/Compressor.h \
 	compressor/AsyncCompressor.h compressor/CompressionPlugin.h \
 	osdc/Filer.h osdc/Journaler.h osdc/ObjectCacher.h \
 	osdc/Objecter.h osdc/Striper.h osdc/WritebackHandler.h \
-	$(am__append_86) $(am__append_88) global/pidfile.h \
+	$(am__append_93) $(am__append_95) global/pidfile.h \
 	global/global_init.h global/global_context.h \
 	global/signal_handler.h json_spirit/json_spirit.h \
 	json_spirit/json_spirit_error_position.h \
@@ -8516,9 +8866,10 @@ noinst_HEADERS = arch/intel.h arch/arm.h arch/probe.h \
 	common/crc32c_intel_baseline.h common/crc32c_intel_fast.h \
 	common/crc32c_aarch64.h common/BackTrace.h \
 	common/RefCountedObj.h common/HeartbeatMap.h \
-	common/LogClient.h common/LogEntry.h common/Preforker.h \
-	common/SloppyCRCMap.h common/WorkQueue.h \
-	common/PrioritizedQueue.h common/ceph_argparse.h \
+	common/LogClient.h common/LogEntry.h common/Graylog.h \
+	common/Preforker.h common/SloppyCRCMap.h common/WorkQueue.h \
+	common/OpQueue.h common/PrioritizedQueue.h \
+	common/WeightedPriorityQueue.h common/ceph_argparse.h \
 	common/ceph_context.h common/xattr.h common/blkdev.h \
 	common/compiler_extensions.h common/debug.h common/dout.h \
 	common/escape.h common/fd.h common/version.h common/hex.h \
@@ -8526,16 +8877,17 @@ noinst_HEADERS = arch/intel.h arch/arm.h arch/probe.h \
 	common/environment.h common/likely.h common/lockdep.h \
 	common/obj_bencher.h common/snap_types.h common/Clock.h \
 	common/Cond.h common/ConfUtils.h common/DecayCounter.h \
-	common/Finisher.h common/Formatter.h common/perf_counters.h \
-	common/OutputDataSocket.h common/admin_socket.h \
-	common/admin_socket_client.h common/random_cache.hpp \
-	common/shared_cache.hpp common/tracked_int_ptr.hpp \
-	common/simple_cache.hpp common/sharedptr_registry.hpp \
-	common/map_cacher.hpp common/MemoryModel.h common/Mutex.h \
-	common/QueueRing.h common/PrebufferedStreambuf.h \
-	common/RWLock.h common/Semaphore.h common/SimpleRNG.h \
-	common/TextTable.h common/Thread.h common/Throttle.h \
-	common/Timer.h common/TrackedOp.h common/arch.h common/armor.h \
+	common/Finisher.h common/Formatter.h common/HTMLFormatter.h \
+	common/perf_counters.h common/OutputDataSocket.h \
+	common/admin_socket.h common/admin_socket_client.h \
+	common/random_cache.hpp common/shared_cache.hpp \
+	common/tracked_int_ptr.hpp common/simple_cache.hpp \
+	common/sharedptr_registry.hpp common/map_cacher.hpp \
+	common/MemoryModel.h common/Mutex.h common/QueueRing.h \
+	common/PrebufferedStreambuf.h common/RWLock.h \
+	common/Semaphore.h common/SimpleRNG.h common/TextTable.h \
+	common/Thread.h common/Throttle.h common/Timer.h \
+	common/TrackedOp.h common/arch.h common/armor.h \
 	common/common_init.h common/io_priority.h common/pipe.h \
 	common/code_environment.h common/signal.h common/simple_spin.h \
 	common/run_cmd.h common/safe_io.h common/config.h \
@@ -8550,7 +8902,8 @@ noinst_HEADERS = arch/intel.h arch/arm.h arch/probe.h \
 	common/SubProcess.h common/valgrind.h \
 	common/TracepointProvider.h common/event_socket.h \
 	common/PluginRegistry.h common/ceph_time.h common/ceph_timer.h \
-	$(am__append_109) common/secret.h msg/Connection.h \
+	common/align.h common/mutex_debug.h common/shunique_lock.h \
+	$(am__append_117) common/secret.h msg/Connection.h \
 	msg/Dispatcher.h msg/Message.h msg/Messenger.h \
 	msg/SimplePolicyMessenger.h msg/msg_types.h \
 	msg/simple/Accepter.h msg/simple/DispatchQueue.h \
@@ -8558,7 +8911,7 @@ noinst_HEADERS = arch/intel.h arch/arm.h arch/probe.h \
 	msg/simple/SimpleMessenger.h msg/async/AsyncConnection.h \
 	msg/async/AsyncMessenger.h msg/async/Event.h \
 	msg/async/EventEpoll.h msg/async/EventSelect.h \
-	msg/async/net_handler.h $(am__append_118) messages/MAuth.h \
+	msg/async/net_handler.h $(am__append_126) messages/MAuth.h \
 	messages/MAuthReply.h messages/MCacheExpire.h \
 	messages/MClientCaps.h messages/MClientCapRelease.h \
 	messages/MClientLease.h messages/MClientReconnect.h \
@@ -8619,11 +8972,14 @@ noinst_HEADERS = arch/intel.h arch/arm.h arch/probe.h \
 	include/Context.h include/CompatSet.h include/Distribution.h \
 	include/Spinlock.h include/addr_parsing.h include/assert.h \
 	include/atomic.h include/bitmapper.h include/blobhash.h \
-	include/buffer.h include/buffer_fwd.h include/byteorder.h \
+	include/btree_interval_set.h include/buffer.h \
+	include/buffer_fwd.h include/byteorder.h \
 	include/cephfs/libcephfs.h include/ceph_features.h \
 	include/ceph_frag.h include/ceph_fs.h include/ceph_hash.h \
 	include/cmp.h include/color.h include/compat.h \
-	include/sock_compat.h include/crc32c.h include/encoding.h \
+	include/cpp-btree/btree.h include/cpp-btree/btree_container.h \
+	include/cpp-btree/btree_map.h include/sock_compat.h \
+	include/crc32c.h include/encoding.h include/encoding_btree.h \
 	include/err.h include/error.h include/filepath.h \
 	include/frag.h include/hash.h include/inline_memory.h \
 	include/intarith.h include/interval_set.h include/int_types.h \
@@ -8646,16 +9002,16 @@ noinst_HEADERS = arch/intel.h arch/arm.h arch/probe.h \
 	include/util.h include/stat.h include/on_exit.h \
 	include/memory.h include/rados/memory.h \
 	include/unordered_set.h include/unordered_map.h \
-	include/timegm.h include/event_type.h $(am__append_124) \
-	$(am__append_127) $(am__append_128) $(am__append_133) \
-	$(am__append_139) $(am__append_143) $(am__append_146) \
-	$(am__append_147) $(am__append_153) $(am__append_175) \
-	$(am__append_181) $(am__append_198) $(am__append_204) \
-	$(am__append_216) test/bench/backend.h test/bench/bencher.h \
-	test/bench/detailed_stat_collector.h test/bench/distribution.h \
-	test/bench/dumb_backend.h test/bench/rados_backend.h \
-	test/bench/rbd_backend.h test/bench/stat_collector.h \
-	test/bench/testfilestore_backend.h \
+	include/timegm.h include/event_type.h $(am__append_132) \
+	$(am__append_135) $(am__append_136) $(am__append_141) \
+	$(am__append_147) $(am__append_151) $(am__append_154) \
+	$(am__append_155) $(am__append_161) $(am__append_197) \
+	$(am__append_203) $(am__append_215) $(am__append_223) \
+	$(am__append_229) $(am__append_241) test/bench/backend.h \
+	test/bench/bencher.h test/bench/detailed_stat_collector.h \
+	test/bench/distribution.h test/bench/dumb_backend.h \
+	test/bench/rados_backend.h test/bench/rbd_backend.h \
+	test/bench/stat_collector.h test/bench/testfilestore_backend.h \
 	test/common/ObjectContents.h test/encoding/types.h \
 	test/objectstore/DeterministicOpSequence.h \
 	test/objectstore/FileStoreDiff.h \
@@ -8674,7 +9030,7 @@ noinst_HEADERS = arch/intel.h arch/arm.h arch/probe.h \
 	test/system/st_rados_list_objects.h \
 	test/system/st_rados_notify.h test/system/st_rados_watch.h \
 	test/system/systest_runnable.h test/system/systest_settings.h \
-	test/unit.h test/journal/RadosTestFixture.h $(am__append_239) \
+	test/unit.h test/journal/RadosTestFixture.h $(am__append_265) \
 	tools/cephfs/JournalTool.h tools/cephfs/JournalScanner.h \
 	tools/cephfs/JournalFilter.h tools/cephfs/EventOutput.h \
 	tools/cephfs/Resetter.h tools/cephfs/Dumper.h \
@@ -8686,42 +9042,43 @@ noinst_HEADERS = arch/intel.h arch/arm.h arch/probe.h \
 	bash_completion/rados bash_completion/rbd \
 	bash_completion/radosgw-admin mount/canonicalize.c \
 	mount/mtab.c objclass/objclass.h
-bin_SCRIPTS = $(am__append_29) $(am__append_257) $(am__append_266) \
-	$(am__append_274)
+bin_SCRIPTS = $(am__append_30) $(am__append_285) $(am__append_294) \
+	$(am__append_302)
 sbin_SCRIPTS = 
-su_sbin_SCRIPTS = $(am__append_271)
+su_sbin_SCRIPTS = $(am__append_299)
 dist_bin_SCRIPTS = 
-lib_LTLIBRARIES = $(am__append_123) $(am__append_126) \
-	$(am__append_132) $(am__append_246) $(am__append_264) \
-	$(am__append_265)
+lib_LTLIBRARIES = $(am__append_131) $(am__append_134) \
+	$(am__append_140) $(am__append_274) $(am__append_292) \
+	$(am__append_293)
 noinst_LTLIBRARIES = libarch.la libauth.la libcrush.la libmon_types.la \
-	$(am__append_48) libosd_types.la liberasure_code.la \
-	libcompressor.la libosdc.la $(am__append_85) $(am__append_87) \
-	libglobal.la libjson_spirit.la liblog.la libperfglue.la \
-	libcommon_internal.la libcommon_crc.la $(am__append_107) \
-	libcommon.la $(am__append_110) libmsg.la $(am__append_119) \
-	librbd_types.la $(am__append_130) $(am__append_135) \
-	$(am__append_140) $(am__append_148) $(am__append_191) \
-	$(am__append_201) $(am__append_206) $(am__append_233) \
-	$(am__append_258)
-noinst_LIBRARIES = $(am__append_31) $(am__append_44) libos_types.a \
-	$(am__append_55) $(am__append_59) $(am__append_62) \
-	$(am__append_142)
-radoslib_LTLIBRARIES = $(am__append_144) $(am__append_145)
+	$(am__append_49) libosd_types.la $(am__append_87) \
+	liberasure_code.la libcompressor.la libosdc.la \
+	$(am__append_92) $(am__append_94) libglobal.la \
+	libjson_spirit.la liblog.la libperfglue.la \
+	libcommon_internal.la libcommon_crc.la $(am__append_115) \
+	libcommon.la $(am__append_118) libmsg.la $(am__append_127) \
+	librbd_types.la $(am__append_138) $(am__append_143) \
+	$(am__append_148) $(am__append_156) $(am__append_216) \
+	$(am__append_226) $(am__append_231) $(am__append_259) \
+	$(am__append_267) $(am__append_286)
+noinst_LIBRARIES = $(am__append_32) $(am__append_45) libos_types.a \
+	$(am__append_57) $(am__append_61) $(am__append_67) \
+	$(am__append_150)
+radoslib_LTLIBRARIES = $(am__append_152) $(am__append_153)
 
 # like bin_PROGRAMS, but these targets are only built for debug builds
-bin_DEBUGPROGRAMS = $(am__append_89) $(am__append_138) \
-	$(am__append_155) $(am__append_192) $(am__append_193) \
-	$(am__append_194) $(am__append_195) $(am__append_197) \
-	$(am__append_199) $(am__append_205) $(am__append_207) \
-	$(am__append_208) $(am__append_211) $(am__append_213) \
-	$(am__append_214) $(am__append_215) $(am__append_217) \
-	$(am__append_219) $(am__append_220) $(am__append_221) \
-	$(am__append_227) ceph_test_timers ceph_test_signal_handlers \
-	ceph_test_rewrite_latency ceph_test_crypto $(am__append_232) \
+bin_DEBUGPROGRAMS = $(am__append_96) $(am__append_146) \
+	$(am__append_163) $(am__append_217) $(am__append_218) \
+	$(am__append_219) $(am__append_220) $(am__append_222) \
+	$(am__append_224) $(am__append_230) $(am__append_232) \
+	$(am__append_233) $(am__append_236) $(am__append_238) \
+	$(am__append_239) $(am__append_240) $(am__append_242) \
+	$(am__append_244) $(am__append_246) $(am__append_247) \
+	$(am__append_253) ceph_test_timers ceph_test_signal_handlers \
+	ceph_test_rewrite_latency ceph_test_crypto $(am__append_258) \
 	ceph_bench_log ceph_test_objectcacher_stress \
-	ceph_test_cfuse_cache_invalidate $(am__append_236) \
-	$(am__append_237) $(am__append_241) $(am__append_242) \
+	ceph_test_cfuse_cache_invalidate $(am__append_262) \
+	$(am__append_263) $(am__append_269) $(am__append_270) \
 	ceph_psim
 
 # like sbin_SCRIPTS but can be used to install to e.g. /usr/sbin
@@ -8731,13 +9088,15 @@ ceph_sbindir = $(sbindir)
 su_sbindir = /sbin
 
 # C/C++ tests to build and executed will be appended to this
-check_TESTPROGRAMS = $(am__append_161) $(am__append_165) \
-	$(am__append_168) $(am__append_196) $(am__append_200) \
-	$(am__append_209) $(am__append_218) $(am__append_223) \
-	$(am__append_224) $(am__append_228) $(am__append_229) \
-	$(am__append_230) $(am__append_231) unittest_addrs \
-	$(am__append_235) unittest_bloom_filter unittest_histogram \
-	unittest_prioritized_queue unittest_str_map \
+check_TESTPROGRAMS = $(am__append_179) $(am__append_183) \
+	$(am__append_186) $(am__append_221) $(am__append_225) \
+	$(am__append_234) $(am__append_243) $(am__append_245) \
+	$(am__append_249) $(am__append_250) $(am__append_254) \
+	$(am__append_255) $(am__append_256) $(am__append_257) \
+	unittest_addrs $(am__append_261) unittest_bloom_filter \
+	unittest_histogram unittest_prioritized_queue \
+	unittest_weighted_priority_queue unittest_str_map \
+	unittest_mutex_debug unittest_shunique_lock \
 	unittest_sharedptr_registry unittest_shared_cache \
 	unittest_sloppy_crc_map unittest_time unittest_util \
 	unittest_crush_wrapper unittest_crush unittest_osdmap \
@@ -8755,7 +9114,7 @@ check_TESTPROGRAMS = $(am__append_161) $(am__append_165) \
 	unittest_safe_io unittest_heartbeatmap unittest_formatter \
 	unittest_daemon_config unittest_ipaddr unittest_texttable \
 	unittest_on_exit unittest_readahead unittest_tableformatter \
-	unittest_bit_vector
+	unittest_bit_vector unittest_interval_set
 
 # tests scripts will be appended to this
 
@@ -8779,13 +9138,31 @@ check_TESTPROGRAMS = $(am__append_161) $(am__append_165) \
 # You should have received a copy of the GNU Affero General Public License
 # along with this program.  If not, see `<http://www.gnu.org/licenses/>`.
 #
-check_SCRIPTS = ceph-detect-init/run-tox.sh $(am__append_152) \
-	$(am__append_203) test/ceph_objectstore_tool.py \
-	test/test-ceph-helpers.sh test/cephtool-test-osd.sh \
-	test/cephtool-test-mon.sh test/cephtool-test-mds.sh \
-	test/cephtool-test-rados.sh unittest_bufferlist.sh \
-	test/encoding/check-generated.sh test/mon/osd-pool-create.sh \
-	test/mon/misc.sh test/mon/osd-crush.sh test/mon/mon-ping.sh \
+
+#
+# Copyright (C) 2015 <contact at redhat.com>
+#
+# Author: Loic Dachary <loic at dachary.org>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Library Public License for more details.
+#
+check_SCRIPTS = ceph-detect-init/run-tox.sh ceph-disk/run-tox.sh \
+	$(am__append_160) $(am__append_228) \
+	test/ceph_objectstore_tool.py test/test-ceph-helpers.sh \
+	test/cephtool-test-osd.sh test/cephtool-test-mon.sh \
+	test/cephtool-test-mds.sh test/cephtool-test-rados.sh \
+	unittest_bufferlist.sh test/encoding/check-generated.sh \
+	test/mon/osd-pool-create.sh test/mon/misc.sh \
+	test/mon/osd-crush.sh test/mon/mon-ping.sh \
+	test/mon/mon-created-time.sh \
 	test/mon/osd-erasure-code-profile.sh test/mon/mkfs.sh \
 	test/mon/mon-scrub.sh test/osd/osd-scrub-repair.sh \
 	test/osd/osd-scrub-snaps.sh test/osd/osd-config.sh \
@@ -8793,7 +9170,7 @@ check_SCRIPTS = ceph-detect-init/run-tox.sh $(am__append_152) \
 	test/osd/osd-reactivate.sh test/osd/osd-copy-from.sh \
 	test/osd/osd-markdown.sh test/mon/mon-handle-forward.sh \
 	test/libradosstriper/rados-striper.sh \
-	test/test_objectstore_memstore.sh test/ceph-disk.sh \
+	test/test_objectstore_memstore.sh test/test_pidfile.sh \
 	test/pybind/test_ceph_argparse.py \
 	test/pybind/test_ceph_daemon.py \
 	../qa/workunits/erasure-code/encode-decode-non-regression.sh \
@@ -8833,7 +9210,7 @@ AM_COMMON_CFLAGS = \
 	-fsigned-char
 
 AM_CFLAGS = $(AM_COMMON_CFLAGS) $(am__append_6) $(am__append_12) \
-	$(am__append_91) $(am__append_94)
+	$(am__append_99) $(am__append_102)
 AM_CPPFLAGS = $(AM_COMMON_CPPFLAGS)
 
 # note: this is position dependant, it affects the -l options that
@@ -8849,41 +9226,41 @@ AM_CCASFLAGS = -f elf64
 
 #####################
 EXTRALIBS = -lm $(am__append_14) $(am__append_15) $(am__append_16) \
-	$(am__append_25)
+	$(am__append_26)
 LIBGLOBAL = libglobal.la
-LIBCOMMON = libcommon.la
+LIBCOMMON = libcommon.la -luuid
 LIBSECRET = libsecret.la
 LIBARCH = libarch.la
-LIBPERFGLUE = libperfglue.la $(am__append_19) $(am__append_20)
+LIBPERFGLUE = libperfglue.la $(am__append_20) $(am__append_21)
 LIBAUTH = libauth.la
 LIBMSG = libmsg.la
 LIBCRUSH = libcrush.la
 LIBCOMPRESSOR = libcompressor.la
 LIBJSON_SPIRIT = libjson_spirit.la
-LIBKV = libkv.a $(am__append_26) $(am__append_27) $(am__append_28) -lz \
+LIBKV = libkv.a $(am__append_27) $(am__append_28) $(am__append_29) -lz \
 	-lleveldb -lsnappy
 LIBLOG = liblog.la
-LIBOS = libos.a $(am__append_17) $(am__append_18) $(LIBOS_TYPES) \
-	$(LIBKV)
+LIBOS = libos.a $(am__append_17) $(am__append_18) $(am__append_19) \
+	$(LIBOS_TYPES) $(LIBKV) $(LIBFUSE_LIBS)
 LIBOS_TYPES = libos_types.a
 
 # Libosd always needs osdc and os
 
 # OSD needs types
-LIBOSD = libosd.a $(am__append_22) $(LIBOSDC) $(LIBOS) $(LIBPERFGLUE) \
+LIBOSD = libosd.a $(am__append_23) $(LIBOSDC) $(LIBOS) $(LIBPERFGLUE) \
 	$(LIBOSD_TYPES) $(LIBOS_TYPES)
 LIBOSD_TYPES = libosd_types.la
 LIBOSDC = libosdc.la
 
 # These have references to syms like ceph_using_tcmalloc(), glue libperfglue to them
-LIBMON = libmon.a $(am__append_21) $(LIBPERFGLUE) $(LIBMON_TYPES)
+LIBMON = libmon.a $(am__append_22) $(LIBPERFGLUE) $(LIBMON_TYPES)
 LIBMON_TYPES = libmon_types.la
-LIBMDS = libmds.la $(am__append_23) $(LIBPERFGLUE)
+LIBMDS = libmds.la $(am__append_24) $(LIBPERFGLUE)
 LIBCLIENT = libclient.la
 LIBCLIENT_FUSE = libclient_fuse.la
 LIBRADOS = librados.la
 LIBRADOSSTRIPER = libradosstriper.la
-LIBRGW = librgw.la $(am__append_24)
+LIBRGW = librgw.la $(am__append_25)
 LIBCIVETWEB = libcivetweb.la
 LIBRBD = librbd.la
 LIBRBD_TYPES = librbd_types.la
@@ -8898,26 +9275,26 @@ CEPH_GLOBAL = $(LIBGLOBAL) $(LIBCOMMON) $(PTHREAD_LIBS) -lm $(CRYPTO_LIBS) $(EXT
 
 # important; libmsg before libauth!
 LIBCOMMON_DEPS = libcommon_internal.la libcommon_crc.la \
-	$(am__append_106) $(LIBERASURE_CODE) $(LIBCOMPRESSOR) \
+	$(am__append_114) $(LIBERASURE_CODE) $(LIBCOMPRESSOR) \
 	$(LIBMSG) $(LIBAUTH) $(LIBCRUSH) $(LIBJSON_SPIRIT) $(LIBLOG) \
-	$(LIBARCH) $(BOOST_RANDOM_LIBS) $(am__append_108)
-LIBRADOS_DEPS = $(am__append_120)
-LIBRGW_DEPS = $(am__append_136)
+	$(LIBARCH) $(BOOST_RANDOM_LIBS) -luuid $(am__append_116)
+LIBRADOS_DEPS = $(am__append_128)
+LIBRGW_DEPS = $(am__append_144)
 
 # This is used by the dencoder test
 
 # Do not use TCMALLOC with dencoder
-DENCODER_SOURCES = $(am__append_46) perfglue/disabled_heap_profiler.cc \
-	perfglue/disabled_stubs.cc $(am__append_134)
-DENCODER_DEPS = $(am__append_47) $(am__append_129) $(am__append_141) \
-	$(am__append_149)
+DENCODER_SOURCES = $(am__append_47) perfglue/disabled_heap_profiler.cc \
+	perfglue/disabled_stubs.cc $(am__append_142)
+DENCODER_DEPS = $(am__append_48) $(am__append_137) $(am__append_149) \
+	$(am__append_157)
 radoslibdir = $(libdir)/rados-classes
-LOCAL_ALL = ceph-detect-init-all $(am__append_248)
-LOCAL_CLEAN = ceph-detect-init-clean $(am__append_249) \
+LOCAL_ALL = ceph-detect-init-all ceph-disk-all $(am__append_276)
+LOCAL_CLEAN = ceph-detect-init-clean ceph-disk-clean $(am__append_277) \
 	base-clean-local
 LOCAL_INSTALLDATA = ceph-detect-init-install-data \
-	base-install-data-local
-LOCAL_INSTALLEXEC = $(am__append_250)
+	ceph-disk-install-data base-install-data-local
+LOCAL_INSTALLEXEC = $(am__append_278)
 libarch_la_SOURCES = \
 	arch/intel.c \
 	arch/arm.c \
@@ -8951,12 +9328,12 @@ libcrush_la_SOURCES = \
 	crush/CrushTester.cc
 
 @ENABLE_SERVER_TRUE at libkv_a_SOURCES = kv/KeyValueDB.cc \
- at ENABLE_SERVER_TRUE@	kv/LevelDBStore.cc $(am__append_34) \
- at ENABLE_SERVER_TRUE@	$(am__append_37) $(am__append_40)
- at ENABLE_SERVER_TRUE@libkv_a_CXXFLAGS = ${AM_CXXFLAGS} $(am__append_33) \
- at ENABLE_SERVER_TRUE@	$(am__append_41)
- at ENABLE_SERVER_TRUE@libkv_a_LIBADD = $(am__append_35) $(am__append_38) \
+ at ENABLE_SERVER_TRUE@	kv/LevelDBStore.cc $(am__append_35) \
+ at ENABLE_SERVER_TRUE@	$(am__append_38) $(am__append_41)
+ at ENABLE_SERVER_TRUE@libkv_a_CXXFLAGS = ${AM_CXXFLAGS} $(am__append_34) \
 @ENABLE_SERVER_TRUE@	$(am__append_42)
+ at ENABLE_SERVER_TRUE@libkv_a_LIBADD = $(am__append_36) $(am__append_39) \
+ at ENABLE_SERVER_TRUE@	$(am__append_43)
 libmon_types_la_SOURCES = \
 	mon/PGMap.cc
 
@@ -9014,7 +9391,7 @@ LIBMDS_DEPS = $(LIBOSDC)
 @ENABLE_SERVER_TRUE@@WITH_MDS_TRUE at libmds_la_SOURCES = $(LIBMDS_SOURCES)
 @ENABLE_SERVER_TRUE@@WITH_MDS_TRUE at libmds_la_LIBADD = $(LIBMDS_DEPS)
 libos_types_a_SOURCES = os/kstore/kstore_types.cc os/Transaction.cc \
-	$(am__append_50)
+	$(am__append_51)
 libos_types_a_CXXFLAGS = ${AM_CXXFLAGS}
 @ENABLE_SERVER_TRUE at libos_a_SOURCES = os/filestore/chain_xattr.cc \
 @ENABLE_SERVER_TRUE@	os/filestore/DBObjectMap.cc \
@@ -9026,17 +9403,25 @@ libos_types_a_CXXFLAGS = ${AM_CXXFLAGS}
 @ENABLE_SERVER_TRUE@	os/filestore/JournalingObjectStore.cc \
 @ENABLE_SERVER_TRUE@	os/filestore/LFNIndex.cc \
 @ENABLE_SERVER_TRUE@	os/filestore/WBThrottle.cc os/fs/FS.cc \
- at ENABLE_SERVER_TRUE@	os/keyvaluestore/GenericObjectMap.cc \
- at ENABLE_SERVER_TRUE@	os/keyvaluestore/KeyValueStore.cc \
 @ENABLE_SERVER_TRUE@	os/kstore/kv.cc os/kstore/KStore.cc \
 @ENABLE_SERVER_TRUE@	os/memstore/MemStore.cc os/ObjectStore.cc \
- at ENABLE_SERVER_TRUE@	$(am__append_51) $(am__append_52) \
- at ENABLE_SERVER_TRUE@	$(am__append_53) $(am__append_54)
- at ENABLE_SERVER_TRUE@libos_a_CXXFLAGS = ${AM_CXXFLAGS} -I rocksdb/include -fPIC
+ at ENABLE_SERVER_TRUE@	$(am__append_52) $(am__append_53) \
+ at ENABLE_SERVER_TRUE@	$(am__append_54) $(am__append_55) \
+ at ENABLE_SERVER_TRUE@	$(am__append_56) $(am__append_64)
+ at ENABLE_SERVER_TRUE@libos_a_CXXFLAGS = ${AM_CXXFLAGS} -I \
+ at ENABLE_SERVER_TRUE@	rocksdb/include -fPIC $(am__append_63)
 @ENABLE_SERVER_TRUE at libos_a_LIBADD = libos_types.a libkv.a \
- at ENABLE_SERVER_TRUE@	$(am__append_56)
+ at ENABLE_SERVER_TRUE@	$(am__append_58)
 @ENABLE_SERVER_TRUE@@WITH_LIBZFS_TRUE at libos_zfs_a_SOURCES = os/fs/ZFS.cc
 @ENABLE_SERVER_TRUE@@WITH_LIBZFS_TRUE at libos_zfs_a_CXXFLAGS = ${AM_CXXFLAGS} ${LIBZFS_CFLAGS}
+ at ENABLE_SERVER_TRUE@@WITH_SPDK_TRUE at SPDK_SRCDIR = ${top_srcdir}/src/spdk/lib
+ at ENABLE_SERVER_TRUE@@WITH_SPDK_TRUE at LIBSPDK_CFLAGS = "-I${top_srcdir}/src/spdk/include"
+ at ENABLE_SERVER_TRUE@@WITH_SPDK_TRUE at LIBSPDK_LIBS = \
+ at ENABLE_SERVER_TRUE@@WITH_SPDK_TRUE@	${top_srcdir}/src/spdk/lib/nvme/libspdk_nvme.a \
+ at ENABLE_SERVER_TRUE@@WITH_SPDK_TRUE@	${top_srcdir}/src/spdk/lib/memory/libspdk_memory.a \
+ at ENABLE_SERVER_TRUE@@WITH_SPDK_TRUE@	${top_srcdir}/src/spdk/lib/util/libspdk_util.a \
+ at ENABLE_SERVER_TRUE@@WITH_SPDK_TRUE@	${top_srcdir}/src/spdk/lib/ioat/libspdk_ioat.a
+
 @ENABLE_SERVER_TRUE@@WITH_LIBAIO_TRUE at ceph_bluefs_tool_SOURCES = os/bluestore/bluefs_tool.cc
 @ENABLE_SERVER_TRUE@@WITH_LIBAIO_TRUE at ceph_bluefs_tool_LDADD = $(LIBOS) $(CEPH_GLOBAL)
 libosd_types_la_SOURCES = \
@@ -9066,10 +9451,10 @@ libosd_types_la_CXXFLAGS = ${AM_CXXFLAGS}
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libosd_a_LIBADD = 
 erasure_codelibdir = $(pkglibdir)/erasure-code
 erasure_codelib_LTLIBRARIES = libec_jerasure_generic.la \
-	$(am__append_66) $(am__append_68) $(am__append_70) \
+	$(am__append_71) $(am__append_73) $(am__append_75) \
 	libec_jerasure.la libec_lrc.la libec_shec_generic.la \
-	$(am__append_75) $(am__append_77) $(am__append_79) \
-	libec_shec.la $(am__append_83) $(am__append_159)
+	$(am__append_80) $(am__append_82) $(am__append_84) \
+	libec_shec.la $(am__append_89) $(am__append_168)
 jerasure_sources = \
   erasure-code/ErasureCode.cc \
   erasure-code/jerasure/jerasure/src/cauchy.c \
@@ -9102,7 +9487,7 @@ libec_jerasure_generic_la_CXXFLAGS = ${AM_CXXFLAGS} \
 
 libec_jerasure_generic_la_LIBADD = $(LIBCRUSH) $(PTHREAD_LIBS) $(EXTRALIBS)
 libec_jerasure_generic_la_LDFLAGS = ${AM_LDFLAGS} -module \
-	-avoid-version -shared $(am__append_64)
+	-avoid-version -shared $(am__append_69)
 libec_jerasure_neon_la_SOURCES = ${jerasure_sources}                                       \
                                   erasure-code/jerasure/gf-complete/src/neon/gf_w4_neon.c  \
                                   erasure-code/jerasure/gf-complete/src/neon/gf_w8_neon.c  \
@@ -9122,7 +9507,7 @@ libec_jerasure_neon_la_CXXFLAGS = ${AM_CXXFLAGS} \
 
 libec_jerasure_neon_la_LIBADD = $(LIBCRUSH) $(PTHREAD_LIBS) $(EXTRALIBS)
 libec_jerasure_neon_la_LDFLAGS = ${AM_LDFLAGS} -module -avoid-version \
-	-shared $(am__append_65)
+	-shared $(am__append_70)
 libec_jerasure_sse3_la_SOURCES = ${jerasure_sources}
 libec_jerasure_sse3_la_CFLAGS = ${AM_CFLAGS}  \
 	${INTEL_SSE_FLAGS} \
@@ -9142,7 +9527,7 @@ libec_jerasure_sse3_la_CXXFLAGS = ${AM_CXXFLAGS} \
 
 libec_jerasure_sse3_la_LIBADD = $(LIBCRUSH) $(PTHREAD_LIBS) $(EXTRALIBS)
 libec_jerasure_sse3_la_LDFLAGS = ${AM_LDFLAGS} -module -avoid-version \
-	-shared $(am__append_67)
+	-shared $(am__append_72)
 libec_jerasure_sse4_la_SOURCES = ${jerasure_sources}
 libec_jerasure_sse4_la_CFLAGS = ${AM_CFLAGS}  \
 	${INTEL_SSE_FLAGS} \
@@ -9166,7 +9551,7 @@ libec_jerasure_sse4_la_CXXFLAGS = ${AM_CXXFLAGS} \
 
 libec_jerasure_sse4_la_LIBADD = $(LIBCRUSH) $(PTHREAD_LIBS) $(EXTRALIBS)
 libec_jerasure_sse4_la_LDFLAGS = ${AM_LDFLAGS} -module -avoid-version \
-	-shared $(am__append_69)
+	-shared $(am__append_74)
 libec_jerasure_la_SOURCES = \
 	erasure-code/jerasure/ErasureCodePluginSelectJerasure.cc
 
@@ -9174,7 +9559,7 @@ libec_jerasure_la_CFLAGS = ${AM_CFLAGS}
 libec_jerasure_la_CXXFLAGS = ${AM_CXXFLAGS}
 libec_jerasure_la_LIBADD = $(LIBCRUSH) $(PTHREAD_LIBS) $(EXTRALIBS)
 libec_jerasure_la_LDFLAGS = ${AM_LDFLAGS} -module -avoid-version \
-	-shared $(am__append_71)
+	-shared $(am__append_76)
 lrc_sources = \
   erasure-code/ErasureCode.cc \
   erasure-code/lrc/ErasureCodePluginLrc.cc \
@@ -9185,7 +9570,7 @@ libec_lrc_la_CFLAGS = ${AM_CFLAGS}
 libec_lrc_la_CXXFLAGS = ${AM_CXXFLAGS}
 libec_lrc_la_LIBADD = $(LIBCRUSH) $(PTHREAD_LIBS) $(LIBJSON_SPIRIT)
 libec_lrc_la_LDFLAGS = ${AM_LDFLAGS} -module -avoid-version -shared \
-	$(am__append_72)
+	$(am__append_77)
 
 # SHEC plugin
 shec_sources = \
@@ -9226,7 +9611,7 @@ libec_shec_generic_la_CXXFLAGS = ${AM_CXXFLAGS} \
 
 libec_shec_generic_la_LIBADD = $(LIBCRUSH) $(PTHREAD_LIBS) $(EXTRALIBS)
 libec_shec_generic_la_LDFLAGS = ${AM_LDFLAGS} -module -avoid-version \
-	-shared $(am__append_73)
+	-shared $(am__append_78)
 libec_shec_neon_la_SOURCES = ${shec_sources} \
 	erasure-code/jerasure/gf-complete/src/neon/gf_w4_neon.c \
 	erasure-code/jerasure/gf-complete/src/neon/gf_w8_neon.c \
@@ -9250,7 +9635,7 @@ libec_shec_neon_la_CXXFLAGS = ${AM_CXXFLAGS} \
 
 libec_shec_neon_la_LIBADD = $(LIBCRUSH) $(PTHREAD_LIBS) $(EXTRALIBS)
 libec_shec_neon_la_LDFLAGS = ${AM_LDFLAGS} -module -avoid-version \
-	-shared $(am__append_74)
+	-shared $(am__append_79)
 libec_shec_sse3_la_SOURCES = ${shec_sources}
 libec_shec_sse3_la_CFLAGS = ${AM_CFLAGS}  \
 	${INTEL_SSE_FLAGS} \
@@ -9274,7 +9659,7 @@ libec_shec_sse3_la_CXXFLAGS = ${AM_CXXFLAGS} \
 
 libec_shec_sse3_la_LIBADD = $(LIBCRUSH) $(PTHREAD_LIBS) $(EXTRALIBS)
 libec_shec_sse3_la_LDFLAGS = ${AM_LDFLAGS} -module -avoid-version \
-	-shared $(am__append_76)
+	-shared $(am__append_81)
 libec_shec_sse4_la_SOURCES = ${shec_sources}
 libec_shec_sse4_la_CFLAGS = ${AM_CFLAGS}  \
 	${INTEL_SSE_FLAGS} \
@@ -9302,7 +9687,7 @@ libec_shec_sse4_la_CXXFLAGS = ${AM_CXXFLAGS} \
 
 libec_shec_sse4_la_LIBADD = $(LIBCRUSH) $(PTHREAD_LIBS) $(EXTRALIBS)
 libec_shec_sse4_la_LDFLAGS = ${AM_LDFLAGS} -module -avoid-version \
-	-shared $(am__append_78)
+	-shared $(am__append_83)
 libec_shec_la_SOURCES = \
 	erasure-code/shec/ErasureCodePluginSelectShec.cc
 
@@ -9310,7 +9695,7 @@ libec_shec_la_CFLAGS = ${AM_CFLAGS}
 libec_shec_la_CXXFLAGS = ${AM_CXXFLAGS}
 libec_shec_la_LIBADD = $(LIBCRUSH) $(PTHREAD_LIBS) $(EXTRALIBS)
 libec_shec_la_LDFLAGS = ${AM_LDFLAGS} -module -avoid-version -shared \
-	$(am__append_80)
+	$(am__append_85)
 @WITH_BETTER_YASM_ELF64_TRUE at isa_sources = \
 @WITH_BETTER_YASM_ELF64_TRUE@	erasure-code/ErasureCode.cc \
 @WITH_BETTER_YASM_ELF64_TRUE@	erasure-code/isa/isa-l/erasure_code/ec_base.c \
@@ -9359,22 +9744,35 @@ libec_shec_la_LDFLAGS = ${AM_LDFLAGS} -module -avoid-version -shared \
 @WITH_BETTER_YASM_ELF64_TRUE@	erasure-code/isa/ErasureCodePluginIsa.cc \
 @WITH_BETTER_YASM_ELF64_TRUE@	erasure-code/isa/xor_op.cc
 
- at WITH_BETTER_YASM_ELF64_TRUE@libec_isa_la_SOURCES = ${isa_sources}
- at WITH_BETTER_YASM_ELF64_TRUE@libec_isa_la_CFLAGS = ${AM_CFLAGS} -I $(srcdir)/erasure-code/isa/isa-l/include/
- at WITH_BETTER_YASM_ELF64_TRUE@libec_isa_la_CXXFLAGS = ${AM_CXXFLAGS} -I $(srcdir)/erasure-code/isa/isa-l/include/
- at WITH_BETTER_YASM_ELF64_TRUE@libec_isa_la_CCASFLAGS = ${AM_CCASFLAGS} -I $(abs_srcdir)/erasure-code/isa/isa-l/include/
- at WITH_BETTER_YASM_ELF64_TRUE@libec_isa_la_LIBADD = $(LIBCRUSH) $(PTHREAD_LIBS) $(EXTRALIBS)
+ at WITH_BETTER_YASM_ELF64_TRUE@libisa_la_SOURCES = ${isa_sources}
+ at WITH_BETTER_YASM_ELF64_TRUE@libisa_la_CFLAGS = ${AM_CFLAGS} -I $(srcdir)/erasure-code/isa/isa-l/include/
+ at WITH_BETTER_YASM_ELF64_TRUE@libisa_la_CXXFLAGS = ${AM_CXXFLAGS}
+ at WITH_BETTER_YASM_ELF64_TRUE@libisa_la_CCASFLAGS = ${AM_CCASFLAGS} -I $(srcdir)/erasure-code/isa/isa-l/include/
+ at WITH_BETTER_YASM_ELF64_TRUE@libisa_la_LIBTOOLFLAGS = --tag=CC
+ at WITH_BETTER_YASM_ELF64_TRUE@libec_isa_la_SOURCES = 
+ at WITH_BETTER_YASM_ELF64_TRUE@libec_isa_la_LIBADD = libisa.la $(LIBCRUSH) $(PTHREAD_LIBS) $(EXTRALIBS)
 @WITH_BETTER_YASM_ELF64_TRUE at libec_isa_la_LDFLAGS = ${AM_LDFLAGS} \
 @WITH_BETTER_YASM_ELF64_TRUE@	-module -avoid-version -shared \
- at WITH_BETTER_YASM_ELF64_TRUE@	$(am__append_82)
- at WITH_BETTER_YASM_ELF64_TRUE@libec_isa_la_LIBTOOLFLAGS = --tag=CC
+ at WITH_BETTER_YASM_ELF64_TRUE@	$(am__append_88)
 liberasure_code_la_SOURCES = \
 	erasure-code/ErasureCodePlugin.cc
 
 liberasure_code_la_DEPENDENCIES = $(erasure_codelib_LTLIBRARIES)
 @LINUX_TRUE at liberasure_code_la_LIBADD = -ldl
 compressorlibdir = $(pkglibdir)/compressor
-compressorlib_LTLIBRARIES = libceph_snappy.la $(am__append_182)
+compressorlib_LTLIBRARIES = libceph_zlib.la libceph_snappy.la \
+	$(am__append_204)
+zlib_sources = \
+  compressor/Compressor.cc \
+  compressor/zlib/CompressionPluginZlib.cc \
+  compressor/zlib/CompressionZlib.cc
+
+libceph_zlib_la_SOURCES = ${zlib_sources}
+libceph_zlib_la_CFLAGS = ${AM_CFLAGS}  
+libceph_zlib_la_CXXFLAGS = ${AM_CXXFLAGS} 
+libceph_zlib_la_LIBADD = $(LIBCRUSH) $(PTHREAD_LIBS) $(EXTRALIBS)
+libceph_zlib_la_LDFLAGS = ${AM_LDFLAGS} -lz -version-info 2:0:0 \
+	$(am__append_90)
 snappy_sources = \
   compressor/Compressor.cc \
   compressor/snappy/CompressionPluginSnappy.cc
@@ -9388,7 +9786,7 @@ libceph_snappy_la_CXXFLAGS = ${AM_CXXFLAGS} \
 
 libceph_snappy_la_LIBADD = $(LIBCRUSH) $(PTHREAD_LIBS) $(EXTRALIBS)
 libceph_snappy_la_LDFLAGS = ${AM_LDFLAGS} -lsnappy -version-info 2:0:0 \
-	$(am__append_84)
+	$(am__append_91)
 libcompressor_la_SOURCES = \
 	compressor/Compressor.cc \
 	compressor/AsyncCompressor.cc
@@ -9412,7 +9810,7 @@ libosdc_la_SOURCES = \
 @ENABLE_CLIENT_TRUE@	client/Trace.cc \
 @ENABLE_CLIENT_TRUE@	client/posix_acl.cc
 
- at ENABLE_CLIENT_TRUE@libclient_la_LIBADD = $(LIBOSDC) $(LIBEDIT_LIBS)
+ at ENABLE_CLIENT_TRUE@libclient_la_LIBADD = $(LIBOSDC)
 @ENABLE_CLIENT_TRUE@@WITH_FUSE_TRUE at libclient_fuse_la_SOURCES = client/fuse_ll.cc
 @ENABLE_CLIENT_TRUE@@WITH_FUSE_TRUE at libclient_fuse_la_LIBADD = libclient.la $(LIBFUSE_LIBS)
 @ENABLE_CLIENT_TRUE@@WITH_FUSE_TRUE at libclient_fuse_la_CXXFLAGS = $(AM_CXXFLAGS) $(LIBFUSE_CFLAGS)
@@ -9424,7 +9822,7 @@ libglobal_la_SOURCES = \
 	global/signal_handler.cc \
 	common/TrackedOp.cc
 
-libglobal_la_LIBADD = $(LIBCOMMON)
+libglobal_la_LIBADD = $(LIBCOMMON) $(am__append_97)
 libjson_spirit_la_SOURCES = \
 	json_spirit/json_spirit_reader.cpp \
 	json_spirit/json_spirit_writer.cpp
@@ -9434,16 +9832,17 @@ liblog_la_SOURCES = \
 	log/Log.cc \
 	log/SubsystemMap.cc
 
-libperfglue_la_SOURCES = $(am__append_90) $(am__append_93) \
-	$(am__append_96) $(am__append_97) $(am__append_98)
+libperfglue_la_SOURCES = $(am__append_98) $(am__append_101) \
+	$(am__append_104) $(am__append_105) $(am__append_106)
 @WITH_TCMALLOC_FALSE@@WITH_TCMALLOC_MINIMAL_TRUE at libperfglue_la_LIBADD = -ltcmalloc_minimal
 @WITH_TCMALLOC_TRUE at libperfglue_la_LIBADD = -ltcmalloc
 
 # these should go out of libcommon_internal
 libcommon_internal_la_SOURCES = ceph_ver.c common/DecayCounter.cc \
-	common/LogClient.cc common/LogEntry.cc \
+	common/LogClient.cc common/LogEntry.cc common/Graylog.cc \
 	common/PrebufferedStreambuf.cc common/SloppyCRCMap.cc \
-	common/BackTrace.cc common/perf_counters.cc common/Mutex.cc \
+	common/BackTrace.cc common/perf_counters.cc \
+	common/mutex_debug.cc common/Mutex.cc \
 	common/OutputDataSocket.cc common/admin_socket.cc \
 	common/admin_socket_client.cc common/cmdparse.cc \
 	common/escape.c common/io_priority.cc common/ceph_time.cc \
@@ -9457,8 +9856,9 @@ libcommon_internal_la_SOURCES = ceph_ver.c common/DecayCounter.cc \
 	common/ceph_context.cc common/types.cc \
 	common/code_environment.cc common/dout.cc common/histogram.cc \
 	common/signal.cc common/simple_spin.cc common/Thread.cc \
-	common/Formatter.cc common/HeartbeatMap.cc common/config.cc \
-	common/utf8.c common/mime.c common/strtol.cc common/page.cc \
+	common/Formatter.cc common/HTMLFormatter.cc \
+	common/HeartbeatMap.cc common/config.cc common/utf8.c \
+	common/mime.c common/strtol.cc common/page.cc \
 	common/lockdep.cc common/version.cc common/hex.cc \
 	common/entity_name.cc common/ceph_crypto.cc \
 	common/ceph_crypto_cms.cc common/TextTable.cc \
@@ -9467,8 +9867,8 @@ libcommon_internal_la_SOURCES = ceph_ver.c common/DecayCounter.cc \
 	common/bloom_filter.cc common/module.c common/Readahead.cc \
 	common/Cycles.cc common/ContextCompletion.cc \
 	common/TracepointProvider.cc common/PluginRegistry.cc \
-	$(am__append_99) $(am__append_100) $(am__append_101) \
-	$(am__append_102) $(am__append_103) $(am__append_104) \
+	$(am__append_107) $(am__append_108) $(am__append_109) \
+	$(am__append_110) $(am__append_111) $(am__append_112) \
 	mon/MonCap.cc mon/MonClient.cc mon/MonMap.cc osd/OSDMap.cc \
 	osd/osd_types.cc osd/ECMsgTypes.cc osd/HitSet.cc mds/MDSMap.cc \
 	mds/inode_backtrace.cc mds/mdstypes.cc mds/flock.cc
@@ -9476,7 +9876,7 @@ libcommon_internal_la_SOURCES = ceph_ver.c common/DecayCounter.cc \
 # inject crc in common
 libcommon_crc_la_SOURCES = common/sctp_crc32.c common/crc32c.cc \
 	common/crc32c_intel_baseline.c common/crc32c_intel_fast.c \
-	$(am__append_105)
+	$(am__append_113)
 @WITH_GOOD_YASM_ELF64_TRUE at libcommon_crc_la_LIBTOOLFLAGS = --tag=CC
 @HAVE_ARMV8_CRC_TRUE at libcommon_crc_aarch64_la_SOURCES = common/crc32c_aarch64.c
 @HAVE_ARMV8_CRC_TRUE at libcommon_crc_aarch64_la_CFLAGS = $(AM_CFLAGS) $(ARM_CRC_FLAGS)
@@ -9490,9 +9890,9 @@ libmsg_la_SOURCES = msg/Message.cc msg/Messenger.cc msg/msg_types.cc \
 	msg/simple/SimpleMessenger.cc msg/async/AsyncConnection.cc \
 	msg/async/AsyncMessenger.cc msg/async/Event.cc \
 	msg/async/net_handler.cc msg/async/EventSelect.cc \
-	$(am__append_111) $(am__append_112) $(am__append_113) \
-	$(am__append_114) $(am__append_115) $(am__append_116) \
-	$(am__append_117)
+	$(am__append_119) $(am__append_120) $(am__append_121) \
+	$(am__append_122) $(am__append_123) $(am__append_124) \
+	$(am__append_125)
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at rados_includedir = $(includedir)/rados
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at rados_include_DATA = \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(srcdir)/include/rados/librados.h \
@@ -9536,12 +9936,12 @@ libmsg_la_SOURCES = msg/Message.cc msg/Messenger.cc msg/msg_types.cc \
 # We need this to avoid basename conflicts with the librados build tests in test/Makefile.am
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at librados_la_CXXFLAGS =  \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	${AM_CXXFLAGS} \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__append_121)
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__append_129)
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at librados_la_LIBADD = $(LIBRADOS_DEPS) $(PTHREAD_LIBS) $(CRYPTO_LIBS) $(EXTRALIBS)
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at librados_la_LDFLAGS =  \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	${AM_LDFLAGS} \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	-version-info 2:0:0 \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__append_122)
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__append_130)
 @ENABLE_CLIENT_TRUE@@WITH_RADOSSTRIPER_TRUE@@WITH_RADOS_TRUE at libradosstriper_la_SOURCES = \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSSTRIPER_TRUE@@WITH_RADOS_TRUE@	libradosstriper/libradosstriper.cc \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSSTRIPER_TRUE@@WITH_RADOS_TRUE@	libradosstriper/RadosStriperImpl.cc \
@@ -9555,7 +9955,7 @@ libmsg_la_SOURCES = msg/Message.cc msg/Messenger.cc msg/msg_types.cc \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSSTRIPER_TRUE@@WITH_RADOS_TRUE at libradosstriper_la_LDFLAGS = ${AM_LDFLAGS} \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSSTRIPER_TRUE@@WITH_RADOS_TRUE@	-version-info \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSSTRIPER_TRUE@@WITH_RADOS_TRUE@	1:0:0 \
- at ENABLE_CLIENT_TRUE@@WITH_RADOSSTRIPER_TRUE@@WITH_RADOS_TRUE@	$(am__append_125)
+ at ENABLE_CLIENT_TRUE@@WITH_RADOSSTRIPER_TRUE@@WITH_RADOS_TRUE@	$(am__append_133)
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at libjournal_la_SOURCES = \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	journal/AsyncOpTracker.cc \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	journal/Entry.cc \
@@ -9571,7 +9971,7 @@ libmsg_la_SOURCES = msg/Message.cc msg/Messenger.cc msg/msg_types.cc \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	journal/Utils.cc
 
 librbd_types_la_SOURCES = \
-	librbd/journal/Entries.cc \
+	librbd/journal/Types.cc \
 	librbd/WatchNotifyTypes.cc
 
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at librbd_internal_la_SOURCES = \
@@ -9644,7 +10044,7 @@ librbd_types_la_SOURCES = \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at librbd_la_LDFLAGS = ${AM_LDFLAGS} \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	-version-info \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	1:0:0 \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(am__append_131)
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(am__append_139)
 @ENABLE_CLIENT_TRUE@@LINUX_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at librbd_la_CXXFLAGS = -fvisibility=hidden -fvisibility-inlines-hidden
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE at librgw_la_SOURCES = \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	rgw/librgw.cc \
@@ -9656,6 +10056,7 @@ librbd_types_la_SOURCES = \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	rgw/rgw_xml.cc \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	rgw/rgw_usage.cc \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	rgw/rgw_json_enc.cc \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	rgw/rgw_xml_enc.cc \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	rgw/rgw_user.cc \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	rgw/rgw_bucket.cc\
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	rgw/rgw_tools.cc \
@@ -9682,7 +10083,8 @@ librbd_types_la_SOURCES = \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	rgw/rgw_keystone.cc \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	rgw/rgw_quota.cc \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	rgw/rgw_dencoder.cc \
- at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	rgw/rgw_object_expirer_core.cc
+ at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	rgw/rgw_object_expirer_core.cc \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	rgw/rgw_website.cc
 
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE at librgw_la_CXXFLAGS = -Woverloaded-virtual ${AM_CXXFLAGS}
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE at CIVETWEB_INCLUDE = --include $(srcdir)/civetweb/include/civetweb_conf.h
@@ -9885,7 +10287,7 @@ librbd_types_la_SOURCES = \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(LIBOSD) $(LIBCOMMON) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(BOOST_PROGRAM_OPTIONS_LIBS) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(CEPH_GLOBAL) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_154)
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_162)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at ceph_erasure_code_non_regression_SOURCES = \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	test/erasure-code/ceph_erasure_code_non_regression.cc
 
@@ -9893,7 +10295,7 @@ librbd_types_la_SOURCES = \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(LIBOSD) $(LIBCOMMON) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(BOOST_PROGRAM_OPTIONS_LIBS) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(CEPH_GLOBAL) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_156)
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_164)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at ceph_erasure_code_SOURCES = \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	test/erasure-code/ceph_erasure_code.cc
 
@@ -9901,7 +10303,7 @@ librbd_types_la_SOURCES = \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(LIBOSD) $(LIBCOMMON) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(BOOST_PROGRAM_OPTIONS_LIBS) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(CEPH_GLOBAL) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_158)
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_166)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_example_la_SOURCES = \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	erasure-code/ErasureCode.cc \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	test/erasure-code/ErasureCodePluginExample.cc
@@ -9909,52 +10311,82 @@ librbd_types_la_SOURCES = \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_example_la_CFLAGS = ${AM_CFLAGS}
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_example_la_CXXFLAGS = ${AM_CXXFLAGS}
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_example_la_LIBADD = $(LIBCRUSH) $(PTHREAD_LIBS) $(EXTRALIBS)
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_example_la_LDFLAGS = ${AM_LDFLAGS} -export-symbols-regex '.*__erasure_code_.*'
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_example_la_LDFLAGS =  \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	${AM_LDFLAGS} -module \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	-avoid-version -shared \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_167)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_missing_entry_point_la_SOURCES = test/erasure-code/ErasureCodePluginMissingEntryPoint.cc
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_missing_entry_point_la_CFLAGS = ${AM_CFLAGS}
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_missing_entry_point_la_CXXFLAGS = ${AM_CXXFLAGS}
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_missing_entry_point_la_LIBADD = $(PTHREAD_LIBS) $(EXTRALIBS)
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_missing_entry_point_la_LDFLAGS = ${AM_LDFLAGS} -export-symbols-regex '.*__erasure_code_.*'
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_missing_entry_point_la_LDFLAGS =  \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	${AM_LDFLAGS} -module \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	-avoid-version -shared \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_169)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_missing_version_la_SOURCES = test/erasure-code/ErasureCodePluginMissingVersion.cc
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_missing_version_la_CFLAGS = ${AM_CFLAGS}
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_missing_version_la_CXXFLAGS = ${AM_CXXFLAGS}
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_missing_version_la_LIBADD = $(PTHREAD_LIBS) $(EXTRALIBS)
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_missing_version_la_LDFLAGS = ${AM_LDFLAGS} -export-symbols-regex '.*__erasure_code_.*'
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_missing_version_la_LDFLAGS =  \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	${AM_LDFLAGS} -module \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	-avoid-version -shared \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_170)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_hangs_la_SOURCES = test/erasure-code/ErasureCodePluginHangs.cc
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_hangs_la_CFLAGS = ${AM_CFLAGS}
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_hangs_la_CXXFLAGS = ${AM_CXXFLAGS}
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_hangs_la_LIBADD = $(PTHREAD_LIBS) $(EXTRALIBS)
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_hangs_la_LDFLAGS = ${AM_LDFLAGS} -export-symbols-regex '.*__erasure_code_.*'
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_hangs_la_LDFLAGS =  \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	${AM_LDFLAGS} -module \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	-avoid-version -shared \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_171)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_fail_to_initialize_la_SOURCES = test/erasure-code/ErasureCodePluginFailToInitialize.cc
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_fail_to_initialize_la_CFLAGS = ${AM_CFLAGS}
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_fail_to_initialize_la_CXXFLAGS = ${AM_CXXFLAGS}
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_fail_to_initialize_la_LIBADD = $(PTHREAD_LIBS) $(EXTRALIBS)
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_fail_to_initialize_la_LDFLAGS = ${AM_LDFLAGS} -export-symbols-regex '.*__erasure_code_.*'
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_fail_to_initialize_la_LDFLAGS =  \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	${AM_LDFLAGS} -module \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	-avoid-version -shared \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_172)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_fail_to_register_la_SOURCES = test/erasure-code/ErasureCodePluginFailToRegister.cc
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_fail_to_register_la_CFLAGS = ${AM_CFLAGS}
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_fail_to_register_la_CXXFLAGS = ${AM_CXXFLAGS}
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_fail_to_register_la_LIBADD = $(PTHREAD_LIBS) $(EXTRALIBS)
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_fail_to_register_la_LDFLAGS = ${AM_LDFLAGS} -export-symbols-regex '.*__erasure_code_.*'
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_fail_to_register_la_LDFLAGS =  \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	${AM_LDFLAGS} -module \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	-avoid-version -shared \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_173)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_test_jerasure_neon_la_SOURCES = test/erasure-code/TestJerasurePluginNEON.cc
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_test_jerasure_neon_la_CFLAGS = ${AM_CFLAGS}
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_test_jerasure_neon_la_CXXFLAGS = ${AM_CXXFLAGS}
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_test_jerasure_neon_la_LIBADD = $(PTHREAD_LIBS) $(EXTRALIBS)
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_test_jerasure_neon_la_LDFLAGS = ${AM_LDFLAGS} -export-symbols-regex '.*__erasure_code_.*'
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_test_jerasure_neon_la_LDFLAGS =  \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	${AM_LDFLAGS} -module \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	-avoid-version -shared \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_174)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_test_jerasure_sse4_la_SOURCES = test/erasure-code/TestJerasurePluginSSE4.cc
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_test_jerasure_sse4_la_CFLAGS = ${AM_CFLAGS}
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_test_jerasure_sse4_la_CXXFLAGS = ${AM_CXXFLAGS}
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_test_jerasure_sse4_la_LIBADD = $(PTHREAD_LIBS) $(EXTRALIBS)
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_test_jerasure_sse4_la_LDFLAGS = ${AM_LDFLAGS} -export-symbols-regex '.*__erasure_code_.*'
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_test_jerasure_sse4_la_LDFLAGS =  \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	${AM_LDFLAGS} -module \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	-avoid-version -shared \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_175)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_test_jerasure_sse3_la_SOURCES = test/erasure-code/TestJerasurePluginSSE3.cc
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_test_jerasure_sse3_la_CFLAGS = ${AM_CFLAGS}
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_test_jerasure_sse3_la_CXXFLAGS = ${AM_CXXFLAGS}
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_test_jerasure_sse3_la_LIBADD = $(PTHREAD_LIBS) $(EXTRALIBS)
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_test_jerasure_sse3_la_LDFLAGS = ${AM_LDFLAGS} -export-symbols-regex '.*__erasure_code_.*'
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_test_jerasure_sse3_la_LDFLAGS =  \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	${AM_LDFLAGS} -module \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	-avoid-version -shared \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_176)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_test_jerasure_generic_la_SOURCES = test/erasure-code/TestJerasurePluginGeneric.cc
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_test_jerasure_generic_la_CFLAGS = ${AM_CFLAGS}
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_test_jerasure_generic_la_CXXFLAGS = ${AM_CXXFLAGS}
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_test_jerasure_generic_la_LIBADD = $(PTHREAD_LIBS) $(EXTRALIBS)
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_test_jerasure_generic_la_LDFLAGS = ${AM_LDFLAGS} -export-symbols-regex '.*__erasure_code_.*'
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_test_jerasure_generic_la_LDFLAGS =  \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	${AM_LDFLAGS} -module \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	-avoid-version -shared \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_177)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at unittest_erasure_code_plugin_SOURCES = \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	erasure-code/ErasureCode.cc \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	test/erasure-code/TestErasureCodePlugin.cc 
@@ -9964,7 +10396,7 @@ librbd_types_la_SOURCES = \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(LIBOSD) $(LIBCOMMON) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(UNITTEST_LDADD) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(CEPH_GLOBAL) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_160)
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_178)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at unittest_erasure_code_SOURCES = \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	erasure-code/ErasureCode.cc \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	test/erasure-code/TestErasureCode.cc
@@ -9987,7 +10419,7 @@ librbd_types_la_SOURCES = \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(LIBOSD) $(LIBCOMMON) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(UNITTEST_LDADD) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(CEPH_GLOBAL) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_162)
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_180)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at unittest_erasure_code_plugin_jerasure_SOURCES = \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	test/erasure-code/TestErasureCodePluginJerasure.cc
 
@@ -9996,7 +10428,7 @@ librbd_types_la_SOURCES = \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(LIBOSD) $(LIBCOMMON) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(UNITTEST_LDADD) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(CEPH_GLOBAL) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_163)
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_181)
 @ENABLE_SERVER_TRUE@@WITH_BETTER_YASM_ELF64_TRUE@@WITH_OSD_TRUE at unittest_erasure_code_isa_SOURCES = \
 @ENABLE_SERVER_TRUE@@WITH_BETTER_YASM_ELF64_TRUE@@WITH_OSD_TRUE@	erasure-code/ErasureCode.cc \
 @ENABLE_SERVER_TRUE@@WITH_BETTER_YASM_ELF64_TRUE@@WITH_OSD_TRUE@	test/erasure-code/TestErasureCodeIsa.cc
@@ -10006,9 +10438,9 @@ librbd_types_la_SOURCES = \
 @ENABLE_SERVER_TRUE@@WITH_BETTER_YASM_ELF64_TRUE@@WITH_OSD_TRUE@	$(LIBCOMMON) \
 @ENABLE_SERVER_TRUE@@WITH_BETTER_YASM_ELF64_TRUE@@WITH_OSD_TRUE@	$(UNITTEST_LDADD) \
 @ENABLE_SERVER_TRUE@@WITH_BETTER_YASM_ELF64_TRUE@@WITH_OSD_TRUE@	$(CEPH_GLOBAL) \
- at ENABLE_SERVER_TRUE@@WITH_BETTER_YASM_ELF64_TRUE@@WITH_OSD_TRUE@	.libs/libec_isa.la \
+ at ENABLE_SERVER_TRUE@@WITH_BETTER_YASM_ELF64_TRUE@@WITH_OSD_TRUE@	libisa.la \
 @ENABLE_SERVER_TRUE@@WITH_BETTER_YASM_ELF64_TRUE@@WITH_OSD_TRUE@	$(LIBERASURE_CODE) \
- at ENABLE_SERVER_TRUE@@WITH_BETTER_YASM_ELF64_TRUE@@WITH_OSD_TRUE@	$(am__append_164)
+ at ENABLE_SERVER_TRUE@@WITH_BETTER_YASM_ELF64_TRUE@@WITH_OSD_TRUE@	$(am__append_182)
 @ENABLE_SERVER_TRUE@@WITH_BETTER_YASM_ELF64_TRUE@@WITH_OSD_TRUE at unittest_erasure_code_plugin_isa_SOURCES = \
 @ENABLE_SERVER_TRUE@@WITH_BETTER_YASM_ELF64_TRUE@@WITH_OSD_TRUE@	erasure-code/ErasureCode.cc \
 @ENABLE_SERVER_TRUE@@WITH_BETTER_YASM_ELF64_TRUE@@WITH_OSD_TRUE@	test/erasure-code/TestErasureCodePluginIsa.cc
@@ -10018,9 +10450,8 @@ librbd_types_la_SOURCES = \
 @ENABLE_SERVER_TRUE@@WITH_BETTER_YASM_ELF64_TRUE@@WITH_OSD_TRUE@	$(LIBCOMMON) \
 @ENABLE_SERVER_TRUE@@WITH_BETTER_YASM_ELF64_TRUE@@WITH_OSD_TRUE@	$(UNITTEST_LDADD) \
 @ENABLE_SERVER_TRUE@@WITH_BETTER_YASM_ELF64_TRUE@@WITH_OSD_TRUE@	$(CEPH_GLOBAL) \
- at ENABLE_SERVER_TRUE@@WITH_BETTER_YASM_ELF64_TRUE@@WITH_OSD_TRUE@	.libs/libec_isa.la \
 @ENABLE_SERVER_TRUE@@WITH_BETTER_YASM_ELF64_TRUE@@WITH_OSD_TRUE@	$(LIBERASURE_CODE) \
- at ENABLE_SERVER_TRUE@@WITH_BETTER_YASM_ELF64_TRUE@@WITH_OSD_TRUE@	$(am__append_166)
+ at ENABLE_SERVER_TRUE@@WITH_BETTER_YASM_ELF64_TRUE@@WITH_OSD_TRUE@	$(am__append_184)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at unittest_erasure_code_lrc_SOURCES = \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	test/erasure-code/TestErasureCodeLrc.cc \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	${lrc_sources}
@@ -10030,7 +10461,7 @@ librbd_types_la_SOURCES = \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(LIBOSD) $(LIBCOMMON) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(UNITTEST_LDADD) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(CEPH_GLOBAL) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_167)
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_185)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at unittest_erasure_code_plugin_lrc_SOURCES = \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	test/erasure-code/TestErasureCodePluginLrc.cc
 
@@ -10039,7 +10470,7 @@ librbd_types_la_SOURCES = \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(LIBOSD) $(LIBCOMMON) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(UNITTEST_LDADD) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(CEPH_GLOBAL) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_169)
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_187)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at unittest_erasure_code_shec_SOURCES = \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	test/erasure-code/TestErasureCodeShec.cc \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	${shec_sources}
@@ -10060,7 +10491,7 @@ librbd_types_la_SOURCES = \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(LIBOSD) $(LIBCOMMON) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(UNITTEST_LDADD) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(CEPH_GLOBAL) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_170)
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_188)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at unittest_erasure_code_shec_all_SOURCES = \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	test/erasure-code/TestErasureCodeShec_all.cc \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	${shec_sources}
@@ -10081,7 +10512,7 @@ librbd_types_la_SOURCES = \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(LIBOSD) $(LIBCOMMON) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(UNITTEST_LDADD) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(CEPH_GLOBAL) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_171)
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_189)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at unittest_erasure_code_shec_thread_SOURCES = \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	test/erasure-code/TestErasureCodeShec_thread.cc \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	${shec_sources}
@@ -10102,7 +10533,7 @@ librbd_types_la_SOURCES = \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(LIBOSD) $(LIBCOMMON) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(UNITTEST_LDADD) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(CEPH_GLOBAL) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_172)
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_190)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at unittest_erasure_code_shec_arguments_SOURCES = \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	test/erasure-code/TestErasureCodeShec_arguments.cc \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	${shec_sources}
@@ -10123,7 +10554,7 @@ librbd_types_la_SOURCES = \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(LIBOSD) $(LIBCOMMON) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(UNITTEST_LDADD) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(CEPH_GLOBAL) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_173)
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_191)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at unittest_erasure_code_plugin_shec_SOURCES = \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@        test/erasure-code/TestErasureCodePluginShec.cc
 
@@ -10132,27 +10563,39 @@ librbd_types_la_SOURCES = \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(LIBOSD) $(LIBCOMMON) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(UNITTEST_LDADD) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(CEPH_GLOBAL) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_174)
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_192)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_test_shec_neon_la_SOURCES = test/erasure-code/TestShecPluginNEON.cc
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_test_shec_neon_la_CFLAGS = ${AM_CFLAGS}
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_test_shec_neon_la_CXXFLAGS = ${AM_CXXFLAGS}
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_test_shec_neon_la_LIBADD = $(PTHREAD_LIBS) $(EXTRALIBS)
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_test_shec_neon_la_LDFLAGS = ${AM_LDFLAGS} -export-symbols-regex '.*__erasure_code_.*'
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_test_shec_neon_la_LDFLAGS =  \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	${AM_LDFLAGS} -module \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	-avoid-version -shared \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_193)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_test_shec_sse4_la_SOURCES = test/erasure-code/TestShecPluginSSE4.cc
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_test_shec_sse4_la_CFLAGS = ${AM_CFLAGS}
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_test_shec_sse4_la_CXXFLAGS = ${AM_CXXFLAGS}
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_test_shec_sse4_la_LIBADD = $(PTHREAD_LIBS) $(EXTRALIBS)
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_test_shec_sse4_la_LDFLAGS = ${AM_LDFLAGS} -export-symbols-regex '.*__erasure_code_.*'
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_test_shec_sse4_la_LDFLAGS =  \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	${AM_LDFLAGS} -module \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	-avoid-version -shared \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_194)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_test_shec_sse3_la_SOURCES = test/erasure-code/TestShecPluginSSE3.cc
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_test_shec_sse3_la_CFLAGS = ${AM_CFLAGS}
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_test_shec_sse3_la_CXXFLAGS = ${AM_CXXFLAGS}
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_test_shec_sse3_la_LIBADD = $(PTHREAD_LIBS) $(EXTRALIBS)
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_test_shec_sse3_la_LDFLAGS = ${AM_LDFLAGS} -export-symbols-regex '.*__erasure_code_.*'
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_test_shec_sse3_la_LDFLAGS =  \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	${AM_LDFLAGS} -module \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	-avoid-version -shared \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_195)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_test_shec_generic_la_SOURCES = test/erasure-code/TestShecPluginGeneric.cc
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_test_shec_generic_la_CFLAGS = ${AM_CFLAGS}
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_test_shec_generic_la_CXXFLAGS = ${AM_CXXFLAGS}
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_test_shec_generic_la_LIBADD = $(PTHREAD_LIBS) $(EXTRALIBS)
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_test_shec_generic_la_LDFLAGS = ${AM_LDFLAGS} -export-symbols-regex '.*__erasure_code_.*'
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_test_shec_generic_la_LDFLAGS =  \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	${AM_LDFLAGS} -module \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	-avoid-version -shared \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_196)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at unittest_erasure_code_example_SOURCES = \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	erasure-code/ErasureCode.cc \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	test/erasure-code/TestErasureCodeExample.cc
@@ -10170,7 +10613,7 @@ librbd_types_la_SOURCES = \
 @ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@	$(CEPH_GLOBAL) \
 @ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@	$(PTHREAD_LIBS) \
 @ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@	$(EXTRALIBS) \
- at ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@	$(am__append_176)
+ at ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@	$(am__append_198)
 @ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE at simple_client_SOURCES = \
 @ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@	test/messenger/simple_client.cc \
 @ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@	test/messenger/simple_dispatcher.cc
@@ -10182,7 +10625,7 @@ librbd_types_la_SOURCES = \
 @ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@	$(CEPH_GLOBAL) \
 @ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@	$(PTHREAD_LIBS) \
 @ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@	$(EXTRALIBS) \
- at ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@	$(am__append_177)
+ at ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@	$(am__append_199)
 @ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE at xio_server_SOURCES = \
 @ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@	test/messenger/xio_server.cc \
 @ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@	test/messenger/xio_dispatcher.cc
@@ -10194,7 +10637,7 @@ librbd_types_la_SOURCES = \
 @ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@	$(LIBCOMMON) \
 @ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@	$(PTHREAD_LIBS) \
 @ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@	$(EXTRALIBS) \
- at ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@	$(am__append_179)
+ at ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@	$(am__append_201)
 @ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE at xio_client_SOURCES = \
 @ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@	test/messenger/xio_client.cc \
 @ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@	test/messenger/xio_dispatcher.cc
@@ -10206,7 +10649,7 @@ librbd_types_la_SOURCES = \
 @ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@	$(LIBCOMMON) \
 @ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@	$(PTHREAD_LIBS) \
 @ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@	$(EXTRALIBS) \
- at ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@	$(am__append_180)
+ at ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@	$(am__append_202)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libceph_example_la_SOURCES = \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	compressor/Compressor.cc \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	test/compressor/compressor_plugin_example.cc
@@ -10224,7 +10667,7 @@ librbd_types_la_SOURCES = \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(LIBOSD) $(LIBCOMMON) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(UNITTEST_LDADD) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(CEPH_GLOBAL) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_183)
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_205)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at unittest_compression_snappy_SOURCES = \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	test/compressor/test_compression_snappy.cc \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	${snappy_sources}
@@ -10234,7 +10677,7 @@ librbd_types_la_SOURCES = \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(LIBOSD) $(LIBCOMMON) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(UNITTEST_LDADD) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(CEPH_GLOBAL) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_184)
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_206)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at unittest_compression_snappy_LDFLAGS = -lsnappy
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at unittest_compression_plugin_snappy_SOURCES = \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	test/compressor/test_compression_plugin_snappy.cc \
@@ -10246,8 +10689,31 @@ librbd_types_la_SOURCES = \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(UNITTEST_LDADD) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(CEPH_GLOBAL) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(LIBCOMPRESSOR) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_185)
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_207)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at unittest_compression_plugin_snappy_LDFLAGS = -lsnappy
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at unittest_compression_zlib_SOURCES = \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	test/compressor/test_compression_zlib.cc \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	${zlib_sources}
+
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at unittest_compression_zlib_CXXFLAGS = $(UNITTEST_CXXFLAGS)
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at unittest_compression_zlib_LDADD =  \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(LIBOSD) $(LIBCOMMON) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(UNITTEST_LDADD) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(CEPH_GLOBAL) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_208)
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at unittest_compression_zlib_LDFLAGS = -lz
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at unittest_compression_plugin_zlib_SOURCES = \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	test/compressor/test_compression_plugin_zlib.cc \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	${zlib_sources}
+
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at unittest_compression_plugin_zlib_CXXFLAGS = ${AM_CXXFLAGS} ${UNITTEST_CXXFLAGS}
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at unittest_compression_plugin_zlib_LDADD =  \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(LIBOSD) $(LIBCOMMON) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(UNITTEST_LDADD) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(CEPH_GLOBAL) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(LIBCOMPRESSOR) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_209)
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at unittest_compression_plugin_zlib_LDFLAGS = -lz
 
 # This should use LIBMDS_TYPES once it exists
 @ENABLE_CLIENT_TRUE at ceph_dencoder_SOURCES = \
@@ -10265,10 +10731,10 @@ librbd_types_la_SOURCES = \
 
 # These should always use explicit _CFLAGS/_CXXFLAGS so avoid basename conflicts
 @ENABLE_CLIENT_TRUE at ceph_dencoder_CFLAGS = ${AM_CFLAGS} \
- at ENABLE_CLIENT_TRUE@	$(am__append_186)
+ at ENABLE_CLIENT_TRUE@	$(am__append_210)
 @ENABLE_CLIENT_TRUE at ceph_dencoder_CXXFLAGS = ${AM_CXXFLAGS} \
- at ENABLE_CLIENT_TRUE@	$(am__append_187) $(am__append_188) \
- at ENABLE_CLIENT_TRUE@	$(am__append_189)
+ at ENABLE_CLIENT_TRUE@	$(am__append_211) $(am__append_212) \
+ at ENABLE_CLIENT_TRUE@	$(am__append_213)
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at libradostest_la_SOURCES = \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	test/librados/test.cc \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	test/librados/TestCase.cc
@@ -10519,6 +10985,7 @@ librbd_types_la_SOURCES = \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	test/librbd/test_mock_Journal.cc \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	test/librbd/exclusive_lock/test_mock_AcquireRequest.cc \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	test/librbd/exclusive_lock/test_mock_ReleaseRequest.cc \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	test/librbd/image/test_mock_RefreshRequest.cc \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	test/librbd/journal/test_mock_Replay.cc \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	test/librbd/object_map/test_mock_InvalidateRequest.cc \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	test/librbd/object_map/test_mock_LockRequest.cc \
@@ -10565,6 +11032,49 @@ librbd_types_la_SOURCES = \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at ceph_test_librbd_api_LDADD = \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(LIBRBD) $(LIBRADOS) $(LIBCOMMON) $(UNITTEST_LDADD) $(RADOS_TEST_LDADD)
 
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at librbd_mirror_test_la_SOURCES = \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	test/rbd_mirror/test_ClusterWatcher.cc \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	test/rbd_mirror/test_PoolWatcher.cc
+
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at librbd_mirror_test_la_CXXFLAGS = $(UNITTEST_CXXFLAGS)
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at unittest_rbd_mirror_SOURCES = \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	test/rbd_mirror/test_main.cc
+
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at unittest_rbd_mirror_CXXFLAGS = $(UNITTEST_CXXFLAGS)
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at unittest_rbd_mirror_LDADD = \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	librbd_mirror_test.la \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	librados_test_stub.la \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	librbd_mirror_internal.la \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	librbd_internal.la \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	librbd_api.la \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	libjournal.la \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	librados_internal.la \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	libcls_rbd_client.la \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	libcls_lock_client.la \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	libcls_journal_client.la \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(LIBRBD_TYPES) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(LIBRADOS) $(LIBOSDC) $(UNITTEST_LDADD) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(CEPH_GLOBAL) $(RADOS_TEST_LDADD)
+
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at ceph_test_rbd_mirror_SOURCES = \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@        test/rbd_mirror/test_main.cc
+
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at ceph_test_rbd_mirror_CXXFLAGS = $(UNITTEST_CXXFLAGS)
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at ceph_test_rbd_mirror_LDADD = \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	librbd_mirror_test.la \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	librbd_mirror_internal.la \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	librbd_internal.la \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	librbd_api.la \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	libjournal.la \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	librados_internal.la \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	libcls_rbd_client.la \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	libcls_lock_client.la \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	libcls_journal_client.la \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(LIBRBD_TYPES) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	librados_api.la $(LIBRADOS_DEPS) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(LIBOSDC) $(UNITTEST_LDADD) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(CEPH_GLOBAL) $(RADOS_TEST_LDADD)
+
 @ENABLE_CLIENT_TRUE@@LINUX_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at ceph_test_librbd_fsx_SOURCES = test/librbd/fsx.cc
 @ENABLE_CLIENT_TRUE@@LINUX_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at ceph_test_librbd_fsx_LDADD = \
 @ENABLE_CLIENT_TRUE@@LINUX_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(LIBKRBD) $(LIBRBD) $(LIBRADOS) \
@@ -10620,7 +11130,7 @@ librbd_types_la_SOURCES = \
 @ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE@	test/libcephfs/multiclient.cc \
 @ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE@	test/libcephfs/access.cc \
 @ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE@	test/libcephfs/acl.cc \
- at ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE@	$(am__append_210)
+ at ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE@	$(am__append_235)
 @ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE at ceph_test_libcephfs_LDADD = $(LIBRADOS) $(LIBCEPHFS) $(LIBCOMMON) $(UNITTEST_LDADD)
 @ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE at ceph_test_libcephfs_CXXFLAGS = $(UNITTEST_CXXFLAGS)
 @ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE at unittest_encoding_SOURCES = test/encoding.cc
@@ -10643,7 +11153,7 @@ librbd_types_la_SOURCES = \
 @ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE@	-Wignored-qualifiers \
 @ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE@	-Wold-style-definition \
 @ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE@	-Wtype-limits \
- at ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE@	$(am__append_212)
+ at ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE@	$(am__append_237)
 @ENABLE_CLIENT_TRUE@@WITH_BUILD_TESTS_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE at test_build_librgw_SOURCES = \
 @ENABLE_CLIENT_TRUE@@WITH_BUILD_TESTS_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	test/buildtest_skeleton.cc \
 @ENABLE_CLIENT_TRUE@@WITH_BUILD_TESTS_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(librgw_la_SOURCES)
@@ -10787,6 +11297,9 @@ librbd_types_la_SOURCES = \
 @ENABLE_SERVER_TRUE@	test/objectstore/FileStoreDiff.cc
 
 @ENABLE_SERVER_TRUE at ceph_test_filestore_idempotent_sequence_LDADD = $(LIBOS) $(CEPH_GLOBAL)
+ at ENABLE_SERVER_TRUE@unittest_transaction_SOURCES = test/objectstore/test_transaction.cc
+ at ENABLE_SERVER_TRUE@unittest_transaction_LDADD = $(LIBOS) $(UNITTEST_LDADD) $(CEPH_GLOBAL)
+ at ENABLE_SERVER_TRUE@unittest_transaction_CXXFLAGS = $(UNITTEST_CXXFLAGS)
 @ENABLE_SERVER_TRUE at ceph_xattr_bench_SOURCES = test/xattr_bench.cc
 @ENABLE_SERVER_TRUE at ceph_xattr_bench_LDADD = $(LIBOS) $(UNITTEST_LDADD) $(CEPH_GLOBAL)
 @ENABLE_SERVER_TRUE at ceph_xattr_bench_CXXFLAGS = $(UNITTEST_CXXFLAGS)
@@ -10845,13 +11358,13 @@ librbd_types_la_SOURCES = \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at unittest_osdscrub_LDADD =  \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(LIBOSD) $(UNITTEST_LDADD) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(CEPH_GLOBAL) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_225)
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_251)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at unittest_pglog_SOURCES = test/osd/TestPGLog.cc
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at unittest_pglog_CXXFLAGS = $(UNITTEST_CXXFLAGS)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at unittest_pglog_LDADD = $(LIBOSD) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(UNITTEST_LDADD) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(CEPH_GLOBAL) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_226)
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_252)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at unittest_hitset_SOURCES = test/osd/hitset.cc
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at unittest_hitset_CXXFLAGS = $(UNITTEST_CXXFLAGS)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at unittest_hitset_LDADD = $(LIBOSD) $(UNITTEST_LDADD) $(CEPH_GLOBAL)
@@ -10918,7 +11431,7 @@ UNITTEST_CXXFLAGS = \
 UNITTEST_LDADD = $(top_builddir)/src/gmock/lib/libgmock_main.la \
 	$(top_builddir)/src/gmock/lib/libgmock.la \
 	$(top_builddir)/src/gmock/gtest/lib/libgtest.la \
-	$(PTHREAD_LIBS) $(am__append_234)
+	$(PTHREAD_LIBS) $(am__append_260)
 unittest_addrs_SOURCES = test/test_addrs.cc
 unittest_addrs_CXXFLAGS = $(UNITTEST_CXXFLAGS)
 unittest_addrs_LDADD = $(UNITTEST_LDADD) $(CEPH_GLOBAL)
@@ -10934,9 +11447,18 @@ unittest_histogram_LDADD = $(UNITTEST_LDADD) $(CEPH_GLOBAL)
 unittest_prioritized_queue_SOURCES = test/common/test_prioritized_queue.cc
 unittest_prioritized_queue_CXXFLAGS = $(UNITTEST_CXXFLAGS)
 unittest_prioritized_queue_LDADD = $(UNITTEST_LDADD) $(CEPH_GLOBAL)
+unittest_weighted_priority_queue_SOURCES = test/common/test_weighted_priority_queue.cc
+unittest_weighted_priority_queue_CXXFLAGS = $(UNITTEST_CXXFLAGS)
+unittest_weighted_priority_queue_LDADD = $(UNITTEST_LDADD) $(CEPH_GLOBAL)
 unittest_str_map_SOURCES = test/common/test_str_map.cc
 unittest_str_map_CXXFLAGS = $(UNITTEST_CXXFLAGS)
 unittest_str_map_LDADD = $(UNITTEST_LDADD) $(CEPH_GLOBAL)
+unittest_mutex_debug_SOURCES = test/common/test_mutex_debug.cc
+unittest_mutex_debug_CXXFLAGS = $(UNITTEST_CXXFLAGS)
+unittest_mutex_debug_LDADD = $(UNITTEST_LDADD) $(CEPH_GLOBAL) ${EXTRALIBS}
+unittest_shunique_lock_SOURCES = test/common/test_shunique_lock.cc
+unittest_shunique_lock_CXXFLAGS = $(UNITTEST_CXXFLAGS)
+unittest_shunique_lock_LDADD = $(UNITTEST_LDADD) $(CEPH_GLOBAL) ${EXTRALIBS}
 unittest_sharedptr_registry_SOURCES = test/common/test_sharedptr_registry.cc
 unittest_sharedptr_registry_CXXFLAGS = $(UNITTEST_CXXFLAGS)
 unittest_sharedptr_registry_LDADD = $(UNITTEST_LDADD) $(CEPH_GLOBAL)
@@ -11085,6 +11607,9 @@ unittest_tableformatter_LDADD = $(UNITTEST_LDADD) $(CEPH_GLOBAL)
 unittest_bit_vector_SOURCES = test/common/test_bit_vector.cc
 unittest_bit_vector_CXXFLAGS = $(UNITTEST_CXXFLAGS)
 unittest_bit_vector_LDADD = $(UNITTEST_LDADD) $(CEPH_GLOBAL)
+unittest_interval_set_SOURCES = test/common/test_interval_set.cc
+unittest_interval_set_CXXFLAGS = $(UNITTEST_CXXFLAGS)
+unittest_interval_set_LDADD = $(UNITTEST_LDADD) $(CEPH_GLOBAL)
 unittest_subprocess_SOURCES = test/test_subprocess.cc
 unittest_subprocess_LDADD = $(LIBCOMMON) $(UNITTEST_LDADD)
 unittest_subprocess_CXXFLAGS = $(UNITTEST_CXXFLAGS)
@@ -11159,6 +11684,32 @@ ceph_test_cfuse_cache_invalidate_SOURCES = test/test_cfuse_cache_invalidate.cc
 @ENABLE_CLIENT_TRUE@@LINUX_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at rbd_nbd_SOURCES = tools/rbd_nbd/rbd-nbd.cc
 @ENABLE_CLIENT_TRUE@@LINUX_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at rbd_nbd_CXXFLAGS = $(AM_CXXFLAGS)
 @ENABLE_CLIENT_TRUE@@LINUX_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at rbd_nbd_LDADD = $(LIBRBD) $(LIBRADOS) $(CEPH_GLOBAL) $(BOOST_REGEX_LIBS)
+
+# library for unit tests
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at librbd_mirror_internal_la_SOURCES = \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	tools/rbd_mirror/ClusterWatcher.cc \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	tools/rbd_mirror/ImageReplayer.cc \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	tools/rbd_mirror/Mirror.cc \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	tools/rbd_mirror/PoolWatcher.cc \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	tools/rbd_mirror/Replayer.cc \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	tools/rbd_mirror/types.cc
+
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at rbd_mirror_SOURCES = \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	tools/rbd_mirror/main.cc
+
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at rbd_mirror_LDADD = \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	librbd_mirror_internal.la \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	librbd_internal.la \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	librbd_api.la \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(LIBRBD_TYPES) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	libjournal.la \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(LIBRADOS) $(LIBOSDC) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	librados_internal.la \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	libcls_rbd_client.la \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	libcls_lock_client.la \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	libcls_journal_client.la \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(CEPH_GLOBAL)
+
 @ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE at ceph_client_debug_SOURCES = tools/ceph-client-debug.cc
 @ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE at ceph_client_debug_LDADD = $(LIBCEPHFS) $(LIBCLIENT) $(CEPH_GLOBAL) $(LIBCOMMON)
 @ENABLE_SERVER_TRUE at ceph_osdomap_tool_SOURCES = tools/ceph_osdomap_tool.cc
@@ -11175,7 +11726,7 @@ ceph_test_cfuse_cache_invalidate_SOURCES = test/test_cfuse_cache_invalidate.cc
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(LIBOSD) $(LIBOS) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(CEPH_GLOBAL) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(BOOST_PROGRAM_OPTIONS_LIBS) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_243)
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_271)
 @ENABLE_CLIENT_TRUE@@ENABLE_SERVER_TRUE@@WITH_MDS_TRUE@@WITH_RADOS_TRUE at cephfs_journal_tool_SOURCES = \
 @ENABLE_CLIENT_TRUE@@ENABLE_SERVER_TRUE@@WITH_MDS_TRUE@@WITH_RADOS_TRUE@	tools/cephfs/cephfs-journal-tool.cc \
 @ENABLE_CLIENT_TRUE@@ENABLE_SERVER_TRUE@@WITH_MDS_TRUE@@WITH_RADOS_TRUE@	tools/cephfs/JournalTool.cc \
@@ -11221,7 +11772,7 @@ ceph_authtool_LDADD = $(CEPH_GLOBAL)
 @WITH_LTTNG_TRUE@	tracing/osd.h \
 @WITH_LTTNG_TRUE@	tracing/pg.h
 
-libosd_tp_la_LIBADD = -llttng-ust -ldl
+libosd_tp_la_LIBADD = -ldl -llttng-ust
 libosd_tp_la_CFLAGS = -I$(top_srcdir)/src/tracing -I$(top_srcdir)/src $(AM_CFLAGS) -fpic
 libosd_tp_la_LDFLAGS = -version-info 1:0:0
 @WITH_LTTNG_TRUE at librados_tp_la_SOURCES = \
@@ -11230,7 +11781,7 @@ libosd_tp_la_LDFLAGS = -version-info 1:0:0
 @WITH_LTTNG_TRUE at nodist_librados_tp_la_SOURCES = \
 @WITH_LTTNG_TRUE@	tracing/librados.h
 
-librados_tp_la_LIBADD = -llttng-ust -ldl
+librados_tp_la_LIBADD = -ldl -llttng-ust
 librados_tp_la_CFLAGS = -I$(top_srcdir)/src/tracing -I$(top_srcdir)/src $(AM_CFLAGS) -fpic
 librados_tp_la_LDFLAGS = -version-info 2:0:0
 @WITH_LTTNG_TRUE at librbd_tp_la_SOURCES = \
@@ -11239,7 +11790,7 @@ librados_tp_la_LDFLAGS = -version-info 2:0:0
 @WITH_LTTNG_TRUE at nodist_librbd_tp_la_SOURCES = \
 @WITH_LTTNG_TRUE@	tracing/librbd.h
 
-librbd_tp_la_LIBADD = -llttng-ust -ldl
+librbd_tp_la_LIBADD = -ldl -llttng-ust
 librbd_tp_la_CFLAGS = -I$(top_srcdir)/src/tracing -I$(top_srcdir)/src $(AM_CFLAGS) -fpic
 librbd_tp_la_LDFLAGS = -version-info 1:0:0
 @WITH_LTTNG_TRUE at libos_tp_la_SOURCES = \
@@ -11248,7 +11799,7 @@ librbd_tp_la_LDFLAGS = -version-info 1:0:0
 @WITH_LTTNG_TRUE at nodist_libos_tp_la_SOURCES = \
 @WITH_LTTNG_TRUE@	tracing/objectstore.h
 
-libos_tp_la_LIBADD = -llttng-ust -ldl
+libos_tp_la_LIBADD = -ldl -llttng-ust
 libos_tp_la_CFLAGS = -I$(top_srcdir)/src/tracing -I$(top_srcdir)/src $(AM_CFLAGS) -fpic
 libos_tp_la_LDFLAGS = -version-info 1:0:0
 @ENABLE_CLIENT_TRUE@@WITH_CYTHON_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at PY_DISTUTILS = \
@@ -11269,17 +11820,16 @@ editpaths = sed \
 	-e 's|@sysconfdir[@]|$(sysconfdir)|g' \
 	-e 's|@datadir[@]|$(pkgdatadir)|g' \
 	-e 's|@prefix[@]|$(prefix)|g' \
+	-e 's|@libexecdir[@]|$(libexecdir)|g' \
 	-e 's|@@GCOV_PREFIX_STRIP[@][@]|$(GCOV_PREFIX_STRIP)|g'
 
 shell_scripts = ceph-debugpack ceph-post-file ceph-crush-location \
-	$(am__append_268)
+	$(am__append_296)
 doc_DATA = $(srcdir)/sample.ceph.conf sample.fetch_config
 
-# various scripts
-shell_commondir = $(libdir)/ceph
-shell_common_SCRIPTS = ceph_common.sh
+# various scripts in $(libexecdir)
 ceph_libexecdir = $(libexecdir)/ceph
-ceph_libexec_SCRIPTS = ceph-osd-prestart.sh
+ceph_libexec_SCRIPTS = ceph_common.sh ceph-osd-prestart.sh
 
 # TODO: If we're running the parallel test harness (the preferred harness), this should be AM_TESTS_ENVIRONMENT instead.
 # See: https://www.gnu.org/software/automake/manual/html_node/Scripts_002dbased-Testsuites.html
@@ -11287,12 +11837,12 @@ ceph_libexec_SCRIPTS = ceph-osd-prestart.sh
 @WITH_LTTNG_TRUE at TESTS_ENVIRONMENT = LD_PRELOAD=liblttng-ust-fork.so; export LD_PRELOAD; echo "LD_PRELOAD=$${LD_PRELOAD}";
 
 # pybind
-python_PYTHON = $(am__append_251) $(am__append_254) $(am__append_262) \
-	$(am__append_267)
+python_PYTHON = $(am__append_279) $(am__append_282) $(am__append_290) \
+	$(am__append_295)
 @ENABLE_CLIENT_TRUE at bash_completiondir = $(sysconfdir)/bash_completion.d
 @ENABLE_CLIENT_TRUE at bash_completion_DATA =  \
 @ENABLE_CLIENT_TRUE@	$(srcdir)/bash_completion/ceph \
- at ENABLE_CLIENT_TRUE@	$(am__append_253) $(am__append_256)
+ at ENABLE_CLIENT_TRUE@	$(am__append_281) $(am__append_284)
 @ENABLE_CLIENT_TRUE at ceph_syn_SOURCES = ceph_syn.cc \
 @ENABLE_CLIENT_TRUE@	client/SyntheticClient.cc # uses g_conf.. \
 @ENABLE_CLIENT_TRUE@	needs cleanup
@@ -11319,7 +11869,7 @@ python_PYTHON = $(am__append_251) $(am__append_254) $(am__append_262) \
 @ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE@	1:0:0 \
 @ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE@	-export-symbols-regex \
 @ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE@	'^ceph_.*' \
- at ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE@	$(am__append_263)
+ at ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE@	$(am__append_291)
 
 # jni library (java source is in src/java)
 @ENABLE_CEPHFS_JAVA_TRUE@@ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE at libcephfs_jni_la_SOURCES = \
@@ -11332,7 +11882,7 @@ python_PYTHON = $(am__append_251) $(am__append_254) $(am__append_262) \
 @ENABLE_CEPHFS_JAVA_TRUE@@ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE at libcephfs_jni_la_CPPFLAGS = $(JDK_CPPFLAGS) $(AM_CPPFLAGS)
 @ENABLE_CEPHFS_JAVA_TRUE@@ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE at libcephfs_jni_la_LDFLAGS = ${AM_LDFLAGS} -version-info 1:0:0
 @ENABLE_SERVER_TRUE at ceph_sbin_SCRIPTS = ceph-create-keys \
- at ENABLE_SERVER_TRUE@	$(am__append_273)
+ at ENABLE_SERVER_TRUE@	$(am__append_301)
 @ENABLE_SERVER_TRUE at mount_ceph_SOURCES = mount/mount.ceph.c
 @ENABLE_SERVER_TRUE at mount_ceph_LDADD = $(LIBSECRET) $(LIBCOMMON)
 @ENABLE_SERVER_TRUE@@WITH_MON_TRUE at ceph_mon_SOURCES = ceph_mon.cc
@@ -11342,7 +11892,7 @@ python_PYTHON = $(am__append_251) $(am__append_254) $(am__append_262) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(LIBOSD) $(LIBOSD_TYPES) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(LIBOS_TYPES) $(LIBOS) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(CEPH_GLOBAL) $(LIBCOMMON) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_275)
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_303)
 @ENABLE_SERVER_TRUE@@WITH_MDS_TRUE at ceph_mds_SOURCES = ceph_mds.cc
 @ENABLE_SERVER_TRUE@@WITH_MDS_TRUE at ceph_mds_LDADD = $(LIBMDS) $(LIBOSDC) $(CEPH_GLOBAL) $(LIBCOMMON)
 @ENABLE_COVERAGE_TRUE@@ENABLE_SERVER_TRUE at COV_DIR = $(DESTDIR)$(libdir)/ceph/coverage
@@ -11353,7 +11903,7 @@ all: $(BUILT_SOURCES) acconfig.h
 
 .SUFFIXES:
 .SUFFIXES: .S .c .cc .cpp .lo .log .o .obj .s .test .test$(EXEEXT) .trs
-$(srcdir)/Makefile.in:  $(srcdir)/Makefile.am $(srcdir)/Makefile-env.am $(srcdir)/arch/Makefile.am $(srcdir)/auth/Makefile.am $(srcdir)/brag/Makefile.am $(srcdir)/ceph-detect-init/Makefile.am $(srcdir)/crush/Makefile.am $(srcdir)/kv/Makefile.am $(srcdir)/mon/Makefile.am $(srcdir)/mds/Makefile.am $(srcdir)/mds/Makefile-client.am $(srcdir)/mds/Makefile-server.am $(srcdir)/os/Makefile.am $(srcdir)/osd/Makefile.am $(srcdir)/erasure-code/Makefile.am $(srcdir)/erasure-code/jerasure/Makefile.am [...]
+$(srcdir)/Makefile.in:  $(srcdir)/Makefile.am $(srcdir)/Makefile-env.am $(srcdir)/arch/Makefile.am $(srcdir)/auth/Makefile.am $(srcdir)/brag/Makefile.am $(srcdir)/ceph-detect-init/Makefile.am $(srcdir)/ceph-disk/Makefile.am $(srcdir)/crush/Makefile.am $(srcdir)/kv/Makefile.am $(srcdir)/mon/Makefile.am $(srcdir)/mds/Makefile.am $(srcdir)/mds/Makefile-client.am $(srcdir)/mds/Makefile-server.am $(srcdir)/os/Makefile.am $(srcdir)/osd/Makefile.am $(srcdir)/erasure-code/Makefile.am $(srcdir)/e [...]
 	@for dep in $?; do \
 	  case '$(am__configure_deps)' in \
 	    *$$dep*) \
@@ -11373,7 +11923,7 @@ Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
 	    echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
 	    cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
 	esac;
-$(srcdir)/Makefile-env.am $(srcdir)/arch/Makefile.am $(srcdir)/auth/Makefile.am $(srcdir)/brag/Makefile.am $(srcdir)/ceph-detect-init/Makefile.am $(srcdir)/crush/Makefile.am $(srcdir)/kv/Makefile.am $(srcdir)/mon/Makefile.am $(srcdir)/mds/Makefile.am $(srcdir)/mds/Makefile-client.am $(srcdir)/mds/Makefile-server.am $(srcdir)/os/Makefile.am $(srcdir)/osd/Makefile.am $(srcdir)/erasure-code/Makefile.am $(srcdir)/erasure-code/jerasure/Makefile.am $(srcdir)/erasure-code/lrc/Makefile.am $(srcd [...]
+$(srcdir)/Makefile-env.am $(srcdir)/arch/Makefile.am $(srcdir)/auth/Makefile.am $(srcdir)/brag/Makefile.am $(srcdir)/ceph-detect-init/Makefile.am $(srcdir)/ceph-disk/Makefile.am $(srcdir)/crush/Makefile.am $(srcdir)/kv/Makefile.am $(srcdir)/mon/Makefile.am $(srcdir)/mds/Makefile.am $(srcdir)/mds/Makefile-client.am $(srcdir)/mds/Makefile-server.am $(srcdir)/os/Makefile.am $(srcdir)/osd/Makefile.am $(srcdir)/erasure-code/Makefile.am $(srcdir)/erasure-code/jerasure/Makefile.am $(srcdir)/era [...]
 
 $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
 	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
@@ -11592,18 +12142,6 @@ os/fs/$(DEPDIR)/$(am__dirstamp):
 	@: > os/fs/$(DEPDIR)/$(am__dirstamp)
 os/fs/libos_a-FS.$(OBJEXT): os/fs/$(am__dirstamp) \
 	os/fs/$(DEPDIR)/$(am__dirstamp)
-os/keyvaluestore/$(am__dirstamp):
-	@$(MKDIR_P) os/keyvaluestore
-	@: > os/keyvaluestore/$(am__dirstamp)
-os/keyvaluestore/$(DEPDIR)/$(am__dirstamp):
-	@$(MKDIR_P) os/keyvaluestore/$(DEPDIR)
-	@: > os/keyvaluestore/$(DEPDIR)/$(am__dirstamp)
-os/keyvaluestore/libos_a-GenericObjectMap.$(OBJEXT):  \
-	os/keyvaluestore/$(am__dirstamp) \
-	os/keyvaluestore/$(DEPDIR)/$(am__dirstamp)
-os/keyvaluestore/libos_a-KeyValueStore.$(OBJEXT):  \
-	os/keyvaluestore/$(am__dirstamp) \
-	os/keyvaluestore/$(DEPDIR)/$(am__dirstamp)
 os/kstore/$(am__dirstamp):
 	@$(MKDIR_P) os/kstore
 	@: > os/kstore/$(am__dirstamp)
@@ -11630,6 +12168,8 @@ os/$(DEPDIR)/$(am__dirstamp):
 	@: > os/$(DEPDIR)/$(am__dirstamp)
 os/libos_a-ObjectStore.$(OBJEXT): os/$(am__dirstamp) \
 	os/$(DEPDIR)/$(am__dirstamp)
+os/libos_a-FuseStore.$(OBJEXT): os/$(am__dirstamp) \
+	os/$(DEPDIR)/$(am__dirstamp)
 os/bluestore/$(am__dirstamp):
 	@$(MKDIR_P) os/bluestore
 	@: > os/bluestore/$(am__dirstamp)
@@ -11655,6 +12195,9 @@ os/bluestore/libos_a-BlueStore.$(OBJEXT):  \
 os/bluestore/libos_a-FreelistManager.$(OBJEXT):  \
 	os/bluestore/$(am__dirstamp) \
 	os/bluestore/$(DEPDIR)/$(am__dirstamp)
+os/bluestore/libos_a-KernelDevice.$(OBJEXT):  \
+	os/bluestore/$(am__dirstamp) \
+	os/bluestore/$(DEPDIR)/$(am__dirstamp)
 os/bluestore/libos_a-StupidAllocator.$(OBJEXT):  \
 	os/bluestore/$(am__dirstamp) \
 	os/bluestore/$(DEPDIR)/$(am__dirstamp)
@@ -11669,6 +12212,9 @@ os/fs/libos_a-XFS.$(OBJEXT): os/fs/$(am__dirstamp) \
 os/filestore/libos_a-ZFSFileStoreBackend.$(OBJEXT):  \
 	os/filestore/$(am__dirstamp) \
 	os/filestore/$(DEPDIR)/$(am__dirstamp)
+os/bluestore/libos_a-NVMEDevice.$(OBJEXT):  \
+	os/bluestore/$(am__dirstamp) \
+	os/bluestore/$(DEPDIR)/$(am__dirstamp)
 
 libos.a: $(libos_a_OBJECTS) $(libos_a_DEPENDENCIES) $(EXTRA_libos_a_DEPENDENCIES) 
 	$(AM_V_at)-rm -f libos.a
@@ -12000,6 +12546,23 @@ compressor/snappy/libceph_snappy_la-CompressionPluginSnappy.lo:  \
 
 libceph_snappy.la: $(libceph_snappy_la_OBJECTS) $(libceph_snappy_la_DEPENDENCIES) $(EXTRA_libceph_snappy_la_DEPENDENCIES) 
 	$(AM_V_CXXLD)$(libceph_snappy_la_LINK) -rpath $(compressorlibdir) $(libceph_snappy_la_OBJECTS) $(libceph_snappy_la_LIBADD) $(LIBS)
+compressor/libceph_zlib_la-Compressor.lo: compressor/$(am__dirstamp) \
+	compressor/$(DEPDIR)/$(am__dirstamp)
+compressor/zlib/$(am__dirstamp):
+	@$(MKDIR_P) compressor/zlib
+	@: > compressor/zlib/$(am__dirstamp)
+compressor/zlib/$(DEPDIR)/$(am__dirstamp):
+	@$(MKDIR_P) compressor/zlib/$(DEPDIR)
+	@: > compressor/zlib/$(DEPDIR)/$(am__dirstamp)
+compressor/zlib/libceph_zlib_la-CompressionPluginZlib.lo:  \
+	compressor/zlib/$(am__dirstamp) \
+	compressor/zlib/$(DEPDIR)/$(am__dirstamp)
+compressor/zlib/libceph_zlib_la-CompressionZlib.lo:  \
+	compressor/zlib/$(am__dirstamp) \
+	compressor/zlib/$(DEPDIR)/$(am__dirstamp)
+
+libceph_zlib.la: $(libceph_zlib_la_OBJECTS) $(libceph_zlib_la_DEPENDENCIES) $(EXTRA_libceph_zlib_la_DEPENDENCIES) 
+	$(AM_V_CXXLD)$(libceph_zlib_la_LINK) -rpath $(compressorlibdir) $(libceph_zlib_la_OBJECTS) $(libceph_zlib_la_LIBADD) $(LIBS)
 
 libcephfs.la: $(libcephfs_la_OBJECTS) $(libcephfs_la_DEPENDENCIES) $(EXTRA_libcephfs_la_DEPENDENCIES) 
 	$(AM_V_CXXLD)$(libcephfs_la_LINK) $(am_libcephfs_la_rpath) $(libcephfs_la_OBJECTS) $(libcephfs_la_LIBADD) $(LIBS)
@@ -12286,6 +12849,8 @@ common/LogClient.lo: common/$(am__dirstamp) \
 	common/$(DEPDIR)/$(am__dirstamp)
 common/LogEntry.lo: common/$(am__dirstamp) \
 	common/$(DEPDIR)/$(am__dirstamp)
+common/Graylog.lo: common/$(am__dirstamp) \
+	common/$(DEPDIR)/$(am__dirstamp)
 common/PrebufferedStreambuf.lo: common/$(am__dirstamp) \
 	common/$(DEPDIR)/$(am__dirstamp)
 common/SloppyCRCMap.lo: common/$(am__dirstamp) \
@@ -12294,6 +12859,8 @@ common/BackTrace.lo: common/$(am__dirstamp) \
 	common/$(DEPDIR)/$(am__dirstamp)
 common/perf_counters.lo: common/$(am__dirstamp) \
 	common/$(DEPDIR)/$(am__dirstamp)
+common/mutex_debug.lo: common/$(am__dirstamp) \
+	common/$(DEPDIR)/$(am__dirstamp)
 common/Mutex.lo: common/$(am__dirstamp) \
 	common/$(DEPDIR)/$(am__dirstamp)
 common/OutputDataSocket.lo: common/$(am__dirstamp) \
@@ -12369,6 +12936,8 @@ common/Thread.lo: common/$(am__dirstamp) \
 	common/$(DEPDIR)/$(am__dirstamp)
 common/Formatter.lo: common/$(am__dirstamp) \
 	common/$(DEPDIR)/$(am__dirstamp)
+common/HTMLFormatter.lo: common/$(am__dirstamp) \
+	common/$(DEPDIR)/$(am__dirstamp)
 common/HeartbeatMap.lo: common/$(am__dirstamp) \
 	common/$(DEPDIR)/$(am__dirstamp)
 common/config.lo: common/$(am__dirstamp) \
@@ -12525,159 +13094,9 @@ test/erasure-code/libec_hangs_la-ErasureCodePluginHangs.lo:  \
 
 libec_hangs.la: $(libec_hangs_la_OBJECTS) $(libec_hangs_la_DEPENDENCIES) $(EXTRA_libec_hangs_la_DEPENDENCIES) 
 	$(AM_V_CXXLD)$(libec_hangs_la_LINK) $(am_libec_hangs_la_rpath) $(libec_hangs_la_OBJECTS) $(libec_hangs_la_LIBADD) $(LIBS)
-erasure-code/libec_isa_la-ErasureCode.lo:  \
-	erasure-code/$(am__dirstamp) \
-	erasure-code/$(DEPDIR)/$(am__dirstamp)
-erasure-code/isa/isa-l/erasure_code/$(am__dirstamp):
-	@$(MKDIR_P) erasure-code/isa/isa-l/erasure_code
-	@: > erasure-code/isa/isa-l/erasure_code/$(am__dirstamp)
-erasure-code/isa/isa-l/erasure_code/$(DEPDIR)/$(am__dirstamp):
-	@$(MKDIR_P) erasure-code/isa/isa-l/erasure_code/$(DEPDIR)
-	@: > erasure-code/isa/isa-l/erasure_code/$(DEPDIR)/$(am__dirstamp)
-erasure-code/isa/isa-l/erasure_code/libec_isa_la-ec_base.lo:  \
-	erasure-code/isa/isa-l/erasure_code/$(am__dirstamp) \
-	erasure-code/isa/isa-l/erasure_code/$(DEPDIR)/$(am__dirstamp)
-erasure-code/isa/isa-l/erasure_code/libec_isa_la-ec_highlevel_func.lo:  \
-	erasure-code/isa/isa-l/erasure_code/$(am__dirstamp) \
-	erasure-code/isa/isa-l/erasure_code/$(DEPDIR)/$(am__dirstamp)
-erasure-code/isa/isa-l/erasure_code/libec_isa_la-ec_multibinary.asm.lo:  \
-	erasure-code/isa/isa-l/erasure_code/$(am__dirstamp) \
-	erasure-code/isa/isa-l/erasure_code/$(DEPDIR)/$(am__dirstamp)
-erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_2vect_dot_prod_avx2.asm.lo:  \
-	erasure-code/isa/isa-l/erasure_code/$(am__dirstamp) \
-	erasure-code/isa/isa-l/erasure_code/$(DEPDIR)/$(am__dirstamp)
-erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_2vect_dot_prod_avx.asm.lo:  \
-	erasure-code/isa/isa-l/erasure_code/$(am__dirstamp) \
-	erasure-code/isa/isa-l/erasure_code/$(DEPDIR)/$(am__dirstamp)
-erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_2vect_dot_prod_sse.asm.lo:  \
-	erasure-code/isa/isa-l/erasure_code/$(am__dirstamp) \
-	erasure-code/isa/isa-l/erasure_code/$(DEPDIR)/$(am__dirstamp)
-erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_3vect_dot_prod_avx2.asm.lo:  \
-	erasure-code/isa/isa-l/erasure_code/$(am__dirstamp) \
-	erasure-code/isa/isa-l/erasure_code/$(DEPDIR)/$(am__dirstamp)
-erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_3vect_dot_prod_avx.asm.lo:  \
-	erasure-code/isa/isa-l/erasure_code/$(am__dirstamp) \
-	erasure-code/isa/isa-l/erasure_code/$(DEPDIR)/$(am__dirstamp)
-erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_3vect_dot_prod_sse.asm.lo:  \
-	erasure-code/isa/isa-l/erasure_code/$(am__dirstamp) \
-	erasure-code/isa/isa-l/erasure_code/$(DEPDIR)/$(am__dirstamp)
-erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_4vect_dot_prod_avx2.asm.lo:  \
-	erasure-code/isa/isa-l/erasure_code/$(am__dirstamp) \
-	erasure-code/isa/isa-l/erasure_code/$(DEPDIR)/$(am__dirstamp)
-erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_4vect_dot_prod_avx.asm.lo:  \
-	erasure-code/isa/isa-l/erasure_code/$(am__dirstamp) \
-	erasure-code/isa/isa-l/erasure_code/$(DEPDIR)/$(am__dirstamp)
-erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_4vect_dot_prod_sse.asm.lo:  \
-	erasure-code/isa/isa-l/erasure_code/$(am__dirstamp) \
-	erasure-code/isa/isa-l/erasure_code/$(DEPDIR)/$(am__dirstamp)
-erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_5vect_dot_prod_avx2.asm.lo:  \
-	erasure-code/isa/isa-l/erasure_code/$(am__dirstamp) \
-	erasure-code/isa/isa-l/erasure_code/$(DEPDIR)/$(am__dirstamp)
-erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_5vect_dot_prod_avx.asm.lo:  \
-	erasure-code/isa/isa-l/erasure_code/$(am__dirstamp) \
-	erasure-code/isa/isa-l/erasure_code/$(DEPDIR)/$(am__dirstamp)
-erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_5vect_dot_prod_sse.asm.lo:  \
-	erasure-code/isa/isa-l/erasure_code/$(am__dirstamp) \
-	erasure-code/isa/isa-l/erasure_code/$(DEPDIR)/$(am__dirstamp)
-erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_6vect_dot_prod_avx2.asm.lo:  \
-	erasure-code/isa/isa-l/erasure_code/$(am__dirstamp) \
-	erasure-code/isa/isa-l/erasure_code/$(DEPDIR)/$(am__dirstamp)
-erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_6vect_dot_prod_avx.asm.lo:  \
-	erasure-code/isa/isa-l/erasure_code/$(am__dirstamp) \
-	erasure-code/isa/isa-l/erasure_code/$(DEPDIR)/$(am__dirstamp)
-erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_6vect_dot_prod_sse.asm.lo:  \
-	erasure-code/isa/isa-l/erasure_code/$(am__dirstamp) \
-	erasure-code/isa/isa-l/erasure_code/$(DEPDIR)/$(am__dirstamp)
-erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_vect_dot_prod_avx2.asm.lo:  \
-	erasure-code/isa/isa-l/erasure_code/$(am__dirstamp) \
-	erasure-code/isa/isa-l/erasure_code/$(DEPDIR)/$(am__dirstamp)
-erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_vect_dot_prod_avx.asm.lo:  \
-	erasure-code/isa/isa-l/erasure_code/$(am__dirstamp) \
-	erasure-code/isa/isa-l/erasure_code/$(DEPDIR)/$(am__dirstamp)
-erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_vect_dot_prod_sse.asm.lo:  \
-	erasure-code/isa/isa-l/erasure_code/$(am__dirstamp) \
-	erasure-code/isa/isa-l/erasure_code/$(DEPDIR)/$(am__dirstamp)
-erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_2vect_mad_avx2.asm.lo:  \
-	erasure-code/isa/isa-l/erasure_code/$(am__dirstamp) \
-	erasure-code/isa/isa-l/erasure_code/$(DEPDIR)/$(am__dirstamp)
-erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_2vect_mad_avx.asm.lo:  \
-	erasure-code/isa/isa-l/erasure_code/$(am__dirstamp) \
-	erasure-code/isa/isa-l/erasure_code/$(DEPDIR)/$(am__dirstamp)
-erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_2vect_mad_sse.asm.lo:  \
-	erasure-code/isa/isa-l/erasure_code/$(am__dirstamp) \
-	erasure-code/isa/isa-l/erasure_code/$(DEPDIR)/$(am__dirstamp)
-erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_3vect_mad_avx2.asm.lo:  \
-	erasure-code/isa/isa-l/erasure_code/$(am__dirstamp) \
-	erasure-code/isa/isa-l/erasure_code/$(DEPDIR)/$(am__dirstamp)
-erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_3vect_mad_avx.asm.lo:  \
-	erasure-code/isa/isa-l/erasure_code/$(am__dirstamp) \
-	erasure-code/isa/isa-l/erasure_code/$(DEPDIR)/$(am__dirstamp)
-erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_3vect_mad_sse.asm.lo:  \
-	erasure-code/isa/isa-l/erasure_code/$(am__dirstamp) \
-	erasure-code/isa/isa-l/erasure_code/$(DEPDIR)/$(am__dirstamp)
-erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_4vect_mad_avx2.asm.lo:  \
-	erasure-code/isa/isa-l/erasure_code/$(am__dirstamp) \
-	erasure-code/isa/isa-l/erasure_code/$(DEPDIR)/$(am__dirstamp)
-erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_4vect_mad_avx.asm.lo:  \
-	erasure-code/isa/isa-l/erasure_code/$(am__dirstamp) \
-	erasure-code/isa/isa-l/erasure_code/$(DEPDIR)/$(am__dirstamp)
-erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_4vect_mad_sse.asm.lo:  \
-	erasure-code/isa/isa-l/erasure_code/$(am__dirstamp) \
-	erasure-code/isa/isa-l/erasure_code/$(DEPDIR)/$(am__dirstamp)
-erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_5vect_mad_avx2.asm.lo:  \
-	erasure-code/isa/isa-l/erasure_code/$(am__dirstamp) \
-	erasure-code/isa/isa-l/erasure_code/$(DEPDIR)/$(am__dirstamp)
-erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_5vect_mad_avx.asm.lo:  \
-	erasure-code/isa/isa-l/erasure_code/$(am__dirstamp) \
-	erasure-code/isa/isa-l/erasure_code/$(DEPDIR)/$(am__dirstamp)
-erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_5vect_mad_sse.asm.lo:  \
-	erasure-code/isa/isa-l/erasure_code/$(am__dirstamp) \
-	erasure-code/isa/isa-l/erasure_code/$(DEPDIR)/$(am__dirstamp)
-erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_6vect_mad_avx2.asm.lo:  \
-	erasure-code/isa/isa-l/erasure_code/$(am__dirstamp) \
-	erasure-code/isa/isa-l/erasure_code/$(DEPDIR)/$(am__dirstamp)
-erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_6vect_mad_avx.asm.lo:  \
-	erasure-code/isa/isa-l/erasure_code/$(am__dirstamp) \
-	erasure-code/isa/isa-l/erasure_code/$(DEPDIR)/$(am__dirstamp)
-erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_6vect_mad_sse.asm.lo:  \
-	erasure-code/isa/isa-l/erasure_code/$(am__dirstamp) \
-	erasure-code/isa/isa-l/erasure_code/$(DEPDIR)/$(am__dirstamp)
-erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_vect_mad_avx2.asm.lo:  \
-	erasure-code/isa/isa-l/erasure_code/$(am__dirstamp) \
-	erasure-code/isa/isa-l/erasure_code/$(DEPDIR)/$(am__dirstamp)
-erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_vect_mad_avx.asm.lo:  \
-	erasure-code/isa/isa-l/erasure_code/$(am__dirstamp) \
-	erasure-code/isa/isa-l/erasure_code/$(DEPDIR)/$(am__dirstamp)
-erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_vect_mad_sse.asm.lo:  \
-	erasure-code/isa/isa-l/erasure_code/$(am__dirstamp) \
-	erasure-code/isa/isa-l/erasure_code/$(DEPDIR)/$(am__dirstamp)
-erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_vect_mul_avx.asm.lo:  \
-	erasure-code/isa/isa-l/erasure_code/$(am__dirstamp) \
-	erasure-code/isa/isa-l/erasure_code/$(DEPDIR)/$(am__dirstamp)
-erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_vect_mul_sse.asm.lo:  \
-	erasure-code/isa/isa-l/erasure_code/$(am__dirstamp) \
-	erasure-code/isa/isa-l/erasure_code/$(DEPDIR)/$(am__dirstamp)
-erasure-code/isa/$(am__dirstamp):
-	@$(MKDIR_P) erasure-code/isa
-	@: > erasure-code/isa/$(am__dirstamp)
-erasure-code/isa/$(DEPDIR)/$(am__dirstamp):
-	@$(MKDIR_P) erasure-code/isa/$(DEPDIR)
-	@: > erasure-code/isa/$(DEPDIR)/$(am__dirstamp)
-erasure-code/isa/libec_isa_la-ErasureCodeIsa.lo:  \
-	erasure-code/isa/$(am__dirstamp) \
-	erasure-code/isa/$(DEPDIR)/$(am__dirstamp)
-erasure-code/isa/libec_isa_la-ErasureCodeIsaTableCache.lo:  \
-	erasure-code/isa/$(am__dirstamp) \
-	erasure-code/isa/$(DEPDIR)/$(am__dirstamp)
-erasure-code/isa/libec_isa_la-ErasureCodePluginIsa.lo:  \
-	erasure-code/isa/$(am__dirstamp) \
-	erasure-code/isa/$(DEPDIR)/$(am__dirstamp)
-erasure-code/isa/libec_isa_la-xor_op.lo:  \
-	erasure-code/isa/$(am__dirstamp) \
-	erasure-code/isa/$(DEPDIR)/$(am__dirstamp)
 
 libec_isa.la: $(libec_isa_la_OBJECTS) $(libec_isa_la_DEPENDENCIES) $(EXTRA_libec_isa_la_DEPENDENCIES) 
-	$(AM_V_CXXLD)$(libec_isa_la_LINK) $(am_libec_isa_la_rpath) $(libec_isa_la_OBJECTS) $(libec_isa_la_LIBADD) $(LIBS)
+	$(AM_V_CCLD)$(libec_isa_la_LINK) $(am_libec_isa_la_rpath) $(libec_isa_la_OBJECTS) $(libec_isa_la_LIBADD) $(LIBS)
 erasure-code/jerasure/$(am__dirstamp):
 	@$(MKDIR_P) erasure-code/jerasure
 	@: > erasure-code/jerasure/$(am__dirstamp)
@@ -13356,8 +13775,160 @@ global/signal_handler.lo: global/$(am__dirstamp) \
 common/TrackedOp.lo: common/$(am__dirstamp) \
 	common/$(DEPDIR)/$(am__dirstamp)
 
-libglobal.la: $(libglobal_la_OBJECTS) $(libglobal_la_DEPENDENCIES) $(EXTRA_libglobal_la_DEPENDENCIES) 
-	$(AM_V_CXXLD)$(CXXLINK)  $(libglobal_la_OBJECTS) $(libglobal_la_LIBADD) $(LIBS)
+libglobal.la: $(libglobal_la_OBJECTS) $(libglobal_la_DEPENDENCIES) $(EXTRA_libglobal_la_DEPENDENCIES) 
+	$(AM_V_CXXLD)$(CXXLINK)  $(libglobal_la_OBJECTS) $(libglobal_la_LIBADD) $(LIBS)
+erasure-code/libisa_la-ErasureCode.lo: erasure-code/$(am__dirstamp) \
+	erasure-code/$(DEPDIR)/$(am__dirstamp)
+erasure-code/isa/isa-l/erasure_code/$(am__dirstamp):
+	@$(MKDIR_P) erasure-code/isa/isa-l/erasure_code
+	@: > erasure-code/isa/isa-l/erasure_code/$(am__dirstamp)
+erasure-code/isa/isa-l/erasure_code/$(DEPDIR)/$(am__dirstamp):
+	@$(MKDIR_P) erasure-code/isa/isa-l/erasure_code/$(DEPDIR)
+	@: > erasure-code/isa/isa-l/erasure_code/$(DEPDIR)/$(am__dirstamp)
+erasure-code/isa/isa-l/erasure_code/libisa_la-ec_base.lo:  \
+	erasure-code/isa/isa-l/erasure_code/$(am__dirstamp) \
+	erasure-code/isa/isa-l/erasure_code/$(DEPDIR)/$(am__dirstamp)
+erasure-code/isa/isa-l/erasure_code/libisa_la-ec_highlevel_func.lo:  \
+	erasure-code/isa/isa-l/erasure_code/$(am__dirstamp) \
+	erasure-code/isa/isa-l/erasure_code/$(DEPDIR)/$(am__dirstamp)
+erasure-code/isa/isa-l/erasure_code/libisa_la-ec_multibinary.asm.lo:  \
+	erasure-code/isa/isa-l/erasure_code/$(am__dirstamp) \
+	erasure-code/isa/isa-l/erasure_code/$(DEPDIR)/$(am__dirstamp)
+erasure-code/isa/isa-l/erasure_code/libisa_la-gf_2vect_dot_prod_avx2.asm.lo:  \
+	erasure-code/isa/isa-l/erasure_code/$(am__dirstamp) \
+	erasure-code/isa/isa-l/erasure_code/$(DEPDIR)/$(am__dirstamp)
+erasure-code/isa/isa-l/erasure_code/libisa_la-gf_2vect_dot_prod_avx.asm.lo:  \
+	erasure-code/isa/isa-l/erasure_code/$(am__dirstamp) \
+	erasure-code/isa/isa-l/erasure_code/$(DEPDIR)/$(am__dirstamp)
+erasure-code/isa/isa-l/erasure_code/libisa_la-gf_2vect_dot_prod_sse.asm.lo:  \
+	erasure-code/isa/isa-l/erasure_code/$(am__dirstamp) \
+	erasure-code/isa/isa-l/erasure_code/$(DEPDIR)/$(am__dirstamp)
+erasure-code/isa/isa-l/erasure_code/libisa_la-gf_3vect_dot_prod_avx2.asm.lo:  \
+	erasure-code/isa/isa-l/erasure_code/$(am__dirstamp) \
+	erasure-code/isa/isa-l/erasure_code/$(DEPDIR)/$(am__dirstamp)
+erasure-code/isa/isa-l/erasure_code/libisa_la-gf_3vect_dot_prod_avx.asm.lo:  \
+	erasure-code/isa/isa-l/erasure_code/$(am__dirstamp) \
+	erasure-code/isa/isa-l/erasure_code/$(DEPDIR)/$(am__dirstamp)
+erasure-code/isa/isa-l/erasure_code/libisa_la-gf_3vect_dot_prod_sse.asm.lo:  \
+	erasure-code/isa/isa-l/erasure_code/$(am__dirstamp) \
+	erasure-code/isa/isa-l/erasure_code/$(DEPDIR)/$(am__dirstamp)
+erasure-code/isa/isa-l/erasure_code/libisa_la-gf_4vect_dot_prod_avx2.asm.lo:  \
+	erasure-code/isa/isa-l/erasure_code/$(am__dirstamp) \
+	erasure-code/isa/isa-l/erasure_code/$(DEPDIR)/$(am__dirstamp)
+erasure-code/isa/isa-l/erasure_code/libisa_la-gf_4vect_dot_prod_avx.asm.lo:  \
+	erasure-code/isa/isa-l/erasure_code/$(am__dirstamp) \
+	erasure-code/isa/isa-l/erasure_code/$(DEPDIR)/$(am__dirstamp)
+erasure-code/isa/isa-l/erasure_code/libisa_la-gf_4vect_dot_prod_sse.asm.lo:  \
+	erasure-code/isa/isa-l/erasure_code/$(am__dirstamp) \
+	erasure-code/isa/isa-l/erasure_code/$(DEPDIR)/$(am__dirstamp)
+erasure-code/isa/isa-l/erasure_code/libisa_la-gf_5vect_dot_prod_avx2.asm.lo:  \
+	erasure-code/isa/isa-l/erasure_code/$(am__dirstamp) \
+	erasure-code/isa/isa-l/erasure_code/$(DEPDIR)/$(am__dirstamp)
+erasure-code/isa/isa-l/erasure_code/libisa_la-gf_5vect_dot_prod_avx.asm.lo:  \
+	erasure-code/isa/isa-l/erasure_code/$(am__dirstamp) \
+	erasure-code/isa/isa-l/erasure_code/$(DEPDIR)/$(am__dirstamp)
+erasure-code/isa/isa-l/erasure_code/libisa_la-gf_5vect_dot_prod_sse.asm.lo:  \
+	erasure-code/isa/isa-l/erasure_code/$(am__dirstamp) \
+	erasure-code/isa/isa-l/erasure_code/$(DEPDIR)/$(am__dirstamp)
+erasure-code/isa/isa-l/erasure_code/libisa_la-gf_6vect_dot_prod_avx2.asm.lo:  \
+	erasure-code/isa/isa-l/erasure_code/$(am__dirstamp) \
+	erasure-code/isa/isa-l/erasure_code/$(DEPDIR)/$(am__dirstamp)
+erasure-code/isa/isa-l/erasure_code/libisa_la-gf_6vect_dot_prod_avx.asm.lo:  \
+	erasure-code/isa/isa-l/erasure_code/$(am__dirstamp) \
+	erasure-code/isa/isa-l/erasure_code/$(DEPDIR)/$(am__dirstamp)
+erasure-code/isa/isa-l/erasure_code/libisa_la-gf_6vect_dot_prod_sse.asm.lo:  \
+	erasure-code/isa/isa-l/erasure_code/$(am__dirstamp) \
+	erasure-code/isa/isa-l/erasure_code/$(DEPDIR)/$(am__dirstamp)
+erasure-code/isa/isa-l/erasure_code/libisa_la-gf_vect_dot_prod_avx2.asm.lo:  \
+	erasure-code/isa/isa-l/erasure_code/$(am__dirstamp) \
+	erasure-code/isa/isa-l/erasure_code/$(DEPDIR)/$(am__dirstamp)
+erasure-code/isa/isa-l/erasure_code/libisa_la-gf_vect_dot_prod_avx.asm.lo:  \
+	erasure-code/isa/isa-l/erasure_code/$(am__dirstamp) \
+	erasure-code/isa/isa-l/erasure_code/$(DEPDIR)/$(am__dirstamp)
+erasure-code/isa/isa-l/erasure_code/libisa_la-gf_vect_dot_prod_sse.asm.lo:  \
+	erasure-code/isa/isa-l/erasure_code/$(am__dirstamp) \
+	erasure-code/isa/isa-l/erasure_code/$(DEPDIR)/$(am__dirstamp)
+erasure-code/isa/isa-l/erasure_code/libisa_la-gf_2vect_mad_avx2.asm.lo:  \
+	erasure-code/isa/isa-l/erasure_code/$(am__dirstamp) \
+	erasure-code/isa/isa-l/erasure_code/$(DEPDIR)/$(am__dirstamp)
+erasure-code/isa/isa-l/erasure_code/libisa_la-gf_2vect_mad_avx.asm.lo:  \
+	erasure-code/isa/isa-l/erasure_code/$(am__dirstamp) \
+	erasure-code/isa/isa-l/erasure_code/$(DEPDIR)/$(am__dirstamp)
+erasure-code/isa/isa-l/erasure_code/libisa_la-gf_2vect_mad_sse.asm.lo:  \
+	erasure-code/isa/isa-l/erasure_code/$(am__dirstamp) \
+	erasure-code/isa/isa-l/erasure_code/$(DEPDIR)/$(am__dirstamp)
+erasure-code/isa/isa-l/erasure_code/libisa_la-gf_3vect_mad_avx2.asm.lo:  \
+	erasure-code/isa/isa-l/erasure_code/$(am__dirstamp) \
+	erasure-code/isa/isa-l/erasure_code/$(DEPDIR)/$(am__dirstamp)
+erasure-code/isa/isa-l/erasure_code/libisa_la-gf_3vect_mad_avx.asm.lo:  \
+	erasure-code/isa/isa-l/erasure_code/$(am__dirstamp) \
+	erasure-code/isa/isa-l/erasure_code/$(DEPDIR)/$(am__dirstamp)
+erasure-code/isa/isa-l/erasure_code/libisa_la-gf_3vect_mad_sse.asm.lo:  \
+	erasure-code/isa/isa-l/erasure_code/$(am__dirstamp) \
+	erasure-code/isa/isa-l/erasure_code/$(DEPDIR)/$(am__dirstamp)
+erasure-code/isa/isa-l/erasure_code/libisa_la-gf_4vect_mad_avx2.asm.lo:  \
+	erasure-code/isa/isa-l/erasure_code/$(am__dirstamp) \
+	erasure-code/isa/isa-l/erasure_code/$(DEPDIR)/$(am__dirstamp)
+erasure-code/isa/isa-l/erasure_code/libisa_la-gf_4vect_mad_avx.asm.lo:  \
+	erasure-code/isa/isa-l/erasure_code/$(am__dirstamp) \
+	erasure-code/isa/isa-l/erasure_code/$(DEPDIR)/$(am__dirstamp)
+erasure-code/isa/isa-l/erasure_code/libisa_la-gf_4vect_mad_sse.asm.lo:  \
+	erasure-code/isa/isa-l/erasure_code/$(am__dirstamp) \
+	erasure-code/isa/isa-l/erasure_code/$(DEPDIR)/$(am__dirstamp)
+erasure-code/isa/isa-l/erasure_code/libisa_la-gf_5vect_mad_avx2.asm.lo:  \
+	erasure-code/isa/isa-l/erasure_code/$(am__dirstamp) \
+	erasure-code/isa/isa-l/erasure_code/$(DEPDIR)/$(am__dirstamp)
+erasure-code/isa/isa-l/erasure_code/libisa_la-gf_5vect_mad_avx.asm.lo:  \
+	erasure-code/isa/isa-l/erasure_code/$(am__dirstamp) \
+	erasure-code/isa/isa-l/erasure_code/$(DEPDIR)/$(am__dirstamp)
+erasure-code/isa/isa-l/erasure_code/libisa_la-gf_5vect_mad_sse.asm.lo:  \
+	erasure-code/isa/isa-l/erasure_code/$(am__dirstamp) \
+	erasure-code/isa/isa-l/erasure_code/$(DEPDIR)/$(am__dirstamp)
+erasure-code/isa/isa-l/erasure_code/libisa_la-gf_6vect_mad_avx2.asm.lo:  \
+	erasure-code/isa/isa-l/erasure_code/$(am__dirstamp) \
+	erasure-code/isa/isa-l/erasure_code/$(DEPDIR)/$(am__dirstamp)
+erasure-code/isa/isa-l/erasure_code/libisa_la-gf_6vect_mad_avx.asm.lo:  \
+	erasure-code/isa/isa-l/erasure_code/$(am__dirstamp) \
+	erasure-code/isa/isa-l/erasure_code/$(DEPDIR)/$(am__dirstamp)
+erasure-code/isa/isa-l/erasure_code/libisa_la-gf_6vect_mad_sse.asm.lo:  \
+	erasure-code/isa/isa-l/erasure_code/$(am__dirstamp) \
+	erasure-code/isa/isa-l/erasure_code/$(DEPDIR)/$(am__dirstamp)
+erasure-code/isa/isa-l/erasure_code/libisa_la-gf_vect_mad_avx2.asm.lo:  \
+	erasure-code/isa/isa-l/erasure_code/$(am__dirstamp) \
+	erasure-code/isa/isa-l/erasure_code/$(DEPDIR)/$(am__dirstamp)
+erasure-code/isa/isa-l/erasure_code/libisa_la-gf_vect_mad_avx.asm.lo:  \
+	erasure-code/isa/isa-l/erasure_code/$(am__dirstamp) \
+	erasure-code/isa/isa-l/erasure_code/$(DEPDIR)/$(am__dirstamp)
+erasure-code/isa/isa-l/erasure_code/libisa_la-gf_vect_mad_sse.asm.lo:  \
+	erasure-code/isa/isa-l/erasure_code/$(am__dirstamp) \
+	erasure-code/isa/isa-l/erasure_code/$(DEPDIR)/$(am__dirstamp)
+erasure-code/isa/isa-l/erasure_code/libisa_la-gf_vect_mul_avx.asm.lo:  \
+	erasure-code/isa/isa-l/erasure_code/$(am__dirstamp) \
+	erasure-code/isa/isa-l/erasure_code/$(DEPDIR)/$(am__dirstamp)
+erasure-code/isa/isa-l/erasure_code/libisa_la-gf_vect_mul_sse.asm.lo:  \
+	erasure-code/isa/isa-l/erasure_code/$(am__dirstamp) \
+	erasure-code/isa/isa-l/erasure_code/$(DEPDIR)/$(am__dirstamp)
+erasure-code/isa/$(am__dirstamp):
+	@$(MKDIR_P) erasure-code/isa
+	@: > erasure-code/isa/$(am__dirstamp)
+erasure-code/isa/$(DEPDIR)/$(am__dirstamp):
+	@$(MKDIR_P) erasure-code/isa/$(DEPDIR)
+	@: > erasure-code/isa/$(DEPDIR)/$(am__dirstamp)
+erasure-code/isa/libisa_la-ErasureCodeIsa.lo:  \
+	erasure-code/isa/$(am__dirstamp) \
+	erasure-code/isa/$(DEPDIR)/$(am__dirstamp)
+erasure-code/isa/libisa_la-ErasureCodeIsaTableCache.lo:  \
+	erasure-code/isa/$(am__dirstamp) \
+	erasure-code/isa/$(DEPDIR)/$(am__dirstamp)
+erasure-code/isa/libisa_la-ErasureCodePluginIsa.lo:  \
+	erasure-code/isa/$(am__dirstamp) \
+	erasure-code/isa/$(DEPDIR)/$(am__dirstamp)
+erasure-code/isa/libisa_la-xor_op.lo:  \
+	erasure-code/isa/$(am__dirstamp) \
+	erasure-code/isa/$(DEPDIR)/$(am__dirstamp)
+
+libisa.la: $(libisa_la_OBJECTS) $(libisa_la_DEPENDENCIES) $(EXTRA_libisa_la_DEPENDENCIES) 
+	$(AM_V_CXXLD)$(libisa_la_LINK) $(am_libisa_la_rpath) $(libisa_la_OBJECTS) $(libisa_la_LIBADD) $(LIBS)
 journal/$(am__dirstamp):
 	@$(MKDIR_P) journal
 	@: > journal/$(am__dirstamp)
@@ -13859,6 +14430,42 @@ librbd/operation/TrimRequest.lo: librbd/operation/$(am__dirstamp) \
 
 librbd_internal.la: $(librbd_internal_la_OBJECTS) $(librbd_internal_la_DEPENDENCIES) $(EXTRA_librbd_internal_la_DEPENDENCIES) 
 	$(AM_V_CXXLD)$(CXXLINK) $(am_librbd_internal_la_rpath) $(librbd_internal_la_OBJECTS) $(librbd_internal_la_LIBADD) $(LIBS)
+tools/rbd_mirror/$(am__dirstamp):
+	@$(MKDIR_P) tools/rbd_mirror
+	@: > tools/rbd_mirror/$(am__dirstamp)
+tools/rbd_mirror/$(DEPDIR)/$(am__dirstamp):
+	@$(MKDIR_P) tools/rbd_mirror/$(DEPDIR)
+	@: > tools/rbd_mirror/$(DEPDIR)/$(am__dirstamp)
+tools/rbd_mirror/ClusterWatcher.lo: tools/rbd_mirror/$(am__dirstamp) \
+	tools/rbd_mirror/$(DEPDIR)/$(am__dirstamp)
+tools/rbd_mirror/ImageReplayer.lo: tools/rbd_mirror/$(am__dirstamp) \
+	tools/rbd_mirror/$(DEPDIR)/$(am__dirstamp)
+tools/rbd_mirror/Mirror.lo: tools/rbd_mirror/$(am__dirstamp) \
+	tools/rbd_mirror/$(DEPDIR)/$(am__dirstamp)
+tools/rbd_mirror/PoolWatcher.lo: tools/rbd_mirror/$(am__dirstamp) \
+	tools/rbd_mirror/$(DEPDIR)/$(am__dirstamp)
+tools/rbd_mirror/Replayer.lo: tools/rbd_mirror/$(am__dirstamp) \
+	tools/rbd_mirror/$(DEPDIR)/$(am__dirstamp)
+tools/rbd_mirror/types.lo: tools/rbd_mirror/$(am__dirstamp) \
+	tools/rbd_mirror/$(DEPDIR)/$(am__dirstamp)
+
+librbd_mirror_internal.la: $(librbd_mirror_internal_la_OBJECTS) $(librbd_mirror_internal_la_DEPENDENCIES) $(EXTRA_librbd_mirror_internal_la_DEPENDENCIES) 
+	$(AM_V_CXXLD)$(CXXLINK) $(am_librbd_mirror_internal_la_rpath) $(librbd_mirror_internal_la_OBJECTS) $(librbd_mirror_internal_la_LIBADD) $(LIBS)
+test/rbd_mirror/$(am__dirstamp):
+	@$(MKDIR_P) test/rbd_mirror
+	@: > test/rbd_mirror/$(am__dirstamp)
+test/rbd_mirror/$(DEPDIR)/$(am__dirstamp):
+	@$(MKDIR_P) test/rbd_mirror/$(DEPDIR)
+	@: > test/rbd_mirror/$(DEPDIR)/$(am__dirstamp)
+test/rbd_mirror/librbd_mirror_test_la-test_ClusterWatcher.lo:  \
+	test/rbd_mirror/$(am__dirstamp) \
+	test/rbd_mirror/$(DEPDIR)/$(am__dirstamp)
+test/rbd_mirror/librbd_mirror_test_la-test_PoolWatcher.lo:  \
+	test/rbd_mirror/$(am__dirstamp) \
+	test/rbd_mirror/$(DEPDIR)/$(am__dirstamp)
+
+librbd_mirror_test.la: $(librbd_mirror_test_la_OBJECTS) $(librbd_mirror_test_la_DEPENDENCIES) $(EXTRA_librbd_mirror_test_la_DEPENDENCIES) 
+	$(AM_V_CXXLD)$(librbd_mirror_test_la_LINK) $(am_librbd_mirror_test_la_rpath) $(librbd_mirror_test_la_OBJECTS) $(librbd_mirror_test_la_LIBADD) $(LIBS)
 rbd_replay/$(am__dirstamp):
 	@$(MKDIR_P) rbd_replay
 	@: > rbd_replay/$(am__dirstamp)
@@ -13934,7 +14541,7 @@ tracing/librbd_tp_la-librbd.lo: tracing/$(am__dirstamp) \
 
 librbd_tp.la: $(librbd_tp_la_OBJECTS) $(librbd_tp_la_DEPENDENCIES) $(EXTRA_librbd_tp_la_DEPENDENCIES) 
 	$(AM_V_CCLD)$(librbd_tp_la_LINK) $(am_librbd_tp_la_rpath) $(librbd_tp_la_OBJECTS) $(librbd_tp_la_LIBADD) $(LIBS)
-librbd/journal/Entries.lo: librbd/journal/$(am__dirstamp) \
+librbd/journal/Types.lo: librbd/journal/$(am__dirstamp) \
 	librbd/journal/$(DEPDIR)/$(am__dirstamp)
 librbd/WatchNotifyTypes.lo: librbd/$(am__dirstamp) \
 	librbd/$(DEPDIR)/$(am__dirstamp)
@@ -13959,6 +14566,8 @@ rgw/librgw_la-rgw_usage.lo: rgw/$(am__dirstamp) \
 	rgw/$(DEPDIR)/$(am__dirstamp)
 rgw/librgw_la-rgw_json_enc.lo: rgw/$(am__dirstamp) \
 	rgw/$(DEPDIR)/$(am__dirstamp)
+rgw/librgw_la-rgw_xml_enc.lo: rgw/$(am__dirstamp) \
+	rgw/$(DEPDIR)/$(am__dirstamp)
 rgw/librgw_la-rgw_user.lo: rgw/$(am__dirstamp) \
 	rgw/$(DEPDIR)/$(am__dirstamp)
 rgw/librgw_la-rgw_bucket.lo: rgw/$(am__dirstamp) \
@@ -14013,6 +14622,8 @@ rgw/librgw_la-rgw_dencoder.lo: rgw/$(am__dirstamp) \
 	rgw/$(DEPDIR)/$(am__dirstamp)
 rgw/librgw_la-rgw_object_expirer_core.lo: rgw/$(am__dirstamp) \
 	rgw/$(DEPDIR)/$(am__dirstamp)
+rgw/librgw_la-rgw_website.lo: rgw/$(am__dirstamp) \
+	rgw/$(DEPDIR)/$(am__dirstamp)
 
 librgw.la: $(librgw_la_OBJECTS) $(librgw_la_DEPENDENCIES) $(EXTRA_librgw_la_DEPENDENCIES) 
 	$(AM_V_CXXLD)$(librgw_la_LINK) $(am_librgw_la_rpath) $(librgw_la_OBJECTS) $(librgw_la_LIBADD) $(LIBS)
@@ -15166,6 +15777,13 @@ test/system/st_rados_notify.$(OBJEXT): test/system/$(am__dirstamp) \
 ceph_test_rados_watch_notify$(EXEEXT): $(ceph_test_rados_watch_notify_OBJECTS) $(ceph_test_rados_watch_notify_DEPENDENCIES) $(EXTRA_ceph_test_rados_watch_notify_DEPENDENCIES) 
 	@rm -f ceph_test_rados_watch_notify$(EXEEXT)
 	$(AM_V_CXXLD)$(CXXLINK) $(ceph_test_rados_watch_notify_OBJECTS) $(ceph_test_rados_watch_notify_LDADD) $(LIBS)
+test/rbd_mirror/ceph_test_rbd_mirror-test_main.$(OBJEXT):  \
+	test/rbd_mirror/$(am__dirstamp) \
+	test/rbd_mirror/$(DEPDIR)/$(am__dirstamp)
+
+ceph_test_rbd_mirror$(EXEEXT): $(ceph_test_rbd_mirror_OBJECTS) $(ceph_test_rbd_mirror_DEPENDENCIES) $(EXTRA_ceph_test_rbd_mirror_DEPENDENCIES) 
+	@rm -f ceph_test_rbd_mirror$(EXEEXT)
+	$(AM_V_CXXLD)$(ceph_test_rbd_mirror_LINK) $(ceph_test_rbd_mirror_OBJECTS) $(ceph_test_rbd_mirror_LDADD) $(LIBS)
 test/test_rewrite_latency.$(OBJEXT): test/$(am__dirstamp) \
 	test/$(DEPDIR)/$(am__dirstamp)
 
@@ -15499,6 +16117,12 @@ rbd_fuse/rbd_fuse-rbd-fuse.$(OBJEXT): rbd_fuse/$(am__dirstamp) \
 rbd-fuse$(EXEEXT): $(rbd_fuse_OBJECTS) $(rbd_fuse_DEPENDENCIES) $(EXTRA_rbd_fuse_DEPENDENCIES) 
 	@rm -f rbd-fuse$(EXEEXT)
 	$(AM_V_CXXLD)$(rbd_fuse_LINK) $(rbd_fuse_OBJECTS) $(rbd_fuse_LDADD) $(LIBS)
+tools/rbd_mirror/main.$(OBJEXT): tools/rbd_mirror/$(am__dirstamp) \
+	tools/rbd_mirror/$(DEPDIR)/$(am__dirstamp)
+
+rbd-mirror$(EXEEXT): $(rbd_mirror_OBJECTS) $(rbd_mirror_DEPENDENCIES) $(EXTRA_rbd_mirror_DEPENDENCIES) 
+	@rm -f rbd-mirror$(EXEEXT)
+	$(AM_V_CXXLD)$(CXXLINK) $(rbd_mirror_OBJECTS) $(rbd_mirror_LDADD) $(LIBS)
 tools/rbd_nbd/$(am__dirstamp):
 	@$(MKDIR_P) tools/rbd_nbd
 	@: > tools/rbd_nbd/$(am__dirstamp)
@@ -15604,6 +16228,8 @@ rgw/test_build_librgw-rgw_usage.$(OBJEXT): rgw/$(am__dirstamp) \
 	rgw/$(DEPDIR)/$(am__dirstamp)
 rgw/test_build_librgw-rgw_json_enc.$(OBJEXT): rgw/$(am__dirstamp) \
 	rgw/$(DEPDIR)/$(am__dirstamp)
+rgw/test_build_librgw-rgw_xml_enc.$(OBJEXT): rgw/$(am__dirstamp) \
+	rgw/$(DEPDIR)/$(am__dirstamp)
 rgw/test_build_librgw-rgw_user.$(OBJEXT): rgw/$(am__dirstamp) \
 	rgw/$(DEPDIR)/$(am__dirstamp)
 rgw/test_build_librgw-rgw_bucket.$(OBJEXT): rgw/$(am__dirstamp) \
@@ -15658,6 +16284,8 @@ rgw/test_build_librgw-rgw_dencoder.$(OBJEXT): rgw/$(am__dirstamp) \
 	rgw/$(DEPDIR)/$(am__dirstamp)
 rgw/test_build_librgw-rgw_object_expirer_core.$(OBJEXT):  \
 	rgw/$(am__dirstamp) rgw/$(DEPDIR)/$(am__dirstamp)
+rgw/test_build_librgw-rgw_website.$(OBJEXT): rgw/$(am__dirstamp) \
+	rgw/$(DEPDIR)/$(am__dirstamp)
 
 test_build_librgw$(EXEEXT): $(test_build_librgw_OBJECTS) $(test_build_librgw_DEPENDENCIES) $(EXTRA_test_build_librgw_DEPENDENCIES) 
 	@rm -f test_build_librgw$(EXEEXT)
@@ -15782,6 +16410,22 @@ compressor/snappy/unittest_compression_plugin_snappy-CompressionPluginSnappy.$(O
 unittest_compression_plugin_snappy$(EXEEXT): $(unittest_compression_plugin_snappy_OBJECTS) $(unittest_compression_plugin_snappy_DEPENDENCIES) $(EXTRA_unittest_compression_plugin_snappy_DEPENDENCIES) 
 	@rm -f unittest_compression_plugin_snappy$(EXEEXT)
 	$(AM_V_CXXLD)$(unittest_compression_plugin_snappy_LINK) $(unittest_compression_plugin_snappy_OBJECTS) $(unittest_compression_plugin_snappy_LDADD) $(LIBS)
+test/compressor/unittest_compression_plugin_zlib-test_compression_plugin_zlib.$(OBJEXT):  \
+	test/compressor/$(am__dirstamp) \
+	test/compressor/$(DEPDIR)/$(am__dirstamp)
+compressor/unittest_compression_plugin_zlib-Compressor.$(OBJEXT):  \
+	compressor/$(am__dirstamp) \
+	compressor/$(DEPDIR)/$(am__dirstamp)
+compressor/zlib/unittest_compression_plugin_zlib-CompressionPluginZlib.$(OBJEXT):  \
+	compressor/zlib/$(am__dirstamp) \
+	compressor/zlib/$(DEPDIR)/$(am__dirstamp)
+compressor/zlib/unittest_compression_plugin_zlib-CompressionZlib.$(OBJEXT):  \
+	compressor/zlib/$(am__dirstamp) \
+	compressor/zlib/$(DEPDIR)/$(am__dirstamp)
+
+unittest_compression_plugin_zlib$(EXEEXT): $(unittest_compression_plugin_zlib_OBJECTS) $(unittest_compression_plugin_zlib_DEPENDENCIES) $(EXTRA_unittest_compression_plugin_zlib_DEPENDENCIES) 
+	@rm -f unittest_compression_plugin_zlib$(EXEEXT)
+	$(AM_V_CXXLD)$(unittest_compression_plugin_zlib_LINK) $(unittest_compression_plugin_zlib_OBJECTS) $(unittest_compression_plugin_zlib_LDADD) $(LIBS)
 test/compressor/unittest_compression_snappy-test_compression_snappy.$(OBJEXT):  \
 	test/compressor/$(am__dirstamp) \
 	test/compressor/$(DEPDIR)/$(am__dirstamp)
@@ -15795,6 +16439,22 @@ compressor/snappy/unittest_compression_snappy-CompressionPluginSnappy.$(OBJEXT):
 unittest_compression_snappy$(EXEEXT): $(unittest_compression_snappy_OBJECTS) $(unittest_compression_snappy_DEPENDENCIES) $(EXTRA_unittest_compression_snappy_DEPENDENCIES) 
 	@rm -f unittest_compression_snappy$(EXEEXT)
 	$(AM_V_CXXLD)$(unittest_compression_snappy_LINK) $(unittest_compression_snappy_OBJECTS) $(unittest_compression_snappy_LDADD) $(LIBS)
+test/compressor/unittest_compression_zlib-test_compression_zlib.$(OBJEXT):  \
+	test/compressor/$(am__dirstamp) \
+	test/compressor/$(DEPDIR)/$(am__dirstamp)
+compressor/unittest_compression_zlib-Compressor.$(OBJEXT):  \
+	compressor/$(am__dirstamp) \
+	compressor/$(DEPDIR)/$(am__dirstamp)
+compressor/zlib/unittest_compression_zlib-CompressionPluginZlib.$(OBJEXT):  \
+	compressor/zlib/$(am__dirstamp) \
+	compressor/zlib/$(DEPDIR)/$(am__dirstamp)
+compressor/zlib/unittest_compression_zlib-CompressionZlib.$(OBJEXT):  \
+	compressor/zlib/$(am__dirstamp) \
+	compressor/zlib/$(DEPDIR)/$(am__dirstamp)
+
+unittest_compression_zlib$(EXEEXT): $(unittest_compression_zlib_OBJECTS) $(unittest_compression_zlib_DEPENDENCIES) $(EXTRA_unittest_compression_zlib_DEPENDENCIES) 
+	@rm -f unittest_compression_zlib$(EXEEXT)
+	$(AM_V_CXXLD)$(unittest_compression_zlib_LINK) $(unittest_compression_zlib_OBJECTS) $(unittest_compression_zlib_LDADD) $(LIBS)
 test/common/unittest_config-test_config.$(OBJEXT):  \
 	test/common/$(am__dirstamp) \
 	test/common/$(DEPDIR)/$(am__dirstamp)
@@ -16341,6 +17001,13 @@ test/osd/unittest_hitset-hitset.$(OBJEXT): test/osd/$(am__dirstamp) \
 unittest_hitset$(EXEEXT): $(unittest_hitset_OBJECTS) $(unittest_hitset_DEPENDENCIES) $(EXTRA_unittest_hitset_DEPENDENCIES) 
 	@rm -f unittest_hitset$(EXEEXT)
 	$(AM_V_CXXLD)$(unittest_hitset_LINK) $(unittest_hitset_OBJECTS) $(unittest_hitset_LDADD) $(LIBS)
+test/common/unittest_interval_set-test_interval_set.$(OBJEXT):  \
+	test/common/$(am__dirstamp) \
+	test/common/$(DEPDIR)/$(am__dirstamp)
+
+unittest_interval_set$(EXEEXT): $(unittest_interval_set_OBJECTS) $(unittest_interval_set_DEPENDENCIES) $(EXTRA_unittest_interval_set_DEPENDENCIES) 
+	@rm -f unittest_interval_set$(EXEEXT)
+	$(AM_V_CXXLD)$(unittest_interval_set_LINK) $(unittest_interval_set_OBJECTS) $(unittest_interval_set_LDADD) $(LIBS)
 test/common/unittest_io_priority-test_io_priority.$(OBJEXT):  \
 	test/common/$(am__dirstamp) \
 	test/common/$(DEPDIR)/$(am__dirstamp)
@@ -16453,6 +17120,15 @@ test/librbd/exclusive_lock/unittest_librbd-test_mock_AcquireRequest.$(OBJEXT):
 test/librbd/exclusive_lock/unittest_librbd-test_mock_ReleaseRequest.$(OBJEXT):  \
 	test/librbd/exclusive_lock/$(am__dirstamp) \
 	test/librbd/exclusive_lock/$(DEPDIR)/$(am__dirstamp)
+test/librbd/image/$(am__dirstamp):
+	@$(MKDIR_P) test/librbd/image
+	@: > test/librbd/image/$(am__dirstamp)
+test/librbd/image/$(DEPDIR)/$(am__dirstamp):
+	@$(MKDIR_P) test/librbd/image/$(DEPDIR)
+	@: > test/librbd/image/$(DEPDIR)/$(am__dirstamp)
+test/librbd/image/unittest_librbd-test_mock_RefreshRequest.$(OBJEXT):  \
+	test/librbd/image/$(am__dirstamp) \
+	test/librbd/image/$(DEPDIR)/$(am__dirstamp)
 test/librbd/journal/unittest_librbd-test_mock_Replay.$(OBJEXT):  \
 	test/librbd/journal/$(am__dirstamp) \
 	test/librbd/journal/$(DEPDIR)/$(am__dirstamp)
@@ -16572,6 +17248,13 @@ test/mon/unittest_mon_pgmap-PGMap.$(OBJEXT): test/mon/$(am__dirstamp) \
 unittest_mon_pgmap$(EXEEXT): $(unittest_mon_pgmap_OBJECTS) $(unittest_mon_pgmap_DEPENDENCIES) $(EXTRA_unittest_mon_pgmap_DEPENDENCIES) 
 	@rm -f unittest_mon_pgmap$(EXEEXT)
 	$(AM_V_CXXLD)$(unittest_mon_pgmap_LINK) $(unittest_mon_pgmap_OBJECTS) $(unittest_mon_pgmap_LDADD) $(LIBS)
+test/common/unittest_mutex_debug-test_mutex_debug.$(OBJEXT):  \
+	test/common/$(am__dirstamp) \
+	test/common/$(DEPDIR)/$(am__dirstamp)
+
+unittest_mutex_debug$(EXEEXT): $(unittest_mutex_debug_OBJECTS) $(unittest_mutex_debug_DEPENDENCIES) $(EXTRA_unittest_mutex_debug_DEPENDENCIES) 
+	@rm -f unittest_mutex_debug$(EXEEXT)
+	$(AM_V_CXXLD)$(unittest_mutex_debug_LINK) $(unittest_mutex_debug_OBJECTS) $(unittest_mutex_debug_LDADD) $(LIBS)
 test/on_exit.$(OBJEXT): test/$(am__dirstamp) \
 	test/$(DEPDIR)/$(am__dirstamp)
 
@@ -16633,6 +17316,13 @@ test/common/unittest_prioritized_queue-test_prioritized_queue.$(OBJEXT):  \
 unittest_prioritized_queue$(EXEEXT): $(unittest_prioritized_queue_OBJECTS) $(unittest_prioritized_queue_DEPENDENCIES) $(EXTRA_unittest_prioritized_queue_DEPENDENCIES) 
 	@rm -f unittest_prioritized_queue$(EXEEXT)
 	$(AM_V_CXXLD)$(unittest_prioritized_queue_LINK) $(unittest_prioritized_queue_OBJECTS) $(unittest_prioritized_queue_LDADD) $(LIBS)
+test/rbd_mirror/unittest_rbd_mirror-test_main.$(OBJEXT):  \
+	test/rbd_mirror/$(am__dirstamp) \
+	test/rbd_mirror/$(DEPDIR)/$(am__dirstamp)
+
+unittest_rbd_mirror$(EXEEXT): $(unittest_rbd_mirror_OBJECTS) $(unittest_rbd_mirror_DEPENDENCIES) $(EXTRA_unittest_rbd_mirror_DEPENDENCIES) 
+	@rm -f unittest_rbd_mirror$(EXEEXT)
+	$(AM_V_CXXLD)$(unittest_rbd_mirror_LINK) $(unittest_rbd_mirror_OBJECTS) $(unittest_rbd_mirror_LDADD) $(LIBS)
 test/unittest_rbd_replay-test_rbd_replay.$(OBJEXT):  \
 	test/$(am__dirstamp) test/$(DEPDIR)/$(am__dirstamp)
 
@@ -16687,6 +17377,13 @@ test/common/unittest_sharedptr_registry-test_sharedptr_registry.$(OBJEXT):  \
 unittest_sharedptr_registry$(EXEEXT): $(unittest_sharedptr_registry_OBJECTS) $(unittest_sharedptr_registry_DEPENDENCIES) $(EXTRA_unittest_sharedptr_registry_DEPENDENCIES) 
 	@rm -f unittest_sharedptr_registry$(EXEEXT)
 	$(AM_V_CXXLD)$(unittest_sharedptr_registry_LINK) $(unittest_sharedptr_registry_OBJECTS) $(unittest_sharedptr_registry_LDADD) $(LIBS)
+test/common/unittest_shunique_lock-test_shunique_lock.$(OBJEXT):  \
+	test/common/$(am__dirstamp) \
+	test/common/$(DEPDIR)/$(am__dirstamp)
+
+unittest_shunique_lock$(EXEEXT): $(unittest_shunique_lock_OBJECTS) $(unittest_shunique_lock_DEPENDENCIES) $(EXTRA_unittest_shunique_lock_DEPENDENCIES) 
+	@rm -f unittest_shunique_lock$(EXEEXT)
+	$(AM_V_CXXLD)$(unittest_shunique_lock_LINK) $(unittest_shunique_lock_OBJECTS) $(unittest_shunique_lock_LDADD) $(LIBS)
 test/unittest_signals-signals.$(OBJEXT): test/$(am__dirstamp) \
 	test/$(DEPDIR)/$(am__dirstamp)
 
@@ -16764,6 +17461,13 @@ test/common/unittest_time-test_time.$(OBJEXT):  \
 unittest_time$(EXEEXT): $(unittest_time_OBJECTS) $(unittest_time_DEPENDENCIES) $(EXTRA_unittest_time_DEPENDENCIES) 
 	@rm -f unittest_time$(EXEEXT)
 	$(AM_V_CXXLD)$(unittest_time_LINK) $(unittest_time_OBJECTS) $(unittest_time_LDADD) $(LIBS)
+test/objectstore/unittest_transaction-test_transaction.$(OBJEXT):  \
+	test/objectstore/$(am__dirstamp) \
+	test/objectstore/$(DEPDIR)/$(am__dirstamp)
+
+unittest_transaction$(EXEEXT): $(unittest_transaction_OBJECTS) $(unittest_transaction_DEPENDENCIES) $(EXTRA_unittest_transaction_DEPENDENCIES) 
+	@rm -f unittest_transaction$(EXEEXT)
+	$(AM_V_CXXLD)$(unittest_transaction_LINK) $(unittest_transaction_OBJECTS) $(unittest_transaction_LDADD) $(LIBS)
 test/unittest_utf8-utf8.$(OBJEXT): test/$(am__dirstamp) \
 	test/$(DEPDIR)/$(am__dirstamp)
 
@@ -16777,6 +17481,13 @@ test/common/unittest_util-test_util.$(OBJEXT):  \
 unittest_util$(EXEEXT): $(unittest_util_OBJECTS) $(unittest_util_DEPENDENCIES) $(EXTRA_unittest_util_DEPENDENCIES) 
 	@rm -f unittest_util$(EXEEXT)
 	$(AM_V_CXXLD)$(unittest_util_LINK) $(unittest_util_OBJECTS) $(unittest_util_LDADD) $(LIBS)
+test/common/unittest_weighted_priority_queue-test_weighted_priority_queue.$(OBJEXT):  \
+	test/common/$(am__dirstamp) \
+	test/common/$(DEPDIR)/$(am__dirstamp)
+
+unittest_weighted_priority_queue$(EXEEXT): $(unittest_weighted_priority_queue_OBJECTS) $(unittest_weighted_priority_queue_DEPENDENCIES) $(EXTRA_unittest_weighted_priority_queue_DEPENDENCIES) 
+	@rm -f unittest_weighted_priority_queue$(EXEEXT)
+	$(AM_V_CXXLD)$(unittest_weighted_priority_queue_LINK) $(unittest_weighted_priority_queue_OBJECTS) $(unittest_weighted_priority_queue_LDADD) $(LIBS)
 test/unittest_workqueue-test_workqueue.$(OBJEXT):  \
 	test/$(am__dirstamp) test/$(DEPDIR)/$(am__dirstamp)
 
@@ -17019,41 +17730,6 @@ uninstall-sbinSCRIPTS:
 	files=`for p in $$list; do echo "$$p"; done | \
 	       sed -e 's,.*/,,;$(transform)'`; \
 	dir='$(DESTDIR)$(sbindir)'; $(am__uninstall_files_from_dir)
-install-shell_commonSCRIPTS: $(shell_common_SCRIPTS)
-	@$(NORMAL_INSTALL)
-	@list='$(shell_common_SCRIPTS)'; test -n "$(shell_commondir)" || list=; \
-	if test -n "$$list"; then \
-	  echo " $(MKDIR_P) '$(DESTDIR)$(shell_commondir)'"; \
-	  $(MKDIR_P) "$(DESTDIR)$(shell_commondir)" || exit 1; \
-	fi; \
-	for p in $$list; do \
-	  if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \
-	  if test -f "$$d$$p"; then echo "$$d$$p"; echo "$$p"; else :; fi; \
-	done | \
-	sed -e 'p;s,.*/,,;n' \
-	    -e 'h;s|.*|.|' \
-	    -e 'p;x;s,.*/,,;$(transform)' | sed 'N;N;N;s,\n, ,g' | \
-	$(AWK) 'BEGIN { files["."] = ""; dirs["."] = 1; } \
-	  { d=$$3; if (dirs[d] != 1) { print "d", d; dirs[d] = 1 } \
-	    if ($$2 == $$4) { files[d] = files[d] " " $$1; \
-	      if (++n[d] == $(am__install_max)) { \
-		print "f", d, files[d]; n[d] = 0; files[d] = "" } } \
-	    else { print "f", d "/" $$4, $$1 } } \
-	  END { for (d in files) print "f", d, files[d] }' | \
-	while read type dir files; do \
-	     if test "$$dir" = .; then dir=; else dir=/$$dir; fi; \
-	     test -z "$$files" || { \
-	       echo " $(INSTALL_SCRIPT) $$files '$(DESTDIR)$(shell_commondir)$$dir'"; \
-	       $(INSTALL_SCRIPT) $$files "$(DESTDIR)$(shell_commondir)$$dir" || exit $$?; \
-	     } \
-	; done
-
-uninstall-shell_commonSCRIPTS:
-	@$(NORMAL_UNINSTALL)
-	@list='$(shell_common_SCRIPTS)'; test -n "$(shell_commondir)" || exit 0; \
-	files=`for p in $$list; do echo "$$p"; done | \
-	       sed -e 's,.*/,,;$(transform)'`; \
-	dir='$(DESTDIR)$(shell_commondir)'; $(am__uninstall_files_from_dir)
 install-su_sbinSCRIPTS: $(su_sbin_SCRIPTS)
 	@$(NORMAL_INSTALL)
 	@list='$(su_sbin_SCRIPTS)'; test -n "$(su_sbindir)" || list=; \
@@ -17140,6 +17816,8 @@ mostlyclean-compile:
 	-rm -f compressor/*.lo
 	-rm -f compressor/snappy/*.$(OBJEXT)
 	-rm -f compressor/snappy/*.lo
+	-rm -f compressor/zlib/*.$(OBJEXT)
+	-rm -f compressor/zlib/*.lo
 	-rm -f crush/*.$(OBJEXT)
 	-rm -f crush/*.lo
 	-rm -f erasure-code/*.$(OBJEXT)
@@ -17207,7 +17885,6 @@ mostlyclean-compile:
 	-rm -f os/bluestore/*.$(OBJEXT)
 	-rm -f os/filestore/*.$(OBJEXT)
 	-rm -f os/fs/*.$(OBJEXT)
-	-rm -f os/keyvaluestore/*.$(OBJEXT)
 	-rm -f os/kstore/*.$(OBJEXT)
 	-rm -f os/memstore/*.$(OBJEXT)
 	-rm -f osd/*.$(OBJEXT)
@@ -17255,6 +17932,7 @@ mostlyclean-compile:
 	-rm -f test/librbd/*.$(OBJEXT)
 	-rm -f test/librbd/*.lo
 	-rm -f test/librbd/exclusive_lock/*.$(OBJEXT)
+	-rm -f test/librbd/image/*.$(OBJEXT)
 	-rm -f test/librbd/journal/*.$(OBJEXT)
 	-rm -f test/librbd/journal/*.lo
 	-rm -f test/librbd/object_map/*.$(OBJEXT)
@@ -17267,6 +17945,8 @@ mostlyclean-compile:
 	-rm -f test/os/*.$(OBJEXT)
 	-rm -f test/osd/*.$(OBJEXT)
 	-rm -f test/osdc/*.$(OBJEXT)
+	-rm -f test/rbd_mirror/*.$(OBJEXT)
+	-rm -f test/rbd_mirror/*.lo
 	-rm -f test/rgw/*.$(OBJEXT)
 	-rm -f test/system/*.$(OBJEXT)
 	-rm -f test/system/*.lo
@@ -17275,6 +17955,8 @@ mostlyclean-compile:
 	-rm -f tools/rados/*.$(OBJEXT)
 	-rm -f tools/rbd/*.$(OBJEXT)
 	-rm -f tools/rbd/action/*.$(OBJEXT)
+	-rm -f tools/rbd_mirror/*.$(OBJEXT)
+	-rm -f tools/rbd_mirror/*.lo
 	-rm -f tools/rbd_nbd/*.$(OBJEXT)
 	-rm -f tracing/*.$(OBJEXT)
 	-rm -f tracing/*.lo
@@ -17370,6 +18052,8 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote at common/$(DEPDIR)/DecayCounter.Plo at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at common/$(DEPDIR)/Finisher.Plo at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at common/$(DEPDIR)/Formatter.Plo at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at common/$(DEPDIR)/Graylog.Plo at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at common/$(DEPDIR)/HTMLFormatter.Plo at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at common/$(DEPDIR)/HeartbeatMap.Plo at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at common/$(DEPDIR)/LogClient.Plo at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at common/$(DEPDIR)/LogEntry.Plo at am__quote@
@@ -17436,6 +18120,7 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote at common/$(DEPDIR)/lockdep.Plo at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at common/$(DEPDIR)/mime.Plo at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at common/$(DEPDIR)/module.Plo at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at common/$(DEPDIR)/mutex_debug.Plo at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at common/$(DEPDIR)/obj_bencher.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at common/$(DEPDIR)/page.Plo at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at common/$(DEPDIR)/perf_counters.Plo at am__quote@
@@ -17462,12 +18147,21 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote at compressor/$(DEPDIR)/Compressor.Plo at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at compressor/$(DEPDIR)/libceph_example_la-Compressor.Plo at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at compressor/$(DEPDIR)/libceph_snappy_la-Compressor.Plo at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at compressor/$(DEPDIR)/libceph_zlib_la-Compressor.Plo at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at compressor/$(DEPDIR)/unittest_compression_plugin-Compressor.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at compressor/$(DEPDIR)/unittest_compression_plugin_snappy-Compressor.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at compressor/$(DEPDIR)/unittest_compression_plugin_zlib-Compressor.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at compressor/$(DEPDIR)/unittest_compression_snappy-Compressor.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at compressor/$(DEPDIR)/unittest_compression_zlib-Compressor.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at compressor/snappy/$(DEPDIR)/libceph_snappy_la-CompressionPluginSnappy.Plo at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at compressor/snappy/$(DEPDIR)/unittest_compression_plugin_snappy-CompressionPluginSnappy.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at compressor/snappy/$(DEPDIR)/unittest_compression_snappy-CompressionPluginSnappy.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at compressor/zlib/$(DEPDIR)/libceph_zlib_la-CompressionPluginZlib.Plo at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at compressor/zlib/$(DEPDIR)/libceph_zlib_la-CompressionZlib.Plo at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at compressor/zlib/$(DEPDIR)/unittest_compression_plugin_zlib-CompressionPluginZlib.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at compressor/zlib/$(DEPDIR)/unittest_compression_plugin_zlib-CompressionZlib.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at compressor/zlib/$(DEPDIR)/unittest_compression_zlib-CompressionPluginZlib.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at compressor/zlib/$(DEPDIR)/unittest_compression_zlib-CompressionZlib.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at crush/$(DEPDIR)/CrushCompiler.Plo at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at crush/$(DEPDIR)/CrushTester.Plo at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at crush/$(DEPDIR)/CrushWrapper.Plo at am__quote@
@@ -17478,7 +18172,6 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote at erasure-code/$(DEPDIR)/ErasureCode.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at erasure-code/$(DEPDIR)/ErasureCodePlugin.Plo at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at erasure-code/$(DEPDIR)/libec_example_la-ErasureCode.Plo at am__quote@
- at AMDEP_TRUE@@am__include@ @am__quote at erasure-code/$(DEPDIR)/libec_isa_la-ErasureCode.Plo at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at erasure-code/$(DEPDIR)/libec_jerasure_generic_la-ErasureCode.Plo at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at erasure-code/$(DEPDIR)/libec_jerasure_neon_la-ErasureCode.Plo at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at erasure-code/$(DEPDIR)/libec_jerasure_sse3_la-ErasureCode.Plo at am__quote@
@@ -17488,6 +18181,7 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote at erasure-code/$(DEPDIR)/libec_shec_neon_la-ErasureCode.Plo at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at erasure-code/$(DEPDIR)/libec_shec_sse3_la-ErasureCode.Plo at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at erasure-code/$(DEPDIR)/libec_shec_sse4_la-ErasureCode.Plo at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at erasure-code/$(DEPDIR)/libisa_la-ErasureCode.Plo at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at erasure-code/$(DEPDIR)/unittest_erasure_code-ErasureCode.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at erasure-code/$(DEPDIR)/unittest_erasure_code_example-ErasureCode.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at erasure-code/$(DEPDIR)/unittest_erasure_code_isa-ErasureCode.Po at am__quote@
@@ -17499,12 +18193,12 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote at erasure-code/$(DEPDIR)/unittest_erasure_code_shec_all-ErasureCode.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at erasure-code/$(DEPDIR)/unittest_erasure_code_shec_arguments-ErasureCode.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at erasure-code/$(DEPDIR)/unittest_erasure_code_shec_thread-ErasureCode.Po at am__quote@
- at AMDEP_TRUE@@am__include@ @am__quote at erasure-code/isa/$(DEPDIR)/libec_isa_la-ErasureCodeIsa.Plo at am__quote@
- at AMDEP_TRUE@@am__include@ @am__quote at erasure-code/isa/$(DEPDIR)/libec_isa_la-ErasureCodeIsaTableCache.Plo at am__quote@
- at AMDEP_TRUE@@am__include@ @am__quote at erasure-code/isa/$(DEPDIR)/libec_isa_la-ErasureCodePluginIsa.Plo at am__quote@
- at AMDEP_TRUE@@am__include@ @am__quote at erasure-code/isa/$(DEPDIR)/libec_isa_la-xor_op.Plo at am__quote@
- at AMDEP_TRUE@@am__include@ @am__quote at erasure-code/isa/isa-l/erasure_code/$(DEPDIR)/libec_isa_la-ec_base.Plo at am__quote@
- at AMDEP_TRUE@@am__include@ @am__quote at erasure-code/isa/isa-l/erasure_code/$(DEPDIR)/libec_isa_la-ec_highlevel_func.Plo at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at erasure-code/isa/$(DEPDIR)/libisa_la-ErasureCodeIsa.Plo at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at erasure-code/isa/$(DEPDIR)/libisa_la-ErasureCodeIsaTableCache.Plo at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at erasure-code/isa/$(DEPDIR)/libisa_la-ErasureCodePluginIsa.Plo at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at erasure-code/isa/$(DEPDIR)/libisa_la-xor_op.Plo at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at erasure-code/isa/isa-l/erasure_code/$(DEPDIR)/libisa_la-ec_base.Plo at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at erasure-code/isa/isa-l/erasure_code/$(DEPDIR)/libisa_la-ec_highlevel_func.Plo at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at erasure-code/jerasure/$(DEPDIR)/libec_jerasure_generic_la-ErasureCodeJerasure.Plo at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at erasure-code/jerasure/$(DEPDIR)/libec_jerasure_generic_la-ErasureCodePluginJerasure.Plo at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at erasure-code/jerasure/$(DEPDIR)/libec_jerasure_la-ErasureCodePluginSelectJerasure.Plo at am__quote@
@@ -17837,8 +18531,8 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote at librbd/image/$(DEPDIR)/RefreshParentRequest.Plo at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at librbd/image/$(DEPDIR)/RefreshRequest.Plo at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at librbd/image/$(DEPDIR)/SetSnapRequest.Plo at am__quote@
- at AMDEP_TRUE@@am__include@ @am__quote at librbd/journal/$(DEPDIR)/Entries.Plo at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at librbd/journal/$(DEPDIR)/Replay.Plo at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at librbd/journal/$(DEPDIR)/Types.Plo at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at librbd/object_map/$(DEPDIR)/InvalidateRequest.Plo at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at librbd/object_map/$(DEPDIR)/LockRequest.Plo at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at librbd/object_map/$(DEPDIR)/RefreshRequest.Plo at am__quote@
@@ -17972,6 +18666,7 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote at msg/xio/$(DEPDIR)/XioPool.Plo at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at msg/xio/$(DEPDIR)/XioPortal.Plo at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at objclass/$(DEPDIR)/libosd_a-class_api.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at os/$(DEPDIR)/libos_a-FuseStore.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at os/$(DEPDIR)/libos_a-ObjectStore.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at os/$(DEPDIR)/libos_types_a-Transaction.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at os/bluestore/$(DEPDIR)/bluefs_tool.Po at am__quote@
@@ -17981,6 +18676,8 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote at os/bluestore/$(DEPDIR)/libos_a-BlueRocksEnv.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at os/bluestore/$(DEPDIR)/libos_a-BlueStore.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at os/bluestore/$(DEPDIR)/libos_a-FreelistManager.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at os/bluestore/$(DEPDIR)/libos_a-KernelDevice.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at os/bluestore/$(DEPDIR)/libos_a-NVMEDevice.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at os/bluestore/$(DEPDIR)/libos_a-StupidAllocator.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at os/bluestore/$(DEPDIR)/libos_a-kv.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at os/bluestore/$(DEPDIR)/libos_types_a-bluefs_types.Po at am__quote@
@@ -18001,8 +18698,6 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote at os/fs/$(DEPDIR)/libos_a-FS.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at os/fs/$(DEPDIR)/libos_a-XFS.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at os/fs/$(DEPDIR)/libos_zfs_a-ZFS.Po at am__quote@
- at AMDEP_TRUE@@am__include@ @am__quote at os/keyvaluestore/$(DEPDIR)/libos_a-GenericObjectMap.Po at am__quote@
- at AMDEP_TRUE@@am__include@ @am__quote at os/keyvaluestore/$(DEPDIR)/libos_a-KeyValueStore.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at os/kstore/$(DEPDIR)/libos_a-KStore.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at os/kstore/$(DEPDIR)/libos_a-kv.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at os/kstore/$(DEPDIR)/libos_types_a-kstore_types.Po at am__quote@
@@ -18099,7 +18794,9 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote at rgw/$(DEPDIR)/librgw_la-rgw_tools.Plo at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at rgw/$(DEPDIR)/librgw_la-rgw_usage.Plo at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at rgw/$(DEPDIR)/librgw_la-rgw_user.Plo at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at rgw/$(DEPDIR)/librgw_la-rgw_website.Plo at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at rgw/$(DEPDIR)/librgw_la-rgw_xml.Plo at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at rgw/$(DEPDIR)/librgw_la-rgw_xml_enc.Plo at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at rgw/$(DEPDIR)/rgw_admin.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at rgw/$(DEPDIR)/rgw_common.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at rgw/$(DEPDIR)/rgw_env.Po at am__quote@
@@ -18161,7 +18858,9 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote at rgw/$(DEPDIR)/test_build_librgw-rgw_tools.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at rgw/$(DEPDIR)/test_build_librgw-rgw_usage.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at rgw/$(DEPDIR)/test_build_librgw-rgw_user.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at rgw/$(DEPDIR)/test_build_librgw-rgw_website.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at rgw/$(DEPDIR)/test_build_librgw-rgw_xml.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at rgw/$(DEPDIR)/test_build_librgw-rgw_xml_enc.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at rgw/$(DEPDIR)/unittest_formatter-rgw_formats.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at test/$(DEPDIR)/TestSignalHandlers.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at test/$(DEPDIR)/TestTimers.Po at am__quote@
@@ -18265,23 +18964,29 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote at test/common/$(DEPDIR)/unittest_context-test_context.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at test/common/$(DEPDIR)/unittest_crc32c-test_crc32c.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at test/common/$(DEPDIR)/unittest_histogram-histogram.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at test/common/$(DEPDIR)/unittest_interval_set-test_interval_set.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at test/common/$(DEPDIR)/unittest_io_priority-test_io_priority.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at test/common/$(DEPDIR)/unittest_lru-test_lru.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at test/common/$(DEPDIR)/unittest_mutex_debug-test_mutex_debug.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at test/common/$(DEPDIR)/unittest_prioritized_queue-test_prioritized_queue.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at test/common/$(DEPDIR)/unittest_readahead-Readahead.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at test/common/$(DEPDIR)/unittest_safe_io-test_safe_io.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at test/common/$(DEPDIR)/unittest_shared_cache-test_shared_cache.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at test/common/$(DEPDIR)/unittest_sharedptr_registry-test_sharedptr_registry.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at test/common/$(DEPDIR)/unittest_shunique_lock-test_shunique_lock.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at test/common/$(DEPDIR)/unittest_sloppy_crc_map-test_sloppy_crc_map.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at test/common/$(DEPDIR)/unittest_str_map-test_str_map.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at test/common/$(DEPDIR)/unittest_tableformatter-test_tableformatter.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at test/common/$(DEPDIR)/unittest_throttle-Throttle.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at test/common/$(DEPDIR)/unittest_time-test_time.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at test/common/$(DEPDIR)/unittest_util-test_util.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at test/common/$(DEPDIR)/unittest_weighted_priority_queue-test_weighted_priority_queue.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at test/compressor/$(DEPDIR)/libceph_example_la-compressor_plugin_example.Plo at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at test/compressor/$(DEPDIR)/unittest_compression_plugin-test_compression_plugin.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at test/compressor/$(DEPDIR)/unittest_compression_plugin_snappy-test_compression_plugin_snappy.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at test/compressor/$(DEPDIR)/unittest_compression_plugin_zlib-test_compression_plugin_zlib.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at test/compressor/$(DEPDIR)/unittest_compression_snappy-test_compression_snappy.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at test/compressor/$(DEPDIR)/unittest_compression_zlib-test_compression_zlib.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at test/crush/$(DEPDIR)/unittest_crush-crush.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at test/crush/$(DEPDIR)/unittest_crush_wrapper-CrushWrapper.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at test/encoding/$(DEPDIR)/ceph_dencoder-ceph_dencoder.Po at am__quote@
@@ -18383,6 +19088,7 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote at test/librbd/$(DEPDIR)/unittest_librbd-test_mock_fixture.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at test/librbd/exclusive_lock/$(DEPDIR)/unittest_librbd-test_mock_AcquireRequest.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at test/librbd/exclusive_lock/$(DEPDIR)/unittest_librbd-test_mock_ReleaseRequest.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at test/librbd/image/$(DEPDIR)/unittest_librbd-test_mock_RefreshRequest.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at test/librbd/journal/$(DEPDIR)/librbd_test_la-test_Entries.Plo at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at test/librbd/journal/$(DEPDIR)/librbd_test_la-test_Replay.Plo at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at test/librbd/journal/$(DEPDIR)/unittest_librbd-test_mock_Replay.Po at am__quote@
@@ -18432,6 +19138,7 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote at test/objectstore/$(DEPDIR)/unittest_chain_xattr-chain_xattr.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at test/objectstore/$(DEPDIR)/unittest_rocksdb_option-TestRocksdbOptionParse.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at test/objectstore/$(DEPDIR)/unittest_rocksdb_option_static-TestRocksdbOptionParse.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at test/objectstore/$(DEPDIR)/unittest_transaction-test_transaction.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at test/objectstore/$(DEPDIR)/workload_generator.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at test/os/$(DEPDIR)/unittest_lfnindex-TestLFNIndex.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at test/osd/$(DEPDIR)/Object.Po at am__quote@
@@ -18447,6 +19154,10 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote at test/osd/$(DEPDIR)/unittest_pglog-TestPGLog.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at test/osdc/$(DEPDIR)/FakeWriteback.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at test/osdc/$(DEPDIR)/object_cacher_stress.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at test/rbd_mirror/$(DEPDIR)/ceph_test_rbd_mirror-test_main.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at test/rbd_mirror/$(DEPDIR)/librbd_mirror_test_la-test_ClusterWatcher.Plo at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at test/rbd_mirror/$(DEPDIR)/librbd_mirror_test_la-test_PoolWatcher.Plo at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at test/rbd_mirror/$(DEPDIR)/unittest_rbd_mirror-test_main.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at test/rgw/$(DEPDIR)/ceph_test_rgw_manifest-test_rgw_manifest.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at test/rgw/$(DEPDIR)/ceph_test_rgw_obj-test_rgw_obj.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at test/system/$(DEPDIR)/cross_process_sem.Plo at am__quote@
@@ -18527,6 +19238,13 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote at tools/rbd/action/$(DEPDIR)/Snap.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at tools/rbd/action/$(DEPDIR)/Status.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at tools/rbd/action/$(DEPDIR)/Watch.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at tools/rbd_mirror/$(DEPDIR)/ClusterWatcher.Plo at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at tools/rbd_mirror/$(DEPDIR)/ImageReplayer.Plo at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at tools/rbd_mirror/$(DEPDIR)/Mirror.Plo at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at tools/rbd_mirror/$(DEPDIR)/PoolWatcher.Plo at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at tools/rbd_mirror/$(DEPDIR)/Replayer.Plo at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at tools/rbd_mirror/$(DEPDIR)/main.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at tools/rbd_mirror/$(DEPDIR)/types.Plo at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at tools/rbd_nbd/$(DEPDIR)/rbd_nbd-rbd-nbd.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at tracing/$(DEPDIR)/libos_tp_la-objectstore.Plo at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at tracing/$(DEPDIR)/libosd_tp_la-oprequest.Plo at am__quote@
@@ -18632,20 +19350,6 @@ common/libcommon_crc_aarch64_la-crc32c_aarch64.lo: common/crc32c_aarch64.c
 @AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
 @am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libcommon_crc_aarch64_la_CFLAGS) $(CFLAGS) -c -o common/libcommon_crc_aarch64_la-crc32c_aarch64.lo `test -f 'common/crc32c_aarch64.c' || echo '$(srcdir)/'`common/crc32c_aarch64.c
 
-erasure-code/isa/isa-l/erasure_code/libec_isa_la-ec_base.lo: erasure-code/isa/isa-l/erasure_code/ec_base.c
- at am__fastdepCC_TRUE@	$(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(libec_isa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libec_isa_la_CFLAGS) $(CFLAGS) -MT erasure-code/isa/isa-l/erasure_code/libec_isa_la-ec_base.lo -MD -MP -MF erasure-code/isa/isa-l/erasure_code/$(DEPDIR)/libec_isa_la-ec_base.Tpo -c -o erasure-code/isa/isa-l/erasure_code/libec_isa_la-ec_base.lo `test -f 'erasure-code/isa/isa-l/erasure_code/ec_ba [...]
- at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) erasure-code/isa/isa-l/erasure_code/$(DEPDIR)/libec_isa_la-ec_base.Tpo erasure-code/isa/isa-l/erasure_code/$(DEPDIR)/libec_isa_la-ec_base.Plo
- at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='erasure-code/isa/isa-l/erasure_code/ec_base.c' object='erasure-code/isa/isa-l/erasure_code/libec_isa_la-ec_base.lo' libtool=yes @AMDEPBACKSLASH@
- at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
- at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(libec_isa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libec_isa_la_CFLAGS) $(CFLAGS) -c -o erasure-code/isa/isa-l/erasure_code/libec_isa_la-ec_base.lo `test -f 'erasure-code/isa/isa-l/erasure_code/ec_base.c' || echo '$(srcdir)/'`erasure-code/isa/isa-l/erasure_code/ec_base.c
-
-erasure-code/isa/isa-l/erasure_code/libec_isa_la-ec_highlevel_func.lo: erasure-code/isa/isa-l/erasure_code/ec_highlevel_func.c
- at am__fastdepCC_TRUE@	$(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(libec_isa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libec_isa_la_CFLAGS) $(CFLAGS) -MT erasure-code/isa/isa-l/erasure_code/libec_isa_la-ec_highlevel_func.lo -MD -MP -MF erasure-code/isa/isa-l/erasure_code/$(DEPDIR)/libec_isa_la-ec_highlevel_func.Tpo -c -o erasure-code/isa/isa-l/erasure_code/libec_isa_la-ec_highlevel_func.lo `test -f 'erasure-cod [...]
- at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) erasure-code/isa/isa-l/erasure_code/$(DEPDIR)/libec_isa_la-ec_highlevel_func.Tpo erasure-code/isa/isa-l/erasure_code/$(DEPDIR)/libec_isa_la-ec_highlevel_func.Plo
- at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='erasure-code/isa/isa-l/erasure_code/ec_highlevel_func.c' object='erasure-code/isa/isa-l/erasure_code/libec_isa_la-ec_highlevel_func.lo' libtool=yes @AMDEPBACKSLASH@
- at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
- at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(libec_isa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libec_isa_la_CFLAGS) $(CFLAGS) -c -o erasure-code/isa/isa-l/erasure_code/libec_isa_la-ec_highlevel_func.lo `test -f 'erasure-code/isa/isa-l/erasure_code/ec_highlevel_func.c' || echo '$(srcdir)/'`erasure-code/isa/isa-l/erasure_code/ec_highlevel_func.c
-
 erasure-code/jerasure/jerasure/src/libec_jerasure_generic_la-cauchy.lo: erasure-code/jerasure/jerasure/src/cauchy.c
 @am__fastdepCC_TRUE@	$(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libec_jerasure_generic_la_CFLAGS) $(CFLAGS) -MT erasure-code/jerasure/jerasure/src/libec_jerasure_generic_la-cauchy.lo -MD -MP -MF erasure-code/jerasure/jerasure/src/$(DEPDIR)/libec_jerasure_generic_la-cauchy.Tpo -c -o erasure-code/jerasure/jerasure/src/libec_jerasure_generic_la-cauchy.lo `test -f 'erasu [...]
 @am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) erasure-code/jerasure/jerasure/src/$(DEPDIR)/libec_jerasure_generic_la-cauchy.Tpo erasure-code/jerasure/jerasure/src/$(DEPDIR)/libec_jerasure_generic_la-cauchy.Plo
@@ -19640,6 +20344,20 @@ erasure-code/jerasure/gf-complete/src/libec_shec_sse4_la-gf_w8.lo: erasure-code/
 @AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
 @am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libec_shec_sse4_la_CFLAGS) $(CFLAGS) -c -o erasure-code/jerasure/gf-complete/src/libec_shec_sse4_la-gf_w8.lo `test -f 'erasure-code/jerasure/gf-complete/src/gf_w8.c' || echo '$(srcdir)/'`erasure-code/jerasure/gf-complete/src/gf_w8.c
 
+erasure-code/isa/isa-l/erasure_code/libisa_la-ec_base.lo: erasure-code/isa/isa-l/erasure_code/ec_base.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(libisa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libisa_la_CFLAGS) $(CFLAGS) -MT erasure-code/isa/isa-l/erasure_code/libisa_la-ec_base.lo -MD -MP -MF erasure-code/isa/isa-l/erasure_code/$(DEPDIR)/libisa_la-ec_base.Tpo -c -o erasure-code/isa/isa-l/erasure_code/libisa_la-ec_base.lo `test -f 'erasure-code/isa/isa-l/erasure_code/ec_base.c' || echo ' [...]
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) erasure-code/isa/isa-l/erasure_code/$(DEPDIR)/libisa_la-ec_base.Tpo erasure-code/isa/isa-l/erasure_code/$(DEPDIR)/libisa_la-ec_base.Plo
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='erasure-code/isa/isa-l/erasure_code/ec_base.c' object='erasure-code/isa/isa-l/erasure_code/libisa_la-ec_base.lo' libtool=yes @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(libisa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libisa_la_CFLAGS) $(CFLAGS) -c -o erasure-code/isa/isa-l/erasure_code/libisa_la-ec_base.lo `test -f 'erasure-code/isa/isa-l/erasure_code/ec_base.c' || echo '$(srcdir)/'`erasure-code/isa/isa-l/erasure_code/ec_base.c
+
+erasure-code/isa/isa-l/erasure_code/libisa_la-ec_highlevel_func.lo: erasure-code/isa/isa-l/erasure_code/ec_highlevel_func.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(libisa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libisa_la_CFLAGS) $(CFLAGS) -MT erasure-code/isa/isa-l/erasure_code/libisa_la-ec_highlevel_func.lo -MD -MP -MF erasure-code/isa/isa-l/erasure_code/$(DEPDIR)/libisa_la-ec_highlevel_func.Tpo -c -o erasure-code/isa/isa-l/erasure_code/libisa_la-ec_highlevel_func.lo `test -f 'erasure-code/isa/isa-l/era [...]
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) erasure-code/isa/isa-l/erasure_code/$(DEPDIR)/libisa_la-ec_highlevel_func.Tpo erasure-code/isa/isa-l/erasure_code/$(DEPDIR)/libisa_la-ec_highlevel_func.Plo
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='erasure-code/isa/isa-l/erasure_code/ec_highlevel_func.c' object='erasure-code/isa/isa-l/erasure_code/libisa_la-ec_highlevel_func.lo' libtool=yes @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(libisa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libisa_la_CFLAGS) $(CFLAGS) -c -o erasure-code/isa/isa-l/erasure_code/libisa_la-ec_highlevel_func.lo `test -f 'erasure-code/isa/isa-l/erasure_code/ec_highlevel_func.c' || echo '$(srcdir)/'`erasure-code/isa/isa-l/erasure_code/ec_highlevel_func.c
+
 tracing/libos_tp_la-objectstore.lo: tracing/objectstore.c
 @am__fastdepCC_TRUE@	$(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libos_tp_la_CFLAGS) $(CFLAGS) -MT tracing/libos_tp_la-objectstore.lo -MD -MP -MF tracing/$(DEPDIR)/libos_tp_la-objectstore.Tpo -c -o tracing/libos_tp_la-objectstore.lo `test -f 'tracing/objectstore.c' || echo '$(srcdir)/'`tracing/objectstore.c
 @am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) tracing/$(DEPDIR)/libos_tp_la-objectstore.Tpo tracing/$(DEPDIR)/libos_tp_la-objectstore.Plo
@@ -21120,34 +21838,6 @@ os/fs/libos_a-FS.obj: os/fs/FS.cc
 @AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
 @am__fastdepCXX_FALSE@	$(AM_V_CXX at am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libos_a_CXXFLAGS) $(CXXFLAGS) -c -o os/fs/libos_a-FS.obj `if test -f 'os/fs/FS.cc'; then $(CYGPATH_W) 'os/fs/FS.cc'; else $(CYGPATH_W) '$(srcdir)/os/fs/FS.cc'; fi`
 
-os/keyvaluestore/libos_a-GenericObjectMap.o: os/keyvaluestore/GenericObjectMap.cc
- at am__fastdepCXX_TRUE@	$(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libos_a_CXXFLAGS) $(CXXFLAGS) -MT os/keyvaluestore/libos_a-GenericObjectMap.o -MD -MP -MF os/keyvaluestore/$(DEPDIR)/libos_a-GenericObjectMap.Tpo -c -o os/keyvaluestore/libos_a-GenericObjectMap.o `test -f 'os/keyvaluestore/GenericObjectMap.cc' || echo '$(srcdir)/'`os/keyvaluestore/GenericObjectMap.cc
- at am__fastdepCXX_TRUE@	$(AM_V_at)$(am__mv) os/keyvaluestore/$(DEPDIR)/libos_a-GenericObjectMap.Tpo os/keyvaluestore/$(DEPDIR)/libos_a-GenericObjectMap.Po
- at AMDEP_TRUE@@am__fastdepCXX_FALSE@	$(AM_V_CXX)source='os/keyvaluestore/GenericObjectMap.cc' object='os/keyvaluestore/libos_a-GenericObjectMap.o' libtool=no @AMDEPBACKSLASH@
- at AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
- at am__fastdepCXX_FALSE@	$(AM_V_CXX at am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libos_a_CXXFLAGS) $(CXXFLAGS) -c -o os/keyvaluestore/libos_a-GenericObjectMap.o `test -f 'os/keyvaluestore/GenericObjectMap.cc' || echo '$(srcdir)/'`os/keyvaluestore/GenericObjectMap.cc
-
-os/keyvaluestore/libos_a-GenericObjectMap.obj: os/keyvaluestore/GenericObjectMap.cc
- at am__fastdepCXX_TRUE@	$(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libos_a_CXXFLAGS) $(CXXFLAGS) -MT os/keyvaluestore/libos_a-GenericObjectMap.obj -MD -MP -MF os/keyvaluestore/$(DEPDIR)/libos_a-GenericObjectMap.Tpo -c -o os/keyvaluestore/libos_a-GenericObjectMap.obj `if test -f 'os/keyvaluestore/GenericObjectMap.cc'; then $(CYGPATH_W) 'os/keyvaluestore/GenericObjectMap.cc'; else $(CYGPATH_W) '$(srcdir)/os/keyvaluestore/GenericObjectMap.cc'; fi`
- at am__fastdepCXX_TRUE@	$(AM_V_at)$(am__mv) os/keyvaluestore/$(DEPDIR)/libos_a-GenericObjectMap.Tpo os/keyvaluestore/$(DEPDIR)/libos_a-GenericObjectMap.Po
- at AMDEP_TRUE@@am__fastdepCXX_FALSE@	$(AM_V_CXX)source='os/keyvaluestore/GenericObjectMap.cc' object='os/keyvaluestore/libos_a-GenericObjectMap.obj' libtool=no @AMDEPBACKSLASH@
- at AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
- at am__fastdepCXX_FALSE@	$(AM_V_CXX at am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libos_a_CXXFLAGS) $(CXXFLAGS) -c -o os/keyvaluestore/libos_a-GenericObjectMap.obj `if test -f 'os/keyvaluestore/GenericObjectMap.cc'; then $(CYGPATH_W) 'os/keyvaluestore/GenericObjectMap.cc'; else $(CYGPATH_W) '$(srcdir)/os/keyvaluestore/GenericObjectMap.cc'; fi`
-
-os/keyvaluestore/libos_a-KeyValueStore.o: os/keyvaluestore/KeyValueStore.cc
- at am__fastdepCXX_TRUE@	$(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libos_a_CXXFLAGS) $(CXXFLAGS) -MT os/keyvaluestore/libos_a-KeyValueStore.o -MD -MP -MF os/keyvaluestore/$(DEPDIR)/libos_a-KeyValueStore.Tpo -c -o os/keyvaluestore/libos_a-KeyValueStore.o `test -f 'os/keyvaluestore/KeyValueStore.cc' || echo '$(srcdir)/'`os/keyvaluestore/KeyValueStore.cc
- at am__fastdepCXX_TRUE@	$(AM_V_at)$(am__mv) os/keyvaluestore/$(DEPDIR)/libos_a-KeyValueStore.Tpo os/keyvaluestore/$(DEPDIR)/libos_a-KeyValueStore.Po
- at AMDEP_TRUE@@am__fastdepCXX_FALSE@	$(AM_V_CXX)source='os/keyvaluestore/KeyValueStore.cc' object='os/keyvaluestore/libos_a-KeyValueStore.o' libtool=no @AMDEPBACKSLASH@
- at AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
- at am__fastdepCXX_FALSE@	$(AM_V_CXX at am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libos_a_CXXFLAGS) $(CXXFLAGS) -c -o os/keyvaluestore/libos_a-KeyValueStore.o `test -f 'os/keyvaluestore/KeyValueStore.cc' || echo '$(srcdir)/'`os/keyvaluestore/KeyValueStore.cc
-
-os/keyvaluestore/libos_a-KeyValueStore.obj: os/keyvaluestore/KeyValueStore.cc
- at am__fastdepCXX_TRUE@	$(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libos_a_CXXFLAGS) $(CXXFLAGS) -MT os/keyvaluestore/libos_a-KeyValueStore.obj -MD -MP -MF os/keyvaluestore/$(DEPDIR)/libos_a-KeyValueStore.Tpo -c -o os/keyvaluestore/libos_a-KeyValueStore.obj `if test -f 'os/keyvaluestore/KeyValueStore.cc'; then $(CYGPATH_W) 'os/keyvaluestore/KeyValueStore.cc'; else $(CYGPATH_W) '$(srcdir)/os/keyvaluestore/KeyValueStore.cc'; fi`
- at am__fastdepCXX_TRUE@	$(AM_V_at)$(am__mv) os/keyvaluestore/$(DEPDIR)/libos_a-KeyValueStore.Tpo os/keyvaluestore/$(DEPDIR)/libos_a-KeyValueStore.Po
- at AMDEP_TRUE@@am__fastdepCXX_FALSE@	$(AM_V_CXX)source='os/keyvaluestore/KeyValueStore.cc' object='os/keyvaluestore/libos_a-KeyValueStore.obj' libtool=no @AMDEPBACKSLASH@
- at AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
- at am__fastdepCXX_FALSE@	$(AM_V_CXX at am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libos_a_CXXFLAGS) $(CXXFLAGS) -c -o os/keyvaluestore/libos_a-KeyValueStore.obj `if test -f 'os/keyvaluestore/KeyValueStore.cc'; then $(CYGPATH_W) 'os/keyvaluestore/KeyValueStore.cc'; else $(CYGPATH_W) '$(srcdir)/os/keyvaluestore/KeyValueStore.cc'; fi`
-
 os/kstore/libos_a-kv.o: os/kstore/kv.cc
 @am__fastdepCXX_TRUE@	$(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libos_a_CXXFLAGS) $(CXXFLAGS) -MT os/kstore/libos_a-kv.o -MD -MP -MF os/kstore/$(DEPDIR)/libos_a-kv.Tpo -c -o os/kstore/libos_a-kv.o `test -f 'os/kstore/kv.cc' || echo '$(srcdir)/'`os/kstore/kv.cc
 @am__fastdepCXX_TRUE@	$(AM_V_at)$(am__mv) os/kstore/$(DEPDIR)/libos_a-kv.Tpo os/kstore/$(DEPDIR)/libos_a-kv.Po
@@ -21204,6 +21894,20 @@ os/libos_a-ObjectStore.obj: os/ObjectStore.cc
 @AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
 @am__fastdepCXX_FALSE@	$(AM_V_CXX at am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libos_a_CXXFLAGS) $(CXXFLAGS) -c -o os/libos_a-ObjectStore.obj `if test -f 'os/ObjectStore.cc'; then $(CYGPATH_W) 'os/ObjectStore.cc'; else $(CYGPATH_W) '$(srcdir)/os/ObjectStore.cc'; fi`
 
+os/libos_a-FuseStore.o: os/FuseStore.cc
+ at am__fastdepCXX_TRUE@	$(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libos_a_CXXFLAGS) $(CXXFLAGS) -MT os/libos_a-FuseStore.o -MD -MP -MF os/$(DEPDIR)/libos_a-FuseStore.Tpo -c -o os/libos_a-FuseStore.o `test -f 'os/FuseStore.cc' || echo '$(srcdir)/'`os/FuseStore.cc
+ at am__fastdepCXX_TRUE@	$(AM_V_at)$(am__mv) os/$(DEPDIR)/libos_a-FuseStore.Tpo os/$(DEPDIR)/libos_a-FuseStore.Po
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@	$(AM_V_CXX)source='os/FuseStore.cc' object='os/libos_a-FuseStore.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCXX_FALSE@	$(AM_V_CXX at am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libos_a_CXXFLAGS) $(CXXFLAGS) -c -o os/libos_a-FuseStore.o `test -f 'os/FuseStore.cc' || echo '$(srcdir)/'`os/FuseStore.cc
+
+os/libos_a-FuseStore.obj: os/FuseStore.cc
+ at am__fastdepCXX_TRUE@	$(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libos_a_CXXFLAGS) $(CXXFLAGS) -MT os/libos_a-FuseStore.obj -MD -MP -MF os/$(DEPDIR)/libos_a-FuseStore.Tpo -c -o os/libos_a-FuseStore.obj `if test -f 'os/FuseStore.cc'; then $(CYGPATH_W) 'os/FuseStore.cc'; else $(CYGPATH_W) '$(srcdir)/os/FuseStore.cc'; fi`
+ at am__fastdepCXX_TRUE@	$(AM_V_at)$(am__mv) os/$(DEPDIR)/libos_a-FuseStore.Tpo os/$(DEPDIR)/libos_a-FuseStore.Po
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@	$(AM_V_CXX)source='os/FuseStore.cc' object='os/libos_a-FuseStore.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCXX_FALSE@	$(AM_V_CXX at am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libos_a_CXXFLAGS) $(CXXFLAGS) -c -o os/libos_a-FuseStore.obj `if test -f 'os/FuseStore.cc'; then $(CYGPATH_W) 'os/FuseStore.cc'; else $(CYGPATH_W) '$(srcdir)/os/FuseStore.cc'; fi`
+
 os/bluestore/libos_a-kv.o: os/bluestore/kv.cc
 @am__fastdepCXX_TRUE@	$(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libos_a_CXXFLAGS) $(CXXFLAGS) -MT os/bluestore/libos_a-kv.o -MD -MP -MF os/bluestore/$(DEPDIR)/libos_a-kv.Tpo -c -o os/bluestore/libos_a-kv.o `test -f 'os/bluestore/kv.cc' || echo '$(srcdir)/'`os/bluestore/kv.cc
 @am__fastdepCXX_TRUE@	$(AM_V_at)$(am__mv) os/bluestore/$(DEPDIR)/libos_a-kv.Tpo os/bluestore/$(DEPDIR)/libos_a-kv.Po
@@ -21302,6 +22006,20 @@ os/bluestore/libos_a-FreelistManager.obj: os/bluestore/FreelistManager.cc
 @AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
 @am__fastdepCXX_FALSE@	$(AM_V_CXX at am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libos_a_CXXFLAGS) $(CXXFLAGS) -c -o os/bluestore/libos_a-FreelistManager.obj `if test -f 'os/bluestore/FreelistManager.cc'; then $(CYGPATH_W) 'os/bluestore/FreelistManager.cc'; else $(CYGPATH_W) '$(srcdir)/os/bluestore/FreelistManager.cc'; fi`
 
+os/bluestore/libos_a-KernelDevice.o: os/bluestore/KernelDevice.cc
+ at am__fastdepCXX_TRUE@	$(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libos_a_CXXFLAGS) $(CXXFLAGS) -MT os/bluestore/libos_a-KernelDevice.o -MD -MP -MF os/bluestore/$(DEPDIR)/libos_a-KernelDevice.Tpo -c -o os/bluestore/libos_a-KernelDevice.o `test -f 'os/bluestore/KernelDevice.cc' || echo '$(srcdir)/'`os/bluestore/KernelDevice.cc
+ at am__fastdepCXX_TRUE@	$(AM_V_at)$(am__mv) os/bluestore/$(DEPDIR)/libos_a-KernelDevice.Tpo os/bluestore/$(DEPDIR)/libos_a-KernelDevice.Po
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@	$(AM_V_CXX)source='os/bluestore/KernelDevice.cc' object='os/bluestore/libos_a-KernelDevice.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCXX_FALSE@	$(AM_V_CXX at am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libos_a_CXXFLAGS) $(CXXFLAGS) -c -o os/bluestore/libos_a-KernelDevice.o `test -f 'os/bluestore/KernelDevice.cc' || echo '$(srcdir)/'`os/bluestore/KernelDevice.cc
+
+os/bluestore/libos_a-KernelDevice.obj: os/bluestore/KernelDevice.cc
+ at am__fastdepCXX_TRUE@	$(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libos_a_CXXFLAGS) $(CXXFLAGS) -MT os/bluestore/libos_a-KernelDevice.obj -MD -MP -MF os/bluestore/$(DEPDIR)/libos_a-KernelDevice.Tpo -c -o os/bluestore/libos_a-KernelDevice.obj `if test -f 'os/bluestore/KernelDevice.cc'; then $(CYGPATH_W) 'os/bluestore/KernelDevice.cc'; else $(CYGPATH_W) '$(srcdir)/os/bluestore/KernelDevice.cc'; fi`
+ at am__fastdepCXX_TRUE@	$(AM_V_at)$(am__mv) os/bluestore/$(DEPDIR)/libos_a-KernelDevice.Tpo os/bluestore/$(DEPDIR)/libos_a-KernelDevice.Po
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@	$(AM_V_CXX)source='os/bluestore/KernelDevice.cc' object='os/bluestore/libos_a-KernelDevice.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCXX_FALSE@	$(AM_V_CXX at am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libos_a_CXXFLAGS) $(CXXFLAGS) -c -o os/bluestore/libos_a-KernelDevice.obj `if test -f 'os/bluestore/KernelDevice.cc'; then $(CYGPATH_W) 'os/bluestore/KernelDevice.cc'; else $(CYGPATH_W) '$(srcdir)/os/bluestore/KernelDevice.cc'; fi`
+
 os/bluestore/libos_a-StupidAllocator.o: os/bluestore/StupidAllocator.cc
 @am__fastdepCXX_TRUE@	$(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libos_a_CXXFLAGS) $(CXXFLAGS) -MT os/bluestore/libos_a-StupidAllocator.o -MD -MP -MF os/bluestore/$(DEPDIR)/libos_a-StupidAllocator.Tpo -c -o os/bluestore/libos_a-StupidAllocator.o `test -f 'os/bluestore/StupidAllocator.cc' || echo '$(srcdir)/'`os/bluestore/StupidAllocator.cc
 @am__fastdepCXX_TRUE@	$(AM_V_at)$(am__mv) os/bluestore/$(DEPDIR)/libos_a-StupidAllocator.Tpo os/bluestore/$(DEPDIR)/libos_a-StupidAllocator.Po
@@ -21372,6 +22090,20 @@ os/filestore/libos_a-ZFSFileStoreBackend.obj: os/filestore/ZFSFileStoreBackend.c
 @AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
 @am__fastdepCXX_FALSE@	$(AM_V_CXX at am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libos_a_CXXFLAGS) $(CXXFLAGS) -c -o os/filestore/libos_a-ZFSFileStoreBackend.obj `if test -f 'os/filestore/ZFSFileStoreBackend.cc'; then $(CYGPATH_W) 'os/filestore/ZFSFileStoreBackend.cc'; else $(CYGPATH_W) '$(srcdir)/os/filestore/ZFSFileStoreBackend.cc'; fi`
 
+os/bluestore/libos_a-NVMEDevice.o: os/bluestore/NVMEDevice.cc
+ at am__fastdepCXX_TRUE@	$(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libos_a_CXXFLAGS) $(CXXFLAGS) -MT os/bluestore/libos_a-NVMEDevice.o -MD -MP -MF os/bluestore/$(DEPDIR)/libos_a-NVMEDevice.Tpo -c -o os/bluestore/libos_a-NVMEDevice.o `test -f 'os/bluestore/NVMEDevice.cc' || echo '$(srcdir)/'`os/bluestore/NVMEDevice.cc
+ at am__fastdepCXX_TRUE@	$(AM_V_at)$(am__mv) os/bluestore/$(DEPDIR)/libos_a-NVMEDevice.Tpo os/bluestore/$(DEPDIR)/libos_a-NVMEDevice.Po
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@	$(AM_V_CXX)source='os/bluestore/NVMEDevice.cc' object='os/bluestore/libos_a-NVMEDevice.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCXX_FALSE@	$(AM_V_CXX at am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libos_a_CXXFLAGS) $(CXXFLAGS) -c -o os/bluestore/libos_a-NVMEDevice.o `test -f 'os/bluestore/NVMEDevice.cc' || echo '$(srcdir)/'`os/bluestore/NVMEDevice.cc
+
+os/bluestore/libos_a-NVMEDevice.obj: os/bluestore/NVMEDevice.cc
+ at am__fastdepCXX_TRUE@	$(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libos_a_CXXFLAGS) $(CXXFLAGS) -MT os/bluestore/libos_a-NVMEDevice.obj -MD -MP -MF os/bluestore/$(DEPDIR)/libos_a-NVMEDevice.Tpo -c -o os/bluestore/libos_a-NVMEDevice.obj `if test -f 'os/bluestore/NVMEDevice.cc'; then $(CYGPATH_W) 'os/bluestore/NVMEDevice.cc'; else $(CYGPATH_W) '$(srcdir)/os/bluestore/NVMEDevice.cc'; fi`
+ at am__fastdepCXX_TRUE@	$(AM_V_at)$(am__mv) os/bluestore/$(DEPDIR)/libos_a-NVMEDevice.Tpo os/bluestore/$(DEPDIR)/libos_a-NVMEDevice.Po
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@	$(AM_V_CXX)source='os/bluestore/NVMEDevice.cc' object='os/bluestore/libos_a-NVMEDevice.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCXX_FALSE@	$(AM_V_CXX at am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libos_a_CXXFLAGS) $(CXXFLAGS) -c -o os/bluestore/libos_a-NVMEDevice.obj `if test -f 'os/bluestore/NVMEDevice.cc'; then $(CYGPATH_W) 'os/bluestore/NVMEDevice.cc'; else $(CYGPATH_W) '$(srcdir)/os/bluestore/NVMEDevice.cc'; fi`
+
 os/kstore/libos_types_a-kstore_types.o: os/kstore/kstore_types.cc
 @am__fastdepCXX_TRUE@	$(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libos_types_a_CXXFLAGS) $(CXXFLAGS) -MT os/kstore/libos_types_a-kstore_types.o -MD -MP -MF os/kstore/$(DEPDIR)/libos_types_a-kstore_types.Tpo -c -o os/kstore/libos_types_a-kstore_types.o `test -f 'os/kstore/kstore_types.cc' || echo '$(srcdir)/'`os/kstore/kstore_types.cc
 @am__fastdepCXX_TRUE@	$(AM_V_at)$(am__mv) os/kstore/$(DEPDIR)/libos_types_a-kstore_types.Tpo os/kstore/$(DEPDIR)/libos_types_a-kstore_types.Po
@@ -21680,6 +22412,27 @@ compressor/snappy/libceph_snappy_la-CompressionPluginSnappy.lo: compressor/snapp
 @AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
 @am__fastdepCXX_FALSE@	$(AM_V_CXX at am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libceph_snappy_la_CXXFLAGS) $(CXXFLAGS) -c -o compressor/snappy/libceph_snappy_la-CompressionPluginSnappy.lo `test -f 'compressor/snappy/CompressionPluginSnappy.cc' || echo '$(srcdir)/'`compressor/snappy/CompressionPluginSnappy.cc
 
+compressor/libceph_zlib_la-Compressor.lo: compressor/Compressor.cc
+ at am__fastdepCXX_TRUE@	$(AM_V_CXX)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libceph_zlib_la_CXXFLAGS) $(CXXFLAGS) -MT compressor/libceph_zlib_la-Compressor.lo -MD -MP -MF compressor/$(DEPDIR)/libceph_zlib_la-Compressor.Tpo -c -o compressor/libceph_zlib_la-Compressor.lo `test -f 'compressor/Compressor.cc' || echo '$(srcdir)/'`compressor/Compressor.cc
+ at am__fastdepCXX_TRUE@	$(AM_V_at)$(am__mv) compressor/$(DEPDIR)/libceph_zlib_la-Compressor.Tpo compressor/$(DEPDIR)/libceph_zlib_la-Compressor.Plo
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@	$(AM_V_CXX)source='compressor/Compressor.cc' object='compressor/libceph_zlib_la-Compressor.lo' libtool=yes @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCXX_FALSE@	$(AM_V_CXX at am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libceph_zlib_la_CXXFLAGS) $(CXXFLAGS) -c -o compressor/libceph_zlib_la-Compressor.lo `test -f 'compressor/Compressor.cc' || echo '$(srcdir)/'`compressor/Compressor.cc
+
+compressor/zlib/libceph_zlib_la-CompressionPluginZlib.lo: compressor/zlib/CompressionPluginZlib.cc
+ at am__fastdepCXX_TRUE@	$(AM_V_CXX)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libceph_zlib_la_CXXFLAGS) $(CXXFLAGS) -MT compressor/zlib/libceph_zlib_la-CompressionPluginZlib.lo -MD -MP -MF compressor/zlib/$(DEPDIR)/libceph_zlib_la-CompressionPluginZlib.Tpo -c -o compressor/zlib/libceph_zlib_la-CompressionPluginZlib.lo `test -f 'compressor/zlib/CompressionPluginZlib.cc' || echo [...]
+ at am__fastdepCXX_TRUE@	$(AM_V_at)$(am__mv) compressor/zlib/$(DEPDIR)/libceph_zlib_la-CompressionPluginZlib.Tpo compressor/zlib/$(DEPDIR)/libceph_zlib_la-CompressionPluginZlib.Plo
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@	$(AM_V_CXX)source='compressor/zlib/CompressionPluginZlib.cc' object='compressor/zlib/libceph_zlib_la-CompressionPluginZlib.lo' libtool=yes @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCXX_FALSE@	$(AM_V_CXX at am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libceph_zlib_la_CXXFLAGS) $(CXXFLAGS) -c -o compressor/zlib/libceph_zlib_la-CompressionPluginZlib.lo `test -f 'compressor/zlib/CompressionPluginZlib.cc' || echo '$(srcdir)/'`compressor/zlib/CompressionPluginZlib.cc
+
+compressor/zlib/libceph_zlib_la-CompressionZlib.lo: compressor/zlib/CompressionZlib.cc
+ at am__fastdepCXX_TRUE@	$(AM_V_CXX)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libceph_zlib_la_CXXFLAGS) $(CXXFLAGS) -MT compressor/zlib/libceph_zlib_la-CompressionZlib.lo -MD -MP -MF compressor/zlib/$(DEPDIR)/libceph_zlib_la-CompressionZlib.Tpo -c -o compressor/zlib/libceph_zlib_la-CompressionZlib.lo `test -f 'compressor/zlib/CompressionZlib.cc' || echo '$(srcdir)/'`compressor [...]
+ at am__fastdepCXX_TRUE@	$(AM_V_at)$(am__mv) compressor/zlib/$(DEPDIR)/libceph_zlib_la-CompressionZlib.Tpo compressor/zlib/$(DEPDIR)/libceph_zlib_la-CompressionZlib.Plo
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@	$(AM_V_CXX)source='compressor/zlib/CompressionZlib.cc' object='compressor/zlib/libceph_zlib_la-CompressionZlib.lo' libtool=yes @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCXX_FALSE@	$(AM_V_CXX at am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libceph_zlib_la_CXXFLAGS) $(CXXFLAGS) -c -o compressor/zlib/libceph_zlib_la-CompressionZlib.lo `test -f 'compressor/zlib/CompressionZlib.cc' || echo '$(srcdir)/'`compressor/zlib/CompressionZlib.cc
+
 java/native/libcephfs_jni_la-libcephfs_jni.lo: java/native/libcephfs_jni.cc
 @am__fastdepCXX_TRUE@	$(AM_V_CXX)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libcephfs_jni_la_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -MT java/native/libcephfs_jni_la-libcephfs_jni.lo -MD -MP -MF java/native/$(DEPDIR)/libcephfs_jni_la-libcephfs_jni.Tpo -c -o java/native/libcephfs_jni_la-libcephfs_jni.lo `test -f 'java/native/libcephfs_jni.cc' || echo '$(srcdir)/'`java/native/libcephfs_jni.cc
 @am__fastdepCXX_TRUE@	$(AM_V_at)$(am__mv) java/native/$(DEPDIR)/libcephfs_jni_la-libcephfs_jni.Tpo java/native/$(DEPDIR)/libcephfs_jni_la-libcephfs_jni.Plo
@@ -21757,41 +22510,6 @@ test/erasure-code/libec_hangs_la-ErasureCodePluginHangs.lo: test/erasure-code/Er
 @AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
 @am__fastdepCXX_FALSE@	$(AM_V_CXX at am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libec_hangs_la_CXXFLAGS) $(CXXFLAGS) -c -o test/erasure-code/libec_hangs_la-ErasureCodePluginHangs.lo `test -f 'test/erasure-code/ErasureCodePluginHangs.cc' || echo '$(srcdir)/'`test/erasure-code/ErasureCodePluginHangs.cc
 
-erasure-code/libec_isa_la-ErasureCode.lo: erasure-code/ErasureCode.cc
- at am__fastdepCXX_TRUE@	$(AM_V_CXX)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(libec_isa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libec_isa_la_CXXFLAGS) $(CXXFLAGS) -MT erasure-code/libec_isa_la-ErasureCode.lo -MD -MP -MF erasure-code/$(DEPDIR)/libec_isa_la-ErasureCode.Tpo -c -o erasure-code/libec_isa_la-ErasureCode.lo `test -f 'erasure-code/ErasureCode.cc' || echo '$(srcdir)/'`erasure-code/ErasureCode.cc
- at am__fastdepCXX_TRUE@	$(AM_V_at)$(am__mv) erasure-code/$(DEPDIR)/libec_isa_la-ErasureCode.Tpo erasure-code/$(DEPDIR)/libec_isa_la-ErasureCode.Plo
- at AMDEP_TRUE@@am__fastdepCXX_FALSE@	$(AM_V_CXX)source='erasure-code/ErasureCode.cc' object='erasure-code/libec_isa_la-ErasureCode.lo' libtool=yes @AMDEPBACKSLASH@
- at AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
- at am__fastdepCXX_FALSE@	$(AM_V_CXX at am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(libec_isa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libec_isa_la_CXXFLAGS) $(CXXFLAGS) -c -o erasure-code/libec_isa_la-ErasureCode.lo `test -f 'erasure-code/ErasureCode.cc' || echo '$(srcdir)/'`erasure-code/ErasureCode.cc
-
-erasure-code/isa/libec_isa_la-ErasureCodeIsa.lo: erasure-code/isa/ErasureCodeIsa.cc
- at am__fastdepCXX_TRUE@	$(AM_V_CXX)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(libec_isa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libec_isa_la_CXXFLAGS) $(CXXFLAGS) -MT erasure-code/isa/libec_isa_la-ErasureCodeIsa.lo -MD -MP -MF erasure-code/isa/$(DEPDIR)/libec_isa_la-ErasureCodeIsa.Tpo -c -o erasure-code/isa/libec_isa_la-ErasureCodeIsa.lo `test -f 'erasure-code/isa/ErasureCodeIsa.cc' || echo '$(srcdir)/'`erasure-code [...]
- at am__fastdepCXX_TRUE@	$(AM_V_at)$(am__mv) erasure-code/isa/$(DEPDIR)/libec_isa_la-ErasureCodeIsa.Tpo erasure-code/isa/$(DEPDIR)/libec_isa_la-ErasureCodeIsa.Plo
- at AMDEP_TRUE@@am__fastdepCXX_FALSE@	$(AM_V_CXX)source='erasure-code/isa/ErasureCodeIsa.cc' object='erasure-code/isa/libec_isa_la-ErasureCodeIsa.lo' libtool=yes @AMDEPBACKSLASH@
- at AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
- at am__fastdepCXX_FALSE@	$(AM_V_CXX at am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(libec_isa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libec_isa_la_CXXFLAGS) $(CXXFLAGS) -c -o erasure-code/isa/libec_isa_la-ErasureCodeIsa.lo `test -f 'erasure-code/isa/ErasureCodeIsa.cc' || echo '$(srcdir)/'`erasure-code/isa/ErasureCodeIsa.cc
-
-erasure-code/isa/libec_isa_la-ErasureCodeIsaTableCache.lo: erasure-code/isa/ErasureCodeIsaTableCache.cc
- at am__fastdepCXX_TRUE@	$(AM_V_CXX)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(libec_isa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libec_isa_la_CXXFLAGS) $(CXXFLAGS) -MT erasure-code/isa/libec_isa_la-ErasureCodeIsaTableCache.lo -MD -MP -MF erasure-code/isa/$(DEPDIR)/libec_isa_la-ErasureCodeIsaTableCache.Tpo -c -o erasure-code/isa/libec_isa_la-ErasureCodeIsaTableCache.lo `test -f 'erasure-code/isa/ErasureCodeIsaTableCac [...]
- at am__fastdepCXX_TRUE@	$(AM_V_at)$(am__mv) erasure-code/isa/$(DEPDIR)/libec_isa_la-ErasureCodeIsaTableCache.Tpo erasure-code/isa/$(DEPDIR)/libec_isa_la-ErasureCodeIsaTableCache.Plo
- at AMDEP_TRUE@@am__fastdepCXX_FALSE@	$(AM_V_CXX)source='erasure-code/isa/ErasureCodeIsaTableCache.cc' object='erasure-code/isa/libec_isa_la-ErasureCodeIsaTableCache.lo' libtool=yes @AMDEPBACKSLASH@
- at AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
- at am__fastdepCXX_FALSE@	$(AM_V_CXX at am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(libec_isa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libec_isa_la_CXXFLAGS) $(CXXFLAGS) -c -o erasure-code/isa/libec_isa_la-ErasureCodeIsaTableCache.lo `test -f 'erasure-code/isa/ErasureCodeIsaTableCache.cc' || echo '$(srcdir)/'`erasure-code/isa/ErasureCodeIsaTableCache.cc
-
-erasure-code/isa/libec_isa_la-ErasureCodePluginIsa.lo: erasure-code/isa/ErasureCodePluginIsa.cc
- at am__fastdepCXX_TRUE@	$(AM_V_CXX)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(libec_isa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libec_isa_la_CXXFLAGS) $(CXXFLAGS) -MT erasure-code/isa/libec_isa_la-ErasureCodePluginIsa.lo -MD -MP -MF erasure-code/isa/$(DEPDIR)/libec_isa_la-ErasureCodePluginIsa.Tpo -c -o erasure-code/isa/libec_isa_la-ErasureCodePluginIsa.lo `test -f 'erasure-code/isa/ErasureCodePluginIsa.cc' || echo ' [...]
- at am__fastdepCXX_TRUE@	$(AM_V_at)$(am__mv) erasure-code/isa/$(DEPDIR)/libec_isa_la-ErasureCodePluginIsa.Tpo erasure-code/isa/$(DEPDIR)/libec_isa_la-ErasureCodePluginIsa.Plo
- at AMDEP_TRUE@@am__fastdepCXX_FALSE@	$(AM_V_CXX)source='erasure-code/isa/ErasureCodePluginIsa.cc' object='erasure-code/isa/libec_isa_la-ErasureCodePluginIsa.lo' libtool=yes @AMDEPBACKSLASH@
- at AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
- at am__fastdepCXX_FALSE@	$(AM_V_CXX at am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(libec_isa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libec_isa_la_CXXFLAGS) $(CXXFLAGS) -c -o erasure-code/isa/libec_isa_la-ErasureCodePluginIsa.lo `test -f 'erasure-code/isa/ErasureCodePluginIsa.cc' || echo '$(srcdir)/'`erasure-code/isa/ErasureCodePluginIsa.cc
-
-erasure-code/isa/libec_isa_la-xor_op.lo: erasure-code/isa/xor_op.cc
- at am__fastdepCXX_TRUE@	$(AM_V_CXX)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(libec_isa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libec_isa_la_CXXFLAGS) $(CXXFLAGS) -MT erasure-code/isa/libec_isa_la-xor_op.lo -MD -MP -MF erasure-code/isa/$(DEPDIR)/libec_isa_la-xor_op.Tpo -c -o erasure-code/isa/libec_isa_la-xor_op.lo `test -f 'erasure-code/isa/xor_op.cc' || echo '$(srcdir)/'`erasure-code/isa/xor_op.cc
- at am__fastdepCXX_TRUE@	$(AM_V_at)$(am__mv) erasure-code/isa/$(DEPDIR)/libec_isa_la-xor_op.Tpo erasure-code/isa/$(DEPDIR)/libec_isa_la-xor_op.Plo
- at AMDEP_TRUE@@am__fastdepCXX_FALSE@	$(AM_V_CXX)source='erasure-code/isa/xor_op.cc' object='erasure-code/isa/libec_isa_la-xor_op.lo' libtool=yes @AMDEPBACKSLASH@
- at AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
- at am__fastdepCXX_FALSE@	$(AM_V_CXX at am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(libec_isa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libec_isa_la_CXXFLAGS) $(CXXFLAGS) -c -o erasure-code/isa/libec_isa_la-xor_op.lo `test -f 'erasure-code/isa/xor_op.cc' || echo '$(srcdir)/'`erasure-code/isa/xor_op.cc
-
 erasure-code/jerasure/libec_jerasure_la-ErasureCodePluginSelectJerasure.lo: erasure-code/jerasure/ErasureCodePluginSelectJerasure.cc
 @am__fastdepCXX_TRUE@	$(AM_V_CXX)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libec_jerasure_la_CXXFLAGS) $(CXXFLAGS) -MT erasure-code/jerasure/libec_jerasure_la-ErasureCodePluginSelectJerasure.lo -MD -MP -MF erasure-code/jerasure/$(DEPDIR)/libec_jerasure_la-ErasureCodePluginSelectJerasure.Tpo -c -o erasure-code/jerasure/libec_jerasure_la-ErasureCodePluginSelectJerasure.lo `te [...]
 @am__fastdepCXX_TRUE@	$(AM_V_at)$(am__mv) erasure-code/jerasure/$(DEPDIR)/libec_jerasure_la-ErasureCodePluginSelectJerasure.Tpo erasure-code/jerasure/$(DEPDIR)/libec_jerasure_la-ErasureCodePluginSelectJerasure.Plo
@@ -22100,6 +22818,41 @@ test/erasure-code/libec_test_shec_sse4_la-TestShecPluginSSE4.lo: test/erasure-co
 @AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
 @am__fastdepCXX_FALSE@	$(AM_V_CXX at am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libec_test_shec_sse4_la_CXXFLAGS) $(CXXFLAGS) -c -o test/erasure-code/libec_test_shec_sse4_la-TestShecPluginSSE4.lo `test -f 'test/erasure-code/TestShecPluginSSE4.cc' || echo '$(srcdir)/'`test/erasure-code/TestShecPluginSSE4.cc
 
+erasure-code/libisa_la-ErasureCode.lo: erasure-code/ErasureCode.cc
+ at am__fastdepCXX_TRUE@	$(AM_V_CXX)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(libisa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libisa_la_CXXFLAGS) $(CXXFLAGS) -MT erasure-code/libisa_la-ErasureCode.lo -MD -MP -MF erasure-code/$(DEPDIR)/libisa_la-ErasureCode.Tpo -c -o erasure-code/libisa_la-ErasureCode.lo `test -f 'erasure-code/ErasureCode.cc' || echo '$(srcdir)/'`erasure-code/ErasureCode.cc
+ at am__fastdepCXX_TRUE@	$(AM_V_at)$(am__mv) erasure-code/$(DEPDIR)/libisa_la-ErasureCode.Tpo erasure-code/$(DEPDIR)/libisa_la-ErasureCode.Plo
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@	$(AM_V_CXX)source='erasure-code/ErasureCode.cc' object='erasure-code/libisa_la-ErasureCode.lo' libtool=yes @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCXX_FALSE@	$(AM_V_CXX at am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(libisa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libisa_la_CXXFLAGS) $(CXXFLAGS) -c -o erasure-code/libisa_la-ErasureCode.lo `test -f 'erasure-code/ErasureCode.cc' || echo '$(srcdir)/'`erasure-code/ErasureCode.cc
+
+erasure-code/isa/libisa_la-ErasureCodeIsa.lo: erasure-code/isa/ErasureCodeIsa.cc
+ at am__fastdepCXX_TRUE@	$(AM_V_CXX)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(libisa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libisa_la_CXXFLAGS) $(CXXFLAGS) -MT erasure-code/isa/libisa_la-ErasureCodeIsa.lo -MD -MP -MF erasure-code/isa/$(DEPDIR)/libisa_la-ErasureCodeIsa.Tpo -c -o erasure-code/isa/libisa_la-ErasureCodeIsa.lo `test -f 'erasure-code/isa/ErasureCodeIsa.cc' || echo '$(srcdir)/'`erasure-code/isa/ErasureCodeIsa.cc
+ at am__fastdepCXX_TRUE@	$(AM_V_at)$(am__mv) erasure-code/isa/$(DEPDIR)/libisa_la-ErasureCodeIsa.Tpo erasure-code/isa/$(DEPDIR)/libisa_la-ErasureCodeIsa.Plo
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@	$(AM_V_CXX)source='erasure-code/isa/ErasureCodeIsa.cc' object='erasure-code/isa/libisa_la-ErasureCodeIsa.lo' libtool=yes @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCXX_FALSE@	$(AM_V_CXX at am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(libisa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libisa_la_CXXFLAGS) $(CXXFLAGS) -c -o erasure-code/isa/libisa_la-ErasureCodeIsa.lo `test -f 'erasure-code/isa/ErasureCodeIsa.cc' || echo '$(srcdir)/'`erasure-code/isa/ErasureCodeIsa.cc
+
+erasure-code/isa/libisa_la-ErasureCodeIsaTableCache.lo: erasure-code/isa/ErasureCodeIsaTableCache.cc
+ at am__fastdepCXX_TRUE@	$(AM_V_CXX)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(libisa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libisa_la_CXXFLAGS) $(CXXFLAGS) -MT erasure-code/isa/libisa_la-ErasureCodeIsaTableCache.lo -MD -MP -MF erasure-code/isa/$(DEPDIR)/libisa_la-ErasureCodeIsaTableCache.Tpo -c -o erasure-code/isa/libisa_la-ErasureCodeIsaTableCache.lo `test -f 'erasure-code/isa/ErasureCodeIsaTableCache.cc' || echo  [...]
+ at am__fastdepCXX_TRUE@	$(AM_V_at)$(am__mv) erasure-code/isa/$(DEPDIR)/libisa_la-ErasureCodeIsaTableCache.Tpo erasure-code/isa/$(DEPDIR)/libisa_la-ErasureCodeIsaTableCache.Plo
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@	$(AM_V_CXX)source='erasure-code/isa/ErasureCodeIsaTableCache.cc' object='erasure-code/isa/libisa_la-ErasureCodeIsaTableCache.lo' libtool=yes @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCXX_FALSE@	$(AM_V_CXX at am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(libisa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libisa_la_CXXFLAGS) $(CXXFLAGS) -c -o erasure-code/isa/libisa_la-ErasureCodeIsaTableCache.lo `test -f 'erasure-code/isa/ErasureCodeIsaTableCache.cc' || echo '$(srcdir)/'`erasure-code/isa/ErasureCodeIsaTableCache.cc
+
+erasure-code/isa/libisa_la-ErasureCodePluginIsa.lo: erasure-code/isa/ErasureCodePluginIsa.cc
+ at am__fastdepCXX_TRUE@	$(AM_V_CXX)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(libisa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libisa_la_CXXFLAGS) $(CXXFLAGS) -MT erasure-code/isa/libisa_la-ErasureCodePluginIsa.lo -MD -MP -MF erasure-code/isa/$(DEPDIR)/libisa_la-ErasureCodePluginIsa.Tpo -c -o erasure-code/isa/libisa_la-ErasureCodePluginIsa.lo `test -f 'erasure-code/isa/ErasureCodePluginIsa.cc' || echo '$(srcdir)/'`era [...]
+ at am__fastdepCXX_TRUE@	$(AM_V_at)$(am__mv) erasure-code/isa/$(DEPDIR)/libisa_la-ErasureCodePluginIsa.Tpo erasure-code/isa/$(DEPDIR)/libisa_la-ErasureCodePluginIsa.Plo
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@	$(AM_V_CXX)source='erasure-code/isa/ErasureCodePluginIsa.cc' object='erasure-code/isa/libisa_la-ErasureCodePluginIsa.lo' libtool=yes @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCXX_FALSE@	$(AM_V_CXX at am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(libisa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libisa_la_CXXFLAGS) $(CXXFLAGS) -c -o erasure-code/isa/libisa_la-ErasureCodePluginIsa.lo `test -f 'erasure-code/isa/ErasureCodePluginIsa.cc' || echo '$(srcdir)/'`erasure-code/isa/ErasureCodePluginIsa.cc
+
+erasure-code/isa/libisa_la-xor_op.lo: erasure-code/isa/xor_op.cc
+ at am__fastdepCXX_TRUE@	$(AM_V_CXX)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(libisa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libisa_la_CXXFLAGS) $(CXXFLAGS) -MT erasure-code/isa/libisa_la-xor_op.lo -MD -MP -MF erasure-code/isa/$(DEPDIR)/libisa_la-xor_op.Tpo -c -o erasure-code/isa/libisa_la-xor_op.lo `test -f 'erasure-code/isa/xor_op.cc' || echo '$(srcdir)/'`erasure-code/isa/xor_op.cc
+ at am__fastdepCXX_TRUE@	$(AM_V_at)$(am__mv) erasure-code/isa/$(DEPDIR)/libisa_la-xor_op.Tpo erasure-code/isa/$(DEPDIR)/libisa_la-xor_op.Plo
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@	$(AM_V_CXX)source='erasure-code/isa/xor_op.cc' object='erasure-code/isa/libisa_la-xor_op.lo' libtool=yes @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCXX_FALSE@	$(AM_V_CXX at am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(libisa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libisa_la_CXXFLAGS) $(CXXFLAGS) -c -o erasure-code/isa/libisa_la-xor_op.lo `test -f 'erasure-code/isa/xor_op.cc' || echo '$(srcdir)/'`erasure-code/isa/xor_op.cc
+
 osd/libosd_types_la-PGLog.lo: osd/PGLog.cc
 @am__fastdepCXX_TRUE@	$(AM_V_CXX)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libosd_types_la_CXXFLAGS) $(CXXFLAGS) -MT osd/libosd_types_la-PGLog.lo -MD -MP -MF osd/$(DEPDIR)/libosd_types_la-PGLog.Tpo -c -o osd/libosd_types_la-PGLog.lo `test -f 'osd/PGLog.cc' || echo '$(srcdir)/'`osd/PGLog.cc
 @am__fastdepCXX_TRUE@	$(AM_V_at)$(am__mv) osd/$(DEPDIR)/libosd_types_la-PGLog.Tpo osd/$(DEPDIR)/libosd_types_la-PGLog.Plo
@@ -22184,6 +22937,20 @@ librbd/librbd_la-librbd.lo: librbd/librbd.cc
 @AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
 @am__fastdepCXX_FALSE@	$(AM_V_CXX at am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(librbd_la_CXXFLAGS) $(CXXFLAGS) -c -o librbd/librbd_la-librbd.lo `test -f 'librbd/librbd.cc' || echo '$(srcdir)/'`librbd/librbd.cc
 
+test/rbd_mirror/librbd_mirror_test_la-test_ClusterWatcher.lo: test/rbd_mirror/test_ClusterWatcher.cc
+ at am__fastdepCXX_TRUE@	$(AM_V_CXX)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(librbd_mirror_test_la_CXXFLAGS) $(CXXFLAGS) -MT test/rbd_mirror/librbd_mirror_test_la-test_ClusterWatcher.lo -MD -MP -MF test/rbd_mirror/$(DEPDIR)/librbd_mirror_test_la-test_ClusterWatcher.Tpo -c -o test/rbd_mirror/librbd_mirror_test_la-test_ClusterWatcher.lo `test -f 'test/rbd_mirror/test_ClusterWat [...]
+ at am__fastdepCXX_TRUE@	$(AM_V_at)$(am__mv) test/rbd_mirror/$(DEPDIR)/librbd_mirror_test_la-test_ClusterWatcher.Tpo test/rbd_mirror/$(DEPDIR)/librbd_mirror_test_la-test_ClusterWatcher.Plo
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@	$(AM_V_CXX)source='test/rbd_mirror/test_ClusterWatcher.cc' object='test/rbd_mirror/librbd_mirror_test_la-test_ClusterWatcher.lo' libtool=yes @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCXX_FALSE@	$(AM_V_CXX at am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(librbd_mirror_test_la_CXXFLAGS) $(CXXFLAGS) -c -o test/rbd_mirror/librbd_mirror_test_la-test_ClusterWatcher.lo `test -f 'test/rbd_mirror/test_ClusterWatcher.cc' || echo '$(srcdir)/'`test/rbd_mirror/test_ClusterWatcher.cc
+
+test/rbd_mirror/librbd_mirror_test_la-test_PoolWatcher.lo: test/rbd_mirror/test_PoolWatcher.cc
+ at am__fastdepCXX_TRUE@	$(AM_V_CXX)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(librbd_mirror_test_la_CXXFLAGS) $(CXXFLAGS) -MT test/rbd_mirror/librbd_mirror_test_la-test_PoolWatcher.lo -MD -MP -MF test/rbd_mirror/$(DEPDIR)/librbd_mirror_test_la-test_PoolWatcher.Tpo -c -o test/rbd_mirror/librbd_mirror_test_la-test_PoolWatcher.lo `test -f 'test/rbd_mirror/test_PoolWatcher.cc' ||  [...]
+ at am__fastdepCXX_TRUE@	$(AM_V_at)$(am__mv) test/rbd_mirror/$(DEPDIR)/librbd_mirror_test_la-test_PoolWatcher.Tpo test/rbd_mirror/$(DEPDIR)/librbd_mirror_test_la-test_PoolWatcher.Plo
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@	$(AM_V_CXX)source='test/rbd_mirror/test_PoolWatcher.cc' object='test/rbd_mirror/librbd_mirror_test_la-test_PoolWatcher.lo' libtool=yes @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCXX_FALSE@	$(AM_V_CXX at am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(librbd_mirror_test_la_CXXFLAGS) $(CXXFLAGS) -c -o test/rbd_mirror/librbd_mirror_test_la-test_PoolWatcher.lo `test -f 'test/rbd_mirror/test_PoolWatcher.cc' || echo '$(srcdir)/'`test/rbd_mirror/test_PoolWatcher.cc
+
 test/librbd/librbd_test_la-test_fixture.lo: test/librbd/test_fixture.cc
 @am__fastdepCXX_TRUE@	$(AM_V_CXX)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(librbd_test_la_CXXFLAGS) $(CXXFLAGS) -MT test/librbd/librbd_test_la-test_fixture.lo -MD -MP -MF test/librbd/$(DEPDIR)/librbd_test_la-test_fixture.Tpo -c -o test/librbd/librbd_test_la-test_fixture.lo `test -f 'test/librbd/test_fixture.cc' || echo '$(srcdir)/'`test/librbd/test_fixture.cc
 @am__fastdepCXX_TRUE@	$(AM_V_at)$(am__mv) test/librbd/$(DEPDIR)/librbd_test_la-test_fixture.Tpo test/librbd/$(DEPDIR)/librbd_test_la-test_fixture.Plo
@@ -22303,6 +23070,13 @@ rgw/librgw_la-rgw_json_enc.lo: rgw/rgw_json_enc.cc
 @AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
 @am__fastdepCXX_FALSE@	$(AM_V_CXX at am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(librgw_la_CXXFLAGS) $(CXXFLAGS) -c -o rgw/librgw_la-rgw_json_enc.lo `test -f 'rgw/rgw_json_enc.cc' || echo '$(srcdir)/'`rgw/rgw_json_enc.cc
 
+rgw/librgw_la-rgw_xml_enc.lo: rgw/rgw_xml_enc.cc
+ at am__fastdepCXX_TRUE@	$(AM_V_CXX)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(librgw_la_CXXFLAGS) $(CXXFLAGS) -MT rgw/librgw_la-rgw_xml_enc.lo -MD -MP -MF rgw/$(DEPDIR)/librgw_la-rgw_xml_enc.Tpo -c -o rgw/librgw_la-rgw_xml_enc.lo `test -f 'rgw/rgw_xml_enc.cc' || echo '$(srcdir)/'`rgw/rgw_xml_enc.cc
+ at am__fastdepCXX_TRUE@	$(AM_V_at)$(am__mv) rgw/$(DEPDIR)/librgw_la-rgw_xml_enc.Tpo rgw/$(DEPDIR)/librgw_la-rgw_xml_enc.Plo
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@	$(AM_V_CXX)source='rgw/rgw_xml_enc.cc' object='rgw/librgw_la-rgw_xml_enc.lo' libtool=yes @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCXX_FALSE@	$(AM_V_CXX at am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(librgw_la_CXXFLAGS) $(CXXFLAGS) -c -o rgw/librgw_la-rgw_xml_enc.lo `test -f 'rgw/rgw_xml_enc.cc' || echo '$(srcdir)/'`rgw/rgw_xml_enc.cc
+
 rgw/librgw_la-rgw_user.lo: rgw/rgw_user.cc
 @am__fastdepCXX_TRUE@	$(AM_V_CXX)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(librgw_la_CXXFLAGS) $(CXXFLAGS) -MT rgw/librgw_la-rgw_user.lo -MD -MP -MF rgw/$(DEPDIR)/librgw_la-rgw_user.Tpo -c -o rgw/librgw_la-rgw_user.lo `test -f 'rgw/rgw_user.cc' || echo '$(srcdir)/'`rgw/rgw_user.cc
 @am__fastdepCXX_TRUE@	$(AM_V_at)$(am__mv) rgw/$(DEPDIR)/librgw_la-rgw_user.Tpo rgw/$(DEPDIR)/librgw_la-rgw_user.Plo
@@ -22492,6 +23266,13 @@ rgw/librgw_la-rgw_object_expirer_core.lo: rgw/rgw_object_expirer_core.cc
 @AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
 @am__fastdepCXX_FALSE@	$(AM_V_CXX at am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(librgw_la_CXXFLAGS) $(CXXFLAGS) -c -o rgw/librgw_la-rgw_object_expirer_core.lo `test -f 'rgw/rgw_object_expirer_core.cc' || echo '$(srcdir)/'`rgw/rgw_object_expirer_core.cc
 
+rgw/librgw_la-rgw_website.lo: rgw/rgw_website.cc
+ at am__fastdepCXX_TRUE@	$(AM_V_CXX)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(librgw_la_CXXFLAGS) $(CXXFLAGS) -MT rgw/librgw_la-rgw_website.lo -MD -MP -MF rgw/$(DEPDIR)/librgw_la-rgw_website.Tpo -c -o rgw/librgw_la-rgw_website.lo `test -f 'rgw/rgw_website.cc' || echo '$(srcdir)/'`rgw/rgw_website.cc
+ at am__fastdepCXX_TRUE@	$(AM_V_at)$(am__mv) rgw/$(DEPDIR)/librgw_la-rgw_website.Tpo rgw/$(DEPDIR)/librgw_la-rgw_website.Plo
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@	$(AM_V_CXX)source='rgw/rgw_website.cc' object='rgw/librgw_la-rgw_website.lo' libtool=yes @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCXX_FALSE@	$(AM_V_CXX at am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(librgw_la_CXXFLAGS) $(CXXFLAGS) -c -o rgw/librgw_la-rgw_website.lo `test -f 'rgw/rgw_website.cc' || echo '$(srcdir)/'`rgw/rgw_website.cc
+
 test/encoding/ceph_dencoder-ceph_dencoder.o: test/encoding/ceph_dencoder.cc
 @am__fastdepCXX_TRUE@	$(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(ceph_dencoder_CXXFLAGS) $(CXXFLAGS) -MT test/encoding/ceph_dencoder-ceph_dencoder.o -MD -MP -MF test/encoding/$(DEPDIR)/ceph_dencoder-ceph_dencoder.Tpo -c -o test/encoding/ceph_dencoder-ceph_dencoder.o `test -f 'test/encoding/ceph_dencoder.cc' || echo '$(srcdir)/'`test/encoding/ceph_dencoder.cc
 @am__fastdepCXX_TRUE@	$(AM_V_at)$(am__mv) test/encoding/$(DEPDIR)/ceph_dencoder-ceph_dencoder.Tpo test/encoding/$(DEPDIR)/ceph_dencoder-ceph_dencoder.Po
@@ -23948,6 +24729,20 @@ test/libradosstriper/ceph_test_rados_striper_api_striping-striping.obj: test/lib
 @AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
 @am__fastdepCXX_FALSE@	$(AM_V_CXX at am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(ceph_test_rados_striper_api_striping_CXXFLAGS) $(CXXFLAGS) -c -o test/libradosstriper/ceph_test_rados_striper_api_striping-striping.obj `if test -f 'test/libradosstriper/striping.cc'; then $(CYGPATH_W) 'test/libradosstriper/striping.cc'; else $(CYGPATH_W) '$(srcdir)/test/libradosstriper/striping.cc'; fi`
 
+test/rbd_mirror/ceph_test_rbd_mirror-test_main.o: test/rbd_mirror/test_main.cc
+ at am__fastdepCXX_TRUE@	$(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(ceph_test_rbd_mirror_CXXFLAGS) $(CXXFLAGS) -MT test/rbd_mirror/ceph_test_rbd_mirror-test_main.o -MD -MP -MF test/rbd_mirror/$(DEPDIR)/ceph_test_rbd_mirror-test_main.Tpo -c -o test/rbd_mirror/ceph_test_rbd_mirror-test_main.o `test -f 'test/rbd_mirror/test_main.cc' || echo '$(srcdir)/'`test/rbd_mirror/test_main.cc
+ at am__fastdepCXX_TRUE@	$(AM_V_at)$(am__mv) test/rbd_mirror/$(DEPDIR)/ceph_test_rbd_mirror-test_main.Tpo test/rbd_mirror/$(DEPDIR)/ceph_test_rbd_mirror-test_main.Po
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@	$(AM_V_CXX)source='test/rbd_mirror/test_main.cc' object='test/rbd_mirror/ceph_test_rbd_mirror-test_main.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCXX_FALSE@	$(AM_V_CXX at am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(ceph_test_rbd_mirror_CXXFLAGS) $(CXXFLAGS) -c -o test/rbd_mirror/ceph_test_rbd_mirror-test_main.o `test -f 'test/rbd_mirror/test_main.cc' || echo '$(srcdir)/'`test/rbd_mirror/test_main.cc
+
+test/rbd_mirror/ceph_test_rbd_mirror-test_main.obj: test/rbd_mirror/test_main.cc
+ at am__fastdepCXX_TRUE@	$(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(ceph_test_rbd_mirror_CXXFLAGS) $(CXXFLAGS) -MT test/rbd_mirror/ceph_test_rbd_mirror-test_main.obj -MD -MP -MF test/rbd_mirror/$(DEPDIR)/ceph_test_rbd_mirror-test_main.Tpo -c -o test/rbd_mirror/ceph_test_rbd_mirror-test_main.obj `if test -f 'test/rbd_mirror/test_main.cc'; then $(CYGPATH_W) 'test/rbd_mirror/test_main.cc'; else $(CYGPATH_W) '$(srcdir)/test/rbd_mirror/test_main.cc'; fi`
+ at am__fastdepCXX_TRUE@	$(AM_V_at)$(am__mv) test/rbd_mirror/$(DEPDIR)/ceph_test_rbd_mirror-test_main.Tpo test/rbd_mirror/$(DEPDIR)/ceph_test_rbd_mirror-test_main.Po
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@	$(AM_V_CXX)source='test/rbd_mirror/test_main.cc' object='test/rbd_mirror/ceph_test_rbd_mirror-test_main.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCXX_FALSE@	$(AM_V_CXX at am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(ceph_test_rbd_mirror_CXXFLAGS) $(CXXFLAGS) -c -o test/rbd_mirror/ceph_test_rbd_mirror-test_main.obj `if test -f 'test/rbd_mirror/test_main.cc'; then $(CYGPATH_W) 'test/rbd_mirror/test_main.cc'; else $(CYGPATH_W) '$(srcdir)/test/rbd_mirror/test_main.cc'; fi`
+
 test/rgw/ceph_test_rgw_manifest-test_rgw_manifest.o: test/rgw/test_rgw_manifest.cc
 @am__fastdepCXX_TRUE@	$(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(ceph_test_rgw_manifest_CXXFLAGS) $(CXXFLAGS) -MT test/rgw/ceph_test_rgw_manifest-test_rgw_manifest.o -MD -MP -MF test/rgw/$(DEPDIR)/ceph_test_rgw_manifest-test_rgw_manifest.Tpo -c -o test/rgw/ceph_test_rgw_manifest-test_rgw_manifest.o `test -f 'test/rgw/test_rgw_manifest.cc' || echo '$(srcdir)/'`test/rgw/test_rgw_manifest.cc
 @am__fastdepCXX_TRUE@	$(AM_V_at)$(am__mv) test/rgw/$(DEPDIR)/ceph_test_rgw_manifest-test_rgw_manifest.Tpo test/rgw/$(DEPDIR)/ceph_test_rgw_manifest-test_rgw_manifest.Po
@@ -24396,6 +25191,20 @@ rgw/test_build_librgw-rgw_json_enc.obj: rgw/rgw_json_enc.cc
 @AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
 @am__fastdepCXX_FALSE@	$(AM_V_CXX at am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(test_build_librgw_CXXFLAGS) $(CXXFLAGS) -c -o rgw/test_build_librgw-rgw_json_enc.obj `if test -f 'rgw/rgw_json_enc.cc'; then $(CYGPATH_W) 'rgw/rgw_json_enc.cc'; else $(CYGPATH_W) '$(srcdir)/rgw/rgw_json_enc.cc'; fi`
 
+rgw/test_build_librgw-rgw_xml_enc.o: rgw/rgw_xml_enc.cc
+ at am__fastdepCXX_TRUE@	$(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(test_build_librgw_CXXFLAGS) $(CXXFLAGS) -MT rgw/test_build_librgw-rgw_xml_enc.o -MD -MP -MF rgw/$(DEPDIR)/test_build_librgw-rgw_xml_enc.Tpo -c -o rgw/test_build_librgw-rgw_xml_enc.o `test -f 'rgw/rgw_xml_enc.cc' || echo '$(srcdir)/'`rgw/rgw_xml_enc.cc
+ at am__fastdepCXX_TRUE@	$(AM_V_at)$(am__mv) rgw/$(DEPDIR)/test_build_librgw-rgw_xml_enc.Tpo rgw/$(DEPDIR)/test_build_librgw-rgw_xml_enc.Po
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@	$(AM_V_CXX)source='rgw/rgw_xml_enc.cc' object='rgw/test_build_librgw-rgw_xml_enc.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCXX_FALSE@	$(AM_V_CXX at am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(test_build_librgw_CXXFLAGS) $(CXXFLAGS) -c -o rgw/test_build_librgw-rgw_xml_enc.o `test -f 'rgw/rgw_xml_enc.cc' || echo '$(srcdir)/'`rgw/rgw_xml_enc.cc
+
+rgw/test_build_librgw-rgw_xml_enc.obj: rgw/rgw_xml_enc.cc
+ at am__fastdepCXX_TRUE@	$(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(test_build_librgw_CXXFLAGS) $(CXXFLAGS) -MT rgw/test_build_librgw-rgw_xml_enc.obj -MD -MP -MF rgw/$(DEPDIR)/test_build_librgw-rgw_xml_enc.Tpo -c -o rgw/test_build_librgw-rgw_xml_enc.obj `if test -f 'rgw/rgw_xml_enc.cc'; then $(CYGPATH_W) 'rgw/rgw_xml_enc.cc'; else $(CYGPATH_W) '$(srcdir)/rgw/rgw_xml_enc.cc'; fi`
+ at am__fastdepCXX_TRUE@	$(AM_V_at)$(am__mv) rgw/$(DEPDIR)/test_build_librgw-rgw_xml_enc.Tpo rgw/$(DEPDIR)/test_build_librgw-rgw_xml_enc.Po
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@	$(AM_V_CXX)source='rgw/rgw_xml_enc.cc' object='rgw/test_build_librgw-rgw_xml_enc.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCXX_FALSE@	$(AM_V_CXX at am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(test_build_librgw_CXXFLAGS) $(CXXFLAGS) -c -o rgw/test_build_librgw-rgw_xml_enc.obj `if test -f 'rgw/rgw_xml_enc.cc'; then $(CYGPATH_W) 'rgw/rgw_xml_enc.cc'; else $(CYGPATH_W) '$(srcdir)/rgw/rgw_xml_enc.cc'; fi`
+
 rgw/test_build_librgw-rgw_user.o: rgw/rgw_user.cc
 @am__fastdepCXX_TRUE@	$(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(test_build_librgw_CXXFLAGS) $(CXXFLAGS) -MT rgw/test_build_librgw-rgw_user.o -MD -MP -MF rgw/$(DEPDIR)/test_build_librgw-rgw_user.Tpo -c -o rgw/test_build_librgw-rgw_user.o `test -f 'rgw/rgw_user.cc' || echo '$(srcdir)/'`rgw/rgw_user.cc
 @am__fastdepCXX_TRUE@	$(AM_V_at)$(am__mv) rgw/$(DEPDIR)/test_build_librgw-rgw_user.Tpo rgw/$(DEPDIR)/test_build_librgw-rgw_user.Po
@@ -24774,6 +25583,20 @@ rgw/test_build_librgw-rgw_object_expirer_core.obj: rgw/rgw_object_expirer_core.c
 @AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
 @am__fastdepCXX_FALSE@	$(AM_V_CXX at am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(test_build_librgw_CXXFLAGS) $(CXXFLAGS) -c -o rgw/test_build_librgw-rgw_object_expirer_core.obj `if test -f 'rgw/rgw_object_expirer_core.cc'; then $(CYGPATH_W) 'rgw/rgw_object_expirer_core.cc'; else $(CYGPATH_W) '$(srcdir)/rgw/rgw_object_expirer_core.cc'; fi`
 
+rgw/test_build_librgw-rgw_website.o: rgw/rgw_website.cc
+ at am__fastdepCXX_TRUE@	$(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(test_build_librgw_CXXFLAGS) $(CXXFLAGS) -MT rgw/test_build_librgw-rgw_website.o -MD -MP -MF rgw/$(DEPDIR)/test_build_librgw-rgw_website.Tpo -c -o rgw/test_build_librgw-rgw_website.o `test -f 'rgw/rgw_website.cc' || echo '$(srcdir)/'`rgw/rgw_website.cc
+ at am__fastdepCXX_TRUE@	$(AM_V_at)$(am__mv) rgw/$(DEPDIR)/test_build_librgw-rgw_website.Tpo rgw/$(DEPDIR)/test_build_librgw-rgw_website.Po
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@	$(AM_V_CXX)source='rgw/rgw_website.cc' object='rgw/test_build_librgw-rgw_website.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCXX_FALSE@	$(AM_V_CXX at am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(test_build_librgw_CXXFLAGS) $(CXXFLAGS) -c -o rgw/test_build_librgw-rgw_website.o `test -f 'rgw/rgw_website.cc' || echo '$(srcdir)/'`rgw/rgw_website.cc
+
+rgw/test_build_librgw-rgw_website.obj: rgw/rgw_website.cc
+ at am__fastdepCXX_TRUE@	$(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(test_build_librgw_CXXFLAGS) $(CXXFLAGS) -MT rgw/test_build_librgw-rgw_website.obj -MD -MP -MF rgw/$(DEPDIR)/test_build_librgw-rgw_website.Tpo -c -o rgw/test_build_librgw-rgw_website.obj `if test -f 'rgw/rgw_website.cc'; then $(CYGPATH_W) 'rgw/rgw_website.cc'; else $(CYGPATH_W) '$(srcdir)/rgw/rgw_website.cc'; fi`
+ at am__fastdepCXX_TRUE@	$(AM_V_at)$(am__mv) rgw/$(DEPDIR)/test_build_librgw-rgw_website.Tpo rgw/$(DEPDIR)/test_build_librgw-rgw_website.Po
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@	$(AM_V_CXX)source='rgw/rgw_website.cc' object='rgw/test_build_librgw-rgw_website.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCXX_FALSE@	$(AM_V_CXX at am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(test_build_librgw_CXXFLAGS) $(CXXFLAGS) -c -o rgw/test_build_librgw-rgw_website.obj `if test -f 'rgw/rgw_website.cc'; then $(CYGPATH_W) 'rgw/rgw_website.cc'; else $(CYGPATH_W) '$(srcdir)/rgw/rgw_website.cc'; fi`
+
 test/unittest_addrs-test_addrs.o: test/test_addrs.cc
 @am__fastdepCXX_TRUE@	$(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(unittest_addrs_CXXFLAGS) $(CXXFLAGS) -MT test/unittest_addrs-test_addrs.o -MD -MP -MF test/$(DEPDIR)/unittest_addrs-test_addrs.Tpo -c -o test/unittest_addrs-test_addrs.o `test -f 'test/test_addrs.cc' || echo '$(srcdir)/'`test/test_addrs.cc
 @am__fastdepCXX_TRUE@	$(AM_V_at)$(am__mv) test/$(DEPDIR)/unittest_addrs-test_addrs.Tpo test/$(DEPDIR)/unittest_addrs-test_addrs.Po
@@ -25054,6 +25877,62 @@ compressor/snappy/unittest_compression_plugin_snappy-CompressionPluginSnappy.obj
 @AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
 @am__fastdepCXX_FALSE@	$(AM_V_CXX at am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(unittest_compression_plugin_snappy_CXXFLAGS) $(CXXFLAGS) -c -o compressor/snappy/unittest_compression_plugin_snappy-CompressionPluginSnappy.obj `if test -f 'compressor/snappy/CompressionPluginSnappy.cc'; then $(CYGPATH_W) 'compressor/snappy/CompressionPluginSnappy.cc'; else $(CYGPATH_W) '$(srcdir)/compressor/snappy/CompressionPluginSnappy.cc'; fi`
 
+test/compressor/unittest_compression_plugin_zlib-test_compression_plugin_zlib.o: test/compressor/test_compression_plugin_zlib.cc
+ at am__fastdepCXX_TRUE@	$(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(unittest_compression_plugin_zlib_CXXFLAGS) $(CXXFLAGS) -MT test/compressor/unittest_compression_plugin_zlib-test_compression_plugin_zlib.o -MD -MP -MF test/compressor/$(DEPDIR)/unittest_compression_plugin_zlib-test_compression_plugin_zlib.Tpo -c -o test/compressor/unittest_compression_plugin_zlib-test_compression_plugin_zlib.o `test -f 'test/compressor/test_compression_plugin_zlib [...]
+ at am__fastdepCXX_TRUE@	$(AM_V_at)$(am__mv) test/compressor/$(DEPDIR)/unittest_compression_plugin_zlib-test_compression_plugin_zlib.Tpo test/compressor/$(DEPDIR)/unittest_compression_plugin_zlib-test_compression_plugin_zlib.Po
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@	$(AM_V_CXX)source='test/compressor/test_compression_plugin_zlib.cc' object='test/compressor/unittest_compression_plugin_zlib-test_compression_plugin_zlib.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCXX_FALSE@	$(AM_V_CXX at am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(unittest_compression_plugin_zlib_CXXFLAGS) $(CXXFLAGS) -c -o test/compressor/unittest_compression_plugin_zlib-test_compression_plugin_zlib.o `test -f 'test/compressor/test_compression_plugin_zlib.cc' || echo '$(srcdir)/'`test/compressor/test_compression_plugin_zlib.cc
+
+test/compressor/unittest_compression_plugin_zlib-test_compression_plugin_zlib.obj: test/compressor/test_compression_plugin_zlib.cc
+ at am__fastdepCXX_TRUE@	$(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(unittest_compression_plugin_zlib_CXXFLAGS) $(CXXFLAGS) -MT test/compressor/unittest_compression_plugin_zlib-test_compression_plugin_zlib.obj -MD -MP -MF test/compressor/$(DEPDIR)/unittest_compression_plugin_zlib-test_compression_plugin_zlib.Tpo -c -o test/compressor/unittest_compression_plugin_zlib-test_compression_plugin_zlib.obj `if test -f 'test/compressor/test_compression_plug [...]
+ at am__fastdepCXX_TRUE@	$(AM_V_at)$(am__mv) test/compressor/$(DEPDIR)/unittest_compression_plugin_zlib-test_compression_plugin_zlib.Tpo test/compressor/$(DEPDIR)/unittest_compression_plugin_zlib-test_compression_plugin_zlib.Po
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@	$(AM_V_CXX)source='test/compressor/test_compression_plugin_zlib.cc' object='test/compressor/unittest_compression_plugin_zlib-test_compression_plugin_zlib.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCXX_FALSE@	$(AM_V_CXX at am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(unittest_compression_plugin_zlib_CXXFLAGS) $(CXXFLAGS) -c -o test/compressor/unittest_compression_plugin_zlib-test_compression_plugin_zlib.obj `if test -f 'test/compressor/test_compression_plugin_zlib.cc'; then $(CYGPATH_W) 'test/compressor/test_compression_plugin_zlib.cc'; else $(CYGPATH_W) '$(srcdir)/test/compressor/test_compression_plugin_zlib.cc'; fi`
+
+compressor/unittest_compression_plugin_zlib-Compressor.o: compressor/Compressor.cc
+ at am__fastdepCXX_TRUE@	$(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(unittest_compression_plugin_zlib_CXXFLAGS) $(CXXFLAGS) -MT compressor/unittest_compression_plugin_zlib-Compressor.o -MD -MP -MF compressor/$(DEPDIR)/unittest_compression_plugin_zlib-Compressor.Tpo -c -o compressor/unittest_compression_plugin_zlib-Compressor.o `test -f 'compressor/Compressor.cc' || echo '$(srcdir)/'`compressor/Compressor.cc
+ at am__fastdepCXX_TRUE@	$(AM_V_at)$(am__mv) compressor/$(DEPDIR)/unittest_compression_plugin_zlib-Compressor.Tpo compressor/$(DEPDIR)/unittest_compression_plugin_zlib-Compressor.Po
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@	$(AM_V_CXX)source='compressor/Compressor.cc' object='compressor/unittest_compression_plugin_zlib-Compressor.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCXX_FALSE@	$(AM_V_CXX at am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(unittest_compression_plugin_zlib_CXXFLAGS) $(CXXFLAGS) -c -o compressor/unittest_compression_plugin_zlib-Compressor.o `test -f 'compressor/Compressor.cc' || echo '$(srcdir)/'`compressor/Compressor.cc
+
+compressor/unittest_compression_plugin_zlib-Compressor.obj: compressor/Compressor.cc
+ at am__fastdepCXX_TRUE@	$(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(unittest_compression_plugin_zlib_CXXFLAGS) $(CXXFLAGS) -MT compressor/unittest_compression_plugin_zlib-Compressor.obj -MD -MP -MF compressor/$(DEPDIR)/unittest_compression_plugin_zlib-Compressor.Tpo -c -o compressor/unittest_compression_plugin_zlib-Compressor.obj `if test -f 'compressor/Compressor.cc'; then $(CYGPATH_W) 'compressor/Compressor.cc'; else $(CYGPATH_W) '$(srcdir)/comp [...]
+ at am__fastdepCXX_TRUE@	$(AM_V_at)$(am__mv) compressor/$(DEPDIR)/unittest_compression_plugin_zlib-Compressor.Tpo compressor/$(DEPDIR)/unittest_compression_plugin_zlib-Compressor.Po
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@	$(AM_V_CXX)source='compressor/Compressor.cc' object='compressor/unittest_compression_plugin_zlib-Compressor.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCXX_FALSE@	$(AM_V_CXX at am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(unittest_compression_plugin_zlib_CXXFLAGS) $(CXXFLAGS) -c -o compressor/unittest_compression_plugin_zlib-Compressor.obj `if test -f 'compressor/Compressor.cc'; then $(CYGPATH_W) 'compressor/Compressor.cc'; else $(CYGPATH_W) '$(srcdir)/compressor/Compressor.cc'; fi`
+
+compressor/zlib/unittest_compression_plugin_zlib-CompressionPluginZlib.o: compressor/zlib/CompressionPluginZlib.cc
+ at am__fastdepCXX_TRUE@	$(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(unittest_compression_plugin_zlib_CXXFLAGS) $(CXXFLAGS) -MT compressor/zlib/unittest_compression_plugin_zlib-CompressionPluginZlib.o -MD -MP -MF compressor/zlib/$(DEPDIR)/unittest_compression_plugin_zlib-CompressionPluginZlib.Tpo -c -o compressor/zlib/unittest_compression_plugin_zlib-CompressionPluginZlib.o `test -f 'compressor/zlib/CompressionPluginZlib.cc' || echo '$(srcdir)/'`co [...]
+ at am__fastdepCXX_TRUE@	$(AM_V_at)$(am__mv) compressor/zlib/$(DEPDIR)/unittest_compression_plugin_zlib-CompressionPluginZlib.Tpo compressor/zlib/$(DEPDIR)/unittest_compression_plugin_zlib-CompressionPluginZlib.Po
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@	$(AM_V_CXX)source='compressor/zlib/CompressionPluginZlib.cc' object='compressor/zlib/unittest_compression_plugin_zlib-CompressionPluginZlib.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCXX_FALSE@	$(AM_V_CXX at am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(unittest_compression_plugin_zlib_CXXFLAGS) $(CXXFLAGS) -c -o compressor/zlib/unittest_compression_plugin_zlib-CompressionPluginZlib.o `test -f 'compressor/zlib/CompressionPluginZlib.cc' || echo '$(srcdir)/'`compressor/zlib/CompressionPluginZlib.cc
+
+compressor/zlib/unittest_compression_plugin_zlib-CompressionPluginZlib.obj: compressor/zlib/CompressionPluginZlib.cc
+ at am__fastdepCXX_TRUE@	$(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(unittest_compression_plugin_zlib_CXXFLAGS) $(CXXFLAGS) -MT compressor/zlib/unittest_compression_plugin_zlib-CompressionPluginZlib.obj -MD -MP -MF compressor/zlib/$(DEPDIR)/unittest_compression_plugin_zlib-CompressionPluginZlib.Tpo -c -o compressor/zlib/unittest_compression_plugin_zlib-CompressionPluginZlib.obj `if test -f 'compressor/zlib/CompressionPluginZlib.cc'; then $(CYGPATH_ [...]
+ at am__fastdepCXX_TRUE@	$(AM_V_at)$(am__mv) compressor/zlib/$(DEPDIR)/unittest_compression_plugin_zlib-CompressionPluginZlib.Tpo compressor/zlib/$(DEPDIR)/unittest_compression_plugin_zlib-CompressionPluginZlib.Po
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@	$(AM_V_CXX)source='compressor/zlib/CompressionPluginZlib.cc' object='compressor/zlib/unittest_compression_plugin_zlib-CompressionPluginZlib.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCXX_FALSE@	$(AM_V_CXX at am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(unittest_compression_plugin_zlib_CXXFLAGS) $(CXXFLAGS) -c -o compressor/zlib/unittest_compression_plugin_zlib-CompressionPluginZlib.obj `if test -f 'compressor/zlib/CompressionPluginZlib.cc'; then $(CYGPATH_W) 'compressor/zlib/CompressionPluginZlib.cc'; else $(CYGPATH_W) '$(srcdir)/compressor/zlib/CompressionPluginZlib.cc'; fi`
+
+compressor/zlib/unittest_compression_plugin_zlib-CompressionZlib.o: compressor/zlib/CompressionZlib.cc
+ at am__fastdepCXX_TRUE@	$(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(unittest_compression_plugin_zlib_CXXFLAGS) $(CXXFLAGS) -MT compressor/zlib/unittest_compression_plugin_zlib-CompressionZlib.o -MD -MP -MF compressor/zlib/$(DEPDIR)/unittest_compression_plugin_zlib-CompressionZlib.Tpo -c -o compressor/zlib/unittest_compression_plugin_zlib-CompressionZlib.o `test -f 'compressor/zlib/CompressionZlib.cc' || echo '$(srcdir)/'`compressor/zlib/Compressio [...]
+ at am__fastdepCXX_TRUE@	$(AM_V_at)$(am__mv) compressor/zlib/$(DEPDIR)/unittest_compression_plugin_zlib-CompressionZlib.Tpo compressor/zlib/$(DEPDIR)/unittest_compression_plugin_zlib-CompressionZlib.Po
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@	$(AM_V_CXX)source='compressor/zlib/CompressionZlib.cc' object='compressor/zlib/unittest_compression_plugin_zlib-CompressionZlib.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCXX_FALSE@	$(AM_V_CXX at am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(unittest_compression_plugin_zlib_CXXFLAGS) $(CXXFLAGS) -c -o compressor/zlib/unittest_compression_plugin_zlib-CompressionZlib.o `test -f 'compressor/zlib/CompressionZlib.cc' || echo '$(srcdir)/'`compressor/zlib/CompressionZlib.cc
+
+compressor/zlib/unittest_compression_plugin_zlib-CompressionZlib.obj: compressor/zlib/CompressionZlib.cc
+ at am__fastdepCXX_TRUE@	$(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(unittest_compression_plugin_zlib_CXXFLAGS) $(CXXFLAGS) -MT compressor/zlib/unittest_compression_plugin_zlib-CompressionZlib.obj -MD -MP -MF compressor/zlib/$(DEPDIR)/unittest_compression_plugin_zlib-CompressionZlib.Tpo -c -o compressor/zlib/unittest_compression_plugin_zlib-CompressionZlib.obj `if test -f 'compressor/zlib/CompressionZlib.cc'; then $(CYGPATH_W) 'compressor/zlib/Comp [...]
+ at am__fastdepCXX_TRUE@	$(AM_V_at)$(am__mv) compressor/zlib/$(DEPDIR)/unittest_compression_plugin_zlib-CompressionZlib.Tpo compressor/zlib/$(DEPDIR)/unittest_compression_plugin_zlib-CompressionZlib.Po
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@	$(AM_V_CXX)source='compressor/zlib/CompressionZlib.cc' object='compressor/zlib/unittest_compression_plugin_zlib-CompressionZlib.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCXX_FALSE@	$(AM_V_CXX at am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(unittest_compression_plugin_zlib_CXXFLAGS) $(CXXFLAGS) -c -o compressor/zlib/unittest_compression_plugin_zlib-CompressionZlib.obj `if test -f 'compressor/zlib/CompressionZlib.cc'; then $(CYGPATH_W) 'compressor/zlib/CompressionZlib.cc'; else $(CYGPATH_W) '$(srcdir)/compressor/zlib/CompressionZlib.cc'; fi`
+
 test/compressor/unittest_compression_snappy-test_compression_snappy.o: test/compressor/test_compression_snappy.cc
 @am__fastdepCXX_TRUE@	$(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(unittest_compression_snappy_CXXFLAGS) $(CXXFLAGS) -MT test/compressor/unittest_compression_snappy-test_compression_snappy.o -MD -MP -MF test/compressor/$(DEPDIR)/unittest_compression_snappy-test_compression_snappy.Tpo -c -o test/compressor/unittest_compression_snappy-test_compression_snappy.o `test -f 'test/compressor/test_compression_snappy.cc' || echo '$(srcdir)/'`test/compresso [...]
 @am__fastdepCXX_TRUE@	$(AM_V_at)$(am__mv) test/compressor/$(DEPDIR)/unittest_compression_snappy-test_compression_snappy.Tpo test/compressor/$(DEPDIR)/unittest_compression_snappy-test_compression_snappy.Po
@@ -25096,6 +25975,62 @@ compressor/snappy/unittest_compression_snappy-CompressionPluginSnappy.obj: compr
 @AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
 @am__fastdepCXX_FALSE@	$(AM_V_CXX at am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(unittest_compression_snappy_CXXFLAGS) $(CXXFLAGS) -c -o compressor/snappy/unittest_compression_snappy-CompressionPluginSnappy.obj `if test -f 'compressor/snappy/CompressionPluginSnappy.cc'; then $(CYGPATH_W) 'compressor/snappy/CompressionPluginSnappy.cc'; else $(CYGPATH_W) '$(srcdir)/compressor/snappy/CompressionPluginSnappy.cc'; fi`
 
+test/compressor/unittest_compression_zlib-test_compression_zlib.o: test/compressor/test_compression_zlib.cc
+ at am__fastdepCXX_TRUE@	$(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(unittest_compression_zlib_CXXFLAGS) $(CXXFLAGS) -MT test/compressor/unittest_compression_zlib-test_compression_zlib.o -MD -MP -MF test/compressor/$(DEPDIR)/unittest_compression_zlib-test_compression_zlib.Tpo -c -o test/compressor/unittest_compression_zlib-test_compression_zlib.o `test -f 'test/compressor/test_compression_zlib.cc' || echo '$(srcdir)/'`test/compressor/test_compressi [...]
+ at am__fastdepCXX_TRUE@	$(AM_V_at)$(am__mv) test/compressor/$(DEPDIR)/unittest_compression_zlib-test_compression_zlib.Tpo test/compressor/$(DEPDIR)/unittest_compression_zlib-test_compression_zlib.Po
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@	$(AM_V_CXX)source='test/compressor/test_compression_zlib.cc' object='test/compressor/unittest_compression_zlib-test_compression_zlib.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCXX_FALSE@	$(AM_V_CXX at am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(unittest_compression_zlib_CXXFLAGS) $(CXXFLAGS) -c -o test/compressor/unittest_compression_zlib-test_compression_zlib.o `test -f 'test/compressor/test_compression_zlib.cc' || echo '$(srcdir)/'`test/compressor/test_compression_zlib.cc
+
+test/compressor/unittest_compression_zlib-test_compression_zlib.obj: test/compressor/test_compression_zlib.cc
+ at am__fastdepCXX_TRUE@	$(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(unittest_compression_zlib_CXXFLAGS) $(CXXFLAGS) -MT test/compressor/unittest_compression_zlib-test_compression_zlib.obj -MD -MP -MF test/compressor/$(DEPDIR)/unittest_compression_zlib-test_compression_zlib.Tpo -c -o test/compressor/unittest_compression_zlib-test_compression_zlib.obj `if test -f 'test/compressor/test_compression_zlib.cc'; then $(CYGPATH_W) 'test/compressor/test_com [...]
+ at am__fastdepCXX_TRUE@	$(AM_V_at)$(am__mv) test/compressor/$(DEPDIR)/unittest_compression_zlib-test_compression_zlib.Tpo test/compressor/$(DEPDIR)/unittest_compression_zlib-test_compression_zlib.Po
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@	$(AM_V_CXX)source='test/compressor/test_compression_zlib.cc' object='test/compressor/unittest_compression_zlib-test_compression_zlib.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCXX_FALSE@	$(AM_V_CXX at am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(unittest_compression_zlib_CXXFLAGS) $(CXXFLAGS) -c -o test/compressor/unittest_compression_zlib-test_compression_zlib.obj `if test -f 'test/compressor/test_compression_zlib.cc'; then $(CYGPATH_W) 'test/compressor/test_compression_zlib.cc'; else $(CYGPATH_W) '$(srcdir)/test/compressor/test_compression_zlib.cc'; fi`
+
+compressor/unittest_compression_zlib-Compressor.o: compressor/Compressor.cc
+ at am__fastdepCXX_TRUE@	$(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(unittest_compression_zlib_CXXFLAGS) $(CXXFLAGS) -MT compressor/unittest_compression_zlib-Compressor.o -MD -MP -MF compressor/$(DEPDIR)/unittest_compression_zlib-Compressor.Tpo -c -o compressor/unittest_compression_zlib-Compressor.o `test -f 'compressor/Compressor.cc' || echo '$(srcdir)/'`compressor/Compressor.cc
+ at am__fastdepCXX_TRUE@	$(AM_V_at)$(am__mv) compressor/$(DEPDIR)/unittest_compression_zlib-Compressor.Tpo compressor/$(DEPDIR)/unittest_compression_zlib-Compressor.Po
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@	$(AM_V_CXX)source='compressor/Compressor.cc' object='compressor/unittest_compression_zlib-Compressor.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCXX_FALSE@	$(AM_V_CXX at am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(unittest_compression_zlib_CXXFLAGS) $(CXXFLAGS) -c -o compressor/unittest_compression_zlib-Compressor.o `test -f 'compressor/Compressor.cc' || echo '$(srcdir)/'`compressor/Compressor.cc
+
+compressor/unittest_compression_zlib-Compressor.obj: compressor/Compressor.cc
+ at am__fastdepCXX_TRUE@	$(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(unittest_compression_zlib_CXXFLAGS) $(CXXFLAGS) -MT compressor/unittest_compression_zlib-Compressor.obj -MD -MP -MF compressor/$(DEPDIR)/unittest_compression_zlib-Compressor.Tpo -c -o compressor/unittest_compression_zlib-Compressor.obj `if test -f 'compressor/Compressor.cc'; then $(CYGPATH_W) 'compressor/Compressor.cc'; else $(CYGPATH_W) '$(srcdir)/compressor/Compressor.cc'; fi`
+ at am__fastdepCXX_TRUE@	$(AM_V_at)$(am__mv) compressor/$(DEPDIR)/unittest_compression_zlib-Compressor.Tpo compressor/$(DEPDIR)/unittest_compression_zlib-Compressor.Po
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@	$(AM_V_CXX)source='compressor/Compressor.cc' object='compressor/unittest_compression_zlib-Compressor.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCXX_FALSE@	$(AM_V_CXX at am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(unittest_compression_zlib_CXXFLAGS) $(CXXFLAGS) -c -o compressor/unittest_compression_zlib-Compressor.obj `if test -f 'compressor/Compressor.cc'; then $(CYGPATH_W) 'compressor/Compressor.cc'; else $(CYGPATH_W) '$(srcdir)/compressor/Compressor.cc'; fi`
+
+compressor/zlib/unittest_compression_zlib-CompressionPluginZlib.o: compressor/zlib/CompressionPluginZlib.cc
+ at am__fastdepCXX_TRUE@	$(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(unittest_compression_zlib_CXXFLAGS) $(CXXFLAGS) -MT compressor/zlib/unittest_compression_zlib-CompressionPluginZlib.o -MD -MP -MF compressor/zlib/$(DEPDIR)/unittest_compression_zlib-CompressionPluginZlib.Tpo -c -o compressor/zlib/unittest_compression_zlib-CompressionPluginZlib.o `test -f 'compressor/zlib/CompressionPluginZlib.cc' || echo '$(srcdir)/'`compressor/zlib/CompressionPlu [...]
+ at am__fastdepCXX_TRUE@	$(AM_V_at)$(am__mv) compressor/zlib/$(DEPDIR)/unittest_compression_zlib-CompressionPluginZlib.Tpo compressor/zlib/$(DEPDIR)/unittest_compression_zlib-CompressionPluginZlib.Po
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@	$(AM_V_CXX)source='compressor/zlib/CompressionPluginZlib.cc' object='compressor/zlib/unittest_compression_zlib-CompressionPluginZlib.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCXX_FALSE@	$(AM_V_CXX at am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(unittest_compression_zlib_CXXFLAGS) $(CXXFLAGS) -c -o compressor/zlib/unittest_compression_zlib-CompressionPluginZlib.o `test -f 'compressor/zlib/CompressionPluginZlib.cc' || echo '$(srcdir)/'`compressor/zlib/CompressionPluginZlib.cc
+
+compressor/zlib/unittest_compression_zlib-CompressionPluginZlib.obj: compressor/zlib/CompressionPluginZlib.cc
+ at am__fastdepCXX_TRUE@	$(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(unittest_compression_zlib_CXXFLAGS) $(CXXFLAGS) -MT compressor/zlib/unittest_compression_zlib-CompressionPluginZlib.obj -MD -MP -MF compressor/zlib/$(DEPDIR)/unittest_compression_zlib-CompressionPluginZlib.Tpo -c -o compressor/zlib/unittest_compression_zlib-CompressionPluginZlib.obj `if test -f 'compressor/zlib/CompressionPluginZlib.cc'; then $(CYGPATH_W) 'compressor/zlib/Compress [...]
+ at am__fastdepCXX_TRUE@	$(AM_V_at)$(am__mv) compressor/zlib/$(DEPDIR)/unittest_compression_zlib-CompressionPluginZlib.Tpo compressor/zlib/$(DEPDIR)/unittest_compression_zlib-CompressionPluginZlib.Po
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@	$(AM_V_CXX)source='compressor/zlib/CompressionPluginZlib.cc' object='compressor/zlib/unittest_compression_zlib-CompressionPluginZlib.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCXX_FALSE@	$(AM_V_CXX at am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(unittest_compression_zlib_CXXFLAGS) $(CXXFLAGS) -c -o compressor/zlib/unittest_compression_zlib-CompressionPluginZlib.obj `if test -f 'compressor/zlib/CompressionPluginZlib.cc'; then $(CYGPATH_W) 'compressor/zlib/CompressionPluginZlib.cc'; else $(CYGPATH_W) '$(srcdir)/compressor/zlib/CompressionPluginZlib.cc'; fi`
+
+compressor/zlib/unittest_compression_zlib-CompressionZlib.o: compressor/zlib/CompressionZlib.cc
+ at am__fastdepCXX_TRUE@	$(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(unittest_compression_zlib_CXXFLAGS) $(CXXFLAGS) -MT compressor/zlib/unittest_compression_zlib-CompressionZlib.o -MD -MP -MF compressor/zlib/$(DEPDIR)/unittest_compression_zlib-CompressionZlib.Tpo -c -o compressor/zlib/unittest_compression_zlib-CompressionZlib.o `test -f 'compressor/zlib/CompressionZlib.cc' || echo '$(srcdir)/'`compressor/zlib/CompressionZlib.cc
+ at am__fastdepCXX_TRUE@	$(AM_V_at)$(am__mv) compressor/zlib/$(DEPDIR)/unittest_compression_zlib-CompressionZlib.Tpo compressor/zlib/$(DEPDIR)/unittest_compression_zlib-CompressionZlib.Po
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@	$(AM_V_CXX)source='compressor/zlib/CompressionZlib.cc' object='compressor/zlib/unittest_compression_zlib-CompressionZlib.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCXX_FALSE@	$(AM_V_CXX at am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(unittest_compression_zlib_CXXFLAGS) $(CXXFLAGS) -c -o compressor/zlib/unittest_compression_zlib-CompressionZlib.o `test -f 'compressor/zlib/CompressionZlib.cc' || echo '$(srcdir)/'`compressor/zlib/CompressionZlib.cc
+
+compressor/zlib/unittest_compression_zlib-CompressionZlib.obj: compressor/zlib/CompressionZlib.cc
+ at am__fastdepCXX_TRUE@	$(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(unittest_compression_zlib_CXXFLAGS) $(CXXFLAGS) -MT compressor/zlib/unittest_compression_zlib-CompressionZlib.obj -MD -MP -MF compressor/zlib/$(DEPDIR)/unittest_compression_zlib-CompressionZlib.Tpo -c -o compressor/zlib/unittest_compression_zlib-CompressionZlib.obj `if test -f 'compressor/zlib/CompressionZlib.cc'; then $(CYGPATH_W) 'compressor/zlib/CompressionZlib.cc'; else $(CYGP [...]
+ at am__fastdepCXX_TRUE@	$(AM_V_at)$(am__mv) compressor/zlib/$(DEPDIR)/unittest_compression_zlib-CompressionZlib.Tpo compressor/zlib/$(DEPDIR)/unittest_compression_zlib-CompressionZlib.Po
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@	$(AM_V_CXX)source='compressor/zlib/CompressionZlib.cc' object='compressor/zlib/unittest_compression_zlib-CompressionZlib.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCXX_FALSE@	$(AM_V_CXX at am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(unittest_compression_zlib_CXXFLAGS) $(CXXFLAGS) -c -o compressor/zlib/unittest_compression_zlib-CompressionZlib.obj `if test -f 'compressor/zlib/CompressionZlib.cc'; then $(CYGPATH_W) 'compressor/zlib/CompressionZlib.cc'; else $(CYGPATH_W) '$(srcdir)/compressor/zlib/CompressionZlib.cc'; fi`
+
 test/common/unittest_config-test_config.o: test/common/test_config.cc
 @am__fastdepCXX_TRUE@	$(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(unittest_config_CXXFLAGS) $(CXXFLAGS) -MT test/common/unittest_config-test_config.o -MD -MP -MF test/common/$(DEPDIR)/unittest_config-test_config.Tpo -c -o test/common/unittest_config-test_config.o `test -f 'test/common/test_config.cc' || echo '$(srcdir)/'`test/common/test_config.cc
 @am__fastdepCXX_TRUE@	$(AM_V_at)$(am__mv) test/common/$(DEPDIR)/unittest_config-test_config.Tpo test/common/$(DEPDIR)/unittest_config-test_config.Po
@@ -25922,6 +26857,20 @@ test/osd/unittest_hitset-hitset.obj: test/osd/hitset.cc
 @AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
 @am__fastdepCXX_FALSE@	$(AM_V_CXX at am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(unittest_hitset_CXXFLAGS) $(CXXFLAGS) -c -o test/osd/unittest_hitset-hitset.obj `if test -f 'test/osd/hitset.cc'; then $(CYGPATH_W) 'test/osd/hitset.cc'; else $(CYGPATH_W) '$(srcdir)/test/osd/hitset.cc'; fi`
 
+test/common/unittest_interval_set-test_interval_set.o: test/common/test_interval_set.cc
+ at am__fastdepCXX_TRUE@	$(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(unittest_interval_set_CXXFLAGS) $(CXXFLAGS) -MT test/common/unittest_interval_set-test_interval_set.o -MD -MP -MF test/common/$(DEPDIR)/unittest_interval_set-test_interval_set.Tpo -c -o test/common/unittest_interval_set-test_interval_set.o `test -f 'test/common/test_interval_set.cc' || echo '$(srcdir)/'`test/common/test_interval_set.cc
+ at am__fastdepCXX_TRUE@	$(AM_V_at)$(am__mv) test/common/$(DEPDIR)/unittest_interval_set-test_interval_set.Tpo test/common/$(DEPDIR)/unittest_interval_set-test_interval_set.Po
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@	$(AM_V_CXX)source='test/common/test_interval_set.cc' object='test/common/unittest_interval_set-test_interval_set.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCXX_FALSE@	$(AM_V_CXX at am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(unittest_interval_set_CXXFLAGS) $(CXXFLAGS) -c -o test/common/unittest_interval_set-test_interval_set.o `test -f 'test/common/test_interval_set.cc' || echo '$(srcdir)/'`test/common/test_interval_set.cc
+
+test/common/unittest_interval_set-test_interval_set.obj: test/common/test_interval_set.cc
+ at am__fastdepCXX_TRUE@	$(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(unittest_interval_set_CXXFLAGS) $(CXXFLAGS) -MT test/common/unittest_interval_set-test_interval_set.obj -MD -MP -MF test/common/$(DEPDIR)/unittest_interval_set-test_interval_set.Tpo -c -o test/common/unittest_interval_set-test_interval_set.obj `if test -f 'test/common/test_interval_set.cc'; then $(CYGPATH_W) 'test/common/test_interval_set.cc'; else $(CYGPATH_W) '$(srcdir)/test/com [...]
+ at am__fastdepCXX_TRUE@	$(AM_V_at)$(am__mv) test/common/$(DEPDIR)/unittest_interval_set-test_interval_set.Tpo test/common/$(DEPDIR)/unittest_interval_set-test_interval_set.Po
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@	$(AM_V_CXX)source='test/common/test_interval_set.cc' object='test/common/unittest_interval_set-test_interval_set.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCXX_FALSE@	$(AM_V_CXX at am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(unittest_interval_set_CXXFLAGS) $(CXXFLAGS) -c -o test/common/unittest_interval_set-test_interval_set.obj `if test -f 'test/common/test_interval_set.cc'; then $(CYGPATH_W) 'test/common/test_interval_set.cc'; else $(CYGPATH_W) '$(srcdir)/test/common/test_interval_set.cc'; fi`
+
 test/common/unittest_io_priority-test_io_priority.o: test/common/test_io_priority.cc
 @am__fastdepCXX_TRUE@	$(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(unittest_io_priority_CXXFLAGS) $(CXXFLAGS) -MT test/common/unittest_io_priority-test_io_priority.o -MD -MP -MF test/common/$(DEPDIR)/unittest_io_priority-test_io_priority.Tpo -c -o test/common/unittest_io_priority-test_io_priority.o `test -f 'test/common/test_io_priority.cc' || echo '$(srcdir)/'`test/common/test_io_priority.cc
 @am__fastdepCXX_TRUE@	$(AM_V_at)$(am__mv) test/common/$(DEPDIR)/unittest_io_priority-test_io_priority.Tpo test/common/$(DEPDIR)/unittest_io_priority-test_io_priority.Po
@@ -26244,6 +27193,20 @@ test/librbd/exclusive_lock/unittest_librbd-test_mock_ReleaseRequest.obj: test/li
 @AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
 @am__fastdepCXX_FALSE@	$(AM_V_CXX at am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(unittest_librbd_CXXFLAGS) $(CXXFLAGS) -c -o test/librbd/exclusive_lock/unittest_librbd-test_mock_ReleaseRequest.obj `if test -f 'test/librbd/exclusive_lock/test_mock_ReleaseRequest.cc'; then $(CYGPATH_W) 'test/librbd/exclusive_lock/test_mock_ReleaseRequest.cc'; else $(CYGPATH_W) '$(srcdir)/test/librbd/exclusive_lock/test_mock_ReleaseRequest.cc'; fi`
 
+test/librbd/image/unittest_librbd-test_mock_RefreshRequest.o: test/librbd/image/test_mock_RefreshRequest.cc
+ at am__fastdepCXX_TRUE@	$(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(unittest_librbd_CXXFLAGS) $(CXXFLAGS) -MT test/librbd/image/unittest_librbd-test_mock_RefreshRequest.o -MD -MP -MF test/librbd/image/$(DEPDIR)/unittest_librbd-test_mock_RefreshRequest.Tpo -c -o test/librbd/image/unittest_librbd-test_mock_RefreshRequest.o `test -f 'test/librbd/image/test_mock_RefreshRequest.cc' || echo '$(srcdir)/'`test/librbd/image/test_mock_RefreshRequest.cc
+ at am__fastdepCXX_TRUE@	$(AM_V_at)$(am__mv) test/librbd/image/$(DEPDIR)/unittest_librbd-test_mock_RefreshRequest.Tpo test/librbd/image/$(DEPDIR)/unittest_librbd-test_mock_RefreshRequest.Po
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@	$(AM_V_CXX)source='test/librbd/image/test_mock_RefreshRequest.cc' object='test/librbd/image/unittest_librbd-test_mock_RefreshRequest.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCXX_FALSE@	$(AM_V_CXX at am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(unittest_librbd_CXXFLAGS) $(CXXFLAGS) -c -o test/librbd/image/unittest_librbd-test_mock_RefreshRequest.o `test -f 'test/librbd/image/test_mock_RefreshRequest.cc' || echo '$(srcdir)/'`test/librbd/image/test_mock_RefreshRequest.cc
+
+test/librbd/image/unittest_librbd-test_mock_RefreshRequest.obj: test/librbd/image/test_mock_RefreshRequest.cc
+ at am__fastdepCXX_TRUE@	$(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(unittest_librbd_CXXFLAGS) $(CXXFLAGS) -MT test/librbd/image/unittest_librbd-test_mock_RefreshRequest.obj -MD -MP -MF test/librbd/image/$(DEPDIR)/unittest_librbd-test_mock_RefreshRequest.Tpo -c -o test/librbd/image/unittest_librbd-test_mock_RefreshRequest.obj `if test -f 'test/librbd/image/test_mock_RefreshRequest.cc'; then $(CYGPATH_W) 'test/librbd/image/test_mock_RefreshRequest.c [...]
+ at am__fastdepCXX_TRUE@	$(AM_V_at)$(am__mv) test/librbd/image/$(DEPDIR)/unittest_librbd-test_mock_RefreshRequest.Tpo test/librbd/image/$(DEPDIR)/unittest_librbd-test_mock_RefreshRequest.Po
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@	$(AM_V_CXX)source='test/librbd/image/test_mock_RefreshRequest.cc' object='test/librbd/image/unittest_librbd-test_mock_RefreshRequest.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCXX_FALSE@	$(AM_V_CXX at am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(unittest_librbd_CXXFLAGS) $(CXXFLAGS) -c -o test/librbd/image/unittest_librbd-test_mock_RefreshRequest.obj `if test -f 'test/librbd/image/test_mock_RefreshRequest.cc'; then $(CYGPATH_W) 'test/librbd/image/test_mock_RefreshRequest.cc'; else $(CYGPATH_W) '$(srcdir)/test/librbd/image/test_mock_RefreshRequest.cc'; fi`
+
 test/librbd/journal/unittest_librbd-test_mock_Replay.o: test/librbd/journal/test_mock_Replay.cc
 @am__fastdepCXX_TRUE@	$(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(unittest_librbd_CXXFLAGS) $(CXXFLAGS) -MT test/librbd/journal/unittest_librbd-test_mock_Replay.o -MD -MP -MF test/librbd/journal/$(DEPDIR)/unittest_librbd-test_mock_Replay.Tpo -c -o test/librbd/journal/unittest_librbd-test_mock_Replay.o `test -f 'test/librbd/journal/test_mock_Replay.cc' || echo '$(srcdir)/'`test/librbd/journal/test_mock_Replay.cc
 @am__fastdepCXX_TRUE@	$(AM_V_at)$(am__mv) test/librbd/journal/$(DEPDIR)/unittest_librbd-test_mock_Replay.Tpo test/librbd/journal/$(DEPDIR)/unittest_librbd-test_mock_Replay.Po
@@ -26566,6 +27529,20 @@ test/mon/unittest_mon_pgmap-PGMap.obj: test/mon/PGMap.cc
 @AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
 @am__fastdepCXX_FALSE@	$(AM_V_CXX at am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(unittest_mon_pgmap_CXXFLAGS) $(CXXFLAGS) -c -o test/mon/unittest_mon_pgmap-PGMap.obj `if test -f 'test/mon/PGMap.cc'; then $(CYGPATH_W) 'test/mon/PGMap.cc'; else $(CYGPATH_W) '$(srcdir)/test/mon/PGMap.cc'; fi`
 
+test/common/unittest_mutex_debug-test_mutex_debug.o: test/common/test_mutex_debug.cc
+ at am__fastdepCXX_TRUE@	$(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(unittest_mutex_debug_CXXFLAGS) $(CXXFLAGS) -MT test/common/unittest_mutex_debug-test_mutex_debug.o -MD -MP -MF test/common/$(DEPDIR)/unittest_mutex_debug-test_mutex_debug.Tpo -c -o test/common/unittest_mutex_debug-test_mutex_debug.o `test -f 'test/common/test_mutex_debug.cc' || echo '$(srcdir)/'`test/common/test_mutex_debug.cc
+ at am__fastdepCXX_TRUE@	$(AM_V_at)$(am__mv) test/common/$(DEPDIR)/unittest_mutex_debug-test_mutex_debug.Tpo test/common/$(DEPDIR)/unittest_mutex_debug-test_mutex_debug.Po
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@	$(AM_V_CXX)source='test/common/test_mutex_debug.cc' object='test/common/unittest_mutex_debug-test_mutex_debug.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCXX_FALSE@	$(AM_V_CXX at am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(unittest_mutex_debug_CXXFLAGS) $(CXXFLAGS) -c -o test/common/unittest_mutex_debug-test_mutex_debug.o `test -f 'test/common/test_mutex_debug.cc' || echo '$(srcdir)/'`test/common/test_mutex_debug.cc
+
+test/common/unittest_mutex_debug-test_mutex_debug.obj: test/common/test_mutex_debug.cc
+ at am__fastdepCXX_TRUE@	$(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(unittest_mutex_debug_CXXFLAGS) $(CXXFLAGS) -MT test/common/unittest_mutex_debug-test_mutex_debug.obj -MD -MP -MF test/common/$(DEPDIR)/unittest_mutex_debug-test_mutex_debug.Tpo -c -o test/common/unittest_mutex_debug-test_mutex_debug.obj `if test -f 'test/common/test_mutex_debug.cc'; then $(CYGPATH_W) 'test/common/test_mutex_debug.cc'; else $(CYGPATH_W) '$(srcdir)/test/common/test_ [...]
+ at am__fastdepCXX_TRUE@	$(AM_V_at)$(am__mv) test/common/$(DEPDIR)/unittest_mutex_debug-test_mutex_debug.Tpo test/common/$(DEPDIR)/unittest_mutex_debug-test_mutex_debug.Po
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@	$(AM_V_CXX)source='test/common/test_mutex_debug.cc' object='test/common/unittest_mutex_debug-test_mutex_debug.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCXX_FALSE@	$(AM_V_CXX at am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(unittest_mutex_debug_CXXFLAGS) $(CXXFLAGS) -c -o test/common/unittest_mutex_debug-test_mutex_debug.obj `if test -f 'test/common/test_mutex_debug.cc'; then $(CYGPATH_W) 'test/common/test_mutex_debug.cc'; else $(CYGPATH_W) '$(srcdir)/test/common/test_mutex_debug.cc'; fi`
+
 test/osd/unittest_osd_osdcap-osdcap.o: test/osd/osdcap.cc
 @am__fastdepCXX_TRUE@	$(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(unittest_osd_osdcap_CXXFLAGS) $(CXXFLAGS) -MT test/osd/unittest_osd_osdcap-osdcap.o -MD -MP -MF test/osd/$(DEPDIR)/unittest_osd_osdcap-osdcap.Tpo -c -o test/osd/unittest_osd_osdcap-osdcap.o `test -f 'test/osd/osdcap.cc' || echo '$(srcdir)/'`test/osd/osdcap.cc
 @am__fastdepCXX_TRUE@	$(AM_V_at)$(am__mv) test/osd/$(DEPDIR)/unittest_osd_osdcap-osdcap.Tpo test/osd/$(DEPDIR)/unittest_osd_osdcap-osdcap.Po
@@ -26692,6 +27669,20 @@ test/common/unittest_prioritized_queue-test_prioritized_queue.obj: test/common/t
 @AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
 @am__fastdepCXX_FALSE@	$(AM_V_CXX at am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(unittest_prioritized_queue_CXXFLAGS) $(CXXFLAGS) -c -o test/common/unittest_prioritized_queue-test_prioritized_queue.obj `if test -f 'test/common/test_prioritized_queue.cc'; then $(CYGPATH_W) 'test/common/test_prioritized_queue.cc'; else $(CYGPATH_W) '$(srcdir)/test/common/test_prioritized_queue.cc'; fi`
 
+test/rbd_mirror/unittest_rbd_mirror-test_main.o: test/rbd_mirror/test_main.cc
+ at am__fastdepCXX_TRUE@	$(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(unittest_rbd_mirror_CXXFLAGS) $(CXXFLAGS) -MT test/rbd_mirror/unittest_rbd_mirror-test_main.o -MD -MP -MF test/rbd_mirror/$(DEPDIR)/unittest_rbd_mirror-test_main.Tpo -c -o test/rbd_mirror/unittest_rbd_mirror-test_main.o `test -f 'test/rbd_mirror/test_main.cc' || echo '$(srcdir)/'`test/rbd_mirror/test_main.cc
+ at am__fastdepCXX_TRUE@	$(AM_V_at)$(am__mv) test/rbd_mirror/$(DEPDIR)/unittest_rbd_mirror-test_main.Tpo test/rbd_mirror/$(DEPDIR)/unittest_rbd_mirror-test_main.Po
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@	$(AM_V_CXX)source='test/rbd_mirror/test_main.cc' object='test/rbd_mirror/unittest_rbd_mirror-test_main.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCXX_FALSE@	$(AM_V_CXX at am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(unittest_rbd_mirror_CXXFLAGS) $(CXXFLAGS) -c -o test/rbd_mirror/unittest_rbd_mirror-test_main.o `test -f 'test/rbd_mirror/test_main.cc' || echo '$(srcdir)/'`test/rbd_mirror/test_main.cc
+
+test/rbd_mirror/unittest_rbd_mirror-test_main.obj: test/rbd_mirror/test_main.cc
+ at am__fastdepCXX_TRUE@	$(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(unittest_rbd_mirror_CXXFLAGS) $(CXXFLAGS) -MT test/rbd_mirror/unittest_rbd_mirror-test_main.obj -MD -MP -MF test/rbd_mirror/$(DEPDIR)/unittest_rbd_mirror-test_main.Tpo -c -o test/rbd_mirror/unittest_rbd_mirror-test_main.obj `if test -f 'test/rbd_mirror/test_main.cc'; then $(CYGPATH_W) 'test/rbd_mirror/test_main.cc'; else $(CYGPATH_W) '$(srcdir)/test/rbd_mirror/test_main.cc'; fi`
+ at am__fastdepCXX_TRUE@	$(AM_V_at)$(am__mv) test/rbd_mirror/$(DEPDIR)/unittest_rbd_mirror-test_main.Tpo test/rbd_mirror/$(DEPDIR)/unittest_rbd_mirror-test_main.Po
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@	$(AM_V_CXX)source='test/rbd_mirror/test_main.cc' object='test/rbd_mirror/unittest_rbd_mirror-test_main.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCXX_FALSE@	$(AM_V_CXX at am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(unittest_rbd_mirror_CXXFLAGS) $(CXXFLAGS) -c -o test/rbd_mirror/unittest_rbd_mirror-test_main.obj `if test -f 'test/rbd_mirror/test_main.cc'; then $(CYGPATH_W) 'test/rbd_mirror/test_main.cc'; else $(CYGPATH_W) '$(srcdir)/test/rbd_mirror/test_main.cc'; fi`
+
 test/unittest_rbd_replay-test_rbd_replay.o: test/test_rbd_replay.cc
 @am__fastdepCXX_TRUE@	$(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(unittest_rbd_replay_CXXFLAGS) $(CXXFLAGS) -MT test/unittest_rbd_replay-test_rbd_replay.o -MD -MP -MF test/$(DEPDIR)/unittest_rbd_replay-test_rbd_replay.Tpo -c -o test/unittest_rbd_replay-test_rbd_replay.o `test -f 'test/test_rbd_replay.cc' || echo '$(srcdir)/'`test/test_rbd_replay.cc
 @am__fastdepCXX_TRUE@	$(AM_V_at)$(am__mv) test/$(DEPDIR)/unittest_rbd_replay-test_rbd_replay.Tpo test/$(DEPDIR)/unittest_rbd_replay-test_rbd_replay.Po
@@ -26804,6 +27795,20 @@ test/common/unittest_sharedptr_registry-test_sharedptr_registry.obj: test/common
 @AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
 @am__fastdepCXX_FALSE@	$(AM_V_CXX at am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(unittest_sharedptr_registry_CXXFLAGS) $(CXXFLAGS) -c -o test/common/unittest_sharedptr_registry-test_sharedptr_registry.obj `if test -f 'test/common/test_sharedptr_registry.cc'; then $(CYGPATH_W) 'test/common/test_sharedptr_registry.cc'; else $(CYGPATH_W) '$(srcdir)/test/common/test_sharedptr_registry.cc'; fi`
 
+test/common/unittest_shunique_lock-test_shunique_lock.o: test/common/test_shunique_lock.cc
+ at am__fastdepCXX_TRUE@	$(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(unittest_shunique_lock_CXXFLAGS) $(CXXFLAGS) -MT test/common/unittest_shunique_lock-test_shunique_lock.o -MD -MP -MF test/common/$(DEPDIR)/unittest_shunique_lock-test_shunique_lock.Tpo -c -o test/common/unittest_shunique_lock-test_shunique_lock.o `test -f 'test/common/test_shunique_lock.cc' || echo '$(srcdir)/'`test/common/test_shunique_lock.cc
+ at am__fastdepCXX_TRUE@	$(AM_V_at)$(am__mv) test/common/$(DEPDIR)/unittest_shunique_lock-test_shunique_lock.Tpo test/common/$(DEPDIR)/unittest_shunique_lock-test_shunique_lock.Po
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@	$(AM_V_CXX)source='test/common/test_shunique_lock.cc' object='test/common/unittest_shunique_lock-test_shunique_lock.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCXX_FALSE@	$(AM_V_CXX at am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(unittest_shunique_lock_CXXFLAGS) $(CXXFLAGS) -c -o test/common/unittest_shunique_lock-test_shunique_lock.o `test -f 'test/common/test_shunique_lock.cc' || echo '$(srcdir)/'`test/common/test_shunique_lock.cc
+
+test/common/unittest_shunique_lock-test_shunique_lock.obj: test/common/test_shunique_lock.cc
+ at am__fastdepCXX_TRUE@	$(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(unittest_shunique_lock_CXXFLAGS) $(CXXFLAGS) -MT test/common/unittest_shunique_lock-test_shunique_lock.obj -MD -MP -MF test/common/$(DEPDIR)/unittest_shunique_lock-test_shunique_lock.Tpo -c -o test/common/unittest_shunique_lock-test_shunique_lock.obj `if test -f 'test/common/test_shunique_lock.cc'; then $(CYGPATH_W) 'test/common/test_shunique_lock.cc'; else $(CYGPATH_W) '$(srcdir) [...]
+ at am__fastdepCXX_TRUE@	$(AM_V_at)$(am__mv) test/common/$(DEPDIR)/unittest_shunique_lock-test_shunique_lock.Tpo test/common/$(DEPDIR)/unittest_shunique_lock-test_shunique_lock.Po
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@	$(AM_V_CXX)source='test/common/test_shunique_lock.cc' object='test/common/unittest_shunique_lock-test_shunique_lock.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCXX_FALSE@	$(AM_V_CXX at am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(unittest_shunique_lock_CXXFLAGS) $(CXXFLAGS) -c -o test/common/unittest_shunique_lock-test_shunique_lock.obj `if test -f 'test/common/test_shunique_lock.cc'; then $(CYGPATH_W) 'test/common/test_shunique_lock.cc'; else $(CYGPATH_W) '$(srcdir)/test/common/test_shunique_lock.cc'; fi`
+
 test/unittest_signals-signals.o: test/signals.cc
 @am__fastdepCXX_TRUE@	$(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(unittest_signals_CXXFLAGS) $(CXXFLAGS) -MT test/unittest_signals-signals.o -MD -MP -MF test/$(DEPDIR)/unittest_signals-signals.Tpo -c -o test/unittest_signals-signals.o `test -f 'test/signals.cc' || echo '$(srcdir)/'`test/signals.cc
 @am__fastdepCXX_TRUE@	$(AM_V_at)$(am__mv) test/$(DEPDIR)/unittest_signals-signals.Tpo test/$(DEPDIR)/unittest_signals-signals.Po
@@ -26972,6 +27977,20 @@ test/common/unittest_time-test_time.obj: test/common/test_time.cc
 @AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
 @am__fastdepCXX_FALSE@	$(AM_V_CXX at am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(unittest_time_CXXFLAGS) $(CXXFLAGS) -c -o test/common/unittest_time-test_time.obj `if test -f 'test/common/test_time.cc'; then $(CYGPATH_W) 'test/common/test_time.cc'; else $(CYGPATH_W) '$(srcdir)/test/common/test_time.cc'; fi`
 
+test/objectstore/unittest_transaction-test_transaction.o: test/objectstore/test_transaction.cc
+ at am__fastdepCXX_TRUE@	$(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(unittest_transaction_CXXFLAGS) $(CXXFLAGS) -MT test/objectstore/unittest_transaction-test_transaction.o -MD -MP -MF test/objectstore/$(DEPDIR)/unittest_transaction-test_transaction.Tpo -c -o test/objectstore/unittest_transaction-test_transaction.o `test -f 'test/objectstore/test_transaction.cc' || echo '$(srcdir)/'`test/objectstore/test_transaction.cc
+ at am__fastdepCXX_TRUE@	$(AM_V_at)$(am__mv) test/objectstore/$(DEPDIR)/unittest_transaction-test_transaction.Tpo test/objectstore/$(DEPDIR)/unittest_transaction-test_transaction.Po
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@	$(AM_V_CXX)source='test/objectstore/test_transaction.cc' object='test/objectstore/unittest_transaction-test_transaction.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCXX_FALSE@	$(AM_V_CXX at am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(unittest_transaction_CXXFLAGS) $(CXXFLAGS) -c -o test/objectstore/unittest_transaction-test_transaction.o `test -f 'test/objectstore/test_transaction.cc' || echo '$(srcdir)/'`test/objectstore/test_transaction.cc
+
+test/objectstore/unittest_transaction-test_transaction.obj: test/objectstore/test_transaction.cc
+ at am__fastdepCXX_TRUE@	$(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(unittest_transaction_CXXFLAGS) $(CXXFLAGS) -MT test/objectstore/unittest_transaction-test_transaction.obj -MD -MP -MF test/objectstore/$(DEPDIR)/unittest_transaction-test_transaction.Tpo -c -o test/objectstore/unittest_transaction-test_transaction.obj `if test -f 'test/objectstore/test_transaction.cc'; then $(CYGPATH_W) 'test/objectstore/test_transaction.cc'; else $(CYGPATH_W) '$( [...]
+ at am__fastdepCXX_TRUE@	$(AM_V_at)$(am__mv) test/objectstore/$(DEPDIR)/unittest_transaction-test_transaction.Tpo test/objectstore/$(DEPDIR)/unittest_transaction-test_transaction.Po
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@	$(AM_V_CXX)source='test/objectstore/test_transaction.cc' object='test/objectstore/unittest_transaction-test_transaction.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCXX_FALSE@	$(AM_V_CXX at am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(unittest_transaction_CXXFLAGS) $(CXXFLAGS) -c -o test/objectstore/unittest_transaction-test_transaction.obj `if test -f 'test/objectstore/test_transaction.cc'; then $(CYGPATH_W) 'test/objectstore/test_transaction.cc'; else $(CYGPATH_W) '$(srcdir)/test/objectstore/test_transaction.cc'; fi`
+
 test/unittest_utf8-utf8.o: test/utf8.cc
 @am__fastdepCXX_TRUE@	$(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(unittest_utf8_CXXFLAGS) $(CXXFLAGS) -MT test/unittest_utf8-utf8.o -MD -MP -MF test/$(DEPDIR)/unittest_utf8-utf8.Tpo -c -o test/unittest_utf8-utf8.o `test -f 'test/utf8.cc' || echo '$(srcdir)/'`test/utf8.cc
 @am__fastdepCXX_TRUE@	$(AM_V_at)$(am__mv) test/$(DEPDIR)/unittest_utf8-utf8.Tpo test/$(DEPDIR)/unittest_utf8-utf8.Po
@@ -27000,6 +28019,20 @@ test/common/unittest_util-test_util.obj: test/common/test_util.cc
 @AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
 @am__fastdepCXX_FALSE@	$(AM_V_CXX at am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(unittest_util_CXXFLAGS) $(CXXFLAGS) -c -o test/common/unittest_util-test_util.obj `if test -f 'test/common/test_util.cc'; then $(CYGPATH_W) 'test/common/test_util.cc'; else $(CYGPATH_W) '$(srcdir)/test/common/test_util.cc'; fi`
 
+test/common/unittest_weighted_priority_queue-test_weighted_priority_queue.o: test/common/test_weighted_priority_queue.cc
+ at am__fastdepCXX_TRUE@	$(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(unittest_weighted_priority_queue_CXXFLAGS) $(CXXFLAGS) -MT test/common/unittest_weighted_priority_queue-test_weighted_priority_queue.o -MD -MP -MF test/common/$(DEPDIR)/unittest_weighted_priority_queue-test_weighted_priority_queue.Tpo -c -o test/common/unittest_weighted_priority_queue-test_weighted_priority_queue.o `test -f 'test/common/test_weighted_priority_queue.cc' || echo '$( [...]
+ at am__fastdepCXX_TRUE@	$(AM_V_at)$(am__mv) test/common/$(DEPDIR)/unittest_weighted_priority_queue-test_weighted_priority_queue.Tpo test/common/$(DEPDIR)/unittest_weighted_priority_queue-test_weighted_priority_queue.Po
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@	$(AM_V_CXX)source='test/common/test_weighted_priority_queue.cc' object='test/common/unittest_weighted_priority_queue-test_weighted_priority_queue.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCXX_FALSE@	$(AM_V_CXX at am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(unittest_weighted_priority_queue_CXXFLAGS) $(CXXFLAGS) -c -o test/common/unittest_weighted_priority_queue-test_weighted_priority_queue.o `test -f 'test/common/test_weighted_priority_queue.cc' || echo '$(srcdir)/'`test/common/test_weighted_priority_queue.cc
+
+test/common/unittest_weighted_priority_queue-test_weighted_priority_queue.obj: test/common/test_weighted_priority_queue.cc
+ at am__fastdepCXX_TRUE@	$(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(unittest_weighted_priority_queue_CXXFLAGS) $(CXXFLAGS) -MT test/common/unittest_weighted_priority_queue-test_weighted_priority_queue.obj -MD -MP -MF test/common/$(DEPDIR)/unittest_weighted_priority_queue-test_weighted_priority_queue.Tpo -c -o test/common/unittest_weighted_priority_queue-test_weighted_priority_queue.obj `if test -f 'test/common/test_weighted_priority_queue.cc'; the [...]
+ at am__fastdepCXX_TRUE@	$(AM_V_at)$(am__mv) test/common/$(DEPDIR)/unittest_weighted_priority_queue-test_weighted_priority_queue.Tpo test/common/$(DEPDIR)/unittest_weighted_priority_queue-test_weighted_priority_queue.Po
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@	$(AM_V_CXX)source='test/common/test_weighted_priority_queue.cc' object='test/common/unittest_weighted_priority_queue-test_weighted_priority_queue.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCXX_FALSE@	$(AM_V_CXX at am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(unittest_weighted_priority_queue_CXXFLAGS) $(CXXFLAGS) -c -o test/common/unittest_weighted_priority_queue-test_weighted_priority_queue.obj `if test -f 'test/common/test_weighted_priority_queue.cc'; then $(CYGPATH_W) 'test/common/test_weighted_priority_queue.cc'; else $(CYGPATH_W) '$(srcdir)/test/common/test_weighted_priority_queue.cc'; fi`
+
 test/unittest_workqueue-test_workqueue.o: test/test_workqueue.cc
 @am__fastdepCXX_TRUE@	$(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(unittest_workqueue_CXXFLAGS) $(CXXFLAGS) -MT test/unittest_workqueue-test_workqueue.o -MD -MP -MF test/$(DEPDIR)/unittest_workqueue-test_workqueue.Tpo -c -o test/unittest_workqueue-test_workqueue.o `test -f 'test/test_workqueue.cc' || echo '$(srcdir)/'`test/test_workqueue.cc
 @am__fastdepCXX_TRUE@	$(AM_V_at)$(am__mv) test/$(DEPDIR)/unittest_workqueue-test_workqueue.Tpo test/$(DEPDIR)/unittest_workqueue-test_workqueue.Po
@@ -27117,122 +28150,122 @@ test/messenger/xio_server-xio_dispatcher.obj: test/messenger/xio_dispatcher.cc
 .s.lo:
 	$(AM_V_CCAS)$(LTCCASCOMPILE) -c -o $@ $<
 
-erasure-code/isa/isa-l/erasure_code/libec_isa_la-ec_multibinary.asm.lo: erasure-code/isa/isa-l/erasure_code/ec_multibinary.asm.s
-	$(AM_V_CCAS)$(LIBTOOL) $(AM_V_lt) $(libec_isa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(libec_isa_la_CCASFLAGS) $(CCASFLAGS) -c -o erasure-code/isa/isa-l/erasure_code/libec_isa_la-ec_multibinary.asm.lo `test -f 'erasure-code/isa/isa-l/erasure_code/ec_multibinary.asm.s' || echo '$(srcdir)/'`erasure-code/isa/isa-l/erasure_code/ec_multibinary.asm.s
+erasure-code/isa/isa-l/erasure_code/libisa_la-ec_multibinary.asm.lo: erasure-code/isa/isa-l/erasure_code/ec_multibinary.asm.s
+	$(AM_V_CCAS)$(LIBTOOL) $(AM_V_lt) $(libisa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(libisa_la_CCASFLAGS) $(CCASFLAGS) -c -o erasure-code/isa/isa-l/erasure_code/libisa_la-ec_multibinary.asm.lo `test -f 'erasure-code/isa/isa-l/erasure_code/ec_multibinary.asm.s' || echo '$(srcdir)/'`erasure-code/isa/isa-l/erasure_code/ec_multibinary.asm.s
 
-erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_2vect_dot_prod_avx2.asm.lo: erasure-code/isa/isa-l/erasure_code/gf_2vect_dot_prod_avx2.asm.s
-	$(AM_V_CCAS)$(LIBTOOL) $(AM_V_lt) $(libec_isa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(libec_isa_la_CCASFLAGS) $(CCASFLAGS) -c -o erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_2vect_dot_prod_avx2.asm.lo `test -f 'erasure-code/isa/isa-l/erasure_code/gf_2vect_dot_prod_avx2.asm.s' || echo '$(srcdir)/'`erasure-code/isa/isa-l/erasure_code/gf_2vect_dot_prod_avx2.asm.s
+erasure-code/isa/isa-l/erasure_code/libisa_la-gf_2vect_dot_prod_avx2.asm.lo: erasure-code/isa/isa-l/erasure_code/gf_2vect_dot_prod_avx2.asm.s
+	$(AM_V_CCAS)$(LIBTOOL) $(AM_V_lt) $(libisa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(libisa_la_CCASFLAGS) $(CCASFLAGS) -c -o erasure-code/isa/isa-l/erasure_code/libisa_la-gf_2vect_dot_prod_avx2.asm.lo `test -f 'erasure-code/isa/isa-l/erasure_code/gf_2vect_dot_prod_avx2.asm.s' || echo '$(srcdir)/'`erasure-code/isa/isa-l/erasure_code/gf_2vect_dot_prod_avx2.asm.s
 
-erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_2vect_dot_prod_avx.asm.lo: erasure-code/isa/isa-l/erasure_code/gf_2vect_dot_prod_avx.asm.s
-	$(AM_V_CCAS)$(LIBTOOL) $(AM_V_lt) $(libec_isa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(libec_isa_la_CCASFLAGS) $(CCASFLAGS) -c -o erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_2vect_dot_prod_avx.asm.lo `test -f 'erasure-code/isa/isa-l/erasure_code/gf_2vect_dot_prod_avx.asm.s' || echo '$(srcdir)/'`erasure-code/isa/isa-l/erasure_code/gf_2vect_dot_prod_avx.asm.s
+erasure-code/isa/isa-l/erasure_code/libisa_la-gf_2vect_dot_prod_avx.asm.lo: erasure-code/isa/isa-l/erasure_code/gf_2vect_dot_prod_avx.asm.s
+	$(AM_V_CCAS)$(LIBTOOL) $(AM_V_lt) $(libisa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(libisa_la_CCASFLAGS) $(CCASFLAGS) -c -o erasure-code/isa/isa-l/erasure_code/libisa_la-gf_2vect_dot_prod_avx.asm.lo `test -f 'erasure-code/isa/isa-l/erasure_code/gf_2vect_dot_prod_avx.asm.s' || echo '$(srcdir)/'`erasure-code/isa/isa-l/erasure_code/gf_2vect_dot_prod_avx.asm.s
 
-erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_2vect_dot_prod_sse.asm.lo: erasure-code/isa/isa-l/erasure_code/gf_2vect_dot_prod_sse.asm.s
-	$(AM_V_CCAS)$(LIBTOOL) $(AM_V_lt) $(libec_isa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(libec_isa_la_CCASFLAGS) $(CCASFLAGS) -c -o erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_2vect_dot_prod_sse.asm.lo `test -f 'erasure-code/isa/isa-l/erasure_code/gf_2vect_dot_prod_sse.asm.s' || echo '$(srcdir)/'`erasure-code/isa/isa-l/erasure_code/gf_2vect_dot_prod_sse.asm.s
+erasure-code/isa/isa-l/erasure_code/libisa_la-gf_2vect_dot_prod_sse.asm.lo: erasure-code/isa/isa-l/erasure_code/gf_2vect_dot_prod_sse.asm.s
+	$(AM_V_CCAS)$(LIBTOOL) $(AM_V_lt) $(libisa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(libisa_la_CCASFLAGS) $(CCASFLAGS) -c -o erasure-code/isa/isa-l/erasure_code/libisa_la-gf_2vect_dot_prod_sse.asm.lo `test -f 'erasure-code/isa/isa-l/erasure_code/gf_2vect_dot_prod_sse.asm.s' || echo '$(srcdir)/'`erasure-code/isa/isa-l/erasure_code/gf_2vect_dot_prod_sse.asm.s
 
-erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_3vect_dot_prod_avx2.asm.lo: erasure-code/isa/isa-l/erasure_code/gf_3vect_dot_prod_avx2.asm.s
-	$(AM_V_CCAS)$(LIBTOOL) $(AM_V_lt) $(libec_isa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(libec_isa_la_CCASFLAGS) $(CCASFLAGS) -c -o erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_3vect_dot_prod_avx2.asm.lo `test -f 'erasure-code/isa/isa-l/erasure_code/gf_3vect_dot_prod_avx2.asm.s' || echo '$(srcdir)/'`erasure-code/isa/isa-l/erasure_code/gf_3vect_dot_prod_avx2.asm.s
+erasure-code/isa/isa-l/erasure_code/libisa_la-gf_3vect_dot_prod_avx2.asm.lo: erasure-code/isa/isa-l/erasure_code/gf_3vect_dot_prod_avx2.asm.s
+	$(AM_V_CCAS)$(LIBTOOL) $(AM_V_lt) $(libisa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(libisa_la_CCASFLAGS) $(CCASFLAGS) -c -o erasure-code/isa/isa-l/erasure_code/libisa_la-gf_3vect_dot_prod_avx2.asm.lo `test -f 'erasure-code/isa/isa-l/erasure_code/gf_3vect_dot_prod_avx2.asm.s' || echo '$(srcdir)/'`erasure-code/isa/isa-l/erasure_code/gf_3vect_dot_prod_avx2.asm.s
 
-erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_3vect_dot_prod_avx.asm.lo: erasure-code/isa/isa-l/erasure_code/gf_3vect_dot_prod_avx.asm.s
-	$(AM_V_CCAS)$(LIBTOOL) $(AM_V_lt) $(libec_isa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(libec_isa_la_CCASFLAGS) $(CCASFLAGS) -c -o erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_3vect_dot_prod_avx.asm.lo `test -f 'erasure-code/isa/isa-l/erasure_code/gf_3vect_dot_prod_avx.asm.s' || echo '$(srcdir)/'`erasure-code/isa/isa-l/erasure_code/gf_3vect_dot_prod_avx.asm.s
+erasure-code/isa/isa-l/erasure_code/libisa_la-gf_3vect_dot_prod_avx.asm.lo: erasure-code/isa/isa-l/erasure_code/gf_3vect_dot_prod_avx.asm.s
+	$(AM_V_CCAS)$(LIBTOOL) $(AM_V_lt) $(libisa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(libisa_la_CCASFLAGS) $(CCASFLAGS) -c -o erasure-code/isa/isa-l/erasure_code/libisa_la-gf_3vect_dot_prod_avx.asm.lo `test -f 'erasure-code/isa/isa-l/erasure_code/gf_3vect_dot_prod_avx.asm.s' || echo '$(srcdir)/'`erasure-code/isa/isa-l/erasure_code/gf_3vect_dot_prod_avx.asm.s
 
-erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_3vect_dot_prod_sse.asm.lo: erasure-code/isa/isa-l/erasure_code/gf_3vect_dot_prod_sse.asm.s
-	$(AM_V_CCAS)$(LIBTOOL) $(AM_V_lt) $(libec_isa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(libec_isa_la_CCASFLAGS) $(CCASFLAGS) -c -o erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_3vect_dot_prod_sse.asm.lo `test -f 'erasure-code/isa/isa-l/erasure_code/gf_3vect_dot_prod_sse.asm.s' || echo '$(srcdir)/'`erasure-code/isa/isa-l/erasure_code/gf_3vect_dot_prod_sse.asm.s
+erasure-code/isa/isa-l/erasure_code/libisa_la-gf_3vect_dot_prod_sse.asm.lo: erasure-code/isa/isa-l/erasure_code/gf_3vect_dot_prod_sse.asm.s
+	$(AM_V_CCAS)$(LIBTOOL) $(AM_V_lt) $(libisa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(libisa_la_CCASFLAGS) $(CCASFLAGS) -c -o erasure-code/isa/isa-l/erasure_code/libisa_la-gf_3vect_dot_prod_sse.asm.lo `test -f 'erasure-code/isa/isa-l/erasure_code/gf_3vect_dot_prod_sse.asm.s' || echo '$(srcdir)/'`erasure-code/isa/isa-l/erasure_code/gf_3vect_dot_prod_sse.asm.s
 
-erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_4vect_dot_prod_avx2.asm.lo: erasure-code/isa/isa-l/erasure_code/gf_4vect_dot_prod_avx2.asm.s
-	$(AM_V_CCAS)$(LIBTOOL) $(AM_V_lt) $(libec_isa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(libec_isa_la_CCASFLAGS) $(CCASFLAGS) -c -o erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_4vect_dot_prod_avx2.asm.lo `test -f 'erasure-code/isa/isa-l/erasure_code/gf_4vect_dot_prod_avx2.asm.s' || echo '$(srcdir)/'`erasure-code/isa/isa-l/erasure_code/gf_4vect_dot_prod_avx2.asm.s
+erasure-code/isa/isa-l/erasure_code/libisa_la-gf_4vect_dot_prod_avx2.asm.lo: erasure-code/isa/isa-l/erasure_code/gf_4vect_dot_prod_avx2.asm.s
+	$(AM_V_CCAS)$(LIBTOOL) $(AM_V_lt) $(libisa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(libisa_la_CCASFLAGS) $(CCASFLAGS) -c -o erasure-code/isa/isa-l/erasure_code/libisa_la-gf_4vect_dot_prod_avx2.asm.lo `test -f 'erasure-code/isa/isa-l/erasure_code/gf_4vect_dot_prod_avx2.asm.s' || echo '$(srcdir)/'`erasure-code/isa/isa-l/erasure_code/gf_4vect_dot_prod_avx2.asm.s
 
-erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_4vect_dot_prod_avx.asm.lo: erasure-code/isa/isa-l/erasure_code/gf_4vect_dot_prod_avx.asm.s
-	$(AM_V_CCAS)$(LIBTOOL) $(AM_V_lt) $(libec_isa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(libec_isa_la_CCASFLAGS) $(CCASFLAGS) -c -o erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_4vect_dot_prod_avx.asm.lo `test -f 'erasure-code/isa/isa-l/erasure_code/gf_4vect_dot_prod_avx.asm.s' || echo '$(srcdir)/'`erasure-code/isa/isa-l/erasure_code/gf_4vect_dot_prod_avx.asm.s
+erasure-code/isa/isa-l/erasure_code/libisa_la-gf_4vect_dot_prod_avx.asm.lo: erasure-code/isa/isa-l/erasure_code/gf_4vect_dot_prod_avx.asm.s
+	$(AM_V_CCAS)$(LIBTOOL) $(AM_V_lt) $(libisa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(libisa_la_CCASFLAGS) $(CCASFLAGS) -c -o erasure-code/isa/isa-l/erasure_code/libisa_la-gf_4vect_dot_prod_avx.asm.lo `test -f 'erasure-code/isa/isa-l/erasure_code/gf_4vect_dot_prod_avx.asm.s' || echo '$(srcdir)/'`erasure-code/isa/isa-l/erasure_code/gf_4vect_dot_prod_avx.asm.s
 
-erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_4vect_dot_prod_sse.asm.lo: erasure-code/isa/isa-l/erasure_code/gf_4vect_dot_prod_sse.asm.s
-	$(AM_V_CCAS)$(LIBTOOL) $(AM_V_lt) $(libec_isa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(libec_isa_la_CCASFLAGS) $(CCASFLAGS) -c -o erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_4vect_dot_prod_sse.asm.lo `test -f 'erasure-code/isa/isa-l/erasure_code/gf_4vect_dot_prod_sse.asm.s' || echo '$(srcdir)/'`erasure-code/isa/isa-l/erasure_code/gf_4vect_dot_prod_sse.asm.s
+erasure-code/isa/isa-l/erasure_code/libisa_la-gf_4vect_dot_prod_sse.asm.lo: erasure-code/isa/isa-l/erasure_code/gf_4vect_dot_prod_sse.asm.s
+	$(AM_V_CCAS)$(LIBTOOL) $(AM_V_lt) $(libisa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(libisa_la_CCASFLAGS) $(CCASFLAGS) -c -o erasure-code/isa/isa-l/erasure_code/libisa_la-gf_4vect_dot_prod_sse.asm.lo `test -f 'erasure-code/isa/isa-l/erasure_code/gf_4vect_dot_prod_sse.asm.s' || echo '$(srcdir)/'`erasure-code/isa/isa-l/erasure_code/gf_4vect_dot_prod_sse.asm.s
 
-erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_5vect_dot_prod_avx2.asm.lo: erasure-code/isa/isa-l/erasure_code/gf_5vect_dot_prod_avx2.asm.s
-	$(AM_V_CCAS)$(LIBTOOL) $(AM_V_lt) $(libec_isa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(libec_isa_la_CCASFLAGS) $(CCASFLAGS) -c -o erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_5vect_dot_prod_avx2.asm.lo `test -f 'erasure-code/isa/isa-l/erasure_code/gf_5vect_dot_prod_avx2.asm.s' || echo '$(srcdir)/'`erasure-code/isa/isa-l/erasure_code/gf_5vect_dot_prod_avx2.asm.s
+erasure-code/isa/isa-l/erasure_code/libisa_la-gf_5vect_dot_prod_avx2.asm.lo: erasure-code/isa/isa-l/erasure_code/gf_5vect_dot_prod_avx2.asm.s
+	$(AM_V_CCAS)$(LIBTOOL) $(AM_V_lt) $(libisa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(libisa_la_CCASFLAGS) $(CCASFLAGS) -c -o erasure-code/isa/isa-l/erasure_code/libisa_la-gf_5vect_dot_prod_avx2.asm.lo `test -f 'erasure-code/isa/isa-l/erasure_code/gf_5vect_dot_prod_avx2.asm.s' || echo '$(srcdir)/'`erasure-code/isa/isa-l/erasure_code/gf_5vect_dot_prod_avx2.asm.s
 
-erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_5vect_dot_prod_avx.asm.lo: erasure-code/isa/isa-l/erasure_code/gf_5vect_dot_prod_avx.asm.s
-	$(AM_V_CCAS)$(LIBTOOL) $(AM_V_lt) $(libec_isa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(libec_isa_la_CCASFLAGS) $(CCASFLAGS) -c -o erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_5vect_dot_prod_avx.asm.lo `test -f 'erasure-code/isa/isa-l/erasure_code/gf_5vect_dot_prod_avx.asm.s' || echo '$(srcdir)/'`erasure-code/isa/isa-l/erasure_code/gf_5vect_dot_prod_avx.asm.s
+erasure-code/isa/isa-l/erasure_code/libisa_la-gf_5vect_dot_prod_avx.asm.lo: erasure-code/isa/isa-l/erasure_code/gf_5vect_dot_prod_avx.asm.s
+	$(AM_V_CCAS)$(LIBTOOL) $(AM_V_lt) $(libisa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(libisa_la_CCASFLAGS) $(CCASFLAGS) -c -o erasure-code/isa/isa-l/erasure_code/libisa_la-gf_5vect_dot_prod_avx.asm.lo `test -f 'erasure-code/isa/isa-l/erasure_code/gf_5vect_dot_prod_avx.asm.s' || echo '$(srcdir)/'`erasure-code/isa/isa-l/erasure_code/gf_5vect_dot_prod_avx.asm.s
 
-erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_5vect_dot_prod_sse.asm.lo: erasure-code/isa/isa-l/erasure_code/gf_5vect_dot_prod_sse.asm.s
-	$(AM_V_CCAS)$(LIBTOOL) $(AM_V_lt) $(libec_isa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(libec_isa_la_CCASFLAGS) $(CCASFLAGS) -c -o erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_5vect_dot_prod_sse.asm.lo `test -f 'erasure-code/isa/isa-l/erasure_code/gf_5vect_dot_prod_sse.asm.s' || echo '$(srcdir)/'`erasure-code/isa/isa-l/erasure_code/gf_5vect_dot_prod_sse.asm.s
+erasure-code/isa/isa-l/erasure_code/libisa_la-gf_5vect_dot_prod_sse.asm.lo: erasure-code/isa/isa-l/erasure_code/gf_5vect_dot_prod_sse.asm.s
+	$(AM_V_CCAS)$(LIBTOOL) $(AM_V_lt) $(libisa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(libisa_la_CCASFLAGS) $(CCASFLAGS) -c -o erasure-code/isa/isa-l/erasure_code/libisa_la-gf_5vect_dot_prod_sse.asm.lo `test -f 'erasure-code/isa/isa-l/erasure_code/gf_5vect_dot_prod_sse.asm.s' || echo '$(srcdir)/'`erasure-code/isa/isa-l/erasure_code/gf_5vect_dot_prod_sse.asm.s
 
-erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_6vect_dot_prod_avx2.asm.lo: erasure-code/isa/isa-l/erasure_code/gf_6vect_dot_prod_avx2.asm.s
-	$(AM_V_CCAS)$(LIBTOOL) $(AM_V_lt) $(libec_isa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(libec_isa_la_CCASFLAGS) $(CCASFLAGS) -c -o erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_6vect_dot_prod_avx2.asm.lo `test -f 'erasure-code/isa/isa-l/erasure_code/gf_6vect_dot_prod_avx2.asm.s' || echo '$(srcdir)/'`erasure-code/isa/isa-l/erasure_code/gf_6vect_dot_prod_avx2.asm.s
+erasure-code/isa/isa-l/erasure_code/libisa_la-gf_6vect_dot_prod_avx2.asm.lo: erasure-code/isa/isa-l/erasure_code/gf_6vect_dot_prod_avx2.asm.s
+	$(AM_V_CCAS)$(LIBTOOL) $(AM_V_lt) $(libisa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(libisa_la_CCASFLAGS) $(CCASFLAGS) -c -o erasure-code/isa/isa-l/erasure_code/libisa_la-gf_6vect_dot_prod_avx2.asm.lo `test -f 'erasure-code/isa/isa-l/erasure_code/gf_6vect_dot_prod_avx2.asm.s' || echo '$(srcdir)/'`erasure-code/isa/isa-l/erasure_code/gf_6vect_dot_prod_avx2.asm.s
 
-erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_6vect_dot_prod_avx.asm.lo: erasure-code/isa/isa-l/erasure_code/gf_6vect_dot_prod_avx.asm.s
-	$(AM_V_CCAS)$(LIBTOOL) $(AM_V_lt) $(libec_isa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(libec_isa_la_CCASFLAGS) $(CCASFLAGS) -c -o erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_6vect_dot_prod_avx.asm.lo `test -f 'erasure-code/isa/isa-l/erasure_code/gf_6vect_dot_prod_avx.asm.s' || echo '$(srcdir)/'`erasure-code/isa/isa-l/erasure_code/gf_6vect_dot_prod_avx.asm.s
+erasure-code/isa/isa-l/erasure_code/libisa_la-gf_6vect_dot_prod_avx.asm.lo: erasure-code/isa/isa-l/erasure_code/gf_6vect_dot_prod_avx.asm.s
+	$(AM_V_CCAS)$(LIBTOOL) $(AM_V_lt) $(libisa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(libisa_la_CCASFLAGS) $(CCASFLAGS) -c -o erasure-code/isa/isa-l/erasure_code/libisa_la-gf_6vect_dot_prod_avx.asm.lo `test -f 'erasure-code/isa/isa-l/erasure_code/gf_6vect_dot_prod_avx.asm.s' || echo '$(srcdir)/'`erasure-code/isa/isa-l/erasure_code/gf_6vect_dot_prod_avx.asm.s
 
-erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_6vect_dot_prod_sse.asm.lo: erasure-code/isa/isa-l/erasure_code/gf_6vect_dot_prod_sse.asm.s
-	$(AM_V_CCAS)$(LIBTOOL) $(AM_V_lt) $(libec_isa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(libec_isa_la_CCASFLAGS) $(CCASFLAGS) -c -o erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_6vect_dot_prod_sse.asm.lo `test -f 'erasure-code/isa/isa-l/erasure_code/gf_6vect_dot_prod_sse.asm.s' || echo '$(srcdir)/'`erasure-code/isa/isa-l/erasure_code/gf_6vect_dot_prod_sse.asm.s
+erasure-code/isa/isa-l/erasure_code/libisa_la-gf_6vect_dot_prod_sse.asm.lo: erasure-code/isa/isa-l/erasure_code/gf_6vect_dot_prod_sse.asm.s
+	$(AM_V_CCAS)$(LIBTOOL) $(AM_V_lt) $(libisa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(libisa_la_CCASFLAGS) $(CCASFLAGS) -c -o erasure-code/isa/isa-l/erasure_code/libisa_la-gf_6vect_dot_prod_sse.asm.lo `test -f 'erasure-code/isa/isa-l/erasure_code/gf_6vect_dot_prod_sse.asm.s' || echo '$(srcdir)/'`erasure-code/isa/isa-l/erasure_code/gf_6vect_dot_prod_sse.asm.s
 
-erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_vect_dot_prod_avx2.asm.lo: erasure-code/isa/isa-l/erasure_code/gf_vect_dot_prod_avx2.asm.s
-	$(AM_V_CCAS)$(LIBTOOL) $(AM_V_lt) $(libec_isa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(libec_isa_la_CCASFLAGS) $(CCASFLAGS) -c -o erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_vect_dot_prod_avx2.asm.lo `test -f 'erasure-code/isa/isa-l/erasure_code/gf_vect_dot_prod_avx2.asm.s' || echo '$(srcdir)/'`erasure-code/isa/isa-l/erasure_code/gf_vect_dot_prod_avx2.asm.s
+erasure-code/isa/isa-l/erasure_code/libisa_la-gf_vect_dot_prod_avx2.asm.lo: erasure-code/isa/isa-l/erasure_code/gf_vect_dot_prod_avx2.asm.s
+	$(AM_V_CCAS)$(LIBTOOL) $(AM_V_lt) $(libisa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(libisa_la_CCASFLAGS) $(CCASFLAGS) -c -o erasure-code/isa/isa-l/erasure_code/libisa_la-gf_vect_dot_prod_avx2.asm.lo `test -f 'erasure-code/isa/isa-l/erasure_code/gf_vect_dot_prod_avx2.asm.s' || echo '$(srcdir)/'`erasure-code/isa/isa-l/erasure_code/gf_vect_dot_prod_avx2.asm.s
 
-erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_vect_dot_prod_avx.asm.lo: erasure-code/isa/isa-l/erasure_code/gf_vect_dot_prod_avx.asm.s
-	$(AM_V_CCAS)$(LIBTOOL) $(AM_V_lt) $(libec_isa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(libec_isa_la_CCASFLAGS) $(CCASFLAGS) -c -o erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_vect_dot_prod_avx.asm.lo `test -f 'erasure-code/isa/isa-l/erasure_code/gf_vect_dot_prod_avx.asm.s' || echo '$(srcdir)/'`erasure-code/isa/isa-l/erasure_code/gf_vect_dot_prod_avx.asm.s
+erasure-code/isa/isa-l/erasure_code/libisa_la-gf_vect_dot_prod_avx.asm.lo: erasure-code/isa/isa-l/erasure_code/gf_vect_dot_prod_avx.asm.s
+	$(AM_V_CCAS)$(LIBTOOL) $(AM_V_lt) $(libisa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(libisa_la_CCASFLAGS) $(CCASFLAGS) -c -o erasure-code/isa/isa-l/erasure_code/libisa_la-gf_vect_dot_prod_avx.asm.lo `test -f 'erasure-code/isa/isa-l/erasure_code/gf_vect_dot_prod_avx.asm.s' || echo '$(srcdir)/'`erasure-code/isa/isa-l/erasure_code/gf_vect_dot_prod_avx.asm.s
 
-erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_vect_dot_prod_sse.asm.lo: erasure-code/isa/isa-l/erasure_code/gf_vect_dot_prod_sse.asm.s
-	$(AM_V_CCAS)$(LIBTOOL) $(AM_V_lt) $(libec_isa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(libec_isa_la_CCASFLAGS) $(CCASFLAGS) -c -o erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_vect_dot_prod_sse.asm.lo `test -f 'erasure-code/isa/isa-l/erasure_code/gf_vect_dot_prod_sse.asm.s' || echo '$(srcdir)/'`erasure-code/isa/isa-l/erasure_code/gf_vect_dot_prod_sse.asm.s
+erasure-code/isa/isa-l/erasure_code/libisa_la-gf_vect_dot_prod_sse.asm.lo: erasure-code/isa/isa-l/erasure_code/gf_vect_dot_prod_sse.asm.s
+	$(AM_V_CCAS)$(LIBTOOL) $(AM_V_lt) $(libisa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(libisa_la_CCASFLAGS) $(CCASFLAGS) -c -o erasure-code/isa/isa-l/erasure_code/libisa_la-gf_vect_dot_prod_sse.asm.lo `test -f 'erasure-code/isa/isa-l/erasure_code/gf_vect_dot_prod_sse.asm.s' || echo '$(srcdir)/'`erasure-code/isa/isa-l/erasure_code/gf_vect_dot_prod_sse.asm.s
 
-erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_2vect_mad_avx2.asm.lo: erasure-code/isa/isa-l/erasure_code/gf_2vect_mad_avx2.asm.s
-	$(AM_V_CCAS)$(LIBTOOL) $(AM_V_lt) $(libec_isa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(libec_isa_la_CCASFLAGS) $(CCASFLAGS) -c -o erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_2vect_mad_avx2.asm.lo `test -f 'erasure-code/isa/isa-l/erasure_code/gf_2vect_mad_avx2.asm.s' || echo '$(srcdir)/'`erasure-code/isa/isa-l/erasure_code/gf_2vect_mad_avx2.asm.s
+erasure-code/isa/isa-l/erasure_code/libisa_la-gf_2vect_mad_avx2.asm.lo: erasure-code/isa/isa-l/erasure_code/gf_2vect_mad_avx2.asm.s
+	$(AM_V_CCAS)$(LIBTOOL) $(AM_V_lt) $(libisa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(libisa_la_CCASFLAGS) $(CCASFLAGS) -c -o erasure-code/isa/isa-l/erasure_code/libisa_la-gf_2vect_mad_avx2.asm.lo `test -f 'erasure-code/isa/isa-l/erasure_code/gf_2vect_mad_avx2.asm.s' || echo '$(srcdir)/'`erasure-code/isa/isa-l/erasure_code/gf_2vect_mad_avx2.asm.s
 
-erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_2vect_mad_avx.asm.lo: erasure-code/isa/isa-l/erasure_code/gf_2vect_mad_avx.asm.s
-	$(AM_V_CCAS)$(LIBTOOL) $(AM_V_lt) $(libec_isa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(libec_isa_la_CCASFLAGS) $(CCASFLAGS) -c -o erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_2vect_mad_avx.asm.lo `test -f 'erasure-code/isa/isa-l/erasure_code/gf_2vect_mad_avx.asm.s' || echo '$(srcdir)/'`erasure-code/isa/isa-l/erasure_code/gf_2vect_mad_avx.asm.s
+erasure-code/isa/isa-l/erasure_code/libisa_la-gf_2vect_mad_avx.asm.lo: erasure-code/isa/isa-l/erasure_code/gf_2vect_mad_avx.asm.s
+	$(AM_V_CCAS)$(LIBTOOL) $(AM_V_lt) $(libisa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(libisa_la_CCASFLAGS) $(CCASFLAGS) -c -o erasure-code/isa/isa-l/erasure_code/libisa_la-gf_2vect_mad_avx.asm.lo `test -f 'erasure-code/isa/isa-l/erasure_code/gf_2vect_mad_avx.asm.s' || echo '$(srcdir)/'`erasure-code/isa/isa-l/erasure_code/gf_2vect_mad_avx.asm.s
 
-erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_2vect_mad_sse.asm.lo: erasure-code/isa/isa-l/erasure_code/gf_2vect_mad_sse.asm.s
-	$(AM_V_CCAS)$(LIBTOOL) $(AM_V_lt) $(libec_isa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(libec_isa_la_CCASFLAGS) $(CCASFLAGS) -c -o erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_2vect_mad_sse.asm.lo `test -f 'erasure-code/isa/isa-l/erasure_code/gf_2vect_mad_sse.asm.s' || echo '$(srcdir)/'`erasure-code/isa/isa-l/erasure_code/gf_2vect_mad_sse.asm.s
+erasure-code/isa/isa-l/erasure_code/libisa_la-gf_2vect_mad_sse.asm.lo: erasure-code/isa/isa-l/erasure_code/gf_2vect_mad_sse.asm.s
+	$(AM_V_CCAS)$(LIBTOOL) $(AM_V_lt) $(libisa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(libisa_la_CCASFLAGS) $(CCASFLAGS) -c -o erasure-code/isa/isa-l/erasure_code/libisa_la-gf_2vect_mad_sse.asm.lo `test -f 'erasure-code/isa/isa-l/erasure_code/gf_2vect_mad_sse.asm.s' || echo '$(srcdir)/'`erasure-code/isa/isa-l/erasure_code/gf_2vect_mad_sse.asm.s
 
-erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_3vect_mad_avx2.asm.lo: erasure-code/isa/isa-l/erasure_code/gf_3vect_mad_avx2.asm.s
-	$(AM_V_CCAS)$(LIBTOOL) $(AM_V_lt) $(libec_isa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(libec_isa_la_CCASFLAGS) $(CCASFLAGS) -c -o erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_3vect_mad_avx2.asm.lo `test -f 'erasure-code/isa/isa-l/erasure_code/gf_3vect_mad_avx2.asm.s' || echo '$(srcdir)/'`erasure-code/isa/isa-l/erasure_code/gf_3vect_mad_avx2.asm.s
+erasure-code/isa/isa-l/erasure_code/libisa_la-gf_3vect_mad_avx2.asm.lo: erasure-code/isa/isa-l/erasure_code/gf_3vect_mad_avx2.asm.s
+	$(AM_V_CCAS)$(LIBTOOL) $(AM_V_lt) $(libisa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(libisa_la_CCASFLAGS) $(CCASFLAGS) -c -o erasure-code/isa/isa-l/erasure_code/libisa_la-gf_3vect_mad_avx2.asm.lo `test -f 'erasure-code/isa/isa-l/erasure_code/gf_3vect_mad_avx2.asm.s' || echo '$(srcdir)/'`erasure-code/isa/isa-l/erasure_code/gf_3vect_mad_avx2.asm.s
 
-erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_3vect_mad_avx.asm.lo: erasure-code/isa/isa-l/erasure_code/gf_3vect_mad_avx.asm.s
-	$(AM_V_CCAS)$(LIBTOOL) $(AM_V_lt) $(libec_isa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(libec_isa_la_CCASFLAGS) $(CCASFLAGS) -c -o erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_3vect_mad_avx.asm.lo `test -f 'erasure-code/isa/isa-l/erasure_code/gf_3vect_mad_avx.asm.s' || echo '$(srcdir)/'`erasure-code/isa/isa-l/erasure_code/gf_3vect_mad_avx.asm.s
+erasure-code/isa/isa-l/erasure_code/libisa_la-gf_3vect_mad_avx.asm.lo: erasure-code/isa/isa-l/erasure_code/gf_3vect_mad_avx.asm.s
+	$(AM_V_CCAS)$(LIBTOOL) $(AM_V_lt) $(libisa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(libisa_la_CCASFLAGS) $(CCASFLAGS) -c -o erasure-code/isa/isa-l/erasure_code/libisa_la-gf_3vect_mad_avx.asm.lo `test -f 'erasure-code/isa/isa-l/erasure_code/gf_3vect_mad_avx.asm.s' || echo '$(srcdir)/'`erasure-code/isa/isa-l/erasure_code/gf_3vect_mad_avx.asm.s
 
-erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_3vect_mad_sse.asm.lo: erasure-code/isa/isa-l/erasure_code/gf_3vect_mad_sse.asm.s
-	$(AM_V_CCAS)$(LIBTOOL) $(AM_V_lt) $(libec_isa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(libec_isa_la_CCASFLAGS) $(CCASFLAGS) -c -o erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_3vect_mad_sse.asm.lo `test -f 'erasure-code/isa/isa-l/erasure_code/gf_3vect_mad_sse.asm.s' || echo '$(srcdir)/'`erasure-code/isa/isa-l/erasure_code/gf_3vect_mad_sse.asm.s
+erasure-code/isa/isa-l/erasure_code/libisa_la-gf_3vect_mad_sse.asm.lo: erasure-code/isa/isa-l/erasure_code/gf_3vect_mad_sse.asm.s
+	$(AM_V_CCAS)$(LIBTOOL) $(AM_V_lt) $(libisa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(libisa_la_CCASFLAGS) $(CCASFLAGS) -c -o erasure-code/isa/isa-l/erasure_code/libisa_la-gf_3vect_mad_sse.asm.lo `test -f 'erasure-code/isa/isa-l/erasure_code/gf_3vect_mad_sse.asm.s' || echo '$(srcdir)/'`erasure-code/isa/isa-l/erasure_code/gf_3vect_mad_sse.asm.s
 
-erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_4vect_mad_avx2.asm.lo: erasure-code/isa/isa-l/erasure_code/gf_4vect_mad_avx2.asm.s
-	$(AM_V_CCAS)$(LIBTOOL) $(AM_V_lt) $(libec_isa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(libec_isa_la_CCASFLAGS) $(CCASFLAGS) -c -o erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_4vect_mad_avx2.asm.lo `test -f 'erasure-code/isa/isa-l/erasure_code/gf_4vect_mad_avx2.asm.s' || echo '$(srcdir)/'`erasure-code/isa/isa-l/erasure_code/gf_4vect_mad_avx2.asm.s
+erasure-code/isa/isa-l/erasure_code/libisa_la-gf_4vect_mad_avx2.asm.lo: erasure-code/isa/isa-l/erasure_code/gf_4vect_mad_avx2.asm.s
+	$(AM_V_CCAS)$(LIBTOOL) $(AM_V_lt) $(libisa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(libisa_la_CCASFLAGS) $(CCASFLAGS) -c -o erasure-code/isa/isa-l/erasure_code/libisa_la-gf_4vect_mad_avx2.asm.lo `test -f 'erasure-code/isa/isa-l/erasure_code/gf_4vect_mad_avx2.asm.s' || echo '$(srcdir)/'`erasure-code/isa/isa-l/erasure_code/gf_4vect_mad_avx2.asm.s
 
-erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_4vect_mad_avx.asm.lo: erasure-code/isa/isa-l/erasure_code/gf_4vect_mad_avx.asm.s
-	$(AM_V_CCAS)$(LIBTOOL) $(AM_V_lt) $(libec_isa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(libec_isa_la_CCASFLAGS) $(CCASFLAGS) -c -o erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_4vect_mad_avx.asm.lo `test -f 'erasure-code/isa/isa-l/erasure_code/gf_4vect_mad_avx.asm.s' || echo '$(srcdir)/'`erasure-code/isa/isa-l/erasure_code/gf_4vect_mad_avx.asm.s
+erasure-code/isa/isa-l/erasure_code/libisa_la-gf_4vect_mad_avx.asm.lo: erasure-code/isa/isa-l/erasure_code/gf_4vect_mad_avx.asm.s
+	$(AM_V_CCAS)$(LIBTOOL) $(AM_V_lt) $(libisa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(libisa_la_CCASFLAGS) $(CCASFLAGS) -c -o erasure-code/isa/isa-l/erasure_code/libisa_la-gf_4vect_mad_avx.asm.lo `test -f 'erasure-code/isa/isa-l/erasure_code/gf_4vect_mad_avx.asm.s' || echo '$(srcdir)/'`erasure-code/isa/isa-l/erasure_code/gf_4vect_mad_avx.asm.s
 
-erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_4vect_mad_sse.asm.lo: erasure-code/isa/isa-l/erasure_code/gf_4vect_mad_sse.asm.s
-	$(AM_V_CCAS)$(LIBTOOL) $(AM_V_lt) $(libec_isa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(libec_isa_la_CCASFLAGS) $(CCASFLAGS) -c -o erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_4vect_mad_sse.asm.lo `test -f 'erasure-code/isa/isa-l/erasure_code/gf_4vect_mad_sse.asm.s' || echo '$(srcdir)/'`erasure-code/isa/isa-l/erasure_code/gf_4vect_mad_sse.asm.s
+erasure-code/isa/isa-l/erasure_code/libisa_la-gf_4vect_mad_sse.asm.lo: erasure-code/isa/isa-l/erasure_code/gf_4vect_mad_sse.asm.s
+	$(AM_V_CCAS)$(LIBTOOL) $(AM_V_lt) $(libisa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(libisa_la_CCASFLAGS) $(CCASFLAGS) -c -o erasure-code/isa/isa-l/erasure_code/libisa_la-gf_4vect_mad_sse.asm.lo `test -f 'erasure-code/isa/isa-l/erasure_code/gf_4vect_mad_sse.asm.s' || echo '$(srcdir)/'`erasure-code/isa/isa-l/erasure_code/gf_4vect_mad_sse.asm.s
 
-erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_5vect_mad_avx2.asm.lo: erasure-code/isa/isa-l/erasure_code/gf_5vect_mad_avx2.asm.s
-	$(AM_V_CCAS)$(LIBTOOL) $(AM_V_lt) $(libec_isa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(libec_isa_la_CCASFLAGS) $(CCASFLAGS) -c -o erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_5vect_mad_avx2.asm.lo `test -f 'erasure-code/isa/isa-l/erasure_code/gf_5vect_mad_avx2.asm.s' || echo '$(srcdir)/'`erasure-code/isa/isa-l/erasure_code/gf_5vect_mad_avx2.asm.s
+erasure-code/isa/isa-l/erasure_code/libisa_la-gf_5vect_mad_avx2.asm.lo: erasure-code/isa/isa-l/erasure_code/gf_5vect_mad_avx2.asm.s
+	$(AM_V_CCAS)$(LIBTOOL) $(AM_V_lt) $(libisa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(libisa_la_CCASFLAGS) $(CCASFLAGS) -c -o erasure-code/isa/isa-l/erasure_code/libisa_la-gf_5vect_mad_avx2.asm.lo `test -f 'erasure-code/isa/isa-l/erasure_code/gf_5vect_mad_avx2.asm.s' || echo '$(srcdir)/'`erasure-code/isa/isa-l/erasure_code/gf_5vect_mad_avx2.asm.s
 
-erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_5vect_mad_avx.asm.lo: erasure-code/isa/isa-l/erasure_code/gf_5vect_mad_avx.asm.s
-	$(AM_V_CCAS)$(LIBTOOL) $(AM_V_lt) $(libec_isa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(libec_isa_la_CCASFLAGS) $(CCASFLAGS) -c -o erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_5vect_mad_avx.asm.lo `test -f 'erasure-code/isa/isa-l/erasure_code/gf_5vect_mad_avx.asm.s' || echo '$(srcdir)/'`erasure-code/isa/isa-l/erasure_code/gf_5vect_mad_avx.asm.s
+erasure-code/isa/isa-l/erasure_code/libisa_la-gf_5vect_mad_avx.asm.lo: erasure-code/isa/isa-l/erasure_code/gf_5vect_mad_avx.asm.s
+	$(AM_V_CCAS)$(LIBTOOL) $(AM_V_lt) $(libisa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(libisa_la_CCASFLAGS) $(CCASFLAGS) -c -o erasure-code/isa/isa-l/erasure_code/libisa_la-gf_5vect_mad_avx.asm.lo `test -f 'erasure-code/isa/isa-l/erasure_code/gf_5vect_mad_avx.asm.s' || echo '$(srcdir)/'`erasure-code/isa/isa-l/erasure_code/gf_5vect_mad_avx.asm.s
 
-erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_5vect_mad_sse.asm.lo: erasure-code/isa/isa-l/erasure_code/gf_5vect_mad_sse.asm.s
-	$(AM_V_CCAS)$(LIBTOOL) $(AM_V_lt) $(libec_isa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(libec_isa_la_CCASFLAGS) $(CCASFLAGS) -c -o erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_5vect_mad_sse.asm.lo `test -f 'erasure-code/isa/isa-l/erasure_code/gf_5vect_mad_sse.asm.s' || echo '$(srcdir)/'`erasure-code/isa/isa-l/erasure_code/gf_5vect_mad_sse.asm.s
+erasure-code/isa/isa-l/erasure_code/libisa_la-gf_5vect_mad_sse.asm.lo: erasure-code/isa/isa-l/erasure_code/gf_5vect_mad_sse.asm.s
+	$(AM_V_CCAS)$(LIBTOOL) $(AM_V_lt) $(libisa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(libisa_la_CCASFLAGS) $(CCASFLAGS) -c -o erasure-code/isa/isa-l/erasure_code/libisa_la-gf_5vect_mad_sse.asm.lo `test -f 'erasure-code/isa/isa-l/erasure_code/gf_5vect_mad_sse.asm.s' || echo '$(srcdir)/'`erasure-code/isa/isa-l/erasure_code/gf_5vect_mad_sse.asm.s
 
-erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_6vect_mad_avx2.asm.lo: erasure-code/isa/isa-l/erasure_code/gf_6vect_mad_avx2.asm.s
-	$(AM_V_CCAS)$(LIBTOOL) $(AM_V_lt) $(libec_isa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(libec_isa_la_CCASFLAGS) $(CCASFLAGS) -c -o erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_6vect_mad_avx2.asm.lo `test -f 'erasure-code/isa/isa-l/erasure_code/gf_6vect_mad_avx2.asm.s' || echo '$(srcdir)/'`erasure-code/isa/isa-l/erasure_code/gf_6vect_mad_avx2.asm.s
+erasure-code/isa/isa-l/erasure_code/libisa_la-gf_6vect_mad_avx2.asm.lo: erasure-code/isa/isa-l/erasure_code/gf_6vect_mad_avx2.asm.s
+	$(AM_V_CCAS)$(LIBTOOL) $(AM_V_lt) $(libisa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(libisa_la_CCASFLAGS) $(CCASFLAGS) -c -o erasure-code/isa/isa-l/erasure_code/libisa_la-gf_6vect_mad_avx2.asm.lo `test -f 'erasure-code/isa/isa-l/erasure_code/gf_6vect_mad_avx2.asm.s' || echo '$(srcdir)/'`erasure-code/isa/isa-l/erasure_code/gf_6vect_mad_avx2.asm.s
 
-erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_6vect_mad_avx.asm.lo: erasure-code/isa/isa-l/erasure_code/gf_6vect_mad_avx.asm.s
-	$(AM_V_CCAS)$(LIBTOOL) $(AM_V_lt) $(libec_isa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(libec_isa_la_CCASFLAGS) $(CCASFLAGS) -c -o erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_6vect_mad_avx.asm.lo `test -f 'erasure-code/isa/isa-l/erasure_code/gf_6vect_mad_avx.asm.s' || echo '$(srcdir)/'`erasure-code/isa/isa-l/erasure_code/gf_6vect_mad_avx.asm.s
+erasure-code/isa/isa-l/erasure_code/libisa_la-gf_6vect_mad_avx.asm.lo: erasure-code/isa/isa-l/erasure_code/gf_6vect_mad_avx.asm.s
+	$(AM_V_CCAS)$(LIBTOOL) $(AM_V_lt) $(libisa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(libisa_la_CCASFLAGS) $(CCASFLAGS) -c -o erasure-code/isa/isa-l/erasure_code/libisa_la-gf_6vect_mad_avx.asm.lo `test -f 'erasure-code/isa/isa-l/erasure_code/gf_6vect_mad_avx.asm.s' || echo '$(srcdir)/'`erasure-code/isa/isa-l/erasure_code/gf_6vect_mad_avx.asm.s
 
-erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_6vect_mad_sse.asm.lo: erasure-code/isa/isa-l/erasure_code/gf_6vect_mad_sse.asm.s
-	$(AM_V_CCAS)$(LIBTOOL) $(AM_V_lt) $(libec_isa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(libec_isa_la_CCASFLAGS) $(CCASFLAGS) -c -o erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_6vect_mad_sse.asm.lo `test -f 'erasure-code/isa/isa-l/erasure_code/gf_6vect_mad_sse.asm.s' || echo '$(srcdir)/'`erasure-code/isa/isa-l/erasure_code/gf_6vect_mad_sse.asm.s
+erasure-code/isa/isa-l/erasure_code/libisa_la-gf_6vect_mad_sse.asm.lo: erasure-code/isa/isa-l/erasure_code/gf_6vect_mad_sse.asm.s
+	$(AM_V_CCAS)$(LIBTOOL) $(AM_V_lt) $(libisa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(libisa_la_CCASFLAGS) $(CCASFLAGS) -c -o erasure-code/isa/isa-l/erasure_code/libisa_la-gf_6vect_mad_sse.asm.lo `test -f 'erasure-code/isa/isa-l/erasure_code/gf_6vect_mad_sse.asm.s' || echo '$(srcdir)/'`erasure-code/isa/isa-l/erasure_code/gf_6vect_mad_sse.asm.s
 
-erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_vect_mad_avx2.asm.lo: erasure-code/isa/isa-l/erasure_code/gf_vect_mad_avx2.asm.s
-	$(AM_V_CCAS)$(LIBTOOL) $(AM_V_lt) $(libec_isa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(libec_isa_la_CCASFLAGS) $(CCASFLAGS) -c -o erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_vect_mad_avx2.asm.lo `test -f 'erasure-code/isa/isa-l/erasure_code/gf_vect_mad_avx2.asm.s' || echo '$(srcdir)/'`erasure-code/isa/isa-l/erasure_code/gf_vect_mad_avx2.asm.s
+erasure-code/isa/isa-l/erasure_code/libisa_la-gf_vect_mad_avx2.asm.lo: erasure-code/isa/isa-l/erasure_code/gf_vect_mad_avx2.asm.s
+	$(AM_V_CCAS)$(LIBTOOL) $(AM_V_lt) $(libisa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(libisa_la_CCASFLAGS) $(CCASFLAGS) -c -o erasure-code/isa/isa-l/erasure_code/libisa_la-gf_vect_mad_avx2.asm.lo `test -f 'erasure-code/isa/isa-l/erasure_code/gf_vect_mad_avx2.asm.s' || echo '$(srcdir)/'`erasure-code/isa/isa-l/erasure_code/gf_vect_mad_avx2.asm.s
 
-erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_vect_mad_avx.asm.lo: erasure-code/isa/isa-l/erasure_code/gf_vect_mad_avx.asm.s
-	$(AM_V_CCAS)$(LIBTOOL) $(AM_V_lt) $(libec_isa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(libec_isa_la_CCASFLAGS) $(CCASFLAGS) -c -o erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_vect_mad_avx.asm.lo `test -f 'erasure-code/isa/isa-l/erasure_code/gf_vect_mad_avx.asm.s' || echo '$(srcdir)/'`erasure-code/isa/isa-l/erasure_code/gf_vect_mad_avx.asm.s
+erasure-code/isa/isa-l/erasure_code/libisa_la-gf_vect_mad_avx.asm.lo: erasure-code/isa/isa-l/erasure_code/gf_vect_mad_avx.asm.s
+	$(AM_V_CCAS)$(LIBTOOL) $(AM_V_lt) $(libisa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(libisa_la_CCASFLAGS) $(CCASFLAGS) -c -o erasure-code/isa/isa-l/erasure_code/libisa_la-gf_vect_mad_avx.asm.lo `test -f 'erasure-code/isa/isa-l/erasure_code/gf_vect_mad_avx.asm.s' || echo '$(srcdir)/'`erasure-code/isa/isa-l/erasure_code/gf_vect_mad_avx.asm.s
 
-erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_vect_mad_sse.asm.lo: erasure-code/isa/isa-l/erasure_code/gf_vect_mad_sse.asm.s
-	$(AM_V_CCAS)$(LIBTOOL) $(AM_V_lt) $(libec_isa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(libec_isa_la_CCASFLAGS) $(CCASFLAGS) -c -o erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_vect_mad_sse.asm.lo `test -f 'erasure-code/isa/isa-l/erasure_code/gf_vect_mad_sse.asm.s' || echo '$(srcdir)/'`erasure-code/isa/isa-l/erasure_code/gf_vect_mad_sse.asm.s
+erasure-code/isa/isa-l/erasure_code/libisa_la-gf_vect_mad_sse.asm.lo: erasure-code/isa/isa-l/erasure_code/gf_vect_mad_sse.asm.s
+	$(AM_V_CCAS)$(LIBTOOL) $(AM_V_lt) $(libisa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(libisa_la_CCASFLAGS) $(CCASFLAGS) -c -o erasure-code/isa/isa-l/erasure_code/libisa_la-gf_vect_mad_sse.asm.lo `test -f 'erasure-code/isa/isa-l/erasure_code/gf_vect_mad_sse.asm.s' || echo '$(srcdir)/'`erasure-code/isa/isa-l/erasure_code/gf_vect_mad_sse.asm.s
 
-erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_vect_mul_avx.asm.lo: erasure-code/isa/isa-l/erasure_code/gf_vect_mul_avx.asm.s
-	$(AM_V_CCAS)$(LIBTOOL) $(AM_V_lt) $(libec_isa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(libec_isa_la_CCASFLAGS) $(CCASFLAGS) -c -o erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_vect_mul_avx.asm.lo `test -f 'erasure-code/isa/isa-l/erasure_code/gf_vect_mul_avx.asm.s' || echo '$(srcdir)/'`erasure-code/isa/isa-l/erasure_code/gf_vect_mul_avx.asm.s
+erasure-code/isa/isa-l/erasure_code/libisa_la-gf_vect_mul_avx.asm.lo: erasure-code/isa/isa-l/erasure_code/gf_vect_mul_avx.asm.s
+	$(AM_V_CCAS)$(LIBTOOL) $(AM_V_lt) $(libisa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(libisa_la_CCASFLAGS) $(CCASFLAGS) -c -o erasure-code/isa/isa-l/erasure_code/libisa_la-gf_vect_mul_avx.asm.lo `test -f 'erasure-code/isa/isa-l/erasure_code/gf_vect_mul_avx.asm.s' || echo '$(srcdir)/'`erasure-code/isa/isa-l/erasure_code/gf_vect_mul_avx.asm.s
 
-erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_vect_mul_sse.asm.lo: erasure-code/isa/isa-l/erasure_code/gf_vect_mul_sse.asm.s
-	$(AM_V_CCAS)$(LIBTOOL) $(AM_V_lt) $(libec_isa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(libec_isa_la_CCASFLAGS) $(CCASFLAGS) -c -o erasure-code/isa/isa-l/erasure_code/libec_isa_la-gf_vect_mul_sse.asm.lo `test -f 'erasure-code/isa/isa-l/erasure_code/gf_vect_mul_sse.asm.s' || echo '$(srcdir)/'`erasure-code/isa/isa-l/erasure_code/gf_vect_mul_sse.asm.s
+erasure-code/isa/isa-l/erasure_code/libisa_la-gf_vect_mul_sse.asm.lo: erasure-code/isa/isa-l/erasure_code/gf_vect_mul_sse.asm.s
+	$(AM_V_CCAS)$(LIBTOOL) $(AM_V_lt) $(libisa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(libisa_la_CCASFLAGS) $(CCASFLAGS) -c -o erasure-code/isa/isa-l/erasure_code/libisa_la-gf_vect_mul_sse.asm.lo `test -f 'erasure-code/isa/isa-l/erasure_code/gf_vect_mul_sse.asm.s' || echo '$(srcdir)/'`erasure-code/isa/isa-l/erasure_code/gf_vect_mul_sse.asm.s
 
 mostlyclean-libtool:
 	-rm -f *.lo
@@ -27263,6 +28296,7 @@ clean-libtool:
 	-rm -rf common/.libs common/_libs
 	-rm -rf compressor/.libs compressor/_libs
 	-rm -rf compressor/snappy/.libs compressor/snappy/_libs
+	-rm -rf compressor/zlib/.libs compressor/zlib/_libs
 	-rm -rf crush/.libs crush/_libs
 	-rm -rf erasure-code/.libs erasure-code/_libs
 	-rm -rf erasure-code/isa/.libs erasure-code/isa/_libs
@@ -27305,7 +28339,9 @@ clean-libtool:
 	-rm -rf test/libradosstriper/.libs test/libradosstriper/_libs
 	-rm -rf test/librbd/.libs test/librbd/_libs
 	-rm -rf test/librbd/journal/.libs test/librbd/journal/_libs
+	-rm -rf test/rbd_mirror/.libs test/rbd_mirror/_libs
 	-rm -rf test/system/.libs test/system/_libs
+	-rm -rf tools/rbd_mirror/.libs tools/rbd_mirror/_libs
 	-rm -rf tracing/.libs tracing/_libs
 install-pythonPYTHON: $(python_PYTHON)
 	@$(NORMAL_INSTALL)
@@ -27841,6 +28877,20 @@ unittest_compression_plugin_snappy.log: unittest_compression_plugin_snappy$(EXEE
 	--log-file $$b.log --trs-file $$b.trs \
 	$(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \
 	"$$tst" $(AM_TESTS_FD_REDIRECT)
+unittest_compression_zlib.log: unittest_compression_zlib$(EXEEXT)
+	@p='unittest_compression_zlib$(EXEEXT)'; \
+	b='unittest_compression_zlib'; \
+	$(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \
+	--log-file $$b.log --trs-file $$b.trs \
+	$(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \
+	"$$tst" $(AM_TESTS_FD_REDIRECT)
+unittest_compression_plugin_zlib.log: unittest_compression_plugin_zlib$(EXEEXT)
+	@p='unittest_compression_plugin_zlib$(EXEEXT)'; \
+	b='unittest_compression_plugin_zlib'; \
+	$(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \
+	--log-file $$b.log --trs-file $$b.trs \
+	$(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \
+	"$$tst" $(AM_TESTS_FD_REDIRECT)
 unittest_librados.log: unittest_librados$(EXEEXT)
 	@p='unittest_librados$(EXEEXT)'; \
 	b='unittest_librados'; \
@@ -27918,6 +28968,13 @@ unittest_bluestore_types.log: unittest_bluestore_types$(EXEEXT)
 	--log-file $$b.log --trs-file $$b.trs \
 	$(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \
 	"$$tst" $(AM_TESTS_FD_REDIRECT)
+unittest_transaction.log: unittest_transaction$(EXEEXT)
+	@p='unittest_transaction$(EXEEXT)'; \
+	b='unittest_transaction'; \
+	$(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \
+	--log-file $$b.log --trs-file $$b.trs \
+	$(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \
+	"$$tst" $(AM_TESTS_FD_REDIRECT)
 unittest_mon_moncap.log: unittest_mon_moncap$(EXEEXT)
 	@p='unittest_mon_moncap$(EXEEXT)'; \
 	b='unittest_mon_moncap'; \
@@ -28044,6 +29101,13 @@ unittest_prioritized_queue.log: unittest_prioritized_queue$(EXEEXT)
 	--log-file $$b.log --trs-file $$b.trs \
 	$(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \
 	"$$tst" $(AM_TESTS_FD_REDIRECT)
+unittest_weighted_priority_queue.log: unittest_weighted_priority_queue$(EXEEXT)
+	@p='unittest_weighted_priority_queue$(EXEEXT)'; \
+	b='unittest_weighted_priority_queue'; \
+	$(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \
+	--log-file $$b.log --trs-file $$b.trs \
+	$(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \
+	"$$tst" $(AM_TESTS_FD_REDIRECT)
 unittest_str_map.log: unittest_str_map$(EXEEXT)
 	@p='unittest_str_map$(EXEEXT)'; \
 	b='unittest_str_map'; \
@@ -28051,6 +29115,20 @@ unittest_str_map.log: unittest_str_map$(EXEEXT)
 	--log-file $$b.log --trs-file $$b.trs \
 	$(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \
 	"$$tst" $(AM_TESTS_FD_REDIRECT)
+unittest_mutex_debug.log: unittest_mutex_debug$(EXEEXT)
+	@p='unittest_mutex_debug$(EXEEXT)'; \
+	b='unittest_mutex_debug'; \
+	$(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \
+	--log-file $$b.log --trs-file $$b.trs \
+	$(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \
+	"$$tst" $(AM_TESTS_FD_REDIRECT)
+unittest_shunique_lock.log: unittest_shunique_lock$(EXEEXT)
+	@p='unittest_shunique_lock$(EXEEXT)'; \
+	b='unittest_shunique_lock'; \
+	$(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \
+	--log-file $$b.log --trs-file $$b.trs \
+	$(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \
+	"$$tst" $(AM_TESTS_FD_REDIRECT)
 unittest_sharedptr_registry.log: unittest_sharedptr_registry$(EXEEXT)
 	@p='unittest_sharedptr_registry$(EXEEXT)'; \
 	b='unittest_sharedptr_registry'; \
@@ -28387,6 +29465,13 @@ unittest_bit_vector.log: unittest_bit_vector$(EXEEXT)
 	--log-file $$b.log --trs-file $$b.trs \
 	$(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \
 	"$$tst" $(AM_TESTS_FD_REDIRECT)
+unittest_interval_set.log: unittest_interval_set$(EXEEXT)
+	@p='unittest_interval_set$(EXEEXT)'; \
+	b='unittest_interval_set'; \
+	$(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \
+	--log-file $$b.log --trs-file $$b.trs \
+	$(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \
+	"$$tst" $(AM_TESTS_FD_REDIRECT)
 ceph-detect-init/run-tox.sh.log: ceph-detect-init/run-tox.sh
 	@p='ceph-detect-init/run-tox.sh'; \
 	b='ceph-detect-init/run-tox.sh'; \
@@ -28394,6 +29479,13 @@ ceph-detect-init/run-tox.sh.log: ceph-detect-init/run-tox.sh
 	--log-file $$b.log --trs-file $$b.trs \
 	$(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \
 	"$$tst" $(AM_TESTS_FD_REDIRECT)
+ceph-disk/run-tox.sh.log: ceph-disk/run-tox.sh
+	@p='ceph-disk/run-tox.sh'; \
+	b='ceph-disk/run-tox.sh'; \
+	$(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \
+	--log-file $$b.log --trs-file $$b.trs \
+	$(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \
+	"$$tst" $(AM_TESTS_FD_REDIRECT)
 test/erasure-code/test-erasure-code.sh.log: test/erasure-code/test-erasure-code.sh
 	@p='test/erasure-code/test-erasure-code.sh'; \
 	b='test/erasure-code/test-erasure-code.sh'; \
@@ -28499,6 +29591,13 @@ test/mon/mon-ping.sh.log: test/mon/mon-ping.sh
 	--log-file $$b.log --trs-file $$b.trs \
 	$(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \
 	"$$tst" $(AM_TESTS_FD_REDIRECT)
+test/mon/mon-created-time.sh.log: test/mon/mon-created-time.sh
+	@p='test/mon/mon-created-time.sh'; \
+	b='test/mon/mon-created-time.sh'; \
+	$(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \
+	--log-file $$b.log --trs-file $$b.trs \
+	$(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \
+	"$$tst" $(AM_TESTS_FD_REDIRECT)
 test/mon/osd-erasure-code-profile.sh.log: test/mon/osd-erasure-code-profile.sh
 	@p='test/mon/osd-erasure-code-profile.sh'; \
 	b='test/mon/osd-erasure-code-profile.sh'; \
@@ -28597,9 +29696,9 @@ test/test_objectstore_memstore.sh.log: test/test_objectstore_memstore.sh
 	--log-file $$b.log --trs-file $$b.trs \
 	$(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \
 	"$$tst" $(AM_TESTS_FD_REDIRECT)
-test/ceph-disk.sh.log: test/ceph-disk.sh
-	@p='test/ceph-disk.sh'; \
-	b='test/ceph-disk.sh'; \
+test/test_pidfile.sh.log: test/test_pidfile.sh
+	@p='test/test_pidfile.sh'; \
+	b='test/test_pidfile.sh'; \
 	$(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \
 	--log-file $$b.log --trs-file $$b.trs \
 	$(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \
@@ -28716,7 +29815,7 @@ install-binPROGRAMS: install-libLTLIBRARIES
 
 installdirs: installdirs-recursive
 installdirs-am:
-	for dir in "$(DESTDIR)$(compressorlibdir)" "$(DESTDIR)$(erasure_codelibdir)" "$(DESTDIR)$(libdir)" "$(DESTDIR)$(radoslibdir)" "$(DESTDIR)$(bindir)" "$(DESTDIR)$(sbindir)" "$(DESTDIR)$(su_sbindir)" "$(DESTDIR)$(bindir)" "$(DESTDIR)$(ceph_libexecdir)" "$(DESTDIR)$(ceph_monstore_update_crushdir)" "$(DESTDIR)$(ceph_sbindir)" "$(DESTDIR)$(bindir)" "$(DESTDIR)$(sbindir)" "$(DESTDIR)$(shell_commondir)" "$(DESTDIR)$(su_sbindir)" "$(DESTDIR)$(pythondir)" "$(DESTDIR)$(bash_completiondir)" "$(DEST [...]
+	for dir in "$(DESTDIR)$(compressorlibdir)" "$(DESTDIR)$(erasure_codelibdir)" "$(DESTDIR)$(libdir)" "$(DESTDIR)$(radoslibdir)" "$(DESTDIR)$(bindir)" "$(DESTDIR)$(sbindir)" "$(DESTDIR)$(su_sbindir)" "$(DESTDIR)$(bindir)" "$(DESTDIR)$(ceph_libexecdir)" "$(DESTDIR)$(ceph_monstore_update_crushdir)" "$(DESTDIR)$(ceph_sbindir)" "$(DESTDIR)$(bindir)" "$(DESTDIR)$(sbindir)" "$(DESTDIR)$(su_sbindir)" "$(DESTDIR)$(pythondir)" "$(DESTDIR)$(bash_completiondir)" "$(DESTDIR)$(docdir)" "$(DESTDIR)$(lib [...]
 	  test -z "$$dir" || $(MKDIR_P) "$$dir"; \
 	done
 install: $(BUILT_SOURCES)
@@ -28798,6 +29897,8 @@ distclean-generic:
 	-rm -f compressor/$(am__dirstamp)
 	-rm -f compressor/snappy/$(DEPDIR)/$(am__dirstamp)
 	-rm -f compressor/snappy/$(am__dirstamp)
+	-rm -f compressor/zlib/$(DEPDIR)/$(am__dirstamp)
+	-rm -f compressor/zlib/$(am__dirstamp)
 	-rm -f crush/$(DEPDIR)/$(am__dirstamp)
 	-rm -f crush/$(am__dirstamp)
 	-rm -f erasure-code/$(DEPDIR)/$(am__dirstamp)
@@ -28872,8 +29973,6 @@ distclean-generic:
 	-rm -f os/filestore/$(am__dirstamp)
 	-rm -f os/fs/$(DEPDIR)/$(am__dirstamp)
 	-rm -f os/fs/$(am__dirstamp)
-	-rm -f os/keyvaluestore/$(DEPDIR)/$(am__dirstamp)
-	-rm -f os/keyvaluestore/$(am__dirstamp)
 	-rm -f os/kstore/$(DEPDIR)/$(am__dirstamp)
 	-rm -f os/kstore/$(am__dirstamp)
 	-rm -f os/memstore/$(DEPDIR)/$(am__dirstamp)
@@ -28946,6 +30045,8 @@ distclean-generic:
 	-rm -f test/librbd/$(am__dirstamp)
 	-rm -f test/librbd/exclusive_lock/$(DEPDIR)/$(am__dirstamp)
 	-rm -f test/librbd/exclusive_lock/$(am__dirstamp)
+	-rm -f test/librbd/image/$(DEPDIR)/$(am__dirstamp)
+	-rm -f test/librbd/image/$(am__dirstamp)
 	-rm -f test/librbd/journal/$(DEPDIR)/$(am__dirstamp)
 	-rm -f test/librbd/journal/$(am__dirstamp)
 	-rm -f test/librbd/object_map/$(DEPDIR)/$(am__dirstamp)
@@ -28968,6 +30069,8 @@ distclean-generic:
 	-rm -f test/osd/$(am__dirstamp)
 	-rm -f test/osdc/$(DEPDIR)/$(am__dirstamp)
 	-rm -f test/osdc/$(am__dirstamp)
+	-rm -f test/rbd_mirror/$(DEPDIR)/$(am__dirstamp)
+	-rm -f test/rbd_mirror/$(am__dirstamp)
 	-rm -f test/rgw/$(DEPDIR)/$(am__dirstamp)
 	-rm -f test/rgw/$(am__dirstamp)
 	-rm -f test/system/$(DEPDIR)/$(am__dirstamp)
@@ -28982,6 +30085,8 @@ distclean-generic:
 	-rm -f tools/rbd/$(am__dirstamp)
 	-rm -f tools/rbd/action/$(DEPDIR)/$(am__dirstamp)
 	-rm -f tools/rbd/action/$(am__dirstamp)
+	-rm -f tools/rbd_mirror/$(DEPDIR)/$(am__dirstamp)
+	-rm -f tools/rbd_mirror/$(am__dirstamp)
 	-rm -f tools/rbd_nbd/$(DEPDIR)/$(am__dirstamp)
 	-rm -f tools/rbd_nbd/$(am__dirstamp)
 	-rm -f tracing/$(DEPDIR)/$(am__dirstamp)
@@ -29002,7 +30107,7 @@ clean-am: clean-binPROGRAMS clean-checkPROGRAMS \
 	clean-sbinPROGRAMS clean-su_sbinPROGRAMS mostlyclean-am
 
 distclean: distclean-recursive
-	-rm -rf ./$(DEPDIR) arch/$(DEPDIR) auth/$(DEPDIR) auth/cephx/$(DEPDIR) auth/none/$(DEPDIR) auth/unknown/$(DEPDIR) civetweb/src/$(DEPDIR) client/$(DEPDIR) cls/cephfs/$(DEPDIR) cls/hello/$(DEPDIR) cls/journal/$(DEPDIR) cls/lock/$(DEPDIR) cls/log/$(DEPDIR) cls/numops/$(DEPDIR) cls/rbd/$(DEPDIR) cls/refcount/$(DEPDIR) cls/replica_log/$(DEPDIR) cls/rgw/$(DEPDIR) cls/statelog/$(DEPDIR) cls/timeindex/$(DEPDIR) cls/user/$(DEPDIR) cls/version/$(DEPDIR) common/$(DEPDIR) compressor/$(DEPDIR) compr [...]
+	-rm -rf ./$(DEPDIR) arch/$(DEPDIR) auth/$(DEPDIR) auth/cephx/$(DEPDIR) auth/none/$(DEPDIR) auth/unknown/$(DEPDIR) civetweb/src/$(DEPDIR) client/$(DEPDIR) cls/cephfs/$(DEPDIR) cls/hello/$(DEPDIR) cls/journal/$(DEPDIR) cls/lock/$(DEPDIR) cls/log/$(DEPDIR) cls/numops/$(DEPDIR) cls/rbd/$(DEPDIR) cls/refcount/$(DEPDIR) cls/replica_log/$(DEPDIR) cls/rgw/$(DEPDIR) cls/statelog/$(DEPDIR) cls/timeindex/$(DEPDIR) cls/user/$(DEPDIR) cls/version/$(DEPDIR) common/$(DEPDIR) compressor/$(DEPDIR) compr [...]
 	-rm -f Makefile
 distclean-am: clean-am distclean-compile distclean-generic \
 	distclean-hdr distclean-tags
@@ -29027,8 +30132,7 @@ install-data-am: install-bash_completionDATA \
 	install-libcephfs_includeDATA install-librbd_includeDATA \
 	install-pythonPYTHON install-rados_includeDATA \
 	install-radoslibLTLIBRARIES install-radosstriper_includeDATA \
-	install-shell_commonSCRIPTS install-su_sbinPROGRAMS \
-	install-su_sbinSCRIPTS
+	install-su_sbinPROGRAMS install-su_sbinSCRIPTS
 
 install-dvi: install-dvi-recursive
 
@@ -29060,7 +30164,7 @@ install-ps-am:
 installcheck-am:
 
 maintainer-clean: maintainer-clean-recursive
-	-rm -rf ./$(DEPDIR) arch/$(DEPDIR) auth/$(DEPDIR) auth/cephx/$(DEPDIR) auth/none/$(DEPDIR) auth/unknown/$(DEPDIR) civetweb/src/$(DEPDIR) client/$(DEPDIR) cls/cephfs/$(DEPDIR) cls/hello/$(DEPDIR) cls/journal/$(DEPDIR) cls/lock/$(DEPDIR) cls/log/$(DEPDIR) cls/numops/$(DEPDIR) cls/rbd/$(DEPDIR) cls/refcount/$(DEPDIR) cls/replica_log/$(DEPDIR) cls/rgw/$(DEPDIR) cls/statelog/$(DEPDIR) cls/timeindex/$(DEPDIR) cls/user/$(DEPDIR) cls/version/$(DEPDIR) common/$(DEPDIR) compressor/$(DEPDIR) compr [...]
+	-rm -rf ./$(DEPDIR) arch/$(DEPDIR) auth/$(DEPDIR) auth/cephx/$(DEPDIR) auth/none/$(DEPDIR) auth/unknown/$(DEPDIR) civetweb/src/$(DEPDIR) client/$(DEPDIR) cls/cephfs/$(DEPDIR) cls/hello/$(DEPDIR) cls/journal/$(DEPDIR) cls/lock/$(DEPDIR) cls/log/$(DEPDIR) cls/numops/$(DEPDIR) cls/rbd/$(DEPDIR) cls/refcount/$(DEPDIR) cls/replica_log/$(DEPDIR) cls/rgw/$(DEPDIR) cls/statelog/$(DEPDIR) cls/timeindex/$(DEPDIR) cls/user/$(DEPDIR) cls/version/$(DEPDIR) common/$(DEPDIR) compressor/$(DEPDIR) compr [...]
 	-rm -f Makefile
 maintainer-clean-am: distclean-am maintainer-clean-generic
 
@@ -29087,8 +30191,8 @@ uninstall-am: uninstall-bash_completionDATA uninstall-binPROGRAMS \
 	uninstall-local uninstall-pythonPYTHON \
 	uninstall-rados_includeDATA uninstall-radoslibLTLIBRARIES \
 	uninstall-radosstriper_includeDATA uninstall-sbinPROGRAMS \
-	uninstall-sbinSCRIPTS uninstall-shell_commonSCRIPTS \
-	uninstall-su_sbinPROGRAMS uninstall-su_sbinSCRIPTS
+	uninstall-sbinSCRIPTS uninstall-su_sbinPROGRAMS \
+	uninstall-su_sbinSCRIPTS
 
 .MAKE: $(am__recursive_targets) all check check-am install install-am \
 	install-strip
@@ -29119,9 +30223,9 @@ uninstall-am: uninstall-bash_completionDATA uninstall-binPROGRAMS \
 	install-pdf-am install-ps install-ps-am install-pythonPYTHON \
 	install-rados_includeDATA install-radoslibLTLIBRARIES \
 	install-radosstriper_includeDATA install-sbinPROGRAMS \
-	install-sbinSCRIPTS install-shell_commonSCRIPTS install-strip \
-	install-su_sbinPROGRAMS install-su_sbinSCRIPTS installcheck \
-	installcheck-am installdirs installdirs-am maintainer-clean \
+	install-sbinSCRIPTS install-strip install-su_sbinPROGRAMS \
+	install-su_sbinSCRIPTS installcheck installcheck-am \
+	installdirs installdirs-am maintainer-clean \
 	maintainer-clean-generic mostlyclean mostlyclean-compile \
 	mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
 	recheck tags tags-am uninstall uninstall-am \
@@ -29135,8 +30239,8 @@ uninstall-am: uninstall-bash_completionDATA uninstall-binPROGRAMS \
 	uninstall-local uninstall-pythonPYTHON \
 	uninstall-rados_includeDATA uninstall-radoslibLTLIBRARIES \
 	uninstall-radosstriper_includeDATA uninstall-sbinPROGRAMS \
-	uninstall-sbinSCRIPTS uninstall-shell_commonSCRIPTS \
-	uninstall-su_sbinPROGRAMS uninstall-su_sbinSCRIPTS
+	uninstall-sbinSCRIPTS uninstall-su_sbinPROGRAMS \
+	uninstall-su_sbinSCRIPTS
 
 .PRECIOUS: Makefile
 
@@ -29158,11 +30262,13 @@ export PYTHONPATH=$(top_srcdir)/src/pybind
 
 @NO_GIT_VERSION_TRUE@export NO_VERSION="yes"
 
-ceph-detect-init-all:
-	cd $(srcdir)/ceph-detect-init ; python setup.py build
+ceph-detect-init-all: ceph-detect-init/virtualenv
+
+ceph-detect-init/virtualenv:
+	cd $(srcdir)/ceph-detect-init ; ../tools/setup-virtualenv.sh ; virtualenv/bin/python setup.py develop
 
 ceph-detect-init-clean:
-	cd $(srcdir)/ceph-detect-init ; python setup.py clean ; rm -fr wheelhouse .tox build .coverage *.egg-info
+	cd $(srcdir)/ceph-detect-init ; python setup.py clean ; rm -fr wheelhouse .tox virtualenv .coverage *.egg-info
 
 ceph-detect-init-install-data:
 	cd $(srcdir)/ceph-detect-init ; \
@@ -29176,6 +30282,26 @@ ceph-detect-init-install-data:
 	fi ; \
 	python setup.py install $$root $$options
 
+ceph-disk-all: ceph-disk/virtualenv
+
+ceph-disk/virtualenv:
+	cd $(srcdir)/ceph-disk ; ../tools/setup-virtualenv.sh ; virtualenv/bin/python setup.py develop
+
+ceph-disk-clean:
+	cd $(srcdir)/ceph-disk ; python setup.py clean ; rm -fr wheelhouse .tox virtualenv .coverage *.egg-info
+
+ceph-disk-install-data:
+	cd $(srcdir)/ceph-disk ; \
+	if test "$(DESTDIR)" ; then \
+		if lsb_release -si | grep --quiet 'Ubuntu\|Debian\|Devuan' ; then \
+			options=--install-layout=deb ; \
+		else \
+			options=--prefix=/usr ; \
+		fi ; \
+		root="--root=$(DESTDIR) --install-script=/usr/sbin" ; \
+	fi ; \
+	python setup.py install $$root $$options
+
 #crush_includedir = $(includedir)/crush
 #crush_include_DATA = \
 #	$(srcdir)/crush/hash.h \
@@ -29188,6 +30314,14 @@ ceph-detect-init-install-data:
 # PORTABLE=1 fixes the aarch64 build (-march=native doesn't work there)
 @ENABLE_SERVER_TRUE@@WITH_SLIBROCKSDB_TRUE@rocksdb/librocksdb.a:
 @ENABLE_SERVER_TRUE@@WITH_SLIBROCKSDB_TRUE@	cd rocksdb && EXTRA_CXXFLAGS=-fPIC PORTABLE=1 make -j$(shell nproc) static_lib
+@ENABLE_SERVER_TRUE@@WITH_SPDK_TRUE@${SPDK_SRCDIR}/nvme/libspdk_nvme.a:
+@ENABLE_SERVER_TRUE@@WITH_SPDK_TRUE@	$(MAKE) -C ${SPDK_SRCDIR}/nvme DPDK_INC=${LIBDPDK_CFLAGS}
+@ENABLE_SERVER_TRUE@@WITH_SPDK_TRUE@${SPDK_SRCDIR}/memory/libspdk_memory.a:
+@ENABLE_SERVER_TRUE@@WITH_SPDK_TRUE@	$(MAKE) -C ${SPDK_SRCDIR}/memory DPDK_INC=${LIBDPDK_CFLAGS}
+@ENABLE_SERVER_TRUE@@WITH_SPDK_TRUE@${SPDK_SRCDIR}/util/libspdk_util.a:
+@ENABLE_SERVER_TRUE@@WITH_SPDK_TRUE@	$(MAKE) -C ${SPDK_SRCDIR}/util DPDK_INC=${LIBDPDK_CFLAGS}
+@ENABLE_SERVER_TRUE@@WITH_SPDK_TRUE@${SPDK_SRCDIR}/ioat/libspdk_ioat.a:
+@ENABLE_SERVER_TRUE@@WITH_SPDK_TRUE@	$(MAKE) -C ${SPDK_SRCDIR}/ioat DPDK_INC=${LIBDPDK_CFLAGS}
 
 erasure-code/jerasure/ErasureCodePluginJerasure.cc: ./ceph_ver.h
 
@@ -29202,6 +30336,8 @@ erasure-code/shec/ErasureCodePluginSelectShec.cc: ./ceph_ver.h
 @WITH_BETTER_YASM_ELF64_TRUE@erasure-code/isa/ErasureCodePluginIsa.cc: ./ceph_ver.h
 erasure-code/ErasureCodePlugin.cc: ./ceph_ver.h
 
+compressor/zlib/CompressionPluginZlib.cc: ./ceph_ver.h
+
 compressor/snappy/CompressionPluginSnappy.cc: ./ceph_ver.h
 compressor/CompressionPlugin.cc: ./ceph_ver.h
 
diff --git a/src/acconfig.h.in b/src/acconfig.h.in
index 25af9b5..8a4dad2 100644
--- a/src/acconfig.h.in
+++ b/src/acconfig.h.in
@@ -113,6 +113,14 @@
 /* Defined if you don't have atomic_ops */
 #undef HAVE_LIBAIO
 
+/* Define to 1 if you have the `boost_iostreams' library (-lboost_iostreams).
+   */
+#undef HAVE_LIBBOOST_IOSTREAMS
+
+/* Define to 1 if you have the `boost_iostreams-mt' library
+   (-lboost_iostreams-mt). */
+#undef HAVE_LIBBOOST_IOSTREAMS_MT
+
 /* Define to 1 if you have the `boost_program_options' library
    (-lboost_program_options). */
 #undef HAVE_LIBBOOST_PROGRAM_OPTIONS
@@ -151,6 +159,9 @@
 /* Define to 1 if you have the `curl' library (-lcurl). */
 #undef HAVE_LIBCURL
 
+/* Define if you have fuse */
+#undef HAVE_LIBFUSE
+
 /* Define to 1 if you have the `ibverbs' library (-libverbs). */
 #undef HAVE_LIBIBVERBS
 
@@ -244,6 +255,9 @@
 /* Define to 1 if you have sched.h. */
 #undef HAVE_SCHED
 
+/* SPDK conditional compilation */
+#undef HAVE_SPDK
+
 /* Support SSE (Streaming SIMD Extensions) instructions */
 #undef HAVE_SSE
 
diff --git a/src/auth/AuthClientHandler.h b/src/auth/AuthClientHandler.h
index e12cbb9..9b34a6f 100644
--- a/src/auth/AuthClientHandler.h
+++ b/src/auth/AuthClientHandler.h
@@ -40,7 +40,7 @@ protected:
   RWLock lock;
 
 public:
-  AuthClientHandler(CephContext *cct_) 
+  explicit AuthClientHandler(CephContext *cct_)
     : cct(cct_), global_id(0), want(CEPH_ENTITY_TYPE_AUTH), have(0), need(0),
       lock("AuthClientHandler::lock") {}
   virtual ~AuthClientHandler() {}
diff --git a/src/auth/AuthServiceHandler.h b/src/auth/AuthServiceHandler.h
index 429a550..ccadbb0 100644
--- a/src/auth/AuthServiceHandler.h
+++ b/src/auth/AuthServiceHandler.h
@@ -29,7 +29,7 @@ public:
   EntityName entity_name;
   uint64_t global_id;
 
-  AuthServiceHandler(CephContext *cct_) : cct(cct_), global_id(0) {}
+  explicit AuthServiceHandler(CephContext *cct_) : cct(cct_), global_id(0) {}
 
   virtual ~AuthServiceHandler() { }
 
diff --git a/src/auth/AuthSessionHandler.h b/src/auth/AuthSessionHandler.h
index c560119..b1f30a4 100644
--- a/src/auth/AuthSessionHandler.h
+++ b/src/auth/AuthSessionHandler.h
@@ -44,7 +44,7 @@ public:
   int messages_encrypted;
   int messages_decrypted;
 
-  AuthSessionHandler(CephContext *cct_) : cct(cct_), protocol(CEPH_AUTH_UNKNOWN), messages_signed(0),
+  explicit AuthSessionHandler(CephContext *cct_) : cct(cct_), protocol(CEPH_AUTH_UNKNOWN), messages_signed(0),
     signatures_checked(0), signatures_matched(0), signatures_failed(0), messages_encrypted(0),
     messages_decrypted(0) {}
 
diff --git a/src/auth/Crypto.cc b/src/auth/Crypto.cc
index 000a5b4..61b2ac8 100644
--- a/src/auth/Crypto.cc
+++ b/src/auth/Crypto.cc
@@ -354,6 +354,7 @@ CryptoKeyHandler *CryptoAES::get_key_handler(const bufferptr& secret,
   ostringstream oss;
   if (ckh->init(secret, oss) < 0) {
     error = oss.str();
+    delete ckh;
     return NULL;
   }
   return ckh;
diff --git a/src/auth/cephx/CephxKeyServer.h b/src/auth/cephx/CephxKeyServer.h
index 250331d..7d95fad 100644
--- a/src/auth/cephx/CephxKeyServer.h
+++ b/src/auth/cephx/CephxKeyServer.h
@@ -35,7 +35,7 @@ struct KeyServerData {
   version_t rotating_ver;
   map<uint32_t, RotatingSecrets> rotating_secrets;
 
-  KeyServerData(KeyRing *extra)
+  explicit KeyServerData(KeyRing *extra)
     : version(0),
       extra_secrets(extra),
       rotating_ver(0) {}
diff --git a/src/auth/cephx/CephxProtocol.h b/src/auth/cephx/CephxProtocol.h
index f08f07d..4a5fb5d 100644
--- a/src/auth/cephx/CephxProtocol.h
+++ b/src/auth/cephx/CephxProtocol.h
@@ -277,7 +277,7 @@ private:
 public:
   uint64_t nonce;
 
-  CephXAuthorizer(CephContext *cct_)
+  explicit CephXAuthorizer(CephContext *cct_)
     : AuthAuthorizer(CEPH_AUTH_CEPHX), cct(cct_), nonce(0) {}
 
   bool build_authorizer();
@@ -320,7 +320,7 @@ struct CephXTicketManager {
   tickets_map_t tickets_map;
   uint64_t global_id;
 
-  CephXTicketManager(CephContext *cct_) : global_id(0), cct(cct_) {}
+  explicit CephXTicketManager(CephContext *cct_) : global_id(0), cct(cct_) {}
 
   bool verify_service_ticket_reply(CryptoKey& principal_secret,
 				 bufferlist::iterator& indata);
diff --git a/src/auth/none/AuthNoneServiceHandler.h b/src/auth/none/AuthNoneServiceHandler.h
index c2bbbc8..a45dbbf 100644
--- a/src/auth/none/AuthNoneServiceHandler.h
+++ b/src/auth/none/AuthNoneServiceHandler.h
@@ -22,7 +22,7 @@ class CephContext;
 
 class AuthNoneServiceHandler  : public AuthServiceHandler {
 public:
-  AuthNoneServiceHandler(CephContext *cct_) 
+  explicit AuthNoneServiceHandler(CephContext *cct_)
     : AuthServiceHandler(cct_) {}
   ~AuthNoneServiceHandler() {}
   
diff --git a/src/ceph-crush-location b/src/ceph-crush-location
index 1f33f38..d683a7d 100755
--- a/src/ceph-crush-location
+++ b/src/ceph-crush-location
@@ -20,10 +20,10 @@ if [ `dirname $0` = "." ] && [ $PWD != "/usr/bin" ]; then
     LIBDIR=.
     ETCDIR=.
 else
-    BINDIR=/usr/bin
-    SBINDIR=/usr/sbin
-    LIBDIR=/usr/lib/ceph
-    ETCDIR=/etc/ceph
+    BINDIR=/usr/local/bin
+    SBINDIR=/usr/local/sbin
+    LIBDIR=/usr/local/lib/ceph
+    ETCDIR=/usr/local/etc/ceph
 fi
 
 usage_exit() {
diff --git a/src/ceph-debugpack.in b/src/ceph-debugpack.in
index 9ac8f3b..fc61796 100644
--- a/src/ceph-debugpack.in
+++ b/src/ceph-debugpack.in
@@ -4,11 +4,11 @@
 # current directory too.
 if [ `dirname $0` = "." ] && [ $PWD != "/etc/init.d" ]; then
     BINDIR=.
-    LIBDIR=.
+    LIBEXECDIR=.
     ETCDIR=.
 else
     BINDIR=@bindir@
-    LIBDIR=@libdir@/ceph
+    LIBEXECDIR=@libexecdir@/ceph
     ETCDIR=@sysconfdir@/ceph
 fi
 
@@ -32,7 +32,7 @@ wait_pid_exit() {
 	fi
 }
 
-. $LIBDIR/ceph_common.sh
+. $LIBEXECDIR/ceph_common.sh
 
 dest_tar=''
 while [ $# -ge 1 ]; do
diff --git a/src/ceph-detect-init/Makefile.am b/src/ceph-detect-init/Makefile.am
index 932f755..45196a5 100644
--- a/src/ceph-detect-init/Makefile.am
+++ b/src/ceph-detect-init/Makefile.am
@@ -53,11 +53,13 @@ EXTRA_DIST += \
 	ceph-detect-init/tests/test_all.py \
 	ceph-detect-init/tox.ini
 
-ceph-detect-init-all:
-	cd $(srcdir)/ceph-detect-init ; python setup.py build
+ceph-detect-init-all: ceph-detect-init/virtualenv
+
+ceph-detect-init/virtualenv:
+	cd $(srcdir)/ceph-detect-init ; ../tools/setup-virtualenv.sh ; virtualenv/bin/python setup.py develop
 
 ceph-detect-init-clean:
-	cd $(srcdir)/ceph-detect-init ; python setup.py clean ; rm -fr wheelhouse .tox build .coverage *.egg-info
+	cd $(srcdir)/ceph-detect-init ; python setup.py clean ; rm -fr wheelhouse .tox virtualenv .coverage *.egg-info
 
 ceph-detect-init-install-data:
 	cd $(srcdir)/ceph-detect-init ; \
diff --git a/src/ceph-detect-init/run-tox.sh b/src/ceph-detect-init/run-tox.sh
index 206938e..6a8e073 100755
--- a/src/ceph-detect-init/run-tox.sh
+++ b/src/ceph-detect-init/run-tox.sh
@@ -1,7 +1,7 @@
 #!/bin/bash
 #
 # Copyright (C) 2015 SUSE LINUX GmbH
-# Copyright (C) 2015 <contact at redhat.com>
+# Copyright (C) 2016 <contact at redhat.com>
 #
 # Author: Owen Synge <osynge at suse.com>
 # Author: Loic Dachary <loic at dachary.org>
@@ -19,18 +19,8 @@
 
 # run from the ceph-detect-init directory or from its parent
 test -d ceph-detect-init && cd ceph-detect-init
-trap "rm -fr make-check" EXIT
-virtualenv make-check
-. make-check/bin/activate
-# older versions of pip will not install wrap_console scripts
-# when using wheel packages
-pip --log make-check/log.txt install --upgrade 'pip >= 6.1'
-if test -d wheelhouse ; then
-    export NO_INDEX=--no-index
-fi
-pip --log make-check/log.txt install $NO_INDEX --use-wheel --find-links=file://$(pwd)/wheelhouse --upgrade distribute
-pip --log make-check/log.txt install $NO_INDEX --use-wheel --find-links=file://$(pwd)/wheelhouse 'tox >=1.9' 
-tox > make-check/tox.out 2>&1 
+source virtualenv/bin/activate
+tox > virtualenv/tox.out 2>&1
 status=$?
-grep -v InterpreterNotFound < make-check/tox.out
+grep -v InterpreterNotFound < virtualenv/tox.out
 exit $status
diff --git a/src/ceph-disk/AUTHORS.rst b/src/ceph-disk/AUTHORS.rst
new file mode 100644
index 0000000..6c15eab
--- /dev/null
+++ b/src/ceph-disk/AUTHORS.rst
@@ -0,0 +1,28 @@
+- Loic Dachary <ldachary at redhat.com>
+- Sage Weil <sweil at redhat.com>
+- Danny Al-Gaaf <danny.al-gaaf at bisect.de>
+- Alfredo Deza <adeza at redhat.com>
+- Vicente Cheng <freeze.bilsted at gmail.com>
+- Dan van der Ster <daniel.vanderster at cern.ch>
+- David Disseldorp <ddiss at suse.de>
+- Owen Synge <osynge at suse.com>
+- Milan Broz <mbroz at redhat.com>
+- Gary Lowell <gary.lowell at inktank.com>
+- Andrew Bartlett <abartlet at catalyst.net.nz>
+- Dan Mick <dmick at redhat.com>
+- Alexandre Marangone <amarango at redhat.com>
+- Stuart Longland <stuartl at vrt.com.au>
+- Stephen F Taylor <steveftaylor at gmail.com>
+- Sébastien Han <shan at redhat.com>
+- Sandon Van Ness <svanness at redhat.com>
+- Mykola Golub <mgolub at mirantis.com>
+- Kefu Chai <kchai at redhat.com>
+- Josh Durgin <jdurgin at redhat.com>
+- Jonathan Davies <jonathan.davies at canonical.com>
+- John Spray <jspray at redhat.com>
+- Joe Julian <jjulian at io.com>
+- Ilja Slepnev <islepnev at gmail.com>
+- Greg Farnum <gfarnum at redhat.com>
+- git-harry <git-harry at live.co.uk>
+- Christos Stavrakakis <stavr.chris at gmail.com>
+- Blaine Gardner <blaine.gardner at hp.com>
diff --git a/src/ceph-disk/MANIFEST.in b/src/ceph-disk/MANIFEST.in
new file mode 100644
index 0000000..23abe0d
--- /dev/null
+++ b/src/ceph-disk/MANIFEST.in
@@ -0,0 +1 @@
+include AUTHORS.rst
diff --git a/src/ceph-disk/Makefile.am b/src/ceph-disk/Makefile.am
new file mode 100644
index 0000000..a0f68de
--- /dev/null
+++ b/src/ceph-disk/Makefile.am
@@ -0,0 +1,54 @@
+#
+# Copyright (C) 2015 <contact at redhat.com>
+#
+# Author: Loic Dachary <loic at dachary.org>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Library Public License for more details.
+#
+check_SCRIPTS += ceph-disk/run-tox.sh
+
+EXTRA_DIST += \
+	ceph-disk/AUTHORS.rst \
+	ceph-disk/ceph_disk/__init__.py \
+	ceph-disk/ceph_disk/main.py \
+	ceph-disk/Makefile.am \
+	ceph-disk/MANIFEST.in \
+	ceph-disk/README.rst \
+	ceph-disk/requirements.txt \
+	ceph-disk/run-tox.sh \
+	ceph-disk/setup.py \
+	ceph-disk/test-requirements.txt \
+	ceph-disk/tests/test_main.py \
+	ceph-disk/tox.ini
+
+ceph-disk-all: ceph-disk/virtualenv
+
+ceph-disk/virtualenv:
+	cd $(srcdir)/ceph-disk ; ../tools/setup-virtualenv.sh ; virtualenv/bin/python setup.py develop
+
+ceph-disk-clean:
+	cd $(srcdir)/ceph-disk ; python setup.py clean ; rm -fr wheelhouse .tox virtualenv .coverage *.egg-info
+
+ceph-disk-install-data:
+	cd $(srcdir)/ceph-disk ; \
+	if test "$(DESTDIR)" ; then \
+		if lsb_release -si | grep --quiet 'Ubuntu\|Debian\|Devuan' ; then \
+			options=--install-layout=deb ; \
+		else \
+			options=--prefix=/usr ; \
+		fi ; \
+		root="--root=$(DESTDIR) --install-script=/usr/sbin" ; \
+	fi ; \
+	python setup.py install $$root $$options
+
+LOCAL_ALL += ceph-disk-all
+LOCAL_CLEAN += ceph-disk-clean
+LOCAL_INSTALLDATA += ceph-disk-install-data
diff --git a/src/ceph-disk/README.rst b/src/ceph-disk/README.rst
new file mode 100644
index 0000000..a5fae08
--- /dev/null
+++ b/src/ceph-disk/README.rst
@@ -0,0 +1,4 @@
+ceph-disk
+=========
+
+ceph-disk is a command line tool that manages Ceph OSD storage
diff --git a/src/ceph-disk/ceph_disk/__init__.py b/src/ceph-disk/ceph_disk/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/ceph-disk b/src/ceph-disk/ceph_disk/main.py
similarity index 63%
rename from src/ceph-disk
rename to src/ceph-disk/ceph_disk/main.py
index 4c9de91..d9277b1 100755
--- a/src/ceph-disk
+++ b/src/ceph-disk/ceph_disk/main.py
@@ -36,80 +36,124 @@ import shlex
 import pwd
 import grp
 
-"""
-Prepare:
- - create GPT partition
- - mark the partition with the ceph type uuid
- - create a file system
- - mark the fs as ready for ceph consumption
- - entire data disk is used (one big partition)
- - a new partition is added to the journal disk (so it can be easily shared)
-
- - triggered by administrator or ceph-deploy, e.g.  'ceph-disk <data disk> [journal disk]
-
-Activate:
- - if encrypted, map the dmcrypt volume
- - mount the volume in a temp location
- - allocate an osd id (if needed)
- - if deactived, no-op (to activate with --reactivate flag)
- - remount in the correct location /var/lib/ceph/osd/$cluster-$id
- - remove the deactive flag (with --reactivate flag)
- - start ceph-osd
-
- - triggered by udev when it sees the OSD gpt partition type
- - triggered by admin 'ceph-disk activate <path>'
- - triggered on ceph service startup with 'ceph-disk activate-all'
-
-Deactivate:
- - check partition type (support dmcrypt, mpath, normal)
- - stop ceph-osd service if needed (make osd out with option --mark-out)
- - remove 'ready', 'active', and INIT-specific files
- - create deactive flag
- - umount device and remove mount point
- - if the partition type is dmcrypt, remove the data dmcrypt map.
-
-Destroy:
- - check partition type (support dmcrypt, mpath, normal)
- - remove OSD from CRUSH map
- - remove OSD cephx key
- - deallocate OSD ID
- - if the partition type is dmcrypt, remove the journal dmcrypt map.
- - destroy data (with --zap option)
-
-We rely on /dev/disk/by-partuuid to find partitions by their UUID;
-this is what the journal symlink inside the osd data volume normally
-points to.
-
-activate-all relies on /dev/disk/by-parttype-uuid/$typeuuid.$uuid to
-find all partitions.  We install special udev rules to create these
-links.
-
-udev triggers 'ceph-disk activate <dev>' or 'ceph-disk
-activate-journal <dev>' based on the partition type.
-
-On old distros (e.g., RHEL6), the blkid installed does not recognize
-GPT partition metadata and the /dev/disk/by-partuuid etc. links aren't
-present.  We have a horrible hack in the form of ceph-disk-udev that
-parses gparted output to create the symlinks above and triggers the
-'ceph-disk activate' etc commands that udev normally would do if it
-knew the GPT partition type.
+CEPH_OSD_ONDISK_MAGIC = 'ceph osd volume v026'
 
-"""
+PTYPE = {
+    'regular': {
+        'journal': {
+            # identical because creating a journal is atomic
+            'ready': '45b0969e-9b03-4f30-b4c6-b4b80ceff106',
+            'tobe': '45b0969e-9b03-4f30-b4c6-b4b80ceff106',
+        },
+        'block': {
+            # identical because creating a block is atomic
+            'ready': 'cafecafe-9b03-4f30-b4c6-b4b80ceff106',
+            'tobe': 'cafecafe-9b03-4f30-b4c6-b4b80ceff106',
+        },
+        'osd': {
+            'ready': '4fbd7e29-9d25-41b8-afd0-062c0ceff05d',
+            'tobe': '89c57f98-2fe5-4dc0-89c1-f3ad0ceff2be',
+        },
+    },
+    'luks': {
+        'journal': {
+            'ready': '45b0969e-9b03-4f30-b4c6-35865ceff106',
+            'tobe': '89c57f98-2fe5-4dc0-89c1-35865ceff2be',
+        },
+        'block': {
+            'ready': 'cafecafe-9b03-4f30-b4c6-35865ceff106',
+            'tobe': '89c57f98-2fe5-4dc0-89c1-35865ceff2be',
+        },
+        'osd': {
+            'ready': '4fbd7e29-9d25-41b8-afd0-35865ceff05d',
+            'tobe': '89c57f98-2fe5-4dc0-89c1-5ec00ceff2be',
+        },
+    },
+    'plain': {
+        'journal': {
+            'ready': '45b0969e-9b03-4f30-b4c6-5ec00ceff106',
+            'tobe': '89c57f98-2fe5-4dc0-89c1-35865ceff2be',
+        },
+        'block': {
+            'ready': 'cafecafe-9b03-4f30-b4c6-5ec00ceff106',
+            'tobe': '89c57f98-2fe5-4dc0-89c1-35865ceff2be',
+        },
+        'osd': {
+            'ready': '4fbd7e29-9d25-41b8-afd0-5ec00ceff05d',
+            'tobe': '89c57f98-2fe5-4dc0-89c1-5ec00ceff2be',
+        },
+    },
+    'mpath': {
+        'journal': {
+            'ready': '45b0969e-8ae0-4982-bf9d-5a8d867af560',
+            'tobe': '45b0969e-8ae0-4982-bf9d-5a8d867af560',
+        },
+        'block': {
+            'ready': 'cafecafe-8ae0-4982-bf9d-5a8d867af560',
+            'tobe': 'cafecafe-8ae0-4982-bf9d-5a8d867af560',
+        },
+        'osd': {
+            'ready': '4fbd7e29-8ae0-4982-bf9d-5a8d867af560',
+            'tobe': '89c57f98-8ae0-4982-bf9d-5a8d867af560',
+        },
+    },
+}
+
+
+class Ptype(object):
+
+    @staticmethod
+    def get_ready_by_type(what):
+        return [x['ready'] for x in PTYPE[what].values()]
+
+    @staticmethod
+    def get_ready_by_name(name):
+        return [x[name]['ready'] for x in PTYPE.values()]
+
+    @staticmethod
+    def is_regular_space(ptype):
+        return Ptype.is_what_space('regular', ptype)
+
+    @staticmethod
+    def is_mpath_space(ptype):
+        return Ptype.is_what_space('mpath', ptype)
+
+    @staticmethod
+    def is_plain_space(ptype):
+        return Ptype.is_what_space('plain', ptype)
+
+    @staticmethod
+    def is_luks_space(ptype):
+        return Ptype.is_what_space('luks', ptype)
+
+    @staticmethod
+    def is_what_space(what, ptype):
+        for name in Space.NAMES:
+            if ptype == PTYPE[what][name]['ready']:
+                return True
+        return False
 
-CEPH_OSD_ONDISK_MAGIC = 'ceph osd volume v026'
+    @staticmethod
+    def space_ptype_to_name(ptype):
+        for what in PTYPE.values():
+            for name in Space.NAMES:
+                if ptype == what[name]['ready']:
+                    return name
+        raise ValueError('ptype ' + ptype + ' not found')
+
+    @staticmethod
+    def is_dmcrypt_space(ptype):
+        for name in Space.NAMES:
+            if Ptype.is_dmcrypt(ptype, name):
+                return True
+        return False
 
-JOURNAL_UUID =              '45b0969e-9b03-4f30-b4c6-b4b80ceff106'
-MPATH_JOURNAL_UUID =        '45b0969e-8ae0-4982-bf9d-5a8d867af560'
-DMCRYPT_JOURNAL_UUID =      '45b0969e-9b03-4f30-b4c6-5ec00ceff106'
-DMCRYPT_LUKS_JOURNAL_UUID = '45b0969e-9b03-4f30-b4c6-35865ceff106'
-OSD_UUID =                  '4fbd7e29-9d25-41b8-afd0-062c0ceff05d'
-MPATH_OSD_UUID =            '4fbd7e29-8ae0-4982-bf9d-5a8d867af560'
-DMCRYPT_OSD_UUID =          '4fbd7e29-9d25-41b8-afd0-5ec00ceff05d'
-DMCRYPT_LUKS_OSD_UUID =     '4fbd7e29-9d25-41b8-afd0-35865ceff05d'
-TOBE_UUID =                 '89c57f98-2fe5-4dc0-89c1-f3ad0ceff2be'
-MPATH_TOBE_UUID =           '89c57f98-8ae0-4982-bf9d-5a8d867af560'
-DMCRYPT_TOBE_UUID =         '89c57f98-2fe5-4dc0-89c1-5ec00ceff2be'
-DMCRYPT_JOURNAL_TOBE_UUID = '89c57f98-2fe5-4dc0-89c1-35865ceff2be'
+    @staticmethod
+    def is_dmcrypt(ptype, name):
+        for what in ('plain', 'luks'):
+            if ptype == PTYPE[what][name]['ready']:
+                return True
+        return False
 
 DEFAULT_FS_TYPE = 'xfs'
 SYSFS = '/sys'
@@ -117,10 +161,10 @@ SYSFS = '/sys'
 """
 OSD STATUS Definition
 """
-OSD_STATUS_OUT_DOWN = 	0
-OSD_STATUS_OUT_UP = 	1
-OSD_STATUS_IN_DOWN =	2
-OSD_STATUS_IN_UP =	3
+OSD_STATUS_OUT_DOWN = 0
+OSD_STATUS_OUT_UP = 1
+OSD_STATUS_IN_DOWN = 2
+OSD_STATUS_IN_UP = 3
 
 MOUNT_OPTIONS = dict(
     btrfs='noatime,user_subvol_rm_allowed',
@@ -130,7 +174,7 @@ MOUNT_OPTIONS = dict(
     # that user_xattr helped
     ext4='noatime,user_xattr',
     xfs='noatime,inode64',
-    )
+)
 
 MKFS_ARGS = dict(
     btrfs=[
@@ -139,15 +183,15 @@ MKFS_ARGS = dict(
         '-m', 'single',
         '-l', '32768',
         '-n', '32768',
-        ],
+    ],
     xfs=[
         # xfs insists on not overwriting previous fs; even if we wipe
         # partition table, we often recreate it exactly the same way,
         # so we'll see ghosts of filesystems past
         '-f',
         '-i', 'size=2048',
-        ],
-    )
+    ],
+)
 
 INIT_SYSTEMS = [
     'upstart',
@@ -155,12 +199,16 @@ INIT_SYSTEMS = [
     'systemd',
     'auto',
     'none',
-    ]
+]
 
 STATEDIR = '/var/lib/ceph'
 
 SYSCONFDIR = '/etc/ceph'
 
+prepare_lock = None
+activate_lock = None
+SUPPRESS_PREFIX = None
+
 # only warn once about some things
 warned_about = {}
 
@@ -176,8 +224,6 @@ if LOG_NAME == '__main__':
 LOG = logging.getLogger(LOG_NAME)
 
 
-###### lock ########
-
 class filelock(object):
     def __init__(self, fn):
         self.fn = fn
@@ -194,9 +240,6 @@ class filelock(object):
         self.fd = None
 
 
-###### exceptions ########
-
-
 class Error(Exception):
     """
     Error
@@ -258,8 +301,6 @@ class ExecutableNotFound(CephDiskException):
     pass
 
 
-####### utils
-
 def is_systemd():
     """
     Detect whether systemd is running
@@ -270,6 +311,7 @@ def is_systemd():
                 return True
     return False
 
+
 def is_upstart():
     """
     Detect whether upstart is running
@@ -279,6 +321,7 @@ def is_upstart():
         return True
     return False
 
+
 def maybe_mkdir(*a, **kw):
     """
     Creates a new directory if it doesn't exist, removes
@@ -317,7 +360,7 @@ def which(executable):
     for location in locations:
         executable_path = os.path.join(location, executable)
         if (os.path.isfile(executable_path) and
-            os.access(executable_path, os.X_OK)):
+                os.access(executable_path, os.X_OK)):
             return executable_path
 
 
@@ -389,7 +432,8 @@ def platform_distro():
 
 def platform_information():
     distro, release, codename = platform.linux_distribution()
-    if not codename and 'debian' in distro.lower():  # this could be an empty string in Debian
+    # this could be an empty string in Debian
+    if not codename and 'debian' in distro.lower():
         debian_codenames = {
             '8': 'jessie',
             '7': 'wheezy',
@@ -398,8 +442,8 @@ def platform_information():
         major_version = release.split('.')[0]
         codename = debian_codenames.get(major_version, '')
 
-        # In order to support newer jessie/sid or wheezy/sid strings we test this
-        # if sid is buried in the minor, we should use sid anyway.
+        # In order to support newer jessie/sid or wheezy/sid strings we test
+        # this: if sid is buried in the minor, we should use sid anyway.
         if not codename and '/' in release:
             major, minor = release.split('/')
             if minor == 'sid':
@@ -440,12 +484,15 @@ def platform_information():
 #
 # Using the basename in this case fails.
 #
+
+
 def block_path(dev):
     path = os.path.realpath(dev)
     rdev = os.stat(path).st_rdev
     (M, m) = (os.major(rdev), os.minor(rdev))
     return "{sysfs}/dev/block/{M}:{m}".format(sysfs=SYSFS, M=M, m=m)
 
+
 def get_dm_uuid(dev):
     uuid_path = os.path.join(block_path(dev), 'dm', 'uuid')
     LOG.debug("get_dm_uuid " + dev + " uuid path is " + uuid_path)
@@ -455,6 +502,7 @@ def get_dm_uuid(dev):
     LOG.debug("get_dm_uuid " + dev + " uuid is " + uuid)
     return uuid
 
+
 def is_mpath(dev):
     """
     True if the path is managed by multipath
@@ -464,6 +512,7 @@ def is_mpath(dev):
             (re.match('part\d+-mpath-', uuid) or
              re.match('mpath-', uuid)))
 
+
 def get_dev_name(path):
     """
     get device name from path.  e.g.::
@@ -512,11 +561,11 @@ def get_dev_size(dev, size='megabytes'):
     :param dev: the device to calculate the size
     """
     fd = os.open(dev, os.O_RDONLY)
-    dividers = {'bytes': 1, 'megabytes': 1024*1024}
+    dividers = {'bytes': 1, 'megabytes': 1024 * 1024}
     try:
         device_size = os.lseek(fd, 0, os.SEEK_END)
-        divider = dividers.get(size, 1024*1024)  # default to megabytes
-        return device_size/divider
+        divider = dividers.get(size, 1024 * 1024)  # default to megabytes
+        return device_size / divider
     except Exception as error:
         LOG.warning('failed to get size of %s: %s' % (dev, str(error)))
     finally:
@@ -531,11 +580,13 @@ def get_partition_mpath(dev, pnum):
     else:
         return None
 
+
 def get_partition_dev(dev, pnum):
     """
     get the device name for a partition
 
-    assume that partitions are named like the base dev, with a number, and optionally
+    assume that partitions are named like the base dev,
+    with a number, and optionally
     some intervening characters (like 'p').  e.g.,
 
        sda 1 -> sda1
@@ -548,13 +599,15 @@ def get_partition_dev(dev, pnum):
         name = get_dev_name(os.path.realpath(dev))
         for f in os.listdir(os.path.join('/sys/block', name)):
             if f.startswith(name) and f.endswith(str(pnum)):
-                # we want the shortest name that starts with the base name and ends with the partition number
+                # we want the shortest name that starts with the base name
+                # and ends with the partition number
                 if not partname or len(f) < len(partname):
                     partname = f
     if partname:
         return get_dev_path(partname)
     else:
-        raise Error('partition %d for %s does not appear to exist' % (pnum, dev))
+        raise Error('partition %d for %s does not appear to exist' %
+                    (pnum, dev))
 
 
 def list_all_partitions():
@@ -570,6 +623,7 @@ def list_all_partitions():
         dev_part_list[name] = list_partitions(get_dev_path(name))
     return dev_part_list
 
+
 def list_partitions(dev):
     dev = os.path.realpath(dev)
     if is_mpath(dev):
@@ -577,6 +631,7 @@ def list_partitions(dev):
     else:
         return list_partitions_device(dev)
 
+
 def list_partitions_mpath(dev, part_re="part\d+-mpath-"):
     p = block_path(dev)
     partitions = []
@@ -601,6 +656,7 @@ def list_partitions_device(dev):
             partitions.append(name)
     return partitions
 
+
 def get_partition_base(dev):
     """
     Get the base device for a partition
@@ -619,14 +675,17 @@ def get_partition_base(dev):
             return get_dev_path(basename)
     raise Error('no parent device for partition', dev)
 
+
 def is_partition_mpath(dev):
     uuid = get_dm_uuid(dev)
     return bool(re.match('part\d+-mpath-', uuid))
 
+
 def partnum_mpath(dev):
     uuid = get_dm_uuid(dev)
     return re.findall('part(\d+)-mpath-', uuid)[0]
 
+
 def get_partition_base_mpath(dev):
     slave_path = os.path.join(block_path(dev), 'slaves')
     slaves = os.listdir(slave_path)
@@ -635,6 +694,7 @@ def get_partition_base_mpath(dev):
     name = open(name_path, 'r').read().strip()
     return os.path.join('/dev/mapper', name)
 
+
 def is_partition(dev):
     """
     Check whether a given device path is a partition or a full disk.
@@ -698,7 +758,8 @@ def is_held(dev):
     # partition?
     part = base
     while len(base):
-        directory = '/sys/block/{base}/{part}/holders'.format(part=part, base=base)
+        directory = '/sys/block/{base}/{part}/holders'.format(
+            part=part, base=base)
         if os.path.exists(directory):
             return os.listdir(directory)
         base = base[:-1]
@@ -717,7 +778,8 @@ def verify_not_in_use(dev, check_partitions=False):
         raise Error('Device is mounted', dev)
     holders = is_held(dev)
     if holders:
-        raise Error('Device %s is in use by a device-mapper mapping (dm-crypt?)' % dev, ','.join(holders))
+        raise Error('Device %s is in use by a device-mapper '
+                    'mapping (dm-crypt?)' % dev, ','.join(holders))
 
     if check_partitions and not is_partition(dev):
         for partname in list_partitions(dev):
@@ -726,7 +788,9 @@ def verify_not_in_use(dev, check_partitions=False):
                 raise Error('Device is mounted', partition)
             holders = is_held(partition)
             if holders:
-                raise Error('Device %s is in use by a device-mapper mapping (dm-crypt?)' % partition, ','.join(holders))
+                raise Error('Device %s is in use by a device-mapper '
+                            'mapping (dm-crypt?)'
+                            % partition, ','.join(holders))
 
 
 def must_be_one_line(line):
@@ -796,8 +860,8 @@ def init_get():
         args=[
             'ceph-detect-init',
             '--default', 'sysvinit',
-            ],
-        )
+        ],
+    )
     init = must_be_one_line(init)
     return init
 
@@ -829,7 +893,7 @@ def allocate_osd_id(
     cluster,
     fsid,
     keyring,
-    ):
+):
     """
     Allocates an OSD id on the given cluster.
 
@@ -847,8 +911,8 @@ def allocate_osd_id(
                 '--keyring', keyring,
                 'osd', 'create', '--concise',
                 fsid,
-                ],
-            )
+            ],
+        )
     except subprocess.CalledProcessError as e:
         raise Error('ceph osd create failed', e, e.output)
     osd_id = must_be_one_line(osd_id)
@@ -865,6 +929,7 @@ def get_osd_id(path):
         check_osd_id(osd_id)
     return osd_id
 
+
 def get_ceph_user():
     try:
         pwd.getpwnam('ceph')
@@ -873,24 +938,16 @@ def get_ceph_user():
     except KeyError:
         return 'root'
 
+
 def path_set_context(path):
     # restore selinux context to default policy values
     if which('restorecon'):
-        command(
-           [
-                'restorecon', '-R',
-                path,
-                ],
-           )
+        command(['restorecon', '-R', path])
 
     # if ceph user exists, set owner to ceph
     if get_ceph_user() == 'ceph':
-        command(
-           [
-                'chown', '-R', 'ceph:ceph',
-                path,
-                 ],
-           )
+        command(['chown', '-R', 'ceph:ceph', path])
+
 
 def _check_output(args=None, **kwargs):
     out, err, ret = command(args, **kwargs)
@@ -916,13 +973,13 @@ def get_conf(cluster, variable):
                 'ceph-conf',
                 '--cluster={cluster}'.format(
                     cluster=cluster,
-                    ),
+                ),
                 '--name=osd.',
                 '--lookup',
                 variable,
-                ],
+            ],
             close_fds=True,
-            )
+        )
     except OSError as e:
         raise Error('error executing ceph-conf', e, err)
     if ret == 1:
@@ -950,18 +1007,18 @@ def get_conf_with_default(cluster, variable):
                 'ceph-osd',
                 '--cluster={cluster}'.format(
                     cluster=cluster,
-                    ),
+                ),
                 '--show-config-value={variable}'.format(
                     variable=variable,
-                    ),
-                ],
+                ),
+            ],
             close_fds=True,
-            )
+        )
     except subprocess.CalledProcessError as e:
         raise Error(
             'getting variable from configuration failed',
             e,
-            )
+        )
 
     value = str(out).split('\n', 1)[0]
     return value
@@ -983,7 +1040,7 @@ def get_dmcrypt_key_path(
     _uuid,
     key_dir,
     luks
-    ):
+):
     """
     Get path to dmcrypt key file.
 
@@ -1002,7 +1059,7 @@ def get_or_create_dmcrypt_key(
     key_dir,
     key_size,
     luks
-    ):
+):
     """
     Get path to existing dmcrypt key or create a new key file.
 
@@ -1015,11 +1072,11 @@ def get_or_create_dmcrypt_key(
     # make a new key
     try:
         if not os.path.exists(key_dir):
-            os.makedirs(key_dir, stat.S_IRUSR|stat.S_IWUSR|stat.S_IXUSR)
+            os.makedirs(key_dir, stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR)
         with file('/dev/urandom', 'rb') as i:
             key = i.read(key_size / 8)
-            fd = os.open(path, os.O_WRONLY|os.O_CREAT,
-                         stat.S_IRUSR|stat.S_IWUSR)
+            fd = os.open(path, os.O_WRONLY | os.O_CREAT,
+                         stat.S_IRUSR | stat.S_IWUSR)
             assert os.write(fd, key) == len(key)
             os.close(fd)
         return path
@@ -1034,7 +1091,7 @@ def _dmcrypt_map(
     cryptsetup_parameters,
     luks,
     format_dev=False,
-    ):
+):
     """
     Maps a device to a dmcrypt device.
 
@@ -1048,7 +1105,7 @@ def _dmcrypt_map(
         keypath,
         'luksFormat',
         rawdev,
-        ] + cryptsetup_parameters
+    ] + cryptsetup_parameters
 
     luksOpen_args = [
         'cryptsetup',
@@ -1057,7 +1114,7 @@ def _dmcrypt_map(
         'luksOpen',
         rawdev,
         _uuid,
-        ]
+    ]
 
     create_args = [
         'cryptsetup',
@@ -1066,15 +1123,16 @@ def _dmcrypt_map(
         'create',
         _uuid,
         rawdev,
-        ] + cryptsetup_parameters
+    ] + cryptsetup_parameters
 
     try:
         if luks:
             if format_dev:
-		    command_check_call(luksFormat_args)
+                command_check_call(luksFormat_args)
             command_check_call(luksOpen_args)
         else:
-            # Plain mode has no format function, nor any validation that the key is correct.
+            # Plain mode has no format function, nor any validation
+            # that the key is correct.
             command_check_call(create_args)
         # set proper ownership of mapped device
         command_check_call(['chown', 'ceph:ceph', dev])
@@ -1086,7 +1144,7 @@ def _dmcrypt_map(
 
 def dmcrypt_unmap(
     _uuid
-    ):
+):
     """
     Removes the dmcrypt device with the given UUID.
     """
@@ -1107,7 +1165,7 @@ def mount(
     dev,
     fstype,
     options,
-    ):
+):
     """
     Mounts a device with given filesystem type and
     mount options to a tempfile path under /var/lib/ceph/tmp.
@@ -1126,7 +1184,7 @@ def mount(
     path = tempfile.mkdtemp(
         prefix='mnt.',
         dir=STATEDIR + '/tmp',
-        )
+    )
     try:
         LOG.debug('Mounting %s on %s with options %s', dev, path, options)
         command_check_call(
@@ -1137,8 +1195,8 @@ def mount(
                 '--',
                 dev,
                 path,
-                ],
-            )
+            ],
+        )
         if which('restorecon'):
             command(
                 [
@@ -1158,7 +1216,7 @@ def mount(
 
 def unmount(
     path,
-    ):
+):
     """
     Unmount and removes the given mount point.
     """
@@ -1171,8 +1229,8 @@ def unmount(
                     '/bin/umount',
                     '--',
                     path,
-                    ],
-                )
+                ],
+            )
             break
         except subprocess.CalledProcessError as e:
             # on failure, retry 3 times with incremental backoff
@@ -1191,6 +1249,7 @@ def extract_parted_partition_numbers(partitions):
     numbers_as_strings = re.findall('^\d+', partitions, re.MULTILINE)
     return map(int, numbers_as_strings)
 
+
 def get_free_partition_index(dev):
     """
     Get the next free partition index on a given device.
@@ -1206,18 +1265,19 @@ def get_free_partition_index(dev):
                 '--',
                 dev,
                 'print',
-                ],
-            )
+            ],
+        )
     except subprocess.CalledProcessError as e:
-        LOG.info('cannot read partition index; assume it isn\'t present\n (Error: %s)' % e)
+        LOG.info('cannot read partition index; assume it '
+                 'isn\'t present\n (Error: %s)' % e)
         return 1
 
     if not lines:
         raise Error('parted failed to output anything')
     LOG.debug('get_free_partition_index: analyzing ' + lines)
     if ('CHS;' not in lines and
-        'CYL;' not in lines and
-        'BYT;' not in lines):
+            'CYL;' not in lines and
+            'BYT;' not in lines):
         raise Error('parted output expected to contain one of ' +
                     'CHH; CYL; or BYT; : ' + lines)
     if os.path.realpath(dev) not in lines:
@@ -1230,6 +1290,25 @@ def get_free_partition_index(dev):
         return 1
 
 
+def check_journal_reqs(args):
+    _, _, allows_journal = command([
+        'ceph-osd', '--check-allows-journal',
+        '-i', '0',
+        '--cluster', args.cluster,
+    ])
+    _, _, wants_journal = command([
+        'ceph-osd', '--check-wants-journal',
+        '-i', '0',
+        '--cluster', args.cluster,
+    ])
+    _, _, needs_journal = command([
+        'ceph-osd', '--check-needs-journal',
+        '-i', '0',
+        '--cluster', args.cluster,
+    ])
+    return (not allows_journal, not wants_journal, not needs_journal)
+
+
 def update_partition(dev, description):
     """
     Must be called after modifying a partition table so the kernel
@@ -1254,14 +1333,16 @@ def update_partition(dev, description):
         except subprocess.CalledProcessError as e:
             error = e.output
             if ('unable to inform the kernel' not in error and
-                'Device or resource busy' not in error):
+                    'Device or resource busy' not in error):
                 raise
-            LOG.debug('partprobe %s failed : %s (ignored, waiting 60s)' % (dev, error))
+            LOG.debug('partprobe %s failed : %s (ignored, waiting 60s)'
+                      % (dev, error))
             time.sleep(60)
     if not partprobe_ok:
         raise Error('partprobe %s failed : %s' % (dev, error))
     command_check_call(['udevadm', 'settle', '--timeout=600'])
 
+
 def zap(dev):
     """
     Destroy the partition table and content of a given disk.
@@ -1279,7 +1360,7 @@ def zap(dev):
         size = 33 * lba_size
         with file(dev, 'wb') as dev_file:
             dev_file.seek(-size, os.SEEK_END)
-            dev_file.write(size*'\0')
+            dev_file.write(size * '\0')
 
         command_check_call(
             [
@@ -1305,210 +1386,6 @@ def zap(dev):
         raise Error(e)
 
 
-def prepare_journal_dev(
-    data,
-    journal,
-    journal_size,
-    journal_uuid,
-    journal_dm_keypath,
-    cryptsetup_parameters,
-    luks
-   ):
-
-    reusing_partition = False
-
-    if is_partition(journal):
-        if journal_dm_keypath:
-            raise Error(journal + ' partition already exists'
-                        ' and --dmcrypt specified')
-        LOG.debug('Journal %s is a partition', journal)
-        LOG.warning('OSD will not be hot-swappable if journal is not the same device as the osd data')
-        if get_partition_type(journal) in (JOURNAL_UUID, MPATH_JOURNAL_UUID):
-            LOG.debug('Journal %s was previously prepared with ceph-disk. Reusing it.', journal)
-            reusing_partition = True
-            # Read and reuse the partition uuid from this journal's previous life.
-            # We reuse the uuid instead of changing it because udev does not reliably
-            # notice changes to an existing partition's GUID.
-            # See http://tracker.ceph.com/issues/10146
-            journal_uuid = get_partition_uuid(journal)
-            LOG.debug('Reusing journal with uuid %s', journal_uuid)
-        else:
-            LOG.warning('Journal %s was not prepared with ceph-disk. Symlinking directly.', journal)
-            return (journal, None, None)
-
-    journal_symlink = '/dev/disk/by-partuuid/{journal_uuid}'.format(
-        journal_uuid=journal_uuid,
-        )
-
-    journal_dmcrypt = None
-    if journal_dm_keypath:
-        journal_dmcrypt = journal_symlink
-        journal_symlink = '/dev/mapper/{uuid}'.format(uuid=journal_uuid)
-
-    if reusing_partition:
-        # confirm that the journal_symlink exists. It should since this was an active journal
-        # in the past. Continuing otherwise would be futile.
-        assert os.path.exists(journal_symlink)
-        return (journal_symlink, journal_dmcrypt, journal_uuid)
-
-    # From here on we are creating a new journal device, not reusing.
-
-    ptype = JOURNAL_UUID
-    ptype_tobe = JOURNAL_UUID
-    if is_mpath(journal):
-        ptype = MPATH_JOURNAL_UUID
-        ptype_tobe = MPATH_JOURNAL_UUID
-    if journal_dm_keypath:
-        if luks:
-            ptype = DMCRYPT_LUKS_JOURNAL_UUID
-        else:
-            ptype = DMCRYPT_JOURNAL_UUID
-        ptype_tobe = DMCRYPT_JOURNAL_TOBE_UUID
-
-    # it is a whole disk.  create a partition!
-    num = None
-    if journal == data:
-        # we're sharing the disk between osd data and journal;
-        # make journal be partition number 2, so it's pretty
-        num = 2
-        journal_part = '{num}:0:{size}M'.format(
-            num=num,
-            size=journal_size,
-            )
-    else:
-        # sgdisk has no way for me to say "whatever is the next
-        # free index number" when setting type guids etc, so we
-        # need to awkwardly look up the next free number, and then
-        # fix that in the call -- and hope nobody races with us;
-        # then again nothing guards the partition table from races
-        # anyway
-        num = get_free_partition_index(dev=journal)
-        journal_part = '{num}:0:+{size}M'.format(
-            num=num,
-            size=journal_size,
-            )
-        LOG.warning('OSD will not be hot-swappable if journal is not the same device as the osd data')
-
-    dev_size = get_dev_size(journal)
-
-    if journal_size > dev_size:
-        LOG.error('refusing to create journal on %s' % journal)
-        LOG.error('journal size (%sM) is bigger than device (%sM)' % (journal_size, dev_size))
-        raise Error(
-            '%s device size (%sM) is not big enough for journal' % (journal, dev_size)
-        )
-
-    try:
-        LOG.debug('Creating journal partition num %d size %d on %s', num, journal_size, journal)
-        command_check_call(
-            [
-                'sgdisk',
-                '--new={part}'.format(part=journal_part),
-                '--change-name={num}:ceph journal'.format(num=num),
-                '--partition-guid={num}:{journal_uuid}'.format(
-                    num=num,
-                    journal_uuid=journal_uuid,
-                    ),
-                '--typecode={num}:{uuid}'.format(
-                    num=num,
-                    uuid=ptype_tobe,
-                    ),
-                '--mbrtogpt',
-                '--',
-                journal,
-                ]
-            )
-
-        update_partition(journal, 'prepared')
-
-        LOG.debug('Journal is GPT partition %s', journal_symlink)
-
-        if journal_dm_keypath:
-            if luks:
-                luksFormat_args = [
-                    'cryptsetup',
-                    '--batch-mode',
-                    '--key-file',
-                    journal_dm_keypath,
-                    'luksFormat',
-                    journal_dmcrypt,
-                    ] + cryptsetup_parameters
-
-                try:
-                    command_check_call(luksFormat_args)
-                except subprocess.CalledProcessError as e:
-                    raise Error('unable to format device for LUKS', journal_symlink, e)
-
-            try:
-                command_check_call(
-                    [
-                        'sgdisk',
-                        '--typecode={num}:{uuid}'.format(
-                            num=num,
-                            uuid=ptype,
-                            ),
-                        '--',
-                        journal,
-                        ],
-                    )
-            except subprocess.CalledProcessError as e:
-                raise Error('unable to mark device as formatted for LUKS', journal_symlink, e)
-
-        LOG.debug('Journal is GPT partition %s', journal_symlink)
-        return (journal_symlink, journal_dmcrypt, journal_uuid)
-
-    except subprocess.CalledProcessError as e:
-        raise Error(e)
-
-def prepare_journal_file(
-    journal):
-
-    if not os.path.exists(journal):
-        LOG.debug('Creating journal file %s with size 0 (ceph-osd will resize and allocate)', journal)
-        with file(journal, 'wb') as journal_file:  # noqa
-            pass
-
-    LOG.debug('Journal is file %s', journal)
-    LOG.warning('OSD will not be hot-swappable if journal is not the same device as the osd data')
-    return (journal, None, None)
-
-
-def prepare_journal(
-    data,
-    journal,
-    journal_size,
-    journal_uuid,
-    force_file,
-    force_dev,
-    journal_dm_keypath,
-    cryptsetup_parameters,
-    luks
-    ):
-
-    if journal is None:
-        if force_dev:
-            raise Error('Journal is unspecified; not a block device')
-        return (None, None, None)
-
-    if not os.path.exists(journal):
-        if force_dev:
-            raise Error('Journal does not exist; not a block device', journal)
-        return prepare_journal_file(journal)
-
-    jmode = os.stat(journal).st_mode
-    if stat.S_ISREG(jmode):
-        if force_dev:
-            raise Error('Journal is not a block device', journal)
-        return prepare_journal_file(journal)
-
-    if stat.S_ISBLK(jmode):
-        if force_file:
-            raise Error('Journal is not a regular file', journal)
-        return prepare_journal_dev(data, journal, journal_size, journal_uuid, journal_dm_keypath, cryptsetup_parameters, luks)
-
-    raise Error('Journal %s is neither a block device nor regular file' % journal)
-
-
 def adjust_symlink(target, path):
     create = True
     if os.path.lexists(path):
@@ -1525,7 +1402,8 @@ def adjust_symlink(target, path):
                 else:
                     create = False
         except:
-            raise Error('unable to remove (or adjust) old file (symlink)', path)
+            raise Error('unable to remove (or adjust) old file (symlink)',
+                        path)
     if create:
         LOG.debug('Creating symlink %s -> %s', path, target)
         try:
@@ -1534,433 +1412,986 @@ def adjust_symlink(target, path):
             raise Error('unable to create symlink %s -> %s' % (path, target))
 
 
-def prepare_dir(
-    path,
-    journal,
-    cluster_uuid,
-    osd_uuid,
-    journal_uuid,
-    journal_dmcrypt=None,
-    ):
-
-    if os.path.exists(os.path.join(path, 'magic')):
-        LOG.debug('Data dir %s already exists', path)
-        return
-    else:
-        LOG.debug('Preparing osd data dir %s', path)
+class Device(object):
+
+    def __init__(self, path, args):
+        self.args = args
+        self.path = path
+        self.dev_size = None
+        self.partitions = {}
+        self.ptype_map = None
+        assert not is_partition(self.path)
+
+    def create_partition(self, uuid, name, size=0, num=0):
+        ptype = self.ptype_tobe_for_name(name)
+        if num == 0:
+            num = get_free_partition_index(dev=self.path)
+        if size > 0:
+            new = '--new={num}:0:+{size}M'.format(num=num, size=size)
+            if size > self.get_dev_size():
+                LOG.error('refusing to create %s on %s' % (name, self.path))
+                LOG.error('%s size (%sM) is bigger than device (%sM)'
+                          % (name, size, self.get_dev_size()))
+                raise Error('%s device size (%sM) is not big enough for %s'
+                            % (self.path, self.get_dev_size(), name))
+        else:
+            new = '--largest-new={num}'.format(num=num)
 
-    if osd_uuid is None:
-        osd_uuid = str(uuid.uuid4())
+        LOG.debug('Creating %s partition num %d size %d on %s',
+                  name, num, size, self.path)
+        command_check_call(
+            [
+                'sgdisk',
+                new,
+                '--change-name={num}:ceph {name}'.format(num=num, name=name),
+                '--partition-guid={num}:{uuid}'.format(num=num, uuid=uuid),
+                '--typecode={num}:{uuid}'.format(num=num, uuid=ptype),
+                '--mbrtogpt',
+                '--',
+                self.path,
+            ]
+        )
+        update_partition(self.path, 'created')
+        return num
+
+    def ptype_tobe_for_name(self, name):
+        if name == 'data':
+            name = 'osd'
+        if self.ptype_map is None:
+            partition = DevicePartition.factory(
+                path=self.path, dev=None, args=self.args)
+            self.ptype_map = partition.ptype_map
+        return self.ptype_map[name]['tobe']
+
+    def get_partition(self, num):
+        if num not in self.partitions:
+            dev = get_partition_dev(self.path, num)
+            partition = DevicePartition.factory(
+                path=self.path, dev=dev, args=self.args)
+            partition.set_partition_number(num)
+            self.partitions[num] = partition
+        return self.partitions[num]
+
+    def get_dev_size(self):
+        if self.dev_size is None:
+            self.dev_size = get_dev_size(self.path)
+        return self.dev_size
+
+    @staticmethod
+    def factory(path, args):
+        return Device(path, args)
+
+
+class DevicePartition(object):
+
+    def __init__(self, args):
+        self.args = args
+        self.num = None
+        self.rawdev = None
+        self.dev = None
+        self.uuid = None
+        self.ptype_map = None
+        self.ptype = None
+        self.set_variables_ptype()
+
+    def get_uuid(self):
+        if self.uuid is None:
+            self.uuid = get_partition_uuid(self.rawdev)
+        return self.uuid
+
+    def get_ptype(self):
+        if self.ptype is None:
+            self.ptype = get_partition_type(self.rawdev)
+        return self.ptype
+
+    def set_partition_number(self, num):
+        self.num = num
+
+    def get_partition_number(self):
+        return self.num
+
+    def set_dev(self, dev):
+        self.dev = dev
+        self.rawdev = dev
+
+    def get_dev(self):
+        return self.dev
+
+    def get_rawdev(self):
+        return self.rawdev
+
+    def set_variables_ptype(self):
+        self.ptype_map = PTYPE['regular']
+
+    def ptype_for_name(self, name):
+        return self.ptype_map[name]['ready']
+
+    @staticmethod
+    def factory(path, dev, args):
+        dmcrypt_type = CryptHelpers.get_dmcrypt_type(args)
+        if ((path is not None and is_mpath(path)) or
+                (dev is not None and is_mpath(dev))):
+            partition = DevicePartitionMultipath(args)
+        elif dmcrypt_type == 'luks':
+            partition = DevicePartitionCryptLuks(args)
+        elif dmcrypt_type == 'plain':
+            partition = DevicePartitionCryptPlain(args)
+        else:
+            partition = DevicePartition(args)
+        partition.set_dev(dev)
+        return partition
 
-    if journal is not None:
-        # we're using an external journal; point to it here
-        adjust_symlink(journal, os.path.join(path, 'journal'))
 
-    if journal_dmcrypt is not None:
-        adjust_symlink(journal_dmcrypt, os.path.join(path, 'journal_dmcrypt'))
-    else:
-        try:
-            os.unlink(os.path.join(path, 'journal_dmcrypt'))
-        except OSError:
-            pass
+class DevicePartitionMultipath(DevicePartition):
 
-    write_one_line(path, 'ceph_fsid', cluster_uuid)
-    write_one_line(path, 'fsid', osd_uuid)
+    def set_variables_ptype(self):
+        self.ptype_map = PTYPE['mpath']
 
-    if journal_uuid is not None:
-        # i.e., journal is a tagged partition
-        write_one_line(path, 'journal_uuid', journal_uuid)
 
-    write_one_line(path, 'magic', CEPH_OSD_ONDISK_MAGIC)
+class DevicePartitionCrypt(DevicePartition):
 
+    def __init__(self, args):
+        super(DevicePartitionCrypt, self).__init__(args)
+        self.osd_dm_keypath = None
+        self.cryptsetup_parameters = CryptHelpers.get_cryptsetup_parameters(
+            self.args)
+        self.dmcrypt_type = CryptHelpers.get_dmcrypt_type(self.args)
+        self.dmcrypt_keysize = CryptHelpers.get_dmcrypt_keysize(self.args)
 
-def prepare_dev(
-    data,
-    journal,
-    fstype,
-    mkfs_args,
-    mount_options,
-    cluster_uuid,
-    osd_uuid,
-    journal_uuid,
-    journal_dmcrypt,
-    osd_dm_keypath,
-    cryptsetup_parameters,
-    luks
-    ):
-    """
-    Prepare a data/journal combination to be used for an OSD.
+    def setup_crypt(self):
+        pass
 
-    The ``magic`` file is written last, so it's presence is a reliable
-    indicator of the whole sequence having completed.
+    def map(self):
+        self.setup_crypt()
+        self.dev = _dmcrypt_map(
+            rawdev=self.rawdev,
+            keypath=self.osd_dm_keypath,
+            _uuid=self.get_uuid(),
+            cryptsetup_parameters=self.cryptsetup_parameters,
+            luks=self.luks(),
+            format_dev=True,
+        )
 
-    WARNING: This will unconditionally overwrite anything given to
-    it.
-    """
+    def unmap(self):
+        self.setup_crypt()
+        dmcrypt_unmap(self.get_uuid())
+        self.dev = self.rawdev
 
-    ptype_tobe = TOBE_UUID
-    ptype_osd = OSD_UUID
-    if is_mpath(data):
-        ptype_tobe = MPATH_TOBE_UUID
-        ptype_osd = MPATH_OSD_UUID
+    def format(self):
+        self.setup_crypt()
+        self.map()
+        self.unmap()
 
-    if osd_dm_keypath:
-        ptype_tobe = DMCRYPT_TOBE_UUID
-        if luks:
-            ptype_osd = DMCRYPT_LUKS_OSD_UUID
-        else:
-            ptype_osd = DMCRYPT_OSD_UUID
 
-    rawdev = None
-    if is_partition(data):
-        LOG.debug('OSD data device %s is a partition', data)
-        rawdev = data
+class DevicePartitionCryptPlain(DevicePartitionCrypt):
 
-        ptype = get_partition_type(rawdev)
-        if ptype != ptype_osd:
-                LOG.warning('incorrect partition UUID: %s, expected %s'
-                            % (ptype, ptype_osd))
-    else:
-        LOG.debug('Creating osd partition on %s', data)
-        try:
-            command_check_call(
-                [
-                    'sgdisk',
-                    '--largest-new=1',
-                    '--change-name=1:ceph data',
-                    '--partition-guid=1:{osd_uuid}'.format(
-                        osd_uuid=osd_uuid,
-                        ),
-                    '--typecode=1:%s' % ptype_tobe,
-                    '--',
-                    data,
-                ],
-            )
-            update_partition(data, 'created')
-        except subprocess.CalledProcessError as e:
-            raise Error(e)
+    def luks(self):
+        return False
 
-        rawdev = get_partition_dev(data, 1)
+    def setup_crypt(self):
+        if self.osd_dm_keypath is not None:
+            return
 
-    dev = None
-    if osd_dm_keypath:
-        dev = _dmcrypt_map(
-                rawdev=rawdev,
-                keypath=osd_dm_keypath,
-                _uuid=osd_uuid,
-                cryptsetup_parameters=cryptsetup_parameters,
-                luks=luks,
-                format_dev=True,
-                )
-    else:
-        dev = rawdev
+        self.cryptsetup_parameters += ['--key-size', str(self.dmcrypt_keysize)]
 
-    try:
-        args = [
-            'mkfs',
-            '-t',
-            fstype,
-            ]
-        if mkfs_args is not None:
-            args.extend(mkfs_args.split())
-            if fstype == 'xfs':
-                args.extend(['-f'])  # always force
-        else:
-            args.extend(MKFS_ARGS.get(fstype, []))
-        args.extend([
-            '--',
-            dev,
-            ])
-        try:
-            LOG.debug('Creating %s fs on %s', fstype, dev)
-            command_check_call(args)
-        except subprocess.CalledProcessError as e:
-            raise Error(e)
+        self.osd_dm_keypath = get_or_create_dmcrypt_key(
+            self.get_uuid(), self.args.dmcrypt_key_dir,
+            self.dmcrypt_keysize, False)
 
-        #remove whitespaces from mount_options
-        if mount_options is not None:
-            mount_options = "".join(mount_options.split())
+    def set_variables_ptype(self):
+        self.ptype_map = PTYPE['plain']
 
-        path = mount(dev=dev, fstype=fstype, options=mount_options)
 
-        try:
-            prepare_dir(
-                path=path,
-                journal=journal,
-                cluster_uuid=cluster_uuid,
-                osd_uuid=osd_uuid,
-                journal_uuid=journal_uuid,
-                journal_dmcrypt=journal_dmcrypt,
-                )
-        finally:
-            path_set_context(path)
-            unmount(path)
-    finally:
-        if rawdev != dev:
-            dmcrypt_unmap(osd_uuid)
+class DevicePartitionCryptLuks(DevicePartitionCrypt):
 
-    if not is_partition(data):
-        try:
-            command_check_call(
-                [
-                    'sgdisk',
-                    '--typecode=1:%s' % ptype_osd,
-                    '--',
-                    data,
-                ],
-            )
-        except subprocess.CalledProcessError as e:
-            raise Error(e)
-        update_partition(data, 'prepared')
-        command_check_call(['udevadm', 'trigger',
-                            '--action=add',
-                            '--sysname-match',
-                            os.path.basename(rawdev)])
+    def luks(self):
+        return True
 
-def check_journal_reqs(args):
-    _, _, allows_journal = command([
-        'ceph-osd', '--check-allows-journal',
-        '-i', '0',
-        '--cluster', args.cluster,
-    ])
-    _, _, wants_journal = command([
-        'ceph-osd', '--check-wants-journal',
-        '-i', '0',
-        '--cluster', args.cluster,
-    ])
-    _, _, needs_journal = command([
-        'ceph-osd', '--check-needs-journal',
-        '-i', '0',
-        '--cluster', args.cluster,
-    ])
-    return (not allows_journal, not wants_journal, not needs_journal)
+    def setup_crypt(self):
+        if self.osd_dm_keypath is not None:
+            return
 
-def main_prepare(args):
-    journal_dm_keypath = None
-    osd_dm_keypath = None
+        if self.dmcrypt_keysize == 1024:
+            # We don't force this into the cryptsetup_parameters,
+            # as we want the cryptsetup defaults
+            # to prevail for the actual LUKS key lengths.
+            pass
+        else:
+            self.cryptsetup_parameters += ['--key-size',
+                                           str(self.dmcrypt_keysize)]
 
-    try:
-        # first learn what the osd allows/wants/needs
-        (allows_journal, wants_journal, needs_journal) = check_journal_reqs(args)
-
-        prepare_lock.acquire()  # noqa
-        if not os.path.exists(args.data):
-            if args.data_dev:
-                raise Error('data path for device does not exist', args.data)
-            if args.data_dir:
-                raise Error('data path for directory does not exist', args.data)
-            raise Error('data path does not exist', args.data)
-
-        # in use?
-        dmode = os.stat(args.data).st_mode
-        if stat.S_ISBLK(dmode):
-            verify_not_in_use(args.data, True)
+        self.osd_dm_keypath = get_or_create_dmcrypt_key(
+            self.get_uuid(), self.args.dmcrypt_key_dir,
+            self.dmcrypt_keysize, True)
 
-        if args.journal and not allows_journal:
-            raise Error('journal specified but not allowed by osd backend')
+    def set_variables_ptype(self):
+        self.ptype_map = PTYPE['luks']
 
-        if args.journal and os.path.exists(args.journal):
-            jmode = os.stat(args.journal).st_mode
-            if stat.S_ISBLK(jmode):
-                verify_not_in_use(args.journal, False)
 
-        if args.zap_disk is not None:
-            zap(args.data)
+class Prepare(object):
 
-        if args.cluster_uuid is None:
-            args.cluster_uuid = get_fsid(cluster=args.cluster)
-            if args.cluster_uuid is None:
-                raise Error(
-                    'must have fsid in config or pass --cluster-uuid=',
-                    )
+    @staticmethod
+    def parser():
+        parser = argparse.ArgumentParser(add_help=False)
+        parser.add_argument(
+            '--cluster',
+            metavar='NAME',
+            default='ceph',
+            help='cluster name to assign this disk to',
+        )
+        parser.add_argument(
+            '--cluster-uuid',
+            metavar='UUID',
+            help='cluster uuid to assign this disk to',
+        )
+        parser.add_argument(
+            '--osd-uuid',
+            metavar='UUID',
+            help='unique OSD uuid to assign this disk to',
+        )
+        parser.add_argument(
+            '--dmcrypt',
+            action='store_true', default=None,
+            help='encrypt DATA and/or JOURNAL devices with dm-crypt',
+        )
+        parser.add_argument(
+            '--dmcrypt-key-dir',
+            metavar='KEYDIR',
+            default='/etc/ceph/dmcrypt-keys',
+            help='directory where dm-crypt keys are stored',
+        )
+        return parser
 
-        if args.fs_type is None:
-            args.fs_type = get_conf(
-                cluster=args.cluster,
-                variable='osd_mkfs_type',
-                )
-            if args.fs_type is None:
-                args.fs_type = get_conf(
-                    cluster=args.cluster,
-                    variable='osd_fs_type',
-                    )
-            if args.fs_type is None:
-                args.fs_type = DEFAULT_FS_TYPE
+    @staticmethod
+    def set_subparser(subparsers):
+        parents = [
+            Prepare.parser(),
+            PrepareData.parser(),
+        ]
+        parents.extend(PrepareFilestore.parent_parsers())
+        parents.extend(PrepareBluestore.parent_parsers())
+        parser = subparsers.add_parser(
+            'prepare',
+            parents=parents,
+            help='Prepare a directory or disk for a Ceph OSD',
+        )
+        parser.set_defaults(
+            func=Prepare.main,
+        )
+        return parser
 
-        mkfs_args = get_conf(
-            cluster=args.cluster,
-            variable='osd_mkfs_options_{fstype}'.format(
-                fstype=args.fs_type,
-                ),
-            )
-        if mkfs_args is None:
-            mkfs_args = get_conf(
-                cluster=args.cluster,
-                variable='osd_fs_mkfs_options_{fstype}'.format(
-                    fstype=args.fs_type,
-                    ),
-                )
+    def prepare(self):
+        prepare_lock.acquire()
+        self.prepare_locked()
+        prepare_lock.release()
 
-        mount_options = get_conf(
-            cluster=args.cluster,
-            variable='osd_mount_options_{fstype}'.format(
-                fstype=args.fs_type,
-                ),
-            )
-        if mount_options is None:
-            mount_options = get_conf(
-                cluster=args.cluster,
-                variable='osd_fs_mount_options_{fstype}'.format(
-                    fstype=args.fs_type,
+    @staticmethod
+    def factory(args):
+        if args.bluestore:
+            return PrepareBluestore(args)
+        else:
+            return PrepareFilestore(args)
+
+    @staticmethod
+    def main(args):
+        Prepare.factory(args).prepare()
+
+
+class PrepareFilestore(Prepare):
+
+    def __init__(self, args):
+        self.data = PrepareFilestoreData(args)
+        self.journal = PrepareJournal(args)
+
+    @staticmethod
+    def parent_parsers():
+        return [
+            PrepareJournal.parser(),
+        ]
+
+    def prepare_locked(self):
+        self.data.prepare(self.journal)
+
+
+class PrepareBluestore(Prepare):
+
+    def __init__(self, args):
+        self.data = PrepareBluestoreData(args)
+        self.block = PrepareBluestoreBlock(args)
+
+    @staticmethod
+    def parser():
+        parser = argparse.ArgumentParser(add_help=False)
+        parser.add_argument(
+            '--bluestore',
+            action='store_true', default=None,
+            help='bluestore objectstore',
+        )
+        return parser
+
+    @staticmethod
+    def parent_parsers():
+        return [
+            PrepareBluestore.parser(),
+            PrepareBluestoreBlock.parser(),
+        ]
+
+    def prepare_locked(self):
+        self.data.prepare(self.block)
+
+
+class Space(object):
+
+    NAMES = ('block', 'journal')
+
+
+class PrepareSpace(object):
+
+    NONE = 0
+    FILE = 1
+    DEVICE = 2
+
+    def __init__(self, args):
+        self.args = args
+        self.set_type()
+        self.space_size = self.get_space_size()
+        if (getattr(self.args, self.name) and
+                getattr(self.args, self.name + '_uuid') is None):
+            setattr(self.args, self.name + '_uuid', str(uuid.uuid4()))
+        self.space_symlink = None
+        self.space_dmcrypt = None
+
+    def set_type(self):
+        """Set self.type to NONE, FILE or DEVICE based on the arguments."""
+        name, args = self.name, self.args
+        dmode = os.stat(args.data).st_mode
+        if (self.wants_space() and
+                stat.S_ISBLK(dmode) and
+                not is_partition(args.data) and
+                getattr(args, name) is None and
+                getattr(args, name + '_file') is None):
+            LOG.info('Will colocate %s with data on %s', name, args.data)
+            setattr(args, name, args.data)
+
+        if getattr(args, name) is None:
+            if getattr(args, name + '_dev'):
+                raise Error('%s is unspecified; not a block device' %
+                            name.capitalize(), getattr(args, name))
+            self.type = self.NONE
+            return
+
+        if not os.path.exists(getattr(args, name)):
+            if getattr(args, name + '_dev'):
+                raise Error('%s does not exist; not a block device' %
+                            name.capitalize(), getattr(args, name))
+            self.type = self.FILE
+            return
+
+        mode = os.stat(getattr(args, name)).st_mode
+        if stat.S_ISBLK(mode):
+            if getattr(args, name + '_file'):
+                raise Error('%s is not a regular file' % name.capitalize(),
+                            getattr(args, name))
+            self.type = self.DEVICE
+            return
+
+        if stat.S_ISREG(mode):
+            if getattr(args, name + '_dev'):
+                raise Error('%s is not a block device' % name.capitalize(),
+                            getattr(args, name))
+            self.type = self.FILE
+            return
+
+        raise Error('%s %s is neither a block device nor regular file' %
+                    (name.capitalize(), getattr(args, name)))
+
+    def is_none(self):
+        return self.type == self.NONE
+
+    def is_file(self):
+        return self.type == self.FILE
+
+    def is_device(self):
+        return self.type == self.DEVICE
+
+    @staticmethod
+    def parser(name):
+        """Return the parser for the NAME argument and its flags."""
+        parser = argparse.ArgumentParser(add_help=False)
+        parser.add_argument(
+            '--%s-uuid' % name,
+            metavar='UUID',
+            help='unique uuid to assign to the %s' % name,
+        )
+        parser.add_argument(
+            '--%s-file' % name,
+            action='store_true', default=None,
+            help='verify that %s is a file' % name.upper(),
+        )
+        parser.add_argument(
+            '--%s-dev' % name,
+            action='store_true', default=None,
+            help='verify that %s is a block device' % name.upper(),
+        )
+        parser.add_argument(
+            name,
+            metavar=name.upper(), nargs='?',
+            help=('path to OSD %s disk block device;' % name +
+                  ' leave out to store %s in file' % name),
+        )
+        return parser
+
+    def wants_space(self):
+        return True
+
+    def populate_data_path(self, path):
+        if self.type == self.DEVICE:
+            self.populate_data_path_device(path)
+        elif self.type == self.FILE:
+            self.populate_data_path_file(path)
+        elif self.type == self.NONE:
+            pass
+        else:
+            raise Error('unexpected type ', self.type)
+
+    def populate_data_path_file(self, path):
+        space_uuid = self.name + '_uuid'
+        if getattr(self.args, space_uuid) is not None:
+            write_one_line(path, space_uuid,
+                           getattr(self.args, space_uuid))
+
+    def populate_data_path_device(self, path):
+        self.populate_data_path_file(path)
+        if self.space_symlink is not None:
+            adjust_symlink(self.space_symlink,
+                           os.path.join(path, self.name))
+
+        if self.space_dmcrypt is not None:
+            adjust_symlink(self.space_dmcrypt,
+                           os.path.join(path, self.name + '_dmcrypt'))
+        else:
+            try:
+                os.unlink(os.path.join(path, self.name + '_dmcrypt'))
+            except OSError:
+                pass
+
+    def prepare(self):
+        if self.type == self.DEVICE:
+            self.prepare_device()
+        elif self.type == self.FILE:
+            self.prepare_file()
+        elif self.type == self.NONE:
+            pass
+        else:
+            raise Error('unexpected type ', self.type)
+
+    def prepare_file(self):
+        """Create the space file if missing and record its path."""
+        space_filename = getattr(self.args, self.name)
+        if not os.path.exists(space_filename):
+            LOG.debug('Creating %s file %s with size 0'
+                      ' (ceph-osd will resize and allocate)',
+                      self.name, space_filename)
+            with file(space_filename, 'wb'):
+                pass
+
+        LOG.debug('%s is file %s', self.name.capitalize(), space_filename)
+        LOG.warning('OSD will not be hot-swappable if %s is '
+                    'not the same device as the osd data' %
+                    self.name)
+        # symlink target must be the path, not the (closed) file object
+        self.space_symlink = space_filename
+
+    def prepare_device(self):
+        reusing_partition = False
+
+        if is_partition(getattr(self.args, self.name)):
+            LOG.debug('%s %s is a partition',
+                      self.name.capitalize(), getattr(self.args, self.name))
+            partition = DevicePartition.factory(
+                path=None, dev=getattr(self.args, self.name), args=self.args)
+            if isinstance(partition, DevicePartitionCrypt):
+                raise Error(getattr(self.args, self.name) +
+                            ' partition already exists'
+                            ' and --dmcrypt specified')
+            LOG.warning('OSD will not be hot-swappable' +
+                        ' if ' + self.name + ' is not' +
+                        ' the same device as the osd data')
+            if partition.get_ptype() == partition.ptype_for_name(self.name):
+                LOG.debug('%s %s was previously prepared with '
+                          'ceph-disk. Reusing it.',
+                          self.name.capitalize(),
+                          getattr(self.args, self.name))
+                reusing_partition = True
+                # Read and reuse the partition uuid from this journal's
+                # previous life. We reuse the uuid instead of changing it
+                # because udev does not reliably notice changes to an
+                # existing partition's GUID.  See
+                # http://tracker.ceph.com/issues/10146
+                setattr(self.args, self.name + '_uuid', partition.get_uuid())
+                LOG.debug('Reusing %s with uuid %s',
+                          self.name,
+                          getattr(self.args, self.name + '_uuid'))
+            else:
+                LOG.warning('%s %s was not prepared with '
+                            'ceph-disk. Symlinking directly.',
+                            self.name.capitalize(),
+                            getattr(self.args, self.name))
+                self.space_symlink = getattr(self.args, self.name)
+                return
+
+        self.space_symlink = '/dev/disk/by-partuuid/{uuid}'.format(
+            uuid=getattr(self.args, self.name + '_uuid'))
+
+        if self.args.dmcrypt:
+            self.space_dmcrypt = self.space_symlink
+            self.space_symlink = '/dev/mapper/{uuid}'.format(
+                uuid=getattr(self.args, self.name + '_uuid'))
+
+        if reusing_partition:
+            # confirm that the space_symlink exists. It should since
+            # this was an active space
+            # in the past. Continuing otherwise would be futile.
+            assert os.path.exists(self.space_symlink)
+            return
+
+        num = self.desired_partition_number()
+
+        if num == 0:
+            LOG.warning('OSD will not be hot-swappable if %s '
+                        'is not the same device as the osd data',
+                        self.name)
+
+        device = Device.factory(getattr(self.args, self.name), self.args)
+        num = device.create_partition(
+            uuid=getattr(self.args, self.name + '_uuid'),
+            name=self.name,
+            size=self.space_size,
+            num=num)
+
+        partition = device.get_partition(num)
+
+        LOG.debug('%s is GPT partition %s',
+                  self.name.capitalize(),
+                  self.space_symlink)
+
+        if isinstance(partition, DevicePartitionCrypt):
+            partition.format()
+
+            command_check_call(
+                [
+                    'sgdisk',
+                    '--typecode={num}:{uuid}'.format(
+                        num=num,
+                        uuid=partition.ptype_for_name(self.name),
                     ),
-                )
+                    '--',
+                    getattr(self.args, self.name),
+                ],
+            )
 
-        journal_size = get_conf_with_default(
-            cluster=args.cluster,
+        LOG.debug('%s is GPT partition %s',
+                  self.name.capitalize(),
+                  self.space_symlink)
+
+
+class PrepareJournal(PrepareSpace):
+
+    def __init__(self, args):
+        self.name = 'journal'
+        (self.allows_journal,
+         self.wants_journal,
+         self.needs_journal) = check_journal_reqs(args)
+
+        if args.journal and not self.allows_journal:
+            raise Error('journal specified but not allowed by osd backend')
+
+        super(PrepareJournal, self).__init__(args)
+
+    def wants_space(self):
+        return self.wants_journal
+
+    def get_space_size(self):
+        return int(get_conf_with_default(
+            cluster=self.args.cluster,
             variable='osd_journal_size',
-            )
-        journal_size = int(journal_size)
+        ))
+
+    def desired_partition_number(self):
+        if self.args.journal == self.args.data:
+            # we're sharing the disk between osd data and journal;
+            # make journal be partition number 2
+            num = 2
+        else:
+            num = 0
+        return num
+
+    @staticmethod
+    def parser():
+        return PrepareSpace.parser('journal')
+
+
+class PrepareBluestoreBlock(PrepareSpace):
+
+    def __init__(self, args):
+        self.name = 'block'
+        super(PrepareBluestoreBlock, self).__init__(args)
+
+    def get_space_size(self):
+        return 0  # get as much space as possible
+
+    def desired_partition_number(self):
+        if self.args.block == self.args.data:
+            num = 2
+        else:
+            num = 0
+        return num
+
+    @staticmethod
+    def parser():
+        return PrepareSpace.parser('block')
 
+
+class CryptHelpers(object):
+
+    @staticmethod
+    def get_cryptsetup_parameters(args):
         cryptsetup_parameters_str = get_conf(
             cluster=args.cluster,
             variable='osd_cryptsetup_parameters',
-            )
+        )
         if cryptsetup_parameters_str is None:
-            cryptsetup_parameters = []
+            return []
         else:
-            cryptsetup_parameters = shlex.split(cryptsetup_parameters_str)
+            return shlex.split(cryptsetup_parameters_str)
 
+    @staticmethod
+    def get_dmcrypt_keysize(args):
         dmcrypt_keysize_str = get_conf(
             cluster=args.cluster,
             variable='osd_dmcrypt_key_size',
-            )
-
-        dmcrypt_type = get_conf(
-            cluster=args.cluster,
-            variable='osd_dmcrypt_type',
-            )
-
-        if dmcrypt_type is None:
-            dmcrypt_type = "luks"
-
-        if dmcrypt_type == "plain":
-            if dmcrypt_keysize_str is None:
-                # This value is hard-coded in the udev script
-                dmcrypt_keysize = 256
-            else:
-                dmcrypt_keysize = int(dmcrypt_keysize_str)
-                LOG.warning('''ensure the 95-ceph-osd.rules file has been copied to /etc/udev/rules.d
- and modified to call cryptsetup with --key-size=%s'''
-                            % dmcrypt_keysize_str)
-
-            if len (cryptsetup_parameters) > 0:
-                LOG.warning('''ensure the 95-ceph-osd.rules file has been copied to /etc/udev/rules.d
- and modified to call cryptsetup with %s'''
-                            % cryptsetup_parameters_str)
-
-            cryptsetup_parameters = ['--key-size', str(dmcrypt_keysize)] + cryptsetup_parameters
-            luks = False
-        elif dmcrypt_type == "luks":
+        )
+        dmcrypt_type = CryptHelpers.get_dmcrypt_type(args)
+        if dmcrypt_type == 'luks':
             if dmcrypt_keysize_str is None:
-                # As LUKS will hash the 'passphrase' in .luks.key into a key, set a large default
+                # As LUKS will hash the 'passphrase' in .luks.key
+                # into a key, set a large default
                 # so if not updated for some time, it is still a
                 # reasonable value.
                 #
-                # We don't force this into the cryptsetup_parameters, as we want the cryptsetup defaults
-                # to prevail for the actual LUKS key lengths.
-                dmcrypt_keysize = 1024
+                return 1024
+            else:
+                return int(dmcrypt_keysize_str)
+        elif dmcrypt_type == 'plain':
+            if dmcrypt_keysize_str is None:
+                # This value is hard-coded in the udev script
+                return 256
             else:
-                dmcrypt_keysize = int(dmcrypt_keysize_str)
-                cryptsetup_parameters = ['--key-size', str(dmcrypt_keysize)] + cryptsetup_parameters
+                LOG.warning('ensure the 95-ceph-osd.rules file has '
+                            'been copied to /etc/udev/rules.d '
+                            'and modified to call cryptsetup '
+                            'with --key-size=%s' % dmcrypt_keysize_str)
+                return int(dmcrypt_keysize_str)
+        else:
+            return 0
+
+    @staticmethod
+    def get_dmcrypt_type(args):
+        if args.dmcrypt:
+            dmcrypt_type = get_conf(
+                cluster=args.cluster,
+                variable='osd_dmcrypt_type',
+            )
 
-            luks = True
+            if dmcrypt_type is None or dmcrypt_type == 'luks':
+                return 'luks'
+            elif dmcrypt_type == 'plain':
+                return 'plain'
+            else:
+                raise Error('invalid osd_dmcrypt_type parameter '
+                            '(must be luks or plain): ', dmcrypt_type)
         else:
-            raise Error('invalid osd_dmcrypt_type parameter (must be luks or plain): ', dmcrypt_type)
+            return None
 
-        # colocate journal with data?
-        if wants_journal and stat.S_ISBLK(dmode) and not is_partition(args.data) and args.journal is None and args.journal_file is None:
-            LOG.info('Will colocate journal with data on %s', args.data)
-            args.journal = args.data
 
-        if args.journal and args.journal_uuid is None:
-            args.journal_uuid = str(uuid.uuid4())
-        if args.osd_uuid is None:
-            args.osd_uuid = str(uuid.uuid4())
+class PrepareData(object):
 
-        # dm-crypt keys?
-        if args.dmcrypt:
-            if args.journal:
-                journal_dm_keypath = get_or_create_dmcrypt_key(args.journal_uuid, args.dmcrypt_key_dir, dmcrypt_keysize, luks)
-            osd_dm_keypath = get_or_create_dmcrypt_key(args.osd_uuid, args.dmcrypt_key_dir, dmcrypt_keysize, luks)
-
-        # prepare journal
-        journal_symlink = None
-        journal_dmcrypt = None
-        journal_uuid = None
-        if args.journal:
-            (journal_symlink, journal_dmcrypt, journal_uuid) = prepare_journal(
-                data=args.data,
-                journal=args.journal,
-                journal_size=journal_size,
-                journal_uuid=args.journal_uuid,
-                force_file=args.journal_file,
-                force_dev=args.journal_dev,
-                journal_dm_keypath=journal_dm_keypath,
-                cryptsetup_parameters=cryptsetup_parameters,
-                luks=luks
-            )
+    FILE = 1
+    DEVICE = 2
+
+    def __init__(self, args):
+
+        self.args = args
+        self.partition = None
+        self.set_type()
+        if self.args.cluster_uuid is None:
+            self.args.cluster_uuid = get_fsid(cluster=self.args.cluster)
+
+        if self.args.osd_uuid is None:
+            self.args.osd_uuid = str(uuid.uuid4())
+
+    def set_type(self):
+        dmode = os.stat(self.args.data).st_mode
 
-        # prepare data
         if stat.S_ISDIR(dmode):
-            if args.data_dev:
-                raise Error('data path is not a block device', args.data)
-            prepare_dir(
-                path=args.data,
-                journal=journal_symlink,
-                cluster_uuid=args.cluster_uuid,
-                osd_uuid=args.osd_uuid,
-                journal_uuid=journal_uuid,
-                journal_dmcrypt=journal_dmcrypt,
-                )
+            self.type = self.FILE  # args.data is a directory
         elif stat.S_ISBLK(dmode):
-            if args.data_dir:
-                raise Error('data path is not a directory', args.data)
-            prepare_dev(
-                data=args.data,
-                journal=journal_symlink,
-                fstype=args.fs_type,
-                mkfs_args=mkfs_args,
-                mount_options=mount_options,
-                cluster_uuid=args.cluster_uuid,
-                osd_uuid=args.osd_uuid,
-                journal_uuid=journal_uuid,
-                journal_dmcrypt=journal_dmcrypt,
-                osd_dm_keypath=osd_dm_keypath,
-                cryptsetup_parameters=cryptsetup_parameters,
-                luks=luks
-                )
+            self.type = self.DEVICE  # args.data is a block device
         else:
-            raise Error('not a dir or block device', args.data)
+            raise Error('not a dir or block device', self.args.data)
-        prepare_lock.release()  # noqa
 
-    except Error:
-        if journal_dm_keypath:
+    def is_file(self):
+        return self.type == self.FILE
+
+    def is_device(self):
+        return self.type == self.DEVICE
+
+    @staticmethod
+    def parser():
+        parser = argparse.ArgumentParser(add_help=False)
+        parser.add_argument(
+            '--fs-type',
+            help='file system type to use (e.g. "ext4")',
+        )
+        parser.add_argument(
+            '--zap-disk',
+            action='store_true', default=None,
+            help='destroy the partition table (and content) of a disk',
+        )
+        parser.add_argument(
+            '--data-dir',
+            action='store_true', default=None,
+            help='verify that DATA is a dir',
+        )
+        parser.add_argument(
+            '--data-dev',
+            action='store_true', default=None,
+            help='verify that DATA is a block device',
+        )
+        parser.add_argument(
+            'data',
+            metavar='DATA',
+            help='path to OSD data (a disk block device or directory)',
+        )
+        return parser
+
+    def populate_data_path_file(self, path, *to_prepare_list):
+        self.populate_data_path(path, *to_prepare_list)
+
+    def populate_data_path(self, path, *to_prepare_list):
+        if os.path.exists(os.path.join(path, 'magic')):
+            LOG.debug('Data dir %s already exists', path)
+            return
+        else:
+            LOG.debug('Preparing osd data dir %s', path)
+
+        if self.args.osd_uuid is None:
+            self.args.osd_uuid = str(uuid.uuid4())
+
+        write_one_line(path, 'ceph_fsid', self.args.cluster_uuid)
+        write_one_line(path, 'fsid', self.args.osd_uuid)
+        write_one_line(path, 'magic', CEPH_OSD_ONDISK_MAGIC)
+
+        for to_prepare in to_prepare_list:
+            to_prepare.populate_data_path(path)
+
+    def prepare(self, *to_prepare_list):
+        if self.type == self.DEVICE:
+            self.prepare_device(*to_prepare_list)
+        elif self.type == self.FILE:
+            self.prepare_file(*to_prepare_list)
+        else:
+            raise Error('unexpected type ', self.type)
+
+    def prepare_file(self, *to_prepare_list):
+
+        if not os.path.exists(self.args.data):
+            raise Error('data path for directory does not exist',
+                        self.args.data)
+
+        if self.args.data_dev:
+            raise Error('data path is not a block device', self.args.data)
+
+        for to_prepare in to_prepare_list:
+            to_prepare.prepare()
+
+        self.populate_data_path_file(self.args.data, *to_prepare_list)
+
+    def sanity_checks(self):
+        if not os.path.exists(self.args.data):
+            raise Error('data path for device does not exist',
+                        self.args.data)
+        verify_not_in_use(self.args.data, True)
+
+    def set_variables(self):
+        """Fill in fs_type, mkfs_args, mount_options and osd_uuid defaults."""
+        if self.args.fs_type is None:
+            self.args.fs_type = get_conf(
+                cluster=self.args.cluster, variable='osd_mkfs_type')
+            if self.args.fs_type is None:
+                self.args.fs_type = get_conf(
+                    cluster=self.args.cluster,
+                    variable='osd_fs_type',
+                )
+            if self.args.fs_type is None:
+                self.args.fs_type = DEFAULT_FS_TYPE
+
+        self.mkfs_args = get_conf(
+            cluster=self.args.cluster,
+            variable='osd_mkfs_options_{fstype}'.format(
+                fstype=self.args.fs_type,
+            ),
+        )
+        if self.mkfs_args is None:
+            self.mkfs_args = get_conf(
+                cluster=self.args.cluster,
+                variable='osd_fs_mkfs_options_{fstype}'.format(
+                    fstype=self.args.fs_type,
+                ),
+            )
+
+        self.mount_options = get_conf(
+            cluster=self.args.cluster,
+            variable='osd_mount_options_{fstype}'.format(
+                fstype=self.args.fs_type,
+            ),
+        )
+        if self.mount_options is None:
+            self.mount_options = get_conf(
+                cluster=self.args.cluster,
+                variable='osd_fs_mount_options_{fstype}'.format(
+                    fstype=self.args.fs_type,
+                ),
+            )
+        if self.mount_options is not None:
+            # remove whitespaces, whichever option supplied the value
+            # (mount_activate normalizes both lookups the same way)
+            self.mount_options = "".join(self.mount_options.split())
+
+        if self.args.osd_uuid is None:
+            self.args.osd_uuid = str(uuid.uuid4())
+
+    def prepare_device(self, *to_prepare_list):
+        self.sanity_checks()
+        self.set_variables()
+        if self.args.zap_disk is not None:
+            zap(self.args.data)
+
+    def create_data_partition(self):
+        device = Device.factory(self.args.data, self.args)
+        partition_number = 1
+        device.create_partition(uuid=self.args.osd_uuid,
+                                name='data',
+                                num=partition_number,
+                                size=self.get_space_size())
+        return device.get_partition(partition_number)
+
+    def set_data_partition(self):
+        """Use args.data if it is a partition, otherwise create one on it."""
+        if is_partition(self.args.data):
+            LOG.debug('OSD data device %s is a partition', self.args.data)
+            self.partition = DevicePartition.factory(
+                path=None, dev=self.args.data, args=self.args)
+            ptype = self.partition.get_ptype()
+            ptype_osd = self.partition.ptype_for_name('osd')
+            if ptype != ptype_osd:
+                LOG.warning('incorrect partition UUID: %s, expected %s'
+                            % (ptype, ptype_osd))
+        else:
+            LOG.debug('Creating osd partition on %s', self.args.data)
+            self.partition = self.create_data_partition()
+
+    def populate_data_path_device(self, *to_prepare_list):
+        partition = self.partition
+
+        if isinstance(partition, DevicePartitionCrypt):
+            partition.map()
+
+        try:
+            args = [
+                'mkfs',
+                '-t',
+                self.args.fs_type,
+            ]
+            if self.mkfs_args is not None:
+                args.extend(self.mkfs_args.split())
+                if self.args.fs_type == 'xfs':
+                    args.extend(['-f'])  # always force
+            else:
+                args.extend(MKFS_ARGS.get(self.args.fs_type, []))
+            args.extend([
+                '--',
+                partition.get_dev(),
+            ])
             try:
-                os.unlink(journal_dm_keypath)
-            except OSError as e2:
-                if e2.errno != errno.ENOENT: # errno.ENOENT = no such file or directory
-                    raise # re-raise exception if a different error occured
-        if osd_dm_keypath:
+                LOG.debug('Creating %s fs on %s',
+                          self.args.fs_type, partition.get_dev())
+                command_check_call(args)
+            except subprocess.CalledProcessError as e:
+                raise Error(e)
+
+            path = mount(dev=partition.get_dev(),
+                         fstype=self.args.fs_type,
+                         options=self.mount_options)
+
             try:
-                os.unlink(osd_dm_keypath)
-            except OSError as e2:
-                if e2.errno != errno.ENOENT: # errno.ENOENT = no such file or directory
-                    raise # re-raise exception if a different error occured
-        prepare_lock.release()  # noqa
-        raise
+                self.populate_data_path(path, *to_prepare_list)
+            finally:
+                path_set_context(path)
+                unmount(path)
+        finally:
+            if isinstance(partition, DevicePartitionCrypt):
+                partition.unmap()
+
+        if not is_partition(self.args.data):
+            try:
+                command_check_call(
+                    [
+                        'sgdisk',
+                        '--typecode=%d:%s' % (partition.get_partition_number(),
+                                              partition.ptype_for_name('osd')),
+                        '--',
+                        self.args.data,
+                    ],
+                )
+            except subprocess.CalledProcessError as e:
+                raise Error(e)
+            update_partition(self.args.data, 'prepared')
+            command_check_call(['udevadm', 'trigger',
+                                '--action=add',
+                                '--sysname-match',
+                                os.path.basename(partition.rawdev)])
 
 
-###########################
+class PrepareFilestoreData(PrepareData):
+
+    def get_space_size(self):
+        return 0  # get as much space as possible
+
+    def prepare_device(self, *to_prepare_list):
+        super(PrepareFilestoreData, self).prepare_device(*to_prepare_list)
+        for to_prepare in to_prepare_list:
+            to_prepare.prepare()
+        self.set_data_partition()
+        self.populate_data_path_device(*to_prepare_list)
+
+
+class PrepareBluestoreData(PrepareData):
+
+    def get_space_size(self):
+        return 100  # MB
+
+    def prepare_device(self, *to_prepare_list):
+        super(PrepareBluestoreData, self).prepare_device(*to_prepare_list)
+        self.set_data_partition()
+        for to_prepare in to_prepare_list:
+            to_prepare.prepare()
+        self.populate_data_path_device(*to_prepare_list)
+
+    def populate_data_path(self, path, *to_prepare_list):
+        super(PrepareBluestoreData, self).populate_data_path(path,
+                                                             *to_prepare_list)
+        write_one_line(path, 'type', 'bluestore')
 
 
 def mkfs(
@@ -1969,7 +2400,7 @@ def mkfs(
     osd_id,
     fsid,
     keyring,
-    ):
+):
     monmap = os.path.join(path, 'activate.monmap')
     command_check_call(
         [
@@ -1978,27 +2409,44 @@ def mkfs(
             '--name', 'client.bootstrap-osd',
             '--keyring', keyring,
             'mon', 'getmap', '-o', monmap,
+        ],
+    )
+
+    osd_type = read_one_line(path, 'type')
+
+    if osd_type == 'bluestore':
+        command_check_call(
+            [
+                'ceph-osd',
+                '--cluster', cluster,
+                '--mkfs',
+                '--mkkey',
+                '-i', osd_id,
+                '--monmap', monmap,
+                '--osd-data', path,
+                '--osd-uuid', fsid,
+                '--keyring', os.path.join(path, 'keyring'),
+                '--setuser', get_ceph_user(),
+                '--setgroup', get_ceph_user(),
             ],
         )
-
-    command_check_call(
-        [
-            'ceph-osd',
-            '--cluster', cluster,
-            '--mkfs',
-            '--mkkey',
-            '-i', osd_id,
-            '--monmap', monmap,
-            '--osd-data', path,
-            '--osd-journal', os.path.join(path, 'journal'),
-            '--osd-uuid', fsid,
-            '--keyring', os.path.join(path, 'keyring'),
-            '--setuser', get_ceph_user(),
-            '--setgroup', get_ceph_user(),
+    else:
+        command_check_call(
+            [
+                'ceph-osd',
+                '--cluster', cluster,
+                '--mkfs',
+                '--mkkey',
+                '-i', osd_id,
+                '--monmap', monmap,
+                '--osd-data', path,
+                '--osd-journal', os.path.join(path, 'journal'),
+                '--osd-uuid', fsid,
+                '--keyring', os.path.join(path, 'keyring'),
+                '--setuser', get_ceph_user(),
+                '--setgroup', get_ceph_user(),
             ],
         )
-    # TODO ceph-osd --mkfs removes the monmap file?
-    # os.unlink(monmap)
 
 
 def auth_key(
@@ -2006,7 +2454,7 @@ def auth_key(
     cluster,
     osd_id,
     keyring,
-    ):
+):
     try:
         # try dumpling+ cap scheme
         command_check_call(
@@ -2019,10 +2467,10 @@ def auth_key(
                 '-i', os.path.join(path, 'keyring'),
                 'osd', 'allow *',
                 'mon', 'allow profile osd',
-                ],
-            )
+            ],
+        )
     except subprocess.CalledProcessError as err:
-        if err.returncode == errno.EACCES:
+        if err.returncode == errno.EINVAL:
             # try old cap scheme
             command_check_call(
                 [
@@ -2035,10 +2483,11 @@ def auth_key(
                     'osd', 'allow *',
                     'mon', 'allow rwx',
                 ],
-                )
+            )
         else:
             raise
 
+
 def get_mount_point(cluster, osd_id):
     parent = STATEDIR + '/osd'
     return os.path.join(
@@ -2046,6 +2495,7 @@ def get_mount_point(cluster, osd_id):
         '{cluster}-{osd_id}'.format(cluster=cluster, osd_id=osd_id),
     )
 
+
 def move_mount(
     dev,
     path,
@@ -2053,7 +2503,7 @@ def move_mount(
     osd_id,
     fstype,
     mount_options,
-    ):
+):
     LOG.debug('Moving mount to final location...')
     osd_data = get_mount_point(cluster, osd_id)
     maybe_mkdir(osd_data)
@@ -2076,8 +2526,8 @@ def move_mount(
             '--',
             dev,
             osd_data,
-            ],
-        )
+        ],
+    )
     command_check_call(
         [
             '/bin/umount',
@@ -2085,14 +2535,14 @@ def move_mount(
                     # wrong moment
             '--',
             path,
-            ],
-        )
+        ],
+    )
 
 
 def start_daemon(
     cluster,
     osd_id,
-    ):
+):
     LOG.debug('Starting %s osd.%s...', cluster, osd_id)
 
     path = (STATEDIR + '/osd/{cluster}-{osd_id}').format(
@@ -2114,8 +2564,8 @@ def start_daemon(
                     'ceph-osd',
                     'cluster={cluster}'.format(cluster=cluster),
                     'id={osd_id}'.format(osd_id=osd_id),
-                    ],
-                )
+                ],
+            )
         elif os.path.exists(os.path.join(path, 'sysvinit')):
             if os.path.exists('/usr/sbin/service'):
                 svc = '/usr/sbin/service'
@@ -2129,28 +2579,29 @@ def start_daemon(
                     '{cluster}'.format(cluster=cluster),
                     'start',
                     'osd.{osd_id}'.format(osd_id=osd_id),
-                    ],
-                )
+                ],
+            )
         elif os.path.exists(os.path.join(path, 'systemd')):
             command_check_call(
                 [
                     'systemctl',
                     'enable',
                     'ceph-osd@{osd_id}'.format(osd_id=osd_id),
-                    ],
-                )
+                ],
+            )
             command_check_call(
                 [
                     'systemctl',
                     'start',
                     'ceph-osd@{osd_id}'.format(osd_id=osd_id),
-                    ],
-                )
+                ],
+            )
         else:
-            raise Error('{cluster} osd.{osd_id} is not tagged with an init system'.format(
-                cluster=cluster,
-                osd_id=osd_id,
-            ))
+            raise Error('{cluster} osd.{osd_id} is not tagged '
+                        'with an init system'.format(
+                            cluster=cluster,
+                            osd_id=osd_id,
+                        ))
     except subprocess.CalledProcessError as e:
         raise Error('ceph osd start failed', e)
 
@@ -2158,14 +2609,14 @@ def start_daemon(
 def stop_daemon(
     cluster,
     osd_id,
-    ):
+):
     LOG.debug('Stoping %s osd.%s...', cluster, osd_id)
 
     path = (STATEDIR + '/osd/{cluster}-{osd_id}').format(
         cluster=cluster, osd_id=osd_id)
 
     try:
-        if os.path.exists(os.path.join(path,'upstart')):
+        if os.path.exists(os.path.join(path, 'upstart')):
             command_check_call(
                 [
                     '/sbin/initctl',
@@ -2173,8 +2624,8 @@ def stop_daemon(
                     'ceph-osd',
                     'cluster={cluster}'.format(cluster=cluster),
                     'id={osd_id}'.format(osd_id=osd_id),
-                    ],
-                )
+                ],
+            )
         elif os.path.exists(os.path.join(path, 'sysvinit')):
             svc = which('service')
             command_check_call(
@@ -2185,33 +2636,33 @@ def stop_daemon(
                     '{cluster}'.format(cluster=cluster),
                     'stop',
                     'osd.{osd_id}'.format(osd_id=osd_id),
-                    ],
-                )
+                ],
+            )
         elif os.path.exists(os.path.join(path, 'systemd')):
             command_check_call(
                 [
                     'systemctl',
                     'disable',
                     'ceph-osd@{osd_id}'.format(osd_id=osd_id),
-                    ],
-                )
+                ],
+            )
             command_check_call(
                 [
                     'systemctl',
                     'stop',
                     'ceph-osd@{osd_id}'.format(osd_id=osd_id),
-                    ],
-                )
+                ],
+            )
         else:
-            raise Error('{cluster} osd.{osd_id} is not tagged with an init '\
-                        ' system'.format(cluster=cluster,osd_id=osd_id))
+            raise Error('{cluster} osd.{osd_id} is not tagged with an init '
+                        ' system'.format(cluster=cluster, osd_id=osd_id))
     except subprocess.CalledProcessError as e:
         raise Error('ceph osd stop failed', e)
 
 
 def detect_fstype(
     dev,
-    ):
+):
     fstype = _check_output(
         args=[
             '/sbin/blkid',
@@ -2221,17 +2672,18 @@ def detect_fstype(
             '-o', 'value',
             '--',
             dev,
-            ],
-        )
+        ],
+    )
     fstype = must_be_one_line(fstype)
     return fstype
 
+
 def dmcrypt_map(dev, dmcrypt_key_dir):
     ptype = get_partition_type(dev)
-    if ptype in [DMCRYPT_OSD_UUID, DMCRYPT_JOURNAL_UUID]:
+    if ptype in Ptype.get_ready_by_type('plain'):
         luks = False
         cryptsetup_parameters = ['--key-size', '256']
-    elif ptype in [DMCRYPT_LUKS_OSD_UUID, DMCRYPT_LUKS_JOURNAL_UUID]:
+    elif ptype in Ptype.get_ready_by_type('luks'):
         luks = True
         cryptsetup_parameters = []
     else:
@@ -2248,6 +2700,7 @@ def dmcrypt_map(dev, dmcrypt_key_dir):
         format_dev=False,
     )
 
+
 def mount_activate(
     dev,
     activate_key_template,
@@ -2255,7 +2708,7 @@ def mount_activate(
     dmcrypt,
     dmcrypt_key_dir,
     reactivate=False,
-    ):
+):
 
     if dmcrypt:
         part_uuid = get_partition_uuid(dev)
@@ -2268,7 +2721,7 @@ def mount_activate(
         raise FilesystemTypeError(
             'device {dev}'.format(dev=dev),
             e,
-            )
+        )
 
     # TODO always using mount options from cluster=ceph for
     # now; see http://tracker.newdream.net/issues/3253
@@ -2276,18 +2729,18 @@ def mount_activate(
         cluster='ceph',
         variable='osd_mount_options_{fstype}'.format(
             fstype=fstype,
-            ),
-        )
+        ),
+    )
 
     if mount_options is None:
         mount_options = get_conf(
             cluster='ceph',
             variable='osd_fs_mount_options_{fstype}'.format(
                 fstype=fstype,
-                ),
-            )
+            ),
+        )
 
-    #remove whitespaces from mount_options
+    # remove whitespaces from mount_options
     if mount_options is not None:
         mount_options = "".join(mount_options.split())
 
@@ -2338,17 +2791,21 @@ def mount_activate(
                 if dst_dev != parent_dev:
                     other = True
                 elif os.listdir(get_mount_point(cluster, osd_id)):
-                    LOG.info(get_mount_point(cluster, osd_id) + " is not empty, won't override")
+                    LOG.info(get_mount_point(cluster, osd_id) +
+                             " is not empty, won't override")
                     other = True
 
         except OSError:
             pass
 
         if active:
-            LOG.info('%s osd.%s already mounted in position; unmounting ours.' % (cluster, osd_id))
+            LOG.info('%s osd.%s already mounted in position; unmounting ours.'
+                     % (cluster, osd_id))
             unmount(path)
         elif other:
-            raise Error('another %s osd.%s already mounted in position (old/different cluster instance?); unmounting ours.' % (cluster, osd_id))
+            raise Error('another %s osd.%s already mounted in position '
+                        '(old/different cluster instance?); unmounting ours.'
+                        % (cluster, osd_id))
         else:
             move_mount(
                 dev=dev,
@@ -2357,7 +2814,7 @@ def mount_activate(
                 osd_id=osd_id,
                 fstype=fstype,
                 mount_options=mount_options,
-                )
+            )
         return (cluster, osd_id)
 
     except:
@@ -2374,16 +2831,16 @@ def activate_dir(
     path,
     activate_key_template,
     init,
-    ):
+):
 
     if not os.path.exists(path):
         raise Error(
             'directory %s does not exist' % path
-            )
+        )
 
     (osd_id, cluster) = activate(path, activate_key_template, init)
 
-    if init not in (None, 'none' ):
+    if init not in (None, 'none'):
         canonical = (STATEDIR + '/osd/{cluster}-{osd_id}').format(
             cluster=cluster,
             osd_id=osd_id)
@@ -2405,7 +2862,8 @@ def activate_dir(
                 try:
                     os.symlink(path, canonical)
                 except:
-                    raise Error('unable to create symlink %s -> %s' % (canonical, path))
+                    raise Error('unable to create symlink %s -> %s'
+                                % (canonical, path))
 
     return (cluster, osd_id)
 
@@ -2434,7 +2892,8 @@ def find_cluster_by_uuid(_uuid):
                 return cluster
     # be tolerant of /etc/ceph/ceph.conf without an fsid defined.
     if len(no_fsid) == 1 and no_fsid[0] == 'ceph':
-        LOG.warning('No fsid defined in ' + SYSCONFDIR + '/ceph.conf; using anyway')
+        LOG.warning('No fsid defined in ' + SYSCONFDIR +
+                    '/ceph.conf; using anyway')
         return 'ceph'
     return None
 
@@ -2443,7 +2902,7 @@ def activate(
     path,
     activate_key_template,
     init,
-    ):
+):
 
     check_osd_magic(path)
 
@@ -2454,7 +2913,8 @@ def activate(
 
     cluster = find_cluster_by_uuid(ceph_fsid)
     if cluster is None:
-        raise Error('No cluster conf found in ' + SYSCONFDIR + ' with fsid %s' % ceph_fsid)
+        raise Error('No cluster conf found in ' + SYSCONFDIR +
+                    ' with fsid %s' % ceph_fsid)
     LOG.debug('Cluster name is %s', cluster)
 
     fsid = read_one_line(path, 'fsid')
@@ -2471,7 +2931,7 @@ def activate(
             cluster=cluster,
             fsid=fsid,
             keyring=keyring,
-            )
+        )
         write_one_line(path, 'whoami', osd_id)
     LOG.debug('OSD id is %s', osd_id)
 
@@ -2484,14 +2944,14 @@ def activate(
             osd_id=osd_id,
             fsid=fsid,
             keyring=keyring,
-            )
+        )
 
-    if init not in (None, 'none' ):
+    if init not in (None, 'none'):
         if init == 'auto':
             conf_val = get_conf(
                 cluster=cluster,
                 variable='init'
-                )
+            )
             if conf_val is not None:
                 init = conf_val
             else:
@@ -2516,7 +2976,7 @@ def activate(
             cluster=cluster,
             osd_id=osd_id,
             keyring=keyring,
-            )
+        )
         write_one_line(path, 'active', 'ok')
     LOG.debug('%s osd.%s data dir is ready at %s', cluster, osd_id, path)
     return (osd_id, cluster)
@@ -2538,8 +2998,9 @@ def main_activate(args):
         mode = os.stat(args.path).st_mode
         if stat.S_ISBLK(mode):
             if (is_partition(args.path) and
-                get_partition_type(args.path) == MPATH_OSD_UUID and
-                not is_mpath(args.path)):
+                    (get_partition_type(args.path) ==
+                     PTYPE['mpath']['osd']['ready']) and
+                    not is_mpath(args.path)):
                 raise Error('%s is not a multipath block device' %
                             args.path)
             (cluster, osd_id) = mount_activate(
@@ -2549,7 +3010,7 @@ def main_activate(args):
                 dmcrypt=args.dmcrypt,
                 dmcrypt_key_dir=args.dmcrypt_key_dir,
                 reactivate=args.reactivate,
-                )
+            )
             osd_data = get_mount_point(cluster, osd_id)
 
         elif stat.S_ISDIR(mode):
@@ -2557,7 +3018,7 @@ def main_activate(args):
                 path=args.path,
                 activate_key_template=args.activate_key_template,
                 init=args.mark_init,
-                )
+            )
             osd_data = args.path
 
         else:
@@ -2575,7 +3036,7 @@ def main_activate(args):
             )
 
         if (not args.no_start_daemon and
-            args.mark_init not in (None, 'none' )):
+                args.mark_init not in (None, 'none')):
 
             start_daemon(
                 cluster=cluster,
@@ -2590,14 +3051,12 @@ def main_activate(args):
 
 def _mark_osd_out(cluster, osd_id):
     LOG.info('Prepare to mark osd.%d out...', osd_id)
-    command(
-            [
-                'ceph',
-                'osd',
-                'out',
-                'osd.%d' % osd_id,
-                ],
-            )
+    command([
+        'ceph',
+        'osd',
+        'out',
+        'osd.%d' % osd_id,
+    ])
 
 
 def _check_osd_status(cluster, osd_id):
@@ -2611,18 +3070,16 @@ def _check_osd_status(cluster, osd_id):
     LOG.info("Checking osd id: %s ..." % osd_id)
     found = False
     status_code = 0
-    out, err, ret = command(
-            [
-                'ceph',
-                'osd',
-                'dump',
-                '--cluster={cluster}'.format(
-                    cluster=cluster,
-                    ),
-                '--format',
-                'json',
-                ],
-            )
+    out, err, ret = command([
+        'ceph',
+        'osd',
+        'dump',
+        '--cluster={cluster}'.format(
+            cluster=cluster,
+        ),
+        '--format',
+        'json',
+    ])
     out_json = json.loads(out)
     for item in out_json[u'osds']:
         if item.get(u'osd') == int(osd_id):
@@ -2656,7 +3113,7 @@ def _remove_osd_directory_files(mounted_path, cluster):
     conf_val = get_conf(
         cluster=cluster,
         variable='init'
-        )
+    )
     if conf_val is not None:
         init = conf_val
     else:
@@ -2673,6 +3130,7 @@ def main_deactivate(args):
     finally:
         activate_lock.release()  # noqa
 
+
 def main_deactivate_locked(args):
     osd_id = args.deactivate_by_id
     path = args.path
@@ -2684,13 +3142,13 @@ def main_deactivate_locked(args):
     for device in devices:
         if 'partitions' in device:
             for dev_part in device.get('partitions'):
-                if osd_id and \
-                   'whoami' in dev_part and \
-                   dev_part['whoami'] == osd_id:
+                if (osd_id and
+                        'whoami' in dev_part and
+                        dev_part['whoami'] == osd_id):
                     target_dev = dev_part
-                elif path and \
-                   'path' in dev_part and \
-                   dev_part['path'] == path:
+                elif (path and
+                        'path' in dev_part and
+                        dev_part['path'] == path):
                     target_dev = dev_part
     if not target_dev:
         raise Error('Cannot find any match device!!')
@@ -2699,8 +3157,7 @@ def main_deactivate_locked(args):
     osd_id = target_dev['whoami']
     part_type = target_dev['ptype']
     mounted_path = target_dev['mount']
-    if part_type == DMCRYPT_OSD_UUID or \
-       part_type == DMCRYPT_LUKS_OSD_UUID:
+    if Ptype.is_dmcrypt(part_type, 'osd'):
         dmcrypt = True
 
     # Do not do anything if osd is already down.
@@ -2732,44 +3189,43 @@ def main_deactivate_locked(args):
 
     if dmcrypt:
         dmcrypt_unmap(target_dev['uuid'])
-        if 'journal_uuid' in target_dev:
-            dmcrypt_unmap(target_dev['journal_uuid'])
+        for name in Space.NAMES:
+            if name + '_uuid' in target_dev:
+                dmcrypt_unmap(target_dev[name + '_uuid'])
 
 ###########################
 
+
 def _remove_from_crush_map(cluster, osd_id):
     LOG.info("Prepare to remove osd.%s from crush map..." % osd_id)
-    command(
-            [
-                'ceph',
-                'osd',
-                'crush',
-                'remove',
-                'osd.%s' % osd_id,
-                ],
-            )
+    command([
+        'ceph',
+        'osd',
+        'crush',
+        'remove',
+        'osd.%s' % osd_id,
+    ])
+
 
 def _delete_osd_auth_key(cluster, osd_id):
     LOG.info("Prepare to delete osd.%s cephx key..." % osd_id)
-    command(
-            [
-                'ceph',
-                'auth',
-                'del',
-                'osd.%s' % osd_id,
-                ],
-            )
+    command([
+        'ceph',
+        'auth',
+        'del',
+        'osd.%s' % osd_id,
+    ])
+
 
 def _deallocate_osd_id(cluster, osd_id):
     LOG.info("Prepare to deallocate the osd-id: %s..." % osd_id)
-    command(
-            [
-                'ceph',
-                'osd',
-                'rm',
-                '%s' % osd_id,
-                ],
-            )
+    command([
+        'ceph',
+        'osd',
+        'rm',
+        '%s' % osd_id,
+    ])
+
 
 def destroy_lookup_device(args, predicate, description):
     devices = list_devices()
@@ -2784,10 +3240,10 @@ def destroy_lookup_device(args, predicate, description):
                 return partition
     raise Error('found no device matching ', description)
 
+
 def main_destroy(args):
     osd_id = args.destroy_by_id
     path = args.path
-    dmcrypt_key_dir = args.dmcrypt_key_dir
     dmcrypt = False
     target_dev = None
 
@@ -2807,8 +3263,7 @@ def main_destroy(args):
 
     osd_id = target_dev['whoami']
     dev_path = target_dev['path']
-    journal_part_uuid = target_dev['journal_uuid']
-    if target_dev['ptype'] == MPATH_OSD_UUID:
+    if target_dev['ptype'] == PTYPE['mpath']['osd']['ready']:
         base_dev = get_partition_base_mpath(dev_path)
     else:
         base_dev = get_partition_base(dev_path)
@@ -2817,7 +3272,7 @@ def main_destroy(args):
     status_code = _check_osd_status(args.cluster, osd_id)
     if status_code != OSD_STATUS_OUT_DOWN and \
        status_code != OSD_STATUS_IN_DOWN:
-        raise Error("Could not destroy the active osd. (osd-id: %s)" % \
+        raise Error("Could not destroy the active osd. (osd-id: %s)" %
                     osd_id)
 
     # Remove OSD from crush map
@@ -2831,8 +3286,9 @@ def main_destroy(args):
 
     # we remove the crypt map and device mapper (if dmcrypt is True)
     if dmcrypt:
-        if journal_part_uuid:
-            dmcrypt_unmap(journal_part_uuid)
+        for name in Space.NAMES:
+            if target_dev.get(name + '_uuid'):
+                dmcrypt_unmap(target_dev[name + '_uuid'])
 
     # Check zap flag. If we found zap flag, we need to find device for
     # destroy this osd data.
@@ -2841,11 +3297,8 @@ def main_destroy(args):
         LOG.info("Prepare to zap the device %s" % base_dev)
         zap(base_dev)
 
-    return
-
-###########################
 
-def get_journal_osd_uuid(path):
+def get_space_osd_uuid(name, path):
     if not os.path.exists(path):
         raise Error('%s does not exist' % path)
 
@@ -2854,8 +3307,9 @@ def get_journal_osd_uuid(path):
         raise Error('%s is not a block device' % path)
 
     if (is_partition(path) and
-        get_partition_type(path) == MPATH_JOURNAL_UUID and
-        not is_mpath(path)):
+            get_partition_type(path) in (PTYPE['mpath']['journal']['ready'],
+                                         PTYPE['mpath']['block']['ready']) and
+            not is_mpath(path)):
         raise Error('%s is not a multipath block device' %
                     path)
 
@@ -2865,20 +3319,20 @@ def get_journal_osd_uuid(path):
                 'ceph-osd',
                 '--get-device-fsid',
                 path,
-                ],
+            ],
             close_fds=True,
-            )
+        )
     except subprocess.CalledProcessError as e:
         raise Error(
-            'failed to get osd uuid/fsid from journal',
+            'failed to get osd uuid/fsid from %s' % name,
             e,
-            )
+        )
     value = str(out).split('\n', 1)[0]
-    LOG.debug('Journal %s has OSD UUID %s', path, value)
+    LOG.debug('%s %s has OSD UUID %s', name.capitalize(), path, value)
     return value
 
 
-def main_activate_journal(args):
+def main_activate_space(name, args):
     if not os.path.exists(args.dev):
         raise Error('%s does not exist' % args.dev)
 
@@ -2892,11 +3346,11 @@ def main_activate_journal(args):
             dev = dmcrypt_map(args.dev, args.dmcrypt_key_dir)
         else:
             dev = args.dev
-
-        # FIXME: For an encrypted journal dev, does this return the cyphertext
-        # or plaintext dev uuid!? Also, if the journal is encrypted, is the data
-        # partition also always encrypted, or are mixed pairs supported!?
-        osd_uuid = get_journal_osd_uuid(dev)
+        # FIXME: For an encrypted journal dev, does this return the
+        # cyphertext or plaintext dev uuid!? Also, if the journal is
+        # encrypted, is the data partition also always encrypted, or
+        # are mixed pairs supported!?
+        osd_uuid = get_space_osd_uuid(name, dev)
         path = os.path.join('/dev/disk/by-partuuid/', osd_uuid.lower())
 
         if is_suppressed(path):
@@ -2910,12 +3364,12 @@ def main_activate_journal(args):
             dmcrypt=args.dmcrypt,
             dmcrypt_key_dir=args.dmcrypt_key_dir,
             reactivate=args.reactivate,
-            )
+        )
 
         start_daemon(
             cluster=cluster,
             osd_id=osd_id,
-            )
+        )
 
     finally:
         activate_lock.release()  # noqa
@@ -2935,12 +3389,9 @@ def main_activate_all(args):
             continue
         (tag, uuid) = name.split('.')
 
-        if tag in (OSD_UUID,
-                   MPATH_OSD_UUID,
-                   DMCRYPT_OSD_UUID,
-                   DMCRYPT_LUKS_OSD_UUID):
+        if tag in Ptype.get_ready_by_name('osd'):
 
-            if tag == DMCRYPT_OSD_UUID or tag == DMCRYPT_LUKS_OSD_UUID:
+            if Ptype.is_dmcrypt(tag, 'osd'):
                 path = os.path.join('/dev/mapper', uuid)
             else:
                 path = os.path.join(dir, name)
@@ -2959,17 +3410,17 @@ def main_activate_all(args):
                     init=args.mark_init,
                     dmcrypt=False,
                     dmcrypt_key_dir='',
-                    )
+                )
                 start_daemon(
                     cluster=cluster,
                     osd_id=osd_id,
-                    )
+                )
 
             except Exception as e:
                 print >> sys.stderr, '{prog}: {msg}'.format(
                     prog=args.prog,
                     msg=e,
-                    )
+                )
                 err = True
 
             finally:
@@ -3018,6 +3469,7 @@ def get_dev_fs(dev):
     else:
         return None
 
+
 def split_dev_base_partnum(dev):
     if is_mpath(dev):
         partnum = partnum_mpath(dev)
@@ -3028,12 +3480,15 @@ def split_dev_base_partnum(dev):
         base = get_partition_base(dev)
     return (base, partnum)
 
+
 def get_partition_type(part):
     return get_blkid_partition_info(part, 'ID_PART_ENTRY_TYPE')
 
+
 def get_partition_uuid(part):
     return get_blkid_partition_info(part, 'ID_PART_ENTRY_UUID')
 
+
 def get_blkid_partition_info(dev, what=None):
     out, _, _ = command(
         [
@@ -3053,16 +3508,19 @@ def get_blkid_partition_info(dev, what=None):
     else:
         return p
 
+
 def more_osd_info(path, uuid_map, desc):
     desc['ceph_fsid'] = get_oneliner(path, 'ceph_fsid')
     if desc['ceph_fsid']:
         desc['cluster'] = find_cluster_by_uuid(desc['ceph_fsid'])
     desc['whoami'] = get_oneliner(path, 'whoami')
-    desc['journal_uuid'] = get_oneliner(path, 'journal_uuid')
-    if desc['journal_uuid']:
-        desc['journal_uuid'] = desc['journal_uuid'].lower()
-        if desc['journal_uuid'] in uuid_map:
-            desc['journal_dev'] = uuid_map[desc['journal_uuid']]
+    for name in Space.NAMES:
+        uuid = get_oneliner(path, name + '_uuid')
+        if uuid:
+            desc[name + '_uuid'] = uuid.lower()
+            if desc[name + '_uuid'] in uuid_map:
+                desc[name + '_dev'] = uuid_map[desc[name + '_uuid']]
+
 
 def list_dev_osd(dev, uuid_map, desc):
     desc['mount'] = is_mounted(dev)
@@ -3086,6 +3544,7 @@ def list_dev_osd(dev, uuid_map, desc):
         except MountError:
             pass
 
+
 def list_format_more_osd_info_plain(dev):
     desc = []
     if dev.get('ceph_fsid'):
@@ -3095,38 +3554,46 @@ def list_format_more_osd_info_plain(dev):
             desc.append('unknown cluster ' + dev['ceph_fsid'])
     if dev.get('whoami'):
         desc.append('osd.%s' % dev['whoami'])
-    if dev.get('journal_dev'):
-        desc.append('journal %s' % dev['journal_dev'])
+    for name in Space.NAMES:
+        if dev.get(name + '_dev'):
+            desc.append(name + ' %s' % dev[name + '_dev'])
     return desc
 
-def list_format_dev_plain(dev, devices=[], prefix=''):
+
+def list_format_dev_plain(dev, prefix=''):
     desc = []
-    if dev['ptype'] == OSD_UUID:
-        desc = ['ceph data', dev['state']] + list_format_more_osd_info_plain(dev)
-    elif dev['ptype'] in (DMCRYPT_OSD_UUID,
-                          DMCRYPT_LUKS_OSD_UUID):
+    if dev['ptype'] == PTYPE['regular']['osd']['ready']:
+        desc = (['ceph data', dev['state']] +
+                list_format_more_osd_info_plain(dev))
+    elif Ptype.is_dmcrypt(dev['ptype'], 'osd'):
         dmcrypt = dev['dmcrypt']
         if not dmcrypt['holders']:
-            desc = ['ceph data (dmcrypt %s)' % dmcrypt['type'], 'not currently mapped']
+            desc = ['ceph data (dmcrypt %s)' % dmcrypt['type'],
+                    'not currently mapped']
         elif len(dmcrypt['holders']) == 1:
             holder = get_dev_path(dmcrypt['holders'][0])
-            desc = ['ceph data (dmcrypt %s %s)' % (dmcrypt['type'], holder)] + list_format_more_osd_info_plain(dev)
+            desc = ['ceph data (dmcrypt %s %s)' %
+                    (dmcrypt['type'], holder)]
+            desc += list_format_more_osd_info_plain(dev)
         else:
-            desc = ['ceph data (dmcrypt %s)' % dmcrypt['type'], 'holders: ' + ','.join(dmcrypt['holders'])]
-    elif dev['ptype'] == JOURNAL_UUID:
-        desc.append('ceph journal')
-        if dev.get('journal_for'):
-            desc.append('for %s' % dev['journal_for'])
-    elif dev['ptype'] in (DMCRYPT_JOURNAL_UUID,
-                          DMCRYPT_LUKS_JOURNAL_UUID):
+            desc = ['ceph data (dmcrypt %s)' % dmcrypt['type'],
+                    'holders: ' + ','.join(dmcrypt['holders'])]
+    elif Ptype.is_regular_space(dev['ptype']):
+        name = Ptype.space_ptype_to_name(dev['ptype'])
+        desc.append('ceph ' + name)
+        if dev.get(name + '_for'):
+            desc.append('for %s' % dev[name + '_for'])
+    elif Ptype.is_dmcrypt_space(dev['ptype']):
+        name = Ptype.space_ptype_to_name(dev['ptype'])
         dmcrypt = dev['dmcrypt']
         if dmcrypt['holders'] and len(dmcrypt['holders']) == 1:
             holder = get_dev_path(dmcrypt['holders'][0])
-            desc = ['ceph journal (dmcrypt %s %s)' % (dmcrypt['type'], holder)]
+            desc = ['ceph ' + name + ' (dmcrypt %s %s)' %
+                    (dmcrypt['type'], holder)]
         else:
-            desc = ['ceph journal (dmcrypt %s)' % dmcrypt['type']]
-        if dev.get('journal_for'):
-            desc.append('for %s' % dev['journal_for'])
+            desc = ['ceph ' + name + ' (dmcrypt %s)' % dmcrypt['type']]
+        if dev.get(name + '_for'):
+            desc.append('for %s' % dev[name + '_for'])
     else:
         desc.append(dev['type'])
         if dev.get('fs_type'):
@@ -3137,22 +3604,22 @@ def list_format_dev_plain(dev, devices=[], prefix=''):
             desc.append('mounted on %s' % dev['mount'])
     return '%s%s %s' % (prefix, dev['path'], ', '.join(desc))
 
-def list_format_plain(devices, selected_devices):
+
+def list_format_plain(devices):
     lines = []
-    for device in selected_devices:
+    for device in devices:
         if device.get('partitions'):
             lines.append('%s :' % device['path'])
             for p in sorted(device['partitions']):
                 lines.append(list_format_dev_plain(dev=p,
-                                                   devices=devices,
                                                    prefix=' '))
         else:
             lines.append(list_format_dev_plain(dev=device,
-                                               devices=devices,
                                                prefix=''))
     return "\n".join(lines)
 
-def list_dev(dev, uuid_map, journal_map):
+
+def list_dev(dev, uuid_map, space_map):
     info = {
         'path': dev,
         'dmcrypt': {},
@@ -3166,45 +3633,49 @@ def list_dev(dev, uuid_map, journal_map):
         ptype = 'unknown'
     info['ptype'] = ptype
     LOG.info("list_dev(dev = " + dev + ", ptype = " + str(ptype) + ")")
-    if ptype in (OSD_UUID, MPATH_OSD_UUID):
+    if ptype in (PTYPE['regular']['osd']['ready'],
+                 PTYPE['mpath']['osd']['ready']):
         info['type'] = 'data'
-        if ptype == MPATH_OSD_UUID:
+        if ptype == PTYPE['mpath']['osd']['ready']:
             info['multipath'] = True
         list_dev_osd(dev, uuid_map, info)
-    elif ptype == DMCRYPT_OSD_UUID:
+    elif ptype == PTYPE['plain']['osd']['ready']:
         holders = is_held(dev)
         info['type'] = 'data'
         info['dmcrypt']['holders'] = holders
         info['dmcrypt']['type'] = 'plain'
         if len(holders) == 1:
             list_dev_osd(get_dev_path(holders[0]), uuid_map, info)
-    elif ptype == DMCRYPT_LUKS_OSD_UUID:
+    elif ptype == PTYPE['luks']['osd']['ready']:
         holders = is_held(dev)
         info['type'] = 'data'
         info['dmcrypt']['holders'] = holders
         info['dmcrypt']['type'] = 'LUKS'
         if len(holders) == 1:
             list_dev_osd(get_dev_path(holders[0]), uuid_map, info)
-    elif ptype in (JOURNAL_UUID, MPATH_JOURNAL_UUID):
-        info['type'] = 'journal'
-        if ptype == MPATH_JOURNAL_UUID:
+    elif Ptype.is_regular_space(ptype) or Ptype.is_mpath_space(ptype):
+        name = Ptype.space_ptype_to_name(ptype)
+        info['type'] = name
+        if ptype == PTYPE['mpath'][name]['ready']:
             info['multipath'] = True
-        if info.get('uuid') in journal_map:
-            info['journal_for'] = journal_map[info['uuid']]
-    elif ptype == DMCRYPT_JOURNAL_UUID:
+        if info.get('uuid') in space_map:
+            info[name + '_for'] = space_map[info['uuid']]
+    elif Ptype.is_plain_space(ptype):
+        name = Ptype.space_ptype_to_name(ptype)
         holders = is_held(dev)
-        info['type'] = 'journal'
+        info['type'] = name
         info['dmcrypt']['type'] = 'plain'
         info['dmcrypt']['holders'] = holders
-        if info.get('uuid') in journal_map:
-            info['journal_for'] = journal_map[info['uuid']]
-    elif ptype == DMCRYPT_LUKS_JOURNAL_UUID:
+        if info.get('uuid') in space_map:
+            info[name + '_for'] = space_map[info['uuid']]
+    elif Ptype.is_luks_space(ptype):
+        name = Ptype.space_ptype_to_name(ptype)
         holders = is_held(dev)
-        info['type'] = 'journal'
+        info['type'] = name
         info['dmcrypt']['type'] = 'LUKS'
         info['dmcrypt']['holders'] = holders
-        if info.get('uuid') in journal_map:
-            info['journal_for'] = journal_map[info['uuid']]
+        if info.get('uuid') in space_map:
+            info[name + '_for'] = space_map[info['uuid']]
     else:
         path = is_mounted(dev)
         fs_type = get_dev_fs(dev)
@@ -3219,11 +3690,12 @@ def list_dev(dev, uuid_map, journal_map):
 
     return info
 
+
 def list_devices():
     partmap = list_all_partitions()
 
     uuid_map = {}
-    journal_map = {}
+    space_map = {}
     for base, parts in sorted(partmap.iteritems()):
         for p in parts:
             dev = get_dev_path(p)
@@ -3234,11 +3706,8 @@ def list_devices():
             LOG.debug("main_list: " + dev +
                       " ptype = " + str(ptype) +
                       " uuid = " + str(part_uuid))
-            if ptype in (OSD_UUID,
-                         DMCRYPT_OSD_UUID,
-                         DMCRYPT_LUKS_OSD_UUID):
-                if ptype in (DMCRYPT_OSD_UUID,
-                             DMCRYPT_LUKS_OSD_UUID):
+            if ptype in Ptype.get_ready_by_name('osd'):
+                if Ptype.is_dmcrypt(ptype, 'osd'):
                     holders = is_held(dev)
                     if len(holders) != 1:
                         continue
@@ -3252,33 +3721,38 @@ def list_devices():
                         tpath = mount(dev=dev_to_mount,
                                       fstype=fs_type, options='')
                         try:
-                            journal_uuid = get_oneliner(tpath, 'journal_uuid')
-                            if journal_uuid:
-                                journal_map[journal_uuid.lower()] = dev
+                            for name in Space.NAMES:
+                                space_uuid = get_oneliner(tpath,
+                                                          name + '_uuid')
+                                if space_uuid:
+                                    space_map[space_uuid.lower()] = dev
                         finally:
                             unmount(tpath)
                     except MountError:
                         pass
 
     LOG.debug("main_list: " + str(partmap) + ", uuid_map = " +
-              str(uuid_map) + ", journal_map = " + str(journal_map))
+              str(uuid_map) + ", space_map = " + str(space_map))
 
     devices = []
     for base, parts in sorted(partmap.iteritems()):
         if parts:
-            disk = { 'path': get_dev_path(base) }
+            disk = {'path': get_dev_path(base)}
             partitions = []
             for p in sorted(parts):
-                partitions.append(list_dev(get_dev_path(p), uuid_map, journal_map))
+                partitions.append(list_dev(get_dev_path(p),
+                                           uuid_map,
+                                           space_map))
             disk['partitions'] = partitions
             devices.append(disk)
         else:
-            device = list_dev(get_dev_path(base), uuid_map, journal_map)
+            device = list_dev(get_dev_path(base), uuid_map, space_map)
             device['path'] = get_dev_path(base)
             devices.append(device)
     LOG.debug("list_devices: " + str(devices))
     return devices
 
+
 def main_list(args):
     devices = list_devices()
     if args.path:
@@ -3298,7 +3772,7 @@ def main_list(args):
     if args.format == 'json':
         print json.dumps(selected_devices)
     else:
-        output = list_format_plain(devices, selected_devices)
+        output = list_format_plain(selected_devices)
         if output:
             print output
 
@@ -3318,7 +3792,8 @@ def main_list(args):
 def is_suppressed(path):
     disk = os.path.realpath(path)
     try:
-        if not disk.startswith('/dev/') or not stat.S_ISBLK(os.lstat(disk).st_mode):
+        if (not disk.startswith('/dev/') or
+                not stat.S_ISBLK(os.lstat(disk).st_mode)):
             return False
         base = get_dev_name(disk)
         while len(base):
@@ -3374,14 +3849,13 @@ def main_zap(args):
     for dev in args.dev:
         zap(dev)
 
-###########################
 
 def main_trigger(args):
     LOG.debug("main_trigger: " + str(args))
     if is_systemd() and not args.sync:
         # http://www.freedesktop.org/software/systemd/man/systemd-escape.html
         escaped_dev = args.dev.replace('-', '\\x2d')
-        service='ceph-disk@{dev}.service'.format(dev=escaped_dev)
+        service = 'ceph-disk@{dev}.service'.format(dev=escaped_dev)
         LOG.info('systemd detected, triggering %s' % service)
         command(
             [
@@ -3412,10 +3886,10 @@ def main_trigger(args):
         dev=args.dev,
         parttype=parttype,
         partid=partid,
-        )
-    )
+    ))
 
-    if parttype in [OSD_UUID, MPATH_OSD_UUID]:
+    if parttype in (PTYPE['regular']['osd']['ready'],
+                    PTYPE['mpath']['osd']['ready']):
         command(
             [
                 'ceph-disk',
@@ -3423,7 +3897,8 @@ def main_trigger(args):
                 args.dev,
             ]
         )
-    elif parttype in [JOURNAL_UUID, MPATH_JOURNAL_UUID]:
+    elif parttype in (PTYPE['regular']['journal']['ready'],
+                      PTYPE['mpath']['journal']['ready']):
         command(
             [
                 'ceph-disk',
@@ -3433,7 +3908,7 @@ def main_trigger(args):
         )
 
         # journals are easy: map, chown, activate-journal
-    elif parttype == DMCRYPT_JOURNAL_UUID:
+    elif parttype == PTYPE['plain']['journal']['ready']:
         command(
             [
                 '/sbin/cryptsetup',
@@ -3446,8 +3921,8 @@ def main_trigger(args):
                 args.dev,
             ]
         )
-        newdev='/dev/mapper/' + partid
-        count=0
+        newdev = '/dev/mapper/' + partid
+        count = 0
         while not os.path.exists(newdev) and count <= 10:
             time.sleep(1)
             count += 1
@@ -3464,8 +3939,8 @@ def main_trigger(args):
                 'activate-journal',
                 newdev,
             ]
-            )
-    elif parttype == DMCRYPT_LUKS_JOURNAL_UUID:
+        )
+    elif parttype == PTYPE['luks']['journal']['ready']:
         command(
             [
                 '/sbin/cryptsetup',
@@ -3477,8 +3952,8 @@ def main_trigger(args):
                 partid,
             ]
         )
-        newdev='/dev/mapper/' + partid
-        count=0
+        newdev = '/dev/mapper/' + partid
+        count = 0
         while not os.path.exists(newdev) and count <= 10:
             time.sleep(1)
             count += 1
@@ -3495,10 +3970,85 @@ def main_trigger(args):
                 'activate-journal',
                 newdev,
             ]
-            )
+        )
+
+    elif parttype in (PTYPE['regular']['block']['ready'],
+                      PTYPE['mpath']['block']['ready']):
+        command(
+            [
+                'ceph-disk',
+                'activate-block',
+                args.dev,
+            ]
+        )
+
+        # blocks are easy: map, chown, activate-block
+    elif parttype == PTYPE['plain']['block']['ready']:
+        command(
+            [
+                '/sbin/cryptsetup',
+                '--key-file',
+                '/etc/ceph/dmcrypt-keys/{partid}'.format(partid=partid),
+                '--key-size',
+                '256',
+                'create',
+                partid,
+                args.dev,
+            ]
+        )
+        newdev = '/dev/mapper/' + partid
+        count = 0
+        while not os.path.exists(newdev) and count <= 10:
+            time.sleep(1)
+            count += 1
+        command(
+            [
+                '/bin/chown',
+                'ceph:ceph',
+                newdev,
+            ]
+        )
+        command(
+            [
+                '/usr/sbin/ceph-disk',
+                'activate-block',
+                newdev,
+            ]
+        )
+    elif parttype == PTYPE['luks']['block']['ready']:
+        command(
+            [
+                '/sbin/cryptsetup',
+                '--key-file',
+                '/etc/ceph/dmcrypt-keys/{partid}.luks.key'.format(
+                    partid=partid),
+                'luksOpen',
+                args.dev,
+                partid,
+            ]
+        )
+        newdev = '/dev/mapper/' + partid
+        count = 0
+        while not os.path.exists(newdev) and count <= 10:
+            time.sleep(1)
+            count += 1
+        command(
+            [
+                '/bin/chown',
+                'ceph:ceph',
+                newdev,
+            ]
+        )
+        command(
+            [
+                '/usr/sbin/ceph-disk',
+                'activate-block',
+                newdev,
+            ]
+        )
 
         # osd data: map, activate
-    elif parttype == DMCRYPT_OSD_UUID:
+    elif parttype == PTYPE['plain']['osd']['ready']:
         command(
             [
                 '/sbin/cryptsetup',
@@ -3511,8 +4061,8 @@ def main_trigger(args):
                 args.dev,
             ]
         )
-        newdev='/dev/mapper/' + partid
-        count=0
+        newdev = '/dev/mapper/' + partid
+        count = 0
         while not os.path.exists(newdev) and count <= 10:
             time.sleep(1)
             count += 1
@@ -3524,7 +4074,7 @@ def main_trigger(args):
             ]
         )
 
-    elif parttype == DMCRYPT_LUKS_OSD_UUID:
+    elif parttype == PTYPE['luks']['osd']['ready']:
         command(
             [
                 '/sbin/cryptsetup',
@@ -3536,8 +4086,8 @@ def main_trigger(args):
                 partid,
             ]
         )
-        newdev='/dev/mapper/' + partid
-        count=0
+        newdev = '/dev/mapper/' + partid
+        count = 0
         while not os.path.exists(newdev) and count <= 10:
             time.sleep(1)
             count += 1
@@ -3553,9 +4103,6 @@ def main_trigger(args):
         raise Error('unrecognized partition type %s' % parttype)
 
 
-
-###########################
-
 def setup_statedir(dir):
     # XXX The following use of globals makes linting
     # really hard. Global state in Python is iffy and
@@ -3586,48 +4133,52 @@ def setup_sysconfdir(dir):
 def parse_args(argv):
     parser = argparse.ArgumentParser(
         'ceph-disk',
-        )
+    )
     parser.add_argument(
         '-v', '--verbose',
         action='store_true', default=None,
         help='be more verbose',
-        )
+    )
     parser.add_argument(
         '--log-stdout',
         action='store_true', default=None,
         help='log to stdout',
-        )
+    )
     parser.add_argument(
         '--prepend-to-path',
         metavar='PATH',
         default='/usr/bin',
-        help='prepend PATH to $PATH for backward compatibility (default /usr/bin)',
-        )
+        help=('prepend PATH to $PATH for backward compatibility '
+              '(default /usr/bin)'),
+    )
     parser.add_argument(
         '--statedir',
         metavar='PATH',
         default='/var/lib/ceph',
-        help='directory in which ceph state is preserved (default /var/lib/ceph)',
-        )
+        help=('directory in which ceph state is preserved '
+              '(default /var/lib/ceph)'),
+    )
     parser.add_argument(
         '--sysconfdir',
         metavar='PATH',
         default='/etc/ceph',
-        help='directory in which ceph configuration files are found (default /etc/ceph)',
-        )
+        help=('directory in which ceph configuration files are found '
+              '(default /etc/ceph)'),
+    )
     parser.set_defaults(
         # we want to hold on to this, for later
         prog=parser.prog,
-        )
+    )
 
     subparsers = parser.add_subparsers(
         title='subcommands',
         description='valid subcommands',
         help='sub-command help',
-        )
+    )
 
-    make_prepare_parser(subparsers)
+    Prepare.set_subparser(subparsers)
     make_activate_parser(subparsers)
+    make_activate_block_parser(subparsers)
     make_activate_journal_parser(subparsers)
     make_activate_all_parser(subparsers)
     make_list_parser(subparsers)
@@ -3640,336 +4191,297 @@ def parse_args(argv):
     args = parser.parse_args(argv)
     return args
 
+
 def make_trigger_parser(subparsers):
-    trigger_parser = subparsers.add_parser('trigger', help='Trigger an event (caled by udev)')
+    trigger_parser = subparsers.add_parser(
+        'trigger',
+        help='Trigger an event (caled by udev)')
     trigger_parser.add_argument(
         'dev',
         help=('device'),
-        )
+    )
     trigger_parser.add_argument(
         '--sync',
         action='store_true', default=None,
         help=('do operation synchronously; do not trigger systemd'),
-        )
+    )
     trigger_parser.set_defaults(
         func=main_trigger,
-        )
+    )
     return trigger_parser
 
-def make_prepare_parser(subparsers):
-    prepare_parser = subparsers.add_parser('prepare', help='Prepare a directory or disk for a Ceph OSD')
-    prepare_parser.add_argument(
-        '--cluster',
-        metavar='NAME',
-        default='ceph',
-        help='cluster name to assign this disk to',
-        )
-    prepare_parser.add_argument(
-        '--cluster-uuid',
-        metavar='UUID',
-        help='cluster uuid to assign this disk to',
-        )
-    prepare_parser.add_argument(
-        '--osd-uuid',
-        metavar='UUID',
-        help='unique OSD uuid to assign this disk to',
-        )
-    prepare_parser.add_argument(
-        '--journal-uuid',
-        metavar='UUID',
-        help='unique uuid to assign to the journal',
-        )
-    prepare_parser.add_argument(
-        '--fs-type',
-        help='file system type to use (e.g. "ext4")',
-        )
-    prepare_parser.add_argument(
-        '--zap-disk',
-        action='store_true', default=None,
-        help='destroy the partition table (and content) of a disk',
-        )
-    prepare_parser.add_argument(
-        '--data-dir',
-        action='store_true', default=None,
-        help='verify that DATA is a dir',
-        )
-    prepare_parser.add_argument(
-        '--data-dev',
-        action='store_true', default=None,
-        help='verify that DATA is a block device',
-        )
-    prepare_parser.add_argument(
-        '--journal-file',
-        action='store_true', default=None,
-        help='verify that JOURNAL is a file',
-        )
-    prepare_parser.add_argument(
-        '--journal-dev',
-        action='store_true', default=None,
-        help='verify that JOURNAL is a block device',
-        )
-    prepare_parser.add_argument(
-        '--dmcrypt',
-        action='store_true', default=None,
-        help='encrypt DATA and/or JOURNAL devices with dm-crypt',
-        )
-    prepare_parser.add_argument(
-        '--dmcrypt-key-dir',
-        metavar='KEYDIR',
-        default='/etc/ceph/dmcrypt-keys',
-        help='directory where dm-crypt keys are stored',
-        )
-    prepare_parser.add_argument(
-        'data',
-        metavar='DATA',
-        help='path to OSD data (a disk block device or directory)',
-        )
-    prepare_parser.add_argument(
-        'journal',
-        metavar='JOURNAL',
-        nargs='?',
-        help=('path to OSD journal disk block device;'
-              + ' leave out to store journal in file'),
-        )
-    prepare_parser.set_defaults(
-        func=main_prepare,
-        )
-    return prepare_parser
 
 def make_activate_parser(subparsers):
-    activate_parser = subparsers.add_parser('activate', help='Activate a Ceph OSD')
+    activate_parser = subparsers.add_parser(
+        'activate',
+        help='Activate a Ceph OSD')
     activate_parser.add_argument(
         '--mount',
         action='store_true', default=None,
         help='mount a block device [deprecated, ignored]',
-        )
+    )
     activate_parser.add_argument(
         '--activate-key',
         metavar='PATH',
         help='bootstrap-osd keyring path template (%(default)s)',
         dest='activate_key_template',
-        )
+    )
     activate_parser.add_argument(
         '--mark-init',
         metavar='INITSYSTEM',
         help='init system to manage this dir',
         default='auto',
         choices=INIT_SYSTEMS,
-        )
+    )
     activate_parser.add_argument(
         '--no-start-daemon',
         action='store_true', default=None,
         help='do not start the daemon',
-        )
+    )
     activate_parser.add_argument(
         'path',
         metavar='PATH',
         nargs='?',
         help='path to block device or directory',
-        )
+    )
     activate_parser.add_argument(
         '--dmcrypt',
         action='store_true', default=None,
         help='map DATA and/or JOURNAL devices with dm-crypt',
-        )
+    )
     activate_parser.add_argument(
         '--dmcrypt-key-dir',
         metavar='KEYDIR',
         default='/etc/ceph/dmcrypt-keys',
         help='directory where dm-crypt keys are stored',
-        )
+    )
     activate_parser.add_argument(
         '--reactivate',
         action='store_true', default=False,
         help='activate the deactived OSD',
-        )
+    )
     activate_parser.set_defaults(
         activate_key_template='{statedir}/bootstrap-osd/{cluster}.keyring',
         func=main_activate,
-        )
+    )
     return activate_parser
 
+
+def make_activate_block_parser(subparsers):
+    return make_activate_space_parser('block', subparsers)
+
+
 def make_activate_journal_parser(subparsers):
-    activate_journal_parser = subparsers.add_parser('activate-journal', help='Activate an OSD via its journal device')
-    activate_journal_parser.add_argument(
+    return make_activate_space_parser('journal', subparsers)
+
+
+def make_activate_space_parser(name, subparsers):
+    activate_space_parser = subparsers.add_parser(
+        'activate-%s' % name,
+        help='Activate an OSD via its %s device' % name)
+    activate_space_parser.add_argument(
         'dev',
         metavar='DEV',
-        help='path to journal block device',
-        )
-    activate_journal_parser.add_argument(
+        help='path to %s block device' % name,
+    )
+    activate_space_parser.add_argument(
         '--activate-key',
         metavar='PATH',
         help='bootstrap-osd keyring path template (%(default)s)',
         dest='activate_key_template',
-        )
-    activate_journal_parser.add_argument(
+    )
+    activate_space_parser.add_argument(
         '--mark-init',
         metavar='INITSYSTEM',
         help='init system to manage this dir',
         default='auto',
         choices=INIT_SYSTEMS,
-        )
-    activate_journal_parser.add_argument(
+    )
+    activate_space_parser.add_argument(
         '--dmcrypt',
         action='store_true', default=None,
-        help='map DATA and/or JOURNAL devices with dm-crypt',
-        )
-    activate_journal_parser.add_argument(
+        help=('map data and/or auxiliariy (journal, etc.) '
+              'devices with dm-crypt'),
+    )
+    activate_space_parser.add_argument(
         '--dmcrypt-key-dir',
         metavar='KEYDIR',
         default='/etc/ceph/dmcrypt-keys',
         help='directory where dm-crypt keys are stored',
-        )
-    activate_journal_parser.add_argument(
+    )
+    activate_space_parser.add_argument(
         '--reactivate',
         action='store_true', default=False,
         help='activate the deactived OSD',
-        )
-    activate_journal_parser.set_defaults(
+    )
+    activate_space_parser.set_defaults(
         activate_key_template='{statedir}/bootstrap-osd/{cluster}.keyring',
-        func=main_activate_journal,
-        )
-    return activate_journal_parser
+        func=lambda args: main_activate_space(name, args),
+    )
+    return activate_space_parser
+
 
 def make_activate_all_parser(subparsers):
-    activate_all_parser = subparsers.add_parser('activate-all', help='Activate all tagged OSD partitions')
+    activate_all_parser = subparsers.add_parser(
+        'activate-all',
+        help='Activate all tagged OSD partitions')
     activate_all_parser.add_argument(
         '--activate-key',
         metavar='PATH',
         help='bootstrap-osd keyring path template (%(default)s)',
         dest='activate_key_template',
-        )
+    )
     activate_all_parser.add_argument(
         '--mark-init',
         metavar='INITSYSTEM',
         help='init system to manage this dir',
         default='auto',
         choices=INIT_SYSTEMS,
-        )
+    )
     activate_all_parser.set_defaults(
         activate_key_template='{statedir}/bootstrap-osd/{cluster}.keyring',
         func=main_activate_all,
-        )
+    )
     return activate_all_parser
 
+
 def make_list_parser(subparsers):
-    list_parser = subparsers.add_parser('list', help='List disks, partitions, and Ceph OSDs')
+    list_parser = subparsers.add_parser(
+        'list',
+        help='List disks, partitions, and Ceph OSDs')
     list_parser.add_argument(
         '--format',
         help='output format',
         default='plain',
-        choices=['json','plain'],
-        )
+        choices=['json', 'plain'],
+    )
     list_parser.add_argument(
         'path',
         metavar='PATH',
         nargs='*',
         help='path to block devices, relative to /sys/block',
-        )
+    )
     list_parser.set_defaults(
         func=main_list,
-        )
+    )
     return list_parser
 
+
 def make_suppress_parser(subparsers):
-    suppress_parser = subparsers.add_parser('suppress-activate', help='Suppress activate on a device (prefix)')
+    suppress_parser = subparsers.add_parser(
+        'suppress-activate',
+        help='Suppress activate on a device (prefix)')
     suppress_parser.add_argument(
         'path',
         metavar='PATH',
         nargs='?',
         help='path to block device or directory',
-        )
+    )
     suppress_parser.set_defaults(
         func=main_suppress,
-        )
+    )
 
-    unsuppress_parser = subparsers.add_parser('unsuppress-activate', help='Stop suppressing activate on a device (prefix)')
+    unsuppress_parser = subparsers.add_parser(
+        'unsuppress-activate',
+        help='Stop suppressing activate on a device (prefix)')
     unsuppress_parser.add_argument(
         'path',
         metavar='PATH',
         nargs='?',
         help='path to block device or directory',
-        )
+    )
     unsuppress_parser.set_defaults(
         func=main_unsuppress,
-        )
+    )
     return suppress_parser
 
+
 def make_deactivate_parser(subparsers):
-    deactivate_parser = subparsers.add_parser('deactivate', help='Deactivate a Ceph OSD')
+    deactivate_parser = subparsers.add_parser(
+        'deactivate',
+        help='Deactivate a Ceph OSD')
     deactivate_parser.add_argument(
         '--cluster',
         metavar='NAME',
         default='ceph',
         help='cluster name to assign this disk to',
-        )
+    )
     deactivate_parser.add_argument(
         'path',
         metavar='PATH',
         nargs='?',
         help='path to block device or directory',
-        )
+    )
     deactivate_parser.add_argument(
         '--deactivate-by-id',
         metavar='<id>',
         help='ID of OSD to deactive'
-        )
+    )
     deactivate_parser.add_argument(
         '--mark-out',
         action='store_true', default=False,
         help='option to mark the osd out',
-        )
+    )
     deactivate_parser.set_defaults(
         func=main_deactivate,
-        )
+    )
+
 
 def make_destroy_parser(subparsers):
-    destroy_parser = subparsers.add_parser('destroy', help='Destroy a Ceph OSD')
+    destroy_parser = subparsers.add_parser(
+        'destroy',
+        help='Destroy a Ceph OSD')
     destroy_parser.add_argument(
         '--cluster',
         metavar='NAME',
         default='ceph',
         help='cluster name to assign this disk to',
-        )
+    )
     destroy_parser.add_argument(
         'path',
         metavar='PATH',
         nargs='?',
         help='path to block device or directory',
-        )
+    )
     destroy_parser.add_argument(
         '--destroy-by-id',
         metavar='<id>',
         help='ID of OSD to destroy'
-        )
+    )
     destroy_parser.add_argument(
         '--dmcrypt-key-dir',
         metavar='KEYDIR',
         default='/etc/ceph/dmcrypt-keys',
-        help='directory where dm-crypt keys are stored (If you don\'t know how it work, dont use it. we have default value)',
-        )
+        help=('directory where dm-crypt keys are stored '
+              '(If you don\'t know how it work, '
+              'dont use it. we have default value)'),
+    )
     destroy_parser.add_argument(
         '--zap',
         action='store_true', default=False,
         help='option to erase data and partition',
-        )
+    )
     destroy_parser.set_defaults(
         func=main_destroy,
-        )
+    )
+
 
 def make_zap_parser(subparsers):
-    zap_parser = subparsers.add_parser('zap', help='Zap/erase/destroy a device\'s partition table (and contents)')
+    zap_parser = subparsers.add_parser(
+        'zap',
+        help='Zap/erase/destroy a device\'s partition table (and contents)')
     zap_parser.add_argument(
         'dev',
         metavar='DEV',
         nargs='+',
         help='path to block device',
-        )
+    )
     zap_parser.set_defaults(
         func=main_zap,
-        )
+    )
     return zap_parser
 
+
 def main(argv):
     args = parse_args(argv)
 
@@ -3987,6 +4499,7 @@ def main(argv):
     else:
         main_catch(args.func, args)
 
+
 def setup_logging(verbose, log_stdout):
     loglevel = logging.WARNING
     if verbose:
@@ -4002,7 +4515,8 @@ def setup_logging(verbose, log_stdout):
     else:
         logging.basicConfig(
             level=loglevel,
-            )
+        )
+
 
 def main_catch(func, args):
 
@@ -4028,6 +4542,9 @@ def main_catch(func, args):
         )
 
 
+def run():
+    main(sys.argv[1:])
+
 if __name__ == '__main__':
     main(sys.argv[1:])
     warned_about = {}
diff --git a/src/ceph-disk/requirements.txt b/src/ceph-disk/requirements.txt
new file mode 100644
index 0000000..1352d5e
--- /dev/null
+++ b/src/ceph-disk/requirements.txt
@@ -0,0 +1 @@
+argparse
diff --git a/src/ceph-disk/run-tox.sh b/src/ceph-disk/run-tox.sh
new file mode 100755
index 0000000..1d2b30f
--- /dev/null
+++ b/src/ceph-disk/run-tox.sh
@@ -0,0 +1,24 @@
+#!/bin/bash
+#
+# Copyright (C) 2015 <contact at redhat.com>
+#
+# Author: Loic Dachary <loic at dachary.org>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Library Public License for more details.
+#
+
+# run from the ceph-disk directory or from its parent
+test -d ceph-disk && cd ceph-disk
+source virtualenv/bin/activate
+tox > virtualenv/tox.out 2>&1
+status=$?
+grep -v InterpreterNotFound < virtualenv/tox.out
+exit $status
diff --git a/src/ceph-disk/setup.py b/src/ceph-disk/setup.py
new file mode 100644
index 0000000..05c9203
--- /dev/null
+++ b/src/ceph-disk/setup.py
@@ -0,0 +1,74 @@
+#
+# Copyright (C) 2015 <contact at redhat.com>
+#
+# Author: Loic Dachary <loic at dachary.org>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Library Public License for more details.
+#
+import os
+import sys
+from setuptools import setup
+from setuptools import find_packages
+
+def read(fname):
+    """Return the text of fname, resolved relative to this file's directory."""
+    with open(os.path.join(os.path.dirname(__file__), fname)) as f:
+        return f.read()
+
+
+def filter_included_modules(*m):
+    """Merge the given lists; past Python 2.6 drop the modules named below."""
+    merged = sum(m, [])
+    if sys.version_info[0] != 2 or sys.version_info[1] > 6:
+        return list(set(merged) - set(['argparse', 'importlib', 'sysconfig']))
+    return merged
+
+
+install_requires = read('requirements.txt').split()
+tests_require = read('test-requirements.txt').split()
+
+setup(
+    name='ceph-disk',
+    version='1.0.0',
+    packages=find_packages(),
+
+    author='Loic Dachary',
+    author_email='loic at dachary.org',
+    description='prepare storage for a Ceph OSD',
+    long_description=read('README.rst'),
+    license='LGPLv2+',
+    keywords='ceph',
+    url="https://git.ceph.com/?p=ceph.git;a=summary",
+
+    install_requires=filter_included_modules(['setuptools'],
+                                             install_requires),
+    tests_require=filter_included_modules(tests_require),
+
+    classifiers=[
+        'Environment :: Console',
+        'Intended Audience :: Information Technology',
+        'Intended Audience :: System Administrators',
+        'Operating System :: POSIX :: Linux',
+        'License :: OSI Approved :: GNU Lesser General Public License v2 or later (LGPLv2+)',
+        'Programming Language :: Python',
+        'Programming Language :: Python :: 2',
+        'Programming Language :: Python :: 3',
+        'Topic :: Utilities',
+    ],
+
+    entry_points={
+
+        'console_scripts': [
+            'ceph-disk = ceph_disk.main:run',
+            ],
+
+        },
+    )
diff --git a/src/ceph-disk/test-requirements.txt b/src/ceph-disk/test-requirements.txt
new file mode 100644
index 0000000..fa6d3a7
--- /dev/null
+++ b/src/ceph-disk/test-requirements.txt
@@ -0,0 +1,11 @@
+configobj
+coverage>=3.6
+discover
+fixtures>=0.3.14
+python-subunit
+testrepository>=0.0.17
+testtools>=0.9.32
+mock
+pytest
+tox
+flake8
diff --git a/src/test/python/ceph-disk/tests/test_ceph_disk.py b/src/ceph-disk/tests/test_main.py
similarity index 56%
rename from src/test/python/ceph-disk/tests/test_ceph_disk.py
rename to src/ceph-disk/tests/test_main.py
index 1ec4ed1..fe85eb7 100644
--- a/src/test/python/ceph-disk/tests/test_ceph_disk.py
+++ b/src/ceph-disk/tests/test_main.py
@@ -1,35 +1,51 @@
-from mock import patch, DEFAULT, Mock
+#!/bin/bash
+#
+# Copyright (C) 2015, 2016 Red Hat <contact at redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Library Public License for more details.
+#
+from mock import patch, DEFAULT
 import os
 import io
+import shutil
 import subprocess
+import tempfile
 import unittest
-import argparse
-import pytest
-import ceph_disk
+from ceph_disk import main
+
 
 def fail_to_mount(dev, fstype, options):
-    raise ceph_disk.MountError(dev + " mount fail")
+    raise main.MountError(dev + " mount fail")
+
 
 class TestCephDisk(object):
 
     def setup_class(self):
-        ceph_disk.setup_logging(verbose=True, log_stdout=False)
+        main.setup_logging(verbose=True, log_stdout=False)
 
     def test_main_list_json(self, capsys):
-        args = ceph_disk.parse_args(['list', '--format', 'json'])
+        args = main.parse_args(['list', '--format', 'json'])
         with patch.multiple(
-                ceph_disk,
-                list_devices=lambda args: {}):
-            ceph_disk.main_list(args)
+                main,
+                list_devices=lambda: {}):
+            main.main_list(args)
             out, err = capsys.readouterr()
             assert '{}\n' == out
 
     def test_main_list_plain(self, capsys):
-        args = ceph_disk.parse_args(['list'])
+        args = main.parse_args(['list'])
         with patch.multiple(
-                ceph_disk,
-                list_devices=lambda args: {}):
-            ceph_disk.main_list(args)
+                main,
+                list_devices=lambda: {}):
+            main.main_list(args)
             out, err = capsys.readouterr()
             assert '' == out
 
@@ -40,7 +56,7 @@ class TestCephDisk(object):
             'whoami': '1234',
             'journal_dev': '/dev/Xda2',
         }
-        out = ceph_disk.list_format_more_osd_info_plain(dev)
+        out = main.list_format_more_osd_info_plain(dev)
         assert dev['cluster'] in " ".join(out)
         assert dev['journal_dev'] in " ".join(out)
         assert dev['whoami'] in " ".join(out)
@@ -50,7 +66,7 @@ class TestCephDisk(object):
             'whoami': '1234',
             'journal_dev': '/dev/Xda2',
         }
-        out = ceph_disk.list_format_more_osd_info_plain(dev)
+        out = main.list_format_more_osd_info_plain(dev)
         assert 'unknown cluster' in " ".join(out)
 
     def test_list_format_plain(self):
@@ -60,7 +76,7 @@ class TestCephDisk(object):
             'type': 'other',
             'mount': '/somewhere',
         }]
-        out = ceph_disk.list_format_plain(payload)
+        out = main.list_format_plain(payload)
         assert payload[0]['path'] in out
         assert payload[0]['type'] in out
         assert payload[0]['mount'] in out
@@ -70,7 +86,7 @@ class TestCephDisk(object):
             'ptype': 'unknown',
             'type': 'swap',
         }]
-        out = ceph_disk.list_format_plain(payload)
+        out = main.list_format_plain(payload)
         assert payload[0]['path'] in out
         assert payload[0]['type'] in out
 
@@ -88,7 +104,7 @@ class TestCephDisk(object):
                 }
             ],
         }]
-        out = ceph_disk.list_format_plain(payload)
+        out = main.list_format_plain(payload)
         assert payload[0]['path'] in out
         assert payload[0]['partitions'][0]['path'] in out
 
@@ -98,11 +114,11 @@ class TestCephDisk(object):
         #
         dev = {
             'path': '/dev/Xda1',
-            'ptype': ceph_disk.OSD_UUID,
+            'ptype': main.PTYPE['regular']['osd']['ready'],
             'state': 'prepared',
             'whoami': '1234',
         }
-        out = ceph_disk.list_format_dev_plain(dev)
+        out = main.list_format_dev_plain(dev)
         assert 'data' in out
         assert dev['whoami'] in out
         assert dev['state'] in out
@@ -111,10 +127,10 @@ class TestCephDisk(object):
         #
         dev = {
             'path': '/dev/Xda2',
-            'ptype': ceph_disk.JOURNAL_UUID,
+            'ptype': main.PTYPE['regular']['journal']['ready'],
             'journal_for': '/dev/Xda1',
         }
-        out = ceph_disk.list_format_dev_plain(dev)
+        out = main.list_format_dev_plain(dev)
         assert 'journal' in out
         assert dev['journal_for'] in out
 
@@ -122,15 +138,11 @@ class TestCephDisk(object):
         # dmcrypt data
         #
         ptype2type = {
-            ceph_disk.DMCRYPT_OSD_UUID: 'plain',
-            ceph_disk.DMCRYPT_LUKS_OSD_UUID: 'LUKS',
+            main.PTYPE['plain']['osd']['ready']: 'plain',
+            main.PTYPE['luks']['osd']['ready']: 'luks',
         }
         for (ptype, type) in ptype2type.iteritems():
             for holders in ((), ("dm_0",), ("dm_0", "dm_1")):
-                devices = [{
-                    'path': '/dev/dm_0',
-                    'whoami': '1234',
-                }]
                 dev = {
                     'dmcrypt': {
                         'holders': holders,
@@ -139,17 +151,14 @@ class TestCephDisk(object):
                     'path': '/dev/Xda1',
                     'ptype': ptype,
                     'state': 'prepared',
+                    'whoami': '1234',
                 }
-                with patch.multiple(
-                        ceph_disk,
-                        list_devices=lambda path: devices,
-                        ):
-                    out = ceph_disk.list_format_dev_plain(dev, devices)
+                out = main.list_format_dev_plain(dev)
                 assert 'data' in out
                 assert 'dmcrypt' in out
                 assert type in out
                 if len(holders) == 1:
-                    assert devices[0]['whoami'] in out
+                    assert dev['whoami'] in out
                 for holder in holders:
                     assert holder in out
 
@@ -157,8 +166,8 @@ class TestCephDisk(object):
         # dmcrypt journal
         #
         ptype2type = {
-            ceph_disk.DMCRYPT_JOURNAL_UUID: 'plain',
-            ceph_disk.DMCRYPT_LUKS_JOURNAL_UUID: 'LUKS',
+            main.PTYPE['plain']['journal']['ready']: 'plain',
+            main.PTYPE['luks']['journal']['ready']: 'luks',
         }
         for (ptype, type) in ptype2type.iteritems():
             for holders in ((), ("dm_0",)):
@@ -171,7 +180,7 @@ class TestCephDisk(object):
                         'type': type,
                     },
                 }
-                out = ceph_disk.list_format_dev_plain(dev, devices)
+                out = main.list_format_dev_plain(dev)
                 assert 'journal' in out
                 assert 'dmcrypt' in out
                 assert type in out
@@ -185,19 +194,20 @@ class TestCephDisk(object):
         fs_type = 'ext4'
         cluster = 'ceph'
         uuid_map = {}
+
         def more_osd_info(path, uuid_map, desc):
             desc['cluster'] = cluster
         #
         # mounted therefore active
         #
         with patch.multiple(
-                ceph_disk,
+                main,
                 is_mounted=lambda dev: mount_path,
                 get_dev_fs=lambda dev: fs_type,
                 more_osd_info=more_osd_info
         ):
             desc = {}
-            ceph_disk.list_dev_osd(dev, uuid_map, desc)
+            main.list_dev_osd(dev, uuid_map, desc)
             assert {'cluster': 'ceph',
                     'fs_type': 'ext4',
                     'mount': '/mount/path',
@@ -207,27 +217,28 @@ class TestCephDisk(object):
         #
         mount_path = None
         with patch.multiple(
-                ceph_disk,
+                main,
                 is_mounted=lambda dev: mount_path,
                 get_dev_fs=lambda dev: fs_type,
                 mount=fail_to_mount,
                 more_osd_info=more_osd_info
         ):
             desc = {}
-            ceph_disk.list_dev_osd(dev, uuid_map, desc)
+            main.list_dev_osd(dev, uuid_map, desc)
             assert {'fs_type': 'ext4',
                     'mount': mount_path,
                     'state': 'unprepared'} == desc
         #
         # not mounted and magic found: prepared
         #
+
         def get_oneliner(path, what):
             if what == 'magic':
-                return ceph_disk.CEPH_OSD_ONDISK_MAGIC
+                return main.CEPH_OSD_ONDISK_MAGIC
             else:
                 raise Exception('unknown ' + what)
         with patch.multiple(
-                ceph_disk,
+                main,
                 is_mounted=lambda dev: mount_path,
                 get_dev_fs=lambda dev: fs_type,
                 mount=DEFAULT,
@@ -236,67 +247,27 @@ class TestCephDisk(object):
                 more_osd_info=more_osd_info
         ):
             desc = {}
-            ceph_disk.list_dev_osd(dev, uuid_map, desc)
+            main.list_dev_osd(dev, uuid_map, desc)
             assert {'cluster': 'ceph',
                     'fs_type': 'ext4',
                     'mount': mount_path,
-                    'magic': ceph_disk.CEPH_OSD_ONDISK_MAGIC,
+                    'magic': main.CEPH_OSD_ONDISK_MAGIC,
                     'state': 'prepared'} == desc
 
-    @patch('os.path.exists')
-    def test_list_paths_to_names(self, m_exists):
-
-        def exists(path):
-            return path in (
-                '/sys/block/sda',
-                '/sys/block/sdb',
-                '/sys/block/cciss!c0d0',
-                '/sys/block/cciss!c0d1',
-                '/sys/block/cciss!c0d2',
-            )
-
-        m_exists.side_effect = exists
-        paths = [
-            '/dev/sda',
-            '/dev/cciss/c0d0',
-            'cciss!c0d1',
-            'cciss/c0d2',
-            'sdb',
-        ]
-        expected = [
-            'sda',
-            'cciss!c0d0',
-            'cciss!c0d1',
-            'cciss!c0d2',
-            'sdb',
-        ]
-        assert expected == ceph_disk.list_paths_to_names(paths)
-        with pytest.raises(ceph_disk.Error) as excinfo:
-            ceph_disk.list_paths_to_names(['unknown'])
-        assert 'unknown' in excinfo.value.message
-
     def test_list_all_partitions(self):
-        partition_uuid = "56244cf5-83ef-4984-888a-2d8b8e0e04b2"
         disk = "Xda"
         partition = "Xda1"
 
         with patch(
-                'ceph_disk.os',
+                'ceph_disk.main.os',
                 listdir=lambda path: [disk],
         ), patch.multiple(
-            ceph_disk,
+            main,
             list_partitions=lambda dev: [partition],
         ):
-                assert {disk: [partition]} == ceph_disk.list_all_partitions([])
-
-        with patch.multiple(
-                ceph_disk,
-                list_partitions=lambda dev: [partition],
-        ):
-                assert {disk: [partition]} == ceph_disk.list_all_partitions([disk])
+                assert {disk: [partition]} == main.list_all_partitions()
 
     def test_list_data(self):
-        args = ceph_disk.parse_args(['list'])
         #
         # a data partition that fails to mount is silently
         # ignored
@@ -306,16 +277,18 @@ class TestCephDisk(object):
         partition = "Xda1"
         fs_type = "ext4"
 
+        def get_partition_type(dev):
+            return main.PTYPE['regular']['osd']['ready']
         with patch.multiple(
-                ceph_disk,
-                list_all_partitions=lambda names: { disk: [partition] },
+                main,
+                list_all_partitions=lambda: {disk: [partition]},
                 get_partition_uuid=lambda dev: partition_uuid,
-                get_partition_type=lambda dev: ceph_disk.OSD_UUID,
+                get_partition_type=get_partition_type,
                 get_dev_fs=lambda dev: fs_type,
                 mount=fail_to_mount,
                 unmount=DEFAULT,
                 is_partition=lambda dev: True,
-                ):
+        ):
             expect = [{'path': '/dev/' + disk,
                        'partitions': [{
                            'dmcrypt': {},
@@ -323,18 +296,17 @@ class TestCephDisk(object):
                            'is_partition': True,
                            'mount': None,
                            'path': '/dev/' + partition,
-                           'ptype': ceph_disk.OSD_UUID,
+                           'ptype': main.PTYPE['regular']['osd']['ready'],
                            'state': 'unprepared',
                            'type': 'data',
                            'uuid': partition_uuid,
                        }]}]
-            assert expect == ceph_disk.list_devices(args)
+            assert expect == main.list_devices()
 
     def test_list_dmcrypt_data(self):
-        args = ceph_disk.parse_args(['list'])
         partition_type2type = {
-            ceph_disk.DMCRYPT_OSD_UUID: 'plain',
-            ceph_disk.DMCRYPT_LUKS_OSD_UUID: 'LUKS',
+            main.PTYPE['plain']['osd']['ready']: 'plain',
+            main.PTYPE['luks']['osd']['ready']: 'LUKS',
         }
         for (partition_type, type) in partition_type2type.iteritems():
             #
@@ -345,13 +317,13 @@ class TestCephDisk(object):
             partition = "Xda1"
             holders = ["dm-0"]
             with patch.multiple(
-                    ceph_disk,
+                    main,
                     is_held=lambda dev: holders,
-                    list_all_partitions=lambda names: { disk: [partition] },
+                    list_all_partitions=lambda: {disk: [partition]},
                     get_partition_uuid=lambda dev: partition_uuid,
                     get_partition_type=lambda dev: partition_type,
                     is_partition=lambda dev: True,
-                    ):
+            ):
                 expect = [{'path': '/dev/' + disk,
                            'partitions': [{
                                'dmcrypt': {
@@ -367,22 +339,22 @@ class TestCephDisk(object):
                                'type': 'data',
                                'uuid': partition_uuid,
                            }]}]
-                assert expect == ceph_disk.list_devices(args)
+                assert expect == main.list_devices()
             #
             # dmcrypt data partition with two holders
             #
             partition_uuid = "56244cf5-83ef-4984-888a-2d8b8e0e04b2"
             disk = "Xda"
             partition = "Xda1"
-            holders = ["dm-0","dm-1"]
+            holders = ["dm-0", "dm-1"]
             with patch.multiple(
-                    ceph_disk,
+                    main,
                     is_held=lambda dev: holders,
-                    list_all_partitions=lambda names: { disk: [partition] },
+                    list_all_partitions=lambda: {disk: [partition]},
                     get_partition_uuid=lambda dev: partition_uuid,
                     get_partition_type=lambda dev: partition_type,
                     is_partition=lambda dev: True,
-                    ):
+            ):
                 expect = [{'path': '/dev/' + disk,
                            'partitions': [{
                                'dmcrypt': {
@@ -395,23 +367,25 @@ class TestCephDisk(object):
                                'type': 'data',
                                'uuid': partition_uuid,
                            }]}]
-                assert expect == ceph_disk.list_devices(args)
+                assert expect == main.list_devices()
 
     def test_list_multipath(self):
-        args = ceph_disk.parse_args(['list'])
         #
         # multipath data partition
         #
         partition_uuid = "56244cf5-83ef-4984-888a-2d8b8e0e04b2"
         disk = "Xda"
         partition = "Xda1"
+
+        def get_partition_type(dev):
+            return main.PTYPE['mpath']['osd']['ready']
         with patch.multiple(
-                ceph_disk,
-                list_all_partitions=lambda names: { disk: [partition] },
+                main,
+                list_all_partitions=lambda: {disk: [partition]},
                 get_partition_uuid=lambda dev: partition_uuid,
-                get_partition_type=lambda dev: ceph_disk.MPATH_OSD_UUID,
+                get_partition_type=get_partition_type,
                 is_partition=lambda dev: True,
-                ):
+        ):
             expect = [{'path': '/dev/' + disk,
                        'partitions': [{
                            'dmcrypt': {},
@@ -420,93 +394,111 @@ class TestCephDisk(object):
                            'mount': None,
                            'multipath': True,
                            'path': '/dev/' + partition,
-                           'ptype': ceph_disk.MPATH_OSD_UUID,
+                           'ptype': main.PTYPE['mpath']['osd']['ready'],
                            'state': 'unprepared',
                            'type': 'data',
                            'uuid': partition_uuid,
                        }]}]
-            assert expect == ceph_disk.list_devices(args)
+            assert expect == main.list_devices()
         #
         # multipath journal partition
         #
         journal_partition_uuid = "2cc40457-259e-4542-b029-785c7cc37871"
+
+        def get_partition_type(dev):
+            return main.PTYPE['mpath']['journal']['ready']
         with patch.multiple(
-                ceph_disk,
-                list_all_partitions=lambda names: { disk: [partition] },
+                main,
+                list_all_partitions=lambda: {disk: [partition]},
                 get_partition_uuid=lambda dev: journal_partition_uuid,
-                get_partition_type=lambda dev: ceph_disk.MPATH_JOURNAL_UUID,
+                get_partition_type=get_partition_type,
                 is_partition=lambda dev: True,
-                ):
+        ):
             expect = [{'path': '/dev/' + disk,
                        'partitions': [{
                            'dmcrypt': {},
                            'is_partition': True,
                            'multipath': True,
                            'path': '/dev/' + partition,
-                           'ptype': ceph_disk.MPATH_JOURNAL_UUID,
+                           'ptype': main.PTYPE['mpath']['journal']['ready'],
                            'type': 'journal',
                            'uuid': journal_partition_uuid,
                        }]}]
-            assert expect == ceph_disk.list_devices(args)
-
-    def test_list_dmcrypt(self):
-        self.list(ceph_disk.DMCRYPT_OSD_UUID, ceph_disk.DMCRYPT_JOURNAL_UUID)
-        self.list(ceph_disk.DMCRYPT_LUKS_OSD_UUID, ceph_disk.DMCRYPT_LUKS_JOURNAL_UUID)
-
-    def test_list_normal(self):
-        self.list(ceph_disk.OSD_UUID, ceph_disk.JOURNAL_UUID)
-
-    def list(self, data_ptype, journal_ptype):
-        args = ceph_disk.parse_args(['--verbose', 'list'])
+            assert expect == main.list_devices()
+
+    def test_list_default(self):
+        self.list(main.PTYPE['plain']['osd']['ready'],
+                  main.PTYPE['plain']['journal']['ready'])
+        self.list(main.PTYPE['luks']['osd']['ready'],
+                  main.PTYPE['luks']['journal']['ready'])
+        self.list(main.PTYPE['regular']['osd']['ready'],
+                  main.PTYPE['regular']['journal']['ready'])
+
+    def test_list_bluestore(self):
+        self.list(main.PTYPE['plain']['osd']['ready'],
+                  main.PTYPE['plain']['block']['ready'])
+        self.list(main.PTYPE['luks']['osd']['ready'],
+                  main.PTYPE['luks']['block']['ready'])
+        self.list(main.PTYPE['regular']['osd']['ready'],
+                  main.PTYPE['regular']['block']['ready'])
+
+    def list(self, data_ptype, space_ptype):
         #
         # a single disk has a data partition and a journal
         # partition and the osd is active
         #
+        name = main.Ptype.space_ptype_to_name(space_ptype)
         data_uuid = "56244cf5-83ef-4984-888a-2d8b8e0e04b2"
         disk = "Xda"
         data = "Xda1"
         data_holder = "dm-0"
-        journal = "Xda2"
-        journal_holder = "dm-0"
+        space = "Xda2"
+        space_holder = "dm-0"
         mount_path = '/mount/path'
         fs_type = 'ext4'
-        journal_uuid = "7ad5e65a-0ca5-40e4-a896-62a74ca61c55"
+        space_uuid = "7ad5e65a-0ca5-40e4-a896-62a74ca61c55"
         ceph_fsid = "60a2ef70-d99b-4b9b-a83c-8a86e5e60091"
         osd_id = '1234'
+
         def get_oneliner(path, what):
-            if what == 'journal_uuid':
-                return journal_uuid
+            if '_uuid' in what:
+                if what == name + '_uuid':
+                    return space_uuid
+                else:
+                    return None
             elif what == 'ceph_fsid':
                 return ceph_fsid
             elif what == 'whoami':
                 return osd_id
             else:
                 raise Exception('unknown ' + what)
+
         def get_partition_uuid(dev):
             if dev == '/dev/' + data:
                 return data_uuid
-            elif dev == '/dev/' + journal:
-                return journal_uuid
+            elif dev == '/dev/' + space:
+                return space_uuid
             else:
                 raise Exception('unknown ' + dev)
+
         def get_partition_type(dev):
             if (dev == '/dev/' + data or
-                dev == '/dev/' + data_holder):
+                    dev == '/dev/' + data_holder):
                 return data_ptype
-            elif (dev == '/dev/' + journal or
-                  dev == '/dev/' + journal_holder):
-                return journal_ptype
+            elif (dev == '/dev/' + space or
+                    dev == '/dev/' + space_holder):
+                return space_ptype
             else:
                 raise Exception('unknown ' + dev)
         cluster = 'ceph'
-        if data_ptype == ceph_disk.OSD_UUID:
+        if data_ptype == main.PTYPE['regular']['osd']['ready']:
             data_dmcrypt = {}
-        elif data_ptype == ceph_disk.DMCRYPT_OSD_UUID:
+        elif data_ptype == main.PTYPE['plain']['osd']['ready']:
             data_dmcrypt = {
                 'type': 'plain',
                 'holders': [data_holder],
             }
-        elif data_ptype == ceph_disk.DMCRYPT_LUKS_OSD_UUID:
+        elif data_ptype == main.PTYPE['luks']['osd']['ready']:
             data_dmcrypt = {
                 'type': 'LUKS',
                 'holders': [data_holder],
@@ -514,27 +506,27 @@ class TestCephDisk(object):
         else:
             raise Exception('unknown ' + data_ptype)
 
-        if journal_ptype == ceph_disk.JOURNAL_UUID:
-            journal_dmcrypt = {}
-        elif journal_ptype == ceph_disk.DMCRYPT_JOURNAL_UUID:
-            journal_dmcrypt = {
+        if space_ptype == main.PTYPE['regular'][name]['ready']:
+            space_dmcrypt = {}
+        elif space_ptype == main.PTYPE['plain'][name]['ready']:
+            space_dmcrypt = {
                 'type': 'plain',
-                'holders': [journal_holder],
+                'holders': [space_holder],
             }
-        elif journal_ptype == ceph_disk.DMCRYPT_LUKS_JOURNAL_UUID:
-            journal_dmcrypt = {
+        elif space_ptype == main.PTYPE['luks'][name]['ready']:
+            space_dmcrypt = {
                 'type': 'LUKS',
-                'holders': [journal_holder],
+                'holders': [space_holder],
             }
         else:
-            raise Exception('unknown ' + journal_ptype)
+            raise Exception('unknown ' + space_ptype)
 
         if data_dmcrypt:
             def is_held(dev):
                 if dev == '/dev/' + data:
                     return [data_holder]
-                elif dev == '/dev/' + journal:
-                    return [journal_holder]
+                elif dev == '/dev/' + space:
+                    return [space_holder]
                 else:
                     raise Exception('unknown ' + dev)
         else:
@@ -542,8 +534,8 @@ class TestCephDisk(object):
                 return []
 
         with patch.multiple(
-                ceph_disk,
-                list_all_partitions=lambda names: { disk: [data, journal] },
+                main,
+                list_all_partitions=lambda: {disk: [data, space]},
                 get_dev_fs=lambda dev: fs_type,
                 is_mounted=lambda dev: mount_path,
                 get_partition_uuid=get_partition_uuid,
@@ -554,7 +546,7 @@ class TestCephDisk(object):
                 unmount=DEFAULT,
                 get_oneliner=get_oneliner,
                 is_held=is_held,
-                ):
+        ):
             expect = [{'path': '/dev/' + disk,
                        'partitions': [{
                            'ceph_fsid': ceph_fsid,
@@ -562,8 +554,8 @@ class TestCephDisk(object):
                            'dmcrypt': data_dmcrypt,
                            'fs_type': fs_type,
                            'is_partition': True,
-                           'journal_dev': '/dev/' + journal,
-                           'journal_uuid': journal_uuid,
+                           name + '_dev': '/dev/' + space,
+                           name + '_uuid': space_uuid,
                            'mount': mount_path,
                            'path': '/dev/' + data,
                            'ptype': data_ptype,
@@ -572,19 +564,17 @@ class TestCephDisk(object):
                            'whoami': osd_id,
                            'uuid': data_uuid,
                        }, {
-                           'dmcrypt': journal_dmcrypt,
+                           'dmcrypt': space_dmcrypt,
                            'is_partition': True,
-                           'journal_for': '/dev/' + data,
-                           'path': '/dev/' + journal,
-                           'ptype': journal_ptype,
-                           'type': 'journal',
-                           'uuid': journal_uuid,
-                       },
-                                  ]}]
-            assert expect == ceph_disk.list_devices(args)
+                           name + '_for': '/dev/' + data,
+                           'path': '/dev/' + space,
+                           'ptype': space_ptype,
+                           'type': name,
+                           'uuid': space_uuid,
+                       }]}]
+            assert expect == main.list_devices()
 
     def test_list_other(self):
-        args = ceph_disk.parse_args(['list'])
         #
         # not swap, unknown fs type, not mounted, with uuid
         #
@@ -593,12 +583,12 @@ class TestCephDisk(object):
         disk = "Xda"
         partition = "Xda1"
         with patch.multiple(
-                ceph_disk,
-                list_all_partitions=lambda names: { disk: [partition] },
+                main,
+                list_all_partitions=lambda: {disk: [partition]},
                 get_partition_uuid=lambda dev: partition_uuid,
                 get_partition_type=lambda dev: partition_type,
                 is_partition=lambda dev: True,
-                ):
+        ):
             expect = [{'path': '/dev/' + disk,
                        'partitions': [{'dmcrypt': {},
                                        'is_partition': True,
@@ -606,7 +596,7 @@ class TestCephDisk(object):
                                        'ptype': partition_type,
                                        'type': 'other',
                                        'uuid': partition_uuid}]}]
-            assert expect == ceph_disk.list_devices(args)
+            assert expect == main.list_devices()
         #
         # not swap, mounted, ext4 fs type, with uuid
         #
@@ -617,25 +607,26 @@ class TestCephDisk(object):
         mount_path = '/mount/path'
         fs_type = 'ext4'
         with patch.multiple(
-                ceph_disk,
-                list_all_partitions=lambda names: { disk: [partition] },
+                main,
+                list_all_partitions=lambda: {disk: [partition]},
                 get_dev_fs=lambda dev: fs_type,
                 is_mounted=lambda dev: mount_path,
                 get_partition_uuid=lambda dev: partition_uuid,
                 get_partition_type=lambda dev: partition_type,
                 is_partition=lambda dev: True,
-                ):
+        ):
             expect = [{'path': '/dev/' + disk,
-                       'partitions': [{'dmcrypt': {},
-                                       'is_partition': True,
-                                       'mount': mount_path,
-                                       'fs_type': fs_type,
-                                       'path': '/dev/' + partition,
-                                       'ptype': partition_type,
-                                       'type': 'other',
-                                       'uuid': partition_uuid,
-                                   }]}]
-            assert expect == ceph_disk.list_devices(args)
+                       'partitions': [{
+                           'dmcrypt': {},
+                           'is_partition': True,
+                           'mount': mount_path,
+                           'fs_type': fs_type,
+                           'path': '/dev/' + partition,
+                           'ptype': partition_type,
+                           'type': 'other',
+                           'uuid': partition_uuid,
+                       }]}]
+            assert expect == main.list_devices()
 
         #
         # swap, with uuid
@@ -645,13 +636,13 @@ class TestCephDisk(object):
         disk = "Xda"
         partition = "Xda1"
         with patch.multiple(
-                ceph_disk,
-                list_all_partitions=lambda names: { disk: [partition] },
+                main,
+                list_all_partitions=lambda: {disk: [partition]},
                 is_swap=lambda dev: True,
                 get_partition_uuid=lambda dev: partition_uuid,
                 get_partition_type=lambda dev: partition_type,
                 is_partition=lambda dev: True,
-                ):
+        ):
             expect = [{'path': '/dev/' + disk,
                        'partitions': [{'dmcrypt': {},
                                        'is_partition': True,
@@ -659,7 +650,7 @@ class TestCephDisk(object):
                                        'ptype': partition_type,
                                        'type': 'swap',
                                        'uuid': partition_uuid}]}]
-            assert expect == ceph_disk.list_devices(args)
+            assert expect == main.list_devices()
 
         #
         # whole disk
@@ -668,107 +659,108 @@ class TestCephDisk(object):
         disk = "Xda"
         partition = "Xda1"
         with patch.multiple(
-                ceph_disk,
-                list_all_partitions=lambda names: { disk: [] },
+                main,
+                list_all_partitions=lambda: {disk: []},
                 is_partition=lambda dev: False,
-                ):
+        ):
             expect = [{'path': '/dev/' + disk,
                        'dmcrypt': {},
                        'is_partition': False,
                        'ptype': 'unknown',
                        'type': 'other'}]
-            assert expect == ceph_disk.list_devices(args)
+            assert expect == main.list_devices()
+
 
 class TestCephDiskDeactivateAndDestroy(unittest.TestCase):
 
     def setup_class(self):
-        ceph_disk.setup_logging(verbose=True, log_stdout=False)
+        main.setup_logging(verbose=True, log_stdout=False)
 
     @patch('__builtin__.open')
     def test_main_deactivate(self, mock_open):
-        DMCRYPT_OSD_UUID = '4fbd7e29-9d25-41b8-afd0-5ec00ceff05d'
+        data = tempfile.mkdtemp()
+        main.setup_statedir(data)
         DMCRYPT_LUKS_OSD_UUID = '4fbd7e29-9d25-41b8-afd0-35865ceff05d'
         part_uuid = '0ce28a16-6d5d-11e5-aec3-fa163e5c167b'
         disk = 'sdX'
-        cluster = 'ceph'
         #
         # Can not find match device by osd-id
         #
-        args = ceph_disk.parse_args(['deactivate', \
-                                     '--cluster', 'ceph', \
-                                     '--deactivate-by-id', '5566'])
+        args = main.parse_args(['deactivate',
+                                '--cluster', 'ceph',
+                                '--deactivate-by-id', '5566'])
         fake_device = [{'path': '/dev/' + disk,
-                          'partitions': [{
-                                  'path': '/dev/sdX1',
-                                  'whoami': '-1',
-                                  }]}]
+                        'partitions': [{
+                            'path': '/dev/sdX1',
+                            'whoami': '-1',
+                        }]}]
         with patch.multiple(
-                ceph_disk,
-                list_devices=lambda dev:fake_device,
-                ):
-            self.assertRaises(Exception, ceph_disk.main_deactivate, args)
+                main,
+                list_devices=lambda: fake_device,
+        ):
+            self.assertRaises(Exception, main.main_deactivate, args)
 
         #
         # find match device by osd-id, status: OSD_STATUS_IN_DOWN
         # with --mark-out option
         #
-        args = ceph_disk.parse_args(['deactivate', \
-                                     '--cluster', 'ceph', \
-                                     '--deactivate-by-id', '5566', \
-                                     '--mark-out'])
+        args = main.parse_args(['deactivate',
+                                '--cluster', 'ceph',
+                                '--deactivate-by-id', '5566',
+                                '--mark-out'])
         fake_device = [{'path': '/dev/' + disk,
-                          'partitions': [{
-                                  'ptype': DMCRYPT_LUKS_OSD_UUID,
-                                  'path': '/dev/sdX1',
-                                  'whoami': '5566',
-                                  'mount': '/var/lib/ceph/osd/ceph-5566/',
-                                  'uuid': part_uuid,
-                                  }]}]
+                        'partitions': [{
+                            'ptype': DMCRYPT_LUKS_OSD_UUID,
+                            'path': '/dev/sdX1',
+                            'whoami': '5566',
+                            'mount': '/var/lib/ceph/osd/ceph-5566/',
+                            'uuid': part_uuid,
+                        }]}]
         with patch.multiple(
-                ceph_disk,
-                list_devices=lambda dev:fake_device,
+                main,
+                list_devices=lambda: fake_device,
                 _check_osd_status=lambda cluster, osd_id: 2,
                 _mark_osd_out=lambda cluster, osd_id: True
-                ):
-            ceph_disk.main_deactivate(args)
+        ):
+            main.main_deactivate(args)
 
         #
         # find match device by device partition, status: OSD_STATUS_IN_DOWN
         #
-        args = ceph_disk.parse_args(['deactivate', \
-                                     '--cluster', 'ceph', \
-                                     '/dev/sdX1'])
+        args = main.parse_args(['deactivate',
+                                '--cluster', 'ceph',
+                                '/dev/sdX1'])
         fake_device = [{'path': '/dev/' + disk,
-                          'partitions': [{
-                                  'ptype': DMCRYPT_LUKS_OSD_UUID,
-                                  'path': '/dev/sdX1',
-                                  'whoami': '5566',
-                                  'mount': '/var/lib/ceph/osd/ceph-5566/',
-                                  'uuid': part_uuid,
-                                  }]}]
+                        'partitions': [{
+                            'ptype': DMCRYPT_LUKS_OSD_UUID,
+                            'path': '/dev/sdX1',
+                            'whoami': '5566',
+                            'mount': '/var/lib/ceph/osd/ceph-5566/',
+                            'uuid': part_uuid,
+                        }]}]
         with patch.multiple(
-                ceph_disk,
-                list_devices=lambda dev:fake_device,
+                main,
+                list_devices=lambda: fake_device,
                 _check_osd_status=lambda cluster, osd_id: 0,
-                ):
-            ceph_disk.main_deactivate(args)
+        ):
+            main.main_deactivate(args)
 
         #
         # find match device by device partition, status: OSD_STATUS_IN_UP
         # with --mark-out option
         #
-        args = ceph_disk.parse_args(['deactivate', \
-                                     '--cluster', 'ceph', \
-                                     '/dev/sdX1', \
-                                     '--mark-out'])
+        args = main.parse_args(['deactivate',
+                                '--cluster', 'ceph',
+                                '/dev/sdX1',
+                                '--mark-out'])
         fake_device = [{'path': '/dev/' + disk,
-                          'partitions': [{
-                                  'ptype': DMCRYPT_LUKS_OSD_UUID,
-                                  'path': '/dev/sdX1',
-                                  'whoami': '5566',
-                                  'mount': '/var/lib/ceph/osd/ceph-5566/',
-                                  'uuid': part_uuid,
-                                  }]}]
+                        'partitions': [{
+                            'ptype': DMCRYPT_LUKS_OSD_UUID,
+                            'path': '/dev/sdX1',
+                            'whoami': '5566',
+                            'mount': '/var/lib/ceph/osd/ceph-5566/',
+                            'uuid': part_uuid,
+                        }]}]
 
         # mock the file open.
         file_opened = io.StringIO()
@@ -776,9 +768,9 @@ class TestCephDiskDeactivateAndDestroy(unittest.TestCase):
         mock_open.return_value = file_opened
 
         with patch.multiple(
-                ceph_disk,
+                main,
                 mock_open,
-                list_devices=lambda dev:fake_device,
+                list_devices=lambda: fake_device,
                 _check_osd_status=lambda cluster, osd_id: 3,
                 _mark_osd_out=lambda cluster, osd_id: True,
                 stop_daemon=lambda cluster, osd_id: True,
@@ -786,23 +778,23 @@ class TestCephDiskDeactivateAndDestroy(unittest.TestCase):
                 path_set_context=lambda path: True,
                 unmount=lambda path: True,
                 dmcrypt_unmap=lambda part_uuid: True,
-                ):
-            ceph_disk.main_deactivate(args)
+        ):
+            main.main_deactivate(args)
 
         #
         # find match device by osd-id, status: OSD_STATUS_OUT_UP
         #
-        args = ceph_disk.parse_args(['deactivate', \
-                                     '--cluster', 'ceph', \
-                                     '--deactivate-by-id', '5566'])
+        args = main.parse_args(['deactivate',
+                                '--cluster', 'ceph',
+                                '--deactivate-by-id', '5566'])
         fake_device = [{'path': '/dev/' + disk,
-                          'partitions': [{
-                                  'ptype': DMCRYPT_LUKS_OSD_UUID,
-                                  'path': '/dev/sdX1',
-                                  'whoami': '5566',
-                                  'mount': '/var/lib/ceph/osd/ceph-5566/',
-                                  'uuid': part_uuid,
-                                  }]}]
+                        'partitions': [{
+                            'ptype': DMCRYPT_LUKS_OSD_UUID,
+                            'path': '/dev/sdX1',
+                            'whoami': '5566',
+                            'mount': '/var/lib/ceph/osd/ceph-5566/',
+                            'uuid': part_uuid,
+                        }]}]
 
         # mock the file open.
         file_opened = io.StringIO()
@@ -810,9 +802,9 @@ class TestCephDiskDeactivateAndDestroy(unittest.TestCase):
         mock_open.return_value = file_opened
 
         with patch.multiple(
-                ceph_disk,
+                main,
                 mock_open,
-                list_devices=lambda dev:fake_device,
+                list_devices=lambda: fake_device,
                 _check_osd_status=lambda cluster, osd_id: 1,
                 _mark_osd_out=lambda cluster, osd_id: True,
                 stop_daemon=lambda cluster, osd_id: True,
@@ -820,63 +812,64 @@ class TestCephDiskDeactivateAndDestroy(unittest.TestCase):
                 path_set_context=lambda path: True,
                 unmount=lambda path: True,
                 dmcrypt_unmap=lambda part_uuid: True,
-                ):
-            ceph_disk.main_deactivate(args)
+        ):
+            main.main_deactivate(args)
+        shutil.rmtree(data)
 
     def test_mark_out_out(self):
-        dev = {
-            'cluster': 'ceph',
-            'osd_id': '5566',
-        }
-
         def mark_osd_out_fail(osd_id):
-            raise ceph_disk.Error('Could not find osd.%s, is a vaild/exist osd id?' % osd_id)
+            raise main.Error('Could not find osd.%s, is a vaild/exist osd id?'
+                             % osd_id)
 
         with patch.multiple(
-                ceph_disk,
+                main,
                 command=mark_osd_out_fail,
-                ):
-            self.assertRaises(Exception, ceph_disk._mark_osd_out, 'ceph', '5566')
+        ):
+            self.assertRaises(Exception, main._mark_osd_out, 'ceph', '5566')
 
     def test_check_osd_status(self):
         #
         # command failure
         #
         with patch.multiple(
-                ceph_disk,
+                main,
                 command=raise_command_error,
-                ):
-            self.assertRaises(Exception, ceph_disk._check_osd_status, 'ceph', '5566')
+        ):
+            self.assertRaises(Exception, main._check_osd_status,
+                              'ceph', '5566')
 
         #
         # osd not found
         #
 
-        fake_data = '{"osds":[{"osd":0,"up":1,"in":1},{"osd":1,"up":1,"in":1}]}'
+        fake_data = ('{"osds":[{"osd":0,"up":1,"in":1},'
+                     '{"osd":1,"up":1,"in":1}]}')
 
         def return_fake_value(cmd):
-            return fake_data, 0
+            return fake_data, '', 0
 
         with patch.multiple(
-                ceph_disk,
+                main,
                 command=return_fake_value,
-                ):
-            self.assertRaises(Exception, ceph_disk._check_osd_status, 'ceph', '5566')
+        ):
+            self.assertRaises(Exception, main._check_osd_status,
+                              'ceph', '5566')
 
         #
         # successfully
         #
 
-        fake_data = '{"osds":[{"osd":0,"up":1,"in":1},{"osd":5566,"up":1,"in":1}]}'
+        fake_data = ('{"osds":[{"osd":0,"up":1,"in":1},'
+                     '{"osd":5566,"up":1,"in":1}]}')
 
         def return_fake_value(cmd):
-            return fake_data, 0
+            return fake_data, '', 0
 
         with patch.multiple(
-                ceph_disk,
+                main,
                 command=return_fake_value,
-                ):
-            ceph_disk._check_osd_status('ceph', '5566')
+        ):
+            main._check_osd_status('ceph', '5566')
 
     def test_stop_daemon(self):
         STATEDIR = '/var/lib/ceph'
@@ -890,19 +883,13 @@ class TestCephDiskDeactivateAndDestroy(unittest.TestCase):
         # fail on init type
         #
         with patch('os.path.exists', return_value=False):
-            self.assertRaises(Exception, ceph_disk.stop_daemon, 'ceph', '5566')
-
-        #
-        # faile on os path
-        #
-        with patch('os.path.exists', return_value=Exception):
-            self.assertRaises(Exception, ceph_disk.stop_daemon, 'ceph', '5566')
+            self.assertRaises(Exception, main.stop_daemon, 'ceph', '5566')
 
         #
         # upstart failure
         #
         fake_path = (STATEDIR + '/osd/{cluster}-{osd_id}/upstart').format(
-                    cluster=cluster, osd_id=osd_id)
+            cluster=cluster, osd_id=osd_id)
 
         def path_exist(check_path):
             if check_path == fake_path:
@@ -914,17 +901,17 @@ class TestCephDiskDeactivateAndDestroy(unittest.TestCase):
         check_path = patcher.start()
         check_path.side_effect = path_exist
         with patch.multiple(
-                ceph_disk,
+                main,
                 check_path,
                 command_check_call=stop_daemon_fail,
-                ):
-            self.assertRaises(Exception, ceph_disk.stop_daemon, 'ceph', '5566')
+        ):
+            self.assertRaises(Exception, main.stop_daemon, 'ceph', '5566')
 
         #
         # sysvinit failure
         #
         fake_path = (STATEDIR + '/osd/{cluster}-{osd_id}/sysvinit').format(
-                    cluster=cluster, osd_id=osd_id)
+            cluster=cluster, osd_id=osd_id)
 
         def path_exist(check_path):
             if check_path == fake_path:
@@ -936,18 +923,18 @@ class TestCephDiskDeactivateAndDestroy(unittest.TestCase):
         check_path = patcher.start()
         check_path.side_effect = path_exist
         with patch.multiple(
-                ceph_disk,
+                main,
                 check_path,
                 which=lambda name: True,
                 command_check_call=stop_daemon_fail,
-                ):
-            self.assertRaises(Exception, ceph_disk.stop_daemon, 'ceph', '5566')
+        ):
+            self.assertRaises(Exception, main.stop_daemon, 'ceph', '5566')
 
         #
         # systemd failure
         #
         fake_path = (STATEDIR + '/osd/{cluster}-{osd_id}/systemd').format(
-                    cluster=cluster, osd_id=osd_id)
+            cluster=cluster, osd_id=osd_id)
 
         def path_exist(check_path):
             if check_path == fake_path:
@@ -965,11 +952,11 @@ class TestCephDiskDeactivateAndDestroy(unittest.TestCase):
         check_path = patcher.start()
         check_path.side_effect = path_exist
         with patch.multiple(
-                ceph_disk,
+                main,
                 check_path,
                 command_check_call=stop_daemon_fail,
-                ):
-            self.assertRaises(Exception, ceph_disk.stop_daemon, 'ceph', '5566')
+        ):
+            self.assertRaises(Exception, main.stop_daemon, 'ceph', '5566')
 
     def test_remove_osd_directory_files(self):
         cluster = 'ceph'
@@ -991,7 +978,8 @@ class TestCephDiskDeactivateAndDestroy(unittest.TestCase):
                 return True
             elif fake_path_remove_2 and remove_path == fake_path_remove_2:
                 return True
-            elif fake_path_remove_init and remove_path == fake_path_remove_init:
+            elif (fake_path_remove_init and
+                  remove_path == fake_path_remove_init):
                 return True
             else:
                 raise OSError
@@ -1009,12 +997,13 @@ class TestCephDiskDeactivateAndDestroy(unittest.TestCase):
         path_exist.side_effect = handle_path_exist
         path_remove.side_effect = handle_path_remove
         with patch.multiple(
-                ceph_disk,
+                main,
                 path_exist,
                 path_remove,
                 get_conf=lambda cluster, **kwargs: True,
-                ):
-            self.assertRaises(Exception, ceph_disk._remove_osd_directory_files, 'somewhere', cluster)
+        ):
+            self.assertRaises(Exception, main._remove_osd_directory_files,
+                              'somewhere', cluster)
 
         #
         # remove active fil failure
@@ -1031,12 +1020,13 @@ class TestCephDiskDeactivateAndDestroy(unittest.TestCase):
         path_exist.side_effect = handle_path_exist
         path_remove.side_effect = handle_path_remove
         with patch.multiple(
-                ceph_disk,
+                main,
                 path_exist,
                 path_remove,
                 get_conf=lambda cluster, **kwargs: True,
-                ):
-            self.assertRaises(Exception, ceph_disk._remove_osd_directory_files, 'somewhere', cluster)
+        ):
+            self.assertRaises(Exception, main._remove_osd_directory_files,
+                              'somewhere', cluster)
 
         #
         # conf_val is None and remove init file failure
@@ -1054,13 +1044,14 @@ class TestCephDiskDeactivateAndDestroy(unittest.TestCase):
         path_exist.side_effect = handle_path_exist
         path_remove.side_effect = handle_path_remove
         with patch.multiple(
-                ceph_disk,
+                main,
                 path_exist,
                 path_remove,
                 get_conf=lambda cluster, **kwargs: None,
                 init_get=lambda: 'upstart',
-                ):
-            self.assertRaises(Exception, ceph_disk._remove_osd_directory_files, 'somewhere', cluster)
+        ):
+            self.assertRaises(Exception, main._remove_osd_directory_files,
+                              'somewhere', cluster)
 
         #
         # already remove `ready`, `active` and remove init file successfully
@@ -1076,20 +1067,20 @@ class TestCephDiskDeactivateAndDestroy(unittest.TestCase):
         path_exist.side_effect = handle_path_exist
         path_remove.side_effect = handle_path_remove
         with patch.multiple(
-                ceph_disk,
+                main,
                 path_exist,
                 path_remove,
                 get_conf=lambda cluster, **kwargs: 'upstart',
-                ):
-            ceph_disk._remove_osd_directory_files('somewhere', cluster)
+        ):
+            main._remove_osd_directory_files('somewhere', cluster)
 
     def test_path_set_context(self):
         path = '/somewhere'
         with patch.multiple(
-                ceph_disk,
+                main,
                 get_ceph_user=lambda **kwargs: 'ceph',
-                ):
-            ceph_disk.path_set_context(path)
+        ):
+            main.path_set_context(path)
 
     def test_mount(self):
         #
@@ -1098,7 +1089,7 @@ class TestCephDiskDeactivateAndDestroy(unittest.TestCase):
         dev = None
         fs_type = 'ext4'
         option = ''
-        self.assertRaises(Exception, ceph_disk.mount, dev, fs_type, option)
+        self.assertRaises(Exception, main.mount, dev, fs_type, option)
 
         #
         # fstype undefine
@@ -1106,7 +1097,7 @@ class TestCephDiskDeactivateAndDestroy(unittest.TestCase):
         dev = '/dev/Xda1'
         fs_type = None
         option = ''
-        self.assertRaises(Exception, ceph_disk.mount, dev, fs_type, option)
+        self.assertRaises(Exception, main.mount, dev, fs_type, option)
 
         #
         # mount failure
@@ -1115,7 +1106,7 @@ class TestCephDiskDeactivateAndDestroy(unittest.TestCase):
         fstype = 'ext4'
         options = ''
         with patch('tempfile.mkdtemp', return_value='/mnt'):
-            self.assertRaises(Exception, ceph_disk.mount, dev, fstype, options)
+            self.assertRaises(Exception, main.mount, dev, fstype, options)
 
         #
         # mount successfully
@@ -1130,18 +1121,18 @@ class TestCephDiskDeactivateAndDestroy(unittest.TestCase):
         create_tmpdir = patcher.start()
         create_tmpdir.side_effect = create_temp_directory
         with patch.multiple(
-                ceph_disk,
+                main,
                 create_tmpdir,
                 command_check_call=lambda cmd: True,
-                ):
-            ceph_disk.mount(dev, fstype, options)
+        ):
+            main.mount(dev, fstype, options)
 
     def test_umount(self):
         #
         # umount failure
         #
         path = '/somewhere'
-        self.assertRaises(Exception, ceph_disk.unmount, path)
+        self.assertRaises(Exception, main.unmount, path)
 
         #
         # umount successfully
@@ -1154,137 +1145,81 @@ class TestCephDiskDeactivateAndDestroy(unittest.TestCase):
         rm_directory = patcher.start()
         rm_directory.side_effect = remove_directory_successfully
         with patch.multiple(
-                ceph_disk,
+                main,
                 rm_directory,
                 command_check_call=lambda cmd: True,
-                ):
-            ceph_disk.unmount(path)
+        ):
+            main.unmount(path)
 
     def test_main_destroy(self):
-        DMCRYPT_OSD_UUID = '4fbd7e29-9d25-41b8-afd0-5ec00ceff05d'
-        DMCRYPT_LUKS_OSD_UUID = '4fbd7e29-9d25-41b8-afd0-35865ceff05d'
         OSD_UUID = '4fbd7e29-9d25-41b8-afd0-062c0ceff05d'
         MPATH_OSD_UUID = '4fbd7e29-8ae0-4982-bf9d-5a8d867af560'
         part_uuid = '0ce28a16-6d5d-11e5-aec3-fa163e5c167b'
         journal_uuid = "7ad5e65a-0ca5-40e4-a896-62a74ca61c55"
-        cluster = 'ceph'
+        mount_5566 = '/var/lib/ceph/osd/ceph-5566/'
 
         fake_devices_normal = [{'path': '/dev/sdY',
-                                    'partitions': [{
+                                'partitions': [{
                                     'dmcrypt': {},
                                     'ptype': OSD_UUID,
                                     'path': '/dev/sdY1',
                                     'whoami': '5566',
-                                    'mount': '/var/lib/ceph/osd/ceph-5566/',
+                                    'mount': mount_5566,
                                     'uuid': part_uuid,
                                     'journal_uuid': journal_uuid}]},
                                {'path': '/dev/sdX',
-                               'partitions': [{
+                                'partitions': [{
                                     'dmcrypt': {},
                                     'ptype': MPATH_OSD_UUID,
                                     'path': '/dev/sdX1',
                                     'whoami': '7788',
                                     'mount': '/var/lib/ceph/osd/ceph-7788/',
                                     'uuid': part_uuid,
-                                    'journal_uuid': journal_uuid}]}
-                              ]
-        fake_devices_dmcrypt_unmap = [{'path': '/dev/sdY',
-                                        'partitions': [{
-                                        'dmcrypt': {
-                                            'holders': '',
-                                            'type': type,
-                                        },
-                                        'ptype': DMCRYPT_OSD_UUID,
-                                        'path': '/dev/sdX1',
-                                        'whoami': '5566',
-                                        'mount': '/var/lib/ceph/osd/ceph-5566/',
-                                        'uuid': part_uuid,
-                                        'journal_uuid': journal_uuid}]}]
-        fake_devices_dmcrypt_luk_unmap = [{'path': '/dev/sdY',
-                                            'partitions': [{
-                                            'dmcrypt': {
-                                                'holders': '',
-                                                'type': type,
-                                            },
-                                            'ptype': DMCRYPT_LUKS_OSD_UUID,
-                                            'path': '/dev/sdX1',
-                                            'whoami': '5566',
-                                            'mount': '/var/lib/ceph/osd/ceph-5566/',
-                                            'uuid': part_uuid,
-                                            'journal_uuid': journal_uuid}]}]
-        fake_devices_dmcrypt_unknow = [{'path': '/dev/sdY',
-                                            'partitions': [{
-                                            'dmcrypt': {
-                                                'holders': '',
-                                                'type': type,
-                                            },
-                                            'ptype': '00000000-0000-0000-0000-000000000000',
-                                            'path': '/dev/sdX1',
-                                            'whoami': '5566',
-                                            'mount': '/var/lib/ceph/osd/ceph-5566/',
-                                            'uuid': part_uuid,
-                                            'journal_uuid': journal_uuid}]}]
-        fake_devices_dmcrypt_map = [{'dmcrypt': {
-                                        'holders': 'dm_0',
-                                        'type': type,
-                                        },
-                                     'ptype': DMCRYPT_OSD_UUID,
-                                     'path': '/dev/sdX1',
-                                     'whoami': '5566',
-                                     'mount': '/var/lib/ceph/osd/ceph-5566/',
-                                     'uuid': part_uuid,
-                                     'journal_uuid': journal_uuid}]
-
-        def list_devices_return(dev):
-            if dev == []:
-                return fake_devices_normal
+                                    'journal_uuid': journal_uuid}]}]
+
+        def list_devices_return():
+            return fake_devices_normal
 
         #
         # input device is not the device partition
         #
-        args = ceph_disk.parse_args(['destroy', \
-                                     '--cluster', 'ceph', \
-                                     '/dev/sdX'])
+        args = main.parse_args(['destroy', '--cluster', 'ceph', '/dev/sdX'])
         with patch.multiple(
-                ceph_disk,
+                main,
                 is_partition=lambda path: False,
-                ):
-            self.assertRaises(Exception, ceph_disk.main_destroy, args)
+        ):
+            self.assertRaises(Exception, main.main_destroy, args)
 
         #
         # skip the redundent devices and not found by dev
         #
-        args = ceph_disk.parse_args(['destroy', \
-                                     '--cluster', 'ceph', \
-                                     '/dev/sdZ1'])
+        args = main.parse_args(['destroy', '--cluster', 'ceph', '/dev/sdZ1'])
         with patch.multiple(
-                ceph_disk,
+                main,
                 is_partition=lambda path: True,
                 list_devices=list_devices_return,
-                ):
-            self.assertRaises(Exception, ceph_disk.main_destroy, args)
+        ):
+            self.assertRaises(Exception, main.main_destroy, args)
 
         #
         # skip the redundent devices and not found by osd-id
         #
-        args = ceph_disk.parse_args(['destroy', \
-                                     '--cluster', 'ceph', \
-                                     '--destroy-by-id', '1234'])
+        args = main.parse_args(['destroy', '--cluster', 'ceph',
+                                '--destroy-by-id', '1234'])
         with patch.multiple(
-                ceph_disk,
+                main,
                 is_partition=lambda path: True,
                 list_devices=list_devices_return,
-                ):
-            self.assertRaises(Exception, ceph_disk.main_destroy, args)
+        ):
+            self.assertRaises(Exception, main.main_destroy, args)
 
         #
         # skip the redundent devices and found by dev
         #
-        args = ceph_disk.parse_args(['destroy', \
-                                     '--cluster', 'ceph', \
-                                     '/dev/sdY1', '--zap'])
+        args = main.parse_args(['destroy', '--cluster',
+                                'ceph', '/dev/sdY1', '--zap'])
         with patch.multiple(
-                ceph_disk,
+                main,
                 is_partition=lambda path: True,
                 list_devices=list_devices_return,
                 get_partition_base=lambda dev_path: '/dev/sdY',
@@ -1293,143 +1228,55 @@ class TestCephDiskDeactivateAndDestroy(unittest.TestCase):
                 _delete_osd_auth_key=lambda cluster, osd_id: True,
                 _deallocate_osd_id=lambda cluster, osd_id: True,
                 zap=lambda dev: True
-                ):
-            ceph_disk.main_destroy(args)
-            #self.assertRaises(Exception, ceph_disk.main_destroy, args)
+        ):
+            main.main_destroy(args)
 
         #
         # skip the redundent devices and found by osd-id
         # with active status and MPATH_OSD
         #
-        args = ceph_disk.parse_args(['destroy', \
-                                     '--cluster', 'ceph', \
-                                     '--destroy-by-id', '7788'])
+        args = main.parse_args(['destroy', '--cluster', 'ceph',
+                                '--destroy-by-id', '7788'])
         with patch.multiple(
-                ceph_disk,
+                main,
                 is_partition=lambda path: True,
                 list_devices=list_devices_return,
                 get_partition_base_mpath=lambda dev_path: '/dev/sdX',
                 _check_osd_status=lambda cluster, osd_id: 1,
-                ):
-            self.assertRaises(Exception, ceph_disk.main_destroy, args)
-
-        #
-        # skip the redundent devices and found by dev
-        # with dmcrypt (plain)
-        #
-        args = ceph_disk.parse_args(['destroy', \
-                                     '--cluster', 'ceph', \
-                                     '/dev/sdX1', '--zap'])
-        def list_devices_return(dev):
-            if dev == []:
-                return fake_devices_dmcrypt_unmap
-            elif dev == ['/dev/sdX1']:
-                return fake_devices_dmcrypt_map
-
-        with patch.multiple(
-                ceph_disk,
-                is_partition=lambda path: True,
-                list_devices=list_devices_return,
-                get_dmcrypt_key_path=lambda part_uuid, dmcrypt_key_dir, luks: True,
-                dmcrypt_map=lambda rawdev, keypath, _uuid, \
-                                   cryptsetup_parameters, luks, format_dev: True,
-                dmcrypt_unmap=lambda part_uuid: True,
-                get_partition_base=lambda dev_path: '/dev/sdX',
-                _check_osd_status=lambda cluster, osd_id: 0,
-                _remove_from_crush_map=lambda cluster, osd_id: True,
-                _delete_osd_auth_key=lambda cluster, osd_id: True,
-                _deallocate_osd_id=lambda cluster, osd_id: True,
-                zap=lambda dev: True
-                ):
-            ceph_disk.main_destroy(args)
-            #self.assertRaises(Exception, ceph_disk.main_destroy, args)
-
-        #
-        # skip the redundent devices and found by osd-id
-        # with dmcrypt (luk) and status: active
-        #
-        args = ceph_disk.parse_args(['destroy', \
-                                     '--cluster', 'ceph', \
-                                     '--destroy-by-id', '5566'])
-        def list_devices_return(dev):
-            if dev == []:
-                return fake_devices_dmcrypt_luk_unmap
-            elif dev == ['/dev/sdX1']:
-                return fake_devices_dmcrypt_map
-
-        with patch.multiple(
-                ceph_disk,
-                is_partition=lambda path: True,
-                list_devices=list_devices_return,
-                get_dmcrypt_key_path=lambda part_uuid, dmcrypt_key_dir, luks: True,
-                dmcrypt_map=lambda rawdev, keypath, _uuid, \
-                                   cryptsetup_parameters, luks, format_dev: True,
-                dmcrypt_unmap=lambda part_uuid: True,
-                get_partition_base=lambda dev_path: '/dev/sdX',
-                _check_osd_status=lambda cluster, osd_id: 1,
-                ):
-            self.assertRaises(Exception, ceph_disk.main_destroy, args)
-
-        #
-        # skip the redundent devices and found by osd-id
-        # with unknow dmcrypt type
-        #
-        args = ceph_disk.parse_args(['destroy', \
-                                     '--cluster', 'ceph', \
-                                     '--destroy-by-id', '5566'])
-        def list_devices_return(dev):
-            if dev == []:
-                return fake_devices_dmcrypt_unknow
-
-        with patch.multiple(
-                ceph_disk,
-                is_partition=lambda path: True,
-                list_devices=list_devices_return,
-                ):
-            self.assertRaises(Exception, ceph_disk.main_destroy, args)
+        ):
+            self.assertRaises(Exception, main.main_destroy, args)
 
     def test_remove_from_crush_map_fail(self):
         cluster = 'ceph'
         osd_id = '5566'
         with patch.multiple(
-                ceph_disk,
+                main,
                 command=raise_command_error
-                ):
-            self.assertRaises(Exception, ceph_disk._remove_from_crush_map, cluster, osd_id)
+        ):
+            self.assertRaises(Exception, main._remove_from_crush_map,
+                              cluster, osd_id)
 
     def test_delete_osd_auth_key_fail(self):
         cluster = 'ceph'
         osd_id = '5566'
         with patch.multiple(
-                ceph_disk,
+                main,
                 command=raise_command_error
-                ):
-            self.assertRaises(Exception, ceph_disk._delete_osd_auth_key, cluster, osd_id)
+        ):
+            self.assertRaises(Exception, main._delete_osd_auth_key,
+                              cluster, osd_id)
 
     def test_deallocate_osd_id_fail(self):
         cluster = 'ceph'
         osd_id = '5566'
         with patch.multiple(
-                ceph_disk,
+                main,
                 command=raise_command_error
-                ):
-            self.assertRaises(Exception, ceph_disk._deallocate_osd_id, cluster, osd_id)
-
+        ):
+            self.assertRaises(Exception, main._deallocate_osd_id,
+                              cluster, osd_id)
 
-##### Help function #####
 
 def raise_command_error(*args):
     e = subprocess.CalledProcessError('aaa', 'bbb', 'ccc')
     raise e
-
-def path_exists(target_paths=None):
-    """
-    A quick helper that enforces a check for the existence of a path. Since we
-    are dealing with fakes, we allow to pass in a list of paths that are OK to
-    return True, otherwise return False.
-    """
-    target_paths = target_paths or []
-
-    def exists(path):
-        return path in target_paths
-    return exists
diff --git a/src/ceph-disk/tox.ini b/src/ceph-disk/tox.ini
new file mode 100644
index 0000000..ef8b8d0
--- /dev/null
+++ b/src/ceph-disk/tox.ini
@@ -0,0 +1,23 @@
+[tox]
+envlist = flake8,py27
+
+[testenv]
+setenv =
+       VIRTUAL_ENV={envdir}
+       CEPH_DISK={envbindir}/coverage run --append --source=ceph_disk -- {envbindir}/ceph-disk
+usedevelop = true
+deps =
+  {env:NO_INDEX:}
+  --use-wheel
+  --find-links=file://{toxinidir}/wheelhouse
+  -r{toxinidir}/requirements.txt
+  -r{toxinidir}/test-requirements.txt
+  ../ceph-detect-init
+
+commands = coverage run --append --source=ceph_disk {envbindir}/py.test -vv tests/test_main.py
+           coverage run --append --source=ceph_disk {envbindir}/py.test -vv tests/test_prepare.py
+           bash -x tests/ceph-disk.sh
+           coverage report --omit=*test*,*tox* --show-missing
+
+[testenv:flake8]
+commands = flake8 --ignore=H105,H405 ceph_disk tests
diff --git a/src/ceph_fuse.cc b/src/ceph_fuse.cc
index e66fe65..27f430d 100644
--- a/src/ceph_fuse.cc
+++ b/src/ceph_fuse.cc
@@ -139,7 +139,7 @@ int main(int argc, const char **argv, const char *envp[]) {
     public:
       CephFuse *cfuse;
       Client *client;
-      RemountTest() : Thread() {}
+      RemountTest() : cfuse(NULL), client(NULL) {}
       void init(CephFuse *cf, Client *cl) {
 	cfuse = cf;
 	client = cl;
diff --git a/src/ceph_mds.cc b/src/ceph_mds.cc
index 0da51bf..08eee5e 100644
--- a/src/ceph_mds.cc
+++ b/src/ceph_mds.cc
@@ -93,7 +93,8 @@ int main(int argc, const char **argv)
   argv_to_vec(argc, argv, args);
   env_to_vec(args);
 
-  global_init(NULL, args, CEPH_ENTITY_TYPE_MDS, CODE_ENVIRONMENT_DAEMON, 0);
+  global_init(NULL, args, CEPH_ENTITY_TYPE_MDS, CODE_ENVIRONMENT_DAEMON,
+	      0, "mds_data");
   ceph_heap_profiler_init();
 
   // mds specific args
diff --git a/src/ceph_mon.cc b/src/ceph_mon.cc
index e6e3ada..79d9e37 100644
--- a/src/ceph_mon.cc
+++ b/src/ceph_mon.cc
@@ -258,7 +258,7 @@ int main(int argc, const char **argv)
   }
 
   global_init(&def_args, args,
-              CEPH_ENTITY_TYPE_MON, CODE_ENVIRONMENT_DAEMON, flags);
+              CEPH_ENTITY_TYPE_MON, CODE_ENVIRONMENT_DAEMON, flags, "mon_data");
   ceph_heap_profiler_init();
 
   uuid_d fsid;
@@ -268,7 +268,6 @@ int main(int argc, const char **argv)
       break;
     } else if (ceph_argparse_flag(args, i, "-h", "--help", (char*)NULL)) {
       usage();
-      exit(0);
     } else if (ceph_argparse_flag(args, i, "--mkfs", (char*)NULL)) {
       mkfs = true;
     } else if (ceph_argparse_flag(args, i, "--compact", (char*)NULL)) {
@@ -393,7 +392,7 @@ int main(int argc, const char **argv)
 	  string name;
 	  monmap.get_addr_name(local, name);
 
-	  if (name.find("noname-") == 0) {
+	  if (name.compare(0, 7, "noname-") == 0) {
 	    cout << argv[0] << ": mon." << name << " " << local
 		 << " is local, renaming to mon." << g_conf->name.get_id() << std::endl;
 	    monmap.rename(name, g_conf->name.get_id());
diff --git a/src/ceph_osd.cc b/src/ceph_osd.cc
index fb5de36..ce5b9ef 100644
--- a/src/ceph_osd.cc
+++ b/src/ceph_osd.cc
@@ -90,7 +90,6 @@ void usage()
        << "                    get OSD fsid for the given block device\n"
        << std::endl;
   generic_server_usage();
-  cout.flush();
 }
 
 int preload_erasure_code()
@@ -119,7 +118,8 @@ int main(int argc, const char **argv)
   // option, therefore we will pass it as a default argument to global_init().
   def_args.push_back("--leveldb-log=");
 
-  global_init(&def_args, args, CEPH_ENTITY_TYPE_OSD, CODE_ENVIRONMENT_DAEMON, 0);
+  global_init(&def_args, args, CEPH_ENTITY_TYPE_OSD, CODE_ENVIRONMENT_DAEMON,
+	      0, "osd_data");
   ceph_heap_profiler_init();
 
   // osd specific args
@@ -145,7 +145,6 @@ int main(int argc, const char **argv)
       break;
     } else if (ceph_argparse_flag(args, i, "-h", "--help", (char*)NULL)) {
       usage();
-      exit(0);
     } else if (ceph_argparse_flag(args, i, "--mkfs", (char*)NULL)) {
       mkfs = true;
     } else if (ceph_argparse_flag(args, i, "--mkjournal", (char*)NULL)) {
@@ -279,10 +278,8 @@ int main(int argc, const char **argv)
 	   << g_conf->osd_data << ": " << cpp_strerror(-err) << TEXT_NORMAL << dendl;
       exit(1);
     }
-    derr << "created object store " << g_conf->osd_data;
-    if (!g_conf->osd_journal.empty())
-      *_dout << " journal " << g_conf->osd_journal;
-    *_dout << " for osd." << whoami << " fsid " << mc.monmap.fsid << dendl;
+    derr << "created object store " << g_conf->osd_data
+	 << " for osd." << whoami << " fsid " << mc.monmap.fsid << dendl;
   }
   if (mkkey) {
     common_init_finish(g_ceph_context);
diff --git a/src/civetweb/src/civetweb.c b/src/civetweb/src/civetweb.c
index 967d853..0385b8f 100644
--- a/src/civetweb/src/civetweb.c
+++ b/src/civetweb/src/civetweb.c
@@ -3994,6 +3994,7 @@ static void parse_http_headers(char **buf, struct mg_request_info *ri)
     }
 }
 
+#ifndef RGW
 static int is_valid_http_method(const char *method)
 {
     return !strcmp(method, "GET") || !strcmp(method, "POST") ||
@@ -4003,6 +4004,7 @@ static int is_valid_http_method(const char *method)
            || !strcmp(method, "MKCOL")
            ;
 }
+#endif
 
 /* Parse HTTP request, fill in mg_request_info structure.
    This function modifies the buffer by NUL-terminating
diff --git a/src/client/Client.cc b/src/client/Client.cc
index 7f89665..1e1c9af 100644
--- a/src/client/Client.cc
+++ b/src/client/Client.cc
@@ -443,18 +443,13 @@ void Client::dump_status(Formatter *f)
 
   ldout(cct, 1) << __func__ << dendl;
 
-  const OSDMap *osdmap = objecter->get_osdmap_read();
-  const epoch_t osd_epoch = osdmap->get_epoch();
-  objecter->put_osdmap_read();
+  const epoch_t osd_epoch
+    = objecter->with_osdmap(std::mem_fn(&OSDMap::get_epoch));
 
   if (f) {
     f->open_object_section("metadata");
-    {
-      for (std::map<std::string, std::string>::const_iterator i = metadata.begin();
-           i != metadata.end(); ++i) {
-        f->dump_string(i->first.c_str(), i->second);
-      }
-    }
+    for (const auto& kv : metadata)
+      f->dump_string(kv.first.c_str(), kv.second);
     f->close_section();
 
     f->dump_int("dentry_count", lru.lru_get_size());
@@ -468,15 +463,13 @@ void Client::dump_status(Formatter *f)
 
 int Client::init()
 {
-  client_lock.Lock();
-  assert(!initialized);
-
   timer.init();
-
   objectcacher->start();
-
   objecter->init();
 
+  client_lock.Lock();
+  assert(!initialized);
+
   // ok!
   messenger->add_dispatcher_tail(objecter);
   messenger->add_dispatcher_tail(this);
@@ -608,9 +601,9 @@ void Client::shutdown()
   assert(initialized);
   initialized = false;
   timer.shutdown();
-  objecter->shutdown();
   client_lock.Unlock();
 
+  objecter->shutdown();
   objecter_finisher.wait_for_empty();
   objecter_finisher.stop();
 
@@ -2052,9 +2045,9 @@ void Client::send_request(MetaRequest *request, MetaSession *session,
   }
   r->set_mdsmap_epoch(mdsmap->get_epoch());
   if (r->head.op == CEPH_MDS_OP_SETXATTR) {
-    const OSDMap *osdmap = objecter->get_osdmap_read();
-    r->set_osdmap_epoch(osdmap->get_epoch());
-    objecter->put_osdmap_read();
+    objecter->with_osdmap([r](const OSDMap& o) {
+	r->set_osdmap_epoch(o.get_epoch());
+      });
   }
 
   if (request->mds == -1) {
@@ -2317,21 +2310,16 @@ void Client::handle_osd_map(MOSDMap *m)
     // cancel_writes
     std::vector<int64_t> full_pools;
 
-    const OSDMap *osd_map = objecter->get_osdmap_read();
-    const map<int64_t,pg_pool_t>& pools = osd_map->get_pools();
-    for (map<int64_t,pg_pool_t>::const_iterator i = pools.begin();
-         i != pools.end(); ++i) {
-      if (i->second.has_flag(pg_pool_t::FLAG_FULL)) {
-        full_pools.push_back(i->first);
-      }
-    }
+    objecter->with_osdmap([&full_pools](const OSDMap &o) {
+	for (const auto& kv : o.get_pools()) {
+	  if (kv.second.has_flag(pg_pool_t::FLAG_FULL)) {
+	    full_pools.push_back(kv.first);
+	  }
+	}
+      });
 
-    objecter->put_osdmap_read();
-
-    for (std::vector<int64_t>::iterator i = full_pools.begin();
-         i != full_pools.end(); ++i) {
-      _handle_full_flag(*i);
-    }
+    for (auto p : full_pools)
+      _handle_full_flag(p);
 
     // Subscribe to subsequent maps to watch for the full flag going
     // away.  For the global full flag objecter does this for us, but
@@ -3795,7 +3783,7 @@ class C_Client_Remount : public Context  {
 private:
   Client *client;
 public:
-  C_Client_Remount(Client *c) : client(c) {}
+  explicit C_Client_Remount(Client *c) : client(c) {}
   void finish(int r) {
     assert (r == 0);
     r = client->remount_cb(client->callback_handle);
@@ -5566,7 +5554,7 @@ void Client::unmount()
 class C_C_Tick : public Context {
   Client *client;
 public:
-  C_C_Tick(Client *c) : client(c) {}
+  explicit C_C_Tick(Client *c) : client(c) {}
   void finish(int r) {
     // Called back via Timer, which takes client_lock for us
     assert(client->client_lock.is_locked_by_me());
@@ -9868,9 +9856,9 @@ int Client::ll_setxattr(Inode *in, const char *name, const void *value,
       strcmp(name, "ceph.file.layout") == 0 || strcmp(name, "ceph.dir.layout") == 0) {
     string rest(strstr(name, "layout"));
     string v((const char*)value);
-    const OSDMap *osdmap = objecter->get_osdmap_read();
-    int r = check_data_pool_exist(rest, v, osdmap);
-    objecter->put_osdmap_read();
+    int r = objecter->with_osdmap([&](const OSDMap& o) {
+      return check_data_pool_exist(rest, v, &o);
+    });
 
     if (r == -ENOENT) {
       C_SaferCond ctx;
@@ -9994,14 +9982,14 @@ size_t Client::_vxattrcb_layout(Inode *in, char *val, size_t size)
       (unsigned long long)in->layout.fl_stripe_unit,
       (unsigned long long)in->layout.fl_stripe_count,
       (unsigned long long)in->layout.fl_object_size);
-  const OSDMap *osdmap = objecter->get_osdmap_read();
-  if (osdmap->have_pg_pool(in->layout.fl_pg_pool))
-    r += snprintf(val + r, size - r, "%s",
-	osdmap->get_pool_name(in->layout.fl_pg_pool).c_str());
-  else
-    r += snprintf(val + r, size - r, "%lld",
-	(unsigned long long)in->layout.fl_pg_pool);
-  objecter->put_osdmap_read();
+  objecter->with_osdmap([&](const OSDMap& o) {
+      if (o.have_pg_pool(in->layout.fl_pg_pool))
+	r += snprintf(val + r, size - r, "%s",
+		      o.get_pool_name(in->layout.fl_pg_pool).c_str());
+      else
+	r += snprintf(val + r, size - r, "%" PRIu64,
+		      (uint64_t)in->layout.fl_pg_pool);
+    });
   return r;
 }
 size_t Client::_vxattrcb_layout_stripe_unit(Inode *in, char *val, size_t size)
@@ -10019,12 +10007,13 @@ size_t Client::_vxattrcb_layout_object_size(Inode *in, char *val, size_t size)
 size_t Client::_vxattrcb_layout_pool(Inode *in, char *val, size_t size)
 {
   size_t r;
-  const OSDMap *osdmap = objecter->get_osdmap_read();
-  if (osdmap->have_pg_pool(in->layout.fl_pg_pool))
-    r = snprintf(val, size, "%s", osdmap->get_pool_name(in->layout.fl_pg_pool).c_str());
-  else
-    r = snprintf(val, size, "%lld", (unsigned long long)in->layout.fl_pg_pool);
-  objecter->put_osdmap_read();
+  objecter->with_osdmap([&](const OSDMap& o) {
+      if (o.have_pg_pool(in->layout.fl_pg_pool))
+	r = snprintf(val, size, "%s", o.get_pool_name(
+		       in->layout.fl_pg_pool).c_str());
+      else
+	r = snprintf(val, size, "%" PRIu64, (uint64_t)in->layout.fl_pg_pool);
+    });
   return r;
 }
 size_t Client::_vxattrcb_dir_entries(Inode *in, char *val, size_t size)
@@ -10303,9 +10292,8 @@ int Client::_create(Inode *dir, const char *name, int flags, mode_t mode,
 
   int64_t pool_id = -1;
   if (data_pool && *data_pool) {
-    const OSDMap * osdmap = objecter->get_osdmap_read();
-    pool_id = osdmap->lookup_pg_pool_name(data_pool);
-    objecter->put_osdmap_read();
+    pool_id = objecter->with_osdmap(
+      std::mem_fn(&OSDMap::lookup_pg_pool_name), data_pool);
     if (pool_id < 0)
       return -EINVAL;
     if (pool_id > 0xffffffffll)
@@ -10872,29 +10860,25 @@ out:
 int Client::ll_num_osds(void)
 {
   Mutex::Locker lock(client_lock);
-  const OSDMap *osdmap = objecter->get_osdmap_read();
-  int ret = osdmap->get_num_osds();
-  objecter->put_osdmap_read();
-  return ret;
+  return objecter->with_osdmap(std::mem_fn(&OSDMap::get_num_osds));
 }
 
 int Client::ll_osdaddr(int osd, uint32_t *addr)
 {
   Mutex::Locker lock(client_lock);
-  const OSDMap *osdmap = objecter->get_osdmap_read();
-  bool exists = osdmap->exists(osd);
   entity_addr_t g;
-  if (exists)
-    g = osdmap->get_addr(osd);
-  objecter->put_osdmap_read();
-  if (!exists) {
+  bool exists = objecter->with_osdmap([&](const OSDMap& o) {
+      if (!o.exists(osd))
+	return false;
+      g = o.get_addr(osd);
+      return true;
+    });
+  if (!exists)
     return -1;
-  }
   uint32_t nb_addr = (g.in4_addr()).sin_addr.s_addr;
   *addr = ntohl(nb_addr);
   return 0;
 }
-
 uint32_t Client::ll_stripe_unit(Inode *in)
 {
   Mutex::Locker lock(client_lock);
@@ -10942,15 +10926,15 @@ int Client::ll_get_stripe_osd(Inode *in, uint64_t blockno,
   uint64_t objectno = objectsetno * stripe_count + stripepos;  // object id
 
   object_t oid = file_object_t(ino, objectno);
-  const OSDMap *osdmap = objecter->get_osdmap_read();
-  ceph_object_layout olayout = osdmap->file_to_object_layout(oid, *layout, "");
-  objecter->put_osdmap_read();
-
-  pg_t pg = (pg_t)olayout.ol_pgid;
-  vector<int> osds;
-  int primary;
-  osdmap->pg_to_osds(pg, &osds, &primary);
-  return osds[0];
+  return objecter->with_osdmap([&](const OSDMap& o) {
+      ceph_object_layout olayout =
+	o.file_to_object_layout(oid, *layout, string());
+      pg_t pg = (pg_t)olayout.ol_pgid;
+      vector<int> osds;
+      int primary;
+      o.pg_to_osds(pg, &osds, &primary);
+      return osds[0];
+    });
 }
 
 /* Return the offset of the block, internal to the object */
@@ -11610,34 +11594,24 @@ int Client::fdescribe_layout(int fd, ceph_file_layout *lp)
 int64_t Client::get_pool_id(const char *pool_name)
 {
   Mutex::Locker lock(client_lock);
-  const OSDMap *osdmap = objecter->get_osdmap_read();
-  int64_t pool = osdmap->lookup_pg_pool_name(pool_name);
-  objecter->put_osdmap_read();
-  return pool;
+  return objecter->with_osdmap(std::mem_fn(&OSDMap::lookup_pg_pool_name),
+			       pool_name);
 }
 
 string Client::get_pool_name(int64_t pool)
 {
   Mutex::Locker lock(client_lock);
-  const OSDMap *osdmap = objecter->get_osdmap_read();
-  string ret;
-  if (osdmap->have_pg_pool(pool))
-    ret = osdmap->get_pool_name(pool);
-  objecter->put_osdmap_read();
-  return ret;
+  return objecter->with_osdmap([pool](const OSDMap& o) {
+      return o.have_pg_pool(pool) ? o.get_pool_name(pool) : string();
+    });
 }
 
 int Client::get_pool_replication(int64_t pool)
 {
   Mutex::Locker lock(client_lock);
-  const OSDMap *osdmap = objecter->get_osdmap_read();
-  int ret;
-  if (!osdmap->have_pg_pool(pool))
-    ret = -ENOENT;
-  else
-    ret = osdmap->get_pg_pool(pool)->get_size();
-  objecter->put_osdmap_read();
-  return ret;
+  return objecter->with_osdmap([pool](const OSDMap& o) {
+      return o.have_pg_pool(pool) ? o.get_pg_pool(pool)->get_size() : -ENOENT;
+    });
 }
 
 int Client::get_file_extent_osds(int fd, loff_t off, loff_t *len, vector<int>& osds)
@@ -11653,10 +11627,10 @@ int Client::get_file_extent_osds(int fd, loff_t off, loff_t *len, vector<int>& o
   Striper::file_to_extents(cct, in->ino, &in->layout, off, 1, in->truncate_size, extents);
   assert(extents.size() == 1);
 
-  const OSDMap *osdmap = objecter->get_osdmap_read();
-  pg_t pg = osdmap->object_locator_to_pg(extents[0].oid, extents[0].oloc);
-  osdmap->pg_to_acting_osds(pg, osds);
-  objecter->put_osdmap_read();
+  objecter->with_osdmap([&](const OSDMap& o) {
+      pg_t pg = o.object_locator_to_pg(extents[0].oid, extents[0].oloc);
+      o.pg_to_acting_osds(pg, osds);
+    });
 
   if (osds.empty())
     return -EINVAL;
@@ -11688,13 +11662,13 @@ int Client::get_osd_crush_location(int id, vector<pair<string, string> >& path)
   Mutex::Locker lock(client_lock);
   if (id < 0)
     return -EINVAL;
-  const OSDMap *osdmap = objecter->get_osdmap_read();
-  int ret = osdmap->crush->get_full_location_ordered(id, path);
-  objecter->put_osdmap_read();
-  return ret;
+  return objecter->with_osdmap([&](const OSDMap& o) {
+      return o.crush->get_full_location_ordered(id, path);
+    });
 }
 
-int Client::get_file_stripe_address(int fd, loff_t offset, vector<entity_addr_t>& address)
+int Client::get_file_stripe_address(int fd, loff_t offset,
+				    vector<entity_addr_t>& address)
 {
   Mutex::Locker lock(client_lock);
 
@@ -11705,38 +11679,35 @@ int Client::get_file_stripe_address(int fd, loff_t offset, vector<entity_addr_t>
 
   // which object?
   vector<ObjectExtent> extents;
-  Striper::file_to_extents(cct, in->ino, &in->layout, offset, 1, in->truncate_size, extents);
+  Striper::file_to_extents(cct, in->ino, &in->layout, offset, 1,
+			   in->truncate_size, extents);
   assert(extents.size() == 1);
 
   // now we have the object and its 'layout'
-  const OSDMap *osdmap = objecter->get_osdmap_read();
-  pg_t pg = osdmap->object_locator_to_pg(extents[0].oid, extents[0].oloc);
-  vector<int> osds;
-  osdmap->pg_to_acting_osds(pg, osds);
-  int ret = 0;
-  if (!osds.empty()) {
-    ret = -EINVAL;
-  } else {
-    for (unsigned i = 0; i < osds.size(); i++) {
-      entity_addr_t addr = osdmap->get_addr(osds[i]);
-      address.push_back(addr);
-    }
-  }
-  objecter->put_osdmap_read();
-  return ret;
+  return objecter->with_osdmap([&](const OSDMap& o) {
+      pg_t pg = o.object_locator_to_pg(extents[0].oid, extents[0].oloc);
+      vector<int> osds;
+      o.pg_to_acting_osds(pg, osds);
+      if (osds.empty())
+	return -EINVAL;
+      for (unsigned i = 0; i < osds.size(); i++) {
+	entity_addr_t addr = o.get_addr(osds[i]);
+	address.push_back(addr);
+      }
+      return 0;
+    });
 }
 
 int Client::get_osd_addr(int osd, entity_addr_t& addr)
 {
   Mutex::Locker lock(client_lock);
-  const OSDMap *osdmap = objecter->get_osdmap_read();
-  int ret = 0;
-  if (!osdmap->exists(osd))
-    ret = -ENOENT;
-  else
-    addr = osdmap->get_addr(osd);
-  objecter->put_osdmap_read();
-  return ret;
+  return objecter->with_osdmap([&](const OSDMap& o) {
+      if (!o.exists(osd))
+	return -ENOENT;
+
+      addr = o.get_addr(osd);
+      return 0;
+    });
 }
 
 int Client::enumerate_layout(int fd, vector<ObjectExtent>& result,
@@ -11763,12 +11734,12 @@ int Client::enumerate_layout(int fd, vector<ObjectExtent>& result,
 int Client::get_local_osd()
 {
   Mutex::Locker lock(client_lock);
-  const OSDMap *osdmap = objecter->get_osdmap_read();
-  if (osdmap->get_epoch() != local_osd_epoch) {
-    local_osd = osdmap->find_osd_on_ip(messenger->get_myaddr());
-    local_osd_epoch = osdmap->get_epoch();
-  }
-  objecter->put_osdmap_read();
+  objecter->with_osdmap([this](const OSDMap& o) {
+      if (o.get_epoch() != local_osd_epoch) {
+	local_osd = o.find_osd_on_ip(messenger->get_myaddr());
+	local_osd_epoch = o.get_epoch();
+      }
+    });
   return local_osd;
 }
 
@@ -12233,6 +12204,8 @@ const char** Client::get_tracked_conf_keys() const
 void Client::handle_conf_change(const struct md_config_t *conf,
 				const std::set <std::string> &changed)
 {
+  Mutex::Locker lock(client_lock);
+
   if (changed.count("client_cache_size") ||
       changed.count("client_cache_mid")) {
     lru.lru_set_max(cct->_conf->client_cache_size);
diff --git a/src/client/Client.h b/src/client/Client.h
index a3415b0..ccee486 100644
--- a/src/client/Client.h
+++ b/src/client/Client.h
@@ -118,7 +118,7 @@ struct DirEntry {
   string d_name;
   struct stat st;
   int stmask;
-  DirEntry(const string &s) : d_name(s), stmask(0) {}
+  explicit DirEntry(const string &s) : d_name(s), stmask(0) {}
   DirEntry(const string &n, struct stat& s, int stm) : d_name(n), st(s), stmask(stm) {}
 };
 
@@ -192,7 +192,7 @@ struct dir_result_t {
 
   string at_cache_name;  // last entry we successfully returned
 
-  dir_result_t(Inode *in);
+  explicit dir_result_t(Inode *in);
 
   frag_t frag() { return frag_t(offset >> SHIFT); }
   unsigned fragpos() { return offset & MASK; }
@@ -231,7 +231,7 @@ class Client : public Dispatcher, public md_config_obs_t {
   class CommandHook : public AdminSocketHook {
     Client *m_client;
   public:
-    CommandHook(Client *client);
+    explicit CommandHook(Client *client);
     bool call(std::string command, cmdmap_t &cmdmap, std::string format,
 	      bufferlist& out);
   };
diff --git a/src/client/ClientSnapRealm.h b/src/client/ClientSnapRealm.h
index 040430a..ccb129d 100644
--- a/src/client/ClientSnapRealm.h
+++ b/src/client/ClientSnapRealm.h
@@ -31,7 +31,7 @@ private:
 public:
   xlist<Inode*> inodes_with_caps;
 
-  SnapRealm(inodeno_t i) : 
+  explicit SnapRealm(inodeno_t i) :
     ino(i), nref(0), created(0), seq(0),
     pparent(NULL) { }
 
diff --git a/src/client/Dir.h b/src/client/Dir.h
index 0e6d28b..a7f484d 100644
--- a/src/client/Dir.h
+++ b/src/client/Dir.h
@@ -11,7 +11,7 @@ class Dir {
   uint64_t release_count;
   uint64_t ordered_count;
 
-  Dir(Inode* in) : release_count(0), ordered_count(0) { parent_inode = in; }
+  explicit Dir(Inode* in) : release_count(0), ordered_count(0) { parent_inode = in; }
 
   bool is_empty() {  return dentries.empty(); }
 };
diff --git a/src/client/Inode.h b/src/client/Inode.h
index bd10ad7..dd2ad99 100644
--- a/src/client/Inode.h
+++ b/src/client/Inode.h
@@ -66,7 +66,7 @@ struct CapSnap {
   uint64_t flush_tid;
   xlist<CapSnap*>::item flushing_item;
 
-  CapSnap(Inode *i)
+  explicit CapSnap(Inode *i)
     : in(i), issued(0), dirty(0),
       size(0), time_warp_seq(0), mode(0), uid(0), gid(0), xattr_version(0),
       inline_version(0), writing(false), dirty_data(false), flush_tid(0),
@@ -95,7 +95,7 @@ private:
   }
   ~QuotaTree() {}
 public:
-  QuotaTree(Inode *i) :
+  explicit QuotaTree(Inode *i) :
     _in(i),
     _ancestor_ref(0),
     _ancestor(NULL),
diff --git a/src/client/Makefile.am b/src/client/Makefile.am
index 31ca60c..fa0dc80 100644
--- a/src/client/Makefile.am
+++ b/src/client/Makefile.am
@@ -8,7 +8,7 @@ libclient_la_SOURCES = \
 	client/MetaSession.cc \
 	client/Trace.cc \
 	client/posix_acl.cc
-libclient_la_LIBADD = $(LIBOSDC) $(LIBEDIT_LIBS)
+libclient_la_LIBADD = $(LIBOSDC)
 noinst_LTLIBRARIES += libclient.la
 
 noinst_HEADERS += \
diff --git a/src/client/MetaRequest.h b/src/client/MetaRequest.h
index 7a6bd3f..d21e5c5 100644
--- a/src/client/MetaRequest.h
+++ b/src/client/MetaRequest.h
@@ -81,7 +81,7 @@ public:
 
   InodeRef target;
 
-  MetaRequest(int op) :
+  explicit MetaRequest(int op) :
     _dentry(NULL), _old_dentry(NULL), abort_rc(0),
     tid(0),
     inode_drop(0), inode_unless(0),
diff --git a/src/client/ObjecterWriteback.h b/src/client/ObjecterWriteback.h
index 8acd3f7..cb27723 100644
--- a/src/client/ObjecterWriteback.h
+++ b/src/client/ObjecterWriteback.h
@@ -43,6 +43,7 @@ class ObjecterWriteback : public WritebackHandler {
   }
 
   virtual bool can_scattered_write() { return true; }
+  using WritebackHandler::write;
   virtual ceph_tid_t write(const object_t& oid, const object_locator_t& oloc,
                            vector<pair<uint64_t, bufferlist> >& io_vec,
 			   const SnapContext& snapc, ceph::real_time mtime,
diff --git a/src/client/SyntheticClient.cc b/src/client/SyntheticClient.cc
index 9d1888b..8a84a48 100644
--- a/src/client/SyntheticClient.cc
+++ b/src/client/SyntheticClient.cc
@@ -1714,19 +1714,15 @@ int SyntheticClient::dump_placement(string& fn) {
   // run through all the object extents
   dout(0) << "file size is " << filesize << dendl;
   dout(0) << "(osd, start, length) tuples for file " << fn << dendl;
-  for (vector<ObjectExtent>::iterator i = extents.begin(); 
-       i != extents.end(); ++i) {
-    
-    const OSDMap *osdmap = client->objecter->get_osdmap_read();
-    int osd = osdmap->get_pg_acting_primary(osdmap->object_locator_to_pg(i->oid, i->oloc));
-    client->objecter->put_osdmap_read();
+  for (const auto& x : extents) {
+    int osd = client->objecter->with_osdmap([&](const OSDMap& o) {
+	return o.get_pg_acting_primary(o.object_locator_to_pg(x.oid, x.oloc));
+      });
 
     // run through all the buffer extents
-    for (vector<pair<uint64_t, uint64_t> >::iterator j = i->buffer_extents.begin();
-	 j != i->buffer_extents.end(); ++j) {
-      dout(0) << "OSD " << osd << ", offset " << (*j).first
-	      << ", length " << (*j).second << dendl;    
-    }
+    for (const auto& be : x.buffer_extents)
+      dout(0) << "OSD " << osd << ", offset " << be.first
+	      << ", length " << be.second << dendl;
   }
   return 0;
 }
@@ -1999,12 +1995,11 @@ int SyntheticClient::overload_osd_0(int n, int size, int wrsize) {
 int SyntheticClient::check_first_primary(int fh)
 {
   vector<ObjectExtent> extents;
-  client->enumerate_layout(fh, extents, 1, 0);  
-  const OSDMap *osdmap = client->objecter->get_osdmap_read();
-  int primary = osdmap->get_pg_acting_primary(osdmap->object_locator_to_pg(extents.begin()->oid,
-									   extents.begin()->oloc));
-  client->objecter->put_osdmap_read();
-  return primary;
+  client->enumerate_layout(fh, extents, 1, 0);
+  return client->objecter->with_osdmap([&](const OSDMap& o) {
+      return o.get_pg_acting_primary(
+	o.object_locator_to_pg(extents.begin()->oid, extents.begin()->oloc));
+    });
 }
 
 int SyntheticClient::rm_file(string& fn)
diff --git a/src/client/Trace.h b/src/client/Trace.h
index 24145a8..7da2eaf 100644
--- a/src/client/Trace.h
+++ b/src/client/Trace.h
@@ -39,7 +39,7 @@ class Trace {
   string line;
 
  public:
-  Trace(const char* f) : _line(0), filename(f), fs(0) {}
+  explicit Trace(const char* f) : _line(0), filename(f), fs(0) {}
   ~Trace() { 
     delete fs; 
   }
diff --git a/src/cls/journal/cls_journal.cc b/src/cls/journal/cls_journal.cc
index 0f2f3e4..f87c2b2 100644
--- a/src/cls/journal/cls_journal.cc
+++ b/src/cls/journal/cls_journal.cc
@@ -24,32 +24,61 @@ cls_method_handle_t h_journal_get_minimum_set;
 cls_method_handle_t h_journal_set_minimum_set;
 cls_method_handle_t h_journal_get_active_set;
 cls_method_handle_t h_journal_set_active_set;
+cls_method_handle_t h_journal_get_client;
 cls_method_handle_t h_journal_client_register;
+cls_method_handle_t h_journal_client_update;
 cls_method_handle_t h_journal_client_unregister;
 cls_method_handle_t h_journal_client_commit;
 cls_method_handle_t h_journal_client_list;
+cls_method_handle_t h_journal_get_next_tag_tid;
+cls_method_handle_t h_journal_get_tag;
+cls_method_handle_t h_journal_tag_create;
+cls_method_handle_t h_journal_tag_list;
 cls_method_handle_t h_journal_object_guard_append;
 
 namespace {
 
 static const uint64_t MAX_KEYS_READ = 64;
 
-static const std::string HEADER_KEY_ORDER         = "order";
-static const std::string HEADER_KEY_SPLAY_WIDTH   = "splay_width";
-static const std::string HEADER_KEY_POOL_ID       = "pool_id";
-static const std::string HEADER_KEY_MINIMUM_SET   = "minimum_set";
-static const std::string HEADER_KEY_ACTIVE_SET    = "active_set";
-static const std::string HEADER_KEY_CLIENT_PREFIX = "client_";
+static const std::string HEADER_KEY_ORDER          = "order";
+static const std::string HEADER_KEY_SPLAY_WIDTH    = "splay_width";
+static const std::string HEADER_KEY_POOL_ID        = "pool_id";
+static const std::string HEADER_KEY_MINIMUM_SET    = "minimum_set";
+static const std::string HEADER_KEY_ACTIVE_SET     = "active_set";
+static const std::string HEADER_KEY_NEXT_TAG_TID   = "next_tag_tid";
+static const std::string HEADER_KEY_NEXT_TAG_CLASS = "next_tag_class";
+static const std::string HEADER_KEY_CLIENT_PREFIX  = "client_";
+static const std::string HEADER_KEY_TAG_PREFIX     = "tag_";
+
+std::string to_hex(uint64_t value) {
+  std::ostringstream oss;
+  oss << std::setw(16) << std::setfill('0') << std::hex << value;
+  return oss.str();
+}
+
+std::string key_from_client_id(const std::string &client_id) {
+  return HEADER_KEY_CLIENT_PREFIX + client_id;
+}
 
-static void key_from_client_id(const std::string &client_id, string *key) {
-  *key = HEADER_KEY_CLIENT_PREFIX + client_id;
+std::string key_from_tag_tid(uint64_t tag_tid) {
+  return HEADER_KEY_TAG_PREFIX + to_hex(tag_tid);
+}
+
+uint64_t tag_tid_from_key(const std::string &key) {
+  std::istringstream iss(key);
+  uint64_t id;
+  iss.ignore(HEADER_KEY_TAG_PREFIX.size()) >> std::hex >> id;
+  return id;
 }
 
 template <typename T>
-int read_key(cls_method_context_t hctx, const string &key, T *t) {
+int read_key(cls_method_context_t hctx, const string &key, T *t,
+             bool ignore_enoent = false) {
   bufferlist bl;
   int r = cls_cxx_map_get_val(hctx, key, &bl);
-  if (r < 0) {
+  if (r == -ENOENT && ignore_enoent) {
+    return 0;
+  } else if (r < 0) {
     CLS_ERR("failed to get omap key: %s", key.c_str());
     return r;
   }
@@ -77,6 +106,123 @@ int write_key(cls_method_context_t hctx, const string &key, const T &t) {
   return 0;
 }
 
+int remove_key(cls_method_context_t hctx, const string &key) {
+  int r = cls_cxx_map_remove_key(hctx, key);
+  if (r < 0 && r != -ENOENT) {
+      CLS_ERR("failed to remove key: %s", key.c_str());
+      return r;
+  }
+  return 0;
+}
+
+int expire_tags(cls_method_context_t hctx, const std::string *skip_client_id) {
+
+  std::string skip_client_key;
+  if (skip_client_id != nullptr) {
+    skip_client_key = key_from_client_id(*skip_client_id);
+  }
+
+  int r;
+  uint64_t minimum_tag_tid = std::numeric_limits<uint64_t>::max();
+  std::string last_read = HEADER_KEY_CLIENT_PREFIX;
+  do {
+    std::map<std::string, bufferlist> vals;
+    r = cls_cxx_map_get_vals(hctx, last_read, HEADER_KEY_CLIENT_PREFIX,
+                             MAX_KEYS_READ, &vals);
+    if (r < 0 && r != -ENOENT) {
+      CLS_ERR("failed to retrieve registered clients: %s",
+              cpp_strerror(r).c_str());
+      return r;
+    }
+
+    for (auto &val : vals) {
+      // if we are removing a client, skip its commit positions
+      if (val.first == skip_client_key) {
+        continue;
+      }
+
+      cls::journal::Client client;
+      bufferlist::iterator iter = val.second.begin();
+      try {
+        ::decode(client, iter);
+      } catch (const buffer::error &err) {
+        CLS_ERR("error decoding registered client: %s",
+                val.first.c_str());
+        return -EIO;
+      }
+
+      // cannot expire tags if a client hasn't committed yet
+      if (client.commit_position.entry_positions.empty()) {
+        return 0;
+      }
+
+      for (auto entry_position : client.commit_position.entry_positions) {
+        minimum_tag_tid = MIN(minimum_tag_tid, entry_position.tag_tid);
+      }
+    }
+    if (!vals.empty()) {
+      last_read = vals.rbegin()->first;
+    }
+  } while (r == MAX_KEYS_READ);
+
+  // compute the minimum in-use tag for each class
+  std::map<uint64_t, uint64_t> minimum_tag_class_to_tids;
+  typedef enum { TAG_PASS_CALCULATE_MINIMUMS,
+                 TAG_PASS_SCRUB,
+                 TAG_PASS_DONE } TagPass;
+  int tag_pass = TAG_PASS_CALCULATE_MINIMUMS;
+  last_read = HEADER_KEY_TAG_PREFIX;
+  do {
+    std::map<std::string, bufferlist> vals;
+    r = cls_cxx_map_get_vals(hctx, last_read, HEADER_KEY_TAG_PREFIX,
+                             MAX_KEYS_READ, &vals);
+    if (r < 0 && r != -ENOENT) {
+      CLS_ERR("failed to retrieve tags: %s", cpp_strerror(r).c_str());
+      return r;
+    }
+
+    for (auto &val : vals) {
+      cls::journal::Tag tag;
+      bufferlist::iterator iter = val.second.begin();
+      try {
+        ::decode(tag, iter);
+      } catch (const buffer::error &err) {
+        CLS_ERR("error decoding tag: %s", val.first.c_str());
+        return -EIO;
+      }
+
+      if (tag.tid != tag_tid_from_key(val.first)) {
+        CLS_ERR("tag tid mismatched: %s", val.first.c_str());
+        return -EINVAL;
+      }
+
+      if (tag_pass == TAG_PASS_CALCULATE_MINIMUMS) {
+        minimum_tag_class_to_tids[tag.tag_class] = tag.tid;
+      } else if (tag_pass == TAG_PASS_SCRUB &&
+                 tag.tid < minimum_tag_class_to_tids[tag.tag_class]) {
+        r = remove_key(hctx, val.first);
+        if (r < 0) {
+          return r;
+        }
+      }
+
+      if (tag.tid >= minimum_tag_tid) {
+        // no need to check for tag classes beyond this point
+        vals.clear();
+        break;
+      }
+    }
+
+    if (tag_pass != TAG_PASS_DONE && vals.size() < MAX_KEYS_READ) {
+      last_read = HEADER_KEY_TAG_PREFIX;
+      ++tag_pass;
+    } else if (!vals.empty()) {
+      last_read = vals.rbegin()->first;
+    }
+  } while (tag_pass != TAG_PASS_DONE);
+  return 0;
+}
+
 } // anonymous namespace
 
 /**
@@ -133,6 +279,17 @@ int journal_create(cls_method_context_t hctx, bufferlist *in, bufferlist *out) {
   if (r < 0) {
     return r;
   }
+
+  uint64_t tag_id = 0;
+  r = write_key(hctx, HEADER_KEY_NEXT_TAG_TID, tag_id);
+  if (r < 0) {
+    return r;
+  }
+
+  r = write_key(hctx, HEADER_KEY_NEXT_TAG_CLASS, tag_id);
+  if (r < 0) {
+    return r;
+  }
   return 0;
 }
 
@@ -341,7 +498,37 @@ int journal_set_active_set(cls_method_context_t hctx, bufferlist *in,
 /**
  * Input:
  * @param id (string) - unique client id
- * @param description (string) - human-readable description of the client
+ *
+ * Output:
+ * cls::journal::Client
+ * @returns 0 on success, negative error code on failure
+ */
+int journal_get_client(cls_method_context_t hctx, bufferlist *in,
+                       bufferlist *out) {
+  std::string id;
+  try {
+    bufferlist::iterator iter = in->begin();
+    ::decode(id, iter);
+  } catch (const buffer::error &err) {
+    CLS_ERR("failed to decode input parameters: %s", err.what());
+    return -EINVAL;
+  }
+
+  std::string key(key_from_client_id(id));
+  cls::journal::Client client;
+  int r = read_key(hctx, key, &client);
+  if (r < 0) {
+    return r;
+  }
+
+  ::encode(client, *out);
+  return 0;
+}
+
+/**
+ * Input:
+ * @param id (string) - unique client id
+ * @param data (bufferlist) - opaque data associated to client
  *
  * Output:
  * @returns 0 on success, negative error code on failure
@@ -349,19 +536,17 @@ int journal_set_active_set(cls_method_context_t hctx, bufferlist *in,
 int journal_client_register(cls_method_context_t hctx, bufferlist *in,
                             bufferlist *out) {
   std::string id;
-  std::string description;
+  bufferlist data;
   try {
     bufferlist::iterator iter = in->begin();
     ::decode(id, iter);
-    ::decode(description, iter);
+    ::decode(data, iter);
   } catch (const buffer::error &err) {
     CLS_ERR("failed to decode input parameters: %s", err.what());
     return -EINVAL;
   }
 
-  std::string key;
-  key_from_client_id(id, &key);
-
+  std::string key(key_from_client_id(id));
   bufferlist stored_clientbl;
   int r = cls_cxx_map_get_val(hctx, key, &stored_clientbl);
   if (r != -ENOENT) {
@@ -369,7 +554,44 @@ int journal_client_register(cls_method_context_t hctx, bufferlist *in,
     return -EEXIST;
   }
 
-  cls::journal::Client client(id, description);
+  cls::journal::Client client(id, data);
+  key = key_from_client_id(id);
+  r = write_key(hctx, key, client);
+  if (r < 0) {
+    return r;
+  }
+  return 0;
+}
+
+/**
+ * Input:
+ * @param id (string) - unique client id
+ * @param data (bufferlist) - opaque data associated to client
+ *
+ * Output:
+ * @returns 0 on success, negative error code on failure
+ */
+int journal_client_update(cls_method_context_t hctx, bufferlist *in,
+                          bufferlist *out) {
+  std::string id;
+  bufferlist data;
+  try {
+    bufferlist::iterator iter = in->begin();
+    ::decode(id, iter);
+    ::decode(data, iter);
+  } catch (const buffer::error &err) {
+    CLS_ERR("failed to decode input parameters: %s", err.what());
+    return -EINVAL;
+  }
+
+  std::string key(key_from_client_id(id));
+  cls::journal::Client client;
+  int r = read_key(hctx, key, &client);
+  if (r < 0) {
+    return r;
+  }
+
+  client.data = data;
   r = write_key(hctx, key, client);
   if (r < 0) {
     return r;
@@ -395,9 +617,7 @@ int journal_client_unregister(cls_method_context_t hctx, bufferlist *in,
     return -EINVAL;
   }
 
-  std::string key;
-  key_from_client_id(id, &key);
-
+  std::string key(key_from_client_id(id));
   bufferlist bl;
   int r = cls_cxx_map_get_val(hctx, key, &bl);
   if (r < 0) {
@@ -410,6 +630,12 @@ int journal_client_unregister(cls_method_context_t hctx, bufferlist *in,
     CLS_ERR("failed to remove omap key: %s", key.c_str());
     return r;
   }
+
+  // prune expired tags
+  r = expire_tags(hctx, &id);
+  if (r < 0) {
+    return r;
+  }
   return 0;
 }
 
@@ -444,9 +670,7 @@ int journal_client_commit(cls_method_context_t hctx, bufferlist *in,
     return -EINVAL;
   }
 
-  std::string key;
-  key_from_client_id(id, &key);
-
+  std::string key(key_from_client_id(id));
   cls::journal::Client client;
   r = read_key(hctx, key, &client);
   if (r < 0) {
@@ -489,7 +713,7 @@ int journal_client_list(cls_method_context_t hctx, bufferlist *in,
 
   std::string last_read;
   if (!start_after.empty()) {
-    key_from_client_id(start_after, &last_read);
+    last_read = key_from_client_id(start_after);
   }
 
   std::map<std::string, bufferlist> vals;
@@ -522,6 +746,247 @@ int journal_client_list(cls_method_context_t hctx, bufferlist *in,
 
 /**
  * Input:
+ * none
+ *
+ * Output:
+ * @returns 0 on success, negative error code on failure
+ */
+int journal_get_next_tag_tid(cls_method_context_t hctx, bufferlist *in,
+                             bufferlist *out) {
+  uint64_t tag_tid;
+  int r = read_key(hctx, HEADER_KEY_NEXT_TAG_TID, &tag_tid);
+  if (r < 0) {
+    return r;
+  }
+
+  ::encode(tag_tid, *out);
+  return 0;
+}
+
+/**
+ * Input:
+ * @param tag_tid (uint64_t)
+ *
+ * Output:
+ * cls::journal::Tag
+ * @returns 0 on success, negative error code on failure
+ */
+int journal_get_tag(cls_method_context_t hctx, bufferlist *in,
+                    bufferlist *out) {
+  uint64_t tag_tid;
+  try {
+    bufferlist::iterator iter = in->begin();
+    ::decode(tag_tid, iter);
+  } catch (const buffer::error &err) {
+    CLS_ERR("failed to decode input parameters: %s", err.what());
+    return -EINVAL;
+  }
+
+  std::string key(key_from_tag_tid(tag_tid));
+  cls::journal::Tag tag;
+  int r = read_key(hctx, key, &tag);
+  if (r < 0) {
+    return r;
+  }
+
+  ::encode(tag, *out);
+  return 0;
+}
+
+/**
+ * Input:
+ * @param tag_tid (uint64_t)
+ * @param tag_class (uint64_t)
+ * @param data (bufferlist)
+ *
+ * Output:
+ * @returns 0 on success, negative error code on failure
+ */
+int journal_tag_create(cls_method_context_t hctx, bufferlist *in,
+                       bufferlist *out) {
+  uint64_t tag_tid;
+  uint64_t tag_class;
+  bufferlist data;
+  try {
+    bufferlist::iterator iter = in->begin();
+    ::decode(tag_tid, iter);
+    ::decode(tag_class, iter);
+    ::decode(data, iter);
+  } catch (const buffer::error &err) {
+    CLS_ERR("failed to decode input parameters: %s", err.what());
+    return -EINVAL;
+  }
+
+  std::string key(key_from_tag_tid(tag_tid));
+  bufferlist stored_tag_bl;
+  int r = cls_cxx_map_get_val(hctx, key, &stored_tag_bl);
+  if (r >= 0) {  // only an existing value means the tag is a duplicate
+    CLS_ERR("duplicate tag id: %" PRIu64, tag_tid);
+    return -EEXIST;
+  } else if (r != -ENOENT) {
+    return r;  // genuine read failure: do not misreport it as -EEXIST
+  }
+
+  // verify tag tid ordering: only the next expected tid may be created
+  uint64_t next_tag_tid;
+  r = read_key(hctx, HEADER_KEY_NEXT_TAG_TID, &next_tag_tid);
+  if (r < 0) {
+    return r;
+  }
+  if (tag_tid != next_tag_tid) {
+    CLS_LOG(5, "out-of-order tag sequence: %" PRIu64, tag_tid);
+    return -ESTALE;
+  }
+
+  uint64_t next_tag_class;
+  r = read_key(hctx, HEADER_KEY_NEXT_TAG_CLASS, &next_tag_class);
+  if (r < 0) {
+    return r;
+  }
+  if (tag_class == cls::journal::Tag::TAG_CLASS_NEW) {
+    // allocate a new tag class and advance the stored class counter
+    tag_class = next_tag_class;
+    r = write_key(hctx, HEADER_KEY_NEXT_TAG_CLASS, tag_class + 1);
+    if (r < 0) {
+      return r;
+    }
+  } else {
+    // verify tag class range: it must have been allocated previously
+    if (tag_class >= next_tag_class) {
+      CLS_ERR("out-of-sequence tag class: %" PRIu64, tag_class);
+      return -EINVAL;
+    }
+  }
+
+  // prune expired tags
+  r = expire_tags(hctx, nullptr);
+  if (r < 0) {
+    return r;
+  }
+
+  // update tag tid sequence
+  r = write_key(hctx, HEADER_KEY_NEXT_TAG_TID, tag_tid + 1);
+  if (r < 0) {
+    return r;
+  }
+
+  // write tag structure ('key' still names this tag's omap entry)
+  cls::journal::Tag tag(tag_tid, tag_class, data);
+  r = write_key(hctx, key, tag);
+  if (r < 0) {
+    return r;
+  }
+  return 0;
+}
+
+/**
+ * Input:
+ * @param start_after_tag_tid (uint64_t) - list tags after this tid (0 = all)
+ * @param max_return (uint64_t) - max tags to return
+ * @param client_id (std::string) - client id filter
+ * @param tag_class (boost::optional<uint64_t>) - optional tag class filter
+ *
+ * Output:
+ * std::set<cls::journal::Tag> - collection of tags
+ * @returns 0 on success, negative error code on failure
+ */
+int journal_tag_list(cls_method_context_t hctx, bufferlist *in,
+                     bufferlist *out) {
+  uint64_t start_after_tag_tid;
+  uint64_t max_return;
+  std::string client_id;
+  boost::optional<uint64_t> tag_class(0);
+
+  // handle compiler false positive about use-before-init
+  tag_class = boost::none;
+  try {
+    bufferlist::iterator iter = in->begin();
+    ::decode(start_after_tag_tid, iter);
+    ::decode(max_return, iter);
+    ::decode(client_id, iter);
+    ::decode(tag_class, iter);
+  } catch (const buffer::error &err) {
+    CLS_ERR("failed to decode input parameters: %s", err.what());
+    return -EINVAL;
+  }
+
+  // calculate the minimum tag within client's commit position
+  uint64_t minimum_tag_tid = std::numeric_limits<uint64_t>::max();
+  cls::journal::Client client;
+  int r = read_key(hctx, key_from_client_id(client_id), &client);
+  if (r < 0) {
+    return r;
+  }
+
+  for (auto entry_position : client.commit_position.entry_positions) {
+    minimum_tag_tid = MIN(minimum_tag_tid, entry_position.tag_tid);
+  }
+
+  // compute minimum tags in use per-class
+  std::set<cls::journal::Tag> tags;
+  std::map<uint64_t, uint64_t> minimum_tag_class_to_tids;
+  typedef enum { TAG_PASS_CALCULATE_MINIMUMS,
+                 TAG_PASS_LIST,
+                 TAG_PASS_DONE } TagPass;
+  int tag_pass = (client.commit_position.entry_positions.empty() ?
+    TAG_PASS_LIST : TAG_PASS_CALCULATE_MINIMUMS);
+  std::string last_read = HEADER_KEY_TAG_PREFIX;
+  do {
+    std::map<std::string, bufferlist> vals;
+    r = cls_cxx_map_get_vals(hctx, last_read, HEADER_KEY_TAG_PREFIX,
+                             MAX_KEYS_READ, &vals);
+    if (r < 0 && r != -ENOENT) {
+      CLS_ERR("failed to retrieve tags: %s", cpp_strerror(r).c_str());
+      return r;
+    }
+
+    for (auto &val : vals) {
+      cls::journal::Tag tag;
+      bufferlist::iterator iter = val.second.begin();
+      try {
+        ::decode(tag, iter);
+      } catch (const buffer::error &err) {
+        CLS_ERR("error decoding tag: %s", val.first.c_str());
+        return -EIO;
+      }
+
+      if (tag_pass == TAG_PASS_CALCULATE_MINIMUMS) {
+        minimum_tag_class_to_tids[tag.tag_class] = tag.tid;
+
+        // completed calculation of tag class minimums
+        if (tag.tid >= minimum_tag_tid) {
+          vals.clear();
+          break;
+        }
+      } else if (tag_pass == TAG_PASS_LIST) {
+        if (start_after_tag_tid != 0 && tag.tid <= start_after_tag_tid) {
+          continue;
+        }
+
+        if (tag.tid >= minimum_tag_class_to_tids[tag.tag_class] &&
+            (!tag_class || *tag_class == tag.tag_class)) {
+          tags.insert(tag);
+        }
+        if (tags.size() >= max_return) {
+          tag_pass = TAG_PASS_DONE;
+        }
+      }
+    }
+
+    if (tag_pass != TAG_PASS_DONE && vals.size() < MAX_KEYS_READ) {
+      last_read = HEADER_KEY_TAG_PREFIX;
+      ++tag_pass;
+    } else if (!vals.empty()) {
+      last_read = vals.rbegin()->first;
+    }
+  } while (tag_pass != TAG_PASS_DONE);
+
+  ::encode(tags, *out);
+  return 0;
+}
+
+/**
+ * Input:
  * @param soft_max_size (uint64_t)
  *
  * Output:
@@ -597,9 +1062,16 @@ void CEPH_CLS_API __cls_init()
                           CLS_METHOD_RD | CLS_METHOD_WR,
                           journal_set_active_set,
                           &h_journal_set_active_set);
+
+  cls_register_cxx_method(h_class, "get_client",
+                          CLS_METHOD_RD,
+                          journal_get_client, &h_journal_get_client);
   cls_register_cxx_method(h_class, "client_register",
                           CLS_METHOD_RD | CLS_METHOD_WR,
                           journal_client_register, &h_journal_client_register);
+  cls_register_cxx_method(h_class, "client_update",
+                          CLS_METHOD_RD | CLS_METHOD_WR,
+                          journal_client_update, &h_journal_client_update);
   cls_register_cxx_method(h_class, "client_unregister",
                           CLS_METHOD_RD | CLS_METHOD_WR,
                           journal_client_unregister,
@@ -611,6 +1083,20 @@ void CEPH_CLS_API __cls_init()
                           CLS_METHOD_RD,
                           journal_client_list, &h_journal_client_list);
 
+  cls_register_cxx_method(h_class, "get_next_tag_tid",
+                          CLS_METHOD_RD,
+                          journal_get_next_tag_tid,
+                          &h_journal_get_next_tag_tid);
+  cls_register_cxx_method(h_class, "get_tag",
+                          CLS_METHOD_RD,
+                          journal_get_tag, &h_journal_get_tag);
+  cls_register_cxx_method(h_class, "tag_create",
+                          CLS_METHOD_RD | CLS_METHOD_WR,
+                          journal_tag_create, &h_journal_tag_create);
+  cls_register_cxx_method(h_class, "tag_list",
+                          CLS_METHOD_RD,
+                          journal_tag_list, &h_journal_tag_list);
+
   /// methods for journal_data.$journal_id.$object_id objects
   cls_register_cxx_method(h_class, "guard_append",
                           CLS_METHOD_RD | CLS_METHOD_WR,
diff --git a/src/cls/journal/cls_journal_client.cc b/src/cls/journal/cls_journal_client.cc
index a4a268d..3295964 100644
--- a/src/cls/journal/cls_journal_client.cc
+++ b/src/cls/journal/cls_journal_client.cc
@@ -223,21 +223,72 @@ void set_active_set(librados::ObjectWriteOperation *op, uint64_t object_set) {
   op->exec("journal", "set_active_set", bl);
 }
 
+int get_client(librados::IoCtx &ioctx, const std::string &oid,
+               const std::string &id, cls::journal::Client *client) {
+  librados::ObjectReadOperation op;
+  get_client_start(&op, id);
+
+  bufferlist out_bl;
+  int r = ioctx.operate(oid, &op, &out_bl);
+  if (r < 0) {
+    return r;
+  }
+
+  bufferlist::iterator iter = out_bl.begin();
+  r = get_client_finish(&iter, client);
+  if (r < 0) {
+    return r;
+  }
+  return 0;
+}
+
+void get_client_start(librados::ObjectReadOperation *op,
+                      const std::string &id) {
+  bufferlist bl;
+  ::encode(id, bl);
+  op->exec("journal", "get_client", bl);
+}
+
+int get_client_finish(bufferlist::iterator *iter,
+                      cls::journal::Client *client) {
+  try {
+    ::decode(*client, *iter);
+  } catch (const buffer::error &err) {
+    return -EBADMSG;
+  }
+  return 0;
+}
+
+int client_register(librados::IoCtx &ioctx, const std::string &oid,
+                    const std::string &id, const bufferlist &data) {
+  librados::ObjectWriteOperation op;
+  client_register(&op, id, data);
+  return ioctx.operate(oid, &op);
+}
+
 void client_register(librados::ObjectWriteOperation *op,
-                     const std::string &id, const std::string &description) {
+                     const std::string &id, const bufferlist &data) {
   bufferlist bl;
   ::encode(id, bl);
-  ::encode(description, bl);
+  ::encode(data, bl);
   op->exec("journal", "client_register", bl);
 }
 
-int client_register(librados::IoCtx &ioctx, const std::string &oid,
-                    const std::string &id, const std::string &description) {
+int client_update(librados::IoCtx &ioctx, const std::string &oid,
+                  const std::string &id, const bufferlist &data) {
   librados::ObjectWriteOperation op;
-  client_register(&op, id, description);
+  client_update(&op, id, data);
   return ioctx.operate(oid, &op);
 }
 
+void client_update(librados::ObjectWriteOperation *op,
+                   const std::string &id, const bufferlist &data) {
+  bufferlist bl;
+  ::encode(id, bl);
+  ::encode(data, bl);
+  op->exec("journal", "client_update", bl);
+}
+
 int client_unregister(librados::IoCtx &ioctx, const std::string &oid,
                        const std::string &id) {
   bufferlist inbl;
@@ -263,6 +314,145 @@ int client_list(librados::IoCtx &ioctx, const std::string &oid,
   return cond.wait();
 }
 
+int get_next_tag_tid(librados::IoCtx &ioctx, const std::string &oid,
+                     uint64_t *tag_tid) {
+  librados::ObjectReadOperation op;
+  get_next_tag_tid_start(&op);
+
+  bufferlist out_bl;
+  int r = ioctx.operate(oid, &op, &out_bl);
+  if (r < 0) {
+    return r;
+  }
+
+  bufferlist::iterator iter = out_bl.begin();
+  r = get_next_tag_tid_finish(&iter, tag_tid);
+  if (r < 0) {
+    return r;
+  }
+  return 0;
+}
+
+void get_next_tag_tid_start(librados::ObjectReadOperation *op) {
+  bufferlist bl;
+  op->exec("journal", "get_next_tag_tid", bl);
+}
+
+int get_next_tag_tid_finish(bufferlist::iterator *iter,
+                            uint64_t *tag_tid) {
+  try {
+    ::decode(*tag_tid, *iter);
+  } catch (const buffer::error &err) {
+    return -EBADMSG;
+  }
+  return 0;
+}
+
+int get_tag(librados::IoCtx &ioctx, const std::string &oid,
+            uint64_t tag_tid, cls::journal::Tag *tag) {
+  librados::ObjectReadOperation op;
+  get_tag_start(&op, tag_tid);
+
+  bufferlist out_bl;
+  int r = ioctx.operate(oid, &op, &out_bl);
+  if (r < 0) {
+    return r;
+  }
+
+  bufferlist::iterator iter = out_bl.begin();
+  r = get_tag_finish(&iter, tag);
+  if (r < 0) {
+    return r;
+  }
+  return 0;
+}
+
+void get_tag_start(librados::ObjectReadOperation *op,
+                   uint64_t tag_tid) {
+  bufferlist bl;
+  ::encode(tag_tid, bl);
+  op->exec("journal", "get_tag", bl);
+}
+
+int get_tag_finish(bufferlist::iterator *iter, cls::journal::Tag *tag) {
+  try {
+    ::decode(*tag, *iter);
+  } catch (const buffer::error &err) {
+    return -EBADMSG;
+  }
+  return 0;
+}
+
+int tag_create(librados::IoCtx &ioctx, const std::string &oid,
+               uint64_t tag_tid, uint64_t tag_class,
+               const bufferlist &data) {
+  librados::ObjectWriteOperation op;
+  tag_create(&op, tag_tid, tag_class, data);
+  return ioctx.operate(oid, &op);
+}
+
+void tag_create(librados::ObjectWriteOperation *op, uint64_t tag_tid,
+                uint64_t tag_class, const bufferlist &data) {
+  bufferlist bl;
+  ::encode(tag_tid, bl);
+  ::encode(tag_class, bl);
+  ::encode(data, bl);
+  op->exec("journal", "tag_create", bl);
+}
+
+int tag_list(librados::IoCtx &ioctx, const std::string &oid,
+             const std::string &client_id, boost::optional<uint64_t> tag_class,
+             std::set<cls::journal::Tag> *tags) {
+  tags->clear();
+  uint64_t start_after_tag_tid = 0;  // 0 == list from the first tag
+  while (true) {
+    librados::ObjectReadOperation op;
+    tag_list_start(&op, start_after_tag_tid, JOURNAL_MAX_RETURN, client_id,
+                   tag_class);
+
+    bufferlist out_bl;
+    int r = ioctx.operate(oid, &op, &out_bl);
+    if (r < 0) {
+      return r;
+    }
+    bufferlist::iterator iter = out_bl.begin();
+    std::set<cls::journal::Tag> decode_tags;
+    r = tag_list_finish(&iter, &decode_tags);
+    if (r < 0) {
+      return r;
+    }
+
+    tags->insert(decode_tags.begin(), decode_tags.end());
+    if (decode_tags.empty() || decode_tags.size() < JOURNAL_MAX_RETURN) {
+      break;  // short (or empty) page: nothing further to request
+    }
+    start_after_tag_tid = decode_tags.rbegin()->tid;  // resume after last tag
+  }
+  return 0;
+}
+
+void tag_list_start(librados::ObjectReadOperation *op,
+                    uint64_t start_after_tag_tid, uint64_t max_return,
+                    const std::string &client_id,
+                    boost::optional<uint64_t> tag_class) {
+  bufferlist bl;
+  ::encode(start_after_tag_tid, bl);
+  ::encode(max_return, bl);
+  ::encode(client_id, bl);
+  ::encode(tag_class, bl);
+  op->exec("journal", "tag_list", bl);
+}
+
+int tag_list_finish(bufferlist::iterator *iter,
+                    std::set<cls::journal::Tag> *tags) {
+  try {
+    ::decode(*tags, *iter);
+  } catch (const buffer::error &err) {
+    return -EBADMSG;
+  }
+  return 0;
+}
+
 void guard_append(librados::ObjectWriteOperation *op, uint64_t soft_max_size) {
   bufferlist bl;
   ::encode(soft_max_size, bl);
diff --git a/src/cls/journal/cls_journal_client.h b/src/cls/journal/cls_journal_client.h
index 18ccf7b..37b0143 100644
--- a/src/cls/journal/cls_journal_client.h
+++ b/src/cls/journal/cls_journal_client.h
@@ -10,6 +10,7 @@
 #include <map>
 #include <set>
 #include <string>
+#include <boost/optional.hpp>
 
 class Context;
 
@@ -31,17 +32,64 @@ void get_mutable_metadata(librados::IoCtx &ioctx, const std::string &oid,
 void set_minimum_set(librados::ObjectWriteOperation *op, uint64_t object_set);
 void set_active_set(librados::ObjectWriteOperation *op, uint64_t object_set);
 
-void client_register(librados::ObjectWriteOperation *op,
-                     const std::string &id, const std::string &description);
+// journal client helpers
+int get_client(librados::IoCtx &ioctx, const std::string &oid,
+               const std::string &id, cls::journal::Client *client);
+void get_client_start(librados::ObjectReadOperation *op,
+                      const std::string &id);
+int get_client_finish(bufferlist::iterator *iter,
+                      cls::journal::Client *client);
+
 int client_register(librados::IoCtx &ioctx, const std::string &oid,
-                    const std::string &id, const std::string &description);
+                    const std::string &id, const bufferlist &data);
+void client_register(librados::ObjectWriteOperation *op,
+                     const std::string &id, const bufferlist &data);
+
+int client_update(librados::IoCtx &ioctx, const std::string &oid,
+                  const std::string &id, const bufferlist &data);
+void client_update(librados::ObjectWriteOperation *op,
+                   const std::string &id, const bufferlist &data);
+
 int client_unregister(librados::IoCtx &ioctx, const std::string &oid,
                       const std::string &id);
+
 void client_commit(librados::ObjectWriteOperation *op, const std::string &id,
                    const cls::journal::ObjectSetPosition &commit_position);
+
 int client_list(librados::IoCtx &ioctx, const std::string &oid,
                 std::set<cls::journal::Client> *clients);
 
+// journal tag helpers
+int get_next_tag_tid(librados::IoCtx &ioctx, const std::string &oid,
+                     uint64_t *tag_tid);
+void get_next_tag_tid_start(librados::ObjectReadOperation *op);
+int get_next_tag_tid_finish(bufferlist::iterator *iter,
+                            uint64_t *tag_tid);
+
+int get_tag(librados::IoCtx &ioctx, const std::string &oid,
+            uint64_t tag_tid, cls::journal::Tag *tag);
+void get_tag_start(librados::ObjectReadOperation *op,
+                   uint64_t tag_tid);
+int get_tag_finish(bufferlist::iterator *iter, cls::journal::Tag *tag);
+
+int tag_create(librados::IoCtx &ioctx, const std::string &oid,
+               uint64_t tag_tid, uint64_t tag_class,
+               const bufferlist &data);
+void tag_create(librados::ObjectWriteOperation *op,
+                uint64_t tag_tid, uint64_t tag_class,
+                const bufferlist &data);
+
+int tag_list(librados::IoCtx &ioctx, const std::string &oid,
+             const std::string &client_id, boost::optional<uint64_t> tag_class,
+             std::set<cls::journal::Tag> *tags);
+void tag_list_start(librados::ObjectReadOperation *op,
+                    uint64_t start_after_tag_tid, uint64_t max_return,
+                    const std::string &client_id,
+                    boost::optional<uint64_t> tag_class);
+int tag_list_finish(bufferlist::iterator *iter,
+                    std::set<cls::journal::Tag> *tags);
+
+// journal entry helpers
 void guard_append(librados::ObjectWriteOperation *op, uint64_t soft_max_size);
 
 } // namespace client
diff --git a/src/cls/journal/cls_journal_types.cc b/src/cls/journal/cls_journal_types.cc
index 3084d10..8a94d10 100644
--- a/src/cls/journal/cls_journal_types.cc
+++ b/src/cls/journal/cls_journal_types.cc
@@ -3,56 +3,32 @@
 
 #include "cls/journal/cls_journal_types.h"
 #include "common/Formatter.h"
-#include <set>
 
 namespace cls {
 namespace journal {
 
 void EntryPosition::encode(bufferlist& bl) const {
   ENCODE_START(1, 1, bl);
-  ::encode(tag, bl);
-  ::encode(tid, bl);
+  ::encode(tag_tid, bl);
+  ::encode(entry_tid, bl);
   ENCODE_FINISH(bl);
 }
 
 void EntryPosition::decode(bufferlist::iterator& iter) {
   DECODE_START(1, iter);
-  ::decode(tag, iter);
-  ::decode(tid, iter);
+  ::decode(tag_tid, iter);
+  ::decode(entry_tid, iter);
   DECODE_FINISH(iter);
 }
 
 void EntryPosition::dump(Formatter *f) const {
-  f->dump_string("tag", tag);
-  f->dump_unsigned("tid", tid);
+  f->dump_unsigned("tag_tid", tag_tid);
+  f->dump_unsigned("entry_tid", entry_tid);
 }
 
 void EntryPosition::generate_test_instances(std::list<EntryPosition *> &o) {
   o.push_back(new EntryPosition());
-  o.push_back(new EntryPosition("id", 2));
-}
-
-bool ObjectSetPosition::operator<(const ObjectSetPosition& rhs) const {
-  if (entry_positions.size() < rhs.entry_positions.size()) {
-    return true;
-  } else if (entry_positions.size() > rhs.entry_positions.size()) {
-    return false;
-  }
-
-  std::map<std::string, uint64_t> rhs_tids;
-  for (EntryPositions::const_iterator it = rhs.entry_positions.begin();
-       it != rhs.entry_positions.end(); ++it) {
-    rhs_tids[it->tag] = it->tid;
-  }
-
-  for (EntryPositions::const_iterator it = entry_positions.begin();
-       it != entry_positions.end(); ++it) {
-    const EntryPosition &entry_position = *it;
-    if (entry_position.tid < rhs_tids[entry_position.tag]) {
-      return true;
-    }
-  }
-  return false;
+  o.push_back(new EntryPosition(1, 2));
 }
 
 void ObjectSetPosition::encode(bufferlist& bl) const {
@@ -84,17 +60,13 @@ void ObjectSetPosition::dump(Formatter *f) const {
 void ObjectSetPosition::generate_test_instances(
     std::list<ObjectSetPosition *> &o) {
   o.push_back(new ObjectSetPosition());
-
-  EntryPositions entry_positions;
-  entry_positions.push_back(EntryPosition("tag1", 120));
-  entry_positions.push_back(EntryPosition("tag2", 121));
-  o.push_back(new ObjectSetPosition(1, entry_positions));
+  o.push_back(new ObjectSetPosition(1, {{1, 120}, {2, 121}}));
 }
 
 void Client::encode(bufferlist& bl) const {
   ENCODE_START(1, 1, bl);
   ::encode(id, bl);
-  ::encode(description, bl);
+  ::encode(data, bl);
   ::encode(commit_position, bl);
   ENCODE_FINISH(bl);
 }
@@ -102,33 +74,69 @@ void Client::encode(bufferlist& bl) const {
 void Client::decode(bufferlist::iterator& iter) {
   DECODE_START(1, iter);
   ::decode(id, iter);
-  ::decode(description, iter);
+  ::decode(data, iter);
   ::decode(commit_position, iter);
   DECODE_FINISH(iter);
 }
 
 void Client::dump(Formatter *f) const {
   f->dump_string("id", id);
-  f->dump_string("description", description);
+
+  std::stringstream data_ss;
+  data.hexdump(data_ss);
+  f->dump_string("data", data_ss.str());
+
   f->open_object_section("commit_position");
   commit_position.dump(f);
   f->close_section();
 }
 
 void Client::generate_test_instances(std::list<Client *> &o) {
+  bufferlist data;
+  data.append(std::string(128, '1'));  // std::string(count, ch): 128 x '1'
+
   o.push_back(new Client());
-  o.push_back(new Client("id", "desc"));
+  o.push_back(new Client("id", data));
+  o.push_back(new Client("id", data, {1, {{1, 120}, {2, 121}}}));
+}
 
-  EntryPositions entry_positions;
-  entry_positions.push_back(EntryPosition("tag1", 120));
-  entry_positions.push_back(EntryPosition("tag1", 121));
-  o.push_back(new Client("id", "desc", ObjectSetPosition(1, entry_positions)));
+void Tag::encode(bufferlist& bl) const {
+  ENCODE_START(1, 1, bl);
+  ::encode(tid, bl);
+  ::encode(tag_class, bl);
+  ::encode(data, bl);
+  ENCODE_FINISH(bl);
+}
+
+void Tag::decode(bufferlist::iterator& iter) {
+  DECODE_START(1, iter);
+  ::decode(tid, iter);
+  ::decode(tag_class, iter);
+  ::decode(data, iter);
+  DECODE_FINISH(iter);
+}
+
+void Tag::dump(Formatter *f) const {
+  f->dump_unsigned("tid", tid);
+  f->dump_unsigned("tag_class", tag_class);
+
+  std::stringstream data_ss;
+  data.hexdump(data_ss);
+  f->dump_string("data", data_ss.str());
+}
+
+void Tag::generate_test_instances(std::list<Tag *> &o) {
+  o.push_back(new Tag());
+  // payload of 128 '1' characters; std::string takes (count, ch)
+  bufferlist data;
+  data.append(std::string(128, '1'));
+  o.push_back(new Tag(123, 234, data));
 }
 
 std::ostream &operator<<(std::ostream &os,
                          const EntryPosition &entry_position) {
-  os << "[tag=" << entry_position.tag << ", tid="
-     << entry_position.tid << "]";
+  os << "[tag_tid=" << entry_position.tag_tid << ", entry_tid="
+     << entry_position.entry_tid << "]";
   return os;
 }
 
@@ -136,18 +144,30 @@ std::ostream &operator<<(std::ostream &os,
                          const ObjectSetPosition &object_set_position) {
   os << "[object_number=" << object_set_position.object_number << ", "
      << "positions=[";
-  for (EntryPositions::const_iterator it =
-         object_set_position.entry_positions.begin();
-       it != object_set_position.entry_positions.end(); ++it) {
-    os << *it;
+  std::string delim;  // empty before the first entry, ", " afterwards
+  for (auto &entry_position : object_set_position.entry_positions) {
+    os << delim << entry_position;
+    delim = ", ";
   }
   os << "]]";
   return os;
 }
 
 std::ostream &operator<<(std::ostream &os, const Client &client) {
-  os << "[id=" << client.id << ", description=" << client.description
-     << ", commit_position=" << client.commit_position << "]";
+  os << "[id=" << client.id << ", "
+     << "data=";
+  client.data.hexdump(os);
+  os << ", "
+     << "commit_position=" << client.commit_position << "]";
+  return os;
+}
+
+std::ostream &operator<<(std::ostream &os, const Tag &tag) {
+  os << "[tid=" << tag.tid << ", "
+     << "tag_class=" << tag.tag_class << ", "
+     << "data=";
+  tag.data.hexdump(os);
+  os << "]";
   return os;
 }
 
diff --git a/src/cls/journal/cls_journal_types.h b/src/cls/journal/cls_journal_types.h
index 9348739..19a1fcb 100644
--- a/src/cls/journal/cls_journal_types.h
+++ b/src/cls/journal/cls_journal_types.h
@@ -9,6 +9,7 @@
 #include "include/encoding.h"
 #include <iosfwd>
 #include <list>
+#include <set>
 #include <string>
 
 namespace ceph {
@@ -19,21 +20,28 @@ namespace cls {
 namespace journal {
 
 struct EntryPosition {
-  std::string tag;
-  uint64_t tid;
+  uint64_t tag_tid;
+  uint64_t entry_tid;
 
-  EntryPosition() : tid(0) {}
-  EntryPosition(const std::string& _tag, uint64_t _tid)
-    : tag(_tag), tid(_tid) {}
+  EntryPosition() : tag_tid(0), entry_tid(0) {}
+  EntryPosition(uint64_t _tag_tid, uint64_t _entry_tid)
+    : tag_tid(_tag_tid), entry_tid(_entry_tid) {}
 
   inline bool operator==(const EntryPosition& rhs) const {
-    return (tag == rhs.tag && tid == rhs.tid);
+    return (tag_tid == rhs.tag_tid && entry_tid == rhs.entry_tid);
   }
 
   void encode(bufferlist& bl) const;
   void decode(bufferlist::iterator& iter);
   void dump(Formatter *f) const;
 
+  inline bool operator<(const EntryPosition &rhs) const {
+    if (tag_tid != rhs.tag_tid) {
+      return tag_tid < rhs.tag_tid;
+    }
+    return entry_tid < rhs.entry_tid;
+  }
+
   static void generate_test_instances(std::list<EntryPosition *> &o);
 };
 
@@ -48,33 +56,31 @@ struct ObjectSetPosition {
                     const EntryPositions &_entry_positions)
     : object_number(_object_number), entry_positions(_entry_positions) {}
 
-  bool operator<(const ObjectSetPosition& rhs) const;
-  inline bool operator<=(const ObjectSetPosition& rhs) const {
-    return (*this == rhs || *this < rhs);
-  }
-  inline bool operator==(const ObjectSetPosition &rhs) const {
-    return (entry_positions == rhs.entry_positions);
-  }
-
   void encode(bufferlist& bl) const;
   void decode(bufferlist::iterator& iter);
   void dump(Formatter *f) const;
 
+  inline bool operator==(const ObjectSetPosition &rhs) const {
+    return (object_number == rhs.object_number &&
+            entry_positions == rhs.entry_positions);
+  }
+
   static void generate_test_instances(std::list<ObjectSetPosition *> &o);
 };
 
 struct Client {
   std::string id;
-  std::string description;
+  bufferlist data;
   ObjectSetPosition commit_position;
 
   Client() {}
-  Client(const std::string& _id, const std::string& _description,
+  Client(const std::string& _id, const bufferlist &_data,
          const ObjectSetPosition &_commit_position = ObjectSetPosition())
-    : id(_id), description(_description), commit_position(_commit_position) {}
+    : id(_id), data(_data), commit_position(_commit_position) {}
 
   inline bool operator==(const Client &rhs) const {
-    return (id == rhs.id && description == rhs.description &&
+    return (id == rhs.id &&
+            data.contents_equal(rhs.data) &&
             commit_position == rhs.commit_position);
   }
   inline bool operator<(const Client &rhs) const {
@@ -88,9 +94,37 @@ struct Client {
   static void generate_test_instances(std::list<Client *> &o);
 };
 
+struct Tag {
+  static const uint64_t TAG_CLASS_NEW = static_cast<uint64_t>(-1);
+
+  uint64_t tid;
+  uint64_t tag_class;
+  bufferlist data;
+
+  Tag() : tid(0), tag_class(0) {}
+  Tag(uint64_t tid, uint64_t tag_class, const bufferlist &data)
+    : tid(tid), tag_class(tag_class), data(data) {}
+
+  inline bool operator==(const Tag &rhs) const {
+    return (tid == rhs.tid &&
+            tag_class == rhs.tag_class &&
+            data.contents_equal(rhs.data));
+  }
+  inline bool operator<(const Tag &rhs) const {
+    return (tid < rhs.tid);
+  }
+
+  void encode(bufferlist& bl) const;
+  void decode(bufferlist::iterator& iter);
+  void dump(Formatter *f) const;
+
+  static void generate_test_instances(std::list<Tag *> &o);
+};
+
 WRITE_CLASS_ENCODER(EntryPosition);
 WRITE_CLASS_ENCODER(ObjectSetPosition);
 WRITE_CLASS_ENCODER(Client);
+WRITE_CLASS_ENCODER(Tag);
 
 std::ostream &operator<<(std::ostream &os,
                          const EntryPosition &entry_position);
@@ -98,6 +132,7 @@ std::ostream &operator<<(std::ostream &os,
                          const ObjectSetPosition &object_set_position);
 std::ostream &operator<<(std::ostream &os,
 			 const Client &client);
+std::ostream &operator<<(std::ostream &os, const Tag &tag);
 
 } // namespace journal
 } // namespace cls
diff --git a/src/cls/log/cls_log_client.cc b/src/cls/log/cls_log_client.cc
index d0c6603..0334deb 100644
--- a/src/cls/log/cls_log_client.cc
+++ b/src/cls/log/cls_log_client.cc
@@ -127,7 +127,7 @@ void cls_log_list(librados::ObjectReadOperation& op, utime_t& from, utime_t& to,
 class LogInfoCtx : public ObjectOperationCompletion {
   cls_log_header *header;
 public:
-  LogInfoCtx(cls_log_header *_header) : header(_header) {}
+  explicit LogInfoCtx(cls_log_header *_header) : header(_header) {}
   void handle_completion(int r, bufferlist& outbl) {
     if (r >= 0) {
       cls_log_info_ret ret;
diff --git a/src/cls/rbd/cls_rbd.cc b/src/cls/rbd/cls_rbd.cc
index 8b88c5e..36271da 100644
--- a/src/cls/rbd/cls_rbd.cc
+++ b/src/cls/rbd/cls_rbd.cc
@@ -2983,7 +2983,7 @@ int read_peers(cls_method_context_t hctx,
   return 0;
 }
 
-int read_peer(cls_method_context_t hctx, const std::string uuid,
+int read_peer(cls_method_context_t hctx, const std::string &uuid,
               cls::rbd::MirrorPeer *peer) {
   bufferlist bl;
   int r = cls_cxx_map_get_val(hctx, peer_key(uuid), &bl);
@@ -3003,7 +3003,7 @@ int read_peer(cls_method_context_t hctx, const std::string uuid,
   return 0;
 }
 
-int write_peer(cls_method_context_t hctx, const std::string uuid,
+int write_peer(cls_method_context_t hctx, const std::string &uuid,
                const cls::rbd::MirrorPeer &peer) {
   bufferlist bl;
   ::encode(peer, bl);
diff --git a/src/cls/replica_log/cls_replica_log_ops.h b/src/cls/replica_log/cls_replica_log_ops.h
index 0905bb9..c5361a0 100644
--- a/src/cls/replica_log/cls_replica_log_ops.h
+++ b/src/cls/replica_log/cls_replica_log_ops.h
@@ -16,7 +16,7 @@
 struct cls_replica_log_delete_marker_op {
   string entity_id;
   cls_replica_log_delete_marker_op() {}
-  cls_replica_log_delete_marker_op(const string& id) : entity_id(id) {}
+  explicit cls_replica_log_delete_marker_op(const string& id) : entity_id(id) {}
 
   void encode(bufferlist& bl) const {
     ENCODE_START(1, 1, bl);
@@ -39,7 +39,7 @@ WRITE_CLASS_ENCODER(cls_replica_log_delete_marker_op)
 struct cls_replica_log_set_marker_op {
   cls_replica_log_progress_marker marker;
   cls_replica_log_set_marker_op() {}
-  cls_replica_log_set_marker_op(const cls_replica_log_progress_marker& m) :
+  explicit cls_replica_log_set_marker_op(const cls_replica_log_progress_marker& m) :
     marker(m) {}
 
   void encode(bufferlist& bl) const {
diff --git a/src/cls/rgw/cls_rgw_client.cc b/src/cls/rgw/cls_rgw_client.cc
index e6ac56b..62034f2 100644
--- a/src/cls/rgw/cls_rgw_client.cc
+++ b/src/cls/rgw/cls_rgw_client.cc
@@ -476,7 +476,7 @@ int CLSRGWIssueGetDirHeader::issue_op(int shard_id, const string& oid)
 class GetDirHeaderCompletion : public ObjectOperationCompletion {
   RGWGetDirHeader_CB *ret_ctx;
 public:
-  GetDirHeaderCompletion(RGWGetDirHeader_CB *_ctx) : ret_ctx(_ctx) {}
+  explicit GetDirHeaderCompletion(RGWGetDirHeader_CB *_ctx) : ret_ctx(_ctx) {}
   ~GetDirHeaderCompletion() {
     ret_ctx->put();
   }
diff --git a/src/cls/version/cls_version_client.cc b/src/cls/version/cls_version_client.cc
index 70d5ebe..191fa4b 100644
--- a/src/cls/version/cls_version_client.cc
+++ b/src/cls/version/cls_version_client.cc
@@ -60,7 +60,7 @@ void cls_version_check(librados::ObjectOperation& op, obj_version& objv, Version
 class VersionReadCtx : public ObjectOperationCompletion {
   obj_version *objv;
 public:
-  VersionReadCtx(obj_version *_objv) : objv(_objv) {}
+  explicit VersionReadCtx(obj_version *_objv) : objv(_objv) {}
   void handle_completion(int r, bufferlist& outbl) {
     if (r >= 0) {
       cls_version_read_ret ret;
diff --git a/src/common/BackTrace.h b/src/common/BackTrace.h
index 4157f74..addfdee 100644
--- a/src/common/BackTrace.h
+++ b/src/common/BackTrace.h
@@ -18,7 +18,7 @@ struct BackTrace {
   size_t size;
   char **strings;
 
-  BackTrace(int s) : skip(s) {
+  explicit BackTrace(int s) : skip(s) {
 #ifdef HAVE_EXECINFO_H
     size = backtrace(array, max);
     strings = backtrace_symbols(array, size);
diff --git a/src/common/DecayCounter.h b/src/common/DecayCounter.h
index 4e69a88..5abbeb3 100644
--- a/src/common/DecayCounter.h
+++ b/src/common/DecayCounter.h
@@ -35,6 +35,7 @@ class DecayRate {
 
 public:
   DecayRate() : k(0) {}
+  // cppcheck-suppress noExplicitConstructor
   DecayRate(double hl) { set_halflife(hl); }
   void set_halflife(double hl) {
     k = ::log(.5) / hl;
@@ -56,7 +57,7 @@ public:
   void dump(Formatter *f) const;
   static void generate_test_instances(list<DecayCounter*>& ls);
 
-  DecayCounter(const utime_t &now)
+  explicit DecayCounter(const utime_t &now)
     : val(0), delta(0), vel(0), last_decay(now)
   {
   }
diff --git a/src/common/Finisher.h b/src/common/Finisher.h
index 26a41b7..4d494ed 100644
--- a/src/common/Finisher.h
+++ b/src/common/Finisher.h
@@ -62,7 +62,7 @@ class Finisher {
 
   struct FinisherThread : public Thread {
     Finisher *fin;    
-    FinisherThread(Finisher *f) : fin(f) {}
+    explicit FinisherThread(Finisher *f) : fin(f) {}
     void* entry() { return (void*)fin->finisher_thread_entry(); }
   } finisher_thread;
 
@@ -134,7 +134,7 @@ class Finisher {
 
   /// Construct an anonymous Finisher.
   /// Anonymous finishers do not log their queue length.
-  Finisher(CephContext *cct_) :
+  explicit Finisher(CephContext *cct_) :
     cct(cct_), finisher_lock("Finisher::finisher_lock"),
     finisher_stop(false), finisher_running(false),
     thread_name("fn_anonymous"), logger(0),
diff --git a/src/common/Formatter.cc b/src/common/Formatter.cc
index 9e7fd38..4a16524 100644
--- a/src/common/Formatter.cc
+++ b/src/common/Formatter.cc
@@ -18,6 +18,7 @@
 
 #include "assert.h"
 #include "Formatter.h"
+#include "HTMLFormatter.h"
 #include "common/escape.h"
 #include "include/buffer.h"
 
@@ -88,6 +89,10 @@ Formatter *Formatter::create(const std::string &type,
     return new TableFormatter();
   else if (mytype == "table-kv")
     return new TableFormatter(true);
+  else if (mytype == "html")
+    return new HTMLFormatter(false);
+  else if (mytype == "html-pretty")
+    return new HTMLFormatter(true);
   else if (fallback != "")
     return create(fallback, "", "");
   else
@@ -331,8 +336,11 @@ XMLFormatter::XMLFormatter(bool pretty, bool lowercased_underscored)
 void XMLFormatter::flush(std::ostream& os)
 {
   finish_pending_string();
-  os << m_ss.str();
-  if (m_pretty)
+  std::string m_ss_str = m_ss.str();
+  os << m_ss_str;
+  /* There is a small catch here. If the rest of the formatter had NO output,
+   * we should NOT output a newline. This primarily triggers on HTTP redirects */
+  if (m_pretty && !m_ss_str.empty())
     os << "\n";
   m_ss.clear();
   m_ss.str("");
@@ -346,6 +354,24 @@ void XMLFormatter::reset()
   m_pending_string.str("");
   m_sections.clear();
   m_pending_string_name.clear();
+  m_header_done = false;
+}
+
+void XMLFormatter::output_header()
+{
+  if (m_header_done)
+    return;  // the preamble is written at most once between reset() calls
+  m_header_done = true;
+  write_raw_data(XMLFormatter::XML_1_DTD);
+  if (m_pretty)
+    m_ss << "\n";
+}
+
+void XMLFormatter::output_footer()
+{
+  // Keep closing sections until none is left open.
+  while (!m_sections.empty())
+    close_section();
 }
 
 void XMLFormatter::open_object_section(const char *name)
diff --git a/src/common/Formatter.h b/src/common/Formatter.h
index 3784bdb..d74fab7 100644
--- a/src/common/Formatter.h
+++ b/src/common/Formatter.h
@@ -44,6 +44,10 @@ namespace ceph {
     void flush(bufferlist &bl);
     virtual void reset() = 0;
 
+    virtual void set_status(int status, const char* status_name) = 0;
+    virtual void output_header() = 0;
+    virtual void output_footer() = 0;
+
     virtual void open_array_section(const char *name) = 0;
     virtual void open_array_section_in_ns(const char *name, const char *ns) = 0;
     virtual void open_object_section(const char *name) = 0;
@@ -87,8 +91,11 @@ namespace ceph {
 
   class JSONFormatter : public Formatter {
   public:
-    JSONFormatter(bool p = false);
+    explicit JSONFormatter(bool p = false);
 
+    virtual void set_status(int status, const char* status_name) {};
+    virtual void output_header() {};
+    virtual void output_footer() {};
     void flush(std::ostream& os);
     void reset();
     virtual void open_array_section(const char *name);
@@ -130,6 +137,10 @@ namespace ceph {
     static const char *XML_1_DTD;
     XMLFormatter(bool pretty = false, bool lowercased_underscored = false);
 
+    virtual void set_status(int status, const char* status_name) {}
+    virtual void output_header();
+    virtual void output_footer();
+
     void flush(std::ostream& os);
     void reset();
     void open_array_section(const char *name);
@@ -150,7 +161,7 @@ namespace ceph {
     void open_array_section_with_attrs(const char *name, const FormatterAttrs& attrs);
     void open_object_section_with_attrs(const char *name, const FormatterAttrs& attrs);
     void dump_string_with_attrs(const char *name, const std::string& s, const FormatterAttrs& attrs);
-  private:
+  protected:
     void open_section_in_ns(const char *name, const char *ns, const FormatterAttrs *attrs);
     void finish_pending_string();
     void print_spaces();
@@ -162,12 +173,16 @@ namespace ceph {
     bool m_pretty;
     bool m_lowercased_underscored;
     std::string m_pending_string_name;
+    bool m_header_done;
   };
 
   class TableFormatter : public Formatter {
   public:
-    TableFormatter(bool keyval = false);
+    explicit TableFormatter(bool keyval = false);
 
+    virtual void set_status(int status, const char* status_name) {};
+    virtual void output_header() {};
+    virtual void output_footer() {};
     void flush(std::ostream& os);
     void reset();
     virtual void open_array_section(const char *name);
diff --git a/src/common/Graylog.cc b/src/common/Graylog.cc
new file mode 100644
index 0000000..8dde3d0
--- /dev/null
+++ b/src/common/Graylog.cc
@@ -0,0 +1,175 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "Graylog.h"
+
+#include <iostream>
+#include <sstream>
+#include <memory>
+
+#include <arpa/inet.h>
+
+#include <boost/asio.hpp>
+#include <boost/iostreams/filtering_stream.hpp>
+#include <boost/iostreams/filter/zlib.hpp>
+#include <boost/lexical_cast.hpp>
+
+#include "common/Formatter.h"
+#include "include/uuid.h"
+
+namespace ceph {
+namespace log {
+
+Graylog::Graylog(const SubsystemMap * const s, std::string logger)
+    : m_subs(s),
+      m_log_dst_valid(false),  // sending stays off until set_destination() succeeds
+      m_hostname(),
+      m_fsid(),
+      m_logger(logger),
+      m_ostream_compressed(std::stringstream::in |
+                           std::stringstream::out |
+                           std::stringstream::binary)
+{
+  m_formatter.reset(Formatter::create("json"));
+  m_formatter_section.reset(Formatter::create("json"));
+}
+
+Graylog::Graylog(std::string logger)
+    : m_subs(NULL),  // no SubsystemMap: log_entry() will omit "_subsys_name"
+      m_log_dst_valid(false),
+      m_hostname(),
+      m_fsid(),
+      m_logger(logger),
+      m_ostream_compressed(std::stringstream::in |
+                           std::stringstream::out |
+                           std::stringstream::binary)
+{
+  m_formatter.reset(Formatter::create("json"));
+  m_formatter_section.reset(Formatter::create("json"));
+}
+
+Graylog::~Graylog()
+{  // no explicit cleanup: members release themselves, and m_subs is never deleted here
+}
+
+void Graylog::set_destination(const std::string& host, int port)
+{
+  try {  // resolve host:port as a UDP endpoint; a failure is reported on stderr
+    boost::asio::ip::udp::resolver resolver(m_io_service);
+    boost::asio::ip::udp::resolver::query query(host,
+                                                boost::lexical_cast<std::string>(port));
+    m_endpoint = *resolver.resolve(query);  // only the first resolver result is kept
+    m_log_dst_valid = true;
+  } catch (boost::system::system_error const& e) {
+    cerr << "Error resolving graylog destination: " << e.what() << std::endl;
+    m_log_dst_valid = false;  // log_entry()/log_log_entry() then send nothing
+  }
+}
+
+// Remember the value reported under the "host" key of every message.
+void Graylog::set_hostname(const std::string& host) {
+  m_hostname = host;
+}
+
+void Graylog::set_fsid(uuid_d fsid)
+{
+  char text[40] = {0};  // zero-filled scratch space for the printed uuid
+  fsid.print(text);
+  m_fsid = text;        // reported later under the "_fsid" key
+}
+
+void Graylog::log_entry(Entry const * const e)
+{
+  if (m_log_dst_valid) {  // entries are dropped until set_destination() has succeeded
+    std::string s = e->get_str();
+    // Describe the entry as one JSON object in m_formatter.
+    m_formatter->open_object_section("");
+    m_formatter->dump_string("version", "1.1");
+    m_formatter->dump_string("host", m_hostname);
+    m_formatter->dump_string("short_message", s);
+    m_formatter->dump_string("_app", "ceph");
+    m_formatter->dump_float("timestamp", e->m_stamp.sec() + (e->m_stamp.usec() / 1000000.0));
+    m_formatter->dump_int("_thread", e->m_thread);
+    m_formatter->dump_int("_level", e->m_prio);
+    if (m_subs != NULL)  // only the name lookup needs the SubsystemMap
+      m_formatter->dump_string("_subsys_name", m_subs->get_name(e->m_subsys));
+    m_formatter->dump_int("_subsys_id", e->m_subsys);
+    m_formatter->dump_string("_fsid", m_fsid);
+    m_formatter->dump_string("_logger", m_logger);
+    m_formatter->close_section();
+    // Empty the string sink left over from the previous message.
+    m_ostream_compressed.clear();
+    m_ostream_compressed.str("");
+    // Rebuild the filter chain: compressor first, string sink last.
+    m_ostream.reset();
+
+    m_ostream.push(m_compressor);
+    m_ostream.push(m_ostream_compressed);
+    // Write the JSON through the chain, then tear the chain down again.
+    m_formatter->flush(m_ostream);
+    m_ostream << std::endl;
+
+    m_ostream.reset();
+    // A fresh UDP socket is opened per message; send errors are reported on stderr.
+    try {
+      boost::asio::ip::udp::socket socket(m_io_service);
+      socket.open(m_endpoint.protocol());
+      socket.send_to(boost::asio::buffer(m_ostream_compressed.str()), m_endpoint);
+    } catch (boost::system::system_error const& err) {
+      cerr << "Error sending graylog message: " << err.what() << std::endl;
+    }
+  }
+}
+
+void Graylog::log_log_entry(LogEntry const * const e)
+{
+  if (!m_log_dst_valid)
+    return;  // nothing is sent until set_destination() has succeeded
+
+  // Render the sender (address and entity name) as a nested JSON document
+  // first; it is embedded below as the string value of "_who".
+  m_formatter_section->open_object_section("");
+  e->who.addr.dump(m_formatter_section.get());
+  e->who.name.dump(m_formatter_section.get());
+  m_formatter_section->close_section();
+  m_ostream_section.clear();
+  m_ostream_section.str("");
+  m_formatter_section->flush(m_ostream_section);
+
+  m_formatter->open_object_section("");
+  m_formatter->dump_string("version", "1.1");
+  m_formatter->dump_string("host", m_hostname);
+  m_formatter->dump_string("short_message", e->msg);
+  m_formatter->dump_float("timestamp", e->stamp.sec() + (e->stamp.usec() / 1000000.0));
+  m_formatter->dump_string("_app", "ceph");
+  m_formatter->dump_string("_who", m_ostream_section.str());
+  m_formatter->dump_int("_seq", e->seq);
+  m_formatter->dump_string("_prio", clog_type_to_string(e->prio));
+  m_formatter->dump_string("_channel", e->channel);
+  m_formatter->dump_string("_fsid", m_fsid);
+  m_formatter->dump_string("_logger", m_logger);
+  m_formatter->close_section();
+
+  // Empty the sink, then rebuild the chain: compressor first, string sink last.
+  m_ostream_compressed.clear();
+  m_ostream_compressed.str("");
+  m_ostream.reset();
+  m_ostream.push(m_compressor);
+  m_ostream.push(m_ostream_compressed);
+
+  m_formatter->flush(m_ostream);
+  m_ostream << std::endl;
+  m_ostream.reset();
+
+  // A fresh UDP socket is opened for every message; send errors go to stderr.
+  try {
+    boost::asio::ip::udp::socket socket(m_io_service);
+    socket.open(m_endpoint.protocol());
+    socket.send_to(boost::asio::buffer(m_ostream_compressed.str()), m_endpoint);
+  } catch (boost::system::system_error const& err) {
+    cerr << "Error sending graylog message: " << err.what() << std::endl;
+  }
+}
+
+} // ceph::log::
+} // ceph::
diff --git a/src/common/Graylog.h b/src/common/Graylog.h
new file mode 100644
index 0000000..7caa656
--- /dev/null
+++ b/src/common/Graylog.h
@@ -0,0 +1,81 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef __CEPH_LOG_GRAYLOG_H
+#define __CEPH_LOG_GRAYLOG_H
+
+
+#include <memory>
+
+#include <boost/thread/mutex.hpp>
+#include <boost/asio.hpp>
+#include <boost/iostreams/filtering_stream.hpp>
+#include <boost/iostreams/filter/zlib.hpp>
+
+#include "log/Entry.h"
+#include "log/SubsystemMap.h"
+#include "common/LogEntry.h"
+#include "include/memory.h"
+
+namespace ceph {
+namespace log {
+
+// Graylog logging backend: Convert log datastructures (LogEntry, Entry) to
+// GELF (http://www.graylog2.org/resources/gelf/specification) and send it
+// to a GELF UDP receiver
+
+class Graylog
+{
+ public:
+
+  /**
+   * Create Graylog with SubsystemMap. log_entry will resolve the subsystem
+   * id to string. Logging will not be ready until set_destination is called
+   * @param s SubsystemMap
+   * @param logger Value for key "_logger" in GELF
+   */
+  Graylog(const SubsystemMap * const s, std::string logger);
+
+  /**
+   * Create Graylog without SubsystemMap. Logging will not be ready
+   * until set_destination is called. Explicit: a string never becomes a sink.
+   * @param logger Value for key "_logger" in GELF
+   */
+  explicit Graylog(std::string logger);
+  virtual ~Graylog();
+
+  void set_hostname(const std::string& host);   // value of the "host" key
+  void set_fsid(uuid_d fsid);                   // printed form becomes "_fsid"
+  // Resolve host:port as the UDP destination; on failure sending stays disabled.
+  void set_destination(const std::string& host, int port);
+
+  void log_entry(Entry const * const e);        // send one Entry
+  void log_log_entry(LogEntry const * const e); // send one LogEntry
+
+  typedef ceph::shared_ptr<Graylog> Ref;
+
+ private:
+  SubsystemMap const * const m_subs;  // may be NULL; never deleted by this class
+
+  bool m_log_dst_valid;               // true once set_destination() has resolved a host
+
+  std::string m_hostname;
+  std::string m_fsid;
+  std::string m_logger;
+
+  boost::asio::ip::udp::endpoint m_endpoint;
+  boost::asio::io_service m_io_service;
+
+  std::unique_ptr<Formatter> m_formatter;          // builds each outgoing message
+  std::unique_ptr<Formatter> m_formatter_section;  // builds the nested "_who" value
+  std::stringstream m_ostream_section;
+  std::stringstream m_ostream_compressed;          // receives the compressor's output
+  boost::iostreams::filtering_ostream m_ostream;
+  boost::iostreams::zlib_compressor m_compressor;
+
+};
+
+}
+}
+
+#endif
diff --git a/src/common/HTMLFormatter.cc b/src/common/HTMLFormatter.cc
new file mode 100644
index 0000000..b10c296
--- /dev/null
+++ b/src/common/HTMLFormatter.cc
@@ -0,0 +1,158 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2011 New Dream Network
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation.  See file COPYING.
+ *
+ */
+
+#define LARGE_SIZE 1024
+
+#include "include/int_types.h"
+
+#include "assert.h"
+#include "Formatter.h"
+#include "HTMLFormatter.h"
+#include "common/escape.h"
+
+#include <iostream>
+#include <sstream>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <vector>
+#include <string>
+#include <set>
+#include <boost/format.hpp>
+
+// -----------------------
+namespace ceph {
+
+HTMLFormatter::HTMLFormatter(bool pretty)
+: XMLFormatter(pretty), m_header_done(false), m_status(0), m_status_name(NULL)
+{  // status stays 0 with no name until set_status() is called
+}
+
+HTMLFormatter::~HTMLFormatter()
+{
+  // m_status_name is either NULL or a strdup() copy made by set_status();
+  // free(NULL) is a no-op, so no guard is needed.
+  free((void*)m_status_name);
+  m_status_name = NULL;
+}
+
+void HTMLFormatter::reset()
+{
+  // Clear the XML state first, then the HTML-only header flag and status.
+  XMLFormatter::reset();
+  m_header_done = false;
+  m_status = 0;
+  // free(NULL) is a no-op, so the name can be released unconditionally.
+  free((void*)m_status_name);
+  m_status_name = NULL;
+}
+
+void HTMLFormatter::set_status(int status, const char* status_name)
+{
+  m_status = status;
+  if (!status_name) return;  // a NULL name keeps any previously stored name
+  free((void*)m_status_name);  // release the earlier copy instead of leaking it
+  m_status_name = strdup(status_name);
+}
+
+void HTMLFormatter::output_header() {
+  if (m_header_done)
+    return;  // the page preamble is written once per reset()
+  m_header_done = true;
+  // Title text: the numeric status, plus " <name>" when a name was set.
+  char code[16];
+  snprintf(code, sizeof(code), "%d", m_status);
+  std::string status_line(code);
+  if (m_status_name)
+    status_line.append(" ").append(m_status_name);
+  // <html>, <body> and <ul> are opened as sections and stay open on return.
+  open_object_section("html");
+  print_spaces();
+  m_ss << "<head><title>" << status_line << "</title></head>";
+  if (m_pretty)
+    m_ss << "\n";
+  open_object_section("body");
+  print_spaces();
+  m_ss << "<h1>" << status_line << "</h1>";
+  if (m_pretty)
+    m_ss << "\n";
+  open_object_section("ul");
+}
+
+// Shared writer for the scalar dumps: emits one "<li>name: value</li>" item.
+template <typename T>
+void HTMLFormatter::dump_template(const char *name, T arg)
+{
+  print_spaces();
+  m_ss << "<li>" << name << ": " << arg << "</li>"
+       << (m_pretty ? "\n" : "");
+}
+
+// Unsigned integers go through the shared <li> writer unchanged.
+void HTMLFormatter::dump_unsigned(const char *name, uint64_t u) {
+  dump_template(name, u);
+}
+
+// Signed integers go through the shared <li> writer unchanged.
+void HTMLFormatter::dump_int(const char *name, int64_t u) {
+  dump_template(name, u);
+}
+
+// Doubles are written with the stream's default floating-point formatting.
+void HTMLFormatter::dump_float(const char *name, double d) {
+  dump_template(name, d);
+}
+
+// Strings are passed through escape_xml_str() before being written as an item.
+void HTMLFormatter::dump_string(const char *name, const std::string& s) {
+  dump_template(name, escape_xml_str(s.c_str()));
+}
+
+void HTMLFormatter::dump_string_with_attrs(const char *name, const std::string& s, const FormatterAttrs& attrs)
+{
+  std::string label(name);
+  std::string rendered_attrs;
+  get_attrs_str(&attrs, rendered_attrs);
+  print_spaces();
+  // The attribute text is appended after the escaped value, inside the <li>.
+  m_ss << "<li>" << label << ": " << escape_xml_str(s.c_str()) << rendered_attrs << "</li>"
+       << (m_pretty ? "\n" : "");
+}
+
+std::ostream& HTMLFormatter::dump_stream(const char *name)
+{
+  print_spaces();
+  m_pending_string_name = "li";  // presumably names the closing tag written later -- TODO confirm
+  m_ss << "<li>" << name << ": ";  // the item is left open; the caller streams the value
+  return m_pending_string;
+}
+
+void HTMLFormatter::dump_format_va(const char* name, const char *ns, bool quoted, const char *fmt, va_list ap)
+{
+  // Output longer than LARGE_SIZE - 1 characters is truncated by vsnprintf.
+  char buf[LARGE_SIZE];
+  vsnprintf(buf, LARGE_SIZE, fmt, ap);
+
+  std::string e(name);
+  print_spaces();
+  // `quoted` is unused here; a namespace, if given, becomes an xmlns attribute.
+  m_ss << "<li";
+  if (ns)
+    m_ss << " xmlns=\"" << ns << "\"";
+  m_ss << ">" << e << ": " << escape_xml_str(buf) << "</li>";
+  if (m_pretty)
+    m_ss << "\n";
+}
+
+} // namespace ceph
diff --git a/src/common/HTMLFormatter.h b/src/common/HTMLFormatter.h
new file mode 100644
index 0000000..c165efc
--- /dev/null
+++ b/src/common/HTMLFormatter.h
@@ -0,0 +1,50 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+#ifndef CEPH_HTML_FORMATTER_H
+#define CEPH_HTML_FORMATTER_H
+
+#include "include/int_types.h"
+
+#include <deque>
+#include <iosfwd>
+#include <list>
+#include <vector>
+#include <sstream>
+#include <stdarg.h>
+#include <string>
+#include <map>
+
+#include "include/buffer.h"
+#include "Formatter.h"
+
+namespace ceph {
+  class HTMLFormatter : public XMLFormatter {
+  public:
+    explicit HTMLFormatter(bool pretty = false);
+    ~HTMLFormatter();  // frees the copied status name
+    void reset();
+    // Status shown in the page title and heading written by output_header().
+    virtual void set_status(int status, const char* status_name);
+    virtual void output_header();
+    // The dump_* overrides below write "<li>name: value</li>" items.
+    void dump_unsigned(const char *name, uint64_t u);
+    void dump_int(const char *name, int64_t u);
+    void dump_float(const char *name, double d);
+    void dump_string(const char *name, const std::string& s);
+    std::ostream& dump_stream(const char *name);
+    void dump_format_va(const char *name, const char *ns, bool quoted, const char *fmt, va_list ap);
+
+    /* with attrs */
+    void dump_string_with_attrs(const char *name, const std::string& s, const FormatterAttrs& attrs);
+  private:
+	template <typename T> void dump_template(const char *name, T arg);
+    // NOTE(review): XMLFormatter also declares a protected m_header_done; this one hides it.
+    bool m_header_done;
+
+    int m_status;
+    const char* m_status_name;  // NULL or a strdup() copy owned by this object
+  };
+
+}
+
+#endif
diff --git a/src/common/HeartbeatMap.h b/src/common/HeartbeatMap.h
index 61c2f90..8ab5f64 100644
--- a/src/common/HeartbeatMap.h
+++ b/src/common/HeartbeatMap.h
@@ -46,7 +46,7 @@ struct heartbeat_handle_d {
   time_t grace, suicide_grace;
   std::list<heartbeat_handle_d*>::iterator list_item;
 
-  heartbeat_handle_d(const std::string& n)
+  explicit heartbeat_handle_d(const std::string& n)
     : name(n), grace(0), suicide_grace(0)
   { }
 };
@@ -74,7 +74,7 @@ class HeartbeatMap {
   // get the number of total workers
   int get_total_workers() const;
 
-  HeartbeatMap(CephContext *cct);
+  explicit HeartbeatMap(CephContext *cct);
   ~HeartbeatMap();
 
  private:
diff --git a/src/common/Initialize.h b/src/common/Initialize.h
index 35414d6..78ad5ec 100644
--- a/src/common/Initialize.h
+++ b/src/common/Initialize.h
@@ -69,7 +69,7 @@ class Initialize {
    *      function should normally contain an internal guard so that it
    *      only performs its initialization the first time it is invoked.
    */
-  Initialize(void (*func)()) {
+  explicit Initialize(void (*func)()) {
     (*func)();
   }
 
diff --git a/src/common/LogClient.cc b/src/common/LogClient.cc
index 756297b..5300c41 100644
--- a/src/common/LogClient.cc
+++ b/src/common/LogClient.cc
@@ -44,7 +44,12 @@ int parse_log_client_options(CephContext *cct,
 			     map<string,string> &log_to_monitors,
 			     map<string,string> &log_to_syslog,
 			     map<string,string> &log_channels,
-			     map<string,string> &log_prios)
+			     map<string,string> &log_prios,
+			     map<string,string> &log_to_graylog,
+			     map<string,string> &log_to_graylog_host,
+			     map<string,string> &log_to_graylog_port,
+			     uuid_d &fsid,
+			     string &host)
 {
   ostringstream oss;
 
@@ -75,6 +80,30 @@ int parse_log_client_options(CephContext *cct,
     lderr(cct) << __func__ << " error parsing 'clog_to_syslog_level'" << dendl;
     return r;
   }
+
+  r = get_conf_str_map_helper(cct->_conf->clog_to_graylog, oss,
+                              &log_to_graylog, CLOG_CONFIG_DEFAULT_KEY);
+  if (r < 0) {
+    lderr(cct) << __func__ << " error parsing 'clog_to_graylog'" << dendl;
+    return r;
+  }
+
+  r = get_conf_str_map_helper(cct->_conf->clog_to_graylog_host, oss,
+                              &log_to_graylog_host, CLOG_CONFIG_DEFAULT_KEY);
+  if (r < 0) {
+    lderr(cct) << __func__ << " error parsing 'clog_to_graylog_host'" << dendl;
+    return r;
+  }
+
+  r = get_conf_str_map_helper(cct->_conf->clog_to_graylog_port, oss,
+                              &log_to_graylog_port, CLOG_CONFIG_DEFAULT_KEY);
+  if (r < 0) {
+    lderr(cct) << __func__ << " error parsing 'clog_to_graylog_port'" << dendl;
+    return r;
+  }
+
+  fsid = cct->_conf->fsid;
+  host = cct->_conf->host;
   return 0;
 }
 
@@ -130,7 +159,12 @@ LogClientTemp::~LogClientTemp()
 void LogChannel::update_config(map<string,string> &log_to_monitors,
 			       map<string,string> &log_to_syslog,
 			       map<string,string> &log_channels,
-			       map<string,string> &log_prios)
+			       map<string,string> &log_prios,
+			       map<string,string> &log_to_graylog,
+			       map<string,string> &log_to_graylog_host,
+			       map<string,string> &log_to_graylog_port,
+			       uuid_d &fsid,
+			       string &host)
 {
   ldout(cct, 20) << __func__ << " log_to_monitors " << log_to_monitors
 		 << " log_to_syslog " << log_to_syslog
@@ -145,17 +179,43 @@ void LogChannel::update_config(map<string,string> &log_to_monitors,
 					   &CLOG_CONFIG_DEFAULT_KEY);
   string prio = get_str_map_key(log_prios, log_channel,
 				&CLOG_CONFIG_DEFAULT_KEY);
+  bool to_graylog = (get_str_map_key(log_to_graylog, log_channel,
+				     &CLOG_CONFIG_DEFAULT_KEY) == "true");
+  string graylog_host = get_str_map_key(log_to_graylog_host, log_channel,
+				       &CLOG_CONFIG_DEFAULT_KEY);
+  string graylog_port_str = get_str_map_key(log_to_graylog_port, log_channel,
+					    &CLOG_CONFIG_DEFAULT_KEY);
+  int graylog_port = atoi(graylog_port_str.c_str());
 
   set_log_to_monitors(to_monitors);
   set_log_to_syslog(to_syslog);
   set_syslog_facility(syslog_facility);
   set_log_prio(prio);
 
+  if (to_graylog && !graylog) { /* should but isn't */
+    graylog = ceph::log::Graylog::Ref(new ceph::log::Graylog("clog"));
+  } else if (!to_graylog && graylog) { /* shouldn't but is */
+    graylog.reset();
+  }
+
+  if (to_graylog && graylog) {
+    graylog->set_fsid(fsid);
+    graylog->set_hostname(host);
+  }
+
+  if (graylog && (!graylog_host.empty()) && (graylog_port != 0)) {
+    graylog->set_destination(graylog_host, graylog_port);
+  }
+
   ldout(cct, 10) << __func__
 		 << " to_monitors: " << (to_monitors ? "true" : "false")
 		 << " to_syslog: " << (to_syslog ? "true" : "false")
 		 << " syslog_facility: " << syslog_facility
-		 << " prio: " << prio << ")" << dendl;
+		 << " prio: " << prio
+		 << " to_graylog: " << (to_graylog ? "true" : "false")
+		 << " graylog_host: " << graylog_host
+		 << " graylog_port: " << graylog_port
+		 << ")" << dendl;
 }
 
 void LogChannel::do_log(clog_type prio, std::stringstream& ss)
@@ -189,6 +249,12 @@ void LogChannel::do_log(clog_type prio, const std::string& s)
     e.log_to_syslog(get_log_prio(), get_syslog_facility());
   }
 
+  // log to graylog?
+  if (do_log_to_graylog()) {
+    ldout(cct,0) << __func__ << " log to graylog"  << dendl;
+    graylog->log_log_entry(&e);
+  }
+
   // log to monitor?
   if (log_to_monitors) {
     parent->queue(e);
diff --git a/src/common/LogClient.h b/src/common/LogClient.h
index 15d131f..84682ea 100644
--- a/src/common/LogClient.h
+++ b/src/common/LogClient.h
@@ -1,4 +1,4 @@
-// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- 
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
 // vim: ts=8 sw=2 smarttab
 /*
  * Ceph - scalable distributed file system
@@ -7,9 +7,9 @@
  *
  * This is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
- * License version 2.1, as published by the Free Software 
+ * License version 2.1, as published by the Free Software
  * Foundation.  See file COPYING.
- * 
+ *
  */
 
 #ifndef CEPH_LOGCLIENT_H
@@ -17,6 +17,8 @@
 
 #include "common/LogEntry.h"
 #include "common/Mutex.h"
+#include "include/uuid.h"
+#include "common/Graylog.h"
 
 #include <iosfwd>
 #include <sstream>
@@ -35,7 +37,12 @@ int parse_log_client_options(CephContext *cct,
 			     map<string,string> &log_to_monitors,
 			     map<string,string> &log_to_syslog,
 			     map<string,string> &log_channels,
-			     map<string,string> &log_prios);
+			     map<string,string> &log_prios,
+			     map<string,string> &log_to_graylog,
+			     map<string,string> &log_to_graylog_host,
+			     map<string,string> &log_to_graylog_port,
+			     uuid_d &fsid,
+			     string &host);
 
 class LogClientTemp
 {
@@ -137,6 +144,10 @@ public:
   }
   bool must_log_to_monitors() { return log_to_monitors; }
 
+  bool do_log_to_graylog() {
+    return (graylog != nullptr);  // non-null only while update_config() keeps a Graylog sink
+  }
+
   typedef shared_ptr<LogChannel> Ref;
 
   /**
@@ -147,7 +158,12 @@ public:
   void update_config(map<string,string> &log_to_monitors,
 		     map<string,string> &log_to_syslog,
 		     map<string,string> &log_channels,
-		     map<string,string> &log_prios);
+		     map<string,string> &log_prios,
+		     map<string,string> &log_to_graylog,
+		     map<string,string> &log_to_graylog_host,
+		     map<string,string> &log_to_graylog_port,
+		     uuid_d &fsid,
+		     string &host);
 
   void do_log(clog_type prio, std::stringstream& ss);
   void do_log(clog_type prio, const std::string& s);
@@ -161,6 +177,7 @@ private:
   std::string syslog_facility;
   bool log_to_syslog;
   bool log_to_monitors;
+  ceph::log::Graylog::Ref graylog;
 
 
   friend class LogClientTemp;
@@ -210,7 +227,7 @@ public:
   void shutdown() {
     channels.clear();
   }
-  
+
   version_t queue(LogEntry &entry);
 
 private:
diff --git a/src/common/Makefile.am b/src/common/Makefile.am
index 5ed2d3b..2851cf2 100644
--- a/src/common/Makefile.am
+++ b/src/common/Makefile.am
@@ -3,10 +3,12 @@ libcommon_internal_la_SOURCES = \
 	common/DecayCounter.cc \
 	common/LogClient.cc \
 	common/LogEntry.cc \
+	common/Graylog.cc \
 	common/PrebufferedStreambuf.cc \
 	common/SloppyCRCMap.cc \
 	common/BackTrace.cc \
 	common/perf_counters.cc \
+	common/mutex_debug.cc \
 	common/Mutex.cc \
 	common/OutputDataSocket.cc \
 	common/admin_socket.cc \
@@ -45,6 +47,7 @@ libcommon_internal_la_SOURCES = \
 	common/simple_spin.cc \
 	common/Thread.cc \
 	common/Formatter.cc \
+	common/HTMLFormatter.cc \
 	common/HeartbeatMap.cc \
 	common/config.cc \
 	common/utf8.c \
@@ -85,7 +88,7 @@ endif
 
 if LINUX
 libcommon_internal_la_SOURCES += \
-	common/linux_version.c 
+	common/linux_version.c
 endif
 
 if SOLARIS
@@ -160,7 +163,8 @@ LIBCOMMON_DEPS += \
 	$(LIBCOMPRESSOR) \
 	$(LIBMSG) $(LIBAUTH) \
 	$(LIBCRUSH) $(LIBJSON_SPIRIT) $(LIBLOG) $(LIBARCH) \
-	$(BOOST_RANDOM_LIBS)
+	$(BOOST_RANDOM_LIBS) \
+	-luuid
 
 if LINUX
 LIBCOMMON_DEPS += -lrt -lblkid
@@ -176,10 +180,13 @@ noinst_HEADERS += \
 	common/HeartbeatMap.h \
 	common/LogClient.h \
 	common/LogEntry.h \
+	common/Graylog.h \
 	common/Preforker.h \
 	common/SloppyCRCMap.h \
 	common/WorkQueue.h \
+	common/OpQueue.h \
 	common/PrioritizedQueue.h \
+	common/WeightedPriorityQueue.h \
 	common/ceph_argparse.h \
 	common/ceph_context.h \
 	common/xattr.h \
@@ -205,6 +212,7 @@ noinst_HEADERS += \
 	common/DecayCounter.h \
 	common/Finisher.h \
 	common/Formatter.h \
+	common/HTMLFormatter.h \
 	common/perf_counters.h \
 	common/OutputDataSocket.h \
 	common/admin_socket.h \
@@ -267,7 +275,10 @@ noinst_HEADERS += \
 	common/event_socket.h \
 	common/PluginRegistry.h \
 	common/ceph_time.h \
-	common/ceph_timer.h
+	common/ceph_timer.h \
+	common/align.h \
+	common/mutex_debug.h \
+	common/shunique_lock.h
 
 if ENABLE_XIO
 noinst_HEADERS += \
diff --git a/src/common/MemoryModel.h b/src/common/MemoryModel.h
index 1b8d185..0d68dc5 100644
--- a/src/common/MemoryModel.h
+++ b/src/common/MemoryModel.h
@@ -43,7 +43,7 @@ private:
   void _sample(snap *p);
 
 public:
-  MemoryModel(CephContext *cct);
+  explicit MemoryModel(CephContext *cct);
   void sample(snap *p = 0) {
     _sample(&last);
     if (p)
diff --git a/src/common/Mutex.h b/src/common/Mutex.h
index 6a4e6b3..09e8f8c 100644
--- a/src/common/Mutex.h
+++ b/src/common/Mutex.h
@@ -111,7 +111,7 @@ public:
     Mutex &mutex;
 
   public:
-    Locker(Mutex& m) : mutex(m) {
+    explicit Locker(Mutex& m) : mutex(m) {
       mutex.Lock();
     }
     ~Locker() {
diff --git a/src/common/OpQueue.h b/src/common/OpQueue.h
new file mode 100644
index 0000000..34adc02
--- /dev/null
+++ b/src/common/OpQueue.h
@@ -0,0 +1,63 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2004-2006 Sage Weil <sage at newdream.net>
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation.  See file COPYING.
+ *
+ */
+
+#ifndef OP_QUEUE_H
+#define OP_QUEUE_H
+
+#include "include/msgr.h"
+
+#include <list>
+#include <functional>
+
+namespace ceph {
+  class Formatter;
+}
+
+/**
+ * Abstract class for all Op Queues
+ *
+ * In order to provide optimized code, be sure to declare all
+ * virtual functions as final in the derived class.
+ */
+
+template <typename T, typename K>
+class OpQueue {
+  // T is the queued item type; K is the type of the class key passed as `cl`.
+  public:
+    // Number of ops currently in the queue
+    virtual unsigned length() const = 0;
+    // Ops for which f returns true are removed and placed in *removed
+    virtual void remove_by_filter(
+	std::function<bool (T)> f, std::list<T> *removed) = 0;
+    // Remove the ops of class k; presumably they are handed back in *out -- TODO confirm
+    virtual void remove_by_class(K k, std::list<T> *out) = 0;
+    // Enqueue op at the back of the strict queue
+    virtual void enqueue_strict(K cl, unsigned priority, T item) = 0;
+    // Enqueue op at the front of the strict queue
+    virtual void enqueue_strict_front(K cl, unsigned priority, T item) = 0;
+    // Enqueue op at the back of the regular queue
+    virtual void enqueue(K cl, unsigned priority, unsigned cost, T item) = 0;
+    // Enqueue op at the front of the regular queue
+    virtual void enqueue_front(K cl, unsigned priority, unsigned cost, T item) = 0;
+    // Returns true if the queue is empty
+    virtual bool empty() const = 0;
+    // Return the next op to be dispatched
+    virtual T dequeue() = 0;
+    // Formatted output of the queue
+    virtual void dump(ceph::Formatter *f) const = 0;
+    // Virtual so that deleting through an OpQueue pointer runs the derived destructor
+    virtual ~OpQueue() {}; 
+};
+
+#endif
diff --git a/src/common/PluginRegistry.h b/src/common/PluginRegistry.h
index 6757ce1..ab9abc1 100644
--- a/src/common/PluginRegistry.h
+++ b/src/common/PluginRegistry.h
@@ -39,7 +39,7 @@ namespace ceph {
     void *library;
     CephContext *cct;
 
-    Plugin(CephContext *cct) : cct(cct) {}
+    explicit Plugin(CephContext *cct) : cct(cct) {}
     virtual ~Plugin() {}
   };
 
@@ -51,7 +51,7 @@ namespace ceph {
     bool disable_dlclose;
     std::map<std::string,std::map<std::string,Plugin*> > plugins;
 
-    PluginRegistry(CephContext *cct);
+    explicit PluginRegistry(CephContext *cct);
     ~PluginRegistry();
 
     int add(const std::string& type, const std::string& name,
diff --git a/src/common/PrioritizedQueue.h b/src/common/PrioritizedQueue.h
index 5ae94a5..8be6ca8 100644
--- a/src/common/PrioritizedQueue.h
+++ b/src/common/PrioritizedQueue.h
@@ -15,13 +15,11 @@
 #ifndef PRIORITY_QUEUE_H
 #define PRIORITY_QUEUE_H
 
-#include "common/Mutex.h"
 #include "common/Formatter.h"
+#include "common/OpQueue.h"
 
 #include <map>
-#include <utility>
 #include <list>
-#include <algorithm>
 
 /**
  * Manages queue for normal and strict priority items
@@ -44,15 +42,15 @@
  * to provide fairness for different clients.
  */
 template <typename T, typename K>
-class PrioritizedQueue {
+class PrioritizedQueue : public OpQueue <T, K> {
   int64_t total_priority;
   int64_t max_tokens_per_subqueue;
   int64_t min_cost;
 
   typedef std::list<std::pair<unsigned, T> > ListPairs;
-  template <class F>
   static unsigned filter_list_pairs(
-    ListPairs *l, F f,
+    ListPairs *l,
+    std::function<bool (T)> f,
     std::list<T> *out) {
     unsigned ret = 0;
     if (out) {
@@ -106,14 +104,16 @@ class PrioritizedQueue {
     }
     void put_tokens(unsigned t) {
       tokens += t;
-      if (tokens > max_tokens)
+      if (tokens > max_tokens) {
 	tokens = max_tokens;
+      }
     }
     void take_tokens(unsigned t) {
-      if (tokens > t)
+      if (tokens > t) {
 	tokens -= t;
-      else
+      } else {
 	tokens = 0;
+      }
     }
     void enqueue(K cl, unsigned cost, T item) {
       q[cl].push_back(std::make_pair(cost, item));
@@ -136,12 +136,14 @@ class PrioritizedQueue {
       assert(!(q.empty()));
       assert(cur != q.end());
       cur->second.pop_front();
-      if (cur->second.empty())
+      if (cur->second.empty()) {
 	q.erase(cur++);
-      else
+      } else {
 	++cur;
-      if (cur == q.end())
+      }
+      if (cur == q.end()) {
 	cur = q.begin();
+      }
       size--;
     }
     unsigned length() const {
@@ -151,15 +153,17 @@ class PrioritizedQueue {
     bool empty() const {
       return q.empty();
     }
-    template <class F>
-    void remove_by_filter(F f, std::list<T> *out) {
+    void remove_by_filter(
+	std::function<bool (T)> f,
+       	std::list<T> *out) {
       for (typename Classes::iterator i = q.begin();
 	   i != q.end();
 	   ) {
 	size -= filter_list_pairs(&(i->second), f, out);
 	if (i->second.empty()) {
-	  if (cur == i)
+	  if (cur == i) {
 	    ++cur;
+	  }
 	  q.erase(i++);
 	} else {
 	  ++i;
@@ -170,11 +174,13 @@ class PrioritizedQueue {
     }
     void remove_by_class(K k, std::list<T> *out) {
       typename Classes::iterator i = q.find(k);
-      if (i == q.end())
+      if (i == q.end()) {
 	return;
+      }
       size -= i->second.size();
-      if (i == cur)
+      if (i == cur) {
 	++cur;
+      }
       if (out) {
 	for (typename ListPairs::reverse_iterator j =
 	       i->second.rbegin();
@@ -184,17 +190,19 @@ class PrioritizedQueue {
 	}
       }
       q.erase(i);
-      if (cur == q.end())
+      if (cur == q.end()) {
 	cur = q.begin();
+      }
     }
 
-    void dump(Formatter *f) const {
+    void dump(ceph::Formatter *f) const {
       f->dump_int("tokens", tokens);
       f->dump_int("max_tokens", max_tokens);
       f->dump_int("size", size);
       f->dump_int("num_keys", q.size());
-      if (!empty())
+      if (!empty()) {
 	f->dump_int("first_item_cost", front().first);
+      }
     }
   };
 
@@ -204,8 +212,9 @@ class PrioritizedQueue {
 
   SubQueue *create_queue(unsigned priority) {
     typename SubQueues::iterator p = queue.find(priority);
-    if (p != queue.end())
+    if (p != queue.end()) {
       return &p->second;
+    }
     total_priority += priority;
     SubQueue *sq = &queue[priority];
     sq->set_max_tokens(max_tokens_per_subqueue);
@@ -220,8 +229,9 @@ class PrioritizedQueue {
   }
 
   void distribute_tokens(unsigned cost) {
-    if (total_priority == 0)
+    if (total_priority == 0) {
       return;
+    }
     for (typename SubQueues::iterator i = queue.begin();
 	 i != queue.end();
 	 ++i) {
@@ -236,7 +246,7 @@ public:
       min_cost(min_c)
   {}
 
-  unsigned length() const {
+  unsigned length() const override final {
     unsigned total = 0;
     for (typename SubQueues::const_iterator i = queue.begin();
 	 i != queue.end();
@@ -253,8 +263,9 @@ public:
     return total;
   }
 
-  template <class F>
-  void remove_by_filter(F f, std::list<T> *removed = 0) {
+  void remove_by_filter(
+      std::function<bool (T)> f,
+      std::list<T> *removed = 0) override final {
     for (typename SubQueues::iterator i = queue.begin();
 	 i != queue.end();
 	 ) {
@@ -280,7 +291,7 @@ public:
     }
   }
 
-  void remove_by_class(K k, std::list<T> *out = 0) {
+  void remove_by_class(K k, std::list<T> *out = 0) override final {
     for (typename SubQueues::iterator i = queue.begin();
 	 i != queue.end();
 	 ) {
@@ -305,15 +316,15 @@ public:
     }
   }
 
-  void enqueue_strict(K cl, unsigned priority, T item) {
+  void enqueue_strict(K cl, unsigned priority, T item) override final {
     high_queue[priority].enqueue(cl, 0, item);
   }
 
-  void enqueue_strict_front(K cl, unsigned priority, T item) {
+  void enqueue_strict_front(K cl, unsigned priority, T item) override final {
     high_queue[priority].enqueue_front(cl, 0, item);
   }
 
-  void enqueue(K cl, unsigned priority, unsigned cost, T item) {
+  void enqueue(K cl, unsigned priority, unsigned cost, T item) override final {
     if (cost < min_cost)
       cost = min_cost;
     if (cost > max_tokens_per_subqueue)
@@ -321,7 +332,7 @@ public:
     create_queue(priority)->enqueue(cl, cost, item);
   }
 
-  void enqueue_front(K cl, unsigned priority, unsigned cost, T item) {
+  void enqueue_front(K cl, unsigned priority, unsigned cost, T item) override final {
     if (cost < min_cost)
       cost = min_cost;
     if (cost > max_tokens_per_subqueue)
@@ -329,20 +340,21 @@ public:
     create_queue(priority)->enqueue_front(cl, cost, item);
   }
 
-  bool empty() const {
+  bool empty() const override final {
     assert(total_priority >= 0);
     assert((total_priority == 0) || !(queue.empty()));
     return queue.empty() && high_queue.empty();
   }
 
-  T dequeue() {
+  T dequeue() override final {
     assert(!empty());
 
     if (!(high_queue.empty())) {
       T ret = high_queue.rbegin()->second.front().second;
       high_queue.rbegin()->second.pop_front();
-      if (high_queue.rbegin()->second.empty())
+      if (high_queue.rbegin()->second.empty()) {
 	high_queue.erase(high_queue.rbegin()->first);
+      }
       return ret;
     }
 
@@ -358,8 +370,9 @@ public:
 	unsigned cost = i->second.front().first;
 	i->second.take_tokens(cost);
 	i->second.pop_front();
-	if (i->second.empty())
+	if (i->second.empty()) {
 	  remove_queue(i->first);
+	}
 	distribute_tokens(cost);
 	return ret;
       }
@@ -370,13 +383,14 @@ public:
     T ret = queue.rbegin()->second.front().second;
     unsigned cost = queue.rbegin()->second.front().first;
     queue.rbegin()->second.pop_front();
-    if (queue.rbegin()->second.empty())
+    if (queue.rbegin()->second.empty()) {
       remove_queue(queue.rbegin()->first);
+    }
     distribute_tokens(cost);
     return ret;
   }
 
-  void dump(Formatter *f) const {
+  void dump(ceph::Formatter *f) const override final {
     f->dump_int("total_priority", total_priority);
     f->dump_int("max_tokens_per_subqueue", max_tokens_per_subqueue);
     f->dump_int("min_cost", min_cost);
diff --git a/src/common/RWLock.h b/src/common/RWLock.h
index 282b69b..84aba2a 100644
--- a/src/common/RWLock.h
+++ b/src/common/RWLock.h
@@ -30,7 +30,7 @@ class RWLock
   std::string name;
   mutable int id;
   mutable atomic_t nrlock, nwlock;
-  bool track;
+  bool track, lockdep;
 
   std::string unique_name(const char* name) const;
 
@@ -38,12 +38,14 @@ public:
   RWLock(const RWLock& other);
   const RWLock& operator=(const RWLock& other);
 
-  RWLock(const std::string &n, bool track_lock=true) : name(n), id(-1), nrlock(0), nwlock(0), track(track_lock) {
+  RWLock(const std::string &n, bool track_lock=true, bool ld=true)
+    : name(n), id(-1), nrlock(0), nwlock(0), track(track_lock),
+      lockdep(ld) {
     pthread_rwlock_init(&L, NULL);
     ANNOTATE_BENIGN_RACE_SIZED(&id, sizeof(id), "RWLock lockdep id");
     ANNOTATE_BENIGN_RACE_SIZED(&nrlock, sizeof(nrlock), "RWlock nrlock");
     ANNOTATE_BENIGN_RACE_SIZED(&nwlock, sizeof(nwlock), "RWlock nwlock");
-    if (g_lockdep) id = lockdep_register(name.c_str());
+    if (lockdep && g_lockdep) id = lockdep_register(name.c_str());
   }
 
   bool is_locked() const {
@@ -61,7 +63,7 @@ public:
     if (track)
       assert(!is_locked());
     pthread_rwlock_destroy(&L);
-    if (g_lockdep) {
+    if (lockdep && g_lockdep) {
       lockdep_unregister(id);
     }
   }
@@ -75,17 +77,18 @@ public:
         nrlock.dec();
       }
     }
-    if (lockdep && g_lockdep) id = lockdep_will_unlock(name.c_str(), id);
+    if (lockdep && this->lockdep && g_lockdep)
+      id = lockdep_will_unlock(name.c_str(), id);
     int r = pthread_rwlock_unlock(&L);
     assert(r == 0);
   }
 
   // read
   void get_read() const {
-    if (g_lockdep) id = lockdep_will_lock(name.c_str(), id);
+    if (lockdep && g_lockdep) id = lockdep_will_lock(name.c_str(), id);
     int r = pthread_rwlock_rdlock(&L);
     assert(r == 0);
-    if (g_lockdep) id = lockdep_locked(name.c_str(), id);
+    if (lockdep && g_lockdep) id = lockdep_locked(name.c_str(), id);
     if (track)
       nrlock.inc();
   }
@@ -93,7 +96,7 @@ public:
     if (pthread_rwlock_tryrdlock(&L) == 0) {
       if (track)
          nrlock.inc();
-      if (g_lockdep) id = lockdep_locked(name.c_str(), id);
+      if (lockdep && g_lockdep) id = lockdep_locked(name.c_str(), id);
       return true;
     }
     return false;
@@ -104,17 +107,20 @@ public:
 
   // write
   void get_write(bool lockdep=true) {
-    if (lockdep && g_lockdep) id = lockdep_will_lock(name.c_str(), id);
+    if (lockdep && this->lockdep && g_lockdep)
+      id = lockdep_will_lock(name.c_str(), id);
     int r = pthread_rwlock_wrlock(&L);
     assert(r == 0);
-    if (g_lockdep) id = lockdep_locked(name.c_str(), id);
+    if (lockdep && this->lockdep && g_lockdep)
+      id = lockdep_locked(name.c_str(), id);
     if (track)
       nwlock.inc();
 
   }
   bool try_get_write(bool lockdep=true) {
     if (pthread_rwlock_trywrlock(&L) == 0) {
-      if (lockdep && g_lockdep) id = lockdep_locked(name.c_str(), id);
+      if (lockdep && this->lockdep && g_lockdep)
+	id = lockdep_locked(name.c_str(), id);
       if (track)
          nwlock.inc();
       return true;
@@ -140,7 +146,7 @@ public:
     bool locked;
 
   public:
-    RLocker(const RWLock& lock) : m_lock(lock) {
+   explicit  RLocker(const RWLock& lock) : m_lock(lock) {
       m_lock.get_read();
       locked = true;
     }
@@ -162,7 +168,7 @@ public:
     bool locked;
 
   public:
-    WLocker(RWLock& lock) : m_lock(lock) {
+    explicit WLocker(RWLock& lock) : m_lock(lock) {
       m_lock.get_write();
       locked = true;
     }
@@ -192,7 +198,7 @@ public:
     LockState state;
 
   public:
-    Context(RWLock& l) : lock(l) {}
+    explicit Context(RWLock& l) : lock(l), state(Untaken) {}
     Context(RWLock& l, LockState s) : lock(l), state(s) {}
 
     void get_write() {
diff --git a/src/common/SloppyCRCMap.h b/src/common/SloppyCRCMap.h
index c07b4d9..34642a3 100644
--- a/src/common/SloppyCRCMap.h
+++ b/src/common/SloppyCRCMap.h
@@ -34,9 +34,7 @@ public:
     //zero_crc = ceph_crc32c(0xffffffff, NULL, block_size);
     if (b) {
       bufferlist bl;
-      bufferptr bp(block_size);
-      bp.zero();
-      bl.append(bp);
+      bl.append_zero(block_size);
       zero_crc = bl.crc32c(crc_iv);
     } else {
       zero_crc = crc_iv;
diff --git a/src/common/Timer.cc b/src/common/Timer.cc
index 1160541..b71511d 100644
--- a/src/common/Timer.cc
+++ b/src/common/Timer.cc
@@ -33,7 +33,7 @@
 class SafeTimerThread : public Thread {
   SafeTimer *parent;
 public:
-  SafeTimerThread(SafeTimer *s) : parent(s) {}
+  explicit SafeTimerThread(SafeTimer *s) : parent(s) {}
   void *entry() {
     parent->timer_thread();
     return NULL;
diff --git a/src/common/TracepointProvider.h b/src/common/TracepointProvider.h
index 447e2de..9f5ec11 100644
--- a/src/common/TracepointProvider.h
+++ b/src/common/TracepointProvider.h
@@ -45,7 +45,7 @@ public:
   template <const Traits &traits>
   class TypedSingleton : public Singleton {
   public:
-    TypedSingleton(CephContext *cct)
+    explicit TypedSingleton(CephContext *cct)
       : Singleton(cct, traits.library, traits.config_key) {
     }
   };
diff --git a/src/common/TrackedOp.cc b/src/common/TrackedOp.cc
index 1be8f5c..f76587e 100644
--- a/src/common/TrackedOp.cc
+++ b/src/common/TrackedOp.cc
@@ -93,7 +93,7 @@ void OpTracker::dump_historic_ops(Formatter *f)
   history.dump_ops(now, f);
 }
 
-void OpTracker::dump_ops_in_flight(Formatter *f)
+void OpTracker::dump_ops_in_flight(Formatter *f, bool print_only_blocked)
 {
   f->open_object_section("ops_in_flight"); // overall dump
   uint64_t total_ops_in_flight = 0;
@@ -102,8 +102,10 @@ void OpTracker::dump_ops_in_flight(Formatter *f)
   for (uint32_t i = 0; i < num_optracker_shards; i++) {
     ShardedTrackingData* sdata = sharded_in_flight_list[i];
     assert(NULL != sdata); 
-    Mutex::Locker locker(sdata->ops_in_flight_lock_sharded);    
+    Mutex::Locker locker(sdata->ops_in_flight_lock_sharded);
     for (xlist<TrackedOp*>::iterator p = sdata->ops_in_flight_sharded.begin(); !p.end(); ++p) {
+      if (print_only_blocked && (now - (*p)->get_initiated() <= complaint_time))
+	break;
       f->open_object_section("op");
       (*p)->dump(now, f);
       f->close_section(); // this TrackedOp
@@ -111,7 +113,11 @@ void OpTracker::dump_ops_in_flight(Formatter *f)
     }
   }
   f->close_section(); // list of TrackedOps
-  f->dump_int("num_ops", total_ops_in_flight);
+  if (print_only_blocked) {
+    f->dump_float("complaint_time", complaint_time);
+    f->dump_int("num_blocked_ops", total_ops_in_flight);
+  } else
+    f->dump_int("num_ops", total_ops_in_flight);
   f->close_section(); // overall dump
 }
 
diff --git a/src/common/TrackedOp.h b/src/common/TrackedOp.h
index 89b990a..c1d8eb6 100644
--- a/src/common/TrackedOp.h
+++ b/src/common/TrackedOp.h
@@ -56,7 +56,7 @@ class OpTracker {
   class RemoveOnDelete {
     OpTracker *tracker;
   public:
-    RemoveOnDelete(OpTracker *tracker) : tracker(tracker) {}
+    explicit RemoveOnDelete(OpTracker *tracker) : tracker(tracker) {}
     void operator()(TrackedOp *op);
   };
   friend class RemoveOnDelete;
@@ -65,7 +65,7 @@ class OpTracker {
   struct ShardedTrackingData {
     Mutex ops_in_flight_lock_sharded;
     xlist<TrackedOp *> ops_in_flight_sharded;
-    ShardedTrackingData(string lock_name):
+    explicit ShardedTrackingData(string lock_name):
         ops_in_flight_lock_sharded(lock_name.c_str()) {}
   };
   vector<ShardedTrackingData*> sharded_in_flight_list;
@@ -104,7 +104,7 @@ public:
     RWLock::WLocker l(lock);
     tracking_enabled = enable;
   }
-  void dump_ops_in_flight(Formatter *f);
+  void dump_ops_in_flight(Formatter *f, bool print_only_blocked=false);
   void dump_historic_ops(Formatter *f);
   void register_inflight_op(xlist<TrackedOp*>::item *i);
   void unregister_inflight_op(TrackedOp *i);
diff --git a/src/common/WeightedPriorityQueue.h b/src/common/WeightedPriorityQueue.h
new file mode 100644
index 0000000..6a77a6c
--- /dev/null
+++ b/src/common/WeightedPriorityQueue.h
@@ -0,0 +1,359 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2004-2006 Sage Weil <sage at newdream.net>
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation.  See file COPYING.
+ *
+ */
+
+#ifndef WP_QUEUE_H
+#define WP_QUEUE_H
+
+#include "common/Formatter.h"
+#include "common/OpQueue.h"
+
+#include <map>
+#include <list>
+
+/**
+ * Weighted Priority queue with strict priority queue
+ *
+ * This queue attempts to be fair to all classes of
+ * operations but is also weighted so that higher classes
+ * get more share of the operations.
+ */
+
+template <typename T, typename K>
+class WeightedPriorityQueue : public OpQueue <T, K> {
+  int64_t total_priority;
+
+  typedef std::list<std::pair<unsigned, T>> ListPairs;
+  static unsigned filter_list_pairs(
+    ListPairs *l, std::function<bool (T)> f,
+    std::list<T> *out) {
+    unsigned ret = 0;
+    if (out) {
+      for (typename ListPairs::reverse_iterator i = l->rbegin();
+	   i != l->rend();
+	   ++i) {
+	if (f(i->second)) {
+	  out->push_front(i->second);
+	}
+      }
+    }
+    for (typename ListPairs::iterator i = l->begin();
+	 i != l->end(); ) {
+      if (f(i->second)) {
+	l->erase(i++);
+	++ret;
+      } else {
+	++i;
+      }
+    }
+    return ret;
+  }
+
+  struct SubQueue {
+  private:
+    typedef std::map<K, ListPairs> Classes;
+    Classes q;
+    typename Classes::iterator cur;
+    unsigned q_size;
+  public:
+    SubQueue(const SubQueue &other)
+      : q(other.q),
+	cur(q.begin()),
+	q_size(0) {}
+    SubQueue()
+      :	cur(q.begin()),
+	q_size(0) {}
+    void enqueue_front(K cl, unsigned cost, T item) {
+      q[cl].push_front(std::make_pair(cost, item));
+      if (cur == q.end()) {
+	cur = q.begin();
+      }
+      ++q_size;
+    }
+    void enqueue(K cl, unsigned cost, T item) {
+      q[cl].push_back(std::make_pair(cost, item));
+      if (cur == q.end()) {
+	cur = q.begin();
+      }
+      ++q_size;
+    }
+    std::pair<unsigned, T> front() const {
+      assert(!q.empty());
+      assert(cur != q.end());
+      assert(!cur->second.empty());
+      return cur->second.front();
+    }
+    void pop_front() {
+      assert(!q.empty());
+      assert(cur != q.end());
+      assert(!cur->second.empty());
+      cur->second.pop_front();
+      if (cur->second.empty()) {
+	cur = q.erase(cur);
+      } else {
+	++cur;
+      }
+      if (cur == q.end()) {
+	cur = q.begin();
+      }
+      --q_size;
+    }
+    unsigned size() const {
+      return q_size;
+    }
+    bool empty() const {
+      return (q_size == 0);
+    }
+    unsigned remove_by_filter(std::function<bool (T)> f, std::list<T> *out) {
+      unsigned count = 0;
+      for (typename Classes::iterator i = q.begin();
+	   i != q.end(); ) {
+	count += filter_list_pairs(&(i->second), f, out);
+	if (i->second.empty()) {
+	  if (cur == i) {
+	    ++cur;
+	  }
+	  q.erase(i++);
+	} else {
+	  ++i;
+	}
+      }
+      if (cur == q.end()) {
+	cur = q.begin();
+      }
+      q_size -= count;
+      return count;
+    }
+    unsigned remove_by_class(K k, std::list<T> *out) {
+      typename Classes::iterator i = q.find(k);
+      if (i == q.end()) {
+	return 0;
+      }
+      unsigned count = i->second.size();
+      q_size -= count;
+      if (out) {
+	for (typename ListPairs::reverse_iterator j =
+	       i->second.rbegin();
+	     j != i->second.rend();
+	     ++j) {
+	  out->push_front(j->second);
+	}
+      }
+      if (i == cur) {
+	++cur;
+      }
+      q.erase(i);
+      if (cur == q.end()) {
+	cur = q.begin();
+      }
+      return count;
+    }
+
+    void dump(ceph::Formatter *f) const {
+      f->dump_int("num_keys", q.size());
+      if (!empty()) {
+	f->dump_int("first_item_cost", front().first);
+      }
+    }
+  };
+
+  unsigned high_size, wrr_size;
+  unsigned max_cost;
+
+  typedef std::map<unsigned, SubQueue> SubQueues;
+  SubQueues high_queue;
+  SubQueues queue;
+  typename SubQueues::reverse_iterator dq;
+
+  SubQueue *create_queue(unsigned priority) {
+    typename SubQueues::iterator p = queue.find(priority);
+    if (p != queue.end()) {
+      return &p->second;
+    }
+    total_priority += priority;
+    SubQueue *sq = &queue[priority];
+    return sq;
+  }
+
+  void remove_queue(unsigned priority) {
+    assert(queue.count(priority));
+    dq = (typename SubQueues::reverse_iterator) queue.erase(queue.find(priority));
+    if (dq == queue.rend()) {
+      dq = queue.rbegin();
+    }
+    total_priority -= priority;
+    assert(total_priority >= 0);
+  }
+
+public:
+  WeightedPriorityQueue(unsigned max_per, unsigned min_c)
+    : total_priority(0),
+      high_size(0),
+      wrr_size(0),
+      max_cost(0),
+      dq(queue.rbegin())
+  {
+    srand(time(0));
+  }
+
+  unsigned length() const override final {
+    return high_size + wrr_size;
+  }
+
+  void remove_by_filter(
+      std::function<bool (T)> f, std::list<T> *removed = 0) override final {
+    for (typename SubQueues::iterator i = queue.begin();
+	 i != queue.end(); ++i) {
+      wrr_size -= i->second.remove_by_filter(f, removed);
+      unsigned priority = i->first;
+      if (i->second.empty()) {
+	remove_queue(priority);
+      }
+    }
+    for (typename SubQueues::iterator i = high_queue.begin();
+	 i != high_queue.end();
+	 ) {
+      high_size -= i->second.remove_by_filter(f, removed);
+      if (i->second.empty()) {
+	high_queue.erase(i++);
+      } else {
+	++i;
+      }
+    }
+  }
+
+  void remove_by_class(K k, std::list<T> *out = 0) override final {
+    for (typename SubQueues::iterator i = queue.begin();
+	 i != queue.end(); ++i) {
+      wrr_size -= i->second.remove_by_class(k, out);
+      unsigned priority = i->first;
+      if (i->second.empty()) {
+	remove_queue(priority);
+      }
+    }
+    for (typename SubQueues::iterator i = high_queue.begin();
+	 i != high_queue.end();
+	 ) {
+      high_size -= i->second.remove_by_class(k, out);
+      if (i->second.empty()) {
+	high_queue.erase(i++);
+      } else {
+	++i;
+      }
+    }
+  }
+
+  void enqueue_strict(K cl, unsigned priority, T item) override final {
+    high_queue[priority].enqueue(cl, 0, item);
+    ++high_size;
+  }
+
+  void enqueue_strict_front(K cl, unsigned priority, T item) override final {
+    high_queue[priority].enqueue_front(cl, 0, item);
+    ++high_size;
+  }
+
+  void enqueue(K cl, unsigned priority, unsigned cost, T item) override final {
+    if (cost > max_cost) {
+      max_cost = cost;
+    }
+    create_queue(priority)->enqueue(cl, cost, item);
+    ++wrr_size;
+  }
+
+  void enqueue_front(K cl, unsigned priority, unsigned cost, T item) override final {
+    if (cost > max_cost) {
+      max_cost = cost;
+    }
+    create_queue(priority)->enqueue_front(cl, cost, item);
+    ++wrr_size;
+  }
+
+  bool empty() const override final {
+    assert(total_priority >= 0);
+    assert((total_priority == 0) || !queue.empty());
+    return (high_size + wrr_size  == 0) ? true : false;
+  }
+
+  T dequeue() override final {
+    assert(!empty());
+
+    if (!high_queue.empty()) {
+      T ret = high_queue.rbegin()->second.front().second;
+      high_queue.rbegin()->second.pop_front();
+      if (high_queue.rbegin()->second.empty()) {
+	high_queue.erase(high_queue.rbegin()->first);
+      }
+      --high_size;
+      return ret;
+    }
+    // If there is more than one priority, choose one to run.
+    if (dq->second.size() != wrr_size) {
+      while (true) {
+	// Pick a new priority out of the total priority.
+	unsigned prio = rand() % total_priority;
+	typename SubQueues::iterator i = queue.begin();
+	unsigned tp = i->first;
+	// Find the priority corresponding to the picked number.
+	// Add low priorities to high priorities until the picked number
+	// is less than the total and try to dequeue that priority.
+	while (prio > tp) {
+	  ++i;
+	  tp += i->first;
+	}
+	dq = (typename SubQueues::reverse_iterator) ++i;
+	// Flip a coin to see if this priority gets to run based on cost.
+	// The next op's cost is multiplied by .9 and subtracted from the
+	// max cost seen. Ops with lower costs will have a larger value
+	// and so are selected more easily than ops with high costs.
+	if (max_cost == 0 || rand() % max_cost <=
+	    (max_cost - ((dq->second.front().first * 9) / 10))){
+	  break;
+	}
+      }
+    }
+    T ret = dq->second.front().second;
+    dq->second.pop_front();
+    if (dq->second.empty()) {
+      remove_queue(dq->first);
+    }
+    --wrr_size;
+    return ret;
+  }
+
+  void dump(ceph::Formatter *f) const {
+    f->dump_int("total_priority", total_priority);
+    f->open_array_section("high_queues");
+    for (typename SubQueues::const_iterator p = high_queue.begin();
+	 p != high_queue.end();
+	 ++p) {
+      f->open_object_section("subqueue");
+      f->dump_int("priority", p->first);
+      p->second.dump(f);
+      f->close_section();
+    }
+    f->close_section();
+    f->open_array_section("queues");
+    for (typename SubQueues::const_iterator p = queue.begin();
+	 p != queue.end();
+	 ++p) {
+      f->open_object_section("subqueue");
+      f->dump_int("priority", p->first);
+      p->second.dump(f);
+      f->close_section();
+    }
+    f->close_section();
+  }
+};
+
+#endif
diff --git a/src/common/WorkQueue.h b/src/common/WorkQueue.h
index 67c1a87..5ce6952 100644
--- a/src/common/WorkQueue.h
+++ b/src/common/WorkQueue.h
@@ -126,10 +126,7 @@ public:
     }
 
   protected:
-    virtual void _process(const list<T*> &) { assert(0); }
-    virtual void _process(const list<T*> &items, TPHandle &handle) {
-      _process(items);
-    }
+    virtual void _process(const list<T*> &items, TPHandle &handle) = 0;
 
   public:
     BatchWorkQueue(string n, time_t ti, time_t sti, ThreadPool* p)
@@ -257,10 +254,7 @@ public:
     void unlock() {
       pool->unlock();
     }
-    virtual void _process(U) { assert(0); }
-    virtual void _process(U u, TPHandle &) {
-      _process(u);
-    }
+    virtual void _process(U u, TPHandle &) = 0;
   };
 
   /** @brief Template by-pointer work queue.
@@ -293,10 +287,7 @@ public:
 
   protected:
     /// Process a work item. Called from the worker threads.
-    virtual void _process(T *t) { assert(0); }
-    virtual void _process(T *t, TPHandle &) {
-      _process(t);
-    }
+    virtual void _process(T *t, TPHandle &) = 0;
 
   public:
     WorkQueue(string n, time_t ti, time_t sti, ThreadPool* p) : WorkQueue_(n, ti, sti), pool(p) {
@@ -443,6 +434,7 @@ private:
   // threads
   struct WorkThread : public Thread {
     ThreadPool *pool;
+    // cppcheck-suppress noExplicitConstructor
     WorkThread(ThreadPool *p) : pool(p) {}
     void *entry() {
       pool->worker(this);
@@ -555,8 +547,8 @@ public:
     _queue.pop_front();
     return c;
   }
-  using ThreadPool::WorkQueueVal<GenContext<ThreadPool::TPHandle&>*>::_process;
-  void _process(GenContext<ThreadPool::TPHandle&> *c, ThreadPool::TPHandle &tp) {
+  void _process(GenContext<ThreadPool::TPHandle&> *c,
+		ThreadPool::TPHandle &tp) override {
     c->complete(tp);
   }
 };
diff --git a/src/common/admin_socket.cc b/src/common/admin_socket.cc
index 1d51ef5..aa0146f 100644
--- a/src/common/admin_socket.cc
+++ b/src/common/admin_socket.cc
@@ -473,7 +473,7 @@ public:
 class HelpHook : public AdminSocketHook {
   AdminSocket *m_as;
 public:
-  HelpHook(AdminSocket *as) : m_as(as) {}
+  explicit HelpHook(AdminSocket *as) : m_as(as) {}
   bool call(string command, cmdmap_t &cmdmap, string format, bufferlist& out) {
     Formatter *f = Formatter::create(format, "json-pretty", "json-pretty");
     f->open_object_section("help");
@@ -495,7 +495,7 @@ public:
 class GetdescsHook : public AdminSocketHook {
   AdminSocket *m_as;
 public:
-  GetdescsHook(AdminSocket *as) : m_as(as) {}
+  explicit GetdescsHook(AdminSocket *as) : m_as(as) {}
   bool call(string command, cmdmap_t &cmdmap, string format, bufferlist& out) {
     int cmdnum = 0;
     JSONFormatter jf(false);
diff --git a/src/common/align.h b/src/common/align.h
new file mode 100644
index 0000000..b5c25b9
--- /dev/null
+++ b/src/common/align.h
@@ -0,0 +1,30 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+  *
+ * Copyright (C) 2015 XSky <haomai at xsky.com>
+ *
+ * Author: Haomai Wang <haomaiwang at gmail.com>
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation.  See file COPYING.
+ *
+ */
+
+#ifndef CEPH_COMMON_ALIGN_H
+#define CEPH_COMMON_ALIGN_H
+
+template <typename T>
+inline constexpr T align_up(T v, T align) {
+  return (v + align - 1) & ~(align - 1);
+}
+
+template <typename T>
+inline constexpr T align_down(T v, T align) {
+  return v & ~(align - 1);
+}
+
+#endif /* CEPH_COMMON_ALIGN_H */
diff --git a/src/common/buffer.cc b/src/common/buffer.cc
index e2de936..5a2f60f 100644
--- a/src/common/buffer.cc
+++ b/src/common/buffer.cc
@@ -162,20 +162,21 @@ static simple_spinlock_t buffer_debug_lock = SIMPLE_SPINLOCK_INITIALIZER;
     unsigned len;
     atomic_t nref;
 
-    mutable RWLock crc_lock;
+    mutable simple_spinlock_t crc_spinlock;
     map<pair<size_t, size_t>, pair<uint32_t, uint32_t> > crc_map;
 
-    raw(unsigned l)
+    explicit raw(unsigned l)
       : data(NULL), len(l), nref(0),
-	crc_lock("buffer::raw::crc_lock", false)
+	crc_spinlock(SIMPLE_SPINLOCK_INITIALIZER)
     { }
     raw(char *c, unsigned l)
       : data(c), len(l), nref(0),
-	crc_lock("buffer::raw::crc_lock", false)
+	crc_spinlock(SIMPLE_SPINLOCK_INITIALIZER)
     { }
     virtual ~raw() {}
 
     // no copying.
+    // cppcheck-suppress noExplicitConstructor
     raw(const raw &other);
     const raw& operator=(const raw &other);
 
@@ -208,38 +209,35 @@ static simple_spinlock_t buffer_debug_lock = SIMPLE_SPINLOCK_INITIALIZER;
     }
     bool get_crc(const pair<size_t, size_t> &fromto,
          pair<uint32_t, uint32_t> *crc) const {
-      crc_lock.get_read();
+      simple_spin_lock(&crc_spinlock);
       map<pair<size_t, size_t>, pair<uint32_t, uint32_t> >::const_iterator i =
       crc_map.find(fromto);
       if (i == crc_map.end()) {
-          crc_lock.unlock();
+          simple_spin_unlock(&crc_spinlock);
           return false;
       }
       *crc = i->second;
-      crc_lock.unlock();
+      simple_spin_unlock(&crc_spinlock);
       return true;
     }
     void set_crc(const pair<size_t, size_t> &fromto,
          const pair<uint32_t, uint32_t> &crc) {
-      crc_lock.get_write();
+      simple_spin_lock(&crc_spinlock);
       crc_map[fromto] = crc;
-      crc_lock.unlock();
+      simple_spin_unlock(&crc_spinlock);
     }
     void invalidate_crc() {
-      // don't own the write lock when map is empty
-      crc_lock.get_read();
+      simple_spin_lock(&crc_spinlock);
       if (crc_map.size() != 0) {
-        crc_lock.unlock();
-        crc_lock.get_write();
         crc_map.clear();
       }
-      crc_lock.unlock();
+      simple_spin_unlock(&crc_spinlock);
     }
   };
 
   class buffer::raw_malloc : public buffer::raw {
   public:
-    raw_malloc(unsigned l) : raw(l) {
+    explicit raw_malloc(unsigned l) : raw(l) {
       if (len) {
 	data = (char *)malloc(len);
         if (!data)
@@ -268,7 +266,7 @@ static simple_spinlock_t buffer_debug_lock = SIMPLE_SPINLOCK_INITIALIZER;
 #ifndef __CYGWIN__
   class buffer::raw_mmap_pages : public buffer::raw {
   public:
-    raw_mmap_pages(unsigned l) : raw(l) {
+    explicit raw_mmap_pages(unsigned l) : raw(l) {
       data = (char*)::mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, -1, 0);
       if (!data)
 	throw bad_alloc();
@@ -350,7 +348,7 @@ static simple_spinlock_t buffer_debug_lock = SIMPLE_SPINLOCK_INITIALIZER;
 #ifdef CEPH_HAVE_SPLICE
   class buffer::raw_pipe : public buffer::raw {
   public:
-    raw_pipe(unsigned len) : raw(len), source_consumed(false) {
+    explicit raw_pipe(unsigned len) : raw(len), source_consumed(false) {
       size_t max = get_max_pipe_size();
       if (len > max) {
 	bdout << "raw_pipe: requested length " << len
@@ -530,7 +528,7 @@ static simple_spinlock_t buffer_debug_lock = SIMPLE_SPINLOCK_INITIALIZER;
    */
   class buffer::raw_char : public buffer::raw {
   public:
-    raw_char(unsigned l) : raw(l) {
+    explicit raw_char(unsigned l) : raw(l) {
       if (len)
 	data = new char[len];
       else
@@ -555,7 +553,7 @@ static simple_spinlock_t buffer_debug_lock = SIMPLE_SPINLOCK_INITIALIZER;
 
   class buffer::raw_unshareable : public buffer::raw {
   public:
-    raw_unshareable(unsigned l) : raw(l) {
+    explicit raw_unshareable(unsigned l) : raw(l) {
       if (len)
 	data = new char[len];
       else
@@ -710,6 +708,11 @@ static simple_spinlock_t buffer_debug_lock = SIMPLE_SPINLOCK_INITIALIZER;
       bdout << "ptr " << this << " get " << _raw << bendl;
     }
   }
+  buffer::ptr::ptr(ptr&& p) : _raw(p._raw), _off(p._off), _len(p._len)  // move ctor: adopt p's raw pointer, offset and length as-is
+  {
+    p._raw = nullptr;  // p no longer points at the raw buffer
+    p._off = p._len = 0;  // leave the moved-from ptr empty
+  }
   buffer::ptr::ptr(const ptr& p, unsigned o, unsigned l)
     : _raw(p._raw), _off(p._off + o), _len(l)
   {
@@ -735,6 +738,21 @@ static simple_spinlock_t buffer_debug_lock = SIMPLE_SPINLOCK_INITIALIZER;
     }
     return *this;
   }
+  buffer::ptr& buffer::ptr::operator= (ptr&& p)  // move assignment: take over p's raw pointer, offset and length
+  {
+    release();  // presumably drops whatever this ptr currently holds -- release() is defined elsewhere
+    buffer::raw *raw = p._raw;  // read after release(); NOTE(review): on self-move this sees the post-release value
+    if (raw) {
+      _raw = raw;
+      _off = p._off;
+      _len = p._len;
+      p._raw = nullptr;  // the source gives up its pointer
+      p._off = p._len = 0;
+    } else {
+      _off = _len = 0;  // empty source: only offset/length are reset; _raw stays as release() left it
+    }
+    return *this;
+  }
 
   buffer::raw *buffer::ptr::clone()
   {
@@ -1585,6 +1603,12 @@ static simple_spinlock_t buffer_debug_lock = SIMPLE_SPINLOCK_INITIALIZER;
       push_back(bp);
   }
 
+  void buffer::list::append(ptr&& bp)  // rvalue overload: hands bp on to push_back via std::move
+  {
+    if (bp.length())  // zero-length ptrs are not added to the list
+      push_back(std::move(bp));
+  }
+
   void buffer::list::append(const ptr& bp, unsigned off, unsigned len)
   {
     assert(len+off <= bp.length());
@@ -1599,8 +1623,7 @@ static simple_spinlock_t buffer_debug_lock = SIMPLE_SPINLOCK_INITIALIZER;
       }
     }
     // add new item to list
-    ptr tempbp(bp, off, len);
-    push_back(tempbp);
+    push_back(ptr(bp, off, len));
   }
 
   void buffer::list::append(const list& bl)
@@ -1627,7 +1650,7 @@ static simple_spinlock_t buffer_debug_lock = SIMPLE_SPINLOCK_INITIALIZER;
   {
     ptr bp(len);
     bp.zero();
-    append(bp);
+    append(std::move(bp));
   }
 
   
@@ -1836,7 +1859,7 @@ void buffer::list::encode_base64(buffer::list& o)
   bufferptr bp(length() * 4 / 3 + 3);
   int l = ceph_armor(bp.c_str(), bp.c_str() + bp.length(), c_str(), c_str() + length());
   bp.set_length(l);
-  o.push_back(bp);
+  o.push_back(std::move(bp));
 }
 
 void buffer::list::decode_base64(buffer::list& e)
@@ -1851,7 +1874,7 @@ void buffer::list::decode_base64(buffer::list& e)
   }
   assert(l <= (int)bp.length());
   bp.set_length(l);
-  push_back(bp);
+  push_back(std::move(bp));
 }
 
   
@@ -1905,7 +1928,7 @@ ssize_t buffer::list::read_fd(int fd, size_t len)
   ssize_t ret = safe_read(fd, (void*)bp.c_str(), len);
   if (ret >= 0) {
     bp.set_length(ret);
-    append(bp);
+    append(std::move(bp));
   }
   return ret;
 }
@@ -1914,8 +1937,7 @@ int buffer::list::read_fd_zero_copy(int fd, size_t len)
 {
 #ifdef CEPH_HAVE_SPLICE
   try {
-    bufferptr bp = buffer::create_zero_copy(len, fd, NULL);
-    append(bp);
+    append(buffer::create_zero_copy(len, fd, NULL));
   } catch (buffer::error_code &e) {
     return e.code;
   } catch (buffer::malformed_input &e) {
diff --git a/src/common/ceph_argparse.cc b/src/common/ceph_argparse.cc
index 1a60f2e..df02434 100644
--- a/src/common/ceph_argparse.cc
+++ b/src/common/ceph_argparse.cc
@@ -533,6 +533,8 @@ static void generic_usage(bool is_server)
     cout << "\
   --debug_ms N      set message debug level (e.g. 1)\n";
   }
+
+  cout.flush();
 }
 
 void generic_server_usage()
diff --git a/src/common/ceph_argparse.h b/src/common/ceph_argparse.h
index 6ad0234..b504b35 100644
--- a/src/common/ceph_argparse.h
+++ b/src/common/ceph_argparse.h
@@ -35,7 +35,7 @@
 class CephInitParameters
 {
 public:
-  CephInitParameters(uint32_t module_type_);
+  explicit CephInitParameters(uint32_t module_type_);
   std::list<std::string> get_conf_files() const;
 
   uint32_t module_type;
diff --git a/src/common/ceph_context.cc b/src/common/ceph_context.cc
index 42595f5..5470807 100644
--- a/src/common/ceph_context.cc
+++ b/src/common/ceph_context.cc
@@ -46,7 +46,7 @@ namespace {
 
 class LockdepObs : public md_config_obs_t {
 public:
-  LockdepObs(CephContext *cct) : m_cct(cct), m_registered(false) {
+  explicit LockdepObs(CephContext *cct) : m_cct(cct), m_registered(false) {
   }
   virtual ~LockdepObs() {
     if (m_registered) {
@@ -80,7 +80,7 @@ private:
 class CephContextServiceThread : public Thread
 {
 public:
-  CephContextServiceThread(CephContext *cct)
+  explicit CephContextServiceThread(CephContext *cct)
     : _lock("CephContextServiceThread::_lock"),
       _reopen_logs(false), _exit_thread(false), _cct(cct)
   {
@@ -149,7 +149,7 @@ class LogObs : public md_config_obs_t {
   ceph::log::Log *log;
 
 public:
-  LogObs(ceph::log::Log *l) : log(l) {}
+  explicit LogObs(ceph::log::Log *l) : log(l) {}
 
   const char** get_tracked_conf_keys() const {
     static const char *KEYS[] = {
@@ -160,6 +160,12 @@ public:
       "err_to_syslog",
       "log_to_stderr",
       "err_to_stderr",
+      "log_to_graylog",
+      "err_to_graylog",
+      "log_graylog_host",
+      "log_graylog_port",
+      "fsid",
+      "host",
       NULL
     };
     return KEYS;
@@ -186,12 +192,38 @@ public:
     }
 
     if (changed.count("log_max_new")) {
+
       log->set_max_new(conf->log_max_new);
     }
 
     if (changed.count("log_max_recent")) {
       log->set_max_recent(conf->log_max_recent);
     }
+
+    // graylog
+    if (changed.count("log_to_graylog") || changed.count("err_to_graylog")) {
+      int l = conf->log_to_graylog ? 99 : (conf->err_to_graylog ? -1 : -2);
+      log->set_graylog_level(l, l);
+
+      if (conf->log_to_graylog || conf->err_to_graylog) {
+	log->start_graylog();
+      } else if (! (conf->log_to_graylog && conf->err_to_graylog)) {
+	log->stop_graylog();
+      }
+    }
+
+    if (log->graylog() && (changed.count("log_graylog_host") || changed.count("log_graylog_port"))) {
+      log->graylog()->set_destination(conf->log_graylog_host, conf->log_graylog_port);
+    }
+
+    // metadata
+    if (log->graylog() && changed.count("host")) {
+      log->graylog()->set_hostname(conf->host);
+    }
+
+    if (log->graylog() && changed.count("fsid")) {
+      log->graylog()->set_fsid(conf->fsid);
+    }
   }
 };
 
@@ -201,7 +233,7 @@ class CephContextObs : public md_config_obs_t {
   CephContext *cct;
 
 public:
-  CephContextObs(CephContext *cct) : cct(cct) {}
+  explicit CephContextObs(CephContext *cct) : cct(cct) {}
 
   const char** get_tracked_conf_keys() const {
     static const char *KEYS[] = {
@@ -264,7 +296,7 @@ class CephContextHook : public AdminSocketHook {
   CephContext *m_cct;
 
 public:
-  CephContextHook(CephContext *cct) : m_cct(cct) {}
+  explicit CephContextHook(CephContext *cct) : m_cct(cct) {}
 
   bool call(std::string command, cmdmap_t& cmdmap, std::string format,
 	    bufferlist& out) {
@@ -617,8 +649,8 @@ PerfCountersCollection *CephContext::get_perfcounters_collection()
 void CephContext::enable_perf_counter()
 {
   PerfCountersBuilder plb(this, "cct", l_cct_first, l_cct_last);
-  plb.add_u64_counter(l_cct_total_workers, "total_workers", "Total workers");
-  plb.add_u64_counter(l_cct_unhealthy_workers, "unhealthy_workers", "Unhealthy workers");
+  plb.add_u64(l_cct_total_workers, "total_workers", "Total workers");
+  plb.add_u64(l_cct_unhealthy_workers, "unhealthy_workers", "Unhealthy workers");
   PerfCounters *perf_tmp = plb.create_perf_counters();
 
   ceph_spin_lock(&_cct_perf_lock);
diff --git a/src/common/ceph_time.cc b/src/common/ceph_time.cc
index a18a316..8860165 100644
--- a/src/common/ceph_time.cc
+++ b/src/common/ceph_time.cc
@@ -22,13 +22,10 @@
 namespace ceph {
   namespace time_detail {
     real_clock::time_point real_clock::now(const CephContext* cct) noexcept {
-      struct timespec ts;
-      clock_gettime(CLOCK_REALTIME, &ts);
-      // TODO: After we get the time-literal configuration patch in,
-      // just add the configured duration.
+      auto t = now();
       if (cct)
-	ts.tv_sec += cct->_conf->clock_offset;
-      return from_timespec(ts);
+	t += make_timespan(cct->_conf->clock_offset);
+      return t;
     }
 
     void real_clock::to_ceph_timespec(const time_point& t,
@@ -48,13 +45,10 @@ namespace ceph {
 
     coarse_real_clock::time_point coarse_real_clock::now(
       const CephContext* cct) noexcept {
-      struct timespec ts;
-      clock_gettime(CLOCK_REALTIME_COARSE, &ts);
-      // TODO: After we get the time-literal configuration patch in,
-      // just add the configured duration.
+      auto t = now();
       if (cct)
-	ts.tv_sec += cct->_conf->clock_offset;
-      return from_timespec(ts);
+	t += make_timespan(cct->_conf->clock_offset);
+      return t;
     }
 
     void coarse_real_clock::to_ceph_timespec(const time_point& t,
diff --git a/src/common/ceph_time.h b/src/common/ceph_time.h
index a634e7f..f0e188f 100644
--- a/src/common/ceph_time.h
+++ b/src/common/ceph_time.h
@@ -151,7 +151,18 @@ namespace ceph {
 
       static time_point now() noexcept {
 	struct timespec ts;
+#if defined(CLOCK_REALTIME_COARSE)
+	// Linux systems have _COARSE clocks.
 	clock_gettime(CLOCK_REALTIME_COARSE, &ts);
+#elif defined(CLOCK_REALTIME_FAST)
+	// BSD systems have _FAST clocks.
+	clock_gettime(CLOCK_REALTIME_FAST, &ts);
+#else
+	// And if we find neither, you may wish to consult your system's
+	// documentation.
+#warning Falling back to CLOCK_REALTIME, may be slow.
+	clock_gettime(CLOCK_REALTIME, &ts);
+#endif
 	return from_timespec(ts);
       }
       static time_point now(const CephContext* cct) noexcept;
@@ -233,7 +244,18 @@ namespace ceph {
 
       static time_point now() noexcept {
 	struct timespec ts;
+#if defined(CLOCK_MONOTONIC_COARSE)
+	// Linux systems have _COARSE clocks.
 	clock_gettime(CLOCK_MONOTONIC_COARSE, &ts);
+#elif defined(CLOCK_MONOTONIC_FAST)
+	// BSD systems have _FAST clocks.
+	clock_gettime(CLOCK_MONOTONIC_FAST, &ts);
+#else
+	// And if we find neither, you may wish to consult your system's
+	// documentation.
+#warning Falling back to CLOCK_MONOTONIC, may be slow.
+	clock_gettime(CLOCK_MONOTONIC, &ts);
+#endif
 	return time_point(seconds(ts.tv_sec) + nanoseconds(ts.tv_nsec));
       }
     };
@@ -345,18 +367,21 @@ namespace ceph {
 template<typename Clock, typename Duration>
 void encode(const std::chrono::time_point<Clock, Duration>& t,
 	    ceph::bufferlist &bl) {
-  struct timespec ts = Clock::to_timespec();
+  auto ts = Clock::to_timespec(t);
   // A 32 bit count of seconds causes me vast unhappiness.
-  ::encode((uint32_t) ts.tv_sec, bl);
-  ::encode((uint32_t) ts.tv_nsec, bl);
+  uint32_t s = ts.tv_sec;
+  uint32_t ns = ts.tv_nsec;
+  ::encode(s, bl);
+  ::encode(ns, bl);
 }
 
 template<typename Clock, typename Duration>
 void decode(std::chrono::time_point<Clock, Duration>& t,
 	    bufferlist::iterator& p) {
-  struct timespec ts;
-  ::decode((uint32_t&) ts.tv_sec, p);
-  ::decode((uint32_t&) ts.tv_nsec, p);
+  uint32_t s, ns;
+  ::decode(s, p);
+  ::decode(ns, p);
+  struct timespec ts = {s, ns};
 
   t = Clock::from_timespec(ts);
 }
diff --git a/src/common/common_init.cc b/src/common/common_init.cc
index 3303a7a..b1f7165 100644
--- a/src/common/common_init.cc
+++ b/src/common/common_init.cc
@@ -34,7 +34,8 @@
 #define STRINGIFY(x) _STR(x)
 
 CephContext *common_preinit(const CephInitParameters &iparams,
-			  enum code_environment_t code_env, int flags)
+			    enum code_environment_t code_env, int flags,
+			    const char *data_dir_option)
 {
   // set code environment
   ANNOTATE_BENIGN_RACE_SIZED(&g_code_env, sizeof(g_code_env), "g_code_env");
@@ -49,6 +50,9 @@ CephContext *common_preinit(const CephInitParameters &iparams,
   // Set up our entity name.
   conf->name = iparams.name;
 
+  if (data_dir_option)
+    conf->data_dir_option = data_dir_option;
+
   // Set some defaults based on code type
   switch (code_env) {
   case CODE_ENVIRONMENT_DAEMON:
diff --git a/src/common/common_init.h b/src/common/common_init.h
index f48b349..6d11ef3 100644
--- a/src/common/common_init.h
+++ b/src/common/common_init.h
@@ -57,7 +57,8 @@ enum common_init_flags_t {
  * Your library may also supply functions to read a configuration file.
  */
 CephContext *common_preinit(const CephInitParameters &iparams,
-			    enum code_environment_t code_env, int flags);
+			    enum code_environment_t code_env, int flags,
+			    const char *data_dir_option = 0);
 
 /* Print out some parse errors. */
 void complain_about_parse_errors(CephContext *cct,
diff --git a/src/common/config.cc b/src/common/config.cc
index 37758d4..d67a96b 100644
--- a/src/common/config.cc
+++ b/src/common/config.cc
@@ -56,7 +56,7 @@ using std::pair;
 using std::set;
 using std::string;
 
-const char *CEPH_CONF_FILE_DEFAULT = "/etc/ceph/$cluster.conf, ~/.ceph/$cluster.conf, $cluster.conf";
+const char *CEPH_CONF_FILE_DEFAULT = "$data_dir/config, /etc/ceph/$cluster.conf, ~/.ceph/$cluster.conf, $cluster.conf";
 
 // file layouts
 struct ceph_file_layout g_default_file_layout = {
@@ -217,8 +217,25 @@ int md_config_t::parse_config_files(const char *conf_files,
       conf_files = CEPH_CONF_FILE_DEFAULT;
     }
   }
+
   std::list<std::string> cfl;
   get_str_list(conf_files, cfl);
+  auto p = cfl.begin();
+  while (p != cfl.end()) {
+    // expand $data_dir?
+    string &s = *p;
+    if (s.find("$data_dir") != string::npos) {
+      if (data_dir_option.length()) {
+	list<config_option*> stack;
+	expand_meta(s, NULL, stack, warnings);
+	p++;
+      } else {
+	cfl.erase(p++);  // ignore this item
+      }
+    } else {
+      ++p;
+    }
+  }
   return parse_config_files_impl(cfl, parse_errors, warnings);
 }
 
@@ -987,8 +1004,10 @@ int md_config_t::set_val_raw(const char *val, const config_option *opt)
   return -ENOSYS;
 }
 
-static const char *CONF_METAVARIABLES[] =
-  { "cluster", "type", "name", "host", "num", "id", "pid", "cctid" };
+static const char *CONF_METAVARIABLES[] = {
+  "data_dir", // put this first: it may contain some of the others
+  "cluster", "type", "name", "host", "num", "id", "pid", "cctid"
+};
 static const int NUM_CONF_METAVARIABLES =
       (sizeof(CONF_METAVARIABLES) / sizeof(CONF_METAVARIABLES[0]));
 
@@ -1102,7 +1121,20 @@ bool md_config_t::expand_meta(std::string &origval,
 	  out += stringify(getpid());
 	else if (var == "cctid")
 	  out += stringify((unsigned long long)this);
-	else
+	else if (var == "data_dir") {
+	  if (data_dir_option.length()) {
+	    char *vv = NULL;
+	    _get_val(data_dir_option.c_str(), &vv, -1);
+	    string tmp = vv;
+	    free(vv);
+	    expand_meta(tmp, NULL, stack, oss);
+	    out += tmp;
+	  } else {
+	    // this isn't really right, but it'll result in a mangled
+	    // non-existent path that will fail any search list
+	    out += "$data_dir";
+	  }
+	} else
 	  assert(0); // unreachable
 	expanded = true;
       }
diff --git a/src/common/config.h b/src/common/config.h
index 262dc71..79c4437 100644
--- a/src/common/config.h
+++ b/src/common/config.h
@@ -205,6 +205,7 @@ public:
   ceph::log::SubsystemMap subsys;
 
   EntityName name;
+  string data_dir_option;  ///< data_dir config option, if any
 
   /// cluster name
   string cluster;
diff --git a/src/common/config_opts.h b/src/common/config_opts.h
index dcbb7e0..04dafac 100644
--- a/src/common/config_opts.h
+++ b/src/common/config_opts.h
@@ -50,6 +50,10 @@ OPTION(log_to_syslog, OPT_BOOL, false)
 OPTION(err_to_syslog, OPT_BOOL, false)
 OPTION(log_flush_on_exit, OPT_BOOL, true) // default changed by common_preinit()
 OPTION(log_stop_at_utilization, OPT_FLOAT, .97)  // stop logging at (near) full
+OPTION(log_to_graylog, OPT_BOOL, false)
+OPTION(err_to_graylog, OPT_BOOL, false)
+OPTION(log_graylog_host, OPT_STR, "127.0.0.1")
+OPTION(log_graylog_port, OPT_INT, 12201)
 
 // options will take k/v pairs, or single-item that will be assumed as general
 // default for all, regardless of channel.
@@ -60,6 +64,9 @@ OPTION(clog_to_monitors, OPT_STR, "default=true")
 OPTION(clog_to_syslog, OPT_STR, "false")
 OPTION(clog_to_syslog_level, OPT_STR, "info") // this level and above
 OPTION(clog_to_syslog_facility, OPT_STR, "default=daemon audit=local0")
+OPTION(clog_to_graylog, OPT_STR, "false")
+OPTION(clog_to_graylog_host, OPT_STR, "127.0.0.1")
+OPTION(clog_to_graylog_port, OPT_STR, "12201")
 
 OPTION(mon_cluster_log_to_syslog, OPT_STR, "default=false")
 OPTION(mon_cluster_log_to_syslog_level, OPT_STR, "info")   // this level and above
@@ -67,6 +74,9 @@ OPTION(mon_cluster_log_to_syslog_facility, OPT_STR, "daemon")
 OPTION(mon_cluster_log_file, OPT_STR,
     "default=/var/log/ceph/$cluster.$channel.log cluster=/var/log/ceph/$cluster.log")
 OPTION(mon_cluster_log_file_level, OPT_STR, "info")
+OPTION(mon_cluster_log_to_graylog, OPT_STR, "false")
+OPTION(mon_cluster_log_to_graylog_host, OPT_STR, "127.0.0.1")
+OPTION(mon_cluster_log_to_graylog_port, OPT_STR, "12201")
 
 OPTION(enable_experimental_unrecoverable_data_corrupting_features, OPT_STR, "")
 
@@ -109,6 +119,7 @@ SUBSYS(striper, 0, 1)
 SUBSYS(objecter, 0, 1)
 SUBSYS(rados, 0, 5)
 SUBSYS(rbd, 0, 5)
+SUBSYS(rbd_mirror, 0, 5)
 SUBSYS(rbd_replay, 0, 5)
 SUBSYS(journaler, 0, 5)
 SUBSYS(objectcacher, 0, 5)
@@ -117,7 +128,6 @@ SUBSYS(osd, 0, 5)
 SUBSYS(optracker, 0, 5)
 SUBSYS(objclass, 0, 5)
 SUBSYS(filestore, 1, 3)
-SUBSYS(keyvaluestore, 1, 3)
 SUBSYS(journal, 1, 3)
 SUBSYS(ms, 0, 5)
 SUBSYS(mon, 1, 5)
@@ -144,6 +154,8 @@ SUBSYS(bdev, 1, 3)
 SUBSYS(kstore, 1, 5)
 SUBSYS(rocksdb, 4, 5)
 SUBSYS(leveldb, 4, 5)
+SUBSYS(kinetic, 1, 5)
+SUBSYS(fuse, 1, 5)
 
 OPTION(key, OPT_STR, "")
 OPTION(keyfile, OPT_STR, "")
@@ -191,6 +203,7 @@ OPTION(ms_async_set_affinity, OPT_BOOL, true)
 // If ms_async_affinity_cores is empty, all threads will be bind to current running
 // core
 OPTION(ms_async_affinity_cores, OPT_STR, "")
+OPTION(ms_async_send_inline, OPT_BOOL, true)
 
 OPTION(inject_early_sigterm, OPT_BOOL, false)
 
@@ -226,6 +239,7 @@ OPTION(mon_osd_prime_pg_temp, OPT_BOOL, false)  // prime osdmap with pg mapping
 OPTION(mon_osd_prime_pg_temp_max_time, OPT_FLOAT, .5)  // max time to spend priming
 OPTION(mon_osd_pool_ec_fast_read, OPT_BOOL, false) // whether turn on fast read on the pool or not
 OPTION(mon_stat_smooth_intervals, OPT_INT, 2)  // smooth stats over last N PGMap maps
+OPTION(mon_election_timeout, OPT_FLOAT, 5)  // on election proposer, max waiting time for all ACKs
 OPTION(mon_lease, OPT_FLOAT, 5)       // lease interval
 OPTION(mon_lease_renew_interval_factor, OPT_FLOAT, .6) // on leader, to renew the lease
 OPTION(mon_lease_ack_timeout_factor, OPT_FLOAT, 2.0) // on leader, if lease isn't acked by all peons
@@ -236,6 +250,7 @@ OPTION(mon_clock_drift_warn_backoff, OPT_FLOAT, 5) // exponential backoff for cl
 OPTION(mon_timecheck_interval, OPT_FLOAT, 300.0) // on leader, timecheck (clock drift check) interval (seconds)
 OPTION(mon_pg_create_interval, OPT_FLOAT, 30.0) // no more than every 30s
 OPTION(mon_pg_stuck_threshold, OPT_INT, 300) // number of seconds after which pgs can be considered inactive, unclean, or stale (see doc/control.rst under dump_stuck for more info)
+OPTION(mon_pg_min_inactive, OPT_U64, 1) // the number of PGs which have to be inactive longer than 'mon_pg_stuck_threshold' before health goes into ERR. 0 means disabled, never go into ERR.
 OPTION(mon_pg_warn_min_per_osd, OPT_INT, 30)  // min # pgs per (in) osd before we warn the admin
 OPTION(mon_pg_warn_max_per_osd, OPT_INT, 300)  // max # pgs per (in) osd before we warn the admin
 OPTION(mon_pg_warn_max_object_skew, OPT_FLOAT, 10.0) // max skew few average in objects per pg
@@ -273,6 +288,8 @@ OPTION(mon_health_to_clog_tick_interval, OPT_DOUBLE, 60.0)
 OPTION(mon_data_avail_crit, OPT_INT, 5)
 OPTION(mon_data_avail_warn, OPT_INT, 30)
 OPTION(mon_data_size_warn, OPT_U64, 15*1024*1024*1024) // issue a warning when the monitor's data store goes over 15GB (in bytes)
+OPTION(mon_warn_not_scrubbed, OPT_INT, 0)
+OPTION(mon_warn_not_deep_scrubbed, OPT_INT, 0)
 OPTION(mon_scrub_interval, OPT_INT, 3600*24) // once a day
 OPTION(mon_scrub_timeout, OPT_INT, 60*5) // let's give it 5 minutes; why not.
 OPTION(mon_scrub_max_keys, OPT_INT, 100) // max number of keys to scrub each time
@@ -578,7 +595,8 @@ OPTION(osd_pool_default_min_size, OPT_INT, 0)  // 0 means no specific default; c
 OPTION(osd_pool_default_pg_num, OPT_INT, 8) // number of PGs for new pools. Configure in global or mon section of ceph.conf
 OPTION(osd_pool_default_pgp_num, OPT_INT, 8) // number of PGs for placement purposes. Should be equal to pg_num
 OPTION(osd_compression_plugins, OPT_STR,
-       "snappy"
+       "zlib"
+       " snappy"
        ) // list of compression plugins
 OPTION(osd_pool_default_erasure_code_profile,
        OPT_STR,
@@ -645,6 +663,8 @@ OPTION(osd_recovery_threads, OPT_INT, 1)
 OPTION(osd_recover_clone_overlap, OPT_BOOL, true)   // preserve clone_overlap during recovery/migration
 OPTION(osd_op_num_threads_per_shard, OPT_INT, 2)
 OPTION(osd_op_num_shards, OPT_INT, 5)
+OPTION(osd_op_queue, OPT_STR, "prio") // PrioritizedQueue (prio), Weighted Priority Queue (wpq), or debug_random
+OPTION(osd_op_queue_cut_off, OPT_STR, "low") // Min priority to go to strict queue. (low, high, debug_random)
 
 // Set to true for testing.  Users should NOT set this.
 // If set to true even after reading enough shards to
@@ -788,8 +808,8 @@ OPTION(kinetic_use_ssl, OPT_BOOL, false) // whether to secure kinetic traffic wi
 OPTION(rocksdb_separate_wal_dir, OPT_BOOL, false) // use $path.wal for wal
 OPTION(rocksdb_db_paths, OPT_STR, "")   // path,size( path,size)*
 OPTION(rocksdb_log_to_ceph_log, OPT_BOOL, true)  // log to ceph log
-// rocksdb options that will be used for keyvaluestore(if backend is rocksdb)
-OPTION(keyvaluestore_rocksdb_options, OPT_STR, "")
+OPTION(rocksdb_cache_size, OPT_INT, 128*1024*1024)  // default rocksdb cache size
+OPTION(rocksdb_block_size, OPT_INT, 4*1024)  // default rocksdb block size
 // rocksdb options that will be used for omap(if omap_backend is rocksdb)
 OPTION(filestore_rocksdb_options, OPT_STR, "")
 // rocksdb options that will be used in monstore
@@ -832,6 +852,7 @@ OPTION(osd_objectstore_tracing, OPT_BOOL, false) // true if LTTng-UST tracepoint
 // Override maintaining compatibility with older OSDs
 // Set to true for testing.  Users should NOT set this.
 OPTION(osd_debug_override_acting_compat, OPT_BOOL, false)
+OPTION(osd_objectstore_fuse, OPT_BOOL, false)
 
 OPTION(osd_bench_small_size_max_iops, OPT_U32, 100) // 100 IOPS
 OPTION(osd_bench_large_size_max_throughput, OPT_U64, 100 << 20) // 100 MB/s
@@ -844,10 +865,17 @@ OPTION(memstore_page_size, OPT_U64, 64 << 10)
 
 OPTION(bdev_debug_inflight_ios, OPT_BOOL, false)
 OPTION(bdev_inject_crash, OPT_INT, 0)  // if N>0, then ~ 1/N IOs will complete before we crash on flush.
+OPTION(bdev_inject_crash_flush_delay, OPT_INT, 2) // wait N more seconds on flush
 OPTION(bdev_aio, OPT_BOOL, true)
 OPTION(bdev_aio_poll_ms, OPT_INT, 250)  // milliseconds
 OPTION(bdev_aio_max_queue_depth, OPT_INT, 32)
 
+// if yes, osd will unbind all NVMe devices from kernel driver and bind them
+// to the uio_pci_generic driver. The purpose is to prevent the case where
+// NVMe driver is loaded while osd is running.
+OPTION(bdev_nvme_unbind_from_kernel, OPT_BOOL, false)
+OPTION(bdev_nvme_retry_count, OPT_INT, -1) // -1 means by default which is 4
+
 OPTION(bluefs_alloc_size, OPT_U64, 1048576)
 OPTION(bluefs_max_prefetch, OPT_U64, 1048576)
 OPTION(bluefs_min_log_runway, OPT_U64, 1048576)  // alloc when we get this low
@@ -863,12 +891,21 @@ OPTION(bluestore_bluefs_min_ratio, OPT_FLOAT, .02)  // min fs free / total free
 OPTION(bluestore_bluefs_max_ratio, OPT_FLOAT, .90)  // max fs free / total free
 OPTION(bluestore_bluefs_gift_ratio, OPT_FLOAT, .02) // how much to add at a time
 OPTION(bluestore_bluefs_reclaim_ratio, OPT_FLOAT, .20) // how much to reclaim at a time
+// If you want to use spdk driver, you need to specify NVMe serial number here
+// with "spdk:" prefix.
+// Users can use 'lspci -vvv -d 8086:0953 | grep "Device Serial Number"' to
+// get the serial number of Intel(R) Fultondale NVMe controllers.
+// Example:
+// bluestore_block_path = spdk:55cd2e404bd73932
 OPTION(bluestore_block_path, OPT_STR, "")
 OPTION(bluestore_block_size, OPT_U64, 10 * 1024*1024*1024)  // 10gb for testing
+OPTION(bluestore_block_create, OPT_BOOL, true)
 OPTION(bluestore_block_db_path, OPT_STR, "")
-OPTION(bluestore_block_db_size, OPT_U64, 0)      // rocksdb primary storage
+OPTION(bluestore_block_db_size, OPT_U64, 0)   // rocksdb ssts (hot/warm)
+OPTION(bluestore_block_db_create, OPT_BOOL, false)
 OPTION(bluestore_block_wal_path, OPT_STR, "")
-OPTION(bluestore_block_wal_size, OPT_U64, 0)     // rocksdb wal
+OPTION(bluestore_block_wal_size, OPT_U64, 96 * 1024*1024) // rocksdb wal
+OPTION(bluestore_block_wal_create, OPT_BOOL, false)
 OPTION(bluestore_max_dir_size, OPT_U32, 1000000)
 OPTION(bluestore_min_alloc_size, OPT_U32, 64*1024)
 OPTION(bluestore_onode_map_size, OPT_U32, 1024)   // onodes per collection
@@ -1014,18 +1051,6 @@ OPTION(journal_dio, OPT_BOOL, true)
 OPTION(journal_aio, OPT_BOOL, true)
 OPTION(journal_force_aio, OPT_BOOL, false)
 
-OPTION(keyvaluestore_queue_max_ops, OPT_INT, 50)
-OPTION(keyvaluestore_queue_max_bytes, OPT_INT, 100 << 20)
-OPTION(keyvaluestore_debug_check_backend, OPT_BOOL, 0) // Expensive debugging check on sync
-OPTION(keyvaluestore_op_threads, OPT_INT, 2)
-OPTION(keyvaluestore_op_thread_timeout, OPT_INT, 60)
-OPTION(keyvaluestore_op_thread_suicide_timeout, OPT_INT, 180)
-OPTION(keyvaluestore_default_strip_size, OPT_INT, 4096) // Only affect new object
-OPTION(keyvaluestore_max_expected_write_size, OPT_U64, 1ULL << 24) // bytes
-OPTION(keyvaluestore_header_cache_size, OPT_INT, 4096)    // Header cache size
-OPTION(keyvaluestore_backend, OPT_STR, "leveldb")
-OPTION(keyvaluestore_dump_file, OPT_STR, "")         // file onto which store transaction dumps
-
 // max bytes to search ahead in journal searching for corruption
 OPTION(journal_max_corrupt_search, OPT_U64, 10<<20)
 OPTION(journal_block_align, OPT_BOOL, true)
@@ -1138,13 +1163,14 @@ OPTION(rgw_enable_quota_threads, OPT_BOOL, true)
 OPTION(rgw_enable_gc_threads, OPT_BOOL, true)
 
 OPTION(rgw_data, OPT_STR, "/var/lib/ceph/radosgw/$cluster-$id")
-OPTION(rgw_enable_apis, OPT_STR, "s3, swift, swift_auth, admin")
+OPTION(rgw_enable_apis, OPT_STR, "s3, s3website, swift, swift_auth, admin")
 OPTION(rgw_cache_enabled, OPT_BOOL, true)   // rgw cache enabled
 OPTION(rgw_cache_lru_size, OPT_INT, 10000)   // num of entries in rgw cache
 OPTION(rgw_socket_path, OPT_STR, "")   // path to unix domain socket, if not specified, rgw will not run as external fcgi
 OPTION(rgw_host, OPT_STR, "")  // host for radosgw, can be an IP, default is 0.0.0.0
 OPTION(rgw_port, OPT_STR, "")  // port to listen, format as "8080" "5000", if not specified, rgw will not run external fcgi
-OPTION(rgw_dns_name, OPT_STR, "")
+OPTION(rgw_dns_name, OPT_STR, "") // hostname suffix on buckets
+OPTION(rgw_dns_s3website_name, OPT_STR, "") // hostname suffix on buckets for s3-website endpoint
 OPTION(rgw_content_length_compat, OPT_BOOL, false) // Check both HTTP_CONTENT_LENGTH and CONTENT_LENGTH in fcgi env
 OPTION(rgw_script_uri, OPT_STR, "") // alternative value for SCRIPT_URI if not set in request
 OPTION(rgw_request_uri, OPT_STR,  "") // alternative value for REQUEST_URI if not set in request
@@ -1253,6 +1279,8 @@ OPTION(rgw_objexp_time_step, OPT_U32, 4096) // number of seconds for rounding th
 OPTION(rgw_objexp_hints_num_shards, OPT_U32, 127) // maximum number of parts in which the hint index is stored in
 OPTION(rgw_objexp_chunk_size, OPT_U32, 100) // maximum number of entries in a single operation when processing objexp data
 
+OPTION(rgw_enable_static_website, OPT_BOOL, false) // enable static website feature
+
 OPTION(mutex_perf_counter, OPT_BOOL, false) // enable/disable mutex perf counter
 OPTION(throttler_perf_counter, OPT_BOOL, true) // enable/disable throttler perf counter
 
diff --git a/src/common/hobject.cc b/src/common/hobject.cc
index e651047..9e84219 100644
--- a/src/common/hobject.cc
+++ b/src/common/hobject.cc
@@ -194,28 +194,124 @@ void hobject_t::generate_test_instances(list<hobject_t*>& o)
 	CEPH_SNAPDIR, 910, 1, "n2"));
 }
 
+// Append `in` to *out, writing '%', ':', '/' and every char that is below
+// 32 or at/above 127 as a three-character "%xx" (lower-case hex) escape.
+static void append_out_escaped(const string &in, string *out)
+{
+  for (size_t pos = 0; pos < in.size(); ++pos) {
+    const char c = in[pos];
+    const bool plain = c != '%' && c != ':' && c != '/' && c >= 32 && c < 127;
+    if (plain) { out->push_back(c); continue; }
+    char hex[4];  // '%' + two hex digits + NUL
+    snprintf(hex, sizeof(hex), "%%%02x", (unsigned)(unsigned char)c);
+    out->append(hex);
+  }
+}
+
+// Decode a %xx-escaped field up to NUL or ':'; returns the stop position.
+// A '%' not followed by two hex digits stops decoding at that '%', so
+// callers that expect ':' there reject the input (no read past the NUL).
+static const char *decode_out_escaped(const char *in, string *out)
+{
+  while (*in && *in != ':') {
+    if (*in != '%') {
+      out->push_back(*in++);
+      continue;
+    }
+    // isxdigit(0) is false, so in[2] is never read past the terminator
+    if (!isxdigit((unsigned char)in[1]) || !isxdigit((unsigned char)in[2]))
+      return in;
+    const char hex[3] = { in[1], in[2], 0 };
+    out->push_back((char)strtol(hex, NULL, 16));
+    in += 3;
+  }
+  return in;
+}
+
 ostream& operator<<(ostream& out, const hobject_t& o)
 {
   if (o == hobject_t())
     return out << "MIN";
   if (o.is_max())
     return out << "MAX";
-  out << o.pool << '/';
+  out << o.pool << ':';
   out << std::hex;
   out.width(8);
   out.fill('0');
-  out << o.get_hash();
+  out << o.get_bitwise_key_u32(); // << '~' << o.get_hash();
   out.width(0);
   out.fill(' ');
   out << std::dec;
-  if (o.nspace.length())
-    out << ":" << o.nspace;
-  if (o.get_key().length())
-    out << "." << o.get_key();
-  out << "/" << o.oid << "/" << o.snap;
+  out << ':';
+  string v;
+  append_out_escaped(o.nspace, &v);
+  v.push_back(':');
+  append_out_escaped(o.get_key(), &v);
+  v.push_back(':');
+  append_out_escaped(o.oid.name, &v);
+  out << v << ':' << o.snap;
   return out;
 }
 
+bool hobject_t::parse(const string &s)  // parse "MIN", "MAX" or "<pool>:<hex>:<ns>:<key>:<name>:<snap>"; members are assigned only on success
+{
+  if (s == "MIN") {
+    *this = hobject_t();
+    return true;
+  }
+  if (s == "MAX") {
+    *this = hobject_t::get_max();
+    return true;
+  }
+  // numeric prefix: a decimal pool, then a hex value, each followed by ':'
+  const char *start = s.c_str();
+  long long po;
+  unsigned h;
+  int r = sscanf(start, "%lld:%x:", &po, &h);
+  if (r != 2)
+    return false;
+  for (; *start && *start != ':'; ++start) ;  // advance to the ':' after the pool
+  for (++start; *start && isxdigit(*start); ++start) ;  // step over that ':' and the hex digits
+  if (*start != ':')
+    return false;
+  // three %-escaped fields (namespace, key, name), each ended by ':'
+  string ns, k, name;
+  const char *p = decode_out_escaped(start + 1, &ns);
+  if (*p != ':')
+    return false;
+  p = decode_out_escaped(p + 1, &k);
+  if (*p != ':')
+    return false;
+  p = decode_out_escaped(p + 1, &name);
+  if (*p != ':')
+    return false;
+  start = p + 1;
+  // snapshot: the literal "head" or a hex number, with nothing after it
+  unsigned long long sn;
+  if (strncmp(start, "head", 4) == 0) {  // NOTE(review): "head" is the only symbolic snap name accepted here -- confirm the printer emits no others
+    sn = CEPH_NOSNAP;
+    start += 4;
+    if (*start != 0)
+      return false;
+  } else {
+    r = sscanf(start, "%llx", &sn);
+    if (r != 1)
+      return false;
+    for (++start; *start && isxdigit(*start); ++start) ;  // skips one char unconditionally, then requires hex digits to the end
+    if (*start)
+      return false;
+  }
+  // everything parsed; commit the fields to *this
+  max = false;
+  pool = po;
+  set_hash(_reverse_bits(h));  // the parsed hex value is passed through _reverse_bits() before being stored as the hash
+  nspace = ns;
+  oid.name = name;
+  set_key(k);
+  snap = sn;
+  return true;
+}
+
 int cmp_nibblewise(const hobject_t& l, const hobject_t& r)
 {
   if (l.max < r.max)
@@ -402,14 +498,65 @@ ostream& operator<<(ostream& out, const ghobject_t& o)
   if (o.is_max())
     return out << "GHMAX";
   if (o.shard_id != shard_id_t::NO_SHARD)
-    out << std::hex << o.shard_id << std::dec << ":";
-  out << o.hobj;
-  if (o.generation != ghobject_t::NO_GEN) {
-    out << "/" << std::hex << (unsigned)(o.generation) << std::dec;
-  }
+    out << std::hex << o.shard_id << std::dec;
+  out << '@' << o.hobj << '@';
+  if (o.generation != ghobject_t::NO_GEN)
+    out << std::hex << (unsigned long long)(o.generation) << std::dec;
   return out;
 }
 
+bool ghobject_t::parse(const string& s)  // parse "GHMIN", "GHMAX" or "[hexshard]@<hobject>@[hexgen]"; returns false (leaving *this untouched) on malformed input
+{
+  if (s == "GHMIN") {
+    *this = ghobject_t();
+    return true;
+  }
+  if (s == "GHMAX") {
+    *this = ghobject_t::get_max();
+    return true;
+  }
+
+  // look for shard@ prefix
+  const char *start = s.c_str();
+  const char *p;
+  int sh = shard_id_t::NO_SHARD;
+  for (p = start; *p && isxdigit((unsigned char)*p); ++p) ;  // cast: isxdigit() on a negative char is undefined
+  if (*p != '@')  // the (possibly empty) hex shard must be terminated by '@'; this also rejects end-of-string
+    return false;
+  if (p > start) {
+    int r = sscanf(s.c_str(), "%x", &sh);
+    if (r < 1)
+      return false;
+    start = p + 1;
+  } else {
+    ++start;
+  }
+
+  // look for @generation suffix
+  long long unsigned g = NO_GEN;
+  const char *last = start + strlen(start) - 1;  // equals start - 1 (the prefix '@') when nothing follows the prefix
+  p = last;
+  while (p >= start && isxdigit((unsigned char)*p))  // never scan back past the first char after the prefix '@'
+    p--;
+  if (p < start || *p != '@')  // the closing '@' must be distinct from the prefix '@', so p - start below is never negative
+    return false;
+  if (p < last) {
+    sscanf(p + 1, "%llx", &g);
+  }
+
+  string inner(start, p - start);  // text strictly between the two '@' markers
+  hobject_t h;
+  if (!h.parse(inner)) {
+    return false;
+  }
+
+  shard_id = shard_id_t(sh);
+  hobj = h;
+  generation = g;
+  max = false;
+  return true;
+}
+
 int cmp_nibblewise(const ghobject_t& l, const ghobject_t& r)
 {
   if (l.max < r.max)
diff --git a/src/common/hobject.h b/src/common/hobject.h
index 601af40..b823708 100644
--- a/src/common/hobject.h
+++ b/src/common/hobject.h
@@ -258,6 +258,8 @@ public:
     return nspace;
   }
 
+  bool parse(const string& s);
+
   void encode(bufferlist& bl) const;
   void decode(bufferlist::iterator& bl);
   void decode(json_spirit::Value& v);
@@ -283,7 +285,7 @@ public:
 
   struct Comparator {
     bool bitwise;
-    Comparator(bool b) : bitwise(b) {}
+    explicit Comparator(bool b) : bitwise(b) {}
     bool operator()(const hobject_t& l, const hobject_t& r) const {
       if (bitwise)
 	return cmp_bitwise(l, r) < 0;
@@ -293,7 +295,7 @@ public:
   };
   struct ComparatorWithDefault {
     bool bitwise;
-    ComparatorWithDefault(bool b=true) : bitwise(b) {}
+    explicit ComparatorWithDefault(bool b=true) : bitwise(b) {}
     bool operator()(const hobject_t& l, const hobject_t& r) const {
       if (bitwise)
 	return cmp_bitwise(l, r) < 0;
@@ -415,6 +417,8 @@ public:
     shard_id = s;
   }
 
+  bool parse(const string& s);
+
   // maximum sorted value.
   static ghobject_t get_max() {
     ghobject_t h;
@@ -459,7 +463,7 @@ public:
 
   struct Comparator {
     bool bitwise;
-    Comparator(bool b) : bitwise(b) {}
+    explicit Comparator(bool b) : bitwise(b) {}
     bool operator()(const ghobject_t& l, const ghobject_t& r) const {
          if (bitwise)
 	return cmp_bitwise(l, r) < 0;
diff --git a/src/common/lockdep.cc b/src/common/lockdep.cc
index 66e1c07..18b0845 100644
--- a/src/common/lockdep.cc
+++ b/src/common/lockdep.cc
@@ -138,7 +138,14 @@ int lockdep_register(const char *name)
   pthread_mutex_lock(&lockdep_mutex);
   ceph::unordered_map<std::string, int>::iterator p = lock_ids.find(name);
   if (p == lock_ids.end()) {
-    assert(!free_ids.empty());
+    if (free_ids.empty()) {
+      lockdep_dout(0) << "ERROR OUT OF IDS .. have " << free_ids.size()
+		      << " max " << MAX_LOCKS << dendl;
+      for (auto& p : lock_names) {
+	lockdep_dout(0) << "  lock " << p.first << " " << p.second << dendl;
+      }
+      assert(free_ids.empty());
+    }
     id = free_ids.front();
     free_ids.pop_front();
 
diff --git a/src/common/mutex_debug.cc b/src/common/mutex_debug.cc
new file mode 100644
index 0000000..a854358
--- /dev/null
+++ b/src/common/mutex_debug.cc
@@ -0,0 +1,97 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2004-2006 Sage Weil <sage at newdream.net>
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation.  See file COPYING.
+ *
+ */
+#include <string>
+
+#include <boost/uuid/uuid.hpp>
+#include <boost/uuid/uuid_generators.hpp>
+#include <boost/uuid/uuid_io.hpp>
+
+#include "common/mutex_debug.h"
+#include "common/perf_counters.h"
+#include "common/ceph_context.h"
+#include "common/config.h"
+#include "include/stringify.h"
+
+namespace ceph {
+namespace mutex_debug_detail {
+enum {  // index range handed to PerfCountersBuilder in the constructor
+  l_mutex_first = 999082,
+  l_mutex_wait,  // time-average counter updated by after_lock_blocks()
+  l_mutex_last
+};
+
+mutex_debugging_base::mutex_debugging_base(const std::string &n, bool bt,
+					   CephContext *cct) :
+  id(-1), backtrace(bt), nlock(0), locked_by(thread::id()),
+  cct(cct), logger(0) {
+  if (n.empty()) {  // an unnamed mutex gets a name built from a random uuid
+    uuid_d uu;
+    uu.generate_random();
+    name = string("Unnamed-Mutex-") + uu.to_string();
+  } else {
+    name = n;
+  }
+  if (cct) {  // perf counters are only created when a CephContext is supplied
+    PerfCountersBuilder b(cct, string("mutex-") + name,
+			  l_mutex_first, l_mutex_last);
+    b.add_time_avg(l_mutex_wait, "wait",
+		   "Average time of mutex in locked state");
+    logger = b.create_perf_counters();
+    cct->get_perfcounters_collection()->add(logger);
+    logger->set(l_mutex_wait, 0);
+  }
+  if (g_lockdep)
+    _register();
+}
+
+mutex_debugging_base::~mutex_debugging_base() {
+  assert(nlock == 0);  // the mutex must not be held when it is destroyed
+  if (cct && logger) {  // undo the registration done in the constructor
+    cct->get_perfcounters_collection()->remove(logger);
+    delete logger;
+  }
+  if (g_lockdep) {
+    lockdep_unregister(id);
+  }
+}
+
+void mutex_debugging_base::_register() {  // store the id lockdep_register() returns for this name
+  id = lockdep_register(name.c_str());
+}
+void mutex_debugging_base::_will_lock() { // about to lock
+  id = lockdep_will_lock(name.c_str(), id, backtrace);
+}
+void mutex_debugging_base::_locked() {    // just locked
+  id = lockdep_locked(name.c_str(), id, backtrace);
+}
+void mutex_debugging_base::_will_unlock() {  // about to unlock
+  id = lockdep_will_unlock(name.c_str(), id);
+}
+
+ceph::mono_time mutex_debugging_base::before_lock_blocks() {  // start timestamp for a blocking lock; mono_time::min() when wait timing is disabled
+  if (logger && cct && cct->_conf->mutex_perf_counter)
+    return ceph::mono_clock::now();
+  return ceph::mono_time::min();
+}
+
+void mutex_debugging_base::after_lock_blocks(ceph::mono_time start,
+					     bool no_lockdep) {
+  if (logger && cct && cct->_conf->mutex_perf_counter)  // same condition as before_lock_blocks()
+    logger->tinc(l_mutex_wait,
+		 ceph::mono_clock::now() - start);
+  if (!no_lockdep && g_lockdep)
+    _locked();
+}
+} // namespace mutex_debug_detail
+} // namespace ceph
diff --git a/src/common/mutex_debug.h b/src/common/mutex_debug.h
new file mode 100644
index 0000000..c92a88f
--- /dev/null
+++ b/src/common/mutex_debug.h
@@ -0,0 +1,191 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2004-2006 Sage Weil <sage at newdream.net>
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation.  See file COPYING.
+ *
+ */
+
+#ifndef CEPH_COMMON_MUTEX_DEBUG_H
+#define CEPH_COMMON_MUTEX_DEBUG_H
+
+#include <system_error>
+#include <thread>
+
+#include <pthread.h>
+
+#include "include/assert.h"
+
+#include "ceph_time.h"
+#include "likely.h"
+#include "lockdep.h"
+
+class CephContext;
+class PerfCounters;
+
+namespace ceph {
+namespace mutex_debug_detail {
+class mutex_debugging_base {  // bookkeeping shared by the debug mutexes: name, lockdep id, hold count, owner thread, optional perf counters
+protected:
+  std::string name;
+  int id;  // lockdep id; initialised to -1 by the constructor
+  bool backtrace; // gather backtrace on lock acquisition
+
+  int nlock;  // current hold count
+  std::thread::id locked_by;  // owning thread; default-constructed id when not held
+  CephContext *cct;
+  PerfCounters *logger;  // stays null when no CephContext was given
+
+  void _register();
+  void _will_lock(); // about to lock
+  void _locked(); // just locked
+    void _will_unlock(); // about to unlock
+
+  mutex_debugging_base(const std::string &n = std::string(), bool bt = false,
+		       CephContext *cct = nullptr);
+  ~mutex_debugging_base();
+
+  ceph::mono_time before_lock_blocks();
+  void after_lock_blocks(ceph::mono_time start,
+			 bool no_lockdep);
+
+public:
+  bool is_locked() const {  // held by any thread
+    return (nlock > 0);
+  }
+  bool is_locked_by_me() const {  // held, and the recorded owner is the calling thread
+    return nlock > 0 && locked_by == std::this_thread::get_id();
+  }
+  operator bool() const {  // same test as is_locked_by_me()
+    return nlock > 0 && locked_by == std::this_thread::get_id();
+  }
+};
+
+template<typename Mutex>
+class mutex_debugging : public mutex_debugging_base {  // CRTP layer: Mutex supplies lock_impl/try_lock_impl/unlock_impl and a `recursive` flag
+  Mutex* impl;
+
+public:
+  mutex_debugging(const std::string &n = std::string(), bool bt = false,
+		  CephContext *cct = nullptr) :
+    mutex_debugging_base(n, bt, cct), impl(static_cast<Mutex*>(this)) {}
+
+  ~mutex_debugging() = default;
+
+  void _post_lock() {  // record ownership right after the underlying mutex was acquired
+    if (!impl->recursive)
+      assert(nlock == 0);
+    locked_by = std::this_thread::get_id();
+    nlock++;
+  }
+
+  void _pre_unlock() {  // validate and drop ownership just before the underlying mutex is released
+    assert(nlock > 0);
+    --nlock;
+    assert(locked_by == std::this_thread::get_id());
+    if (!impl->recursive)
+      assert(nlock == 0);
+    if (nlock == 0)
+      locked_by = std::thread::id();
+  }
+
+  bool try_lock(bool no_lockdep = false) {  // non-blocking acquire; returns whether the lock was taken
+    bool locked = impl->try_lock_impl();
+    if (locked) {
+      if (g_lockdep && !no_lockdep)
+	_locked();
+      _post_lock();
+    }
+    return locked;
+  }
+
+  void lock(bool no_lockdep = false) {  // blocking acquire; no_lockdep suppresses all lockdep reporting for this call
+    if (g_lockdep && !no_lockdep)
+      _will_lock();
+
+    if (try_lock(no_lockdep))  // fast path; forward no_lockdep so an opted-out caller is not reported via _locked()
+      return;
+
+    auto t = before_lock_blocks();
+    impl->lock_impl();
+    after_lock_blocks(t, no_lockdep);
+    _post_lock();
+  }
+
+  void unlock(bool no_lockdep = false) {  // release; ownership bookkeeping is checked before the real unlock
+    _pre_unlock();
+    if (!no_lockdep && g_lockdep)
+      _will_unlock();
+    impl->unlock_impl();
+  }
+};
+
+// Since this is a /debugging/ mutex just define it in terms of the
+// pthread error check mutex.
+template<bool Recursive>
+class mutex_debug_impl : public mutex_debugging<mutex_debug_impl<Recursive> > {
+private:
+  pthread_mutex_t m;
+public:
+  static constexpr bool recursive = Recursive;  // read by mutex_debugging via impl->recursive
+
+  // Mutex concept is DefaultConstructible
+  mutex_debug_impl(const std::string &n = std::string(), bool bt = false,
+		   CephContext *cct = nullptr) :
+    mutex_debugging<mutex_debug_impl<Recursive> >(n, bt, cct) {
+    if (recursive)  // choose the static initializer matching the requested kind
+      m = PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP;
+    else
+      m = PTHREAD_ERRORCHECK_MUTEX_INITIALIZER_NP;
+  }
+  // Mutex is Destructible
+  ~mutex_debug_impl() = default;
+
+  // Mutex concept is non-Copyable
+  mutex_debug_impl(const mutex_debug_impl&) = delete;
+  mutex_debug_impl& operator =(const mutex_debug_impl&) = delete;
+
+  // Mutex concept is non-Movable
+  mutex_debug_impl(mutex_debug_impl&&) = delete;
+  mutex_debug_impl& operator =(mutex_debug_impl&&) = delete;
+
+  void lock_impl() {  // throws std::system_error for EPERM/EDEADLK/EBUSY; any other non-zero result trips the assert
+    int r = pthread_mutex_lock(&m);
+    // Allowed error codes for Mutex concept
+    if (unlikely(r == EPERM ||
+		 r == EDEADLK ||
+		 r == EBUSY)) {
+      throw std::system_error(r, std::generic_category());
+    }
+    assert(r == 0);
+  }
+
+  void unlock_impl() noexcept {  // failure to unlock is treated as a programming error (assert)
+    int r = pthread_mutex_unlock(&m);
+    assert(r == 0);
+  }
+
+  bool try_lock_impl() {  // true if acquired, false on EBUSY, throws std::system_error for any other result
+    int r = pthread_mutex_trylock(&m);
+    switch (r) {
+    case 0:
+      return true;
+    case EBUSY:
+      return false;
+    default:
+      throw std::system_error(r, std::generic_category());
+    }
+  }
+};
+} // namespace mutex_debug_detail
+typedef mutex_debug_detail::mutex_debug_impl<false> mutex_debug;
+typedef mutex_debug_detail::mutex_debug_impl<true> mutex_recursive_debug;
+} // namespace ceph
+
+#endif
diff --git a/src/common/obj_bencher.cc b/src/common/obj_bencher.cc
index 525a542..a6dc3e8 100644
--- a/src/common/obj_bencher.cc
+++ b/src/common/obj_bencher.cc
@@ -102,12 +102,12 @@ void *ObjBencher::status_printer(void *_bencher) {
           << setw(10) << "finished"
           << setw(10) << "avg MB/s"
           << setw(10) << "cur MB/s"
-          << setw(10) << "last lat"
-          << setw(10) << "avg lat" << std::endl;
+          << setw(12) << "last lat(s)"
+          << setw(12) << "avg lat(s)" << std::endl;
     }
     if (cycleSinceChange)
       bandwidth = (double)(data.finished - previous_writes)
-        * (data.object_size)
+        * (data.op_size)
         / (1024*1024)
         / cycleSinceChange;
     else
@@ -140,21 +140,22 @@ void *ObjBencher::status_printer(void *_bencher) {
     if (formatter)
       formatter->open_object_section("data");
 
-    double avg_bandwidth = (double) (data.object_size) * (data.finished)
+    double avg_bandwidth = (double) (data.op_size) * (data.finished)
       / (double)(cur_time - data.start_time) / (1024*1024);
     if (previous_writes != data.finished) {
       previous_writes = data.finished;
       cycleSinceChange = 0;
       if (!formatter) {
-        bencher->out(cout, cur_time) << setfill(' ')
+        bencher->out(cout, cur_time)
+	  << setfill(' ')
           << setw(5) << i
-          << setw(8) << data.in_flight
-          << setw(10) << data.started
-          << setw(10) << data.finished
-          << setw(10) << avg_bandwidth
-          << setw(10) << bandwidth
-          << setw(10) << (double)data.cur_latency
-          << setw(10) << data.avg_latency << std::endl;
+	  << ' ' << setw(7) << data.in_flight
+          << ' ' << setw(9) << data.started
+          << ' ' << setw(9) << data.finished
+          << ' ' << setw(9) << avg_bandwidth
+          << ' ' << setw(9) << bandwidth
+          << ' ' << setw(11) << (double)data.cur_latency
+          << ' ' << setw(11) << data.avg_latency << std::endl;
       } else {
         formatter->dump_format("sec", "%d", i);
         formatter->dump_format("cur_ops", "%d", data.in_flight);
@@ -168,15 +169,16 @@ void *ObjBencher::status_printer(void *_bencher) {
     }
     else {
       if (!formatter) {
-        bencher->out(cout, cur_time) << setfill(' ')
+        bencher->out(cout, cur_time)
+	  << setfill(' ')
           << setw(5) << i
-          << setw(8) << data.in_flight
-          << setw(10) << data.started
-          << setw(10) << data.finished
-          << setw(10) << avg_bandwidth
-          << setw(10) << '0'
-          << setw(10) << '-'
-          << setw(10) << data.avg_latency << std::endl;
+	  << ' ' << setw(7) << data.in_flight
+          << ' ' << setw(9) << data.started
+          << ' ' << setw(9) << data.finished
+          << ' ' << setw(9) << avg_bandwidth
+	  << ' ' << setw(9) << '0'
+          << ' ' << setw(11) << '-'
+          << ' '<< setw(11) << data.avg_latency << std::endl;
       } else {
         formatter->dump_format("sec", "%d", i);
         formatter->dump_format("cur_ops", "%d", data.in_flight);
@@ -204,9 +206,11 @@ void *ObjBencher::status_printer(void *_bencher) {
 
 int ObjBencher::aio_bench(
   int operation, int secondsToRun,
-  int concurrentios, size_t object_size, bool cleanup, const std::string& run_name, bool no_verify) {
+  int concurrentios, size_t op_size, size_t object_size,
+  unsigned max_objects,
+  bool cleanup, const std::string& run_name, bool no_verify) {
 
-  if (concurrentios <= 0) 
+  if (concurrentios <= 0)
     return -EINVAL;
 
   int num_objects = 0;
@@ -218,18 +222,23 @@ int ObjBencher::aio_bench(
 
   //get data from previous write run, if available
   if (operation != OP_WRITE) {
-    r = fetch_bench_metadata(run_name_meta, &object_size, &num_objects, &prevPid);
+    size_t prev_op_size, prev_object_size;
+    r = fetch_bench_metadata(run_name_meta, &prev_op_size, &prev_object_size,
+			     &num_objects, &prevPid);
     if (r < 0) {
       if (r == -ENOENT)
         cerr << "Must write data before running a read benchmark!" << std::endl;
       return r;
     }
+    object_size = prev_object_size;   
+    op_size = prev_op_size;           
   }
 
-  char* contentsChars = new char[object_size];
+  char* contentsChars = new char[op_size];
   lock.Lock();
   data.done = false;
   data.object_size = object_size;
+  data.op_size = op_size;
   data.in_flight = 0;
   data.started = 0;
   data.finished = 0;
@@ -240,13 +249,13 @@ int ObjBencher::aio_bench(
   lock.Unlock();
 
   //fill in contentsChars deterministically so we can check returns
-  sanitize_object_contents(&data, data.object_size);
+  sanitize_object_contents(&data, data.op_size);
 
   if (formatter)
     formatter->open_object_section("bench");
 
   if (OP_WRITE == operation) {
-    r = write_bench(secondsToRun, concurrentios, run_name_meta);
+    r = write_bench(secondsToRun, concurrentios, run_name_meta, max_objects);
     if (r != 0) goto out;
   }
   else if (OP_SEQ_READ == operation) {
@@ -259,7 +268,8 @@ int ObjBencher::aio_bench(
   }
 
   if (OP_WRITE == operation && cleanup) {
-    r = fetch_bench_metadata(run_name_meta, &object_size, &num_objects, &prevPid);
+    r = fetch_bench_metadata(run_name_meta, &op_size, &object_size,
+			     &num_objects, &prevPid);
     if (r < 0) {
       if (r == -ENOENT)
         cerr << "Should never happen: bench metadata missing for current run!" << std::endl;
@@ -285,7 +295,7 @@ int ObjBencher::aio_bench(
 }
 
 struct lock_cond {
-  lock_cond(Mutex *_lock) : lock(_lock) {}
+  explicit lock_cond(Mutex *_lock) : lock(_lock) {}  // explicit: no implicit Mutex* -> lock_cond conversion
   Mutex *lock;
   Cond cond;
 };
@@ -322,11 +332,14 @@ static T vec_stddev(vector<T>& v)
   return sqrt(stddev);
 }
 
-int ObjBencher::fetch_bench_metadata(const std::string& metadata_file, size_t* object_size, int* num_objects, int* prevPid) {
+int ObjBencher::fetch_bench_metadata(const std::string& metadata_file,
+				     size_t *op_size, size_t* object_size,
+				     int* num_objects, int* prevPid) {
   int r = 0;
   bufferlist object_data;
 
-  r = sync_read(metadata_file, object_data, sizeof(int) * 2 + sizeof(size_t));
+  r = sync_read(metadata_file, object_data,
+		sizeof(int) * 2 + sizeof(size_t) * 2);
   if (r <= 0) {
     // treat an empty file as a file that does not exist
     if (r == 0) {
@@ -338,24 +351,34 @@ int ObjBencher::fetch_bench_metadata(const std::string& metadata_file, size_t* o
   ::decode(*object_size, p);
   ::decode(*num_objects, p);
   ::decode(*prevPid, p);
+  if (!p.end()) {
+    ::decode(*op_size, p);
+  } else {
+    *op_size = *object_size;
+  }
 
   return 0;
 }
 
 int ObjBencher::write_bench(int secondsToRun,
-			    int concurrentios, const string& run_name_meta) {
+			    int concurrentios, const string& run_name_meta,
+			    unsigned max_objects) {
   if (concurrentios <= 0) 
     return -EINVAL;
   
   if (!formatter) {
     out(cout) << "Maintaining " << concurrentios << " concurrent writes of "
-           << data.object_size << " bytes for up to "
-           << secondsToRun << " seconds"
-           << std::endl;
+	      << data.op_size << " bytes to objects of size "
+	      << data.object_size << " for up to "
+	      << secondsToRun << " seconds or "
+	      << max_objects << " objects"
+	      << std::endl;
   } else {
     formatter->dump_format("concurrent_ios", "%d", concurrentios);
     formatter->dump_format("object_size", "%d", data.object_size);
+    formatter->dump_format("op_size", "%d", data.op_size);
     formatter->dump_format("seconds_to_run", "%d", secondsToRun);
+    formatter->dump_format("max_objects", "%d", max_objects);
   }
   bufferlist* newContents = 0;
 
@@ -377,14 +400,18 @@ int ObjBencher::write_bench(int secondsToRun,
   utime_t runtime;
   utime_t timePassed;
 
+  unsigned writes_per_object = 1;
+  if (data.op_size)
+    writes_per_object = data.object_size / data.op_size;
+
   r = completions_init(concurrentios);
 
   //set up writes so I can start them together
   for (int i = 0; i<concurrentios; ++i) {
-    name[i] = generate_object_name(i);
+    name[i] = generate_object_name(i / writes_per_object);
     contents[i] = new bufferlist();
-    snprintf(data.object_contents, data.object_size, "I'm the %16dth object!", i);
-    contents[i]->append(data.object_contents, data.object_size);
+    snprintf(data.object_contents, data.op_size, "I'm the %16dth op!", i);
+    contents[i]->append(data.object_contents, data.op_size);
   }
 
   pthread_t print_thread;
@@ -400,7 +427,8 @@ int ObjBencher::write_bench(int secondsToRun,
     r = create_completion(i, _aio_cb, (void *)&lc);
     if (r < 0)
       goto ERR;
-    r = aio_write(name[i], i, *contents[i], data.object_size);
+    r = aio_write(name[i], i, *contents[i], data.op_size,
+		  data.op_size * (i % writes_per_object));
     if (r < 0) { //naughty, doesn't clean up heap
       goto ERR;
     }
@@ -412,6 +440,7 @@ int ObjBencher::write_bench(int secondsToRun,
 
   //keep on adding new writes as old ones complete until we've passed minimum time
   int slot;
+  int num_objects;
 
   //don't need locking for reads because other thread doesn't write
 
@@ -419,7 +448,7 @@ int ObjBencher::write_bench(int secondsToRun,
   stopTime = data.start_time + runtime;
   slot = 0;
   lock.Lock();
-  while(ceph_clock_now(cct) < stopTime) {
+  while (!secondsToRun || ceph_clock_now(cct) < stopTime) {
     bool found = false;
     while (1) {
       int old_slot = slot;
@@ -439,9 +468,9 @@ int ObjBencher::write_bench(int secondsToRun,
     }
     lock.Unlock();
     //create new contents and name on the heap, and fill them
-    newName = generate_object_name(data.started);
+    newName = generate_object_name(data.started / writes_per_object);
     newContents = contents[slot];
-    snprintf(newContents->c_str(), data.object_size, "I'm the %16dth object!", data.started);
+    snprintf(newContents->c_str(), data.op_size, "I'm the %16dth op!", data.started);
     // we wrote to buffer, going around internal crc cache, so invalidate it now.
     newContents->invalidate_crc();
 
@@ -469,7 +498,8 @@ int ObjBencher::write_bench(int secondsToRun,
     r = create_completion(slot, _aio_cb, &lc);
     if (r < 0)
       goto ERR;
-    r = aio_write(newName, slot, *newContents, data.object_size);
+    r = aio_write(newName, slot, *newContents, data.op_size,
+		  data.op_size * (data.started % writes_per_object));
     if (r < 0) {//naughty; doesn't clean up heap space.
       goto ERR;
     }
@@ -477,6 +507,10 @@ int ObjBencher::write_bench(int secondsToRun,
     lock.Lock();
     ++data.started;
     ++data.in_flight;
+    if (max_objects &&
+	data.started > (int)((data.object_size * max_objects + data.op_size - 1) /
+			     data.op_size))
+      break;
   }
   lock.Unlock();
 
@@ -511,13 +545,14 @@ int ObjBencher::write_bench(int secondsToRun,
   pthread_join(print_thread, NULL);
 
   double bandwidth;
-  bandwidth = ((double)data.finished)*((double)data.object_size)/(double)timePassed;
+  bandwidth = ((double)data.finished)*((double)data.op_size)/(double)timePassed;
   bandwidth = bandwidth/(1024*1024); // we want it in MB/sec
 
   if (!formatter) {
     out(cout) << "Total time run:         " << timePassed << std::endl
        << "Total writes made:      " << data.finished << std::endl
-       << "Write size:             " << data.object_size << std::endl
+       << "Write size:             " << data.op_size << std::endl
+       << "Object size:            " << data.object_size << std::endl      
        << "Bandwidth (MB/sec):     " << setprecision(3) << bandwidth << std::endl
        << "Stddev Bandwidth:       " << vec_stddev(data.history.bandwidth) << std::endl
        << "Max bandwidth (MB/sec): " << data.idata.max_bandwidth << std::endl
@@ -526,14 +561,15 @@ int ObjBencher::write_bench(int secondsToRun,
        << "Stddev IOPS:            " << vec_stddev(data.history.iops) << std::endl
        << "Max IOPS:               " << data.idata.max_iops << std::endl
        << "Min IOPS:               " << data.idata.min_iops << std::endl
-       << "Average Latency:        " << data.avg_latency << std::endl
-       << "Stddev Latency:         " << vec_stddev(data.history.latency) << std::endl
-       << "Max latency:            " << data.max_latency << std::endl
-       << "Min latency:            " << data.min_latency << std::endl;
+       << "Average Latency(s):     " << data.avg_latency << std::endl
+       << "Stddev Latency(s):      " << vec_stddev(data.history.latency) << std::endl
+       << "Max latency(s):         " << data.max_latency << std::endl
+       << "Min latency(s):         " << data.min_latency << std::endl;
   } else {
     formatter->dump_format("total_time_run", "%f", (double)timePassed);
     formatter->dump_format("total_writes_made", "%d", data.finished);
-    formatter->dump_format("write_size", "%d", data.object_size);
+    formatter->dump_format("write_size", "%d", data.op_size);
+    formatter->dump_format("object_size", "%d", data.object_size);
     formatter->dump_format("bandwidth", "%f", bandwidth);
     formatter->dump_format("stddev_bandwidth", "%f", vec_stddev(data.history.bandwidth));
     formatter->dump_format("max_bandwidth", "%f", data.idata.max_bandwidth);
@@ -549,8 +585,10 @@ int ObjBencher::write_bench(int secondsToRun,
   }
   //write object size/number data for read benchmarks
   ::encode(data.object_size, b_write);
-  ::encode(data.finished, b_write);
+  num_objects = (data.finished + writes_per_object - 1) / writes_per_object;
+  ::encode(num_objects, b_write);
   ::encode(getpid(), b_write);
+  ::encode(data.op_size, b_write);
 
   // persist meta-data for further cleanup or read
   sync_write(run_name_meta, b_write, sizeof(int)*3);
@@ -591,16 +629,20 @@ int ObjBencher::seq_read_bench(int seconds_to_run, int num_objects, int concurre
   double total_latency = 0;
   int r = 0;
   utime_t runtime;
-  sanitize_object_contents(&data, data.object_size); //clean it up once; subsequent
+  sanitize_object_contents(&data, data.op_size); //clean it up once; subsequent
   //changes will be safe because string length should remain the same
 
+  unsigned writes_per_object = 1;
+  if (data.op_size)
+    writes_per_object = data.object_size / data.op_size;
+
   r = completions_init(concurrentios);
   if (r < 0)
     return r;
 
   //set up initial reads
   for (int i = 0; i < concurrentios; ++i) {
-    name[i] = generate_object_name(i, pid);
+    name[i] = generate_object_name(i / writes_per_object, pid);
     contents[i] = new bufferlist();
   }
 
@@ -619,7 +661,8 @@ int ObjBencher::seq_read_bench(int seconds_to_run, int num_objects, int concurre
     index[i] = i;
     start_times[i] = ceph_clock_now(cct);
     create_completion(i, _aio_cb, (void *)&lc);
-    r = aio_read(name[i], i, contents[i], data.object_size);
+    r = aio_read(name[i], i, contents[i], data.op_size,
+		 data.op_size * (i % writes_per_object));
     if (r < 0) { //naughty, doesn't clean up heap -- oh, or handle the print thread!
       cerr << "r = " << r << std::endl;
       goto ERR;
@@ -635,8 +678,8 @@ int ObjBencher::seq_read_bench(int seconds_to_run, int num_objects, int concurre
   bufferlist *cur_contents;
 
   slot = 0;
-  while (seconds_to_run && (ceph_clock_now(cct) < finish_time) &&
-      num_objects > data.started) {
+  while ((!seconds_to_run || ceph_clock_now(cct) < finish_time) &&
+	 num_objects > data.started) {
     lock.Lock();
     int old_slot = slot;
     bool found = false;
@@ -667,15 +710,15 @@ int ObjBencher::seq_read_bench(int seconds_to_run, int num_objects, int concurre
     cur_contents->invalidate_crc();
   
     if (!no_verify) {
-      snprintf(data.object_contents, data.object_size, "I'm the %16dth object!", current_index);
-      if ( (cur_contents->length() != data.object_size) || 
-           (memcmp(data.object_contents, cur_contents->c_str(), data.object_size) != 0) ) {
+      snprintf(data.object_contents, data.op_size, "I'm the %16dth op!", current_index);
+      if ( (cur_contents->length() != data.op_size) || 
+           (memcmp(data.object_contents, cur_contents->c_str(), data.op_size) != 0) ) {
         cerr << name[slot] << " is not correct!" << std::endl;
         ++errors;
       }
     }
 
-    newName = generate_object_name(data.started, pid);
+    newName = generate_object_name(data.started / writes_per_object, pid);
     index[slot] = data.started;
     lock.Unlock();
     completion_wait(slot);
@@ -698,7 +741,8 @@ int ObjBencher::seq_read_bench(int seconds_to_run, int num_objects, int concurre
     //start new read and check data if requested
     start_times[slot] = ceph_clock_now(cct);
     create_completion(slot, _aio_cb, (void *)&lc);
-    r = aio_read(newName, slot, contents[slot], data.object_size);
+    r = aio_read(newName, slot, contents[slot], data.op_size,
+		 data.op_size * (data.started % writes_per_object));
     if (r < 0) {
       goto ERR;
     }
@@ -729,10 +773,10 @@ int ObjBencher::seq_read_bench(int seconds_to_run, int num_objects, int concurre
     --data.in_flight;
     release_completion(slot);
     if (!no_verify) {
-      snprintf(data.object_contents, data.object_size, "I'm the %16dth object!", index[slot]);
+      snprintf(data.object_contents, data.op_size, "I'm the %16dth op!", index[slot]);
       lock.Unlock();
-      if ((contents[slot]->length() != data.object_size) || 
-         (memcmp(data.object_contents, contents[slot]->c_str(), data.object_size) != 0)) {
+      if ((contents[slot]->length() != data.op_size) || 
+         (memcmp(data.object_contents, contents[slot]->c_str(), data.op_size) != 0)) {
         cerr << name[slot] << " is not correct!" << std::endl;
         ++errors;
       }
@@ -750,25 +794,27 @@ int ObjBencher::seq_read_bench(int seconds_to_run, int num_objects, int concurre
   pthread_join(print_thread, NULL);
 
   double bandwidth;
-  bandwidth = ((double)data.finished)*((double)data.object_size)/(double)runtime;
+  bandwidth = ((double)data.finished)*((double)data.op_size)/(double)runtime;
   bandwidth = bandwidth/(1024*1024); // we want it in MB/sec
 
   if (!formatter) {
     out(cout) << "Total time run:       " << runtime << std::endl
        << "Total reads made:     " << data.finished << std::endl
-       << "Read size:            " << data.object_size << std::endl
+       << "Read size:            " << data.op_size << std::endl
+       << "Object size:          " << data.object_size << std::endl
        << "Bandwidth (MB/sec):   " << setprecision(3) << bandwidth << std::endl
        << "Average IOPS          " << (int)(data.finished/runtime) << std::endl
        << "Stddev IOPS:          " << vec_stddev(data.history.iops) << std::endl
        << "Max IOPS:             " << data.idata.max_iops << std::endl
        << "Min IOPS:             " << data.idata.min_iops << std::endl
-       << "Average Latency:      " << data.avg_latency << std::endl
-       << "Max latency:          " << data.max_latency << std::endl
-       << "Min latency:          " << data.min_latency << std::endl;
+       << "Average Latency(s):   " << data.avg_latency << std::endl
+       << "Max latency(s):       " << data.max_latency << std::endl
+       << "Min latency(s):       " << data.min_latency << std::endl;
   } else {
     formatter->dump_format("total_time_run", "%f", (double)runtime);
     formatter->dump_format("total_reads_made", "%d", data.finished);
-    formatter->dump_format("read_size", "%d", data.object_size);
+    formatter->dump_format("read_size", "%d", data.op_size);
+    formatter->dump_format("object_size", "%d", data.object_size);
     formatter->dump_format("bandwidth", "%f", bandwidth);
     formatter->dump_format("average_iops", "%d", (int)(data.finished/runtime));
     formatter->dump_format("stddev_iops", "%d", vec_stddev(data.history.iops));
@@ -810,9 +856,13 @@ int ObjBencher::rand_read_bench(int seconds_to_run, int num_objects, int concurr
   double total_latency = 0;
   int r = 0;
   utime_t runtime;
-  sanitize_object_contents(&data, data.object_size); //clean it up once; subsequent
+  sanitize_object_contents(&data, data.op_size); //clean it up once; subsequent
   //changes will be safe because string length should remain the same
 
+  unsigned writes_per_object = 1;
+  if (data.op_size)
+    writes_per_object = data.object_size / data.op_size;
+
   srand (time(NULL));
 
   r = completions_init(concurrentios);
@@ -821,7 +871,7 @@ int ObjBencher::rand_read_bench(int seconds_to_run, int num_objects, int concurr
 
   //set up initial reads
   for (int i = 0; i < concurrentios; ++i) {
-    name[i] = generate_object_name(i, pid);
+    name[i] = generate_object_name(i / writes_per_object, pid);
     contents[i] = new bufferlist();
   }
 
@@ -840,7 +890,8 @@ int ObjBencher::rand_read_bench(int seconds_to_run, int num_objects, int concurr
     index[i] = i;
     start_times[i] = ceph_clock_now(g_ceph_context);
     create_completion(i, _aio_cb, (void *)&lc);
-    r = aio_read(name[i], i, contents[i], data.object_size);
+    r = aio_read(name[i], i, contents[i], data.op_size,
+		 data.op_size * (i % writes_per_object));
     if (r < 0) { //naughty, doesn't clean up heap -- oh, or handle the print thread!
       cerr << "r = " << r << std::endl;
       goto ERR;
@@ -857,7 +908,7 @@ int ObjBencher::rand_read_bench(int seconds_to_run, int num_objects, int concurr
   int rand_id;
 
   slot = 0;
-  while (seconds_to_run && (ceph_clock_now(g_ceph_context) < finish_time)) {
+  while ((!seconds_to_run || ceph_clock_now(g_ceph_context) < finish_time)) {
     lock.Lock();
     int old_slot = slot;
     bool found = false;
@@ -903,16 +954,16 @@ int ObjBencher::rand_read_bench(int seconds_to_run, int num_objects, int concurr
     lock.Unlock();
     
     if (!no_verify) {
-      snprintf(data.object_contents, data.object_size, "I'm the %16dth object!", current_index);
-      if ((cur_contents->length() != data.object_size) || 
-          (memcmp(data.object_contents, cur_contents->c_str(), data.object_size) != 0)) {
+      snprintf(data.object_contents, data.op_size, "I'm the %16dth op!", current_index);
+      if ((cur_contents->length() != data.op_size) || 
+          (memcmp(data.object_contents, cur_contents->c_str(), data.op_size) != 0)) {
         cerr << name[slot] << " is not correct!" << std::endl;
         ++errors;
       }
     } 
 
     rand_id = rand() % num_objects;
-    newName = generate_object_name(rand_id, pid);
+    newName = generate_object_name(rand_id / writes_per_object, pid);
     index[slot] = rand_id;
     release_completion(slot);
 
@@ -922,7 +973,8 @@ int ObjBencher::rand_read_bench(int seconds_to_run, int num_objects, int concurr
     //start new read and check data if requested
     start_times[slot] = ceph_clock_now(g_ceph_context);
     create_completion(slot, _aio_cb, (void *)&lc);
-    r = aio_read(newName, slot, contents[slot], data.object_size);
+    r = aio_read(newName, slot, contents[slot], data.op_size,
+		 data.op_size * (rand_id % writes_per_object));
     if (r < 0) {
       goto ERR;
     }
@@ -954,10 +1006,10 @@ int ObjBencher::rand_read_bench(int seconds_to_run, int num_objects, int concurr
     --data.in_flight;
     release_completion(slot);
     if (!no_verify) {
-      snprintf(data.object_contents, data.object_size, "I'm the %16dth object!", index[slot]);
+      snprintf(data.object_contents, data.op_size, "I'm the %16dth op!", index[slot]);
       lock.Unlock();
-      if ((contents[slot]->length() != data.object_size) || 
-          (memcmp(data.object_contents, contents[slot]->c_str(), data.object_size) != 0)) {
+      if ((contents[slot]->length() != data.op_size) || 
+          (memcmp(data.object_contents, contents[slot]->c_str(), data.op_size) != 0)) {
         cerr << name[slot] << " is not correct!" << std::endl;
         ++errors;
       }
@@ -975,25 +1027,27 @@ int ObjBencher::rand_read_bench(int seconds_to_run, int num_objects, int concurr
   pthread_join(print_thread, NULL);
 
   double bandwidth;
-  bandwidth = ((double)data.finished)*((double)data.object_size)/(double)runtime;
+  bandwidth = ((double)data.finished)*((double)data.op_size)/(double)runtime;
   bandwidth = bandwidth/(1024*1024); // we want it in MB/sec
 
   if (!formatter) {
     out(cout) << "Total time run:       " << runtime << std::endl
        << "Total reads made:     " << data.finished << std::endl
-       << "Read size:            " << data.object_size << std::endl
+       << "Read size:            " << data.op_size << std::endl
+       << "Object size:          " << data.object_size << std::endl
        << "Bandwidth (MB/sec):   " << setprecision(3) << bandwidth << std::endl
        << "Average IOPS:         " << (int)(data.finished/runtime) << std::endl
        << "Stddev IOPS:          " << vec_stddev(data.history.iops) << std::endl
        << "Max IOPS:             " << data.idata.max_iops << std::endl
        << "Min IOPS:             " << data.idata.min_iops << std::endl
-       << "Average Latency:      " << data.avg_latency << std::endl
-       << "Max latency:          " << data.max_latency << std::endl
-       << "Min latency:          " << data.min_latency << std::endl;
+       << "Average Latency(s):   " << data.avg_latency << std::endl
+       << "Max latency(s):       " << data.max_latency << std::endl
+       << "Min latency(s):       " << data.min_latency << std::endl;
   } else {
     formatter->dump_format("total_time_run", "%f", (double)runtime);
     formatter->dump_format("total_reads_made", "%d", data.finished);
-    formatter->dump_format("read_size", "%d", data.object_size);
+    formatter->dump_format("read_size", "%d", data.op_size);
+    formatter->dump_format("object_size", "%d", data.object_size);
     formatter->dump_format("bandwidth", "%f", bandwidth);
     formatter->dump_format("average_iops", "%d", (int)(data.finished/runtime));
     formatter->dump_format("stddev_iops", "%d", vec_stddev(data.history.iops));
@@ -1017,14 +1071,14 @@ int ObjBencher::rand_read_bench(int seconds_to_run, int num_objects, int concurr
 
 int ObjBencher::clean_up(const std::string& prefix, int concurrentios, const std::string& run_name) {
   int r = 0;
-  size_t object_size;
+  size_t op_size, object_size;
   int num_objects;
   int prevPid;
 
   // default meta object if user does not specify one
   const std::string run_name_meta = (run_name.empty() ? BENCH_LASTRUN_METADATA : run_name);
 
-  r = fetch_bench_metadata(run_name_meta, &object_size, &num_objects, &prevPid);
+  r = fetch_bench_metadata(run_name_meta, &op_size, &object_size, &num_objects, &prevPid);
   if (r < 0) {
     // if the metadata file is not found we should try to do a linear search on the prefix
     if (r == -ENOENT && prefix != "") {
diff --git a/src/common/obj_bencher.h b/src/common/obj_bencher.h
index 1c96815..e39e7df 100644
--- a/src/common/obj_bencher.h
+++ b/src/common/obj_bencher.h
@@ -37,6 +37,7 @@ struct bench_history {
 struct bench_data {
   bool done; //is the benchmark is done
   size_t object_size; //the size of the objects
+  size_t op_size;     // the size of the read/write ops
   // same as object_size for write tests
   int in_flight; //number of reads/writes being waited on
   int started;
@@ -71,9 +72,10 @@ protected:
 
   struct bench_data data;
 
-  int fetch_bench_metadata(const std::string& metadata_file, size_t* object_size, int* num_objects, int* prevPid);
+  int fetch_bench_metadata(const std::string& metadata_file, size_t* op_size,
+			   size_t* object_size, int* num_objects, int* prevPid);
 
-  int write_bench(int secondsToRun, int concurrentios, const string& run_name_meta);
+  int write_bench(int secondsToRun, int concurrentios, const string& run_name_meta, unsigned max_objects);
   int seq_read_bench(int secondsToRun, int num_objects, int concurrentios, int writePid, bool no_verify=false);
   int rand_read_bench(int secondsToRun, int num_objects, int concurrentios, int writePid, bool no_verify=false);
 
@@ -90,8 +92,8 @@ protected:
   virtual int completion_wait(int slot) = 0;
   virtual int completion_ret(int slot) = 0;
 
-  virtual int aio_read(const std::string& oid, int slot, bufferlist *pbl, size_t len) = 0;
-  virtual int aio_write(const std::string& oid, int slot, bufferlist& bl, size_t len) = 0;
+  virtual int aio_read(const std::string& oid, int slot, bufferlist *pbl, size_t len, size_t offset) = 0;
+  virtual int aio_write(const std::string& oid, int slot, bufferlist& bl, size_t len, size_t offset) = 0;
   virtual int aio_remove(const std::string& oid, int slot) = 0;
   virtual int sync_read(const std::string& oid, bufferlist& bl, size_t len) = 0;
   virtual int sync_write(const std::string& oid, bufferlist& bl, size_t len) = 0;
@@ -103,11 +105,12 @@ protected:
   ostream& out(ostream& os);
   ostream& out(ostream& os, utime_t& t);
 public:
-  ObjBencher(CephContext *cct_) : show_time(false), cct(cct_), lock("ObjBencher::lock") {}
+  explicit ObjBencher(CephContext *cct_) : show_time(false), cct(cct_), lock("ObjBencher::lock") {}
   virtual ~ObjBencher() {}
   int aio_bench(
     int operation, int secondsToRun,
-    int concurrentios, size_t op_size, bool cleanup, const std::string& run_name, bool no_verify=false);
+    int concurrentios, size_t op_size, size_t object_size, unsigned max_objects,
+    bool cleanup, const std::string& run_name, bool no_verify=false);
   int clean_up(const std::string& prefix, int concurrentios, const std::string& run_name);
 
   void set_show_time(bool dt) {
diff --git a/src/common/pick_address.cc b/src/common/pick_address.cc
index f685362..a47a1d9 100644
--- a/src/common/pick_address.cc
+++ b/src/common/pick_address.cc
@@ -51,7 +51,7 @@ static const struct sockaddr *find_ip_in_subnet_list(CephContext *cct,
 // observe this change
 struct Observer : public md_config_obs_t {
   const char *keys[2];
-  Observer(const char *c) {
+  explicit Observer(const char *c) {
     keys[0] = c;
     keys[1] = NULL;
   }
diff --git a/src/common/shunique_lock.h b/src/common/shunique_lock.h
new file mode 100644
index 0000000..b7ad08c
--- /dev/null
+++ b/src/common/shunique_lock.h
@@ -0,0 +1,395 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_COMMON_SHUNIQUE_LOCK_H
+#define CEPH_COMMON_SHUNIQUE_LOCK_H
+
+#include <mutex>
+#include <system_error>
+#include <boost/thread/shared_mutex.hpp>
+
+namespace ceph {
+// This is a 'lock' class in the style of shared_lock and
+// unique_lock. Like shared_mutex it implements both Lockable and
+// SharedLockable.
+
+// My rationale is thus: one of the advantages of unique_lock is that
+// I can pass a thread of execution's control of a lock around as a
+// parameter. So that methods further down the call stack can unlock
+// it, do something, relock it, and have the lock state be known by
+// the caller afterward, explicitly. The shared_lock class offers a
+// similar advantage for shared locks, but each class is one or the
+// other. In Objecter we have calls that in most cases need /a/ lock
+// on the shared mutex, and whether it's shared or exclusive doesn't
+// matter. In some circumstances they may drop the shared lock and
+// reacquire an exclusive one. This could be handled by passing both a
+// shared and unique lock down the call stack. This is vexatious and
+// shameful.
+
+// Wanting to avoid heaping shame and vexation upon myself, I threw
+// this class together.
+
+// This class makes no attempt to support atomic upgrade or
+// downgrade. I don't want either. Matt has convinced me that if you
+// think you want them you've usually made a mistake somewhere. It is
+// exactly and only a reification of the state held on a shared mutex.
+
+/// Acquire unique ownership of the mutex.
+struct acquire_unique_t { };
+
+/// Acquire shared ownership of the mutex.
+struct acquire_shared_t { };
+
+constexpr acquire_unique_t acquire_unique { };
+constexpr acquire_shared_t acquire_shared { };
+
+template<typename Mutex>
+class shunique_lock {
+public:
+  typedef Mutex mutex_type;
+  typedef std::unique_lock<Mutex> unique_lock_type;
+  typedef boost::shared_lock<Mutex> shared_lock_type;
+
+  shunique_lock() noexcept : m(nullptr), o(ownership::none) { }
+
+  // We do not provide a default locking/try_locking constructor that
+  // takes only the mutex, since it is not clear whether to take it
+  // shared or unique. We explicitly require the use of std::defer_lock
+  // to prevent Nasty Surprises.
+
+  shunique_lock(mutex_type& m, std::defer_lock_t) noexcept
+    : m(&m), o(ownership::none) { }
+
+  shunique_lock(mutex_type& m, acquire_unique_t)
+    : m(&m), o(ownership::none) {
+    lock();
+  }
+
+  shunique_lock(mutex_type& m, acquire_shared_t)
+    : m(&m), o(ownership::none) {
+    lock_shared();
+  }
+
+  template<typename AcquireType>
+  shunique_lock(mutex_type& m, AcquireType at, std::try_to_lock_t)
+    : m(&m), o(ownership::none) {
+    try_lock(at);
+  }
+
+  shunique_lock(mutex_type& m, acquire_unique_t, std::adopt_lock_t)
+    : m(&m), o(ownership::unique) {
+    // You'd better actually have a lock, or I will find you and I
+    // will hunt you down.
+  }
+
+  shunique_lock(mutex_type& m, acquire_shared_t, std::adopt_lock_t)
+    : m(&m), o(ownership::shared) {
+  }
+
+  template<typename AcquireType, typename Clock, typename Duration>
+  shunique_lock(mutex_type& m, AcquireType at,
+		const std::chrono::time_point<Clock, Duration>& t)
+    : m(&m), o(ownership::none) {
+    try_lock_until(at, t);
+  }
+
+  template<typename AcquireType, typename Rep, typename Period>
+  shunique_lock(mutex_type& m, AcquireType at,
+		const std::chrono::duration<Rep, Period>& dur)
+    : m(&m), o(ownership::none) {
+    try_lock_for(at, dur);
+  }
+
+  ~shunique_lock() {
+    switch (o) {
+    case ownership::none:
+      return;
+      break;
+    case ownership::unique:
+      m->unlock();
+      break;
+    case ownership::shared:
+      m->unlock_shared();
+      break;
+    }
+  }
+
+  shunique_lock(shunique_lock const&) = delete;
+  shunique_lock& operator=(shunique_lock const&) = delete;
+
+  shunique_lock(shunique_lock&& l) noexcept : shunique_lock() {
+    swap(l);
+  }
+
+  shunique_lock(unique_lock_type&& l) noexcept {
+    if (l.owns_lock())
+      o = ownership::unique;
+    else
+      o = ownership::none;
+    m = l.release();
+  }
+
+  shunique_lock(shared_lock_type&& l) noexcept {
+    if (l.owns_lock())
+      o = ownership::shared;
+    else
+      o = ownership::none;
+    m = l.release();
+  }
+
+  shunique_lock& operator=(shunique_lock&& l) noexcept {
+    shunique_lock(std::move(l)).swap(*this);
+    return *this;
+  }
+
+  shunique_lock& operator=(unique_lock_type&& l) noexcept {
+    shunique_lock(std::move(l)).swap(*this);
+    return *this;
+  }
+
+  shunique_lock& operator=(shared_lock_type&& l) noexcept {
+    shunique_lock(std::move(l)).swap(*this);
+    return *this;
+  }
+
+  void lock() {
+    lockable();
+    m->lock();
+    o = ownership::unique;
+  }
+
+  void lock_shared() {
+    lockable();
+    m->lock_shared();
+    o = ownership::shared;
+  }
+
+  void lock(ceph::acquire_unique_t) {
+    lock();
+  }
+
+  void lock(ceph::acquire_shared_t) {
+    lock_shared();
+  }
+
+  bool try_lock() {
+    lockable();
+    if (m->try_lock()) {
+      o = ownership::unique;
+      return true;
+    }
+    return false;
+  }
+
+  bool try_lock_shared() {
+    lockable();
+    if (m->try_lock_shared()) {
+      o = ownership::shared;
+      return true;
+    }
+    return false;
+  }
+
+  bool try_lock(ceph::acquire_unique_t) {
+    return try_lock();
+  }
+
+  bool try_lock(ceph::acquire_shared_t) {
+    return try_lock_shared();
+  }
+
+  template<typename Rep, typename Period>
+  bool try_lock_for(const std::chrono::duration<Rep, Period>& dur) {
+    lockable();
+    if (m->try_lock_for(dur)) {
+      o = ownership::unique;
+      return true;
+    }
+    return false;
+  }
+
+  template<typename Rep, typename Period>
+  bool try_lock_shared_for(const std::chrono::duration<Rep, Period>& dur) {
+    lockable();
+    if (m->try_lock_shared_for(dur)) {
+      o = ownership::shared;
+      return true;
+    }
+    return false;
+  }
+
+  template<typename Rep, typename Period>
+  bool try_lock_for(ceph::acquire_unique_t,
+		    const std::chrono::duration<Rep, Period>& dur) {
+    return try_lock_for(dur);
+  }
+
+  template<typename Rep, typename Period>
+  bool try_lock_for(ceph::acquire_shared_t,
+		    const std::chrono::duration<Rep, Period>& dur) {
+    return try_lock_shared_for(dur);
+  }
+
+  template<typename Clock, typename Duration>
+  bool try_lock_until(const std::chrono::time_point<Clock, Duration>& time) {
+    lockable();
+    if (m->try_lock_until(time)) {
+      o = ownership::unique;
+      return true;
+    }
+    return false;
+  }
+
+  template<typename Clock, typename Duration>
+  bool try_lock_shared_until(const std::chrono::time_point<Clock,
+			     Duration>& time) {
+    lockable();
+    if (m->try_lock_shared_until(time)) {
+      o = ownership::shared;
+      return true;
+    }
+    return false;
+  }
+
+  template<typename Clock, typename Duration>
+  bool try_lock_until(ceph::acquire_unique_t,
+		      const std::chrono::time_point<Clock, Duration>& time) {
+    return try_lock_until(time);
+  }
+
+  template<typename Clock, typename Duration>
+  bool try_lock_until(ceph::acquire_shared_t,
+		      const std::chrono::time_point<Clock, Duration>& time) {
+    return try_lock_shared_until(time);
+  }
+
+  // Only have a single unlock method. Otherwise we'd be building an
+  // Acme lock class suitable only for ravenous coyotes desperate to
+  // devour a road runner. It would be bad. It would be disgusting. It
+  // would be infelicitous as heck. It would leave our developers in a
+  // state of seeming safety unaware of the yawning chasm of failure
+  // that had opened beneath their feet that would soon transition
+  // into a sickening realization of the error they made and a brief
+  // moment of blinking self pity before their program hurled itself
+  // into undefined behaviour and plummeted up the stack with core
+  // dumps trailing behind it.
+
+  void unlock() {
+    switch (o) {
+    case ownership::none:
+      throw std::system_error((int)std::errc::resource_deadlock_would_occur,
+			      std::generic_category());
+      break;
+
+    case ownership::unique:
+      m->unlock();
+      break;
+
+    case ownership::shared:
+      m->unlock_shared();
+      break;
+    }
+    o = ownership::none;
+  }
+
+  // Setters
+
+  void swap(shunique_lock& u) noexcept {
+    std::swap(m, u.m);
+    std::swap(o, u.o);
+  }
+
+  mutex_type* release() noexcept {
+    o = ownership::none;
+    mutex_type* tm = m;
+    m = nullptr;
+    return tm;
+  }
+
+  // Ideally I'd rather make a move constructor for std::unique_lock
+  // that took a shunique_lock, but obviously I can't. Instead, hand the
+  // mutex and any unique ownership to a unique_lock, emptying *this.
+  // Throws operation_not_permitted if the mutex is held shared.
+  unique_lock_type release_to_unique() {
+    if (o == ownership::shared)
+      throw std::system_error((int)std::errc::operation_not_permitted,
+			      std::generic_category());
+    // No mutex (e.g. default-constructed or released): never touch *m.
+    if (m == nullptr)
+      return unique_lock_type();
+    mutex_type* tm = m;
+    m = nullptr;
+    if (o == ownership::unique) {
+      o = ownership::none;
+      return unique_lock_type(*tm, std::adopt_lock);
+    }
+    return unique_lock_type(*tm, std::defer_lock);
+  }
+
+  // Hand the mutex and any shared ownership to a shared_lock, emptying
+  // *this. Throws operation_not_permitted if the mutex is held unique.
+  shared_lock_type release_to_shared() {
+    if (o == ownership::unique)
+      throw std::system_error((int)std::errc::operation_not_permitted,
+			      std::generic_category());
+    // No mutex (e.g. default-constructed or released): never touch *m.
+    if (m == nullptr)
+      return shared_lock_type();
+    mutex_type* tm = m;
+    m = nullptr;
+    if (o == ownership::shared) {
+      o = ownership::none;
+      return shared_lock_type(*tm, std::adopt_lock);
+    }
+    return shared_lock_type(*tm, std::defer_lock);
+  }
+
+  // Getters
+
+  // Note that this returns true if the lock is UNIQUE, it will return
+  // false for shared
+  bool owns_lock() const noexcept {
+    return o == ownership::unique;
+  }
+
+  bool owns_lock_shared() const noexcept {
+    return o == ownership::shared;
+  }
+
+  // If you want to make sure you have a lock of some sort on the
+  // mutex, just treat as a bool.
+  explicit operator bool() const noexcept {
+    return o != ownership::none;
+  }
+
+  mutex_type* mutex() const noexcept {
+    return m;
+  }
+
+private:
+  void lockable() const {
+    if (m == nullptr)
+      throw std::system_error((int)std::errc::operation_not_permitted,
+			      std::generic_category());
+    if (o != ownership::none)
+      throw std::system_error((int)std::errc::resource_deadlock_would_occur,
+			      std::generic_category());
+  }
+
+  mutex_type*	m;
+  enum struct ownership : uint8_t {
+    none, unique, shared
+      };
+  ownership o;
+};
+} // namespace ceph
+
+namespace std {
+  // Takes references: by-value copies cannot swap the callers' locks.
+  template<typename Mutex>
+  void swap(ceph::shunique_lock<Mutex>& sh1, ceph::shunique_lock<Mutex>& sha) {
+    sh1.swap(sha);
+  }
+} // namespace std
+
+#endif // CEPH_COMMON_SHUNIQUE_LOCK_H
diff --git a/src/common/str_map.cc b/src/common/str_map.cc
index bd68612..4605302 100644
--- a/src/common/str_map.cc
+++ b/src/common/str_map.cc
@@ -51,7 +51,7 @@ int get_json_str_map(
   } catch (json_spirit::Error_position &e) {
     if (fallback_to_plain) {
       // fallback to key=value format
-      get_str_map(str, "\t\n ", str_map);
+      get_str_map(str, str_map, "\t\n ");
     } else {
       return -EINVAL;
     }
@@ -75,8 +75,8 @@ string trim(const string& str) {
 
 int get_str_map(
     const string &str,
-    const char *delims,
-    map<string,string> *str_map)
+    map<string,string> *str_map,
+    const char *delims)
 {
   list<string> pairs;
   get_str_list(str, delims, pairs);
@@ -94,14 +94,6 @@ int get_str_map(
   return 0;
 }
 
-int get_str_map(
-    const string &str,
-    map<string,string> *str_map)
-{
-  const char *delims = ",;\t\n ";
-  return get_str_map(str, delims, str_map);
-}
-
 string get_str_map_value(
     const map<string,string> &str_map,
     const string &key,
diff --git a/src/common/strtol.cc b/src/common/strtol.cc
index ea39ba0..fdab2cf 100644
--- a/src/common/strtol.cc
+++ b/src/common/strtol.cc
@@ -14,10 +14,10 @@
 
 #include "strtol.h"
 
-#include <errno.h>
-#include <limits.h>
+#include <cerrno>
+#include <climits>
+#include <cstdlib>
 #include <sstream>
-#include <stdlib.h>
 
 using std::ostringstream;
 
@@ -129,14 +129,15 @@ float strict_strtof(const char *str, std::string *err)
   return ret;
 }
 
-uint64_t strict_sistrtoll(const char *str, std::string *err)
+template<typename T>
+T strict_si_cast(const char *str, std::string *err)
 {
   std::string s(str);
   if (s.empty()) {
     *err = "strict_sistrtoll: value not specified";
     return 0;
   }
-  const char &u = s.at(s.size()-1); //str[std::strlen(str)-1];
+  const char &u = s.back();
   int m = 0;
   if (u == 'B')
     m = 0;
@@ -155,30 +156,35 @@ uint64_t strict_sistrtoll(const char *str, std::string *err)
   else
     m = -1;
 
-  const char *v = NULL;
   if (m >= 0)
-    s = std::string(str, s.size()-1);
-  v = s.c_str();
-
-  long long r_ll = strict_strtoll(v, 10, err);
+    s.pop_back();
+  else
+    m = 0;
 
-  if (r_ll < 0) {
+  long long ll = strict_strtoll(s.c_str(), 10, err);
+  if (ll < 0 && !std::numeric_limits<T>::is_signed) {
     *err = "strict_sistrtoll: value should not be negative";
     return 0;
   }
+  if (ll < (long long)std::numeric_limits<T>::min() >> m) {
+    *err = "strict_sistrtoll: value seems to be too small";
+    return 0;
+  }
+  if (ll > std::numeric_limits<T>::max() >> m) {
+    *err = "strict_sistrtoll: value seems to be too large";
+    return 0;
 
-  uint64_t r = r_ll;
-  if (err->empty() && m > 0) {
-    if (r > (std::numeric_limits<uint64_t>::max() >> m)) {
-      *err = "strict_sistrtoll: value seems to be too large";
-      return 0;
-    }
-    r <<= m;
   }
-  return r;
+  return (ll << m);
 }
 
-template <>
-uint64_t strict_si_cast(const char *str, std::string *err) {
-  return strict_sistrtoll(str, err);
+template int strict_si_cast<int>(const char *str, std::string *err);
+
+template long long strict_si_cast<long long>(const char *str, std::string *err);
+
+template uint64_t strict_si_cast<uint64_t>(const char *str, std::string *err);
+
+uint64_t strict_sistrtoll(const char *str, std::string *err)
+{
+  return strict_si_cast<uint64_t>(str, err);
 }
diff --git a/src/common/strtol.h b/src/common/strtol.h
index 5575ed7..ed86568 100644
--- a/src/common/strtol.h
+++ b/src/common/strtol.h
@@ -31,21 +31,7 @@ float strict_strtof(const char *str, std::string *err);
 
 uint64_t strict_sistrtoll(const char *str, std::string *err);
 
-template <typename Target>
-Target strict_si_cast(const char *str, std::string *err) {
-  uint64_t ret = strict_sistrtoll(str, err);
-  if (!err->empty())
-    return ret;
-  if (ret > (uint64_t)std::numeric_limits<Target>::max()) {
-    err->append("The option value '");
-    err->append(str);
-    err->append("' seems to be too large");
-    return 0;
-  }
-  return ret;
-}
-
-template <>
-uint64_t strict_si_cast(const char *str, std::string *err);
+template<typename T>
+T strict_si_cast(const char *str, std::string *err);
 
 #endif
diff --git a/src/compressor/AsyncCompressor.h b/src/compressor/AsyncCompressor.h
index cec2e96..5a565ba 100644
--- a/src/compressor/AsyncCompressor.h
+++ b/src/compressor/AsyncCompressor.h
@@ -85,7 +85,7 @@ class AsyncCompressor {
       }
       return item;
     }
-    void _process(Job *item, ThreadPool::TPHandle &handle) {
+    void _process(Job *item, ThreadPool::TPHandle &) override {
       assert(item->status.read() == WORKING);
       bufferlist out;
       int r;
@@ -108,7 +108,7 @@ class AsyncCompressor {
   void _decompress(bufferlist &in, bufferlist &out);
 
  public:
-  AsyncCompressor(CephContext *c);
+  explicit AsyncCompressor(CephContext *c);
   virtual ~AsyncCompressor() {}
 
   int get_cpuid(int id) {
diff --git a/src/compressor/CompressionPlugin.h b/src/compressor/CompressionPlugin.h
index d699d00..be1f6b8 100644
--- a/src/compressor/CompressionPlugin.h
+++ b/src/compressor/CompressionPlugin.h
@@ -28,7 +28,7 @@ namespace ceph {
   public:
     CompressorRef compressor;
 
-    CompressionPlugin(CephContext *cct) : Plugin(cct),
+    explicit CompressionPlugin(CephContext *cct) : Plugin(cct),
                                           compressor(0) 
     {}
     
diff --git a/src/compressor/Compressor.h b/src/compressor/Compressor.h
index c891bbe..6e39cec 100644
--- a/src/compressor/Compressor.h
+++ b/src/compressor/Compressor.h
@@ -25,8 +25,8 @@ typedef shared_ptr<Compressor> CompressorRef;
 class Compressor {
  public:
   virtual ~Compressor() {}
-  virtual int compress(bufferlist &in, bufferlist &out) = 0;
-  virtual int decompress(bufferlist &in, bufferlist &out) = 0;
+  virtual int compress(const bufferlist &in, bufferlist &out) = 0;
+  virtual int decompress(const bufferlist &in, bufferlist &out) = 0;
 
   static CompressorRef create(CephContext *cct, const string &type);
 };
diff --git a/src/compressor/Makefile.am b/src/compressor/Makefile.am
index 47a069f..9deda3c 100644
--- a/src/compressor/Makefile.am
+++ b/src/compressor/Makefile.am
@@ -1,6 +1,7 @@
 compressorlibdir = $(pkglibdir)/compressor
 compressorlib_LTLIBRARIES =  
 
+include compressor/zlib/Makefile.am
 include compressor/snappy/Makefile.am
 
 libcompressor_la_SOURCES = \
diff --git a/src/compressor/snappy/CompressionPluginSnappy.cc b/src/compressor/snappy/CompressionPluginSnappy.cc
index 2030d56..d0780f3 100644
--- a/src/compressor/snappy/CompressionPluginSnappy.cc
+++ b/src/compressor/snappy/CompressionPluginSnappy.cc
@@ -23,7 +23,7 @@ class CompressionPluginSnappy : public CompressionPlugin {
 
 public:
 
-  CompressionPluginSnappy(CephContext* cct) : CompressionPlugin(cct)
+  explicit CompressionPluginSnappy(CephContext* cct) : CompressionPlugin(cct)
   {}
 
   virtual int factory(CompressorRef *cs,
diff --git a/src/compressor/snappy/SnappyCompressor.h b/src/compressor/snappy/SnappyCompressor.h
index 40ec450..ae2ca83 100644
--- a/src/compressor/snappy/SnappyCompressor.h
+++ b/src/compressor/snappy/SnappyCompressor.h
@@ -26,7 +26,7 @@ class BufferlistSource : public snappy::Source {
   size_t left;
 
  public:
-  BufferlistSource(bufferlist &data): pb(data.buffers().begin()), pb_off(0), left(data.length()) {}
+  explicit BufferlistSource(const bufferlist &data): pb(data.buffers().begin()), pb_off(0), left(data.length()) {}
   virtual ~BufferlistSource() {}
   virtual size_t Available() const { return left; }
   virtual const char* Peek(size_t* len) {
@@ -53,7 +53,7 @@ class SnappyCompressor : public Compressor {
  public:
   virtual ~SnappyCompressor() {}
   virtual const char* get_method_name() { return "snappy"; }
-  virtual int compress(bufferlist &src, bufferlist &dst) {
+  virtual int compress(const bufferlist &src, bufferlist &dst) {
     BufferlistSource source(src);
     bufferptr ptr(snappy::MaxCompressedLength(src.length()));
     snappy::UncheckedByteArraySink sink(ptr.c_str());
@@ -61,12 +61,14 @@ class SnappyCompressor : public Compressor {
     dst.append(ptr, 0, sink.CurrentDestination()-ptr.c_str());
     return 0;
   }
-  virtual int decompress(bufferlist &src, bufferlist &dst) {
-    BufferlistSource source(src);
+  virtual int decompress(const bufferlist &src, bufferlist &dst) {
     size_t res_len = 0;
     // Trick, decompress only need first 32bits buffer
-    if (!snappy::GetUncompressedLength(src.get_contiguous(0, 8), 8, &res_len))
+    bufferlist tmp;
+    tmp.substr_of( src, 0, 4 );
+    if (!snappy::GetUncompressedLength(tmp.c_str(), tmp.length(), &res_len))
       return -1;
+    BufferlistSource source(src);
     bufferptr ptr(res_len);
     if (snappy::RawUncompress(&source, ptr.c_str())) {
       dst.append(ptr);
diff --git a/src/compressor/snappy/CompressionPluginSnappy.cc b/src/compressor/zlib/CompressionPluginZlib.cc
similarity index 79%
copy from src/compressor/snappy/CompressionPluginSnappy.cc
copy to src/compressor/zlib/CompressionPluginZlib.cc
index 2030d56..d303d7f 100644
--- a/src/compressor/snappy/CompressionPluginSnappy.cc
+++ b/src/compressor/zlib/CompressionPluginZlib.cc
@@ -16,21 +16,23 @@
 // -----------------------------------------------------------------------------
 #include "ceph_ver.h"
 #include "compressor/CompressionPlugin.h"
-#include "SnappyCompressor.h"
-// -----------------------------------------------------------------------------
+#include "CompressionZlib.h"
+#include "common/debug.h"
 
-class CompressionPluginSnappy : public CompressionPlugin {
+#define dout_subsys ceph_subsys_mon
+// -----------------------------------------------------------------------------
 
+class CompressionPluginZlib : public CompressionPlugin {
 public:
 
-  CompressionPluginSnappy(CephContext* cct) : CompressionPlugin(cct)
+  CompressionPluginZlib(CephContext *cct) : CompressionPlugin(cct)
   {}
 
   virtual int factory(CompressorRef *cs,
                       ostream *ss)
   {
     if (compressor == 0) {
-      SnappyCompressor *interface = new SnappyCompressor();
+      CompressionZlib *interface = new CompressionZlib();
       compressor = CompressorRef(interface);
     }
     *cs = compressor;
@@ -53,5 +55,5 @@ int __ceph_plugin_init(CephContext *cct,
 {
   PluginRegistry *instance = cct->get_plugin_registry();
 
-  return instance->add(type, name, new CompressionPluginSnappy(cct));
+  return instance->add(type, name, new CompressionPluginZlib(cct));
 }
diff --git a/src/compressor/zlib/CompressionZlib.cc b/src/compressor/zlib/CompressionZlib.cc
new file mode 100644
index 0000000..fd03bdc
--- /dev/null
+++ b/src/compressor/zlib/CompressionZlib.cc
@@ -0,0 +1,152 @@
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2015 Mirantis, Inc.
+ *
+ * Author: Alyona Kiseleva <akiselyova at mirantis.com>
+ *
+ *  This library is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU Lesser General Public
+ *  License as published by the Free Software Foundation; either
+ *  version 2.1 of the License, or (at your option) any later version.
+ *
+ */
+
+// -----------------------------------------------------------------------------
+#include "common/debug.h"
+#include "CompressionZlib.h"
+#include "osd/osd_types.h"
+// -----------------------------------------------------------------------------
+
+#include <zlib.h>
+
+// -----------------------------------------------------------------------------
+#define dout_subsys ceph_subsys_compressor
+#undef dout_prefix
+#define dout_prefix _prefix(_dout)
+// -----------------------------------------------------------------------------
+
+// -----------------------------------------------------------------------------
+
+static ostream&  // log prefix hook: the dout_prefix macro above expands to _prefix(_dout)
+_prefix(std::ostream* _dout)
+{
+  return *_dout << "CompressionZlib: ";  // tag the message and hand the stream back for chaining
+}
+// -----------------------------------------------------------------------------
+
+const long unsigned int max_len = 2048;
+
+const char* CompressionZlib::get_method_name()  // returns the constant name string of this compressor
+{
+	return "zlib";
+}
+
+int CompressionZlib::compress(const bufferlist &in, bufferlist &out)  // deflate each buffer of in and append the result to out; returns 0, or -1 on a zlib error
+{
+  int ret, flush;
+  unsigned have;
+  z_stream strm;
+  unsigned char* c_in;
+  int level = 5;  // compression level passed to deflateInit below
+
+  /* allocate deflate state */
+  strm.zalloc = Z_NULL;
+  strm.zfree = Z_NULL;
+  strm.opaque = Z_NULL;
+  ret = deflateInit(&strm, level);
+  if (ret != Z_OK) {
+    dout(1) << "Compression init error: init return "
+         << ret << " instead of Z_OK" << dendl;
+    return -1;
+  }
+
+   unsigned char c_out [max_len];  // scratch output buffer reused for every deflate call
+
+  for (std::list<buffer::ptr>::const_iterator i = in.buffers().begin();
+      i != in.buffers().end();) {  // NOTE(review): with no buffers in the list the body never runs, so deflate is never called with Z_FINISH and nothing is appended to out
+
+    c_in = (unsigned char*) (*i).c_str();
+    long unsigned int len = (*i).length();
+    ++i;  // advance now so the flush choice below can tell whether this was the last buffer
+
+    strm.avail_in = len;
+    flush = i != in.buffers().end() ? Z_NO_FLUSH : Z_FINISH;  // only the final buffer finishes the stream
+
+    strm.next_in = c_in;
+
+    do {
+      strm.avail_out = max_len;
+      strm.next_out = c_out;
+      ret = deflate(&strm, flush);    /* no bad return value */
+      if (ret == Z_STREAM_ERROR) {
+         dout(1) << "Compression error: compress return Z_STREAM_ERROR("
+              << ret << ")" << dendl;
+         deflateEnd(&strm);
+         return -1;
+      }
+      have = max_len - strm.avail_out;  // bytes deflate produced in this pass
+      out.append((char*)c_out, have);
+    } while (strm.avail_out == 0);  // a completely filled c_out means more output may be pending
+    if (strm.avail_in != 0) {
+      dout(10) << "Compression error: unused input" << dendl;
+      deflateEnd(&strm);
+      return -1;
+    }
+  }
+
+  deflateEnd(&strm);
+  return 0;
+}
+
+int CompressionZlib::decompress(const bufferlist &in, bufferlist &out)
+{
+  int ret;
+  unsigned have;
+  z_stream strm;
+  unsigned char* c_in;
+
+  /* allocate inflate state */
+  strm.zalloc = Z_NULL;
+  strm.zfree = Z_NULL;
+  strm.opaque = Z_NULL;
+  strm.avail_in = 0;
+  strm.next_in = Z_NULL;
+  ret = inflateInit(&strm);
+  if (ret != Z_OK) {
+    dout(1) << "Decompression init error: init return "
+         << ret << " instead of Z_OK" << dendl;
+    return -1;
+  }
+
+  unsigned char c_out[max_len];
+
+  for (std::list<buffer::ptr>::const_iterator i = in.buffers().begin();
+      i != in.buffers().end(); ++i) {
+
+    c_in = (unsigned char*) (*i).c_str();
+    long unsigned int len = (*i).length();
+
+    strm.avail_in = len;
+    strm.next_in = c_in;
+
+    do {
+      strm.avail_out = max_len;
+      strm.next_out = c_out;
+      ret = inflate(&strm, Z_NO_FLUSH);
+      if (ret != Z_OK && ret != Z_STREAM_END) {
+       dout(1) << "Decompression error: decompress return "
+            << ret << dendl;
+       inflateEnd(&strm);
+       return -1;
+      }
+      have = max_len - strm.avail_out;
+      out.append((char*)c_out, have);
+    } while (strm.avail_out == 0);
+
+  }
+
+  /* clean up and return */
+  (void)inflateEnd(&strm);
+  return 0;
+}
diff --git a/src/compressor/zlib/CompressionZlib.h b/src/compressor/zlib/CompressionZlib.h
new file mode 100644
index 0000000..8974107
--- /dev/null
+++ b/src/compressor/zlib/CompressionZlib.h
@@ -0,0 +1,45 @@
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2015 Mirantis, Inc.
+ *
+ * Author: Alyona Kiseleva <akiselyova at mirantis.com>
+ *
+ *  This library is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU Lesser General Public
+ *  License as published by the Free Software Foundation; either
+ *  version 2.1 of the License, or (at your option) any later version.
+ *
+ */
+
+
+#ifndef CEPH_COMPRESSION_ZLIB_H
+#define CEPH_COMPRESSION_ZLIB_H
+
+// -----------------------------------------------------------------------------
+#include "compressor/Compressor.h"
+// -----------------------------------------------------------------------------
+#include <list>
+// -----------------------------------------------------------------------------
+
+class CompressionZlib : public Compressor {  // Compressor implementation whose methods are defined in CompressionZlib.cc on top of zlib
+	const char version = '1';  // NOTE(review): not referenced by any method in CompressionZlib.cc; confirm whether it is still needed
+public:
+
+  CompressionZlib()
+  {
+  }
+
+  virtual
+  ~CompressionZlib()
+  {
+  }
+
+  virtual int compress(const bufferlist &in, bufferlist &out);  // 0 on success, -1 on error
+  virtual int decompress(const bufferlist &in, bufferlist &out);  // 0 on success, -1 on error
+  virtual const char* get_method_name();  // returns "zlib"
+
+ };
+
+
+#endif
diff --git a/src/compressor/zlib/Makefile.am b/src/compressor/zlib/Makefile.am
new file mode 100644
index 0000000..250f396
--- /dev/null
+++ b/src/compressor/zlib/Makefile.am
@@ -0,0 +1,21 @@
+# zlib plugin
+noinst_HEADERS += \
+  compressor/zlib/CompressionZlib.h
+
+zlib_sources = \
+  compressor/Compressor.cc \
+  compressor/zlib/CompressionPluginZlib.cc \
+  compressor/zlib/CompressionZlib.cc
+
+compressor/zlib/CompressionPluginZlib.cc: ./ceph_ver.h
+
+libceph_zlib_la_SOURCES = ${zlib_sources}
+libceph_zlib_la_CFLAGS = ${AM_CFLAGS}  
+libceph_zlib_la_CXXFLAGS= ${AM_CXXFLAGS} 
+libceph_zlib_la_LIBADD = $(LIBCRUSH) $(PTHREAD_LIBS) $(EXTRALIBS)
+libceph_zlib_la_LDFLAGS = ${AM_LDFLAGS} -lz -version-info 2:0:0
+if LINUX
+libceph_zlib_la_LDFLAGS += -export-symbols-regex '.*__compressor_.*'
+endif
+
+compressorlib_LTLIBRARIES += libceph_zlib.la
diff --git a/src/crush/CrushTester.cc b/src/crush/CrushTester.cc
index adf147b..da1ed94 100644
--- a/src/crush/CrushTester.cc
+++ b/src/crush/CrushTester.cc
@@ -254,12 +254,6 @@ int CrushTester::random_placement(int ruleno, vector<int>& out, int maxout, vect
       crush.get_max_devices() == 0)
     return -EINVAL;
 
-  // compute each device's proportional weight
-  vector<float> proportional_weights( weight.size() );
-  for (unsigned i = 0; i < weight.size(); i++) {
-    proportional_weights[i] = (float) weight[i] / (float) total_weight;
-  }
-
   // determine the real maximum number of devices to return
   int devices_requested = min(maxout, get_maximum_affected_by_rule(ruleno));
   bool accept_placement = false;
@@ -571,7 +565,6 @@ int CrushTester::test()
 
       // create a structure to hold data for post-processing
       tester_data_set tester_data;
-      vector<int> vector_data_buffer;
       vector<float> vector_data_buffer_f;
 
       // create a map to hold batch-level placement information
diff --git a/src/crush/CrushTreeDumper.h b/src/crush/CrushTreeDumper.h
index abb5fca..41cf1a9 100644
--- a/src/crush/CrushTreeDumper.h
+++ b/src/crush/CrushTreeDumper.h
@@ -63,7 +63,7 @@ namespace CrushTreeDumper {
   template <typename F>
   class Dumper : public list<Item> {
   public:
-    Dumper(const CrushWrapper *crush_) : crush(crush_) {
+    explicit Dumper(const CrushWrapper *crush_) : crush(crush_) {
       crush->find_roots(roots);
       root = roots.begin();
     }
@@ -155,7 +155,7 @@ namespace CrushTreeDumper {
 
   class FormattingDumper : public Dumper<Formatter> {
   public:
-    FormattingDumper(const CrushWrapper *crush) : Dumper<Formatter>(crush) {}
+    explicit FormattingDumper(const CrushWrapper *crush) : Dumper<Formatter>(crush) {}
 
   protected:
     virtual void dump_item(const Item &qi, Formatter *f) {
diff --git a/src/crush/CrushWrapper.cc b/src/crush/CrushWrapper.cc
index 8a2d28b..72739fd 100644
--- a/src/crush/CrushWrapper.cc
+++ b/src/crush/CrushWrapper.cc
@@ -907,8 +907,6 @@ bool CrushWrapper::check_item_present(int id) const
 
 pair<string,string> CrushWrapper::get_immediate_parent(int id, int *_ret)
 {
-  pair <string, string> loc;
-  int ret = -ENOENT;
 
   for (int bidx = 0; bidx < crush->max_buckets; bidx++) {
     crush_bucket *b = crush->buckets[bidx];
@@ -918,15 +916,16 @@ pair<string,string> CrushWrapper::get_immediate_parent(int id, int *_ret)
       if (b->items[i] == id) {
         string parent_id = name_map[b->id];
         string parent_bucket_type = type_map[b->type];
-        loc = make_pair(parent_bucket_type, parent_id);
-        ret = 0;
+        if (_ret)
+          *_ret = 0;
+        return make_pair(parent_bucket_type, parent_id);
       }
   }
 
   if (_ret)
-    *_ret = ret;
+    *_ret = -ENOENT;
 
-  return loc;
+  return pair<string, string>();
 }
 
 int CrushWrapper::get_immediate_parent_id(int id, int *parent)
@@ -1498,7 +1497,7 @@ namespace {
     typedef CrushTreeDumper::Item Item;
     const CrushWrapper *crush;
   public:
-    TreeDumper(const CrushWrapper *crush)
+    explicit TreeDumper(const CrushWrapper *crush)
       : crush(crush) {}
 
     void dump(Formatter *f) {
@@ -1670,7 +1669,7 @@ class CrushTreePlainDumper : public CrushTreeDumper::Dumper<ostream> {
 public:
   typedef CrushTreeDumper::Dumper<ostream> Parent;
 
-  CrushTreePlainDumper(const CrushWrapper *crush)
+  explicit CrushTreePlainDumper(const CrushWrapper *crush)
     : Parent(crush) {}
 
   void dump(ostream *out) {
@@ -1704,7 +1703,7 @@ class CrushTreeFormattingDumper : public CrushTreeDumper::FormattingDumper {
 public:
   typedef CrushTreeDumper::FormattingDumper Parent;
 
-  CrushTreeFormattingDumper(const CrushWrapper *crush)
+  explicit CrushTreeFormattingDumper(const CrushWrapper *crush)
     : Parent(crush) {}
 
   void dump(Formatter *f) {
diff --git a/src/crush/builder.c b/src/crush/builder.c
index 1212e4b..387c8be 100644
--- a/src/crush/builder.c
+++ b/src/crush/builder.c
@@ -176,7 +176,7 @@ int crush_add_bucket(struct crush_map *map,
 int crush_remove_bucket(struct crush_map *map, struct crush_bucket *bucket)
 {
 	int pos = -1 - bucket->id;
-
+       assert(pos < map->max_buckets);
 	map->buckets[pos] = NULL;
 	crush_destroy_bucket(bucket);
 	return 0;
diff --git a/src/erasure-code/ErasureCode.cc b/src/erasure-code/ErasureCode.cc
index 6d83d44..eaacb24 100644
--- a/src/erasure-code/ErasureCode.cc
+++ b/src/erasure-code/ErasureCode.cc
@@ -93,12 +93,12 @@ int ErasureCode::encode_prepare(const bufferlist &raw,
 
     raw.copy((k - padded_chunks) * blocksize, remainder, buf.c_str());
     buf.zero(remainder, blocksize - remainder);
-    encoded[chunk_index(k-padded_chunks)].push_back(buf);
+    encoded[chunk_index(k-padded_chunks)].push_back(std::move(buf));
 
     for (unsigned int i = k - padded_chunks + 1; i < k; i++) {
       bufferptr buf(buffer::create_aligned(blocksize, SIMD_ALIGN));
       buf.zero();
-      encoded[chunk_index(i)].push_back(buf);
+      encoded[chunk_index(i)].push_back(std::move(buf));
     }
   }
   for (unsigned int i = k; i < k + m; i++) {
diff --git a/src/erasure-code/isa/Makefile.am b/src/erasure-code/isa/Makefile.am
index 7b60562..6bbb37c 100644
--- a/src/erasure-code/isa/Makefile.am
+++ b/src/erasure-code/isa/Makefile.am
@@ -60,17 +60,18 @@ isa_sources = \
 
 erasure-code/isa/ErasureCodePluginIsa.cc: ./ceph_ver.h
 
-libec_isa_la_SOURCES = ${isa_sources}
+noinst_LTLIBRARIES += libisa.la
+libisa_la_SOURCES = ${isa_sources}
+libisa_la_CFLAGS = ${AM_CFLAGS} -I $(srcdir)/erasure-code/isa/isa-l/include/
+libisa_la_CXXFLAGS = ${AM_CXXFLAGS}
+libisa_la_CCASFLAGS = ${AM_CCASFLAGS} -I $(srcdir)/erasure-code/isa/isa-l/include/
+libisa_la_LIBTOOLFLAGS = --tag=CC
 
-libec_isa_la_CFLAGS = ${AM_CFLAGS} -I $(srcdir)/erasure-code/isa/isa-l/include/
-libec_isa_la_CXXFLAGS = ${AM_CXXFLAGS} -I $(srcdir)/erasure-code/isa/isa-l/include/
-libec_isa_la_CCASFLAGS = ${AM_CCASFLAGS} -I $(abs_srcdir)/erasure-code/isa/isa-l/include/
-
-libec_isa_la_LIBADD = $(LIBCRUSH) $(PTHREAD_LIBS) $(EXTRALIBS)
+libec_isa_la_SOURCES =
+libec_isa_la_LIBADD = libisa.la $(LIBCRUSH) $(PTHREAD_LIBS) $(EXTRALIBS)
 libec_isa_la_LDFLAGS = ${AM_LDFLAGS} -module -avoid-version -shared
 if LINUX
 libec_isa_la_LDFLAGS += -export-symbols-regex '.*__erasure_code_.*'
 endif
-libec_isa_la_LIBTOOLFLAGS = --tag=CC
 
 erasure_codelib_LTLIBRARIES += libec_isa.la
diff --git a/src/erasure-code/jerasure/ErasureCodeJerasure.h b/src/erasure-code/jerasure/ErasureCodeJerasure.h
index df60dac..67fc360 100644
--- a/src/erasure-code/jerasure/ErasureCodeJerasure.h
+++ b/src/erasure-code/jerasure/ErasureCodeJerasure.h
@@ -36,7 +36,7 @@ public:
   string ruleset_failure_domain;
   bool per_chunk_alignment;
 
-  ErasureCodeJerasure(const char *_technique) :
+  explicit ErasureCodeJerasure(const char *_technique) :
     k(0),
     DEFAULT_K("2"),
     m(0),
@@ -155,7 +155,7 @@ public:
   int **schedule;
   int packetsize;
 
-  ErasureCodeJerasureCauchy(const char *technique) :
+  explicit ErasureCodeJerasureCauchy(const char *technique) :
     ErasureCodeJerasure(technique),
     bitmatrix(0),
     schedule(0)
@@ -208,7 +208,7 @@ public:
   int **schedule;
   int packetsize;
 
-  ErasureCodeJerasureLiberation(const char *technique = "liberation") :
+  explicit ErasureCodeJerasureLiberation(const char *technique = "liberation") :
     ErasureCodeJerasure(technique),
     bitmatrix(0),
     schedule(0)
diff --git a/src/erasure-code/lrc/ErasureCodeLrc.h b/src/erasure-code/lrc/ErasureCodeLrc.h
index ffc7748..e54688a 100644
--- a/src/erasure-code/lrc/ErasureCodeLrc.h
+++ b/src/erasure-code/lrc/ErasureCodeLrc.h
@@ -49,7 +49,7 @@ public:
   static const string DEFAULT_KML;
 
   struct Layer {
-    Layer(string _chunks_map) : chunks_map(_chunks_map) { }
+    explicit Layer(string _chunks_map) : chunks_map(_chunks_map) { }
     ErasureCodeInterfaceRef erasure_code;
     vector<int> data;
     vector<int> coding;
@@ -74,7 +74,7 @@ public:
   };
   vector<Step> ruleset_steps;
 
-  ErasureCodeLrc(const std::string &dir)
+  explicit ErasureCodeLrc(const std::string &dir)
     : directory(dir),
       chunk_count(0), data_chunk_count(0), ruleset_root("default")
   {
diff --git a/src/global/Makefile.am b/src/global/Makefile.am
index 4738b37..51fff4b 100644
--- a/src/global/Makefile.am
+++ b/src/global/Makefile.am
@@ -5,6 +5,9 @@ libglobal_la_SOURCES = \
 	global/signal_handler.cc \
 	common/TrackedOp.cc
 libglobal_la_LIBADD = $(LIBCOMMON)
+if WITH_LTTNG
+libglobal_la_LIBADD += -ldl -llttng-ust
+endif
 noinst_LTLIBRARIES += libglobal.la
 
 noinst_HEADERS += \
diff --git a/src/global/global_init.cc b/src/global/global_init.cc
index c0df0ee..7cbfbd0 100644
--- a/src/global/global_init.cc
+++ b/src/global/global_init.cc
@@ -62,7 +62,8 @@ static const char* c_str_or_null(const std::string &str)
 void global_pre_init(std::vector < const char * > *alt_def_args,
 		     std::vector < const char* >& args,
 		     uint32_t module_type, code_environment_t code_env,
-		     int flags)
+		     int flags,
+		     const char *data_dir_option)
 {
   // You can only call global_init once.
   assert(!g_ceph_context);
@@ -70,7 +71,7 @@ void global_pre_init(std::vector < const char * > *alt_def_args,
   std::string cluster = "ceph";
   CephInitParameters iparams = ceph_argparse_early_args(args, module_type, flags,
 							&cluster, &conf_file_list);
-  CephContext *cct = common_preinit(iparams, code_env, flags);
+  CephContext *cct = common_preinit(iparams, code_env, flags, data_dir_option);
   cct->_conf->cluster = cluster;
   global_init_set_globals(cct);
   md_config_t *conf = cct->_conf;
@@ -115,9 +116,12 @@ void global_pre_init(std::vector < const char * > *alt_def_args,
 
 void global_init(std::vector < const char * > *alt_def_args,
 		 std::vector < const char* >& args,
-		 uint32_t module_type, code_environment_t code_env, int flags)
+		 uint32_t module_type, code_environment_t code_env,
+		 int flags,
+		 const char *data_dir_option)
 {
-  global_pre_init(alt_def_args, args, module_type, code_env, flags);
+  global_pre_init(alt_def_args, args, module_type, code_env, flags,
+		  data_dir_option);
 
   // signal stuff
   int siglist[] = { SIGPIPE, 0 };
@@ -241,6 +245,7 @@ void global_init(std::vector < const char * > *alt_def_args,
     derr << "deliberately leaking some memory" << dendl;
     char *s = new char[1234567];
     (void)s;
+    // cppcheck-suppress memleak
   }
 
   if (code_env == CODE_ENVIRONMENT_DAEMON && !(flags & CINIT_FLAG_NO_DAEMON_ACTIONS))
@@ -252,23 +257,16 @@ void global_print_banner(void)
   output_ceph_version();
 }
 
-static void pidfile_remove_void(void)
-{
-  pidfile_remove();
-}
-
 int global_init_prefork(CephContext *cct)
 {
   if (g_code_env != CODE_ENVIRONMENT_DAEMON)
     return -1;
+
   const md_config_t *conf = cct->_conf;
   if (!conf->daemonize) {
-    if (atexit(pidfile_remove_void)) {
-      derr << "global_init_daemonize: failed to set pidfile_remove function "
-	   << "to run at exit." << dendl;
-    }
 
-    pidfile_write(g_conf);
+    if (pidfile_write(g_conf) < 0)
+      exit(1);
 
     return -1;
   }
@@ -292,7 +290,7 @@ void global_init_daemonize(CephContext *cct)
 	 << cpp_strerror(ret) << dendl;
     exit(1);
   }
-
+ 
   global_init_postfork_start(cct);
   global_init_postfork_finish(cct);
 #else
@@ -305,11 +303,6 @@ void global_init_postfork_start(CephContext *cct)
   // restart log thread
   g_ceph_context->_log->start();
 
-  if (atexit(pidfile_remove_void)) {
-    derr << "global_init_daemonize: failed to set pidfile_remove function "
-	 << "to run at exit." << dendl;
-  }
-
   /* This is the old trick where we make file descriptors 0, 1, and possibly 2
    * point to /dev/null.
    *
@@ -333,7 +326,8 @@ void global_init_postfork_start(CephContext *cct)
     exit(1);
   }
 
-  pidfile_write(g_conf);
+  if (pidfile_write(g_conf) < 0)
+    exit(1);
 }
 
 void global_init_postfork_finish(CephContext *cct)
diff --git a/src/global/global_init.h b/src/global/global_init.h
index 5e934a7..0e27d43 100644
--- a/src/global/global_init.h
+++ b/src/global/global_init.h
@@ -32,14 +32,18 @@ class CephContext;
  */
 void global_init(std::vector < const char * > *alt_def_args,
 		 std::vector < const char* >& args,
-		 uint32_t module_type, code_environment_t code_env, int flags);
+		 uint32_t module_type,
+		 code_environment_t code_env,
+		 int flags,
+		 const char *data_dir_option = 0);
 
 // just the first half; enough to get config parsed but doesn't start up the
 // cct or log.
 void global_pre_init(std::vector < const char * > *alt_def_args,
 		     std::vector < const char* >& args,
 		     uint32_t module_type, code_environment_t code_env,
-		     int flags);
+		     int flags,
+		     const char *data_dir_option = 0);
 
 /*
  * perform all of the steps that global_init_daemonize performs just prior
diff --git a/src/global/pidfile.cc b/src/global/pidfile.cc
index 3b8962a..17fe680 100644
--- a/src/global/pidfile.cc
+++ b/src/global/pidfile.cc
@@ -29,70 +29,204 @@
 
 #include "include/compat.h"
 
+//
+// derr can be used for functions exclusively called from pidfile_write
+//
+// cerr must be used for functions called by pidfile_remove because
+// logging is not functional when it is called. cerr output is lost
+// when the caller is daemonized but it will show if not (-f)
+//
 #define dout_prefix *_dout
 
-static char pid_file[PATH_MAX] = "";
+struct pidfh {  // handle on the pid file: descriptor, path, and the device/inode recorded when it was opened
+  int pf_fd;  // descriptor of the pid file, -1 while none is held
+  char pf_path[PATH_MAX + 1];  // path of the pid file, all zero bytes while unset
+  dev_t pf_dev;  // st_dev recorded by open(), compared again in verify()
+  ino_t pf_ino;  // st_ino recorded by open(), compared again in verify()
 
-int pidfile_write(const md_config_t *conf)
-{
-  int ret, fd;
-
-  if (conf->pid_file.empty()) {
-    return pidfile_remove();
+  pidfh() {  // start out holding no file
+    reset();
   }
-  snprintf(pid_file, PATH_MAX, "%s", conf->pid_file.c_str());
-
-  fd = TEMP_FAILURE_RETRY(::open(pid_file,
-				 O_CREAT|O_TRUNC|O_WRONLY, 0644));
-  if (fd < 0) {
-    int err = errno;
-    derr << "write_pid_file: failed to open pid file '"
-	 << pid_file << "': " << cpp_strerror(err) << dendl;
-    return err;
+  ~pidfh() {  // destroying the handle attempts to remove the pid file
+    remove();
   }
 
-  char buf[20];
-  int len = snprintf(buf, sizeof(buf), "%d\n", getpid());
-  ret = safe_write(fd, buf, len);
-  if (ret < 0) {
-    derr << "write_pid_file: failed to write to pid file '"
-	 << pid_file << "': " << cpp_strerror(ret) << dendl;
-    VOID_TEMP_FAILURE_RETRY(::close(fd));
-    return ret;
+  bool is_open() {  // true once a path is recorded and a descriptor is held
+    return pf_path[0] != '\0' && pf_fd != -1;
   }
-  if (TEMP_FAILURE_RETRY(::close(fd))) {
-    ret = errno;
-    derr << "SimpleMessenger::write_pid_file: failed to close to pid file '"
-	 << pid_file << "': " << cpp_strerror(ret) << dendl;
-    return -ret;
+  void reset() {  // forget descriptor, path and inode identity; does not close pf_fd itself
+    pf_fd = -1;
+    memset(pf_path, 0, sizeof(pf_path));
+    pf_dev = 0;
+    pf_ino = 0;
   }
+  int verify();  // 0 if pf_path still names the file that was opened, negative errno otherwise
+  int remove();  // unlink the pid file if it is still ours
+  int open(const md_config_t *conf);  // create/open conf->pid_file and take a write lock on it
+  int write();  // store getpid() in the open pid file
+};
+
+static pidfh *pfh = nullptr;
 
+int pidfh::verify() {  // returns 0 when pf_path still refers to the device/inode recorded at open time
+  // check that the file we opened still is the same
+  if (pf_fd == -1)  // nothing was opened
+    return -EINVAL;
+  struct stat st;
+  if (stat(pf_path, &st) == -1)  // stat by path, so a replaced or deleted file is noticed
+    return -errno;
+  if (st.st_dev != pf_dev || st.st_ino != pf_ino)  // the path now names a different file
+    return -ESTALE;
   return 0;
 }
 
-int pidfile_remove(void)
+int pidfh::remove()  // unlink the pid file only if it is still the file we opened and still contains our pid; returns 0 or a negative errno
 {
-  if (!pid_file[0])
+  if (!pf_path[0])  // no path recorded: nothing to remove
     return 0;
 
-  // only remove it if it has OUR pid in it!
-  int fd = TEMP_FAILURE_RETRY(::open(pid_file, O_RDONLY));
-  if (fd < 0)
+  int ret;
+  if ((ret = verify()) < 0) {  // nothing open, or the path no longer names the inode we opened
+    if (pf_fd != -1) {
+      ::close(pf_fd);
+      reset();
+    }
+    return ret;
+  }
+
+  // seek to the beginning of the file before reading
+  ret = ::lseek(pf_fd, 0, SEEK_SET);
+  if (ret < 0) {
+    std::cerr << __func__ << " lseek failed "
+	      << cpp_strerror(errno) << std::endl;  // NOTE(review): the return that follows re-reads errno after this stream output; confirm it cannot be overwritten in between
     return -errno;
+  }
+
+  // check that the pid file still has our pid in it
   char buf[32];
   memset(buf, 0, sizeof(buf));
-  ssize_t res = safe_read(fd, buf, sizeof(buf));
-  VOID_TEMP_FAILURE_RETRY(::close(fd));
-  if (res < 0)
+  ssize_t res = safe_read(pf_fd, buf, sizeof(buf));
+  ::close(pf_fd);  // NOTE(review): the descriptor is closed here, but pf_fd keeps its old value on the early returns below; reset() only runs on the success path
+  if (res < 0) {
+    std::cerr << __func__ << " safe_read failed "
+	      << cpp_strerror(-res) << std::endl;
     return res;
+  }
+
   int a = atoi(buf);
-  if (a != getpid())
+  if (a != getpid()) {  // the file content is not our pid: leave the file alone
+    std::cerr << __func__ << " the pid found in the file is "
+	      << a << " which is different from getpid() "
+	      << getpid() << std::endl;
     return -EDOM;
+  }
+  ret = ::unlink(pf_path);
+  if (ret < 0) {
+    std::cerr << __func__ << " unlink " << pf_path << " failed "
+	      << cpp_strerror(errno) << std::endl;
+    return -errno;  // NOTE(review): errno is read again after the stream output above; confirm it is still the unlink error
+  }
+  reset();  // file removed: drop path and descriptor bookkeeping
+  return 0;
+}
+
+int pidfh::open(const md_config_t *conf)  // create/open conf->pid_file, record its device/inode and take a write lock; returns 0 or a negative errno
+{
+  int len = snprintf(pf_path, sizeof(pf_path),
+		    "%s", conf->pid_file.c_str());
+
+  if (len >= (int)sizeof(pf_path))  // configured path does not fit in pf_path
+    return -ENAMETOOLONG;
+
+  int fd = ::open(pf_path, O_CREAT|O_RDWR, 0644);  // read/write because remove() later reads the pid back through this descriptor
+  if (fd < 0) {
+    int err = errno;
+    derr << __func__ << ": failed to open pid file '"
+	 << pf_path << "': " << cpp_strerror(err) << dendl;
+    reset();
+    return -err;
+  }
+  struct stat st;
+  if (fstat(fd, &st) == -1) {
+    int err = errno;
+    derr << __func__ << ": failed to fstat pid file '"
+	 << pf_path << "': " << cpp_strerror(err) << dendl;
+    ::close(fd);
+    reset();
+    return -err;
+  }
+
+  pf_fd = fd;
+  pf_dev = st.st_dev;  // identity of the opened file, checked again by verify()
+  pf_ino = st.st_ino;
 
-  res = ::unlink(pid_file);
-  if (res)
+  struct flock l = { F_WRLCK, SEEK_SET, 0, 0, 0 };
+  int r = ::fcntl(pf_fd, F_SETLK, &l);
+  if (r < 0) {
+    int err = errno;  // capture now: the logging, close and reset below may overwrite errno
+    derr << __func__ << ": failed to lock pidfile "
+	 << pf_path << " because another process locked it." << dendl;
+    ::close(pf_fd);
+    reset();
+    return -err;
+  }
+  return 0;
+}
+
+int pidfh::write()  // truncate the open pid file and store our pid in it; returns 0 or a negative errno
+{
+  if (!is_open())  // no pid file was opened: nothing to write
+    return 0;
+
+  char buf[32];
+  int len = snprintf(buf, sizeof(buf), "%d\n", getpid());
+  if (::ftruncate(pf_fd, 0) < 0) {
+    int err = errno;
+    derr << __func__ << ": failed to ftruncate the pid file '"
+	 << pf_path << "': " << cpp_strerror(err) << dendl;
+    return -err;  // negative like every other failure here; callers test pidfile_write(...) < 0
+  }
+  ssize_t res = safe_write(pf_fd, buf, len);
+  if (res < 0) {
+    derr << __func__ << ": failed to write to pid file '"
+	 << pf_path << "': " << cpp_strerror(-res) << dendl;
     return res;
+  }
+  return 0;
+}
+
+void pidfile_remove()  // destroy the global pid file handle; its destructor calls pidfh::remove()
+{
+  if (pfh != nullptr)
+    delete pfh;
+  pfh = nullptr;  // cleared so a later call, such as the atexit registration, finds nothing to delete
+}
+
+int pidfile_write(const md_config_t *conf)  // create, lock and fill the pid file named by conf->pid_file; returns 0 on success or when no pid file is configured
+{
+  if (conf->pid_file.empty())  // no pid file configured
+    return 0;
+
+  assert(pfh == nullptr);  // a handle must not already exist when this is called
+
+  pfh = new pidfh();
+  if (atexit(pidfile_remove)) {  // NOTE(review): this failure path returns with pfh still allocated and set
+    derr << __func__ << ": failed to set pidfile_remove function "
+	 << "to run at exit." << dendl;
+    return -EINVAL;
+  }
+
+  int r = pfh->open(conf);
+  if (r != 0) {
+    pidfile_remove();  // drop the handle again so nothing stale is left behind
+    return r;
+  }
+
+  r = pfh->write();
+  if (r != 0) {
+    pidfile_remove();
+    return r;
+  }
 
-  pid_file[0] = '\0';
   return 0;
 }
diff --git a/src/global/pidfile.h b/src/global/pidfile.h
index 6b60a5a..e7e2b0d 100644
--- a/src/global/pidfile.h
+++ b/src/global/pidfile.h
@@ -23,6 +23,6 @@ int pidfile_write(const md_config_t *conf);
 
 // Remove the pid file that was previously written by pidfile_write.
 // This is safe to call in a signal handler context.
-int pidfile_remove(void);
+void pidfile_remove();
 
 #endif
diff --git a/src/include/Makefile.am b/src/include/Makefile.am
index 887ac55..900a4c1 100644
--- a/src/include/Makefile.am
+++ b/src/include/Makefile.am
@@ -52,6 +52,7 @@ noinst_HEADERS += \
 	include/atomic.h \
 	include/bitmapper.h \
 	include/blobhash.h \
+	include/btree_interval_set.h \
 	include/buffer.h \
 	include/buffer_fwd.h \
 	include/byteorder.h \
@@ -63,9 +64,13 @@ noinst_HEADERS += \
 	include/cmp.h \
 	include/color.h \
 	include/compat.h \
+	include/cpp-btree/btree.h \
+	include/cpp-btree/btree_container.h \
+	include/cpp-btree/btree_map.h \
 	include/sock_compat.h \
 	include/crc32c.h \
 	include/encoding.h \
+	include/encoding_btree.h \
 	include/err.h \
 	include/error.h \
 	include/filepath.h \
diff --git a/src/include/interval_set.h b/src/include/btree_interval_set.h
similarity index 69%
copy from src/include/interval_set.h
copy to src/include/btree_interval_set.h
index 3759f77..965f9fd 100644
--- a/src/include/interval_set.h
+++ b/src/include/btree_interval_set.h
@@ -1,4 +1,4 @@
-// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- 
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
 // vim: ts=8 sw=2 smarttab
 /*
  * Ceph - scalable distributed file system
@@ -7,19 +7,18 @@
  *
  * This is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
- * License version 2.1, as published by the Free Software 
+ * License version 2.1, as published by the Free Software
  * Foundation.  See file COPYING.
- * 
+ *
  */
 
 
-#ifndef CEPH_INTERVAL_SET_H
-#define CEPH_INTERVAL_SET_H
+#ifndef CEPH_BTREE_INTERVAL_SET_H
+#define CEPH_BTREE_INTERVAL_SET_H
 
 #include <iterator>
 #include <map>
 #include <ostream>
-using namespace std;
 
 #include "encoding.h"
 
@@ -30,17 +29,22 @@ using namespace std;
 # define MAX(a,b)  ((a)>=(b) ? (a):(b))
 #endif
 
+#include "cpp-btree/btree_map.h"
+#include "assert.h"
+#include "encoding_btree.h"
 
 template<typename T>
-class interval_set {
+class btree_interval_set {
  public:
 
+  typedef btree::btree_map<T,T> map_t;
+
   class const_iterator;
 
   class iterator : public std::iterator <std::forward_iterator_tag, T>
   {
     public:
-        explicit iterator(typename std::map<T,T>::iterator iter)
+        explicit iterator(typename map_t::iterator iter)
           : _iter(iter)
         { }
 
@@ -56,7 +60,7 @@ class interval_set {
         }
 
         // Dereference this iterator to get a pair.
-        pair < T, T > &operator*() {
+        std::pair < T, T > &operator*() {
                 return *_iter;
         }
 
@@ -90,17 +94,17 @@ class interval_set {
                 return prev;
         }
 
-    friend class interval_set<T>::const_iterator;
+    friend class btree_interval_set<T>::const_iterator;
 
     protected:
-        typename map<T,T>::iterator _iter;
-    friend class interval_set<T>;
+        typename map_t::iterator _iter;
+    friend class btree_interval_set<T>;
   };
 
   class const_iterator : public std::iterator <std::forward_iterator_tag, T>
   {
     public:
-        explicit const_iterator(typename std::map<T,T>::const_iterator iter)
+        explicit const_iterator(typename map_t::const_iterator iter)
           : _iter(iter)
         { }
 
@@ -120,7 +124,7 @@ class interval_set {
         }
 
         // Dereference this iterator to get a pair.
-        pair < T, T > operator*() const {
+        std::pair < T, T > operator*() const {
                 return *_iter;
         }
 
@@ -150,44 +154,44 @@ class interval_set {
         }
 
     protected:
-        typename map<T,T>::const_iterator _iter;
+        typename map_t::const_iterator _iter;
   };
 
-  interval_set() : _size(0) {}
+  btree_interval_set() : _size(0) {}
 
   int num_intervals() const
   {
     return m.size();
   }
 
-  typename interval_set<T>::iterator begin() {
-    return typename interval_set<T>::iterator(m.begin());
+  typename btree_interval_set<T>::iterator begin() {
+    return typename btree_interval_set<T>::iterator(m.begin());
   }
 
-  typename interval_set<T>::iterator lower_bound(T start) {
-    return typename interval_set<T>::iterator(find_inc_m(start));
+  typename btree_interval_set<T>::iterator lower_bound(T start) {
+    return typename btree_interval_set<T>::iterator(find_inc_m(start));
   }
 
-  typename interval_set<T>::iterator end() {
-    return typename interval_set<T>::iterator(m.end());
+  typename btree_interval_set<T>::iterator end() {
+    return typename btree_interval_set<T>::iterator(m.end());
   }
 
-  typename interval_set<T>::const_iterator begin() const {
-    return typename interval_set<T>::const_iterator(m.begin());
+  typename btree_interval_set<T>::const_iterator begin() const {
+    return typename btree_interval_set<T>::const_iterator(m.begin());
   }
 
-  typename interval_set<T>::const_iterator lower_bound(T start) const {
-    return typename interval_set<T>::const_iterator(find_inc(start));
+  typename btree_interval_set<T>::const_iterator lower_bound(T start) const {
+    return typename btree_interval_set<T>::const_iterator(find_inc(start));
   }
 
-  typename interval_set<T>::const_iterator end() const {
-    return typename interval_set<T>::const_iterator(m.end());
+  typename btree_interval_set<T>::const_iterator end() const {
+    return typename btree_interval_set<T>::const_iterator(m.end());
   }
 
   // helpers
  private:
-  typename map<T,T>::const_iterator find_inc(T start) const {
-    typename map<T,T>::const_iterator p = m.lower_bound(start);  // p->first >= start
+  typename map_t::const_iterator find_inc(T start) const {
+    typename map_t::const_iterator p = m.lower_bound(start);  // p->first >= start
     if (p != m.begin() &&
         (p == m.end() || p->first > start)) {
       p--;   // might overlap?
@@ -196,9 +200,9 @@ class interval_set {
     }
     return p;
   }
-  
-  typename map<T,T>::iterator find_inc_m(T start) {
-    typename map<T,T>::iterator p = m.lower_bound(start);
+
+  typename map_t::iterator find_inc_m(T start) {
+    typename map_t::iterator p = m.lower_bound(start);
     if (p != m.begin() &&
         (p == m.end() || p->first > start)) {
       p--;   // might overlap?
@@ -207,9 +211,9 @@ class interval_set {
     }
     return p;
   }
-  
-  typename map<T,T>::const_iterator find_adj(T start) const {
-    typename map<T,T>::const_iterator p = m.lower_bound(start);
+
+  typename map_t::const_iterator find_adj(T start) const {
+    typename map_t::const_iterator p = m.lower_bound(start);
     if (p != m.begin() &&
         (p == m.end() || p->first > start)) {
       p--;   // might touch?
@@ -218,9 +222,9 @@ class interval_set {
     }
     return p;
   }
-  
-  typename map<T,T>::iterator find_adj_m(T start) {
-    typename map<T,T>::iterator p = m.lower_bound(start);
+
+  typename map_t::iterator find_adj_m(T start) {
+    typename map_t::iterator p = m.lower_bound(start);
     if (p != m.begin() &&
         (p == m.end() || p->first > start)) {
       p--;   // might touch?
@@ -229,9 +233,9 @@ class interval_set {
     }
     return p;
   }
-  
+
  public:
-  bool operator==(const interval_set& other) const {
+  bool operator==(const btree_interval_set& other) const {
     return _size == other._size && m == other.m;
   }
 
@@ -248,7 +252,7 @@ class interval_set {
   void decode(bufferlist::iterator& bl) {
     ::decode(m, bl);
     _size = 0;
-    for (typename map<T,T>::const_iterator p = m.begin();
+    for (typename map_t::const_iterator p = m.begin();
          p != m.end();
          p++)
       _size += p->second;
@@ -256,7 +260,7 @@ class interval_set {
   void decode_nohead(int n, bufferlist::iterator& bl) {
     ::decode_nohead(n, m, bl);
     _size = 0;
-    for (typename map<T,T>::const_iterator p = m.begin();
+    for (typename map_t::const_iterator p = m.begin();
          p != m.end();
          p++)
       _size += p->second;
@@ -268,7 +272,7 @@ class interval_set {
   }
 
   bool contains(T i, T *pstart=0, T *plen=0) const {
-    typename map<T,T>::const_iterator p = find_inc(i);
+    typename map_t::const_iterator p = find_inc(i);
     if (p == m.end()) return false;
     if (p->first > i) return false;
     if (p->first+p->second <= i) return false;
@@ -280,7 +284,7 @@ class interval_set {
     return true;
   }
   bool contains(T start, T len) const {
-    typename map<T,T>::const_iterator p = find_inc(start);
+    typename map_t::const_iterator p = find_inc(start);
     if (p == m.end()) return false;
     if (p->first > start) return false;
     if (p->first+p->second <= start) return false;
@@ -289,9 +293,9 @@ class interval_set {
     return true;
   }
   bool intersects(T start, T len) const {
-    interval_set a;
+    btree_interval_set a;
     a.insert(start, len);
-    interval_set i;
+    btree_interval_set i;
     i.intersection_of( *this, a );
     if (i.empty()) return false;
     return true;
@@ -303,12 +307,12 @@ class interval_set {
   }
   T range_start() const {
     assert(!empty());
-    typename map<T,T>::const_iterator p = m.begin();
+    typename map_t::const_iterator p = m.begin();
     return p->first;
   }
   T range_end() const {
     assert(!empty());
-    typename map<T,T>::const_iterator p = m.end();
+    typename map_t::const_iterator p = m.end();
     p--;
     return p->first+p->second;
   }
@@ -316,23 +320,23 @@ class interval_set {
   // interval start after p (where p not in set)
   bool starts_after(T i) const {
     assert(!contains(i));
-    typename map<T,T>::const_iterator p = find_inc(i);
+    typename map_t::const_iterator p = find_inc(i);
     if (p == m.end()) return false;
     return true;
   }
   T start_after(T i) const {
     assert(!contains(i));
-    typename map<T,T>::const_iterator p = find_inc(i);
+    typename map_t::const_iterator p = find_inc(i);
     return p->first;
   }
 
   // interval end that contains start
   T end_after(T start) const {
     assert(contains(start));
-    typename map<T,T>::const_iterator p = find_inc(start);
+    typename map_t::const_iterator p = find_inc(start);
     return p->first+p->second;
   }
-  
+
   void insert(T val) {
     insert(val, 1);
   }
@@ -341,7 +345,7 @@ class interval_set {
     //cout << "insert " << start << "~" << len << endl;
     assert(len > 0);
     _size += len;
-    typename map<T,T>::iterator p = find_adj_m(start);
+    typename map_t::iterator p = find_adj_m(start);
     if (p == m.end()) {
       m[start] = len;                  // new interval
       if (pstart)
@@ -350,53 +354,54 @@ class interval_set {
 	*plen = len;
     } else {
       if (p->first < start) {
-        
+
         if (p->first + p->second != start) {
           //cout << "p is " << p->first << "~" << p->second << ", start is " << start << ", len is " << len << endl;
           assert(0);
         }
-        
-        assert(p->first + p->second == start);
+
         p->second += len;               // append to end
-        
-        typename map<T,T>::iterator n = p;
+
+        typename map_t::iterator n = p;
         n++;
-        if (n != m.end() && 
+	if (pstart)
+	  *pstart = p->first;
+        if (n != m.end() &&
             start+len == n->first) {   // combine with next, too!
           p->second += n->second;
+	  if (plen)
+	    *plen = p->second;
           m.erase(n);
-        }
-	if (pstart)
-	  *pstart = p->first;
-	if (plen)
-	  *plen = p->second;
+        } else {
+	  if (plen)
+	    *plen = p->second;
+	}
       } else {
         if (start+len == p->first) {
-          m[start] = len + p->second;  // append to front 
 	  if (pstart)
 	    *pstart = start;
 	  if (plen)
 	    *plen = len + p->second;
+	  T plen = p->second;
           m.erase(p);
+          m[start] = len + plen;  // append to front
         } else {
           assert(p->first > start+len);
-          m[start] = len;              // new interval
 	  if (pstart)
 	    *pstart = start;
 	  if (plen)
 	    *plen = len;
+          m[start] = len;              // new interval
         }
       }
     }
   }
 
-  void swap(interval_set<T>& other) {
+  void swap(btree_interval_set<T>& other) {
     m.swap(other.m);
-    int64_t t = _size;
-    _size = other._size;
-    other._size = t;
-  }    
-  
+    std::swap(_size, other._size);
+  }
+
   void erase(iterator &i) {
     _size -= i.get_len();
     assert(_size >= 0);
@@ -408,7 +413,7 @@ class interval_set {
   }
 
   void erase(T start, T len) {
-    typename map<T,T>::iterator p = find_inc_m(start);
+    typename map_t::iterator p = find_inc_m(start);
 
     _size -= len;
     assert(_size >= 0);
@@ -419,8 +424,8 @@ class interval_set {
     T before = start - p->first;
     assert(p->second >= before+len);
     T after = p->second - before - len;
-    
-    if (before) 
+
+    if (before)
       p->second = before;        // shorten bit before
     else
       m.erase(p);
@@ -429,34 +434,34 @@ class interval_set {
   }
 
 
-  void subtract(const interval_set &a) {
-    for (typename map<T,T>::const_iterator p = a.m.begin();
+  void subtract(const btree_interval_set &a) {
+    for (typename map_t::const_iterator p = a.m.begin();
          p != a.m.end();
          p++)
       erase(p->first, p->second);
   }
 
-  void insert(const interval_set &a) {
-    for (typename map<T,T>::const_iterator p = a.m.begin();
+  void insert(const btree_interval_set &a) {
+    for (typename map_t::const_iterator p = a.m.begin();
          p != a.m.end();
          p++)
       insert(p->first, p->second);
   }
 
 
-  void intersection_of(const interval_set &a, const interval_set &b) {
+  void intersection_of(const btree_interval_set &a, const btree_interval_set &b) {
     assert(&a != this);
     assert(&b != this);
     clear();
 
-    typename map<T,T>::const_iterator pa = a.m.begin();
-    typename map<T,T>::const_iterator pb = b.m.begin();
-    
+    typename map_t::const_iterator pa = a.m.begin();
+    typename map_t::const_iterator pb = b.m.begin();
+
     while (pa != a.m.end() && pb != b.m.end()) {
       // passing?
-      if (pa->first + pa->second <= pb->first) 
+      if (pa->first + pa->second <= pb->first)
         { pa++;  continue; }
-      if (pb->first + pb->second <= pa->first) 
+      if (pb->first + pb->second <= pa->first)
         { pb++;  continue; }
       T start = MAX(pa->first, pb->first);
       T en = MIN(pa->first+pa->second, pb->first+pb->second);
@@ -465,20 +470,20 @@ class interval_set {
       if (pa->first+pa->second > pb->first+pb->second)
         pb++;
       else
-        pa++; 
+        pa++;
     }
   }
-  void intersection_of(const interval_set& b) {
-    interval_set a;
+  void intersection_of(const btree_interval_set& b) {
+    btree_interval_set a;
     swap(a);
     intersection_of(a, b);
   }
 
-  void union_of(const interval_set &a, const interval_set &b) {
+  void union_of(const btree_interval_set &a, const btree_interval_set &b) {
     assert(&a != this);
     assert(&b != this);
     clear();
-    
+
     //cout << "union_of" << endl;
 
     // a
@@ -486,7 +491,7 @@ class interval_set {
     _size = a._size;
 
     // - (a*b)
-    interval_set ab;
+    btree_interval_set ab;
     ab.intersection_of(a, b);
     subtract(ab);
 
@@ -494,28 +499,28 @@ class interval_set {
     insert(b);
     return;
   }
-  void union_of(const interval_set &b) {
-    interval_set a;
-    swap(a);    
+  void union_of(const btree_interval_set &b) {
+    btree_interval_set a;
+    swap(a);
     union_of(a, b);
   }
 
-  bool subset_of(const interval_set &big) const {
-    for (typename map<T,T>::const_iterator i = m.begin();
+  bool subset_of(const btree_interval_set &big) const {
+    for (typename map_t::const_iterator i = m.begin();
          i != m.end();
-         i++) 
+         i++)
       if (!big.contains(i->first, i->second)) return false;
     return true;
-  }  
+  }
 
   /*
    * build a subset of @other, starting at or after @start, and including
    * @len worth of values, skipping holes.  e.g.,
    *  span_of([5~10,20~5], 8, 5) -> [8~2,20~3]
    */
-  void span_of(const interval_set &other, T start, T len) {
+  void span_of(const btree_interval_set &other, T start, T len) {
     clear();
-    typename map<T,T>::const_iterator p = other.find_inc(start);
+    typename map_t::const_iterator p = other.find_inc(start);
     if (p == other.m.end())
       return;
     if (p->first < start) {
@@ -546,15 +551,15 @@ class interval_set {
 private:
   // data
   int64_t _size;
-  map<T,T> m;   // map start -> len
+  map_t m;   // map start -> len
 };
 
 
 template<class T>
-inline ostream& operator<<(ostream& out, const interval_set<T> &s) {
+inline std::ostream& operator<<(std::ostream& out, const btree_interval_set<T> &s) {
   out << "[";
   const char *prequel = "";
-  for (typename interval_set<T>::const_iterator i = s.begin();
+  for (typename btree_interval_set<T>::const_iterator i = s.begin();
        i != s.end();
        ++i)
   {
@@ -566,12 +571,12 @@ inline ostream& operator<<(ostream& out, const interval_set<T> &s) {
 }
 
 template<class T>
-inline void encode(const interval_set<T>& s, bufferlist& bl)
+inline void encode(const btree_interval_set<T>& s, bufferlist& bl)
 {
   s.encode(bl);
 }
 template<class T>
-inline void decode(interval_set<T>& s, bufferlist::iterator& p)
+inline void decode(btree_interval_set<T>& s, bufferlist::iterator& p)
 {
   s.decode(p);
 }
diff --git a/src/include/buffer.h b/src/include/buffer.h
index 5a8b05f..bc988db 100644
--- a/src/include/buffer.h
+++ b/src/include/buffer.h
@@ -168,12 +168,16 @@ namespace buffer CEPH_BUFFER_API {
 
   public:
     ptr() : _raw(0), _off(0), _len(0) {}
+    // cppcheck-suppress noExplicitConstructor
     ptr(raw *r);
+    // cppcheck-suppress noExplicitConstructor
     ptr(unsigned l);
     ptr(const char *d, unsigned l);
     ptr(const ptr& p);
+    ptr(ptr&& p);
     ptr(const ptr& p, unsigned o, unsigned l);
     ptr& operator= (const ptr& p);
+    ptr& operator= (ptr&& p);
     ~ptr() {
       release();
     }
@@ -354,6 +358,7 @@ namespace buffer CEPH_BUFFER_API {
   public:
     // cons/des
     list() : _len(0), _memcopy_count(0), last_p(this) {}
+    // cppcheck-suppress noExplicitConstructor
     list(unsigned prealloc) : _len(0), _memcopy_count(0), last_p(this) {
       append_buffer = buffer::create(prealloc);
       append_buffer.set_length(0);   // unused, so far.
@@ -373,6 +378,16 @@ namespace buffer CEPH_BUFFER_API {
       return *this;
     }
 
+    list& operator= (list&& other) {
+      _buffers = std::move(other._buffers);
+      _len = other._len;
+      _memcopy_count = other._memcopy_count;
+      last_p = begin();
+      append_buffer.swap(other.append_buffer);
+      other.clear();
+      return *this;
+    }
+
     unsigned get_memcopy_count() const {return _memcopy_count; }
     const std::list<ptr>& buffers() const { return _buffers; }
     void swap(list& other);
@@ -414,9 +429,14 @@ namespace buffer CEPH_BUFFER_API {
       _buffers.push_front(bp);
       _len += bp.length();
     }
+    void push_front(ptr&& bp) {
+      if (bp.length() == 0)
+	return;
+      _len += bp.length();
+      _buffers.push_front(std::move(bp));
+    }
     void push_front(raw *r) {
-      ptr bp(r);
-      push_front(bp);
+      push_front(ptr(r));
     }
     void push_back(const ptr& bp) {
       if (bp.length() == 0)
@@ -424,9 +444,14 @@ namespace buffer CEPH_BUFFER_API {
       _buffers.push_back(bp);
       _len += bp.length();
     }
+    void push_back(ptr&& bp) {
+      if (bp.length() == 0)
+	return;
+      _len += bp.length();
+      _buffers.push_back(std::move(bp));
+    }
     void push_back(raw *r) {
-      ptr bp(r);
-      push_back(bp);
+      push_back(ptr(r));
     }
 
     void zero();
@@ -497,6 +522,7 @@ namespace buffer CEPH_BUFFER_API {
       append(s.data(), s.length());
     }
     void append(const ptr& bp);
+    void append(ptr&& bp);
     void append(const ptr& bp, unsigned off, unsigned len);
     void append(const list& bl);
     void append(std::istream& in);
@@ -543,6 +569,7 @@ namespace buffer CEPH_BUFFER_API {
 
   public:
     hash() : crc(0) { }
+    // cppcheck-suppress noExplicitConstructor
     hash(uint32_t init) : crc(init) { }
 
     void update(buffer::list& bl) {
diff --git a/src/include/ceph_features.h b/src/include/ceph_features.h
index adf597a..c15d2af 100755
--- a/src/include/ceph_features.h
+++ b/src/include/ceph_features.h
@@ -72,7 +72,10 @@
 #define CEPH_FEATURE_NEW_OSDOP_ENCODING   (1ULL<<56) /* New, v7 encoding */
 #define CEPH_FEATURE_MON_STATEFUL_SUB (1ULL<<57) /* stateful mon subscription */
 #define CEPH_FEATURE_MON_ROUTE_OSDMAP (1ULL<<57) /* peon sends osdmaps */
+#define CEPH_FEATURE_OSDSUBOP_NO_SNAPCONTEXT (1ULL<<57) /* overlap, drop unused SnapContext in v12 */
 #define CEPH_FEATURE_CRUSH_TUNABLES5	(1ULL<<58) /* chooseleaf stable mode */
+// duplicated since it was introduced at the same time as CEPH_FEATURE_CRUSH_TUNABLES5
+#define CEPH_FEATURE_NEW_OSDOPREPLY_ENCODING   (1ULL<<58) /* New, v7 encoding */
 
 #define CEPH_FEATURE_RESERVED2 (1ULL<<61)  /* slow down, we are almost out... */
 #define CEPH_FEATURE_RESERVED  (1ULL<<62)  /* DO NOT USE THIS ... last bit! */
@@ -136,6 +139,7 @@ static inline unsigned long long ceph_sanitize_features(unsigned long long f) {
 	 CEPH_FEATURE_MDSENC |			\
 	 CEPH_FEATURE_OSDHASHPSPOOL |       \
 	 CEPH_FEATURE_NEW_OSDOP_ENCODING |        \
+         CEPH_FEATURE_NEW_OSDOPREPLY_ENCODING | \
 	 CEPH_FEATURE_MON_SINGLE_PAXOS |    \
 	 CEPH_FEATURE_OSD_SNAPMAPPER |	    \
 	 CEPH_FEATURE_MON_SCRUB	|	    \
diff --git a/src/include/cephfs/libcephfs.h b/src/include/cephfs/libcephfs.h
index ba55c35..cdf8f0c 100644
--- a/src/include/cephfs/libcephfs.h
+++ b/src/include/cephfs/libcephfs.h
@@ -28,9 +28,9 @@
 extern "C" {
 #endif
 
-#define LIBCEPHFS_VER_MAJOR 0
-#define LIBCEPHFS_VER_MINOR 94
-#define LIBCEPHFS_VER_EXTRA 0
+#define LIBCEPHFS_VER_MAJOR 10
+#define LIBCEPHFS_VER_MINOR 0
+#define LIBCEPHFS_VER_EXTRA 2
 
 #define LIBCEPHFS_VERSION(maj, min, extra) ((maj << 16) + (min << 8) + extra)
 #define LIBCEPHFS_VERSION_CODE LIBCEPHFS_VERSION(LIBCEPHFS_VER_MAJOR, LIBCEPHFS_VER_MINOR, LIBCEPHFS_VER_EXTRA)
diff --git a/src/include/cpp-btree/btree.h b/src/include/cpp-btree/btree.h
new file mode 100644
index 0000000..49310a2
--- /dev/null
+++ b/src/include/cpp-btree/btree.h
@@ -0,0 +1,2394 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// A btree implementation of the STL set and map interfaces. A btree is both
+// smaller and faster than STL set/map. The red-black tree implementation of
+// STL set/map has an overhead of 3 pointers (left, right and parent) plus the
+// node color information for each stored value. So a set<int32> consumes 20
+// bytes for each value stored. This btree implementation stores multiple
+// values on fixed size nodes (usually 256 bytes) and doesn't store child
+// pointers for leaf nodes. The result is that a btree_set<int32> may use much
+// less memory per stored value. For the random insertion benchmark in
+// btree_test.cc, a btree_set<int32> with node-size of 256 uses 4.9 bytes per
+// stored value.
+//
+// The packing of multiple values on to each node of a btree has another effect
+// besides better space utilization: better cache locality due to fewer cache
+// lines being accessed. Better cache locality translates into faster
+// operations.
+//
+// CAVEATS
+//
+// Insertions and deletions on a btree can cause splitting, merging or
+// rebalancing of btree nodes. And even without these operations, insertions
+// and deletions on a btree will move values around within a node. In both
+// cases, the result is that insertions and deletions can invalidate iterators
+// pointing to values other than the one being inserted/deleted. This is
+// notably different from STL set/map which takes care to not invalidate
+// iterators on insert/erase except, of course, for iterators pointing to the
+// value being erased.  A partial workaround when erasing is available:
+// erase() returns an iterator pointing to the item just after the one that was
+// erased (or end() if none exists).  See also safe_btree.
+
+// PERFORMANCE
+//
+//   btree_bench --benchmarks=. 2>&1 | ./benchmarks.awk
+//
+// Run on pmattis-warp.nyc (4 X 2200 MHz CPUs); 2010/03/04-15:23:06
+// Benchmark                 STL(ns) B-Tree(ns) @    <size>
+// --------------------------------------------------------
+// BM_set_int32_insert        1516      608  +59.89%  <256>    [40.0,  5.2]
+// BM_set_int32_lookup        1160      414  +64.31%  <256>    [40.0,  5.2]
+// BM_set_int32_fulllookup     960      410  +57.29%  <256>    [40.0,  4.4]
+// BM_set_int32_delete        1741      528  +69.67%  <256>    [40.0,  5.2]
+// BM_set_int32_queueaddrem   3078     1046  +66.02%  <256>    [40.0,  5.5]
+// BM_set_int32_mixedaddrem   3600     1384  +61.56%  <256>    [40.0,  5.3]
+// BM_set_int32_fifo           227      113  +50.22%  <256>    [40.0,  4.4]
+// BM_set_int32_fwditer        158       26  +83.54%  <256>    [40.0,  5.2]
+// BM_map_int32_insert        1551      636  +58.99%  <256>    [48.0, 10.5]
+// BM_map_int32_lookup        1200      508  +57.67%  <256>    [48.0, 10.5]
+// BM_map_int32_fulllookup     989      487  +50.76%  <256>    [48.0,  8.8]
+// BM_map_int32_delete        1794      628  +64.99%  <256>    [48.0, 10.5]
+// BM_map_int32_queueaddrem   3189     1266  +60.30%  <256>    [48.0, 11.6]
+// BM_map_int32_mixedaddrem   3822     1623  +57.54%  <256>    [48.0, 10.9]
+// BM_map_int32_fifo           151      134  +11.26%  <256>    [48.0,  8.8]
+// BM_map_int32_fwditer        161       32  +80.12%  <256>    [48.0, 10.5]
+// BM_set_int64_insert        1546      636  +58.86%  <256>    [40.0, 10.5]
+// BM_set_int64_lookup        1200      512  +57.33%  <256>    [40.0, 10.5]
+// BM_set_int64_fulllookup     971      487  +49.85%  <256>    [40.0,  8.8]
+// BM_set_int64_delete        1745      616  +64.70%  <256>    [40.0, 10.5]
+// BM_set_int64_queueaddrem   3163     1195  +62.22%  <256>    [40.0, 11.6]
+// BM_set_int64_mixedaddrem   3760     1564  +58.40%  <256>    [40.0, 10.9]
+// BM_set_int64_fifo           146      103  +29.45%  <256>    [40.0,  8.8]
+// BM_set_int64_fwditer        162       31  +80.86%  <256>    [40.0, 10.5]
+// BM_map_int64_insert        1551      720  +53.58%  <256>    [48.0, 20.7]
+// BM_map_int64_lookup        1214      612  +49.59%  <256>    [48.0, 20.7]
+// BM_map_int64_fulllookup     994      592  +40.44%  <256>    [48.0, 17.2]
+// BM_map_int64_delete        1778      764  +57.03%  <256>    [48.0, 20.7]
+// BM_map_int64_queueaddrem   3189     1547  +51.49%  <256>    [48.0, 20.9]
+// BM_map_int64_mixedaddrem   3779     1887  +50.07%  <256>    [48.0, 21.6]
+// BM_map_int64_fifo           147      145   +1.36%  <256>    [48.0, 17.2]
+// BM_map_int64_fwditer        162       41  +74.69%  <256>    [48.0, 20.7]
+// BM_set_string_insert       1989     1966   +1.16%  <256>    [64.0, 44.5]
+// BM_set_string_lookup       1709     1600   +6.38%  <256>    [64.0, 44.5]
+// BM_set_string_fulllookup   1573     1529   +2.80%  <256>    [64.0, 35.4]
+// BM_set_string_delete       2520     1920  +23.81%  <256>    [64.0, 44.5]
+// BM_set_string_queueaddrem  4706     4309   +8.44%  <256>    [64.0, 48.3]
+// BM_set_string_mixedaddrem  5080     4654   +8.39%  <256>    [64.0, 46.7]
+// BM_set_string_fifo          318      512  -61.01%  <256>    [64.0, 35.4]
+// BM_set_string_fwditer       182       93  +48.90%  <256>    [64.0, 44.5]
+// BM_map_string_insert       2600     2227  +14.35%  <256>    [72.0, 55.8]
+// BM_map_string_lookup       2068     1730  +16.34%  <256>    [72.0, 55.8]
+// BM_map_string_fulllookup   1859     1618  +12.96%  <256>    [72.0, 44.0]
+// BM_map_string_delete       3168     2080  +34.34%  <256>    [72.0, 55.8]
+// BM_map_string_queueaddrem  5840     4701  +19.50%  <256>    [72.0, 59.4]
+// BM_map_string_mixedaddrem  6400     5200  +18.75%  <256>    [72.0, 57.8]
+// BM_map_string_fifo          398      596  -49.75%  <256>    [72.0, 44.0]
+// BM_map_string_fwditer       243      113  +53.50%  <256>    [72.0, 55.8]
+
+#ifndef UTIL_BTREE_BTREE_H__
+#define UTIL_BTREE_BTREE_H__
+
+#include <assert.h>
+#include <stddef.h>
+#include <string.h>
+#include <sys/types.h>
+#include <algorithm>
+#include <functional>
+#include <iostream>
+#include <iterator>
+#include <limits>
+#include <type_traits>
+#include <new>
+#include <ostream>
+#include <string>
+#include <utility>
+
+#ifndef NDEBUG
+#define NDEBUG 1
+#endif
+
+namespace btree {
+
+// Inside a btree method, if we just call swap(), it will choose the
+// btree::swap method, which we don't want. And we can't say ::swap
+// because then MSVC won't pickup any std::swap() implementations. We
+// can't just use std::swap() directly because then we don't get the
+// specialization for types outside the std namespace. So the solution
+// is to have a special swap helper function whose name doesn't
+// collide with other swap functions defined by the btree classes.
+template <typename T>
+inline void btree_swap_helper(T &a, T &b) {
+  using std::swap;
+  swap(a, b);
+}
+
+// A template helper used to select A or B based on a condition.
+template<bool cond, typename A, typename B>
+struct if_{
+  typedef A type;
+};
+
+template<typename A, typename B>
+struct if_<false, A, B> {
+  typedef B type;
+};
+
+// Types small_ and big_ are defined such that sizeof(small_) < sizeof(big_).
+typedef char small_;
+
+struct big_ {
+  char dummy[2];
+};
+
+// A compile-time assertion.
+template <bool>
+struct CompileAssert {
+};
+
+#define COMPILE_ASSERT(expr, msg) \
+  typedef CompileAssert<(bool(expr))> msg[bool(expr) ? 1 : -1]
+
+// A helper type used to indicate that a key-compare-to functor has been
+// provided. A user can specify a key-compare-to functor by doing:
+//
+//  struct MyStringComparer
+//      : public btree::btree_key_compare_to_tag {
+//    int operator()(const string &a, const string &b) const {
+//      return a.compare(b);
+//    }
+//  };
+//
+// Note that the return type is an int and not a bool. There is a
+// COMPILE_ASSERT which enforces this return type.
+struct btree_key_compare_to_tag {
+};
+
+// A helper class that indicates if the Compare parameter is derived from
+// btree_key_compare_to_tag.
+template <typename Compare>
+struct btree_is_key_compare_to
+    : public std::is_convertible<Compare, btree_key_compare_to_tag> {
+};
+
+// A helper class to convert a boolean comparison into a three-way
+// "compare-to" comparison that returns a negative value to indicate
+// less-than, zero to indicate equality and a positive value to
+// indicate greater-than. This helper class is specialized for
+// less<string> and greater<string>. The btree_key_compare_to_adapter
+// class is provided so that btree users automatically get the more
+// efficient compare-to code when using common google string types
+// with common comparison functors.
+template <typename Compare>
+struct btree_key_compare_to_adapter : Compare {
+  btree_key_compare_to_adapter() { }
+  btree_key_compare_to_adapter(const Compare &c) : Compare(c) { }
+  btree_key_compare_to_adapter(const btree_key_compare_to_adapter<Compare> &c)
+      : Compare(c) {
+  }
+};
+
+template <>
+struct btree_key_compare_to_adapter<std::less<std::string> >
+    : public btree_key_compare_to_tag {
+  btree_key_compare_to_adapter() {}
+  btree_key_compare_to_adapter(const std::less<std::string>&) {}
+  btree_key_compare_to_adapter(
+      const btree_key_compare_to_adapter<std::less<std::string> >&) {}
+  int operator()(const std::string &a, const std::string &b) const {
+    return a.compare(b);
+  }
+};
+
+template <>
+struct btree_key_compare_to_adapter<std::greater<std::string> >
+    : public btree_key_compare_to_tag {
+  btree_key_compare_to_adapter() {}
+  btree_key_compare_to_adapter(const std::greater<std::string>&) {}
+  btree_key_compare_to_adapter(
+      const btree_key_compare_to_adapter<std::greater<std::string> >&) {}
+  int operator()(const std::string &a, const std::string &b) const {
+    return b.compare(a);
+  }
+};
+
+// A helper class that allows a compare-to functor to behave like a plain
+// compare functor. This specialization is used when we do not have a
+// compare-to functor.
+template <typename Key, typename Compare, bool HaveCompareTo>
+struct btree_key_comparer {
+  btree_key_comparer() {}
+  btree_key_comparer(Compare c) : comp(c) {}
+  static bool bool_compare(const Compare &comp, const Key &x, const Key &y) {
+    return comp(x, y);
+  }
+  bool operator()(const Key &x, const Key &y) const {
+    return bool_compare(comp, x, y);
+  }
+  Compare comp;
+};
+
+// A specialization of btree_key_comparer when a compare-to functor is
+// present. We need a plain (boolean) comparison in some parts of the btree
+// code, such as insert-with-hint.
+template <typename Key, typename Compare>
+struct btree_key_comparer<Key, Compare, true> {
+  btree_key_comparer() {}
+  btree_key_comparer(Compare c) : comp(c) {}
+  static bool bool_compare(const Compare &comp, const Key &x, const Key &y) {
+    return comp(x, y) < 0;
+  }
+  bool operator()(const Key &x, const Key &y) const {
+    return bool_compare(comp, x, y);
+  }
+  Compare comp;
+};
+
+// A helper function to compare two keys using the specified compare
+// functor. This dispatches to the appropriate btree_key_comparer comparison,
+// depending on whether we have a compare-to functor or not (which depends on
+// whether Compare is derived from btree_key_compare_to_tag).
+template <typename Key, typename Compare>
+static bool btree_compare_keys(
+    const Compare &comp, const Key &x, const Key &y) {
+  typedef btree_key_comparer<Key, Compare,
+      btree_is_key_compare_to<Compare>::value> key_comparer;
+  return key_comparer::bool_compare(comp, x, y);
+}
+
+template <typename Key, typename Compare,
+          typename Alloc, int TargetNodeSize, int ValueSize>
+struct btree_common_params {
+  // If Compare is derived from btree_key_compare_to_tag then use it as the
+  // key_compare type. Otherwise, use btree_key_compare_to_adapter<> which will
+  // fall-back to Compare if we don't have an appropriate specialization.
+  typedef typename if_<
+    btree_is_key_compare_to<Compare>::value,
+    Compare, btree_key_compare_to_adapter<Compare> >::type key_compare;
+  // A type which indicates if we have a key-compare-to functor or a plain old
+  // key-compare functor.
+  typedef btree_is_key_compare_to<key_compare> is_key_compare_to;
+
+  typedef Alloc allocator_type;
+  typedef Key key_type;
+  typedef ssize_t size_type;
+  typedef ptrdiff_t difference_type;
+
+  enum {
+    kTargetNodeSize = TargetNodeSize,
+
+    // Available space for values.  This is largest for leaf nodes,
+    // which has overhead no fewer than two pointers.
+    kNodeValueSpace = TargetNodeSize - 2 * sizeof(void*),
+  };
+
+  // This is an integral type large enough to hold as many
+  // ValueSize-values as will fit a node of TargetNodeSize bytes.
+  typedef typename if_<
+    (kNodeValueSpace / ValueSize) >= 256,
+    uint16_t,
+    uint8_t>::type node_count_type;
+};
+
+// A parameters structure for holding the type parameters for a btree_map.
+template <typename Key, typename Data, typename Compare,
+          typename Alloc, int TargetNodeSize>
+struct btree_map_params
+    : public btree_common_params<Key, Compare, Alloc, TargetNodeSize,
+                                 sizeof(Key) + sizeof(Data)> {
+  typedef Data data_type;
+  typedef Data mapped_type;
+  typedef std::pair<const Key, data_type> value_type;
+  typedef std::pair<Key, data_type> mutable_value_type;
+  typedef value_type* pointer;
+  typedef const value_type* const_pointer;
+  typedef value_type& reference;
+  typedef const value_type& const_reference;
+
+  enum {
+    kValueSize = sizeof(Key) + sizeof(data_type),
+  };
+
+  static const Key& key(const value_type &x) { return x.first; }
+  static const Key& key(const mutable_value_type &x) { return x.first; }
+  static void swap(mutable_value_type *a, mutable_value_type *b) {
+    btree_swap_helper(a->first, b->first);
+    btree_swap_helper(a->second, b->second);
+  }
+};
+
+// A parameters structure for holding the type parameters for a btree_set.
+template <typename Key, typename Compare, typename Alloc, int TargetNodeSize>
+struct btree_set_params
+    : public btree_common_params<Key, Compare, Alloc, TargetNodeSize,
+                                 sizeof(Key)> {
+  typedef std::false_type data_type;
+  typedef std::false_type mapped_type;
+  typedef Key value_type;
+  typedef value_type mutable_value_type;
+  typedef value_type* pointer;
+  typedef const value_type* const_pointer;
+  typedef value_type& reference;
+  typedef const value_type& const_reference;
+
+  enum {
+    kValueSize = sizeof(Key),
+  };
+
+  static const Key& key(const value_type &x) { return x; }
+  static void swap(mutable_value_type *a, mutable_value_type *b) {
+    btree_swap_helper<mutable_value_type>(*a, *b);
+  }
+};
+
+// An adapter class that converts a lower-bound compare into an upper-bound
+// compare.
+template <typename Key, typename Compare>
+struct btree_upper_bound_adapter : public Compare {
+  btree_upper_bound_adapter(Compare c) : Compare(c) {}
+  bool operator()(const Key &a, const Key &b) const {
+    return !static_cast<const Compare&>(*this)(b, a);
+  }
+};
+
+template <typename Key, typename CompareTo>
+struct btree_upper_bound_compare_to_adapter : public CompareTo {
+  btree_upper_bound_compare_to_adapter(CompareTo c) : CompareTo(c) {}
+  int operator()(const Key &a, const Key &b) const {
+    return static_cast<const CompareTo&>(*this)(b, a);
+  }
+};
+
+// Node-search policy: linear scan over a node's keys using a plain (boolean)
+// compare.
+template <typename K, typename N, typename Compare>
+struct btree_linear_search_plain_compare {
+  // Searches all of n's keys for k with comp.
+  static int lower_bound(const K &k, const N &n, Compare comp)  {
+    const int last = n.count();
+    return n.linear_search_plain_compare(k, 0, last, comp);
+  }
+  // Same scan, but with comp wrapped in btree_upper_bound_adapter.
+  static int upper_bound(const K &k, const N &n, Compare comp)  {
+    btree_upper_bound_adapter<K, Compare> adapted(comp);
+    return n.linear_search_plain_compare(k, 0, n.count(), adapted);
+  }
+};
+
+// Node-search policy: linear scan over a node's keys using a three-way
+// compare-to functor.
+template <typename K, typename N, typename CompareTo>
+struct btree_linear_search_compare_to {
+  static int lower_bound(const K &k, const N &n, CompareTo comp)  {
+    const int last = n.count();
+    return n.linear_search_compare_to(k, 0, last, comp);
+  }
+  // The upper bound goes through the plain-compare scan: comp is viewed
+  // through btree_key_comparer and wrapped in btree_upper_bound_adapter.
+  static int upper_bound(const K &k, const N &n, CompareTo comp)  {
+    typedef btree_key_comparer<K, CompareTo, true> plain_compare;
+    btree_upper_bound_adapter<K, plain_compare> adapted(comp);
+    return n.linear_search_plain_compare(k, 0, n.count(), adapted);
+  }
+};
+
+// Node-search policy: binary search over a node's keys using a plain
+// (boolean) compare.
+template <typename K, typename N, typename Compare>
+struct btree_binary_search_plain_compare {
+  // Searches all of n's keys for k with comp.
+  static int lower_bound(const K &k, const N &n, Compare comp)  {
+    const int last = n.count();
+    return n.binary_search_plain_compare(k, 0, last, comp);
+  }
+  // Same search, but with comp wrapped in btree_upper_bound_adapter.
+  static int upper_bound(const K &k, const N &n, Compare comp)  {
+    btree_upper_bound_adapter<K, Compare> adapted(comp);
+    return n.binary_search_plain_compare(k, 0, n.count(), adapted);
+  }
+};
+
+// Dispatch helper class for using binary search with compare-to.
+template <typename K, typename N, typename CompareTo>
+struct btree_binary_search_compare_to {
+  // Binary-searches all keys of node n for k using the three-way functor
+  // comp. The result is whatever N::binary_search_compare_to returns, i.e. a
+  // position that may have the node's kExactMatch bit OR'ed in.
+  static int lower_bound(const K &k, const N &n, CompareTo comp)  {
+    // Use the caller's comparator. A default-constructed CompareTo would
+    // drop any state comp carries and would require CompareTo to be
+    // default-constructible.
+    return n.binary_search_compare_to(k, 0, n.count(), comp);
+  }
+  // Binary-searches node n for the upper bound of k. comp is viewed as a
+  // plain compare through btree_key_comparer and wrapped in
+  // btree_upper_bound_adapter, exactly as the plain-compare policy does.
+  static int upper_bound(const K &k, const N &n, CompareTo comp)  {
+    typedef btree_upper_bound_adapter<K,
+        btree_key_comparer<K, CompareTo, true> > upper_compare;
+    // This is the binary-search policy, so use the binary search rather than
+    // a linear scan; both return the same position on sorted keys.
+    return n.binary_search_plain_compare(k, 0, n.count(), upper_compare(comp));
+  }
+};
+
+// A node in the btree. The same node type is used for both internal and leaf
+// nodes, though the nodes are allocated in such a way that the children array
+// is only valid in internal nodes.
+template <typename Params>
+class btree_node {
+ public:
+  typedef Params params_type;
+  typedef btree_node<Params> self_type;
+  typedef typename Params::key_type key_type;
+  typedef typename Params::data_type data_type;
+  typedef typename Params::value_type value_type;
+  typedef typename Params::mutable_value_type mutable_value_type;
+  typedef typename Params::pointer pointer;
+  typedef typename Params::const_pointer const_pointer;
+  typedef typename Params::reference reference;
+  typedef typename Params::const_reference const_reference;
+  typedef typename Params::key_compare key_compare;
+  typedef typename Params::size_type size_type;
+  typedef typename Params::difference_type difference_type;
+  // Typedefs for the various types of node searches.
+  typedef btree_linear_search_plain_compare<
+    key_type, self_type, key_compare> linear_search_plain_compare_type;
+  typedef btree_linear_search_compare_to<
+    key_type, self_type, key_compare> linear_search_compare_to_type;
+  typedef btree_binary_search_plain_compare<
+    key_type, self_type, key_compare> binary_search_plain_compare_type;
+  typedef btree_binary_search_compare_to<
+    key_type, self_type, key_compare> binary_search_compare_to_type;
+  // If we have a valid key-compare-to type, use linear_search_compare_to,
+  // otherwise use linear_search_plain_compare.
+  typedef typename if_<
+    Params::is_key_compare_to::value,
+    linear_search_compare_to_type,
+    linear_search_plain_compare_type>::type linear_search_type;
+  // If we have a valid key-compare-to type, use binary_search_compare_to,
+  // otherwise use binary_search_plain_compare.
+  typedef typename if_<
+    Params::is_key_compare_to::value,
+    binary_search_compare_to_type,
+    binary_search_plain_compare_type>::type binary_search_type;
+  // If the key is an integral or floating point type, use linear search which
+  // is faster than binary search for such types. Might be wise to also
+  // configure linear search based on node-size.
+  typedef typename if_<
+    std::is_integral<key_type>::value ||
+    std::is_floating_point<key_type>::value,
+    linear_search_type, binary_search_type>::type search_type;
+
+  struct base_fields {
+    typedef typename Params::node_count_type field_type;
+
+    // A boolean indicating whether the node is a leaf or not.
+    bool leaf;
+    // The position of the node in the node's parent.
+    field_type position;
+    // The maximum number of values the node can hold.
+    field_type max_count;
+    // The count of the number of values in the node.
+    field_type count;
+    // A pointer to the node's parent.
+    btree_node *parent;
+  };
+
+  enum {
+    kValueSize = params_type::kValueSize,
+    kTargetNodeSize = params_type::kTargetNodeSize,
+
+    // Compute how many values we can fit onto a leaf node.
+    kNodeTargetValues = (kTargetNodeSize - sizeof(base_fields)) / kValueSize,
+    // We need a minimum of 3 values per internal node in order to perform
+    // splitting (1 value for the two nodes involved in the split and 1 value
+    // propagated to the parent as the delimiter for the split).
+    kNodeValues = kNodeTargetValues >= 3 ? kNodeTargetValues : 3,
+
+    kExactMatch = 1 << 30,  // Flag OR'ed into compare-to search results.
+    kMatchMask = kExactMatch - 1,  // All bits below kExactMatch.
+  };
+
+  struct leaf_fields : public base_fields {
+    // The array of values. Only the first count of these values have been
+    // constructed and are valid.
+    mutable_value_type values[kNodeValues];
+  };
+
+  struct internal_fields : public leaf_fields {
+    // The array of child pointers. The keys in children[i] are all less than
+    // key(i). The keys in children[i + 1] are all greater than key(i). There
+    // are always count + 1 children.
+    btree_node *children[kNodeValues + 1];
+  };
+
+  struct root_fields : public internal_fields {
+    btree_node *rightmost;
+    size_type size;
+  };
+
+ public:
+  // Getter for whether this is a leaf node or not. This value doesn't
+  // change after the node is created.
+  bool leaf() const { return fields_.leaf; }
+
+  // Getter/setter for the position of this node in its parent.
+  int position() const { return fields_.position; }
+  void set_position(int v) { fields_.position = v; }
+
+  // Getter/setter for the number of values stored in this node, and getter
+  // for the maximum number of values it can hold.
+  int count() const { return fields_.count; }
+  void set_count(int v) { fields_.count = v; }
+  int max_count() const { return fields_.max_count; }
+
+  // Getter for the parent of this node.
+  btree_node* parent() const { return fields_.parent; }
+  // Getter for whether the node is the root of the tree. The parent of the
+  // root of the tree is the leftmost node in the tree which is guaranteed to
+  // be a leaf.
+  bool is_root() const { return parent()->leaf(); }
+  void make_root() {
+    assert(parent()->is_root());
+    fields_.parent = fields_.parent->parent();  // Adopt the old root's parent.
+  }
+
+  // Getter for the rightmost root node field. Only valid on the root node.
+  btree_node* rightmost() const { return fields_.rightmost; }
+  btree_node** mutable_rightmost() { return &fields_.rightmost; }
+
+  // Getter for the size root node field. Only valid on the root node.
+  size_type size() const { return fields_.size; }
+  size_type* mutable_size() { return &fields_.size; }
+
+  // Getters for the key/value at position i in the node.
+  const key_type& key(int i) const {
+    return params_type::key(fields_.values[i]);
+  }
+  reference value(int i) {
+    return reinterpret_cast<reference>(fields_.values[i]);
+  }
+  const_reference value(int i) const {
+    return reinterpret_cast<const_reference>(fields_.values[i]);
+  }
+  mutable_value_type* mutable_value(int i) {
+    return &fields_.values[i];
+  }
+
+  // Swap value i in this node with value j in node x.
+  void value_swap(int i, btree_node *x, int j) {
+    params_type::swap(mutable_value(i), x->mutable_value(j));
+  }
+
+  // Getters/setter for the child at position i in the node. set_child() also
+  // updates the child's parent pointer and position.
+  btree_node* child(int i) const { return fields_.children[i]; }
+  btree_node** mutable_child(int i) { return &fields_.children[i]; }
+  void set_child(int i, btree_node *c) {
+    *mutable_child(i) = c;
+    c->fields_.parent = this;
+    c->fields_.position = i;
+  }
+
+  // Returns the position of the first value whose key is not less than k.
+  template <typename Compare>
+  int lower_bound(const key_type &k, const Compare &comp) const {
+    return search_type::lower_bound(k, *this, comp);
+  }
+  // Returns the position of the first value whose key is greater than k.
+  template <typename Compare>
+  int upper_bound(const key_type &k, const Compare &comp) const {
+    return search_type::upper_bound(k, *this, comp);
+  }
+
+  // Returns the position of the first value whose key is not less than k using
+  // linear search performed using plain compare.
+  template <typename Compare>
+  int linear_search_plain_compare(
+      const key_type &k, int s, int e, const Compare &comp) const {
+    while (s < e) {
+      if (!btree_compare_keys(comp, key(s), k)) {
+        break;
+      }
+      ++s;
+    }
+    return s;
+  }
+
+  // Returns the position of the first value whose key is not less than k using
+  // linear search performed using compare-to. On an exact match the returned
+  // position has kExactMatch OR'ed in.
+  template <typename Compare>
+  int linear_search_compare_to(
+      const key_type &k, int s, int e, const Compare &comp) const {
+    while (s < e) {
+      int c = comp(key(s), k);
+      if (c == 0) {
+        return s | kExactMatch;
+      } else if (c > 0) {
+        break;
+      }
+      ++s;
+    }
+    return s;
+  }
+
+  // Returns the position of the first value whose key is not less than k using
+  // binary search performed using plain compare.
+  template <typename Compare>
+  int binary_search_plain_compare(
+      const key_type &k, int s, int e, const Compare &comp) const {
+    while (s != e) {
+      int mid = (s + e) / 2;
+      if (btree_compare_keys(comp, key(mid), k)) {
+        s = mid + 1;
+      } else {
+        e = mid;
+      }
+    }
+    return s;
+  }
+
+  // Returns the position of the first value whose key is not less than k using
+  // binary search performed using compare-to. On an exact match the returned
+  // position has kExactMatch OR'ed in.
+  template <typename CompareTo>
+  int binary_search_compare_to(
+      const key_type &k, int s, int e, const CompareTo &comp) const {
+    while (s != e) {
+      int mid = (s + e) / 2;
+      int c = comp(key(mid), k);
+      if (c < 0) {
+        s = mid + 1;
+      } else if (c > 0) {
+        e = mid;
+      } else {
+        // Need to return the first value whose key is not less than k, which
+        // requires continuing the binary search. Note that we are guaranteed
+        // that the result is an exact match because if "key(mid-1) < k" the
+        // call to binary_search_compare_to() will return "mid".
+        s = binary_search_compare_to(k, s, mid, comp);
+        return s | kExactMatch;
+      }
+    }
+    return s;
+  }
+
+  // Inserts the value x at position i, shifting all existing values and
+  // children at positions >= i to the right by 1.
+  void insert_value(int i, const value_type &x);
+
+  // Removes the value at position i, shifting all existing values and children
+  // at positions > i to the left by 1.
+  void remove_value(int i);
+
+  // Rebalances a node with its right sibling.
+  void rebalance_right_to_left(btree_node *sibling, int to_move);
+  void rebalance_left_to_right(btree_node *sibling, int to_move);
+
+  // Splits a node, moving a portion of the node's values to its right sibling.
+  void split(btree_node *sibling, int insert_position);
+
+  // Merges a node with its right sibling, moving all of the values and the
+  // delimiting key in the parent node onto itself.
+  void merge(btree_node *sibling);
+
+  // Swap the contents of "this" and "src".
+  void swap(btree_node *src);
+
+  // Node initialization/destruction routines (storage is caller-provided).
+  static btree_node* init_leaf(
+      leaf_fields *f, btree_node *parent, int max_count) {
+    btree_node *n = reinterpret_cast<btree_node*>(f);
+    f->leaf = 1;
+    f->position = 0;
+    f->max_count = max_count;
+    f->count = 0;
+    f->parent = parent;
+    if (!NDEBUG) {  // Zero-fill the value slots only when NDEBUG is 0.
+      memset(&f->values, 0, max_count * sizeof(value_type));
+    }
+    return n;
+  }
+  static btree_node* init_internal(internal_fields *f, btree_node *parent) {
+    btree_node *n = init_leaf(f, parent, kNodeValues);
+    f->leaf = 0;  // init_leaf() marked the node as a leaf; undo that.
+    if (!NDEBUG) {
+      memset(f->children, 0, sizeof(f->children));
+    }
+    return n;
+  }
+  static btree_node* init_root(root_fields *f, btree_node *parent) {
+    btree_node *n = init_internal(f, parent);
+    f->rightmost = parent;
+    f->size = parent->count();  // Size starts at parent's value count.
+    return n;
+  }
+  void destroy() {  // Destroys the first count() values; frees no memory.
+    for (int i = 0; i < count(); ++i) {
+      value_destroy(i);
+    }
+  }
+
+ private:
+  void value_init(int i) {  // Default-constructs slot i in place.
+    new (&fields_.values[i]) mutable_value_type;
+  }
+  void value_init(int i, const value_type &x) {  // Copy-constructs slot i.
+    new (&fields_.values[i]) mutable_value_type(x);
+  }
+  void value_destroy(int i) {  // Runs the destructor of slot i.
+    fields_.values[i].~mutable_value_type();
+  }
+
+ private:
+  root_fields fields_;
+
+ private:
+  btree_node(const btree_node&);  // Private: nodes are not copyable.
+  void operator=(const btree_node&);  // Private: nodes are not assignable.
+};
+
+// Iterator over the values of a btree, represented as a (node, position)
+// pair. Node/Reference/Pointer select between the mutable and the const
+// flavour.
+template <typename Node, typename Reference, typename Pointer>
+struct btree_iterator {
+  using key_type = typename Node::key_type;
+  using size_type = typename Node::size_type;
+  using difference_type = typename Node::difference_type;
+  using params_type = typename Node::params_type;
+
+  using node_type = Node;
+  using normal_node = typename std::remove_const<Node>::type;
+  using const_node = const Node;
+  using value_type = typename params_type::value_type;
+  using normal_pointer = typename params_type::pointer;
+  using normal_reference = typename params_type::reference;
+  using const_pointer = typename params_type::const_pointer;
+  using const_reference = typename params_type::const_reference;
+
+  using pointer = Pointer;
+  using reference = Reference;
+  using iterator_category = std::bidirectional_iterator_tag;
+
+  using iterator =
+      btree_iterator<normal_node, normal_reference, normal_pointer>;
+  using const_iterator =
+      btree_iterator<const_node, const_reference, const_pointer>;
+  using self_type = btree_iterator<Node, Reference, Pointer>;
+
+  // A default-constructed iterator has a NULL node and position -1.
+  btree_iterator() : node(NULL), position(-1) {}
+  btree_iterator(Node *n, int p) : node(n), position(p) {}
+  // Construction from the mutable iterator type.
+  btree_iterator(const iterator &x) : node(x.node), position(x.position) {}
+
+  // Advances the iterator. Stepping inside a leaf is handled inline;
+  // everything else is delegated to increment_slow().
+  void increment() {
+    if (!node->leaf() || ++position >= node->count()) {
+      increment_slow();
+    }
+  }
+  void increment_by(int count);
+  void increment_slow();
+
+  // Moves the iterator back. Stepping inside a leaf is handled inline;
+  // everything else is delegated to decrement_slow().
+  void decrement() {
+    if (!node->leaf() || --position < 0) {
+      decrement_slow();
+    }
+  }
+  void decrement_slow();
+
+  // Two iterators are equal when both node and position agree.
+  bool operator==(const const_iterator &x) const {
+    return node == x.node && position == x.position;
+  }
+  bool operator!=(const const_iterator &x) const {
+    return !(node == x.node && position == x.position);
+  }
+
+  // Accessors for the key/value the iterator is pointing at.
+  const key_type& key() const {
+    return node->key(position);
+  }
+  reference operator*() const {
+    return node->value(position);
+  }
+  pointer operator->() const {
+    return &operator*();
+  }
+
+  // Pre-increment/decrement.
+  self_type& operator++() {
+    increment();
+    return *this;
+  }
+  self_type& operator--() {
+    decrement();
+    return *this;
+  }
+  // Post-increment/decrement: return the value held before the step.
+  self_type operator++(int) {
+    self_type before(*this);
+    increment();
+    return before;
+  }
+  self_type operator--(int) {
+    self_type before(*this);
+    decrement();
+    return before;
+  }
+
+  // The node in the tree the iterator is pointing at.
+  Node *node;
+  // The position within the node of the tree the iterator is pointing at.
+  int position;
+};
+
+// Routes an internal_locate request to the tree's plain-compare
+// implementation.
+struct btree_internal_locate_plain_compare {
+  template <typename Key, typename Tree, typename Iterator>
+  static std::pair<Iterator, int> dispatch(
+      const Key &key, const Tree &tree, Iterator start) {
+    return tree.internal_locate_plain_compare(key, start);
+  }
+};
+
+// Routes an internal_locate request to the tree's compare-to
+// implementation.
+struct btree_internal_locate_compare_to {
+  template <typename Key, typename Tree, typename Iterator>
+  static std::pair<Iterator, int> dispatch(
+      const Key &key, const Tree &tree, Iterator start) {
+    return tree.internal_locate_compare_to(key, start);
+  }
+};
+
+template <typename Params>
+class btree : public Params::key_compare {
+  typedef btree<Params> self_type;
+  typedef btree_node<Params> node_type;
+  typedef typename node_type::base_fields base_fields;
+  typedef typename node_type::leaf_fields leaf_fields;
+  typedef typename node_type::internal_fields internal_fields;
+  typedef typename node_type::root_fields root_fields;
+  typedef typename Params::is_key_compare_to is_key_compare_to;
+
+  friend class btree_internal_locate_plain_compare;
+  friend class btree_internal_locate_compare_to;
+  typedef typename if_<
+    is_key_compare_to::value,
+    btree_internal_locate_compare_to,
+    btree_internal_locate_plain_compare>::type internal_locate_type;
+
+  enum {
+    kNodeValues = node_type::kNodeValues,
+    kMinNodeValues = kNodeValues / 2,
+    kValueSize = node_type::kValueSize,
+    kExactMatch = node_type::kExactMatch,
+    kMatchMask = node_type::kMatchMask,
+  };
+
+  // Pairs a Data member with a Base that it inherits from (e.g.
+  // empty_base_handle<internal_allocator_type, node_type*>). Because Base is
+  // a base class rather than a member, a 0-size Base need not take up any
+  // space and sizeof(empty_base_handle) can simply be sizeof(Data). This is
+  // the "empty base class optimization".
+  template <typename Base, typename Data>
+  struct empty_base_handle : public Base {
+    empty_base_handle(const Base &base, const Data &payload)
+        : Base(base), data(payload) {}
+    Data data;
+  };
+
+  // Counts of leaf and internal nodes.
+  struct node_stats {
+    node_stats(ssize_t leaves, ssize_t internals)
+        : leaf_nodes(leaves), internal_nodes(internals) {}
+
+    // Adds the counts held by other onto this object.
+    node_stats& operator+=(const node_stats &other) {
+      leaf_nodes += other.leaf_nodes;
+      internal_nodes += other.internal_nodes;
+      return *this;
+    }
+
+    ssize_t leaf_nodes;
+    ssize_t internal_nodes;
+  };
+
+ public:
+  typedef Params params_type;
+  typedef typename Params::key_type key_type;
+  typedef typename Params::data_type data_type;
+  typedef typename Params::mapped_type mapped_type;
+  typedef typename Params::value_type value_type;
+  typedef typename Params::key_compare key_compare;
+  typedef typename Params::pointer pointer;
+  typedef typename Params::const_pointer const_pointer;
+  typedef typename Params::reference reference;
+  typedef typename Params::const_reference const_reference;
+  typedef typename Params::size_type size_type;
+  typedef typename Params::difference_type difference_type;
+  typedef btree_iterator<node_type, reference, pointer> iterator;
+  typedef typename iterator::const_iterator const_iterator;
+  typedef std::reverse_iterator<const_iterator> const_reverse_iterator;
+  typedef std::reverse_iterator<iterator> reverse_iterator;
+
+  typedef typename Params::allocator_type allocator_type;
+  typedef typename allocator_type::template rebind<char>::other
+    internal_allocator_type;
+
+ public:
+  // Default constructor.
+  btree(const key_compare &comp, const allocator_type &alloc);
+
+  // Copy constructor.
+  btree(const self_type &x);
+
+  // Destructor. Empties the tree by calling clear().
+  ~btree() {
+    clear();
+  }
+
+  // Iterator routines. begin() is position 0 of the leftmost node. end() is
+  // position count() of the rightmost node, or position 0 of a NULL node when
+  // there is no rightmost node.
+  iterator begin() {
+    node_type *first = leftmost();
+    return iterator(first, 0);
+  }
+  const_iterator begin() const {
+    const node_type *first = leftmost();
+    return const_iterator(first, 0);
+  }
+  iterator end() {
+    node_type *last = rightmost();
+    const int past_last = last ? last->count() : 0;
+    return iterator(last, past_last);
+  }
+  const_iterator end() const {
+    const node_type *last = rightmost();
+    const int past_last = last ? last->count() : 0;
+    return const_iterator(last, past_last);
+  }
+  // Reverse iteration starts at end() and stops at begin().
+  reverse_iterator rbegin() {
+    iterator stop = end();
+    return reverse_iterator(stop);
+  }
+  const_reverse_iterator rbegin() const {
+    const_iterator stop = end();
+    return const_reverse_iterator(stop);
+  }
+  reverse_iterator rend() {
+    iterator start = begin();
+    return reverse_iterator(start);
+  }
+  const_reverse_iterator rend() const {
+    const_iterator start = begin();
+    return const_reverse_iterator(start);
+  }
+
+  // Finds the first element whose key is not less than key. The search
+  // starts at position 0 of the root; a result with a NULL node is mapped to
+  // end() by internal_end().
+  iterator lower_bound(const key_type &key) {
+    iterator from_root(root(), 0);
+    return internal_end(internal_lower_bound(key, from_root));
+  }
+  const_iterator lower_bound(const key_type &key) const {
+    const_iterator from_root(root(), 0);
+    return internal_end(internal_lower_bound(key, from_root));
+  }
+
+  // Finds the first element whose key is greater than key. The search starts
+  // at position 0 of the root; a result with a NULL node is mapped to end()
+  // by internal_end().
+  iterator upper_bound(const key_type &key) {
+    iterator from_root(root(), 0);
+    return internal_end(internal_upper_bound(key, from_root));
+  }
+  const_iterator upper_bound(const key_type &key) const {
+    const_iterator from_root(root(), 0);
+    return internal_end(internal_upper_bound(key, from_root));
+  }
+
+  // Finds the range of values which compare equal to key. The first member of
+  // the returned pair is equal to lower_bound(key). The second member of the
+  // pair is equal to upper_bound(key).
+  std::pair<iterator,iterator> equal_range(const key_type &key) {
+    iterator first = lower_bound(key);
+    iterator last = upper_bound(key);
+    return std::pair<iterator,iterator>(first, last);
+  }
+  std::pair<const_iterator,const_iterator> equal_range(const key_type &key) const {
+    const_iterator first = lower_bound(key);
+    const_iterator last = upper_bound(key);
+    return std::pair<const_iterator,const_iterator>(first, last);
+  }
+
+  // Inserts a value into the btree only if it does not already exist. The
+  // boolean return value indicates whether insertion succeeded or failed. The
+  // ValuePointer type is used to avoid instantiating the value unless the key
+  // is being inserted. Value is not dereferenced if the key already exists in
+  // the btree. See btree_map::operator[].
+  template <typename ValuePointer>
+  std::pair<iterator,bool> insert_unique(const key_type &key, ValuePointer value);
+
+  // Inserts v only if its key is not already present; the boolean reports
+  // whether insertion happened. Forwards params_type::key(v) and &v.
+  std::pair<iterator,bool> insert_unique(const value_type &v) {
+    return insert_unique(params_type::key(v), &v);
+  }
+
+  // Insert with hint. Check to see if the value should be placed immediately
+  // before position in the tree. If it does, then the insertion will take
+  // amortized constant time. If not, the insertion will take amortized
+  // logarithmic time as if a call to insert_unique(v) were made.
+  iterator insert_unique(iterator position, const value_type &v);
+
+  // Insert a range of values into the btree.
+  template <typename InputIterator>
+  void insert_unique(InputIterator b, InputIterator e);
+
+  // Inserts a value into the btree. The ValuePointer type is used to avoid
+  // instantiating the value unless the key is being inserted. Value is not
+  // dereferenced if the key already exists in the btree. See
+  // btree_map::operator[].
+  template <typename ValuePointer>
+  iterator insert_multi(const key_type &key, ValuePointer value);
+
+  // Inserts v into the btree, forwarding params_type::key(v) and &v.
+  iterator insert_multi(const value_type &v) {
+    return insert_multi(params_type::key(v), &v);
+  }
+
+  // Insert with hint. Check to see if the value should be placed immediately
+  // before position in the tree. If it does, then the insertion will take
+  // amortized constant time. If not, the insertion will take amortized
+  // logarithmic time as if a call to insert_multi(v) were made.
+  iterator insert_multi(iterator position, const value_type &v);
+
+  // Insert a range of values into the btree.
+  template <typename InputIterator>
+  void insert_multi(InputIterator b, InputIterator e);
+
+  void assign(const self_type &x);
+
+  // Erase the specified iterator from the btree. The iterator must be valid
+  // (i.e. not equal to end()).  Return an iterator pointing to the node after
+  // the one that was erased (or end() if none exists).
+  iterator erase(iterator iter);
+
+  // Erases range. Returns the number of keys erased.
+  int erase(iterator begin, iterator end);
+
+  // Erases the specified key from the btree. Returns 1 if an element was
+  // erased and 0 otherwise.
+  int erase_unique(const key_type &key);
+
+  // Erases all of the entries matching the specified key from the
+  // btree. Returns the number of elements erased.
+  int erase_multi(const key_type &key);
+
+  // Finds the iterator corresponding to a key or returns end() if the key is
+  // not present. Each lookup starts at position 0 of the root, and a result
+  // with a NULL node is mapped to end() by internal_end().
+  iterator find_unique(const key_type &key) {
+    iterator from_root(root(), 0);
+    return internal_end(internal_find_unique(key, from_root));
+  }
+  const_iterator find_unique(const key_type &key) const {
+    const_iterator from_root(root(), 0);
+    return internal_end(internal_find_unique(key, from_root));
+  }
+  iterator find_multi(const key_type &key) {
+    iterator from_root(root(), 0);
+    return internal_end(internal_find_multi(key, from_root));
+  }
+  const_iterator find_multi(const key_type &key) const {
+    const_iterator from_root(root(), 0);
+    return internal_end(internal_find_multi(key, from_root));
+  }
+
+  // Returns the number of times the key appears in a btree that holds unique
+  // keys: 1 if internal_find_unique() yields a node, 0 otherwise.
+  size_type count_unique(const key_type &key) const {
+    const_iterator from_root(root(), 0);
+    const const_iterator hit = internal_find_unique(key, from_root);
+    // A NULL node means the key doesn't exist in the tree.
+    return hit.node ? 1 : 0;
+  }
+  // Returns the number of times the key appears in the btree, computed as the
+  // distance between lower_bound(key) and upper_bound(key).
+  size_type count_multi(const key_type &key) const {
+    const_iterator first = lower_bound(key);
+    const_iterator last = upper_bound(key);
+    return distance(first, last);
+  }
+
+  // Clear the btree, deleting all of the values it contains.
+  void clear();
+
+  // Swap the contents of *this and x.
+  void swap(self_type &x);
+
+  // Assign the contents of x to *this. Assigning a btree to itself does
+  // nothing.
+  self_type& operator=(const self_type &x) {
+    if (this != &x) {
+      assign(x);
+    }
+    return *this;
+  }
+
+  // The btree converts to its own comparator: both accessors return *this
+  // viewed as a key_compare.
+  key_compare* mutable_key_comp() {
+    key_compare *self = this;
+    return self;
+  }
+  const key_compare& key_comp() const {
+    const key_compare &self = *this;
+    return self;
+  }
+  // Compares x and y by passing key_comp() to btree_compare_keys().
+  bool compare_keys(const key_type &x, const key_type &y) const {
+    const key_compare &comp = key_comp();
+    return btree_compare_keys(comp, x, y);
+  }
+
+  // Dump the btree to the specified ostream. Requires that operator<< is
+  // defined for Key and Value. Nothing is written when there is no root.
+  void dump(std::ostream &os) const {
+    if (root() == NULL) {
+      return;
+    }
+    internal_dump(os, root(), 0);
+  }
+
+  // Verifies the structure of the btree.
+  void verify() const;
+
+  // Size routines. Note that empty() is slightly faster than doing size()==0.
+  // A leaf root reports its own value count; otherwise the size field stored
+  // in the root node is returned.
+  size_type size() const {
+    const node_type *top = root();
+    if (top == NULL) return 0;
+    if (!top->leaf()) return top->size();
+    return top->count();
+  }
+  size_type max_size() const { return std::numeric_limits<size_type>::max(); }
+  // The tree is empty exactly when it has no root node.
+  bool empty() const { return !root(); }
+
+  // The height of the btree. An empty tree will have height 0.
+  size_type height() const {
+    const node_type *const top = root();
+    if (!top) {
+      return 0;
+    }
+    // Count the length of the chain from the leftmost node up to the root.
+    // We actually count from the root back around to the level below the
+    // root, but the calculation is the same because of the circularity of
+    // that traversal.
+    size_type levels = 0;
+    const node_type *n = top;
+    do {
+      ++levels;
+      n = n->parent();
+    } while (n != top);
+    return levels;
+  }
+
+  // The number of internal, leaf and total nodes used by the btree, each
+  // taken from internal_stats(root()).
+  size_type leaf_nodes() const {
+    const node_stats tally = internal_stats(root());
+    return tally.leaf_nodes;
+  }
+  size_type internal_nodes() const {
+    const node_stats tally = internal_stats(root());
+    return tally.internal_nodes;
+  }
+  size_type nodes() const {
+    const node_stats tally = internal_stats(root());
+    return tally.leaf_nodes + tally.internal_nodes;
+  }
+
+  // The total number of bytes used by the btree.
+  size_type bytes_used() const {
+    const node_stats tally = internal_stats(root());
+    const bool single_leaf =
+        tally.leaf_nodes == 1 && tally.internal_nodes == 0;
+    if (single_leaf) {
+      // A lone leaf root is sized by its own max_count().
+      return sizeof(*this) +
+          sizeof(base_fields) + root()->max_count() * sizeof(value_type);
+    }
+    // Otherwise: full-size leaves and internal nodes, plus the extra fields
+    // the root carries beyond a regular internal node.
+    return sizeof(*this) +
+        sizeof(root_fields) - sizeof(internal_fields) +
+        tally.leaf_nodes * sizeof(leaf_fields) +
+        tally.internal_nodes * sizeof(internal_fields);
+  }
+
+  // The average number of bytes used per value stored in the btree.
+  static double average_bytes_per_value() {
+    // Returns the number of bytes per value on a leaf node that is 75%
+    // full. Experimentally, this matches up nicely with the computed number of
+    // bytes per value in trees that had their values inserted in random order.
+    const double values_when_75_percent_full = kNodeValues * 0.75;
+    return sizeof(leaf_fields) / values_when_75_percent_full;
+  }
+
+  // The fullness of the btree. Computed as the number of elements in the btree
+  // divided by the maximum number of elements a tree with the current number
+  // of nodes could hold. A value of 1 indicates perfect space
+  // utilization. Smaller values indicate space wastage.
+  double fullness() const {
+    return double(size()) / (nodes() * kNodeValues);
+  }
+  // The overhead of the btree structure in bytes per element. Computed as the
+  // total number of bytes used by the btree minus the number of bytes used for
+  // storing elements divided by the number of elements (0.0 if empty).
+  double overhead() const {
+    if (empty()) {
+      return 0.0;
+    }
+    return (bytes_used() - size() * kValueSize) / double(size());
+  }
+
+ private:
+  // Internal accessor routines. The root pointer lives in root_.data.
+  node_type* root() { return root_.data; }
+  const node_type* root() const { return root_.data; }
+  node_type** mutable_root() { return &root_.data; }
+
+  // Rightmost node: root() itself if NULL or a leaf, else stored in the root.
+  node_type* rightmost() {
+    return (!root() || root()->leaf()) ? root() : root()->rightmost();
+  }
+  const node_type* rightmost() const {
+    return (!root() || root()->leaf()) ? root() : root()->rightmost();
+  }
+  node_type** mutable_rightmost() { return root()->mutable_rightmost(); }
+
+  // The leftmost node is stored as the parent of the root (NULL if no root).
+  node_type* leftmost() { return root() ? root()->parent() : NULL; }
+  const node_type* leftmost() const { return root() ? root()->parent() : NULL; }
+
+  // The size of the tree is stored in the root node.
+  size_type* mutable_size() { return root()->mutable_size(); }
+
+  // Allocator routines: root_ is viewed as its internal_allocator_type base.
+  internal_allocator_type* mutable_internal_allocator() {
+    return static_cast<internal_allocator_type*>(&root_);
+  }
+  const internal_allocator_type& internal_allocator() const {
+    return *static_cast<const internal_allocator_type*>(&root_);
+  }
+
+  // Node creation/deletion routines. Each new_* routine obtains raw bytes
+  // from the internal allocator and hands them to the matching
+  // node_type::init_* function.
+  node_type* new_internal_node(node_type *parent) {
+    const size_t bytes = sizeof(internal_fields);
+    internal_fields *fields = reinterpret_cast<internal_fields*>(
+        mutable_internal_allocator()->allocate(bytes));
+    return node_type::init_internal(fields, parent);
+  }
+  node_type* new_internal_root_node() {
+    const size_t bytes = sizeof(root_fields);
+    root_fields *fields = reinterpret_cast<root_fields*>(
+        mutable_internal_allocator()->allocate(bytes));
+    return node_type::init_root(fields, root()->parent());
+  }
+  node_type* new_leaf_node(node_type *parent) {
+    const size_t bytes = sizeof(leaf_fields);
+    leaf_fields *fields = reinterpret_cast<leaf_fields*>(
+        mutable_internal_allocator()->allocate(bytes));
+    return node_type::init_leaf(fields, parent, kNodeValues);
+  }
+  // A leaf root is allocated with room for only max_count values and is
+  // initialized as its own parent.
+  node_type* new_leaf_root_node(int max_count) {
+    const size_t bytes = sizeof(base_fields) + max_count * sizeof(value_type);
+    leaf_fields *fields = reinterpret_cast<leaf_fields*>(
+        mutable_internal_allocator()->allocate(bytes));
+    node_type *self = reinterpret_cast<node_type*>(fields);
+    return node_type::init_leaf(fields, self, max_count);
+  }
+  // Destroys a non-root internal node and returns its sizeof(internal_fields)
+  // bytes to the allocator. The root node is released through
+  // delete_internal_root_node() instead.
+  void delete_internal_node(node_type *node) {
+    node->destroy();
+    assert(node != root());
+    char *storage = reinterpret_cast<char*>(node);
+    mutable_internal_allocator()->deallocate(
+        storage, sizeof(internal_fields));
+  }
+  // Destroys the current root node and returns its sizeof(root_fields) bytes
+  // to the allocator. The root pointer itself is not modified here.
+  void delete_internal_root_node() {
+    node_type *top = root();
+    top->destroy();
+    mutable_internal_allocator()->deallocate(
+        reinterpret_cast<char*>(top), sizeof(root_fields));
+  }
+  // Destroys a leaf node and deallocates it. The freed size is recomputed
+  // from the node's own max_count(), mirroring the size expression used by
+  // new_leaf_root_node().
+  void delete_leaf_node(node_type *node) {
+    node->destroy();
+    char *storage = reinterpret_cast<char*>(node);
+    mutable_internal_allocator()->deallocate(
+        storage,
+        sizeof(base_fields) + node->max_count() * sizeof(value_type));
+  }
+
+  // Rebalances or splits the node iter points to. The node must be full
+  // (count() == max_count(), asserted). iter->node and iter->position are
+  // updated in place and may end up referring to a sibling or to the newly
+  // created node.
+  void rebalance_or_split(iterator *iter);
+
+  // Merges the values of left, right and the delimiting key on their parent
+  // onto left, removing the delimiting key and deleting right. If right was
+  // the rightmost leaf, the rightmost pointer is redirected to left.
+  void merge_nodes(node_type *left, node_type *right);
+
+  // Tries to merge node with its left or right sibling, and failing that,
+  // rebalance with its left or right sibling. Returns true if a merge
+  // occurred, at which point it is no longer valid to access node. Returns
+  // false if no merging took place.
+  bool try_merge_or_rebalance(iterator *iter);
+
+  // Tries to shrink the height of the tree by 1. Does nothing while the root
+  // still holds at least one value.
+  void try_shrink();
+
+  // Maps an iterator whose node is NULL to end(); any other iterator is
+  // returned unchanged.
+  iterator internal_end(iterator iter) {
+    if (iter.node) {
+      return iter;
+    }
+    return end();
+  }
+  // Const overload: an iterator whose node is NULL becomes end(); any other
+  // iterator is returned unchanged.
+  const_iterator internal_end(const_iterator iter) const {
+    if (iter.node) {
+      return iter;
+    }
+    return end();
+  }
+
+  // Inserts a value into the btree immediately before iter. Requires that
+  // key(v) <= iter.key() and (--iter).key() <= key(v). Returns an iterator
+  // referring to the node and position the value was inserted at; making
+  // room may grow the root leaf or rebalance/split nodes.
+  iterator internal_insert(iterator iter, const value_type &v);
+
+  // Returns an iterator pointing to the first value >= the value "iter" is
+  // pointing at. Note that "iter" might be pointing to an invalid location as
+  // iter.position == iter.node->count(). This routine simply moves iter up in
+  // the tree to a valid location. If the climb reaches a parent that is a
+  // leaf (i.e. it walked past the root, whose parent is the leftmost leaf),
+  // the returned iterator's node is NULL.
+  template <typename IterType>
+  static IterType internal_last(IterType iter);
+
+  // Returns an iterator pointing to the leaf position at which key would
+  // reside in the tree. We provide 2 versions of internal_locate. The first
+  // version (internal_locate_plain_compare) always returns 0 for the second
+  // field of the pair. The second version (internal_locate_compare_to) is for
+  // the key-compare-to specialization and returns either kExactMatch (if the
+  // key was found in the tree) or -kExactMatch (if it wasn't) in the second
+  // field of the pair. The compare_to specialization allows the caller to
+  // avoid a subsequent comparison to determine if an exact match was made,
+  // speeding up string keys.
+  //
+  // internal_locate itself only forwards to internal_locate_type::dispatch
+  // (presumably selecting one of the two variants below). Both variants
+  // dereference iter.node without a NULL check, so iter must refer to an
+  // existing node.
+  template <typename IterType>
+  std::pair<IterType, int> internal_locate(
+      const key_type &key, IterType iter) const;
+  template <typename IterType>
+  std::pair<IterType, int> internal_locate_plain_compare(
+      const key_type &key, IterType iter) const;
+  template <typename IterType>
+  std::pair<IterType, int> internal_locate_compare_to(
+      const key_type &key, IterType iter) const;
+
+  // Internal routine which implements lower_bound(). An iter whose node is
+  // NULL is returned unchanged; otherwise the result has been passed through
+  // internal_last().
+  template <typename IterType>
+  IterType internal_lower_bound(
+      const key_type &key, IterType iter) const;
+
+  // Internal routine which implements upper_bound(). An iter whose node is
+  // NULL is returned unchanged; otherwise the result has been passed through
+  // internal_last().
+  template <typename IterType>
+  IterType internal_upper_bound(
+      const key_type &key, IterType iter) const;
+
+  // Internal routine which implements find_unique(). Callers in this file
+  // treat a result whose node is NULL as "key not found".
+  template <typename IterType>
+  IterType internal_find_unique(
+      const key_type &key, IterType iter) const;
+
+  // Internal routine which implements find_multi().
+  template <typename IterType>
+  IterType internal_find_multi(
+      const key_type &key, IterType iter) const;
+
+  // Deletes a node and all of its children.
+  void internal_clear(node_type *node);
+
+  // Dumps a node and all of its children to the specified ostream.
+  void internal_dump(std::ostream &os, const node_type *node, int level) const;
+
+  // Verifies the tree structure of node.
+  int internal_verify(const node_type *node,
+                      const key_type *lo, const key_type *hi) const;
+
+  // Recursively accumulates statistics for the subtree rooted at node. A
+  // NULL node contributes node_stats(0, 0), a leaf node_stats(1, 0), and an
+  // internal node node_stats(0, 1) plus the stats of its count() + 1
+  // children.
+  node_stats internal_stats(const node_type *node) const {
+    if (!node) {
+      return node_stats(0, 0);
+    }
+    if (node->leaf()) {
+      return node_stats(1, 0);
+    }
+    node_stats total(0, 1);
+    int child_index = 0;
+    while (child_index <= node->count()) {
+      total += internal_stats(node->child(child_index));
+      ++child_index;
+    }
+    return total;
+  }
+
+ private:
+  // Holds the root node pointer together with the allocator: the accessors
+  // above cast &root_ to internal_allocator_type to reach the allocator.
+  empty_base_handle<internal_allocator_type, node_type*> root_;
+
+ private:
+  // A never instantiated helper function whose return type is big_ when R is
+  // the expected comparison result type -- int if we have a key-compare-to
+  // functor, bool otherwise -- and small_ in every other case.
+  template <typename R>
+  static typename if_<
+   if_<is_key_compare_to::value,
+             std::is_same<R, int>,
+             std::is_same<R, bool> >::type::value,
+   big_, small_>::type key_compare_checker(R);
+
+  // A never instantiated helper function that returns the key comparison
+  // functor.
+  static key_compare key_compare_helper();
+
+  // Verify that key_compare returns the expected type (see
+  // key_compare_checker above). This is similar to the way
+  // is_convertible in base/type_traits.h works. Note that key_compare_checker
+  // is never actually invoked. The compiler will select which
+  // key_compare_checker() to instantiate and then figure out the size of the
+  // return type of key_compare_checker() at compile time which we then check
+  // against the sizeof of big_.
+  COMPILE_ASSERT(
+      sizeof(key_compare_checker(key_compare_helper()(key_type(), key_type()))) ==
+      sizeof(big_),
+      key_comparison_function_must_return_bool);
+
+  // Note: We insist that kNodeValues (presumably derived from
+  // Params::kTargetNodeSize) fits in base_fields::field_type.
+  COMPILE_ASSERT(kNodeValues <
+                 (1 << (8 * sizeof(typename base_fields::field_type))),
+                 target_node_size_too_large);
+
+  // Test the assumption made in setting kNodeValueSpace: base_fields must be
+  // at least as large as two pointers.
+  COMPILE_ASSERT(sizeof(base_fields) >= 2 * sizeof(void*),
+                 node_space_assumption_incorrect);
+};
+
+////
+// btree_node methods
+// Inserts x at value slot i. The value is constructed in the first free slot
+// and swapped down into place. For internal nodes the child pointers to the
+// right of the new value shift up by one, leaving a NULL child at i + 1.
+template <typename P>
+inline void btree_node<P>::insert_value(int i, const value_type &x) {
+  assert(i <= count());
+  const int old_count = count();
+  value_init(old_count, x);
+  for (int slot = old_count; slot > i; --slot) {
+    value_swap(slot, this, slot - 1);
+  }
+  set_count(old_count + 1);
+
+  if (leaf()) {
+    return;
+  }
+
+  // Open a child slot immediately to the right of the inserted value.
+  const int hole = i + 1;
+  for (int slot = count(); slot > hole; --slot) {
+    *mutable_child(slot) = child(slot - 1);
+    child(slot)->set_position(slot);
+  }
+  *mutable_child(hole) = NULL;
+}
+
+// Removes the value at slot i. For internal nodes the child to the right of
+// the value must already be empty (asserted); the following children shift
+// down by one. The removed value is swapped to the end and destroyed there.
+template <typename P>
+inline void btree_node<P>::remove_value(int i) {
+  if (!leaf()) {
+    assert(child(i + 1)->count() == 0);
+    const int last_child = count();
+    for (int slot = i + 1; slot < last_child; ++slot) {
+      *mutable_child(slot) = child(slot + 1);
+      child(slot)->set_position(slot);
+    }
+    *mutable_child(last_child) = NULL;
+  }
+
+  const int remaining = count() - 1;
+  set_count(remaining);
+  int slot = i;
+  while (slot < remaining) {
+    value_swap(slot, this, slot + 1);
+    ++slot;
+  }
+  value_destroy(slot);
+}
+
+// Moves to_move values from the front of the right sibling "src" onto the end
+// of this node, rotating them through the delimiting value held in the shared
+// parent. For internal nodes the matching child pointers are moved as well.
+// Requires (asserted) that src is the sibling immediately to the right, that
+// src holds at least as many values as this node, and that
+// 1 <= to_move <= src->count().
+template <typename P>
+void btree_node<P>::rebalance_right_to_left(btree_node *src, int to_move) {
+  assert(parent() == src->parent());
+  assert(position() + 1 == src->position());
+  assert(src->count() >= count());
+  assert(to_move >= 1);
+  assert(to_move <= src->count());
+
+  // Make room in the left node for the new values.
+  for (int i = 0; i < to_move; ++i) {
+    value_init(i + count());
+  }
+
+  // Move the delimiting value to the left node and the new delimiting value
+  // from the right node.
+  value_swap(count(), parent(), position());
+  parent()->value_swap(position(), src, to_move - 1);
+
+  // Move the values from the right to the left node.
+  for (int i = 1; i < to_move; ++i) {
+    value_swap(count() + i, src, i - 1);
+  }
+  // Shift the values in the right node to their correct position.
+  for (int i = to_move; i < src->count(); ++i) {
+    src->value_swap(i - to_move, src, i);
+  }
+  // Destroy the to_move slots left over at the tail of the right node.
+  for (int i = 1; i <= to_move; ++i) {
+    src->value_destroy(src->count() - i);
+  }
+
+  if (!leaf()) {
+    // Move the child pointers from the right to the left node.
+    for (int i = 0; i < to_move; ++i) {
+      set_child(1 + count() + i, src->child(i));
+    }
+    // Slide the right node's remaining children down and clear the slots
+    // they vacated.
+    for (int i = 0; i <= src->count() - to_move; ++i) {
+      assert(i + to_move <= src->max_count());
+      src->set_child(i, src->child(i + to_move));
+      *src->mutable_child(i + to_move) = NULL;
+    }
+  }
+
+  // Fixup the counts on the src and dest nodes.
+  set_count(count() + to_move);
+  src->set_count(src->count() - to_move);
+}
+
+// Moves to_move values from the end of this node onto the front of the right
+// sibling "dest", rotating them through the delimiting value held in the
+// shared parent. For internal nodes the matching child pointers are moved as
+// well. Requires (asserted) that dest is the sibling immediately to the
+// right, that this node holds at least as many values as dest, and that
+// 1 <= to_move <= count().
+template <typename P>
+void btree_node<P>::rebalance_left_to_right(btree_node *dest, int to_move) {
+  assert(parent() == dest->parent());
+  assert(position() + 1 == dest->position());
+  assert(count() >= dest->count());
+  assert(to_move >= 1);
+  assert(to_move <= count());
+
+  // Make room in the right node for the new values.
+  for (int i = 0; i < to_move; ++i) {
+    dest->value_init(i + dest->count());
+  }
+  // Shift dest's existing values up by to_move, working from the back.
+  for (int i = dest->count() - 1; i >= 0; --i) {
+    dest->value_swap(i, dest, i + to_move);
+  }
+
+  // Move the delimiting value to the right node and the new delimiting value
+  // from the left node.
+  dest->value_swap(to_move - 1, parent(), position());
+  parent()->value_swap(position(), this, count() - to_move);
+  value_destroy(count() - to_move);
+
+  // Move the values from the left to the right node.
+  for (int i = 1; i < to_move; ++i) {
+    value_swap(count() - to_move + i, dest, i - 1);
+    value_destroy(count() - to_move + i);
+  }
+
+  if (!leaf()) {
+    // Move the child pointers from the left to the right node.
+    // First shift dest's own children up by to_move, clearing old slots.
+    for (int i = dest->count(); i >= 0; --i) {
+      dest->set_child(i + to_move, dest->child(i));
+      *dest->mutable_child(i) = NULL;
+    }
+    // Then hand over this node's last to_move children.
+    for (int i = 1; i <= to_move; ++i) {
+      dest->set_child(i - 1, child(count() - to_move + i));
+      *mutable_child(count() - to_move + i) = NULL;
+    }
+  }
+
+  // Fixup the counts on the src and dest nodes.
+  set_count(count() - to_move);
+  dest->set_count(dest->count() + to_move);
+}
+
+// Splits this node in two: the upper values (and, for internal nodes, their
+// children) move into the empty node "dest", and the largest value remaining
+// on this node is moved up into the parent as the delimiter, with dest
+// installed as the parent's child just right of it. insert_position only
+// influences how many values go to each side.
+template <typename P>
+void btree_node<P>::split(btree_node *dest, int insert_position) {
+  assert(dest->count() == 0);
+
+  // We bias the split based on the position being inserted. If we're
+  // inserting at the beginning of the left node then bias the split to put
+  // more values on the right node. If we're inserting at the end of the
+  // right node then bias the split to put more values on the left node.
+  if (insert_position == 0) {
+    dest->set_count(count() - 1);
+  } else if (insert_position == max_count()) {
+    dest->set_count(0);
+  } else {
+    dest->set_count(count() / 2);
+  }
+  set_count(count() - dest->count());
+  assert(count() >= 1);
+
+  // Move values from the left sibling to the right sibling.
+  for (int i = 0; i < dest->count(); ++i) {
+    dest->value_init(i);
+    value_swap(count() + i, dest, i);
+    value_destroy(count() + i);
+  }
+
+  // The split key is the largest value in the left sibling.
+  // A default-constructed placeholder is inserted into the parent first and
+  // then swapped with the real split key.
+  set_count(count() - 1);
+  parent()->insert_value(position(), value_type());
+  value_swap(count(), parent(), position());
+  value_destroy(count());
+  parent()->set_child(position() + 1, dest);
+
+  if (!leaf()) {
+    // Hand the children to the right of the split key over to dest.
+    for (int i = 0; i <= dest->count(); ++i) {
+      assert(child(count() + i + 1) != NULL);
+      dest->set_child(i, child(count() + i + 1));
+      *mutable_child(count() + i + 1) = NULL;
+    }
+  }
+}
+
+// Appends the parent's delimiting value and then all of the values (and, for
+// internal nodes, children) of the right sibling "src" to this node, and
+// removes the delimiting value from the parent. src is left with a count of
+// zero; it is not deallocated here.
+template <typename P>
+void btree_node<P>::merge(btree_node *src) {
+  assert(parent() == src->parent());
+  assert(position() + 1 == src->position());
+
+  // Move the delimiting value to the left node.
+  value_init(count());
+  value_swap(count(), parent(), position());
+
+  // Move the values from the right to the left node.
+  for (int i = 0; i < src->count(); ++i) {
+    value_init(1 + count() + i);
+    value_swap(1 + count() + i, src, i);
+    src->value_destroy(i);
+  }
+
+  if (!leaf()) {
+    // Move the child pointers from the right to the left node.
+    for (int i = 0; i <= src->count(); ++i) {
+      set_child(1 + count() + i, src->child(i));
+      *src->mutable_child(i) = NULL;
+    }
+  }
+
+  // Fixup the counts on the src and dest nodes.
+  set_count(1 + count() + src->count());
+  src->set_count(0);
+
+  // Remove the value on the parent node.
+  parent()->remove_value(position());
+}
+
+// Exchanges the values, child pointers and counts of this node and x. Both
+// nodes must be of the same kind, leaf or internal (asserted).
+template <typename P>
+void btree_node<P>::swap(btree_node *x) {
+  assert(leaf() == x->leaf());
+
+  // Swap the values.
+  // The node holding fewer values first gets slots initialized up to the
+  // larger count so every swapped index is a live value on both sides.
+  for (int i = count(); i < x->count(); ++i) {
+    value_init(i);
+  }
+  for (int i = x->count(); i < count(); ++i) {
+    x->value_init(i);
+  }
+  int n = std::max(count(), x->count());
+  for (int i = 0; i < n; ++i) {
+    value_swap(i, x, i);
+  }
+  // Destroy the surplus slots on whichever node now holds the shorter run.
+  for (int i = count(); i < x->count(); ++i) {
+    x->value_destroy(i);
+  }
+  for (int i = x->count(); i < count(); ++i) {
+    value_destroy(i);
+  }
+
+  if (!leaf()) {
+    // Swap the child pointers.
+    for (int i = 0; i <= n; ++i) {
+      btree_swap_helper(*mutable_child(i), *x->mutable_child(i));
+    }
+    // The counts have not been swapped yet, so count() still describes the
+    // children that now live in x (and x->count() those now living here).
+    for (int i = 0; i <= count(); ++i) {
+      x->child(i)->fields_.parent = x;
+    }
+    for (int i = 0; i <= x->count(); ++i) {
+      child(i)->fields_.parent = this;
+    }
+  }
+
+  // Swap the counts.
+  btree_swap_helper(fields_.count, x->fields_.count);
+}
+
+////
+// btree_iterator methods
+// Slow path of iterator increment. On an internal node, steps to the first
+// value of the leftmost leaf below the next child. On a leaf whose position
+// has run off the end, climbs towards the root until a valid position is
+// found; if none exists the iterator is restored to its starting state.
+template <typename N, typename R, typename P>
+void btree_iterator<N, R, P>::increment_slow() {
+  if (!node->leaf()) {
+    assert(position < node->count());
+    // Descend along the leftmost path of the child after this value.
+    for (node = node->child(position + 1); !node->leaf();
+         node = node->child(0)) {
+    }
+    position = 0;
+    return;
+  }
+
+  assert(position >= node->count());
+  self_type start(*this);
+  while (position == node->count() && !node->is_root()) {
+    assert(node->parent()->child(node->position()) == node);
+    position = node->position();
+    node = node->parent();
+  }
+  if (position == node->count()) {
+    // Climbed to the root without finding a successor.
+    *this = start;
+  }
+}
+
+// Advances the iterator by count values. Within a leaf the position is
+// bumped in one step; crossing a node boundary goes through increment_slow().
+template <typename N, typename R, typename P>
+void btree_iterator<N, R, P>::increment_by(int count) {
+  while (count > 0) {
+    if (!node->leaf()) {
+      --count;
+    } else {
+      const int remaining = node->count() - position;
+      position += (count < remaining) ? count : remaining;
+      count -= remaining;
+      if (position < node->count()) {
+        return;
+      }
+    }
+    increment_slow();
+  }
+}
+
+// Slow path of iterator decrement. On an internal node, steps to the last
+// value of the rightmost leaf below the child at this position. On a leaf
+// whose position has run off the front, climbs towards the root until a valid
+// position is found; if none exists the iterator is restored to its starting
+// state.
+template <typename N, typename R, typename P>
+void btree_iterator<N, R, P>::decrement_slow() {
+  if (!node->leaf()) {
+    assert(position >= 0);
+    // Descend along the rightmost path of the child before this value.
+    for (node = node->child(position); !node->leaf();
+         node = node->child(node->count())) {
+    }
+    position = node->count() - 1;
+    return;
+  }
+
+  assert(position <= -1);
+  self_type start(*this);
+  while (position < 0 && !node->is_root()) {
+    assert(node->parent()->child(node->position()) == node);
+    position = node->position() - 1;
+    node = node->parent();
+  }
+  if (position < 0) {
+    // Climbed to the root without finding a predecessor.
+    *this = start;
+  }
+}
+
+////
+// btree methods
+// Constructs an empty tree (NULL root) using the given comparator and
+// allocator.
+template <typename P>
+btree<P>::btree(const key_compare &comp, const allocator_type &alloc)
+    : key_compare(comp), root_(alloc, NULL) {}
+
+// Copy constructor: starts out empty with x's comparator and allocator, then
+// copies x's contents via assign().
+template <typename P>
+btree<P>::btree(const self_type &x)
+    : key_compare(x.key_comp()), root_(x.internal_allocator(), NULL) {
+  assign(x);
+}
+
+// Inserts *value under key unless an equal key is already present. Returns
+// the iterator to the inserted (or already existing) value together with a
+// flag telling whether an insertion took place.
+template <typename P> template <typename ValuePointer>
+std::pair<typename btree<P>::iterator, bool>
+btree<P>::insert_unique(const key_type &key, ValuePointer value) {
+  if (empty()) {
+    // Start the tree off with a single-slot root leaf.
+    *mutable_root() = new_leaf_root_node(1);
+  }
+
+  std::pair<iterator, int> located =
+      internal_locate(key, iterator(root(), 0));
+  iterator &where = located.first;
+  const int match = located.second;
+
+  if (match == kExactMatch) {
+    // The locate step already proved the key is present.
+    return std::make_pair(internal_last(where), false);
+  }
+  if (!match) {
+    // No match information available: compare against the value at the
+    // located position, if there is one.
+    iterator candidate = internal_last(where);
+    if (candidate.node && !compare_keys(key, candidate.key())) {
+      return std::make_pair(candidate, false);
+    }
+  }
+
+  return std::make_pair(internal_insert(where, *value), true);
+}
+
+// Hinted unique insert. If v belongs immediately before or immediately after
+// "position" it is inserted there directly; if position already holds an
+// equal key, position is returned; otherwise falls back to the unhinted
+// insert_unique(v).
+template <typename P>
+inline typename btree<P>::iterator
+btree<P>::insert_unique(iterator position, const value_type &v) {
+  if (empty()) {
+    return insert_unique(v).first;
+  }
+
+  const key_type &key = params_type::key(v);
+  if (position == end() || compare_keys(key, position.key())) {
+    iterator before = position;
+    if (position == begin() || compare_keys((--before).key(), key)) {
+      // before.key() < key < position.key()
+      return internal_insert(position, v);
+    }
+  } else if (compare_keys(position.key(), key)) {
+    iterator after = position;
+    ++after;
+    if (after == end() || compare_keys(key, after.key())) {
+      // position.key() < key < after.key()
+      return internal_insert(after, v);
+    }
+  } else {
+    // position.key() == key
+    return position;
+  }
+
+  // The hint was not usable.
+  return insert_unique(v).first;
+}
+
+// Inserts every element of [b, e), each time using end() as the hint.
+template <typename P> template <typename InputIterator>
+void btree<P>::insert_unique(InputIterator b, InputIterator e) {
+  while (b != e) {
+    insert_unique(end(), *b);
+    ++b;
+  }
+}
+
+// Inserts *value at the upper bound of key, i.e. after any existing values
+// with an equal key. Returns an iterator to the inserted value.
+template <typename P> template <typename ValuePointer>
+typename btree<P>::iterator
+btree<P>::insert_multi(const key_type &key, ValuePointer value) {
+  if (empty()) {
+    // Start the tree off with a single-slot root leaf.
+    *mutable_root() = new_leaf_root_node(1);
+  }
+
+  // An upper bound with a NULL node means "past the last value": use end().
+  iterator where = internal_end(
+      internal_upper_bound(key, iterator(root(), 0)));
+  return internal_insert(where, *value);
+}
+
+// Hinted multi insert. If v may be placed immediately before or immediately
+// after "position" without breaking the ordering it is inserted there
+// directly; otherwise falls back to the unhinted insert_multi(v).
+template <typename P>
+typename btree<P>::iterator
+btree<P>::insert_multi(iterator position, const value_type &v) {
+  if (empty()) {
+    return insert_multi(v);
+  }
+
+  const key_type &key = params_type::key(v);
+  if (position == end() || !compare_keys(position.key(), key)) {
+    iterator before = position;
+    if (position == begin() || !compare_keys(key, (--before).key())) {
+      // before.key() <= key <= position.key()
+      return internal_insert(position, v);
+    }
+  } else {
+    iterator after = position;
+    ++after;
+    if (after == end() || !compare_keys(after.key(), key)) {
+      // position.key() < key <= after.key()
+      return internal_insert(after, v);
+    }
+  }
+
+  // The hint was not usable.
+  return insert_multi(v);
+}
+
+// Inserts every element of [b, e), each time using end() as the hint.
+template <typename P> template <typename InputIterator>
+void btree<P>::insert_multi(InputIterator b, InputIterator e) {
+  while (b != e) {
+    insert_multi(end(), *b);
+    ++b;
+  }
+}
+
+// Replaces the contents of this tree with a copy of x, also adopting x's
+// comparator and allocator. Assigning a tree to itself is a no-op.
+template <typename P>
+void btree<P>::assign(const self_type &x) {
+  if (&x == this) {
+    // Without this guard the clear() below would empty x as well and the
+    // copy loop would then see nothing to copy, silently wiping the tree.
+    return;
+  }
+
+  clear();
+
+  *mutable_key_comp() = x.key_comp();
+  *mutable_internal_allocator() = x.internal_allocator();
+
+  // Assignment can avoid key comparisons because we know the order of the
+  // values is the same order we'll store them in.
+  for (const_iterator iter = x.begin(); iter != x.end(); ++iter) {
+    if (empty()) {
+      // The first value goes through insert_multi(), which creates the root.
+      insert_multi(*iter);
+    } else {
+      // If the btree is not empty, we can just insert the new value at the end
+      // of the tree!
+      internal_insert(end(), *iter);
+    }
+  }
+}
+
+// Erases the value iter points at and returns an iterator to the value that
+// followed it, or end() if the tree became empty. iter must refer to a valid
+// value: its node is dereferenced unconditionally.
+template <typename P>
+typename btree<P>::iterator btree<P>::erase(iterator iter) {
+  bool internal_delete = false;
+  if (!iter.node->leaf()) {
+    // Deletion of a value on an internal node. Swap the key with the largest
+    // value of our left child. This is easy, we just decrement iter.
+    iterator tmp_iter(iter--);
+    assert(iter.node->leaf());
+    assert(!compare_keys(tmp_iter.key(), iter.key()));
+    iter.node->value_swap(iter.position, tmp_iter.node, tmp_iter.position);
+    internal_delete = true;
+    --*mutable_size();
+  } else if (!root()->leaf()) {
+    // The explicit size counter is only maintained while the root is an
+    // internal node.
+    --*mutable_size();
+  }
+
+  // Delete the key from the leaf.
+  iter.node->remove_value(iter.position);
+
+  // We want to return the next value after the one we just erased. If we
+  // erased from an internal node (internal_delete == true), then the next
+  // value is ++(++iter). If we erased from a leaf node (internal_delete ==
+  // false) then the next value is ++iter. Note that ++iter may point to an
+  // internal node and the value in the internal node may move to a leaf node
+  // (iter.node) when rebalancing is performed at the leaf level.
+
+  // Merge/rebalance as we walk back up the tree.
+  iterator res(iter);
+  for (;;) {
+    if (iter.node == root()) {
+      try_shrink();
+      if (empty()) {
+        return end();
+      }
+      break;
+    }
+    if (iter.node->count() >= kMinNodeValues) {
+      break;
+    }
+    bool merged = try_merge_or_rebalance(&iter);
+    // Only the leaf-level adjustment can move the position we will return.
+    if (iter.node->leaf()) {
+      res = iter;
+    }
+    if (!merged) {
+      break;
+    }
+    // A merge removed a value from the parent, so re-check the parent.
+    iter.node = iter.node->parent();
+  }
+
+  // Adjust our return value. If we're pointing at the end of a node, advance
+  // the iterator.
+  if (res.position == res.node->count()) {
+    res.position = res.node->count() - 1;
+    ++res;
+  }
+  // If we erased from an internal node, advance the iterator.
+  if (internal_delete) {
+    ++res;
+  }
+  return res;
+}
+
+// Erases every value in [begin, end) and returns how many were erased. The
+// range length is measured up front; each erase() yields the next position.
+template <typename P>
+int btree<P>::erase(iterator begin, iterator end) {
+  const int count = distance(begin, end);
+  for (int remaining = count; remaining > 0; --remaining) {
+    begin = erase(begin);
+  }
+  return count;
+}
+
+// Erases the value with the given key, if present. Returns the number of
+// values erased (0 or 1).
+template <typename P>
+int btree<P>::erase_unique(const key_type &key) {
+  iterator found = internal_find_unique(key, iterator(root(), 0));
+  if (found.node) {
+    erase(found);
+    return 1;
+  }
+  // The key doesn't exist in the tree, return nothing done.
+  return 0;
+}
+
+// Erases all values in [lower_bound(key), upper_bound(key)) and returns how
+// many were erased. Returns 0 when the lower bound has a NULL node.
+template <typename P>
+int btree<P>::erase_multi(const key_type &key) {
+  iterator first = internal_lower_bound(key, iterator(root(), 0));
+  if (!first.node) {
+    // The key doesn't exist in the tree, return nothing done.
+    return 0;
+  }
+  // Delete all of the keys between first and upper_bound(key).
+  iterator last = internal_end(
+      internal_upper_bound(key, iterator(root(), 0)));
+  return erase(first, last);
+}
+
+// Removes every value: the whole node hierarchy is handed to
+// internal_clear() and the root pointer is reset to NULL.
+template <typename P>
+void btree<P>::clear() {
+  if (node_type *top = root()) {
+    internal_clear(top);
+  }
+  *mutable_root() = NULL;
+}
+
+// Exchanges the contents of this tree and x: the comparator base subobjects
+// are swapped, then the root_ handles.
+template <typename P>
+void btree<P>::swap(self_type &x) {
+  key_compare &mine = static_cast<key_compare&>(*this);
+  key_compare &theirs = static_cast<key_compare&>(x);
+  std::swap(mine, theirs);
+  std::swap(root_, x.root_);
+}
+
+// Debug consistency check, implemented entirely with assert(). A tree
+// without a root must report size 0 and NULL leftmost/rightmost nodes.
+// Otherwise size() must match internal_verify(), and leftmost()/rightmost()
+// must be leaves that agree with iterating in from either end of the root.
+template <typename P>
+void btree<P>::verify() const {
+  if (root() == NULL) {
+    assert(size() == 0);
+    assert(leftmost() == NULL);
+    assert(rightmost() == NULL);
+    return;
+  }
+
+  assert(size() == internal_verify(root(), NULL, NULL));
+  assert(leftmost() == (++const_iterator(root(), -1)).node);
+  assert(rightmost() == (--const_iterator(root(), root()->count())).node);
+  assert(leftmost()->leaf());
+  assert(rightmost()->leaf());
+}
+
+// Makes room for an insertion into the full node iter refers to. First tries
+// to shift values into a non-full left or right sibling; if neither works the
+// node is split, recursing on the parent beforehand when the parent is full
+// too. For the root, a new level is added instead. iter->node and
+// iter->position are bound by reference below and are updated so that they
+// keep denoting the intended insertion point.
+template <typename P>
+void btree<P>::rebalance_or_split(iterator *iter) {
+  node_type *&node = iter->node;
+  int &insert_position = iter->position;
+  assert(node->count() == node->max_count());
+
+  // First try to make room on the node by rebalancing.
+  node_type *parent = node->parent();
+  if (node != root()) {
+    if (node->position() > 0) {
+      // Try rebalancing with our left sibling.
+      node_type *left = parent->child(node->position() - 1);
+      if (left->count() < left->max_count()) {
+        // We bias rebalancing based on the position being inserted. If we're
+        // inserting at the end of the right node then we bias rebalancing to
+        // fill up the left node.
+        int to_move = (left->max_count() - left->count()) /
+            (1 + (insert_position < left->max_count()));
+        to_move = std::max(1, to_move);
+
+        // Only rebalance if the insertion would not land in a left node
+        // that the move itself has just filled up.
+        if (((insert_position - to_move) >= 0) ||
+            ((left->count() + to_move) < left->max_count())) {
+          left->rebalance_right_to_left(node, to_move);
+
+          assert(node->max_count() - node->count() == to_move);
+          insert_position = insert_position - to_move;
+          if (insert_position < 0) {
+            // The insertion point moved over to the left sibling.
+            insert_position = insert_position + left->count() + 1;
+            node = left;
+          }
+
+          assert(node->count() < node->max_count());
+          return;
+        }
+      }
+    }
+
+    if (node->position() < parent->count()) {
+      // Try rebalancing with our right sibling.
+      node_type *right = parent->child(node->position() + 1);
+      if (right->count() < right->max_count()) {
+        // We bias rebalancing based on the position being inserted. If we're
+        // inserting at the beginning of the left node then we bias rebalancing
+        // to fill up the right node.
+        int to_move = (right->max_count() - right->count()) /
+            (1 + (insert_position > 0));
+        to_move = std::max(1, to_move);
+
+        // Only rebalance if the insertion would not land in a right node
+        // that the move itself has just filled up.
+        if ((insert_position <= (node->count() - to_move)) ||
+            ((right->count() + to_move) < right->max_count())) {
+          node->rebalance_left_to_right(right, to_move);
+
+          if (insert_position > node->count()) {
+            // The insertion point moved over to the right sibling.
+            insert_position = insert_position - node->count() - 1;
+            node = right;
+          }
+
+          assert(node->count() < node->max_count());
+          return;
+        }
+      }
+    }
+
+    // Rebalancing failed, make sure there is room on the parent node for a new
+    // value.
+    if (parent->count() == parent->max_count()) {
+      iterator parent_iter(node->parent(), node->position());
+      rebalance_or_split(&parent_iter);
+    }
+  } else {
+    // Rebalancing not possible because this is the root node.
+    if (root()->leaf()) {
+      // The root node is currently a leaf node: create a new root node and set
+      // the current root node as the child of the new root.
+      parent = new_internal_root_node();
+      parent->set_child(0, root());
+      *mutable_root() = parent;
+      assert(*mutable_rightmost() == parent->child(0));
+    } else {
+      // The root node is an internal node. We do not want to create a new root
+      // node because the root node is special and holds the size of the tree
+      // and a pointer to the rightmost node. So we create a new internal node
+      // and move all of the items on the current root into the new node.
+      parent = new_internal_node(parent);
+      // The new node is temporarily made its own child 0 so that the swap
+      // below leaves the emptied root with the new node as its only child.
+      parent->set_child(0, parent);
+      parent->swap(root());
+      node = parent;
+    }
+  }
+
+  // Split the node.
+  node_type *split_node;
+  if (node->leaf()) {
+    split_node = new_leaf_node(parent);
+    node->split(split_node, insert_position);
+    // A split of the rightmost leaf makes the new right half the rightmost.
+    if (rightmost() == node) {
+      *mutable_rightmost() = split_node;
+    }
+  } else {
+    split_node = new_internal_node(parent);
+    node->split(split_node, insert_position);
+  }
+
+  // If the insertion point fell into the right half, retarget iter.
+  if (insert_position > node->count()) {
+    insert_position = insert_position - node->count() - 1;
+    node = split_node;
+  }
+}
+
+// Folds right (and the delimiting value in the parent) into left, then frees
+// right with the deleter matching its kind. If right was the rightmost leaf,
+// left takes over that role.
+template <typename P>
+void btree<P>::merge_nodes(node_type *left, node_type *right) {
+  left->merge(right);
+
+  if (!right->leaf()) {
+    delete_internal_node(right);
+    return;
+  }
+
+  if (rightmost() == right) {
+    *mutable_rightmost() = left;
+  }
+  delete_leaf_node(right);
+}
+
+// Repairs an under-full node after an erase. In order of preference: merge
+// into the left sibling, merge the right sibling into this node, borrow
+// values from the right sibling, borrow values from the left sibling.
+// Returns true only if a merge happened. iter->node and iter->position are
+// adjusted so they keep denoting the same logical position.
+template <typename P>
+bool btree<P>::try_merge_or_rebalance(iterator *iter) {
+  node_type *parent = iter->node->parent();
+  if (iter->node->position() > 0) {
+    // Try merging with our left sibling.
+    node_type *left = parent->child(iter->node->position() - 1);
+    if ((1 + left->count() + iter->node->count()) <= left->max_count()) {
+      // Our values end up after left's values and the delimiter.
+      iter->position += 1 + left->count();
+      merge_nodes(left, iter->node);
+      iter->node = left;
+      return true;
+    }
+  }
+  if (iter->node->position() < parent->count()) {
+    // Try merging with our right sibling.
+    node_type *right = parent->child(iter->node->position() + 1);
+    if ((1 + iter->node->count() + right->count()) <= right->max_count()) {
+      merge_nodes(iter->node, right);
+      return true;
+    }
+    // Try rebalancing with our right sibling. We don't perform rebalancing if
+    // we deleted the first element from iter->node and the node is not
+    // empty. This is a small optimization for the common pattern of deleting
+    // from the front of the tree.
+    if ((right->count() > kMinNodeValues) &&
+        ((iter->node->count() == 0) ||
+         (iter->position > 0))) {
+      // Move about half of the difference, but always leave right with at
+      // least one value.
+      int to_move = (right->count() - iter->node->count()) / 2;
+      to_move = std::min(to_move, right->count() - 1);
+      iter->node->rebalance_right_to_left(right, to_move);
+      return false;
+    }
+  }
+  if (iter->node->position() > 0) {
+    // Try rebalancing with our left sibling. We don't perform rebalancing if
+    // we deleted the last element from iter->node and the node is not
+    // empty. This is a small optimization for the common pattern of deleting
+    // from the back of the tree.
+    node_type *left = parent->child(iter->node->position() - 1);
+    if ((left->count() > kMinNodeValues) &&
+        ((iter->node->count() == 0) ||
+         (iter->position < iter->node->count()))) {
+      // Move about half of the difference, but always leave left with at
+      // least one value.
+      int to_move = (left->count() - iter->node->count()) / 2;
+      to_move = std::min(to_move, left->count() - 1);
+      left->rebalance_left_to_right(iter->node, to_move);
+      // The borrowed values were placed in front of ours.
+      iter->position += to_move;
+      return false;
+    }
+  }
+  return false;
+}
+
+// Removes one level from the tree once the root has run out of values. An
+// empty leaf root is freed, leaving the tree without a root. An empty
+// internal root is replaced by its only child: a leaf child is promoted to
+// root directly, an internal child has its contents swapped into the existing
+// root node and is then freed.
+template <typename P>
+void btree<P>::try_shrink() {
+  if (root()->count() > 0) {
+    return;
+  }
+
+  // Deleted the last item on the root node, shrink the height of the tree.
+  if (root()->leaf()) {
+    assert(size() == 0);
+    delete_leaf_node(root());
+    *mutable_root() = NULL;
+    return;
+  }
+
+  node_type *only_child = root()->child(0);
+  if (!only_child->leaf()) {
+    // Keep the existing root node and move everything from the child into
+    // it.
+    only_child->swap(root());
+    delete_internal_node(only_child);
+    return;
+  }
+
+  // The child is a leaf node so simply make it the root node in the tree.
+  only_child->make_root();
+  delete_internal_root_node();
+  *mutable_root() = only_child;
+}
+
+// While iter sits one past the last value of its node, moves it up to the
+// corresponding position in the parent. If the parent reached is a leaf, the
+// node is set to NULL and the climb stops.
+template <typename P> template <typename IterType>
+inline IterType btree<P>::internal_last(IterType iter) {
+  for (;;) {
+    if (!iter.node || iter.position != iter.node->count()) {
+      break;
+    }
+    iter.position = iter.node->position();
+    iter.node = iter.node->parent();
+    if (iter.node->leaf()) {
+      iter.node = NULL;
+    }
+  }
+  return iter;
+}
+
+// Inserts v immediately before iter and returns an iterator to it. A full
+// target leaf is first given room, either by growing an undersized root leaf
+// or through rebalance_or_split(). The root's size counter is bumped whenever
+// the root is (or has just become) an internal node.
+template <typename P>
+inline typename btree<P>::iterator
+btree<P>::internal_insert(iterator iter, const value_type &v) {
+  if (!iter.node->leaf()) {
+    // We can't insert on an internal node. Instead, we'll insert after the
+    // previous value which is guaranteed to be on a leaf node.
+    --iter;
+    ++iter.position;
+  }
+
+  const bool node_full = iter.node->count() == iter.node->max_count();
+  if (!node_full) {
+    if (!root()->leaf()) {
+      ++*mutable_size();
+    }
+  } else if (iter.node->max_count() < kNodeValues) {
+    // Insertion into a root leaf that is smaller than the full node size:
+    // replace it with a root leaf of up to twice the capacity.
+    assert(iter.node == root());
+    const int grown_capacity =
+        std::min<int>(kNodeValues, 2 * iter.node->max_count());
+    node_type *bigger = new_leaf_root_node(grown_capacity);
+    bigger->swap(root());
+    delete_leaf_node(root());
+    *mutable_root() = bigger;
+    iter.node = bigger;
+  } else {
+    // Make room in the leaf for the new item.
+    rebalance_or_split(&iter);
+    ++*mutable_size();
+  }
+
+  iter.node->insert_value(iter.position, v);
+  return iter;
+}
+
+// Forwards to internal_locate_type::dispatch with the key, this tree and the
+// starting iterator.
+template <typename P> template <typename IterType>
+inline std::pair<IterType, int> btree<P>::internal_locate(
+    const key_type &key, IterType iter) const {
+  const btree<P> &tree = *this;
+  return internal_locate_type::dispatch(key, tree, iter);
+}
+
+// Descends from iter.node to a leaf, taking each node's lower_bound() for
+// key as the position to follow. The second field of the result is always 0.
+template <typename P> template <typename IterType>
+inline std::pair<IterType, int> btree<P>::internal_locate_plain_compare(
+    const key_type &key, IterType iter) const {
+  iter.position = iter.node->lower_bound(key, key_comp());
+  while (!iter.node->leaf()) {
+    iter.node = iter.node->child(iter.position);
+    iter.position = iter.node->lower_bound(key, key_comp());
+  }
+  return std::make_pair(iter, 0);
+}
+
+template <typename P> template <typename IterType>
+inline std::pair<IterType, int> btree<P>::internal_locate_compare_to(
+    const key_type &key, IterType iter) const {  // Like the plain-compare locate, but can stop early on an exact match.
+  for (;;) {
+    int res = iter.node->lower_bound(key, key_comp());  // Carries a position (kMatchMask bits) plus an exact-match flag (kExactMatch).
+    iter.position = res & kMatchMask;
+    if (res & kExactMatch) {
+      return std::make_pair(iter, static_cast<int>(kExactMatch));  // Exact hit: checked before the leaf test, so iter may be on an internal node.
+    }
+    if (iter.node->leaf()) {
+      break;
+    }
+    iter.node = iter.node->child(iter.position);
+  }
+  return std::make_pair(iter, -kExactMatch);  // Reached a leaf without an exact match.
+}
+
+template <typename P> template <typename IterType>
+IterType btree<P>::internal_lower_bound(
+    const key_type &key, IterType iter) const {  // Returns iter unchanged when iter.node is NULL.
+  if (iter.node) {
+    for (;;) {
+      iter.position =
+          iter.node->lower_bound(key, key_comp()) & kMatchMask;  // Keep only the position bits of the result.
+      if (iter.node->leaf()) {
+        break;
+      }
+      iter.node = iter.node->child(iter.position);
+    }
+    iter = internal_last(iter);  // Moves up when the leaf position is one past the leaf's last value.
+  }
+  return iter;
+}
+
+template <typename P> template <typename IterType>
+IterType btree<P>::internal_upper_bound(
+    const key_type &key, IterType iter) const {  // Returns iter unchanged when iter.node is NULL.
+  if (iter.node) {
+    for (;;) {
+      iter.position = iter.node->upper_bound(key, key_comp());  // Unlike the lower-bound path, no kMatchMask is applied here.
+      if (iter.node->leaf()) {
+        break;
+      }
+      iter.node = iter.node->child(iter.position);
+    }
+    iter = internal_last(iter);  // Moves up when the leaf position is one past the leaf's last value.
+  }
+  return iter;
+}
+
+template <typename P> template <typename IterType>
+IterType btree<P>::internal_find_unique(
+    const key_type &key, IterType iter) const {  // Returns an iterator to key, or IterType(NULL, 0) when it is not found.
+  if (iter.node) {
+    std::pair<IterType, int> res = internal_locate(key, iter);
+    if (res.second == kExactMatch) {  // The locate step already established an exact match.
+      return res.first;
+    }
+    if (!res.second) {  // 0 carries no match information (the plain-compare locate always returns 0), so compare explicitly.
+      iter = internal_last(res.first);
+      if (iter.node && !compare_keys(key, iter.key())) {  // key does not order before the located key: treated as a match.
+        return iter;
+      }
+    }
+  }
+  return IterType(NULL, 0);
+}
+
+template <typename P> template <typename IterType>
+IterType btree<P>::internal_find_multi(
+    const key_type &key, IterType iter) const {  // Returns an iterator to the lower-bound entry if it matches key, else IterType(NULL, 0).
+  if (iter.node) {
+    iter = internal_lower_bound(key, iter);
+    if (iter.node) {
+      iter = internal_last(iter);  // internal_lower_bound already applies internal_last; this second call is kept as-is.
+      if (iter.node && !compare_keys(key, iter.key())) {  // key does not order before the located key: treated as a match.
+        return iter;
+      }
+    }
+  }
+  return IterType(NULL, 0);
+}
+
+template <typename P>
+void btree<P>::internal_clear(node_type *node) {  // Recursively deletes node and every node below it.
+  if (!node->leaf()) {
+    for (int i = 0; i <= node->count(); ++i) {  // <= on purpose: the loop visits count() + 1 children.
+      internal_clear(node->child(i));
+    }
+    if (node == root()) {  // The root has its own deletion routine.
+      delete_internal_root_node();
+    } else {
+      delete_internal_node(node);
+    }
+  } else {
+    delete_leaf_node(node);
+  }
+}
+
+template <typename P>
+void btree<P>::internal_dump(
+    std::ostream &os, const node_type *node, int level) const {  // Writes the subtree's keys to os, one per line, tagged with their level.
+  for (int i = 0; i < node->count(); ++i) {
+    if (!node->leaf()) {
+      internal_dump(os, node->child(i), level + 1);  // Child i is dumped before key i.
+    }
+    for (int j = 0; j < level; ++j) {
+      os << "  ";  // Two spaces of indentation per level.
+    }
+    os << node->key(i) << " [" << level << "]\n";
+  }
+  if (!node->leaf()) {
+    internal_dump(os, node->child(node->count()), level + 1);  // The last child follows the last key.
+  }
+}
+
+template <typename P>
+int btree<P>::internal_verify(
+    const node_type *node, const key_type *lo, const key_type *hi) const {  // Asserts subtree invariants; returns the subtree's value count. lo/hi may be NULL.
+  assert(node->count() > 0);
+  assert(node->count() <= node->max_count());
+  if (lo) {
+    assert(!compare_keys(node->key(0), *lo));  // The first key must not order before the lower bound.
+  }
+  if (hi) {
+    assert(!compare_keys(*hi, node->key(node->count() - 1)));  // The upper bound must not order before the last key.
+  }
+  for (int i = 1; i < node->count(); ++i) {
+    assert(!compare_keys(node->key(i), node->key(i - 1)));  // Keys are non-decreasing within the node.
+  }
+  int count = node->count();
+  if (!node->leaf()) {
+    for (int i = 0; i <= node->count(); ++i) {
+      assert(node->child(i) != NULL);
+      assert(node->child(i)->parent() == node);
+      assert(node->child(i)->position() == i);
+      count += internal_verify(
+          node->child(i),
+          (i == 0) ? lo : &node->key(i - 1),  // Child i is bounded below by key i - 1 (or the inherited lo) ...
+          (i == node->count()) ? hi : &node->key(i));  // ... and above by key i (or the inherited hi).
+    }
+  }
+  return count;
+}
+
+} // namespace btree
+
+#endif  // UTIL_BTREE_BTREE_H__
diff --git a/src/include/cpp-btree/btree_container.h b/src/include/cpp-btree/btree_container.h
new file mode 100644
index 0000000..fb617ab
--- /dev/null
+++ b/src/include/cpp-btree/btree_container.h
@@ -0,0 +1,349 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef UTIL_BTREE_BTREE_CONTAINER_H__
+#define UTIL_BTREE_BTREE_CONTAINER_H__
+
+#include <iosfwd>
+#include <utility>
+
+#include "btree.h"
+
+namespace btree {
+
+// A common base class for btree_set, btree_map, btree_multiset and
+// btree_multimap. Holds the Tree in tree_ and forwards calls to it.
+template <typename Tree>
+class btree_container {
+  typedef btree_container<Tree> self_type;
+
+ public:
+  typedef typename Tree::params_type params_type;  // The typedefs below re-export Tree's nested types.
+  typedef typename Tree::key_type key_type;
+  typedef typename Tree::value_type value_type;
+  typedef typename Tree::key_compare key_compare;
+  typedef typename Tree::allocator_type allocator_type;
+  typedef typename Tree::pointer pointer;
+  typedef typename Tree::const_pointer const_pointer;
+  typedef typename Tree::reference reference;
+  typedef typename Tree::const_reference const_reference;
+  typedef typename Tree::size_type size_type;
+  typedef typename Tree::difference_type difference_type;
+  typedef typename Tree::iterator iterator;
+  typedef typename Tree::const_iterator const_iterator;
+  typedef typename Tree::reverse_iterator reverse_iterator;
+  typedef typename Tree::const_reverse_iterator const_reverse_iterator;
+
+ public:
+  // Constructs tree_ from the given comparator and allocator (no defaults).
+  btree_container(const key_compare &comp, const allocator_type &alloc)
+      : tree_(comp, alloc) {
+  }
+
+  // Copy constructor: copy-constructs tree_ from x.tree_.
+  btree_container(const self_type &x)
+      : tree_(x.tree_) {
+  }
+
+  // Iterator routines, forwarded to tree_.
+  iterator begin() { return tree_.begin(); }
+  const_iterator begin() const { return tree_.begin(); }
+  iterator end() { return tree_.end(); }
+  const_iterator end() const { return tree_.end(); }
+  reverse_iterator rbegin() { return tree_.rbegin(); }
+  const_reverse_iterator rbegin() const { return tree_.rbegin(); }
+  reverse_iterator rend() { return tree_.rend(); }
+  const_reverse_iterator rend() const { return tree_.rend(); }
+
+  // Lookup routines, forwarded to tree_.
+  iterator lower_bound(const key_type &key) {
+    return tree_.lower_bound(key);
+  }
+  const_iterator lower_bound(const key_type &key) const {
+    return tree_.lower_bound(key);
+  }
+  iterator upper_bound(const key_type &key) {
+    return tree_.upper_bound(key);
+  }
+  const_iterator upper_bound(const key_type &key) const {
+    return tree_.upper_bound(key);
+  }
+  std::pair<iterator,iterator> equal_range(const key_type &key) {
+    return tree_.equal_range(key);
+  }
+  std::pair<const_iterator,const_iterator> equal_range(const key_type &key) const {
+    return tree_.equal_range(key);
+  }
+
+  // Utility routines, forwarded to tree_.
+  void clear() {
+    tree_.clear();
+  }
+  void swap(self_type &x) {
+    tree_.swap(x.tree_);
+  }
+  void dump(std::ostream &os) const {
+    tree_.dump(os);
+  }
+  void verify() const {
+    tree_.verify();
+  }
+
+  // Size and statistics routines, forwarded to tree_.
+  size_type size() const { return tree_.size(); }
+  size_type max_size() const { return tree_.max_size(); }
+  bool empty() const { return tree_.empty(); }
+  size_type height() const { return tree_.height(); }
+  size_type internal_nodes() const { return tree_.internal_nodes(); }
+  size_type leaf_nodes() const { return tree_.leaf_nodes(); }
+  size_type nodes() const { return tree_.nodes(); }
+  size_type bytes_used() const { return tree_.bytes_used(); }
+  static double average_bytes_per_value() {  // Static: asks the Tree type, not the tree_ instance.
+    return Tree::average_bytes_per_value();
+  }
+  double fullness() const { return tree_.fullness(); }
+  double overhead() const { return tree_.overhead(); }
+
+  bool operator==(const self_type& x) const {  // Equal when sizes match and elements compare equal pairwise in iteration order.
+    if (size() != x.size()) {
+      return false;
+    }
+    for (const_iterator i = begin(), xi = x.begin(); i != end(); ++i, ++xi) {  // Only i is bounds-checked; the sizes were checked equal above.
+      if (*i != *xi) {
+        return false;
+      }
+    }
+    return true;
+  }
+
+  bool operator!=(const self_type& other) const {  // Defined as the negation of operator==.
+    return !operator==(other);
+  }
+
+
+ protected:
+  Tree tree_;  // The wrapped tree; protected so derived containers can call it directly.
+};
+
+template <typename T>
+inline std::ostream& operator<<(std::ostream &os, const btree_container<T> &b) {  // Streams a container by calling b.dump(os).
+  b.dump(os);
+  return os;
+}
+
+// A common base class for btree_set and safe_btree_set. Maps the container
+// interface onto the tree's *_unique operations.
+template <typename Tree>
+class btree_unique_container : public btree_container<Tree> {
+  typedef btree_unique_container<Tree> self_type;
+  typedef btree_container<Tree> super_type;
+
+ public:
+  typedef typename Tree::key_type key_type;
+  typedef typename Tree::value_type value_type;
+  typedef typename Tree::size_type size_type;
+  typedef typename Tree::key_compare key_compare;
+  typedef typename Tree::allocator_type allocator_type;
+  typedef typename Tree::iterator iterator;
+  typedef typename Tree::const_iterator const_iterator;
+
+ public:
+  // Default constructor: comparator and allocator default to fresh instances.
+  btree_unique_container(const key_compare &comp = key_compare(),
+                         const allocator_type &alloc = allocator_type())
+      : super_type(comp, alloc) {
+  }
+
+  // Copy constructor: forwards to the base-class copy constructor.
+  btree_unique_container(const self_type &x)
+      : super_type(x) {
+  }
+
+  // Range constructor: builds the base from comp/alloc, then calls insert(b, e).
+  template <class InputIterator>
+  btree_unique_container(InputIterator b, InputIterator e,
+                         const key_compare &comp = key_compare(),
+                         const allocator_type &alloc = allocator_type())
+      : super_type(comp, alloc) {
+    insert(b, e);
+  }
+
+  // Lookup routines, forwarded to the tree's find_unique / count_unique.
+  iterator find(const key_type &key) {
+    return this->tree_.find_unique(key);
+  }
+  const_iterator find(const key_type &key) const {
+    return this->tree_.find_unique(key);
+  }
+  size_type count(const key_type &key) const {
+    return this->tree_.count_unique(key);
+  }
+
+  // Insertion routines, forwarded to the tree's insert_unique overloads.
+  std::pair<iterator,bool> insert(const value_type &x) {
+    return this->tree_.insert_unique(x);
+  }
+  iterator insert(iterator position, const value_type &x) {
+    return this->tree_.insert_unique(position, x);
+  }
+  template <typename InputIterator>
+  void insert(InputIterator b, InputIterator e) {
+    this->tree_.insert_unique(b, e);
+  }
+
+  // Deletion routines. erase(key) returns whatever erase_unique returns, as int.
+  int erase(const key_type &key) {
+    return this->tree_.erase_unique(key);
+  }
+  // Erase the specified iterator from the btree. The iterator must be valid
+  // (i.e. not equal to end()).  Return an iterator pointing to the node after
+  // the one that was erased (or end() if none exists).
+  iterator erase(const iterator &iter) {
+    return this->tree_.erase(iter);
+  }
+  void erase(const iterator &first, const iterator &last) {  // Range erase, forwarded to tree_.erase(first, last).
+    this->tree_.erase(first, last);
+  }
+};
+
+// A common base class for btree_map and safe_btree_map. Adds operator[] on
+// top of btree_unique_container.
+template <typename Tree>
+class btree_map_container : public btree_unique_container<Tree> {
+  typedef btree_map_container<Tree> self_type;
+  typedef btree_unique_container<Tree> super_type;
+
+ public:
+  typedef typename Tree::key_type key_type;
+  typedef typename Tree::data_type data_type;
+  typedef typename Tree::value_type value_type;
+  typedef typename Tree::mapped_type mapped_type;
+  typedef typename Tree::key_compare key_compare;
+  typedef typename Tree::allocator_type allocator_type;
+
+ private:
+  // A pointer-like object which only generates its value when
+  // dereferenced. Used by operator[] to avoid constructing an empty data_type
+  // if the key already exists in the map.
+  struct generate_value {
+    generate_value(const key_type &k)
+        : key(k) {
+    }
+    value_type operator*() const {  // Builds (key, data_type()) on demand.
+      return std::make_pair(key, data_type());
+    }
+    const key_type &key;  // Stored by reference: the referenced key must outlive this object.
+  };
+
+ public:
+  // Default constructor: comparator and allocator default to fresh instances.
+  btree_map_container(const key_compare &comp = key_compare(),
+                      const allocator_type &alloc = allocator_type())
+      : super_type(comp, alloc) {
+  }
+
+  // Copy constructor: forwards to the base-class copy constructor.
+  btree_map_container(const self_type &x)
+      : super_type(x) {
+  }
+
+  // Range constructor: forwards the range, comp and alloc to the base class.
+  template <class InputIterator>
+  btree_map_container(InputIterator b, InputIterator e,
+                      const key_compare &comp = key_compare(),
+                      const allocator_type &alloc = allocator_type())
+      : super_type(b, e, comp, alloc) {
+  }
+
+  // Insertion routines. operator[] returns the data of insert_unique's iterator.
+  data_type& operator[](const key_type &key) {
+    return this->tree_.insert_unique(key, generate_value(key)).first->second;
+  }
+};
+
+// A common base class for btree_multiset and btree_multimap. Maps the
+// container interface onto the tree's *_multi operations.
+template <typename Tree>
+class btree_multi_container : public btree_container<Tree> {
+  typedef btree_multi_container<Tree> self_type;
+  typedef btree_container<Tree> super_type;
+
+ public:
+  typedef typename Tree::key_type key_type;
+  typedef typename Tree::value_type value_type;
+  typedef typename Tree::size_type size_type;
+  typedef typename Tree::key_compare key_compare;
+  typedef typename Tree::allocator_type allocator_type;
+  typedef typename Tree::iterator iterator;
+  typedef typename Tree::const_iterator const_iterator;
+
+ public:
+  // Default constructor: comparator and allocator default to fresh instances.
+  btree_multi_container(const key_compare &comp = key_compare(),
+                        const allocator_type &alloc = allocator_type())
+      : super_type(comp, alloc) {
+  }
+
+  // Copy constructor: forwards to the base-class copy constructor.
+  btree_multi_container(const self_type &x)
+      : super_type(x) {
+  }
+
+  // Range constructor: builds the base from comp/alloc, then calls insert(b, e).
+  template <class InputIterator>
+  btree_multi_container(InputIterator b, InputIterator e,
+                        const key_compare &comp = key_compare(),
+                        const allocator_type &alloc = allocator_type())
+      : super_type(comp, alloc) {
+    insert(b, e);
+  }
+
+  // Lookup routines, forwarded to the tree's find_multi / count_multi.
+  iterator find(const key_type &key) {
+    return this->tree_.find_multi(key);
+  }
+  const_iterator find(const key_type &key) const {
+    return this->tree_.find_multi(key);
+  }
+  size_type count(const key_type &key) const {
+    return this->tree_.count_multi(key);
+  }
+
+  // Insertion routines, forwarded to the tree's insert_multi overloads.
+  iterator insert(const value_type &x) {
+    return this->tree_.insert_multi(x);
+  }
+  iterator insert(iterator position, const value_type &x) {
+    return this->tree_.insert_multi(position, x);
+  }
+  template <typename InputIterator>
+  void insert(InputIterator b, InputIterator e) {
+    this->tree_.insert_multi(b, e);
+  }
+
+  // Deletion routines. erase(key) returns whatever erase_multi returns, as int.
+  int erase(const key_type &key) {
+    return this->tree_.erase_multi(key);
+  }
+  // Erase the specified iterator from the btree. The iterator must be valid
+  // (i.e. not equal to end()).  Return an iterator pointing to the node after
+  // the one that was erased (or end() if none exists).
+  iterator erase(const iterator &iter) {
+    return this->tree_.erase(iter);
+  }
+  void erase(const iterator &first, const iterator &last) {  // Range erase, forwarded to tree_.erase(first, last).
+    this->tree_.erase(first, last);
+  }
+};
+
+} // namespace btree
+
+#endif  // UTIL_BTREE_BTREE_CONTAINER_H__
diff --git a/src/include/cpp-btree/btree_map.h b/src/include/cpp-btree/btree_map.h
new file mode 100644
index 0000000..b83489f
--- /dev/null
+++ b/src/include/cpp-btree/btree_map.h
@@ -0,0 +1,130 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// A btree_map<> implements the STL unique sorted associative container
+// interface and the pair associative container interface (a.k.a map<>) using a
+// btree. A btree_multimap<> implements the STL multiple sorted associative
+// container interface and the pair associative container interface (a.k.a
+// multimap<>) using a btree. See btree.h for details of the btree
+// implementation and caveats.
+
+#ifndef UTIL_BTREE_BTREE_MAP_H__
+#define UTIL_BTREE_BTREE_MAP_H__
+
+#include <algorithm>
+#include <functional>
+#include <memory>
+#include <string>
+#include <utility>
+
+#include "btree.h"
+#include "btree_container.h"
+
+namespace btree {
+
+// The btree_map class is needed mainly for its constructors.
+template <typename Key, typename Value,
+          typename Compare = std::less<Key>,
+          typename Alloc = std::allocator<std::pair<const Key, Value> >,
+          int TargetNodeSize = 256>  // NOTE(review): 256 is presumably a node size in bytes -- confirm in btree.h.
+class btree_map : public btree_map_container<
+  btree<btree_map_params<Key, Value, Compare, Alloc, TargetNodeSize> > > {
+
+  typedef btree_map<Key, Value, Compare, Alloc, TargetNodeSize> self_type;
+  typedef btree_map_params<
+    Key, Value, Compare, Alloc, TargetNodeSize> params_type;
+  typedef btree<params_type> btree_type;
+  typedef btree_map_container<btree_type> super_type;
+
+ public:
+  typedef typename btree_type::key_compare key_compare;
+  typedef typename btree_type::allocator_type allocator_type;
+
+ public:
+  // Default constructor: comparator and allocator default to fresh instances.
+  btree_map(const key_compare &comp = key_compare(),
+            const allocator_type &alloc = allocator_type())
+      : super_type(comp, alloc) {
+  }
+
+  // Copy constructor: forwards to the base-class copy constructor.
+  btree_map(const self_type &x)
+      : super_type(x) {
+  }
+
+  // Range constructor: forwards the range, comp and alloc to the base class.
+  template <class InputIterator>
+  btree_map(InputIterator b, InputIterator e,
+            const key_compare &comp = key_compare(),
+            const allocator_type &alloc = allocator_type())
+      : super_type(b, e, comp, alloc) {
+  }
+};
+
+template <typename K, typename V, typename C, typename A, int N>
+inline void swap(btree_map<K, V, C, A, N> &x,
+                 btree_map<K, V, C, A, N> &y) {  // Non-member swap for btree_map; forwards to the member swap.
+  x.swap(y);
+}
+
+// The btree_multimap class is needed mainly for its constructors.
+template <typename Key, typename Value,
+          typename Compare = std::less<Key>,
+          typename Alloc = std::allocator<std::pair<const Key, Value> >,
+          int TargetNodeSize = 256>  // NOTE(review): 256 is presumably a node size in bytes -- confirm in btree.h.
+class btree_multimap : public btree_multi_container<
+  btree<btree_map_params<Key, Value, Compare, Alloc, TargetNodeSize> > > {
+
+  typedef btree_multimap<Key, Value, Compare, Alloc, TargetNodeSize> self_type;
+  typedef btree_map_params<
+    Key, Value, Compare, Alloc, TargetNodeSize> params_type;
+  typedef btree<params_type> btree_type;
+  typedef btree_multi_container<btree_type> super_type;
+
+ public:
+  typedef typename btree_type::key_compare key_compare;
+  typedef typename btree_type::allocator_type allocator_type;
+  typedef typename btree_type::data_type data_type;
+  typedef typename btree_type::mapped_type mapped_type;
+
+ public:
+  // Default constructor: comparator and allocator default to fresh instances.
+  btree_multimap(const key_compare &comp = key_compare(),
+                 const allocator_type &alloc = allocator_type())
+      : super_type(comp, alloc) {
+  }
+
+  // Copy constructor: forwards to the base-class copy constructor.
+  btree_multimap(const self_type &x)
+      : super_type(x) {
+  }
+
+  // Range constructor: forwards the range, comp and alloc to the base class.
+  template <class InputIterator>
+  btree_multimap(InputIterator b, InputIterator e,
+                 const key_compare &comp = key_compare(),
+                 const allocator_type &alloc = allocator_type())
+      : super_type(b, e, comp, alloc) {
+  }
+};
+
+template <typename K, typename V, typename C, typename A, int N>
+inline void swap(btree_multimap<K, V, C, A, N> &x,
+                 btree_multimap<K, V, C, A, N> &y) {  // Non-member swap for btree_multimap; forwards to the member swap.
+  x.swap(y);
+}
+
+} // namespace btree
+
+#endif  // UTIL_BTREE_BTREE_MAP_H__
diff --git a/src/include/encoding_btree.h b/src/include/encoding_btree.h
new file mode 100644
index 0000000..19faf4b
--- /dev/null
+++ b/src/include/encoding_btree.h
@@ -0,0 +1,60 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_INCLUDE_ENCODING_BTREE_H
+#define CEPH_INCLUDE_ENCODING_BTREE_H
+
+#include "include/cpp-btree/btree_map.h"
+
+template<class T, class U>
+inline void encode(const btree::btree_map<T,U>& m, bufferlist& bl)  // Encodes an entry count, then each key and value in iteration order.
+{
+  __u32 n = (__u32)(m.size());  // The size is cast to __u32 before it is encoded.
+  encode(n, bl);
+  for (typename btree::btree_map<T,U>::const_iterator p = m.begin(); p != m.end(); ++p) {
+    encode(p->first, bl);
+    encode(p->second, bl);
+  }
+}
+template<class T, class U>
+inline void encode(const btree::btree_map<T,U>& m, bufferlist& bl, uint64_t features)  // Same layout as the two-argument encode; features is passed to each key/value encode.
+{
+  __u32 n = (__u32)(m.size());  // The size is cast to __u32 before it is encoded.
+  encode(n, bl);  // The count itself is encoded without features.
+  for (typename btree::btree_map<T,U>::const_iterator p = m.begin(); p != m.end(); ++p) {
+    encode(p->first, bl, features);
+    encode(p->second, bl, features);
+  }
+}
+template<class T, class U>
+inline void decode(btree::btree_map<T,U>& m, bufferlist::iterator& p)  // Reads an entry count from p, then that many key/value pairs into m.
+{
+  __u32 n;
+  decode(n, p);
+  m.clear();  // Previous contents of m are discarded.
+  while (n--) {
+    T k;
+    decode(k, p);
+    decode(m[k], p);  // Decodes the value directly into the slot operator[] returns for k.
+  }
+}
+template<class T, class U>
+inline void encode_nohead(const btree::btree_map<T,U>& m, bufferlist& bl)  // Encodes each key and value in iteration order, with no leading entry count.
+{
+  for (typename btree::btree_map<T,U>::const_iterator p = m.begin(); p != m.end(); ++p) {
+    encode(p->first, bl);
+    encode(p->second, bl);
+  }
+}
+template<class T, class U>
+inline void decode_nohead(int n, btree::btree_map<T,U>& m, bufferlist::iterator& p)  // Reads n key/value pairs from p into m; the count comes from the caller, not from p.
+{
+  m.clear();  // Previous contents of m are discarded.
+  while (n--) {  // NOTE(review): n is a signed int; a negative n would keep looping -- callers presumably pass n >= 0.
+    T k;
+    decode(k, p);
+    decode(m[k], p);  // Decodes the value directly into the slot operator[] returns for k.
+  }
+}
+
+#endif
diff --git a/src/include/interval_set.h b/src/include/interval_set.h
index 3759f77..c903dc3 100644
--- a/src/include/interval_set.h
+++ b/src/include/interval_set.h
@@ -19,7 +19,6 @@
 #include <iterator>
 #include <map>
 #include <ostream>
-using namespace std;
 
 #include "encoding.h"
 
@@ -56,7 +55,7 @@ class interval_set {
         }
 
         // Dereference this iterator to get a pair.
-        pair < T, T > &operator*() {
+        std::pair < T, T > &operator*() {
                 return *_iter;
         }
 
@@ -93,7 +92,7 @@ class interval_set {
     friend class interval_set<T>::const_iterator;
 
     protected:
-        typename map<T,T>::iterator _iter;
+        typename std::map<T,T>::iterator _iter;
     friend class interval_set<T>;
   };
 
@@ -120,7 +119,7 @@ class interval_set {
         }
 
         // Dereference this iterator to get a pair.
-        pair < T, T > operator*() const {
+        std::pair < T, T > operator*() const {
                 return *_iter;
         }
 
@@ -150,7 +149,7 @@ class interval_set {
         }
 
     protected:
-        typename map<T,T>::const_iterator _iter;
+        typename std::map<T,T>::const_iterator _iter;
   };
 
   interval_set() : _size(0) {}
@@ -186,8 +185,8 @@ class interval_set {
 
   // helpers
  private:
-  typename map<T,T>::const_iterator find_inc(T start) const {
-    typename map<T,T>::const_iterator p = m.lower_bound(start);  // p->first >= start
+  typename std::map<T,T>::const_iterator find_inc(T start) const {
+    typename std::map<T,T>::const_iterator p = m.lower_bound(start);  // p->first >= start
     if (p != m.begin() &&
         (p == m.end() || p->first > start)) {
       p--;   // might overlap?
@@ -197,8 +196,8 @@ class interval_set {
     return p;
   }
   
-  typename map<T,T>::iterator find_inc_m(T start) {
-    typename map<T,T>::iterator p = m.lower_bound(start);
+  typename std::map<T,T>::iterator find_inc_m(T start) {
+    typename std::map<T,T>::iterator p = m.lower_bound(start);
     if (p != m.begin() &&
         (p == m.end() || p->first > start)) {
       p--;   // might overlap?
@@ -208,8 +207,8 @@ class interval_set {
     return p;
   }
   
-  typename map<T,T>::const_iterator find_adj(T start) const {
-    typename map<T,T>::const_iterator p = m.lower_bound(start);
+  typename std::map<T,T>::const_iterator find_adj(T start) const {
+    typename std::map<T,T>::const_iterator p = m.lower_bound(start);
     if (p != m.begin() &&
         (p == m.end() || p->first > start)) {
       p--;   // might touch?
@@ -219,8 +218,8 @@ class interval_set {
     return p;
   }
   
-  typename map<T,T>::iterator find_adj_m(T start) {
-    typename map<T,T>::iterator p = m.lower_bound(start);
+  typename std::map<T,T>::iterator find_adj_m(T start) {
+    typename std::map<T,T>::iterator p = m.lower_bound(start);
     if (p != m.begin() &&
         (p == m.end() || p->first > start)) {
       p--;   // might touch?
@@ -248,7 +247,7 @@ class interval_set {
   void decode(bufferlist::iterator& bl) {
     ::decode(m, bl);
     _size = 0;
-    for (typename map<T,T>::const_iterator p = m.begin();
+    for (typename std::map<T,T>::const_iterator p = m.begin();
          p != m.end();
          p++)
       _size += p->second;
@@ -256,7 +255,7 @@ class interval_set {
   void decode_nohead(int n, bufferlist::iterator& bl) {
     ::decode_nohead(n, m, bl);
     _size = 0;
-    for (typename map<T,T>::const_iterator p = m.begin();
+    for (typename std::map<T,T>::const_iterator p = m.begin();
          p != m.end();
          p++)
       _size += p->second;
@@ -268,7 +267,7 @@ class interval_set {
   }
 
   bool contains(T i, T *pstart=0, T *plen=0) const {
-    typename map<T,T>::const_iterator p = find_inc(i);
+    typename std::map<T,T>::const_iterator p = find_inc(i);
     if (p == m.end()) return false;
     if (p->first > i) return false;
     if (p->first+p->second <= i) return false;
@@ -280,7 +279,7 @@ class interval_set {
     return true;
   }
   bool contains(T start, T len) const {
-    typename map<T,T>::const_iterator p = find_inc(start);
+    typename std::map<T,T>::const_iterator p = find_inc(start);
     if (p == m.end()) return false;
     if (p->first > start) return false;
     if (p->first+p->second <= start) return false;
@@ -303,12 +302,12 @@ class interval_set {
   }
   T range_start() const {
     assert(!empty());
-    typename map<T,T>::const_iterator p = m.begin();
+    typename std::map<T,T>::const_iterator p = m.begin();
     return p->first;
   }
   T range_end() const {
     assert(!empty());
-    typename map<T,T>::const_iterator p = m.end();
+    typename std::map<T,T>::const_iterator p = m.end();
     p--;
     return p->first+p->second;
   }
@@ -316,20 +315,20 @@ class interval_set {
   // interval start after p (where p not in set)
   bool starts_after(T i) const {
     assert(!contains(i));
-    typename map<T,T>::const_iterator p = find_inc(i);
+    typename std::map<T,T>::const_iterator p = find_inc(i);
     if (p == m.end()) return false;
     return true;
   }
   T start_after(T i) const {
     assert(!contains(i));
-    typename map<T,T>::const_iterator p = find_inc(i);
+    typename std::map<T,T>::const_iterator p = find_inc(i);
     return p->first;
   }
 
   // interval end that contains start
   T end_after(T start) const {
     assert(contains(start));
-    typename map<T,T>::const_iterator p = find_inc(start);
+    typename std::map<T,T>::const_iterator p = find_inc(start);
     return p->first+p->second;
   }
   
@@ -341,7 +340,7 @@ class interval_set {
     //cout << "insert " << start << "~" << len << endl;
     assert(len > 0);
     _size += len;
-    typename map<T,T>::iterator p = find_adj_m(start);
+    typename std::map<T,T>::iterator p = find_adj_m(start);
     if (p == m.end()) {
       m[start] = len;                  // new interval
       if (pstart)
@@ -356,10 +355,9 @@ class interval_set {
           assert(0);
         }
         
-        assert(p->first + p->second == start);
         p->second += len;               // append to end
         
-        typename map<T,T>::iterator n = p;
+        typename std::map<T,T>::iterator n = p;
         n++;
         if (n != m.end() && 
             start+len == n->first) {   // combine with next, too!
@@ -392,9 +390,7 @@ class interval_set {
 
   void swap(interval_set<T>& other) {
     m.swap(other.m);
-    int64_t t = _size;
-    _size = other._size;
-    other._size = t;
+    std::swap(_size, other._size);
   }    
   
   void erase(iterator &i) {
@@ -408,7 +404,7 @@ class interval_set {
   }
 
   void erase(T start, T len) {
-    typename map<T,T>::iterator p = find_inc_m(start);
+    typename std::map<T,T>::iterator p = find_inc_m(start);
 
     _size -= len;
     assert(_size >= 0);
@@ -430,14 +426,14 @@ class interval_set {
 
 
   void subtract(const interval_set &a) {
-    for (typename map<T,T>::const_iterator p = a.m.begin();
+    for (typename std::map<T,T>::const_iterator p = a.m.begin();
          p != a.m.end();
          p++)
       erase(p->first, p->second);
   }
 
   void insert(const interval_set &a) {
-    for (typename map<T,T>::const_iterator p = a.m.begin();
+    for (typename std::map<T,T>::const_iterator p = a.m.begin();
          p != a.m.end();
          p++)
       insert(p->first, p->second);
@@ -449,8 +445,8 @@ class interval_set {
     assert(&b != this);
     clear();
 
-    typename map<T,T>::const_iterator pa = a.m.begin();
-    typename map<T,T>::const_iterator pb = b.m.begin();
+    typename std::map<T,T>::const_iterator pa = a.m.begin();
+    typename std::map<T,T>::const_iterator pb = b.m.begin();
     
     while (pa != a.m.end() && pb != b.m.end()) {
       // passing?
@@ -501,7 +497,7 @@ class interval_set {
   }
 
   bool subset_of(const interval_set &big) const {
-    for (typename map<T,T>::const_iterator i = m.begin();
+    for (typename std::map<T,T>::const_iterator i = m.begin();
          i != m.end();
          i++) 
       if (!big.contains(i->first, i->second)) return false;
@@ -515,7 +511,7 @@ class interval_set {
    */
   void span_of(const interval_set &other, T start, T len) {
     clear();
-    typename map<T,T>::const_iterator p = other.find_inc(start);
+    typename std::map<T,T>::const_iterator p = other.find_inc(start);
     if (p == other.m.end())
       return;
     if (p->first < start) {
@@ -546,12 +542,12 @@ class interval_set {
 private:
   // data
   int64_t _size;
-  map<T,T> m;   // map start -> len
+  std::map<T,T> m;   // map start -> len
 };
 
 
 template<class T>
-inline ostream& operator<<(ostream& out, const interval_set<T> &s) {
+inline std::ostream& operator<<(std::ostream& out, const interval_set<T> &s) {
   out << "[";
   const char *prequel = "";
   for (typename interval_set<T>::const_iterator i = s.begin();
diff --git a/src/include/object.h b/src/include/object.h
index 0f51143..bdc15b5 100644
--- a/src/include/object.h
+++ b/src/include/object.h
@@ -33,7 +33,9 @@ struct object_t {
   string name;
 
   object_t() {}
+  // cppcheck-suppress noExplicitConstructor
   object_t(const char *s) : name(s) {}
+  // cppcheck-suppress noExplicitConstructor
   object_t(const string& s) : name(s) {}
 
   void swap(object_t& o) {
@@ -110,6 +112,7 @@ struct file_object_t {
 
 struct snapid_t {
   uint64_t val;
+  // cppcheck-suppress noExplicitConstructor
   snapid_t(uint64_t v=0) : val(v) {}
   snapid_t operator+=(snapid_t o) { val += o.val; return *this; }
   snapid_t operator++() { ++val; return *this; }
diff --git a/src/include/rados/buffer.h b/src/include/rados/buffer.h
index 5a8b05f..bc988db 100644
--- a/src/include/rados/buffer.h
+++ b/src/include/rados/buffer.h
@@ -168,12 +168,16 @@ namespace buffer CEPH_BUFFER_API {
 
   public:
     ptr() : _raw(0), _off(0), _len(0) {}
+    // cppcheck-suppress noExplicitConstructor
     ptr(raw *r);
+    // cppcheck-suppress noExplicitConstructor
     ptr(unsigned l);
     ptr(const char *d, unsigned l);
     ptr(const ptr& p);
+    ptr(ptr&& p);
     ptr(const ptr& p, unsigned o, unsigned l);
     ptr& operator= (const ptr& p);
+    ptr& operator= (ptr&& p);
     ~ptr() {
       release();
     }
@@ -354,6 +358,7 @@ namespace buffer CEPH_BUFFER_API {
   public:
     // cons/des
     list() : _len(0), _memcopy_count(0), last_p(this) {}
+    // cppcheck-suppress noExplicitConstructor
     list(unsigned prealloc) : _len(0), _memcopy_count(0), last_p(this) {
       append_buffer = buffer::create(prealloc);
       append_buffer.set_length(0);   // unused, so far.
@@ -373,6 +378,16 @@ namespace buffer CEPH_BUFFER_API {
       return *this;
     }
 
+    list& operator= (list&& other) {
+      _buffers = std::move(other._buffers);
+      _len = other._len;
+      _memcopy_count = other._memcopy_count;
+      last_p = begin();
+      append_buffer.swap(other.append_buffer);
+      other.clear();
+      return *this;
+    }
+
     unsigned get_memcopy_count() const {return _memcopy_count; }
     const std::list<ptr>& buffers() const { return _buffers; }
     void swap(list& other);
@@ -414,9 +429,14 @@ namespace buffer CEPH_BUFFER_API {
       _buffers.push_front(bp);
       _len += bp.length();
     }
+    void push_front(ptr&& bp) {
+      if (bp.length() == 0)
+	return;
+      _len += bp.length();
+      _buffers.push_front(std::move(bp));
+    }
     void push_front(raw *r) {
-      ptr bp(r);
-      push_front(bp);
+      push_front(ptr(r));
     }
     void push_back(const ptr& bp) {
       if (bp.length() == 0)
@@ -424,9 +444,14 @@ namespace buffer CEPH_BUFFER_API {
       _buffers.push_back(bp);
       _len += bp.length();
     }
+    void push_back(ptr&& bp) {
+      if (bp.length() == 0)
+	return;
+      _len += bp.length();
+      _buffers.push_back(std::move(bp));
+    }
     void push_back(raw *r) {
-      ptr bp(r);
-      push_back(bp);
+      push_back(ptr(r));
     }
 
     void zero();
@@ -497,6 +522,7 @@ namespace buffer CEPH_BUFFER_API {
       append(s.data(), s.length());
     }
     void append(const ptr& bp);
+    void append(ptr&& bp);
     void append(const ptr& bp, unsigned off, unsigned len);
     void append(const list& bl);
     void append(std::istream& in);
@@ -543,6 +569,7 @@ namespace buffer CEPH_BUFFER_API {
 
   public:
     hash() : crc(0) { }
+    // cppcheck-suppress noExplicitConstructor
     hash(uint32_t init) : crc(init) { }
 
     void update(buffer::list& bl) {
diff --git a/src/include/rados/librados.hpp b/src/include/rados/librados.hpp
index 9173bf5..1287c77 100644
--- a/src/include/rados/librados.hpp
+++ b/src/include/rados/librados.hpp
@@ -37,22 +37,8 @@ namespace librados
   typedef uint64_t auid_t;
   typedef void *config_t;
 
-  struct cluster_stat_t {
-    uint64_t kb, kb_used, kb_avail;
-    uint64_t num_objects;
-  };
-
-  struct pool_stat_t {
-    uint64_t num_bytes;    // in bytes
-    uint64_t num_kb;       // in KB
-    uint64_t num_objects;
-    uint64_t num_object_clones;
-    uint64_t num_object_copies;  // num_objects * num_replicas
-    uint64_t num_objects_missing_on_primary;
-    uint64_t num_objects_unfound;
-    uint64_t num_objects_degraded;
-    uint64_t num_rd, num_rd_kb, num_wr, num_wr_kb;
-  };
+  typedef struct rados_cluster_stat_t cluster_stat_t;
+  typedef struct rados_pool_stat_t pool_stat_t;
 
   typedef struct {
     std::string client;
@@ -1105,8 +1091,14 @@ namespace librados
 
     int64_t get_id();
 
-    uint32_t get_object_hash_position(const std::string& oid);
-    uint32_t get_object_pg_hash_position(const std::string& oid);
+    // deprecated versions
+    uint32_t get_object_hash_position(const std::string& oid)
+      __attribute__ ((deprecated));
+    uint32_t get_object_pg_hash_position(const std::string& oid)
+      __attribute__ ((deprecated));
+
+    int get_object_hash_position2(const std::string& oid, uint32_t *hash_position);
+    int get_object_pg_hash_position2(const std::string& oid, uint32_t *pg_hash_position);
 
     config_t cct();
 
diff --git a/src/include/rbd/librbd.h b/src/include/rbd/librbd.h
index e2d63f9..dde1a61 100644
--- a/src/include/rbd/librbd.h
+++ b/src/include/rbd/librbd.h
@@ -41,6 +41,7 @@ extern "C" {
 #define LIBRBD_SUPPORTS_WATCH 0
 #define LIBRBD_SUPPORTS_AIO_FLUSH 1
 #define LIBRBD_SUPPORTS_INVALIDATE 1
+#define LIBRBD_SUPPORTS_AIO_OPEN 1
 
 #if __GNUC__ >= 4
   #define CEPH_RBD_API    __attribute__ ((visibility ("default")))
@@ -55,6 +56,9 @@ typedef void *rbd_snap_t;
 typedef void *rbd_image_t;
 typedef void *rbd_image_options_t;
 
+typedef void *rbd_completion_t;
+typedef void (*rbd_callback_t)(rbd_completion_t cb, void *arg);
+
 typedef int (*librbd_progress_fn_t)(uint64_t offset, uint64_t total, void *ptr);
 
 typedef struct {
@@ -188,6 +192,9 @@ CEPH_RBD_API int rbd_mirror_peer_set_cluster(rados_ioctx_t io_ctx,
 CEPH_RBD_API int rbd_open(rados_ioctx_t io, const char *name,
                           rbd_image_t *image, const char *snap_name);
 
+CEPH_RBD_API int rbd_aio_open(rados_ioctx_t io, const char *name,
+			      rbd_image_t *image, const char *snap_name,
+			      rbd_completion_t c);
 /**
  * Open an image in read-only mode.
  *
@@ -209,7 +216,11 @@ CEPH_RBD_API int rbd_open(rados_ioctx_t io, const char *name,
  */
 CEPH_RBD_API int rbd_open_read_only(rados_ioctx_t io, const char *name,
                                     rbd_image_t *image, const char *snap_name);
+CEPH_RBD_API int rbd_aio_open_read_only(rados_ioctx_t io, const char *name,
+					rbd_image_t *image, const char *snap_name,
+					rbd_completion_t c);
 CEPH_RBD_API int rbd_close(rbd_image_t image);
+CEPH_RBD_API int rbd_aio_close(rbd_image_t image, rbd_completion_t c);
 CEPH_RBD_API int rbd_resize(rbd_image_t image, uint64_t size);
 CEPH_RBD_API int rbd_resize_with_progress(rbd_image_t image, uint64_t size,
 			     librbd_progress_fn_t cb, void *cbdata);
@@ -423,8 +434,6 @@ CEPH_RBD_API int rbd_break_lock(rbd_image_t image, const char *client,
 /** @} locking */
 
 /* I/O */
-typedef void *rbd_completion_t;
-typedef void (*rbd_callback_t)(rbd_completion_t cb, void *arg);
 CEPH_RBD_API ssize_t rbd_read(rbd_image_t image, uint64_t ofs, size_t len,
                               char *buf);
 /*
diff --git a/src/include/rbd/librbd.hpp b/src/include/rbd/librbd.hpp
index d9bf1de..7a4a2c7 100644
--- a/src/include/rbd/librbd.hpp
+++ b/src/include/rbd/librbd.hpp
@@ -84,9 +84,13 @@ public:
 
   int open(IoCtx& io_ctx, Image& image, const char *name);
   int open(IoCtx& io_ctx, Image& image, const char *name, const char *snapname);
+  int aio_open(IoCtx& io_ctx, Image& image, const char *name,
+	       const char *snapname, RBD::AioCompletion *c);
   // see librbd.h
   int open_read_only(IoCtx& io_ctx, Image& image, const char *name,
 		     const char *snapname);
+  int aio_open_read_only(IoCtx& io_ctx, Image& image, const char *name,
+			 const char *snapname, RBD::AioCompletion *c);
   int list(IoCtx& io_ctx, std::vector<std::string>& names);
   int create(IoCtx& io_ctx, const char *name, uint64_t size, int *order);
   int create2(IoCtx& io_ctx, const char *name, uint64_t size,
@@ -155,6 +159,7 @@ public:
   ~Image();
 
   int close();
+  int aio_close(RBD::AioCompletion *c);
 
   int resize(uint64_t size);
   int resize_with_progress(uint64_t size, ProgressContext& pctx);
diff --git a/src/include/str_map.h b/src/include/str_map.h
index 0bd9de3..6a0370d 100644
--- a/src/include/str_map.h
+++ b/src/include/str_map.h
@@ -17,6 +17,8 @@
 #ifndef CEPH_STRMAP_H
 #define CEPH_STRMAP_H
 
+#define CONST_DELIMS ",;\t\n "
+
 #include <map>
 #include <string>
 #include <sstream>
@@ -89,12 +91,8 @@ extern int get_json_str_map(
  */
 extern int get_str_map(
     const std::string &str,
-    const char *delims,
-    std::map<std::string,std::string> *str_map);
-
-extern int get_str_map(
-    const std::string &str,
-    std::map<std::string,std::string> *str_map);
+    std::map<std::string,std::string> *str_map,
+    const char *delims = CONST_DELIMS);
 
 /**
  * Returns the value of **key** in **str_map** if available.
diff --git a/src/include/types.h b/src/include/types.h
index aebdc52..19982db 100644
--- a/src/include/types.h
+++ b/src/include/types.h
@@ -280,6 +280,7 @@ typedef __u32 epoch_t;       // map epoch  (32bits -> 13 epochs/second for 10 ye
 struct client_t {
   int64_t v;
 
+  // cppcheck-suppress noExplicitConstructor
   client_t(int64_t _v = -2) : v(_v) {}
   
   void encode(bufferlist& bl) const {
@@ -315,6 +316,7 @@ typedef uint64_t _inodeno_t;
 struct inodeno_t {
   _inodeno_t val;
   inodeno_t() : val(0) {}
+  // cppcheck-suppress noExplicitConstructor
   inodeno_t(_inodeno_t v) : val(v) {}
   inodeno_t operator+=(inodeno_t o) { val += o.val; return *this; }
   operator _inodeno_t() const { return val; }
@@ -366,6 +368,7 @@ void dump(const ceph_dir_layout& l, ceph::Formatter *f);
 
 struct prettybyte_t {
   uint64_t v;
+  // cppcheck-suppress noExplicitConstructor
   prettybyte_t(uint64_t _v) : v(_v) {}
 };
 
@@ -389,6 +392,7 @@ inline ostream& operator<<(ostream& out, const prettybyte_t& b)
 
 struct si_t {
   uint64_t v;
+  // cppcheck-suppress noExplicitConstructor
   si_t(uint64_t _v) : v(_v) {}
 };
 
@@ -412,6 +416,7 @@ inline ostream& operator<<(ostream& out, const si_t& b)
 
 struct pretty_si_t {
   uint64_t v;
+  // cppcheck-suppress noExplicitConstructor
   pretty_si_t(uint64_t _v) : v(_v) {}
 };
 
@@ -435,6 +440,7 @@ inline ostream& operator<<(ostream& out, const pretty_si_t& b)
 
 struct kb_t {
   uint64_t v;
+  // cppcheck-suppress noExplicitConstructor
   kb_t(uint64_t _v) : v(_v) {}
 };
 
@@ -483,6 +489,7 @@ inline ostream& operator<<(ostream &oss, health_status_t status) {
 
 struct weightf_t {
   float v;
+  // cppcheck-suppress noExplicitConstructor
   weightf_t(float _v) : v(_v) {}
 };
 
@@ -530,6 +537,7 @@ struct errorcode32_t {
   int32_t code;
 
   errorcode32_t() {}
+  // cppcheck-suppress noExplicitConstructor
   errorcode32_t(int32_t i) : code(i) {}
 
   operator int() const { return code; }
diff --git a/src/include/uuid.h b/src/include/uuid.h
index 03e6b5a..bd888f8 100644
--- a/src/include/uuid.h
+++ b/src/include/uuid.h
@@ -44,10 +44,14 @@ struct uuid_d {
     memcpy(s, boost::uuids::to_string(uuid).c_str(), 37);
   }
 
+ std::string to_string() const {
+    return boost::uuids::to_string(uuid);
+  }
+
   char *bytes() const {
     return (char*)uuid.data;
   }
-  
+
   void encode(bufferlist& bl) const {
     ::encode_raw(uuid, bl);
   }
diff --git a/src/init-ceph.in b/src/init-ceph.in
index 9d4fc71..5baa8fe 100755
--- a/src/init-ceph.in
+++ b/src/init-ceph.in
@@ -26,14 +26,14 @@ grep -qs systemd /proc/1/comm || SYSTEMD_RUN=""
 if [ `dirname $0` = "." ] && [ $PWD != "/etc/init.d" ]; then
     BINDIR=.
     SBINDIR=.
-    LIBDIR=.
+    LIBEXECDIR=.
     ETCDIR=.
     SYSTEMD_RUN=""
     ASSUME_DEV=1
 else
     BINDIR=@bindir@
     SBINDIR=@prefix@/sbin
-    LIBDIR=@libdir@/ceph
+    LIBEXECDIR=@libexecdir@/ceph
     ETCDIR=@sysconfdir@/ceph
     ASSUME_DEV=0
 fi
@@ -65,9 +65,9 @@ usage_exit() {
 
 # behave if we are not completely installed (e.g., Debian "removed,
 # config remains" state)
-test -f $LIBDIR/ceph_common.sh || exit 0
+test -f $LIBEXECDIR/ceph_common.sh || exit 0
 
-. $LIBDIR/ceph_common.sh
+. $LIBEXECDIR/ceph_common.sh
 
 EXIT_STATUS=0
 
@@ -338,11 +338,13 @@ for name in $what; do
 	    [ -n "$wrap" ] && runmode="-f &" && runarg="-f"
 	    [ -n "$max_open_files" ] && files="ulimit -n $max_open_files;"
 
+	    [ -n "$TCMALLOC_MAX_TOTAL_THREAD_CACHE_BYTES" ] && tcmalloc="TCMALLOC_MAX_TOTAL_THREAD_CACHE_BYTES=$TCMALLOC_MAX_TOTAL_THREAD_CACHE_BYTES"
+
 	    if [ -n "$SYSTEMD_RUN" ]; then
                 time=`date +%s.%N` 
-		cmd="$SYSTEMD_RUN --unit=ceph-$name.$time -r bash -c '$files $cmd --cluster $cluster --setuser ceph --setgroup ceph -f'"
+		cmd="$SYSTEMD_RUN --unit=ceph-$name.$time -r bash -c '$files $tcmalloc $cmd --cluster $cluster --setuser ceph --setgroup ceph -f'"
 	    else
-		cmd="$files $wrap $cmd --cluster $cluster --setuser ceph --setgroup ceph $runmode"
+		cmd="$files $tcmalloc $wrap $cmd --cluster $cluster --setuser ceph --setgroup ceph $runmode"
 	    fi
 
 	    if [ $dofsmount -eq 1 ] && [ -n "$fs_devs" ]; then
diff --git a/src/java/Makefile.in b/src/java/Makefile.in
index 959fe08..dce51a0 100644
--- a/src/java/Makefile.in
+++ b/src/java/Makefile.in
@@ -239,12 +239,14 @@ JDK_CPPFLAGS = @JDK_CPPFLAGS@
 KEYUTILS_LIB = @KEYUTILS_LIB@
 LD = @LD@
 LDFLAGS = @LDFLAGS@
-LIBEDIT_CFLAGS = @LIBEDIT_CFLAGS@
-LIBEDIT_LIBS = @LIBEDIT_LIBS@
+LIBDPDK_CFLAGS = @LIBDPDK_CFLAGS@
+LIBDPDK_LIBS = @LIBDPDK_LIBS@
 LIBFUSE_CFLAGS = @LIBFUSE_CFLAGS@
 LIBFUSE_LIBS = @LIBFUSE_LIBS@
 LIBJEMALLOC = @LIBJEMALLOC@
 LIBOBJS = @LIBOBJS@
+LIBPCIACCESS_CFLAGS = @LIBPCIACCESS_CFLAGS@
+LIBPCIACCESS_LIBS = @LIBPCIACCESS_LIBS@
 LIBROCKSDB_CFLAGS = @LIBROCKSDB_CFLAGS@
 LIBROCKSDB_LIBS = @LIBROCKSDB_LIBS@
 LIBS = @LIBS@
@@ -329,7 +331,6 @@ datarootdir = @datarootdir@
 docdir = @docdir@
 dvidir = @dvidir@
 exec_prefix = @exec_prefix@
-group_rgw = @group_rgw@
 host = @host@
 host_alias = @host_alias@
 host_cpu = @host_cpu@
@@ -360,7 +361,6 @@ sharedstatedir = @sharedstatedir@
 srcdir = @srcdir@
 subdirs = @subdirs@
 sysconfdir = @sysconfdir@
-systemd_libexec_dir = @systemd_libexec_dir@
 systemd_unit_dir = @systemd_unit_dir@
 target = @target@
 target_alias = @target_alias@
@@ -370,7 +370,6 @@ target_vendor = @target_vendor@
 top_build_prefix = @top_build_prefix@
 top_builddir = @top_builddir@
 top_srcdir = @top_srcdir@
-user_rgw = @user_rgw@
 JAVA_SRC = \
 	java/com/ceph/fs/CephMount.java \
 	java/com/ceph/fs/CephStat.java \
diff --git a/src/journal/Entry.cc b/src/journal/Entry.cc
index bd26ebe..af6c699 100644
--- a/src/journal/Entry.cc
+++ b/src/journal/Entry.cc
@@ -15,7 +15,7 @@ namespace journal {
 
 namespace {
 
-const uint32_t HEADER_FIXED_SIZE = 17; /// preamble, version, tid
+const uint32_t HEADER_FIXED_SIZE = 25; /// preamble, version, entry tid, tag id
 
 } // anonymous namespace
 
@@ -23,10 +23,10 @@ void Entry::encode(bufferlist &bl) const {
   bufferlist data_bl;
   ::encode(preamble, data_bl);
   ::encode(static_cast<uint8_t>(1), data_bl);
-  ::encode(m_tid, data_bl);
+  ::encode(m_entry_tid, data_bl);
+  ::encode(m_tag_tid, data_bl);
   assert(HEADER_FIXED_SIZE == data_bl.length());
 
-  ::encode(m_tag, data_bl);
   ::encode(m_data, data_bl);
 
   uint32_t crc = data_bl.crc32c(0);
@@ -49,8 +49,8 @@ void Entry::decode(bufferlist::iterator &iter) {
     throw buffer::malformed_input("unknown version: " + stringify(version));
   }
 
-  ::decode(m_tid, iter);
-  ::decode(m_tag, iter);
+  ::decode(m_entry_tid, iter);
+  ::decode(m_tag_tid, iter);
   ::decode(m_data, iter);
   uint32_t end_offset = iter.get_off();
 
@@ -67,8 +67,8 @@ void Entry::decode(bufferlist::iterator &iter) {
 }
 
 void Entry::dump(Formatter *f) const {
-  f->dump_string("tag", m_tag);
-  f->dump_unsigned("tid", m_tid);
+  f->dump_unsigned("tag_tid", m_tag_tid);
+  f->dump_unsigned("entry_tid", m_entry_tid);
 
   std::stringstream data;
   m_data.hexdump(data);
@@ -93,19 +93,6 @@ bool Entry::is_readable(bufferlist::iterator iter, uint32_t *bytes_needed) {
     *bytes_needed = sizeof(uint32_t) - iter.get_remaining();
     return false;
   }
-  uint32_t tag_size;
-  ::decode(tag_size, iter);
-
-  if (iter.get_remaining() < tag_size) {
-    *bytes_needed = tag_size - iter.get_remaining();
-    return false;
-  }
-  iter.advance(tag_size);
-
-  if (iter.get_remaining() < sizeof(uint32_t)) {
-    *bytes_needed = sizeof(uint32_t) - iter.get_remaining();
-    return false;
-  }
   uint32_t data_size;
   ::decode(data_size, iter);
 
@@ -134,21 +121,22 @@ bool Entry::is_readable(bufferlist::iterator iter, uint32_t *bytes_needed) {
 }
 
 void Entry::generate_test_instances(std::list<Entry *> &o) {
-  o.push_back(new Entry("tag1", 123, bufferlist()));
+  o.push_back(new Entry(1, 123, bufferlist()));
 
   bufferlist bl;
   bl.append("data");
-  o.push_back(new Entry("tag2", 123, bl));
+  o.push_back(new Entry(2, 123, bl));
 }
 
 bool Entry::operator==(const Entry& rhs) const {
-  return (m_tag == rhs.m_tag && m_tid == rhs.m_tid &&
+  return (m_tag_tid == rhs.m_tag_tid && m_entry_tid == rhs.m_entry_tid &&
           const_cast<bufferlist&>(m_data).contents_equal(
             const_cast<bufferlist&>(rhs.m_data)));
 }
 
 std::ostream &operator<<(std::ostream &os, const Entry &entry) {
-  os << "Entry[tag=" << entry.get_tag() << ", tid=" << entry.get_tid() << ", "
+  os << "Entry[tag_tid=" << entry.get_tag_tid() << ", "
+     << "entry_tid=" << entry.get_entry_tid() << ", "
      << "data size=" << entry.get_data().length() << "]";
   return os;
 }
diff --git a/src/journal/Entry.h b/src/journal/Entry.h
index 9e85df4..3d0a0d0 100644
--- a/src/journal/Entry.h
+++ b/src/journal/Entry.h
@@ -18,17 +18,17 @@ namespace journal {
 
 class Entry {
 public:
-  Entry() : m_tid() {}
-  Entry(const std::string &tag, uint64_t tid, const bufferlist &data)
-    : m_tag(tag), m_tid(tid), m_data(data)
+  Entry() : m_tag_tid(0), m_entry_tid() {}
+  Entry(uint64_t tag_tid, uint64_t entry_tid, const bufferlist &data)
+    : m_tag_tid(tag_tid), m_entry_tid(entry_tid), m_data(data)
   {
   }
 
-  inline const std::string &get_tag() const {
-    return m_tag;
+  inline uint64_t get_tag_tid() const {
+    return m_tag_tid;
   }
-  inline uint64_t get_tid() const {
-    return m_tid;
+  inline uint64_t get_entry_tid() const {
+    return m_entry_tid;
   }
   inline const bufferlist &get_data() const {
     return m_data;
@@ -46,8 +46,8 @@ public:
 private:
   static const uint64_t preamble = 0x3141592653589793;
 
-  std::string m_tag;
-  uint64_t m_tid;
+  uint64_t m_tag_tid;
+  uint64_t m_entry_tid;
   bufferlist m_data;
 };
 
diff --git a/src/journal/FutureImpl.cc b/src/journal/FutureImpl.cc
index 0ccb46f..c3344ba 100644
--- a/src/journal/FutureImpl.cc
+++ b/src/journal/FutureImpl.cc
@@ -7,10 +7,10 @@
 
 namespace journal {
 
-FutureImpl::FutureImpl(Finisher &finisher, const std::string &tag, uint64_t tid,
+FutureImpl::FutureImpl(Finisher &finisher, uint64_t tag_tid, uint64_t entry_tid,
                        uint64_t commit_tid)
-  : RefCountedObject(NULL, 0), m_finisher(finisher), m_tag(tag), m_tid(tid),
-    m_commit_tid(commit_tid),
+  : RefCountedObject(NULL, 0), m_finisher(finisher), m_tag_tid(tag_tid),
+    m_entry_tid(entry_tid), m_commit_tid(commit_tid),
     m_lock(utils::unique_lock_name("FutureImpl::m_lock", this)), m_safe(false),
     m_consistent(false), m_return_value(0), m_flush_state(FLUSH_STATE_NONE),
     m_consistent_ack(this) {
@@ -137,7 +137,9 @@ void FutureImpl::finish_unlock() {
 }
 
 std::ostream &operator<<(std::ostream &os, const FutureImpl &future) {
-  os << "Future[tag=" << future.m_tag << ", tid=" << future.m_tid << "]";
+  os << "Future[tag_tid=" << future.m_tag_tid << ", "
+     << "entry_tid=" << future.m_entry_tid << ", "
+     << "commit_tid=" << future.m_commit_tid << "]";
   return os;
 }
 
diff --git a/src/journal/FutureImpl.h b/src/journal/FutureImpl.h
index 855c958..e77f049 100644
--- a/src/journal/FutureImpl.h
+++ b/src/journal/FutureImpl.h
@@ -31,16 +31,16 @@ public:
   };
   typedef boost::intrusive_ptr<FlushHandler> FlushHandlerPtr;
 
-  FutureImpl(Finisher &finisher, const std::string &tag, uint64_t tid,
+  FutureImpl(Finisher &finisher, uint64_t tag_tid, uint64_t entry_tid,
              uint64_t commit_tid);
 
   void init(const FutureImplPtr &prev_future);
 
-  inline const std::string &get_tag() const {
-    return m_tag;
+  inline uint64_t get_tag_tid() const {
+    return m_tag_tid;
   }
-  inline uint64_t get_tid() const {
-    return m_tid;
+  inline uint64_t get_entry_tid() const {
+    return m_entry_tid;
   }
   inline uint64_t get_commit_tid() const {
     return m_commit_tid;
@@ -96,8 +96,8 @@ private:
   };
 
   Finisher &m_finisher;
-  std::string m_tag;
-  uint64_t m_tid;
+  uint64_t m_tag_tid;
+  uint64_t m_entry_tid;
   uint64_t m_commit_tid;
 
   mutable Mutex m_lock;
diff --git a/src/journal/JournalMetadata.cc b/src/journal/JournalMetadata.cc
index 0cd935f..f8d5574 100644
--- a/src/journal/JournalMetadata.cc
+++ b/src/journal/JournalMetadata.cc
@@ -7,6 +7,7 @@
 #include "common/Finisher.h"
 #include "common/Timer.h"
 #include "cls/journal/cls_journal_client.h"
+#include <functional>
 #include <set>
 
 #define dout_subsys ceph_subsys_journaler
@@ -17,6 +18,233 @@ namespace journal {
 
 using namespace cls::journal;
 
+namespace {
+
+// true if lhs is at or before rhs; does not compare object number
+inline bool entry_positions_less_equal(const ObjectSetPosition &lhs,
+                                       const ObjectSetPosition &rhs) {
+  if (lhs.entry_positions == rhs.entry_positions) {
+    return true;
+  }
+  // differing sizes decide immediately: fewer entries -> true, more -> false
+  if (lhs.entry_positions.size() < rhs.entry_positions.size()) {
+    return true;
+  } else if (lhs.entry_positions.size() > rhs.entry_positions.size()) {
+    return false;
+  }
+  // same size: index the rhs entry tids by tag tid for per-tag comparison
+  std::map<uint64_t, uint64_t> rhs_tids;
+  for (EntryPositions::const_iterator it = rhs.entry_positions.begin();
+       it != rhs.entry_positions.end(); ++it) {
+    rhs_tids[it->tag_tid] = it->entry_tid;
+  }
+
+  for (EntryPositions::const_iterator it = lhs.entry_positions.begin();
+       it != lhs.entry_positions.end(); ++it) {
+    const EntryPosition &entry_position = *it;
+    if (entry_position.entry_tid < rhs_tids[entry_position.tag_tid]) {
+      return true;
+    }
+  }
+  return false;
+}
+
+struct C_AllocateTag : public Context {
+  CephContext *cct;
+  librados::IoCtx &ioctx;
+  const std::string &oid;
+  AsyncOpTracker &async_op_tracker;
+  uint64_t tag_class;
+  Tag *tag;
+  Context *on_finish;
+
+  bufferlist out_bl;
+
+  C_AllocateTag(CephContext *cct, librados::IoCtx &ioctx,
+                const std::string &oid, AsyncOpTracker &async_op_tracker,
+                uint64_t tag_class, const bufferlist &data, Tag *tag,
+                Context *on_finish)
+    : cct(cct), ioctx(ioctx), oid(oid), async_op_tracker(async_op_tracker),
+      tag_class(tag_class), tag(tag), on_finish(on_finish) {
+    async_op_tracker.start_op();
+    tag->data = data;
+  }
+  virtual ~C_AllocateTag() {
+    async_op_tracker.finish_op();
+  }
+
+  void send() {
+    send_get_next_tag_tid();
+  }
+
+  void send_get_next_tag_tid() {
+    ldout(cct, 20) << "C_AllocateTag: " << __func__ << dendl;
+
+    librados::ObjectReadOperation op;
+    client::get_next_tag_tid_start(&op);
+
+    librados::AioCompletion *comp = librados::Rados::aio_create_completion(
+      this, nullptr, &utils::rados_state_callback<
+        C_AllocateTag, &C_AllocateTag::handle_get_next_tag_tid>);
+
+    out_bl.clear();
+    int r = ioctx.aio_operate(oid, comp, &op, &out_bl);
+    assert(r == 0);
+    comp->release();
+  }
+
+  void handle_get_next_tag_tid(int r) {
+    ldout(cct, 20) << "C_AllocateTag: " << __func__ << ": r=" << r << dendl;
+
+    if (r == 0) {
+      bufferlist::iterator iter = out_bl.begin();
+      r = client::get_next_tag_tid_finish(&iter, &tag->tid);
+    }
+    if (r < 0) {
+      complete(r);
+      return;
+    }
+    send_tag_create();
+  }
+
+  void send_tag_create() {
+    ldout(cct, 20) << "C_AllocateTag: " << __func__ << dendl;
+
+    librados::ObjectWriteOperation op;
+    client::tag_create(&op, tag->tid, tag_class, tag->data);
+
+    librados::AioCompletion *comp = librados::Rados::aio_create_completion(
+      this, nullptr, &utils::rados_state_callback<
+        C_AllocateTag, &C_AllocateTag::handle_tag_create>);
+
+    int r = ioctx.aio_operate(oid, comp, &op);
+    assert(r == 0);
+    comp->release();
+  }
+
+  void handle_tag_create(int r) {
+    ldout(cct, 20) << "C_AllocateTag: " << __func__ << ": r=" << r << dendl;
+
+    if (r == -ESTALE) {
+      send_get_next_tag_tid();
+      return;
+    } else if (r < 0) {
+      complete(r);
+      return;
+    }
+
+    send_get_tag();
+  }
+
+  void send_get_tag() {
+    ldout(cct, 20) << "C_AllocateTag: " << __func__ << dendl;
+
+    librados::ObjectReadOperation op;
+    client::get_tag_start(&op, tag->tid);
+
+    librados::AioCompletion *comp = librados::Rados::aio_create_completion(
+      this, nullptr, &utils::rados_state_callback<
+        C_AllocateTag, &C_AllocateTag::handle_get_tag>);
+
+    out_bl.clear();
+    int r = ioctx.aio_operate(oid, comp, &op, &out_bl);
+    assert(r == 0);
+    comp->release();
+  }
+
+  void handle_get_tag(int r) {
+    ldout(cct, 20) << "C_AllocateTag: " << __func__ << ": r=" << r << dendl;
+
+    if (r == 0) {
+      bufferlist::iterator iter = out_bl.begin();
+
+      cls::journal::Tag journal_tag;
+      r = client::get_tag_finish(&iter, &journal_tag);
+      if (r == 0) {
+        *tag = journal_tag;
+      }
+    }
+    complete(r);
+  }
+
+  virtual void finish(int r) override {
+    on_finish->complete(r);
+  }
+};
+
+struct C_GetTags : public Context {
+  CephContext *cct;
+  librados::IoCtx &ioctx;
+  const std::string &oid;
+  const std::string &client_id;
+  AsyncOpTracker &async_op_tracker;
+  boost::optional<uint64_t> tag_class;
+  JournalMetadata::Tags *tags;
+  Context *on_finish;
+
+  const uint64_t MAX_RETURN = 64;
+  uint64_t start_after_tag_tid = 0;
+  bufferlist out_bl;
+
+  C_GetTags(CephContext *cct, librados::IoCtx &ioctx, const std::string &oid,
+            const std::string &client_id, AsyncOpTracker &async_op_tracker,
+            const boost::optional<uint64_t> &tag_class,
+            JournalMetadata::Tags *tags, Context *on_finish)
+    : cct(cct), ioctx(ioctx), oid(oid), client_id(client_id),
+      async_op_tracker(async_op_tracker), tag_class(tag_class), tags(tags),
+      on_finish(on_finish) {
+    async_op_tracker.start_op();
+  }
+  virtual ~C_GetTags() {
+    async_op_tracker.finish_op();
+  }
+
+  void send() {
+    send_tag_list();
+  }
+
+  void send_tag_list() {
+    librados::ObjectReadOperation op;
+    client::tag_list_start(&op, start_after_tag_tid, MAX_RETURN, client_id,
+                           tag_class);
+
+    librados::AioCompletion *comp = librados::Rados::aio_create_completion(
+      this, nullptr, &utils::rados_state_callback<
+        C_GetTags, &C_GetTags::handle_tag_list>);
+
+    out_bl.clear();
+    int r = ioctx.aio_operate(oid, comp, &op, &out_bl);
+    assert(r == 0);
+    comp->release();
+  }
+
+  void handle_tag_list(int r) {
+    if (r == 0) {
+      std::set<cls::journal::Tag> journal_tags;
+      bufferlist::iterator iter = out_bl.begin();
+      r = client::tag_list_finish(&iter, &journal_tags);
+      if (r == 0) {
+        for (auto &journal_tag : journal_tags) {
+          tags->push_back(journal_tag);
+          start_after_tag_tid = journal_tag.tid;
+        }
+
+        if (journal_tags.size() == MAX_RETURN) {
+          send_tag_list();
+          return;
+        }
+      }
+    }
+    complete(r);
+  }
+
+  virtual void finish(int r) override {
+    on_finish->complete(r);
+  }
+};
+
+} // anonymous namespace
+
 JournalMetadata::JournalMetadata(librados::IoCtx &ioctx,
                                  const std::string &oid,
                                  const std::string &client_id,
@@ -63,6 +291,9 @@ void JournalMetadata::init(Context *on_init) {
 }
 
 void JournalMetadata::shutdown() {
+
+  ldout(m_cct, 20) << __func__ << dendl;
+
   assert(m_initialized);
   {
     Mutex::Locker locker(m_lock);
@@ -74,6 +305,8 @@ void JournalMetadata::shutdown() {
     }
   }
 
+  flush_commit_position();
+
   if (m_timer != NULL) {
     Mutex::Locker locker(m_timer_lock);
     m_timer->shutdown();
@@ -94,9 +327,9 @@ void JournalMetadata::shutdown() {
   m_ioctx.aio_flush();
 }
 
-int JournalMetadata::register_client(const std::string &description) {
+int JournalMetadata::register_client(const bufferlist &data) {
   ldout(m_cct, 10) << __func__ << ": " << m_client_id << dendl;
-  int r = client::client_register(m_ioctx, m_oid, m_client_id, description);
+  int r = client::client_register(m_ioctx, m_oid, m_client_id, data);
   if (r < 0) {
     lderr(m_cct) << "failed to register journal client '" << m_client_id
                  << "': " << cpp_strerror(r) << dendl;
@@ -122,6 +355,22 @@ int JournalMetadata::unregister_client() {
   return 0;
 }
 
+void JournalMetadata::allocate_tag(uint64_t tag_class, const bufferlist &data,
+                                   Tag *tag, Context *on_finish) {
+  C_AllocateTag *ctx = new C_AllocateTag(m_cct, m_ioctx, m_oid,
+                                         m_async_op_tracker, tag_class,
+                                         data, tag, on_finish);
+  ctx->send();
+}
+
+void JournalMetadata::get_tags(const boost::optional<uint64_t> &tag_class,
+                               Tags *tags, Context *on_finish) {
+  C_GetTags *ctx = new C_GetTags(m_cct, m_ioctx, m_oid, m_client_id,
+                                 m_async_op_tracker, tag_class,
+                                 tags, on_finish);
+  ctx->send();
+}
+
 void JournalMetadata::add_listener(Listener *listener) {
   Mutex::Locker locker(m_lock);
   while (m_update_notifications > 0) {
@@ -185,6 +434,9 @@ void JournalMetadata::set_active_set(uint64_t object_set) {
 }
 
 void JournalMetadata::flush_commit_position() {
+
+  ldout(m_cct, 20) << __func__ << dendl;
+
   {
     Mutex::Locker timer_locker(m_timer_lock);
     Mutex::Locker locker(m_lock);
@@ -208,8 +460,8 @@ void JournalMetadata::set_commit_position(
     Mutex::Locker locker(m_lock);
     ldout(m_cct, 20) << __func__ << ": current=" << m_client.commit_position
                      << ", new=" << commit_position << dendl;
-    if (commit_position <= m_client.commit_position ||
-        commit_position <= m_commit_position) {
+    if (entry_positions_less_equal(commit_position, m_client.commit_position) ||
+        entry_positions_less_equal(commit_position, m_commit_position)) {
       stale_ctx = on_safe;
     } else {
       stale_ctx = m_commit_position_ctx;
@@ -226,25 +478,25 @@ void JournalMetadata::set_commit_position(
   }
 }
 
-void JournalMetadata::reserve_tid(const std::string &tag, uint64_t tid) {
+void JournalMetadata::reserve_entry_tid(uint64_t tag_tid, uint64_t entry_tid) {
   Mutex::Locker locker(m_lock);
-  uint64_t &allocated_tid = m_allocated_tids[tag];
-  if (allocated_tid <= tid) {
-    allocated_tid = tid + 1;
+  uint64_t &allocated_entry_tid = m_allocated_entry_tids[tag_tid];
+  if (allocated_entry_tid <= entry_tid) {
+    allocated_entry_tid = entry_tid + 1;
   }
 }
 
-bool JournalMetadata::get_last_allocated_tid(const std::string &tag,
-                                             uint64_t *tid) const {
+bool JournalMetadata::get_last_allocated_entry_tid(uint64_t tag_tid,
+                                                   uint64_t *entry_tid) const {
   Mutex::Locker locker(m_lock);
 
-  AllocatedTids::const_iterator it = m_allocated_tids.find(tag);
-  if (it == m_allocated_tids.end()) {
+  AllocatedEntryTids::const_iterator it = m_allocated_entry_tids.find(tag_tid);
+  if (it == m_allocated_entry_tids.end()) {
     return false;
   }
 
   assert(it->second > 0);
-  *tid = it->second - 1;
+  *entry_tid = it->second - 1;
   return true;
 }
 
@@ -273,7 +525,7 @@ void JournalMetadata::handle_refresh_complete(C_Refresh *refresh, int r) {
   if (r == 0) {
     Mutex::Locker locker(m_lock);
 
-    Client client(m_client_id, "");
+    Client client(m_client_id, bufferlist());
     RegisteredClients::iterator it = refresh->registered_clients.find(client);
     if (it != refresh->registered_clients.end()) {
       m_minimum_set = refresh->minimum_set;
@@ -303,6 +555,9 @@ void JournalMetadata::handle_refresh_complete(C_Refresh *refresh, int r) {
 }
 
 void JournalMetadata::schedule_commit_task() {
+
+  ldout(m_cct, 20) << __func__ << dendl;
+
   assert(m_timer_lock.is_locked());
   assert(m_lock.is_locked());
 
@@ -313,6 +568,9 @@ void JournalMetadata::schedule_commit_task() {
 }
 
 void JournalMetadata::handle_commit_position_task() {
+
+  ldout(m_cct, 20) << __func__ << dendl;
+
   Mutex::Locker locker(m_lock);
 
   librados::ObjectWriteOperation op;
@@ -327,6 +585,8 @@ void JournalMetadata::handle_commit_position_task() {
   int r = m_ioctx.aio_operate(m_oid, comp, &op);
   assert(r == 0);
   comp->release();
+
+  m_commit_position_task_ctx = NULL;
 }
 
 void JournalMetadata::schedule_watch_reset() {
@@ -377,15 +637,17 @@ void JournalMetadata::handle_watch_error(int err) {
 }
 
 uint64_t JournalMetadata::allocate_commit_tid(uint64_t object_num,
-                                              const std::string &tag,
-                                              uint64_t tid) {
+                                              uint64_t tag_tid,
+                                              uint64_t entry_tid) {
   Mutex::Locker locker(m_lock);
   uint64_t commit_tid = ++m_commit_tid;
-  m_pending_commit_tids[commit_tid] = CommitEntry(object_num, tag, tid);
+  m_pending_commit_tids[commit_tid] = CommitEntry(object_num, tag_tid,
+                                                  entry_tid);
 
   ldout(m_cct, 20) << "allocated commit tid: commit_tid=" << commit_tid << " ["
                    << "object_num=" << object_num << ", "
-                   << "tag=" << tag << ", tid=" << tid << "]" << dendl;
+                   << "tag_tid=" << tag_tid << ", entry_tid=" << entry_tid << "]"
+                   << dendl;
   return commit_tid;
 }
 
@@ -418,12 +680,13 @@ bool JournalMetadata::committed(uint64_t commit_tid,
 
     object_set_position->object_number = commit_entry.object_num;
     if (!object_set_position->entry_positions.empty() &&
-        object_set_position->entry_positions.front().tag == commit_entry.tag) {
+        object_set_position->entry_positions.front().tag_tid ==
+          commit_entry.tag_tid) {
       object_set_position->entry_positions.front() = EntryPosition(
-        commit_entry.tag, commit_entry.tid);
+        commit_entry.tag_tid, commit_entry.entry_tid);
     } else {
       object_set_position->entry_positions.push_front(EntryPosition(
-        commit_entry.tag, commit_entry.tid));
+        commit_entry.tag_tid, commit_entry.entry_tid));
     }
     m_pending_commit_tids.erase(it);
     update_commit_position = true;
@@ -431,10 +694,10 @@ bool JournalMetadata::committed(uint64_t commit_tid,
 
   if (update_commit_position) {
     // prune the position to have unique tags in commit-order
-    std::set<std::string> in_use_tags;
+    std::set<uint64_t> in_use_tag_tids;
     EntryPositions::iterator it = object_set_position->entry_positions.begin();
     while (it != object_set_position->entry_positions.end()) {
-      if (!in_use_tags.insert(it->tag).second) {
+      if (!in_use_tag_tids.insert(it->tag_tid).second) {
         it = object_set_position->entry_positions.erase(it);
       } else {
         ++it;
@@ -477,7 +740,7 @@ std::ostream &operator<<(std::ostream &os,
 			 const JournalMetadata::RegisteredClients &clients) {
   os << "[";
   for (JournalMetadata::RegisteredClients::const_iterator c = clients.begin();
-       c != clients.end(); c++) {
+       c != clients.end(); ++c) {
     os << (c == clients.begin() ? "" : ", " ) << *c;
   }
   os << "]";
diff --git a/src/journal/JournalMetadata.h b/src/journal/JournalMetadata.h
index d15bbca..450169b 100644
--- a/src/journal/JournalMetadata.h
+++ b/src/journal/JournalMetadata.h
@@ -14,6 +14,7 @@
 #include "journal/AsyncOpTracker.h"
 #include <boost/intrusive_ptr.hpp>
 #include <boost/noncopyable.hpp>
+#include <boost/optional.hpp>
 #include <list>
 #include <map>
 #include <string>
@@ -33,8 +34,10 @@ public:
   typedef cls::journal::EntryPositions EntryPositions;
   typedef cls::journal::ObjectSetPosition ObjectSetPosition;
   typedef cls::journal::Client Client;
+  typedef cls::journal::Tag Tag;
 
   typedef std::set<Client> RegisteredClients;
+  typedef std::list<Tag> Tags;
 
   struct Listener {
     virtual ~Listener() {};
@@ -51,9 +54,14 @@ public:
   void add_listener(Listener *listener);
   void remove_listener(Listener *listener);
 
-  int register_client(const std::string &description);
+  int register_client(const bufferlist &data);
   int unregister_client();
 
+  void allocate_tag(uint64_t tag_class, const bufferlist &data,
+                    Tag *tag, Context *on_finish);
+  void get_tags(const boost::optional<uint64_t> &tag_class, Tags *tags,
+                Context *on_finish);
+
   inline const std::string &get_client_id() const {
     return m_client_id;
   }
@@ -103,34 +111,35 @@ public:
     *registered_clients = m_registered_clients;
   }
 
-  inline uint64_t allocate_tid(const std::string &tag) {
+  inline uint64_t allocate_entry_tid(uint64_t tag_tid) {
     Mutex::Locker locker(m_lock);
-    return m_allocated_tids[tag]++;
+    return m_allocated_entry_tids[tag_tid]++;
   }
-  void reserve_tid(const std::string &tag, uint64_t tid);
-  bool get_last_allocated_tid(const std::string &tag, uint64_t *tid) const;
+  void reserve_entry_tid(uint64_t tag_tid, uint64_t entry_tid);
+  bool get_last_allocated_entry_tid(uint64_t tag_tid, uint64_t *entry_tid) const;
 
-  uint64_t allocate_commit_tid(uint64_t object_num, const std::string &tag,
-                               uint64_t tid);
+  uint64_t allocate_commit_tid(uint64_t object_num, uint64_t tag_tid,
+                               uint64_t entry_tid);
   bool committed(uint64_t commit_tid, ObjectSetPosition *object_set_position);
 
   void notify_update();
   void async_notify_update();
 
 private:
-  typedef std::map<std::string, uint64_t> AllocatedTids;
+  typedef std::map<uint64_t, uint64_t> AllocatedEntryTids;
   typedef std::list<Listener*> Listeners;
 
   struct CommitEntry {
     uint64_t object_num;
-    std::string tag;
-    uint64_t tid;
+    uint64_t tag_tid;
+    uint64_t entry_tid;
     bool committed;
 
-    CommitEntry() : object_num(0), tid(0), committed(false) {
+    CommitEntry() : object_num(0), tag_tid(0), entry_tid(0), committed(false) {
     }
-    CommitEntry(uint64_t _object_num, const std::string &_tag, uint64_t _tid)
-      : object_num(_object_num), tag(_tag), tid(_tid), committed(false) {
+    CommitEntry(uint64_t _object_num, uint64_t _tag_tid, uint64_t _entry_tid)
+      : object_num(_object_num), tag_tid(_tag_tid), entry_tid(_entry_tid),
+        committed(false) {
     }
   };
   typedef std::map<uint64_t, CommitEntry> CommitTids;
@@ -284,7 +293,7 @@ private:
   RegisteredClients m_registered_clients;
   Client m_client;
 
-  AllocatedTids m_allocated_tids;
+  AllocatedEntryTids m_allocated_entry_tids;
 
   size_t m_update_notifications;
   Cond m_update_cond;
diff --git a/src/journal/JournalPlayer.cc b/src/journal/JournalPlayer.cc
index b78f29c..45505a3 100644
--- a/src/journal/JournalPlayer.cc
+++ b/src/journal/JournalPlayer.cc
@@ -18,7 +18,7 @@ namespace {
 struct C_HandleComplete : public Context {
   ReplayHandler *replay_handler;
 
-  C_HandleComplete(ReplayHandler *_replay_handler)
+  explicit C_HandleComplete(ReplayHandler *_replay_handler)
     : replay_handler(_replay_handler) {
     replay_handler->get();
   }
@@ -33,7 +33,7 @@ struct C_HandleComplete : public Context {
 struct C_HandleEntriesAvailable : public Context {
   ReplayHandler *replay_handler;
 
-  C_HandleEntriesAvailable(ReplayHandler *_replay_handler)
+  explicit C_HandleEntriesAvailable(ReplayHandler *_replay_handler)
       : replay_handler(_replay_handler) {
     replay_handler->get();
   }
@@ -55,7 +55,7 @@ JournalPlayer::JournalPlayer(librados::IoCtx &ioctx,
     m_journal_metadata(journal_metadata), m_replay_handler(replay_handler),
     m_lock("JournalPlayer::m_lock"), m_state(STATE_INIT), m_splay_offset(0),
     m_watch_enabled(false), m_watch_scheduled(false), m_watch_interval(0),
-    m_commit_object(0) {
+    m_commit_object(0), m_commit_tag_tid(0) {
   m_replay_handler->get();
   m_ioctx.dup(ioctx);
   m_cct = reinterpret_cast<CephContext *>(m_ioctx.cct());
@@ -66,13 +66,13 @@ JournalPlayer::JournalPlayer(librados::IoCtx &ioctx,
     uint8_t splay_width = m_journal_metadata->get_splay_width();
     m_splay_offset = commit_position.object_number % splay_width;
     m_commit_object = commit_position.object_number;
-    m_commit_tag = commit_position.entry_positions.front().tag;
+    m_commit_tag_tid = commit_position.entry_positions.front().tag_tid;
 
     for (EntryPositions::const_iterator it =
            commit_position.entry_positions.begin();
          it != commit_position.entry_positions.end(); ++it) {
       const EntryPosition &entry_position = *it;
-      m_commit_tids[entry_position.tag] = entry_position.tid;
+      m_commit_tids[entry_position.tag_tid] = entry_position.entry_tid;
     }
   }
 }
@@ -156,9 +156,10 @@ bool JournalPlayer::try_pop_front(Entry *entry, uint64_t *commit_tid) {
   object_player->front(entry);
   object_player->pop_front();
 
-  uint64_t last_tid;
-  if (m_journal_metadata->get_last_allocated_tid(entry->get_tag(), &last_tid) &&
-      entry->get_tid() != last_tid + 1) {
+  uint64_t last_entry_tid;
+  if (m_journal_metadata->get_last_allocated_entry_tid(
+        entry->get_tag_tid(), &last_entry_tid) &&
+      entry->get_entry_tid() != last_entry_tid + 1) {
     lderr(m_cct) << "missing prior journal entry: " << *entry << dendl;
 
     m_state = STATE_ERROR;
@@ -171,10 +172,10 @@ bool JournalPlayer::try_pop_front(Entry *entry, uint64_t *commit_tid) {
   if (!object_player->empty()) {
     Entry peek_entry;
     object_player->front(&peek_entry);
-    if (peek_entry.get_tag() == entry->get_tag() ||
-        (m_journal_metadata->get_last_allocated_tid(peek_entry.get_tag(),
-                                                    &last_tid) &&
-         last_tid + 1 != peek_entry.get_tid())) {
+    if (peek_entry.get_tag_tid() == entry->get_tag_tid() ||
+        (m_journal_metadata->get_last_allocated_entry_tid(
+           peek_entry.get_tag_tid(), &last_entry_tid) &&
+         last_entry_tid + 1 != peek_entry.get_entry_tid())) {
       advance_splay_object();
     }
   } else {
@@ -182,9 +183,11 @@ bool JournalPlayer::try_pop_front(Entry *entry, uint64_t *commit_tid) {
     remove_empty_object_player(object_player);
   }
 
-  m_journal_metadata->reserve_tid(entry->get_tag(), entry->get_tid());
+  m_journal_metadata->reserve_entry_tid(entry->get_tag_tid(),
+                                        entry->get_entry_tid());
   *commit_tid = m_journal_metadata->allocate_commit_tid(
-    object_player->get_object_number(), entry->get_tag(), entry->get_tid());
+    object_player->get_object_number(), entry->get_tag_tid(),
+    entry->get_entry_tid());
   return true;
 }
 
@@ -249,14 +252,15 @@ int JournalPlayer::process_prefetch(uint64_t object_number) {
       Entry entry;
       while (!m_commit_tids.empty() && !object_player->empty()) {
         object_player->front(&entry);
-        if (entry.get_tid() > m_commit_tids[entry.get_tag()]) {
+        if (entry.get_entry_tid() > m_commit_tids[entry.get_tag_tid()]) {
           ldout(m_cct, 10) << "located next uncommitted entry: " << entry
                            << dendl;
           break;
         }
 
         ldout(m_cct, 20) << "skipping committed entry: " << entry << dendl;
-        m_journal_metadata->reserve_tid(entry.get_tag(), entry.get_tid());
+        m_journal_metadata->reserve_entry_tid(entry.get_tag_tid(),
+                                              entry.get_entry_tid());
         object_player->pop_front();
       }
 
@@ -269,7 +273,7 @@ int JournalPlayer::process_prefetch(uint64_t object_number) {
         } else {
           Entry entry;
           object_player->front(&entry);
-          if (entry.get_tag() == m_commit_tag) {
+          if (entry.get_tag_tid() == m_commit_tag_tid) {
             advance_splay_object();
           }
         }
diff --git a/src/journal/JournalPlayer.h b/src/journal/JournalPlayer.h
index 49680ad..1e7bdff 100644
--- a/src/journal/JournalPlayer.h
+++ b/src/journal/JournalPlayer.h
@@ -40,7 +40,7 @@ public:
 
 private:
   typedef std::set<uint8_t> PrefetchSplayOffsets;
-  typedef std::map<std::string, uint64_t> AllocatedTids;
+  typedef std::map<uint64_t, uint64_t> AllocatedEntryTids;
   typedef std::map<uint64_t, ObjectPlayerPtr> ObjectPlayers;
   typedef std::map<uint8_t, ObjectPlayers> SplayedObjectPlayers;
 
@@ -96,8 +96,8 @@ private:
   PrefetchSplayOffsets m_prefetch_splay_offsets;
   SplayedObjectPlayers m_object_players;
   uint64_t m_commit_object;
-  std::string m_commit_tag;
-  AllocatedTids m_commit_tids;
+  uint64_t m_commit_tag_tid;
+  AllocatedEntryTids m_commit_tids;
 
   void advance_splay_object();
 
diff --git a/src/journal/JournalRecorder.cc b/src/journal/JournalRecorder.cc
index 4fb7765..21ecc2c 100644
--- a/src/journal/JournalRecorder.cc
+++ b/src/journal/JournalRecorder.cc
@@ -2,6 +2,7 @@
 // vim: ts=8 sw=2 smarttab
 
 #include "journal/JournalRecorder.h"
+#include "common/Finisher.h"
 #include "journal/Entry.h"
 #include "journal/Utils.h"
 
@@ -11,6 +12,36 @@
 
 namespace journal {
 
+namespace {
+
+struct C_Flush : public Context {
+  JournalMetadataPtr journal_metadata;
+  Context *on_finish;
+  atomic_t pending_flushes;
+  int ret_val;
+
+  C_Flush(JournalMetadataPtr _journal_metadata, Context *_on_finish,
+          size_t _pending_flushes)
+    : journal_metadata(_journal_metadata), on_finish(_on_finish),
+      pending_flushes(_pending_flushes), ret_val(0) {
+  }
+
+  virtual void complete(int r) {
+    if (r < 0 && ret_val == 0) {
+      ret_val = r;
+    }
+    if (pending_flushes.dec() == 0) {
+      // ensure all prior callbacks have been flushed as well
+      journal_metadata->get_finisher().queue(on_finish, ret_val);
+      delete this;
+    }
+  }
+  virtual void finish(int r) {
+  }
+};
+
+} // anonymous namespace
+
 JournalRecorder::JournalRecorder(librados::IoCtx &ioctx,
                                  const std::string &object_oid_prefix,
                                  const JournalMetadataPtr& journal_metadata,
@@ -39,24 +70,25 @@ JournalRecorder::~JournalRecorder() {
   m_journal_metadata->remove_listener(&m_listener);
 }
 
-Future JournalRecorder::append(const std::string &tag,
+Future JournalRecorder::append(uint64_t tag_tid,
                                const bufferlist &payload_bl) {
   Mutex::Locker locker(m_lock);
 
-  uint64_t tid = m_journal_metadata->allocate_tid(tag);
+  uint64_t entry_tid = m_journal_metadata->allocate_entry_tid(tag_tid);
   uint8_t splay_width = m_journal_metadata->get_splay_width();
-  uint8_t splay_offset = tid % splay_width;
+  uint8_t splay_offset = entry_tid % splay_width;
 
   ObjectRecorderPtr object_ptr = get_object(splay_offset);
   uint64_t commit_tid = m_journal_metadata->allocate_commit_tid(
-    object_ptr->get_object_number(), tag, tid);
+    object_ptr->get_object_number(), tag_tid, entry_tid);
   FutureImplPtr future(new FutureImpl(m_journal_metadata->get_finisher(),
-                                      tag, tid, commit_tid));
+                                      tag_tid, entry_tid, commit_tid));
   future->init(m_prev_future);
   m_prev_future = future;
 
   bufferlist entry_bl;
-  ::encode(Entry(future->get_tag(), future->get_tid(), payload_bl), entry_bl);
+  ::encode(Entry(future->get_tag_tid(), future->get_entry_tid(), payload_bl),
+           entry_bl);
 
   AppendBuffers append_buffers;
   append_buffers.push_back(std::make_pair(future, entry_bl));
@@ -76,14 +108,15 @@ void JournalRecorder::flush(Context *on_safe) {
   {
     Mutex::Locker locker(m_lock);
 
-    ctx = new C_Flush(on_safe, m_object_ptrs.size());
+    ctx = new C_Flush(m_journal_metadata, on_safe, m_object_ptrs.size() + 1);
     for (ObjectRecorderPtrs::iterator it = m_object_ptrs.begin();
          it != m_object_ptrs.end(); ++it) {
       it->second->flush(ctx);
     }
   }
 
-  ctx->unblock();
+  // avoid holding the lock in case there is nothing to flush
+  ctx->complete(0);
 }
 
 ObjectRecorderPtr JournalRecorder::get_object(uint8_t splay_offset) {
diff --git a/src/journal/JournalRecorder.h b/src/journal/JournalRecorder.h
index 4c3489f..be92298 100644
--- a/src/journal/JournalRecorder.h
+++ b/src/journal/JournalRecorder.h
@@ -28,7 +28,7 @@ public:
                   double flush_age);
   ~JournalRecorder();
 
-  Future append(const std::string &tag, const bufferlist &bl);
+  Future append(uint64_t tag_tid, const bufferlist &bl);
   void flush(Context *on_safe);
 
   ObjectRecorderPtr get_object(uint8_t splay_offset);
@@ -58,32 +58,6 @@ private:
     }
   };
 
-  struct C_Flush : public Context {
-    Context *on_finish;
-    atomic_t pending_flushes;
-    int ret_val;
-
-    C_Flush(Context *_on_finish, size_t _pending_flushes)
-      : on_finish(_on_finish), pending_flushes(_pending_flushes + 1),
-        ret_val(0) {
-    }
-
-    void unblock() {
-      complete(0);
-    }
-    virtual void complete(int r) {
-      if (r < 0 && ret_val == 0) {
-        ret_val = r;
-      }
-      if (pending_flushes.dec() == 0) {
-        on_finish->complete(ret_val);
-        delete this;
-      }
-    }
-    virtual void finish(int r) {
-    }
-  };
-
   librados::IoCtx m_ioctx;
   CephContext *m_cct;
   std::string m_object_oid_prefix;
diff --git a/src/journal/Journaler.cc b/src/journal/Journaler.cc
index 25a50b8..1eb7e2c 100644
--- a/src/journal/Journaler.cc
+++ b/src/journal/Journaler.cc
@@ -159,14 +159,29 @@ int Journaler::remove(bool force) {
   return 0;
 }
 
-int Journaler::register_client(const std::string &description) {
-  return m_metadata->register_client(description);
+int Journaler::register_client(const bufferlist &data) {
+  return m_metadata->register_client(data);
 }
 
 int Journaler::unregister_client() {
   return m_metadata->unregister_client();
 }
 
+void Journaler::allocate_tag(const bufferlist &data, cls::journal::Tag *tag,
+                             Context *on_finish) {
+  m_metadata->allocate_tag(cls::journal::Tag::TAG_CLASS_NEW, data, tag,
+                           on_finish);
+}
+
+void Journaler::allocate_tag(uint64_t tag_class, const bufferlist &data,
+                             cls::journal::Tag *tag, Context *on_finish) {
+  m_metadata->allocate_tag(tag_class, data, tag, on_finish);
+}
+
+void Journaler::get_tags(uint64_t tag_class, Tags *tags, Context *on_finish) {
+  m_metadata->get_tags(tag_class, tags, on_finish);
+}
+
 void Journaler::start_replay(ReplayHandler *replay_handler) {
   create_player(replay_handler);
   m_player->prefetch();
@@ -179,7 +194,7 @@ void Journaler::start_live_replay(ReplayHandler *replay_handler,
 }
 
 bool Journaler::try_pop_front(ReplayEntry *replay_entry,
-			      std::string* tag) {
+			      uint64_t *tag_tid) {
   assert(m_player != NULL);
 
   Entry entry;
@@ -189,8 +204,8 @@ bool Journaler::try_pop_front(ReplayEntry *replay_entry,
   }
 
   *replay_entry = ReplayEntry(entry.get_data(), commit_tid);
-  if (tag != NULL) {
-    *tag = entry.get_tag();
+  if (tag_tid != nullptr) {
+    *tag_tid = entry.get_tag_tid();
   }
   return true;
 }
@@ -229,8 +244,8 @@ void Journaler::stop_append(Context *on_safe) {
   m_recorder = NULL;
 }
 
-Future Journaler::append(const std::string &tag, const bufferlist &payload_bl) {
-  return m_recorder->append(tag, payload_bl);
+Future Journaler::append(uint64_t tag_tid, const bufferlist &payload_bl) {
+  return m_recorder->append(tag_tid, payload_bl);
 }
 
 void Journaler::flush(Context *on_safe) {
diff --git a/src/journal/Journaler.h b/src/journal/Journaler.h
index 27f77c7..6702eb4 100644
--- a/src/journal/Journaler.h
+++ b/src/journal/Journaler.h
@@ -9,8 +9,10 @@
 #include "include/Context.h"
 #include "include/rados/librados.hpp"
 #include "journal/Future.h"
-#include <string>
+#include "cls/journal/cls_journal_types.h"
+#include <list>
 #include <map>
+#include <string>
 #include "include/assert.h"
 
 class SafeTimer;
@@ -26,6 +28,7 @@ class ReplayHandler;
 
 class Journaler {
 public:
+  typedef std::list<cls::journal::Tag> Tags;
 
   static std::string header_oid(const std::string &journal_id);
   static std::string object_oid_prefix(int pool_id,
@@ -42,16 +45,22 @@ public:
   void init(Context *on_init);
   void shutdown();
 
-  int register_client(const std::string &description);
+  int register_client(const bufferlist &data);
   int unregister_client();
 
+  void allocate_tag(const bufferlist &data, cls::journal::Tag *tag,
+                    Context *on_finish);
+  void allocate_tag(uint64_t tag_class, const bufferlist &data,
+                    cls::journal::Tag *tag, Context *on_finish);
+  void get_tags(uint64_t tag_class, Tags *tags, Context *on_finish);
+
   void start_replay(ReplayHandler *replay_handler);
   void start_live_replay(ReplayHandler *replay_handler, double interval);
-  bool try_pop_front(ReplayEntry *replay_entry, std::string* tag = NULL);
+  bool try_pop_front(ReplayEntry *replay_entry, uint64_t *tag_tid = nullptr);
   void stop_replay();
 
   void start_append(int flush_interval, uint64_t flush_bytes, double flush_age);
-  Future append(const std::string &tag, const bufferlist &bl);
+  Future append(uint64_t tag_tid, const bufferlist &bl);
   void flush(Context *on_safe);
   void stop_append(Context *on_safe);
 
diff --git a/src/journal/ObjectPlayer.cc b/src/journal/ObjectPlayer.cc
index 9d58d8e..56459a5 100644
--- a/src/journal/ObjectPlayer.cc
+++ b/src/journal/ObjectPlayer.cc
@@ -142,7 +142,8 @@ int ObjectPlayer::handle_fetch_complete(int r, const bufferlist &bl) {
     ::decode(entry, iter);
     ldout(m_cct, 20) << ": " << entry << " decoded" << dendl;
 
-    EntryKey entry_key(std::make_pair(entry.get_tag(), entry.get_tid()));
+    EntryKey entry_key(std::make_pair(entry.get_tag_tid(),
+                                      entry.get_entry_tid()));
     if (m_entry_keys.find(entry_key) == m_entry_keys.end()) {
       m_entry_keys[entry_key] = m_entries.insert(m_entries.end(), entry);
     } else {
diff --git a/src/journal/ObjectPlayer.h b/src/journal/ObjectPlayer.h
index 5fb9c27..22b51f6 100644
--- a/src/journal/ObjectPlayer.h
+++ b/src/journal/ObjectPlayer.h
@@ -68,7 +68,7 @@ public:
   }
 
 private:
-  typedef std::pair<std::string, uint64_t> EntryKey;
+  typedef std::pair<uint64_t, uint64_t> EntryKey;
   typedef boost::unordered_map<EntryKey, Entries::iterator> EntryKeys;
 
   struct C_Fetch : public Context {
diff --git a/src/journal/Utils.h b/src/journal/Utils.h
index 1169ac9..e29f359 100644
--- a/src/journal/Utils.h
+++ b/src/journal/Utils.h
@@ -11,6 +11,13 @@
 namespace journal {
 namespace utils {
 
+template <typename T, void(T::*MF)(int)>
+void rados_state_callback(rados_completion_t c, void *arg) {
+  T *obj = reinterpret_cast<T*>(arg);
+  int r = rados_aio_get_return_value(c);
+  (obj->*MF)(r);
+}
+
 std::string get_object_name(const std::string &prefix, uint64_t number);
 
 std::string unique_lock_name(const std::string &name, void *address);
diff --git a/src/kv/KineticStore.cc b/src/kv/KineticStore.cc
index 71559f0..7a23714 100644
--- a/src/kv/KineticStore.cc
+++ b/src/kv/KineticStore.cc
@@ -11,7 +11,7 @@
 using std::string;
 #include "common/perf_counters.h"
 
-#define dout_subsys ceph_subsys_keyvaluestore
+#define dout_subsys ceph_subsys_kinetic
 
 int KineticStore::init()
 {
diff --git a/src/kv/KineticStore.h b/src/kv/KineticStore.h
index f275b89..c2be802 100644
--- a/src/kv/KineticStore.h
+++ b/src/kv/KineticStore.h
@@ -45,7 +45,7 @@ class KineticStore : public KeyValueDB {
   int do_open(ostream &out, bool create_if_missing);
 
 public:
-  KineticStore(CephContext *c);
+  explicit KineticStore(CephContext *c);
   ~KineticStore();
 
   static int _test_init(CephContext *c);
@@ -81,7 +81,7 @@ public:
     vector<KineticOp> ops;
     KineticStore *db;
 
-    KineticTransactionImpl(KineticStore *db) : db(db) {}
+    explicit KineticTransactionImpl(KineticStore *db) : db(db) {}
     void set(
       const string &prefix,
       const string &k,
@@ -106,6 +106,7 @@ public:
     const std::set<string> &key,
     std::map<string, bufferlist> *out
     );
+  using KeyValueDB::get;
 
   class KineticWholeSpaceIteratorImpl :
     public KeyValueDB::WholeSpaceIteratorImpl {
@@ -114,7 +115,7 @@ public:
     kinetic::BlockingKineticConnection *kinetic_conn;
     kinetic::KineticStatus kinetic_status;
   public:
-    KineticWholeSpaceIteratorImpl(kinetic::BlockingKineticConnection *conn);
+    explicit KineticWholeSpaceIteratorImpl(kinetic::BlockingKineticConnection *conn);
     virtual ~KineticWholeSpaceIteratorImpl() { }
 
     int seek_to_first() {
diff --git a/src/kv/LevelDBStore.cc b/src/kv/LevelDBStore.cc
index 5ea75b9..c47532d 100644
--- a/src/kv/LevelDBStore.cc
+++ b/src/kv/LevelDBStore.cc
@@ -18,7 +18,7 @@ using std::string;
 class CephLevelDBLogger : public leveldb::Logger {
   CephContext *cct;
 public:
-  CephLevelDBLogger(CephContext *c) : cct(c) {
+  explicit CephLevelDBLogger(CephContext *c) : cct(c) {
     cct->get();
   }
   ~CephLevelDBLogger() {
diff --git a/src/kv/LevelDBStore.h b/src/kv/LevelDBStore.h
index 8a201df..8260f8b 100644
--- a/src/kv/LevelDBStore.h
+++ b/src/kv/LevelDBStore.h
@@ -73,7 +73,7 @@ class LevelDBStore : public KeyValueDB {
   class CompactThread : public Thread {
     LevelDBStore *db;
   public:
-    CompactThread(LevelDBStore *d) : db(d) {}
+    explicit CompactThread(LevelDBStore *d) : db(d) {}
     void *entry() {
       db->compact_thread_entry();
       return NULL;
@@ -185,7 +185,7 @@ public:
   public:
     leveldb::WriteBatch bat;
     LevelDBStore *db;
-    LevelDBTransactionImpl(LevelDBStore *db) : db(db) {}
+    explicit LevelDBTransactionImpl(LevelDBStore *db) : db(db) {}
     void set(
       const string &prefix,
       const string &k,
@@ -220,7 +220,7 @@ public:
   protected:
     boost::scoped_ptr<leveldb::Iterator> dbiter;
   public:
-    LevelDBWholeSpaceIteratorImpl(leveldb::Iterator *iter) :
+    explicit LevelDBWholeSpaceIteratorImpl(leveldb::Iterator *iter) :
       dbiter(iter) { }
     virtual ~LevelDBWholeSpaceIteratorImpl() { }
 
diff --git a/src/kv/RocksDBStore.cc b/src/kv/RocksDBStore.cc
index 794a254..923537c 100644
--- a/src/kv/RocksDBStore.cc
+++ b/src/kv/RocksDBStore.cc
@@ -35,7 +35,7 @@ using std::string;
 class CephRocksdbLogger : public rocksdb::Logger {
   CephContext *cct;
 public:
-  CephRocksdbLogger(CephContext *c) : cct(c) {
+  explicit CephRocksdbLogger(CephContext *c) : cct(c) {
     cct->get();
   }
   ~CephRocksdbLogger() {
@@ -118,7 +118,7 @@ int RocksDBStore::tryInterpret(const string key, const string val, rocksdb::Opti
 int RocksDBStore::ParseOptionsFromString(const string opt_str, rocksdb::Options &opt)
 {
   map<string, string> str_map;
-  int r = get_str_map(opt_str, ",\n;", &str_map);
+  int r = get_str_map(opt_str, &str_map, ",\n;");
   if (r < 0)
     return r;
   map<string, string>::iterator it;
@@ -218,6 +218,14 @@ int RocksDBStore::do_open(ostream &out, bool create_if_missing)
     opt.env = static_cast<rocksdb::Env*>(priv);
   }
 
+  auto cache = rocksdb::NewLRUCache(g_conf->rocksdb_cache_size);
+  rocksdb::BlockBasedTableOptions bbt_opts;
+  bbt_opts.block_size = g_conf->rocksdb_block_size;
+  bbt_opts.block_cache = cache;
+  opt.table_factory.reset(rocksdb::NewBlockBasedTableFactory(bbt_opts));
+  dout(10) << __func__ << " set block size to " << g_conf->rocksdb_block_size
+           << " cache size to " << g_conf->rocksdb_cache_size << dendl;
+
   status = rocksdb::DB::Open(opt, path, &db);
   if (!status.ok()) {
     derr << status.ToString() << dendl;
@@ -456,7 +464,8 @@ int RocksDBStore::split_key(rocksdb::Slice in, string *prefix, string *key)
 void RocksDBStore::compact()
 {
   logger->inc(l_rocksdb_compact);
-  db->CompactRange(NULL, NULL);
+  rocksdb::CompactRangeOptions options;
+  db->CompactRange(options, nullptr, nullptr);
 }
 
 
@@ -529,9 +538,10 @@ bool RocksDBStore::check_omap_dir(string &omap_dir)
 }
 void RocksDBStore::compact_range(const string& start, const string& end)
 {
-    rocksdb::Slice cstart(start);
-    rocksdb::Slice cend(end);
-    db->CompactRange(&cstart, &cend);
+  rocksdb::CompactRangeOptions options;
+  rocksdb::Slice cstart(start);
+  rocksdb::Slice cend(end);
+  db->CompactRange(options, &cstart, &cend);
 }
 RocksDBStore::RocksDBWholeSpaceIteratorImpl::~RocksDBWholeSpaceIteratorImpl()
 {
diff --git a/src/kv/RocksDBStore.h b/src/kv/RocksDBStore.h
index d2caf8a..63cbc5a 100644
--- a/src/kv/RocksDBStore.h
+++ b/src/kv/RocksDBStore.h
@@ -72,7 +72,7 @@ class RocksDBStore : public KeyValueDB {
   class CompactThread : public Thread {
     RocksDBStore *db;
   public:
-    CompactThread(RocksDBStore *d) : db(d) {}
+    explicit CompactThread(RocksDBStore *d) : db(d) {}
     void *entry() {
       db->compact_thread_entry();
       return NULL;
@@ -142,7 +142,7 @@ public:
     rocksdb::WriteBatch *bat;
     RocksDBStore *db;
 
-    RocksDBTransactionImpl(RocksDBStore *_db);
+    explicit RocksDBTransactionImpl(RocksDBStore *_db);
     ~RocksDBTransactionImpl();
     void set(
       const string &prefix,
@@ -179,7 +179,7 @@ public:
   protected:
     rocksdb::Iterator *dbiter;
   public:
-    RocksDBWholeSpaceIteratorImpl(rocksdb::Iterator *iter) :
+    explicit RocksDBWholeSpaceIteratorImpl(rocksdb::Iterator *iter) :
       dbiter(iter) { }
     //virtual ~RocksDBWholeSpaceIteratorImpl() { }
     ~RocksDBWholeSpaceIteratorImpl();
diff --git a/src/libcephfs.cc b/src/libcephfs.cc
index c5bb2d4..400669a 100644
--- a/src/libcephfs.cc
+++ b/src/libcephfs.cc
@@ -37,7 +37,7 @@
 struct ceph_mount_info
 {
 public:
-  ceph_mount_info(CephContext *cct_)
+  explicit ceph_mount_info(CephContext *cct_)
     : mounted(false),
       inited(false),
       client(NULL),
diff --git a/src/librados/AioCompletionImpl.h b/src/librados/AioCompletionImpl.h
index efcd596..aaadc1a 100644
--- a/src/librados/AioCompletionImpl.h
+++ b/src/librados/AioCompletionImpl.h
@@ -170,7 +170,7 @@ namespace librados {
 struct C_AioComplete : public Context {
   AioCompletionImpl *c;
 
-  C_AioComplete(AioCompletionImpl *cc) : c(cc) {
+  explicit C_AioComplete(AioCompletionImpl *cc) : c(cc) {
     c->_get();
   }
 
@@ -189,7 +189,7 @@ struct C_AioComplete : public Context {
 struct C_AioSafe : public Context {
   AioCompletionImpl *c;
 
-  C_AioSafe(AioCompletionImpl *cc) : c(cc) {
+  explicit C_AioSafe(AioCompletionImpl *cc) : c(cc) {
     c->_get();
   }
 
@@ -216,7 +216,7 @@ struct C_AioSafe : public Context {
 struct C_AioCompleteAndSafe : public Context {
   AioCompletionImpl *c;
 
-  C_AioCompleteAndSafe(AioCompletionImpl *cc) : c(cc) {
+  explicit C_AioCompleteAndSafe(AioCompletionImpl *cc) : c(cc) {
     c->get();
   }
 
diff --git a/src/librados/IoCtxImpl.cc b/src/librados/IoCtxImpl.cc
index 24eda4f..b70c00a 100644
--- a/src/librados/IoCtxImpl.cc
+++ b/src/librados/IoCtxImpl.cc
@@ -181,14 +181,24 @@ int librados::IoCtxImpl::set_snap_write_context(snapid_t seq, vector<snapid_t>&
   return 0;
 }
 
-uint32_t librados::IoCtxImpl::get_object_hash_position(const std::string& oid)
+int librados::IoCtxImpl::get_object_hash_position(
+    const std::string& oid, uint32_t *hash_position)
 {
-  return objecter->get_object_hash_position(poolid, oid, oloc.nspace);
+  int64_t r = objecter->get_object_hash_position(poolid, oid, oloc.nspace);
+  if (r < 0)
+    return r;
+  *hash_position = (uint32_t)r;
+  return 0;
 }
 
-uint32_t librados::IoCtxImpl::get_object_pg_hash_position(const std::string& oid)
+int librados::IoCtxImpl::get_object_pg_hash_position(
+    const std::string& oid, uint32_t *pg_hash_position)
 {
-  return objecter->get_object_pg_hash_position(poolid, oid, oloc.nspace);
+  int64_t r = objecter->get_object_pg_hash_position(poolid, oid, oloc.nspace);
+  if (r < 0)
+    return r;
+  *pg_hash_position = (uint32_t)r;
+  return 0;
 }
 
 void librados::IoCtxImpl::queue_aio_write(AioCompletionImpl *c)
@@ -762,7 +772,7 @@ int librados::IoCtxImpl::aio_read(const object_t oid, AioCompletionImpl *c,
 class C_ObjectOperation : public Context {
 public:
   ::ObjectOperation m_ops;
-  C_ObjectOperation(Context *c) : m_ctx(c) {}
+  explicit C_ObjectOperation(Context *c) : m_ctx(c) {}
   virtual void finish(int r) {
     m_ctx->complete(r);
   }
diff --git a/src/librados/IoCtxImpl.h b/src/librados/IoCtxImpl.h
index 25166c0..ff3c235 100644
--- a/src/librados/IoCtxImpl.h
+++ b/src/librados/IoCtxImpl.h
@@ -92,8 +92,8 @@ struct librados::IoCtxImpl {
 
   const string& get_cached_pool_name();
 
-  uint32_t get_object_hash_position(const std::string& oid);
-  uint32_t get_object_pg_hash_position(const std::string& oid);
+  int get_object_hash_position(const std::string& oid, uint32_t *hash_position);
+  int get_object_pg_hash_position(const std::string& oid, uint32_t *pg_hash_position);
 
   ::ObjectOperation *prepare_assert_ops(::ObjectOperation *op);
 
@@ -161,7 +161,7 @@ struct librados::IoCtxImpl {
 
   struct C_aio_Ack : public Context {
     librados::AioCompletionImpl *c;
-    C_aio_Ack(AioCompletionImpl *_c);
+    explicit C_aio_Ack(AioCompletionImpl *_c);
     void finish(int r);
   };
 
@@ -175,7 +175,7 @@ struct librados::IoCtxImpl {
 
   struct C_aio_Safe : public Context {
     AioCompletionImpl *c;
-    C_aio_Safe(AioCompletionImpl *_c);
+    explicit C_aio_Safe(AioCompletionImpl *_c);
     void finish(int r);
   };
 
diff --git a/src/librados/PoolAsyncCompletionImpl.h b/src/librados/PoolAsyncCompletionImpl.h
index 443b2c2..b3fba49 100644
--- a/src/librados/PoolAsyncCompletionImpl.h
+++ b/src/librados/PoolAsyncCompletionImpl.h
@@ -91,7 +91,7 @@ namespace librados {
     PoolAsyncCompletionImpl *c;
 
   public:
-    C_PoolAsync_Safe(PoolAsyncCompletionImpl *_c) : c(_c) {
+    explicit C_PoolAsync_Safe(PoolAsyncCompletionImpl *_c) : c(_c) {
       c->get();
     }
     ~C_PoolAsync_Safe() {
diff --git a/src/librados/RadosClient.cc b/src/librados/RadosClient.cc
index 09dd64a..8994395 100644
--- a/src/librados/RadosClient.cc
+++ b/src/librados/RadosClient.cc
@@ -86,10 +86,8 @@ int64_t librados::RadosClient::lookup_pool(const char *name)
     return r;
   }
 
-  const OSDMap *osdmap = objecter->get_osdmap_read();
-  int64_t ret = osdmap->lookup_pg_pool_name(name);
-  objecter->put_osdmap_read();
-  return ret;
+  return objecter->with_osdmap(std::mem_fn(&OSDMap::lookup_pg_pool_name),
+			       name);
 }
 
 bool librados::RadosClient::pool_requires_alignment(int64_t pool_id)
@@ -107,7 +105,7 @@ bool librados::RadosClient::pool_requires_alignment(int64_t pool_id)
 
 // a safer version of pool_requires_alignment
 int librados::RadosClient::pool_requires_alignment2(int64_t pool_id,
-	bool *requires)
+						    bool *requires)
 {
   if (!requires)
     return -EINVAL;
@@ -117,14 +115,13 @@ int librados::RadosClient::pool_requires_alignment2(int64_t pool_id,
     return r;
   }
 
-  const OSDMap *osdmap = objecter->get_osdmap_read();
-  if (!osdmap->have_pg_pool(pool_id)) { 
-    objecter->put_osdmap_read();
-    return -ENOENT;
-  }
-  *requires = osdmap->get_pg_pool(pool_id)->requires_aligned_append();
-  objecter->put_osdmap_read();
-  return 0;
+  return objecter->with_osdmap([requires, pool_id](const OSDMap& o) {
+      if (!o.have_pg_pool(pool_id)) {
+	return -ENOENT;
+      }
+      *requires = o.get_pg_pool(pool_id)->requires_aligned_append();
+      return 0;
+    });
 }
 
 uint64_t librados::RadosClient::pool_required_alignment(int64_t pool_id)
@@ -140,7 +137,7 @@ uint64_t librados::RadosClient::pool_required_alignment(int64_t pool_id)
 
 // a safer version of pool_required_alignment
 int librados::RadosClient::pool_required_alignment2(int64_t pool_id,
-	uint64_t *alignment)
+						    uint64_t *alignment)
 {
   if (!alignment)
     return -EINVAL;
@@ -150,30 +147,30 @@ int librados::RadosClient::pool_required_alignment2(int64_t pool_id,
     return r;
   }
 
-  const OSDMap *osdmap = objecter->get_osdmap_read();
-  if (!osdmap->have_pg_pool(pool_id)) {
-    objecter->put_osdmap_read();
-    return -ENOENT;
-  }
-  *alignment = osdmap->get_pg_pool(pool_id)->required_alignment();
-  objecter->put_osdmap_read();
-  return 0;
+  return objecter->with_osdmap([alignment, pool_id](const OSDMap &o) {
+      if (!o.have_pg_pool(pool_id)) {
+	return -ENOENT;
+      }
+      *alignment = o.get_pg_pool(pool_id)->required_alignment();
+      return 0;
+    });
 }
 
-int librados::RadosClient::pool_get_auid(uint64_t pool_id, unsigned long long *auid)
+int librados::RadosClient::pool_get_auid(uint64_t pool_id,
+					 unsigned long long *auid)
 {
   int r = wait_for_osdmap();
   if (r < 0)
     return r;
-  const OSDMap *osdmap = objecter->get_osdmap_read();
-  const pg_pool_t *pg = osdmap->get_pg_pool(pool_id);
-  if (!pg) {
-    r = -ENOENT;
-  } else {
-    r = 0;
-    *auid = pg->auid;
-  }
-  objecter->put_osdmap_read();
+  objecter->with_osdmap([&](const OSDMap& o) {
+      const pg_pool_t *pg = o.get_pg_pool(pool_id);
+      if (!pg) {
+	r = -ENOENT;
+      } else {
+	r = 0;
+	*auid = pg->auid;
+      }
+    });
   return r;
 }
 
@@ -182,14 +179,14 @@ int librados::RadosClient::pool_get_name(uint64_t pool_id, std::string *s)
   int r = wait_for_osdmap();
   if (r < 0)
     return r;
-  const OSDMap *osdmap = objecter->get_osdmap_read();
-  if (!osdmap->have_pg_pool(pool_id)) {
-    r = -ENOENT;
-  } else {
-    r = 0;
-    *s = osdmap->get_pool_name(pool_id);
-  }
-  objecter->put_osdmap_read();
+  objecter->with_osdmap([&](const OSDMap& o) {
+      if (!o.have_pg_pool(pool_id)) {
+	r = -ENOENT;
+      } else {
+	r = 0;
+	*s = o.get_pool_name(pool_id);
+      }
+    });
   return r;
 }
 
@@ -333,6 +330,7 @@ void librados::RadosClient::shutdown()
     return;
   }
   if (state == CONNECTED) {
+    finisher.wait_for_empty();
     finisher.stop();
   }
   bool need_objecter = false;
@@ -467,11 +465,11 @@ int librados::RadosClient::wait_for_osdmap()
   }
 
   bool need_map = false;
-  const OSDMap *osdmap = objecter->get_osdmap_read();
-  if (osdmap->get_epoch() == 0) {
-    need_map = true;
-  }
-  objecter->put_osdmap_read();
+  objecter->with_osdmap([&](const OSDMap& o) {
+      if (o.get_epoch() == 0) {
+	need_map = true;
+      }
+    });
 
   if (need_map) {
     Mutex::Locker l(lock);
@@ -480,23 +478,20 @@ int librados::RadosClient::wait_for_osdmap()
     if (cct->_conf->rados_mon_op_timeout > 0)
       timeout.set_from_double(cct->_conf->rados_mon_op_timeout);
 
-    const OSDMap *osdmap = objecter->get_osdmap_read();
-    if (osdmap->get_epoch() == 0) {
+    if (objecter->with_osdmap(std::mem_fn(&OSDMap::get_epoch)) == 0) {
       ldout(cct, 10) << __func__ << " waiting" << dendl;
       utime_t start = ceph_clock_now(cct);
-      while (osdmap->get_epoch() == 0) {
-        objecter->put_osdmap_read();
-        cond.WaitInterval(cct, lock, timeout);
-        utime_t elapsed = ceph_clock_now(cct) - start;
-        if (!timeout.is_zero() && elapsed > timeout) {
-          lderr(cct) << "timed out waiting for first osdmap from monitors" << dendl;
-          return -ETIMEDOUT;
-        }
-        osdmap = objecter->get_osdmap_read();
+      while (objecter->with_osdmap(std::mem_fn(&OSDMap::get_epoch)) == 0) {
+	cond.WaitInterval(cct, lock, timeout);
+	utime_t elapsed = ceph_clock_now(cct) - start;
+	if (!timeout.is_zero() && elapsed > timeout) {
+	  lderr(cct) << "timed out waiting for first osdmap from monitors"
+		     << dendl;
+	  return -ETIMEDOUT;
+	}
       }
       ldout(cct, 10) << __func__ << " done waiting" << dendl;
     }
-    objecter->put_osdmap_read();
     return 0;
   } else {
     return 0;
@@ -525,12 +520,11 @@ int librados::RadosClient::pool_list(std::list<std::pair<int64_t, string> >& v)
   int r = wait_for_osdmap();
   if (r < 0)
     return r;
-  const OSDMap *osdmap = objecter->get_osdmap_read();
-  for (map<int64_t,pg_pool_t>::const_iterator p = osdmap->get_pools().begin();
-       p != osdmap->get_pools().end();
-       ++p)
-    v.push_back(std::make_pair(p->first, osdmap->get_pool_name(p->first)));
-  objecter->put_osdmap_read();
+
+  objecter->with_osdmap([&](const OSDMap& o) {
+      for (const auto& p : o.get_pools())  // by reference: avoid copying each pg_pool_t
+	v.push_back(std::make_pair(p.first, o.get_pool_name(p.first)));
+    });
   return 0;
 }
 
@@ -633,21 +627,19 @@ int librados::RadosClient::pool_get_base_tier(int64_t pool_id, int64_t* base_tie
     return r;
   }
 
-  const OSDMap *osdmap = objecter->get_osdmap_read();
-
-  const pg_pool_t* pool = osdmap->get_pg_pool(pool_id);
-  if (pool) {
-    if (pool->tier_of < 0) {
-      *base_tier = pool_id;
-    } else {
-      *base_tier = pool->tier_of;
-    }
-    r = 0;
-  } else {
-    r = -ENOENT;
-  }
-
-  objecter->put_osdmap_read();
+  objecter->with_osdmap([&](const OSDMap& o) {
+      const pg_pool_t* pool = o.get_pg_pool(pool_id);
+      if (pool) {
+	if (pool->tier_of < 0) {
+	  *base_tier = pool_id;
+	} else {
+	  *base_tier = pool->tier_of;
+	}
+	r = 0;
+      } else {
+	r = -ENOENT;
+      }
+    });
   return r;
 }
 
diff --git a/src/librados/RadosClient.h b/src/librados/RadosClient.h
index a26e46f..e190bfc 100644
--- a/src/librados/RadosClient.h
+++ b/src/librados/RadosClient.h
@@ -75,7 +75,7 @@ private:
 public:
   Finisher finisher;
 
-  RadosClient(CephContext *cct_);
+  explicit RadosClient(CephContext *cct_);
   ~RadosClient();
   int ping_monitor(string mon_id, string *result);
   int connect();
diff --git a/src/librados/librados.cc b/src/librados/librados.cc
index 90a89c0..e66d17b 100644
--- a/src/librados/librados.cc
+++ b/src/librados/librados.cc
@@ -188,7 +188,7 @@ class ObjectOpCompletionCtx : public Context {
   librados::ObjectOperationCompletion *completion;
   bufferlist bl;
 public:
-  ObjectOpCompletionCtx(librados::ObjectOperationCompletion *c) : completion(c) {}
+  explicit ObjectOpCompletionCtx(librados::ObjectOperationCompletion *c) : completion(c) {}
   void finish(int r) {
     completion->handle_completion(r, bl);
     delete completion;
@@ -1927,12 +1927,32 @@ int64_t librados::IoCtx::get_id()
 
 uint32_t librados::IoCtx::get_object_hash_position(const std::string& oid)
 {
-  return io_ctx_impl->get_object_hash_position(oid);
+  uint32_t hash;
+  int r = io_ctx_impl->get_object_hash_position(oid, &hash);
+  if (r < 0)
+    hash = 0;
+  return hash;
 }
 
 uint32_t librados::IoCtx::get_object_pg_hash_position(const std::string& oid)
 {
-  return io_ctx_impl->get_object_pg_hash_position(oid);
+  uint32_t hash;
+  int r = io_ctx_impl->get_object_pg_hash_position(oid, &hash);
+  if (r < 0)
+    hash = 0;
+  return hash;
+}
+
+int librados::IoCtx::get_object_hash_position2(
+    const std::string& oid, uint32_t *hash_position)
+{
+  return io_ctx_impl->get_object_hash_position(oid, hash_position);
+}
+
+int librados::IoCtx::get_object_pg_hash_position2(
+    const std::string& oid, uint32_t *pg_hash_position)
+{
+  return io_ctx_impl->get_object_pg_hash_position(oid, pg_hash_position);
 }
 
 librados::config_t librados::IoCtx::cct()
@@ -3186,10 +3206,13 @@ extern "C" int rados_ioctx_pool_requires_alignment2(rados_ioctx_t io,
 {
   tracepoint(librados, rados_ioctx_pool_requires_alignment_enter2, io);
   librados::IoCtxImpl *ctx = (librados::IoCtxImpl *)io;
+  bool requires_alignment = false;  // stays defined when the lookup fails without writing it
   int retval = ctx->client->pool_requires_alignment2(ctx->get_id(), 
-  	(bool *)requires);
+  	&requires_alignment);
   tracepoint(librados, rados_ioctx_pool_requires_alignment_exit2, retval, 
-  	*requires);
+  	requires_alignment);
+  if (requires)
+    *requires = requires_alignment;
   return retval;
 }
 
@@ -4870,7 +4893,7 @@ struct RadosOmapIter {
 class C_OmapIter : public Context {
   RadosOmapIter *iter;
 public:
-  C_OmapIter(RadosOmapIter *iter) : iter(iter) {}
+  explicit C_OmapIter(RadosOmapIter *iter) : iter(iter) {}
   void finish(int r) {
     iter->i = iter->values.begin();
   }
@@ -4879,7 +4902,7 @@ public:
 class C_XattrsIter : public Context {
   librados::RadosXattrsIter *iter;
 public:
-  C_XattrsIter(librados::RadosXattrsIter *iter) : iter(iter) {}
+  explicit C_XattrsIter(librados::RadosXattrsIter *iter) : iter(iter) {}
   void finish(int r) {
     iter->i = iter->attrset.begin();
   }
@@ -4921,7 +4944,7 @@ extern "C" void rados_read_op_omap_get_vals(rados_read_op_t read_op,
 struct C_OmapKeysIter : public Context {
   RadosOmapIter *iter;
   std::set<std::string> keys;
-  C_OmapKeysIter(RadosOmapIter *iter) : iter(iter) {}
+  explicit C_OmapKeysIter(RadosOmapIter *iter) : iter(iter) {}
   void finish(int r) {
     // map each key to an empty bl
     for (std::set<std::string>::const_iterator i = keys.begin();
diff --git a/src/librbd/AioCompletion.cc b/src/librbd/AioCompletion.cc
index 9ad318b..ab71c3c 100644
--- a/src/librbd/AioCompletion.cc
+++ b/src/librbd/AioCompletion.cc
@@ -69,6 +69,9 @@ namespace librbd {
     assert(lock.is_locked());
     elapsed = ceph_clock_now(cct) - start_time;
     switch (aio_type) {
+    case AIO_TYPE_OPEN:
+    case AIO_TYPE_CLOSE:
+      break;
     case AIO_TYPE_READ:
       ictx->perfcounter->tinc(l_librbd_rd_latency, elapsed); break;
     case AIO_TYPE_WRITE:
diff --git a/src/librbd/AioCompletion.h b/src/librbd/AioCompletion.h
index 0d1e226..c554bc5 100644
--- a/src/librbd/AioCompletion.h
+++ b/src/librbd/AioCompletion.h
@@ -21,11 +21,13 @@ namespace librbd {
   class AioObjectRead;
 
   typedef enum {
-    AIO_TYPE_READ = 0,
+    AIO_TYPE_NONE = 0,
+    AIO_TYPE_OPEN,
+    AIO_TYPE_CLOSE,
+    AIO_TYPE_READ,
     AIO_TYPE_WRITE,
     AIO_TYPE_DISCARD,
     AIO_TYPE_FLUSH,
-    AIO_TYPE_NONE,
   } aio_type_t;
 
   /**
@@ -156,11 +158,17 @@ namespace librbd {
       int n = --ref;
       lock.Unlock();
       if (!n) {
-        if (ictx && event_notify) {
-          ictx->completed_reqs_lock.Lock();
-          m_xlist_item.remove_myself();
-          ictx->completed_reqs_lock.Unlock();
-        }
+        if (ictx) {
+	  if (event_notify) {
+	    ictx->completed_reqs_lock.Lock();
+	    m_xlist_item.remove_myself();
+	    ictx->completed_reqs_lock.Unlock();
+	  }
+	  if (aio_type == AIO_TYPE_CLOSE || (aio_type == AIO_TYPE_OPEN &&
+					     rval < 0)) {
+	    delete ictx;
+	  }
+	}
         delete this;
       }
     }
diff --git a/src/librbd/AioImageRequest.cc b/src/librbd/AioImageRequest.cc
index cba7dde..fc77ea7 100644
--- a/src/librbd/AioImageRequest.cc
+++ b/src/librbd/AioImageRequest.cc
@@ -10,7 +10,7 @@
 #include "librbd/ImageWatcher.h"
 #include "librbd/internal.h"
 #include "librbd/Journal.h"
-#include "librbd/journal/Entries.h"
+#include "librbd/journal/Types.h"
 #include "include/rados/librados.hpp"
 #include "osdc/Striper.h"
 
diff --git a/src/librbd/AsyncOperation.cc b/src/librbd/AsyncOperation.cc
index fd315ad..0c79d8a 100644
--- a/src/librbd/AsyncOperation.cc
+++ b/src/librbd/AsyncOperation.cc
@@ -18,7 +18,7 @@ struct C_CompleteFlushes : public Context {
   ImageCtx *image_ctx;
   std::list<Context *> flush_contexts;
 
-  C_CompleteFlushes(ImageCtx *image_ctx, std::list<Context *> &&flush_contexts)
+  explicit C_CompleteFlushes(ImageCtx *image_ctx, std::list<Context *> &&flush_contexts)
     : image_ctx(image_ctx), flush_contexts(std::move(flush_contexts)) {
   }
   virtual void finish(int r) {
diff --git a/src/librbd/ExclusiveLock.cc b/src/librbd/ExclusiveLock.cc
index cef72cc..1d0d189 100644
--- a/src/librbd/ExclusiveLock.cc
+++ b/src/librbd/ExclusiveLock.cc
@@ -28,7 +28,7 @@ const std::string WATCHER_LOCK_COOKIE_PREFIX = "auto";
 template <typename I>
 struct C_SendReleaseRequest : public Context {
   ReleaseRequest<I>* request;
-  C_SendReleaseRequest(ReleaseRequest<I>* request) : request(request) {
+  explicit C_SendReleaseRequest(ReleaseRequest<I>* request) : request(request) {
   }
   virtual void finish(int r) override {
     request->send();
diff --git a/src/librbd/ImageCtx.cc b/src/librbd/ImageCtx.cc
index e432d63..0961d0e 100644
--- a/src/librbd/ImageCtx.cc
+++ b/src/librbd/ImageCtx.cc
@@ -14,6 +14,7 @@
 #include "librbd/AioCompletion.h"
 #include "librbd/AsyncOperation.h"
 #include "librbd/AsyncRequest.h"
+#include "librbd/ExclusiveLock.h"
 #include "librbd/internal.h"
 #include "librbd/ImageCtx.h"
 #include "librbd/ImageState.h"
@@ -47,7 +48,7 @@ namespace {
 
 class ThreadPoolSingleton : public ThreadPool {
 public:
-  ThreadPoolSingleton(CephContext *cct)
+  explicit ThreadPoolSingleton(CephContext *cct)
     : ThreadPool(cct, "librbd::thread_pool", "tp_librbd", cct->_conf->rbd_op_threads,
                  "rbd_op_threads") {
     start();
@@ -1005,6 +1006,10 @@ struct C_InvalidateCache : public Context {
     ASSIGN_OPTION(journal_pool);
   }
 
+  ExclusiveLock<ImageCtx> *ImageCtx::create_exclusive_lock() {
+    return new ExclusiveLock<ImageCtx>(*this);
+  }
+
   ObjectMap *ImageCtx::create_object_map(uint64_t snap_id) {
     return new ObjectMap(*this, snap_id);
   }
diff --git a/src/librbd/ImageCtx.h b/src/librbd/ImageCtx.h
index 7606f6f..689e941 100644
--- a/src/librbd/ImageCtx.h
+++ b/src/librbd/ImageCtx.h
@@ -270,6 +270,7 @@ namespace librbd {
 
     void apply_metadata_confs();
 
+    ExclusiveLock<ImageCtx> *create_exclusive_lock();
     ObjectMap *create_object_map(uint64_t snap_id);
     Journal<ImageCtx> *create_journal();
 
diff --git a/src/librbd/ImageWatcher.cc b/src/librbd/ImageWatcher.cc
index b97d502..2c5331c 100644
--- a/src/librbd/ImageWatcher.cc
+++ b/src/librbd/ImageWatcher.cc
@@ -34,15 +34,16 @@ ImageWatcher::ImageWatcher(ImageCtx &image_ctx)
     m_watch_lock(util::unique_lock_name("librbd::ImageWatcher::m_watch_lock", this)),
     m_watch_ctx(*this), m_watch_handle(0),
     m_watch_state(WATCH_STATE_UNREGISTERED),
-    m_task_finisher(new TaskFinisher<Task>(*m_image_ctx.cct)),
     m_async_request_lock(util::unique_lock_name("librbd::ImageWatcher::m_async_request_lock", this)),
     m_owner_client_id_lock(util::unique_lock_name("librbd::ImageWatcher::m_owner_client_id_lock", this))
 {
+  m_image_ctx.cct->lookup_or_create_singleton_object<TaskFinisher<Task> >(
+    m_task_finisher, "librbd::task_finisher");
 }
 
 ImageWatcher::~ImageWatcher()
 {
-  delete m_task_finisher;
+  m_task_finisher = nullptr;
   {
     RWLock::RLocker l(m_watch_lock);
     assert(m_watch_state != WATCH_STATE_REGISTERED);
diff --git a/src/librbd/Journal.cc b/src/librbd/Journal.cc
index 3b52f52..a597671 100644
--- a/src/librbd/Journal.cc
+++ b/src/librbd/Journal.cc
@@ -7,8 +7,8 @@
 #include "librbd/AioObjectRequest.h"
 #include "librbd/ExclusiveLock.h"
 #include "librbd/ImageCtx.h"
-#include "librbd/journal/Entries.h"
 #include "librbd/journal/Replay.h"
+#include "librbd/journal/Types.h"
 #include "librbd/Utils.h"
 #include "journal/Journaler.h"
 #include "journal/ReplayEntry.h"
@@ -23,12 +23,6 @@ namespace librbd {
 using util::create_async_context_callback;
 using util::create_context_callback;
 
-namespace {
-
-const std::string CLIENT_DESCRIPTION = "master image";
-
-} // anonymous namespace
-
 template <typename I>
 std::ostream &operator<<(std::ostream &os,
                          const typename Journal<I>::State &state) {
@@ -103,9 +97,9 @@ int Journal<I>::create(librados::IoCtx &io_ctx, const std::string &image_id,
   CephContext *cct = reinterpret_cast<CephContext *>(io_ctx.cct());
   ldout(cct, 5) << __func__ << ": image=" << image_id << dendl;
 
+  librados::Rados rados(io_ctx);
   int64_t pool_id = -1;
   if (!object_pool.empty()) {
-    librados::Rados rados(io_ctx);
     IoCtx data_io_ctx;
     int r = rados.ioctx_create(object_pool.c_str(), data_io_ctx);
     if (r != 0) {
@@ -125,7 +119,32 @@ int Journal<I>::create(librados::IoCtx &io_ctx, const std::string &image_id,
     return r;
   }
 
-  r = journaler.register_client(CLIENT_DESCRIPTION);
+  std::string cluster_id;
+  r = rados.cluster_fsid(&cluster_id);
+  if (r < 0) {
+    lderr(cct) << "failed to retrieve cluster id: " << cpp_strerror(r) << dendl;
+    return r;
+  }
+
+  // create tag class for this image's journal events
+  bufferlist tag_data;
+  ::encode(journal::TagData{cluster_id, pool_id, image_id}, tag_data);
+
+  C_SaferCond tag_ctx;
+  cls::journal::Tag tag;
+  journaler.allocate_tag(cls::journal::Tag::TAG_CLASS_NEW, tag_data,
+                         &tag, &tag_ctx);
+  r = tag_ctx.wait();
+  if (r < 0) {
+    lderr(cct) << "failed to allocate journal tag: " << cpp_strerror(r) << dendl;
+    return r;  // do not register a client against a tag that was never allocated
+  }
+
+  bufferlist client_data;
+  ::encode(journal::ClientData{journal::ImageClientMeta{tag.tag_class}},
+           client_data);
+
+  r = journaler.register_client(client_data);
   if (r < 0) {
     lderr(cct) << "failed to register client: " << cpp_strerror(r) << dendl;
     return r;
@@ -200,7 +219,7 @@ int Journal<I>::reset(librados::IoCtx &io_ctx, const std::string &image_id) {
     lderr(cct) << "failed to create journal: " << cpp_strerror(r) << dendl;
     return r;
   }
-  r = journaler.register_client(CLIENT_DESCRIPTION);
+  r = journaler.register_client(bufferlist());
   if (r < 0) {
     lderr(cct) << "failed to register client: " << cpp_strerror(r) << dendl;
     return r;
@@ -290,7 +309,8 @@ uint64_t Journal<I>::append_io_event(AioCompletion *aio_comp,
     tid = ++m_event_tid;
     assert(tid != 0);
 
-    future = m_journaler->append("", bl);
+    // TODO: use allocated tag_id
+    future = m_journaler->append(0, bl);
     m_events[tid] = Event(future, aio_comp, requests, offset, length);
   }
 
@@ -374,7 +394,9 @@ void Journal<I>::append_op_event(uint64_t op_tid,
   {
     Mutex::Locker locker(m_lock);
     assert(m_state == STATE_READY);
-    future = m_journaler->append("", bl);
+
+    // TODO: use allocated tag_id
+    future = m_journaler->append(0, bl);
   }
 
   on_safe = create_async_context_callback(m_image_ctx, on_safe);
@@ -401,7 +423,9 @@ void Journal<I>::commit_op_event(uint64_t op_tid, int r) {
   {
     Mutex::Locker locker(m_lock);
     assert(m_state == STATE_READY);
-    future = m_journaler->append("", bl);
+
+    // TODO: use allocated tag_id
+    future = m_journaler->append(0, bl);
   }
 
   future.flush(new C_OpEventSafe(this, op_tid, future, nullptr));
diff --git a/src/librbd/LibrbdAdminSocketHook.cc b/src/librbd/LibrbdAdminSocketHook.cc
index 719d656..5485ba5 100644
--- a/src/librbd/LibrbdAdminSocketHook.cc
+++ b/src/librbd/LibrbdAdminSocketHook.cc
@@ -21,7 +21,7 @@ public:
 
 class FlushCacheCommand : public LibrbdAdminSocketCommand {
 public:
-  FlushCacheCommand(ImageCtx *ictx) : ictx(ictx) {}
+  explicit FlushCacheCommand(ImageCtx *ictx) : ictx(ictx) {}
 
   bool call(stringstream *ss) {
     int r = flush(ictx);
@@ -38,7 +38,7 @@ private:
 
 struct InvalidateCacheCommand : public LibrbdAdminSocketCommand {
 public:
-  InvalidateCacheCommand(ImageCtx *ictx) : ictx(ictx) {}
+  explicit InvalidateCacheCommand(ImageCtx *ictx) : ictx(ictx) {}
 
   bool call(stringstream *ss) {
     int r = invalidate_cache(ictx);
@@ -81,7 +81,7 @@ LibrbdAdminSocketHook::LibrbdAdminSocketHook(ImageCtx *ictx) :
 
 LibrbdAdminSocketHook::~LibrbdAdminSocketHook() {
   for (Commands::const_iterator i = commands.begin(); i != commands.end();
-       i++) {
+       ++i) {
     (void)admin_socket->unregister_command(i->first);
     delete i->second;
   }
diff --git a/src/librbd/LibrbdWriteback.h b/src/librbd/LibrbdWriteback.h
index d4cb341..018b043 100644
--- a/src/librbd/LibrbdWriteback.h
+++ b/src/librbd/LibrbdWriteback.h
@@ -39,6 +39,7 @@ namespace librbd {
 			     ceph::real_time mtime, uint64_t trunc_size,
 			     __u32 trunc_seq, ceph_tid_t journal_tid,
 			     Context *oncommit);
+    using WritebackHandler::write;
 
     virtual void overwrite_extent(const object_t& oid, uint64_t off,
 				  uint64_t len, ceph_tid_t journal_tid);
diff --git a/src/librbd/Makefile.am b/src/librbd/Makefile.am
index 775d702..7248f82 100644
--- a/src/librbd/Makefile.am
+++ b/src/librbd/Makefile.am
@@ -1,5 +1,5 @@
 librbd_types_la_SOURCES = \
-	librbd/journal/Entries.cc \
+	librbd/journal/Types.cc \
 	librbd/WatchNotifyTypes.cc
 noinst_LTLIBRARIES += librbd_types.la
 
@@ -115,7 +115,7 @@ noinst_HEADERS += \
 	librbd/image/RefreshRequest.h \
 	librbd/image/SetSnapRequest.h \
 	librbd/journal/Replay.h \
-	librbd/journal/Entries.h \
+	librbd/journal/Types.h \
 	librbd/object_map/InvalidateRequest.h \
 	librbd/object_map/LockRequest.h \
 	librbd/object_map/Request.h \
diff --git a/src/librbd/ObjectMap.cc b/src/librbd/ObjectMap.cc
index 4f3f2f4..59d3a36 100644
--- a/src/librbd/ObjectMap.cc
+++ b/src/librbd/ObjectMap.cc
@@ -6,7 +6,6 @@
 #include "librbd/ImageWatcher.h"
 #include "librbd/internal.h"
 #include "librbd/object_map/InvalidateRequest.h"
-#include "librbd/object_map/LockRequest.h"
 #include "librbd/object_map/RefreshRequest.h"
 #include "librbd/object_map/ResizeRequest.h"
 #include "librbd/object_map/SnapshotCreateRequest.h"
@@ -17,6 +16,7 @@
 #include "librbd/Utils.h"
 #include "common/dout.h"
 #include "common/errno.h"
+#include "common/WorkQueue.h"
 #include "include/stringify.h"
 #include "cls/lock/cls_lock_client.h"
 #include <sstream>
@@ -89,15 +89,12 @@ void ObjectMap::open(Context *on_finish) {
   req->send();
 }
 
-void ObjectMap::lock(Context *on_finish) {
-  assert(m_snap_id == CEPH_NOSNAP);
-  object_map::LockRequest<> *req = new object_map::LockRequest<>(
-    m_image_ctx, on_finish);
-  req->send();
-}
+void ObjectMap::close(Context *on_finish) {
+  if (m_snap_id != CEPH_NOSNAP) {
+    m_image_ctx.op_work_queue->queue(on_finish, 0);
+    return;
+  }
 
-void ObjectMap::unlock(Context *on_finish) {
-  assert(m_snap_id == CEPH_NOSNAP);
   object_map::UnlockRequest<> *req = new object_map::UnlockRequest<>(
     m_image_ctx, on_finish);
   req->send();
diff --git a/src/librbd/ObjectMap.h b/src/librbd/ObjectMap.h
index 5253c51..a2868aa 100644
--- a/src/librbd/ObjectMap.h
+++ b/src/librbd/ObjectMap.h
@@ -31,8 +31,7 @@ public:
   }
 
   void open(Context *on_finish);
-  void lock(Context *on_finish);
-  void unlock(Context *on_finish);
+  void close(Context *on_finish);
 
   bool object_may_exist(uint64_t object_no) const;
 
diff --git a/src/librbd/Operations.cc b/src/librbd/Operations.cc
index 563c9de..719baf5 100644
--- a/src/librbd/Operations.cc
+++ b/src/librbd/Operations.cc
@@ -739,7 +739,8 @@ int Operations<I>::prepare_image_update() {
   {
     RWLock::WLocker owner_locker(m_image_ctx.owner_lock);
     if (m_image_ctx.exclusive_lock != nullptr &&
-        !m_image_ctx.exclusive_lock->is_lock_owner()) {
+        (!m_image_ctx.exclusive_lock->is_lock_owner() ||
+         !m_image_ctx.exclusive_lock->accept_requests())) {
       m_image_ctx.exclusive_lock->try_lock(&ctx);
       trying_lock = true;
     }
diff --git a/src/librbd/TaskFinisher.h b/src/librbd/TaskFinisher.h
index 43ec517..8d37a04 100644
--- a/src/librbd/TaskFinisher.h
+++ b/src/librbd/TaskFinisher.h
@@ -19,10 +19,10 @@ namespace librbd {
 template <typename Task>
 class TaskFinisher {
 public:
-  TaskFinisher(CephContext &cct)
+  TaskFinisher(CephContext *cct)
     : m_cct(cct), m_lock("librbd::TaskFinisher::m_lock"),
-      m_finisher(new Finisher(&cct)),
-      m_safe_timer(new SafeTimer(&cct, m_lock, false))
+      m_finisher(new Finisher(cct)),
+      m_safe_timer(new SafeTimer(cct, m_lock, false))
   {
     m_finisher->start();
     m_safe_timer->init();
@@ -45,6 +45,7 @@ public:
     typename TaskContexts::iterator it = m_task_contexts.find(task);
     if (it != m_task_contexts.end()) {
       delete it->second.first;
+      m_safe_timer->cancel_event(it->second.second);
       m_task_contexts.erase(it);
     }
   }
@@ -111,7 +112,7 @@ private:
     Task m_task;
   };
 
-  CephContext &m_cct;
+  CephContext *m_cct;
 
   Mutex m_lock;
   Finisher *m_finisher;
diff --git a/src/librbd/WatchNotifyTypes.cc b/src/librbd/WatchNotifyTypes.cc
index 93b12ee..f3a6f4b 100644
--- a/src/librbd/WatchNotifyTypes.cc
+++ b/src/librbd/WatchNotifyTypes.cc
@@ -13,7 +13,7 @@ namespace {
 
 class EncodePayloadVisitor : public boost::static_visitor<void> {
 public:
-  EncodePayloadVisitor(bufferlist &bl) : m_bl(bl) {}
+  explicit EncodePayloadVisitor(bufferlist &bl) : m_bl(bl) {}
 
   template <typename Payload>
   inline void operator()(const Payload &payload) const {
@@ -42,7 +42,7 @@ private:
 
 class DumpPayloadVisitor : public boost::static_visitor<void> {
 public:
-  DumpPayloadVisitor(Formatter *formatter) : m_formatter(formatter) {}
+  explicit DumpPayloadVisitor(Formatter *formatter) : m_formatter(formatter) {}
 
   template <typename Payload>
   inline void operator()(const Payload &payload) const {
diff --git a/src/librbd/exclusive_lock/AcquireRequest.cc b/src/librbd/exclusive_lock/AcquireRequest.cc
index ad59c04..5f49869 100644
--- a/src/librbd/exclusive_lock/AcquireRequest.cc
+++ b/src/librbd/exclusive_lock/AcquireRequest.cc
@@ -144,7 +144,7 @@ Context *AcquireRequest<I>::handle_open_journal(int *ret_val) {
   if (*ret_val < 0) {
     lderr(cct) << "failed to open journal: " << cpp_strerror(*ret_val) << dendl;
     m_error_result = *ret_val;
-    return send_unlock_object_map();
+    return send_close_object_map();
   }
 
   return m_on_finish;
@@ -175,35 +175,11 @@ Context *AcquireRequest<I>::handle_open_object_map(int *ret_val) {
 
   // object map should never result in an error
   assert(*ret_val == 0);
-  return send_lock_object_map();
-}
-
-template <typename I>
-Context *AcquireRequest<I>::send_lock_object_map() {
-  CephContext *cct = m_image_ctx.cct;
-  ldout(cct, 10) << __func__ << dendl;
-
-  assert(m_object_map != nullptr);
-
-  using klass = AcquireRequest<I>;
-  Context *ctx = create_context_callback<klass, &klass::handle_lock_object_map>(
-    this);
-  m_object_map->lock(ctx);
-  return nullptr;
-}
-
-template <typename I>
-Context *AcquireRequest<I>::handle_lock_object_map(int *ret_val) {
-  CephContext *cct = m_image_ctx.cct;
-  ldout(cct, 10) << __func__ << ": r=" << *ret_val << dendl;
-
-  // object map should never result in an error
-  assert(*ret_val == 0);
   return send_open_journal();
 }
 
 template <typename I>
-Context *AcquireRequest<I>::send_unlock_object_map() {
+Context *AcquireRequest<I>::send_close_object_map() {
   if (m_object_map == nullptr) {
     revert();
     return m_on_finish;
@@ -214,13 +190,13 @@ Context *AcquireRequest<I>::send_unlock_object_map() {
 
   using klass = AcquireRequest<I>;
   Context *ctx = create_context_callback<
-    klass, &klass::handle_unlock_object_map>(this);
-  m_object_map->unlock(ctx);
+    klass, &klass::handle_close_object_map>(this);
+  m_object_map->close(ctx);
   return nullptr;
 }
 
 template <typename I>
-Context *AcquireRequest<I>::handle_unlock_object_map(int *ret_val) {
+Context *AcquireRequest<I>::handle_close_object_map(int *ret_val) {
   CephContext *cct = m_image_ctx.cct;
   ldout(cct, 10) << __func__ << ": r=" << *ret_val << dendl;
 
diff --git a/src/librbd/exclusive_lock/AcquireRequest.h b/src/librbd/exclusive_lock/AcquireRequest.h
index d7b717b..e3a2525 100644
--- a/src/librbd/exclusive_lock/AcquireRequest.h
+++ b/src/librbd/exclusive_lock/AcquireRequest.h
@@ -48,16 +48,13 @@ private:
    *    .     OPEN_OBJECT_MAP           GET_WATCHERS . . .              |
    *    .         |                       |              .              |
    *    .         v                       v              .              |
-   *    .     LOCK_OBJECT_MAP           BLACKLIST        . (blacklist   |
-   *    .         |                       |              .  disabled)   |
-   *    .         v                       v              .              |
-   *    . . > OPEN_JOURNAL * *          BREAK_LOCK < . . .              |
-   *    .         |          *            |                             |
-   *    .         |          v            |                             |
-   *    .         |    UNLOCK_OBJECT_MAP  |                             |
-   *    .         |          |            \-----------------------------/
-   *    .         v          |
-   *    . . > <finish> <-----/
+   *    . . > OPEN_JOURNAL * *          BLACKLIST        . (blacklist   |
+   *    .         |          *            |              .  disabled)   |
+   *    .         |          v            v              .              |
+   *    .         | CLOSE_OBJECT_MAP    BREAK_LOCK < . . .              |
+   *    .         v          |            |                             |
+   *    . . > <finish> <-----/            \-----------------------------/
+   *
    * @endverbatim
    */
 
@@ -93,11 +90,8 @@ private:
   Context *send_open_object_map();
   Context *handle_open_object_map(int *ret_val);
 
-  Context *send_lock_object_map();
-  Context *handle_lock_object_map(int *ret_val);
-
-  Context *send_unlock_object_map();
-  Context *handle_unlock_object_map(int *ret_val);
+  Context *send_close_object_map();
+  Context *handle_close_object_map(int *ret_val);
 
   void send_get_lockers();
   Context *handle_get_lockers(int *ret_val);
diff --git a/src/librbd/exclusive_lock/ReleaseRequest.cc b/src/librbd/exclusive_lock/ReleaseRequest.cc
index 3b65199..a96e97b 100644
--- a/src/librbd/exclusive_lock/ReleaseRequest.cc
+++ b/src/librbd/exclusive_lock/ReleaseRequest.cc
@@ -117,7 +117,7 @@ void ReleaseRequest<I>::send_close_journal() {
   }
 
   if (m_journal == nullptr) {
-    send_unlock_object_map();
+    send_close_object_map();
     return;
   }
 
@@ -143,12 +143,12 @@ Context *ReleaseRequest<I>::handle_close_journal(int *ret_val) {
 
   delete m_journal;
 
-  send_unlock_object_map();
+  send_close_object_map();
   return nullptr;
 }
 
 template <typename I>
-void ReleaseRequest<I>::send_unlock_object_map() {
+void ReleaseRequest<I>::send_close_object_map() {
   {
     RWLock::WLocker snap_locker(m_image_ctx.snap_lock);
     std::swap(m_object_map, m_image_ctx.object_map);
@@ -164,12 +164,12 @@ void ReleaseRequest<I>::send_unlock_object_map() {
 
   using klass = ReleaseRequest<I>;
   Context *ctx = create_context_callback<
-    klass, &klass::handle_unlock_object_map>(this);
-  m_object_map->unlock(ctx);
+    klass, &klass::handle_close_object_map>(this);
+  m_object_map->close(ctx);
 }
 
 template <typename I>
-Context *ReleaseRequest<I>::handle_unlock_object_map(int *ret_val) {
+Context *ReleaseRequest<I>::handle_close_object_map(int *ret_val) {
   CephContext *cct = m_image_ctx.cct;
   ldout(cct, 10) << __func__ << ": r=" << *ret_val << dendl;
 
diff --git a/src/librbd/exclusive_lock/ReleaseRequest.h b/src/librbd/exclusive_lock/ReleaseRequest.h
index 056d4e7..ecb1c21 100644
--- a/src/librbd/exclusive_lock/ReleaseRequest.h
+++ b/src/librbd/exclusive_lock/ReleaseRequest.h
@@ -42,7 +42,7 @@ private:
    * CLOSE_JOURNAL                            .
    *    |                (journal disabled,   .
    *    v                 object map enabled) .
-   * UNLOCK_OBJECT_MAP  < . . . . . . . . . . .
+   * CLOSE_OBJECT_MAP < . . . . . . . . . . . .
    *    |                                     .
    *    v               (object map disabled) .
    * UNLOCK < . . . . . . . . . . . . . . . . .
@@ -73,8 +73,8 @@ private:
   void send_close_journal();
   Context *handle_close_journal(int *ret_val);
 
-  void send_unlock_object_map();
-  Context *handle_unlock_object_map(int *ret_val);
+  void send_close_object_map();
+  Context *handle_close_object_map(int *ret_val);
 
   void send_unlock();
   Context *handle_unlock(int *ret_val);
diff --git a/src/librbd/image/RefreshRequest.cc b/src/librbd/image/RefreshRequest.cc
index d58bced..013b1d6 100644
--- a/src/librbd/image/RefreshRequest.cc
+++ b/src/librbd/image/RefreshRequest.cc
@@ -33,10 +33,9 @@ RefreshRequest<I>::RefreshRequest(I &image_ctx, Context *on_finish)
 
 template <typename I>
 RefreshRequest<I>::~RefreshRequest() {
-  delete m_object_map;
-
   // these require state machine to close
   assert(m_exclusive_lock == nullptr);
+  assert(m_object_map == nullptr);
   assert(m_journal == nullptr);
   assert(m_refresh_parent == nullptr);
 }
@@ -404,9 +403,9 @@ Context *RefreshRequest<I>::handle_v2_refresh_parent(int *result) {
 template <typename I>
 Context *RefreshRequest<I>::send_v2_init_exclusive_lock() {
   if ((m_features & RBD_FEATURE_EXCLUSIVE_LOCK) == 0 ||
-      !m_image_ctx.snap_name.empty() ||
+      m_image_ctx.read_only || !m_image_ctx.snap_name.empty() ||
       m_image_ctx.exclusive_lock != nullptr) {
-    return send_v2_open_journal();
+    return send_v2_open_object_map();
   }
 
   // implies exclusive lock dynamically enabled or image open in-progress
@@ -414,7 +413,7 @@ Context *RefreshRequest<I>::send_v2_init_exclusive_lock() {
   ldout(cct, 10) << this << " " << __func__ << dendl;
 
   // TODO need safe shut down
-  m_exclusive_lock = ExclusiveLock<I>::create(m_image_ctx);
+  m_exclusive_lock = m_image_ctx.create_exclusive_lock();
 
   using klass = RefreshRequest<I>;
   Context *ctx = create_context_callback<
@@ -434,20 +433,22 @@ Context *RefreshRequest<I>::handle_v2_init_exclusive_lock(int *result) {
     lderr(cct) << "failed to initialize exclusive lock: "
                << cpp_strerror(*result) << dendl;
     save_result(result);
-    return send_v2_finalize_refresh_parent();
   }
 
-  return send_v2_open_journal();
+  // object map and journal will be opened when exclusive lock is
+  // acquired (if features are enabled)
+  return send_v2_finalize_refresh_parent();
 }
 
 template <typename I>
 Context *RefreshRequest<I>::send_v2_open_journal() {
   if ((m_features & RBD_FEATURE_JOURNALING) == 0 ||
       m_image_ctx.read_only ||
+      !m_image_ctx.snap_name.empty() ||
       m_image_ctx.journal != nullptr ||
       m_image_ctx.exclusive_lock == nullptr ||
       !m_image_ctx.exclusive_lock->is_lock_owner()) {
-    return send_v2_open_object_map();
+    return send_v2_finalize_refresh_parent();
   }
 
   // implies journal dynamically enabled since ExclusiveLock will init
@@ -474,17 +475,20 @@ Context *RefreshRequest<I>::handle_v2_open_journal(int *result) {
     lderr(cct) << "failed to initialize journal: " << cpp_strerror(*result)
                << dendl;
     save_result(result);
-    return send_v2_finalize_refresh_parent();
   }
 
-  return send_v2_shut_down_exclusive_lock();
+  return send_v2_finalize_refresh_parent();
 }
 
 template <typename I>
 Context *RefreshRequest<I>::send_v2_open_object_map() {
   if ((m_features & RBD_FEATURE_OBJECT_MAP) == 0 ||
-      m_image_ctx.object_map != nullptr || m_image_ctx.snap_name.empty()) {
-    return send_v2_finalize_refresh_parent();
+      m_image_ctx.object_map != nullptr ||
+      (m_image_ctx.snap_name.empty() &&
+       (m_image_ctx.read_only ||
+        m_image_ctx.exclusive_lock == nullptr ||
+        !m_image_ctx.exclusive_lock->is_lock_owner()))) {
+    return send_v2_open_journal();
   }
 
   // implies object map dynamically enabled or image open in-progress
@@ -493,21 +497,29 @@ Context *RefreshRequest<I>::send_v2_open_object_map() {
   CephContext *cct = m_image_ctx.cct;
   ldout(cct, 10) << this << " " << __func__ << dendl;
 
-  for (size_t snap_idx = 0; snap_idx < m_snap_names.size(); ++snap_idx) {
-    if (m_snap_names[snap_idx] == m_image_ctx.snap_name) {
-      using klass = RefreshRequest<I>;
-      Context *ctx = create_context_callback<
-        klass, &klass::handle_v2_open_object_map>(this);
+  if (m_image_ctx.snap_name.empty()) {
+    m_object_map = m_image_ctx.create_object_map(CEPH_NOSNAP);
+  } else {
+    for (size_t snap_idx = 0; snap_idx < m_snap_names.size(); ++snap_idx) {
+      if (m_snap_names[snap_idx] == m_image_ctx.snap_name) {
+        m_object_map = m_image_ctx.create_object_map(
+          m_snapc.snaps[snap_idx].val);
+        break;
+      }
+    }
 
-      m_object_map = m_image_ctx.create_object_map(m_snapc.snaps[snap_idx].val);
-      m_object_map->open(ctx);
-      return nullptr;
+    if (m_object_map == nullptr) {
+      lderr(cct) << "failed to locate snapshot: " << m_image_ctx.snap_name
+                 << dendl;
+      return send_v2_open_journal();
     }
   }
 
-  lderr(cct) << "failed to locate snapshot: " << m_image_ctx.snap_name
-             << dendl;
-  return send_v2_finalize_refresh_parent();
+  using klass = RefreshRequest<I>;
+  Context *ctx = create_context_callback<
+    klass, &klass::handle_v2_open_object_map>(this);
+  m_object_map->open(ctx);
+  return nullptr;
 }
 
 template <typename I>
@@ -516,7 +528,7 @@ Context *RefreshRequest<I>::handle_v2_open_object_map(int *result) {
   ldout(cct, 10) << this << " " << __func__ << ": r=" << *result << dendl;
 
   assert(*result == 0);
-  return send_v2_finalize_refresh_parent();
+  return send_v2_open_journal();
 }
 
 template <typename I>
@@ -587,7 +599,7 @@ Context *RefreshRequest<I>::handle_v2_shut_down_exclusive_lock(int *result) {
 template <typename I>
 Context *RefreshRequest<I>::send_v2_close_journal() {
   if (m_journal == nullptr) {
-    return send_flush_aio();
+    return send_v2_close_object_map();
   }
 
   CephContext *cct = m_image_ctx.cct;
@@ -612,6 +624,40 @@ Context *RefreshRequest<I>::handle_v2_close_journal(int *result) {
                << dendl;
   }
 
+  assert(m_journal != nullptr);
+  delete m_journal;
+  m_journal = nullptr;
+
+  return send_v2_close_object_map();
+}
+
+template <typename I>
+Context *RefreshRequest<I>::send_v2_close_object_map() {
+  if (m_object_map == nullptr) {
+    return send_flush_aio();
+  }
+
+  CephContext *cct = m_image_ctx.cct;
+  ldout(cct, 10) << this << " " << __func__ << dendl;
+
+  // object map was dynamically disabled
+  using klass = RefreshRequest<I>;
+  Context *ctx = create_context_callback<
+    klass, &klass::handle_v2_close_object_map>(this);
+  m_object_map->close(ctx);
+  return nullptr;
+}
+
+template <typename I>
+Context *RefreshRequest<I>::handle_v2_close_object_map(int *result) {
+  CephContext *cct = m_image_ctx.cct;
+  ldout(cct, 10) << this << " " << __func__ << ": r=" << *result << dendl;
+
+  assert(*result == 0);
+  assert(m_object_map != nullptr);
+  delete m_object_map;
+  m_object_map = nullptr;
+
   return send_flush_aio();
 }
 
@@ -722,20 +768,26 @@ void RefreshRequest<I>::apply() {
                                                         m_image_ctx.snaps);
 
     // handle dynamically enabled / disabled features
-    if (!m_image_ctx.test_features(RBD_FEATURE_EXCLUSIVE_LOCK,
-                                   m_image_ctx.snap_lock) ||
-        m_exclusive_lock != nullptr) {
+    if (m_image_ctx.exclusive_lock != nullptr &&
+        !m_image_ctx.test_features(RBD_FEATURE_EXCLUSIVE_LOCK,
+                                   m_image_ctx.snap_lock)) {
+      // disabling exclusive lock will automatically handle closing
+      // object map and journaling
       std::swap(m_exclusive_lock, m_image_ctx.exclusive_lock);
-    }
-    if (!m_image_ctx.test_features(RBD_FEATURE_JOURNALING,
-                                   m_image_ctx.snap_lock) ||
-        m_journal != nullptr) {
-      std::swap(m_journal, m_image_ctx.journal);
-    }
-    if (!m_image_ctx.test_features(RBD_FEATURE_OBJECT_MAP,
-                                   m_image_ctx.snap_lock) ||
-        m_object_map != nullptr) {
-      std::swap(m_object_map, m_image_ctx.object_map);
+    } else {
+      if (m_exclusive_lock != nullptr) {
+        std::swap(m_exclusive_lock, m_image_ctx.exclusive_lock);
+      }
+      if (!m_image_ctx.test_features(RBD_FEATURE_JOURNALING,
+                                     m_image_ctx.snap_lock) ||
+          m_journal != nullptr) {
+        std::swap(m_journal, m_image_ctx.journal);
+      }
+      if (!m_image_ctx.test_features(RBD_FEATURE_OBJECT_MAP,
+                                     m_image_ctx.snap_lock) ||
+          m_object_map != nullptr) {
+        std::swap(m_object_map, m_image_ctx.object_map);
+      }
     }
   }
 }
diff --git a/src/librbd/image/RefreshRequest.h b/src/librbd/image/RefreshRequest.h
index 58451f5..8a08696 100644
--- a/src/librbd/image/RefreshRequest.h
+++ b/src/librbd/image/RefreshRequest.h
@@ -27,10 +27,11 @@ template<typename> class RefreshParentRequest;
 template<typename ImageCtxT = ImageCtx>
 class RefreshRequest {
 public:
-  static  RefreshRequest *create(ImageCtxT &image_ctx, Context *on_finish) {
+  static RefreshRequest *create(ImageCtxT &image_ctx, Context *on_finish) {
     return new RefreshRequest(image_ctx, on_finish);
   }
 
+  RefreshRequest(ImageCtxT &image_ctx, Context *on_finish);
   ~RefreshRequest();
 
   void send();
@@ -60,12 +61,12 @@ private:
    *            V2_INIT_EXCLUSIVE_LOCK (skip if lock          |
    *                |                   active or disabled)   |
    *                v                                         |
-   *            V2_OPEN_JOURNAL (skip if journal              |
-   *                |            active or disabled)          |
-   *                v                                         |
    *            V2_OPEN_OBJECT_MAP (skip if map               |
    *                |               active or disabled)       |
    *                v                                         |
+   *            V2_OPEN_JOURNAL (skip if journal              |
+   *                |            active or disabled)          |
+   *                v                                         |
    *             <apply>                                      |
    *                |                                         |
    *                v                                         |
@@ -124,8 +125,6 @@ private:
   std::string m_lock_tag;
   bool m_exclusive_locked;
 
-  RefreshRequest(ImageCtxT &image_ctx, Context *on_finish);
-
   void send_v1_read_header();
   Context *handle_v1_read_header(int *result);
 
@@ -165,6 +164,9 @@ private:
   Context *send_v2_close_journal();
   Context *handle_v2_close_journal(int *result);
 
+  Context *send_v2_close_object_map();
+  Context *handle_v2_close_object_map(int *result);
+
   Context *send_flush_aio();
   Context *handle_flush_aio(int *result);
 
diff --git a/src/librbd/internal.cc b/src/librbd/internal.cc
index eb77456..928f404 100644
--- a/src/librbd/internal.cc
+++ b/src/librbd/internal.cc
@@ -399,7 +399,7 @@ int validate_pool(IoCtx &io_ctx, CephContext *cct) {
     os << "[";
 
     for (image_options_t::const_iterator i = (*opts_)->begin();
-	 i != (*opts_)->end(); i++) {
+	 i != (*opts_)->end(); ++i) {
       os << (i == (*opts_)->begin() ? "" : ", ") << image_option_name(i->first)
 	 << "=" << i->second;
     }
@@ -1729,7 +1729,7 @@ int validate_pool(IoCtx &io_ctx, CephContext *cct) {
   }
 
   struct CopyProgressCtx {
-    CopyProgressCtx(ProgressContext &p)
+    explicit CopyProgressCtx(ProgressContext &p)
       : destictx(NULL), src_size(0), prog_ctx(p)
     { }
 
diff --git a/src/librbd/journal/Replay.h b/src/librbd/journal/Replay.h
index 96333e3..e0fad8a 100644
--- a/src/librbd/journal/Replay.h
+++ b/src/librbd/journal/Replay.h
@@ -9,7 +9,7 @@
 #include "include/Context.h"
 #include "include/rbd/librbd.hpp"
 #include "common/Mutex.h"
-#include "librbd/journal/Entries.h"
+#include "librbd/journal/Types.h"
 #include <boost/variant.hpp>
 #include <list>
 #include <set>
diff --git a/src/librbd/journal/Entries.cc b/src/librbd/journal/Types.cc
similarity index 59%
rename from src/librbd/journal/Entries.cc
rename to src/librbd/journal/Types.cc
index be350af..781f793 100644
--- a/src/librbd/journal/Entries.cc
+++ b/src/librbd/journal/Types.cc
@@ -1,7 +1,7 @@
 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
 // vim: ts=8 sw=2 smarttab
 
-#include "librbd/journal/Entries.h"
+#include "librbd/journal/Types.h"
 #include "include/assert.h"
 #include "include/stringify.h"
 #include "common/Formatter.h"
@@ -11,55 +11,58 @@ namespace journal {
 
 namespace {
 
-class GetEventTypeVistor : public boost::static_visitor<EventType> {
+template <typename E>
+class GetTypeVisitor : public boost::static_visitor<E> {
 public:
-  template <typename Event>
-  inline EventType operator()(const Event &event) const {
-    return Event::EVENT_TYPE;
+  template <typename T>
+  inline E operator()(const T&) const {
+    return T::TYPE;
   }
 };
 
-class EncodeEventVisitor : public boost::static_visitor<void> {
+class EncodeVisitor : public boost::static_visitor<void> {
 public:
-  EncodeEventVisitor(bufferlist &bl) : m_bl(bl) {
+  explicit EncodeVisitor(bufferlist &bl) : m_bl(bl) {
   }
 
-  template <typename Event>
-  inline void operator()(const Event &event) const {
-    ::encode(static_cast<uint32_t>(Event::EVENT_TYPE), m_bl);
-    event.encode(m_bl);
+  template <typename T>
+  inline void operator()(const T& t) const {
+    ::encode(static_cast<uint32_t>(T::TYPE), m_bl);
+    t.encode(m_bl);
   }
 private:
   bufferlist &m_bl;
 };
 
-class DecodeEventVisitor : public boost::static_visitor<void> {
+class DecodeVisitor : public boost::static_visitor<void> {
 public:
-  DecodeEventVisitor(__u8 version, bufferlist::iterator &iter)
+  DecodeVisitor(__u8 version, bufferlist::iterator &iter)
     : m_version(version), m_iter(iter) {
   }
 
-  template <typename Event>
-  inline void operator()(Event &event) const {
-    event.decode(m_version, m_iter);
+  template <typename T>
+  inline void operator()(T& t) const {
+    t.decode(m_version, m_iter);
   }
 private:
   __u8 m_version;
   bufferlist::iterator &m_iter;
 };
 
-class DumpEventVisitor : public boost::static_visitor<void> {
+class DumpVisitor : public boost::static_visitor<void> {
 public:
-  DumpEventVisitor(Formatter *formatter) : m_formatter(formatter) {}
-
-  template <typename Event>
-  inline void operator()(const Event &event) const {
-    EventType event_type = Event::EVENT_TYPE;
-    m_formatter->dump_string("event_type", stringify(event_type));
-    event.dump(m_formatter);
+  explicit DumpVisitor(Formatter *formatter, const std::string &key)
+    : m_formatter(formatter), m_key(key) {}
+
+  template <typename T>
+  inline void operator()(const T& t) const {
+    auto type = T::TYPE;
+    m_formatter->dump_string(m_key.c_str(), stringify(type));
+    t.dump(m_formatter);
   }
 private:
   ceph::Formatter *m_formatter;
+  std::string m_key;
 };
 
 } // anonymous namespace
@@ -207,12 +210,12 @@ void UnknownEvent::dump(Formatter *f) const {
 }
 
 EventType EventEntry::get_event_type() const {
-  return boost::apply_visitor(GetEventTypeVistor(), event);
+  return boost::apply_visitor(GetTypeVisitor<EventType>(), event);
 }
 
 void EventEntry::encode(bufferlist& bl) const {
   ENCODE_START(1, 1, bl);
-  boost::apply_visitor(EncodeEventVisitor(bl), event);
+  boost::apply_visitor(EncodeVisitor(bl), event);
   ENCODE_FINISH(bl);
 }
 
@@ -268,12 +271,12 @@ void EventEntry::decode(bufferlist::iterator& it) {
     break;
   }
 
-  boost::apply_visitor(DecodeEventVisitor(struct_v, it), event);
+  boost::apply_visitor(DecodeVisitor(struct_v, it), event);
   DECODE_FINISH(it);
 }
 
 void EventEntry::dump(Formatter *f) const {
-  boost::apply_visitor(DumpEventVisitor(f), event);
+  boost::apply_visitor(DumpVisitor(f, "event_type"), event);
 }
 
 void EventEntry::generate_test_instances(std::list<EventEntry *> &o) {
@@ -316,6 +319,136 @@ void EventEntry::generate_test_instances(std::list<EventEntry *> &o) {
   o.push_back(new EventEntry(FlattenEvent(123)));
 }
 
+// Journal Client
+
+void ImageClientMeta::encode(bufferlist& bl) const {
+  ::encode(tag_class, bl);
+}
+
+void ImageClientMeta::decode(__u8 version, bufferlist::iterator& it) {
+  ::decode(tag_class, it);
+}
+
+void ImageClientMeta::dump(Formatter *f) const {
+  f->dump_unsigned("tag_class", tag_class);
+}
+
+void MirrorPeerClientMeta::encode(bufferlist& bl) const {
+  ::encode(cluster_id, bl);
+  ::encode(pool_id, bl);
+  ::encode(image_id, bl);
+}
+
+void MirrorPeerClientMeta::decode(__u8 version, bufferlist::iterator& it) {
+  ::decode(cluster_id, it);
+  ::decode(pool_id, it);
+  ::decode(image_id, it);
+}
+
+void MirrorPeerClientMeta::dump(Formatter *f) const {
+  f->dump_string("cluster_id", cluster_id.c_str());
+  f->dump_int("pool_id", pool_id);
+  f->dump_string("image_id", image_id.c_str());
+}
+
+void CliClientMeta::encode(bufferlist& bl) const {
+}
+
+void CliClientMeta::decode(__u8 version, bufferlist::iterator& it) {
+}
+
+void CliClientMeta::dump(Formatter *f) const {
+}
+
+void UnknownClientMeta::encode(bufferlist& bl) const {
+  assert(false);
+}
+
+void UnknownClientMeta::decode(__u8 version, bufferlist::iterator& it) {
+}
+
+void UnknownClientMeta::dump(Formatter *f) const {
+}
+
+ClientMetaType ClientData::get_client_meta_type() const {
+  return boost::apply_visitor(GetTypeVisitor<ClientMetaType>(), client_meta);
+}
+
+void ClientData::encode(bufferlist& bl) const {
+  ENCODE_START(1, 1, bl);
+  boost::apply_visitor(EncodeVisitor(bl), client_meta);
+  ENCODE_FINISH(bl);
+}
+
+void ClientData::decode(bufferlist::iterator& it) {
+  DECODE_START(1, it);
+
+  uint32_t client_meta_type;
+  ::decode(client_meta_type, it);
+
+  // select the correct payload variant based upon the encoded op
+  switch (client_meta_type) {
+  case IMAGE_CLIENT_META_TYPE:
+    client_meta = ImageClientMeta();
+    break;
+  case MIRROR_PEER_CLIENT_META_TYPE:
+    client_meta = MirrorPeerClientMeta();
+    break;
+  case CLI_CLIENT_META_TYPE:
+    client_meta = CliClientMeta();
+    break;
+  default:
+    client_meta = UnknownClientMeta();
+    break;
+  }
+
+  boost::apply_visitor(DecodeVisitor(struct_v, it), client_meta);
+  DECODE_FINISH(it);
+}
+
+void ClientData::dump(Formatter *f) const {
+  boost::apply_visitor(DumpVisitor(f, "client_meta_type"), client_meta);
+}
+
+void ClientData::generate_test_instances(std::list<ClientData *> &o) {
+  o.push_back(new ClientData(ImageClientMeta()));
+  o.push_back(new ClientData(ImageClientMeta(123)));
+  o.push_back(new ClientData(MirrorPeerClientMeta()));
+  o.push_back(new ClientData(MirrorPeerClientMeta("cluster_id", 123, "image_id")));
+  o.push_back(new ClientData(CliClientMeta()));
+}
+
+// Journal Tag
+
+void TagData::encode(bufferlist& bl) const {
+  ::encode(cluster_id, bl);
+  ::encode(pool_id, bl);
+  ::encode(image_id, bl);
+  ::encode(predecessor_tag_tid, bl);
+  ::encode(predecessor_entry_tid, bl);
+}
+
+void TagData::decode(bufferlist::iterator& it) {
+  ::decode(cluster_id, it);
+  ::decode(pool_id, it);
+  ::decode(image_id, it);
+  ::decode(predecessor_tag_tid, it);
+  ::decode(predecessor_entry_tid, it);
+}
+
+void TagData::dump(Formatter *f) const {
+  f->dump_string("cluster_id", cluster_id.c_str());
+  f->dump_int("pool_id", pool_id);
+  f->dump_string("image_id", image_id.c_str());
+  f->dump_unsigned("predecessor_tag_tid", predecessor_tag_tid);
+  f->dump_unsigned("predecessor_entry_tid", predecessor_entry_tid);
+}
+
+void TagData::generate_test_instances(std::list<TagData *> &o) {
+  o.push_back(new TagData());
+  o.push_back(new TagData("cluster_id", 123, "image_id"));
+}
+
 } // namespace journal
 } // namespace librbd
 
@@ -369,3 +502,25 @@ std::ostream &operator<<(std::ostream &out,
   }
   return out;
 }
+
+std::ostream &operator<<(std::ostream &out,
+                         const librbd::journal::ClientMetaType &type) {
+  using namespace librbd::journal;
+
+  switch (type) {
+  case IMAGE_CLIENT_META_TYPE:
+    out << "Master Image";
+    break;
+  case MIRROR_PEER_CLIENT_META_TYPE:
+    out << "Mirror Peer";
+    break;
+  case CLI_CLIENT_META_TYPE:
+    out << "CLI Tool";
+    break;
+  default:
+    out << "Unknown (" << static_cast<uint32_t>(type) << ")";
+    break;
+  }
+  return out;
+
+}
diff --git a/src/librbd/journal/Entries.h b/src/librbd/journal/Types.h
similarity index 63%
rename from src/librbd/journal/Entries.h
rename to src/librbd/journal/Types.h
index 09218be..cf4a642 100644
--- a/src/librbd/journal/Entries.h
+++ b/src/librbd/journal/Types.h
@@ -1,8 +1,8 @@
 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
 // vim: ts=8 sw=2 smarttab
 
-#ifndef CEPH_LIBRBD_JOURNAL_ENTRIES_H
-#define CEPH_LIBRBD_JOURNAL_ENTRIES_H
+#ifndef CEPH_LIBRBD_JOURNAL_TYPES_H
+#define CEPH_LIBRBD_JOURNAL_TYPES_H
 
 #include "include/int_types.h"
 #include "include/buffer.h"
@@ -35,7 +35,7 @@ enum EventType {
 };
 
 struct AioDiscardEvent {
-  static const EventType EVENT_TYPE = EVENT_TYPE_AIO_DISCARD;
+  static const EventType TYPE = EVENT_TYPE_AIO_DISCARD;
 
   uint64_t offset;
   size_t length;
@@ -52,7 +52,7 @@ struct AioDiscardEvent {
 };
 
 struct AioWriteEvent {
-  static const EventType EVENT_TYPE = EVENT_TYPE_AIO_WRITE;
+  static const EventType TYPE = EVENT_TYPE_AIO_WRITE;
 
   uint64_t offset;
   size_t length;
@@ -70,7 +70,7 @@ struct AioWriteEvent {
 };
 
 struct AioFlushEvent {
-  static const EventType EVENT_TYPE = EVENT_TYPE_AIO_FLUSH;
+  static const EventType TYPE = EVENT_TYPE_AIO_FLUSH;
 
   void encode(bufferlist& bl) const;
   void decode(__u8 version, bufferlist::iterator& it);
@@ -92,7 +92,7 @@ protected:
 };
 
 struct OpFinishEvent : public OpEventBase {
-  static const EventType EVENT_TYPE = EVENT_TYPE_OP_FINISH;
+  static const EventType TYPE = EVENT_TYPE_OP_FINISH;
 
   int r;
 
@@ -122,7 +122,7 @@ protected:
 };
 
 struct SnapCreateEvent : public SnapEventBase {
-  static const EventType EVENT_TYPE = EVENT_TYPE_SNAP_CREATE;
+  static const EventType TYPE = EVENT_TYPE_SNAP_CREATE;
 
   SnapCreateEvent() {
   }
@@ -136,7 +136,7 @@ struct SnapCreateEvent : public SnapEventBase {
 };
 
 struct SnapRemoveEvent : public SnapEventBase {
-  static const EventType EVENT_TYPE = EVENT_TYPE_SNAP_REMOVE;
+  static const EventType TYPE = EVENT_TYPE_SNAP_REMOVE;
 
   SnapRemoveEvent() {
   }
@@ -150,7 +150,7 @@ struct SnapRemoveEvent : public SnapEventBase {
 };
 
 struct SnapRenameEvent : public SnapEventBase {
-  static const EventType EVENT_TYPE = EVENT_TYPE_SNAP_RENAME;
+  static const EventType TYPE = EVENT_TYPE_SNAP_RENAME;
 
   uint64_t snap_id;
 
@@ -167,7 +167,7 @@ struct SnapRenameEvent : public SnapEventBase {
 };
 
 struct SnapProtectEvent : public SnapEventBase {
-  static const EventType EVENT_TYPE = EVENT_TYPE_SNAP_PROTECT;
+  static const EventType TYPE = EVENT_TYPE_SNAP_PROTECT;
 
   SnapProtectEvent() {
   }
@@ -181,7 +181,7 @@ struct SnapProtectEvent : public SnapEventBase {
 };
 
 struct SnapUnprotectEvent : public SnapEventBase {
-  static const EventType EVENT_TYPE = EVENT_TYPE_SNAP_UNPROTECT;
+  static const EventType TYPE = EVENT_TYPE_SNAP_UNPROTECT;
 
   SnapUnprotectEvent() {
   }
@@ -195,7 +195,7 @@ struct SnapUnprotectEvent : public SnapEventBase {
 };
 
 struct SnapRollbackEvent : public SnapEventBase {
-  static const EventType EVENT_TYPE = EVENT_TYPE_SNAP_ROLLBACK;
+  static const EventType TYPE = EVENT_TYPE_SNAP_ROLLBACK;
 
   SnapRollbackEvent() {
   }
@@ -209,7 +209,7 @@ struct SnapRollbackEvent : public SnapEventBase {
 };
 
 struct RenameEvent : public OpEventBase {
-  static const EventType EVENT_TYPE = EVENT_TYPE_RENAME;
+  static const EventType TYPE = EVENT_TYPE_RENAME;
 
   std::string image_name;
 
@@ -225,7 +225,7 @@ struct RenameEvent : public OpEventBase {
 };
 
 struct ResizeEvent : public OpEventBase {
-  static const EventType EVENT_TYPE = EVENT_TYPE_RESIZE;
+  static const EventType TYPE = EVENT_TYPE_RESIZE;
 
   uint64_t size;
 
@@ -241,7 +241,7 @@ struct ResizeEvent : public OpEventBase {
 };
 
 struct FlattenEvent : public OpEventBase {
-  static const EventType EVENT_TYPE = EVENT_TYPE_FLATTEN;
+  static const EventType TYPE = EVENT_TYPE_FLATTEN;
 
   FlattenEvent() {
   }
@@ -254,7 +254,7 @@ struct FlattenEvent : public OpEventBase {
 };
 
 struct UnknownEvent {
-  static const EventType EVENT_TYPE = static_cast<EventType>(-1);
+  static const EventType TYPE = static_cast<EventType>(-1);
 
   void encode(bufferlist& bl) const;
   void decode(__u8 version, bufferlist::iterator& it);
@@ -293,12 +293,122 @@ struct EventEntry {
   static void generate_test_instances(std::list<EventEntry *> &o);
 };
 
+// Journal Client data structures
+
+enum ClientMetaType {
+  IMAGE_CLIENT_META_TYPE       = 0,
+  MIRROR_PEER_CLIENT_META_TYPE = 1,
+  CLI_CLIENT_META_TYPE         = 2
+};
+
+struct ImageClientMeta {
+  static const ClientMetaType TYPE = IMAGE_CLIENT_META_TYPE;
+
+  uint64_t tag_class = 0;
+
+  ImageClientMeta() {
+  }
+  ImageClientMeta(uint64_t tag_class) : tag_class(tag_class) {
+  }
+
+  void encode(bufferlist& bl) const;
+  void decode(__u8 version, bufferlist::iterator& it);
+  void dump(Formatter *f) const;
+};
+
+struct MirrorPeerClientMeta {
+  static const ClientMetaType TYPE = MIRROR_PEER_CLIENT_META_TYPE;
+
+  std::string cluster_id;
+  int64_t pool_id = 0;
+  std::string image_id;
+
+  MirrorPeerClientMeta() {
+  }
+  MirrorPeerClientMeta(const std::string &cluster_id, int64_t pool_id,
+                       const std::string &image_id)
+    : cluster_id(cluster_id), pool_id(pool_id), image_id(image_id) {
+  }
+
+  void encode(bufferlist& bl) const;
+  void decode(__u8 version, bufferlist::iterator& it);
+  void dump(Formatter *f) const;
+};
+
+struct CliClientMeta {
+  static const ClientMetaType TYPE = CLI_CLIENT_META_TYPE;
+
+  void encode(bufferlist& bl) const;
+  void decode(__u8 version, bufferlist::iterator& it);
+  void dump(Formatter *f) const;
+};
+
+struct UnknownClientMeta {
+  static const ClientMetaType TYPE = static_cast<ClientMetaType>(-1);
+
+  void encode(bufferlist& bl) const;
+  void decode(__u8 version, bufferlist::iterator& it);
+  void dump(Formatter *f) const;
+};
+
+typedef boost::variant<ImageClientMeta,
+                       MirrorPeerClientMeta,
+                       CliClientMeta,
+                       UnknownClientMeta> ClientMeta;
+
+struct ClientData {
+  ClientData() {
+  }
+  ClientData(const ClientMeta &client_meta) : client_meta(client_meta) {
+  }
+
+  ClientMeta client_meta;
+
+  ClientMetaType get_client_meta_type() const;
+
+  void encode(bufferlist& bl) const;
+  void decode(bufferlist::iterator& it);
+  void dump(Formatter *f) const;
+
+  static void generate_test_instances(std::list<ClientData *> &o);
+};
+
+// Journal Tag data structures
+
+struct TagData {
+  // owner of the tag (exclusive lock epoch)
+  std::string cluster_id;
+  int64_t pool_id = 0;
+  std::string image_id;
+
+  // mapping to last committed record of previous tag
+  uint64_t predecessor_tag_tid = 0;
+  uint64_t predecessor_entry_tid = 0;
+
+  TagData() {
+  }
+  TagData(const std::string &cluster_id, int64_t pool_id,
+          const std::string &image_id)
+    : cluster_id(cluster_id), pool_id(pool_id), image_id(image_id) {
+  }
+
+  void encode(bufferlist& bl) const;
+  void decode(bufferlist::iterator& it);
+  void dump(Formatter *f) const;
+
+  static void generate_test_instances(std::list<TagData *> &o);
+};
+
 } // namespace journal
 } // namespace librbd
 
 std::ostream &operator<<(std::ostream &out,
                          const librbd::journal::EventType &type);
+std::ostream &operator<<(std::ostream &out,
+                         const librbd::journal::ClientMetaType &type);
 
 WRITE_CLASS_ENCODER(librbd::journal::EventEntry);
+WRITE_CLASS_ENCODER(librbd::journal::ClientData);
+WRITE_CLASS_ENCODER(librbd::journal::TagData);
 
-#endif // CEPH_LIBRBD_JOURNAL_ENTRIES_H
+#endif // CEPH_LIBRBD_JOURNAL_TYPES_H
diff --git a/src/librbd/librbd.cc b/src/librbd/librbd.cc
index 3fda9db..4a47ab2 100644
--- a/src/librbd/librbd.cc
+++ b/src/librbd/librbd.cc
@@ -69,6 +69,69 @@ librbd::AioCompletion* get_aio_completion(librbd::RBD::AioCompletion *comp) {
   return reinterpret_cast<librbd::AioCompletion *>(comp->pc);
 }
 
+struct C_OpenComplete : public Context {
+  librbd::ImageCtx *ictx;
+  librbd::AioCompletion* comp;
+  void **ictxp;
+  bool reopen;
+  C_OpenComplete(librbd::ImageCtx *ictx, librbd::AioCompletion* comp,
+		 void **ictxp, bool reopen = false)
+    : ictx(ictx), comp(comp), ictxp(ictxp), reopen(reopen) {
+    comp->init_time(ictx, librbd::AIO_TYPE_OPEN);
+    comp->get();
+  }
+  virtual void finish(int r) {
+    ldout(ictx->cct, 20) << "C_OpenComplete::finish: r=" << r << dendl;
+    if (reopen) {
+      delete reinterpret_cast<librbd::ImageCtx*>(*ictxp);
+    }
+    if (r < 0) {
+      *ictxp = nullptr;
+      comp->fail(ictx->cct, r);
+    } else {
+      *ictxp = ictx;
+      comp->lock.Lock();
+      comp->complete(ictx->cct);
+      comp->put_unlock();
+    }
+  }
+};
+
+struct C_OpenAfterCloseComplete : public Context {
+  librbd::ImageCtx *ictx;
+  librbd::AioCompletion* comp;
+  void **ictxp;
+  C_OpenAfterCloseComplete(librbd::ImageCtx *ictx, librbd::AioCompletion* comp,
+			   void **ictxp)
+    : ictx(ictx), comp(comp), ictxp(ictxp) {
+  }
+  virtual void finish(int r) {
+    ldout(ictx->cct, 20) << "C_OpenAfterCloseComplete::finish: r=" << r
+			 << dendl;
+    ictx->state->open(new C_OpenComplete(ictx, comp, ictxp, true));
+  }
+};
+
+struct C_CloseComplete : public Context {
+  CephContext *cct;
+  librbd::AioCompletion* comp;
+  C_CloseComplete(librbd::ImageCtx *ictx, librbd::AioCompletion* comp)
+    : cct(ictx->cct), comp(comp) {
+    comp->init_time(ictx, librbd::AIO_TYPE_CLOSE);
+    comp->get();
+  }
+  virtual void finish(int r) {
+    ldout(cct, 20) << "C_CloseComplete::finish: r=" << r << dendl;
+    if (r < 0) {
+      comp->fail(cct, r);
+    } else {
+      comp->lock.Lock();
+      comp->complete(cct);
+      comp->put_unlock();
+    }
+  }
+};
+
 } // anonymous namespace
 
 namespace librbd {
@@ -137,6 +200,24 @@ namespace librbd {
     return 0;
   }
 
+  int RBD::aio_open(IoCtx& io_ctx, Image& image, const char *name,
+		    const char *snap_name, RBD::AioCompletion *c)
+  {  // starts an asynchronous open; always returns 0, the outcome is delivered through c
+    ImageCtx *ictx = new ImageCtx(name, "", snap_name, io_ctx, false);  // false: presumably read_only -- confirm
+    TracepointProvider::initialize<tracepoint_traits>(get_cct(io_ctx));
+    tracepoint(librbd, aio_open_image_enter, ictx, ictx->name.c_str(), ictx->id.c_str(), ictx->snap_name.c_str(), ictx->read_only, c->pc);
+
+    if (image.ctx != NULL) {  // image already holds a context: close it first, then open the new one
+      reinterpret_cast<ImageCtx*>(image.ctx)->state->close(
+	new C_OpenAfterCloseComplete(ictx, get_aio_completion(c), &image.ctx));
+    } else {  // nothing open yet: start the open directly
+      ictx->state->open(new C_OpenComplete(ictx, get_aio_completion(c),
+					   &image.ctx));
+    }
+    tracepoint(librbd, aio_open_image_exit, 0);
+    return 0;
+  }
+
   int RBD::open_read_only(IoCtx& io_ctx, Image& image, const char *name,
 			  const char *snap_name)
   {
@@ -161,6 +242,24 @@ namespace librbd {
     return 0;
   }
 
+  int RBD::aio_open_read_only(IoCtx& io_ctx, Image& image, const char *name,
+			      const char *snap_name, RBD::AioCompletion *c)
+  {  // same flow as aio_open but builds the ImageCtx with true as its last argument; always returns 0
+    ImageCtx *ictx = new ImageCtx(name, "", snap_name, io_ctx, true);  // true: presumably read_only -- confirm
+    TracepointProvider::initialize<tracepoint_traits>(get_cct(io_ctx));
+    tracepoint(librbd, aio_open_image_enter, ictx, ictx->name.c_str(), ictx->id.c_str(), ictx->snap_name.c_str(), ictx->read_only, c->pc);
+
+    if (image.ctx != NULL) {  // image already holds a context: close it first, then open the new one
+      reinterpret_cast<ImageCtx*>(image.ctx)->state->close(
+	new C_OpenAfterCloseComplete(ictx, get_aio_completion(c), &image.ctx));
+    } else {  // nothing open yet: start the open directly
+      ictx->state->open(new C_OpenComplete(ictx, get_aio_completion(c),
+					   &image.ctx));
+    }
+    tracepoint(librbd, aio_open_image_exit, 0);
+    return 0;
+  }
+
   int RBD::create(IoCtx& io_ctx, const char *name, uint64_t size, int *order)
   {
     TracepointProvider::initialize<tracepoint_traits>(get_cct(io_ctx));
@@ -434,6 +533,22 @@ namespace librbd {
     return r;
   }
 
+  int Image::aio_close(RBD::AioCompletion *c)
+  {  // starts an asynchronous close; returns -EINVAL when no image is open, otherwise 0
+    if (!ctx) {
+      return -EINVAL;
+    }
+
+    ImageCtx *ictx = (ImageCtx *)ctx;
+    tracepoint(librbd, aio_close_image_enter, ictx, ictx->name.c_str(), ictx->id.c_str(), c->pc);
+
+    ictx->state->close(new C_CloseComplete(ictx, get_aio_completion(c)));
+    ctx = NULL;  // handle is cleared right after the close is issued, not when it completes
+
+    tracepoint(librbd, aio_close_image_exit, 0);
+    return 0;
+  }
+
   int Image::resize(uint64_t size)
   {
     ImageCtx *ictx = (ImageCtx *)ctx;
@@ -853,7 +968,7 @@ namespace librbd {
     ImageCtx *ictx = (ImageCtx *)ctx;
     tracepoint(librbd, read_enter, ictx, ictx->name.c_str(), ictx->snap_name.c_str(), ictx->read_only, ofs, len);
     bufferptr ptr(len);
-    bl.push_back(ptr);
+    bl.push_back(std::move(ptr));
     int r = ictx->aio_work_queue->read(ofs, len, bl.c_str(), 0);
     tracepoint(librbd, read_exit, r);
     return r;
@@ -865,7 +980,7 @@ namespace librbd {
     tracepoint(librbd, read2_enter, ictx, ictx->name.c_str(), ictx->snap_name.c_str(),
 		ictx->read_only, ofs, len, op_flags);
     bufferptr ptr(len);
-    bl.push_back(ptr);
+    bl.push_back(std::move(ptr));
     int r = ictx->aio_work_queue->read(ofs, len, bl.c_str(), op_flags);
     tracepoint(librbd, read_exit, r);
     return r;
@@ -1575,6 +1690,22 @@ extern "C" int rbd_open(rados_ioctx_t p, const char *name, rbd_image_t *image,
   return r;
 }
 
+extern "C" int rbd_aio_open(rados_ioctx_t p, const char *name,
+			    rbd_image_t *image, const char *snap_name,
+			    rbd_completion_t c)
+{  // C entry point for an asynchronous open; always returns 0, the outcome is delivered through c
+  librados::IoCtx io_ctx;
+  librados::IoCtx::from_rados_ioctx_t(p, io_ctx);
+  TracepointProvider::initialize<tracepoint_traits>(get_cct(io_ctx));
+  librbd::ImageCtx *ictx = new librbd::ImageCtx(name, "", snap_name, io_ctx,
+						false);
+  librbd::RBD::AioCompletion *comp = (librbd::RBD::AioCompletion *)c;
+  tracepoint(librbd, aio_open_image_enter, ictx, ictx->name.c_str(), ictx->id.c_str(), ictx->snap_name.c_str(), ictx->read_only, comp->pc);
+  ictx->state->open(new C_OpenComplete(ictx, get_aio_completion(comp), image));  // *image is written in C_OpenComplete::finish
+  tracepoint(librbd, aio_open_image_exit, 0);
+  return 0;
+}
+
 extern "C" int rbd_open_read_only(rados_ioctx_t p, const char *name,
 				  rbd_image_t *image, const char *snap_name)
 {
@@ -1595,6 +1726,22 @@ extern "C" int rbd_open_read_only(rados_ioctx_t p, const char *name,
   return r;
 }
 
+extern "C" int rbd_aio_open_read_only(rados_ioctx_t p, const char *name,
+				      rbd_image_t *image, const char *snap_name,
+				      rbd_completion_t c)
+{  // same flow as rbd_aio_open but passes true as the last ImageCtx argument; always returns 0
+  librados::IoCtx io_ctx;
+  librados::IoCtx::from_rados_ioctx_t(p, io_ctx);
+  TracepointProvider::initialize<tracepoint_traits>(get_cct(io_ctx));
+  librbd::ImageCtx *ictx = new librbd::ImageCtx(name, "", snap_name, io_ctx,
+						true);
+  librbd::RBD::AioCompletion *comp = (librbd::RBD::AioCompletion *)c;
+  tracepoint(librbd, aio_open_image_enter, ictx, ictx->name.c_str(), ictx->id.c_str(), ictx->snap_name.c_str(), ictx->read_only, comp->pc);
+  ictx->state->open(new C_OpenComplete(ictx, get_aio_completion(comp), image));  // *image is written in C_OpenComplete::finish
+  tracepoint(librbd, aio_open_image_exit, 0);
+  return 0;
+}
+
 extern "C" int rbd_close(rbd_image_t image)
 {
   librbd::ImageCtx *ictx = (librbd::ImageCtx *)image;
@@ -1606,6 +1753,16 @@ extern "C" int rbd_close(rbd_image_t image)
   return r;
 }
 
+extern "C" int rbd_aio_close(rbd_image_t image, rbd_completion_t c)
+{  // C entry point for an asynchronous close; always returns 0, the outcome is delivered through c
+  librbd::ImageCtx *ictx = (librbd::ImageCtx *)image;  // image is dereferenced below without a NULL check
+  librbd::RBD::AioCompletion *comp = (librbd::RBD::AioCompletion *)c;
+  tracepoint(librbd, aio_close_image_enter, ictx, ictx->name.c_str(), ictx->id.c_str(), comp->pc);
+  ictx->state->close(new C_CloseComplete(ictx, get_aio_completion(comp)));
+  tracepoint(librbd, aio_close_image_exit, 0);
+  return 0;
+}
+
 extern "C" int rbd_resize(rbd_image_t image, uint64_t size)
 {
   librbd::ImageCtx *ictx = (librbd::ImageCtx *)image;
diff --git a/src/librbd/object_map/LockRequest.h b/src/librbd/object_map/LockRequest.h
index 8f1ee6c..04c9a09 100644
--- a/src/librbd/object_map/LockRequest.h
+++ b/src/librbd/object_map/LockRequest.h
@@ -19,6 +19,9 @@ namespace object_map {
 template <typename ImageCtxT = ImageCtx>
 class LockRequest {
 public:
+  static LockRequest* create(ImageCtxT &image_ctx, Context *on_finish) {  // factory wrapping the public constructor
+    return new LockRequest(image_ctx, on_finish);  // heap-allocated; NOTE(review): who deletes it is not visible here
+  }
   LockRequest(ImageCtxT &image_ctx, Context *on_finish);
 
   void send();
diff --git a/src/librbd/object_map/RefreshRequest.cc b/src/librbd/object_map/RefreshRequest.cc
index 84acba3..048f72f 100644
--- a/src/librbd/object_map/RefreshRequest.cc
+++ b/src/librbd/object_map/RefreshRequest.cc
@@ -9,6 +9,7 @@
 #include "librbd/ImageCtx.h"
 #include "librbd/ObjectMap.h"
 #include "librbd/object_map/InvalidateRequest.h"
+#include "librbd/object_map/LockRequest.h"
 #include "librbd/object_map/ResizeRequest.h"
 #include "librbd/Utils.h"
 #include "osdc/Striper.h"
@@ -41,7 +42,7 @@ void RefreshRequest<I>::send() {
       m_image_ctx.layout, m_image_ctx.get_image_size(m_snap_id));
   }
 
-  send_load();
+  send_lock();
 }
 
 template <typename I>
@@ -58,6 +59,35 @@ void RefreshRequest<I>::apply() {
 }
 
 template <typename I>
+void RefreshRequest<I>::send_lock() {  // first step issued by send(): lock the object map, then load it
+  if (m_snap_id != CEPH_NOSNAP) {  // refreshing a snapshot's object map: skip the lock step
+    send_load();
+    return;
+  }
+
+  CephContext *cct = m_image_ctx.cct;
+  std::string oid(ObjectMap::object_map_name(m_image_ctx.id, m_snap_id));  // used only in the log line below
+  ldout(cct, 10) << this << " " << __func__ << ": oid=" << oid << dendl;
+
+  using klass = RefreshRequest<I>;
+  Context *ctx = create_context_callback<  // completion is routed to handle_lock()
+    klass, &klass::handle_lock>(this);
+
+  LockRequest<I> *req = LockRequest<I>::create(m_image_ctx, ctx);
+  req->send();
+}
+
+template <typename I>
+Context *RefreshRequest<I>::handle_lock(int *ret_val) {  // completion handler for the lock step
+  CephContext *cct = m_image_ctx.cct;
+  ldout(cct, 10) << this << " " << __func__ << dendl;
+
+  assert(*ret_val == 0);  // any non-zero lock result aborts here instead of being propagated
+  send_load();  // continue with the load step
+  return nullptr;  // NOTE(review): presumably signals that the request is still in progress -- confirm
+}
+
+template <typename I>
 void RefreshRequest<I>::send_load() {
   CephContext *cct = m_image_ctx.cct;
   std::string oid(ObjectMap::object_map_name(m_image_ctx.id, m_snap_id));
diff --git a/src/librbd/object_map/RefreshRequest.h b/src/librbd/object_map/RefreshRequest.h
index 17a69a0..2c94d1e 100644
--- a/src/librbd/object_map/RefreshRequest.h
+++ b/src/librbd/object_map/RefreshRequest.h
@@ -26,8 +26,11 @@ public:
 private:
   /**
    * @verbatim
-   *                         (other errors)
-   * <start> -----> LOAD * * * * * * * > INVALIDATE ------------\
+   *
+   * <start> -----> LOCK (skip if snapshot)
+   *                  |
+   *                  v  (other errors)
+   *                LOAD * * * * * * * > INVALIDATE ------------\
    *                  |    *                                    |
    *                  |    * (-EINVAL or too small)             |
    *                  |    * * * * * * > INVALIDATE_AND_RESIZE  |
@@ -39,7 +42,10 @@ private:
    *                  |                      |  * * * * * * *   |
    *                  |                      |  *               |
    *                  |                      v  v               |
-   *                  \-----------------> <finish> <------------/
+   *                  \--------------------> LOCK <-------------/
+   *                                          |
+   *                                          v
+   *                                      <finish>
    * @endverbatim
    */
 
@@ -53,6 +59,9 @@ private:
   bool m_truncate_on_disk_object_map;
   bufferlist m_out_bl;
 
+  void send_lock();
+  Context *handle_lock(int *ret_val);
+
   void send_load();
   Context *handle_load(int *ret_val);
 
diff --git a/src/librbd/operation/Request.h b/src/librbd/operation/Request.h
index 54d8281..44ff5e2 100644
--- a/src/librbd/operation/Request.h
+++ b/src/librbd/operation/Request.h
@@ -9,7 +9,7 @@
 #include "common/RWLock.h"
 #include "librbd/Utils.h"
 #include "librbd/Journal.h"
-#include "librbd/journal/Entries.h"
+#include "librbd/journal/Types.h"
 
 namespace librbd {
 
diff --git a/src/log/Log.cc b/src/log/Log.cc
index eb9c545..46dbb71 100644
--- a/src/log/Log.cc
+++ b/src/log/Log.cc
@@ -9,13 +9,20 @@
 #include <iostream>
 #include <sstream>
 
+#include <boost/asio.hpp>
+#include <boost/iostreams/filtering_stream.hpp>
+#include <boost/iostreams/filter/zlib.hpp>
+#include <boost/shared_ptr.hpp>
+
 #include "common/errno.h"
 #include "common/safe_io.h"
 #include "common/Clock.h"
 #include "common/valgrind.h"
+#include "common/Formatter.h"
 #include "include/assert.h"
 #include "include/compat.h"
 #include "include/on_exit.h"
+#include "include/uuid.h"
 
 #define DEFAULT_MAX_NEW    100
 #define DEFAULT_MAX_RECENT 10000
@@ -45,6 +52,7 @@ Log::Log(SubsystemMap *s)
     m_fd(-1),
     m_syslog_log(-2), m_syslog_crash(-2),
     m_stderr_log(1), m_stderr_crash(-1),
+    m_graylog_log(-3), m_graylog_crash(-3),
     m_stop(false),
     m_max_new(DEFAULT_MAX_NEW),
     m_max_recent(DEFAULT_MAX_RECENT),
@@ -151,6 +159,30 @@ void Log::set_stderr_level(int log, int crash)
   pthread_mutex_unlock(&m_flush_mutex);
 }
 
+void Log::set_graylog_level(int log, int crash)
+{  // stores both graylog thresholds while holding m_flush_mutex
+  pthread_mutex_lock(&m_flush_mutex);
+  m_graylog_log = log;
+  m_graylog_crash = crash;
+  pthread_mutex_unlock(&m_flush_mutex);
+}
+
+void Log::start_graylog()
+{  // creates the Graylog object if none exists yet; does nothing when one is already set
+  pthread_mutex_lock(&m_flush_mutex);
+  if (! m_graylog.get())
+    m_graylog = Graylog::Ref(new Graylog(m_subs, "dlog"));  // NOTE(review): meaning of "dlog" is not visible here
+  pthread_mutex_unlock(&m_flush_mutex);
+}
+
+
+void Log::stop_graylog()
+{  // drops this Log's reference to the Graylog object while holding m_flush_mutex
+  pthread_mutex_lock(&m_flush_mutex);
+  m_graylog.reset();
+  pthread_mutex_unlock(&m_flush_mutex);
+}
+
 void Log::submit_entry(Entry *e)
 {
   pthread_mutex_lock(&m_queue_mutex);
@@ -240,6 +272,7 @@ void Log::_flush(EntryQueue *t, EntryQueue *requeue, bool crash)
     bool do_fd = m_fd >= 0 && should_log;
     bool do_syslog = m_syslog_crash >= e->m_prio && should_log;
     bool do_stderr = m_stderr_crash >= e->m_prio && should_log;
+    bool do_graylog2 = m_graylog_crash >= e->m_prio && should_log;
 
     e->hint_size();
     if (do_fd || do_syslog || do_stderr) {
@@ -271,6 +304,10 @@ void Log::_flush(EntryQueue *t, EntryQueue *requeue, bool crash)
         if (r < 0)
           cerr << "problem writing to " << m_log_file << ": " << cpp_strerror(r) << std::endl;
       }
+
+    }
+    if (do_graylog2 && m_graylog) {
+      m_graylog->log_entry(e);
     }
 
     requeue->enqueue(e);
@@ -289,7 +326,7 @@ void Log::_log_message(const char *s, bool crash)
   if ((crash ? m_syslog_crash : m_syslog_log) >= 0) {
     syslog(LOG_USER|LOG_DEBUG, "%s", s);
   }
-  
+
   if ((crash ? m_stderr_crash : m_stderr_log) >= 0) {
     cerr << s << std::endl;
   }
diff --git a/src/log/Log.h b/src/log/Log.h
index 57727d3..ba2dc41 100644
--- a/src/log/Log.h
+++ b/src/log/Log.h
@@ -6,11 +6,14 @@
 
 #include "common/Thread.h"
 
+#include <assert.h>
 #include <pthread.h>
+#include <boost/asio.hpp>
 
 #include "Entry.h"
 #include "EntryQueue.h"
 #include "SubsystemMap.h"
+#include "common/Graylog.h"
 
 namespace ceph {
 namespace log {
@@ -20,7 +23,7 @@ class Log : private Thread
   Log **m_indirect_this;
 
   SubsystemMap *m_subs;
-  
+
   pthread_mutex_t m_queue_mutex;
   pthread_mutex_t m_flush_mutex;
   pthread_cond_t m_cond_loggers;
@@ -37,6 +40,9 @@ class Log : private Thread
 
   int m_syslog_log, m_syslog_crash;
   int m_stderr_log, m_stderr_crash;
+  int m_graylog_log, m_graylog_crash;
+
+  Graylog::Ref m_graylog;
 
   bool m_stop;
 
@@ -51,7 +57,7 @@ class Log : private Thread
   void _log_message(const char *s, bool crash);
 
 public:
-  Log(SubsystemMap *s);
+  explicit Log(SubsystemMap *s);
   virtual ~Log();
 
   void set_flush_on_exit();
@@ -61,12 +67,18 @@ public:
   void set_log_file(std::string fn);
   void reopen_log_file();
 
-  void flush(); 
+  void flush();
 
   void dump_recent();
 
   void set_syslog_level(int log, int crash);
   void set_stderr_level(int log, int crash);
+  void set_graylog_level(int log, int crash);
+
+  void start_graylog();
+  void stop_graylog();
+
+  Graylog::Ref graylog() { return m_graylog; }
 
   Entry *create_entry(int level, int subsys);
   Entry *create_entry(int level, int subsys, size_t* expected_size);
diff --git a/src/log/Makefile.am b/src/log/Makefile.am
index b66e6cf..8dcaac0 100644
--- a/src/log/Makefile.am
+++ b/src/log/Makefile.am
@@ -1,6 +1,7 @@
 liblog_la_SOURCES = \
 	log/Log.cc \
 	log/SubsystemMap.cc
+
 noinst_LTLIBRARIES += liblog.la
 
 noinst_HEADERS += \
@@ -8,4 +9,3 @@ noinst_HEADERS += \
 	log/EntryQueue.h \
 	log/Log.h \
 	log/SubsystemMap.h
-
diff --git a/src/mds/Beacon.h b/src/mds/Beacon.h
index a63daff..29efb4a 100644
--- a/src/mds/Beacon.h
+++ b/src/mds/Beacon.h
@@ -67,7 +67,7 @@ class Beacon : public Dispatcher
   class C_MDS_BeaconSender : public Context {
     Beacon *beacon;
   public:
-    C_MDS_BeaconSender(Beacon *beacon_) : beacon(beacon_) {}
+    explicit C_MDS_BeaconSender(Beacon *beacon_) : beacon(beacon_) {}
     void finish(int r) {
       assert(beacon->lock.is_locked_by_me());
       beacon->sender = NULL;
diff --git a/src/mds/CDir.cc b/src/mds/CDir.cc
index 119fb32..f43c768 100644
--- a/src/mds/CDir.cc
+++ b/src/mds/CDir.cc
@@ -51,7 +51,7 @@ protected:
   MDSRank* get_mds() {return dir->cache->mds;}
 
 public:
-  CDirContext(CDir *d) : dir(d) {
+  explicit CDirContext(CDir *d) : dir(d) {
     assert(dir != NULL);
   }
 };
@@ -64,7 +64,7 @@ protected:
   MDSRank* get_mds() {return dir->cache->mds;}
 
 public:
-  CDirIOContext(CDir *d) : dir(d) {
+  explicit CDirIOContext(CDir *d) : dir(d) {
     assert(dir != NULL);
   }
 };
@@ -2714,7 +2714,7 @@ CDir *CDir::get_frozen_tree_root()
 
 class C_Dir_AuthUnpin : public CDirContext {
   public:
-  C_Dir_AuthUnpin(CDir *d) : CDirContext(d) {}
+  explicit C_Dir_AuthUnpin(CDir *d) : CDirContext(d) {}
   void finish(int r) {
     dir->auth_unpin(dir->get_inode());
   }
diff --git a/src/mds/CInode.cc b/src/mds/CInode.cc
index 038de0e..68ebd76 100644
--- a/src/mds/CInode.cc
+++ b/src/mds/CInode.cc
@@ -58,7 +58,7 @@ protected:
   CInode *in;
   MDSRank *get_mds() {return in->mdcache->mds;}
 public:
-  CInodeIOContext(CInode *in_) : in(in_) {
+  explicit CInodeIOContext(CInode *in_) : in(in_) {
     assert(in != NULL);
   }
 };
diff --git a/src/mds/InoTable.h b/src/mds/InoTable.h
index 02656b0..a767d46 100644
--- a/src/mds/InoTable.h
+++ b/src/mds/InoTable.h
@@ -26,7 +26,7 @@ class InoTable : public MDSTable {
   interval_set<inodeno_t> projected_free;
 
  public:
-  InoTable(MDSRank *m) : MDSTable(m, "inotable", true) { }
+  explicit InoTable(MDSRank *m) : MDSTable(m, "inotable", true) { }
 
   inodeno_t project_alloc_id(inodeno_t id=0);
   void apply_alloc_id(inodeno_t id);
diff --git a/src/mds/Locker.cc b/src/mds/Locker.cc
index aea88f5..25977fb 100644
--- a/src/mds/Locker.cc
+++ b/src/mds/Locker.cc
@@ -64,7 +64,7 @@ class LockerContext : public MDSInternalContextBase {
   }
 
   public:
-  LockerContext(Locker *locker_) : locker(locker_) {
+  explicit LockerContext(Locker *locker_) : locker(locker_) {
     assert(locker != NULL);
   }
 };
diff --git a/src/mds/LogEvent.h b/src/mds/LogEvent.h
index 26bf279..bf57db0 100644
--- a/src/mds/LogEvent.h
+++ b/src/mds/LogEvent.h
@@ -68,7 +68,7 @@ protected:
 public:
   LogSegment *_segment;
 
-  LogEvent(int t)
+  explicit LogEvent(int t)
     : _type(t), _start_off(0), _segment(0) { }
   virtual ~LogEvent() { }
 
diff --git a/src/mds/MDBalancer.cc b/src/mds/MDBalancer.cc
index 263c21a..4c42568 100644
--- a/src/mds/MDBalancer.cc
+++ b/src/mds/MDBalancer.cc
@@ -113,7 +113,7 @@ void MDBalancer::tick()
 
 class C_Bal_SendHeartbeat : public MDSInternalContext {
 public:
-  C_Bal_SendHeartbeat(MDSRank *mds_) : MDSInternalContext(mds_) { }
+  explicit C_Bal_SendHeartbeat(MDSRank *mds_) : MDSInternalContext(mds_) { }
   virtual void finish(int f) {
     mds->balancer->send_heartbeat();
   }
diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc
index bcf7046..0441f84 100644
--- a/src/mds/MDCache.cc
+++ b/src/mds/MDCache.cc
@@ -143,7 +143,7 @@ protected:
     return mdcache->mds;
   }
 public:
-  MDCacheContext(MDCache *mdc_) : mdcache(mdc_) {}
+  explicit MDCacheContext(MDCache *mdc_) : mdcache(mdc_) {}
 };
 
 
@@ -163,7 +163,7 @@ protected:
     return mdcache->mds;
   }
 public:
-  MDCacheIOContext(MDCache *mdc_) : mdcache(mdc_) {}
+  explicit MDCacheIOContext(MDCache *mdc_) : mdcache(mdc_) {}
 };
 
 
@@ -574,7 +574,7 @@ void MDCache::_create_system_file_finish(MutationRef& mut, CDentry *dn, version_
 
 struct C_MDS_RetryOpenRoot : public MDSInternalContext {
   MDCache *cache;
-  C_MDS_RetryOpenRoot(MDCache *c) : MDSInternalContext(c->mds), cache(c) {}
+  explicit C_MDS_RetryOpenRoot(MDCache *c) : MDSInternalContext(c->mds), cache(c) {}
   void finish(int r) {
     if (r < 0)
       cache->mds->suicide();
@@ -4475,7 +4475,7 @@ void MDCache::handle_cache_rejoin_weak(MMDSCacheRejoin *weak)
 
 class C_MDC_RejoinGatherFinish : public MDCacheContext {
 public:
-  C_MDC_RejoinGatherFinish(MDCache *c) : MDCacheContext(c) {}
+  explicit C_MDC_RejoinGatherFinish(MDCache *c) : MDCacheContext(c) {}
   void finish(int r) {
     mdcache->rejoin_gather_finish();
   }
@@ -5633,7 +5633,7 @@ void MDCache::do_delayed_cap_imports()
 }
 
 struct C_MDC_OpenSnapParents : public MDCacheContext {
-  C_MDC_OpenSnapParents(MDCache *c) : MDCacheContext(c) {}
+  explicit C_MDC_OpenSnapParents(MDCache *c) : MDCacheContext(c) {}
   void finish(int r) {
     mdcache->open_snap_parents();
   }
@@ -7251,7 +7251,7 @@ void MDCache::check_memory_usage()
 
 class C_MDC_ShutdownCheck : public MDCacheContext {
 public:
-  C_MDC_ShutdownCheck(MDCache *m) : MDCacheContext(m) {}
+  explicit C_MDC_ShutdownCheck(MDCache *m) : MDCacheContext(m) {}
   void finish(int) {
     mdcache->shutdown_check();
   }
@@ -7406,7 +7406,7 @@ bool MDCache::shutdown_pass()
   assert(subtrees.empty());
 
   // Still replicas of mydir?
-  if (mydir->inode->is_replicated()) {
+  if ((mydir != NULL) && mydir->inode->is_replicated()) {
     // We do this because otherwise acks to locks could come in after
     // we cap the log.
     dout(7) << "waiting for mydir replicas to release: " << *mydir << dendl;
@@ -11957,3 +11957,13 @@ void MDCache::notify_osdmap_changed()
   stray_manager.update_op_limit();
 }
 
+void MDCache::handle_conf_change(const struct md_config_t *conf,
+			     const std::set <std::string> &changed)
+{  // reacts to changed config keys; the caller must already hold mds_lock, conf itself is not read
+  assert(mds->mds_lock.is_locked_by_me());
+
+  if (changed.count("mds_max_purge_ops")
+      || changed.count("mds_max_purge_ops_per_pg")) {
+    stray_manager.update_op_limit();  // refresh the stray manager's op limit when either purge option changed
+  }
+}
diff --git a/src/mds/MDCache.h b/src/mds/MDCache.h
index 10641ea..3b44d0c 100644
--- a/src/mds/MDCache.h
+++ b/src/mds/MDCache.h
@@ -156,6 +156,9 @@ public:
     stray_manager.eval_stray(dn);
   }
 
+  void handle_conf_change(const struct md_config_t *conf,
+                          const std::set <std::string> &changed);
+
   void maybe_eval_stray(CInode *in, bool delay=false);
   bool is_readonly() { return readonly; }
   void force_readonly();
@@ -626,7 +629,7 @@ public:
   Migrator *migrator;
 
  public:
-  MDCache(MDSRank *m);
+  explicit MDCache(MDSRank *m);
   ~MDCache();
   
   // debug
diff --git a/src/mds/MDLog.cc b/src/mds/MDLog.cc
index bc27661..86a1945 100644
--- a/src/mds/MDLog.cc
+++ b/src/mds/MDLog.cc
@@ -104,7 +104,7 @@ class C_MDL_WriteError : public MDSIOContextBase {
   }
 
   public:
-  C_MDL_WriteError(MDLog *m) : mdlog(m) {}
+  explicit C_MDL_WriteError(MDLog *m) : mdlog(m) {}
 };
 
 
diff --git a/src/mds/MDLog.h b/src/mds/MDLog.h
index 558f723..663eac6 100644
--- a/src/mds/MDLog.h
+++ b/src/mds/MDLog.h
@@ -87,7 +87,7 @@ protected:
   class ReplayThread : public Thread {
     MDLog *log;
   public:
-    ReplayThread(MDLog *l) : log(l) {}
+    explicit ReplayThread(MDLog *l) : log(l) {}
     void* entry() {
       log->_replay_thread();
       return 0;
@@ -109,7 +109,7 @@ protected:
     MDSInternalContextBase *completion;
   public:
     void set_completion(MDSInternalContextBase *c) {completion = c;}
-    RecoveryThread(MDLog *l) : log(l), completion(NULL) {}
+    explicit RecoveryThread(MDLog *l) : log(l), completion(NULL) {}
     void* entry() {
       log->_recovery_thread(completion);
       return 0;
@@ -141,7 +141,7 @@ protected:
   class SubmitThread : public Thread {
     MDLog *log;
   public:
-    SubmitThread(MDLog *l) : log(l) {}
+    explicit SubmitThread(MDLog *l) : log(l) {}
     void* entry() {
       log->_submit_thread();
       return 0;
@@ -182,7 +182,7 @@ public:
   void set_write_iohint(unsigned iohint_flags);
 
 public:
-  MDLog(MDSRank *m) : mds(m),
+  explicit MDLog(MDSRank *m) : mds(m),
                       num_events(0), 
                       unflushed(0),
                       capped(false),
diff --git a/src/mds/MDSAuthCaps.h b/src/mds/MDSAuthCaps.h
index ccf2857..eee7707 100644
--- a/src/mds/MDSAuthCaps.h
+++ b/src/mds/MDSAuthCaps.h
@@ -74,7 +74,7 @@ struct MDSCapMatch {
 
   MDSCapMatch() : uid(MDS_AUTH_UID_ANY) {}
   MDSCapMatch(int64_t uid_, std::vector<gid_t>& gids_) : uid(uid_), gids(gids_) {}
-  MDSCapMatch(std::string path_)
+  explicit MDSCapMatch(std::string path_)
     : uid(MDS_AUTH_UID_ANY), path(path_) {
     normalize_path();
   }
@@ -119,11 +119,11 @@ class MDSAuthCaps
   std::vector<MDSCapGrant> grants;
 
 public:
-  MDSAuthCaps(CephContext *cct_=NULL)
+  explicit MDSAuthCaps(CephContext *cct_=NULL)
     : cct(cct_) { }
 
   // this ctor is used by spirit/phoenix; doesn't need cct.
-  MDSAuthCaps(const std::vector<MDSCapGrant> &grants_)
+  explicit MDSAuthCaps(const std::vector<MDSCapGrant> &grants_)
     : cct(NULL), grants(grants_) { }
 
   void set_allow_all();
diff --git a/src/mds/MDSContext.h b/src/mds/MDSContext.h
index cd49f3e..d26d1bd 100644
--- a/src/mds/MDSContext.h
+++ b/src/mds/MDSContext.h
@@ -55,7 +55,7 @@ protected:
   virtual MDSRank* get_mds();
 
 public:
-  MDSInternalContext(MDSRank *mds_) : mds(mds_) {
+  explicit MDSInternalContext(MDSRank *mds_) : mds(mds_) {
     assert(mds != NULL);
   }
 };
@@ -91,7 +91,7 @@ protected:
   virtual MDSRank* get_mds();
 
 public:
-  MDSIOContext(MDSRank *mds_) : mds(mds_) {
+  explicit MDSIOContext(MDSRank *mds_) : mds(mds_) {
     assert(mds != NULL);
   }
 };
diff --git a/src/mds/MDSDaemon.cc b/src/mds/MDSDaemon.cc
index 1e9f4f6..7e93a98 100644
--- a/src/mds/MDSDaemon.cc
+++ b/src/mds/MDSDaemon.cc
@@ -1,4 +1,4 @@
-// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- 
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
 // vim: ts=8 sw=2 smarttab
 /*
  * Ceph - scalable distributed file system
@@ -7,9 +7,9 @@
  *
  * This is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
- * License version 2.1, as published by the Free Software 
+ * License version 2.1, as published by the Free Software
  * Foundation.  See file COPYING.
- * 
+ *
  */
 
 #include <unistd.h>
@@ -99,7 +99,7 @@ class C_VoidFn : public Context
 };
 
 // cons/des
-MDSDaemon::MDSDaemon(const std::string &n, Messenger *m, MonClient *mc) : 
+MDSDaemon::MDSDaemon(const std::string &n, Messenger *m, MonClient *mc) :
   Dispatcher(m->cct),
   mds_lock("MDSDaemon::mds_lock"),
   stopping(false),
@@ -139,9 +139,9 @@ MDSDaemon::MDSDaemon(const std::string &n, Messenger *m, MonClient *mc) :
 MDSDaemon::~MDSDaemon() {
   Mutex::Locker lock(mds_lock);
 
-  delete mds_rank; 
-  mds_rank = NULL; 
-  delete objecter; 
+  delete mds_rank;
+  mds_rank = NULL;
+  delete objecter;
   objecter = NULL;
   delete mdsmap;
   mdsmap = NULL;
@@ -153,7 +153,7 @@ MDSDaemon::~MDSDaemon() {
 class MDSSocketHook : public AdminSocketHook {
   MDSDaemon *mds;
 public:
-  MDSSocketHook(MDSDaemon *m) : mds(m) {}
+  explicit MDSSocketHook(MDSDaemon *m) : mds(m) {}
   bool call(std::string command, cmdmap_t& cmdmap, std::string format,
 	    bufferlist& out) {
     stringstream ss;
@@ -178,23 +178,21 @@ bool MDSDaemon::asok_command(string command, cmdmap_t& cmdmap, string format,
       dout(1) << "Can't run that command on an inactive MDS!" << dendl;
       f->dump_string("error", "mds_not_active");
     } else {
-      handled =  mds_rank->handle_asok_command(command, cmdmap, f, ss);
+      handled = mds_rank->handle_asok_command(command, cmdmap, f, ss);
     }
-
   }
   f->flush(ss);
   delete f;
-  
+
   dout(1) << "asok_command: " << command << " (complete)" << dendl;
-  
+
   return handled;
 }
 
 void MDSDaemon::dump_status(Formatter *f)
 {
-  const OSDMap *osdmap = objecter->get_osdmap_read();
-  const epoch_t osd_epoch = osdmap->get_epoch();
-  objecter->put_osdmap_read();
+  const epoch_t osd_epoch = objecter->with_osdmap(
+    std::mem_fn(&OSDMap::get_epoch));
 
   f->open_object_section("status");
   f->dump_stream("cluster_fsid") << monc->get_fsid();
@@ -226,6 +224,7 @@ void MDSDaemon::set_up_admin_socket()
 {
   int r;
   AdminSocket *admin_socket = g_ceph_context->get_admin_socket();
+  assert(asok_hook == nullptr);
   asok_hook = new MDSSocketHook(this);
   r = admin_socket->register_command("status", "status", asok_hook,
 				     "high-level status of MDS");
@@ -238,6 +237,10 @@ void MDSDaemon::set_up_admin_socket()
 				     "ops", asok_hook,
 				     "show the ops currently in flight");
   assert(r == 0);
+  r = admin_socket->register_command("dump_blocked_ops", "dump_blocked_ops",
+      asok_hook,
+      "show the blocked ops currently in flight");
+  assert(r == 0);
   r = admin_socket->register_command("dump_historic_ops", "dump_historic_ops",
 				     asok_hook,
 				     "show slowest recent ops");
@@ -329,6 +332,7 @@ void MDSDaemon::clean_up_admin_socket()
   admin_socket->unregister_command("status");
   admin_socket->unregister_command("dump_ops_in_flight");
   admin_socket->unregister_command("ops");
+  admin_socket->unregister_command("dump_blocked_ops");
   admin_socket->unregister_command("dump_historic_ops");
   admin_socket->unregister_command("scrub_path");
   admin_socket->unregister_command("flush_path");
@@ -352,6 +356,14 @@ const char** MDSDaemon::get_tracked_conf_keys() const
     "clog_to_syslog",
     "clog_to_syslog_facility",
     "clog_to_syslog_level",
+    // StrayManager
+    "mds_max_purge_ops",
+    "mds_max_purge_ops_per_pg",
+    "clog_to_graylog",
+    "clog_to_graylog_host",
+    "clog_to_graylog_port",
+    "host",
+    "fsid",
     NULL
   };
   return KEYS;
@@ -360,7 +372,12 @@ const char** MDSDaemon::get_tracked_conf_keys() const
 void MDSDaemon::handle_conf_change(const struct md_config_t *conf,
 			     const std::set <std::string> &changed)
 {
-  Mutex::Locker l(mds_lock);
+  // We may be called within mds_lock (via `tell`) or outwith the
+  // lock (via admin socket `config set`), so handle either case.
+  const bool initially_locked = mds_lock.is_locked_by_me();
+  if (!initially_locked) {
+    mds_lock.Lock();
+  }
 
   if (changed.count("mds_op_complaint_time") ||
       changed.count("mds_op_log_threshold")) {
@@ -384,14 +401,29 @@ void MDSDaemon::handle_conf_change(const struct md_config_t *conf,
   if (changed.count("clog_to_monitors") ||
       changed.count("clog_to_syslog") ||
       changed.count("clog_to_syslog_level") ||
-      changed.count("clog_to_syslog_facility")) {
+      changed.count("clog_to_syslog_facility") ||
+      changed.count("clog_to_graylog") ||
+      changed.count("clog_to_graylog_host") ||
+      changed.count("clog_to_graylog_port") ||
+      changed.count("host") ||
+      changed.count("fsid")) {
     if (mds_rank) {
       mds_rank->update_log_config();
     }
   }
+
   if (!g_conf->mds_log_pause && changed.count("mds_log_pause")) {
-    if (mds_rank)
+    if (mds_rank) {
       mds_rank->mdlog->kick_submitter();
+    }
+  }
+
+  if (mds_rank) {
+    mds_rank->mdcache->handle_conf_change(conf, changed);
+  }
+
+  if (!initially_locked) {
+    mds_lock.Unlock();
   }
 }
 
@@ -431,7 +463,7 @@ int MDSDaemon::init(MDSMap::DaemonState wanted_state)
 
   // tell monc about log_client so it will know about mon session resets
   monc->set_log_client(&log_client);
-  
+
   int r = monc->authenticate();
   if (r < 0) {
     derr << "ERROR: failed to authenticate: " << cpp_strerror(-r) << dendl;
@@ -462,21 +494,27 @@ int MDSDaemon::init(MDSMap::DaemonState wanted_state)
   while (true) {
     objecter->maybe_request_map();
     objecter->wait_for_osd_map();
-    const OSDMap *osdmap = objecter->get_osdmap_read();
-    uint64_t osd_features = osdmap->get_up_osd_features();
-    if (osd_features & CEPH_FEATURE_OSD_TMAP2OMAP) {
-      objecter->put_osdmap_read();
+    if (objecter->with_osdmap([&](const OSDMap& o) {
+	  uint64_t osd_features = o.get_up_osd_features();
+	  if (osd_features & CEPH_FEATURE_OSD_TMAP2OMAP)
+	    return true;
+	  if (o.get_num_up_osds() > 0) {
+	    derr << "*** one or more OSDs do not support TMAP2OMAP; upgrade "
+		 << "OSDs before starting MDS (or downgrade MDS) ***" << dendl;
+	  } else {
+	    derr << "*** no OSDs are up as of epoch " << o.get_epoch()
+		 << ", waiting" << dendl;
+	  }
+	  return false;
+	}))
       break;
-    }
-    if (osdmap->get_num_up_osds() > 0) {
-        derr << "*** one or more OSDs do not support TMAP2OMAP; upgrade OSDs before starting MDS (or downgrade MDS) ***" << dendl;
-    } else {
-        derr << "*** no OSDs are up as of epoch " << osdmap->get_epoch() << ", waiting" << dendl;
-    }
-    objecter->put_osdmap_read();
     sleep(10);
   }
 
+  // Set up admin socket before taking mds_lock, so that ordering
+  // is consistent (later we take mds_lock within asok callbacks)
+  set_up_admin_socket();
+  g_conf->add_observer(this);
   mds_lock.Lock();
   if (beacon.get_want_state() == MDSMap::STATE_DNE) {
     suicide();  // we could do something more graceful here
@@ -521,13 +559,10 @@ int MDSDaemon::init(MDSMap::DaemonState wanted_state)
   }
   beacon.init(mdsmap, wanted_state, standby_for_rank, standby_for_name);
   messenger->set_myname(entity_name_t::MDS(MDS_RANK_NONE));
-  
+
   // schedule tick
   reset_tick();
-
-  set_up_admin_socket();
   g_conf->add_observer(this);
-
   mds_lock.Unlock();
 
   return 0;
@@ -681,7 +716,7 @@ int MDSDaemon::_handle_command(
     MDSDaemon *mds;
 
     public:
-    SuicideLater(MDSDaemon *mds_) : mds(mds_) {}
+    explicit SuicideLater(MDSDaemon *mds_) : mds(mds_) {}
     void finish(int r) {
       // Wait a little to improve chances of caller getting
       // our response before seeing us disappear from mdsmap
@@ -698,7 +733,7 @@ int MDSDaemon::_handle_command(
 
     public:
 
-    RespawnLater(MDSDaemon *mds_) : mds(mds_) {}
+    explicit RespawnLater(MDSDaemon *mds_) : mds(mds_) {}
     void finish(int r) {
       // Wait a little to improve chances of caller getting
       // our response before seeing us disappear from mdsmap
@@ -864,7 +899,7 @@ void MDSDaemon::handle_mds_map(MMDSMap *m)
 
   // is it new?
   if (epoch <= mdsmap->get_epoch()) {
-    dout(5) << " old map epoch " << epoch << " <= " << mdsmap->get_epoch() 
+    dout(5) << " old map epoch " << epoch << " <= " << mdsmap->get_epoch()
 	    << ", discarding" << dendl;
     m->put();
     return;
@@ -980,7 +1015,7 @@ void MDSDaemon::handle_mds_map(MMDSMap *m)
     }
 
     // MDSRank is active: let him process the map, we have no say.
-    dout(10) <<  __func__ << ": handling map as rank " 
+    dout(10) <<  __func__ << ": handling map as rank "
              << mds_rank->get_nodeid() << dendl;
     mds_rank->handle_mds_map(m, oldmap);
   }
@@ -1014,7 +1049,7 @@ void MDSDaemon::_handle_mds_map(MDSMap *oldmap)
     beacon.set_want_state(mdsmap, new_state);
     return;
   }
-  
+
   // Case where we have sent a boot beacon that isn't reflected yet
   if (beacon.get_want_state() == MDSMap::STATE_BOOT) {
     dout(10) << "not in map yet" << dendl;
@@ -1048,7 +1083,7 @@ void MDSDaemon::suicide()
 
   //because add_observer is called after set_up_admin_socket
   //so we can use asok_hook to avoid assert in the remove_observer
-  if (asok_hook != NULL) 
+  if (asok_hook != NULL)
     g_conf->remove_observer(this);
 
   clean_up_admin_socket();
@@ -1138,7 +1173,7 @@ bool MDSDaemon::ms_dispatch(Message *m)
   }
 
   // First see if it's a daemon message
-  const bool handled_core = handle_core_message(m); 
+  const bool handled_core = handle_core_message(m);
   if (handled_core) {
     return true;
   }
@@ -1190,7 +1225,7 @@ bool MDSDaemon::handle_core_message(Message *m)
   case MSG_MON_COMMAND:
     ALLOW_MESSAGES_FROM(CEPH_ENTITY_TYPE_MON);
     handle_command(static_cast<MMonCommand*>(m));
-    break;    
+    break;
 
     // OSD
   case MSG_COMMAND:
@@ -1210,11 +1245,11 @@ bool MDSDaemon::handle_core_message(Message *m)
   return true;
 }
 
-void MDSDaemon::ms_handle_connect(Connection *con) 
+void MDSDaemon::ms_handle_connect(Connection *con)
 {
 }
 
-bool MDSDaemon::ms_handle_reset(Connection *con) 
+bool MDSDaemon::ms_handle_reset(Connection *con)
 {
   if (con->get_peer_type() != CEPH_ENTITY_TYPE_CLIENT)
     return false;
@@ -1242,7 +1277,7 @@ bool MDSDaemon::ms_handle_reset(Connection *con)
 }
 
 
-void MDSDaemon::ms_handle_remote_reset(Connection *con) 
+void MDSDaemon::ms_handle_remote_reset(Connection *con)
 {
   if (con->get_peer_type() != CEPH_ENTITY_TYPE_CLIENT)
     return;
@@ -1312,7 +1347,7 @@ bool MDSDaemon::ms_verify_authorizer(Connection *con, int peer_type,
       // a new connection, rather than a new client
       s = mds_rank->sessionmap.get_session(n);
     }
-    
+
     // Wire up a Session* to this connection
     // It doesn't go into a SessionMap instance until it sends an explicit
     // request to open a session (initial state of Session is `closed`)
@@ -1406,4 +1441,3 @@ bool MDSDaemon::is_clean_shutdown()
     return true;
   }
 }
-
diff --git a/src/mds/MDSDaemon.h b/src/mds/MDSDaemon.h
index 8c61ea9..783f37e 100644
--- a/src/mds/MDSDaemon.h
+++ b/src/mds/MDSDaemon.h
@@ -130,7 +130,7 @@ class MDSDaemon : public Dispatcher, public md_config_obs_t {
     protected:
       MDSDaemon *mds_daemon;
   public:
-    C_MDS_Tick(MDSDaemon *m) : mds_daemon(m) {}
+    explicit C_MDS_Tick(MDSDaemon *m) : mds_daemon(m) {}
     void finish(int r) {
       assert(mds_daemon->mds_lock.is_locked_by_me());
 
diff --git a/src/mds/MDSRank.cc b/src/mds/MDSRank.cc
index a836dfb..44d6e3d 100644
--- a/src/mds/MDSRank.cc
+++ b/src/mds/MDSRank.cc
@@ -1,4 +1,4 @@
-// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- 
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
 // vim: ts=8 sw=2 smarttab
 /*
  * Ceph - scalable distributed file system
@@ -7,9 +7,9 @@
  *
  * This is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
- * License version 2.1, as published by the Free Software 
+ * License version 2.1, as published by the Free Software
  * Foundation.  See file COPYING.
- * 
+ *
  */
 
 #include "common/debug.h"
@@ -58,7 +58,7 @@ MDSRank::MDSRank(
     balancer(NULL), scrubstack(NULL),
     inotable(NULL), snapserver(NULL), snapclient(NULL),
     sessionmap(this), logger(NULL), mlogger(NULL),
-    op_tracker(g_ceph_context, g_conf->mds_enable_op_tracker, 
+    op_tracker(g_ceph_context, g_conf->mds_enable_op_tracker,
                g_conf->osd_num_op_tracker_shard),
     last_state(MDSMap::STATE_BOOT),
     state(MDSMap::STATE_BOOT),
@@ -175,7 +175,7 @@ void MDSRankDispatcher::tick()
   // log
   utime_t now = ceph_clock_now(g_ceph_context);
   mds_load_t load = balancer->get_load(now);
-  
+
   if (logger) {
     logger->set(l_mds_load_cent, 100 * load.mds_load());
     logger->set(l_mds_dispatch_queue_len, messenger->get_dispatch_queue_len());
@@ -189,10 +189,10 @@ void MDSRankDispatcher::tick()
     server->find_idle_sessions();
     locker->tick();
   }
-  
+
   if (is_reconnect())
     server->reconnect_tick();
-  
+
   if (is_active()) {
     balancer->tick();
     mdcache->find_stale_fragment_freeze();
@@ -254,7 +254,7 @@ class C_VoidFn : public MDSInternalContext
 {
   typedef void (MDSRank::*fn_ptr)();
   protected:
-   fn_ptr fn; 
+   fn_ptr fn;
   public:
   C_VoidFn(MDSRank *mds_, fn_ptr fn_)
     : MDSInternalContext(mds_), fn(fn_)
@@ -447,7 +447,7 @@ bool MDSRank::_dispatch(Message *m, bool new_msg)
   // hack: thrash exports
   static utime_t start;
   utime_t now = ceph_clock_now(g_ceph_context);
-  if (start == utime_t()) 
+  if (start == utime_t())
     start = now;
   /*double el = now - start;
   if (el > 30.0 &&
@@ -456,11 +456,11 @@ bool MDSRank::_dispatch(Message *m, bool new_msg)
     set<mds_rank_t> s;
     if (!is_active()) break;
     mdsmap->get_mds_set(s, MDSMap::STATE_ACTIVE);
-    if (s.size() < 2 || mdcache->get_num_inodes() < 10) 
+    if (s.size() < 2 || mdcache->get_num_inodes() < 10)
       break;  // need peers for this to work.
 
     dout(7) << "mds thrashing exports pass " << (i+1) << "/" << g_conf->mds_thrash_exports << dendl;
-    
+
     // pick a random dir inode
     CInode *in = mdcache->hack_pick_random_inode();
 
@@ -474,7 +474,7 @@ bool MDSRank::_dispatch(Message *m, bool new_msg)
       CDir *dir = *p;
       if (!dir->get_parent_dir()) continue;    // must be linked.
       if (!dir->is_auth()) continue;           // must be auth.
-  
+
       mds_rank_t dest;
       do {
         int k = rand() % s.size();
@@ -490,7 +490,7 @@ bool MDSRank::_dispatch(Message *m, bool new_msg)
     if (!is_active()) break;
     if (mdcache->get_num_fragmenting_dirs() > 5) break;
     dout(7) << "mds thrashing fragments pass " << (i+1) << "/" << g_conf->mds_thrash_fragments << dendl;
-    
+
     // pick a random dir inode
     CInode *in = mdcache->hack_pick_random_inode();
 
@@ -512,7 +512,7 @@ bool MDSRank::_dispatch(Message *m, bool new_msg)
   if (false &&
       mdcache->get_root() &&
       mdcache->get_root()->dir &&
-      !(mdcache->get_root()->dir->is_hashed() || 
+      !(mdcache->get_root()->dir->is_hashed() ||
         mdcache->get_root()->dir->is_hashing())) {
     dout(0) << "hashing root" << dendl;
     mdcache->migrator->hash_dir(mdcache->get_root()->dir);
@@ -563,12 +563,12 @@ bool MDSRank::handle_deferrable_message(Message *m)
     ALLOW_MESSAGES_FROM(CEPH_ENTITY_TYPE_MDS);
     mdcache->dispatch(m);
     break;
-    
+
   case MDS_PORT_MIGRATOR:
     ALLOW_MESSAGES_FROM(CEPH_ENTITY_TYPE_MDS);
     mdcache->migrator->dispatch(m);
     break;
-    
+
   default:
     switch (m->get_type()) {
       // SERVER
@@ -583,12 +583,12 @@ bool MDSRank::handle_deferrable_message(Message *m)
       ALLOW_MESSAGES_FROM(CEPH_ENTITY_TYPE_MDS);
       server->dispatch(m);
       break;
-      
+
     case MSG_MDS_HEARTBEAT:
       ALLOW_MESSAGES_FROM(CEPH_ENTITY_TYPE_MDS);
       balancer->proc_message(m);
       break;
-	  
+
     case MSG_MDS_TABLE_REQUEST:
       ALLOW_MESSAGES_FROM(CEPH_ENTITY_TYPE_MDS);
       {
@@ -608,14 +608,14 @@ bool MDSRank::handle_deferrable_message(Message *m)
       ALLOW_MESSAGES_FROM(CEPH_ENTITY_TYPE_MDS);
       locker->dispatch(m);
       break;
-      
+
     case CEPH_MSG_CLIENT_CAPS:
     case CEPH_MSG_CLIENT_CAPRELEASE:
     case CEPH_MSG_CLIENT_LEASE:
       ALLOW_MESSAGES_FROM(CEPH_ENTITY_TYPE_CLIENT);
       locker->dispatch(m);
       break;
-      
+
     default:
       return false;
     }
@@ -715,7 +715,7 @@ bool MDSRank::is_stale_message(Message *m)
 
 
 void MDSRank::send_message(Message *m, Connection *c)
-{ 
+{
   assert(c);
   c->send_message(m);
 }
@@ -731,7 +731,7 @@ void MDSRank::send_message_mds(Message *m, mds_rank_t mds)
 
   // send mdsmap first?
   if (mds != whoami && peer_mdsmap_epoch[mds] < mdsmap->get_epoch()) {
-    messenger->send_message(new MMDSMap(monc->get_fsid(), mdsmap), 
+    messenger->send_message(new MMDSMap(monc->get_fsid(), mdsmap),
 			    mdsmap->get_inst(mds));
     peer_mdsmap_epoch[mds] = mdsmap->get_epoch();
   }
@@ -756,17 +756,17 @@ void MDSRank::forward_message_mds(Message *m, mds_rank_t mds)
      * the affected metadata may migrate, in which case the new authority
      * won't have the metareq_id in the completed request map.
      */
-    // NEW: always make the client resend!  
+    // NEW: always make the client resend!
     bool client_must_resend = true;  //!creq->can_forward();
 
     // tell the client where it should go
     messenger->send_message(new MClientRequestForward(creq->get_tid(), mds, creq->get_num_fwd(),
 						      client_must_resend),
 			    creq->get_source_inst());
-    
+
     if (client_must_resend) {
       m->put();
-      return; 
+      return;
     }
   }
 
@@ -778,7 +778,7 @@ void MDSRank::forward_message_mds(Message *m, mds_rank_t mds)
 
   // send mdsmap first?
   if (peer_mdsmap_epoch[mds] < mdsmap->get_epoch()) {
-    messenger->send_message(new MMDSMap(monc->get_fsid(), mdsmap), 
+    messenger->send_message(new MMDSMap(monc->get_fsid(), mdsmap),
 			    mdsmap->get_inst(mds));
     peer_mdsmap_epoch[mds] = mdsmap->get_epoch();
   }
@@ -1005,7 +1005,7 @@ void MDSRank::replay_start()
 
   if (is_standby_replay())
     standby_replaying = true;
-  
+
   calc_recovery_set();
 
   // Check if we need to wait for a newer OSD map before starting
@@ -1018,7 +1018,7 @@ void MDSRank::replay_start()
     delete fin;
     boot_start();
   } else {
-    dout(1) << " waiting for osdmap " << mdsmap->get_last_failure_osd_epoch() 
+    dout(1) << " waiting for osdmap " << mdsmap->get_last_failure_osd_epoch()
 	    << " (which blacklists prior instance)" << dendl;
   }
 }
@@ -1072,7 +1072,7 @@ inline void MDSRank::standby_replay_restart()
           this,
 	  mdlog->get_journaler()->get_read_pos()));
     } else {
-      dout(1) << " waiting for osdmap " << mdsmap->get_last_failure_osd_epoch() 
+      dout(1) << " waiting for osdmap " << mdsmap->get_last_failure_osd_epoch()
               << " (which blacklists prior instance)" << dendl;
     }
   }
@@ -1080,7 +1080,7 @@ inline void MDSRank::standby_replay_restart()
 
 class MDSRank::C_MDS_StandbyReplayRestart : public MDSInternalContext {
 public:
-  C_MDS_StandbyReplayRestart(MDSRank *m) : MDSInternalContext(m) {}
+  explicit C_MDS_StandbyReplayRestart(MDSRank *m) : MDSInternalContext(m) {}
   void finish(int r) {
     assert(!r);
     mds->standby_replay_restart();
@@ -1146,7 +1146,7 @@ void MDSRank::replay_done()
 
   if (mdsmap->get_num_in_mds() == 1 &&
       mdsmap->get_num_failed_mds() == 0) { // just me!
-    dout(2) << "i am alone, moving to state reconnect" << dendl;      
+    dout(2) << "i am alone, moving to state reconnect" << dendl;
     request_state(MDSMap::STATE_RECONNECT);
   } else {
     dout(2) << "i am not alone, moving to state resolve" << dendl;
@@ -1276,7 +1276,7 @@ void MDSRank::recovery_done(int oldstate)
 {
   dout(1) << "recovery_done -- successful recovery!" << dendl;
   assert(is_clientreplay() || is_active());
-  
+
   // kick snaptable (resent AGREEs)
   if (mdsmap->get_tableserver() == whoami) {
     set<mds_rank_t> active;
@@ -1394,7 +1394,7 @@ void MDSRankDispatcher::handle_mds_map(
   version_t epoch = m->get_epoch();
 
   // note source's map version
-  if (m->get_source().is_mds() && 
+  if (m->get_source().is_mds() &&
       peer_mdsmap_epoch[mds_rank_t(m->get_source().num())] < epoch) {
     dout(15) << " peer " << m->get_source()
 	     << " has mdsmap epoch >= " << epoch
@@ -1490,7 +1490,7 @@ void MDSRankDispatcher::handle_mds_map(
       }
     }
   }
-  
+
   // RESOLVE
   // is someone else newly resolving?
   if (is_resolve() || is_reconnect() || is_rejoin() ||
@@ -1503,7 +1503,7 @@ void MDSRankDispatcher::handle_mds_map(
       mdcache->send_resolves();
     }
   }
-  
+
   // REJOIN
   // is everybody finally rejoining?
   if (is_rejoin() || is_clientreplay() || is_active() || is_stopping()) {
@@ -1513,7 +1513,7 @@ void MDSRankDispatcher::handle_mds_map(
 
     // did we finish?
     if (g_conf->mds_dump_cache_after_rejoin &&
-	oldmap->is_rejoining() && !mdsmap->is_rejoining()) 
+	oldmap->is_rejoining() && !mdsmap->is_rejoining())
       mdcache->dump_cache();      // for DEBUG only
 
     if (oldstate >= MDSMap::STATE_REJOIN) {
@@ -1545,7 +1545,7 @@ void MDSRankDispatcher::handle_mds_map(
     oldmap->get_mds_set(oldactive, MDSMap::STATE_CLIENTREPLAY);
     mdsmap->get_mds_set(active, MDSMap::STATE_ACTIVE);
     mdsmap->get_mds_set(active, MDSMap::STATE_CLIENTREPLAY);
-    for (set<mds_rank_t>::iterator p = active.begin(); p != active.end(); ++p) 
+    for (set<mds_rank_t>::iterator p = active.begin(); p != active.end(); ++p)
       if (*p != whoami &&            // not me
 	  oldactive.count(*p) == 0)  // newly so?
 	handle_mds_recovery(*p);
@@ -1584,7 +1584,7 @@ void MDSRankDispatcher::handle_mds_map(
     set<mds_rank_t> oldstopped, stopped;
     oldmap->get_stopped_mds_set(oldstopped);
     mdsmap->get_stopped_mds_set(stopped);
-    for (set<mds_rank_t>::iterator p = stopped.begin(); p != stopped.end(); ++p) 
+    for (set<mds_rank_t>::iterator p = stopped.begin(); p != stopped.end(); ++p)
       if (oldstopped.count(*p) == 0)      // newly so?
 	mdcache->migrator->handle_mds_failure_or_stop(*p);
   }
@@ -1606,10 +1606,8 @@ void MDSRankDispatcher::handle_mds_map(
     // Before going active, set OSD epoch barrier to latest (so that
     // we don't risk handing out caps to clients with old OSD maps that
     // might not include barriers from the previous incarnation of this MDS)
-    const OSDMap *osdmap = objecter->get_osdmap_read();
-    const epoch_t osd_epoch = osdmap->get_epoch();
-    objecter->put_osdmap_read();
-    set_osd_epoch_barrier(osd_epoch);
+    set_osd_epoch_barrier(objecter->with_osdmap(
+			    std::mem_fn(&OSDMap::get_epoch)));
   }
 
   if (is_active()) {
@@ -1634,10 +1632,10 @@ void MDSRankDispatcher::handle_mds_map(
   mdcache->notify_mdsmap_changed();
 }
 
-void MDSRank::handle_mds_recovery(mds_rank_t who) 
+void MDSRank::handle_mds_recovery(mds_rank_t who)
 {
   dout(5) << "handle_mds_recovery mds." << who << dendl;
-  
+
   mdcache->handle_mds_recovery(who);
 
   if (mdsmap->get_tableserver() == whoami) {
@@ -1669,31 +1667,39 @@ bool MDSRankDispatcher::handle_asok_command(
              command == "ops") {
     RWLock::RLocker l(op_tracker.lock);
     if (!op_tracker.tracking_enabled) {
-      ss << "op_tracker tracking is not enabled";
+      ss << "op_tracker tracking is not enabled now, so no ops are tracked currently, even those get stuck. \
+	  please enable \"osd_enable_op_tracker\", and the tracker will start to track new ops received afterwards.";
     } else {
       op_tracker.dump_ops_in_flight(f);
     }
+  } else if (command == "dump_blocked_ops") {
+    if (!op_tracker.tracking_enabled) {
+      ss << "op_tracker tracking is not enabled now, so no ops are tracked currently, even those get stuck. \
+	Please enable \"osd_enable_op_tracker\", and the tracker will start to track new ops received afterwards.";
+    } else {
+      op_tracker.dump_ops_in_flight(f, true);
+    }
   } else if (command == "dump_historic_ops") {
     RWLock::RLocker l(op_tracker.lock);
     if (!op_tracker.tracking_enabled) {
-      ss << "op_tracker tracking is not enabled";
+      ss << "op_tracker tracking is not enabled now, so no ops are tracked currently, even those get stuck. \
+	  please enable \"osd_enable_op_tracker\", and the tracker will start to track new ops received afterwards.";
     } else {
       op_tracker.dump_historic_ops(f);
     }
   } else if (command == "osdmap barrier") {
     int64_t target_epoch = 0;
     bool got_val = cmd_getval(g_ceph_context, cmdmap, "target_epoch", target_epoch);
-    
+
     if (!got_val) {
       ss << "no target epoch given";
-      delete f;
       return true;
     }
-    
+
     mds_lock.Lock();
     set_osd_epoch_barrier(target_epoch);
     mds_lock.Unlock();
-    
+
     C_SaferCond cond;
     bool already_got = objecter->wait_for_map(target_epoch, &cond);
     if (!already_got) {
@@ -1702,24 +1708,24 @@ bool MDSRankDispatcher::handle_asok_command(
     }
   } else if (command == "session ls") {
     mds_lock.Lock();
-    
+
     heartbeat_reset();
-    
+
     dump_sessions(SessionFilter(), f);
-    
+
     mds_lock.Unlock();
   } else if (command == "session evict") {
     std::string client_id;
     const bool got_arg = cmd_getval(g_ceph_context, cmdmap, "client_id", client_id);
     assert(got_arg == true);
-    
+
     mds_lock.Lock();
     Session *session = sessionmap.get_session(entity_name_t(CEPH_ENTITY_TYPE_CLIENT,
                                                             strtol(client_id.c_str(), 0, 10)));
     if (session) {
       C_SaferCond on_safe;
       server->kill_session(session, &on_safe);
-      
+
       mds_lock.Unlock();
       on_safe.wait();
     } else {
@@ -1748,13 +1754,11 @@ bool MDSRankDispatcher::handle_asok_command(
     string path;
     if(!cmd_getval(g_ceph_context, cmdmap, "path", path)) {
       ss << "malformed path";
-      delete f;
       return true;
     }
     int64_t rank;
     if(!cmd_getval(g_ceph_context, cmdmap, "rank", rank)) {
       ss << "malformed rank";
-      delete f;
       return true;
     }
     command_export_dir(f, path, (mds_rank_t)rank);
@@ -1847,13 +1851,13 @@ void MDSRankDispatcher::dump_sessions(const SessionFilter &filter, Formatter *f)
     if (!filter.match(*s, std::bind(&Server::waiting_for_reconnect, server, std::placeholders::_1))) {
       continue;
     }
-    
+
     f->open_object_section("session");
     f->dump_int("id", p->first.num());
-    
+
     f->dump_int("num_leases", s->leases.size());
     f->dump_int("num_caps", s->caps.size());
-    
+
     f->dump_string("state", s->get_state_name());
     f->dump_int("replay_requests", is_clientreplay() ? s->get_request_count() : 0);
     f->dump_unsigned("completed_requests", s->get_num_completed_requests());
@@ -2279,10 +2283,20 @@ void MDSRankDispatcher::update_log_config()
   map<string,string> log_to_syslog;
   map<string,string> log_channel;
   map<string,string> log_prio;
+  map<string,string> log_to_graylog;
+  map<string,string> log_to_graylog_host;
+  map<string,string> log_to_graylog_port;
+  uuid_d fsid;
+  string host;
+
   if (parse_log_client_options(g_ceph_context, log_to_monitors, log_to_syslog,
-			       log_channel, log_prio) == 0)
+			       log_channel, log_prio, log_to_graylog,
+			       log_to_graylog_host, log_to_graylog_port,
+			       fsid, host) == 0)
     clog->update_config(log_to_monitors, log_to_syslog,
-			log_channel, log_prio);
+			log_channel, log_prio, log_to_graylog,
+			log_to_graylog_host, log_to_graylog_port,
+			fsid, host);
   dout(10) << __func__ << " log_to_monitors " << log_to_monitors << dendl;
 }
 
@@ -2297,7 +2311,7 @@ void MDSRank::create_logger()
     mds_plb.add_time_avg(l_mds_reply_latency, "reply_latency",
         "Reply latency", "rlat");
     mds_plb.add_u64_counter(l_mds_forward, "forward", "Forwarding request");
-    
+
     mds_plb.add_u64_counter(l_mds_dir_fetch, "dir_fetch", "Directory fetch");
     mds_plb.add_u64_counter(l_mds_dir_commit, "dir_commit", "Directory commit");
     mds_plb.add_u64_counter(l_mds_dir_split, "dir_split", "Directory split");
@@ -2306,24 +2320,24 @@ void MDSRank::create_logger()
     mds_plb.add_u64(l_mds_inodes, "inodes", "Inodes", "inos");
     mds_plb.add_u64(l_mds_inodes_top, "inodes_top", "Inodes on top");
     mds_plb.add_u64(l_mds_inodes_bottom, "inodes_bottom", "Inodes on bottom");
-    mds_plb.add_u64(l_mds_inodes_pin_tail, "inodes_pin_tail", "Inodes on pin tail");  
+    mds_plb.add_u64(l_mds_inodes_pin_tail, "inodes_pin_tail", "Inodes on pin tail");
     mds_plb.add_u64(l_mds_inodes_pinned, "inodes_pinned", "Inodes pinned");
     mds_plb.add_u64(l_mds_inodes_expired, "inodes_expired", "Inodes expired");
     mds_plb.add_u64(l_mds_inodes_with_caps, "inodes_with_caps", "Inodes with capabilities");
     mds_plb.add_u64(l_mds_caps, "caps", "Capabilities", "caps");
     mds_plb.add_u64(l_mds_subtrees, "subtrees", "Subtrees");
-    
-    mds_plb.add_u64_counter(l_mds_traverse, "traverse", "Traverses"); 
+
+    mds_plb.add_u64_counter(l_mds_traverse, "traverse", "Traverses");
     mds_plb.add_u64_counter(l_mds_traverse_hit, "traverse_hit", "Traverse hits");
     mds_plb.add_u64_counter(l_mds_traverse_forward, "traverse_forward", "Traverse forwards");
     mds_plb.add_u64_counter(l_mds_traverse_discover, "traverse_discover", "Traverse directory discovers");
     mds_plb.add_u64_counter(l_mds_traverse_dir_fetch, "traverse_dir_fetch", "Traverse incomplete directory content fetchings");
     mds_plb.add_u64_counter(l_mds_traverse_remote_ino, "traverse_remote_ino", "Traverse remote dentries");
     mds_plb.add_u64_counter(l_mds_traverse_lock, "traverse_lock", "Traverse locks");
-    
+
     mds_plb.add_u64(l_mds_load_cent, "load_cent", "Load per cent");
     mds_plb.add_u64(l_mds_dispatch_queue_len, "q", "Dispatch queue length");
-    
+
     mds_plb.add_u64_counter(l_mds_exported, "exported", "Exports");
     mds_plb.add_u64_counter(l_mds_exported_inodes, "exported_inodes", "Exported inodes");
     mds_plb.add_u64_counter(l_mds_imported, "imported", "Imports");
@@ -2409,7 +2423,7 @@ void MDSRank::bcast_mds_map()
   sessionmap.get_client_session_set(clients);
   for (set<Session*>::const_iterator p = clients.begin();
        p != clients.end();
-       ++p) 
+       ++p)
     (*p)->connection->send_message(new MMDSMap(monc->get_fsid(), mdsmap));
   last_client_mdsmap_bcast = mdsmap->get_epoch();
 }
@@ -2560,4 +2574,3 @@ bool MDSRankDispatcher::handle_command(
     return false;
   }
 }
-
diff --git a/src/mds/MDSRank.h b/src/mds/MDSRank.h
index 5d334a2..a11f3b9 100644
--- a/src/mds/MDSRank.h
+++ b/src/mds/MDSRank.h
@@ -205,7 +205,7 @@ class MDSRank {
       MDSRank *mds;
       Cond cond;
       public:
-      ProgressThread(MDSRank *mds_) : mds(mds_) {}
+      explicit ProgressThread(MDSRank *mds_) : mds(mds_) {}
       void * entry(); 
       void shutdown();
       void signal() {cond.Signal();}
diff --git a/src/mds/MDSTable.cc b/src/mds/MDSTable.cc
index 5ff0dd0..083eae9 100644
--- a/src/mds/MDSTable.cc
+++ b/src/mds/MDSTable.cc
@@ -39,7 +39,7 @@ class MDSTableIOContext : public MDSIOContextBase
     MDSTable *ida;
     MDSRank *get_mds() {return ida->mds;}
   public:
-    MDSTableIOContext(MDSTable *ida_) : ida(ida_) {
+    explicit MDSTableIOContext(MDSTable *ida_) : ida(ida_) {
       assert(ida != NULL);
     }
 };
diff --git a/src/mds/Migrator.cc b/src/mds/Migrator.cc
index d807fb6..4dce94f 100644
--- a/src/mds/Migrator.cc
+++ b/src/mds/Migrator.cc
@@ -90,7 +90,7 @@ protected:
     return mig->mds;
   }
 public:
-  MigratorContext(Migrator *mig_) : mig(mig_) {
+  explicit MigratorContext(Migrator *mig_) : mig(mig_) {
     assert(mig != NULL);
   }
 };
diff --git a/src/mds/RecoveryQueue.h b/src/mds/RecoveryQueue.h
index c0e2f7a..6750c5f 100644
--- a/src/mds/RecoveryQueue.h
+++ b/src/mds/RecoveryQueue.h
@@ -30,7 +30,7 @@ public:
   void enqueue(CInode *in);
   void advance();
   void prioritize(CInode *in);   ///< do this inode now/soon
-  RecoveryQueue(MDSRank *mds_);
+  explicit RecoveryQueue(MDSRank *mds_);
 
   void set_logger(PerfCounters *p) {logger=p;}
 
diff --git a/src/mds/ScatterLock.h b/src/mds/ScatterLock.h
index c490b44..78bd474 100644
--- a/src/mds/ScatterLock.h
+++ b/src/mds/ScatterLock.h
@@ -26,7 +26,7 @@ class ScatterLock : public SimpleLock {
     xlist<ScatterLock*>::item item_updated;
     utime_t update_stamp;
 
-    more_bits_t(ScatterLock *lock) :
+    explicit more_bits_t(ScatterLock *lock) :
       state_flags(0),
       item_updated(lock)
     {}
diff --git a/src/mds/Server.cc b/src/mds/Server.cc
index 7816460..b9f5b0c 100644
--- a/src/mds/Server.cc
+++ b/src/mds/Server.cc
@@ -80,7 +80,7 @@ class ServerContext : public MDSInternalContextBase {
   }
 
   public:
-  ServerContext(Server *s) : server(s) {
+  explicit ServerContext(Server *s) : server(s) {
     assert(server != NULL);
   }
 };
@@ -558,7 +558,7 @@ class C_MDS_TerminatedSessions : public ServerContext {
     server->terminating_sessions = false;
   }
   public:
-  C_MDS_TerminatedSessions(Server *s) : ServerContext(s) {}
+  explicit C_MDS_TerminatedSessions(Server *s) : ServerContext(s) {}
 };
 
 void Server::terminate_sessions()
@@ -1450,10 +1450,11 @@ void Server::handle_osd_map()
   /* Note that we check the OSDMAP_FULL flag directly rather than
    * using osdmap_full_flag(), because we want to know "is the flag set"
    * rather than "does the flag apply to us?" */
-  const OSDMap *osdmap = mds->objecter->get_osdmap_read();
-  is_full = osdmap->test_flag(CEPH_OSDMAP_FULL);
-  dout(7) << __func__ << ": full = " << is_full << " epoch = " << osdmap->get_epoch() << dendl;
-  mds->objecter->put_osdmap_read();
+  mds->objecter->with_osdmap([this](const OSDMap& o) {
+      is_full = o.test_flag(CEPH_OSDMAP_FULL);
+      dout(7) << __func__ << ": full = " << is_full << " epoch = "
+	      << o.get_epoch() << dendl;
+    });
 }
 
 void Server::dispatch_client_request(MDRequestRef& mdr)
@@ -3965,7 +3966,7 @@ void Server::handle_client_setdirlayout(MDRequestRef& mdr)
 
 // XATTRS
 
-int Server::parse_layout_vxattr(string name, string value, const OSDMap *osdmap,
+int Server::parse_layout_vxattr(string name, string value, const OSDMap& osdmap,
 				ceph_file_layout *layout, bool validate)
 {
   dout(20) << "parse_layout_vxattr name " << name << " value '" << value << "'" << dendl;
@@ -4000,7 +4001,7 @@ int Server::parse_layout_vxattr(string name, string value, const OSDMap *osdmap,
       try {
 	layout->fl_pg_pool = boost::lexical_cast<unsigned>(value);
       } catch (boost::bad_lexical_cast const&) {
-	int64_t pool = osdmap->lookup_pg_pool_name(value);
+	int64_t pool = osdmap.lookup_pg_pool_name(value);
 	if (pool < 0) {
 	  dout(10) << " unknown pool " << value << dendl;
 	  return -ENOENT;
@@ -4086,158 +4087,158 @@ void Server::handle_set_vxattr(MDRequestRef& mdr, CInode *cur,
   string value (bl.c_str(), bl.length());
   dout(10) << "handle_set_vxattr " << name << " val " << value.length() << " bytes on " << *cur << dendl;
 
-  // layout or quota
-  if (name.find("ceph.file.layout") == 0 ||
-      name.find("ceph.dir.layout") == 0 ||
-      name.find("ceph.quota") == 0) {
-    inode_t *pi = NULL;
-    string rest;
-    if (name.find("ceph.dir.layout") == 0) {
-      if (!cur->is_dir()) {
-	respond_to_request(mdr, -EINVAL);
-	return;
-      }
-
-      ceph_file_layout layout;
-      if (cur->get_projected_inode()->has_layout())
-	layout = cur->get_projected_inode()->layout;
-      else if (dir_layout)
-	layout = *dir_layout;
-      else
-	layout = mdcache->default_file_layout;
-
-      rest = name.substr(name.find("layout"));
-      const OSDMap *osdmap = mds->objecter->get_osdmap_read();
-      int r = parse_layout_vxattr(rest, value, osdmap, &layout);
-      epoch_t epoch = osdmap->get_epoch();
-      mds->objecter->put_osdmap_read();
-      if (r < 0) {
-	if (r == -ENOENT) {
-	  epoch_t req_epoch = req->get_osdmap_epoch();
-	  if (req_epoch > epoch) {
-	    if (!mds->objecter->wait_for_map(req_epoch,
-		  new C_OnFinisher(new C_IO_Wrapper(mds, new C_MDS_RetryRequest(mdcache, mdr)), mds->finisher)))
-	    return;
-	  } else  if (req_epoch == 0 && !mdr->waited_for_osdmap) {
-	    // For compatibility with client w/ old code, we still need get the latest map. 
-	    // One day if COMPACT_VERSION of MClientRequest >=3, we can remove those code.
-	    mdr->waited_for_osdmap = true;
-	    mds->objecter->wait_for_latest_osdmap(
-		new C_OnFinisher(new C_IO_Wrapper(mds, new C_MDS_RetryRequest(mdcache, mdr)), mds->finisher));
-	    return;
-	  }
-	  r = -EINVAL;
-	}
-	respond_to_request(mdr, r);
-	return;
-      }
+  inode_t *pi = NULL;
+  string rest;
 
-      xlocks.insert(&cur->policylock);
-      if (!mds->locker->acquire_locks(mdr, rdlocks, wrlocks, xlocks))
-	return;
+  if (name.compare(0, 15, "ceph.dir.layout") == 0) {
+    if (!cur->is_dir()) {
+      respond_to_request(mdr, -EINVAL);
+      return;
+    }
 
-      if (cur->inode.layout.fl_pg_pool != layout.fl_pg_pool) {
-        if (!check_access(mdr, cur, MAY_SET_POOL)) {
+    ceph_file_layout layout;
+    if (cur->get_projected_inode()->has_layout())
+      layout = cur->get_projected_inode()->layout;
+    else if (dir_layout)
+      layout = *dir_layout;
+    else
+      layout = mdcache->default_file_layout;
+
+    rest = name.substr(name.find("layout"));
+    epoch_t epoch;
+    int r;
+    mds->objecter->with_osdmap([&](const OSDMap& osdmap) {
+	r = parse_layout_vxattr(rest, value, osdmap, &layout);
+	epoch = osdmap.get_epoch();
+      });
+    if (r < 0) {
+      if (r == -ENOENT) {
+        epoch_t req_epoch = req->get_osdmap_epoch();
+        if (req_epoch > epoch) {
+          if (!mds->objecter->wait_for_map(req_epoch,
+      	  new C_OnFinisher(new C_IO_Wrapper(mds, new C_MDS_RetryRequest(mdcache, mdr)), mds->finisher)))
+          return;
+        } else  if (req_epoch == 0 && !mdr->waited_for_osdmap) {
+          // For compatibility with clients running old code, we still need to get the latest map.
+          // One day, if COMPAT_VERSION of MClientRequest is >= 3, we can remove this code.
+          mdr->waited_for_osdmap = true;
+          mds->objecter->wait_for_latest_osdmap(
+      	new C_OnFinisher(new C_IO_Wrapper(mds, new C_MDS_RetryRequest(mdcache, mdr)), mds->finisher));
           return;
         }
+        r = -EINVAL;
       }
+      respond_to_request(mdr, r);
+      return;
+    }
 
-      pi = cur->project_inode();
-      pi->layout = layout;
-    } else if (name.find("ceph.file.layout") == 0) {
-      if (!cur->is_file()) {
-	respond_to_request(mdr, -EINVAL);
-	return;
-      }
-      if (cur->get_projected_inode()->size ||
-	  cur->get_projected_inode()->truncate_seq > 1) {
-	respond_to_request(mdr, -ENOTEMPTY);
-	return;
-      }
-      ceph_file_layout layout = cur->get_projected_inode()->layout;
-      rest = name.substr(name.find("layout"));
-      const OSDMap *osdmap = mds->objecter->get_osdmap_read();
-      int r = parse_layout_vxattr(rest, value, osdmap, &layout);
-      epoch_t epoch = osdmap->get_epoch();
-      mds->objecter->put_osdmap_read();
-      if (r < 0) {
-	if (r == -ENOENT) {
-	  epoch_t req_epoch = req->get_osdmap_epoch();
-	  if (req_epoch > epoch) {
-	    if (!mds->objecter->wait_for_map(req_epoch,
-		new C_OnFinisher(new C_IO_Wrapper(mds, new C_MDS_RetryRequest(mdcache, mdr)), mds->finisher)))
-	    return;
-	  } else if (req_epoch == 0 && !mdr->waited_for_osdmap) {
-	    // For compatibility with client w/ old code, we still need get the latest map. 
-	    // One day if COMPACT_VERSION of MClientRequest >=3, we can remove those code.
-	    mdr->waited_for_osdmap = true;
-	    mds->objecter->wait_for_latest_osdmap(
-	      new C_OnFinisher(new C_IO_Wrapper(mds, new C_MDS_RetryRequest(mdcache, mdr)), mds->finisher));
-	    return;
-	  }
-	  r = -EINVAL;
-	}
-	respond_to_request(mdr, r);
-	return;
-      }
+    xlocks.insert(&cur->policylock);
+    if (!mds->locker->acquire_locks(mdr, rdlocks, wrlocks, xlocks))
+      return;
 
-      xlocks.insert(&cur->filelock);
-      if (!mds->locker->acquire_locks(mdr, rdlocks, wrlocks, xlocks))
-	return;
+    if (cur->inode.layout.fl_pg_pool != layout.fl_pg_pool) {
+      if (!check_access(mdr, cur, MAY_SET_POOL)) {
+        return;
+      }
+    }
 
-      if (cur->inode.layout.fl_pg_pool != layout.fl_pg_pool) {
-        if (!check_access(mdr, cur, MAY_SET_POOL)) {
+    pi = cur->project_inode();
+    pi->layout = layout;
+  } else if (name.compare(0, 16, "ceph.file.layout") == 0) {
+    if (!cur->is_file()) {
+      respond_to_request(mdr, -EINVAL);
+      return;
+    }
+    if (cur->get_projected_inode()->size ||
+        cur->get_projected_inode()->truncate_seq > 1) {
+      respond_to_request(mdr, -ENOTEMPTY);
+      return;
+    }
+    ceph_file_layout layout = cur->get_projected_inode()->layout;
+    rest = name.substr(name.find("layout"));
+    int r;
+    epoch_t epoch;
+    mds->objecter->with_osdmap([&](const OSDMap& osdmap) {
+	r = parse_layout_vxattr(rest, value, osdmap, &layout);
+	epoch = osdmap.get_epoch();
+      });
+    if (r < 0) {
+      if (r == -ENOENT) {
+        epoch_t req_epoch = req->get_osdmap_epoch();
+        if (req_epoch > epoch) {
+          if (!mds->objecter->wait_for_map(req_epoch,
+      	new C_OnFinisher(new C_IO_Wrapper(mds, new C_MDS_RetryRequest(mdcache, mdr)), mds->finisher)))
+          return;
+        } else if (req_epoch == 0 && !mdr->waited_for_osdmap) {
+          // For compatibility with clients running old code, we still need to get the latest map.
+          // One day, if COMPAT_VERSION of MClientRequest is >= 3, we can remove this code.
+          mdr->waited_for_osdmap = true;
+          mds->objecter->wait_for_latest_osdmap(
+            new C_OnFinisher(new C_IO_Wrapper(mds, new C_MDS_RetryRequest(mdcache, mdr)), mds->finisher));
           return;
         }
+        r = -EINVAL;
       }
+      respond_to_request(mdr, r);
+      return;
+    }
 
-      pi = cur->project_inode();
-      int64_t old_pool = pi->layout.fl_pg_pool;
-      pi->add_old_pool(old_pool);
-      pi->layout = layout;
-      pi->ctime = mdr->get_op_stamp();
-    } else {
-      // expect this to be "ceph.quota"
-      if (!cur->is_dir() || cur->is_root()) {
-        respond_to_request(mdr, -EINVAL);
-        return;
-      }
-
-      quota_info_t quota = cur->get_projected_inode()->quota;
+    xlocks.insert(&cur->filelock);
+    if (!mds->locker->acquire_locks(mdr, rdlocks, wrlocks, xlocks))
+      return;
 
-      rest = name.substr(name.find("quota"));
-      int r = parse_quota_vxattr(rest, value, &quota);
-      if (r < 0) {
-        respond_to_request(mdr, r);
+    if (cur->inode.layout.fl_pg_pool != layout.fl_pg_pool) {
+      if (!check_access(mdr, cur, MAY_SET_POOL)) {
         return;
       }
+    }
 
-      xlocks.insert(&cur->policylock);
-      if (!mds->locker->acquire_locks(mdr, rdlocks, wrlocks, xlocks))
-        return;
-
-      pi = cur->project_inode();
-      pi->quota = quota;
+    pi = cur->project_inode();
+    int64_t old_pool = pi->layout.fl_pg_pool;
+    pi->add_old_pool(old_pool);
+    pi->layout = layout;
+    pi->ctime = mdr->get_op_stamp();
+  } else if (name.compare(0, 10, "ceph.quota") == 0) { 
+    if (!cur->is_dir() || cur->is_root()) {
+      respond_to_request(mdr, -EINVAL);
+      return;
     }
 
-    pi->version = cur->pre_dirty();
-    if (cur->is_file())
-      pi->update_backtrace();
+    quota_info_t quota = cur->get_projected_inode()->quota;
 
-    // log + wait
-    mdr->ls = mdlog->get_current_segment();
-    EUpdate *le = new EUpdate(mdlog, "set vxattr layout");
-    mdlog->start_entry(le);
-    le->metablob.add_client_req(req->get_reqid(), req->get_oldest_client_tid());
-    mdcache->predirty_journal_parents(mdr, &le->metablob, cur, 0, PREDIRTY_PRIMARY);
-    mdcache->journal_dirty_inode(mdr.get(), &le->metablob, cur);
+    rest = name.substr(name.find("quota"));
+    int r = parse_quota_vxattr(rest, value, &quota);
+    if (r < 0) {
+      respond_to_request(mdr, r);
+      return;
+    }
 
-    journal_and_reply(mdr, cur, 0, le, new C_MDS_inode_update_finish(mds, mdr, cur));
+    xlocks.insert(&cur->policylock);
+    if (!mds->locker->acquire_locks(mdr, rdlocks, wrlocks, xlocks))
+      return;
+
+    pi = cur->project_inode();
+    pi->quota = quota;
+  } else {
+    dout(10) << " unknown vxattr " << name << dendl;
+    respond_to_request(mdr, -EINVAL);
     return;
   }
 
-  dout(10) << " unknown vxattr " << name << dendl;
-  respond_to_request(mdr, -EINVAL);
+  pi->version = cur->pre_dirty();
+  if (cur->is_file())
+    pi->update_backtrace();
+
+  // log + wait
+  mdr->ls = mdlog->get_current_segment();
+  EUpdate *le = new EUpdate(mdlog, "set vxattr layout");
+  mdlog->start_entry(le);
+  le->metablob.add_client_req(req->get_reqid(), req->get_oldest_client_tid());
+  mdcache->predirty_journal_parents(mdr, &le->metablob, cur, 0, PREDIRTY_PRIMARY);
+  mdcache->journal_dirty_inode(mdr.get(), &le->metablob, cur);
+
+  journal_and_reply(mdr, cur, 0, le, new C_MDS_inode_update_finish(mds, mdr, cur));
+  return;
 }
 
 void Server::handle_remove_vxattr(MDRequestRef& mdr, CInode *cur,
diff --git a/src/mds/Server.h b/src/mds/Server.h
index 44ee5b7..40c71f7 100644
--- a/src/mds/Server.h
+++ b/src/mds/Server.h
@@ -63,7 +63,7 @@ private:
 public:
   bool terminating_sessions;
 
-  Server(MDSRank *m);
+  explicit Server(MDSRank *m);
   ~Server() {
     g_ceph_context->get_perfcounters_collection()->remove(logger);
     delete logger;
@@ -172,7 +172,7 @@ public:
   void handle_client_setlayout(MDRequestRef& mdr);
   void handle_client_setdirlayout(MDRequestRef& mdr);
 
-  int parse_layout_vxattr(string name, string value, const OSDMap *osdmap,
+  int parse_layout_vxattr(string name, string value, const OSDMap& osdmap,
 			  ceph_file_layout *layout, bool validate=true);
   int parse_quota_vxattr(string name, string value, quota_info_t *quota);
   void handle_set_vxattr(MDRequestRef& mdr, CInode *cur,
diff --git a/src/mds/SessionMap.cc b/src/mds/SessionMap.cc
index 4552e5e..9b4bc7f 100644
--- a/src/mds/SessionMap.cc
+++ b/src/mds/SessionMap.cc
@@ -35,7 +35,7 @@ class SessionMapIOContext : public MDSIOContextBase
     SessionMap *sessionmap;
     MDSRank *get_mds() {return sessionmap->mds;}
   public:
-    SessionMapIOContext(SessionMap *sessionmap_) : sessionmap(sessionmap_) {
+    explicit SessionMapIOContext(SessionMap *sessionmap_) : sessionmap(sessionmap_) {
       assert(sessionmap != NULL);
     }
 };
@@ -253,7 +253,7 @@ void SessionMap::load(MDSInternalContextBase *onload)
 class C_IO_SM_LoadLegacy : public SessionMapIOContext {
 public:
   bufferlist bl;
-  C_IO_SM_LoadLegacy(SessionMap *cm) : SessionMapIOContext(cm) {}
+  explicit C_IO_SM_LoadLegacy(SessionMap *cm) : SessionMapIOContext(cm) {}
   void finish(int r) {
     sessionmap->_load_legacy_finish(r, bl);
   }
diff --git a/src/mds/SessionMap.h b/src/mds/SessionMap.h
index b6a3792..75a7d07 100644
--- a/src/mds/SessionMap.h
+++ b/src/mds/SessionMap.h
@@ -428,7 +428,7 @@ public:
   uint64_t set_state(Session *session, int state);
   map<version_t, list<MDSInternalContextBase*> > commit_waiters;
 
-  SessionMap(MDSRank *m) : mds(m),
+  explicit SessionMap(MDSRank *m) : mds(m),
 		       projected(0), committing(0), committed(0),
                        loaded_legacy(false)
   { }
diff --git a/src/mds/SimpleLock.h b/src/mds/SimpleLock.h
index 085aec7..faec26a 100644
--- a/src/mds/SimpleLock.h
+++ b/src/mds/SimpleLock.h
@@ -58,7 +58,7 @@ struct LockType {
   int type;
   const sm_t *sm;
 
-  LockType(int t) : type(t) {
+  explicit LockType(int t) : type(t) {
     switch (type) {
     case CEPH_LOCK_DN:
     case CEPH_LOCK_IAUTH:
diff --git a/src/mds/SnapClient.h b/src/mds/SnapClient.h
index 883699f..800538c 100644
--- a/src/mds/SnapClient.h
+++ b/src/mds/SnapClient.h
@@ -24,7 +24,7 @@ class LogSegment;
 
 class SnapClient : public MDSTableClient {
 public:
-  SnapClient(MDSRank *m) : MDSTableClient(m, TABLE_SNAP) {}
+  explicit SnapClient(MDSRank *m) : MDSTableClient(m, TABLE_SNAP) {}
 
   void resend_queries() {}
   void handle_query_result(MMDSTableRequest *m) {}
diff --git a/src/mds/SnapServer.cc b/src/mds/SnapServer.cc
index 7006909..45dd9e4 100644
--- a/src/mds/SnapServer.cc
+++ b/src/mds/SnapServer.cc
@@ -41,21 +41,19 @@ void SnapServer::reset_state()
   // find any removed snapshot in data pools
   if (mds) {  // only if I'm running in a live MDS
     snapid_t first_free = 0;
-    const OSDMap *osdmap = mds->objecter->get_osdmap_read();
-    for (set<int64_t>::const_iterator p = mds->mdsmap->get_data_pools().begin();
-         p != mds->mdsmap->get_data_pools().end();
-         ++p) {
-      const pg_pool_t *pi = osdmap->get_pg_pool(*p);
-      if (!pi) {
-        // If pool isn't in OSDMap yet then can't have any snaps needing
-        // removal, skip.
-        continue;
-      }
-      if (!pi->removed_snaps.empty() &&
-          pi->removed_snaps.range_end() > first_free)
-        first_free = pi->removed_snaps.range_end();
-    }
-    mds->objecter->put_osdmap_read();
+    mds->objecter->with_osdmap([&](const OSDMap& o) {
+	for (const auto p : mds->mdsmap->get_data_pools()) {
+	  const pg_pool_t *pi = o.get_pg_pool(p);
+	  if (!pi) {
+	    // If pool isn't in OSDMap yet then can't have any snaps
+	    // needing removal, skip.
+	    continue;
+	  }
+	  if (!pi->removed_snaps.empty() &&
+	      pi->removed_snaps.range_end() > first_free)
+	    first_free = pi->removed_snaps.range_end();
+	}
+      });
     if (first_free > last_snap)
       last_snap = first_free;
   }
@@ -259,30 +257,28 @@ void SnapServer::check_osd_map(bool force)
   map<int, vector<snapid_t> > all_purge;
   map<int, vector<snapid_t> > all_purged;
 
-  const OSDMap *osdmap = mds->objecter->get_osdmap_read();
-  for (map<int, set<snapid_t> >::iterator p = need_to_purge.begin();
-       p != need_to_purge.end();
-       ++p) {
-    int id = p->first;
-    const pg_pool_t *pi = osdmap->get_pg_pool(id);
-    if (pi == NULL) {
-      // The pool is gone.  So are the snapshots.
-      all_purged[id] = std::vector<snapid_t>(p->second.begin(), p->second.end());
-      continue;
-    }
-
-    for (set<snapid_t>::iterator q = p->second.begin();
-	 q != p->second.end();
-	 ++q) {
-      if (pi->is_removed_snap(*q)) {
-	dout(10) << " osdmap marks " << *q << " as removed" << dendl;
-	all_purged[id].push_back(*q);
-      } else {
-	all_purge[id].push_back(*q);
+  mds->objecter->with_osdmap(
+    [this, &all_purged, &all_purge](const OSDMap& osdmap) {
+      for (const auto& p : need_to_purge) {
+	int id = p.first;
+	const pg_pool_t *pi = osdmap.get_pg_pool(id);
+	if (pi == NULL) {
+	  // The pool is gone.  So are the snapshots.
+	  all_purged[id] = std::vector<snapid_t>(p.second.begin(),
+						 p.second.end());
+	  continue;
+	}
+
+	for (const auto& q : p.second) {
+	  if (pi->is_removed_snap(q)) {
+	    dout(10) << " osdmap marks " << q << " as removed" << dendl;
+	    all_purged[id].push_back(q);
+	  } else {
+	    all_purge[id].push_back(q);
+	  }
+	}
       }
-    }
-  }
-  mds->objecter->put_osdmap_read();
+  });
 
   if (!all_purged.empty()) {
     // prepare to remove from need_to_purge list
diff --git a/src/mds/StrayManager.cc b/src/mds/StrayManager.cc
index bb9a3ee..6547a26 100644
--- a/src/mds/StrayManager.cc
+++ b/src/mds/StrayManager.cc
@@ -42,7 +42,7 @@ protected:
     return sm->mds;
   }
 public:
-  StrayManagerIOContext(StrayManager *sm_) : sm(sm_) {}
+  explicit StrayManagerIOContext(StrayManager *sm_) : sm(sm_) {}
 };
 
 
@@ -54,7 +54,7 @@ protected:
     return sm->mds;
   }
 public:
-  StrayManagerContext(StrayManager *sm_) : sm(sm_) {}
+  explicit StrayManagerContext(StrayManager *sm_) : sm(sm_) {}
 };
 
 
@@ -847,54 +847,31 @@ void StrayManager::_truncate_stray_logged(CDentry *dn, LogSegment *ls)
 }
 
 
-const char** StrayManager::get_tracked_conf_keys() const
-{
-  static const char* KEYS[] = {
-    "mds_max_purge_ops",
-    "mds_max_purge_ops_per_pg",
-    NULL
-  };
-  return KEYS;
-}
-
-void StrayManager::handle_conf_change(const struct md_config_t *conf,
-			  const std::set <std::string> &changed)
-{
-  if (changed.count("mds_max_purge_ops")
-      || changed.count("mds_max_purge_ops_per_pg")) {
-    update_op_limit();
-  }
-}
-
-
 void StrayManager::update_op_limit()
 {
-  const OSDMap *osdmap = mds->objecter->get_osdmap_read();
-  assert(osdmap != NULL);
-
-  // Number of PGs across all data pools
   uint64_t pg_count = 0;
-  const std::set<int64_t> &data_pools = mds->mdsmap->get_data_pools();
-  for (std::set<int64_t>::iterator i = data_pools.begin();
-       i != data_pools.end(); ++i) {
-    if (osdmap->get_pg_pool(*i) == NULL) {
-      // It is possible that we have an older OSDMap than MDSMap, because
-      // we don't start watching every OSDMap until after MDSRank is
-      // initialized
-      dout(4) << __func__ << " data pool " << *i
-              << " not found in OSDMap" << dendl;
-      continue;
-    }
-    pg_count += osdmap->get_pg_num(*i);
-  }
-
-  mds->objecter->put_osdmap_read();
+  mds->objecter->with_osdmap([&](const OSDMap& o) {
+      // Number of PGs across all data pools
+      const std::set<int64_t> &data_pools = mds->mdsmap->get_data_pools();
+      for (const auto dp : data_pools) {
+	if (o.get_pg_pool(dp) == NULL) {
+	  // It is possible that we have an older OSDMap than MDSMap,
+	  // because we don't start watching every OSDMap until after
+	  // MDSRank is initialized
+	  dout(4) << __func__ << " data pool " << dp
+		  << " not found in OSDMap" << dendl;
+	  continue;
+	}
+	pg_count += o.get_pg_num(dp);
+      }
+    });
 
   uint64_t mds_count = mds->mdsmap->get_max_mds();
 
   // Work out a limit based on n_pgs / n_mdss, multiplied by the user's
   // preference for how many ops per PG
-  max_purge_ops = uint64_t(((double)pg_count / (double)mds_count) * g_conf->mds_max_purge_ops_per_pg);
+  max_purge_ops = uint64_t(((double)pg_count / (double)mds_count) *
+			   g_conf->mds_max_purge_ops_per_pg);
 
   // User may also specify a hard limit, apply this if so.
   if (g_conf->mds_max_purge_ops) {
diff --git a/src/mds/StrayManager.h b/src/mds/StrayManager.h
index 1db88b1..3576889 100644
--- a/src/mds/StrayManager.h
+++ b/src/mds/StrayManager.h
@@ -23,7 +23,7 @@ class PerfCounters;
 class CInode;
 class CDentry;
 
-class StrayManager : public md_config_obs_t
+class StrayManager
 {
   protected:
   class QueuedStray {
@@ -150,7 +150,7 @@ class StrayManager : public md_config_obs_t
 
   // My public interface is for consumption by MDCache
   public:
-  StrayManager(MDSRank *mds);
+  explicit StrayManager(MDSRank *mds);
   void set_logger(PerfCounters *l) {logger = l;}
 
   bool eval_stray(CDentry *dn, bool delay=false);
@@ -236,17 +236,6 @@ class StrayManager : public md_config_obs_t
    * Call this whenever one of those operands changes.
    */
   void update_op_limit();
-
-  /**
-   * Subscribe to changes on mds_max_purge_ops
-   */
-  virtual const char** get_tracked_conf_keys() const;
-
-  /**
-   * Call update_op_limit if mds_max_purge_ops changes
-   */
-  virtual void handle_conf_change(const struct md_config_t *conf,
-			  const std::set <std::string> &changed);
 };
 
 #endif  // STRAY_MANAGER_H
diff --git a/src/mds/events/ECommitted.h b/src/mds/events/ECommitted.h
index 48de756..4cdbbfb 100644
--- a/src/mds/events/ECommitted.h
+++ b/src/mds/events/ECommitted.h
@@ -23,7 +23,7 @@ public:
   metareqid_t reqid;
 
   ECommitted() : LogEvent(EVENT_COMMITTED) { }
-  ECommitted(metareqid_t r) : 
+  explicit ECommitted(metareqid_t r) :
     LogEvent(EVENT_COMMITTED), reqid(r) { }
 
   void print(ostream& out) const {
diff --git a/src/mds/events/EMetaBlob.h b/src/mds/events/EMetaBlob.h
index 7d219e9..8db8f67 100644
--- a/src/mds/events/EMetaBlob.h
+++ b/src/mds/events/EMetaBlob.h
@@ -94,7 +94,7 @@ public:
 	old_inodes = *oi;
       snapbl = sbl;
     }
-    fullbit(bufferlist::iterator &p) {
+    explicit fullbit(bufferlist::iterator &p) {
       decode(p);
     }
     fullbit() {}
@@ -144,7 +144,7 @@ public:
 
     remotebit(const string& d, snapid_t df, snapid_t dl, version_t v, inodeno_t i, unsigned char dt, bool dr) : 
       dn(d), dnfirst(df), dnlast(dl), dnv(v), ino(i), d_type(dt), dirty(dr) { }
-    remotebit(bufferlist::iterator &p) { decode(p); }
+    explicit remotebit(bufferlist::iterator &p) { decode(p); }
     remotebit(): dnfirst(0), dnlast(0), dnv(0), ino(0),
 	d_type('\0'), dirty(false) {}
 
@@ -171,7 +171,7 @@ public:
 
     nullbit(const string& d, snapid_t df, snapid_t dl, version_t v, bool dr) : 
       dn(d), dnfirst(df), dnlast(dl), dnv(v), dirty(dr) { }
-    nullbit(bufferlist::iterator &p) { decode(p); }
+    explicit nullbit(bufferlist::iterator &p) { decode(p); }
     nullbit(): dnfirst(0), dnlast(0), dnv(0), dirty(false) {}
 
     void encode(bufferlist& bl) const;
@@ -333,7 +333,7 @@ private:
   // for replay, in certain cases
   //LogSegment *_segment;
 
-  EMetaBlob(MDLog *mdl = 0);  // defined in journal.cc
+  explicit EMetaBlob(MDLog *mdl = 0);  // defined in journal.cc
   ~EMetaBlob() { }
 
   void print(ostream& out) {
diff --git a/src/mds/events/ENoOp.h b/src/mds/events/ENoOp.h
index 9a585c2..ed825c6 100644
--- a/src/mds/events/ENoOp.h
+++ b/src/mds/events/ENoOp.h
@@ -22,7 +22,7 @@ class ENoOp : public LogEvent {
 
 public:
   ENoOp() : LogEvent(EVENT_NOOP), pad_size(0) { }
-  ENoOp(uint32_t size_) : LogEvent(EVENT_NOOP), pad_size(size_){ }
+  explicit ENoOp(uint32_t size_) : LogEvent(EVENT_NOOP), pad_size(size_){ }
 
   void encode(bufferlist& bl) const;
   void decode(bufferlist::iterator& bl);
diff --git a/src/mds/events/EOpen.h b/src/mds/events/EOpen.h
index c22a585..0c1175b 100644
--- a/src/mds/events/EOpen.h
+++ b/src/mds/events/EOpen.h
@@ -24,7 +24,7 @@ public:
   vector<inodeno_t> inos;
 
   EOpen() : LogEvent(EVENT_OPEN) { }
-  EOpen(MDLog *mdlog) : 
+  explicit EOpen(MDLog *mdlog) :
     LogEvent(EVENT_OPEN), metablob(mdlog) { }
 
   void print(ostream& out) const {
diff --git a/src/mds/flock.h b/src/mds/flock.h
index 37149eb..55ba2b8 100644
--- a/src/mds/flock.h
+++ b/src/mds/flock.h
@@ -39,7 +39,7 @@ inline bool operator==(ceph_filelock& l, ceph_filelock& r) {
 class ceph_lock_state_t {
   CephContext *cct;
 public:
-  ceph_lock_state_t(CephContext *cct_) : cct(cct_) {}
+  explicit ceph_lock_state_t(CephContext *cct_) : cct(cct_) {}
   multimap<uint64_t, ceph_filelock> held_locks;    // current locks
   multimap<uint64_t, ceph_filelock> waiting_locks; // locks waiting for other locks
   // both of the above are keyed by starting offset
diff --git a/src/mds/mdstypes.h b/src/mds/mdstypes.h
index 669e8b9..4e1b926 100644
--- a/src/mds/mdstypes.h
+++ b/src/mds/mdstypes.h
@@ -980,7 +980,7 @@ class inode_load_vec_t {
   static const int NUM = 2;
   std::vector < DecayCounter > vec;
 public:
-  inode_load_vec_t(const utime_t &now)
+  explicit inode_load_vec_t(const utime_t &now)
      : vec(NUM, DecayCounter(now))
   {}
   // for dencoder infrastructure
@@ -1011,7 +1011,7 @@ class dirfrag_load_vec_t {
 public:
   static const int NUM = 5;
   std::vector < DecayCounter > vec;
-  dirfrag_load_vec_t(const utime_t &now)
+  explicit dirfrag_load_vec_t(const utime_t &now)
      : vec(NUM, DecayCounter(now))
   { }
   // for dencoder infrastructure
@@ -1115,7 +1115,7 @@ struct mds_load_t {
 
   double cpu_load_avg;
 
-  mds_load_t(const utime_t &t) : 
+  explicit mds_load_t(const utime_t &t) : 
     auth(t), all(t), req_rate(0), cache_hit_rate(0),
     queue_len(0), cpu_load_avg(0)
   {}
@@ -1227,7 +1227,7 @@ struct ClientLease {
 // print hack
 struct mdsco_db_line_prefix {
   MDSCacheObject *object;
-  mdsco_db_line_prefix(MDSCacheObject *o) : object(o) {}
+  explicit mdsco_db_line_prefix(MDSCacheObject *o) : object(o) {}
 };
 std::ostream& operator<<(std::ostream& out, mdsco_db_line_prefix o);
 
diff --git a/src/messages/MMonMap.h b/src/messages/MMonMap.h
index 3e950fa..808a757 100644
--- a/src/messages/MMonMap.h
+++ b/src/messages/MMonMap.h
@@ -24,7 +24,7 @@ public:
   bufferlist monmapbl;
 
   MMonMap() : Message(CEPH_MSG_MON_MAP) { }
-  MMonMap(bufferlist &bl) : Message(CEPH_MSG_MON_MAP) { 
+  explicit MMonMap(bufferlist &bl) : Message(CEPH_MSG_MON_MAP) { 
     monmapbl.claim(bl);
   }
 private:
diff --git a/src/messages/MOSDOpReply.h b/src/messages/MOSDOpReply.h
index eee12b5..1ea3b92 100644
--- a/src/messages/MOSDOpReply.h
+++ b/src/messages/MOSDOpReply.h
@@ -32,7 +32,7 @@
 
 class MOSDOpReply : public Message {
 
-  static const int HEAD_VERSION = 6;
+  static const int HEAD_VERSION = 7;
   static const int COMPAT_VERSION = 2;
 
   object_t oid;
@@ -45,6 +45,7 @@ class MOSDOpReply : public Message {
   version_t user_version;
   epoch_t osdmap_epoch;
   int32_t retry_attempt;
+  bool do_redirect;
   request_redirect_t redirect;
 
 public:
@@ -91,7 +92,7 @@ public:
 
   void set_redirect(const request_redirect_t& redir) { redirect = redir; }
   const request_redirect_t& get_redirect() const { return redirect; }
-  bool is_redirect_reply() const { return !redirect.empty(); }
+  bool is_redirect_reply() const { return do_redirect; }
 
   void add_flags(int f) { flags |= f; }
 
@@ -124,7 +125,9 @@ public:
 
 public:
   MOSDOpReply()
-    : Message(CEPH_MSG_OSD_OPREPLY, HEAD_VERSION, COMPAT_VERSION) { }
+    : Message(CEPH_MSG_OSD_OPREPLY, HEAD_VERSION, COMPAT_VERSION) {
+    do_redirect = false;
+  }
   MOSDOpReply(MOSDOp *req, int r, epoch_t e, int acktype, bool ignore_out_data)
     : Message(CEPH_MSG_OSD_OPREPLY, HEAD_VERSION, COMPAT_VERSION),
       oid(req->oid), pgid(req->pgid), ops(req->ops) {
@@ -136,6 +139,7 @@ public:
     osdmap_epoch = e;
     user_version = 0;
     retry_attempt = req->get_retry_attempt();
+    do_redirect = false;
 
     // zero out ops payload_len and possibly out data
     for (unsigned i = 0; i < ops.size(); i++) {
@@ -189,7 +193,16 @@ public:
 
       ::encode(replay_version, payload);
       ::encode(user_version, payload);
-      ::encode(redirect, payload);
+      if ((features & CEPH_FEATURE_NEW_OSDOPREPLY_ENCODING) == 0) {
+        header.version = 6;
+        ::encode(redirect, payload);
+      } else {
+        do_redirect = !redirect.empty();
+        ::encode(do_redirect, payload);
+        if (do_redirect) {
+          ::encode(redirect, payload);
+        }
+      }
     }
   }
   virtual void decode_payload() {
@@ -197,27 +210,29 @@ public:
 
     // Always keep here the newest version of decoding order/rule
     if (header.version == HEAD_VERSION) {
-	::decode(oid, p);
-	::decode(pgid, p);
-	::decode(flags, p);
-	::decode(result, p);
-	::decode(bad_replay_version, p);
-	::decode(osdmap_epoch, p);
-
-	__u32 num_ops = ops.size();
-	::decode(num_ops, p);
-	ops.resize(num_ops);
-	for (unsigned i = 0; i < num_ops; i++)
+      ::decode(oid, p);
+      ::decode(pgid, p);
+      ::decode(flags, p);
+      ::decode(result, p);
+      ::decode(bad_replay_version, p);
+      ::decode(osdmap_epoch, p);
+
+      __u32 num_ops = ops.size();
+      ::decode(num_ops, p);
+      ops.resize(num_ops);
+      for (unsigned i = 0; i < num_ops; i++)
 	::decode(ops[i].op, p);
-	::decode(retry_attempt, p);
+      ::decode(retry_attempt, p);
 
-	for (unsigned i = 0; i < num_ops; ++i)
+      for (unsigned i = 0; i < num_ops; ++i)
 	::decode(ops[i].rval, p);
 
-	OSDOp::split_osd_op_vector_out_data(ops, data);
+      OSDOp::split_osd_op_vector_out_data(ops, data);
 
-	::decode(replay_version, p);
-	::decode(user_version, p);
+      ::decode(replay_version, p);
+      ::decode(user_version, p);
+      ::decode(do_redirect, p);
+      if (do_redirect)
 	::decode(redirect, p);
     } else if (header.version < 2) {
       ceph_osd_reply_head head;
@@ -268,8 +283,16 @@ public:
 	user_version = replay_version.version;
       }
 
-      if (header.version >= 6)
+      if (header.version == 6) {
 	::decode(redirect, p);
+        do_redirect = !redirect.empty();
+      }
+      if (header.version >= 7) {
+        ::decode(do_redirect, p);
+        if (do_redirect) {
+	  ::decode(redirect, p);
+        }
+      }
     }
   }
 
diff --git a/src/messages/MOSDSubOp.h b/src/messages/MOSDSubOp.h
index 38c303c..373ad5a 100644
--- a/src/messages/MOSDSubOp.h
+++ b/src/messages/MOSDSubOp.h
@@ -19,13 +19,15 @@
 #include "msg/Message.h"
 #include "osd/osd_types.h"
 
+#include "include/ceph_features.h"
+
 /*
  * OSD sub op - for internal ops on pobjects between primary and replicas(/stripes/whatever)
  */
 
 class MOSDSubOp : public Message {
 
-  static const int HEAD_VERSION = 11;
+  static const int HEAD_VERSION = 12;
   static const int COMPAT_VERSION = 7;
 
 public:
@@ -51,7 +53,6 @@ public:
   eversion_t old_version;
 
   SnapSet snapset;
-  SnapContext snapc;
 
   // transaction to exec
   bufferlist logbl;
@@ -128,7 +129,12 @@ public:
     ::decode(old_size, p);
     ::decode(old_version, p);
     ::decode(snapset, p);
-    ::decode(snapc, p);
+
+    if (header.version <= 11) {
+      SnapContext snapc_dont_need;
+      ::decode(snapc_dont_need, p);
+    }
+
     ::decode(logbl, p);
     ::decode(pg_stats, p);
     ::decode(pg_trim_to, p);
@@ -196,7 +202,13 @@ public:
     ::encode(old_size, payload);
     ::encode(old_version, payload);
     ::encode(snapset, payload);
-    ::encode(snapc, payload);
+
+    if ((features & CEPH_FEATURE_OSDSUBOP_NO_SNAPCONTEXT) == 0) {
+      header.version = 11;
+      SnapContext dummy_snapc;
+      ::encode(dummy_snapc, payload);
+    }
+
     ::encode(logbl, payload);
     ::encode(pg_stats, payload);
     ::encode(pg_trim_to, payload);
@@ -258,7 +270,7 @@ public:
     if (complete)
       out << " complete";
     out << " v " << version
-	<< " snapset=" << snapset << " snapc=" << snapc;    
+	<< " snapset=" << snapset;
     if (!data_subset.empty()) out << " subset " << data_subset;
     if (updated_hit_set_history)
       out << ", has_updated_hit_set_history";
diff --git a/src/mon/ConfigKeyService.cc b/src/mon/ConfigKeyService.cc
index 84e95ef..c4cab9e 100644
--- a/src/mon/ConfigKeyService.cc
+++ b/src/mon/ConfigKeyService.cc
@@ -42,6 +42,11 @@ int ConfigKeyService::store_get(string key, bufferlist &bl)
   return mon->store->get(STORE_PREFIX, key, bl);
 }
 
+void ConfigKeyService::get_store_prefixes(set<string>& s)
+{
+  s.insert(STORE_PREFIX);
+}
+
 void ConfigKeyService::store_put(string key, bufferlist &bl, Context *cb)
 {
   bufferlist proposal_bl;
@@ -110,7 +115,6 @@ bool ConfigKeyService::service_dispatch(MonOpRequestRef op)
   map<string, cmd_vartype> cmdmap;
 
   if (!cmdmap_from_json(cmd->cmd, &cmdmap, ss)) {
-    ret = -EINVAL;
     return false;
   }
 
diff --git a/src/mon/ConfigKeyService.h b/src/mon/ConfigKeyService.h
index 0ceface..fe1d54d 100644
--- a/src/mon/ConfigKeyService.h
+++ b/src/mon/ConfigKeyService.h
@@ -69,7 +69,7 @@ public:
   virtual string get_name() const {
     return "config_key";
   }
-
+  virtual void get_store_prefixes(set<string>& s);
   /**
    * @} // ConfigKeyService_Inherited_h
    */
diff --git a/src/mon/Elector.cc b/src/mon/Elector.cc
index 3bec0ef..9e011e3 100644
--- a/src/mon/Elector.cc
+++ b/src/mon/Elector.cc
@@ -130,7 +130,7 @@ void Elector::reset_timer(double plus)
   // set the timer
   cancel_timer();
   expire_event = new C_ElectionExpire(this);
-  mon->timer.add_event_after(g_conf->mon_lease + plus,
+  mon->timer.add_event_after(g_conf->mon_election_timeout + plus,
 			     expire_event);
 }
 
diff --git a/src/mon/Elector.h b/src/mon/Elector.h
index ab84d0b..03a9d17 100644
--- a/src/mon/Elector.h
+++ b/src/mon/Elector.h
@@ -169,7 +169,7 @@ class Elector {
   class C_ElectionExpire : public Context {
     Elector *elector;
   public:
-    C_ElectionExpire(Elector *e) : elector(e) { }
+    explicit C_ElectionExpire(Elector *e) : elector(e) { }
     void finish(int r) {
       elector->expire();
     }
@@ -348,7 +348,7 @@ class Elector {
    *
    * @param m A Monitor instance
    */
-  Elector(Monitor *m) : mon(m),
+  explicit Elector(Monitor *m) : mon(m),
 			expire_event(0),
 			epoch(0),
 			participating(true),
diff --git a/src/mon/LogMonitor.cc b/src/mon/LogMonitor.cc
index eb9f32f..5aa66e2 100644
--- a/src/mon/LogMonitor.cc
+++ b/src/mon/LogMonitor.cc
@@ -152,6 +152,15 @@ void LogMonitor::update_from_paxos(bool *need_bootstrap)
                          channels.get_facility(channel));
       }
 
+      if (channels.do_log_to_graylog(channel)) {
+	ceph::log::Graylog::Ref graylog = channels.get_graylog(channel);
+	if (graylog) {
+	  graylog->log_log_entry(&le);
+	}
+	dout(7) << "graylog: " << channel << " " << graylog
+		<< " host:" << channels.log_to_graylog_host << dendl;
+      }
+
       string log_file = channels.get_log_file(channel);
       dout(20) << __func__ << " logging for channel '" << channel
                << "' to file '" << log_file << "'" << dendl;
@@ -650,6 +659,33 @@ void LogMonitor::update_log_channels()
     return;
   }
 
+  r = get_conf_str_map_helper(g_conf->mon_cluster_log_to_graylog, oss,
+                              &channels.log_to_graylog,
+                              CLOG_CONFIG_DEFAULT_KEY);
+  if (r < 0) {
+    derr << __func__ << " error parsing 'mon_cluster_log_to_graylog'"
+         << dendl;
+    return;
+  }
+
+  r = get_conf_str_map_helper(g_conf->mon_cluster_log_to_graylog_host, oss,
+                              &channels.log_to_graylog_host,
+                              CLOG_CONFIG_DEFAULT_KEY);
+  if (r < 0) {
+    derr << __func__ << " error parsing 'mon_cluster_log_to_graylog_host'"
+         << dendl;
+    return;
+  }
+
+  r = get_conf_str_map_helper(g_conf->mon_cluster_log_to_graylog_port, oss,
+                              &channels.log_to_graylog_port,
+                              CLOG_CONFIG_DEFAULT_KEY);
+  if (r < 0) {
+    derr << __func__ << " error parsing 'mon_cluster_log_to_graylog_port'"
+         << dendl;
+    return;
+  }
+
   channels.expand_channel_meta();
 }
 
@@ -714,7 +750,12 @@ void LogMonitor::handle_conf_change(const struct md_config_t *conf,
       changed.count("mon_cluster_log_to_syslog_level") ||
       changed.count("mon_cluster_log_to_syslog_facility") ||
       changed.count("mon_cluster_log_file") ||
-      changed.count("mon_cluster_log_file_level")) {
+      changed.count("mon_cluster_log_file_level") ||
+      changed.count("mon_cluster_log_to_graylog") ||
+      changed.count("mon_cluster_log_to_graylog_host") ||
+      changed.count("mon_cluster_log_to_graylog_port") ||
+      changed.count("fsid") ||
+      changed.count("host")) {
     update_log_channels();
   }
 }
diff --git a/src/mon/LogMonitor.h b/src/mon/LogMonitor.h
index 4d31b66..d2e3a4a 100644
--- a/src/mon/LogMonitor.h
+++ b/src/mon/LogMonitor.h
@@ -25,6 +25,7 @@ using namespace std;
 
 #include "common/LogEntry.h"
 #include "messages/MLog.h"
+#include "common/Graylog.h"
 
 class MMonCommand;
 
@@ -44,6 +45,13 @@ private:
     map<string,string> log_file;
     map<string,string> expanded_log_file;
     map<string,string> log_file_level;
+    map<string,string> log_to_graylog;
+    map<string,string> log_to_graylog_host;
+    map<string,string> log_to_graylog_port;
+
+    map<string, ceph::log::Graylog::Ref> graylogs;
+    uuid_d fsid;
+    string host;
 
     void clear() {
       log_to_syslog.clear();
@@ -52,6 +60,10 @@ private:
       log_file.clear();
       expanded_log_file.clear();
       log_file_level.clear();
+      log_to_graylog.clear();
+      log_to_graylog_host.clear();
+      log_to_graylog_port.clear();
+      graylogs.clear();
     }
 
     /** expands $channel meta variable on all maps *EXCEPT* log_file
@@ -102,6 +114,35 @@ private:
       return get_str_map_key(log_file_level, channel,
                              &CLOG_CONFIG_DEFAULT_KEY);
     }
+
+    bool do_log_to_graylog(const string &channel) {
+      return (get_str_map_key(log_to_graylog, channel,
+			      &CLOG_CONFIG_DEFAULT_KEY) == "true");
+    }
+
+    ceph::log::Graylog::Ref get_graylog(const string &channel) {
+      generic_dout(25) << __func__ << " for channel '"
+                       << channel << "'" << dendl;
+
+      if (graylogs.count(channel) == 0) {
+	ceph::log::Graylog::Ref graylog = ceph::log::Graylog::Ref(new ceph::log::Graylog("mon"));
+
+	graylog->set_fsid(g_conf->fsid);
+	graylog->set_hostname(g_conf->host);
+	graylog->set_destination(get_str_map_key(log_to_graylog_host, channel,
+						 &CLOG_CONFIG_DEFAULT_KEY),
+				 atoi(get_str_map_key(log_to_graylog_port, channel,
+						      &CLOG_CONFIG_DEFAULT_KEY).c_str()));
+
+	graylogs[channel] = graylog;
+        generic_dout(20) << __func__ << " for channel '"
+                         << channel << "' to graylog host '"
+			 << log_to_graylog_host[channel] << ":"
+			 << log_to_graylog_port[channel]
+			 << "'" << dendl;
+      }
+      return graylogs[channel];
+    }
   } channels;
 
   void update_log_channels();
@@ -182,6 +223,9 @@ private:
       "mon_cluster_log_to_syslog_facility",
       "mon_cluster_log_file",
       "mon_cluster_log_file_level",
+      "mon_cluster_log_to_graylog",
+      "mon_cluster_log_to_graylog_host",
+      "mon_cluster_log_to_graylog_port",
       NULL
     };
     return KEYS;
diff --git a/src/mon/MDSMonitor.cc b/src/mon/MDSMonitor.cc
index a62cdb9..79d1500 100644
--- a/src/mon/MDSMonitor.cc
+++ b/src/mon/MDSMonitor.cc
@@ -367,6 +367,7 @@ bool MDSMonitor::preprocess_beacon(MonOpRequestRef op)
   // do not match, to update our stored
   if (!(pending_daemon_health[gid] == m->get_health())) {
     dout(20) << __func__ << " health metrics for gid " << gid << " were updated" << dendl;
+    _note_beacon(m);
     return false;
   }
 
@@ -682,7 +683,8 @@ void MDSMonitor::on_active()
 }
 
 void MDSMonitor::get_health(list<pair<health_status_t, string> >& summary,
-			    list<pair<health_status_t, string> > *detail) const
+			    list<pair<health_status_t, string> > *detail,
+			    CephContext* cct) const
 {
   mdsmap.get_health(summary, detail);
 
@@ -1729,7 +1731,6 @@ int MDSMonitor::filesystem_command(
       string err;
       poolid = strict_strtol(poolname.c_str(), 10, &err);
       if (err.length()) {
-	poolid = -1;
 	ss << "pool '" << poolname << "' does not exist";
 	return -ENOENT;
       }
diff --git a/src/mon/MDSMonitor.h b/src/mon/MDSMonitor.h
index 03a2276..b755ba9 100644
--- a/src/mon/MDSMonitor.h
+++ b/src/mon/MDSMonitor.h
@@ -97,7 +97,8 @@ class MDSMonitor : public PaxosService {
   bool prepare_offload_targets(MonOpRequestRef op);
 
   void get_health(list<pair<health_status_t,string> >& summary,
-		  list<pair<health_status_t,string> > *detail) const;
+		  list<pair<health_status_t,string> > *detail,
+		  CephContext *cct) const override;
   int fail_mds(std::ostream &ss, const std::string &arg);
   void fail_mds_gid(mds_gid_t gid);
 
diff --git a/src/mon/MonCap.h b/src/mon/MonCap.h
index 7dba641..2501f44 100644
--- a/src/mon/MonCap.h
+++ b/src/mon/MonCap.h
@@ -21,6 +21,7 @@ static const __u8 MON_CAP_ANY   = 0xff;          // *
 struct mon_rwxa_t {
   __u8 val;
 
+  // cppcheck-suppress noExplicitConstructor
   mon_rwxa_t(__u8 v = 0) : val(v) {}
   mon_rwxa_t& operator=(__u8 v) {
     val = v;
@@ -79,8 +80,10 @@ struct MonCapGrant {
   void expand_profile(EntityName name) const;
 
   MonCapGrant() : allow(0) {}
+  // cppcheck-suppress noExplicitConstructor
   MonCapGrant(mon_rwxa_t a) : allow(a) {}
   MonCapGrant(string s, mon_rwxa_t a) : service(s), allow(a) {}
+  // cppcheck-suppress noExplicitConstructor  
   MonCapGrant(string c) : command(c) {}
   MonCapGrant(string c, string a, StringConstraint co) : command(c) {
     command_args[a] = co;
@@ -118,7 +121,7 @@ struct MonCap {
   std::vector<MonCapGrant> grants;
 
   MonCap() {}
-  MonCap(std::vector<MonCapGrant> g) : grants(g) {}
+  explicit MonCap(std::vector<MonCapGrant> g) : grants(g) {}
 
   string get_str() const {
     return text;
diff --git a/src/mon/MonClient.cc b/src/mon/MonClient.cc
index 7183551..dc1ec6c 100644
--- a/src/mon/MonClient.cc
+++ b/src/mon/MonClient.cc
@@ -117,6 +117,9 @@ int MonClient::get_monmap_privately()
   Messenger* smessenger = NULL;
   if (!messenger) {
     messenger = smessenger = Messenger::create_client_messenger(cct, "temp_mon_client");
+    if (NULL == messenger) {
+        return -1;
+    }
     messenger->add_dispatcher_head(this);
     smessenger->start();
     temp_msgr = true;
diff --git a/src/mon/MonClient.h b/src/mon/MonClient.h
index c3efd78..d98a15b 100644
--- a/src/mon/MonClient.h
+++ b/src/mon/MonClient.h
@@ -155,7 +155,7 @@ private:
 
   struct C_Tick : public Context {
     MonClient *monc;
-    C_Tick(MonClient *m) : monc(m) {}
+    explicit C_Tick(MonClient *m) : monc(m) {}
     void finish(int r) {
       monc->tick();
     }
@@ -300,7 +300,7 @@ public:
   RotatingKeyRing *rotating_secrets;
 
  public:
-  MonClient(CephContext *cct_);
+  explicit MonClient(CephContext *cct_);
   ~MonClient();
 
   int init();
@@ -406,7 +406,7 @@ private:
     int *prval;
     Context *onfinish, *ontimeout;
 
-    MonCommand(uint64_t t)
+    explicit MonCommand(uint64_t t)
       : target_rank(-1),
 	tid(t),
 	poutbl(NULL), prs(NULL), prval(NULL), onfinish(NULL), ontimeout(NULL)
diff --git a/src/mon/MonMap.cc b/src/mon/MonMap.cc
index d511b40..8341cac 100644
--- a/src/mon/MonMap.cc
+++ b/src/mon/MonMap.cc
@@ -274,6 +274,8 @@ int MonMap::build_initial(CephContext *cct, ostream& errout)
              << std::endl;
       return r;
     }
+    created = ceph_clock_now(cct);
+    last_changed = created;
     return 0;
   }
 
@@ -333,5 +335,7 @@ int MonMap::build_initial(CephContext *cct, ostream& errout)
     errout << "no monitors specified to connect to." << std::endl;
     return -ENOENT;
   }
+  created = ceph_clock_now(cct);
+  last_changed = created;
   return 0;
 }
diff --git a/src/mon/Monitor.cc b/src/mon/Monitor.cc
index e54e55e..0af79c7 100644
--- a/src/mon/Monitor.cc
+++ b/src/mon/Monitor.cc
@@ -196,8 +196,6 @@ Monitor::Monitor(CephContext* cct_, string nm, MonitorDBStore *s,
   routed_request_tid(0),
   op_tracker(cct, true, 1)
 {
-  rank = -1;
-
   clog = log_client.create_channel(CLOG_CHANNEL_CLUSTER);
   audit_clog = log_client.create_channel(CLOG_CHANNEL_AUDIT);
 
@@ -268,7 +266,7 @@ Monitor::~Monitor()
 class AdminHook : public AdminSocketHook {
   Monitor *mon;
 public:
-  AdminHook(Monitor *m) : mon(m) {}
+  explicit AdminHook(Monitor *m) : mon(m) {}
   bool call(std::string command, cmdmap_t& cmdmap, std::string format,
 	    bufferlist& out) {
     stringstream ss;
@@ -321,7 +319,7 @@ void Monitor::do_admin_command(string command, cmdmap_t& cmdmap, string format,
       goto abort;
     }
     sync_force(f.get(), ss);
-  } else if (command.find("add_bootstrap_peer_hint") == 0) {
+  } else if (command.compare(0, 23, "add_bootstrap_peer_hint") == 0) {
     if (!_add_bootstrap_peer_hint(command, cmdmap, ss))
       goto abort;
   } else if (command == "quorum enter") {
@@ -451,6 +449,7 @@ const char** Monitor::get_tracked_conf_keys() const
 {
   static const char* KEYS[] = {
     "crushtool", // helpful for testing
+    "mon_election_timeout",
     "mon_lease",
     "mon_lease_renew_interval_factor",
     "mon_lease_ack_timeout_factor",
@@ -460,6 +459,10 @@ const char** Monitor::get_tracked_conf_keys() const
     "clog_to_syslog",
     "clog_to_syslog_facility",
     "clog_to_syslog_level",
+    "clog_to_graylog",
+    "clog_to_graylog_host",
+    "clog_to_graylog_port",
+    "fsid",
     // periodic health to clog
     "mon_health_to_clog",
     "mon_health_to_clog_interval",
@@ -481,7 +484,12 @@ void Monitor::handle_conf_change(const struct md_config_t *conf,
   if (changed.count("clog_to_monitors") ||
       changed.count("clog_to_syslog") ||
       changed.count("clog_to_syslog_level") ||
-      changed.count("clog_to_syslog_facility")) {
+      changed.count("clog_to_syslog_facility") ||
+      changed.count("clog_to_graylog") ||
+      changed.count("clog_to_graylog_host") ||
+      changed.count("clog_to_graylog_port") ||
+      changed.count("host") ||
+      changed.count("fsid")) {
     update_log_clients();
   }
 
@@ -502,15 +510,27 @@ void Monitor::update_log_clients()
   map<string,string> log_to_syslog;
   map<string,string> log_channel;
   map<string,string> log_prio;
+  map<string,string> log_to_graylog;
+  map<string,string> log_to_graylog_host;
+  map<string,string> log_to_graylog_port;
+  uuid_d fsid;
+  string host;
 
   if (parse_log_client_options(g_ceph_context, log_to_monitors, log_to_syslog,
-			       log_channel, log_prio))
+			       log_channel, log_prio, log_to_graylog,
+			       log_to_graylog_host, log_to_graylog_port,
+			       fsid, host))
     return;
 
   clog->update_config(log_to_monitors, log_to_syslog,
-		      log_channel, log_prio);
+		      log_channel, log_prio, log_to_graylog,
+		      log_to_graylog_host, log_to_graylog_port,
+		      fsid, host);
+
   audit_clog->update_config(log_to_monitors, log_to_syslog,
-			    log_channel, log_prio);
+			    log_channel, log_prio, log_to_graylog,
+			    log_to_graylog_host, log_to_graylog_port,
+			    fsid, host);
 }
 
 int Monitor::sanitize_options()
@@ -550,6 +570,7 @@ int Monitor::preinit()
   int r = sanitize_options();
   if (r < 0) {
     derr << "option sanitization failed!" << dendl;
+    lock.Unlock();
     return r;
   }
 
@@ -639,6 +660,7 @@ int Monitor::preinit()
               << "'mon_force_quorum_join' is set -- allowing boot" << dendl;
     } else {
       derr << "commit suicide!" << dendl;
+      lock.Unlock();
       return -ENOENT;
     }
   }
@@ -1053,7 +1075,9 @@ set<string> Monitor::get_sync_targets_names()
   targets.insert(paxos->get_name());
   for (int i = 0; i < PAXOS_NUM; ++i)
     paxos_service[i]->get_store_prefixes(targets);
-
+  ConfigKeyService *config_key_service_ptr = dynamic_cast<ConfigKeyService*>(config_key_service);
+  assert(config_key_service_ptr);
+  config_key_service_ptr->get_store_prefixes(targets);
   return targets;
 }
 
@@ -1675,7 +1699,7 @@ void Monitor::handle_probe_reply(MonOpRequestRef op)
 
   // rename peer?
   string peer_name = monmap->get_name(m->get_source_addr());
-  if (monmap->get_epoch() == 0 && peer_name.find("noname-") == 0) {
+  if (monmap->get_epoch() == 0 && peer_name.compare(0, 7, "noname-") == 0) {
     dout(10) << " renaming peer " << m->get_source_addr() << " "
 	     << peer_name << " -> " << m->name << " in my monmap"
 	     << dendl;
@@ -1722,7 +1746,7 @@ void Monitor::handle_probe_reply(MonOpRequestRef op)
   } else {
     if (paxos->get_version() < m->paxos_first_version &&
 	m->paxos_first_version > 1) {  // no need to sync if we're 0 and they start at 1.
-      dout(10) << " peer paxos versions [" << m->paxos_first_version
+      dout(10) << " peer paxos first versions [" << m->paxos_first_version
 	       << "," << m->paxos_last_version << "]"
 	       << " vs my version " << paxos->get_version()
 	       << " (too far ahead)"
@@ -1732,7 +1756,7 @@ void Monitor::handle_probe_reply(MonOpRequestRef op)
       return;
     }
     if (paxos->get_version() + g_conf->paxos_max_join_drift < m->paxos_last_version) {
-      dout(10) << " peer paxos version " << m->paxos_last_version
+      dout(10) << " peer paxos last version " << m->paxos_last_version
 	       << " vs my version " << paxos->get_version()
 	       << " (too far ahead)"
 	       << dendl;
@@ -2290,7 +2314,7 @@ health_status_t Monitor::get_health(list<string>& status,
        p != paxos_service.end();
        ++p) {
     PaxosService *s = *p;
-    s->get_health(summary, detailbl ? &detail : NULL);
+    s->get_health(summary, detailbl ? &detail : NULL, cct);
   }
 
   health_monitor->get_health(f, summary, (detailbl ? &detail : NULL));
@@ -3090,7 +3114,7 @@ void Monitor::forward_request_leader(MonOpRequestRef op)
 
 // fake connection attached to forwarded messages
 struct AnonConnection : public Connection {
-  AnonConnection(CephContext *cct) : Connection(cct, NULL) {}
+  explicit AnonConnection(CephContext *cct) : Connection(cct, NULL) {}
 
   int send_message(Message *m) override {
     assert(!"send_message on anonymous connection");
@@ -4691,7 +4715,7 @@ void Monitor::scrub_reset_timeout()
 class C_Mon_Tick : public Context {
   Monitor *mon;
 public:
-  C_Mon_Tick(Monitor *m) : mon(m) {}
+  explicit C_Mon_Tick(Monitor *m) : mon(m) {}
   void finish(int r) {
     mon->tick();
   }
diff --git a/src/mon/Monitor.h b/src/mon/Monitor.h
index 9c2ced0..d506eba 100644
--- a/src/mon/Monitor.h
+++ b/src/mon/Monitor.h
@@ -260,14 +260,14 @@ private:
 
   struct C_Scrub : public Context {
     Monitor *mon;
-    C_Scrub(Monitor *m) : mon(m) { }
+    explicit C_Scrub(Monitor *m) : mon(m) { }
     void finish(int r) {
       mon->scrub_start();
     }
   };
   struct C_ScrubTimeout : public Context {
     Monitor *mon;
-    C_ScrubTimeout(Monitor *m) : mon(m) { }
+    explicit C_ScrubTimeout(Monitor *m) : mon(m) { }
     void finish(int r) {
       mon->scrub_timeout();
     }
@@ -351,7 +351,7 @@ private:
 
   struct C_SyncTimeout : public Context {
     Monitor *mon;
-    C_SyncTimeout(Monitor *m) : mon(m) {}
+    explicit C_SyncTimeout(Monitor *m) : mon(m) {}
     void finish(int r) {
       mon->sync_timeout();
     }
@@ -509,7 +509,7 @@ private:
 
   struct C_TimeCheck : public Context {
     Monitor *mon;
-    C_TimeCheck(Monitor *m) : mon(m) { }
+    explicit C_TimeCheck(Monitor *m) : mon(m) { }
     void finish(int r) {
       mon->timecheck_start_round();
     }
@@ -565,7 +565,7 @@ private:
 
   struct C_ProbeTimeout : public Context {
     Monitor *mon;
-    C_ProbeTimeout(Monitor *m) : mon(m) {}
+    explicit C_ProbeTimeout(Monitor *m) : mon(m) {}
     void finish(int r) {
       mon->probe_timeout(r);
     }
@@ -714,7 +714,7 @@ public:
 
   struct C_HealthToClogTick : public Context {
     Monitor *mon;
-    C_HealthToClogTick(Monitor *m) : mon(m) { }
+    explicit C_HealthToClogTick(Monitor *m) : mon(m) { }
     void finish(int r) {
       if (r < 0)
         return;
@@ -725,7 +725,7 @@ public:
 
   struct C_HealthToClogInterval : public Context {
     Monitor *mon;
-    C_HealthToClogInterval(Monitor *m) : mon(m) { }
+    explicit C_HealthToClogInterval(Monitor *m) : mon(m) { }
     void finish(int r) {
       if (r < 0)
         return;
diff --git a/src/mon/MonitorDBStore.h b/src/mon/MonitorDBStore.h
index 082e171..cbb0b33 100644
--- a/src/mon/MonitorDBStore.h
+++ b/src/mon/MonitorDBStore.h
@@ -621,7 +621,7 @@ class MonitorDBStore
     return db->get_estimated_size(extras);
   }
 
-  MonitorDBStore(const string& path)
+  explicit MonitorDBStore(const string& path)
     : db(0),
       do_dump(false),
       dump_fd_binary(-1),
diff --git a/src/mon/MonmapMonitor.cc b/src/mon/MonmapMonitor.cc
index 900c291..d95455e 100644
--- a/src/mon/MonmapMonitor.cc
+++ b/src/mon/MonmapMonitor.cc
@@ -240,10 +240,6 @@ bool MonmapMonitor::preprocess_command(MonOpRequestRef op)
     if (p != mon->monmap)
        delete p;
   }
-  else if (prefix == "mon add")
-    return false;
-  else if (prefix == "mon remove")
-    return false;
 
 reply:
   if (r != -1) {
@@ -528,7 +524,8 @@ void MonmapMonitor::tick()
 }
 
 void MonmapMonitor::get_health(list<pair<health_status_t, string> >& summary,
-			       list<pair<health_status_t, string> > *detail) const
+			       list<pair<health_status_t, string> > *detail,
+			       CephContext *cct) const
 {
   int max = mon->monmap->size();
   int actual = mon->get_quorum().size();
@@ -582,16 +579,3 @@ int MonmapMonitor::get_monmap(bufferlist &bl)
   }
   return 0;
 }
-
-int MonmapMonitor::get_monmap(MonMap &m)
-{
-  dout(10) << __func__ << dendl;
-  bufferlist monmap_bl;
-
-  int err = get_monmap(monmap_bl);
-  if (err < 0) {
-    return err;
-  }
-  m.decode(monmap_bl);
-  return 0;
-}
diff --git a/src/mon/MonmapMonitor.h b/src/mon/MonmapMonitor.h
index f554092..ecfbe1e 100644
--- a/src/mon/MonmapMonitor.h
+++ b/src/mon/MonmapMonitor.h
@@ -68,10 +68,10 @@ class MonmapMonitor : public PaxosService {
   bool prepare_command(MonOpRequestRef op);
 
   void get_health(list<pair<health_status_t,string> >& summary,
-		  list<pair<health_status_t,string> > *detail) const;
+		  list<pair<health_status_t,string> > *detail,
+		  CephContext *cct) const override;
 
   int get_monmap(bufferlist &bl);
-  int get_monmap(MonMap &m);
 
   /*
    * Since monitors are pretty
diff --git a/src/mon/OSDMonitor.cc b/src/mon/OSDMonitor.cc
index 77e26de..42c58a0 100644
--- a/src/mon/OSDMonitor.cc
+++ b/src/mon/OSDMonitor.cc
@@ -781,7 +781,7 @@ public:
 protected:
   struct lowprecision_t {
     float v;
-    lowprecision_t(float _v) : v(_v) {}
+    explicit lowprecision_t(float _v) : v(_v) {}
   };
   friend std::ostream &operator<<(ostream& out, const lowprecision_t& v);
 
@@ -2769,7 +2769,8 @@ void OSDMonitor::mark_all_down()
 }
 
 void OSDMonitor::get_health(list<pair<health_status_t,string> >& summary,
-			    list<pair<health_status_t,string> > *detail) const
+			    list<pair<health_status_t,string> > *detail,
+			    CephContext *cct) const
 {
   int num_osds = osdmap.get_num_osds();
 
@@ -4973,6 +4974,19 @@ int OSDMonitor::prepare_command_pool_set(map<string,cmd_vartype> &cmdmap,
       ss << "crush ruleset " << n << " does not exist";
       return -ENOENT;
     }
+    const int64_t poolsize = p.get_size();
+    const int64_t minsize = osdmap.crush->get_rule_mask_min_size(n);
+    if (poolsize < minsize) {
+      ss << "pool size " << poolsize << " is smaller than crush ruleset " 
+         << n << " min size " << minsize;
+      return -EINVAL;
+    }
+    const int64_t maxsize = osdmap.crush->get_rule_mask_max_size(n);
+    if (poolsize > maxsize) {
+      ss << "pool size " << poolsize << " is bigger than crush ruleset " 
+         << n << " max size " << maxsize;
+      return -EINVAL;
+    }
     p.crush_ruleset = n;
   } else if (var == "hashpspool" || var == "nodelete" || var == "nopgchange" ||
 	     var == "nosizechange" || var == "write_fadvise_dontneed" ||
diff --git a/src/mon/OSDMonitor.h b/src/mon/OSDMonitor.h
index 7638b6a..3e50dae 100644
--- a/src/mon/OSDMonitor.h
+++ b/src/mon/OSDMonitor.h
@@ -55,7 +55,7 @@ struct failure_reporter_t {
   MonOpRequestRef op;       ///< failure op request
 
   failure_reporter_t() {}
-  failure_reporter_t(utime_t s) : failed_since(s) {}
+  explicit failure_reporter_t(utime_t s) : failed_since(s) {}
   ~failure_reporter_t() { }
 };
 
@@ -386,7 +386,6 @@ private:
   bool preprocess_remove_snaps(MonOpRequestRef op);
   bool prepare_remove_snaps(MonOpRequestRef op);
 
-  CephContext *cct;
   OpTracker op_tracker;
 
   int load_metadata(int osd, map<string, string>& m, ostream *err);
@@ -399,7 +398,8 @@ private:
   int parse_osd_id(const char *s, stringstream *pss);
 
   void get_health(list<pair<health_status_t,string> >& summary,
-		  list<pair<health_status_t,string> > *detail) const;
+		  list<pair<health_status_t,string> > *detail,
+		  CephContext *cct) const override;
   bool preprocess_command(MonOpRequestRef op);
   bool prepare_command(MonOpRequestRef op);
   bool prepare_command_impl(MonOpRequestRef op, map<string,cmd_vartype>& cmdmap);
diff --git a/src/mon/PGMap.cc b/src/mon/PGMap.cc
index ab8f8a9..b81db7c 100644
--- a/src/mon/PGMap.cc
+++ b/src/mon/PGMap.cc
@@ -914,7 +914,7 @@ void PGMap::dump_osd_sum_stats(ostream& ss) const
      << std::endl;
 }
 
-void PGMap::get_stuck_stats(int types, utime_t cutoff,
+void PGMap::get_stuck_stats(int types, const utime_t cutoff,
                             ceph::unordered_map<pg_t, pg_stat_t>& stuck_pgs) const
 {
   assert(types != 0);
@@ -955,6 +955,54 @@ void PGMap::get_stuck_stats(int types, utime_t cutoff,
   }
 }
 
+bool PGMap::get_stuck_counts(const utime_t cutoff, map<string, int>& note) const
+{
+  int inactive = 0;
+  int unclean = 0;
+  int degraded = 0;
+  int undersized = 0;
+  int stale = 0;
+
+  for (ceph::unordered_map<pg_t, pg_stat_t>::const_iterator i = pg_stat.begin();
+       i != pg_stat.end();
+       ++i) {
+
+    if (! (i->second.state & PG_STATE_ACTIVE)) {
+      if (i->second.last_active < cutoff)
+        ++inactive;
+    } else if (! (i->second.state & PG_STATE_CLEAN)) {
+      if (i->second.last_clean < cutoff)
+        ++unclean;
+    } else if (i->second.state & PG_STATE_DEGRADED) {
+      if (i->second.last_undegraded < cutoff)
+        ++degraded;
+    } else if (i->second.state & PG_STATE_UNDERSIZED) {
+      if (i->second.last_fullsized < cutoff)
+        ++undersized;
+    } else if (i->second.state & PG_STATE_STALE) {
+      if (i->second.last_unstale < cutoff)
+        ++stale;
+    }
+  }
+  
+  if (inactive)
+    note["stuck inactive"] = inactive;
+  
+  if (unclean)
+    note["stuck unclean"] = unclean;
+  
+  if (undersized)
+    note["stuck undersized"] = undersized;
+  
+  if (degraded)
+    note["stuck degraded"] = degraded;
+  
+  if (stale)
+    note["stuck stale"] = stale; 
+  
+  return inactive || unclean || undersized || degraded || stale;
+}
+
 void PGMap::dump_stuck(Formatter *f, int types, utime_t cutoff) const
 {
   ceph::unordered_map<pg_t, pg_stat_t> stuck_pg_stats;
@@ -1113,6 +1161,9 @@ void PGMap::recovery_rate_summary(Formatter *f, ostream *out,
       f->dump_int("recovering_objects_per_sec", objps);
       f->dump_int("recovering_bytes_per_sec", bps);
       f->dump_int("recovering_keys_per_sec", kps);
+      f->dump_int("num_objects_recovered", pos_delta.stats.sum.num_objects_recovered);
+      f->dump_int("num_bytes_recovered", pos_delta.stats.sum.num_bytes_recovered);
+      f->dump_int("num_keys_recovered", pos_delta.stats.sum.num_keys_recovered);
     } else {
       *out << pretty_si_t(bps) << "B/s";
       if (pos_delta.stats.sum.num_keys_recovered)
diff --git a/src/mon/PGMap.h b/src/mon/PGMap.h
index d2b9e8a..8c2b3ca 100644
--- a/src/mon/PGMap.h
+++ b/src/mon/PGMap.h
@@ -282,8 +282,9 @@ public:
   void dump_pg_stats_plain(ostream& ss,
 			   const ceph::unordered_map<pg_t, pg_stat_t>& pg_stats,
 			   bool brief) const;
-  void get_stuck_stats(int types, utime_t cutoff,
+  void get_stuck_stats(int types, const utime_t cutoff,
 		       ceph::unordered_map<pg_t, pg_stat_t>& stuck_pgs) const;
+  bool get_stuck_counts(const utime_t cutoff, map<string, int>& note) const;
   void dump_stuck(Formatter *f, int types, utime_t cutoff) const;
   void dump_stuck_plain(ostream& ss, int types, utime_t cutoff) const;
 
diff --git a/src/mon/PGMonitor.cc b/src/mon/PGMonitor.cc
index 6cfa82a..99e2287 100644
--- a/src/mon/PGMonitor.cc
+++ b/src/mon/PGMonitor.cc
@@ -1604,8 +1604,7 @@ bool PGMonitor::preprocess_command(MonOpRequestRef op)
     prefix = "pg ls";
     string poolstr;
     cmd_getval(g_ceph_context, cmdmap, "poolstr", poolstr);
-    int64_t pool = -2;
-    pool = mon->osdmon()->osdmap.lookup_pg_pool_name(poolstr.c_str());
+    int64_t pool = mon->osdmon()->osdmap.lookup_pg_pool_name(poolstr.c_str());
     if (pool < 0) {
       r = -ENOENT;
       ss << "pool " << poolstr << " does not exist";
@@ -1697,7 +1696,6 @@ bool PGMonitor::preprocess_command(MonOpRequestRef op)
 	}
 	if (what.count("pools")) {
 	  pg_map.dump_pool_stats(ds, header);
-	  header = false;
 	}
 	if (what.count("osds")) {
 	  pg_map.dump_osd_stats(ds);
@@ -1750,7 +1748,10 @@ bool PGMonitor::preprocess_command(MonOpRequestRef op)
                int64_t(g_conf->mon_pg_stuck_threshold));
 
     r = dump_stuck_pg_stats(ds, f.get(), (int)threshold, stuckop_vec);
-    ss << "ok";
+    if (r < 0)
+      ss << "failed";  
+    else 
+      ss << "ok";
     r = 0;
   } else if (prefix == "pg map") {
     pg_t pgid;
@@ -2031,8 +2032,82 @@ int PGMonitor::_warn_slow_request_histogram(const pow2_hist_t& h, string suffix,
   return sum;
 }
 
+namespace {
+  enum class scrubbed_or_deepscrubbed_t { SCRUBBED, DEEPSCRUBBED };
+
+  void print_unscrubbed_detailed(const std::pair<const pg_t,pg_stat_t> &pg_entry,
+				 list<pair<health_status_t,string> > *detail,
+				 scrubbed_or_deepscrubbed_t how_scrubbed) {
+
+    std::stringstream ss;
+    const auto& pg_stat(pg_entry.second);
+
+    ss << "pg " << pg_entry.first << " is not ";
+    if (how_scrubbed == scrubbed_or_deepscrubbed_t::SCRUBBED) {
+      ss << "scrubbed, last_scrub_stamp "
+	 << pg_stat.last_scrub_stamp;
+    } else if (how_scrubbed == scrubbed_or_deepscrubbed_t::DEEPSCRUBBED) {
+      ss << "deep-scrubbed, last_deep_scrub_stamp "
+	 << pg_stat.last_deep_scrub_stamp;
+    }
+
+    detail->push_back(make_pair(HEALTH_WARN, ss.str()));
+  }
+
+
+  using pg_stat_map_t = const ceph::unordered_map<pg_t,pg_stat_t>;
+
+  void print_unscrubbed_pgs(pg_stat_map_t& pg_stats,
+			    list<pair<health_status_t,string> > &summary,
+			    list<pair<health_status_t,string> > *detail,
+			    const CephContext* cct) {
+    int pgs_count = 0;
+    const utime_t now = ceph_clock_now(nullptr);
+    for (const auto& pg_entry : pg_stats) {
+      const auto& pg_stat(pg_entry.second);
+      const utime_t time_since_ls = now - pg_stat.last_scrub_stamp;
+      const utime_t time_since_lds = now - pg_stat.last_deep_scrub_stamp;
+
+      const int mon_warn_not_scrubbed =
+	cct->_conf->mon_warn_not_scrubbed + cct->_conf->mon_scrub_interval;
+
+      const int mon_warn_not_deep_scrubbed =
+	cct->_conf->mon_warn_not_deep_scrubbed + cct->_conf->mon_scrub_interval;
+
+      bool not_scrubbed = (time_since_ls >= mon_warn_not_scrubbed &&
+			   cct->_conf->mon_warn_not_scrubbed != 0);
+
+      bool not_deep_scrubbed = (time_since_lds >= mon_warn_not_deep_scrubbed &&
+				cct->_conf->mon_warn_not_deep_scrubbed != 0);
+
+      if (detail != nullptr) {
+	if (not_scrubbed) {
+	  print_unscrubbed_detailed(pg_entry,
+				    detail,
+				    scrubbed_or_deepscrubbed_t::SCRUBBED);
+	} else if (not_deep_scrubbed) {
+	  print_unscrubbed_detailed(pg_entry,
+				    detail,
+				    scrubbed_or_deepscrubbed_t::DEEPSCRUBBED);
+	}
+      }
+      if (not_scrubbed || not_deep_scrubbed) {
+	++pgs_count;
+      }
+    }
+
+    if (pgs_count > 0) {
+      std::stringstream ss;
+      ss << pgs_count << " unscrubbed pgs";
+      summary.push_back(make_pair(HEALTH_WARN, ss.str()));
+    }
+
+  }
+}
+
 void PGMonitor::get_health(list<pair<health_status_t,string> >& summary,
-                           list<pair<health_status_t,string> > *detail) const
+			   list<pair<health_status_t,string> > *detail,
+			   CephContext *cct) const
 {
   map<string,int> note;
   ceph::unordered_map<int,int>::const_iterator p = pg_map.num_pg_by_state.begin();
@@ -2061,7 +2136,7 @@ void PGMonitor::get_health(list<pair<health_status_t,string> >& summary,
     if (p->first & PG_STATE_INCOMPLETE)
       note["incomplete"] += p->second;
     if (p->first & PG_STATE_BACKFILL_WAIT)
-      note["backfill"] += p->second;
+      note["backfill_wait"] += p->second;
     if (p->first & PG_STATE_BACKFILL)
       note["backfilling"] += p->second;
     if (p->first & PG_STATE_BACKFILL_TOOFULL)
@@ -2071,44 +2146,67 @@ void PGMonitor::get_health(list<pair<health_status_t,string> >& summary,
   ceph::unordered_map<pg_t, pg_stat_t> stuck_pgs;
   utime_t now(ceph_clock_now(g_ceph_context));
   utime_t cutoff = now - utime_t(g_conf->mon_pg_stuck_threshold, 0);
+  uint64_t num_inactive_pgs = 0;
+  
+  if (detail) {
+    
+    // we need to collect details of stuck pgs, first do a quick check
+    // whether this will yield any results
+    if (pg_map.get_stuck_counts(cutoff, note)) {
+      
+      // there are stuck pgs. gather details for specified statuses
+      // only if we know that there are pgs stuck in that status
+      
+      if (note.find("stuck inactive") != note.end()) {
+        pg_map.get_stuck_stats(PGMap::STUCK_INACTIVE, cutoff, stuck_pgs);
+        note["stuck inactive"] = stuck_pgs.size();
+        num_inactive_pgs += stuck_pgs.size();
+        note_stuck_detail(PGMap::STUCK_INACTIVE, stuck_pgs, detail);
+        stuck_pgs.clear();
+      }
 
-  pg_map.get_stuck_stats(PGMap::STUCK_INACTIVE, cutoff, stuck_pgs);
-  if (!stuck_pgs.empty()) {
-    note["stuck inactive"] = stuck_pgs.size();
-    if (detail)
-      note_stuck_detail(PGMap::STUCK_INACTIVE, stuck_pgs, detail);
-  }
-  stuck_pgs.clear();
+      if (note.find("stuck unclean") != note.end()) {
+        pg_map.get_stuck_stats(PGMap::STUCK_UNCLEAN, cutoff, stuck_pgs);
+        note["stuck unclean"] = stuck_pgs.size();
+        note_stuck_detail(PGMap::STUCK_UNCLEAN, stuck_pgs, detail);
+        stuck_pgs.clear();
+      }
 
-  pg_map.get_stuck_stats(PGMap::STUCK_UNCLEAN, cutoff, stuck_pgs);
-  if (!stuck_pgs.empty()) {
-    note["stuck unclean"] = stuck_pgs.size();
-    if (detail)
-      note_stuck_detail(PGMap::STUCK_UNCLEAN, stuck_pgs, detail);
-  }
-  stuck_pgs.clear();
+      if (note.find("stuck undersized") != note.end()) {
+        pg_map.get_stuck_stats(PGMap::STUCK_UNDERSIZED, cutoff, stuck_pgs);
+        note["stuck undersized"] = stuck_pgs.size();
+        note_stuck_detail(PGMap::STUCK_UNDERSIZED, stuck_pgs, detail);
+        stuck_pgs.clear();
+      }
 
-  pg_map.get_stuck_stats(PGMap::STUCK_UNDERSIZED, cutoff, stuck_pgs);
-  if (!stuck_pgs.empty()) {
-    note["stuck undersized"] = stuck_pgs.size();
-    if (detail)
-      note_stuck_detail(PGMap::STUCK_UNDERSIZED, stuck_pgs, detail);
-  }
-  stuck_pgs.clear();
+      if (note.find("stuck degraded") != note.end()) {
+        pg_map.get_stuck_stats(PGMap::STUCK_DEGRADED, cutoff, stuck_pgs);
+        note["stuck degraded"] = stuck_pgs.size();
+        note_stuck_detail(PGMap::STUCK_DEGRADED, stuck_pgs, detail);
+        stuck_pgs.clear();
+      }
 
-  pg_map.get_stuck_stats(PGMap::STUCK_DEGRADED, cutoff, stuck_pgs);
-  if (!stuck_pgs.empty()) {
-    note["stuck degraded"] = stuck_pgs.size();
-    if (detail)
-      note_stuck_detail(PGMap::STUCK_DEGRADED, stuck_pgs, detail);
+      if (note.find("stuck stale") != note.end()) {
+        pg_map.get_stuck_stats(PGMap::STUCK_STALE, cutoff, stuck_pgs);
+        note["stuck stale"] = stuck_pgs.size();
+        num_inactive_pgs += stuck_pgs.size();
+        note_stuck_detail(PGMap::STUCK_STALE, stuck_pgs, detail);
+      }
+    }
+  } else {
+    pg_map.get_stuck_counts(cutoff, note);
+    map<string,int>::const_iterator p = note.find("stuck inactive");
+    if (p != note.end()) 
+      num_inactive_pgs += p->second;
+    p = note.find("stuck stale");
+    if (p != note.end()) 
+      num_inactive_pgs += p->second;
   }
-  stuck_pgs.clear();
 
-  pg_map.get_stuck_stats(PGMap::STUCK_STALE, cutoff, stuck_pgs);
-  if (!stuck_pgs.empty()) {
-    note["stuck stale"] = stuck_pgs.size();
-    if (detail)
-      note_stuck_detail(PGMap::STUCK_STALE, stuck_pgs, detail);
+  if (g_conf->mon_pg_min_inactive > 0 && num_inactive_pgs >= g_conf->mon_pg_min_inactive) {
+    ostringstream ss;
+    ss << num_inactive_pgs << " pgs are stuck inactive for more than " << g_conf->mon_pg_stuck_threshold << " seconds";
+    summary.push_back(make_pair(HEALTH_ERR, ss.str()));
   }
 
   if (!note.empty()) {
@@ -2310,6 +2408,9 @@ void PGMonitor::get_health(list<pair<health_status_t,string> >& summary,
       }
     }
   }
+
+  print_unscrubbed_pgs(pg_map.pg_stat, summary, detail, cct);
+
 }
 
 void PGMonitor::check_full_osd_health(list<pair<health_status_t,string> >& summary,
@@ -2353,7 +2454,7 @@ int PGMonitor::dump_stuck_pg_stats(stringstream &ds,
       stuck_types |= PGMap::STUCK_STALE;
     else {
       ds << "Unknown type: " << *i << std::endl;
-      return 0;
+      return -EINVAL;
     }
   }
 
diff --git a/src/mon/PGMonitor.h b/src/mon/PGMonitor.h
index e2d2095..c5af197 100644
--- a/src/mon/PGMonitor.h
+++ b/src/mon/PGMonitor.h
@@ -207,7 +207,8 @@ public:
 				   list<pair<health_status_t,string> > *detail) const;
 
   void get_health(list<pair<health_status_t,string> >& summary,
-		  list<pair<health_status_t,string> > *detail) const;
+		  list<pair<health_status_t,string> > *detail,
+		  CephContext *cct) const override;
   void check_full_osd_health(list<pair<health_status_t,string> >& summary,
 			     list<pair<health_status_t,string> > *detail,
 			     const set<int>& s, const char *desc, health_status_t sev) const;
diff --git a/src/mon/Paxos.cc b/src/mon/Paxos.cc
index dfab30a..52b0bda 100644
--- a/src/mon/Paxos.cc
+++ b/src/mon/Paxos.cc
@@ -809,7 +809,7 @@ void Paxos::accept_timeout()
 
 struct C_Committed : public Context {
   Paxos *paxos;
-  C_Committed(Paxos *p) : paxos(p) {}
+  explicit C_Committed(Paxos *p) : paxos(p) {}
   void finish(int r) {
     assert(r >= 0);
     Mutex::Locker l(paxos->mon->lock);
diff --git a/src/mon/Paxos.h b/src/mon/Paxos.h
index 4f57027..052cd76 100644
--- a/src/mon/Paxos.h
+++ b/src/mon/Paxos.h
@@ -629,7 +629,7 @@ private:
   class C_CollectTimeout : public Context {
     Paxos *paxos;
   public:
-    C_CollectTimeout(Paxos *p) : paxos(p) {}
+    explicit C_CollectTimeout(Paxos *p) : paxos(p) {}
     void finish(int r) {
       if (r == -ECANCELED)
 	return;
@@ -643,7 +643,7 @@ private:
   class C_AcceptTimeout : public Context {
     Paxos *paxos;
   public:
-    C_AcceptTimeout(Paxos *p) : paxos(p) {}
+    explicit C_AcceptTimeout(Paxos *p) : paxos(p) {}
     void finish(int r) {
       if (r == -ECANCELED)
 	return;
@@ -657,7 +657,7 @@ private:
   class C_LeaseAckTimeout : public Context {
     Paxos *paxos;
   public:
-    C_LeaseAckTimeout(Paxos *p) : paxos(p) {}
+    explicit C_LeaseAckTimeout(Paxos *p) : paxos(p) {}
     void finish(int r) {
       if (r == -ECANCELED)
 	return;
@@ -671,7 +671,7 @@ private:
   class C_LeaseTimeout : public Context {
     Paxos *paxos;
   public:
-    C_LeaseTimeout(Paxos *p) : paxos(p) {}
+    explicit C_LeaseTimeout(Paxos *p) : paxos(p) {}
     void finish(int r) {
       if (r == -ECANCELED)
 	return;
@@ -685,7 +685,7 @@ private:
   class C_LeaseRenew : public Context {
     Paxos *paxos;
   public:
-    C_LeaseRenew(Paxos *p) : paxos(p) {}
+    explicit C_LeaseRenew(Paxos *p) : paxos(p) {}
     void finish(int r) {
       if (r == -ECANCELED)
 	return;
@@ -696,7 +696,7 @@ private:
   class C_Trimmed : public Context {
     Paxos *paxos;
   public:
-    C_Trimmed(Paxos *p) : paxos(p) { }
+    explicit C_Trimmed(Paxos *p) : paxos(p) { }
     void finish(int r) {
       paxos->trimming = false;
     }
diff --git a/src/mon/PaxosService.cc b/src/mon/PaxosService.cc
index 141a28b..ab169b8 100644
--- a/src/mon/PaxosService.cc
+++ b/src/mon/PaxosService.cc
@@ -388,6 +388,9 @@ void PaxosService::trim(MonitorDBStore::TransactionRef t,
   if (g_conf->mon_compact_on_trim) {
     dout(20) << " compacting prefix " << get_service_name() << dendl;
     t->compact_range(get_service_name(), stringify(from - 1), stringify(to));
+    t->compact_range(get_service_name(),
+		     mon->store->combine_strings(full_prefix_name, from - 1),
+		     mon->store->combine_strings(full_prefix_name, to));
   }
 }
 
diff --git a/src/mon/PaxosService.h b/src/mon/PaxosService.h
index 87bf04b..77c6225 100644
--- a/src/mon/PaxosService.h
+++ b/src/mon/PaxosService.h
@@ -128,7 +128,7 @@ protected:
   class C_Active : public Context {
     PaxosService *svc;
   public:
-    C_Active(PaxosService *s) : svc(s) {}
+    explicit C_Active(PaxosService *s) : svc(s) {}
     void finish(int r) {
       if (r >= 0)
 	svc->_active();
@@ -142,7 +142,7 @@ protected:
   class C_Propose : public Context {
     PaxosService *ps;
   public:
-    C_Propose(PaxosService *p) : ps(p) { }
+    explicit C_Propose(PaxosService *p) : ps(p) { }
     void finish(int r) {
       ps->proposal_timer = 0;
       if (r >= 0)
@@ -167,7 +167,7 @@ protected:
   class C_Committed : public Context {
     PaxosService *ps;
   public:
-    C_Committed(PaxosService *p) : ps(p) { }
+    explicit C_Committed(PaxosService *p) : ps(p) { }
     void finish(int r) {
       ps->proposing = false;
       if (r >= 0)
@@ -487,7 +487,8 @@ public:
    * @param detail optional list of detailed problem reports; may be NULL
    */
   virtual void get_health(list<pair<health_status_t,string> >& summary,
-			  list<pair<health_status_t,string> > *detail) const { }
+			  list<pair<health_status_t,string> > *detail,
+			  CephContext *cct) const { }
 
  private:
   /**
diff --git a/src/mon/mon_types.h b/src/mon/mon_types.h
index a720075..08dd11f 100644
--- a/src/mon/mon_types.h
+++ b/src/mon/mon_types.h
@@ -212,7 +212,7 @@ struct C_MonOp : public Context
 {
   MonOpRequestRef op;
 
-  C_MonOp(MonOpRequestRef o) :
+  explicit C_MonOp(MonOpRequestRef o) :
     op(o) { }
 
   void finish(int r) {
diff --git a/src/msg/Dispatcher.h b/src/msg/Dispatcher.h
index d6868ed..f7de0de 100644
--- a/src/msg/Dispatcher.h
+++ b/src/msg/Dispatcher.h
@@ -16,6 +16,7 @@
 #ifndef CEPH_DISPATCHER_H
 #define CEPH_DISPATCHER_H
 
+#include "include/assert.h"
 #include "include/buffer_fwd.h"
 #include "include/assert.h"
 
@@ -28,7 +29,7 @@ class CephContext;
 
 class Dispatcher {
 public:
-  Dispatcher(CephContext *cct_)
+  explicit Dispatcher(CephContext *cct_)
     : cct(cct_)
   {
   }
diff --git a/src/msg/Message.h b/src/msg/Message.h
index 9eddce1..a79dbaa 100644
--- a/src/msg/Message.h
+++ b/src/msg/Message.h
@@ -231,7 +231,7 @@ public:
     Message *m;
     friend class Message;
   public:
-    CompletionHook(Message *_m) : m(_m) {}
+    explicit CompletionHook(Message *_m) : m(_m) {}
     virtual void set_message(Message *_m) { m = _m; }
   };
 
diff --git a/src/msg/async/AsyncConnection.cc b/src/msg/async/AsyncConnection.cc
index e9a80d7..9398e91 100644
--- a/src/msg/async/AsyncConnection.cc
+++ b/src/msg/async/AsyncConnection.cc
@@ -50,7 +50,7 @@ class C_time_wakeup : public EventCallback {
   AsyncConnectionRef conn;
 
  public:
-  C_time_wakeup(AsyncConnectionRef c): conn(c) {}
+  explicit C_time_wakeup(AsyncConnectionRef c): conn(c) {}
   void do_request(int fd_or_id) {
     conn->wakeup_from(fd_or_id);
   }
@@ -60,7 +60,7 @@ class C_handle_read : public EventCallback {
   AsyncConnectionRef conn;
 
  public:
-  C_handle_read(AsyncConnectionRef c): conn(c) {}
+  explicit C_handle_read(AsyncConnectionRef c): conn(c) {}
   void do_request(int fd_or_id) {
     conn->process();
   }
@@ -70,7 +70,7 @@ class C_handle_write : public EventCallback {
   AsyncConnectionRef conn;
 
  public:
-  C_handle_write(AsyncConnectionRef c): conn(c) {}
+  explicit C_handle_write(AsyncConnectionRef c): conn(c) {}
   void do_request(int fd) {
     conn->handle_write();
   }
@@ -106,6 +106,7 @@ class C_handle_dispatch : public EventCallback {
   C_handle_dispatch(AsyncMessenger *msgr, Message *m): msgr(msgr), m(m) {}
   void do_request(int id) {
     msgr->ms_deliver_dispatch(m);
+    delete this;
   }
 };
 
@@ -128,13 +129,14 @@ class C_deliver_accept : public EventCallback {
   C_deliver_accept(AsyncMessenger *msgr, AsyncConnectionRef c): msgr(msgr), conn(c) {}
   void do_request(int id) {
     msgr->ms_deliver_handle_accept(conn.get());
+    delete this;
   }
 };
 
 class C_local_deliver : public EventCallback {
   AsyncConnectionRef conn;
  public:
-  C_local_deliver(AsyncConnectionRef c): conn(c) {}
+  explicit C_local_deliver(AsyncConnectionRef c): conn(c) {}
   void do_request(int id) {
     conn->local_deliver();
   }
@@ -144,7 +146,7 @@ class C_local_deliver : public EventCallback {
 class C_clean_handler : public EventCallback {
   AsyncConnectionRef conn;
  public:
-  C_clean_handler(AsyncConnectionRef c): conn(c) {}
+  explicit C_clean_handler(AsyncConnectionRef c): conn(c) {}
   void do_request(int id) {
     conn->cleanup_handler();
     delete this;
@@ -159,19 +161,16 @@ static void alloc_aligned_buffer(bufferlist& data, unsigned len, unsigned off)
     // head
     unsigned head = 0;
     head = MIN(CEPH_PAGE_SIZE - (off & ~CEPH_PAGE_MASK), left);
-    bufferptr bp = buffer::create(head);
-    data.push_back(bp);
+    data.push_back(buffer::create(head));
     left -= head;
   }
   unsigned middle = left & CEPH_PAGE_MASK;
   if (middle > 0) {
-    bufferptr bp = buffer::create_page_aligned(middle);
-    data.push_back(bp);
+    data.push_back(buffer::create_page_aligned(middle));
     left -= middle;
   }
   if (left) {
-    bufferptr bp = buffer::create(left);
-    data.push_back(bp);
+    data.push_back(buffer::create(left));
   }
 }
 
@@ -302,9 +301,9 @@ ssize_t AsyncConnection::do_sendmsg(struct msghdr &msg, unsigned len, bool more)
   while (len > 0) {
     ssize_t r;
 #if defined(MSG_NOSIGNAL)
-    r = ::sendmsg(sd, &msg, MSG_NOSIGNAL);
+    r = ::sendmsg(sd, &msg, MSG_NOSIGNAL | (more ? MSG_MORE : 0));
 #else
-    r = ::sendmsg(sd, &msg, 0);
+    r = ::sendmsg(sd, &msg, (more ? MSG_MORE : 0));
 #endif /* defined(MSG_NOSIGNAL) */
 
     if (r == 0) {
@@ -316,6 +315,7 @@ ssize_t AsyncConnection::do_sendmsg(struct msghdr &msg, unsigned len, bool more)
         break;
       } else {
         ldout(async_msgr->cct, 1) << __func__ << " sendmsg error: " << cpp_strerror(errno) << dendl;
+        restore_sigpipe();
         return r;
       }
     }
@@ -337,21 +337,18 @@ ssize_t AsyncConnection::do_sendmsg(struct msghdr &msg, unsigned len, bool more)
         break;
       }
     }
-    restore_sigpipe();
   }
+  restore_sigpipe();
   return (ssize_t)len;
 }
 
 // return the remaining bytes, it may larger than the length of ptr
 // else return < 0 means error
-ssize_t AsyncConnection::_try_send(bufferlist &send_bl, bool send)
+ssize_t AsyncConnection::_try_send(bufferlist &send_bl, bool send, bool more)
 {
   ldout(async_msgr->cct, 20) << __func__ << " send bl length is " << send_bl.length() << dendl;
   if (send_bl.length()) {
-    if (outcoming_bl.length())
-      outcoming_bl.claim_append(send_bl);
-    else
-      outcoming_bl.swap(send_bl);
+    outcoming_bl.claim_append(send_bl);
   }
 
   if (!send)
@@ -384,7 +381,7 @@ ssize_t AsyncConnection::_try_send(bufferlist &send_bl, bool send)
       size--;
     }
 
-    ssize_t r = do_sendmsg(msg, msglen, false);
+    ssize_t r = do_sendmsg(msg, msglen, left_pbrs || more);
     if (r < 0)
       return r;
 
@@ -402,9 +399,12 @@ ssize_t AsyncConnection::_try_send(bufferlist &send_bl, bool send)
   // trim already sent for outcoming_bl
   if (sent_bytes) {
     bufferlist bl;
-    if (sent_bytes < outcoming_bl.length())
+    if (sent_bytes < outcoming_bl.length()) {
       outcoming_bl.splice(sent_bytes, outcoming_bl.length()-sent_bytes, &bl);
-    bl.swap(outcoming_bl);
+      bl.swap(outcoming_bl);
+    } else {
+      outcoming_bl.clear();
+    }
   }
 
   ldout(async_msgr->cct, 20) << __func__ << " sent bytes " << sent_bytes
@@ -724,10 +724,9 @@ void AsyncConnection::process()
           // read front
           unsigned front_len = current_header.front_len;
           if (front_len) {
-            if (!front.length()) {
-              bufferptr ptr = buffer::create(front_len);
-              front.push_back(ptr);
-            }
+            if (!front.length())
+              front.push_back(buffer::create(front_len));
+
             r = read_until(front_len, front.c_str());
             if (r < 0) {
               ldout(async_msgr->cct, 1) << __func__ << " read message front failed" << dendl;
@@ -747,10 +746,9 @@ void AsyncConnection::process()
           // read middle
           unsigned middle_len = current_header.middle_len;
           if (middle_len) {
-            if (!middle.length()) {
-              bufferptr ptr = buffer::create(middle_len);
-              middle.push_back(ptr);
-            }
+            if (!middle.length())
+              middle.push_back(buffer::create(middle_len));
+
             r = read_until(middle_len, middle.c_str());
             if (r < 0) {
               ldout(async_msgr->cct, 1) << __func__ << " read message middle failed" << dendl;
@@ -2004,6 +2002,7 @@ void AsyncConnection::accept(int incoming)
   ldout(async_msgr->cct, 10) << __func__ << " sd=" << incoming << dendl;
   assert(sd < 0);
 
+  Mutex::Locker l(lock);
   sd = incoming;
   state = STATE_ACCEPTING;
   center->create_file_event(sd, EVENT_READABLE, read_handler);
@@ -2023,11 +2022,17 @@ int AsyncConnection::send_message(Message *m)
   m->set_connection(this);
 
   if (async_msgr->get_myaddr() == get_peer_addr()) { //loopback connection
-   ldout(async_msgr->cct, 20) << __func__ << " " << *m << " local" << dendl;
-   Mutex::Locker l(write_lock);
-   local_messages.push_back(m);
-   center->dispatch_event_external(local_deliver_handler);
-   return 0;
+    ldout(async_msgr->cct, 20) << __func__ << " " << *m << " local" << dendl;
+    Mutex::Locker l(write_lock);
+    if (can_write != CLOSED) {
+      local_messages.push_back(m);
+      center->dispatch_event_external(local_deliver_handler);
+    } else {
+      ldout(async_msgr->cct, 10) << __func__ << " loopback connection closed."
+                                 << " Drop message " << m << dendl;
+      m->put();
+    }
+    return 0;
   }
 
   // we don't want to consider local message here, it's too lightweight which
@@ -2052,9 +2057,10 @@ int AsyncConnection::send_message(Message *m)
     ldout(async_msgr->cct, 5) << __func__ << " clear encoded buffer, can_write=" << can_write << " previous "
                               << f << " != " << get_features() << dendl;
   }
-  if (!is_queued() && can_write == CANWRITE) {
+  if (!is_queued() && can_write == CANWRITE && async_msgr->cct->_conf->ms_async_send_inline) {
     if (!can_fast_prepare)
       prepare_send_message(get_features(), m, bl);
+    logger->inc(l_msgr_send_messages_inline);
     if (write_message(m, bl) < 0) {
       ldout(async_msgr->cct, 1) << __func__ << " send msg failed" << dendl;
       // we want to handle fault within internal thread
@@ -2065,7 +2071,7 @@ int AsyncConnection::send_message(Message *m)
                                << " Drop message " << m << dendl;
     m->put();
   } else {
-    out_q[m->get_priority()].push_back(make_pair(bl, m));
+    out_q[m->get_priority()].emplace_back(std::move(bl), m);
     ldout(async_msgr->cct, 15) << __func__ << " inline write is denied, reschedule m=" << m << dendl;
     center->dispatch_event_external(write_handler);
   }
@@ -2449,7 +2455,7 @@ void AsyncConnection::_send_keepalive_or_ack(bool ack, utime_t *tp)
   }
 
   ldout(async_msgr->cct, 10) << __func__ << " try send keepalive or ack" << dendl;
-  _try_send(bl, false);
+  _try_send(bl, false, true);
 }
 
 void AsyncConnection::handle_write()
@@ -2493,7 +2499,7 @@ void AsyncConnection::handle_write()
       bl.append((char*)&s, sizeof(s));
       ldout(async_msgr->cct, 10) << __func__ << " try send msg ack, acked " << left << " messages" << dendl;
       ack_left.sub(left);
-      r = _try_send(bl);
+      r = _try_send(bl, true, true);
     } else if (is_queued()) {
       r = _try_send(bl);
     }
diff --git a/src/msg/async/AsyncConnection.h b/src/msg/async/AsyncConnection.h
index 40a9887..049c2af 100644
--- a/src/msg/async/AsyncConnection.h
+++ b/src/msg/async/AsyncConnection.h
@@ -49,13 +49,13 @@ class AsyncConnection : public Connection {
   void suppress_sigpipe();
   void restore_sigpipe();
   ssize_t do_sendmsg(struct msghdr &msg, unsigned len, bool more);
-  ssize_t try_send(bufferlist &bl, bool send=true) {
+  ssize_t try_send(bufferlist &bl, bool send=true, bool more=false) {
     Mutex::Locker l(write_lock);
-    return _try_send(bl, send);
+    return _try_send(bl, send, more);
   }
   // if "send" is false, it will only append bl to send buffer
   // the main usage is avoid error happen outside messenger threads
-  ssize_t _try_send(bufferlist &bl, bool send=true);
+  ssize_t _try_send(bufferlist &bl, bool send=true, bool more=false);
   ssize_t _send(Message *m);
   void prepare_send_message(uint64_t features, Message *m, bufferlist &bl);
   ssize_t read_until(unsigned needed, char *p);
@@ -74,7 +74,7 @@ class AsyncConnection : public Connection {
   void _send_keepalive_or_ack(bool ack=false, utime_t *t=NULL);
   ssize_t write_message(Message *m, bufferlist& bl);
   ssize_t _reply_accept(char tag, ceph_msg_connect &connect, ceph_msg_connect_reply &reply,
-                    bufferlist authorizer_reply) {
+                    bufferlist &authorizer_reply) {
     bufferlist reply_bl;
     reply.tag = tag;
     reply.features = ((uint64_t)connect.features & policy.features_supported) | policy.features_required;
diff --git a/src/msg/async/AsyncMessenger.cc b/src/msg/async/AsyncMessenger.cc
index 31158a4..b54db4e 100644
--- a/src/msg/async/AsyncMessenger.cc
+++ b/src/msg/async/AsyncMessenger.cc
@@ -286,7 +286,7 @@ void *Worker::entry()
     }
   }
 
-  center.set_owner(pthread_self());
+  center.set_owner();
   while (!done) {
     ldout(cct, 20) << __func__ << " calling event process" << dendl;
 
@@ -741,6 +741,7 @@ void AsyncMessenger::learned_addr(const entity_addr_t &peer_addr_for_me)
 
 int AsyncMessenger::reap_dead()
 {
+  ldout(cct, 1) << __func__ << " start" << dendl;
   int num = 0;
 
   Mutex::Locker l1(lock);
diff --git a/src/msg/async/AsyncMessenger.h b/src/msg/async/AsyncMessenger.h
index 5206a81..deea67c 100644
--- a/src/msg/async/AsyncMessenger.h
+++ b/src/msg/async/AsyncMessenger.h
@@ -45,6 +45,7 @@ enum {
   l_msgr_first = 94000,
   l_msgr_recv_messages,
   l_msgr_send_messages,
+  l_msgr_send_messages_inline,
   l_msgr_recv_bytes,
   l_msgr_send_bytes,
   l_msgr_created_connections,
@@ -74,6 +75,7 @@ class Worker : public Thread {
 
     plb.add_u64_counter(l_msgr_recv_messages, "msgr_recv_messages", "Network received messages");
     plb.add_u64_counter(l_msgr_send_messages, "msgr_send_messages", "Network sent messages");
+    plb.add_u64_counter(l_msgr_send_messages_inline, "msgr_send_messages_inline", "Network sent inline messages");
     plb.add_u64_counter(l_msgr_recv_bytes, "msgr_recv_bytes", "Network received bytes");
     plb.add_u64_counter(l_msgr_send_bytes, "msgr_send_bytes", "Network received bytes");
     plb.add_u64_counter(l_msgr_created_connections, "msgr_active_connections", "Active connection number");
@@ -109,7 +111,7 @@ class Processor {
     Processor *pro;
 
    public:
-    C_processor_accept(Processor *p): pro(p) {}
+    explicit C_processor_accept(Processor *p): pro(p) {}
     void do_request(int id) {
       pro->accept();
     }
@@ -143,7 +145,7 @@ class WorkerPool {
   class C_barrier : public EventCallback {
     WorkerPool *pool;
    public:
-    C_barrier(WorkerPool *p): pool(p) {}
+    explicit C_barrier(WorkerPool *p): pool(p) {}
     void do_request(int id) {
       Mutex::Locker l(pool->barrier_lock);
       pool->barrier_count.dec();
@@ -153,7 +155,7 @@ class WorkerPool {
   };
   friend class C_barrier;
  public:
-  WorkerPool(CephContext *c);
+  explicit WorkerPool(CephContext *c);
   virtual ~WorkerPool();
   void start();
   Worker *get_worker() {
@@ -332,7 +334,7 @@ private:
     AsyncMessenger *msgr;
 
    public:
-    C_handle_reap(AsyncMessenger *m): msgr(m) {}
+    explicit C_handle_reap(AsyncMessenger *m): msgr(m) {}
     void do_request(int id) {
       // judge whether is a time event
       msgr->reap_dead();
diff --git a/src/msg/async/Event.cc b/src/msg/async/Event.cc
index 04887b8..00abef4 100644
--- a/src/msg/async/Event.cc
+++ b/src/msg/async/Event.cc
@@ -62,6 +62,8 @@ ostream& EventCenter::_event_prefix(std::ostream *_dout)
                 << " time_id=" << time_event_next_id << ").";
 }
 
+static thread_local pthread_t thread_id = 0;
+
 int EventCenter::init(int n)
 {
   // can't init multi times
@@ -126,6 +128,12 @@ EventCenter::~EventCenter()
     free(file_events);
 }
 
+
+void EventCenter::set_owner()
+{
+  thread_id = owner = pthread_self();
+}
+
 int EventCenter::create_file_event(int fd, int mask, EventCallbackRef ctxt)
 {
   int r = 0;
@@ -181,8 +189,8 @@ void EventCenter::delete_file_event(int fd, int mask)
 {
   assert(fd >= 0);
   Mutex::Locker l(file_lock);
-  if (fd > nevent) {
-    ldout(cct, 1) << __func__ << " delete event fd=" << fd << " exceed nevent=" << nevent
+  if (fd >= nevent) {
+    ldout(cct, 1) << __func__ << " delete event fd=" << fd << " is equal or greater than nevent=" << nevent
                   << "mask=" << mask << dendl;
     return ;
   }
@@ -334,15 +342,21 @@ int EventCenter::process_events(int timeout_microseconds)
   int numevents;
   bool trigger_time = false;
 
-  utime_t period, shortest, now = ceph_clock_now(cct);
-  now.copy_to_timeval(&tv);
-  if (timeout_microseconds > 0) {
-    tv.tv_sec += timeout_microseconds / 1000000;
-    tv.tv_usec += timeout_microseconds % 1000000;
-  }
-  shortest.set_from_timeval(&tv);
+  utime_t now = ceph_clock_now(cct);;
+  // If exists external events, don't block
+  if (external_num_events.read()) {
+    tv.tv_sec = 0;
+    tv.tv_usec = 0;
+    next_time = now;
+  } else {
+    utime_t period, shortest;
+    now.copy_to_timeval(&tv);
+    if (timeout_microseconds > 0) {
+      tv.tv_sec += timeout_microseconds / 1000000;
+      tv.tv_usec += timeout_microseconds % 1000000;
+    }
+    shortest.set_from_timeval(&tv);
 
-  {
     Mutex::Locker l(time_lock);
     map<utime_t, list<TimeEvent> >::iterator it = time_events.begin();
     if (it != time_events.end() && shortest >= it->first) {
@@ -360,11 +374,11 @@ int EventCenter::process_events(int timeout_microseconds)
       tv.tv_sec = timeout_microseconds / 1000000;
       tv.tv_usec = timeout_microseconds % 1000000;
     }
+    next_time = shortest;
   }
 
   ldout(cct, 10) << __func__ << " wait second " << tv.tv_sec << " usec " << tv.tv_usec << dendl;
   vector<FiredFileEvent> fired_events;
-  next_time = shortest;
   numevents = driver->event_wait(fired_events, &tv);
   file_lock.Lock();
   for (int j = 0; j < numevents; j++) {
@@ -402,18 +416,21 @@ int EventCenter::process_events(int timeout_microseconds)
   if (trigger_time)
     numevents += process_time_events();
 
-  external_lock.Lock();
-  if (external_events.empty()) {
-    external_lock.Unlock();
-  } else {
-    deque<EventCallbackRef> cur_process;
-    cur_process.swap(external_events);
-    external_lock.Unlock();
-    while (!cur_process.empty()) {
-      EventCallbackRef e = cur_process.front();
-      if (e)
-        e->do_request(0);
-      cur_process.pop_front();
+  if (external_num_events.read()) {
+    external_lock.Lock();
+    if (external_events.empty()) {
+      external_lock.Unlock();
+    } else {
+      deque<EventCallbackRef> cur_process;
+      cur_process.swap(external_events);
+      external_num_events.set(0);
+      external_lock.Unlock();
+      while (!cur_process.empty()) {
+        EventCallbackRef e = cur_process.front();
+        if (e)
+          e->do_request(0);
+        cur_process.pop_front();
+      }
     }
   }
   return numevents;
@@ -423,6 +440,10 @@ void EventCenter::dispatch_event_external(EventCallbackRef e)
 {
   external_lock.Lock();
   external_events.push_back(e);
+  uint64_t num = external_num_events.inc();
   external_lock.Unlock();
-  wakeup();
+  if (thread_id != owner)
+    wakeup();
+
+  ldout(cct, 10) << __func__ << " " << e << " pending " << num << dendl;
 }
diff --git a/src/msg/async/Event.h b/src/msg/async/Event.h
index 2575130..526f8b3 100644
--- a/src/msg/async/Event.h
+++ b/src/msg/async/Event.h
@@ -103,6 +103,7 @@ class EventCenter {
   int nevent;
   // Used only to external event
   Mutex external_lock, file_lock, time_lock;
+  atomic_t external_num_events;
   deque<EventCallbackRef> external_events;
   FileEvent *file_events;
   EventDriver *driver;
@@ -127,11 +128,12 @@ class EventCenter {
  public:
   atomic_t already_wakeup;
 
-  EventCenter(CephContext *c):
+  explicit EventCenter(CephContext *c):
     cct(c), nevent(0),
     external_lock("AsyncMessenger::external_lock"),
     file_lock("AsyncMessenger::file_lock"),
     time_lock("AsyncMessenger::time_lock"),
+    external_num_events(0),
     file_events(NULL),
     driver(NULL), time_event_next_id(1),
     notify_receive_fd(-1), notify_send_fd(-1), net(c), owner(0), already_wakeup(0) {
@@ -141,7 +143,7 @@ class EventCenter {
   ostream& _event_prefix(std::ostream *_dout);
 
   int init(int nevent);
-  void set_owner(pthread_t p) { owner = p; }
+  void set_owner();
   pthread_t get_owner() { return owner; }
 
   // Used by internal thread
diff --git a/src/msg/async/EventEpoll.h b/src/msg/async/EventEpoll.h
index 7f01488..c9e5d5c 100644
--- a/src/msg/async/EventEpoll.h
+++ b/src/msg/async/EventEpoll.h
@@ -29,7 +29,7 @@ class EpollDriver : public EventDriver {
   int size;
 
  public:
-  EpollDriver(CephContext *c): epfd(-1), events(NULL), cct(c), size(0) {}
+  explicit EpollDriver(CephContext *c): epfd(-1), events(NULL), cct(c), size(0) {}
   virtual ~EpollDriver() {
     if (epfd != -1)
       close(epfd);
diff --git a/src/msg/async/EventKqueue.h b/src/msg/async/EventKqueue.h
index c7f99a7..8fb53dd 100644
--- a/src/msg/async/EventKqueue.h
+++ b/src/msg/async/EventKqueue.h
@@ -30,7 +30,7 @@ class KqueueDriver : public EventDriver {
   int size;
 
  public:
-  KqueueDriver(CephContext *c): kqfd(-1), events(NULL), cct(c), size(0) {}
+  explicit KqueueDriver(CephContext *c): kqfd(-1), events(NULL), cct(c), size(0) {}
   virtual ~KqueueDriver() {
     if (kqfd != -1)
       close(kqfd);
diff --git a/src/msg/async/EventSelect.h b/src/msg/async/EventSelect.h
index 96ec322..7cf42ff 100644
--- a/src/msg/async/EventSelect.h
+++ b/src/msg/async/EventSelect.h
@@ -31,7 +31,7 @@ class SelectDriver : public EventDriver {
   CephContext *cct;
 
  public:
-  SelectDriver(CephContext *c): max_fd(0), cct(c) {}
+  explicit SelectDriver(CephContext *c): max_fd(0), cct(c) {}
   virtual ~SelectDriver() {}
 
   int init(int nevent);
diff --git a/src/msg/async/net_handler.h b/src/msg/async/net_handler.h
index 64423dc..980e009 100644
--- a/src/msg/async/net_handler.h
+++ b/src/msg/async/net_handler.h
@@ -26,7 +26,7 @@ namespace ceph {
 
     CephContext *cct;
    public:
-    NetHandler(CephContext *c): cct(c) {}
+    explicit NetHandler(CephContext *c): cct(c) {}
     int set_nonblock(int sd);
     void set_socket_options(int sd);
     int connect(const entity_addr_t &addr);
diff --git a/src/msg/msg_types.h b/src/msg/msg_types.h
index 8f98b88..3acad77 100644
--- a/src/msg/msg_types.h
+++ b/src/msg/msg_types.h
@@ -43,7 +43,7 @@ public:
   // cons
   entity_name_t() : _type(0), _num(0) { }
   entity_name_t(int t, int64_t n) : _type(t), _num(n) { }
-  entity_name_t(const ceph_entity_name &n) : 
+  explicit entity_name_t(const ceph_entity_name &n) : 
     _type(n.type), _num(n.num) { }
 
   // static cons
@@ -222,7 +222,7 @@ struct entity_addr_t {
   entity_addr_t() : type(0), nonce(0) { 
     memset(&addr, 0, sizeof(addr));
   }
-  entity_addr_t(const ceph_entity_addr &o) {
+  explicit entity_addr_t(const ceph_entity_addr &o) {
     type = o.type;
     nonce = o.nonce;
     addr = o.in_addr;
@@ -424,6 +424,7 @@ struct entity_inst_t {
   entity_addr_t addr;
   entity_inst_t() {}
   entity_inst_t(entity_name_t n, const entity_addr_t& a) : name(n), addr(a) {}
+  // cppcheck-suppress noExplicitConstructor
   entity_inst_t(const ceph_entity_inst& i) : name(i.name), addr(i.addr) { }
   entity_inst_t(const ceph_entity_name& n, const ceph_entity_addr &a) : name(n), addr(a) {}
   operator ceph_entity_inst() {
diff --git a/src/msg/simple/DispatchQueue.h b/src/msg/simple/DispatchQueue.h
index d379f55..dca304a 100644
--- a/src/msg/simple/DispatchQueue.h
+++ b/src/msg/simple/DispatchQueue.h
@@ -44,7 +44,7 @@ class DispatchQueue {
     ConnectionRef con;
     MessageRef m;
   public:
-    QueueItem(Message *m) : type(-1), con(0), m(m) {}
+    explicit QueueItem(Message *m) : type(-1), con(0), m(m) {}
     QueueItem(int type, Connection *con) : type(type), con(con), m(0) {}
     bool is_code() const {
       return type != -1;
@@ -98,7 +98,7 @@ class DispatchQueue {
   class DispatchThread : public Thread {
     DispatchQueue *dq;
   public:
-    DispatchThread(DispatchQueue *dq) : dq(dq) {}
+    explicit DispatchThread(DispatchQueue *dq) : dq(dq) {}
     void *entry() {
       dq->entry();
       return 0;
@@ -112,7 +112,7 @@ class DispatchQueue {
   class LocalDeliveryThread : public Thread {
     DispatchQueue *dq;
   public:
-    LocalDeliveryThread(DispatchQueue *dq) : dq(dq) {}
+    explicit LocalDeliveryThread(DispatchQueue *dq) : dq(dq) {}
     void *entry() {
       dq->run_local_delivery();
       return 0;
diff --git a/src/msg/simple/Pipe.cc b/src/msg/simple/Pipe.cc
index ace77aa..9bfa230 100644
--- a/src/msg/simple/Pipe.cc
+++ b/src/msg/simple/Pipe.cc
@@ -333,7 +333,7 @@ int Pipe::accept()
   }
   {
     bufferptr tp(sizeof(peer_addr));
-    addrbl.push_back(tp);
+    addrbl.push_back(std::move(tp));
   }
   if (tcp_read(addrbl.c_str(), addrbl.length()) < 0) {
     ldout(msgr->cct,10) << "accept couldn't read peer_addr" << dendl;
@@ -371,7 +371,7 @@ int Pipe::accept()
         ldout(msgr->cct,10) << "accept couldn't read connect authorizer" << dendl;
         goto fail_unlocked;
       }
-      authorizer.push_back(bp);
+      authorizer.push_back(std::move(bp));
       authorizer_reply.clear();
     }
 
@@ -948,7 +948,7 @@ int Pipe::connect()
     int wirelen = sizeof(__u32) * 2 + sizeof(ceph_sockaddr_storage);
     bufferptr p(wirelen * 2);
 #endif
-    addrbl.push_back(p);
+    addrbl.push_back(std::move(p));
   }
   if (tcp_read(addrbl.c_str(), addrbl.length()) < 0) {
     ldout(msgr->cct,2) << "connect couldn't read peer addrs, " << cpp_strerror(errno) << dendl;
@@ -1717,9 +1717,9 @@ void Pipe::writer()
       state_closed.set(1);
       pipe_lock.Unlock();
       if (sd) {
+	// we can ignore return value, actually; we don't care if this succeeds.
 	int r = ::write(sd, &tag, 1);
-	// we can ignore r, actually; we don't care if this succeeds.
-	r++; r = 0; // placate gcc
+	(void)r;
       }
       pipe_lock.Lock();
       continue;
@@ -1876,19 +1876,16 @@ static void alloc_aligned_buffer(bufferlist& data, unsigned len, unsigned off)
     // head
     unsigned head = 0;
     head = MIN(CEPH_PAGE_SIZE - (off & ~CEPH_PAGE_MASK), left);
-    bufferptr bp = buffer::create(head);
-    data.push_back(bp);
+    data.push_back(buffer::create(head));
     left -= head;
   }
   unsigned middle = left & CEPH_PAGE_MASK;
   if (middle > 0) {
-    bufferptr bp = buffer::create_page_aligned(middle);
-    data.push_back(bp);
+    data.push_back(buffer::create_page_aligned(middle));
     left -= middle;
   }
   if (left) {
-    bufferptr bp = buffer::create(left);
-    data.push_back(bp);
+    data.push_back(buffer::create(left));
   }
 }
 
@@ -1976,7 +1973,7 @@ int Pipe::read_message(Message **pm, AuthSessionHandler* auth_handler)
     bufferptr bp = buffer::create(front_len);
     if (tcp_read(bp.c_str(), front_len) < 0)
       goto out_dethrottle;
-    front.push_back(bp);
+    front.push_back(std::move(bp));
     ldout(msgr->cct,20) << "reader got front " << front.length() << dendl;
   }
 
@@ -1986,7 +1983,7 @@ int Pipe::read_message(Message **pm, AuthSessionHandler* auth_handler)
     bufferptr bp = buffer::create(middle_len);
     if (tcp_read(bp.c_str(), middle_len) < 0)
       goto out_dethrottle;
-    middle.push_back(bp);
+    middle.push_back(std::move(bp));
     ldout(msgr->cct,20) << "reader got middle " << middle.length() << dendl;
   }
 
diff --git a/src/msg/simple/Pipe.h b/src/msg/simple/Pipe.h
index bbe8aa2..664f733 100644
--- a/src/msg/simple/Pipe.h
+++ b/src/msg/simple/Pipe.h
@@ -46,7 +46,7 @@ class DispatchQueue;
     class Reader : public Thread {
       Pipe *pipe;
     public:
-      Reader(Pipe *p) : pipe(p) {}
+      explicit Reader(Pipe *p) : pipe(p) {}
       void *entry() { pipe->reader(); return 0; }
     } reader_thread;
 
@@ -57,7 +57,7 @@ class DispatchQueue;
     class Writer : public Thread {
       Pipe *pipe;
     public:
-      Writer(Pipe *p) : pipe(p) {}
+      explicit Writer(Pipe *p) : pipe(p) {}
       void *entry() { pipe->writer(); return 0; }
     } writer_thread;
 
@@ -81,7 +81,7 @@ class DispatchQueue;
       bool stop_fast_dispatching_flag; // we need to stop fast dispatching
 
     public:
-      DelayedDelivery(Pipe *p)
+      explicit DelayedDelivery(Pipe *p)
 	: pipe(p),
 	  delay_lock("Pipe::DelayedDelivery::delay_lock"), flush_count(0),
 	  active_flush(false),
diff --git a/src/msg/simple/SimpleMessenger.h b/src/msg/simple/SimpleMessenger.h
index c05ccc6..5eb441e 100644
--- a/src/msg/simple/SimpleMessenger.h
+++ b/src/msg/simple/SimpleMessenger.h
@@ -191,7 +191,7 @@ private:
   class ReaperThread : public Thread {
     SimpleMessenger *msgr;
   public:
-    ReaperThread(SimpleMessenger *m) : msgr(m) {}
+    explicit ReaperThread(SimpleMessenger *m) : msgr(m) {}
     void *entry() {
       msgr->reaper_entry();
       return 0;
diff --git a/src/msg/xio/QueueStrategy.cc b/src/msg/xio/QueueStrategy.cc
index 2d9806c..0ce279b 100644
--- a/src/msg/xio/QueueStrategy.cc
+++ b/src/msg/xio/QueueStrategy.cc
@@ -11,6 +11,7 @@
  * Foundation.  See file COPYING.
  *
  */
+#include <string>
 #include "QueueStrategy.h"
 #define dout_subsys ceph_subsys_ms
 #include "common/debug.h"
@@ -106,8 +107,10 @@ void QueueStrategy::start()
   assert(!stop);
   lock.Lock();
   for (int ix = 0; ix < n_threads; ++ix) {
+    string thread_name = "ms_xio_qs_";
+    thread_name.append(std::to_string(ix));
     thrd = new QSThread(this);
-    thrd->create();
+    thrd->create(thread_name.c_str());
   }
   lock.Unlock();
 }
diff --git a/src/msg/xio/QueueStrategy.h b/src/msg/xio/QueueStrategy.h
index b5f9e8a..7b23d8f 100644
--- a/src/msg/xio/QueueStrategy.h
+++ b/src/msg/xio/QueueStrategy.h
@@ -34,7 +34,7 @@ class QueueStrategy : public DispatchStrategy {
     bi::list_member_hook<> thread_q;
     QueueStrategy *dq;
     Cond cond;
-    QSThread(QueueStrategy *dq) : thread_q(), dq(dq), cond() {}
+    explicit QSThread(QueueStrategy *dq) : thread_q(), dq(dq), cond() {}
     void* entry() {
       dq->entry(this);
       delete(this);
@@ -50,7 +50,7 @@ class QueueStrategy : public DispatchStrategy {
   QSThread::Queue disp_threads;
 
 public:
-  QueueStrategy(int n_threads);
+  explicit QueueStrategy(int n_threads);
   virtual void ds_dispatch(Message *m);
   virtual void shutdown();
   virtual void start();
diff --git a/src/msg/xio/XioConnection.h b/src/msg/xio/XioConnection.h
index da34d6e..845d4ab 100644
--- a/src/msg/xio/XioConnection.h
+++ b/src/msg/xio/XioConnection.h
@@ -142,7 +142,7 @@ private:
 
     uint32_t flags;
 
-    CState(XioConnection* _xcon)
+    explicit CState(XioConnection* _xcon)
       : xcon(_xcon),
 	protocol_version(0),
 	session_state(INIT),
@@ -265,8 +265,8 @@ public:
   void send_keepalive() override {}
   void mark_down() override;
   int _mark_down(uint32_t flags);
-  virtual void mark_disposable() override;
-  int _mark_disposable(uint32_t flags) override;
+  void mark_disposable() override;
+  int _mark_disposable(uint32_t flags);
 
   const entity_inst_t& get_peer() const { return peer; }
 
@@ -322,7 +322,7 @@ class XioLoopbackConnection : public Connection
 private:
   atomic_t seq;
 public:
-  XioLoopbackConnection(Messenger *m) : Connection(m->cct, m), seq(0)
+  explicit XioLoopbackConnection(Messenger *m) : Connection(m->cct, m), seq(0)
     {
       const entity_inst_t& m_inst = m->get_myinst();
       peer_addr = m_inst.addr;
@@ -337,7 +337,7 @@ public:
   bool is_connected() override { return true; }
 
   int send_message(Message *m) override;
-  void send_keepalive()i override {}
+  void send_keepalive() override {}
   void mark_down() override {}
   void mark_disposable() override {}
 
diff --git a/src/msg/xio/XioMsg.h b/src/msg/xio/XioMsg.h
index 68d8ebe..0cba48a 100644
--- a/src/msg/xio/XioMsg.h
+++ b/src/msg/xio/XioMsg.h
@@ -34,7 +34,7 @@ public:
   __le32 msg_cnt;
   buffer::list bl;
 public:
-  XioMsgCnt(buffer::ptr p)
+  explicit XioMsgCnt(buffer::ptr p)
     {
       bl.append(p);
       buffer::list::iterator bl_iter = bl.begin();
@@ -145,7 +145,7 @@ struct xio_msg_ex
   struct xio_msg msg;
   struct xio_iovec_ex iovs[XIO_MSGR_IOVLEN];
 
-  xio_msg_ex(void* user_context) {
+  explicit xio_msg_ex(void* user_context) {
     // go in structure order
     msg.in.header.iov_len = 0;
     msg.in.header.iov_base = NULL;  /* XXX Accelio requires this currently */
diff --git a/src/msg/xio/XioPool.h b/src/msg/xio/XioPool.h
index c8e7b87..f7c950f 100644
--- a/src/msg/xio/XioPool.h
+++ b/src/msg/xio/XioPool.h
@@ -48,7 +48,7 @@ public:
     char payload[MB];
   } *first;
 
-  XioPool(struct xio_mempool *_handle) :
+  explicit XioPool(struct xio_mempool *_handle) :
     handle(_handle), first(0)
     {
     }
diff --git a/src/msg/xio/XioPortal.h b/src/msg/xio/XioPortal.h
index cb711c6..3bbc31b 100644
--- a/src/msg/xio/XioPortal.h
+++ b/src/msg/xio/XioPortal.h
@@ -129,7 +129,7 @@ private:
   friend class XioMessenger;
 
 public:
-  XioPortal(Messenger *_msgr) :
+  explicit XioPortal(Messenger *_msgr) :
   msgr(_msgr), ctx(NULL), server(NULL), submit_q(), xio_uri(""),
   portal_id(NULL), _shutdown(false), drained(false),
   magic(0),
@@ -424,11 +424,11 @@ public:
       /* shift left */
       p_vec[(p_ix-1)] = (char*) /* portal->xio_uri.c_str() */
 			portal->portal_id;
-      }
+    }
 
     for (p_ix = 0; p_ix < nportals; ++p_ix) {
       portal = portals[p_ix];
-      portal->create();
+      portal->create(portal->portal_id);
     }
   }
 
diff --git a/src/ocf/Makefile.in b/src/ocf/Makefile.in
index 72ac9c4..85811bc 100644
--- a/src/ocf/Makefile.in
+++ b/src/ocf/Makefile.in
@@ -236,12 +236,14 @@ JDK_CPPFLAGS = @JDK_CPPFLAGS@
 KEYUTILS_LIB = @KEYUTILS_LIB@
 LD = @LD@
 LDFLAGS = @LDFLAGS@
-LIBEDIT_CFLAGS = @LIBEDIT_CFLAGS@
-LIBEDIT_LIBS = @LIBEDIT_LIBS@
+LIBDPDK_CFLAGS = @LIBDPDK_CFLAGS@
+LIBDPDK_LIBS = @LIBDPDK_LIBS@
 LIBFUSE_CFLAGS = @LIBFUSE_CFLAGS@
 LIBFUSE_LIBS = @LIBFUSE_LIBS@
 LIBJEMALLOC = @LIBJEMALLOC@
 LIBOBJS = @LIBOBJS@
+LIBPCIACCESS_CFLAGS = @LIBPCIACCESS_CFLAGS@
+LIBPCIACCESS_LIBS = @LIBPCIACCESS_LIBS@
 LIBROCKSDB_CFLAGS = @LIBROCKSDB_CFLAGS@
 LIBROCKSDB_LIBS = @LIBROCKSDB_LIBS@
 LIBS = @LIBS@
@@ -326,7 +328,6 @@ datarootdir = @datarootdir@
 docdir = @docdir@
 dvidir = @dvidir@
 exec_prefix = @exec_prefix@
-group_rgw = @group_rgw@
 host = @host@
 host_alias = @host_alias@
 host_cpu = @host_cpu@
@@ -357,7 +358,6 @@ sharedstatedir = @sharedstatedir@
 srcdir = @srcdir@
 subdirs = @subdirs@
 sysconfdir = @sysconfdir@
-systemd_libexec_dir = @systemd_libexec_dir@
 systemd_unit_dir = @systemd_unit_dir@
 target = @target@
 target_alias = @target_alias@
@@ -367,7 +367,6 @@ target_vendor = @target_vendor@
 top_build_prefix = @top_build_prefix@
 top_builddir = @top_builddir@
 top_srcdir = @top_srcdir@
-user_rgw = @user_rgw@
 EXTRA_DIST = ceph.in Makefile.in
 
 # The root of the OCF resource agent hierarchy
diff --git a/src/os/FuseStore.cc b/src/os/FuseStore.cc
new file mode 100644
index 0000000..0849865
--- /dev/null
+++ b/src/os/FuseStore.cc
@@ -0,0 +1,1102 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "FuseStore.h"
+#include "include/stringify.h"
+#include "common/errno.h"
+
+#define FUSE_USE_VERSION 30
+#include <fuse/fuse.h>
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <fcntl.h>           /* Definition of AT_* constants */
+#include <sys/stat.h>
+
+#define dout_subsys ceph_subsys_fuse
+#include "common/debug.h"
+#undef dout_prefix
+#define dout_prefix *_dout << "fuse "
+
+// libfuse-side state owned by a FuseStore (allocated in its ctor, deleted in its dtor)
+struct fs_info {
+  struct fuse_args args;  // presumably the argument vector handed to libfuse -- TODO confirm
+  struct fuse *f;  // libfuse handle; not initialized by the FuseStore constructor
+  struct fuse_chan *ch;  // libfuse channel handle; not initialized by the constructor
+  char *mountpoint;  // NOTE(review): ownership of this string is not visible here
+};
+
+// Remember the backing store and the mount path; allocate the fuse state holder.
+FuseStore::FuseStore(ObjectStore *s, string p)
+  : store(s), mount_point(p), fuse_thread(this)
+{
+  // default-initialized on purpose-as-before: the fs_info fields stay unset here
+  info = new fs_info;
+}
+
+FuseStore::~FuseStore()
+{
+  delete info;  // frees only the holder allocated in the constructor, not what its pointers refer to
+}
+
+/*
+ * / - root directory
+ * $cid/ - one directory per collection
+ * type - objectstore type (lives in the root directory, not under $cid)
+ * $cid/bitwise_hash_start - lowest hash value
+ * $cid/bitwise_hash_end - highest hash value
+ * $cid/bitwise_hash_bits - how many bits are significant
+ * $cid/pgmeta/ - pgmeta object
+ * $cid/all/ - all objects
+ * $cid/all/$obj/ - object directory (also holds omap_header)
+ * $cid/all/$obj/bitwise_hash
+ * $cid/all/$obj/data
+ * $cid/all/$obj/omap/$key
+ * $cid/all/$obj/attr/$name
+ * $cid/by_bitwise_hash/$hash/$bits/$obj - all objects with this (bitwise) hash (prefix)
+ */
+enum {  // node kinds returned by parse_fn(); failures are negative errnos instead
+  FN_ROOT = 1,  // "/"
+  FN_TYPE,  // "/type"
+  FN_COLLECTION,  // "/$cid"
+  FN_HASH_START,  // "$cid/bitwise_hash_start"
+  FN_HASH_END,  // "$cid/bitwise_hash_end"
+  FN_HASH_BITS,  // "$cid/bitwise_hash_bits"
+  FN_OBJECT,  // an object directory: "$cid/pgmeta" or "$obj" under all/ or by_bitwise_hash/
+  FN_OBJECT_HASH,  // "$obj/bitwise_hash"
+  FN_OBJECT_DATA,  // "$obj/data"
+  FN_OBJECT_OMAP_HEADER,  // "$obj/omap_header"
+  FN_OBJECT_OMAP,  // "$obj/omap"
+  FN_OBJECT_OMAP_VAL,  // "$obj/omap/$key"
+  FN_OBJECT_ATTR,  // "$obj/attr"
+  FN_OBJECT_ATTR_VAL,  // "$obj/attr/$name"
+  FN_ALL,  // "$cid/all"
+  FN_HASH_DIR,  // "$cid/by_bitwise_hash" or "$cid/by_bitwise_hash/$hash"
+  FN_HASH_VAL,  // "$cid/by_bitwise_hash/$hash/$bits"
+};
+
+static int parse_fn(const char *path, coll_t *cid, ghobject_t *oid, string *key,
+		    uint32_t *hash, uint32_t *hash_bits)
+{
+  // Classify a mount-relative path: returns an FN_* kind or a negative errno,
+  // filling *cid, *oid, *key, *hash and *hash_bits only as far as the path goes.
+  list<string> v;
+  for (const char *p = path; *p; ++p) {
+    if (*p == '/')
+      continue;
+    const char *e = p + 1;
+    while (*e && *e != '/')
+      ++e;
+    v.push_back(string(p, e - p));
+    p = e;
+    if (!*p)
+      break;
+  }
+  dout(10) << __func__ << " path " << path << " -> " << v << dendl;
+
+  if (v.empty())
+    return FN_ROOT;
+
+  if (v.front() == "type")
+    return FN_TYPE;
+
+  if (!cid->parse(v.front()))
+    return -ENOENT;
+  if (v.size() == 1)
+    return FN_COLLECTION;
+  v.pop_front();
+
+  if (v.front() == "bitwise_hash_start")
+    return FN_HASH_START;
+  if (v.front() == "bitwise_hash_end")
+    return FN_HASH_END;
+  if (v.front() == "bitwise_hash_bits")
+    return FN_HASH_BITS;
+  if (v.front() == "pgmeta") {
+    spg_t pgid;
+    if (cid->is_pg(&pgid)) {
+      *oid = pgid.make_pgmeta_oid();
+      v.pop_front();
+      if (v.empty())
+	return FN_OBJECT;
+      goto do_object;
+    }
+    return -ENOENT;
+  }
+  if (v.front() == "all") {
+    v.pop_front();
+    if (v.empty())
+      return FN_ALL;
+    goto do_dir;
+  }
+  if (v.front() == "by_bitwise_hash") {
+    v.pop_front();
+    if (v.empty())
+      return FN_HASH_DIR;
+    unsigned long hv, hm;
+    int r = sscanf(v.front().c_str(), "%lx", &hv);
+    if (r != 1)
+      return -ENOENT;
+    // more than 8 hex digits is no 32-bit prefix and would make shift negative
+    if (v.front().length() > 8)
+      return -ENOENT;
+    int shift = 32 - v.front().length() * 4;
+    v.pop_front();
+    if (v.empty())
+      return FN_HASH_DIR;
+    r = sscanf(v.front().c_str(), "%lu", &hm);  // hm is unsigned long
+    if (r != 1)
+      return -ENOENT;
+    if (hm < 1 || hm > 32)
+      return -ENOENT;
+    v.pop_front();
+    *hash = hv << shift;//hobject_t::_reverse_bits(hv << shift);
+    *hash_bits = hm;
+    if (v.empty())
+      return FN_HASH_VAL;
+    goto do_dir;
+  }
+  return -ENOENT;
+
+ do_dir:
+  if (!oid->parse(v.front()))
+    return -ENOENT;
+  v.pop_front();
+  if (v.empty())
+    return FN_OBJECT;
+
+ do_object:
+  if (v.front() == "data")
+    return FN_OBJECT_DATA;
+  if (v.front() == "omap_header")
+    return FN_OBJECT_OMAP_HEADER;
+  if (v.front() == "omap") {
+    v.pop_front();
+    if (v.empty())
+      return FN_OBJECT_OMAP;
+    *key = v.front();
+    v.pop_front();
+    if (v.empty())
+      return FN_OBJECT_OMAP_VAL;
+    return -ENOENT;
+  }
+  if (v.front() == "attr") {
+    v.pop_front();
+    if (v.empty())
+      return FN_OBJECT_ATTR;
+    *key = v.front();
+    v.pop_front();
+    if (v.empty())
+      return FN_OBJECT_ATTR_VAL;
+    return -ENOENT;
+  }
+  if (v.front() == "bitwise_hash")
+    return FN_OBJECT_HASH;
+  return -ENOENT;
+}
+
+
+static int os_getattr(const char *path, struct stat *stbuf)
+{  // fills *stbuf for the node named by path; returns 0 or a negative errno
+  coll_t cid;
+  ghobject_t oid;
+  string key;
+  uint32_t hash_value, hash_bits;
+  int t = parse_fn(path, &cid, &oid, &key, &hash_value, &hash_bits);
+  if (t < 0)
+    return t;
+
+  fuse_context *fc = fuse_get_context();
+  FuseStore *fs = static_cast<FuseStore*>(fc->private_data);
+
+  stbuf->st_size = 0;
+  stbuf->st_uid = 0;
+  stbuf->st_gid = 0;
+  stbuf->st_mode = S_IFREG | 0700;  // default: regular file; directory kinds override below
+
+  switch (t) {  // pass 1: an object that cannot hash into this PG collection does not exist
+  case FN_OBJECT_OMAP:
+  case FN_OBJECT_ATTR:
+  case FN_OBJECT:
+  case FN_OBJECT_DATA:
+  case FN_OBJECT_OMAP_HEADER:
+  case FN_OBJECT_OMAP_VAL:
+    {  // NOTE(review): FN_OBJECT_ATTR_VAL and FN_OBJECT_HASH skip this check -- intended?
+      spg_t pgid;
+      if (cid.is_pg(&pgid)) {
+	int bits = fs->store->collection_bits(cid);
+	if (bits >= 0 && !oid.match(bits, pgid.ps())) {
+	  // sorry, not part of this PG
+	  return -ENOENT;
+	}
+      }
+    }
+    break;
+  }
+
+  switch (t) {  // pass 2: existence checks and st_size, per node kind
+  case FN_OBJECT_OMAP:
+  case FN_OBJECT_ATTR:
+  case FN_OBJECT:
+    if (!fs->store->exists(cid, oid))
+      return -ENOENT;
+    // fall-thru
+  case FN_ALL:
+  case FN_HASH_DIR:
+  case FN_HASH_VAL:
+  case FN_COLLECTION:
+    if (!fs->store->collection_exists(cid))
+      return -ENOENT;
+    // fall-thru
+  case FN_ROOT:
+    stbuf->st_mode = S_IFDIR | 0700;  // every kind that falls through to here is a directory
+    return 0;
+
+  case FN_TYPE:
+    stbuf->st_size = fs->store->get_type().length() + 1;  // +1 for the "\n" os_open() appends
+    break;
+
+  case FN_OBJECT_HASH:
+    if (!fs->store->exists(cid, oid))
+      return -ENOENT;
+    stbuf->st_size = 9;  // os_open() formats this file as "%08x\n"
+    return 0;
+
+  case FN_HASH_END:
+    if (!fs->store->collection_exists(cid))
+      return -ENOENT;
+    if (fs->store->collection_bits(cid) < 0)
+      return -ENOENT;
+    // fall-thru
+  case FN_HASH_START:  // when reached directly, no collection_exists() check is made
+    stbuf->st_size = 9;  // os_open() formats these files as "%08lx\n"
+    return 0;
+
+  case FN_HASH_BITS:
+    {
+      if (!fs->store->collection_exists(cid))
+	return -ENOENT;
+      int bits = fs->store->collection_bits(cid);
+      if (bits < 0)
+	return -ENOENT;
+      char buf[8];
+      snprintf(buf, sizeof(buf), "%d\n", bits);  // same format os_open() uses for this file
+      stbuf->st_size = strlen(buf);
+    }
+    return 0;
+
+  case FN_OBJECT_DATA:
+    {
+      if (!fs->store->exists(cid, oid))
+	return -ENOENT;
+      int r = fs->store->stat(cid, oid, stbuf);  // the store fills *stbuf itself
+      if (r < 0)
+	return r;
+    }
+    break;
+
+  case FN_OBJECT_OMAP_HEADER:
+    {
+      if (!fs->store->exists(cid, oid))
+	return -ENOENT;
+      bufferlist bl;
+      fs->store->omap_get_header(cid, oid, &bl);  // return value is not checked
+      stbuf->st_size = bl.length();
+    }
+    break;
+
+  case FN_OBJECT_OMAP_VAL:
+    {
+      if (!fs->store->exists(cid, oid))
+	return -ENOENT;
+      set<string> k;
+      k.insert(key);
+      map<string,bufferlist> v;
+      fs->store->omap_get_values(cid, oid, k, &v);  // return value is not checked
+      if (!v.count(key)) {
+	return -ENOENT;
+      }
+      stbuf->st_size = v[key].length();
+    }
+    break;
+
+  case FN_OBJECT_ATTR_VAL:
+    {
+      if (!fs->store->exists(cid, oid))
+	return -ENOENT;
+      bufferptr v;
+      int r = fs->store->getattr(cid, oid, key.c_str(), v);
+      if (r == -ENODATA)
+	r = -ENOENT;  // a missing attribute is reported as a missing file
+      if (r < 0)
+	return r;
+      stbuf->st_size = v.length();
+    }
+    break;
+
+  default:
+    return -ENOENT;
+  }
+
+  return 0;
+}
+
+static int os_readdir(const char *path,
+		      void *buf,
+		      fuse_fill_dir_t filler,
+		      off_t offset,
+		      struct fuse_file_info *fi)
+{
+  // List the directory named by path via filler(); 0 on success, else -errno.
+  coll_t cid;
+  ghobject_t oid;
+  string key;
+  uint32_t hash_value = 0, hash_bits = 0;  // parse_fn only sets these for by_bitwise_hash paths
+  int t = parse_fn(path, &cid, &oid, &key, &hash_value, &hash_bits);
+  if (t < 0)
+    return t;
+
+  fuse_context *fc = fuse_get_context();
+  FuseStore *fs = static_cast<FuseStore*>(fc->private_data);
+
+  // we can't shift 32 bits or else off_t will go negative
+  const int hash_shift = 31;
+
+  switch (t) {
+  case FN_ROOT:
+    {
+      filler(buf, "type", NULL, 0);
+      vector<coll_t> cls;
+      fs->store->list_collections(cls);
+      for (auto c : cls) {
+	int r = filler(buf, stringify(c).c_str(), NULL, 0);
+	if (r > 0)
+	  break;
+      }
+    }
+    break;
+
+  case FN_COLLECTION:
+    {
+      filler(buf, "bitwise_hash_start", NULL, 0);
+      if (fs->store->collection_bits(cid) >= 0) {
+	filler(buf, "bitwise_hash_end", NULL, 0);
+	filler(buf, "bitwise_hash_bits", NULL, 0);
+      }
+      filler(buf, "all", NULL, 0);
+      filler(buf, "by_bitwise_hash", NULL, 0);
+      spg_t pgid;
+      if (cid.is_pg(&pgid) &&
+	  fs->store->exists(cid, pgid.make_pgmeta_oid())) {
+	filler(buf, "pgmeta", NULL, 0);
+      }
+    }
+    break;
+
+  case FN_OBJECT:
+    filler(buf, "bitwise_hash", NULL, 0);
+    filler(buf, "data", NULL, 0);
+    filler(buf, "omap", NULL, 0);
+    filler(buf, "attr", NULL, 0);
+    filler(buf, "omap_header", NULL, 0);
+    break;
+
+  case FN_HASH_VAL:
+  case FN_ALL:
+    {
+      // a resume offset packs (bitwise hash << hash_shift) | entries seen at that hash
+      uint32_t bitwise_hash = (offset >> hash_shift) & 0xffffffff;
+      uint32_t hashoff = offset - (bitwise_hash << hash_shift);
+      int skip = hashoff;
+      ghobject_t next = cid.get_min_hobj();
+      if (offset) {
+	// obey the offset
+	next.hobj.set_hash(hobject_t::_reverse_bits(bitwise_hash));
+      } else if (t == FN_HASH_VAL) {
+	next.hobj.set_hash(hobject_t::_reverse_bits(hash_value));
+      }
+      ghobject_t last;
+      if (t == FN_HASH_VAL) {
+	last = next;
+	// widen before the +1 and never shift a 32-bit value by 32 bits
+	uint32_t mask = hash_bits < 32 ? 0xffffffff >> hash_bits : 0;
+	uint64_t rev_end = (uint64_t)(hash_value | mask) + 1;
+	if (rev_end >= 0x100000000)
+	  last = ghobject_t::get_max();
+	else
+	  last.hobj.set_hash(hobject_t::_reverse_bits(rev_end));
+      } else {
+	last = ghobject_t::get_max();
+      }
+      dout(10) << __func__ << std::hex
+	       << " offset " << offset << " hash "
+	       << hobject_t::_reverse_bits(hash_value)
+	       << std::dec
+	       << "/" << hash_bits
+	       << " first " << next << " last " << last
+	       << dendl;
+      while (true) {
+	vector<ghobject_t> ls;
+	int r = fs->store->collection_list(
+	  cid, next, last, true, 1000, &ls, &next);
+	if (r < 0)
+	  return r;
+	for (auto p : ls) {
+	  if (skip) {
+	    --skip;
+	    continue;
+	  }
+	  uint32_t cur_bitwise_hash = p.hobj.get_bitwise_key_u32();
+	  if (cur_bitwise_hash != bitwise_hash) {
+	    bitwise_hash = cur_bitwise_hash;
+	    hashoff = 0;
+	  }
+	  ++hashoff;
+	  uint64_t cur_off = ((uint64_t)bitwise_hash << hash_shift) | (uint64_t)hashoff;
+	  string s = stringify(p);
+	  r = filler(buf, s.c_str(), NULL, cur_off);
+	  if (r)
+	    break;
+	}
+	if (r)
+	  break;
+	if (next == ghobject_t::get_max() || next == last)
+	  break;
+      }
+    }
+    break;
+
+  case FN_OBJECT_OMAP:
+    {
+      set<string> keys;
+      fs->store->omap_get_keys(cid, oid, &keys);
+      unsigned skip = offset;  // here the offset is simply the count of keys already returned
+      for (auto k : keys) {
+	if (skip) {
+	  --skip;
+	  continue;
+	}
+	++offset;
+	int r = filler(buf, k.c_str(), NULL, offset);
+	if (r)
+	  break;
+      }
+    }
+    break;
+
+  case FN_OBJECT_ATTR:
+    {
+      map<string,bufferptr> aset;
+      fs->store->getattrs(cid, oid, aset);
+      unsigned skip = offset;  // same counting scheme as for omap keys
+      for (auto a : aset) {
+	if (skip) {
+	  --skip;
+	  continue;
+	}
+	++offset;
+	int r = filler(buf, a.first.c_str(), NULL, offset);
+	if (r)
+	  break;
+      }
+    }
+    break;
+  }
+  return 0;
+}
+
+static int os_open(const char *path, struct fuse_file_info *fi)
+{  // stages the file's current content in a heap bufferlist whose pointer is kept in fi->fh
+  coll_t cid;
+  ghobject_t oid;
+  string key;
+  uint32_t hash_value, hash_bits;
+  int t = parse_fn(path, &cid, &oid, &key, &hash_value, &hash_bits);
+  if (t < 0)
+    return t;
+
+  fuse_context *fc = fuse_get_context();
+  FuseStore *fs = static_cast<FuseStore*>(fc->private_data);
+
+  bufferlist *pbl = 0;  // stays 0 for node kinds not handled below; those still return 0
+  switch (t) {
+  case FN_TYPE:
+    pbl = new bufferlist;
+    pbl->append(fs->store->get_type());
+    pbl->append("\n");
+    break;
+
+  case FN_HASH_START:
+    {
+      pbl = new bufferlist;
+      spg_t pgid;
+      if (cid.is_pg(&pgid)) {
+	unsigned long h;
+	h = hobject_t::_reverse_bits(pgid.ps());
+	char buf[10];  // 8 hex digits + '\n' + NUL
+	snprintf(buf, sizeof(buf), "%08lx\n", h);
+	pbl->append(buf);
+      } else {
+	pbl->append("00000000\n");
+      }
+    }
+    break;
+
+  case FN_HASH_END:
+    {
+      spg_t pgid;
+      unsigned long h;
+      if (cid.is_pg(&pgid)) {
+	int hash_bits = fs->store->collection_bits(cid);  // shadows the outer hash_bits
+	if (hash_bits >= 0) {
+	  uint64_t rev_start = hobject_t::_reverse_bits(pgid.ps());
+	  uint64_t rev_end = (rev_start | (0xffffffff >> hash_bits));  // NOTE(review): undefined if hash_bits >= 32
+	  h = rev_end;
+	} else {
+	  return -ENOENT;
+	}
+      } else {
+	h = 0xffffffff;
+      }
+      char buf[10];
+      snprintf(buf, sizeof(buf), "%08lx\n", h);
+      pbl = new bufferlist;
+      pbl->append(buf);
+    }
+    break;
+
+  case FN_HASH_BITS:
+    {
+      int r = fs->store->collection_bits(cid);
+      if (r < 0)
+        return r;
+      char buf[8];
+      snprintf(buf, sizeof(buf), "%d\n", r);
+      pbl = new bufferlist;
+      pbl->append(buf);
+    }
+    break;
+
+  case FN_OBJECT_HASH:
+    {
+      pbl = new bufferlist;
+      char buf[10];
+      snprintf(buf, sizeof(buf), "%08x\n",
+	       (unsigned)oid.hobj.get_bitwise_key_u32());
+      pbl->append(buf);
+    }
+    break;
+
+  case FN_OBJECT_DATA:
+    {
+      pbl = new bufferlist;
+      int r = fs->store->read(cid, oid, 0, 0, *pbl);  // presumably len 0 means "whole object" -- TODO confirm
+      if (r < 0) {
+        delete pbl;
+        return r;
+      }
+    }
+    break;
+
+  case FN_OBJECT_ATTR_VAL:
+    {
+      bufferptr bp;
+      int r = fs->store->getattr(cid, oid, key.c_str(), bp);
+      if (r < 0)
+        return r;
+      pbl = new bufferlist;
+      pbl->append(bp);
+    }
+    break;
+
+  case FN_OBJECT_OMAP_VAL:
+    {
+      set<string> k;
+      k.insert(key);
+      map<string,bufferlist> v;
+      int r = fs->store->omap_get_values(cid, oid, k, &v);
+      if (r < 0)
+        return r;
+      pbl = new bufferlist;
+      *pbl = v[key];  // if the key was not returned, operator[] yields an empty buffer
+    }
+    break;
+
+  case FN_OBJECT_OMAP_HEADER:
+    {
+      bufferlist bl;
+      int r = fs->store->omap_get_header(cid, oid, &bl);
+      if (r < 0)
+       return r;
+      pbl = new bufferlist;
+      pbl->claim(bl);
+    }
+    break;
+  }
+
+  if (pbl) {
+    fi->fh = reinterpret_cast<uint64_t>(pbl);  // os_release() deletes this pointer
+  }
+  return 0;
+}
+
+static int os_mkdir(const char *path, mode_t mode)
+{
+  // mkdir handler: touches an object (FN_OBJECT) or creates a collection
+  // (FN_COLLECTION); any other node kind is refused with -EPERM.
+  // Returns 0 on success or a negative errno.
+  coll_t cid;
+  ghobject_t oid;
+  string key;
+  uint32_t hash_value, hash_bits;
+  const int kind = parse_fn(path, &cid, &oid, &key, &hash_value, &hash_bits);
+  if (kind < 0)
+    return kind;
+
+  fuse_context *ctx = fuse_get_context();
+  FuseStore *fs = static_cast<FuseStore*>(ctx->private_data);
+
+  ObjectStore::Transaction txn;
+  if (kind == FN_OBJECT) {
+    // inside a PG collection the object's hash has to match the PG
+    spg_t pgid;
+    if (cid.is_pg(&pgid)) {
+      const int bits = fs->store->collection_bits(cid);
+      if (bits >= 0) {
+	// sorry, not part of this PG
+	if (!oid.match(bits, pgid.ps()))
+	  return -EINVAL;
+      }
+    }
+    txn.touch(cid, oid);
+  } else if (kind == FN_COLLECTION) {
+    if (cid.is_pg()) {
+      // use the mode for the bit count.  e.g., mkdir --mode=0003
+      // mnt/0.7_head will create 0.7 with bits = 3.
+      mode &= 0777;
+      if (mode >= 32)
+	return -EINVAL;
+    } else {
+      mode = 0;
+    }
+    txn.create_collection(cid, mode);
+  } else {
+    return -EPERM;
+  }
+
+  // apply through a throwaway sequencer and wait whenever flush_commit()
+  // reports that the commit has not completed yet
+  if (!txn.empty()) {
+    ceph::shared_ptr<ObjectStore::Sequencer> osr(
+      new ObjectStore::Sequencer("fuse"));
+    fs->store->apply_transaction(&*osr, std::move(txn));
+    C_SaferCond waiter;
+    if (!osr->flush_commit(&waiter))
+      waiter.wait();
+  }
+
+  return 0;
+}
+
+static int os_chmod(const char *path, mode_t mode)
+{
+  return 0;  // no-op: both arguments are ignored and success is always reported
+}
+
+static int os_create(const char *path, mode_t mode, struct fuse_file_info *fi)
+{
+  // create handler: stages the current value of a data/attr/omap leaf in a
+  // heap bufferlist stored in fi->fh, creating a missing attr or omap key
+  // as an empty value.  Store errors other than the ones tested are ignored.
+  coll_t cid;
+  ghobject_t oid;
+  string key;
+  uint32_t hash_value, hash_bits;
+  const int kind = parse_fn(path, &cid, &oid, &key, &hash_value, &hash_bits);
+  if (kind < 0)
+    return kind;
+
+  fuse_context *ctx = fuse_get_context();
+  FuseStore *fs = static_cast<FuseStore*>(ctx->private_data);
+
+  ObjectStore::Transaction txn;
+  bufferlist *staged = 0;
+  switch (kind) {
+  case FN_OBJECT_DATA:
+    staged = new bufferlist;
+    fs->store->read(cid, oid, 0, 0, *staged);
+    break;
+
+  case FN_OBJECT_ATTR_VAL:
+    {
+      staged = new bufferlist;
+      bufferptr cur;
+      int r = fs->store->getattr(cid, oid, key.c_str(), cur);
+      if (r == -ENODATA) {
+	bufferlist empty;
+	txn.setattr(cid, oid, key.c_str(), empty);
+      }
+      staged->append(cur);
+    }
+    break;
+
+  case FN_OBJECT_OMAP_VAL:
+    {
+      staged = new bufferlist;
+      set<string> wanted;
+      wanted.insert(key);
+      map<string,bufferlist> found;
+      fs->store->omap_get_values(cid, oid, wanted, &found);
+      if (found.count(key) != 0) {
+	*staged = found[key];
+      } else {
+	map<string,bufferlist> aset;
+	aset[key] = bufferlist();
+	txn.omap_setkeys(cid, oid, aset);
+      }
+    }
+    break;
+  }
+
+  if (!txn.empty()) {
+    ceph::shared_ptr<ObjectStore::Sequencer> osr(
+      new ObjectStore::Sequencer("fuse"));
+    fs->store->apply_transaction(&*osr, std::move(txn));
+    C_SaferCond waiter;
+    if (!osr->flush_commit(&waiter))
+      waiter.wait();
+  }
+
+  if (staged)
+    fi->fh = reinterpret_cast<uint64_t>(staged);
+  return 0;
+}
+
+static int os_release(const char *path, struct fuse_file_info *fi)
+{
+  // fi->fh holds the bufferlist pointer stored by os_open()/os_create(), if any
+  delete reinterpret_cast<bufferlist*>(fi->fh);
+  return 0;
+}
+
+static int os_read(const char *path, char *buf, size_t size, off_t offset,
+		   struct fuse_file_info *fi)
+{
+  // Serve the read from the bufferlist staged in fi->fh.  Returns the number
+  // of bytes copied into buf; 0 without a staged buffer or at/after its end.
+  bufferlist *src = reinterpret_cast<bufferlist*>(fi->fh);
+  if (!src || offset >= src->length())
+    return 0;
+  if (offset + size > src->length())
+    size = src->length() - offset;  // clamp to the bytes actually available
+  bufferlist window;
+  window.substr_of(*src, offset, size);
+  memcpy(buf, window.c_str(), window.length());
+  return window.length();
+}
+
+static int os_write(const char *path, const char *buf, size_t size,
+		    off_t offset, struct fuse_file_info *fi)
+{
+  // splice buf into the bufferlist staged in fi->fh; os_flush() persists it
+  bufferlist *pbl = reinterpret_cast<bufferlist*>(fi->fh);
+  if (!pbl)
+    return 0;
+
+  bufferlist final;
+  if (offset) {
+    if (offset < pbl->length()) {
+      final.substr_of(*pbl, 0, offset);  // overwrite: keep the bytes before offset
+    } else {
+      final.claim_append(*pbl);  // at/after the end: keep everything (empties *pbl)
+      final.append_zero(offset - final.length());  // zero-fill the gap, if any
+    }
+  }
+  final.append(buf, size);
+  if (offset + size < pbl->length()) {
+    bufferlist rest;
+    rest.substr_of(*pbl, offset + size, pbl->length() - offset - size);
+    final.claim_append(rest);
+  }
+  *pbl = final;
+  return size;
+}
+
+int os_flush(const char *path, struct fuse_file_info *fi)
+{
+  // write the bufferlist staged in fi->fh back to the store, by node kind
+  coll_t cid;
+  ghobject_t oid;
+  string key;
+  uint32_t hash_value, hash_bits;
+  const int kind = parse_fn(path, &cid, &oid, &key, &hash_value, &hash_bits);
+  if (kind < 0)
+    return kind;
+
+  fuse_context *ctx = fuse_get_context();
+  FuseStore *fs = static_cast<FuseStore*>(ctx->private_data);
+
+  bufferlist *staged = reinterpret_cast<bufferlist*>(fi->fh);
+  if (!staged)
+    return 0;
+
+  ObjectStore::Transaction txn;
+  switch (kind) {
+  case FN_OBJECT_DATA:
+    txn.write(cid, oid, 0, staged->length(), *staged);
+    break;
+
+  case FN_OBJECT_ATTR_VAL:
+    txn.setattr(cid, oid, key.c_str(), *staged);
+    break;
+
+  case FN_OBJECT_OMAP_VAL:
+    {
+      map<string,bufferlist> kv;
+      kv[key] = *staged;
+      txn.omap_setkeys(cid, oid, kv);
+    }
+    break;
+
+  case FN_OBJECT_OMAP_HEADER:
+    txn.omap_setheader(cid, oid, *staged);
+    break;
+
+  default:
+    return 0;  // nothing to persist for any other node kind
+  }
+
+  ceph::shared_ptr<ObjectStore::Sequencer> osr(
+    new ObjectStore::Sequencer("fuse"));
+  fs->store->apply_transaction(&*osr, std::move(txn));
+  C_SaferCond waiter;
+  if (!osr->flush_commit(&waiter))
+    waiter.wait();
+
+  return 0;
+}
+
+static int os_unlink(const char *path)
+{
+  // removal handler: maps each removable node kind onto one store operation
+  coll_t cid;
+  ghobject_t oid;
+  string key;
+  uint32_t hash_value, hash_bits;
+  const int kind = parse_fn(path, &cid, &oid, &key, &hash_value, &hash_bits);
+  if (kind < 0)
+    return kind;
+
+  fuse_context *ctx = fuse_get_context();
+  FuseStore *fs = static_cast<FuseStore*>(ctx->private_data);
+
+  ObjectStore::Transaction txn;
+  switch (kind) {
+  case FN_OBJECT_OMAP_VAL:
+    {
+      set<string> doomed;
+      doomed.insert(key);
+      txn.omap_rmkeys(cid, oid, doomed);
+    }
+    break;
+
+  case FN_OBJECT_ATTR_VAL:
+    txn.rmattr(cid, oid, key.c_str());
+    break;
+
+  case FN_OBJECT_OMAP_HEADER:
+    {
+      bufferlist empty;  // the header is reset to empty rather than removed
+      txn.omap_setheader(cid, oid, empty);
+    }
+    break;
+
+  case FN_OBJECT:
+    txn.remove(cid, oid);
+    break;
+
+  case FN_COLLECTION:
+    if (!fs->store->collection_empty(cid))
+      return -ENOTEMPTY;
+    txn.remove_collection(cid);
+    break;
+
+  case FN_OBJECT_DATA:
+    txn.truncate(cid, oid, 0);  // the data file is emptied, the object stays
+    break;
+
+  default:
+    return -EPERM;
+  }
+
+  ceph::shared_ptr<ObjectStore::Sequencer> osr(
+    new ObjectStore::Sequencer("fuse"));
+  fs->store->apply_transaction(&*osr, std::move(txn));
+  C_SaferCond waiter;
+  if (!osr->flush_commit(&waiter))
+    waiter.wait();
+
+  return 0;
+}
+
+// FUSE truncate handler.
+//
+// Object data is truncated to `size` through a store transaction.  For attr
+// values, omap values and the omap header only size 0 is accepted (and is a
+// no-op returning 0); a non-zero size gives -EPERM.  Every other path kind
+// gives -EPERM, and a negative parse_fn() result is returned unchanged.
+static int os_truncate(const char *path, off_t size)
+{
+  coll_t cid;
+  ghobject_t oid;
+  string key;
+  uint32_t hash_value, hash_bits;
+  const int kind = parse_fn(path, &cid, &oid, &key, &hash_value, &hash_bits);
+  if (kind < 0)
+    return kind;
+
+  switch (kind) {
+  case FN_OBJECT_OMAP_VAL:
+  case FN_OBJECT_ATTR_VAL:
+  case FN_OBJECT_OMAP_HEADER:
+    return size ? -EPERM : 0;
+
+  case FN_OBJECT_DATA:
+    break;
+
+  default:
+    return -EPERM;
+  }
+
+  FuseStore *fs = static_cast<FuseStore*>(fuse_get_context()->private_data);
+
+  ObjectStore::Transaction txn;
+  txn.truncate(cid, oid, size);
+
+  // Apply on a fresh sequencer and wait unless flush_commit() returns true.
+  ceph::shared_ptr<ObjectStore::Sequencer> seq(
+    new ObjectStore::Sequencer("fuse"));
+  fs->store->apply_transaction(seq.get(), std::move(txn));
+  C_SaferCond done;
+  if (!seq->flush_commit(&done))
+    done.wait();
+  return 0;
+}
+
+// FUSE statfs handler: asks the backing store for its statfs numbers and
+// copies the block-size, block and file counters into the statvfs result.
+// `path` is not used.  Returns the store's negative result on failure, else 0.
+static int os_statfs(const char *path, struct statvfs *stbuf)
+{
+  FuseStore *fs = static_cast<FuseStore*>(fuse_get_context()->private_data);
+
+  struct statfs st;
+  const int ret = fs->store->statfs(&st);
+  if (ret < 0)
+    return ret;
+
+  // block accounting
+  stbuf->f_bsize = st.f_bsize;
+  stbuf->f_blocks = st.f_blocks;
+  stbuf->f_bfree = st.f_bfree;
+  stbuf->f_bavail = st.f_bavail;
+
+  // file (inode) accounting
+  stbuf->f_files = st.f_files;
+  stbuf->f_ffree = st.f_ffree;
+  return 0;
+}
+
+// FUSE operation table handed to fuse_main()/fuse_new().
+//
+// Written with GNU-style "field: value" designators; entries set to 0 are
+// left without a handler.  Keep the entries in this order -- presumably it
+// mirrors the field order of struct fuse_operations (confirm against fuse.h
+// before adding or moving entries).
+static struct fuse_operations fs_oper = {
+  getattr: os_getattr,
+  readlink: 0,
+  getdir: 0,
+  mknod: 0,
+  mkdir: os_mkdir,
+  unlink: os_unlink,
+  rmdir: os_unlink,   // same handler as unlink
+  symlink: 0,
+  rename: 0,
+  link: 0,
+  chmod: os_chmod,
+  chown: 0,
+  truncate: os_truncate,
+  utime: 0,
+  open: os_open,
+  read: os_read,
+  write: os_write,
+  statfs: os_statfs,
+  flush: os_flush,
+  release: os_release,
+  fsync: 0,
+  setxattr: 0,
+  getxattr: 0,
+  listxattr: 0,
+  removexattr: 0,
+  opendir: 0,
+  readdir: os_readdir,
+  releasedir: 0,
+  fsyncdir: 0,
+  init: 0,
+  destroy: 0,
+  access: 0,
+  create: os_create,
+};
+
+// Runs fuse_main() with a synthetic command line built from mount_point and
+// returns its result.  This object is passed as the FUSE private data.
+int FuseStore::main()
+{
+  const char *fuse_argv[] = {
+    "foo",
+    mount_point.c_str(),
+    "-f",
+    "-d", // debug
+  };
+  // The trailing "-d" is only handed over when fuse_debug is enabled.
+  const int fuse_argc = g_conf->fuse_debug ? 4 : 3;
+  return fuse_main(fuse_argc, (char**)fuse_argv, &fs_oper, (void*)this);
+}
+
+// Mounts the FUSE filesystem and starts the thread that serves it.
+//
+// Steps: build a synthetic argument vector, parse it with
+// fuse_parse_cmdline(), mount, create the fuse handle with fs_oper and this
+// object as private data, then start fuse_thread (FuseThread::entry() calls
+// loop()).
+//
+// Returns 0 on success, -EINVAL if the arguments cannot be parsed, -EIO if
+// fuse_mount() or fuse_new() fails.
+// NOTE(review): the failure paths do not call fuse_opt_free_args() or release
+// info->mountpoint; whether that happens elsewhere is not visible from here.
+int FuseStore::start()
+{
+  dout(10) << __func__ << dendl;
+
+  memset(&info->args, 0, sizeof(info->args));
+  const char *v[] = {
+    "foo",
+    mount_point.c_str(),
+    "-f", // foreground
+    "-d", // debug
+  };
+  // "-d" is only counted in when fuse_debug is enabled.
+  int c = 3;
+  if (g_conf->fuse_debug)
+    ++c;
+  fuse_args a = FUSE_ARGS_INIT(c, (char**)v);
+  info->args = a;
+  // Fills in info->mountpoint from the argument vector.
+  if (fuse_parse_cmdline(&info->args, &info->mountpoint, NULL, NULL) == -1) {
+    derr << __func__ << " failed to parse args" << dendl;
+    return -EINVAL;
+  }
+
+  info->ch = fuse_mount(info->mountpoint, &info->args);
+  if (!info->ch) {
+    derr << __func__ << " fuse_mount failed" << dendl;
+    return -EIO;
+  }
+
+  info->f = fuse_new(info->ch, &info->args, &fs_oper, sizeof(fs_oper),
+		     (void*)this);
+  if (!info->f) {
+    // Undo the mount done above before bailing out.
+    fuse_unmount(info->mountpoint, info->ch);
+    derr << __func__ << " fuse_new failed" << dendl;
+    return -EIO;
+  }
+
+  fuse_thread.create("fusestore");
+  dout(10) << __func__ << " done" << dendl;
+  return 0;
+}
+
+// Runs the FUSE event loop on the handle created by start() and blocks until
+// fuse_loop() returns.  The result of fuse_loop() is returned unchanged.
+int FuseStore::loop()
+{
+  dout(10) << __func__ << " enter" << dendl;
+  int r = fuse_loop(info->f);
+  // The libfuse 2.x fuse_loop() signals failure with a bare -1, not an errno
+  // value, so log the raw code: cpp_strerror(-1) would label every failure
+  // as EPERM ("Operation not permitted").
+  if (r)
+    derr << __func__ << " fuse_loop returned " << r << dendl;
+  dout(10) << __func__ << " exit" << dendl;
+  return r;
+}
+
+// Tears down what start() set up: unmount, wait for the serving thread, then
+// destroy the fuse handle.  Always returns 0.
+// NOTE(review): info->mountpoint and info->args are not released here --
+// confirm they are freed elsewhere.
+int FuseStore::stop()
+{
+  dout(10) << __func__ << " enter" << dendl;
+  // Unmount first; presumably this is what makes fuse_loop() in the serving
+  // thread return so that the join below can complete -- confirm.
+  fuse_unmount(info->mountpoint, info->ch);
+  fuse_thread.join();
+  // Only destroy the handle once the thread no longer uses it.
+  fuse_destroy(info->f);
+  dout(10) << __func__ << " exit" << dendl;
+  return 0;
+}
diff --git a/src/os/FuseStore.h b/src/os/FuseStore.h
new file mode 100644
index 0000000..751c52a
--- /dev/null
+++ b/src/os/FuseStore.h
@@ -0,0 +1,35 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_OS_FUSESTORE_H
+#define CEPH_OS_FUSESTORE_H
+
+#include "common/Thread.h"
+#include "os/ObjectStore.h"
+
+// Exposes an ObjectStore through a FUSE mount.
+//
+// start() mounts and spawns fuse_thread, which runs loop(); stop() unmounts
+// and joins that thread.  main() is an alternative entry point that runs
+// fuse_main() directly.
+class FuseStore {
+public:
+  ObjectStore *store;     // backing store the FUSE handlers operate on
+  string mount_point;     // path handed to FUSE as the mount point
+  struct fs_info *info;   // FUSE state (args, channel, handle, mountpoint)
+
+  // Thread whose entry point runs the owning FuseStore's loop().
+  class FuseThread : public Thread {
+    FuseStore *fs;
+  public:
+    explicit FuseThread(FuseStore *f) : fs(f) {}
+    void *entry() {
+      fs->loop();
+      return NULL;
+    }
+  } fuse_thread;
+
+  FuseStore(ObjectStore *s, string p);
+  ~FuseStore();
+
+  int main();   // run fuse_main() with a synthetic command line
+  int start();  // mount and start fuse_thread
+  int loop();   // run fuse_loop(); body of fuse_thread
+  int stop();   // unmount, join fuse_thread, destroy the fuse handle
+};
+
+#endif
diff --git a/src/os/Makefile.am b/src/os/Makefile.am
index d7cdbf1..41b33d2 100644
--- a/src/os/Makefile.am
+++ b/src/os/Makefile.am
@@ -24,13 +24,15 @@ libos_a_SOURCES = \
 	os/filestore/LFNIndex.cc \
 	os/filestore/WBThrottle.cc \
 	os/fs/FS.cc \
-	os/keyvaluestore/GenericObjectMap.cc \
-	os/keyvaluestore/KeyValueStore.cc \
 	os/kstore/kv.cc \
 	os/kstore/KStore.cc \
 	os/memstore/MemStore.cc \
 	os/ObjectStore.cc
 
+if WITH_FUSE
+libos_a_SOURCES += os/FuseStore.cc
+endif
+
 if WITH_LIBAIO
 libos_a_SOURCES += \
 	os/bluestore/kv.cc \
@@ -40,6 +42,7 @@ libos_a_SOURCES += \
 	os/bluestore/BlueRocksEnv.cc \
 	os/bluestore/BlueStore.cc \
 	os/bluestore/FreelistManager.cc \
+	os/bluestore/KernelDevice.cc \
 	os/bluestore/StupidAllocator.cc
 endif
 
@@ -86,13 +89,12 @@ noinst_HEADERS += \
 	os/fs/btrfs_ioctl.h \
 	os/fs/FS.h \
 	os/fs/XFS.h \
-	os/keyvaluestore/GenericObjectMap.h \
-	os/keyvaluestore/KeyValueStore.h \
 	os/kstore/kstore_types.h \
 	os/kstore/KStore.h \
 	os/kstore/kv.h \
 	os/memstore/MemStore.h \
 	os/memstore/PageSet.h \
+	os/FuseStore.h \
 	os/ObjectMap.h \
 	os/ObjectStore.h
 
@@ -106,6 +108,7 @@ noinst_HEADERS += \
 	os/bluestore/BlueFS.h \
 	os/bluestore/BlueRocksEnv.h \
 	os/bluestore/BlueStore.h \
+	os/bluestore/KernelDevice.h \
 	os/bluestore/FreelistManager.h \
 	os/bluestore/StupidAllocator.h
 endif
@@ -117,6 +120,32 @@ noinst_LIBRARIES += libos_zfs.a
 noinst_HEADERS += os/fs/ZFS.h
 endif
 
+if WITH_SPDK
+
+SPDK_SRCDIR = ${top_srcdir}/src/spdk/lib
+${SPDK_SRCDIR}/nvme/libspdk_nvme.a:
+	$(MAKE) -C ${SPDK_SRCDIR}/nvme DPDK_INC=${LIBDPDK_CFLAGS}
+${SPDK_SRCDIR}/memory/libspdk_memory.a:
+	$(MAKE) -C ${SPDK_SRCDIR}/memory DPDK_INC=${LIBDPDK_CFLAGS}
+${SPDK_SRCDIR}/util/libspdk_util.a:
+	$(MAKE) -C ${SPDK_SRCDIR}/util DPDK_INC=${LIBDPDK_CFLAGS}
+${SPDK_SRCDIR}/ioat/libspdk_ioat.a:
+	$(MAKE) -C ${SPDK_SRCDIR}/ioat DPDK_INC=${LIBDPDK_CFLAGS}
+LIBSPDK_CFLAGS = "-I${top_srcdir}/src/spdk/include"
+LIBSPDK_LIBS = \
+	${top_srcdir}/src/spdk/lib/nvme/libspdk_nvme.a \
+	${top_srcdir}/src/spdk/lib/memory/libspdk_memory.a \
+	${top_srcdir}/src/spdk/lib/util/libspdk_util.a \
+	${top_srcdir}/src/spdk/lib/ioat/libspdk_ioat.a
+
+libos_a_CXXFLAGS += \
+	${LIBSPDK_CFLAGS} \
+	${LIBDPDK_CFLAGS} \
+	${LIBPCIACCESS_CFLAGS}
+libos_a_SOURCES += os/bluestore/NVMEDevice.cc
+noinst_HEADERS += os/bluestore/NVMEDevice.h
+endif
+
 if WITH_LIBAIO
 ceph_bluefs_tool_SOURCES = os/bluestore/bluefs_tool.cc
 ceph_bluefs_tool_LDADD = $(LIBOS) $(CEPH_GLOBAL)
diff --git a/src/os/ObjectStore.cc b/src/os/ObjectStore.cc
index 62ec739..ba638bf 100644
--- a/src/os/ObjectStore.cc
+++ b/src/os/ObjectStore.cc
@@ -20,7 +20,6 @@
 
 #include "filestore/FileStore.h"
 #include "memstore/MemStore.h"
-#include "keyvaluestore/KeyValueStore.h"
 #if defined(HAVE_LIBAIO)
 #include "bluestore/BlueStore.h"
 #endif
@@ -73,10 +72,6 @@ ObjectStore *ObjectStore::create(CephContext *cct,
   if (type == "memstore") {
     return new MemStore(cct, data);
   }
-  if (type == "keyvaluestore" &&
-      cct->check_experimental_feature_enabled("keyvaluestore")) {
-    return new KeyValueStore(data);
-  }
 #if defined(HAVE_LIBAIO)
   if (type == "bluestore" &&
       cct->check_experimental_feature_enabled("bluestore")) {
@@ -148,8 +143,13 @@ ostream& operator<<(ostream& out, const ObjectStore::Sequencer& s)
   return out << "osr(" << s.get_name() << " " << &s << ")";
 }
 
+// Streams a transaction as "Transaction(<address of tx>)".
+ostream& operator<<(ostream& out, const ObjectStore::Transaction& tx)
+{
+  out << "Transaction(" << &tx << ")";
+  return out;
+}
+
 unsigned ObjectStore::apply_transactions(Sequencer *osr,
-					 list<Transaction*> &tls,
+					 vector<Transaction>& tls,
 					 Context *ondisk)
 {
   // use op pool
@@ -170,7 +170,7 @@ unsigned ObjectStore::apply_transactions(Sequencer *osr,
 
 int ObjectStore::queue_transactions(
   Sequencer *osr,
-  list<Transaction*>& tls,
+  vector<Transaction>& tls,
   Context *onreadable,
   Context *oncommit,
   Context *onreadable_sync,
diff --git a/src/os/ObjectStore.h b/src/os/ObjectStore.h
index ba9dd76..b0b9524 100644
--- a/src/os/ObjectStore.h
+++ b/src/os/ObjectStore.h
@@ -181,7 +181,7 @@ public:
     string name;
     Sequencer_implRef p;
 
-    Sequencer(string n)
+    explicit Sequencer(string n)
       : name(n), p(NULL) {}
     ~Sequencer() {
     }
@@ -206,6 +206,20 @@ public:
     }
   };
 
+  /// Ref-counted base for collection handles; implementations report the
+  /// collection id they stand for through get_cid().
+  struct CollectionImpl : public RefCountedObject {
+    CollectionImpl() : RefCountedObject(NULL, 0) {}
+    virtual const coll_t &get_cid() = 0;
+  };
+  typedef boost::intrusive_ptr<CollectionImpl> CollectionHandle;
+
+  /// Minimal CollectionImpl that just stores a copy of the collection id
+  /// and hands it back from get_cid().
+  struct CompatCollectionHandle : public CollectionImpl {
+    coll_t cid;
+
+    explicit CompatCollectionHandle(coll_t c) : cid(c) {}
+
+    const coll_t &get_cid() override { return cid; }
+  };
+
   /*********************************
    *
    * Object Contents and semantics
@@ -431,6 +445,36 @@ public:
         largest_data_off_in_tbl(0),
 	fadvise_flags(0) { }
 
+      // override default move operations to reset default values
+      // Move constructor: copies every counter from `other`, then zeroes
+      // them in `other` so the moved-from object reads like a fresh one.
+      TransactionData(TransactionData&& other) :
+        ops(other.ops),
+        largest_data_len(other.largest_data_len),
+        largest_data_off(other.largest_data_off),
+        largest_data_off_in_tbl(other.largest_data_off_in_tbl),
+        fadvise_flags(other.fadvise_flags) {
+        // reset the source to the default (all-zero) values
+        other.ops = 0;
+        other.largest_data_len = 0;
+        other.largest_data_off = 0;
+        other.largest_data_off_in_tbl = 0;
+        other.fadvise_flags = 0;
+      }
+      // Move assignment: take over other's counters and zero them in
+      // `other`.  Self-move (x = std::move(x)) is a no-op; without the
+      // guard the object would end up zeroing its own counters.
+      TransactionData& operator=(TransactionData&& other) {
+        if (this != &other) {
+          ops = other.ops;
+          largest_data_len = other.largest_data_len;
+          largest_data_off = other.largest_data_off;
+          largest_data_off_in_tbl = other.largest_data_off_in_tbl;
+          fadvise_flags = other.fadvise_flags;
+          // reset the source to the default (all-zero) values
+          other.ops = 0;
+          other.largest_data_len = 0;
+          other.largest_data_off = 0;
+          other.largest_data_off_in_tbl = 0;
+          other.fadvise_flags = 0;
+        }
+        return *this;
+      }
+
+      TransactionData(const TransactionData& other) = default;
+      TransactionData& operator=(const TransactionData& other) = default;
+
       void encode(bufferlist& bl) const {
         bl.append((char*)this, sizeof(TransactionData));
       }
@@ -442,16 +486,16 @@ public:
   private:
     TransactionData data;
 
-    void *osr; // NULL on replay
+    void *osr {nullptr}; // NULL on replay
 
-    bool use_tbl;   //use_tbl for encode/decode
+    bool use_tbl {false};   //use_tbl for encode/decode
     bufferlist tbl;
 
     map<coll_t, __le32> coll_index;
     map<ghobject_t, __le32, ghobject_t::BitwiseComparator> object_index;
 
-    __le32 coll_id;
-    __le32 object_id;
+    __le32 coll_id {0};
+    __le32 object_id {0};
 
     bufferlist data_bl;
     bufferlist op_bl;
@@ -463,6 +507,62 @@ public:
     list<Context *> on_applied_sync;
 
   public:
+    Transaction() = default;
+
+    /// Build a transaction by decoding it from the iterator position `dp`.
+    explicit Transaction(bufferlist::iterator &dp) {
+      decode(dp);
+    }
+    /// Build a transaction by decoding it from the start of `nbl`.
+    explicit Transaction(bufferlist &nbl) {
+      bufferlist::iterator dp = nbl.begin();
+      decode(dp);
+    }
+
+    // override default move operations to reset default values
+    // Move constructor: containers, buffers and callback lists are moved;
+    // the plain scalar members (osr, use_tbl, coll_id, object_id) are copied
+    // and then reset in `other` to the same defaults a fresh Transaction has.
+    Transaction(Transaction&& other) :
+      data(std::move(other.data)),
+      osr(other.osr),
+      use_tbl(other.use_tbl),
+      tbl(std::move(other.tbl)),
+      coll_index(std::move(other.coll_index)),
+      object_index(std::move(other.object_index)),
+      coll_id(other.coll_id),
+      object_id(other.object_id),
+      data_bl(std::move(other.data_bl)),
+      op_bl(std::move(other.op_bl)),
+      op_ptr(std::move(other.op_ptr)),
+      on_applied(std::move(other.on_applied)),
+      on_commit(std::move(other.on_commit)),
+      on_applied_sync(std::move(other.on_applied_sync)) {
+      // scalars are not reset by std::move, so do it by hand
+      other.osr = nullptr;
+      other.use_tbl = false;
+      other.coll_id = 0;
+      other.object_id = 0;
+    }
+
+    // Move assignment: take over other's state and reset other's scalar
+    // members to their defaults.  Self-move (t = std::move(t)) is a no-op;
+    // without the guard the object would clear its own osr/use_tbl/ids and
+    // zero the counters held in `data`.
+    Transaction& operator=(Transaction&& other) {
+      if (this == &other)
+        return *this;
+      data = std::move(other.data);
+      osr = other.osr;
+      use_tbl = other.use_tbl;
+      tbl = std::move(other.tbl);
+      coll_index = std::move(other.coll_index);
+      object_index = std::move(other.object_index);
+      coll_id = other.coll_id;
+      object_id = other.object_id;
+      data_bl = std::move(other.data_bl);
+      op_bl = std::move(other.op_bl);
+      op_ptr = std::move(other.op_ptr);
+      on_applied = std::move(other.on_applied);
+      on_commit = std::move(other.on_commit);
+      on_applied_sync = std::move(other.on_applied_sync);
+      // scalars are not reset by std::move, so do it by hand
+      other.osr = nullptr;
+      other.use_tbl = false;
+      other.coll_id = 0;
+      other.object_id = 0;
+      return *this;
+    }
+
+    Transaction(const Transaction& other) = default;
+    Transaction& operator=(const Transaction& other) = default;
 
     /* Operations on callback contexts */
     void register_on_applied(Context *c) {
@@ -485,7 +585,7 @@ public:
     }
 
     static void collect_contexts(
-      list<Transaction *> &t,
+      vector<Transaction>& t,
       Context **out_on_applied,
       Context **out_on_commit,
       Context **out_on_applied_sync) {
@@ -493,12 +593,12 @@ public:
       assert(out_on_commit);
       assert(out_on_applied_sync);
       list<Context *> on_applied, on_commit, on_applied_sync;
-      for (list<Transaction *>::iterator i = t.begin();
+      for (vector<Transaction>::iterator i = t.begin();
 	   i != t.end();
 	   ++i) {
-	on_applied.splice(on_applied.end(), (*i)->on_applied);
-	on_commit.splice(on_commit.end(), (*i)->on_commit);
-	on_applied_sync.splice(on_applied_sync.end(), (*i)->on_applied_sync);
+	on_applied.splice(on_applied.end(), (*i).on_applied);
+	on_commit.splice(on_commit.end(), (*i).on_commit);
+	on_applied_sync.splice(on_applied_sync.end(), (*i).on_applied_sync);
       }
       *out_on_applied = C_Contexts::list_to_context(on_applied);
       *out_on_commit = C_Contexts::list_to_context(on_commit);
@@ -796,7 +896,7 @@ public:
       vector<ghobject_t> objects;
 
     private:
-      iterator(Transaction *t)
+      explicit iterator(Transaction *t)
         : t(t),
 	  data_bl_p(t->data_bl.begin()),
           colls(t->coll_index.size()),
@@ -952,7 +1052,7 @@ public:
      * Ensure the existance of an object in a collection. Create an
      * empty object if necessary
      */
-    void touch(coll_t cid, const ghobject_t& oid) {
+    void touch(const coll_t& cid, const ghobject_t& oid) {
       if (use_tbl) {
         __u32 op = OP_TOUCH;
         ::encode(op, tbl);
@@ -976,7 +1076,7 @@ public:
      * ObjectStore will omit the untouched data and store it as a
      * "hole" in the file.
      */
-    void write(coll_t cid, const ghobject_t& oid, uint64_t off, uint64_t len,
+    void write(const coll_t& cid, const ghobject_t& oid, uint64_t off, uint64_t len,
 	       const bufferlist& write_data, uint32_t flags = 0) {
       if (use_tbl) {
         __u32 op = OP_WRITE;
@@ -1009,7 +1109,7 @@ public:
      * ObjectStore instances may optimize this to release the
      * underlying storage space.
      */
-    void zero(coll_t cid, const ghobject_t& oid, uint64_t off, uint64_t len) {
+    void zero(const coll_t& cid, const ghobject_t& oid, uint64_t off, uint64_t len) {
       if (use_tbl) {
         __u32 op = OP_ZERO;
         ::encode(op, tbl);
@@ -1028,7 +1128,7 @@ public:
       data.ops++;
     }
     /// Discard all data in the object beyond the specified size.
-    void truncate(coll_t cid, const ghobject_t& oid, uint64_t off) {
+    void truncate(const coll_t& cid, const ghobject_t& oid, uint64_t off) {
       if (use_tbl) {
         __u32 op = OP_TRUNCATE;
         ::encode(op, tbl);
@@ -1045,7 +1145,7 @@ public:
       data.ops++;
     }
     /// Remove an object. All four parts of the object are removed.
-    void remove(coll_t cid, const ghobject_t& oid) {
+    void remove(const coll_t& cid, const ghobject_t& oid) {
       if (use_tbl) {
         __u32 op = OP_REMOVE;
         ::encode(op, tbl);
@@ -1060,12 +1160,12 @@ public:
       data.ops++;
     }
     /// Set an xattr of an object
-    void setattr(coll_t cid, const ghobject_t& oid, const char* name, bufferlist& val) {
+    void setattr(const coll_t& cid, const ghobject_t& oid, const char* name, bufferlist& val) {
       string n(name);
       setattr(cid, oid, n, val);
     }
     /// Set an xattr of an object
-    void setattr(coll_t cid, const ghobject_t& oid, const string& s, bufferlist& val) {
+    void setattr(const coll_t& cid, const ghobject_t& oid, const string& s, bufferlist& val) {
       if (use_tbl) {
         __u32 op = OP_SETATTR;
         ::encode(op, tbl);
@@ -1084,7 +1184,7 @@ public:
       data.ops++;
     }
     /// Set multiple xattrs of an object
-    void setattrs(coll_t cid, const ghobject_t& oid, map<string,bufferptr>& attrset) {
+    void setattrs(const coll_t& cid, const ghobject_t& oid, map<string,bufferptr>& attrset) {
       if (use_tbl) {
         __u32 op = OP_SETATTRS;
         ::encode(op, tbl);
@@ -1101,7 +1201,7 @@ public:
       data.ops++;
     }
     /// Set multiple xattrs of an object
-    void setattrs(coll_t cid, const ghobject_t& oid, map<string,bufferlist>& attrset) {
+    void setattrs(const coll_t& cid, const ghobject_t& oid, map<string,bufferlist>& attrset) {
       if (use_tbl) {
         __u32 op = OP_SETATTRS;
         ::encode(op, tbl);
@@ -1118,12 +1218,12 @@ public:
       data.ops++;
     }
     /// remove an xattr from an object
-    void rmattr(coll_t cid, const ghobject_t& oid, const char *name) {
+    void rmattr(const coll_t& cid, const ghobject_t& oid, const char *name) {
       string n(name);
       rmattr(cid, oid, n);
     }
     /// remove an xattr from an object
-    void rmattr(coll_t cid, const ghobject_t& oid, const string& s) {
+    void rmattr(const coll_t& cid, const ghobject_t& oid, const string& s) {
       if (use_tbl) {
         __u32 op = OP_RMATTR;
         ::encode(op, tbl);
@@ -1140,7 +1240,7 @@ public:
       data.ops++;
     }
     /// remove all xattrs from an object
-    void rmattrs(coll_t cid, const ghobject_t& oid) {
+    void rmattrs(const coll_t& cid, const ghobject_t& oid) {
       if (use_tbl) {
         __u32 op = OP_RMATTRS;
         ::encode(op, tbl);
@@ -1165,7 +1265,7 @@ public:
      * The destination named object may already exist, in
      * which case its previous contents are discarded.
      */
-    void clone(coll_t cid, const ghobject_t& oid, ghobject_t noid) {
+    void clone(const coll_t& cid, const ghobject_t& oid, ghobject_t noid) {
       if (use_tbl) {
         __u32 op = OP_CLONE;
         ::encode(op, tbl);
@@ -1188,7 +1288,7 @@ public:
      * portion of the data from the source object. None of the other
      * three parts of an object is copied from the source.
      */
-    void clone_range(coll_t cid, const ghobject_t& oid, ghobject_t noid,
+    void clone_range(const coll_t& cid, const ghobject_t& oid, ghobject_t noid,
 		     uint64_t srcoff, uint64_t srclen, uint64_t dstoff) {
       if (use_tbl) {
         __u32 op = OP_CLONERANGE2;
@@ -1212,7 +1312,7 @@ public:
       data.ops++;
     }
     /// Create the collection
-    void create_collection(coll_t cid, int bits) {
+    void create_collection(const coll_t& cid, int bits) {
       if (use_tbl) {
         __u32 op = OP_MKCOLL;
         ::encode(op, tbl);
@@ -1234,7 +1334,7 @@ public:
      * @param hint - the hint payload, which contains the customized
      *               data along with the hint type.
      */
-    void collection_hint(coll_t cid, uint32_t type, const bufferlist& hint) {
+    void collection_hint(const coll_t& cid, uint32_t type, const bufferlist& hint) {
       if (use_tbl) {
         __u32 op = OP_COLL_HINT;
         ::encode(op, tbl);
@@ -1252,7 +1352,7 @@ public:
     }
 
     /// remove the collection, the collection must be empty
-    void remove_collection(coll_t cid) {
+    void remove_collection(const coll_t& cid) {
       if (use_tbl) {
         __u32 op = OP_RMCOLL;
         ::encode(op, tbl);
@@ -1264,7 +1364,7 @@ public:
       }
       data.ops++;
     }
-    void collection_move(coll_t cid, coll_t oldcid, const ghobject_t& oid)
+    void collection_move(const coll_t& cid, coll_t oldcid, const ghobject_t& oid)
       __attribute__ ((deprecated)) {
       // NOTE: we encode this as a fixed combo of ADD + REMOVE.  they
       // always appear together, so this is effectively a single MOVE.
@@ -1296,7 +1396,7 @@ public:
       }
       data.ops++;
     }
-    void collection_move_rename(coll_t oldcid, const ghobject_t& oldoid,
+    void collection_move_rename(const coll_t& oldcid, const ghobject_t& oldoid,
 				coll_t cid, const ghobject_t& oid) {
       if (use_tbl) {
         __u32 op = OP_COLL_MOVE_RENAME;
@@ -1320,7 +1420,7 @@ public:
     // backends need not implement these at all.
 
     /// Set an xattr on a collection
-    void collection_setattr(coll_t cid, const string& name, bufferlist& val)
+    void collection_setattr(const coll_t& cid, const string& name, bufferlist& val)
       __attribute__ ((deprecated)) {
       if (use_tbl) {
         __u32 op = OP_COLL_SETATTR;
@@ -1339,7 +1439,7 @@ public:
     }
 
     /// Remove an xattr from a collection
-    void collection_rmattr(coll_t cid, const string& name)
+    void collection_rmattr(const coll_t& cid, const string& name)
       __attribute__ ((deprecated)) {
       if (use_tbl) {
         __u32 op = OP_COLL_RMATTR;
@@ -1355,7 +1455,7 @@ public:
       data.ops++;
     }
     /// Set multiple xattrs on a collection
-    void collection_setattrs(coll_t cid, map<string,bufferptr>& aset)
+    void collection_setattrs(const coll_t& cid, map<string,bufferptr>& aset)
       __attribute__ ((deprecated)) {
       if (use_tbl) {
         __u32 op = OP_COLL_SETATTRS;
@@ -1371,7 +1471,7 @@ public:
       data.ops++;
     }
     /// Set multiple xattrs on a collection
-    void collection_setattrs(coll_t cid, map<string,bufferlist>& aset)
+    void collection_setattrs(const coll_t& cid, map<string,bufferlist>& aset)
       __attribute__ ((deprecated)) {
       if (use_tbl) {
         __u32 op = OP_COLL_SETATTRS;
@@ -1406,7 +1506,7 @@ public:
     }
     /// Set keys on oid omap.  Replaces duplicate keys.
     void omap_setkeys(
-      coll_t cid,                           ///< [in] Collection containing oid
+      const coll_t& cid,                           ///< [in] Collection containing oid
       const ghobject_t &oid,                ///< [in] Object to update
       const map<string, bufferlist> &attrset ///< [in] Replacement keys and values
       ) {
@@ -1588,30 +1688,6 @@ public:
       data.ops++;
     }
 
-    // etc.
-    Transaction() :
-      osr(NULL),
-      use_tbl(false),
-      coll_id(0),
-      object_id(0) { }
-
-    Transaction(bufferlist::iterator &dp) :
-      osr(NULL),
-      use_tbl(false),
-      coll_id(0),
-      object_id(0) {
-      decode(dp);
-    }
-
-    Transaction(bufferlist &nbl) :
-      osr(NULL),
-      use_tbl(false),
-      coll_id(0),
-      object_id(0) {
-      bufferlist::iterator dp = nbl.begin();
-      decode(dp);
-    }
-
     void encode(bufferlist& bl) const {
       if (use_tbl) {
         uint64_t ops = data.ops;
@@ -1716,71 +1792,45 @@ public:
     static void generate_test_instances(list<Transaction*>& o);
   };
 
-  struct C_DeleteTransaction : public Context {
-    ObjectStore::Transaction *t;
-    C_DeleteTransaction(ObjectStore::Transaction *tt) : t(tt) {}
-    void finish(int r) {
-      delete t;
-    }
-  };
-  template<class T>
-  struct C_DeleteTransactionHolder : public Context {
-    ObjectStore::Transaction *t;
-    T obj;
-    C_DeleteTransactionHolder(ObjectStore::Transaction *tt, T &obj) :
-      t(tt), obj(obj) {}
-    void finish(int r) {
-      delete t;
-    }
-  };
-
   // synchronous wrappers
-  unsigned apply_transaction(Sequencer *osr, Transaction& t, Context *ondisk=0) {
-    list<Transaction*> tls;
-    tls.push_back(&t);
+  unsigned apply_transaction(Sequencer *osr, Transaction&& t, Context *ondisk=0) {
+    vector<Transaction> tls;
+    tls.push_back(std::move(t));
     return apply_transactions(osr, tls, ondisk);
   }
-  unsigned apply_transactions(Sequencer *osr, list<Transaction*>& tls, Context *ondisk=0);
-
-  int queue_transaction_and_cleanup(Sequencer *osr, Transaction* t,
-				    ThreadPool::TPHandle *handle = NULL) {
-    list<Transaction *> tls;
-    tls.push_back(t);
-    return queue_transactions(osr, tls, new C_DeleteTransaction(t),
-	                      NULL, NULL, TrackedOpRef(), handle);
-  }
+  unsigned apply_transactions(Sequencer *osr, vector<Transaction>& tls, Context *ondisk=0);
 
-  int queue_transaction(Sequencer *osr, Transaction *t, Context *onreadable, Context *ondisk=0,
+  int queue_transaction(Sequencer *osr, Transaction&& t, Context *onreadable, Context *ondisk=0,
 				Context *onreadable_sync=0,
 				TrackedOpRef op = TrackedOpRef(),
 				ThreadPool::TPHandle *handle = NULL) {
-    list<Transaction*> tls;
-    tls.push_back(t);
+    vector<Transaction> tls;
+    tls.push_back(std::move(t));
     return queue_transactions(osr, tls, onreadable, ondisk, onreadable_sync,
 	                      op, handle);
   }
 
-  int queue_transactions(Sequencer *osr, list<Transaction*>& tls,
+  int queue_transactions(Sequencer *osr, vector<Transaction>& tls,
 			 Context *onreadable, Context *ondisk=0,
 			 Context *onreadable_sync=0,
 			 TrackedOpRef op = TrackedOpRef(),
 			 ThreadPool::TPHandle *handle = NULL) {
     assert(!tls.empty());
-    tls.back()->register_on_applied(onreadable);
-    tls.back()->register_on_commit(ondisk);
-    tls.back()->register_on_applied_sync(onreadable_sync);
+    tls.back().register_on_applied(onreadable);
+    tls.back().register_on_commit(ondisk);
+    tls.back().register_on_applied_sync(onreadable_sync);
     return queue_transactions(osr, tls, op, handle);
   }
 
   virtual int queue_transactions(
-    Sequencer *osr, list<Transaction*>& tls,
+    Sequencer *osr, vector<Transaction>& tls,
     TrackedOpRef op = TrackedOpRef(),
     ThreadPool::TPHandle *handle = NULL) = 0;
 
 
   int queue_transactions(
     Sequencer *osr,
-    list<Transaction*>& tls,
+    vector<Transaction>& tls,
     Context *onreadable,
     Context *oncommit,
     Context *onreadable_sync,
@@ -1789,24 +1839,25 @@ public:
 
   int queue_transaction(
     Sequencer *osr,
-    Transaction* t,
+    Transaction&& t,
     Context *onreadable,
     Context *oncommit,
     Context *onreadable_sync,
     Context *oncomplete,
     TrackedOpRef op) {
-    list<Transaction*> tls;
-    tls.push_back(t);
+
+    vector<Transaction> tls;
+    tls.push_back(std::move(t));
     return queue_transactions(
       osr, tls, onreadable, oncommit, onreadable_sync, oncomplete, op);
   }
 
  public:
-  ObjectStore(const std::string& path_) : path(path_), logger(NULL) {}
+  explicit ObjectStore(const std::string& path_) : path(path_), logger(NULL) {}
   virtual ~ObjectStore() {}
 
   // no copying
-  ObjectStore(const ObjectStore& o);
+  explicit ObjectStore(const ObjectStore& o);
   const ObjectStore& operator=(const ObjectStore& o);
 
   // versioning
@@ -1814,6 +1865,8 @@ public:
     return 0;
   }
 
+  virtual string get_type() = 0;
+
   // mgmt
   virtual bool test_mount_in_use() = 0;
   virtual int mount() = 0;
@@ -1875,10 +1928,21 @@ public:
    */
   virtual int get_ideal_list_max() { return 64; }
 
+
   /**
-   * Synchronous read operations
+   * get a collection handle
+   *
+   * Provide a trivial handle as a default to avoid converting legacy
+   * implementations.
    */
+  virtual CollectionHandle open_collection(const coll_t &cid) {
+    // default: a handle that merely remembers the collection id
+    return new CompatCollectionHandle(cid);
+  }
+
 
+  /**
+   * Synchronous read operations
+   */
 
   /**
    * exists -- Test for existance of object
@@ -1887,7 +1951,10 @@ public:
    * @param oid oid of object
    * @returns true if object exists, false otherwise
    */
-  virtual bool exists(coll_t cid, const ghobject_t& oid) = 0;                   // useful?
+  virtual bool exists(const coll_t& cid, const ghobject_t& oid) = 0; // useful?
+  /// Handle-based variant; by default resolves the handle to its collection
+  /// id and forwards to the coll_t overload.
+  virtual bool exists(CollectionHandle& c, const ghobject_t& oid) {
+    const coll_t &cid = c->get_cid();
+    return exists(cid, oid);
+  }
 
   /**
    * stat -- get information for an object
@@ -1899,10 +1966,17 @@ public:
    * @returns 0 on success, negative error code on failure.
    */
   virtual int stat(
-    coll_t cid,
+    const coll_t& cid,
     const ghobject_t& oid,
     struct stat *st,
     bool allow_eio = false) = 0; // struct stat?
+  /// Handle-based variant; by default resolves the handle to its collection
+  /// id and forwards to the coll_t overload.
+  virtual int stat(CollectionHandle &c, const ghobject_t& oid,
+                   struct stat *st, bool allow_eio = false) {
+    const coll_t &cid = c->get_cid();
+    return stat(cid, oid, st, allow_eio);
+  }
 
   /**
    * read -- read a byte range of data from an object
@@ -1920,13 +1994,23 @@ public:
    * @returns number of bytes read on success, or negative error code on failure.
    */
    virtual int read(
-    coll_t cid,
+    const coll_t& cid,
     const ghobject_t& oid,
     uint64_t offset,
     size_t len,
     bufferlist& bl,
     uint32_t op_flags = 0,
     bool allow_eio = false) = 0;
+   /// Handle-based variant; by default resolves the handle to its collection
+   /// id and forwards to the coll_t overload.
+   virtual int read(CollectionHandle &c, const ghobject_t& oid,
+                    uint64_t offset, size_t len, bufferlist& bl,
+                    uint32_t op_flags = 0, bool allow_eio = false) {
+     const coll_t &cid = c->get_cid();
+     return read(cid, oid, offset, len, bl, op_flags, allow_eio);
+   }
 
   /**
    * fiemap -- get extent map of data of an object
@@ -1944,7 +2028,12 @@ public:
    * @param bl output bufferlist for extent map information.
    * @returns 0 on success, negative error code on failure.
    */
-  virtual int fiemap(coll_t cid, const ghobject_t& oid, uint64_t offset, size_t len, bufferlist& bl) = 0;
+  virtual int fiemap(const coll_t& cid, const ghobject_t& oid,
+		     uint64_t offset, size_t len, bufferlist& bl) = 0;
+  /// Handle-based variant; by default resolves the handle to its collection
+  /// id and forwards to the coll_t overload.
+  virtual int fiemap(CollectionHandle& c, const ghobject_t& oid,
+                     uint64_t offset, size_t len, bufferlist& bl) {
+    const coll_t &cid = c->get_cid();
+    return fiemap(cid, oid, offset, len, bl);
+  }
 
   /**
    * getattr -- get an xattr of an object
@@ -1955,7 +2044,12 @@ public:
    * @param value place to put output result.
    * @returns 0 on success, negative error code on failure.
    */
-  virtual int getattr(coll_t cid, const ghobject_t& oid, const char *name, bufferptr& value) = 0;
+  virtual int getattr(const coll_t& cid, const ghobject_t& oid,
+		      const char *name, bufferptr& value) = 0;
+  /// Handle-based variant; by default resolves the handle to its collection
+  /// id and forwards to the coll_t overload.
+  virtual int getattr(CollectionHandle &c, const ghobject_t& oid,
+                      const char *name, bufferptr& value) {
+    const coll_t &cid = c->get_cid();
+    return getattr(cid, oid, name, value);
+  }
 
   /**
    * getattr -- get an xattr of an object
@@ -1966,7 +2060,7 @@ public:
    * @param value place to put output result.
    * @returns 0 on success, negative error code on failure.
    */
-  int getattr(coll_t cid, const ghobject_t& oid, const char *name, bufferlist& value) {
+  int getattr(const coll_t& cid, const ghobject_t& oid, const char *name, bufferlist& value) {
     bufferptr bp;
     int r = getattr(cid, oid, name, bp);
     if (bp.length())
@@ -1981,6 +2075,14 @@ public:
     value.push_back(bp);
     return r;
   }
+  int getattr(
+    CollectionHandle &c, const ghobject_t& oid,
+    const string& name, bufferlist& value) {
+    bufferptr bp;
+    int r = getattr(c, oid, name.c_str(), bp);
+    value.push_back(bp);
+    return r;
+  }
 
   /**
    * getattrs -- get all of the xattrs of an object
@@ -1990,7 +2092,12 @@ public:
    * @param aset place to put output result.
    * @returns 0 on success, negative error code on failure.
    */
-  virtual int getattrs(coll_t cid, const ghobject_t& oid, map<string,bufferptr>& aset) = 0;
+  virtual int getattrs(const coll_t& cid, const ghobject_t& oid,
+		       map<string,bufferptr>& aset) = 0;
+  virtual int getattrs(CollectionHandle &c, const ghobject_t& oid,
+		       map<string,bufferptr>& aset) {
+    return getattrs(c->get_cid(), oid, aset);
+  }
 
   /**
    * getattrs -- get all of the xattrs of an object
@@ -2000,7 +2107,7 @@ public:
    * @param aset place to put output result.
    * @returns 0 on success, negative error code on failure.
    */
-  int getattrs(coll_t cid, const ghobject_t& oid, map<string,bufferlist>& aset) {
+  int getattrs(const coll_t& cid, const ghobject_t& oid, map<string,bufferlist>& aset) {
     map<string,bufferptr> bmap;
     int r = getattrs(cid, oid, bmap);
     for (map<string,bufferptr>::iterator i = bmap.begin();
@@ -2010,6 +2117,17 @@ public:
     }
     return r;
   }
+  int getattrs(CollectionHandle &c, const ghobject_t& oid,
+	       map<string,bufferlist>& aset) {
+    map<string,bufferptr> bmap;
+    int r = getattrs(c, oid, bmap);
+    for (map<string,bufferptr>::iterator i = bmap.begin();
+	i != bmap.end();
+	++i) {
+      aset[i->first].append(i->second);
+    }
+    return r;
+  }
 
 
   // collections
@@ -2022,7 +2140,7 @@ public:
    */
   virtual int list_collections(vector<coll_t>& ls) = 0;
 
-  virtual int collection_version_current(coll_t c, uint32_t *version) {
+  virtual int collection_version_current(const coll_t& c, uint32_t *version) {
     *version = 0;
     return 1;
   }
@@ -2032,7 +2150,7 @@ public:
    * @param c collection
    * @returns true if it exists, false otherwise
    */
-  virtual bool collection_exists(coll_t c) = 0;
+  virtual bool collection_exists(const coll_t& c) = 0;
   /**
    * collection_getattr - get an xattr of a collection
    *
@@ -2042,7 +2160,7 @@ public:
    * @param size size of buffer to receive value
    * @returns 0 on success, negative error code on failure
    */
-  virtual int collection_getattr(coll_t cid, const char *name,
+  virtual int collection_getattr(const coll_t& cid, const char *name,
 	                         void *value, size_t size)
     __attribute__ ((deprecated)) {
     return -EOPNOTSUPP;
@@ -2056,7 +2174,7 @@ public:
    * @param bl buffer to receive value
    * @returns 0 on success, negative error code on failure
    */
-  virtual int collection_getattr(coll_t cid, const char *name, bufferlist& bl)
+  virtual int collection_getattr(const coll_t& cid, const char *name, bufferlist& bl)
     __attribute__ ((deprecated)) {
     return -EOPNOTSUPP;
   }
@@ -2068,7 +2186,7 @@ public:
    * @param aset map of keys and buffers that contain the values
    * @returns 0 on success, negative error code on failure
    */
-  virtual int collection_getattrs(coll_t cid, map<string,bufferptr> &aset)
+  virtual int collection_getattrs(const coll_t& cid, map<string,bufferptr> &aset)
     __attribute__ ((deprecated)) {
     return -EOPNOTSUPP;
   }
@@ -2079,7 +2197,18 @@ public:
    * @param c collection
    * @returns true if empty, false otherwise
    */
-  virtual bool collection_empty(coll_t c) = 0;
+  virtual bool collection_empty(const coll_t& c) = 0;
+
+  /**
+   * Return the number of significant bits of the coll_t::pgid.
+   *
+   * This should return the value set by the last create_collection or
+   * split_collection.  A lazy backend (e.g., FileStore) may choose not to
+   * store or report this; this base implementation returns -EOPNOTSUPP.
+   */
+  virtual int collection_bits(const coll_t& c) {
+    return -EOPNOTSUPP;
+  }
 
   /**
    * list contents of a collection that fall in the range [start, end) and no more than a specified many result
@@ -2094,49 +2223,95 @@ public:
    * @param next [out] next item sorts >= this value
    * @return zero on success, or negative error
    */
-  virtual int collection_list(coll_t c, ghobject_t start, ghobject_t end,
+  virtual int collection_list(const coll_t& c, ghobject_t start, ghobject_t end,
 			      bool sort_bitwise, int max,
 			      vector<ghobject_t> *ls, ghobject_t *next) = 0;
+  virtual int collection_list(CollectionHandle &c,
+			      ghobject_t start, ghobject_t end,
+			      bool sort_bitwise, int max,
+			      vector<ghobject_t> *ls, ghobject_t *next) {
+    return collection_list(c->get_cid(), start, end, sort_bitwise, max, ls, next);
+  }
+
 
   /// OMAP
   /// Get omap contents
   virtual int omap_get(
-    coll_t c,                ///< [in] Collection containing oid
+    const coll_t& c,                ///< [in] Collection containing oid
     const ghobject_t &oid,   ///< [in] Object containing omap
     bufferlist *header,      ///< [out] omap header
     map<string, bufferlist> *out /// < [out] Key to value map
     ) = 0;
+  virtual int omap_get(
+    CollectionHandle &c,     ///< [in] Collection containing oid
+    const ghobject_t &oid,   ///< [in] Object containing omap
+    bufferlist *header,      ///< [out] omap header
+    map<string, bufferlist> *out ///< [out] Key to value map
+    ) {
+    return omap_get(c->get_cid(), oid, header, out);
+  }
 
   /// Get omap header
   virtual int omap_get_header(
-    coll_t c,                ///< [in] Collection containing oid
+    const coll_t& c,                ///< [in] Collection containing oid
     const ghobject_t &oid,   ///< [in] Object containing omap
     bufferlist *header,      ///< [out] omap header
     bool allow_eio = false ///< [in] don't assert on eio
     ) = 0;
+  virtual int omap_get_header(
+    CollectionHandle &c,     ///< [in] Collection containing oid
+    const ghobject_t &oid,   ///< [in] Object containing omap
+    bufferlist *header,      ///< [out] omap header
+    bool allow_eio = false ///< [in] don't assert on eio
+    ) {
+    return omap_get_header(c->get_cid(), oid, header, allow_eio);
+  }
 
   /// Get keys defined on oid
   virtual int omap_get_keys(
-    coll_t c,              ///< [in] Collection containing oid
+    const coll_t& c,              ///< [in] Collection containing oid
     const ghobject_t &oid, ///< [in] Object containing omap
     set<string> *keys      ///< [out] Keys defined on oid
     ) = 0;
+  virtual int omap_get_keys(
+    CollectionHandle &c,   ///< [in] Collection containing oid
+    const ghobject_t &oid, ///< [in] Object containing omap
+    set<string> *keys      ///< [out] Keys defined on oid
+    ) {
+    return omap_get_keys(c->get_cid(), oid, keys);
+  }
 
   /// Get key values
   virtual int omap_get_values(
-    coll_t c,                    ///< [in] Collection containing oid
+    const coll_t& c,                    ///< [in] Collection containing oid
     const ghobject_t &oid,       ///< [in] Object containing omap
     const set<string> &keys,     ///< [in] Keys to get
     map<string, bufferlist> *out ///< [out] Returned keys and values
     ) = 0;
+  virtual int omap_get_values(
+    CollectionHandle &c,         ///< [in] Collection containing oid
+    const ghobject_t &oid,       ///< [in] Object containing omap
+    const set<string> &keys,     ///< [in] Keys to get
+    map<string, bufferlist> *out ///< [out] Returned keys and values
+    ) {
+    return omap_get_values(c->get_cid(), oid, keys, out);
+  }
 
   /// Filters keys into out which are defined on oid
   virtual int omap_check_keys(
-    coll_t c,                ///< [in] Collection containing oid
+    const coll_t& c,                ///< [in] Collection containing oid
     const ghobject_t &oid,   ///< [in] Object containing omap
     const set<string> &keys, ///< [in] Keys to check
     set<string> *out         ///< [out] Subset of keys defined on oid
     ) = 0;
+  virtual int omap_check_keys(
+    CollectionHandle &c,     ///< [in] Collection containing oid
+    const ghobject_t &oid,   ///< [in] Object containing omap
+    const set<string> &keys, ///< [in] Keys to check
+    set<string> *out         ///< [out] Subset of keys defined on oid
+    ) {
+    return omap_check_keys(c->get_cid(), oid, keys, out);
+  }
 
   /**
    * Returns an object map iterator
@@ -2148,10 +2323,15 @@ public:
    * @return iterator, null on error
    */
   virtual ObjectMap::ObjectMapIterator get_omap_iterator(
-    coll_t c,              ///< [in] collection
+    const coll_t& c,              ///< [in] collection
     const ghobject_t &oid  ///< [in] object
     ) = 0;
-
+  virtual ObjectMap::ObjectMapIterator get_omap_iterator(
+    CollectionHandle &c,   ///< [in] collection
+    const ghobject_t &oid  ///< [in] object
+    ) {
+    return get_omap_iterator(c->get_cid(), oid);
+  }
 
   virtual int flush_journal() { return -EOPNOTSUPP; }
 
@@ -2180,5 +2360,6 @@ static inline void intrusive_ptr_release(ObjectStore::Sequencer_impl *s) {
 }
 
 ostream& operator<<(ostream& out, const ObjectStore::Sequencer& s);
+ostream& operator<<(ostream& out, const ObjectStore::Transaction& tx);
 
 #endif
diff --git a/src/os/bluestore/BlockDevice.cc b/src/os/bluestore/BlockDevice.cc
index 230ea45..ac17b64 100644
--- a/src/os/bluestore/BlockDevice.cc
+++ b/src/os/bluestore/BlockDevice.cc
@@ -1,25 +1,32 @@
 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
 // vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2015 XSky <haomai at xsky.com>
+ *
+ * Author: Haomai Wang <haomaiwang at gmail.com>
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation.  See file COPYING.
+ *
+ */
 
 #include <unistd.h>
-#include <stdlib.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <unistd.h>
 
-#include "BlockDevice.h"
-#include "include/types.h"
-#include "include/compat.h"
-#include "common/errno.h"
+#include "KernelDevice.h"
+#if defined(HAVE_SPDK)
+#include "NVMEDevice.h"
+#endif
+
 #include "common/debug.h"
-#include "common/blkdev.h"
 
 #define dout_subsys ceph_subsys_bdev
 #undef dout_prefix
 #define dout_prefix *_dout << "bdev "
 
-
 void IOContext::aio_wait()
 {
   Mutex::Locker l(lock);
@@ -35,510 +42,28 @@ void IOContext::aio_wait()
   dout(20) << __func__ << " " << this << " done" << dendl;
 }
 
-// ----------------
-#undef dout_prefix
-#define dout_prefix *_dout << "bdev(" << path << ") "
-
-BlockDevice::BlockDevice(aio_callback_t cb, void *cbpriv)
-  : fd_direct(-1),
-    fd_buffered(-1),
-    size(0), block_size(0),
-    fs(NULL), aio(false), dio(false),
-    debug_lock("BlockDevice::debug_lock"),
-    ioc_reap_lock("BlockDevice::ioc_reap_lock"),
-    flush_lock("BlockDevice::flush_lock"),
-    aio_queue(g_conf->bdev_aio_max_queue_depth),
-    aio_callback(cb),
-    aio_callback_priv(cbpriv),
-    aio_stop(false),
-    aio_thread(this)
-{
-  zeros = buffer::create_page_aligned(1048576);
-  zeros.zero();
-}
-
-int BlockDevice::_lock()
-{
-  struct flock l;
-  memset(&l, 0, sizeof(l));
-  l.l_type = F_WRLCK;
-  l.l_whence = SEEK_SET;
-  l.l_start = 0;
-  l.l_len = 0;
-  int r = ::fcntl(fd_direct, F_SETLK, &l);
-  if (r < 0)
-    return -errno;
-  return 0;
-}
-
-int BlockDevice::open(string p)
-{
-  path = p;
-  int r = 0;
-  dout(1) << __func__ << " path " << path << dendl;
-
-  fd_direct = ::open(path.c_str(), O_RDWR | O_DIRECT);
-  if (fd_direct < 0) {
-    int r = -errno;
-    derr << __func__ << " open got: " << cpp_strerror(r) << dendl;
-    return r;
-  }
-  fd_buffered = ::open(path.c_str(), O_RDWR);
-  if (fd_buffered < 0) {
-    r = -errno;
-    derr << __func__ << " open got: " << cpp_strerror(r) << dendl;
-    goto out_direct;
-  }
-  dio = true;
-  aio = g_conf->bdev_aio;
-  if (!aio) {
-    assert(0 == "non-aio not supported");
-  }
-
-  // disable readahead as it will wreak havoc on our mix of
-  // directio/aio and buffered io.
-  r = posix_fadvise(fd_buffered, 0, 0, POSIX_FADV_RANDOM);
-  if (r < 0) {
-    r = -errno;
-    derr << __func__ << " open got: " << cpp_strerror(r) << dendl;
-    goto out_fail;
-  }
-
-  r = _lock();
-  if (r < 0) {
-    derr << __func__ << " failed to lock " << path << ": " << cpp_strerror(r)
-	 << dendl;
-    goto out_fail;
-  }
-
-  struct stat st;
-  r = ::fstat(fd_direct, &st);
-  if (r < 0) {
-    r = -errno;
-    derr << __func__ << " fstat got " << cpp_strerror(r) << dendl;
-    goto out_fail;
-  }
-  if (S_ISBLK(st.st_mode)) {
-    int64_t s;
-    r = get_block_device_size(fd_direct, &s);
-    if (r < 0) {
-      goto out_fail;
-    }
-    size = s;
-  } else {
-    size = st.st_size;
-  }
-  block_size = st.st_blksize;
-
-  fs = FS::create_by_fd(fd_direct);
-  assert(fs);
-
-  r = _aio_start();
-  assert(r == 0);
-
-  dout(1) << __func__
-	  << " size " << size
-	  << " (" << pretty_si_t(size) << "B)"
-	  << " block_size " << block_size
-	  << " (" << pretty_si_t(block_size) << "B)"
-	  << dendl;
-  return 0;
-
- out_fail:
-  VOID_TEMP_FAILURE_RETRY(::close(fd_buffered));
-  fd_buffered = -1;
- out_direct:
-  VOID_TEMP_FAILURE_RETRY(::close(fd_direct));
-  fd_direct = -1;
-  return r;
-}
-
-void BlockDevice::close()
+BlockDevice *BlockDevice::create(const string& path, aio_callback_t cb, void *cbpriv)
 {
-  dout(1) << __func__ << dendl;
-  _aio_stop();
-
-  assert(fs);
-  delete fs;
-  fs = NULL;
-
-  assert(fd_direct >= 0);
-  VOID_TEMP_FAILURE_RETRY(::close(fd_direct));
-  fd_direct = -1;
-
-  assert(fd_buffered >= 0);
-  VOID_TEMP_FAILURE_RETRY(::close(fd_buffered));
-  fd_buffered = -1;
-
-  path.clear();
-}
-
-int BlockDevice::flush()
-{
-  // serialize flushers, so that we can avoid weird io_since_flush
-  // races (w/ multipler flushers).
-  Mutex::Locker l(flush_lock);
-  if (io_since_flush.read() == 0) {
-    dout(10) << __func__ << " no-op (no ios since last flush)" << dendl;
-    return 0;
-  }
-  dout(10) << __func__ << " start" << dendl;
-  io_since_flush.set(0);
-  if (g_conf->bdev_inject_crash) {
-    // sleep for a moment to give other threads a chance to submit or
-    // wait on io that races with a flush.
-    derr << __func__ << " injecting crash. first we sleep..." << dendl;
-    sleep(3);
-    derr << __func__ << " and now we die" << dendl;
-    assert(0 == "bdev_inject_crash");
+  string type = "kernel";
+  char buf[10];
+  int r = ::readlink(path.c_str(), buf, sizeof(buf));
+  if (r >= 0 && strncmp(buf, SPDK_PREFIX, sizeof(SPDK_PREFIX)-1) == 0) {
+    type = "ust-nvme";
   }
-  utime_t start = ceph_clock_now(NULL);
-  int r = ::fdatasync(fd_direct);
-  utime_t end = ceph_clock_now(NULL);
-  utime_t dur = end - start;
-  if (r < 0) {
-    r = -errno;
-    derr << __func__ << " fdatasync got: " << cpp_strerror(r) << dendl;
-  }
-  dout(5) << __func__ << " in " << dur << dendl;;
-  return r;
-}
+  dout(1) << __func__ << " path " << path << " type " << type << dendl;
 
-int BlockDevice::_aio_start()
-{
-  if (g_conf->bdev_aio) {
-    dout(10) << __func__ << dendl;
-    int r = aio_queue.init();
-    if (r < 0) {
-      derr << __func__ << " failed: " << cpp_strerror(r) << dendl;
-      return r;
-    }
-    aio_thread.create("bstore_aio");
+  if (type == "kernel") {
+    return new KernelDevice(cb, cbpriv);
   }
-  return 0;
-}
-
-void BlockDevice::_aio_stop()
-{
-  if (g_conf->bdev_aio) {
-    dout(10) << __func__ << dendl;
-    aio_stop = true;
-    aio_thread.join();
-    aio_stop = false;
-    aio_queue.shutdown();
-  }
-}
-
-void BlockDevice::_aio_thread()
-{
-  dout(10) << __func__ << " start" << dendl;
-  while (!aio_stop) {
-    dout(40) << __func__ << " polling" << dendl;
-    int max = 16;
-    FS::aio_t *aio[max];
-    int r = aio_queue.get_next_completed(g_conf->bdev_aio_poll_ms,
-					 aio, max);
-    if (r < 0) {
-      derr << __func__ << " got " << cpp_strerror(r) << dendl;
-    }
-    if (r > 0) {
-      dout(30) << __func__ << " got " << r << " completed aios" << dendl;
-      for (int i = 0; i < r; ++i) {
-	IOContext *ioc = static_cast<IOContext*>(aio[i]->priv);
-	_aio_log_finish(ioc, aio[i]->offset, aio[i]->length);
-	int left = ioc->num_running.dec();
-	int r = aio[i]->get_return_value();
-	dout(10) << __func__ << " finished aio " << aio[i] << " r " << r
-		 << " ioc " << ioc
-		 << " with " << left << " aios left" << dendl;
-	assert(r >= 0);
-	if (left == 0) {
-	  // check waiting count before doing callback (which may
-	  // destroy this ioc).
-	  if (ioc->num_waiting.read()) {
-	    dout(20) << __func__ << " waking waiter" << dendl;
-	    Mutex::Locker l(ioc->lock);
-	    ioc->cond.Signal();
-	  }
-	  if (ioc->priv) {
-	    aio_callback(aio_callback_priv, ioc->priv);
-	  }
-	}
-      }
-    }
-    if (ioc_reap_count.read()) {
-      Mutex::Locker l(ioc_reap_lock);
-      for (auto p : ioc_reap_queue) {
-	dout(20) << __func__ << " reap ioc " << p << dendl;
-	delete p;
-      }
-      ioc_reap_queue.clear();
-      ioc_reap_count.dec();
-    }
-  }
-  dout(10) << __func__ << " end" << dendl;
-}
-
-void BlockDevice::_aio_log_start(
-  IOContext *ioc,
-  uint64_t offset,
-  uint64_t length)
-{
-  dout(20) << __func__ << " " << offset << "~" << length << dendl;
-  if (g_conf->bdev_debug_inflight_ios) {
-    Mutex::Locker l(debug_lock);
-    if (debug_inflight.intersects(offset, length)) {
-      derr << __func__ << " inflight overlap of "
-	   << offset << "~" << length
-	   << " with " << debug_inflight << dendl;
-      assert(0);
-    }
-    debug_inflight.insert(offset, length);
+#if defined(HAVE_SPDK)
+  if (type == "ust-nvme") {
+    return new NVMEDevice(cb, cbpriv);
   }
-}
-
-void BlockDevice::_aio_log_finish(
-  IOContext *ioc,
-  uint64_t offset,
-  uint64_t length)
-{
-  dout(20) << __func__ << " " << aio << " " << offset << "~" << length << dendl;
-  if (g_conf->bdev_debug_inflight_ios) {
-    Mutex::Locker l(debug_lock);
-    debug_inflight.erase(offset, length);
-  }
-}
-
-void BlockDevice::aio_submit(IOContext *ioc)
-{
-  dout(20) << __func__ << " ioc " << ioc
-	   << " pending " << ioc->num_pending.read()
-	   << " running " << ioc->num_running.read()
-	   << dendl;
-  // move these aside, and get our end iterator position now, as the
-  // aios might complete as soon as they are submitted and queue more
-  // wal aio's.
-  list<FS::aio_t>::iterator e = ioc->running_aios.begin();
-  ioc->running_aios.splice(e, ioc->pending_aios);
-  list<FS::aio_t>::iterator p = ioc->running_aios.begin();
-
-  int pending = ioc->num_pending.read();
-  ioc->num_running.add(pending);
-  ioc->num_pending.sub(pending);
-  assert(ioc->num_pending.read() == 0);  // we should be only thread doing this
-
-  bool done = false;
-  while (!done) {
-    FS::aio_t& aio = *p;
-    aio.priv = static_cast<void*>(ioc);
-    dout(20) << __func__ << "  aio " << &aio << " fd " << aio.fd
-	     << " " << aio.offset << "~" << aio.length << dendl;
-    for (vector<iovec>::iterator q = aio.iov.begin(); q != aio.iov.end(); ++q)
-      dout(30) << __func__ << "   iov " << (void*)q->iov_base
-	       << " len " << q->iov_len << dendl;
-
-    // be careful: as soon as we submit aio we race with completion.
-    // since we are holding a ref take care not to dereference txc at
-    // all after that point.
-    list<FS::aio_t>::iterator cur = p;
-    ++p;
-    done = (p == e);
-
-    // do not dereference txc (or it's contents) after we submit (if
-    // done == true and we don't loop)
-    int retries = 0;
-    int r = aio_queue.submit(*cur, &retries);
-    if (retries)
-      derr << __func__ << " retries " << retries << dendl;
-    if (r) {
-      derr << " aio submit got " << cpp_strerror(r) << dendl;
-      assert(r == 0);
-    }
-  }
-}
-
-int BlockDevice::aio_write(
-  uint64_t off,
-  bufferlist &bl,
-  IOContext *ioc,
-  bool buffered)
-{
-  uint64_t len = bl.length();
-  dout(20) << __func__ << " " << off << "~" << len << dendl;
-  assert(off % block_size == 0);
-  assert(len % block_size == 0);
-  assert(len > 0);
-  assert(off < size);
-  assert(off + len <= size);
-
-  if (!bl.is_n_page_sized() || !bl.is_page_aligned()) {
-    dout(20) << __func__ << " rebuilding buffer to be page-aligned" << dendl;
-    bl.rebuild();
-  }
-
-  dout(40) << "data: ";
-  bl.hexdump(*_dout);
-  *_dout << dendl;
-
-  _aio_log_start(ioc, off, bl.length());
-
-#ifdef HAVE_LIBAIO
-  if (aio && dio && !buffered) {
-    ioc->pending_aios.push_back(FS::aio_t(ioc, fd_direct));
-    ioc->num_pending.inc();
-    FS::aio_t& aio = ioc->pending_aios.back();
-    if (g_conf->bdev_inject_crash &&
-	rand() % g_conf->bdev_inject_crash == 0) {
-      derr << __func__ << " bdev_inject_crash: dropping io " << off << "~" << len
-	   << dendl;
-      // generate a real io so that aio_wait behaves properly, but make it
-      // a read instead of write, and toss the result.
-      aio.pread(off, len);
-    } else {
-      bl.prepare_iov(&aio.iov);
-      for (unsigned i=0; i<aio.iov.size(); ++i) {
-	dout(30) << "aio " << i << " " << aio.iov[i].iov_base
-		 << " " << aio.iov[i].iov_len << dendl;
-      }
-      aio.bl.claim_append(bl);
-      aio.pwritev(off);
-    }
-    dout(5) << __func__ << " " << off << "~" << len << " aio " << &aio << dendl;
-  } else
 #endif
-  {
-    dout(5) << __func__ << " " << off << "~" << len << " buffered" << dendl;
-    if (g_conf->bdev_inject_crash &&
-	rand() % g_conf->bdev_inject_crash == 0) {
-      derr << __func__ << " bdev_inject_crash: dropping io " << off << "~" << len
-	   << dendl;
-      return 0;
-    }
-    vector<iovec> iov;
-    bl.prepare_iov(&iov);
-    int r = ::pwritev(buffered ? fd_buffered : fd_direct,
-		      &iov[0], iov.size(), off);
-    if (r < 0) {
-      derr << __func__ << " pwritev error: " << cpp_strerror(r) << dendl;
-      return r;
-    }
-    if (buffered) {
-      // initiate IO (but do not wait)
-      ::sync_file_range(fd_buffered, off, len, SYNC_FILE_RANGE_WRITE);
-    }
-  }
 
-  io_since_flush.set(1);
-  return 0;
-}
-
-int BlockDevice::aio_zero(
-  uint64_t off,
-  uint64_t len,
-  IOContext *ioc)
-{
-  dout(5) << __func__ << " " << off << "~" << len << dendl;
-  assert(off % block_size == 0);
-  assert(len % block_size == 0);
-  assert(len > 0);
-  assert(off < size);
-  assert(off + len <= size);
-
-  bufferlist bl;
-  while (len > 0) {
-    bufferlist t;
-    t.append(zeros, 0, MIN(zeros.length(), len));
-    len -= t.length();
-    bl.claim_append(t);
-  }
-  bufferlist foo;
-  // note: this works with aio only becaues the actual buffer is
-  // this->zeros, which is page-aligned and never freed.
-  return aio_write(off, bl, ioc, false);
-}
-
-int BlockDevice::read(uint64_t off, uint64_t len, bufferlist *pbl,
-		      IOContext *ioc,
-		      bool buffered)
-{
-  dout(5) << __func__ << " " << off << "~" << len << dendl;
-  assert(off % block_size == 0);
-  assert(len % block_size == 0);
-  assert(len > 0);
-  assert(off < size);
-  assert(off + len <= size);
-
-  _aio_log_start(ioc, off, len);
-  ioc->num_reading.inc();;
-
-  bufferptr p = buffer::create_page_aligned(len);
-  int r = ::pread(buffered ? fd_buffered : fd_direct,
-		  p.c_str(), len, off);
-  if (r < 0) {
-    r = -errno;
-    goto out;
-  }
-  pbl->clear();
-  pbl->push_back(p);
-
-  dout(40) << "data: ";
-  pbl->hexdump(*_dout);
-  *_dout << dendl;
-
- out:
-  _aio_log_finish(ioc, off, len);
-  ioc->num_reading.dec();
-  if (ioc->num_waiting.read()) {
-    dout(20) << __func__ << " waking waiter" << dendl;
-    Mutex::Locker l(ioc->lock);
-    ioc->cond.Signal();
-  }
-  return r < 0 ? r : 0;
-}
-
-int BlockDevice::read_buffered(uint64_t off, uint64_t len, char *buf)
-{
-  dout(5) << __func__ << " " << off << "~" << len << dendl;
-  assert(len > 0);
-  assert(off < size);
-  assert(off + len <= size);
-
-  int r = 0;
-  char *t = buf;
-  uint64_t left = len;
-  while (left > 0) {
-    r = ::pread(fd_buffered, t, left, off);
-    if (r < 0) {
-      r = -errno;
-      goto out;
-    }
-    off += r;
-    t += r;
-    left -= r;
-  }
-
-  dout(40) << __func__ << " data: ";
-  bufferlist bl;
-  bl.append(buf, len);
-  bl.hexdump(*_dout);
-  *_dout << dendl;
-
- out:
-  return r < 0 ? r : 0;
-}
-
-int BlockDevice::invalidate_cache(uint64_t off, uint64_t len)
-{
-  dout(5) << __func__ << " " << off << "~" << len << dendl;
-  assert(off % block_size == 0);
-  assert(len % block_size == 0);
-  int r = posix_fadvise(fd_buffered, off, len, POSIX_FADV_DONTNEED);
-  if (r < 0) {
-    r = -errno;
-    derr << __func__ << " " << off << "~" << len << " error: "
-	 << cpp_strerror(r) << dendl;
-  }
-  return r;
+  derr << __func__ << " unknown backend " << type << dendl;
+  assert(0);
+  return NULL;
 }
 
 void BlockDevice::queue_reap_ioc(IOContext *ioc)
@@ -548,3 +73,16 @@ void BlockDevice::queue_reap_ioc(IOContext *ioc)
     ioc_reap_count.inc();
   ioc_reap_queue.push_back(ioc);
 }
+
+void BlockDevice::reap_ioc()
+{
+  if (ioc_reap_count.read()) {
+    Mutex::Locker l(ioc_reap_lock);
+    for (auto p : ioc_reap_queue) {
+      dout(20) << __func__ << " reap ioc " << p << dendl;
+      delete p;
+    }
+    ioc_reap_queue.clear();
+    ioc_reap_count.dec();
+  }
+}
diff --git a/src/os/bluestore/BlockDevice.h b/src/os/bluestore/BlockDevice.h
index 77bb8eb..955f77c 100644
--- a/src/os/bluestore/BlockDevice.h
+++ b/src/os/bluestore/BlockDevice.h
@@ -1,15 +1,34 @@
 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
 // vim: ts=8 sw=2 smarttab
-
-#ifndef CEPH_OS_BLUESTORE_BLOCKDEVICE
-#define CEPH_OS_BLUESTORE_BLOCKDEVICE
-
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2015 XSky <haomai at xsky.com>
+ *
+ * Author: Haomai Wang <haomaiwang at gmail.com>
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation.  See file COPYING.
+ *
+ */
+
+#ifndef CEPH_OS_BLUESTORE_BLOCKDEVICE_H
+#define CEPH_OS_BLUESTORE_BLOCKDEVICE_H
+
+#include "acconfig.h"
 #include "os/fs/FS.h"
-#include "include/interval_set.h"
+
+#define SPDK_PREFIX "spdk:"
 
 /// track in-flight io
 struct IOContext {
   void *priv;
+#ifdef HAVE_SPDK
+  void *nvme_task_first = nullptr;
+  void *nvme_task_last = nullptr;
+#endif
 
   Mutex lock;
   Cond cond;
@@ -22,7 +41,7 @@ struct IOContext {
   atomic_t num_reading;
   atomic_t num_waiting;
 
-  IOContext(void *p)
+  explicit IOContext(void *p)
     : priv(p),
       lock("IOContext::lock")
     {}
@@ -33,88 +52,48 @@ struct IOContext {
 
   bool has_aios() {
     Mutex::Locker l(lock);
-    return num_pending.read() + num_running.read();
+    return num_pending.read() || num_running.read();
   }
 
   void aio_wait();
 };
 
-class BlockDevice {
-public:
-  typedef void (*aio_callback_t)(void *handle, void *aio);
-
-private:
-  int fd_direct, fd_buffered;
-  uint64_t size;
-  uint64_t block_size;
-  string path;
-  FS *fs;
-  bool aio, dio;
-  bufferptr zeros;
-
-  Mutex debug_lock;
-  interval_set<uint64_t> debug_inflight;
 
+class BlockDevice {
   Mutex ioc_reap_lock;
   vector<IOContext*> ioc_reap_queue;
   atomic_t ioc_reap_count;
 
-  Mutex flush_lock;
-  atomic_t io_since_flush;
-
-  FS::aio_queue_t aio_queue;
-  aio_callback_t aio_callback;
-  void *aio_callback_priv;
-  bool aio_stop;
-
-  struct AioCompletionThread : public Thread {
-    BlockDevice *bdev;
-    AioCompletionThread(BlockDevice *b) : bdev(b) {}
-    void *entry() {
-      bdev->_aio_thread();
-      return NULL;
-    }
-  } aio_thread;
-
-  void _aio_thread();
-  int _aio_start();
-  void _aio_stop();
-
-  void _aio_log_start(IOContext *ioc, uint64_t offset, uint64_t length);
-  void _aio_log_finish(IOContext *ioc, uint64_t offset, uint64_t length);
-
-  int _lock();
-
 public:
-  BlockDevice(aio_callback_t cb, void *cbpriv);
+  BlockDevice(): ioc_reap_lock("BlockDevice::ioc_reap_lock") {}
+  virtual ~BlockDevice() {}
+  typedef void (*aio_callback_t)(void *handle, void *aio);
 
-  void aio_submit(IOContext *ioc);
+  static BlockDevice *create(
+      const string& path, aio_callback_t cb, void *cbpriv);
+  virtual bool supported_bdev_label() { return true; }
 
-  uint64_t get_size() const {
-    return size;
-  }
-  uint64_t get_block_size() const {
-    return block_size;
-  }
+  virtual void aio_submit(IOContext *ioc) = 0;
 
-  int read(uint64_t off, uint64_t len, bufferlist *pbl,
-	   IOContext *ioc,
-	   bool buffered);
-  int read_buffered(uint64_t off, uint64_t len, char *buf);
+  virtual uint64_t get_size() const = 0;
+  virtual uint64_t get_block_size() const = 0;
 
-  int aio_write(uint64_t off, bufferlist& bl,
-		IOContext *ioc,
-		bool buffered);
-  int aio_zero(uint64_t off, uint64_t len,
-	       IOContext *ioc);
-  int flush();
+  virtual int read(uint64_t off, uint64_t len, bufferlist *pbl,
+	   IOContext *ioc, bool buffered) = 0;
+  virtual int read_buffered(uint64_t off, uint64_t len, char *buf) = 0;
+
+  virtual int aio_write(uint64_t off, bufferlist& bl,
+		IOContext *ioc, bool buffered) = 0;
+  virtual int aio_zero(uint64_t off, uint64_t len, IOContext *ioc) = 0;
+  virtual int flush() = 0;
 
   void queue_reap_ioc(IOContext *ioc);
+  void reap_ioc();
 
   // for managing buffered readers/writers
-  int invalidate_cache(uint64_t off, uint64_t len);
-  int open(string path);
-  void close();
+  virtual int invalidate_cache(uint64_t off, uint64_t len) = 0;
+  virtual int open(string path) = 0;
+  virtual void close() = 0;
 };
 
-#endif
+#endif //CEPH_OS_BLUESTORE_BLOCKDEVICE_H
diff --git a/src/os/bluestore/BlueFS.cc b/src/os/bluestore/BlueFS.cc
index d32d7d6..a79af94 100644
--- a/src/os/bluestore/BlueFS.cc
+++ b/src/os/bluestore/BlueFS.cc
@@ -15,8 +15,7 @@
 #define dout_prefix *_dout << "bluefs "
 
 BlueFS::BlueFS()
-  : lock("BlueFS::lock"),
-    ino_last(0),
+  : ino_last(0),
     log_seq(0),
     log_writer(NULL)
 {
@@ -44,7 +43,7 @@ int BlueFS::add_block_device(unsigned id, string path)
 {
   dout(10) << __func__ << " bdev " << id << " path " << path << dendl;
   assert(id == bdev.size());
-  BlockDevice *b = new BlockDevice(NULL, NULL); //aio_cb, this);
+  BlockDevice *b = BlockDevice::create(path, NULL, NULL); //aio_cb, this);
   int r = b->open(path);
   if (r < 0) {
     delete b;
@@ -65,7 +64,7 @@ uint64_t BlueFS::get_block_device_size(unsigned id)
 
 void BlueFS::add_block_extent(unsigned id, uint64_t offset, uint64_t length)
 {
-  Mutex::Locker l(lock);
+  std::lock_guard<std::mutex> l(lock);
   dout(1) << __func__ << " bdev " << id << " " << offset << "~" << length
 	  << dendl;
   assert(id < bdev.size());
@@ -84,6 +83,7 @@ void BlueFS::add_block_extent(unsigned id, uint64_t offset, uint64_t length)
 int BlueFS::reclaim_blocks(unsigned id, uint64_t want,
 			   uint64_t *offset, uint32_t *length)
 {
+  std::lock_guard<std::mutex> l(lock);
   dout(1) << __func__ << " bdev " << id << " want " << want << dendl;
   assert(id < alloc.size());
   int r = alloc[id]->reserve(want);
@@ -107,7 +107,7 @@ int BlueFS::reclaim_blocks(unsigned id, uint64_t want,
 
 uint64_t BlueFS::get_total(unsigned id)
 {
-  Mutex::Locker l(lock);
+  std::lock_guard<std::mutex> l(lock);
   assert(id < block_all.size());
   uint64_t r = 0;
   interval_set<uint64_t>& p = block_all[id];
@@ -119,14 +119,14 @@ uint64_t BlueFS::get_total(unsigned id)
 
 uint64_t BlueFS::get_free(unsigned id)
 {
-  Mutex::Locker l(lock);
+  std::lock_guard<std::mutex> l(lock);
   assert(id < alloc.size());
   return alloc[id]->get_free();
 }
 
 void BlueFS::get_usage(vector<pair<uint64_t,uint64_t>> *usage)
 {
-  Mutex::Locker l(lock);
+  std::lock_guard<std::mutex> l(lock);
   usage->resize(bdev.size());
   for (unsigned id = 0; id < bdev.size(); ++id) {
     uint64_t total = 0;
@@ -149,7 +149,7 @@ void BlueFS::get_usage(vector<pair<uint64_t,uint64_t>> *usage)
 
 int BlueFS::get_block_extents(unsigned id, interval_set<uint64_t> *extents)
 {
-  Mutex::Locker l(lock);
+  std::lock_guard<std::mutex> l(lock);
   dout(10) << __func__ << " bdev " << id << dendl;
   if (id >= block_all.size())
     return -EINVAL;
@@ -166,6 +166,8 @@ int BlueFS::mkfs(uuid_d osd_uuid)
 
   _init_alloc();
 
+  super.version = 1;
+  super.block_size = bdev[0]->get_block_size();
   super.osd_uuid = osd_uuid;
   super.uuid.generate_random();
   dout(1) << __func__ << " uuid " << super.uuid << dendl;
@@ -174,9 +176,10 @@ int BlueFS::mkfs(uuid_d osd_uuid)
   FileRef log_file = new File;
   log_file->fnode.ino = 1;
   log_file->fnode.prefer_bdev = bdev.size() - 1;
-  _allocate(log_file->fnode.prefer_bdev,
+  int r = _allocate(log_file->fnode.prefer_bdev,
 	    g_conf->bluefs_max_log_runway,
 	    &log_file->fnode.extents);
+  assert(r == 0);
   log_writer = new FileWriter(log_file, bdev.size());
 
   // initial txn
@@ -192,8 +195,6 @@ int BlueFS::mkfs(uuid_d osd_uuid)
   _flush_log();
 
   // write supers
-  super.version = 1;
-  super.block_size = bdev[0]->get_block_size();
   super.log_fnode = log_file->fnode;
   _write_super();
   _flush_bdev();
@@ -249,6 +250,7 @@ int BlueFS::mount()
   r = _replay();
   if (r < 0) {
     derr << __func__ << " failed to replay log: " << cpp_strerror(r) << dendl;
+    _stop_alloc();
     goto out;
   }
 
@@ -284,16 +286,14 @@ void BlueFS::umount()
   block_all.clear();
   _stop_alloc();
   file_map.clear();
-  for (auto& p : dir_map) {
-    delete p.second;
-  }
+  dir_map.clear();
   super = bluefs_super_t();
   log_t.clear();
 }
 
 int BlueFS::fsck()
 {
-  Mutex::Locker l(lock);
+  std::lock_guard<std::mutex> l(lock);
   dout(1) << __func__ << dendl;
   // hrm, i think we check everything on mount...
   return 0;
@@ -307,9 +307,7 @@ int BlueFS::_write_super()
   uint32_t crc = bl.crc32c(-1);
   ::encode(crc, bl);
   assert(bl.length() <= get_super_length());
-  bufferptr z(get_super_length() - bl.length());
-  z.zero();
-  bl.append(z);
+  bl.append_zero(get_super_length() - bl.length());
   bl.rebuild();
 
   IOContext ioc(NULL);
@@ -325,7 +323,7 @@ int BlueFS::_open_super()
 {
   dout(10) << __func__ << dendl;
 
-  bufferlist bl, t;
+  bufferlist bl;
   uint32_t expected_crc, crc;
   int r;
 
@@ -471,7 +469,7 @@ int BlueFS::_replay()
 	  ::decode(id, p);
 	  ::decode(offset, p);
 	  ::decode(length, p);
-	  dout(20) << __func__ << " " << pos << ":  op_alloc_add "
+	  dout(20) << __func__ << " " << pos << ":  op_alloc_rm "
 		   << " " << (int)id << ":" << offset << "~" << length << dendl;
 	  block_all[id].erase(offset, length);
 	  alloc[id]->init_rm_free(offset, length);
@@ -490,7 +488,7 @@ int BlueFS::_replay()
 		   << dendl;
 	  FileRef file = _get_file(ino);
 	  assert(file->fnode.ino);
-	  map<string,Dir*>::iterator q = dir_map.find(dirname);
+	  map<string,DirRef>::iterator q = dir_map.find(dirname);
 	  assert(q != dir_map.end());
 	  map<string,FileRef>::iterator r = q->second->file_map.find(filename);
 	  assert(r == q->second->file_map.end());
@@ -506,7 +504,7 @@ int BlueFS::_replay()
 	  ::decode(filename, p);
 	  dout(20) << __func__ << " " << pos << ":  op_dir_unlink "
 		   << " " << dirname << "/" << filename << dendl;
-	  map<string,Dir*>::iterator q = dir_map.find(dirname);
+	  map<string,DirRef>::iterator q = dir_map.find(dirname);
 	  assert(q != dir_map.end());
 	  map<string,FileRef>::iterator r = q->second->file_map.find(filename);
 	  assert(r != q->second->file_map.end());
@@ -521,7 +519,7 @@ int BlueFS::_replay()
 	  ::decode(dirname, p);
 	  dout(20) << __func__ << " " << pos << ":  op_dir_create " << dirname
 		   << dendl;
-	  map<string,Dir*>::iterator q = dir_map.find(dirname);
+	  map<string,DirRef>::iterator q = dir_map.find(dirname);
 	  assert(q == dir_map.end());
 	  dir_map[dirname] = new Dir;
 	}
@@ -533,10 +531,9 @@ int BlueFS::_replay()
 	  ::decode(dirname, p);
 	  dout(20) << __func__ << " " << pos << ":  op_dir_remove " << dirname
 		   << dendl;
-	  map<string,Dir*>::iterator q = dir_map.find(dirname);
+	  map<string,DirRef>::iterator q = dir_map.find(dirname);
 	  assert(q != dir_map.end());
 	  assert(q->second->file_map.empty());
-	  delete q->second;
 	  dir_map.erase(q);
 	}
 	break;
@@ -620,7 +617,7 @@ void BlueFS::_drop_link(FileRef file)
   --file->refs;
   if (file->refs == 0) {
     dout(20) << __func__ << " destroying " << file->fnode << dendl;
-    assert(file->num_reading.read() == 0);
+    assert(file->num_reading.load() == 0);
     log_t.op_file_remove(file->fnode.ino);
     for (auto& r : file->fnode.extents) {
       alloc[r.bdev]->release(r.offset, r.length);
@@ -643,7 +640,7 @@ int BlueFS::_read_random(
   dout(10) << __func__ << " h " << h << " " << off << "~" << len
 	   << " from " << h->file->fnode << dendl;
 
-  h->file->num_reading.inc();
+  ++h->file->num_reading;
 
   if (!h->ignore_eof &&
       off + len > h->file->fnode.size) {
@@ -675,7 +672,7 @@ int BlueFS::_read_random(
   }
 
   dout(20) << __func__ << " got " << ret << dendl;
-  h->file->num_reading.dec();
+  --h->file->num_reading;
   return ret;
 }
 
@@ -690,7 +687,7 @@ int BlueFS::_read(
   dout(10) << __func__ << " h " << h << " " << off << "~" << len
 	   << " from " << h->file->fnode << dendl;
 
-  h->file->num_reading.inc();
+  ++h->file->num_reading;
 
   if (!h->ignore_eof &&
       off + len > h->file->fnode.size) {
@@ -706,7 +703,7 @@ int BlueFS::_read(
 
   int ret = 0;
   while (len > 0) {
-    int left;
+    size_t left;
     if (off < buf->bl_off || off >= buf->get_buf_end()) {
       buf->bl.clear();
       buf->bl_off = off & super.block_mask();
@@ -731,7 +728,7 @@ int BlueFS::_read(
     left = buf->get_buf_remaining(off);
     dout(20) << __func__ << " left " << left << " len " << len << dendl;
 
-    int r = MIN((int)len, left);
+    int r = MIN(len, left);
     if (outbl) {
       bufferlist t;
       t.substr_of(buf->bl, off - buf->bl_off, r);
@@ -757,7 +754,7 @@ int BlueFS::_read(
 
   dout(20) << __func__ << " got " << ret << dendl;
   assert(!outbl || (int)outbl->length() == ret);
-  h->file->num_reading.dec();
+  --h->file->num_reading;
   return ret;
 }
 
@@ -875,6 +872,7 @@ void BlueFS::_compact_log()
   log_writer->append(bl);
   int r = _flush(log_writer, true);
   assert(r == 0);
+  _flush_wait(log_writer);
 
   dout(10) << __func__ << " writing super" << dendl;
   super.log_fnode = log_file->fnode;
@@ -892,12 +890,9 @@ void BlueFS::_pad_bl(bufferlist& bl)
 {
   uint64_t partial = bl.length() % super.block_size;
   if (partial) {
-    bufferptr z(super.block_size - partial);
-    dout(10) << __func__ << " padding with " << z.length() << " zeros" << dendl;
-    z.zero();
-    bufferlist zbl;
-    zbl.append(z);
-    bl.append(z);
+    dout(10) << __func__ << " padding with " << super.block_size - partial
+	     << " zeros" << dendl;
+    bl.append_zero(super.block_size - partial);
   }
 }
 
@@ -955,7 +950,7 @@ int BlueFS::_flush_range(FileWriter *h, uint64_t offset, uint64_t length)
 	   << " " << offset << "~" << length
 	   << " to " << h->file->fnode << dendl;
   assert(!h->file->deleted);
-  assert(h->file->num_readers.read() == 0);
+  assert(h->file->num_readers.load() == 0);
 
   if (offset + length <= h->pos)
     return 0;
@@ -1041,9 +1036,7 @@ int BlueFS::_flush_range(FileWriter *h, uint64_t offset, uint64_t length)
       dout(20) << __func__ << " caching tail of " << tail
 	       << " and padding block with zeros" << dendl;
       h->tail_block.substr_of(bl, bl.length() - tail, tail);
-      bufferptr z(super.block_size - tail);
-      z.zero();
-      t.append(z);
+      t.append_zero(super.block_size - tail);
     }
     bdev[p->bdev]->aio_write(p->offset + x_off, t, h->iocv[p->bdev], true);
     bloff += x_len;
@@ -1052,7 +1045,7 @@ int BlueFS::_flush_range(FileWriter *h, uint64_t offset, uint64_t length)
     x_off = 0;
   }
   for (unsigned i = 0; i < bdev.size(); ++i) {
-    if (!h->iocv[i]->pending_aios.empty()) {
+    if (h->iocv[i]->num_pending.read()) {
       bdev[i]->aio_submit(h->iocv[i]);
     }
   }
@@ -1207,7 +1200,7 @@ int BlueFS::_preallocate(FileRef f, uint64_t off, uint64_t len)
 
 void BlueFS::sync_metadata()
 {
-  Mutex::Locker l(lock);
+  std::lock_guard<std::mutex> l(lock);
   if (log_t.empty()) {
     dout(10) << __func__ << " - no pending log events" << dendl;
     return;
@@ -1221,10 +1214,10 @@ void BlueFS::sync_metadata()
   for (auto p : alloc) {
     p->commit_finish();
   }
+  _maybe_compact_log();
   utime_t end = ceph_clock_now(NULL);
   utime_t dur = end - start;
   dout(10) << __func__ << " done in " << dur << dendl;
-  _maybe_compact_log();
 }
 
 int BlueFS::open_for_write(
@@ -1233,10 +1226,10 @@ int BlueFS::open_for_write(
   FileWriter **h,
   bool overwrite)
 {
-  Mutex::Locker l(lock);
+  std::lock_guard<std::mutex> l(lock);
   dout(10) << __func__ << " " << dirname << "/" << filename << dendl;
-  map<string,Dir*>::iterator p = dir_map.find(dirname);
-  Dir *dir;
+  map<string,DirRef>::iterator p = dir_map.find(dirname);
+  DirRef dir;
   if (p == dir_map.end()) {
     // implicitly create the dir
     dout(20) << __func__ << "  dir " << dirname
@@ -1247,6 +1240,7 @@ int BlueFS::open_for_write(
   }
 
   FileRef file;
+  bool create = false;
   map<string,FileRef>::iterator q = dir->file_map.find(filename);
   if (q == dir->file_map.end()) {
     if (overwrite) {
@@ -1261,8 +1255,7 @@ int BlueFS::open_for_write(
     file_map[ino_last] = file;
     dir->file_map[filename] = file;
     ++file->refs;
-    log_t.op_file_update(file->fnode);
-    log_t.op_dir_link(dirname, filename, file->fnode.ino);
+    create = true;
   } else {
     // overwrite existing file?
     file = q->second;
@@ -1275,9 +1268,12 @@ int BlueFS::open_for_write(
 	       << ") file " << filename
 	       << " already exists, truncate + overwrite" << dendl;
       file->fnode.size = 0;
+      for (auto& p : file->fnode.extents) {
+        alloc[p.bdev]->release(p.offset, p.length);
+      }
+      file->fnode.extents.clear();
     }
     file->fnode.mtime = ceph_clock_now(NULL);
-    log_t.op_file_update(file->fnode);
   }
 
   if (dirname.length() > 5) {
@@ -1298,6 +1294,10 @@ int BlueFS::open_for_write(
     }
   }
 
+  log_t.op_file_update(file->fnode);
+  if (create)
+    log_t.op_dir_link(dirname, filename, file->fnode.ino);
+
   *h = new FileWriter(file, bdev.size());
   dout(10) << __func__ << " h " << *h << " on " << file->fnode << dendl;
   return 0;
@@ -1319,15 +1319,15 @@ int BlueFS::open_for_read(
   FileReader **h,
   bool random)
 {
-  Mutex::Locker l(lock);
+  std::lock_guard<std::mutex> l(lock);
   dout(10) << __func__ << " " << dirname << "/" << filename
 	   << (random ? " (random)":" (sequential)") << dendl;
-  map<string,Dir*>::iterator p = dir_map.find(dirname);
+  map<string,DirRef>::iterator p = dir_map.find(dirname);
   if (p == dir_map.end()) {
     dout(20) << __func__ << " dir " << dirname << " not found" << dendl;
     return -ENOENT;
   }
-  Dir *dir = p->second;
+  DirRef dir = p->second;
 
   map<string,FileRef>::iterator q = dir->file_map.find(filename);
   if (q == dir->file_map.end()) {
@@ -1348,15 +1348,15 @@ int BlueFS::rename(
   const string& old_dirname, const string& old_filename,
   const string& new_dirname, const string& new_filename)
 {
-  Mutex::Locker l(lock);
+  std::lock_guard<std::mutex> l(lock);
   dout(10) << __func__ << " " << old_dirname << "/" << old_filename
 	   << " -> " << new_dirname << "/" << new_filename << dendl;
-  map<string,Dir*>::iterator p = dir_map.find(old_dirname);
+  map<string,DirRef>::iterator p = dir_map.find(old_dirname);
   if (p == dir_map.end()) {
     dout(20) << __func__ << " dir " << old_dirname << " not found" << dendl;
     return -ENOENT;
   }
-  Dir *old_dir = p->second;
+  DirRef old_dir = p->second;
   map<string,FileRef>::iterator q = old_dir->file_map.find(old_filename);
   if (q == old_dir->file_map.end()) {
     dout(20) << __func__ << " dir " << old_dirname << " (" << old_dir
@@ -1371,7 +1371,7 @@ int BlueFS::rename(
     dout(20) << __func__ << " dir " << new_dirname << " not found" << dendl;
     return -ENOENT;
   }
-  Dir *new_dir = p->second;
+  DirRef new_dir = p->second;
   q = new_dir->file_map.find(new_filename);
   if (q != new_dir->file_map.end()) {
     dout(20) << __func__ << " dir " << new_dirname << " (" << old_dir
@@ -1395,9 +1395,9 @@ int BlueFS::rename(
 
 int BlueFS::mkdir(const string& dirname)
 {
-  Mutex::Locker l(lock);
+  std::lock_guard<std::mutex> l(lock);
   dout(10) << __func__ << " " << dirname << dendl;
-  map<string,Dir*>::iterator p = dir_map.find(dirname);
+  map<string,DirRef>::iterator p = dir_map.find(dirname);
   if (p != dir_map.end()) {
     dout(20) << __func__ << " dir " << dirname << " exists" << dendl;
     return -EEXIST;
@@ -1409,14 +1409,14 @@ int BlueFS::mkdir(const string& dirname)
 
 int BlueFS::rmdir(const string& dirname)
 {
-  Mutex::Locker l(lock);
+  std::lock_guard<std::mutex> l(lock);
   dout(10) << __func__ << " " << dirname << dendl;
-  map<string,Dir*>::iterator p = dir_map.find(dirname);
+  map<string,DirRef>::iterator p = dir_map.find(dirname);
   if (p == dir_map.end()) {
     dout(20) << __func__ << " dir " << dirname << " does not exist" << dendl;
     return -ENOENT;
   }
-  Dir *dir = p->second;
+  DirRef dir = p->second;
   if (!dir->file_map.empty()) {
     dout(20) << __func__ << " dir " << dirname << " not empty" << dendl;
     return -ENOTEMPTY;
@@ -1428,8 +1428,8 @@ int BlueFS::rmdir(const string& dirname)
 
 bool BlueFS::dir_exists(const string& dirname)
 {
-  Mutex::Locker l(lock);
-  map<string,Dir*>::iterator p = dir_map.find(dirname);
+  std::lock_guard<std::mutex> l(lock);
+  map<string,DirRef>::iterator p = dir_map.find(dirname);
   bool exists = p != dir_map.end();
   dout(10) << __func__ << " " << dirname << " = " << (int)exists << dendl;
   return exists;
@@ -1438,14 +1438,14 @@ bool BlueFS::dir_exists(const string& dirname)
 int BlueFS::stat(const string& dirname, const string& filename,
 		 uint64_t *size, utime_t *mtime)
 {
-  Mutex::Locker l(lock);
+  std::lock_guard<std::mutex> l(lock);
   dout(10) << __func__ << " " << dirname << "/" << filename << dendl;
-  map<string,Dir*>::iterator p = dir_map.find(dirname);
+  map<string,DirRef>::iterator p = dir_map.find(dirname);
   if (p == dir_map.end()) {
     dout(20) << __func__ << " dir " << dirname << " not found" << dendl;
     return -ENOENT;
   }
-  Dir *dir = p->second;
+  DirRef dir = p->second;
   map<string,FileRef>::iterator q = dir->file_map.find(filename);
   if (q == dir->file_map.end()) {
     dout(20) << __func__ << " dir " << dirname << " (" << dir
@@ -1466,14 +1466,14 @@ int BlueFS::stat(const string& dirname, const string& filename,
 int BlueFS::lock_file(const string& dirname, const string& filename,
 		      FileLock **plock)
 {
-  Mutex::Locker l(lock);
+  std::lock_guard<std::mutex> l(lock);
   dout(10) << __func__ << " " << dirname << "/" << filename << dendl;
-  map<string,Dir*>::iterator p = dir_map.find(dirname);
+  map<string,DirRef>::iterator p = dir_map.find(dirname);
   if (p == dir_map.end()) {
     dout(20) << __func__ << " dir " << dirname << " not found" << dendl;
     return -ENOENT;
   }
-  Dir *dir = p->second;
+  DirRef dir = p->second;
   map<string,FileRef>::iterator q = dir->file_map.find(filename);
   File *file;
   if (q == dir->file_map.end()) {
@@ -1504,7 +1504,7 @@ int BlueFS::lock_file(const string& dirname, const string& filename,
 
 int BlueFS::unlock_file(FileLock *fl)
 {
-  Mutex::Locker l(lock);
+  std::lock_guard<std::mutex> l(lock);
   dout(10) << __func__ << " " << fl << " on " << fl->file->fnode << dendl;
   assert(fl->file->locked);
   fl->file->locked = false;
@@ -1514,7 +1514,7 @@ int BlueFS::unlock_file(FileLock *fl)
 
 int BlueFS::readdir(const string& dirname, vector<string> *ls)
 {
-  Mutex::Locker l(lock);
+  std::lock_guard<std::mutex> l(lock);
   dout(10) << __func__ << " " << dirname << dendl;
   if (dirname.size() == 0) {
     // list dirs
@@ -1524,12 +1524,12 @@ int BlueFS::readdir(const string& dirname, vector<string> *ls)
     }
   } else {
     // list files in dir
-    map<string,Dir*>::iterator p = dir_map.find(dirname);
+    map<string,DirRef>::iterator p = dir_map.find(dirname);
     if (p == dir_map.end()) {
       dout(20) << __func__ << " dir " << dirname << " not found" << dendl;
       return -ENOENT;
     }
-    Dir *dir = p->second;
+    DirRef dir = p->second;
     ls->reserve(dir->file_map.size() + 2);
     for (auto& q : dir->file_map) {
       ls->push_back(q.first);
@@ -1542,14 +1542,14 @@ int BlueFS::readdir(const string& dirname, vector<string> *ls)
 
 int BlueFS::unlink(const string& dirname, const string& filename)
 {
-  Mutex::Locker l(lock);
+  std::lock_guard<std::mutex> l(lock);
   dout(10) << __func__ << " " << dirname << "/" << filename << dendl;
-  map<string,Dir*>::iterator p = dir_map.find(dirname);
+  map<string,DirRef>::iterator p = dir_map.find(dirname);
   if (p == dir_map.end()) {
     dout(20) << __func__ << " dir " << dirname << " not found" << dendl;
     return -ENOENT;
   }
-  Dir *dir = p->second;
+  DirRef dir = p->second;
   map<string,FileRef>::iterator q = dir->file_map.find(filename);
   if (q == dir->file_map.end()) {
     dout(20) << __func__ << " file " << dirname << "/" << filename
diff --git a/src/os/bluestore/BlueFS.h b/src/os/bluestore/BlueFS.h
index be1f88d..b665bb7 100644
--- a/src/os/bluestore/BlueFS.h
+++ b/src/os/bluestore/BlueFS.h
@@ -3,9 +3,10 @@
 #ifndef CEPH_OS_BLUESTORE_BLUEFS_H
 #define CEPH_OS_BLUESTORE_BLUEFS_H
 
+#include <atomic>
+#include <mutex>
+
 #include "bluefs_types.h"
-#include "common/Mutex.h"
-#include "common/Cond.h"
 #include "common/RefCountedObj.h"
 #include "BlockDevice.h"
 
@@ -24,20 +25,23 @@ public:
     bool deleted;
     boost::intrusive::list_member_hook<> dirty_item;
 
-    atomic_t num_readers, num_writers;
-    atomic_t num_reading;
+    std::atomic_int num_readers, num_writers;
+    std::atomic_int num_reading;
 
     File()
       : RefCountedObject(NULL, 0),
 	refs(0),
 	dirty(false),
 	locked(false),
-	deleted(false)
+	deleted(false),
+	num_readers(0),
+	num_writers(0),
+	num_reading(0)
       {}
     ~File() {
-      assert(num_readers.read() == 0);
-      assert(num_writers.read() == 0);
-      assert(num_reading.read() == 0);
+      assert(num_readers.load() == 0);
+      assert(num_writers.load() == 0);
+      assert(num_reading.load() == 0);
       assert(!locked);
     }
 
@@ -57,9 +61,17 @@ public:
 	boost::intrusive::list_member_hook<>,
 	&File::dirty_item> > dirty_file_list_t;
 
-  struct Dir {
+  struct Dir : public RefCountedObject {
     map<string,FileRef> file_map;
+
+    friend void intrusive_ptr_add_ref(Dir *d) {
+      d->get();
+    }
+    friend void intrusive_ptr_release(Dir *d) {
+      d->put();
+    }
   };
+  typedef boost::intrusive_ptr<Dir> DirRef;
 
   struct FileWriter {
     FileRef file;
@@ -67,21 +79,20 @@ public:
     bufferlist buffer;      ///< new data to write (at end of file)
     bufferlist tail_block;  ///< existing partial block at end of file, if any
 
-    Mutex lock;
+    std::mutex lock;
     vector<IOContext*> iocv;  ///< one for each bdev
 
     FileWriter(FileRef f, unsigned num_bdev)
       : file(f),
-	pos(0),
-	lock("BlueFS::FileWriter::lock") {
-      file->num_writers.inc();
+	pos(0) {
+      ++file->num_writers;
       iocv.resize(num_bdev);
       for (unsigned i = 0; i < num_bdev; ++i) {
 	iocv[i] = new IOContext(NULL);
       }
     }
     ~FileWriter() {
-      file->num_writers.dec();
+      --file->num_writers;
       assert(iocv.empty());  // caller must call BlueFS::close_writer()
     }
 
@@ -102,7 +113,7 @@ public:
     uint64_t pos;           ///< current logical offset
     uint64_t max_prefetch;  ///< max allowed prefetch
 
-    FileReaderBuffer(uint64_t mpf)
+    explicit FileReaderBuffer(uint64_t mpf)
       : bl_off(0),
 	pos(0),
 	max_prefetch(mpf) {}
@@ -135,24 +146,23 @@ public:
 	buf(mpf),
 	random(rand),
 	ignore_eof(ie) {
-      file->num_readers.inc();
+      ++file->num_readers;
     }
     ~FileReader() {
-      file->num_readers.dec();
+      --file->num_readers;
     }
   };
 
   struct FileLock {
     FileRef file;
-    FileLock(FileRef f) : file(f) {}
+    explicit FileLock(FileRef f) : file(f) {}
   };
 
 private:
-  Mutex lock;
-  Cond cond;
+  std::mutex lock;
 
   // cache
-  map<string, Dir*> dir_map;                      ///< dirname -> Dir
+  map<string, DirRef> dir_map;                    ///< dirname -> Dir
   ceph::unordered_map<uint64_t,FileRef> file_map; ///< ino -> File
   dirty_file_list_t dirty_files;                  ///< list of dirty files
 
@@ -269,7 +279,7 @@ public:
     bool random = false);
 
   void close_writer(FileWriter *h) {
-    Mutex::Locker l(lock);
+    std::lock_guard<std::mutex> l(lock);
     _close_writer(h);
   }
 
@@ -308,15 +318,15 @@ public:
 		     uint64_t *offset, uint32_t *length);
 
   void flush(FileWriter *h) {
-    Mutex::Locker l(lock);
+    std::lock_guard<std::mutex> l(lock);
     _flush(h, false);
   }
   void flush_range(FileWriter *h, uint64_t offset, uint64_t length) {
-    Mutex::Locker l(lock);
+    std::lock_guard<std::mutex> l(lock);
     _flush_range(h, offset, length);
   }
   void fsync(FileWriter *h) {
-    Mutex::Locker l(lock);
+    std::lock_guard<std::mutex> l(lock);
     _fsync(h);
   }
   int read(FileReader *h, FileReaderBuffer *buf, uint64_t offset, size_t len,
@@ -334,15 +344,15 @@ public:
     return _read_random(h, offset, len, out);
   }
   void invalidate_cache(FileRef f, uint64_t offset, uint64_t len) {
-    Mutex::Locker l(lock);
+    std::lock_guard<std::mutex> l(lock);
     _invalidate_cache(f, offset, len);
   }
   int preallocate(FileRef f, uint64_t offset, uint64_t len) {
-    Mutex::Locker l(lock);
+    std::lock_guard<std::mutex> l(lock);
     return _preallocate(f, offset, len);
   }
   int truncate(FileWriter *h, uint64_t offset) {
-    Mutex::Locker l(lock);
+    std::lock_guard<std::mutex> l(lock);
     return _truncate(h, offset);
   }
 
diff --git a/src/os/bluestore/BlueRocksEnv.cc b/src/os/bluestore/BlueRocksEnv.cc
index 252bdbd..a6471ea 100644
--- a/src/os/bluestore/BlueRocksEnv.cc
+++ b/src/os/bluestore/BlueRocksEnv.cc
@@ -255,6 +255,7 @@ class BlueRocksWritableFile : public rocksdb::WritableFile {
     return rocksdb::Status::OK();
   }
 
+  using rocksdb::WritableFile::RangeSync;
   // Sync a file range with disk.
   // offset is the starting byte of the file range to be synchronized.
   // nbytes specifies the length of the range to be synchronized.
@@ -273,6 +274,7 @@ class BlueRocksWritableFile : public rocksdb::WritableFile {
   }
 
  protected:
+  using rocksdb::WritableFile::Allocate;
   /*
    * Pre-allocate space for a file.
    */
@@ -288,7 +290,7 @@ class BlueRocksWritableFile : public rocksdb::WritableFile {
 class BlueRocksDirectory : public rocksdb::Directory {
   BlueFS *fs;
  public:
-  BlueRocksDirectory(BlueFS *f) : fs(f) {}
+  explicit BlueRocksDirectory(BlueFS *f) : fs(f) {}
 
   // Fsync directory. Can be called concurrently from multiple threads.
   rocksdb::Status Fsync() {
diff --git a/src/os/bluestore/BlueRocksEnv.h b/src/os/bluestore/BlueRocksEnv.h
index b67a2db..a44aa9c 100644
--- a/src/os/bluestore/BlueRocksEnv.h
+++ b/src/os/bluestore/BlueRocksEnv.h
@@ -150,7 +150,7 @@ public:
   rocksdb::Status GetAbsolutePath(const std::string& db_path,
       std::string* output_path);
 
-  BlueRocksEnv(BlueFS *f);
+  explicit BlueRocksEnv(BlueFS *f);
 private:
   BlueFS *fs;
 };
diff --git a/src/os/bluestore/BlueStore.cc b/src/os/bluestore/BlueStore.cc
index 304a02a..2643b88 100644
--- a/src/os/bluestore/BlueStore.cc
+++ b/src/os/bluestore/BlueStore.cc
@@ -72,6 +72,7 @@ const string PREFIX_ALLOC = "B";   // u64 offset -> u64 length (freelist)
 // for bluefs, label (4k) + bluefs super (4k), means we start at 8k.
 #define BLUEFS_START  8192
 
+
 /*
  * object name key structure
  *
@@ -97,11 +98,11 @@ const string PREFIX_ALLOC = "B";   // u64 offset -> u64 length (freelist)
  * string encoding in the key
  *
  * The key string needs to lexicographically sort the same way that
- * ghobject_t does.  We do this by escaping anything <= to '%' with %
+ * ghobject_t does.  We do this by escaping anything <= to '#' with #
  * plus a 2 digit hex string, and anything >= '~' with ~ plus the two
  * hex digits.
  *
- * We use ! as a terminator for strings; this works because it is < %
+ * We use ! as a terminator for strings; this works because it is < #
  * and will get escaped if it is present in the string.
  *
  */
@@ -454,7 +455,7 @@ static void get_wal_key(uint64_t seq, string *out)
 
 void BlueStore::Enode::put()
 {
-  int final = nref.dec();
+  int final = --nref;
   if (final == 0) {
     dout(20) << __func__ << " removing self from set " << enode_set << dendl;
     enode_set->uset.erase(*this);
@@ -467,21 +468,12 @@ void BlueStore::Enode::put()
 #undef dout_prefix
 #define dout_prefix *_dout << "bluestore.onode(" << this << ") "
 
-BlueStore::Onode::Onode(const ghobject_t& o, const string& k)
-  : nref(0),
-    oid(o),
-    key(k),
-    dirty(false),
-    exists(true),
-    flush_lock("BlueStore::Onode::flush_lock") {
-}
-
 void BlueStore::Onode::flush()
 {
-  Mutex::Locker l(flush_lock);
+  std::unique_lock<std::mutex> l(flush_lock);
   dout(20) << __func__ << " " << flush_txns << dendl;
   while (!flush_txns.empty())
-    flush_cond.Wait(flush_lock);
+    flush_cond.wait(l);
   dout(20) << __func__ << " done" << dendl;
 }
 
@@ -499,16 +491,16 @@ void BlueStore::OnodeHashLRU::_touch(OnodeRef o)
 
 void BlueStore::OnodeHashLRU::add(const ghobject_t& oid, OnodeRef o)
 {
-  Mutex::Locker l(lock);
+  std::lock_guard<std::mutex> l(lock);
   dout(30) << __func__ << " " << oid << " " << o << dendl;
   assert(onode_map.count(oid) == 0);
   onode_map[oid] = o;
-  lru.push_back(*o);
+  lru.push_front(*o);
 }
 
 BlueStore::OnodeRef BlueStore::OnodeHashLRU::lookup(const ghobject_t& oid)
 {
-  Mutex::Locker l(lock);
+  std::lock_guard<std::mutex> l(lock);
   dout(30) << __func__ << dendl;
   ceph::unordered_map<ghobject_t,OnodeRef>::iterator p = onode_map.find(oid);
   if (p == onode_map.end()) {
@@ -522,7 +514,7 @@ BlueStore::OnodeRef BlueStore::OnodeHashLRU::lookup(const ghobject_t& oid)
 
 void BlueStore::OnodeHashLRU::clear()
 {
-  Mutex::Locker l(lock);
+  std::lock_guard<std::mutex> l(lock);
   dout(10) << __func__ << dendl;
   lru.clear();
   onode_map.clear();
@@ -531,7 +523,7 @@ void BlueStore::OnodeHashLRU::clear()
 void BlueStore::OnodeHashLRU::rename(const ghobject_t& old_oid,
 				    const ghobject_t& new_oid)
 {
-  Mutex::Locker l(lock);
+  std::lock_guard<std::mutex> l(lock);
   dout(30) << __func__ << " " << old_oid << " -> " << new_oid << dendl;
   ceph::unordered_map<ghobject_t,OnodeRef>::iterator po, pn;
   po = onode_map.find(old_oid);
@@ -549,7 +541,6 @@ void BlueStore::OnodeHashLRU::rename(const ghobject_t& old_oid,
 
   // install a non-existent onode at old location
   po->second.reset(new Onode(old_oid, o->key));
-  po->second->exists = false;
   lru.push_back(*po->second);
 
   // add at new position and fix oid, key
@@ -563,7 +554,7 @@ bool BlueStore::OnodeHashLRU::get_next(
   const ghobject_t& after,
   pair<ghobject_t,OnodeRef> *next)
 {
-  Mutex::Locker l(lock);
+  std::lock_guard<std::mutex> l(lock);
   dout(20) << __func__ << " after " << after << dendl;
 
   if (after == ghobject_t()) {
@@ -591,17 +582,20 @@ bool BlueStore::OnodeHashLRU::get_next(
 
 int BlueStore::OnodeHashLRU::trim(int max)
 {
-  Mutex::Locker l(lock);
+  std::lock_guard<std::mutex> l(lock);
   dout(20) << __func__ << " max " << max
 	   << " size " << onode_map.size() << dendl;
   int trimmed = 0;
   int num = onode_map.size() - max;
+  if (onode_map.size() == 0 || num <= 0)
+    return 0; // don't even try
+  
   lru_list_t::iterator p = lru.end();
   if (num)
     --p;
   while (num > 0) {
     Onode *o = &*p;
-    int refs = o->nref.read();
+    int refs = o->nref.load();
     if (refs > 1) {
       dout(20) << __func__ << "  " << o->oid << " has " << refs
 	       << " refs; stopping with " << num << " left to trim" << dendl;
@@ -633,7 +627,8 @@ int BlueStore::OnodeHashLRU::trim(int max)
 BlueStore::Collection::Collection(BlueStore *ns, coll_t c)
   : store(ns),
     cid(c),
-    lock("BlueStore::Collection::lock"),
+    lock("BlueStore::Collection::lock", true, false),
+    exists(true),
     onode_map(),
     enode_set(g_conf->bluestore_onode_map_size)
 {
@@ -715,11 +710,12 @@ BlueStore::OnodeRef BlueStore::Collection::get_onode(
     on = new Onode(oid, key);
     on->dirty = true;
     if (g_conf->bluestore_debug_misc && !create)
-      on->exists = on->dirty = false;
+      on->dirty = false;
   } else {
     // loaded
     assert(r >=0);
     on = new Onode(oid, key);
+    on->exists = true;
     bufferlist::iterator p = v.begin();
     ::decode(on->onode, p);
   }
@@ -756,7 +752,7 @@ BlueStore::BlueStore(CephContext *cct, const string& path)
     fsid_fd(-1),
     mounted(false),
     coll_lock("BlueStore::coll_lock"),
-    nid_lock("BlueStore::nid_lock"),
+    nid_last(0),
     nid_max(0),
     throttle_ops(cct, "bluestore_max_ops", cct->_conf->bluestore_max_ops),
     throttle_bytes(cct, "bluestore_max_bytes", cct->_conf->bluestore_max_bytes),
@@ -766,7 +762,6 @@ BlueStore::BlueStore(CephContext *cct, const string& path)
     throttle_wal_bytes(cct, "bluestore_wal_max_bytes",
 		       cct->_conf->bluestore_max_bytes +
 		       cct->_conf->bluestore_wal_max_bytes),
-    wal_lock("BlueStore::wal_lock"),
     wal_seq(0),
     wal_tp(cct,
 	   "BlueStore::wal_tp",
@@ -779,10 +774,8 @@ BlueStore::BlueStore(CephContext *cct, const string& path)
 	     &wal_tp),
     finisher(cct),
     kv_sync_thread(this),
-    kv_lock("BlueStore::kv_lock"),
     kv_stop(false),
-    logger(NULL),
-    reap_lock("BlueStore::reap_lock")
+    logger(NULL)
 {
   _init_logger();
 }
@@ -798,12 +791,29 @@ BlueStore::~BlueStore()
 
 void BlueStore::_init_logger()
 {
-  // XXX
+  PerfCountersBuilder b(g_ceph_context, "BlueStore",
+                        l_bluestore_first, l_bluestore_last);
+  b.add_time_avg(l_bluestore_state_prepare_lat, "state_prepare_lat", "Average prepare state latency");
+  b.add_time_avg(l_bluestore_state_aio_wait_lat, "state_aio_wait_lat", "Average aio_wait state latency");
+  b.add_time_avg(l_bluestore_state_io_done_lat, "state_io_done_lat", "Average io_done state latency");
+  b.add_time_avg(l_bluestore_state_kv_queued_lat, "state_kv_queued_lat", "Average kv_queued state latency");
+  b.add_time_avg(l_bluestore_state_kv_committing_lat, "state_kv_commiting_lat", "Average kv_commiting state latency");
+  b.add_time_avg(l_bluestore_state_kv_done_lat, "state_kv_done_lat", "Average kv_done state latency");
+  b.add_time_avg(l_bluestore_state_wal_queued_lat, "state_wal_queued_lat", "Average wal_queued state latency");
+  b.add_time_avg(l_bluestore_state_wal_applying_lat, "state_wal_applying_lat", "Average wal_applying state latency");
+  b.add_time_avg(l_bluestore_state_wal_aio_wait_lat, "state_wal_aio_wait_lat", "Average aio_wait state latency");
+  b.add_time_avg(l_bluestore_state_wal_cleanup_lat, "state_wal_cleanup_lat", "Average cleanup state latency");
+  b.add_time_avg(l_bluestore_state_wal_done_lat, "state_wal_done_lat", "Average wal_done state latency");
+  b.add_time_avg(l_bluestore_state_finishing_lat, "state_finishing_lat", "Average finishing state latency");
+  b.add_time_avg(l_bluestore_state_done_lat, "state_done_lat", "Average done state latency");
+  logger = b.create_perf_counters();
+  g_ceph_context->get_perfcounters_collection()->add(logger);
 }
 
 void BlueStore::_shutdown_logger()
 {
-  // XXX
+  g_ceph_context->get_perfcounters_collection()->remove(logger);
+  delete logger;
 }
 
 int BlueStore::get_block_device_fsid(const string& path, uuid_d *fsid)
@@ -827,7 +837,7 @@ int BlueStore::_open_path()
     return r;
   }
   assert(fs == NULL);
-  fs = FS::create(path_fd);
+  fs = FS::create_by_fd(path_fd);
   dout(1) << __func__ << " using fs driver '" << fs->get_name() << "'" << dendl;
   return 0;
 }
@@ -850,11 +860,11 @@ int BlueStore::_write_bdev_label(string path, bluestore_bdev_label_t label)
   assert(bl.length() <= BDEV_LABEL_BLOCK_SIZE);
   bufferptr z(BDEV_LABEL_BLOCK_SIZE - bl.length());
   z.zero();
-  bl.append(z);
+  bl.append(std::move(z));
 
   int fd = ::open(path.c_str(), O_WRONLY);
   if (fd < 0) {
-    fd = errno;
+    fd = -errno;
     derr << __func__ << " failed to open " << path << ": " << cpp_strerror(fd)
 	 << dendl;
     return fd;
@@ -940,15 +950,17 @@ int BlueStore::_open_bdev(bool create)
 {
   bluestore_bdev_label_t label;
   assert(bdev == NULL);
-  bdev = new BlockDevice(aio_cb, static_cast<void*>(this));
   string p = path + "/block";
+  bdev = BlockDevice::create(p, aio_cb, static_cast<void*>(this));
   int r = bdev->open(p);
   if (r < 0)
     goto fail;
 
-  r = _check_or_set_bdev_label(p, bdev->get_size(), "main", create);
-  if (r < 0)
-    goto fail_close;
+  if (bdev->supported_bdev_label()) {
+    r = _check_or_set_bdev_label(p, bdev->get_size(), "main", create);
+    if (r < 0)
+      goto fail_close;
+  }
   return 0;
 
  fail_close:
@@ -1124,7 +1136,7 @@ int BlueStore::_open_db(bool create)
   } else {
     r = read_meta("kv_backend", &kv_backend);
     if (r < 0) {
-      derr << __func__ << " uanble to read 'kv_backend' meta" << dendl;
+      derr << __func__ << " unable to read 'kv_backend' meta" << dendl;
       return -EIO;
     }
   }
@@ -1537,10 +1549,15 @@ int BlueStore::_open_collections(int *errors)
     coll_t cid;
     if (cid.parse(it->key())) {
       CollectionRef c(new Collection(this, cid));
-      bufferlist bl;
-      db->get(PREFIX_COLL, it->key(), &bl);
+      bufferlist bl = it->value();
       bufferlist::iterator p = bl.begin();
-      ::decode(c->cnode, p);
+      try {
+        ::decode(c->cnode, p);
+      } catch (buffer::error& e) {
+        derr << __func__ << " failed to decode cnode, key:"
+             << pretty_binary_string(it->key()) << dendl;
+        return -EIO;
+      }   
       dout(20) << __func__ << " opened " << cid << dendl;
       coll_map[cid] = c;
     } else {
@@ -1554,42 +1571,77 @@ int BlueStore::_open_collections(int *errors)
 
 int BlueStore::_setup_block_symlink_or_file(
   string name,
-  string path,
-  uint64_t size)
+  string epath,  // symlink target; when empty no symlink is created
+  uint64_t size,  // when non-zero, an empty regular file `name` is truncated up to this size
+  bool create)  // adds O_CREAT when opening `name` for sizing
 {
-  dout(20) << __func__ << " name " << name << " path " << path
-	   << " size " << size << dendl;
-  if (path.length()) {
-    int r = ::symlinkat(path.c_str(), path_fd, name.c_str());
-    if (r < 0) {
-      r = -errno;
-      derr << __func__ << " failed to create " << name << " symlink to "
-	   << path << ": " << cpp_strerror(r) << dendl;
-      return r;
-    }
-  } else if (size) {
-    struct stat st;
-    int r = ::fstatat(path_fd, name.c_str(), &st, 0);
-    if (r < 0)
-      r = -errno;
-    if (r == -ENOENT) {
-      int fd = ::openat(path_fd, name.c_str(), O_CREAT|O_RDWR, 0644);
+  dout(20) << __func__ << " name " << name << " path " << epath
+	   << " size " << size << " create=" << (int)create << dendl;
+  int r = 0;
+  if (epath.length()) {
+    if (!epath.compare(0, sizeof(SPDK_PREFIX)-1, SPDK_PREFIX)) {  // prefix length without the NUL; sizeof(SPDK_PREFIX-1) would be a pointer size
+      string symbol_spdk_file = path + "/" + epath;
+      r = ::symlinkat(symbol_spdk_file.c_str(), path_fd, name.c_str());
+      if (r < 0) {
+        r = -errno;
+        derr << __func__ << " failed to create " << name << " symlink to "
+    	     << symbol_spdk_file << ": " << cpp_strerror(r) << dendl;
+        return r;
+      }
+      int fd = ::openat(path_fd, epath.c_str(), O_RDWR, 0644);
       if (fd < 0) {
-	int r = -errno;
-	derr << __func__ << " failed to create " << name << " file: "
+	r = -errno;
+	derr << __func__ << " failed to open " << epath << " file: "
 	     << cpp_strerror(r) << dendl;
 	return r;
       }
-      int r = ::ftruncate(fd, size);
-      assert(r == 0);
-      dout(1) << __func__ << " created " << name << " file with size "
-	      << pretty_si_t(size) << "B" << dendl;
+      string serial_number = epath.substr(sizeof(SPDK_PREFIX)-1);  // text after the prefix is stored in the file
+      r = ::write(fd, serial_number.c_str(), serial_number.size());
+      assert(r == (int)serial_number.size());
+      dout(1) << __func__ << " created " << name << " file with " << dendl;
       VOID_TEMP_FAILURE_RETRY(::close(fd));
-    } else if (r < 0) {
-      derr << __func__ << " failed to stat " << name << " file: "
-           << cpp_strerror(r) << dendl;
-      return r;
-    } 
+    } else {
+      r = ::symlinkat(epath.c_str(), path_fd, name.c_str());
+      if (r < 0) {
+        r = -errno;
+        derr << __func__ << " failed to create " << name << " symlink to "
+    	   << epath << ": " << cpp_strerror(r) << dendl;
+        return r;
+      }
+    }
+  }
+  if (size) {
+    unsigned flags = O_RDWR;
+    if (create)
+      flags |= O_CREAT;
+    int fd = ::openat(path_fd, name.c_str(), flags, 0644);
+    if (fd >= 0) {
+      // block file is present
+      struct stat st;
+      int r = ::fstat(fd, &st);
+      if (r == 0 &&
+	  S_ISREG(st.st_mode) &&   // if it is a regular file
+	  st.st_size == 0) {       // and is 0 bytes
+	r = ::ftruncate(fd, size);
+	if (r < 0) {
+	  r = -errno;
+	  derr << __func__ << " failed to resize " << name << " file to "
+	       << size << ": " << cpp_strerror(r) << dendl;
+	  VOID_TEMP_FAILURE_RETRY(::close(fd));
+	  return r;
+	}
+	dout(1) << __func__ << " resized " << name << " file to "
+		<< pretty_si_t(size) << "B" << dendl;
+      }
+      VOID_TEMP_FAILURE_RETRY(::close(fd));
+    } else {
+      int r = -errno;
+      if (r != -ENOENT) {  // a missing file is not treated as an error here
+	derr << __func__ << " failed to open " << name << " file: "
+	     << cpp_strerror(r) << dendl;
+	return r;
+      }
+    }
   }
   return 0;
 }
@@ -1600,6 +1652,29 @@ int BlueStore::mkfs()
   int r;
   uuid_d old_fsid;
 
+  {
+    string done;
+    r = read_meta("mkfs_done", &done);
+    if (r == 0) {
+      dout(1) << __func__ << " already created" << dendl;
+      return 0; // idempotent
+    }
+  }
+
+  {
+    string type;
+    r = read_meta("type", &type);
+    if (r == 0) {
+      if (type != "bluestore") {
+	dout(1) << __func__ << " expected bluestore, but type is " << type << dendl;
+	return -EIO;
+      }
+    }
+    r = write_meta("type", "bluestore");
+    if (r < 0)
+      return r;
+  }
+
   r = _open_path();
   if (r < 0)
     return r;
@@ -1613,7 +1688,7 @@ int BlueStore::mkfs()
     goto out_close_fsid;
 
   r = _read_fsid(&old_fsid);
-  if (r < 0 && old_fsid.is_zero()) {
+  if (r < 0 || old_fsid.is_zero()) {
     if (fsid.is_zero()) {
       fsid.generate_random();
       dout(1) << __func__ << " generated fsid " << fsid << dendl;
@@ -1629,20 +1704,21 @@ int BlueStore::mkfs()
       goto out_close_fsid;
     }
     fsid = old_fsid;
-    dout(1) << __func__ << " already created, fsid is " << fsid << dendl;
-    goto out_close_fsid;
   }
 
   r = _setup_block_symlink_or_file("block", g_conf->bluestore_block_path,
-				   g_conf->bluestore_block_size);
+				   g_conf->bluestore_block_size,
+				   g_conf->bluestore_block_create);
   if (r < 0)
     goto out_close_fsid;
   r = _setup_block_symlink_or_file("block.wal", g_conf->bluestore_block_wal_path,
-				   g_conf->bluestore_block_wal_size);
+				   g_conf->bluestore_block_wal_size,
+				   g_conf->bluestore_block_wal_create);
   if (r < 0)
     goto out_close_fsid;
   r = _setup_block_symlink_or_file("block.db", g_conf->bluestore_block_db_path,
-				   g_conf->bluestore_block_db_size);
+				   g_conf->bluestore_block_db_size,
+				   g_conf->bluestore_block_db_create);
   if (r < 0)
     goto out_close_fsid;
 
@@ -1707,16 +1783,20 @@ int BlueStore::mkfs()
   r = write_meta("bluefs", stringify((int)g_conf->bluestore_bluefs));
   if (r < 0)
     goto out_close_alloc;
-  r = write_meta("type", "bluestore");
+
+  if (fsid != old_fsid) {
+    r = _write_fsid();
+    if (r < 0) {
+      derr << __func__ << " error writing fsid: " << cpp_strerror(r) << dendl;
+      goto out_close_alloc;
+    }
+  }
+
+  // indicate success by writing the 'mkfs_done' file
+  r = write_meta("mkfs_done", "yes");
   if (r < 0)
     goto out_close_alloc;
-
-  // indicate mkfs completion/success by writing the fsid file
-  r = _write_fsid();
-  if (r == 0)
-    dout(10) << __func__ << " success" << dendl;
-  else
-    derr << __func__ << " error writing fsid: " << cpp_strerror(r) << dendl;
+  dout(10) << __func__ << " success" << dendl;
 
  out_close_alloc:
   _close_alloc();
@@ -1799,10 +1879,11 @@ int BlueStore::mount()
 
  out_stop:
   _kv_stop();
+  wal_wq.drain();
   wal_tp.stop();
   finisher.wait_for_empty();
   finisher.stop();
-out_coll:
+ out_coll:
   coll_map.clear();
  out_alloc:
   _close_alloc();
@@ -1896,7 +1977,6 @@ int BlueStore::fsck()
   set<uint64_t> used_omap_head;
   interval_set<uint64_t> used_blocks;
   KeyValueDB::Iterator it;
-  EnodeRef enode;
   vector<bluestore_extent_t> hash_shared;
 
   int r = _open_path();
@@ -1948,13 +2028,14 @@ int BlueStore::fsck()
 
   // walk collections, objects
   for (ceph::unordered_map<coll_t, CollectionRef>::iterator p = coll_map.begin();
-       p != coll_map.end() && !errors;
+       p != coll_map.end();
        ++p) {
     dout(1) << __func__ << " collection " << p->first << dendl;
     CollectionRef c = _get_collection(p->first);
     RWLock::RLocker l(c->lock);
     ghobject_t pos;
-    while (!errors) {
+    EnodeRef enode;
+    while (true) {
       vector<ghobject_t> ols;
       int r = collection_list(p->first, pos, ghobject_t::get_max(), true,
 			      100, &ols, &pos);
@@ -1970,9 +2051,9 @@ int BlueStore::fsck()
 	OnodeRef o = c->get_onode(oid, false);
 	if (!o || !o->exists) {
 	  ++errors;
-	  break;
+	  continue; // go for next object
 	}
-	if (enode && enode->hash != o->oid.hobj.get_hash()) {
+	if (!enode || enode->hash != o->oid.hobj.get_hash()) {
 	  if (enode)
 	    errors += _verify_enode_shared(enode, hash_shared);
 	  enode = c->get_enode(o->oid.hobj.get_hash());
@@ -1983,7 +2064,7 @@ int BlueStore::fsck()
 	    derr << " " << oid << " nid " << o->onode.nid << " already in use"
 		 << dendl;
 	    ++errors;
-	    break;
+	    continue; // go for next object
 	  }
 	  used_nids.insert(o->onode.nid);
 	}
@@ -2012,12 +2093,14 @@ int BlueStore::fsck()
 	    derr << " " << oid << " overlay " << v.first << " " << v.second
 		 << " extends past end of object" << dendl;
 	    ++errors;
+            continue; // go for next overlay
 	  }
 	  if (v.second.key > o->onode.last_overlay_key) {
 	    derr << " " << oid << " overlay " << v.first << " " << v.second
 		 << " is > last_overlay_key " << o->onode.last_overlay_key
 		 << dendl;
 	    ++errors;
+            continue; // go for next overlay
 	  }
 	  ++refs[v.second.key];
 	  string key;
@@ -2029,6 +2112,7 @@ int BlueStore::fsck()
 	    derr << " " << oid << " overlay " << v.first << " " << v.second
 		 << " failed to fetch: " << cpp_strerror(r) << dendl;
 	    ++errors;
+            continue;
 	  }
 	  if (val.length() < v.second.value_offset + v.second.length) {
 	    derr << " " << oid << " overlay " << v.first << " " << v.second
@@ -2129,7 +2213,7 @@ int BlueStore::fsck()
 	c = NULL;
 	for (ceph::unordered_map<coll_t, CollectionRef>::iterator p =
 	       coll_map.begin();
-	     p != coll_map.end() && !errors;
+	     p != coll_map.end();
 	     ++p) {
 	  if (p->second->contains(oid)) {
 	    c = p->second;
@@ -2189,7 +2273,8 @@ int BlueStore::fsck()
       } catch (buffer::error& e) {
 	derr << __func__ << " failed to decode wal txn "
 	     << pretty_binary_string(it->key()) << dendl;
-	return -EIO;
+	r = -EIO;
+        goto out_scan;
       }
       dout(20) << __func__ << "  wal " << wt.seq
 	       << " ops " << wt.ops.size()
@@ -2224,6 +2309,7 @@ int BlueStore::fsck()
     }
   }
 
+ out_scan:
   coll_map.clear();
  out_alloc:
   _close_alloc();
@@ -2252,13 +2338,12 @@ void BlueStore::_sync()
   // flush aios in flght
   bdev->flush();
 
-  kv_lock.Lock();
+  std::unique_lock<std::mutex> l(kv_lock);
   while (!kv_committing.empty() ||
 	 !kv_queue.empty()) {
     dout(20) << " waiting for kv to commit" << dendl;
-    kv_sync_cond.Wait(kv_lock);
+    kv_sync_cond.wait(l);
   }
-  kv_lock.Unlock();
 
   dout(10) << __func__ << " done" << dendl;
 }
@@ -2278,7 +2363,7 @@ int BlueStore::statfs(struct statfs *buf)
 // ---------------
 // cache
 
-BlueStore::CollectionRef BlueStore::_get_collection(coll_t cid)
+BlueStore::CollectionRef BlueStore::_get_collection(const coll_t& cid)
 {
   RWLock::RLocker l(coll_lock);
   ceph::unordered_map<coll_t,CollectionRef>::iterator cp = coll_map.find(cid);
@@ -2290,17 +2375,17 @@ BlueStore::CollectionRef BlueStore::_get_collection(coll_t cid)
 void BlueStore::_queue_reap_collection(CollectionRef& c)
 {
   dout(10) << __func__ << " " << c->cid << dendl;
-  Mutex::Locker l(reap_lock);
+  std::lock_guard<std::mutex> l(reap_lock);  // held while appending to removed_collections
   removed_collections.push_back(c);
 }
 
 void BlueStore::_reap_collections()
 {
-  reap_lock.Lock();
-
   list<CollectionRef> removed_colls;
-  removed_colls.swap(removed_collections);
-  reap_lock.Unlock();
+  {
+    std::lock_guard<std::mutex> l(reap_lock);
+    removed_colls.swap(removed_collections);
+  }
 
   for (list<CollectionRef>::iterator p = removed_colls.begin();
        p != removed_colls.end();
@@ -2323,18 +2408,30 @@ void BlueStore::_reap_collections()
   }
 
   dout(10) << __func__ << " all reaped" << dendl;
-  reap_cond.Signal();
 }
 
 // ---------------
 // read operations
 
-bool BlueStore::exists(coll_t cid, const ghobject_t& oid)
+ObjectStore::CollectionHandle BlueStore::open_collection(const coll_t& cid)
 {
-  dout(10) << __func__ << " " << cid << " " << oid << dendl;
-  CollectionRef c = _get_collection(cid);
+  return _get_collection(cid);
+}
+
+bool BlueStore::exists(const coll_t& cid, const ghobject_t& oid)
+{
+  CollectionHandle c = _get_collection(cid);  // resolve the collection id to a handle
+  if (!c)
+    return false;  // no collection found for cid, so the object cannot exist
+  return exists(c, oid);  // delegate to the handle-based overload
+}
+
+bool BlueStore::exists(CollectionHandle &c_, const ghobject_t& oid)
+{
+  Collection *c = static_cast<Collection*>(c_.get());
+  dout(10) << __func__ << " " << c->cid << " " << oid << dendl;
+  if (!c->exists)
+    return false;
   RWLock::RLocker l(c->lock);
   OnodeRef o = c->get_onode(oid, false);
   if (!o || !o->exists)
@@ -2343,15 +2440,27 @@ bool BlueStore::exists(coll_t cid, const ghobject_t& oid)
 }
 
 int BlueStore::stat(
-    coll_t cid,
+    const coll_t& cid,
     const ghobject_t& oid,
     struct stat *st,
     bool allow_eio)
 {
-  dout(10) << __func__ << " " << cid << " " << oid << dendl;
-  CollectionRef c = _get_collection(cid);
+  CollectionHandle c = _get_collection(cid);
   if (!c)
     return -ENOENT;
+  return stat(c, oid, st, allow_eio);
+}
+
+int BlueStore::stat(
+  CollectionHandle &c_,
+  const ghobject_t& oid,
+  struct stat *st,
+  bool allow_eio)
+{
+  Collection *c = static_cast<Collection*>(c_.get());
+  if (!c->exists)
+    return -ENOENT;
+  dout(10) << __func__ << " " << c->get_cid() << " " << oid << dendl;
   RWLock::RLocker l(c->lock);
   OnodeRef o = c->get_onode(oid, false);
   if (!o || !o->exists)
@@ -2364,7 +2473,22 @@ int BlueStore::stat(
 }
 
 int BlueStore::read(
-  coll_t cid,
+  const coll_t& cid,
+  const ghobject_t& oid,
+  uint64_t offset,
+  size_t length,
+  bufferlist& bl,
+  uint32_t op_flags,
+  bool allow_eio)
+{
+  CollectionHandle c = _get_collection(cid);
+  if (!c)
+    return -ENOENT;
+  return read(c, oid, offset, length, bl, op_flags, allow_eio);
+}
+
+int BlueStore::read(
+  CollectionHandle &c_,
   const ghobject_t& oid,
   uint64_t offset,
   size_t length,
@@ -2372,15 +2496,17 @@ int BlueStore::read(
   uint32_t op_flags,
   bool allow_eio)
 {
+  Collection *c = static_cast<Collection*>(c_.get());
+  const coll_t &cid = c->get_cid();
   dout(15) << __func__ << " " << cid << " " << oid
 	   << " " << offset << "~" << length
 	   << dendl;
-  bl.clear();
-  CollectionRef c = _get_collection(cid);
-  if (!c)
+  if (!c->exists)
     return -ENOENT;
   RWLock::RLocker l(c->lock);
 
+  bl.clear();
+
   int r;
 
   OnodeRef o = c->get_onode(oid, false);
@@ -2411,7 +2537,7 @@ int BlueStore::_do_read(
   map<uint64_t,bluestore_extent_t>::iterator bp, bend;
   map<uint64_t,bluestore_overlay_t>::iterator op, oend;
   uint64_t block_size = bdev->get_block_size();
-  int r;
+  int r = 0;
   IOContext ioc(NULL);   // FIXME?
 
   // generally, don't buffer anything, unless the client explicitly requests
@@ -2433,7 +2559,6 @@ int BlueStore::_do_read(
   _dump_onode(o);
 
   if (offset > o->onode.size) {
-    r = 0;
     goto out;
   }
 
@@ -2443,8 +2568,6 @@ int BlueStore::_do_read(
 
   o->flush();
 
-  r = 0;
-
   // loop over overlays and data fragments.  overlays take precedence.
   bend = o->onode.block_map.end();
   bp = o->onode.block_map.lower_bound(offset);
@@ -2479,7 +2602,13 @@ int BlueStore::_do_read(
       bufferlist v;
       string key;
       get_overlay_key(o->onode.nid, op->second.key, &key);
-      db->get(PREFIX_OVERLAY, key, &v);
+      r = db->get(PREFIX_OVERLAY, key, &v);
+      if (r < 0) {
+        derr << " failed to fetch overlay(nid = " << o->onode.nid
+             << ", key = " << key 
+             << "): " << cpp_strerror(r) << dendl;
+        goto out;
+      }
       bufferlist frag;
       frag.substr_of(v, x_off, x_len);
       bl.claim_append(frag);
@@ -2521,9 +2650,7 @@ int BlueStore::_do_read(
 	// unwritten (zero) extent
 	dout(30) << __func__ << " data " << bp->first << ": " << bp->second
 		 << ", use " << x_len << " zeros" << dendl;
-	bufferptr bp(x_len);
-	bp.zero();
-	bl.push_back(bp);
+	bl.append_zero(x_len);
       }
       offset += x_len;
       length -= x_len;
@@ -2540,9 +2667,7 @@ int BlueStore::_do_read(
 
     // zero.
     dout(30) << __func__ << " zero " << offset << "~" << x_len << dendl;
-    bufferptr bp(x_len);
-    bp.zero();
-    bl.push_back(bp);
+    bl.append_zero(x_len);
     offset += x_len;
     length -= x_len;
     continue;
@@ -2554,16 +2679,29 @@ int BlueStore::_do_read(
 }
 
 int BlueStore::fiemap(
-  coll_t cid,
+  const coll_t& cid,
   const ghobject_t& oid,
   uint64_t offset,
   size_t len,
   bufferlist& bl)
 {
-  interval_set<uint64_t> m;
-  CollectionRef c = _get_collection(cid);
+  CollectionHandle c = _get_collection(cid);
   if (!c)
     return -ENOENT;
+  return fiemap(c, oid, offset, len, bl);
+}
+
+int BlueStore::fiemap(
+  CollectionHandle &c_,
+  const ghobject_t& oid,
+  uint64_t offset,
+  size_t len,
+  bufferlist& bl)
+{
+  Collection *c = static_cast<Collection*>(c_.get());
+  if (!c->exists)
+    return -ENOENT;
+  interval_set<uint64_t> m;
   RWLock::RLocker l(c->lock);
 
   OnodeRef o = c->get_onode(oid, false);
@@ -2578,9 +2716,6 @@ int BlueStore::fiemap(
   map<uint64_t,bluestore_extent_t>::iterator bp, bend;
   map<uint64_t,bluestore_overlay_t>::iterator op, oend;
 
-  if (offset == len && offset == 0)
-    len = o->onode.size;
-
   if (offset > o->onode.size)
     goto out;
 
@@ -2658,15 +2793,27 @@ int BlueStore::fiemap(
 }
 
 int BlueStore::getattr(
-  coll_t cid,
+  const coll_t& cid,
   const ghobject_t& oid,
   const char *name,
   bufferptr& value)
 {
-  dout(15) << __func__ << " " << cid << " " << oid << " " << name << dendl;
-  CollectionRef c = _get_collection(cid);
+  CollectionHandle c = _get_collection(cid);
   if (!c)
     return -ENOENT;
+  return getattr(c, oid, name, value);
+}
+
+int BlueStore::getattr(
+  CollectionHandle &c_,
+  const ghobject_t& oid,
+  const char *name,
+  bufferptr& value)
+{
+  Collection *c = static_cast<Collection*>(c_.get());
+  dout(15) << __func__ << " " << c->cid << " " << oid << " " << name << dendl;
+  if (!c->exists)
+    return -ENOENT;
   RWLock::RLocker l(c->lock);
   int r;
   string k(name);
@@ -2684,20 +2831,32 @@ int BlueStore::getattr(
   value = o->onode.attrs[k];
   r = 0;
  out:
-  dout(10) << __func__ << " " << cid << " " << oid << " " << name
+  dout(10) << __func__ << " " << c->cid << " " << oid << " " << name
 	   << " = " << r << dendl;
   return r;
 }
 
+
 int BlueStore::getattrs(
-  coll_t cid,
+  const coll_t& cid,
   const ghobject_t& oid,
   map<string,bufferptr>& aset)
 {
-  dout(15) << __func__ << " " << cid << " " << oid << dendl;
-  CollectionRef c = _get_collection(cid);
+  CollectionHandle c = _get_collection(cid);
   if (!c)
     return -ENOENT;
+  return getattrs(c, oid, aset);
+}
+
+int BlueStore::getattrs(
+  CollectionHandle &c_,
+  const ghobject_t& oid,
+  map<string,bufferptr>& aset)
+{
+  Collection *c = static_cast<Collection*>(c_.get());
+  dout(15) << __func__ << " " << c->cid << " " << oid << dendl;
+  if (!c->exists)
+    return -ENOENT;
   RWLock::RLocker l(c->lock);
   int r;
 
@@ -2709,7 +2868,7 @@ int BlueStore::getattrs(
   aset = o->onode.attrs;
   r = 0;
  out:
-  dout(10) << __func__ << " " << cid << " " << oid
+  dout(10) << __func__ << " " << c->cid << " " << oid
 	   << " = " << r << dendl;
   return r;
 }
@@ -2724,13 +2883,13 @@ int BlueStore::list_collections(vector<coll_t>& ls)
   return 0;
 }
 
-bool BlueStore::collection_exists(coll_t c)
+bool BlueStore::collection_exists(const coll_t& c)
 {
   RWLock::RLocker l(coll_lock);
   return coll_map.count(c);
 }
 
-bool BlueStore::collection_empty(coll_t cid)
+bool BlueStore::collection_empty(const coll_t& cid)
 {
   dout(15) << __func__ << " " << cid << dendl;
   vector<ghobject_t> ls;
@@ -2744,18 +2903,40 @@ bool BlueStore::collection_empty(coll_t cid)
   return empty;
 }
 
+int BlueStore::collection_bits(const coll_t& cid)
+{
+  dout(15) << __func__ << " " << cid << dendl;
+  CollectionRef c = _get_collection(cid);
+  if (!c)
+    return -ENOENT;  // no collection found for cid
+  RWLock::RLocker l(c->lock);  // taken before reading c->cnode
+  dout(10) << __func__ << " " << cid << " = " << c->cnode.bits << dendl;
+  return c->cnode.bits;  // result is the bits field of the collection's cnode
+}
+
+int BlueStore::collection_list(
+  const coll_t& cid, ghobject_t start, ghobject_t end,
+  bool sort_bitwise, int max,
+  vector<ghobject_t> *ls, ghobject_t *pnext)
+{
+  CollectionHandle c = _get_collection(cid);  // resolve the collection id to a handle
+  if (!c)
+    return -ENOENT;  // no collection found for cid
+  return collection_list(c, start, end, sort_bitwise, max, ls, pnext);  // delegate to the handle-based overload, arguments passed through
+}
+
 int BlueStore::collection_list(
-  coll_t cid, ghobject_t start, ghobject_t end,
+  CollectionHandle &c_, ghobject_t start, ghobject_t end,
   bool sort_bitwise, int max,
   vector<ghobject_t> *ls, ghobject_t *pnext)
 {
-  dout(15) << __func__ << " " << cid
+  Collection *c = static_cast<Collection*>(c_.get());
+  dout(15) << __func__ << " " << c->cid
 	   << " start " << start << " end " << end << " max " << max << dendl;
+  if (!c->exists)
+    return -ENOENT;
   if (!sort_bitwise)
     return -EOPNOTSUPP;
-  CollectionRef c = _get_collection(cid);
-  if (!c)
-    return -ENOENT;
   RWLock::RLocker l(c->lock);
   int r = 0;
   KeyValueDB::Iterator it;
@@ -2773,7 +2954,7 @@ int BlueStore::collection_list(
       start.hobj == hobject_t::get_max()) {
     goto out;
   }
-  get_coll_key_range(cid, c->cnode.bits, &temp_start_key, &temp_end_key,
+  get_coll_key_range(c->cid, c->cnode.bits, &temp_start_key, &temp_end_key,
 		     &start_key, &end_key);
   dout(20) << __func__
 	   << " range " << pretty_binary_string(temp_start_key)
@@ -2784,7 +2965,7 @@ int BlueStore::collection_list(
   it = db->get_iterator(PREFIX_OBJ);
   if (start == ghobject_t() ||
       start.hobj == hobject_t() ||
-      start == cid.get_min_hobj()) {
+      start == c->cid.get_min_hobj()) {
     it->upper_bound(temp_start_key);
     temp = true;
   } else {
@@ -2859,7 +3040,7 @@ int BlueStore::collection_list(
     *pnext = ghobject_t::get_max();
   }
  out:
-  dout(10) << __func__ << " " << cid
+  dout(10) << __func__ << " " << c->cid
 	   << " start " << start << " end " << end << " max " << max
 	   << " = " << r << ", ls.size() = " << ls->size()
 	   << ", next = " << *pnext << dendl;
@@ -2956,16 +3137,29 @@ bufferlist BlueStore::OmapIteratorImpl::value()
 }
 
 int BlueStore::omap_get(
-  coll_t cid,                ///< [in] Collection containing oid
+  const coll_t& cid,                ///< [in] Collection containing oid
   const ghobject_t &oid,   ///< [in] Object containing omap
   bufferlist *header,      ///< [out] omap header
   map<string, bufferlist> *out /// < [out] Key to value map
   )
 {
-  dout(15) << __func__ << " " << cid << " oid " << oid << dendl;
-  CollectionRef c = _get_collection(cid);
+  CollectionHandle c = _get_collection(cid);
   if (!c)
     return -ENOENT;
+  return omap_get(c, oid, header, out);
+}
+
+int BlueStore::omap_get(
+  CollectionHandle &c_,    ///< [in] Collection containing oid
+  const ghobject_t &oid,   ///< [in] Object containing omap
+  bufferlist *header,      ///< [out] omap header
+  map<string, bufferlist> *out /// < [out] Key to value map
+  )
+{
+  Collection *c = static_cast<Collection*>(c_.get());
+  dout(15) << __func__ << " " << c->get_cid() << " oid " << oid << dendl;
+  if (!c->exists)
+    return -ENOENT;
   RWLock::RLocker l(c->lock);
   int r = 0;
   OnodeRef o = c->get_onode(oid, false);
@@ -3001,21 +3195,35 @@ int BlueStore::omap_get(
     }
   }
  out:
-  dout(10) << __func__ << " " << cid << " oid " << oid << " = " << r << dendl;
+  dout(10) << __func__ << " " << c->get_cid() << " oid " << oid << " = " << r
+	   << dendl;
   return r;
 }
 
 int BlueStore::omap_get_header(
-  coll_t cid,                ///< [in] Collection containing oid
+  const coll_t& cid,                ///< [in] Collection containing oid
   const ghobject_t &oid,   ///< [in] Object containing omap
   bufferlist *header,      ///< [out] omap header
   bool allow_eio ///< [in] don't assert on eio
   )
 {
-  dout(15) << __func__ << " " << cid << " oid " << oid << dendl;
-  CollectionRef c = _get_collection(cid);
+  CollectionHandle c = _get_collection(cid);
   if (!c)
     return -ENOENT;
+  return omap_get_header(c, oid, header, allow_eio);
+}
+
+int BlueStore::omap_get_header(
+  CollectionHandle &c_,                ///< [in] Collection containing oid
+  const ghobject_t &oid,   ///< [in] Object containing omap
+  bufferlist *header,      ///< [out] omap header
+  bool allow_eio ///< [in] don't assert on eio
+  )
+{
+  Collection *c = static_cast<Collection*>(c_.get());
+  dout(15) << __func__ << " " << c->get_cid() << " oid " << oid << dendl;
+  if (!c->exists)
+    return -ENOENT;
   RWLock::RLocker l(c->lock);
   int r = 0;
   OnodeRef o = c->get_onode(oid, false);
@@ -3036,20 +3244,33 @@ int BlueStore::omap_get_header(
     }
   }
  out:
-  dout(10) << __func__ << " " << cid << " oid " << oid << " = " << r << dendl;
+  dout(10) << __func__ << " " << c->get_cid() << " oid " << oid << " = " << r
+	   << dendl;
   return r;
 }
 
 int BlueStore::omap_get_keys(
-  coll_t cid,              ///< [in] Collection containing oid
+  const coll_t& cid,              ///< [in] Collection containing oid
   const ghobject_t &oid, ///< [in] Object containing omap
   set<string> *keys      ///< [out] Keys defined on oid
   )
 {
-  dout(15) << __func__ << " " << cid << " oid " << oid << dendl;
-  CollectionRef c = _get_collection(cid);
+  CollectionHandle c = _get_collection(cid);
   if (!c)
     return -ENOENT;
+  return omap_get_keys(c, oid, keys);
+}
+
+int BlueStore::omap_get_keys(
+  CollectionHandle &c_,              ///< [in] Collection containing oid
+  const ghobject_t &oid, ///< [in] Object containing omap
+  set<string> *keys      ///< [out] Keys defined on oid
+  )
+{
+  Collection *c = static_cast<Collection*>(c_.get());
+  dout(15) << __func__ << " " << c->get_cid() << " oid " << oid << dendl;
+  if (!c->exists)
+    return -ENOENT;
   RWLock::RLocker l(c->lock);
   int r = 0;
   OnodeRef o = c->get_onode(oid, false);
@@ -3081,21 +3302,35 @@ int BlueStore::omap_get_keys(
     }
   }
  out:
-  dout(10) << __func__ << " " << cid << " oid " << oid << " = " << r << dendl;
+  dout(10) << __func__ << " " << c->get_cid() << " oid " << oid << " = " << r
+	   << dendl;
   return r;
 }
 
 int BlueStore::omap_get_values(
-  coll_t cid,                    ///< [in] Collection containing oid
+  const coll_t& cid,                    ///< [in] Collection containing oid
   const ghobject_t &oid,       ///< [in] Object containing omap
   const set<string> &keys,     ///< [in] Keys to get
   map<string, bufferlist> *out ///< [out] Returned keys and values
   )
 {
-  dout(15) << __func__ << " " << cid << " oid " << oid << dendl;
-  CollectionRef c = _get_collection(cid);
+  CollectionHandle c = _get_collection(cid);
   if (!c)
     return -ENOENT;
+  return omap_get_values(c, oid, keys, out);
+}
+
+int BlueStore::omap_get_values(
+  CollectionHandle &c_,        ///< [in] Collection containing oid
+  const ghobject_t &oid,       ///< [in] Object containing omap
+  const set<string> &keys,     ///< [in] Keys to get
+  map<string, bufferlist> *out ///< [out] Returned keys and values
+  )
+{
+  Collection *c = static_cast<Collection*>(c_.get());
+  dout(15) << __func__ << " " << c->get_cid() << " oid " << oid << dendl;
+  if (!c->exists)
+    return -ENOENT;
   RWLock::RLocker l(c->lock);
   int r = 0;
   OnodeRef o = c->get_onode(oid, false);
@@ -3117,21 +3352,35 @@ int BlueStore::omap_get_values(
     }
   }
  out:
-  dout(10) << __func__ << " " << cid << " oid " << oid << " = " << r << dendl;
+  dout(10) << __func__ << " " << c->get_cid() << " oid " << oid << " = " << r
+	   << dendl;
   return r;
 }
 
 int BlueStore::omap_check_keys(
-  coll_t cid,                ///< [in] Collection containing oid
+  const coll_t& cid,                ///< [in] Collection containing oid
   const ghobject_t &oid,   ///< [in] Object containing omap
   const set<string> &keys, ///< [in] Keys to check
   set<string> *out         ///< [out] Subset of keys defined on oid
   )
 {
-  dout(15) << __func__ << " " << cid << " oid " << oid << dendl;
-  CollectionRef c = _get_collection(cid);
+  CollectionHandle c = _get_collection(cid);
   if (!c)
     return -ENOENT;
+  return omap_check_keys(c, oid, keys, out);
+}
+
+int BlueStore::omap_check_keys(
+  CollectionHandle &c_,    ///< [in] Collection containing oid
+  const ghobject_t &oid,   ///< [in] Object containing omap
+  const set<string> &keys, ///< [in] Keys to check
+  set<string> *out         ///< [out] Subset of keys defined on oid
+  )
+{
+  Collection *c = static_cast<Collection*>(c_.get());
+  dout(15) << __func__ << " " << c->get_cid() << " oid " << oid << dendl;
+  if (!c->exists)
+    return -ENOENT;
   RWLock::RLocker l(c->lock);
   int r = 0;
   OnodeRef o = c->get_onode(oid, false);
@@ -3156,25 +3405,37 @@ int BlueStore::omap_check_keys(
     }
   }
  out:
-  dout(10) << __func__ << " " << cid << " oid " << oid << " = " << r << dendl;
+  dout(10) << __func__ << " " << c->get_cid() << " oid " << oid << " = " << r
+	   << dendl;
   return r;
 }
 
 ObjectMap::ObjectMapIterator BlueStore::get_omap_iterator(
-  coll_t cid,              ///< [in] collection
+  const coll_t& cid,              ///< [in] collection
   const ghobject_t &oid  ///< [in] object
   )
 {
-
-  dout(10) << __func__ << " " << cid << " " << oid << dendl;
-  CollectionRef c = _get_collection(cid);
+  CollectionHandle c = _get_collection(cid);
   if (!c) {
     dout(10) << __func__ << " " << cid << "doesn't exist" <<dendl;
     return ObjectMap::ObjectMapIterator();
   }
+  return get_omap_iterator(c, oid);
+}
+
+ObjectMap::ObjectMapIterator BlueStore::get_omap_iterator(
+  CollectionHandle &c_,              ///< [in] collection
+  const ghobject_t &oid  ///< [in] object
+  )
+{
+  Collection *c = static_cast<Collection*>(c_.get());
+  dout(10) << __func__ << " " << c->get_cid() << " " << oid << dendl;
+  if (!c->exists) {
+    return ObjectMap::ObjectMapIterator();
+  }
   RWLock::RLocker l(c->lock);
   OnodeRef o = c->get_onode(oid, false);
-  if (!o) {
+  if (!o || !o->exists) {
     dout(10) << __func__ << " " << oid << "doesn't exist" <<dendl;
     return ObjectMap::ObjectMapIterator();
   }
@@ -3195,8 +3456,9 @@ int BlueStore::_open_super_meta()
     nid_max = 0;
     bufferlist bl;
     db->get(PREFIX_SUPER, "nid_max", &bl);
+    bufferlist::iterator p = bl.begin();
     try {
-      ::decode(nid_max, bl);
+      ::decode(nid_max, p);
     } catch (buffer::error& e) {
     }
     dout(10) << __func__ << " old nid_max " << nid_max << dendl;
@@ -3223,7 +3485,7 @@ void BlueStore::_assign_nid(TransContext *txc, OnodeRef o)
 {
   if (o->onode.nid)
     return;
-  Mutex::Locker l(nid_lock);
+  std::lock_guard<std::mutex> l(nid_lock);
   o->onode.nid = ++nid_last;
   dout(20) << __func__ << " " << o->onode.nid << dendl;
   if (nid_last > nid_max) {
@@ -3245,19 +3507,19 @@ BlueStore::TransContext *BlueStore::_txc_create(OpSequencer *osr)
 }
 
 void BlueStore::_txc_release(
-  TransContext *txc, CollectionRef& c, EnodeRef& e, uint32_t hash,
+  TransContext *txc, CollectionRef& c, OnodeRef& o,
   uint64_t offset, uint64_t length,
   bool shared)
 {
   if (shared) {
     vector<bluestore_extent_t> release;
-    if (!e)
-      e = c->get_enode(hash);
-    e->ref_map.put(offset, length, &release);
+    if (!o->enode)
+      o->enode = c->get_enode(o->oid.hobj.get_hash());
+    o->enode->ref_map.put(offset, length, &release);
     dout(10) << __func__ << " " << offset << "~" << length
-	     << " shared: ref_map now " << e->ref_map
+	     << " shared: ref_map now " << o->enode->ref_map
 	     << " releasing " << release << dendl;
-    txc->write_enode(e);
+    txc->write_enode(o->enode);
     for (auto& p : release) {
       txc->released.insert(p.offset, p.length);
     }
@@ -3274,6 +3536,7 @@ void BlueStore::_txc_state_proc(TransContext *txc)
 	     << " " << txc->get_state_name() << dendl;
     switch (txc->state) {
     case TransContext::STATE_PREPARE:
+      txc->log_state_latency(logger, l_bluestore_state_prepare_lat);
       if (txc->ioc.has_aios()) {
 	txc->state = TransContext::STATE_AIO_WAIT;
 	_txc_aio_submit(txc);
@@ -3282,30 +3545,34 @@ void BlueStore::_txc_state_proc(TransContext *txc)
       // ** fall-thru **
 
     case TransContext::STATE_AIO_WAIT:
+      txc->log_state_latency(logger, l_bluestore_state_aio_wait_lat);
       _txc_finish_io(txc);  // may trigger blocked txc's too
       return;
 
     case TransContext::STATE_IO_DONE:
-      assert(txc->osr->qlock.is_locked());  // see _txc_finish_io
+      //assert(txc->osr->qlock.is_locked());  // see _txc_finish_io
+      txc->log_state_latency(logger, l_bluestore_state_io_done_lat);
       txc->state = TransContext::STATE_KV_QUEUED;
       if (!g_conf->bluestore_sync_transaction) {
-	Mutex::Locker l(kv_lock);
+	std::lock_guard<std::mutex> l(kv_lock);
 	if (g_conf->bluestore_sync_submit_transaction) {
 	  db->submit_transaction(txc->t);
 	}
 	kv_queue.push_back(txc);
-	kv_cond.SignalOne();
+	kv_cond.notify_one();
 	return;
       }
       db->submit_transaction_sync(txc->t);
       break;
 
     case TransContext::STATE_KV_QUEUED:
+      txc->log_state_latency(logger, l_bluestore_state_kv_queued_lat);
       txc->state = TransContext::STATE_KV_DONE;
       _txc_finish_kv(txc);
       // ** fall-thru **
 
     case TransContext::STATE_KV_DONE:
+      txc->log_state_latency(logger, l_bluestore_state_kv_done_lat);
       if (txc->wal_txn) {
 	txc->state = TransContext::STATE_WAL_QUEUED;
 	if (g_conf->bluestore_sync_wal_apply) {
@@ -3319,6 +3586,7 @@ void BlueStore::_txc_state_proc(TransContext *txc)
       break;
 
     case TransContext::STATE_WAL_APPLYING:
+      txc->log_state_latency(logger, l_bluestore_state_wal_applying_lat);
       if (txc->ioc.has_aios()) {
 	txc->state = TransContext::STATE_WAL_AIO_WAIT;
 	_txc_aio_submit(txc);
@@ -3327,14 +3595,17 @@ void BlueStore::_txc_state_proc(TransContext *txc)
       // ** fall-thru **
 
     case TransContext::STATE_WAL_AIO_WAIT:
+      txc->log_state_latency(logger, l_bluestore_state_wal_aio_wait_lat);
       _wal_finish(txc);
       return;
 
     case TransContext::STATE_WAL_CLEANUP:
+      txc->log_state_latency(logger, l_bluestore_state_wal_cleanup_lat);
       txc->state = TransContext::STATE_FINISHING;
       // ** fall-thru **
 
     case TransContext::TransContext::STATE_FINISHING:
+      txc->log_state_latency(logger, l_bluestore_state_finishing_lat);
       _txc_finish(txc);
       return;
 
@@ -3357,7 +3628,7 @@ void BlueStore::_txc_finish_io(TransContext *txc)
    */
 
   OpSequencer *osr = txc->osr.get();
-  Mutex::Locker l(osr->qlock);
+  std::lock_guard<std::mutex> l(osr->qlock);
   txc->state = TransContext::STATE_IO_DONE;
 
   OpSequencer::q_list_t::iterator p = osr->q.iterator_to(*txc);
@@ -3390,10 +3661,10 @@ int BlueStore::_txc_finalize(OpSequencer *osr, TransContext *txc)
        ++p) {
     bufferlist bl;
     ::encode((*p)->onode, bl);
-    dout(20) << " onode " << (*p)->oid << " is " << bl.length() << dendl;
+    dout(20) << "  onode " << (*p)->oid << " is " << bl.length() << dendl;
     txc->t->set(PREFIX_OBJ, (*p)->key, bl);
 
-    Mutex::Locker l((*p)->flush_lock);
+    std::lock_guard<std::mutex> l((*p)->flush_lock);
     (*p)->flush_txns.insert(txc);
   }
 
@@ -3461,13 +3732,13 @@ void BlueStore::_txc_finish(TransContext *txc)
   for (set<OnodeRef>::iterator p = txc->onodes.begin();
        p != txc->onodes.end();
        ++p) {
-    Mutex::Locker l((*p)->flush_lock);
+    std::lock_guard<std::mutex> l((*p)->flush_lock);
     dout(20) << __func__ << " onode " << *p << " had " << (*p)->flush_txns
 	     << dendl;
     assert((*p)->flush_txns.count(txc));
     (*p)->flush_txns.erase(txc);
     if ((*p)->flush_txns.empty())
-      (*p)->flush_cond.Signal();
+      (*p)->flush_cond.notify_all();
   }
 
   // clear out refs
@@ -3482,16 +3753,17 @@ void BlueStore::_txc_finish(TransContext *txc)
   throttle_wal_bytes.put(txc->bytes);
 
   OpSequencerRef osr = txc->osr;
-  osr->qlock.Lock();
-  txc->state = TransContext::STATE_DONE;
-  osr->qlock.Unlock();
+  {
+    std::lock_guard<std::mutex> l(osr->qlock);
+    txc->state = TransContext::STATE_DONE;
+  }
 
   _osr_reap_done(osr.get());
 }
 
 void BlueStore::_osr_reap_done(OpSequencer *osr)
 {
-  Mutex::Locker l(osr->qlock);
+  std::lock_guard<std::mutex> l(osr->qlock);
   dout(20) << __func__ << " osr " << osr << dendl;
   while (!osr->q.empty()) {
     TransContext *txc = &osr->q.front();
@@ -3506,8 +3778,9 @@ void BlueStore::_osr_reap_done(OpSequencer *osr)
     }
 
     osr->q.pop_front();
+    txc->log_state_latency(logger, l_bluestore_state_done_lat);
     delete txc;
-    osr->qcond.Signal();
+    osr->qcond.notify_all();
     if (osr->q.empty())
       dout(20) << __func__ << " osr " << osr << " q now empty" << dendl;
   }
@@ -3516,7 +3789,7 @@ void BlueStore::_osr_reap_done(OpSequencer *osr)
 void BlueStore::_kv_sync_thread()
 {
   dout(10) << __func__ << " start" << dendl;
-  kv_lock.Lock();
+  std::unique_lock<std::mutex> l(kv_lock);
   while (true) {
     assert(kv_committing.empty());
     assert(wal_cleaning.empty());
@@ -3524,8 +3797,8 @@ void BlueStore::_kv_sync_thread()
       if (kv_stop)
 	break;
       dout(20) << __func__ << " sleep" << dendl;
-      kv_sync_cond.Signal();
-      kv_cond.Wait(kv_lock);
+      kv_sync_cond.notify_all();
+      kv_cond.wait(l);
       dout(20) << __func__ << " wake" << dendl;
     } else {
       dout(20) << __func__ << " committing " << kv_queue.size()
@@ -3533,7 +3806,7 @@ void BlueStore::_kv_sync_thread()
       kv_committing.swap(kv_queue);
       wal_cleaning.swap(wal_cleanup_queue);
       utime_t start = ceph_clock_now(NULL);
-      kv_lock.Unlock();
+      l.unlock();
 
       dout(30) << __func__ << " committing txc " << kv_committing << dendl;
       dout(30) << __func__ << " wal_cleaning txc " << wal_cleaning << dendl;
@@ -3682,10 +3955,9 @@ void BlueStore::_kv_sync_thread()
 	}
       }
 
-      kv_lock.Lock();
+      l.lock();
     }
   }
-  kv_lock.Unlock();
   dout(10) << __func__ << " finish" << dendl;
 }
 
@@ -3703,6 +3975,7 @@ int BlueStore::_wal_apply(TransContext *txc)
 {
   bluestore_wal_transaction_t& wt = *txc->wal_txn;
   dout(20) << __func__ << " txc " << txc << " seq " << wt.seq << dendl;
+  txc->log_state_latency(logger, l_bluestore_state_wal_queued_lat);
   txc->state = TransContext::STATE_WAL_APPLYING;
 
   assert(txc->ioc.pending_aios.empty());
@@ -3723,10 +3996,10 @@ int BlueStore::_wal_finish(TransContext *txc)
   bluestore_wal_transaction_t& wt = *txc->wal_txn;
   dout(20) << __func__ << " txc " << " seq " << wt.seq << txc << dendl;
 
-  Mutex::Locker l(kv_lock);
+  std::lock_guard<std::mutex> l(kv_lock);
   txc->state = TransContext::STATE_WAL_CLEANUP;
   wal_cleanup_queue.push_back(txc);
-  kv_cond.SignalOne();
+  kv_cond.notify_one();
   return 0;
 }
 
@@ -3734,6 +4007,7 @@ int BlueStore::_do_wal_op(bluestore_wal_op_t& wo, IOContext *ioc)
 {
   const uint64_t block_size = bdev->get_block_size();
   const uint64_t block_mask = ~(block_size - 1);
+  int r = 0;
 
   // read all the overlay data first for apply
   _do_read_all_overlays(wo);
@@ -3760,7 +4034,8 @@ int BlueStore::_do_wal_op(bluestore_wal_op_t& wo, IOContext *ioc)
       offset = offset & block_mask;
       dout(20) << __func__ << "  reading initial partial block "
 	       << src_offset << "~" << block_size << dendl;
-      bdev->read(src_offset, block_size, &first, ioc, true);
+      r = bdev->read(src_offset, block_size, &first, ioc, true);
+      assert(r == 0);
       bufferlist t;
       t.substr_of(first, 0, first_len);
       t.claim_append(bl);
@@ -3778,7 +4053,8 @@ int BlueStore::_do_wal_op(bluestore_wal_op_t& wo, IOContext *ioc)
       } else {
 	dout(20) << __func__ << "  reading trailing partial block "
 		 << last_offset << "~" << block_size << dendl;
-	bdev->read(last_offset, block_size, &last, ioc, true);
+	r = bdev->read(last_offset, block_size, &last, ioc, true);
+        assert(r == 0);
       }
       bufferlist t;
       uint64_t endoff = wo.extent.end() & ~block_mask;
@@ -3786,7 +4062,8 @@ int BlueStore::_do_wal_op(bluestore_wal_op_t& wo, IOContext *ioc)
       bl.claim_append(t);
     }
     assert((bl.length() & ~block_mask) == 0);
-    bdev->aio_write(offset, bl, ioc, true);
+    r = bdev->aio_write(offset, bl, ioc, true);
+    assert(r == 0);
   }
   break;
 
@@ -3799,11 +4076,12 @@ int BlueStore::_do_wal_op(bluestore_wal_op_t& wo, IOContext *ioc)
     assert(wo.extent.length == wo.src_extent.length);
     assert((wo.src_extent.offset & ~block_mask) == 0);
     bufferlist bl;
-    int r = bdev->read(wo.src_extent.offset, wo.src_extent.length, &bl, ioc,
+    r = bdev->read(wo.src_extent.offset, wo.src_extent.length, &bl, ioc,
 		       true);
-    assert(r >= 0);
+    assert(r == 0);
     assert(bl.length() == wo.extent.length);
-    bdev->aio_write(wo.extent.offset, bl, ioc, true);
+    r = bdev->aio_write(wo.extent.offset, bl, ioc, true);
+    assert(r == 0);
   }
   break;
 
@@ -3818,10 +4096,12 @@ int BlueStore::_do_wal_op(bluestore_wal_op_t& wo, IOContext *ioc)
       uint64_t first_offset = offset & block_mask;
       dout(20) << __func__ << "  reading initial partial block "
 	       << first_offset << "~" << block_size << dendl;
-      bdev->read(first_offset, block_size, &first, ioc, true);
+      r = bdev->read(first_offset, block_size, &first, ioc, true);
+      assert(r == 0);
       size_t z_len = MIN(block_size - first_len, length);
       memset(first.c_str() + first_len, 0, z_len);
-      bdev->aio_write(first_offset, first, ioc, true);
+      r = bdev->aio_write(first_offset, first, ioc, true);
+      assert(r == 0);
       offset += block_size - first_len;
       length -= z_len;
     }
@@ -3829,7 +4109,8 @@ int BlueStore::_do_wal_op(bluestore_wal_op_t& wo, IOContext *ioc)
     if (length >= block_size) {
       uint64_t middle_len = length & block_mask;
       dout(20) << __func__ << "  zero " << offset << "~" << length << dendl;
-      bdev->aio_zero(offset, middle_len, ioc);
+      r = bdev->aio_zero(offset, middle_len, ioc);
+      assert(r == 0);
       offset += middle_len;
       length -= middle_len;
     }
@@ -3839,9 +4120,11 @@ int BlueStore::_do_wal_op(bluestore_wal_op_t& wo, IOContext *ioc)
       bufferlist last;
       dout(20) << __func__ << "  reading trailing partial block "
 	       << offset << "~" << block_size << dendl;
-      bdev->read(offset, block_size, &last, ioc, true);
+      r = bdev->read(offset, block_size, &last, ioc, true);
+      assert(r == 0);
       memset(last.c_str(), 0, length);
-      bdev->aio_write(offset, last, ioc, true);
+      r = bdev->aio_write(offset, last, ioc, true);
+      assert(r == 0);
     }
   }
   break;
@@ -3862,17 +4145,19 @@ int BlueStore::_wal_replay()
   for (it->lower_bound(string()); it->valid(); it->next(), ++count) {
     dout(20) << __func__ << " replay " << pretty_binary_string(it->key())
 	     << dendl;
-    TransContext *txc = _txc_create(osr.get());
-    txc->wal_txn = new bluestore_wal_transaction_t;
+    bluestore_wal_transaction_t *wal_txn = new bluestore_wal_transaction_t;
     bufferlist bl = it->value();
     bufferlist::iterator p = bl.begin();
     try {
-      ::decode(*txc->wal_txn, p);
+      ::decode(*wal_txn, p);
     } catch (buffer::error& e) {
       derr << __func__ << " failed to decode wal txn "
 	   << pretty_binary_string(it->key()) << dendl;
+      delete wal_txn;
       return -EIO;
     }
+    TransContext *txc = _txc_create(osr.get());
+    txc->wal_txn = wal_txn;
     txc->state = TransContext::STATE_KV_DONE;
     _txc_state_proc(txc);
   }
@@ -3887,7 +4172,7 @@ int BlueStore::_wal_replay()
 
 int BlueStore::queue_transactions(
     Sequencer *posr,
-    list<Transaction*>& tls,
+    vector<Transaction>& tls,
     TrackedOpRef op,
     ThreadPool::TPHandle *handle)
 {
@@ -3917,11 +4202,11 @@ int BlueStore::queue_transactions(
   txc->onreadable_sync = onreadable_sync;
   txc->oncommit = ondisk;
 
-  for (list<Transaction*>::iterator p = tls.begin(); p != tls.end(); ++p) {
-    (*p)->set_osr(osr);
-    txc->ops += (*p)->get_num_ops();
-    txc->bytes += (*p)->get_num_bytes();
-    _txc_add_transaction(txc, *p);
+  for (vector<Transaction>::iterator p = tls.begin(); p != tls.end(); ++p) {
+    (*p).set_osr(osr);
+    txc->ops += (*p).get_num_ops();
+    txc->bytes += (*p).get_num_bytes();
+    _txc_add_transaction(txc, &(*p));
   }
 
   r = _txc_finalize(osr, txc);
@@ -3946,7 +4231,14 @@ void BlueStore::_txc_aio_submit(TransContext *txc)
 int BlueStore::_txc_add_transaction(TransContext *txc, Transaction *t)
 {
   Transaction::iterator i = t->begin();
-  int pos = 0;
+
+  dout(30) << __func__ << " transaction dump:\n";
+  JSONFormatter f(true);
+  f.open_object_section("transaction");
+  t->dump(&f);
+  f.close_section();
+  f.flush(*_dout);
+  *_dout << dendl;
 
   vector<CollectionRef> cvec(i.colls.size());
   unsigned j = 0;
@@ -3958,40 +4250,144 @@ int BlueStore::_txc_add_transaction(TransContext *txc, Transaction *t)
     if (!j && !txc->first_collection)
       txc->first_collection = cvec[j];
   }
+  vector<OnodeRef> ovec(i.objects.size());
 
-  while (i.have_op()) {
+  for (int pos = 0; i.have_op(); ++pos) {
     Transaction::Op *op = i.decode_op();
     int r = 0;
-    CollectionRef &c = cvec[op->cid];
 
+    // no coll or obj
+    if (op->op == Transaction::OP_NOP)
+      continue;
+
+    // collection operations
+    CollectionRef &c = cvec[op->cid];
     switch (op->op) {
-    case Transaction::OP_NOP:
+    case Transaction::OP_RMCOLL:
+      {
+        coll_t cid = i.get_cid(op->cid);
+	r = _remove_collection(txc, cid, &c);
+	if (!r)
+	  continue;
+      }
       break;
-    case Transaction::OP_TOUCH:
+
+    case Transaction::OP_MKCOLL:
       {
-        const ghobject_t &oid = i.get_oid(op->oid);
-	r = _touch(txc, c, oid);
+	assert(!c);
+	coll_t cid = i.get_cid(op->cid);
+	r = _create_collection(txc, cid, op->split_bits, &c);
+	if (!r)
+	  continue;
+      }
+      break;
+
+    case Transaction::OP_SPLIT_COLLECTION:
+      assert(0 == "deprecated");
+      break;
+
+    case Transaction::OP_SPLIT_COLLECTION2:
+      {
+        uint32_t bits = op->split_bits;
+        uint32_t rem = op->split_rem;
+	r = _split_collection(txc, c, cvec[op->dest_cid], bits, rem);
+	if (!r)
+	  continue;
+      }
+      break;
+
+    case Transaction::OP_COLL_HINT:
+      {
+        uint32_t type = op->hint_type;
+        bufferlist hint;
+        i.decode_bl(hint);
+        bufferlist::iterator hiter = hint.begin();
+        if (type == Transaction::COLL_HINT_EXPECTED_NUM_OBJECTS) {
+          uint32_t pg_num;
+          uint64_t num_objs;
+          ::decode(pg_num, hiter);
+          ::decode(num_objs, hiter);
+          dout(10) << __func__ << " collection hint objects is a no-op, "
+		   << " pg_num " << pg_num << " num_objects " << num_objs
+		   << dendl;
+        } else {
+          // Ignore the hint
+          dout(10) << __func__ << " unknown collection hint " << type << dendl;
+        }
+	continue;
       }
       break;
 
+    case Transaction::OP_COLL_SETATTR:
+      r = -EOPNOTSUPP;
+      break;
+
+    case Transaction::OP_COLL_RMATTR:
+      r = -EOPNOTSUPP;
+      break;
+
+    case Transaction::OP_COLL_RENAME:
+      assert(0 == "not implemented");
+      break;
+    }
+    if (r < 0) {
+      dout(0) << " error " << cpp_strerror(r)
+	      << " not handled on operation " << op->op
+	      << " (op " << pos << ", counting from 0)" << dendl;
+      dout(0) << " transaction dump:\n";
+      JSONFormatter f(true);
+      f.open_object_section("transaction");
+      t->dump(&f);
+      f.close_section();
+      f.flush(*_dout);
+      *_dout << dendl;
+      assert(0 == "unexpected error");
+    }
+
+    // object operations
+    RWLock::WLocker l(c->lock);
+    OnodeRef &o = ovec[op->oid];
+    if (!o) {
+      // these operations implicitly create the object
+      bool create = false;
+      if (op->op == Transaction::OP_TOUCH ||
+	  op->op == Transaction::OP_WRITE ||
+	  op->op == Transaction::OP_ZERO) {
+	create = true;
+      }
+      ghobject_t oid = i.get_oid(op->oid);
+      o = c->get_onode(oid, create);
+      if (!create) {
+	if (!o || !o->exists) {
+	  dout(10) << __func__ << " op " << op->op << " got ENOENT on "
+		   << oid << dendl;
+	  r = -ENOENT;
+	  goto endop;
+	}
+      }
+    }
+
+    switch (op->op) {
+    case Transaction::OP_TOUCH:
+      r = _touch(txc, c, o);
+      break;
+
     case Transaction::OP_WRITE:
       {
-        const ghobject_t &oid = i.get_oid(op->oid);
         uint64_t off = op->off;
         uint64_t len = op->len;
 	uint32_t fadvise_flags = i.get_fadvise_flags();
         bufferlist bl;
         i.decode_bl(bl);
-	r = _write(txc, c, oid, off, len, bl, fadvise_flags);
+	r = _write(txc, c, o, off, len, bl, fadvise_flags);
       }
       break;
 
     case Transaction::OP_ZERO:
       {
-        const ghobject_t &oid = i.get_oid(op->oid);
         uint64_t off = op->off;
         uint64_t len = op->len;
-	r = _zero(txc, c, oid, off, len);
+	r = _zero(txc, c, o, off, len);
       }
       break;
 
@@ -4003,60 +4399,54 @@ int BlueStore::_txc_add_transaction(TransContext *txc, Transaction *t)
 
     case Transaction::OP_TRUNCATE:
       {
-        const ghobject_t& oid = i.get_oid(op->oid);
         uint64_t off = op->off;
-	r = _truncate(txc, c, oid, off);
+	r = _truncate(txc, c, o, off);
       }
       break;
 
     case Transaction::OP_REMOVE:
       {
-        const ghobject_t& oid = i.get_oid(op->oid);
-	r = _remove(txc, c, oid);
+	r = _remove(txc, c, o);
       }
       break;
 
     case Transaction::OP_SETATTR:
       {
-        const ghobject_t &oid = i.get_oid(op->oid);
         string name = i.decode_string();
         bufferlist bl;
         i.decode_bl(bl);
 	map<string, bufferptr> to_set;
 	to_set[name] = bufferptr(bl.c_str(), bl.length());
-	r = _setattrs(txc, c, oid, to_set);
+	r = _setattrs(txc, c, o, to_set);
       }
       break;
 
     case Transaction::OP_SETATTRS:
       {
-        const ghobject_t& oid = i.get_oid(op->oid);
         map<string, bufferptr> aset;
         i.decode_attrset(aset);
-	r = _setattrs(txc, c, oid, aset);
+	r = _setattrs(txc, c, o, aset);
       }
       break;
 
     case Transaction::OP_RMATTR:
       {
-        const ghobject_t &oid = i.get_oid(op->oid);
 	string name = i.decode_string();
-	r = _rmattr(txc, c, oid, name);
+	r = _rmattr(txc, c, o, name);
       }
       break;
 
     case Transaction::OP_RMATTRS:
       {
-        const ghobject_t &oid = i.get_oid(op->oid);
-	r = _rmattrs(txc, c, oid);
+	r = _rmattrs(txc, c, o);
       }
       break;
 
     case Transaction::OP_CLONE:
       {
-        const ghobject_t& oid = i.get_oid(op->oid);
         const ghobject_t& noid = i.get_oid(op->dest_oid);
-	r = _clone(txc, c, oid, noid);
+	OnodeRef no = c->get_onode(noid, true);
+	r = _clone(txc, c, o, no);
       }
       break;
 
@@ -4066,57 +4456,21 @@ int BlueStore::_txc_add_transaction(TransContext *txc, Transaction *t)
 
     case Transaction::OP_CLONERANGE2:
       {
-        const ghobject_t &oid = i.get_oid(op->oid);
-        const ghobject_t &noid = i.get_oid(op->dest_oid);
+	const ghobject_t& noid = i.get_oid(op->dest_oid);
+	OnodeRef no = c->get_onode(noid, true);
         uint64_t srcoff = op->off;
         uint64_t len = op->len;
         uint64_t dstoff = op->dest_off;
-	r = _clone_range(txc, c, oid, noid, srcoff, len, dstoff);
-      }
-      break;
-
-    case Transaction::OP_MKCOLL:
-      {
-	assert(!c);
-        coll_t cid = i.get_cid(op->cid);
-	r = _create_collection(txc, cid, op->split_bits, &c);
-      }
-      break;
-
-    case Transaction::OP_COLL_HINT:
-      {
-        uint32_t type = op->hint_type;
-        bufferlist hint;
-        i.decode_bl(hint);
-        bufferlist::iterator hiter = hint.begin();
-        if (type == Transaction::COLL_HINT_EXPECTED_NUM_OBJECTS) {
-          uint32_t pg_num;
-          uint64_t num_objs;
-          ::decode(pg_num, hiter);
-          ::decode(num_objs, hiter);
-          dout(10) << __func__ << " collection hint objects is a no-op, "
-		   << " pg_num " << pg_num << " num_objects " << num_objs
-		   << dendl;
-        } else {
-          // Ignore the hint
-          dout(10) << __func__ << " unknown collection hint " << type << dendl;
-        }
-      }
-      break;
-
-    case Transaction::OP_RMCOLL:
-      {
-        coll_t cid = i.get_cid(op->cid);
-	r = _remove_collection(txc, cid, &c);
+	r = _clone_range(txc, c, o, no, srcoff, len, dstoff);
       }
       break;
 
     case Transaction::OP_COLL_ADD:
-      assert(0 == "not implmeented");
+      assert(0 == "not implemented");
       break;
 
     case Transaction::OP_COLL_REMOVE:
-      assert(0 == "not implmeented");
+      assert(0 == "not implemented");
       break;
 
     case Transaction::OP_COLL_MOVE:
@@ -4126,80 +4480,53 @@ int BlueStore::_txc_add_transaction(TransContext *txc, Transaction *t)
     case Transaction::OP_COLL_MOVE_RENAME:
       {
 	assert(op->cid == op->dest_cid);
-        ghobject_t oldoid = i.get_oid(op->oid);
-        ghobject_t newoid = i.get_oid(op->dest_oid);
-	r = _rename(txc, c, oldoid, newoid);
+	const ghobject_t& noid = i.get_oid(op->dest_oid);
+	OnodeRef no = c->get_onode(noid, true);
+	r = _rename(txc, c, o, no, noid);
+	o.reset();
       }
       break;
 
-    case Transaction::OP_COLL_SETATTR:
-      r = -EOPNOTSUPP;
-      break;
-
-    case Transaction::OP_COLL_RMATTR:
-      r = -EOPNOTSUPP;
-      break;
-
-    case Transaction::OP_COLL_RENAME:
-      assert(0 == "not implmeneted");
-      break;
-
     case Transaction::OP_OMAP_CLEAR:
       {
-        ghobject_t oid = i.get_oid(op->oid);
-	r = _omap_clear(txc, c, oid);
+	r = _omap_clear(txc, c, o);
       }
       break;
     case Transaction::OP_OMAP_SETKEYS:
       {
-        ghobject_t oid = i.get_oid(op->oid);
 	bufferlist aset_bl;
         i.decode_attrset_bl(&aset_bl);
-	r = _omap_setkeys(txc, c, oid, aset_bl);
+	r = _omap_setkeys(txc, c, o, aset_bl);
       }
       break;
     case Transaction::OP_OMAP_RMKEYS:
       {
-        ghobject_t oid = i.get_oid(op->oid);
 	bufferlist keys_bl;
         i.decode_keyset_bl(&keys_bl);
-	r = _omap_rmkeys(txc, c, oid, keys_bl);
+	r = _omap_rmkeys(txc, c, o, keys_bl);
       }
       break;
     case Transaction::OP_OMAP_RMKEYRANGE:
       {
-        ghobject_t oid = i.get_oid(op->oid);
         string first, last;
         first = i.decode_string();
         last = i.decode_string();
-	r = _omap_rmkey_range(txc, c, oid, first, last);
+	r = _omap_rmkey_range(txc, c, o, first, last);
       }
       break;
     case Transaction::OP_OMAP_SETHEADER:
       {
-        ghobject_t oid = i.get_oid(op->oid);
         bufferlist bl;
         i.decode_bl(bl);
-	r = _omap_setheader(txc, c, oid, bl);
-      }
-      break;
-    case Transaction::OP_SPLIT_COLLECTION:
-      assert(0 == "deprecated");
-      break;
-    case Transaction::OP_SPLIT_COLLECTION2:
-      {
-        uint32_t bits = op->split_bits;
-        uint32_t rem = op->split_rem;
-	r = _split_collection(txc, c, cvec[op->dest_cid], bits, rem);
+	r = _omap_setheader(txc, c, o, bl);
       }
       break;
 
     case Transaction::OP_SETALLOCHINT:
       {
-        ghobject_t oid = i.get_oid(op->oid);
         uint64_t expected_object_size = op->expected_object_size;
         uint64_t expected_write_size = op->expected_write_size;
-	r = _setallochint(txc, c, oid,
+	r = _setallochint(txc, c, o,
 			  expected_object_size,
 			  expected_write_size);
       }
@@ -4210,6 +4537,7 @@ int BlueStore::_txc_add_transaction(TransContext *txc, Transaction *t)
       assert(0);
     }
 
+  endop:
     if (r < 0) {
       bool ok = false;
 
@@ -4252,8 +4580,6 @@ int BlueStore::_txc_add_transaction(TransContext *txc, Transaction *t)
 	assert(0 == "unexpected error");
       }
     }
-
-    ++pos;
   }
 
   return 0;
@@ -4265,18 +4591,15 @@ int BlueStore::_txc_add_transaction(TransContext *txc, Transaction *t)
 // write operations
 
 int BlueStore::_touch(TransContext *txc,
-		     CollectionRef& c,
-		     const ghobject_t& oid)
+		      CollectionRef& c,  ///< [in] collection (used here for logging its cid)
+		      OnodeRef &o)  ///< [in] onode to mark as existing
 {
-  dout(15) << __func__ << " " << c->cid << " " << oid << dendl;
+  dout(15) << __func__ << " " << c->cid << " " << o->oid << dendl;
   int r = 0;
-  RWLock::WLocker l(c->lock);
-  OnodeRef o = c->get_onode(oid, true);
-  assert(o);
   o->exists = true;
   _assign_nid(txc, o);
   txc->write_onode(o);
-  dout(10) << __func__ << " " << c->cid << " " << oid << " = " << r << dendl;
+  dout(10) << __func__ << " " << c->cid << " " << o->oid << " = " << r << dendl;
   return r;
 }
 
@@ -4484,7 +4807,8 @@ void BlueStore::_do_read_all_overlays(bluestore_wal_op_t& wo)
     string key;
     get_overlay_key(wo.nid, q->key, &key);
     bufferlist bl, bl_data;
-    db->get(PREFIX_OVERLAY, key, &bl);
+    int r = db->get(PREFIX_OVERLAY, key, &bl);
+    assert(r >= 0); 
     bl_data.substr_of(bl, q->value_offset, q->length);
     wo.data.claim_append(bl_data);
   }
@@ -4637,9 +4961,17 @@ void BlueStore::_pad_zeros_tail(
   uint64_t end = offset + *length;
   unsigned back_copy = end % block_size;
   assert(back_copy);  // or we wouldn't have been called
-  uint64_t back_pad = block_size - back_copy;
-  assert(back_copy <= *length);
-  bufferptr tail(block_size);
+  uint64_t tail_len;
+  if (back_copy <= *length) {
+    // we start at or before the block boundary
+    tail_len = block_size;
+  } else {
+    // we start partway into the tail block
+    back_copy = *length;
+    tail_len = block_size - (offset % block_size);
+  }
+  uint64_t back_pad = tail_len - back_copy;
+  bufferptr tail(tail_len);
   memcpy(tail.c_str(), bl->get_contiguous(*length - back_copy, back_copy),
 	 back_copy);
   memset(tail.c_str() + back_copy, 0, back_pad);
@@ -4846,7 +5178,6 @@ int BlueStore::_do_allocate(
     }
 
     // deallocate existing extents
-    EnodeRef enode;
     bp = o->onode.seek_extent(offset);
     while (bp != o->onode.block_map.end() &&
 	   bp->first < offset + length &&
@@ -4857,7 +5188,7 @@ int BlueStore::_do_allocate(
 	if (bp->first + bp->second.length <= offset + length) {
 	  dout(20) << "  trim tail " << bp->first << ": " << bp->second << dendl;
 	  _txc_release(
-	    txc, c, enode, o->oid.hobj.get_hash(),
+	    txc, c, o,
 	    bp->second.offset + left,
 	    bp->second.length - left,
 	    bp->second.has_flag(bluestore_extent_t::FLAG_SHARED));
@@ -4868,7 +5199,7 @@ int BlueStore::_do_allocate(
 	} else {
 	  dout(20) << "      split " << bp->first << ": " << bp->second << dendl;
 	  _txc_release(
-	    txc, c, enode, o->oid.hobj.get_hash(),
+	    txc, c, o,
 	    bp->second.offset + left, length,
 	    bp->second.has_flag(bluestore_extent_t::FLAG_SHARED));
 	  o->onode.block_map[offset + length] =
@@ -4890,7 +5221,7 @@ int BlueStore::_do_allocate(
 	  dout(20) << "  trim head " << bp->first << ": " << bp->second
 		   << " (overlap " << overlap << ")" << dendl;
 	  _txc_release(
-	    txc, c, enode, o->oid.hobj.get_hash(),
+	    txc, c, o,
 	    bp->second.offset, overlap,
 	    bp->second.has_flag(bluestore_extent_t::FLAG_SHARED));
 	  o->onode.block_map[bp->first + overlap] =
@@ -4905,7 +5236,7 @@ int BlueStore::_do_allocate(
 	} else {
 	  dout(20) << "    dealloc " << bp->first << ": " << bp->second << dendl;
 	  _txc_release(
-	    txc, c, enode, o->oid.hobj.get_hash(),
+	    txc, c, o,
 	    bp->second.offset, bp->second.length,
 	    bp->second.has_flag(bluestore_extent_t::FLAG_SHARED));
 	  hint = bp->first + bp->second.length;
@@ -5276,22 +5607,20 @@ int BlueStore::_do_write(
 }
 
 int BlueStore::_write(TransContext *txc,
-		     CollectionRef& c,
-		     const ghobject_t& oid,
+		      CollectionRef& c,
+		      OnodeRef& o,
 		     uint64_t offset, size_t length,
 		     bufferlist& bl,
 		     uint32_t fadvise_flags)
 {
-  dout(15) << __func__ << " " << c->cid << " " << oid
+  dout(15) << __func__ << " " << c->cid << " " << o->oid
 	   << " " << offset << "~" << length
 	   << dendl;
-  RWLock::WLocker l(c->lock);
-  OnodeRef o = c->get_onode(oid, true);
   _assign_nid(txc, o);
   int r = _do_write(txc, c, o, offset, length, bl, fadvise_flags);
   txc->write_onode(o);
 
-  dout(10) << __func__ << " " << c->cid << " " << oid
+  dout(10) << __func__ << " " << c->cid << " " << o->oid
 	   << " " << offset << "~" << length
 	   << " = " << r << dendl;
   return r;
@@ -5304,26 +5633,22 @@ int BlueStore::_do_write_zero(
   uint64_t offset,
   uint64_t length)
 {
-  bufferptr z(length);
-  z.zero();
   bufferlist zl;
-  zl.push_back(z);
+  zl.append_zero(length);
   return _do_write(txc, c, o, offset, length, zl, 0);
 }
 
 int BlueStore::_zero(TransContext *txc,
-		    CollectionRef& c,
-		    const ghobject_t& oid,
-		    uint64_t offset, size_t length)
+		     CollectionRef& c,
+		     OnodeRef& o,
+		     uint64_t offset, size_t length)
 {
-  dout(15) << __func__ << " " << c->cid << " " << oid
+  dout(15) << __func__ << " " << c->cid << " " << o->oid
 	   << " " << offset << "~" << length
 	   << dendl;
   int r = 0;
+  o->exists = true;
 
-  RWLock::WLocker l(c->lock);
-  EnodeRef enode;
-  OnodeRef o = c->get_onode(oid, true);
   _dump_onode(o);
   _assign_nid(txc, o);
 
@@ -5361,7 +5686,7 @@ int BlueStore::_zero(TransContext *txc,
       dout(20) << __func__ << " dealloc " << bp->first << ": "
 	       << bp->second << dendl;
       _txc_release(
-	txc, c, enode, oid.hobj.get_hash(),
+	txc, c, o,
 	bp->second.offset, bp->second.length,
 	bp->second.has_flag(bluestore_extent_t::FLAG_SHARED));
       o->onode.block_map.erase(bp++);
@@ -5375,7 +5700,6 @@ int BlueStore::_zero(TransContext *txc,
     }
     uint64_t x_len = MIN(offset + length - bp->first,
 			 bp->second.length) - x_off;
-
     if (bp->second.has_flag(bluestore_extent_t::FLAG_SHARED)) {
       uint64_t end = bp->first + x_off + x_len;
       _do_write_zero(txc, c, o, bp->first + x_off, x_len);
@@ -5384,6 +5708,11 @@ int BlueStore::_zero(TransContext *txc,
       bp = o->onode.seek_extent(end - 1);
     } else {
       // WAL
+      uint64_t end = bp->first + x_off + x_len;
+      if (end >= o->onode.size && end % block_size) {
+	dout(20) << __func__ << " past eof, padding out tail block" << dendl;
+	x_len += block_size - (end % block_size);
+      }
       bluestore_wal_op_t *op = _get_wal_op(txc, o);
       op->op = bluestore_wal_op_t::OP_ZERO;
       op->extent.offset = bp->second.offset + x_off;
@@ -5391,7 +5720,7 @@ int BlueStore::_zero(TransContext *txc,
       dout(20) << __func__ << "  wal zero " << x_off << "~" << x_len
 	       << " " << op->extent << dendl;
     }
-    bp++;
+    ++bp;
   }
 
   if (offset + length > o->onode.size) {
@@ -5401,7 +5730,7 @@ int BlueStore::_zero(TransContext *txc,
   }
   txc->write_onode(o);
 
-  dout(10) << __func__ << " " << c->cid << " " << oid
+  dout(10) << __func__ << " " << c->cid << " " << o->oid
 	   << " " << offset << "~" << length
 	   << " = " << r << dendl;
   return r;
@@ -5413,7 +5742,6 @@ int BlueStore::_do_truncate(
   uint64_t block_size = bdev->get_block_size();
   uint64_t min_alloc_size = g_conf->bluestore_min_alloc_size;
   uint64_t alloc_end = ROUND_UP_TO(offset, min_alloc_size);
-  EnodeRef enode;
 
   // ensure any wal IO has completed before we truncate off any extents
   // they may touch.
@@ -5439,7 +5767,7 @@ int BlueStore::_do_truncate(
       dout(20) << __func__ << " dealloc " << bp->first << ": "
 	       << bp->second << dendl;
       _txc_release(
-	txc, c, enode, o->oid.hobj.get_hash(),
+	txc, c, o,
 	bp->second.offset, bp->second.length,
 	bp->second.has_flag(bluestore_extent_t::FLAG_SHARED));
       if (bp != o->onode.block_map.begin()) {
@@ -5457,7 +5785,7 @@ int BlueStore::_do_truncate(
       dout(20) << __func__ << " trunc " << bp->first << ": " << bp->second
 	       << " to " << newlen << dendl;
       _txc_release(
-	txc, c, enode, o->oid.hobj.get_hash(),
+	txc, c, o,
 	bp->second.offset + newlen, bp->second.length - newlen,
 	bp->second.has_flag(bluestore_extent_t::FLAG_SHARED));
       bp->second.length = newlen;
@@ -5557,25 +5885,15 @@ int BlueStore::_do_truncate(
 }
 
 int BlueStore::_truncate(TransContext *txc,
-			CollectionRef& c,
-			const ghobject_t& oid,
-			uint64_t offset)
+			 CollectionRef& c,
+			 OnodeRef& o,
+			 uint64_t offset)
 {
-  dout(15) << __func__ << " " << c->cid << " " << oid
+  dout(15) << __func__ << " " << c->cid << " " << o->oid
 	   << " " << offset
 	   << dendl;
-  int r = 0;
-
-  RWLock::WLocker l(c->lock);
-  OnodeRef o = c->get_onode(oid, false);
-  if (!o || !o->exists) {
-    r = -ENOENT;
-    goto out;
-  }
-  r = _do_truncate(txc, c, o, offset);
-
- out:
-  dout(10) << __func__ << " " << c->cid << " " << oid
+  int r = _do_truncate(txc, c, o, offset);
+  dout(10) << __func__ << " " << c->cid << " " << o->oid
 	   << " " << offset
 	   << " = " << r << dendl;
   return r;
@@ -5598,68 +5916,42 @@ int BlueStore::_do_remove(
 }
 
 int BlueStore::_remove(TransContext *txc,
-		      CollectionRef& c,
-		      const ghobject_t& oid)
+		       CollectionRef& c,
+		       OnodeRef &o)
 {
-  dout(15) << __func__ << " " << c->cid << " " << oid << dendl;
-  int r;
-  RWLock::WLocker l(c->lock);
-  OnodeRef o = c->get_onode(oid, false);
-  if (!o || !o->exists) {
-    r = -ENOENT;
-    goto out;
-  }
-  r = _do_remove(txc, c, o);
-
- out:
-  dout(10) << __func__ << " " << c->cid << " " << oid << " = " << r << dendl;
+  dout(15) << __func__ << " " << c->cid << " " << o->oid << dendl;
+  int r = _do_remove(txc, c, o);
+  dout(10) << __func__ << " " << c->cid << " " << o->oid << " = " << r << dendl;
   return r;
 }
 
 int BlueStore::_setattr(TransContext *txc,
-		       CollectionRef& c,
-		       const ghobject_t& oid,
-		       const string& name,
-		       bufferptr& val)
+			CollectionRef& c,
+			OnodeRef& o,
+			const string& name,
+			bufferptr& val)
 {
-  dout(15) << __func__ << " " << c->cid << " " << oid
+  dout(15) << __func__ << " " << c->cid << " " << o->oid
 	   << " " << name << " (" << val.length() << " bytes)"
 	   << dendl;
   int r = 0;
-
-  RWLock::WLocker l(c->lock);
-  OnodeRef o = c->get_onode(oid, false);
-  if (!o || !o->exists) {
-    r = -ENOENT;
-    goto out;
-  }
   o->onode.attrs[name] = val;
   txc->write_onode(o);
-  r = 0;
-
- out:
-  dout(10) << __func__ << " " << c->cid << " " << oid
+  dout(10) << __func__ << " " << c->cid << " " << o->oid
 	   << " " << name << " (" << val.length() << " bytes)"
 	   << " = " << r << dendl;
   return r;
 }
 
 int BlueStore::_setattrs(TransContext *txc,
-			CollectionRef& c,
-			const ghobject_t& oid,
-			const map<string,bufferptr>& aset)
+			 CollectionRef& c,
+			 OnodeRef& o,
+			 const map<string,bufferptr>& aset)
 {
-  dout(15) << __func__ << " " << c->cid << " " << oid
+  dout(15) << __func__ << " " << c->cid << " " << o->oid
 	   << " " << aset.size() << " keys"
 	   << dendl;
   int r = 0;
-
-  RWLock::WLocker l(c->lock);
-  OnodeRef o = c->get_onode(oid, false);
-  if (!o || !o->exists) {
-    r = -ENOENT;
-    goto out;
-  }
   for (map<string,bufferptr>::const_iterator p = aset.begin();
        p != aset.end(); ++p) {
     if (p->second.is_partial())
@@ -5668,10 +5960,7 @@ int BlueStore::_setattrs(TransContext *txc,
       o->onode.attrs[p->first] = p->second;
   }
   txc->write_onode(o);
-  r = 0;
-
- out:
-  dout(10) << __func__ << " " << c->cid << " " << oid
+  dout(10) << __func__ << " " << c->cid << " " << o->oid
 	   << " " << aset.size() << " keys"
 	   << " = " << r << dendl;
   return r;
@@ -5679,49 +5968,29 @@ int BlueStore::_setattrs(TransContext *txc,
 
 
 int BlueStore::_rmattr(TransContext *txc,
-		      CollectionRef& c,
-		      const ghobject_t& oid,
-		      const string& name)
+		       CollectionRef& c,
+		       OnodeRef& o,
+		       const string& name)
 {
-  dout(15) << __func__ << " " << c->cid << " " << oid
+  dout(15) << __func__ << " " << c->cid << " " << o->oid
 	   << " " << name << dendl;
   int r = 0;
-
-  RWLock::WLocker l(c->lock);
-  OnodeRef o = c->get_onode(oid, false);
-  if (!o || !o->exists) {
-    r = -ENOENT;
-    goto out;
-  }
   o->onode.attrs.erase(name);
   txc->write_onode(o);
-  r = 0;
-
- out:
-  dout(10) << __func__ << " " << c->cid << " " << oid
+  dout(10) << __func__ << " " << c->cid << " " << o->oid
 	   << " " << name << " = " << r << dendl;
   return r;
 }
 
 int BlueStore::_rmattrs(TransContext *txc,
-		       CollectionRef& c,
-		       const ghobject_t& oid)
+			CollectionRef& c,
+			OnodeRef& o)
 {
-  dout(15) << __func__ << " " << c->cid << " " << oid << dendl;
+  dout(15) << __func__ << " " << c->cid << " " << o->oid << dendl;
   int r = 0;
-
-  RWLock::WLocker l(c->lock);
-  OnodeRef o = c->get_onode(oid, false);
-  if (!o || !o->exists) {
-    r = -ENOENT;
-    goto out;
-  }
   o->onode.attrs.clear();
   txc->write_onode(o);
-  r = 0;
-
- out:
-  dout(10) << __func__ << " " << c->cid << " " << oid << " = " << r << dendl;
+  dout(10) << __func__ << " " << c->cid << " " << o->oid << " = " << r << dendl;
   return r;
 }
 
@@ -5744,44 +6013,27 @@ void BlueStore::_do_omap_clear(TransContext *txc, uint64_t id)
 }
 
 int BlueStore::_omap_clear(TransContext *txc,
-			  CollectionRef& c,
-			  const ghobject_t& oid)
+			   CollectionRef& c,
+			   OnodeRef& o)
 {
-  dout(15) << __func__ << " " << c->cid << " " << oid << dendl;
+  dout(15) << __func__ << " " << c->cid << " " << o->oid << dendl;
   int r = 0;
-
-  RWLock::WLocker l(c->lock);
-  OnodeRef o = c->get_onode(oid, false);
-  if (!o || !o->exists) {
-    r = -ENOENT;
-    goto out;
-  }
   if (o->onode.omap_head != 0) {
     _do_omap_clear(txc, o->onode.omap_head);
   }
-  r = 0;
-
- out:
-  dout(10) << __func__ << " " << c->cid << " " << oid << " = " << r << dendl;
+  dout(10) << __func__ << " " << c->cid << " " << o->oid << " = " << r << dendl;
   return r;
 }
 
 int BlueStore::_omap_setkeys(TransContext *txc,
-			    CollectionRef& c,
-			    const ghobject_t& oid,
-			    bufferlist &bl)
+			     CollectionRef& c,
+			     OnodeRef& o,
+			     bufferlist &bl)
 {
-  dout(15) << __func__ << " " << c->cid << " " << oid << dendl;
-  int r = 0;
+  dout(15) << __func__ << " " << c->cid << " " << o->oid << dendl;
+  int r;
   bufferlist::iterator p = bl.begin();
   __u32 num;
-
-  RWLock::WLocker l(c->lock);
-  OnodeRef o = c->get_onode(oid, false);
-  if (!o || !o->exists) {
-    r = -ENOENT;
-    goto out;
-  }
   if (!o->onode.omap_head) {
     o->onode.omap_head = o->onode.nid;
     txc->write_onode(o);
@@ -5799,27 +6051,18 @@ int BlueStore::_omap_setkeys(TransContext *txc,
     txc->t->set(PREFIX_OMAP, final_key, value);
   }
   r = 0;
-
- out:
-  dout(10) << __func__ << " " << c->cid << " " << oid << " = " << r << dendl;
+  dout(10) << __func__ << " " << c->cid << " " << o->oid << " = " << r << dendl;
   return r;
 }
 
 int BlueStore::_omap_setheader(TransContext *txc,
-			      CollectionRef& c,
-			      const ghobject_t& oid,
-			      bufferlist& bl)
+			       CollectionRef& c,
+			       OnodeRef &o,
+			       bufferlist& bl)
 {
-  dout(15) << __func__ << " " << c->cid << " " << oid << dendl;
-  int r = 0;
-
-  RWLock::WLocker l(c->lock);
-  OnodeRef o = c->get_onode(oid, false);
+  dout(15) << __func__ << " " << c->cid << " " << o->oid << dendl;
+  int r;
   string key;
-  if (!o || !o->exists) {
-    r = -ENOENT;
-    goto out;
-  }
   if (!o->onode.omap_head) {
     o->onode.omap_head = o->onode.nid;
     txc->write_onode(o);
@@ -5827,28 +6070,20 @@ int BlueStore::_omap_setheader(TransContext *txc,
   get_omap_header(o->onode.omap_head, &key);
   txc->t->set(PREFIX_OMAP, key, bl);
   r = 0;
-
- out:
-  dout(10) << __func__ << " " << c->cid << " " << oid << " = " << r << dendl;
+  dout(10) << __func__ << " " << c->cid << " " << o->oid << " = " << r << dendl;
   return r;
 }
 
 int BlueStore::_omap_rmkeys(TransContext *txc,
-			   CollectionRef& c,
-			   const ghobject_t& oid,
-			   bufferlist& bl)
+			    CollectionRef& c,
+			    OnodeRef& o,
+			    bufferlist& bl)
 {
-  dout(15) << __func__ << " " << c->cid << " " << oid << dendl;
+  dout(15) << __func__ << " " << c->cid << " " << o->oid << dendl;
   int r = 0;
   bufferlist::iterator p = bl.begin();
   __u32 num;
 
-  RWLock::WLocker l(c->lock);
-  OnodeRef o = c->get_onode(oid, false);
-  if (!o || !o->exists) {
-    r = -ENOENT;
-    goto out;
-  }
   if (!o->onode.omap_head) {
     r = 0;
     goto out;
@@ -5866,28 +6101,20 @@ int BlueStore::_omap_rmkeys(TransContext *txc,
   r = 0;
 
  out:
-  dout(10) << __func__ << " " << c->cid << " " << oid << " = " << r << dendl;
+  dout(10) << __func__ << " " << c->cid << " " << o->oid << " = " << r << dendl;
   return r;
 }
 
 int BlueStore::_omap_rmkey_range(TransContext *txc,
-				CollectionRef& c,
-				const ghobject_t& oid,
-				const string& first, const string& last)
+				 CollectionRef& c,
+				 OnodeRef& o,
+				 const string& first, const string& last)
 {
-  dout(15) << __func__ << " " << c->cid << " " << oid << dendl;
-  int r = 0;
+  dout(15) << __func__ << " " << c->cid << " " << o->oid << dendl;
   KeyValueDB::Iterator it;
   string key_first, key_last;
-
-  RWLock::WLocker l(c->lock);
-  OnodeRef o = c->get_onode(oid, false);
-  if (!o || !o->exists) {
-    r = -ENOENT;
-    goto out;
-  }
+  int r = 0;
   if (!o->onode.omap_head) {
-    r = 0;
     goto out;
   }
   it = db->get_iterator(PREFIX_OMAP);
@@ -5907,34 +6134,25 @@ int BlueStore::_omap_rmkey_range(TransContext *txc,
   r = 0;
 
  out:
-  dout(10) << __func__ << " " << c->cid << " " << oid << " = " << r << dendl;
+  dout(10) << __func__ << " " << c->cid << " " << o->oid << " = " << r << dendl;
   return r;
 }
 
 int BlueStore::_setallochint(TransContext *txc,
-			    CollectionRef& c,
-			    const ghobject_t& oid,
-			    uint64_t expected_object_size,
-			    uint64_t expected_write_size)
+			     CollectionRef& c,
+			     OnodeRef& o,
+			     uint64_t expected_object_size,
+			     uint64_t expected_write_size)
 {
-  dout(15) << __func__ << " " << c->cid << " " << oid
+  dout(15) << __func__ << " " << c->cid << " " << o->oid
 	   << " object_size " << expected_object_size
 	   << " write_size " << expected_write_size
 	   << dendl;
   int r = 0;
-  RWLock::WLocker l(c->lock);
-  OnodeRef o = c->get_onode(oid, false);
-  if (!o || !o->exists) {
-    r = -ENOENT;
-    goto out;
-  }
-
   o->onode.expected_object_size = expected_object_size;
   o->onode.expected_write_size = expected_write_size;
   txc->write_onode(o);
-
- out:
-  dout(10) << __func__ << " " << c->cid << " " << oid
+  dout(10) << __func__ << " " << c->cid << " " << o->oid
 	   << " object_size " << expected_object_size
 	   << " write_size " << expected_write_size
 	   << " = " << r << dendl;
@@ -5942,29 +6160,20 @@ int BlueStore::_setallochint(TransContext *txc,
 }
 
 int BlueStore::_clone(TransContext *txc,
-		     CollectionRef& c,
-		     const ghobject_t& old_oid,
-		     const ghobject_t& new_oid)
+		      CollectionRef& c,
+		      OnodeRef& oldo,
+		      OnodeRef& newo)
 {
-  dout(15) << __func__ << " " << c->cid << " " << old_oid << " -> "
-	   << new_oid << dendl;
+  dout(15) << __func__ << " " << c->cid << " " << oldo->oid << " -> "
+	   << newo->oid << dendl;
   int r = 0;
-  if (old_oid.hobj.get_hash() != new_oid.hobj.get_hash()) {
-    derr << __func__ << " mismatched hash on " << old_oid << " and " << new_oid
-	 << dendl;
+  if (oldo->oid.hobj.get_hash() != newo->oid.hobj.get_hash()) {
+    derr << __func__ << " mismatched hash on " << oldo->oid
+	 << " and " << newo->oid << dendl;
     return -EINVAL;
   }
 
-  RWLock::WLocker l(c->lock);
   bufferlist bl;
-  OnodeRef newo;
-  OnodeRef oldo = c->get_onode(old_oid, false);
-  if (!oldo || !oldo->exists) {
-    r = -ENOENT;
-    goto out;
-  }
-  newo = c->get_onode(new_oid, true);
-  assert(newo);
   newo->exists = true;
   _assign_nid(txc, newo);
 
@@ -5991,6 +6200,7 @@ int BlueStore::_clone(TransContext *txc,
 	     << e->ref_map << dendl;
     newo->onode.block_map = oldo->onode.block_map;
     newo->onode.size = oldo->onode.size;
+    newo->enode = e;
     dout(20) << __func__ << " block_map " << newo->onode.block_map << dendl;
     txc->write_enode(e);
     if (marked)
@@ -6002,6 +6212,8 @@ int BlueStore::_clone(TransContext *txc,
       goto out;
 
     r = _do_write(txc, c, newo, 0, oldo->onode.size, bl, 0);
+    if (r < 0)
+      goto out;
   }
 
   // attrs
@@ -6039,81 +6251,67 @@ int BlueStore::_clone(TransContext *txc,
   }
 
   txc->write_onode(newo);
-
   r = 0;
 
  out:
-  dout(10) << __func__ << " " << c->cid << " " << old_oid << " -> "
-	   << new_oid << " = " << r << dendl;
+  dout(10) << __func__ << " " << c->cid << " " << oldo->oid << " -> "
+	   << newo->oid << " = " << r << dendl;
   return r;
 }
 
 int BlueStore::_clone_range(TransContext *txc,
-			   CollectionRef& c,
-			   const ghobject_t& old_oid,
-			   const ghobject_t& new_oid,
-			   uint64_t srcoff, uint64_t length, uint64_t dstoff)
+			    CollectionRef& c,
+			    OnodeRef& oldo,
+			    OnodeRef& newo,
+			    uint64_t srcoff, uint64_t length, uint64_t dstoff)
 {
-  dout(15) << __func__ << " " << c->cid << " " << old_oid << " -> "
-	   << new_oid << " from " << srcoff << "~" << length
+  dout(15) << __func__ << " " << c->cid << " " << oldo->oid << " -> "
+	   << newo->oid << " from " << srcoff << "~" << length
 	   << " to offset " << dstoff << dendl;
   int r = 0;
 
-  RWLock::WLocker l(c->lock);
   bufferlist bl;
-  OnodeRef newo;
-  OnodeRef oldo = c->get_onode(old_oid, false);
-  if (!oldo || !oldo->exists) {
-    r = -ENOENT;
-    goto out;
-  }
-  newo = c->get_onode(new_oid, true);
-  assert(newo);
   newo->exists = true;
+  _assign_nid(txc, newo);
 
   r = _do_read(oldo, srcoff, length, bl, 0);
   if (r < 0)
     goto out;
 
   r = _do_write(txc, c, newo, dstoff, bl.length(), bl, 0);
+  if (r < 0)
+    goto out;
 
   txc->write_onode(newo);
 
   r = 0;
 
  out:
-  dout(10) << __func__ << " " << c->cid << " " << old_oid << " -> "
-	   << new_oid << " from " << srcoff << "~" << length
+  dout(10) << __func__ << " " << c->cid << " " << oldo->oid << " -> "
+	   << newo->oid << " from " << srcoff << "~" << length
 	   << " to offset " << dstoff
 	   << " = " << r << dendl;
   return r;
 }
 
 int BlueStore::_rename(TransContext *txc,
-		      CollectionRef& c,
-		      const ghobject_t& old_oid,
-		      const ghobject_t& new_oid)
+		       CollectionRef& c,
+		       OnodeRef& oldo,
+		       OnodeRef& newo,
+		       const ghobject_t& new_oid)
 {
-  dout(15) << __func__ << " " << c->cid << " " << old_oid << " -> "
+  dout(15) << __func__ << " " << c->cid << " " << oldo->oid << " -> "
 	   << new_oid << dendl;
   int r;
-
-  RWLock::WLocker l(c->lock);
+  ghobject_t old_oid = oldo->oid;
   bufferlist bl;
   string old_key, new_key;
-  OnodeRef newo;
-  OnodeRef oldo = c->get_onode(old_oid, false);
-  if (!oldo || !oldo->exists) {
-    r = -ENOENT;
-    goto out;
-  }
-  newo = c->get_onode(new_oid, true);
-  assert(newo);
 
-  if (newo->exists) {
+  if (newo && newo->exists) {
+    // destination object already exists, remove it first
     r = _do_remove(txc, c, newo);
     if (r < 0)
-      return r;
+      goto out;
   }
 
   txc->t->rmkey(PREFIX_OBJ, oldo->key);
@@ -6159,7 +6357,7 @@ int BlueStore::_create_collection(
 }
 
 int BlueStore::_remove_collection(TransContext *txc, coll_t cid,
-				 CollectionRef *c)
+				  CollectionRef *c)
 {
   dout(15) << __func__ << " " << cid << dendl;
   int r;
@@ -6170,6 +6368,7 @@ int BlueStore::_remove_collection(TransContext *txc, coll_t cid,
       r = -ENOENT;
       goto out;
     }
+    assert((*c)->exists);
     pair<ghobject_t,OnodeRef> next;
     while ((*c)->onode_map.get_next(next.first, &next)) {
       if (next.second->exists) {
@@ -6179,6 +6378,7 @@ int BlueStore::_remove_collection(TransContext *txc, coll_t cid,
     }
     coll_map.erase(cid);
     txc->removed_collections.push_back(*c);
+    (*c)->exists = false;
     c->reset();
   }
   txc->t->rmkey(PREFIX_COLL, stringify(cid));
diff --git a/src/os/bluestore/BlueStore.h b/src/os/bluestore/BlueStore.h
index cfabfe6..2899c78 100644
--- a/src/os/bluestore/BlueStore.h
+++ b/src/os/bluestore/BlueStore.h
@@ -19,6 +19,10 @@
 
 #include <unistd.h>
 
+#include <atomic>
+#include <mutex>
+#include <condition_variable>
+
 #include <boost/intrusive/list.hpp>
 #include <boost/intrusive/unordered_set.hpp>
 #include <boost/functional/hash.hpp>
@@ -29,6 +33,7 @@
 #include "common/Finisher.h"
 #include "common/RWLock.h"
 #include "common/WorkQueue.h"
+#include "common/perf_counters.h"
 #include "os/ObjectStore.h"
 #include "os/fs/FS.h"
 #include "kv/KeyValueDB.h"
@@ -40,6 +45,24 @@ class Allocator;
 class FreelistManager;
 class BlueFS;
 
+enum {
+  l_bluestore_first = 732430,
+  l_bluestore_state_prepare_lat,
+  l_bluestore_state_aio_wait_lat,
+  l_bluestore_state_io_done_lat,
+  l_bluestore_state_kv_queued_lat,
+  l_bluestore_state_kv_committing_lat,
+  l_bluestore_state_kv_done_lat,
+  l_bluestore_state_wal_queued_lat,
+  l_bluestore_state_wal_applying_lat,
+  l_bluestore_state_wal_aio_wait_lat,
+  l_bluestore_state_wal_cleanup_lat,
+  l_bluestore_state_wal_done_lat,
+  l_bluestore_state_finishing_lat,
+  l_bluestore_state_done_lat,
+  l_bluestore_last
+};
+
 class BlueStore : public ObjectStore {
   // -----------------------------------------------------
   // types
@@ -51,15 +74,13 @@ public:
   struct EnodeSet;
 
   struct Enode : public boost::intrusive::unordered_set_base_hook<> {
-    atomic_t nref;        ///< reference count
+    std::atomic_int nref;        ///< reference count
     uint32_t hash;
     string key;           ///< key under PREFIX_OBJ where we are stored
     EnodeSet *enode_set;  ///< reference to the containing set
 
     bluestore_extent_ref_map_t ref_map;
 
-    boost::intrusive::unordered_set_member_hook<> map_item;
-
     Enode(uint32_t h, const string& k, EnodeSet *s)
       : nref(0),
 	hash(h),
@@ -67,7 +88,7 @@ public:
 	enode_set(s) {}
 
     void get() {
-      nref.inc();
+      ++nref;
     }
     void put();
 
@@ -93,7 +114,7 @@ public:
 
     boost::intrusive::unordered_set<Enode> uset;
 
-    EnodeSet(unsigned n)
+    explicit EnodeSet(unsigned n)
       : num_buckets(n),
 	buckets(n),
 	uset(bucket_traits(buckets.data(), num_buckets)) {
@@ -106,7 +127,7 @@ public:
 
   /// an in-memory object
   struct Onode {
-    atomic_t nref;  ///< reference count
+    std::atomic_int nref;  ///< reference count
 
     ghobject_t oid;
     string key;     ///< key under PREFIX_OBJ where we are stored
@@ -118,21 +139,27 @@ public:
     bool dirty;     // ???
     bool exists;
 
-    Mutex flush_lock;  ///< protect flush_txns
-    Cond flush_cond;   ///< wait here for unapplied txns
+    std::mutex flush_lock;  ///< protect flush_txns
+    std::condition_variable flush_cond;   ///< wait here for unapplied txns
     set<TransContext*> flush_txns;   ///< committing or wal txns
 
     uint64_t tail_offset;
     bufferlist tail_bl;
 
-    Onode(const ghobject_t& o, const string& k);
+    Onode(const ghobject_t& o, const string& k)
+      : nref(0),
+	oid(o),
+	key(k),
+	dirty(false),
+	exists(false) {
+    }
 
     void flush();
     void get() {
-      nref.inc();
+      ++nref;
     }
     void put() {
-      if (nref.dec() == 0)
+      if (--nref == 0)
 	delete this;
     }
 
@@ -151,11 +178,11 @@ public:
 	boost::intrusive::list_member_hook<>,
 	&Onode::lru_item> > lru_list_t;
 
-    Mutex lock;
+    std::mutex lock;
     ceph::unordered_map<ghobject_t,OnodeRef> onode_map;  ///< forward lookups
     lru_list_t lru;                                      ///< lru
 
-    OnodeHashLRU() : lock("BlueStore::OnodeHashLRU::lock") {}
+    OnodeHashLRU() {}
 
     void add(const ghobject_t& oid, OnodeRef o);
     void _touch(OnodeRef o);
@@ -166,12 +193,14 @@ public:
     int trim(int max=-1);
   };
 
-  struct Collection {
+  struct Collection : public CollectionImpl {
     BlueStore *store;
     coll_t cid;
     bluestore_cnode_t cnode;
     RWLock lock;
 
+    bool exists;
+
     // cache onodes on a per-collection basis to avoid lock
     // contention.
     OnodeHashLRU onode_map;
@@ -181,6 +210,10 @@ public:
     OnodeRef get_onode(const ghobject_t& oid, bool create);
     EnodeRef get_enode(uint32_t hash);
 
+    const coll_t &get_cid() override {
+      return cid;
+    }
+
     bool contains(const ghobject_t& oid) {
       if (cid.is_meta())
 	return oid.hobj.pool == -1;
@@ -194,7 +227,7 @@ public:
 
     Collection(BlueStore *ns, coll_t c);
   };
-  typedef ceph::shared_ptr<Collection> CollectionRef;
+  typedef boost::intrusive_ptr<Collection> CollectionRef;
 
   class OmapIteratorImpl : public ObjectMap::ObjectMapIteratorImpl {
     CollectionRef c;
@@ -256,6 +289,13 @@ public:
       return "???";
     }
 
+    void log_state_latency(PerfCounters *logger, int state) {
+      utime_t lat, now = ceph_clock_now(g_ceph_context);
+      lat = now - start;
+      logger->tinc(state, lat);
+      start = now;
+    }
+
     OpSequencerRef osr;
     boost::intrusive::list_member_hook<> sequencer_item;
 
@@ -280,7 +320,9 @@ public:
 
     CollectionRef first_collection;  ///< first referenced collection
 
-    TransContext(OpSequencer *o)
+    utime_t start;
+
+    explicit TransContext(OpSequencer *o)
       : state(STATE_PREPARE),
 	osr(o),
 	ops(0),
@@ -289,7 +331,8 @@ public:
 	onreadable(NULL),
 	onreadable_sync(NULL),
 	wal_txn(NULL),
-	ioc(this) {
+	ioc(this),
+	start(ceph_clock_now(g_ceph_context)) {
       //cout << "txc new " << this << std::endl;
     }
     ~TransContext() {
@@ -307,8 +350,8 @@ public:
 
   class OpSequencer : public Sequencer_impl {
   public:
-    Mutex qlock;
-    Cond qcond;
+    std::mutex qlock;
+    std::condition_variable qcond;
     typedef boost::intrusive::list<
       TransContext,
       boost::intrusive::member_hook<
@@ -329,31 +372,31 @@ public:
 
     Sequencer *parent;
 
-    Mutex wal_apply_lock;
+    std::mutex wal_apply_mutex;
+    std::unique_lock<std::mutex> wal_apply_lock;
 
     OpSequencer()
 	//set the qlock to to PTHREAD_MUTEX_RECURSIVE mode
-      : qlock("BlueStore::OpSequencer::qlock", true, false),
-	parent(NULL),
-	wal_apply_lock("BlueStore::OpSequencer::wal_apply_lock") {
+      : parent(NULL),
+	wal_apply_lock(wal_apply_mutex, std::defer_lock) {
     }
     ~OpSequencer() {
       assert(q.empty());
     }
 
     void queue_new(TransContext *txc) {
-      Mutex::Locker l(qlock);
+      std::lock_guard<std::mutex> l(qlock);
       q.push_back(*txc);
     }
 
     void flush() {
-      Mutex::Locker l(qlock);
+      std::unique_lock<std::mutex> l(qlock);
       while (!q.empty())
-	qcond.Wait(qlock);
+	qcond.wait(l);
     }
 
     bool flush_commit(Context *c) {
-      Mutex::Locker l(qlock);
+      std::lock_guard<std::mutex> l(qlock);
       if (q.empty()) {
 	return true;
       }
@@ -418,14 +461,13 @@ public:
 
       // preserve wal ordering for this sequencer by taking the lock
       // while still holding the queue lock
-      i->osr->wal_apply_lock.Lock();
+      i->osr->wal_apply_lock.lock();
       return i;
     }
-    void _process(TransContext *i, ThreadPool::TPHandle &handle) {
+    void _process(TransContext *i, ThreadPool::TPHandle &) override {
       store->_wal_apply(i);
-      i->osr->wal_apply_lock.Unlock();
+      i->osr->wal_apply_lock.unlock();
     }
-    using ThreadPool::WorkQueue<TransContext>::_process;
     void _clear() {
       assert(wal_queue.empty());
     }
@@ -442,7 +484,7 @@ public:
 
   struct KVSyncThread : public Thread {
     BlueStore *store;
-    KVSyncThread(BlueStore *s) : store(s) {}
+    explicit KVSyncThread(BlueStore *s) : store(s) {}
     void *entry() {
       store->_kv_sync_thread();
       return NULL;
@@ -468,7 +510,7 @@ private:
   RWLock coll_lock;    ///< rwlock to protect coll_map
   ceph::unordered_map<coll_t, CollectionRef> coll_map;
 
-  Mutex nid_lock;
+  std::mutex nid_lock;
   uint64_t nid_last;
   uint64_t nid_max;
 
@@ -477,7 +519,7 @@ private:
 
   interval_set<uint64_t> bluefs_extents;  ///< block extents owned by bluefs
 
-  Mutex wal_lock;
+  std::mutex wal_lock;
   atomic64_t wal_seq;
   ThreadPool wal_tp;
   WALWQ wal_wq;
@@ -485,16 +527,15 @@ private:
   Finisher finisher;
 
   KVSyncThread kv_sync_thread;
-  Mutex kv_lock;
-  Cond kv_cond, kv_sync_cond;
+  std::mutex kv_lock;
+  std::condition_variable kv_cond, kv_sync_cond;
   bool kv_stop;
   deque<TransContext*> kv_queue, kv_committing;
   deque<TransContext*> wal_cleanup_queue, wal_cleaning;
 
-  Logger *logger;
+  PerfCounters *logger;
 
-  Mutex reap_lock;
-  Cond reap_cond;
+  std::mutex reap_lock;
   list<CollectionRef> removed_collections;
 
 
@@ -520,7 +561,8 @@ private:
   int _open_collections(int *errors=0);
   void _close_collections();
 
-  int _setup_block_symlink_or_file(string name, string path, uint64_t size);
+  int _setup_block_symlink_or_file(string name, string path, uint64_t size,
+				   bool create);
 
   int _write_bdev_label(string path, bluestore_bdev_label_t label);
   static int _read_bdev_label(string path, bluestore_bdev_label_t *label);
@@ -534,7 +576,7 @@ private:
 				KeyValueDB::Transaction t);
   void _commit_bluefs_freespace(const vector<bluestore_extent_t>& extents);
 
-  CollectionRef _get_collection(coll_t cid);
+  CollectionRef _get_collection(const coll_t& cid);
   void _queue_reap_collection(CollectionRef& c);
   void _reap_collections();
 
@@ -543,8 +585,7 @@ private:
   void _dump_onode(OnodeRef o);
 
   TransContext *_txc_create(OpSequencer *osr);
-  void _txc_release(TransContext *txc, CollectionRef& c,
-		    EnodeRef& enode, uint32_t hash,
+  void _txc_release(TransContext *txc, CollectionRef& c, OnodeRef& onode,
 		    uint64_t offset, uint64_t length,
 		    bool shared);
   int _txc_add_transaction(TransContext *txc, Transaction *t);
@@ -565,9 +606,9 @@ private:
   void _kv_sync_thread();
   void _kv_stop() {
     {
-      Mutex::Locker l(kv_lock);
+      std::lock_guard<std::mutex> l(kv_lock);
       kv_stop = true;
-      kv_cond.Signal();
+      kv_cond.notify_all();
     }
     kv_sync_thread.join();
     kv_stop = false;
@@ -586,6 +627,10 @@ public:
   BlueStore(CephContext *cct, const string& path);
   ~BlueStore();
 
+  string get_type() {
+    return "bluestore";
+  }
+
   bool needs_journal() { return false; };
   bool wants_journal() { return false; };
   bool allows_journal() { return false; };
@@ -615,20 +660,34 @@ public:
 public:
   int statfs(struct statfs *buf);
 
-  bool exists(coll_t cid, const ghobject_t& oid);
+  bool exists(const coll_t& cid, const ghobject_t& oid);
+  bool exists(CollectionHandle &c, const ghobject_t& oid);
   int stat(
-    coll_t cid,
+    const coll_t& cid,
     const ghobject_t& oid,
     struct stat *st,
-    bool allow_eio = false); // struct stat?
+    bool allow_eio = false) override;
+  int stat(
+    CollectionHandle &c,
+    const ghobject_t& oid,
+    struct stat *st,
+    bool allow_eio = false) override;
+  int read(
+    const coll_t& cid,
+    const ghobject_t& oid,
+    uint64_t offset,
+    size_t len,
+    bufferlist& bl,
+    uint32_t op_flags = 0,
+    bool allow_eio = false) override;
   int read(
-    coll_t cid,
+    CollectionHandle &c,
     const ghobject_t& oid,
     uint64_t offset,
     size_t len,
     bufferlist& bl,
     uint32_t op_flags = 0,
-    bool allow_eio = false);
+    bool allow_eio = false) override;
   int _do_read(
     OnodeRef o,
     uint64_t offset,
@@ -636,60 +695,111 @@ public:
     bufferlist& bl,
     uint32_t op_flags = 0);
 
-  int fiemap(coll_t cid, const ghobject_t& oid, uint64_t offset, size_t len, bufferlist& bl);
-  int getattr(coll_t cid, const ghobject_t& oid, const char *name, bufferptr& value);
-  int getattrs(coll_t cid, const ghobject_t& oid, map<string,bufferptr>& aset);
+  int fiemap(const coll_t& cid, const ghobject_t& oid,
+	     uint64_t offset, size_t len, bufferlist& bl) override;
+  int fiemap(CollectionHandle &c, const ghobject_t& oid,
+	     uint64_t offset, size_t len, bufferlist& bl) override;
+
+  int getattr(const coll_t& cid, const ghobject_t& oid, const char *name,
+	      bufferptr& value) override;
+  int getattr(CollectionHandle &c, const ghobject_t& oid, const char *name,
+	      bufferptr& value) override;
+
+  int getattrs(const coll_t& cid, const ghobject_t& oid,
+	       map<string,bufferptr>& aset) override;
+  int getattrs(CollectionHandle &c, const ghobject_t& oid,
+	       map<string,bufferptr>& aset) override;
+
+  int list_collections(vector<coll_t>& ls) override;
 
-  int list_collections(vector<coll_t>& ls);
-  bool collection_exists(coll_t c);
-  bool collection_empty(coll_t c);
+  CollectionHandle open_collection(const coll_t &c) override;
 
-  int collection_list(coll_t cid, ghobject_t start, ghobject_t end,
+  bool collection_exists(const coll_t& c);
+  bool collection_empty(const coll_t& c);
+  int collection_bits(const coll_t& c);
+
+  int collection_list(const coll_t& cid, ghobject_t start, ghobject_t end,
+		      bool sort_bitwise, int max,
+		      vector<ghobject_t> *ls, ghobject_t *next) override;
+  int collection_list(CollectionHandle &c, ghobject_t start, ghobject_t end,
 		      bool sort_bitwise, int max,
-		      vector<ghobject_t> *ls, ghobject_t *next);
+		      vector<ghobject_t> *ls, ghobject_t *next) override;
 
   int omap_get(
-    coll_t cid,                ///< [in] Collection containing oid
+    const coll_t& cid,                ///< [in] Collection containing oid
     const ghobject_t &oid,   ///< [in] Object containing omap
     bufferlist *header,      ///< [out] omap header
     map<string, bufferlist> *out /// < [out] Key to value map
-    );
+    ) override;
+  int omap_get(
+    CollectionHandle &c,     ///< [in] Collection containing oid
+    const ghobject_t &oid,   ///< [in] Object containing omap
+    bufferlist *header,      ///< [out] omap header
+    map<string, bufferlist> *out /// < [out] Key to value map
+    ) override;
 
   /// Get omap header
   int omap_get_header(
-    coll_t cid,                ///< [in] Collection containing oid
+    const coll_t& cid,                ///< [in] Collection containing oid
     const ghobject_t &oid,   ///< [in] Object containing omap
     bufferlist *header,      ///< [out] omap header
     bool allow_eio = false ///< [in] don't assert on eio
-    );
+    ) override;
+  int omap_get_header(
+    CollectionHandle &c,                ///< [in] Collection containing oid
+    const ghobject_t &oid,   ///< [in] Object containing omap
+    bufferlist *header,      ///< [out] omap header
+    bool allow_eio = false ///< [in] don't assert on eio
+    ) override;
 
   /// Get keys defined on oid
   int omap_get_keys(
-    coll_t cid,              ///< [in] Collection containing oid
+    const coll_t& cid,              ///< [in] Collection containing oid
+    const ghobject_t &oid, ///< [in] Object containing omap
+    set<string> *keys      ///< [out] Keys defined on oid
+    ) override;
+  int omap_get_keys(
+    CollectionHandle &c,              ///< [in] Collection containing oid
     const ghobject_t &oid, ///< [in] Object containing omap
     set<string> *keys      ///< [out] Keys defined on oid
     );
 
   /// Get key values
   int omap_get_values(
-    coll_t cid,                    ///< [in] Collection containing oid
+    const coll_t& cid,                    ///< [in] Collection containing oid
     const ghobject_t &oid,       ///< [in] Object containing omap
     const set<string> &keys,     ///< [in] Keys to get
     map<string, bufferlist> *out ///< [out] Returned keys and values
-    );
+    ) override;
+  int omap_get_values(
+    CollectionHandle &c,         ///< [in] Collection containing oid
+    const ghobject_t &oid,       ///< [in] Object containing omap
+    const set<string> &keys,     ///< [in] Keys to get
+    map<string, bufferlist> *out ///< [out] Returned keys and values
+    ) override;
 
   /// Filters keys into out which are defined on oid
   int omap_check_keys(
-    coll_t cid,                ///< [in] Collection containing oid
+    const coll_t& cid,                ///< [in] Collection containing oid
     const ghobject_t &oid,   ///< [in] Object containing omap
     const set<string> &keys, ///< [in] Keys to check
     set<string> *out         ///< [out] Subset of keys defined on oid
-    );
+    ) override;
+  int omap_check_keys(
+    CollectionHandle &c,                ///< [in] Collection containing oid
+    const ghobject_t &oid,   ///< [in] Object containing omap
+    const set<string> &keys, ///< [in] Keys to check
+    set<string> *out         ///< [out] Subset of keys defined on oid
+    ) override;
 
   ObjectMap::ObjectMapIterator get_omap_iterator(
-    coll_t cid,              ///< [in] collection
+    const coll_t& cid,              ///< [in] collection
     const ghobject_t &oid  ///< [in] object
-    );
+    ) override;
+  ObjectMap::ObjectMapIterator get_omap_iterator(
+    CollectionHandle &c,   ///< [in] collection
+    const ghobject_t &oid  ///< [in] object
+    ) override;
 
   void set_fsid(uuid_d u) {
     fsid = u;
@@ -704,7 +814,7 @@ public:
 
   int queue_transactions(
     Sequencer *osr,
-    list<Transaction*>& tls,
+    vector<Transaction>& tls,
     TrackedOpRef op = TrackedOpRef(),
     ThreadPool::TPHandle *handle = NULL);
 
@@ -718,7 +828,7 @@ private:
 
   int _write(TransContext *txc,
 	     CollectionRef& c,
-	     const ghobject_t& oid,
+	     OnodeRef& o,
 	     uint64_t offset, size_t len,
 	     bufferlist& bl,
 	     uint32_t fadvise_flags);
@@ -759,14 +869,14 @@ private:
 		uint32_t fadvise_flags);
   int _touch(TransContext *txc,
 	     CollectionRef& c,
-	     const ghobject_t& oid);
+	     OnodeRef& o);
   int _do_write_zero(TransContext *txc,
 		     CollectionRef &c,
 		     OnodeRef o,
 		     uint64_t offset, uint64_t length);
   int _zero(TransContext *txc,
 	    CollectionRef& c,
-	    const ghobject_t& oid,
+	    OnodeRef& o,
 	    uint64_t offset, size_t len);
   int _do_truncate(TransContext *txc,
 		   CollectionRef& c,
@@ -774,67 +884,68 @@ private:
 		   uint64_t offset);
   int _truncate(TransContext *txc,
 		CollectionRef& c,
-		const ghobject_t& oid,
+		OnodeRef& o,
 		uint64_t offset);
   int _remove(TransContext *txc,
 	      CollectionRef& c,
-	      const ghobject_t& oid);
+	      OnodeRef& o);
   int _do_remove(TransContext *txc,
 		 CollectionRef& c,
 		 OnodeRef o);
   int _setattr(TransContext *txc,
 	       CollectionRef& c,
-	       const ghobject_t& oid,
+	       OnodeRef& o,
 	       const string& name,
 	       bufferptr& val);
   int _setattrs(TransContext *txc,
 		CollectionRef& c,
-		const ghobject_t& oid,
+		OnodeRef& o,
 		const map<string,bufferptr>& aset);
   int _rmattr(TransContext *txc,
 	      CollectionRef& c,
-	      const ghobject_t& oid,
+	      OnodeRef& o,
 	      const string& name);
   int _rmattrs(TransContext *txc,
 	       CollectionRef& c,
-	       const ghobject_t& oid);
+	       OnodeRef& o);
   void _do_omap_clear(TransContext *txc, uint64_t id);
   int _omap_clear(TransContext *txc,
 		  CollectionRef& c,
-		  const ghobject_t& oid);
+		  OnodeRef& o);
   int _omap_setkeys(TransContext *txc,
 		    CollectionRef& c,
-		    const ghobject_t& oid,
+		    OnodeRef& o,
 		    bufferlist& bl);
   int _omap_setheader(TransContext *txc,
 		      CollectionRef& c,
-		      const ghobject_t& oid,
+		      OnodeRef& o,
 		      bufferlist& header);
   int _omap_rmkeys(TransContext *txc,
 		   CollectionRef& c,
-		   const ghobject_t& oid,
+		   OnodeRef& o,
 		   bufferlist& bl);
   int _omap_rmkey_range(TransContext *txc,
 			CollectionRef& c,
-			const ghobject_t& oid,
+			OnodeRef& o,
 			const string& first, const string& last);
   int _setallochint(TransContext *txc,
 		    CollectionRef& c,
-		    const ghobject_t& oid,
+		    OnodeRef& o,
 		    uint64_t expected_object_size,
 		    uint64_t expected_write_size);
   int _clone(TransContext *txc,
 	     CollectionRef& c,
-	     const ghobject_t& old_oid,
-	     const ghobject_t& new_oid);
+	     OnodeRef& oldo,
+	     OnodeRef& newo);
   int _clone_range(TransContext *txc,
 		   CollectionRef& c,
-		   const ghobject_t& old_oid,
-		   const ghobject_t& new_oid,
+		   OnodeRef& oldo,
+		   OnodeRef& newo,
 		   uint64_t srcoff, uint64_t length, uint64_t dstoff);
   int _rename(TransContext *txc,
 	      CollectionRef& c,
-	      const ghobject_t& old_oid,
+	      OnodeRef& oldo,
+	      OnodeRef& newo,
 	      const ghobject_t& new_oid);
   int _create_collection(TransContext *txc, coll_t cid, unsigned bits,
 			 CollectionRef *c);
diff --git a/src/os/bluestore/FreelistManager.cc b/src/os/bluestore/FreelistManager.cc
index a9385ad..20480a7 100644
--- a/src/os/bluestore/FreelistManager.cc
+++ b/src/os/bluestore/FreelistManager.cc
@@ -144,9 +144,11 @@ int FreelistManager::allocate(
       txn->set(prefix, newkey, newvalue);
       dout(20) << __func__ << "  set " << newoff << "~" << newlen
 	       << " (remaining tail)" << dendl;
+      kv_free.erase(p);
       kv_free[newoff] = newlen;
+    } else {
+      kv_free.erase(p);
     }
-    kv_free.erase(p);
   } else {
     assert(p->first < offset);
     // shorten
@@ -170,9 +172,11 @@ int FreelistManager::allocate(
       dout(20) << __func__ << "  set " << tailoff << "~" << taillen
 	       << " (remaining tail from " << p->first << "~" << p->second << ")"
 	       << dendl;
+      p->second = newlen;
       kv_free[tailoff] = taillen;
+    } else {
+      p->second = newlen;
     }
-    p->second = newlen;
   }
   if (g_conf->bluestore_debug_freelist)
     _audit();
@@ -199,7 +203,11 @@ int FreelistManager::release(
 	       << " (merge with previous)" << dendl;
       length += p->second;
       offset = p->first;
-      kv_free.erase(p++);
+      if (map_t_has_stable_iterators) {
+	kv_free.erase(p++);
+      } else {
+	p = kv_free.erase(p);
+      }
     } else if (p->first + p->second > offset) {
       derr << __func__ << " bad release " << offset << "~" << length
 	   << " overlaps with " << p->first << "~" << p->second << dendl;
diff --git a/src/os/bluestore/FreelistManager.h b/src/os/bluestore/FreelistManager.h
index b0115e5..1694f53 100644
--- a/src/os/bluestore/FreelistManager.h
+++ b/src/os/bluestore/FreelistManager.h
@@ -10,12 +10,17 @@
 #include <ostream>
 #include "kv/KeyValueDB.h"
 
+#include "include/cpp-btree/btree_map.h"
+
 class FreelistManager {
   std::string prefix;
   std::mutex lock;
   uint64_t total_free;
 
-  std::map<uint64_t, uint64_t> kv_free;    ///< mirrors our kv values in the db
+  typedef btree::btree_map<uint64_t,uint64_t> map_t;
+  static const bool map_t_has_stable_iterators = false;
+
+  map_t kv_free;    ///< mirrors our kv values in the db
 
   void _audit();
   void _dump();
@@ -35,7 +40,7 @@ public:
     return total_free;
   }
 
-  const std::map<uint64_t,uint64_t>& get_freelist() {
+  const map_t& get_freelist() {
     return kv_free;
   }
 
diff --git a/src/os/bluestore/BlockDevice.cc b/src/os/bluestore/KernelDevice.cc
similarity index 85%
copy from src/os/bluestore/BlockDevice.cc
copy to src/os/bluestore/KernelDevice.cc
index 230ea45..b85b473 100644
--- a/src/os/bluestore/BlockDevice.cc
+++ b/src/os/bluestore/KernelDevice.cc
@@ -1,5 +1,16 @@
 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
 // vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2014 Red Hat
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation.  See file COPYING.
+ *
+ */
 
 #include <unistd.h>
 #include <stdlib.h>
@@ -8,7 +19,7 @@
 #include <fcntl.h>
 #include <unistd.h>
 
-#include "BlockDevice.h"
+#include "KernelDevice.h"
 #include "include/types.h"
 #include "include/compat.h"
 #include "common/errno.h"
@@ -17,47 +28,27 @@
 
 #define dout_subsys ceph_subsys_bdev
 #undef dout_prefix
-#define dout_prefix *_dout << "bdev "
-
-
-void IOContext::aio_wait()
-{
-  Mutex::Locker l(lock);
-  // see _aio_thread for waker logic
-  num_waiting.inc();
-  while (num_running.read() > 0 || num_reading.read() > 0) {
-    dout(10) << __func__ << " " << this
-	     << " waiting for " << num_running.read() << " aios and/or "
-	     << num_reading.read() << " readers to complete" << dendl;
-    cond.Wait(lock);
-  }
-  num_waiting.dec();
-  dout(20) << __func__ << " " << this << " done" << dendl;
-}
-
-// ----------------
-#undef dout_prefix
 #define dout_prefix *_dout << "bdev(" << path << ") "
 
-BlockDevice::BlockDevice(aio_callback_t cb, void *cbpriv)
+KernelDevice::KernelDevice(aio_callback_t cb, void *cbpriv)
   : fd_direct(-1),
     fd_buffered(-1),
     size(0), block_size(0),
     fs(NULL), aio(false), dio(false),
-    debug_lock("BlockDevice::debug_lock"),
-    ioc_reap_lock("BlockDevice::ioc_reap_lock"),
-    flush_lock("BlockDevice::flush_lock"),
+    debug_lock("KernelDevice::debug_lock"),
+    flush_lock("KernelDevice::flush_lock"),
     aio_queue(g_conf->bdev_aio_max_queue_depth),
     aio_callback(cb),
     aio_callback_priv(cbpriv),
     aio_stop(false),
-    aio_thread(this)
+    aio_thread(this),
+    injecting_crash(0)
 {
   zeros = buffer::create_page_aligned(1048576);
   zeros.zero();
 }
 
-int BlockDevice::_lock()
+int KernelDevice::_lock()
 {
   struct flock l;
   memset(&l, 0, sizeof(l));
@@ -71,7 +62,7 @@ int BlockDevice::_lock()
   return 0;
 }
 
-int BlockDevice::open(string p)
+int KernelDevice::open(string p)
 {
   path = p;
   int r = 0;
@@ -153,7 +144,7 @@ int BlockDevice::open(string p)
   return r;
 }
 
-void BlockDevice::close()
+void KernelDevice::close()
 {
   dout(1) << __func__ << dendl;
   _aio_stop();
@@ -173,7 +164,7 @@ void BlockDevice::close()
   path.clear();
 }
 
-int BlockDevice::flush()
+int KernelDevice::flush()
 {
   // serialize flushers, so that we can avoid weird io_since_flush
   // races (w/ multipler flushers).
@@ -185,12 +176,14 @@ int BlockDevice::flush()
   dout(10) << __func__ << " start" << dendl;
   io_since_flush.set(0);
   if (g_conf->bdev_inject_crash) {
+    ++injecting_crash;
     // sleep for a moment to give other threads a chance to submit or
     // wait on io that races with a flush.
     derr << __func__ << " injecting crash. first we sleep..." << dendl;
-    sleep(3);
+    sleep(g_conf->bdev_inject_crash_flush_delay);
     derr << __func__ << " and now we die" << dendl;
-    assert(0 == "bdev_inject_crash");
+    g_ceph_context->_log->flush();
+    _exit(1);
   }
   utime_t start = ceph_clock_now(NULL);
   int r = ::fdatasync(fd_direct);
@@ -204,9 +197,9 @@ int BlockDevice::flush()
   return r;
 }
 
-int BlockDevice::_aio_start()
+int KernelDevice::_aio_start()
 {
-  if (g_conf->bdev_aio) {
+  if (aio) {
     dout(10) << __func__ << dendl;
     int r = aio_queue.init();
     if (r < 0) {
@@ -218,9 +211,9 @@ int BlockDevice::_aio_start()
   return 0;
 }
 
-void BlockDevice::_aio_stop()
+void KernelDevice::_aio_stop()
 {
-  if (g_conf->bdev_aio) {
+  if (aio) {
     dout(10) << __func__ << dendl;
     aio_stop = true;
     aio_thread.join();
@@ -229,9 +222,10 @@ void BlockDevice::_aio_stop()
   }
 }
 
-void BlockDevice::_aio_thread()
+void KernelDevice::_aio_thread()
 {
   dout(10) << __func__ << " start" << dendl;
+  int inject_crash_count = 0;
   while (!aio_stop) {
     dout(40) << __func__ << " polling" << dendl;
     int max = 16;
@@ -266,20 +260,22 @@ void BlockDevice::_aio_thread()
 	}
       }
     }
-    if (ioc_reap_count.read()) {
-      Mutex::Locker l(ioc_reap_lock);
-      for (auto p : ioc_reap_queue) {
-	dout(20) << __func__ << " reap ioc " << p << dendl;
-	delete p;
+    reap_ioc();
+    if (g_conf->bdev_inject_crash) {
+      ++inject_crash_count;
+      if (inject_crash_count * g_conf->bdev_aio_poll_ms / 1000 >
+	  g_conf->bdev_inject_crash + g_conf->bdev_inject_crash_flush_delay) {
+	derr << __func__ << " bdev_inject_crash trigger from aio thread"
+	     << dendl;
+	g_ceph_context->_log->flush();
+	_exit(1);
       }
-      ioc_reap_queue.clear();
-      ioc_reap_count.dec();
     }
   }
   dout(10) << __func__ << " end" << dendl;
 }
 
-void BlockDevice::_aio_log_start(
+void KernelDevice::_aio_log_start(
   IOContext *ioc,
   uint64_t offset,
   uint64_t length)
@@ -297,7 +293,7 @@ void BlockDevice::_aio_log_start(
   }
 }
 
-void BlockDevice::_aio_log_finish(
+void KernelDevice::_aio_log_finish(
   IOContext *ioc,
   uint64_t offset,
   uint64_t length)
@@ -309,7 +305,7 @@ void BlockDevice::_aio_log_finish(
   }
 }
 
-void BlockDevice::aio_submit(IOContext *ioc)
+void KernelDevice::aio_submit(IOContext *ioc)
 {
   dout(20) << __func__ << " ioc " << ioc
 	   << " pending " << ioc->num_pending.read()
@@ -357,7 +353,7 @@ void BlockDevice::aio_submit(IOContext *ioc)
   }
 }
 
-int BlockDevice::aio_write(
+int KernelDevice::aio_write(
   uint64_t off,
   bufferlist &bl,
   IOContext *ioc,
@@ -394,6 +390,7 @@ int BlockDevice::aio_write(
       // generate a real io so that aio_wait behaves properly, but make it
       // a read instead of write, and toss the result.
       aio.pread(off, len);
+      ++injecting_crash;
     } else {
       bl.prepare_iov(&aio.iov);
       for (unsigned i=0; i<aio.iov.size(); ++i) {
@@ -412,6 +409,7 @@ int BlockDevice::aio_write(
 	rand() % g_conf->bdev_inject_crash == 0) {
       derr << __func__ << " bdev_inject_crash: dropping io " << off << "~" << len
 	   << dendl;
+      ++injecting_crash;
       return 0;
     }
     vector<iovec> iov;
@@ -419,20 +417,27 @@ int BlockDevice::aio_write(
     int r = ::pwritev(buffered ? fd_buffered : fd_direct,
 		      &iov[0], iov.size(), off);
     if (r < 0) {
+      r = -errno;
       derr << __func__ << " pwritev error: " << cpp_strerror(r) << dendl;
       return r;
     }
     if (buffered) {
       // initiate IO (but do not wait)
-      ::sync_file_range(fd_buffered, off, len, SYNC_FILE_RANGE_WRITE);
+      r = ::sync_file_range(fd_buffered, off, len, SYNC_FILE_RANGE_WRITE);
+      if (r < 0) {
+        r = -errno;
+        derr << __func__ << " sync_file_range error: " << cpp_strerror(r) << dendl;
+        return r;
+      }
     }
   }
 
+  _aio_log_finish(ioc, off, bl.length());
   io_since_flush.set(1);
   return 0;
 }
 
-int BlockDevice::aio_zero(
+int KernelDevice::aio_zero(
   uint64_t off,
   uint64_t len,
   IOContext *ioc)
@@ -451,13 +456,12 @@ int BlockDevice::aio_zero(
     len -= t.length();
     bl.claim_append(t);
   }
-  bufferlist foo;
   // note: this works with aio only becaues the actual buffer is
   // this->zeros, which is page-aligned and never freed.
   return aio_write(off, bl, ioc, false);
 }
 
-int BlockDevice::read(uint64_t off, uint64_t len, bufferlist *pbl,
+int KernelDevice::read(uint64_t off, uint64_t len, bufferlist *pbl,
 		      IOContext *ioc,
 		      bool buffered)
 {
@@ -478,8 +482,9 @@ int BlockDevice::read(uint64_t off, uint64_t len, bufferlist *pbl,
     r = -errno;
     goto out;
   }
+  assert((uint64_t)r == len);
   pbl->clear();
-  pbl->push_back(p);
+  pbl->push_back(std::move(p));
 
   dout(40) << "data: ";
   pbl->hexdump(*_dout);
@@ -496,7 +501,7 @@ int BlockDevice::read(uint64_t off, uint64_t len, bufferlist *pbl,
   return r < 0 ? r : 0;
 }
 
-int BlockDevice::read_buffered(uint64_t off, uint64_t len, char *buf)
+int KernelDevice::read_buffered(uint64_t off, uint64_t len, char *buf)
 {
   dout(5) << __func__ << " " << off << "~" << len << dendl;
   assert(len > 0);
@@ -527,7 +532,7 @@ int BlockDevice::read_buffered(uint64_t off, uint64_t len, char *buf)
   return r < 0 ? r : 0;
 }
 
-int BlockDevice::invalidate_cache(uint64_t off, uint64_t len)
+int KernelDevice::invalidate_cache(uint64_t off, uint64_t len)
 {
   dout(5) << __func__ << " " << off << "~" << len << dendl;
   assert(off % block_size == 0);
@@ -541,10 +546,3 @@ int BlockDevice::invalidate_cache(uint64_t off, uint64_t len)
   return r;
 }
 
-void BlockDevice::queue_reap_ioc(IOContext *ioc)
-{
-  Mutex::Locker l(ioc_reap_lock);
-  if (ioc_reap_count.read() == 0)
-    ioc_reap_count.inc();
-  ioc_reap_queue.push_back(ioc);
-}
diff --git a/src/os/bluestore/KernelDevice.h b/src/os/bluestore/KernelDevice.h
new file mode 100644
index 0000000..4ced202
--- /dev/null
+++ b/src/os/bluestore/KernelDevice.h
@@ -0,0 +1,95 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2014 Red Hat
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation.  See file COPYING.
+ *
+ */
+
+#ifndef CEPH_OS_BLUESTORE_KERNELDEVICE_H
+#define CEPH_OS_BLUESTORE_KERNELDEVICE_H
+
+#include <atomic>
+
+#include "os/fs/FS.h"
+#include "include/interval_set.h"
+
+#include "BlockDevice.h"
+
+class KernelDevice : public BlockDevice {
+  int fd_direct, fd_buffered;
+  uint64_t size;
+  uint64_t block_size;
+  string path;
+  FS *fs;
+  bool aio, dio;
+  bufferptr zeros;
+
+  Mutex debug_lock;
+  interval_set<uint64_t> debug_inflight;
+
+  Mutex flush_lock;
+  atomic_t io_since_flush;
+
+  FS::aio_queue_t aio_queue;
+  aio_callback_t aio_callback;
+  void *aio_callback_priv;
+  bool aio_stop;
+
+  struct AioCompletionThread : public Thread {
+    KernelDevice *bdev;
+    explicit AioCompletionThread(KernelDevice *b) : bdev(b) {}
+    void *entry() {
+      bdev->_aio_thread();
+      return NULL;
+    }
+  } aio_thread;
+
+  std::atomic_int injecting_crash;
+
+  void _aio_thread();
+  int _aio_start();
+  void _aio_stop();
+
+  void _aio_log_start(IOContext *ioc, uint64_t offset, uint64_t length);
+  void _aio_log_finish(IOContext *ioc, uint64_t offset, uint64_t length);
+
+  int _lock();
+
+public:
+  KernelDevice(aio_callback_t cb, void *cbpriv);
+
+  void aio_submit(IOContext *ioc) override;
+
+  uint64_t get_size() const override {
+    return size;
+  }
+  uint64_t get_block_size() const override {
+    return block_size;
+  }
+
+  int read(uint64_t off, uint64_t len, bufferlist *pbl,
+	   IOContext *ioc,
+	   bool buffered) override;
+  int read_buffered(uint64_t off, uint64_t len, char *buf) override;
+
+  int aio_write(uint64_t off, bufferlist& bl,
+		IOContext *ioc,
+		bool buffered) override;
+  int aio_zero(uint64_t off, uint64_t len,
+	       IOContext *ioc) override;
+  int flush() override;
+
+  // for managing buffered readers/writers
+  int invalidate_cache(uint64_t off, uint64_t len) override;
+  int open(string path) override;
+  void close() override;
+};
+
+#endif
diff --git a/src/os/bluestore/NVMEDevice.cc b/src/os/bluestore/NVMEDevice.cc
new file mode 100644
index 0000000..b570c87
--- /dev/null
+++ b/src/os/bluestore/NVMEDevice.cc
@@ -0,0 +1,914 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+  *
+ * Copyright (C) 2015 XSky <haomai at xsky.com>
+ *
+ * Author: Haomai Wang <haomaiwang at gmail.com>
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation.  See file COPYING.
+ *
+ */
+
+#include <unistd.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <map>
+#ifdef HAVE_SSE
+#include <xmmintrin.h>
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <spdk/pci.h>
+#include <spdk/nvme.h>
+
+#ifdef __cplusplus
+}
+#endif
+
+#include <rte_config.h>
+#include <rte_cycles.h>
+#include <rte_mempool.h>
+#include <rte_malloc.h>
+#include <rte_lcore.h>
+
+#include "include/stringify.h"
+#include "include/types.h"
+#include "include/compat.h"
+#include "common/align.h"
+#include "common/errno.h"
+#include "common/debug.h"
+#include "common/Initialize.h"
+#include "common/perf_counters.h"
+
+#include "NVMEDevice.h"
+
+#define dout_subsys ceph_subsys_bdev
+#undef dout_prefix
+#define dout_prefix *_dout << "bdev(" << sn << ") "
+
+rte_mempool *request_mempool = nullptr;
+rte_mempool *task_pool = nullptr;
+
+enum {
+  l_bluestore_nvmedevice_first = 632430,
+  l_bluestore_nvmedevice_aio_write_lat,
+  l_bluestore_nvmedevice_read_lat,
+  l_bluestore_nvmedevice_flush_lat,
+  l_bluestore_nvmedevice_aio_write_queue_lat,
+  l_bluestore_nvmedevice_read_queue_lat,
+  l_bluestore_nvmedevice_flush_queue_lat,
+  l_bluestore_nvmedevice_queue_ops,
+  l_bluestore_nvmedevice_polling_lat,
+  l_bluestore_nvmedevice_last
+};
+
+static void io_complete(void *t, const struct nvme_completion *completion);
+
+static const char *ealargs[] = {
+    "ceph-osd",
+    "-c 0x1", /* This must be the second parameter. It is overwritten by index in main(). */
+    "-n 4",
+};
+
+
+class SharedDriverData {
+  std::string sn;
+  std::string name;
+  nvme_controller *ctrlr;
+  nvme_namespace *ns;
+
+  uint64_t block_size = 0;
+  uint64_t size = 0;
+  std::vector<NVMEDevice*> registered_devices;
+  struct AioCompletionThread : public Thread {
+    SharedDriverData *data;
+    AioCompletionThread(SharedDriverData *d) : data(d) {}
+    void *entry() {
+      data->_aio_thread();
+      return NULL;
+    }
+  } aio_thread;
+  friend class AioCompletionThread;
+
+  bool aio_stop = false;
+  int ref = 1;
+  void _aio_thread();
+  void _aio_start() {
+    aio_thread.create("nvme_aio_thread");
+  }
+  void _aio_stop() {
+    assert(aio_thread.is_started());
+    {
+      Mutex::Locker l(queue_lock);
+      aio_stop = true;
+      queue_cond.Signal();
+    }
+    aio_thread.join();
+    aio_stop = false;
+  }
+  atomic_t queue_empty;
+  Mutex queue_lock;
+  Cond queue_cond;
+  std::queue<Task*> task_queue;
+
+  Mutex flush_lock;
+  Cond flush_cond;
+  atomic_t flush_waiters;
+
+ public:
+  atomic_t inflight_ops;
+  PerfCounters *logger = nullptr;
+
+  SharedDriverData(const std::string &sn_tag, const std::string &n, nvme_controller *c, nvme_namespace *ns)
+      : sn(sn_tag),
+        name(n),
+        ctrlr(c),
+        ns(ns),
+        aio_thread(this),
+        queue_empty(1),
+        queue_lock("NVMEDevice::queue_lock"),
+        flush_lock("NVMEDevice::flush_lock"),
+        flush_waiters(0),
+        inflight_ops(0) {
+    block_size = nvme_ns_get_sector_size(ns);
+    size = block_size * nvme_ns_get_num_sectors(ns);
+
+    PerfCountersBuilder b(g_ceph_context, string("NVMEDevice-AIOThread-"+stringify(this)),
+                          l_bluestore_nvmedevice_first, l_bluestore_nvmedevice_last);
+    b.add_time_avg(l_bluestore_nvmedevice_aio_write_lat, "aio_write_lat", "Average write completing latency");
+    b.add_time_avg(l_bluestore_nvmedevice_read_lat, "read_lat", "Average read completing latency");
+    b.add_time_avg(l_bluestore_nvmedevice_flush_lat, "flush_lat", "Average flush completing latency");
+    b.add_u64(l_bluestore_nvmedevice_queue_ops, "queue_ops", "Operations in nvme queue");
+    b.add_time_avg(l_bluestore_nvmedevice_polling_lat, "polling_lat", "Average polling latency");
+    b.add_time_avg(l_bluestore_nvmedevice_aio_write_queue_lat, "aio_write_queue_lat", "Average queue write request latency");
+    b.add_time_avg(l_bluestore_nvmedevice_read_queue_lat, "read_queue_lat", "Average queue read request latency");
+    b.add_time_avg(l_bluestore_nvmedevice_flush_queue_lat, "flush_queue_lat", "Average queue flush request latency");
+    logger = b.create_perf_counters();
+    g_ceph_context->get_perfcounters_collection()->add(logger);
+    _aio_start();
+  }
+  ~SharedDriverData() {
+    g_ceph_context->get_perfcounters_collection()->remove(logger);
+    delete logger;
+  }
+
+  bool is_equal(const string &tag) const { return sn == tag; }
+  void register_device(NVMEDevice *device) {
+    // The aio thread iterates registered_devices, so stop it while we modify
+    // the list. This is a rare operation, so the stop/start cost is bearable.
+    _aio_stop();
+    registered_devices.push_back(device);
+    _aio_start();
+  }
+  void remove_device(NVMEDevice *device) {
+    _aio_stop();  // the aio thread walks registered_devices; stop it while we rebuild the list
+    std::vector<NVMEDevice*> new_devices;
+    for (auto &&it : registered_devices) {
+      if (it != device)  // keep every device except the one being removed
+        new_devices.push_back(it);
+    }
+    registered_devices.swap(new_devices);
+    _aio_start();
+  }
+
+  uint64_t get_block_size() {
+    return block_size;
+  }
+  uint64_t get_size() {
+    return size;
+  }
+  void queue_task(Task *t, uint64_t ops = 1) {
+    inflight_ops.add(ops);
+    Mutex::Locker l(queue_lock);
+    task_queue.push(t);
+    if (queue_empty.read()) {
+      queue_empty.dec();
+      queue_cond.Signal();
+    }
+  }
+
+  void flush_wait() {
+    if (inflight_ops.read()) {
+      // TODO: this may contain read ops
+      dout(1) << __func__ << " existed inflight ops " << inflight_ops.read() << dendl;
+      Mutex::Locker l(flush_lock);
+      flush_waiters.inc();
+      while (inflight_ops.read()) {
+        flush_cond.Wait(flush_lock);
+      }
+      flush_waiters.dec();
+    }
+  }
+};
+
+void SharedDriverData::_aio_thread()
+{
+  dout(1) << __func__ << " start" << dendl;
+  if (nvme_register_io_thread() != 0) {
+    assert(0);
+  }
+  // Main loop: pop one queued Task and submit it; otherwise poll completions or sleep until signalled.
+  Task *t;
+  int r = 0;
+  const int max = 4;
+  uint64_t lba_off, lba_count;
+  utime_t lat, start = ceph_clock_now(g_ceph_context);
+  while (true) {
+    dout(40) << __func__ << " polling" << dendl;
+    t = nullptr;
+    if (!queue_empty.read()) {
+      Mutex::Locker l(queue_lock);
+      if (!task_queue.empty()) {
+        t = task_queue.front();
+        task_queue.pop();
+        logger->set(l_bluestore_nvmedevice_queue_ops, task_queue.size());
+      }
+      if (!t)
+        queue_empty.inc();
+    } else if (!inflight_ops.read()) {  // nothing queued and nothing in flight
+      if (flush_waiters.read()) {
+        Mutex::Locker l(flush_lock);
+        flush_cond.Signal();
+      }
+
+      for (auto &&it : registered_devices)
+        it->reap_ioc();
+
+      Mutex::Locker l(queue_lock);
+      if (queue_empty.read()) {
+        lat = ceph_clock_now(g_ceph_context);
+        lat -= start;
+        logger->tinc(l_bluestore_nvmedevice_polling_lat, lat);
+        if (aio_stop)
+          break;
+        dout(20) << __func__ << " enter sleep" << dendl;
+        queue_cond.Wait(queue_lock);
+        dout(20) << __func__ << " exit sleep" << dendl;
+        start = ceph_clock_now(g_ceph_context);
+      }
+    }
+
+    if (t) {
+      switch (t->command) {
+        case IOCommand::WRITE_COMMAND:
+        {
+          while (t) {  // a write may be a chain of tasks linked through t->next
+            lba_off = t->offset / block_size;
+            lba_count = t->len / block_size;
+            dout(20) << __func__ << " write command issued " << lba_off << "~" << lba_count << dendl;
+            r = nvme_ns_cmd_write(ns, t->buf, lba_off, lba_count, io_complete, t, 0);
+            if (r < 0) {
+              t->ctx->nvme_task_first = t->ctx->nvme_task_last = nullptr;
+              rte_free(t->buf);
+              rte_mempool_put(task_pool, t);
+              derr << __func__ << " failed to do write command" << dendl;
+              assert(0);
+            }
+            lat = ceph_clock_now(g_ceph_context);
+            lat -= t->start;
+            logger->tinc(l_bluestore_nvmedevice_aio_write_queue_lat, lat);
+            t = t->next;
+          }
+          break;
+        }
+        case IOCommand::READ_COMMAND:
+        {
+          lba_off = t->offset / block_size;  // compute the LBA range before logging it
+          lba_count = t->len / block_size;
+          dout(20) << __func__ << " read command issueed " << lba_off << "~" << lba_count << dendl;
+          r = nvme_ns_cmd_read(ns, t->buf, lba_off, lba_count, io_complete, t, 0);
+          if (r < 0) {
+            derr << __func__ << " failed to read" << dendl;
+            t->ctx->num_reading.dec();
+            t->return_code = r;
+            Mutex::Locker l(t->ctx->lock);
+            t->ctx->cond.Signal();
+          } else {
+            lat = ceph_clock_now(g_ceph_context);
+            lat -= t->start;
+            logger->tinc(l_bluestore_nvmedevice_read_queue_lat, lat);
+          }
+          break;
+        }
+        case IOCommand::FLUSH_COMMAND:
+        {
+          dout(20) << __func__ << " flush command issueed " << dendl;
+          r = nvme_ns_cmd_flush(ns, io_complete, t);
+          if (r < 0) {
+            derr << __func__ << " failed to flush" << dendl;
+            t->return_code = r;
+            Mutex::Locker l(t->ctx->lock);
+            t->ctx->cond.Signal();
+          } else {
+            lat = ceph_clock_now(g_ceph_context);
+            lat -= t->start;
+            logger->tinc(l_bluestore_nvmedevice_flush_queue_lat, lat);
+          }
+          break;
+        }
+      }
+    } else if (inflight_ops.read()) {  // nothing to submit this round: poll for completions
+      nvme_ctrlr_process_io_completions(ctrlr, max);
+      dout(30) << __func__ << " idle, have a pause" << dendl;
+#ifdef HAVE_SSE
+      _mm_pause();
+#else
+      usleep(10);
+#endif
+    }
+  }
+  nvme_unregister_io_thread();
+  dout(1) << __func__ << " end" << dendl;
+}
+
+class NVMEManager {  // registry of attached NVMe controllers, looked up by serial number
+  Mutex lock;  // taken for the whole of try_get()
+  bool init = false;  // true once try_get() has finished its one-time EAL/mempool/PCI setup
+  std::vector<SharedDriverData*> shared_driver_datas;  // one entry per attached device; appended by try_get()
+
+  static int _scan_nvme_device(const string &sn_tag, string &name, nvme_controller **c, nvme_namespace **ns);
+
+ public:
+  NVMEManager()
+      : lock("NVMEDevice::NVMEManager::lock") {}
+  int try_get(const string &sn_tag, SharedDriverData **driver);  // 0 with *driver set, or < 0 on failure
+};
+
+static NVMEManager manager;
+
+#define dout_subsys ceph_subsys_bdev
+#undef dout_prefix
+#define dout_prefix *_dout << "bdev "
+
+// Locate the NVMe-class PCI device whose serial number matches sn_tag, bind it to a uio driver,
+// then claim and attach it.  On success returns 0 with *c, *ns (namespace 1) and name filled in;
+// otherwise -ENOENT (no match), -EBUSY (kernel driver kept), -1 or the failing call's result.
+int NVMEManager::_scan_nvme_device(const string &sn_tag, string &name, nvme_controller **c, nvme_namespace **ns)
+{
+  int r = 0;
+  dout(1) << __func__ << " serial number " << sn_tag << dendl;
+
+  pci_device *pci_dev;
+  // Search for matching devices
+  pci_id_match match;
+  match.vendor_id = PCI_MATCH_ANY;
+  match.subvendor_id = PCI_MATCH_ANY;
+  match.subdevice_id = PCI_MATCH_ANY;
+  match.device_id = PCI_MATCH_ANY;
+  match.device_class = NVME_CLASS_CODE;
+  match.device_class_mask = 0xFFFFFF;
+
+  pci_device_iterator *iter = pci_id_match_iterator_create(&match);
+
+  char serial_number[128];
+  while ((pci_dev = pci_device_next(iter)) != NULL) {
+    dout(0) << __func__ << " found device at name: " << pci_device_get_device_name(pci_dev)
+            << " bus: " << pci_dev->bus << ":" << pci_dev->dev << ":"
+            << pci_dev->func << " vendor:0x" << pci_dev->vendor_id << " device:0x" << pci_dev->device_id
+            << dendl;
+    r = pci_device_get_serial_number(pci_dev, serial_number, 128);
+    if (r < 0) {
+      dout(10) << __func__ << " failed to get serial number from " << pci_device_get_device_name(pci_dev) << dendl;
+      continue;
+    }
+
+    if (sn_tag.compare(string(serial_number, 16))) {  // only the first 16 bytes are compared
+      dout(0) << __func__ << " device serial number not match " << serial_number << dendl;
+      continue;
+    }
+    break;
+  }
+  // pci_device objects outlive the iterator, so free it now; the early returns below used to leak it.
+  pci_iterator_destroy(iter);
+  if (pci_dev == NULL) {
+    derr << __func__ << " failed to found nvme serial number " << sn_tag << dendl;
+    return -ENOENT;
+  }
+
+  pci_device_probe(pci_dev);
+  name = pci_device_get_device_name(pci_dev) ? pci_device_get_device_name(pci_dev) : "Unknown";
+  if (pci_device_has_kernel_driver(pci_dev)) {
+    if (pci_device_has_non_uio_driver(pci_dev)) {
+      /*NVMe kernel driver case*/
+      if (g_conf->bdev_nvme_unbind_from_kernel) {
+        r =  pci_device_switch_to_uio_driver(pci_dev);
+        if (r < 0) {
+          derr << __func__ << " device " << name << " " << pci_dev->bus
+               << ":" << pci_dev->dev << ":" << pci_dev->func
+               << " switch to uio driver failed" << dendl;
+          return r;
+        }
+      } else {
+        derr << __func__ << " device has kernel nvme driver attached, exiting..." << dendl;
+        return -EBUSY;
+      }
+    }
+  } else {
+    r =  pci_device_bind_uio_driver(pci_dev, const_cast<char*>(PCI_UIO_DRIVER));
+    if (r < 0) {
+      derr << __func__ << " device " << name << " " << pci_dev->bus
+           << ":" << pci_dev->dev << ":" << pci_dev->func
+           << " bind to uio driver failed, may lack of uio_pci_generic kernel module" << dendl;
+      return r;
+    }
+  }
+
+  /* Claim the device so that no other process can take it over concurrently */
+  r =  pci_device_claim(pci_dev);
+  if (r < 0) {
+    derr << __func__ << " device " << name << " " << pci_dev->bus
+         << ":" << pci_dev->dev << ":" << pci_dev->func
+         << " claim failed" << dendl;
+    return r;
+  }
+
+  *c = nvme_attach(pci_dev);
+  if (!*c) {
+    derr << __func__ << " device attach nvme failed" << dendl;
+    return -1;
+  }
+
+  int num_ns = nvme_ctrlr_get_num_ns(*c);
+  assert(num_ns >= 1);
+  if (num_ns > 1) {
+    dout(0) << __func__ << " namespace count larger than 1, currently only use the first namespace" << dendl;
+  }
+  *ns = nvme_ctrlr_get_ns(*c, 1);
+  if (!*ns) {
+    derr << __func__ << " failed to get namespace at 1" << dendl;
+    return -1;
+  }
+  dout(1) << __func__ << " successfully attach nvme device at" << name
+          << " " << pci_dev->bus << ":" << pci_dev->dev << ":" << pci_dev->func << dendl;
+
+  return 0;
+}
+
+// Return in *driver the SharedDriverData for the device with serial number sn_tag, creating it
+// (and, on the first successful call, initialising EAL, the mempools and the PCI layer) if needed.
+int NVMEManager::try_get(const string &sn_tag, SharedDriverData **driver)
+{
+  Mutex::Locker l(lock);
+  int r = 0;
+  if (!init) {
+    r = rte_eal_init(sizeof(ealargs) / sizeof(ealargs[0]), (char **)(void *)(uintptr_t)ealargs);
+    if (r < 0) {
+      derr << __func__ << " failed to do rte_eal_init" << dendl;
+      return r;
+    }
+
+    request_mempool = rte_mempool_create("nvme_request", 512,
+                                         nvme_request_size(), 128, 0,
+                                         NULL, NULL, NULL, NULL,
+                                         SOCKET_ID_ANY, 0);
+    if (request_mempool == NULL) {
+      derr << __func__ << " failed to create memory pool for nvme requests" << dendl;
+      return -ENOMEM;
+    }
+
+    task_pool = rte_mempool_create(
+        "task_pool", 512, sizeof(Task),
+        64, 0, NULL, NULL, NULL, NULL,
+        SOCKET_ID_ANY, 0);
+    if (task_pool == NULL) {
+      derr << __func__ << " failed to create memory pool for tasks" << dendl;
+      return -ENOMEM;
+    }
+
+    pci_system_init();
+    nvme_retry_count = g_conf->bdev_nvme_retry_count;
+    if (nvme_retry_count < 0)
+      nvme_retry_count = NVME_DEFAULT_RETRY_COUNT;
+    init = true;
+  }
+
+  if (sn_tag.empty()) {
+    r = -ENOENT;
+    derr << __func__ << " empty serial number: " << cpp_strerror(r) << dendl;
+    return r;
+  }
+
+  for (auto &&it : shared_driver_datas) {
+    if (it->is_equal(sn_tag)) {  // already attached: share the existing driver
+      *driver = it;
+      return 0;
+    }
+  }
+
+  nvme_controller *c;
+  nvme_namespace *ns;
+  std::string name;
+  if ((r = _scan_nvme_device(sn_tag, name, &c, &ns)) < 0)
+    return r;  // keep the specific error (-ENOENT, -EBUSY, ...) instead of flattening it to -1
+
+  shared_driver_datas.push_back(new SharedDriverData(sn_tag, name, c, ns));
+  *driver = shared_driver_datas.back();
+  return 0;
+}
+
+void io_complete(void *t, const struct nvme_completion *completion)  // completion callback; t is the Task given at submit
+{
+  Task *task = static_cast<Task*>(t);
+  IOContext *ctx = task->ctx;
+  SharedDriverData *driver = task->device->get_driver();
+  driver->inflight_ops.dec();  // one fewer command outstanding on this driver
+  utime_t lat = ceph_clock_now(g_ceph_context);
+  lat -= task->start;  // elapsed time since the Task was created
+  if (task->command == IOCommand::WRITE_COMMAND) {
+    driver->logger->tinc(l_bluestore_nvmedevice_aio_write_lat, lat);
+    assert(!nvme_completion_is_error(completion));  // a failed write is treated as fatal
+    dout(20) << __func__ << " write op successfully, left " << left << dendl;  // NOTE(review): no local `left` here; presumably std::left — confirm
+    // buffer write won't have ctx, and we will free request later, see `flush`
+    if (ctx) {
+      // check waiting count before doing callback (which may
+      // destroy this ioc).
+      if (!ctx->num_running.dec()) {
+        if (ctx->num_waiting.read()) {
+          Mutex::Locker l(ctx->lock);
+          ctx->cond.Signal();
+        }
+        if (task->device->aio_callback && ctx->priv) {
+          task->device->aio_callback(task->device->aio_callback_priv, ctx->priv);
+        }
+      }
+      rte_free(task->buf);  // ioc-backed write: buffer and task are released right away
+      rte_mempool_put(task_pool, task);
+    } else {
+      task->device->queue_buffer_task(task);  // buffered write: parked until NVMEDevice::flush()
+    }
+  } else if (task->command == IOCommand::READ_COMMAND) {
+    driver->logger->tinc(l_bluestore_nvmedevice_read_lat, lat);
+    ctx->num_reading.dec();
+    dout(20) << __func__ << " read op successfully" << dendl;
+    if (nvme_completion_is_error(completion))
+      task->return_code = -1; // FIXME
+    else
+      task->return_code = 0;
+    {
+      Mutex::Locker l(ctx->lock);
+      ctx->cond.Signal();  // wakes the reader waiting for return_code to drop to <= 0
+    }
+  } else {
+    assert(task->command == IOCommand::FLUSH_COMMAND);
+    driver->logger->tinc(l_bluestore_nvmedevice_flush_lat, lat);
+    dout(20) << __func__ << " flush op successfully" << dendl;
+    if (nvme_completion_is_error(completion))
+      task->return_code = -1; // FIXME
+    else
+      task->return_code = 0;
+    {
+      Mutex::Locker l(ctx->lock);
+      ctx->cond.Signal();
+    }
+  }
+}
+
+// ----------------
+#undef dout_prefix
+#define dout_prefix *_dout << "bdev(" << name << ") "
+
+NVMEDevice::NVMEDevice(aio_callback_t cb, void *cbpriv)
+    : buffer_lock("NVMEDevice::buffer_lock"),
+      aio_callback(cb),  // invoked from io_complete when an ioc's last running write finishes
+      aio_callback_priv(cbpriv)  // handed back as the callback's first argument
+{
+  zeros = buffer::create_page_aligned(1048576);  // 1 MiB zero-filled source buffer for aio_zero()
+  zeros.zero();
+}
+
+
+// Open the device described by the file at path p: the file's content is taken as the NVMe serial
+// number and resolved to a shared driver.  Returns 0 on success or a negative error code.
+int NVMEDevice::open(string p)
+{
+  int r = 0;
+  dout(1) << __func__ << " path " << p << dendl;
+  int fd = ::open(p.c_str(), O_RDONLY);
+  if (fd < 0) {
+    r = -errno;
+    derr << __func__ << " unable to open " << p << ": " << cpp_strerror(r) << dendl;
+    return r;
+  }
+  char buf[100];
+  r = ::read(fd, buf, sizeof(buf));
+  if (r <= 0)
+    r = r < 0 ? -errno : -ENOENT;  // empty file: no serial number (errno is stale, possibly 0)
+  ::close(fd);  // only needed for this one read; the descriptor used to be leaked
+  if (r < 0) {
+    derr << __func__ << " unable to read " << p << ": " << cpp_strerror(r) << dendl;
+    return r;
+  }
+  string serial_number(buf, r);
+  r = manager.try_get(serial_number, &driver);
+  if (r < 0) {
+    derr << __func__ << " failed to get nvme deivce with sn " << serial_number << dendl;
+    return r;
+  }
+
+  driver->register_device(this);
+  block_size = driver->get_block_size();
+  size = driver->get_size();
+
+  dout(1) << __func__ << " size " << size << " (" << pretty_si_t(size) << "B)"
+          << " block_size " << block_size << " (" << pretty_si_t(block_size)
+          << "B)" << dendl;
+  return 0;
+}
+
+void NVMEDevice::close()  // detach this device from its shared driver
+{
+  dout(1) << __func__ << dendl;
+
+  name.clear();
+  driver->remove_device(this);  // counterpart of the register_device() call in open()
+
+  dout(1) << __func__ << " end" << dendl;
+}
+
+// Call driver->flush_wait(), then free every completed buffered-write task parked by queue_buffer_task().
+int NVMEDevice::flush()
+{
+  dout(10) << __func__ << " start" << dendl;
+  utime_t start = ceph_clock_now(g_ceph_context);
+  driver->flush_wait();  // NOTE(review): presumably blocks until in-flight ops drain — confirm in SharedDriverData
+  Task *t = nullptr;
+  {  // detach the parked task list and drop the read-overlay extents that point into its buffers
+    Mutex::Locker l(buffer_lock);
+    buffered_extents.clear();
+    t = buffered_task_head;
+    buffered_task_head = nullptr;
+  }
+  for (Task *next; t; t = next) {
+    next = t->next;  // read the link first: once t is back in the pool another thread may reuse it
+    rte_free(t->buf);
+    rte_mempool_put(task_pool, t);
+  }
+  utime_t lat = ceph_clock_now(g_ceph_context);
+  lat -= start;
+  driver->logger->tinc(l_bluestore_nvmedevice_flush_lat, lat);
+  return 0;
+  // A real NVMe flush command degrades performance badly, so the version below is disabled.
+  /*
+  Task *t;
+  int r = rte_mempool_get(task_pool, (void **)&t);
+  if (r < 0) {
+    derr << __func__ << " task_pool rte_mempool_get failed" << dendl;
+    return r;
+  }
+
+  t->start = ceph_clock_now(g_ceph_context);
+  IOContext ioc(nullptr);
+  t->buf = nullptr;
+  t->ctx = &ioc;
+  t->command = IOCommand::FLUSH_COMMAND;
+  t->offset = 0;
+  t->len = 0;
+  t->device = this;
+  t->return_code = 1;
+  t->next = nullptr;
+  driver->queue_task(t);
+
+  {
+    Mutex::Locker l(ioc.lock);
+    while (t->return_code > 0)
+      ioc.cond.Wait(ioc.lock);
+  }
+  r = t->return_code;
+  rte_mempool_put(task_pool, t);
+  return 0;
+   */
+}
+
+// Submit the writes staged on ioc by aio_write(): move its pending count to running and queue the chain.
+void NVMEDevice::aio_submit(IOContext *ioc)
+{
+  dout(20) << __func__ << " ioc " << ioc << " pending " << ioc->num_pending.read()
+           << " running " << ioc->num_running.read() << dendl;
+  int pending = ioc->num_pending.read();
+  Task *t = static_cast<Task*>(ioc->nvme_task_first);
+  if (pending && t) {
+    ioc->num_running.add(pending);
+    ioc->num_pending.sub(pending);
+    assert(ioc->num_pending.read() == 0);  // we should be only thread doing this
+    // Detach the chain first: after queue_task() completion may run the aio callback, which may destroy ioc.
+    ioc->nvme_task_first = ioc->nvme_task_last = nullptr;
+    driver->queue_task(t, pending);  // only the head is queued; the rest is reached through t->next
+  }
+}
+
+int NVMEDevice::aio_write(  // stage (ioc) or immediately queue (buffered) a block-aligned write of bl at off
+    uint64_t off,
+    bufferlist &bl,
+    IOContext *ioc,
+    bool buffered)
+{
+  uint64_t len = bl.length();
+  dout(20) << __func__ << " " << off << "~" << len << " ioc " << ioc
+           << " buffered " << buffered << dendl;
+  assert(off % block_size == 0);
+  assert(len % block_size == 0);
+  assert(len > 0);
+  assert(off < size);
+  assert(off + len <= size);
+
+  Task *t;
+  int r = rte_mempool_get(task_pool, (void **)&t);
+  if (r < 0) {
+    return r;  // NOTE(review): unlike read(), pool exhaustion is not logged here
+  }
+  t->start = ceph_clock_now(g_ceph_context);
+
+  t->buf = rte_malloc(NULL, len, block_size);
+  if (t->buf == NULL) {
+    derr << __func__ << " task->buf rte_malloc failed" << dendl;
+    rte_mempool_put(task_pool, t);
+    return -ENOMEM;
+  }
+  bl.copy(0, len, static_cast<char*>(t->buf));  // payload is copied, so bl need not outlive this call
+
+  t->command = IOCommand::WRITE_COMMAND;
+  t->offset = off;
+  t->len = len;
+  t->device = this;
+  t->return_code = 0;
+  t->next = nullptr;
+
+  if (buffered) {
+    t->ctx = nullptr;  // no ioc: io_complete parks the task until flush() frees it
+    // Only need to push the first entry
+    driver->queue_task(t);
+    Mutex::Locker l(buffer_lock);
+    buffered_extents.insert(off, len, (char*)t->buf);  // consulted through read_overlap() by the read paths
+  } else {
+    t->ctx = ioc;
+    Task *first = static_cast<Task*>(ioc->nvme_task_first);
+    Task *last = static_cast<Task*>(ioc->nvme_task_last);
+    if (last)
+      last->next = t;  // append to the ioc's singly linked chain
+    if (!first)
+      ioc->nvme_task_first = t;
+    ioc->nvme_task_last = t;
+    ioc->num_pending.inc();  // nothing is queued yet; aio_submit() pushes the chain
+  }
+
+  dout(5) << __func__ << " " << off << "~" << len << dendl;
+
+  return 0;
+}
+
+// Write len zero bytes at off through aio_write(), building the payload from the shared zeros buffer.
+int NVMEDevice::aio_zero(
+    uint64_t off,
+    uint64_t len,
+    IOContext *ioc)
+{
+  dout(5) << __func__ << " " << off << "~" << len << dendl;
+  assert(off % block_size == 0);
+  assert(len % block_size == 0);
+  assert(len > 0);
+  assert(off < size);
+  assert(off + len <= size);
+
+  bufferlist bl;
+  while (len > 0) {
+    bufferlist t;
+    t.append(zeros, 0, MIN(zeros.length(), len));
+    len -= t.length();
+    bl.claim_append(t);
+  }
+  // bl only references this->zeros; aio_write() copies it into its own rte_malloc'ed buffer,
+  // so nothing built here has to outlive the call.
+  return aio_write(off, bl, ioc, false);
+}
+
+// Synchronous read of the block-aligned range off~len into *pbl: queues a READ task, waits on
+// ioc->cond for its completion, then overlays data of buffered writes.  Returns the task's code.
+int NVMEDevice::read(uint64_t off, uint64_t len, bufferlist *pbl,
+                     IOContext *ioc,
+                     bool buffered)
+{
+  dout(5) << __func__ << " " << off << "~" << len << " ioc " << ioc << dendl;
+  assert(off % block_size == 0);
+  assert(len % block_size == 0);
+  assert(len > 0);
+  assert(off < size);
+  assert(off + len <= size);
+
+  Task *t;
+  int r = rte_mempool_get(task_pool, (void **)&t);
+  if (r < 0) {
+    derr << __func__ << " task_pool rte_mempool_get failed" << dendl;
+    return r;
+  }
+  t->start = ceph_clock_now(g_ceph_context);
+  bufferptr p = buffer::create_page_aligned(len);
+  t->buf = rte_malloc(NULL, len, block_size);
+  if (t->buf == NULL) {
+    derr << __func__ << " task->buf rte_malloc failed" << dendl;
+    r = -ENOMEM;
+    goto out;
+  }
+  t->ctx = ioc;
+  t->command = IOCommand::READ_COMMAND;
+  t->offset = off;
+  t->len = len;
+  t->device = this;
+  t->return_code = 1;  // > 0 means "still in flight"
+  t->next = nullptr;
+  ioc->num_reading.inc();
+  driver->queue_task(t);
+  {
+    Mutex::Locker l(ioc->lock);
+    while (t->return_code > 0)
+      ioc->cond.Wait(ioc->lock);
+  }
+  memcpy(p.c_str(), t->buf, len);
+  {
+    Mutex::Locker l(buffer_lock);  // overlay goes into p (what the caller gets), not the about-to-be-freed t->buf
+    uint64_t copied = buffered_extents.read_overlap(off, len, p.c_str());
+    dout(10) << __func__ << " read from buffer " << copied << dendl;
+  }
+  pbl->clear();
+  pbl->push_back(std::move(p));
+  r = t->return_code;
+  rte_free(t->buf);
+
+ out:
+  rte_mempool_put(task_pool, t);
+  if (ioc->num_waiting.read()) {
+    dout(20) << __func__ << " waking waiter" << dendl;
+    Mutex::Locker l(ioc->lock);
+    ioc->cond.Signal();
+  }
+  return r;
+}
+
+int NVMEDevice::read_buffered(uint64_t off, uint64_t len, char *buf)  // synchronous read of an arbitrary (unaligned) byte range into buf
+{
+  assert(len > 0);
+  assert(off < size);
+  assert(off + len <= size);
+
+  uint64_t aligned_off = align_down(off, block_size);  // widen the request to whole blocks
+  uint64_t aligned_len = align_up(off+len, block_size) - aligned_off;
+  dout(5) << __func__ << " " << off << "~" << len
+          << " aligned " << aligned_off << "~" << aligned_len << dendl;
+  IOContext ioc(nullptr);  // private ioc: used only for its lock/cond while waiting below
+  Task *t;
+  int r = rte_mempool_get(task_pool, (void **)&t);
+  if (r < 0) {
+    derr << __func__ << " task_pool rte_mempool_get failed" << dendl;
+    return r;
+  }
+  t->start = ceph_clock_now(g_ceph_context);
+  t->buf = rte_malloc(NULL, aligned_len, block_size);
+  if (t->buf == NULL) {
+    derr << __func__ << " task->buf rte_malloc failed" << dendl;
+    r = -ENOMEM;
+    rte_mempool_put(task_pool, t);
+    return r;
+  }
+  t->ctx = &ioc;
+  t->command = IOCommand::READ_COMMAND;
+  t->offset = aligned_off;
+  t->len = aligned_len;
+  t->device = this;
+  t->return_code = 1;  // > 0 means "still in flight"
+  t->next = nullptr;
+  ioc.num_reading.inc();;
+  driver->queue_task(t);
+
+  {
+    Mutex::Locker l(ioc.lock);
+    while (t->return_code > 0)
+      ioc.cond.Wait(ioc.lock);
+  }
+  memcpy(buf, (char*)t->buf+off-aligned_off, len);  // copy only the requested slice of the aligned read
+  {
+    Mutex::Locker l(buffer_lock);
+    uint64_t copied = buffered_extents.read_overlap(off, len, buf);  // then overlay data of buffered writes
+    dout(10) << __func__ << " read from buffer " << copied << dendl;
+  }
+  r = t->return_code;
+  rte_free(t->buf);
+  rte_mempool_put(task_pool, t);
+
+  return r;
+}
+
+int NVMEDevice::invalidate_cache(uint64_t off, uint64_t len)
+{
+  dout(5) << __func__ << " " << off << "~" << len << dendl;
+  return 0;  // nothing is invalidated: the call only logs and reports success
+}
diff --git a/src/os/bluestore/NVMEDevice.h b/src/os/bluestore/NVMEDevice.h
new file mode 100644
index 0000000..dca5837
--- /dev/null
+++ b/src/os/bluestore/NVMEDevice.h
@@ -0,0 +1,260 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+  *
+ * Copyright (C) 2015 XSky <haomai at xsky.com>
+ *
+ * Author: Haomai Wang <haomaiwang at gmail.com>
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation.  See file COPYING.
+ *
+ */
+
+#ifndef CEPH_OS_BLUESTORE_NVMEDEVICE
+#define CEPH_OS_BLUESTORE_NVMEDEVICE
+
+#include <queue>
+#include <map>
+#include <pciaccess.h>
+#include <limits>
+
+// _Static_assert is a C11 keyword; map it to C++11's static_assert
+#define _Static_assert static_assert
+
+
+#include "include/atomic.h"
+#include "include/interval_set.h"
+#include "include/utime.h"
+#include "common/Mutex.h"
+#include "BlockDevice.h"
+
+enum class IOCommand {  // operation a Task asks the NVMe I/O thread to perform
+  READ_COMMAND,
+  WRITE_COMMAND,
+  FLUSH_COMMAND
+};
+
+class NVMEDevice;
+
+struct Task {  // one queued NVMe request; instances come from task_pool in NVMEDevice.cc
+  NVMEDevice *device;  // device that created the task
+  IOContext *ctx;  // owning ioc; nullptr for buffered writes
+  IOCommand command;
+  uint64_t offset, len;  // in bytes; divided by the block size to get LBAs at submit time
+  void *buf;  // rte_malloc'ed data buffer for reads and writes
+  Task *next;  // links an ioc's staged writes, and a device's completed buffered writes
+  int64_t return_code;  // readers wait while this is > 0; completion stores 0 or a negative value
+  utime_t start;  // creation time, basis for the latency counters
+};
+
+class PerfCounters;
+class SharedDriverData;
+
+class NVMEDevice : public BlockDevice {  // BlockDevice that submits its I/O through a SharedDriverData
+  /**
+   * points to pinned, physically contiguous memory region;
+   * contains 4KB IDENTIFY structure for controller which is
+   *  target for CONTROLLER IDENTIFY command during initialization
+   */
+  SharedDriverData *driver;  // NOTE(review): comment above looks stale for this member; set in open() via NVMEManager::try_get
+  string name;
+
+  uint64_t size;  // copied from driver->get_size() in open()
+  uint64_t block_size;  // copied from driver->get_block_size() in open()
+
+  bool aio_stop;  // NOTE(review): not initialised by the constructor in NVMEDevice.cc
+  bufferptr zeros;  // 1 MiB zero-filled buffer, source data for aio_zero()
+
+  struct BufferedExtents {  // data of buffered writes, kept as non-overlapping extents keyed by device offset
+    struct Extent {
+      uint64_t x_len;  // number of bytes this extent still covers
+      uint64_t x_off;  // offset into `data` where those bytes start
+      const char *data;  // start of the originating write buffer
+      uint64_t data_len;  // full length of that buffer
+    };
+    using Offset = uint64_t;
+    map<Offset, Extent> buffered_extents;
+    uint64_t left_edge = std::numeric_limits<uint64_t>::max();  // lowest offset inserted since the last clear()
+    uint64_t right_edge = 0;  // highest end offset inserted since the last clear()
+
+    void verify() {  // debug check: asserts that no two extents overlap
+      interval_set<uint64_t> m;
+      for (auto && it : buffered_extents) {
+        assert(!m.intersects(it.first, it.second.x_len));
+        m.insert(it.first, it.second.x_len);
+      }
+    }
+
+    void insert(uint64_t off, uint64_t len, const char *data) {  // record data for off~len, trimming/splitting older extents it overlaps
+      auto it = buffered_extents.lower_bound(off);
+      if (it != buffered_extents.begin()) {
+        --it;  // the predecessor may still reach into off~len
+        if (it->first + it->second.x_len <= off)
+          ++it;
+      }
+      uint64_t end = off + len;
+      if (off < left_edge)
+        left_edge = off;
+      if (end > right_edge)
+        right_edge = end;
+      while (it != buffered_extents.end()) {
+        if (it->first >= end)
+          break;
+        uint64_t extent_it_end = it->first + it->second.x_len;
+        assert(extent_it_end >= off);
+        if (it->first <= off) {
+          if (extent_it_end > end) {
+            //         <-     data    ->
+            // <-            it           ->
+            it->second.x_len -= (extent_it_end - off);  // keep the head; the tail is re-inserted at `end`
+            buffered_extents[end] = Extent{
+                extent_it_end - end, it->second.x_off + it->second.x_len + len, it->second.data, it->second.data_len};
+          } else {
+            //         <-     data    ->
+            // <-     it    ->
+            assert(extent_it_end <= end);
+            it->second.x_len -= (extent_it_end - off);
+          }
+          ++it;
+        } else {
+          assert(it->first > off) ;
+          if (extent_it_end > end) {
+            //  <-     data    ->
+            //      <-           it          ->
+            uint64_t overlap = end - it->first;
+            buffered_extents[end] = Extent{
+                it->second.x_len - overlap, it->second.x_off + overlap, it->second.data, it->second.data_len};
+          } else {
+            //  <-     data    ->
+            //      <- it ->
+          }
+          buffered_extents.erase(it++);  // the old extent is fully replaced (any tail was re-inserted above)
+        }
+      }
+      buffered_extents[off] = Extent{
+          len, 0, data, len};
+
+      if (0)
+        verify();
+    }
+
+    void memcpy_check(char *dst, uint64_t dst_raw_len, uint64_t dst_off,
+                      map<Offset, Extent>::iterator &it, uint64_t src_off, uint64_t copylen) {
+      if (0) {  // bounds checks are compiled in but disabled
+        assert(dst_off + copylen <= dst_raw_len);
+        assert(it->second.x_off + src_off + copylen <= it->second.data_len);
+      }
+      memcpy(dst + dst_off, it->second.data + it->second.x_off + src_off, copylen);
+    }
+
+    uint64_t read_overlap(uint64_t off, uint64_t len, char *buf) {  // copy buffered bytes overlapping off~len into buf; returns bytes copied
+      uint64_t end = off + len;
+      if (end <= left_edge || off >= right_edge)
+        return 0;  // outside everything inserted so far
+
+      uint64_t copied = 0;
+      auto it = buffered_extents.lower_bound(off);
+      if (it != buffered_extents.begin()) {
+        --it;
+        if (it->first + it->second.x_len <= off)
+          ++it;
+      }
+      uint64_t copy_len;
+      while (it != buffered_extents.end()) {
+        if (it->first >= end)
+          break;
+        uint64_t extent_it_end = it->first + it->second.x_len;
+        assert(extent_it_end >= off);
+        if (it->first >= off) {
+          if (extent_it_end > end) {
+            //  <-     data    ->
+            //      <-           it          ->
+            copy_len = len - (it->first - off);
+            memcpy_check(buf, len, it->first - off, it, 0, copy_len);
+          } else {
+            //  <-     data    ->
+            //      <- it ->
+            copy_len = it->second.x_len;
+            memcpy_check(buf, len, it->first - off, it, 0, copy_len);
+          }
+        } else {
+          if (extent_it_end > end) {
+            //         <-     data    ->
+            // <-           it          ->
+            copy_len = len;
+            memcpy_check(buf, len, 0, it, off - it->first, copy_len);
+          } else {
+            //         <-     data    ->
+            // <-     it    ->
+            assert(extent_it_end <= end);
+            copy_len = it->first + it->second.x_len - off;
+            memcpy_check(buf, len, 0, it, off - it->first, copy_len);
+          }
+        }
+        copied += copy_len;
+        ++it;
+      }
+      return copied;
+    }
+
+    void clear() {
+      buffered_extents.clear();
+      left_edge = std::numeric_limits<uint64_t>::max();
+      right_edge = 0;
+    }
+  };
+  Mutex buffer_lock;  // guards buffered_extents and buffered_task_head
+  BufferedExtents buffered_extents;
+  Task *buffered_task_head = nullptr;  // completed buffered-write tasks awaiting flush()
+
+  static void init();  // NOTE(review): no definition visible in the accompanying NVMEDevice.cc code
+ public:
+  void queue_buffer_task(Task *t) {  // push t on the front of the list that flush() later frees
+    Mutex::Locker l(buffer_lock);
+    assert(t->next == nullptr);
+    t->next = buffered_task_head;
+    buffered_task_head = t;
+  }
+
+  SharedDriverData *get_driver() { return driver; }
+
+ public:
+  aio_callback_t aio_callback;
+  void *aio_callback_priv;
+
+  NVMEDevice(aio_callback_t cb, void *cbpriv);
+
+  bool supported_bdev_label() override { return false; }
+
+  void aio_submit(IOContext *ioc) override;
+
+  uint64_t get_size() const override {
+    return size;
+  }
+  uint64_t get_block_size() const override {
+    return block_size;
+  }
+
+  int read(uint64_t off, uint64_t len, bufferlist *pbl,
+           IOContext *ioc,
+           bool buffered) override;
+
+  int aio_write(uint64_t off, bufferlist& bl,
+                IOContext *ioc,
+                bool buffered) override ;
+  int aio_zero(uint64_t off, uint64_t len,
+               IOContext *ioc) override;
+  int flush() override;
+  int read_buffered(uint64_t off, uint64_t len, char *buf) override;
+
+  // for managing buffered readers/writers
+  int invalidate_cache(uint64_t off, uint64_t len) override;
+  int open(string path) override;
+  void close() override;
+};
+
+#endif
diff --git a/src/os/bluestore/StupidAllocator.cc b/src/os/bluestore/StupidAllocator.cc
old mode 100644
new mode 100755
index d96e945..4d8c5f1
--- a/src/os/bluestore/StupidAllocator.cc
+++ b/src/os/bluestore/StupidAllocator.cc
@@ -10,8 +10,7 @@
 #define dout_prefix *_dout << "stupidalloc "
 
 StupidAllocator::StupidAllocator()
-  : lock("StupicAllocator::lock"),
-    num_free(0),
+  : num_free(0),
     num_uncommitted(0),
     num_committing(0),
     num_reserved(0),
@@ -54,7 +53,7 @@ void StupidAllocator::_insert_free(uint64_t off, uint64_t len)
 
 int StupidAllocator::reserve(uint64_t need)
 {
-  Mutex::Locker l(lock);
+  std::lock_guard<std::mutex> l(lock);
   dout(10) << __func__ << " need " << need << " num_free " << num_free
 	   << " num_reserved " << num_reserved << dendl;
   if ((int64_t)need > num_free - num_reserved)
@@ -65,15 +64,15 @@ int StupidAllocator::reserve(uint64_t need)
 
 void StupidAllocator::unreserve(uint64_t unused)
 {
-  Mutex::Locker l(lock);
+  std::lock_guard<std::mutex> l(lock);
   dout(10) << __func__ << " unused " << unused << " num_free " << num_free
 	   << " num_reserved " << num_reserved << dendl;
-  assert((int64_t)unused >= num_reserved);
+  assert(num_reserved >= (int64_t)unused);
   num_reserved -= unused;
 }
 
 /// return the effective length of the extent if we align to alloc_unit
-static uint64_t aligned_len(interval_set<uint64_t>::iterator p,
+static uint64_t aligned_len(btree_interval_set<uint64_t>::iterator p,
 			    uint64_t alloc_unit)
 {
   uint64_t skew = p.get_start() % alloc_unit;
@@ -89,7 +88,7 @@ int StupidAllocator::allocate(
   uint64_t need_size, uint64_t alloc_unit, int64_t hint,
   uint64_t *offset, uint32_t *length)
 {
-  Mutex::Locker l(lock);
+  std::lock_guard<std::mutex> l(lock);
   dout(10) << __func__ << " need_size " << need_size
 	   << " alloc_unit " << alloc_unit
 	   << " hint " << hint
@@ -98,7 +97,7 @@ int StupidAllocator::allocate(
   int bin = _choose_bin(want);
   int orig_bin = bin;
 
-  interval_set<uint64_t>::iterator p = free[0].begin();
+  auto p = free[0].begin();
 
   if (!hint)
     hint = last_alloc;
@@ -204,7 +203,7 @@ int StupidAllocator::allocate(
 int StupidAllocator::release(
   uint64_t offset, uint64_t length)
 {
-  Mutex::Locker l(lock);
+  std::lock_guard<std::mutex> l(lock);
   dout(10) << __func__ << " " << offset << "~" << length << dendl;
   uncommitted.insert(offset, length);
   num_uncommitted += length;
@@ -213,17 +212,17 @@ int StupidAllocator::release(
 
 uint64_t StupidAllocator::get_free()
 {
-  Mutex::Locker l(lock);
+  std::lock_guard<std::mutex> l(lock);
   return num_free;
 }
 
 void StupidAllocator::dump(ostream& out)
 {
-  Mutex::Locker l(lock);
+  std::lock_guard<std::mutex> l(lock);
   for (unsigned bin = 0; bin < free.size(); ++bin) {
     dout(30) << __func__ << " free bin " << bin << ": "
 	     << free[bin].num_intervals() << " extents" << dendl;
-    for (interval_set<uint64_t>::iterator p = free[bin].begin();
+    for (auto p = free[bin].begin();
 	 p != free[bin].end();
 	 ++p) {
       dout(30) << __func__ << "  " << p.get_start() << "~" << p.get_len() << dendl;
@@ -231,14 +230,14 @@ void StupidAllocator::dump(ostream& out)
   }
   dout(30) << __func__ << " committing: "
 	   << committing.num_intervals() << " extents" << dendl;
-  for (interval_set<uint64_t>::iterator p = committing.begin();
+  for (auto p = committing.begin();
        p != committing.end();
        ++p) {
     dout(30) << __func__ << "  " << p.get_start() << "~" << p.get_len() << dendl;
   }
   dout(30) << __func__ << " uncommitted: "
 	   << uncommitted.num_intervals() << " extents" << dendl;
-  for (interval_set<uint64_t>::iterator p = uncommitted.begin();
+  for (auto p = uncommitted.begin();
        p != uncommitted.end();
        ++p) {
     dout(30) << __func__ << "  " << p.get_start() << "~" << p.get_len() << dendl;
@@ -247,7 +246,7 @@ void StupidAllocator::dump(ostream& out)
 
 void StupidAllocator::init_add_free(uint64_t offset, uint64_t length)
 {
-  Mutex::Locker l(lock);
+  std::lock_guard<std::mutex> l(lock);
   dout(10) << __func__ << " " << offset << "~" << length << dendl;
   _insert_free(offset, length);
   num_free += length;
@@ -255,12 +254,12 @@ void StupidAllocator::init_add_free(uint64_t offset, uint64_t length)
 
 void StupidAllocator::init_rm_free(uint64_t offset, uint64_t length)
 {
-  Mutex::Locker l(lock);
+  std::lock_guard<std::mutex> l(lock);
   dout(10) << __func__ << " " << offset << "~" << length << dendl;
-  interval_set<uint64_t> rm;
+  btree_interval_set<uint64_t> rm;
   rm.insert(offset, length);
   for (unsigned i = 0; i < free.size() && !rm.empty(); ++i) {
-    interval_set<uint64_t> overlap;
+    btree_interval_set<uint64_t> overlap;
     overlap.intersection_of(rm, free[i]);
     if (!overlap.empty()) {
       dout(20) << __func__ << " bin " << i << " rm " << overlap << dendl;
@@ -281,7 +280,7 @@ void StupidAllocator::shutdown()
 
 void StupidAllocator::commit_start()
 {
-  Mutex::Locker l(lock);
+  std::lock_guard<std::mutex> l(lock);
   dout(10) << __func__ << " releasing " << num_uncommitted
 	   << " in extents " << uncommitted.num_intervals() << dendl;
   assert(committing.empty());
@@ -292,10 +291,10 @@ void StupidAllocator::commit_start()
 
 void StupidAllocator::commit_finish()
 {
-  Mutex::Locker l(lock);
+  std::lock_guard<std::mutex> l(lock);
   dout(10) << __func__ << " released " << num_committing
 	   << " in extents " << committing.num_intervals() << dendl;
-  for (interval_set<uint64_t>::iterator p = committing.begin();
+  for (auto p = committing.begin();
        p != committing.end();
        ++p) {
     _insert_free(p.get_start(), p.get_len());
diff --git a/src/os/bluestore/StupidAllocator.h b/src/os/bluestore/StupidAllocator.h
index ec71b86..1b09ace 100644
--- a/src/os/bluestore/StupidAllocator.h
+++ b/src/os/bluestore/StupidAllocator.h
@@ -4,21 +4,22 @@
 #ifndef CEPH_OS_BLUESTORE_STUPIDALLOCATOR_H
 #define CEPH_OS_BLUESTORE_STUPIDALLOCATOR_H
 
+#include <mutex>
+
 #include "Allocator.h"
-#include "include/interval_set.h"
-#include "common/Mutex.h"
+#include "include/btree_interval_set.h"
 
 class StupidAllocator : public Allocator {
-  Mutex lock;
+  std::mutex lock;
 
   int64_t num_free;     ///< total bytes in freelist
   int64_t num_uncommitted;
   int64_t num_committing;
   int64_t num_reserved; ///< reserved bytes
 
-  vector<interval_set<uint64_t> > free;        ///< leading-edge copy
-  interval_set<uint64_t> uncommitted; ///< released but not yet usable
-  interval_set<uint64_t> committing;  ///< released but not yet usable
+  std::vector<btree_interval_set<uint64_t> > free;        ///< leading-edge copy
+  btree_interval_set<uint64_t> uncommitted; ///< released but not yet usable
+  btree_interval_set<uint64_t> committing;  ///< released but not yet usable
 
   uint64_t last_alloc;
 
@@ -44,7 +45,7 @@ public:
 
   uint64_t get_free();
 
-  void dump(ostream& out);
+  void dump(std::ostream& out);
 
   void init_add_free(uint64_t offset, uint64_t length);
   void init_rm_free(uint64_t offset, uint64_t length);
diff --git a/src/os/bluestore/bluefs_types.cc b/src/os/bluestore/bluefs_types.cc
index 8b8a326..a1eda91 100644
--- a/src/os/bluestore/bluefs_types.cc
+++ b/src/os/bluestore/bluefs_types.cc
@@ -39,7 +39,7 @@ void bluefs_extent_t::generate_test_instances(list<bluefs_extent_t*>& ls)
   ls.push_back(new bluefs_extent_t);
   ls.back()->offset = 1;
   ls.back()->length = 2;
-  ls.back()->bdev = 3;
+  ls.back()->bdev = 1;
 }
 
 ostream& operator<<(ostream& out, bluefs_extent_t e)
@@ -90,7 +90,7 @@ void bluefs_super_t::generate_test_instances(list<bluefs_super_t*>& ls)
 
 ostream& operator<<(ostream& out, const bluefs_super_t& s)
 {
-  return out << "super(" << s.uuid
+  return out << "super(uuid " << s.uuid
 	     << " osd " << s.osd_uuid
 	     << " v " << s.version
 	     << " block_size " << s.block_size
@@ -163,7 +163,7 @@ void bluefs_fnode_t::generate_test_instances(list<bluefs_fnode_t*>& ls)
 
 ostream& operator<<(ostream& out, const bluefs_fnode_t& file)
 {
-  return out << "file(" << file.ino
+  return out << "file(ino " << file.ino
 	     << " size " << file.size
 	     << " mtime " << file.mtime
 	     << " bdev " << (int)file.prefer_bdev
@@ -218,16 +218,18 @@ void bluefs_transaction_t::generate_test_instance(
   ls.back()->op_alloc_rm(1, 0, 123);
   ls.back()->op_dir_create("dir");
   ls.back()->op_dir_create("dir2");
-  ls.back()->op_dir_link("dir", "file1", 1);
-  ls.back()->op_dir_unlink("dir", "oldfile");
-  ls.back()->op_file_update(bluefs_fnode_t());
-  ls.back()->op_dir_remove("dir3");
+  bluefs_fnode_t fnode;
+  fnode.ino = 2;
+  ls.back()->op_file_update(fnode);
+  ls.back()->op_dir_link("dir", "file1", 2);
+  ls.back()->op_dir_unlink("dir", "file1");
   ls.back()->op_file_remove(2);
+  ls.back()->op_dir_remove("dir2");
 }
 
 ostream& operator<<(ostream& out, const bluefs_transaction_t& t)
 {
-  return out << "txn(" << t.seq
+  return out << "txn(seq " << t.seq
 	     << " len " << t.op_bl.length()
 	     << " crc " << t.op_bl.crc32c(-1)
 	     << ")";
diff --git a/src/os/bluestore/bluestore_types.h b/src/os/bluestore/bluestore_types.h
index c07412d..9fbc007 100644
--- a/src/os/bluestore/bluestore_types.h
+++ b/src/os/bluestore/bluestore_types.h
@@ -45,7 +45,7 @@ ostream& operator<<(ostream& out, const bluestore_bdev_label_t& l);
 struct bluestore_cnode_t {
   uint32_t bits;   ///< how many bits of coll pgid are significant
 
-  bluestore_cnode_t(int b=0) : bits(b) {}
+  explicit bluestore_cnode_t(int b=0) : bits(b) {}
 
   void encode(bufferlist& bl) const;
   void decode(bufferlist::iterator& p);
@@ -285,7 +285,7 @@ struct bluestore_wal_transaction_t {
 
   int64_t _bytes;  ///< cached byte count
 
-  bluestore_wal_transaction_t() : _bytes(-1) {}
+  bluestore_wal_transaction_t() : seq(0), _bytes(-1) {}
 
 #if 0
   no users for htis
diff --git a/src/os/filestore/BtrfsFileStoreBackend.h b/src/os/filestore/BtrfsFileStoreBackend.h
index 9bc878f..2933eb1 100644
--- a/src/os/filestore/BtrfsFileStoreBackend.h
+++ b/src/os/filestore/BtrfsFileStoreBackend.h
@@ -29,7 +29,7 @@ private:
   bool m_filestore_btrfs_clone_range;
   bool m_filestore_btrfs_snap;
 public:
-  BtrfsFileStoreBackend(FileStore *fs);
+  explicit BtrfsFileStoreBackend(FileStore *fs);
   ~BtrfsFileStoreBackend() {}
   const char *get_name() {
     return "btrfs";
diff --git a/src/os/filestore/CollectionIndex.h b/src/os/filestore/CollectionIndex.h
index a9947cc..0d1fc30 100644
--- a/src/os/filestore/CollectionIndex.h
+++ b/src/os/filestore/CollectionIndex.h
@@ -57,14 +57,14 @@ protected:
     /// Debugging Constructor
     Path(
       string path,                              ///< [in] Path to return.
-      coll_t coll)                              ///< [in] collection
+      const coll_t& coll)                              ///< [in] collection
       : full_path(path), parent_coll(coll) {}
 
     /// Getter for the stored path.
     const char *path() const { return full_path.c_str(); }
 
     /// Getter for collection
-    coll_t coll() const { return parent_coll; }
+    const coll_t& coll() const { return parent_coll; }
 
     /// Getter for parent
     CollectionIndex* get_index() const {
@@ -73,12 +73,11 @@ protected:
   };
  public:
 
-  string access_lock_name;
   RWLock access_lock;
   /// Type of returned paths
   typedef ceph::shared_ptr<Path> IndexedPath;
 
-  static IndexedPath get_testing_path(string path, coll_t collection) {
+  static IndexedPath get_testing_path(string path, const coll_t& collection) {
     return IndexedPath(new Path(path, collection));
   }
 
@@ -176,9 +175,8 @@ protected:
   /// Call prior to removing directory
   virtual int prep_delete() { return 0; }
 
-  CollectionIndex(coll_t collection):
-    access_lock_name ("CollectionIndex::access_lock::" + collection.to_str()),
-    access_lock(access_lock_name.c_str()) {}
+  explicit CollectionIndex(const coll_t& collection):
+    access_lock("CollectionIndex::access_lock", true, false) {}
 
   /*
    * Pre-hash the collection, this collection should map to a PG folder.
diff --git a/src/os/filestore/DBObjectMap.h b/src/os/filestore/DBObjectMap.h
index 1b57485..400c54a 100644
--- a/src/os/filestore/DBObjectMap.h
+++ b/src/os/filestore/DBObjectMap.h
@@ -83,7 +83,7 @@ public:
     MapHeaderLock(const MapHeaderLock &);
     MapHeaderLock &operator=(const MapHeaderLock &);
   public:
-    MapHeaderLock(DBObjectMap *db) : db(db) {}
+    explicit MapHeaderLock(DBObjectMap *db) : db(db) {}
     MapHeaderLock(DBObjectMap *db, const ghobject_t &oid) : db(db), locked(oid) {
       Mutex::Locker l(db->header_lock);
       while (db->map_header_in_use.count(*locked))
@@ -115,9 +115,9 @@ public:
     }
   };
 
-  DBObjectMap(KeyValueDB *db) : db(db), header_lock("DBOBjectMap"),
-                                cache_lock("DBObjectMap::CacheLock"),
-                                caches(g_conf->filestore_omap_header_cache_size)
+  explicit DBObjectMap(KeyValueDB *db) : db(db), header_lock("DBOBjectMap"),
+           	                         cache_lock("DBObjectMap::CacheLock"),
+      	                                 caches(g_conf->filestore_omap_header_cache_size)
     {}
 
   int set_keys(
@@ -241,7 +241,7 @@ public:
     __u8 v;
     uint64_t seq;
     State() : v(0), seq(1) {}
-    State(uint64_t seq) : v(0), seq(seq) {}
+    explicit State(uint64_t seq) : v(0), seq(seq) {}
 
     void encode(bufferlist &bl) const {
       ENCODE_START(2, 1, bl);
@@ -516,7 +516,7 @@ private:
   class RemoveOnDelete {
   public:
     DBObjectMap *db;
-    RemoveOnDelete(DBObjectMap *db) :
+    explicit RemoveOnDelete(DBObjectMap *db) :
       db(db) {}
     void operator() (_Header *header) {
       Mutex::Locker l(db->header_lock);
diff --git a/src/os/filestore/FDCache.h b/src/os/filestore/FDCache.h
index 635043b..566e65a 100644
--- a/src/os/filestore/FDCache.h
+++ b/src/os/filestore/FDCache.h
@@ -38,7 +38,7 @@ public:
   class FD {
   public:
     const int fd;
-    FD(int _fd) : fd(_fd) {
+    explicit FD(int _fd) : fd(_fd) {
       assert(_fd >= 0);
     }
     int operator*() const {
@@ -55,7 +55,7 @@ private:
   SharedLRU<ghobject_t, FD, ghobject_t::BitwiseComparator> *registry;
 
 public:
-  FDCache(CephContext *cct) : cct(cct),
+  explicit FDCache(CephContext *cct) : cct(cct),
   registry_shards(cct->_conf->filestore_fd_cache_shards) {
     assert(cct);
     cct->_conf->add_observer(this);
diff --git a/src/os/filestore/FileJournal.cc b/src/os/filestore/FileJournal.cc
index fa4751b..7763c6f 100644
--- a/src/os/filestore/FileJournal.cc
+++ b/src/os/filestore/FileJournal.cc
@@ -540,7 +540,6 @@ int FileJournal::open(uint64_t fs_op_seq)
 	       << dendl;
       read_pos = -1;
       last_committed_seq = 0;
-      seq = 0;
       return 0;
     }
     if (seq == next_seq) {
@@ -767,7 +766,7 @@ int FileJournal::read_header(header_t *hdr) const
       memset(bpdata, 0, bp.length() - r);
   }
 
-  bl.push_back(bp);
+  bl.push_back(std::move(bp));
 
   try {
     bufferlist::iterator p = bl.begin();
@@ -1585,20 +1584,19 @@ void FileJournal::check_aio_completion()
 }
 #endif
 
-int FileJournal::prepare_entry(list<ObjectStore::Transaction*>& tls, bufferlist* tbl) {
+int FileJournal::prepare_entry(vector<ObjectStore::Transaction>& tls, bufferlist* tbl) {
   dout(10) << "prepare_entry " << tls << dendl;
   unsigned data_len = 0;
   int data_align = -1; // -1 indicates that we don't care about the alignment
   bufferlist bl;
-  for (list<ObjectStore::Transaction*>::iterator p = tls.begin();
+  for (vector<ObjectStore::Transaction>::iterator p = tls.begin();
       p != tls.end(); ++p) {
-    ObjectStore::Transaction *t = *p;
-    if (t->get_data_length() > data_len &&
-     (int)t->get_data_length() >= g_conf->journal_align_min_size) {
-     data_len = t->get_data_length();
-     data_align = (t->get_data_alignment() - bl.length()) & ~CEPH_PAGE_MASK;
+   if ((*p).get_data_length() > data_len &&
+     (int)(*p).get_data_length() >= g_conf->journal_align_min_size) {
+     data_len = (*p).get_data_length();
+     data_align = ((*p).get_data_alignment() - bl.length()) & ~CEPH_PAGE_MASK;
     }
-    ::encode(*t, bl);
+    ::encode(*p, bl);
   }
   if (tbl->length()) {
     bl.claim_append(*tbl);
@@ -1887,7 +1885,7 @@ void FileJournal::wrap_read_bl(
 	   << r << dendl;
       ceph_abort();
     }
-    bl->push_back(bp);
+    bl->push_back(std::move(bp));
     pos += len;
     olen -= len;
   }
diff --git a/src/os/filestore/FileJournal.h b/src/os/filestore/FileJournal.h
index 69935a6..92d8b4b 100644
--- a/src/os/filestore/FileJournal.h
+++ b/src/os/filestore/FileJournal.h
@@ -98,7 +98,7 @@ public:
     completions.pop_front();
   }
 
-  int prepare_entry(list<ObjectStore::Transaction*>& tls, bufferlist* tbl);
+  int prepare_entry(vector<ObjectStore::Transaction>& tls, bufferlist* tbl);
 
   void submit_entry(uint64_t seq, bufferlist& bl, uint32_t orig_len,
 		    Context *oncommit,
@@ -347,7 +347,7 @@ private:
   class Writer : public Thread {
     FileJournal *journal;
   public:
-    Writer(FileJournal *fj) : journal(fj) {}
+    explicit Writer(FileJournal *fj) : journal(fj) {}
     void *entry() {
       journal->write_thread_entry();
       return 0;
@@ -357,7 +357,7 @@ private:
   class WriteFinisher : public Thread {
     FileJournal *journal;
   public:
-    WriteFinisher(FileJournal *fj) : journal(fj) {}
+    explicit WriteFinisher(FileJournal *fj) : journal(fj) {}
     void *entry() {
       journal->write_finish_thread_entry();
       return 0;
diff --git a/src/os/filestore/FileStore.cc b/src/os/filestore/FileStore.cc
index 5cb73e7..1390b02 100644
--- a/src/os/filestore/FileStore.cc
+++ b/src/os/filestore/FileStore.cc
@@ -150,20 +150,20 @@ ostream& operator<<(ostream& out, const FileStore::OpSequencer& s)
   return out << *s.parent;
 }
 
-int FileStore::get_cdir(coll_t cid, char *s, int len)
+int FileStore::get_cdir(const coll_t& cid, char *s, int len)
 {
   const string &cid_str(cid.to_str());
   return snprintf(s, len, "%s/current/%s", basedir.c_str(), cid_str.c_str());
 }
 
-int FileStore::get_index(coll_t cid, Index *index)
+int FileStore::get_index(const coll_t& cid, Index *index)
 {
   int r = index_manager.get_index(cid, basedir, index);
   assert(!m_filestore_fail_eio || r != -EIO);
   return r;
 }
 
-int FileStore::init_index(coll_t cid)
+int FileStore::init_index(const coll_t& cid)
 {
   char path[PATH_MAX];
   get_cdir(cid, path, sizeof(path));
@@ -189,7 +189,7 @@ int FileStore::lfn_find(const ghobject_t& oid, const Index& index, IndexedPath *
   return 0;
 }
 
-int FileStore::lfn_truncate(coll_t cid, const ghobject_t& oid, off_t length)
+int FileStore::lfn_truncate(const coll_t& cid, const ghobject_t& oid, off_t length)
 {
   FDRef fd;
   int r = lfn_open(cid, oid, false, &fd);
@@ -207,7 +207,7 @@ int FileStore::lfn_truncate(coll_t cid, const ghobject_t& oid, off_t length)
   return r;
 }
 
-int FileStore::lfn_stat(coll_t cid, const ghobject_t& oid, struct stat *buf)
+int FileStore::lfn_stat(const coll_t& cid, const ghobject_t& oid, struct stat *buf)
 {
   IndexedPath path;
   Index index;
@@ -227,7 +227,7 @@ int FileStore::lfn_stat(coll_t cid, const ghobject_t& oid, struct stat *buf)
   return r;
 }
 
-int FileStore::lfn_open(coll_t cid,
+int FileStore::lfn_open(const coll_t& cid,
 			const ghobject_t& oid,
 			bool create,
 			FDRef *outfd,
@@ -337,7 +337,7 @@ void FileStore::lfn_close(FDRef fd)
 {
 }
 
-int FileStore::lfn_link(coll_t c, coll_t newcid, const ghobject_t& o, const ghobject_t& newoid)
+int FileStore::lfn_link(const coll_t& c, const coll_t& newcid, const ghobject_t& o, const ghobject_t& newoid)
 {
   Index index_new, index_old;
   IndexedPath path_new, path_old;
@@ -439,7 +439,7 @@ int FileStore::lfn_link(coll_t c, coll_t newcid, const ghobject_t& o, const ghob
   return 0;
 }
 
-int FileStore::lfn_unlink(coll_t cid, const ghobject_t& o,
+int FileStore::lfn_unlink(const coll_t& cid, const ghobject_t& o,
 			  const SequencerPosition &spos,
 			  bool force_clear_omap)
 {
@@ -1164,7 +1164,7 @@ int FileStore::read_superblock()
   }
 
   bufferlist bl;
-  bl.push_back(bp);
+  bl.push_back(std::move(bp));
   bufferlist::iterator i = bl.begin();
   ::decode(superblock, i);
   return 0;
@@ -1186,7 +1186,7 @@ int FileStore::version_stamp_is_valid(uint32_t *version)
     return ret;
   }
   bufferlist bl;
-  bl.push_back(bp);
+  bl.push_back(std::move(bp));
   bufferlist::iterator i = bl.begin();
   ::decode(*version, i);
   dout(10) << __func__ << " was " << *version << " vs target "
@@ -1308,12 +1308,12 @@ int FileStore::mount()
   uint32_t version_stamp;
   ret = version_stamp_is_valid(&version_stamp);
   if (ret < 0) {
-    derr << "FileStore::mount : error in version_stamp_is_valid: "
+    derr << "FileStore::mount: error in version_stamp_is_valid: "
 	 << cpp_strerror(ret) << dendl;
     goto close_fsid_fd;
   } else if (ret == 0) {
     if (do_update || (int)version_stamp < g_conf->filestore_update_to) {
-      derr << "FileStore::mount : stale version stamp detected: "
+      derr << "FileStore::mount: stale version stamp detected: "
 	   << version_stamp
 	   << ". Proceeding, do_update "
 	   << "is set, performing disk format upgrade."
@@ -1321,7 +1321,7 @@ int FileStore::mount()
       do_update = true;
     } else {
       ret = -EINVAL;
-      derr << "FileStore::mount : stale version stamp " << version_stamp
+      derr << "FileStore::mount: stale version stamp " << version_stamp
 	   << ". Please run the FileStore update script before starting the "
 	   << "OSD, or set filestore_update_to to " << target_version
 	   << " (currently " << g_conf->filestore_update_to << ")"
@@ -1338,7 +1338,7 @@ int FileStore::mount()
 
   // Check if this FileStore supports all the necessary features to mount
   if (supported_compat_set.compare(superblock.compat_features) == -1) {
-    derr << "FileStore::mount : Incompatible features set "
+    derr << "FileStore::mount: Incompatible features set "
 	   << superblock.compat_features << dendl;
     ret = -EINVAL;
     goto close_fsid_fd;
@@ -1357,7 +1357,7 @@ int FileStore::mount()
   // test for btrfs, xattrs, etc.
   ret = _detect_fs();
   if (ret < 0) {
-    derr << "FileStore::mount : error in _detect_fs: "
+    derr << "FileStore::mount: error in _detect_fs: "
 	 << cpp_strerror(ret) << dendl;
     goto close_basedir_fd;
   }
@@ -1366,7 +1366,7 @@ int FileStore::mount()
     list<string> ls;
     ret = backend->list_checkpoints(ls);
     if (ret < 0) {
-      derr << "FileStore::mount : error in _list_snaps: "<< cpp_strerror(ret) << dendl;
+      derr << "FileStore::mount: error in _list_snaps: "<< cpp_strerror(ret) << dendl;
       goto close_basedir_fd;
     }
 
@@ -1774,22 +1774,22 @@ int FileStore::umount()
 
 /// -----------------------------
 
-FileStore::Op *FileStore::build_op(list<Transaction*>& tls,
+FileStore::Op *FileStore::build_op(vector<Transaction>& tls,
 				   Context *onreadable,
 				   Context *onreadable_sync,
 				   TrackedOpRef osd_op)
 {
   uint64_t bytes = 0, ops = 0;
-  for (list<Transaction*>::iterator p = tls.begin();
+  for (vector<Transaction>::iterator p = tls.begin();
        p != tls.end();
        ++p) {
-    bytes += (*p)->get_num_bytes();
-    ops += (*p)->get_num_ops();
+    bytes += (*p).get_num_bytes();
+    ops += (*p).get_num_ops();
   }
 
   Op *o = new Op;
   o->start = ceph_clock_now(g_ceph_context);
-  o->tls.swap(tls);
+  o->tls = std::move(tls);
   o->onreadable = onreadable;
   o->onreadable_sync = onreadable_sync;
   o->ops = ops;
@@ -1879,6 +1879,9 @@ void FileStore::_do_op(OpSequencer *osr, ThreadPool::TPHandle &handle)
   apply_manager.op_apply_finish(o->op);
   dout(10) << "_do_op " << o << " seq " << o->op << " r = " << r
 	   << ", finisher " << o->onreadable << " " << o->onreadable_sync << dendl;
+
+  o->tls.clear();
+
 }
 
 void FileStore::_finish_op(OpSequencer *osr)
@@ -1923,7 +1926,7 @@ struct C_JournaledAhead : public Context {
   }
 };
 
-int FileStore::queue_transactions(Sequencer *posr, list<Transaction*> &tls,
+int FileStore::queue_transactions(Sequencer *posr, vector<Transaction>& tls,
 				  TrackedOpRef osd_op,
 				  ThreadPool::TPHandle *handle)
 {
@@ -1956,8 +1959,8 @@ int FileStore::queue_transactions(Sequencer *posr, list<Transaction*> &tls,
   }
 
   // used to include osr information in tracepoints during transaction apply
-  for (list<ObjectStore::Transaction*>::iterator i = tls.begin(); i != tls.end(); ++i) {
-    (*i)->set_osr(osr);
+  for (vector<Transaction>::iterator i = tls.begin(); i != tls.end(); ++i) {
+    (*i).set_osr(osr);
   }
 
   if (journal && journal->is_writeable() && !m_filestore_journal_trailing) {
@@ -2078,16 +2081,16 @@ void FileStore::_journaled_ahead(OpSequencer *osr, Op *o, Context *ondisk)
 }
 
 int FileStore::_do_transactions(
-  list<Transaction*> &tls,
+  vector<Transaction> &tls,
   uint64_t op_seq,
   ThreadPool::TPHandle *handle)
 {
   int trans_num = 0;
 
-  for (list<Transaction*>::iterator p = tls.begin();
+  for (vector<Transaction>::iterator p = tls.begin();
        p != tls.end();
        ++p, trans_num++) {
-    _do_transaction(**p, op_seq, trans_num, handle);
+    _do_transaction(*p, op_seq, trans_num, handle);
     if (handle)
       handle->reset_tp_timeout();
   }
@@ -2095,7 +2098,7 @@ int FileStore::_do_transactions(
   return 0;
 }
 
-void FileStore::_set_global_replay_guard(coll_t cid,
+void FileStore::_set_global_replay_guard(const coll_t& cid,
 					 const SequencerPosition &spos)
 {
   if (backend->can_checkpoint())
@@ -2109,7 +2112,7 @@ void FileStore::_set_global_replay_guard(coll_t cid,
   }
   ret = sync_filesystem(basedir_fd);
   if (ret < 0) {
-    derr << __func__ << " :sync_filesytem error " << cpp_strerror(ret) << dendl;
+    derr << __func__ << " : sync_filesytem error " << cpp_strerror(ret) << dendl;
     assert(0 == "_set_global_replay_guard failed");
   }
 
@@ -2143,7 +2146,7 @@ void FileStore::_set_global_replay_guard(coll_t cid,
   dout(10) << __func__ << ": " << spos << " done" << dendl;
 }
 
-int FileStore::_check_global_replay_guard(coll_t cid,
+int FileStore::_check_global_replay_guard(const coll_t& cid,
 					  const SequencerPosition& spos)
 {
   char fn[PATH_MAX];
@@ -2174,7 +2177,7 @@ int FileStore::_check_global_replay_guard(coll_t cid,
 }
 
 
-void FileStore::_set_replay_guard(coll_t cid,
+void FileStore::_set_replay_guard(const coll_t& cid,
                                   const SequencerPosition &spos,
                                   bool in_progress=false)
 {
@@ -2231,7 +2234,7 @@ void FileStore::_set_replay_guard(int fd,
   dout(10) << "_set_replay_guard " << spos << " done" << dendl;
 }
 
-void FileStore::_close_replay_guard(coll_t cid,
+void FileStore::_close_replay_guard(const coll_t& cid,
                                     const SequencerPosition &spos)
 {
   char fn[PATH_MAX];
@@ -2274,7 +2277,7 @@ void FileStore::_close_replay_guard(int fd, const SequencerPosition& spos)
   dout(10) << "_close_replay_guard " << spos << " done" << dendl;
 }
 
-int FileStore::_check_replay_guard(coll_t cid, ghobject_t oid, const SequencerPosition& spos)
+int FileStore::_check_replay_guard(const coll_t& cid, ghobject_t oid, const SequencerPosition& spos)
 {
   if (!replaying || backend->can_checkpoint())
     return 1;
@@ -2294,7 +2297,7 @@ int FileStore::_check_replay_guard(coll_t cid, ghobject_t oid, const SequencerPo
   return ret;
 }
 
-int FileStore::_check_replay_guard(coll_t cid, const SequencerPosition& spos)
+int FileStore::_check_replay_guard(const coll_t& cid, const SequencerPosition& spos)
 {
   if (!replaying || backend->can_checkpoint())
     return 1;
@@ -2882,10 +2885,10 @@ void FileStore::_do_transaction(
 // --------------------
 // objects
 
-bool FileStore::exists(coll_t cid, const ghobject_t& oid)
+bool FileStore::exists(const coll_t& _cid, const ghobject_t& oid)
 {
-  tracepoint(objectstore, exists_enter, cid.c_str());
-  _kludge_temp_object_collection(cid, oid);
+  tracepoint(objectstore, exists_enter, _cid.c_str());
+  const coll_t& cid = !_need_temp_object_collection(_cid, oid) ? _cid : _cid.get_temp();
   struct stat st;
   bool retval = stat(cid, oid, &st) == 0;
   tracepoint(objectstore, exists_exit, retval);
@@ -2893,10 +2896,10 @@ bool FileStore::exists(coll_t cid, const ghobject_t& oid)
 }
 
 int FileStore::stat(
-  coll_t cid, const ghobject_t& oid, struct stat *st, bool allow_eio)
+  const coll_t& _cid, const ghobject_t& oid, struct stat *st, bool allow_eio)
 {
-  tracepoint(objectstore, stat_enter, cid.c_str());
-  _kludge_temp_object_collection(cid, oid);
+  tracepoint(objectstore, stat_enter, _cid.c_str());
+  const coll_t& cid = !_need_temp_object_collection(_cid, oid) ? _cid : _cid.get_temp();
   int r = lfn_stat(cid, oid, st);
   assert(allow_eio || !m_filestore_fail_eio || r != -EIO);
   if (r < 0) {
@@ -2917,7 +2920,7 @@ int FileStore::stat(
 }
 
 int FileStore::read(
-  coll_t cid,
+  const coll_t& _cid,
   const ghobject_t& oid,
   uint64_t offset,
   size_t len,
@@ -2926,8 +2929,8 @@ int FileStore::read(
   bool allow_eio)
 {
   int got;
-  tracepoint(objectstore, read_enter, cid.c_str(), offset, len);
-  _kludge_temp_object_collection(cid, oid);
+  tracepoint(objectstore, read_enter, _cid.c_str(), offset, len);
+  const coll_t& cid = !_need_temp_object_collection(_cid, oid) ? _cid : _cid.get_temp();
 
   dout(15) << "read " << cid << "/" << oid << " " << offset << "~" << len << dendl;
 
@@ -2959,11 +2962,14 @@ int FileStore::read(
   if (got < 0) {
     dout(10) << "FileStore::read(" << cid << "/" << oid << ") pread error: " << cpp_strerror(got) << dendl;
     lfn_close(fd);
-    assert(allow_eio || !m_filestore_fail_eio || got != -EIO);
+    if (!(allow_eio || !m_filestore_fail_eio || got != -EIO)) {
+      derr << "FileStore::read(" << cid << "/" << oid << ") pread error: " << cpp_strerror(got) << dendl;
+      assert(0 == "eio on pread");
+    }
     return got;
   }
   bptr.set_length(got);   // properly size the buffer
-  bl.push_back(bptr);   // put it in the target bufferlist
+  bl.push_back(std::move(bptr));   // put it in the target bufferlist
 
 #ifdef HAVE_POSIX_FADVISE
   if (op_flags & CEPH_OSD_OP_FLAG_FADVISE_DONTNEED)
@@ -3099,12 +3105,12 @@ int FileStore::_do_seek_hole_data(int fd, uint64_t offset, size_t len,
 #endif
 }
 
-int FileStore::fiemap(coll_t cid, const ghobject_t& oid,
+int FileStore::fiemap(const coll_t& _cid, const ghobject_t& oid,
                     uint64_t offset, size_t len,
                     bufferlist& bl)
 {
-  tracepoint(objectstore, fiemap_enter, cid.c_str(), offset, len);
-  _kludge_temp_object_collection(cid, oid);
+  tracepoint(objectstore, fiemap_enter, _cid.c_str(), offset, len);
+  const coll_t& cid = !_need_temp_object_collection(_cid, oid) ? _cid : _cid.get_temp();
 
   if ((!backend->has_seek_data_hole() && !backend->has_fiemap()) ||
       len <= (size_t)m_filestore_fiemap_threshold) {
@@ -3148,7 +3154,7 @@ done:
 }
 
 
-int FileStore::_remove(coll_t cid, const ghobject_t& oid,
+int FileStore::_remove(const coll_t& cid, const ghobject_t& oid,
 		       const SequencerPosition &spos)
 {
   dout(15) << "remove " << cid << "/" << oid << dendl;
@@ -3157,7 +3163,7 @@ int FileStore::_remove(coll_t cid, const ghobject_t& oid,
   return r;
 }
 
-int FileStore::_truncate(coll_t cid, const ghobject_t& oid, uint64_t size)
+int FileStore::_truncate(const coll_t& cid, const ghobject_t& oid, uint64_t size)
 {
   dout(15) << "truncate " << cid << "/" << oid << " size " << size << dendl;
   int r = lfn_truncate(cid, oid, size);
@@ -3166,7 +3172,7 @@ int FileStore::_truncate(coll_t cid, const ghobject_t& oid, uint64_t size)
 }
 
 
-int FileStore::_touch(coll_t cid, const ghobject_t& oid)
+int FileStore::_touch(const coll_t& cid, const ghobject_t& oid)
 {
   dout(15) << "touch " << cid << "/" << oid << dendl;
 
@@ -3181,7 +3187,7 @@ int FileStore::_touch(coll_t cid, const ghobject_t& oid)
   return r;
 }
 
-int FileStore::_write(coll_t cid, const ghobject_t& oid,
+int FileStore::_write(const coll_t& cid, const ghobject_t& oid,
                      uint64_t offset, size_t len,
                      const bufferlist& bl, uint32_t fadvise_flags)
 {
@@ -3236,7 +3242,7 @@ int FileStore::_write(coll_t cid, const ghobject_t& oid,
   return r;
 }
 
-int FileStore::_zero(coll_t cid, const ghobject_t& oid, uint64_t offset, size_t len)
+int FileStore::_zero(const coll_t& cid, const ghobject_t& oid, uint64_t offset, size_t len)
 {
   dout(15) << "zero " << cid << "/" << oid << " " << offset << "~" << len << dendl;
   int ret = 0;
@@ -3272,10 +3278,8 @@ int FileStore::_zero(coll_t cid, const ghobject_t& oid, uint64_t offset, size_t
   // write zeros.. yuck!
   dout(20) << "zero FALLOC_FL_PUNCH_HOLE not supported, falling back to writing zeros" << dendl;
   {
-    bufferptr bp(len);
-    bp.zero();
     bufferlist bl;
-    bl.push_back(bp);
+    bl.append_zero(len);
     ret = _write(cid, oid, offset, len, bl);
   }
 
@@ -3284,7 +3288,7 @@ int FileStore::_zero(coll_t cid, const ghobject_t& oid, uint64_t offset, size_t
   return ret;
 }
 
-int FileStore::_clone(coll_t cid, const ghobject_t& oldoid, const ghobject_t& newoid,
+int FileStore::_clone(const coll_t& cid, const ghobject_t& oldoid, const ghobject_t& newoid,
 		      const SequencerPosition& spos)
 {
   dout(15) << "clone " << cid << "/" << oldoid << " -> " << cid << "/" << newoid << dendl;
@@ -3540,7 +3544,7 @@ int FileStore::_do_copy_range(int from, int to, uint64_t srcoff, uint64_t len, u
   return r;
 }
 
-int FileStore::_clone_range(coll_t cid, const ghobject_t& oldoid, const ghobject_t& newoid,
+int FileStore::_clone_range(const coll_t& cid, const ghobject_t& oldoid, const ghobject_t& newoid,
 			    uint64_t srcoff, uint64_t len, uint64_t dstoff,
 			    const SequencerPosition& spos)
 {
@@ -3580,7 +3584,7 @@ int FileStore::_clone_range(coll_t cid, const ghobject_t& oldoid, const ghobject
 
 class SyncEntryTimeout : public Context {
 public:
-  SyncEntryTimeout(int commit_timeo)
+  explicit SyncEntryTimeout(int commit_timeo)
     : m_commit_timeo(commit_timeo)
   {
   }
@@ -3692,8 +3696,13 @@ void FileStore::sync_entry()
 	apply_manager.commit_started();
 	op_tp.unpause();
 
-	object_map->sync();
-	int err = backend->syncfs();
+	int err = object_map->sync();
+	if (err < 0) {
+	  derr << "object_map sync got " << cpp_strerror(err) << dendl;
+	  assert(0 == "object_map sync returned error");
+	}
+
+	err = backend->syncfs();
 	if (err < 0) {
 	  derr << "syncfs got " << cpp_strerror(err) << dendl;
 	  assert(0 == "syncfs returned error");
@@ -4032,10 +4041,10 @@ bool FileStore::debug_mdata_eio(const ghobject_t &oid) {
 
 // objects
 
-int FileStore::getattr(coll_t cid, const ghobject_t& oid, const char *name, bufferptr &bp)
+int FileStore::getattr(const coll_t& _cid, const ghobject_t& oid, const char *name, bufferptr &bp)
 {
-  tracepoint(objectstore, getattr_enter, cid.c_str());
-  _kludge_temp_object_collection(cid, oid);
+  tracepoint(objectstore, getattr_enter, _cid.c_str());
+  const coll_t& cid = !_need_temp_object_collection(_cid, oid) ? _cid : _cid.get_temp();
   dout(15) << "getattr " << cid << "/" << oid << " '" << name << "'" << dendl;
   FDRef fd;
   int r = lfn_open(cid, oid, false, &fd);
@@ -4081,10 +4090,10 @@ int FileStore::getattr(coll_t cid, const ghobject_t& oid, const char *name, buff
   }
 }
 
-int FileStore::getattrs(coll_t cid, const ghobject_t& oid, map<string,bufferptr>& aset)
+int FileStore::getattrs(const coll_t& _cid, const ghobject_t& oid, map<string,bufferptr>& aset)
 {
-  tracepoint(objectstore, getattrs_enter, cid.c_str());
-  _kludge_temp_object_collection(cid, oid);
+  tracepoint(objectstore, getattrs_enter, _cid.c_str());
+  const coll_t& cid = !_need_temp_object_collection(_cid, oid) ? _cid : _cid.get_temp();
   set<string> omap_attrs;
   map<string, bufferlist> omap_aset;
   Index index;
@@ -4156,7 +4165,7 @@ int FileStore::getattrs(coll_t cid, const ghobject_t& oid, map<string,bufferptr>
   }
 }
 
-int FileStore::_setattrs(coll_t cid, const ghobject_t& oid, map<string,bufferptr>& aset,
+int FileStore::_setattrs(const coll_t& cid, const ghobject_t& oid, map<string,bufferptr>& aset,
 			 const SequencerPosition &spos)
 {
   map<string, bufferlist> omap_set;
@@ -4256,7 +4265,7 @@ int FileStore::_setattrs(coll_t cid, const ghobject_t& oid, map<string,bufferptr
 }
 
 
-int FileStore::_rmattr(coll_t cid, const ghobject_t& oid, const char *name,
+int FileStore::_rmattr(const coll_t& cid, const ghobject_t& oid, const char *name,
 		       const SequencerPosition &spos)
 {
   dout(15) << "rmattr " << cid << "/" << oid << " '" << name << "'" << dendl;
@@ -4301,7 +4310,7 @@ int FileStore::_rmattr(coll_t cid, const ghobject_t& oid, const char *name,
   return r;
 }
 
-int FileStore::_rmattrs(coll_t cid, const ghobject_t& oid,
+int FileStore::_rmattrs(const coll_t& cid, const ghobject_t& oid,
 			const SequencerPosition &spos)
 {
   dout(15) << "rmattrs " << cid << "/" << oid << dendl;
@@ -4373,7 +4382,7 @@ int FileStore::_rmattrs(coll_t cid, const ghobject_t& oid,
 
 // collections
 
-int FileStore::collection_getattr(coll_t c, const char *name,
+int FileStore::collection_getattr(const coll_t& c, const char *name,
 				  void *value, size_t size)
 {
   char fn[PATH_MAX];
@@ -4395,7 +4404,7 @@ int FileStore::collection_getattr(coll_t c, const char *name,
   return r;
 }
 
-int FileStore::collection_getattr(coll_t c, const char *name, bufferlist& bl)
+int FileStore::collection_getattr(const coll_t& c, const char *name, bufferlist& bl)
 {
   char fn[PATH_MAX];
   get_cdir(c, fn, sizeof(fn));
@@ -4410,7 +4419,7 @@ int FileStore::collection_getattr(coll_t c, const char *name, bufferlist& bl)
     goto out;
   }
   r = _fgetattr(fd, n, bp);
-  bl.push_back(bp);
+  bl.push_back(std::move(bp));
   VOID_TEMP_FAILURE_RETRY(::close(fd));
  out:
   dout(10) << "collection_getattr " << fn << " '" << name << "' = " << r << dendl;
@@ -4418,7 +4427,7 @@ int FileStore::collection_getattr(coll_t c, const char *name, bufferlist& bl)
   return r;
 }
 
-int FileStore::collection_getattrs(coll_t cid, map<string,bufferptr>& aset)
+int FileStore::collection_getattrs(const coll_t& cid, map<string,bufferptr>& aset)
 {
   char fn[PATH_MAX];
   get_cdir(cid, fn, sizeof(fn));
@@ -4438,7 +4447,7 @@ int FileStore::collection_getattrs(coll_t cid, map<string,bufferptr>& aset)
 }
 
 
-int FileStore::_collection_setattr(coll_t c, const char *name,
+int FileStore::_collection_setattr(const coll_t& c, const char *name,
 				  const void *value, size_t size)
 {
   char fn[PATH_MAX];
@@ -4459,7 +4468,7 @@ int FileStore::_collection_setattr(coll_t c, const char *name,
   return r;
 }
 
-int FileStore::_collection_rmattr(coll_t c, const char *name)
+int FileStore::_collection_rmattr(const coll_t& c, const char *name)
 {
   char fn[PATH_MAX];
   get_cdir(c, fn, sizeof(fn));
@@ -4480,7 +4489,7 @@ int FileStore::_collection_rmattr(coll_t c, const char *name)
 }
 
 
-int FileStore::_collection_setattrs(coll_t cid, map<string,bufferptr>& aset)
+int FileStore::_collection_setattrs(const coll_t& cid, map<string,bufferptr>& aset)
 {
   char fn[PATH_MAX];
   get_cdir(cid, fn, sizeof(fn));
@@ -4539,7 +4548,7 @@ int FileStore::_collection_remove_recursive(const coll_t &cid,
 // --------------------------
 // collections
 
-int FileStore::collection_version_current(coll_t c, uint32_t *version)
+int FileStore::collection_version_current(const coll_t& c, uint32_t *version)
 {
   Index index;
   int r = get_index(c, &index);
@@ -4630,7 +4639,7 @@ int FileStore::list_collections(vector<coll_t>& ls, bool include_temp)
   return r;
 }
 
-int FileStore::collection_stat(coll_t c, struct stat *st)
+int FileStore::collection_stat(const coll_t& c, struct stat *st)
 {
   tracepoint(objectstore, collection_stat_enter, c.c_str());
   char fn[PATH_MAX];
@@ -4645,7 +4654,7 @@ int FileStore::collection_stat(coll_t c, struct stat *st)
   return r;
 }
 
-bool FileStore::collection_exists(coll_t c)
+bool FileStore::collection_exists(const coll_t& c)
 {
   tracepoint(objectstore, collection_exists_enter, c.c_str());
   struct stat st;
@@ -4654,7 +4663,7 @@ bool FileStore::collection_exists(coll_t c)
   return ret;
 }
 
-bool FileStore::collection_empty(coll_t c)
+bool FileStore::collection_empty(const coll_t& c)
 {
   tracepoint(objectstore, collection_empty_enter, c.c_str());
   dout(15) << "collection_empty " << c << dendl;
@@ -4677,7 +4686,7 @@ bool FileStore::collection_empty(coll_t c)
   tracepoint(objectstore, collection_empty_exit, ret);
   return ret;
 }
-int FileStore::collection_list(coll_t c, ghobject_t start, ghobject_t end,
+int FileStore::collection_list(const coll_t& c, ghobject_t start, ghobject_t end,
 			       bool sort_bitwise, int max,
 			       vector<ghobject_t> *ls, ghobject_t *next)
 {
@@ -4757,12 +4766,12 @@ int FileStore::collection_list(coll_t c, ghobject_t start, ghobject_t end,
   return 0;
 }
 
-int FileStore::omap_get(coll_t c, const ghobject_t &hoid,
+int FileStore::omap_get(const coll_t& _c, const ghobject_t &hoid,
 			bufferlist *header,
 			map<string, bufferlist> *out)
 {
-  tracepoint(objectstore, omap_get_enter, c.c_str());
-  _kludge_temp_object_collection(c, hoid);
+  tracepoint(objectstore, omap_get_enter, _c.c_str());
+  const coll_t& c = !_need_temp_object_collection(_c, hoid) ? _c : _c.get_temp();
   dout(15) << __func__ << " " << c << "/" << hoid << dendl;
   Index index;
   int r = get_index(c, &index);
@@ -4785,13 +4794,13 @@ int FileStore::omap_get(coll_t c, const ghobject_t &hoid,
 }
 
 int FileStore::omap_get_header(
-  coll_t c,
+  const coll_t& _c,
   const ghobject_t &hoid,
   bufferlist *bl,
   bool allow_eio)
 {
-  tracepoint(objectstore, omap_get_header_enter, c.c_str());
-  _kludge_temp_object_collection(c, hoid);
+  tracepoint(objectstore, omap_get_header_enter, _c.c_str());
+  const coll_t& c = !_need_temp_object_collection(_c, hoid) ? _c : _c.get_temp();
   dout(15) << __func__ << " " << c << "/" << hoid << dendl;
   Index index;
   int r = get_index(c, &index);
@@ -4813,10 +4822,10 @@ int FileStore::omap_get_header(
   return 0;
 }
 
-int FileStore::omap_get_keys(coll_t c, const ghobject_t &hoid, set<string> *keys)
+int FileStore::omap_get_keys(const coll_t& _c, const ghobject_t &hoid, set<string> *keys)
 {
-  tracepoint(objectstore, omap_get_keys_enter, c.c_str());
-  _kludge_temp_object_collection(c, hoid);
+  tracepoint(objectstore, omap_get_keys_enter, _c.c_str());
+  const coll_t& c = !_need_temp_object_collection(_c, hoid) ? _c : _c.get_temp();
   dout(15) << __func__ << " " << c << "/" << hoid << dendl;
   Index index;
   int r = get_index(c, &index);
@@ -4838,12 +4847,12 @@ int FileStore::omap_get_keys(coll_t c, const ghobject_t &hoid, set<string> *keys
   return 0;
 }
 
-int FileStore::omap_get_values(coll_t c, const ghobject_t &hoid,
+int FileStore::omap_get_values(const coll_t& _c, const ghobject_t &hoid,
 			       const set<string> &keys,
 			       map<string, bufferlist> *out)
 {
-  tracepoint(objectstore, omap_get_values_enter, c.c_str());
-  _kludge_temp_object_collection(c, hoid);
+  tracepoint(objectstore, omap_get_values_enter, _c.c_str());
+  const coll_t& c = !_need_temp_object_collection(_c, hoid) ? _c : _c.get_temp();
   dout(15) << __func__ << " " << c << "/" << hoid << dendl;
   Index index;
   const char *where = 0;
@@ -4874,12 +4883,12 @@ int FileStore::omap_get_values(coll_t c, const ghobject_t &hoid,
   return r;
 }
 
-int FileStore::omap_check_keys(coll_t c, const ghobject_t &hoid,
+int FileStore::omap_check_keys(const coll_t& _c, const ghobject_t &hoid,
 			       const set<string> &keys,
 			       set<string> *out)
 {
-  tracepoint(objectstore, omap_check_keys_enter, c.c_str());
-  _kludge_temp_object_collection(c, hoid);
+  tracepoint(objectstore, omap_check_keys_enter, _c.c_str());
+  const coll_t& c = !_need_temp_object_collection(_c, hoid) ? _c : _c.get_temp();
   dout(15) << __func__ << " " << c << "/" << hoid << dendl;
 
   Index index;
@@ -4902,11 +4911,11 @@ int FileStore::omap_check_keys(coll_t c, const ghobject_t &hoid,
   return 0;
 }
 
-ObjectMap::ObjectMapIterator FileStore::get_omap_iterator(coll_t c,
+ObjectMap::ObjectMapIterator FileStore::get_omap_iterator(const coll_t& _c,
 							  const ghobject_t &hoid)
 {
-  tracepoint(objectstore, get_omap_iterator, c.c_str());
-  _kludge_temp_object_collection(c, hoid);
+  tracepoint(objectstore, get_omap_iterator, _c.c_str());
+  const coll_t& c = !_need_temp_object_collection(_c, hoid) ? _c : _c.get_temp();
   dout(15) << __func__ << " " << c << "/" << hoid << dendl;
   Index index;
   int r = get_index(c, &index);
@@ -4928,7 +4937,7 @@ ObjectMap::ObjectMapIterator FileStore::get_omap_iterator(coll_t c,
   return object_map->get_iterator(hoid);
 }
 
-int FileStore::_collection_hint_expected_num_objs(coll_t c, uint32_t pg_num,
+int FileStore::_collection_hint_expected_num_objs(const coll_t& c, uint32_t pg_num,
     uint64_t expected_num_objs,
     const SequencerPosition &spos)
 {
@@ -4957,7 +4966,7 @@ int FileStore::_collection_hint_expected_num_objs(coll_t c, uint32_t pg_num,
 }
 
 int FileStore::_create_collection(
-  coll_t c,
+  const coll_t& c,
   const SequencerPosition &spos)
 {
   char fn[PATH_MAX];
@@ -4988,7 +4997,7 @@ int FileStore::_create_collection(
   return 0;
 }
 
-int FileStore::_destroy_collection(coll_t c)
+int FileStore::_destroy_collection(const coll_t& c)
 {
   int r = 0;
   char fn[PATH_MAX];
@@ -5029,7 +5038,7 @@ int FileStore::_destroy_collection(coll_t c)
 }
 
 
-int FileStore::_collection_add(coll_t c, coll_t oldcid, const ghobject_t& o,
+int FileStore::_collection_add(const coll_t& c, const coll_t& oldcid, const ghobject_t& o,
 			       const SequencerPosition& spos)
 {
   dout(15) << "collection_add " << c << "/" << o << " from " << oldcid << "/" << o << dendl;
@@ -5077,7 +5086,7 @@ int FileStore::_collection_add(coll_t c, coll_t oldcid, const ghobject_t& o,
   return r;
 }
 
-int FileStore::_collection_move_rename(coll_t oldcid, const ghobject_t& oldoid,
+int FileStore::_collection_move_rename(const coll_t& oldcid, const ghobject_t& oldoid,
 				       coll_t c, const ghobject_t& o,
 				       const SequencerPosition& spos)
 {
@@ -5180,7 +5189,7 @@ void FileStore::_inject_failure()
   }
 }
 
-int FileStore::_omap_clear(coll_t cid, const ghobject_t &hoid,
+int FileStore::_omap_clear(const coll_t& cid, const ghobject_t &hoid,
 			   const SequencerPosition &spos) {
   dout(15) << __func__ << " " << cid << "/" << hoid << dendl;
   Index index;
@@ -5200,7 +5209,7 @@ int FileStore::_omap_clear(coll_t cid, const ghobject_t &hoid,
   return 0;
 }
 
-int FileStore::_omap_setkeys(coll_t cid, const ghobject_t &hoid,
+int FileStore::_omap_setkeys(const coll_t& cid, const ghobject_t &hoid,
 			     const map<string, bufferlist> &aset,
 			     const SequencerPosition &spos) {
   dout(15) << __func__ << " " << cid << "/" << hoid << dendl;
@@ -5230,7 +5239,7 @@ skip:
   return r;
 }
 
-int FileStore::_omap_rmkeys(coll_t cid, const ghobject_t &hoid,
+int FileStore::_omap_rmkeys(const coll_t& cid, const ghobject_t &hoid,
 			    const set<string> &keys,
 			    const SequencerPosition &spos) {
   dout(15) << __func__ << " " << cid << "/" << hoid << dendl;
@@ -5257,7 +5266,7 @@ skip:
   return 0;
 }
 
-int FileStore::_omap_rmkeyrange(coll_t cid, const ghobject_t &hoid,
+int FileStore::_omap_rmkeyrange(const coll_t& cid, const ghobject_t &hoid,
 				const string& first, const string& last,
 				const SequencerPosition &spos) {
   dout(15) << __func__ << " " << cid << "/" << hoid << " [" << first << "," << last << "]" << dendl;
@@ -5274,7 +5283,7 @@ int FileStore::_omap_rmkeyrange(coll_t cid, const ghobject_t &hoid,
   return _omap_rmkeys(cid, hoid, keys, spos);
 }
 
-int FileStore::_omap_setheader(coll_t cid, const ghobject_t &hoid,
+int FileStore::_omap_setheader(const coll_t& cid, const ghobject_t &hoid,
 			       const bufferlist &bl,
 			       const SequencerPosition &spos)
 {
@@ -5293,7 +5302,7 @@ int FileStore::_omap_setheader(coll_t cid, const ghobject_t &hoid,
   return object_map->set_header(hoid, bl, &spos);
 }
 
-int FileStore::_split_collection(coll_t cid,
+int FileStore::_split_collection(const coll_t& cid,
 				 uint32_t bits,
 				 uint32_t rem,
 				 coll_t dest,
@@ -5391,7 +5400,7 @@ int FileStore::_split_collection(coll_t cid,
   return r;
 }
 
-int FileStore::_set_alloc_hint(coll_t cid, const ghobject_t& oid,
+int FileStore::_set_alloc_hint(const coll_t& cid, const ghobject_t& oid,
                                uint64_t expected_object_size,
                                uint64_t expected_write_size)
 {
@@ -5521,16 +5530,16 @@ void FileStore::dump_stop()
   }
 }
 
-void FileStore::dump_transactions(list<ObjectStore::Transaction*>& ls, uint64_t seq, OpSequencer *osr)
+void FileStore::dump_transactions(vector<ObjectStore::Transaction>& ls, uint64_t seq, OpSequencer *osr)
 {
   m_filestore_dump_fmt.open_array_section("transactions");
   unsigned trans_num = 0;
-  for (list<ObjectStore::Transaction*>::iterator i = ls.begin(); i != ls.end(); ++i, ++trans_num) {
+  for (vector<ObjectStore::Transaction>::iterator i = ls.begin(); i != ls.end(); ++i, ++trans_num) {
     m_filestore_dump_fmt.open_object_section("transaction");
     m_filestore_dump_fmt.dump_string("osr", osr->get_name());
     m_filestore_dump_fmt.dump_unsigned("seq", seq);
     m_filestore_dump_fmt.dump_unsigned("trans_num", trans_num);
-    (*i)->dump(&m_filestore_dump_fmt);
+    (*i).dump(&m_filestore_dump_fmt);
     m_filestore_dump_fmt.close_section();
   }
   m_filestore_dump_fmt.close_section();
diff --git a/src/os/filestore/FileStore.h b/src/os/filestore/FileStore.h
index 52714f9..3e4e11f 100644
--- a/src/os/filestore/FileStore.h
+++ b/src/os/filestore/FileStore.h
@@ -137,9 +137,15 @@ private:
 
   // Indexed Collections
   IndexManager index_manager;
-  int get_index(coll_t c, Index *index);
-  int init_index(coll_t c);
+  int get_index(const coll_t& c, Index *index);
+  int init_index(const coll_t& c);
 
+  bool _need_temp_object_collection(const coll_t& cid, const ghobject_t& oid) {
+    // - normal temp case: cid is pg, object is temp (pool < -1)
+    // - hammer temp case: cid is pg (or already temp), object pool is -1
+    return (cid.is_pg() && (oid.hobj.pool < -1 ||
+			oid.hobj.pool == -1));
+  }
   void _kludge_temp_object_collection(coll_t& cid, const ghobject_t& oid) {
     // - normal temp case: cid is pg, object is temp (pool < -1)
     // - hammer temp case: cid is pg (or already temp), object pool is -1
@@ -153,7 +159,7 @@ private:
   boost::scoped_ptr<ObjectMap> object_map;
 
   // helper fns
-  int get_cdir(coll_t cid, char *s, int len);
+  int get_cdir(const coll_t& cid, char *s, int len);
 
   /// read a uuid from fd
   int read_fsid(int fd, uuid_d *uuid);
@@ -174,7 +180,7 @@ private:
   void sync_entry();
   struct SyncThread : public Thread {
     FileStore *fs;
-    SyncThread(FileStore *f) : fs(f) {}
+    explicit SyncThread(FileStore *f) : fs(f) {}
     void *entry() {
       fs->sync_entry();
       return 0;
@@ -185,7 +191,7 @@ private:
   struct Op {
     utime_t start;
     uint64_t op;
-    list<Transaction*> tls;
+    vector<Transaction> tls;
     Context *onreadable, *onreadable_sync;
     uint64_t ops, bytes;
     TrackedOpRef osd_op;
@@ -314,7 +320,7 @@ private:
       }
     }
 
-    OpSequencer(int i)
+    explicit OpSequencer(int i)
       : qlock("FileStore::OpSequencer::qlock", false, false),
 	parent(0),
 	apply_lock("FileStore::OpSequencer::apply_lock", false, false),
@@ -364,10 +370,9 @@ private:
       store->op_queue.pop_front();
       return osr;
     }
-    void _process(OpSequencer *osr, ThreadPool::TPHandle &handle) {
+    void _process(OpSequencer *osr, ThreadPool::TPHandle &handle) override {
       store->_do_op(osr, handle);
     }
-    using ThreadPool::WorkQueue<OpSequencer>::_process;
     void _process_finish(OpSequencer *osr) {
       store->_finish_op(osr);
     }
@@ -378,7 +383,7 @@ private:
 
   void _do_op(OpSequencer *o, ThreadPool::TPHandle &handle);
   void _finish_op(OpSequencer *o);
-  Op *build_op(list<Transaction*>& tls,
+  Op *build_op(vector<Transaction>& tls,
 	       Context *onreadable, Context *onreadable_sync,
 	       TrackedOpRef osd_op);
   void queue_op(OpSequencer *osr, Op *o);
@@ -394,18 +399,18 @@ private:
 public:
   int lfn_find(const ghobject_t& oid, const Index& index,
                                   IndexedPath *path = NULL);
-  int lfn_truncate(coll_t cid, const ghobject_t& oid, off_t length);
-  int lfn_stat(coll_t cid, const ghobject_t& oid, struct stat *buf);
+  int lfn_truncate(const coll_t& cid, const ghobject_t& oid, off_t length);
+  int lfn_stat(const coll_t& cid, const ghobject_t& oid, struct stat *buf);
   int lfn_open(
-    coll_t cid,
+    const coll_t& cid,
     const ghobject_t& oid,
     bool create,
     FDRef *outfd,
     Index *index = 0);
 
   void lfn_close(FDRef fd);
-  int lfn_link(coll_t c, coll_t newcid, const ghobject_t& o, const ghobject_t& newoid) ;
-  int lfn_unlink(coll_t cid, const ghobject_t& o, const SequencerPosition &spos,
+  int lfn_link(const coll_t& c, const coll_t& newcid, const ghobject_t& o, const ghobject_t& newoid) ;
+  int lfn_unlink(const coll_t& cid, const ghobject_t& o, const SequencerPosition &spos,
 		 bool force_clear_omap=false);
 
 public:
@@ -414,6 +419,10 @@ public:
     const char *internal_name = "filestore", bool update_to=false);
   ~FileStore();
 
+  string get_type() {
+    return "filestore";
+  }
+
   int _detect_fs();
   int _sanity_check_fs();
 
@@ -457,16 +466,16 @@ public:
   int statfs(struct statfs *buf);
 
   int _do_transactions(
-    list<Transaction*> &tls, uint64_t op_seq,
+    vector<Transaction> &tls, uint64_t op_seq,
     ThreadPool::TPHandle *handle);
-  int do_transactions(list<Transaction*> &tls, uint64_t op_seq) {
+  int do_transactions(vector<Transaction> &tls, uint64_t op_seq) {
     return _do_transactions(tls, op_seq, 0);
   }
   void _do_transaction(
     Transaction& t, uint64_t op_seq, int trans_num,
     ThreadPool::TPHandle *handle);
 
-  int queue_transactions(Sequencer *osr, list<Transaction*>& tls,
+  int queue_transactions(Sequencer *osr, vector<Transaction>& tls,
 			 TrackedOpRef op = TrackedOpRef(),
 			 ThreadPool::TPHandle *handle = NULL);
 
@@ -483,15 +492,15 @@ public:
 			 const SequencerPosition& spos,
 			 const ghobject_t *oid=0,
 			 bool in_progress=false);
-  void _set_replay_guard(coll_t cid,
+  void _set_replay_guard(const coll_t& cid,
                          const SequencerPosition& spos,
                          bool in_progress);
-  void _set_global_replay_guard(coll_t cid,
+  void _set_global_replay_guard(const coll_t& cid,
 				const SequencerPosition &spos);
 
   /// close a replay guard opened with in_progress=true
   void _close_replay_guard(int fd, const SequencerPosition& spos);
-  void _close_replay_guard(coll_t cid, const SequencerPosition& spos);
+  void _close_replay_guard(const coll_t& cid, const SequencerPosition& spos);
 
   /**
    * check replay guard xattr on given file
@@ -510,23 +519,26 @@ public:
    * @return 1 if we can apply (maybe replay) this operation, -1 if spos has already been applied, 0 if it was in progress
    */
   int _check_replay_guard(int fd, const SequencerPosition& spos);
-  int _check_replay_guard(coll_t cid, const SequencerPosition& spos);
-  int _check_replay_guard(coll_t cid, ghobject_t oid, const SequencerPosition& pos);
-  int _check_global_replay_guard(coll_t cid, const SequencerPosition& spos);
+  int _check_replay_guard(const coll_t& cid, const SequencerPosition& spos);
+  int _check_replay_guard(const coll_t& cid, ghobject_t oid, const SequencerPosition& pos);
+  int _check_global_replay_guard(const coll_t& cid, const SequencerPosition& spos);
 
   // ------------------
   // objects
   int pick_object_revision_lt(ghobject_t& oid) {
     return 0;
   }
-  bool exists(coll_t cid, const ghobject_t& oid);
+  using ObjectStore::exists;
+  bool exists(const coll_t& cid, const ghobject_t& oid);
+  using ObjectStore::stat;
   int stat(
-    coll_t cid,
+    const coll_t& cid,
     const ghobject_t& oid,
     struct stat *st,
     bool allow_eio = false);
+  using ObjectStore::read;
   int read(
-    coll_t cid,
+    const coll_t& cid,
     const ghobject_t& oid,
     uint64_t offset,
     size_t len,
@@ -537,22 +549,23 @@ public:
                  map<uint64_t, uint64_t> *m);
   int _do_seek_hole_data(int fd, uint64_t offset, size_t len,
                          map<uint64_t, uint64_t> *m);
-  int fiemap(coll_t cid, const ghobject_t& oid, uint64_t offset, size_t len, bufferlist& bl);
+  using ObjectStore::fiemap;
+  int fiemap(const coll_t& cid, const ghobject_t& oid, uint64_t offset, size_t len, bufferlist& bl);
 
-  int _touch(coll_t cid, const ghobject_t& oid);
-  int _write(coll_t cid, const ghobject_t& oid, uint64_t offset, size_t len,
+  int _touch(const coll_t& cid, const ghobject_t& oid);
+  int _write(const coll_t& cid, const ghobject_t& oid, uint64_t offset, size_t len,
 	      const bufferlist& bl, uint32_t fadvise_flags = 0);
-  int _zero(coll_t cid, const ghobject_t& oid, uint64_t offset, size_t len);
-  int _truncate(coll_t cid, const ghobject_t& oid, uint64_t size);
-  int _clone(coll_t cid, const ghobject_t& oldoid, const ghobject_t& newoid,
+  int _zero(const coll_t& cid, const ghobject_t& oid, uint64_t offset, size_t len);
+  int _truncate(const coll_t& cid, const ghobject_t& oid, uint64_t size);
+  int _clone(const coll_t& cid, const ghobject_t& oldoid, const ghobject_t& newoid,
 	     const SequencerPosition& spos);
-  int _clone_range(coll_t cid, const ghobject_t& oldoid, const ghobject_t& newoid,
+  int _clone_range(const coll_t& cid, const ghobject_t& oldoid, const ghobject_t& newoid,
 		   uint64_t srcoff, uint64_t len, uint64_t dstoff,
 		   const SequencerPosition& spos);
   int _do_clone_range(int from, int to, uint64_t srcoff, uint64_t len, uint64_t dstoff);
   int _do_sparse_copy_range(int from, int to, uint64_t srcoff, uint64_t len, uint64_t dstoff);
   int _do_copy_range(int from, int to, uint64_t srcoff, uint64_t len, uint64_t dstoff, bool skip_sloppycrc=false);
-  int _remove(coll_t cid, const ghobject_t& oid, const SequencerPosition &spos);
+  int _remove(const coll_t& cid, const ghobject_t& oid, const SequencerPosition &spos);
 
   int _fgetattr(int fd, const char *name, bufferptr& bp);
   int _fgetattrs(int fd, map<string,bufferptr>& aset);
@@ -588,54 +601,63 @@ public:
   int snapshot(const string& name);
 
   // attrs
-  int getattr(coll_t cid, const ghobject_t& oid, const char *name, bufferptr &bp);
-  int getattrs(coll_t cid, const ghobject_t& oid, map<string,bufferptr>& aset);
+  using ObjectStore::getattr;
+  using ObjectStore::getattrs;
+  int getattr(const coll_t& cid, const ghobject_t& oid, const char *name, bufferptr &bp);
+  int getattrs(const coll_t& cid, const ghobject_t& oid, map<string,bufferptr>& aset);
 
-  int _setattrs(coll_t cid, const ghobject_t& oid, map<string,bufferptr>& aset,
+  int _setattrs(const coll_t& cid, const ghobject_t& oid, map<string,bufferptr>& aset,
 		const SequencerPosition &spos);
-  int _rmattr(coll_t cid, const ghobject_t& oid, const char *name,
+  int _rmattr(const coll_t& cid, const ghobject_t& oid, const char *name,
 	      const SequencerPosition &spos);
-  int _rmattrs(coll_t cid, const ghobject_t& oid,
+  int _rmattrs(const coll_t& cid, const ghobject_t& oid,
 	       const SequencerPosition &spos);
 
-  int collection_getattr(coll_t c, const char *name, void *value, size_t size);
-  int collection_getattr(coll_t c, const char *name, bufferlist& bl);
-  int collection_getattrs(coll_t cid, map<string,bufferptr> &aset);
+  int collection_getattr(const coll_t& c, const char *name, void *value, size_t size);
+  int collection_getattr(const coll_t& c, const char *name, bufferlist& bl);
+  int collection_getattrs(const coll_t& cid, map<string,bufferptr> &aset);
 
-  int _collection_setattr(coll_t c, const char *name, const void *value, size_t size);
-  int _collection_rmattr(coll_t c, const char *name);
-  int _collection_setattrs(coll_t cid, map<string,bufferptr> &aset);
+  int _collection_setattr(const coll_t& c, const char *name, const void *value, size_t size);
+  int _collection_rmattr(const coll_t& c, const char *name);
+  int _collection_setattrs(const coll_t& cid, map<string,bufferptr> &aset);
   int _collection_remove_recursive(const coll_t &cid,
 				   const SequencerPosition &spos);
 
   // collections
-  int collection_list(coll_t c, ghobject_t start, ghobject_t end,
+  using ObjectStore::collection_list;
+  int collection_list(const coll_t& c, ghobject_t start, ghobject_t end,
 		      bool sort_bitwise, int max,
 		      vector<ghobject_t> *ls, ghobject_t *next);
   int list_collections(vector<coll_t>& ls);
   int list_collections(vector<coll_t>& ls, bool include_temp);
-  int collection_version_current(coll_t c, uint32_t *version);
-  int collection_stat(coll_t c, struct stat *st);
-  bool collection_exists(coll_t c);
-  bool collection_empty(coll_t c);
+  int collection_version_current(const coll_t& c, uint32_t *version);
+  int collection_stat(const coll_t& c, struct stat *st);
+  bool collection_exists(const coll_t& c);
+  bool collection_empty(const coll_t& c);
 
   // omap (see ObjectStore.h for documentation)
-  int omap_get(coll_t c, const ghobject_t &oid, bufferlist *header,
+  using ObjectStore::omap_get;
+  int omap_get(const coll_t& c, const ghobject_t &oid, bufferlist *header,
 	       map<string, bufferlist> *out);
+  using ObjectStore::omap_get_header;
   int omap_get_header(
-    coll_t c,
+    const coll_t& c,
     const ghobject_t &oid,
     bufferlist *out,
     bool allow_eio = false);
-  int omap_get_keys(coll_t c, const ghobject_t &oid, set<string> *keys);
-  int omap_get_values(coll_t c, const ghobject_t &oid, const set<string> &keys,
+  using ObjectStore::omap_get_keys;
+  int omap_get_keys(const coll_t& c, const ghobject_t &oid, set<string> *keys);
+  using ObjectStore::omap_get_values;
+  int omap_get_values(const coll_t& c, const ghobject_t &oid, const set<string> &keys,
 		      map<string, bufferlist> *out);
-  int omap_check_keys(coll_t c, const ghobject_t &oid, const set<string> &keys,
+  using ObjectStore::omap_check_keys;
+  int omap_check_keys(const coll_t& c, const ghobject_t &oid, const set<string> &keys,
 		      set<string> *out);
-  ObjectMap::ObjectMapIterator get_omap_iterator(coll_t c, const ghobject_t &oid);
+  using ObjectStore::get_omap_iterator;
+  ObjectMap::ObjectMapIterator get_omap_iterator(const coll_t& c, const ghobject_t &oid);
 
-  int _create_collection(coll_t c, const SequencerPosition &spos);
-  int _destroy_collection(coll_t c);
+  int _create_collection(const coll_t& c, const SequencerPosition &spos);
+  int _destroy_collection(const coll_t& c);
   /**
    * Give an expected number of objects hint to the collection.
    *
@@ -646,42 +668,42 @@ public:
    *
    * @return 0 on success, an error code otherwise
    */
-  int _collection_hint_expected_num_objs(coll_t c, uint32_t pg_num,
+  int _collection_hint_expected_num_objs(const coll_t& c, uint32_t pg_num,
       uint64_t expected_num_objs,
       const SequencerPosition &spos);
-  int _collection_add(coll_t c, coll_t ocid, const ghobject_t& oid,
+  int _collection_add(const coll_t& c, const coll_t& ocid, const ghobject_t& oid,
 		      const SequencerPosition& spos);
-  int _collection_move_rename(coll_t oldcid, const ghobject_t& oldoid,
+  int _collection_move_rename(const coll_t& oldcid, const ghobject_t& oldoid,
 			      coll_t c, const ghobject_t& o,
 			      const SequencerPosition& spos);
 
-  int _set_alloc_hint(coll_t cid, const ghobject_t& oid,
+  int _set_alloc_hint(const coll_t& cid, const ghobject_t& oid,
                       uint64_t expected_object_size,
                       uint64_t expected_write_size);
 
   void dump_start(const std::string& file);
   void dump_stop();
-  void dump_transactions(list<ObjectStore::Transaction*>& ls, uint64_t seq, OpSequencer *osr);
+  void dump_transactions(vector<Transaction>& ls, uint64_t seq, OpSequencer *osr);
 
 private:
   void _inject_failure();
 
   // omap
-  int _omap_clear(coll_t cid, const ghobject_t &oid,
+  int _omap_clear(const coll_t& cid, const ghobject_t &oid,
 		  const SequencerPosition &spos);
-  int _omap_setkeys(coll_t cid, const ghobject_t &oid,
+  int _omap_setkeys(const coll_t& cid, const ghobject_t &oid,
 		    const map<string, bufferlist> &aset,
 		    const SequencerPosition &spos);
-  int _omap_rmkeys(coll_t cid, const ghobject_t &oid, const set<string> &keys,
+  int _omap_rmkeys(const coll_t& cid, const ghobject_t &oid, const set<string> &keys,
 		   const SequencerPosition &spos);
-  int _omap_rmkeyrange(coll_t cid, const ghobject_t &oid,
+  int _omap_rmkeyrange(const coll_t& cid, const ghobject_t &oid,
 		       const string& first, const string& last,
 		       const SequencerPosition &spos);
-  int _omap_setheader(coll_t cid, const ghobject_t &oid, const bufferlist &bl,
+  int _omap_setheader(const coll_t& cid, const ghobject_t &oid, const bufferlist &bl,
 		      const SequencerPosition &spos);
-  int _split_collection(coll_t cid, uint32_t bits, uint32_t rem, coll_t dest,
+  int _split_collection(const coll_t& cid, uint32_t bits, uint32_t rem, coll_t dest,
                         const SequencerPosition &spos);
-  int _split_collection_create(coll_t cid, uint32_t bits, uint32_t rem,
+  int _split_collection_create(const coll_t& cid, uint32_t bits, uint32_t rem,
 			       coll_t dest,
 			       const SequencerPosition &spos);
 
@@ -781,7 +803,7 @@ protected:
   }
 
 public:
-  FileStoreBackend(FileStore *fs) : filestore(fs) {}
+  explicit FileStoreBackend(FileStore *fs) : filestore(fs) {}
   virtual ~FileStoreBackend() {}
 
   static FileStoreBackend *create(long f_type, FileStore *fs);
diff --git a/src/os/filestore/GenericFileStoreBackend.cc b/src/os/filestore/GenericFileStoreBackend.cc
index d62d622..d4c2a52 100644
--- a/src/os/filestore/GenericFileStoreBackend.cc
+++ b/src/os/filestore/GenericFileStoreBackend.cc
@@ -344,7 +344,7 @@ int GenericFileStoreBackend::_crc_load_or_init(int fd, SloppyCRCMap *cm)
     }
   }
   bufferlist bl;
-  bl.append(bp);
+  bl.append(std::move(bp));
   bufferlist::iterator p = bl.begin();
   try {
     ::decode(*cm, p);
diff --git a/src/os/filestore/GenericFileStoreBackend.h b/src/os/filestore/GenericFileStoreBackend.h
index f31e202..e2c4ec8 100644
--- a/src/os/filestore/GenericFileStoreBackend.h
+++ b/src/os/filestore/GenericFileStoreBackend.h
@@ -28,7 +28,7 @@ private:
   bool m_filestore_fsync_flushes_journal_data;
   bool m_filestore_splice;
 public:
-  GenericFileStoreBackend(FileStore *fs);
+  explicit GenericFileStoreBackend(FileStore *fs);
   virtual ~GenericFileStoreBackend() {}
 
   virtual const char *get_name() {
diff --git a/src/os/filestore/HashIndex.h b/src/os/filestore/HashIndex.h
index 2ed2186..d4222f9 100644
--- a/src/os/filestore/HashIndex.h
+++ b/src/os/filestore/HashIndex.h
@@ -106,7 +106,7 @@ private:
     InProgressOp(int op, const vector<string> &path)
       : op(op), path(path) {}
 
-    InProgressOp(bufferlist::iterator &bl) {
+    explicit InProgressOp(bufferlist::iterator &bl) {
       decode(bl);
     }
 
diff --git a/src/os/filestore/IndexManager.h b/src/os/filestore/IndexManager.h
index da71807..5317d2b 100644
--- a/src/os/filestore/IndexManager.h
+++ b/src/os/filestore/IndexManager.h
@@ -31,7 +31,7 @@ struct Index {
   CollectionIndex *index;
 
   Index() : index(NULL) {}
-  Index(CollectionIndex* index) : index(index) {}
+  explicit Index(CollectionIndex* index) : index(index) {}
 
   CollectionIndex *operator->() { return index; }
   CollectionIndex &operator*() { return *index; }
@@ -67,8 +67,8 @@ class IndexManager {
   int build_index(coll_t c, const char *path, CollectionIndex **index);
 public:
   /// Constructor
-  IndexManager(bool upgrade) : lock("IndexManager lock"),
-			       upgrade(upgrade) {}
+  explicit IndexManager(bool upgrade) : lock("IndexManager lock"),
+		    		        upgrade(upgrade) {}
 
   ~IndexManager();
 
diff --git a/src/os/filestore/Journal.h b/src/os/filestore/Journal.h
index 602e8ea..236e431 100644
--- a/src/os/filestore/Journal.h
+++ b/src/os/filestore/Journal.h
@@ -72,7 +72,7 @@ public:
 
   virtual bool should_commit_now() = 0;
 
-  virtual int prepare_entry(list<ObjectStore::Transaction*>& tls, bufferlist* tbl) = 0;
+  virtual int prepare_entry(vector<ObjectStore::Transaction>& tls, bufferlist* tbl) = 0;
 
   // reads/recovery
 
diff --git a/src/os/filestore/JournalingObjectStore.cc b/src/os/filestore/JournalingObjectStore.cc
index e47b2e6..100ae66 100644
--- a/src/os/filestore/JournalingObjectStore.cc
+++ b/src/os/filestore/JournalingObjectStore.cc
@@ -20,6 +20,7 @@ void JournalingObjectStore::journal_start()
 void JournalingObjectStore::journal_stop()
 {
   dout(10) << "journal_stop" << dendl;
+  finisher.wait_for_empty();
   finisher.stop();
 }
 
@@ -81,10 +82,9 @@ int JournalingObjectStore::journal_replay(uint64_t fs_op_seq)
 
     dout(3) << "journal_replay: applying op seq " << seq << dendl;
     bufferlist::iterator p = bl.begin();
-    list<Transaction*> tls;
+    vector<ObjectStore::Transaction> tls;
     while (!p.end()) {
-      Transaction *t = new Transaction(p);
-      tls.push_back(t);
+      tls.emplace_back(Transaction(p));
     }
 
     apply_manager.op_apply_start(seq);
@@ -93,11 +93,6 @@ int JournalingObjectStore::journal_replay(uint64_t fs_op_seq)
 
     op_seq = seq;
 
-    while (!tls.empty()) {
-      delete tls.front();
-      tls.pop_front();
-    }
-
     dout(3) << "journal_replay: r = " << r << ", op_seq now " << op_seq << dendl;
   }
 
diff --git a/src/os/filestore/JournalingObjectStore.h b/src/os/filestore/JournalingObjectStore.h
index f384ba6..8b3bdaa 100644
--- a/src/os/filestore/JournalingObjectStore.h
+++ b/src/os/filestore/JournalingObjectStore.h
@@ -118,7 +118,7 @@ protected:
   void _op_journal_transactions(bufferlist& tls, uint32_t orig_len, uint64_t op,
 				Context *onjournal, TrackedOpRef osd_op);
 
-  virtual int do_transactions(list<ObjectStore::Transaction*>& tls, uint64_t op_seq) = 0;
+  virtual int do_transactions(vector<ObjectStore::Transaction>& tls, uint64_t op_seq) = 0;
 
 public:
   bool is_committing() {
@@ -129,7 +129,7 @@ public:
   }
 
 public:
-  JournalingObjectStore(const std::string& path)
+  explicit JournalingObjectStore(const std::string& path)
     : ObjectStore(path),
       journal(NULL),
       finisher(g_ceph_context, "JournalObjectStore", "fn_jrn_objstore"),
diff --git a/src/os/filestore/LFNIndex.cc b/src/os/filestore/LFNIndex.cc
index 7f8e6d0..4cc09c3 100644
--- a/src/os/filestore/LFNIndex.cc
+++ b/src/os/filestore/LFNIndex.cc
@@ -65,7 +65,7 @@ void LFNIndex::maybe_inject_failure()
 // in combination with RetryException, thrown by the above.
 struct FDCloser {
   int fd;
-  FDCloser(int f) : fd(f) {}
+  explicit FDCloser(int f) : fd(f) {}
   ~FDCloser() {
     VOID_TEMP_FAILURE_RETRY(::close(fd));
   }
diff --git a/src/os/filestore/WBThrottle.h b/src/os/filestore/WBThrottle.h
index f06ec87..9a3d0a7 100644
--- a/src/os/filestore/WBThrottle.h
+++ b/src/os/filestore/WBThrottle.h
@@ -146,7 +146,7 @@ private:
   }
 
 public:
-  WBThrottle(CephContext *cct);
+  explicit WBThrottle(CephContext *cct);
   ~WBThrottle();
 
   void start();
diff --git a/src/os/filestore/XfsFileStoreBackend.h b/src/os/filestore/XfsFileStoreBackend.h
index 84d4694..29f412f 100644
--- a/src/os/filestore/XfsFileStoreBackend.h
+++ b/src/os/filestore/XfsFileStoreBackend.h
@@ -24,7 +24,7 @@ private:
   bool m_has_extsize;
   int set_extsize(int fd, unsigned int val);
 public:
-  XfsFileStoreBackend(FileStore *fs);
+  explicit XfsFileStoreBackend(FileStore *fs);
   ~XfsFileStoreBackend() {}
   const char *get_name() {
     return "xfs";
diff --git a/src/os/filestore/ZFSFileStoreBackend.h b/src/os/filestore/ZFSFileStoreBackend.h
index f68b8ab..1c15bda 100644
--- a/src/os/filestore/ZFSFileStoreBackend.h
+++ b/src/os/filestore/ZFSFileStoreBackend.h
@@ -16,7 +16,7 @@ private:
   bool m_filestore_zfs_snap;
   int update_current_zh();
 public:
-  ZFSFileStoreBackend(FileStore *fs);
+  explicit ZFSFileStoreBackend(FileStore *fs);
   ~ZFSFileStoreBackend();
   int detect_features();
   bool can_checkpoint();
diff --git a/src/os/fs/FS.cc b/src/os/fs/FS.cc
index b7c7987..1872170 100644
--- a/src/os/fs/FS.cc
+++ b/src/os/fs/FS.cc
@@ -170,9 +170,7 @@ int FS::zero(int fd, uint64_t offset, uint64_t length)
   {
     // fall back to writing zeros
     bufferlist bl;
-    bufferptr bp(length);
-    bp.zero();
-    bl.append(bp);
+    bl.append_zero(length);
     r = ::lseek64(fd, offset, SEEK_SET);
     if (r < 0) {
       r = -errno;
diff --git a/src/os/fs/FS.h b/src/os/fs/FS.h
index 3941799..63cd05f 100644
--- a/src/os/fs/FS.h
+++ b/src/os/fs/FS.h
@@ -76,8 +76,8 @@ public:
       offset = _offset;
       length = len;
       bufferptr p = buffer::create_page_aligned(length);
-      bl.append(p);
       io_prep_pread(&iocb, fd, p.c_str(), length, offset);
+      bl.append(std::move(p));
     }
 
     int get_return_value() {
@@ -89,7 +89,7 @@ public:
     int max_iodepth;
     io_context_t ctx;
 
-    aio_queue_t(unsigned max_iodepth)
+    explicit aio_queue_t(unsigned max_iodepth)
       : max_iodepth(max_iodepth),
 	ctx(0) {
     }
@@ -137,7 +137,7 @@ public:
 	timeout_ms / 1000,
 	(timeout_ms % 1000) * 1000 * 1000
       };
-      int r = io_getevents(ctx, 1, 1, event, &t);
+      int r = io_getevents(ctx, 1, max, event, &t);
       if (r <= 0) {
 	return r;
       }
diff --git a/src/os/keyvaluestore/GenericObjectMap.cc b/src/os/keyvaluestore/GenericObjectMap.cc
deleted file mode 100644
index 3453bc0..0000000
--- a/src/os/keyvaluestore/GenericObjectMap.cc
+++ /dev/null
@@ -1,1127 +0,0 @@
-// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
-// vim: ts=8 sw=2 smarttab
-/*
- * Ceph - scalable distributed file system
- *
- * Copyright (C) 2013 UnitedStack <haomai at unitedstack.com>
- *
- * Author: Haomai Wang <haomaiwang at gmail.com>
- *
- * This is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License version 2.1, as published by the Free Software
- * Foundation.  See file COPYING.
- *
- */
-
-#include "include/int_types.h"
-#include "include/buffer.h"
-
-#include <iostream>
-#include <set>
-#include <map>
-#include <string>
-#include <vector>
-
-#include <errno.h>
-
-#include "GenericObjectMap.h"
-#include "common/debug.h"
-#include "common/config.h"
-#include "include/assert.h"
-
-#define dout_subsys ceph_subsys_keyvaluestore
-
-const string GenericObjectMap::GLOBAL_STATE_KEY = "HEADER";
-
-const string GenericObjectMap::USER_PREFIX = "_SEQ_";
-const string GenericObjectMap::INTERN_PREFIX = "_INTERN_";
-const string GenericObjectMap::COMPLETE_PREFIX = "_COMPLETE_";
-const string GenericObjectMap::GHOBJECT_TO_SEQ_PREFIX = "_GHOBJTOSEQ_";
-const string GenericObjectMap::PARENT_KEY = "_PARENT_HEADER_";
-
-// In order to make right ordering for leveldb matching with hobject_t,
-// so use "!" to separated
-const string GenericObjectMap::GHOBJECT_KEY_SEP_S = "!";
-const char GenericObjectMap::GHOBJECT_KEY_SEP_C = '!';
-const char GenericObjectMap::GHOBJECT_KEY_ENDING = 0xFF;
-
-// ============== GenericObjectMap Key Function =================
-
-static void append_escaped(const string &in, string *out)
-{
-  for (string::const_iterator i = in.begin(); i != in.end(); ++i) {
-    if (*i == '%') {
-      out->push_back('%');
-      out->push_back('p');
-    } else if (*i == '.') {
-      out->push_back('%');
-      out->push_back('e');
-    } else if (*i == GenericObjectMap::GHOBJECT_KEY_SEP_C) {
-      out->push_back('%');
-      out->push_back('u');
-    } else if (*i == '!') {
-      out->push_back('%');
-      out->push_back('s');
-    } else {
-      out->push_back(*i);
-    }
-  }
-}
-
-static bool append_unescaped(string::const_iterator begin,
-                             string::const_iterator end,
-                             string *out)
-{
-  for (string::const_iterator i = begin; i != end; ++i) {
-    if (*i == '%') {
-      ++i;
-      if (*i == 'p')
-        out->push_back('%');
-      else if (*i == 'e')
-        out->push_back('.');
-      else if (*i == 'u')
-        out->push_back(GenericObjectMap::GHOBJECT_KEY_SEP_C);
-      else if (*i == 's')
-        out->push_back('!');
-      else
-        return false;
-    } else {
-      out->push_back(*i);
-    }
-  }
-  return true;
-}
-
-string GenericObjectMap::header_key(const coll_t &cid)
-{
-  string full_name;
-
-  append_escaped(cid.to_str(), &full_name);
-  full_name.append(GHOBJECT_KEY_SEP_S);
-  return full_name;
-}
-
-string GenericObjectMap::header_key(const coll_t &cid, const ghobject_t &oid)
-{
-  string full_name;
-
-  append_escaped(cid.to_str(), &full_name);
-  full_name.append(GHOBJECT_KEY_SEP_S);
-
-  char buf[PATH_MAX];
-  char *t;
-  char *end;
-
-  // make field ordering match with ghobject_t compare operations
-  t = buf;
-  end = t + sizeof(buf);
-  if (oid.shard_id == shard_id_t::NO_SHARD) {
-    // otherwise ff will sort *after* 0, not before.
-    full_name += "--";
-  } else {
-    t += snprintf(t, end - t, "%02x", (int)oid.shard_id);
-    full_name += string(buf);
-  }
-  full_name.append(GHOBJECT_KEY_SEP_S);
-
-  t = buf;
-  t += snprintf(t, end - t, "%016llx",
-		(long long)(oid.hobj.pool + 0x8000000000000000));
-  full_name += string(buf);
-  full_name.append(GHOBJECT_KEY_SEP_S);
-
-  t = buf;
-  snprintf(t, end - t, "%.*X", (int)(sizeof(oid.hobj.get_hash())*2),
-           (uint32_t)oid.hobj.get_bitwise_key_u32());
-  full_name += string(buf);
-  full_name.append(GHOBJECT_KEY_SEP_S);
-
-  append_escaped(oid.hobj.nspace, &full_name);
-  full_name.append(GHOBJECT_KEY_SEP_S);
-
-  append_escaped(oid.hobj.get_key(), &full_name);
-  full_name.append(GHOBJECT_KEY_SEP_S);
-
-  append_escaped(oid.hobj.oid.name, &full_name);
-  full_name.append(GHOBJECT_KEY_SEP_S);
-
-  t = buf;
-  if (oid.hobj.snap == CEPH_NOSNAP)
-    t += snprintf(t, end - t, "head");
-  else if (oid.hobj.snap == CEPH_SNAPDIR)
-    t += snprintf(t, end - t, "snapdir");
-  else
-    // Keep length align
-    t += snprintf(t, end - t, "%016llx", (long long unsigned)oid.hobj.snap);
-  full_name += string(buf);
-
-  if (oid.generation != ghobject_t::NO_GEN) {
-    full_name.append(GHOBJECT_KEY_SEP_S);
-
-    t = buf;
-    end = t + sizeof(buf);
-    t += snprintf(t, end - t, "%016llx", (long long unsigned)oid.generation);
-    full_name += string(buf);
-  }
-
-  full_name.append(1, GHOBJECT_KEY_ENDING);
-
-  return full_name;
-}
-
-bool GenericObjectMap::parse_header_key(const string &long_name,
-                                          coll_t *out_coll, ghobject_t *out)
-{
-  string coll;
-  string name;
-  string key;
-  string ns;
-  uint32_t hash;
-  snapid_t snap;
-  int64_t pool;
-  gen_t generation = ghobject_t::NO_GEN;
-  shard_id_t shard_id = shard_id_t::NO_SHARD;
-
-  string::const_iterator current = long_name.begin();
-  string::const_iterator end;
-
-  for (end = current; end != long_name.end() && *end != GHOBJECT_KEY_SEP_C; ++end) ;
-  if (!append_unescaped(current, end, &coll))
-    return false;
-
-  current = ++end;
-  for ( ; end != long_name.end() && *end != GHOBJECT_KEY_SEP_C; ++end) ;
-  if (end == long_name.end())
-    return false;
-  string shardstring = string(current, end);
-  if (shardstring == "--")
-    shard_id = shard_id_t::NO_SHARD;
-  else
-    shard_id = (shard_id_t)strtoul(shardstring.c_str(), NULL, 16);
-
-  current = ++end;
-  for ( ; end != long_name.end() && *end != GHOBJECT_KEY_SEP_C; ++end) ;
-  if (end == long_name.end())
-    return false;
-  string pstring(current, end);
-  pool = strtoull(pstring.c_str(), NULL, 16);
-  pool -= 0x8000000000000000;
-
-  current = ++end;
-  for ( ; end != long_name.end() && *end != GHOBJECT_KEY_SEP_C; ++end) ;
-  if (end == long_name.end())
-    return false;
-  string hash_str(current, end);
-  sscanf(hash_str.c_str(), "%X", &hash);
-
-  current = ++end;
-  for ( ; end != long_name.end() && *end != GHOBJECT_KEY_SEP_C; ++end) ;
-  if (end == long_name.end())
-    return false;
-  if (!append_unescaped(current, end, &ns))
-    return false;
-
-  current = ++end;
-  for ( ; end != long_name.end() && *end != GHOBJECT_KEY_SEP_C; ++end) ;
-  if (end == long_name.end())
-    return false;
-  if (!append_unescaped(current, end, &key))
-    return false;
-
-  current = ++end;
-  for ( ; end != long_name.end() && *end != GHOBJECT_KEY_SEP_C; ++end) ;
-  if (end == long_name.end())
-    return false;
-  if (!append_unescaped(current, end, &name))
-    return false;
-
-  current = ++end;
-  for ( ; end != long_name.end() && *end != GHOBJECT_KEY_SEP_C &&
-	  *end != GHOBJECT_KEY_ENDING; ++end) ;
-  if (end == long_name.end())
-    return false;
-  string snap_str(current, end);
-  if (snap_str == "head")
-    snap = CEPH_NOSNAP;
-  else if (snap_str == "snapdir")
-    snap = CEPH_SNAPDIR;
-  else
-    snap = strtoull(snap_str.c_str(), NULL, 16);
-
-  // Optional generation/shard_id
-  string genstring;
-  if (*end == GHOBJECT_KEY_SEP_C) {
-    current = ++end;
-    for ( ; end != long_name.end() && *end != GHOBJECT_KEY_ENDING; ++end) ;
-    if (end != long_name.end())
-      return false;
-    genstring = string(current, end);
-    generation = (gen_t)strtoull(genstring.c_str(), NULL, 16);
-  }
-
-  if (out) {
-    (*out) = ghobject_t(hobject_t(name, key, snap,
-				  hobject_t::_reverse_bits(hash),
-				  (int64_t)pool, ns),
-                        generation, shard_id);
-  }
-
-  if (out_coll) {
-    bool valid = out_coll->parse(coll);
-    assert(valid);
-  }
-
-  return true;
-}
-
-
-// ============== GenericObjectMap Prefix =================
-
-string GenericObjectMap::user_prefix(Header header, const string &prefix)
-{
-  return USER_PREFIX + seq_key(header->seq) + prefix;
-}
-
-string GenericObjectMap::complete_prefix(Header header)
-{
-  return INTERN_PREFIX + seq_key(header->seq) + COMPLETE_PREFIX;
-}
-
-string GenericObjectMap::parent_seq_prefix(uint64_t seq)
-{
-  return INTERN_PREFIX + seq_key(seq) + PARENT_KEY;
-}
-
-
-// ============== GenericObjectMapIteratorImpl =================
-
-int GenericObjectMap::GenericObjectMapIteratorImpl::init()
-{
-  invalid = false;
-  if (ready) {
-    return 0;
-  }
-
-  assert(!parent_iter);
-  if (header->parent) {
-    Header parent = map->lookup_parent(header);
-    if (!parent) {
-      assert(0);
-      return -EINVAL;
-    }
-    parent_iter.reset(new GenericObjectMapIteratorImpl(map, parent, prefix));
-  }
-
-  key_iter = map->db->get_iterator(map->user_prefix(header, prefix));
-  assert(key_iter);
-  complete_iter = map->db->get_iterator(map->complete_prefix(header));
-  assert(complete_iter);
-  cur_iter = key_iter;
-  assert(cur_iter);
-  ready = true;
-  return 0;
-}
-
-ObjectMap::ObjectMapIterator GenericObjectMap::get_iterator(
-    const coll_t &cid, const ghobject_t &oid, const string &prefix)
-{
-  Header header = lookup_header(cid, oid);
-  if (!header)
-    return ObjectMap::ObjectMapIterator(new EmptyIteratorImpl());
-  return _get_iterator(header, prefix);
-}
-
-int GenericObjectMap::GenericObjectMapIteratorImpl::seek_to_first()
-{
-  init();
-  r = 0;
-  if (parent_iter) {
-    r = parent_iter->seek_to_first();
-    if (r < 0)
-      return r;
-  }
-  r = key_iter->seek_to_first();
-  if (r < 0)
-    return r;
-  return adjust();
-}
-
-int GenericObjectMap::GenericObjectMapIteratorImpl::seek_to_last()
-{
-  init();
-  r = 0;
-  if (parent_iter) {
-    r = parent_iter->seek_to_last();
-    if (r < 0)
-      return r;
-    if (parent_iter->valid())
-      r = parent_iter->next();
-    if (r < 0)
-      return r;
-  }
-  r = key_iter->seek_to_last();
-  if (r < 0)
-    return r;
-  if (key_iter->valid())
-    r = key_iter->next();
-  if (r < 0)
-    return r;
-  return adjust();
-}
-
-int GenericObjectMap::GenericObjectMapIteratorImpl::lower_bound(const string &to)
-{
-  init();
-  r = 0;
-  if (parent_iter) {
-    r = parent_iter->lower_bound(to);
-    if (r < 0)
-      return r;
-  }
-  r = key_iter->lower_bound(to);
-  if (r < 0)
-    return r;
-  return adjust();
-}
-
-int GenericObjectMap::GenericObjectMapIteratorImpl::upper_bound(const string &after)
-{
-  init();
-  r = 0;
-  if (parent_iter) {
-    r = parent_iter->upper_bound(after);
-    if (r < 0)
-      return r;
-  }
-  r = key_iter->upper_bound(after);
-  if (r < 0)
-    return r;
-  return adjust();
-}
-
-bool GenericObjectMap::GenericObjectMapIteratorImpl::valid()
-{
-  bool valid = !invalid && ready;
-  assert(!valid || cur_iter->valid());
-  return valid;
-}
-
-bool GenericObjectMap::GenericObjectMapIteratorImpl::valid_parent()
-{
-  if (parent_iter && parent_iter->valid() &&
-      (!key_iter->valid() || key_iter->key() > parent_iter->key()))
-    return true;
-  return false;
-}
-
-int GenericObjectMap::GenericObjectMapIteratorImpl::next(bool validate)
-{
-  assert(cur_iter->valid());
-  assert(valid());
-  cur_iter->next();
-  return adjust();
-}
-
-int GenericObjectMap::GenericObjectMapIteratorImpl::next_parent()
-{
-  if (!parent_iter || !parent_iter->valid()) {
-    invalid = true;
-    return 0;
-  }
-  r = next();
-  if (r < 0)
-    return r;
-  if (!valid() || on_parent() || !parent_iter->valid())
-    return 0;
-
-  return lower_bound(parent_iter->key());
-}
-
-int GenericObjectMap::GenericObjectMapIteratorImpl::in_complete_region(
-    const string &to_test, string *begin, string *end)
-{
-  complete_iter->upper_bound(to_test);
-  if (complete_iter->valid())
-    complete_iter->prev();
-  else
-    complete_iter->seek_to_last();
-
-  if (!complete_iter->valid())
-    return false;
-
-  string _end;
-  if (begin)
-    *begin = complete_iter->key();
-  _end = string(complete_iter->value().c_str());
-  if (end)
-    *end = _end;
-  return (to_test >= complete_iter->key()) && (!_end.size() || _end > to_test);
-}
-
-/**
- * Moves parent_iter to the next position both out of the complete_region and
- * not equal to key_iter.  Then, we set cur_iter to parent_iter if valid and
- * less than key_iter and key_iter otherwise.
- */
-int GenericObjectMap::GenericObjectMapIteratorImpl::adjust()
-{
-  string begin, end;
-  while (parent_iter && parent_iter->valid()) {
-    if (in_complete_region(parent_iter->key(), &begin, &end)) {
-      if (end.size() == 0) {
-        parent_iter->seek_to_last();
-        if (parent_iter->valid())
-          parent_iter->next();
-      } else {
-        parent_iter->lower_bound(end);
-      }
-    } else if (key_iter->valid() && key_iter->key() == parent_iter->key()) {
-      parent_iter->next();
-    } else {
-      break;
-    }
-  }
-  if (valid_parent()) {
-    cur_iter = parent_iter;
-  } else if (key_iter->valid()) {
-    cur_iter = key_iter;
-  } else {
-    invalid = true;
-  }
-  assert(invalid || cur_iter->valid());
-  return 0;
-}
-
-string GenericObjectMap::GenericObjectMapIteratorImpl::key()
-{
-  return cur_iter->key();
-}
-
-bufferlist GenericObjectMap::GenericObjectMapIteratorImpl::value()
-{
-  return cur_iter->value();
-}
-
-int GenericObjectMap::GenericObjectMapIteratorImpl::status()
-{
-  return r;
-}
-
-
-// ============== GenericObjectMap Public API =================
-
-void GenericObjectMap::set_keys(const Header header,
-                                const string &prefix,
-                                const map<string, bufferlist> &set,
-                                KeyValueDB::Transaction t)
-{
-  t->set(user_prefix(header, prefix), set);
-}
-
-int GenericObjectMap::clear(const Header header,
-                            KeyValueDB::Transaction t)
-{
-  remove_header(header->cid, header->oid, header, t);
-  assert(header->num_children > 0);
-  header->num_children--;
-  int r = _clear(header, t);
-  if (r < 0)
-    return r;
-  return 0;
-}
-
-int GenericObjectMap::rm_keys(const Header header,
-                              const string &prefix,
-                              const set<string> &buffered_keys,
-                              const set<string> &to_clear,
-                              KeyValueDB::Transaction t)
-{
-  t->rmkeys(user_prefix(header, prefix), to_clear);
-  if (!header->parent) {
-    return 0;
-  }
-
-  // Copy up keys from parent around to_clear
-  int keep_parent;
-  {
-    GenericObjectMapIterator iter = _get_iterator(header, prefix);
-    iter->seek_to_first();
-    map<string, string> new_complete;
-    map<string, bufferlist> to_write;
-    for(set<string>::const_iterator i = to_clear.begin();
-        i != to_clear.end(); ) {
-      unsigned copied = 0;
-      iter->lower_bound(*i);
-      ++i;
-      if (!iter->valid())
-        break;
-      string begin = iter->key();
-      if (!iter->on_parent())
-        iter->next_parent();
-      if (new_complete.size() && new_complete.rbegin()->second == begin) {
-        begin = new_complete.rbegin()->first;
-      }
-      while (iter->valid() && copied < 20) {
-        if (!to_clear.count(iter->key()) && !buffered_keys.count(iter->key()))
-          to_write[iter->key()].append(iter->value());
-        if (i != to_clear.end() && *i <= iter->key()) {
-          ++i;
-          copied = 0;
-        }
-
-        iter->next_parent();
-        copied++;
-      }
-      if (iter->valid()) {
-        new_complete[begin] = iter->key();
-      } else {
-        new_complete[begin] = "";
-        break;
-      }
-    }
-    t->set(user_prefix(header, prefix), to_write);
-    merge_new_complete(header, new_complete, iter, t);
-    keep_parent = need_parent(iter);
-    if (keep_parent < 0)
-      return keep_parent;
-  }
-
-  if (!keep_parent) {
-    Header parent = lookup_parent(header);
-    if (!parent)
-      return -EINVAL;
-    parent->num_children--;
-    _clear(parent, t);
-    header->parent = 0;
-    set_header(header->cid, header->oid, *header, t);
-    t->rmkeys_by_prefix(complete_prefix(header));
-  }
-
-  return 0;
-}
-
-int GenericObjectMap::get(const coll_t &cid, const ghobject_t &oid,
-                          const string &prefix,
-                          map<string, bufferlist> *out)
-{
-  Header header = lookup_header(cid, oid);
-  if (!header)
-    return -ENOENT;
-
-  ObjectMap::ObjectMapIterator iter = _get_iterator(header, prefix);
-  for (iter->seek_to_first(); iter->valid(); iter->next()) {
-    if (iter->status())
-      return iter->status();
-    out->insert(make_pair(iter->key(), iter->value()));
-  }
-
-  return 0;
-}
-
-int GenericObjectMap::get_keys(const coll_t &cid, const ghobject_t &oid,
-                               const string &prefix,
-                               set<string> *keys)
-{
-  Header header = lookup_header(cid, oid);
-  if (!header)
-    return -ENOENT;
-
-  ObjectMap::ObjectMapIterator iter = _get_iterator(header, prefix);
-  for (iter->seek_to_first(); iter->valid(); iter->next()) {
-    if (iter->status())
-      return iter->status();
-    keys->insert(iter->key());
-  }
-  return 0;
-}
-
-int GenericObjectMap::get_values(const coll_t &cid, const ghobject_t &oid,
-                                 const string &prefix,
-                                 const set<string> &keys,
-                                 map<string, bufferlist> *out)
-{
-  Header header = lookup_header(cid, oid);
-  if (!header)
-    return -ENOENT;
-  return scan(header, prefix, keys, 0, out);
-}
-
-int GenericObjectMap::check_keys(const coll_t &cid, const ghobject_t &oid,
-                                 const string &prefix,
-                                 const set<string> &keys,
-                                 set<string> *out)
-{
-  Header header = lookup_header(cid, oid);
-  if (!header)
-    return -ENOENT;
-  return scan(header, prefix, keys, out, 0);
-}
-
-void GenericObjectMap::clone(const Header parent, const coll_t &cid,
-                             const ghobject_t &target,
-                             KeyValueDB::Transaction t,
-                             Header *old_header, Header *new_header)
-{
-  {
-    Header destination = lookup_header(cid, target);
-    if (destination) {
-      remove_header(cid, target, destination, t);
-      destination->num_children--;
-      _clear(destination, t);
-    }
-  }
-
-  Header source = generate_new_header(parent->cid, parent->oid, parent, t);
-  Header destination = generate_new_header(cid, target, parent, t);
-
-  destination->data = parent->data;
-  source->data = parent->data;
-
-  parent->num_children = 2;
-  set_parent_header(parent, t);
-  set_header(parent->cid, parent->oid, *source, t);
-  set_header(cid, target, *destination, t);
-
-  if (new_header)
-    *old_header = source;
-  if (new_header)
-    *new_header = destination;
-
-  // Clone will set parent header and rm_keys wll lookup_parent which will try
-  // to find parent header. So it will let lookup_parent fail when "clone" and
-  // "rm_keys" in one transaction. Here have to sync transaction to make
-  // visiable for lookup_parent
-  // FIXME: Clear transaction operations here
-  int r = submit_transaction_sync(t);
-  assert(r == 0);
-}
-
-void GenericObjectMap::rename(const Header old_header, const coll_t &cid,
-                              const ghobject_t &target,
-                              KeyValueDB::Transaction t)
-{
-  if (old_header->oid == target && old_header->cid == cid)
-    return ;
-
-  remove_header(old_header->cid, old_header->oid, old_header, t);
-  old_header->cid = cid;
-  old_header->oid = target;
-  set_header(cid, target, *old_header, t);
-}
-
-int GenericObjectMap::init(bool do_upgrade)
-{
-  map<string, bufferlist> result;
-  set<string> to_get;
-  to_get.insert(GLOBAL_STATE_KEY);
-  int r = db->get(INTERN_PREFIX, to_get, &result);
-  if (r < 0)
-    return r;
-  if (!result.empty()) {
-    bufferlist::iterator bliter = result.begin()->second.begin();
-    state.decode(bliter);
-    if (state.v < 1) { // Needs upgrade
-      if (!do_upgrade) {
-        dout(1) << "GenericObjbectMap requires an upgrade,"
-                << " set filestore_update_to"
-                << dendl;
-        return -ENOTSUP;
-      } else {
-        r = upgrade();
-        if (r < 0)
-          return r;
-      }
-    }
-  } else {
-    // New store
-    state.v = 1;
-    state.seq = 1;
-  }
-  dout(20) << "(init)genericobjectmap: seq is " << state.seq << dendl;
-  return 0;
-}
-
-bool GenericObjectMap::check(std::ostream &out)
-{
-  bool retval = true;
-  map<uint64_t, uint64_t> parent_to_num_children;
-  map<uint64_t, uint64_t> parent_to_actual_num_children;
-  KeyValueDB::Iterator iter = db->get_iterator(GHOBJECT_TO_SEQ_PREFIX);
-
-  for (iter->seek_to_first(); iter->valid(); iter->next()) {
-    _Header header;
-    assert(header.num_children == 1);
-    header.num_children = 0; // Hack for leaf node
-    bufferlist bl = iter->value();
-    while (true) {
-      bufferlist::iterator bliter = bl.begin();
-      header.decode(bliter);
-      if (header.seq != 0)
-        parent_to_actual_num_children[header.seq] = header.num_children;
-      if (header.parent == 0)
-        break;
-
-      if (!parent_to_num_children.count(header.parent))
-        parent_to_num_children[header.parent] = 0;
-      parent_to_num_children[header.parent]++;
-      if (parent_to_actual_num_children.count(header.parent))
-        break;
-
-      set<string> to_get;
-      map<string, bufferlist> got;
-      to_get.insert(PARENT_KEY);
-      db->get(parent_seq_prefix(header.parent), to_get, &got);
-      if (got.empty()) {
-        out << "Missing: seq " << header.parent << std::endl;
-        retval = false;
-        break;
-      } else {
-        bl = got.begin()->second;
-      }
-    }
-  }
-
-  for (map<uint64_t, uint64_t>::iterator i = parent_to_num_children.begin();
-       i != parent_to_num_children.end();
-       parent_to_num_children.erase(i++)) {
-    if (!parent_to_actual_num_children.count(i->first))
-      continue;
-    if (parent_to_actual_num_children[i->first] != i->second) {
-      out << "Invalid: seq " << i->first << " recorded children: "
-          << parent_to_actual_num_children[i->first] << " found: "
-          << i->second << std::endl;
-      retval = false;
-    }
-    parent_to_actual_num_children.erase(i->first);
-  }
-  return retval;
-}
-
-
-// ============== GenericObjectMap Intern Implementation =================
-
-int GenericObjectMap::scan(Header header,
-                           const string &prefix,
-                           const set<string> &in_keys,
-                           set<string> *out_keys,
-                           map<string, bufferlist> *out_values)
-{
-  ObjectMap::ObjectMapIterator db_iter = _get_iterator(header, prefix);
-  for (set<string>::const_iterator key_iter = in_keys.begin();
-       key_iter != in_keys.end();
-       ++key_iter) {
-    db_iter->lower_bound(*key_iter);
-    if (db_iter->status())
-      return db_iter->status();
-
-    if (db_iter->valid() && db_iter->key() == *key_iter) {
-      if (out_keys)
-        out_keys->insert(*key_iter);
-      if (out_values)
-        out_values->insert(make_pair(db_iter->key(), db_iter->value()));
-    }
-  }
-  return 0;
-}
-
-int GenericObjectMap::_clear(Header header, KeyValueDB::Transaction t)
-{
-  while (1) {
-    if (header->num_children) {
-      set_parent_header(header, t);
-      break;
-    }
-
-    clear_header(header, t);
-    if (!header->parent)
-      break;
-
-    Header parent = lookup_parent(header);
-    if (!parent) {
-      return -EINVAL;
-    }
-    assert(parent->num_children > 0);
-    parent->num_children--;
-    header.swap(parent);
-  }
-  return 0;
-}
-
-int GenericObjectMap::merge_new_complete(
-    Header header, const map<string, string> &new_complete,
-    GenericObjectMapIterator iter, KeyValueDB::Transaction t)
-{
-  KeyValueDB::Iterator complete_iter = db->get_iterator(
-    complete_prefix(header));
-  map<string, string>::const_iterator i = new_complete.begin();
-  set<string> to_remove;
-  map<string, bufferlist> to_add;
-
-  string begin, end;
-  while (i != new_complete.end()) {
-    string new_begin = i->first;
-    string new_end = i->second;
-    int r = iter->in_complete_region(new_begin, &begin, &end);
-    if (r < 0)
-      return r;
-    if (r) {
-      to_remove.insert(begin);
-      new_begin = begin;
-    }
-    ++i;
-    while (i != new_complete.end()) {
-      if (!new_end.size() || i->first <= new_end) {
-        if (!new_end.size() && i->second > new_end) {
-          new_end = i->second;
-        }
-        ++i;
-        continue;
-      }
-
-      r = iter->in_complete_region(new_end, &begin, &end);
-      if (r < 0)
-        return r;
-      if (r) {
-        to_remove.insert(begin);
-        new_end = end;
-        continue;
-      }
-      break;
-    }
-    bufferlist bl;
-    bl.append(bufferptr(new_end.c_str(), new_end.size() + 1));
-    to_add.insert(make_pair(new_begin, bl));
-  }
-  t->rmkeys(complete_prefix(header), to_remove);
-  t->set(complete_prefix(header), to_add);
-  return 0;
-}
-
-int GenericObjectMap::need_parent(GenericObjectMapIterator iter)
-{
-  int r = iter->seek_to_first();
-  if (r < 0)
-    return r;
-
-  if (!iter->valid())
-    return 0;
-
-  string begin, end;
-  if (iter->in_complete_region(iter->key(), &begin, &end) && end == "") {
-    return 0;
-  }
-  return 1;
-}
-
-int GenericObjectMap::write_state(KeyValueDB::Transaction t)
-{
-  dout(20) << __func__ << " seq is " << state.seq << dendl;
-  bufferlist bl;
-  state.encode(bl);
-  map<string, bufferlist> to_write;
-  to_write[GLOBAL_STATE_KEY] = bl;
-  t->set(INTERN_PREFIX, to_write);
-  return 0;
-}
-
-// NOTE(haomai): It may occur dead lock if thread A hold header A try to header
-// B and thread hold header B try to get header A
-GenericObjectMap::Header GenericObjectMap::_lookup_header(
-    const coll_t &cid, const ghobject_t &oid)
-{
-  set<string> to_get;
-  to_get.insert(header_key(cid, oid));
-  _Header header;
-
-  map<string, bufferlist> out;
-
-  int r = db->get(GHOBJECT_TO_SEQ_PREFIX, to_get, &out);
-  if (r < 0)
-    return Header();
-  if (out.empty())
-    return Header();
-
-  bufferlist::iterator iter = out.begin()->second.begin();
-  header.decode(iter);
-
-  Header ret = Header(new _Header(header));
-  return ret;
-}
-
-GenericObjectMap::Header GenericObjectMap::_generate_new_header(
-    const coll_t &cid, const ghobject_t &oid, Header parent,
-    KeyValueDB::Transaction t)
-{
-  Header header = Header(new _Header());
-  header->seq = state.seq++;
-  if (parent) {
-    header->parent = parent->seq;
-  }
-  header->num_children = 1;
-  header->oid = oid;
-  header->cid = cid;
-
-  write_state(t);
-  return header;
-}
-
-GenericObjectMap::Header GenericObjectMap::lookup_parent(Header input)
-{
-  Mutex::Locker l(header_lock);
-  map<string, bufferlist> out;
-  set<string> keys;
-  keys.insert(PARENT_KEY);
-
-  dout(20) << "lookup_parent: parent " << input->parent
-       << " for seq " << input->seq << dendl;
-
-  int r = db->get(parent_seq_prefix(input->parent), keys, &out);
-  if (r < 0) {
-    assert(0);
-    return Header();
-  }
-  if (out.empty()) {
-    assert(0);
-    return Header();
-  }
-
-  Header header = Header(new _Header());
-  header->seq = input->parent;
-  bufferlist::iterator iter = out.begin()->second.begin();
-  header->decode(iter);
-  dout(20) << "lookup_parent: parent seq is " << header->seq << " with parent "
-           << header->parent << dendl;
-  return header;
-}
-
-GenericObjectMap::Header GenericObjectMap::lookup_create_header(
-    const coll_t &cid, const ghobject_t &oid, KeyValueDB::Transaction t)
-{
-  Mutex::Locker l(header_lock);
-  Header header = _lookup_header(cid, oid);
-  if (!header) {
-    header = _generate_new_header(cid, oid, Header(), t);
-    set_header(cid, oid, *header, t);
-  }
-  return header;
-}
-
-void GenericObjectMap::set_parent_header(Header header, KeyValueDB::Transaction t)
-{
-  dout(20) << __func__ << " setting seq " << header->seq << dendl;
-  map<string, bufferlist> to_write;
-  header->encode(to_write[PARENT_KEY]);
-  t->set(parent_seq_prefix(header->seq), to_write);
-}
-
-void GenericObjectMap::clear_header(Header header, KeyValueDB::Transaction t)
-{
-  dout(20) << __func__ << " clearing seq " << header->seq << dendl;
-  t->rmkeys_by_prefix(user_prefix(header, string()));
-  t->rmkeys_by_prefix(complete_prefix(header));
-  set<string> keys;
-  keys.insert(PARENT_KEY);
-  t->rmkeys(parent_seq_prefix(header->seq), keys);
-}
-
-// only remove GHOBJECT_TO_SEQ
-void GenericObjectMap::remove_header(const coll_t &cid,
-                                     const ghobject_t &oid, Header header,
-                                     KeyValueDB::Transaction t)
-{
-  dout(20) << __func__ << " removing " << header->seq
-           << " cid " << cid << " oid " << oid << dendl;
-  set<string> to_remove;
-  to_remove.insert(header_key(cid, oid));
-  t->rmkeys(GHOBJECT_TO_SEQ_PREFIX, to_remove);
-}
-
-void GenericObjectMap::set_header(const coll_t &cid, const ghobject_t &oid,
-                                  _Header &header, KeyValueDB::Transaction t)
-{
-  dout(20) << __func__ << " setting " << header.seq
-           << " cid " << cid << " oid " << oid << " parent seq "
-           << header.parent << dendl;
-  map<string, bufferlist> to_set;
-  header.encode(to_set[header_key(cid, oid)]);
-  t->set(GHOBJECT_TO_SEQ_PREFIX, to_set);
-}
-
-int GenericObjectMap::list_objects(const coll_t &cid, ghobject_t start, ghobject_t end, int max,
-                                   vector<ghobject_t> *out, ghobject_t *next)
-{
-  // FIXME
-  Mutex::Locker l(header_lock);
-  if (start.is_max())
-      return 0;
-
-  if (start.is_min()) {
-    vector<ghobject_t> oids;
-
-    KeyValueDB::Iterator iter = db->get_iterator(GHOBJECT_TO_SEQ_PREFIX);
-    for (iter->lower_bound(header_key(cid)); iter->valid(); iter->next()) {
-      bufferlist bl = iter->value();
-      bufferlist::iterator bliter = bl.begin();
-      _Header header;
-      header.decode(bliter);
-
-      if (header.cid == cid)
-        oids.push_back(header.oid);
-
-      break;
-    }
-
-    if (oids.empty()) {
-      if (next)
-        *next = ghobject_t::get_max();
-      return 0;
-    }
-    start = oids[0];
-  }
-
-  int size = 0;
-  KeyValueDB::Iterator iter = db->get_iterator(GHOBJECT_TO_SEQ_PREFIX);
-  for (iter->lower_bound(header_key(cid, start)); iter->valid(); iter->next()) {
-    bufferlist bl = iter->value();
-    bufferlist::iterator bliter = bl.begin();
-    _Header header;
-    header.decode(bliter);
-
-    if (header.cid != cid) {
-      if (next)
-        *next = ghobject_t::get_max();
-      break;
-    }
-
-    if (max && size >= max) {
-      if (next)
-        *next = header.oid;
-      break;
-    }
-
-    if (cmp_bitwise(header.oid, end) >= 0) {
-      if (next)
-	*next = ghobject_t::get_max();
-      break;
-    }
-
-    assert(cmp_bitwise(start, header.oid) <= 0);
-    assert(cmp_bitwise(header.oid, end) < 0);
-
-
-    size++;
-    if (out)
-      out->push_back(header.oid);
-    start = header.oid;
-  }
-
-  if (out->size())
-    dout(20) << "objects: " << *out << dendl;
-
-  if (!iter->valid())
-    if (next)
-      *next = ghobject_t::get_max();
-
-  return 0;
-}
diff --git a/src/os/keyvaluestore/GenericObjectMap.h b/src/os/keyvaluestore/GenericObjectMap.h
deleted file mode 100644
index 9417937..0000000
--- a/src/os/keyvaluestore/GenericObjectMap.h
+++ /dev/null
@@ -1,429 +0,0 @@
-// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
-// vim: ts=8 sw=2 smarttab
-/*
- * Ceph - scalable distributed file system
- *
- * Copyright (C) 2013 UnitedStack <haomai at unitedstack.com>
- *
- * Author: Haomai Wang <haomaiwang at gmail.com>
- *
- * This is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License version 2.1, as published by the Free Software
- * Foundation.  See file COPYING.
- *
- */
-
-#ifndef CEPH_GENERICOBJECTMAP_H
-#define CEPH_GENERICOBJECTMAP_H
-
-#include "include/buffer.h"
-#include <set>
-#include <map>
-#include <string>
-#include <vector>
-#include <boost/scoped_ptr.hpp>
-
-#include "include/memory.h"
-#include "os/ObjectMap.h"
-#include "kv/KeyValueDB.h"
-#include "osd/osd_types.h"
-#include "common/Mutex.h"
-#include "common/Cond.h"
-#include "common/simple_cache.hpp"
-
-
-/**
- * Genericobjectmap: Provide with key/value associated to ghobject_t APIs to caller
- * and avoid concerning too much. Wrap and combine KeyValueDB/ObjectMap APIs
- * with ghobject_t and adding clone capacity.
- *
- * Prefix space structure:
- *
- * - GHOBJECT_TO_SEQ: Contains leaf mapping from ghobject_t->Header(including
- *                    hobj.seq and related metadata)
- * - INTERN_PREFIX: GLOBAL_STATE_KEY - contains the global state
- *                                  @see State
- *                                  @see write_state
- *                                  @see init
- *                                  @see generate_new_header
- * - INTERN_PREFIX + header_key(header->seq) + COMPLETE_PREFIX: see below
- * - INTERN_PREFIX + header_key(header->seq) + PARENT_KEY
- *              : used to store parent header(same as headers in GHOBJECT_TO_SEQ)
- * - USER_PREFIX + header_key(header->seq) + [CUSTOM_PREFIX]
- *              : key->value which set by callers
- *
- * For each node (represented by a header), we
- * store three mappings: the key mapping, the complete mapping, and the parent.
- * The complete mapping (COMPLETE_PREFIX space) is key->key.  Each x->y entry in
- * this mapping indicates that the key mapping contains all entries on [x,y).
- * Note, max string is represented by "", so ""->"" indicates that the parent
- * is unnecessary (@see rm_keys).  When looking up a key not contained in the
- * the complete set, we have to check the parent if we don't find it in the
- * key set.  During rm_keys, we copy keys from the parent and update the
- * complete set to reflect the change @see rm_keys.
- */
-
-// This class only provide basic read capacity, suggest inherit it to
-// implement write transaction to use it. @see StripObjectMap
-class GenericObjectMap {
- public:
-  boost::scoped_ptr<KeyValueDB> db;
-
-  /**
-   * Serializes access to next_seq as well as the in_use set
-   */
-  Mutex header_lock;
-
-  GenericObjectMap(KeyValueDB *db) : db(db), header_lock("GenericObjectMap") {}
-
-  int get(
-    const coll_t &cid,
-    const ghobject_t &oid,
-    const string &prefix,
-    map<string, bufferlist> *out
-    );
-
-  int get_keys(
-    const coll_t &cid,
-    const ghobject_t &oid,
-    const string &prefix,
-    set<string> *keys
-    );
-
-  int get_values(
-    const coll_t &cid,
-    const ghobject_t &oid,
-    const string &prefix,
-    const set<string> &keys,
-    map<string, bufferlist> *out
-    );
-
-  int check_keys(
-    const coll_t &cid,
-    const ghobject_t &oid,
-    const string &prefix,
-    const set<string> &keys,
-    set<string> *out
-    );
-
-  /// Read initial state from backing store
-  int init(bool upgrade = false);
-
-  /// Upgrade store to current version
-  int upgrade() {return 0;}
-
-  /// Consistency check, debug, there must be no parallel writes
-  bool check(std::ostream &out);
-
-  /// Util, list all objects, there must be no other concurrent access
-  int list_objects(const coll_t &cid, ghobject_t start, ghobject_t end, int max,
-                   vector<ghobject_t> *objs, ///< [out] objects
-                   ghobject_t *next);
-
-  ObjectMap::ObjectMapIterator get_iterator(const coll_t &cid,
-                                            const ghobject_t &oid,
-                                            const string &prefix);
-
-  KeyValueDB::Transaction get_transaction() { return db->get_transaction(); }
-  int submit_transaction(KeyValueDB::Transaction t) {
-    return db->submit_transaction(t);
-  }
-  int submit_transaction_sync(KeyValueDB::Transaction t) {
-    return db->submit_transaction_sync(t);
-  }
-
-  /// persistent state for store @see generate_header
-  struct State {
-    __u8 v;
-    uint64_t seq;
-    State() : v(0), seq(1) {}
-    State(uint64_t seq) : v(0), seq(seq) {}
-
-    void encode(bufferlist &bl) const {
-      ENCODE_START(1, 1, bl);
-      ::encode(v, bl);
-      ::encode(seq, bl);
-      ENCODE_FINISH(bl);
-    }
-
-    void decode(bufferlist::iterator &bl) {
-      DECODE_START(1, bl);
-      ::decode(v, bl);
-      ::decode(seq, bl);
-      DECODE_FINISH(bl);
-    }
-
-    void dump(Formatter *f) const {
-      f->dump_unsigned("seq", seq);
-    }
-
-    static void generate_test_instances(list<State*> &o) {
-      o.push_back(new State(0));
-      o.push_back(new State(20));
-    }
-  } state;
-
-  struct _Header {
-    uint64_t seq;
-    uint64_t parent;
-    uint64_t num_children;
-
-    coll_t cid;
-    ghobject_t oid;
-
-    // Used by successor
-    bufferlist data;
-
-    void encode(bufferlist &bl) const {
-      ENCODE_START(1, 1, bl);
-      ::encode(seq, bl);
-      ::encode(parent, bl);
-      ::encode(num_children, bl);
-      ::encode(cid, bl);
-      ::encode(oid, bl);
-      ::encode(data, bl);
-      ENCODE_FINISH(bl);
-    }
-
-    void decode(bufferlist::iterator &bl) {
-      DECODE_START(1, bl);
-      ::decode(seq, bl);
-      ::decode(parent, bl);
-      ::decode(num_children, bl);
-      ::decode(cid, bl);
-      ::decode(oid, bl);
-      ::decode(data, bl);
-      DECODE_FINISH(bl);
-    }
-
-    void dump(Formatter *f) const {
-      f->dump_unsigned("seq", seq);
-      f->dump_unsigned("parent", parent);
-      f->dump_unsigned("num_children", num_children);
-      f->dump_stream("coll") << cid;
-      f->dump_stream("oid") << oid;
-    }
-
-    _Header() : seq(0), parent(0), num_children(1) {}
-  };
-
-  typedef ceph::shared_ptr<_Header> Header;
-
-  Header lookup_header(const coll_t &cid, const ghobject_t &oid) {
-    Mutex::Locker l(header_lock);
-    return _lookup_header(cid, oid);
-  }
-
-  /// Lookup or create header for c oid
-  Header lookup_create_header(const coll_t &cid, const ghobject_t &oid,
-    KeyValueDB::Transaction t);
-
-  /// Set leaf node for c and oid to the value of header
-  void set_header(const coll_t &cid, const ghobject_t &oid, _Header &header,
-    KeyValueDB::Transaction t);
-
-  // Move all modify member function to "protect", in order to indicate these
-  // should be made use of by sub-class
-  void set_keys(
-    const Header header,
-    const string &prefix,
-    const map<string, bufferlist> &set,
-    KeyValueDB::Transaction t
-    );
-
-  int clear(
-    const Header header,
-    KeyValueDB::Transaction t
-    );
-
-  int rm_keys(
-    const Header header,
-    const string &prefix,
-    const set<string> &buffered_keys,
-    const set<string> &to_clear,
-    KeyValueDB::Transaction t
-    );
-
-  void clone(
-    const Header origin_header,
-    const coll_t &cid,
-    const ghobject_t &target,
-    KeyValueDB::Transaction t,
-    Header *old_header,
-    Header *new_header
-    );
-
-  void rename(
-    const Header header,
-    const coll_t &cid,
-    const ghobject_t &target,
-    KeyValueDB::Transaction t
-    );
-
-  static const string GLOBAL_STATE_KEY;
-  static const string PARENT_KEY;
-
-  static const string USER_PREFIX;
-  static const string INTERN_PREFIX;
-  static const string PARENT_PREFIX;
-  static const string COMPLETE_PREFIX;
-  static const string GHOBJECT_TO_SEQ_PREFIX;
-
-  static const string GHOBJECT_KEY_SEP_S;
-  static const char GHOBJECT_KEY_SEP_C;
-  static const char GHOBJECT_KEY_ENDING;
-
-private:
-  /// Implicit lock on Header->seq
-
-  static string header_key(const coll_t &cid);
-  static string header_key(const coll_t &cid, const ghobject_t &oid);
-  static bool parse_header_key(const string &in, coll_t *c, ghobject_t *oid);
-
-  string seq_key(uint64_t seq) {
-    char buf[100];
-    snprintf(buf, sizeof(buf), "%.*" PRId64, (int)(2*sizeof(seq)), seq);
-    return string(buf);
-  }
-
-  string user_prefix(Header header, const string &prefix);
-  string complete_prefix(Header header);
-  string parent_seq_prefix(uint64_t seq);
-
-  class EmptyIteratorImpl : public ObjectMap::ObjectMapIteratorImpl {
-  public:
-    int seek_to_first() { return 0; }
-    int seek_to_last() { return 0; }
-    int upper_bound(const string &after) { return 0; }
-    int lower_bound(const string &to) { return 0; }
-    bool valid() { return false; }
-    int next(bool validate=true) { assert(0); return 0; }
-    string key() { assert(0); return ""; }
-    bufferlist value() { assert(0); return bufferlist(); }
-    int status() { return 0; }
-  };
-
-
-  /// Iterator
-  class GenericObjectMapIteratorImpl : public ObjectMap::ObjectMapIteratorImpl {
-  public:
-    GenericObjectMap *map;
-
-    /// NOTE: implicit lock on header->seq AND for all ancestors
-    Header header;
-
-    /// parent_iter == NULL iff no parent
-    ceph::shared_ptr<GenericObjectMapIteratorImpl> parent_iter;
-    KeyValueDB::Iterator key_iter;
-    KeyValueDB::Iterator complete_iter;
-
-    /// cur_iter points to currently valid iterator
-    ceph::shared_ptr<ObjectMap::ObjectMapIteratorImpl> cur_iter;
-    int r;
-
-    /// init() called, key_iter, complete_iter, parent_iter filled in
-    bool ready;
-    /// past end
-    bool invalid;
-
-    string prefix;
-
-    GenericObjectMapIteratorImpl(GenericObjectMap *map, Header header,
-        const string &_prefix) : map(map), header(header), r(0), ready(false),
-                                 invalid(true), prefix(_prefix) { }
-    int seek_to_first();
-    int seek_to_last();
-    int upper_bound(const string &after);
-    int lower_bound(const string &to);
-    bool valid();
-    int next(bool validate=true);
-    string key();
-    bufferlist value();
-    int status();
-
-    bool on_parent() {
-      return cur_iter == parent_iter;
-    }
-
-    /// skips to next valid parent entry
-    int next_parent();
-
-    /// Tests whether to_test is in complete region
-    int in_complete_region(const string &to_test, ///[in] key to test
-                           string *begin,         ///[out] beginning of region
-                           string *end            ///[out] end of region
-      ); ///< @returns true if to_test is in the complete region, else false
-
-  private:
-    int init();
-    bool valid_parent();
-    int adjust();
-  };
-
-protected:
-  typedef ceph::shared_ptr<GenericObjectMapIteratorImpl> GenericObjectMapIterator;
-  GenericObjectMapIterator _get_iterator(Header header, string prefix) {
-    return GenericObjectMapIterator(new GenericObjectMapIteratorImpl(this, header, prefix));
-  }
-
-  Header generate_new_header(const coll_t &cid, const ghobject_t &oid,
-                             Header parent, KeyValueDB::Transaction t) {
-    Mutex::Locker l(header_lock);
-    return _generate_new_header(cid, oid, parent, t);
-  }
-
-  // Scan keys in header into out_keys and out_values (if nonnull)
-  int scan(Header header, const string &prefix, const set<string> &in_keys,
-           set<string> *out_keys, map<string, bufferlist> *out_values);
-
- private:
-
-  /// Removes node corresponding to header
-  void clear_header(Header header, KeyValueDB::Transaction t);
-
-  /// Set node containing input to new contents
-  void set_parent_header(Header input, KeyValueDB::Transaction t);
-
-    /// Remove leaf node corresponding to oid in c
-  void remove_header(const coll_t &cid, const ghobject_t &oid, Header header,
-      KeyValueDB::Transaction t);
-
-  /**
-   * Generate new header for c oid with new seq number
-   *
-   * Has the side effect of syncronously saving the new GenericObjectMap state
-   */
-  Header _generate_new_header(const coll_t &cid, const ghobject_t &oid,
-                              Header parent, KeyValueDB::Transaction t);
-
-  // Lookup leaf header for c oid
-  Header _lookup_header(const coll_t &cid, const ghobject_t &oid);
-
-  // Lookup header node for input
-  Header lookup_parent(Header input);
-
-  // Remove header and all related prefixes
-  int _clear(Header header, KeyValueDB::Transaction t);
-
-  // Adds to t operations necessary to add new_complete to the complete set
-  int merge_new_complete(Header header, const map<string, string> &new_complete,
-      GenericObjectMapIterator iter, KeyValueDB::Transaction t);
-
-  // Writes out State (mainly next_seq)
-  int write_state(KeyValueDB::Transaction _t);
-
-  // 0 if the complete set now contains all of key space, < 0 on error, 1 else
-  int need_parent(GenericObjectMapIterator iter);
-
-  // Copies header entry from parent @see rm_keys
-  int copy_up_header(Header header, KeyValueDB::Transaction t);
-
-  // Sets header @see set_header
-  void _set_header(Header header, const bufferlist &bl,
-                   KeyValueDB::Transaction t);
-};
-WRITE_CLASS_ENCODER(GenericObjectMap::_Header)
-WRITE_CLASS_ENCODER(GenericObjectMap::State)
-
-#endif
diff --git a/src/os/keyvaluestore/KeyValueStore.cc b/src/os/keyvaluestore/KeyValueStore.cc
deleted file mode 100644
index 5738c50..0000000
--- a/src/os/keyvaluestore/KeyValueStore.cc
+++ /dev/null
@@ -1,3015 +0,0 @@
-// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
-// vim: ts=8 sw=2 smarttab
-/*
- * Ceph - scalable distributed file system
- *
- * Copyright (C) 2013 UnitedStack <haomai at unitedstack.com>
- *
- * Author: Haomai Wang <haomaiwang at gmail.com>
- *
- * This is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License version 2.1, as published by the Free Software
- * Foundation.  See file COPYING.
- *
- */
-
-#include "include/int_types.h"
-
-#include <unistd.h>
-#include <stdlib.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <sys/file.h>
-#include <sys/param.h>
-#include <sys/mount.h>
-#include <errno.h>
-#include <dirent.h>
-
-#include <iostream>
-#include <map>
-
-#include "include/compat.h"
-
-#include <fstream>
-#include <sstream>
-
-#include "KeyValueStore.h"
-#include "common/BackTrace.h"
-#include "include/types.h"
-
-#include "osd/osd_types.h"
-#include "include/color.h"
-#include "include/buffer.h"
-
-#include "common/debug.h"
-#include "common/errno.h"
-#include "common/run_cmd.h"
-#include "common/safe_io.h"
-#include "common/perf_counters.h"
-#include "common/sync_filesystem.h"
-
-#include "common/ceph_crypto.h"
-using ceph::crypto::SHA1;
-
-#include "include/assert.h"
-
-#include "common/config.h"
-
-#define dout_subsys ceph_subsys_keyvaluestore
-
-const string KeyValueStore::OBJECT_STRIP_PREFIX = "_STRIP_";
-const string KeyValueStore::OBJECT_XATTR = "__OBJATTR__";
-const string KeyValueStore::OBJECT_OMAP = "__OBJOMAP__";
-const string KeyValueStore::OBJECT_OMAP_HEADER = "__OBJOMAP_HEADER__";
-const string KeyValueStore::OBJECT_OMAP_HEADER_KEY = "__OBJOMAP_HEADER__KEY_";
-const string KeyValueStore::COLLECTION = "__COLLECTION__";
-const string KeyValueStore::COLLECTION_ATTR = "__COLL_ATTR__";
-
-
-//Initial features in new superblock.
-static CompatSet get_kv_initial_compat_set() {
-  CompatSet::FeatureSet ceph_osd_feature_compat;
-  CompatSet::FeatureSet ceph_osd_feature_ro_compat;
-  CompatSet::FeatureSet ceph_osd_feature_incompat;
-  return CompatSet(ceph_osd_feature_compat, ceph_osd_feature_ro_compat,
-		   ceph_osd_feature_incompat);
-}
-
-//Features are added here that this KeyValueStore supports.
-static CompatSet get_kv_supported_compat_set() {
-  CompatSet compat =  get_kv_initial_compat_set();
-  //Any features here can be set in code, but not in initial superblock
-  return compat;
-}
-
-
-// ============== StripObjectMap Implementation =================
-
-int StripObjectMap::save_strip_header(StripObjectHeaderRef strip_header,
-                                      KeyValueDB::Transaction t)
-{
-  if (strip_header->updated) {
-    strip_header->header->data.clear();
-    ::encode(*strip_header, strip_header->header->data);
-
-    set_header(strip_header->cid, strip_header->oid, *(strip_header->header), t);
-    strip_header->updated = false;
-  }
-  return 0;
-}
-
-int StripObjectMap::create_strip_header(const coll_t &cid,
-                                        const ghobject_t &oid,
-                                        StripObjectHeaderRef *strip_header,
-                                        KeyValueDB::Transaction t)
-{
-  Header header = generate_new_header(cid, oid, Header(), t);
-  if (!header)
-    return -EINVAL;
-
-  StripObjectHeaderRef tmp = StripObjectHeaderRef(new StripObjectHeader());
-  tmp->oid = oid;
-  tmp->cid = cid;
-  tmp->header = header;
-  tmp->updated = true;
-  if (strip_header)
-    *strip_header = tmp;
-
-  return 0;
-}
-
-int StripObjectMap::lookup_strip_header(const coll_t &cid,
-                                        const ghobject_t &oid,
-                                        StripObjectHeaderRef *strip_header)
-{
-  {
-    Mutex::Locker l(lock);
-    pair<coll_t, StripObjectHeaderRef> p;
-    if (caches.lookup(oid, &p)) {
-      if (p.first == cid) {
-        *strip_header = p.second;
-        return 0;
-      }
-    }
-  }
-  Header header = lookup_header(cid, oid);
-
-  if (!header) {
-    dout(20) << "lookup_strip_header failed to get strip_header "
-             << " cid " << cid <<" oid " << oid << dendl;
-    return -ENOENT;
-  }
-
-
-  StripObjectHeaderRef tmp = StripObjectHeaderRef(new StripObjectHeader());
-  if (header->data.length()) {
-    bufferlist::iterator bliter = header->data.begin();
-    ::decode(*tmp, bliter);
-  }
-
-  if (tmp->strip_size == 0) {
-    tmp->strip_size = default_strip_size;
-    tmp->updated = true;
-  }
-
-  tmp->oid = oid;
-  tmp->cid = cid;
-  tmp->header = header;
-
-  {
-    Mutex::Locker l(lock);
-    caches.add(oid, make_pair(cid, tmp));
-  }
-  *strip_header = tmp;
-  dout(10) << "lookup_strip_header done " << " cid " << cid << " oid "
-           << oid << dendl;
-  return 0;
-}
-
-int StripObjectMap::file_to_extents(uint64_t offset, size_t len,
-                                    uint64_t strip_size,
-                                    vector<StripExtent> &extents)
-{
-  if (len == 0)
-    return 0;
-
-  uint64_t start, end, strip_offset;
-  start = offset / strip_size;
-  end = (offset + len) / strip_size;
-  strip_offset = start * strip_size;
-
-  // "offset" may in the middle of first strip object
-  if (offset > strip_offset) {
-    uint64_t extent_offset, extent_len;
-    extent_offset = offset - strip_offset;
-    if (extent_offset + len <= strip_size)
-      extent_len = len;
-    else
-      extent_len = strip_size - extent_offset;
-    extents.push_back(StripExtent(start, extent_offset, extent_len));
-    start++;
-    strip_offset += strip_size;
-  }
-
-  for (; start < end; ++start) {
-    extents.push_back(StripExtent(start, 0, strip_size));
-    strip_offset += strip_size;
-  }
-
-  // The end of strip object may be partial
-  if (offset + len > strip_offset)
-    extents.push_back(StripExtent(start, 0, offset+len-strip_offset));
-
-  assert(extents.size());
-  dout(10) << "file_to_extents done " << dendl;
-  return 0;
-}
-
-void StripObjectMap::clone_wrap(StripObjectHeaderRef old_header,
-                                const coll_t &cid, const ghobject_t &oid,
-                                KeyValueDB::Transaction t,
-                                StripObjectHeaderRef *target_header)
-{
-  Header new_origin_header;
-  StripObjectHeaderRef tmp = StripObjectHeaderRef(new StripObjectHeader());
-
-  clone(old_header->header, cid, oid, t, &new_origin_header,
-        &tmp->header);
-
-  tmp->oid = oid;
-  tmp->cid = cid;
-  tmp->strip_size = old_header->strip_size;
-  tmp->max_size = old_header->max_size;
-  tmp->bits = old_header->bits;
-  tmp->updated = true;
-  old_header->header = new_origin_header;
-  old_header->updated = true;
-
-  if (target_header)
-    *target_header = tmp;
-}
-
-void StripObjectMap::rename_wrap(StripObjectHeaderRef old_header, const coll_t &cid, const ghobject_t &oid,
-                                 KeyValueDB::Transaction t,
-                                 StripObjectHeaderRef *new_header)
-{
-  rename(old_header->header, cid, oid, t);
-
-  StripObjectHeaderRef tmp = StripObjectHeaderRef(new StripObjectHeader());
-  tmp->strip_size = old_header->strip_size;
-  tmp->max_size = old_header->max_size;
-  tmp->bits = old_header->bits;
-  tmp->header = old_header->header;
-  tmp->oid = oid;
-  tmp->cid = cid;
-  tmp->updated = true;
-
-  if (new_header)
-    *new_header = tmp;
-
-  old_header->header = Header();
-  old_header->deleted = true;
-}
-
-int StripObjectMap::get_values_with_header(const StripObjectHeaderRef header,
-                                           const string &prefix,
-                                           const set<string> &keys,
-                                           map<string, bufferlist> *out)
-{
-  return scan(header->header, prefix, keys, 0, out);
-}
-
-int StripObjectMap::get_keys_with_header(const StripObjectHeaderRef header,
-                                         const string &prefix,
-                                         set<string> *keys)
-{
-  ObjectMap::ObjectMapIterator iter = _get_iterator(header->header, prefix);
-  for (iter->seek_to_first(); iter->valid(); iter->next()) {
-    assert(!iter->status());
-    keys->insert(iter->key());
-  }
-  return 0;
-}
-
-int StripObjectMap::get_with_header(const StripObjectHeaderRef header,
-                        const string &prefix, map<string, bufferlist> *out)
-{
-  ObjectMap::ObjectMapIterator iter = _get_iterator(header->header, prefix);
-  for (iter->seek_to_first(); iter->valid(); iter->next()) {
-    assert(!iter->status());
-    out->insert(make_pair(iter->key(), iter->value()));
-  }
-
-  return 0;
-}
-
-// ========= KeyValueStore::BufferTransaction Implementation ============
-
-int KeyValueStore::BufferTransaction::lookup_cached_header(
-    const coll_t &cid, const ghobject_t &oid,
-    StripObjectMap::StripObjectHeaderRef *strip_header,
-    bool create_if_missing)
-{
-  uniq_id uid = make_pair(cid, oid);
-  StripObjectMap::StripObjectHeaderRef header;
-  int r = 0;
-
-  StripHeaderMap::iterator it = strip_headers.find(uid);
-  if (it != strip_headers.end()) {
-
-    if (!it->second->deleted) {
-      if (strip_header)
-        *strip_header = it->second;
-      return 0;
-    } else if (!create_if_missing) {
-      return -ENOENT;
-    }
-
-    // If (it->second.deleted && create_if_missing) go down
-    r = -ENOENT;
-  } else {
-    r = store->backend->lookup_strip_header(cid, oid, &header);
-  }
-
-  if (r == -ENOENT && create_if_missing) {
-    r = store->backend->create_strip_header(cid, oid, &header, t);
-  }
-
-  if (r < 0) {
-    dout(10) << __func__  << " " << cid << "/" << oid << " "
-             << " r = " << r << dendl;
-    return r;
-  }
-
-  strip_headers[uid] = header;
-  if (strip_header)
-    *strip_header = header;
-  return r;
-}
-
-int KeyValueStore::BufferTransaction::get_buffer_keys(
-    StripObjectMap::StripObjectHeaderRef strip_header, const string &prefix,
-    const set<string> &keys, map<string, bufferlist> *out)
-{
-  set<string> need_lookup;
-
-  uniq_id uid = make_pair(strip_header->cid, strip_header->oid);
-  for (set<string>::iterator it = keys.begin(); it != keys.end(); ++it) {
-    map< uniq_id, map<pair<string, string>, bufferlist> >::iterator obj_it = buffers.find(uid);
-    if ( obj_it != buffers.end() ) {
-      map<pair<string, string>, bufferlist>::iterator i =
-          obj_it->second.find(make_pair(prefix, *it));
-      if (i != obj_it->second.end()) {
-        (*out)[*it].swap(i->second);
-      } else {
-        need_lookup.insert(*it);
-      }
-    }else {
-      need_lookup.insert(*it);
-    }
-  }
-
-  if (!need_lookup.empty()) {
-    int r = store->backend->get_values_with_header(strip_header, prefix,
-                                                   need_lookup, out);
-    if (r < 0) {
-      dout(10) << __func__  << " " << strip_header->cid << "/"
-               << strip_header->oid << " " << " r = " << r << dendl;
-      return r;
-    }
-  }
-
-  return 0;
-}
-
-void KeyValueStore::BufferTransaction::set_buffer_keys(
-     StripObjectMap::StripObjectHeaderRef strip_header,
-     const string &prefix, map<string, bufferlist> &values)
-{
-  store->backend->set_keys(strip_header->header, prefix, values, t);
-
-  uniq_id uid = make_pair(strip_header->cid, strip_header->oid);
-  map<pair<string, string>, bufferlist> &uid_buffers = buffers[uid];
-  for (map<string, bufferlist>::iterator iter = values.begin();
-       iter != values.end(); ++iter) {
-    uid_buffers[make_pair(prefix, iter->first)].swap(iter->second);
-  }
-}
-
-int KeyValueStore::BufferTransaction::remove_buffer_keys(
-     StripObjectMap::StripObjectHeaderRef strip_header, const string &prefix,
-     const set<string> &keys)
-{
-  uniq_id uid = make_pair(strip_header->cid, strip_header->oid);
-  map< uniq_id, map<pair<string, string>, bufferlist> >::iterator obj_it = buffers.find(uid);
-  set<string> buffered_keys;
-  if ( obj_it != buffers.end() ) {
-    // TODO: Avoid use empty bufferlist to indicate the key is removed
-    for (set<string>::iterator iter = keys.begin(); iter != keys.end(); ++iter) {
-      obj_it->second[make_pair(prefix, *iter)] = bufferlist();
-    }
-    // TODO: Avoid collect all buffered keys when remove keys
-    if (strip_header->header->parent) {
-      for (map<pair<string, string>, bufferlist>::iterator iter = obj_it->second.begin();
-           iter != obj_it->second.end(); ++iter) {
-        buffered_keys.insert(iter->first.second);
-      }
-    }
-  }
-
-  return store->backend->rm_keys(strip_header->header, prefix, buffered_keys, keys, t);
-}
-
-void KeyValueStore::BufferTransaction::clear_buffer_keys(
-     StripObjectMap::StripObjectHeaderRef strip_header, const string &prefix)
-{
-  uniq_id uid = make_pair(strip_header->cid, strip_header->oid);
-  map< uniq_id, map<pair<string, string>, bufferlist> >::iterator obj_it = buffers.find(uid);
-  if ( obj_it != buffers.end() ) {
-    for (map<pair<string, string>, bufferlist>::iterator iter = obj_it->second.begin();
-         iter != obj_it->second.end(); ++iter) {
-      if (iter->first.first == prefix)
-        iter->second = bufferlist();
-    }
-  }
-}
-
-int KeyValueStore::BufferTransaction::clear_buffer(
-     StripObjectMap::StripObjectHeaderRef strip_header)
-{
-  strip_header->deleted = true;
-
-  InvalidateCacheContext *c = new InvalidateCacheContext(store, strip_header->cid, strip_header->oid);
-  finishes.push_back(c);
-  return store->backend->clear(strip_header->header, t);
-}
-
-void KeyValueStore::BufferTransaction::clone_buffer(
-    StripObjectMap::StripObjectHeaderRef old_header,
-    const coll_t &cid, const ghobject_t &oid)
-{
-  // Remove target ahead to avoid dead lock
-  strip_headers.erase(make_pair(cid, oid));
-
-  StripObjectMap::StripObjectHeaderRef new_target_header;
-
-  store->backend->clone_wrap(old_header, cid, oid, t, &new_target_header);
-
-  // FIXME: Lacking of lock for origin header(now become parent), it will
-  // cause other operation can get the origin header while submitting
-  // transactions
-  strip_headers[make_pair(cid, oid)] = new_target_header;
-}
-
-void KeyValueStore::BufferTransaction::rename_buffer(
-    StripObjectMap::StripObjectHeaderRef old_header,
-    const coll_t &cid, const ghobject_t &oid)
-{
-  // FIXME: Lacking of lock for origin header, it will cause other operation
-  // can get the origin header while submitting transactions
-  StripObjectMap::StripObjectHeaderRef new_header;
-  store->backend->rename_wrap(old_header, cid, oid, t, &new_header);
-
-  InvalidateCacheContext *c = new InvalidateCacheContext(store, old_header->cid, old_header->oid);
-  finishes.push_back(c);
-  strip_headers[make_pair(cid, oid)] = new_header;
-}
-
-int KeyValueStore::BufferTransaction::submit_transaction()
-{
-  int r = 0;
-
-  for (StripHeaderMap::iterator header_iter = strip_headers.begin();
-       header_iter != strip_headers.end(); ++header_iter) {
-    StripObjectMap::StripObjectHeaderRef header = header_iter->second;
-
-    if (header->deleted)
-      continue;
-
-    if (header->updated) {
-      r = store->backend->save_strip_header(header, t);
-
-      if (r < 0) {
-        dout(10) << __func__ << " save strip header failed " << dendl;
-        goto out;
-      }
-    }
-  }
-
-  r = store->backend->submit_transaction_sync(t);
-  for (list<Context*>::iterator it = finishes.begin(); it != finishes.end(); ++it) {
-    (*it)->complete(r);
-  }
-
-out:
-  dout(5) << __func__ << " r = " << r << dendl;
-  return r;
-}
-
-// =========== KeyValueStore Intern Helper Implementation ==============
-
-ostream& operator<<(ostream& out, const KeyValueStore::OpSequencer& s)
-{
-  assert(&out);
-  return out << *s.parent;
-}
-
-int KeyValueStore::_create_current()
-{
-  struct stat st;
-  int ret = ::stat(current_fn.c_str(), &st);
-  if (ret == 0) {
-    // current/ exists
-    if (!S_ISDIR(st.st_mode)) {
-      dout(0) << "_create_current: current/ exists but is not a directory" << dendl;
-      ret = -EINVAL;
-    }
-  } else {
-    ret = ::mkdir(current_fn.c_str(), 0755);
-    if (ret < 0) {
-      ret = -errno;
-      dout(0) << "_create_current: mkdir " << current_fn << " failed: "<< cpp_strerror(ret) << dendl;
-    }
-  }
-
-  return ret;
-}
-
-
-
-// =========== KeyValueStore API Implementation ==============
-
-KeyValueStore::KeyValueStore(const std::string &base,
-                             const char *name, bool do_update) :
-  ObjectStore(base),
-  internal_name(name),
-  basedir(base),
-  fsid_fd(-1), current_fd(-1),
-  backend(NULL),
-  ondisk_finisher(g_ceph_context),
-  collections_lock("KeyValueStore::collections_lock"),
-  lock("KeyValueStore::lock"),
-  throttle_ops(g_ceph_context, "keyvaluestore_ops", g_conf->keyvaluestore_queue_max_ops),
-  throttle_bytes(g_ceph_context, "keyvaluestore_bytes", g_conf->keyvaluestore_queue_max_bytes),
-  op_finisher(g_ceph_context),
-  op_tp(g_ceph_context, "KeyValueStore::op_tp", "tp_kvstore",
-        g_conf->keyvaluestore_op_threads, "keyvaluestore_op_threads"),
-  op_wq(this, g_conf->keyvaluestore_op_thread_timeout,
-        g_conf->keyvaluestore_op_thread_suicide_timeout, &op_tp),
-  perf_logger(NULL),
-  m_keyvaluestore_queue_max_ops(g_conf->keyvaluestore_queue_max_ops),
-  m_keyvaluestore_queue_max_bytes(g_conf->keyvaluestore_queue_max_bytes),
-  m_keyvaluestore_strip_size(g_conf->keyvaluestore_default_strip_size),
-  m_keyvaluestore_max_expected_write_size(g_conf->keyvaluestore_max_expected_write_size),
-  do_update(do_update),
-  m_keyvaluestore_do_dump(false),
-  m_keyvaluestore_dump_fmt(true)
-{
-  ostringstream oss;
-  oss << basedir << "/current";
-  current_fn = oss.str();
-
-  // initialize perf_logger
-  PerfCountersBuilder plb(g_ceph_context, internal_name, l_os_commit_len, l_os_last);
-
-  plb.add_u64(l_os_oq_max_ops, "op_queue_max_ops", "Max operations count in queue");
-  plb.add_u64(l_os_oq_ops, "op_queue_ops", "Operations count in queue");
-  plb.add_u64_counter(l_os_ops, "ops", "Operations");
-  plb.add_u64(l_os_oq_max_bytes, "op_queue_max_bytes", "Max size of queue");
-  plb.add_u64(l_os_oq_bytes, "op_queue_bytes", "Size of queue");
-  plb.add_u64_counter(l_os_bytes, "bytes", "Data written to store");
-  plb.add_time_avg(l_os_commit_lat, "commit_latency", "Commit latency");
-  plb.add_time_avg(l_os_apply_lat, "apply_latency", "Apply latency");
-  plb.add_time_avg(l_os_queue_lat, "queue_transaction_latency_avg", "Store operation queue latency");
-
-  perf_logger = plb.create_perf_counters();
-
-  g_ceph_context->get_perfcounters_collection()->add(perf_logger);
-  g_ceph_context->_conf->add_observer(this);
-
-  superblock.compat_features = get_kv_initial_compat_set();
-}
-
-KeyValueStore::~KeyValueStore()
-{
-  g_ceph_context->_conf->remove_observer(this);
-  g_ceph_context->get_perfcounters_collection()->remove(perf_logger);
-
-  delete perf_logger;
-
-  if (m_keyvaluestore_do_dump) {
-    dump_stop();
-  }
-}
-
-int KeyValueStore::statfs(struct statfs *buf)
-{
-  int r = backend->db->get_statfs(buf);
-  if (r < 0) {
-    if (::statfs(basedir.c_str(), buf) < 0) {
-      int r = -errno;
-      return r;
-    }
-  }
-  return 0;
-}
-
-void KeyValueStore::collect_metadata(map<string,string> *pm)
-{
-  (*pm)["keyvaluestore_backend"] = superblock.backend;
-}
-
-int KeyValueStore::mkfs()
-{
-  int ret = 0;
-  char fsid_fn[PATH_MAX];
-  uuid_d old_fsid;
-
-  dout(1) << "mkfs in " << basedir << dendl;
-
-  // open+lock fsid
-  snprintf(fsid_fn, sizeof(fsid_fn), "%s/fsid", basedir.c_str());
-  fsid_fd = ::open(fsid_fn, O_RDWR|O_CREAT, 0644);
-  if (fsid_fd < 0) {
-    ret = -errno;
-    derr << "mkfs: failed to open " << fsid_fn << ": " << cpp_strerror(ret) << dendl;
-    return ret;
-  }
-
-  if (lock_fsid() < 0) {
-    ret = -EBUSY;
-    goto close_fsid_fd;
-  }
-
-  if (read_fsid(fsid_fd, &old_fsid) < 0 || old_fsid.is_zero()) {
-    if (fsid.is_zero()) {
-      fsid.generate_random();
-      dout(1) << "mkfs generated fsid " << fsid << dendl;
-    } else {
-      dout(1) << "mkfs using provided fsid " << fsid << dendl;
-    }
-
-    char fsid_str[40];
-    fsid.print(fsid_str);
-    strcat(fsid_str, "\n");
-    ret = ::ftruncate(fsid_fd, 0);
-    if (ret < 0) {
-      ret = -errno;
-      derr << "mkfs: failed to truncate fsid: " << cpp_strerror(ret) << dendl;
-      goto close_fsid_fd;
-    }
-    ret = safe_write(fsid_fd, fsid_str, strlen(fsid_str));
-    if (ret < 0) {
-      derr << "mkfs: failed to write fsid: " << cpp_strerror(ret) << dendl;
-      goto close_fsid_fd;
-    }
-    if (::fsync(fsid_fd) < 0) {
-      ret = -errno;
-      derr << "mkfs: close failed: can't write fsid: "
-           << cpp_strerror(ret) << dendl;
-      goto close_fsid_fd;
-    }
-    dout(10) << "mkfs fsid is " << fsid << dendl;
-  } else {
-    if (!fsid.is_zero() && fsid != old_fsid) {
-      derr << "mkfs on-disk fsid " << old_fsid << " != provided " << fsid << dendl;
-      ret = -EINVAL;
-      goto close_fsid_fd;
-    }
-    fsid = old_fsid;
-    dout(1) << "mkfs fsid is already set to " << fsid << dendl;
-  }
-
-  // version stamp
-  ret = write_version_stamp();
-  if (ret < 0) {
-    derr << "mkfs: write_version_stamp() failed: "
-         << cpp_strerror(ret) << dendl;
-    goto close_fsid_fd;
-  }
-
-  ret = _create_current();
-  if (ret < 0) {
-    derr << "mkfs: failed to create current/ " << cpp_strerror(ret) << dendl;
-    goto close_fsid_fd;
-  }
-
-  // superblock
-  superblock.backend = g_conf->keyvaluestore_backend;
-  ret = write_superblock();
-  if (ret < 0) {
-    derr << "KeyValueStore::mkfs write_superblock() failed: "
-	 << cpp_strerror(ret) << dendl;
-    goto close_fsid_fd;
-  }
-
-  {
-    KeyValueDB *store = KeyValueDB::create(g_ceph_context,
-					   superblock.backend,
-					   current_fn.c_str());
-    if (!store) {
-      derr << __func__ << " failed to create backend type "
-	   << g_conf->keyvaluestore_backend << "." << dendl;
-      ret = -1;
-      goto close_fsid_fd;
-    }
-
-    ostringstream err;
-    if (store->create_and_open(err)) {
-      derr << __func__  << " failed to create/open backend type "
-	   << g_conf->keyvaluestore_backend << "." << dendl;
-      ret = -1;
-      delete store;
-      goto close_fsid_fd;
-    }
-
-    bufferlist bl;
-    ::encode(collections, bl);
-    KeyValueDB::Transaction t = store->get_transaction();
-    t->set("meta", "collections", bl);
-    store->submit_transaction_sync(t);
-
-    dout(1) << g_conf->keyvaluestore_backend << " backend exists/created" << dendl;
-    delete store;
-  }
-
-  ret = write_meta("type", "keyvaluestore");
-  if (ret < 0)
-    goto close_fsid_fd;
-
-  dout(1) << "mkfs done in " << basedir << dendl;
-  ret = 0;
-
- close_fsid_fd:
-  VOID_TEMP_FAILURE_RETRY(::close(fsid_fd));
-  fsid_fd = -1;
-  return ret;
-}
-
-int KeyValueStore::read_fsid(int fd, uuid_d *uuid)
-{
-  char fsid_str[40];
-  memset(fsid_str, 0, sizeof(fsid_str));
-  int ret = safe_read(fd, fsid_str, sizeof(fsid_str));
-  if (ret < 0)
-    return ret;
-  if (ret == 8) {
-    // old 64-bit fsid... mirror it.
-    *(uint64_t*)&uuid->bytes()[0] = *(uint64_t*)fsid_str;
-    *(uint64_t*)&uuid->bytes()[8] = *(uint64_t*)fsid_str;
-    return 0;
-  }
-
-  if (ret > 36)
-    fsid_str[36] = 0;
-  if (!uuid->parse(fsid_str))
-    return -EINVAL;
-  return 0;
-}
-
-int KeyValueStore::lock_fsid()
-{
-  struct flock l;
-  memset(&l, 0, sizeof(l));
-  l.l_type = F_WRLCK;
-  l.l_whence = SEEK_SET;
-  l.l_start = 0;
-  l.l_len = 0;
-  int r = ::fcntl(fsid_fd, F_SETLK, &l);
-  if (r < 0) {
-    int err = errno;
-    dout(0) << "lock_fsid failed to lock " << basedir
-            << "/fsid, is another ceph-osd still running? "
-            << cpp_strerror(err) << dendl;
-    return -err;
-  }
-  return 0;
-}
-
-bool KeyValueStore::test_mount_in_use()
-{
-  dout(5) << "test_mount basedir " << basedir << dendl;
-  char fn[PATH_MAX];
-  snprintf(fn, sizeof(fn), "%s/fsid", basedir.c_str());
-
-  // verify fs isn't in use
-
-  fsid_fd = ::open(fn, O_RDWR, 0644);
-  if (fsid_fd < 0)
-    return 0;   // no fsid, ok.
-  bool inuse = lock_fsid() < 0;
-  VOID_TEMP_FAILURE_RETRY(::close(fsid_fd));
-  fsid_fd = -1;
-  return inuse;
-}
-
-int KeyValueStore::write_superblock()
-{
-  bufferlist bl;
-  ::encode(superblock, bl);
-  return safe_write_file(basedir.c_str(), "superblock",
-      bl.c_str(), bl.length());
-}
-
-int KeyValueStore::read_superblock()
-{
-  bufferptr bp(PATH_MAX);
-  int ret = safe_read_file(basedir.c_str(), "superblock",
-      bp.c_str(), bp.length());
-  if (ret < 0) {
-    if (ret == -ENOENT) {
-      // If the file doesn't exist write initial CompatSet
-      return write_superblock();
-    }
-    return ret;
-  }
-
-  bufferlist bl;
-  bl.push_back(bp);
-  bufferlist::iterator i = bl.begin();
-  ::decode(superblock, i);
-  return 0;
-}
-
-
-
-int KeyValueStore::update_version_stamp()
-{
-  return write_version_stamp();
-}
-
-int KeyValueStore::version_stamp_is_valid(uint32_t *version)
-{
-  bufferptr bp(PATH_MAX);
-  int ret = safe_read_file(basedir.c_str(), "store_version",
-      bp.c_str(), bp.length());
-  if (ret < 0) {
-    if (ret == -ENOENT)
-      return 0;
-    return ret;
-  }
-  bufferlist bl;
-  bl.push_back(bp);
-  bufferlist::iterator i = bl.begin();
-  ::decode(*version, i);
-  if (*version == target_version)
-    return 1;
-  else
-    return 0;
-}
-
-int KeyValueStore::write_version_stamp()
-{
-  bufferlist bl;
-  ::encode(target_version, bl);
-
-  return safe_write_file(basedir.c_str(), "store_version",
-      bl.c_str(), bl.length());
-}
-
-int KeyValueStore::mount()
-{
-  int ret;
-  char buf[PATH_MAX];
-  CompatSet supported_compat_set = get_kv_supported_compat_set();
-
-  dout(5) << "basedir " << basedir << dendl;
-
-  // make sure global base dir exists
-  if (::access(basedir.c_str(), R_OK | W_OK)) {
-    ret = -errno;
-    derr << "KeyValueStore::mount: unable to access basedir '" << basedir
-         << "': " << cpp_strerror(ret) << dendl;
-    goto done;
-  }
-
-  // get fsid
-  snprintf(buf, sizeof(buf), "%s/fsid", basedir.c_str());
-  fsid_fd = ::open(buf, O_RDWR, 0644);
-  if (fsid_fd < 0) {
-    ret = -errno;
-    derr << "KeyValueStore::mount: error opening '" << buf << "': "
-         << cpp_strerror(ret) << dendl;
-    goto done;
-  }
-
-  ret = read_fsid(fsid_fd, &fsid);
-  if (ret < 0) {
-    derr << "KeyValueStore::mount: error reading fsid_fd: "
-         << cpp_strerror(ret) << dendl;
-    goto close_fsid_fd;
-  }
-
-  if (lock_fsid() < 0) {
-    derr << "KeyValueStore::mount: lock_fsid failed" << dendl;
-    ret = -EBUSY;
-    goto close_fsid_fd;
-  }
-
-  dout(10) << "mount fsid is " << fsid << dendl;
-
-  uint32_t version_stamp;
-  ret = version_stamp_is_valid(&version_stamp);
-  if (ret < 0) {
-    derr << "KeyValueStore::mount : error in version_stamp_is_valid: "
-         << cpp_strerror(ret) << dendl;
-    goto close_fsid_fd;
-  } else if (ret == 0) {
-    if (do_update) {
-      derr << "KeyValueStore::mount : stale version stamp detected: "
-           << version_stamp << ". Proceeding, do_update "
-           << "is set, performing disk format upgrade." << dendl;
-    } else {
-      ret = -EINVAL;
-      derr << "KeyValueStore::mount : stale version stamp " << version_stamp
-           << ". Please run the KeyValueStore update script before starting "
-           << "the OSD, or set keyvaluestore_update_to to " << target_version
-           << dendl;
-      goto close_fsid_fd;
-    }
-  }
-
-  superblock.backend = g_conf->keyvaluestore_backend;
-  ret = read_superblock();
-  if (ret < 0) {
-    ret = -EINVAL;
-    goto close_fsid_fd;
-  }
-
-  // Check if this KeyValueStore supports all the necessary features to mount
-  if (supported_compat_set.compare(superblock.compat_features) == -1) {
-    derr << "KeyValueStore::mount : Incompatible features set "
-	   << superblock.compat_features << dendl;
-    ret = -EINVAL;
-    goto close_fsid_fd;
-  }
-
-  current_fd = ::open(current_fn.c_str(), O_RDONLY);
-  if (current_fd < 0) {
-    ret = -errno;
-    derr << "KeyValueStore::mount: error opening: " << current_fn << ": "
-         << cpp_strerror(ret) << dendl;
-    goto close_fsid_fd;
-  }
-
-  assert(current_fd >= 0);
-
-  {
-    if (superblock.backend.empty())
-      superblock.backend = g_conf->keyvaluestore_backend;
-    KeyValueDB *store = KeyValueDB::create(g_ceph_context,
-					   superblock.backend,
-					   current_fn.c_str());
-    if(!store)
-    {
-      derr << "KeyValueStore::mount backend type "
-	   << superblock.backend << " error" << dendl;
-      ret = -1;
-      goto close_current_fd;
-
-    }
-
-    if (superblock.backend == "rocksdb")
-      store->init(g_conf->keyvaluestore_rocksdb_options);
-    else
-      store->init();
-    stringstream err;
-    if (store->open(err)) {
-      derr << "KeyValueStore::mount Error initializing keyvaluestore backend "
-           << superblock.backend << ": " << err.str() << dendl;
-      ret = -1;
-      delete store;
-      goto close_current_fd;
-    }
-
-    // get collection list
-    set<string> keys;
-    keys.insert("collections");
-    map<string,bufferlist> values;
-    store->get("meta", keys, &values);
-    if (values.empty()) {
-      ret = -EIO;
-      derr << "Error no collection list; old store?" << dendl;
-      goto close_current_fd;
-    }
-    bufferlist::iterator p = values["collections"].begin();
-    ::decode(collections, p);
-    dout(20) << "collections: " << collections << dendl;
-
-    StripObjectMap *dbomap = new StripObjectMap(store);
-    ret = dbomap->init(do_update);
-    if (ret < 0) {
-      delete dbomap;
-      derr << "Error initializing StripObjectMap: " << ret << dendl;
-      goto close_current_fd;
-    }
-    stringstream err2;
-
-    if (g_conf->keyvaluestore_debug_check_backend && !dbomap->check(err2)) {
-      derr << err2.str() << dendl;
-      delete dbomap;
-      ret = -EINVAL;
-      goto close_current_fd;
-    }
-
-    default_strip_size = m_keyvaluestore_strip_size;
-    backend.reset(dbomap);
-  }
-
-  op_tp.start();
-  op_finisher.start();
-  ondisk_finisher.start();
-
-  // all okay.
-  return 0;
-
-close_current_fd:
-  VOID_TEMP_FAILURE_RETRY(::close(current_fd));
-  current_fd = -1;
-close_fsid_fd:
-  VOID_TEMP_FAILURE_RETRY(::close(fsid_fd));
-  fsid_fd = -1;
-done:
-  return ret;
-}
-
-int KeyValueStore::umount()
-{
-  dout(5) << "umount " << basedir << dendl;
-
-  op_tp.stop();
-  op_finisher.stop();
-  ondisk_finisher.stop();
-
-  if (fsid_fd >= 0) {
-    VOID_TEMP_FAILURE_RETRY(::close(fsid_fd));
-    fsid_fd = -1;
-  }
-  if (current_fd >= 0) {
-    VOID_TEMP_FAILURE_RETRY(::close(current_fd));
-    current_fd = -1;
-  }
-
-  backend.reset();
-
-  // nothing
-  return 0;
-}
-
-int KeyValueStore::queue_transactions(Sequencer *posr, list<Transaction*> &tls,
-                                      TrackedOpRef osd_op,
-                                      ThreadPool::TPHandle *handle)
-{
-  utime_t start = ceph_clock_now(g_ceph_context);
-  Context *onreadable;
-  Context *ondisk;
-  Context *onreadable_sync;
-  ObjectStore::Transaction::collect_contexts(
-    tls, &onreadable, &ondisk, &onreadable_sync);
-
-  // set up the sequencer
-  OpSequencer *osr;
-  assert(posr);
-  if (posr->p) {
-    osr = static_cast<OpSequencer *>(posr->p.get());
-    dout(5) << "queue_transactions existing " << osr << " " << *osr << "/" << osr->parent
-            << dendl; //<< " w/ q " << osr->q << dendl;
-  } else {
-    osr = new OpSequencer;
-    osr->parent = posr;
-    posr->p = osr;
-    dout(5) << "queue_transactions new " << osr << " " << *osr << "/" << osr->parent << dendl;
-  }
-
-  Op *o = build_op(tls, ondisk, onreadable, onreadable_sync, osd_op);
-  op_queue_reserve_throttle(o, handle);
-  if (m_keyvaluestore_do_dump)
-    dump_transactions(o->tls, o->op, osr);
-  dout(5) << "queue_transactions (trailing journal) " << " " << tls <<dendl;
-  queue_op(osr, o);
-
-  utime_t end = ceph_clock_now(g_ceph_context);
-  perf_logger->tinc(l_os_queue_lat, end - start);
-  return 0;
-}
-
-
-// ============== KeyValueStore Op Handler =================
-
-KeyValueStore::Op *KeyValueStore::build_op(list<Transaction*>& tls,
-        Context *ondisk, Context *onreadable, Context *onreadable_sync,
-        TrackedOpRef osd_op)
-{
-  uint64_t bytes = 0, ops = 0;
-  for (list<Transaction*>::iterator p = tls.begin();
-       p != tls.end();
-       ++p) {
-    bytes += (*p)->get_num_bytes();
-    ops += (*p)->get_num_ops();
-  }
-
-  Op *o = new Op;
-  o->start = ceph_clock_now(g_ceph_context);
-  o->tls.swap(tls);
-  o->ondisk = ondisk;
-  o->onreadable = onreadable;
-  o->onreadable_sync = onreadable_sync;
-  o->ops = ops;
-  o->bytes = bytes;
-  o->osd_op = osd_op;
-  return o;
-}
-
-void KeyValueStore::queue_op(OpSequencer *osr, Op *o)
-{
-  // queue op on sequencer, then queue sequencer for the threadpool,
-  // so that regardless of which order the threads pick up the
-  // sequencer, the op order will be preserved.
-
-  osr->queue(o);
-
-  perf_logger->inc(l_os_ops);
-  perf_logger->inc(l_os_bytes, o->bytes);
-
-  dout(5) << "queue_op " << o << " seq " << o->op << " " << *osr << " "
-          << o->bytes << " bytes" << "   (queue has " << throttle_ops.get_current()
-          << " ops and " << throttle_bytes.get_current() << " bytes)" << dendl;
-  op_wq.queue(osr);
-}
-
-void KeyValueStore::op_queue_reserve_throttle(Op *o, ThreadPool::TPHandle *handle)
-{
-  uint64_t max_ops = m_keyvaluestore_queue_max_ops;
-  uint64_t max_bytes = m_keyvaluestore_queue_max_bytes;
-
-  perf_logger->set(l_os_oq_max_ops, max_ops);
-  perf_logger->set(l_os_oq_max_bytes, max_bytes);
-
-  if (handle)
-    handle->suspend_tp_timeout();
-  if (throttle_ops.should_wait(1) ||
-    (throttle_bytes.get_current()      // let single large ops through!
-    && throttle_bytes.should_wait(o->bytes))) {
-    dout(2) << "waiting " << throttle_ops.get_current() + 1 << " > " << max_ops << " ops || "
-      << throttle_bytes.get_current() + o->bytes << " > " << max_bytes << dendl;
-  }
-  throttle_ops.get();
-  throttle_bytes.get(o->bytes);
-  if (handle)
-    handle->reset_tp_timeout();
-
-  perf_logger->set(l_os_oq_ops, throttle_ops.get_current());
-  perf_logger->set(l_os_oq_bytes, throttle_bytes.get_current());
-}
-
-void KeyValueStore::op_queue_release_throttle(Op *o)
-{
-  throttle_ops.put();
-  throttle_bytes.put(o->bytes);
-  perf_logger->set(l_os_oq_ops, throttle_ops.get_current());
-  perf_logger->set(l_os_oq_bytes, throttle_bytes.get_current());
-}
-
-void KeyValueStore::_do_op(OpSequencer *osr, ThreadPool::TPHandle &handle)
-{
-  // FIXME: Suppose the collection of transaction only affect objects in the
-  // one PG, so this lock will ensure no other concurrent write operation
-  osr->apply_lock.Lock();
-  Op *o = osr->peek_queue();
-  dout(5) << "_do_op " << o << " seq " << o->op << " " << *osr << "/" << osr->parent << " start" << dendl;
-  int r = _do_transactions(o->tls, o->op, &handle);
-  dout(10) << "_do_op " << o << " seq " << o->op << " r = " << r
-           << ", finisher " << o->onreadable << " " << o->onreadable_sync << dendl;
-
-  if (o->ondisk) {
-    if (r < 0) {
-      delete o->ondisk;
-      o->ondisk = 0;
-    } else {
-      ondisk_finisher.queue(o->ondisk, r);
-    }
-  }
-}
-
-void KeyValueStore::_finish_op(OpSequencer *osr)
-{
-  list<Context*> to_queue;
-  Op *o = osr->dequeue(&to_queue);
-
-  utime_t lat = ceph_clock_now(g_ceph_context);
-  lat -= o->start;
-
-  dout(10) << "_finish_op " << o << " seq " << o->op << " " << *osr << "/" << osr->parent << " lat " << lat << dendl;
-  osr->apply_lock.Unlock();  // locked in _do_op
-  op_queue_release_throttle(o);
-
-  perf_logger->tinc(l_os_commit_lat, lat);
-  perf_logger->tinc(l_os_apply_lat, lat);
-
-  if (o->onreadable_sync) {
-    o->onreadable_sync->complete(0);
-  }
-  if (o->onreadable)
-    op_finisher.queue(o->onreadable);
-  if (!to_queue.empty())
-    op_finisher.queue(to_queue);
-  delete o;
-}
-
-// Combine all the ops in the same transaction using "BufferTransaction" and
-// cache the middle results in order to make visible to the following ops.
-//
-// Lock: KeyValueStore use "in_use" in GenericObjectMap to avoid concurrent
-// operation on the same object. Not sure ReadWrite lock should be applied to
-// improve concurrent performance. In the future, I'd like to remove apply_lock
-// on "osr" and introduce PG RWLock.
-int KeyValueStore::_do_transactions(list<Transaction*> &tls, uint64_t op_seq,
-  ThreadPool::TPHandle *handle)
-{
-  int r = 0;
-  int trans_num = 0;
-  BufferTransaction bt(this);
-
-  for (list<Transaction*>::iterator p = tls.begin();
-       p != tls.end();
-       ++p, trans_num++) {
-    _do_transaction(**p, bt, handle);
-    if (handle)
-      handle->reset_tp_timeout();
-  }
-
-  r = bt.submit_transaction();
-  if (r < 0) {
-    assert(0 == "unexpected error");  // FIXME
-  }
-
-  return r;
-}
-
-void KeyValueStore::_do_transaction(Transaction& transaction,
-                                        BufferTransaction &t,
-                                        ThreadPool::TPHandle *handle)
-{
-  dout(10) << "_do_transaction on " << &transaction << dendl;
-
-  Transaction::iterator i = transaction.begin();
-  uint64_t op_num = 0;
-  bool exist_clone = false;
-
-  while (i.have_op()) {
-    if (handle)
-      handle->reset_tp_timeout();
-
-    Transaction::Op *op = i.decode_op();
-    int r = 0;
-
-    switch (op->op) {
-    case Transaction::OP_NOP:
-      break;
-
-    case Transaction::OP_TOUCH:
-      {
-        coll_t cid = i.get_cid(op->cid);
-        ghobject_t oid = i.get_oid(op->oid);
-        r = _touch(cid, oid, t);
-      }
-      break;
-
-    case Transaction::OP_WRITE:
-      {
-        coll_t cid = i.get_cid(op->cid);
-        ghobject_t oid = i.get_oid(op->oid);
-        uint64_t off = op->off;
-        uint64_t len = op->len;
-	uint32_t fadvise_flags = i.get_fadvise_flags();
-        bufferlist bl;
-        i.decode_bl(bl);
-        r = _write(cid, oid, off, len, bl, t, fadvise_flags);
-      }
-      break;
-
-    case Transaction::OP_ZERO:
-      {
-        coll_t cid = i.get_cid(op->cid);
-        ghobject_t oid = i.get_oid(op->oid);
-        uint64_t off = op->off;
-        uint64_t len = op->len;
-        r = _zero(cid, oid, off, len, t);
-      }
-      break;
-
-    case Transaction::OP_TRIMCACHE:
-      {
-        // deprecated, no-op
-      }
-      break;
-
-    case Transaction::OP_TRUNCATE:
-      {
-        coll_t cid = i.get_cid(op->cid);
-        ghobject_t oid = i.get_oid(op->oid);
-        uint64_t off = op->off;
-        r = _truncate(cid, oid, off, t);
-      }
-      break;
-
-    case Transaction::OP_REMOVE:
-      {
-        coll_t cid = i.get_cid(op->cid);
-        ghobject_t oid = i.get_oid(op->oid);
-        r = _remove(cid, oid, t);
-      }
-      break;
-
-    case Transaction::OP_SETATTR:
-      {
-        coll_t cid = i.get_cid(op->cid);
-        ghobject_t oid = i.get_oid(op->oid);
-        string name = i.decode_string();
-        bufferlist bl;
-        i.decode_bl(bl);
-        map<string, bufferptr> to_set;
-        to_set[name] = bufferptr(bl.c_str(), bl.length());
-        r = _setattrs(cid, oid, to_set, t);
-        if (r == -ENOSPC)
-          dout(0) << " ENOSPC on setxattr on " << cid << "/" << oid
-                  << " name " << name << " size " << bl.length() << dendl;
-      }
-      break;
-
-    case Transaction::OP_SETATTRS:
-      {
-        coll_t cid = i.get_cid(op->cid);
-        ghobject_t oid = i.get_oid(op->oid);
-        map<string, bufferptr> aset;
-        i.decode_attrset(aset);
-        r = _setattrs(cid, oid, aset, t);
-        if (r == -ENOSPC)
-          dout(0) << " ENOSPC on setxattrs on " << cid << "/" << oid << dendl;
-      }
-      break;
-
-    case Transaction::OP_RMATTR:
-      {
-        coll_t cid = i.get_cid(op->cid);
-        ghobject_t oid = i.get_oid(op->oid);
-        string name = i.decode_string();
-        r = _rmattr(cid, oid, name.c_str(), t);
-      }
-      break;
-
-    case Transaction::OP_RMATTRS:
-      {
-        coll_t cid = i.get_cid(op->cid);
-        ghobject_t oid = i.get_oid(op->oid);
-        r = _rmattrs(cid, oid, t);
-      }
-      break;
-
-    case Transaction::OP_CLONE:
-      {
-        coll_t cid = i.get_cid(op->cid);
-        ghobject_t oid = i.get_oid(op->oid);
-        ghobject_t noid = i.get_oid(op->dest_oid);
-        exist_clone = true;
-        r = _clone(cid, oid, noid, t);
-      }
-      break;
-
-    case Transaction::OP_CLONERANGE:
-      {
-        coll_t cid = i.get_cid(op->cid);
-        ghobject_t oid = i.get_oid(op->oid);
-        ghobject_t noid = i.get_oid(op->dest_oid);
-        uint64_t off = op->off;
-        uint64_t len = op->len;
-        exist_clone = true;
-        r = _clone_range(cid, oid, noid, off, len, off, t);
-      }
-      break;
-
-    case Transaction::OP_CLONERANGE2:
-      {
-        coll_t cid = i.get_cid(op->cid);
-        ghobject_t oid = i.get_oid(op->oid);
-        ghobject_t noid = i.get_oid(op->dest_oid);
-        uint64_t srcoff = op->off;
-        uint64_t len = op->len;
-        uint64_t dstoff = op->dest_off;
-        exist_clone = true;
-        r = _clone_range(cid, oid, noid, srcoff, len, dstoff, t);
-      }
-      break;
-
-    case Transaction::OP_MKCOLL:
-      {
-        coll_t cid = i.get_cid(op->cid);
-        r = _create_collection(cid, t);
-      }
-      break;
-
-    case Transaction::OP_COLL_HINT:
-      {
-        coll_t cid = i.get_cid(op->cid);
-        uint32_t type = op->hint_type;
-        bufferlist hint;
-        i.decode_bl(hint);
-        bufferlist::iterator hiter = hint.begin();
-        if (type == Transaction::COLL_HINT_EXPECTED_NUM_OBJECTS) {
-          uint32_t pg_num;
-          uint64_t num_objs;
-          ::decode(pg_num, hiter);
-          ::decode(num_objs, hiter);
-          r = _collection_hint_expected_num_objs(cid, pg_num, num_objs);
-        } else {
-          // Ignore the hint
-          dout(10) << "Unrecognized collection hint type: " << type << dendl;
-        }
-      }
-      break;
-
-    case Transaction::OP_RMCOLL:
-      {
-        coll_t cid = i.get_cid(op->cid);
-        r = _destroy_collection(cid, t);
-      }
-      break;
-
-    case Transaction::OP_COLL_ADD:
-      {
-        coll_t ocid = i.get_cid(op->cid);
-        coll_t ncid = i.get_cid(op->dest_cid);
-        ghobject_t oid = i.get_oid(op->oid);
-        r = _collection_add(ncid, ocid, oid, t);
-      }
-      break;
-
-    case Transaction::OP_COLL_REMOVE:
-       {
-        coll_t cid = i.get_cid(op->cid);
-        ghobject_t oid = i.get_oid(op->oid);
-        r = _remove(cid, oid, t);
-       }
-      break;
-
-    case Transaction::OP_COLL_MOVE:
-      {
-        // WARNING: this is deprecated and buggy; only here to replay old journals.
-        coll_t ocid = i.get_cid(op->cid);
-        coll_t ncid = i.get_cid(op->dest_cid);
-        ghobject_t oid = i.get_oid(op->oid);
-        r = _collection_move_rename(ocid, oid, ncid, oid, t);
-      }
-      break;
-
-    case Transaction::OP_COLL_MOVE_RENAME:
-      {
-        coll_t oldcid = i.get_cid(op->cid);
-        ghobject_t oldoid = i.get_oid(op->oid);
-        coll_t newcid = i.get_cid(op->dest_cid);
-        ghobject_t newoid = i.get_oid(op->dest_oid);
-        r = _collection_move_rename(oldcid, oldoid, newcid, newoid, t);
-      }
-      break;
-
-    case Transaction::OP_COLL_SETATTR:
-    case Transaction::OP_COLL_RMATTR:
-      assert(0 == "coll attrs no longer supported");
-      break;
-
-    case Transaction::OP_STARTSYNC:
-      {
-        start_sync();
-        break;
-      }
-
-    case Transaction::OP_COLL_RENAME:
-      {
-        assert(0 == "not implemented");
-      }
-      break;
-
-    case Transaction::OP_OMAP_CLEAR:
-      {
-        coll_t cid = i.get_cid(op->cid);
-        ghobject_t oid = i.get_oid(op->oid);
-        r = _omap_clear(cid, oid, t);
-      }
-      break;
-    case Transaction::OP_OMAP_SETKEYS:
-      {
-        coll_t cid = i.get_cid(op->cid);
-        ghobject_t oid = i.get_oid(op->oid);
-        map<string, bufferlist> aset;
-        i.decode_attrset(aset);
-        r = _omap_setkeys(cid, oid, aset, t);
-      }
-      break;
-    case Transaction::OP_OMAP_RMKEYS:
-      {
-        coll_t cid = i.get_cid(op->cid);
-        ghobject_t oid = i.get_oid(op->oid);
-        set<string> keys;
-        i.decode_keyset(keys);
-        r = _omap_rmkeys(cid, oid, keys, t);
-      }
-      break;
-    case Transaction::OP_OMAP_RMKEYRANGE:
-      {
-        coll_t cid = i.get_cid(op->cid);
-        ghobject_t oid = i.get_oid(op->oid);
-        string first, last;
-        first = i.decode_string();
-        last = i.decode_string();
-        r = _omap_rmkeyrange(cid, oid, first, last, t);
-      }
-      break;
-    case Transaction::OP_OMAP_SETHEADER:
-      {
-        coll_t cid = i.get_cid(op->cid);
-        ghobject_t oid = i.get_oid(op->oid);
-        bufferlist bl;
-        i.decode_bl(bl);
-        r = _omap_setheader(cid, oid, bl, t);
-      }
-      break;
-    case Transaction::OP_SPLIT_COLLECTION:
-      {
-        coll_t cid = i.get_cid(op->cid);
-        uint32_t bits = op->split_bits;
-        uint32_t rem = op->split_rem;
-        coll_t dest = i.get_cid(op->dest_cid);
-        r = _split_collection_create(cid, bits, rem, dest, t);
-      }
-      break;
-    case Transaction::OP_SPLIT_COLLECTION2:
-      {
-        coll_t cid = i.get_cid(op->cid);
-        uint32_t bits = op->split_bits;
-        uint32_t rem = op->split_rem;
-        coll_t dest = i.get_cid(op->dest_cid);
-        r = _split_collection(cid, bits, rem, dest, t);
-      }
-      break;
-
-    case Transaction::OP_SETALLOCHINT:
-      {
-        coll_t cid = i.get_cid(op->cid);
-        ghobject_t oid = i.get_oid(op->oid);
-        uint64_t expected_object_size = op->expected_object_size;
-        uint64_t expected_write_size = op->expected_write_size;
-        r = _set_alloc_hint(cid, oid, expected_object_size,
-                            expected_write_size, t);
-      }
-      break;
-
-    default:
-      derr << "bad op " << op->op << dendl;
-      assert(0);
-    }
-
-    if (r < 0) {
-      bool ok = false;
-
-      if (r == -ENOENT && !(op->op == Transaction::OP_CLONERANGE ||
-                            op->op == Transaction::OP_CLONE ||
-                            op->op == Transaction::OP_CLONERANGE2 ||
-                            op->op == Transaction::OP_COLL_ADD))
-        // -ENOENT is normally okay
-        // ...including on a replayed OP_RMCOLL with checkpoint mode
-        ok = true;
-      if (r == -ENODATA)
-        ok = true;
-
-      if (!ok) {
-        const char *msg = "unexpected error code";
-
-        if (exist_clone) {
-          dout(0) << "BUG: clone failed will lead to paritial transaction applied" << dendl;
-        }
-
-        if (r == -ENOSPC)
-          // For now, if we hit _any_ ENOSPC, crash, before we do any damage
-          // by partially applying transactions.
-          msg = "ENOSPC handling not implemented";
-
-        if (r == -ENOTEMPTY) {
-          msg = "ENOTEMPTY suggests garbage data in osd data dir";
-        }
-
-        dout(0) << " error " << cpp_strerror(r) << " not handled on operation "
-                << op->op << " op " << op_num << ", counting from 0)" << dendl;
-        dout(0) << msg << dendl;
-        dout(0) << " transaction dump:\n";
-        JSONFormatter f(true);
-        f.open_object_section("transaction");
-        transaction.dump(&f);
-        f.close_section();
-        f.flush(*_dout);
-        *_dout << dendl;
-
-        if (r == -EMFILE) {
-          dump_open_fds(g_ceph_context);
-        }      
-
-        assert(0 == "unexpected error");
-      }
-    }
-
-    op_num++;
-  }
-}
-
-
-// =========== KeyValueStore Op Implementation ==============
-// objects
-
-bool KeyValueStore::exists(coll_t cid, const ghobject_t& oid)
-{
-  dout(10) << __func__ << "collection: " << cid << " object: " << oid
-           << dendl;
-  int r;
-  StripObjectMap::StripObjectHeaderRef header;
-
-  r = backend->lookup_strip_header(cid, oid, &header);
-  if (r < 0) {
-    return false;
-  }
-
-  return true;
-}
-
-int KeyValueStore::stat(coll_t cid, const ghobject_t& oid,
-                        struct stat *st, bool allow_eio)
-{
-  dout(10) << "stat " << cid << "/" << oid << dendl;
-
-  StripObjectMap::StripObjectHeaderRef header;
-
-  int r = backend->lookup_strip_header(cid, oid, &header);
-  if (r < 0) {
-    dout(10) << "stat " << cid << "/" << oid << "=" << r << dendl;
-    return -ENOENT;
-  }
-
-  st->st_blocks = header->max_size / header->strip_size;
-  if (header->max_size % header->strip_size)
-    st->st_blocks++;
-  st->st_nlink = 1;
-  st->st_size = header->max_size;
-  st->st_blksize = header->strip_size;
-
-  return r;
-}
-
-int KeyValueStore::_generic_read(StripObjectMap::StripObjectHeaderRef header,
-                                 uint64_t offset, size_t len, bufferlist& bl,
-                                 bool allow_eio, BufferTransaction *bt)
-{
-  if (header->max_size < offset) {
-    dout(10) << __func__ << " " << header->cid << "/" << header->oid << ")"
-             << " offset exceed the length of bl"<< dendl;
-    return 0;
-  }
-
-  if (len == 0)
-    len = header->max_size - offset;
-
-  if (offset + len > header->max_size)
-    len = header->max_size - offset;
-
-  vector<StripObjectMap::StripExtent> extents;
-  StripObjectMap::file_to_extents(offset, len, header->strip_size,
-                                  extents);
-  map<string, bufferlist> out;
-  set<string> keys;
-  pair<coll_t, ghobject_t> uid = make_pair(header->cid, header->oid);
-
-  for (vector<StripObjectMap::StripExtent>::iterator iter = extents.begin();
-       iter != extents.end(); ++iter) {
-    bufferlist old;
-    string key = strip_object_key(iter->no);
-
-    map< pair<coll_t, ghobject_t>, map<pair<string, string>, bufferlist> >::iterator obj_it;
-    if ( bt ){
-      obj_it = bt->buffers.find(uid);
-    }
-    if ( bt && obj_it != bt->buffers.end() && obj_it->second.count(make_pair(OBJECT_STRIP_PREFIX, key))) {
-      // use strip_header buffer
-      assert(header->bits[iter->no]);
-      out[key] = obj_it->second[make_pair(OBJECT_STRIP_PREFIX, key)];
-    }else if (header->bits[iter->no]) {
-      keys.insert(key);
-    }
-  }
-
-
-  int r = backend->get_values_with_header(header, OBJECT_STRIP_PREFIX, keys, &out);
-  r = check_get_rc(header->cid, header->oid, r, out.size() == keys.size());
-  if (r < 0)
-    return r;
-
-  for (vector<StripObjectMap::StripExtent>::iterator iter = extents.begin();
-       iter != extents.end(); ++iter) {
-    string key = strip_object_key(iter->no);
-
-    if (header->bits[iter->no]) {
-      if (iter->len == header->strip_size) {
-        bl.claim_append(out[key]);
-      } else {
-        out[key].copy(iter->offset, iter->len, bl);
-      }
-    } else {
-      bl.append_zero(iter->len);
-    }
-  }
-
-  dout(10) << __func__ << " " << header->cid << "/" << header->oid << " "
-           << offset << "~" << bl.length() << "/" << len << " r = " << r
-           << dendl;
-
-  return bl.length();
-}
-
-
-int KeyValueStore::read(coll_t cid, const ghobject_t& oid, uint64_t offset,
-                        size_t len, bufferlist& bl, uint32_t op_flags,
-			bool allow_eio)
-{
-  dout(15) << __func__ << " " << cid << "/" << oid << " " << offset << "~"
-           << len << dendl;
-
-  StripObjectMap::StripObjectHeaderRef header;
-
-  int r = backend->lookup_strip_header(cid, oid, &header);
-
-  if (r < 0) {
-    dout(10) << __func__ << " " << cid << "/" << oid << " " << offset << "~"
-              << len << " header isn't exist: r = " << r << dendl;
-    return r;
-  }
-
-  return _generic_read(header, offset, len, bl, allow_eio);
-}
-
-int KeyValueStore::fiemap(coll_t cid, const ghobject_t& oid,
-                          uint64_t offset, size_t len, bufferlist& bl)
-{
-  dout(10) << __func__ << " " << cid << " " << oid << " " << offset << "~"
-           << len << dendl;
-  int r;
-  StripObjectMap::StripObjectHeaderRef header;
-
-  r = backend->lookup_strip_header(cid, oid, &header);
-  if (r < 0) {
-    dout(10) << "fiemap " << cid << "/" << oid << " " << offset << "~" << len
-             << " failed to get header: r = " << r << dendl;
-    return r;
-  }
-
-  vector<StripObjectMap::StripExtent> extents;
-  StripObjectMap::file_to_extents(offset, len, header->strip_size,
-                                  extents);
-
-  map<uint64_t, uint64_t> m;
-  for (vector<StripObjectMap::StripExtent>::iterator iter = extents.begin();
-       iter != extents.end(); ++iter) {
-    if (header->bits[iter->no]) {
-      uint64_t off = iter->no * header->strip_size + iter->offset;
-      m[off] = iter->len;
-    }
-  }
-  ::encode(m, bl);
-  return 0;
-}
-
-int KeyValueStore::_remove(coll_t cid, const ghobject_t& oid,
-                           BufferTransaction &t)
-{
-  dout(15) << __func__ << " " << cid << "/" << oid << dendl;
-
-  int r;
-  StripObjectMap::StripObjectHeaderRef header;
-
-  r = t.lookup_cached_header(cid, oid, &header, false);
-  if (r < 0) {
-    dout(10) << __func__ << " " << cid << "/" << oid << " "
-             << " failed to get header: r = " << r << dendl;
-    return r;
-  }
-
-  header->max_size = 0;
-  header->bits.clear();
-  header->updated = true;
-  r = t.clear_buffer(header);
-
-  dout(10) << __func__ << " " << cid << "/" << oid << " = " << r << dendl;
-  return r;
-}
-
-int KeyValueStore::_truncate(coll_t cid, const ghobject_t& oid, uint64_t size,
-                             BufferTransaction &t)
-{
-  dout(15) << __func__ << " " << cid << "/" << oid << " size " << size
-           << dendl;
-
-  int r;
-  StripObjectMap::StripObjectHeaderRef header;
-
-  r = t.lookup_cached_header(cid, oid, &header, false);
-  if (r < 0) {
-    dout(10) << __func__ << " " << cid << "/" << oid << " " << size
-             << " failed to get header: r = " << r << dendl;
-    return r;
-  }
-
-  if (header->max_size == size)
-    return 0;
-
-  if (header->max_size > size) {
-    vector<StripObjectMap::StripExtent> extents;
-    StripObjectMap::file_to_extents(size, header->max_size-size,
-                                    header->strip_size, extents);
-    assert(extents.size());
-
-    vector<StripObjectMap::StripExtent>::iterator iter = extents.begin();
-    if (header->bits[iter->no] && iter->offset != 0) {
-      bufferlist value;
-      map<string, bufferlist> values;
-      set<string> lookup_keys;
-      string key = strip_object_key(iter->no);
-
-      lookup_keys.insert(key);
-      r = t.get_buffer_keys(header, OBJECT_STRIP_PREFIX,
-                            lookup_keys, &values);
-      r = check_get_rc(cid, oid, r, lookup_keys.size() == values.size());
-      if (r < 0)
-        return r;
-
-      values[key].copy(0, iter->offset, value);
-      value.append_zero(header->strip_size-iter->offset);
-      assert(value.length() == header->strip_size);
-      value.swap(values[key]);
-
-      t.set_buffer_keys(header, OBJECT_STRIP_PREFIX, values);
-      ++iter;
-    }
-
-    set<string> keys;
-    for (; iter != extents.end(); ++iter) {
-      if (header->bits[iter->no]) {
-        keys.insert(strip_object_key(iter->no));
-        header->bits[iter->no] = 0;
-      }
-    }
-    r = t.remove_buffer_keys(header, OBJECT_STRIP_PREFIX, keys);
-    if (r < 0) {
-      dout(10) << __func__ << " " << cid << "/" << oid << " "
-               << size << " = " << r << dendl;
-      return r;
-    }
-  }
-
-  header->bits.resize(size/header->strip_size+1);
-  header->max_size = size;
-  header->updated = true;
-
-  dout(10) << __func__ << " " << cid << "/" << oid << " size " << size << " = "
-           << r << dendl;
-  return r;
-}
-
-int KeyValueStore::_touch(coll_t cid, const ghobject_t& oid,
-                          BufferTransaction &t)
-{
-  dout(15) << __func__ << " " << cid << "/" << oid << dendl;
-
-  int r;
-  StripObjectMap::StripObjectHeaderRef header;
-
-  r = t.lookup_cached_header(cid, oid, &header, true);
-  if (r < 0) {
-    dout(10) << __func__ << " " << cid << "/" << oid << " "
-             << " failed to get header: r = " << r << dendl;
-    r = -EINVAL;
-    return r;
-  }
-
-  dout(10) << __func__ << " " << cid << "/" << oid << " = " << r << dendl;
-  return r;
-}
-
-int KeyValueStore::_generic_write(StripObjectMap::StripObjectHeaderRef header,
-                                  uint64_t offset, size_t len,
-                                  const bufferlist& bl, BufferTransaction &t,
-                                  uint32_t fadvise_flags)
-{
-  if (len > bl.length())
-    len = bl.length();
-
-  if (len + offset > header->max_size) {
-    header->max_size = len + offset;
-    header->bits.resize(header->max_size/header->strip_size+1);
-    header->updated = true;
-  }
-
-  vector<StripObjectMap::StripExtent> extents;
-  StripObjectMap::file_to_extents(offset, len, header->strip_size,
-                                  extents);
-
-  map<string, bufferlist> out;
-  set<string> keys;
-  for (vector<StripObjectMap::StripExtent>::iterator iter = extents.begin();
-       iter != extents.end(); ++iter) {
-    if (header->bits[iter->no] && !(iter->offset == 0 &&
-                                   iter->len == header->strip_size))
-      keys.insert(strip_object_key(iter->no));
-  }
-
-  int r = t.get_buffer_keys(header, OBJECT_STRIP_PREFIX, keys, &out);
-  r = check_get_rc(header->cid, header->oid, r, keys.size() == out.size());
-  if (r < 0)
-    return r;
-
-  uint64_t bl_offset = 0;
-  map<string, bufferlist> values;
-  for (vector<StripObjectMap::StripExtent>::iterator iter = extents.begin();
-       iter != extents.end(); ++iter) {
-    bufferlist value;
-    string key = strip_object_key(iter->no);
-    if (header->bits[iter->no]) {
-      if (iter->offset == 0 && iter->len == header->strip_size) {
-        bl.copy(bl_offset, iter->len, value);
-        bl_offset += iter->len;
-      } else {
-        assert(out[key].length() == header->strip_size);
-
-        out[key].copy(0, iter->offset, value);
-        bl.copy(bl_offset, iter->len, value);
-        bl_offset += iter->len;
-
-        if (value.length() != header->strip_size)
-          out[key].copy(value.length(), header->strip_size-value.length(),
-                        value);
-      }
-    } else {
-      if (iter->offset)
-        value.append_zero(iter->offset);
-      bl.copy(bl_offset, iter->len, value);
-      bl_offset += iter->len;
-
-      if (value.length() < header->strip_size)
-        value.append_zero(header->strip_size-value.length());
-
-      header->bits[iter->no] = 1;
-      header->updated = true;
-    }
-    assert(value.length() == header->strip_size);
-    values[key].swap(value);
-  }
-  assert(bl_offset == len);
-
-  t.set_buffer_keys(header, OBJECT_STRIP_PREFIX, values);
-  dout(10) << __func__ << " " << header->cid << "/" << header->oid << " "
-           << offset << "~" << len << " = " << r << dendl;
-
-  return r;
-}
-
-int KeyValueStore::_write(coll_t cid, const ghobject_t& oid,
-                          uint64_t offset, size_t len, const bufferlist& bl,
-                          BufferTransaction &t, uint32_t fadvise_flags)
-{
-  dout(15) << __func__ << " " << cid << "/" << oid << " " << offset << "~"
-           << len << dendl;
-
-  int r;
-  StripObjectMap::StripObjectHeaderRef header;
-
-  r = t.lookup_cached_header(cid, oid, &header, true);
-  if (r < 0) {
-    dout(10) << __func__ << " " << cid << "/" << oid << " " << offset
-             << "~" << len << " failed to get header: r = " << r << dendl;
-    return r;
-  }
-
-  return _generic_write(header, offset, len, bl, t, fadvise_flags);
-}
-
-int KeyValueStore::_zero(coll_t cid, const ghobject_t& oid, uint64_t offset,
-                         size_t len, BufferTransaction &t)
-{
-  dout(15) << __func__ << " " << cid << "/" << oid << " " << offset << "~" << len << dendl;
-  int r;
-  StripObjectMap::StripObjectHeaderRef header;
-
-  r = t.lookup_cached_header(cid, oid, &header, true);
-  if (r < 0) {
-    dout(10) << __func__ << " " << cid << "/" << oid << " " << offset
-             << "~" << len << " failed to get header: r = " << r << dendl;
-    return r;
-  }
-
-  if (len + offset > header->max_size) {
-    header->max_size = len + offset;
-    header->bits.resize(header->max_size/header->strip_size+1);
-    header->updated = true;
-  }
-
-  vector<StripObjectMap::StripExtent> extents;
-  StripObjectMap::file_to_extents(offset, len, header->strip_size,
-                                  extents);
-  set<string> rm_keys;
-  set<string> lookup_keys;
-  map<string, bufferlist> values;
-  map<string, pair<uint64_t, uint64_t> > off_len;
-  for (vector<StripObjectMap::StripExtent>::iterator iter = extents.begin();
-       iter != extents.end(); ++iter) {
-    string key = strip_object_key(iter->no);
-    if (header->bits[iter->no]) {
-      if (iter->offset == 0 && iter->len == header->strip_size) {
-        rm_keys.insert(key);
-        header->bits[iter->no] = 0;
-        header->updated = true;
-      } else {
-        lookup_keys.insert(key);
-        off_len[key] = make_pair(iter->offset, iter->len);
-      }
-    }
-  }
-  r = t.get_buffer_keys(header, OBJECT_STRIP_PREFIX,
-                        lookup_keys, &values);
-  r = check_get_rc(header->cid, header->oid, r, lookup_keys.size() == values.size());
-  if (r < 0)
-    return r;
-  for(set<string>::iterator it = lookup_keys.begin(); it != lookup_keys.end(); ++it)
-  {
-    pair<uint64_t, uint64_t> p = off_len[*it];
-    values[*it].zero(p.first, p.second);
-  }
-  t.set_buffer_keys(header, OBJECT_STRIP_PREFIX, values);
-  t.remove_buffer_keys(header, OBJECT_STRIP_PREFIX, rm_keys);
-  dout(10) << __func__ << " " << cid << "/" << oid << " " << offset << "~"
-           << len << " = " << r << dendl;
-  return r;
-}
-
-int KeyValueStore::_clone(coll_t cid, const ghobject_t& oldoid,
-                          const ghobject_t& newoid, BufferTransaction &t)
-{
-  dout(15) << __func__ << " " << cid << "/" << oldoid << " -> " << cid << "/"
-           << newoid << dendl;
-
-  if (oldoid == newoid)
-    return 0;
-
-  int r;
-  StripObjectMap::StripObjectHeaderRef old_header;
-
-  r = t.lookup_cached_header(cid, oldoid, &old_header, false);
-  if (r < 0) {
-    dout(10) << __func__ << " " << cid << "/" << oldoid << " -> " << cid << "/"
-             << newoid << " = " << r << dendl;
-    return r;
-  }
-
-  t.clone_buffer(old_header, cid, newoid);
-
-  dout(10) << __func__ << " " << cid << "/" << oldoid << " -> " << cid << "/"
-           << newoid << " = " << r << dendl;
-  return r;
-}
-
-int KeyValueStore::_clone_range(coll_t cid, const ghobject_t& oldoid,
-                                const ghobject_t& newoid, uint64_t srcoff,
-                                uint64_t len, uint64_t dstoff,
-                                BufferTransaction &t)
-{
-  dout(15) << __func__ << " " << cid << "/" << oldoid << " -> " << cid << "/"
-           << newoid << " " << srcoff << "~" << len << " to " << dstoff
-           << dendl;
-
-  int r;
-  bufferlist bl;
-
-  StripObjectMap::StripObjectHeaderRef old_header, new_header;
-
-  r = t.lookup_cached_header(cid, oldoid, &old_header, false);
-  if (r < 0) {
-    dout(10) << __func__ << " " << cid << "/" << oldoid << " -> " << cid << "/"
-           << newoid << " " << srcoff << "~" << len << " to " << dstoff
-           << " header isn't exist: r = " << r << dendl;
-    return r;
-  }
-
-  r = t.lookup_cached_header(cid, newoid, &new_header, true);
-  if (r < 0) {
-    dout(10) << __func__ << " " << cid << "/" << oldoid << " -> " << cid << "/"
-           << newoid << " " << srcoff << "~" << len << " to " << dstoff
-           << " can't create header: r = " << r << dendl;
-    return r;
-  }
-
-  r = _generic_read(old_header, srcoff, len, bl, &t);
-  if (r < 0)
-    goto out;
-
-  r = _generic_write(new_header, dstoff, len, bl, t);
-
- out:
-  dout(10) << __func__ << " " << cid << "/" << oldoid << " -> " << cid << "/"
-           << newoid << " " << srcoff << "~" << len << " to " << dstoff
-           << " = " << r << dendl;
-  return r;
-}
-
-// attrs
-
-int KeyValueStore::getattr(coll_t cid, const ghobject_t& oid, const char *name,
-                           bufferptr &bp)
-{
-  dout(15) << __func__ << " " << cid << "/" << oid << " '" << name << "'"
-           << dendl;
-
-  int r;
-  map<string, bufferlist> got;
-  set<string> to_get;
-  StripObjectMap::StripObjectHeaderRef header;
-
-  to_get.insert(string(name));
-
-  r = backend->lookup_strip_header(cid, oid, &header);
-  if (r < 0) {
-    dout(10) << __func__ << " lookup_strip_header failed: r =" << r << dendl;
-    return r;
-  }
-
-  r = backend->get_values_with_header(header, OBJECT_XATTR, to_get, &got);
-  if (r < 0 && r != -ENOENT) {
-    dout(10) << __func__ << " get_xattrs err r =" << r << dendl;
-    goto out;
-  }
-  if (got.empty()) {
-    dout(10) << __func__ << " got.size() is 0" << dendl;
-    return -ENODATA;
-  }
-  bp = bufferptr(got.begin()->second.c_str(),
-                 got.begin()->second.length());
-  r = 0;
-
- out:
-  dout(10) << __func__ << " " << cid << "/" << oid << " '" << name << "' = "
-           << r << dendl;
-  return r;
-}
-
-int KeyValueStore::getattrs(coll_t cid, const ghobject_t& oid,
-                           map<string,bufferptr>& aset)
-{
-  map<string, bufferlist> attr_aset;
-  int r;
-  StripObjectMap::StripObjectHeaderRef header;
-
-  r = backend->lookup_strip_header(cid, oid, &header);
-  if (r < 0) {
-    dout(10) << __func__ << " lookup_strip_header failed: r =" << r << dendl;
-    return r;
-  }
-
-  r = backend->get_with_header(header, OBJECT_XATTR, &attr_aset);
-  if (r < 0) {
-    dout(10) << __func__ << " could not get attrs r = " << r << dendl;
-    goto out;
-  }
-
-  for (map<string, bufferlist>::iterator i = attr_aset.begin();
-       i != attr_aset.end(); ++i) {
-    string key;
-    key = i->first;
-    aset.insert(make_pair(key,
-                bufferptr(i->second.c_str(), i->second.length())));
-  }
-
- out:
-  dout(10) << __func__ << " " << cid << "/" << oid << " = " << r << dendl;
-
-  return r;
-}
-
-int KeyValueStore::_setattrs(coll_t cid, const ghobject_t& oid,
-                             map<string, bufferptr>& aset,
-                             BufferTransaction &t)
-{
-  dout(15) << __func__ << " " << cid << "/" << oid << dendl;
-
-  int r;
-
-  StripObjectMap::StripObjectHeaderRef header;
-  map<string, bufferlist> attrs;
-
-  r = t.lookup_cached_header(cid, oid, &header, false);
-  if (r < 0)
-    goto out;
-
-  for (map<string, bufferptr>::iterator it = aset.begin();
-       it != aset.end(); ++it) {
-    attrs[it->first].push_back(it->second);
-  }
-
-  t.set_buffer_keys(header, OBJECT_XATTR, attrs);
-
-out:
-  dout(10) << __func__ << " " << cid << "/" << oid << " = " << r << dendl;
-  return r;
-}
-
-
-int KeyValueStore::_rmattr(coll_t cid, const ghobject_t& oid, const char *name,
-                           BufferTransaction &t)
-{
-  dout(15) << __func__ << " " << cid << "/" << oid << " '" << name << "'"
-           << dendl;
-
-  int r;
-  set<string> to_remove;
-  StripObjectMap::StripObjectHeaderRef header;
-
-  r = t.lookup_cached_header(cid, oid, &header, false);
-  if (r < 0) {
-    dout(10) << __func__ << " could not find header r = " << r
-             << dendl;
-    return r;
-  }
-
-  to_remove.insert(string(name));
-  r = t.remove_buffer_keys(header, OBJECT_XATTR, to_remove);
-
-  dout(10) << __func__ << " " << cid << "/" << oid << " '" << name << "' = "
-           << r << dendl;
-  return r;
-}
-
-int KeyValueStore::_rmattrs(coll_t cid, const ghobject_t& oid,
-                            BufferTransaction &t)
-{
-  dout(15) << __func__ << " " << cid << "/" << oid << dendl;
-
-  int r;
-  set<string> attrs;
-
-  StripObjectMap::StripObjectHeaderRef header;
-
-  r = t.lookup_cached_header(cid, oid, &header, false);
-  if (r < 0) {
-    dout(10) << __func__ << " could not find header r = " << r
-             << dendl;
-    return r;
-  }
-
-  r = backend->get_keys_with_header(header, OBJECT_XATTR, &attrs);
-  if (r < 0) {
-    dout(10) << __func__ << " could not get attrs r = " << r << dendl;
-    return r;
-  }
-
-  r = t.remove_buffer_keys(header, OBJECT_XATTR, attrs);
-  t.clear_buffer_keys(header, OBJECT_XATTR);
-
-  dout(10) << __func__ <<  " " << cid << "/" << oid << " = " << r << dendl;
-  return r;
-}
-
-
-// collections
-
-int KeyValueStore::_create_collection(coll_t c, BufferTransaction &t)
-{
-  dout(15) << __func__ << " " << c << dendl;
-  int r = 0;
-  bufferlist bl;
-
-  RWLock::WLocker l(collections_lock);
-  if (collections.count(c)) {
-    r = -EEXIST;
-    goto out;
-  }
-
-  collections.insert(c);
-  t.set_collections(collections);
-
- out:
-  dout(10) << __func__ << " cid " << c << " r = " << r << dendl;
-  return r;
-}
-
-int KeyValueStore::_destroy_collection(coll_t c, BufferTransaction &t)
-{
-  dout(15) << __func__ << " " << c << dendl;
-
-  int r;
-  uint64_t modified_object = 0;
-  vector<ghobject_t> oids;
-  bufferlist bl;
-
-  {
-    RWLock::RLocker l(collections_lock);
-    if (!collections.count(c)) {
-      r = -ENOENT;
-      goto out;
-    }
-  }
-
-  // All modified objects are marked deleted
-  for (BufferTransaction::StripHeaderMap::iterator iter = t.strip_headers.begin();
-       iter != t.strip_headers.end(); ++iter) {
-    // sum the total modified object in this PG
-    if (iter->first.first != c)
-      continue;
-
-    modified_object++;
-    if (!iter->second->deleted) {
-      r = -ENOTEMPTY;
-      goto out;
-    }
-  }
-
-  r = backend->list_objects(c, ghobject_t(), ghobject_t::get_max(), modified_object+1, &oids,
-                            0);
-  // No other object
-  if (oids.size() != modified_object && oids.size() != 0) {
-    r = -ENOTEMPTY;
-    goto out;
-  }
-
-  for (vector<ghobject_t>::iterator iter = oids.begin();
-      iter != oids.end(); ++iter) {
-    if (!t.strip_headers.count(make_pair(c, *iter))) {
-      r = -ENOTEMPTY;
-      goto out;
-    }
-  }
-
-  {
-    RWLock::WLocker l(collections_lock);
-    collections.erase(c);
-    t.set_collections(collections);
-  }
-  r = 0;
-
-out:
-  dout(10) << __func__ << " " << c << " = " << r << dendl;
-  return r;
-}
-
-
-int KeyValueStore::_collection_add(coll_t c, coll_t oldcid,
-                                   const ghobject_t& o,
-                                   BufferTransaction &t)
-{
-  dout(15) << __func__ <<  " " << c << "/" << o << " from " << oldcid << "/"
-           << o << dendl;
-
-  bufferlist bl;
-  StripObjectMap::StripObjectHeaderRef header, old_header;
-
-  int r = t.lookup_cached_header(oldcid, o, &old_header, false);
-  if (r < 0) {
-    goto out;
-  }
-
-  r = t.lookup_cached_header(c, o, &header, false);
-  if (r == 0) {
-    r = -EEXIST;
-    dout(10) << __func__ << " " << c << "/" << o << " from " << oldcid << "/"
-             << o << " already exist " << dendl;
-    goto out;
-  }
-
-  r = _generic_read(old_header, 0, old_header->max_size, bl, &t);
-  if (r < 0) {
-    r = -EINVAL;
-    goto out;
-  }
-
-  r = _generic_write(header, 0, bl.length(), bl, t);
-  if (r < 0) {
-    r = -EINVAL;
-  }
-
-out:
-  dout(10) << __func__ << " " << c << "/" << o << " from " << oldcid << "/"
-           << o << " = " << r << dendl;
-  return r;
-}
-
-int KeyValueStore::_collection_move_rename(coll_t oldcid,
-                                           const ghobject_t& oldoid,
-                                           coll_t c, const ghobject_t& o,
-                                           BufferTransaction &t)
-{
-  dout(15) << __func__ << " " << c << "/" << o << " from " << oldcid << "/"
-           << oldoid << dendl;
-  int r;
-  StripObjectMap::StripObjectHeaderRef header;
-
-  r = t.lookup_cached_header(c, o, &header, false);
-  if (r == 0) {
-    dout(10) << __func__ << " " << oldcid << "/" << oldoid << " -> " << c
-             << "/" << o << " = " << r << dendl;
-    return -EEXIST;
-  }
-
-  r = t.lookup_cached_header(oldcid, oldoid, &header, false);
-  if (r < 0) {
-    dout(10) << __func__ << " " << oldcid << "/" << oldoid << " -> " << c
-             << "/" << o << " = " << r << dendl;
-    return r;
-  }
-
-  t.rename_buffer(header, c, o);
-
-  dout(10) << __func__ << " " << c << "/" << o << " from " << oldcid << "/"
-           << oldoid << " = " << r << dendl;
-  return r;
-}
-
-int KeyValueStore::_collection_remove_recursive(const coll_t &cid,
-                                                BufferTransaction &t)
-{
-  dout(15) << __func__ << " " << cid << dendl;
-  int r = 0;
-
-  {
-    RWLock::RLocker l(collections_lock);
-    if (collections.count(cid) == 0)
-      return -ENOENT;
-  }
-
-  vector<ghobject_t> objects;
-  ghobject_t max;
-  while (!max.is_max()) {
-    r = collection_list(cid, max, ghobject_t::get_max(), true, 300, &objects, &max);
-    if (r < 0)
-      goto out;
-
-    for (vector<ghobject_t>::iterator i = objects.begin();
-         i != objects.end(); ++i) {
-      r = _remove(cid, *i, t);
-      if (r < 0)
-	goto out;
-    }
-  }
-
-  {
-    RWLock::WLocker l(collections_lock);
-    collections.erase(cid);
-    t.set_collections(collections);
-  }
-
- out:
-  dout(10) << __func__ << " " << cid  << " r = " << r << dendl;
-  return r;
-}
-
-int KeyValueStore::list_collections(vector<coll_t>& ls)
-{
-  dout(10) << __func__ << " " << dendl;
-  RWLock::RLocker l(collections_lock);
-  for (set<coll_t>::iterator p = collections.begin(); p != collections.end();
-       ++p) {
-    ls.push_back(*p);
-  }
-  return 0;
-}
-
-bool KeyValueStore::collection_exists(coll_t c)
-{
-  dout(10) << __func__ << " " << dendl;
-  RWLock::RLocker l(collections_lock);
-  return collections.count(c);
-}
-
-bool KeyValueStore::collection_empty(coll_t c)
-{
-  dout(10) << __func__ << " " << dendl;
-
-  vector<ghobject_t> oids;
-  backend->list_objects(c, ghobject_t(), ghobject_t::get_max(), 1, &oids, 0);
-
-  return oids.empty();
-}
-
-int KeyValueStore::collection_list(coll_t c, ghobject_t start,
-				   ghobject_t end, bool sort_bitwise, int max,
-				   vector<ghobject_t> *ls, ghobject_t *next)
-{
-  if (!sort_bitwise)
-    return -EOPNOTSUPP;
-
-  if (max < 0)
-    return -EINVAL;
-
-  if (start.is_max())
-    return 0;
-
-  int r = backend->list_objects(c, start, end, max, ls, next);
-  return r;
-}
-
-int KeyValueStore::collection_version_current(coll_t c, uint32_t *version)
-{
-  *version = COLLECTION_VERSION;
-  if (*version == target_version)
-    return 1;
-  else
-    return 0;
-}
-
-// omap
-
-int KeyValueStore::omap_get(coll_t c, const ghobject_t &hoid,
-                            bufferlist *bl, map<string, bufferlist> *out)
-{
-  dout(15) << __func__ << " " << c << "/" << hoid << dendl;
-
-  StripObjectMap::StripObjectHeaderRef header;
-
-  int r = backend->lookup_strip_header(c, hoid, &header);
-  if (r < 0) {
-    dout(10) << __func__ << " lookup_strip_header failed: r =" << r << dendl;
-    return r;
-  }
-
-  r = backend->get_with_header(header, OBJECT_OMAP, out);
-  if (r < 0) {
-    dout(10) << __func__ << " err r =" << r << dendl;
-    return r;
-  }
-
-  set<string> keys;
-  map<string, bufferlist> got;
-
-  keys.insert(OBJECT_OMAP_HEADER_KEY);
-  r = backend->get_values_with_header(header, OBJECT_OMAP_HEADER, keys, &got);
-  if (r < 0 && r != -ENOENT) {
-    dout(10) << __func__ << " err r =" << r << dendl;
-    return r;
-  }
-
-  if (!got.empty()) {
-    assert(got.size() == 1);
-    bl->swap(got.begin()->second);
-  }
-
-  return 0;
-}
-
-int KeyValueStore::omap_get_header(coll_t c, const ghobject_t &hoid,
-                                   bufferlist *bl, bool allow_eio)
-{
-  dout(15) << __func__ << " " << c << "/" << hoid << dendl;
-
-  set<string> keys;
-  map<string, bufferlist> got;
-  StripObjectMap::StripObjectHeaderRef header;
-
-  int r = backend->lookup_strip_header(c, hoid, &header);
-  if (r < 0) {
-    dout(10) << __func__ << " lookup_strip_header failed: r =" << r << dendl;
-    return r;
-  }
-
-  keys.insert(OBJECT_OMAP_HEADER_KEY);
-  r = backend->get_values_with_header(header, OBJECT_OMAP_HEADER, keys, &got);
-  if (r < 0 && r != -ENOENT) {
-    dout(10) << __func__ << " err r =" << r << dendl;
-    return r;
-  }
-
-  if (!got.empty()) {
-    assert(got.size() == 1);
-    bl->swap(got.begin()->second);
-  }
-
-  return 0;
-}
-
-int KeyValueStore::omap_get_keys(coll_t c, const ghobject_t &hoid, set<string> *keys)
-{
-  dout(15) << __func__ << " " << c << "/" << hoid << dendl;
-
-  StripObjectMap::StripObjectHeaderRef header;
-  int r = backend->lookup_strip_header(c, hoid, &header);
-  if (r < 0) {
-    dout(10) << __func__ << " lookup_strip_header failed: r =" << r << dendl;
-    return r;
-  }
-
-  r = backend->get_keys_with_header(header, OBJECT_OMAP, keys);
-  if (r < 0) {
-    return r;
-  }
-  return 0;
-}
-
-int KeyValueStore::omap_get_values(coll_t c, const ghobject_t &hoid,
-                                   const set<string> &keys,
-                                   map<string, bufferlist> *out)
-{
-  dout(15) << __func__ << " " << c << "/" << hoid << dendl;
-
-  StripObjectMap::StripObjectHeaderRef header;
-  int r = backend->lookup_strip_header(c, hoid, &header);
-  if (r < 0) {
-    dout(10) << __func__ << " lookup_strip_header failed: r =" << r << dendl;
-    return r;
-  }
-
-  r = backend->get_values_with_header(header, OBJECT_OMAP, keys, out);
-  if (r < 0 && r != -ENOENT) {
-    return r;
-  }
-  return 0;
-}
-
-int KeyValueStore::omap_check_keys(coll_t c, const ghobject_t &hoid,
-                                   const set<string> &keys, set<string> *out)
-{
-  dout(15) << __func__ << " " << c << "/" << hoid << dendl;
-
-  int r = backend->check_keys(c, hoid, OBJECT_OMAP, keys, out);
-  if (r < 0 && r != -ENOENT) {
-    return r;
-  }
-  return 0;
-}
-
-ObjectMap::ObjectMapIterator KeyValueStore::get_omap_iterator(
-    coll_t c, const ghobject_t &hoid)
-{
-  dout(15) << __func__ << " " << c << "/" << hoid << dendl;
-  return backend->get_iterator(c, hoid, OBJECT_OMAP);
-}
-
-int KeyValueStore::_omap_clear(coll_t cid, const ghobject_t &hoid,
-                               BufferTransaction &t)
-{
-  dout(15) << __func__ << " " << cid << "/" << hoid << dendl;
-
-  StripObjectMap::StripObjectHeaderRef header;
-
-  int r = t.lookup_cached_header(cid, hoid, &header, false);
-  if (r < 0) {
-    dout(10) << __func__ << " " << cid << "/" << hoid << " "
-             << " failed to get header: r = " << r << dendl;
-    return r;
-  }
-
-  set<string> keys;
-  r = backend->get_keys_with_header(header, OBJECT_OMAP, &keys);
-  if (r < 0) {
-    dout(10) << __func__ << " could not get omap_keys r = " << r << dendl;
-    return r;
-  }
-
-  r = t.remove_buffer_keys(header, OBJECT_OMAP, keys);
-  if (r < 0) {
-    dout(10) << __func__ << " could not remove keys r = " << r << dendl;
-    return r;
-  }
-
-  keys.clear();
-  keys.insert(OBJECT_OMAP_HEADER_KEY);
-  r = t.remove_buffer_keys(header, OBJECT_OMAP_HEADER, keys);
-  if (r < 0) {
-    dout(10) << __func__ << " could not remove keys r = " << r << dendl;
-    return r;
-  }
-
-  t.clear_buffer_keys(header, OBJECT_OMAP_HEADER);
-
-  dout(10) << __func__ << " " << cid << "/" << hoid << " r = " << r << dendl;
-  return 0;
-}
-
-int KeyValueStore::_omap_setkeys(coll_t cid, const ghobject_t &hoid,
-                                 map<string, bufferlist> &aset,
-                                 BufferTransaction &t)
-{
-  dout(15) << __func__ << " " << cid << "/" << hoid << dendl;
-
-  StripObjectMap::StripObjectHeaderRef header;
-
-  int r = t.lookup_cached_header(cid, hoid, &header, false);
-  if (r < 0) {
-    dout(10) << __func__ << " " << cid << "/" << hoid << " "
-             << " failed to get header: r = " << r << dendl;
-    return r;
-  }
-
-  t.set_buffer_keys(header, OBJECT_OMAP, aset);
-
-  return 0;
-}
-
-int KeyValueStore::_omap_rmkeys(coll_t cid, const ghobject_t &hoid,
-                                const set<string> &keys,
-                                BufferTransaction &t)
-{
-  dout(15) << __func__ << " " << cid << "/" << hoid << dendl;
-
-  StripObjectMap::StripObjectHeaderRef header;
-
-  int r = t.lookup_cached_header(cid, hoid, &header, false);
-  if (r < 0) {
-    dout(10) << __func__ << " " << cid << "/" << hoid << " "
-             << " failed to get header: r = " << r << dendl;
-    return r;
-  }
-
-  r = t.remove_buffer_keys(header, OBJECT_OMAP, keys);
-
-  dout(10) << __func__ << " " << cid << "/" << hoid << " r = " << r << dendl;
-  return r;
-}
-
-int KeyValueStore::_omap_rmkeyrange(coll_t cid, const ghobject_t &hoid,
-                                    const string& first, const string& last,
-                                    BufferTransaction &t)
-{
-  dout(15) << __func__ << " " << cid << "/" << hoid << " [" << first << ","
-           << last << "]" << dendl;
-
-  set<string> keys;
-  {
-    ObjectMap::ObjectMapIterator iter = get_omap_iterator(cid, hoid);
-    if (!iter)
-      return -ENOENT;
-
-    for (iter->lower_bound(first); iter->valid() && iter->key() < last;
-         iter->next(false)) {
-      keys.insert(iter->key());
-    }
-  }
-  return _omap_rmkeys(cid, hoid, keys, t);
-}
-
-int KeyValueStore::_omap_setheader(coll_t cid, const ghobject_t &hoid,
-                                   const bufferlist &bl,
-                                   BufferTransaction &t)
-{
-  dout(15) << __func__ << " " << cid << "/" << hoid << dendl;
-
-  map<string, bufferlist> sets;
-  StripObjectMap::StripObjectHeaderRef header;
-
-  int r = t.lookup_cached_header(cid, hoid, &header, false);
-  if (r < 0) {
-    dout(10) << __func__ << " " << cid << "/" << hoid << " "
-             << " failed to get header: r = " << r << dendl;
-    return r;
-  }
-
-  sets[OBJECT_OMAP_HEADER_KEY] = bl;
-  t.set_buffer_keys(header, OBJECT_OMAP_HEADER, sets);
-  return 0;
-}
-
-int KeyValueStore::_split_collection(coll_t cid, uint32_t bits, uint32_t rem,
-                                     coll_t dest, BufferTransaction &t)
-{
-  {
-    dout(15) << __func__ << " " << cid << " bits: " << bits << dendl;
-
-    StripObjectMap::StripObjectHeaderRef header;
-
-    {
-      RWLock::RLocker l(collections_lock);
-      if (collections.count(cid) == 0)
-	return -ENOENT;
-      if (collections.count(dest) == 0)
-	return -ENOENT;
-    }
-
-    vector<ghobject_t> objects;
-    ghobject_t next, current;
-    int move_size = 0;
-    while (1) {
-      collection_list(cid, current, ghobject_t::get_max(), true,
-		      get_ideal_list_max(), &objects, &next);
-
-      dout(20) << __func__ << cid << "objects size: " << objects.size()
-              << dendl;
-
-      if (objects.empty())
-        break;
-
-      for (vector<ghobject_t>::iterator i = objects.begin();
-          i != objects.end(); ++i) {
-        if (i->match(bits, rem)) {
-          if (_collection_move_rename(cid, *i, dest, *i, t) < 0) {
-            return -1;
-          }
-          move_size++;
-        }
-      }
-
-      objects.clear();
-      current = next;
-    }
-
-    dout(20) << __func__ << "move" << move_size << " object from " << cid
-             << "to " << dest << dendl;
-  }
-
-  if (g_conf->filestore_debug_verify_split) {
-    vector<ghobject_t> objects;
-    ghobject_t next;
-    while (1) {
-      collection_list(cid, next, ghobject_t::get_max(), true,
-		      get_ideal_list_max(), &objects, &next);
-      if (objects.empty())
-        break;
-
-      for (vector<ghobject_t>::iterator i = objects.begin();
-           i != objects.end(); ++i) {
-        dout(20) << __func__ << ": " << *i << " still in source "
-                 << cid << dendl;
-        assert(!i->match(bits, rem));
-      }
-      objects.clear();
-    }
-
-    next = ghobject_t();
-    while (1) {
-      collection_list(dest, next, ghobject_t::get_max(), true,
-		      get_ideal_list_max(), &objects, &next);
-      if (objects.empty())
-        break;
-
-      for (vector<ghobject_t>::iterator i = objects.begin();
-           i != objects.end(); ++i) {
-        dout(20) << __func__ << ": " << *i << " now in dest "
-                 << *i << dendl;
-        assert(i->match(bits, rem));
-      }
-      objects.clear();
-    }
-  }
-  return 0;
-}
-
-int KeyValueStore::_set_alloc_hint(coll_t cid, const ghobject_t& oid,
-                                   uint64_t expected_object_size,
-                                   uint64_t expected_write_size,
-                                   BufferTransaction &t)
-{
-  dout(15) << __func__ << " " << cid << "/" << oid << " object_size "
-           << expected_object_size << " write_size "
-           << expected_write_size << dendl;
-
-  int r = 0;
-  StripObjectMap::StripObjectHeaderRef header;
-
-  r = t.lookup_cached_header(cid, oid, &header, false);
-  if (r < 0) {
-    dout(10) << __func__ << " " << cid << "/" << oid
-             << " failed to get header: r = " << r << dendl;
-    return r;
-  }
-
-  bool blank = true;
-  for (vector<char>::iterator it = header->bits.begin();
-       it != header->bits.end(); ++it) {
-    if (*it) {
-      blank = false;
-      break;
-    }
-  }
-
-  // Now only consider to change "strip_size" when the object is blank,
-  // because set_alloc_hint is expected to be very lightweight<O(1)>
-  if (blank) {
-    // header->strip_size = MIN(expected_write_size, m_keyvaluestore_max_expected_write_size);
-    // dout(20) << __func__ << " hint " << header->strip_size << " success" << dendl;
-  }
-
-  dout(10) << __func__ << "" << cid << "/" << oid << " object_size "
-           << expected_object_size << " write_size "
-           << expected_write_size << " = " << r << dendl;
-
-  return r;
-}
-
-const char** KeyValueStore::get_tracked_conf_keys() const
-{
-  static const char* KEYS[] = {
-    "keyvaluestore_queue_max_ops",
-    "keyvaluestore_queue_max_bytes",
-    "keyvaluestore_default_strip_size",
-    "keyvaluestore_dump_file",
-    NULL
-  };
-  return KEYS;
-}
-
-void KeyValueStore::handle_conf_change(const struct md_config_t *conf,
-                                       const std::set <std::string> &changed)
-{
-  if (changed.count("keyvaluestore_queue_max_ops") ||
-      changed.count("keyvaluestore_queue_max_bytes") ||
-      changed.count("keyvaluestore_max_expected_write_size")) {
-    m_keyvaluestore_queue_max_ops = conf->keyvaluestore_queue_max_ops;
-    m_keyvaluestore_queue_max_bytes = conf->keyvaluestore_queue_max_bytes;
-    m_keyvaluestore_max_expected_write_size = conf->keyvaluestore_max_expected_write_size;
-    throttle_ops.reset_max(conf->keyvaluestore_queue_max_ops);
-    throttle_bytes.reset_max(conf->keyvaluestore_queue_max_bytes);
-  }
-  if (changed.count("keyvaluestore_default_strip_size")) {
-    m_keyvaluestore_strip_size = conf->keyvaluestore_default_strip_size;
-    default_strip_size = m_keyvaluestore_strip_size;
-  }
-  if (changed.count("keyvaluestore_dump_file")) {
-    if (conf->keyvaluestore_dump_file.length() &&
-	conf->keyvaluestore_dump_file != "-") {
-      dump_start(conf->keyvaluestore_dump_file);
-    } else {
-      dump_stop();
-    }
-  }
-}
-
-int KeyValueStore::check_get_rc(const coll_t cid, const ghobject_t& oid, int r, bool is_equal_size)
-{
-  if (r < 0) {
-    dout(10) << __func__ << " " << cid << "/" << oid << " "
-             << " get rc = " <<  r << dendl;
-  } else if (!is_equal_size) {
-    dout(0) << __func__ << " broken header or missing data in backend "
-            << cid << "/" << oid << " get rc = " << r << dendl;
-    r = -EBADF;
-  }
-  return r;
-}
-
-void KeyValueStore::dump_start(const std::string &file)
-{
-  dout(10) << "dump_start " << file << dendl;
-  if (m_keyvaluestore_do_dump) {
-    dump_stop();
-  }
-  m_keyvaluestore_dump_fmt.reset();
-  m_keyvaluestore_dump_fmt.open_array_section("dump");
-  m_keyvaluestore_dump.open(file.c_str());
-  m_keyvaluestore_do_dump = true;
-}
-
-void KeyValueStore::dump_stop()
-{
-  dout(10) << "dump_stop" << dendl;
-  m_keyvaluestore_do_dump = false;
-  if (m_keyvaluestore_dump.is_open()) {
-    m_keyvaluestore_dump_fmt.close_section();
-    m_keyvaluestore_dump_fmt.flush(m_keyvaluestore_dump);
-    m_keyvaluestore_dump.flush();
-    m_keyvaluestore_dump.close();
-  }
-}
-void KeyValueStore::dump_transactions(list<ObjectStore::Transaction*>& ls, uint64_t seq, OpSequencer *osr)
-{
-  m_keyvaluestore_dump_fmt.open_array_section("transactions");
-  unsigned trans_num = 0;
-  for (list<ObjectStore::Transaction*>::iterator i = ls.begin(); i != ls.end(); ++i, ++trans_num) {
-    m_keyvaluestore_dump_fmt.open_object_section("transaction");
-    m_keyvaluestore_dump_fmt.dump_string("osr", osr->get_name());
-    m_keyvaluestore_dump_fmt.dump_unsigned("seq", seq);
-    m_keyvaluestore_dump_fmt.dump_unsigned("trans_num", trans_num);
-    (*i)->dump(&m_keyvaluestore_dump_fmt);
-    m_keyvaluestore_dump_fmt.close_section();
-  }
-  m_keyvaluestore_dump_fmt.close_section();
-  m_keyvaluestore_dump_fmt.flush(m_keyvaluestore_dump);
-  m_keyvaluestore_dump.flush();
-}
-
-
-// -- KVSuperblock --
-
-void KVSuperblock::encode(bufferlist &bl) const
-{
-  ENCODE_START(1, 1, bl);
-  compat_features.encode(bl);
-  ::encode(backend, bl);
-  ENCODE_FINISH(bl);
-}
-
-void KVSuperblock::decode(bufferlist::iterator &bl)
-{
-  DECODE_START(1, bl);
-  compat_features.decode(bl);
-  ::decode(backend, bl);
-  DECODE_FINISH(bl);
-}
-
-void KVSuperblock::dump(Formatter *f) const
-{
-  f->open_object_section("compat");
-  compat_features.dump(f);
-  f->dump_string("backend", backend);
-  f->close_section();
-}
-
-void KVSuperblock::generate_test_instances(list<KVSuperblock*>& o)
-{
-  KVSuperblock z;
-  o.push_back(new KVSuperblock(z));
-  CompatSet::FeatureSet feature_compat;
-  CompatSet::FeatureSet feature_ro_compat;
-  CompatSet::FeatureSet feature_incompat;
-  z.compat_features = CompatSet(feature_compat, feature_ro_compat,
-                                feature_incompat);
-  o.push_back(new KVSuperblock(z));
-  z.backend = "rocksdb";
-  o.push_back(new KVSuperblock(z));
-}
diff --git a/src/os/keyvaluestore/KeyValueStore.h b/src/os/keyvaluestore/KeyValueStore.h
deleted file mode 100644
index 9e245ee..0000000
--- a/src/os/keyvaluestore/KeyValueStore.h
+++ /dev/null
@@ -1,700 +0,0 @@
-// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
-// vim: ts=8 sw=2 smarttab
-/*
- * Ceph - scalable distributed file system
- *
- * Copyright (C) 2013 UnitedStack <haomai at unitedstack.com>
- *
- * Author: Haomai Wang <haomaiwang at gmail.com>
- *
- * This is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License version 2.1, as published by the Free Software
- * Foundation.  See file COPYING.
- *
- */
-
-
-#ifndef CEPH_KEYVALUESTORE_H
-#define CEPH_KEYVALUESTORE_H
-
-#include "include/types.h"
-
-#include <map>
-#include <deque>
-#include <boost/scoped_ptr.hpp>
-#include <fstream>
-using namespace std;
-
-#include "include/assert.h"
-
-#include "os/ObjectStore.h"
-
-#include "common/WorkQueue.h"
-#include "common/Finisher.h"
-#include "common/fd.h"
-
-#include "common/Mutex.h"
-#include "GenericObjectMap.h"
-#include "kv/KeyValueDB.h"
-#include "common/random_cache.hpp"
-
-#include "include/uuid.h"
-
-static uint64_t default_strip_size = 1024;
-
-class StripObjectMap: public GenericObjectMap {
- public:
-
-  struct StripExtent {
-    uint64_t no;
-    uint64_t offset;    // in key
-    uint64_t len;    // in key
-    StripExtent(uint64_t n, uint64_t off, size_t len):
-      no(n), offset(off), len(len) {}
-  };
-
-  // -- strip object --
-  struct StripObjectHeader {
-    // Persistent state
-    uint64_t strip_size;
-    uint64_t max_size;
-    vector<char> bits;
-
-    // soft state
-    Header header; // FIXME: Hold lock to avoid concurrent operations, it will
-                   // also block read operation which not should be permitted.
-    coll_t cid;
-    ghobject_t oid;
-    bool updated;
-    bool deleted;
-
-    StripObjectHeader(): strip_size(default_strip_size), max_size(0), updated(false), deleted(false) {}
-
-    void encode(bufferlist &bl) const {
-      ENCODE_START(1, 1, bl);
-      ::encode(strip_size, bl);
-      ::encode(max_size, bl);
-      ::encode(bits, bl);
-      ENCODE_FINISH(bl);
-    }
-
-    void decode(bufferlist::iterator &bl) {
-      DECODE_START(1, bl);
-      ::decode(strip_size, bl);
-      ::decode(max_size, bl);
-      ::decode(bits, bl);
-      DECODE_FINISH(bl);
-    }
-  };
-  typedef ceph::shared_ptr<StripObjectHeader> StripObjectHeaderRef;
-
-  static int file_to_extents(uint64_t offset, size_t len, uint64_t strip_size,
-                             vector<StripExtent> &extents);
-  int lookup_strip_header(const coll_t & cid, const ghobject_t &oid,
-                          StripObjectHeaderRef *header);
-  int save_strip_header(StripObjectHeaderRef header, KeyValueDB::Transaction t);
-  int create_strip_header(const coll_t &cid, const ghobject_t &oid,
-                          StripObjectHeaderRef *strip_header,
-                          KeyValueDB::Transaction t);
-  void clone_wrap(StripObjectHeaderRef old_header,
-                  const coll_t &cid, const ghobject_t &oid,
-                  KeyValueDB::Transaction t,
-                  StripObjectHeaderRef *target_header);
-  void rename_wrap(StripObjectHeaderRef old_header, const coll_t &cid, const ghobject_t &oid,
-                   KeyValueDB::Transaction t,
-                   StripObjectHeaderRef *new_header);
-  // Already hold header to avoid lock header seq again
-  int get_with_header(
-    const StripObjectHeaderRef header,
-    const string &prefix,
-    map<string, bufferlist> *out
-    );
-
-  int get_values_with_header(
-    const StripObjectHeaderRef header,
-    const string &prefix,
-    const set<string> &keys,
-    map<string, bufferlist> *out
-    );
-  int get_keys_with_header(
-    const StripObjectHeaderRef header,
-    const string &prefix,
-    set<string> *keys
-    );
-
-  Mutex lock;
-  void invalidate_cache(const coll_t &c, const ghobject_t &oid) {
-    Mutex::Locker l(lock);
-    caches.clear(oid);
-  }
-
-  RandomCache<ghobject_t, pair<coll_t, StripObjectHeaderRef> > caches;
-  StripObjectMap(KeyValueDB *db): GenericObjectMap(db),
-                                  lock("StripObjectMap::lock"),
-                                  caches(g_conf->keyvaluestore_header_cache_size)
-  {}
-};
-
-
-class KVSuperblock {
-public:
-  CompatSet compat_features;
-  string backend;
-
-  KVSuperblock() { }
-
-  void encode(bufferlist &bl) const;
-  void decode(bufferlist::iterator &bl);
-  void dump(Formatter *f) const;
-  static void generate_test_instances(list<KVSuperblock*>& o);
-};
-WRITE_CLASS_ENCODER(KVSuperblock)
-
-
-inline ostream& operator<<(ostream& out, const KVSuperblock& sb)
-{
-  return out << "sb(" << sb.compat_features << " " << sb.backend << ")";
-}
-
-
-class KeyValueStore : public ObjectStore,
-                      public md_config_obs_t {
- public:
-  struct KVPerfTracker {
-    PerfCounters::avg_tracker<uint64_t> os_commit_latency;
-    PerfCounters::avg_tracker<uint64_t> os_apply_latency;
-
-    objectstore_perf_stat_t get_cur_stats() const {
-      objectstore_perf_stat_t ret;
-      ret.filestore_commit_latency = os_commit_latency.avg();
-      ret.filestore_apply_latency = os_apply_latency.avg();
-      return ret;
-    }
-
-    void update_from_perfcounters(PerfCounters &logger) {
-      os_commit_latency.consume_next(
-        logger.get_tavg_ms(
-          l_os_commit_lat));
-      os_apply_latency.consume_next(
-        logger.get_tavg_ms(
-          l_os_apply_lat));
-    }
-
-  } perf_tracker;
-
-  objectstore_perf_stat_t get_cur_stats() {
-    perf_tracker.update_from_perfcounters(*perf_logger);
-    return perf_tracker.get_cur_stats();
-  }
-
-  static const uint32_t target_version = 1;
-
- private:
-  string internal_name; // internal name, used to name the perfcounter instance
-  string basedir;
-  std::string current_fn;
-  uuid_d fsid;
-
-  int fsid_fd, current_fd;
-
-  deque<uint64_t> snaps;
-
-  // ObjectMap
-  boost::scoped_ptr<StripObjectMap> backend;
-
-  Finisher ondisk_finisher;
-
-  RWLock collections_lock;
-  set<coll_t> collections;
-
-  Mutex lock;
-
-  int _create_current();
-
-  /// read a uuid from fd
-  int read_fsid(int fd, uuid_d *uuid);
-
-  /// lock fsid_fd
-  int lock_fsid();
-
-  string strip_object_key(uint64_t no) {
-    char n[100];
-    snprintf(n, 100, "%08lld", (long long)no);
-    return string(n);
-  }
-
-  // Each transaction has side effect which may influent the following
-  // operations, we need to make it visible for the following within
-  // transaction by caching middle result.
-  // Side effects contains:
-  // 1. Creating/Deleting collection
-  // 2. Creating/Deleting object
-  // 3. Object modify(including omap, xattr)
-  // 4. Clone or rename
-  struct BufferTransaction {
-    typedef pair<coll_t, ghobject_t> uniq_id;
-
-    struct CollGhobjectPairBitwiseComparator {
-      bool operator()(const uniq_id& l,
-		      const uniq_id& r) const {
-	if (l.first < r.first)
-	  return true;
-	if (l.first != r.first)
-	  return false;
-	if (cmp_bitwise(l.second, r.second) < 0)
-	  return true;
-	return false;
-      }
-    };
-
-    typedef map<uniq_id, StripObjectMap::StripObjectHeaderRef,
-		CollGhobjectPairBitwiseComparator> StripHeaderMap;
-
-    //Dirty records
-    StripHeaderMap strip_headers;
-    map< uniq_id, map<pair<string, string>, bufferlist>,
-	 CollGhobjectPairBitwiseComparator> buffers;  // pair(prefix, key),to buffer updated data in one transaction
-
-    list<Context*> finishes;
-
-    KeyValueStore *store;
-
-    KeyValueDB::Transaction t;
-
-    void set_collections(const set<coll_t>& collections) {
-      bufferlist collections_bl;
-      ::encode(collections, collections_bl);
-      t->set("meta", "collections", collections_bl);
-    }
-
-    int lookup_cached_header(const coll_t &cid, const ghobject_t &oid,
-                             StripObjectMap::StripObjectHeaderRef *strip_header,
-                             bool create_if_missing);
-    int get_buffer_keys(StripObjectMap::StripObjectHeaderRef strip_header,
-                        const string &prefix, const set<string> &keys,
-                        map<string, bufferlist> *out);
-    void set_buffer_keys(StripObjectMap::StripObjectHeaderRef strip_header,
-                         const string &prefix, map<string, bufferlist> &bl);
-    int remove_buffer_keys(StripObjectMap::StripObjectHeaderRef strip_header,
-                           const string &prefix, const set<string> &keys);
-    void clear_buffer_keys(StripObjectMap::StripObjectHeaderRef strip_header,
-                           const string &prefix);
-    int clear_buffer(StripObjectMap::StripObjectHeaderRef strip_header);
-    void clone_buffer(StripObjectMap::StripObjectHeaderRef old_header,
-                      const coll_t &cid, const ghobject_t &oid);
-    void rename_buffer(StripObjectMap::StripObjectHeaderRef old_header,
-                       const coll_t &cid, const ghobject_t &oid);
-    int submit_transaction();
-
-    BufferTransaction(KeyValueStore *store): store(store) {
-      t = store->backend->get_transaction();
-    }
-
-    struct InvalidateCacheContext : public Context {
-      KeyValueStore *store;
-      const coll_t cid;
-      const ghobject_t oid;
-      InvalidateCacheContext(KeyValueStore *s, const coll_t &c, const ghobject_t &oid): store(s), cid(c), oid(oid) {}
-      void finish(int r) {
-      if (r == 0)
-        store->backend->invalidate_cache(cid, oid);
-      }
-    };
-  };
-
-  // -- op workqueue --
-  struct Op {
-    utime_t start;
-    uint64_t op;
-    list<Transaction*> tls;
-    Context *ondisk, *onreadable, *onreadable_sync;
-    uint64_t ops, bytes;
-    TrackedOpRef osd_op;
-  };
-  class OpSequencer : public Sequencer_impl {
-    Mutex qlock; // to protect q, for benefit of flush (peek/dequeue also protected by lock)
-    list<Op*> q;
-    Cond cond;
-    list<pair<uint64_t, Context*> > flush_commit_waiters;
-    uint64_t op; // used by flush() to know the sequence of op
-   public:
-    Sequencer *parent;
-    Mutex apply_lock;  // for apply mutual exclusion
-
-    /// get_max_uncompleted
-    bool _get_max_uncompleted(
-      uint64_t *seq ///< [out] max uncompleted seq
-      ) {
-      assert(qlock.is_locked());
-      assert(seq);
-      *seq = 0;
-      if (q.empty()) {
-	return true;
-      } else {
-	*seq = q.back()->op;
-	return false;
-      }
-    } /// @returns true if the queue is empty
-
-    /// get_min_uncompleted
-    bool _get_min_uncompleted(
-      uint64_t *seq ///< [out] min uncompleted seq
-      ) {
-      assert(qlock.is_locked());
-      assert(seq);
-      *seq = 0;
-      if (q.empty()) {
-	return true;
-      } else {
-	*seq = q.front()->op;
-	return false;
-      }
-    } /// @returns true if both queues are empty
-
-    void _wake_flush_waiters(list<Context*> *to_queue) {
-      uint64_t seq;
-      if (_get_min_uncompleted(&seq))
-	seq = -1;
-
-      for (list<pair<uint64_t, Context*> >::iterator i =
-	     flush_commit_waiters.begin();
-	   i != flush_commit_waiters.end() && i->first < seq;
-	   flush_commit_waiters.erase(i++)) {
-	to_queue->push_back(i->second);
-      }
-    }
-
-    void queue(Op *o) {
-      Mutex::Locker l(qlock);
-      q.push_back(o);
-      op++;
-      o->op = op;
-    }
-    Op *peek_queue() {
-      assert(apply_lock.is_locked());
-      return q.front();
-    }
-
-    Op *dequeue(list<Context*> *to_queue) {
-      assert(to_queue);
-      assert(apply_lock.is_locked());
-      Mutex::Locker l(qlock);
-      Op *o = q.front();
-      q.pop_front();
-      cond.Signal();
-
-      _wake_flush_waiters(to_queue);
-      return o;
-    }
-
-    void flush() {
-      Mutex::Locker l(qlock);
-
-      // get max for journal _or_ op queues
-      uint64_t seq = 0;
-      if (!q.empty())
-        seq = q.back()->op;
-
-      if (seq) {
-        // everything prior to our watermark to drain through either/both
-        // queues
-        while (!q.empty() && q.front()->op <= seq)
-          cond.Wait(qlock);
-      }
-    }
-    bool flush_commit(Context *c) {
-      Mutex::Locker l(qlock);
-      uint64_t seq = 0;
-      if (_get_max_uncompleted(&seq)) {
-	return true;
-      } else {
-	flush_commit_waiters.push_back(make_pair(seq, c));
-	return false;
-      }
-    }
-
-    OpSequencer()
-      : qlock("KeyValueStore::OpSequencer::qlock", false, false),
-        op(0), parent(0),
-	apply_lock("KeyValueStore::OpSequencer::apply_lock", false, false) {}
-    ~OpSequencer() {
-      assert(q.empty());
-    }
-
-    const string& get_name() const {
-      return parent->get_name();
-    }
-  };
-
-  friend ostream& operator<<(ostream& out, const OpSequencer& s);
-
-  deque<OpSequencer*> op_queue;
-  Throttle throttle_ops, throttle_bytes;
-  Finisher op_finisher;
-
-  ThreadPool op_tp;
-  struct OpWQ : public ThreadPool::WorkQueue<OpSequencer> {
-    KeyValueStore *store;
-    OpWQ(KeyValueStore *fs, time_t timeout, time_t suicide_timeout,
-         ThreadPool *tp) :
-      ThreadPool::WorkQueue<OpSequencer>("KeyValueStore::OpWQ",
-                                         timeout, suicide_timeout, tp),
-      store(fs) {}
-
-    bool _enqueue(OpSequencer *osr) {
-      store->op_queue.push_back(osr);
-      return true;
-    }
-    void _dequeue(OpSequencer *o) {
-      assert(0);
-    }
-    bool _empty() {
-      return store->op_queue.empty();
-    }
-    OpSequencer *_dequeue() {
-      if (store->op_queue.empty())
-	return NULL;
-      OpSequencer *osr = store->op_queue.front();
-      store->op_queue.pop_front();
-      return osr;
-    }
-    using ThreadPool::WorkQueue<OpSequencer>::_process;
-    void _process(OpSequencer *osr, ThreadPool::TPHandle &handle) {
-      store->_do_op(osr, handle);
-    }
-    void _process_finish(OpSequencer *osr) {
-      store->_finish_op(osr);
-    }
-    void _clear() {
-      assert(store->op_queue.empty());
-    }
-  } op_wq;
-
-  Op *build_op(list<Transaction*>& tls, Context *ondisk, Context *onreadable,
-               Context *onreadable_sync, TrackedOpRef osd_op);
-  void queue_op(OpSequencer *osr, Op *o);
-  void op_queue_reserve_throttle(Op *o, ThreadPool::TPHandle *handle = NULL);
-  void _do_op(OpSequencer *osr, ThreadPool::TPHandle &handle);
-  void op_queue_release_throttle(Op *o);
-  void _finish_op(OpSequencer *osr);
-
-  PerfCounters *perf_logger;
-
- public:
-
-  KeyValueStore(const std::string &base,
-                const char *internal_name = "keyvaluestore",
-                bool update_to=false);
-  ~KeyValueStore();
-
-  bool test_mount_in_use();
-  int version_stamp_is_valid(uint32_t *version);
-  int update_version_stamp();
-  uint32_t get_target_version() {
-    return target_version;
-  }
-
-  int write_version_stamp();
-  int mount();
-  int umount();
-  unsigned get_max_object_name_length() {
-    return 4096;  // no real limit for leveldb
-  }
-  unsigned get_max_attr_name_length() {
-    return 256;  // arbitrary; there is no real limit internally
-  }
-  int mkfs();
-  int mkjournal() {return 0;}
-  bool wants_journal() {
-    return false;
-  }
-  bool allows_journal() {
-    return false;
-  }
-  bool needs_journal() {
-    return false;
-  }
-
-  void collect_metadata(map<string,string> *pm);
-
-  int statfs(struct statfs *buf);
-
-  int _do_transactions(
-    list<Transaction*> &tls, uint64_t op_seq,
-    ThreadPool::TPHandle *handle);
-  int do_transactions(list<Transaction*> &tls, uint64_t op_seq) {
-    return _do_transactions(tls, op_seq, 0);
-  }
-  void _do_transaction(Transaction& transaction,
-                           BufferTransaction &bt,
-                           ThreadPool::TPHandle *handle);
-
-  int queue_transactions(Sequencer *osr, list<Transaction*>& tls,
-                         TrackedOpRef op = TrackedOpRef(),
-                         ThreadPool::TPHandle *handle = NULL);
-
-
-  // ------------------
-  // objects
-
-  int _generic_read(StripObjectMap::StripObjectHeaderRef header,
-                    uint64_t offset, size_t len, bufferlist& bl,
-                    bool allow_eio = false, BufferTransaction *bt = 0);
-  int _generic_write(StripObjectMap::StripObjectHeaderRef header,
-                     uint64_t offset, size_t len, const bufferlist& bl,
-                     BufferTransaction &t, uint32_t fadvise_flags = 0);
-
-  bool exists(coll_t cid, const ghobject_t& oid);
-  int stat(coll_t cid, const ghobject_t& oid, struct stat *st,
-           bool allow_eio = false);
-  int read(coll_t cid, const ghobject_t& oid, uint64_t offset, size_t len,
-           bufferlist& bl, uint32_t op_flags = 0, bool allow_eio = false);
-  int fiemap(coll_t cid, const ghobject_t& oid, uint64_t offset, size_t len,
-             bufferlist& bl);
-
-  int _touch(coll_t cid, const ghobject_t& oid, BufferTransaction &t);
-  int _write(coll_t cid, const ghobject_t& oid, uint64_t offset, size_t len,
-             const bufferlist& bl, BufferTransaction &t, uint32_t fadvise_flags = 0);
-  int _zero(coll_t cid, const ghobject_t& oid, uint64_t offset, size_t len,
-            BufferTransaction &t);
-  int _truncate(coll_t cid, const ghobject_t& oid, uint64_t size,
-                BufferTransaction &t);
-  int _clone(coll_t cid, const ghobject_t& oldoid, const ghobject_t& newoid,
-             BufferTransaction &t);
-  int _clone_range(coll_t cid, const ghobject_t& oldoid,
-                   const ghobject_t& newoid, uint64_t srcoff,
-                   uint64_t len, uint64_t dstoff, BufferTransaction &t);
-  int _remove(coll_t cid, const ghobject_t& oid, BufferTransaction &t);
-  int _set_alloc_hint(coll_t cid, const ghobject_t& oid,
-                      uint64_t expected_object_size,
-                      uint64_t expected_write_size,
-                      BufferTransaction &t);
-
-  void start_sync() {}
-
-  void set_fsid(uuid_d u) { fsid = u; }
-  uuid_d get_fsid() { return fsid; }
-
-  // attrs
-  int getattr(coll_t cid, const ghobject_t& oid, const char *name,
-              bufferptr &bp);
-  int getattrs(coll_t cid, const ghobject_t& oid, map<string,bufferptr>& aset);
-
-  int _setattrs(coll_t cid, const ghobject_t& oid,
-                map<string, bufferptr>& aset, BufferTransaction &t);
-  int _rmattr(coll_t cid, const ghobject_t& oid, const char *name,
-              BufferTransaction &t);
-  int _rmattrs(coll_t cid, const ghobject_t& oid, BufferTransaction &t);
-
-  // collections
-  int _collection_hint_expected_num_objs(coll_t cid, uint32_t pg_num,
-      uint64_t num_objs) const { return 0; }
-  int _create_collection(coll_t c, BufferTransaction &t);
-  int _destroy_collection(coll_t c, BufferTransaction &t);
-  int _collection_add(coll_t c, coll_t ocid, const ghobject_t& oid,
-                      BufferTransaction &t);
-  int _collection_move_rename(coll_t oldcid, const ghobject_t& oldoid,
-                              coll_t c, const ghobject_t& o,
-                              BufferTransaction &t);
-  int _collection_remove_recursive(const coll_t &cid,
-                                   BufferTransaction &t);
-  int list_collections(vector<coll_t>& ls);
-  bool collection_exists(coll_t c);
-  bool collection_empty(coll_t c);
-  int collection_list(coll_t c, ghobject_t start, ghobject_t end,
-		      bool sort_bitwise, int max,
-		      vector<ghobject_t> *ls, ghobject_t *next);
-  int collection_version_current(coll_t c, uint32_t *version);
-
-  // omap (see ObjectStore.h for documentation)
-  int omap_get(coll_t c, const ghobject_t &oid, bufferlist *header,
-               map<string, bufferlist> *out);
-  int omap_get_header(
-    coll_t c,
-    const ghobject_t &oid,
-    bufferlist *out,
-    bool allow_eio = false);
-  int omap_get_keys(coll_t c, const ghobject_t &oid, set<string> *keys);
-  int omap_get_values(coll_t c, const ghobject_t &oid, const set<string> &keys,
-                      map<string, bufferlist> *out);
-  int omap_check_keys(coll_t c, const ghobject_t &oid, const set<string> &keys,
-                      set<string> *out);
-  ObjectMap::ObjectMapIterator get_omap_iterator(coll_t c,
-                                                 const ghobject_t &oid);
-
-  int check_get_rc(const coll_t cid, const ghobject_t& oid, int r, bool is_equal_size);
-  void dump_start(const std::string &file);
-  void dump_stop();
-  void dump_transactions(list<ObjectStore::Transaction*>& ls, uint64_t seq,
-                         OpSequencer *osr);
-
- private:
-  void _inject_failure() {}
-
-  // omap
-  int _omap_clear(coll_t cid, const ghobject_t &oid,
-                  BufferTransaction &t);
-  int _omap_setkeys(coll_t cid, const ghobject_t &oid,
-                    map<string, bufferlist> &aset,
-                    BufferTransaction &t);
-  int _omap_rmkeys(coll_t cid, const ghobject_t &oid, const set<string> &keys,
-                   BufferTransaction &t);
-  int _omap_rmkeyrange(coll_t cid, const ghobject_t &oid,
-                       const string& first, const string& last,
-                       BufferTransaction &t);
-  int _omap_setheader(coll_t cid, const ghobject_t &oid, const bufferlist &bl,
-                      BufferTransaction &t);
-  int _split_collection(coll_t cid, uint32_t bits, uint32_t rem, coll_t dest,
-                        BufferTransaction &t);
-  int _split_collection_create(coll_t cid, uint32_t bits, uint32_t rem,
-                               coll_t dest, BufferTransaction &t){
-    return 0;
-  }
-
-  virtual const char** get_tracked_conf_keys() const;
-  virtual void handle_conf_change(const struct md_config_t *conf,
-                                  const std::set <std::string> &changed);
-
-  std::string m_osd_rollback_to_cluster_snap;
-  int m_keyvaluestore_queue_max_ops;
-  int m_keyvaluestore_queue_max_bytes;
-  int m_keyvaluestore_strip_size;
-  uint64_t m_keyvaluestore_max_expected_write_size;
-  int do_update;
-  bool m_keyvaluestore_do_dump;
-  std::ofstream m_keyvaluestore_dump;
-  JSONFormatter m_keyvaluestore_dump_fmt;
-
-  static const string OBJECT_STRIP_PREFIX;
-  static const string OBJECT_XATTR;
-  static const string OBJECT_OMAP;
-  static const string OBJECT_OMAP_HEADER;
-  static const string OBJECT_OMAP_HEADER_KEY;
-  static const string COLLECTION;
-  static const string COLLECTION_ATTR;
-  static const uint32_t COLLECTION_VERSION = 1;
-
-  KVSuperblock superblock;
-  /**
-   * write_superblock()
-   *
-   * Write superblock to persisent storage
-   *
-   * return value: 0 on success, otherwise negative errno
-   */
-  int write_superblock();
-
-  /**
-   * read_superblock()
-   *
-   * Fill in KeyValueStore::superblock by reading persistent storage
-   *
-   * return value: 0 on success, otherwise negative errno
-   */
-  int read_superblock();
-};
-
-WRITE_CLASS_ENCODER(StripObjectMap::StripObjectHeader)
-
-#endif
diff --git a/src/os/kstore/KStore.cc b/src/os/kstore/KStore.cc
index c9fbf4a..7de1b16 100644
--- a/src/os/kstore/KStore.cc
+++ b/src/os/kstore/KStore.cc
@@ -25,6 +25,7 @@
 #include "include/stringify.h"
 #include "common/errno.h"
 #include "common/safe_io.h"
+#include "common/Formatter.h"
 
 
 #define dout_subsys ceph_subsys_kstore
@@ -438,7 +439,7 @@ void KStore::OnodeHashLRU::add(const ghobject_t& oid, OnodeRef o)
   dout(30) << __func__ << " " << oid << " " << o << dendl;
   assert(onode_map.count(oid) == 0);
   onode_map[oid] = o;
-  lru.push_back(*o);
+  lru.push_front(*o);
 }
 
 KStore::OnodeRef KStore::OnodeHashLRU::lookup(const ghobject_t& oid)
@@ -543,6 +544,9 @@ int KStore::OnodeHashLRU::trim(int max)
 	   << " size " << onode_map.size() << dendl;
   int trimmed = 0;
   int num = onode_map.size() - max;
+  if (onode_map.size() == 0 || num <= 0)
+    return 0; // don't even try
+
   lru_list_t::iterator p = lru.end();
   if (num)
     --p;
@@ -580,7 +584,7 @@ int KStore::OnodeHashLRU::trim(int max)
 KStore::Collection::Collection(KStore *ns, coll_t c)
   : store(ns),
     cid(c),
-    lock("KStore::Collection::lock"),
+    lock("KStore::Collection::lock", true, false),
     onode_map()
 {
 }
@@ -890,10 +894,15 @@ int KStore::_open_collections(int *errors)
     coll_t cid;
     if (cid.parse(it->key())) {
       CollectionRef c(new Collection(this, cid));
-      bufferlist bl;
-      db->get(PREFIX_COLL, it->key(), &bl);
+      bufferlist bl = it->value();
       bufferlist::iterator p = bl.begin();
-      ::decode(c->cnode, p);
+      try {
+        ::decode(c->cnode, p);
+      } catch (buffer::error& e) {
+        derr << __func__ << " failed to decode cnode, key:"
+             << pretty_binary_string(it->key()) << dendl;
+        return -EIO;
+      } 
       dout(20) << __func__ << " opened " << cid << dendl;
       coll_map[cid] = c;
     } else {
@@ -924,7 +933,7 @@ int KStore::mkfs()
     goto out_close_fsid;
 
   r = _read_fsid(&old_fsid);
-  if (r < 0 && old_fsid.is_zero()) {
+  if (r < 0 || old_fsid.is_zero()) {
     if (fsid.is_zero()) {
       fsid.generate_random();
       dout(1) << __func__ << " generated fsid " << fsid << dendl;
@@ -952,6 +961,10 @@ int KStore::mkfs()
   if (r < 0)
     goto out_close_db;
 
+  r = write_meta("type", "kstore");
+  if (r < 0)
+    goto out_close_db;
+
   // indicate mkfs completion/success by writing the fsid file
   r = _write_fsid();
   if (r == 0)
@@ -1429,13 +1442,12 @@ void KStore::_reap_collections()
   }
 
   dout(10) << __func__ << " all reaped" << dendl;
-  reap_cond.Signal();
 }
 
 // ---------------
 // read operations
 
-bool KStore::exists(coll_t cid, const ghobject_t& oid)
+bool KStore::exists(const coll_t& cid, const ghobject_t& oid)
 {
   dout(10) << __func__ << " " << cid << " " << oid << dendl;
   CollectionRef c = _get_collection(cid);
@@ -1449,7 +1461,7 @@ bool KStore::exists(coll_t cid, const ghobject_t& oid)
 }
 
 int KStore::stat(
-    coll_t cid,
+    const coll_t& cid,
     const ghobject_t& oid,
     struct stat *st,
     bool allow_eio)
@@ -1470,7 +1482,7 @@ int KStore::stat(
 }
 
 int KStore::read(
-  coll_t cid,
+  const coll_t& cid,
   const ghobject_t& oid,
   uint64_t offset,
   size_t length,
@@ -1529,9 +1541,7 @@ int KStore::_do_read(
     length = o->onode.size - offset;
   }
   if (stripe_size == 0) {
-    bufferptr z(length);
-    z.zero();
-    bl.append(z);
+    bl.append_zero(length);
     r = length;
     goto out;
   }
@@ -1559,17 +1569,13 @@ int KStore::_do_read(
 	  dout(30) << __func__ << " taking " << stripe_off << "~" << l << dendl;
 	}
 	if (l < swant) {
-	  bufferptr z(swant - l);
-	  z.zero();
-	  bl.append(z);
-	  dout(30) << __func__ << " adding " << z.length() << " zeros" << dendl;
+	  bl.append_zero(swant - l);
+	  dout(30) << __func__ << " adding " << swant - l << " zeros" << dendl;
 	}
       }
     } else {
       dout(30) << __func__ << " generating " << swant << " zeros" << dendl;
-      bufferptr z(swant);
-      z.zero();
-      bl.append(z);
+      bl.append_zero(swant);
     }
     offset += swant;
     length -= swant;
@@ -1585,7 +1591,7 @@ int KStore::_do_read(
 }
 
 int KStore::fiemap(
-  coll_t cid,
+  const coll_t& cid,
   const ghobject_t& oid,
   uint64_t offset,
   size_t len,
@@ -1602,9 +1608,6 @@ int KStore::fiemap(
     return -ENOENT;
   }
 
-  if (offset == len && offset == 0)
-    len = o->onode.size;
-
   if (offset > o->onode.size)
     goto out;
 
@@ -1626,7 +1629,7 @@ int KStore::fiemap(
 }
 
 int KStore::getattr(
-  coll_t cid,
+  const coll_t& cid,
   const ghobject_t& oid,
   const char *name,
   bufferptr& value)
@@ -1658,7 +1661,7 @@ int KStore::getattr(
 }
 
 int KStore::getattrs(
-  coll_t cid,
+  const coll_t& cid,
   const ghobject_t& oid,
   map<string,bufferptr>& aset)
 {
@@ -1692,13 +1695,13 @@ int KStore::list_collections(vector<coll_t>& ls)
   return 0;
 }
 
-bool KStore::collection_exists(coll_t c)
+bool KStore::collection_exists(const coll_t& c)
 {
   RWLock::RLocker l(coll_lock);
   return coll_map.count(c);
 }
 
-bool KStore::collection_empty(coll_t cid)
+bool KStore::collection_empty(const coll_t& cid)
 {
   dout(15) << __func__ << " " << cid << dendl;
   vector<ghobject_t> ls;
@@ -1713,7 +1716,7 @@ bool KStore::collection_empty(coll_t cid)
 }
 
 int KStore::collection_list(
-  coll_t cid, ghobject_t start, ghobject_t end,
+  const coll_t& cid, ghobject_t start, ghobject_t end,
   bool sort_bitwise, int max,
   vector<ghobject_t> *ls, ghobject_t *pnext)
 {
@@ -1913,7 +1916,7 @@ bufferlist KStore::OmapIteratorImpl::value()
 }
 
 int KStore::omap_get(
-  coll_t cid,                ///< [in] Collection containing oid
+  const coll_t& cid,                ///< [in] Collection containing oid
   const ghobject_t &oid,   ///< [in] Object containing omap
   bufferlist *header,      ///< [out] omap header
   map<string, bufferlist> *out /// < [out] Key to value map
@@ -1963,7 +1966,7 @@ int KStore::omap_get(
 }
 
 int KStore::omap_get_header(
-  coll_t cid,                ///< [in] Collection containing oid
+  const coll_t& cid,                ///< [in] Collection containing oid
   const ghobject_t &oid,   ///< [in] Object containing omap
   bufferlist *header,      ///< [out] omap header
   bool allow_eio ///< [in] don't assert on eio
@@ -1998,7 +2001,7 @@ int KStore::omap_get_header(
 }
 
 int KStore::omap_get_keys(
-  coll_t cid,              ///< [in] Collection containing oid
+  const coll_t& cid,              ///< [in] Collection containing oid
   const ghobject_t &oid, ///< [in] Object containing omap
   set<string> *keys      ///< [out] Keys defined on oid
   )
@@ -2043,7 +2046,7 @@ int KStore::omap_get_keys(
 }
 
 int KStore::omap_get_values(
-  coll_t cid,                    ///< [in] Collection containing oid
+  const coll_t& cid,                    ///< [in] Collection containing oid
   const ghobject_t &oid,       ///< [in] Object containing omap
   const set<string> &keys,     ///< [in] Keys to get
   map<string, bufferlist> *out ///< [out] Returned keys and values
@@ -2079,7 +2082,7 @@ int KStore::omap_get_values(
 }
 
 int KStore::omap_check_keys(
-  coll_t cid,                ///< [in] Collection containing oid
+  const coll_t& cid,                ///< [in] Collection containing oid
   const ghobject_t &oid,   ///< [in] Object containing omap
   const set<string> &keys, ///< [in] Keys to check
   set<string> *out         ///< [out] Subset of keys defined on oid
@@ -2118,7 +2121,7 @@ int KStore::omap_check_keys(
 }
 
 ObjectMap::ObjectMapIterator KStore::get_omap_iterator(
-  coll_t cid,              ///< [in] collection
+  const coll_t& cid,              ///< [in] collection
   const ghobject_t &oid  ///< [in] object
   )
 {
@@ -2131,7 +2134,7 @@ ObjectMap::ObjectMapIterator KStore::get_omap_iterator(
   }
   RWLock::RLocker l(c->lock);
   OnodeRef o = c->get_onode(oid, false);
-  if (!o) {
+  if (!o || !o->exists) {
     dout(10) << __func__ << " " << oid << "doesn't exist" <<dendl;
     return ObjectMap::ObjectMapIterator();
   }
@@ -2152,8 +2155,9 @@ int KStore::_open_super_meta()
     nid_max = 0;
     bufferlist bl;
     db->get(PREFIX_SUPER, "nid_max", &bl);
+    bufferlist::iterator p = bl.begin();
     try {
-      ::decode(nid_max, bl);
+      ::decode(nid_max, p);
     } catch (buffer::error& e) {
     }
     dout(10) << __func__ << " old nid_max " << nid_max << dendl;
@@ -2392,7 +2396,7 @@ void KStore::_kv_sync_thread()
 
 int KStore::queue_transactions(
     Sequencer *posr,
-    list<Transaction*>& tls,
+    vector<Transaction>& tls,
     TrackedOpRef op,
     ThreadPool::TPHandle *handle)
 {
@@ -2422,11 +2426,11 @@ int KStore::queue_transactions(
   txc->onreadable_sync = onreadable_sync;
   txc->oncommit = ondisk;
 
-  for (list<Transaction*>::iterator p = tls.begin(); p != tls.end(); ++p) {
-    (*p)->set_osr(osr);
-    txc->ops += (*p)->get_num_ops();
-    txc->bytes += (*p)->get_num_bytes();
-    _txc_add_transaction(txc, *p);
+  for (vector<Transaction>::iterator p = tls.begin(); p != tls.end(); ++p) {
+    (*p).set_osr(osr);
+    txc->ops += (*p).get_num_ops();
+    txc->bytes += (*p).get_num_bytes();
+    _txc_add_transaction(txc, &(*p));
   }
 
   r = _txc_finalize(osr, txc);
@@ -2609,11 +2613,11 @@ int KStore::_txc_add_transaction(TransContext *txc, Transaction *t)
       break;
 
     case Transaction::OP_COLL_ADD:
-      assert(0 == "not implmeented");
+      assert(0 == "not implemented");
       break;
 
     case Transaction::OP_COLL_REMOVE:
-      assert(0 == "not implmeented");
+      assert(0 == "not implemented");
       break;
 
     case Transaction::OP_COLL_MOVE:
@@ -2638,7 +2642,7 @@ int KStore::_txc_add_transaction(TransContext *txc, Transaction *t)
       break;
 
     case Transaction::OP_COLL_RENAME:
-      assert(0 == "not implmeneted");
+      assert(0 == "not implemented");
       break;
 
     case Transaction::OP_OMAP_CLEAR:
@@ -2793,66 +2797,6 @@ void KStore::_dump_onode(OnodeRef o)
   }
 }
 
-void KStore::_pad_zeros(
-  OnodeRef o,
-  bufferlist *bl, uint64_t *offset, uint64_t *length,
-  uint64_t block_size)
-{
-  dout(40) << "before:\n";
-  bl->hexdump(*_dout);
-  *_dout << dendl;
-  // front
-  size_t front_pad = *offset % block_size;
-  size_t back_pad = 0;
-  if (front_pad) {
-    size_t front_copy = MIN(block_size - front_pad, *length);
-    bufferptr z = buffer::create_page_aligned(block_size);
-    memset(z.c_str(), 0, front_pad);
-    memcpy(z.c_str() + front_pad, bl->get_contiguous(0, front_copy), front_copy);
-    if (front_copy + front_pad < block_size) {
-      back_pad = block_size - (*length + front_pad);
-      memset(z.c_str() + front_pad + *length, 0, back_pad);
-    }
-    bufferlist old, t;
-    old.swap(*bl);
-    t.substr_of(old, front_copy, *length - front_copy);
-    bl->append(z);
-    bl->claim_append(t);
-    *offset -= front_pad;
-    *length += front_pad + back_pad;
-  }
-
-  // back
-  uint64_t end = *offset + *length;
-  unsigned back_copy = end % block_size;
-  if (back_copy) {
-    assert(back_pad == 0);
-    back_pad = block_size - back_copy;
-    assert(back_copy <= *length);
-    bufferptr tail(block_size);
-    memcpy(tail.c_str(), bl->get_contiguous(*length - back_copy, back_copy),
-	   back_copy);
-    memset(tail.c_str() + back_copy, 0, back_pad);
-    bufferlist old;
-    old.swap(*bl);
-    bl->substr_of(old, 0, *length - back_copy);
-    bl->append(tail);
-    *length += back_pad;
-    if (end > o->onode.size && g_conf->kstore_cache_tails) {
-      o->tail_bl.clear();
-      o->tail_bl.append(tail, 0, back_copy);
-      o->tail_offset = end - back_copy;
-      dout(20) << __func__ << " cached "<< back_copy << " of tail block at "
-	       << o->tail_offset << dendl;
-    }
-  }
-  dout(20) << __func__ << " pad " << front_pad << " + " << back_pad
-	   << " on front/back, now " << *offset << "~" << *length << dendl;
-  dout(40) << "after:\n";
-  bl->hexdump(*_dout);
-  *_dout << dendl;
-}
-
 void KStore::_do_read_stripe(OnodeRef o, uint64_t offset, bufferlist *pbl)
 {
   map<uint64_t,bufferlist>::iterator p = o->pending_stripes.find(offset);
@@ -2935,10 +2879,8 @@ int KStore::_do_write(TransContext *txc,
 	bl.substr_of(prev, 0, p);
       }
       if (p < offset_rem) {
-	bufferptr z(offset_rem - p);
-	dout(20) << __func__ << " add leading " << z.length() << " zeros" << dendl;
-	z.zero();
-	bl.append(z);
+	dout(20) << __func__ << " add leading " << offset_rem - p << " zeros" << dendl;
+	bl.append_zero(offset_rem - p);
       }
     }
     unsigned use = stripe_size - offset_rem;
@@ -3029,10 +2971,9 @@ int KStore::_zero(TransContext *txc,
 	  dout(20) << __func__ << " truncated stripe " << pos - stripe_off
 		   << " to " << bl.length() << dendl;
 	} else {
-	  bufferptr z(end - (pos - stripe_off + bl.length()));
-	  z.zero();
-	  bl.append(z);
-	  dout(20) << __func__ << " adding " << z.length() << " of zeros" << dendl;
+          auto len = end - (pos - stripe_off + bl.length());
+	  bl.append_zero(len);
+	  dout(20) << __func__ << " adding " << len << " of zeros" << dendl;
 	  if (stripe.length() > bl.length()) {
 	    unsigned l = stripe.length() - bl.length();
 	    bufferlist t;
@@ -3068,6 +3009,8 @@ int KStore::_do_truncate(TransContext *txc, OnodeRef o, uint64_t offset)
 {
   uint64_t stripe_size = o->onode.stripe_size;
 
+  o->flush();
+
   // trim down stripes
   if (stripe_size) {
     uint64_t pos = offset;
@@ -3514,6 +3457,8 @@ int KStore::_clone(TransContext *txc,
   newo->exists = true;
   _assign_nid(txc, newo);
 
+  oldo->flush();
+
   r = _do_read(oldo, 0, oldo->onode.size, bl, 0);
   if (r < 0)
     goto out;
@@ -3590,6 +3535,7 @@ int KStore::_clone_range(TransContext *txc,
   newo = c->get_onode(new_oid, true);
   assert(newo);
   newo->exists = true;
+  _assign_nid(txc, newo);
 
   r = _do_read(oldo, srcoff, length, bl, 0);
   if (r < 0)
@@ -3627,13 +3573,13 @@ int KStore::_rename(TransContext *txc,
     r = -ENOENT;
     goto out;
   }
-  newo = c->get_onode(new_oid, true);
-  assert(newo);
 
-  if (newo->exists) {
+  newo = c->get_onode(new_oid, false);
+  if (newo && newo->exists) {
+    // destination object already exists, remove it first
     r = _do_remove(txc, newo);
     if (r < 0)
-      return r;
+      goto out;
   }
 
   txc->t->rmkey(PREFIX_OBJ, oldo->key);
diff --git a/src/os/kstore/KStore.h b/src/os/kstore/KStore.h
index d8cbdc2..0710436 100644
--- a/src/os/kstore/KStore.h
+++ b/src/os/kstore/KStore.h
@@ -201,7 +201,7 @@ public:
 
     CollectionRef first_collection;  ///< first referenced collection
 
-    TransContext(OpSequencer *o)
+    explicit TransContext(OpSequencer *o)
       : state(STATE_PREPARE),
 	osr(o),
 	ops(0),
@@ -272,7 +272,7 @@ public:
 
   struct KVSyncThread : public Thread {
     KStore *store;
-    KVSyncThread(KStore *s) : store(s) {}
+    explicit KVSyncThread(KStore *s) : store(s) {}
     void *entry() {
       store->_kv_sync_thread();
       return NULL;
@@ -309,7 +309,6 @@ private:
   Logger *logger;
 
   Mutex reap_lock;
-  Cond reap_cond;
   list<CollectionRef> removed_collections;
 
 
@@ -371,6 +370,10 @@ public:
   KStore(CephContext *cct, const string& path);
   ~KStore();
 
+  string get_type() {
+    return "kstore";
+  }
+
   bool needs_journal() { return false; };
   bool wants_journal() { return false; };
   bool allows_journal() { return false; };
@@ -399,14 +402,17 @@ public:
 
   int statfs(struct statfs *buf);
 
-  bool exists(coll_t cid, const ghobject_t& oid);
+  using ObjectStore::exists;
+  bool exists(const coll_t& cid, const ghobject_t& oid);
+  using ObjectStore::stat;
   int stat(
-    coll_t cid,
+    const coll_t& cid,
     const ghobject_t& oid,
     struct stat *st,
     bool allow_eio = false); // struct stat?
+  using ObjectStore::read;
   int read(
-    coll_t cid,
+    const coll_t& cid,
     const ghobject_t& oid,
     uint64_t offset,
     size_t len,
@@ -420,58 +426,68 @@ public:
     bufferlist& bl,
     uint32_t op_flags = 0);
 
-  int fiemap(coll_t cid, const ghobject_t& oid, uint64_t offset, size_t len, bufferlist& bl);
-  int getattr(coll_t cid, const ghobject_t& oid, const char *name, bufferptr& value);
-  int getattrs(coll_t cid, const ghobject_t& oid, map<string,bufferptr>& aset);
+  using ObjectStore::fiemap;
+  int fiemap(const coll_t& cid, const ghobject_t& oid, uint64_t offset, size_t len, bufferlist& bl);
+  using ObjectStore::getattr;
+  int getattr(const coll_t& cid, const ghobject_t& oid, const char *name, bufferptr& value);
+  using ObjectStore::getattrs;
+  int getattrs(const coll_t& cid, const ghobject_t& oid, map<string,bufferptr>& aset);
 
   int list_collections(vector<coll_t>& ls);
-  bool collection_exists(coll_t c);
-  bool collection_empty(coll_t c);
+  bool collection_exists(const coll_t& c);
+  bool collection_empty(const coll_t& c);
 
-  int collection_list(coll_t cid, ghobject_t start, ghobject_t end,
+  using ObjectStore::collection_list;
+  int collection_list(const coll_t& cid, ghobject_t start, ghobject_t end,
 		      bool sort_bitwise, int max,
 		      vector<ghobject_t> *ls, ghobject_t *next);
 
+  using ObjectStore::omap_get;
   int omap_get(
-    coll_t cid,                ///< [in] Collection containing oid
+    const coll_t& cid,                ///< [in] Collection containing oid
     const ghobject_t &oid,   ///< [in] Object containing omap
     bufferlist *header,      ///< [out] omap header
     map<string, bufferlist> *out /// < [out] Key to value map
     );
 
+  using ObjectStore::omap_get_header;
   /// Get omap header
   int omap_get_header(
-    coll_t cid,                ///< [in] Collection containing oid
+    const coll_t& cid,                ///< [in] Collection containing oid
     const ghobject_t &oid,   ///< [in] Object containing omap
     bufferlist *header,      ///< [out] omap header
     bool allow_eio = false ///< [in] don't assert on eio
     );
 
+  using ObjectStore::omap_get_keys;
   /// Get keys defined on oid
   int omap_get_keys(
-    coll_t cid,              ///< [in] Collection containing oid
+    const coll_t& cid,              ///< [in] Collection containing oid
     const ghobject_t &oid, ///< [in] Object containing omap
     set<string> *keys      ///< [out] Keys defined on oid
     );
 
+  using ObjectStore::omap_get_values;
   /// Get key values
   int omap_get_values(
-    coll_t cid,                    ///< [in] Collection containing oid
+    const coll_t& cid,                    ///< [in] Collection containing oid
     const ghobject_t &oid,       ///< [in] Object containing omap
     const set<string> &keys,     ///< [in] Keys to get
     map<string, bufferlist> *out ///< [out] Returned keys and values
     );
 
+  using ObjectStore::omap_check_keys;
   /// Filters keys into out which are defined on oid
   int omap_check_keys(
-    coll_t cid,                ///< [in] Collection containing oid
+    const coll_t& cid,                ///< [in] Collection containing oid
     const ghobject_t &oid,   ///< [in] Object containing omap
     const set<string> &keys, ///< [in] Keys to check
     set<string> *out         ///< [out] Subset of keys defined on oid
     );
 
+  using ObjectStore::get_omap_iterator;
   ObjectMap::ObjectMapIterator get_omap_iterator(
-    coll_t cid,              ///< [in] collection
+    const coll_t& cid,              ///< [in] collection
     const ghobject_t &oid  ///< [in] object
     );
 
@@ -488,7 +504,7 @@ public:
 
   int queue_transactions(
     Sequencer *osr,
-    list<Transaction*>& tls,
+    vector<Transaction>& tls,
     TrackedOpRef op = TrackedOpRef(),
     ThreadPool::TPHandle *handle = NULL);
 
@@ -506,8 +522,6 @@ private:
 	     uint64_t offset, size_t len,
 	     bufferlist& bl,
 	     uint32_t fadvise_flags);
-  void _pad_zeros(OnodeRef o, bufferlist *bl, uint64_t *offset, uint64_t *length,
-		  uint64_t block_size);
   int _do_write(TransContext *txc,
 		OnodeRef o,
 		uint64_t offset, uint64_t length,
diff --git a/src/os/kstore/kstore_types.h b/src/os/kstore/kstore_types.h
index 02c75cb..8a88074 100644
--- a/src/os/kstore/kstore_types.h
+++ b/src/os/kstore/kstore_types.h
@@ -28,7 +28,7 @@ namespace ceph {
 struct kstore_cnode_t {
   uint32_t bits;   ///< how many bits of coll pgid are significant
 
-  kstore_cnode_t(int b=0) : bits(b) {}
+  explicit kstore_cnode_t(int b=0) : bits(b) {}
 
   void encode(bufferlist& bl) const;
   void decode(bufferlist::iterator& p);
diff --git a/src/os/memstore/MemStore.cc b/src/os/memstore/MemStore.cc
index 06919d0..68972dd 100644
--- a/src/os/memstore/MemStore.cc
+++ b/src/os/memstore/MemStore.cc
@@ -52,6 +52,7 @@ int MemStore::mount()
 
 int MemStore::umount()
 {
+  finisher.wait_for_empty();
   finisher.stop();
   return _save();
 }
@@ -59,7 +60,6 @@ int MemStore::umount()
 int MemStore::_save()
 {
   dout(10) << __func__ << dendl;
-  Mutex::Locker l(apply_lock); // block any writer
   dump_all();
   set<coll_t> collections;
   for (ceph::unordered_map<coll_t,CollectionRef>::iterator p = coll_map.begin();
@@ -157,7 +157,7 @@ int MemStore::_load()
     int r = cbl.read_file(fn.c_str(), &err);
     if (r < 0)
       return r;
-    CollectionRef c(new Collection(cct));
+    CollectionRef c(new Collection(cct, *q));
     bufferlist::iterator p = cbl.begin();
     c->decode(p);
     coll_map[*q] = c;
@@ -223,7 +223,7 @@ int MemStore::mkfs()
 int MemStore::statfs(struct statfs *st)
 {
   dout(10) << __func__ << dendl;
-  st->f_bsize = 1024;
+  st->f_bsize = 4096;
 
   // Device size is a configured constant
   st->f_blocks = g_conf->memstore_device_bytes / st->f_bsize;
@@ -240,7 +240,7 @@ objectstore_perf_stat_t MemStore::get_cur_stats()
   return objectstore_perf_stat_t();
 }
 
-MemStore::CollectionRef MemStore::get_collection(coll_t cid)
+MemStore::CollectionRef MemStore::get_collection(const coll_t& cid)
 {
   RWLock::RLocker l(coll_lock);
   ceph::unordered_map<coll_t,CollectionRef>::iterator cp = coll_map.find(cid);
@@ -253,12 +253,20 @@ MemStore::CollectionRef MemStore::get_collection(coll_t cid)
 // ---------------
 // read operations
 
-bool MemStore::exists(coll_t cid, const ghobject_t& oid)
+bool MemStore::exists(const coll_t& cid, const ghobject_t& oid)
 {
-  dout(10) << __func__ << " " << cid << " " << oid << dendl;
-  CollectionRef c = get_collection(cid);
+  CollectionHandle c = get_collection(cid);
   if (!c)
     return false;
+  return exists(c, oid);
+}
+
+bool MemStore::exists(CollectionHandle &c_, const ghobject_t& oid)
+{
+  Collection *c = static_cast<Collection*>(c_.get());
+  dout(10) << __func__ << " " << c->get_cid() << " " << oid << dendl;
+  if (!c->exists)
+    return false;
 
   // Perform equivalent of c->get_object_(oid) != NULL. In C++11 the
   // shared_ptr needs to be compared to nullptr.
@@ -266,16 +274,27 @@ bool MemStore::exists(coll_t cid, const ghobject_t& oid)
 }
 
 int MemStore::stat(
-    coll_t cid,
+    const coll_t& cid,
     const ghobject_t& oid,
     struct stat *st,
     bool allow_eio)
 {
-  dout(10) << __func__ << " " << cid << " " << oid << dendl;
-  CollectionRef c = get_collection(cid);
+  CollectionHandle c = get_collection(cid);
   if (!c)
     return -ENOENT;
+  return stat(c, oid, st, allow_eio);
+}
 
+int MemStore::stat(
+  CollectionHandle &c_,
+  const ghobject_t& oid,
+  struct stat *st,
+  bool allow_eio)
+{
+  Collection *c = static_cast<Collection*>(c_.get());
+  dout(10) << __func__ << " " << c->cid << " " << oid << dendl;
+  if (!c->exists)
+    return -ENOENT;
   ObjectRef o = c->get_object(oid);
   if (!o)
     return -ENOENT;
@@ -287,7 +306,7 @@ int MemStore::stat(
 }
 
 int MemStore::read(
-    coll_t cid,
+    const coll_t& cid,
     const ghobject_t& oid,
     uint64_t offset,
     size_t len,
@@ -295,12 +314,26 @@ int MemStore::read(
     uint32_t op_flags,
     bool allow_eio)
 {
-  dout(10) << __func__ << " " << cid << " " << oid << " "
-	   << offset << "~" << len << dendl;
-  CollectionRef c = get_collection(cid);
+  CollectionHandle c = get_collection(cid);
   if (!c)
     return -ENOENT;
+  return read(c, oid, offset, len, bl, op_flags, allow_eio);
+}
 
+int MemStore::read(
+  CollectionHandle &c_,
+  const ghobject_t& oid,
+  uint64_t offset,
+  size_t len,
+  bufferlist& bl,
+  uint32_t op_flags,
+  bool allow_eio)
+{
+  Collection *c = static_cast<Collection*>(c_.get());
+  dout(10) << __func__ << " " << c->cid << " " << oid << " "
+	   << offset << "~" << len << dendl;
+  if (!c->exists)
+    return -ENOENT;
   ObjectRef o = c->get_object(oid);
   if (!o)
     return -ENOENT;
@@ -315,7 +348,7 @@ int MemStore::read(
   return o->read(offset, l, bl);
 }
 
-int MemStore::fiemap(coll_t cid, const ghobject_t& oid,
+int MemStore::fiemap(const coll_t& cid, const ghobject_t& oid,
 		     uint64_t offset, size_t len, bufferlist& bl)
 {
   dout(10) << __func__ << " " << cid << " " << oid << " " << offset << "~"
@@ -329,26 +362,32 @@ int MemStore::fiemap(coll_t cid, const ghobject_t& oid,
     return -ENOENT;
   map<uint64_t, uint64_t> m;
   size_t l = len;
-  if (offset == 0 && len == 0)
-    l = o->get_size();
   if (offset + l > o->get_size())
     l = o->get_size() - offset;
   if (offset >= o->get_size())
     goto out;
- out:
   m[offset] = l;
+ out:
   ::encode(m, bl);
   return 0;
 }
 
-int MemStore::getattr(coll_t cid, const ghobject_t& oid,
+int MemStore::getattr(const coll_t& cid, const ghobject_t& oid,
 		      const char *name, bufferptr& value)
 {
-  dout(10) << __func__ << " " << cid << " " << oid << " " << name << dendl;
-  CollectionRef c = get_collection(cid);
+  CollectionHandle c = get_collection(cid);
   if (!c)
     return -ENOENT;
+  return getattr(c, oid, name, value);
+}
 
+int MemStore::getattr(CollectionHandle &c_, const ghobject_t& oid,
+		      const char *name, bufferptr& value)
+{
+  Collection *c = static_cast<Collection*>(c_.get());
+  dout(10) << __func__ << " " << c->cid << " " << oid << " " << name << dendl;
+  if (!c->exists)
+    return -ENOENT;
   ObjectRef o = c->get_object(oid);
   if (!o)
     return -ENOENT;
@@ -361,13 +400,22 @@ int MemStore::getattr(coll_t cid, const ghobject_t& oid,
   return 0;
 }
 
-int MemStore::getattrs(coll_t cid, const ghobject_t& oid,
+int MemStore::getattrs(const coll_t& cid, const ghobject_t& oid,
 		       map<string,bufferptr>& aset)
 {
-  dout(10) << __func__ << " " << cid << " " << oid << dendl;
-  CollectionRef c = get_collection(cid);
+  CollectionHandle c = get_collection(cid);
   if (!c)
     return -ENOENT;
+  return getattrs(c, oid, aset);
+}
+
+int MemStore::getattrs(CollectionHandle &c_, const ghobject_t& oid,
+		       map<string,bufferptr>& aset)
+{
+  Collection *c = static_cast<Collection*>(c_.get());
+  dout(10) << __func__ << " " << c->cid << " " << oid << dendl;
+  if (!c->exists)
+    return -ENOENT;
 
   ObjectRef o = c->get_object(oid);
   if (!o)
@@ -389,14 +437,14 @@ int MemStore::list_collections(vector<coll_t>& ls)
   return 0;
 }
 
-bool MemStore::collection_exists(coll_t cid)
+bool MemStore::collection_exists(const coll_t& cid)
 {
   dout(10) << __func__ << " " << cid << dendl;
   RWLock::RLocker l(coll_lock);
   return coll_map.count(cid);
 }
 
-bool MemStore::collection_empty(coll_t cid)
+bool MemStore::collection_empty(const coll_t& cid)
 {
   dout(10) << __func__ << " " << cid << dendl;
   CollectionRef c = get_collection(cid);
@@ -407,7 +455,7 @@ bool MemStore::collection_empty(coll_t cid)
   return c->object_map.empty();
 }
 
-int MemStore::collection_list(coll_t cid, ghobject_t start, ghobject_t end,
+int MemStore::collection_list(const coll_t& cid, ghobject_t start, ghobject_t end,
 			      bool sort_bitwise, int max,
 			      vector<ghobject_t> *ls, ghobject_t *next)
 {
@@ -418,6 +466,8 @@ int MemStore::collection_list(coll_t cid, ghobject_t start, ghobject_t end,
     return -ENOENT;
   RWLock::RLocker l(c->lock);
 
+  dout(10) << __func__ << " cid " << cid << " start " << start
+	   << " end " << end << dendl;
   map<ghobject_t,ObjectRef,ghobject_t::BitwiseComparator>::iterator p = c->object_map.lower_bound(start);
   while (p != c->object_map.end() &&
 	 ls->size() < (unsigned)max &&
@@ -431,11 +481,12 @@ int MemStore::collection_list(coll_t cid, ghobject_t start, ghobject_t end,
     else
       *next = p->first;
   }
+  dout(10) << __func__ << " cid " << cid << " got " << ls->size() << dendl;
   return 0;
 }
 
 int MemStore::omap_get(
-    coll_t cid,                ///< [in] Collection containing oid
+    const coll_t& cid,                ///< [in] Collection containing oid
     const ghobject_t &oid,   ///< [in] Object containing omap
     bufferlist *header,      ///< [out] omap header
     map<string, bufferlist> *out /// < [out] Key to value map
@@ -456,7 +507,7 @@ int MemStore::omap_get(
 }
 
 int MemStore::omap_get_header(
-    coll_t cid,                ///< [in] Collection containing oid
+    const coll_t& cid,                ///< [in] Collection containing oid
     const ghobject_t &oid,   ///< [in] Object containing omap
     bufferlist *header,      ///< [out] omap header
     bool allow_eio ///< [in] don't assert on eio
@@ -476,7 +527,7 @@ int MemStore::omap_get_header(
 }
 
 int MemStore::omap_get_keys(
-    coll_t cid,              ///< [in] Collection containing oid
+    const coll_t& cid,              ///< [in] Collection containing oid
     const ghobject_t &oid, ///< [in] Object containing omap
     set<string> *keys      ///< [out] Keys defined on oid
     )
@@ -498,7 +549,7 @@ int MemStore::omap_get_keys(
 }
 
 int MemStore::omap_get_values(
-    coll_t cid,                    ///< [in] Collection containing oid
+    const coll_t& cid,                    ///< [in] Collection containing oid
     const ghobject_t &oid,       ///< [in] Object containing omap
     const set<string> &keys,     ///< [in] Keys to get
     map<string, bufferlist> *out ///< [out] Returned keys and values
@@ -524,7 +575,7 @@ int MemStore::omap_get_values(
 }
 
 int MemStore::omap_check_keys(
-    coll_t cid,                ///< [in] Collection containing oid
+    const coll_t& cid,                ///< [in] Collection containing oid
     const ghobject_t &oid,   ///< [in] Object containing omap
     const set<string> &keys, ///< [in] Keys to check
     set<string> *out         ///< [out] Subset of keys defined on oid
@@ -549,7 +600,7 @@ int MemStore::omap_check_keys(
   return 0;
 }
 
-ObjectMap::ObjectMapIterator MemStore::get_omap_iterator(coll_t cid,
+ObjectMap::ObjectMapIterator MemStore::get_omap_iterator(const coll_t& cid,
 							 const ghobject_t& oid)
 {
   dout(10) << __func__ << " " << cid << " " << oid << dendl;
@@ -568,7 +619,7 @@ ObjectMap::ObjectMapIterator MemStore::get_omap_iterator(coll_t cid,
 // write operations
 
 int MemStore::queue_transactions(Sequencer *osr,
-				 list<Transaction*>& tls,
+				 vector<Transaction>& tls,
 				 TrackedOpRef op,
 				 ThreadPool::TPHandle *handle)
 {
@@ -589,12 +640,12 @@ int MemStore::queue_transactions(Sequencer *osr,
     lock = std::unique_lock<std::mutex>((*seq)->mutex);
   }
 
-  for (list<Transaction*>::iterator p = tls.begin(); p != tls.end(); ++p) {
+  for (vector<Transaction>::iterator p = tls.begin(); p != tls.end(); ++p) {
     // poke the TPHandle heartbeat just to exercise that code path
     if (handle)
       handle->reset_tp_timeout();
 
-    _do_transaction(**p);
+    _do_transaction(*p);
   }
 
   Context *on_apply = NULL, *on_apply_sync = NULL, *on_commit = NULL;
@@ -946,7 +997,7 @@ void MemStore::_do_transaction(Transaction& t)
   }
 }
 
-int MemStore::_touch(coll_t cid, const ghobject_t& oid)
+int MemStore::_touch(const coll_t& cid, const ghobject_t& oid)
 {
   dout(10) << __func__ << " " << cid << " " << oid << dendl;
   CollectionRef c = get_collection(cid);
@@ -957,7 +1008,7 @@ int MemStore::_touch(coll_t cid, const ghobject_t& oid)
   return 0;
 }
 
-int MemStore::_write(coll_t cid, const ghobject_t& oid,
+int MemStore::_write(const coll_t& cid, const ghobject_t& oid,
 		     uint64_t offset, size_t len, const bufferlist& bl,
 		     uint32_t fadvise_flags)
 {
@@ -977,19 +1028,17 @@ int MemStore::_write(coll_t cid, const ghobject_t& oid,
   return 0;
 }
 
-int MemStore::_zero(coll_t cid, const ghobject_t& oid,
+int MemStore::_zero(const coll_t& cid, const ghobject_t& oid,
 		    uint64_t offset, size_t len)
 {
   dout(10) << __func__ << " " << cid << " " << oid << " " << offset << "~"
 	   << len << dendl;
-  bufferptr bp(len);
-  bp.zero();
   bufferlist bl;
-  bl.push_back(bp);
+  bl.append_zero(len);
   return _write(cid, oid, offset, len, bl);
 }
 
-int MemStore::_truncate(coll_t cid, const ghobject_t& oid, uint64_t size)
+int MemStore::_truncate(const coll_t& cid, const ghobject_t& oid, uint64_t size)
 {
   dout(10) << __func__ << " " << cid << " " << oid << " " << size << dendl;
   CollectionRef c = get_collection(cid);
@@ -1005,7 +1054,7 @@ int MemStore::_truncate(coll_t cid, const ghobject_t& oid, uint64_t size)
   return r;
 }
 
-int MemStore::_remove(coll_t cid, const ghobject_t& oid)
+int MemStore::_remove(const coll_t& cid, const ghobject_t& oid)
 {
   dout(10) << __func__ << " " << cid << " " << oid << dendl;
   CollectionRef c = get_collection(cid);
@@ -1023,7 +1072,7 @@ int MemStore::_remove(coll_t cid, const ghobject_t& oid)
   return 0;
 }
 
-int MemStore::_setattrs(coll_t cid, const ghobject_t& oid,
+int MemStore::_setattrs(const coll_t& cid, const ghobject_t& oid,
 			map<string,bufferptr>& aset)
 {
   dout(10) << __func__ << " " << cid << " " << oid << dendl;
@@ -1040,7 +1089,7 @@ int MemStore::_setattrs(coll_t cid, const ghobject_t& oid,
   return 0;
 }
 
-int MemStore::_rmattr(coll_t cid, const ghobject_t& oid, const char *name)
+int MemStore::_rmattr(const coll_t& cid, const ghobject_t& oid, const char *name)
 {
   dout(10) << __func__ << " " << cid << " " << oid << " " << name << dendl;
   CollectionRef c = get_collection(cid);
@@ -1058,7 +1107,7 @@ int MemStore::_rmattr(coll_t cid, const ghobject_t& oid, const char *name)
   return 0;
 }
 
-int MemStore::_rmattrs(coll_t cid, const ghobject_t& oid)
+int MemStore::_rmattrs(const coll_t& cid, const ghobject_t& oid)
 {
   dout(10) << __func__ << " " << cid << " " << oid << dendl;
   CollectionRef c = get_collection(cid);
@@ -1073,7 +1122,7 @@ int MemStore::_rmattrs(coll_t cid, const ghobject_t& oid)
   return 0;
 }
 
-int MemStore::_clone(coll_t cid, const ghobject_t& oldoid,
+int MemStore::_clone(const coll_t& cid, const ghobject_t& oldoid,
 		     const ghobject_t& newoid)
 {
   dout(10) << __func__ << " " << cid << " " << oldoid
@@ -1103,7 +1152,7 @@ int MemStore::_clone(coll_t cid, const ghobject_t& oldoid,
   return 0;
 }
 
-int MemStore::_clone_range(coll_t cid, const ghobject_t& oldoid,
+int MemStore::_clone_range(const coll_t& cid, const ghobject_t& oldoid,
 			   const ghobject_t& newoid,
 			   uint64_t srcoff, uint64_t len, uint64_t dstoff)
 {
@@ -1131,7 +1180,7 @@ int MemStore::_clone_range(coll_t cid, const ghobject_t& oldoid,
   return len;
 }
 
-int MemStore::_omap_clear(coll_t cid, const ghobject_t &oid)
+int MemStore::_omap_clear(const coll_t& cid, const ghobject_t &oid)
 {
   dout(10) << __func__ << " " << cid << " " << oid << dendl;
   CollectionRef c = get_collection(cid);
@@ -1147,7 +1196,7 @@ int MemStore::_omap_clear(coll_t cid, const ghobject_t &oid)
   return 0;
 }
 
-int MemStore::_omap_setkeys(coll_t cid, const ghobject_t &oid,
+int MemStore::_omap_setkeys(const coll_t& cid, const ghobject_t &oid,
 			    bufferlist& aset_bl)
 {
   dout(10) << __func__ << " " << cid << " " << oid << dendl;
@@ -1170,7 +1219,7 @@ int MemStore::_omap_setkeys(coll_t cid, const ghobject_t &oid,
   return 0;
 }
 
-int MemStore::_omap_rmkeys(coll_t cid, const ghobject_t &oid,
+int MemStore::_omap_rmkeys(const coll_t& cid, const ghobject_t &oid,
 			   bufferlist& keys_bl)
 {
   dout(10) << __func__ << " " << cid << " " << oid << dendl;
@@ -1193,7 +1242,7 @@ int MemStore::_omap_rmkeys(coll_t cid, const ghobject_t &oid,
   return 0;
 }
 
-int MemStore::_omap_rmkeyrange(coll_t cid, const ghobject_t &oid,
+int MemStore::_omap_rmkeyrange(const coll_t& cid, const ghobject_t &oid,
 			       const string& first, const string& last)
 {
   dout(10) << __func__ << " " << cid << " " << oid << " " << first
@@ -1212,7 +1261,7 @@ int MemStore::_omap_rmkeyrange(coll_t cid, const ghobject_t &oid,
   return 0;
 }
 
-int MemStore::_omap_setheader(coll_t cid, const ghobject_t &oid,
+int MemStore::_omap_setheader(const coll_t& cid, const ghobject_t &oid,
 			      const bufferlist &bl)
 {
   dout(10) << __func__ << " " << cid << " " << oid << dendl;
@@ -1228,18 +1277,18 @@ int MemStore::_omap_setheader(coll_t cid, const ghobject_t &oid,
   return 0;
 }
 
-int MemStore::_create_collection(coll_t cid)
+int MemStore::_create_collection(const coll_t& cid)
 {
   dout(10) << __func__ << " " << cid << dendl;
   RWLock::WLocker l(coll_lock);
   auto result = coll_map.insert(std::make_pair(cid, CollectionRef()));
   if (!result.second)
     return -EEXIST;
-  result.first->second.reset(new Collection(cct));
+  result.first->second.reset(new Collection(cct, cid));
   return 0;
 }
 
-int MemStore::_destroy_collection(coll_t cid)
+int MemStore::_destroy_collection(const coll_t& cid)
 {
   dout(10) << __func__ << " " << cid << dendl;
   RWLock::WLocker l(coll_lock);
@@ -1250,13 +1299,14 @@ int MemStore::_destroy_collection(coll_t cid)
     RWLock::RLocker l2(cp->second->lock);
     if (!cp->second->object_map.empty())
       return -ENOTEMPTY;
+    cp->second->exists = false;
   }
   used_bytes -= cp->second->used_bytes();
   coll_map.erase(cp);
   return 0;
 }
 
-int MemStore::_collection_add(coll_t cid, coll_t ocid, const ghobject_t& oid)
+int MemStore::_collection_add(const coll_t& cid, const coll_t& ocid, const ghobject_t& oid)
 {
   dout(10) << __func__ << " " << cid << " " << ocid << " " << oid << dendl;
   CollectionRef c = get_collection(cid);
@@ -1278,7 +1328,7 @@ int MemStore::_collection_add(coll_t cid, coll_t ocid, const ghobject_t& oid)
   return 0;
 }
 
-int MemStore::_collection_move_rename(coll_t oldcid, const ghobject_t& oldoid,
+int MemStore::_collection_move_rename(const coll_t& oldcid, const ghobject_t& oldoid,
 				      coll_t cid, const ghobject_t& oid)
 {
   dout(10) << __func__ << " " << oldcid << " " << oldoid << " -> "
@@ -1322,7 +1372,7 @@ int MemStore::_collection_move_rename(coll_t oldcid, const ghobject_t& oldoid,
   return r;
 }
 
-int MemStore::_split_collection(coll_t cid, uint32_t bits, uint32_t match,
+int MemStore::_split_collection(const coll_t& cid, uint32_t bits, uint32_t match,
 				coll_t dest)
 {
   dout(10) << __func__ << " " << cid << " " << bits << " " << match << " "
@@ -1373,9 +1423,7 @@ int MemStore::BufferlistObject::write(uint64_t offset, const bufferlist &src)
     newdata.substr_of(data, 0, offset);
   } else {
     newdata.substr_of(data, 0, get_size());
-    bufferptr bp(offset - get_size());
-    bp.zero();
-    newdata.append(bp);
+    newdata.append_zero(offset - get_size());
   }
 
   newdata.append(src);
@@ -1420,9 +1468,7 @@ int MemStore::BufferlistObject::truncate(uint64_t size)
   } else if (get_size() == size) {
     // do nothing
   } else {
-    bufferptr bp(size - get_size());
-    bp.zero();
-    data.append(bp);
+    data.append_zero(size - get_size());
   }
   return 0;
 }
@@ -1483,7 +1529,7 @@ int MemStore::PageSetObject::read(uint64_t offset, uint64_t len, bufferlist& bl)
 
   tls_pages.clear(); // drop page refs
 
-  bl.append(buf);
+  bl.append(std::move(buf));
   return len;
 }
 
diff --git a/src/os/memstore/MemStore.h b/src/os/memstore/MemStore.h
index ac410a1..2d809f3 100644
--- a/src/os/memstore/MemStore.h
+++ b/src/os/memstore/MemStore.h
@@ -130,7 +130,7 @@ public:
     static thread_local PageSet::page_vector tls_pages;
 #endif
 
-    PageSetObject(size_t page_size) : data(page_size), data_len(0) {}
+    explicit PageSetObject(size_t page_size) : data(page_size), data_len(0) {}
 
     size_t get_size() const override { return data_len; }
 
@@ -156,18 +156,24 @@ public:
     }
   };
 
-  struct Collection : public RefCountedObject {
+  struct Collection : public CollectionImpl {
+    coll_t cid;
     CephContext *cct;
     bool use_page_set;
     ceph::unordered_map<ghobject_t, ObjectRef> object_hash;  ///< for lookup
     map<ghobject_t, ObjectRef,ghobject_t::BitwiseComparator> object_map;        ///< for iteration
     map<string,bufferptr> xattr;
     RWLock lock;   ///< for object_{map,hash}
+    bool exists;
 
     typedef boost::intrusive_ptr<Collection> Ref;
     friend void intrusive_ptr_add_ref(Collection *c) { c->get(); }
     friend void intrusive_ptr_release(Collection *c) { c->put(); }
 
+    const coll_t &get_cid() override {
+      return cid;
+    }
+
     ObjectRef create_object() const {
       if (use_page_set)
         return new PageSetObject(cct->_conf->memstore_page_size);
@@ -237,9 +243,12 @@ public:
       return result;
     }
 
-    Collection(CephContext *cct)
-      : cct(cct), use_page_set(cct->_conf->memstore_page_set),
-        lock("MemStore::Collection::lock") {}
+    explicit Collection(CephContext *cct, coll_t c)
+      : cid(c),
+	cct(cct),
+	use_page_set(cct->_conf->memstore_page_set),
+        lock("MemStore::Collection::lock", true, false),
+	exists(true) {}
   };
   typedef Collection::Ref CollectionRef;
 
@@ -292,9 +301,8 @@ private:
 
   ceph::unordered_map<coll_t, CollectionRef> coll_map;
   RWLock coll_lock;    ///< rwlock to protect coll_map
-  Mutex apply_lock;    ///< serialize all updates
 
-  CollectionRef get_collection(coll_t cid);
+  CollectionRef get_collection(const coll_t& cid);
 
   Finisher finisher;
 
@@ -302,34 +310,34 @@ private:
 
   void _do_transaction(Transaction& t);
 
-  int _touch(coll_t cid, const ghobject_t& oid);
-  int _write(coll_t cid, const ghobject_t& oid, uint64_t offset, size_t len,
+  int _touch(const coll_t& cid, const ghobject_t& oid);
+  int _write(const coll_t& cid, const ghobject_t& oid, uint64_t offset, size_t len,
 	      const bufferlist& bl, uint32_t fadvsie_flags = 0);
-  int _zero(coll_t cid, const ghobject_t& oid, uint64_t offset, size_t len);
-  int _truncate(coll_t cid, const ghobject_t& oid, uint64_t size);
-  int _remove(coll_t cid, const ghobject_t& oid);
-  int _setattrs(coll_t cid, const ghobject_t& oid, map<string,bufferptr>& aset);
-  int _rmattr(coll_t cid, const ghobject_t& oid, const char *name);
-  int _rmattrs(coll_t cid, const ghobject_t& oid);
-  int _clone(coll_t cid, const ghobject_t& oldoid, const ghobject_t& newoid);
-  int _clone_range(coll_t cid, const ghobject_t& oldoid,
+  int _zero(const coll_t& cid, const ghobject_t& oid, uint64_t offset, size_t len);
+  int _truncate(const coll_t& cid, const ghobject_t& oid, uint64_t size);
+  int _remove(const coll_t& cid, const ghobject_t& oid);
+  int _setattrs(const coll_t& cid, const ghobject_t& oid, map<string,bufferptr>& aset);
+  int _rmattr(const coll_t& cid, const ghobject_t& oid, const char *name);
+  int _rmattrs(const coll_t& cid, const ghobject_t& oid);
+  int _clone(const coll_t& cid, const ghobject_t& oldoid, const ghobject_t& newoid);
+  int _clone_range(const coll_t& cid, const ghobject_t& oldoid,
 		   const ghobject_t& newoid,
 		   uint64_t srcoff, uint64_t len, uint64_t dstoff);
-  int _omap_clear(coll_t cid, const ghobject_t &oid);
-  int _omap_setkeys(coll_t cid, const ghobject_t &oid, bufferlist& aset_bl);
-  int _omap_rmkeys(coll_t cid, const ghobject_t &oid, bufferlist& keys_bl);
-  int _omap_rmkeyrange(coll_t cid, const ghobject_t &oid,
+  int _omap_clear(const coll_t& cid, const ghobject_t &oid);
+  int _omap_setkeys(const coll_t& cid, const ghobject_t &oid, bufferlist& aset_bl);
+  int _omap_rmkeys(const coll_t& cid, const ghobject_t &oid, bufferlist& keys_bl);
+  int _omap_rmkeyrange(const coll_t& cid, const ghobject_t &oid,
 		       const string& first, const string& last);
-  int _omap_setheader(coll_t cid, const ghobject_t &oid, const bufferlist &bl);
+  int _omap_setheader(const coll_t& cid, const ghobject_t &oid, const bufferlist &bl);
 
-  int _collection_hint_expected_num_objs(coll_t cid, uint32_t pg_num,
+  int _collection_hint_expected_num_objs(const coll_t& cid, uint32_t pg_num,
       uint64_t num_objs) const { return 0; }
-  int _create_collection(coll_t c);
-  int _destroy_collection(coll_t c);
-  int _collection_add(coll_t cid, coll_t ocid, const ghobject_t& oid);
-  int _collection_move_rename(coll_t oldcid, const ghobject_t& oldoid,
+  int _create_collection(const coll_t& c);
+  int _destroy_collection(const coll_t& c);
+  int _collection_add(const coll_t& cid, const coll_t& ocid, const ghobject_t& oid);
+  int _collection_move_rename(const coll_t& oldcid, const ghobject_t& oldoid,
 			      coll_t cid, const ghobject_t& o);
-  int _split_collection(coll_t cid, uint32_t bits, uint32_t rem, coll_t dest);
+  int _split_collection(const coll_t& cid, uint32_t bits, uint32_t rem, coll_t dest);
 
   int _save();
   int _load();
@@ -342,11 +350,14 @@ public:
     : ObjectStore(path),
       cct(cct),
       coll_lock("MemStore::coll_lock"),
-      apply_lock("MemStore::apply_lock"),
       finisher(cct),
       used_bytes(0) {}
   ~MemStore() { }
 
+  string get_type() {
+    return "memstore";
+  }
+
   bool test_mount_in_use() {
     return false;
   }
@@ -377,71 +388,97 @@ public:
 
   int statfs(struct statfs *buf);
 
-  bool exists(coll_t cid, const ghobject_t& oid);
-  int stat(
-    coll_t cid,
+  bool exists(const coll_t& cid, const ghobject_t& oid) override;
+  bool exists(CollectionHandle &c, const ghobject_t& oid) override;
+  int stat(const coll_t& cid, const ghobject_t& oid,
+	   struct stat *st, bool allow_eio = false) override;
+  int stat(CollectionHandle &c, const ghobject_t& oid,
+	   struct stat *st, bool allow_eio = false) override;
+  int read(
+    const coll_t& cid,
     const ghobject_t& oid,
-    struct stat *st,
-    bool allow_eio = false); // struct stat?
+    uint64_t offset,
+    size_t len,
+    bufferlist& bl,
+    uint32_t op_flags = 0,
+    bool allow_eio = false) override;
   int read(
-    coll_t cid,
+    CollectionHandle &c,
     const ghobject_t& oid,
     uint64_t offset,
     size_t len,
     bufferlist& bl,
     uint32_t op_flags = 0,
-    bool allow_eio = false);
-  int fiemap(coll_t cid, const ghobject_t& oid, uint64_t offset, size_t len, bufferlist& bl);
-  int getattr(coll_t cid, const ghobject_t& oid, const char *name, bufferptr& value);
-  int getattrs(coll_t cid, const ghobject_t& oid, map<string,bufferptr>& aset);
+    bool allow_eio = false) override;
+  using ObjectStore::fiemap;
+  int fiemap(const coll_t& cid, const ghobject_t& oid, uint64_t offset, size_t len, bufferlist& bl);
+  int getattr(const coll_t& cid, const ghobject_t& oid, const char *name,
+	      bufferptr& value) override;
+  int getattr(CollectionHandle &c, const ghobject_t& oid, const char *name,
+	      bufferptr& value) override;
+  int getattrs(const coll_t& cid, const ghobject_t& oid,
+	       map<string,bufferptr>& aset) override;
+  int getattrs(CollectionHandle &c, const ghobject_t& oid,
+	       map<string,bufferptr>& aset) override;
 
   int list_collections(vector<coll_t>& ls);
-  bool collection_exists(coll_t c);
-  bool collection_empty(coll_t c);
-  int collection_list(coll_t cid, ghobject_t start, ghobject_t end,
+
+  CollectionHandle open_collection(const coll_t& c) {
+    return get_collection(c);
+  }
+  bool collection_exists(const coll_t& c);
+  bool collection_empty(const coll_t& c);
+  using ObjectStore::collection_list;
+  int collection_list(const coll_t& cid, ghobject_t start, ghobject_t end,
 		      bool sort_bitwise, int max,
 		      vector<ghobject_t> *ls, ghobject_t *next);
 
+  using ObjectStore::omap_get;
   int omap_get(
-    coll_t cid,                ///< [in] Collection containing oid
+    const coll_t& cid,                ///< [in] Collection containing oid
     const ghobject_t &oid,   ///< [in] Object containing omap
     bufferlist *header,      ///< [out] omap header
     map<string, bufferlist> *out /// < [out] Key to value map
     );
 
+  using ObjectStore::omap_get_header;
   /// Get omap header
   int omap_get_header(
-    coll_t cid,                ///< [in] Collection containing oid
+    const coll_t& cid,                ///< [in] Collection containing oid
     const ghobject_t &oid,   ///< [in] Object containing omap
     bufferlist *header,      ///< [out] omap header
     bool allow_eio = false ///< [in] don't assert on eio
     );
 
+  using ObjectStore::omap_get_keys;
   /// Get keys defined on oid
   int omap_get_keys(
-    coll_t cid,              ///< [in] Collection containing oid
+    const coll_t& cid,              ///< [in] Collection containing oid
     const ghobject_t &oid, ///< [in] Object containing omap
     set<string> *keys      ///< [out] Keys defined on oid
     );
 
+  using ObjectStore::omap_get_values;
   /// Get key values
   int omap_get_values(
-    coll_t cid,                    ///< [in] Collection containing oid
+    const coll_t& cid,                    ///< [in] Collection containing oid
     const ghobject_t &oid,       ///< [in] Object containing omap
     const set<string> &keys,     ///< [in] Keys to get
     map<string, bufferlist> *out ///< [out] Returned keys and values
     );
 
+  using ObjectStore::omap_check_keys;
   /// Filters keys into out which are defined on oid
   int omap_check_keys(
-    coll_t cid,                ///< [in] Collection containing oid
+    const coll_t& cid,                ///< [in] Collection containing oid
     const ghobject_t &oid,   ///< [in] Object containing omap
     const set<string> &keys, ///< [in] Keys to check
     set<string> *out         ///< [out] Subset of keys defined on oid
     );
 
+  using ObjectStore::get_omap_iterator;
   ObjectMap::ObjectMapIterator get_omap_iterator(
-    coll_t cid,              ///< [in] collection
+    const coll_t& cid,              ///< [in] collection
     const ghobject_t &oid  ///< [in] object
     );
 
@@ -451,7 +488,7 @@ public:
   objectstore_perf_stat_t get_cur_stats();
 
   int queue_transactions(
-    Sequencer *osr, list<Transaction*>& tls,
+    Sequencer *osr, vector<Transaction>& tls,
     TrackedOpRef op = TrackedOpRef(),
     ThreadPool::TPHandle *handle = NULL);
 };
diff --git a/src/os/memstore/PageSet.h b/src/os/memstore/PageSet.h
index b7ef12b..b3e1f13 100644
--- a/src/os/memstore/PageSet.h
+++ b/src/os/memstore/PageSet.h
@@ -126,7 +126,7 @@ class PageSet {
   }
 
  public:
-  PageSet(size_t page_size) : page_size(page_size) {}
+  explicit PageSet(size_t page_size) : page_size(page_size) {}
   PageSet(PageSet &&rhs)
     : pages(std::move(rhs.pages)), page_size(rhs.page_size) {}
   ~PageSet() {
diff --git a/src/osd/ClassHandler.h b/src/osd/ClassHandler.h
index a78c9e6..3fe1f98 100644
--- a/src/osd/ClassHandler.h
+++ b/src/osd/ClassHandler.h
@@ -107,7 +107,7 @@ private:
   int _load_class(ClassData *cls);
 
 public:
-  ClassHandler(CephContext *cct_) : cct(cct_), mutex("ClassHandler") {}
+  explicit ClassHandler(CephContext *cct_) : cct(cct_), mutex("ClassHandler") {}
   
   int open_all_classes();
 
diff --git a/src/osd/ECBackend.cc b/src/osd/ECBackend.cc
index dadfeaa..feefcd7 100644
--- a/src/osd/ECBackend.cc
+++ b/src/osd/ECBackend.cc
@@ -171,11 +171,12 @@ void ECBackend::RecoveryOp::dump(Formatter *f) const
 ECBackend::ECBackend(
   PGBackend::Listener *pg,
   coll_t coll,
+  ObjectStore::CollectionHandle &ch,
   ObjectStore *store,
   CephContext *cct,
   ErasureCodeInterfaceRef ec_impl,
   uint64_t stripe_width)
-  : PGBackend(pg, store, coll),
+  : PGBackend(pg, store, coll, ch),
     cct(cct),
     ec_impl(ec_impl),
     sinfo(ec_impl->get_data_chunk_count(), stripe_width) {
@@ -235,16 +236,15 @@ struct RecoveryMessages {
 
   map<pg_shard_t, vector<PushOp> > pushes;
   map<pg_shard_t, vector<PushReplyOp> > push_replies;
-  ObjectStore::Transaction *t;
-  RecoveryMessages() : t(NULL) {}
-  ~RecoveryMessages() { assert(!t); }
+  ObjectStore::Transaction t;
+  RecoveryMessages() {}
+  ~RecoveryMessages(){}
 };
 
 void ECBackend::handle_recovery_push(
   PushOp &op,
   RecoveryMessages *m)
 {
-  assert(m->t);
 
   bool oneshot = op.before_progress.first && op.after_progress.data_complete;
   ghobject_t tobj;
@@ -264,8 +264,8 @@ void ECBackend::handle_recovery_push(
   }
 
   if (op.before_progress.first) {
-    m->t->remove(coll, tobj);
-    m->t->touch(coll, tobj);
+    m->t.remove(coll, tobj);
+    m->t.touch(coll, tobj);
   }
 
   if (!op.data_included.empty()) {
@@ -273,7 +273,7 @@ void ECBackend::handle_recovery_push(
     uint64_t end = op.data_included.range_end();
     assert(op.data.length() == (end - start));
 
-    m->t->write(
+    m->t.write(
       coll,
       tobj,
       start,
@@ -285,7 +285,7 @@ void ECBackend::handle_recovery_push(
 
   if (op.before_progress.first) {
     assert(op.attrset.count(string("_")));
-    m->t->setattrs(
+    m->t.setattrs(
       coll,
       tobj,
       op.attrset);
@@ -295,9 +295,9 @@ void ECBackend::handle_recovery_push(
     dout(10) << __func__ << ": Removing oid "
 	     << tobj.hobj << " from the temp collection" << dendl;
     clear_temp_obj(tobj.hobj);
-    m->t->remove(coll, ghobject_t(
+    m->t.remove(coll, ghobject_t(
 	op.soid, ghobject_t::NO_GEN, get_parent()->whoami_shard().shard));
-    m->t->collection_move_rename(
+    m->t.collection_move_rename(
       coll, tobj,
       coll, ghobject_t(
 	op.soid, ghobject_t::NO_GEN, get_parent()->whoami_shard().shard));
@@ -306,22 +306,17 @@ void ECBackend::handle_recovery_push(
     if ((get_parent()->pgb_is_primary())) {
       assert(recovery_ops.count(op.soid));
       assert(recovery_ops[op.soid].obc);
-      object_stat_sum_t stats;
-      stats.num_objects_recovered = 1;
-      stats.num_bytes_recovered = recovery_ops[op.soid].obc->obs.oi.size;
       get_parent()->on_local_recover(
 	op.soid,
-	stats,
 	op.recovery_info,
 	recovery_ops[op.soid].obc,
-	m->t);
+	&m->t);
     } else {
       get_parent()->on_local_recover(
 	op.soid,
-	object_stat_sum_t(),
 	op.recovery_info,
 	ObjectContextRef(),
-	m->t);
+	&m->t);
     }
   }
   m->push_replies[get_parent()->primary_shard()].push_back(PushReplyOp());
@@ -467,19 +462,14 @@ void ECBackend::dispatch_recovery_messages(RecoveryMessages &m, int priority)
   }
 
   if (!replies.empty()) {
-    m.t->register_on_complete(
+    (m.t).register_on_complete(
 	get_parent()->bless_context(
 	  new SendPushReplies(
 	    get_parent(),
 	    get_parent()->get_epoch(),
 	    replies)));
-    m.t->register_on_applied(
-	new ObjectStore::C_DeleteTransaction(m.t));
-    get_parent()->queue_transaction(m.t);
-    m.t = NULL;
-  } else {
-    assert(!m.t);
-  }
+    get_parent()->queue_transaction(std::move(m.t));
+  } 
 
   if (m.reads.empty())
     return;
@@ -600,7 +590,11 @@ void ECBackend::continue_recovery_op(
 		object_stat_sum_t());
 	    }
 	  }
-	  get_parent()->on_global_recover(op.hoid);
+	  object_stat_sum_t stat;
+	  stat.num_bytes_recovered = op.recovery_info.size;
+	  stat.num_keys_recovered = 0; // ??? op ... omap_entries.size(); ?
+	  stat.num_objects_recovered = 1;
+	  get_parent()->on_global_recover(op.hoid, stat);
 	  dout(10) << __func__ << ": WRITING return " << op << dendl;
 	  recovery_ops.erase(op.hoid);
 	  return;
@@ -717,8 +711,6 @@ bool ECBackend::handle_message(
   case MSG_OSD_PG_PUSH: {
     MOSDPGPush *op = static_cast<MOSDPGPush *>(_op->get_req());
     RecoveryMessages rm;
-    rm.t = new ObjectStore::Transaction;
-    assert(rm.t);
     for (vector<PushOp>::iterator i = op->pushes.begin();
 	 i != op->pushes.end();
 	 ++i) {
@@ -840,8 +832,8 @@ void ECBackend::handle_sub_write(
   assert(!get_parent()->get_log().get_missing().is_missing(op.soid));
   if (!get_parent()->pgb_is_primary())
     get_parent()->update_stats(op.stats);
-  ObjectStore::Transaction *localt = new ObjectStore::Transaction;
-  localt->set_use_tbl(op.t.get_use_tbl());
+  ObjectStore::Transaction localt;
+  localt.set_use_tbl(op.t.get_use_tbl());
   if (!op.temp_added.empty()) {
     add_temp_objs(op.temp_added);
   }
@@ -851,7 +843,7 @@ void ECBackend::handle_sub_write(
 	 ++i) {
       dout(10) << __func__ << ": removing object " << *i
 	       << " since we won't get the transaction" << dendl;
-      localt->remove(
+      localt.remove(
 	coll,
 	ghobject_t(
 	  *i,
@@ -866,33 +858,30 @@ void ECBackend::handle_sub_write(
     op.trim_to,
     op.trim_rollback_to,
     !(op.t.empty()),
-    localt);
+    &localt);
 
-  if (!(dynamic_cast<ReplicatedPG *>(get_parent())->is_undersized()) &&
+  ReplicatedPG *_rPG = dynamic_cast<ReplicatedPG *>(get_parent());
+  if (_rPG && !_rPG->is_undersized() &&
       (unsigned)get_parent()->whoami_shard().shard >= ec_impl->get_data_chunk_count())
     op.t.set_fadvise_flag(CEPH_OSD_OP_FLAG_FADVISE_DONTNEED);
 
   if (on_local_applied_sync) {
     dout(10) << "Queueing onreadable_sync: " << on_local_applied_sync << dendl;
-    localt->register_on_applied_sync(on_local_applied_sync);
+    localt.register_on_applied_sync(on_local_applied_sync);
   }
-  localt->register_on_commit(
+  localt.register_on_commit(
     get_parent()->bless_context(
       new SubWriteCommitted(
 	this, msg, op.tid,
 	op.at_version,
 	get_parent()->get_info().last_complete)));
-  localt->register_on_applied(
+  localt.register_on_applied(
     get_parent()->bless_context(
       new SubWriteApplied(this, msg, op.tid, op.at_version)));
-  localt->register_on_applied(
-    new ObjectStore::C_DeleteTransaction(localt));
-  list<ObjectStore::Transaction*> tls;
-  tls.push_back(localt);
-  tls.push_back(new ObjectStore::Transaction);
-  tls.back()->swap(op.t);
-  tls.back()->register_on_complete(
-    new ObjectStore::C_DeleteTransaction(tls.back()));
+  vector<ObjectStore::Transaction> tls;
+  tls.reserve(2);
+  tls.push_back(std::move(localt));
+  tls.push_back(std::move(op.t));
   get_parent()->queue_transactions(tls, msg);
 }
 
@@ -918,7 +907,7 @@ void ECBackend::handle_sub_read(
 	   i->second.begin(); j != i->second.end(); ++j) {
       bufferlist bl;
       r = store->read(
-	coll,
+	ch,
 	ghobject_t(i->first, ghobject_t::NO_GEN, shard),
 	j->get<0>(),
 	j->get<1>(),
@@ -975,7 +964,7 @@ error:
     if (reply->errors.count(*i))
       continue;
     int r = store->getattrs(
-      coll,
+      ch,
       ghobject_t(
 	*i, ghobject_t::NO_GEN, get_parent()->whoami_shard().shard),
       reply->attrs_read[*i]);
@@ -1711,7 +1700,7 @@ ECUtil::HashInfoRef ECBackend::get_hash_info(
     dout(10) << __func__ << ": not in cache " << hoid << dendl;
     struct stat st;
     int r = store->stat(
-      coll,
+      ch,
       ghobject_t(hoid, ghobject_t::NO_GEN, get_parent()->whoami_shard().shard),
       &st);
     ECUtil::HashInfo hinfo(ec_impl->get_chunk_count());
@@ -1728,7 +1717,7 @@ ECUtil::HashInfoRef ECBackend::get_hash_info(
 	}
       } else {
 	r = store->getattr(
-	  coll,
+	  ch,
 	  ghobject_t(hoid, ghobject_t::NO_GEN, get_parent()->whoami_shard().shard),
 	  ECUtil::get_hinfo_key(),
 	  bl);
@@ -2032,7 +2021,7 @@ int ECBackend::objects_get_attrs(
   map<string, bufferlist> *out)
 {
   int r = store->getattrs(
-    coll,
+    ch,
     ghobject_t(hoid, ghobject_t::NO_GEN, get_parent()->whoami_shard().shard),
     *out);
   if (r < 0)
@@ -2080,7 +2069,7 @@ void ECBackend::be_deep_scrub(
     bufferlist bl;
     handle.reset_tp_timeout();
     r = store->read(
-      coll,
+      ch,
       ghobject_t(
 	poid, ghobject_t::NO_GEN, get_parent()->whoami_shard().shard),
       pos,
diff --git a/src/osd/ECBackend.h b/src/osd/ECBackend.h
index bb064f8..bee32fd 100644
--- a/src/osd/ECBackend.h
+++ b/src/osd/ECBackend.h
@@ -137,7 +137,7 @@ public:
   struct ClientAsyncReadStatus {
     bool complete;
     Context *on_complete;
-    ClientAsyncReadStatus(Context *on_complete)
+    explicit ClientAsyncReadStatus(Context *on_complete)
     : complete(false), on_complete(on_complete) {}
   };
   list<ClientAsyncReadStatus> in_progress_client_reads;
@@ -412,7 +412,7 @@ public:
     set<int> want;
     ErasureCodeInterfaceRef ec_impl;
   public:
-    ECRecPred(ErasureCodeInterfaceRef ec_impl) : ec_impl(ec_impl) {
+    explicit ECRecPred(ErasureCodeInterfaceRef ec_impl) : ec_impl(ec_impl) {
       for (unsigned i = 0; i < ec_impl->get_chunk_count(); ++i) {
 	want.insert(i);
       }
@@ -466,6 +466,7 @@ public:
   ECBackend(
     PGBackend::Listener *pg,
     coll_t coll,
+    ObjectStore::CollectionHandle &ch,
     ObjectStore *store,
     CephContext *cct,
     ErasureCodeInterfaceRef ec_impl,
diff --git a/src/osd/ECTransaction.cc b/src/osd/ECTransaction.cc
index c5d39eb..64dd89f 100644
--- a/src/osd/ECTransaction.cc
+++ b/src/osd/ECTransaction.cc
@@ -24,7 +24,7 @@
 
 struct AppendObjectsGenerator: public boost::static_visitor<void> {
   set<hobject_t, hobject_t::BitwiseComparator> *out;
-  AppendObjectsGenerator(set<hobject_t, hobject_t::BitwiseComparator> *out) : out(out) {}
+  explicit AppendObjectsGenerator(set<hobject_t, hobject_t::BitwiseComparator> *out) : out(out) {}
   void operator()(const ECTransaction::AppendOp &op) {
     out->insert(op.oid);
   }
diff --git a/src/osd/ECTransaction.h b/src/osd/ECTransaction.h
index 2615226..17be722 100644
--- a/src/osd/ECTransaction.h
+++ b/src/osd/ECTransaction.h
@@ -52,11 +52,11 @@ public:
   };
   struct TouchOp {
     hobject_t oid;
-    TouchOp(const hobject_t &oid) : oid(oid) {}
+    explicit TouchOp(const hobject_t &oid) : oid(oid) {}
   };
   struct RemoveOp {
     hobject_t oid;
-    RemoveOp(const hobject_t &oid) : oid(oid) {}
+    explicit RemoveOp(const hobject_t &oid) : oid(oid) {}
   };
   struct SetAttrsOp {
     hobject_t oid;
@@ -104,7 +104,6 @@ public:
   /// Write
   void touch(
     const hobject_t &hoid) {
-    bufferlist bl;
     ops.push_back(TouchOp(hoid));
   }
   void append(
diff --git a/src/osd/ECUtil.h b/src/osd/ECUtil.h
index 8e1261c..968056d 100644
--- a/src/osd/ECUtil.h
+++ b/src/osd/ECUtil.h
@@ -109,7 +109,7 @@ class HashInfo {
   vector<uint32_t> cumulative_shard_hashes;
 public:
   HashInfo() : total_chunk_size(0) {}
-  HashInfo(unsigned num_chunks)
+  explicit HashInfo(unsigned num_chunks)
   : total_chunk_size(0),
     cumulative_shard_hashes(num_chunks, -1) {}
   void append(uint64_t old_size, map<int, bufferlist> &to_append);
diff --git a/src/osd/HitSet.h b/src/osd/HitSet.h
index 2b981a3..4229297 100644
--- a/src/osd/HitSet.h
+++ b/src/osd/HitSet.h
@@ -92,7 +92,7 @@ public:
     };
 
     Params()  {}
-    Params(Impl *i) : impl(i) {}
+    explicit Params(Impl *i) : impl(i) {}
     virtual ~Params() {}
 
     boost::scoped_ptr<Params::Impl> impl;
@@ -115,8 +115,8 @@ public:
   };
 
   HitSet() : impl(NULL), sealed(false) {}
-  HitSet(Impl *i) : impl(i), sealed(false) {}
-  HitSet(const HitSet::Params& params);
+  explicit HitSet(Impl *i) : impl(i), sealed(false) {}
+  explicit HitSet(const HitSet::Params& params);
 
   HitSet(const HitSet& o) {
     sealed = o.sealed;
@@ -195,7 +195,7 @@ public:
   };
 
   ExplicitHashHitSet() : count(0) {}
-  ExplicitHashHitSet(const ExplicitHashHitSet::Params *p) : count(0) {}
+  explicit ExplicitHashHitSet(const ExplicitHashHitSet::Params *p) : count(0) {}
   ExplicitHashHitSet(const ExplicitHashHitSet &o) : count(o.count),
       hits(o.hits) {}
 
@@ -272,7 +272,7 @@ public:
   };
 
   ExplicitObjectHitSet() : count(0) {}
-  ExplicitObjectHitSet(const ExplicitObjectHitSet::Params *p) : count(0) {}
+  explicit ExplicitObjectHitSet(const ExplicitObjectHitSet::Params *p) : count(0) {}
   ExplicitObjectHitSet(const ExplicitObjectHitSet &o) : count(o.count),
       hits(o.hits) {}
 
@@ -409,7 +409,7 @@ public:
   BloomHitSet(unsigned inserts, double fpp, int seed)
     : bloom(inserts, fpp, seed)
   {}
-  BloomHitSet(const BloomHitSet::Params *p) : bloom(p->target_size,
+  explicit BloomHitSet(const BloomHitSet::Params *p) : bloom(p->target_size,
                                                     p->get_fpp(),
                                                     p->seed)
   {}
diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc
index 785ac50..cb83446 100644
--- a/src/osd/OSD.cc
+++ b/src/osd/OSD.cc
@@ -1,4 +1,4 @@
-// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- 
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
 // vim: ts=8 sw=2 smarttab
 /*
  * Ceph - scalable distributed file system
@@ -7,9 +7,9 @@
  *
  * This is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
- * License version 2.1, as published by the Free Software 
+ * License version 2.1, as published by the Free Software
  * Foundation.  See file COPYING.
- * 
+ *
  */
 #include "acconfig.h"
 
@@ -44,6 +44,9 @@
 #include "common/io_priority.h"
 
 #include "os/ObjectStore.h"
+#ifdef HAVE_LIBFUSE
+#include "os/FuseStore.h"
+#endif
 
 #include "ReplicatedPG.h"
 
@@ -504,7 +507,7 @@ void OSDService::activate_map()
 class AgentTimeoutCB : public Context {
   PGRef pg;
 public:
-  AgentTimeoutCB(PGRef _pg) : pg(_pg) {}
+  explicit AgentTimeoutCB(PGRef _pg) : pg(_pg) {}
   void finish(int) {
     pg->agent_choose_mode_restart();
   }
@@ -667,7 +670,11 @@ void OSDService::update_osd_stat(vector<int>& hb_peers)
 
   // fill in osd stats too
   struct statfs stbuf;
-  osd->store->statfs(&stbuf);
+  int r = osd->store->statfs(&stbuf);
+  if (r < 0) {
+    derr << "statfs() failed: " << cpp_strerror(r) << dendl;
+    return;
+  }
 
   uint64_t bytes = stbuf.f_blocks * stbuf.f_bsize;
   uint64_t used = (stbuf.f_blocks - stbuf.f_bfree) * stbuf.f_bsize;
@@ -828,7 +835,7 @@ epoch_t OSDService::note_peer_epoch(int peer, epoch_t e)
     return e;
   }
 }
- 
+
 void OSDService::forget_peer_epoch(int peer, epoch_t as_of)
 {
   Mutex::Locker l(peer_map_epoch_lock);
@@ -1097,7 +1104,7 @@ MOSDMap *OSDService::build_incremental_map_msg(epoch_t since, epoch_t to,
   MOSDMap *m = new MOSDMap(monc->get_fsid());
   m->oldest_map = sblock.oldest_map;
   m->newest_map = sblock.newest_map;
-  
+
   for (epoch_t e = to; e > since; e--) {
     bufferlist bl;
     if (e > m->oldest_map && get_inc_map_bl(e, bl)) {
@@ -1141,13 +1148,13 @@ void OSDService::send_incremental_map(epoch_t since, Connection *con,
       send_map(m, con);
       return;
     }
-    
+
     if (to > since && (int64_t)(to - since) > cct->_conf->osd_map_share_max_epochs) {
       dout(10) << "  " << (to - since) << " > max " << cct->_conf->osd_map_share_max_epochs
 	       << ", only sending most recent" << dendl;
       since = to - cct->_conf->osd_map_share_max_epochs;
     }
-    
+
     if (to - since > (epoch_t)cct->_conf->osd_map_message_max)
       to = since + cct->_conf->osd_map_message_max;
     m = build_incremental_map_msg(since, to, sblock);
@@ -1423,7 +1430,7 @@ int OSD::mkfs(CephContext *cct, ObjectStore *store, const string &dev,
     ObjectStore::Transaction t;
     t.create_collection(coll_t::meta(), 0);
     t.write(coll_t::meta(), OSD_SUPERBLOCK_POBJECT, 0, bl.length(), bl);
-    ret = store->apply_transaction(osr.get(), t);
+    ret = store->apply_transaction(osr.get(), std::move(t));
     if (ret) {
       derr << "OSD::mkfs: error while writing OSD_SUPERBLOCK_POBJECT: "
 	   << "apply_transaction returned " << ret << dendl;
@@ -1452,7 +1459,7 @@ int OSD::write_meta(ObjectStore *store, uuid_d& cluster_fsid, uuid_d& osd_fsid,
 {
   char val[80];
   int r;
-  
+
   snprintf(val, sizeof(val), "%s", CEPH_OSD_ONDISK_MAGIC);
   r = store->write_meta("magic", val);
   if (r < 0)
@@ -1570,9 +1577,11 @@ OSD::OSD(CephContext *cct_, ObjectStore *store_,
   heartbeat_thread(this),
   heartbeat_dispatcher(this),
   finished_lock("OSD::finished_lock"),
-  op_tracker(cct, cct->_conf->osd_enable_op_tracker, 
+  op_tracker(cct, cct->_conf->osd_enable_op_tracker,
                   cct->_conf->osd_num_op_tracker_shard),
   test_ops_hook(NULL),
+  op_queue(get_io_queue()),
+  op_prio_cutoff(get_io_prio_cut()),
   op_shardedwq(
     cct->_conf->osd_op_num_shards,
     this,
@@ -1647,7 +1656,7 @@ int OSD::pre_init()
   Mutex::Locker lock(osd_lock);
   if (is_stopping())
     return 0;
-  
+
   if (store->test_mount_in_use()) {
     derr << "OSD::pre_init: object store '" << dev_path << "' is "
          << "currently in use. (Is ceph-osd already running?)" << dendl;
@@ -1663,7 +1672,7 @@ int OSD::pre_init()
 class OSDSocketHook : public AdminSocketHook {
   OSD *osd;
 public:
-  OSDSocketHook(OSD *o) : osd(o) {}
+  explicit OSDSocketHook(OSD *o) : osd(o) {}
   bool call(std::string command, cmdmap_t& cmdmap, std::string format,
 	    bufferlist& out) {
     stringstream ss;
@@ -1696,14 +1705,24 @@ bool OSD::asok_command(string command, cmdmap_t& cmdmap, string format,
 	     command == "ops") {
     RWLock::RLocker l(op_tracker.lock);
     if (!op_tracker.tracking_enabled) {
-      ss << "op_tracker tracking is not enabled";
+      ss << "op_tracker tracking is not enabled now, so no ops are tracked currently, even those get stuck. \
+	Please enable \"osd_enable_op_tracker\", and the tracker will start to track new ops received afterwards.";
     } else {
       op_tracker.dump_ops_in_flight(f);
     }
+  } else if (command == "dump_blocked_ops") {
+    if (!op_tracker.tracking_enabled) {
+      ss << "op_tracker tracking is not enabled now, so no ops are tracked currently, even those get stuck. \
+	Please enable \"osd_enable_op_tracker\", and the tracker will start to track new ops received afterwards.";
+    } else {
+      op_tracker.dump_ops_in_flight(f, true);
+    }
+
   } else if (command == "dump_historic_ops") {
     RWLock::RLocker l(op_tracker.lock);
     if (!op_tracker.tracking_enabled) {
-      ss << "op_tracker tracking is not enabled";
+      ss << "op_tracker tracking is not enabled now, so no ops are tracked currently, even those get stuck. \
+	Please enable \"osd_enable_op_tracker\", and the tracker will start to track new ops received afterwards.";
     } else {
       op_tracker.dump_historic_ops(f);
     }
@@ -1781,6 +1800,49 @@ bool OSD::asok_command(string command, cmdmap_t& cmdmap, string format,
     f->close_section();
   } else if (command == "get_latest_osdmap") {
     get_latest_osdmap();
+  } else if (command == "set_heap_property") {
+    string property;
+    int64_t value = 0;
+    string error;
+    bool success = false;
+    if (!cmd_getval(cct, cmdmap, "property", property)) {
+      error = "unable to get property";
+      success = false;
+    } else if (!cmd_getval(cct, cmdmap, "value", value)) {
+      error = "unable to get value";
+      success = false;
+    } else if (value < 0) {
+      error = "negative value not allowed";
+      success = false;
+    } else if (!ceph_heap_set_numeric_property(property.c_str(), (size_t)value)) {
+      error = "invalid property";
+      success = false;
+    } else {
+      success = true;
+    }
+    f->open_object_section("result");
+    f->dump_string("error", error);
+    f->dump_bool("success", success);
+    f->close_section();
+  } else if (command == "get_heap_property") {
+    string property;
+    size_t value = 0;
+    string error;
+    bool success = false;
+    if (!cmd_getval(cct, cmdmap, "property", property)) {
+      error = "unable to get property";
+      success = false;
+    } else if (!ceph_heap_get_numeric_property(property.c_str(), &value)) {
+      error = "invalid property";
+      success = false;
+    } else {
+      success = true;
+    }
+    f->open_object_section("result");
+    f->dump_string("error", error);
+    f->dump_bool("success", success);
+    f->dump_int("value", value);
+    f->close_section();
   } else {
     assert(0 == "broken asok registration");
   }
@@ -1806,6 +1868,47 @@ public:
 
 };
 
+int OSD::enable_disable_fuse(bool stop)  // sync FuseStore with osd_objectstore_fuse; stop=true only tears down
+{  // returns 0 on success (always 0 without HAVE_LIBFUSE), else a negative error code
+#ifdef HAVE_LIBFUSE
+  int r;
+  string mntpath = g_conf->osd_data + "/fuse";  // mount point is a "fuse" dir under osd_data
+  if (fuse_store && (stop || !g_conf->osd_objectstore_fuse)) {  // running, but must not be
+    dout(1) << __func__ << " disabling" << dendl;
+    fuse_store->stop();
+    delete fuse_store;
+    fuse_store = NULL;
+    r = ::rmdir(mntpath.c_str());
+    if (r < 0)
+      r = -errno;  // convert to negative errno, as the rest of this function reports errors
+    if (r < 0) {
+      derr << __func__ << " failed to rmdir " << mntpath << ": " << cpp_strerror(r) << dendl;
+      return r;
+    }
+  }
+  if (!stop && !fuse_store && g_conf->osd_objectstore_fuse) {  // a stop request must never (re)start fuse
+    dout(1) << __func__ << " enabling" << dendl;
+    r = ::mkdir(mntpath.c_str(), 0700);
+    if (r < 0)
+      r = -errno;
+    if (r < 0 && r != -EEXIST) {  // an already existing mount directory is reused
+      derr << __func__ << " unable to create " << mntpath << ": "
+	   << cpp_strerror(r) << dendl;
+      return r;
+    }
+    fuse_store = new FuseStore(store, mntpath);
+    r = fuse_store->start();
+    if (r < 0) {
+      derr << __func__ << " unable to start fuse: " << cpp_strerror(r) << dendl;
+      delete fuse_store;  // leave fuse_store NULL so a later call can retry
+      fuse_store = NULL;
+      return r;
+    }
+  }
+#endif  // HAVE_LIBFUSE
+  return 0;
+}
+
 int OSD::init()
 {
   CompatSet initial, diff;
@@ -1828,6 +1931,8 @@ int OSD::init()
     return r;
   }
 
+  enable_disable_fuse(false);
+
   dout(2) << "boot" << dendl;
 
   // initialize the daily loadavg with current 15min loadavg
@@ -1882,7 +1987,7 @@ int OSD::init()
     dout(5) << "Upgrading superblock adding: " << diff << dendl;
     ObjectStore::Transaction t;
     write_superblock(t);
-    r = store->apply_transaction(service.meta_osr.get(), t);
+    r = store->apply_transaction(service.meta_osr.get(), std::move(t));
     if (r < 0)
       goto out;
   }
@@ -1892,7 +1997,7 @@ int OSD::init()
     dout(10) << "init creating/touching snapmapper object" << dendl;
     ObjectStore::Transaction t;
     t.touch(coll_t::meta(), OSD::make_snapmapper_oid());
-    r = store->apply_transaction(service.meta_osr.get(), t);
+    r = store->apply_transaction(service.meta_osr.get(), std::move(t));
     if (r < 0)
       goto out;
   }
@@ -1929,6 +2034,8 @@ int OSD::init()
   load_pgs();
 
   dout(2) << "superblock: i am osd." << superblock.whoami << dendl;
+  dout(0) << "using " << op_queue << " op queue with priority op cut off at " <<
+    op_prio_cutoff << "." << dendl;
 
   create_logger();
 
@@ -2018,8 +2125,10 @@ monout:
   monc->shutdown();
 
 out:
+  enable_disable_fuse(true);
   store->umount();
   delete store;
+  store = NULL;
   return r;
 }
 
@@ -2043,6 +2152,10 @@ void OSD::final_init()
 				     "ops", asok_hook,
 				     "show the ops currently in flight");
   assert(r == 0);
+  r = admin_socket->register_command("dump_blocked_ops",
+				     "dump_blocked_ops", asok_hook,
+				     "show the blocked ops currently in flight");
+  assert(r == 0);
   r = admin_socket->register_command("dump_historic_ops", "dump_historic_ops",
 				     asok_hook,
 				     "show slowest recent ops");
@@ -2070,6 +2183,22 @@ void OSD::final_init()
 				     "the mon");
   assert(r == 0);
 
+  r = admin_socket->register_command("set_heap_property",
+				     "set_heap_property " \
+				     "name=property,type=CephString " \
+				     "name=value,type=CephInt",
+				     asok_hook,
+				     "update malloc extension heap property");
+  assert(r == 0);
+
+  r = admin_socket->register_command("get_heap_property",
+				     "get_heap_property " \
+				     "name=property,type=CephString",
+				     asok_hook,
+				     "get malloc extension heap property");
+  assert(r == 0);
+
+
   test_ops_hook = new TestOpsSocketHook(&(this->service), this->store);
   // Note: pools are CephString instead of CephPoolname because
   // these commands traditionally support both pool names and numbers
@@ -2163,48 +2292,48 @@ void OSD::create_logger()
       "Client operations total write size", "wr");       // client op in bytes (writes)
   osd_plb.add_u64_counter(l_osd_op_outb,  "op_out_bytes",
       "Client operations total read size", "rd");      // client op out bytes (reads)
-  osd_plb.add_time_avg(l_osd_op_lat,   "op_latency", 
+  osd_plb.add_time_avg(l_osd_op_lat,   "op_latency",
       "Latency of client operations (including queue time)", "lat");       // client op latency
-  osd_plb.add_time_avg(l_osd_op_process_lat, "op_process_latency", 
+  osd_plb.add_time_avg(l_osd_op_process_lat, "op_process_latency",
       "Latency of client operations (excluding queue time)");   // client op process latency
   osd_plb.add_time_avg(l_osd_op_prepare_lat, "op_prepare_latency",
       "Latency of client operations (excluding queue time and wait for finished)"); // client op prepare latency
 
-  osd_plb.add_u64_counter(l_osd_op_r,      "op_r", 
+  osd_plb.add_u64_counter(l_osd_op_r,      "op_r",
       "Client read operations");        // client reads
-  osd_plb.add_u64_counter(l_osd_op_r_outb, "op_r_out_bytes", 
+  osd_plb.add_u64_counter(l_osd_op_r_outb, "op_r_out_bytes",
       "Client data read");   // client read out bytes
-  osd_plb.add_time_avg(l_osd_op_r_lat,  "op_r_latency", 
+  osd_plb.add_time_avg(l_osd_op_r_lat,  "op_r_latency",
       "Latency of read operation (including queue time)");    // client read latency
-  osd_plb.add_time_avg(l_osd_op_r_process_lat, "op_r_process_latency", 
+  osd_plb.add_time_avg(l_osd_op_r_process_lat, "op_r_process_latency",
       "Latency of read operation (excluding queue time)");   // client read process latency
   osd_plb.add_time_avg(l_osd_op_r_prepare_lat, "op_r_prepare_latency",
       "Latency of read operations (excluding queue time and wait for finished)"); // client read prepare latency
-  osd_plb.add_u64_counter(l_osd_op_w,      "op_w", 
+  osd_plb.add_u64_counter(l_osd_op_w,      "op_w",
       "Client write operations");        // client writes
-  osd_plb.add_u64_counter(l_osd_op_w_inb,  "op_w_in_bytes", 
+  osd_plb.add_u64_counter(l_osd_op_w_inb,  "op_w_in_bytes",
       "Client data written");    // client write in bytes
-  osd_plb.add_time_avg(l_osd_op_w_rlat, "op_w_rlat", 
+  osd_plb.add_time_avg(l_osd_op_w_rlat, "op_w_rlat",
       "Client write operation readable/applied latency");   // client write readable/applied latency
-  osd_plb.add_time_avg(l_osd_op_w_lat,  "op_w_latency", 
+  osd_plb.add_time_avg(l_osd_op_w_lat,  "op_w_latency",
       "Latency of write operation (including queue time)");    // client write latency
-  osd_plb.add_time_avg(l_osd_op_w_process_lat, "op_w_process_latency", 
+  osd_plb.add_time_avg(l_osd_op_w_process_lat, "op_w_process_latency",
       "Latency of write operation (excluding queue time)");   // client write process latency
   osd_plb.add_time_avg(l_osd_op_w_prepare_lat, "op_w_prepare_latency",
       "Latency of write operations (excluding queue time and wait for finished)"); // client write prepare latency
-  osd_plb.add_u64_counter(l_osd_op_rw,     "op_rw", 
+  osd_plb.add_u64_counter(l_osd_op_rw,     "op_rw",
       "Client read-modify-write operations");       // client rmw
-  osd_plb.add_u64_counter(l_osd_op_rw_inb, "op_rw_in_bytes", 
+  osd_plb.add_u64_counter(l_osd_op_rw_inb, "op_rw_in_bytes",
       "Client read-modify-write operations write in");   // client rmw in bytes
-  osd_plb.add_u64_counter(l_osd_op_rw_outb,"op_rw_out_bytes", 
+  osd_plb.add_u64_counter(l_osd_op_rw_outb,"op_rw_out_bytes",
       "Client read-modify-write operations read out ");  // client rmw out bytes
-  osd_plb.add_time_avg(l_osd_op_rw_rlat,"op_rw_rlat", 
+  osd_plb.add_time_avg(l_osd_op_rw_rlat,"op_rw_rlat",
       "Client read-modify-write operation readable/applied latency");  // client rmw readable/applied latency
-  osd_plb.add_time_avg(l_osd_op_rw_lat, "op_rw_latency", 
+  osd_plb.add_time_avg(l_osd_op_rw_lat, "op_rw_latency",
       "Latency of read-modify-write operation (including queue time)");   // client rmw latency
-  osd_plb.add_time_avg(l_osd_op_rw_process_lat, "op_rw_process_latency", 
+  osd_plb.add_time_avg(l_osd_op_rw_process_lat, "op_rw_process_latency",
       "Latency of read-modify-write operation (excluding queue time)");   // client rmw process latency
-  osd_plb.add_time_avg(l_osd_op_rw_prepare_lat, "op_rw_prepare_latency", 
+  osd_plb.add_time_avg(l_osd_op_rw_prepare_lat, "op_rw_prepare_latency",
       "Latency of read-modify-write operations (excluding queue time and wait for finished)"); // client rmw prepare latency
 
   osd_plb.add_u64_counter(l_osd_sop,       "subop", "Suboperations");         // subops
@@ -2362,7 +2491,7 @@ int OSD::shutdown()
     }
   }
   clear_pg_stat_queue();
-  
+
   // finish ops
   op_shardedwq.drain(); // should already be empty except for lagard PGs
   {
@@ -2375,12 +2504,15 @@ int OSD::shutdown()
   cct->get_admin_socket()->unregister_command("flush_journal");
   cct->get_admin_socket()->unregister_command("dump_ops_in_flight");
   cct->get_admin_socket()->unregister_command("ops");
+  cct->get_admin_socket()->unregister_command("dump_blocked_ops");
   cct->get_admin_socket()->unregister_command("dump_historic_ops");
   cct->get_admin_socket()->unregister_command("dump_op_pq_state");
   cct->get_admin_socket()->unregister_command("dump_blacklist");
   cct->get_admin_socket()->unregister_command("dump_watchers");
   cct->get_admin_socket()->unregister_command("dump_reservations");
   cct->get_admin_socket()->unregister_command("get_latest_osdmap");
+  cct->get_admin_socket()->unregister_command("set_heap_property");
+  cct->get_admin_socket()->unregister_command("get_heap_property");
   delete asok_hook;
   asok_hook = NULL;
 
@@ -2444,13 +2576,14 @@ int OSD::shutdown()
   superblock.clean_thru = osdmap->get_epoch();
   ObjectStore::Transaction t;
   write_superblock(t);
-  int r = store->apply_transaction(service.meta_osr.get(), t);
+  int r = store->apply_transaction(service.meta_osr.get(), std::move(t));
   if (r) {
     derr << "OSD::shutdown: error writing superblock: "
 	 << cpp_strerror(r) << dendl;
   }
 
   dout(10) << "syncing store" << dendl;
+  enable_disable_fuse(true);
   store->umount();
   delete store;
   store = 0;
@@ -2534,7 +2667,7 @@ int OSD::read_superblock()
   ::decode(superblock, p);
 
   dout(10) << "read_superblock " << superblock << dendl;
-  
+
   return 0;
 }
 
@@ -2580,7 +2713,7 @@ void OSD::clear_temp_objects()
 	dout(20) << "  removing " << *p << " object " << *q << dendl;
 	t.remove(*p, *q);
       }
-      store->apply_transaction(service.meta_osr.get(), t);
+      store->apply_transaction(service.meta_osr.get(), std::move(t));
     }
   }
 }
@@ -2612,14 +2745,14 @@ void OSD::recursive_remove_collection(ObjectStore *store, spg_t pgid, coll_t tmp
       assert(0);
     t.remove(tmp, *p);
     if (removed > 300) {
-      int r = store->apply_transaction(osr.get(), t);
+      int r = store->apply_transaction(osr.get(), std::move(t));
       assert(r == 0);
       t = ObjectStore::Transaction();
       removed = 0;
     }
   }
   t.remove_collection(tmp);
-  int r = store->apply_transaction(osr.get(), t);
+  int r = store->apply_transaction(osr.get(), std::move(t));
   assert(r == 0);
 
   C_SaferCond waiter;
@@ -2642,7 +2775,7 @@ PGPool OSD::_get_pool(int id, OSDMapRef createmap)
 
   PGPool p = PGPool(id, createmap->get_pool_name(id),
 		    createmap->get_pg_pool(id)->auid);
-    
+
   const pg_pool_t *pi = createmap->get_pg_pool(id);
   p.info = *pi;
   p.snapc = pi->get_snap_context();
@@ -2681,7 +2814,7 @@ PG* OSD::_make_pg(
   if (createmap->get_pg_type(pgid.pgid) == pg_pool_t::TYPE_REPLICATED ||
       createmap->get_pg_type(pgid.pgid) == pg_pool_t::TYPE_ERASURE)
     pg = new ReplicatedPG(&service, createmap, pool, pgid);
-  else 
+  else
     assert(0);
 
   return pg;
@@ -2947,6 +3080,8 @@ void OSD::load_pgs()
     }
     // there can be no waiters here, so we don't call wake_pg_waiters
 
+    pg->ch = store->open_collection(pg->coll);
+
     // read pg state, log
     pg->read_state(store, bl);
 
@@ -2998,14 +3133,14 @@ void OSD::load_pgs()
     dout(1) << __func__ << " removing legacy infos object" << dendl;
     ObjectStore::Transaction t;
     t.remove(coll_t::meta(), OSD::make_infos_oid());
-    int r = store->apply_transaction(service.meta_osr.get(), t);
+    int r = store->apply_transaction(service.meta_osr.get(), std::move(t));
     if (r != 0) {
       derr << __func__ << ": apply_transaction returned "
 	   << cpp_strerror(r) << dendl;
       assert(0);
     }
   }
-  
+
   build_past_intervals_parallel();
 }
 
@@ -3140,6 +3275,10 @@ void OSD::build_past_intervals_parallel()
 
     // Verify same_interval_since is correct
     if (pg->info.history.same_interval_since) {
+      if (pg->info.history.same_interval_since != p.same_interval_since) {
+	dout(0) << __func__ << " history same_interval_since " << pg->info.history.same_interval_since << dendl;
+	dout(0) << __func__ << " same_interval_since " << p.same_interval_since << " pg " << *pg << dendl;
+      }
       assert(pg->info.history.same_interval_since == p.same_interval_since);
     } else {
       assert(p.same_interval_since);
@@ -3167,13 +3306,13 @@ void OSD::build_past_intervals_parallel()
 
     // don't let the transaction get too big
     if (++num >= cct->_conf->osd_target_transaction_size) {
-      store->apply_transaction(service.meta_osr.get(), t);
+      store->apply_transaction(service.meta_osr.get(), std::move(t));
       t = ObjectStore::Transaction();
       num = 0;
     }
   }
   if (!t.empty())
-    store->apply_transaction(service.meta_osr.get(), t);
+    store->apply_transaction(service.meta_osr.get(), std::move(t));
 }
 
 /*
@@ -3389,11 +3528,11 @@ bool OSD::project_pg_history(spg_t pgid, pg_history_t& h, epoch_t from,
 	 upprimary != currentupprimary ||
 	 acting != currentacting ||
 	 up != currentup) && e > h.same_interval_since) {
-      dout(15) << "project_pg_history " << pgid << " acting|up changed in " << e 
+      dout(15) << "project_pg_history " << pgid << " acting|up changed in " << e
 	       << " from " << acting << "/" << up
 	       << " " << actingprimary << "/" << upprimary
 	       << " -> " << currentacting << "/" << currentup
-	       << " " << currentactingprimary << "/" << currentupprimary 
+	       << " " << currentactingprimary << "/" << currentupprimary
 	       << dendl;
       h.same_interval_since = e;
     }
@@ -3406,7 +3545,7 @@ bool OSD::project_pg_history(spg_t pgid, pg_history_t& h, epoch_t from,
     // up set change?
     if ((up != currentup || upprimary != currentupprimary)
 	&& e > h.same_up_since) {
-      dout(15) << "project_pg_history " << pgid << " up changed in " << e 
+      dout(15) << "project_pg_history " << pgid << " up changed in " << e
 	       << " from " << up << " " << upprimary
 	       << " -> " << currentup << " " << currentupprimary << dendl;
       h.same_up_since = e;
@@ -3646,7 +3785,7 @@ void OSD::handle_osd_ping(MOSDPing *m)
   }
 
   OSDMapRef curmap = service.get_osdmap();
-  
+
   switch (m->op) {
 
   case MOSDPing::PING:
@@ -3793,7 +3932,7 @@ void OSD::heartbeat_check()
   assert(heartbeat_lock.is_locked());
   utime_t now = ceph_clock_now(cct);
   double age = hbclient_messenger->get_dispatch_queue_max_age(now);
-  if (age > (cct->_conf->osd_heartbeat_grace / 2)) {
+  if (age > ((double)cct->_conf->osd_heartbeat_grace / 2.0)) {
     derr << "skipping heartbeat_check, hbqueue max age: " << age << dendl;
     return; // hb dispatch is too backed up for our hb status to be meaningful
   }
@@ -3883,7 +4022,7 @@ void OSD::heartbeat()
 
   logger->set(l_osd_hb_to, heartbeat_peers.size());
   logger->set(l_osd_hb_from, 0);
-  
+
   // hmm.. am i all alone?
   dout(30) << "heartbeat lonely?" << dendl;
   if (heartbeat_peers.empty()) {
@@ -4103,7 +4242,7 @@ void TestOpsSocketHook::test_ops(OSDService *service, ObjectStore *store,
     pg_t rawpg;
     int64_t pool;
     OSDMapRef curmap = service->get_osdmap();
-    int r;
+    int r = -1;
 
     string poolstr;
 
@@ -4116,7 +4255,7 @@ void TestOpsSocketHook::test_ops(OSDService *service, ObjectStore *store,
       ss << "Invalid pool" << poolstr;
       return;
     }
-    r = -1;
+
     string objname, nspace;
     cmd_getval(service->cct, cmdmap, "objname", objname);
     std::size_t found = objname.find_first_of('/');
@@ -4156,7 +4295,7 @@ void TestOpsSocketHook::test_ops(OSDService *service, ObjectStore *store,
       val.append(valstr);
       newattrs[key] = val;
       t.omap_setkeys(coll_t(pgid), ghobject_t(obj), newattrs);
-      r = store->apply_transaction(service->meta_osr.get(), t);
+      r = store->apply_transaction(service->meta_osr.get(), std::move(t));
       if (r < 0)
         ss << "error=" << r;
       else
@@ -4168,7 +4307,7 @@ void TestOpsSocketHook::test_ops(OSDService *service, ObjectStore *store,
 
       keys.insert(key);
       t.omap_rmkeys(coll_t(pgid), ghobject_t(obj), keys);
-      r = store->apply_transaction(service->meta_osr.get(), t);
+      r = store->apply_transaction(service->meta_osr.get(), std::move(t));
       if (r < 0)
         ss << "error=" << r;
       else
@@ -4180,7 +4319,7 @@ void TestOpsSocketHook::test_ops(OSDService *service, ObjectStore *store,
       cmd_getval(service->cct, cmdmap, "header", headerstr);
       newheader.append(headerstr);
       t.omap_setheader(coll_t(pgid), ghobject_t(obj), newheader);
-      r = store->apply_transaction(service->meta_osr.get(), t);
+      r = store->apply_transaction(service->meta_osr.get(), std::move(t));
       if (r < 0)
         ss << "error=" << r;
       else
@@ -4203,7 +4342,7 @@ void TestOpsSocketHook::test_ops(OSDService *service, ObjectStore *store,
       int64_t trunclen;
       cmd_getval(service->cct, cmdmap, "len", trunclen);
       t.truncate(coll_t(pgid), ghobject_t(obj), trunclen);
-      r = store->apply_transaction(service->meta_osr.get(), t);
+      r = store->apply_transaction(service->meta_osr.get(), std::move(t));
       if (r < 0)
 	ss << "error=" << r;
       else
@@ -4275,7 +4414,7 @@ bool remove_dir(
     t.remove(coll, *i);
     if (num >= cct->_conf->osd_target_transaction_size) {
       C_SaferCond waiter;
-      store->queue_transaction(osr, &t, &waiter);
+      store->queue_transaction(osr, std::move(t), &waiter);
       bool cont = dstate->pause_clearing();
       handle.suspend_tp_timeout();
       waiter.wait();
@@ -4288,9 +4427,8 @@ bool remove_dir(
       num = 0;
     }
   }
-
   C_SaferCond waiter;
-  store->queue_transaction(osr, &t, &waiter);
+  store->queue_transaction(osr, std::move(t), &waiter);
   bool cont = dstate->pause_clearing();
   handle.suspend_tp_timeout();
   waiter.wait();
@@ -4330,24 +4468,23 @@ void OSD::RemoveWQ::_process(
   if (!item.second->start_deleting())
     return;
 
-  ObjectStore::Transaction *t = new ObjectStore::Transaction;
-  PGLog::clear_info_log(pg->info.pgid, t);
+  ObjectStore::Transaction t;
+  PGLog::clear_info_log(pg->info.pgid, &t);
 
   if (g_conf->osd_inject_failure_on_pg_removal) {
     generic_derr << "osd_inject_failure_on_pg_removal" << dendl;
     exit(1);
   }
-  t->remove_collection(coll);
+  t.remove_collection(coll);
 
   // We need the sequencer to stick around until the op is complete
   store->queue_transaction(
     pg->osr.get(),
-    t,
+    std::move(t),
     0, // onapplied
     0, // oncommit
     0, // onreadable sync
-    new ObjectStore::C_DeleteTransactionHolder<PGRef>(
-      t, pg), // oncomplete
+    new ContainerContext<PGRef>(pg),
     TrackedOpRef());
 
   item.second->finish_deleting();
@@ -4447,7 +4584,7 @@ bool OSD::ms_handle_reset(Connection *con)
 struct C_OSD_GetVersion : public Context {
   OSD *osd;
   uint64_t oldest, newest;
-  C_OSD_GetVersion(OSD *o) : osd(o), oldest(0), newest(0) {}
+  explicit C_OSD_GetVersion(OSD *o) : osd(o), oldest(0), newest(0) {}
   void finish(int r) {
     if (r >= 0)
       osd->_got_mon_epochs(oldest, newest);
@@ -4502,7 +4639,7 @@ void OSD::_preboot(epoch_t oldest, epoch_t newest)
     _send_boot();
     return;
   }
-  
+
   // get all the latest maps
   if (osdmap->get_epoch() + 1 >= oldest)
     osdmap_subscribe(osdmap->get_epoch() + 1, false);
@@ -4637,13 +4774,13 @@ void OSD::queue_want_up_thru(epoch_t want)
   epoch_t cur = osdmap->get_up_thru(whoami);
   Mutex::Locker l(mon_report_lock);
   if (want > up_thru_wanted) {
-    dout(10) << "queue_want_up_thru now " << want << " (was " << up_thru_wanted << ")" 
+    dout(10) << "queue_want_up_thru now " << want << " (was " << up_thru_wanted << ")"
 	     << ", currently " << cur
 	     << dendl;
     up_thru_wanted = want;
     send_alive();
   } else {
-    dout(10) << "queue_want_up_thru want " << want << " <= queued " << up_thru_wanted 
+    dout(10) << "queue_want_up_thru want " << want << " <= queued " << up_thru_wanted
 	     << ", currently " << cur
 	     << dendl;
   }
@@ -4772,7 +4909,7 @@ void OSD::send_pg_stats(const utime_t &now)
   osd_stat_t cur_stat = service.get_osd_stat();
 
   cur_stat.fs_perf_stat = store->get_cur_stats();
-   
+
   pg_stat_queue_lock.Lock();
 
   if (osd_stat_updated || !pg_stat_queue.empty()) {
@@ -4956,7 +5093,7 @@ struct OSDCommand {
 
 // yes, these are really pg commands, but there's a limit to how
 // much work it's worth.  The OSD returns all of them.  Make this
-// form (pg <pgid> <cmd>) valid only for the cli. 
+// form (pg <pgid> <cmd>) valid only for the cli.
 // Rest uses "tell <pgid> <cmd>"
 
 COMMAND("pg " \
@@ -4976,7 +5113,7 @@ COMMAND("pg " \
 	"list missing objects on this pg, perhaps starting at an offset given in JSON",
 	"osd", "r", "cli")
 
-// new form: tell <pgid> <cmd> for both cli and rest 
+// new form: tell <pgid> <cmd> for both cli and rest
 
 COMMAND("query",
 	"show details of a specific pg", "osd", "r", "cli,rest")
@@ -5245,30 +5382,30 @@ void OSD::do_command(Connection *con, ceph_tid_t tid, vector<string>& cmd, buffe
     dout(1) << " bench count " << count
             << " bsize " << prettybyte_t(bsize) << dendl;
 
-    ObjectStore::Transaction *cleanupt = new ObjectStore::Transaction;
+    ObjectStore::Transaction cleanupt;
 
     if (osize && onum) {
       bufferlist bl;
       bufferptr bp(osize);
       bp.zero();
-      bl.push_back(bp);
+      bl.push_back(std::move(bp));
       bl.rebuild_page_aligned();
       for (int i=0; i<onum; ++i) {
 	char nm[30];
 	snprintf(nm, sizeof(nm), "disk_bw_test_%d", i);
 	object_t oid(nm);
 	hobject_t soid(sobject_t(oid, 0));
-	ObjectStore::Transaction *t = new ObjectStore::Transaction;
-	t->write(coll_t(), ghobject_t(soid), 0, osize, bl);
-	store->queue_transaction_and_cleanup(osr.get(), t);
-	cleanupt->remove(coll_t(), ghobject_t(soid));
+	ObjectStore::Transaction t;
+	t.write(coll_t(), ghobject_t(soid), 0, osize, bl);
+	store->queue_transaction(osr.get(), std::move(t), NULL);
+	cleanupt.remove(coll_t(), ghobject_t(soid));
       }
     }
 
     bufferlist bl;
     bufferptr bp(bsize);
     bp.zero();
-    bl.push_back(bp);
+    bl.push_back(std::move(bp));
     bl.rebuild_page_aligned();
 
     {
@@ -5290,11 +5427,11 @@ void OSD::do_command(Connection *con, ceph_tid_t tid, vector<string>& cmd, buffe
       }
       object_t oid(nm);
       hobject_t soid(sobject_t(oid, 0));
-      ObjectStore::Transaction *t = new ObjectStore::Transaction;
-      t->write(coll_t::meta(), ghobject_t(soid), offset, bsize, bl);
-      store->queue_transaction_and_cleanup(osr.get(), t);
+      ObjectStore::Transaction t;
+      t.write(coll_t::meta(), ghobject_t(soid), offset, bsize, bl);
+      store->queue_transaction(osr.get(), std::move(t), NULL);
       if (!onum || !osize)
-	cleanupt->remove(coll_t::meta(), ghobject_t(soid));
+	cleanupt.remove(coll_t::meta(), ghobject_t(soid));
     }
 
     {
@@ -5306,7 +5443,7 @@ void OSD::do_command(Connection *con, ceph_tid_t tid, vector<string>& cmd, buffe
     utime_t end = ceph_clock_now(cct);
 
     // clean up
-    store->queue_transaction_and_cleanup(osr.get(), cleanupt);
+    store->queue_transaction(osr.get(), std::move(cleanupt), NULL);
     {
       C_SaferCond waiter;
       if (!osr->flush_commit(&waiter)) {
@@ -5332,7 +5469,7 @@ void OSD::do_command(Connection *con, ceph_tid_t tid, vector<string>& cmd, buffe
   else if (prefix == "flush_pg_stats") {
     flush_pg_stats();
   }
-  
+
   else if (prefix == "heap") {
     if (!ceph_using_tcmalloc()) {
       r = -EOPNOTSUPP;
@@ -5464,7 +5601,7 @@ bool OSD::heartbeat_dispatch(Message *m)
 {
   dout(30) << "heartbeat_dispatch " << m << dendl;
   switch (m->get_type()) {
-    
+
   case CEPH_MSG_PING:
     dout(10) << "ping from " << m->get_source_inst() << dendl;
     m->put();
@@ -5725,7 +5862,7 @@ bool OSD::ms_verify_authorizer(Connection *con, int peer_type,
     if (caps_info.allow_all)
       s->caps.set_allow_all();
     s->auid = auid;
- 
+
     if (caps_info.caps.length() > 0) {
       bufferlist::iterator p = caps_info.caps.begin();
       string str;
@@ -5740,7 +5877,7 @@ bool OSD::ms_verify_authorizer(Connection *con, int peer_type,
       else
 	dout(10) << " session " << s << " " << s->entity_name << " failed to parse caps '" << str << "'" << dendl;
     }
-    
+
     s->put();
   }
   return true;
@@ -5749,7 +5886,7 @@ bool OSD::ms_verify_authorizer(Connection *con, int peer_type,
 void OSD::do_waiters()
 {
   assert(osd_lock.is_locked());
-  
+
   dout(10) << "do_waiters -- start" << dendl;
   finished_lock.Lock();
   while (!finished.empty()) {
@@ -5947,7 +6084,7 @@ void OSD::_dispatch(Message *m)
 
   switch (m->get_type()) {
 
-    // -- don't need lock -- 
+    // -- don't need lock --
   case CEPH_MSG_PING:
     dout(10) << "ping from " << m->get_source() << dendl;
     m->put();
@@ -5989,7 +6126,7 @@ void OSD::_dispatch(Message *m)
 	op->mark_delayed("no osdmap");
         break;
       }
-      
+
       // need OSDMap
       dispatch_op(op);
     }
@@ -6050,7 +6187,7 @@ void OSD::handle_scrub(MOSDScrub *m)
       }
     }
   }
-  
+
   m->put();
 }
 
@@ -6096,18 +6233,18 @@ bool OSDService::ScrubJob::ScrubJob::operator<(const OSDService::ScrubJob& rhs)
 
 bool OSD::scrub_time_permit(utime_t now)
 {
-  struct tm bdt; 
+  struct tm bdt;
   time_t tt = now.sec();
   localtime_r(&tt, &bdt);
   bool time_permit = false;
   if (cct->_conf->osd_scrub_begin_hour < cct->_conf->osd_scrub_end_hour) {
     if (bdt.tm_hour >= cct->_conf->osd_scrub_begin_hour && bdt.tm_hour < cct->_conf->osd_scrub_end_hour) {
       time_permit = true;
-    }    
+    }
   } else {
     if (bdt.tm_hour >= cct->_conf->osd_scrub_begin_hour || bdt.tm_hour < cct->_conf->osd_scrub_end_hour) {
       time_permit = true;
-    }    
+    }
   }
   if (!time_permit) {
     dout(20) << __func__ << " should run between " << cct->_conf->osd_scrub_begin_hour
@@ -6194,7 +6331,7 @@ void OSD::sched_scrub()
       }
       pg->unlock();
     } while (service.next_scrub_stamp(scrub, &scrub));
-  }    
+  }
   dout(20) << "sched_scrub done" << dendl;
 }
 
@@ -6209,7 +6346,7 @@ void OSD::wait_for_new_map(OpRequestRef op)
   if (waiting_for_osdmap.empty()) {
     osdmap_subscribe(osdmap->get_epoch() + 1, false);
   }
-  
+
   logger->inc(l_osd_waiting_for_map);
   waiting_for_osdmap.push_back(op);
   op->mark_delayed("wait for new map");
@@ -6246,14 +6383,12 @@ void OSD::note_up_osd(int peer)
 
 struct C_OnMapApply : public Context {
   OSDService *service;
-  boost::scoped_ptr<ObjectStore::Transaction> t;
   list<OSDMapRef> pinned_maps;
   epoch_t e;
   C_OnMapApply(OSDService *service,
-	       ObjectStore::Transaction *t,
 	       const list<OSDMapRef> &pinned_maps,
 	       epoch_t e)
-    : service(service), t(t), pinned_maps(pinned_maps), e(e) {}
+    : service(service), pinned_maps(pinned_maps), e(e) {}
   void finish(int r) {
     service->clear_map_bl_cache_pins(e);
   }
@@ -6342,8 +6477,7 @@ void OSD::handle_osd_map(MOSDMap *m)
     skip_maps = true;
   }
 
-  ObjectStore::Transaction *_t = new ObjectStore::Transaction;
-  ObjectStore::Transaction &t = *_t;
+  ObjectStore::Transaction t;
 
   // store new maps: queue for disk and put in the osdmap cache
   epoch_t start = MAX(osdmap->get_epoch() + 1, first);
@@ -6354,7 +6488,7 @@ void OSD::handle_osd_map(MOSDMap *m)
       dout(10) << "handle_osd_map  got full map for epoch " << e << dendl;
       OSDMap *o = new OSDMap;
       bufferlist& bl = p->second;
-      
+
       o->decode(bl);
 
       ghobject_t fulloid = get_osdmap_pobject_name(e);
@@ -6429,7 +6563,6 @@ void OSD::handle_osd_map(MOSDMap *m)
 
   if (last <= osdmap->get_epoch()) {
     dout(10) << " no new maps here, dropping" << dendl;
-    delete _t;
     m->put();
     return;
   }
@@ -6482,7 +6615,7 @@ void OSD::handle_osd_map(MOSDMap *m)
 	note_down_osd(*p);
       }
     }
-    
+
     osdmap = newmap;
 
     superblock.current_epoch = cur;
@@ -6544,7 +6677,7 @@ void OSD::handle_osd_map(MOSDMap *m)
 	clog->error() << "map e" << osdmap->get_epoch()
 		    << " had wrong hb front addr (" << osdmap->get_hb_front_addr(whoami)
 		     << " != my " << hb_front_server_messenger->get_myaddr() << ")";
-      
+
       if (!service.is_stopping()) {
         epoch_t up_epoch = 0;
         epoch_t bind_epoch = osdmap->get_epoch();
@@ -6607,8 +6740,8 @@ void OSD::handle_osd_map(MOSDMap *m)
   write_superblock(t);
   store->queue_transaction(
     service.meta_osr.get(),
-    _t,
-    new C_OnMapApply(&service, _t, pinned_maps, osdmap->get_epoch()),
+    std::move(t),
+    new C_OnMapApply(&service, pinned_maps, osdmap->get_epoch()),
     0, 0);
   service.publish_superblock(superblock);
 
@@ -6635,14 +6768,14 @@ void OSD::handle_osd_map(MOSDMap *m)
   }
   else if (do_shutdown) {
     if (network_error) {
-      Mutex::Locker l(heartbeat_lock);	
+      Mutex::Locker l(heartbeat_lock);
       map<int,pair<utime_t,entity_inst_t>>::iterator it = failure_pending.begin();
       while (it != failure_pending.end()) {
         dout(10) << "handle_osd_ping canceling in-flight failure report for osd." << it->first << dendl;
         send_still_alive(osdmap->get_epoch(), it->second.second);
         failure_pending.erase(it++);
       }
-    }	
+    }
     osd_lock.Unlock();
     shutdown();
     osd_lock.Lock();
@@ -6709,9 +6842,9 @@ void OSD::check_osdmap_features(ObjectStore *fs)
 	!superblock.compat_features.incompat.contains(CEPH_OSD_FEATURE_INCOMPAT_SHARDS)) {
       dout(0) << __func__ << " enabling on-disk ERASURE CODES compat feature" << dendl;
       superblock.compat_features.incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_SHARDS);
-      ObjectStore::Transaction *t = new ObjectStore::Transaction;
-      write_superblock(*t);
-      int err = store->queue_transaction_and_cleanup(service.meta_osr.get(), t);
+      ObjectStore::Transaction t;
+      write_superblock(t);
+      int err = store->queue_transaction(service.meta_osr.get(), std::move(t), NULL);
       assert(err == 0);
     }
   }
@@ -6788,7 +6921,7 @@ bool OSD::advance_pg(
   return true;
 }
 
-/** 
+/**
  * update service map; check pg creations
  */
 void OSD::advance_map()
@@ -6872,11 +7005,7 @@ void OSD::consume_map()
   for (set<spg_t>::iterator p = pgs_to_check.begin();
        p != pgs_to_check.end();
        ++p) {
-    vector<int> acting;
-    int nrep = osdmap->pg_to_acting_osds(p->pgid, acting);
-    int role = osdmap->calc_pg_role(whoami, acting, nrep);
-
-    if (role < 0) {
+    if (!(osdmap->is_acting_osd_shard(p->pgid, whoami, p->shard))) {
       set<Session*> concerned_sessions;
       get_sessions_possibly_interested_in_pg(*p, &concerned_sessions);
       for (set<Session*>::iterator i = concerned_sessions.begin();
@@ -6985,7 +7114,7 @@ bool OSD::require_same_peer_instance(Message *m, OSDMapRef& map,
 				     bool is_fast_dispatch)
 {
   int from = m->get_source().num();
-  
+
   if (!map->have_inst(from) ||
       (map->get_cluster_addr(from) != m->get_source_inst().addr)) {
     dout(5) << "from dead osd." << from << ", marking down, "
@@ -7078,6 +7207,7 @@ void OSD::split_pgs(
     PG* child = _make_pg(nextmap, *i);
     child->lock(true);
     out_pgs->insert(child);
+    rctx->created_pgs.insert(child);
 
     unsigned split_bits = i->get_split_bits(pg_num);
     dout(10) << "pg_num is " << pg_num << dendl;
@@ -7103,7 +7233,7 @@ void OSD::split_pgs(
   parent->info.stats.stats.sum = *stat_iter;
   parent->write_if_dirty(*(rctx->transaction));
 }
-  
+
 /*
  * holding osd_lock
  */
@@ -7151,7 +7281,7 @@ void OSD::handle_pg_create(OpRequestRef op)
     }
 
     dout(20) << "mkpg " << on << " e" << created << "@" << ci->second << dendl;
-   
+
     // is it still ours?
     vector<int> up, acting;
     int up_primary = -1;
@@ -7239,15 +7369,32 @@ PG::RecoveryCtx OSD::create_context()
   return rctx;
 }
 
+struct C_OpenPGs : public Context {
+  set<PGRef> pgs;
+  ObjectStore *store;
+  C_OpenPGs(set<PGRef>& p, ObjectStore *s) : store(s) {
+    pgs.swap(p);
+  }
+  void finish(int r) {
+    for (auto p : pgs) {
+      p->ch = store->open_collection(p->coll);
+      assert(p->ch);
+    }
+  }
+};
+
 void OSD::dispatch_context_transaction(PG::RecoveryCtx &ctx, PG *pg,
                                        ThreadPool::TPHandle *handle)
 {
   if (!ctx.transaction->empty()) {
-    ctx.on_applied->add(new ObjectStore::C_DeleteTransaction(ctx.transaction));
+    if (!ctx.created_pgs.empty()) {
+      ctx.on_applied->add(new C_OpenPGs(ctx.created_pgs, store));
+    }
     int tr = store->queue_transaction(
       pg->osr.get(),
-      ctx.transaction, ctx.on_applied, ctx.on_safe, NULL,
+      std::move(*ctx.transaction), ctx.on_applied, ctx.on_safe, NULL,
       TrackedOpRef(), handle);
+    delete (ctx.transaction);
     assert(tr == 0);
     ctx.transaction = new ObjectStore::Transaction;
     ctx.on_applied = new C_Contexts(cct);
@@ -7269,16 +7416,21 @@ void OSD::dispatch_context(PG::RecoveryCtx &ctx, PG *pg, OSDMapRef curmap,
   delete ctx.info_map;
   if ((ctx.on_applied->empty() &&
        ctx.on_safe->empty() &&
-       ctx.transaction->empty()) || !pg) {
+       ctx.transaction->empty() &&
+       ctx.created_pgs.empty()) || !pg) {
     delete ctx.transaction;
     delete ctx.on_applied;
     delete ctx.on_safe;
+    assert(ctx.created_pgs.empty());
   } else {
-    ctx.on_applied->add(new ObjectStore::C_DeleteTransaction(ctx.transaction));
+    if (!ctx.created_pgs.empty()) {
+      ctx.on_applied->add(new C_OpenPGs(ctx.created_pgs, store));
+    }
     int tr = store->queue_transaction(
       pg->osr.get(),
-      ctx.transaction, ctx.on_applied, ctx.on_safe, NULL, TrackedOpRef(),
+      std::move(*ctx.transaction), ctx.on_applied, ctx.on_safe, NULL, TrackedOpRef(),
       handle);
+    delete (ctx.transaction);
     assert(tr == 0);
   }
 }
@@ -7355,7 +7507,7 @@ void OSD::do_infos(map<int,
 	   vector<pair<pg_notify_t, pg_interval_map_t> > >::iterator p =
 	 info_map.begin();
        p != info_map.end();
-       ++p) { 
+       ++p) {
     if (!curmap->is_up(p->first)) {
       dout(20) << __func__ << " skipping down osd." << p->first << dendl;
       continue;
@@ -7535,15 +7687,14 @@ void OSD::handle_pg_trim(OpRequestRef op)
       }
     } else {
       // primary is instructing us to trim
-      ObjectStore::Transaction *t = new ObjectStore::Transaction;
+      ObjectStore::Transaction t;
       PG::PGLogEntryHandler handler;
       pg->pg_log.trim(&handler, m->trim_to, pg->info);
-      handler.apply(pg, t);
+      handler.apply(pg, &t);
       pg->dirty_info = true;
-      pg->write_if_dirty(*t);
+      pg->write_if_dirty(t);
       int tr = store->queue_transaction(
-	pg->osr.get(), t,
-	new ObjectStore::C_DeleteTransaction(t));
+	pg->osr.get(), std::move(t), NULL);
       assert(tr == 0);
     }
     pg->unlock();
@@ -7665,14 +7816,14 @@ void OSD::handle_pg_query(OpRequestRef op)
 
   dout(7) << "handle_pg_query from " << m->get_source() << " epoch " << m->get_epoch() << dendl;
   int from = m->get_source().num();
-  
+
   if (!require_same_or_newer_map(op, m->get_epoch(), false))
     return;
 
   op->mark_started();
 
   map< int, vector<pair<pg_notify_t, pg_interval_map_t> > > notify_list;
-  
+
   for (map<spg_t,pg_query_t>::iterator it = m->pg_list.begin();
        it != m->pg_list.end();
        ++it) {
@@ -7773,10 +7924,10 @@ void OSD::handle_pg_remove(OpRequestRef op)
 
   dout(7) << "handle_pg_remove from " << m->get_source() << " on "
 	  << m->pg_list.size() << " pgs" << dendl;
-  
+
   if (!require_same_or_newer_map(op, m->get_epoch(), false))
     return;
-  
+
   op->mark_started();
 
   for (vector<spg_t>::iterator it = m->pg_list.begin();
@@ -7787,7 +7938,7 @@ void OSD::handle_pg_remove(OpRequestRef op)
       dout(10) << "ignoring localized pg " << pgid << dendl;
       continue;
     }
-    
+
     RWLock::WLocker l(pg_map_lock);
     if (pg_map.count(pgid) == 0) {
       dout(10) << " don't have pg " << pgid << dendl;
@@ -7820,20 +7971,18 @@ void OSD::handle_pg_remove(OpRequestRef op)
 
 void OSD::_remove_pg(PG *pg)
 {
-  ObjectStore::Transaction *rmt = new ObjectStore::Transaction;
+  ObjectStore::Transaction rmt ;
 
   // on_removal, which calls remove_watchers_and_notifies, and the erasure from
   // the pg_map must be done together without unlocking the pg lock,
   // to avoid racing with watcher cleanup in ms_handle_reset
   // and handle_notify_timeout
-  pg->on_removal(rmt);
+  pg->on_removal(&rmt);
 
   service.cancel_pending_splits_for_parent(pg->info.pgid);
 
   store->queue_transaction(
-    pg->osr.get(), rmt,
-    new ObjectStore::C_DeleteTransactionHolder<
-      SequencerRef>(rmt, pg->osr),
+    pg->osr.get(), std::move(rmt), NULL, 
     new ContainerContext<
       SequencerRef>(pg->osr));
 
@@ -7944,12 +8093,12 @@ void OSD::do_recovery(PG *pg, ThreadPool::TPHandle &handle)
       pg->unlock();
       goto out;
     }
-    
+
     dout(10) << "do_recovery starting " << max << " " << *pg << dendl;
 #ifdef DEBUG_RECOVERY_OIDS
     dout(20) << "  active was " << recovery_oids[pg->info.pgid] << dendl;
 #endif
-    
+
     int started = 0;
     bool more = pg->start_recovery_ops(max, handle, &started);
     dout(10) << "do_recovery started " << started << "/" << max << " on " << *pg << dendl;
@@ -8074,7 +8223,7 @@ public:
     if (session) {
       session->sent_epoch_lock.lock();
       if (session->last_sent_epoch < last_sent_epoch) {
-	session->last_sent_epoch = last_sent_epoch;	
+	session->last_sent_epoch = last_sent_epoch;
       }
       session->sent_epoch_lock.unlock();
       session->put();
@@ -8219,7 +8368,7 @@ void OSD::handle_replica_op(OpRequestRef& op, OSDMapRef& osdmap)
 
   // must be a rep op.
   assert(m->get_source().is_osd());
-  
+
   // share our map with sender, if they're old
   bool should_share_map = false;
   Session *peer_session =
@@ -8280,22 +8429,22 @@ void OSD::ShardedOpWQ::_process(uint32_t thread_index, heartbeat_handle_d *hb )
   ShardData* sdata = shard_list[shard_index];
   assert(NULL != sdata);
   sdata->sdata_op_ordering_lock.Lock();
-  if (sdata->pqueue.empty()) {
+  if (sdata->pqueue->empty()) {
     sdata->sdata_op_ordering_lock.Unlock();
     osd->cct->get_heartbeat_map()->reset_timeout(hb, 4, 0);
     sdata->sdata_lock.Lock();
     sdata->sdata_cond.WaitInterval(osd->cct, sdata->sdata_lock, utime_t(2, 0));
     sdata->sdata_lock.Unlock();
     sdata->sdata_op_ordering_lock.Lock();
-    if(sdata->pqueue.empty()) {
+    if(sdata->pqueue->empty()) {
       sdata->sdata_op_ordering_lock.Unlock();
       return;
     }
   }
-  pair<PGRef, PGQueueable> item = sdata->pqueue.dequeue();
+  pair<PGRef, PGQueueable> item = sdata->pqueue->dequeue();
   sdata->pg_for_processing[&*(item.first)].push_back(item.second);
   sdata->sdata_op_ordering_lock.Unlock();
-  ThreadPool::TPHandle tp_handle(osd->cct, hb, timeout_interval, 
+  ThreadPool::TPHandle tp_handle(osd->cct, hb, timeout_interval,
     suicide_interval);
 
   (item.first)->lock_suspend_timeout(tp_handle);
@@ -8312,7 +8461,7 @@ void OSD::ShardedOpWQ::_process(uint32_t thread_index, heartbeat_handle_d *hb )
     sdata->pg_for_processing[&*(item.first)].pop_front();
     if (!(sdata->pg_for_processing[&*(item.first)].size()))
       sdata->pg_for_processing.erase(&*(item.first));
-  }  
+  }
 
   // osd:opwq_process marks the point at which an operation has been dequeued
   // and will begin to be handled by a worker thread.
@@ -8362,11 +8511,11 @@ void OSD::ShardedOpWQ::_enqueue(pair<PGRef, PGQueueable> item) {
   unsigned cost = item.second.get_cost();
   sdata->sdata_op_ordering_lock.Lock();
  
-  if (priority >= CEPH_MSG_PRIO_LOW)
-    sdata->pqueue.enqueue_strict(
+  if (priority >= osd->op_prio_cutoff)
+    sdata->pqueue->enqueue_strict(
       item.second.get_owner(), priority, item);
   else
-    sdata->pqueue.enqueue(
+    sdata->pqueue->enqueue(
       item.second.get_owner(),
       priority, cost, item);
   sdata->sdata_op_ordering_lock.Unlock();
@@ -8391,12 +8540,12 @@ void OSD::ShardedOpWQ::_enqueue_front(pair<PGRef, PGQueueable> item) {
   }
   unsigned priority = item.second.get_priority();
   unsigned cost = item.second.get_cost();
-  if (priority >= CEPH_MSG_PRIO_LOW)
-    sdata->pqueue.enqueue_strict_front(
+  if (priority >= osd->op_prio_cutoff)
+    sdata->pqueue->enqueue_strict_front(
       item.second.get_owner(),
       priority, item);
   else
-    sdata->pqueue.enqueue_front(
+    sdata->pqueue->enqueue_front(
       item.second.get_owner(),
       priority, cost, item);
 
@@ -8565,6 +8714,12 @@ const char** OSD::get_tracked_conf_keys() const
     "clog_to_syslog",
     "clog_to_syslog_facility",
     "clog_to_syslog_level",
+    "osd_objectstore_fuse",
+    "clog_to_graylog",
+    "clog_to_graylog_host",
+    "clog_to_graylog_port",
+    "host",
+    "fsid",
     NULL
   };
   return KEYS;
@@ -8606,10 +8761,21 @@ void OSD::handle_conf_change(const struct md_config_t *conf,
   if (changed.count("clog_to_monitors") ||
       changed.count("clog_to_syslog") ||
       changed.count("clog_to_syslog_level") ||
-      changed.count("clog_to_syslog_facility")) {
+      changed.count("clog_to_syslog_facility") ||
+      changed.count("clog_to_graylog") ||
+      changed.count("clog_to_graylog_host") ||
+      changed.count("clog_to_graylog_port") ||
+      changed.count("host") ||
+      changed.count("fsid")) {
     update_log_config();
   }
-
+#ifdef HAVE_LIBFUSE
+  if (changed.count("osd_objectstore_fuse")) {
+    if (store) {
+      enable_disable_fuse(false);
+    }
+  }
+#endif
   check_config();
 }
 
@@ -8619,10 +8785,20 @@ void OSD::update_log_config()
   map<string,string> log_to_syslog;
   map<string,string> log_channel;
   map<string,string> log_prio;
+  map<string,string> log_to_graylog;
+  map<string,string> log_to_graylog_host;
+  map<string,string> log_to_graylog_port;
+  uuid_d fsid;
+  string host;
+
   if (parse_log_client_options(g_ceph_context, log_to_monitors, log_to_syslog,
-			       log_channel, log_prio) == 0)
+			       log_channel, log_prio, log_to_graylog,
+			       log_to_graylog_host, log_to_graylog_port,
+			       fsid, host) == 0)
     clog->update_config(log_to_monitors, log_to_syslog,
-			log_channel, log_prio);
+			log_channel, log_prio, log_to_graylog,
+			log_to_graylog_host, log_to_graylog_port,
+			fsid, host);
   derr << "log_to_monitors " << log_to_monitors << dendl;
 }
 
diff --git a/src/osd/OSD.h b/src/osd/OSD.h
index 04bbe00..169623a 100644
--- a/src/osd/OSD.h
+++ b/src/osd/OSD.h
@@ -52,7 +52,9 @@ using namespace std;
 #include "common/shared_cache.hpp"
 #include "common/simple_cache.hpp"
 #include "common/sharedptr_registry.hpp"
+#include "common/WeightedPriorityQueue.h"
 #include "common/PrioritizedQueue.h"
+#include "common/OpQueue.h"
 #include "messages/MOSDOp.h"
 #include "include/Spinlock.h"
 
@@ -202,6 +204,7 @@ class Message;
 class MonClient;
 class PerfCounters;
 class ObjectStore;
+class FuseStore;
 class OSDMap;
 class MLog;
 class MClass;
@@ -236,7 +239,7 @@ class DeletingState {
 public:
   const spg_t pgid;
   const PGRef old_pg_state;
-  DeletingState(const pair<spg_t, PGRef> &in) :
+  explicit DeletingState(const pair<spg_t, PGRef> &in) :
     lock("DeletingState::lock"), status(QUEUED), stop_deleting(false),
     pgid(in.first), old_pg_state(in.second) {
     }
@@ -327,7 +330,7 @@ class OSD;
 
 struct PGScrub {
   epoch_t epoch_queued;
-  PGScrub(epoch_t e) : epoch_queued(e) {}
+  explicit PGScrub(epoch_t e) : epoch_queued(e) {}
   ostream &operator<<(ostream &rhs) {
     return rhs << "PGScrub";
   }
@@ -335,7 +338,7 @@ struct PGScrub {
 
 struct PGSnapTrim {
   epoch_t epoch_queued;
-  PGSnapTrim(epoch_t e) : epoch_queued(e) {}
+  explicit PGSnapTrim(epoch_t e) : epoch_queued(e) {}
   ostream &operator<<(ostream &rhs) {
     return rhs << "PGSnapTrim";
   }
@@ -363,6 +366,7 @@ class PGQueueable {
     void operator()(PGScrub &op);
   };
 public:
+  // cppcheck-suppress noExplicitConstructor
   PGQueueable(OpRequestRef op)
     : qvariant(op), cost(op->get_req()->get_cost()),
       priority(op->get_req()->get_priority()),
@@ -666,7 +670,7 @@ public:
   bool agent_active;
   struct AgentThread : public Thread {
     OSDService *osd;
-    AgentThread(OSDService *o) : osd(o) {}
+    explicit AgentThread(OSDService *o) : osd(o) {}
     void *entry() {
       osd->agent_entry();
       return NULL;
@@ -1031,7 +1035,7 @@ public:
   }
 #endif
 
-  OSDService(OSD *osd);
+  explicit OSDService(OSD *osd);
   ~OSDService();
 };
 
@@ -1079,7 +1083,9 @@ protected:
   PerfCounters      *logger;
   PerfCounters      *recoverystate_perf;
   ObjectStore *store;
-
+#ifdef HAVE_LIBFUSE
+  FuseStore *fuse_store = nullptr;
+#endif
   LogClient log_client;
   LogChannelRef clog;
 
@@ -1089,7 +1095,7 @@ protected:
   class C_Tick : public Context {
     OSD *osd;
   public:
-    C_Tick(OSD *o) : osd(o) {}
+    explicit C_Tick(OSD *o) : osd(o) {}
     void finish(int r) {
       osd->tick();
     }
@@ -1098,7 +1104,7 @@ protected:
   class C_Tick_WithoutOSDLock : public Context {
     OSD *osd;
   public:
-    C_Tick_WithoutOSDLock(OSD *o) : osd(o) {}
+    explicit C_Tick_WithoutOSDLock(OSD *o) : osd(o) {}
     void finish(int r) {
       osd->tick_without_osd_lock();
     }
@@ -1313,7 +1319,7 @@ public:
     Spinlock received_map_lock;
     epoch_t received_map_epoch; // largest epoch seen in MOSDMap from here
 
-    Session(CephContext *cct) :
+    explicit Session(CephContext *cct) :
       RefCountedObject(cct),
       auid(-1), con(0),
       session_dispatch_lock("Session::session_dispatch_lock"), 
@@ -1506,7 +1512,7 @@ private:
   /// state attached to outgoing heartbeat connections
   struct HeartbeatSession : public RefCountedObject {
     int peer;
-    HeartbeatSession(int p) : peer(p) {}
+    explicit HeartbeatSession(int p) : peer(p) {}
   };
   Mutex heartbeat_lock;
   map<int, int> debug_heartbeat_drops_remaining;
@@ -1547,7 +1553,7 @@ private:
 
   struct T_Heartbeat : public Thread {
     OSD *osd;
-    T_Heartbeat(OSD *o) : osd(o) {}
+    explicit T_Heartbeat(OSD *o) : osd(o) {}
     void *entry() {
       osd->heartbeat_entry();
       return 0;
@@ -1559,7 +1565,7 @@ public:
 
   struct HeartbeatDispatcher : public Dispatcher {
     OSD *osd;
-    HeartbeatDispatcher(OSD *o) : Dispatcher(cct), osd(o) {}
+    explicit HeartbeatDispatcher(OSD *o) : Dispatcher(cct), osd(o) {}
     bool ms_dispatch(Message *m) {
       return osd->heartbeat_dispatch(m);
     }
@@ -1606,6 +1612,11 @@ private:
   friend struct C_CompleteSplits;
 
   // -- op queue --
+  enum io_queue {
+    prioritized,
+    weightedpriority};
+  const io_queue op_queue;
+  const unsigned int op_prio_cutoff;
 
   friend class PGQueueable;
   class ShardedOpWQ: public ShardedThreadPool::ShardedWQ < pair <PGRef, PGQueueable> > {
@@ -1615,13 +1626,25 @@ private:
       Cond sdata_cond;
       Mutex sdata_op_ordering_lock;
       map<PG*, list<PGQueueable> > pg_for_processing;
-      PrioritizedQueue< pair<PGRef, PGQueueable>, entity_inst_t> pqueue;
+      std::unique_ptr<OpQueue< pair<PGRef, PGQueueable>, entity_inst_t>> pqueue;
       ShardData(
 	string lock_name, string ordering_lock,
-	uint64_t max_tok_per_prio, uint64_t min_cost, CephContext *cct)
+	uint64_t max_tok_per_prio, uint64_t min_cost, CephContext *cct,
+	io_queue opqueue)
 	: sdata_lock(lock_name.c_str(), false, true, false, cct),
-	  sdata_op_ordering_lock(ordering_lock.c_str(), false, true, false, cct),
-	  pqueue(max_tok_per_prio, min_cost) {}
+	  sdata_op_ordering_lock(ordering_lock.c_str(), false, true, false, cct) {
+	    if (opqueue == weightedpriority) {
+	      pqueue = std::unique_ptr
+		<WeightedPriorityQueue< pair<PGRef, PGQueueable>, entity_inst_t>>(
+		  new WeightedPriorityQueue< pair<PGRef, PGQueueable>, entity_inst_t>(
+		    max_tok_per_prio, min_cost));
+	    } else if (opqueue == prioritized) {
+	      pqueue = std::unique_ptr
+		<PrioritizedQueue< pair<PGRef, PGQueueable>, entity_inst_t>>(
+		  new PrioritizedQueue< pair<PGRef, PGQueueable>, entity_inst_t>(
+		    max_tok_per_prio, min_cost));
+	    }
+	  }
     };
     
     vector<ShardData*> shard_list;
@@ -1642,7 +1665,7 @@ private:
 	ShardData* one_shard = new ShardData(
 	  lock_name, order_lock,
 	  osd->cct->_conf->osd_op_pq_max_tokens_per_priority, 
-	  osd->cct->_conf->osd_op_pq_min_cost, osd->cct);
+	  osd->cct->_conf->osd_op_pq_min_cost, osd->cct, osd->op_queue);
 	shard_list.push_back(one_shard);
       }
     }
@@ -1676,7 +1699,7 @@ private:
 	assert (NULL != sdata);
 	sdata->sdata_op_ordering_lock.Lock();
 	f->open_object_section(lock_name);
-	sdata->pqueue.dump(f);
+	sdata->pqueue->dump(f);
 	f->close_section();
 	sdata->sdata_op_ordering_lock.Unlock();
       }
@@ -1684,7 +1707,7 @@ private:
 
     struct Pred {
       PG *pg;
-      Pred(PG *pg) : pg(pg) {}
+      explicit Pred(PG *pg) : pg(pg) {}
       bool operator()(const pair<PGRef, PGQueueable> &op) {
 	return op.first == pg;
       }
@@ -1697,7 +1720,7 @@ private:
       sdata = shard_list[shard_index];
       assert(sdata != NULL);
       sdata->sdata_op_ordering_lock.Lock();
-      sdata->pqueue.remove_by_filter(Pred(pg));
+      sdata->pqueue->remove_by_filter(Pred(pg), 0);
       sdata->pg_for_processing.erase(pg);
       sdata->sdata_op_ordering_lock.Unlock();
     }
@@ -1711,7 +1734,7 @@ private:
       assert(dequeued);
       list<pair<PGRef, PGQueueable> > _dequeued;
       sdata->sdata_op_ordering_lock.Lock();
-      sdata->pqueue.remove_by_filter(Pred(pg), &_dequeued);
+      sdata->pqueue->remove_by_filter(Pred(pg), &_dequeued);
       for (list<pair<PGRef, PGQueueable> >::iterator i = _dequeued.begin();
 	   i != _dequeued.end(); ++i) {
 	boost::optional<OpRequestRef> mop = i->second.maybe_get_op();
@@ -1738,7 +1761,7 @@ private:
       ShardData* sdata = shard_list[shard_index];
       assert(NULL != sdata);
       Mutex::Locker l(sdata->sdata_op_ordering_lock);
-      return sdata->pqueue.empty();
+      return sdata->pqueue->empty();
     }
   } op_shardedwq;
 
@@ -1780,7 +1803,7 @@ private:
     void _dequeue(list<PG*> *out);
     void _process(
       const list<PG *> &pgs,
-      ThreadPool::TPHandle &handle) {
+      ThreadPool::TPHandle &handle) override {
       osd->process_peering_events(pgs, handle);
       for (list<PG *>::const_iterator i = pgs.begin();
 	   i != pgs.end();
@@ -1788,7 +1811,6 @@ private:
 	(*i)->put("PeeringWQ");
       }
     }
-    using ThreadPool::BatchWorkQueue<PG>::_process;
     void _process_finish(const list<PG *> &pgs) {
       for (list<PG*>::const_iterator i = pgs.begin();
 	   i != pgs.end();
@@ -2132,7 +2154,7 @@ protected:
       osd->command_queue.pop_front();
       return c;
     }
-    void _process(Command *c) {
+    void _process(Command *c, ThreadPool::TPHandle &) override {
       osd->osd_lock.Lock();
       if (osd->is_stopping()) {
 	osd->osd_lock.Unlock();
@@ -2143,9 +2165,6 @@ protected:
       osd->osd_lock.Unlock();
       delete c;
     }
-    void _process(Command *c, ThreadPool::TPHandle &tp) {
-      _process(c);
-    }
     void _clear() {
       while (!osd->command_queue.empty()) {
 	Command *c = osd->command_queue.front();
@@ -2197,11 +2216,10 @@ protected:
 	osd->recovery_queue.push_front(&pg->recovery_item);
       }
     }
-    void _process(PG *pg, ThreadPool::TPHandle &handle) {
+    void _process(PG *pg, ThreadPool::TPHandle &handle) override {
       osd->do_recovery(pg, handle);
       pg->put("RecoveryWQ");
     }
-    using ThreadPool::WorkQueue<PG>::_process;
     void _clear() {
       while (!osd->recovery_queue.empty()) {
 	PG *pg = osd->recovery_queue.front();
@@ -2256,8 +2274,8 @@ protected:
       remove_queue.pop_front();
       return item;
     }
-    using ThreadPool::WorkQueueVal<pair<PGRef, DeletingStateRef> >::_process;
-    void _process(pair<PGRef, DeletingStateRef>, ThreadPool::TPHandle &);
+    void _process(pair<PGRef, DeletingStateRef>,
+		  ThreadPool::TPHandle &) override;
     void _clear() {
       remove_queue.clear();
     }
@@ -2300,6 +2318,28 @@ protected:
   bool ms_handle_reset(Connection *con);
   void ms_handle_remote_reset(Connection *con) {}
 
+  io_queue get_io_queue() const {
+    if (cct->_conf->osd_op_queue == "debug_random") {
+      srand(time(NULL));
+      return (rand() % 2 < 1) ? prioritized : weightedpriority;
+    } else if (cct->_conf->osd_op_queue == "wpq") {
+      return weightedpriority;
+    } else {
+      return prioritized;
+    }
+  }
+
+  unsigned int get_io_prio_cut() const {
+    if (cct->_conf->osd_op_queue_cut_off == "debug_random") {
+      srand(time(NULL));
+      return (rand() % 2 < 1) ? CEPH_MSG_PRIO_HIGH : CEPH_MSG_PRIO_LOW;
+    } else if (cct->_conf->osd_op_queue_cut_off == "low") {
+      return CEPH_MSG_PRIO_LOW;
+    } else {
+      return CEPH_MSG_PRIO_HIGH;
+    }
+  }
+
  public:
   /* internal and external can point to the same messenger, they will still
    * be cleaned up properly*/
@@ -2354,6 +2394,8 @@ public:
   int init();
   void final_init();
 
+  int enable_disable_fuse(bool stop);
+
   void suicide(int exitcode);
   int shutdown();
 
diff --git a/src/osd/OSDCap.h b/src/osd/OSDCap.h
index 905fa55..55d7e9c 100644
--- a/src/osd/OSDCap.h
+++ b/src/osd/OSDCap.h
@@ -42,6 +42,7 @@ static const __u8 OSD_CAP_ANY   = 0xff;          // *
 struct osd_rwxa_t {
   __u8 val;
 
+  // cppcheck-suppress noExplicitConstructor
   osd_rwxa_t(__u8 v = 0) : val(v) {}
   osd_rwxa_t& operator=(__u8 v) {
     val = v;
@@ -60,8 +61,8 @@ struct OSDCapSpec {
   std::string class_allow;
 
   OSDCapSpec() : allow(0) {}
-  OSDCapSpec(osd_rwxa_t v) : allow(v) {}
-  OSDCapSpec(std::string n) : allow(0), class_name(n) {}
+  explicit OSDCapSpec(osd_rwxa_t v) : allow(v) {}
+  explicit OSDCapSpec(std::string n) : allow(0), class_name(n) {}
   OSDCapSpec(std::string n, std::string a) : allow(0), class_name(n), class_allow(a) {}
 
   bool allow_all() const {
@@ -119,7 +120,7 @@ struct OSDCap {
   std::vector<OSDCapGrant> grants;
 
   OSDCap() {}
-  OSDCap(std::vector<OSDCapGrant> g) : grants(g) {}
+  explicit OSDCap(std::vector<OSDCapGrant> g) : grants(g) {}
 
   bool allow_all() const;
   void set_allow_all();
diff --git a/src/osd/OSDMap.h b/src/osd/OSDMap.h
index 24c287f..cb8bbf8 100644
--- a/src/osd/OSDMap.h
+++ b/src/osd/OSDMap.h
@@ -171,17 +171,17 @@ public:
     void dump(Formatter *f) const;
     static void generate_test_instances(list<Incremental*>& o);
 
-    Incremental(epoch_t e=0) :
+    explicit Incremental(epoch_t e=0) :
       encode_features(0),
       epoch(e), new_pool_max(-1), new_flags(-1), new_max_osd(-1),
       have_crc(false), full_crc(0), inc_crc(0) {
       memset(&fsid, 0, sizeof(fsid));
     }
-    Incremental(bufferlist &bl) {
+    explicit Incremental(bufferlist &bl) {
       bufferlist::iterator p = bl.begin();
       decode(p);
     }
-    Incremental(bufferlist::iterator &p) {
+    explicit Incremental(bufferlist::iterator &p) {
       decode(p);
     }
 
@@ -775,6 +775,15 @@ public:
       return group[group.size()-1];
     return -1;  // we fail!
   }
+  bool is_acting_osd_shard(pg_t pg, int osd, shard_id_t shard) const {
+    vector<int> acting;
+    int nrep = pg_to_acting_osds(pg, acting);
+    if (shard == shard_id_t::NO_SHARD)
+      return calc_pg_role(osd, acting, nrep) >= 0;
+    if (shard >= (int)acting.size())
+      return false;
+    return acting[shard] == osd;
+  }
 
 
   /* what replica # is a given osd? 0 primary, -1 for none. */
diff --git a/src/osd/PG.cc b/src/osd/PG.cc
index 2fd8dd2..c2d5b0b 100644
--- a/src/osd/PG.cc
+++ b/src/osd/PG.cc
@@ -156,8 +156,18 @@ void PGPool::update(OSDMapRef map)
   name = map->get_pool_name(id);
   if (pi->get_snap_epoch() == map->get_epoch()) {
     pi->build_removed_snaps(newly_removed_snaps);
-    newly_removed_snaps.subtract(cached_removed_snaps);
-    cached_removed_snaps.union_of(newly_removed_snaps);
+    interval_set<snapid_t> intersection;
+    intersection.intersection_of(newly_removed_snaps, cached_removed_snaps);
+    if (intersection == cached_removed_snaps) {
+        newly_removed_snaps.subtract(cached_removed_snaps);
+        cached_removed_snaps.union_of(newly_removed_snaps);
+    } else {
+        lgeneric_subdout(g_ceph_context, osd, 0) << __func__
+          << " cached_removed_snaps shrank from " << cached_removed_snaps
+          << " to " << newly_removed_snaps << dendl;
+        cached_removed_snaps = newly_removed_snaps;
+        newly_removed_snaps.clear();
+    }
     snapc = pi->get_snap_context();
   } else {
     newly_removed_snaps.clear();
@@ -185,7 +195,7 @@ PG::PG(OSDService *o, OSDMapRef curmap,
     _pool.id,
     p.shard),
   map_lock("PG::map_lock"),
-  osdmap_ref(curmap), pool(_pool),
+  osdmap_ref(curmap), last_persisted_osdmap_ref(curmap), pool(_pool),
   _lock("PG::_lock"),
   ref(0),
   #ifdef PG_DEBUG_REFS
@@ -223,8 +233,7 @@ PG::PG(OSDService *o, OSDMapRef curmap,
   acting_features(CEPH_FEATURES_SUPPORTED_DEFAULT),
   upacting_features(CEPH_FEATURES_SUPPORTED_DEFAULT),
   do_sort_bitwise(false),
-  last_epoch(0),
-  last_persisted_epoch(curmap->get_epoch())
+  last_epoch(0)
 {
 #ifdef PG_DEBUG_REFS
   osd->add_pgid(p, this);
@@ -1586,7 +1595,16 @@ void PG::activate(ObjectStore::Transaction& t,
     dout(20) << "activate - purged_snaps " << info.purged_snaps
 	     << " cached_removed_snaps " << pool.cached_removed_snaps << dendl;
     snap_trimq = pool.cached_removed_snaps;
-    snap_trimq.subtract(info.purged_snaps);
+    interval_set<snapid_t> intersection;
+    intersection.intersection_of(snap_trimq, info.purged_snaps);
+    if (intersection == info.purged_snaps) {
+      snap_trimq.subtract(info.purged_snaps);
+    } else {
+        dout(0) << "warning: info.purged_snaps (" << info.purged_snaps
+                << ") is not a subset of pool.cached_removed_snaps ("
+                << pool.cached_removed_snaps << ")" << dendl;
+        snap_trimq.subtract(intersection);
+    }
     dout(10) << "activate - snap_trimq " << snap_trimq << dendl;
     if (!snap_trimq.empty() && is_clean())
       queue_snap_trim();
@@ -1973,9 +1991,9 @@ void PG::_activate_committed(epoch_t epoch, epoch_t activation_epoch)
   }
 
   if (dirty_info) {
-    ObjectStore::Transaction *t = new ObjectStore::Transaction;
-    write_if_dirty(*t);
-    int tr = osd->store->queue_transaction_and_cleanup(osr.get(), t);
+    ObjectStore::Transaction t;
+    write_if_dirty(t);
+    int tr = osd->store->queue_transaction(osr.get(), std::move(t), NULL);
     assert(tr == 0);
   }
 
@@ -2050,7 +2068,7 @@ bool PG::queue_scrub()
 
 struct C_PG_FinishRecovery : public Context {
   PGRef pg;
-  C_PG_FinishRecovery(PG *p) : pg(p) {}
+  explicit C_PG_FinishRecovery(PG *p) : pg(p) {}
   void finish(int r) {
     pg->_finish_recovery(this);
   }
@@ -2702,7 +2720,7 @@ void PG::upgrade(ObjectStore *store)
 
   ceph::shared_ptr<ObjectStore::Sequencer> osr(
     new ObjectStore::Sequencer("upgrade"));
-  int r = store->apply_transaction(osr.get(), t);
+  int r = store->apply_transaction(osr.get(), std::move(t));
   if (r != 0) {
     derr << __func__ << ": apply_transaction returned "
 	 << cpp_strerror(r) << dendl;
@@ -2787,7 +2805,7 @@ void PG::prepare_write_info(map<string,bufferlist> *km)
   assert(ret == 0);
   if (need_update_epoch)
     last_epoch = get_osdmap()->get_epoch();
-  last_persisted_epoch = last_epoch;
+  last_persisted_osdmap_ref = osdmap_ref;
 
   dirty_info = false;
   dirty_big_info = false;
@@ -3594,7 +3612,7 @@ void PG::_scan_rollback_obs(
   const vector<ghobject_t> &rollback_obs,
   ThreadPool::TPHandle &handle)
 {
-  ObjectStore::Transaction *t = NULL;
+  ObjectStore::Transaction t;
   eversion_t trimmed_to = last_rollback_info_trimmed_to_applied;
   for (vector<ghobject_t>::const_iterator i = rollback_obs.begin();
        i != rollback_obs.end();
@@ -3606,15 +3624,13 @@ void PG::_scan_rollback_obs(
 			<< *i << " generation < trimmed_to "
 			<< trimmed_to
 			<< "...repaired";
-      if (!t)
-	t = new ObjectStore::Transaction;
-      t->remove(coll, *i);
+      t.remove(coll, *i);
     }
   }
-  if (t) {
+  if (!t.empty()) {
     derr << __func__ << ": queueing trans to clean up obsolete rollback objs"
 	 << dendl;
-    osd->store->queue_transaction_and_cleanup(osr.get(), t);
+    osd->store->queue_transaction(osr.get(), std::move(t), NULL);
   }
 }
 
@@ -3687,7 +3703,7 @@ void PG::_scan_snaps(ScrubMap &smap)
 			    << "...repaired";
 	}
 	snap_mapper.add_oid(hoid, oi_snaps, &_t);
-	r = osd->store->apply_transaction(osr.get(), t);
+	r = osd->store->apply_transaction(osr.get(), std::move(t));
 	if (r != 0) {
 	  derr << __func__ << ": apply_transaction got " << cpp_strerror(r)
 	       << dendl;
@@ -4433,10 +4449,10 @@ void PG::scrub_finish()
   reg_next_scrub();
 
   {
-    ObjectStore::Transaction *t = new ObjectStore::Transaction;
+    ObjectStore::Transaction t;
     dirty_info = true;
-    write_if_dirty(*t);
-    int tr = osd->store->queue_transaction_and_cleanup(osr.get(), t);
+    write_if_dirty(t);
+    int tr = osd->store->queue_transaction(osr.get(), std::move(t), NULL);
     assert(tr == 0);
   }
 
@@ -5423,15 +5439,15 @@ void PG::handle_activate_map(RecoveryCtx *rctx)
   dout(10) << "handle_activate_map " << dendl;
   ActMap evt;
   recovery_state.handle_event(evt, rctx);
-  if (osdmap_ref->get_epoch() - last_persisted_epoch >
+  if (osdmap_ref->get_epoch() - last_persisted_osdmap_ref->get_epoch() >
     cct->_conf->osd_pg_epoch_persisted_max_stale) {
     dout(20) << __func__ << ": Dirtying info: last_persisted is "
-	     << last_persisted_epoch
+	     << last_persisted_osdmap_ref->get_epoch()
 	     << " while current is " << osdmap_ref->get_epoch() << dendl;
     dirty_info = true;
   } else {
     dout(20) << __func__ << ": Not dirtying info: last_persisted is "
-	     << last_persisted_epoch
+	     << last_persisted_osdmap_ref->get_epoch()
 	     << " while current is " << osdmap_ref->get_epoch() << dendl;
   }
   if (osdmap_ref->check_new_blacklist_entries()) check_blacklisted_watchers();
@@ -5447,6 +5463,7 @@ void PG::handle_loaded(RecoveryCtx *rctx)
 void PG::handle_create(RecoveryCtx *rctx)
 {
   dout(10) << "handle_create" << dendl;
+  rctx->created_pgs.insert(this);
   Initialize evt;
   recovery_state.handle_event(evt, rctx);
   ActMap evt2;
diff --git a/src/osd/PG.h b/src/osd/PG.h
index c5f1c12..ff306cc 100644
--- a/src/osd/PG.h
+++ b/src/osd/PG.h
@@ -206,6 +206,7 @@ protected:
   Mutex map_lock;
   list<OpRequestRef> waiting_for_map;
   OSDMapRef osdmap_ref;
+  OSDMapRef last_persisted_osdmap_ref;
   PGPool pool;
 
   void queue_op(OpRequestRef& op);
@@ -298,6 +299,7 @@ public:
   void upgrade(ObjectStore *store);
 
   const coll_t coll;
+  ObjectStore::CollectionHandle ch;
   PGLog  pg_log;
   static string get_info_key(spg_t pgid) {
     return stringify(pgid) + "_info";
@@ -319,7 +321,7 @@ public:
   public:
     boost::scoped_ptr<IsPGReadablePredicate> is_readable;
     boost::scoped_ptr<IsPGRecoverablePredicate> is_recoverable;
-    MissingLoc(PG *pg)
+    explicit MissingLoc(PG *pg)
       : pg(pg) {}
     void set_backend_predicates(
       IsPGReadablePredicate *_is_readable,
@@ -530,6 +532,7 @@ public:
     map<int, map<spg_t, pg_query_t> > *query_map;
     map<int, vector<pair<pg_notify_t, pg_interval_map_t> > > *info_map;
     map<int, vector<pair<pg_notify_t, pg_interval_map_t> > > *notify_list;
+    set<PGRef> created_pgs;
     C_Contexts *on_applied;
     C_Contexts *on_safe;
     ObjectStore::Transaction *transaction;
@@ -656,7 +659,7 @@ public:
     hobject_t begin;
     hobject_t end;
 
-    BackfillInterval(bool bitwise=true)
+    explicit BackfillInterval(bool bitwise=true)
       : objects(hobject_t::Comparator(bitwise)),
 	sort_bitwise(bitwise)
     {}
@@ -1331,7 +1334,7 @@ public:
 
   struct QueryState : boost::statechart::event< QueryState > {
     Formatter *f;
-    QueryState(Formatter *f) : f(f) {}
+    explicit QueryState(Formatter *f) : f(f) {}
     void print(std::ostream *out) const {
       *out << "Query";
     }
@@ -1410,7 +1413,7 @@ public:
   };
   struct Activate : boost::statechart::event< Activate > {
     epoch_t activation_epoch;
-    Activate(epoch_t q) : boost::statechart::event< Activate >(),
+    explicit Activate(epoch_t q) : boost::statechart::event< Activate >(),
 			  activation_epoch(q) {}
     void print(std::ostream *out) const {
       *out << "Activate from " << activation_epoch;
@@ -1418,7 +1421,7 @@ public:
   };
   struct RequestBackfillPrio : boost::statechart::event< RequestBackfillPrio > {
     unsigned priority;
-    RequestBackfillPrio(unsigned prio) :
+    explicit RequestBackfillPrio(unsigned prio) :
               boost::statechart::event< RequestBackfillPrio >(),
 			  priority(prio) {}
     void print(std::ostream *out) const {
@@ -1542,14 +1545,14 @@ public:
     /* States */
 
     struct Crashed : boost::statechart::state< Crashed, RecoveryMachine >, NamedState {
-      Crashed(my_context ctx);
+      explicit Crashed(my_context ctx);
     };
 
     struct Started;
     struct Reset;
 
     struct Initial : boost::statechart::state< Initial, RecoveryMachine >, NamedState {
-      Initial(my_context ctx);
+      explicit Initial(my_context ctx);
       void exit();
 
       typedef boost::mpl::list <
@@ -1569,7 +1572,7 @@ public:
     };
 
     struct Reset : boost::statechart::state< Reset, RecoveryMachine >, NamedState {
-      Reset(my_context ctx);
+      explicit Reset(my_context ctx);
       void exit();
 
       typedef boost::mpl::list <
@@ -1594,7 +1597,7 @@ public:
     struct Start;
 
     struct Started : boost::statechart::state< Started, RecoveryMachine, Start >, NamedState {
-      Started(my_context ctx);
+      explicit Started(my_context ctx);
       void exit();
 
       typedef boost::mpl::list <
@@ -1624,7 +1627,7 @@ public:
     struct Stray;
 
     struct Start : boost::statechart::state< Start, Started >, NamedState {
-      Start(my_context ctx);
+      explicit Start(my_context ctx);
       void exit();
 
       typedef boost::mpl::list <
@@ -1644,7 +1647,7 @@ public:
     };
 
     struct Primary : boost::statechart::state< Primary, Started, Peering >, NamedState {
-      Primary(my_context ctx);
+      explicit Primary(my_context ctx);
       void exit();
 
       typedef boost::mpl::list <
@@ -1665,7 +1668,7 @@ public:
 	boost::statechart::custom_reaction< MInfoRec >,
 	boost::statechart::custom_reaction< MNotifyRec >
 	> reactions;
-      WaitActingChange(my_context ctx);
+      explicit WaitActingChange(my_context ctx);
       boost::statechart::result react(const QueryState& q);
       boost::statechart::result react(const AdvMap&);
       boost::statechart::result react(const MLogRec&);
@@ -1680,7 +1683,7 @@ public:
     struct Peering : boost::statechart::state< Peering, Primary, GetInfo >, NamedState {
       std::unique_ptr< PriorSet > prior_set;
 
-      Peering(my_context ctx);
+      explicit Peering(my_context ctx);
       void exit();
 
       typedef boost::mpl::list <
@@ -1695,7 +1698,7 @@ public:
     struct WaitLocalRecoveryReserved;
     struct Activating;
     struct Active : boost::statechart::state< Active, Primary, Activating >, NamedState {
-      Active(my_context ctx);
+      explicit Active(my_context ctx);
       void exit();
 
       const set<pg_shard_t> remote_shards_to_reserve_recovery;
@@ -1728,7 +1731,7 @@ public:
       typedef boost::mpl::list<
 	boost::statechart::transition< DoRecovery, WaitLocalRecoveryReserved >
       > reactions;
-      Clean(my_context ctx);
+      explicit Clean(my_context ctx);
       void exit();
     };
 
@@ -1737,7 +1740,7 @@ public:
 	boost::statechart::transition< GoClean, Clean >,
 	boost::statechart::custom_reaction< AllReplicasActivated >
       > reactions;
-      Recovered(my_context ctx);
+      explicit Recovered(my_context ctx);
       void exit();
       boost::statechart::result react(const AllReplicasActivated&) {
 	post_event(GoClean());
@@ -1750,7 +1753,7 @@ public:
 	boost::statechart::transition< Backfilled, Recovered >,
 	boost::statechart::custom_reaction< RemoteReservationRejected >
 	> reactions;
-      Backfilling(my_context ctx);
+      explicit Backfilling(my_context ctx);
       boost::statechart::result react(const RemoteReservationRejected& evt);
       void exit();
     };
@@ -1762,7 +1765,7 @@ public:
 	boost::statechart::transition< AllBackfillsReserved, Backfilling >
 	> reactions;
       set<pg_shard_t>::const_iterator backfill_osd_it;
-      WaitRemoteBackfillReserved(my_context ctx);
+      explicit WaitRemoteBackfillReserved(my_context ctx);
       void exit();
       boost::statechart::result react(const RemoteBackfillReserved& evt);
       boost::statechart::result react(const RemoteReservationRejected& evt);
@@ -1772,7 +1775,7 @@ public:
       typedef boost::mpl::list<
 	boost::statechart::transition< LocalBackfillReserved, WaitRemoteBackfillReserved >
 	> reactions;
-      WaitLocalBackfillReserved(my_context ctx);
+      explicit WaitLocalBackfillReserved(my_context ctx);
       void exit();
     };
 
@@ -1782,7 +1785,7 @@ public:
 	boost::statechart::custom_reaction< RemoteBackfillReserved >,
 	boost::statechart::custom_reaction< RemoteReservationRejected >
 	> reactions;
-      NotBackfilling(my_context ctx);
+      explicit NotBackfilling(my_context ctx);
       void exit();
       boost::statechart::result react(const RemoteBackfillReserved& evt);
       boost::statechart::result react(const RemoteReservationRejected& evt);
@@ -1790,7 +1793,7 @@ public:
 
     struct RepNotRecovering;
     struct ReplicaActive : boost::statechart::state< ReplicaActive, Started, RepNotRecovering >, NamedState {
-      ReplicaActive(my_context ctx);
+      explicit ReplicaActive(my_context ctx);
       void exit();
 
       typedef boost::mpl::list <
@@ -1815,7 +1818,7 @@ public:
 	boost::statechart::transition< RemoteReservationRejected, RepNotRecovering >,
 	boost::statechart::custom_reaction< BackfillTooFull >
 	> reactions;
-      RepRecovering(my_context ctx);
+      explicit RepRecovering(my_context ctx);
       boost::statechart::result react(const BackfillTooFull &evt);
       void exit();
     };
@@ -1825,7 +1828,7 @@ public:
 	boost::statechart::custom_reaction< RemoteBackfillReserved >,
 	boost::statechart::custom_reaction< RemoteReservationRejected >
 	> reactions;
-      RepWaitBackfillReserved(my_context ctx);
+      explicit RepWaitBackfillReserved(my_context ctx);
       void exit();
       boost::statechart::result react(const RemoteBackfillReserved &evt);
       boost::statechart::result react(const RemoteReservationRejected &evt);
@@ -1835,7 +1838,7 @@ public:
       typedef boost::mpl::list<
 	boost::statechart::custom_reaction< RemoteRecoveryReserved >
 	> reactions;
-      RepWaitRecoveryReserved(my_context ctx);
+      explicit RepWaitRecoveryReserved(my_context ctx);
       void exit();
       boost::statechart::result react(const RemoteRecoveryReserved &evt);
     };
@@ -1846,7 +1849,7 @@ public:
         boost::statechart::transition< RequestRecovery, RepWaitRecoveryReserved >,
 	boost::statechart::transition< RecoveryDone, RepNotRecovering >  // for compat with pre-reservation peers
 	> reactions;
-      RepNotRecovering(my_context ctx);
+      explicit RepNotRecovering(my_context ctx);
       boost::statechart::result react(const RequestBackfillPrio &evt);
       void exit();
     };
@@ -1856,7 +1859,7 @@ public:
 	boost::statechart::custom_reaction< AllReplicasRecovered >,
 	boost::statechart::custom_reaction< RequestBackfill >
 	> reactions;
-      Recovering(my_context ctx);
+      explicit Recovering(my_context ctx);
       void exit();
       void release_reservations();
       boost::statechart::result react(const AllReplicasRecovered &evt);
@@ -1869,7 +1872,7 @@ public:
 	boost::statechart::transition< AllRemotesReserved, Recovering >
 	> reactions;
       set<pg_shard_t>::const_iterator remote_recovery_reservation_it;
-      WaitRemoteRecoveryReserved(my_context ctx);
+      explicit WaitRemoteRecoveryReserved(my_context ctx);
       boost::statechart::result react(const RemoteRecoveryReserved &evt);
       void exit();
     };
@@ -1878,7 +1881,7 @@ public:
       typedef boost::mpl::list <
 	boost::statechart::transition< LocalRecoveryReserved, WaitRemoteRecoveryReserved >
 	> reactions;
-      WaitLocalRecoveryReserved(my_context ctx);
+      explicit WaitLocalRecoveryReserved(my_context ctx);
       void exit();
     };
 
@@ -1888,14 +1891,14 @@ public:
 	boost::statechart::transition< DoRecovery, WaitLocalRecoveryReserved >,
 	boost::statechart::transition< RequestBackfill, WaitLocalBackfillReserved >
 	> reactions;
-      Activating(my_context ctx);
+      explicit Activating(my_context ctx);
       void exit();
     };
 
     struct Stray : boost::statechart::state< Stray, Started >, NamedState {
       map<int, pair<pg_query_t, epoch_t> > pending_queries;
 
-      Stray(my_context ctx);
+      explicit Stray(my_context ctx);
       void exit();
 
       typedef boost::mpl::list <
@@ -1919,7 +1922,7 @@ public:
     struct GetInfo : boost::statechart::state< GetInfo, Peering >, NamedState {
       set<pg_shard_t> peer_info_requested;
 
-      GetInfo(my_context ctx);
+      explicit GetInfo(my_context ctx);
       void exit();
       void get_infos();
 
@@ -1941,7 +1944,7 @@ public:
       pg_shard_t auth_log_shard;
       boost::intrusive_ptr<MOSDPGLog> msg;
 
-      GetLog(my_context ctx);
+      explicit GetLog(my_context ctx);
       void exit();
 
       typedef boost::mpl::list <
@@ -1962,7 +1965,7 @@ public:
     struct GetMissing : boost::statechart::state< GetMissing, Peering >, NamedState {
       set<pg_shard_t> peer_missing_requested;
 
-      GetMissing(my_context ctx);
+      explicit GetMissing(my_context ctx);
       void exit();
 
       typedef boost::mpl::list <
@@ -1975,7 +1978,7 @@ public:
     };
 
     struct WaitUpThru : boost::statechart::state< WaitUpThru, Peering >, NamedState {
-      WaitUpThru(my_context ctx);
+      explicit WaitUpThru(my_context ctx);
       void exit();
 
       typedef boost::mpl::list <
@@ -1993,7 +1996,7 @@ public:
 	boost::statechart::custom_reaction< AdvMap >,
 	boost::statechart::custom_reaction< MNotifyRec >
 	> reactions;
-      Incomplete(my_context ctx);
+      explicit Incomplete(my_context ctx);
       boost::statechart::result react(const AdvMap &advmap);
       boost::statechart::result react(const MNotifyRec& infoevt);
       void exit();
@@ -2017,7 +2020,7 @@ public:
     boost::optional<RecoveryCtx> rctx;
 
   public:
-    RecoveryState(PG *pg)
+    explicit RecoveryState(PG *pg)
       : machine(this, pg), pg(pg), orig_ctx(0) {
       machine.initiate();
     }
@@ -2055,7 +2058,6 @@ public:
 
   bool do_sort_bitwise;
   epoch_t last_epoch;
-  epoch_t last_persisted_epoch;
 
  public:
   const spg_t&      get_pgid() const { return pg_id; }
diff --git a/src/osd/PGBackend.cc b/src/osd/PGBackend.cc
index 7ef6c47..85bd78b 100644
--- a/src/osd/PGBackend.cc
+++ b/src/osd/PGBackend.cc
@@ -115,7 +115,7 @@ int PGBackend::objects_list_partial(
   while (!_next.is_max() && ls->size() < (unsigned)min) {
     vector<ghobject_t> objects;
     int r = store->collection_list(
-      coll,
+      ch,
       _next,
       ghobject_t::get_max(),
       parent->sort_bitwise(),
@@ -150,7 +150,7 @@ int PGBackend::objects_list_range(
   assert(ls);
   vector<ghobject_t> objects;
   int r = store->collection_list(
-    coll,
+    ch,
     ghobject_t(start, ghobject_t::NO_GEN, get_parent()->whoami_shard().shard),
     ghobject_t(end, ghobject_t::NO_GEN, get_parent()->whoami_shard().shard),
     parent->sort_bitwise(),
@@ -180,13 +180,13 @@ int PGBackend::objects_get_attr(
 {
   bufferptr bp;
   int r = store->getattr(
-    coll,
+    ch,
     ghobject_t(hoid, ghobject_t::NO_GEN, get_parent()->whoami_shard().shard),
     attr.c_str(),
     bp);
   if (r >= 0 && out) {
     out->clear();
-    out->push_back(bp);
+    out->push_back(std::move(bp));
   }
   return r;
 }
@@ -196,7 +196,7 @@ int PGBackend::objects_get_attrs(
   map<string, bufferlist> *out)
 {
   return store->getattrs(
-    coll,
+    ch,
     ghobject_t(hoid, ghobject_t::NO_GEN, get_parent()->whoami_shard().shard),
     *out);
 }
@@ -274,12 +274,13 @@ PGBackend *PGBackend::build_pg_backend(
   const OSDMapRef curmap,
   Listener *l,
   coll_t coll,
+  ObjectStore::CollectionHandle &ch,
   ObjectStore *store,
   CephContext *cct)
 {
   switch (pool.type) {
   case pg_pool_t::TYPE_REPLICATED: {
-    return new ReplicatedBackend(l, coll, store, cct);
+    return new ReplicatedBackend(l, coll, ch, store, cct);
   }
   case pg_pool_t::TYPE_ERASURE: {
     ErasureCodeInterfaceRef ec_impl;
@@ -296,6 +297,7 @@ PGBackend *PGBackend::build_pg_backend(
     return new ECBackend(
       l,
       coll,
+      ch,
       store,
       cct,
       ec_impl,
@@ -325,7 +327,7 @@ void PGBackend::be_scan_list(
 
     struct stat st;
     int r = store->stat(
-      coll,
+      ch,
       ghobject_t(
 	poid, ghobject_t::NO_GEN, get_parent()->whoami_shard().shard),
       &st,
@@ -335,7 +337,7 @@ void PGBackend::be_scan_list(
       o.size = st.st_size;
       assert(!o.negative);
       store->getattrs(
-	coll,
+	ch,
 	ghobject_t(
 	  poid, ghobject_t::NO_GEN, get_parent()->whoami_shard().shard),
 	o.attrs);
diff --git a/src/osd/PGBackend.h b/src/osd/PGBackend.h
index 1e410c7..7706797 100644
--- a/src/osd/PGBackend.h
+++ b/src/osd/PGBackend.h
@@ -43,6 +43,7 @@
  protected:
    ObjectStore *store;
    const coll_t coll;
+   ObjectStore::CollectionHandle &ch;
  public:	
    /**
     * Provides interfaces for PGBackend callbacks
@@ -60,7 +61,6 @@
       */
      virtual void on_local_recover(
        const hobject_t &oid,
-       const object_stat_sum_t &stat_diff,
        const ObjectRecoveryInfo &recovery_info,
        ObjectContextRef obc,
        ObjectStore::Transaction *t
@@ -70,7 +70,10 @@
       * Called when transaction recovering oid is durable and
       * applied on all replicas
       */
-     virtual void on_global_recover(const hobject_t &oid) = 0;
+     virtual void on_global_recover(
+       const hobject_t &oid,
+       const object_stat_sum_t &stat_diff
+       ) = 0;
 
      /**
       * Called when peer is recovered
@@ -102,11 +105,11 @@
 
      virtual void send_message(int to_osd, Message *m) = 0;
      virtual void queue_transaction(
-       ObjectStore::Transaction *t,
+       ObjectStore::Transaction&& t,
        OpRequestRef op = OpRequestRef()
        ) = 0;
      virtual void queue_transactions(
-       list<ObjectStore::Transaction*>& tls,
+       vector<ObjectStore::Transaction>& tls,
        OpRequestRef op = OpRequestRef()
        ) = 0;
      virtual epoch_t get_epoch() const = 0;
@@ -230,9 +233,11 @@
    };
    Listener *parent;
    Listener *get_parent() const { return parent; }
-   PGBackend(Listener *l, ObjectStore *store, coll_t coll) :
+   PGBackend(Listener *l, ObjectStore *store, coll_t coll,
+	     ObjectStore::CollectionHandle &ch) :
      store(store),
      coll(coll),
+     ch(ch),
      parent(l) {}
    bool is_primary() const { return get_parent()->pgb_is_primary(); }
    OSDMapRef get_osdmap() const { return get_parent()->pgb_get_osdmap(); }
@@ -555,8 +560,8 @@
 		pair<bufferlist*, Context*> > > &to_read,
      Context *on_complete, bool fast_read = false) = 0;
 
-   virtual bool scrub_supported() { return false; }
-   virtual bool auto_repair_supported() const { return false; }
+   virtual bool scrub_supported() = 0;
+   virtual bool auto_repair_supported() const = 0;
    void be_scan_list(
      ScrubMap &map, const vector<hobject_t> &ls, bool deep, uint32_t seed,
      ThreadPool::TPHandle &handle);
@@ -585,18 +590,19 @@
      const vector<int> &acting,
      ostream &errorstream);
    virtual uint64_t be_get_ondisk_size(
-     uint64_t logical_size) { assert(0); return 0; }
+     uint64_t logical_size) = 0;
    virtual void be_deep_scrub(
      const hobject_t &poid,
      uint32_t seed,
      ScrubMap::object &o,
-     ThreadPool::TPHandle &handle) { assert(0); }
+     ThreadPool::TPHandle &handle) = 0;
 
    static PGBackend *build_pg_backend(
      const pg_pool_t &pool,
      const OSDMapRef curmap,
      Listener *l,
      coll_t coll,
+     ObjectStore::CollectionHandle &ch,
      ObjectStore *store,
      CephContext *cct);
  };
diff --git a/src/osd/PGLog.h b/src/osd/PGLog.h
index 87e7ab2..3a9b697 100644
--- a/src/osd/PGLog.h
+++ b/src/osd/PGLog.h
@@ -478,6 +478,7 @@ protected:
     check();
   }
 public:
+  // cppcheck-suppress noExplicitConstructor
   PGLog(CephContext *cct = 0) :
     dirty_from(eversion_t::max()),
     writeout_from(eversion_t::max()), 
diff --git a/src/osd/ReplicatedBackend.cc b/src/osd/ReplicatedBackend.cc
index 1ffa50d..bef96d6 100644
--- a/src/osd/ReplicatedBackend.cc
+++ b/src/osd/ReplicatedBackend.cc
@@ -62,9 +62,10 @@ static void log_subop_stats(
 ReplicatedBackend::ReplicatedBackend(
   PGBackend::Listener *pg,
   coll_t coll,
+  ObjectStore::CollectionHandle &c,
   ObjectStore *store,
   CephContext *cct) :
-  PGBackend(pg, store, coll),
+  PGBackend(pg, store, coll, c),
   cct(cct) {}
 
 void ReplicatedBackend::run_recovery_op(
@@ -259,7 +260,7 @@ int ReplicatedBackend::objects_read_sync(
   uint32_t op_flags,
   bufferlist *bl)
 {
-  return store->read(coll, ghobject_t(hoid), off, len, *bl, op_flags);
+  return store->read(ch, ghobject_t(hoid), off, len, *bl, op_flags);
 }
 
 struct AsyncReadCallback : public GenContext<ThreadPool::TPHandle&> {
@@ -290,7 +291,7 @@ void ReplicatedBackend::objects_read_async(
 	   to_read.begin();
        i != to_read.end() && r >= 0;
        ++i) {
-    int _r = store->read(coll, ghobject_t(hoid), i->first.get<0>(),
+    int _r = store->read(ch, ghobject_t(hoid), i->first.get<0>(),
 			 i->first.get<1>(), *(i->second.first),
 			 i->first.get<2>());
     if (i->second.second) {
@@ -311,7 +312,7 @@ class RPGTransaction : public PGBackend::PGTransaction {
   coll_t coll;
   set<hobject_t, hobject_t::BitwiseComparator> temp_added;
   set<hobject_t, hobject_t::BitwiseComparator> temp_cleared;
-  ObjectStore::Transaction *t;
+  mutable ObjectStore::Transaction t;
   uint64_t written;
   const coll_t &get_coll_ct(const hobject_t &hoid) {
     if (hoid.is_temp()) {
@@ -332,15 +333,13 @@ class RPGTransaction : public PGBackend::PGTransaction {
   }
 public:
   RPGTransaction(coll_t coll, bool use_tbl)
-    : coll(coll), t(new ObjectStore::Transaction), written(0) {
-    t->set_use_tbl(use_tbl);
+    : coll(coll), written(0) {
+    t.set_use_tbl(use_tbl);
   }
 
   /// Yields ownership of contained transaction
-  ObjectStore::Transaction *get_transaction() {
-    ObjectStore::Transaction *_t = t;
-    t = 0;
-    return _t;
+  ObjectStore::Transaction&& get_transaction() {
+    return std::move(t);
   }
   const set<hobject_t, hobject_t::BitwiseComparator> &get_temp_added() {
     return temp_added;
@@ -357,17 +356,17 @@ public:
     uint32_t fadvise_flags
     ) {
     written += len;
-    t->write(get_coll_ct(hoid), ghobject_t(hoid), off, len, bl, fadvise_flags);
+    t.write(get_coll_ct(hoid), ghobject_t(hoid), off, len, bl, fadvise_flags);
   }
   void remove(
     const hobject_t &hoid
     ) {
-    t->remove(get_coll_rm(hoid), ghobject_t(hoid));
+    t.remove(get_coll_rm(hoid), ghobject_t(hoid));
   }
   void stash(
     const hobject_t &hoid,
     version_t former_version) {
-    t->collection_move_rename(
+    t.collection_move_rename(
       coll, ghobject_t(hoid), coll,
       ghobject_t(hoid, former_version, shard_id_t::NO_SHARD));
   }
@@ -375,20 +374,20 @@ public:
     const hobject_t &hoid,
     map<string, bufferlist> &attrs
     ) {
-    t->setattrs(get_coll(hoid), ghobject_t(hoid), attrs);
+    t.setattrs(get_coll(hoid), ghobject_t(hoid), attrs);
   }
   void setattr(
     const hobject_t &hoid,
     const string &attrname,
     bufferlist &bl
     ) {
-    t->setattr(get_coll(hoid), ghobject_t(hoid), attrname, bl);
+    t.setattr(get_coll(hoid), ghobject_t(hoid), attrname, bl);
   }
   void rmattr(
     const hobject_t &hoid,
     const string &attrname
     ) {
-    t->rmattr(get_coll(hoid), ghobject_t(hoid), attrname);
+    t.rmattr(get_coll(hoid), ghobject_t(hoid), attrname);
   }
   void omap_setkeys(
     const hobject_t &hoid,
@@ -396,38 +395,38 @@ public:
     ) {
     for (map<string, bufferlist>::iterator p = keys.begin(); p != keys.end(); ++p)
       written += p->first.length() + p->second.length();
-    return t->omap_setkeys(get_coll(hoid), ghobject_t(hoid), keys);
+    return t.omap_setkeys(get_coll(hoid), ghobject_t(hoid), keys);
   }
   void omap_setkeys(
     const hobject_t &hoid,
     bufferlist &keys_bl
     ) {
     written += keys_bl.length();
-    return t->omap_setkeys(get_coll(hoid), ghobject_t(hoid), keys_bl);
+    return t.omap_setkeys(get_coll(hoid), ghobject_t(hoid), keys_bl);
   }
   void omap_rmkeys(
     const hobject_t &hoid,
     set<string> &keys
     ) {
-    t->omap_rmkeys(get_coll(hoid), ghobject_t(hoid), keys);
+    t.omap_rmkeys(get_coll(hoid), ghobject_t(hoid), keys);
   }
   void omap_rmkeys(
     const hobject_t &hoid,
     bufferlist &keys_bl
     ) {
-    t->omap_rmkeys(get_coll(hoid), ghobject_t(hoid), keys_bl);
+    t.omap_rmkeys(get_coll(hoid), ghobject_t(hoid), keys_bl);
   }
   void omap_clear(
     const hobject_t &hoid
     ) {
-    t->omap_clear(get_coll(hoid), ghobject_t(hoid));
+    t.omap_clear(get_coll(hoid), ghobject_t(hoid));
   }
   void omap_setheader(
     const hobject_t &hoid,
     bufferlist &header
     ) {
     written += header.length();
-    t->omap_setheader(get_coll(hoid), ghobject_t(hoid), header);
+    t.omap_setheader(get_coll(hoid), ghobject_t(hoid), header);
   }
   void clone_range(
     const hobject_t &from,
@@ -437,20 +436,20 @@ public:
     uint64_t tooff
     ) {
     assert(get_coll(from) == get_coll_ct(to)  && get_coll(from) == coll);
-    t->clone_range(coll, ghobject_t(from), ghobject_t(to), fromoff, len, tooff);
+    t.clone_range(coll, ghobject_t(from), ghobject_t(to), fromoff, len, tooff);
   }
   void clone(
     const hobject_t &from,
     const hobject_t &to
     ) {
     assert(get_coll(from) == get_coll_ct(to)  && get_coll(from) == coll);
-    t->clone(coll, ghobject_t(from), ghobject_t(to));
+    t.clone(coll, ghobject_t(from), ghobject_t(to));
   }
   void rename(
     const hobject_t &from,
     const hobject_t &to
     ) {
-    t->collection_move_rename(
+    t.collection_move_rename(
       get_coll_rm(from),
       ghobject_t(from),
       get_coll_ct(to),
@@ -460,21 +459,21 @@ public:
   void touch(
     const hobject_t &hoid
     ) {
-    t->touch(get_coll_ct(hoid), ghobject_t(hoid));
+    t.touch(get_coll_ct(hoid), ghobject_t(hoid));
   }
 
   void truncate(
     const hobject_t &hoid,
     uint64_t off
     ) {
-    t->truncate(get_coll(hoid), ghobject_t(hoid), off);
+    t.truncate(get_coll(hoid), ghobject_t(hoid), off);
   }
   void zero(
     const hobject_t &hoid,
     uint64_t off,
     uint64_t len
     ) {
-    t->zero(get_coll(hoid), ghobject_t(hoid), off, len);
+    t.zero(get_coll(hoid), ghobject_t(hoid), off, len);
   }
 
   void set_alloc_hint(
@@ -482,7 +481,7 @@ public:
     uint64_t expected_object_size,
     uint64_t expected_write_size
     ) {
-    t->set_alloc_hint(get_coll(hoid), ghobject_t(hoid), expected_object_size,
+    t.set_alloc_hint(get_coll(hoid), ghobject_t(hoid), expected_object_size,
                       expected_write_size);
   }
 
@@ -494,7 +493,7 @@ public:
     assert(to_append);
     written += to_append->written;
     to_append->written = 0;
-    t->append(*(to_append->t));
+    t.append((to_append->t));
     for (set<hobject_t, hobject_t::BitwiseComparator>::iterator i = to_append->temp_added.begin();
 	 i != to_append->temp_added.end();
 	 ++i) {
@@ -509,15 +508,15 @@ public:
     }
   }
   void nop() {
-    t->nop();
+    t.nop();
   }
   bool empty() const {
-    return t->empty();
+    return t.empty(); 
   }
   uint64_t get_bytes_written() const {
     return written;
   }
-  ~RPGTransaction() { delete t; }
+  ~RPGTransaction() { }
 };
 
 PGBackend::PGTransaction *ReplicatedBackend::get_transaction()
@@ -564,7 +563,7 @@ void ReplicatedBackend::submit_transaction(
 {
   RPGTransaction *t = dynamic_cast<RPGTransaction*>(_t);
   assert(t);
-  ObjectStore::Transaction *op_t = t->get_transaction();
+  ObjectStore::Transaction op_t = t->get_transaction();
 
   assert(t->get_temp_added().size() <= 1);
   assert(t->get_temp_cleared().size() <= 1);
@@ -600,7 +599,7 @@ void ReplicatedBackend::submit_transaction(
     log_entries,
     hset_history,
     &op,
-    op_t);
+    &op_t);
 
   if (!(t->get_temp_added().empty())) {
     add_temp_objs(t->get_temp_added());
@@ -613,20 +612,17 @@ void ReplicatedBackend::submit_transaction(
     trim_to,
     trim_rollback_to,
     true,
-    op_t);
+    &op_t);
   
-  op_t->register_on_applied_sync(on_local_applied_sync);
-  op_t->register_on_applied(
+  op_t.register_on_applied_sync(on_local_applied_sync);
+  op_t.register_on_applied(
     parent->bless_context(
       new C_OSD_OnOpApplied(this, &op)));
-  op_t->register_on_applied(
-    new ObjectStore::C_DeleteTransaction(op_t));
-  op_t->register_on_commit(
+  op_t.register_on_commit(
     parent->bless_context(
       new C_OSD_OnOpCommit(this, &op)));
-
-  list<ObjectStore::Transaction*> tls;
-  tls.push_back(op_t);
+  vector<ObjectStore::Transaction> tls;
+  tls.push_back(std::move(op_t));
   parent->queue_transactions(tls, op.op);
   delete t;
 }
@@ -761,7 +757,7 @@ void ReplicatedBackend::be_deep_scrub(
   while (true) {
     handle.reset_tp_timeout();
     r = store->read(
-	  coll,
+	  ch,
 	  ghobject_t(
 	    poid, ghobject_t::NO_GEN, get_parent()->whoami_shard().shard),
 	  pos,
@@ -851,12 +847,12 @@ void ReplicatedBackend::_do_push(OpRequestRef op)
   pg_shard_t from = m->from;
 
   vector<PushReplyOp> replies;
-  ObjectStore::Transaction *t = new ObjectStore::Transaction;
+  ObjectStore::Transaction t;
   for (vector<PushOp>::iterator i = m->pushes.begin();
        i != m->pushes.end();
        ++i) {
     replies.push_back(PushReplyOp());
-    handle_push(from, *i, &(replies.back()), t);
+    handle_push(from, *i, &(replies.back()), &t);
   }
 
   MOSDPGPushReply *reply = new MOSDPGPushReply;
@@ -867,13 +863,11 @@ void ReplicatedBackend::_do_push(OpRequestRef op)
   reply->replies.swap(replies);
   reply->compute_cost(cct);
 
-  t->register_on_complete(
+  t.register_on_complete(
     new PG_SendMessageOnConn(
       get_parent(), reply, m->get_connection()));
 
-  t->register_on_applied(
-    new ObjectStore::C_DeleteTransaction(t));
-  get_parent()->queue_transaction(t);
+  get_parent()->queue_transaction(std::move(t));
 }
 
 struct C_ReplicatedBackend_OnPullComplete : GenContext<ThreadPool::TPHandle&> {
@@ -894,7 +888,7 @@ struct C_ReplicatedBackend_OnPullComplete : GenContext<ThreadPool::TPHandle&> {
       assert(j != bc->pulling.end());
       if (!bc->start_pushes(*i, j->second.obc, h)) {
 	bc->get_parent()->on_global_recover(
-	  *i);
+	  *i, j->second.stat);
       }
       bc->pulling.erase(*i);
       handle.reset_tp_timeout();
@@ -910,12 +904,12 @@ void ReplicatedBackend::_do_pull_response(OpRequestRef op)
   pg_shard_t from = m->from;
 
   vector<PullOp> replies(1);
-  ObjectStore::Transaction *t = new ObjectStore::Transaction;
+  ObjectStore::Transaction t;
   list<hobject_t> to_continue;
   for (vector<PushOp>::iterator i = m->pushes.begin();
        i != m->pushes.end();
        ++i) {
-    bool more = handle_pull_response(from, *i, &(replies.back()), &to_continue, t);
+    bool more = handle_pull_response(from, *i, &(replies.back()), &to_continue, &t);
     if (more)
       replies.push_back(PullOp());
   }
@@ -925,7 +919,7 @@ void ReplicatedBackend::_do_pull_response(OpRequestRef op)
 	this,
 	m->get_priority());
     c->to_continue.swap(to_continue);
-    t->register_on_complete(
+    t.register_on_complete(
       new PG_RecoveryQueueAsync(
 	get_parent(),
 	get_parent()->bless_gencontext(c)));
@@ -941,14 +935,12 @@ void ReplicatedBackend::_do_pull_response(OpRequestRef op)
     reply->pulls.swap(replies);
     reply->compute_cost(cct);
 
-    t->register_on_complete(
+    t.register_on_complete(
       new PG_SendMessageOnConn(
 	get_parent(), reply, m->get_connection()));
   }
 
-  t->register_on_applied(
-    new ObjectStore::C_DeleteTransaction(t));
-  get_parent()->queue_transaction(t);
+  get_parent()->queue_transaction(std::move(t));
 }
 
 void ReplicatedBackend::do_pull(OpRequestRef op)
@@ -1208,17 +1200,16 @@ void ReplicatedBackend::sub_op_modify_impl(OpRequestRef op)
     update_snaps,
     &(rm->localt));
 
-  rm->bytes_written = rm->opt.get_encoded_bytes();
-
   rm->opt.register_on_commit(
     parent->bless_context(
       new C_OSD_RepModifyCommit(this, rm)));
   rm->localt.register_on_applied(
     parent->bless_context(
       new C_OSD_RepModifyApply(this, rm)));
-  list<ObjectStore::Transaction*> tls;
-  tls.push_back(&(rm->localt));
-  tls.push_back(&(rm->opt));
+  vector<ObjectStore::Transaction> tls;
+  tls.reserve(2);
+  tls.push_back(std::move(rm->localt));
+  tls.push_back(std::move(rm->opt));
   parent->queue_transactions(tls, op);
   // op is cleaned up by oncommit/onapply when both are executed
 }
@@ -1863,8 +1854,6 @@ bool ReplicatedBackend::handle_pull_response(
 
   pi.recovery_progress = pop.after_progress;
 
-  pi.stat.num_bytes_recovered += data.length();
-
   dout(10) << "new recovery_info " << pi.recovery_info
 	   << ", new progress " << pi.recovery_progress
 	   << dendl;
@@ -1879,13 +1868,10 @@ bool ReplicatedBackend::handle_pull_response(
 		   pop.omap_entries,
 		   t);
 
-  pi.stat.num_keys_recovered += pop.omap_entries.size();
-
   if (complete) {
     to_continue->push_back(hoid);
-    pi.stat.num_objects_recovered++;
     get_parent()->on_local_recover(
-      hoid, pi.stat, pi.recovery_info, pi.obc, t);
+      hoid, pi.recovery_info, pi.obc, t);
     pull_from_peer[from].erase(hoid);
     if (pull_from_peer[from].empty())
       pull_from_peer.erase(from);
@@ -1927,7 +1913,6 @@ void ReplicatedBackend::handle_push(
   if (complete)
     get_parent()->on_local_recover(
       pop.recovery_info.soid,
-      object_stat_sum_t(),
       pop.recovery_info,
       ObjectContextRef(), // ok, is replica
       t);
@@ -2013,7 +1998,7 @@ int ReplicatedBackend::build_push_op(const ObjectRecoveryInfo &recovery_info,
 
   if (progress.first) {
     store->omap_get_header(coll, ghobject_t(recovery_info.soid), &out_op->omap_header);
-    store->getattrs(coll, ghobject_t(recovery_info.soid), out_op->attrset);
+    store->getattrs(ch, ghobject_t(recovery_info.soid), out_op->attrset);
 
     // Debug
     bufferlist bv = out_op->attrset[OI_ATTR];
@@ -2059,7 +2044,7 @@ int ReplicatedBackend::build_push_op(const ObjectRecoveryInfo &recovery_info,
     if (!recovery_info.copy_subset.empty()) {
       interval_set<uint64_t> copy_subset = recovery_info.copy_subset;
       bufferlist bl;
-      int r = store->fiemap(coll, ghobject_t(recovery_info.soid), 0,
+      int r = store->fiemap(ch, ghobject_t(recovery_info.soid), 0,
                             copy_subset.range_end(), bl);
       if (r >= 0)  {
         interval_set<uint64_t> fiemap_included;
@@ -2088,7 +2073,7 @@ int ReplicatedBackend::build_push_op(const ObjectRecoveryInfo &recovery_info,
        p != out_op->data_included.end();
        ++p) {
     bufferlist bit;
-    store->read(coll, ghobject_t(recovery_info.soid),
+    store->read(ch, ghobject_t(recovery_info.soid),
 		p.get_start(), p.get_len(), bit,
                 cache_dont_need ? CEPH_OSD_OP_FLAG_FADVISE_DONTNEED: 0);
     if (p.get_len() != bit.length()) {
@@ -2220,12 +2205,17 @@ bool ReplicatedBackend::handle_push_reply(pg_shard_t peer, PushReplyOp &op, Push
 	peer, soid, pi->recovery_info,
 	pi->stat);
 
+      object_stat_sum_t stat;
+      stat.num_bytes_recovered = pi->recovery_info.size;
+      stat.num_keys_recovered = reply->omap_entries.size();
+      stat.num_objects_recovered = 1;
+
       pushing[soid].erase(peer);
       pi = NULL;
 
 
       if (pushing[soid].empty()) {
-	get_parent()->on_global_recover(soid);
+	get_parent()->on_global_recover(soid, stat);
 	pushing.erase(soid);
       } else {
 	dout(10) << "pushed " << soid << ", still waiting for push ack from "
@@ -2274,7 +2264,7 @@ void ReplicatedBackend::handle_pull(pg_shard_t peer, PullOp &op, PushOp *reply)
 {
   const hobject_t &soid = op.soid;
   struct stat st;
-  int r = store->stat(coll, ghobject_t(soid), &st);
+  int r = store->stat(ch, ghobject_t(soid), &st);
   if (r != 0) {
     get_parent()->clog_error() << get_info().pgid << " "
 			       << peer << " tried to pull " << soid
@@ -2361,7 +2351,7 @@ void ReplicatedBackend::sub_op_push(OpRequestRef op)
   pop.recovery_info = m->recovery_info;
   pop.before_progress = m->current_progress;
   pop.after_progress = m->recovery_progress;
-  ObjectStore::Transaction *t = new ObjectStore::Transaction;
+  ObjectStore::Transaction t;
 
   if (is_primary()) {
     PullOp resp;
@@ -2369,7 +2359,7 @@ void ReplicatedBackend::sub_op_push(OpRequestRef op)
     list<hobject_t> to_continue;
     bool more = handle_pull_response(
       m->from, pop, &resp,
-      &to_continue, t);
+      &to_continue, &t);
     if (more) {
       send_pull_legacy(
 	m->get_priority(),
@@ -2382,7 +2372,7 @@ void ReplicatedBackend::sub_op_push(OpRequestRef op)
 	  this,
 	  op->get_req()->get_priority());
       c->to_continue.swap(to_continue);
-      t->register_on_complete(
+      t.register_on_complete(
 	new PG_RecoveryQueueAsync(
 	  get_parent(),
 	  get_parent()->bless_gencontext(c)));
@@ -2395,13 +2385,11 @@ void ReplicatedBackend::sub_op_push(OpRequestRef op)
       get_osdmap()->get_epoch(), CEPH_OSD_FLAG_ACK);
     reply->set_priority(m->get_priority());
     assert(entity_name_t::TYPE_OSD == m->get_connection()->peer_type);
-    handle_push(m->from, pop, &resp, t);
-    t->register_on_complete(new PG_SendMessageOnConn(
+    handle_push(m->from, pop, &resp, &t);
+    t.register_on_complete(new PG_SendMessageOnConn(
 			      get_parent(), reply, m->get_connection()));
   }
-  t->register_on_applied(
-    new ObjectStore::C_DeleteTransaction(t));
-  get_parent()->queue_transaction(t);
+  get_parent()->queue_transaction(std::move(t));
   return;
 }
 
diff --git a/src/osd/ReplicatedBackend.h b/src/osd/ReplicatedBackend.h
index 7a1b72a..1f04150 100644
--- a/src/osd/ReplicatedBackend.h
+++ b/src/osd/ReplicatedBackend.h
@@ -33,6 +33,7 @@ public:
   ReplicatedBackend(
     PGBackend::Listener *pg,
     coll_t coll,
+    ObjectStore::CollectionHandle &ch,
     ObjectStore *store,
     CephContext *cct);
 
@@ -85,7 +86,7 @@ public:
   class RPCReadPred : public IsPGReadablePredicate {
     pg_shard_t whoami;
   public:
-    RPCReadPred(pg_shard_t whoami) : whoami(whoami) {}
+    explicit RPCReadPred(pg_shard_t whoami) : whoami(whoami) {}
     bool operator()(const set<pg_shard_t> &have) const {
       return have.count(whoami);
     }
@@ -404,12 +405,10 @@ private:
     eversion_t last_complete;
     epoch_t epoch_started;
 
-    uint64_t bytes_written;
-
     ObjectStore::Transaction opt, localt;
     
     RepModify() : applied(false), committed(false), ackerosd(-1),
-		  epoch_started(0), bytes_written(0) {}
+		  epoch_started(0) {}
   };
   typedef ceph::shared_ptr<RepModify> RepModifyRef;
 
@@ -434,6 +433,8 @@ private:
   void sub_op_modify_applied(RepModifyRef rm);
   void sub_op_modify_commit(RepModifyRef rm);
   bool scrub_supported() { return true; }
+  bool auto_repair_supported() const { return false; }
+
 
   void be_deep_scrub(
     const hobject_t &obj,
diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc
index 4d5c94d..9205118 100644
--- a/src/osd/ReplicatedPG.cc
+++ b/src/osd/ReplicatedPG.cc
@@ -132,7 +132,7 @@ public:
   ReplicatedPG::CopyResults *results;
   int retval;
   ReplicatedPG::OpContext *ctx;
-  CopyFromCallback(ReplicatedPG::OpContext *ctx_)
+  explicit CopyFromCallback(ReplicatedPG::OpContext *ctx_)
     : results(NULL),
       retval(0),
       ctx(ctx_) {}
@@ -178,7 +178,6 @@ public:
 
 void ReplicatedPG::on_local_recover(
   const hobject_t &hoid,
-  const object_stat_sum_t &stat_diff,
   const ObjectRecoveryInfo &_recovery_info,
   ObjectContextRef obc,
   ObjectStore::Transaction *t
@@ -227,8 +226,6 @@ void ReplicatedPG::on_local_recover(
   recover_got(recovery_info.soid, recovery_info.version);
 
   if (is_primary()) {
-    info.stats.stats.sum.add(stat_diff);
-
     assert(obc);
     obc->obs.exists = true;
     obc->ondisk_write_lock();
@@ -276,8 +273,10 @@ void ReplicatedPG::on_local_recover(
 }
 
 void ReplicatedPG::on_global_recover(
-  const hobject_t &soid)
+  const hobject_t &soid,
+  const object_stat_sum_t &stat_diff)
 {
+  info.stats.stats.sum.add(stat_diff);
   missing_loc.recovered(soid);
   publish_stats_to_osd();
   dout(10) << "pushed " << soid << " to all replicas" << dendl;
@@ -1287,7 +1286,7 @@ void ReplicatedPG::do_pg_op(OpRequestRef op)
 	    wait_for_unreadable_object(oid, op);
 	    return;
 	  }
-	  result = osd->store->read(coll, ghobject_t(oid), 0, 0, osd_op.outdata);
+	  result = osd->store->read(ch, ghobject_t(oid), 0, 0, osd_op.outdata);
 	}
       }
       break;
@@ -1352,7 +1351,7 @@ ReplicatedPG::ReplicatedPG(OSDService *o, OSDMapRef curmap,
   PG(o, curmap, _pool, p),
   pgbackend(
     PGBackend::build_pg_backend(
-      _pool.info, curmap, this, coll_t(p), o->store, cct)),
+      _pool.info, curmap, this, coll_t(p), ch, o->store, cct)),
   object_contexts(o->cct, g_conf->osd_pg_object_context_cache_count),
   snapset_contexts_lock("ReplicatedPG::snapset_contexts"),
   backfills_in_flight(hobject_t::Comparator(true)),
@@ -3171,10 +3170,10 @@ void ReplicatedPG::do_backfill(OpRequestRef op)
 	info.stats = m->stats;
       }
 
-      ObjectStore::Transaction *t = new ObjectStore::Transaction;
+      ObjectStore::Transaction t;
       dirty_info = true;
-      write_if_dirty(*t);
-      int tr = osd->store->queue_transaction_and_cleanup(osr.get(), t);
+      write_if_dirty(t);
+      int tr = osd->store->queue_transaction(osr.get(), std::move(t), NULL);
       assert(tr == 0);
     }
     break;
@@ -3238,14 +3237,6 @@ ReplicatedPG::RepGather *ReplicatedPG::trim_object(const hobject_t &coid)
     return NULL;
   }
 
-  RepGather *repop = simple_repop_create(obc);
-  OpContext *ctx = repop->ctx;
-  ctx->snapset_obc = snapset_obc;
-  ctx->lock_to_release = OpContext::W_LOCK;
-  ctx->release_snapset_obc = true;
-  ctx->at_version = get_next_version();
-
-  PGBackend::PGTransaction *t = ctx->op_t;
   set<snapid_t> new_snaps;
   for (set<snapid_t>::iterator i = old_snaps.begin();
        i != old_snaps.end();
@@ -3254,22 +3245,33 @@ ReplicatedPG::RepGather *ReplicatedPG::trim_object(const hobject_t &coid)
       new_snaps.insert(*i);
   }
 
+  vector<snapid_t>::iterator p = snapset.clones.end();
+
+  if (new_snaps.empty()) {
+    p = std::find(snapset.clones.begin(), snapset.clones.end(), coid.snap);
+    if (p == snapset.clones.end()) {
+      osd->clog->error() << __func__ << " Snap " << coid.snap << " not in clones" << "\n";
+      return NULL;
+    }
+  }
+
+  RepGather *repop = simple_repop_create(obc);
+  OpContext *ctx = repop->ctx;
+  ctx->snapset_obc = snapset_obc;
+  ctx->lock_to_release = OpContext::W_LOCK;
+  ctx->release_snapset_obc = true;
+  ctx->at_version = get_next_version();
+  PGBackend::PGTransaction *t = ctx->op_t;
+ 
   if (new_snaps.empty()) {
     // remove clone
     dout(10) << coid << " snaps " << old_snaps << " -> "
 	     << new_snaps << " ... deleting" << dendl;
 
     // ...from snapset
+    assert(p != snapset.clones.end());
+  
     snapid_t last = coid.snap;
-    vector<snapid_t>::iterator p;
-    for (p = snapset.clones.begin(); p != snapset.clones.end(); ++p)
-      if (*p == last)
-	break;
-    if (p == snapset.clones.end()) {
-      osd->clog->error() << __func__ << " Snap " << coid.snap << " not in clones" << "\n";
-      return NULL;
-    }
-
     ctx->delta_stats.num_bytes -= snapset.get_clone_bytes(last);
 
     if (p != snapset.clones.begin()) {
@@ -4110,8 +4112,8 @@ int ReplicatedPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops)
       {
 	// read into a buffer
 	bufferlist bl;
-	int r = osd->store->fiemap(coll, ghobject_t(soid, ghobject_t::NO_GEN,
-						    info.pgid.shard),
+	int r = osd->store->fiemap(ch, ghobject_t(soid, ghobject_t::NO_GEN,
+						  info.pgid.shard),
 				   op.extent.offset, op.extent.length, bl);
 	osd_op.outdata.claim(bl);
 	if (r < 0)
@@ -4144,8 +4146,8 @@ int ReplicatedPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops)
 	// read into a buffer
 	bufferlist bl;
         uint32_t total_read = 0;
-	int r = osd->store->fiemap(coll, ghobject_t(soid, ghobject_t::NO_GEN,
-						    info.pgid.shard),
+	int r = osd->store->fiemap(ch, ghobject_t(soid, ghobject_t::NO_GEN,
+						  info.pgid.shard),
 				   op.extent.offset, op.extent.length, bl);
 	if (r < 0)  {
 	  result = r;
@@ -5480,7 +5482,7 @@ int ReplicatedPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops)
       }
       ++ctx->num_read;
       {
-	osd->store->omap_get_header(coll, ghobject_t(soid), &osd_op.outdata);
+	osd->store->omap_get_header(ch, ghobject_t(soid), &osd_op.outdata);
 	ctx->delta_stats.num_rd_kb += SHIFT_ROUND_UP(osd_op.outdata.length(), 10);
 	ctx->delta_stats.num_rd++;
       }
@@ -5501,7 +5503,7 @@ int ReplicatedPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops)
 	tracepoint(osd, do_osd_op_pre_omapgetvalsbykeys, soid.oid.name.c_str(), soid.snap.val, list_entries(keys_to_get).c_str());
 	map<string, bufferlist> out;
 	if (pool.info.supports_omap()) {
-	  osd->store->omap_get_values(coll, ghobject_t(soid), keys_to_get, &out);
+	  osd->store->omap_get_values(ch, ghobject_t(soid), keys_to_get, &out);
 	} // else return empty omap entries
 	::encode(out, osd_op.outdata);
 	ctx->delta_stats.num_rd_kb += SHIFT_ROUND_UP(osd_op.outdata.length(), 10);
@@ -5536,7 +5538,7 @@ int ReplicatedPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops)
 	       i != assertions.end();
 	       ++i)
 	    to_get.insert(i->first);
-	  int r = osd->store->omap_get_values(coll, ghobject_t(soid),
+	  int r = osd->store->omap_get_values(ch, ghobject_t(soid),
 					      to_get, &out);
 	  if (r < 0) {
 	    result = r;
@@ -6846,7 +6848,7 @@ int ReplicatedPG::fill_in_copy_get(
     if (left > 0 && !cursor.omap_complete) {
       assert(cursor.data_complete);
       if (cursor.omap_offset.empty()) {
-	osd->store->omap_get_header(coll, ghobject_t(oi.soid),
+	osd->store->omap_get_header(ch, ghobject_t(oi.soid),
 				    &reply_obj.omap_header);
       }
       bufferlist omap_data;
@@ -7169,15 +7171,19 @@ void ReplicatedPG::process_copy_chunk(hobject_t oid, ceph_tid_t tid, int r)
   // cancel and requeue proxy ops on this object
   if (!r) {
     for (map<ceph_tid_t, ProxyReadOpRef>::iterator it = proxyread_ops.begin();
-	it != proxyread_ops.end(); ++it) {
+	it != proxyread_ops.end();) {
       if (it->second->soid == cobc->obs.oi.soid) {
-	cancel_proxy_read(it->second);
+	cancel_proxy_read((it++)->second);
+      } else {
+	++it;
       }
     }
     for (map<ceph_tid_t, ProxyWriteOpRef>::iterator it = proxywrite_ops.begin();
-	 it != proxywrite_ops.end(); ++it) {
+	 it != proxywrite_ops.end();) {
       if (it->second->soid == cobc->obs.oi.soid) {
-	cancel_proxy_write(it->second);
+	cancel_proxy_write((it++)->second);
+      } else {
+	++it;
       }
     }
     kick_proxy_ops_blocked(cobc->obs.oi.soid);
@@ -9291,9 +9297,9 @@ void ReplicatedPG::sub_op_remove(OpRequestRef op)
 
   op->mark_started();
 
-  ObjectStore::Transaction *t = new ObjectStore::Transaction;
-  remove_snap_mapped_object(*t, m->poid);
-  int r = osd->store->queue_transaction_and_cleanup(osr.get(), t);
+  ObjectStore::Transaction t;
+  remove_snap_mapped_object(t, m->poid);
+  int r = osd->store->queue_transaction(osr.get(), std::move(t), NULL);
   assert(r == 0);
 }
 
@@ -9364,7 +9370,7 @@ ObjectContextRef ReplicatedPG::mark_object_lost(ObjectStore::Transaction *t,
 struct C_PG_MarkUnfoundLost : public Context {
   ReplicatedPGRef pg;
   list<ObjectContextRef> obcs;
-  C_PG_MarkUnfoundLost(ReplicatedPG *p) : pg(p) {}
+  explicit C_PG_MarkUnfoundLost(ReplicatedPG *p) : pg(p) {}
   void finish(int r) {
     pg->_finish_mark_all_unfound_lost(obcs);
   }
@@ -9380,7 +9386,7 @@ void ReplicatedPG::mark_all_unfound_lost(int what)
   pg_log.get_log().print(*_dout);
   *_dout << dendl;
 
-  ObjectStore::Transaction *t = new ObjectStore::Transaction;
+  ObjectStore::Transaction t;
   C_PG_MarkUnfoundLost *c = new C_PG_MarkUnfoundLost(this);
 
   utime_t mtime = ceph_clock_now(cct);
@@ -9403,7 +9409,7 @@ void ReplicatedPG::mark_all_unfound_lost(int what)
 
     switch (what) {
     case pg_log_entry_t::LOST_MARK:
-      obc = mark_object_lost(t, oid, m->second.need, mtime, pg_log_entry_t::LOST_MARK);
+      obc = mark_object_lost(&t, oid, m->second.need, mtime, pg_log_entry_t::LOST_MARK);
       pg_log.missing_got(m++);
       assert(0 == "actually, not implemented yet!");
       // we need to be careful about how this is handled on the replica!
@@ -9438,7 +9444,7 @@ void ReplicatedPG::mark_all_unfound_lost(int what)
 	pg_log.add(e);
 	dout(10) << e << dendl;
 
-	t->remove(
+	t.remove(
 	  coll,
 	  ghobject_t(oid, ghobject_t::NO_GEN, pg_whoami.shard));
 	pg_log.missing_add_event(e);
@@ -9467,11 +9473,11 @@ void ReplicatedPG::mark_all_unfound_lost(int what)
   }
 
   dirty_info = true;
-  write_if_dirty(*t);
-
-  t->register_on_complete(new ObjectStore::C_DeleteTransaction(t));
+  write_if_dirty(t);
 
-  osd->store->queue_transaction(osr.get(), t, c, NULL, new C_OSD_OndiskWriteUnlockList(&c->obcs));
+  
+  osd->store->queue_transaction(osr.get(), std::move(t), c, NULL, 
+                            new C_OSD_OndiskWriteUnlockList(&c->obcs));
 	      
   // Send out the PG log to all replicas
   // So that they know what is lost
@@ -10168,19 +10174,18 @@ int ReplicatedPG::recover_primary(int max, ThreadPool::TPHandle &handle)
 	      obc->ondisk_write_lock();
 	      obc->obs.oi.version = latest->version;
 
-	      ObjectStore::Transaction *t = new ObjectStore::Transaction;
-	      t->register_on_applied(new ObjectStore::C_DeleteTransaction(t));
+	      ObjectStore::Transaction t;
 	      bufferlist b2;
 	      obc->obs.oi.encode(b2);
 	      assert(!pool.info.require_rollback());
-	      t->setattr(coll, ghobject_t(soid), OI_ATTR, b2);
+	      t.setattr(coll, ghobject_t(soid), OI_ATTR, b2);
 
 	      recover_got(soid, latest->version);
 	      missing_loc.add_location(soid, pg_whoami);
 
 	      ++active_pushes;
 
-	      osd->store->queue_transaction(osr.get(), t,
+	      osd->store->queue_transaction(osr.get(), std::move(t),
 					    new C_OSD_AppliedRecoveredObject(this, obc),
 					    new C_OSD_CommittedPushedObject(
 					      this,
@@ -11000,7 +11005,7 @@ void ReplicatedPG::check_local()
 	       << " at " << p->version << dendl;
       struct stat st;
       int r = osd->store->stat(
-	coll,
+	ch,
 	ghobject_t(p->soid, ghobject_t::NO_GEN, pg_whoami.shard),
 	&st);
       if (r != -ENOENT) {
@@ -11647,7 +11652,7 @@ void ReplicatedPG::agent_load_hit_sets()
 	bufferlist bl;
 	{
 	  obc->ondisk_read_lock();
-	  int r = osd->store->read(coll, ghobject_t(oid), 0, 0, bl);
+	  int r = osd->store->read(ch, ghobject_t(oid), 0, 0, bl);
 	  assert(r >= 0);
 	  obc->ondisk_read_unlock();
 	}
@@ -11730,7 +11735,7 @@ bool ReplicatedPG::agent_maybe_flush(ObjectContextRef& obc)
 
 struct C_AgentEvictStartStop : public Context {
   ReplicatedPGRef pg;
-  C_AgentEvictStartStop(ReplicatedPG *p) : pg(p) {
+  explicit C_AgentEvictStartStop(ReplicatedPG *p) : pg(p) {
     pg->osd->agent_start_evict_op();
   }
   void finish(int r) {
@@ -11767,6 +11772,19 @@ bool ReplicatedPG::agent_maybe_evict(ObjectContextRef& obc, bool after_flush)
   }
 
   if (agent_state->evict_mode != TierAgentState::EVICT_MODE_FULL) {
+    // is this object older than cache_min_evict_age?
+    utime_t now = ceph_clock_now(NULL);
+    utime_t ob_local_mtime;
+    if (obc->obs.oi.local_mtime != utime_t()) {
+      ob_local_mtime = obc->obs.oi.local_mtime;
+    } else {
+      ob_local_mtime = obc->obs.oi.mtime;
+    }
+    if (ob_local_mtime + utime_t(pool.info.cache_min_evict_age, 0) > now) {
+      dout(20) << __func__ << " skip (too young) " << obc->obs.oi << dendl;
+      osd->logger->inc(l_osd_agent_skip);
+      return false;
+    }
     // is this object old and/or cold enough?
     int temp = 0;
     uint64_t temp_upper = 0, temp_lower = 0;
@@ -12121,7 +12139,7 @@ bool ReplicatedPG::_range_available_for_scrub(
 
 struct C_ScrubDigestUpdated : public Context {
   ReplicatedPGRef pg;
-  C_ScrubDigestUpdated(ReplicatedPG *pg) : pg(pg) {}
+  explicit C_ScrubDigestUpdated(ReplicatedPG *pg) : pg(pg) {}
   void finish(int r) {
     pg->_scrub_digest_updated();
   }
@@ -12762,10 +12780,10 @@ boost::statechart::result ReplicatedPG::WaitingOnReplicas::react(const SnapTrim&
   dout(10) << "purged_snaps now " << pg->info.purged_snaps << ", snap_trimq now " 
 	   << pg->snap_trimq << dendl;
   
-  ObjectStore::Transaction *t = new ObjectStore::Transaction;
+  ObjectStore::Transaction t;
   pg->dirty_big_info = true;
-  pg->write_if_dirty(*t);
-  int tr = pg->osd->store->queue_transaction_and_cleanup(pg->osr.get(), t);
+  pg->write_if_dirty(t);
+  int tr = pg->osd->store->queue_transaction(pg->osr.get(), std::move(t), NULL);
   assert(tr == 0);
 
   context<SnapTrimmer>().need_share_pg_info = true;
diff --git a/src/osd/ReplicatedPG.h b/src/osd/ReplicatedPG.h
index c596cb0..0ee87a7 100644
--- a/src/osd/ReplicatedPG.h
+++ b/src/osd/ReplicatedPG.h
@@ -263,7 +263,6 @@ public:
   /// Listener methods
   void on_local_recover(
     const hobject_t &oid,
-    const object_stat_sum_t &stat_diff,
     const ObjectRecoveryInfo &recovery_info,
     ObjectContextRef obc,
     ObjectStore::Transaction *t
@@ -278,7 +277,8 @@ public:
     pg_shard_t peer,
     const hobject_t oid);
   void on_global_recover(
-    const hobject_t &oid);
+    const hobject_t &oid,
+    const object_stat_sum_t &stat_diff);
   void failed_push(pg_shard_t from, const hobject_t &soid);
   void cancel_pull(const hobject_t &soid);
 
@@ -327,11 +327,11 @@ public:
   void send_message(int to_osd, Message *m) {
     osd->send_message_osd_cluster(to_osd, m, get_osdmap()->get_epoch());
   }
-  void queue_transaction(ObjectStore::Transaction *t, OpRequestRef op) {
-    osd->store->queue_transaction(osr.get(), t, 0, 0, 0, op);
+  void queue_transaction(ObjectStore::Transaction&& t, OpRequestRef op) {
+    osd->store->queue_transaction(osr.get(), std::move(t), 0, 0, 0, op);
   }
-  void queue_transactions(list<ObjectStore::Transaction*>& tls, OpRequestRef op) {
-    osd->store->queue_transactions(osr.get(), tls, 0, 0, 0, op);
+  void queue_transactions(vector<ObjectStore::Transaction>& tls, OpRequestRef op) {
+    osd->store->queue_transactions(osr.get(), tls, 0, 0, 0, op, NULL);
   }
   epoch_t get_epoch() const {
     return get_osdmap()->get_epoch();
@@ -509,7 +509,7 @@ public:
       boost::optional<uint64_t> watch_cookie;
       uint64_t notify_id;
       bufferlist reply_bl;
-      NotifyAck(uint64_t notify_id) : notify_id(notify_id) {}
+      explicit NotifyAck(uint64_t notify_id) : notify_id(notify_id) {}
       NotifyAck(uint64_t notify_id, uint64_t cookie, bufferlist& rbl)
 	: watch_cookie(cookie), notify_id(notify_id) {
 	reply_bl.claim(rbl);
@@ -1298,7 +1298,7 @@ protected:
   };
   struct C_OSD_OndiskWriteUnlockList : public Context {
     list<ObjectContextRef> *pls;
-    C_OSD_OndiskWriteUnlockList(list<ObjectContextRef> *l) : pls(l) {}
+    explicit C_OSD_OndiskWriteUnlockList(list<ObjectContextRef> *l) : pls(l) {}
     void finish(int r) {
       for (list<ObjectContextRef>::iterator p = pls->begin(); p != pls->end(); ++p)
 	(*p)->ondisk_write_unlock();
@@ -1327,7 +1327,7 @@ protected:
   };
   struct C_OSD_AppliedRecoveredObjectReplica : public Context {
     ReplicatedPGRef pg;
-    C_OSD_AppliedRecoveredObjectReplica(ReplicatedPG *p) :
+    explicit C_OSD_AppliedRecoveredObjectReplica(ReplicatedPG *p) :
       pg(p) {}
     void finish(int r) {
       pg->_applied_recovered_object_replica();
@@ -1543,7 +1543,7 @@ private:
     set<RepGather *> repops;
     snapid_t snap_to_trim;
     bool need_share_pg_info;
-    SnapTrimmer(ReplicatedPG *pg) : pg(pg), need_share_pg_info(false) {}
+    explicit SnapTrimmer(ReplicatedPG *pg) : pg(pg), need_share_pg_info(false) {}
     ~SnapTrimmer();
     void log_enter(const char *state_name);
     void log_exit(const char *state_name, utime_t duration);
@@ -1556,7 +1556,7 @@ private:
       boost::statechart::transition< Reset, NotTrimming >
       > reactions;
     hobject_t pos;
-    TrimmingObjects(my_context ctx);
+    explicit TrimmingObjects(my_context ctx);
     void exit();
     boost::statechart::result react(const SnapTrim&);
   };
@@ -1566,7 +1566,7 @@ private:
       boost::statechart::custom_reaction< SnapTrim >,
       boost::statechart::transition< Reset, NotTrimming >
       > reactions;
-    WaitingOnReplicas(my_context ctx);
+    explicit WaitingOnReplicas(my_context ctx);
     void exit();
     boost::statechart::result react(const SnapTrim&);
   };
@@ -1576,7 +1576,7 @@ private:
       boost::statechart::custom_reaction< SnapTrim >,
       boost::statechart::transition< Reset, NotTrimming >
       > reactions;
-    NotTrimming(my_context ctx);
+    explicit NotTrimming(my_context ctx);
     void exit();
     boost::statechart::result react(const SnapTrim&);
   };
diff --git a/src/osd/SnapMapper.cc b/src/osd/SnapMapper.cc
index ea25b4e..6be5018 100644
--- a/src/osd/SnapMapper.cc
+++ b/src/osd/SnapMapper.cc
@@ -53,7 +53,7 @@ int OSDriver::get_next(
 struct Mapping {
   snapid_t snap;
   hobject_t hoid;
-  Mapping(const pair<snapid_t, hobject_t> &in)
+  explicit Mapping(const pair<snapid_t, hobject_t> &in)
     : snap(in.first), hoid(in.second) {}
   Mapping() : snap(0) {}
   void encode(bufferlist &bl) const {
diff --git a/src/osd/Watch.cc b/src/osd/Watch.cc
index 3e440b7..dc665e0 100644
--- a/src/osd/Watch.cc
+++ b/src/osd/Watch.cc
@@ -71,7 +71,7 @@ class NotifyTimeoutCB : public CancelableContext {
   NotifyRef notif;
   bool canceled; // protected by notif lock
 public:
-  NotifyTimeoutCB(NotifyRef notif) : notif(notif), canceled(false) {}
+  explicit NotifyTimeoutCB(NotifyRef notif) : notif(notif), canceled(false) {}
   void finish(int) {
     notif->osd->watch_lock.Unlock();
     notif->lock.Lock();
@@ -234,7 +234,7 @@ class HandleWatchTimeout : public CancelableContext {
   WatchRef watch;
 public:
   bool canceled; // protected by watch->pg->lock
-  HandleWatchTimeout(WatchRef watch) : watch(watch), canceled(false) {}
+  explicit HandleWatchTimeout(WatchRef watch) : watch(watch), canceled(false) {}
   void cancel() {
     canceled = true;
   }
@@ -258,7 +258,7 @@ class HandleDelayedWatchTimeout : public CancelableContext {
   WatchRef watch;
 public:
   bool canceled;
-  HandleDelayedWatchTimeout(WatchRef watch) : watch(watch), canceled(false) {}
+  explicit HandleDelayedWatchTimeout(WatchRef watch) : watch(watch), canceled(false) {}
   void cancel() {
     canceled = true;
   }
diff --git a/src/osd/osd_types.cc b/src/osd/osd_types.cc
index f3e3b08..2d874e5 100644
--- a/src/osd/osd_types.cc
+++ b/src/osd/osd_types.cc
@@ -1007,7 +1007,7 @@ void pool_opts_t::dump(const std::string& name, Formatter* f) const
 void pool_opts_t::dump(Formatter* f) const
 {
   for (opt_mapping_t::iterator i = opt_mapping.begin(); i != opt_mapping.end();
-       i++) {
+       ++i) {
     const std::string& name = i->first;
     const opt_desc_t& desc = i->second;
     opts_t::const_iterator j = opts.find(desc.key);
@@ -1021,7 +1021,7 @@ void pool_opts_t::dump(Formatter* f) const
 class pool_opts_encoder_t : public boost::static_visitor<>
 {
 public:
-  pool_opts_encoder_t(bufferlist& bl_) : bl(bl_) {}
+  explicit pool_opts_encoder_t(bufferlist& bl_) : bl(bl_) {}
 
   void operator()(std::string s) const {
     ::encode(static_cast<int32_t>(pool_opts_t::STR), bl);
@@ -1082,7 +1082,7 @@ void pool_opts_t::decode(bufferlist::iterator& bl) {
 ostream& operator<<(ostream& out, const pool_opts_t& opts)
 {
   for (opt_mapping_t::iterator i = opt_mapping.begin(); i != opt_mapping.end();
-       i++) {
+       ++i) {
     const std::string& name = i->first;
     const pool_opts_t::opt_desc_t& desc = i->second;
     pool_opts_t::opts_t::const_iterator j = opts.opts.find(desc.key);
@@ -3284,7 +3284,7 @@ void ObjectModDesc::visit(Visitor *visitor) const
 
 struct DumpVisitor : public ObjectModDesc::Visitor {
   Formatter *f;
-  DumpVisitor(Formatter *f) : f(f) {}
+  explicit DumpVisitor(Formatter *f) : f(f) {}
   void append(uint64_t old_size) {
     f->open_object_section("op");
     f->dump_string("code", "APPEND");
diff --git a/src/osd/osd_types.h b/src/osd/osd_types.h
index 1134ffd..84770d3 100644
--- a/src/osd/osd_types.h
+++ b/src/osd/osd_types.h
@@ -303,9 +303,11 @@ struct pg_t {
   pg_t() : m_pool(0), m_seed(0), m_preferred(-1) {}
   pg_t(ps_t seed, uint64_t pool, int pref=-1) :
     m_pool(pool), m_seed(seed), m_preferred(pref) {}
+  // cppcheck-suppress noExplicitConstructor
   pg_t(const ceph_pg& cpg) :
     m_pool(cpg.pool), m_seed(cpg.ps), m_preferred((__s16)cpg.preferred) {}
 
+  // cppcheck-suppress noExplicitConstructor
   pg_t(const old_pg_t& opg) {
     *this = opg.v;
   }
@@ -712,12 +714,13 @@ public:
   eversion_t() : version(0), epoch(0), __pad(0) {}
   eversion_t(epoch_t e, version_t v) : version(v), epoch(e), __pad(0) {}
 
+  // cppcheck-suppress noExplicitConstructor
   eversion_t(const ceph_eversion& ce) : 
     version(ce.version),
     epoch(ce.epoch),
     __pad(0) { }
 
-  eversion_t(bufferlist& bl) : __pad(0) { decode(bl); }
+  explicit eversion_t(bufferlist& bl) : __pad(0) { decode(bl); }
 
   static eversion_t max() {
     eversion_t max;
@@ -1939,7 +1942,7 @@ struct pg_hit_set_info_t {
   utime_t begin, end;   ///< time interval
   eversion_t version;   ///< version this HitSet object was written
   bool using_gmt;	///< use gmt for creating the hit_set archive object name
-  pg_hit_set_info_t(bool using_gmt = true)
+  explicit pg_hit_set_info_t(bool using_gmt = true)
     : using_gmt(using_gmt) {}
 
   void encode(bufferlist &bl) const;
@@ -2100,6 +2103,7 @@ struct pg_info_t {
       last_backfill(hobject_t::get_max()),
       last_backfill_bitwise(false)
   { }
+  // cppcheck-suppress noExplicitConstructor
   pg_info_t(spg_t p)
     : pgid(p),
       last_epoch_started(0), last_user_version(0),
@@ -2716,7 +2720,7 @@ struct pg_missing_t {
   struct item {
     eversion_t need, have;
     item() {}
-    item(eversion_t n) : need(n) {}  // have no old version
+    explicit item(eversion_t n) : need(n) {}  // have no old version
     item(eversion_t n, eversion_t h) : need(n), have(h) {}
 
     void encode(bufferlist& bl) const {
@@ -3108,7 +3112,7 @@ struct SnapSet {
   map<snapid_t, uint64_t> clone_size;
 
   SnapSet() : seq(0), head_exists(false) {}
-  SnapSet(bufferlist& bl) {
+  explicit SnapSet(bufferlist& bl) {
     bufferlist::iterator p = bl.begin();
     decode(p);
   }
@@ -3332,14 +3336,14 @@ struct object_info_t {
       data_digest(-1), omap_digest(-1)
   {}
 
-  object_info_t(const hobject_t& s)
+  explicit object_info_t(const hobject_t& s)
     : soid(s),
       user_version(0), size(0), flags((flag_t)0),
       truncate_seq(0), truncate_size(0),
       data_digest(-1), omap_digest(-1)
   {}
 
-  object_info_t(bufferlist& bl) {
+  explicit object_info_t(bufferlist& bl) {
     decode(bl);
   }
   object_info_t operator=(bufferlist& bl) {
@@ -3366,7 +3370,7 @@ struct SnapSetContext {
   bool registered : 1;
   bool exists : 1;
 
-  SnapSetContext(const hobject_t& o) :
+  explicit SnapSetContext(const hobject_t& o) :
     oid(o), ref(0), registered(false), exists(true) { }
 };
 
diff --git a/src/osdc/Filer.cc b/src/osdc/Filer.cc
index 0a9d6e9..bf5f23c 100644
--- a/src/osdc/Filer.cc
+++ b/src/osdc/Filer.cc
@@ -13,6 +13,8 @@
  */
 
 
+#include <mutex>
+
 #include "Filer.h"
 #include "osd/OSDMap.h"
 #include "Striper.h"
@@ -49,13 +51,13 @@ public:
 
     bool probe_complete;
     {
-      probe->lock.Lock();
+      Probe::unique_lock pl(probe->lock);
       if (r != 0) {
 	probe->err = r;
       }
 
-      probe_complete = filer->_probed(probe, oid, size, mtime);
-      assert(!probe->lock.is_locked_by_me());
+      probe_complete = filer->_probed(probe, oid, size, mtime, pl);
+      assert(!pl.owns_lock());
     }
     if (probe_complete) {
       probe->onfinish->complete(probe->err);
@@ -128,9 +130,9 @@ int Filer::probe_impl(Probe* probe, ceph_file_layout *layout,
     probe->probing_off -= probe->probing_len;
   }
 
-  probe->lock.Lock();
-  _probe(probe);
-  assert(!probe->lock.is_locked_by_me());
+  Probe::unique_lock pl(probe->lock);
+  _probe(probe, pl);
+  assert(!pl.owns_lock());
 
   return 0;
 }
@@ -140,9 +142,9 @@ int Filer::probe_impl(Probe* probe, ceph_file_layout *layout,
 /**
  * probe->lock must be initially locked, this function will release it
  */
-void Filer::_probe(Probe *probe)
+void Filer::_probe(Probe *probe, Probe::unique_lock& pl)
 {
-  assert(probe->lock.is_locked_by_me());
+  assert(pl.owns_lock() && pl.mutex() == &probe->lock);
 
   ldout(cct, 10) << "_probe " << hex << probe->ino << dec
 		 << " " << probe->probing_off << "~" << probe->probing_len
@@ -163,7 +165,7 @@ void Filer::_probe(Probe *probe)
     stat_extents.push_back(*p);
   }
 
-  probe->lock.Unlock();
+  pl.unlock();
   for (std::vector<ObjectExtent>::iterator i = stat_extents.begin();
        i != stat_extents.end(); ++i) {
     C_Probe *c = new C_Probe(this, probe, i->oid);
@@ -179,9 +181,9 @@ void Filer::_probe(Probe *probe)
  * @return true if probe is complete and Probe object may be freed.
  */
 bool Filer::_probed(Probe *probe, const object_t& oid, uint64_t size,
-		    ceph::real_time mtime)
+		    ceph::real_time mtime, Probe::unique_lock& pl)
 {
-  assert(probe->lock.is_locked_by_me());
+  assert(pl.owns_lock() && pl.mutex() == &probe->lock);
 
   ldout(cct, 10) << "_probed " << probe->ino << " object " << oid
 	   << " has size " << size << " mtime " << mtime << dendl;
@@ -194,12 +196,12 @@ bool Filer::_probed(Probe *probe, const object_t& oid, uint64_t size,
   probe->ops.erase(oid);
 
   if (!probe->ops.empty()) {
-    probe->lock.Unlock();
+    pl.unlock();
     return false;  // waiting for more!
   }
 
   if (probe->err) { // we hit an error, propagate back up
-    probe->lock.Unlock();
+    pl.unlock();
     return true;
   }
 
@@ -278,8 +280,8 @@ bool Filer::_probed(Probe *probe, const object_t& oid, uint64_t size,
       probe->probing_len = period;
       probe->probing_off -= period;
     }
-    _probe(probe);
-    assert(!probe->lock.is_locked_by_me());
+    _probe(probe, pl);
+    assert(!pl.owns_lock());
     return false;
   } else if (probe->pmtime) {
     ldout(cct, 10) << "_probed found mtime " << probe->max_mtime << dendl;
@@ -289,7 +291,7 @@ bool Filer::_probed(Probe *probe, const object_t& oid, uint64_t size,
     *probe->pumtime = ceph::real_clock::to_ceph_timespec(probe->max_mtime);
   }
   // done!
-  probe->lock.Unlock();
+  pl.unlock();
   return true;
 }
 
@@ -297,7 +299,9 @@ bool Filer::_probed(Probe *probe, const object_t& oid, uint64_t size,
 // -----------------------
 
 struct PurgeRange {
-  Mutex lock;
+  std::mutex lock;
+  typedef std::lock_guard<std::mutex> lock_guard;
+  typedef std::unique_lock<std::mutex> unique_lock;
   inodeno_t ino;
   ceph_file_layout layout;
   SnapContext snapc;
@@ -309,9 +313,8 @@ struct PurgeRange {
   PurgeRange(inodeno_t i, ceph_file_layout& l, const SnapContext& sc,
 	     uint64_t fo, uint64_t no, ceph::real_time t, int fl,
 	     Context *fin)
-    : lock("Filer::PurgeRange"), ino(i), layout(l), snapc(sc),
-      first(fo), num(no), mtime(t), flags(fl), oncommit(fin),
-      uncommitted(0) {}
+    : ino(i), layout(l), snapc(sc), first(fo), num(no), mtime(t), flags(fl),
+      oncommit(fin), uncommitted(0) {}
 };
 
 int Filer::purge_range(inodeno_t ino,
@@ -327,9 +330,7 @@ int Filer::purge_range(inodeno_t ino,
   // single object?  easy!
   if (num_obj == 1) {
     object_t oid = file_object_t(ino, first_obj);
-    const OSDMap *osdmap = objecter->get_osdmap_read();
-    object_locator_t oloc = osdmap->file_to_object_locator(*layout);
-    objecter->put_osdmap_read();
+    object_locator_t oloc = OSDMap::file_to_object_locator(*layout);
     objecter->remove(oid, oloc, snapc, mtime, flags, NULL, oncommit);
     return 0;
   }
@@ -352,7 +353,7 @@ struct C_PurgeRange : public Context {
 
 void Filer::_do_purge_range(PurgeRange *pr, int fin)
 {
-  pr->lock.Lock();
+  PurgeRange::unique_lock prl(pr->lock);
   pr->uncommitted -= fin;
   ldout(cct, 10) << "_do_purge_range " << pr->ino << " objects " << pr->first
 		 << "~" << pr->num << " uncommitted " << pr->uncommitted
@@ -360,7 +361,7 @@ void Filer::_do_purge_range(PurgeRange *pr, int fin)
 
   if (pr->num == 0 && pr->uncommitted == 0) {
     pr->oncommit->complete(0);
-    pr->lock.Unlock();
+    prl.unlock();
     delete pr;
     return;
   }
@@ -375,15 +376,11 @@ void Filer::_do_purge_range(PurgeRange *pr, int fin)
     pr->num--;
     max--;
   }
-  pr->lock.Unlock();
+  prl.unlock();
 
   // Issue objecter ops outside pr->lock to avoid lock dependency loop
-  for (std::vector<object_t>::iterator i = remove_oids.begin();
-      i != remove_oids.end(); ++i) {
-    const object_t oid = *i;
-    const OSDMap *osdmap = objecter->get_osdmap_read();
-    const object_locator_t oloc = osdmap->file_to_object_locator(pr->layout);
-    objecter->put_osdmap_read();
+  for (const auto& oid : remove_oids) {
+    object_locator_t oloc = OSDMap::file_to_object_locator(pr->layout);
     objecter->remove(oid, oloc, pr->snapc, pr->mtime, pr->flags, NULL,
 		     new C_OnFinisher(new C_PurgeRange(this, pr), finisher));
   }
diff --git a/src/osdc/Filer.h b/src/osdc/Filer.h
index c2359c7..0c7b862 100644
--- a/src/osdc/Filer.h
+++ b/src/osdc/Filer.h
@@ -27,6 +27,8 @@
  */
 
 
+#include <mutex>
+
 #include "include/types.h"
 
 #include "common/ceph_time.h"
@@ -50,7 +52,9 @@ class Filer {
 
   // probes
   struct Probe {
-    Mutex lock;
+    std::mutex lock;
+    typedef std::lock_guard<std::mutex> lock_guard;
+    typedef std::unique_lock<std::mutex> unique_lock;
     inodeno_t ino;
     ceph_file_layout layout;
     snapid_t snapid;
@@ -79,7 +83,7 @@ class Filer {
     Probe(inodeno_t i, ceph_file_layout &l, snapid_t sn,
 	  uint64_t f, uint64_t *e, ceph::real_time *m, int fl, bool fw,
 	  Context *c) :
-      lock("Filer::Probe"), ino(i), layout(l), snapid(sn),
+      ino(i), layout(l), snapid(sn),
       psize(e), pmtime(m), pumtime(nullptr), flags(fl), fwd(fw), onfinish(c),
       probing_off(f), probing_len(0),
       err(0), found_size(false) {}
@@ -87,7 +91,7 @@ class Filer {
     Probe(inodeno_t i, ceph_file_layout &l, snapid_t sn,
 	  uint64_t f, uint64_t *e, utime_t *m, int fl, bool fw,
 	  Context *c) :
-      lock("Filer::Probe"), ino(i), layout(l), snapid(sn),
+      ino(i), layout(l), snapid(sn),
       psize(e), pmtime(nullptr), pumtime(m), flags(fl), fwd(fw),
       onfinish(c), probing_off(f), probing_len(0),
       err(0), found_size(false) {}
@@ -95,9 +99,9 @@ class Filer {
 
   class C_Probe;
 
-  void _probe(Probe *p);
+  void _probe(Probe *p, Probe::unique_lock& pl);
   bool _probed(Probe *p, const object_t& oid, uint64_t size,
-	       ceph::real_time mtime);
+	       ceph::real_time mtime, Probe::unique_lock& pl);
 
  public:
   Filer(const Filer& other);
diff --git a/src/osdc/Journaler.cc b/src/osdc/Journaler.cc
index 02f93d3..487bb60 100644
--- a/src/osdc/Journaler.cc
+++ b/src/osdc/Journaler.cc
@@ -31,7 +31,7 @@ using std::chrono::seconds;
 
 void Journaler::set_readonly()
 {
-  Mutex::Locker l(lock);
+  lock_guard l(lock);
 
   ldout(cct, 1) << "set_readonly" << dendl;
   readonly = true;
@@ -39,7 +39,7 @@ void Journaler::set_readonly()
 
 void Journaler::set_writeable()
 {
-  Mutex::Locker l(lock);
+  lock_guard l(lock);
 
   ldout(cct, 1) << "set_writeable" << dendl;
   readonly = false;
@@ -47,7 +47,7 @@ void Journaler::set_writeable()
 
 void Journaler::create(ceph_file_layout *l, stream_format_t const sf)
 {
-  Mutex::Locker lk(lock);
+  lock_guard lk(lock);
 
   assert(!readonly);
   state = STATE_ACTIVE;
@@ -67,7 +67,7 @@ void Journaler::create(ceph_file_layout *l, stream_format_t const sf)
 
 void Journaler::set_layout(ceph_file_layout const *l)
 {
-    Mutex::Locker lk(lock);
+    lock_guard lk(lock);
     _set_layout(l);
 }
 
@@ -103,7 +103,7 @@ class Journaler::C_ReadHead : public Context {
   Journaler *ls;
 public:
   bufferlist bl;
-  C_ReadHead(Journaler *l) : ls(l) {}
+  explicit C_ReadHead(Journaler *l) : ls(l) {}
   void finish(int r) {
     ls->_finish_read_head(r, bl);
   }
@@ -125,7 +125,7 @@ class Journaler::C_ProbeEnd : public Context {
   Journaler *ls;
 public:
   uint64_t end;
-  C_ProbeEnd(Journaler *l) : ls(l), end(-1) {}
+  explicit C_ProbeEnd(Journaler *l) : ls(l), end(-1) {}
   void finish(int r) {
     ls->_finish_probe_end(r, end);
   }
@@ -145,7 +145,7 @@ public:
 
 void Journaler::recover(Context *onread) 
 {
-  Mutex::Locker l(lock);
+  lock_guard l(lock);
   if (stopping) {
     onread->complete(-EAGAIN);
     return;
@@ -171,7 +171,7 @@ void Journaler::recover(Context *onread)
 
 void Journaler::_read_head(Context *on_finish, bufferlist *bl)
 {
-  assert(lock.is_locked_by_me());
+  // lock is locked
   assert(state == STATE_READHEAD || state == STATE_REREADHEAD);
 
   object_t oid = file_object_t(ino, 0);
@@ -181,7 +181,7 @@ void Journaler::_read_head(Context *on_finish, bufferlist *bl)
 
 void Journaler::reread_head(Context *onfinish)
 {
-  Mutex::Locker l(lock);
+  lock_guard l(lock);
   _reread_head(wrap_finisher(onfinish));
 }
 
@@ -205,7 +205,7 @@ void Journaler::_reread_head(Context *onfinish)
 
 void Journaler::_finish_reread_head(int r, bufferlist& bl, Context *finish)
 {
-  Mutex::Locker l(lock);
+  lock_guard l(lock);
 
   //read on-disk header into
   assert(bl.length() || r < 0 );
@@ -233,7 +233,7 @@ void Journaler::_finish_reread_head(int r, bufferlist& bl, Context *finish)
 
 void Journaler::_finish_read_head(int r, bufferlist& bl)
 {
-  Mutex::Locker l(lock);
+  lock_guard l(lock);
 
   assert(state == STATE_READHEAD);
 
@@ -301,7 +301,7 @@ void Journaler::_finish_read_head(int r, bufferlist& bl)
 
 void Journaler::_probe(Context *finish, uint64_t *end)
 {
-  assert(lock.is_locked_by_me());
+  // lock is locked
   ldout(cct, 1) << "probing for end of the log" << dendl;
   assert(state == STATE_PROBING || state == STATE_REPROBING);
   // probe the log
@@ -323,7 +323,7 @@ void Journaler::_reprobe(C_OnFinisher *finish)
 void Journaler::_finish_reprobe(int r, uint64_t new_end,
 				C_OnFinisher *onfinish)
 {
-  Mutex::Locker l(lock);
+  lock_guard l(lock);
 
   assert(new_end >= write_pos || r < 0);
   ldout(cct, 1) << "_finish_reprobe new_end = " << new_end
@@ -336,7 +336,7 @@ void Journaler::_finish_reprobe(int r, uint64_t new_end,
 
 void Journaler::_finish_probe_end(int r, uint64_t end)
 {
-  Mutex::Locker l(lock);
+  lock_guard l(lock);
 
   assert(state == STATE_PROBING);
   if (r < 0) { // error in probing
@@ -379,7 +379,7 @@ public:
 
 void Journaler::reread_head_and_probe(Context *onfinish)
 {
-  Mutex::Locker l(lock);
+  lock_guard l(lock);
 
   assert(state == STATE_ACTIVE);
   _reread_head(new C_RereadHeadProbe(this, wrap_finisher(onfinish)));
@@ -388,7 +388,7 @@ void Journaler::reread_head_and_probe(Context *onfinish)
 void Journaler::_finish_reread_head_and_probe(int r, C_OnFinisher *onfinish)
 {
   // Expect to be called back from finish_reread_head, which already takes lock
-  assert(lock.is_locked_by_me());
+  // lock is locked
 
   assert(!r); //if we get an error, we're boned
   _reprobe(onfinish);
@@ -411,7 +411,7 @@ public:
 
 void Journaler::write_head(Context *oncommit)
 {
-  Mutex::Locker l(lock);
+  lock_guard l(lock);
   _write_head(oncommit);
 }
 
@@ -449,7 +449,7 @@ void Journaler::_write_head(Context *oncommit)
 void Journaler::_finish_write_head(int r, Header &wrote,
 				   C_OnFinisher *oncommit)
 {
-  Mutex::Locker l(lock);
+  lock_guard l(lock);
 
   if (r < 0) {
     lderr(cct) << "_finish_write_head got " << cpp_strerror(r) << dendl;
@@ -483,7 +483,7 @@ public:
 
 void Journaler::_finish_flush(int r, uint64_t start, ceph::real_time stamp)
 {
-  Mutex::Locker l(lock);
+  lock_guard l(lock);
   assert(!readonly);
 
   if (r < 0) {
@@ -529,7 +529,7 @@ void Journaler::_finish_flush(int r, uint64_t start, ceph::real_time stamp)
 
 uint64_t Journaler::append_entry(bufferlist& bl)
 {
-  Mutex::Locker l(lock);
+  lock_guard l(lock);
 
   assert(!readonly);
   uint32_t s = bl.length();
@@ -659,7 +659,7 @@ void Journaler::_do_flush(unsigned amount)
 
 void Journaler::wait_for_flush(Context *onsafe)
 {
-  Mutex::Locker l(lock);
+  lock_guard l(lock);
   if (stopping) {
     onsafe->complete(-EAGAIN);
     return;
@@ -692,7 +692,7 @@ void Journaler::_wait_for_flush(Context *onsafe)
 
 void Journaler::flush(Context *onsafe)
 {
-  Mutex::Locker l(lock);
+  lock_guard l(lock);
   _flush(wrap_finisher(onsafe));
 }
 
@@ -794,7 +794,7 @@ void Journaler::_issue_prezero()
 // lock before calling into objecter to do I/O.
 void Journaler::_finish_prezero(int r, uint64_t start, uint64_t len)
 {
-  Mutex::Locker l(lock);
+  lock_guard l(lock);
 
   ldout(cct, 10) << "_prezeroed to " << start << "~" << len
 		 << ", prezeroing/prezero was " << prezeroing_pos << "/"
@@ -849,10 +849,11 @@ public:
 class Journaler::C_RetryRead : public Context {
   Journaler *ls;
 public:
-  C_RetryRead(Journaler *l) : ls(l) {}
+  explicit C_RetryRead(Journaler *l) : ls(l) {}
+
   void finish(int r) {
     // Should only be called from waitfor_safe i.e. already inside lock
-    assert(ls->lock.is_locked_by_me());
+    // (ls->lock is locked
     ls->_prefetch();
   }
 };
@@ -860,7 +861,7 @@ public:
 void Journaler::_finish_read(int r, uint64_t offset, uint64_t length,
 			     bufferlist& bl)
 {
-  Mutex::Locker l(lock);
+  lock_guard l(lock);
 
   if (r < 0) {
     ldout(cct, 0) << "_finish_read got error " << r << dendl;
@@ -1085,7 +1086,7 @@ bool Journaler::_is_readable()
  */
 bool Journaler::is_readable()
 {
-  Mutex::Locker l(lock);
+  lock_guard l(lock);
 
   if (error != 0) {
     return false;
@@ -1112,7 +1113,7 @@ class Journaler::C_EraseFinish : public Context {
  */
 void Journaler::erase(Context *completion)
 {
-  Mutex::Locker l(lock);
+  lock_guard l(lock);
 
   // Async delete the journal data
   uint64_t first = trimmed_pos / get_layout_period();
@@ -1130,7 +1131,7 @@ void Journaler::erase(Context *completion)
 
 void Journaler::_finish_erase(int data_result, C_OnFinisher *completion)
 {
-  Mutex::Locker l(lock);
+  lock_guard l(lock);
 
   if (data_result == 0) {
     // Async delete the journal header
@@ -1149,7 +1150,7 @@ void Journaler::_finish_erase(int data_result, C_OnFinisher *completion)
  */
 bool Journaler::try_read_entry(bufferlist& bl)
 {
-  Mutex::Locker l(lock);
+  lock_guard l(lock);
 
   if (!readable) {
     ldout(cct, 10) << "try_read_entry at " << read_pos << " not readable"
@@ -1191,7 +1192,7 @@ bool Journaler::try_read_entry(bufferlist& bl)
 
 void Journaler::wait_for_readable(Context *onreadable)
 {
-  Mutex::Locker l(lock);
+  lock_guard l(lock);
   if (stopping) {
     onreadable->complete(-EAGAIN);
     return;
@@ -1226,7 +1227,7 @@ public:
 
 void Journaler::trim()
 {
-  Mutex::Locker l(lock);
+  lock_guard l(lock);
   _trim();
 }
 
@@ -1272,7 +1273,7 @@ void Journaler::_trim()
 
 void Journaler::_finish_trim(int r, uint64_t to)
 {
-  Mutex::Locker l(lock);
+  lock_guard l(lock);
 
   assert(!readonly);
   ldout(cct, 10) << "_finish_trim trimmed_pos was " << trimmed_pos
@@ -1294,7 +1295,7 @@ void Journaler::_finish_trim(int r, uint64_t to)
 
 void Journaler::handle_write_error(int r)
 {
-  assert(lock.is_locked_by_me());
+  // lock is locked
 
   lderr(cct) << "handle_write_error " << cpp_strerror(r) << dendl;
   if (on_write_error) {
@@ -1454,7 +1455,7 @@ size_t JournalStream::write(bufferlist &entry, bufferlist *to,
  * @param c callback/context to trigger on error
  */
 void Journaler::set_write_error_handler(Context *c) {
-  Mutex::Locker l(lock);
+  lock_guard l(lock);
   assert(!on_write_error);
   on_write_error = wrap_finisher(c);
   called_write_error = false;
@@ -1478,7 +1479,7 @@ C_OnFinisher *Journaler::wrap_finisher(Context *c)
 
 void Journaler::shutdown()
 {
-  Mutex::Locker l(lock);
+  lock_guard l(lock);
 
   ldout(cct, 1) << __func__ << dendl;
 
diff --git a/src/osdc/Journaler.h b/src/osdc/Journaler.h
index c12bc78..da397f6 100644
--- a/src/osdc/Journaler.h
+++ b/src/osdc/Journaler.h
@@ -215,7 +215,9 @@ public:
 private:
   // me
   CephContext *cct;
-  Mutex lock;
+  std::mutex lock;
+  typedef std::lock_guard<std::mutex> lock_guard;
+  typedef std::unique_lock<std::mutex> unique_lock;
   Finisher *finisher;
   Header last_written;
   inodeno_t ino;
@@ -252,7 +254,7 @@ private:
   void _do_delayed_flush()
   {
     assert(delay_flush_event != NULL);
-    Mutex::Locker l(lock);
+    lock_guard l(lock);
     delay_flush_event = NULL;
     _do_flush();
   }
@@ -399,8 +401,7 @@ public:
   Journaler(inodeno_t ino_, int64_t pool, const char *mag, Objecter *obj,
 	    PerfCounters *l, int lkey, SafeTimer *tim, Finisher *f) :
     last_committed(mag),
-    cct(obj->cct), lock("Journaler"), finisher(f),
-    last_written(mag),
+    cct(obj->cct), finisher(f), last_written(mag),
     ino(ino_), pg_pool(pool), readonly(true),
     stream_format(-1), journal_stream(-1),
     magic(mag),
@@ -425,7 +426,7 @@ public:
    * "erase" method.
    */
   void reset() {
-    Mutex::Locker l(lock);
+    lock_guard l(lock);
     assert(state == STATE_ACTIVE);
 
     readonly = true;
@@ -466,11 +467,11 @@ public:
   void set_readonly();
   void set_writeable();
   void set_write_pos(int64_t p) {
-    Mutex::Locker l(lock);
+    lock_guard l(lock);
     prezeroing_pos = prezero_pos = write_pos = flush_pos = safe_pos = p;
   }
   void set_read_pos(int64_t p) {
-    Mutex::Locker l(lock);
+    lock_guard l(lock);
     // we can't cope w/ in-progress read right now.
     assert(requested_pos == received_pos);
     read_pos = requested_pos = received_pos = p;
@@ -478,17 +479,17 @@ public:
   }
   uint64_t append_entry(bufferlist& bl);
   void set_expire_pos(int64_t ep) {
-      Mutex::Locker l(lock);
+      lock_guard l(lock);
       expire_pos = ep;
   }
   void set_trimmed_pos(int64_t p) {
-      Mutex::Locker l(lock);
+      lock_guard l(lock);
       trimming_pos = trimmed_pos = p;
   }
 
   void trim();
   void trim_tail() {
-    Mutex::Locker l(lock);
+    lock_guard l(lock);
 
     assert(!readonly);
     _issue_prezero();
diff --git a/src/osdc/ObjectCacher.cc b/src/osdc/ObjectCacher.cc
index acf6004..f7ae589 100644
--- a/src/osdc/ObjectCacher.cc
+++ b/src/osdc/ObjectCacher.cc
@@ -724,12 +724,10 @@ void ObjectCacher::bh_read_finish(int64_t poolid, sobject_t oid,
 		<< dendl;
 
   if (r >= 0 && bl.length() < length) {
-    bufferptr bp(length - bl.length());
-    bp.zero();
     ldout(cct, 7) << "bh_read_finish " << oid << " padding " << start << "~"
-		  << length << " with " << bp.length() << " bytes of zeroes"
+		  << length << " with " << length - bl.length() << " bytes of zeroes"
 		  << dendl;
-    bl.push_back(bp);
+    bl.append_zero(length - bl.length());
   }
 
   list<Context*> ls;
@@ -1459,9 +1457,7 @@ int ObjectCacher::_readx(OSDRead *rd, ObjectSet *oset, Context *onfinish,
 	  // put substr here first, since substr_of clobbers, and we
 	  // may get multiple bh's at this stripe_map position
 	  if (bh->is_zero()) {
-	    bufferptr bp(len);
-	    bp.zero();
-	    stripe_map[f_it->first].push_back(bp);
+	    stripe_map[f_it->first].append_zero(len);
 	  } else {
 	    bit.substr_of(bh->bl,
 		opos - bh->start(),
diff --git a/src/osdc/ObjectCacher.h b/src/osdc/ObjectCacher.h
index 791412e..b2ec73d 100644
--- a/src/osdc/ObjectCacher.h
+++ b/src/osdc/ObjectCacher.h
@@ -130,7 +130,7 @@ class ObjectCacher {
     map<loff_t, list<Context*> > waitfor_read;
 
     // cons
-    BufferHead(Object *o) :
+    explicit BufferHead(Object *o) :
       state(STATE_MISSING),
       ref(0),
       dontneed(false),
@@ -424,7 +424,7 @@ class ObjectCacher {
   class FlusherThread : public Thread {
     ObjectCacher *oc;
   public:
-    FlusherThread(ObjectCacher *o) : oc(o) {}
+    explicit FlusherThread(ObjectCacher *o) : oc(o) {}
     void *entry() {
       oc->flusher_entry();
       return 0;
diff --git a/src/osdc/Objecter.cc b/src/osdc/Objecter.cc
index 9bfbb06..9ace054 100644
--- a/src/osdc/Objecter.cc
+++ b/src/osdc/Objecter.cc
@@ -153,13 +153,18 @@ static const char *config_keys[] = {
   NULL
 };
 
-Mutex *Objecter::OSDSession::get_lock(object_t& oid)
+Objecter::OSDSession::unique_completion_lock Objecter::OSDSession::get_lock(
+  object_t& oid)
 {
-#define HASH_PRIME 1021
+  if (oid.name.empty())
+    return unique_completion_lock();
+
+  static constexpr uint32_t HASH_PRIME = 1021;
   uint32_t h = ceph_str_hash_linux(oid.name.c_str(), oid.name.size())
     % HASH_PRIME;
 
-  return completion_locks[h % num_locks];
+  return unique_completion_lock(completion_locks[h % num_locks],
+				std::defer_lock);
 }
 
 const char** Objecter::get_tracked_conf_keys() const
@@ -178,15 +183,17 @@ void Objecter::handle_conf_change(const struct md_config_t *conf,
 
 void Objecter::update_crush_location()
 {
-  RWLock::WLocker rwlocker(rwlock);
-  crush_location.clear();
+  unique_lock wl(rwlock);
+  std::multimap<string,string> new_crush_location;
   vector<string> lvec;
   get_str_vec(cct->_conf->crush_location, ";, \t", lvec);
-  int r = CrushWrapper::parse_loc_multimap(lvec, &crush_location);
+  int r = CrushWrapper::parse_loc_multimap(lvec, &new_crush_location);
   if (r < 0) {
     lderr(cct) << "warning: crush_location '" << cct->_conf->crush_location
-	       << "' does not parse" << dendl;
+	       << "' does not parse, leave origin crush_location untouched." << dendl;
+    return;
   }
+  crush_location = new_crush_location;
 }
 
 // messages ------------------------------
@@ -353,7 +360,7 @@ void Objecter::init()
  */
 void Objecter::start()
 {
-  RWLock::RLocker rl(rwlock);
+  shared_lock rl(rwlock);
 
   start_tick();
   if (osdmap->get_epoch() == 0) {
@@ -365,7 +372,7 @@ void Objecter::shutdown()
 {
   assert(initialized.read());
 
-  rwlock.get_write();
+  unique_lock wl(rwlock);
 
   initialized.set(0);
 
@@ -421,7 +428,7 @@ void Objecter::shutdown()
     ldout(cct, 10) << " linger_op " << i->first << dendl;
     LingerOp *lop = i->second;
     {
-      RWLock::WLocker wl(homeless_session->lock);
+      OSDSession::unique_lock swl(homeless_session->lock);
       _session_linger_op_remove(homeless_session, lop);
     }
     linger_ops.erase(lop->linger_id);
@@ -434,7 +441,7 @@ void Objecter::shutdown()
     ldout(cct, 10) << " op " << i->first << dendl;
     Op *op = i->second;
     {
-      RWLock::WLocker wl(homeless_session->lock);
+      OSDSession::unique_lock swl(homeless_session->lock);
       _session_op_remove(homeless_session, op);
     }
     op->put();
@@ -446,7 +453,7 @@ void Objecter::shutdown()
     ldout(cct, 10) << " command_op " << i->first << dendl;
     CommandOp *cop = i->second;
     {
-      RWLock::WLocker wl(homeless_session->lock);
+      OSDSession::unique_lock swl(homeless_session->lock);
       _session_command_op_remove(homeless_session, cop);
     }
     cop->put();
@@ -473,18 +480,17 @@ void Objecter::shutdown()
   }
 
   // Let go of Objecter write lock so timer thread can shutdown
-  rwlock.unlock();
+  wl.unlock();
 }
 
-void Objecter::_send_linger(LingerOp *info)
+void Objecter::_send_linger(LingerOp *info,
+			    shunique_lock& sul)
 {
-  assert(rwlock.is_wlocked());
-
-  RWLock::Context lc(rwlock, RWLock::Context::TakenForWrite);
+  assert(sul.owns_lock() && sul.mutex() == &rwlock);
 
   vector<OSDOp> opv;
   Context *oncommit = NULL;
-  info->watch_lock.get_read(); // just to read registered status
+  LingerOp::shared_lock watchl(info->watch_lock);
   bufferlist *poutbl = NULL;
   if (info->registered && info->is_watch) {
     ldout(cct, 15) << "send_linger " << info->linger_id << " reconnect"
@@ -506,7 +512,7 @@ void Objecter::_send_linger(LingerOp *info)
     }
     oncommit = c;
   }
-  info->watch_lock.put_read();
+  watchl.unlock();
   Op *o = new Op(info->target.base_oid, info->target.base_oloc,
 		 opv, info->target.flags | CEPH_OSD_FLAG_READ,
 		 NULL, NULL,
@@ -525,18 +531,18 @@ void Objecter::_send_linger(LingerOp *info)
 
   if (info->register_tid) {
     // repeat send.  cancel old registeration op, if any.
-    info->session->lock.get_write();
+    OSDSession::unique_lock sl(info->session->lock);
     if (info->session->ops.count(info->register_tid)) {
       Op *o = info->session->ops[info->register_tid];
       _op_cancel_map_check(o);
       _cancel_linger_op(o);
     }
-    info->session->lock.unlock();
+    sl.unlock();
 
-    info->register_tid = _op_submit(o, lc);
+    info->register_tid = _op_submit(o, sul);
   } else {
     // first send
-    info->register_tid = _op_submit_with_budget(o, lc);
+    info->register_tid = _op_submit_with_budget(o, sul);
   }
 
   logger->inc(l_osdc_linger_send);
@@ -544,7 +550,7 @@ void Objecter::_send_linger(LingerOp *info)
 
 void Objecter::_linger_commit(LingerOp *info, int r, bufferlist& outbl)
 {
-  RWLock::WLocker wl(info->watch_lock);
+  LingerOp::unique_lock wl(info->watch_lock);
   ldout(cct, 10) << "_linger_commit " << info->linger_id << dendl;
   if (info->on_reg_commit) {
     info->on_reg_commit->complete(r);
@@ -578,9 +584,9 @@ struct C_DoWatchError : public Context {
     info->_queued_async();
   }
   void finish(int r) {
-    objecter->rwlock.get_read();
+    Objecter::unique_lock wl(objecter->rwlock);
     bool canceled = info->canceled;
-    objecter->rwlock.put_read();
+    wl.unlock();
 
     if (!canceled) {
       info->watch_context->handle_error(info->get_cookie(), err);
@@ -607,7 +613,7 @@ void Objecter::_linger_reconnect(LingerOp *info, int r)
   ldout(cct, 10) << __func__ << " " << info->linger_id << " = " << r
 		 << " (last_error " << info->last_error << ")" << dendl;
   if (r < 0) {
-    info->watch_lock.get_write();
+    LingerOp::unique_lock wl(info->watch_lock);
     if (!info->last_error) {
       r = _normalize_watch_error(r);
       info->last_error = r;
@@ -616,14 +622,14 @@ void Objecter::_linger_reconnect(LingerOp *info, int r)
 	_linger_callback_queue();
       }
     }
-    info->watch_lock.put_write();
+    wl.unlock();
   }
 }
 
 void Objecter::_send_linger_ping(LingerOp *info)
 {
-  assert(rwlock.is_locked());
-  assert(info->session->lock.is_locked());
+  // rwlock is locked unique
+  // info->session->lock is locked
 
   if (cct->_conf->objecter_inject_no_watch_ping) {
     ldout(cct, 10) << __func__ << " " << info->linger_id << " SKIPPING"
@@ -665,7 +671,7 @@ void Objecter::_send_linger_ping(LingerOp *info)
 void Objecter::_linger_ping(LingerOp *info, int r, mono_time sent,
 			    uint32_t register_gen)
 {
-  RWLock::WLocker l(info->watch_lock);
+  LingerOp::unique_lock l(info->watch_lock);
   ldout(cct, 10) << __func__ << " " << info->linger_id
 		 << " sent " << sent << " gen " << register_gen << " = " << r
 		 << " (last_error " << info->last_error
@@ -688,7 +694,7 @@ void Objecter::_linger_ping(LingerOp *info, int r, mono_time sent,
 
 int Objecter::linger_check(LingerOp *info)
 {
-  RWLock::RLocker l(info->watch_lock);
+  LingerOp::shared_lock l(info->watch_lock);
 
   mono_time stamp = info->watch_valid_thru;
   if (!info->watch_pending_async.empty())
@@ -705,20 +711,20 @@ int Objecter::linger_check(LingerOp *info)
 
 void Objecter::linger_cancel(LingerOp *info)
 {
-  RWLock::WLocker wl(rwlock);
+  unique_lock wl(rwlock);
   _linger_cancel(info);
   info->put();
 }
 
 void Objecter::_linger_cancel(LingerOp *info)
 {
-  assert(rwlock.is_wlocked());
+  // rwlock is locked unique
   ldout(cct, 20) << __func__ << " linger_id=" << info->linger_id << dendl;
   if (!info->canceled) {
     OSDSession *s = info->session;
-    s->lock.get_write();
+    OSDSession::unique_lock sl(s->lock);
     _session_linger_op_remove(s, info);
-    s->lock.unlock();
+    sl.unlock();
 
     linger_ops.erase(info->linger_id);
     linger_ops_set.erase(info);
@@ -745,7 +751,7 @@ Objecter::LingerOp *Objecter::linger_register(const object_t& oid,
   info->target.flags = flags;
   info->watch_valid_thru = mono_clock::now();
 
-  RWLock::WLocker l(rwlock);
+  unique_lock l(rwlock);
 
   // Acquire linger ID
   info->linger_id = ++max_linger_id;
@@ -779,8 +785,8 @@ ceph_tid_t Objecter::linger_watch(LingerOp *info,
   info->pobjver = objver;
   info->on_reg_commit = oncommit;
 
-  RWLock::WLocker wl(rwlock);
-  _linger_submit(info);
+  shunique_lock sul(rwlock, ceph::acquire_unique);
+  _linger_submit(info, sul);
   logger->inc(l_osdc_linger_active);
 
   return info->linger_id;
@@ -801,18 +807,16 @@ ceph_tid_t Objecter::linger_notify(LingerOp *info,
   info->pobjver = objver;
   info->on_reg_commit = onfinish;
 
-  RWLock::WLocker wl(rwlock);
-  _linger_submit(info);
+  shunique_lock sul(rwlock, ceph::acquire_unique);
+  _linger_submit(info, sul);
   logger->inc(l_osdc_linger_active);
 
   return info->linger_id;
 }
 
-void Objecter::_linger_submit(LingerOp *info)
+void Objecter::_linger_submit(LingerOp *info, shunique_lock& sul)
 {
-  assert(rwlock.is_wlocked());
-  RWLock::Context lc(rwlock, RWLock::Context::TakenForWrite);
-
+  assert(sul.owns_lock() && sul.mutex() == &rwlock);
   assert(info->linger_id);
 
   // Populate Op::target
@@ -820,14 +824,14 @@ void Objecter::_linger_submit(LingerOp *info)
   _calc_target(&info->target, &info->last_force_resend);
 
   // Create LingerOp<->OSDSession relation
-  int r = _get_session(info->target.osd, &s, lc);
+  int r = _get_session(info->target.osd, &s, sul);
   assert(r == 0);
-  s->lock.get_write();
+  OSDSession::unique_lock sl(s->lock);
   _session_linger_op_assign(s, info);
-  s->lock.unlock();
+  sl.unlock();
   put_session(s);
 
-  _send_linger(info);
+  _send_linger(info, sul);
 }
 
 struct C_DoWatchNotify : public Context {
@@ -847,7 +851,7 @@ struct C_DoWatchNotify : public Context {
 
 void Objecter::handle_watch_notify(MWatchNotify *m)
 {
-  RWLock::RLocker l(rwlock);
+  shared_lock l(rwlock);
   if (!initialized.read()) {
     return;
   }
@@ -857,7 +861,7 @@ void Objecter::handle_watch_notify(MWatchNotify *m)
     ldout(cct, 7) << __func__ << " cookie " << m->cookie << " dne" << dendl;
     return;
   }
-  RWLock::WLocker wl(info->watch_lock);
+  LingerOp::unique_lock wl(info->watch_lock);
   if (m->opcode == CEPH_WATCH_EVENT_DISCONNECT) {
     if (!info->last_error) {
       info->last_error = -ENOTCONN;
@@ -892,11 +896,11 @@ void Objecter::_do_watch_notify(LingerOp *info, MWatchNotify *m)
 {
   ldout(cct, 10) << __func__ << " " << *m << dendl;
 
-  rwlock.get_read();
+  shared_lock l(rwlock);
   assert(initialized.read());
 
   if (info->canceled) {
-    rwlock.put_read();
+    l.unlock();
     goto out;
   }
 
@@ -905,7 +909,7 @@ void Objecter::_do_watch_notify(LingerOp *info, MWatchNotify *m)
   assert(info->watch_context);
   assert(m->opcode != CEPH_WATCH_EVENT_DISCONNECT);
 
-  rwlock.put_read();
+  l.unlock();
 
   switch (m->opcode) {
   case CEPH_WATCH_EVENT_NOTIFY:
@@ -974,15 +978,14 @@ void Objecter::_scan_requests(OSDSession *s,
 			      map<int64_t, bool> *pool_full_map,
 			      map<ceph_tid_t, Op*>& need_resend,
 			      list<LingerOp*>& need_resend_linger,
-			      map<ceph_tid_t, CommandOp*>& need_resend_command)
+			      map<ceph_tid_t, CommandOp*>& need_resend_command,
+			      shunique_lock& sul)
 {
-  assert(rwlock.is_wlocked());
+  assert(sul.owns_lock() && sul.mutex() == &rwlock);
 
   list<LingerOp*> unregister_lingers;
 
-  RWLock::Context lc(rwlock, RWLock::Context::TakenForWrite);
-
-  s->lock.get_write();
+  OSDSession::unique_lock sl(s->lock);
 
   // check for changed linger mappings (_before_ regular ops)
   map<ceph_tid_t,LingerOp*>::iterator lp = s->linger_ops.begin();
@@ -994,7 +997,7 @@ void Objecter::_scan_requests(OSDSession *s,
     ++lp;
     ldout(cct, 10) << " checking linger op " << op->linger_id << dendl;
     bool unregister, force_resend_writes = cluster_full;
-    int r = _recalc_linger_op_target(op, lc);
+    int r = _recalc_linger_op_target(op, sul);
     if (pool_full_map)
       force_resend_writes = force_resend_writes ||
 	(*pool_full_map)[op->target.base_oloc.pool];
@@ -1044,7 +1047,7 @@ void Objecter::_scan_requests(OSDSession *s,
       _op_cancel_map_check(op);
       break;
     case RECALC_OP_TARGET_POOL_DNE:
-      _check_op_pool_dne(op, true);
+      _check_op_pool_dne(op, sl);
       break;
     }
   }
@@ -1059,7 +1062,7 @@ void Objecter::_scan_requests(OSDSession *s,
     if (pool_full_map)
       force_resend_writes = force_resend_writes ||
 	(*pool_full_map)[c->target_pg.pool()];
-    int r = _calc_command_target(c);
+    int r = _calc_command_target(c, sul);
     switch (r) {
     case RECALC_OP_TARGET_NO_ACTION:
       // resend if skipped map; otherwise do nothing.
@@ -1081,7 +1084,7 @@ void Objecter::_scan_requests(OSDSession *s,
     }
   }
 
-  s->lock.unlock();
+  sl.unlock();
 
   for (list<LingerOp*>::iterator iter = unregister_lingers.begin();
        iter != unregister_lingers.end();
@@ -1093,7 +1096,7 @@ void Objecter::_scan_requests(OSDSession *s,
 
 void Objecter::handle_osd_map(MOSDMap *m)
 {
-  RWLock::WLocker wl(rwlock);
+  shunique_lock sul(rwlock, acquire_unique);
   if (!initialized.read())
     return;
 
@@ -1169,7 +1172,7 @@ void Objecter::handle_osd_map(MOSDMap *m)
 	update_pool_full_map(pool_full_map);
 	_scan_requests(homeless_session, skipped_map, cluster_full,
 		       &pool_full_map, need_resend,
-		       need_resend_linger, need_resend_command);
+		       need_resend_linger, need_resend_command, sul);
 
 	// osd addr changes?
 	for (map<int,OSDSession*>::iterator p = osd_sessions.begin();
@@ -1177,7 +1180,7 @@ void Objecter::handle_osd_map(MOSDMap *m)
 	  OSDSession *s = p->second;
 	  _scan_requests(s, skipped_map, cluster_full,
 			 &pool_full_map, need_resend,
-			 need_resend_linger, need_resend_command);
+			 need_resend_linger, need_resend_command, sul);
 	  ++p;
 	  if (!osdmap->is_up(s->osd) ||
 	      (s->con &&
@@ -1196,7 +1199,7 @@ void Objecter::handle_osd_map(MOSDMap *m)
 	     p != osd_sessions.end(); ++p) {
 	  OSDSession *s = p->second;
 	  _scan_requests(s, false, false, NULL, need_resend,
-			 need_resend_linger, need_resend_command);
+			 need_resend_linger, need_resend_command, sul);
 	}
 	ldout(cct, 3) << "handle_osd_map decoding full epoch "
 		      << m->get_last() << dendl;
@@ -1204,7 +1207,7 @@ void Objecter::handle_osd_map(MOSDMap *m)
 
 	_scan_requests(homeless_session, false, false, NULL,
 		       need_resend, need_resend_linger,
-		       need_resend_command);
+		       need_resend_command, sul);
       } else {
 	ldout(cct, 3) << "handle_osd_map hmm, i want a full map, requesting"
 		      << dendl;
@@ -1224,8 +1227,6 @@ void Objecter::handle_osd_map(MOSDMap *m)
     _maybe_request_map();
   }
 
-  RWLock::Context lc(rwlock, RWLock::Context::TakenForWrite);
-
   // resend requests
   for (map<ceph_tid_t, Op*>::iterator p = need_resend.begin();
        p != need_resend.end(); ++p) {
@@ -1233,13 +1234,13 @@ void Objecter::handle_osd_map(MOSDMap *m)
     OSDSession *s = op->session;
     bool mapped_session = false;
     if (!s) {
-      int r = _map_session(&op->target, &s, lc);
+      int r = _map_session(&op->target, &s, sul);
       assert(r == 0);
       mapped_session = true;
     } else {
       get_session(s);
     }
-    s->lock.get_write();
+    OSDSession::unique_lock sl(s->lock);
     if (mapped_session) {
       _session_op_assign(s, op);
     }
@@ -1252,7 +1253,7 @@ void Objecter::handle_osd_map(MOSDMap *m)
       _op_cancel_map_check(op);
       _cancel_linger_op(op);
     }
-    s->lock.unlock();
+    sl.unlock();
     put_session(s);
   }
   for (list<LingerOp*>::iterator p = need_resend_linger.begin();
@@ -1261,7 +1262,7 @@ void Objecter::handle_osd_map(MOSDMap *m)
     if (!op->session) {
       _calc_target(&op->target, &op->last_force_resend);
       OSDSession *s = NULL;
-      int const r = _get_session(op->target.osd, &s, lc);
+      int const r = _get_session(op->target.osd, &s, sul);
       assert(r == 0);
       assert(s != NULL);
       op->session = s;
@@ -1269,13 +1270,13 @@ void Objecter::handle_osd_map(MOSDMap *m)
     }
     if (!op->session->is_homeless()) {
       logger->inc(l_osdc_linger_resend);
-      _send_linger(op);
+      _send_linger(op, sul);
     }
   }
   for (map<ceph_tid_t,CommandOp*>::iterator p = need_resend_command.begin();
        p != need_resend_command.end(); ++p) {
     CommandOp *c = p->second;
-    _assign_command_session(c);
+    _assign_command_session(c, sul);
     if (c->session && !c->session->is_homeless()) {
       _send_command(c);
     }
@@ -1314,7 +1315,7 @@ void Objecter::C_Op_Map_Latest::finish(int r)
     << "op_map_latest r=" << r << " tid=" << tid
     << " latest " << latest << dendl;
 
-  RWLock::WLocker wl(objecter->rwlock);
+  Objecter::unique_lock wl(objecter->rwlock);
 
   map<ceph_tid_t, Op*>::iterator iter =
     objecter->check_latest_map_ops.find(tid);
@@ -1333,7 +1334,8 @@ void Objecter::C_Op_Map_Latest::finish(int r)
   if (op->map_dne_bound == 0)
     op->map_dne_bound = latest;
 
-  objecter->_check_op_pool_dne(op, false);
+  OSDSession::unique_lock sl(op->session->lock, defer_lock);
+  objecter->_check_op_pool_dne(op, sl);
 
   op->put();
 }
@@ -1341,7 +1343,7 @@ void Objecter::C_Op_Map_Latest::finish(int r)
 int Objecter::pool_snap_by_name(int64_t poolid, const char *snap_name,
 				snapid_t *snap)
 {
-  RWLock::RLocker rl(rwlock);
+  shared_lock rl(rwlock);
 
   const map<int64_t, pg_pool_t>& pools = osdmap->get_pools();
   map<int64_t, pg_pool_t>::const_iterator iter = pools.find(poolid);
@@ -1364,7 +1366,7 @@ int Objecter::pool_snap_by_name(int64_t poolid, const char *snap_name,
 int Objecter::pool_snap_get_info(int64_t poolid, snapid_t snap,
 				 pool_snap_info_t *info)
 {
-  RWLock::RLocker rl(rwlock);
+  shared_lock rl(rwlock);
 
   const map<int64_t, pg_pool_t>& pools = osdmap->get_pools();
   map<int64_t, pg_pool_t>::const_iterator iter = pools.find(poolid);
@@ -1382,7 +1384,7 @@ int Objecter::pool_snap_get_info(int64_t poolid, snapid_t snap,
 
 int Objecter::pool_snap_list(int64_t poolid, vector<uint64_t> *snaps)
 {
-  RWLock::RLocker rl(rwlock);
+  shared_lock rl(rwlock);
 
   const pg_pool_t *pi = osdmap->get_pg_pool(poolid);
   if (!pi)
@@ -1395,9 +1397,10 @@ int Objecter::pool_snap_list(int64_t poolid, vector<uint64_t> *snaps)
   return 0;
 }
 
-void Objecter::_check_op_pool_dne(Op *op, bool session_locked)
+// sl may be unlocked.
+void Objecter::_check_op_pool_dne(Op *op, unique_lock& sl)
 {
-  assert(rwlock.is_wlocked());
+  // rwlock is locked unique
 
   if (op->attempts) {
     // we send a reply earlier, which means that previously the pool
@@ -1430,13 +1433,15 @@ void Objecter::_check_op_pool_dne(Op *op, bool session_locked)
 
       OSDSession *s = op->session;
       assert(s != NULL);
+      assert(sl.mutex() == &s->lock);
 
+      bool session_locked = sl.owns_lock();
       if (!session_locked) {
-	s->lock.get_write();
+	sl.lock();
       }
       _finish_op(op, 0);
       if (!session_locked) {
-	s->lock.unlock();
+	sl.unlock();
       }
     }
   } else {
@@ -1446,7 +1451,7 @@ void Objecter::_check_op_pool_dne(Op *op, bool session_locked)
 
 void Objecter::_send_op_map_check(Op *op)
 {
-  assert(rwlock.is_wlocked());
+  // rwlock is locked unique
   // ask the monitor
   if (check_latest_map_ops.count(op->tid) == 0) {
     op->get();
@@ -1458,7 +1463,7 @@ void Objecter::_send_op_map_check(Op *op)
 
 void Objecter::_op_cancel_map_check(Op *op)
 {
-  assert(rwlock.is_wlocked());
+  // rwlock is locked unique
   map<ceph_tid_t, Op*>::iterator iter =
     check_latest_map_ops.find(op->tid);
   if (iter != check_latest_map_ops.end()) {
@@ -1477,7 +1482,7 @@ void Objecter::C_Linger_Map_Latest::finish(int r)
     return;
   }
 
-  RWLock::WLocker wl(objecter->rwlock);
+  unique_lock wl(objecter->rwlock);
 
   map<uint64_t, LingerOp*>::iterator iter =
     objecter->check_latest_map_lingers.find(linger_id);
@@ -1503,7 +1508,7 @@ void Objecter::C_Linger_Map_Latest::finish(int r)
 
 void Objecter::_check_linger_pool_dne(LingerOp *op, bool *need_unregister)
 {
-  assert(rwlock.is_wlocked());
+  // rwlock is locked unique
 
   *need_unregister = false;
 
@@ -1543,7 +1548,7 @@ void Objecter::_send_linger_map_check(LingerOp *op)
 
 void Objecter::_linger_cancel_map_check(LingerOp *op)
 {
-  assert(rwlock.is_wlocked());
+  // rwlock is locked unique
 
   map<uint64_t, LingerOp*>::iterator iter =
     check_latest_map_lingers.find(op->linger_id);
@@ -1563,7 +1568,7 @@ void Objecter::C_Command_Map_Latest::finish(int r)
     return;
   }
 
-  RWLock::WLocker wl(objecter->rwlock);
+  unique_lock wl(objecter->rwlock);
 
   map<uint64_t, CommandOp*>::iterator iter =
     objecter->check_latest_map_commands.find(tid);
@@ -1584,7 +1589,7 @@ void Objecter::C_Command_Map_Latest::finish(int r)
 
 void Objecter::_check_command_map_dne(CommandOp *c)
 {
-  assert(rwlock.is_wlocked());
+  // rwlock is locked unique
 
   ldout(cct, 10) << "_check_command_map_dne tid " << c->tid
 		 << " current " << osdmap->get_epoch()
@@ -1601,7 +1606,7 @@ void Objecter::_check_command_map_dne(CommandOp *c)
 
 void Objecter::_send_command_map_check(CommandOp *c)
 {
-  assert(rwlock.is_wlocked());
+  // rwlock is locked unique
 
   // ask the monitor
   if (check_latest_map_commands.count(c->tid) == 0) {
@@ -1614,7 +1619,7 @@ void Objecter::_send_command_map_check(CommandOp *c)
 
 void Objecter::_command_cancel_map_check(CommandOp *c)
 {
-  assert(rwlock.is_wlocked());
+  // rwlock is locked unique
 
   map<uint64_t, CommandOp*>::iterator iter =
     check_latest_map_commands.find(c->tid);
@@ -1632,9 +1637,9 @@ void Objecter::_command_cancel_map_check(CommandOp *c)
  * @returns 0 on success, or -EAGAIN if the lock context requires
  * promotion to write.
  */
-int Objecter::_get_session(int osd, OSDSession **session, RWLock::Context& lc)
+int Objecter::_get_session(int osd, OSDSession **session, shunique_lock& sul)
 {
-  assert(rwlock.is_locked());
+  assert(sul && sul.mutex() == &rwlock);
 
   if (osd < 0) {
     *session = homeless_session;
@@ -1652,7 +1657,7 @@ int Objecter::_get_session(int osd, OSDSession **session, RWLock::Context& lc)
 		   << s->get_nref() << dendl;
     return 0;
   }
-  if (!lc.is_wlocked()) {
+  if (!sul.owns_lock()) {
     return -EAGAIN;
   }
   OSDSession *s = new OSDSession(cct, osd);
@@ -1689,7 +1694,7 @@ void Objecter::get_session(Objecter::OSDSession *s)
 
 void Objecter::_reopen_session(OSDSession *s)
 {
-  assert(s->lock.is_locked());
+  // s->lock is locked
 
   entity_inst_t inst = osdmap->get_inst(s->osd);
   ldout(cct, 10) << "reopen_session osd." << s->osd << " session, addr now "
@@ -1705,14 +1710,14 @@ void Objecter::_reopen_session(OSDSession *s)
 
 void Objecter::close_session(OSDSession *s)
 {
-  assert(rwlock.is_wlocked());
+  // rwlock is locked unique
 
   ldout(cct, 10) << "close_session for osd." << s->osd << dendl;
   if (s->con) {
     s->con->mark_down();
     logger->inc(l_osdc_osd_session_close);
   }
-  s->lock.get_write();
+  OSDSession::unique_lock sl(s->lock);
 
   std::list<LingerOp*> homeless_lingers;
   std::list<CommandOp*> homeless_commands;
@@ -1740,12 +1745,12 @@ void Objecter::close_session(OSDSession *s)
   }
 
   osd_sessions.erase(s->osd);
-  s->lock.unlock();
+  sl.unlock();
   put_session(s);
 
   // Assign any leftover ops to the homeless session
   {
-    RWLock::WLocker wl(homeless_session->lock);
+    OSDSession::unique_lock hsl(homeless_session->lock);
     for (std::list<LingerOp*>::iterator i = homeless_lingers.begin();
 	 i != homeless_lingers.end(); ++i) {
       _session_linger_op_assign(homeless_session, *i);
@@ -1765,19 +1770,20 @@ void Objecter::close_session(OSDSession *s)
 
 void Objecter::wait_for_osd_map()
 {
-  rwlock.get_write();
+  unique_lock l(rwlock);
   if (osdmap->get_epoch()) {
-    rwlock.put_write();
+    l.unlock();
     return;
   }
 
+  // Leave this since it goes with C_SafeCond
   Mutex lock("");
   Cond cond;
   bool done;
   lock.Lock();
   C_SafeCond *context = new C_SafeCond(&lock, &cond, &done, NULL);
   waiting_for_map[0].push_back(pair<Context*, int>(context, 0));
-  rwlock.put_write();
+  l.unlock();
   while (!done)
     cond.Wait(lock);
   lock.Unlock();
@@ -1810,14 +1816,14 @@ void Objecter::wait_for_latest_osdmap(Context *fin)
 
 void Objecter::get_latest_version(epoch_t oldest, epoch_t newest, Context *fin)
 {
-  RWLock::WLocker wl(rwlock);
+  unique_lock wl(rwlock);
   _get_latest_version(oldest, newest, fin);
 }
 
 void Objecter::_get_latest_version(epoch_t oldest, epoch_t newest,
 				   Context *fin)
 {
-  assert(rwlock.is_wlocked());
+  // rwlock is locked unique
   if (osdmap->get_epoch() >= newest) {
   ldout(cct, 10) << __func__ << " latest " << newest << ", have it" << dendl;
     if (fin)
@@ -1831,13 +1837,13 @@ void Objecter::_get_latest_version(epoch_t oldest, epoch_t newest,
 
 void Objecter::maybe_request_map()
 {
-  RWLock::RLocker rl(rwlock);
+  shared_lock rl(rwlock);
   _maybe_request_map();
 }
 
 void Objecter::_maybe_request_map()
 {
-  assert(rwlock.is_locked());
+  // rwlock is locked
   int flag = 0;
   if (_osdmap_full_flag()
       || osdmap->test_flag(CEPH_OSDMAP_PAUSERD)
@@ -1857,7 +1863,7 @@ void Objecter::_maybe_request_map()
 
 void Objecter::_wait_for_new_map(Context *c, epoch_t epoch, int err)
 {
-  assert(rwlock.is_wlocked());
+  // rwlock is locked unique
   waiting_for_map[epoch].push_back(pair<Context *, int>(c, err));
   _maybe_request_map();
 }
@@ -1875,7 +1881,7 @@ void Objecter::_wait_for_new_map(Context *c, epoch_t epoch, int err)
  */
 bool Objecter::have_map(const epoch_t epoch)
 {
-  RWLock::RLocker rl(rwlock);
+  shared_lock rl(rwlock);
   if (osdmap->get_epoch() >= epoch) {
     return true;
   } else {
@@ -1885,7 +1891,7 @@ bool Objecter::have_map(const epoch_t epoch)
 
 bool Objecter::wait_for_map(epoch_t epoch, Context *c, int err)
 {
-  RWLock::WLocker wl(rwlock);
+  unique_lock wl(rwlock);
   if (osdmap->get_epoch() >= epoch) {
     return true;
   }
@@ -1898,19 +1904,19 @@ void Objecter::kick_requests(OSDSession *session)
   ldout(cct, 10) << "kick_requests for osd." << session->osd << dendl;
 
   map<uint64_t, LingerOp *> lresend;
-  RWLock::WLocker wl(rwlock);
+  unique_lock wl(rwlock);
 
-  session->lock.get_write();
+  OSDSession::unique_lock sl(session->lock);
   _kick_requests(session, lresend);
-  session->lock.unlock();
+  sl.unlock();
 
-  _linger_ops_resend(lresend);
+  _linger_ops_resend(lresend, wl);
 }
 
 void Objecter::_kick_requests(OSDSession *session,
 			      map<uint64_t, LingerOp *>& lresend)
 {
-  assert(rwlock.is_wlocked());
+  // rwlock is locked unique
 
   // resend ops
   map<ceph_tid_t,Op*> resend;  // resend in tid order
@@ -1956,18 +1962,20 @@ void Objecter::_kick_requests(OSDSession *session,
   }
 }
 
-void Objecter::_linger_ops_resend(map<uint64_t, LingerOp *>& lresend)
+void Objecter::_linger_ops_resend(map<uint64_t, LingerOp *>& lresend,
+				  unique_lock& ul)
 {
-  assert(rwlock.is_wlocked());
-
+  assert(ul.owns_lock());
+  shunique_lock sul(std::move(ul));
   while (!lresend.empty()) {
     LingerOp *op = lresend.begin()->second;
     if (!op->canceled) {
-      _send_linger(op);
+      _send_linger(op, sul);
     }
     op->put();
     lresend.erase(lresend.begin());
   }
+  ul = unique_lock(sul.release_to_unique());
 }
 
 void Objecter::start_tick()
@@ -1980,7 +1988,7 @@ void Objecter::start_tick()
 
 void Objecter::tick()
 {
-  RWLock::RLocker rl(rwlock);
+  shared_lock rl(rwlock);
 
   ldout(cct, 10) << "tick" << dendl;
 
@@ -2005,7 +2013,7 @@ void Objecter::tick()
   for (map<int,OSDSession*>::iterator siter = osd_sessions.begin();
        siter != osd_sessions.end(); ++siter) {
     OSDSession *s = siter->second;
-    RWLock::WLocker l(s->lock);
+    OSDSession::lock_guard l(s->lock);
     bool found = false;
     for (map<ceph_tid_t,Op*>::iterator p = s->ops.begin();
 	p != s->ops.end();
@@ -2023,7 +2031,7 @@ void Objecter::tick()
 	p != s->linger_ops.end();
 	++p) {
       LingerOp *op = p->second;
-      RWLock::WLocker wl(op->watch_lock);
+      LingerOp::unique_lock wl(op->watch_lock);
       assert(op->session);
       ldout(cct, 10) << " pinging osd that serves lingering tid " << p->first
 		     << " (osd." << op->session->osd << ")" << dendl;
@@ -2069,7 +2077,7 @@ void Objecter::tick()
 
 void Objecter::resend_mon_ops()
 {
-  RWLock::WLocker wl(rwlock);
+  unique_lock wl(rwlock);
 
   ldout(cct, 10) << "resend_mon_ops" << dendl;
 
@@ -2122,12 +2130,11 @@ void Objecter::resend_mon_ops()
 
 ceph_tid_t Objecter::op_submit(Op *op, int *ctx_budget)
 {
-  RWLock::RLocker rl(rwlock);
-  RWLock::Context lc(rwlock, RWLock::Context::TakenForRead);
-  return _op_submit_with_budget(op, lc, ctx_budget);
+  shunique_lock rl(rwlock, ceph::acquire_shared);
+  return _op_submit_with_budget(op, rl, ctx_budget);
 }
 
-ceph_tid_t Objecter::_op_submit_with_budget(Op *op, RWLock::Context& lc,
+ceph_tid_t Objecter::_op_submit_with_budget(Op *op, shunique_lock& sul,
 					    int *ctx_budget)
 {
   assert(initialized.read());
@@ -2139,7 +2146,7 @@ ceph_tid_t Objecter::_op_submit_with_budget(Op *op, RWLock::Context& lc,
   // throttle.  before we look at any state, because
   // _take_op_budget() may drop our lock while it blocks.
   if (!op->ctx_budgeted || (ctx_budget && (*ctx_budget == -1))) {
-    int op_budget = _take_op_budget(op);
+    int op_budget = _take_op_budget(op, sul);
     // take and pass out the budget for the first OP
     // in the context session
     if (ctx_budget && (*ctx_budget == -1)) {
@@ -2156,7 +2163,7 @@ ceph_tid_t Objecter::_op_submit_with_budget(Op *op, RWLock::Context& lc,
 				      op_cancel(tid, -ETIMEDOUT); });
   }
 
-  return _op_submit(op, lc);
+  return _op_submit(op, sul);
 }
 
 void Objecter::_send_op_account(Op *op)
@@ -2238,9 +2245,9 @@ void Objecter::_send_op_account(Op *op)
   }
 }
 
-ceph_tid_t Objecter::_op_submit(Op *op, RWLock::Context& lc)
+ceph_tid_t Objecter::_op_submit(Op *op, shunique_lock& sul)
 {
-  assert(rwlock.is_locked());
+  // rwlock is locked
 
   ldout(cct, 10) << __func__ << " op " << op << dendl;
 
@@ -2253,18 +2260,20 @@ ceph_tid_t Objecter::_op_submit(Op *op, RWLock::Context& lc)
     == RECALC_OP_TARGET_POOL_DNE;
 
   // Try to get a session, including a retry if we need to take write lock
-  int r = _get_session(op->target.osd, &s, lc);
+  int r = _get_session(op->target.osd, &s, sul);
   if (r == -EAGAIN) {
     assert(s == NULL);
-    lc.promote();
-    r = _get_session(op->target.osd, &s, lc);
+    sul.unlock();
+    sul.lock();
+    r = _get_session(op->target.osd, &s, sul);
   }
   assert(r == 0);
   assert(s);  // may be homeless
 
   // We may need to take wlock if we will need to _set_op_map_check later.
-  if (check_for_latest_map && !lc.is_wlocked()) {
-    lc.promote();
+  if (check_for_latest_map && sul.owns_lock_shared()) {
+    sul.unlock();
+    sul.lock();
   }
 
   _send_op_account(op);
@@ -2312,7 +2321,7 @@ ceph_tid_t Objecter::_op_submit(Op *op, RWLock::Context& lc)
     m = _prepare_osd_op(op);
   }
 
-  s->lock.get_write();
+  OSDSession::unique_lock sl(s->lock);
   if (op->tid == 0)
     op->tid = last_tid.inc();
   _session_op_assign(s, op);
@@ -2329,7 +2338,7 @@ ceph_tid_t Objecter::_op_submit(Op *op, RWLock::Context& lc)
   }
   op = NULL;
 
-  s->lock.unlock();
+  sl.unlock();
   put_session(s);
 
   ldout(cct, 5) << num_unacked.read() << " unacked, " << num_uncommitted.read()
@@ -2342,7 +2351,7 @@ int Objecter::op_cancel(OSDSession *s, ceph_tid_t tid, int r)
 {
   assert(initialized.read());
 
-  s->lock.get_write();
+  OSDSession::unique_lock sl(s->lock);
 
   map<ceph_tid_t, Op*>::iterator p = s->ops.find(tid);
   if (p == s->ops.end()) {
@@ -2377,7 +2386,7 @@ int Objecter::op_cancel(OSDSession *s, ceph_tid_t tid, int r)
   }
   _op_cancel_map_check(op);
   _finish_op(op, r);
-  s->lock.unlock();
+  sl.unlock();
 
   return 0;
 }
@@ -2386,9 +2395,8 @@ int Objecter::op_cancel(ceph_tid_t tid, int r)
 {
   int ret = 0;
 
-  rwlock.get_write();
+  unique_lock wl(rwlock);
   ret = _op_cancel(tid, r);
-  rwlock.unlock();
 
   return ret;
 }
@@ -2405,9 +2413,9 @@ start:
   for (map<int, OSDSession *>::iterator siter = osd_sessions.begin();
        siter != osd_sessions.end(); ++siter) {
     OSDSession *s = siter->second;
-    s->lock.get_read();
+    OSDSession::shared_lock sl(s->lock);
     if (s->ops.find(tid) != s->ops.end()) {
-      s->lock.unlock();
+      sl.unlock();
       ret = op_cancel(s, tid, r);
       if (ret == -ENOENT) {
 	/* oh no! raced, maybe tid moved to another session, restarting */
@@ -2415,16 +2423,15 @@ start:
       }
       return ret;
     }
-    s->lock.unlock();
   }
 
   ldout(cct, 5) << __func__ << ": tid " << tid
 		<< " not found in live sessions" << dendl;
 
   // Handle case where the op is in homeless session
-  homeless_session->lock.get_read();
+  OSDSession::shared_lock sl(homeless_session->lock);
   if (homeless_session->ops.find(tid) != homeless_session->ops.end()) {
-    homeless_session->lock.unlock();
+    sl.unlock();
     ret = op_cancel(homeless_session, tid, r);
     if (ret == -ENOENT) {
       /* oh no! raced, maybe tid moved to another session, restarting */
@@ -2433,7 +2440,7 @@ start:
       return ret;
     }
   } else {
-    homeless_session->lock.unlock();
+    sl.unlock();
   }
 
   ldout(cct, 5) << __func__ << ": tid " << tid
@@ -2445,7 +2452,7 @@ start:
 
 epoch_t Objecter::op_cancel_writes(int r, int64_t pool)
 {
-  rwlock.get_write();
+  unique_lock wl(rwlock);
 
   std::vector<ceph_tid_t> to_cancel;
   bool found = false;
@@ -2453,7 +2460,7 @@ epoch_t Objecter::op_cancel_writes(int r, int64_t pool)
   for (map<int, OSDSession *>::iterator siter = osd_sessions.begin();
        siter != osd_sessions.end(); ++siter) {
     OSDSession *s = siter->second;
-    s->lock.get_read();
+    OSDSession::shared_lock sl(s->lock);
     for (map<ceph_tid_t, Op*>::iterator op_i = s->ops.begin();
 	 op_i != s->ops.end(); ++op_i) {
       if (op_i->second->target.flags & CEPH_OSD_FLAG_WRITE
@@ -2461,7 +2468,7 @@ epoch_t Objecter::op_cancel_writes(int r, int64_t pool)
 	to_cancel.push_back(op_i->first);
       }
     }
-    s->lock.unlock();
+    sl.unlock();
 
     for (std::vector<ceph_tid_t>::iterator titer = to_cancel.begin();
 	 titer != to_cancel.end();
@@ -2478,7 +2485,7 @@ epoch_t Objecter::op_cancel_writes(int r, int64_t pool)
 
   const epoch_t epoch = osdmap->get_epoch();
 
-  rwlock.unlock();
+  wl.unlock();
 
   if (found) {
     return epoch;
@@ -2522,14 +2529,14 @@ bool Objecter::target_should_be_paused(op_target_t *t)
  */
 bool Objecter::osdmap_full_flag() const
 {
-  RWLock::RLocker rl(rwlock);
+  shared_lock rl(rwlock);
 
   return _osdmap_full_flag();
 }
 
 bool Objecter::osdmap_pool_full(const int64_t pool_id) const
 {
-  RWLock::RLocker rl(rwlock);
+  shared_lock rl(rwlock);
 
   if (_osdmap_full_flag()) {
     return true;
@@ -2591,6 +2598,7 @@ void Objecter::update_pool_full_map(map<int64_t, bool>& pool_full_map)
 int64_t Objecter::get_object_hash_position(int64_t pool, const string& key,
 					   const string& ns)
 {
+  shared_lock rl(rwlock);
   const pg_pool_t *p = osdmap->get_pg_pool(pool);
   if (!p)
     return -ENOENT;
@@ -2600,6 +2608,7 @@ int64_t Objecter::get_object_hash_position(int64_t pool, const string& key,
 int64_t Objecter::get_object_pg_hash_position(int64_t pool, const string& key,
 					      const string& ns)
 {
+  shared_lock rl(rwlock);
   const pg_pool_t *p = osdmap->get_pg_pool(pool);
   if (!p)
     return -ENOENT;
@@ -2609,7 +2618,7 @@ int64_t Objecter::get_object_pg_hash_position(int64_t pool, const string& key,
 int Objecter::_calc_target(op_target_t *t, epoch_t *last_force_resend,
 			   bool any_change)
 {
-  assert(rwlock.is_locked());
+  // rwlock is locked
 
   bool is_read = t->flags & CEPH_OSD_FLAG_READ;
   bool is_write = t->flags & CEPH_OSD_FLAG_WRITE;
@@ -2771,15 +2780,15 @@ int Objecter::_calc_target(op_target_t *t, epoch_t *last_force_resend,
 }
 
 int Objecter::_map_session(op_target_t *target, OSDSession **s,
-			   RWLock::Context& lc)
+			   shunique_lock& sul)
 {
   _calc_target(target);
-  return _get_session(target->osd, s, lc);
+  return _get_session(target->osd, s, sul);
 }
 
 void Objecter::_session_op_assign(OSDSession *to, Op *op)
 {
-  assert(to->lock.is_locked());
+  // to->lock is locked
   assert(op->session == NULL);
   assert(op->tid);
 
@@ -2797,7 +2806,7 @@ void Objecter::_session_op_assign(OSDSession *to, Op *op)
 void Objecter::_session_op_remove(OSDSession *from, Op *op)
 {
   assert(op->session == from);
-  assert(from->lock.is_locked());
+  // from->lock is locked
 
   if (from->is_homeless()) {
     num_homeless_ops.dec();
@@ -2812,7 +2821,7 @@ void Objecter::_session_op_remove(OSDSession *from, Op *op)
 
 void Objecter::_session_linger_op_assign(OSDSession *to, LingerOp *op)
 {
-  assert(to->lock.is_wlocked());
+  // to->lock is locked unique
   assert(op->session == NULL);
 
   if (to->is_homeless()) {
@@ -2830,7 +2839,7 @@ void Objecter::_session_linger_op_assign(OSDSession *to, LingerOp *op)
 void Objecter::_session_linger_op_remove(OSDSession *from, LingerOp *op)
 {
   assert(from == op->session);
-  assert(from->lock.is_wlocked());
+  // from->lock is locked unique
 
   if (from->is_homeless()) {
     num_homeless_ops.dec();
@@ -2847,7 +2856,7 @@ void Objecter::_session_linger_op_remove(OSDSession *from, LingerOp *op)
 void Objecter::_session_command_op_remove(OSDSession *from, CommandOp *op)
 {
   assert(from == op->session);
-  assert(from->lock.is_locked());
+  // from->lock is locked
 
   if (from->is_homeless()) {
     num_homeless_ops.dec();
@@ -2862,7 +2871,7 @@ void Objecter::_session_command_op_remove(OSDSession *from, CommandOp *op)
 
 void Objecter::_session_command_op_assign(OSDSession *to, CommandOp *op)
 {
-  assert(to->lock.is_locked());
+  // to->lock is locked
   assert(op->session == NULL);
   assert(op->tid);
 
@@ -2878,9 +2887,9 @@ void Objecter::_session_command_op_assign(OSDSession *to, CommandOp *op)
 }
 
 int Objecter::_recalc_linger_op_target(LingerOp *linger_op,
-				       RWLock::Context& lc)
+				       shunique_lock& sul)
 {
-  assert(rwlock.is_wlocked());
+  // rwlock is locked unique
 
   int r = _calc_target(&linger_op->target, &linger_op->last_force_resend,
 		       true);
@@ -2890,7 +2899,7 @@ int Objecter::_recalc_linger_op_target(LingerOp *linger_op,
 		   << " acting " << linger_op->target.acting << dendl;
 
     OSDSession *s = NULL;
-    r = _get_session(linger_op->target.osd, &s, lc);
+    r = _get_session(linger_op->target.osd, &s, sul);
     assert(r == 0);
 
     if (linger_op->session != s) {
@@ -2898,10 +2907,9 @@ int Objecter::_recalc_linger_op_target(LingerOp *linger_op,
       // same time here is only safe because we are the only one that
       // takes two, and we are holding rwlock for write.  Disable
       // lockdep because it doesn't know that.
-      s->lock.get_write(false);
+      OSDSession::unique_lock sl(s->lock);
       _session_linger_op_remove(linger_op->session, linger_op);
       _session_linger_op_assign(s, linger_op);
-      s->lock.unlock(false);
     }
 
     put_session(s);
@@ -2932,7 +2940,7 @@ void Objecter::_finish_op(Op *op, int r)
 {
   ldout(cct, 15) << "finish_op " << op->tid << dendl;
 
-  assert(op->session->lock.is_wlocked());
+  // op->session->lock is locked unique
 
   if (!op->ctx_budgeted && op->budgeted)
     put_op_budget(op);
@@ -2954,9 +2962,9 @@ void Objecter::_finish_op(Op *op, int r)
 void Objecter::finish_op(OSDSession *session, ceph_tid_t tid)
 {
   ldout(cct, 15) << "finish_op " << tid << dendl;
-  RWLock::RLocker rl(rwlock);
+  shared_lock rl(rwlock);
 
-  RWLock::WLocker wl(session->lock);
+  OSDSession::unique_lock wl(session->lock);
 
   map<ceph_tid_t, Op *>::iterator iter = session->ops.find(tid);
   if (iter == session->ops.end())
@@ -2969,7 +2977,7 @@ void Objecter::finish_op(OSDSession *session, ceph_tid_t tid)
 
 MOSDOp *Objecter::_prepare_osd_op(Op *op)
 {
-  assert(rwlock.is_locked());
+  // rwlock is locked
 
   int flags = op->target.flags;
   flags |= CEPH_OSD_FLAG_KNOWN_REDIR;
@@ -3018,8 +3026,8 @@ MOSDOp *Objecter::_prepare_osd_op(Op *op)
 
 void Objecter::_send_op(Op *op, MOSDOp *m)
 {
-  assert(rwlock.is_locked());
-  assert(op->session->lock.is_locked());
+  // rwlock is locked
+  // op->session->lock is locked
 
   if (!m) {
     assert(op->tid > 0);
@@ -3074,31 +3082,38 @@ int Objecter::calc_op_budget(Op *op)
   return op_budget;
 }
 
-void Objecter::_throttle_op(Op *op, int op_budget)
+void Objecter::_throttle_op(Op *op,
+			    shunique_lock& sul,
+			    int op_budget)
 {
-  assert(rwlock.is_locked());
-
-  bool locked_for_write = rwlock.is_wlocked();
+  assert(sul && sul.mutex() == &rwlock);
+  bool locked_for_write = sul.owns_lock();
 
   if (!op_budget)
     op_budget = calc_op_budget(op);
   if (!op_throttle_bytes.get_or_fail(op_budget)) { //couldn't take right now
-    rwlock.unlock();
+    sul.unlock();
     op_throttle_bytes.get(op_budget);
-    rwlock.get(locked_for_write);
+    if (locked_for_write)
+      sul.lock();
+    else
+      sul.lock_shared();
   }
   if (!op_throttle_ops.get_or_fail(1)) { //couldn't take right now
-    rwlock.unlock();
+    sul.unlock();
     op_throttle_ops.get(1);
-    rwlock.get(locked_for_write);
+    if (locked_for_write)
+      sul.lock();
+    else
+      sul.lock_shared();
   }
 }
 
 void Objecter::unregister_op(Op *op)
 {
-  op->session->lock.get_write();
+  OSDSession::unique_lock sl(op->session->lock);
   op->session->ops.erase(op->tid);
-  op->session->lock.unlock();
+  sl.unlock();
   put_session(op->session);
   op->session = NULL;
 
@@ -3115,12 +3130,11 @@ void Objecter::handle_osd_op_reply(MOSDOpReply *m)
 
   int osd_num = (int)m->get_source().num();
 
-  RWLock::RLocker l(rwlock);
+  shunique_lock sul(rwlock, ceph::acquire_shared);
   if (!initialized.read()) {
     m->put();
     return;
   }
-  RWLock::Context lc(rwlock, RWLock::Context::TakenForRead);
 
   map<int, OSDSession *>::iterator siter = osd_sessions.find(osd_num);
   if (siter == osd_sessions.end()) {
@@ -3135,7 +3149,7 @@ void Objecter::handle_osd_op_reply(MOSDOpReply *m)
   OSDSession *s = siter->second;
   get_session(s);
 
-  s->lock.get_write();
+  OSDSession::unique_lock sl(s->lock);
 
   map<ceph_tid_t, Op *>::iterator iter = s->ops.find(tid);
   if (iter == s->ops.end()) {
@@ -3143,7 +3157,7 @@ void Objecter::handle_osd_op_reply(MOSDOpReply *m)
 		  << (m->is_ondisk() ? " ondisk" : (m->is_onnvram() ?
 						    " onnvram" : " ack"))
 		  << " ... stray" << dendl;
-    s->lock.unlock();
+    sl.unlock();
     put_session(s);
     m->put();
     return;
@@ -3167,7 +3181,7 @@ void Objecter::handle_osd_op_reply(MOSDOpReply *m)
 		    << "; last attempt " << (op->attempts - 1) << " sent to "
 		    << op->session->con->get_peer_addr() << dendl;
       m->put();
-      s->lock.unlock();
+      sl.unlock();
       put_session(s);
       return;
     }
@@ -3189,7 +3203,7 @@ void Objecter::handle_osd_op_reply(MOSDOpReply *m)
     if (op->oncommit || op->oncommit_sync)
       num_uncommitted.dec();
     _session_op_remove(s, op);
-    s->lock.unlock();
+    sl.unlock();
     put_session(s);
 
     // FIXME: two redirects could race and reorder
@@ -3198,7 +3212,7 @@ void Objecter::handle_osd_op_reply(MOSDOpReply *m)
     m->get_redirect().combine_with_locator(op->target.target_oloc,
 					   op->target.target_oid.name);
     op->target.flags |= CEPH_OSD_FLAG_REDIRECTED;
-    _op_submit(op, lc);
+    _op_submit(op, sul);
     m->put();
     return;
   }
@@ -3211,14 +3225,13 @@ void Objecter::handle_osd_op_reply(MOSDOpReply *m)
     op->tid = last_tid.inc();
 
     _send_op(op);
-    s->lock.unlock();
+    sl.unlock();
     put_session(s);
     m->put();
     return;
   }
 
-  l.unlock();
-  lc.set_state(RWLock::Context::Untaken);
+  sul.unlock();
 
   if (op->objver)
     *op->objver = m->get_user_version();
@@ -3295,8 +3308,9 @@ void Objecter::handle_osd_op_reply(MOSDOpReply *m)
   }
 
   /* get it before we call _finish_op() */
-  Mutex *completion_lock = (op->target.base_oid.name.size() ?
-			    s->get_lock(op->target.base_oid) : NULL);
+  auto completion_lock =
+    (op->target.base_oid.name.size() ? s->get_lock(op->target.base_oid) :
+     OSDSession::unique_completion_lock());
 
   // done with this tid?
   if (!op->onack && !op->oncommit && !op->oncommit_sync) {
@@ -3308,10 +3322,10 @@ void Objecter::handle_osd_op_reply(MOSDOpReply *m)
 		<< " uncommitted" << dendl;
 
   // serialize completions
-  if (completion_lock) {
-    completion_lock->Lock();
+  if (completion_lock.mutex()) {
+    completion_lock.lock();
   }
-  s->lock.unlock();
+  sl.unlock();
 
   // do callbacks
   if (onack) {
@@ -3320,8 +3334,8 @@ void Objecter::handle_osd_op_reply(MOSDOpReply *m)
   if (oncommit) {
     oncommit->complete(rc);
   }
-  if (completion_lock) {
-    completion_lock->Unlock();
+  if (completion_lock.mutex()) {
+    completion_lock.unlock();
   }
 
   m->put();
@@ -3330,9 +3344,9 @@ void Objecter::handle_osd_op_reply(MOSDOpReply *m)
 
 
 uint32_t Objecter::list_nobjects_seek(NListContext *list_context,
-				     uint32_t pos)
+				      uint32_t pos)
 {
-  RWLock::RLocker rl(rwlock);
+  shared_lock rl(rwlock);
   pg_t actual = osdmap->raw_pg_to_pg(pg_t(pos, list_context->pool_id));
   ldout(cct, 10) << "list_objects_seek " << list_context
 		 << " pos " << pos << " -> " << actual << dendl;
@@ -3376,16 +3390,16 @@ void Objecter::list_nobjects(NListContext *list_context, Context *onfinish)
     return;
   }
 
-  rwlock.get_read();
+  shared_lock rl(rwlock);
   const pg_pool_t *pool = osdmap->get_pg_pool(list_context->pool_id);
   if (!pool) { // pool is gone
-    rwlock.unlock();
+    rl.unlock();
     put_nlist_context_budget(list_context);
     onfinish->complete(-ENOENT);
     return;
   }
   int pg_num = pool->get_pg_num();
-  rwlock.unlock();
+  rl.unlock();
 
   if (list_context->starting_pg_num == 0) {     // there can't be zero pgs!
     list_context->starting_pg_num = pg_num;
@@ -3422,7 +3436,7 @@ void Objecter::list_nobjects(NListContext *list_context, Context *onfinish)
 }
 
 void Objecter::_nlist_reply(NListContext *list_context, int r,
-			   Context *final_finish, epoch_t reply_epoch)
+			    Context *final_finish, epoch_t reply_epoch)
 {
   ldout(cct, 10) << "_list_reply" << dendl;
 
@@ -3488,7 +3502,7 @@ void Objecter::put_nlist_context_budget(NListContext *list_context) {
 uint32_t Objecter::list_objects_seek(ListContext *list_context,
 				     uint32_t pos)
 {
-  RWLock::RLocker rl(rwlock);
+  shared_lock rl(rwlock);
   pg_t actual = osdmap->raw_pg_to_pg(pg_t(pos, list_context->pool_id));
   ldout(cct, 10) << "list_objects_seek " << list_context
 		 << " pos " << pos << " -> " << actual << dendl;
@@ -3532,16 +3546,16 @@ void Objecter::list_objects(ListContext *list_context, Context *onfinish)
     return;
   }
 
-  rwlock.get_read();
+  shared_lock rl(rwlock);
   const pg_pool_t *pool = osdmap->get_pg_pool(list_context->pool_id);
   if (!pool) { // pool is gone
-    rwlock.unlock();
+    rl.unlock();
     put_list_context_budget(list_context);
     onfinish->complete(-ENOENT);
     return;
   }
   int pg_num = pool->get_pg_num();
-  rwlock.unlock();
+  rl.unlock();
 
   if (list_context->starting_pg_num == 0) {     // there can't be zero pgs!
     list_context->starting_pg_num = pg_num;
@@ -3646,7 +3660,7 @@ void Objecter::put_list_context_budget(ListContext *list_context) {
 int Objecter::create_pool_snap(int64_t pool, string& snap_name,
 			       Context *onfinish)
 {
-  RWLock::WLocker wl(rwlock);
+  unique_lock wl(rwlock);
   ldout(cct, 10) << "create_pool_snap; pool: " << pool << "; snap: "
 		 << snap_name << dendl;
 
@@ -3688,7 +3702,7 @@ struct C_SelfmanagedSnap : public Context {
 int Objecter::allocate_selfmanaged_snap(int64_t pool, snapid_t *psnapid,
 					Context *onfinish)
 {
-  RWLock::WLocker wl(rwlock);
+  unique_lock wl(rwlock);
   ldout(cct, 10) << "allocate_selfmanaged_snap; pool: " << pool << dendl;
   PoolOp *op = new PoolOp;
   if (!op) return -ENOMEM;
@@ -3707,7 +3721,7 @@ int Objecter::allocate_selfmanaged_snap(int64_t pool, snapid_t *psnapid,
 int Objecter::delete_pool_snap(int64_t pool, string& snap_name,
 			       Context *onfinish)
 {
-  RWLock::WLocker wl(rwlock);
+  unique_lock wl(rwlock);
   ldout(cct, 10) << "delete_pool_snap; pool: " << pool << "; snap: "
 		 << snap_name << dendl;
 
@@ -3735,7 +3749,7 @@ int Objecter::delete_pool_snap(int64_t pool, string& snap_name,
 int Objecter::delete_selfmanaged_snap(int64_t pool, snapid_t snap,
 				      Context *onfinish)
 {
-  RWLock::WLocker wl(rwlock);
+  unique_lock wl(rwlock);
   ldout(cct, 10) << "delete_selfmanaged_snap; pool: " << pool << "; snap: "
 		 << snap << dendl;
   PoolOp *op = new PoolOp;
@@ -3755,7 +3769,7 @@ int Objecter::delete_selfmanaged_snap(int64_t pool, snapid_t snap,
 int Objecter::create_pool(string& name, Context *onfinish, uint64_t auid,
 			  int crush_rule)
 {
-  RWLock::WLocker wl(rwlock);
+  unique_lock wl(rwlock);
   ldout(cct, 10) << "create_pool name=" << name << dendl;
 
   if (osdmap->lookup_pg_pool_name(name.c_str()) >= 0)
@@ -3780,7 +3794,7 @@ int Objecter::create_pool(string& name, Context *onfinish, uint64_t auid,
 
 int Objecter::delete_pool(int64_t pool, Context *onfinish)
 {
-  RWLock::WLocker wl(rwlock);
+  unique_lock wl(rwlock);
   ldout(cct, 10) << "delete_pool " << pool << dendl;
 
   if (!osdmap->have_pg_pool(pool))
@@ -3792,7 +3806,7 @@ int Objecter::delete_pool(int64_t pool, Context *onfinish)
 
 int Objecter::delete_pool(const string &pool_name, Context *onfinish)
 {
-  RWLock::WLocker wl(rwlock);
+  unique_lock wl(rwlock);
   ldout(cct, 10) << "delete_pool " << pool_name << dendl;
 
   int64_t pool = osdmap->lookup_pg_pool_name(pool_name);
@@ -3823,7 +3837,7 @@ void Objecter::_do_delete_pool(int64_t pool, Context *onfinish)
  */
 int Objecter::change_pool_auid(int64_t pool, Context *onfinish, uint64_t auid)
 {
-  RWLock::WLocker wl(rwlock);
+  unique_lock wl(rwlock);
   ldout(cct, 10) << "change_pool_auid " << pool << " to " << auid << dendl;
   PoolOp *op = new PoolOp;
   if (!op) return -ENOMEM;
@@ -3843,7 +3857,7 @@ int Objecter::change_pool_auid(int64_t pool, Context *onfinish, uint64_t auid)
 
 void Objecter::pool_op_submit(PoolOp *op)
 {
-  assert(rwlock.is_locked());
+  // rwlock is locked
   if (mon_timeout > timespan(0)) {
     op->ontimeout = timer.add_event(mon_timeout,
 				    [this, op]() {
@@ -3854,7 +3868,7 @@ void Objecter::pool_op_submit(PoolOp *op)
 
 void Objecter::_pool_op_submit(PoolOp *op)
 {
-  assert(rwlock.is_wlocked());
+  // rwlock is locked unique
 
   ldout(cct, 10) << "pool_op_submit " << op->tid << dendl;
   MPoolOp *m = new MPoolOp(monc->get_fsid(), op->tid, op->pool,
@@ -3877,9 +3891,9 @@ void Objecter::_pool_op_submit(PoolOp *op)
  */
 void Objecter::handle_pool_op_reply(MPoolOpReply *m)
 {
-  rwlock.get_read();
+  shunique_lock sul(rwlock, acquire_shared);
   if (!initialized.read()) {
-    rwlock.put_read();
+    sul.unlock();
     m->put();
     return;
   }
@@ -3896,8 +3910,8 @@ void Objecter::handle_pool_op_reply(MPoolOpReply *m)
     if (m->version > last_seen_osdmap_version)
       last_seen_osdmap_version = m->version;
     if (osdmap->get_epoch() < m->epoch) {
-      rwlock.unlock();
-      rwlock.get_write();
+      sul.unlock();
+      sul.lock();
       // recheck op existence since we have let go of rwlock
       // (for promotion) above.
       iter = pool_ops.find(tid);
@@ -3914,15 +3928,14 @@ void Objecter::handle_pool_op_reply(MPoolOpReply *m)
 	assert(op->onfinish);
 	op->onfinish->complete(m->replyCode);
       }
-    }
-    else {
+    } else {
       assert(op->onfinish);
       op->onfinish->complete(m->replyCode);
     }
     op->onfinish = NULL;
-    if (!rwlock.is_wlocked()) {
-      rwlock.unlock();
-      rwlock.get_write();
+    if (!sul.owns_lock()) {
+      sul.unlock();
+      sul.lock();
     }
     iter = pool_ops.find(tid);
     if (iter != pool_ops.end()) {
@@ -3933,7 +3946,8 @@ void Objecter::handle_pool_op_reply(MPoolOpReply *m)
   }
 
 done:
-  rwlock.unlock();
+  // Not strictly necessary, since we'll release it on return.
+  sul.unlock();
 
   ldout(cct, 10) << "done" << dendl;
   m->put();
@@ -3943,7 +3957,7 @@ int Objecter::pool_op_cancel(ceph_tid_t tid, int r)
 {
   assert(initialized.read());
 
-  RWLock::WLocker wl(rwlock);
+  unique_lock wl(rwlock);
 
   map<ceph_tid_t, PoolOp*>::iterator it = pool_ops.find(tid);
   if (it == pool_ops.end()) {
@@ -3963,7 +3977,7 @@ int Objecter::pool_op_cancel(ceph_tid_t tid, int r)
 
 void Objecter::_finish_pool_op(PoolOp *op, int r)
 {
-  assert(rwlock.is_wlocked());
+  // rwlock is locked unique
   pool_ops.erase(op->tid);
   logger->set(l_osdc_poolop_active, pool_ops.size());
 
@@ -3996,7 +4010,7 @@ void Objecter::get_pool_stats(list<string>& pools,
     op->ontimeout = 0;
   }
 
-  RWLock::WLocker wl(rwlock);
+  unique_lock wl(rwlock);
 
   poolstat_ops[op->tid] = op;
 
@@ -4021,7 +4035,7 @@ void Objecter::handle_get_pool_stats_reply(MGetPoolStatsReply *m)
   ldout(cct, 10) << "handle_get_pool_stats_reply " << *m << dendl;
   ceph_tid_t tid = m->get_tid();
 
-  RWLock::WLocker wl(rwlock);
+  unique_lock wl(rwlock);
   if (!initialized.read()) {
     m->put();
     return;
@@ -4048,7 +4062,7 @@ int Objecter::pool_stat_op_cancel(ceph_tid_t tid, int r)
 {
   assert(initialized.read());
 
-  RWLock::WLocker wl(rwlock);
+  unique_lock wl(rwlock);
 
   map<ceph_tid_t, PoolStatOp*>::iterator it = poolstat_ops.find(tid);
   if (it == poolstat_ops.end()) {
@@ -4067,7 +4081,7 @@ int Objecter::pool_stat_op_cancel(ceph_tid_t tid, int r)
 
 void Objecter::_finish_pool_stat_op(PoolStatOp *op, int r)
 {
-  assert(rwlock.is_wlocked());
+  // rwlock is locked unique
 
   poolstat_ops.erase(op->tid);
   logger->set(l_osdc_poolstat_active, poolstat_ops.size());
@@ -4081,7 +4095,7 @@ void Objecter::_finish_pool_stat_op(PoolStatOp *op, int r)
 void Objecter::get_fs_stats(ceph_statfs& result, Context *onfinish)
 {
   ldout(cct, 10) << "get_fs_stats" << dendl;
-  RWLock::WLocker l(rwlock);
+  unique_lock l(rwlock);
 
   StatfsOp *op = new StatfsOp;
   op->tid = last_tid.inc();
@@ -4104,7 +4118,7 @@ void Objecter::get_fs_stats(ceph_statfs& result, Context *onfinish)
 
 void Objecter::_fs_stats_submit(StatfsOp *op)
 {
-  assert(rwlock.is_wlocked());
+  // rwlock is locked unique
 
   ldout(cct, 10) << "fs_stats_submit" << op->tid << dendl;
   monc->send_mon_message(new MStatfs(monc->get_fsid(), op->tid,
@@ -4116,7 +4130,7 @@ void Objecter::_fs_stats_submit(StatfsOp *op)
 
 void Objecter::handle_fs_stats_reply(MStatfsReply *m)
 {
-  RWLock::WLocker wl(rwlock);
+  unique_lock wl(rwlock);
   if (!initialized.read()) {
     m->put();
     return;
@@ -4144,7 +4158,7 @@ int Objecter::statfs_op_cancel(ceph_tid_t tid, int r)
 {
   assert(initialized.read());
 
-  RWLock::WLocker wl(rwlock);
+  unique_lock wl(rwlock);
 
   map<ceph_tid_t, StatfsOp*>::iterator it = statfs_ops.find(tid);
   if (it == statfs_ops.end()) {
@@ -4163,7 +4177,7 @@ int Objecter::statfs_op_cancel(ceph_tid_t tid, int r)
 
 void Objecter::_finish_statfs_op(StatfsOp *op, int r)
 {
-  assert(rwlock.is_wlocked());
+  // rwlock is locked unique
 
   statfs_ops.erase(op->tid);
   logger->set(l_osdc_statfs_active, statfs_ops.size());
@@ -4226,24 +4240,24 @@ bool Objecter::ms_handle_reset(Connection *con)
     int osd = osdmap->identify_osd(con->get_peer_addr());
     if (osd >= 0) {
       ldout(cct, 1) << "ms_handle_reset on osd." << osd << dendl;
-      rwlock.get_write();
+      unique_lock wl(rwlock);
       if (!initialized.read()) {
-	rwlock.put_write();
+	wl.unlock();
 	return false;
       }
       map<int,OSDSession*>::iterator p = osd_sessions.find(osd);
       if (p != osd_sessions.end()) {
 	OSDSession *session = p->second;
 	map<uint64_t, LingerOp *> lresend;
-	session->lock.get_write();
+	OSDSession::unique_lock sl(session->lock);
 	_reopen_session(session);
 	_kick_requests(session, lresend);
-	session->lock.unlock();
-	_linger_ops_resend(lresend);
-	rwlock.unlock();
+	sl.unlock();
+	_linger_ops_resend(lresend, wl);
+	wl.unlock();
 	maybe_request_map();
       } else {
-	rwlock.unlock();
+	wl.unlock();
       }
     } else {
       ldout(cct, 10) << "ms_handle_reset on unknown osd addr "
@@ -4308,22 +4322,23 @@ void Objecter::_dump_active()
   for (map<int, OSDSession *>::iterator siter = osd_sessions.begin();
        siter != osd_sessions.end(); ++siter) {
     OSDSession *s = siter->second;
-    s->lock.get_read();
+    OSDSession::shared_lock sl(s->lock);
     _dump_active(s);
-    s->lock.unlock();
+    sl.unlock();
   }
   _dump_active(homeless_session);
 }
 
 void Objecter::dump_active()
 {
-  rwlock.get_read();
+  shared_lock rl(rwlock);
   _dump_active();
-  rwlock.unlock();
+  rl.unlock();
 }
 
 void Objecter::dump_requests(Formatter *fmt)
 {
+  // Read-lock on Objecter held here
   fmt->open_object_section("requests");
   dump_ops(fmt);
   dump_linger_ops(fmt);
@@ -4363,13 +4378,14 @@ void Objecter::_dump_ops(const OSDSession *s, Formatter *fmt)
 
 void Objecter::dump_ops(Formatter *fmt)
 {
+  // Read-lock on Objecter held
   fmt->open_array_section("ops");
   for (map<int, OSDSession *>::const_iterator siter = osd_sessions.begin();
        siter != osd_sessions.end(); ++siter) {
     OSDSession *s = siter->second;
-    s->lock.get_read();
+    OSDSession::shared_lock sl(s->lock);
     _dump_ops(s, fmt);
-    s->lock.unlock();
+    sl.unlock();
   }
   _dump_ops(homeless_session, fmt);
   fmt->close_section(); // ops array
@@ -4392,13 +4408,14 @@ void Objecter::_dump_linger_ops(const OSDSession *s, Formatter *fmt)
 
 void Objecter::dump_linger_ops(Formatter *fmt)
 {
+  // We have a read-lock on the objecter
   fmt->open_array_section("linger_ops");
   for (map<int, OSDSession *>::const_iterator siter = osd_sessions.begin();
        siter != osd_sessions.end(); ++siter) {
     OSDSession *s = siter->second;
-    s->lock.get_read();
+    OSDSession::shared_lock sl(s->lock);
     _dump_linger_ops(s, fmt);
-    s->lock.unlock();
+    sl.unlock();
   }
   _dump_linger_ops(homeless_session, fmt);
   fmt->close_section(); // linger_ops array
@@ -4428,13 +4445,14 @@ void Objecter::_dump_command_ops(const OSDSession *s, Formatter *fmt)
 
 void Objecter::dump_command_ops(Formatter *fmt)
 {
+  // We have a read-lock on the Objecter here
   fmt->open_array_section("command_ops");
   for (map<int, OSDSession *>::const_iterator siter = osd_sessions.begin();
        siter != osd_sessions.end(); ++siter) {
     OSDSession *s = siter->second;
-    s->lock.get_read();
+    OSDSession::shared_lock sl(s->lock);
     _dump_command_ops(s, fmt);
-    s->lock.unlock();
+    sl.unlock();
   }
   _dump_command_ops(homeless_session, fmt);
   fmt->close_section(); // command_ops array
@@ -4509,7 +4527,7 @@ bool Objecter::RequestStateHook::call(std::string command, cmdmap_t& cmdmap,
 				      std::string format, bufferlist& out)
 {
   Formatter *f = Formatter::create(format, "json-pretty", "json-pretty");
-  RWLock::RLocker rl(m_objecter->rwlock);
+  shared_lock rl(m_objecter->rwlock);
   m_objecter->dump_requests(f);
   f->flush(out);
   delete f;
@@ -4542,7 +4560,7 @@ void Objecter::handle_command_reply(MCommandReply *m)
 {
   int osd_num = (int)m->get_source().num();
 
-  RWLock::WLocker wl(rwlock);
+  unique_lock wl(rwlock);
   if (!initialized.read()) {
     m->put();
     return;
@@ -4558,13 +4576,13 @@ void Objecter::handle_command_reply(MCommandReply *m)
 
   OSDSession *s = siter->second;
 
-  s->lock.get_read();
+  OSDSession::shared_lock sl(s->lock);
   map<ceph_tid_t,CommandOp*>::iterator p = s->command_ops.find(m->get_tid());
   if (p == s->command_ops.end()) {
     ldout(cct, 10) << "handle_command_reply tid " << m->get_tid()
 		   << " not found" << dendl;
     m->put();
-    s->lock.unlock();
+    sl.unlock();
     return;
   }
 
@@ -4576,13 +4594,13 @@ void Objecter::handle_command_reply(MCommandReply *m)
 		   << m->get_connection() << " " << m->get_source_inst()
 		   << dendl;
     m->put();
-    s->lock.unlock();
+    sl.unlock();
     return;
   }
   if (c->poutbl)
     c->poutbl->claim(m->get_data());
 
-  s->lock.unlock();
+  sl.unlock();
 
 
   _finish_command(c, m->r, m->rs);
@@ -4591,21 +4609,19 @@ void Objecter::handle_command_reply(MCommandReply *m)
 
 int Objecter::submit_command(CommandOp *c, ceph_tid_t *ptid)
 {
-  RWLock::WLocker wl(rwlock);
-
-  RWLock::Context lc(rwlock, RWLock::Context::TakenForWrite);
+  shunique_lock sul(rwlock, ceph::acquire_unique);
 
   ceph_tid_t tid = last_tid.inc();
   ldout(cct, 10) << "_submit_command " << tid << " " << c->cmd << dendl;
   c->tid = tid;
 
   {
-   RWLock::WLocker hs_wl(homeless_session->lock);
-  _session_command_op_assign(homeless_session, c);
+    OSDSession::unique_lock hs_wl(homeless_session->lock);
+    _session_command_op_assign(homeless_session, c);
   }
 
-  (void)_calc_command_target(c);
-  _assign_command_session(c);
+  _calc_command_target(c, sul);
+  _assign_command_session(c, sul);
   if (osd_timeout > timespan(0)) {
     c->ontimeout = timer.add_event(osd_timeout,
 				   [this, c, tid]() {
@@ -4627,11 +4643,9 @@ int Objecter::submit_command(CommandOp *c, ceph_tid_t *ptid)
   return 0;
 }
 
-int Objecter::_calc_command_target(CommandOp *c)
+int Objecter::_calc_command_target(CommandOp *c, shunique_lock& sul)
 {
-  assert(rwlock.is_wlocked());
-
-  RWLock::Context lc(rwlock, RWLock::Context::TakenForWrite);
+  assert(sul.owns_lock() && sul.mutex() == &rwlock);
 
   c->map_check_error = 0;
 
@@ -4658,7 +4672,7 @@ int Objecter::_calc_command_target(CommandOp *c)
   }
 
   OSDSession *s;
-  int r = _get_session(c->osd, &s, lc);
+  int r = _get_session(c->osd, &s, sul);
   assert(r != -EAGAIN); /* shouldn't happen as we're holding the write lock */
 
   if (c->session != s) {
@@ -4674,26 +4688,24 @@ int Objecter::_calc_command_target(CommandOp *c)
   return RECALC_OP_TARGET_NO_ACTION;
 }
 
-void Objecter::_assign_command_session(CommandOp *c)
+void Objecter::_assign_command_session(CommandOp *c,
+				       shunique_lock& sul)
 {
-  assert(rwlock.is_wlocked());
-
-  RWLock::Context lc(rwlock, RWLock::Context::TakenForWrite);
+  assert(sul.owns_lock() && sul.mutex() == &rwlock);
 
   OSDSession *s;
-  int r = _get_session(c->osd, &s, lc);
+  int r = _get_session(c->osd, &s, sul);
   assert(r != -EAGAIN); /* shouldn't happen as we're holding the write lock */
 
   if (c->session != s) {
     if (c->session) {
       OSDSession *cs = c->session;
-      cs->lock.get_write();
+      OSDSession::unique_lock csl(cs->lock);
       _session_command_op_remove(c->session, c);
-      cs->lock.unlock();
+      csl.unlock();
     }
-    s->lock.get_write();
+    OSDSession::unique_lock sl(s->lock);
     _session_command_op_assign(s, c);
-    s->lock.unlock();
   }
 
   put_session(s);
@@ -4716,7 +4728,7 @@ int Objecter::command_op_cancel(OSDSession *s, ceph_tid_t tid, int r)
 {
   assert(initialized.read());
 
-  RWLock::WLocker wl(rwlock);
+  unique_lock wl(rwlock);
 
   map<ceph_tid_t, CommandOp*>::iterator it = s->command_ops.find(tid);
   if (it == s->command_ops.end()) {
@@ -4734,7 +4746,7 @@ int Objecter::command_op_cancel(OSDSession *s, ceph_tid_t tid, int r)
 
 void Objecter::_finish_command(CommandOp *c, int r, string rs)
 {
-  assert(rwlock.is_wlocked());
+  // rwlock is locked unique
 
   ldout(cct, 10) << "_finish_command " << c->tid << " = " << r << " "
 		 << rs << dendl;
@@ -4747,9 +4759,9 @@ void Objecter::_finish_command(CommandOp *c, int r, string rs)
     timer.cancel_event(c->ontimeout);
 
   OSDSession *s = c->session;
-  s->lock.get_write();
+  OSDSession::unique_lock sl(s->lock);
   _session_command_op_remove(c->session, c);
-  s->lock.unlock();
+  sl.unlock();
 
   c->put();
 
@@ -4763,11 +4775,6 @@ Objecter::OSDSession::~OSDSession()
   assert(ops.empty());
   assert(linger_ops.empty());
   assert(command_ops.empty());
-
-  for (int i = 0; i < num_locks; i++) {
-    delete completion_locks[i];
-  }
-  delete[] completion_locks;
 }
 
 Objecter::~Objecter()
@@ -4801,7 +4808,7 @@ Objecter::~Objecter()
  */
 void Objecter::set_epoch_barrier(epoch_t epoch)
 {
-  RWLock::WLocker wl(rwlock);
+  unique_lock wl(rwlock);
 
   ldout(cct, 7) << __func__ << ": barrier " << epoch << " (was "
 		<< epoch_barrier << ") current epoch " << osdmap->get_epoch()
@@ -4847,7 +4854,7 @@ struct C_EnumerateReply : public Context {
 
   void finish(int r) {
     objecter->_enumerate_reply(
-        bl, r, end, pool_id, budget, epoch, result, next, on_finish);
+      bl, r, end, pool_id, budget, epoch, result, next, on_finish);
   }
 };
 
@@ -4857,7 +4864,7 @@ void Objecter::enumerate_objects(
     const hobject_t &start,
     const hobject_t &end,
     const uint32_t max,
-    std::list<librados::ListObjectImpl> *result, 
+    std::list<librados::ListObjectImpl> *result,
     hobject_t *next,
     Context *on_finish)
 {
@@ -4880,23 +4887,23 @@ void Objecter::enumerate_objects(
     return;
   }
 
-  rwlock.get_read();
+  shared_lock rl(rwlock);
   assert(osdmap->get_epoch());
   if (!osdmap->test_flag(CEPH_OSDMAP_SORTBITWISE)) {
-    rwlock.unlock();
+    rl.unlock();
     lderr(cct) << __func__ << ": SORTBITWISE cluster flag not set" << dendl;
     on_finish->complete(-EOPNOTSUPP);
     return;
   }
   const pg_pool_t *p = osdmap->get_pg_pool(pool_id);
   if (!p) {
-    lderr(cct) << __func__ << ": pool " << pool_id << " DNE in"
-                     "osd epoch " << osdmap->get_epoch() << dendl;
-    rwlock.unlock();
+    lderr(cct) << __func__ << ": pool " << pool_id << " DNE in osd epoch "
+	       << osdmap->get_epoch() << dendl;
+    rl.unlock();
     on_finish->complete(-ENOENT);
     return;
   } else {
-    rwlock.unlock();
+    rl.unlock();
   }
 
   ldout(cct, 20) << __func__ << ": start=" << start << " end=" << end << dendl;
@@ -4924,7 +4931,7 @@ void Objecter::_enumerate_reply(
     const int64_t pool_id,
     int budget,
     epoch_t reply_epoch,
-    std::list<librados::ListObjectImpl> *result, 
+    std::list<librados::ListObjectImpl> *result,
     hobject_t *next,
     Context *on_finish)
 {
@@ -4955,20 +4962,21 @@ void Objecter::_enumerate_reply(
 		 << " handle " << response.handle
 		 << " reply_epoch " << reply_epoch << dendl;
   ldout(cct, 20) << __func__ << ": response.entries.size "
-                 << response.entries.size() << ", response.entries "
-                 << response.entries << dendl;
+		 << response.entries.size() << ", response.entries "
+		 << response.entries << dendl;
   if (cmp_bitwise(response.handle, end) <= 0) {
     *next = response.handle;
   } else {
-    ldout(cct, 10) << __func__ << ": adjusted next down to end " << end << dendl;
+    ldout(cct, 10) << __func__ << ": adjusted next down to end " << end
+		   << dendl;
     *next = end;
 
     // drop anything after 'end'
-    rwlock.get_read();
+    shared_lock rl(rwlock);
     const pg_pool_t *pool = osdmap->get_pg_pool(pool_id);
     if (!pool) {
       // pool is gone, drop any results which are now meaningless.
-      rwlock.put_read();
+      rl.unlock();
       on_finish->complete(-ENOENT);
       return;
     }
@@ -4990,7 +4998,7 @@ void Objecter::_enumerate_reply(
 		     << " >= end " << end << dendl;
       response.entries.pop_back();
     }
-    rwlock.put_read();
+    rl.unlock();
   }
   if (!response.entries.empty()) {
     result->merge(response.entries);
diff --git a/src/osdc/Objecter.h b/src/osdc/Objecter.h
index 16d8c0a..8e156a0 100644
--- a/src/osdc/Objecter.h
+++ b/src/osdc/Objecter.h
@@ -15,19 +15,25 @@
 #ifndef CEPH_OBJECTER_H
 #define CEPH_OBJECTER_H
 
+#include <condition_variable>
 #include <list>
 #include <map>
+#include <mutex>
 #include <memory>
 #include <sstream>
+#include <type_traits>
 
-#include "include/types.h"
+#include <boost/thread/shared_mutex.hpp>
+
+#include "include/assert.h"
 #include "include/buffer.h"
+#include "include/types.h"
 #include "include/rados/rados_types.hpp"
 
 #include "common/admin_socket.h"
 #include "common/ceph_time.h"
 #include "common/ceph_timer.h"
-#include "common/RWLock.h"
+#include "common/shunique_lock.h"
 
 #include "messages/MOSDOp.h"
 #include "osd/OSDMap.h"
@@ -1128,7 +1134,11 @@ private:
   version_t last_seen_osdmap_version;
   version_t last_seen_pgmap_version;
 
-  RWLock rwlock;
+  mutable boost::shared_mutex rwlock;
+  using lock_guard = std::unique_lock<decltype(rwlock)>;
+  using unique_lock = std::unique_lock<decltype(rwlock)>;
+  using shared_lock = boost::shared_lock<decltype(rwlock)>;
+  using shunique_lock = ceph::shunique_lock<decltype(rwlock)>;
   ceph::timer<ceph::mono_clock> timer;
 
   PerfCounters *logger;
@@ -1142,7 +1152,7 @@ private:
   class RequestStateHook : public AdminSocketHook {
     Objecter *m_objecter;
   public:
-    RequestStateHook(Objecter *objecter);
+    explicit RequestStateHook(Objecter *objecter);
     bool call(std::string command, cmdmap_t& cmdmap, std::string format,
 	      bufferlist& out);
   };
@@ -1566,8 +1576,8 @@ public:
   };
 
   int submit_command(CommandOp *c, ceph_tid_t *ptid);
-  int _calc_command_target(CommandOp *c);
-  void _assign_command_session(CommandOp *c);
+  int _calc_command_target(CommandOp *c, shunique_lock &sul);
+  void _assign_command_session(CommandOp *c, shunique_lock &sul);
   void _send_command(CommandOp *c);
   int command_op_cancel(OSDSession *s, ceph_tid_t tid, int r);
   void _finish_command(CommandOp *c, int r, string rs);
@@ -1603,7 +1613,11 @@ public:
     bool is_watch;
     ceph::mono_time watch_valid_thru; ///< send time for last acked ping
     int last_error;  ///< error from last failed ping|reconnect, if any
-    RWLock watch_lock;
+    boost::shared_mutex watch_lock;
+    using lock_guard = std::unique_lock<decltype(watch_lock)>;
+    using unique_lock = std::unique_lock<decltype(watch_lock)>;
+    using shared_lock = boost::shared_lock<decltype(watch_lock)>;
+    using shunique_lock = ceph::shunique_lock<decltype(watch_lock)>;
 
     // queue of pending async operations, with the timestamp of
     // when they were queued.
@@ -1630,11 +1644,11 @@ public:
     epoch_t last_force_resend;
 
     void _queued_async() {
-      assert(watch_lock.is_locked());
+      // watch_lock must be locked unique
       watch_pending_async.push_back(ceph::mono_clock::now());
     }
     void finished_async() {
-      RWLock::WLocker l(watch_lock);
+      unique_lock l(watch_lock);
       assert(!watch_pending_async.empty());
       watch_pending_async.pop_front();
     }
@@ -1643,7 +1657,6 @@ public:
 		 target(object_t(), object_locator_t(), 0),
 		 snap(CEPH_NOSNAP), poutbl(NULL), pobjver(NULL),
 		 is_watch(false), last_error(0),
-		 watch_lock("Objecter::LingerOp::watch_lock"),
 		 register_gen(0),
 		 registered(false),
 		 canceled(false),
@@ -1729,8 +1742,11 @@ public:
 
   // -- osd sessions --
   struct OSDSession : public RefCountedObject {
-    RWLock lock;
-    Mutex **completion_locks;
+    boost::shared_mutex lock;
+    using lock_guard = std::lock_guard<decltype(lock)>;
+    using unique_lock = std::unique_lock<decltype(lock)>;
+    using shared_lock = boost::shared_lock<decltype(lock)>;
+    using shunique_lcok = ceph::shunique_lock<decltype(lock)>;
 
     // pending ops
     map<ceph_tid_t,Op*> ops;
@@ -1739,26 +1755,23 @@ public:
 
     int osd;
     int incarnation;
-    int num_locks;
     ConnectionRef con;
+    int num_locks;
+    std::unique_ptr<std::mutex[]> completion_locks;
+    using unique_completion_lock = std::unique_lock<
+      decltype(completion_locks)::element_type>;
+
 
     OSDSession(CephContext *cct, int o) :
-      lock("OSDSession"),
-      osd(o),
-      incarnation(0),
-      con(NULL) {
-      num_locks = cct->_conf->objecter_completion_locks_per_session;
-      completion_locks = new Mutex *[num_locks];
-      for (int i = 0; i < num_locks; i++) {
-	completion_locks[i] = new Mutex("OSDSession::completion_lock");
-      }
-    }
+      osd(o), incarnation(0), con(NULL),
+      num_locks(cct->_conf->objecter_completion_locks_per_session),
+      completion_locks(new std::mutex[num_locks]) {}
 
     ~OSDSession();
 
     bool is_homeless() { return (osd == -1); }
 
-    Mutex *get_lock(object_t& oid);
+    unique_completion_lock get_lock(object_t& oid);
   };
   map<int,OSDSession*> osd_sessions;
 
@@ -1781,8 +1794,10 @@ public:
   // we use this just to confirm a cookie is valid before dereferencing the ptr
   set<LingerOp*> linger_ops_set;
   int num_linger_callbacks;
-  Mutex linger_callback_lock;
-  Cond linger_callback_cond;
+  std::mutex linger_callback_lock;
+  typedef std::unique_lock<std::mutex> unique_linger_cb_lock;
+  typedef std::lock_guard<std::mutex> linger_cb_lock_guard;
+  std::condition_variable linger_callback_cond;
 
   map<ceph_tid_t,PoolStatOp*> poolstat_ops;
   map<ceph_tid_t,StatfsOp*> statfs_ops;
@@ -1828,7 +1843,7 @@ public:
   int _calc_target(op_target_t *t, epoch_t *last_force_resend = 0,
 		   bool any_change = false);
   int _map_session(op_target_t *op, OSDSession **s,
-		   RWLock::Context& lc);
+		   shunique_lock& lc);
 
   void _session_op_assign(OSDSession *s, Op *op);
   void _session_op_remove(OSDSession *s, Op *op);
@@ -1837,13 +1852,13 @@ public:
   void _session_command_op_assign(OSDSession *to, CommandOp *op);
   void _session_command_op_remove(OSDSession *from, CommandOp *op);
 
-  int _assign_op_target_session(Op *op, RWLock::Context& lc,
+  int _assign_op_target_session(Op *op, shunique_lock& lc,
 				bool src_session_locked,
 				bool dst_session_locked);
-  int _recalc_linger_op_target(LingerOp *op, RWLock::Context& lc);
+  int _recalc_linger_op_target(LingerOp *op, shunique_lock& lc);
 
-  void _linger_submit(LingerOp *info);
-  void _send_linger(LingerOp *info);
+  void _linger_submit(LingerOp *info, shunique_lock& sul);
+  void _send_linger(LingerOp *info, shunique_lock& sul);
   void _linger_commit(LingerOp *info, int r, bufferlist& outbl);
   void _linger_reconnect(LingerOp *info, int r);
   void _send_linger_ping(LingerOp *info);
@@ -1852,25 +1867,26 @@ public:
   int _normalize_watch_error(int r);
 
   void _linger_callback_queue() {
-    Mutex::Locker l(linger_callback_lock);
+    linger_cb_lock_guard l(linger_callback_lock);
     ++num_linger_callbacks;
   }
   void _linger_callback_finish() {
-    Mutex::Locker l(linger_callback_lock);
+    linger_cb_lock_guard l(linger_callback_lock);
     if (--num_linger_callbacks == 0)
-      linger_callback_cond.SignalAll();
+      linger_callback_cond.notify_all();
     assert(num_linger_callbacks >= 0);
   }
   friend class C_DoWatchError;
 public:
   void linger_callback_flush() {
-    Mutex::Locker l(linger_callback_lock);
-    while (num_linger_callbacks > 0)
-      linger_callback_cond.Wait(linger_callback_lock);
+    unique_linger_cb_lock l(linger_callback_lock);
+    linger_callback_cond.wait(l, [this]() {
+	return num_linger_callbacks <= 0;
+      });
   }
 
 private:
-  void _check_op_pool_dne(Op *op, bool session_locked);
+  void _check_op_pool_dne(Op *op, unique_lock& sl);
   void _send_op_map_check(Op *op);
   void _op_cancel_map_check(Op *op);
   void _check_linger_pool_dne(LingerOp *op, bool *need_unregister);
@@ -1882,9 +1898,9 @@ private:
 
   void kick_requests(OSDSession *session);
   void _kick_requests(OSDSession *session, map<uint64_t, LingerOp *>& lresend);
-  void _linger_ops_resend(map<uint64_t, LingerOp *>& lresend);
+  void _linger_ops_resend(map<uint64_t, LingerOp *>& lresend, unique_lock& ul);
 
-  int _get_session(int osd, OSDSession **session, RWLock::Context& lc);
+  int _get_session(int osd, OSDSession **session, shunique_lock& sul);
   void put_session(OSDSession *s);
   void get_session(OSDSession *s);
   void _reopen_session(OSDSession *session);
@@ -1904,12 +1920,12 @@ private:
    * If throttle_op needs to throttle it will unlock client_lock.
    */
   int calc_op_budget(Op *op);
-  void _throttle_op(Op *op, int op_size=0);
-  int _take_op_budget(Op *op) {
-    assert(rwlock.is_locked());
+  void _throttle_op(Op *op, shunique_lock& sul, int op_size = 0);
+  int _take_op_budget(Op *op, shunique_lock& sul) {
+    assert(sul && sul.mutex() == &rwlock);
     int op_budget = calc_op_budget(op);
     if (keep_balanced_budget) {
-      _throttle_op(op, op_budget);
+      _throttle_op(op, sul, op_budget);
     } else {
       op_throttle_bytes.take(op_budget);
       op_throttle_ops.take(1);
@@ -1941,10 +1957,9 @@ private:
     max_linger_id(0), num_unacked(0), num_uncommitted(0), global_op_flags(0),
     keep_balanced_budget(false), honor_osdmap_full(true),
     last_seen_osdmap_version(0), last_seen_pgmap_version(0),
-    rwlock("Objecter::rwlock"), logger(NULL), tick_event(0),
-    m_request_state_hook(NULL), num_linger_callbacks(0),
-    linger_callback_lock("Objecter::linger_callback_lock"),
-    num_homeless_ops(0), homeless_session(new OSDSession(cct, -1)),
+    logger(NULL), tick_event(0), m_request_state_hook(NULL),
+    num_linger_callbacks(0), num_homeless_ops(0),
+    homeless_session(new OSDSession(cct, -1)),
     mon_timeout(ceph::make_timespan(mon_timeout)),
     osd_timeout(ceph::make_timespan(osd_timeout)),
     op_throttle_bytes(cct, "objecter_bytes",
@@ -1958,14 +1973,44 @@ private:
   void start();
   void shutdown();
 
-  const OSDMap *get_osdmap_read() {
-    rwlock.get_read();
-    return osdmap;
-  }
-  void put_osdmap_read() {
-    rwlock.put_read();
+  // These two templates replace get_osdmap_read() and put_osdmap_read().
+  // Simply wrap whatever code needs to use the OSDMap in a lambda like:
+  //
+  // with_osdmap([](const OSDMap& o) { o.do_stuff(); });
+  //
+  // or
+  //
+  // auto t = with_osdmap([&](const OSDMap& o) { return o.lookup_stuff(x); });
+  //
+  // Do not call into something that will try to lock the OSDMap from
+  // here or you will have great woe and misery.
+
+  template<typename Callback, typename...Args>
+  auto with_osdmap(Callback&& cb, Args&&...args) ->
+    typename std::enable_if<
+      std::is_void<
+    decltype(cb(const_cast<const OSDMap&>(*osdmap),
+		std::forward<Args>(args)...))>::value,
+      void>::type {
+    shared_lock l(rwlock);
+    std::forward<Callback>(cb)(const_cast<const OSDMap&>(*osdmap),
+			       std::forward<Args>(args)...);
+  }
+
+  template<typename Callback, typename...Args>
+  auto with_osdmap(Callback&& cb, Args&&... args) ->
+    typename std::enable_if<
+      !std::is_void<
+	decltype(cb(const_cast<const OSDMap&>(*osdmap),
+		    std::forward<Args>(args)...))>::value,
+      decltype(cb(const_cast<const OSDMap&>(*osdmap),
+		  std::forward<Args>(args)...))>::type {
+    shared_lock l(rwlock);
+    return std::forward<Callback>(cb)(const_cast<const OSDMap&>(*osdmap),
+				      std::forward<Args>(args)...);
   }
 
+
   /**
    * Tell the objecter to throttle outgoing ops according to its
    * budget (in _conf). If you do this, ops can block, in
@@ -1985,7 +2030,8 @@ private:
 		      map<int64_t, bool> *pool_full_map,
 		      map<ceph_tid_t, Op*>& need_resend,
 		      list<LingerOp*>& need_resend_linger,
-		      map<ceph_tid_t, CommandOp*>& need_resend_command);
+		      map<ceph_tid_t, CommandOp*>& need_resend_command,
+		      shunique_lock& sul);
 
   int64_t get_object_hash_position(int64_t pool, const string& key,
 				   const string& ns);
@@ -2023,8 +2069,8 @@ private:
 private:
 
   // low-level
-  ceph_tid_t _op_submit(Op *op, RWLock::Context& lc);
-  ceph_tid_t _op_submit_with_budget(Op *op, RWLock::Context& lc,
+  ceph_tid_t _op_submit(Op *op, shunique_lock& lc);
+  ceph_tid_t _op_submit_with_budget(Op *op, shunique_lock& lc,
 				    int *ctx_budget = NULL);
   inline void unregister_op(Op *op);
 
@@ -2032,7 +2078,7 @@ private:
 public:
   ceph_tid_t op_submit(Op *op, int *ctx_budget = NULL);
   bool is_active() {
-    RWLock::RLocker l(rwlock);
+    shared_lock l(rwlock);
     return !((!inflight_ops.read()) && linger_ops.empty() &&
 	     poolstat_ops.empty() && statfs_ops.empty());
   }
diff --git a/src/osdc/Striper.cc b/src/osdc/Striper.cc
index 9a35ffe..4395106 100644
--- a/src/osdc/Striper.cc
+++ b/src/osdc/Striper.cc
@@ -369,9 +369,7 @@ void Striper::StripedReadResult::assemble_result(CephContext *cct,
     size_t len = p->second.first.length();
     if (len < p->second.second) {
       if (zero_tail || bl.length()) {
-	bufferptr bp(p->second.second - len);
-	bp.zero();
-	bl.push_front(bp);
+	bl.append_zero(p->second.second - len);
 	bl.claim_prepend(p->second.first);
       } else {
 	bl.claim_prepend(p->second.first);
diff --git a/src/perfglue/disabled_heap_profiler.cc b/src/perfglue/disabled_heap_profiler.cc
index d2d4cb7..238ab4a 100644
--- a/src/perfglue/disabled_heap_profiler.cc
+++ b/src/perfglue/disabled_heap_profiler.cc
@@ -29,5 +29,15 @@ void ceph_heap_profiler_stop() { return; }
 
 void ceph_heap_profiler_dump(const char *reason) { return; }
 
+bool ceph_heap_get_numeric_property(const char *property, size_t *value)
+{
+  return false;
+}
+
+bool ceph_heap_set_numeric_property(const char *property, size_t value)
+{
+  return false;
+}
+
 void ceph_heap_profiler_handle_command(const std::vector<std::string>& cmd,
                                        ostream& out) { return; }
diff --git a/src/perfglue/heap_profiler.cc b/src/perfglue/heap_profiler.cc
index a0307ca..685b4be 100644
--- a/src/perfglue/heap_profiler.cc
+++ b/src/perfglue/heap_profiler.cc
@@ -59,6 +59,22 @@ void ceph_heap_release_free_memory()
   MallocExtension::instance()->ReleaseFreeMemory();
 }
 
+bool ceph_heap_get_numeric_property(
+  const char *property, size_t *value)
+{
+  return MallocExtension::instance()->GetNumericProperty(
+    property,
+    value);
+}
+
+bool ceph_heap_set_numeric_property(
+  const char *property, size_t value)
+{
+  return MallocExtension::instance()->SetNumericProperty(
+    property,
+    value);
+}
+
 bool ceph_heap_profiler_running()
 {
 #ifdef HAVE_LIBTCMALLOC
diff --git a/src/perfglue/heap_profiler.h b/src/perfglue/heap_profiler.h
index dac20d4..75fba8a 100644
--- a/src/perfglue/heap_profiler.h
+++ b/src/perfglue/heap_profiler.h
@@ -43,6 +43,10 @@ void ceph_heap_profiler_stop();
 
 void ceph_heap_profiler_dump(const char *reason);
 
+bool ceph_heap_get_numeric_property(const char *property, size_t *value);
+
+bool ceph_heap_set_numeric_property(const char *property, size_t value);
+
 void ceph_heap_profiler_handle_command(const std::vector<std::string> &cmd,
                                        ostream& out);
 
diff --git a/src/pybind/rados.py b/src/pybind/rados.py
index 2d4022f..a599346 100644
--- a/src/pybind/rados.py
+++ b/src/pybind/rados.py
@@ -27,6 +27,7 @@ LIBRADOS_OP_FLAG_FADVISE_SEQUENTIAL = 0x8
 LIBRADOS_OP_FLAG_FADVISE_WILLNEED = 0x10
 LIBRADOS_OP_FLAG_FADVISE_DONTNEED = 0x20
 LIBRADOS_OP_FLAG_FADVISE_NOCACHE = 0x40
+LIBRADOS_SNAP_HEAD = -2
 
 
 # Are we running Python 2.x
@@ -1507,6 +1508,21 @@ class Ioctx(object):
         """
         return self.locator_key
 
+    @requires(('snap_id', int))
+    def set_read(self, snap_id):
+        """
+        Set the snapshot for reading objects.
+
+        To stop reading from a snapshot, use set_read(LIBRADOS_SNAP_HEAD).
+
+        :param snap_id: the snapshot Id
+        :type snap_id: int
+
+        :raises: :class:`TypeError`
+        """
+        self.require_ioctx_open()
+        run_in_thread(self.librados.rados_ioctx_snap_set_read,
+                      (self.io, c_uint64(snap_id)))
 
     @requires(('nspace', str_type))
     def set_namespace(self, nspace):
diff --git a/src/rbd_fuse/rbd-fuse.cc b/src/rbd_fuse/rbd-fuse.cc
index 9e92441..c55f513 100644
--- a/src/rbd_fuse/rbd-fuse.cc
+++ b/src/rbd_fuse/rbd-fuse.cc
@@ -26,6 +26,7 @@
 
 #include "include/compat.h"
 #include "include/rbd/librbd.h"
+#include "common/Mutex.h"
 
 static int gotrados = 0;
 char *pool_name;
@@ -33,7 +34,7 @@ char *mount_image_name;
 rados_t cluster;
 rados_ioctx_t ioctx;
 
-static pthread_mutex_t readdir_lock;
+Mutex readdir_lock("read_dir");
 
 struct rbd_stat {
 	u_char valid;
@@ -212,11 +213,11 @@ iter_images(void *cookie,
 {
 	struct rbd_image *im;
 
-	pthread_mutex_lock(&readdir_lock);
-
+	readdir_lock.Lock();
+	
 	for (im = rbd_image_data.images; im != NULL; im = im->next)
 		iter(cookie, im->image_name);
-	pthread_mutex_unlock(&readdir_lock);
+	readdir_lock.Unlock();
 }
 
 static void count_images_cb(void *cookie, const char *image)
@@ -228,9 +229,9 @@ static int count_images(void)
 {
 	unsigned int count = 0;
 
-	pthread_mutex_lock(&readdir_lock);
+	readdir_lock.Lock();
 	enumerate_images(&rbd_image_data);
-	pthread_mutex_unlock(&readdir_lock);
+	readdir_lock.Unlock();
 
 	iter_images(&count, count_images_cb);
 	return count;
@@ -269,9 +270,9 @@ static int rbdfs_getattr(const char *path, struct stat *stbuf)
 	}
 
 	if (!in_opendir) {
-		pthread_mutex_lock(&readdir_lock);
+		readdir_lock.Lock();
 		enumerate_images(&rbd_image_data);
-		pthread_mutex_unlock(&readdir_lock);
+		readdir_lock.Unlock();
 	}
 	fd = open_rbd_image(path + 1);
 	if (fd < 0)
@@ -303,9 +304,9 @@ static int rbdfs_open(const char *path, struct fuse_file_info *fi)
 	if (path[0] == 0)
 		return -ENOENT;
 
-	pthread_mutex_lock(&readdir_lock);
+	readdir_lock.Lock();
 	enumerate_images(&rbd_image_data);
-	pthread_mutex_unlock(&readdir_lock);
+	readdir_lock.Unlock();
 	fd = open_rbd_image(path + 1);
 	if (fd < 0)
 		return -ENOENT;
@@ -401,9 +402,9 @@ static int rbdfs_statfs(const char *path, struct statvfs *buf)
 
 	num[0] = 1;
 	num[1] = 0;
-	pthread_mutex_lock(&readdir_lock);
+	readdir_lock.Lock();
 	enumerate_images(&rbd_image_data);
-	pthread_mutex_unlock(&readdir_lock);
+	readdir_lock.Unlock();
 	iter_images(num, rbdfs_statfs_image_cb);
 
 #define	RBDFS_BSIZE	4096
@@ -434,10 +435,10 @@ static int rbdfs_fsync(const char *path, int datasync,
 static int rbdfs_opendir(const char *path, struct fuse_file_info *fi)
 {
 	// only one directory, so global "in_opendir" flag should be fine
-	pthread_mutex_lock(&readdir_lock);
+	readdir_lock.Lock();
 	in_opendir++;
 	enumerate_images(&rbd_image_data);
-	pthread_mutex_unlock(&readdir_lock);
+	readdir_lock.Unlock();
 	return 0;
 }
 
@@ -475,9 +476,9 @@ static int rbdfs_readdir(const char *path, void *buf, fuse_fill_dir_t filler,
 static int rbdfs_releasedir(const char *path, struct fuse_file_info *fi)
 {
 	// see opendir comments
-	pthread_mutex_lock(&readdir_lock);
+	readdir_lock.Lock();
 	in_opendir--;
-	pthread_mutex_unlock(&readdir_lock);
+	readdir_lock.Unlock();
 	return 0;
 }
 
@@ -883,7 +884,5 @@ int main(int argc, char *argv[])
 		exit(1);
 	}
 
-	pthread_mutex_init(&readdir_lock, NULL);
-
 	return fuse_main(args.argc, args.argv, &rbdfs_oper, NULL);
 }
diff --git a/src/rbd_replay/ActionTypes.cc b/src/rbd_replay/ActionTypes.cc
index 36ed3ca..dc5388f 100644
--- a/src/rbd_replay/ActionTypes.cc
+++ b/src/rbd_replay/ActionTypes.cc
@@ -36,7 +36,7 @@ void decode_big_endian_string(std::string &str, bufferlist::iterator &it) {
 
 class EncodeVisitor : public boost::static_visitor<void> {
 public:
-  EncodeVisitor(bufferlist &bl) : m_bl(bl) {
+  explicit EncodeVisitor(bufferlist &bl) : m_bl(bl) {
   }
 
   template <typename Action>
@@ -65,7 +65,7 @@ private:
 
 class DumpVisitor : public boost::static_visitor<void> {
 public:
-  DumpVisitor(Formatter *formatter) : m_formatter(formatter) {}
+  explicit DumpVisitor(Formatter *formatter) : m_formatter(formatter) {}
 
   template <typename Action>
   inline void operator()(const Action &action) const {
@@ -218,6 +218,32 @@ void OpenImageAction::dump(Formatter *f) const {
   f->dump_bool("read_only", read_only);
 }
 
+void AioOpenImageAction::encode(bufferlist &bl) const {
+  ImageActionBase::encode(bl);
+  ::encode(name, bl);
+  ::encode(snap_name, bl);
+  ::encode(read_only, bl);
+}
+
+void AioOpenImageAction::decode(__u8 version, bufferlist::iterator &it) {
+  ImageActionBase::decode(version, it);
+  if (byte_swap_required(version)) {
+    decode_big_endian_string(name, it);
+    decode_big_endian_string(snap_name, it);
+  } else {
+    ::decode(name, it);
+    ::decode(snap_name, it);
+  }
+  ::decode(read_only, it);
+}
+
+void AioOpenImageAction::dump(Formatter *f) const {
+  ImageActionBase::dump(f);
+  f->dump_string("name", name);
+  f->dump_string("snap_name", snap_name);
+  f->dump_bool("read_only", read_only);
+}
+
 void UnknownAction::encode(bufferlist &bl) const {
   assert(false);
 }
@@ -274,6 +300,12 @@ void ActionEntry::decode(__u8 version, bufferlist::iterator &it) {
   case ACTION_TYPE_CLOSE_IMAGE:
     action = CloseImageAction();
     break;
+  case ACTION_TYPE_AIO_OPEN_IMAGE:
+    action = AioOpenImageAction();
+    break;
+  case ACTION_TYPE_AIO_CLOSE_IMAGE:
+    action = AioCloseImageAction();
+    break;
   }
 
   boost::apply_visitor(DecodeVisitor(version, it), action);
@@ -311,6 +343,13 @@ void ActionEntry::generate_test_instances(std::list<ActionEntry *> &o) {
                                               true)));
   o.push_back(new ActionEntry(CloseImageAction()));
   o.push_back(new ActionEntry(CloseImageAction(1, 123456789, dependencies, 3)));
+
+  o.push_back(new ActionEntry(AioOpenImageAction()));
+  o.push_back(new ActionEntry(AioOpenImageAction(1, 123456789, dependencies, 3,
+                                              "image_name", "snap_name",
+                                              true)));
+  o.push_back(new ActionEntry(AioCloseImageAction()));
+  o.push_back(new ActionEntry(AioCloseImageAction(1, 123456789, dependencies, 3)));
 }
 
 } // namespace action
@@ -345,6 +384,12 @@ std::ostream &operator<<(std::ostream &out,
   case ACTION_TYPE_CLOSE_IMAGE:
     out << "CloseImage";
     break;
+  case ACTION_TYPE_AIO_OPEN_IMAGE:
+    out << "AioOpenImage";
+    break;
+  case ACTION_TYPE_AIO_CLOSE_IMAGE:
+    out << "AioCloseImage";
+    break;
   default:
     out << "Unknown (" << static_cast<uint32_t>(type) << ")";
     break;
diff --git a/src/rbd_replay/ActionTypes.h b/src/rbd_replay/ActionTypes.h
index fcceca8..302b9e2 100644
--- a/src/rbd_replay/ActionTypes.h
+++ b/src/rbd_replay/ActionTypes.h
@@ -62,14 +62,16 @@ WRITE_CLASS_ENCODER(Dependency);
 typedef std::vector<Dependency> Dependencies;
 
 enum ActionType {
-  ACTION_TYPE_START_THREAD = 0,
-  ACTION_TYPE_STOP_THREAD  = 1,
-  ACTION_TYPE_READ         = 2,
-  ACTION_TYPE_WRITE        = 3,
-  ACTION_TYPE_AIO_READ     = 4,
-  ACTION_TYPE_AIO_WRITE    = 5,
-  ACTION_TYPE_OPEN_IMAGE   = 6,
-  ACTION_TYPE_CLOSE_IMAGE  = 7
+  ACTION_TYPE_START_THREAD    = 0,
+  ACTION_TYPE_STOP_THREAD     = 1,
+  ACTION_TYPE_READ            = 2,
+  ACTION_TYPE_WRITE           = 3,
+  ACTION_TYPE_AIO_READ        = 4,
+  ACTION_TYPE_AIO_WRITE       = 5,
+  ACTION_TYPE_OPEN_IMAGE      = 6,
+  ACTION_TYPE_CLOSE_IMAGE     = 7,
+  ACTION_TYPE_AIO_OPEN_IMAGE  = 8,
+  ACTION_TYPE_AIO_CLOSE_IMAGE = 9,
 };
 
 struct ActionBase {
@@ -225,6 +227,39 @@ struct CloseImageAction : public ImageActionBase {
   }
 };
 
+struct AioOpenImageAction : public ImageActionBase {
+  static const ActionType ACTION_TYPE = ACTION_TYPE_AIO_OPEN_IMAGE;
+
+  std::string name;
+  std::string snap_name;
+  bool read_only;
+
+  AioOpenImageAction() : read_only(false) {
+  }
+  AioOpenImageAction(action_id_t id, thread_id_t thread_id,
+		     const Dependencies &dependencies, imagectx_id_t imagectx_id,
+		     const std::string &name, const std::string &snap_name,
+		     bool read_only)
+    : ImageActionBase(id, thread_id, dependencies, imagectx_id),
+      name(name), snap_name(snap_name), read_only(read_only) {
+  }
+
+  void encode(bufferlist &bl) const;
+  void decode(__u8 version, bufferlist::iterator &it);
+  void dump(Formatter *f) const;
+};
+
+struct AioCloseImageAction : public ImageActionBase {
+  static const ActionType ACTION_TYPE = ACTION_TYPE_AIO_CLOSE_IMAGE;
+
+  AioCloseImageAction() {
+  }
+  AioCloseImageAction(action_id_t id, thread_id_t thread_id,
+		      const Dependencies &dependencies, imagectx_id_t imagectx_id)
+    : ImageActionBase(id, thread_id, dependencies, imagectx_id) {
+  }
+};
+
 struct UnknownAction {
   static const ActionType ACTION_TYPE = static_cast<ActionType>(-1);
 
@@ -241,6 +276,8 @@ typedef boost::variant<StartThreadAction,
                        AioWriteAction,
                        OpenImageAction,
                        CloseImageAction,
+                       AioOpenImageAction,
+                       AioCloseImageAction,
                        UnknownAction> Action;
 
 class ActionEntry {
diff --git a/src/rbd_replay/BufferReader.cc b/src/rbd_replay/BufferReader.cc
index f1327b7..a93302a 100644
--- a/src/rbd_replay/BufferReader.cc
+++ b/src/rbd_replay/BufferReader.cc
@@ -9,7 +9,7 @@ namespace rbd_replay {
 
 BufferReader::BufferReader(int fd, size_t min_bytes, size_t max_bytes)
   : m_fd(fd), m_min_bytes(min_bytes), m_max_bytes(max_bytes),
-    m_bl_it(m_bl.begin()) {
+    m_bl_it(m_bl.begin()), m_eof_reached(false) {
   assert(m_min_bytes <= m_max_bytes);
 }
 
@@ -17,11 +17,14 @@ int BufferReader::fetch(bufferlist::iterator **it) {
   if (m_bl_it.get_remaining() < m_min_bytes) {
     ssize_t bytes_to_read = ROUND_UP_TO(m_max_bytes - m_bl_it.get_remaining(),
                                         CEPH_BUFFER_APPEND_SIZE);
-    while (bytes_to_read > 0) {
+    while (!m_eof_reached && bytes_to_read > 0) {
       int r = m_bl.read_fd(m_fd, CEPH_BUFFER_APPEND_SIZE);
       if (r < 0) {
         return r;
       }
+      if (r == 0) {
+	m_eof_reached = true;
+      }
       assert(r <= bytes_to_read);
       bytes_to_read -= r;
     }
diff --git a/src/rbd_replay/BufferReader.h b/src/rbd_replay/BufferReader.h
index 95b1533..07f7b09 100644
--- a/src/rbd_replay/BufferReader.h
+++ b/src/rbd_replay/BufferReader.h
@@ -25,6 +25,7 @@ private:
   size_t m_max_bytes;
   bufferlist m_bl;
   bufferlist::iterator m_bl_it;
+  bool m_eof_reached;
 
 };
 
diff --git a/src/rbd_replay/Replayer.cc b/src/rbd_replay/Replayer.cc
index b37f226..ea0f9a0 100644
--- a/src/rbd_replay/Replayer.cc
+++ b/src/rbd_replay/Replayer.cc
@@ -224,6 +224,9 @@ void Replayer::run(const std::string& replay_file) {
                       << std::endl;
             exit(-r);
           }
+	  if (it->get_remaining() == 0) {
+	    break;
+	  }
 
           if (versioned) {
             action_entry.decode(*it);
@@ -231,7 +234,7 @@ void Replayer::run(const std::string& replay_file) {
             action_entry.decode_unversioned(*it);
           }
         } catch (const buffer::error &err) {
-          std::cerr << "Failed to decode trace action" << std::endl;
+          std::cerr << "Failed to decode trace action: " << err.what() << std::endl;
           exit(1);
         }
 
diff --git a/src/rbd_replay/Replayer.hpp b/src/rbd_replay/Replayer.hpp
index acad725..5cb5ee5 100644
--- a/src/rbd_replay/Replayer.hpp
+++ b/src/rbd_replay/Replayer.hpp
@@ -77,7 +77,7 @@ private:
 
 class Replayer {
 public:
-  Replayer(int num_action_trackers);
+  explicit Replayer(int num_action_trackers);
 
   ~Replayer();
 
diff --git a/src/rbd_replay/actions.cc b/src/rbd_replay/actions.cc
index 7726d08..33c8de5 100644
--- a/src/rbd_replay/actions.cc
+++ b/src/rbd_replay/actions.cc
@@ -65,6 +65,14 @@ struct ConstructVisitor : public boost::static_visitor<Action::ptr> {
     return Action::ptr(new CloseImageAction(action));
   }
 
+  inline Action::ptr operator()(const action::AioOpenImageAction &action) const {
+    return Action::ptr(new AioOpenImageAction(action));
+  }
+
+  inline Action::ptr operator()(const action::AioCloseImageAction &action) const {
+    return Action::ptr(new AioCloseImageAction(action));
+  }
+
   inline Action::ptr operator()(const action::UnknownAction &action) const {
     return Action::ptr();
   }
@@ -175,3 +183,35 @@ void CloseImageAction::perform(ActionCtx &worker) {
   worker.set_action_complete(pending_io_id());
 }
 
+void AioOpenImageAction::perform(ActionCtx &worker) {
+  dout(ACTION_LEVEL) << "Performing " << *this << dendl;
+  // TODO: Make it async
+  PendingIO::ptr io(new PendingIO(pending_io_id(), worker));
+  worker.add_pending(io);
+  librbd::Image *image = new librbd::Image();
+  librbd::RBD *rbd = worker.rbd();
+  rbd_loc name(worker.map_image_name(m_action.name, m_action.snap_name));
+  int r;
+  if (m_action.read_only || worker.readonly()) {
+    r = rbd->open_read_only(*worker.ioctx(), *image, name.image.c_str(), name.snap.c_str());
+  } else {
+    r = rbd->open(*worker.ioctx(), *image, name.image.c_str(), name.snap.c_str());
+  }
+  if (r) {
+    cerr << "Unable to open image '" << m_action.name
+	 << "' with snap '" << m_action.snap_name
+	 << "' (mapped to '" << name.str()
+	 << "') and readonly " << m_action.read_only
+	 << ": (" << -r << ") " << strerror(-r) << std::endl;
+    exit(1);
+  }
+  worker.put_image(m_action.imagectx_id, image);
+  worker.remove_pending(io);
+}
+
+void AioCloseImageAction::perform(ActionCtx &worker) {
+  dout(ACTION_LEVEL) << "Performing " << *this << dendl;
+  // TODO: Make it async
+  worker.erase_image(m_action.imagectx_id);
+  worker.set_action_complete(pending_io_id());
+}
diff --git a/src/rbd_replay/actions.hpp b/src/rbd_replay/actions.hpp
index ea46a88..eec655a 100644
--- a/src/rbd_replay/actions.hpp
+++ b/src/rbd_replay/actions.hpp
@@ -124,7 +124,7 @@ public:
 template <typename ActionType>
 class TypedAction : public Action {
 public:
-  TypedAction(const ActionType &action) : m_action(action) {
+  explicit TypedAction(const ActionType &action) : m_action(action) {
   }
 
   virtual action_id_t id() const {
@@ -193,7 +193,7 @@ protected:
 
 class AioReadAction : public TypedAction<action::AioReadAction> {
 public:
-  AioReadAction(const action::AioReadAction &action)
+  explicit AioReadAction(const action::AioReadAction &action)
     : TypedAction<action::AioReadAction>(action) {
   }
 
@@ -208,7 +208,7 @@ protected:
 
 class ReadAction : public TypedAction<action::ReadAction> {
 public:
-  ReadAction(const action::ReadAction &action)
+  explicit ReadAction(const action::ReadAction &action)
     : TypedAction<action::ReadAction>(action) {
   }
 
@@ -223,7 +223,7 @@ protected:
 
 class AioWriteAction : public TypedAction<action::AioWriteAction> {
 public:
-  AioWriteAction(const action::AioWriteAction &action)
+  explicit AioWriteAction(const action::AioWriteAction &action)
     : TypedAction<action::AioWriteAction>(action) {
   }
 
@@ -238,7 +238,7 @@ protected:
 
 class WriteAction : public TypedAction<action::WriteAction> {
 public:
-  WriteAction(const action::WriteAction &action)
+  explicit WriteAction(const action::WriteAction &action)
     : TypedAction<action::WriteAction>(action) {
   }
 
@@ -253,7 +253,7 @@ protected:
 
 class OpenImageAction : public TypedAction<action::OpenImageAction> {
 public:
-  OpenImageAction(const action::OpenImageAction &action)
+  explicit OpenImageAction(const action::OpenImageAction &action)
     : TypedAction<action::OpenImageAction>(action) {
   }
 
@@ -268,7 +268,7 @@ protected:
 
 class CloseImageAction : public TypedAction<action::CloseImageAction> {
 public:
-  CloseImageAction(const action::CloseImageAction &action)
+  explicit CloseImageAction(const action::CloseImageAction &action)
     : TypedAction<action::CloseImageAction>(action) {
   }
 
@@ -280,6 +280,35 @@ protected:
   }
 };
 
+class AioOpenImageAction : public TypedAction<action::AioOpenImageAction> {
+public:
+  explicit AioOpenImageAction(const action::AioOpenImageAction &action)
+    : TypedAction<action::AioOpenImageAction>(action) {
+  }
+
+  virtual void perform(ActionCtx &ctx);
+
+protected:
+  virtual const char *get_action_name() const {
+    return "AioOpenImageAction";
+  }
+};
+
+
+class AioCloseImageAction : public TypedAction<action::AioCloseImageAction> {
+public:
+  explicit AioCloseImageAction(const action::AioCloseImageAction &action)
+    : TypedAction<action::AioCloseImageAction>(action) {
+  }
+
+  virtual void perform(ActionCtx &ctx);
+
+protected:
+  virtual const char *get_action_name() const {
+    return "AioCloseImageAction";
+  }
+};
+
 }
 
 #endif
diff --git a/src/rbd_replay/ios.cc b/src/rbd_replay/ios.cc
index 7437bed..52d885a 100644
--- a/src/rbd_replay/ios.cc
+++ b/src/rbd_replay/ios.cc
@@ -158,3 +158,27 @@ void CloseImageIO::write_debug(std::ostream& out) const {
   write_debug_base(out, "close image");
   out << ", imagectx=" << m_imagectx;
 }
+
+void AioOpenImageIO::encode(bufferlist &bl) const {
+  action::Action action((action::AioOpenImageAction(
+    ionum(), thread_id(), convert_dependencies(start_time(), dependencies()),
+    m_imagectx, m_name, m_snap_name, m_readonly)));
+  ::encode(action, bl);
+}
+
+void AioOpenImageIO::write_debug(std::ostream& out) const {
+  write_debug_base(out, "aio open image");
+  out << ", imagectx=" << m_imagectx << ", name='" << m_name << "', snap_name='" << m_snap_name << "', readonly=" << m_readonly;
+}
+
+void AioCloseImageIO::encode(bufferlist &bl) const {
+  action::Action action((action::AioCloseImageAction(
+    ionum(), thread_id(), convert_dependencies(start_time(), dependencies()),
+    m_imagectx)));
+  ::encode(action, bl);
+}
+
+void AioCloseImageIO::write_debug(std::ostream& out) const {
+  write_debug_base(out, "aio close image");
+  out << ", imagectx=" << m_imagectx;
+}
diff --git a/src/rbd_replay/ios.hpp b/src/rbd_replay/ios.hpp
index 218717b..e6c0bf5 100644
--- a/src/rbd_replay/ios.hpp
+++ b/src/rbd_replay/ios.hpp
@@ -291,6 +291,61 @@ private:
   imagectx_id_t m_imagectx;
 };
 
+class AioOpenImageIO : public IO {
+public:
+  AioOpenImageIO(action_id_t ionum,
+		 uint64_t start_time,
+		 thread_id_t thread_id,
+		 const io_set_t& deps,
+		 imagectx_id_t imagectx,
+		 const std::string& name,
+		 const std::string& snap_name,
+		 bool readonly)
+    : IO(ionum, start_time, thread_id, deps),
+      m_imagectx(imagectx),
+      m_name(name),
+      m_snap_name(snap_name),
+      m_readonly(readonly) {
+  }
+
+  virtual void encode(bufferlist &bl) const;
+
+  imagectx_id_t imagectx() const {
+    return m_imagectx;
+  }
+
+  void write_debug(std::ostream& out) const;
+
+private:
+  imagectx_id_t m_imagectx;
+  std::string m_name;
+  std::string m_snap_name;
+  bool m_readonly;
+};
+
+class AioCloseImageIO : public IO {
+public:
+  AioCloseImageIO(action_id_t ionum,
+		  uint64_t start_time,
+		  thread_id_t thread_id,
+		  const io_set_t& deps,
+		  imagectx_id_t imagectx)
+    : IO(ionum, start_time, thread_id, deps),
+      m_imagectx(imagectx) {
+  }
+
+  virtual void encode(bufferlist &bl) const;
+
+  imagectx_id_t imagectx() const {
+    return m_imagectx;
+  }
+
+  void write_debug(std::ostream& out) const;
+
+private:
+  imagectx_id_t m_imagectx;
+};
+
 }
 
 #endif
diff --git a/src/rbd_replay/rbd-replay-prep.cc b/src/rbd_replay/rbd-replay-prep.cc
index 61cff59..c887813 100644
--- a/src/rbd_replay/rbd-replay-prep.cc
+++ b/src/rbd_replay/rbd-replay-prep.cc
@@ -153,7 +153,7 @@ public:
 	  usage_exit(args[0], "--window requires an argument");
 	}
 	m_window = (uint64_t)(1e9 * atof(args[++i].c_str()));
-      } else if (arg.find("--window=") == 0) {
+      } else if (arg.compare(0, 9, "--window=") == 0) {
 	m_window = (uint64_t)(1e9 * atof(arg.c_str() + sizeof("--window=")));
       } else if (arg == "--anonymize") {
 	m_anonymize = true;
@@ -162,7 +162,7 @@ public:
       } else if (arg == "-h" || arg == "--help") {
 	usage(args[0]);
 	exit(0);
-      } else if (arg.find("-") == 0) {
+      } else if (arg.compare(0, 1, "-") == 0) {
 	usage_exit(args[0], "Unrecognized argument: " + arg);
       } else if (!got_input) {
 	input_file_name = arg;
@@ -372,6 +372,29 @@ private:
       boost::shared_ptr<CloseImageIO> io(boost::dynamic_pointer_cast<CloseImageIO>(thread->latest_io()));
       assert(io);
       m_open_images.erase(io->imagectx());
+    } else if (strcmp(event_name, "librbd:aio_open_image_enter") == 0) {
+      string name(fields.string("name"));
+      string snap_name(fields.string("snap_name"));
+      bool readonly = fields.uint64("read_only");
+      imagectx_id_t imagectx = fields.uint64("imagectx");
+      uint64_t completion = fields.uint64("completion");
+      action_id_t ionum = next_id();
+      pair<string, string> aname(map_image_snap(name, snap_name));
+      IO::ptr io(new AioOpenImageIO(ionum, ts, threadID, m_recent_completions,
+				    imagectx, aname.first, aname.second,
+				    readonly));
+      thread->issued_io(io, &m_latest_ios);
+      ios->push_back(io);
+      m_pending_ios[completion] = io;
+    } else if (strcmp(event_name, "librbd:aio_close_image_enter") == 0) {
+      imagectx_id_t imagectx = fields.uint64("imagectx");
+      uint64_t completion = fields.uint64("completion");
+      action_id_t ionum = next_id();
+      IO::ptr io(new AioCloseImageIO(ionum, ts, threadID, m_recent_completions,
+				     imagectx));
+      thread->issued_io(io, &m_latest_ios);
+      ios->push_back(thread->latest_io());
+      m_pending_ios[completion] = io;
     } else if (strcmp(event_name, "librbd:read_enter") == 0 ||
                strcmp(event_name, "librbd:read2_enter") == 0) {
       string name(fields.string("name"));
diff --git a/src/rgw/Makefile.am b/src/rgw/Makefile.am
index 98cd4b0..0fcc93d 100644
--- a/src/rgw/Makefile.am
+++ b/src/rgw/Makefile.am
@@ -22,6 +22,7 @@ librgw_la_SOURCES =  \
 	rgw/rgw_xml.cc \
 	rgw/rgw_usage.cc \
 	rgw/rgw_json_enc.cc \
+	rgw/rgw_xml_enc.cc \
 	rgw/rgw_user.cc \
 	rgw/rgw_bucket.cc\
 	rgw/rgw_tools.cc \
@@ -48,7 +49,8 @@ librgw_la_SOURCES =  \
 	rgw/rgw_keystone.cc \
 	rgw/rgw_quota.cc \
 	rgw/rgw_dencoder.cc \
-	rgw/rgw_object_expirer_core.cc
+	rgw/rgw_object_expirer_core.cc \
+	rgw/rgw_website.cc
 librgw_la_CXXFLAGS = -Woverloaded-virtual ${AM_CXXFLAGS}
 noinst_LTLIBRARIES += librgw.la
 
@@ -179,6 +181,8 @@ noinst_HEADERS += \
 	rgw/rgw_keystone.h \
 	rgw/rgw_civetweb.h \
 	rgw/rgw_civetweb_log.h \
+	rgw/rgw_website.h \
+	rgw/rgw_rest_s3website.h \
 	civetweb/civetweb.h \
 	civetweb/include/civetweb.h \
 	civetweb/include/civetweb_conf.h \
diff --git a/src/rgw/librgw.cc b/src/rgw/librgw.cc
index 556f57f..f9b39ae 100644
--- a/src/rgw/librgw.cc
+++ b/src/rgw/librgw.cc
@@ -33,7 +33,8 @@ int librgw_create(librgw_t *rgw, const char * const id)
   if (id) {
     iparams.name.set(CEPH_ENTITY_TYPE_CLIENT, id);
   }
-  CephContext *cct = common_preinit(iparams, CODE_ENVIRONMENT_LIBRARY, 0);
+  CephContext *cct = common_preinit(iparams, CODE_ENVIRONMENT_LIBRARY, 0,
+				    "rgw_data");
   cct->_conf->set_val("log_to_stderr", "false"); // quiet by default
   cct->_conf->set_val("err_to_stderr", "true"); // quiet by default
   cct->_conf->parse_env(); // environment variables override
diff --git a/src/rgw/rgw_acl.cc b/src/rgw/rgw_acl.cc
index d117caa..8c0066e 100644
--- a/src/rgw/rgw_acl.cc
+++ b/src/rgw/rgw_acl.cc
@@ -82,7 +82,7 @@ int RGWAccessControlPolicy::get_perm(rgw_user& id, int perm_mask) {
   if ((perm & perm_mask) != perm_mask) {
     perm |= acl.get_group_perm(ACL_GROUP_ALL_USERS, perm_mask);
 
-    if (!compare_group_name(id.id, ACL_GROUP_ALL_USERS)) {
+    if (id.compare(RGW_USER_ANON_ID)) {
       /* this is not the anonymous user */
       perm |= acl.get_group_perm(ACL_GROUP_AUTHENTICATED_USERS, perm_mask);
     }
diff --git a/src/rgw/rgw_acl.h b/src/rgw/rgw_acl.h
index fc2a7ef..fe7249c 100644
--- a/src/rgw/rgw_acl.h
+++ b/src/rgw/rgw_acl.h
@@ -195,7 +195,7 @@ protected:
   multimap<string, ACLGrant> grant_map;
   void _add_grant(ACLGrant *grant);
 public:
-  RGWAccessControlList(CephContext *_cct) : cct(_cct) {}
+  explicit RGWAccessControlList(CephContext *_cct) : cct(_cct) {}
   RGWAccessControlList() : cct(NULL) {}
 
   void set_ctx(CephContext *ctx) {
@@ -293,7 +293,7 @@ protected:
   ACLOwner owner;
 
 public:
-  RGWAccessControlPolicy(CephContext *_cct) : cct(_cct), acl(_cct) {}
+  explicit RGWAccessControlPolicy(CephContext *_cct) : cct(_cct), acl(_cct) {}
   RGWAccessControlPolicy() : cct(NULL), acl(NULL) {}
   virtual ~RGWAccessControlPolicy() {}
 
diff --git a/src/rgw/rgw_acl_s3.h b/src/rgw/rgw_acl_s3.h
index 694cc1d..209a17b 100644
--- a/src/rgw/rgw_acl_s3.h
+++ b/src/rgw/rgw_acl_s3.h
@@ -9,8 +9,6 @@
 #include <iosfwd>
 #include <include/types.h>
 
-#include <expat.h>
-
 #include "include/str_list.h"
 #include "rgw_xml.h"
 #include "rgw_acl.h"
@@ -57,7 +55,7 @@ public:
 class RGWAccessControlList_S3 : public RGWAccessControlList, public XMLObj
 {
 public:
-  RGWAccessControlList_S3(CephContext *_cct) : RGWAccessControlList(_cct) {}
+  explicit RGWAccessControlList_S3(CephContext *_cct) : RGWAccessControlList(_cct) {}
   ~RGWAccessControlList_S3() {}
 
   bool xml_end(const char *el);
@@ -82,7 +80,7 @@ class RGWEnv;
 class RGWAccessControlPolicy_S3 : public RGWAccessControlPolicy, public XMLObj
 {
 public:
-  RGWAccessControlPolicy_S3(CephContext *_cct) : RGWAccessControlPolicy(_cct) {}
+  explicit RGWAccessControlPolicy_S3(CephContext *_cct) : RGWAccessControlPolicy(_cct) {}
   ~RGWAccessControlPolicy_S3() {}
 
   bool xml_end(const char *el);
@@ -110,7 +108,7 @@ class RGWACLXMLParser_S3 : public RGWXMLParser
 
   XMLObj *alloc_obj(const char *el);
 public:
-  RGWACLXMLParser_S3(CephContext *_cct) : cct(_cct) {}
+  explicit RGWACLXMLParser_S3(CephContext *_cct) : cct(_cct) {}
 };
 
 #endif
diff --git a/src/rgw/rgw_acl_swift.cc b/src/rgw/rgw_acl_swift.cc
index 1f8f1ab..a9729e3 100644
--- a/src/rgw/rgw_acl_swift.cc
+++ b/src/rgw/rgw_acl_swift.cc
@@ -21,16 +21,18 @@ using namespace std;
 static int parse_list(string& uid_list, list<string>& uids)
 {
   char *s = strdup(uid_list.c_str());
-  if (!s)
+  if (!s) {
     return -ENOMEM;
+  }
 
-  const char *p = strtok(s, " ,");
+  char *tokctx;
+  const char *p = strtok_r(s, " ,", &tokctx);
   while (p) {
     if (*p) {
       string acl = p;
       uids.push_back(acl);
     }
-    p = strtok(NULL, " ,");
+    p = strtok_r(NULL, " ,", &tokctx);
   }
   free(s);
   return 0;
diff --git a/src/rgw/rgw_acl_swift.h b/src/rgw/rgw_acl_swift.h
index cbadfaa..ba72d7b 100644
--- a/src/rgw/rgw_acl_swift.h
+++ b/src/rgw/rgw_acl_swift.h
@@ -16,7 +16,7 @@ using namespace std;
 class RGWAccessControlPolicy_SWIFT : public RGWAccessControlPolicy
 {
 public:
-  RGWAccessControlPolicy_SWIFT(CephContext *_cct) : RGWAccessControlPolicy(_cct) {}
+  explicit RGWAccessControlPolicy_SWIFT(CephContext *_cct) : RGWAccessControlPolicy(_cct) {}
   ~RGWAccessControlPolicy_SWIFT() {}
 
   void add_grants(RGWRados *store, list<string>& uids, int perm);
diff --git a/src/rgw/rgw_admin.cc b/src/rgw/rgw_admin.cc
index c9365fa..115dc0a 100644
--- a/src/rgw/rgw_admin.cc
+++ b/src/rgw/rgw_admin.cc
@@ -117,6 +117,8 @@ void _usage()
   cout << "  replicalog get             get replica metadata log entry\n";
   cout << "  replicalog update          update replica metadata log entry\n";
   cout << "  replicalog delete          delete replica metadata log entry\n";
+  cout << "  orphans find               init and run search for leaked rados objects\n";
+  cout << "  orphans finish             clean up search for leaked rados objects\n";
   cout << "options:\n";
   cout << "   --uid=<id>                user id\n";
   cout << "   --subuser=<name>          subuser name\n";
@@ -180,6 +182,9 @@ void _usage()
   cout << "   --max-objects             specify max objects (negative value to disable)\n";
   cout << "   --max-size                specify max size (in bytes, negative value to disable)\n";
   cout << "   --quota-scope             scope of quota (bucket, user)\n";
+  cout << "\nOrphans search options:\n";
+  cout << "   --pool                    data pool to scan for leaked rados objects in\n";
+  cout << "   --num-shards              num of shards to use for keeping the temporary scan info\n";
   cout << "\n";
   generic_client_usage();
 }
@@ -625,7 +630,7 @@ int bucket_stats(rgw_bucket& bucket, Formatter *formatter)
 class StoreDestructor {
   RGWRados *store;
 public:
-  StoreDestructor(RGWRados *_s) : store(_s) {}
+  explicit StoreDestructor(RGWRados *_s) : store(_s) {}
   ~StoreDestructor() {
     RGWStoreManager::close_storage(store);
   }
diff --git a/src/rgw/rgw_basic_types.h b/src/rgw/rgw_basic_types.h
index 4a9c42c..8538af6 100644
--- a/src/rgw/rgw_basic_types.h
+++ b/src/rgw/rgw_basic_types.h
@@ -10,6 +10,7 @@ struct rgw_user {
   std::string id;
 
   rgw_user() {}
+  // cppcheck-suppress noExplicitConstructor
   rgw_user(const std::string& s) {
     from_str(s);
   }
diff --git a/src/rgw/rgw_bucket.cc b/src/rgw/rgw_bucket.cc
index 248dc83..3e4e854 100644
--- a/src/rgw/rgw_bucket.cc
+++ b/src/rgw/rgw_bucket.cc
@@ -46,7 +46,9 @@ void rgw_get_buckets_obj(const rgw_user& user_id, string& buckets_obj_id)
  * with the legacy or S3 buckets.
  */
 void rgw_make_bucket_entry_name(const string& tenant_name, const string& bucket_name, string& bucket_entry) {
-  if (tenant_name.empty()) {
+  if (bucket_name.empty()) {
+    bucket_entry.clear();
+  } else if (tenant_name.empty()) {
     bucket_entry = bucket_name;
   } else {
     bucket_entry = tenant_name + "/" + bucket_name;
@@ -56,7 +58,9 @@ void rgw_make_bucket_entry_name(const string& tenant_name, const string& bucket_
 string rgw_make_bucket_entry_name(const string& tenant_name, const string& bucket_name) {
   string bucket_entry;
 
-  if (tenant_name.empty()) {
+  if (bucket_name.empty()) {
+    bucket_entry.clear();
+  } else if (tenant_name.empty()) {
     bucket_entry = bucket_name;
   } else {
     bucket_entry = tenant_name + "/" + bucket_name;
@@ -69,15 +73,22 @@ string rgw_make_bucket_entry_name(const string& tenant_name, const string& bucke
  * Tenants are separated from buckets in URLs by a colon in S3.
  * This function is not to be used on Swift URLs, not even for COPY arguments.
  */
-void rgw_parse_url_bucket(const string &bucket,
+void rgw_parse_url_bucket(const string &bucket, const string& auth_tenant,
                           string &tenant_name, string &bucket_name) {
+
   int pos = bucket.find(':');
   if (pos >= 0) {
+    /*
+     * N.B.: We allow ":bucket" syntax with explicit empty tenant in order
+     * to refer to the legacy tenant, in case users in new named tenants
+     * want to access old global buckets.
+     */
     tenant_name = bucket.substr(0, pos);
+    bucket_name = bucket.substr(pos + 1);
   } else {
-    tenant_name.clear();
+    tenant_name = auth_tenant;
+    bucket_name = bucket;
   }
-  bucket_name = bucket.substr(pos + 1);
 }
 
 /**
diff --git a/src/rgw/rgw_bucket.h b/src/rgw/rgw_bucket.h
index a905010..61f5c62 100644
--- a/src/rgw/rgw_bucket.h
+++ b/src/rgw/rgw_bucket.h
@@ -49,7 +49,8 @@ extern void rgw_make_bucket_entry_name(const string& tenant_name,
                                        string& bucket_entry);
 extern string rgw_make_bucket_entry_name(const string& tenant_name,
                                        const string& bucket_name);
-extern void rgw_parse_url_bucket(const string &bucket,
+extern void rgw_parse_url_bucket(const string& bucket,
+                                 const string& auth_tenant,
                                  string &tenant_name, string &bucket_name);
 
 /**
diff --git a/src/rgw/rgw_client_io.cc b/src/rgw/rgw_client_io.cc
index 1f8b803..b6ef745 100644
--- a/src/rgw/rgw_client_io.cc
+++ b/src/rgw/rgw_client_io.cc
@@ -51,6 +51,10 @@ int RGWClientIO::print(const char *format, ...)
 
 int RGWClientIO::write(const char *buf, int len)
 {
+  if (len == 0) {
+    return 0;
+  }
+
   int ret = write_data(buf, len);
   if (ret < 0)
     return ret;
diff --git a/src/rgw/rgw_client_io.h b/src/rgw/rgw_client_io.h
index 46e6684..ac610c6 100644
--- a/src/rgw/rgw_client_io.h
+++ b/src/rgw/rgw_client_io.h
@@ -107,7 +107,7 @@ class RGWClientIOStream : private RGWClientIOStreamBuf, public std::istream {
  * ctor is being called prior to construction of any member of this class. */
 
 public:
-  RGWClientIOStream(RGWClientIO &c)
+  explicit RGWClientIOStream(RGWClientIO &c)
     : RGWClientIOStreamBuf(c, 1, 2),
       istream(static_cast<RGWClientIOStreamBuf *>(this)) {
   }
diff --git a/src/rgw/rgw_common.cc b/src/rgw/rgw_common.cc
index dfd3105..80b6c1b 100644
--- a/src/rgw/rgw_common.cc
+++ b/src/rgw/rgw_common.cc
@@ -612,6 +612,7 @@ int RGWHTTPArgs::parse()
           (name.compare("versionId") == 0) ||
           (name.compare("versions") == 0) ||
           (name.compare("versioning") == 0) ||
+          (name.compare("website") == 0) ||
           (name.compare("requestPayment") == 0) ||
           (name.compare("torrent") == 0)) {
         sub_resources[name] = val;
diff --git a/src/rgw/rgw_common.h b/src/rgw/rgw_common.h
index 5989352..85405de 100644
--- a/src/rgw/rgw_common.h
+++ b/src/rgw/rgw_common.h
@@ -4,6 +4,7 @@
  * Ceph - scalable distributed file system
  *
  * Copyright (C) 2004-2009 Sage Weil <sage at newdream.net>
+ * Copyright (C) 2015 Yehuda Sadeh <yehuda at redhat.com>
  *
  * This is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
@@ -32,6 +33,7 @@
 #include "rgw_cors.h"
 #include "rgw_quota.h"
 #include "rgw_string.h"
+#include "rgw_website.h"
 #include "cls/version/cls_version_types.h"
 #include "cls/user/cls_user_types.h"
 #include "cls/rgw/cls_rgw_types.h"
@@ -51,7 +53,9 @@ using ceph::crypto::MD5;
 #define RGW_HTTP_RGWX_ATTR_PREFIX "RGWX_ATTR_"
 #define RGW_HTTP_RGWX_ATTR_PREFIX_OUT "Rgwx-Attr-"
 
-#define RGW_AMZ_META_PREFIX "x-amz-meta-"
+#define RGW_AMZ_PREFIX "x-amz-"
+#define RGW_AMZ_META_PREFIX RGW_AMZ_PREFIX "meta-"
+#define RGW_AMZ_WEBSITE_REDIRECT_LOCATION RGW_AMZ_PREFIX "website-redirect-location"
 
 #define RGW_SYS_PARAM_PREFIX "rgwx-"
 
@@ -71,6 +75,7 @@ using ceph::crypto::MD5;
 #define RGW_ATTR_SHADOW_OBJ    	RGW_ATTR_PREFIX "shadow_name"
 #define RGW_ATTR_MANIFEST    	RGW_ATTR_PREFIX "manifest"
 #define RGW_ATTR_USER_MANIFEST  RGW_ATTR_PREFIX "user_manifest"
+#define RGW_ATTR_AMZ_WEBSITE_REDIRECT_LOCATION	RGW_ATTR_PREFIX RGW_AMZ_WEBSITE_REDIRECT_LOCATION
 #define RGW_ATTR_SLO_MANIFEST   RGW_ATTR_PREFIX "slo_manifest"
 /* Information whether an object is SLO or not must be exposed to
  * user through custom HTTP header named X-Static-Large-Object. */
@@ -94,6 +99,7 @@ using ceph::crypto::MD5;
 #define RGW_FORMAT_PLAIN        0
 #define RGW_FORMAT_XML          1
 #define RGW_FORMAT_JSON         2
+#define RGW_FORMAT_HTML         3
 
 #define RGW_CAP_READ            0x1
 #define RGW_CAP_WRITE           0x2
@@ -101,6 +107,8 @@ using ceph::crypto::MD5;
 
 #define RGW_REST_SWIFT          0x1
 #define RGW_REST_SWIFT_AUTH     0x2
+#define RGW_REST_S3             0x4
+#define RGW_REST_WEBSITE     0x8
 
 #define RGW_SUSPENDED_USER_AUID (uint64_t)-2
 
@@ -161,6 +169,9 @@ using ceph::crypto::MD5;
 #define ERR_INVALID_SECRET_KEY   2034
 #define ERR_INVALID_KEY_TYPE     2035
 #define ERR_INVALID_CAP          2036
+#define ERR_INVALID_TENANT_NAME  2037
+#define ERR_WEBSITE_REDIRECT     2038
+#define ERR_NO_SUCH_WEBSITE_CONFIGURATION 2039
 #define ERR_USER_SUSPENDED       2100
 #define ERR_INTERNAL_ERROR       2200
 #define ERR_NOT_IMPLEMENTED      2201
@@ -249,7 +260,7 @@ class NameVal
    string name;
    string val;
  public:
-    NameVal(string nv) : str(nv) {}
+    explicit NameVal(string nv) : str(nv) {}
 
     int parse();
 
@@ -268,6 +279,7 @@ class RGWHTTPArgs
   bool has_resp_modifier;
  public:
   RGWHTTPArgs() : has_resp_modifier(false) {}
+
   /** Set the arguments; as received */
   void set(string s) {
     has_resp_modifier = false;
@@ -610,14 +622,12 @@ struct rgw_bucket {
                     */
 
   rgw_bucket() { }
-  rgw_bucket(const cls_user_bucket& b) {
-    name = b.name;
-    data_pool = b.data_pool;
-    data_extra_pool = b.data_extra_pool;
-    index_pool = b.index_pool;
-    marker = b.marker;
-    bucket_id = b.bucket_id;
-  }
+  // cppcheck-suppress noExplicitConstructor
+  rgw_bucket(const cls_user_bucket& b) : name(b.name), data_pool(b.data_pool),
+					 data_extra_pool(b.data_extra_pool),
+					 index_pool(b.index_pool), marker(b.marker),
+					 bucket_id(b.bucket_id) {}
+  // cppcheck-suppress noExplicitConstructor
   rgw_bucket(const char *n) : name(n) {
     assert(*n == '.'); // only rgw private buckets should be initialized without pool
     data_pool = index_pool = n;
@@ -809,8 +819,11 @@ struct RGWBucketInfo
 
   bool requester_pays;
 
+  bool has_website;
+  RGWBucketWebsiteConf website_conf;
+
   void encode(bufferlist& bl) const {
-     ENCODE_START(13, 4, bl);
+     ENCODE_START(14, 4, bl);
      ::encode(bucket, bl);
      ::encode(owner.id, bl);
      ::encode(flags, bl);
@@ -824,10 +837,14 @@ struct RGWBucketInfo
      ::encode(bucket_index_shard_hash_type, bl);
      ::encode(requester_pays, bl);
      ::encode(owner.tenant, bl);
+     ::encode(has_website, bl);
+     if (has_website) {
+       ::encode(website_conf, bl);
+     }
      ENCODE_FINISH(bl);
   }
   void decode(bufferlist::iterator& bl) {
-    DECODE_START_LEGACY_COMPAT_LEN_32(13, 4, 4, bl);
+    DECODE_START_LEGACY_COMPAT_LEN_32(14, 4, 4, bl);
      ::decode(bucket, bl);
      if (struct_v >= 2) {
        string s;
@@ -857,6 +874,14 @@ struct RGWBucketInfo
        ::decode(requester_pays, bl);
      if (struct_v >= 13)
        ::decode(owner.tenant, bl);
+     if (struct_v >= 14) {
+       ::decode(has_website, bl);
+       if (has_website) {
+         ::decode(website_conf, bl);
+       } else {
+         website_conf = RGWBucketWebsiteConf();
+       }
+     }
      DECODE_FINISH(bl);
   }
   void dump(Formatter *f) const;
@@ -868,7 +893,8 @@ struct RGWBucketInfo
   int versioning_status() { return flags & (BUCKET_VERSIONED | BUCKET_VERSIONS_SUSPENDED); }
   bool versioning_enabled() { return versioning_status() == BUCKET_VERSIONED; }
 
-  RGWBucketInfo() : flags(0), creation_time(0), has_instance_obj(false), num_shards(0), bucket_index_shard_hash_type(MOD), requester_pays(false) {}
+  RGWBucketInfo() : flags(0), creation_time(0), has_instance_obj(false), num_shards(0), bucket_index_shard_hash_type(MOD), requester_pays(false),
+                    has_website(false) {}
 };
 WRITE_CLASS_ENCODER(RGWBucketInfo)
 
@@ -962,6 +988,7 @@ struct rgw_obj_key {
   string instance;
 
   rgw_obj_key() {}
+  // cppcheck-suppress noExplicitConstructor
   rgw_obj_key(const string& n) {
     set(n);
   }
@@ -969,6 +996,7 @@ struct rgw_obj_key {
     set(n, i);
   }
 
+  // cppcheck-suppress noExplicitConstructor
   rgw_obj_key(const cls_rgw_obj_key& k) {
     set(k);
   }
@@ -1034,6 +1062,12 @@ inline ostream& operator<<(ostream& out, const rgw_obj_key &o) {
   }
 }
 
+struct req_init_state {
+  /* Keeps [[tenant]:]bucket until we parse the token. */
+  string url_bucket;
+  string src_bucket;
+};
+
 /** Store all the state necessary to complete and respond to an HTTP request*/
 struct req_state {
    CephContext *cct;
@@ -1057,7 +1091,7 @@ struct req_state {
    uint32_t perm_mask;
    utime_t header_time;
 
-   /* Set once when req_state is initialized and not violated thereafter */
+   /* Set once when url_bucket is parsed and not violated thereafter. */
    string bucket_tenant;
    string bucket_name;
 
@@ -1072,6 +1106,8 @@ struct req_state {
    string region_endpoint;
    string bucket_instance_id;
 
+   string redirect;
+
    RGWBucketInfo bucket_info;
    map<string, bufferlist> bucket_attrs;
    bool bucket_exists;
@@ -1106,6 +1142,7 @@ struct req_state {
    string trans_id;
 
    req_info info;
+   req_init_state init_state;
 
    req_state(CephContext *_cct, class RGWEnv *e);
    ~req_state();
@@ -1150,13 +1187,11 @@ struct RGWBucketEnt {
 
   RGWBucketEnt() : size(0), size_rounded(0), creation_time(0), count(0) {}
 
-  RGWBucketEnt(const cls_user_bucket_entry& e) {
-    bucket = e.bucket;
-    size = e.size;
-    size_rounded = e.size_rounded;
-    creation_time = e.creation_time;
-    count = e.count;
-  }
+  explicit RGWBucketEnt(const cls_user_bucket_entry& e) : bucket(e.bucket),
+		  					  size(e.size), 
+			  				  size_rounded(e.size_rounded),
+							  creation_time(e.creation_time),
+							  count(e.count) {}
 
   void convert(cls_user_bucket_entry *b) {
     bucket.convert(&b->bucket);
diff --git a/src/rgw/rgw_cors_s3.h b/src/rgw/rgw_cors_s3.h
index 3a96160..818d02f 100644
--- a/src/rgw/rgw_cors_s3.h
+++ b/src/rgw/rgw_cors_s3.h
@@ -53,6 +53,6 @@ class RGWCORSXMLParser_S3 : public RGWXMLParser
 
   XMLObj *alloc_obj(const char *el);
 public:
-  RGWCORSXMLParser_S3(CephContext *_cct) : cct(_cct) {}
+  explicit RGWCORSXMLParser_S3(CephContext *_cct) : cct(_cct) {}
 };
 #endif /*CEPH_RGW_CORS_S3_H*/
diff --git a/src/rgw/rgw_fcgi.h b/src/rgw/rgw_fcgi.h
index 88889b5..92171f4 100644
--- a/src/rgw/rgw_fcgi.h
+++ b/src/rgw/rgw_fcgi.h
@@ -27,7 +27,7 @@ protected:
   int complete_request() { return 0; }
   int send_content_length(uint64_t len);
 public:
-  RGWFCGX(FCGX_Request *_fcgx) : fcgx(_fcgx), status_num(0) {}
+  explicit RGWFCGX(FCGX_Request *_fcgx) : fcgx(_fcgx), status_num(0) {}
   void flush();
 };
 
diff --git a/src/rgw/rgw_formats.h b/src/rgw/rgw_formats.h
index 43c087d..9df5251 100644
--- a/src/rgw/rgw_formats.h
+++ b/src/rgw/rgw_formats.h
@@ -22,9 +22,12 @@ struct plain_stack_entry {
 class RGWFormatter_Plain : public Formatter {
   void reset_buf();
 public:
-  RGWFormatter_Plain(bool use_kv = false);
+  explicit RGWFormatter_Plain(bool use_kv = false);
   virtual ~RGWFormatter_Plain();
 
+  virtual void set_status(int status, const char* status_name) {};
+  virtual void output_header() {};
+  virtual void output_footer() {};
   virtual void flush(ostream& os);
   virtual void reset();
 
@@ -66,7 +69,7 @@ protected:
     formatter = f;
   }
 public:
-  RGWFormatterFlusher(Formatter *f) : formatter(f), flushed(false), started(false) {}
+  explicit RGWFormatterFlusher(Formatter *f) : formatter(f), flushed(false), started(false) {}
   virtual ~RGWFormatterFlusher() {}
 
   void flush() {
diff --git a/src/rgw/rgw_http_client.h b/src/rgw/rgw_http_client.h
index 9e0e314..705b848 100644
--- a/src/rgw/rgw_http_client.h
+++ b/src/rgw/rgw_http_client.h
@@ -18,7 +18,7 @@ protected:
   list<pair<string, string> > headers;
 public:
   virtual ~RGWHTTPClient() {}
-  RGWHTTPClient(CephContext *_cct): send_len (0), has_send_len(false), cct(_cct) {}
+  explicit RGWHTTPClient(CephContext *_cct): send_len (0), has_send_len(false), cct(_cct) {}
 
   void append_header(const string& name, const string& val) {
     headers.push_back(pair<string, string>(name, val));
diff --git a/src/rgw/rgw_http_errors.h b/src/rgw/rgw_http_errors.h
index fbe805f..8ff290d 100644
--- a/src/rgw/rgw_http_errors.h
+++ b/src/rgw/rgw_http_errors.h
@@ -19,6 +19,7 @@ const static struct rgw_http_errors RGW_HTTP_ERRORS[] = {
     { STATUS_NO_CONTENT, 204, "NoContent" },
     { STATUS_PARTIAL_CONTENT, 206, "" },
     { ERR_PERMANENT_REDIRECT, 301, "PermanentRedirect" },
+    { ERR_WEBSITE_REDIRECT, 301, "WebsiteRedirect" },
     { STATUS_REDIRECT, 303, "" },
     { ERR_NOT_MODIFIED, 304, "NotModified" },
     { EINVAL, 400, "InvalidArgument" },
@@ -45,6 +46,7 @@ const static struct rgw_http_errors RGW_HTTP_ERRORS[] = {
     { ERR_QUOTA_EXCEEDED, 403, "QuotaExceeded" },
     { ENOENT, 404, "NoSuchKey" },
     { ERR_NO_SUCH_BUCKET, 404, "NoSuchBucket" },
+    { ERR_NO_SUCH_WEBSITE_CONFIGURATION, 404, "NoSuchWebsiteConfiguration" },
     { ERR_NO_SUCH_UPLOAD, 404, "NoSuchUpload" },
     { ERR_NOT_FOUND, 404, "Not Found"},
     { ERR_METHOD_NOT_ALLOWED, 405, "MethodNotAllowed" },
@@ -56,6 +58,7 @@ const static struct rgw_http_errors RGW_HTTP_ERRORS[] = {
     { ERR_INVALID_SECRET_KEY, 400, "InvalidSecretKey"},
     { ERR_INVALID_KEY_TYPE, 400, "InvalidKeyType"},
     { ERR_INVALID_CAP, 400, "InvalidCapability"},
+    { ERR_INVALID_TENANT_NAME, 400, "InvalidTenantName" },
     { ENOTEMPTY, 409, "BucketNotEmpty" },
     { ERR_PRECONDITION_FAILED, 412, "PreconditionFailed" },
     { ERANGE, 416, "InvalidRange" },
@@ -90,6 +93,7 @@ const static struct rgw_http_status_code http_codes[] = {
   { 207, "Multi Status" },
   { 208, "Already Reported" },
   { 300, "Multiple Choices" },
+  { 301, "Moved Permanently" },
   { 302, "Found" },
   { 303, "See Other" },
   { 304, "Not Modified" },
diff --git a/src/rgw/rgw_json_enc.cc b/src/rgw/rgw_json_enc.cc
index e18bbb6..3e6a031 100644
--- a/src/rgw/rgw_json_enc.cc
+++ b/src/rgw/rgw_json_enc.cc
@@ -543,6 +543,80 @@ void RGWStorageStats::dump(Formatter *f) const
   encode_json("num_objects", num_objects, f);
 }
 
+void RGWRedirectInfo::dump(Formatter *f) const
+{
+  encode_json("protocol", protocol, f);
+  encode_json("hostname", hostname, f);
+  encode_json("http_redirect_code", (int)http_redirect_code, f);
+}
+
+void RGWRedirectInfo::decode_json(JSONObj *obj) {
+  JSONDecoder::decode_json("protocol", protocol, obj);
+  JSONDecoder::decode_json("hostname", hostname, obj);
+  int code;
+  JSONDecoder::decode_json("http_redirect_code", code, obj);
+  http_redirect_code = code;
+}
+
+void RGWBWRedirectInfo::dump(Formatter *f) const
+{
+  encode_json("redirect", redirect, f);
+  encode_json("replace_key_prefix_with", replace_key_prefix_with, f);
+  encode_json("replace_key_with", replace_key_with, f);
+}
+
+void RGWBWRedirectInfo::decode_json(JSONObj *obj) {
+  JSONDecoder::decode_json("redirect", redirect, obj);
+  JSONDecoder::decode_json("replace_key_prefix_with", replace_key_prefix_with, obj);
+  JSONDecoder::decode_json("replace_key_with", replace_key_with, obj);
+}
+
+void RGWBWRoutingRuleCondition::dump(Formatter *f) const
+{
+  encode_json("key_prefix_equals", key_prefix_equals, f);
+  encode_json("http_error_code_returned_equals", (int)http_error_code_returned_equals, f);
+}
+
+void RGWBWRoutingRuleCondition::decode_json(JSONObj *obj) {
+  JSONDecoder::decode_json("key_prefix_equals", key_prefix_equals, obj);
+  int code;
+  JSONDecoder::decode_json("http_error_code_returned_equals", code, obj);
+  http_error_code_returned_equals = code;
+}
+
+void RGWBWRoutingRule::dump(Formatter *f) const
+{
+  encode_json("condition", condition, f);
+  encode_json("redirect_info", redirect_info, f);
+}
+
+void RGWBWRoutingRule::decode_json(JSONObj *obj) {
+  JSONDecoder::decode_json("condition", condition, obj);
+  JSONDecoder::decode_json("redirect_info", redirect_info, obj);
+}
+
+void RGWBWRoutingRules::dump(Formatter *f) const
+{
+  encode_json("rules", rules, f);
+}
+
+void RGWBWRoutingRules::decode_json(JSONObj *obj) {
+  JSONDecoder::decode_json("rules", rules, obj);
+}
+
+void RGWBucketWebsiteConf::dump(Formatter *f) const
+{
+  encode_json("index_doc_suffix", index_doc_suffix, f);
+  encode_json("error_doc", error_doc, f);
+  encode_json("routing_rules", routing_rules, f);
+}
+
+void RGWBucketWebsiteConf::decode_json(JSONObj *obj) {
+  JSONDecoder::decode_json("index_doc_suffix", index_doc_suffix, obj);
+  JSONDecoder::decode_json("error_doc", error_doc, obj);
+  JSONDecoder::decode_json("routing_rules", routing_rules, obj);
+}
+
 void RGWBucketInfo::dump(Formatter *f) const
 {
   encode_json("bucket", bucket, f);
@@ -556,6 +630,10 @@ void RGWBucketInfo::dump(Formatter *f) const
   encode_json("num_shards", num_shards, f);
   encode_json("bi_shard_hash_type", (uint32_t)bucket_index_shard_hash_type, f);
   encode_json("requester_pays", requester_pays, f);
+  encode_json("has_website", has_website, f);
+  if (has_website) {
+    encode_json("website_conf", website_conf, f);
+  }
 }
 
 void RGWBucketInfo::decode_json(JSONObj *obj) {
@@ -572,6 +650,10 @@ void RGWBucketInfo::decode_json(JSONObj *obj) {
   JSONDecoder::decode_json("bi_shard_hash_type", hash_type, obj);
   bucket_index_shard_hash_type = (uint8_t)hash_type;
   JSONDecoder::decode_json("requester_pays", requester_pays, obj);
+  JSONDecoder::decode_json("has_website", has_website, obj);
+  if (has_website) {
+    JSONDecoder::decode_json("website_conf", website_conf, obj);
+  }
 }
 
 void RGWObjEnt::dump(Formatter *f) const
@@ -709,6 +791,7 @@ void RGWRegion::dump(Formatter *f) const
   encode_json("is_master", is_master, f);
   encode_json("endpoints", endpoints, f);
   encode_json("hostnames", hostnames, f);
+  encode_json("hostnames_s3website", hostnames_s3website, f);
   encode_json("master_zone", master_zone, f);
   encode_json_map("zones", zones, f); /* more friendly representation */
   encode_json_map("placement_targets", placement_targets, f); /* more friendly representation */
@@ -737,6 +820,7 @@ void RGWRegion::decode_json(JSONObj *obj)
   JSONDecoder::decode_json("is_master", is_master, obj);
   JSONDecoder::decode_json("endpoints", endpoints, obj);
   JSONDecoder::decode_json("hostnames", hostnames, obj);
+  JSONDecoder::decode_json("hostnames_s3website", hostnames_s3website, obj);
   JSONDecoder::decode_json("master_zone", master_zone, obj);
   JSONDecoder::decode_json("zones", zones, decode_zones, obj);
   JSONDecoder::decode_json("placement_targets", placement_targets, decode_placement_targets, obj);
diff --git a/src/rgw/rgw_loadgen.h b/src/rgw/rgw_loadgen.h
index e5636ed..8751315 100644
--- a/src/rgw/rgw_loadgen.h
+++ b/src/rgw/rgw_loadgen.h
@@ -40,7 +40,7 @@ public:
   int complete_request();
   int send_content_length(uint64_t len);
 
-  RGWLoadGenIO(RGWLoadGenRequestEnv *_re) : left_to_read(0), req(_re) {}
+  explicit RGWLoadGenIO(RGWLoadGenRequestEnv *_re) : left_to_read(0), req(_re) {}
   void flush();
 };
 
diff --git a/src/rgw/rgw_log.cc b/src/rgw/rgw_log.cc
index 8f85fde..c600386 100644
--- a/src/rgw/rgw_log.cc
+++ b/src/rgw/rgw_log.cc
@@ -95,7 +95,7 @@ class UsageLogger {
   class C_UsageLogTimeout : public Context {
     UsageLogger *logger;
   public:
-    C_UsageLogTimeout(UsageLogger *_l) : logger(_l) {}
+    explicit C_UsageLogTimeout(UsageLogger *_l) : logger(_l) {}
     void finish(int r) {
       logger->flush();
       logger->set_timer();
diff --git a/src/rgw/rgw_main.cc b/src/rgw/rgw_main.cc
index a19b462..09b2e99 100644
--- a/src/rgw/rgw_main.cc
+++ b/src/rgw/rgw_main.cc
@@ -96,7 +96,7 @@ struct RGWRequest
   RGWOp *op;
   utime_t ts;
 
-  RGWRequest(uint64_t id) : id(id), s(NULL), op(NULL) {
+  explicit RGWRequest(uint64_t id) : id(id), s(NULL), op(NULL) {
   }
 
   virtual ~RGWRequest() {}
@@ -139,7 +139,7 @@ class RGWFrontendConfig {
   int parse_config(const string& config, map<string, string>& config_map);
   string framework;
 public:
-  RGWFrontendConfig(const string& _conf) : config(_conf) {}
+  explicit RGWFrontendConfig(const string& _conf) : config(_conf) {}
   int init() {
     int ret = parse_config(config, config_map);
     if (ret < 0)
@@ -215,8 +215,7 @@ protected:
       perfcounter->inc(l_rgw_qlen, -1);
       return req;
     }
-    using ThreadPool::WorkQueue<RGWRequest>::_process;
-    void _process(RGWRequest *req) {
+    void _process(RGWRequest *req, ThreadPool::TPHandle &) override {
       perfcounter->inc(l_rgw_qactive);
       process->handle_request(req);
       process->req_throttle.put(1);
@@ -567,51 +566,81 @@ static int process_request(RGWRados *store, RGWREST *rest, RGWRequest *req, RGWC
   RGWRESTMgr *mgr;
   RGWHandler *handler = rest->get_handler(store, s, client_io, &mgr, &init_error);
   if (init_error != 0) {
-    abort_early(s, NULL, init_error);
+    abort_early(s, NULL, init_error, NULL);
     goto done;
   }
+  dout(10) << "handler=" << typeid(*handler).name() << dendl;
 
   should_log = mgr->get_logging();
 
-  req->log(s, "getting op");
+  req->log_format(s, "getting op %d", s->op);
   op = handler->get_op(store);
   if (!op) {
-    abort_early(s, NULL, -ERR_METHOD_NOT_ALLOWED);
+    abort_early(s, NULL, -ERR_METHOD_NOT_ALLOWED, handler);
     goto done;
   }
   req->op = op;
+  dout(10) << "op=" << typeid(*op).name() << dendl;
 
   req->log(s, "authorizing");
   ret = handler->authorize();
   if (ret < 0) {
     dout(10) << "failed to authorize request" << dendl;
-    abort_early(s, op, ret);
+    abort_early(s, NULL, ret, handler);
+    goto done;
+  }
+
+  req->log(s, "normalizing buckets and tenants");
+  ret = handler->postauth_init();
+  if (ret < 0) {
+    dout(10) << "failed to run post-auth init" << dendl;
+    abort_early(s, op, ret, handler);
     goto done;
   }
 
   if (s->user.suspended) {
     dout(10) << "user is suspended, uid=" << s->user.user_id << dendl;
-    abort_early(s, op, -ERR_USER_SUSPENDED);
+    abort_early(s, op, -ERR_USER_SUSPENDED, handler);
+    goto done;
+  }
+
+  req->log(s, "init permissions");
+  ret = handler->init_permissions(op);
+  if (ret < 0) {
+    abort_early(s, op, ret, handler);
+    goto done;
+  }
+
+  /**
+   * Only some accesses support website mode, and website mode does NOT apply
+   * if you are using the REST endpoint either (ergo, no authenticated access)
+   */
+  req->log(s, "recalculating target");
+  ret = handler->retarget(op, &op);
+  if (ret < 0) {
+    abort_early(s, op, ret, handler);
     goto done;
   }
+  req->op = op;
+
   req->log(s, "reading permissions");
   ret = handler->read_permissions(op);
   if (ret < 0) {
-    abort_early(s, op, ret);
+    abort_early(s, op, ret, handler);
     goto done;
   }
 
   req->log(s, "init op");
   ret = op->init_processing();
   if (ret < 0) {
-    abort_early(s, op, ret);
+    abort_early(s, op, ret, handler);
     goto done;
   }
 
   req->log(s, "verifying op mask");
   ret = op->verify_op_mask();
   if (ret < 0) {
-    abort_early(s, op, ret);
+    abort_early(s, op, ret, handler);
     goto done;
   }
 
@@ -621,7 +650,7 @@ static int process_request(RGWRados *store, RGWREST *rest, RGWRequest *req, RGWC
     if (s->system_request) {
       dout(2) << "overriding permissions due to system operation" << dendl;
     } else {
-      abort_early(s, op, ret);
+      abort_early(s, op, ret, handler);
       goto done;
     }
   }
@@ -629,13 +658,17 @@ static int process_request(RGWRados *store, RGWREST *rest, RGWRequest *req, RGWC
   req->log(s, "verifying op params");
   ret = op->verify_params();
   if (ret < 0) {
-    abort_early(s, op, ret);
+    abort_early(s, op, ret, handler);
     goto done;
   }
 
-  req->log(s, "executing");
+  req->log(s, "pre-executing");
   op->pre_exec();
+
+  req->log(s, "executing");
   op->execute();
+
+  req->log(s, "completing");
   op->complete();
 done:
   int r = client_io->complete_request();
@@ -648,13 +681,23 @@ done:
 
   int http_ret = s->err.http_ret;
 
+  int op_ret = 0;
+  if (op) {
+    op_ret = op->get_ret();
+  }
+
+  req->log_format(s, "op status=%d", op_ret);
   req->log_format(s, "http status=%d", http_ret);
 
   if (handler)
     handler->put_op(op);
   rest->put_handler(handler);
 
-  dout(1) << "====== req done req=" << hex << req << dec << " http_status=" << http_ret << " ======" << dendl;
+  dout(1) << "====== req done req=" << hex << req << dec
+	  << " op status=" << op_ret
+	  << " http_status=" << http_ret
+	  << " ======"
+	  << dendl;
 
   return (ret < 0 ? ret : s->err.ret);
 }
@@ -857,7 +900,7 @@ public:
 class RGWProcessControlThread : public Thread {
   RGWProcess *pprocess;
 public:
-  RGWProcessControlThread(RGWProcess *_pprocess) : pprocess(_pprocess) {}
+  explicit RGWProcessControlThread(RGWProcess *_pprocess) : pprocess(_pprocess) {}
 
   void *entry() {
     pprocess->run();
@@ -1036,7 +1079,7 @@ int main(int argc, const char **argv)
   argv_to_vec(argc, argv, args);
   env_to_vec(args);
   global_init(&def_args, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_DAEMON,
-	      CINIT_FLAG_UNPRIVILEGED_DAEMON_DEFAULTS);
+	      CINIT_FLAG_UNPRIVILEGED_DAEMON_DEFAULTS, "rgw_data");
 
   for (std::vector<const char*>::iterator i = args.begin(); i != args.end(); ++i) {
     if (ceph_argparse_flag(args, i, "-h", "--help", (char*)NULL)) {
@@ -1110,8 +1153,10 @@ int main(int argc, const char **argv)
     apis_map[*li] = true;
   }
 
-  if (apis_map.count("s3") > 0)
-    rest.register_default_mgr(set_logging(new RGWRESTMgr_S3));
+  // S3 website mode is a specialization of S3
+  bool s3website_enabled = apis_map.count("s3website") > 0;
+  if (apis_map.count("s3") > 0 || s3website_enabled)
+    rest.register_default_mgr(set_logging(new RGWRESTMgr_S3(s3website_enabled)));
 
   if (apis_map.count("swift") > 0) {
     do_swift = true;
diff --git a/src/rgw/rgw_metadata.cc b/src/rgw/rgw_metadata.cc
index 80ed822..783cc61 100644
--- a/src/rgw/rgw_metadata.cc
+++ b/src/rgw/rgw_metadata.cc
@@ -13,7 +13,7 @@
 struct LogStatusDump {
   RGWMDLogStatus status;
 
-  LogStatusDump(RGWMDLogStatus _status) : status(_status) {}
+  explicit LogStatusDump(RGWMDLogStatus _status) : status(_status) {}
   void dump(Formatter *f) const {
     string s;
     switch (status) {
diff --git a/src/rgw/rgw_object_expirer.cc b/src/rgw/rgw_object_expirer.cc
index dcbfbc1..a53e1a7 100644
--- a/src/rgw/rgw_object_expirer.cc
+++ b/src/rgw/rgw_object_expirer.cc
@@ -41,7 +41,7 @@ class StoreDestructor {
   RGWRados *store;
 
 public:
-  StoreDestructor(RGWRados *_s) : store(_s) {}
+  explicit StoreDestructor(RGWRados *_s) : store(_s) {}
   ~StoreDestructor() {
     if (store) {
       RGWStoreManager::close_storage(store);
@@ -61,7 +61,7 @@ int main(const int argc, const char **argv)
   env_to_vec(args);
 
   global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_DAEMON,
-	      CINIT_FLAG_UNPRIVILEGED_DAEMON_DEFAULTS);
+	      CINIT_FLAG_UNPRIVILEGED_DAEMON_DEFAULTS, "rgw_data");
 
   for (std::vector<const char *>::iterator i = args.begin(); i != args.end(); ) {
     if (ceph_argparse_double_dash(args, i)) {
diff --git a/src/rgw/rgw_object_expirer_core.h b/src/rgw/rgw_object_expirer_core.h
index c9d56da..284dc14 100644
--- a/src/rgw/rgw_object_expirer_core.h
+++ b/src/rgw/rgw_object_expirer_core.h
@@ -62,7 +62,7 @@ protected:
   atomic_t down_flag;
 
 public:
-  RGWObjectExpirer(RGWRados *_store)
+  explicit RGWObjectExpirer(RGWRados *_store)
     : store(_store)
   {}
 
diff --git a/src/rgw/rgw_op.cc b/src/rgw/rgw_op.cc
index e912a2e..8475f48 100644
--- a/src/rgw/rgw_op.cc
+++ b/src/rgw/rgw_op.cc
@@ -341,7 +341,7 @@ static int read_policy(RGWRados *store, struct req_state *s,
  * only_bucket: If true, reads the bucket ACL rather than the object ACL.
  * Returns: 0 on success, -ERR# otherwise.
  */
-static int rgw_build_policies(RGWRados *store, struct req_state *s, bool only_bucket, bool prefetch_data)
+static int rgw_build_bucket_policies(RGWRados *store, struct req_state *s)
 {
   int ret = 0;
   rgw_obj_key obj;
@@ -425,9 +425,20 @@ static int rgw_build_policies(RGWRados *store, struct req_state *s, bool only_bu
     }
   }
 
-  /* we're passed only_bucket = true when we specifically need the bucket's
-     acls, that happens on write operations */
-  if (!only_bucket && !s->object.empty()) {
+  return ret;
+}
+
+/**
+ * Get the AccessControlPolicy for the request's object off of disk.
+ * s: The req_state to draw information from.
+ * prefetch_data: NOTE(review): undocumented here; presumably requests prefetch of the object's data -- confirm.
+ * Returns: 0 on success, -ERR# otherwise.
+ */
+static int rgw_build_object_policies(RGWRados *store, struct req_state *s, bool prefetch_data)
+{
+  int ret = 0;
+
+  if (!s->object.empty()) {
     if (!s->bucket_exists) {
       return -ERR_NO_SUCH_BUCKET;
     }
@@ -621,8 +632,8 @@ bool RGWOp::generate_cors_headers(string& origin, string& method, string& header
 
   /* Custom: */
   origin = orig;
-  int ret = read_bucket_cors();
-  if (ret < 0) {
+  op_ret = read_bucket_cors();
+  if (op_ret < 0) {
     return false;
   }
 
@@ -692,9 +703,9 @@ int RGWGetObj::read_user_manifest_part(rgw_bucket& bucket,
   read_op.params.obj_size = &obj_size;
   read_op.params.perr = &s->err;
 
-  ret = read_op.prepare(&cur_ofs, &cur_end);
-  if (ret < 0)
-    return ret;
+  op_ret = read_op.prepare(&cur_ofs, &cur_end);
+  if (op_ret < 0)
+    return op_ret;
 
   if (obj_size != ent.size) {
     // hmm.. something wrong, object not as expected, abort!
@@ -702,9 +713,9 @@ int RGWGetObj::read_user_manifest_part(rgw_bucket& bucket,
     return -EIO;
   }
 
-  ret = rgw_policy_from_attrset(s->cct, attrs, &obj_policy);
-  if (ret < 0)
-    return ret;
+  op_ret = rgw_policy_from_attrset(s->cct, attrs, &obj_policy);
+  if (op_ret < 0)
+    return op_ret;
 
   if (!verify_object_permission(s, bucket_policy, &obj_policy, RGW_PERM_READ)) {
     return -EPERM;
@@ -713,13 +724,13 @@ int RGWGetObj::read_user_manifest_part(rgw_bucket& bucket,
   perfcounter->inc(l_rgw_get_b, cur_end - cur_ofs);
   while (cur_ofs <= cur_end) {
     bufferlist bl;
-    ret = read_op.read(cur_ofs, cur_end, bl);
-    if (ret < 0)
-      return ret;
+    op_ret = read_op.read(cur_ofs, cur_end, bl);
+    if (op_ret < 0)
+      return op_ret;
 
     off_t len = bl.length();
     cur_ofs += len;
-    ret = 0;
+    op_ret = 0; /* XXX redundant? */
     perfcounter->tinc(l_rgw_get_lat,
                       (ceph_clock_now(s->cct) - start_time));
     send_response_data(bl, 0, len);
@@ -1081,18 +1092,6 @@ int RGWGetObj::handle_slo_manifest(bufferlist& bl)
   return 0;
 }
 
-class RGWGetObj_CB : public RGWGetDataCB
-{
-  RGWGetObj *op;
-public:
-  RGWGetObj_CB(RGWGetObj *_op) : op(_op) {}
-  virtual ~RGWGetObj_CB() {}
-
-  int handle_data(bufferlist& bl, off_t bl_ofs, off_t bl_len) {
-    return op->get_data_cb(bl, bl_ofs, bl_len);
-  }
-};
-
 int RGWGetObj::get_data_cb(bufferlist& bl, off_t bl_ofs, off_t bl_len)
 {
   /* garbage collection related handling */
@@ -1176,12 +1175,12 @@ void RGWGetObj::execute()
   RGWRados::Object op_target(store, s->bucket_info, *static_cast<RGWObjectCtx *>(s->obj_ctx), obj);
   RGWRados::Object::Read read_op(&op_target);
 
-  ret = get_params();
-  if (ret < 0)
+  op_ret = get_params();
+  if (op_ret < 0)
     goto done_err;
 
-  ret = init_common();
-  if (ret < 0)
+  op_ret = init_common();
+  if (op_ret < 0)
     goto done_err;
 
   new_ofs = ofs;
@@ -1197,24 +1196,26 @@ void RGWGetObj::execute()
   read_op.params.obj_size = &s->obj_size;
   read_op.params.perr = &s->err;
 
-  ret = read_op.prepare(&new_ofs, &new_end);
-  if (ret < 0)
+  op_ret = read_op.prepare(&new_ofs, &new_end);
+  if (op_ret < 0)
     goto done_err;
 
   attr_iter = attrs.find(RGW_ATTR_USER_MANIFEST);
   if (attr_iter != attrs.end() && !skip_manifest) {
-    ret = handle_user_manifest(attr_iter->second.c_str());
-    if (ret < 0) {
-      ldout(s->cct, 0) << "ERROR: failed to handle user manifest ret=" << ret << dendl;
+    op_ret = handle_user_manifest(attr_iter->second.c_str());
+    if (op_ret < 0) {
+      ldout(s->cct, 0) << "ERROR: failed to handle user manifest ret="
+		       << op_ret << dendl;
     }
     return;
   }
   attr_iter = attrs.find(RGW_ATTR_SLO_MANIFEST);
   if (attr_iter != attrs.end()) {
     is_slo = true;
-    ret = handle_slo_manifest(attr_iter->second);
-    if (ret < 0) {
-      ldout(s->cct, 0) << "ERROR: failed to handle slo manifest ret=" << ret << dendl;
+    op_ret = handle_slo_manifest(attr_iter->second);
+    if (op_ret < 0) {
+      ldout(s->cct, 0) << "ERROR: failed to handle slo manifest ret=" << op_ret
+		       << dendl;
       goto done_err;
     }
     return;
@@ -1223,7 +1224,7 @@ void RGWGetObj::execute()
   /* Check whether the object has expired. Swift API documentation
    * stands that we should return 404 Not Found in such case. */
   if (need_object_expiration() && object_is_expired(attrs)) {
-    ret = -ENOENT;
+    op_ret = -ENOENT;
     goto done_err;
   }
 
@@ -1237,15 +1238,18 @@ void RGWGetObj::execute()
 
   perfcounter->inc(l_rgw_get_b, end - ofs);
 
-  ret = read_op.iterate(ofs, end, &cb);
+  op_ret = read_op.iterate(ofs, end, &cb);
 
   perfcounter->tinc(l_rgw_get_lat,
                    (ceph_clock_now(s->cct) - start_time));
-  if (ret < 0) {
+  if (op_ret < 0) {
     goto done_err;
   }
 
-  send_response_data(bl, 0, 0);
+  op_ret = send_response_data(bl, 0, 0);
+  if (op_ret < 0) {
+    goto done_err;
+  }
   return;
 
 done_err:
@@ -1290,14 +1294,14 @@ void RGWListBuckets::execute()
 
   uint64_t max_buckets = s->cct->_conf->rgw_list_buckets_max_chunk;
 
-  ret = get_params();
-  if (ret < 0) {
+  op_ret = get_params();
+  if (op_ret < 0) {
     goto send_end;
   }
 
   if (supports_account_metadata()) {
-    ret = rgw_get_user_attrs_by_uid(store, s->user.user_id, attrs);
-    if (ret < 0) {
+    op_ret = rgw_get_user_attrs_by_uid(store, s->user.user_id, attrs);
+    if (op_ret < 0) {
       goto send_end;
     }
   }
@@ -1311,10 +1315,11 @@ void RGWListBuckets::execute()
       read_count = max_buckets;
     }
 
-    ret = rgw_read_user_buckets(store, s->user.user_id, buckets,
-                                marker, end_marker, read_count, should_get_stats(), 0);
+    op_ret = rgw_read_user_buckets(store, s->user.user_id, buckets,
+				   marker, end_marker, read_count,
+				   should_get_stats(), 0);
 
-    if (ret < 0) {
+    if (op_ret < 0) {
       /* hmm.. something wrong here.. the user was authenticated, so it
          should exist */
       ldout(s->cct, 10) << "WARNING: failed on rgw_get_user_buckets uid=" << s->user.user_id << dendl;
@@ -1369,9 +1374,9 @@ void RGWStatAccount::execute()
   do {
     RGWUserBuckets buckets;
 
-    ret = rgw_read_user_buckets(store, s->user.user_id, buckets,
-                                marker, string(), max_buckets, false);
-    if (ret < 0) {
+    op_ret = rgw_read_user_buckets(store, s->user.user_id, buckets,
+				   marker, string(), max_buckets, false);
+    if (op_ret < 0) {
       /* hmm.. something wrong here.. the user was authenticated, so it
          should exist */
       ldout(s->cct, 10) << "WARNING: failed on rgw_get_user_buckets uid=" << s->user.user_id << dendl;
@@ -1428,9 +1433,8 @@ void RGWSetBucketVersioning::pre_exec()
 
 void RGWSetBucketVersioning::execute()
 {
-  ret = get_params();
-
-  if (ret < 0)
+  op_ret = get_params();
+  if (op_ret < 0)
     return;
 
   if (enable_versioning) {
@@ -1440,9 +1444,86 @@ void RGWSetBucketVersioning::execute()
     s->bucket_info.flags |= (BUCKET_VERSIONED | BUCKET_VERSIONS_SUSPENDED);
   }
 
-  ret = store->put_bucket_instance_info(s->bucket_info, false, 0, &s->bucket_attrs);
-  if (ret < 0) {
-    ldout(s->cct, 0) << "NOTICE: put_bucket_info on bucket=" << s->bucket.name << " returned err=" << ret << dendl;
+  op_ret = store->put_bucket_instance_info(s->bucket_info, false, 0,
+					  &s->bucket_attrs);
+  if (op_ret < 0) {
+    ldout(s->cct, 0) << "NOTICE: put_bucket_info on bucket=" << s->bucket.name
+		     << " returned err=" << op_ret << dendl;
+    return;
+  }
+}
+
+int RGWGetBucketWebsite::verify_permission()
+{
+  if (s->user.user_id.compare(s->bucket_owner.get_id()) != 0)
+    return -EACCES;
+
+  return 0;
+}
+
+void RGWGetBucketWebsite::pre_exec()
+{
+  rgw_bucket_object_pre_exec(s);
+}
+
+void RGWGetBucketWebsite::execute()
+{
+  if (!s->bucket_info.has_website) {
+    op_ret = -ENOENT;
+  }
+}
+
+int RGWSetBucketWebsite::verify_permission()
+{
+  if (s->user.user_id.compare(s->bucket_owner.get_id()) != 0)
+    return -EACCES;
+
+  return 0;
+}
+
+void RGWSetBucketWebsite::pre_exec()
+{
+  rgw_bucket_object_pre_exec(s);
+}
+
+void RGWSetBucketWebsite::execute()
+{
+  op_ret = get_params();
+
+  if (op_ret < 0)
+    return;
+
+  s->bucket_info.has_website = true;
+  s->bucket_info.website_conf = website_conf;
+
+  op_ret = store->put_bucket_instance_info(s->bucket_info, false, 0, &s->bucket_attrs);
+  if (op_ret < 0) {
+    ldout(s->cct, 0) << "NOTICE: put_bucket_info on bucket=" << s->bucket.name << " returned err=" << op_ret << dendl;
+    return;
+  }
+}
+
+int RGWDeleteBucketWebsite::verify_permission()
+{
+  if (s->user.user_id.compare(s->bucket_owner.get_id()) != 0)
+    return -EACCES;
+
+  return 0;
+}
+
+void RGWDeleteBucketWebsite::pre_exec()
+{
+  rgw_bucket_object_pre_exec(s);
+}
+
+void RGWDeleteBucketWebsite::execute()
+{
+  s->bucket_info.has_website = false;
+  s->bucket_info.website_conf = RGWBucketWebsiteConf();
+
+  op_ret = store->put_bucket_instance_info(s->bucket_info, false, 0, &s->bucket_attrs);
+  if (op_ret < 0) {
+    ldout(s->cct, 0) << "NOTICE: put_bucket_info on bucket=" << s->bucket.name << " returned err=" << op_ret << dendl;
     return;
   }
 }
@@ -1466,16 +1547,16 @@ void RGWStatBucket::execute()
   bucket.bucket = s->bucket;
   buckets.add(bucket);
   map<string, RGWBucketEnt>& m = buckets.get_buckets();
-  ret = store->update_containers_stats(m);
-  if (!ret)
-    ret = -EEXIST;
-  if (ret > 0) {
-    ret = 0;
+  op_ret = store->update_containers_stats(m);
+  if (! op_ret)
+    op_ret = -EEXIST;
+  if (op_ret > 0) {
+    op_ret = 0;
     map<string, RGWBucketEnt>::iterator iter = m.find(bucket.bucket.name);
     if (iter != m.end()) {
       bucket = iter->second;
     } else {
-      ret = -EINVAL;
+      op_ret = -EINVAL;
     }
   }
 }
@@ -1514,16 +1595,16 @@ void RGWListBucket::pre_exec()
 
 void RGWListBucket::execute()
 {
-  ret = get_params();
-  if (ret < 0)
+  op_ret = get_params();
+  if (op_ret < 0)
     return;
 
   if (need_container_stats()) {
     map<string, RGWBucketEnt> m;
     m[s->bucket.name] = RGWBucketEnt();
     m.begin()->second.bucket = s->bucket;
-    ret = store->update_containers_stats(m);
-    if (ret > 0) {
+    op_ret = store->update_containers_stats(m);
+    if (op_ret > 0) {
       bucket = m.begin()->second;
     }
   }
@@ -1537,8 +1618,8 @@ void RGWListBucket::execute()
   list_op.params.end_marker = end_marker;
   list_op.params.list_versions = list_versions;
 
-  ret = list_op.list_objects(max, &objs, &common_prefixes, &is_truncated);
-  if (ret >= 0 && !delimiter.empty()) {
+  op_ret = list_op.list_objects(max, &objs, &common_prefixes, &is_truncated);
+  if (op_ret >= 0 && !delimiter.empty()) {
     next_marker = list_op.get_next_marker();
   }
 }
@@ -1572,10 +1653,11 @@ int RGWCreateBucket::verify_permission()
   if (s->user.max_buckets) {
     RGWUserBuckets buckets;
     string marker;
-    int ret = rgw_read_user_buckets(store, s->user.user_id, buckets,
-                                    marker, string(), s->user.max_buckets, false);
-    if (ret < 0)
-      return ret;
+    op_ret = rgw_read_user_buckets(store, s->user.user_id, buckets,
+				   marker, string(), s->user.max_buckets,
+				   false);
+    if (op_ret < 0)
+      return op_ret;
 
     map<string, RGWBucketEnt>& m = buckets.get_buckets();
     if (m.size() >= s->user.max_buckets) {
@@ -1626,24 +1708,24 @@ void RGWCreateBucket::execute()
   rgw_obj obj(store->zone.domain_root, bucket_name);
   obj_version objv, *pobjv = NULL;
 
-  ret = get_params();
-  if (ret < 0)
+  op_ret = get_params();
+  if (op_ret < 0)
     return;
 
   if (!store->region.is_master &&
       store->region.api_name != location_constraint) {
     ldout(s->cct, 0) << "location constraint (" << location_constraint << ") doesn't match region" << " (" << store->region.api_name << ")" << dendl;
-    ret = -EINVAL;
+    op_ret = -EINVAL;
     return;
   }
 
   /* we need to make sure we read bucket info, it's not read before for this specific request */
   RGWObjectCtx& obj_ctx = *static_cast<RGWObjectCtx *>(s->obj_ctx);
-  ret = store->get_bucket_info(obj_ctx, s->bucket_tenant, s->bucket_name,
-                               s->bucket_info, NULL, &s->bucket_attrs);
-  if (ret < 0 && ret != -ENOENT)
+  op_ret = store->get_bucket_info(obj_ctx, s->bucket_tenant, s->bucket_name,
+				  s->bucket_info, NULL, &s->bucket_attrs);
+  if (op_ret < 0 && op_ret != -ENOENT)
     return;
-  s->bucket_exists = (ret != -ENOENT);
+  s->bucket_exists = (op_ret != -ENOENT);
 
   s->bucket_owner.set_id(s->user.user_id);
   s->bucket_owner.set_name(s->user.display_name);
@@ -1652,7 +1734,7 @@ void RGWCreateBucket::execute()
                                  &old_policy, obj);
     if (r >= 0)  {
       if (old_policy.get_owner().get_id().compare(s->user.user_id) != 0) {
-        ret = -EEXIST;
+        op_ret = -EEXIST;
         return;
       }
     }
@@ -1664,8 +1746,8 @@ void RGWCreateBucket::execute()
 
   if (!store->region.is_master) {
     JSONParser jp;
-    ret = forward_request_to_master(s, NULL, store, in_data, &jp);
-    if (ret < 0)
+    op_ret = forward_request_to_master(s, NULL, store, in_data, &jp);
+    if (op_ret < 0)
       return;
 
     JSONDecoder::decode_json("entry_point_object_ver", ep_objv, &jp);
@@ -1695,11 +1777,12 @@ void RGWCreateBucket::execute()
   if (s->bucket_exists) {
     string selected_placement_rule;
     rgw_bucket bucket;
-    ret = store->select_bucket_placement(s->user, region_name, placement_rule,
-                                         s->bucket_tenant, s->bucket_name, bucket,
-                                         &selected_placement_rule);
+    op_ret = store->select_bucket_placement(s->user, region_name,
+					    placement_rule,
+					    s->bucket_tenant, s->bucket_name,
+					    bucket, &selected_placement_rule);
     if (selected_placement_rule != s->bucket_info.placement_rule) {
-      ret = -EEXIST;
+      op_ret = -EEXIST;
       return;
     }
   }
@@ -1714,16 +1797,17 @@ void RGWCreateBucket::execute()
   }
   s->bucket.tenant = s->bucket_tenant; /* ignored if bucket exists */
   s->bucket.name = s->bucket_name;
-  ret = store->create_bucket(s->user, s->bucket, region_name, placement_rule, attrs, info, pobjv,
-                             &ep_objv, creation_time, pmaster_bucket, true);
+  op_ret = store->create_bucket(s->user, s->bucket, region_name, placement_rule,
+				attrs, info, pobjv, &ep_objv, creation_time,
+				pmaster_bucket, true);
   /* continue if EEXIST and create_bucket will fail below.  this way we can recover
    * from a partial create by retrying it. */
-  ldout(s->cct, 20) << "rgw_create_bucket returned ret=" << ret << " bucket=" << s->bucket << dendl;
+  ldout(s->cct, 20) << "rgw_create_bucket returned ret=" << op_ret << " bucket=" << s->bucket << dendl;
 
-  if (ret && ret != -EEXIST)
+  if (op_ret && op_ret != -EEXIST)
     return;
 
-  existed = (ret == -EEXIST);
+  existed = (op_ret == -EEXIST);
 
   if (existed) {
     /* bucket already existed, might have raced with another bucket creation, or
@@ -1733,20 +1817,24 @@ void RGWCreateBucket::execute()
      * Otherwise inform client about a name conflict.
      */
     if (info.owner.compare(s->user.user_id) != 0) {
-      ret = -EEXIST;
+      op_ret = -EEXIST;
       return;
     }
     s->bucket = info.bucket;
   }
 
-  ret = rgw_link_bucket(store, s->user.user_id, s->bucket, info.creation_time, false);
-  if (ret && !existed && ret != -EEXIST) {  /* if it exists (or previously existed), don't remove it! */
-    ret = rgw_unlink_bucket(store, s->user.user_id, s->bucket.tenant, s->bucket.name);
-    if (ret < 0) {
-      ldout(s->cct, 0) << "WARNING: failed to unlink bucket: ret=" << ret << dendl;
+  op_ret = rgw_link_bucket(store, s->user.user_id, s->bucket,
+			   info.creation_time, false);
+  if (op_ret && !existed && op_ret != -EEXIST) {
+    /* if it exists (or previously existed), don't remove it! */
+    op_ret = rgw_unlink_bucket(store, s->user.user_id, s->bucket.tenant,
+			       s->bucket.name);
+    if (op_ret < 0) {
+      ldout(s->cct, 0) << "WARNING: failed to unlink bucket: ret=" << op_ret
+		       << dendl;
     }
-  } else if (ret == -EEXIST || (ret == 0 && existed)) {
-    ret = -ERR_BUCKET_EXISTS;
+  } else if (op_ret == -EEXIST || (op_ret == 0 && existed)) {
+    op_ret = -ERR_BUCKET_EXISTS;
   }
 }
 
@@ -1765,7 +1853,7 @@ void RGWDeleteBucket::pre_exec()
 
 void RGWDeleteBucket::execute()
 {
-  ret = -EINVAL;
+  op_ret = -EINVAL;
 
   if (s->bucket_name.empty())
     return;
@@ -1783,34 +1871,37 @@ void RGWDeleteBucket::execute()
       ver = strict_strtol(ver_str.c_str(), 10, &err);
       if (!err.empty()) {
         ldout(s->cct, 0) << "failed to parse ver param" << dendl;
-        ret = -EINVAL;
+        op_ret = -EINVAL;
         return;
       }
       ot.read_version.ver = ver;
     }
   }
 
-  ret = store->delete_bucket(s->bucket, ot);
-
-  if (ret == 0) {
-    ret = rgw_unlink_bucket(store, s->user.user_id, s->bucket.tenant, s->bucket.name, false);
-    if (ret < 0) {
-      ldout(s->cct, 0) << "WARNING: failed to unlink bucket: ret=" << ret << dendl;
+  op_ret = store->delete_bucket(s->bucket, ot);
+  if (op_ret == 0) {
+    op_ret = rgw_unlink_bucket(store, s->user.user_id, s->bucket.tenant,
+			       s->bucket.name, false);
+    if (op_ret < 0) {
+      ldout(s->cct, 0) << "WARNING: failed to unlink bucket: ret=" << op_ret
+		       << dendl;
     }
   }
 
-  if (ret < 0) {
+  if (op_ret < 0) {
     return;
   }
 
   if (!store->region.is_master) {
     bufferlist in_data;
     JSONParser jp;
-    ret = forward_request_to_master(s, &ot.read_version, store, in_data, &jp);
-    if (ret < 0) {
-      if (ret == -ENOENT) { /* adjust error,
-                               we want to return with NoSuchBucket and not NoSuchKey */
-        ret = -ERR_NO_SUCH_BUCKET;
+    op_ret = forward_request_to_master(s, &ot.read_version, store, in_data,
+				       &jp);
+    if (op_ret < 0) {
+      if (op_ret == -ENOENT) {
+        /* adjust error, we want to return with NoSuchBucket and not
+	 * NoSuchKey */
+        op_ret = -ERR_NO_SUCH_BUCKET;
       }
       return;
     }
@@ -2100,21 +2191,21 @@ void RGWPutObj::execute()
 
 
   perfcounter->inc(l_rgw_put);
-  ret = -EINVAL;
+  op_ret = -EINVAL;
   if (s->object.empty()) {
     goto done;
   }
 
-  ret = get_params();
-  if (ret < 0) {
-    ldout(s->cct, 20) << "get_params() returned ret=" << ret << dendl;
+  op_ret = get_params();
+  if (op_ret < 0) {
+    ldout(s->cct, 20) << "get_params() returned ret=" << op_ret << dendl;
     goto done;
   }
 
-  ret = get_system_versioning_params(s, &olh_epoch, &version_id);
-  if (ret < 0) {
-    ldout(s->cct, 20) << "get_system_versioning_params() returned ret=" \
-        << ret << dendl;
+  op_ret = get_system_versioning_params(s, &olh_epoch, &version_id);
+  if (op_ret < 0) {
+    ldout(s->cct, 20) << "get_system_versioning_params() returned ret="
+		      << op_ret << dendl;
     goto done;
   }
 
@@ -2122,11 +2213,11 @@ void RGWPutObj::execute()
     need_calc_md5 = true;
 
     ldout(s->cct, 15) << "supplied_md5_b64=" << supplied_md5_b64 << dendl;
-    ret = ceph_unarmor(supplied_md5_bin, &supplied_md5_bin[CEPH_CRYPTO_MD5_DIGESTSIZE + 1],
+    op_ret = ceph_unarmor(supplied_md5_bin, &supplied_md5_bin[CEPH_CRYPTO_MD5_DIGESTSIZE + 1],
                        supplied_md5_b64, supplied_md5_b64 + strlen(supplied_md5_b64));
-    ldout(s->cct, 15) << "ceph_armor ret=" << ret << dendl;
-    if (ret != CEPH_CRYPTO_MD5_DIGESTSIZE) {
-      ret = -ERR_INVALID_DIGEST;
+    ldout(s->cct, 15) << "ceph_armor ret=" << op_ret << dendl;
+    if (op_ret != CEPH_CRYPTO_MD5_DIGESTSIZE) {
+      op_ret = -ERR_INVALID_DIGEST;
       goto done;
     }
 
@@ -2136,10 +2227,10 @@ void RGWPutObj::execute()
 
   if (!chunked_upload) { /* with chunked upload we don't know how big is the upload.
                             we also check sizes at the end anyway */
-    ret = store->check_quota(s->bucket_owner.get_id(), s->bucket,
-                             user_quota, bucket_quota, s->content_length);
-    if (ret < 0) {
-      ldout(s->cct, 20) << "check_quota() returned ret=" << ret << dendl;
+    op_ret = store->check_quota(s->bucket_owner.get_id(), s->bucket,
+				user_quota, bucket_quota, s->content_length);
+    if (op_ret < 0) {
+      ldout(s->cct, 20) << "check_quota() returned ret=" << op_ret << dendl;
       goto done;
     }
   }
@@ -2151,9 +2242,10 @@ void RGWPutObj::execute()
 
   processor = select_processor(*static_cast<RGWObjectCtx *>(s->obj_ctx), &multipart);
 
-  ret = processor->prepare(store, NULL);
-  if (ret < 0) {
-    ldout(s->cct, 20) << "processor->prepare() returned ret=" << ret << dendl;
+  op_ret = processor->prepare(store, NULL);
+  if (op_ret < 0) {
+    ldout(s->cct, 20) << "processor->prepare() returned ret=" << op_ret
+		      << dendl;
     goto done;
   }
 
@@ -2161,7 +2253,7 @@ void RGWPutObj::execute()
     bufferlist data;
     len = get_data(data);
     if (len < 0) {
-      ret = len;
+      op_ret = len;
       goto done;
     }
     if (!len)
@@ -2178,10 +2270,12 @@ void RGWPutObj::execute()
       orig_data = data;
     }
 
-    ret = put_data_and_throttle(processor, data, ofs, (need_calc_md5 ? &hash : NULL), need_to_wait);
-    if (ret < 0) {
-      if (!need_to_wait || ret != -EEXIST) {
-        ldout(s->cct, 20) << "processor->thottle_data() returned ret=" << ret << dendl;
+    op_ret = put_data_and_throttle(processor, data, ofs,
+				  (need_calc_md5 ? &hash : NULL), need_to_wait);
+    if (op_ret < 0) {
+      if (!need_to_wait || op_ret != -EEXIST) {
+        ldout(s->cct, 20) << "processor->thottle_data() returned ret="
+			  << op_ret << dendl;
         goto done;
       }
 
@@ -2200,14 +2294,15 @@ void RGWPutObj::execute()
       gen_rand_alphanumeric(store->ctx(), buf, sizeof(buf) - 1);
       oid_rand.append(buf);
 
-      ret = processor->prepare(store, &oid_rand);
-      if (ret < 0) {
-        ldout(s->cct, 0) << "ERROR: processor->prepare() returned " << ret << dendl;
+      op_ret = processor->prepare(store, &oid_rand);
+      if (op_ret < 0) {
+        ldout(s->cct, 0) << "ERROR: processor->prepare() returned "
+			 << op_ret << dendl;
         goto done;
       }
 
-      ret = put_data_and_throttle(processor, data, ofs, NULL, false);
-      if (ret < 0) {
+      op_ret = put_data_and_throttle(processor, data, ofs, NULL, false);
+      if (op_ret < 0) {
         goto done;
       }
     }
@@ -2216,17 +2311,18 @@ void RGWPutObj::execute()
   } while (len > 0);
 
   if (!chunked_upload && ofs != s->content_length) {
-    ret = -ERR_REQUEST_TIMEOUT;
+    op_ret = -ERR_REQUEST_TIMEOUT;
     goto done;
   }
   s->obj_size = ofs;
 
   perfcounter->inc(l_rgw_put_b, s->obj_size);
 
-  ret = store->check_quota(s->bucket_owner.get_id(), s->bucket,
-                           user_quota, bucket_quota, s->obj_size);
-  if (ret < 0) {
-    ldout(s->cct, 20) << "second check_quota() returned ret=" << ret << dendl;
+  op_ret = store->check_quota(s->bucket_owner.get_id(), s->bucket,
+			      user_quota, bucket_quota, s->obj_size);
+  if (op_ret < 0) {
+    ldout(s->cct, 20) << "second check_quota() returned ret=" << op_ret
+		      << dendl;
     goto done;
   }
 
@@ -2239,7 +2335,7 @@ void RGWPutObj::execute()
   etag = calc_md5;
 
   if (supplied_md5_b64 && strcmp(calc_md5, supplied_md5)) {
-    ret = -ERR_BAD_DIGEST;
+    op_ret = -ERR_BAD_DIGEST;
     goto done;
   }
 
@@ -2248,8 +2344,8 @@ void RGWPutObj::execute()
   attrs[RGW_ATTR_ACL] = aclbl;
 
   if (dlo_manifest) {
-    ret = encode_dlo_manifest_attr(dlo_manifest, attrs);
-    if (ret < 0) {
+    op_ret = encode_dlo_manifest_attr(dlo_manifest, attrs);
+    if (op_ret < 0) {
       ldout(s->cct, 0) << "bad user manifest: " << dlo_manifest << dendl;
       goto done;
     }
@@ -2268,13 +2364,14 @@ void RGWPutObj::execute()
   }
 
   if (supplied_etag && etag.compare(supplied_etag) != 0) {
-    ret = -ERR_UNPROCESSABLE_ENTITY;
+    op_ret = -ERR_UNPROCESSABLE_ENTITY;
     goto done;
   }
   bl.append(etag.c_str(), etag.size() + 1);
   attrs[RGW_ATTR_ETAG] = bl;
 
-  for (iter = s->generic_attrs.begin(); iter != s->generic_attrs.end(); ++iter) {
+  for (iter = s->generic_attrs.begin(); iter != s->generic_attrs.end();
+       ++iter) {
     bufferlist& attrbl = attrs[iter->first];
     const string& val = iter->second;
     attrbl.append(val.c_str(), val.size() + 1);
@@ -2292,7 +2389,8 @@ void RGWPutObj::execute()
     attrs[RGW_ATTR_SLO_UINDICATOR] = slo_userindicator_bl;
   }
 
-  ret = processor->complete(etag, &mtime, 0, attrs, delete_at, if_match, if_nomatch);
+  op_ret = processor->complete(etag, &mtime, 0, attrs, delete_at, if_match,
+			       if_nomatch);
 
 done:
   dispose_processor(processor);
@@ -2336,29 +2434,29 @@ void RGWPostObj::execute()
   int len = 0;
 
   // read in the data from the POST form
-  ret = get_params();
-  if (ret < 0)
+  op_ret = get_params();
+  if (op_ret < 0)
     goto done;
 
-  ret = verify_params();
-  if (ret < 0)
+  op_ret = verify_params();
+  if (op_ret < 0)
     goto done;
 
   if (!verify_bucket_permission(s, RGW_PERM_WRITE)) {
-    ret = -EACCES;
+    op_ret = -EACCES;
     goto done;
   }
 
-  ret = store->check_quota(s->bucket_owner.get_id(), s->bucket,
-                           user_quota, bucket_quota, s->content_length);
-  if (ret < 0) {
+  op_ret = store->check_quota(s->bucket_owner.get_id(), s->bucket,
+			      user_quota, bucket_quota, s->content_length);
+  if (op_ret < 0) {
     goto done;
   }
 
   processor = select_processor(*static_cast<RGWObjectCtx *>(s->obj_ctx));
 
-  ret = processor->prepare(store, NULL);
-  if (ret < 0)
+  op_ret = processor->prepare(store, NULL);
+  if (op_ret < 0)
     goto done;
 
   while (data_pending) {
@@ -2366,33 +2464,33 @@ void RGWPostObj::execute()
      len = get_data(data);
 
      if (len < 0) {
-       ret = len;
+       op_ret = len;
        goto done;
      }
 
      if (!len)
        break;
 
-     ret = put_data_and_throttle(processor, data, ofs, &hash, false);
+     op_ret = put_data_and_throttle(processor, data, ofs, &hash, false);
 
      ofs += len;
 
      if (ofs > max_len) {
-       ret = -ERR_TOO_LARGE;
+       op_ret = -ERR_TOO_LARGE;
        goto done;
      }
    }
 
   if (len < min_len) {
-    ret = -ERR_TOO_SMALL;
+    op_ret = -ERR_TOO_SMALL;
     goto done;
   }
 
   s->obj_size = ofs;
 
-  ret = store->check_quota(s->bucket_owner.get_id(), s->bucket,
-                           user_quota, bucket_quota, s->obj_size);
-  if (ret < 0) {
+  op_ret = store->check_quota(s->bucket_owner.get_id(), s->bucket,
+			      user_quota, bucket_quota, s->obj_size);
+  if (op_ret < 0) {
     goto done;
   }
 
@@ -2413,7 +2511,7 @@ void RGWPostObj::execute()
     attrs[RGW_ATTR_CONTENT_TYPE] = ct_bl;
   }
 
-  ret = processor->complete(etag, NULL, 0, attrs, delete_at);
+  op_ret = processor->complete(etag, NULL, 0, attrs, delete_at);
 
 done:
   dispose_processor(processor);
@@ -2487,17 +2585,19 @@ int RGWPutMetadataAccount::handle_temp_url_update(const map<int, string>& temp_u
   }
 
   RGWUser user;
-  ret = user.init(store, user_op);
-  if (ret < 0) {
-    ldout(store->ctx(), 0) << "ERROR: could not init user ret=" << ret << dendl;
-    return ret;
+  op_ret = user.init(store, user_op);
+  if (op_ret < 0) {
+    ldout(store->ctx(), 0) << "ERROR: could not init user ret=" << op_ret
+			   << dendl;
+    return op_ret;
   }
 
   string err_msg;
-  ret = user.modify(user_op, &err_msg);
-  if (ret < 0) {
-    ldout(store->ctx(), 10) << "user.modify() returned " << ret << ": " << err_msg << dendl;
-    return ret;
+  op_ret = user.modify(user_op, &err_msg);
+  if (op_ret < 0) {
+    ldout(store->ctx(), 10) << "user.modify() returned " << op_ret << ": "
+			    << err_msg << dendl;
+    return op_ret;
   }
   return 0;
 }
@@ -2549,8 +2649,8 @@ void RGWPutMetadataAccount::execute()
   map<string, bufferlist> attrs, orig_attrs, rmattrs;
   RGWObjVersionTracker acct_op_tracker;
 
-  ret = get_params();
-  if (ret < 0) {
+  op_ret = get_params();
+  if (op_ret < 0) {
     return;
   }
 
@@ -2564,20 +2664,21 @@ void RGWPutMetadataAccount::execute()
   filter_out_temp_url(attrs, rmattr_names, temp_url_keys);
   if (!temp_url_keys.empty()) {
     if (s->perm_mask != RGW_PERM_FULL_CONTROL) {
-      ret = -EPERM;
+      op_ret = -EPERM;
       return;
     }
   }
 
   /* XXX tenant needed? */
-  ret = rgw_store_user_attrs(store, s->user.user_id.id, attrs, &rmattrs, &acct_op_tracker);
-  if (ret < 0) {
+  op_ret = rgw_store_user_attrs(store, s->user.user_id.id, attrs, &rmattrs,
+				&acct_op_tracker);
+  if (op_ret < 0) {
     return;
   }
 
   if (!temp_url_keys.empty()) {
-    ret = handle_temp_url_update(temp_url_keys);
-    if (ret < 0) {
+    op_ret = handle_temp_url_update(temp_url_keys);
+    if (op_ret < 0) {
       return;
     }
   }
@@ -2601,8 +2702,8 @@ void RGWPutMetadataBucket::execute()
 {
   map<string, bufferlist> attrs, orig_attrs, rmattrs;
 
-  ret = get_params();
-  if (ret < 0) {
+  op_ret = get_params();
+  if (op_ret < 0) {
     return;
   }
 
@@ -2610,7 +2711,7 @@ void RGWPutMetadataBucket::execute()
 
   if (!placement_rule.empty() &&
       placement_rule != s->bucket_info.placement_rule) {
-    ret = -EEXIST;
+    op_ret = -EEXIST;
     return;
   }
 
@@ -2630,8 +2731,8 @@ void RGWPutMetadataBucket::execute()
     attrs[RGW_ATTR_CORS] = bl;
   }
 
-  ret = rgw_bucket_set_attrs(store, s->bucket_info, attrs, &rmattrs,
-          &s->bucket_info.objv_tracker);
+  op_ret = rgw_bucket_set_attrs(store, s->bucket_info, attrs, &rmattrs,
+				&s->bucket_info.objv_tracker);
 }
 
 int RGWPutMetadataObject::verify_permission()
@@ -2655,22 +2756,22 @@ void RGWPutMetadataObject::execute()
 
   store->set_atomic(s->obj_ctx, obj);
 
-  ret = get_params();
-  if (ret < 0) {
+  op_ret = get_params();
+  if (op_ret < 0) {
     return;
   }
 
   rgw_get_request_metadata(s->cct, s->info, attrs);
   /* check if obj exists, read orig attrs */
-  ret = get_obj_attrs(store, s, obj, orig_attrs);
-  if (ret < 0) {
+  op_ret = get_obj_attrs(store, s, obj, orig_attrs);
+  if (op_ret < 0) {
     return;
   }
 
   /* Check whether the object has expired. Swift API documentation
    * stands that we should return 404 Not Found in such case. */
   if (need_object_expiration() && object_is_expired(orig_attrs)) {
-    ret = -ENOENT;
+    op_ret = -ENOENT;
     return;
   }
 
@@ -2680,14 +2781,14 @@ void RGWPutMetadataObject::execute()
   encode_delete_at_attr(delete_at, attrs);
 
   if (dlo_manifest) {
-    ret = encode_dlo_manifest_attr(dlo_manifest, attrs);
-    if (ret < 0) {
+    op_ret = encode_dlo_manifest_attr(dlo_manifest, attrs);
+    if (op_ret < 0) {
       ldout(s->cct, 0) << "bad user manifest: " << dlo_manifest << dendl;
       return;
     }
   }
 
-  ret = store->set_attrs(s->obj_ctx, obj, attrs, &rmattrs, NULL);
+  op_ret = store->set_attrs(s->obj_ctx, obj, attrs, &rmattrs, NULL);
 }
 
 int RGWDeleteObj::handle_slo_manifest(bufferlist& bl)
@@ -2760,20 +2861,20 @@ void RGWDeleteObj::pre_exec()
 
 void RGWDeleteObj::execute()
 {
-  ret = -EINVAL;
+  op_ret = -EINVAL;
   rgw_obj obj(s->bucket, s->object);
   map<string, bufferlist> attrs;
 
-  ret = get_params();
-  if (ret < 0) {
+  op_ret = get_params();
+  if (op_ret < 0) {
     return;
   }
 
   if (!s->object.empty()) {
     if (need_object_expiration() || multipart_delete) {
       /* check if obj exists, read orig attrs */
-      ret = get_obj_attrs(store, s, obj, attrs);
-      if (ret < 0) {
+      op_ret = get_obj_attrs(store, s, obj, attrs);
+      if (op_ret < 0) {
         return;
       }
     }
@@ -2782,12 +2883,12 @@ void RGWDeleteObj::execute()
       const auto slo_attr = attrs.find(RGW_ATTR_SLO_MANIFEST);
 
       if (slo_attr != attrs.end()) {
-        ret = handle_slo_manifest(slo_attr->second);
-        if (ret < 0) {
-          ldout(s->cct, 0) << "ERROR: failed to handle slo manifest ret=" << ret << dendl;
+        op_ret = handle_slo_manifest(slo_attr->second);
+        if (op_ret < 0) {
+          ldout(s->cct, 0) << "ERROR: failed to handle slo manifest ret=" << op_ret << dendl;
         }
       } else {
-        ret = -ERR_NOT_SLO_MANIFEST;
+        op_ret = -ERR_NOT_SLO_MANIFEST;
       }
 
       return;
@@ -2800,8 +2901,9 @@ void RGWDeleteObj::execute()
     RGWRados::Object del_target(store, s->bucket_info, *obj_ctx, obj);
     RGWRados::Object::Delete del_op(&del_target);
 
-    ret = get_system_versioning_params(s, &del_op.params.olh_epoch, &del_op.params.marker_version_id);
-    if (ret < 0) {
+    op_ret = get_system_versioning_params(s, &del_op.params.olh_epoch,
+					  &del_op.params.marker_version_id);
+    if (op_ret < 0) {
       return;
     }
 
@@ -2809,8 +2911,8 @@ void RGWDeleteObj::execute()
     del_op.params.versioning_status = s->bucket_info.versioning_status();
     del_op.params.obj_owner = s->owner;
 
-    ret = del_op.delete_obj();
-    if (ret >= 0) {
+    op_ret = del_op.delete_obj();
+    if (op_ret >= 0) {
       delete_marker = del_op.result.delete_marker;
       version_id = del_op.result.version_id;
     }
@@ -2818,7 +2920,7 @@ void RGWDeleteObj::execute()
     /* Check whether the object has expired. Swift API documentation
      * stands that we should return 404 Not Found in such case. */
     if (need_object_expiration() && object_is_expired(attrs)) {
-      ret = -ENOENT;
+      op_ret = -ENOENT;
       return;
     }
   }
@@ -2872,22 +2974,22 @@ bool RGWCopyObj::parse_copy_location(const string& url_src, string& bucket_name,
 int RGWCopyObj::verify_permission()
 {
   RGWAccessControlPolicy src_policy(s->cct);
-  ret = get_params();
-  if (ret < 0)
-    return ret;
+  op_ret = get_params();
+  if (op_ret < 0)
+    return op_ret;
 
-  ret = get_system_versioning_params(s, &olh_epoch, &version_id);
-  if (ret < 0) {
-    return ret;
+  op_ret = get_system_versioning_params(s, &olh_epoch, &version_id);
+  if (op_ret < 0) {
+    return op_ret;
   }
   map<string, bufferlist> src_attrs;
 
   RGWObjectCtx& obj_ctx = *static_cast<RGWObjectCtx *>(s->obj_ctx);
 
-  ret = store->get_bucket_info(obj_ctx, src_tenant_name, src_bucket_name,
-                               src_bucket_info, NULL, &src_attrs);
-  if (ret < 0)
-    return ret;
+  op_ret = store->get_bucket_info(obj_ctx, src_tenant_name, src_bucket_name,
+				  src_bucket_info, NULL, &src_attrs);
+  if (op_ret < 0)
+    return op_ret;
 
   src_bucket = src_bucket_info.bucket;
 
@@ -2898,9 +3000,10 @@ int RGWCopyObj::verify_permission()
     store->set_prefetch_data(s->obj_ctx, src_obj);
 
     /* check source object permissions */
-    ret = read_policy(store, s, src_bucket_info, src_attrs, &src_policy, src_bucket, src_object);
-    if (ret < 0)
-      return ret;
+    op_ret = read_policy(store, s, src_bucket_info, src_attrs, &src_policy,
+			 src_bucket, src_object);
+    if (op_ret < 0)
+      return op_ret;
 
     if (!s->system_request && /* system request overrides permission checks */
         !src_policy.verify_permission(s->user.user_id, s->perm_mask, RGW_PERM_READ))
@@ -2914,10 +3017,10 @@ int RGWCopyObj::verify_permission()
     dest_bucket_info = src_bucket_info;
     dest_attrs = src_attrs;
   } else {
-    ret = store->get_bucket_info(obj_ctx, dest_tenant_name, dest_bucket_name,
-                                 dest_bucket_info, NULL, &dest_attrs);
-    if (ret < 0)
-      return ret;
+    op_ret = store->get_bucket_info(obj_ctx, dest_tenant_name, dest_bucket_name,
+				    dest_bucket_info, NULL, &dest_attrs);
+    if (op_ret < 0)
+      return op_ret;
   }
 
   dest_bucket = dest_bucket_info.bucket;
@@ -2928,17 +3031,18 @@ int RGWCopyObj::verify_permission()
   rgw_obj_key no_obj;
 
   /* check dest bucket permissions */
-  ret = read_policy(store, s, dest_bucket_info, dest_attrs, &dest_bucket_policy, dest_bucket, no_obj);
-  if (ret < 0)
-    return ret;
+  op_ret = read_policy(store, s, dest_bucket_info, dest_attrs,
+		       &dest_bucket_policy, dest_bucket, no_obj);
+  if (op_ret < 0)
+    return op_ret;
 
   if (!s->system_request && /* system request overrides permission checks */
       !dest_bucket_policy.verify_permission(s->user.user_id, s->perm_mask, RGW_PERM_WRITE))
     return -EACCES;
 
-  ret = init_dest_policy();
-  if (ret < 0)
-    return ret;
+  op_ret = init_dest_policy();
+  if (op_ret < 0)
+    return op_ret;
 
   return 0;
 }
@@ -2948,16 +3052,16 @@ int RGWCopyObj::init_common()
 {
   if (if_mod) {
     if (parse_time(if_mod, &mod_time) < 0) {
-      ret = -EINVAL;
-      return ret;
+      op_ret = -EINVAL;
+      return op_ret;
     }
     mod_ptr = &mod_time;
   }
 
   if (if_unmod) {
     if (parse_time(if_unmod, &unmod_time) < 0) {
-      ret = -EINVAL;
-      return ret;
+      op_ret = -EINVAL;
+      return op_ret;
     }
     unmod_ptr = &unmod_time;
   }
@@ -3016,32 +3120,32 @@ void RGWCopyObj::execute()
 
   encode_delete_at_attr(delete_at, attrs);
 
-  ret = store->copy_obj(obj_ctx,
-                        s->user.user_id,
-                        client_id,
-                        op_id,
-                        &s->info,
-                        source_zone,
-                        dst_obj,
-                        src_obj,
-                        dest_bucket_info,
-                        src_bucket_info,
-                        &src_mtime,
-                        &mtime,
-                        mod_ptr,
-                        unmod_ptr,
-                        if_match,
-                        if_nomatch,
-                        attrs_mod,
-                        attrs, RGW_OBJ_CATEGORY_MAIN,
-                        olh_epoch,
-			delete_at,
-                        (version_id.empty() ? NULL : &version_id),
-                        &s->req_id, /* use req_id as tag */
-                        &etag,
-                        &s->err,
-                        copy_obj_progress_cb, (void *)this
-                        );
+  op_ret = store->copy_obj(obj_ctx,
+			   s->user.user_id,
+			   client_id,
+			   op_id,
+			   &s->info,
+			   source_zone,
+			   dst_obj,
+			   src_obj,
+			   dest_bucket_info,
+			   src_bucket_info,
+			   &src_mtime,
+			   &mtime,
+			   mod_ptr,
+			   unmod_ptr,
+			   if_match,
+			   if_nomatch,
+			   attrs_mod,
+			   attrs, RGW_OBJ_CATEGORY_MAIN,
+			   olh_epoch,
+			   delete_at,
+			   (version_id.empty() ? NULL : &version_id),
+			   &s->req_id, /* use req_id as tag */
+			   &etag,
+			   &s->err,
+			   copy_obj_progress_cb, (void *)this
+    );
 }
 
 int RGWGetACLs::verify_permission()
@@ -3104,10 +3208,10 @@ void RGWPutACLs::execute()
   char *new_data = NULL;
   rgw_obj obj;
 
-  ret = 0;
+  op_ret = 0; /* XXX redundant? */
 
   if (!parser.init()) {
-    ret = -EINVAL;
+    op_ret = -EINVAL;
     return;
   }
 
@@ -3116,20 +3220,20 @@ void RGWPutACLs::execute()
 
   owner = existing_policy->get_owner();
 
-  ret = get_params();
-  if (ret < 0)
+  op_ret = get_params();
+  if (op_ret < 0)
     return;
 
   ldout(s->cct, 15) << "read len=" << len << " data=" << (data ? data : "") << dendl;
 
   if (!s->canned_acl.empty() && len) {
-    ret = -EINVAL;
+    op_ret = -EINVAL;
     return;
   }
 
   if (!s->canned_acl.empty() || s->has_acl_header) {
-    ret = get_policy_from_state(store, s, ss);
-    if (ret < 0)
+    op_ret = get_policy_from_state(store, s, ss);
+    if (op_ret < 0)
       return;
 
     new_data = strdup(ss.str().c_str());
@@ -3139,12 +3243,12 @@ void RGWPutACLs::execute()
   }
 
   if (!parser.parse(data, len, 1)) {
-    ret = -EACCES;
+    op_ret = -EACCES;
     return;
   }
   policy = static_cast<RGWAccessControlPolicy_S3 *>(parser.find_first("AccessControlPolicy"));
   if (!policy) {
-    ret = -EINVAL;
+    op_ret = -EINVAL;
     return;
   }
 
@@ -3154,8 +3258,8 @@ void RGWPutACLs::execute()
     *_dout << dendl;
   }
 
-  ret = policy->rebuild(store, &owner, new_policy);
-  if (ret < 0)
+  op_ret = policy->rebuild(store, &owner, new_policy);
+  if (op_ret < 0)
     return;
 
   if (s->cct->_conf->subsys.should_gather(ceph_subsys_rgw, 15)) {
@@ -3173,17 +3277,17 @@ void RGWPutACLs::execute()
   store->set_atomic(s->obj_ctx, obj);
 
   if (!s->object.empty()) {
-    ret = get_obj_attrs(store, s, obj, attrs);
-    if (ret < 0)
+    op_ret = get_obj_attrs(store, s, obj, attrs);
+    if (op_ret < 0)
       return;
   }
   
   attrs[RGW_ATTR_ACL] = bl;
 
   if (!s->object.empty()) {
-    ret = store->set_attrs(s->obj_ctx, obj, attrs, NULL, ptracker);
+    op_ret = store->set_attrs(s->obj_ctx, obj, attrs, NULL, ptracker);
   } else {
-    ret = rgw_bucket_set_attrs(store, s->bucket_info, attrs, NULL, ptracker);
+    op_ret = rgw_bucket_set_attrs(store, s->bucket_info, attrs, NULL, ptracker);
   }
 }
 
@@ -3197,13 +3301,13 @@ int RGWGetCORS::verify_permission()
 
 void RGWGetCORS::execute()
 {
-  ret = read_bucket_cors();
-  if (ret < 0)
+  op_ret = read_bucket_cors();
+  if (op_ret < 0)
     return ;
 
   if (!cors_exist) {
     dout(2) << "No CORS configuration set yet for this bucket" << dendl;
-    ret = -ENOENT;
+    op_ret = -ENOENT;
     return;
   }
 }
@@ -3220,8 +3324,8 @@ void RGWPutCORS::execute()
 {
   rgw_obj obj;
 
-  ret = get_params();
-  if (ret < 0)
+  op_ret = get_params();
+  if (op_ret < 0)
     return;
 
   RGWObjVersionTracker *ptracker = (!s->object.empty() ? NULL : &s->bucket_info.objv_tracker);
@@ -3230,11 +3334,11 @@ void RGWPutCORS::execute()
   if (is_object_op) {
     store->get_bucket_instance_obj(s->bucket, obj);
     store->set_atomic(s->obj_ctx, obj);
-    ret = store->set_attr(s->obj_ctx, obj, RGW_ATTR_CORS, cors_bl, ptracker);
+    op_ret = store->set_attr(s->obj_ctx, obj, RGW_ATTR_CORS, cors_bl, ptracker);
   } else {
     map<string, bufferlist> attrs;
     attrs[RGW_ATTR_CORS] = cors_bl;
-    ret = rgw_bucket_set_attrs(store, s->bucket_info, attrs, NULL, ptracker);
+    op_ret = rgw_bucket_set_attrs(store, s->bucket_info, attrs, NULL, ptracker);
   }
 }
 
@@ -3248,15 +3352,15 @@ int RGWDeleteCORS::verify_permission()
 
 void RGWDeleteCORS::execute()
 {
-  ret = read_bucket_cors();
-  if (ret < 0)
+  op_ret = read_bucket_cors();
+  if (op_ret < 0)
     return;
 
   bufferlist bl;
   rgw_obj obj;
   if (!cors_exist) {
     dout(2) << "No CORS configuration set yet for this bucket" << dendl;
-    ret = -ENOENT;
+    op_ret = -ENOENT;
     return;
   }
   store->get_bucket_instance_obj(s->bucket, obj);
@@ -3270,13 +3374,13 @@ void RGWDeleteCORS::execute()
 
   if (is_object_op) {
     /* check if obj exists, read orig attrs */
-    ret = get_obj_attrs(store, s, obj, orig_attrs);
-    if (ret < 0)
+    op_ret = get_obj_attrs(store, s, obj, orig_attrs);
+    if (op_ret < 0)
       return;
   } else {
     ptracker = (!s->object.empty() ? NULL : &s->bucket_info.objv_tracker);
-    ret = get_system_obj_attrs(store, s, obj, orig_attrs, NULL, ptracker);
-    if (ret < 0)
+    op_ret = get_system_obj_attrs(store, s, obj, orig_attrs, NULL, ptracker);
+    if (op_ret < 0)
       return;
   }
 
@@ -3291,9 +3395,10 @@ void RGWDeleteCORS::execute()
     }
   }
   if (is_object_op) {
-    ret = store->set_attrs(s->obj_ctx, obj, attrs, &rmattrs, ptracker);
+    op_ret = store->set_attrs(s->obj_ctx, obj, attrs, &rmattrs, ptracker);
   } else {
-    ret = rgw_bucket_set_attrs(store, s->bucket_info, attrs, &rmattrs, ptracker);
+    op_ret = rgw_bucket_set_attrs(store, s->bucket_info, attrs, &rmattrs,
+				  ptracker);
   }
 }
 
@@ -3316,8 +3421,8 @@ int RGWOptionsCORS::validate_cors_request(RGWCORSConfiguration *cc) {
 
 void RGWOptionsCORS::execute()
 {
-  ret = read_bucket_cors();
-  if (ret < 0)
+  op_ret = read_bucket_cors();
+  if (op_ret < 0)
     return;
 
   origin = s->info.env->get("HTTP_ORIGIN");
@@ -3325,7 +3430,7 @@ void RGWOptionsCORS::execute()
     dout(0) <<
     "Preflight request without mandatory Origin header"
     << dendl;
-    ret = -EINVAL;
+    op_ret = -EINVAL;
     return;
   }
   req_meth = s->info.env->get("HTTP_ACCESS_CONTROL_REQUEST_METHOD");
@@ -3333,16 +3438,16 @@ void RGWOptionsCORS::execute()
     dout(0) <<
     "Preflight request without mandatory Access-control-request-method header"
     << dendl;
-    ret = -EINVAL;
+    op_ret = -EINVAL;
     return;
   }
   if (!cors_exist) {
     dout(2) << "No CORS configuration set yet for this bucket" << dendl;
-    ret = -ENOENT;
+    op_ret = -ENOENT;
     return;
   }
   req_hdrs = s->info.env->get("HTTP_ACCESS_CONTROL_REQUEST_HEADERS");
-  ret = validate_cors_request(&bucket_cors);
+  op_ret = validate_cors_request(&bucket_cors);
   if (!rule) {
     origin = req_meth = NULL;
     return;
@@ -3380,15 +3485,17 @@ void RGWSetRequestPayment::pre_exec()
 
 void RGWSetRequestPayment::execute()
 {
-  ret = get_params();
+  op_ret = get_params();
 
-  if (ret < 0)
+  if (op_ret < 0)
     return;
 
   s->bucket_info.requester_pays = requester_pays;
-  ret = store->put_bucket_instance_info(s->bucket_info, false, 0, &s->bucket_attrs);
-  if (ret < 0) {
-    ldout(s->cct, 0) << "NOTICE: put_bucket_info on bucket=" << s->bucket.name << " returned err=" << ret << dendl;
+  op_ret = store->put_bucket_instance_info(s->bucket_info, false, 0,
+					   &s->bucket_attrs);
+  if (op_ret < 0) {
+    ldout(s->cct, 0) << "NOTICE: put_bucket_info on bucket=" << s->bucket.name
+		     << " returned err=" << op_ret << dendl;
     return;
   }
 }
@@ -3415,7 +3522,7 @@ void RGWInitMultipart::execute()
 
   if (get_params() < 0)
     return;
-  ret = -EINVAL;
+  op_ret = -EINVAL;
   if (s->object.empty())
     return;
 
@@ -3455,12 +3562,14 @@ void RGWInitMultipart::execute()
     obj_op.meta.category = RGW_OBJ_CATEGORY_MULTIMETA;
     obj_op.meta.flags = PUT_OBJ_CREATE_EXCL;
 
-    ret = obj_op.write_meta(0, attrs);
-  } while (ret == -EEXIST);
+    op_ret = obj_op.write_meta(0, attrs);
+  } while (op_ret == -EEXIST);
 }
 
-static int get_multipart_info(RGWRados *store, struct req_state *s, string& meta_oid,
-                              RGWAccessControlPolicy *policy, map<string, bufferlist>& attrs)
+static int get_multipart_info(RGWRados *store, struct req_state *s,
+			      string& meta_oid,
+                              RGWAccessControlPolicy *policy,
+			      map<string, bufferlist>& attrs)
 {
   map<string, bufferlist>::iterator iter;
   bufferlist header;
@@ -3469,12 +3578,12 @@ static int get_multipart_info(RGWRados *store, struct req_state *s, string& meta
   obj.init_ns(s->bucket, meta_oid, mp_ns);
   obj.set_in_extra_data(true);
 
-  int ret = get_obj_attrs(store, s, obj, attrs);
-  if (ret < 0) {
-    if (ret == -ENOENT) {
+  int op_ret = get_obj_attrs(store, s, obj, attrs);
+  if (op_ret < 0) {
+    if (op_ret == -ENOENT) {
       return -ERR_NO_SUCH_UPLOAD;
     }
-    return ret;
+    return op_ret;
   }
 
   if (policy) {
@@ -3627,38 +3736,39 @@ void RGWCompleteMultipart::execute()
   uint64_t olh_epoch = 0;
   string version_id;
 
-  ret = get_params();
-  if (ret < 0)
+  op_ret = get_params();
+  if (op_ret < 0)
     return;
 
-  ret = get_system_versioning_params(s, &olh_epoch, &version_id);
-  if (ret < 0) {
+  op_ret = get_system_versioning_params(s, &olh_epoch, &version_id);
+  if (op_ret < 0) {
     return;
   }
 
   if (!data || !len) {
-    ret = -ERR_MALFORMED_XML;
+    op_ret = -ERR_MALFORMED_XML;
     return;
   }
 
   if (!parser.init()) {
-    ret = -EIO;
+    op_ret = -EIO;
     return;
   }
 
   if (!parser.parse(data, len, 1)) {
-    ret = -ERR_MALFORMED_XML;
+    op_ret = -ERR_MALFORMED_XML;
     return;
   }
 
   parts = static_cast<RGWMultiCompleteUpload *>(parser.find_first("CompleteMultipartUpload"));
   if (!parts || parts->parts.empty()) {
-    ret = -ERR_MALFORMED_XML;
+    op_ret = -ERR_MALFORMED_XML;
     return;
   }
 
-  if ((int)parts->parts.size() > s->cct->_conf->rgw_multipart_part_upload_limit) {
-    ret = -ERANGE;
+  if ((int)parts->parts.size() >
+      s->cct->_conf->rgw_multipart_part_upload_limit) {
+    op_ret = -ERANGE;
     return;
   }
 
@@ -3683,23 +3793,25 @@ void RGWCompleteMultipart::execute()
   meta_obj.set_in_extra_data(true);
   meta_obj.index_hash_source = s->object.name;
 
-  ret = get_obj_attrs(store, s, meta_obj, attrs);
-  if (ret < 0) {
-    ldout(s->cct, 0) << "ERROR: failed to get obj attrs, obj=" << meta_obj << " ret=" << ret << dendl;
+  op_ret = get_obj_attrs(store, s, meta_obj, attrs);
+  if (op_ret < 0) {
+    ldout(s->cct, 0) << "ERROR: failed to get obj attrs, obj=" << meta_obj
+		     << " ret=" << op_ret << dendl;
     return;
   }
 
   do {
-    ret = list_multipart_parts(store, s, upload_id, meta_oid, max_parts, marker, obj_parts, &marker, &truncated);
-    if (ret == -ENOENT) {
-      ret = -ERR_NO_SUCH_UPLOAD;
+    op_ret = list_multipart_parts(store, s, upload_id, meta_oid, max_parts,
+				  marker, obj_parts, &marker, &truncated);
+    if (op_ret == -ENOENT) {
+      op_ret = -ERR_NO_SUCH_UPLOAD;
     }
-    if (ret < 0)
+    if (op_ret < 0)
       return;
 
     total_parts += obj_parts.size();
     if (!truncated && total_parts != (int)parts->parts.size()) {
-      ret = -ERR_INVALID_PART;
+      op_ret = -ERR_INVALID_PART;
       return;
     }
 
@@ -3707,24 +3819,28 @@ void RGWCompleteMultipart::execute()
       uint64_t part_size = obj_iter->second.size;
       if (handled_parts < (int)parts->parts.size() - 1 &&
           part_size < min_part_size) {
-        ret = -ERR_TOO_SMALL;
+        op_ret = -ERR_TOO_SMALL;
         return;
       }
 
       char petag[CEPH_CRYPTO_MD5_DIGESTSIZE];
       if (iter->first != (int)obj_iter->first) {
-        ldout(s->cct, 0) << "NOTICE: parts num mismatch: next requested: " << iter->first << " next uploaded: " << obj_iter->first << dendl;
-        ret = -ERR_INVALID_PART;
+        ldout(s->cct, 0) << "NOTICE: parts num mismatch: next requested: "
+			 << iter->first << " next uploaded: "
+			 << obj_iter->first << dendl;
+        op_ret = -ERR_INVALID_PART;
         return;
       }
       string part_etag = rgw_string_unquote(iter->second);
       if (part_etag.compare(obj_iter->second.etag) != 0) {
-        ldout(s->cct, 0) << "NOTICE: etag mismatch: part: " << iter->first << " etag: " << iter->second << dendl;
-        ret = -ERR_INVALID_PART;
+        ldout(s->cct, 0) << "NOTICE: etag mismatch: part: " << iter->first
+			 << " etag: " << iter->second << dendl;
+        op_ret = -ERR_INVALID_PART;
         return;
       }
 
-      hex_to_buf(obj_iter->second.etag.c_str(), petag, CEPH_CRYPTO_MD5_DIGESTSIZE);
+      hex_to_buf(obj_iter->second.etag.c_str(), petag,
+		CEPH_CRYPTO_MD5_DIGESTSIZE);
       hash.Update((const byte *)petag, sizeof(petag));
 
       RGWUploadPartInfo& obj_part = obj_iter->second;
@@ -3735,8 +3851,9 @@ void RGWCompleteMultipart::execute()
       src_obj.init_ns(s->bucket, oid, mp_ns);
 
       if (obj_part.manifest.empty()) {
-        ldout(s->cct, 0) << "ERROR: empty manifest for object part: obj=" << src_obj << dendl;
-        ret = -ERR_INVALID_PART;
+        ldout(s->cct, 0) << "ERROR: empty manifest for object part: obj="
+			 << src_obj << dendl;
+        op_ret = -ERR_INVALID_PART;
         return;
       } else {
         manifest.append(obj_part.manifest);
@@ -3781,12 +3898,13 @@ void RGWCompleteMultipart::execute()
   obj_op.meta.owner = s->owner.get_id();
   obj_op.meta.flags = PUT_OBJ_CREATE;
 
-  ret = obj_op.write_meta(ofs, attrs);
-  if (ret < 0)
+  op_ret = obj_op.write_meta(ofs, attrs);
+  if (op_ret < 0)
     return;
 
   // remove the upload obj
-  int r = store->delete_obj(*static_cast<RGWObjectCtx *>(s->obj_ctx), s->bucket_info, meta_obj, 0);
+  int r = store->delete_obj(*static_cast<RGWObjectCtx *>(s->obj_ctx),
+			    s->bucket_info, meta_obj, 0);
   if (r < 0) {
     ldout(store->ctx(), 0) << "WARNING: failed to remove object " << meta_obj << dendl;
   }
@@ -3807,7 +3925,7 @@ void RGWAbortMultipart::pre_exec()
 
 void RGWAbortMultipart::execute()
 {
-  ret = -EINVAL;
+  op_ret = -EINVAL;
   string upload_id;
   string meta_oid;
   upload_id = s->info.args.get("uploadId");
@@ -3823,8 +3941,8 @@ void RGWAbortMultipart::execute()
   mp.init(s->object.name, upload_id);
   meta_oid = mp.get_meta();
 
-  ret = get_multipart_info(store, s, meta_oid, NULL, attrs);
-  if (ret < 0)
+  op_ret = get_multipart_info(store, s, meta_oid, NULL, attrs);
+  if (op_ret < 0)
     return;
 
   bool truncated;
@@ -3842,11 +3960,13 @@ void RGWAbortMultipart::execute()
   list<rgw_obj_key> remove_objs;
 
   do {
-    ret = list_multipart_parts(store, s, upload_id, meta_oid, max_parts, marker, obj_parts, &marker, &truncated);
-    if (ret < 0)
+    op_ret = list_multipart_parts(store, s, upload_id, meta_oid, max_parts,
+				  marker, obj_parts, &marker, &truncated);
+    if (op_ret < 0)
       return;
 
-    for (obj_iter = obj_parts.begin(); obj_iter != obj_parts.end(); ++obj_iter) {
+    for (obj_iter = obj_parts.begin();
+	 obj_iter != obj_parts.end(); ++obj_iter) {
       RGWUploadPartInfo& obj_part = obj_iter->second;
 
       if (obj_part.manifest.empty()) {
@@ -3854,8 +3974,8 @@ void RGWAbortMultipart::execute()
         rgw_obj obj;
         obj.init_ns(s->bucket, oid, mp_ns);
         obj.index_hash_source = s->object.name;
-        ret = store->delete_obj(*obj_ctx, s->bucket_info, obj, 0);
-        if (ret < 0 && ret != -ENOENT)
+        op_ret = store->delete_obj(*obj_ctx, s->bucket_info, obj, 0);
+        if (op_ret < 0 && op_ret != -ENOENT)
           return;
       } else {
         store->update_gc_chain(meta_obj, obj_part.manifest, &chain);
@@ -3871,9 +3991,9 @@ void RGWAbortMultipart::execute()
   } while (truncated);
 
   /* use upload id as tag */
-  ret = store->send_chain_to_gc(chain, upload_id , false);  // do it async
-  if (ret < 0) {
-    ldout(store->ctx(), 5) << "gc->send_chain() returned " << ret << dendl;
+  op_ret = store->send_chain_to_gc(chain, upload_id , false);  // do it async
+  if (op_ret < 0) {
+    ldout(store->ctx(), 5) << "gc->send_chain() returned " << op_ret << dendl;
     return;
   }
 
@@ -3887,9 +4007,9 @@ void RGWAbortMultipart::execute()
   }
 
   // and also remove the metadata obj
-  ret = del_op.delete_obj();
-  if (ret == -ENOENT) {
-    ret = -ERR_NO_SUCH_BUCKET;
+  op_ret = del_op.delete_obj();
+  if (op_ret == -ENOENT) {
+    op_ret = -ERR_NO_SUCH_BUCKET;
   }
 }
 
@@ -3912,18 +4032,19 @@ void RGWListMultipart::execute()
   string meta_oid;
   RGWMPObj mp;
 
-  ret = get_params();
-  if (ret < 0)
+  op_ret = get_params();
+  if (op_ret < 0)
     return;
 
   mp.init(s->object.name, upload_id);
   meta_oid = mp.get_meta();
 
-  ret = get_multipart_info(store, s, meta_oid, &policy, xattrs);
-  if (ret < 0)
+  op_ret = get_multipart_info(store, s, meta_oid, &policy, xattrs);
+  if (op_ret < 0)
     return;
 
-  ret = list_multipart_parts(store, s, upload_id, meta_oid, max_parts, marker, parts, NULL, &truncated);
+  op_ret = list_multipart_parts(store, s, upload_id, meta_oid, max_parts,
+				marker, parts, NULL, &truncated);
 }
 
 int RGWListBucketMultiparts::verify_permission()
@@ -3944,8 +4065,8 @@ void RGWListBucketMultiparts::execute()
   vector<RGWObjEnt> objs;
   string marker_meta;
 
-  ret = get_params();
-  if (ret < 0)
+  op_ret = get_params();
+  if (op_ret < 0)
     return;
 
   if (s->prot_flags & RGW_REST_SWIFT) {
@@ -3953,7 +4074,7 @@ void RGWListBucketMultiparts::execute()
     path_args = s->info.args.get("path");
     if (!path_args.empty()) {
       if (!delimiter.empty() || !prefix.empty()) {
-        ret = -EINVAL;
+        op_ret = -EINVAL;
         return;
       }
       prefix = path_args;
@@ -3971,7 +4092,8 @@ void RGWListBucketMultiparts::execute()
   list_op.params.ns = mp_ns;
   list_op.params.filter = &mp_filter;
 
-  ret = list_op.list_objects(max_uploads, &objs, &common_prefixes, &is_truncated);
+  op_ret = list_op.list_objects(max_uploads, &objs, &common_prefixes,
+				&is_truncated);
   if (!objs.empty()) {
     vector<RGWObjEnt>::iterator iter;
     RGWMultipartUploadEntry entry;
@@ -4007,29 +4129,29 @@ void RGWDeleteMultiObj::execute()
   int num_processed = 0;
   RGWObjectCtx *obj_ctx = static_cast<RGWObjectCtx *>(s->obj_ctx);
 
-  ret = get_params();
-  if (ret < 0) {
+  op_ret = get_params();
+  if (op_ret < 0) {
     goto error;
   }
 
   if (!data) {
-    ret = -EINVAL;
+    op_ret = -EINVAL;
     goto error;
   }
 
   if (!parser.init()) {
-    ret = -EINVAL;
+    op_ret = -EINVAL;
     goto error;
   }
 
   if (!parser.parse(data, len, 1)) {
-    ret = -EINVAL;
+    op_ret = -EINVAL;
     goto error;
   }
 
   multi_delete = static_cast<RGWMultiDelDelete *>(parser.find_first("Delete"));
   if (!multi_delete) {
-    ret = -EINVAL;
+    op_ret = -EINVAL;
     goto error;
   }
 
@@ -4055,17 +4177,18 @@ void RGWDeleteMultiObj::execute()
     del_op.params.versioning_status = s->bucket_info.versioning_status();
     del_op.params.obj_owner = s->owner;
 
-    ret = del_op.delete_obj();
-    if (ret == -ENOENT) {
-      ret = 0;
+    op_ret = del_op.delete_obj();
+    if (op_ret == -ENOENT) {
+      op_ret = 0;
     }
 
-    send_partial_response(*iter, del_op.result.delete_marker, del_op.result.version_id, ret);
+    send_partial_response(*iter, del_op.result.delete_marker,
+			  del_op.result.version_id, op_ret);
   }
 
   /*  set the return code to zero, errors at this point will be
   dumped to the response */
-  ret = 0;
+  op_ret = 0;
 
 done:
   // will likely segfault if begin_response() has not been called
@@ -4085,11 +4208,9 @@ bool RGWBulkDelete::Deleter::verify_permission(RGWBucketInfo& binfo,
                                                rgw_obj& obj,
                                                ACLOwner& bucket_owner /* out */)
 {
-  int ret = 0;
-
   RGWAccessControlPolicy bacl(store->ctx());
   rgw_obj_key no_obj;
-  ret = read_policy(store, s, binfo, battrs, &bacl, binfo.bucket, no_obj);
+  int ret = read_policy(store, s, binfo, battrs, &bacl, binfo.bucket, no_obj);
   if (ret < 0) {
     return false;
   }
@@ -4108,11 +4229,9 @@ bool RGWBulkDelete::Deleter::verify_permission(RGWBucketInfo& binfo,
 bool RGWBulkDelete::Deleter::verify_permission(RGWBucketInfo& binfo,
                                                map<string, bufferlist>& battrs)
 {
-  int ret = 0;
-
   RGWAccessControlPolicy bacl(store->ctx());
   rgw_obj_key no_obj;
-  ret = read_policy(store, s, binfo, battrs, &bacl, binfo.bucket, no_obj);
+  int ret = read_policy(store, s, binfo, battrs, &bacl, binfo.bucket, no_obj);
   if (ret < 0) {
     return false;
   }
@@ -4122,13 +4241,12 @@ bool RGWBulkDelete::Deleter::verify_permission(RGWBucketInfo& binfo,
 
 bool RGWBulkDelete::Deleter::delete_single(const acct_path_t& path)
 {
-  int ret = 0;
   auto& obj_ctx = *static_cast<RGWObjectCtx *>(s->obj_ctx);
 
   RGWBucketInfo binfo;
   map<string, bufferlist> battrs;
-  ret = store->get_bucket_info(obj_ctx, s->user.user_id.tenant,
-      path.bucket_name, binfo, NULL, &battrs);
+  int ret = store->get_bucket_info(obj_ctx, s->user.user_id.tenant,
+				   path.bucket_name, binfo, NULL, &battrs);
   if (ret < 0) {
     goto binfo_fail;
   }
@@ -4165,9 +4283,11 @@ bool RGWBulkDelete::Deleter::delete_single(const acct_path_t& path)
 
     ret = store->delete_bucket(binfo.bucket, ot);
     if (0 == ret) {
-      ret = rgw_unlink_bucket(store, binfo.owner, binfo.bucket.tenant, binfo.bucket.name, false);
+      ret = rgw_unlink_bucket(store, binfo.owner, binfo.bucket.tenant,
+			      binfo.bucket.name, false);
       if (ret < 0) {
-        ldout(s->cct, 0) << "WARNING: failed to unlink bucket: ret=" << ret << dendl;
+        ldout(s->cct, 0) << "WARNING: failed to unlink bucket: ret=" << ret
+			 << dendl;
       }
     }
     if (ret < 0) {
@@ -4177,10 +4297,12 @@ bool RGWBulkDelete::Deleter::delete_single(const acct_path_t& path)
     if (!store->region.is_master) {
       bufferlist in_data;
       JSONParser jp;
-      ret = forward_request_to_master(s, &ot.read_version, store, in_data, &jp);
+      ret = forward_request_to_master(s, &ot.read_version, store, in_data,
+				      &jp);
       if (ret < 0) {
-        if (ret == -ENOENT) { /* adjust error,
-                               we want to return with NoSuchBucket and not NoSuchKey */
+        if (ret == -ENOENT) {
+          /* adjust error, we want to return with NoSuchBucket and not
+	   * NoSuchKey */
           ret = -ERR_NO_SUCH_BUCKET;
         }
         goto delop_fail;
@@ -4197,7 +4319,8 @@ binfo_fail:
       ldout(store->ctx(), 20) << "cannot find bucket = " << path.bucket_name << dendl;
       num_unfound++;
     } else {
-      ldout(store->ctx(), 20) << "cannot get bucket info, ret = " << ret << dendl;
+      ldout(store->ctx(), 20) << "cannot get bucket info, ret = " << ret
+			      << dendl;
 
       fail_desc_t failed_item = {
         .err  = ret,
@@ -4267,7 +4390,7 @@ void RGWBulkDelete::execute()
     }
 
     ret = deleter->delete_chunk(items);
-  } while (!ret && is_truncated);
+  } while (!op_ret && is_truncated);
 
   return;
 }
@@ -4284,12 +4407,29 @@ int RGWHandler::init(RGWRados *_store, struct req_state *_s, RGWClientIO *cio)
   return 0;
 }
 
+int RGWHandler::do_init_permissions()
+{
+  int ret = rgw_build_bucket_policies(store, s);
+
+  if (ret < 0) {
+    ldout(s->cct, 10) << "read_permissions on " << s->bucket << " ret=" << ret << dendl;
+    if (ret == -ENODATA)
+      ret = -EACCES;
+  }
+
+  return ret;
+}
+
 int RGWHandler::do_read_permissions(RGWOp *op, bool only_bucket)
 {
-  int ret = rgw_build_policies(store, s, only_bucket, op->prefetch_data());
+  if (only_bucket) {
+    /* already read bucket info */
+    return 0;
+  }
+  int ret = rgw_build_object_policies(store, s, op->prefetch_data());
 
   if (ret < 0) {
-    ldout(s->cct, 10) << "read_permissions on " << s->bucket << ":" <<s->object << " only_bucket=" << only_bucket << " ret=" << ret << dendl;
+    ldout(s->cct, 10) << "read_permissions on " << s->bucket << ":" << s->object << " ret=" << ret << dendl;
     if (ret == -ENODATA)
       ret = -EACCES;
   }
@@ -4338,3 +4478,11 @@ void RGWHandler::put_op(RGWOp *op)
   delete op;
 }
 
+int RGWOp::error_handler(int err_no, string *error_content) {
+  return dialect_handler->error_handler(err_no, error_content);
+}
+
+int RGWHandler::error_handler(int err_no, string *error_content) {
+  // This is the do-nothing error handler
+  return err_no;
+}
diff --git a/src/rgw/rgw_op.h b/src/rgw/rgw_op.h
index 880c634..254fbd4 100644
--- a/src/rgw/rgw_op.h
+++ b/src/rgw/rgw_op.h
@@ -40,6 +40,8 @@ enum RGWOpType {
   RGW_OP_GET_BUCKET_LOGGING,
   RGW_OP_GET_BUCKET_VERSIONING,
   RGW_OP_SET_BUCKET_VERSIONING,
+  RGW_OP_GET_BUCKET_WEBSITE,
+  RGW_OP_SET_BUCKET_WEBSITE,
   RGW_OP_STAT_BUCKET,
   RGW_OP_CREATE_BUCKET,
   RGW_OP_DELETE_BUCKET,
@@ -80,16 +82,20 @@ protected:
   bool cors_exist;
   RGWQuotaInfo bucket_quota;
   RGWQuotaInfo user_quota;
+  int op_ret;
 
   virtual int init_quota();
 public:
-  RGWOp() : s(NULL), dialect_handler(NULL), store(NULL), cors_exist(false) {}
+RGWOp() : s(NULL), dialect_handler(NULL), store(NULL), cors_exist(false),
+    op_ret(0) {}
   virtual ~RGWOp() {}
 
+  int get_ret() const { return op_ret; }
+
   virtual int init_processing() {
-    int ret = init_quota();
-    if (ret < 0)
-      return ret;
+    op_ret = init_quota();
+    if (op_ret < 0)
+      return op_ret;
 
     return 0;
   }
@@ -116,6 +122,8 @@ public:
   virtual RGWOpType get_type() { return RGW_OP_UNKNOWN; }
 
   virtual uint32_t op_mask() { return 0; }
+
+  virtual int error_handler(int err_no, string *error_content);
 };
 
 class RGWGetObj : public RGWOp {
@@ -135,7 +143,6 @@ protected:
   time_t *mod_ptr;
   time_t *unmod_ptr;
   map<string, bufferlist> attrs;
-  int ret;
   bool get_data;
   bool partial_content;
   bool range_parsed;
@@ -165,7 +172,6 @@ public:
     partial_content = false;
     range_parsed = false;
     skip_manifest = false;
-    ret = 0;
     is_slo = false;
  }
 
@@ -197,6 +203,18 @@ public:
   virtual bool need_object_expiration() { return false; }
 };
 
+class RGWGetObj_CB : public RGWGetDataCB
+{
+  RGWGetObj *op;
+public:
+  explicit RGWGetObj_CB(RGWGetObj *_op) : op(_op) {}
+  virtual ~RGWGetObj_CB() {}
+
+  int handle_data(bufferlist& bl, off_t bl_ofs, off_t bl_len) {
+    return op->get_data_cb(bl, bl_ofs, bl_len);
+  }
+};
+
 class RGWBulkDelete : public RGWOp {
 public:
   struct acct_path_t {
@@ -252,13 +270,11 @@ public:
   static const size_t MAX_CHUNK_ENTRIES = 1024;
 
 protected:
-  int ret;
   std::unique_ptr<Deleter> deleter;
 
 public:
   RGWBulkDelete()
-    : ret(0),
-      deleter(nullptr) {
+    : deleter(nullptr) {
   }
 
   int verify_permission();
@@ -282,7 +298,6 @@ inline ostream& operator<<(ostream& out, const RGWBulkDelete::acct_path_t &o) {
 
 class RGWListBuckets : public RGWOp {
 protected:
-  int ret;
   bool sent_data;
   string marker;
   string end_marker;
@@ -295,7 +310,7 @@ protected:
   map<string, bufferlist> attrs;
 
 public:
-  RGWListBuckets() : ret(0), sent_data(false) {
+  RGWListBuckets() : sent_data(false) {
     limit = limit_max = RGW_LIST_BUCKETS_LIMIT_MAX;
     buckets_count = 0;
     buckets_objcount = 0;
@@ -322,7 +337,6 @@ public:
 
 class RGWStatAccount : public RGWOp {
 protected:
-  int ret;
   uint32_t buckets_count;
   uint64_t buckets_objcount;
   uint64_t buckets_size;
@@ -330,7 +344,6 @@ protected:
 
 public:
   RGWStatAccount() {
-    ret = 0;
     buckets_count = 0;
     buckets_objcount = 0;
     buckets_size = 0;
@@ -358,7 +371,6 @@ protected:
   string encoding_type;
   bool list_versions;
   int max;
-  int ret;
   vector<RGWObjEnt> objs;
   map<string, bool> common_prefixes;
 
@@ -368,7 +380,7 @@ protected:
   int parse_max_keys();
 
 public:
-  RGWListBucket() : list_versions(false), max(0), ret(0),
+  RGWListBucket() : list_versions(false), max(0),
                     default_max(0), is_truncated(false) {}
   int verify_permission();
   void pre_exec();
@@ -386,7 +398,7 @@ class RGWGetBucketLogging : public RGWOp {
 public:
   RGWGetBucketLogging() {}
   int verify_permission();
-  void execute() {}
+  void execute() { }
 
   virtual void send_response() = 0;
   virtual const string name() { return "get_bucket_logging"; }
@@ -399,7 +411,7 @@ public:
   RGWGetBucketLocation() {}
   ~RGWGetBucketLocation() {}
   int verify_permission();
-  void execute() {}
+  void execute() { }
 
   virtual void send_response() = 0;
   virtual const string name() { return "get_bucket_location"; }
@@ -426,9 +438,8 @@ public:
 class RGWSetBucketVersioning : public RGWOp {
 protected:
   bool enable_versioning;
-  int ret;
 public:
-  RGWSetBucketVersioning() : enable_versioning(false), ret(0) {}
+  RGWSetBucketVersioning() : enable_versioning(false) {}
 
   int verify_permission();
   void pre_exec();
@@ -442,13 +453,58 @@ public:
   virtual uint32_t op_mask() { return RGW_OP_TYPE_WRITE; }
 };
 
+class RGWGetBucketWebsite : public RGWOp {
+public:
+  RGWGetBucketWebsite() {}
+
+  int verify_permission();
+  void pre_exec();
+  void execute();
+
+  virtual void send_response() = 0;
+  virtual const string name() { return "get_bucket_website"; }
+  virtual RGWOpType get_type() { return RGW_OP_GET_BUCKET_WEBSITE; }
+  virtual uint32_t op_mask() { return RGW_OP_TYPE_READ; }
+};
+
+class RGWSetBucketWebsite : public RGWOp {
+protected:
+  RGWBucketWebsiteConf website_conf;
+public:
+  RGWSetBucketWebsite() {}
+
+  int verify_permission();
+  void pre_exec();
+  void execute();
+
+  virtual int get_params() { return 0; }
+
+  virtual void send_response() = 0;
+  virtual const string name() { return "set_bucket_website"; }
+  virtual RGWOpType get_type() { return RGW_OP_SET_BUCKET_WEBSITE; }
+  virtual uint32_t op_mask() { return RGW_OP_TYPE_WRITE; }
+};
+
+class RGWDeleteBucketWebsite : public RGWOp {
+public:
+  RGWDeleteBucketWebsite() {}
+
+  int verify_permission();
+  void pre_exec();
+  void execute();
+
+  virtual void send_response() = 0;
+  virtual const string name() { return "delete_bucket_website"; }
+  virtual RGWOpType get_type() { return RGW_OP_SET_BUCKET_WEBSITE; }
+  virtual uint32_t op_mask() { return RGW_OP_TYPE_WRITE; }
+};
+
 class RGWStatBucket : public RGWOp {
 protected:
-  int ret;
   RGWBucketEnt bucket;
 
 public:
-  RGWStatBucket() : ret(0) {}
+  RGWStatBucket() {}
   ~RGWStatBucket() {}
 
   int verify_permission();
@@ -463,7 +519,6 @@ public:
 
 class RGWCreateBucket : public RGWOp {
 protected:
-  int ret;
   RGWAccessControlPolicy policy;
   string location_constraint;
   string placement_rule;
@@ -475,7 +530,7 @@ protected:
   bufferlist in_data;
 
 public:
-  RGWCreateBucket() : ret(0), has_cors(false) {}
+  RGWCreateBucket() : has_cors(false) {}
 
   int verify_permission();
   void pre_exec();
@@ -493,12 +548,10 @@ public:
 
 class RGWDeleteBucket : public RGWOp {
 protected:
-  int ret;
-
   RGWObjVersionTracker objv_tracker;
 
 public:
-  RGWDeleteBucket() : ret(0) {}
+  RGWDeleteBucket() {}
 
   int verify_permission();
   void pre_exec();
@@ -571,7 +624,6 @@ class RGWPutObj : public RGWOp {
   friend class RGWPutObjProcessor;
 
 protected:
-  int ret;
   off_t ofs;
   const char *supplied_md5_b64;
   const char *supplied_etag;
@@ -590,7 +642,7 @@ protected:
   time_t delete_at;
 
 public:
-  RGWPutObj() : ret(0), ofs(0),
+  RGWPutObj() : ofs(0),
                 supplied_md5_b64(NULL),
                 supplied_etag(NULL),
                 if_match(NULL),
@@ -633,7 +685,6 @@ class RGWPostObj : public RGWOp {
 protected:
   off_t min_len;
   off_t max_len;
-  int ret;
   int len;
   off_t ofs;
   const char *supplied_md5_b64;
@@ -647,7 +698,7 @@ protected:
   time_t delete_at;
 
 public:
-  RGWPostObj() : min_len(0), max_len(LLONG_MAX), ret(0), len(0), ofs(0),
+  RGWPostObj() : min_len(0), max_len(LLONG_MAX), len(0), ofs(0),
 		 supplied_md5_b64(NULL), supplied_etag(NULL),
 		 data_pending(false), delete_at(0) {}
 
@@ -673,21 +724,18 @@ public:
 
 class RGWPutMetadataAccount : public RGWOp {
 protected:
-  int ret;
   set<string> rmattr_names;
   RGWAccessControlPolicy policy;
 
 public:
-  RGWPutMetadataAccount()
-    : ret(0)
-  {}
+  RGWPutMetadataAccount() {}
 
   virtual void init(RGWRados *store, struct req_state *s, RGWHandler *h) {
     RGWOp::init(store, s, h);
     policy.set_ctx(s->cct);
   }
   int verify_permission();
-  void pre_exec() { return; }
+  void pre_exec() { }
   void execute();
 
   virtual int get_params() = 0;
@@ -703,7 +751,6 @@ public:
 
 class RGWPutMetadataBucket : public RGWOp {
 protected:
-  int ret;
   set<string> rmattr_names;
   bool has_policy, has_cors;
   RGWAccessControlPolicy policy;
@@ -712,7 +759,7 @@ protected:
 
 public:
   RGWPutMetadataBucket()
-    : ret(0), has_policy(false), has_cors(false)
+    : has_policy(false), has_cors(false)
   {}
 
   virtual void init(RGWRados *store, struct req_state *s, RGWHandler *h) {
@@ -732,7 +779,6 @@ public:
 
 class RGWPutMetadataObject : public RGWOp {
 protected:
-  int ret;
   RGWAccessControlPolicy policy;
   string placement_rule;
   time_t delete_at;
@@ -740,8 +786,7 @@ protected:
 
 public:
   RGWPutMetadataObject()
-    : ret(0),
-      delete_at(0),
+    : delete_at(0),
       dlo_manifest(NULL)
   {}
 
@@ -763,7 +808,6 @@ public:
 
 class RGWDeleteObj : public RGWOp {
 protected:
-  int ret;
   bool delete_marker;
   bool multipart_delete;
   string version_id;
@@ -771,8 +815,7 @@ protected:
 
 public:
   RGWDeleteObj()
-    : ret(0),
-      delete_marker(false),
+    : delete_marker(false),
       multipart_delete(false),
       deleter(nullptr) {
   }
@@ -804,7 +847,6 @@ protected:
   time_t unmod_time;
   time_t *mod_ptr;
   time_t *unmod_ptr;
-  int ret;
   map<string, bufferlist> attrs;
   string src_tenant_name, src_bucket_name;
   rgw_bucket src_bucket;
@@ -844,7 +886,6 @@ public:
     unmod_time = 0;
     mod_ptr = NULL;
     unmod_ptr = NULL;
-    ret = 0;
     src_mtime = 0;
     mtime = 0;
     attrs_mod = RGWRados::ATTRSMOD_NONE;
@@ -877,11 +918,10 @@ public:
 
 class RGWGetACLs : public RGWOp {
 protected:
-  int ret;
   string acls;
 
 public:
-  RGWGetACLs() : ret(0) {}
+  RGWGetACLs() {}
 
   int verify_permission();
   void pre_exec();
@@ -895,14 +935,12 @@ public:
 
 class RGWPutACLs : public RGWOp {
 protected:
-  int ret;
   size_t len;
   char *data;
   ACLOwner owner;
 
 public:
   RGWPutACLs() {
-    ret = 0;
     len = 0;
     data = NULL;
   }
@@ -924,10 +962,9 @@ public:
 
 class RGWGetCORS : public RGWOp {
 protected:
-  int ret;
 
 public:
-  RGWGetCORS() : ret(0) {}
+  RGWGetCORS() {}
 
   int verify_permission();
   void execute();
@@ -940,14 +977,11 @@ public:
 
 class RGWPutCORS : public RGWOp {
 protected:
-  int ret;
   bufferlist cors_bl;
 
 public:
-  RGWPutCORS() {
-    ret = 0;
-  }
-  virtual ~RGWPutCORS() { }
+  RGWPutCORS() {}
+  virtual ~RGWPutCORS() {}
 
   int verify_permission();
   void execute();
@@ -961,10 +995,9 @@ public:
 
 class RGWDeleteCORS : public RGWOp {
 protected:
-  int ret;
 
 public:
-  RGWDeleteCORS() : ret(0) {}
+  RGWDeleteCORS() {}
 
   int verify_permission();
   void execute();
@@ -977,12 +1010,11 @@ public:
 
 class RGWOptionsCORS : public RGWOp {
 protected:
-  int ret;
   RGWCORSRule *rule;
   const char *origin, *req_hdrs, *req_meth;
 
 public:
-  RGWOptionsCORS() : ret(0), rule(NULL), origin(NULL),
+  RGWOptionsCORS() : rule(NULL), origin(NULL),
                      req_hdrs(NULL), req_meth(NULL) {
   }
 
@@ -1016,9 +1048,8 @@ public:
 class RGWSetRequestPayment : public RGWOp {
 protected:
   bool requester_pays;
-  int ret;
 public:
- RGWSetRequestPayment() : requester_pays(false), ret(0) {}
+ RGWSetRequestPayment() : requester_pays(false) {}
 
   int verify_permission();
   void pre_exec();
@@ -1034,14 +1065,11 @@ public:
 
 class RGWInitMultipart : public RGWOp {
 protected:
-  int ret;
   string upload_id;
   RGWAccessControlPolicy policy;
 
 public:
-  RGWInitMultipart() {
-    ret = 0;
-  }
+  RGWInitMultipart() {}
 
   virtual void init(RGWRados *store, struct req_state *s, RGWHandler *h) {
     RGWOp::init(store, s, h);
@@ -1060,7 +1088,6 @@ public:
 
 class RGWCompleteMultipart : public RGWOp {
 protected:
-  int ret;
   string upload_id;
   string etag;
   char *data;
@@ -1068,7 +1095,6 @@ protected:
 
 public:
   RGWCompleteMultipart() {
-    ret = 0;
     data = NULL;
     len = 0;
   }
@@ -1088,11 +1114,8 @@ public:
 };
 
 class RGWAbortMultipart : public RGWOp {
-protected:
-  int ret;
-
 public:
-  RGWAbortMultipart() : ret(0) {}
+  RGWAbortMultipart() {}
 
   int verify_permission();
   void pre_exec();
@@ -1106,7 +1129,6 @@ public:
 
 class RGWListMultipart : public RGWOp {
 protected:
-  int ret;
   string upload_id;
   map<uint32_t, RGWUploadPartInfo> parts;
   int max_parts;
@@ -1116,7 +1138,6 @@ protected:
 
 public:
   RGWListMultipart() {
-    ret = 0;
     max_parts = 1000;
     marker = 0;
     truncated = false;
@@ -1215,7 +1236,6 @@ protected:
   RGWMultipartUploadEntry next_marker; 
   int max_uploads;
   string delimiter;
-  int ret;
   vector<RGWMultipartUploadEntry> uploads;
   map<string, bool> common_prefixes;
   bool is_truncated;
@@ -1224,7 +1244,6 @@ protected:
 public:
   RGWListBucketMultiparts() {
     max_uploads = 0;
-    ret = 0;
     is_truncated = false;
     default_max = 0;
   }
@@ -1248,7 +1267,6 @@ public:
 
 class RGWDeleteMultiObj : public RGWOp {
 protected:
-  int ret;
   int max_to_delete;
   size_t len;
   char *data;
@@ -1258,7 +1276,6 @@ protected:
 
 public:
   RGWDeleteMultiObj() {
-    ret = 0;
     max_to_delete = 1000;
     len = 0;
     data = NULL;
@@ -1286,6 +1303,7 @@ protected:
   RGWRados *store;
   struct req_state *s;
 
+  int do_init_permissions();
   int do_read_permissions(RGWOp *op, bool only_bucket);
 
   virtual RGWOp *op_get() { return NULL; }
@@ -1302,8 +1320,17 @@ public:
 
   virtual RGWOp *get_op(RGWRados *store);
   virtual void put_op(RGWOp *op);
+  virtual int init_permissions(RGWOp *op) {
+    return 0;
+  }
+  virtual int retarget(RGWOp *op, RGWOp **new_op) {
+    *new_op = op;
+    return 0;
+  }
   virtual int read_permissions(RGWOp *op) = 0;
   virtual int authorize() = 0;
+  virtual int postauth_init() = 0;
+  virtual int error_handler(int err_no, string *error_content);
 };
 
 #endif
diff --git a/src/rgw/rgw_orphan.h b/src/rgw/rgw_orphan.h
index ad539b2..8d50866 100644
--- a/src/rgw/rgw_orphan.h
+++ b/src/rgw/rgw_orphan.h
@@ -43,7 +43,7 @@ struct RGWOrphanSearchStage {
   string marker;
 
   RGWOrphanSearchStage() : stage(ORPHAN_SEARCH_STAGE_UNKNOWN), shard(0) {}
-  RGWOrphanSearchStage(RGWOrphanSearchStageId _stage) : stage(_stage), shard(0) {}
+  explicit RGWOrphanSearchStage(RGWOrphanSearchStageId _stage) : stage(_stage), shard(0) {}
   RGWOrphanSearchStage(RGWOrphanSearchStageId _stage, int _shard, const string& _marker) : stage(_stage), shard(_shard), marker(_marker) {}
 
   void encode(bufferlist& bl) const {
@@ -127,9 +127,7 @@ class RGWOrphanStore {
   string oid;
 
 public:
-  RGWOrphanStore(RGWRados *_store) : store(_store) {
-    oid = RGW_ORPHAN_INDEX_OID;
-  }
+  explicit RGWOrphanStore(RGWRados *_store) : store(_store), oid(RGW_ORPHAN_INDEX_OID) {}
 
   librados::IoCtx& get_ioctx() { return ioctx; }
 
diff --git a/src/rgw/rgw_quota.cc b/src/rgw/rgw_quota.cc
index c300c44..c6c62e7 100644
--- a/src/rgw/rgw_quota.cc
+++ b/src/rgw/rgw_quota.cc
@@ -306,7 +306,7 @@ protected:
   int fetch_stats_from_storage(const rgw_user& user, rgw_bucket& bucket, RGWStorageStats& stats);
 
 public:
-  RGWBucketStatsCache(RGWRados *_store) : RGWQuotaCache<rgw_bucket>(_store, _store->ctx()->_conf->rgw_bucket_quota_cache_size) {
+  explicit RGWBucketStatsCache(RGWRados *_store) : RGWQuotaCache<rgw_bucket>(_store, _store->ctx()->_conf->rgw_bucket_quota_cache_size) {
   }
 
   AsyncRefreshHandler *allocate_refresh_handler(const rgw_user& user, rgw_bucket& bucket) {
diff --git a/src/rgw/rgw_rados.cc b/src/rgw/rgw_rados.cc
index aa5b3b4..a75b69e 100644
--- a/src/rgw/rgw_rados.cc
+++ b/src/rgw/rgw_rados.cc
@@ -1300,7 +1300,7 @@ class RGWWatcher : public librados::WatchCtx2 {
   class C_ReinitWatch : public Context {
     RGWWatcher *watcher;
     public:
-      C_ReinitWatch(RGWWatcher *_watcher) : watcher(_watcher) {}
+      explicit C_ReinitWatch(RGWWatcher *_watcher) : watcher(_watcher) {}
       void finish(int r) {
         watcher->reinit();
       }
@@ -4122,8 +4122,10 @@ int RGWRados::copy_obj_to_remote_dest(RGWObjState *astate,
   RGWRESTStreamWriteRequest *out_stream_req;
 
   int ret = rest_master_conn->put_obj_init(user_id, dest_obj, astate->size, src_attrs, &out_stream_req);
-  if (ret < 0)
+  if (ret < 0) {
+    delete out_stream_req;
     return ret;
+  }
 
   ret = read_op.iterate(0, astate->size - 1, out_stream_req->get_out_cb());
   if (ret < 0)
@@ -6119,7 +6121,7 @@ struct get_obj_data : public RefCountedObject {
   Throttle throttle;
   list<bufferlist> read_list;
 
-  get_obj_data(CephContext *_cct)
+  explicit get_obj_data(CephContext *_cct)
     : cct(_cct),
       rados(NULL), ctx(NULL),
       total_read(0), lock("get_obj_data"), data_lock("get_obj_data::data_lock"),
@@ -7419,7 +7421,7 @@ class RGWGetUserStatsContext : public RGWGetUserHeader_CB {
   RGWGetUserStats_CB *cb;
 
 public:
-  RGWGetUserStatsContext(RGWGetUserStats_CB *_cb) : cb(_cb) {}
+  explicit RGWGetUserStatsContext(RGWGetUserStats_CB *_cb) : cb(_cb) {}
   void handle_response(int r, cls_user_header& header) {
     cls_user_stats& hs = header.stats;
     if (r >= 0) {
@@ -7940,7 +7942,7 @@ int RGWRados::pool_iterate(RGWPoolIterCtx& ctx, uint32_t num, vector<RGWObjEnt>&
 struct RGWAccessListFilterPrefix : public RGWAccessListFilter {
   string prefix;
 
-  RGWAccessListFilterPrefix(const string& _prefix) : prefix(_prefix) {}
+  explicit RGWAccessListFilterPrefix(const string& _prefix) : prefix(_prefix) {}
   virtual bool filter(string& name, string& key) {
     return (prefix.compare(key.substr(0, prefix.size())) == 0);
   }
diff --git a/src/rgw/rgw_rados.h b/src/rgw/rgw_rados.h
index f0f11b4..cb3c880 100644
--- a/src/rgw/rgw_rados.h
+++ b/src/rgw/rgw_rados.h
@@ -464,7 +464,7 @@ public:
     obj_iterator() : manifest(NULL) {
       init();
     }
-    obj_iterator(RGWObjManifest *_m) : manifest(_m) {
+    explicit obj_iterator(RGWObjManifest *_m) : manifest(_m) {
       init();
       if (!manifest->empty()) {
         seek(0);
@@ -629,8 +629,7 @@ struct RGWObjState {
   RGWObjState() : is_atomic(false), has_attrs(0), exists(false),
                   size(0), mtime(0), epoch(0), fake_tag(false), has_manifest(false),
                   has_data(false), prefetch_data(false), keep_tail(false), is_olh(false) {}
-  RGWObjState(const RGWObjState& rhs) {
-    obj = rhs.obj;
+  RGWObjState(const RGWObjState& rhs) : obj (rhs.obj) {
     is_atomic = rhs.is_atomic;
     has_attrs = rhs.has_attrs;
     exists = rhs.exists;
@@ -917,6 +916,23 @@ struct RGWRegion {
   string default_placement;
 
   list<string> hostnames;
+  list<string> hostnames_s3website;
+  // TODO: Maybe convert hostnames to a map<string,list<string>> for
+  // endpoint_type->hostnames
+/*
+20:05 < _robbat21irssi> maybe I do something like: if (hostname_map.empty()) { populate all map keys from hostnames; };
+20:05 < _robbat21irssi> but that's a later compatibility migration planning bit
+20:06 < yehudasa> more like if (!hostnames.empty()) {
+20:06 < yehudasa> for (list<string>::iterator iter = hostnames.begin(); iter != hostnames.end(); ++iter) {
+20:06 < yehudasa> hostname_map["s3"].append(iter->second);
+20:07 < yehudasa> hostname_map["s3website"].append(iter->second);
+20:07 < yehudasa> s/append/push_back/g
+20:08 < _robbat21irssi> inner loop over APIs
+20:08 < yehudasa> yeah, probably
+20:08 < _robbat21irssi> s3, s3website, swift, swift_auth, swift_website
+*/
+  map<string, list<string> > api_hostname_map;
+  map<string, list<string> > api_endpoints_map;
 
   CephContext *cct;
   RGWRados *store;
@@ -924,7 +940,7 @@ struct RGWRegion {
   RGWRegion() : is_master(false), cct(NULL), store(NULL) {}
 
   void encode(bufferlist& bl) const {
-    ENCODE_START(2, 1, bl);
+    ENCODE_START(3, 1, bl);
     ::encode(name, bl);
     ::encode(api_name, bl);
     ::encode(is_master, bl);
@@ -934,11 +950,12 @@ struct RGWRegion {
     ::encode(placement_targets, bl);
     ::encode(default_placement, bl);
     ::encode(hostnames, bl);
+    ::encode(hostnames_s3website, bl);
     ENCODE_FINISH(bl);
   }
 
   void decode(bufferlist::iterator& bl) {
-    DECODE_START(2, bl);
+    DECODE_START(3, bl);
     ::decode(name, bl);
     ::decode(api_name, bl);
     ::decode(is_master, bl);
@@ -950,6 +967,9 @@ struct RGWRegion {
     if (struct_v >= 2) {
       ::decode(hostnames, bl);
     }
+    if (struct_v >= 3) {
+      ::decode(hostnames_s3website, bl);
+    }
     DECODE_FINISH(bl);
   }
 
@@ -1110,7 +1130,7 @@ public:
     OPSTATE_CANCELLED   = 5,
   };
 
-  RGWOpState(RGWRados *_store);
+  explicit RGWOpState(RGWRados *_store);
 
   int state_from_str(const string& s, OpState *state);
   int set_state(const string& client_id, const string& op_id, const string& object, OpState state);
@@ -1141,7 +1161,7 @@ protected:
   rgw_bucket bucket;
   map<RGWObjCategory, RGWStorageStats> *stats;
 public:
-  RGWGetBucketStats_CB(rgw_bucket& _bucket) : bucket(_bucket), stats(NULL) {}
+  explicit RGWGetBucketStats_CB(rgw_bucket& _bucket) : bucket(_bucket), stats(NULL) {}
   virtual ~RGWGetBucketStats_CB() {}
   virtual void handle_response(int r) = 0;
   virtual void set_response(map<RGWObjCategory, RGWStorageStats> *_stats) {
@@ -1154,7 +1174,7 @@ protected:
   rgw_user user;
   RGWStorageStats stats;
 public:
-  RGWGetUserStats_CB(const rgw_user& _user) : user(_user) {}
+  explicit RGWGetUserStats_CB(const rgw_user& _user) : user(_user) {}
   virtual ~RGWGetUserStats_CB() {}
   virtual void handle_response(int r) = 0;
   virtual void set_response(RGWStorageStats& _stats) {
@@ -1193,7 +1213,7 @@ struct RGWObjectCtx {
   map<rgw_obj, RGWObjState> objs_state;
   void *user_ctx;
 
-  RGWObjectCtx(RGWRados *_store) : store(_store), user_ctx(NULL) { }
+  explicit RGWObjectCtx(RGWRados *_store) : store(_store), user_ctx(NULL) { }
   RGWObjectCtx(RGWRados *_store, void *_user_ctx) : store(_store), user_ctx(_user_ctx) { }
 
   RGWObjState *get_state(rgw_obj& obj);
@@ -1244,7 +1264,7 @@ class RGWRados
   class C_Tick : public Context {
     RGWRados *rados;
   public:
-    C_Tick(RGWRados *_r) : rados(_r) {}
+    explicit C_Tick(RGWRados *_r) : rados(_r) {}
     void finish(int r) {
       rados->tick();
     }
@@ -1489,7 +1509,7 @@ public:
         rgw_cache_entry_info *cache_info;
       } read_params;
 
-      Read(RGWRados::SystemObject *_source) : source(_source) {}
+      explicit Read(RGWRados::SystemObject *_source) : source(_source) {}
 
       int stat(RGWObjVersionTracker *objv_tracker);
       int read(int64_t ofs, int64_t end, bufferlist& bl, RGWObjVersionTracker *objv_tracker);
@@ -1504,7 +1524,7 @@ public:
     librados::IoCtx index_ctx;
     string bucket_obj;
 
-    BucketShard(RGWRados *_store) : store(_store), shard_id(-1) {}
+    explicit BucketShard(RGWRados *_store) : store(_store), shard_id(-1) {}
     int init(rgw_bucket& _bucket, rgw_obj& obj);
   };
 
@@ -1589,7 +1609,7 @@ public:
         Params() : lastmod(NULL), read_size(NULL), obj_size(NULL), attrs(NULL), perr(NULL) {}
       } params;
 
-      Read(RGWRados::Object *_source) : source(_source) {}
+      explicit Read(RGWRados::Object *_source) : source(_source) {}
 
       int prepare(int64_t *pofs, int64_t *pend);
       int read(int64_t ofs, int64_t end, bufferlist& bl);
@@ -1621,7 +1641,7 @@ public:
                  if_match(NULL), if_nomatch(NULL), olh_epoch(0), delete_at(0) {}
       } meta;
 
-      Write(RGWRados::Object *_target) : target(_target) {}
+      explicit Write(RGWRados::Object *_target) : target(_target) {}
 
       int write_meta(uint64_t size,  map<std::string, bufferlist>& attrs);
       int write_data(const char *data, uint64_t ofs, uint64_t len, bool exclusive);
@@ -1650,7 +1670,7 @@ public:
         DeleteResult() : delete_marker(false) {}
       } result;
       
-      Delete(RGWRados::Object *_target) : target(_target) {}
+      explicit Delete(RGWRados::Object *_target) : target(_target) {}
 
       int delete_obj();
     };
@@ -1678,7 +1698,7 @@ public:
       } state;
 
 
-      Stat(RGWRados::Object *_source) : source(_source) {}
+      explicit Stat(RGWRados::Object *_source) : source(_source) {}
 
       int stat_async();
       int wait();
@@ -1755,7 +1775,7 @@ public:
       } params;
 
     public:
-      List(RGWRados::Bucket *_target) : target(_target) {}
+      explicit List(RGWRados::Bucket *_target) : target(_target) {}
 
       int list_objects(int max, vector<RGWObjEnt> *result, map<string, bool> *common_prefixes, bool *is_truncated);
       rgw_obj_key& get_next_marker() {
diff --git a/src/rgw/rgw_replica_log.h b/src/rgw/rgw_replica_log.h
index 9ea3982..794a5d9 100644
--- a/src/rgw/rgw_replica_log.h
+++ b/src/rgw/rgw_replica_log.h
@@ -45,7 +45,7 @@ protected:
   RGWRados *store;
   int open_ioctx(librados::IoCtx& ctx, const string& pool);
 
-  RGWReplicaLogger(RGWRados *_store);
+  explicit RGWReplicaLogger(RGWRados *_store);
 
   int update_bound(const string& oid, const string& pool,
                    const string& daemon_id, const string& marker,
@@ -105,7 +105,7 @@ class RGWReplicaBucketLogger : private RGWReplicaLogger {
   string obj_name(const rgw_bucket& bucket, int shard_id, bool index_by_instance);
 
 public:
-  RGWReplicaBucketLogger(RGWRados *_store);
+  explicit RGWReplicaBucketLogger(RGWRados *_store);
   int update_bound(const rgw_bucket& bucket, int shard_id, const string& daemon_id,
                    const string& marker, const utime_t& time,
                    const list<RGWReplicaItemMarker> *entries);
diff --git a/src/rgw/rgw_rest.cc b/src/rgw/rgw_rest.cc
index d7f6b60..0097295 100644
--- a/src/rgw/rgw_rest.cc
+++ b/src/rgw/rgw_rest.cc
@@ -5,6 +5,7 @@
 #include <limits.h>
 
 #include "common/Formatter.h"
+#include "common/HTMLFormatter.h"
 #include "common/utf8.h"
 #include "include/str_list.h"
 #include "rgw_common.h"
@@ -21,6 +22,8 @@
 #include "rgw_client_io.h"
 #include "rgw_resolve.h"
 
+#include <numeric>
+
 #define dout_subsys ceph_subsys_rgw
 
 
@@ -39,6 +42,11 @@ static const struct rgw_http_attr base_rgw_to_http_attrs[] = {
   { RGW_ATTR_CONTENT_DISP,      "Content-Disposition" },
   { RGW_ATTR_CONTENT_ENC,       "Content-Encoding" },
   { RGW_ATTR_USER_MANIFEST,     "X-Object-Manifest" },
+  /* RGW_ATTR_AMZ_WEBSITE_REDIRECT_LOCATION header depends on access mode:
+   * S3 endpoint: x-amz-website-redirect-location
+   * S3Website endpoint: Location
+   */
+  { RGW_ATTR_AMZ_WEBSITE_REDIRECT_LOCATION, "x-amz-website-redirect-location" },
 };
 
 
@@ -160,8 +168,9 @@ string camelcase_dash_http_attr(const string& orig)
   return string(buf);
 }
 
-/* avoid duplicate hostnames in hostnames list */
+/* avoid duplicate hostnames in hostnames lists */
 static set<string> hostnames_set;
+static set<string> hostnames_s3website_set;
 
 void rgw_rest_init(CephContext *cct, RGWRegion& region)
 {
@@ -197,6 +206,8 @@ void rgw_rest_init(CephContext *cct, RGWRegion& region)
     hostnames_set.insert(cct->_conf->rgw_dns_name);
   }
   hostnames_set.insert(region.hostnames.begin(),  region.hostnames.end());
+  string s;
+  ldout(cct, 20) << "RGW hostnames: " << std::accumulate(hostnames_set.begin(), hostnames_set.end(), s) << dendl;
   /* TODO: We should have a sanity check that no hostname matches the end of
    * any other hostname, otherwise we will get ambigious results from
    * rgw_find_host_in_domains.
@@ -206,6 +217,16 @@ void rgw_rest_init(CephContext *cct, RGWRegion& region)
    * Z.A clearly splits to subdomain=Z, domain=Z
    * X.B.A ambigously splits to both {X, B.A} and {X.B, A}
    */
+
+  if (!cct->_conf->rgw_dns_s3website_name.empty()) {
+    hostnames_s3website_set.insert(cct->_conf->rgw_dns_s3website_name);
+  }
+  hostnames_s3website_set.insert(region.hostnames_s3website.begin(), region.hostnames_s3website.end());
+  s.clear();
+  ldout(cct, 20) << "RGW S3website hostnames: " << std::accumulate(hostnames_s3website_set.begin(), hostnames_s3website_set.end(), s) << dendl;
+  /* TODO: we should repeat the hostnames_set sanity check here
+   * and ALSO decide about overlap, if any
+   */
 }
 
 static bool str_ends_with(const string& s, const string& suffix, size_t *pos)
@@ -223,10 +244,14 @@ static bool str_ends_with(const string& s, const string& suffix, size_t *pos)
   return s.compare(p, len, suffix) == 0;
 }
 
-static bool rgw_find_host_in_domains(const string& host, string *domain, string *subdomain)
+static bool rgw_find_host_in_domains(const string& host, string *domain, string *subdomain, set<string> valid_hostnames_set)
 {
   set<string>::iterator iter;
-  for (iter = hostnames_set.begin(); iter != hostnames_set.end(); ++iter) {
+  /** TODO, Future optimization
+   * store hostnames_set elements _reversed_, and look for a prefix match,
+   * which is much faster than a suffix match.
+   */
+  for (iter = valid_hostnames_set.begin(); iter != valid_hostnames_set.end(); ++iter) {
     size_t pos;
     if (!str_ends_with(host, *iter, &pos))
       continue;
@@ -249,6 +274,7 @@ static bool rgw_find_host_in_domains(const string& host, string *domain, string
 
 static void dump_status(struct req_state *s, int status, const char *status_name)
 {
+  s->formatter->set_status(status, status_name);
   int r = s->cio->send_status(status, status_name);
   if (r < 0) {
     ldout(s->cct, 0) << "ERROR: s->cio->send_status() returned err=" << r << dendl;
@@ -258,6 +284,7 @@ static void dump_status(struct req_state *s, int status, const char *status_name
 void rgw_flush_formatter_and_reset(struct req_state *s, Formatter *formatter)
 {
   std::ostringstream oss;
+  formatter->output_footer();
   formatter->flush(oss);
   std::string outs(oss.str());
   if (!outs.empty() && s->op != OP_HEAD) {
@@ -294,9 +321,14 @@ void set_req_state_err(struct rgw_err& err,     /* out */
       return;
     }
   }
+
   r = search_err(err_no, RGW_HTTP_ERRORS, ARRAY_LEN(RGW_HTTP_ERRORS));
   if (r) {
-    err.http_ret = r->http_ret;
+    if (prot_flags & RGW_REST_WEBSITE && err_no == ERR_WEBSITE_REDIRECT && err.is_clear()) {
+      // http_ret was custom set, so don't change it!
+    } else {
+      err.http_ret = r->http_ret;
+    }
     err.s3_code = r->s3_code;
     return;
   }
@@ -346,9 +378,10 @@ void dump_content_length(struct req_state *s, uint64_t len)
 {
   int r = s->cio->send_content_length(len);
   if (r < 0) {
-    ldout(s->cct, 0) << "ERROR: s->cio->print() returned err=" << r << dendl;
+    ldout(s->cct, 0) << "ERROR: s->cio->send_content_length() returned err="
+                     << r << dendl;
   }
-  r = s->cio->print("Accept-Ranges: %s\r\n", "bytes");
+  r = s->cio->print("Accept-Ranges: bytes\r\n");
   if (r < 0) {
     ldout(s->cct, 0) << "ERROR: s->cio->print() returned err=" << r << dendl;
   }
@@ -521,8 +554,7 @@ void dump_access_control(req_state *s, RGWOp *op)
 void dump_start(struct req_state *s)
 {
   if (!s->content_started) {
-    if (s->format == RGW_FORMAT_XML)
-      s->formatter->write_raw_data(XMLFormatter::XML_1_DTD);
+    s->formatter->output_header();
     s->content_started = true;
   }
 }
@@ -538,7 +570,7 @@ void dump_trans_id(req_state *s)
 }
 
 void end_header(struct req_state *s, RGWOp *op, const char *content_type, const int64_t proposed_content_length,
-		bool force_content_type)
+		bool force_content_type, bool force_no_error)
 {
   string ctype;
 
@@ -568,6 +600,9 @@ void end_header(struct req_state *s, RGWOp *op, const char *content_type, const
     case RGW_FORMAT_JSON:
       ctype = "application/json";
       break;
+    case RGW_FORMAT_HTML:
+      ctype = "text/html";
+      break;
     default:
       ctype = "text/plain";
       break;
@@ -576,16 +611,24 @@ void end_header(struct req_state *s, RGWOp *op, const char *content_type, const
       ctype.append("; charset=utf-8");
     content_type = ctype.c_str();
   }
-  if (s->err.is_err()) {
+  if (!force_no_error && s->err.is_err()) {
     dump_start(s);
-    s->formatter->open_object_section("Error");
+    if (s->format != RGW_FORMAT_HTML) {
+      s->formatter->open_object_section("Error");
+    }
     if (!s->err.s3_code.empty())
       s->formatter->dump_string("Code", s->err.s3_code);
     if (!s->err.message.empty())
       s->formatter->dump_string("Message", s->err.message);
-    if (!s->trans_id.empty())
+    if (!s->bucket_name.empty()) // TODO: connect to expose_bucket
+      s->formatter->dump_string("BucketName", s->bucket_name);
+    if (!s->trans_id.empty()) // TODO: connect to expose_bucket or another toggle
       s->formatter->dump_string("RequestId", s->trans_id);
-    s->formatter->close_section();
+    s->formatter->dump_string("HostId", "FIXME-TODO-How-does-amazon-generate-HostId"); // TODO, FIXME
+    if (s->format != RGW_FORMAT_HTML) {
+      s->formatter->close_section();
+    }
+    s->formatter->output_footer();
     dump_content_length(s, s->formatter->get_len());
   } else {
     if (proposed_content_length != NO_CONTENT_LENGTH) {
@@ -609,32 +652,68 @@ void end_header(struct req_state *s, RGWOp *op, const char *content_type, const
   rgw_flush_formatter_and_reset(s, s->formatter);
 }
 
-void abort_early(struct req_state *s, RGWOp *op, int err_no)
+void abort_early(struct req_state *s, RGWOp *op, int err_no, RGWHandler* handler)
 {
+  string error_content("");
   if (!s->formatter) {
     s->formatter = new JSONFormatter;
     s->format = RGW_FORMAT_JSON;
   }
+
+  // op->error_handler is responsible for calling its handler's error_handler
+  if (op != NULL) {
+    int new_err_no;
+    new_err_no = op->error_handler(err_no, &error_content);
+    ldout(s->cct, 20) << "op->ERRORHANDLER: err_no=" << err_no << " new_err_no=" << new_err_no << dendl;
+    err_no = new_err_no;
+  } else if (handler != NULL) {
+    int new_err_no;
+    new_err_no = handler->error_handler(err_no, &error_content);
+    ldout(s->cct, 20) << "handler->ERRORHANDLER: err_no=" << err_no << " new_err_no=" << new_err_no << dendl;
+    err_no = new_err_no;
+  }
   set_req_state_err(s, err_no);
   dump_errno(s);
   dump_bucket_from_state(s);
-  if (err_no == -ERR_PERMANENT_REDIRECT && !s->region_endpoint.empty()) {
-    string dest_uri = s->region_endpoint;
-    /*
-     * reqest_uri is always start with slash, so we need to remove
-     * the unnecessary slash at the end of dest_uri.
-     */
-    if (dest_uri[dest_uri.size() - 1] == '/') {
-      dest_uri = dest_uri.substr(0, dest_uri.size() - 1);
+  if (err_no == -ERR_PERMANENT_REDIRECT || err_no == -ERR_WEBSITE_REDIRECT) {
+    string dest_uri;
+    if (!s->redirect.empty()) {
+      dest_uri = s->redirect;
+    } else if (!s->region_endpoint.empty()) {
+      dest_uri = s->region_endpoint;
+      /*
+       * Assign to the outer dest_uri (redeclaring it here would drop the redirect).
+       * request_uri always starts with a slash, so strip a trailing slash from dest_uri.
+       */
+      if (dest_uri[dest_uri.size() - 1] == '/') {
+        dest_uri = dest_uri.substr(0, dest_uri.size() - 1);
+      }
+      dest_uri += s->info.request_uri;
+      dest_uri += "?";
+      dest_uri += s->info.request_params;
     }
-    dest_uri += s->info.request_uri;
-    dest_uri += "?";
-    dest_uri += s->info.request_params;
 
-    dump_redirect(s, dest_uri);
+    if (!dest_uri.empty()) {
+      dump_redirect(s, dest_uri);
+    }
+  }
+  if (!error_content.empty()) {
+    ldout(s->cct, 20) << "error_content is set, we need to serve it INSTEAD of firing the formatter" << dendl;
+    /*
+     * FIXME we must add all error entries as headers here:
+     * when having a working errordoc, then the s3 error fields are rendered as HTTP headers, e.g.:
+     *
+     *   x-amz-error-code: NoSuchKey
+     *   x-amz-error-message: The specified key does not exist.
+     *   x-amz-error-detail-Key: foo
+     */
+    end_header(s, op, NULL, NO_CONTENT_LENGTH, false, true);
+    s->cio->write(error_content.c_str(), error_content.size());
+    s->formatter->reset();
+  } else {
+    end_header(s, op);
+    rgw_flush_formatter_and_reset(s, s->formatter);
   }
-  end_header(s, op);
-  rgw_flush_formatter_and_reset(s, s->formatter);
   perfcounter->inc(l_rgw_failed_req);
 }
 
@@ -930,8 +1009,8 @@ int RGWPutACLs_ObjStore::get_params()
   if (cl) {
     data = (char *)malloc(cl + 1);
     if (!data) {
-       ret = -ENOMEM;
-       return ret;
+       op_ret = -ENOMEM;
+       return op_ret;
     }
     int read_len;
     int r = s->cio->read(data, cl, &read_len);
@@ -943,7 +1022,7 @@ int RGWPutACLs_ObjStore::get_params()
     len = 0;
   }
 
-  return ret;
+  return op_ret;
 }
 
 static int read_all_chunked_input(req_state *s, char **pdata, int *plen, int max_read)
@@ -1039,14 +1118,14 @@ int RGWCompleteMultipart_ObjStore::get_params()
   upload_id = s->info.args.get("uploadId");
 
   if (upload_id.empty()) {
-    ret = -ENOTSUP;
-    return ret;
+    op_ret = -ENOTSUP;
+    return op_ret;
   }
 
 #define COMPLETE_MULTIPART_MAX_LEN (1024 * 1024) /* api defines max 10,000 parts, this should be enough */
-  ret = rgw_rest_read_all_input(s, &data, &len, COMPLETE_MULTIPART_MAX_LEN);
-  if (ret < 0)
-    return ret;
+  op_ret = rgw_rest_read_all_input(s, &data, &len, COMPLETE_MULTIPART_MAX_LEN);
+  if (op_ret < 0)
+    return op_ret;
 
   return 0;
 }
@@ -1056,7 +1135,7 @@ int RGWListMultipart_ObjStore::get_params()
   upload_id = s->info.args.get("uploadId");
 
   if (upload_id.empty()) {
-    ret = -ENOTSUP;
+    op_ret = -ENOTSUP;
   }
   string marker_str = s->info.args.get("part-number-marker");
 
@@ -1065,8 +1144,8 @@ int RGWListMultipart_ObjStore::get_params()
     marker = strict_strtol(marker_str.c_str(), 10, &err);
     if (!err.empty()) {
       ldout(s->cct, 20) << "bad marker: "  << marker << dendl;
-      ret = -EINVAL;
-      return ret;
+      op_ret = -EINVAL;
+      return op_ret;
     }
   }
   
@@ -1074,7 +1153,7 @@ int RGWListMultipart_ObjStore::get_params()
   if (!str.empty())
     max_parts = atoi(str.c_str());
 
-  return ret;
+  return op_ret;
 }
 
 int RGWListBucketMultiparts_ObjStore::get_params()
@@ -1099,8 +1178,8 @@ int RGWDeleteMultiObj_ObjStore::get_params()
 {
 
   if (s->bucket_name.empty()) {
-    ret = -EINVAL;
-    return ret;
+    op_ret = -EINVAL;
+    return op_ret;
   }
 
   // everything is probably fine, set the bucket
@@ -1113,20 +1192,20 @@ int RGWDeleteMultiObj_ObjStore::get_params()
   if (cl) {
     data = (char *)malloc(cl + 1);
     if (!data) {
-      ret = -ENOMEM;
-      return ret;
+      op_ret = -ENOMEM;
+      return op_ret;
     }
     int read_len;
-    ret = s->cio->read(data, cl, &read_len);
+    op_ret = s->cio->read(data, cl, &read_len);
     len = read_len;
-    if (ret < 0)
-      return ret;
+    if (op_ret < 0)
+      return op_ret;
     data[len] = '\0';
   } else {
     return -EINVAL;
   }
 
-  return ret;
+  return op_ret;
 }
 
 
@@ -1155,6 +1234,8 @@ int RGWHandler_ObjStore::allocate_formatter(struct req_state *s, int default_typ
       s->format = RGW_FORMAT_XML;
     } else if (format_str.compare("json") == 0) {
       s->format = RGW_FORMAT_JSON;
+    } else if (format_str.compare("html") == 0) {
+      s->format = RGW_FORMAT_HTML;
     } else {
       const char *accept = s->info.env->get("HTTP_ACCEPT");
       if (accept) {
@@ -1168,6 +1249,8 @@ int RGWHandler_ObjStore::allocate_formatter(struct req_state *s, int default_typ
           s->format = RGW_FORMAT_XML;
         } else if (strcmp(format_buf, "application/json") == 0) {
           s->format = RGW_FORMAT_JSON;
+        } else if (strcmp(format_buf, "text/html") == 0) {
+          s->format = RGW_FORMAT_HTML;
         }
       }
     }
@@ -1192,24 +1275,27 @@ int RGWHandler_ObjStore::allocate_formatter(struct req_state *s, int default_typ
     case RGW_FORMAT_JSON:
       s->formatter = new JSONFormatter(false);
       break;
+    case RGW_FORMAT_HTML:
+      s->formatter = new HTMLFormatter(s->prot_flags & RGW_REST_WEBSITE);
+      break;
     default:
       return -EINVAL;
 
   };
-  s->formatter->reset();
+  //s->formatter->reset(); // All formatters should reset on create already
 
   return 0;
 }
 
 int RGWHandler_ObjStore::validate_tenant_name(string const& t)
 {
-  struct tench {
-    static bool is_good(char ch) {
-      return isalnum(ch) || ch == '_';
-    }
-  };
-  std::string::const_iterator it = std::find_if(t.begin(), t.end(), tench::is_good);
-  return (it == t.end())? 0: -ERR_INVALID_BUCKET_NAME;
+  const char *p = t.c_str();
+  for (unsigned int i = 0; i < t.size(); i++) {
+    char ch = p[i];
+    if (!(isalnum(ch) || ch == '_'))
+      return -ERR_INVALID_TENANT_NAME;
+  }
+  return 0;
 }
 
 // This function enforces Amazon's spec for bucket names.
@@ -1274,6 +1360,14 @@ static http_op op_from_method(const char *method)
   return OP_UNKNOWN;
 }
 
+int RGWHandler_ObjStore::init_permissions(RGWOp *op)
+{
+  if (op->get_type() == RGW_OP_CREATE_BUCKET)
+    return 0;
+
+  return do_init_permissions();
+}
+
 int RGWHandler_ObjStore::read_permissions(RGWOp *op_obj)
 {
   bool only_bucket;
@@ -1411,24 +1505,59 @@ int RGWREST::preprocess(struct req_state *s, RGWClientIO *cio)
     ldout(s->cct, 10) << "host=" << info.host << dendl;
     string domain;
     string subdomain;
-    bool in_hosted_domain = rgw_find_host_in_domains(info.host, &domain,
-						     &subdomain);
-    ldout(s->cct, 20) << "subdomain=" << subdomain << " domain=" << domain
-		      << " in_hosted_domain=" << in_hosted_domain << dendl;
+    bool in_hosted_domain_s3website = false;
+    bool in_hosted_domain = rgw_find_host_in_domains(info.host, &domain, &subdomain, hostnames_set);
+
+    bool s3website_enabled = g_conf->rgw_enable_apis.find("s3website") != std::string::npos;
+    string s3website_domain;
+    string s3website_subdomain;
+
+    if (s3website_enabled) {
+      in_hosted_domain_s3website = rgw_find_host_in_domains(info.host, &s3website_domain, &s3website_subdomain, hostnames_s3website_set);
+      if (in_hosted_domain_s3website) {
+	in_hosted_domain = true; // TODO: should hostnames be a strict superset of hostnames_s3website?
+        domain = s3website_domain;
+        subdomain = s3website_subdomain;
+        s->prot_flags |= RGW_REST_WEBSITE;
+      }
+    }
+
+    ldout(s->cct, 20)
+      << "subdomain=" << subdomain 
+      << " domain=" << domain 
+      << " in_hosted_domain=" << in_hosted_domain 
+      << " in_hosted_domain_s3website=" << in_hosted_domain_s3website 
+      << dendl;
 
-    if (g_conf->rgw_resolve_cname && !in_hosted_domain) {
+    if (g_conf->rgw_resolve_cname && !in_hosted_domain && !in_hosted_domain_s3website) {
       string cname;
       bool found;
       int r = rgw_resolver->resolve_cname(info.host, cname, &found);
       if (r < 0) {
-	ldout(s->cct, 0) << "WARNING: rgw_resolver->resolve_cname() returned r=" << r << dendl;
+        ldout(s->cct, 0) << "WARNING: rgw_resolver->resolve_cname() returned r=" << r << dendl;
       }
+
       if (found) {
         ldout(s->cct, 5) << "resolved host cname " << info.host << " -> "
 			 << cname << dendl;
-        in_hosted_domain = rgw_find_host_in_domains(cname, &domain, &subdomain);
-        ldout(s->cct, 20) << "subdomain=" << subdomain << " domain=" << domain
-			  << " in_hosted_domain=" << in_hosted_domain << dendl;
+        in_hosted_domain = rgw_find_host_in_domains(cname, &domain, &subdomain, hostnames_set);
+
+        if (s3website_enabled && !in_hosted_domain_s3website) {
+            in_hosted_domain_s3website = rgw_find_host_in_domains(cname, &s3website_domain, &s3website_subdomain, hostnames_s3website_set);
+	    if (in_hosted_domain_s3website) {
+	      in_hosted_domain = true; // TODO: should hostnames be a strict superset of hostnames_s3website?
+	      domain = s3website_domain;
+	      subdomain = s3website_subdomain;
+	      s->prot_flags |= RGW_REST_WEBSITE;
+	    }
+        }
+
+        ldout(s->cct, 20)
+          << "subdomain=" << subdomain 
+          << " domain=" << domain 
+          << " in_hosted_domain=" << in_hosted_domain 
+          << " in_hosted_domain_s3website=" << in_hosted_domain_s3website 
+          << dendl;
       }
     }
 
diff --git a/src/rgw/rgw_rest.h b/src/rgw/rgw_rest.h
index 4ba3c96..8f1c4ed 100644
--- a/src/rgw/rgw_rest.h
+++ b/src/rgw/rgw_rest.h
@@ -356,15 +356,22 @@ protected:
 public:
   RGWHandler_ObjStore() {}
   virtual ~RGWHandler_ObjStore() {}
+  int init_permissions(RGWOp *op);
   int read_permissions(RGWOp *op);
+  virtual int retarget(RGWOp *op, RGWOp **new_op) {
+    *new_op = op;
+    return 0;
+  }
 
   virtual int authorize() = 0;
+  // virtual int postauth_init(struct req_init_state *t) = 0;
 };
 
 class RGWHandler_ObjStore_SWIFT;
 class RGWHandler_SWIFT_Auth;
 class RGWHandler_ObjStore_S3;
 
+
 class RGWRESTMgr {
   bool should_log;
 protected:
@@ -422,7 +429,8 @@ extern void end_header(struct req_state *s,
                        RGWOp *op = NULL,
                        const char *content_type = NULL,
                        const int64_t proposed_content_length = NO_CONTENT_LENGTH,
-		       bool force_content_type = false);
+		       bool force_content_type = false,
+		       bool force_no_error = false);
 extern void dump_start(struct req_state *s);
 extern void list_all_buckets_start(struct req_state *s);
 extern void dump_owner(struct req_state *s, rgw_user& id, string& name, const char *section = NULL);
@@ -432,7 +440,7 @@ extern void dump_etag(struct req_state *s, const char *etag);
 extern void dump_epoch_header(struct req_state *s, const char *name, time_t t);
 extern void dump_time_header(struct req_state *s, const char *name, time_t t);
 extern void dump_last_modified(struct req_state *s, time_t t);
-extern void abort_early(struct req_state *s, RGWOp *op, int err);
+extern void abort_early(struct req_state *s, RGWOp *op, int err, RGWHandler* handler);
 extern void dump_range(struct req_state *s, uint64_t ofs, uint64_t end, uint64_t total_size);
 extern void dump_continue(struct req_state *s);
 extern void list_all_buckets_end(struct req_state *s);
diff --git a/src/rgw/rgw_rest_client.cc b/src/rgw/rgw_rest_client.cc
index 4eaf42e..4ca88ab 100644
--- a/src/rgw/rgw_rest_client.cc
+++ b/src/rgw/rgw_rest_client.cc
@@ -272,7 +272,7 @@ int RGWRESTSimpleRequest::forward_request(RGWAccessKey& key, req_info& info, siz
 class RGWRESTStreamOutCB : public RGWGetDataCB {
   RGWRESTStreamWriteRequest *req;
 public:
-  RGWRESTStreamOutCB(RGWRESTStreamWriteRequest *_req) : req(_req) {}
+  explicit RGWRESTStreamOutCB(RGWRESTStreamWriteRequest *_req) : req(_req) {}
   int handle_data(bufferlist& bl, off_t bl_ofs, off_t bl_len); /* callback for object iteration when sending data */
 };
 
diff --git a/src/rgw/rgw_rest_conn.cc b/src/rgw/rgw_rest_conn.cc
index f15d649..e278ad2 100644
--- a/src/rgw/rgw_rest_conn.cc
+++ b/src/rgw/rgw_rest_conn.cc
@@ -53,7 +53,7 @@ int RGWRESTConn::forward(const rgw_user& uid, req_info& info, obj_version *objv,
 class StreamObjData : public RGWGetDataCB {
   rgw_obj obj;
 public:
-    StreamObjData(rgw_obj& _obj) : obj(_obj) {}
+    explicit StreamObjData(rgw_obj& _obj) : obj(_obj) {}
 };
 
 int RGWRESTConn::put_obj_init(const rgw_user& uid, rgw_obj& obj, uint64_t obj_size,
diff --git a/src/rgw/rgw_rest_s3.cc b/src/rgw/rgw_rest_s3.cc
index 8c00e19..9572193 100644
--- a/src/rgw/rgw_rest_s3.cc
+++ b/src/rgw/rgw_rest_s3.cc
@@ -11,6 +11,7 @@
 
 #include "rgw_rest.h"
 #include "rgw_rest_s3.h"
+#include "rgw_rest_s3website.h"
 #include "rgw_auth_s3.h"
 #include "rgw_acl.h"
 #include "rgw_policy_s3.h"
@@ -20,6 +21,12 @@
 
 #include "rgw_client_io.h"
 
+/* This header contains several Keystone-related primitives
+ * we want to reuse here. */
+#include "rgw_swift.h"
+
+#include <typeinfo> // for 'typeid'
+
 #define dout_subsys ceph_subsys_rgw
 
 using namespace ceph::crypto;
@@ -72,6 +79,26 @@ static struct response_attr_param resp_attr_params[] = {
   {NULL, NULL},
 };
 
+int RGWGetObj_ObjStore_S3Website::send_response_data(bufferlist& bl, off_t bl_ofs, off_t bl_len) {
+  map<string, bufferlist>::iterator iter;
+  iter = attrs.find(RGW_ATTR_AMZ_WEBSITE_REDIRECT_LOCATION);
+  if (iter != attrs.end()) {
+    bufferlist &bl = iter->second;
+    s->redirect = string(bl.c_str(), bl.length());
+    s->err.http_ret = 301;
+    ldout(s->cct, 20) << __CEPH_ASSERT_FUNCTION << " redirectng per x-amz-website-redirect-location=" << s->redirect << dendl;
+    op_ret = -ERR_WEBSITE_REDIRECT;
+    return op_ret;
+  } else {
+    return RGWGetObj_ObjStore_S3::send_response_data(bl, bl_ofs, bl_len);
+  }
+}
+
+int RGWGetObj_ObjStore_S3Website::send_response_data_error()
+{
+  return RGWGetObj_ObjStore_S3::send_response_data_error();
+}
+
 int RGWGetObj_ObjStore_S3::send_response_data_error()
 {
   bufferlist bl;
@@ -86,7 +113,7 @@ int RGWGetObj_ObjStore_S3::send_response_data(bufferlist& bl, off_t bl_ofs, off_
   map<string, string>::iterator riter;
   bufferlist metadata_bl;
 
-  if (ret)
+  if (op_ret)
     goto done;
 
   if (sent_header)
@@ -119,7 +146,7 @@ int RGWGetObj_ObjStore_S3::send_response_data(bufferlist& bl, off_t bl_ofs, off_
   dump_content_length(s, total_len);
   dump_last_modified(s, lastmod);
 
-  if (!ret) {
+  if (! op_ret) {
     map<string, bufferlist>::iterator iter = attrs.find(RGW_ATTR_ETAG);
     if (iter != attrs.end()) {
       bufferlist& bl = iter->second;
@@ -165,7 +192,8 @@ int RGWGetObj_ObjStore_S3::send_response_data(bufferlist& bl, off_t bl_ofs, off_
   }
 
 done:
-  set_req_state_err(s, (partial_content && !ret) ? STATUS_PARTIAL_CONTENT : ret);
+  set_req_state_err(s, (partial_content && !op_ret) ? STATUS_PARTIAL_CONTENT :
+		    op_ret);
 
   dump_errno(s);
 
@@ -184,7 +212,7 @@ done:
   sent_header = true;
 
 send_data:
-  if (get_data && !ret) {
+  if (get_data && !op_ret) {
     int r = s->cio->write(bl.c_str() + bl_ofs, bl_len);
     if (r < 0)
       return r;
@@ -195,13 +223,13 @@ send_data:
 
 void RGWListBuckets_ObjStore_S3::send_response_begin(bool has_buckets)
 {
-  if (ret)
-    set_req_state_err(s, ret);
+  if (op_ret)
+    set_req_state_err(s, op_ret);
   dump_errno(s);
   dump_start(s);
   end_header(s, NULL, "application/xml");
 
-  if (!ret) {
+  if (! op_ret) {
     list_all_buckets_start(s);
     dump_owner(s, s->user.user_id, s->user.display_name);
     s->formatter->open_array_section("Buckets");
@@ -244,9 +272,9 @@ int RGWListBucket_ObjStore_S3::get_params()
     marker.instance = s->info.args.get("version-id-marker");
   }
   max_keys = s->info.args.get("max-keys");
-  ret = parse_max_keys();
-  if (ret < 0) {
-    return ret;
+  op_ret = parse_max_keys();
+  if (op_ret < 0) {
+    return op_ret;
   }
   delimiter = s->info.args.get("delimiter");
   encoding_type = s->info.args.get("encoding-type");
@@ -274,7 +302,7 @@ void RGWListBucket_ObjStore_S3::send_versioned_response()
   if (strcasecmp(encoding_type.c_str(), "url") == 0)
     encode_key = true;
 
-  if (ret >= 0) {
+  if (op_ret >= 0) {
     vector<RGWObjEnt>::iterator iter;
     for (iter = objs.begin(); iter != objs.end(); ++iter) {
       time_t mtime = iter->mtime.sec();
@@ -320,13 +348,13 @@ void RGWListBucket_ObjStore_S3::send_versioned_response()
 
 void RGWListBucket_ObjStore_S3::send_response()
 {
-  if (ret < 0)
-    set_req_state_err(s, ret);
+  if (op_ret < 0)
+    set_req_state_err(s, op_ret);
   dump_errno(s);
 
   end_header(s, this, "application/xml");
   dump_start(s);
-  if (ret < 0)
+  if (op_ret < 0)
     return;
 
   if (list_versions) {
@@ -353,7 +381,7 @@ void RGWListBucket_ObjStore_S3::send_response()
   if (strcasecmp(encoding_type.c_str(), "url") == 0)
     encode_key = true;
 
-  if (ret >= 0) {
+  if (op_ret >= 0) {
     vector<RGWObjEnt>::iterator iter;
     for (iter = objs.begin(); iter != objs.end(); ++iter) {
       s->formatter->open_array_section("Contents");
@@ -505,12 +533,84 @@ done:
 
 void RGWSetBucketVersioning_ObjStore_S3::send_response()
 {
-  if (ret)
-    set_req_state_err(s, ret);
+  if (op_ret)
+    set_req_state_err(s, op_ret);
   dump_errno(s);
   end_header(s);
 }
 
+int RGWSetBucketWebsite_ObjStore_S3::get_params()
+{
+#define GET_BUCKET_WEBSITE_BUF_MAX (128 * 1024)
+
+  char *data;
+  int len = 0;
+  int r = rgw_rest_read_all_input(s, &data, &len, GET_BUCKET_WEBSITE_BUF_MAX);
+  if (r < 0) {
+    return r;
+  }
+
+  bufferlist bl;
+  bl.append(data, len);
+
+  RGWXMLDecoder::XMLParser parser;
+  parser.init();
+
+  if (!parser.parse(data, len, 1)) {
+    string str(data, len);
+    ldout(s->cct, 5) << "failed to parse xml: " << str << dendl;
+    return -EINVAL;
+  }
+
+  try {
+    RGWXMLDecoder::decode_xml("WebsiteConfiguration", website_conf, &parser, true);
+  } catch (RGWXMLDecoder::err& err) {
+    string str(data, len);
+    ldout(s->cct, 5) << "unexpected xml: " << str << dendl;
+    return -EINVAL;
+  }
+
+  return 0;
+}
+
+void RGWSetBucketWebsite_ObjStore_S3::send_response()
+{
+  if (op_ret < 0)
+    set_req_state_err(s, op_ret);
+  dump_errno(s);
+  end_header(s);
+}
+
+void RGWDeleteBucketWebsite_ObjStore_S3::send_response()
+{
+  if (op_ret == 0) {
+    op_ret = STATUS_NO_CONTENT;
+  }
+  set_req_state_err(s, op_ret);
+  dump_errno(s);
+  end_header(s);
+}
+
+void RGWGetBucketWebsite_ObjStore_S3::send_response()
+{
+  if (op_ret)
+    set_req_state_err(s, op_ret);
+  dump_errno(s);
+  end_header(s, this, "application/xml");
+  dump_start(s);
+
+  if (op_ret < 0) {
+    return;
+  }
+
+  RGWBucketWebsiteConf& conf = s->bucket_info.website_conf;
+
+  s->formatter->open_object_section_in_ns("WebsiteConfiguration",
+					  "http://doc.s3.amazonaws.com/doc/2006-03-01/");
+  conf.dump_xml(s->formatter);
+  s->formatter->close_section(); // WebsiteConfiguration
+  rgw_flush_formatter_and_reset(s, s->formatter);
+}
 
 static void dump_bucket_metadata(struct req_state *s, RGWBucketEnt& bucket)
 {
@@ -523,11 +623,11 @@ static void dump_bucket_metadata(struct req_state *s, RGWBucketEnt& bucket)
 
 void RGWStatBucket_ObjStore_S3::send_response()
 {
-  if (ret >= 0) {
+  if (op_ret >= 0) {
     dump_bucket_metadata(s, bucket);
   }
 
-  set_req_state_err(s, ret);
+  set_req_state_err(s, op_ret);
   dump_errno(s);
 
   end_header(s, this);
@@ -608,9 +708,9 @@ int RGWCreateBucket_ObjStore_S3::get_params()
   int len = 0;
   char *data;
 #define CREATE_BUCKET_MAX_REQ_LEN (512 * 1024) /* this is way more than enough */
-  ret = rgw_rest_read_all_input(s, &data, &len, CREATE_BUCKET_MAX_REQ_LEN);
-  if ((ret < 0) && (ret != -ERR_LENGTH_REQUIRED))
-    return ret;
+  op_ret = rgw_rest_read_all_input(s, &data, &len, CREATE_BUCKET_MAX_REQ_LEN);
+  if ((op_ret < 0) && (op_ret != -ERR_LENGTH_REQUIRED))
+    return op_ret;
 
   bufferptr in_ptr(data, len);
   in_data.append(in_ptr);
@@ -652,14 +752,14 @@ int RGWCreateBucket_ObjStore_S3::get_params()
 
 void RGWCreateBucket_ObjStore_S3::send_response()
 {
-  if (ret == -ERR_BUCKET_EXISTS)
-    ret = 0;
-  if (ret)
-    set_req_state_err(s, ret);
+  if (op_ret == -ERR_BUCKET_EXISTS)
+    op_ret = 0;
+  if (op_ret)
+    set_req_state_err(s, op_ret);
   dump_errno(s);
   end_header(s);
 
-  if (ret < 0)
+  if (op_ret < 0)
     return;
 
   if (s->system_request) {
@@ -676,7 +776,7 @@ void RGWCreateBucket_ObjStore_S3::send_response()
 
 void RGWDeleteBucket_ObjStore_S3::send_response()
 {
-  int r = ret;
+  int r = op_ret;
   if (!r)
     r = STATUS_NO_CONTENT;
 
@@ -725,12 +825,13 @@ static int get_success_retcode(int code)
 
 void RGWPutObj_ObjStore_S3::send_response()
 {
-  if (ret) {
-    set_req_state_err(s, ret);
+  if (op_ret) {
+    set_req_state_err(s, op_ret);
   } else {
     if (s->cct->_conf->rgw_s3_success_create_obj_status) {
-      ret = get_success_retcode(s->cct->_conf->rgw_s3_success_create_obj_status);
-      set_req_state_err(s, ret);
+      op_ret = get_success_retcode(
+	s->cct->_conf->rgw_s3_success_create_obj_status);
+      set_req_state_err(s, op_ret);
     }
     dump_etag(s, etag.c_str());
     dump_content_length(s, 0);
@@ -1144,6 +1245,21 @@ int RGWPostObj_ObjStore_S3::get_params()
 
     attrs[attr_name] = attr_bl;
   }
+  // TODO: refactor this and the above loop to share code
+  piter = parts.find(RGW_AMZ_WEBSITE_REDIRECT_LOCATION);
+  if (piter != parts.end()) {
+    string n = piter->first;
+    string attr_name = RGW_ATTR_PREFIX;
+    attr_name.append(n);
+    /* need to null terminate it */
+    bufferlist& data = piter->second.data;
+    string str = string(data.c_str(), data.length());
+
+    bufferlist attr_bl;
+    attr_bl.append(str.c_str(), str.size() + 1);
+
+    attrs[attr_name] = attr_bl;
+  }
 
   int r = get_policy();
   if (r < 0)
@@ -1177,8 +1293,8 @@ int RGWPostObj_ObjStore_S3::get_policy()
 
     RGWUserInfo user_info;
 
-    ret = rgw_get_user_info_by_access_key(store, s3_access_key, user_info);
-    if (ret < 0) {
+    op_ret = rgw_get_user_info_by_access_key(store, s3_access_key, user_info);
+    if (op_ret < 0) {
       // Try keystone authentication as well
       int keystone_result = -EINVAL;
       if (!store->ctx()->_conf->rgw_s3_auth_use_keystone ||
@@ -1346,7 +1462,7 @@ int RGWPostObj_ObjStore_S3::get_data(bufferlist& bl)
 
 void RGWPostObj_ObjStore_S3::send_response()
 {
-  if (ret == 0 && parts.count("success_action_redirect")) {
+  if (op_ret == 0 && parts.count("success_action_redirect")) {
     string redirect;
 
     part_str("success_action_redirect", &redirect);
@@ -1390,12 +1506,12 @@ void RGWPostObj_ObjStore_S3::send_response()
 
     int r = check_utf8(redirect.c_str(), redirect.size());
     if (r < 0) {
-      ret = r;
+      op_ret = r;
       goto done;
     }
     dump_redirect(s, redirect);
-    ret = STATUS_REDIRECT;
-  } else if (ret == 0 && parts.count("success_action_status")) {
+    op_ret = STATUS_REDIRECT;
+  } else if (op_ret == 0 && parts.count("success_action_status")) {
     string status_string;
     uint32_t status_int;
 
@@ -1403,7 +1519,7 @@ void RGWPostObj_ObjStore_S3::send_response()
 
     int r = stringtoul(status_string, &status_int);
     if (r < 0) {
-      ret = r;
+      op_ret = r;
       goto done;
     }
 
@@ -1411,18 +1527,18 @@ void RGWPostObj_ObjStore_S3::send_response()
       case 200:
 	break;
       case 201:
-	ret = STATUS_CREATED;
+	op_ret = STATUS_CREATED;
 	break;
       default:
-	ret = STATUS_NO_CONTENT;
+	op_ret = STATUS_NO_CONTENT;
 	break;
     }
-  } else if (!ret) {
-    ret = STATUS_NO_CONTENT;
+  } else if (! op_ret) {
+    op_ret = STATUS_NO_CONTENT;
   }
 
 done:
-  if (ret == STATUS_CREATED) {
+  if (op_ret == STATUS_CREATED) {
     s->formatter->open_object_section("PostResponse");
     if (g_conf->rgw_dns_name.length())
       s->formatter->dump_format("Location", "%s/%s", s->info.script_uri.c_str(), s->object.name.c_str());
@@ -1433,13 +1549,13 @@ done:
     s->formatter->close_section();
   }
   s->err.message = err_msg;
-  set_req_state_err(s, ret);
+  set_req_state_err(s, op_ret);
   dump_errno(s);
-  if (ret >= 0) {
+  if (op_ret >= 0) {
     dump_content_length(s, s->formatter->get_len());
   }
   end_header(s, this);
-  if (ret != STATUS_CREATED)
+  if (op_ret != STATUS_CREATED)
     return;
 
   rgw_flush_formatter_and_reset(s, s->formatter);
@@ -1448,7 +1564,7 @@ done:
 
 void RGWDeleteObj_ObjStore_S3::send_response()
 {
-  int r = ret;
+  int r = op_ret;
   if (r == -ENOENT)
     r = 0;
   if (!r)
@@ -1539,13 +1655,13 @@ int RGWCopyObj_ObjStore_S3::get_params()
 
 void RGWCopyObj_ObjStore_S3::send_partial_response(off_t ofs)
 {
-  if (!sent_header) {
-    if (ret)
-    set_req_state_err(s, ret);
+  if (! sent_header) {
+    if (op_ret)
+    set_req_state_err(s, op_ret);
     dump_errno(s);
 
     end_header(s, this, "application/xml");
-    if (ret == 0) {
+    if (op_ret == 0) {
       s->formatter->open_object_section("CopyObjectResult");
     }
     sent_header = true;
@@ -1563,7 +1679,7 @@ void RGWCopyObj_ObjStore_S3::send_response()
   if (!sent_header)
     send_partial_response(0);
 
-  if (ret == 0) {
+  if (op_ret == 0) {
     dump_time(s, "LastModified", &mtime);
     if (!etag.empty()) {
       s->formatter->dump_string("ETag", etag);
@@ -1575,8 +1691,8 @@ void RGWCopyObj_ObjStore_S3::send_response()
 
 void RGWGetACLs_ObjStore_S3::send_response()
 {
-  if (ret)
-    set_req_state_err(s, ret);
+  if (op_ret)
+    set_req_state_err(s, op_ret);
   dump_errno(s);
   end_header(s, this, "application/xml");
   dump_start(s);
@@ -1605,8 +1721,8 @@ int RGWPutACLs_ObjStore_S3::get_policy_from_state(RGWRados *store, struct req_st
 
 void RGWPutACLs_ObjStore_S3::send_response()
 {
-  if (ret)
-    set_req_state_err(s, ret);
+  if (op_ret)
+    set_req_state_err(s, op_ret);
   dump_errno(s);
   end_header(s, this, "application/xml");
   dump_start(s);
@@ -1614,16 +1730,16 @@ void RGWPutACLs_ObjStore_S3::send_response()
 
 void RGWGetCORS_ObjStore_S3::send_response()
 {
-  if (ret) {
-    if (ret == -ENOENT) 
+  if (op_ret) {
+    if (op_ret == -ENOENT)
       set_req_state_err(s, ERR_NOT_FOUND);
-    else 
-      set_req_state_err(s, ret);
+    else
+      set_req_state_err(s, op_ret);
   }
   dump_errno(s);
   end_header(s, NULL, "application/xml");
   dump_start(s);
-  if (!ret) {
+  if (! op_ret) {
     string cors;
     RGWCORSConfiguration_S3 *s3cors = static_cast<RGWCORSConfiguration_S3 *>(&bucket_cors);
     stringstream ss;
@@ -1693,8 +1809,8 @@ done_err:
 
 void RGWPutCORS_ObjStore_S3::send_response()
 {
-  if (ret)
-    set_req_state_err(s, ret);
+  if (op_ret)
+    set_req_state_err(s, op_ret);
   dump_errno(s);
   end_header(s, NULL, "application/xml");
   dump_start(s);
@@ -1702,7 +1818,7 @@ void RGWPutCORS_ObjStore_S3::send_response()
 
 void RGWDeleteCORS_ObjStore_S3::send_response()
 {
-  int r = ret;
+  int r = op_ret;
   if (!r || r == -ENOENT)
     r = STATUS_NO_CONTENT;
 
@@ -1718,10 +1834,10 @@ void RGWOptionsCORS_ObjStore_S3::send_response()
   /*EACCES means, there is no CORS registered yet for the bucket
    *ENOENT means, there is no match of the Origin in the list of CORSRule
    */
-  if (ret == -ENOENT)
-    ret = -EACCES;
-  if (ret < 0) {
-    set_req_state_err(s, ret);
+  if (op_ret == -ENOENT)
+    op_ret = -EACCES;
+  if (op_ret < 0) {
+    set_req_state_err(s, op_ret);
     dump_errno(s);
     end_header(s, NULL);
     return;
@@ -1814,8 +1930,8 @@ done:
 
 void RGWSetRequestPayment_ObjStore_S3::send_response()
 {
-  if (ret)
-    set_req_state_err(s, ret);
+  if (op_ret)
+    set_req_state_err(s, op_ret);
   dump_errno(s);
   end_header(s);
 }
@@ -1823,9 +1939,9 @@ void RGWSetRequestPayment_ObjStore_S3::send_response()
 int RGWInitMultipart_ObjStore_S3::get_params()
 {
   RGWAccessControlPolicy_S3 s3policy(s->cct);
-  ret = create_s3_policy(s, store, s3policy, s->owner);
-  if (ret < 0)
-    return ret;
+  op_ret = create_s3_policy(s, store, s3policy, s->owner);
+  if (op_ret < 0)
+    return op_ret;
 
   policy = s3policy;
 
@@ -1834,11 +1950,11 @@ int RGWInitMultipart_ObjStore_S3::get_params()
 
 void RGWInitMultipart_ObjStore_S3::send_response()
 {
-  if (ret)
-    set_req_state_err(s, ret);
+  if (op_ret)
+    set_req_state_err(s, op_ret);
   dump_errno(s);
   end_header(s, this, "application/xml");
-  if (ret == 0) { 
+  if (op_ret == 0) {
     dump_start(s);
     s->formatter->open_object_section_in_ns("InitiateMultipartUploadResult",
 		  "http://s3.amazonaws.com/doc/2006-03-01/");
@@ -1854,11 +1970,11 @@ void RGWInitMultipart_ObjStore_S3::send_response()
 
 void RGWCompleteMultipart_ObjStore_S3::send_response()
 {
-  if (ret)
-    set_req_state_err(s, ret);
+  if (op_ret)
+    set_req_state_err(s, op_ret);
   dump_errno(s);
   end_header(s, this, "application/xml");
-  if (ret == 0) { 
+  if (op_ret == 0) { 
     dump_start(s);
     s->formatter->open_object_section_in_ns("CompleteMultipartUploadResult",
 			  "http://s3.amazonaws.com/doc/2006-03-01/");
@@ -1887,7 +2003,7 @@ void RGWCompleteMultipart_ObjStore_S3::send_response()
 
 void RGWAbortMultipart_ObjStore_S3::send_response()
 {
-  int r = ret;
+  int r = op_ret;
   if (!r)
     r = STATUS_NO_CONTENT;
 
@@ -1898,12 +2014,12 @@ void RGWAbortMultipart_ObjStore_S3::send_response()
 
 void RGWListMultipart_ObjStore_S3::send_response()
 {
-  if (ret)
-    set_req_state_err(s, ret);
+  if (op_ret)
+    set_req_state_err(s, op_ret);
   dump_errno(s);
   end_header(s, this, "application/xml");
 
-  if (ret == 0) { 
+  if (op_ret == 0) {
     dump_start(s);
     s->formatter->open_object_section_in_ns("ListPartsResult",
 		    "http://s3.amazonaws.com/doc/2006-03-01/");
@@ -1956,13 +2072,13 @@ void RGWListMultipart_ObjStore_S3::send_response()
 
 void RGWListBucketMultiparts_ObjStore_S3::send_response()
 {
-  if (ret < 0)
-    set_req_state_err(s, ret);
+  if (op_ret < 0)
+    set_req_state_err(s, op_ret);
   dump_errno(s);
 
   end_header(s, this, "application/xml");
   dump_start(s);
-  if (ret < 0)
+  if (op_ret < 0)
     return;
 
   s->formatter->open_object_section("ListMultipartUploadsResult");
@@ -1988,7 +2104,7 @@ void RGWListBucketMultiparts_ObjStore_S3::send_response()
     s->formatter->dump_string("Delimiter", delimiter);
   s->formatter->dump_string("IsTruncated", (is_truncated ? "true" : "false"));
 
-  if (ret >= 0) {
+  if (op_ret >= 0) {
     vector<RGWMultipartUploadEntry>::iterator iter;
     for (iter = uploads.begin(); iter != uploads.end(); ++iter) {
       RGWMPObj& mp = iter->mp;
@@ -2017,9 +2133,9 @@ void RGWListBucketMultiparts_ObjStore_S3::send_response()
 
 void RGWDeleteMultiObj_ObjStore_S3::send_status()
 {
-  if (!status_dumped) {
-    if (ret < 0)
-      set_req_state_err(s, ret);
+  if (! status_dumped) {
+    if (op_ret < 0)
+      set_req_state_err(s, op_ret);
     dump_errno(s);
     status_dumped = true;
   }
@@ -2044,7 +2160,7 @@ void RGWDeleteMultiObj_ObjStore_S3::send_partial_response(rgw_obj_key& key, bool
                                                           const string& marker_version_id, int ret)
 {
   if (!key.empty()) {
-    if (ret == 0 && !quiet) {
+    if (op_ret == 0 && !quiet) {
       s->formatter->open_object_section("Deleted");
       s->formatter->dump_string("Key", key.name);
       if (!key.instance.empty()) {
@@ -2055,13 +2171,13 @@ void RGWDeleteMultiObj_ObjStore_S3::send_partial_response(rgw_obj_key& key, bool
         s->formatter->dump_string("DeleteMarkerVersionId", marker_version_id);
       }
       s->formatter->close_section();
-    } else if (ret < 0) {
+    } else if (op_ret < 0) {
       struct rgw_http_errors r;
       int err_no;
 
       s->formatter->open_object_section("Error");
 
-      err_no = -ret;
+      err_no = -op_ret;
       rgw_get_errno_s3(&r, err_no);
 
       s->formatter->dump_string("Key", key.name);
@@ -2094,6 +2210,7 @@ RGWOp *RGWHandler_ObjStore_Service_S3::op_head()
 
 RGWOp *RGWHandler_ObjStore_Bucket_S3::get_obj_op(bool get_data)
 {
+  // Non-website mode
   if (get_data)
     return new RGWListBucket_ObjStore_S3;
   else
@@ -2111,6 +2228,13 @@ RGWOp *RGWHandler_ObjStore_Bucket_S3::op_get()
   if (s->info.args.sub_resource_exists("versioning"))
     return new RGWGetBucketVersioning_ObjStore_S3;
 
+  if (s->info.args.sub_resource_exists("website")) {
+    if (!s->cct->_conf->rgw_enable_static_website) {
+      return NULL;
+    }
+    return new RGWGetBucketWebsite_ObjStore_S3;
+  }
+
   if (is_acl_op()) {
     return new RGWGetACLs_ObjStore_S3;
   } else if (is_cors_op()) {
@@ -2139,6 +2263,12 @@ RGWOp *RGWHandler_ObjStore_Bucket_S3::op_put()
     return NULL;
   if (s->info.args.sub_resource_exists("versioning"))
     return new RGWSetBucketVersioning_ObjStore_S3;
+  if (s->info.args.sub_resource_exists("website")) {
+    if (!s->cct->_conf->rgw_enable_static_website) {
+      return NULL;
+    }
+    return new RGWSetBucketWebsite_ObjStore_S3;
+  }
   if (is_acl_op()) {
     return new RGWPutACLs_ObjStore_S3;
   } else if (is_cors_op()) {
@@ -2154,6 +2284,14 @@ RGWOp *RGWHandler_ObjStore_Bucket_S3::op_delete()
   if (is_cors_op()) {
     return new RGWDeleteCORS_ObjStore_S3;
   }
+
+  if (s->info.args.sub_resource_exists("website")) {
+    if (!s->cct->_conf->rgw_enable_static_website) {
+      return NULL;
+    }
+    return new RGWDeleteBucketWebsite_ObjStore_S3;
+  }
+
   return new RGWDeleteBucket_ObjStore_S3;
 }
 
@@ -2206,7 +2344,7 @@ RGWOp *RGWHandler_ObjStore_Obj_S3::op_put()
   if (is_acl_op()) {
     return new RGWPutACLs_ObjStore_S3;
   }
-  if (s->src_bucket_name.empty())
+  if (s->init_state.src_bucket.empty())
     return new RGWPutObj_ObjStore_S3;
   else
     return new RGWCopyObj_ObjStore_S3;
@@ -2286,12 +2424,9 @@ int RGWHandler_ObjStore_S3::init_from_header(struct req_state *s, int default_fo
    * the bucket (and its tenant) from DNS and Host: header (HTTP_HOST)
    * into req_status.bucket_name directly.
    */
-  if (s->bucket_name.empty()) {
-    rgw_parse_url_bucket(first, s->bucket_tenant, s->bucket_name);
-    if (s->bucket_tenant.empty())
-      s->bucket_tenant = s->user.user_id.tenant;
-
-    ldout(s->cct, 20) << "s->user.user_id=" << s->user.user_id << " s->bucket_tenant=" << s->bucket_tenant << " s->bucket_name=" << s->bucket_name << dendl;
+  if (s->init_state.url_bucket.empty()) {
+    // Save bucket to tide us over until token is parsed.
+    s->init_state.url_bucket = first;
 
     if (pos >= 0) {
       string encoded_obj_str = req.substr(pos+1);
@@ -2303,6 +2438,39 @@ int RGWHandler_ObjStore_S3::init_from_header(struct req_state *s, int default_fo
   return 0;
 }
 
+int RGWHandler_ObjStore_S3::postauth_init()
+{
+  struct req_init_state *t = &s->init_state;
+  bool relaxed_names = s->cct->_conf->rgw_relaxed_s3_bucket_names;
+
+  rgw_parse_url_bucket(t->url_bucket, s->user.user_id.tenant, s->bucket_tenant, s->bucket_name);
+
+  dout(10) << "s->object=" << (!s->object.empty() ? s->object : rgw_obj_key("<NULL>"))
+           << " s->bucket=" << rgw_make_bucket_entry_name(s->bucket_tenant, s->bucket_name) << dendl;
+
+  int ret;
+  ret = validate_tenant_name(s->bucket_tenant);
+  if (ret)
+    return ret;
+  ret = validate_bucket_name(s->bucket_name, relaxed_names);
+  if (ret)
+    return ret;
+  ret = validate_object_name(s->object.name);
+  if (ret)
+    return ret;
+
+  if (!t->src_bucket.empty()) {
+    rgw_parse_url_bucket(t->src_bucket, s->user.user_id.tenant, s->src_tenant_name, s->src_bucket_name);
+    ret = validate_tenant_name(s->src_tenant_name);
+    if (ret)
+      return ret;
+    ret = validate_bucket_name(s->src_bucket_name, relaxed_names);
+    if (ret)
+      return ret;
+  }
+  return 0;
+}
+
 static bool looks_like_ip_address(const char *bucket)
 {
   int num_periods = 0;
@@ -2363,20 +2531,9 @@ int RGWHandler_ObjStore_S3::validate_bucket_name(const string& bucket, bool rela
 
 int RGWHandler_ObjStore_S3::init(RGWRados *store, struct req_state *s, RGWClientIO *cio)
 {
-  dout(10) << "s->object=" << (!s->object.empty() ? s->object : rgw_obj_key("<NULL>"))
-           << " s->bucket=" << rgw_make_bucket_entry_name(s->bucket_tenant, s->bucket_name) << dendl;
-
   int ret;
-  ret = validate_tenant_name(s->bucket_tenant);
-  if (ret)
-    return ret;
-  bool relaxed_names = s->cct->_conf->rgw_relaxed_s3_bucket_names;
-  ret = validate_bucket_name(s->bucket_name, relaxed_names);
-  if (ret)
-    return ret;
-  ret = validate_object_name(s->object.name);
-  if (ret)
-    return ret;
+
+  s->dialect = "s3";
 
   const char *cacl = s->info.env->get("HTTP_X_AMZ_ACL");
   if (cacl)
@@ -2386,19 +2543,13 @@ int RGWHandler_ObjStore_S3::init(RGWRados *store, struct req_state *s, RGWClient
 
   const char *copy_source = s->info.env->get("HTTP_X_AMZ_COPY_SOURCE");
   if (copy_source) {
-    string src_bucket_str;
-    ret = RGWCopyObj::parse_copy_location(copy_source, src_bucket_str, s->src_object);
+    ret = RGWCopyObj::parse_copy_location(copy_source, s->init_state.src_bucket, s->src_object);
     if (!ret) {
       ldout(s->cct, 0) << "failed to parse copy location" << dendl;
       return -EINVAL; // XXX why not -ERR_INVALID_BUCKET_NAME or -ERR_BAD_URL?
     }
-    rgw_parse_url_bucket(src_bucket_str, s->src_tenant_name, s->src_bucket_name);
-    if (s->src_tenant_name.empty())
-      s->src_tenant_name = s->user.user_id.tenant;
   }
 
-  s->dialect = "s3";
-
   return RGWHandler_ObjStore::init(store, s, cio);
 }
 
@@ -2413,8 +2564,16 @@ int RGW_Auth_S3_Keystone_ValidateToken::validate_s3token(const string& auth_id,
     keystone_url.append("/");
   keystone_url.append("v2.0/s3tokens");
 
+  /* get authentication token for Keystone. */
+  string admin_token_id;
+  int r = RGWSwift::get_keystone_admin_token(cct, admin_token_id);
+  if (r < 0) {
+    ldout(cct, 2) << "s3 keystone: cannot get token for keystone access" << dendl;
+    return r;
+  }
+
   /* set required headers for keystone request */
-  append_header("X-Auth-Token", cct->_conf->rgw_keystone_admin_token);
+  append_header("X-Auth-Token", admin_token_id);
   append_header("Content-Type", "application/json");
 
   /* encode token */
@@ -2565,10 +2724,6 @@ int RGW_Auth_S3::authorize(RGWRados *store, struct req_state *s)
         }
 
         s->perm_mask = RGW_PERM_FULL_CONTROL;
-
-        if (s->bucket_tenant.empty()) {
-          s->bucket_tenant = s->user.user_id.tenant;
-        }
       }
     }
   }
@@ -2586,10 +2741,6 @@ int RGW_Auth_S3::authorize(RGWRados *store, struct req_state *s)
       return -ERR_INVALID_ACCESS_KEY;
     }
 
-    if (s->bucket_tenant.empty()) {
-      s->bucket_tenant = s->user.user_id.tenant;
-    }
-
     /* now verify signature */
 
     string auth_hdr;
@@ -2662,7 +2813,6 @@ int RGW_Auth_S3::authorize(RGWRados *store, struct req_state *s)
   s->owner.set_id(s->user.user_id);
   s->owner.set_name(s->user.display_name);
 
-
   return  0;
 }
 
@@ -2677,15 +2827,185 @@ int RGWHandler_Auth_S3::init(RGWRados *store, struct req_state *state, RGWClient
 
 RGWHandler *RGWRESTMgr_S3::get_handler(struct req_state *s)
 {
-  int ret = RGWHandler_ObjStore_S3::init_from_header(s, RGW_FORMAT_XML, false);
+  bool is_s3website = enable_s3website && (s->prot_flags & RGW_REST_WEBSITE);
+  int ret = RGWHandler_ObjStore_S3::init_from_header(s, is_s3website ? RGW_FORMAT_HTML : RGW_FORMAT_XML, false);
   if (ret < 0)
     return NULL;
 
-  if (s->bucket_name.empty())
-    return new RGWHandler_ObjStore_Service_S3;
+  RGWHandler* handler;
+  // TODO: Make this more readable
+  if (is_s3website) {
+    if (s->init_state.url_bucket.empty()) {
+      handler = new RGWHandler_ObjStore_Service_S3Website;
+    } else if (s->object.empty()) {
+      handler = new RGWHandler_ObjStore_Bucket_S3Website;
+    } else {
+      handler = new RGWHandler_ObjStore_Obj_S3Website;
+    }
+  } else {
+    if (s->init_state.url_bucket.empty()) {
+      handler = new RGWHandler_ObjStore_Service_S3;
+    } else if (s->object.empty()) {
+      handler = new RGWHandler_ObjStore_Bucket_S3;
+    } else {
+      handler = new RGWHandler_ObjStore_Obj_S3;
+    }
+  }
+
+  ldout(s->cct, 20) << __func__ << " handler=" << typeid(*handler).name() << dendl;
+  return handler;
+}
+
+int RGWHandler_ObjStore_S3Website::retarget(RGWOp *op, RGWOp **new_op) {
+  *new_op = op;
+  ldout(s->cct, 10) << __func__ << "Starting retarget" << dendl;
+
+  if (!(s->prot_flags & RGW_REST_WEBSITE))
+    return 0;
+
+  RGWObjectCtx& obj_ctx = *static_cast<RGWObjectCtx *>(s->obj_ctx);
+  int ret = store->get_bucket_info(obj_ctx, s->bucket_tenant, s->bucket_name, s->bucket_info, NULL, &s->bucket_attrs);
+  if (ret < 0) {
+      // TODO-FUTURE: if the bucket does not exist, maybe expose it here?
+      return -ERR_NO_SUCH_BUCKET;
+  }
+  if (!s->bucket_info.has_website) {
+      // TODO-FUTURE: if the bucket has no WebsiteConfig, expose it here
+      return -ERR_NO_SUCH_WEBSITE_CONFIGURATION;
+  }
+
+  rgw_obj_key new_obj;
+  s->bucket_info.website_conf.get_effective_key(s->object.name, &new_obj.name);
+  ldout(s->cct, 10) << "retarget get_effective_key " << s->object << " -> " << new_obj << dendl;
+
+  RGWBWRoutingRule rrule;
+  bool should_redirect = s->bucket_info.website_conf.should_redirect(new_obj.name, 0, &rrule);
+
+  if (should_redirect) {
+    const string& hostname = s->info.env->get("HTTP_HOST", "");
+    const string& protocol = (s->info.env->get("SERVER_PORT_SECURE") ? "https" : "http");
+    int redirect_code = 0;
+    rrule.apply_rule(protocol, hostname, s->object.name, &s->redirect, &redirect_code);
+    // APply a custom HTTP response code
+    if (redirect_code > 0)
+      s->err.http_ret = redirect_code; // Apply a custom HTTP response code
+    ldout(s->cct, 10) << "retarget redirect code=" << redirect_code << " proto+host:" << protocol << "://" << hostname << " -> " << s->redirect << dendl;
+    return -ERR_WEBSITE_REDIRECT;
+  }
+
+  /*
+   * FIXME: if s->object != new_obj, drop op and create a new op to handle operation. Or
+   * remove this comment if it's not applicable anymore
+   */
+
+  s->object = new_obj;
+
+  return 0;
+}
+
+RGWOp *RGWHandler_ObjStore_S3Website::op_get()
+{
+  return get_obj_op(true);
+}
+
+RGWOp *RGWHandler_ObjStore_S3Website::op_head()
+{
+  return get_obj_op(false);
+}
+
+int RGWHandler_ObjStore_S3Website::get_errordoc(const string errordoc_key, string *error_content) {
+    ldout(s->cct, 20) << "TODO Serve Custom error page here if bucket has <Error>" << dendl;
+    *error_content = errordoc_key;
+    // 1. Check if errordoc exists
+    // 2. Check if errordoc is public
+    // 3. Fetch errordoc content
+    /*
+     * FIXME maybe:  need to make sure all of the fields for conditional requests are cleared
+     */
+    RGWGetObj_ObjStore_S3Website *getop = new RGWGetObj_ObjStore_S3Website(true);
+    getop->set_get_data(true);
+    getop->init(store, s, this);
+
+    RGWGetObj_CB cb(getop);
+    rgw_obj obj(s->bucket, errordoc_key);
+    RGWObjectCtx rctx(store);
+    //RGWRados::Object op_target(store, s->bucket_info, *static_cast<RGWObjectCtx *>(s->obj_ctx), obj);
+    RGWRados::Object op_target(store, s->bucket_info, rctx, obj);
+    RGWRados::Object::Read read_op(&op_target);
+
+    int ret;
+    int64_t ofs = 0; 
+    int64_t end = -1;
+    ret = read_op.prepare(&ofs, &end);
+    if (ret < 0) {
+      goto done;
+    }
+
+    ret = read_op.iterate(ofs, end, &cb); // FIXME: need to know the final size?
+done:
+    delete getop;
+    return ret;
+}
+  
+int RGWHandler_ObjStore_S3Website::error_handler(int err_no, string *error_content) {
+  const struct rgw_http_errors *r;
+  int http_error_code = -1;
+  r = search_err(err_no, RGW_HTTP_ERRORS, ARRAY_LEN(RGW_HTTP_ERRORS));
+  if (r) {
+    http_error_code = r->http_ret;
+  }
+
+  RGWBWRoutingRule rrule;
+  bool should_redirect = s->bucket_info.website_conf.should_redirect(s->object.name, http_error_code, &rrule);
+
+  if (should_redirect) {
+    const string& hostname = s->info.env->get("HTTP_HOST", "");
+    const string& protocol = (s->info.env->get("SERVER_PORT_SECURE") ? "https" : "http");
+    int redirect_code = 0;
+    rrule.apply_rule(protocol, hostname, s->object.name, &s->redirect, &redirect_code);
+    // Apply a custom HTTP response code
+    if (redirect_code > 0)
+      s->err.http_ret = redirect_code; // Apply a custom HTTP response code
+    ldout(s->cct, 10) << "error handler redirect code=" << redirect_code << " proto+host:" << protocol << "://" << hostname << " -> " << s->redirect << dendl;
+    return -ERR_WEBSITE_REDIRECT;
+  } else if (!s->bucket_info.website_conf.error_doc.empty()) {
+    RGWHandler_ObjStore_S3Website::get_errordoc(s->bucket_info.website_conf.error_doc, error_content);
+  } else {
+    ldout(s->cct, 20) << "No special error handling today!" << dendl;
+  }
+
+  return err_no;
+}
 
-  if (s->object.empty())
-    return new RGWHandler_ObjStore_Bucket_S3;
+RGWOp *RGWHandler_ObjStore_Obj_S3Website::get_obj_op(bool get_data)
+{
+  /** If we are in website mode, then it is explicitly impossible to run GET or
+   * HEAD on the actual directory. We must convert the request to run on the
+   * suffix object instead!
+   */
+  RGWGetObj_ObjStore_S3Website *op = new RGWGetObj_ObjStore_S3Website;
+  op->set_get_data(get_data);
+  return op;
+}
 
-  return new RGWHandler_ObjStore_Obj_S3;
+RGWOp *RGWHandler_ObjStore_Bucket_S3Website::get_obj_op(bool get_data)
+{
+  /** If we are in website mode, then it is explicitly impossible to run GET or
+   * HEAD on the actual directory. We must convert the request to run on the
+   * suffix object instead!
+   */
+  RGWGetObj_ObjStore_S3Website *op = new RGWGetObj_ObjStore_S3Website;
+  op->set_get_data(get_data);
+  return op;
+}
+
+RGWOp *RGWHandler_ObjStore_Service_S3Website::get_obj_op(bool get_data)
+{
+  /** If we are in website mode, then it is explicitly impossible to run GET or
+   * HEAD on the actual directory. We must convert the request to run on the
+   * suffix object instead!
+   */
+  RGWGetObj_ObjStore_S3Website *op = new RGWGetObj_ObjStore_S3Website;
+  op->set_get_data(get_data);
+  return op;
 }
diff --git a/src/rgw/rgw_rest_s3.h b/src/rgw/rgw_rest_s3.h
index 1c2d529..ada5562 100644
--- a/src/rgw/rgw_rest_s3.h
+++ b/src/rgw/rgw_rest_s3.h
@@ -84,6 +84,31 @@ public:
   void send_response();
 };
 
+class RGWGetBucketWebsite_ObjStore_S3 : public RGWGetBucketWebsite {
+public:
+  RGWGetBucketWebsite_ObjStore_S3() {}
+  ~RGWGetBucketWebsite_ObjStore_S3() {}
+
+  void send_response();
+};
+
+class RGWSetBucketWebsite_ObjStore_S3 : public RGWSetBucketWebsite {
+public:
+  RGWSetBucketWebsite_ObjStore_S3() {}
+  ~RGWSetBucketWebsite_ObjStore_S3() {}
+
+  int get_params();
+  void send_response();
+};
+
+class RGWDeleteBucketWebsite_ObjStore_S3 : public RGWDeleteBucketWebsite {
+public:
+  RGWDeleteBucketWebsite_ObjStore_S3() {}
+  ~RGWDeleteBucketWebsite_ObjStore_S3() {}
+
+  void send_response();
+};
+
 class RGWStatBucket_ObjStore_S3 : public RGWStatBucket_ObjStore {
 public:
   RGWStatBucket_ObjStore_S3() {}
@@ -326,7 +351,7 @@ private:
   }
 
 public:
-  RGW_Auth_S3_Keystone_ValidateToken(CephContext *_cct)
+  explicit RGW_Auth_S3_Keystone_ValidateToken(CephContext *_cct)
       : RGWHTTPClient(_cct) {
     get_str_list(cct->_conf->rgw_keystone_accepted_roles, roles_list);
   }
@@ -375,10 +400,11 @@ public:
 
   virtual int validate_object_name(const string& bucket) { return 0; }
 
-  virtual int init(RGWRados *store, struct req_state *state, RGWClientIO *cio);
+  virtual int init(RGWRados *store, struct req_state *s, RGWClientIO *cio);
   virtual int authorize() {
     return RGW_Auth_S3::authorize(store, s);
   }
+  int postauth_init() { return 0; }
 };
 
 class RGWHandler_ObjStore_S3 : public RGWHandler_ObjStore {
@@ -392,10 +418,15 @@ public:
   int validate_bucket_name(const string& bucket, bool relaxed_names);
   using RGWHandler_ObjStore::validate_bucket_name;
   
-  virtual int init(RGWRados *store, struct req_state *state, RGWClientIO *cio);
+  virtual int init(RGWRados *store, struct req_state *s, RGWClientIO *cio);
   virtual int authorize() {
     return RGW_Auth_S3::authorize(store, s);
   }
+  int postauth_init();
+  virtual int retarget(RGWOp *op, RGWOp **new_op) {
+    *new_op = op;
+    return 0;
+  }
 };
 
 class RGWHandler_ObjStore_Service_S3 : public RGWHandler_ObjStore_S3 {
@@ -459,12 +490,15 @@ public:
 };
 
 class RGWRESTMgr_S3 : public RGWRESTMgr {
+private:
+  bool enable_s3website;
 public:
-  RGWRESTMgr_S3() {}
+  explicit RGWRESTMgr_S3(bool enable_s3website) : enable_s3website(false) { this->enable_s3website = enable_s3website; }
   virtual ~RGWRESTMgr_S3() {}
 
   virtual RGWHandler *get_handler(struct req_state *s);
 };
 
+class RGWHandler_ObjStore_Obj_S3Website;
 
 #endif
diff --git a/src/rgw/rgw_rest_s3website.h b/src/rgw/rgw_rest_s3website.h
new file mode 100644
index 0000000..d6cd7d8
--- /dev/null
+++ b/src/rgw/rgw_rest_s3website.h
@@ -0,0 +1,96 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2015 Robin H. Johnson <robin.johnson at dreamhost.com>
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software 
+ * Foundation.  See file COPYING.
+ * 
+ */
+#ifndef CEPH_RGW_REST_S3WEBSITE_H
+#define CEPH_RGW_REST_S3WEBSITE_H
+ 
+#include "rgw_rest_s3.h"
+
+class RGWHandler_ObjStore_S3Website : public RGWHandler_ObjStore_S3 {
+protected:
+  int retarget(RGWOp *op, RGWOp **new_op);
+  // TODO: this should probably be pure virtual so that it is always
+  // overridden; op_get/op_head are defined in this class and call it, so
+  // they don't need to be overridden later.
+  virtual RGWOp *get_obj_op(bool get_data) { return NULL; }
+  RGWOp *op_get();
+  RGWOp *op_head();
+  // Only allowed to use GET+HEAD
+  RGWOp *op_put() { return NULL; }
+  RGWOp *op_delete() { return NULL; }
+  RGWOp *op_post() { return NULL; }
+  RGWOp *op_copy() { return NULL; }
+  RGWOp *op_options() { return NULL; }
+
+  int get_errordoc(const string errordoc_key, string *error_content);
+public:
+  RGWHandler_ObjStore_S3Website() : RGWHandler_ObjStore_S3() {}
+  virtual ~RGWHandler_ObjStore_S3Website() {}
+  virtual int error_handler(int err_no, string *error_content);
+};
+
+class RGWHandler_ObjStore_Service_S3Website : public RGWHandler_ObjStore_S3Website {
+protected:
+  virtual RGWOp *get_obj_op(bool get_data);
+public:
+  RGWHandler_ObjStore_Service_S3Website() {}
+  virtual ~RGWHandler_ObjStore_Service_S3Website() {}
+};
+
+class RGWHandler_ObjStore_Obj_S3Website : public RGWHandler_ObjStore_S3Website {
+protected:
+  virtual RGWOp *get_obj_op(bool get_data);
+public:
+  RGWHandler_ObjStore_Obj_S3Website() {}
+  virtual ~RGWHandler_ObjStore_Obj_S3Website() {}
+};
+
+/* Deliberately mirrors RGWHandler_ObjStore_Obj_S3Website rather than a bucket
+ * handler: S3Websites do NOT support any bucket operations.
+ */
+class RGWHandler_ObjStore_Bucket_S3Website : public RGWHandler_ObjStore_S3Website {
+protected:
+  RGWOp *get_obj_op(bool get_data);
+public:
+  RGWHandler_ObjStore_Bucket_S3Website() {}
+  virtual ~RGWHandler_ObjStore_Bucket_S3Website() {}
+};
+
+// TODO: do we actually need this?
+class  RGWGetObj_ObjStore_S3Website : public RGWGetObj_ObjStore_S3
+{
+private:
+   bool is_errordoc_request;
+public:
+  RGWGetObj_ObjStore_S3Website() : is_errordoc_request(false) {}
+  explicit RGWGetObj_ObjStore_S3Website(bool is_errordoc_request) : is_errordoc_request(false) { this->is_errordoc_request = is_errordoc_request; }
+  ~RGWGetObj_ObjStore_S3Website() {}
+  int send_response_data_error();
+  int send_response_data(bufferlist& bl, off_t ofs, off_t len);
+  // We override RGWGetObj_ObjStore::get_params here, to allow ignoring all
+  // conditional params for error pages.
+  int get_params() {
+      if (is_errordoc_request) {
+        range_str = NULL;
+        if_mod = NULL;
+        if_unmod = NULL;
+        if_match = NULL;
+        if_nomatch = NULL;
+               return 0;
+      } else {
+        return RGWGetObj_ObjStore_S3::get_params();
+      }
+  }
+};
+ 
+#endif
diff --git a/src/rgw/rgw_rest_swift.cc b/src/rgw/rgw_rest_swift.cc
index b170272..94d44d6 100644
--- a/src/rgw/rgw_rest_swift.cc
+++ b/src/rgw/rgw_rest_swift.cc
@@ -102,11 +102,11 @@ static void dump_account_metadata(struct req_state * const s,
 
 void RGWListBuckets_ObjStore_SWIFT::send_response_begin(bool has_buckets)
 {
-  if (ret) {
-    set_req_state_err(s, ret);
+  if (op_ret) {
+    set_req_state_err(s, op_ret);
   } else if (!has_buckets && s->format == RGW_FORMAT_PLAIN) {
-    ret = STATUS_NO_CONTENT;
-    set_req_state_err(s, ret);
+    op_ret = STATUS_NO_CONTENT;
+    set_req_state_err(s, op_ret);
   }
 
   if (!g_conf->rgw_swift_enforce_content_length) {
@@ -121,7 +121,7 @@ void RGWListBuckets_ObjStore_SWIFT::send_response_begin(bool has_buckets)
     end_header(s, NULL, NULL, NO_CONTENT_LENGTH, true);
   }
 
-  if (!ret) {
+  if (! op_ret) {
     dump_start(s);
     s->formatter->open_array_section_with_attrs("account",
             FormatterAttrs("name", s->user.display_name.c_str(), NULL));
@@ -182,9 +182,9 @@ int RGWListBucket_ObjStore_SWIFT::get_params()
   marker = s->info.args.get("marker");
   end_marker = s->info.args.get("end_marker");
   max_keys = s->info.args.get("limit");
-  ret = parse_max_keys();
-  if (ret < 0) {
-    return ret;
+  op_ret = parse_max_keys();
+  if (op_ret < 0) {
+    return op_ret;
   }
   if (max > default_max)
     return -ERR_PRECONDITION_FAILED;
@@ -296,19 +296,19 @@ next:
   s->formatter->close_section();
 
   int64_t content_len = 0;
-  if (!ret) {
+  if (! op_ret) {
     content_len = s->formatter->get_len();
     if (content_len == 0) {
-      ret = STATUS_NO_CONTENT;
+      op_ret = STATUS_NO_CONTENT;
     }
-  } else if (ret > 0) {
-    ret = 0;
+  } else if (op_ret > 0) {
+    op_ret = 0;
   }
 
-  set_req_state_err(s, ret);
+  set_req_state_err(s, op_ret);
   dump_errno(s);
   end_header(s, this, NULL, content_len);
-  if (ret < 0) {
+  if (op_ret < 0) {
     return;
   }
 
@@ -364,17 +364,17 @@ void RGWStatAccount_ObjStore_SWIFT::execute()
 {
   RGWStatAccount_ObjStore::execute();
 
-  ret = rgw_get_user_attrs_by_uid(store, s->user.user_id, attrs);
+  op_ret = rgw_get_user_attrs_by_uid(store, s->user.user_id, attrs);
 }
 
 void RGWStatAccount_ObjStore_SWIFT::send_response()
 {
-  if (ret >= 0) {
-    ret = STATUS_NO_CONTENT;
+  if (op_ret >= 0) {
+    op_ret = STATUS_NO_CONTENT;
     dump_account_metadata(s, buckets_count, buckets_objcount, buckets_size, buckets_size_rounded, attrs);
   }
 
-  set_req_state_err(s, ret);
+  set_req_state_err(s, op_ret);
   dump_errno(s);
 
   end_header(s, NULL, NULL, 0,  true);
@@ -384,15 +384,15 @@ void RGWStatAccount_ObjStore_SWIFT::send_response()
 
 void RGWStatBucket_ObjStore_SWIFT::send_response()
 {
-  if (ret >= 0) {
-    ret = STATUS_NO_CONTENT;
+  if (op_ret >= 0) {
+    op_ret = STATUS_NO_CONTENT;
     dump_container_metadata(s, bucket);
   }
 
-  set_req_state_err(s, ret);
+  set_req_state_err(s, op_ret);
   dump_errno(s);
 
-  end_header(s, this,NULL,0, true);
+  end_header(s, this, NULL, 0, true);
   dump_start(s);
 }
 
@@ -467,11 +467,11 @@ int RGWCreateBucket_ObjStore_SWIFT::get_params()
 
 void RGWCreateBucket_ObjStore_SWIFT::send_response()
 {
-  if (!ret)
-    ret = STATUS_CREATED;
-  else if (ret == -ERR_BUCKET_EXISTS)
-    ret = STATUS_ACCEPTED;
-  set_req_state_err(s, ret);
+  if (! op_ret)
+    op_ret = STATUS_CREATED;
+  else if (op_ret == -ERR_BUCKET_EXISTS)
+    op_ret = STATUS_ACCEPTED;
+  set_req_state_err(s, op_ret);
   dump_errno(s);
   /* Propose ending HTTP header with 0 Content-Length header. */
   end_header(s, NULL, NULL, 0);
@@ -480,7 +480,7 @@ void RGWCreateBucket_ObjStore_SWIFT::send_response()
 
 void RGWDeleteBucket_ObjStore_SWIFT::send_response()
 {
-  int r = ret;
+  int r = op_ret;
   if (!r)
     r = STATUS_NO_CONTENT;
 
@@ -605,11 +605,11 @@ int RGWPutObj_ObjStore_SWIFT::get_params()
 
 void RGWPutObj_ObjStore_SWIFT::send_response()
 {
-  if (!ret)
-    ret = STATUS_CREATED;
+  if (! op_ret)
+    op_ret = STATUS_CREATED;
   dump_etag(s, etag.c_str());
   dump_last_modified(s, mtime);
-  set_req_state_err(s, ret);
+  set_req_state_err(s, op_ret);
   dump_errno(s);
   end_header(s, this);
   rgw_flush_formatter_and_reset(s, s->formatter);
@@ -663,10 +663,10 @@ int RGWPutMetadataAccount_ObjStore_SWIFT::get_params()
 
 void RGWPutMetadataAccount_ObjStore_SWIFT::send_response()
 {
-  if (!ret) {
-    ret = STATUS_NO_CONTENT;
+  if (! op_ret) {
+    op_ret = STATUS_NO_CONTENT;
   }
-  set_req_state_err(s, ret);
+  set_req_state_err(s, op_ret);
   dump_errno(s);
   end_header(s, this);
   rgw_flush_formatter_and_reset(s, s->formatter);
@@ -690,10 +690,10 @@ int RGWPutMetadataBucket_ObjStore_SWIFT::get_params()
 
 void RGWPutMetadataBucket_ObjStore_SWIFT::send_response()
 {
-  if (!ret) {
-    ret = STATUS_NO_CONTENT;
+  if (! op_ret) {
+    op_ret = STATUS_NO_CONTENT;
   }
-  set_req_state_err(s, ret);
+  set_req_state_err(s, op_ret);
   dump_errno(s);
   end_header(s, this);
   rgw_flush_formatter_and_reset(s, s->formatter);
@@ -720,10 +720,10 @@ int RGWPutMetadataObject_ObjStore_SWIFT::get_params()
 
 void RGWPutMetadataObject_ObjStore_SWIFT::send_response()
 {
-  if (!ret) {
-    ret = STATUS_ACCEPTED;
+  if (! op_ret) {
+    op_ret = STATUS_ACCEPTED;
   }
-  set_req_state_err(s, ret);
+  set_req_state_err(s, op_ret);
   if (!s->err.is_err()) {
     dump_content_length(s, 0);
   }
@@ -797,7 +797,7 @@ int RGWDeleteObj_ObjStore_SWIFT::get_params()
 
 void RGWDeleteObj_ObjStore_SWIFT::send_response()
 {
-  int r = ret;
+  int r = op_ret;
 
   if (multipart_delete) {
     r = 0;
@@ -816,7 +816,7 @@ void RGWDeleteObj_ObjStore_SWIFT::send_response()
                          deleter->get_failures(),
                          s->prot_flags,
                          *s->formatter);
-    } else if (-ENOENT == ret) {
+    } else if (-ENOENT == op_ret) {
       bulkdelete_respond(0, 1, {}, s->prot_flags, *s->formatter);
     } else {
       RGWBulkDelete::acct_path_t path;
@@ -824,7 +824,7 @@ void RGWDeleteObj_ObjStore_SWIFT::send_response()
       path.obj_key = s->object;
 
       RGWBulkDelete::fail_desc_t fail_desc;
-      fail_desc.err = ret;
+      fail_desc.err = op_ret;
       fail_desc.path = path;
 
       bulkdelete_respond(0, 0, { fail_desc }, s->prot_flags, *s->formatter);
@@ -893,7 +893,6 @@ int RGWCopyObj_ObjStore_SWIFT::get_params()
   if_match = s->info.env->get("HTTP_COPY_IF_MATCH");
   if_nomatch = s->info.env->get("HTTP_COPY_IF_NONE_MATCH");
 
-  /* XXX why copy this? just use req_state in rgw_op.cc:verify_permission */
   src_tenant_name = s->src_tenant_name;
   src_bucket_name = s->src_bucket_name;
   src_object = s->src_object;
@@ -919,17 +918,17 @@ int RGWCopyObj_ObjStore_SWIFT::get_params()
 
 void RGWCopyObj_ObjStore_SWIFT::send_partial_response(off_t ofs)
 {
-  if (!sent_header) {
-    if (!ret)
-      ret = STATUS_CREATED;
-    set_req_state_err(s, ret);
+  if (! sent_header) {
+    if (! op_ret)
+      op_ret = STATUS_CREATED;
+    set_req_state_err(s, op_ret);
     dump_errno(s);
     end_header(s, this);
 
     /* Send progress information. Note that this diverge from the original swift
      * spec. We do this in order to keep connection alive.
      */
-    if (ret == 0) {
+    if (op_ret == 0) {
       s->formatter->open_array_section("progress");
     }
     sent_header = true;
@@ -958,11 +957,11 @@ void RGWCopyObj_ObjStore_SWIFT::dump_copy_info()
 
 void RGWCopyObj_ObjStore_SWIFT::send_response()
 {
-  if (!sent_header) {
+  if (! sent_header) {
     string content_type;
-    if (!ret)
-      ret = STATUS_CREATED;
-    set_req_state_err(s, ret);
+    if (! op_ret)
+      op_ret = STATUS_CREATED;
+    set_req_state_err(s, op_ret);
     dump_errno(s);
     dump_etag(s, etag.c_str());
     dump_last_modified(s, mtime);
@@ -998,7 +997,8 @@ int RGWGetObj_ObjStore_SWIFT::send_response_data(bufferlist& bl, off_t bl_ofs, o
     goto send_data;
   }
 
-  set_req_state_err(s, (partial_content && !ret) ? STATUS_PARTIAL_CONTENT : ret);
+  set_req_state_err(s, (partial_content && !op_ret) ? STATUS_PARTIAL_CONTENT :
+		    op_ret);
   dump_errno(s);
   if (s->err.is_err()) {
     end_header(s, NULL);
@@ -1016,7 +1016,7 @@ int RGWGetObj_ObjStore_SWIFT::send_response_data(bufferlist& bl, off_t bl_ofs, o
     s->cio->print("X-Static-Large-Object: True\r\n");
   }
 
-  if (!ret) {
+  if (! op_ret) {
     map<string, bufferlist>::iterator iter = attrs.find(RGW_ATTR_ETAG);
     if (iter != attrs.end()) {
       bufferlist& bl = iter->second;
@@ -1035,7 +1035,7 @@ int RGWGetObj_ObjStore_SWIFT::send_response_data(bufferlist& bl, off_t bl_ofs, o
   sent_header = true;
 
 send_data:
-  if (get_data && !ret) {
+  if (get_data && !op_ret) {
     int r = s->cio->write(bl.c_str() + bl_ofs, bl_len);
     if (r < 0)
       return r;
@@ -1052,10 +1052,10 @@ void RGWOptionsCORS_ObjStore_SWIFT::send_response()
   /*EACCES means, there is no CORS registered yet for the bucket
    *ENOENT means, there is no match of the Origin in the list of CORSRule
    */
-  if (ret == -ENOENT)
-    ret = -EACCES;
-  if (ret < 0) {
-    set_req_state_err(s, ret);
+  if (op_ret == -ENOENT)
+    op_ret = -EACCES;
+  if (op_ret < 0) {
+    set_req_state_err(s, op_ret);
     dump_errno(s);
     end_header(s, NULL);
     return;
@@ -1110,7 +1110,7 @@ int RGWBulkDelete_ObjStore_SWIFT::get_data(list<RGWBulkDelete::acct_path_t>& ite
 
 void RGWBulkDelete_ObjStore_SWIFT::send_response()
 {
-  set_req_state_err(s, ret);
+  set_req_state_err(s, op_ret);
   dump_errno(s);
   end_header(s, NULL);
 
@@ -1231,7 +1231,7 @@ RGWOp *RGWHandler_ObjStore_Obj_SWIFT::op_put()
   if (is_acl_op()) {
     return new RGWPutACLs_ObjStore_SWIFT;
   }
-  if (s->src_bucket_name.empty())
+  if (s->init_state.src_bucket.empty())
     return new RGWPutObj_ObjStore_SWIFT;
   else
     return new RGWCopyObj_ObjStore_SWIFT;
@@ -1274,6 +1274,49 @@ int RGWHandler_ObjStore_SWIFT::authorize()
   return 0;
 }
 
+int RGWHandler_ObjStore_SWIFT::postauth_init()
+{
+  struct req_init_state *t = &s->init_state;
+  /* Turn the names stashed in init_state into the request's tenant/bucket. */
+  /* XXX Stub this until Swift Auth sets account into URL. */
+  s->bucket_tenant = s->user.user_id.tenant;
+  s->bucket_name = t->url_bucket;
+
+  dout(10) << "s->object=" << (!s->object.empty() ? s->object : rgw_obj_key("<NULL>"))
+           << " s->bucket=" << rgw_make_bucket_entry_name(s->bucket_tenant, s->bucket_name) << dendl;
+  /* Validate the destination names; the first non-zero validator result is returned. */
+  int ret;
+  ret = validate_tenant_name(s->bucket_tenant);
+  if (ret)
+    return ret;
+  ret = validate_bucket_name(s->bucket_name);
+  if (ret)
+    return ret;
+  ret = validate_object_name(s->object.name);
+  if (ret)
+    return ret;
+  /* init() fills src_bucket for X-Copy-From / COPY requests; empty means no copy source. */
+  if (!t->src_bucket.empty()) {
+    /*
+     * We don't allow cross-tenant copy at present. It requires account
+     * names in the URL for Swift.
+     */
+    s->src_tenant_name = s->user.user_id.tenant;
+    s->src_bucket_name = t->src_bucket;
+
+    ret = validate_bucket_name(s->src_bucket_name);
+    if (ret < 0) {
+      return ret;
+    }
+    ret = validate_object_name(s->src_object.name);
+    if (ret < 0) {
+      return ret;
+    }
+  }
+
+  return 0;
+}
+
 int RGWHandler_ObjStore_SWIFT::validate_bucket_name(const string& bucket)
 {
   int ret = RGWHandler_ObjStore::validate_bucket_name(bucket);
@@ -1399,9 +1442,8 @@ int RGWHandler_ObjStore_SWIFT::init_from_header(struct req_state *s)
 
   s->info.effective_uri = "/" + first;
 
-  /* XXX Temporarily not parsing URL until Auth puts something in there. */
-  s->bucket_tenant = s->user.user_id.tenant;
-  s->bucket_name = first;
+  // Save bucket to tide us over until token is parsed.
+  s->init_state.url_bucket = first;
 
   if (req.size()) {
     s->object = rgw_obj_key(req, s->info.env->get("HTTP_X_OBJECT_VERSION_ID", "")); /* rgw swift extension */
@@ -1413,59 +1455,34 @@ int RGWHandler_ObjStore_SWIFT::init_from_header(struct req_state *s)
 
 int RGWHandler_ObjStore_SWIFT::init(RGWRados *store, struct req_state *s, RGWClientIO *cio)
 {
-  dout(10) << "s->object=" << (!s->object.empty() ? s->object : rgw_obj_key("<NULL>"))
-           << " s->bucket=" << rgw_make_bucket_entry_name(s->bucket_tenant, s->bucket_name) << dendl;
+  struct req_init_state *t = &s->init_state;
 
-  int ret;
-  ret = validate_tenant_name(s->bucket_tenant);
-  if (ret)
-    return ret;
-  ret = validate_bucket_name(s->bucket_name);
-  if (ret)
-    return ret;
-  ret = validate_object_name(s->object.name);
-  if (ret)
-    return ret;
+  s->dialect = "swift";
 
   const char *copy_source = s->info.env->get("HTTP_X_COPY_FROM");
   if (copy_source) {
-    bool result = RGWCopyObj::parse_copy_location(copy_source, s->src_bucket_name, s->src_object);
+    bool result = RGWCopyObj::parse_copy_location(copy_source, t->src_bucket, s->src_object);
     if (!result)
        return -ERR_BAD_URL;
-    s->src_tenant_name = s->user.user_id.tenant;
   }
 
-  s->dialect = "swift";
-
   if (s->op == OP_COPY) {
     const char *req_dest = s->info.env->get("HTTP_DESTINATION");
     if (!req_dest)
       return -ERR_BAD_URL;
 
-    string dest_tenant_name, dest_bucket_name;
+    string dest_bucket_name;
     rgw_obj_key dest_obj_key;
     bool result = RGWCopyObj::parse_copy_location(req_dest, dest_bucket_name, dest_obj_key);
     if (!result)
        return -ERR_BAD_URL;
-    dest_tenant_name = s->user.user_id.tenant;
 
     string dest_object = dest_obj_key.name;
-    if (dest_bucket_name != s->bucket_name) {
-      ret = validate_bucket_name(dest_bucket_name);
-      if (ret < 0)
-        return ret;
-    }
-
-    ret = validate_tenant_name(dest_tenant_name);
-    if (ret < 0)
-      return ret;
 
     /* convert COPY operation into PUT */
-    s->src_tenant_name = s->bucket_tenant;
-    s->src_bucket_name = s->bucket_name;
+    t->src_bucket = t->url_bucket;
     s->src_object = s->object;
-    s->bucket_tenant = dest_tenant_name;
-    s->bucket_name = dest_bucket_name;
+    t->url_bucket = dest_bucket_name;
     s->object = rgw_obj_key(dest_object);
     s->op = OP_PUT;
   }
@@ -1480,8 +1497,9 @@ RGWHandler *RGWRESTMgr_SWIFT::get_handler(struct req_state *s)
   if (ret < 0)
     return NULL;
 
-  if (s->bucket_name.empty())
+  if (s->init_state.url_bucket.empty())
     return new RGWHandler_ObjStore_Service_SWIFT;
+
   if (s->object.empty())
     return new RGWHandler_ObjStore_Bucket_SWIFT;
 
diff --git a/src/rgw/rgw_rest_swift.h b/src/rgw/rgw_rest_swift.h
index a64c996..1d483b3 100644
--- a/src/rgw/rgw_rest_swift.h
+++ b/src/rgw/rgw_rest_swift.h
@@ -192,8 +192,9 @@ public:
 
   int validate_bucket_name(const string& bucket);
 
-  int init(RGWRados *store, struct req_state *state, RGWClientIO *cio);
+  int init(RGWRados *store, struct req_state *s, RGWClientIO *cio);
   int authorize();
+  int postauth_init();
 
   RGWAccessControlPolicy *alloc_policy() { return NULL; /* return new RGWAccessControlPolicy_SWIFT; */ }
   void free_policy(RGWAccessControlPolicy *policy) { delete policy; }
diff --git a/src/rgw/rgw_rest_user.cc b/src/rgw/rgw_rest_user.cc
index 6086e76..46132b0 100644
--- a/src/rgw/rgw_rest_user.cc
+++ b/src/rgw/rgw_rest_user.cc
@@ -646,7 +646,7 @@ struct UserQuotas {
 
   UserQuotas() {}
 
-  UserQuotas(RGWUserInfo& info) : bucket_quota(info.bucket_quota), 
+  explicit UserQuotas(RGWUserInfo& info) : bucket_quota(info.bucket_quota), 
 				  user_quota(info.user_quota) {}
 
   void dump(Formatter *f) const {
diff --git a/src/rgw/rgw_swift.cc b/src/rgw/rgw_swift.cc
index b51d37f..76eda3c 100644
--- a/src/rgw/rgw_swift.cc
+++ b/src/rgw/rgw_swift.cc
@@ -229,7 +229,8 @@ static int decode_b64_cms(CephContext *cct, const string& signed_b64, bufferlist
   return 0;
 }
 
-int	RGWSwift::get_keystone_url(std::string& url)
+int RGWSwift::get_keystone_url(CephContext * const cct,
+                               std::string& url)
 {
   bufferlist bl;
   RGWGetRevokedTokens req(cct, &bl);
@@ -244,11 +245,22 @@ int	RGWSwift::get_keystone_url(std::string& url)
   return 0;
 }
 
-int	RGWSwift::get_keystone_admin_token(std::string& token)
+int RGWSwift::get_keystone_url(std::string& url)
+{
+  return RGWSwift::get_keystone_url(cct, url);
+}
+
+int RGWSwift::get_keystone_admin_token(std::string& token)
+{
+  return RGWSwift::get_keystone_admin_token(cct, token);
+}
+
+int RGWSwift::get_keystone_admin_token(CephContext * const cct,
+                                       std::string& token)
 {
   std::string token_url;
 
-  if (get_keystone_url(token_url) < 0)
+  if (get_keystone_url(cct, token_url) < 0)
     return -EINVAL;
   if (cct->_conf->rgw_keystone_admin_token.empty()) {
     token_url.append("v2.0/tokens");
diff --git a/src/rgw/rgw_swift.h b/src/rgw/rgw_swift.h
index 63596e0..ff449e9 100644
--- a/src/rgw/rgw_swift.h
+++ b/src/rgw/rgw_swift.h
@@ -61,7 +61,7 @@ protected:
   int check_revoked();
 public:
 
-  RGWSwift(CephContext *_cct) : cct(_cct), keystone_revoke_thread(NULL) {
+  explicit RGWSwift(CephContext *_cct) : cct(_cct), keystone_revoke_thread(NULL) {
     init();
   }
   ~RGWSwift() {
@@ -70,6 +70,10 @@ public:
 
   bool verify_swift_token(RGWRados *store, req_state *s);
   bool going_down();
+
+  /* Static methods shared between Swift API and S3. */
+  static int get_keystone_url(CephContext *cct, std::string& url);
+  static int get_keystone_admin_token(CephContext *cct, std::string& token);
 };
 
 extern RGWSwift *rgw_swift;
diff --git a/src/rgw/rgw_swift_auth.h b/src/rgw/rgw_swift_auth.h
index 2fe5d34..6b19d04 100644
--- a/src/rgw/rgw_swift_auth.h
+++ b/src/rgw/rgw_swift_auth.h
@@ -29,6 +29,7 @@ public:
 
   int init(RGWRados *store, struct req_state *state, RGWClientIO *cio);
   int authorize();
+  int postauth_init() { return 0; }
   int read_permissions(RGWOp *op) { return 0; }
 
   virtual RGWAccessControlPolicy *alloc_policy() { return NULL; }
diff --git a/src/rgw/rgw_user.cc b/src/rgw/rgw_user.cc
index 81b96bb..fb313d0 100644
--- a/src/rgw/rgw_user.cc
+++ b/src/rgw/rgw_user.cc
@@ -1428,14 +1428,17 @@ int RGWSubUserPool::execute_remove(RGWUserAdminOpState& op_state,
 
   map<std::string, RGWSubUser>::iterator siter;
   siter = subuser_map->find(subuser_str);
-
+  if (siter == subuser_map->end()){
+    set_err_msg(err_msg, "subuser not found: " + subuser_str);
+    return -EINVAL;
+  }
   if (!op_state.has_existing_subuser()) {
     set_err_msg(err_msg, "subuser not found: " + subuser_str);
     return -EINVAL;
   }
 
   // always purge all associate keys
-  user->keys.remove_subuser_keys(op_state, &subprocess_msg, defer_user_update);
+  user->keys.remove_subuser_keys(op_state, &subprocess_msg, true);
 
   // remove the subuser from the user info
   subuser_map->erase(siter);
diff --git a/src/rgw/rgw_user.h b/src/rgw/rgw_user.h
index 57d40fa..bff4a52 100644
--- a/src/rgw/rgw_user.h
+++ b/src/rgw/rgw_user.h
@@ -526,7 +526,7 @@ private:
   int add(RGWUserAdminOpState& op_state, std::string *err_msg, bool defer_save);
   int remove(RGWUserAdminOpState& op_state, std::string *err_msg, bool defer_save);
 public:
-  RGWAccessKeyPool(RGWUser* usr);
+  explicit RGWAccessKeyPool(RGWUser* usr);
   ~RGWAccessKeyPool();
 
   int init(RGWUserAdminOpState& op_state);
@@ -561,7 +561,7 @@ private:
   int remove(RGWUserAdminOpState& op_state, std::string *err_msg, bool defer_save);
   int modify(RGWUserAdminOpState& op_state, std::string *err_msg, bool defer_save);
 public:
-  RGWSubUserPool(RGWUser *user);
+  explicit RGWSubUserPool(RGWUser *user);
   ~RGWSubUserPool();
 
   bool exists(std::string subuser);
@@ -586,7 +586,7 @@ private:
   int remove(RGWUserAdminOpState& op_state, std::string *err_msg, bool defer_save);
 
 public:
-  RGWUserCapPool(RGWUser *user);
+  explicit RGWUserCapPool(RGWUser *user);
   ~RGWUserCapPool();
 
   int init(RGWUserAdminOpState& op_state);
diff --git a/src/rgw/rgw_website.cc b/src/rgw/rgw_website.cc
new file mode 100644
index 0000000..a69ffe1
--- /dev/null
+++ b/src/rgw/rgw_website.cc
@@ -0,0 +1,119 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- 
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2015 Yehuda Sadeh <yehuda at redhat.com>
+ * Copyright (C) 2015 Robin H. Johnson <robin.johnson at dreamhost.com>
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software 
+ * Foundation.  See file COPYING.
+ * 
+ */
+#include "common/debug.h"
+#include "common/ceph_json.h"
+
+#include "acconfig.h"
+
+#include <errno.h>
+#include <string>
+#include <list>
+#include "include/types.h"
+#include "rgw_website.h"
+
+using namespace std;
+
+
+bool RGWBWRoutingRuleCondition::check_key_condition(const string& key) {
+  /* prefix test: compare() clamps to key's length, so a key shorter than the prefix never matches */
+  return key.compare(0, key_prefix_equals.size(), key_prefix_equals) == 0;
+}
+
+
+void RGWBWRoutingRule::apply_rule(const string& default_protocol, const string& default_hostname,
+                                           const string& key, string *new_url, int *redirect_code)
+{
+  /* Build the redirect URL for key into *new_url; the rule's protocol/hostname override the
+   * defaults when set.  *redirect_code is written only if the rule carries a code > 0. */
+  const RGWRedirectInfo& redirect = redirect_info.redirect;
+  const string& protocol = (!redirect.protocol.empty() ? redirect.protocol : default_protocol);
+  const string& hostname = (!redirect.hostname.empty() ? redirect.hostname : default_hostname);
+  *new_url = protocol + "://" + hostname + "/";
+  const size_t prefix_len = condition.key_prefix_equals.size();
+  if (!redirect_info.replace_key_prefix_with.empty()) {
+    *new_url += redirect_info.replace_key_prefix_with;
+    if (key.size() > prefix_len)  /* substr() throws std::out_of_range on a shorter key */
+      *new_url += key.substr(prefix_len);
+  } else if (!redirect_info.replace_key_with.empty()) {
+    *new_url += redirect_info.replace_key_with;
+  } else {
+    *new_url += key;
+  }
+  if (redirect.http_redirect_code > 0)
+    *redirect_code = redirect.http_redirect_code;
+}
+
+bool RGWBWRoutingRules::check_key_and_error_code_condition(const string &key, int error_code, RGWBWRoutingRule **rule)
+{
+  /* first rule, in list order, whose key prefix and error code both match; *rule untouched on miss */
+  for (RGWBWRoutingRule& candidate : rules) {
+    if (!candidate.check_key_condition(key) || !candidate.check_error_code_condition(error_code)) continue;
+    *rule = &candidate;
+    return true;
+  }
+  return false;
+}
+
+bool RGWBWRoutingRules::check_key_condition(const string& key, RGWBWRoutingRule **rule)
+{
+  /* first rule, in list order, whose key prefix matches; *rule is left untouched on a miss */
+  for (RGWBWRoutingRule& candidate : rules) {
+    if (!candidate.check_key_condition(key)) continue;
+    *rule = &candidate;
+    return true;
+  }
+  return false;
+}
+
+bool RGWBWRoutingRules::check_error_code_condition(const int http_error_code, RGWBWRoutingRule **rule)
+{
+  /* first rule, in list order, whose error code matches; *rule is left untouched on a miss */
+  for (RGWBWRoutingRule& candidate : rules) {
+    if (!candidate.check_error_code_condition(http_error_code)) continue;
+    *rule = &candidate;
+    return true;
+  }
+  return false;
+}
+
+bool RGWBucketWebsiteConf::should_redirect(const string& key, const int http_error_code, RGWBWRoutingRule *redirect)
+{
+  /* A configured redirect_all hostname wins over the routing rules; otherwise key+code must match a rule. */
+  if (!redirect_all.hostname.empty()) {
+    RGWBWRoutingRule redirect_all_rule = RGWBWRoutingRule();  /* value-init: no indeterminate fields */
+    redirect_all_rule.redirect_info.redirect = redirect_all;
+    /* the 301 belongs on the copy being returned, not on the stored redirect_all */
+    redirect_all_rule.redirect_info.redirect.http_redirect_code = 301;
+    *redirect = redirect_all_rule;
+    return true;
+  }
+  RGWBWRoutingRule *rule;
+  if (!routing_rules.check_key_and_error_code_condition(key, http_error_code, &rule))
+    return false;
+  *redirect = *rule;
+  return true;
+}
+
+void RGWBucketWebsiteConf::get_effective_key(const string& key, string *effective_key)
+{
+  /* An empty key, or one ending in '/', gets index_doc_suffix appended;
+   * any other key is passed through unchanged. */
+  const bool wants_index = key.empty() || key[key.size() - 1] == '/';
+  if (!wants_index) {
+    *effective_key = key;
+    return;
+  }
+  *effective_key = key + index_doc_suffix;
+}
diff --git a/src/rgw/rgw_website.h b/src/rgw/rgw_website.h
new file mode 100644
index 0000000..6c1a92b
--- /dev/null
+++ b/src/rgw/rgw_website.h
@@ -0,0 +1,200 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- 
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2015 Yehuda Sadeh <yehuda at redhat.com>
+ * Copyright (C) 2015 Robin H. Johnson <robin.johnson at dreamhost.com>
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software 
+ * Foundation.  See file COPYING.
+ * 
+ */
+#ifndef RGW_WEBSITE_H
+#define RGW_WEBSITE_H
+
+#include "rgw_xml.h"
+
+/* Target of a website redirect: optional protocol/hostname plus an HTTP status code. */
+struct RGWRedirectInfo {
+  string protocol;
+  string hostname;
+  uint16_t http_redirect_code = 0;  /* 0 = unset; RGWBWRoutingRule::apply_rule() only honours codes > 0 */
+  /* encode() and decode() must list the fields in the same order */
+  void encode(bufferlist& bl) const {
+    ENCODE_START(1, 1, bl);
+    ::encode(protocol, bl);
+    ::encode(hostname, bl);
+    ::encode(http_redirect_code, bl);
+    ENCODE_FINISH(bl);
+  }
+  void decode(bufferlist::iterator& bl) {
+    DECODE_START(1, bl);
+    ::decode(protocol, bl);
+    ::decode(hostname, bl);
+    ::decode(http_redirect_code, bl);
+    DECODE_FINISH(bl);
+  }
+
+  void dump(Formatter *f) const;
+  void decode_json(JSONObj *obj);
+};
+WRITE_CLASS_ENCODER(RGWRedirectInfo)
+
+
+struct RGWBWRedirectInfo
+{
+  RGWRedirectInfo redirect;
+  string replace_key_prefix_with;
+  string replace_key_with;
+
+  void encode(bufferlist& bl) const {
+    ENCODE_START(1, 1, bl);
+    ::encode(redirect, bl);
+    ::encode(replace_key_prefix_with, bl);
+    ::encode(replace_key_with, bl);
+    ENCODE_FINISH(bl);
+  }
+  void decode(bufferlist::iterator& bl) {
+    DECODE_START(1, bl);
+    ::decode(redirect, bl);
+    ::decode(replace_key_prefix_with, bl);
+    ::decode(replace_key_with, bl);
+    DECODE_FINISH(bl);
+  }
+
+  void dump(Formatter *f) const;
+  void dump_xml(Formatter *f) const;
+  void decode_json(JSONObj *obj);
+  void decode_xml(XMLObj *obj);
+};
+WRITE_CLASS_ENCODER(RGWBWRedirectInfo)
+
+/* Match condition of a routing rule: a key prefix and an HTTP error code. */
+struct RGWBWRoutingRuleCondition {
+  string key_prefix_equals;
+  uint16_t http_error_code_returned_equals = 0;  /* initialised so the comparison below never reads garbage */
+
+  void encode(bufferlist& bl) const {
+    ENCODE_START(1, 1, bl);
+    ::encode(key_prefix_equals, bl);
+    ::encode(http_error_code_returned_equals, bl);
+    ENCODE_FINISH(bl);
+  }
+  void decode(bufferlist::iterator& bl) {
+    DECODE_START(1, bl);
+    ::decode(key_prefix_equals, bl);
+    ::decode(http_error_code_returned_equals, bl);
+    DECODE_FINISH(bl);
+  }
+
+  void dump(Formatter *f) const;
+  void dump_xml(Formatter *f) const;
+  void decode_json(JSONObj *obj);
+  void decode_xml(XMLObj *obj);
+  /* matchers; error_code is truncated to 16 bits before it is compared */
+  bool check_key_condition(const string& key);
+  bool check_error_code_condition(const int error_code) {
+    return (uint16_t)error_code == http_error_code_returned_equals;
+  }
+};
+WRITE_CLASS_ENCODER(RGWBWRoutingRuleCondition)
+
+struct RGWBWRoutingRule
+{
+  RGWBWRoutingRuleCondition condition;
+  RGWBWRedirectInfo redirect_info;
+
+  void encode(bufferlist& bl) const {
+    ENCODE_START(1, 1, bl);
+    ::encode(condition, bl);
+    ::encode(redirect_info, bl);
+    ENCODE_FINISH(bl);
+  }
+  void decode(bufferlist::iterator& bl) {
+    DECODE_START(1, bl);
+    ::decode(condition, bl);
+    ::decode(redirect_info, bl);
+    DECODE_FINISH(bl);
+  }
+
+  void dump(Formatter *f) const;
+  void dump_xml(Formatter *f) const;
+  void decode_json(JSONObj *obj);
+  void decode_xml(XMLObj *obj);
+
+  bool check_key_condition(const string& key) {
+    return condition.check_key_condition(key);
+  }
+  bool check_error_code_condition(int error_code) {
+    return condition.check_error_code_condition(error_code);
+  }
+
+  void apply_rule(const string& default_protocol, const string& default_hostname, const string& key, string *redirect, int *redirect_code);
+};
+WRITE_CLASS_ENCODER(RGWBWRoutingRule)
+
+struct RGWBWRoutingRules
+{
+  list<RGWBWRoutingRule> rules;
+
+  void encode(bufferlist& bl) const {
+    ENCODE_START(1, 1, bl);
+    ::encode(rules, bl);
+    ENCODE_FINISH(bl);
+  }
+  void decode(bufferlist::iterator& bl) {
+    DECODE_START(1, bl);
+    ::decode(rules, bl);
+    DECODE_FINISH(bl);
+  }
+
+  void dump(Formatter *f) const;
+  void dump_xml(Formatter *f) const;
+  void decode_json(JSONObj *obj);
+
+  bool check_key_condition(const string& key, RGWBWRoutingRule **rule);
+  bool check_error_code_condition(int error_code, RGWBWRoutingRule **rule);
+  bool check_key_and_error_code_condition(const string& key, const int error_code, RGWBWRoutingRule **rule);
+};
+WRITE_CLASS_ENCODER(RGWBWRoutingRules)
+
+struct RGWBucketWebsiteConf
+{
+  RGWRedirectInfo redirect_all;
+  string index_doc_suffix;
+  string error_doc;
+  RGWBWRoutingRules routing_rules;
+
+  RGWBucketWebsiteConf() {}
+
+  void encode(bufferlist& bl) const {
+    ENCODE_START(1, 1, bl);
+    ::encode(index_doc_suffix, bl);
+    ::encode(error_doc, bl);
+    ::encode(routing_rules, bl);
+    ::encode(redirect_all, bl);
+    ENCODE_FINISH(bl);
+  }
+  void decode(bufferlist::iterator& bl) {
+    DECODE_START(1, bl);
+    ::decode(index_doc_suffix, bl);
+    ::decode(error_doc, bl);
+    ::decode(routing_rules, bl);
+    ::decode(redirect_all, bl);
+    DECODE_FINISH(bl);
+  }
+
+  void dump(Formatter *f) const;
+  void decode_json(JSONObj *obj);
+  void decode_xml(XMLObj *obj);
+  void dump_xml(Formatter *f) const;
+
+  bool should_redirect(const string& key, const int http_error_code, RGWBWRoutingRule *redirect);
+  void get_effective_key(const string& key, string *effective_key);
+};
+WRITE_CLASS_ENCODER(RGWBucketWebsiteConf)
+
+#endif
diff --git a/src/rgw/rgw_xml.cc b/src/rgw/rgw_xml.cc
index 3e38f80..df555f8 100644
--- a/src/rgw/rgw_xml.cc
+++ b/src/rgw/rgw_xml.cc
@@ -6,6 +6,8 @@
 #include <iostream>
 #include <map>
 
+#include <expat.h>
+
 #include "include/types.h"
 
 #include "rgw_common.h"
@@ -148,17 +150,21 @@ RGWXMLParser::
   XML_ParserFree(p);
 
   free(buf);
-  vector<XMLObj *>::iterator iter;
-  for (iter = objs.begin(); iter != objs.end(); ++iter) {
+  list<XMLObj *>::iterator iter;
+  for (iter = allocated_objs.begin(); iter != allocated_objs.end(); ++iter) {
     XMLObj *obj = *iter;
     delete obj;
   }
 }
 
+
 bool RGWXMLParser::xml_start(const char *el, const char **attr) {
   XMLObj * obj = alloc_obj(el);
   if (!obj) {
-    obj = new XMLObj();
+    unallocated_objs.push_back(XMLObj());
+    obj = &unallocated_objs.back();
+  } else {
+    allocated_objs.push_back(obj);
   }
   if (!obj->xml_start(cur_obj, el, attr))
     return false;
@@ -238,3 +244,254 @@ bool RGWXMLParser::parse(const char *_buf, int len, int done)
 
   return success;
 }
+
+void decode_xml_obj(unsigned long& val, XMLObj *obj)
+{
+  /* Parse the element text as a base-10 unsigned long into val; throws RGWXMLDecoder::err
+   * on a range error, when no digits are found, or on trailing non-whitespace. */
+  const string& s = obj->get_data();
+  const char *start = s.c_str();
+  char *p;
+
+  errno = 0;
+  val = strtoul(start, &p, 10);
+
+  /* range error reported by strtoul(), or any other errno that left val at 0 */
+  if ((errno == ERANGE && val == ULONG_MAX) ||
+      (errno != 0 && val == 0)) {
+    throw RGWXMLDecoder::err("failed to parse number");
+  }
+  /* strtoul() consumed nothing: there was no number at all */
+  if (p == start) {
+    throw RGWXMLDecoder::err("failed to parse number");
+  }
+  /* only whitespace may follow; the cast keeps isspace() defined for bytes >= 0x80 */
+  for (; *p != '\0'; p++) {
+    if (!isspace((unsigned char)*p)) {
+      throw RGWXMLDecoder::err("failed to parse number");
+    }
+  }
+}
+
+
+void decode_xml_obj(long& val, XMLObj *obj)
+{
+  /* Parse the element text as a base-10 long into val; throws RGWXMLDecoder::err
+   * on a range error, when no digits are found, or on trailing non-whitespace. */
+  const string& s = obj->get_data();
+  const char *start = s.c_str();
+  char *p;
+
+  errno = 0;
+  val = strtol(start, &p, 10);
+
+  /* range error reported by strtol(), or any other errno that left val at 0 */
+  if ((errno == ERANGE && (val == LONG_MAX || val == LONG_MIN)) ||
+      (errno != 0 && val == 0)) {
+    throw RGWXMLDecoder::err("failed to parse number");
+  }
+  /* strtol() consumed nothing: there was no number at all */
+  if (p == start) {
+    throw RGWXMLDecoder::err("failed to parse number");
+  }
+  /* only whitespace may follow; the cast keeps isspace() defined for bytes >= 0x80 */
+  for (; *p != '\0'; p++) {
+    if (!isspace((unsigned char)*p)) {
+      throw RGWXMLDecoder::err("failed to parse number");
+    }
+  }
+}
+
+void decode_xml_obj(long long& val, XMLObj *obj)
+{
+  /* Parse the element text as a base-10 long long into val; throws RGWXMLDecoder::err
+   * on a range error, when no digits are found, or on trailing non-whitespace. */
+  const string& s = obj->get_data();
+  const char *start = s.c_str();
+  char *p;
+
+  errno = 0;
+  val = strtoll(start, &p, 10);
+
+  /* range error reported by strtoll(), or any other errno that left val at 0 */
+  if ((errno == ERANGE && (val == LLONG_MAX || val == LLONG_MIN)) ||
+      (errno != 0 && val == 0)) {
+    throw RGWXMLDecoder::err("failed to parse number");
+  }
+  /* strtoll() consumed nothing: there was no number at all */
+  if (p == start) {
+    throw RGWXMLDecoder::err("failed to parse number");
+  }
+  /* only whitespace may follow; the cast keeps isspace() defined for bytes >= 0x80 */
+  for (; *p != '\0'; p++) {
+    if (!isspace((unsigned char)*p)) {
+      throw RGWXMLDecoder::err("failed to parse number");
+    }
+  }
+}
+
+void decode_xml_obj(unsigned long long& val, XMLObj *obj)
+{
+  /* Parse the element text as a base-10 unsigned long long into val; throws RGWXMLDecoder::err
+   * on a range error, when no digits are found, or on trailing non-whitespace. */
+  const string& s = obj->get_data();
+  const char *start = s.c_str();
+  char *p;
+
+  errno = 0;
+  val = strtoull(start, &p, 10);
+
+  /* range error reported by strtoull(), or any other errno that left val at 0 */
+  if ((errno == ERANGE && val == ULLONG_MAX) ||
+      (errno != 0 && val == 0)) {
+    throw RGWXMLDecoder::err("failed to parse number");
+  }
+  /* strtoull() consumed nothing: there was no number at all */
+  if (p == start) {
+    throw RGWXMLDecoder::err("failed to parse number");
+  }
+  /* only whitespace may follow; the cast keeps isspace() defined for bytes >= 0x80 */
+  for (; *p != '\0'; p++) {
+    if (!isspace((unsigned char)*p)) {
+      throw RGWXMLDecoder::err("failed to parse number");
+    }
+  }
+}
+
+void decode_xml_obj(int& val, XMLObj *obj)
+{
+  long l;
+  decode_xml_obj(l, obj);
+#if LONG_MAX > INT_MAX
+  if (l > INT_MAX || l < INT_MIN) {
+    throw RGWXMLDecoder::err("integer out of range");
+  }
+#endif
+
+  val = (int)l;
+}
+
+void decode_xml_obj(unsigned& val, XMLObj *obj)
+{
+  unsigned long l;
+  decode_xml_obj(l, obj);
+#if ULONG_MAX > UINT_MAX
+  if (l > UINT_MAX) {
+    throw RGWXMLDecoder::err("unsigned integer out of range");
+  }
+#endif
+
+  val = (unsigned)l;
+}
+
+void decode_xml_obj(bool& val, XMLObj *obj)
+{
+  /* "true" / "false" are accepted in any letter case; any other text is handed
+   * to decode_xml_obj(int&) and a nonzero result counts as true. */
+  const string text = obj->get_data();
+  const char *raw = text.c_str();
+  const bool is_true = (strcasecmp(raw, "true") == 0);
+  if (is_true || strcasecmp(raw, "false") == 0) {
+    val = is_true;
+    return;
+  }
+  int numeric;
+  decode_xml_obj(numeric, obj);
+  val = (numeric != 0);
+}
+
+void decode_xml_obj(bufferlist& val, XMLObj *obj)
+{
+  string s = obj->get_data();
+
+  bufferlist bl;
+  bl.append(s.c_str(), s.size());
+  try {
+    val.decode_base64(bl);
+  } catch (buffer::error& err) {
+   throw RGWXMLDecoder::err("failed to decode base64");
+  }
+}
+
+void decode_xml_obj(utime_t& val, XMLObj *obj)
+{
+  string s = obj->get_data();
+  uint64_t epoch;
+  uint64_t nsec;
+  int r = utime_t::parse_date(s, &epoch, &nsec);
+  if (r == 0) {
+    val = utime_t(epoch, nsec);
+  } else {
+    throw RGWXMLDecoder::err("failed to decode utime_t");
+  }
+}
+
+void encode_xml(const char *name, const string& val, Formatter *f)
+{
+  f->dump_string(name, val);
+}
+
+void encode_xml(const char *name, const char *val, Formatter *f)
+{
+  f->dump_string(name, val);
+}
+
+void encode_xml(const char *name, bool val, Formatter *f)
+{
+  /*
+   * Booleans are written as the capitalised words "True" / "False";
+   * decode_xml_obj(bool&) matches them case-insensitively on the way back.
+   */
+  const string text(val ? "True" : "False");
+
+  f->dump_string(name, text);
+}
+
+void encode_xml(const char *name, int val, Formatter *f)
+{
+  f->dump_int(name, val);
+}
+
+void encode_xml(const char *name, long val, Formatter *f)
+{
+  f->dump_int(name, val);
+}
+
+void encode_xml(const char *name, unsigned val, Formatter *f)
+{
+  f->dump_unsigned(name, val);
+}
+
+void encode_xml(const char *name, unsigned long val, Formatter *f)
+{
+  f->dump_unsigned(name, val);
+}
+
+void encode_xml(const char *name, unsigned long long val, Formatter *f)
+{
+  f->dump_unsigned(name, val);
+}
+
+void encode_xml(const char *name, long long val, Formatter *f)
+{
+  f->dump_int(name, val);
+}
+
+void encode_xml(const char *name, const utime_t& val, Formatter *f)
+{
+  val.gmtime(f->dump_stream(name));
+}
+
+void encode_xml(const char *name, const bufferlist& bl, Formatter *f)
+{
+  /* need to copy data from bl, as it is const bufferlist */
+  bufferlist src = bl;
+
+  bufferlist b64;
+  src.encode_base64(b64);
+
+  string s(b64.c_str(), b64.length());
+
+  encode_xml(name, s, f);
+}
+
diff --git a/src/rgw/rgw_xml.h b/src/rgw/rgw_xml.h
index c4722ab..257a156 100644
--- a/src/rgw/rgw_xml.h
+++ b/src/rgw/rgw_xml.h
@@ -8,8 +8,7 @@
 #include <string>
 #include <iosfwd>
 #include <include/types.h>
-
-#include <expat.h>
+#include <common/Formatter.h>
 
 using namespace std;
 
@@ -60,15 +59,20 @@ public:
   friend ostream& operator<<(ostream& out, XMLObj& obj);
 };
 
+struct XML_ParserStruct;
 class RGWXMLParser : public XMLObj
 {
-  XML_Parser p;
+  XML_ParserStruct *p;
   char *buf;
   int buf_len;
   XMLObj *cur_obj;
   vector<XMLObj *> objs;
+  list<XMLObj *> allocated_objs;
+  list<XMLObj> unallocated_objs;
 protected:
-  virtual XMLObj *alloc_obj(const char *el) = 0;
+  virtual XMLObj *alloc_obj(const char *el) {
+    return NULL;
+  }
 public:
   RGWXMLParser();
   virtual ~RGWXMLParser();
@@ -85,4 +89,191 @@ private:
   bool success;
 };
 
+class RGWXMLDecoder {  // parses an XML buffer on construction and offers static helpers to pull typed fields out of XMLObj trees
+public:
+  struct err {  // exception type thrown by the constructor and the decode helpers
+    string message;
+
+    explicit err(const string& m) : message(m) {}
+  };
+
+  class XMLParser : public RGWXMLParser {  // adds nothing of its own; uses RGWXMLParser's alloc_obj()
+  public:
+    XMLParser() {}
+    virtual ~XMLParser() {}
+  } parser;
+
+  explicit RGWXMLDecoder(bufferlist& bl) {  // throws err when parser.parse() rejects bl
+    if (!parser.parse(bl.c_str(), bl.length(), 1)) {
+      // no debug print to stdout here: the exception below already carries the reason
+      throw RGWXMLDecoder::err("failed to parse XML input");
+    }
+  }
+
+  template<class T>  // single value; returns whether the field was present, throws err if mandatory and missing
+  static bool decode_xml(const char *name, T& val, XMLObj *obj, bool mandatory = false);
+
+  template<class C>  // container flavour taking a per-container callback cb
+  static bool decode_xml(const char *name, C& container, void (*cb)(C&, XMLObj *obj), XMLObj *obj, bool mandatory = false);
+
+  template<class T>  // falls back to default_val when the field is missing
+  static void decode_xml(const char *name, T& val, T& default_val, XMLObj *obj);
+};
+
+template<class T>
+void decode_xml_obj(T& val, XMLObj *obj)
+{
+  val.decode_xml(obj);  // generic fallback: T must provide decode_xml(XMLObj *)
+}
+
+static inline void decode_xml_obj(string& val, XMLObj *obj)
+{
+  val = obj->get_data();  // strings are assigned straight from the element's data
+}
+
+void decode_xml_obj(unsigned long long& val, XMLObj *obj);
+void decode_xml_obj(long long& val, XMLObj *obj);
+void decode_xml_obj(unsigned long& val, XMLObj *obj);
+void decode_xml_obj(long& val, XMLObj *obj);
+void decode_xml_obj(unsigned& val, XMLObj *obj);
+void decode_xml_obj(int& val, XMLObj *obj);
+void decode_xml_obj(bool& val, XMLObj *obj);
+void decode_xml_obj(bufferlist& val, XMLObj *obj);
+class utime_t;
+void decode_xml_obj(utime_t& val, XMLObj *obj);
+
+template<class T>
+void do_decode_xml_obj(list<T>& l, const string& name, XMLObj *obj)
+{
+  // Rebuild l from scratch: one decoded T per child element called name.
+  l.clear();
+
+  XMLObjIter children = obj->find(name);
+
+  for (XMLObj *child = children.get_next(); child; child = children.get_next()) {
+    T entry;
+    decode_xml_obj(entry, child);
+    l.push_back(entry);
+  }
+}
+
+template<class T>
+void do_decode_xml_obj(vector<T>& l, const string& name, XMLObj *obj)
+{
+  l.clear();  // previous contents are always discarded
+
+  XMLObjIter iter = obj->find(name);  // iterates the children of obj called `name`
+  XMLObj *o;
+
+  while ((o = iter.get_next())) {  // extra parentheses: the assignment is intentional; loop ends on a null pointer
+    T val;
+    decode_xml_obj(val, o);  // if this throws, l keeps the entries decoded so far
+    l.push_back(val);
+  }
+}
+
+template<class T>
+bool RGWXMLDecoder::decode_xml(const char *name, T& val, XMLObj *obj, bool mandatory)
+{  // decode the child of obj called `name` into val; returns true when it was present
+  XMLObjIter iter = obj->find(name);
+  XMLObj *o = iter.get_next();  // only the first match is looked at
+  if (!o) {
+    if (mandatory) {
+      string s = "missing mandatory field " + string(name);
+      throw err(s);
+    }
+    val = T();  // optional field absent: val is reset to a value-initialized T
+    return false;
+  }
+
+  try {
+    decode_xml_obj(val, o);
+  } catch (err& e) {
+    string s = string(name) + ": ";  // prefix the field name so nested failures read like a path
+    s.append(e.message);
+    throw err(s);
+  }
+
+  return true;
+}
+
+template<class C>
+bool RGWXMLDecoder::decode_xml(const char *name, C& container, void (*cb)(C&, XMLObj *), XMLObj *obj, bool mandatory)
+{  // container flavour: returns true when a child called `name` was found and decoded
+  container.clear();  // emptied up front, even when the field turns out to be missing
+
+  XMLObjIter iter = obj->find(name);
+  XMLObj *o = iter.get_next();  // only the first match is looked at
+  if (!o) {
+    if (mandatory) {
+      string s = "missing mandatory field " + string(name);
+      throw err(s);
+    }
+    return false;
+  }
+
+  try {
+    decode_xml_obj(container, cb, o);  // NOTE(review): no three-argument decode_xml_obj is declared in the visible part of this header -- confirm where it comes from
+  } catch (err& e) {
+    string s = string(name) + ": ";  // prefix the field name so nested failures read like a path
+    s.append(e.message);
+    throw err(s);
+  }
+
+  return true;
+}
+
+template<class T>
+void RGWXMLDecoder::decode_xml(const char *name, T& val, T& default_val, XMLObj *obj)
+{  // decode the child of obj called `name` into val, or use default_val when it is missing
+  XMLObjIter iter = obj->find(name);
+  XMLObj *o = iter.get_next();  // only the first match is looked at
+  if (!o) {
+    val = default_val;
+    return;
+  }
+
+  try {
+    decode_xml_obj(val, o);
+  } catch (err& e) {
+    val = default_val;  // val is set to the default, but the failure is still reported
+    string s = string(name) + ": ";
+    s.append(e.message);
+    throw err(s);
+  }
+}
+
+template<class T>
+static void encode_xml(const char *name, const T& val, ceph::Formatter *f)
+{
+  f->open_object_section(name);  // generic fallback: wrap T's own dump_xml() output in a section called `name`
+  val.dump_xml(f);
+  f->close_section();
+}
+
+void encode_xml(const char *name, const string& val, ceph::Formatter *f);  // non-template overloads for scalar and common types
+void encode_xml(const char *name, const char *val, ceph::Formatter *f);
+void encode_xml(const char *name, bool val, ceph::Formatter *f);
+void encode_xml(const char *name, int val, ceph::Formatter *f);
+void encode_xml(const char *name, unsigned val, ceph::Formatter *f);
+void encode_xml(const char *name, long val, ceph::Formatter *f);
+void encode_xml(const char *name, unsigned long val, ceph::Formatter *f);
+void encode_xml(const char *name, long long val, ceph::Formatter *f);
+void encode_xml(const char *name, const utime_t& val, ceph::Formatter *f);
+void encode_xml(const char *name, const bufferlist& bl, ceph::Formatter *f);
+/* long long is already declared above; only its unsigned counterpart remains */
+void encode_xml(const char *name, long long unsigned val, ceph::Formatter *f);
+
+template<class T>
+static void do_encode_xml(const char *name, const std::list<T>& l, const char *entry_name, ceph::Formatter *f)
+{
+  f->open_array_section(name);  // array `name` holding one `entry_name` element per list item
+  typename std::list<T>::const_iterator pos = l.begin();
+  for (; pos != l.end(); ++pos)
+    encode_xml(entry_name, *pos, f);
+  f->close_section();
+}
+
+
+
 #endif
diff --git a/src/rgw/rgw_xml_enc.cc b/src/rgw/rgw_xml_enc.cc
new file mode 100644
index 0000000..ff64efc
--- /dev/null
+++ b/src/rgw/rgw_xml_enc.cc
@@ -0,0 +1,131 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2015 Yehuda Sadeh <yehuda at redhat.com>
+ * Copyright (C) 2015 Robin H. Johnson <robin.johnson at dreamhost.com>
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software 
+ * Foundation.  See file COPYING.
+ * 
+ */
+#include "rgw_common.h"
+#include "rgw_xml.h"
+
+#include "common/Formatter.h"
+
+#define dout_subsys ceph_subsys_rgw
+
+void RGWBWRedirectInfo::dump_xml(Formatter *f) const
+{  // writes only the fields that are set (non-empty strings, positive code)
+  if (!redirect.protocol.empty()) {
+    encode_xml("Protocol", redirect.protocol, f);
+  }
+  if (!redirect.hostname.empty()) {
+    encode_xml("HostName", redirect.hostname, f);
+  }
+  if (redirect.http_redirect_code > 0) {  // a code of zero or below is not emitted
+    encode_xml("HttpRedirectCode", (int)redirect.http_redirect_code, f);
+  }
+  if (!replace_key_prefix_with.empty()) {
+    encode_xml("ReplaceKeyPrefixWith", replace_key_prefix_with, f);
+  }
+  if (!replace_key_with.empty()) {
+    encode_xml("ReplaceKeyWith", replace_key_with, f);
+  }
+}
+
+void RGWBWRedirectInfo::decode_xml(XMLObj *obj) {  // every field is optional (no mandatory flag is passed)
+  RGWXMLDecoder::decode_xml("Protocol", redirect.protocol, obj);
+  RGWXMLDecoder::decode_xml("HostName", redirect.hostname, obj);
+  int code = 0;  // decoded through a plain int, then assigned to the member declared elsewhere
+  RGWXMLDecoder::decode_xml("HttpRedirectCode", code, obj);
+  redirect.http_redirect_code = code;
+  RGWXMLDecoder::decode_xml("ReplaceKeyPrefixWith", replace_key_prefix_with, obj);
+  RGWXMLDecoder::decode_xml("ReplaceKeyWith", replace_key_with, obj);
+}
+
+void RGWBWRoutingRuleCondition::dump_xml(Formatter *f) const
+{  // writes only the parts of the condition that are set
+  if (!key_prefix_equals.empty()) {
+    encode_xml("KeyPrefixEquals", key_prefix_equals, f);
+  }
+  if (http_error_code_returned_equals > 0) {  // a code of zero or below is not emitted
+    encode_xml("HttpErrorCodeReturnedEquals", (int)http_error_code_returned_equals, f);
+  }
+}
+
+void RGWBWRoutingRuleCondition::decode_xml(XMLObj *obj) {  // both fields are optional
+  RGWXMLDecoder::decode_xml("KeyPrefixEquals", key_prefix_equals, obj);
+  int code = 0;  // decoded through a plain int, then assigned to the member declared elsewhere
+  RGWXMLDecoder::decode_xml("HttpErrorCodeReturnedEquals", code, obj);
+  http_error_code_returned_equals = code;
+}
+
+void RGWBWRoutingRule::dump_xml(Formatter *f) const
+{  // both sub-objects are always written, each in its own section
+  encode_xml("Condition", condition, f);
+  encode_xml("Redirect", redirect_info, f);
+}
+
+void RGWBWRoutingRule::decode_xml(XMLObj *obj) {  // neither element is mandatory; a missing one resets its member to a default-constructed value
+  RGWXMLDecoder::decode_xml("Condition", condition, obj);
+  RGWXMLDecoder::decode_xml("Redirect", redirect_info, obj);
+}
+
+static void encode_xml(const char *name, const std::list<RGWBWRoutingRule>& l, ceph::Formatter *f)
+{
+  do_encode_xml(name, l, "RoutingRule", f);  // array section `name` with one "RoutingRule" entry per rule; `name` is no longer ignored
+}
+
+void RGWBucketWebsiteConf::dump_xml(Formatter *f) const
+{  // each section is written only when the corresponding setting is non-empty
+  if (!redirect_all.hostname.empty()) {
+    f->open_object_section("RedirectAllRequestsTo");
+    encode_xml("HostName", redirect_all.hostname, f);
+    if (!redirect_all.protocol.empty()) {  // protocol is only written alongside a hostname
+      encode_xml("Protocol", redirect_all.protocol, f);
+    }
+    f->close_section();
+  }
+  if (!index_doc_suffix.empty()) {
+    f->open_object_section("IndexDocument");
+    encode_xml("Suffix", index_doc_suffix, f);
+    f->close_section();
+  }
+  if (!error_doc.empty()) {
+    f->open_object_section("ErrorDocument");
+    encode_xml("Key", error_doc, f);
+    f->close_section();
+  }
+  if (!routing_rules.rules.empty()) {
+    encode_xml("RoutingRules", routing_rules.rules, f);
+  }
+}
+
+void decode_xml_obj(list<RGWBWRoutingRule>& l, XMLObj *obj)
+{
+  do_decode_xml_obj(l, "RoutingRule", obj);  // one list entry per "RoutingRule" child of obj
+}
+
+void RGWBucketWebsiteConf::decode_xml(XMLObj *obj) {
+  XMLObj *o = obj->find_first("RedirectAllRequestsTo");
+  if (o) {  // redirect-all present: only it is decoded, the other members are left untouched
+    RGWXMLDecoder::decode_xml("HostName", redirect_all.hostname, o, true);  // HostName is mandatory here
+    RGWXMLDecoder::decode_xml("Protocol", redirect_all.protocol, o);
+  } else {
+    o = obj->find_first("IndexDocument");
+    if (o) {
+      RGWXMLDecoder::decode_xml("Suffix", index_doc_suffix, o);
+    }
+    o = obj->find_first("ErrorDocument");
+    if (o) {
+      RGWXMLDecoder::decode_xml("Key", error_doc, o);
+    }
+    RGWXMLDecoder::decode_xml("RoutingRules", routing_rules.rules, obj);  // looked up on obj itself, not on a sub-element
+  }
+}
+
diff --git a/src/spdk/CONFIG b/src/spdk/CONFIG
new file mode 100644
index 0000000..ddce2cf
--- /dev/null
+++ b/src/spdk/CONFIG
@@ -0,0 +1,51 @@
+#
+#  BSD LICENSE
+#
+#  Copyright (c) Intel Corporation.
+#  All rights reserved.
+#
+#  Redistribution and use in source and binary forms, with or without
+#  modification, are permitted provided that the following conditions
+#  are met:
+#
+#    * Redistributions of source code must retain the above copyright
+#      notice, this list of conditions and the following disclaimer.
+#    * Redistributions in binary form must reproduce the above copyright
+#      notice, this list of conditions and the following disclaimer in
+#      the documentation and/or other materials provided with the
+#      distribution.
+#    * Neither the name of Intel Corporation nor the names of its
+#      contributors may be used to endorse or promote products derived
+#      from this software without specific prior written permission.
+#
+#  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+
+# Build with debug logging. Turn off for performance testing and normal usage
+CONFIG_DEBUG?=n
+
+# Build with code coverage instrumentation.
+CONFIG_COVERAGE?=n
+
+# This directory should contain 'include' and 'lib' directories for your DPDK
+# installation. Alternatively you can specify this on the command line
+# with 'make DPDK_DIR=/path/to/dpdk'.
+CONFIG_DPDK_DIR?=/path/to/dpdk
+
+# Header file to use for NVMe implementation specific functions.
+# Defaults to depending on DPDK.
+CONFIG_NVME_IMPL?=nvme_impl.h
+
+# Header file to use for IOAT implementation specific functions.
+# Defaults to depending on DPDK.
+CONFIG_IOAT_IMPL?=ioat_impl.h
diff --git a/src/spdk/LICENSE b/src/spdk/LICENSE
new file mode 100644
index 0000000..8e12726
--- /dev/null
+++ b/src/spdk/LICENSE
@@ -0,0 +1,30 @@
+BSD LICENSE
+
+Copyright (c) Intel Corporation.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+  * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+  * Redistributions in binary form must reproduce the above copyright
+    notice, this list of conditions and the following disclaimer in
+    the documentation and/or other materials provided with the
+    distribution.
+  * Neither the name of Intel Corporation nor the names of its
+    contributors may be used to endorse or promote products derived
+    from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/src/spdk/Makefile b/src/spdk/Makefile
new file mode 100644
index 0000000..605529a
--- /dev/null
+++ b/src/spdk/Makefile
@@ -0,0 +1,47 @@
+#
+#  BSD LICENSE
+#
+#  Copyright (c) Intel Corporation.
+#  All rights reserved.
+#
+#  Redistribution and use in source and binary forms, with or without
+#  modification, are permitted provided that the following conditions
+#  are met:
+#
+#    * Redistributions of source code must retain the above copyright
+#      notice, this list of conditions and the following disclaimer.
+#    * Redistributions in binary form must reproduce the above copyright
+#      notice, this list of conditions and the following disclaimer in
+#      the documentation and/or other materials provided with the
+#      distribution.
+#    * Neither the name of Intel Corporation nor the names of its
+#      contributors may be used to endorse or promote products derived
+#      from this software without specific prior written permission.
+#
+#  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+
+SPDK_ROOT_DIR := $(CURDIR)
+include $(SPDK_ROOT_DIR)/mk/spdk.common.mk
+
+DIRS-y += lib test examples
+
+.PHONY: all clean $(DIRS-y) # the subdirectory names are phony targets, never checked as files
+
+all: $(DIRS-y) # both all and clean depend on every listed subdirectory
+clean: $(DIRS-y)
+
+test: lib # lib is a prerequisite of test
+examples: lib # lib is a prerequisite of examples
+
+include $(SPDK_ROOT_DIR)/mk/spdk.subdirs.mk
diff --git a/src/spdk/PORTING.md b/src/spdk/PORTING.md
new file mode 100644
index 0000000..5266a62
--- /dev/null
+++ b/src/spdk/PORTING.md
@@ -0,0 +1,23 @@
+SPDK Porting Guide
+==================
+
+SPDK is ported to new environments by implementing the nvme_impl
+interface.  The nvme_impl interface provides APIs for the driver
+to allocate physically contiguous and pinned memory, perform PCI
+operations (config cycles and mapping BARs), virtual to physical
+address translation and allocating per I/O data structures.
+
+SPDK includes a default implementation of the nvme_impl API based
+on the Data Plane Development Kit ([DPDK](http://dpdk.org)) and
+libpciaccess.  This DPDK implementation can be found in
+lib/nvme/nvme_impl.h.  DPDK is currently supported on Linux and
+FreeBSD only.
+
+Users who want to use SPDK on other operating systems, or in
+userspace driver frameworks other than DPDK, will need to implement
+a new version of nvme_impl.h.  The new nvme_impl.h can be
+integrated into the SPDK build by updating the following line
+in CONFIG:
+
+    CONFIG_NVME_IMPL=nvme_impl.h
+
diff --git a/src/spdk/README.md b/src/spdk/README.md
new file mode 100644
index 0000000..5e34958
--- /dev/null
+++ b/src/spdk/README.md
@@ -0,0 +1,91 @@
+Storage Performance Development Kit
+===================================
+
+[![Build Status](https://travis-ci.org/spdk/spdk.svg?branch=master)](https://travis-ci.org/spdk/spdk)
+
+[SPDK on 01.org](https://01.org/spdk)
+
+The Storage Performance Development Kit (SPDK) provides a set of tools
+and libraries for writing high performance, scalable, user-mode storage
+applications.
+It achieves high performance by moving all of the necessary drivers into
+userspace and operating in a polled mode instead of relying on interrupts,
+which avoids kernel context switches and eliminates interrupt handling
+overhead.
+
+Documentation
+=============
+
+[Doxygen API documentation](https://spdk.github.io/spdk/doc/)
+
+[Porting Guide](PORTING.md)
+
+Prerequisites
+=============
+
+To build SPDK, some dependencies must be installed.
+
+Fedora/CentOS:
+
+- gcc
+- libpciaccess-devel
+- CUnit-devel
+- libaio-devel
+
+Ubuntu/Debian:
+
+- gcc
+- libpciaccess-dev
+- make
+- libcunit1-dev
+- libaio-dev
+
+FreeBSD:
+
+- gcc
+- libpciaccess
+- gmake
+- cunit
+
+Additionally, [DPDK](http://dpdk.org/doc/quick-start) is required.
+
+    1) cd /path/to/spdk
+    2) wget http://dpdk.org/browse/dpdk/snapshot/dpdk-2.2.0.tar.gz
+    3) tar xfz dpdk-2.2.0.tar.gz
+    4) cd dpdk-2.2.0
+
+Linux:
+
+    5) make install T=x86_64-native-linuxapp-gcc DESTDIR=.
+
+FreeBSD:
+
+    5) gmake install T=x86_64-native-bsdapp-clang DESTDIR=.
+
+Building
+========
+
+Once the prerequisites are installed, run 'make' within the SPDK directory
+to build the SPDK libraries and examples.
+
+    make DPDK_DIR=/path/to/dpdk
+
+If you followed the instructions above for building DPDK:
+
+Linux:
+
+    make DPDK_DIR=./dpdk-2.2.0/x86_64-native-linuxapp-gcc
+
+FreeBSD:
+
+    gmake DPDK_DIR=./dpdk-2.2.0/x86_64-native-bsdapp-clang
+
+Hugepages and Device Binding
+============================
+
+Before running an SPDK application, some hugepages must be allocated and
+any NVMe and I/OAT devices must be unbound from the native kernel drivers.
+SPDK includes scripts to automate this process on both Linux and FreeBSD.
+
+    1) scripts/configure_hugepages.sh
+    2) scripts/unbind.sh
diff --git a/src/spdk/autobuild.sh b/src/spdk/autobuild.sh
new file mode 100755
index 0000000..d04ee62
--- /dev/null
+++ b/src/spdk/autobuild.sh
@@ -0,0 +1,70 @@
+#!/usr/bin/env bash
+
+set -e
+
+rootdir=$(readlink -f "$(dirname "$0")")
+source "$rootdir/scripts/autotest_common.sh"
+
+out=$PWD # artifacts (scan-build report, docs) are collected in the directory the script was started from
+
+umask 022
+
+cd "$rootdir"
+
+timing_enter autobuild
+
+timing_enter check_format
+./scripts/check_format.sh
+timing_exit check_format
+
+timing_enter build_kmod
+./scripts/build_kmod.sh build
+timing_exit build_kmod
+
+scanbuild='' # left empty when scan-build is not installed; expanded unquoted below on purpose
+if hash scan-build; then
+	scanbuild="scan-build -o $out/scan-build-tmp --status-bugs"
+fi
+
+$MAKE $MAKEFLAGS clean
+
+timing_enter scanbuild_make
+fail=0 # build problems are recorded here so the remaining steps still run
+time $scanbuild $MAKE $MAKEFLAGS DPDK_DIR=$DPDK_DIR $MAKECONFIG || fail=1
+timing_exit scanbuild_make
+
+# Check that header file dependencies are working correctly by
+#  capturing a binary's stat data before and after touching a
+#  header file and re-making.
+STAT1=$(stat examples/nvme/identify/identify) || fail=1 # a missing binary must not abort the script under set -e
+sleep 1
+touch lib/nvme/nvme_internal.h
+$MAKE $MAKEFLAGS DPDK_DIR=$DPDK_DIR $MAKECONFIG || fail=1
+STAT2=$(stat examples/nvme/identify/identify) || fail=1
+
+if [ "$STAT1" == "$STAT2" ]; then
+	fail=1 # binary was not rebuilt after the header changed
+fi
+
+if [ -d $out/scan-build-tmp ]; then
+	scanoutput=$(ls -1 $out/scan-build-tmp/)
+	mv $out/scan-build-tmp/$scanoutput $out/scan-build
+	rm -rf $out/scan-build-tmp
+	chmod -R a+rX $out/scan-build
+fi
+
+timing_enter doxygen
+if hash doxygen; then
+	(cd "$rootdir"/doc; $MAKE $MAKEFLAGS)
+	mkdir -p "$out"/doc
+	for d in "$rootdir"/doc/output.*; do
+		component=$(basename "$d" | sed -e 's/^output.//')
+		mv "$d"/html "$out"/doc/$component
+		rm -rf "$d"
+	done
+fi
+timing_exit doxygen
+
+timing_exit autobuild
+
+exit $fail
diff --git a/src/spdk/autopackage.sh b/src/spdk/autopackage.sh
new file mode 100755
index 0000000..35f3b60
--- /dev/null
+++ b/src/spdk/autopackage.sh
@@ -0,0 +1,45 @@
+#!/usr/bin/env bash
+
+set -xe
+
+rootdir=$(readlink -f "$(dirname "$0")")
+source "$rootdir/scripts/autotest_common.sh"
+
+out=$PWD # the tarball ends up in the directory the script was started from
+
+MAKEFLAGS=${MAKEFLAGS:--j16}
+cd "$rootdir"
+
+timing_enter autopackage
+
+$MAKE clean
+
+if [ `git status --porcelain | wc -l` -ne 0 ]; then
+	echo make clean left the following files:
+	git status --porcelain
+	exit 1
+fi
+
+pv=spdk-$(date +%Y_%m_%d)
+
+find . -iname "spdk-*.tar.gz" -delete # stale tarballs go first so the glob below matches only the new one
+git archive HEAD -9 --prefix="${pv}/" -o "${pv}.tar.gz"
+
+tarball=$(ls -1 spdk-*.tar.gz)
+if [ "$PWD" != "$out" ]; then
+	mv "$tarball" "$out/"
+fi
+
+# Build from packaged source
+tmpdir=$(mktemp -d)
+echo "tmpdir=$tmpdir"
+tar -C "$tmpdir" -xf "$out/$tarball"
+(
+	cd "$tmpdir"/spdk-*
+	time $MAKE ${MAKEFLAGS} DPDK_DIR="$DPDK_DIR" CONFIG_DEBUG=n
+)
+rm -rf "$tmpdir"
+
+timing_exit autopackage
+
+timing_finish
diff --git a/src/spdk/autotest.sh b/src/spdk/autotest.sh
new file mode 100755
index 0000000..808f582
--- /dev/null
+++ b/src/spdk/autotest.sh
@@ -0,0 +1,77 @@
+#!/usr/bin/env bash
+
+rootdir=$(readlink -f "$(dirname "$0")")
+source "$rootdir/scripts/autotest_common.sh"
+
+set -xe
+
+if [ $EUID -ne 0 ]; then
+	echo "$0 must be run as root"
+	exit 1
+fi
+
+trap 'process_core; "$rootdir/scripts/cleanup.sh"; exit 1' SIGINT SIGTERM EXIT # any early exit cleans up and reports failure
+
+timing_enter autotest
+
+src=$(readlink -f "$(dirname "$0")")
+out=$PWD # coverage output is written to the directory the script was started from
+cd "$src"
+
+if hash lcov; then
+	export LCOV_OPTS="
+		--rc lcov_branch_coverage=1
+		--rc lcov_function_coverage=1
+		--rc genhtml_branch_coverage=1
+		--rc genhtml_function_coverage=1
+		--rc genhtml_legend=1
+		--rc geninfo_all_blocks=1
+		"
+	export LCOV="lcov $LCOV_OPTS"
+	export GENHTML="genhtml $LCOV_OPTS"
+	# zero out coverage data
+	$LCOV -q -c -i -t "Baseline" -d "$src" -o cov_base.info
+fi
+
+# set up huge pages
+timing_enter afterboot
+./scripts/configure_hugepages.sh 1024
+timing_exit afterboot
+
+./scripts/unbind.sh
+
+#####################
+# Unit Tests
+#####################
+
+timing_enter lib
+
+time test/lib/nvme/nvme.sh
+time test/lib/memory/memory.sh
+time test/lib/ioat/ioat.sh
+
+timing_exit lib
+
+./scripts/cleanup.sh
+./scripts/build_kmod.sh clean
+
+timing_exit autotest
+chmod a+r "$output_dir/timing.txt"
+
+trap - SIGINT SIGTERM EXIT # normal completion: drop the failure trap before the final steps
+
+# catch any stray core files
+process_core
+
+if hash lcov; then
+	# generate coverage data and combine with baseline
+	$LCOV -q -c -d "$src" -t "$(hostname)" -o cov_test.info
+	$LCOV -q -a cov_base.info -a cov_test.info -o cov_total.info
+	$LCOV -q -r cov_total.info '/usr/*' -o cov_total.info
+	$LCOV -q -r cov_total.info 'test/*' -o cov_total.info
+	$GENHTML cov_total.info -t "$(hostname)" -o "$out/coverage"
+	chmod -R a+rX "$out/coverage"
+	rm cov_base.info cov_test.info
+	mv cov_total.info "$out/cov_total.info"
+	find . -name "*.gcda" -delete
+fi
diff --git a/src/spdk/include/spdk/assert.h b/src/spdk/include/spdk/assert.h
new file mode 100644
index 0000000..6a71022
--- /dev/null
+++ b/src/spdk/include/spdk/assert.h
@@ -0,0 +1,55 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright (c) Intel Corporation.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef SPDK_ASSERT_H
+#define SPDK_ASSERT_H
+
+#include <assert.h>
+
+#define SPDK_CONCAT_(x, y) x##y /* pastes its arguments as given */
+#define SPDK_CONCAT(x, y) SPDK_CONCAT_(x, y) /* extra level so arguments such as __LINE__ are expanded before pasting */
+
+#ifdef static_assert /* only true where static_assert is a macro, e.g. C11 <assert.h> */
+#define SPDK_STATIC_ASSERT(cond, msg) static_assert(cond, msg)
+#else
+/*
+ * Fallback for older compilers that don't support static_assert
+ *
+ * The array size will expand to 1 if the condition is true, or
+ * -1 if the condition is false (causing compilation to fail).
+ */
+#define SPDK_STATIC_ASSERT(cond, msg) \
+        typedef char SPDK_CONCAT(SPDK_STATIC_ASSERT_, __LINE__)[(cond) ? 1 : -1]
+#endif
+
+#endif /* SPDK_ASSERT_H */
diff --git a/src/spdk/include/spdk/barrier.h b/src/spdk/include/spdk/barrier.h
new file mode 100644
index 0000000..0295967
--- /dev/null
+++ b/src/spdk/include/spdk/barrier.h
@@ -0,0 +1,40 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright (c) Intel Corporation.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef SPDK_BARRIER_H
+#define SPDK_BARRIER_H
+
+#define wmb()	__asm volatile("sfence" ::: "memory")	/* x86 SFENCE; the "memory" clobber also stops the compiler reordering accesses across it */
+#define mb()	__asm volatile("mfence" ::: "memory")	/* x86 MFENCE; the "memory" clobber also stops the compiler reordering accesses across it */
+
+#endif
diff --git a/src/spdk/include/spdk/file.h b/src/spdk/include/spdk/file.h
new file mode 100644
index 0000000..e696da3
--- /dev/null
+++ b/src/spdk/include/spdk/file.h
@@ -0,0 +1,42 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright (c) Intel Corporation.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef SPDK_FILE_H
+#define SPDK_FILE_H
+
+#include <stdint.h>
+
+uint64_t file_get_size(int fd);	/* presumably the size of the file open on fd -- TODO confirm units against the implementation */
+uint32_t dev_get_blocklen(int fd);	/* presumably the block length of the device open on fd -- TODO confirm against the implementation */
+
+#endif
diff --git a/src/spdk/include/spdk/ioat.h b/src/spdk/include/spdk/ioat.h
new file mode 100644
index 0000000..3eec6b1
--- /dev/null
+++ b/src/spdk/include/spdk/ioat.h
@@ -0,0 +1,103 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright (c) Intel Corporation.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * This file defines the public interface to the I/OAT DMA engine driver.
+ */
+
+#ifndef __IOAT_H__
+#define __IOAT_H__
+
+#include <inttypes.h>
+#include <stdbool.h>
+
+/**
+ * Signature for callback function invoked when a request is completed.
+ */
+typedef void (*ioat_callback_t)(void *arg);
+
+/**
+ * Returns true if vendor_id and device_id match a known IOAT PCI device ID.
+ */
+bool ioat_pci_device_match_id(uint16_t vendor_id, uint16_t device_id);
+
+/**
+ * Attach an I/OAT PCI device to the I/OAT userspace driver.
+ *
+ * To stop using the device and release its associated resources,
+ * call \ref ioat_detach with the ioat_channel instance returned by this function.
+ */
+struct ioat_channel *ioat_attach(void *device);
+
+/**
+ * Detaches specified device returned by \ref ioat_attach() from the I/OAT driver.
+ */
+int ioat_detach(struct ioat_channel *ioat);
+
+/**
+ * Request a DMA engine channel for the calling thread.
+ *
+ * Must be called before submitting any requests from a thread.
+ *
+ * The \ref ioat_unregister_thread() function can be called to release the channel.
+ */
+int ioat_register_thread(void);
+
+/**
+ * Unregister the current thread's I/OAT channel.
+ *
+ * This function can be called after \ref ioat_register_thread() to release the thread's
+ * DMA engine channel for use by other threads.
+ */
+void ioat_unregister_thread(void);
+
+/**
+ * Submit a DMA engine memory copy request.
+ *
+ * Before submitting any requests on a thread, the thread must be registered
+ * using the \ref ioat_register_thread() function.
+ */
+int64_t ioat_submit_copy(void *cb_arg, ioat_callback_t cb_fn,
+			 void *dst, const void *src, uint64_t nbytes);
+
+/**
+ * Check for completed requests on the current thread.
+ *
+ * Before checking for completions on a thread, the thread must be registered
+ * using the \ref ioat_register_thread() function.
+ *
+ * \returns 0 on success or negative if something went wrong.
+ */
+int ioat_process_events(void);
+
+#endif
diff --git a/src/spdk/include/spdk/ioat_spec.h b/src/spdk/include/spdk/ioat_spec.h
new file mode 100644
index 0000000..bb65b5b
--- /dev/null
+++ b/src/spdk/include/spdk/ioat_spec.h
@@ -0,0 +1,308 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright (c) Intel Corporation.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __IOAT_SPEC_H__
+#define __IOAT_SPEC_H__
+
+#include <inttypes.h>
+
+#include "spdk/assert.h"
+
+#define IOAT_INTRCTRL_MASTER_INT_EN	0x01
+
+#define IOAT_VER_3_0                0x30
+#define IOAT_VER_3_3                0x33
+
+/* DMA Channel Registers */
+#define IOAT_CHANCTRL_CHANNEL_PRIORITY_MASK	0xF000
+#define IOAT_CHANCTRL_COMPL_DCA_EN		0x0200
+#define IOAT_CHANCTRL_CHANNEL_IN_USE		0x0100
+#define IOAT_CHANCTRL_DESCRIPTOR_ADDR_SNOOP_CONTROL	0x0020
+#define IOAT_CHANCTRL_ERR_INT_EN		0x0010
+#define IOAT_CHANCTRL_ANY_ERR_ABORT_EN		0x0008
+#define IOAT_CHANCTRL_ERR_COMPLETION_EN		0x0004
+#define IOAT_CHANCTRL_INT_REARM			0x0001
+
+
+struct ioat_registers {
+	uint8_t		chancnt;	/* 0x00 */
+	uint8_t		xfercap;	/* 0x01 */
+	uint8_t		genctrl;	/* 0x02 */
+	uint8_t		intrctrl;	/* 0x03 */
+	uint32_t	attnstatus;	/* 0x04 */
+	uint8_t		cbver;		/* 0x08 */
+	uint8_t		reserved4[0x3]; /* 0x09 */
+	uint16_t	intrdelay;	/* 0x0C */
+	uint16_t	cs_status;	/* 0x0E */
+	uint32_t	dmacapability;	/* 0x10 */
+	uint8_t		reserved5[0x6C]; /* 0x14 */
+	uint16_t	chanctrl;	/* 0x80 */
+	uint8_t		reserved6[0x2];	/* 0x82 */
+	uint8_t		chancmd;	/* 0x84 */
+	uint8_t		reserved3[1];	/* 0x85 */
+	uint16_t	dmacount;	/* 0x86 */
+	uint64_t	chansts;	/* 0x88 */
+	uint64_t	chainaddr;	/* 0x90 */
+	uint64_t	chancmp;	/* 0x98 */
+	uint8_t		reserved2[0x8];	/* 0xA0 */
+	uint32_t	chanerr;	/* 0xA8 */
+	uint32_t	chanerrmask;	/* 0xAC */
+} __attribute__((packed));	/* packed: no padding, total size 0xB0 bytes */
+
+#define IOAT_CHANCMD_RESET		0x20
+#define IOAT_CHANCMD_SUSPEND		0x04
+
+#define IOAT_CHANSTS_STATUS		0x7ULL
+#define IOAT_CHANSTS_ACTIVE		0x0
+#define IOAT_CHANSTS_IDLE		0x1
+#define IOAT_CHANSTS_SUSPENDED		0x2
+#define IOAT_CHANSTS_HALTED		0x3
+#define IOAT_CHANSTS_ARMED		0x4
+
+#define IOAT_CHANSTS_UNAFFILIATED_ERROR	0x8ULL
+#define IOAT_CHANSTS_SOFT_ERROR		0x10ULL
+
+#define IOAT_CHANSTS_COMPLETED_DESCRIPTOR_MASK	(~0x3FULL)
+
+#define IOAT_CHANCMP_ALIGN		8	/* CHANCMP address must be 64-bit aligned */
+
+struct ioat_generic_hw_descriptor {
+	uint32_t size;
+	union {
+		uint32_t control_raw;
+		struct {
+			uint32_t int_enable: 1;
+			uint32_t src_snoop_disable: 1;
+			uint32_t dest_snoop_disable: 1;
+			uint32_t completion_update: 1;
+			uint32_t fence: 1;
+			uint32_t reserved2: 1;
+			uint32_t src_page_break: 1;
+			uint32_t dest_page_break: 1;
+			uint32_t bundle: 1;
+			uint32_t dest_dca: 1;
+			uint32_t hint: 1;
+			uint32_t reserved: 13;
+			uint32_t op: 8;
+		} control;
+	} u;
+	uint64_t src_addr;
+	uint64_t dest_addr;
+	uint64_t next;
+	uint64_t op_specific[4];
+};
+
+struct ioat_dma_hw_descriptor {
+	uint32_t size;
+	union {
+		uint32_t control_raw;
+		struct {
+			uint32_t int_enable: 1;
+			uint32_t src_snoop_disable: 1;
+			uint32_t dest_snoop_disable: 1;
+			uint32_t completion_update: 1;
+			uint32_t fence: 1;
+			uint32_t null: 1;
+			uint32_t src_page_break: 1;
+			uint32_t dest_page_break: 1;
+			uint32_t bundle: 1;
+			uint32_t dest_dca: 1;
+			uint32_t hint: 1;
+			uint32_t reserved: 13;
+#define IOAT_OP_COPY 0x00
+			uint32_t op: 8;
+		} control;
+	} u;
+	uint64_t src_addr;
+	uint64_t dest_addr;
+	uint64_t next;
+	uint64_t reserved;
+	uint64_t reserved2;
+	uint64_t user1;
+	uint64_t user2;
+};
+
+struct ioat_fill_hw_descriptor {
+	uint32_t size;
+	union {
+		uint32_t control_raw;
+		struct {
+			uint32_t int_enable: 1;
+			uint32_t reserved: 1;
+			uint32_t dest_snoop_disable: 1;
+			uint32_t completion_update: 1;
+			uint32_t fence: 1;
+			uint32_t reserved2: 2;
+			uint32_t dest_page_break: 1;
+			uint32_t bundle: 1;
+			uint32_t reserved3: 15;
+#define IOAT_OP_FILL 0x01
+			uint32_t op: 8;
+		} control;
+	} u;
+	uint64_t src_data;
+	uint64_t dest_addr;
+	uint64_t next;
+	uint64_t reserved;
+	uint64_t next_dest_addr;
+	uint64_t user1;
+	uint64_t user2;
+};
+
+struct ioat_xor_hw_descriptor {
+	uint32_t size;
+	union {
+		uint32_t control_raw;
+		struct {
+			uint32_t int_enable: 1;
+			uint32_t src_snoop_disable: 1;
+			uint32_t dest_snoop_disable: 1;
+			uint32_t completion_update: 1;
+			uint32_t fence: 1;
+			uint32_t src_count: 3;
+			uint32_t bundle: 1;
+			uint32_t dest_dca: 1;
+			uint32_t hint: 1;
+			uint32_t reserved: 13;
+#define IOAT_OP_XOR 0x87
+#define IOAT_OP_XOR_VAL 0x88
+			uint32_t op: 8;
+		} control;
+	} u;
+	uint64_t src_addr;
+	uint64_t dest_addr;
+	uint64_t next;
+	uint64_t src_addr2;
+	uint64_t src_addr3;
+	uint64_t src_addr4;
+	uint64_t src_addr5;
+};
+
+struct ioat_xor_ext_hw_descriptor {
+	uint64_t src_addr6;
+	uint64_t src_addr7;
+	uint64_t src_addr8;
+	uint64_t next;
+	uint64_t reserved[4];
+};
+
+struct ioat_pq_hw_descriptor {
+	uint32_t size;
+	union {
+		uint32_t control_raw;
+		struct {
+			uint32_t int_enable: 1;
+			uint32_t src_snoop_disable: 1;
+			uint32_t dest_snoop_disable: 1;
+			uint32_t completion_update: 1;
+			uint32_t fence: 1;
+			uint32_t src_count: 3;
+			uint32_t bundle: 1;
+			uint32_t dest_dca: 1;
+			uint32_t hint: 1;
+			uint32_t p_disable: 1;
+			uint32_t q_disable: 1;
+			uint32_t reserved: 11;
+#define IOAT_OP_PQ 0x89
+#define IOAT_OP_PQ_VAL 0x8a
+			uint32_t op: 8;
+		} control;
+	} u;
+	uint64_t src_addr;
+	uint64_t p_addr;
+	uint64_t next;
+	uint64_t src_addr2;
+	uint64_t src_addr3;
+	uint8_t  coef[8];
+	uint64_t q_addr;
+};
+
+struct ioat_pq_ext_hw_descriptor {
+	uint64_t src_addr4;
+	uint64_t src_addr5;
+	uint64_t src_addr6;
+	uint64_t next;
+	uint64_t src_addr7;
+	uint64_t src_addr8;
+	uint64_t reserved[2];
+};
+
+struct ioat_pq_update_hw_descriptor {
+	uint32_t size;
+	union {
+		uint32_t control_raw;
+		struct {
+			uint32_t int_enable: 1;
+			uint32_t src_snoop_disable: 1;
+			uint32_t dest_snoop_disable: 1;
+			uint32_t completion_update: 1;
+			uint32_t fence: 1;
+			uint32_t src_cnt: 3;
+			uint32_t bundle: 1;
+			uint32_t dest_dca: 1;
+			uint32_t hint: 1;
+			uint32_t p_disable: 1;
+			uint32_t q_disable: 1;
+			uint32_t reserved: 3;
+			uint32_t coef: 8;
+#define IOAT_OP_PQ_UP 0x8b
+			uint32_t op: 8;
+		} control;
+	} u;
+	uint64_t src_addr;
+	uint64_t p_addr;
+	uint64_t next;
+	uint64_t src_addr2;
+	uint64_t p_src;
+	uint64_t q_src;
+	uint64_t q_addr;
+};
+
+struct ioat_raw_hw_descriptor {
+	uint64_t field[8];
+};
+
+union ioat_hw_descriptor {
+	struct ioat_raw_hw_descriptor raw;
+	struct ioat_generic_hw_descriptor generic;
+	struct ioat_dma_hw_descriptor dma;
+	struct ioat_fill_hw_descriptor fill;
+	struct ioat_xor_hw_descriptor xor;
+	struct ioat_xor_ext_hw_descriptor xor_ext;
+	struct ioat_pq_hw_descriptor pq;
+	struct ioat_pq_ext_hw_descriptor pq_ext;
+	struct ioat_pq_update_hw_descriptor pq_update;
+};
+SPDK_STATIC_ASSERT(sizeof(union ioat_hw_descriptor) == 64, "incorrect ioat_hw_descriptor layout");
+
+#endif /* __IOAT_SPEC_H__ */
diff --git a/src/spdk/include/spdk/mmio.h b/src/spdk/include/spdk/mmio.h
new file mode 100644
index 0000000..a9f3902
--- /dev/null
+++ b/src/spdk/include/spdk/mmio.h
@@ -0,0 +1,91 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright (c) Intel Corporation.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef SPDK_MMIO_H
+#define SPDK_MMIO_H
+
+#include <inttypes.h>
+
+#ifdef __x86_64__
+#define SPDK_MMIO_64BIT	1 /* Can do atomic 64-bit memory read/write (over PCIe) */
+#else
+#define SPDK_MMIO_64BIT	0
+#endif
+
+static inline uint32_t
+spdk_mmio_read_4(const volatile uint32_t *addr)
+{
+	return *addr;
+}
+
+static inline void
+spdk_mmio_write_4(volatile uint32_t *addr, uint32_t val)
+{
+	*addr = val;
+}
+
+static inline uint64_t
+spdk_mmio_read_8(const volatile uint64_t *addr)
+{
+	uint64_t val;
+	const volatile uint32_t *addr32 = (const volatile uint32_t *)addr;
+	/* Returns the 64-bit value at addr; const-qualified like spdk_mmio_read_4(). */
+	if (SPDK_MMIO_64BIT) {
+		val = *addr;	/* single 64-bit load */
+	} else {
+		/*
+		 * Read lower 4 bytes before upper 4 bytes.
+		 * This particular order is required by I/OAT.
+		 * If the other order is required, use a pair of spdk_mmio_read_4() calls.
+		 */
+		val = addr32[0];
+		val |= (uint64_t)addr32[1] << 32;
+	}
+
+	return val;
+}
+
+static inline void
+spdk_mmio_write_8(volatile uint64_t *addr, uint64_t val)
+{
+	volatile uint32_t *addr32 = (volatile uint32_t *)addr;
+
+	if (SPDK_MMIO_64BIT) {
+		*addr = val;	/* single 64-bit store */
+	} else {
+		addr32[0] = (uint32_t)val;	/* lower 4 bytes are written first */
+		addr32[1] = (uint32_t)(val >> 32);	/* then the upper 4 bytes */
+	}
+}
+
+#endif
diff --git a/src/spdk/include/spdk/nvme.h b/src/spdk/include/spdk/nvme.h
new file mode 100644
index 0000000..33dc547
--- /dev/null
+++ b/src/spdk/include/spdk/nvme.h
@@ -0,0 +1,634 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright (c) Intel Corporation.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef SPDK_NVME_H
+#define SPDK_NVME_H
+
+#include <stddef.h>
+#include "nvme_spec.h"
+
+/** \file
+ * This file defines the public interface to the NVMe driver.
+ */
+
+#define NVME_DEFAULT_RETRY_COUNT	(4)
+extern int32_t		nvme_retry_count;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** \brief Opaque handle to a controller. Obtained by calling nvme_attach(). */
+struct nvme_controller;
+
+/**
+ * \brief Attaches specified device to the NVMe driver.
+ *
+ * On success, the nvme_controller handle is valid for other nvme_ctrlr_* functions.
+ * On failure, the return value will be NULL.
+ *
+ * This function should be called from a single thread while no other threads or drivers
+ * are actively using the NVMe device.
+ *
+ * To stop using the controller and release its associated resources,
+ * call \ref nvme_detach with the nvme_controller instance returned by this function.
+ */
+struct nvme_controller *nvme_attach(void *devhandle);
+
+/**
+ * \brief Detaches specified device returned by \ref nvme_attach() from the NVMe driver.
+ *
+ * On success, the nvme_controller handle is no longer valid.
+ *
+ * This function should be called from a single thread while no other threads
+ * are actively using the NVMe device.
+ *
+ */
+int nvme_detach(struct nvme_controller *ctrlr);
+
+/**
+ * \brief Perform a full hardware reset of the NVMe controller.
+ *
+ * This function should be called from a single thread while no other threads
+ * are actively using the NVMe device.
+ *
+ * Any pointers returned from nvme_ctrlr_get_ns() and nvme_ns_get_data() may be invalidated
+ * by calling this function.  The number of namespaces as returned by nvme_ctrlr_get_num_ns() may
+ * also change.
+ */
+int nvme_ctrlr_reset(struct nvme_controller *ctrlr);
+
+/**
+ * \brief Get the identify controller data as defined by the NVMe specification.
+ *
+ * This function is thread safe and can be called at any point after nvme_attach().
+ *
+ */
+const struct nvme_controller_data *nvme_ctrlr_get_data(struct nvme_controller *ctrlr);
+
+/**
+ * \brief Get the number of namespaces for the given NVMe controller.
+ *
+ * This function is thread safe and can be called at any point after nvme_attach().
+ *
+ * This is equivalent to calling nvme_ctrlr_get_data() to get the
+ * nvme_controller_data and then reading the nn field.
+ *
+ */
+uint32_t nvme_ctrlr_get_num_ns(struct nvme_controller *ctrlr);
+
+/**
+ * \brief Determine if a particular log page is supported by the given NVMe controller.
+ *
+ * This function is thread safe and can be called at any point after nvme_attach().
+ *
+ * \sa nvme_ctrlr_cmd_get_log_page()
+ */
+bool nvme_ctrlr_is_log_page_supported(struct nvme_controller *ctrlr, uint8_t log_page);
+
+/**
+ * \brief Determine if a particular feature is supported by the given NVMe controller.
+ *
+ * This function is thread safe and can be called at any point after nvme_attach().
+ *
+ * \sa nvme_ctrlr_cmd_get_feature()
+ */
+bool nvme_ctrlr_is_feature_supported(struct nvme_controller *ctrlr, uint8_t feature_code);
+
+/**
+ * Signature for callback function invoked when a command is completed.
+ *
+ * The nvme_completion parameter contains the completion status.
+ */
+typedef void (*nvme_cb_fn_t)(void *, const struct nvme_completion *);
+
+/**
+ * Signature for callback function invoked when an asynchronous event
+ *  request command is completed.
+ *
+ * The aer_cb_arg parameter is set to the context specified by
+ *  nvme_ctrlr_register_aer_callback().
+ * The nvme_completion parameter contains the completion status of the
+ *  asynchronous event request that was completed.
+ */
+typedef void (*nvme_aer_cb_fn_t)(void *aer_cb_arg,
+				 const struct nvme_completion *);
+
+void nvme_ctrlr_register_aer_callback(struct nvme_controller *ctrlr,
+				      nvme_aer_cb_fn_t aer_cb_fn,
+				      void *aer_cb_arg);
+
+/**
+ * \brief Send the given NVM I/O command to the NVMe controller.
+ *
+ * This is a low level interface for submitting I/O commands directly. Prefer
+ * the nvme_ns_cmd_* functions instead. The validity of the command will
+ * not be checked!
+ *
+ * When constructing the nvme_command it is not necessary to fill out the PRP
+ * list/SGL or the CID. The driver will handle both of those for you.
+ *
+ * This function is thread safe and can be called at any point after
+ * nvme_register_io_thread().
+ *
+ */
+int nvme_ctrlr_cmd_io_raw(struct nvme_controller *ctrlr,
+			  struct nvme_command *cmd,
+			  void *buf, uint32_t len,
+			  nvme_cb_fn_t cb_fn, void *cb_arg);
+
+/**
+ * \brief Process any outstanding completions for I/O submitted on the current thread.
+ *
+ * This will only process completions for I/O that were submitted on the same thread
+ * that this function is called from. This call is also non-blocking, i.e. it only
+ * processes completions that are ready at the time of this function call. It does not
+ * wait for outstanding commands to finish.
+ *
+ * \param max_completions Limit the number of completions to be processed in one call, or 0
+ * for unlimited.
+ *
+ * \return Number of completions processed (may be 0) or negative on error.
+ *
+ * This function is thread safe and can be called at any point after nvme_attach().
+ *
+ */
+int32_t nvme_ctrlr_process_io_completions(struct nvme_controller *ctrlr, uint32_t max_completions);
+
+/**
+ * \brief Send the given admin command to the NVMe controller.
+ *
+ * This is a low level interface for submitting admin commands directly. Prefer
+ * the nvme_ctrlr_cmd_* functions instead. The validity of the command will
+ * not be checked!
+ *
+ * When constructing the nvme_command it is not necessary to fill out the PRP
+ * list/SGL or the CID. The driver will handle both of those for you.
+ *
+ * This function is thread safe and can be called at any point after
+ * \ref nvme_attach().
+ *
+ * Call \ref nvme_ctrlr_process_admin_completions() to poll for completion
+ * of commands submitted through this function.
+ */
+int nvme_ctrlr_cmd_admin_raw(struct nvme_controller *ctrlr,
+			     struct nvme_command *cmd,
+			     void *buf, uint32_t len,
+			     nvme_cb_fn_t cb_fn, void *cb_arg);
+
+/**
+ * \brief Process any outstanding completions for admin commands.
+ *
+ * This will process completions for admin commands submitted on any thread.
+ *
+ * This call is non-blocking, i.e. it only processes completions that are ready
+ * at the time of this function call. It does not wait for outstanding commands to
+ * finish.
+ *
+ * \return Number of completions processed (may be 0) or negative on error.
+ *
+ * This function is thread safe and can be called at any point after nvme_attach().
+ */
+int32_t nvme_ctrlr_process_admin_completions(struct nvme_controller *ctrlr);
+
+
+/** \brief Opaque handle to a namespace. Obtained by calling nvme_ctrlr_get_ns(). */
+struct nvme_namespace;
+
+/**
+ * \brief Get a handle to a namespace for the given controller.
+ *
+ * Namespaces are numbered from 1 to the total number of namespaces. There will never
+ * be any gaps in the numbering. The number of namespaces is obtained by calling
+ * nvme_ctrlr_get_num_ns().
+ *
+ * This function is thread safe and can be called at any point after nvme_attach().
+ *
+ */
+struct nvme_namespace *nvme_ctrlr_get_ns(struct nvme_controller *ctrlr, uint32_t ns_id);
+
+/**
+ * \brief Get a specific log page from the NVMe controller.
+ *
+ * \param log_page The log page identifier.
+ * \param nsid Depending on the log page, this may be 0, a namespace identifier, or NVME_GLOBAL_NAMESPACE_TAG.
+ * \param payload The pointer to the payload buffer.
+ * \param payload_size The size of payload buffer.
+ * \param cb_fn Callback function to invoke when the log page has been retrieved.
+ * \param cb_arg Argument to pass to the callback function.
+ *
+ * \return 0 if successfully submitted, ENOMEM if resources could not be allocated for this request
+ *
+ * This function is thread safe and can be called at any point after nvme_attach().
+ *
+ * Call \ref nvme_ctrlr_process_admin_completions() to poll for completion
+ * of commands submitted through this function.
+ *
+ * \sa nvme_ctrlr_is_log_page_supported()
+ */
+int nvme_ctrlr_cmd_get_log_page(struct nvme_controller *ctrlr,
+				uint8_t log_page, uint32_t nsid,
+				void *payload, uint32_t payload_size,
+				nvme_cb_fn_t cb_fn, void *cb_arg);
+
+/**
+ * \brief Set specific feature for the given NVMe controller.
+ *
+ * \param feature The feature identifier.
+ * \param cdw11 as defined by the specification for this command.
+ * \param cdw12 as defined by the specification for this command.
+ * \param payload The pointer to the payload buffer.
+ * \param payload_size The size of payload buffer.
+ * \param cb_fn Callback function to invoke when the feature has been set.
+ * \param cb_arg Argument to pass to the callback function.
+ *
+ * \return 0 if successfully submitted, ENOMEM if resources could not be allocated for this request
+ *
+ * This function is thread safe and can be called at any point after nvme_attach().
+ *
+ * Call \ref nvme_ctrlr_process_admin_completions() to poll for completion
+ * of commands submitted through this function.
+ *
+ * \sa nvme_ctrlr_cmd_get_feature(), nvme_ctrlr_is_feature_supported()
+ */
+int nvme_ctrlr_cmd_set_feature(struct nvme_controller *ctrlr,
+			       uint8_t feature, uint32_t cdw11, uint32_t cdw12,
+			       void *payload, uint32_t payload_size,
+			       nvme_cb_fn_t cb_fn, void *cb_arg);
+
+/**
+ * \brief Get specific feature from given NVMe controller.
+ *
+ * \param feature The feature identifier.
+ * \param cdw11 as defined by the specification for this command.
+ * \param payload The pointer to the payload buffer.
+ * \param payload_size The size of payload buffer.
+ * \param cb_fn Callback function to invoke when the feature has been retrieved.
+ * \param cb_arg Argument to pass to the callback function.
+ *
+ * \return 0 if successfully submitted, ENOMEM if resources could not be allocated for this request
+ *
+ * This function is thread safe and can be called at any point after nvme_attach().
+ *
+ * Call \ref nvme_ctrlr_process_admin_completions() to poll for completion
+ * of commands submitted through this function.
+ *
+ * \sa nvme_ctrlr_cmd_set_feature(), nvme_ctrlr_is_feature_supported()
+ */
+int nvme_ctrlr_cmd_get_feature(struct nvme_controller *ctrlr,
+			       uint8_t feature, uint32_t cdw11,
+			       void *payload, uint32_t payload_size,
+			       nvme_cb_fn_t cb_fn, void *cb_arg);
+
+/**
+ * \brief Get the identify namespace data as defined by the NVMe specification.
+ *
+ * This function is thread safe and can be called at any point after nvme_attach().
+ *
+ */
+const struct nvme_namespace_data *nvme_ns_get_data(struct nvme_namespace *ns);
+
+/**
+ * \brief Get the namespace id (index number) from the given namespace handle.
+ *
+ * This function is thread safe and can be called at any point after nvme_attach().
+ *
+ */
+uint32_t nvme_ns_get_id(struct nvme_namespace *ns);
+
+/**
+ * \brief Get the maximum transfer size, in bytes, for an I/O sent to the given namespace.
+ *
+ * This function is thread safe and can be called at any point after nvme_attach().
+ *
+ */
+uint32_t nvme_ns_get_max_io_xfer_size(struct nvme_namespace *ns);
+
+/**
+ * \brief Get the sector size, in bytes, of the given namespace.
+ *
+ * This function is thread safe and can be called at any point after nvme_attach().
+ *
+ */
+uint32_t nvme_ns_get_sector_size(struct nvme_namespace *ns);
+
+/**
+ * \brief Get the number of sectors for the given namespace.
+ *
+ * This function is thread safe and can be called at any point after nvme_attach().
+ *
+ */
+uint64_t nvme_ns_get_num_sectors(struct nvme_namespace *ns);
+
+/**
+ * \brief Get the size, in bytes, of the given namespace.
+ *
+ * This function is thread safe and can be called at any point after nvme_attach().
+ *
+ */
+uint64_t nvme_ns_get_size(struct nvme_namespace *ns);
+
+/**
+ * \brief Namespace command support flags.
+ */
+enum nvme_namespace_flags {
+	NVME_NS_DEALLOCATE_SUPPORTED	= 0x1, /**< The deallocate command is supported */
+	NVME_NS_FLUSH_SUPPORTED		= 0x2, /**< The flush command is supported */
+	NVME_NS_RESERVATION_SUPPORTED	= 0x4, /**< The reservation command is supported */
+};
+
+/**
+ * \brief Get the flags for the given namespace.
+ *
+ * See nvme_namespace_flags for the possible flags returned.
+ *
+ * This function is thread safe and can be called at any point after nvme_attach().
+ *
+ */
+uint32_t nvme_ns_get_flags(struct nvme_namespace *ns);
+
+/**
+ * Restart the SGL walk to the specified offset when the command has scattered payloads.
+ *
+ * The cb_arg parameter is the value passed to readv/writev.
+ */
+typedef void (*nvme_req_reset_sgl_fn_t)(void *cb_arg, uint32_t offset);
+
+/**
+ * Fill out *address and *length with the current SGL entry and advance to the next
+ * entry for the next time the callback is invoked.
+ *
+ * The cb_arg parameter is the value passed to readv/writev.
+ * The address parameter contains the physical address of this segment.
+ * The length parameter contains the length of this physical segment.
+ */
+typedef int (*nvme_req_next_sge_fn_t)(void *cb_arg, uint64_t *address, uint32_t *length);
+
+/**
+ * \brief Submits a write I/O to the specified NVMe namespace.
+ *
+ * \param ns NVMe namespace to submit the write I/O
+ * \param payload virtual address pointer to the data payload
+ * \param lba starting LBA to write the data
+ * \param lba_count length (in sectors) for the write operation
+ * \param cb_fn callback function to invoke when the I/O is completed
+ * \param cb_arg argument to pass to the callback function
+ * \param io_flags set flags, defined by the NVME_IO_FLAGS_* entries
+ * 			in spdk/nvme_spec.h, for this I/O.
+ *
+ * \return 0 if successfully submitted, ENOMEM if an nvme_request
+ *	     structure cannot be allocated for the I/O request
+ *
+ * This function is thread safe and can be called at any point after
+ * nvme_register_io_thread().
+ */
+int nvme_ns_cmd_write(struct nvme_namespace *ns, void *payload,
+		      uint64_t lba, uint32_t lba_count, nvme_cb_fn_t cb_fn,
+		      void *cb_arg, uint32_t io_flags);
+
+/**
+ * \brief Submits a write I/O with a scattered payload to the specified NVMe namespace.
+ *
+ * \param ns NVMe namespace to submit the write I/O
+ * \param lba starting LBA to write the data
+ * \param lba_count length (in sectors) for the write operation
+ * \param cb_fn callback function to invoke when the I/O is completed
+ * \param cb_arg argument to pass to the callback function
+ * \param io_flags set flags, defined in nvme_spec.h, for this I/O
+ * \param reset_sgl_fn callback function to reset scattered payload
+ * \param next_sge_fn callback function to iterate each scattered
+ * payload memory segment
+ *
+ * \return 0 if successfully submitted, ENOMEM if an nvme_request
+ *	     structure cannot be allocated for the I/O request
+ *
+ * This function is thread safe and can be called at any point after
+ * nvme_register_io_thread().
+ */
+int nvme_ns_cmd_writev(struct nvme_namespace *ns, uint64_t lba, uint32_t lba_count,
+		       nvme_cb_fn_t cb_fn, void *cb_arg, uint32_t io_flags,
+		       nvme_req_reset_sgl_fn_t reset_sgl_fn,
+		       nvme_req_next_sge_fn_t next_sge_fn);
+
+/**
+ * \brief Submits a read I/O to the specified NVMe namespace.
+ *
+ * \param ns NVMe namespace to submit the read I/O
+ * \param payload virtual address pointer to the data payload
+ * \param lba starting LBA to read the data
+ * \param lba_count length (in sectors) for the read operation
+ * \param cb_fn callback function to invoke when the I/O is completed
+ * \param cb_arg argument to pass to the callback function
+ * \param io_flags set flags, defined in nvme_spec.h, for this I/O
+ *
+ * \return 0 if successfully submitted, ENOMEM if an nvme_request
+ *	     structure cannot be allocated for the I/O request
+ *
+ * This function is thread safe and can be called at any point after
+ * nvme_register_io_thread().
+ */
+int nvme_ns_cmd_read(struct nvme_namespace *ns, void *payload,
+		     uint64_t lba, uint32_t lba_count, nvme_cb_fn_t cb_fn,
+		     void *cb_arg, uint32_t io_flags);
+
+/**
+ * \brief Submits a scattered-payload read I/O to the specified NVMe namespace.
+ *
+ * \param ns NVMe namespace to submit the read I/O
+ * \param lba starting LBA to read the data
+ * \param lba_count length (in sectors) for the read operation
+ * \param cb_fn callback function to invoke when the I/O is completed
+ * \param cb_arg argument to pass to the callback function
+ * \param io_flags set flags, defined in nvme_spec.h, for this I/O
+ * \param reset_sgl_fn callback function to reset scattered payload
+ * \param next_sge_fn callback function to iterate each scattered
+ * payload memory segment
+ *
+ * \return 0 if successfully submitted, ENOMEM if an nvme_request
+ *	     structure cannot be allocated for the I/O request
+ *
+ * This function is thread safe and can be called at any point after
+ * nvme_register_io_thread().
+ */
+int nvme_ns_cmd_readv(struct nvme_namespace *ns, uint64_t lba, uint32_t lba_count,
+		      nvme_cb_fn_t cb_fn, void *cb_arg, uint32_t io_flags,
+		      nvme_req_reset_sgl_fn_t reset_sgl_fn,
+		      nvme_req_next_sge_fn_t next_sge_fn);
+
+
+/**
+ * \brief Submits a deallocation request to the specified NVMe namespace.
+ *
+ * \param ns NVMe namespace to submit the deallocation request
+ * \param payload virtual address pointer to the list of LBA ranges to
+ *                deallocate
+ * \param num_ranges number of ranges in the list pointed to by payload; must be
+ *                between 1 and \ref NVME_DATASET_MANAGEMENT_MAX_RANGES, inclusive.
+ * \param cb_fn callback function to invoke when the I/O is completed
+ * \param cb_arg argument to pass to the callback function
+ *
+ * \return 0 if successfully submitted, ENOMEM if an nvme_request
+ *	     structure cannot be allocated for the I/O request
+ *
+ * This function is thread safe and can be called at any point after
+ * nvme_register_io_thread().
+ */
+int nvme_ns_cmd_deallocate(struct nvme_namespace *ns, void *payload,
+			   uint16_t num_ranges, nvme_cb_fn_t cb_fn,
+			   void *cb_arg);
+
+/**
+ * \brief Submits a flush request to the specified NVMe namespace.
+ *
+ * \param ns NVMe namespace to submit the flush request
+ * \param cb_fn callback function to invoke when the I/O is completed
+ * \param cb_arg argument to pass to the callback function
+ *
+ * \return 0 if successfully submitted, ENOMEM if an nvme_request
+ *	     structure cannot be allocated for the I/O request
+ *
+ * This function is thread safe and can be called at any point after
+ * nvme_register_io_thread().
+ */
+int nvme_ns_cmd_flush(struct nvme_namespace *ns, nvme_cb_fn_t cb_fn,
+		      void *cb_arg);
+
+/**
+ * \brief Submits a reservation register to the specified NVMe namespace.
+ *
+ * \param ns NVMe namespace to submit the reservation register request
+ * \param payload virtual address pointer to the reservation register data
+ * \param ignore_key if true, the current reservation key check is disabled
+ * \param action specifies the registration action
+ * \param cptpl change the Persist Through Power Loss state
+ * \param cb_fn callback function to invoke when the I/O is completed
+ * \param cb_arg argument to pass to the callback function
+ *
+ * \return 0 if successfully submitted, ENOMEM if an nvme_request
+ *	     structure cannot be allocated for the I/O request
+ *
+ * This function is thread safe and can be called at any point after
+ * nvme_register_io_thread().
+ */
+int nvme_ns_cmd_reservation_register(struct nvme_namespace *ns,
+				     struct nvme_reservation_register_data *payload,
+				     bool ignore_key,
+				     enum nvme_reservation_register_action action,
+				     enum nvme_reservation_register_cptpl cptpl,
+				     nvme_cb_fn_t cb_fn, void *cb_arg);
+
+/**
+ * \brief Submits a reservation release to the specified NVMe namespace.
+ *
+ * \param ns NVMe namespace to submit the reservation release request
+ * \param payload virtual address pointer to current reservation key
+ * \param ignore_key if true, the current reservation key check is disabled
+ * \param action specifies the reservation release action
+ * \param type reservation type for the namespace
+ * \param cb_fn callback function to invoke when the I/O is completed
+ * \param cb_arg argument to pass to the callback function
+ *
+ * \return 0 if successfully submitted, ENOMEM if an nvme_request
+ *	     structure cannot be allocated for the I/O request
+ *
+ * This function is thread safe and can be called at any point after
+ * nvme_register_io_thread().
+ */
+int nvme_ns_cmd_reservation_release(struct nvme_namespace *ns,
+				    struct nvme_reservation_key_data *payload,
+				    bool ignore_key,
+				    enum nvme_reservation_release_action action,
+				    enum nvme_reservation_type type,
+				    nvme_cb_fn_t cb_fn, void *cb_arg);
+
+/**
+ * \brief Submits a reservation acquire to the specified NVMe namespace.
+ *
+ * \param ns NVMe namespace to submit the reservation acquire request
+ * \param payload virtual address pointer to reservation acquire data
+ * \param ignore_key if true, the current reservation key check is disabled
+ * \param action specifies the reservation acquire action
+ * \param type reservation type for the namespace
+ * \param cb_fn callback function to invoke when the I/O is completed
+ * \param cb_arg argument to pass to the callback function
+ *
+ * \return 0 if successfully submitted, ENOMEM if an nvme_request
+ *	     structure cannot be allocated for the I/O request
+ *
+ * This function is thread safe and can be called at any point after
+ * nvme_register_io_thread().
+ */
+int nvme_ns_cmd_reservation_acquire(struct nvme_namespace *ns,
+				    struct nvme_reservation_acquire_data *payload,
+				    bool ignore_key,
+				    enum nvme_reservation_acquire_action action,
+				    enum nvme_reservation_type type,
+				    nvme_cb_fn_t cb_fn, void *cb_arg);
+
+/**
+ * \brief Submits a reservation report to the specified NVMe namespace.
+ *
+ * \param ns NVMe namespace to submit the reservation report request
+ * \param payload virtual address pointer for reservation status data
+ * \param len length, in bytes, of the reservation status data structure
+ * \param cb_fn callback function to invoke when the I/O is completed
+ * \param cb_arg argument to pass to the callback function
+ *
+ * \return 0 if successfully submitted, ENOMEM if an nvme_request
+ *	     structure cannot be allocated for the I/O request
+ *
+ * This function is thread safe and can be called at any point after
+ * nvme_register_io_thread().
+ */
+int nvme_ns_cmd_reservation_report(struct nvme_namespace *ns, void *payload,
+				   uint32_t len, nvme_cb_fn_t cb_fn, void *cb_arg);
+
+/**
+ * \brief Get the size, in bytes, of an nvme_request.
+ *
+ * This is the size of the request objects that need to be allocated by the
+ * nvme_alloc_request macro in nvme_impl.h
+ *
+ * This function is thread safe and can be called at any time.
+ *
+ */
+size_t nvme_request_size(void);
+
+int nvme_register_io_thread(void);
+void nvme_unregister_io_thread(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/spdk/include/spdk/nvme_intel.h b/src/spdk/include/spdk/nvme_intel.h
new file mode 100644
index 0000000..421c458
--- /dev/null
+++ b/src/spdk/include/spdk/nvme_intel.h
@@ -0,0 +1,196 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright (c) Intel Corporation.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef SPDK_NVME_INTEL_H
+#define SPDK_NVME_INTEL_H
+
+#include <stdint.h>
+#include <stddef.h>
+
+#include "spdk/assert.h"
+
+/**
+ * \file
+ *
+ * reference:
+ * http://www.intel.com/content/dam/www/public/us/en/documents/product-specifications/
+ * ssd-dc-p3700-spec.pdf
+ */
+
+enum nvme_intel_feature {
+	NVME_INTEL_FEAT_MAX_LBA				= 0xC1,
+	NVME_INTEL_FEAT_NATIVE_MAX_LBA			= 0xC2,
+	NVME_INTEL_FEAT_POWER_GOVERNOR_SETTING		= 0xC6,
+	NVME_INTEL_FEAT_SMBUS_ADDRESS			= 0xC8,
+	NVME_INTEL_FEAT_LED_PATTERN			= 0xC9,
+	NVME_INTEL_FEAT_RESET_TIMED_WORKLOAD_COUNTERS	= 0xD5,
+	NVME_INTEL_FEAT_LATENCY_TRACKING		= 0xE2,
+};
+
+enum nvme_intel_set_max_lba_command_status_code {
+	NVME_INTEL_EXCEEDS_AVAILABLE_CAPACITY		= 0xC0,
+	NVME_INTEL_SMALLER_THAN_MIN_LIMIT		= 0xC1,
+	NVME_INTEL_SMALLER_THAN_NS_REQUIREMENTS		= 0xC2,
+};
+
+enum nvme_intel_log_page {
+	NVME_INTEL_LOG_PAGE_DIRECTORY		= 0xC0,
+	NVME_INTEL_LOG_READ_CMD_LATENCY		= 0xC1,
+	NVME_INTEL_LOG_WRITE_CMD_LATENCY	= 0xC2,
+	NVME_INTEL_LOG_TEMPERATURE		= 0xC5,
+	NVME_INTEL_LOG_SMART			= 0xCA,
+};
+
+enum nvme_intel_smart_attribute_code {
+	NVME_INTEL_SMART_PROGRAM_FAIL_COUNT			= 0xAB,
+	NVME_INTEL_SMART_ERASE_FAIL_COUNT			= 0xAC,
+	NVME_INTEL_SMART_WEAR_LEVELING_COUNT			= 0xAD,
+	NVME_INTEL_SMART_E2E_ERROR_COUNT			= 0xB8,
+	NVME_INTEL_SMART_CRC_ERROR_COUNT			= 0xC7,
+	NVME_INTEL_SMART_MEDIA_WEAR				= 0xE2,
+	NVME_INTEL_SMART_HOST_READ_PERCENTAGE			= 0xE3,
+	NVME_INTEL_SMART_TIMER					= 0xE4,
+	NVME_INTEL_SMART_THERMAL_THROTTLE_STATUS		= 0xEA,
+	NVME_INTEL_SMART_RETRY_BUFFER_OVERFLOW_COUNTER		= 0xF0,
+	NVME_INTEL_SMART_PLL_LOCK_LOSS_COUNT			= 0xF3,
+	NVME_INTEL_SMART_NAND_BYTES_WRITTEN			= 0xF4,
+	NVME_INTEL_SMART_HOST_BYTES_WRITTEN			= 0xF5,
+};
+
+struct nvme_intel_log_page_directory {
+	uint8_t		version[2];
+	uint8_t		reserved[384];
+	uint8_t		read_latency_log_len;
+	uint8_t		reserved2;
+	uint8_t		write_latency_log_len;
+	uint8_t		reserved3[5];
+	uint8_t		temperature_statistics_log_len;
+	uint8_t		reserved4[9];
+	uint8_t		smart_log_len;
+	uint8_t		reserved5[107];
+};
+SPDK_STATIC_ASSERT(sizeof(struct nvme_intel_log_page_directory) == 512, "Incorrect size");
+
+struct nvme_intel_rw_latency_page {
+	uint16_t		major_revison;	/* sic: "revison"; renaming would break users of this header */
+	uint16_t		minor_revison;	/* sic: "revison"; renaming would break users of this header */
+	uint32_t		buckets_32us[32];
+	uint32_t		buckets_1ms[31];
+	uint32_t		buckets_32ms[31];
+};
+SPDK_STATIC_ASSERT(sizeof(struct nvme_intel_rw_latency_page) == 380, "Incorrect size");
+
+struct nvme_intel_temperature_page {
+	uint64_t		current_temperature;
+	uint64_t		shutdown_flag_last;
+	uint64_t		shutdown_flag_life;
+	uint64_t		highest_temperature;
+	uint64_t		lowest_temperature;
+	uint64_t		reserved[5];
+	uint64_t		specified_max_op_temperature;
+	uint64_t		reserved2;
+	uint64_t		specified_min_op_temperature;
+	uint64_t		estimated_offset;
+};
+SPDK_STATIC_ASSERT(sizeof(struct nvme_intel_temperature_page) == 112, "Incorrect size");
+
+struct nvme_intel_smart_attribute {
+	uint8_t			code;
+	uint8_t			reserved[2];
+	uint8_t			normalized_value;
+	uint8_t			reserved2;
+	uint8_t			raw_value[6];
+	uint8_t			reserved3;
+};
+
+struct __attribute__((packed)) nvme_intel_smart_information_page {
+	struct nvme_intel_smart_attribute	nvme_intel_smart_attributes[13];
+};
+SPDK_STATIC_ASSERT(sizeof(struct nvme_intel_smart_information_page) == 156, "Incorrect size");
+
+union nvme_intel_power_governor_feature {
+	uint32_t	raw;
+	struct {
+		/** power governor setting : 00h = 25W 01h = 20W 02h = 10W */
+		uint32_t power_governor_setting		: 8;
+		uint32_t reserved	: 24;
+	} bits;
+};
+SPDK_STATIC_ASSERT(sizeof(union nvme_intel_power_governor_feature) == 4, "Incorrect size");
+
+union nvme_intel_smbus_address_feature {
+	uint32_t	raw;
+	struct {
+		uint32_t reserved	: 1;
+		uint32_t smbus_controller_address	: 8;
+		uint32_t reserved2	: 23;
+	} bits;
+};
+SPDK_STATIC_ASSERT(sizeof(union nvme_intel_smbus_address_feature) == 4, "Incorrect size");
+
+union nvme_intel_led_pattern_feature {
+	uint32_t	raw;
+	struct {
+		uint32_t feature_options	: 24;
+		uint32_t value	: 8;
+	} bits;
+};
+SPDK_STATIC_ASSERT(sizeof(union nvme_intel_led_pattern_feature) == 4, "Incorrect size");
+
+union nvme_intel_reset_timed_workload_counters_feature {
+	uint32_t	raw;
+	struct {
+		/**
+		 * Write Usage: 0 = NOP, 1 = Reset E2, E3, E4 counters;
+		 * Read Usage: Not Supported
+		 */
+		uint32_t reset	: 1;
+		uint32_t reserved	: 31;
+	} bits;
+};
+SPDK_STATIC_ASSERT(sizeof(union nvme_intel_reset_timed_workload_counters_feature) == 4,
+		   "Incorrect size");
+
+union nvme_intel_enable_latency_tracking_feature {
+	uint32_t	raw;
+	struct {
+		/**
+		 * Write Usage:
+		 * 00h = Disable Latency Tracking (Default)
+		 * 01h = Enable Latency Tracking
+		 */
+		uint32_t enable	: 32;
+	} bits;
+};
+SPDK_STATIC_ASSERT(sizeof(union nvme_intel_enable_latency_tracking_feature) == 4, "Incorrect size");
+#endif
diff --git a/src/spdk/include/spdk/nvme_spec.h b/src/spdk/include/spdk/nvme_spec.h
new file mode 100644
index 0000000..52f6304
--- /dev/null
+++ b/src/spdk/include/spdk/nvme_spec.h
@@ -0,0 +1,1114 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright (c) Intel Corporation.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef SPDK_NVME_SPEC_H
+#define SPDK_NVME_SPEC_H
+
+#include <stdint.h>
+#include <stddef.h>
+
+#include "spdk/assert.h"
+
+/**
+ * \file
+ *
+ */
+
+/**
+ * PCI class code for NVMe devices.
+ *
+ * Base class code 01h: mass storage
+ * Subclass code 08h: non-volatile memory
+ * Programming interface 02h: NVM Express
+ */
+#define NVME_CLASS_CODE 0x10802
+
+/**
+ * Use to mark a command to apply to all namespaces, or to retrieve global
+ *  log pages.
+ */
+#define NVME_GLOBAL_NAMESPACE_TAG	((uint32_t)0xFFFFFFFF)
+
+#define NVME_MAX_IO_QUEUES		(1 << 16)
+
+/**
+ * Indicates the maximum number of range sets that may be specified
+ *  in the dataset management command.
+ */
+#define NVME_DATASET_MANAGEMENT_MAX_RANGES	256
+
+union nvme_cap_lo_register {
+	uint32_t	raw;
+	struct {
+		/** maximum queue entries supported */
+		uint32_t mqes		: 16;
+
+		/** contiguous queues required */
+		uint32_t cqr		: 1;
+
+		/** arbitration mechanism supported */
+		uint32_t ams		: 2;
+
+		uint32_t reserved1	: 5;
+
+		/** timeout */
+		uint32_t to		: 8;
+	} bits;
+};
+SPDK_STATIC_ASSERT(sizeof(union nvme_cap_lo_register) == 4, "Incorrect size");
+
+union nvme_cap_hi_register {
+	uint32_t	raw;
+	struct {
+		/** doorbell stride */
+		uint32_t dstrd		: 4;
+
+		uint32_t reserved3	: 1;
+
+		/** command sets supported */
+		uint32_t css_nvm	: 1;
+
+		uint32_t css_reserved	: 3;
+		uint32_t reserved2	: 7;
+
+		/** memory page size minimum */
+		uint32_t mpsmin		: 4;
+
+		/** memory page size maximum */
+		uint32_t mpsmax		: 4;
+
+		uint32_t reserved1	: 8;
+	} bits;
+};
+SPDK_STATIC_ASSERT(sizeof(union nvme_cap_hi_register) == 4, "Incorrect size");
+
+union nvme_cc_register {
+	uint32_t	raw;
+	struct {
+		/** enable */
+		uint32_t en		: 1;
+
+		uint32_t reserved1	: 3;
+
+		/** i/o command set selected */
+		uint32_t css		: 3;
+
+		/** memory page size */
+		uint32_t mps		: 4;
+
+		/** arbitration mechanism selected */
+		uint32_t ams		: 3;
+
+		/** shutdown notification */
+		uint32_t shn		: 2;
+
+		/** i/o submission queue entry size */
+		uint32_t iosqes		: 4;
+
+		/** i/o completion queue entry size */
+		uint32_t iocqes		: 4;
+
+		uint32_t reserved2	: 8;
+	} bits;
+};
+SPDK_STATIC_ASSERT(sizeof(union nvme_cc_register) == 4, "Incorrect size");
+
+enum nvme_shn_value {
+	NVME_SHN_NORMAL		= 0x1,
+	NVME_SHN_ABRUPT		= 0x2,
+};
+
+union nvme_csts_register {
+	uint32_t	raw;
+	struct {
+		/** ready */
+		uint32_t rdy		: 1;
+
+		/** controller fatal status */
+		uint32_t cfs		: 1;
+
+		/** shutdown status */
+		uint32_t shst		: 2;
+
+		uint32_t reserved1	: 28;
+	} bits;
+};
+SPDK_STATIC_ASSERT(sizeof(union nvme_csts_register) == 4, "Incorrect size");
+
+enum nvme_shst_value {
+	NVME_SHST_NORMAL	= 0x0,
+	NVME_SHST_OCCURRING	= 0x1,
+	NVME_SHST_COMPLETE	= 0x2,
+};
+
+union nvme_aqa_register {
+	uint32_t	raw;
+	struct {
+		/** admin submission queue size */
+		uint32_t asqs		: 12;
+
+		uint32_t reserved1	: 4;
+
+		/** admin completion queue size */
+		uint32_t acqs		: 12;
+
+		uint32_t reserved2	: 4;
+	} bits;
+};
+SPDK_STATIC_ASSERT(sizeof(union nvme_aqa_register) == 4, "Incorrect size");
+
+struct nvme_registers {
+	/** controller capabilities, split into low (0x00) and high (0x04) dwords */
+	union nvme_cap_lo_register	cap_lo;
+	union nvme_cap_hi_register	cap_hi;
+
+	uint32_t	vs;		/* version */
+	uint32_t	intms;		/* interrupt mask set */
+	uint32_t	intmc;		/* interrupt mask clear */
+
+	/** controller configuration */
+	union nvme_cc_register	cc;
+
+	uint32_t	reserved1;
+	uint32_t	csts;		/* controller status; bit layout is union nvme_csts_register */
+	uint32_t	nssr;		/* NVM subsystem reset */
+
+	/** admin queue attributes */
+	union nvme_aqa_register	aqa;
+
+	uint64_t	asq;		/* admin submission queue base addr */
+	uint64_t	acq;		/* admin completion queue base addr */
+	uint32_t	reserved3[0x3f2];	/* pads 0x38-0xFFF so the doorbells start at 0x1000 */
+
+	struct {
+		uint32_t	sq_tdbl;	/* submission queue tail doorbell */
+		uint32_t	cq_hdbl;	/* completion queue head doorbell */
+	} doorbell[1];	/* NOTE(review): presumably indexed past [0] for further queues - confirm */
+};
+
+/* NVMe controller register space offsets */
+SPDK_STATIC_ASSERT(0x00 == offsetof(struct nvme_registers, cap_lo), "Incorrect register offset");
+SPDK_STATIC_ASSERT(0x08 == offsetof(struct nvme_registers, vs), "Incorrect register offset");
+SPDK_STATIC_ASSERT(0x0C == offsetof(struct nvme_registers, intms), "Incorrect register offset");
+SPDK_STATIC_ASSERT(0x10 == offsetof(struct nvme_registers, intmc), "Incorrect register offset");
+SPDK_STATIC_ASSERT(0x14 == offsetof(struct nvme_registers, cc), "Incorrect register offset");
+SPDK_STATIC_ASSERT(0x1C == offsetof(struct nvme_registers, csts), "Incorrect register offset");
+SPDK_STATIC_ASSERT(0x20 == offsetof(struct nvme_registers, nssr), "Incorrect register offset");
+SPDK_STATIC_ASSERT(0x24 == offsetof(struct nvme_registers, aqa), "Incorrect register offset");
+SPDK_STATIC_ASSERT(0x28 == offsetof(struct nvme_registers, asq), "Incorrect register offset");
+SPDK_STATIC_ASSERT(0x30 == offsetof(struct nvme_registers, acq), "Incorrect register offset");
+
+enum nvme_sgl_descriptor_type {
+	NVME_SGL_TYPE_DATA_BLOCK	= 0x0,
+	NVME_SGL_TYPE_BIT_BUCKET	= 0x1,
+	NVME_SGL_TYPE_SEGMENT		= 0x2,
+	NVME_SGL_TYPE_LAST_SEGMENT	= 0x3,
+	/* 0x4 - 0xe reserved */
+	NVME_SGL_TYPE_VENDOR_SPECIFIC	= 0xf
+};
+
+struct __attribute__((packed)) nvme_sgl_descriptor {
+	uint64_t address;
+	uint32_t length;
+	uint8_t reserved[3];
+
+	/** SGL descriptor type */
+	uint8_t type : 4;
+
+	/** SGL descriptor type specific */
+	uint8_t type_specific : 4;
+};
+SPDK_STATIC_ASSERT(sizeof(struct nvme_sgl_descriptor) == 16, "Incorrect size");
+
+enum nvme_psdt_value {
+	NVME_PSDT_PRP			= 0x0,
+	NVME_PSDT_SGL_MPTR_CONTIG	= 0x1,
+	NVME_PSDT_SGL_MPTR_SGL		= 0x2,
+	NVME_PSDT_RESERVED		= 0x3
+};
+
+struct nvme_command {
+	/* dword 0 */
+	uint16_t opc	:  8;	/* opcode */
+	uint16_t fuse	:  2;	/* fused operation */
+	uint16_t rsvd1	:  4;
+	uint16_t psdt	:  2;
+	uint16_t cid;		/* command identifier */
+
+	/* dword 1 */
+	uint32_t nsid;		/* namespace identifier */
+
+	/* dword 2-3 */
+	uint32_t rsvd2;
+	uint32_t rsvd3;
+
+	/* dword 4-5 */
+	uint64_t mptr;		/* metadata pointer */
+
+	/* dword 6-9: data pointer */
+	union {
+		struct {
+			uint64_t prp1;		/* prp entry 1 */
+			uint64_t prp2;		/* prp entry 2 */
+		} prp;
+
+		struct nvme_sgl_descriptor sgl1;
+	} dptr;
+
+	/* dword 10-15 */
+	uint32_t cdw10;		/* command-specific */
+	uint32_t cdw11;		/* command-specific */
+	uint32_t cdw12;		/* command-specific */
+	uint32_t cdw13;		/* command-specific */
+	uint32_t cdw14;		/* command-specific */
+	uint32_t cdw15;		/* command-specific */
+};
+SPDK_STATIC_ASSERT(sizeof(struct nvme_command) == 64, "Incorrect size");
+
+struct nvme_status {
+	uint16_t p	:  1;	/* phase tag */
+	uint16_t sc	:  8;	/* status code */
+	uint16_t sct	:  3;	/* status code type */
+	uint16_t rsvd2	:  2;
+	uint16_t m	:  1;	/* more */
+	uint16_t dnr	:  1;	/* do not retry */
+};
+SPDK_STATIC_ASSERT(sizeof(struct nvme_status) == 2, "Incorrect size");
+
+struct nvme_completion {
+	/* dword 0 */
+	uint32_t		cdw0;	/* command-specific */
+
+	/* dword 1 */
+	uint32_t		rsvd1;
+
+	/* dword 2 */
+	uint16_t		sqhd;	/* submission queue head pointer */
+	uint16_t		sqid;	/* submission queue identifier */
+
+	/* dword 3 */
+	uint16_t		cid;	/* command identifier */
+	struct nvme_status	status;
+};
+SPDK_STATIC_ASSERT(sizeof(struct nvme_completion) == 16, "Incorrect size");
+
+struct nvme_dsm_range {
+	uint32_t attributes;
+	uint32_t length;
+	uint64_t starting_lba;
+};
+SPDK_STATIC_ASSERT(sizeof(struct nvme_dsm_range) == 16, "Incorrect size");
+
+/* status code types */
+enum nvme_status_code_type {
+	NVME_SCT_GENERIC		= 0x0,
+	NVME_SCT_COMMAND_SPECIFIC	= 0x1,
+	NVME_SCT_MEDIA_ERROR		= 0x2,
+	/* 0x3-0x6 - reserved */
+	NVME_SCT_VENDOR_SPECIFIC	= 0x7,
+};
+
+/* generic command status codes */
+enum nvme_generic_command_status_code {
+	NVME_SC_SUCCESS				= 0x00,
+	NVME_SC_INVALID_OPCODE			= 0x01,
+	NVME_SC_INVALID_FIELD			= 0x02,
+	NVME_SC_COMMAND_ID_CONFLICT		= 0x03,
+	NVME_SC_DATA_TRANSFER_ERROR		= 0x04,
+	NVME_SC_ABORTED_POWER_LOSS		= 0x05,
+	NVME_SC_INTERNAL_DEVICE_ERROR		= 0x06,
+	NVME_SC_ABORTED_BY_REQUEST		= 0x07,
+	NVME_SC_ABORTED_SQ_DELETION		= 0x08,
+	NVME_SC_ABORTED_FAILED_FUSED		= 0x09,
+	NVME_SC_ABORTED_MISSING_FUSED		= 0x0a,
+	NVME_SC_INVALID_NAMESPACE_OR_FORMAT	= 0x0b,
+	NVME_SC_COMMAND_SEQUENCE_ERROR		= 0x0c,
+
+	NVME_SC_LBA_OUT_OF_RANGE		= 0x80,
+	NVME_SC_CAPACITY_EXCEEDED		= 0x81,
+	NVME_SC_NAMESPACE_NOT_READY		= 0x82,
+};
+
+/* command specific status codes */
+enum nvme_command_specific_status_code {
+	NVME_SC_COMPLETION_QUEUE_INVALID	= 0x00,
+	NVME_SC_INVALID_QUEUE_IDENTIFIER	= 0x01,
+	NVME_SC_MAXIMUM_QUEUE_SIZE_EXCEEDED	= 0x02,
+	NVME_SC_ABORT_COMMAND_LIMIT_EXCEEDED	= 0x03,
+	/* 0x04 - reserved */
+	NVME_SC_ASYNC_EVENT_REQUEST_LIMIT_EXCEEDED = 0x05,
+	NVME_SC_INVALID_FIRMWARE_SLOT		= 0x06,
+	NVME_SC_INVALID_FIRMWARE_IMAGE		= 0x07,
+	NVME_SC_INVALID_INTERRUPT_VECTOR	= 0x08,
+	NVME_SC_INVALID_LOG_PAGE		= 0x09,
+	NVME_SC_INVALID_FORMAT			= 0x0a,
+	NVME_SC_FIRMWARE_REQUIRES_RESET		= 0x0b,
+
+	NVME_SC_CONFLICTING_ATTRIBUTES		= 0x80,
+	NVME_SC_INVALID_PROTECTION_INFO		= 0x81,
+	NVME_SC_ATTEMPTED_WRITE_TO_RO_PAGE	= 0x82,
+};
+
+/* media error status codes */
+enum nvme_media_error_status_code {
+	NVME_SC_WRITE_FAULTS			= 0x80,
+	NVME_SC_UNRECOVERED_READ_ERROR		= 0x81,
+	NVME_SC_GUARD_CHECK_ERROR		= 0x82,
+	NVME_SC_APPLICATION_TAG_CHECK_ERROR	= 0x83,
+	NVME_SC_REFERENCE_TAG_CHECK_ERROR	= 0x84,
+	NVME_SC_COMPARE_FAILURE			= 0x85,
+	NVME_SC_ACCESS_DENIED			= 0x86,
+};
+
+/* admin opcodes */
+enum nvme_admin_opcode {
+	NVME_OPC_DELETE_IO_SQ			= 0x00,
+	NVME_OPC_CREATE_IO_SQ			= 0x01,
+	NVME_OPC_GET_LOG_PAGE			= 0x02,
+	/* 0x03 - reserved */
+	NVME_OPC_DELETE_IO_CQ			= 0x04,
+	NVME_OPC_CREATE_IO_CQ			= 0x05,
+	NVME_OPC_IDENTIFY			= 0x06,
+	/* 0x07 - reserved */
+	NVME_OPC_ABORT				= 0x08,
+	NVME_OPC_SET_FEATURES			= 0x09,
+	NVME_OPC_GET_FEATURES			= 0x0a,
+	/* 0x0b - reserved */
+	NVME_OPC_ASYNC_EVENT_REQUEST		= 0x0c,
+	NVME_OPC_NAMESPACE_MANAGEMENT		= 0x0d,
+	/* 0x0e-0x0f - reserved */
+	NVME_OPC_FIRMWARE_COMMIT		= 0x10,
+	NVME_OPC_FIRMWARE_IMAGE_DOWNLOAD	= 0x11,
+
+	NVME_OPC_NAMESPACE_ATTACHMENT		= 0x15,
+
+	NVME_OPC_FORMAT_NVM			= 0x80,
+	NVME_OPC_SECURITY_SEND			= 0x81,
+	NVME_OPC_SECURITY_RECEIVE		= 0x82,
+};
+
+/* nvme nvm opcodes */
+enum nvme_nvm_opcode {
+	NVME_OPC_FLUSH				= 0x00,
+	NVME_OPC_WRITE				= 0x01,
+	NVME_OPC_READ				= 0x02,
+	/* 0x03 - reserved */
+	NVME_OPC_WRITE_UNCORRECTABLE		= 0x04,
+	NVME_OPC_COMPARE			= 0x05,
+	/* 0x06-0x07 - reserved */
+	NVME_OPC_WRITE_ZEROES			= 0x08,
+	NVME_OPC_DATASET_MANAGEMENT		= 0x09,
+
+	NVME_OPC_RESERVATION_REGISTER		= 0x0d,
+	NVME_OPC_RESERVATION_REPORT		= 0x0e,
+
+	NVME_OPC_RESERVATION_ACQUIRE		= 0x11,
+	NVME_OPC_RESERVATION_RELEASE		= 0x15,
+};
+
+enum nvme_feature {
+	/* 0x00 - reserved */
+	NVME_FEAT_ARBITRATION			= 0x01,
+	NVME_FEAT_POWER_MANAGEMENT		= 0x02,
+	NVME_FEAT_LBA_RANGE_TYPE		= 0x03,
+	NVME_FEAT_TEMPERATURE_THRESHOLD		= 0x04,
+	NVME_FEAT_ERROR_RECOVERY		= 0x05,
+	NVME_FEAT_VOLATILE_WRITE_CACHE		= 0x06,
+	NVME_FEAT_NUMBER_OF_QUEUES		= 0x07,
+	NVME_FEAT_INTERRUPT_COALESCING		= 0x08,
+	NVME_FEAT_INTERRUPT_VECTOR_CONFIGURATION = 0x09,
+	NVME_FEAT_WRITE_ATOMICITY		= 0x0A,
+	NVME_FEAT_ASYNC_EVENT_CONFIGURATION	= 0x0B,
+	NVME_FEAT_AUTONOMOUS_POWER_STATE_TRANSITION	= 0x0C,
+	NVME_FEAT_HOST_MEM_BUFFER		= 0x0D,
+	/* 0x0E-0x7F - reserved */
+	NVME_FEAT_SOFTWARE_PROGRESS_MARKER	= 0x80,
+	/* 0x81-0xBF - command set specific */
+	NVME_FEAT_HOST_IDENTIFIER		= 0x81,
+	NVME_FEAT_HOST_RESERVE_MASK		= 0x82,
+	NVME_FEAT_HOST_RESERVE_PERSIST		= 0x83,
+	/* 0xC0-0xFF - vendor specific */
+};
+
+enum nvme_dsm_attribute {
+	NVME_DSM_ATTR_INTEGRAL_READ		= 0x1,
+	NVME_DSM_ATTR_INTEGRAL_WRITE		= 0x2,
+	NVME_DSM_ATTR_DEALLOCATE		= 0x4,
+};
+
+struct nvme_power_state {
+	uint16_t mp;				/* bits 15:00: maximum power */
+
+	uint8_t reserved1;
+
+	uint8_t mps		: 1;		/* bit 24: max power scale */
+	uint8_t nops		: 1;		/* bit 25: non-operational state */
+	uint8_t reserved2	: 6;
+
+	uint32_t enlat;				/* bits 63:32: entry latency in microseconds */
+	uint32_t exlat;				/* bits 95:64: exit latency in microseconds */
+
+	uint8_t rrt		: 5;		/* bits 100:96: relative read throughput */
+	uint8_t reserved3	: 3;
+
+	uint8_t rrl		: 5;		/* bits 108:104: relative read latency */
+	uint8_t reserved4	: 3;
+
+	uint8_t rwt		: 5;		/* bits 116:112: relative write throughput */
+	uint8_t reserved5	: 3;
+
+	uint8_t rwl		: 5;		/* bits 124:120: relative write latency */
+	uint8_t reserved6	: 3;
+
+	uint8_t reserved7[16];
+};
+SPDK_STATIC_ASSERT(sizeof(struct nvme_power_state) == 32, "Incorrect size");
+
+struct __attribute__((packed)) nvme_controller_data {
+	/* bytes 0-255: controller capabilities and features */
+
+	/** pci vendor id */
+	uint16_t		vid;
+
+	/** pci subsystem vendor id */
+	uint16_t		ssvid;
+
+	/** serial number */
+	int8_t			sn[20];
+
+	/** model number */
+	int8_t			mn[40];
+
+	/** firmware revision */
+	uint8_t			fr[8];
+
+	/** recommended arbitration burst */
+	uint8_t			rab;
+
+	/** ieee oui identifier */
+	uint8_t			ieee[3];
+
+	/** multi-interface capabilities */
+	uint8_t			mic;
+
+	/** maximum data transfer size */
+	uint8_t			mdts;
+
+	/** controller id */
+	uint16_t		cntlid;
+
+	/** version */
+	uint32_t		ver;
+
+	/** RTD3 resume latency */
+	uint32_t		rtd3r;
+
+	/** RTD3 entry latency */
+	uint32_t		rtd3e;
+
+	/** optional asynchronous events supported */
+	uint32_t		oaes;
+
+	uint8_t			reserved1[160];
+
+	/* bytes 256-511: admin command set attributes */
+
+	/** optional admin command support */
+	struct {
+		/* supports security send/receive commands */
+		uint16_t	security  : 1;
+
+		/* supports format nvm command */
+		uint16_t	format    : 1;
+
+		/* supports firmware activate/download commands */
+		uint16_t	firmware  : 1;
+
+		uint16_t	oacs_rsvd : 13;
+	} oacs;
+
+	/** abort command limit */
+	uint8_t			acl;
+
+	/** asynchronous event request limit */
+	uint8_t			aerl;
+
+	/** firmware updates */
+	struct {
+		/* first slot is read-only */
+		uint8_t		slot1_ro  : 1;
+
+		/* number of firmware slots */
+		uint8_t		num_slots : 3;
+
+		uint8_t		frmw_rsvd : 4;
+	} frmw;
+
+	/** log page attributes */
+	struct {
+		/* per namespace smart/health log page */
+		uint8_t		ns_smart : 1;
+		/* command effects log page */
+		uint8_t		celp : 1;
+		uint8_t		lpa_rsvd : 6;
+	} lpa;
+
+	/** error log page entries */
+	uint8_t			elpe;
+
+	/** number of power states supported */
+	uint8_t			npss;
+
+	/** admin vendor specific command configuration */
+	struct {
+		/* admin vendor specific commands use disk format */
+		uint8_t		spec_format : 1;
+
+		uint8_t		avscc_rsvd  : 7;
+	} avscc;
+
+	/** autonomous power state transition attributes */
+	struct {
+		/** controller supports autonomous power state transitions */
+		uint8_t		supported  : 1;
+
+		uint8_t		apsta_rsvd : 7;
+	} apsta;
+
+	/** warning composite temperature threshold */
+	uint16_t		wctemp;
+
+	/** critical composite temperature threshold */
+	uint16_t		cctemp;
+
+	/** maximum time for firmware activation */
+	uint16_t		mtfa;
+
+	/** host memory buffer preferred size */
+	uint32_t		hmpre;
+
+	/** host memory buffer minimum size */
+	uint32_t		hmmin;
+
+	/** total NVM capacity */
+	uint64_t		tnvmcap[2];
+
+	/** unallocated NVM capacity */
+	uint64_t		unvmcap[2];
+
+	/** replay protected memory block support */
+	struct {
+		uint8_t		num_rpmb_units	: 3;
+		uint8_t		auth_method	: 3;
+		uint8_t		reserved1	: 2;
+
+		uint8_t		reserved2;
+
+		uint8_t		total_size;
+		uint8_t		access_size;
+	} rpmbs;
+
+	uint8_t			reserved2[196];
+
+	/* bytes 512-703: nvm command set attributes */
+
+	/** submission queue entry size */
+	struct {
+		uint8_t		min : 4;
+		uint8_t		max : 4;
+	} sqes;
+
+	/** completion queue entry size */
+	struct {
+		uint8_t		min : 4;
+		uint8_t		max : 4;
+	} cqes;
+
+	uint8_t			reserved3[2];
+
+	/** number of namespaces */
+	uint32_t		nn;
+
+	/** optional nvm command support */
+	struct {
+		uint16_t	compare : 1;
+		uint16_t	write_unc : 1;
+		uint16_t	dsm: 1;
+		uint16_t	write_zeroes: 1;
+		uint16_t	set_features_save: 1;
+		uint16_t	reservations: 1;
+		uint16_t	reserved: 10;
+	} oncs;
+
+	/** fused operation support */
+	uint16_t		fuses;
+
+	/** format nvm attributes */
+	uint8_t			fna;
+
+	/** volatile write cache */
+	struct {
+		uint8_t		present : 1;
+		uint8_t		reserved : 7;
+	} vwc;
+
+	/** atomic write unit normal */
+	uint16_t		awun;
+
+	/** atomic write unit power fail */
+	uint16_t		awupf;
+
+	/** NVM vendor specific command configuration */
+	uint8_t			nvscc;
+
+	uint8_t			reserved531;
+
+	/** atomic compare & write unit */
+	uint16_t		acwu;
+
+	uint16_t		reserved534;
+
+	/** SGL support */
+	struct {
+		uint32_t	supported : 1;
+		uint32_t	reserved : 15;
+		uint32_t	bit_bucket_descriptor_supported : 1;
+		uint32_t	metadata_pointer_supported : 1;
+		uint32_t	oversized_sgl_supported : 1;
+	} sgls;
+
+	uint8_t			reserved4[164];
+
+	/* bytes 704-2047: i/o command set attributes */
+	uint8_t			reserved5[1344];
+
+	/* bytes 2048-3071: power state descriptors */
+	struct nvme_power_state	psd[32];
+
+	/* bytes 3072-4095: vendor specific */
+	uint8_t			vs[1024];
+};
+SPDK_STATIC_ASSERT(sizeof(struct nvme_controller_data) == 4096, "Incorrect size");
+
+struct nvme_namespace_data {
+	/** namespace size */
+	uint64_t		nsze;
+
+	/** namespace capacity */
+	uint64_t		ncap;
+
+	/** namespace utilization */
+	uint64_t		nuse;
+
+	/** namespace features */
+	struct {
+		/** thin provisioning */
+		uint8_t		thin_prov : 1;
+		uint8_t		reserved1 : 7;
+	} nsfeat;
+
+	/** number of lba formats */
+	uint8_t			nlbaf;
+
+	/** formatted lba size */
+	struct {
+		uint8_t		format    : 4;
+		uint8_t		extended  : 1;
+		uint8_t		reserved2 : 3;
+	} flbas;
+
+	/** metadata capabilities */
+	struct {
+		/** metadata can be transferred as part of data prp list */
+		uint8_t		extended  : 1;
+
+		/** metadata can be transferred with separate metadata pointer */
+		uint8_t		pointer   : 1;
+
+		/** reserved */
+		uint8_t		reserved3 : 6;
+	} mc;
+
+	/** end-to-end data protection capabilities */
+	struct {
+		/** protection information type 1 */
+		uint8_t		pit1     : 1;
+
+		/** protection information type 2 */
+		uint8_t		pit2     : 1;
+
+		/** protection information type 3 */
+		uint8_t		pit3     : 1;
+
+		/** first eight bytes of metadata */
+		uint8_t		md_start : 1;
+
+		/** last eight bytes of metadata */
+		uint8_t		md_end   : 1;
+	} dpc;
+
+	/** end-to-end data protection type settings */
+	struct {
+		/** protection information type */
+		uint8_t		pit       : 3;
+
+		/** 1 == protection info transferred at start of metadata */
+		/** 0 == protection info transferred at end of metadata */
+		uint8_t		md_start  : 1;
+
+		uint8_t		reserved4 : 4;
+	} dps;
+
+	/** namespace multi-path I/O and namespace sharing capabilities */
+	struct {
+		uint8_t		can_share : 1;
+		uint8_t		reserved : 7;
+	} nmic;
+
+	/** reservation capabilities */
+	union {
+		struct {
+			/** supports persist through power loss */
+			uint8_t		persist : 1;
+
+			/** supports write exclusive */
+			uint8_t		write_exclusive : 1;
+
+			/** supports exclusive access */
+			uint8_t		exclusive_access : 1;
+
+			/** supports write exclusive - registrants only */
+			uint8_t		write_exclusive_reg_only : 1;
+
+			/** supports exclusive access - registrants only */
+			uint8_t		exclusive_access_reg_only : 1;
+
+			/** supports write exclusive - all registrants */
+			uint8_t		write_exclusive_all_reg : 1;
+
+			/** supports exclusive access - all registrants */
+			uint8_t		exclusive_access_all_reg : 1;
+
+			uint8_t		reserved : 1;
+		} rescap;
+		uint8_t		raw;
+	} nsrescap;
+	/** format progress indicator */
+	uint8_t			fpi;
+
+	uint8_t			reserved33;
+
+	/** namespace atomic write unit normal */
+	uint16_t		nawun;
+
+	/** namespace atomic write unit power fail */
+	uint16_t		nawupf;
+
+	/** namespace atomic compare & write unit */
+	uint16_t		nacwu;
+
+	/** namespace atomic boundary size normal */
+	uint16_t		nabsn;
+
+	/** namespace atomic boundary offset */
+	uint16_t		nabo;
+
+	/** namespace atomic boundary size power fail */
+	uint16_t		nabspf;
+
+	uint16_t		reserved46;
+
+	/** NVM capacity */
+	uint64_t		nvmcap[2];
+
+	uint8_t			reserved64[40];
+
+	/** namespace globally unique identifier */
+	uint8_t			nguid[16];
+
+	/** IEEE extended unique identifier */
+	uint64_t		eui64;
+
+	/** lba format support */
+	struct {
+		/** metadata size */
+		uint32_t	ms	  : 16;
+
+		/** lba data size */
+		uint32_t	lbads	  : 8;
+
+		/** relative performance */
+		uint32_t	rp	  : 2;
+
+		uint32_t	reserved6 : 6;
+	} lbaf[16];
+
+	uint8_t			reserved6[192];
+
+	uint8_t			vendor_specific[3712];
+};
+SPDK_STATIC_ASSERT(sizeof(struct nvme_namespace_data) == 4096, "Incorrect size");
+
+/**
+ * Reservation Type Encoding
+ */
+enum nvme_reservation_type {
+	/* 0x00 - reserved */
+
+	/* Write Exclusive Reservation */
+	NVME_RESERVE_WRITE_EXCLUSIVE	= 0x1,
+
+	/* Exclusive Access Reservation */
+	NVME_RESERVE_EXCLUSIVE_ACCESS	= 0x2,
+
+	/* Write Exclusive - Registrants Only Reservation */
+	NVME_RESERVE_WRITE_EXCLUSIVE_REG_ONLY	= 0x3,
+
+	/* Exclusive Access - Registrants Only Reservation */
+	NVME_RESERVE_EXCLUSIVE_ACCESS_REG_ONLY	= 0x4,
+
+	/* Write Exclusive - All Registrants Reservation */
+	NVME_RESERVE_WRITE_EXCLUSIVE_ALL_REGS	= 0x5,
+
+	/* Exclusive Access - All Registrants Reservation */
+	NVME_RESERVE_EXCLUSIVE_ACCESS_ALL_REGS	= 0x6,
+
+	/* 0x7-0xFF - Reserved */
+};
+
+struct nvme_reservation_acquire_data {
+	/** current reservation key */
+	uint64_t		crkey;
+	/** preempt reservation key */
+	uint64_t		prkey;
+};
+SPDK_STATIC_ASSERT(sizeof(struct nvme_reservation_acquire_data) == 16, "Incorrect size");
+
+/**
+ * Reservation Acquire action
+ */
+enum nvme_reservation_acquire_action {
+	NVME_RESERVE_ACQUIRE		= 0x0,
+	NVME_RESERVE_PREEMPT		= 0x1,
+	NVME_RESERVE_PREEMPT_ABORT	= 0x2,
+};
+
+struct __attribute__((packed)) nvme_reservation_status_data {
+	/** reservation action generation counter (bytes 0-3) */
+	uint32_t		generation;
+	/** reservation type (byte 4) */
+	uint8_t			type;
+	/** number of registered controllers (bytes 5-6: unaligned, hence packed) */
+	uint16_t		nr_regctl;
+	uint16_t		reserved1;	/* bytes 7-8 */
+	/** persist through power loss state (byte 9) */
+	uint8_t			ptpl_state;
+	uint8_t			reserved[14];	/* bytes 10-23 */
+};
+SPDK_STATIC_ASSERT(sizeof(struct nvme_reservation_status_data) == 24, "Incorrect size");
+
+struct __attribute__((packed)) nvme_reservation_controller_data {
+	uint16_t		ctrlr_id;	/* bytes 0-1 */
+	/** reservation status (byte 2) */
+	struct {
+		uint8_t		status    : 1;
+		uint8_t		reserved1 : 7;
+	} rcsts;
+	uint8_t			reserved2[5];	/* bytes 3-7 */
+	/** host identifier (bytes 8-15) */
+	uint64_t		host_id;
+	/** reservation key (bytes 16-23) */
+	uint64_t		key;
+};
+SPDK_STATIC_ASSERT(sizeof(struct nvme_reservation_controller_data) == 24, "Incorrect size");
+
+/**
+ * Change persist through power loss state for
+ *  Reservation Register command
+ */
+enum nvme_reservation_register_cptpl {
+	NVME_RESERVE_PTPL_NO_CHANGES		= 0x0,
+	NVME_RESERVE_PTPL_CLEAR_POWER_ON	= 0x2,
+	NVME_RESERVE_PTPL_PERSIST_POWER_LOSS	= 0x3,
+};
+
+/**
+ * Registration action for Reservation Register command
+ */
+enum nvme_reservation_register_action {
+	NVME_RESERVE_REGISTER_KEY	= 0x0,
+	NVME_RESERVE_UNREGISTER_KEY	= 0x1,
+	NVME_RESERVE_REPLACE_KEY	= 0x2,
+};
+
+struct nvme_reservation_register_data {
+	/** current reservation key */
+	uint64_t		crkey;
+	/** new reservation key */
+	uint64_t		nrkey;
+};
+SPDK_STATIC_ASSERT(sizeof(struct nvme_reservation_register_data) == 16, "Incorrect size");
+
+struct nvme_reservation_key_data {
+	/** current reservation key */
+	uint64_t		crkey;
+};
+SPDK_STATIC_ASSERT(sizeof(struct nvme_reservation_key_data) == 8, "Incorrect size");
+
+/**
+ * Reservation Release action
+ */
+enum nvme_reservation_release_action {
+	NVME_RESERVE_RELEASE		= 0x0,
+	NVME_RESERVE_CLEAR		= 0x1,
+};
+
+/**
+ * Log page identifiers for NVME_OPC_GET_LOG_PAGE
+ */
+enum nvme_log_page {
+	/* 0x00 - reserved */
+
+	/** Error information (mandatory) - \ref nvme_error_information_entry */
+	NVME_LOG_ERROR			= 0x01,
+
+	/** SMART / health information (mandatory) - \ref nvme_health_information_page */
+	NVME_LOG_HEALTH_INFORMATION	= 0x02,
+
+	/** Firmware slot information (mandatory) - \ref nvme_firmware_page */
+	NVME_LOG_FIRMWARE_SLOT		= 0x03,
+
+	/** Changed namespace list (optional) */
+	NVME_LOG_CHANGED_NS_LIST	= 0x04,
+
+	/** Command effects log (optional) */
+	NVME_LOG_COMMAND_EFFECTS_LOG	= 0x05,
+
+	/* 0x06-0x7F - reserved */
+
+	/** Reservation notification (optional) */
+	NVME_LOG_RESERVATION_NOTIFICATION	= 0x80,
+
+	/* 0x81-0xBF - I/O command set specific */
+
+	/* 0xC0-0xFF - vendor specific */
+};
+
+/**
+ * Error information log page (\ref NVME_LOG_ERROR)
+ */
+struct nvme_error_information_entry {
+	uint64_t		error_count;
+	uint16_t		sqid;
+	uint16_t		cid;
+	struct nvme_status	status;
+	uint16_t		error_location;
+	uint64_t		lba;
+	uint32_t		nsid;
+	uint8_t			vendor_specific;
+	uint8_t			reserved[35];
+};
+SPDK_STATIC_ASSERT(sizeof(struct nvme_error_information_entry) == 64, "Incorrect size");
+
+union nvme_critical_warning_state {
+	uint8_t		raw;	/* whole byte; shares storage with the flag bits below */
+
+	struct {
+		uint8_t	available_spare		: 1;
+		uint8_t	temperature		: 1;
+		uint8_t	device_reliability	: 1;
+		uint8_t	read_only		: 1;
+		uint8_t	volatile_memory_backup	: 1;
+		uint8_t	reserved		: 3;	/* pads the five flags to 8 bits */
+	} bits;
+};
+SPDK_STATIC_ASSERT(sizeof(union nvme_critical_warning_state) == 1, "Incorrect size");
+
+/**
+ * SMART / health information page (\ref NVME_LOG_HEALTH_INFORMATION)
+ */
+struct __attribute__((packed)) nvme_health_information_page {
+	union nvme_critical_warning_state	critical_warning;	/* byte 0 */
+
+	uint16_t		temperature;	/* bytes 1-2: unaligned, hence packed */
+	uint8_t			available_spare;	/* byte 3 */
+	uint8_t			available_spare_threshold;	/* byte 4 */
+	uint8_t			percentage_used;	/* byte 5 */
+
+	uint8_t			reserved[26];	/* bytes 6-31 */
+
+	/*
+	 * Note that the following are 128-bit values, but are
+	 *  defined as an array of 2 64-bit values (ten counters, bytes 32-191).
+	 */
+	/* Data Units Read counts 512-byte units, reported in thousands (NVMe spec). */
+	uint64_t		data_units_read[2];
+	/* Data Units Written counts 512-byte units, reported in thousands (NVMe spec). */
+	uint64_t		data_units_written[2];
+	/* For NVM command set, this includes Compare commands. */
+	uint64_t		host_read_commands[2];
+	uint64_t		host_write_commands[2];
+	/* Controller Busy Time is reported in minutes. */
+	uint64_t		controller_busy_time[2];
+	uint64_t		power_cycles[2];
+	uint64_t		power_on_hours[2];
+	uint64_t		unsafe_shutdowns[2];
+	uint64_t		media_errors[2];
+	uint64_t		num_error_info_log_entries[2];
+
+	uint8_t			reserved2[320];	/* bytes 192-511 */
+};
+SPDK_STATIC_ASSERT(sizeof(struct nvme_health_information_page) == 512, "Incorrect size");
+
+/**
+ * Firmware slot information page (\ref NVME_LOG_FIRMWARE_SLOT)
+ */
+struct nvme_firmware_page {
+	struct {
+		uint8_t	slot		: 3; /* slot for current FW */
+		uint8_t	reserved	: 5;
+	} afi;	/* byte 0 */
+
+	uint8_t			reserved[7];	/* bytes 1-7 */
+	uint64_t		revision[7]; /* revisions for 7 slots (bytes 8-63) */
+	uint8_t			reserved2[448];	/* bytes 64-511 */
+};
+SPDK_STATIC_ASSERT(sizeof(struct nvme_firmware_page) == 512, "Incorrect size");
+
+#define nvme_completion_is_error(cpl)	/* 1 if status.sc or status.sct is nonzero */ \
+	(!((cpl)->status.sc == 0 && (cpl)->status.sct == 0))
+
+#define NVME_IO_FLAGS_FORCE_UNIT_ACCESS (1U << 30)
+#define NVME_IO_FLAGS_LIMITED_RETRY (1U << 31)
+
+#endif
diff --git a/src/spdk/include/spdk/pci.h b/src/spdk/include/spdk/pci.h
new file mode 100644
index 0000000..13401d9
--- /dev/null
+++ b/src/spdk/include/spdk/pci.h
@@ -0,0 +1,55 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright (c) Intel Corporation.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef SPDK_PCI_H
+#define SPDK_PCI_H
+
+#define spdk_pci_device_get_domain(pdev)	((pdev)->domain)	/* pdev: any pointer expression */
+#define spdk_pci_device_get_bus(pdev)	((pdev)->bus)
+#define spdk_pci_device_get_dev(pdev)	((pdev)->dev)	/* param must not be named "dev": it would replace the member name too */
+#define spdk_pci_device_get_func(pdev)	((pdev)->func)
+#define spdk_pci_device_get_vendor_id(pdev) ((pdev)->vendor_id)
+#define spdk_pci_device_get_device_id(pdev) ((pdev)->device_id)
+
+#define PCI_CFG_SIZE		256
+#define PCI_EXT_CAP_ID_SN	0x03
+#define PCI_UIO_DRIVER		"uio_pci_generic"
+struct pci_device;	/* forward declaration so the prototypes below do not declare it at prototype scope */
+int pci_device_get_serial_number(struct pci_device *dev, char *sn, int len);
+int pci_device_has_non_uio_driver(struct pci_device *dev);
+int pci_device_unbind_kernel_driver(struct pci_device *dev);
+int pci_device_bind_uio_driver(struct pci_device *dev, char *driver_name);
+int pci_device_switch_to_uio_driver(struct pci_device *pci_dev);
+int pci_device_claim(struct pci_device *dev);
+
+#endif
diff --git a/src/spdk/include/spdk/pci_ids.h b/src/spdk/include/spdk/pci_ids.h
new file mode 100644
index 0000000..ab377fe
--- /dev/null
+++ b/src/spdk/include/spdk/pci_ids.h
@@ -0,0 +1,39 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright (c) Intel Corporation.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef SPDK_PCI_IDS_H
+#define SPDK_PCI_IDS_H
+
+#define PCI_VENDOR_ID_INTEL	0x8086	/* PCI vendor ID used to match Intel devices */
+
+#endif /* SPDK_PCI_IDS_H */
diff --git a/src/spdk/include/spdk/queue.h b/src/spdk/include/spdk/queue.h
new file mode 100644
index 0000000..2b27f55
--- /dev/null
+++ b/src/spdk/include/spdk/queue.h
@@ -0,0 +1,49 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright (c) Intel Corporation.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef SPDK_QUEUE_H
+#define SPDK_QUEUE_H
+
+#include <sys/cdefs.h>
+#include <sys/queue.h>
+
+/*
+ * The SPDK NVMe driver was originally ported from FreeBSD, which makes
+ *  use of features in FreeBSD's queue.h that do not exist on Linux.
+ *  Include a header with these additional features on every non-FreeBSD platform.
+ */
+#ifndef __FreeBSD__
+#include <spdk/queue_extras.h>
+#endif
+
+#endif
diff --git a/src/spdk/include/spdk/queue_extras.h b/src/spdk/include/spdk/queue_extras.h
new file mode 100644
index 0000000..7fa14b7
--- /dev/null
+++ b/src/spdk/include/spdk/queue_extras.h
@@ -0,0 +1,341 @@
+/*-
+ * Copyright (c) 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)queue.h	8.5 (Berkeley) 8/20/94
+ * $FreeBSD$
+ */
+
+#ifndef SPDK_QUEUE_EXTRAS_H
+#define SPDK_QUEUE_EXTRAS_H
+
+/*
+ * This file defines four types of data structures: singly-linked lists,
+ * singly-linked tail queues, lists and tail queues.
+ *
+ * A singly-linked list is headed by a single forward pointer. The elements
+ * are singly linked for minimum space and pointer manipulation overhead at
+ * the expense of O(n) removal for arbitrary elements. New elements can be
+ * added to the list after an existing element or at the head of the list.
+ * Elements being removed from the head of the list should use the explicit
+ * macro for this purpose for optimum efficiency. A singly-linked list may
+ * only be traversed in the forward direction.  Singly-linked lists are ideal
+ * for applications with large datasets and few or no removals or for
+ * implementing a LIFO queue.
+ *
+ * A singly-linked tail queue is headed by a pair of pointers, one to the
+ * head of the list and the other to the tail of the list. The elements are
+ * singly linked for minimum space and pointer manipulation overhead at the
+ * expense of O(n) removal for arbitrary elements. New elements can be added
+ * to the list after an existing element, at the head of the list, or at the
+ * end of the list. Elements being removed from the head of the tail queue
+ * should use the explicit macro for this purpose for optimum efficiency.
+ * A singly-linked tail queue may only be traversed in the forward direction.
+ * Singly-linked tail queues are ideal for applications with large datasets
+ * and few or no removals or for implementing a FIFO queue.
+ *
+ * A list is headed by a single forward pointer (or an array of forward
+ * pointers for a hash table header). The elements are doubly linked
+ * so that an arbitrary element can be removed without a need to
+ * traverse the list. New elements can be added to the list before
+ * or after an existing element or at the head of the list. A list
+ * may be traversed in either direction.
+ *
+ * A tail queue is headed by a pair of pointers, one to the head of the
+ * list and the other to the tail of the list. The elements are doubly
+ * linked so that an arbitrary element can be removed without a need to
+ * traverse the list. New elements can be added to the list before or
+ * after an existing element, at the head of the list, or at the end of
+ * the list. A tail queue may be traversed in either direction.
+ *
+ * For details on the use of these macros, see the queue(3) manual page.
+ *
+ *
+ *				SLIST	LIST	STAILQ	TAILQ
+ * _HEAD			+	+	+	+
+ * _HEAD_INITIALIZER		+	+	+	+
+ * _ENTRY			+	+	+	+
+ * _INIT			+	+	+	+
+ * _EMPTY			+	+	+	+
+ * _FIRST			+	+	+	+
+ * _NEXT			+	+	+	+
+ * _PREV			-	+	-	+
+ * _LAST			-	-	+	+
+ * _FOREACH			+	+	+	+
+ * _FOREACH_FROM		+	+	+	+
+ * _FOREACH_SAFE		+	+	+	+
+ * _FOREACH_FROM_SAFE		+	+	+	+
+ * _FOREACH_REVERSE		-	-	-	+
+ * _FOREACH_REVERSE_FROM	-	-	-	+
+ * _FOREACH_REVERSE_SAFE	-	-	-	+
+ * _FOREACH_REVERSE_FROM_SAFE	-	-	-	+
+ * _INSERT_HEAD			+	+	+	+
+ * _INSERT_BEFORE		-	+	-	+
+ * _INSERT_AFTER		+	+	+	+
+ * _INSERT_TAIL			-	-	+	+
+ * _CONCAT			-	-	+	+
+ * _REMOVE_AFTER		+	-	+	-
+ * _REMOVE_HEAD			+	-	+	-
+ * _REMOVE			+	+	+	+
+ * _SWAP			+	+	+	+
+ *
+ */
+
+/*
+ * Singly-linked Tail queue declarations.
+ */
+#define	STAILQ_HEAD(name, type)						\
+struct name {								\
+	struct type *stqh_first;/* first element */			\
+	struct type **stqh_last;/* addr of last next element */		\
+}
+
+#define	STAILQ_HEAD_INITIALIZER(head)					\
+	{ NULL, &(head).stqh_first }
+
+/*
+ * Singly-linked Tail queue functions.
+ */
+#define	STAILQ_EMPTY(head)	((head)->stqh_first == NULL)
+
+#define	STAILQ_FIRST(head)	((head)->stqh_first)
+
+#define	STAILQ_FOREACH_FROM(var, head, field)				\
+	for ((var) = ((var) ? (var) : STAILQ_FIRST((head)));		\
+	   (var);							\
+	   (var) = STAILQ_NEXT((var), field))
+
+#define	STAILQ_FOREACH_SAFE(var, head, field, tvar)			\
+	for ((var) = STAILQ_FIRST((head));				\
+	    (var) && ((tvar) = STAILQ_NEXT((var), field), 1);		\
+	    (var) = (tvar))
+
+#define	STAILQ_FOREACH_FROM_SAFE(var, head, field, tvar)		\
+	for ((var) = ((var) ? (var) : STAILQ_FIRST((head)));		\
+	    (var) && ((tvar) = STAILQ_NEXT((var), field), 1);		\
+	    (var) = (tvar))
+
+#define	STAILQ_LAST(head, type, field)	/* last element or NULL; no __containerof on glibc */ \
+	(STAILQ_EMPTY((head)) ? NULL : (struct type *)(void *)		\
+	    ((char *)(head)->stqh_last - __builtin_offsetof(struct type, field.stqe_next)))
+
+#define	STAILQ_NEXT(elm, field)	((elm)->field.stqe_next)
+
+#define STAILQ_REMOVE_AFTER(head, elm, field) do {			\
+	if ((STAILQ_NEXT(elm, field) =					\
+	     STAILQ_NEXT(STAILQ_NEXT(elm, field), field)) == NULL)	\
+		(head)->stqh_last = &STAILQ_NEXT((elm), field);		\
+} while (0)
+
+#define STAILQ_SWAP(head1, head2, type) do {				\
+	struct type *swap_first = STAILQ_FIRST(head1);			\
+	struct type **swap_last = (head1)->stqh_last;			\
+	STAILQ_FIRST(head1) = STAILQ_FIRST(head2);			\
+	(head1)->stqh_last = (head2)->stqh_last;			\
+	STAILQ_FIRST(head2) = swap_first;				\
+	(head2)->stqh_last = swap_last;					\
+	if (STAILQ_EMPTY(head1))					\
+		(head1)->stqh_last = &STAILQ_FIRST(head1);		\
+	if (STAILQ_EMPTY(head2))					\
+		(head2)->stqh_last = &STAILQ_FIRST(head2);		\
+} while (0)
+
+/*
+ * List declarations.
+ */
+#define	LIST_HEAD(name, type)						\
+struct name {								\
+	struct type *lh_first;	/* first element */			\
+}
+
+#define	LIST_HEAD_INITIALIZER(head)					\
+	{ NULL }
+
+#define	LIST_ENTRY(type)						\
+struct {								\
+	struct type *le_next;	/* next element */			\
+	struct type **le_prev;	/* address of previous next element */	\
+}
+
+/*
+ * List functions.
+ */
+
+#if (defined(_KERNEL) && defined(INVARIANTS))
+#define	QMD_LIST_CHECK_HEAD(head, field) do {				\
+	if (LIST_FIRST((head)) != NULL &&				\
+	    LIST_FIRST((head))->field.le_prev !=			\
+	     &LIST_FIRST((head)))					\
+		panic("Bad list head %p first->prev != head", (head));	\
+} while (0)
+
+#define	QMD_LIST_CHECK_NEXT(elm, field) do {				\
+	if (LIST_NEXT((elm), field) != NULL &&				\
+	    LIST_NEXT((elm), field)->field.le_prev !=			\
+	     &((elm)->field.le_next))					\
+	     	panic("Bad link elm %p next->prev != elm", (elm));	\
+} while (0)
+
+#define	QMD_LIST_CHECK_PREV(elm, field) do {				\
+	if (*(elm)->field.le_prev != (elm))				\
+		panic("Bad link elm %p prev->next != elm", (elm));	\
+} while (0)
+#else
+#define	QMD_LIST_CHECK_HEAD(head, field)
+#define	QMD_LIST_CHECK_NEXT(elm, field)
+#define	QMD_LIST_CHECK_PREV(elm, field)
+#endif /* (_KERNEL && INVARIANTS) */
+
+#define	LIST_EMPTY(head)	((head)->lh_first == NULL)
+
+#define	LIST_FIRST(head)	((head)->lh_first)
+
+#define	LIST_FOREACH_FROM(var, head, field)				\
+	for ((var) = ((var) ? (var) : LIST_FIRST((head)));		\
+	    (var);							\
+	    (var) = LIST_NEXT((var), field))
+
+#define	LIST_FOREACH_SAFE(var, head, field, tvar)			\
+	for ((var) = LIST_FIRST((head));				\
+	    (var) && ((tvar) = LIST_NEXT((var), field), 1);		\
+	    (var) = (tvar))
+
+#define	LIST_FOREACH_FROM_SAFE(var, head, field, tvar)			\
+	for ((var) = ((var) ? (var) : LIST_FIRST((head)));		\
+	    (var) && ((tvar) = LIST_NEXT((var), field), 1);		\
+	    (var) = (tvar))
+
+#define	LIST_NEXT(elm, field)	((elm)->field.le_next)
+
+#define	LIST_PREV(elm, head, type, field) /* previous element or NULL; no __containerof on glibc */ \
+	((elm)->field.le_prev == &LIST_FIRST((head)) ? NULL : (struct type *)(void *) \
+	    ((char *)(elm)->field.le_prev - __builtin_offsetof(struct type, field.le_next)))
+
+#define LIST_SWAP(head1, head2, type, field) do {			\
+	struct type *swap_tmp = LIST_FIRST((head1));			\
+	LIST_FIRST((head1)) = LIST_FIRST((head2));			\
+	LIST_FIRST((head2)) = swap_tmp;					\
+	if ((swap_tmp = LIST_FIRST((head1))) != NULL)			\
+		swap_tmp->field.le_prev = &LIST_FIRST((head1));		\
+	if ((swap_tmp = LIST_FIRST((head2))) != NULL)			\
+		swap_tmp->field.le_prev = &LIST_FIRST((head2));		\
+} while (0)
+
+/*
+ * Tail queue functions.
+ */
+#if (defined(_KERNEL) && defined(INVARIANTS))
+#define	QMD_TAILQ_CHECK_HEAD(head, field) do {				\
+	if (!TAILQ_EMPTY(head) &&					\
+	    TAILQ_FIRST((head))->field.tqe_prev !=			\
+	     &TAILQ_FIRST((head)))					\
+		panic("Bad tailq head %p first->prev != head", (head));	\
+} while (0)
+
+#define	QMD_TAILQ_CHECK_TAIL(head, field) do {				\
+	if (*(head)->tqh_last != NULL)					\
+	    	panic("Bad tailq NEXT(%p->tqh_last) != NULL", (head)); 	\
+} while (0)
+
+#define	QMD_TAILQ_CHECK_NEXT(elm, field) do {				\
+	if (TAILQ_NEXT((elm), field) != NULL &&				\
+	    TAILQ_NEXT((elm), field)->field.tqe_prev !=			\
+	     &((elm)->field.tqe_next))					\
+		panic("Bad link elm %p next->prev != elm", (elm));	\
+} while (0)
+
+#define	QMD_TAILQ_CHECK_PREV(elm, field) do {				\
+	if (*(elm)->field.tqe_prev != (elm))				\
+		panic("Bad link elm %p prev->next != elm", (elm));	\
+} while (0)
+#else
+#define	QMD_TAILQ_CHECK_HEAD(head, field)
+#define	QMD_TAILQ_CHECK_TAIL(head, headname)
+#define	QMD_TAILQ_CHECK_NEXT(elm, field)
+#define	QMD_TAILQ_CHECK_PREV(elm, field)
+#endif /* (_KERNEL && INVARIANTS) */
+
+#define	TAILQ_EMPTY(head)	((head)->tqh_first == NULL)
+
+#define	TAILQ_FIRST(head)	((head)->tqh_first)
+
+#define	TAILQ_FOREACH_FROM(var, head, field)				\
+	for ((var) = ((var) ? (var) : TAILQ_FIRST((head)));		\
+	    (var);							\
+	    (var) = TAILQ_NEXT((var), field))
+
+#define	TAILQ_FOREACH_SAFE(var, head, field, tvar)			\
+	for ((var) = TAILQ_FIRST((head));				\
+	    (var) && ((tvar) = TAILQ_NEXT((var), field), 1);		\
+	    (var) = (tvar))
+
+#define	TAILQ_FOREACH_FROM_SAFE(var, head, field, tvar)			\
+	for ((var) = ((var) ? (var) : TAILQ_FIRST((head)));		\
+	    (var) && ((tvar) = TAILQ_NEXT((var), field), 1);		\
+	    (var) = (tvar))
+
+#define	TAILQ_FOREACH_REVERSE_FROM(var, head, headname, field)		\
+	for ((var) = ((var) ? (var) : TAILQ_LAST((head), headname));	\
+	    (var);							\
+	    (var) = TAILQ_PREV((var), headname, field))
+
+#define	TAILQ_FOREACH_REVERSE_SAFE(var, head, headname, field, tvar)	\
+	for ((var) = TAILQ_LAST((head), headname);			\
+	    (var) && ((tvar) = TAILQ_PREV((var), headname, field), 1);	\
+	    (var) = (tvar))
+
+#define	TAILQ_FOREACH_REVERSE_FROM_SAFE(var, head, headname, field, tvar) \
+	for ((var) = ((var) ? (var) : TAILQ_LAST((head), headname));	\
+	    (var) && ((tvar) = TAILQ_PREV((var), headname, field), 1);	\
+	    (var) = (tvar))
+
+#define	TAILQ_LAST(head, headname)					\
+	(*(((struct headname *)((head)->tqh_last))->tqh_last))
+
+#define	TAILQ_NEXT(elm, field) ((elm)->field.tqe_next)
+
+#define	TAILQ_PREV(elm, headname, field)				\
+	(*(((struct headname *)((elm)->field.tqe_prev))->tqh_last))
+
+#define TAILQ_SWAP(head1, head2, type, field) do {			\
+	struct type *swap_first = (head1)->tqh_first;			\
+	struct type **swap_last = (head1)->tqh_last;			\
+	(head1)->tqh_first = (head2)->tqh_first;			\
+	(head1)->tqh_last = (head2)->tqh_last;				\
+	(head2)->tqh_first = swap_first;				\
+	(head2)->tqh_last = swap_last;					\
+	if ((swap_first = (head1)->tqh_first) != NULL)			\
+		swap_first->field.tqe_prev = &(head1)->tqh_first;	\
+	else								\
+		(head1)->tqh_last = &(head1)->tqh_first;		\
+	if ((swap_first = (head2)->tqh_first) != NULL)			\
+		swap_first->field.tqe_prev = &(head2)->tqh_first;	\
+	else								\
+		(head2)->tqh_last = &(head2)->tqh_first;		\
+} while (0)
+
+#endif
diff --git a/src/spdk/include/spdk/string.h b/src/spdk/include/spdk/string.h
new file mode 100644
index 0000000..d7e1226
--- /dev/null
+++ b/src/spdk/include/spdk/string.h
@@ -0,0 +1,46 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright (c) Intel Corporation.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef SPDK_STRING_H
+#define SPDK_STRING_H
+
+/* Give the declarations C linkage when this header is included from C++. */
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * sprintf with automatic buffer allocation.
+ *
+ * \param format printf-style format string; the remaining arguments are
+ * checked against it by the compiler (format attribute below).
+ *
+ * \return The formatted string, which should be passed to free() when no
+ * longer needed, or NULL on failure.
+ */
+char *sprintf_alloc(const char *format, ...) __attribute__((format(printf, 1, 2)));
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/spdk/include/spdk/vtophys.h b/src/spdk/include/spdk/vtophys.h
new file mode 100644
index 0000000..4d6d57e
--- /dev/null
+++ b/src/spdk/include/spdk/vtophys.h
@@ -0,0 +1,51 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright (c) Intel Corporation.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef SPDK_VTOPHYS_H
+#define SPDK_VTOPHYS_H
+
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Sentinel value with all 64 bits set.
+ * NOTE(review): presumably what vtophys() returns when it cannot translate
+ * an address - confirm against the implementation, which is not in this
+ * header.
+ */
+#define VTOPHYS_ERROR	(0xFFFFFFFFFFFFFFFFULL)
+
+/*
+ * Return the physical address that corresponds to the virtual address buf.
+ * NOTE(review): failure behavior is not visible from this declaration.
+ */
+uint64_t vtophys(void *buf);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/spdk/lib/Makefile b/src/spdk/lib/Makefile
new file mode 100644
index 0000000..7ab33a4
--- /dev/null
+++ b/src/spdk/lib/Makefile
@@ -0,0 +1,44 @@
+#
+#  BSD LICENSE
+#
+#  Copyright (c) Intel Corporation.
+#  All rights reserved.
+#
+#  Redistribution and use in source and binary forms, with or without
+#  modification, are permitted provided that the following conditions
+#  are met:
+#
+#    * Redistributions of source code must retain the above copyright
+#      notice, this list of conditions and the following disclaimer.
+#    * Redistributions in binary form must reproduce the above copyright
+#      notice, this list of conditions and the following disclaimer in
+#      the documentation and/or other materials provided with the
+#      distribution.
+#    * Neither the name of Intel Corporation nor the names of its
+#      contributors may be used to endorse or promote products derived
+#      from this software without specific prior written permission.
+#
+#  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+
+# This Makefile lives one level below the SPDK root.
+SPDK_ROOT_DIR := $(CURDIR)/..
+include $(SPDK_ROOT_DIR)/mk/spdk.common.mk
+
+# Sub-directories (one library each) that are built from here.
+DIRS-y += memory util nvme ioat
+
+.PHONY: all clean $(DIRS-y)
+
+# Both targets simply depend on every sub-directory target.
+all: $(DIRS-y)
+clean: $(DIRS-y)
+
+# NOTE(review): presumably supplies the rule that runs make inside each
+# entry of $(DIRS-y) - confirm in mk/spdk.subdirs.mk.
+include $(SPDK_ROOT_DIR)/mk/spdk.subdirs.mk
diff --git a/src/spdk/lib/ioat/Makefile b/src/spdk/lib/ioat/Makefile
new file mode 100644
index 0000000..d4905d9
--- /dev/null
+++ b/src/spdk/lib/ioat/Makefile
@@ -0,0 +1,51 @@
+#
+#  BSD LICENSE
+#
+#  Copyright (c) Intel Corporation.
+#  All rights reserved.
+#
+#  Redistribution and use in source and binary forms, with or without
+#  modification, are permitted provided that the following conditions
+#  are met:
+#
+#    * Redistributions of source code must retain the above copyright
+#      notice, this list of conditions and the following disclaimer.
+#    * Redistributions in binary form must reproduce the above copyright
+#      notice, this list of conditions and the following disclaimer in
+#      the documentation and/or other materials provided with the
+#      distribution.
+#    * Neither the name of Intel Corporation nor the names of its
+#      contributors may be used to endorse or promote products derived
+#      from this software without specific prior written permission.
+#
+#  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+
+# This Makefile lives two levels below the SPDK root.
+SPDK_ROOT_DIR := $(CURDIR)/../..
+include $(SPDK_ROOT_DIR)/mk/spdk.common.mk
+
+# -include force-includes the environment-specific implementation header
+# named by CONFIG_IOAT_IMPL ahead of every source file, so ioat.c does not
+# have to #include it by name.
+CFLAGS += $(DPDK_INC) -include $(CONFIG_IOAT_IMPL)
+
+C_SRCS = ioat.c
+
+LIB = libspdk_ioat.a
+
+all: $(LIB)
+
+# NOTE(review): CLEAN_C, LIB_C and OBJS are not defined in this file;
+# presumably they come from the included mk/ fragments - confirm there.
+clean:
+	$(CLEAN_C)
+
+$(LIB): $(OBJS)
+	$(LIB_C)
+
+include $(SPDK_ROOT_DIR)/mk/spdk.deps.mk
diff --git a/src/spdk/lib/ioat/ioat.c b/src/spdk/lib/ioat/ioat.c
new file mode 100644
index 0000000..b90c90e
--- /dev/null
+++ b/src/spdk/lib/ioat/ioat.c
@@ -0,0 +1,673 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright (c) Intel Corporation.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "ioat_internal.h"
+#include "ioat_pci.h"
+
+/** List of channels that have been attached but are not yet assigned to a thread.
+ *
+ * Must hold g_ioat_driver.lock while manipulating this list.
+ */
+static SLIST_HEAD(, ioat_channel) ioat_free_channels;
+
+/** IOAT channel assigned to this thread (or NULL if not assigned yet). */
+static __thread struct ioat_channel *ioat_thread_channel;
+
+/** Driver-wide state; currently only the lock that guards ioat_free_channels. */
+struct ioat_driver {
+	ioat_mutex_t	lock;
+};
+
+/** The single driver instance, with its lock statically initialized. */
+static struct ioat_driver g_ioat_driver = {
+	.lock = IOAT_MUTEX_INITIALIZER,
+};
+
+/** PCI vendor/device ID pair used as one entry of ioat_pci_table. */
+struct pci_device_id {
+	uint16_t vendor;
+	uint16_t device;
+};
+
+/**
+ * Vendor/device ID pairs that ioat_pci_device_match_id() accepts.
+ * Every entry uses the Intel vendor ID; the device IDs are grouped by the
+ * SNB, IVB, HSW, BDX, BWD and BDXDE prefixes of their macro names.
+ */
+static const struct pci_device_id ioat_pci_table[] = {
+	{PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB0},
+	{PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB1},
+	{PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB2},
+	{PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB3},
+	{PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB4},
+	{PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB5},
+	{PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB6},
+	{PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB7},
+	{PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB0},
+	{PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB1},
+	{PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB2},
+	{PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB3},
+	{PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB4},
+	{PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB5},
+	{PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB6},
+	{PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB7},
+	{PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW0},
+	{PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW1},
+	{PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW2},
+	{PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW3},
+	{PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW4},
+	{PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW5},
+	{PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW6},
+	{PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW7},
+	{PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDX0},
+	{PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDX1},
+	{PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDX2},
+	{PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDX3},
+	{PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDX4},
+	{PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDX5},
+	{PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDX6},
+	{PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDX7},
+	{PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDX8},
+	{PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDX9},
+	{PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_BWD0},
+	{PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_BWD1},
+	{PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_BWD2},
+	{PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_BWD3},
+	{PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDXDE0},
+	{PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDXDE1},
+	{PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDXDE2},
+	{PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDXDE3},
+};
+
+/*
+ * Report whether the given PCI vendor/device ID pair appears in
+ * ioat_pci_table.
+ *
+ * Returns true on the first matching entry, false when the whole table has
+ * been scanned without a match.
+ */
+bool
+ioat_pci_device_match_id(uint16_t vendor_id, uint16_t device_id)
+{
+	const struct pci_device_id *entry = ioat_pci_table;
+	const struct pci_device_id *end = ioat_pci_table +
+					  sizeof(ioat_pci_table) / sizeof(ioat_pci_table[0]);
+
+	for (; entry != end; entry++) {
+		if (entry->vendor == vendor_id && entry->device == device_id) {
+			return true;
+		}
+	}
+
+	return false;
+}
+
+/* Read the channel's chansts register with spdk_mmio_read_8() and return its value. */
+static uint64_t
+ioat_get_chansts(struct ioat_channel *ioat)
+{
+	return spdk_mmio_read_8(&ioat->regs->chansts);
+}
+
+/*
+ * Write addr to the channel's chancmp register with spdk_mmio_write_8().
+ * ioat_channel_start() passes the bus address of the comp_update word here.
+ */
+static void
+ioat_write_chancmp(struct ioat_channel *ioat, uint64_t addr)
+{
+	spdk_mmio_write_8(&ioat->regs->chancmp, addr);
+}
+
+/*
+ * Write addr to the channel's chainaddr register with spdk_mmio_write_8().
+ * ioat_channel_start() passes the physical address of the hardware ring here.
+ */
+static void
+ioat_write_chainaddr(struct ioat_channel *ioat, uint64_t addr)
+{
+	spdk_mmio_write_8(&ioat->regs->chainaddr, addr);
+}
+
+/* Issue the suspend command by writing IOAT_CHANCMD_SUSPEND to the chancmd register. */
+static inline void
+ioat_suspend(struct ioat_channel *ioat)
+{
+	ioat->regs->chancmd = IOAT_CHANCMD_SUSPEND;
+}
+
+/* Issue the reset command by writing IOAT_CHANCMD_RESET to the chancmd register. */
+static inline void
+ioat_reset(struct ioat_channel *ioat)
+{
+	ioat->regs->chancmd = IOAT_CHANCMD_RESET;
+}
+
+/*
+ * Return non-zero while the IOAT_CHANCMD_RESET bits are still set in the
+ * chancmd register, zero otherwise.  The register is read exactly once.
+ */
+static inline uint32_t
+ioat_reset_pending(struct ioat_channel *ioat)
+{
+	uint8_t cmd;
+
+	cmd = ioat->regs->chancmd;
+	return (cmd & IOAT_CHANCMD_RESET) == IOAT_CHANCMD_RESET;
+}
+
+/*
+ * Map BAR 0 of the channel's PCI device (writable) and store the mapping
+ * in ioat->regs.
+ *
+ * Returns 0 on success.  If the mapping call reports an error or yields a
+ * NULL address, a message is printed and -1 is returned; ioat->regs is not
+ * modified in that case.
+ */
+static int
+ioat_map_pci_bar(struct ioat_channel *ioat)
+{
+	const int regs_bar = 0;
+	void *mapped;
+	int rc;
+
+	rc = ioat_pcicfg_map_bar(ioat->device, regs_bar, 0, &mapped);
+	if (rc == 0 && mapped != NULL) {
+		ioat->regs = (volatile struct ioat_registers *)mapped;
+		return 0;
+	}
+
+	ioat_printf(ioat, "%s: pci_device_map_range failed with error code %d\n",
+		    __func__, rc);
+	return -1;
+}
+
+/*
+ * Undo ioat_map_pci_bar(): unmap BAR 0 if ioat->regs is set.
+ *
+ * Returns 0 when nothing was mapped, otherwise whatever
+ * ioat_pcicfg_unmap_bar() returns.  ioat->regs itself is left unchanged.
+ */
+static int
+ioat_unmap_pci_bar(struct ioat_channel *ioat)
+{
+	void *mapped = (void *)ioat->regs;
+
+	if (mapped == NULL) {
+		return 0;
+	}
+
+	return ioat_pcicfg_unmap_bar(ioat->device, 0, mapped);
+}
+
+
+/*
+ * Number of ring entries between tail and head, i.e. descriptors that have
+ * been submitted but not yet retired, taken modulo the ring size.
+ */
+static inline uint32_t
+ioat_get_active(struct ioat_channel *ioat)
+{
+	uint32_t ring_mask = (1 << ioat->ring_size_order) - 1;
+	uint32_t outstanding = ioat->head - ioat->tail;
+
+	return outstanding & ring_mask;
+}
+
+/*
+ * Number of descriptors that can still be queued.  One entry is always
+ * held back, so at most (ring size - 1) descriptors are ever outstanding.
+ */
+static inline uint32_t
+ioat_get_ring_space(struct ioat_channel *ioat)
+{
+	uint32_t ring_entries = 1 << ioat->ring_size_order;
+
+	return ring_entries - 1 - ioat_get_active(ioat);
+}
+
+/* Reduce a free-running head/tail counter to a slot number inside the ring. */
+static uint32_t
+ioat_get_ring_index(struct ioat_channel *ioat, uint32_t index)
+{
+	uint32_t ring_mask = (1 << ioat->ring_size_order) - 1;
+
+	return index & ring_mask;
+}
+
+/*
+ * Look up the ring slot selected by index (wrapped to the ring size) and
+ * hand back pointers to both halves of it: the software descriptor through
+ * *desc and the matching hardware descriptor through *hw_desc.
+ */
+static void
+ioat_get_ring_entry(struct ioat_channel *ioat, uint32_t index,
+		    struct ioat_descriptor **desc,
+		    union ioat_hw_descriptor **hw_desc)
+{
+	uint32_t slot = ioat_get_ring_index(ioat, index);
+
+	*hw_desc = ioat->hw_ring + slot;
+	*desc = ioat->ring + slot;
+}
+
+/*
+ * Physical address of the hardware descriptor in the ring slot selected by
+ * index (wrapped to the ring size).
+ */
+static uint64_t
+ioat_get_desc_phys_addr(struct ioat_channel *ioat, uint32_t index)
+{
+	uint64_t byte_offset = ioat_get_ring_index(ioat, index) *
+			       sizeof(union ioat_hw_descriptor);
+
+	return ioat->hw_ring_phys_addr + byte_offset;
+}
+
+/*
+ * Account for one newly prepared descriptor by advancing the software head.
+ * The hardware is not told until ioat_flush() is called.
+ */
+static void
+ioat_submit_single(struct ioat_channel *ioat)
+{
+	ioat->head += 1;
+}
+
+/*
+ * Publish the current head to the hardware by writing its low 16 bits to
+ * the dmacount register.
+ */
+static void
+ioat_flush(struct ioat_channel *ioat)
+{
+	ioat->regs->dmacount = (uint16_t)ioat->head;
+}
+
+/*
+ * Fill the ring slot at head with a null descriptor (a copy operation with
+ * the null flag set that still requests a completion update) and advance
+ * head.
+ *
+ * Returns the software descriptor so the caller may attach a callback, or
+ * NULL when the ring has no free slot.
+ */
+static struct ioat_descriptor *
+ioat_prep_null(struct ioat_channel *ioat)
+{
+	struct ioat_descriptor *sw_desc;
+	union ioat_hw_descriptor *hw;
+
+	if (ioat_get_ring_space(ioat) == 0) {
+		return NULL;
+	}
+
+	ioat_get_ring_entry(ioat, ioat->head, &sw_desc, &hw);
+
+	/* No completion callback unless the caller installs one afterwards. */
+	sw_desc->callback_arg = NULL;
+	sw_desc->callback_fn = NULL;
+
+	/* Start from an all-zero control word, then set the individual fields. */
+	hw->dma.u.control_raw = 0;
+	hw->dma.u.control.completion_update = 1;
+	hw->dma.u.control.null = 1;
+	hw->dma.u.control.op = IOAT_OP_COPY;
+
+	hw->dma.dest_addr = 0;
+	hw->dma.src_addr = 0;
+	hw->dma.size = 8;
+
+	ioat_submit_single(ioat);
+
+	return sw_desc;
+}
+
+/*
+ * Fill the ring slot at head with a copy descriptor that moves len bytes
+ * from physical address src to physical address dst, requesting a
+ * completion update, and advance head.
+ *
+ * len must not exceed ioat->max_xfer_size (asserted).
+ *
+ * Returns the software descriptor so the caller may attach a callback, or
+ * NULL when the ring has no free slot.
+ */
+static struct ioat_descriptor *
+ioat_prep_copy(struct ioat_channel *ioat, uint64_t dst,
+	       uint64_t src, uint32_t len)
+{
+	struct ioat_descriptor *sw_desc;
+	union ioat_hw_descriptor *hw;
+
+	ioat_assert(len <= ioat->max_xfer_size);
+
+	if (ioat_get_ring_space(ioat) == 0) {
+		return NULL;
+	}
+
+	ioat_get_ring_entry(ioat, ioat->head, &sw_desc, &hw);
+
+	/* No completion callback unless the caller installs one afterwards. */
+	sw_desc->callback_arg = NULL;
+	sw_desc->callback_fn = NULL;
+
+	/* Start from an all-zero control word, then set the individual fields. */
+	hw->dma.u.control_raw = 0;
+	hw->dma.u.control.completion_update = 1;
+	hw->dma.u.control.op = IOAT_OP_COPY;
+
+	hw->dma.dest_addr = dst;
+	hw->dma.src_addr = src;
+	hw->dma.size = len;
+
+	ioat_submit_single(ioat);
+
+	return sw_desc;
+}
+
+/*
+ * Quiesce and reset the channel.
+ *
+ * Sequence: suspend the channel if it reports active or idle, wait for it
+ * to leave those states, clear CHANERR, issue a reset and wait for the
+ * reset bit to clear.  Each wait polls once per millisecond and gives up
+ * after 20 iterations.
+ *
+ * Returns 0 on success, or -1 (after printing a message) if either wait
+ * times out.
+ */
+static int ioat_reset_hw(struct ioat_channel *ioat)
+{
+	int timeout;
+	uint64_t status;
+	uint32_t chanerr;
+
+	status = ioat_get_chansts(ioat);
+	if (is_ioat_active(status) || is_ioat_idle(status)) {
+		ioat_suspend(ioat);
+	}
+
+	timeout = 20; /* in milliseconds */
+	while (is_ioat_active(status) || is_ioat_idle(status)) {
+		ioat_delay_us(1000);
+		timeout--;
+		/* The budget is checked before the status is re-read. */
+		if (timeout == 0) {
+			ioat_printf(ioat, "%s: timed out waiting for suspend\n", __func__);
+			return -1;
+		}
+		status = ioat_get_chansts(ioat);
+	}
+
+	/*
+	 * Clear any outstanding errors.
+	 * CHANERR is write-1-to-clear, so write the current CHANERR bits back to reset everything.
+	 */
+	chanerr = ioat->regs->chanerr;
+	ioat->regs->chanerr = chanerr;
+
+	ioat_reset(ioat);
+
+	/* Same 20 x 1 ms budget as the suspend wait above. */
+	timeout = 20;
+	while (ioat_reset_pending(ioat)) {
+		ioat_delay_us(1000);
+		timeout--;
+		if (timeout == 0) {
+			ioat_printf(ioat, "%s: timed out waiting for reset\n", __func__);
+			return -1;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Retire completed descriptors on one channel and run their callbacks.
+ *
+ * The completion word (*ioat->comp_update) carries both the channel status
+ * and the physical address of the most recently completed descriptor.
+ * Descriptors are retired in ring order starting at tail, up to and
+ * including the one whose physical address matches that value.
+ *
+ * Returns 0 when there was nothing outstanding, nothing new, or retirement
+ * succeeded; returns -1 (after printing CHANERR) if the status reports the
+ * channel as halted.
+ */
+static int
+ioat_process_channel_events(struct ioat_channel *ioat)
+{
+	struct ioat_descriptor *desc;
+	uint64_t status, completed_descriptor, hw_desc_phys_addr;
+	uint32_t tail;
+
+	/* Nothing has been submitted since the last descriptor was retired. */
+	if (ioat->head == ioat->tail) {
+		return 0;
+	}
+
+	status = *ioat->comp_update;
+	completed_descriptor = status & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_MASK;
+
+	if (is_ioat_halted(status)) {
+		ioat_printf(ioat, "%s: Channel halted (%x)\n", __func__, ioat->regs->chanerr);
+		return -1;
+	}
+
+	/* No progress since the previous call. */
+	if (completed_descriptor == ioat->last_seen) {
+		return 0;
+	}
+
+	/*
+	 * NOTE(review): the loop stops only when a descriptor address equals
+	 * completed_descriptor; it is not bounded by head, so it relies on the
+	 * reported address belonging to an outstanding ring entry.
+	 */
+	do {
+		tail = ioat_get_ring_index(ioat, ioat->tail);
+		desc = &ioat->ring[tail];
+
+		if (desc->callback_fn) {
+			desc->callback_fn(desc->callback_arg);
+		}
+
+		hw_desc_phys_addr = ioat_get_desc_phys_addr(ioat, ioat->tail);
+		ioat->tail++;
+	} while (hw_desc_phys_addr != completed_descriptor);
+
+	/* Remember how far we got so the next call can detect "no progress". */
+	ioat->last_seen = hw_desc_phys_addr;
+	return 0;
+}
+
+/*
+ * Release everything ioat_channel_start() may have acquired: the BAR
+ * mapping, the software ring, the hardware ring and the completion word.
+ *
+ * Safe to call on a partially started channel; members that were never set
+ * are skipped.  Every released member is reset to NULL so that the channel
+ * holds no dangling pointers and a repeated call is harmless.  The channel
+ * structure itself is not freed.
+ *
+ * Always returns 0.
+ */
+static int
+ioat_channel_destruct(struct ioat_channel *ioat)
+{
+	ioat_unmap_pci_bar(ioat);
+	ioat->regs = NULL;
+
+	/* free(NULL) is a no-op, so no guard is needed for the calloc'd ring. */
+	free(ioat->ring);
+	ioat->ring = NULL;
+
+	if (ioat->hw_ring) {
+		ioat_free(ioat->hw_ring);
+		ioat->hw_ring = NULL;
+	}
+
+	if (ioat->comp_update) {
+		ioat_free((void *)ioat->comp_update);
+		ioat->comp_update = NULL;
+	}
+
+	return 0;
+}
+
+/*
+ * Bring a newly attached channel into service.
+ *
+ * Steps: map BAR 0, check the hardware version and XFERCAP, allocate the
+ * completion word and the software/hardware rings, link the hardware
+ * descriptors into a circle, reset the channel, program CHANCTRL, CHANCMP
+ * and CHAINADDR, then submit one null descriptor and wait (up to
+ * 100 x 100 us) for the channel to report idle.
+ *
+ * Nothing is released on failure; the caller is expected to follow up with
+ * ioat_channel_destruct().
+ *
+ * Returns 0 on success, -1 on any failure.
+ */
+static int
+ioat_channel_start(struct ioat_channel *ioat)
+{
+	uint8_t xfercap, version;
+	uint64_t status;
+	int i, num_descriptors;
+	uint64_t comp_update_bus_addr;
+
+	if (ioat_map_pci_bar(ioat) != 0) {
+		ioat_printf(ioat, "%s: ioat_map_pci_bar() failed\n", __func__);
+		return -1;
+	}
+
+	version = ioat->regs->cbver;
+	if (version < IOAT_VER_3_0) {
+		ioat_printf(ioat, "%s: unsupported IOAT version %u.%u\n",
+			    __func__, version >> 4, version & 0xF);
+		return -1;
+	}
+
+	xfercap = ioat->regs->xfercap;
+
+	/* Only bits [4:0] are valid. */
+	xfercap &= 0x1f;
+	if (xfercap == 0) {
+		/* 0 means 4 GB max transfer size. */
+		ioat->max_xfer_size = 1ULL << 32;
+	} else if (xfercap < 12) {
+		/* XFERCAP must be at least 12 (4 KB) according to the spec. */
+		ioat_printf(ioat, "%s: invalid XFERCAP value %u\n", __func__, xfercap);
+		return -1;
+	} else {
+		ioat->max_xfer_size = 1U << xfercap;
+	}
+
+	ioat->comp_update = ioat_zmalloc(NULL, sizeof(*ioat->comp_update), IOAT_CHANCMP_ALIGN,
+					 &comp_update_bus_addr);
+	if (ioat->comp_update == NULL) {
+		return -1;
+	}
+
+	ioat->ring_size_order = IOAT_DEFAULT_ORDER;
+
+	num_descriptors = 1 << ioat->ring_size_order;
+
+	ioat->ring = calloc(num_descriptors, sizeof(struct ioat_descriptor));
+	if (!ioat->ring) {
+		return -1;
+	}
+
+	ioat->hw_ring = ioat_zmalloc(NULL, num_descriptors * sizeof(union ioat_hw_descriptor), 64,
+				     &ioat->hw_ring_phys_addr);
+	if (!ioat->hw_ring) {
+		return -1;
+	}
+
+	/* Chain each descriptor to its successor; the index wraps, closing the ring. */
+	for (i = 0; i < num_descriptors; i++) {
+		ioat->hw_ring[i].generic.next = ioat_get_desc_phys_addr(ioat, i + 1);
+	}
+
+	ioat->head = 0;
+	ioat->tail = 0;
+	ioat->last_seen = 0;
+
+	/*
+	 * The reset result is not checked here: a channel that failed to reset
+	 * is caught by the idle check below.
+	 */
+	ioat_reset_hw(ioat);
+
+	ioat->regs->chanctrl = IOAT_CHANCTRL_ANY_ERR_ABORT_EN;
+	ioat_write_chancmp(ioat, comp_update_bus_addr);
+	ioat_write_chainaddr(ioat, ioat->hw_ring_phys_addr);
+
+	/* Kick the channel with a null descriptor to confirm it runs. */
+	ioat_prep_null(ioat);
+	ioat_flush(ioat);
+
+	i = 100;
+	while (i-- > 0) {
+		ioat_delay_us(100);
+		status = ioat_get_chansts(ioat);
+		if (is_ioat_idle(status))
+			break;
+	}
+
+	if (is_ioat_idle(status)) {
+		ioat_process_channel_events(ioat);
+	} else {
+		/*
+		 * status is a 64-bit register value, not a pointer: print it with
+		 * PRIx64 so it is neither truncated nor cast on 32-bit builds.
+		 */
+		ioat_printf(ioat, "%s: could not start channel: status = 0x%" PRIx64 "\n error = %#x\n",
+			    __func__, status, ioat->regs->chanerr);
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Create a channel for the given PCI device handle.
+ *
+ * Allocates the channel, sets the bus-master bit in the device's PCI
+ * command register, starts the channel and, on success, puts it on the
+ * list of free (not yet thread-assigned) channels.
+ *
+ * Returns the new channel, or NULL if allocation or channel start failed;
+ * in the failure case everything acquired so far is released again.
+ */
+struct ioat_channel *
+ioat_attach(void *device)
+{
+	struct ioat_channel *chan;
+	uint32_t pci_cmd;
+
+	chan = calloc(1, sizeof(*chan));
+	if (!chan) {
+		return NULL;
+	}
+	chan->device = device;
+
+	/* Enable PCI busmaster. */
+	ioat_pcicfg_read32(device, &pci_cmd, 4);
+	pci_cmd |= 0x4;
+	ioat_pcicfg_write32(device, pci_cmd, 4);
+
+	if (ioat_channel_start(chan) == 0) {
+		ioat_mutex_lock(&g_ioat_driver.lock);
+		SLIST_INSERT_HEAD(&ioat_free_channels, chan, next);
+		ioat_mutex_unlock(&g_ioat_driver.lock);
+
+		return chan;
+	}
+
+	ioat_channel_destruct(chan);
+	free(chan);
+	return NULL;
+}
+
+/*
+ * Tear down a channel created by ioat_attach() and free it.
+ *
+ * The channel must currently be on the free list, i.e. not registered to
+ * any thread; it is unlinked from that list under the driver lock before
+ * its resources are released.
+ *
+ * Always returns 0.
+ */
+int
+ioat_detach(struct ioat_channel *ioat)
+{
+	ioat_mutex_lock(&g_ioat_driver.lock);
+	SLIST_REMOVE(&ioat_free_channels, ioat, ioat_channel, next);
+	ioat_mutex_unlock(&g_ioat_driver.lock);
+
+	ioat_channel_destruct(ioat);
+	free(ioat);
+
+	return 0;
+}
+
+/*
+ * Claim a free channel for the calling thread.
+ *
+ * Takes the first entry off the free list (under the driver lock) and
+ * records it in the thread-local ioat_thread_channel.
+ *
+ * Returns 0 on success, or -1 if the thread already owns a channel (a
+ * message is printed) or no free channel is available.
+ */
+int
+ioat_register_thread(void)
+{
+	struct ioat_channel *claimed;
+
+	if (ioat_thread_channel != NULL) {
+		ioat_printf(NULL, "%s: thread already registered\n", __func__);
+		return -1;
+	}
+
+	ioat_mutex_lock(&g_ioat_driver.lock);
+	claimed = SLIST_FIRST(&ioat_free_channels);
+	if (claimed != NULL) {
+		SLIST_REMOVE_HEAD(&ioat_free_channels, next);
+	}
+	ioat_mutex_unlock(&g_ioat_driver.lock);
+
+	ioat_thread_channel = claimed;
+
+	return claimed != NULL ? 0 : -1;
+}
+
+/*
+ * Give the calling thread's channel back to the free list.
+ *
+ * Does nothing if the thread owns no channel.  Otherwise the thread-local
+ * ioat_thread_channel is cleared and the channel is pushed onto the free
+ * list under the driver lock.
+ */
+void
+ioat_unregister_thread(void)
+{
+	struct ioat_channel *released = ioat_thread_channel;
+
+	if (released == NULL) {
+		return;
+	}
+
+	ioat_thread_channel = NULL;
+
+	ioat_mutex_lock(&g_ioat_driver.lock);
+	SLIST_INSERT_HEAD(&ioat_free_channels, released, next);
+	ioat_mutex_unlock(&g_ioat_driver.lock);
+}
+
+/* Smaller of a and b.  Each argument may be evaluated twice. */
+#define min(a, b) (((a)<(b))?(a):(b))
+
+/* Round an integer address down to the start of its 2 MB (0x200000) page. */
+#define _2MB_PAGE(ptr)		((ptr) & ~(0x200000 - 1))
+/* Byte offset of an integer address within its 2 MB (0x200000) page. */
+#define _2MB_OFFSET(ptr)	((ptr) &  (0x200000 - 1))
+
+/*
+ * Queue a copy of nbytes from src to dst on the calling thread's channel.
+ *
+ * The request is split into one descriptor per chunk, where a chunk never
+ * crosses a 2 MB boundary of either buffer and never exceeds the channel's
+ * max_xfer_size.  Each 2 MB page is translated with ioat_vtophys().  A
+ * zero-length request is queued as a single null descriptor.  cb_fn/cb_arg
+ * are attached to the last descriptor only.
+ *
+ * Returns nbytes on success.  Returns -1 if the thread has no channel, or
+ * if the ring ran out of descriptors; in the latter case head is rewound
+ * so that nothing from this request remains queued.
+ *
+ * NOTE(review): the values returned by ioat_vtophys() are used without any
+ * error check.
+ */
+int64_t
+ioat_submit_copy(void *cb_arg, ioat_callback_t cb_fn,
+		 void *dst, const void *src, uint64_t nbytes)
+{
+	struct ioat_channel	*ioat;
+	struct ioat_descriptor	*last_desc;
+	uint64_t	remaining, op_size;
+	uint64_t	vdst, vsrc;
+	uint64_t	vdst_page, vsrc_page;
+	uint64_t	pdst_page, psrc_page;
+	uint32_t	orig_head;
+
+	ioat = ioat_thread_channel;
+	if (!ioat) {
+		return -1;
+	}
+
+	/* Remembered so a partially queued request can be rolled back. */
+	orig_head = ioat->head;
+
+	vdst = (uint64_t)dst;
+	vsrc = (uint64_t)src;
+	vsrc_page = _2MB_PAGE(vsrc);
+	vdst_page = _2MB_PAGE(vdst);
+	psrc_page = ioat_vtophys((void *)vsrc_page);
+	pdst_page = ioat_vtophys((void *)vdst_page);
+
+	remaining = nbytes;
+
+	while (remaining) {
+		/* Clamp the chunk to the end of both 2 MB pages and the hardware limit. */
+		op_size = remaining;
+		op_size = min(op_size, (0x200000 - _2MB_OFFSET(vsrc)));
+		op_size = min(op_size, (0x200000 - _2MB_OFFSET(vdst)));
+		op_size = min(op_size, ioat->max_xfer_size);
+		remaining -= op_size;
+
+		last_desc = ioat_prep_copy(ioat,
+					   pdst_page + _2MB_OFFSET(vdst),
+					   psrc_page + _2MB_OFFSET(vsrc),
+					   op_size);
+
+		/* Done, or the ring is full (handled after the loop). */
+		if (remaining == 0 || last_desc == NULL) {
+			break;
+		}
+
+		vsrc += op_size;
+		vdst += op_size;
+
+		/* Re-translate only when a buffer has moved into a new 2 MB page. */
+		if (_2MB_PAGE(vsrc) != vsrc_page) {
+			vsrc_page = _2MB_PAGE(vsrc);
+			psrc_page = ioat_vtophys((void *)vsrc_page);
+		}
+
+		if (_2MB_PAGE(vdst) != vdst_page) {
+			vdst_page = _2MB_PAGE(vdst);
+			pdst_page = ioat_vtophys((void *)vdst_page);
+		}
+	}
+	/* Issue null descriptor for null transfer */
+	if (nbytes == 0) {
+		last_desc = ioat_prep_null(ioat);
+	}
+
+	if (last_desc) {
+		last_desc->callback_fn = cb_fn;
+		last_desc->callback_arg = cb_arg;
+	} else {
+		/*
+		 * Ran out of descriptors in the ring - reset head to leave things as they were
+		 * in case we managed to fill out any descriptors.
+		 */
+		ioat->head = orig_head;
+		return -1;
+	}
+
+	/* Only now is the hardware told about the new descriptors. */
+	ioat_flush(ioat);
+	return nbytes;
+}
+
+/*
+ * Retire completed descriptors on the calling thread's channel.
+ *
+ * Returns -1 if the thread has no channel assigned, otherwise the result
+ * of ioat_process_channel_events() for that channel.
+ */
+int ioat_process_events(void)
+{
+	struct ioat_channel *chan = ioat_thread_channel;
+
+	if (chan == NULL) {
+		return -1;
+	}
+
+	return ioat_process_channel_events(chan);
+}
diff --git a/src/spdk/lib/ioat/ioat_impl.h b/src/spdk/lib/ioat/ioat_impl.h
new file mode 100644
index 0000000..1224a27
--- /dev/null
+++ b/src/spdk/lib/ioat/ioat_impl.h
@@ -0,0 +1,93 @@
+#ifndef __IOAT_IMPL_H__
+#define __IOAT_IMPL_H__
+
+#include <assert.h>
+#include <pthread.h>
+#include <pciaccess.h>
+#include <stdio.h>
+#include <rte_malloc.h>
+#include <rte_config.h>
+#include <rte_atomic.h>
+#include <rte_cycles.h>
+
+#include "spdk/vtophys.h"
+
+/**
+ * \file
+ *
+ * This file describes the functions required to integrate
+ * the userspace IOAT driver for a specific implementation.  This
+ * implementation is specific for DPDK.  Users would revise it as
+ * necessary for their own particular environment if not using it
+ * within the SPDK framework.
+ */
+
+/**
+ * Allocate a pinned, physically contiguous memory buffer with the
+ * given size and alignment, using rte_zmalloc().
+ *
+ * The buffer's physical address, as reported by rte_malloc_virt2phy(),
+ * is stored in *phys_addr; this store happens whether or not the
+ * allocation succeeded.
+ *
+ * Returns the buffer exactly as returned by rte_zmalloc().
+ */
+static inline void *
+ioat_zmalloc(const char *tag, size_t size, unsigned align, uint64_t *phys_addr)
+{
+	void *vaddr;
+
+	vaddr = rte_zmalloc(tag, size, align);
+	*phys_addr = rte_malloc_virt2phy(vaddr);
+
+	return vaddr;
+}
+
+/**
+ * Free a memory buffer previously allocated with ioat_zmalloc.
+ */
+#define ioat_free(buf)			rte_free(buf)
+
+/**
+ * Return the physical address for the specified virtual address.
+ */
+#define ioat_vtophys(buf)		vtophys(buf)
+
+/**
+ * Delay for the given number of microseconds.
+ */
+#define ioat_delay_us(us)        rte_delay_us(us)
+
+/**
+ * Assert a condition and panic/abort as desired.  Failures of these
+ *  assertions indicate catastrophic failures within the driver.
+ */
+#define ioat_assert(check)		assert(check)
+
+/**
+ * Log or print a message from the driver.
+ * This implementation ignores the channel argument and forwards to printf.
+ */
+#define ioat_printf(chan, fmt, args...) printf(fmt, ##args)
+
+/**
+ * Read or write a 32-bit value at the given offset in the device's
+ * PCI configuration space (forwarded to libpciaccess).
+ */
+#define ioat_pcicfg_read32(handle, var, offset)  pci_device_cfg_read_u32(handle, var, offset)
+#define ioat_pcicfg_write32(handle, var, offset) pci_device_cfg_write_u32(handle, var, offset)
+
+static inline int
+ioat_pcicfg_map_bar(void *devhandle, uint32_t bar, uint32_t read_only, void **mapped_addr)
+{
+	struct pci_device *dev = devhandle;
+	uint32_t flags = (read_only ? 0 : PCI_DEV_MAP_FLAG_WRITABLE);
+
+	return pci_device_map_range(dev, dev->regions[bar].base_addr, 4096,
+				    flags, mapped_addr);
+}
+
+static inline int
+ioat_pcicfg_unmap_bar(void *devhandle, uint32_t bar, void *addr)
+{
+	struct pci_device *dev = devhandle;
+
+	return pci_device_unmap_range(dev, addr, dev->regions[bar].size);
+}
+
+/**
+ * Mutex type and operations used by the driver; this implementation
+ * maps them directly onto pthread mutexes.
+ */
+typedef pthread_mutex_t ioat_mutex_t;
+
+#define ioat_mutex_lock pthread_mutex_lock
+#define ioat_mutex_unlock pthread_mutex_unlock
+#define IOAT_MUTEX_INITIALIZER PTHREAD_MUTEX_INITIALIZER
+
+#endif /* __IOAT_IMPL_H__ */
diff --git a/src/spdk/lib/ioat/ioat_internal.h b/src/spdk/lib/ioat/ioat_internal.h
new file mode 100644
index 0000000..030d278
--- /dev/null
+++ b/src/spdk/lib/ioat/ioat_internal.h
@@ -0,0 +1,103 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright (c) Intel Corporation.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __IOAT_INTERNAL_H__
+#define __IOAT_INTERNAL_H__
+
+#include "spdk/ioat.h"
+#include "spdk/ioat_spec.h"
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <inttypes.h>
+
+#include "spdk/queue.h"
+#include "spdk/mmio.h"
+
+/* Allocate 1 << 15 (32K) descriptors per channel by default. */
+#define IOAT_DEFAULT_ORDER			15
+
+/*
+ * Software side of one ring slot: the completion callback (may be NULL)
+ * and the argument passed to it when the descriptor is retired.
+ */
+struct ioat_descriptor {
+	ioat_callback_t		callback_fn;
+	void			*callback_arg;
+};
+
+/* One of these per allocated PCI device. */
+struct ioat_channel {
+	/* Link used while the channel sits on the driver's free list. */
+	SLIST_ENTRY(ioat_channel) next;
+
+	/* Opaque handle to upper layer */
+	void                *device;
+	/* Largest single transfer in bytes, derived from the XFERCAP register. */
+	uint64_t            max_xfer_size;
+	/* Memory-mapped channel registers (BAR 0). */
+	volatile struct ioat_registers *regs;
+
+	/* Completion word; its bus address is programmed into CHANCMP. */
+	volatile uint64_t   *comp_update;
+
+	/* Free-running count of descriptors prepared so far. */
+	uint32_t            head;
+	/* Free-running count of descriptors retired so far. */
+	uint32_t            tail;
+
+	/* The ring holds 1 << ring_size_order entries. */
+	uint32_t            ring_size_order;
+	/* Physical address of the last descriptor retired by event processing. */
+	uint64_t            last_seen;
+
+	/* Software descriptors, one per ring slot. */
+	struct ioat_descriptor		*ring;
+	/* Hardware descriptors, one per ring slot, and their physical base address. */
+	union ioat_hw_descriptor	*hw_ring;
+	uint64_t			hw_ring_phys_addr;
+};
+
+/* Non-zero when the status field of a CHANSTS value reads "active". */
+static inline uint32_t
+is_ioat_active(uint64_t status)
+{
+	uint64_t state = status & IOAT_CHANSTS_STATUS;
+
+	return state == IOAT_CHANSTS_ACTIVE;
+}
+
+/* Non-zero when the status field of a CHANSTS value reads "idle". */
+static inline uint32_t
+is_ioat_idle(uint64_t status)
+{
+	uint64_t state = status & IOAT_CHANSTS_STATUS;
+
+	return state == IOAT_CHANSTS_IDLE;
+}
+
+/* Non-zero when the status field of a CHANSTS value reads "halted". */
+static inline uint32_t
+is_ioat_halted(uint64_t status)
+{
+	uint64_t state = status & IOAT_CHANSTS_STATUS;
+
+	return state == IOAT_CHANSTS_HALTED;
+}
+
+/* Non-zero when the status field of a CHANSTS value reads "suspended". */
+static inline uint32_t
+is_ioat_suspended(uint64_t status)
+{
+	uint64_t state = status & IOAT_CHANSTS_STATUS;
+
+	return state == IOAT_CHANSTS_SUSPENDED;
+}
+
+#endif /* __IOAT_INTERNAL_H__ */
diff --git a/src/spdk/lib/ioat/ioat_pci.h b/src/spdk/lib/ioat/ioat_pci.h
new file mode 100644
index 0000000..3f6d4ff
--- /dev/null
+++ b/src/spdk/lib/ioat/ioat_pci.h
@@ -0,0 +1,94 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright (c) Intel Corporation.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __IOAT_PCI_H__
+#define __IOAT_PCI_H__
+
+#include "spdk/pci_ids.h"
+
+#define PCI_DEVICE_ID_INTEL_IOAT_SNB0	0x3c20
+#define PCI_DEVICE_ID_INTEL_IOAT_SNB1	0x3c21
+#define PCI_DEVICE_ID_INTEL_IOAT_SNB2	0x3c22
+#define PCI_DEVICE_ID_INTEL_IOAT_SNB3	0x3c23
+#define PCI_DEVICE_ID_INTEL_IOAT_SNB4	0x3c24
+#define PCI_DEVICE_ID_INTEL_IOAT_SNB5	0x3c25
+#define PCI_DEVICE_ID_INTEL_IOAT_SNB6	0x3c26
+#define PCI_DEVICE_ID_INTEL_IOAT_SNB7	0x3c27
+#define PCI_DEVICE_ID_INTEL_IOAT_SNB8	0x3c2e
+#define PCI_DEVICE_ID_INTEL_IOAT_SNB9	0x3c2f
+
+#define PCI_DEVICE_ID_INTEL_IOAT_IVB0	0x0e20
+#define PCI_DEVICE_ID_INTEL_IOAT_IVB1	0x0e21
+#define PCI_DEVICE_ID_INTEL_IOAT_IVB2	0x0e22
+#define PCI_DEVICE_ID_INTEL_IOAT_IVB3	0x0e23
+#define PCI_DEVICE_ID_INTEL_IOAT_IVB4	0x0e24
+#define PCI_DEVICE_ID_INTEL_IOAT_IVB5	0x0e25
+#define PCI_DEVICE_ID_INTEL_IOAT_IVB6	0x0e26
+#define PCI_DEVICE_ID_INTEL_IOAT_IVB7	0x0e27
+#define PCI_DEVICE_ID_INTEL_IOAT_IVB8	0x0e2e
+#define PCI_DEVICE_ID_INTEL_IOAT_IVB9	0x0e2f
+
+#define PCI_DEVICE_ID_INTEL_IOAT_HSW0	0x2f20
+#define PCI_DEVICE_ID_INTEL_IOAT_HSW1	0x2f21
+#define PCI_DEVICE_ID_INTEL_IOAT_HSW2	0x2f22
+#define PCI_DEVICE_ID_INTEL_IOAT_HSW3	0x2f23
+#define PCI_DEVICE_ID_INTEL_IOAT_HSW4	0x2f24
+#define PCI_DEVICE_ID_INTEL_IOAT_HSW5	0x2f25
+#define PCI_DEVICE_ID_INTEL_IOAT_HSW6	0x2f26
+#define PCI_DEVICE_ID_INTEL_IOAT_HSW7	0x2f27
+#define PCI_DEVICE_ID_INTEL_IOAT_HSW8	0x2f2e
+#define PCI_DEVICE_ID_INTEL_IOAT_HSW9	0x2f2f
+
+#define PCI_DEVICE_ID_INTEL_IOAT_BWD0	0x0C50
+#define PCI_DEVICE_ID_INTEL_IOAT_BWD1	0x0C51
+#define PCI_DEVICE_ID_INTEL_IOAT_BWD2	0x0C52
+#define PCI_DEVICE_ID_INTEL_IOAT_BWD3	0x0C53
+
+#define PCI_DEVICE_ID_INTEL_IOAT_BDXDE0	0x6f50
+#define PCI_DEVICE_ID_INTEL_IOAT_BDXDE1	0x6f51
+#define PCI_DEVICE_ID_INTEL_IOAT_BDXDE2	0x6f52
+#define PCI_DEVICE_ID_INTEL_IOAT_BDXDE3	0x6f53
+
+#define PCI_DEVICE_ID_INTEL_IOAT_BDX0	0x6f20
+#define PCI_DEVICE_ID_INTEL_IOAT_BDX1	0x6f21
+#define PCI_DEVICE_ID_INTEL_IOAT_BDX2	0x6f22
+#define PCI_DEVICE_ID_INTEL_IOAT_BDX3	0x6f23
+#define PCI_DEVICE_ID_INTEL_IOAT_BDX4	0x6f24
+#define PCI_DEVICE_ID_INTEL_IOAT_BDX5	0x6f25
+#define PCI_DEVICE_ID_INTEL_IOAT_BDX6	0x6f26
+#define PCI_DEVICE_ID_INTEL_IOAT_BDX7	0x6f27
+#define PCI_DEVICE_ID_INTEL_IOAT_BDX8	0x6f2e
+#define PCI_DEVICE_ID_INTEL_IOAT_BDX9	0x6f2f
+
+#endif /* __IOAT_PCI_H__ */
+
diff --git a/src/spdk/lib/memory/Makefile b/src/spdk/lib/memory/Makefile
new file mode 100644
index 0000000..5711111
--- /dev/null
+++ b/src/spdk/lib/memory/Makefile
@@ -0,0 +1,51 @@
+#
+#  BSD LICENSE
+#
+#  Copyright (c) Intel Corporation.
+#  All rights reserved.
+#
+#  Redistribution and use in source and binary forms, with or without
+#  modification, are permitted provided that the following conditions
+#  are met:
+#
+#    * Redistributions of source code must retain the above copyright
+#      notice, this list of conditions and the following disclaimer.
+#    * Redistributions in binary form must reproduce the above copyright
+#      notice, this list of conditions and the following disclaimer in
+#      the documentation and/or other materials provided with the
+#      distribution.
+#    * Neither the name of Intel Corporation nor the names of its
+#      contributors may be used to endorse or promote products derived
+#      from this software without specific prior written permission.
+#
+#  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+
+SPDK_ROOT_DIR := $(CURDIR)/../..
+include $(SPDK_ROOT_DIR)/mk/spdk.common.mk
+
+CFLAGS += $(DPDK_INC)
+
+C_SRCS = vtophys.c
+
+LIB = libspdk_memory.a
+
+all : $(LIB)
+
+clean :
+	$(CLEAN_C)
+
+$(LIB) : $(OBJS)
+	$(LIB_C)
+
+include $(SPDK_ROOT_DIR)/mk/spdk.deps.mk
diff --git a/src/spdk/lib/memory/vtophys.c b/src/spdk/lib/memory/vtophys.c
new file mode 100644
index 0000000..40972f7
--- /dev/null
+++ b/src/spdk/lib/memory/vtophys.c
@@ -0,0 +1,183 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright (c) Intel Corporation.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdlib.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <string.h>
+#include <pthread.h>
+
+#include "rte_config.h"
+#include "rte_eal.h"
+#include "rte_eal_memconfig.h"
+#include "spdk/vtophys.h"
+
+/* x86-64 userspace virtual addresses use only the low 47 bits [0..46],
+ * which is enough to cover 128 TB.
+ */
+#define SHIFT_128TB	47 /* (1 << 47) == 128 TB */
+#define MASK_128TB	((1ULL << SHIFT_128TB) - 1)
+
+#define SHIFT_1GB	30 /* (1 << 30) == 1 GB */
+#define MASK_1GB	((1ULL << SHIFT_1GB) - 1)
+
+#define SHIFT_2MB	21 /* (1 << 21) == 2MB */
+#define MASK_2MB	((1ULL << SHIFT_2MB) - 1)
+
+#define SHIFT_4KB	12 /* (1 << 12) == 4KB */
+#define MASK_4KB	((1ULL << SHIFT_4KB) - 1)
+
+#define FN_2MB_TO_4KB(fn)	((fn) << (SHIFT_2MB - SHIFT_4KB))
+#define FN_4KB_TO_2MB(fn)	((fn) >> (SHIFT_2MB - SHIFT_4KB))
+
+#define MAP_128TB_IDX(vfn_2mb)	((vfn_2mb) >> (SHIFT_1GB - SHIFT_2MB))
+#define MAP_1GB_IDX(vfn_2mb)	((vfn_2mb) & ((1ULL << (SHIFT_1GB - SHIFT_2MB)) - 1))
+
+/* Physical page frame number of a single 2MB page. */
+struct map_2mb {
+	uint64_t pfn_2mb;
+};
+
+/* Second-level map table indexed by bits [21..29] of the virtual address.
+ * Each entry contains the 2MB physical page frame number or VTOPHYS_ERROR for entries that haven't
+ * been retrieved yet.  Sized to match MAP_1GB_IDX(): 2^9 = 512 entries, one per 2MB page in 1GB.
+ */
+struct map_1gb {
+	struct map_2mb map[1ULL << (SHIFT_1GB - SHIFT_2MB)];
+};
+
+/* Top-level map table indexed by bits [30..46] of the virtual address.
+ * Each entry points to a second-level map table or NULL.
+ */
+struct map_128tb {
+	struct map_1gb *map[1ULL << (SHIFT_128TB - SHIFT_1GB + 1)];
+};
+
+static struct map_128tb vtophys_map_128tb = {};	/* top-level table; NULL slots are filled lazily by vtophys_get_map() */
+static pthread_mutex_t vtophys_mutex = PTHREAD_MUTEX_INITIALIZER;	/* held while a new map_1gb is installed */
+
+/* Return the map_2mb slot for a 2MB virtual frame number, allocating the second-level table on
+ * first use; returns NULL if the frame is outside the 128TB user range or allocation fails. */
+static struct map_2mb *
+vtophys_get_map(uint64_t vfn_2mb)
+{
+	struct map_1gb *map_1gb;
+	uint64_t idx_128tb = MAP_128TB_IDX(vfn_2mb);
+	uint64_t idx_1gb = MAP_1GB_IDX(vfn_2mb);
+
+	/* vfn_2mb is a frame number (address >> SHIFT_2MB), so scale the address mask to match. */
+	if (vfn_2mb & ~(MASK_128TB >> SHIFT_2MB)) {
+		printf("invalid usermode virtual address\n");
+		return NULL;
+	}
+
+	map_1gb = vtophys_map_128tb.map[idx_128tb];
+	if (!map_1gb) {
+		pthread_mutex_lock(&vtophys_mutex);
+
+		/* Recheck to make sure nobody else got the mutex first. */
+		map_1gb = vtophys_map_128tb.map[idx_128tb];
+		if (!map_1gb) {
+			map_1gb = malloc(sizeof(struct map_1gb));
+			if (map_1gb) {
+				/* initialize all entries to all 0xFF (VTOPHYS_ERROR) */
+				memset(map_1gb, 0xFF, sizeof(struct map_1gb));
+				vtophys_map_128tb.map[idx_128tb] = map_1gb;
+			}
+		}
+
+		pthread_mutex_unlock(&vtophys_mutex);
+
+		if (!map_1gb) {
+			printf("allocation failed\n");
+			return NULL;
+		}
+	}
+
+	return &map_1gb->map[idx_1gb];
+}
+
+/* Look up the 2MB physical frame backing vfn_2mb in the DPDK memory segment table.
+ * Returns VTOPHYS_ERROR when no segment contains the address. */
+static uint64_t
+vtophys_get_pfn_2mb(uint64_t vfn_2mb)
+{
+	uintptr_t vaddr, paddr;
+	struct rte_mem_config *mcfg;
+	struct rte_memseg *seg;
+	uint32_t seg_idx;
+
+	vaddr = vfn_2mb << SHIFT_2MB;
+	mcfg = rte_eal_get_configuration()->mem_config;
+
+	for (seg_idx = 0; seg_idx < RTE_MAX_MEMSEG; seg_idx++) {
+		seg = &mcfg->memseg[seg_idx];
+		if (seg->addr == NULL) {
+			break;	/* a NULL addr ends the scan */
+		}
+		if (vaddr >= (uintptr_t)seg->addr &&
+		    vaddr < ((uintptr_t)seg->addr + seg->len)) {
+			paddr = seg->phys_addr + (vaddr - (uintptr_t)seg->addr);
+			return paddr >> SHIFT_2MB;
+		}
+	}
+
+	fprintf(stderr, "could not find 2MB vfn 0x%jx in DPDK mem config\n", (uintmax_t)vfn_2mb);
+	return VTOPHYS_ERROR;	/* the sentinel vtophys() compares against */
+}
+
+/* Translate a virtual address to a physical address using the cached 2MB page map.
+ * Returns VTOPHYS_ERROR if the address cannot be mapped. */
+uint64_t
+vtophys(void *buf)
+{
+	const uint64_t vaddr = (uint64_t)buf;
+	const uint64_t vfn = vaddr >> SHIFT_2MB;
+	struct map_2mb *entry = vtophys_get_map(vfn);
+	uint64_t pfn;
+
+	if (entry == NULL) {
+		return VTOPHYS_ERROR;
+	}
+
+	pfn = entry->pfn_2mb;
+	if (pfn == VTOPHYS_ERROR) {
+		/* Not cached yet: resolve through the DPDK segment table and remember the result. */
+		pfn = vtophys_get_pfn_2mb(vfn);
+		if (pfn == VTOPHYS_ERROR) {
+			return VTOPHYS_ERROR;
+		}
+		entry->pfn_2mb = pfn;
+	}
+	return (pfn << SHIFT_2MB) | (vaddr & MASK_2MB);
+}
diff --git a/src/spdk/lib/nvme/Makefile b/src/spdk/lib/nvme/Makefile
new file mode 100644
index 0000000..18572c4
--- /dev/null
+++ b/src/spdk/lib/nvme/Makefile
@@ -0,0 +1,51 @@
+#
+#  BSD LICENSE
+#
+#  Copyright (c) Intel Corporation.
+#  All rights reserved.
+#
+#  Redistribution and use in source and binary forms, with or without
+#  modification, are permitted provided that the following conditions
+#  are met:
+#
+#    * Redistributions of source code must retain the above copyright
+#      notice, this list of conditions and the following disclaimer.
+#    * Redistributions in binary form must reproduce the above copyright
+#      notice, this list of conditions and the following disclaimer in
+#      the documentation and/or other materials provided with the
+#      distribution.
+#    * Neither the name of Intel Corporation nor the names of its
+#      contributors may be used to endorse or promote products derived
+#      from this software without specific prior written permission.
+#
+#  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+
+SPDK_ROOT_DIR := $(CURDIR)/../..
+include $(SPDK_ROOT_DIR)/mk/spdk.common.mk
+
+CFLAGS += $(DPDK_INC) -include $(CONFIG_NVME_IMPL)
+
+C_SRCS = nvme_ctrlr_cmd.c nvme_ctrlr.c nvme_ns_cmd.c nvme_ns.c nvme_qpair.c nvme.c
+
+LIB = libspdk_nvme.a
+
+all : $(LIB)
+
+clean :
+	$(CLEAN_C)
+
+$(LIB) : $(OBJS)
+	$(LIB_C)
+
+include $(SPDK_ROOT_DIR)/mk/spdk.deps.mk
diff --git a/src/spdk/lib/nvme/nvme.c b/src/spdk/lib/nvme/nvme.c
new file mode 100644
index 0000000..072a1ed
--- /dev/null
+++ b/src/spdk/lib/nvme/nvme.c
@@ -0,0 +1,249 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright (c) Intel Corporation.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "nvme_internal.h"
+
+/** \file
+ *
+ */
+
+struct nvme_driver g_nvme_driver = {	/* global driver state shared by all threads */
+	.lock = NVME_MUTEX_INITIALIZER,	/* taken around the ioq_index_pool updates in this file */
+	.max_io_queues = DEFAULT_MAX_IO_QUEUES	/* number of entries in the I/O queue index pool */
+};
+
+int32_t		nvme_retry_count;
+__thread int	nvme_thread_ioq_index = -1;	/* per-thread I/O queue index; -1 while unregistered */
+
+
+/**
+ * \page nvme_initialization NVMe Initialization
+
+\msc
+
+	app [label="Application"], nvme [label="NVMe Driver"];
+	app=>nvme [label="nvme_attach(devhandle)"];
+	app<<nvme [label="nvme_controller ptr"];
+	app=>nvme [label="nvme_ctrlr_start(nvme_controller ptr)"];
+	nvme=>nvme [label="identify controller"];
+	nvme=>nvme [label="create queue pairs"];
+	nvme=>nvme [label="identify namespace(s)"];
+	app=>app [label="create block devices based on controller's namespaces"];
+
+\endmsc
+
+ */
+
+/*
+ * Allocate a controller object for devhandle, construct it and start it.
+ * Returns the started controller, or NULL after releasing anything acquired.
+ */
+struct nvme_controller *
+nvme_attach(void *devhandle)
+{
+	uint64_t		phys_addr = 0;
+	struct nvme_controller	*ctrlr;
+
+	ctrlr = nvme_malloc("nvme_ctrlr", sizeof(struct nvme_controller), 64, &phys_addr);
+	if (!ctrlr) {
+		nvme_printf(NULL, "could not allocate ctrlr\n");
+		return NULL;
+	}
+
+	if (nvme_ctrlr_construct(ctrlr, devhandle)) {
+		nvme_free(ctrlr);
+		return NULL;
+	}
+	if (nvme_ctrlr_start(ctrlr)) {
+		nvme_ctrlr_destruct(ctrlr);
+		nvme_free(ctrlr);
+		return NULL;
+	}
+
+	return ctrlr;
+}
+
+int
+nvme_detach(struct nvme_controller *ctrlr)
+{
+	nvme_ctrlr_destruct(ctrlr);
+	nvme_free(ctrlr);	/* ctrlr must not be used after this call */
+	return 0;		/* unconditionally reports success */
+}
+
+void
+nvme_completion_poll_cb(void *arg, const struct nvme_completion *cpl)
+{
+	struct nvme_completion_poll_status	*status = arg;
+
+	/*
+	 * Copy the completion into the status object passed by the caller,
+	 *  then set its done flag so that a caller polling on it can stop
+	 *  and inspect the copy to determine if the request passed or failed.
+	 */
+	memcpy(&status->cpl, cpl, sizeof(*cpl));
+	status->done = true;
+}
+
+size_t
+nvme_request_size(void)
+{
+	return sizeof(struct nvme_request);	/* size in bytes of one request object */
+}
+
+struct nvme_request *	/* initialized request, or NULL if none could be obtained */
+nvme_allocate_request(const struct nvme_payload *payload, uint32_t payload_size,
+		      nvme_cb_fn_t cb_fn, void *cb_arg)
+{
+	struct nvme_request *req = NULL;
+
+	nvme_alloc_request(&req);	/* req is checked for NULL below, so it may come back unset */
+
+	if (req == NULL) {
+		return req;
+	}
+
+	/*
+	 * Only memset up to (but not including) the children
+	 *  TAILQ_ENTRY.  children, and following members, are
+	 *  only used as part of I/O splitting so we avoid
+	 *  memsetting them until it is actually needed.
+	 *  They will be initialized in nvme_request_add_child()
+	 *  if the request is split.
+	 */
+	memset(req, 0, offsetof(struct nvme_request, children));
+	req->cb_fn = cb_fn;
+	req->cb_arg = cb_arg;
+	req->timeout = true;
+	req->parent = NULL;
+	req->payload = *payload;	/* copied by value; later changes to *payload are not seen */
+	req->payload_size = payload_size;
+
+	return req;
+}
+
+struct nvme_request *
+nvme_allocate_request_contig(void *buffer, uint32_t payload_size, nvme_cb_fn_t cb_fn, void *cb_arg)
+{
+	struct nvme_payload payload;
+
+	payload.type = NVME_PAYLOAD_TYPE_CONTIG;
+	payload.u.contig = buffer;	/* only type and u.contig are set before the payload is passed on */
+
+	return nvme_allocate_request(&payload, payload_size, cb_fn, cb_arg);	/* NULL if allocation fails */
+}
+
+struct nvme_request *
+nvme_allocate_request_null(nvme_cb_fn_t cb_fn, void *cb_arg)
+{
+	return nvme_allocate_request_contig(NULL, 0, cb_fn, cb_arg);	/* NULL buffer, zero-length payload */
+}
+
+void
+nvme_free_request(struct nvme_request *req)
+{
+	nvme_assert(req != NULL, ("nvme_free_request(NULL)\n"));	/* a NULL req is a caller bug */
+	nvme_dealloc_request(req);	/* counterpart of the nvme_alloc_request() call in nvme_allocate_request() */
+}
+
+/* Give the calling thread the next free I/O queue index from the driver pool (created on
+ *  first use).  Returns -1 only when the pool cannot be allocated; an exhausted pool still
+ *  returns 0 but leaves nvme_thread_ioq_index at -1. */
+static int
+nvme_allocate_ioq_index(void)
+{
+	struct nvme_driver	*drv = &g_nvme_driver;
+	uint32_t		idx;
+
+	nvme_mutex_lock(&drv->lock);
+	if (drv->ioq_index_pool == NULL) {
+		drv->ioq_index_pool = calloc(drv->max_io_queues, sizeof(*drv->ioq_index_pool));
+		if (drv->ioq_index_pool == NULL) {
+			nvme_mutex_unlock(&drv->lock);
+			return -1;
+		}
+		for (idx = 0; idx < drv->max_io_queues; idx++) {
+			drv->ioq_index_pool[idx] = idx;
+		}
+		drv->ioq_index_pool_next = 0;
+	}
+
+	if (drv->ioq_index_pool_next >= drv->max_io_queues) {
+		nvme_thread_ioq_index = -1;
+	} else {
+		nvme_thread_ioq_index = drv->ioq_index_pool[drv->ioq_index_pool_next];
+		drv->ioq_index_pool[drv->ioq_index_pool_next++] = -1;
+	}
+
+	nvme_mutex_unlock(&drv->lock);
+	return 0;
+}
+
+/* Return the calling thread's I/O queue index, if it holds one, to the top of the driver pool. */
+static void
+nvme_free_ioq_index(void)
+{
+	struct nvme_driver	*drv = &g_nvme_driver;
+
+	nvme_mutex_lock(&drv->lock);
+	if (nvme_thread_ioq_index >= 0) {
+		drv->ioq_index_pool[--drv->ioq_index_pool_next] = nvme_thread_ioq_index;
+		nvme_thread_ioq_index = -1;
+	}
+	nvme_mutex_unlock(&drv->lock);
+}
+
+/* Assign the calling thread an I/O queue index; returns 0 on success, nonzero otherwise. */
+int
+nvme_register_io_thread(void)
+{
+	int rc;
+
+	if (nvme_thread_ioq_index >= 0) {
+		nvme_printf(NULL, "thread already registered\n");
+		return -1;
+	}
+	rc = nvme_allocate_ioq_index();
+	if (rc != 0) {
+		nvme_printf(NULL, "ioq_index_pool alloc failed\n");
+		return rc;
+	}
+	return (nvme_thread_ioq_index < 0) ? -1 : 0;
+}
+
+void
+nvme_unregister_io_thread(void)
+{
+	nvme_free_ioq_index();	/* leaves the pool untouched when this thread holds no index */
+}
+
diff --git a/src/spdk/lib/nvme/nvme_ctrlr.c b/src/spdk/lib/nvme/nvme_ctrlr.c
new file mode 100644
index 0000000..4920384
--- /dev/null
+++ b/src/spdk/lib/nvme/nvme_ctrlr.c
@@ -0,0 +1,915 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright (c) Intel Corporation.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "nvme_internal.h"
+#include "spdk/nvme_intel.h"
+/**
+ * \file
+ *
+ */
+
+static int nvme_ctrlr_construct_and_submit_aer(struct nvme_controller *ctrlr,
+		struct nvme_async_event_request *aer);
+
+/* Mark the Intel directory page, and each page it lists with a nonzero length, as supported. */
+static void
+nvme_ctrlr_construct_intel_support_log_page_list(struct nvme_controller *ctrlr,
+		struct nvme_intel_log_page_directory *log_page_directory)
+{
+	if (log_page_directory == NULL || ctrlr->cdata.vid != PCI_VENDOR_ID_INTEL)
+		return;
+
+	ctrlr->log_page_supported[NVME_INTEL_LOG_PAGE_DIRECTORY] = true;
+	if (log_page_directory->read_latency_log_len) {
+		ctrlr->log_page_supported[NVME_INTEL_LOG_READ_CMD_LATENCY] = true;
+	}
+	if (log_page_directory->write_latency_log_len) {
+		ctrlr->log_page_supported[NVME_INTEL_LOG_WRITE_CMD_LATENCY] = true;
+	}
+	if (log_page_directory->temperature_statistics_log_len) {
+		ctrlr->log_page_supported[NVME_INTEL_LOG_TEMPERATURE] = true;
+	}
+	if (log_page_directory->smart_log_len) {
+		ctrlr->log_page_supported[NVME_INTEL_LOG_SMART] = true;
+	}
+}
+
+static int nvme_ctrlr_set_intel_support_log_pages(struct nvme_controller *ctrlr)
+{
+	uint64_t phys_addr = 0;
+	struct nvme_completion_poll_status	status;
+	struct nvme_intel_log_page_directory *log_page_directory;
+	/* Returns 0 on success, ENXIO if the buffer cannot be allocated or the command fails. */
+	log_page_directory = nvme_malloc("nvme_log_page_directory",
+					 sizeof(struct nvme_intel_log_page_directory),
+					 64, &phys_addr);
+	if (log_page_directory == NULL) {
+		nvme_printf(NULL, "could not allocate log_page_directory\n");
+		return ENXIO;
+	}
+	/* Request the directory page, then poll the admin queue until the callback sets done. */
+	status.done = false;
+	nvme_ctrlr_cmd_get_log_page(ctrlr, NVME_INTEL_LOG_PAGE_DIRECTORY, NVME_GLOBAL_NAMESPACE_TAG,
+				    log_page_directory, sizeof(struct nvme_intel_log_page_directory),
+				    nvme_completion_poll_cb,
+				    &status);
+	while (status.done == false) {	/* NOTE(review): unbounded; never exits if the command is not completed */
+		nvme_qpair_process_completions(&ctrlr->adminq, 0);
+	}
+	if (nvme_completion_is_error(&status.cpl)) {
+		nvme_free(log_page_directory);
+		nvme_printf(ctrlr, "nvme_ctrlr_cmd_get_log_page failed!\n");
+		return ENXIO;
+	}
+	/* Translate the directory into log_page_supported[] flags, then release the buffer. */
+	nvme_ctrlr_construct_intel_support_log_page_list(ctrlr, log_page_directory);
+	nvme_free(log_page_directory);
+	return 0;
+}
+
+static void
+nvme_ctrlr_set_supported_log_pages(struct nvme_controller *ctrlr)
+{
+	memset(ctrlr->log_page_supported, 0, sizeof(ctrlr->log_page_supported));	/* start with nothing supported */
+	/* Mandatory pages: always marked supported */
+	ctrlr->log_page_supported[NVME_LOG_ERROR] = true;
+	ctrlr->log_page_supported[NVME_LOG_HEALTH_INFORMATION] = true;
+	ctrlr->log_page_supported[NVME_LOG_FIRMWARE_SLOT] = true;
+	if (ctrlr->cdata.lpa.celp) {	/* optional page, gated on cdata.lpa.celp */
+		ctrlr->log_page_supported[NVME_LOG_COMMAND_EFFECTS_LOG] = true;
+	}
+	if (ctrlr->cdata.vid == PCI_VENDOR_ID_INTEL) {
+		nvme_ctrlr_set_intel_support_log_pages(ctrlr);	/* result is not checked; failure leaves Intel pages unset */
+	}
+}
+
+static void	/* marks every Intel vendor feature listed below as supported, unconditionally */
+nvme_ctrlr_set_intel_supported_features(struct nvme_controller *ctrlr)
+{
+	ctrlr->feature_supported[NVME_INTEL_FEAT_MAX_LBA] = true;
+	ctrlr->feature_supported[NVME_INTEL_FEAT_NATIVE_MAX_LBA] = true;
+	ctrlr->feature_supported[NVME_INTEL_FEAT_POWER_GOVERNOR_SETTING] = true;
+	ctrlr->feature_supported[NVME_INTEL_FEAT_SMBUS_ADDRESS] = true;
+	ctrlr->feature_supported[NVME_INTEL_FEAT_LED_PATTERN] = true;
+	ctrlr->feature_supported[NVME_INTEL_FEAT_RESET_TIMED_WORKLOAD_COUNTERS] = true;
+	ctrlr->feature_supported[NVME_INTEL_FEAT_LATENCY_TRACKING] = true;
+}
+
+static void	/* rebuilds feature_supported[] from scratch using the controller data in cdata */
+nvme_ctrlr_set_supported_features(struct nvme_controller *ctrlr)
+{
+	memset(ctrlr->feature_supported, 0, sizeof(ctrlr->feature_supported));
+	/* Mandatory features: always marked supported */
+	ctrlr->feature_supported[NVME_FEAT_ARBITRATION] = true;
+	ctrlr->feature_supported[NVME_FEAT_POWER_MANAGEMENT] = true;
+	ctrlr->feature_supported[NVME_FEAT_TEMPERATURE_THRESHOLD] = true;
+	ctrlr->feature_supported[NVME_FEAT_ERROR_RECOVERY] = true;
+	ctrlr->feature_supported[NVME_FEAT_NUMBER_OF_QUEUES] = true;
+	ctrlr->feature_supported[NVME_FEAT_INTERRUPT_COALESCING] = true;
+	ctrlr->feature_supported[NVME_FEAT_INTERRUPT_VECTOR_CONFIGURATION] = true;
+	ctrlr->feature_supported[NVME_FEAT_WRITE_ATOMICITY] = true;
+	ctrlr->feature_supported[NVME_FEAT_ASYNC_EVENT_CONFIGURATION] = true;
+	/* Optional features: each is gated on the corresponding cdata field */
+	if (ctrlr->cdata.vwc.present) {
+		ctrlr->feature_supported[NVME_FEAT_VOLATILE_WRITE_CACHE] = true;
+	}
+	if (ctrlr->cdata.apsta.supported) {
+		ctrlr->feature_supported[NVME_FEAT_AUTONOMOUS_POWER_STATE_TRANSITION] = true;
+	}
+	if (ctrlr->cdata.hmpre) {
+		ctrlr->feature_supported[NVME_FEAT_HOST_MEM_BUFFER] = true;
+	}
+	if (ctrlr->cdata.vid == PCI_VENDOR_ID_INTEL) {
+		nvme_ctrlr_set_intel_supported_features(ctrlr);
+	}
+}
+
+static int	/* passes through the result of nvme_qpair_construct() */
+nvme_ctrlr_construct_admin_qpair(struct nvme_controller *ctrlr)
+{
+	return nvme_qpair_construct(&ctrlr->adminq,
+				    0, /* qpair ID */
+				    NVME_ADMIN_ENTRIES,
+				    NVME_ADMIN_TRACKERS,
+				    ctrlr);
+}
+
+static int	/* 0 on success (or if already constructed), -1 on allocation or qpair failure */
+nvme_ctrlr_construct_io_qpairs(struct nvme_controller *ctrlr)
+{
+	struct nvme_qpair		*qpair;
+	union nvme_cap_lo_register	cap_lo;
+	uint32_t			i, num_entries, num_trackers;
+	int				rc;
+
+	if (ctrlr->ioq != NULL) {
+		/*
+		 * io_qpairs were already constructed, so just return.
+		 *  This typically happens when the controller is
+		 *  initialized a second (or subsequent) time after a
+		 *  controller reset.
+		 */
+		return 0;
+	}
+
+	/*
+	 * NVMe spec sets a hard limit of 64K max entries, but
+	 *  devices may specify a smaller limit, so we need to check
+	 *  the MQES field in the capabilities register.
+	 */
+	cap_lo.raw = nvme_mmio_read_4(ctrlr, cap_lo.raw);
+	num_entries = nvme_min(NVME_IO_ENTRIES, cap_lo.bits.mqes + 1);	/* the +1 treats mqes as 0-based */
+
+	/*
+	 * No need to have more trackers than entries in the submit queue.
+	 *  Note also that for a queue size of N, we can only have (N-1)
+	 *  commands outstanding, hence the "-1" here.
+	 */
+	num_trackers = nvme_min(NVME_IO_TRACKERS, (num_entries - 1));
+
+	ctrlr->max_xfer_size = NVME_MAX_XFER_SIZE;
+
+	ctrlr->ioq = calloc(ctrlr->num_io_queues, sizeof(struct nvme_qpair));	/* zero-filled qpair array */
+
+	if (ctrlr->ioq == NULL)
+		return -1;
+
+	for (i = 0; i < ctrlr->num_io_queues; i++) {
+		qpair = &ctrlr->ioq[i];
+
+		/*
+		 * Admin queue has ID=0. IO queues start at ID=1 -
+		 *  hence the 'i+1' here.
+		 */
+		rc = nvme_qpair_construct(qpair,
+					  i + 1, /* qpair ID */
+					  num_entries,
+					  num_trackers,
+					  ctrlr);
+		if (rc)
+			return -1;	/* NOTE(review): ctrlr->ioq stays set, so a later call takes the early return above */
+	}
+
+	return 0;
+}
+
+static void
+nvme_ctrlr_fail(struct nvme_controller *ctrlr)
+{
+	uint32_t i;
+	/* Flag the controller as failed, then fail the admin queue and every I/O queue. */
+	ctrlr->is_failed = true;	/* nvme_ctrlr_reset() returns early once this is set */
+	nvme_qpair_fail(&ctrlr->adminq);
+	for (i = 0; i < ctrlr->num_io_queues; i++) {
+		nvme_qpair_fail(&ctrlr->ioq[i]);
+	}
+}
+
+static int	/* 0 once csts.rdy equals desired_ready_value, ENXIO on timeout */
+_nvme_ctrlr_wait_for_ready(struct nvme_controller *ctrlr, int desired_ready_value)
+{
+	int ms_waited, ready_timeout_in_ms;
+	union nvme_csts_register csts;
+	union nvme_cap_lo_register cap_lo;
+
+	/* Get ready timeout value from controller, in units of 500ms. */
+	cap_lo.raw = nvme_mmio_read_4(ctrlr, cap_lo.raw);
+	ready_timeout_in_ms = cap_lo.bits.to * 500;
+
+	csts.raw = nvme_mmio_read_4(ctrlr, csts);
+
+	ms_waited = 0;
+
+	while (csts.bits.rdy != desired_ready_value) {
+		nvme_delay(1000);	/* NOTE(review): presumably 1000 us, i.e. one ms_waited tick - confirm nvme_delay units */
+		if (ms_waited++ > ready_timeout_in_ms) {	/* the timeout is tested before rdy is re-read */
+			nvme_printf(ctrlr, "controller ready did not become %d "
+				    "within %d ms\n", desired_ready_value, ready_timeout_in_ms);
+			return ENXIO;
+		}
+		csts.raw = nvme_mmio_read_4(ctrlr, csts);
+	}
+
+	return 0;
+}
+
+static int	/* 0 when the controller reports ready, ENXIO otherwise */
+nvme_ctrlr_wait_for_ready(struct nvme_controller *ctrlr)
+{
+	union nvme_cc_register cc;
+
+	cc.raw = nvme_mmio_read_4(ctrlr, cc.raw);
+
+	if (!cc.bits.en) {	/* waiting for rdy == 1 is refused while the enable bit is clear */
+		nvme_printf(ctrlr, "%s called with cc.en = 0\n", __func__);
+		return ENXIO;
+	}
+
+	return _nvme_ctrlr_wait_for_ready(ctrlr, 1);
+}
+
+static void	/* clears cc.en and waits for csts.rdy to drop; wait results are not checked */
+nvme_ctrlr_disable(struct nvme_controller *ctrlr)
+{
+	union nvme_cc_register cc;
+	union nvme_csts_register csts;
+
+	cc.raw = nvme_mmio_read_4(ctrlr, cc.raw);
+	csts.raw = nvme_mmio_read_4(ctrlr, csts);
+
+	if (cc.bits.en == 1 && csts.bits.rdy == 0) {	/* enabled but not ready yet: wait for rdy == 1 first */
+		_nvme_ctrlr_wait_for_ready(ctrlr, 1);
+	}
+
+	cc.bits.en = 0;
+	nvme_mmio_write_4(ctrlr, cc.raw, cc.raw);
+
+	_nvme_ctrlr_wait_for_ready(ctrlr, 0);	/* wait for rdy == 0 after clearing en */
+}
+
+static void	/* requests a normal shutdown via cc.shn and polls csts.shst; failure is only logged */
+nvme_ctrlr_shutdown(struct nvme_controller *ctrlr)
+{
+	union nvme_cc_register		cc;
+	union nvme_csts_register	csts;
+	int				ms_waited = 0;
+
+	cc.raw = nvme_mmio_read_4(ctrlr, cc.raw);
+	cc.bits.shn = NVME_SHN_NORMAL;
+	nvme_mmio_write_4(ctrlr, cc.raw, cc.raw);
+
+	csts.raw = nvme_mmio_read_4(ctrlr, csts);
+	/*
+	 * The NVMe spec does not define a timeout period
+	 *  for shutdown notification, so we just pick
+	 *  5 seconds as a reasonable amount of time to
+	 *  wait before proceeding.
+	 */
+	while (csts.bits.shst != NVME_SHST_COMPLETE) {
+		nvme_delay(1000);	/* NOTE(review): presumably 1000 us per iteration - confirm nvme_delay units */
+		csts.raw = nvme_mmio_read_4(ctrlr, csts);
+		if (ms_waited++ >= 5000)	/* 5000 iterations, matching the 5 seconds above */
+			break;
+	}
+	if (csts.bits.shst != NVME_SHST_COMPLETE)
+		nvme_printf(ctrlr, "did not shutdown within 5 seconds\n");
+}
+
+static int	/* 0 when the controller is (or becomes) ready, otherwise the wait's error code */
+nvme_ctrlr_enable(struct nvme_controller *ctrlr)
+{
+	union nvme_cc_register		cc;
+	union nvme_csts_register	csts;
+	union nvme_aqa_register		aqa;
+
+	cc.raw = nvme_mmio_read_4(ctrlr, cc.raw);
+	csts.raw = nvme_mmio_read_4(ctrlr, csts);
+
+	if (cc.bits.en == 1) {	/* already enabled: nothing is reprogrammed, only wait if not ready yet */
+		if (csts.bits.rdy == 1) {
+			return 0;
+		} else {
+			return nvme_ctrlr_wait_for_ready(ctrlr);
+		}
+	}
+	/* Program the admin queue bus addresses and sizes before setting cc.en. */
+	nvme_mmio_write_8(ctrlr, asq, ctrlr->adminq.cmd_bus_addr);
+	nvme_mmio_write_8(ctrlr, acq, ctrlr->adminq.cpl_bus_addr);
+
+	aqa.raw = 0;
+	/* acqs and asqs are 0-based. */
+	aqa.bits.acqs = ctrlr->adminq.num_entries - 1;
+	aqa.bits.asqs = ctrlr->adminq.num_entries - 1;
+	nvme_mmio_write_4(ctrlr, aqa.raw, aqa.raw);
+
+	cc.bits.en = 1;
+	cc.bits.css = 0;
+	cc.bits.ams = 0;
+	cc.bits.shn = 0;
+	cc.bits.iosqes = 6; /* SQ entry size == 64 == 2^6 */
+	cc.bits.iocqes = 4; /* CQ entry size == 16 == 2^4 */
+
+	/* Page size is 2 ^ (12 + mps). */
+	cc.bits.mps = nvme_u32log2(PAGE_SIZE) - 12;
+
+	nvme_mmio_write_4(ctrlr, cc.raw, cc.raw);	/* single write carrying en = 1 and the fields set above */
+
+	return nvme_ctrlr_wait_for_ready(ctrlr);
+}
+
+/*
+ * Reset the controller hardware by forcing a CC.EN 1 -> 0 transition and
+ *  then enabling it again.
+ *
+ * Returns the result of the final nvme_ctrlr_enable() call.
+ */
+static int
+nvme_ctrlr_hw_reset(struct nvme_controller *ctrlr)
+{
+	uint32_t i;
+	int rc;
+	union nvme_cc_register cc;
+
+	cc.raw = nvme_mmio_read_4(ctrlr, cc.raw);
+	if (cc.bits.en) {
+		/* Controller is live: disable the admin and all I/O qpairs first. */
+		nvme_qpair_disable(&ctrlr->adminq);
+		for (i = 0; i < ctrlr->num_io_queues; i++) {
+			nvme_qpair_disable(&ctrlr->ioq[i]);
+		}
+	} else {
+		/*
+		 * Ensure we do a transition from cc.en==1 to cc.en==0.
+		 *  If we started disabled (cc.en==0), then we have to enable
+		 *  first to get a reset.
+		 *
+		 * NOTE(review): the result of this enable is not checked.
+		 */
+		nvme_ctrlr_enable(ctrlr);
+	}
+
+	nvme_ctrlr_disable(ctrlr);
+	rc = nvme_ctrlr_enable(ctrlr);
+
+	return rc;
+}
+
+/*
+ * Reset the controller while holding ctrlr_lock.
+ *
+ * Returns 0 without doing anything when a reset is already in progress or
+ *  the controller has been marked failed.  Otherwise returns the result of
+ *  nvme_ctrlr_start(); a non-zero result also marks the controller failed
+ *  via nvme_ctrlr_fail().
+ */
+int
+nvme_ctrlr_reset(struct nvme_controller *ctrlr)
+{
+	int rc = 0;
+
+	nvme_mutex_lock(&ctrlr->ctrlr_lock);
+
+	/*
+	 * Only kick off a reset when the controller is neither resetting
+	 *  already nor failed; in those cases there is nothing to do.
+	 */
+	if (!ctrlr->is_resetting && !ctrlr->is_failed) {
+		ctrlr->is_resetting = true;
+
+		nvme_printf(ctrlr, "resetting controller\n");
+		/* nvme_ctrlr_start() issues a reset as its first step */
+		rc = nvme_ctrlr_start(ctrlr);
+		if (rc != 0) {
+			nvme_ctrlr_fail(ctrlr);
+		}
+
+		ctrlr->is_resetting = false;
+	}
+
+	nvme_mutex_unlock(&ctrlr->ctrlr_lock);
+
+	return rc;
+}
+
+/*
+ * Issue Identify Controller into ctrlr->cdata, poll the admin queue until
+ *  the command completes, and clamp max_xfer_size to the controller's MDTS.
+ *
+ * Returns 0 on success, ENXIO if the command completed with an error.
+ */
+static int
+nvme_ctrlr_identify(struct nvme_controller *ctrlr)
+{
+	struct nvme_completion_poll_status	status;
+	uint64_t				mdts_bytes;
+
+	status.done = false;
+	nvme_ctrlr_cmd_identify_controller(ctrlr, &ctrlr->cdata,
+					   nvme_completion_poll_cb, &status);
+	while (status.done == false) {
+		nvme_qpair_process_completions(&ctrlr->adminq, 0);
+	}
+	if (nvme_completion_is_error(&status.cpl)) {
+		nvme_printf(ctrlr, "nvme_identify_controller failed!\n");
+		return ENXIO;
+	}
+
+	/*
+	 * Use MDTS to ensure our default max_xfer_size doesn't exceed what the
+	 *  controller supports.  The limit is min_page_size * 2^mdts.
+	 *
+	 * The product is computed in 64 bits: in 32-bit arithmetic it wraps
+	 *  (e.g. 4096 << 20 == 0) and shifting an int by 31 or more is
+	 *  undefined.  min_page_size is at least 4096 (1 << (12 + mpsmin)),
+	 *  so mdts >= 32 describes a limit of 16 TiB or more and is treated
+	 *  as imposing no restriction.
+	 */
+	if (ctrlr->cdata.mdts > 0 && ctrlr->cdata.mdts < 32) {
+		mdts_bytes = (uint64_t)ctrlr->min_page_size << ctrlr->cdata.mdts;
+		if (mdts_bytes < ctrlr->max_xfer_size) {
+			ctrlr->max_xfer_size = mdts_bytes;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Ask the controller for driver->max_io_queues I/O queue pairs and record
+ *  how many were granted in ctrlr->num_io_queues.  The driver-wide
+ *  max_io_queues is lowered to that number if it was larger.
+ *
+ * Returns 0 on success, ENXIO if the command completed with an error.
+ *
+ * NOTE(review): nvme_ctrlr_cmd_set_num_queues() returns void, yet the
+ *  set-feature call it wraps can fail to allocate a request without
+ *  submitting anything; in that case the polling loop below would
+ *  presumably never see status.done become true.
+ */
+static int
+nvme_ctrlr_set_num_qpairs(struct nvme_controller *ctrlr)
+{
+	struct nvme_driver			*driver = &g_nvme_driver;
+	struct nvme_completion_poll_status	status;
+	int					cq_allocated, sq_allocated;
+	uint32_t				max_io_queues;
+
+	status.done = false;
+
+	/* Snapshot the shared limit under the driver lock. */
+	nvme_mutex_lock(&driver->lock);
+	max_io_queues = driver->max_io_queues;
+	nvme_mutex_unlock(&driver->lock);
+
+	nvme_ctrlr_cmd_set_num_queues(ctrlr, max_io_queues,
+				      nvme_completion_poll_cb, &status);
+	while (status.done == false) {
+		nvme_qpair_process_completions(&ctrlr->adminq, 0);
+	}
+	if (nvme_completion_is_error(&status.cpl)) {
+		nvme_printf(ctrlr, "nvme_set_num_queues failed!\n");
+		return ENXIO;
+	}
+
+	/*
+	 * Data in cdw0 is 0-based.
+	 * Lower 16-bits indicate number of submission queues allocated.
+	 * Upper 16-bits indicate number of completion queues allocated.
+	 */
+	sq_allocated = (status.cpl.cdw0 & 0xFFFF) + 1;
+	cq_allocated = (status.cpl.cdw0 >> 16) + 1;
+
+	/* A queue pair needs one of each, so the smaller count wins. */
+	ctrlr->num_io_queues = nvme_min(sq_allocated, cq_allocated);
+
+	nvme_mutex_lock(&driver->lock);
+	driver->max_io_queues = nvme_min(driver->max_io_queues, ctrlr->num_io_queues);
+	nvme_mutex_unlock(&driver->lock);
+
+	return 0;
+}
+
+/*
+ * Construct the I/O queue pairs and create each one on the controller:
+ *  for every queue a Create I/O CQ command is issued and polled to
+ *  completion, followed by a Create I/O SQ command, then the qpair is
+ *  reset.
+ *
+ * Returns 0 on success and ENXIO on the first failure; queues created
+ *  before the failure are left as they are.
+ */
+static int
+nvme_ctrlr_create_qpairs(struct nvme_controller *ctrlr)
+{
+	struct nvme_completion_poll_status	status;
+	struct nvme_qpair			*qpair;
+	uint32_t				i;
+
+	if (nvme_ctrlr_construct_io_qpairs(ctrlr)) {
+		nvme_printf(ctrlr, "nvme_ctrlr_construct_io_qpairs failed!\n");
+		return ENXIO;
+	}
+
+	for (i = 0; i < ctrlr->num_io_queues; i++) {
+		qpair = &ctrlr->ioq[i];
+
+		/* The completion queue is created before its submission queue. */
+		status.done = false;
+		nvme_ctrlr_cmd_create_io_cq(ctrlr, qpair,
+					    nvme_completion_poll_cb, &status);
+		while (status.done == false) {
+			nvme_qpair_process_completions(&ctrlr->adminq, 0);
+		}
+		if (nvme_completion_is_error(&status.cpl)) {
+			nvme_printf(ctrlr, "nvme_create_io_cq failed!\n");
+			return ENXIO;
+		}
+
+		status.done = false;
+		nvme_ctrlr_cmd_create_io_sq(qpair->ctrlr, qpair,
+					    nvme_completion_poll_cb, &status);
+		while (status.done == false) {
+			nvme_qpair_process_completions(&ctrlr->adminq, 0);
+		}
+		if (nvme_completion_is_error(&status.cpl)) {
+			nvme_printf(ctrlr, "nvme_create_io_sq failed!\n");
+			return ENXIO;
+		}
+
+		nvme_qpair_reset(qpair);
+	}
+
+	return 0;
+}
+
+/*
+ * Tear down all namespace objects and release the namespace arrays.
+ *
+ * Safe to call when nothing has been allocated: each array is only
+ *  touched when its pointer is non-NULL, and the pointers are cleared
+ *  afterwards.
+ */
+static void
+nvme_ctrlr_destruct_namespaces(struct nvme_controller *ctrlr)
+{
+	if (ctrlr->ns != NULL) {
+		/* Capture the count up front; it is zeroed further down. */
+		const uint32_t count = ctrlr->num_ns;
+		uint32_t idx;
+
+		for (idx = 0; idx < count; idx++) {
+			nvme_ns_destruct(&ctrlr->ns[idx]);
+		}
+
+		free(ctrlr->ns);
+		ctrlr->ns = NULL;
+		ctrlr->num_ns = 0;
+	}
+
+	if (ctrlr->nsdata != NULL) {
+		nvme_free(ctrlr->nsdata);
+		ctrlr->nsdata = NULL;
+	}
+}
+
+/*
+ * (Re)build the namespace array from the namespace count reported in the
+ *  controller data (cdata.nn).
+ *
+ * The ns and nsdata arrays are reallocated only when the count differs
+ *  from ctrlr->num_ns; every namespace is then constructed with
+ *  nvme_ns_construct().
+ *
+ * Returns 0 on success.  Returns -1 when the controller reports zero
+ *  namespaces, or when an allocation or a namespace construction fails
+ *  (in the latter cases everything is released again).
+ */
+static int
+nvme_ctrlr_construct_namespaces(struct nvme_controller *ctrlr)
+{
+	uint32_t i, nn = ctrlr->cdata.nn;
+	uint64_t phys_addr = 0;
+
+	if (nn == 0) {
+		nvme_printf(ctrlr, "controller has 0 namespaces\n");
+		return -1;
+	}
+
+	/* ctrlr->num_ns may be 0 (startup) or a different number of namespaces (reset),
+	 * so check if we need to reallocate.
+	 */
+	if (nn != ctrlr->num_ns) {
+		nvme_ctrlr_destruct_namespaces(ctrlr);
+
+		ctrlr->ns = calloc(nn, sizeof(struct nvme_namespace));
+		if (ctrlr->ns == NULL) {
+			goto fail;
+		}
+
+		/* phys_addr is required by nvme_malloc() but not used here. */
+		ctrlr->nsdata = nvme_malloc("nvme_namespaces",
+					    nn * sizeof(struct nvme_namespace_data), 64,
+					    &phys_addr);
+		if (ctrlr->nsdata == NULL) {
+			goto fail;
+		}
+
+		ctrlr->num_ns = nn;
+	}
+
+	for (i = 0; i < nn; i++) {
+		struct nvme_namespace	*ns = &ctrlr->ns[i];
+		/* Namespace IDs are 1-based; array slots are 0-based. */
+		uint32_t 		nsid = i + 1;
+
+		if (nvme_ns_construct(ns, nsid, ctrlr) != 0) {
+			goto fail;
+		}
+	}
+
+	return 0;
+
+fail:
+	nvme_ctrlr_destruct_namespaces(ctrlr);
+	return -1;
+}
+
+/*
+ * Completion callback for an asynchronous event request (AER).
+ *
+ * arg is the struct nvme_async_event_request that was submitted.  Unless
+ *  the completion carries NVME_SC_ABORTED_SQ_DELETION, the registered
+ *  user callback (if any) is invoked and a replacement AER is submitted.
+ */
+static void
+nvme_ctrlr_async_event_cb(void *arg, const struct nvme_completion *cpl)
+{
+	struct nvme_async_event_request	*request = arg;
+	struct nvme_controller		*owner = request->ctrlr;
+
+	/*
+	 *  An SQ-deletion abort is simulated when the controller is being
+	 *  shut down, to effectively abort outstanding asynchronous event
+	 *  requests and make sure all memory is freed.  Nothing is reported
+	 *  or reposted in that case.
+	 */
+	if (cpl->status.sc != NVME_SC_ABORTED_SQ_DELETION) {
+		if (owner->aer_cb_fn != NULL) {
+			owner->aer_cb_fn(owner->aer_cb_arg, cpl);
+		}
+
+		/*
+		 * Repost another asynchronous event request to replace the
+		 *  one that just completed.  A failure cannot be recovered
+		 *  from here, so it is only reported and the AER stays
+		 *  unsubmitted.
+		 */
+		if (nvme_ctrlr_construct_and_submit_aer(owner, request)) {
+			nvme_printf(owner, "resubmitting AER failed!\n");
+		}
+	}
+}
+
+/*
+ * Build an Asynchronous Event Request for the given aer slot and submit
+ *  it on the admin queue.
+ *
+ * aer->ctrlr and aer->req are always written, even when the request
+ *  allocation fails (aer->req is then NULL).
+ *
+ * Returns 0 once the request has been submitted, -1 if no request could
+ *  be allocated.
+ */
+static int
+nvme_ctrlr_construct_and_submit_aer(struct nvme_controller *ctrlr,
+				    struct nvme_async_event_request *aer)
+{
+	struct nvme_request *request;
+
+	aer->ctrlr = ctrlr;
+	request = nvme_allocate_request_null(nvme_ctrlr_async_event_cb, aer);
+	aer->req = request;
+
+	if (request != NULL) {
+		/*
+		 * Asynchronous event requests should by nature never be
+		 *  timed out, so the timeout is disabled.
+		 */
+		request->timeout = false;
+		request->cmd.opc = NVME_OPC_ASYNC_EVENT_REQUEST;
+		nvme_ctrlr_submit_admin_request(ctrlr, request);
+		return 0;
+	}
+
+	return -1;
+}
+
+/*
+ * Configure asynchronous event reporting and post the initial set of
+ *  asynchronous event requests.
+ *
+ * Returns 0 on success, ENXIO if the configuration command completed with
+ *  an error, and -1 if an AER could not be submitted.
+ */
+static int
+nvme_ctrlr_configure_aer(struct nvme_controller *ctrlr)
+{
+	union nvme_critical_warning_state	state;
+	struct nvme_async_event_request		*aer;
+	uint32_t				i;
+	struct nvme_completion_poll_status	status;
+
+	status.done = false;
+
+	/* Set every bit of the warning mask, then clear the reserved field. */
+	state.raw = 0xFF;
+	state.bits.reserved = 0;
+	nvme_ctrlr_cmd_set_async_event_config(ctrlr, state, nvme_completion_poll_cb, &status);
+
+	while (status.done == false) {
+		nvme_qpair_process_completions(&ctrlr->adminq, 0);
+	}
+	if (nvme_completion_is_error(&status.cpl)) {
+		nvme_printf(ctrlr, "nvme_ctrlr_cmd_set_async_event_config failed!\n");
+		return ENXIO;
+	}
+
+	/* aerl is a zero-based value, so we need to add 1 here. */
+	ctrlr->num_aers = nvme_min(NVME_MAX_ASYNC_EVENTS, (ctrlr->cdata.aerl + 1));
+
+	/* Post one request per slot; each one reposts itself on completion. */
+	for (i = 0; i < ctrlr->num_aers; i++) {
+		aer = &ctrlr->aer[i];
+		if (nvme_ctrlr_construct_and_submit_aer(ctrlr, aer)) {
+			nvme_printf(ctrlr, "nvme_ctrlr_construct_and_submit_aer failed!\n");
+			return -1;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Bring the controller up: hardware reset, admin queue reset/enable, then
+ *  identify, queue-count negotiation, I/O queue creation, namespace
+ *  construction and AER configuration, in that order.
+ *
+ * Returns 0 on success and -1 as soon as any step fails; later steps are
+ *  not attempted after a failure.
+ */
+int
+nvme_ctrlr_start(struct nvme_controller *ctrlr)
+{
+	if (nvme_ctrlr_hw_reset(ctrlr) != 0) {
+		return -1;
+	}
+
+	nvme_qpair_reset(&ctrlr->adminq);
+
+	nvme_qpair_enable(&ctrlr->adminq);
+
+	/* Short-circuit evaluation keeps the steps strictly in order. */
+	if (nvme_ctrlr_identify(ctrlr) != 0 ||
+	    nvme_ctrlr_set_num_qpairs(ctrlr) != 0 ||
+	    nvme_ctrlr_create_qpairs(ctrlr) != 0 ||
+	    nvme_ctrlr_construct_namespaces(ctrlr) != 0 ||
+	    nvme_ctrlr_configure_aer(ctrlr) != 0) {
+		return -1;
+	}
+
+	nvme_ctrlr_set_supported_log_pages(ctrlr);
+	nvme_ctrlr_set_supported_features(ctrlr);
+	return 0;
+}
+
+/*
+ * Map BAR 0 of the device (requested writable) and store the mapping in
+ *  ctrlr->regs.
+ *
+ * Returns 0 on success, -1 if the mapping call failed or produced a NULL
+ *  address.
+ */
+static int
+nvme_ctrlr_allocate_bars(struct nvme_controller *ctrlr)
+{
+	void *mapped;
+	int rc;
+
+	rc = nvme_pcicfg_map_bar(ctrlr->devhandle, 0, 0 /* writable */, &mapped);
+	ctrlr->regs = (volatile struct nvme_registers *)mapped;
+
+	if (rc == 0 && ctrlr->regs != NULL) {
+		return 0;
+	}
+
+	nvme_printf(ctrlr, "pci_device_map_range failed with error code %d\n", rc);
+	return -1;
+}
+
+/*
+ * Unmap BAR 0 if it is currently mapped.
+ *
+ * Returns 0 when there is nothing to unmap, otherwise the result of
+ *  nvme_pcicfg_unmap_bar().  ctrlr->regs itself is left untouched.
+ */
+static int
+nvme_ctrlr_free_bars(struct nvme_controller *ctrlr)
+{
+	void *mapping = (void *)ctrlr->regs;
+
+	if (mapping == NULL) {
+		return 0;
+	}
+
+	return nvme_pcicfg_unmap_bar(ctrlr->devhandle, 0, mapping);
+}
+
+/*
+ * Initialize a controller object for the given PCI device handle: map its
+ *  registers, enable PCI bus mastering, derive the doorbell stride and
+ *  minimum page size from the capabilities register, construct the admin
+ *  queue pair and initialize the controller lock.
+ *
+ * Returns 0 on success, otherwise the non-zero status of the step that
+ *  failed.
+ *
+ * NOTE(review): when nvme_ctrlr_construct_admin_qpair() fails, the BAR
+ *  mapped by nvme_ctrlr_allocate_bars() is not released here -- confirm
+ *  the caller cleans it up.
+ */
+int
+nvme_ctrlr_construct(struct nvme_controller *ctrlr, void *devhandle)
+{
+	union nvme_cap_hi_register	cap_hi;
+	uint32_t			cmd_reg;
+	int				status;
+	int				rc;
+
+	ctrlr->devhandle = devhandle;
+
+	status = nvme_ctrlr_allocate_bars(ctrlr);
+	if (status != 0) {
+		return status;
+	}
+
+	/* Enable PCI busmaster. */
+	nvme_pcicfg_read32(devhandle, &cmd_reg, 4);
+	cmd_reg |= 0x4;
+	nvme_pcicfg_write32(devhandle, cmd_reg, 4);
+
+	cap_hi.raw = nvme_mmio_read_4(ctrlr, cap_hi.raw);
+
+	/* Doorbell stride is 2 ^ (dstrd + 2),
+	 * but we want multiples of 4, so drop the + 2 */
+	ctrlr->doorbell_stride_u32 = 1 << cap_hi.bits.dstrd;
+
+	/* Smallest page size the controller supports: 2 ^ (12 + mpsmin). */
+	ctrlr->min_page_size = 1 << (12 + cap_hi.bits.mpsmin);
+
+	rc = nvme_ctrlr_construct_admin_qpair(ctrlr);
+	if (rc)
+		return rc;
+
+	ctrlr->is_resetting = false;
+	ctrlr->is_failed = false;
+
+	/* NOTE(review): the result of the mutex initialization is ignored. */
+	nvme_mutex_init_recursive(&ctrlr->ctrlr_lock);
+
+	return 0;
+}
+
+/*
+ * Tear down a controller: disable and shut down the hardware, destroy the
+ *  namespaces, all I/O queue pairs and the admin queue pair, unmap the
+ *  registers and destroy the controller lock.
+ */
+void
+nvme_ctrlr_destruct(struct nvme_controller *ctrlr)
+{
+	uint32_t	i;
+
+	/* Quiesce the hardware before any host-side state is released. */
+	nvme_ctrlr_disable(ctrlr);
+	nvme_ctrlr_shutdown(ctrlr);
+
+	nvme_ctrlr_destruct_namespaces(ctrlr);
+
+	for (i = 0; i < ctrlr->num_io_queues; i++) {
+		nvme_qpair_destroy(&ctrlr->ioq[i]);
+	}
+
+	free(ctrlr->ioq);
+
+	nvme_qpair_destroy(&ctrlr->adminq);
+
+	/* The register mapping goes last; the steps above still use it. */
+	nvme_ctrlr_free_bars(ctrlr);
+	nvme_mutex_destroy(&ctrlr->ctrlr_lock);
+}
+
+/*
+ * Submit a request on the controller's admin queue pair.
+ * No locking is done here; callers in this file that need it take
+ *  ctrlr_lock themselves.
+ */
+void
+nvme_ctrlr_submit_admin_request(struct nvme_controller *ctrlr,
+				struct nvme_request *req)
+{
+	nvme_qpair_submit_request(&ctrlr->adminq, req);
+}
+
+/*
+ * Submit a request on the I/O queue pair selected by
+ *  nvme_thread_ioq_index.  The index is asserted to be non-negative.
+ */
+void
+nvme_ctrlr_submit_io_request(struct nvme_controller *ctrlr,
+			     struct nvme_request *req)
+{
+	nvme_assert(nvme_thread_ioq_index >= 0, ("no ioq_index assigned for thread\n"));
+
+	nvme_qpair_submit_request(&ctrlr->ioq[nvme_thread_ioq_index], req);
+}
+
+/*
+ * Process completions on the I/O queue pair selected by
+ *  nvme_thread_ioq_index and return the result of
+ *  nvme_qpair_process_completions().  max_completions is passed through
+ *  unchanged.
+ */
+int32_t
+nvme_ctrlr_process_io_completions(struct nvme_controller *ctrlr, uint32_t max_completions)
+{
+	struct nvme_qpair *qpair;
+
+	nvme_assert(nvme_thread_ioq_index >= 0, ("no ioq_index assigned for thread\n"));
+	qpair = &ctrlr->ioq[nvme_thread_ioq_index];
+
+	return nvme_qpair_process_completions(qpair, max_completions);
+}
+
+/*
+ * Process completions on the admin queue pair while holding ctrlr_lock
+ *  and return the result of nvme_qpair_process_completions().
+ */
+int32_t
+nvme_ctrlr_process_admin_completions(struct nvme_controller *ctrlr)
+{
+	int32_t processed;
+
+	nvme_mutex_lock(&ctrlr->ctrlr_lock);
+	/* A max_completions argument of 0 is passed, as elsewhere in this file. */
+	processed = nvme_qpair_process_completions(&ctrlr->adminq, 0);
+	nvme_mutex_unlock(&ctrlr->ctrlr_lock);
+
+	return processed;
+}
+
+/*
+ * Return a read-only pointer to the controller data stored inside the
+ *  controller object (filled in by the identify step in this file).
+ */
+const struct nvme_controller_data *
+nvme_ctrlr_get_data(struct nvme_controller *ctrlr)
+{
+
+	return &ctrlr->cdata;
+}
+
+/*
+ * Return the number of namespaces currently tracked for this controller
+ *  (ctrlr->num_ns).
+ */
+uint32_t
+nvme_ctrlr_get_num_ns(struct nvme_controller *ctrlr)
+{
+	return ctrlr->num_ns;
+}
+
+/*
+ * Look up a namespace by its 1-based namespace ID.
+ *
+ * Returns a pointer into ctrlr->ns, or NULL when ns_id is 0 or greater
+ *  than ctrlr->num_ns.
+ */
+struct nvme_namespace *
+nvme_ctrlr_get_ns(struct nvme_controller *ctrlr, uint32_t ns_id)
+{
+	if (ns_id >= 1 && ns_id <= ctrlr->num_ns) {
+		/* IDs start at 1, array slots at 0. */
+		return &ctrlr->ns[ns_id - 1];
+	}
+
+	return NULL;
+}
+
+/*
+ * Register the callback invoked from the asynchronous event completion
+ *  handler, together with the argument passed back to it.  Storing a NULL
+ *  aer_cb_fn disables the notification, since the handler checks for NULL.
+ * No lock is taken while the two fields are updated.
+ */
+void
+nvme_ctrlr_register_aer_callback(struct nvme_controller *ctrlr,
+				 nvme_aer_cb_fn_t aer_cb_fn,
+				 void *aer_cb_arg)
+{
+	ctrlr->aer_cb_fn = aer_cb_fn;
+	ctrlr->aer_cb_arg = aer_cb_arg;
+}
+
+/*
+ * Report whether the given log page identifier is marked as supported in
+ *  the controller's log_page_supported table.
+ */
+bool
+nvme_ctrlr_is_log_page_supported(struct nvme_controller *ctrlr, uint8_t log_page)
+{
+	/* No bounds check necessary, since log_page is uint8_t and log_page_supported has 256 entries */
+	SPDK_STATIC_ASSERT(sizeof(ctrlr->log_page_supported) == 256, "log_page_supported size mismatch");
+	return ctrlr->log_page_supported[log_page];
+}
+
+/*
+ * Report whether the given feature code is marked as supported in the
+ *  controller's feature_supported table.
+ */
+bool
+nvme_ctrlr_is_feature_supported(struct nvme_controller *ctrlr, uint8_t feature_code)
+{
+	/* No bounds check necessary, since feature_code is uint8_t and feature_supported has 256 entries */
+	SPDK_STATIC_ASSERT(sizeof(ctrlr->feature_supported) == 256, "feature_supported size mismatch");
+	return ctrlr->feature_supported[feature_code];
+}
diff --git a/src/spdk/lib/nvme/nvme_ctrlr_cmd.c b/src/spdk/lib/nvme/nvme_ctrlr_cmd.c
new file mode 100644
index 0000000..564349c
--- /dev/null
+++ b/src/spdk/lib/nvme/nvme_ctrlr_cmd.c
@@ -0,0 +1,294 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright (c) Intel Corporation.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "nvme_internal.h"
+
+/*
+ * Submit a caller-built command on the calling thread's I/O queue.
+ *
+ * The command is copied into a newly allocated request that carries buf
+ *  and len as its data buffer.
+ *
+ * Returns 0 once the request has been submitted, ENOMEM if no request
+ *  could be allocated.
+ */
+int
+nvme_ctrlr_cmd_io_raw(struct nvme_controller *ctrlr,
+		      struct nvme_command *cmd,
+		      void *buf, uint32_t len,
+		      nvme_cb_fn_t cb_fn, void *cb_arg)
+{
+	struct nvme_request *request;
+
+	request = nvme_allocate_request_contig(buf, len, cb_fn, cb_arg);
+	if (request != NULL) {
+		memcpy(&request->cmd, cmd, sizeof(request->cmd));
+		nvme_ctrlr_submit_io_request(ctrlr, request);
+		return 0;
+	}
+
+	return ENOMEM;
+}
+
+/*
+ * Submit a caller-built command on the admin queue.
+ *
+ * Allocation and submission both happen with ctrlr_lock held.  The
+ *  command is copied into a newly allocated request that carries buf and
+ *  len as its data buffer.
+ *
+ * Returns 0 once the request has been submitted, ENOMEM if no request
+ *  could be allocated.
+ */
+int
+nvme_ctrlr_cmd_admin_raw(struct nvme_controller *ctrlr,
+			 struct nvme_command *cmd,
+			 void *buf, uint32_t len,
+			 nvme_cb_fn_t cb_fn, void *cb_arg)
+{
+	struct nvme_request	*request;
+	int			rc = ENOMEM;
+
+	nvme_mutex_lock(&ctrlr->ctrlr_lock);
+
+	request = nvme_allocate_request_contig(buf, len, cb_fn, cb_arg);
+	if (request != NULL) {
+		memcpy(&request->cmd, cmd, sizeof(request->cmd));
+		nvme_ctrlr_submit_admin_request(ctrlr, request);
+		rc = 0;
+	}
+
+	nvme_mutex_unlock(&ctrlr->ctrlr_lock);
+	return rc;
+}
+
+/*
+ * Build and submit an Identify command with cdw10 = 1 on the admin queue.
+ *  payload receives sizeof(struct nvme_controller_data) bytes; cb_fn is
+ *  called with cb_arg on completion.
+ *
+ * NOTE(review): unlike the int-returning helpers in this file, the
+ *  allocated request is not checked for NULL before it is dereferenced,
+ *  and the void return type leaves no way to report the failure.
+ */
+void
+nvme_ctrlr_cmd_identify_controller(struct nvme_controller *ctrlr, void *payload,
+				   nvme_cb_fn_t cb_fn, void *cb_arg)
+{
+	struct nvme_request *req;
+	struct nvme_command *cmd;
+
+	req = nvme_allocate_request_contig(payload,
+					   sizeof(struct nvme_controller_data),
+					   cb_fn, cb_arg);
+
+	cmd = &req->cmd;
+	cmd->opc = NVME_OPC_IDENTIFY;
+
+	/*
+	 * TODO: create an identify command data structure, which
+	 *  includes this CNS bit in cdw10.
+	 */
+	cmd->cdw10 = 1;
+
+	nvme_ctrlr_submit_admin_request(ctrlr, req);
+}
+
+/*
+ * Build and submit an Identify command for namespace nsid on the admin
+ *  queue (cdw10 is left as set up by the request allocator).  payload
+ *  receives sizeof(struct nvme_namespace_data) bytes; cb_fn is called
+ *  with cb_arg on completion.
+ *
+ * NOTE(review): the allocated request is not checked for NULL before it
+ *  is dereferenced.  nsid is only 16 bits wide in this interface.
+ */
+void
+nvme_ctrlr_cmd_identify_namespace(struct nvme_controller *ctrlr, uint16_t nsid,
+				  void *payload, nvme_cb_fn_t cb_fn, void *cb_arg)
+{
+	struct nvme_request *req;
+	struct nvme_command *cmd;
+
+	req = nvme_allocate_request_contig(payload,
+					   sizeof(struct nvme_namespace_data),
+					   cb_fn, cb_arg);
+
+	cmd = &req->cmd;
+	cmd->opc = NVME_OPC_IDENTIFY;
+
+	/*
+	 * TODO: create an identify command data structure
+	 */
+	cmd->nsid = nsid;
+
+	nvme_ctrlr_submit_admin_request(ctrlr, req);
+}
+
+/*
+ * Build and submit a Create I/O Completion Queue command for io_que on
+ *  the admin queue.  cb_fn is called with cb_arg on completion.
+ *
+ * NOTE(review): the allocated request is not checked for NULL before it
+ *  is dereferenced.
+ */
+void
+nvme_ctrlr_cmd_create_io_cq(struct nvme_controller *ctrlr,
+			    struct nvme_qpair *io_que, nvme_cb_fn_t cb_fn,
+			    void *cb_arg)
+{
+	struct nvme_request *req;
+	struct nvme_command *cmd;
+
+	req = nvme_allocate_request_null(cb_fn, cb_arg);
+
+	cmd = &req->cmd;
+	cmd->opc = NVME_OPC_CREATE_IO_CQ;
+
+	/*
+	 * TODO: create a create io completion queue command data
+	 *  structure.
+	 */
+	/* Upper half: 0-based queue size; lower half: queue id. */
+	cmd->cdw10 = ((io_que->num_entries - 1) << 16) | io_que->id;
+	/*
+	 * 0x2 = interrupts enabled
+	 * 0x1 = physically contiguous
+	 *
+	 * Only the physically-contiguous flag is set here; the queue id is
+	 *  placed in the upper half.
+	 */
+	cmd->cdw11 = (io_que->id << 16) | 0x1;
+	cmd->dptr.prp.prp1 = io_que->cpl_bus_addr;
+
+	nvme_ctrlr_submit_admin_request(ctrlr, req);
+}
+
+/*
+ * Build and submit a Create I/O Submission Queue command for io_que on
+ *  the admin queue.  cb_fn is called with cb_arg on completion.
+ *
+ * NOTE(review): the allocated request is not checked for NULL before it
+ *  is dereferenced.
+ */
+void
+nvme_ctrlr_cmd_create_io_sq(struct nvme_controller *ctrlr,
+			    struct nvme_qpair *io_que, nvme_cb_fn_t cb_fn, void *cb_arg)
+{
+	struct nvme_request *req;
+	struct nvme_command *cmd;
+
+	req = nvme_allocate_request_null(cb_fn, cb_arg);
+
+	cmd = &req->cmd;
+	cmd->opc = NVME_OPC_CREATE_IO_SQ;
+
+	/*
+	 * TODO: create a create io submission queue command data
+	 *  structure.
+	 */
+	/* Upper half: 0-based queue size; lower half: queue id. */
+	cmd->cdw10 = ((io_que->num_entries - 1) << 16) | io_que->id;
+	/* 0x1 = physically contiguous */
+	/* The same id goes in the upper half, pairing this SQ with its CQ. */
+	cmd->cdw11 = (io_que->id << 16) | 0x1;
+	cmd->dptr.prp.prp1 = io_que->cmd_bus_addr;
+
+	nvme_ctrlr_submit_admin_request(ctrlr, req);
+}
+
+/*
+ * Build and submit a Set Features command on the admin queue, with
+ *  ctrlr_lock held across allocation and submission.
+ *
+ * feature goes into cdw10; cdw11 and cdw12 are passed through unchanged.
+ * payload/payload_size describe an optional data buffer.  When payload is
+ *  non-NULL and payload_size is non-zero the buffer is attached to the
+ *  request through nvme_allocate_request_contig(), the same allocator
+ *  nvme_ctrlr_cmd_get_log_page() uses; previously both arguments were
+ *  silently ignored, so features with a data buffer transferred nothing.
+ *  Without a payload the request carries no data, as before.
+ * NOTE(review): the buffer presumably has to satisfy the same memory
+ *  requirements as any other contiguous request payload -- confirm.
+ *
+ * Returns 0 once the request has been submitted, ENOMEM if no request
+ *  could be allocated.
+ */
+int
+nvme_ctrlr_cmd_set_feature(struct nvme_controller *ctrlr, uint8_t feature,
+			   uint32_t cdw11, uint32_t cdw12, void *payload, uint32_t payload_size,
+			   nvme_cb_fn_t cb_fn, void *cb_arg)
+{
+	struct nvme_request *req;
+	struct nvme_command *cmd;
+
+	nvme_mutex_lock(&ctrlr->ctrlr_lock);
+	if (payload != NULL && payload_size > 0) {
+		req = nvme_allocate_request_contig(payload, payload_size, cb_fn, cb_arg);
+	} else {
+		req = nvme_allocate_request_null(cb_fn, cb_arg);
+	}
+	if (req == NULL) {
+		nvme_mutex_unlock(&ctrlr->ctrlr_lock);
+		return ENOMEM;
+	}
+
+	cmd = &req->cmd;
+	cmd->opc = NVME_OPC_SET_FEATURES;
+	cmd->cdw10 = feature;
+	cmd->cdw11 = cdw11;
+	cmd->cdw12 = cdw12;
+
+	nvme_ctrlr_submit_admin_request(ctrlr, req);
+	nvme_mutex_unlock(&ctrlr->ctrlr_lock);
+
+	return 0;
+}
+
+/*
+ * Build and submit a Get Features command on the admin queue, with
+ *  ctrlr_lock held across allocation and submission.
+ *
+ * feature goes into cdw10; cdw11 is passed through unchanged.
+ * payload/payload_size describe an optional buffer for feature data.
+ *  When payload is non-NULL and payload_size is non-zero the buffer is
+ *  attached to the request through nvme_allocate_request_contig(), the
+ *  same allocator nvme_ctrlr_cmd_get_log_page() uses; previously both
+ *  arguments were silently ignored, so no feature data could ever be
+ *  returned in the buffer.  Without a payload the request carries no
+ *  data, as before.
+ * NOTE(review): the buffer presumably has to satisfy the same memory
+ *  requirements as any other contiguous request payload -- confirm.
+ *
+ * Returns 0 once the request has been submitted, ENOMEM if no request
+ *  could be allocated.
+ */
+int
+nvme_ctrlr_cmd_get_feature(struct nvme_controller *ctrlr, uint8_t feature,
+			   uint32_t cdw11, void *payload, uint32_t payload_size,
+			   nvme_cb_fn_t cb_fn, void *cb_arg)
+{
+	struct nvme_request *req;
+	struct nvme_command *cmd;
+
+	nvme_mutex_lock(&ctrlr->ctrlr_lock);
+	if (payload != NULL && payload_size > 0) {
+		req = nvme_allocate_request_contig(payload, payload_size, cb_fn, cb_arg);
+	} else {
+		req = nvme_allocate_request_null(cb_fn, cb_arg);
+	}
+	if (req == NULL) {
+		nvme_mutex_unlock(&ctrlr->ctrlr_lock);
+		return ENOMEM;
+	}
+
+	cmd = &req->cmd;
+	cmd->opc = NVME_OPC_GET_FEATURES;
+	cmd->cdw10 = feature;
+	cmd->cdw11 = cdw11;
+
+	nvme_ctrlr_submit_admin_request(ctrlr, req);
+	nvme_mutex_unlock(&ctrlr->ctrlr_lock);
+
+	return 0;
+}
+
+/*
+ * Request num_queues submission and completion queues through the
+ *  Number of Queues feature.  cb_fn is called with cb_arg on completion.
+ *
+ * NOTE(review): the return value of nvme_ctrlr_cmd_set_feature() is
+ *  discarded, so an allocation failure there goes unreported.  A
+ *  num_queues of 0 wraps around in the subtraction below.
+ */
+void
+nvme_ctrlr_cmd_set_num_queues(struct nvme_controller *ctrlr,
+			      uint32_t num_queues, nvme_cb_fn_t cb_fn, void *cb_arg)
+{
+	uint32_t cdw11;
+
+	/* The same 0-based count is placed in both 16-bit halves. */
+	cdw11 = ((num_queues - 1) << 16) | (num_queues - 1);
+	nvme_ctrlr_cmd_set_feature(ctrlr, NVME_FEAT_NUMBER_OF_QUEUES, cdw11, 0,
+				   NULL, 0, cb_fn, cb_arg);
+}
+
+/*
+ * Set the Asynchronous Event Configuration feature; the raw value of
+ *  state is sent as cdw11.  cb_fn is called with cb_arg on completion.
+ *
+ * NOTE(review): the return value of nvme_ctrlr_cmd_set_feature() is
+ *  discarded, so an allocation failure there goes unreported.
+ */
+void
+nvme_ctrlr_cmd_set_async_event_config(struct nvme_controller *ctrlr,
+				      union nvme_critical_warning_state state, nvme_cb_fn_t cb_fn,
+				      void *cb_arg)
+{
+	uint32_t cdw11;
+
+	cdw11 = state.raw;
+	nvme_ctrlr_cmd_set_feature(ctrlr,
+				   NVME_FEAT_ASYNC_EVENT_CONFIGURATION, cdw11, 0, NULL, 0, cb_fn,
+				   cb_arg);
+}
+
+/*
+ * Build and submit a Get Log Page command on the admin queue, with
+ *  ctrlr_lock held across allocation and submission.
+ *
+ * log_page selects the page and nsid the namespace; payload receives
+ *  payload_size bytes.  The transfer length is encoded in cdw10 as a
+ *  0-based count of 32-bit words, so payload_size must cover at least one
+ *  word: a smaller size would make the count wrap around and request a
+ *  huge transfer into a too-small buffer.
+ *
+ * Returns 0 once the request has been submitted, EINVAL if payload_size
+ *  is smaller than one 32-bit word, ENOMEM if no request could be
+ *  allocated.
+ */
+int
+nvme_ctrlr_cmd_get_log_page(struct nvme_controller *ctrlr, uint8_t log_page,
+			    uint32_t nsid, void *payload, uint32_t payload_size, nvme_cb_fn_t cb_fn,
+			    void *cb_arg)
+{
+	struct nvme_request *req;
+	struct nvme_command *cmd;
+	uint32_t numd;
+
+	if (payload_size < sizeof(uint32_t)) {
+		return EINVAL;
+	}
+
+	/* Number of dwords to transfer, 0-based. */
+	numd = (payload_size / (uint32_t)sizeof(uint32_t)) - 1;
+
+	nvme_mutex_lock(&ctrlr->ctrlr_lock);
+	req = nvme_allocate_request_contig(payload, payload_size, cb_fn, cb_arg);
+	if (req == NULL) {
+		nvme_mutex_unlock(&ctrlr->ctrlr_lock);
+		return ENOMEM;
+	}
+
+	cmd = &req->cmd;
+	cmd->opc = NVME_OPC_GET_LOG_PAGE;
+	cmd->nsid = nsid;
+	cmd->cdw10 = numd << 16;
+	cmd->cdw10 |= log_page;
+
+	nvme_ctrlr_submit_admin_request(ctrlr, req);
+	nvme_mutex_unlock(&ctrlr->ctrlr_lock);
+
+	return 0;
+}
+
+/*
+ * Build and submit an Abort command on the admin queue for command
+ *  identifier cid on submission queue sqid.  cb_fn is called with cb_arg
+ *  on completion.
+ *
+ * NOTE(review): the allocated request is not checked for NULL before it
+ *  is dereferenced, and the void return type leaves no way to report the
+ *  failure.
+ */
+void
+nvme_ctrlr_cmd_abort(struct nvme_controller *ctrlr, uint16_t cid,
+		     uint16_t sqid, nvme_cb_fn_t cb_fn, void *cb_arg)
+{
+	struct nvme_request *req;
+	struct nvme_command *cmd;
+
+	req = nvme_allocate_request_null(cb_fn, cb_arg);
+
+	cmd = &req->cmd;
+	cmd->opc = NVME_OPC_ABORT;
+	/*
+	 * cid occupies the upper 16 bits, sqid the lower 16.  Widen cid to
+	 *  an unsigned 32-bit value before shifting: a uint16_t is promoted
+	 *  to signed int, and shifting a value >= 0x8000 left by 16 would
+	 *  overflow it.
+	 */
+	cmd->cdw10 = ((uint32_t)cid << 16) | sqid;
+
+	nvme_ctrlr_submit_admin_request(ctrlr, req);
+}
diff --git a/src/spdk/lib/nvme/nvme_impl.h b/src/spdk/lib/nvme/nvme_impl.h
new file mode 100644
index 0000000..9c580c9
--- /dev/null
+++ b/src/spdk/lib/nvme/nvme_impl.h
@@ -0,0 +1,171 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright (c) Intel Corporation.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __NVME_IMPL_H__
+#define __NVME_IMPL_H__
+
+#include "spdk/vtophys.h"
+#include <assert.h>
+#include <pciaccess.h>
+#include <rte_malloc.h>
+#include <rte_config.h>
+#include <rte_mempool.h>
+#include <rte_memcpy.h>
+
+/**
+ * \file
+ *
+ * This file describes the callback functions required to integrate
+ * the userspace NVMe driver for a specific implementation.  This
+ * implementation is specific for DPDK for Storage.  Users would
+ * revise it as necessary for their own particular environment if not
+ * using it within the DPDK for Storage framework.
+ */
+
+/**
+ * \page nvme_driver_integration NVMe Driver Integration
+ *
+ * Users can integrate the userspace NVMe driver into their environment
+ * by implementing the callbacks in nvme_impl.h.  These callbacks
+ * enable users to specify how to allocate pinned and physically
+ * contiguous memory, perform virtual to physical address
+ * translations, log messages, PCI configuration and register mapping,
+ * and a number of other facilities that may differ depending on the
+ * environment.
+ */
+
+/**
+ * Allocate a pinned, physically contiguous memory buffer with the
+ *   given size and alignment.
+ * The buffer comes from rte_zmalloc(); *phys_addr is always written with
+ *   the result of rte_malloc_virt2phy() for the returned pointer, including
+ *   when the allocation failed and NULL is returned.
+ * Note: these calls are only made during driver initialization.  Per
+ *   I/O allocations during driver operation use the nvme_alloc_request
+ *   callback.
+ */
+static inline void *
+nvme_malloc(const char *tag, size_t size, unsigned align, uint64_t *phys_addr)
+{
+	void *buf = rte_zmalloc(tag, size, align);
+	*phys_addr = rte_malloc_virt2phy(buf);
+	return buf;
+}
+
+/**
+ * Free a memory buffer previously allocated with nvme_malloc.
+ */
+#define nvme_free(buf)			rte_free(buf)
+
+/**
+ * Log or print a message from the NVMe driver.
+ * This implementation forwards to printf() and does not use the ctrlr
+ *  argument.
+ */
+#define nvme_printf(ctrlr, fmt, args...) printf(fmt, ##args)
+
+/**
+ * Assert a condition and panic/abort as desired.  Failures of these
+ *  assertions indicate catastrophic failures within the driver.
+ * This implementation forwards to assert() and does not use the str
+ *  argument.
+ */
+#define nvme_assert(check, str) assert(check)
+
+/**
+ * Return the physical address for the specified virtual address.
+ * NVME_VTOPHYS_ERROR aliases VTOPHYS_ERROR (presumably the value vtophys()
+ *  returns when the translation fails -- confirm in spdk/vtophys.h).
+ */
+#define nvme_vtophys(buf)		vtophys(buf)
+#define NVME_VTOPHYS_ERROR		VTOPHYS_ERROR
+
+/* Memory pool backing nvme_alloc_request()/nvme_dealloc_request(); defined elsewhere. */
+extern struct rte_mempool *request_mempool;
+
+/**
+ * Return a buffer for an nvme_request object.  These objects are allocated
+ *  for each I/O.  They do not need to be pinned nor physically contiguous.
+ * The macro expands to the rte_mempool_get() call expression itself, with
+ *  no trailing semicolon, so it can be used as a statement inside if/else
+ *  and its result can be inspected.
+ */
+#define nvme_alloc_request(bufp)	rte_mempool_get(request_mempool, (void **)(bufp))
+
+/**
+ * Free a buffer previously allocated with nvme_alloc_request().
+ */
+#define nvme_dealloc_request(buf)	rte_mempool_put(request_mempool, buf)
+
+/**
+ * Read or write a 32-bit value in the PCI configuration space of the
+ *  device identified by handle, at the given byte offset.  For the read
+ *  variant, var is a pointer that receives the value; for the write
+ *  variant, var is the value to write.  Both forward to libpciaccess.
+ */
+#define nvme_pcicfg_read32(handle, var, offset)  pci_device_cfg_read_u32(handle, var, offset)
+#define nvme_pcicfg_write32(handle, var, offset) pci_device_cfg_write_u32(handle, var, offset)
+
+/**
+ * Map the memory region of the given BAR into the process.
+ * devhandle is a struct pci_device pointer.  The whole region (its base
+ *  address and size as recorded in the device's region table) is mapped,
+ *  writable unless read_only is non-zero.  The mapping is returned through
+ *  mapped_addr; the return value is that of pci_device_map_range().
+ */
+static inline int
+nvme_pcicfg_map_bar(void *devhandle, uint32_t bar, uint32_t read_only, void **mapped_addr)
+{
+	struct pci_device *dev = devhandle;
+	uint32_t flags = 0;
+
+	if (!read_only) {
+		flags = PCI_DEV_MAP_FLAG_WRITABLE;
+	}
+
+	return pci_device_map_range(dev,
+				    dev->regions[bar].base_addr,
+				    dev->regions[bar].size,
+				    flags, mapped_addr);
+}
+
+/**
+ * Unmap a BAR previously mapped with nvme_pcicfg_map_bar().
+ * devhandle is a struct pci_device pointer and addr the mapped address;
+ *  the length unmapped is the full size of the BAR's region.  The return
+ *  value is that of pci_device_unmap_range().
+ */
+static inline int
+nvme_pcicfg_unmap_bar(void *devhandle, uint32_t bar, void *addr)
+{
+	struct pci_device *dev = devhandle;
+
+	return pci_device_unmap_range(dev, addr, dev->regions[bar].size);
+}
+
+/**
+ * Mutex abstraction used by the driver; this implementation maps it
+ *  directly onto pthread mutexes.  nvme_mutex_init() creates a mutex with
+ *  default attributes.
+ */
+typedef pthread_mutex_t nvme_mutex_t;
+
+#define nvme_mutex_init(x) pthread_mutex_init((x), NULL)
+#define nvme_mutex_destroy(x) pthread_mutex_destroy((x))
+#define nvme_mutex_lock pthread_mutex_lock
+#define nvme_mutex_unlock pthread_mutex_unlock
+#define NVME_MUTEX_INITIALIZER PTHREAD_MUTEX_INITIALIZER
+
+/**
+ * Initialize mtx as a recursive mutex.
+ * Returns 0 on success and -1 if the attribute object could not be
+ *  initialized, the recursive type could not be set, or the mutex itself
+ *  could not be initialized.
+ */
+static inline int
+nvme_mutex_init_recursive(nvme_mutex_t *mtx)
+{
+	pthread_mutexattr_t attr;
+	int rc = -1;
+
+	if (pthread_mutexattr_init(&attr) != 0) {
+		return -1;
+	}
+
+	/* The mutex is only created once the recursive type has been set. */
+	if (pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE) == 0 &&
+	    pthread_mutex_init(mtx, &attr) == 0) {
+		rc = 0;
+	}
+
+	/* The attribute object is released on success and failure alike. */
+	pthread_mutexattr_destroy(&attr);
+	return rc;
+}
+
+/**
+ * Copy a struct nvme_command from one memory location to another.
+ * Always copies sizeof(struct nvme_command) bytes using rte_memcpy().
+ */
+#define nvme_copy_command(dst, src)	rte_memcpy((dst), (src), sizeof(struct nvme_command))
+
+#endif /* __NVME_IMPL_H__ */
diff --git a/src/spdk/lib/nvme/nvme_internal.h b/src/spdk/lib/nvme/nvme_internal.h
new file mode 100644
index 0000000..338e531
--- /dev/null
+++ b/src/spdk/lib/nvme/nvme_internal.h
@@ -0,0 +1,444 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright (c) Intel Corporation.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __NVME_INTERNAL_H__
+#define __NVME_INTERNAL_H__
+
+#include <errno.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+#include <x86intrin.h>
+
+#include <sys/user.h>
+
+#include "spdk/nvme.h"
+
+#include "spdk/queue.h"
+#include "spdk/barrier.h"
+#include "spdk/mmio.h"
+#include "spdk/pci_ids.h"
+#include "spdk/nvme_intel.h"
+
+#define NVME_MAX_PRP_LIST_ENTRIES	(32)
+
+/*
+ * For commands requiring more than 2 PRP entries, one PRP will be
+ *  embedded in the command (prp1), and the rest of the PRP entries
+ *  will be in a list pointed to by the command (prp2).  This means
+ *  that real max number of PRP entries we support is 32+1, which
+ *  results in a max xfer size of 32*PAGE_SIZE.
+ *
+ * The expansion is parenthesized so that the macro behaves as a single
+ *  value inside larger expressions (e.g. x % NVME_MAX_XFER_SIZE).
+ */
+#define NVME_MAX_XFER_SIZE	(NVME_MAX_PRP_LIST_ENTRIES * PAGE_SIZE)
+
+#define NVME_ADMIN_TRACKERS	(16)
+#define NVME_ADMIN_ENTRIES	(128)
+/* min and max are defined in admin queue attributes section of spec */
+#define NVME_MIN_ADMIN_ENTRIES	(2)
+#define NVME_MAX_ADMIN_ENTRIES	(4096)
+
+/*
+ * NVME_IO_ENTRIES defines the size of an I/O qpair's submission and completion
+ *  queues, while NVME_IO_TRACKERS defines the maximum number of I/O that we
+ *  will allow outstanding on an I/O qpair at any time.  The only advantage in
+ *  having IO_ENTRIES > IO_TRACKERS is for debugging purposes - when dumping
+ *  the contents of the submission and completion queues, it will show a longer
+ *  history of data.
+ */
+#define NVME_IO_ENTRIES		(256)
+#define NVME_IO_TRACKERS	(128)
+#define NVME_MIN_IO_TRACKERS	(4)
+#define NVME_MAX_IO_TRACKERS	(1024)
+
+/*
+ * NVME_MAX_IO_ENTRIES is not defined, since it is specified in CAP.MQES
+ *  for each controller.
+ */
+
+#define NVME_MAX_ASYNC_EVENTS	(8)
+
+#define NVME_MIN_TIMEOUT_PERIOD		(5)
+#define NVME_MAX_TIMEOUT_PERIOD		(120)
+
+/* Maximum log page size to fetch for AERs. */
+#define NVME_MAX_AER_LOG_SIZE		(4096)
+
+/*
+ * NVME_MAX_IO_QUEUES in nvme_spec.h defines the 64K spec-limit, but this
+ *  define specifies the maximum number of queues this driver will actually
+ *  try to configure, if available.
+ */
+#define DEFAULT_MAX_IO_QUEUES		(1024)
+
+/** Selects which member of nvme_payload::u describes a request's data. */
+enum nvme_payload_type {
+	NVME_PAYLOAD_TYPE_INVALID = 0,
+
+	/** nvme_request::payload.u.contig is valid for this request */
+	NVME_PAYLOAD_TYPE_CONTIG,
+
+	/** nvme_request::payload.u.sgl is valid for this request */
+	NVME_PAYLOAD_TYPE_SGL,
+};
+
+/**
+ * Descriptor for a request data payload.
+ *
+ * This struct is arranged so that it fits nicely in struct nvme_request.
+ * It is declared packed, so the one-byte type tag that follows the union
+ *  does not pull in any trailing padding.
+ */
+struct __attribute__((packed)) nvme_payload {
+	union {
+		/** Virtual memory address of a single physically contiguous buffer */
+		void *contig;
+
+		/**
+		 * Functions for retrieving physical addresses for scattered payloads.
+		 */
+		struct {
+			nvme_req_reset_sgl_fn_t reset_sgl_fn;
+			nvme_req_next_sge_fn_t next_sge_fn;
+		} sgl;
+	} u;
+
+	/** \ref nvme_payload_type */
+	uint8_t type;
+};
+
+/**
+ * A single NVMe command together with its payload description and
+ *  completion callback.
+ */
+struct nvme_request {
+	/** Command to submit; the nvme_ns_cmd_*() builders fill in opc, nsid and the command dwords. */
+	struct nvme_command		cmd;
+
+	/**
+	 * Data payload for this request's command.
+	 */
+	struct nvme_payload		payload;
+
+	/* NOTE(review): timeout and retries are not used in this part of the driver - confirm their meaning in the qpair code. */
+	uint8_t				timeout;
+	uint8_t				retries;
+
+	/**
+	 * Number of children requests still outstanding for this
+	 *  request which was split into multiple child requests.
+	 * Being a uint8_t, it can count at most 255 children.
+	 */
+	uint8_t				num_children;
+
+	/** Payload size in bytes (the read/write path passes lba_count * sector_size). */
+	uint32_t			payload_size;
+
+	/**
+	 * Offset in bytes from the beginning of payload for this request.
+	 * This is used for I/O commands that are split into multiple requests.
+	 */
+	uint32_t			payload_offset;
+
+	/** Completion callback, invoked as cb_fn(cb_arg, completion); may be NULL. */
+	nvme_cb_fn_t			cb_fn;
+	void				*cb_arg;
+
+	/* Presumably links the request into nvme_qpair::queued_req - verify against the qpair code. */
+	STAILQ_ENTRY(nvme_request)	stailq;
+
+	/**
+	 * The following members should not be reordered with members
+	 *  above.  These members are only needed when splitting
+	 *  requests which is done rarely, and the driver is careful
+	 *  to not touch the following fields until a split operation is
+	 *  needed, to avoid touching an extra cacheline.
+	 */
+
+	/**
+	 * Points to the outstanding child requests for a parent request.
+	 *  Only valid if a request was split into multiple children
+	 *  requests, and is not initialized for non-split requests.
+	 */
+	TAILQ_HEAD(, nvme_request)	children;
+
+	/**
+	 * Linked-list pointers for a child request in its parent's list.
+	 */
+	TAILQ_ENTRY(nvme_request)	child_tailq;
+
+	/**
+	 * Points to a parent request if part of a split request,
+	 *   NULL otherwise.
+	 */
+	struct nvme_request		*parent;
+
+	/**
+	 * Completion status for a parent request.  Initialized to all 0's
+	 *  (SUCCESS) before child requests are submitted.  If a child
+	 *  request completes with error, the error status is copied here,
+	 *  to ensure that the parent request is also completed with error
+	 *  status once all child requests are completed.
+	 */
+	struct nvme_completion		parent_status;
+};
+
+/**
+ * State shared between a synchronous caller and nvme_completion_poll_cb().
+ *
+ * The caller clears done, passes a pointer to this struct as the callback
+ *  argument, and processes completions until done becomes true; cpl is then
+ *  inspected for errors (see nvme_ns_construct() for the pattern).
+ */
+struct nvme_completion_poll_status {
+	struct nvme_completion	cpl;
+	bool			done;
+};
+
+struct nvme_async_event_request {
+	struct nvme_controller		*ctrlr;
+	struct nvme_request		*req;
+	struct nvme_completion		cpl;
+};
+
+struct nvme_tracker {
+	LIST_ENTRY(nvme_tracker)	list;
+
+	struct nvme_request		*req;
+	uint16_t			cid;
+
+	uint64_t			prp_bus_addr;
+	uint64_t			prp[NVME_MAX_PRP_LIST_ENTRIES];
+};
+
+struct nvme_qpair {
+	volatile uint32_t		*sq_tdbl;
+	volatile uint32_t		*cq_hdbl;
+
+	/**
+	 * Submission queue
+	 */
+	struct nvme_command		*cmd;
+
+	/**
+	 * Completion queue
+	 */
+	struct nvme_completion		*cpl;
+
+	LIST_HEAD(, nvme_tracker)	free_tr;
+	LIST_HEAD(, nvme_tracker)	outstanding_tr;
+
+	STAILQ_HEAD(, nvme_request)	queued_req;
+
+	struct nvme_tracker		**act_tr;
+
+	uint16_t			id;
+
+	uint16_t			num_entries;
+	uint16_t			sq_tail;
+	uint16_t			cq_head;
+
+	uint8_t				phase;
+
+	bool				is_enabled;
+
+	/*
+	 * Fields below this point should not be touched on the normal I/O happy path.
+	 */
+	struct nvme_controller		*ctrlr;
+
+	uint64_t			cmd_bus_addr;
+	uint64_t			cpl_bus_addr;
+};
+
+/**
+ * Per-namespace state, filled in by nvme_ns_construct().
+ */
+struct nvme_namespace {
+	/** Controller this namespace belongs to */
+	struct nvme_controller		*ctrlr;
+	/** Stripe size; 0 unless the controller is an Intel DC P3X00 reporting a non-zero cdata.vs[3] */
+	uint32_t			stripe_size;
+	/** 1 << lbads of the currently selected LBA format */
+	uint32_t			sector_size;
+	/** Controller max_xfer_size divided by sector_size */
+	uint32_t			sectors_per_max_io;
+	/** stripe_size divided by sector_size (0 when striping is not used) */
+	uint32_t			sectors_per_stripe;
+	/** Namespace ID; always > 0, and (id - 1) indexes the controller's nsdata array */
+	uint16_t			id;
+	/** Bitmask of NVME_NS_*_SUPPORTED flags */
+	uint16_t			flags;
+};
+
+/*
+ * One of these per allocated PCI device.
+ */
+struct nvme_controller {
+	/* Hot data (accessed in I/O path) starts here. */
+
+	/** NVMe MMIO register space */
+	volatile struct nvme_registers	*regs;
+
+	/** I/O queue pairs */
+	struct nvme_qpair		*ioq;
+
+	/** Array of namespaces indexed by nsid - 1 */
+	struct nvme_namespace		*ns;
+
+	uint32_t			num_ns;
+
+	bool				is_resetting;
+
+	bool				is_failed;
+
+	/* Cold data (not accessed in normal I/O path) is after this point. */
+
+	/** All the log pages supported */
+	bool				log_page_supported[256];
+
+	/** All the features supported */
+	bool				feature_supported[256];
+
+	/* Opaque handle to associated PCI device. */
+	void				*devhandle;
+
+	uint32_t			num_io_queues;
+
+	/** maximum i/o size in bytes */
+	uint32_t			max_xfer_size;
+
+	/** minimum page size supported by this controller in bytes */
+	uint32_t			min_page_size;
+
+	/** stride in uint32_t units between doorbell registers (1 = 4 bytes, 2 = 8 bytes, ...) */
+	uint32_t			doorbell_stride_u32;
+
+	uint32_t			num_aers;
+	struct nvme_async_event_request	aer[NVME_MAX_ASYNC_EVENTS];
+	nvme_aer_cb_fn_t		aer_cb_fn;
+	void				*aer_cb_arg;
+
+	/** guards access to the controller itself, including admin queues */
+	nvme_mutex_t			ctrlr_lock;
+
+
+	struct nvme_qpair		adminq;
+
+	/**
+	 * Identify Controller data.
+	 */
+	struct nvme_controller_data	cdata;
+
+	/**
+	 * Array of Identify Namespace data.
+	 *
+	 * Stored separately from ns since nsdata should not normally be accessed during I/O.
+	 */
+	struct nvme_namespace_data	*nsdata;
+};
+
+extern __thread int nvme_thread_ioq_index;
+
+struct nvme_driver {
+	nvme_mutex_t	lock;
+	uint16_t	*ioq_index_pool;
+	uint32_t	max_io_queues;
+	uint16_t	ioq_index_pool_next;
+};
+
+extern struct nvme_driver g_nvme_driver;
+
+#define nvme_min(a,b) (((a)<(b))?(a):(b))
+
+#define INTEL_DC_P3X00_DEVID	0x09538086
+
+#define nvme_mmio_read_4(sc, reg) \
+	spdk_mmio_read_4(&(sc)->regs->reg)
+
+#define nvme_mmio_write_4(sc, reg, val) \
+	spdk_mmio_write_4(&(sc)->regs->reg, val)
+
+#define nvme_mmio_write_8(sc, reg, val) \
+	spdk_mmio_write_8(&(sc)->regs->reg, val)
+
+#define nvme_delay		usleep
+
+/**
+ * Return floor(log2(x)), i.e. the index of the highest set bit of x.
+ *
+ * x == 0 has no logarithm; 0 is returned in that case.
+ */
+static inline uint32_t
+nvme_u32log2(uint32_t x)
+{
+	/* __builtin_clz(0) is undefined, so zero is answered without calling it. */
+	return (x == 0) ? 0 : (31u - __builtin_clz(x));
+}
+
+/**
+ * Round x up to the nearest power of two.
+ *
+ * A value that already is a power of two is returned unchanged; 0 and 1
+ *  both yield 1.  x must not exceed 2^31, because the next power of two
+ *  would not fit in a uint32_t.
+ */
+static inline uint32_t
+nvme_align32pow2(uint32_t x)
+{
+	if (x <= 1) {
+		/*
+		 * nvme_u32log2(0) reports 0, so the general formula below would
+		 *  turn 1 into 2; for 0, x - 1 wraps around and the shift count
+		 *  would become 32.
+		 */
+		return 1;
+	}
+	return 1u << (1 + nvme_u32log2(x - 1));
+}
+
+/* Admin functions */
+void	nvme_ctrlr_cmd_identify_controller(struct nvme_controller *ctrlr,
+		void *payload,
+		nvme_cb_fn_t cb_fn, void *cb_arg);
+void	nvme_ctrlr_cmd_identify_namespace(struct nvme_controller *ctrlr,
+		uint16_t nsid, void *payload,
+		nvme_cb_fn_t cb_fn, void *cb_arg);
+void	nvme_ctrlr_cmd_create_io_cq(struct nvme_controller *ctrlr,
+				    struct nvme_qpair *io_que,
+				    nvme_cb_fn_t cb_fn, void *cb_arg);
+void	nvme_ctrlr_cmd_create_io_sq(struct nvme_controller *ctrlr,
+				    struct nvme_qpair *io_que,
+				    nvme_cb_fn_t cb_fn, void *cb_arg);
+void	nvme_ctrlr_cmd_set_num_queues(struct nvme_controller *ctrlr,
+				      uint32_t num_queues, nvme_cb_fn_t cb_fn,
+				      void *cb_arg);
+void	nvme_ctrlr_cmd_set_async_event_config(struct nvme_controller *ctrlr,
+		union nvme_critical_warning_state state,
+		nvme_cb_fn_t cb_fn, void *cb_arg);
+void	nvme_ctrlr_cmd_abort(struct nvme_controller *ctrlr, uint16_t cid,
+			     uint16_t sqid, nvme_cb_fn_t cb_fn, void *cb_arg);
+
+void	nvme_completion_poll_cb(void *arg, const struct nvme_completion *cpl);
+
+int	nvme_ctrlr_construct(struct nvme_controller *ctrlr, void *devhandle);
+void	nvme_ctrlr_destruct(struct nvme_controller *ctrlr);
+int	nvme_ctrlr_start(struct nvme_controller *ctrlr);
+
+void	nvme_ctrlr_submit_admin_request(struct nvme_controller *ctrlr,
+					struct nvme_request *req);
+void	nvme_ctrlr_submit_io_request(struct nvme_controller *ctrlr,
+				     struct nvme_request *req);
+void	nvme_ctrlr_post_failed_request(struct nvme_controller *ctrlr,
+				       struct nvme_request *req);
+
+int	nvme_qpair_construct(struct nvme_qpair *qpair, uint16_t id,
+			     uint16_t num_entries,
+			     uint16_t num_trackers,
+			     struct nvme_controller *ctrlr);
+void	nvme_qpair_destroy(struct nvme_qpair *qpair);
+void	nvme_qpair_enable(struct nvme_qpair *qpair);
+void	nvme_qpair_disable(struct nvme_qpair *qpair);
+int32_t	nvme_qpair_process_completions(struct nvme_qpair *qpair, uint32_t max_completions);
+void	nvme_qpair_submit_request(struct nvme_qpair *qpair,
+				  struct nvme_request *req);
+void	nvme_qpair_reset(struct nvme_qpair *qpair);
+void	nvme_qpair_fail(struct nvme_qpair *qpair);
+
+int	nvme_ns_construct(struct nvme_namespace *ns, uint16_t id,
+			  struct nvme_controller *ctrlr);
+void	nvme_ns_destruct(struct nvme_namespace *ns);
+
+struct nvme_request *nvme_allocate_request(const struct nvme_payload *payload,
+		uint32_t payload_size, nvme_cb_fn_t cb_fn, void *cb_arg);
+struct nvme_request *nvme_allocate_request_null(nvme_cb_fn_t cb_fn, void *cb_arg);
+struct nvme_request *nvme_allocate_request_contig(void *buffer, uint32_t payload_size,
+		nvme_cb_fn_t cb_fn, void *cb_arg);
+void	nvme_free_request(struct nvme_request *req);
+
+#endif /* __NVME_INTERNAL_H__ */
diff --git a/src/spdk/lib/nvme/nvme_ns.c b/src/spdk/lib/nvme/nvme_ns.c
new file mode 100644
index 0000000..6e2ff90
--- /dev/null
+++ b/src/spdk/lib/nvme/nvme_ns.c
@@ -0,0 +1,139 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright (c) Intel Corporation.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "nvme_internal.h"
+
+/**
+ * Locate the Identify Namespace data for ns inside its controller's nsdata
+ *  array, which is indexed by namespace ID minus one.
+ */
+static inline struct nvme_namespace_data *
+_nvme_ns_get_data(struct nvme_namespace *ns)
+{
+	return ns->ctrlr->nsdata + (ns->id - 1);
+}
+
+/** Return the namespace ID of ns. */
+uint32_t
+nvme_ns_get_id(struct nvme_namespace *ns)
+{
+	uint32_t nsid = ns->id;
+
+	return nsid;
+}
+
+/** Return the maximum transfer size of the controller that owns ns. */
+uint32_t
+nvme_ns_get_max_io_xfer_size(struct nvme_namespace *ns)
+{
+	struct nvme_controller *owner = ns->ctrlr;
+
+	return owner->max_xfer_size;
+}
+
+/** Return the sector size recorded for ns by nvme_ns_construct(). */
+uint32_t
+nvme_ns_get_sector_size(struct nvme_namespace *ns)
+{
+	uint32_t sector_size = ns->sector_size;
+
+	return sector_size;
+}
+
+/** Return the namespace size in sectors (nsze from the Identify Namespace data). */
+uint64_t
+nvme_ns_get_num_sectors(struct nvme_namespace *ns)
+{
+	struct nvme_namespace_data *nsdata = _nvme_ns_get_data(ns);
+
+	return nsdata->nsze;
+}
+
+/** Return the namespace size as number of sectors multiplied by sector size. */
+uint64_t
+nvme_ns_get_size(struct nvme_namespace *ns)
+{
+	uint64_t num_sectors = nvme_ns_get_num_sectors(ns);
+	uint32_t sector_size = nvme_ns_get_sector_size(ns);
+
+	return num_sectors * sector_size;
+}
+
+/** Return the NVME_NS_*_SUPPORTED feature flags recorded for ns. */
+uint32_t
+nvme_ns_get_flags(struct nvme_namespace *ns)
+{
+	uint32_t ns_flags = ns->flags;
+
+	return ns_flags;
+}
+
+/** Return a read-only view of the Identify Namespace data for ns. */
+const struct nvme_namespace_data *
+nvme_ns_get_data(struct nvme_namespace *ns)
+{
+	const struct nvme_namespace_data *nsdata = _nvme_ns_get_data(ns);
+
+	return nsdata;
+}
+
+/**
+ * Initialize ns as namespace number id of ctrlr.
+ *
+ * Issues an Identify Namespace command on the admin queue and polls that
+ *  queue until the command completes, storing the result in the controller's
+ *  nsdata slot for this namespace.  Sector size, per-I/O and per-stripe sector
+ *  counts, and the NVME_NS_*_SUPPORTED flags are then derived from the
+ *  identify data.
+ *
+ * id must be greater than zero.
+ *
+ * Returns 0 on success, or ENXIO if the identify command completed with error.
+ */
+int
+nvme_ns_construct(struct nvme_namespace *ns, uint16_t id,
+		  struct nvme_controller *ctrlr)
+{
+	struct nvme_completion_poll_status	status;
+	struct nvme_namespace_data		*nsdata;
+	uint32_t				pci_devid;
+
+	nvme_assert(id > 0, ("invalid namespace id %d", id));
+
+	ns->ctrlr = ctrlr;
+	ns->id = id;
+	ns->stripe_size = 0;
+	/*
+	 * The feature bits below are OR-ed in, so start from zero: otherwise
+	 *  stale bits from an earlier construction (or from memory that was
+	 *  not zeroed) would survive.
+	 */
+	ns->flags = 0;
+
+	/* Offset 0 of PCI config space holds the combined vendor/device ID. */
+	nvme_pcicfg_read32(ctrlr->devhandle, &pci_devid, 0);
+	if (pci_devid == INTEL_DC_P3X00_DEVID && ctrlr->cdata.vs[3] != 0) {
+		ns->stripe_size = (1 << ctrlr->cdata.vs[3]) * ctrlr->min_page_size;
+	}
+
+	nsdata = _nvme_ns_get_data(ns);
+
+	status.done = false;
+	nvme_ctrlr_cmd_identify_namespace(ctrlr, id, nsdata,
+					  nvme_completion_poll_cb, &status);
+	while (status.done == false) {
+		nvme_qpair_process_completions(&ctrlr->adminq, 0);
+	}
+	if (nvme_completion_is_error(&status.cpl)) {
+		nvme_printf(ctrlr, "nvme_identify_namespace failed\n");
+		return ENXIO;
+	}
+
+	ns->sector_size = 1 << nsdata->lbaf[nsdata->flbas.format].lbads;
+
+	ns->sectors_per_max_io = nvme_ns_get_max_io_xfer_size(ns) / ns->sector_size;
+	ns->sectors_per_stripe = ns->stripe_size / ns->sector_size;
+
+	if (ctrlr->cdata.oncs.dsm) {
+		ns->flags |= NVME_NS_DEALLOCATE_SUPPORTED;
+	}
+
+	if (ctrlr->cdata.vwc.present) {
+		ns->flags |= NVME_NS_FLUSH_SUPPORTED;
+	}
+
+	if (nsdata->nsrescap.raw) {
+		ns->flags |= NVME_NS_RESERVATION_SUPPORTED;
+	}
+
+	return 0;
+}
+
+/**
+ * Counterpart of nvme_ns_construct().
+ *
+ * Intentionally empty: nvme_ns_construct() only fills in fields of ns and
+ *  does not allocate anything that would need to be released here.
+ */
+void nvme_ns_destruct(struct nvme_namespace *ns)
+{
+
+}
diff --git a/src/spdk/lib/nvme/nvme_ns_cmd.c b/src/spdk/lib/nvme/nvme_ns_cmd.c
new file mode 100644
index 0000000..0ca5f60
--- /dev/null
+++ b/src/spdk/lib/nvme/nvme_ns_cmd.c
@@ -0,0 +1,439 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright (c) Intel Corporation.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "nvme_internal.h"
+
+/**
+ * \file
+ *
+ */
+
+static struct nvme_request *_nvme_ns_cmd_rw(struct nvme_namespace *ns,
+		const struct nvme_payload *payload, uint64_t lba,
+		uint32_t lba_count, nvme_cb_fn_t cb_fn,
+		void *cb_arg, uint32_t opc, uint32_t io_flags);
+
+/**
+ * Completion callback installed on every child of a split request.
+ *
+ * Detaches the child from its parent, records the child's completion in the
+ *  parent if it reports an error, and - once no children remain - invokes the
+ *  parent's callback (if any) with the accumulated status and frees the parent.
+ */
+static void
+nvme_cb_complete_child(void *child_arg, const struct nvme_completion *cpl)
+{
+	struct nvme_request *child_req = child_arg;
+	struct nvme_request *parent_req = child_req->parent;
+
+	TAILQ_REMOVE(&parent_req->children, child_req, child_tailq);
+	parent_req->num_children--;
+
+	if (nvme_completion_is_error(cpl)) {
+		memcpy(&parent_req->parent_status, cpl, sizeof(*cpl));
+	}
+
+	if (parent_req->num_children != 0) {
+		/* Other children are still outstanding. */
+		return;
+	}
+
+	if (parent_req->cb_fn) {
+		parent_req->cb_fn(parent_req->cb_arg, &parent_req->parent_status);
+	}
+	nvme_free_request(parent_req);
+}
+
+/**
+ * Attach child to parent as one piece of a split request.
+ *
+ * The child's callback is redirected to nvme_cb_complete_child() so that
+ *  the parent is completed only after all of its children have completed.
+ */
+static void
+nvme_request_add_child(struct nvme_request *parent, struct nvme_request *child)
+{
+	const bool is_first_child = (parent->num_children == 0);
+
+	if (is_first_child) {
+		/*
+		 * The split-related fields live on a separate cacheline and are
+		 *  deliberately left untouched for ordinary requests, so they
+		 *  are set up lazily when the first child is attached.
+		 */
+		TAILQ_INIT(&parent->children);
+		parent->parent = NULL;
+		memset(&parent->parent_status, 0, sizeof(struct nvme_completion));
+	}
+
+	child->parent = parent;
+	child->cb_fn = nvme_cb_complete_child;
+	child->cb_arg = child;
+
+	TAILQ_INSERT_TAIL(&parent->children, child, child_tailq);
+	parent->num_children++;
+}
+
+/**
+ * Detach and free every child currently attached to req, including any
+ *  children those children may have themselves.
+ */
+static void
+_nvme_ns_cmd_free_children(struct nvme_request *req)
+{
+	struct nvme_request *child;
+
+	/* The children list is only initialized once num_children is non-zero. */
+	while (req->num_children > 0) {
+		child = TAILQ_FIRST(&req->children);
+		TAILQ_REMOVE(&req->children, child, child_tailq);
+		req->num_children--;
+		_nvme_ns_cmd_free_children(child);
+		nvme_free_request(child);
+	}
+}
+
+/**
+ * Split the I/O described by lba/lba_count into child requests attached to req.
+ *
+ * Each child covers at most sectors_per_max_io sectors.  sector_mask is
+ *  applied to the starting LBA of each child so that a child never crosses a
+ *  boundary of sectors_per_max_io sectors; pass 0 when no such alignment is
+ *  needed.  Every child records its byte offset into the parent's payload.
+ *
+ * Returns req on success.  If a child cannot be created, all children built
+ *  so far and req itself are freed and NULL is returned.
+ */
+static struct nvme_request *
+_nvme_ns_cmd_split_request(struct nvme_namespace *ns,
+			   const struct nvme_payload *payload,
+			   uint64_t lba, uint32_t lba_count,
+			   nvme_cb_fn_t cb_fn, void *cb_arg, uint32_t opc,
+			   uint32_t io_flags, struct nvme_request *req,
+			   uint32_t sectors_per_max_io, uint32_t sector_mask)
+{
+	uint32_t		sector_size = ns->sector_size;
+	uint32_t		remaining_lba_count = lba_count;
+	uint32_t		offset = 0;
+	struct nvme_request	*child;
+
+	while (remaining_lba_count > 0) {
+		lba_count = sectors_per_max_io - (lba & sector_mask);
+		lba_count = nvme_min(remaining_lba_count, lba_count);
+
+		child = _nvme_ns_cmd_rw(ns, payload, lba, lba_count, cb_fn,
+					cb_arg, opc, io_flags);
+		if (child == NULL) {
+			/* Do not leak the children that were already attached. */
+			_nvme_ns_cmd_free_children(req);
+			nvme_free_request(req);
+			return NULL;
+		}
+		child->payload_offset = offset;
+		nvme_request_add_child(req, child);
+		remaining_lba_count -= lba_count;
+		lba += lba_count;
+		offset += lba_count * sector_size;
+	}
+
+	return req;
+}
+
+/**
+ * Build a read or write request (selected by opc) for lba_count sectors
+ *  starting at lba.
+ *
+ * If the I/O crosses a stripe boundary of the namespace, or is larger than
+ *  the namespace's maximum I/O size, the returned request is a parent whose
+ *  children carry the actual commands; otherwise the command is filled in
+ *  directly.
+ *
+ * io_flags is OR-ed into command dword 12, so its bottom 16 bits must be
+ *  zero.  lba_count must be non-zero, because the block count is encoded as
+ *  lba_count - 1.
+ *
+ * Returns the request, or NULL if the arguments are invalid or a request
+ *  could not be allocated.
+ */
+static struct nvme_request *
+_nvme_ns_cmd_rw(struct nvme_namespace *ns, const struct nvme_payload *payload,
+		uint64_t lba, uint32_t lba_count, nvme_cb_fn_t cb_fn, void *cb_arg, uint32_t opc,
+		uint32_t io_flags)
+{
+	struct nvme_request	*req;
+	struct nvme_command	*cmd;
+	uint32_t		sector_size;
+	uint32_t		sectors_per_max_io;
+	uint32_t		sectors_per_stripe;
+
+	if (io_flags & 0xFFFF) {
+		/* The bottom 16 bits must be empty */
+		return NULL;
+	}
+
+	if (lba_count == 0) {
+		/* lba_count - 1 would wrap around and corrupt cdw12 */
+		return NULL;
+	}
+
+	sector_size = ns->sector_size;
+	sectors_per_max_io = ns->sectors_per_max_io;
+	sectors_per_stripe = ns->sectors_per_stripe;
+
+	req = nvme_allocate_request(payload, lba_count * sector_size, cb_fn, cb_arg);
+	if (req == NULL) {
+		return NULL;
+	}
+
+	/*
+	 * Intel DC P3*00 NVMe controllers benefit from driver-assisted striping.
+	 * If this controller defines a stripe boundary and this I/O spans a stripe
+	 *  boundary, split the request into multiple requests and submit each
+	 *  separately to hardware.
+	 */
+	if (sectors_per_stripe > 0 &&
+	    (((lba & (sectors_per_stripe - 1)) + lba_count) > sectors_per_stripe)) {
+
+		return _nvme_ns_cmd_split_request(ns, payload, lba, lba_count, cb_fn, cb_arg, opc,
+						  io_flags, req, sectors_per_stripe, sectors_per_stripe - 1);
+	} else if (lba_count > sectors_per_max_io) {
+		return _nvme_ns_cmd_split_request(ns, payload, lba, lba_count, cb_fn, cb_arg, opc,
+						  io_flags, req, sectors_per_max_io, 0);
+	} else {
+		cmd = &req->cmd;
+		cmd->opc = opc;
+		cmd->nsid = ns->id;
+
+		/*
+		 * The starting LBA occupies cdw10 (low half) and cdw11 (high half).
+		 *  Store the halves explicitly instead of writing through a
+		 *  uint64_t pointer aliasing two uint32_t fields.
+		 */
+		cmd->cdw10 = (uint32_t)lba;
+		cmd->cdw11 = (uint32_t)(lba >> 32);
+
+		cmd->cdw12 = lba_count - 1;
+		cmd->cdw12 |= io_flags;
+	}
+
+	return req;
+}
+
+/**
+ * Submit a read of lba_count sectors starting at lba into the contiguous
+ *  buffer.
+ *
+ * Returns 0 once the request has been handed to the controller, or ENOMEM
+ *  if no request could be built.
+ */
+int
+nvme_ns_cmd_read(struct nvme_namespace *ns, void *buffer, uint64_t lba,
+		 uint32_t lba_count, nvme_cb_fn_t cb_fn, void *cb_arg,
+		 uint32_t io_flags)
+{
+	struct nvme_payload	contig_payload;
+	struct nvme_request	*io_req;
+
+	contig_payload.u.contig = buffer;
+	contig_payload.type = NVME_PAYLOAD_TYPE_CONTIG;
+
+	io_req = _nvme_ns_cmd_rw(ns, &contig_payload, lba, lba_count, cb_fn, cb_arg,
+				 NVME_OPC_READ, io_flags);
+	if (io_req == NULL) {
+		return ENOMEM;
+	}
+
+	nvme_ctrlr_submit_io_request(ns->ctrlr, io_req);
+	return 0;
+}
+
+/**
+ * Submit a read of lba_count sectors starting at lba into a scattered
+ *  payload described by the reset_sgl_fn / next_sge_fn callbacks.
+ *
+ * Returns 0 once the request has been handed to the controller, or ENOMEM
+ *  if no request could be built.
+ */
+int
+nvme_ns_cmd_readv(struct nvme_namespace *ns, uint64_t lba, uint32_t lba_count,
+		  nvme_cb_fn_t cb_fn, void *cb_arg, uint32_t io_flags,
+		  nvme_req_reset_sgl_fn_t reset_sgl_fn,
+		  nvme_req_next_sge_fn_t next_sge_fn)
+{
+	struct nvme_payload	sgl_payload;
+	struct nvme_request	*io_req;
+
+	sgl_payload.u.sgl.reset_sgl_fn = reset_sgl_fn;
+	sgl_payload.u.sgl.next_sge_fn = next_sge_fn;
+	sgl_payload.type = NVME_PAYLOAD_TYPE_SGL;
+
+	io_req = _nvme_ns_cmd_rw(ns, &sgl_payload, lba, lba_count, cb_fn, cb_arg,
+				 NVME_OPC_READ, io_flags);
+	if (io_req == NULL) {
+		return ENOMEM;
+	}
+
+	nvme_ctrlr_submit_io_request(ns->ctrlr, io_req);
+	return 0;
+}
+
+/**
+ * Submit a write of lba_count sectors starting at lba from the contiguous
+ *  buffer.
+ *
+ * Returns 0 once the request has been handed to the controller, or ENOMEM
+ *  if no request could be built.
+ */
+int
+nvme_ns_cmd_write(struct nvme_namespace *ns, void *buffer, uint64_t lba,
+		  uint32_t lba_count, nvme_cb_fn_t cb_fn, void *cb_arg,
+		  uint32_t io_flags)
+{
+	struct nvme_payload	contig_payload;
+	struct nvme_request	*io_req;
+
+	contig_payload.u.contig = buffer;
+	contig_payload.type = NVME_PAYLOAD_TYPE_CONTIG;
+
+	io_req = _nvme_ns_cmd_rw(ns, &contig_payload, lba, lba_count, cb_fn, cb_arg,
+				 NVME_OPC_WRITE, io_flags);
+	if (io_req == NULL) {
+		return ENOMEM;
+	}
+
+	nvme_ctrlr_submit_io_request(ns->ctrlr, io_req);
+	return 0;
+}
+
+/**
+ * Submit a write of lba_count sectors starting at lba from a scattered
+ *  payload described by the reset_sgl_fn / next_sge_fn callbacks.
+ *
+ * Returns 0 once the request has been handed to the controller, or ENOMEM
+ *  if no request could be built.
+ */
+int
+nvme_ns_cmd_writev(struct nvme_namespace *ns, uint64_t lba, uint32_t lba_count,
+		   nvme_cb_fn_t cb_fn, void *cb_arg, uint32_t io_flags,
+		   nvme_req_reset_sgl_fn_t reset_sgl_fn,
+		   nvme_req_next_sge_fn_t next_sge_fn)
+{
+	struct nvme_payload	sgl_payload;
+	struct nvme_request	*io_req;
+
+	sgl_payload.u.sgl.reset_sgl_fn = reset_sgl_fn;
+	sgl_payload.u.sgl.next_sge_fn = next_sge_fn;
+	sgl_payload.type = NVME_PAYLOAD_TYPE_SGL;
+
+	io_req = _nvme_ns_cmd_rw(ns, &sgl_payload, lba, lba_count, cb_fn, cb_arg,
+				 NVME_OPC_WRITE, io_flags);
+	if (io_req == NULL) {
+		return ENOMEM;
+	}
+
+	nvme_ctrlr_submit_io_request(ns->ctrlr, io_req);
+	return 0;
+}
+
+/**
+ * Submit a Dataset Management command with the deallocate attribute.
+ *
+ * payload holds num_ranges entries of struct nvme_dsm_range.
+ *
+ * Returns 0 on submission, EINVAL if num_ranges is 0 or exceeds
+ *  NVME_DATASET_MANAGEMENT_MAX_RANGES, or ENOMEM if no request could be
+ *  allocated.
+ */
+int
+nvme_ns_cmd_deallocate(struct nvme_namespace *ns, void *payload,
+		       uint16_t num_ranges, nvme_cb_fn_t cb_fn, void *cb_arg)
+{
+	struct nvme_request	*dsm_req;
+
+	if (num_ranges == 0) {
+		return EINVAL;
+	}
+	if (num_ranges > NVME_DATASET_MANAGEMENT_MAX_RANGES) {
+		return EINVAL;
+	}
+
+	dsm_req = nvme_allocate_request_contig(payload,
+					       num_ranges * sizeof(struct nvme_dsm_range),
+					       cb_fn, cb_arg);
+	if (dsm_req == NULL) {
+		return ENOMEM;
+	}
+
+	dsm_req->cmd.opc = NVME_OPC_DATASET_MANAGEMENT;
+	dsm_req->cmd.nsid = ns->id;
+
+	/* TODO: create a delete command data structure */
+	dsm_req->cmd.cdw10 = num_ranges - 1;
+	dsm_req->cmd.cdw11 = NVME_DSM_ATTR_DEALLOCATE;
+
+	nvme_ctrlr_submit_io_request(ns->ctrlr, dsm_req);
+
+	return 0;
+}
+
+/**
+ * Submit a Flush command for ns.  The command carries no data payload.
+ *
+ * Returns 0 on submission, or ENOMEM if no request could be allocated.
+ */
+int
+nvme_ns_cmd_flush(struct nvme_namespace *ns, nvme_cb_fn_t cb_fn, void *cb_arg)
+{
+	struct nvme_request	*flush_req = nvme_allocate_request_null(cb_fn, cb_arg);
+
+	if (flush_req == NULL) {
+		return ENOMEM;
+	}
+
+	flush_req->cmd.opc = NVME_OPC_FLUSH;
+	flush_req->cmd.nsid = ns->id;
+
+	nvme_ctrlr_submit_io_request(ns->ctrlr, flush_req);
+
+	return 0;
+}
+
+/**
+ * Submit a Reservation Register command for ns.
+ *
+ * Command dword 10 is composed of action (bits 0-2), ignore_key (bit 3)
+ *  and cptpl (bits 30-31).
+ *
+ * Returns 0 on submission, or ENOMEM if no request could be allocated.
+ */
+int
+nvme_ns_cmd_reservation_register(struct nvme_namespace *ns,
+				 struct nvme_reservation_register_data *payload,
+				 bool ignore_key,
+				 enum nvme_reservation_register_action action,
+				 enum nvme_reservation_register_cptpl cptpl,
+				 nvme_cb_fn_t cb_fn, void *cb_arg)
+{
+	struct nvme_request	*resv_req;
+	uint32_t		cdw10;
+
+	resv_req = nvme_allocate_request_contig(payload,
+						sizeof(struct nvme_reservation_register_data),
+						cb_fn, cb_arg);
+	if (resv_req == NULL) {
+		return ENOMEM;
+	}
+
+	/* Bits 0-2 */
+	cdw10 = action;
+	/* Bit 3 */
+	if (ignore_key) {
+		cdw10 |= 1 << 3;
+	}
+	/* Bits 30-31 */
+	cdw10 |= (uint32_t)cptpl << 30;
+
+	resv_req->cmd.opc = NVME_OPC_RESERVATION_REGISTER;
+	resv_req->cmd.nsid = ns->id;
+	resv_req->cmd.cdw10 = cdw10;
+
+	nvme_ctrlr_submit_io_request(ns->ctrlr, resv_req);
+
+	return 0;
+}
+
+/**
+ * Submit a Reservation Release command for ns.
+ *
+ * Command dword 10 is composed of action (bits 0-2), ignore_key (bit 3)
+ *  and type (bits 8-15).
+ *
+ * Returns 0 on submission, or ENOMEM if no request could be allocated.
+ */
+int
+nvme_ns_cmd_reservation_release(struct nvme_namespace *ns,
+				struct nvme_reservation_key_data *payload,
+				bool ignore_key,
+				enum nvme_reservation_release_action action,
+				enum nvme_reservation_type type,
+				nvme_cb_fn_t cb_fn, void *cb_arg)
+{
+	struct nvme_request	*resv_req;
+	uint32_t		cdw10;
+
+	resv_req = nvme_allocate_request_contig(payload,
+						sizeof(struct nvme_reservation_key_data),
+						cb_fn, cb_arg);
+	if (resv_req == NULL) {
+		return ENOMEM;
+	}
+
+	/* Bits 0-2 */
+	cdw10 = action;
+	/* Bit 3 */
+	if (ignore_key) {
+		cdw10 |= 1 << 3;
+	}
+	/* Bits 8-15 */
+	cdw10 |= (uint32_t)type << 8;
+
+	resv_req->cmd.opc = NVME_OPC_RESERVATION_RELEASE;
+	resv_req->cmd.nsid = ns->id;
+	resv_req->cmd.cdw10 = cdw10;
+
+	nvme_ctrlr_submit_io_request(ns->ctrlr, resv_req);
+
+	return 0;
+}
+
+/**
+ * Submit a Reservation Acquire command for ns.
+ *
+ * Command dword 10 is composed of action (bits 0-2), ignore_key (bit 3)
+ *  and type (bits 8-15).
+ *
+ * Returns 0 on submission, or ENOMEM if no request could be allocated.
+ */
+int
+nvme_ns_cmd_reservation_acquire(struct nvme_namespace *ns,
+				struct nvme_reservation_acquire_data *payload,
+				bool ignore_key,
+				enum nvme_reservation_acquire_action action,
+				enum nvme_reservation_type type,
+				nvme_cb_fn_t cb_fn, void *cb_arg)
+{
+	struct nvme_request	*resv_req;
+	uint32_t		cdw10;
+
+	resv_req = nvme_allocate_request_contig(payload,
+						sizeof(struct nvme_reservation_acquire_data),
+						cb_fn, cb_arg);
+	if (resv_req == NULL) {
+		return ENOMEM;
+	}
+
+	/* Bits 0-2 */
+	cdw10 = action;
+	/* Bit 3 */
+	if (ignore_key) {
+		cdw10 |= 1 << 3;
+	}
+	/* Bits 8-15 */
+	cdw10 |= (uint32_t)type << 8;
+
+	resv_req->cmd.opc = NVME_OPC_RESERVATION_ACQUIRE;
+	resv_req->cmd.nsid = ns->id;
+	resv_req->cmd.cdw10 = cdw10;
+
+	nvme_ctrlr_submit_io_request(ns->ctrlr, resv_req);
+
+	return 0;
+}
+
+/**
+ * Submit a Reservation Report command for ns.
+ *
+ * payload is a contiguous buffer of len bytes that receives the report;
+ *  len must be a multiple of 4 because the transfer length is expressed in
+ *  dwords in command dword 10.
+ *
+ * Returns 0 on submission, EINVAL if len is not a multiple of 4, or ENOMEM
+ *  if no request could be allocated.
+ */
+int
+nvme_ns_cmd_reservation_report(struct nvme_namespace *ns, void *payload,
+			       uint32_t len, nvme_cb_fn_t cb_fn, void *cb_arg)
+{
+	uint32_t		num_dwords;
+	struct nvme_request	*req;
+	struct nvme_command	*cmd;
+
+	if (len % 4) {
+		return EINVAL;
+	}
+	num_dwords = len / 4;
+
+	/*
+	 * payload is a raw buffer, not a struct nvme_payload descriptor, so it
+	 *  has to go through the contiguous-buffer allocator, and the payload
+	 *  size is the byte length rather than the dword count.
+	 */
+	req = nvme_allocate_request_contig(payload, len, cb_fn, cb_arg);
+	if (req == NULL) {
+		return ENOMEM;
+	}
+
+	cmd = &req->cmd;
+	cmd->opc = NVME_OPC_RESERVATION_REPORT;
+	cmd->nsid = ns->id;
+
+	cmd->cdw10 = num_dwords;
+
+	nvme_ctrlr_submit_io_request(ns->ctrlr, req);
+
+	return 0;
+}
diff --git a/src/spdk/lib/nvme/nvme_qpair.c b/src/spdk/lib/nvme/nvme_qpair.c
new file mode 100644
index 0000000..b3e5586
--- /dev/null
+++ b/src/spdk/lib/nvme/nvme_qpair.c
@@ -0,0 +1,1011 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright (c) Intel Corporation.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "nvme_internal.h"
+
+/**
+ * \file
+ *
+ */
+
+/* The queue pair whose id is 0 is treated as the admin queue pair. */
+static inline bool nvme_qpair_is_admin_queue(struct nvme_qpair *qpair)
+{
+	return !qpair->id;
+}
+
+/* Any queue pair with a non-zero id is treated as an I/O queue pair. */
+static inline bool nvme_qpair_is_io_queue(struct nvme_qpair *qpair)
+{
+	return !(qpair->id == 0);
+}
+
+/*
+ * One entry of a value-to-name lookup table.  The tables in this file end
+ *  with an entry whose value is 0xFFFF; nvme_get_string() stops there.
+ */
+struct nvme_string {
+	uint16_t	value;	/* code being described (opcode or status code) */
+	const char 	*str;	/* printable name for value */
+};
+
+/*
+ * Printable names for admin opcodes.  The final 0xFFFF entry terminates the
+ *  table and supplies the name used for opcodes that are not listed.
+ */
+static const struct nvme_string admin_opcode[] = {
+	{ .value = NVME_OPC_DELETE_IO_SQ,		.str = "DELETE IO SQ" },
+	{ .value = NVME_OPC_CREATE_IO_SQ,		.str = "CREATE IO SQ" },
+	{ .value = NVME_OPC_GET_LOG_PAGE,		.str = "GET LOG PAGE" },
+	{ .value = NVME_OPC_DELETE_IO_CQ,		.str = "DELETE IO CQ" },
+	{ .value = NVME_OPC_CREATE_IO_CQ,		.str = "CREATE IO CQ" },
+	{ .value = NVME_OPC_IDENTIFY,			.str = "IDENTIFY" },
+	{ .value = NVME_OPC_ABORT,			.str = "ABORT" },
+	{ .value = NVME_OPC_SET_FEATURES,		.str = "SET FEATURES" },
+	{ .value = NVME_OPC_GET_FEATURES,		.str = "GET FEATURES" },
+	{ .value = NVME_OPC_ASYNC_EVENT_REQUEST,	.str = "ASYNC EVENT REQUEST" },
+	{ .value = NVME_OPC_NAMESPACE_MANAGEMENT,	.str = "NAMESPACE MANAGEMENT" },
+	{ .value = NVME_OPC_FIRMWARE_COMMIT,		.str = "FIRMWARE COMMIT" },
+	{ .value = NVME_OPC_FIRMWARE_IMAGE_DOWNLOAD,	.str = "FIRMWARE IMAGE DOWNLOAD" },
+	{ .value = NVME_OPC_NAMESPACE_ATTACHMENT,	.str = "NAMESPACE ATTACHMENT" },
+	{ .value = NVME_OPC_FORMAT_NVM,			.str = "FORMAT NVM" },
+	{ .value = NVME_OPC_SECURITY_SEND,		.str = "SECURITY SEND" },
+	{ .value = NVME_OPC_SECURITY_RECEIVE,		.str = "SECURITY RECEIVE" },
+	{ .value = 0xFFFF,				.str = "ADMIN COMMAND" }
+};
+
+/*
+ * Printable names for I/O opcodes.  The final 0xFFFF entry terminates the
+ *  table and supplies the name used for opcodes that are not listed.
+ */
+static const struct nvme_string io_opcode[] = {
+	{ .value = NVME_OPC_FLUSH,			.str = "FLUSH" },
+	{ .value = NVME_OPC_WRITE,			.str = "WRITE" },
+	{ .value = NVME_OPC_READ,			.str = "READ" },
+	{ .value = NVME_OPC_WRITE_UNCORRECTABLE,	.str = "WRITE UNCORRECTABLE" },
+	{ .value = NVME_OPC_COMPARE,			.str = "COMPARE" },
+	{ .value = NVME_OPC_WRITE_ZEROES,		.str = "WRITE ZEROES" },
+	{ .value = NVME_OPC_DATASET_MANAGEMENT,		.str = "DATASET MANAGEMENT" },
+	{ .value = NVME_OPC_RESERVATION_REGISTER,	.str = "RESERVATION REGISTER" },
+	{ .value = NVME_OPC_RESERVATION_REPORT,		.str = "RESERVATION REPORT" },
+	{ .value = NVME_OPC_RESERVATION_ACQUIRE,	.str = "RESERVATION ACQUIRE" },
+	{ .value = NVME_OPC_RESERVATION_RELEASE,	.str = "RESERVATION RELEASE" },
+	{ .value = 0xFFFF,				.str = "IO COMMAND" }
+};
+
+/*
+ * Scan a table that ends with a 0xFFFF entry and return the string of the
+ *  first entry whose value matches.  If nothing matches, the string of the
+ *  terminating entry is returned instead.
+ */
+static const char *
+nvme_get_string(const struct nvme_string *strings, uint16_t value)
+{
+	const struct nvme_string *cur;
+
+	for (cur = strings; cur->value != 0xFFFF; cur++) {
+		if (cur->value == value) {
+			break;
+		}
+	}
+
+	/* cur is either the matching entry or the terminator. */
+	return cur->str;
+}
+
+/* Log one admin command: opcode name and value, queue/command ids, nsid, cdw10/11. */
+static void
+nvme_admin_qpair_print_command(struct nvme_qpair *qpair,
+			       struct nvme_command *cmd)
+{
+	const char *opc_name = nvme_get_string(admin_opcode, cmd->opc);
+
+	nvme_printf(qpair->ctrlr,
+		    "%s (%02x) sqid:%d cid:%d nsid:%x "
+		    "cdw10:%08x cdw11:%08x\n",
+		    opc_name, cmd->opc, qpair->id, cmd->cid,
+		    cmd->nsid, cmd->cdw10, cmd->cdw11);
+}
+
+/*
+ * Log one I/O command.  Read/write style commands also show the LBA taken
+ *  from cdw10/cdw11 and a length derived from the low 16 bits of cdw12;
+ *  opcodes without a dedicated format are printed with their raw value.
+ */
+static void
+nvme_io_qpair_print_command(struct nvme_qpair *qpair,
+			    struct nvme_command *cmd)
+{
+	const char		*opc_name = nvme_get_string(io_opcode, cmd->opc);
+	unsigned long long	lba;
+
+	switch ((int)cmd->opc) {
+	case NVME_OPC_WRITE:
+	case NVME_OPC_READ:
+	case NVME_OPC_WRITE_UNCORRECTABLE:
+	case NVME_OPC_COMPARE:
+		/* cdw11 holds the upper 32 bits of the LBA, cdw10 the lower 32. */
+		lba = ((unsigned long long)cmd->cdw11 << 32) + cmd->cdw10;
+		nvme_printf(qpair->ctrlr, "%s sqid:%d cid:%d nsid:%d "
+			    "lba:%llu len:%d\n",
+			    opc_name, qpair->id, cmd->cid, cmd->nsid,
+			    lba, (cmd->cdw12 & 0xFFFF) + 1);
+		return;
+	case NVME_OPC_FLUSH:
+	case NVME_OPC_DATASET_MANAGEMENT:
+		nvme_printf(qpair->ctrlr, "%s sqid:%d cid:%d nsid:%d\n",
+			    opc_name, qpair->id, cmd->cid, cmd->nsid);
+		return;
+	default:
+		nvme_printf(qpair->ctrlr, "%s (%02x) sqid:%d cid:%d nsid:%d\n",
+			    opc_name, cmd->opc, qpair->id, cmd->cid, cmd->nsid);
+		return;
+	}
+}
+
+/* Log a command using the admin or I/O formatter that matches the queue pair. */
+static void
+nvme_qpair_print_command(struct nvme_qpair *qpair, struct nvme_command *cmd)
+{
+	nvme_assert(qpair != NULL, ("qpair can not be NULL"));
+	nvme_assert(cmd != NULL, ("cmd can not be NULL"));
+
+	if (!nvme_qpair_is_admin_queue(qpair)) {
+		nvme_io_qpair_print_command(qpair, cmd);
+		return;
+	}
+
+	nvme_admin_qpair_print_command(qpair, cmd);
+}
+
+/*
+ * Printable names for generic status codes.  The final 0xFFFF entry
+ *  terminates the table and names any code that is not listed.
+ */
+static const struct nvme_string generic_status[] = {
+	{ .value = NVME_SC_SUCCESS,			.str = "SUCCESS" },
+	{ .value = NVME_SC_INVALID_OPCODE,		.str = "INVALID OPCODE" },
+	{ .value = NVME_SC_INVALID_FIELD,		.str = "INVALID_FIELD" },
+	{ .value = NVME_SC_COMMAND_ID_CONFLICT,		.str = "COMMAND ID CONFLICT" },
+	{ .value = NVME_SC_DATA_TRANSFER_ERROR,		.str = "DATA TRANSFER ERROR" },
+	{ .value = NVME_SC_ABORTED_POWER_LOSS,		.str = "ABORTED - POWER LOSS" },
+	{ .value = NVME_SC_INTERNAL_DEVICE_ERROR,	.str = "INTERNAL DEVICE ERROR" },
+	{ .value = NVME_SC_ABORTED_BY_REQUEST,		.str = "ABORTED - BY REQUEST" },
+	{ .value = NVME_SC_ABORTED_SQ_DELETION,		.str = "ABORTED - SQ DELETION" },
+	{ .value = NVME_SC_ABORTED_FAILED_FUSED,	.str = "ABORTED - FAILED FUSED" },
+	{ .value = NVME_SC_ABORTED_MISSING_FUSED,	.str = "ABORTED - MISSING FUSED" },
+	{ .value = NVME_SC_INVALID_NAMESPACE_OR_FORMAT,	.str = "INVALID NAMESPACE OR FORMAT" },
+	{ .value = NVME_SC_COMMAND_SEQUENCE_ERROR,	.str = "COMMAND SEQUENCE ERROR" },
+	{ .value = NVME_SC_LBA_OUT_OF_RANGE,		.str = "LBA OUT OF RANGE" },
+	{ .value = NVME_SC_CAPACITY_EXCEEDED,		.str = "CAPACITY EXCEEDED" },
+	{ .value = NVME_SC_NAMESPACE_NOT_READY,		.str = "NAMESPACE NOT READY" },
+	{ .value = 0xFFFF,				.str = "GENERIC" }
+};
+
+/*
+ * Printable names for command-specific status codes.  The final 0xFFFF
+ *  entry terminates the table and names any code that is not listed.
+ */
+static const struct nvme_string command_specific_status[] = {
+	{ .value = NVME_SC_COMPLETION_QUEUE_INVALID,	.str = "INVALID COMPLETION QUEUE" },
+	{ .value = NVME_SC_INVALID_QUEUE_IDENTIFIER,	.str = "INVALID QUEUE IDENTIFIER" },
+	{ .value = NVME_SC_MAXIMUM_QUEUE_SIZE_EXCEEDED,	.str = "MAX QUEUE SIZE EXCEEDED" },
+	{ .value = NVME_SC_ABORT_COMMAND_LIMIT_EXCEEDED, .str = "ABORT CMD LIMIT EXCEEDED" },
+	{ .value = NVME_SC_ASYNC_EVENT_REQUEST_LIMIT_EXCEEDED, .str = "ASYNC LIMIT EXCEEDED" },
+	{ .value = NVME_SC_INVALID_FIRMWARE_SLOT,	.str = "INVALID FIRMWARE SLOT" },
+	{ .value = NVME_SC_INVALID_FIRMWARE_IMAGE,	.str = "INVALID FIRMWARE IMAGE" },
+	{ .value = NVME_SC_INVALID_INTERRUPT_VECTOR,	.str = "INVALID INTERRUPT VECTOR" },
+	{ .value = NVME_SC_INVALID_LOG_PAGE,		.str = "INVALID LOG PAGE" },
+	{ .value = NVME_SC_INVALID_FORMAT,		.str = "INVALID FORMAT" },
+	{ .value = NVME_SC_FIRMWARE_REQUIRES_RESET,	.str = "FIRMWARE REQUIRES RESET" },
+	{ .value = NVME_SC_CONFLICTING_ATTRIBUTES,	.str = "CONFLICTING ATTRIBUTES" },
+	{ .value = NVME_SC_INVALID_PROTECTION_INFO,	.str = "INVALID PROTECTION INFO" },
+	{ .value = NVME_SC_ATTEMPTED_WRITE_TO_RO_PAGE,	.str = "WRITE TO RO PAGE" },
+	{ .value = 0xFFFF,				.str = "COMMAND SPECIFIC" }
+};
+
+/*
+ * Printable names for media error status codes.  The final 0xFFFF entry
+ *  terminates the table and names any code that is not listed.
+ */
+static const struct nvme_string media_error_status[] = {
+	{ .value = NVME_SC_WRITE_FAULTS,		.str = "WRITE FAULTS" },
+	{ .value = NVME_SC_UNRECOVERED_READ_ERROR,	.str = "UNRECOVERED READ ERROR" },
+	{ .value = NVME_SC_GUARD_CHECK_ERROR,		.str = "GUARD CHECK ERROR" },
+	{ .value = NVME_SC_APPLICATION_TAG_CHECK_ERROR,	.str = "APPLICATION TAG CHECK ERROR" },
+	{ .value = NVME_SC_REFERENCE_TAG_CHECK_ERROR,	.str = "REFERENCE TAG CHECK ERROR" },
+	{ .value = NVME_SC_COMPARE_FAILURE,		.str = "COMPARE FAILURE" },
+	{ .value = NVME_SC_ACCESS_DENIED,		.str = "ACCESS DENIED" },
+	{ .value = 0xFFFF,				.str = "MEDIA ERROR" }
+};
+
+/*
+ * Translate a status code type / status code pair into a printable name.
+ *  Vendor specific and unrecognised status code types get a fixed string;
+ *  the others are looked up in the table for that type.
+ */
+static const char *
+get_status_string(uint16_t sct, uint16_t sc)
+{
+	switch (sct) {
+	case NVME_SCT_GENERIC:
+		return nvme_get_string(generic_status, sc);
+	case NVME_SCT_COMMAND_SPECIFIC:
+		return nvme_get_string(command_specific_status, sc);
+	case NVME_SCT_MEDIA_ERROR:
+		return nvme_get_string(media_error_status, sc);
+	case NVME_SCT_VENDOR_SPECIFIC:
+		return "VENDOR SPECIFIC";
+	default:
+		return "RESERVED";
+	}
+}
+
+/* Log one completion entry: status name, raw sct/sc, ids, cdw0, sqhd and flag bits. */
+static void
+nvme_qpair_print_completion(struct nvme_qpair *qpair,
+			    struct nvme_completion *cpl)
+{
+	const char *status_name;
+
+	status_name = get_status_string(cpl->status.sct, cpl->status.sc);
+
+	nvme_printf(qpair->ctrlr,
+		    "%s (%02x/%02x) sqid:%d cid:%d cdw0:%x "
+		    "sqhd:%04x p:%x m:%x dnr:%x\n",
+		    status_name,
+		    cpl->status.sct, cpl->status.sc,
+		    cpl->sqid, cpl->cid, cpl->cdw0, cpl->sqhd,
+		    cpl->status.p, cpl->status.m, cpl->status.dnr);
+}
+
+/*
+ * Decide whether a failed command may be resubmitted.
+ *
+ * Only generic-type ABORTED_BY_REQUEST and NAMESPACE_NOT_READY completions
+ *  are candidates, and only when the DNR bit is clear.  Every other status
+ *  is reported as not retryable.
+ */
+static bool
+nvme_completion_is_retry(const struct nvme_completion *cpl)
+{
+	/*
+	 * TODO: spec is not clear how commands that are aborted due
+	 *  to TLER will be marked, so the set of retryable codes is kept
+	 *  deliberately small for now.
+	 */
+	if ((int)cpl->status.sct != NVME_SCT_GENERIC) {
+		/* Command specific, media error, vendor specific, reserved. */
+		return false;
+	}
+
+	switch ((int)cpl->status.sc) {
+	case NVME_SC_ABORTED_BY_REQUEST:
+	case NVME_SC_NAMESPACE_NOT_READY:
+		return !cpl->status.dnr;
+	default:
+		return false;
+	}
+}
+
+/*
+ * Record a tracker's command id and the bus address of its embedded prp
+ *  array, computed from the bus address of the tracker itself.
+ */
+static void
+nvme_qpair_construct_tracker(struct nvme_tracker *tr, uint16_t cid, uint64_t phys_addr)
+{
+	tr->cid = cid;
+	tr->prp_bus_addr = phys_addr + offsetof(struct nvme_tracker, prp);
+}
+
+/*
+ * Place the tracker's command into the next submission queue slot, mark the
+ *  tracker active under its cid, and write the new tail to the doorbell.
+ */
+static void
+nvme_qpair_submit_tracker(struct nvme_qpair *qpair, struct nvme_tracker *tr)
+{
+	qpair->act_tr[tr->cid] = tr;
+
+	/* Copy the command from the request into the submission queue. */
+	nvme_copy_command(&qpair->cmd[qpair->sq_tail], &tr->req->cmd);
+
+	/* Advance the tail, wrapping at the end of the ring. */
+	qpair->sq_tail++;
+	if (qpair->sq_tail == qpair->num_entries) {
+		qpair->sq_tail = 0;
+	}
+
+	/* The barrier is issued before the doorbell write. */
+	wmb();
+	spdk_mmio_write_4(qpair->sq_tdbl, qpair->sq_tail);
+}
+
+/*
+ * Finish processing of one tracker given its completion entry.
+ *
+ * If the completion is a retryable error and the request has retries left,
+ *  the same tracker is resubmitted.  Otherwise the request's callback (if
+ *  any) is invoked, the request is freed, the tracker is moved to the free
+ *  list, and one queued request is submitted unless the controller is
+ *  resetting.
+ *
+ * print_on_error: log the command and completion when cpl is an error.
+ */
+static void
+nvme_qpair_complete_tracker(struct nvme_qpair *qpair, struct nvme_tracker *tr,
+			    struct nvme_completion *cpl, bool print_on_error)
+{
+	struct nvme_request	*req;
+	bool			retry, error;
+
+	req = tr->req;
+
+	nvme_assert(req != NULL, ("tr has NULL req\n"));
+
+	error = nvme_completion_is_error(cpl);
+	/* Retry only errors flagged retryable, and only up to nvme_retry_count times. */
+	retry = error && nvme_completion_is_retry(cpl) &&
+		req->retries < nvme_retry_count;
+
+	if (error && print_on_error) {
+		nvme_qpair_print_command(qpair, &req->cmd);
+		nvme_qpair_print_completion(qpair, cpl);
+	}
+
+	/* The command id is no longer in flight; a retry re-registers it. */
+	qpair->act_tr[cpl->cid] = NULL;
+
+	nvme_assert(cpl->cid == req->cmd.cid, ("cpl cid does not match cmd cid\n"));
+
+	if (retry) {
+		req->retries++;
+		nvme_qpair_submit_tracker(qpair, tr);
+	} else {
+		if (req->cb_fn) {
+			req->cb_fn(req->cb_arg, cpl);
+		}
+
+		nvme_free_request(req);
+		tr->req = NULL;
+
+		/* Unlink the tracker from the list it is on and return it to free_tr. */
+		LIST_REMOVE(tr, list);
+		LIST_INSERT_HEAD(&qpair->free_tr, tr, list);
+
+		/*
+		 * If the controller is in the middle of resetting, don't
+		 *  try to submit queued requests here - let the reset logic
+		 *  handle that instead.
+		 */
+		if (!STAILQ_EMPTY(&qpair->queued_req) &&
+		    !qpair->ctrlr->is_resetting) {
+			/* A tracker was just freed, so hand it to the oldest queued request. */
+			req = STAILQ_FIRST(&qpair->queued_req);
+			STAILQ_REMOVE_HEAD(&qpair->queued_req, stailq);
+			nvme_qpair_submit_request(qpair, req);
+		}
+	}
+}
+
+/*
+ * Complete a tracker with a completion entry fabricated by the driver,
+ *  carrying the given status code type, status code and DNR value.
+ */
+static void
+nvme_qpair_manual_complete_tracker(struct nvme_qpair *qpair,
+				   struct nvme_tracker *tr, uint32_t sct, uint32_t sc, uint32_t dnr,
+				   bool print_on_error)
+{
+	struct nvme_completion	fake_cpl;
+
+	/* Start from an all-zero entry, then fill in only what is known. */
+	memset(&fake_cpl, 0, sizeof(fake_cpl));
+	fake_cpl.status.sct = sct;
+	fake_cpl.status.sc = sc;
+	fake_cpl.status.dnr = dnr;
+	fake_cpl.cid = tr->cid;
+	fake_cpl.sqid = qpair->id;
+
+	nvme_qpair_complete_tracker(qpair, tr, &fake_cpl, print_on_error);
+}
+
+/*
+ * Complete a request that never received a tracker, using a completion
+ *  entry fabricated by the driver.  The callback (if any) is invoked and
+ *  the request is freed.
+ */
+static void
+nvme_qpair_manual_complete_request(struct nvme_qpair *qpair,
+				   struct nvme_request *req, uint32_t sct, uint32_t sc,
+				   bool print_on_error)
+{
+	struct nvme_completion	fake_cpl;
+
+	memset(&fake_cpl, 0, sizeof(fake_cpl));
+	fake_cpl.status.sct = sct;
+	fake_cpl.status.sc = sc;
+	fake_cpl.sqid = qpair->id;
+
+	if (nvme_completion_is_error(&fake_cpl) && print_on_error) {
+		nvme_qpair_print_command(qpair, &req->cmd);
+		nvme_qpair_print_completion(qpair, &fake_cpl);
+	}
+
+	if (req->cb_fn != NULL) {
+		req->cb_fn(req->cb_arg, &fake_cpl);
+	}
+
+	nvme_free_request(req);
+}
+
+/*
+ * Enable the queue pair if it is disabled and the controller is not in the
+ *  middle of a reset, then report whether it is enabled.
+ */
+static inline bool
+nvme_qpair_check_enabled(struct nvme_qpair *qpair)
+{
+	if (qpair->is_enabled || qpair->ctrlr->is_resetting) {
+		/* Nothing to do: already enabled, or a reset is still running. */
+		return qpair->is_enabled;
+	}
+
+	nvme_qpair_enable(qpair);
+	return qpair->is_enabled;
+}
+
+/**
+ * \page nvme_async_completion NVMe Asynchronous Completion
+ *
+ * The userspace NVMe driver follows an asynchronous polled model for
+ * I/O completion.
+ *
+ * \section async_io I/O commands
+ *
+ * The application may submit I/O from one or more threads
+ * and must call nvme_ctrlr_process_io_completions()
+ * from each thread that submitted I/O.
+ *
+ * When the application calls nvme_ctrlr_process_io_completions(),
+ * if the NVMe driver detects completed I/Os that were submitted on that thread,
+ * it will invoke the registered callback function
+ * for each I/O within the context of nvme_ctrlr_process_io_completions().
+ *
+ * \section async_admin Admin commands
+ *
+ * The application may submit admin commands from one or more threads
+ * and must call nvme_ctrlr_process_admin_completions()
+ * from at least one thread to receive admin command completions.
+ * The thread that processes admin completions need not be the same thread that submitted the
+ * admin commands.
+ *
+ * When the application calls nvme_ctrlr_process_admin_completions(),
+ * if the NVMe driver detects completed admin commands submitted from any thread,
+ * it will invoke the registered callback function
+ * for each command within the context of nvme_ctrlr_process_admin_completions().
+ *
+ * It is the application's responsibility to manage the order of submitted admin commands.
+ * If certain admin commands must be submitted while no other commands are outstanding,
+ * it is the application's responsibility to enforce this rule
+ * using its own synchronization method.
+ */
+
+/**
+ * \brief Checks for and processes completions on the specified qpair.
+ *
+ * For each completed command, the request's callback function will
+ *  be called if specified as non-NULL when the request was submitted.
+ *
+ * \param max_completions upper bound on the number of completions handled
+ *  in this call; 0 means no limit.
+ *
+ * \return the number of completions processed, or 0 if the qpair is not
+ *  enabled.
+ *
+ * \sa nvme_cb_fn_t
+ */
+int32_t
+nvme_qpair_process_completions(struct nvme_qpair *qpair, uint32_t max_completions)
+{
+	struct nvme_tracker	*tr;
+	struct nvme_completion	*cpl;
+	uint32_t num_completions = 0;
+
+	if (!nvme_qpair_check_enabled(qpair)) {
+		/*
+		 * qpair is not enabled, likely because a controller reset
+		 *  is in progress.  Ignore the interrupt - any I/O that was
+		 *  associated with this interrupt will get retried when the
+		 *  reset is complete.
+		 */
+		return 0;
+	}
+
+	if (max_completions == 0) {
+		/*
+		 * max_completions == 0 means unlimited; set it to the max uint32_t value
+		 *  to avoid a special case in the loop.  The maximum possible queue size is
+		 *  only 64K, so num_completions will never reach this value.
+		 */
+		max_completions = UINT32_MAX;
+	}
+
+	while (1) {
+		cpl = &qpair->cpl[qpair->cq_head];
+
+		/* An entry whose phase bit differs from ours has not been written yet. */
+		if (cpl->status.p != qpair->phase)
+			break;
+
+		/* Find the tracker that was registered under this command id. */
+		tr = qpair->act_tr[cpl->cid];
+
+		if (tr != NULL) {
+			nvme_qpair_complete_tracker(qpair, tr, cpl, true);
+		} else {
+			nvme_printf(qpair->ctrlr,
+				    "cpl does not map to outstanding cmd\n");
+			nvme_qpair_print_completion(qpair, cpl);
+			nvme_assert(0, ("received completion for unknown cmd\n"));
+		}
+
+		/* Advance the head; on wrap-around the expected phase flips. */
+		if (++qpair->cq_head == qpair->num_entries) {
+			qpair->cq_head = 0;
+			qpair->phase = !qpair->phase;
+		}
+
+		/* Publish the new head through the completion queue doorbell. */
+		spdk_mmio_write_4(qpair->cq_hdbl, qpair->cq_head);
+
+		if (++num_completions == max_completions) {
+			break;
+		}
+	}
+
+	return num_completions;
+}
+
+/**
+ * Initialize a queue pair: allocate the command and completion rings,
+ *  compute the doorbell addresses, allocate num_trackers trackers onto the
+ *  free list, and allocate the active-tracker table.
+ *
+ * \return 0 on success.  On any allocation failure everything allocated so
+ *  far is released through nvme_qpair_destroy() and -1 is returned.
+ */
+int
+nvme_qpair_construct(struct nvme_qpair *qpair, uint16_t id,
+		     uint16_t num_entries, uint16_t num_trackers,
+		     struct nvme_controller *ctrlr)
+{
+	struct nvme_tracker	*tr;
+	uint16_t		i;
+	volatile uint32_t	*doorbell_base;
+	uint64_t		phys_addr = 0;
+
+	nvme_assert(num_entries != 0, ("invalid num_entries\n"));
+	nvme_assert(num_trackers != 0, ("invalid num_trackers\n"));
+
+	qpair->id = id;
+	qpair->num_entries = num_entries;
+
+	qpair->ctrlr = ctrlr;
+
+	/*
+	 * Put the qpair into a state nvme_qpair_destroy() can safely walk
+	 *  before anything below can fail.  The fail path tests these
+	 *  pointers and traverses these lists, so they must not hold stale
+	 *  values if the caller passed in an unzeroed qpair.
+	 */
+	qpair->cmd = NULL;
+	qpair->cpl = NULL;
+	qpair->act_tr = NULL;
+	LIST_INIT(&qpair->free_tr);
+	LIST_INIT(&qpair->outstanding_tr);
+	STAILQ_INIT(&qpair->queued_req);
+
+	/* cmd and cpl rings must be aligned on 4KB boundaries. */
+	qpair->cmd = nvme_malloc("qpair_cmd",
+				 qpair->num_entries * sizeof(struct nvme_command),
+				 0x1000,
+				 &qpair->cmd_bus_addr);
+	if (qpair->cmd == NULL) {
+		nvme_printf(ctrlr, "alloc qpair_cmd failed\n");
+		goto fail;
+	}
+	qpair->cpl = nvme_malloc("qpair_cpl",
+				 qpair->num_entries * sizeof(struct nvme_completion),
+				 0x1000,
+				 &qpair->cpl_bus_addr);
+	if (qpair->cpl == NULL) {
+		nvme_printf(ctrlr, "alloc qpair_cpl failed\n");
+		goto fail;
+	}
+
+	/*
+	 * Doorbells are laid out as SQ tail / CQ head pairs per queue id,
+	 *  each separated by the controller's doorbell stride.
+	 */
+	doorbell_base = &ctrlr->regs->doorbell[0].sq_tdbl;
+	qpair->sq_tdbl = doorbell_base + (2 * id + 0) * ctrlr->doorbell_stride_u32;
+	qpair->cq_hdbl = doorbell_base + (2 * id + 1) * ctrlr->doorbell_stride_u32;
+
+	for (i = 0; i < num_trackers; i++) {
+		/*
+		 * Round alignment up to next power of 2.  This ensures the PRP
+		 *  list embedded in the nvme_tracker object will not span a
+		 *  4KB boundary.
+		 */
+		tr = nvme_malloc("nvme_tr", sizeof(*tr), nvme_align32pow2(sizeof(*tr)), &phys_addr);
+		if (tr == NULL) {
+			nvme_printf(ctrlr, "nvme_tr failed\n");
+			goto fail;
+		}
+		/* The loop index doubles as the tracker's command id. */
+		nvme_qpair_construct_tracker(tr, i, phys_addr);
+		LIST_INSERT_HEAD(&qpair->free_tr, tr, list);
+	}
+
+	/* Table mapping a command id to the tracker currently using it. */
+	qpair->act_tr = calloc(num_trackers, sizeof(struct nvme_tracker *));
+	if (qpair->act_tr == NULL) {
+		nvme_printf(ctrlr, "alloc nvme_act_tr failed\n");
+		goto fail;
+	}
+	nvme_qpair_reset(qpair);
+	return 0;
+fail:
+	nvme_qpair_destroy(qpair);
+	return -1;
+}
+
+/*
+ * Manually complete every outstanding ASYNC EVENT REQUEST on the queue pair
+ *  with an ABORTED - SQ DELETION status, without logging.
+ */
+static void
+nvme_admin_qpair_abort_aers(struct nvme_qpair *qpair)
+{
+	struct nvme_tracker	*cur;
+
+	cur = LIST_FIRST(&qpair->outstanding_tr);
+	while (cur != NULL) {
+		if (tr_is_not_aer:
+		    cur->req->cmd.opc != NVME_OPC_ASYNC_EVENT_REQUEST) {
+			cur = LIST_NEXT(cur, list);
+			continue;
+		}
+
+		nvme_qpair_manual_complete_tracker(qpair, cur,
+						   NVME_SCT_GENERIC, NVME_SC_ABORTED_SQ_DELETION, 0,
+						   false);
+		/* Completing changed the list, so restart the scan from the head. */
+		cur = LIST_FIRST(&qpair->outstanding_tr);
+	}
+}
+
+/* Admin-queue-specific teardown: abort outstanding async event requests. */
+static void
+_nvme_admin_qpair_destroy(struct nvme_qpair *qpair)
+{
+	nvme_admin_qpair_abort_aers(qpair);
+}
+
+
+/*
+ * Release the resources owned by a queue pair: the command and completion
+ *  rings, the active-tracker table, and every tracker on the free list.
+ *  For the admin queue, outstanding async event requests are aborted first.
+ */
+void
+nvme_qpair_destroy(struct nvme_qpair *qpair)
+{
+	struct nvme_tracker	*tr;
+
+	if (nvme_qpair_is_admin_queue(qpair)) {
+		_nvme_admin_qpair_destroy(qpair);
+	}
+
+	if (qpair->cmd != NULL) {
+		nvme_free(qpair->cmd);
+	}
+	if (qpair->cpl != NULL) {
+		nvme_free(qpair->cpl);
+	}
+	if (qpair->act_tr != NULL) {
+		free(qpair->act_tr);
+	}
+
+	/* Drain the free list one tracker at a time. */
+	while ((tr = LIST_FIRST(&qpair->free_tr)) != NULL) {
+		LIST_REMOVE(tr, list);
+		nvme_free(tr);
+	}
+}
+
+/**
+ * \page nvme_io_submission NVMe I/O Submission
+ *
+ * I/O is submitted to an NVMe namespace using nvme_ns_cmd_xxx functions
+ * defined in nvme_ns_cmd.c.  The NVMe driver submits the I/O request
+ * as an NVMe submission queue entry on the nvme_qpair associated with
+ * the logical core that submits the I/O.
+ *
+ * \sa nvme_ns_cmd_read, nvme_ns_cmd_write, nvme_ns_cmd_deallocate,
+ *     nvme_ns_cmd_flush, nvme_get_ioq_idx
+ */
+
+/*
+ * Fail the request attached to tr with a generic INVALID FIELD status and
+ *  the DNR value set, logging the command and completion.
+ */
+static void
+_nvme_fail_request_bad_vtophys(struct nvme_qpair *qpair, struct nvme_tracker *tr)
+{
+	/*
+	 * Bad vtophys translation, so abort this request and return
+	 *  immediately.
+	 */
+	nvme_qpair_manual_complete_tracker(qpair, tr, NVME_SCT_GENERIC,
+					   NVME_SC_INVALID_FIELD,
+					   1 /* do not retry */, true);
+}
+
+/*
+ * Fail a request that has no tracker with a generic ABORTED - BY REQUEST
+ *  status, logging the command and completion.
+ */
+static void
+_nvme_fail_request_ctrlr_failed(struct nvme_qpair *qpair, struct nvme_request *req)
+{
+	nvme_qpair_manual_complete_request(qpair, req, NVME_SCT_GENERIC,
+					   NVME_SC_ABORTED_BY_REQUEST, true);
+}
+
+/**
+ * Build PRP list describing physically contiguous payload buffer.
+ *
+ * prp1 always points at the start of the payload.  When the payload covers
+ *  exactly two pages prp2 points at the second page; when it covers more,
+ *  prp2 points at the tracker's prp array, which is filled with the address
+ *  of every page after the first.
+ *
+ * \return 0 on success.  If any address translation fails the request is
+ *  completed with an error through the tracker and -1 is returned.
+ */
+static int
+_nvme_qpair_build_contig_request(struct nvme_qpair *qpair, struct nvme_request *req,
+				 struct nvme_tracker *tr)
+{
+	uint64_t phys_addr;
+	void *seg_addr;
+	uint32_t nseg, cur_nseg, modulo, unaligned;
+	void *payload = req->payload.u.contig + req->payload_offset;
+
+	phys_addr = nvme_vtophys(payload);
+	if (phys_addr == NVME_VTOPHYS_ERROR) {
+		_nvme_fail_request_bad_vtophys(qpair, tr);
+		return -1;
+	}
+	/*
+	 * Count the pages touched: whole pages in the size, plus the partial
+	 *  pages caused by a trailing remainder and/or an unaligned start.
+	 */
+	nseg = req->payload_size >> nvme_u32log2(PAGE_SIZE);
+	modulo = req->payload_size & (PAGE_SIZE - 1);
+	unaligned = phys_addr & (PAGE_SIZE - 1);
+	if (modulo || unaligned) {
+		nseg += 1 + ((modulo + unaligned - 1) >> nvme_u32log2(PAGE_SIZE));
+	}
+
+	tr->req->cmd.psdt = NVME_PSDT_PRP;
+	tr->req->cmd.dptr.prp.prp1 = phys_addr;
+	if (nseg == 2) {
+		seg_addr = payload + PAGE_SIZE - unaligned;
+		/*
+		 * Check this translation like the others in this function so
+		 *  that the error value is never handed to the controller as
+		 *  a data pointer.
+		 */
+		phys_addr = nvme_vtophys(seg_addr);
+		if (phys_addr == NVME_VTOPHYS_ERROR) {
+			_nvme_fail_request_bad_vtophys(qpair, tr);
+			return -1;
+		}
+		tr->req->cmd.dptr.prp.prp2 = phys_addr;
+	} else if (nseg > 2) {
+		cur_nseg = 1;
+		tr->req->cmd.dptr.prp.prp2 = (uint64_t)tr->prp_bus_addr;
+		while (cur_nseg < nseg) {
+			/* Start of page cur_nseg of the payload (page 0 is prp1). */
+			seg_addr = payload + cur_nseg * PAGE_SIZE - unaligned;
+			phys_addr = nvme_vtophys(seg_addr);
+			if (phys_addr == NVME_VTOPHYS_ERROR) {
+				_nvme_fail_request_bad_vtophys(qpair, tr);
+				return -1;
+			}
+			tr->prp[cur_nseg - 1] = phys_addr;
+			cur_nseg++;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Build the PRP entries for a request whose payload is described by the
+ *  caller's scatter-gather callbacks.
+ *
+ * The SGL is rewound to the request's payload offset with reset_sgl_fn,
+ *  then next_sge_fn is called repeatedly until payload_size bytes have been
+ *  covered.  prp1 is taken from the first element; prp2 is either a direct
+ *  page address (two pages in total) or the bus address of the tracker's
+ *  prp array (more than two pages).
+ *
+ * \return 0 on success.  If a callback fails, or an element fails the
+ *  alignment check in the loop, the request is completed with an error
+ *  through the tracker and -1 is returned.
+ */
+static int
+_nvme_qpair_build_sgl_request(struct nvme_qpair *qpair, struct nvme_request *req,
+			      struct nvme_tracker *tr)
+{
+	int rc;
+	uint64_t phys_addr;
+	uint32_t data_transfered, remaining_transfer_len, length;
+	uint32_t nseg, cur_nseg, total_nseg, last_nseg, modulo, unaligned;
+	uint32_t sge_count = 0;
+	uint64_t prp2 = 0;
+	struct nvme_request *parent;
+
+	/*
+	 * Build scattered payloads.
+	 */
+
+	/* The callbacks are invoked with the parent's cb_arg when req is a child. */
+	parent = req->parent ? req->parent : req;
+	nvme_assert(req->payload.type == NVME_PAYLOAD_TYPE_SGL, ("sgl payload type required\n"));
+	nvme_assert(req->payload.u.sgl.reset_sgl_fn != NULL, ("sgl reset callback required\n"));
+	req->payload.u.sgl.reset_sgl_fn(parent->cb_arg, req->payload_offset);
+
+	remaining_transfer_len = req->payload_size;
+	total_nseg = 0;
+	last_nseg = 0;
+
+	while (remaining_transfer_len > 0) {
+		nvme_assert(req->payload.u.sgl.next_sge_fn != NULL, ("sgl callback required\n"));
+		rc = req->payload.u.sgl.next_sge_fn(parent->cb_arg, &phys_addr, &length);
+		if (rc) {
+			_nvme_fail_request_bad_vtophys(qpair, tr);
+			return -1;
+		}
+
+		/* Never consume more of this element than the request still needs. */
+		data_transfered = nvme_min(remaining_transfer_len, length);
+
+		/* Number of pages this element touches, including partial pages. */
+		nseg = data_transfered >> nvme_u32log2(PAGE_SIZE);
+		modulo = data_transfered & (PAGE_SIZE - 1);
+		unaligned = phys_addr & (PAGE_SIZE - 1);
+		if (modulo || unaligned) {
+			nseg += 1 + ((modulo + unaligned - 1) >> nvme_u32log2(PAGE_SIZE));
+		}
+
+		/* First element: its start address becomes prp1. */
+		if (total_nseg == 0) {
+			req->cmd.psdt = NVME_PSDT_PRP;
+			req->cmd.dptr.prp.prp1 = phys_addr;
+		}
+
+		total_nseg += nseg;
+		sge_count++;
+		remaining_transfer_len -= data_transfered;
+
+		if (total_nseg == 2) {
+			/*
+			 * Two pages so far: prp2 is the second page of a single
+			 *  element, or the start of the second element.
+			 */
+			if (sge_count == 1)
+				tr->req->cmd.dptr.prp.prp2 = phys_addr + PAGE_SIZE - unaligned;
+			else if (sge_count == 2)
+				tr->req->cmd.dptr.prp.prp2 = phys_addr;
+			/* save prp2 value */
+			prp2 = tr->req->cmd.dptr.prp.prp2;
+		} else if (total_nseg > 2) {
+			/* For the first element page 0 is already in prp1, so skip it. */
+			if (sge_count == 1)
+				cur_nseg = 1;
+			else
+				cur_nseg = 0;
+
+			/* More than two pages: prp2 now points at the tracker's prp array. */
+			tr->req->cmd.dptr.prp.prp2 = (uint64_t)tr->prp_bus_addr;
+			while (cur_nseg < nseg) {
+				if (prp2) {
+					/*
+					 * A direct prp2 was recorded earlier; it becomes
+					 *  entry 0 and later entries are shifted by one.
+					 */
+					tr->prp[0] = prp2;
+					tr->prp[last_nseg + 1] = phys_addr + cur_nseg * PAGE_SIZE - unaligned;
+				} else
+					tr->prp[last_nseg] = phys_addr + cur_nseg * PAGE_SIZE - unaligned;
+
+				last_nseg++;
+				cur_nseg++;
+
+				/* physical address and length check */
+				/*
+				 * While more data or more pages of this element
+				 *  remain, the element must start page-aligned and
+				 *  have a page-multiple length.
+				 */
+				if (remaining_transfer_len || (!remaining_transfer_len && (cur_nseg < nseg))) {
+					if ((length & (PAGE_SIZE - 1)) || unaligned) {
+						_nvme_fail_request_bad_vtophys(qpair, tr);
+						return -1;
+					}
+				}
+			}
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Submit a request on a queue pair.
+ *
+ * A request with children is expanded into its children, each submitted in
+ *  turn.  Otherwise a free tracker is taken, the data pointers are built
+ *  according to the payload type, and the command is written to the
+ *  submission queue.  If no tracker is free or the qpair is disabled, the
+ *  request is queued for later, or failed if the controller has failed.
+ */
+void
+nvme_qpair_submit_request(struct nvme_qpair *qpair, struct nvme_request *req)
+{
+	int			rc;
+	struct nvme_tracker	*tr;
+	struct nvme_request	*child_req;
+
+	/* May enable the qpair; the result is re-read from is_enabled below. */
+	nvme_qpair_check_enabled(qpair);
+
+	if (req->num_children) {
+		/*
+		 * This is a split (parent) request. Submit all of the children but not the parent
+		 * request itself, since the parent is the original unsplit request.
+		 */
+		TAILQ_FOREACH(child_req, &req->children, child_tailq) {
+			nvme_qpair_submit_request(qpair, child_req);
+		}
+		return;
+	}
+
+	tr = LIST_FIRST(&qpair->free_tr);
+
+	if (tr == NULL || !qpair->is_enabled) {
+		/*
+		 * No tracker is available, or the qpair is disabled due to
+		 *  an in-progress controller-level reset or controller
+		 *  failure.
+		 */
+
+		if (qpair->ctrlr->is_failed) {
+			_nvme_fail_request_ctrlr_failed(qpair, req);
+		} else {
+			/*
+			 * Put the request on the qpair's request queue to be
+			 *  processed when a tracker frees up via a command
+			 *  completion or when the controller reset is
+			 *  completed.
+			 */
+			STAILQ_INSERT_TAIL(&qpair->queued_req, req, stailq);
+		}
+		return;
+	}
+
+	LIST_REMOVE(tr, list); /* remove tr from free_tr */
+	LIST_INSERT_HEAD(&qpair->outstanding_tr, tr, list);
+	/* Bind request and tracker; the command carries the tracker's cid. */
+	tr->req = req;
+	req->cmd.cid = tr->cid;
+
+	if (req->payload_size == 0) {
+		/* Null payload - leave PRP fields zeroed */
+	} else if (req->payload.type == NVME_PAYLOAD_TYPE_CONTIG) {
+		rc = _nvme_qpair_build_contig_request(qpair, req, tr);
+		if (rc < 0) {
+			/* The builder already completed the request with an error. */
+			return;
+		}
+	} else if (req->payload.type == NVME_PAYLOAD_TYPE_SGL) {
+		rc = _nvme_qpair_build_sgl_request(qpair, req, tr);
+		if (rc < 0) {
+			/* The builder already completed the request with an error. */
+			return;
+		}
+	} else {
+		nvme_assert(0, ("invalid NVMe payload type %d\n", req->payload.type));
+		_nvme_fail_request_bad_vtophys(qpair, tr);
+		return;
+	}
+
+	nvme_qpair_submit_tracker(qpair, tr);
+}
+
+/*
+ * Return the queue pair's rings to their initial state: head and tail at
+ *  slot 0, expected phase 1, and both rings zero-filled.
+ */
+void
+nvme_qpair_reset(struct nvme_qpair *qpair)
+{
+	size_t cmd_bytes = qpair->num_entries * sizeof(struct nvme_command);
+	size_t cpl_bytes = qpair->num_entries * sizeof(struct nvme_completion);
+
+	qpair->cq_head = 0;
+	qpair->sq_tail = 0;
+
+	/*
+	 * On the first pass through the completion queue, HW sets the phase
+	 *  bit of each completion to 1, so 1 is the value to look for when
+	 *  deciding which entries have completed.  The expected value is
+	 *  toggled each time the completion queue rolls over.
+	 */
+	qpair->phase = 1;
+
+	memset(qpair->cmd, 0, cmd_bytes);
+	memset(qpair->cpl, 0, cpl_bytes);
+}
+
+/*
+ * Enable the admin queue pair after first aborting, without retry, every
+ *  admin command still outstanding on it.
+ */
+static void
+_nvme_admin_qpair_enable(struct nvme_qpair *qpair)
+{
+	struct nvme_tracker		*cur;
+	struct nvme_tracker		*next;
+
+	/*
+	 * Commands found here are left over from a controller reset, and it's
+	 *  likely the context in which they were issued no longer applies, so
+	 *  they are completed manually with the do-not-retry value set.
+	 */
+	LIST_FOREACH_SAFE(cur, &qpair->outstanding_tr, list, next) {
+		nvme_printf(qpair->ctrlr,
+			    "aborting outstanding admin command\n");
+		nvme_qpair_manual_complete_tracker(qpair, cur,
+						   NVME_SCT_GENERIC,
+						   NVME_SC_ABORTED_BY_REQUEST,
+						   1 /* do not retry */,
+						   true);
+	}
+
+	qpair->is_enabled = true;
+}
+
+/*
+ * Enable an I/O queue pair: mark it enabled, manually abort every
+ *  outstanding I/O (retry permitted), then resubmit the requests that were
+ *  queued while the queue pair was unavailable.
+ */
+static void
+_nvme_io_qpair_enable(struct nvme_qpair *qpair)
+{
+	STAILQ_HEAD(, nvme_request)	pending;
+	struct nvme_tracker		*cur;
+	struct nvme_tracker		*next;
+	struct nvme_request		*req;
+
+	STAILQ_INIT(&pending);
+
+	qpair->is_enabled = true;
+
+	/*
+	 * Each abort normally turns into a retry, unless the retry count on
+	 *  the associated request has reached its limit.
+	 */
+	LIST_FOREACH_SAFE(cur, &qpair->outstanding_tr, list, next) {
+		nvme_printf(qpair->ctrlr, "aborting outstanding i/o\n");
+		nvme_qpair_manual_complete_tracker(qpair, cur, NVME_SCT_GENERIC,
+						   NVME_SC_ABORTED_BY_REQUEST, 0, true);
+	}
+
+	/* Take the whole queue over locally, leaving queued_req empty. */
+	STAILQ_SWAP(&qpair->queued_req, &pending, nvme_request);
+
+	while ((req = STAILQ_FIRST(&pending)) != NULL) {
+		STAILQ_REMOVE_HEAD(&pending, stailq);
+
+		nvme_printf(qpair->ctrlr, "resubmitting queued i/o\n");
+		nvme_qpair_print_command(qpair, &req->cmd);
+		nvme_qpair_submit_request(qpair, req);
+	}
+}
+
+/* Enable a queue pair using the admin or I/O specific procedure. */
+void
+nvme_qpair_enable(struct nvme_qpair *qpair)
+{
+	if (nvme_qpair_is_admin_queue(qpair)) {
+		_nvme_admin_qpair_enable(qpair);
+		return;
+	}
+
+	_nvme_io_qpair_enable(qpair);
+}
+
+/* Mark the admin queue pair disabled and abort outstanding async event requests. */
+static void
+_nvme_admin_qpair_disable(struct nvme_qpair *qpair)
+{
+	qpair->is_enabled = false;
+	nvme_admin_qpair_abort_aers(qpair);
+}
+
+/* Mark an I/O queue pair disabled; outstanding trackers are left untouched here. */
+static void
+_nvme_io_qpair_disable(struct nvme_qpair *qpair)
+{
+	qpair->is_enabled = false;
+}
+
+/* Disable a queue pair using the admin or I/O specific procedure. */
+void
+nvme_qpair_disable(struct nvme_qpair *qpair)
+{
+	if (nvme_qpair_is_admin_queue(qpair)) {
+		_nvme_admin_qpair_disable(qpair);
+		return;
+	}
+
+	_nvme_io_qpair_disable(qpair);
+}
+
+/*
+ * Fail everything pending on a queue pair: first the requests still waiting
+ *  in the queue, then every outstanding tracker.  All of them complete with
+ *  a generic ABORTED - BY REQUEST status.
+ */
+void
+nvme_qpair_fail(struct nvme_qpair *qpair)
+{
+	struct nvme_tracker		*tr;
+	struct nvme_request		*req;
+
+	/* Requests that never got a tracker. */
+	while ((req = STAILQ_FIRST(&qpair->queued_req)) != NULL) {
+		STAILQ_REMOVE_HEAD(&qpair->queued_req, stailq);
+		nvme_printf(qpair->ctrlr, "failing queued i/o\n");
+		nvme_qpair_manual_complete_request(qpair, req, NVME_SCT_GENERIC,
+						   NVME_SC_ABORTED_BY_REQUEST, true);
+	}
+
+	/*
+	 * Outstanding I/O.  The tracker is not unlinked here; completing it
+	 *  with the do-not-retry value takes it off the outstanding list.
+	 */
+	while ((tr = LIST_FIRST(&qpair->outstanding_tr)) != NULL) {
+		nvme_printf(qpair->ctrlr, "failing outstanding i/o\n");
+		nvme_qpair_manual_complete_tracker(qpair, tr, NVME_SCT_GENERIC,
+						   NVME_SC_ABORTED_BY_REQUEST,
+						   1 /* do not retry */, true);
+	}
+}
+
diff --git a/src/spdk/lib/util/Makefile b/src/spdk/lib/util/Makefile
new file mode 100644
index 0000000..19eca32
--- /dev/null
+++ b/src/spdk/lib/util/Makefile
@@ -0,0 +1,51 @@
+#
+#  BSD LICENSE
+#
+#  Copyright (c) Intel Corporation.
+#  All rights reserved.
+#
+#  Redistribution and use in source and binary forms, with or without
+#  modification, are permitted provided that the following conditions
+#  are met:
+#
+#    * Redistributions of source code must retain the above copyright
+#      notice, this list of conditions and the following disclaimer.
+#    * Redistributions in binary form must reproduce the above copyright
+#      notice, this list of conditions and the following disclaimer in
+#      the documentation and/or other materials provided with the
+#      distribution.
+#    * Neither the name of Intel Corporation nor the names of its
+#      contributors may be used to endorse or promote products derived
+#      from this software without specific prior written permission.
+#
+#  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+
+# This directory sits two levels below the SPDK source root.
+SPDK_ROOT_DIR := $(CURDIR)/../..
+include $(SPDK_ROOT_DIR)/mk/spdk.common.mk
+
+# Add the DPDK include flags to the C compiler flags.
+CFLAGS += $(DPDK_INC)
+
+# Sources that make up the utility library.
+C_SRCS = file.c string.c pci.c
+
+# Static library produced by this directory.
+LIB = libspdk_util.a
+
+all : $(LIB)
+
+# CLEAN_C is not defined in this file; presumably it comes from the included spdk.common.mk.
+clean :
+	$(CLEAN_C)
+
+# OBJS and LIB_C are not defined in this file; presumably they come from the included .mk files.
+$(LIB) : $(OBJS)
+	$(LIB_C)
+
+include $(SPDK_ROOT_DIR)/mk/spdk.deps.mk
diff --git a/src/spdk/lib/util/file.c b/src/spdk/lib/util/file.c
new file mode 100644
index 0000000..b002502
--- /dev/null
+++ b/src/spdk/lib/util/file.c
@@ -0,0 +1,108 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright (c) Intel Corporation.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "spdk/file.h"
+
+#include <sys/ioctl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <stdint.h>
+
+#ifdef __linux__
+#include <linux/fs.h>
+#endif
+
+/*
+ * Query the size in bytes of the device behind fd using a platform ioctl.
+ *
+ * Uses DIOCGMEDIASIZE when that macro is defined, otherwise BLKGETSIZE64 on
+ * Linux. Returns 0 if the ioctl fails or if neither ioctl is available at
+ * compile time, so a caller cannot tell an error apart from an empty device.
+ */
+static uint64_t
+dev_get_size(int fd)
+{
+#if defined(DIOCGMEDIASIZE) /* FreeBSD */
+	off_t size;
+
+	if (ioctl(fd, DIOCGMEDIASIZE, &size) == 0) {
+		return size;
+	}
+#elif defined(__linux__) && defined(BLKGETSIZE64)
+	uint64_t size;
+
+	if (ioctl(fd, BLKGETSIZE64, &size) == 0) {
+		return size;
+	}
+#endif
+
+	/* ioctl failed or no supported ioctl was compiled in */
+	return 0;
+}
+
+/*
+ * Return the block (sector) size in bytes of the device behind fd.
+ *
+ * Uses DKIOCGETBLOCKSIZE when that macro is defined, otherwise BLKSSZGET on
+ * Linux. If the ioctl fails, or no supported ioctl was compiled in, the
+ * function falls back to 512 rather than reporting an error.
+ */
+uint32_t
+dev_get_blocklen(int fd)
+{
+#if defined(DKIOCGETBLOCKSIZE) /* FreeBSD */
+	uint32_t blocklen;
+
+	if (ioctl(fd, DKIOCGETBLOCKSIZE, &blocklen) == 0) {
+		return blocklen;
+	}
+#elif defined(__linux__) && defined(BLKSSZGET)
+	/* NOTE(review): BLKSSZGET is usually described as filling an int - confirm uint32_t is fine here */
+	uint32_t blocklen;
+
+	if (ioctl(fd, BLKSSZGET, &blocklen) == 0) {
+		return blocklen;
+	}
+#endif
+
+	/* Assume 512 if none of the other methods worked */
+	return 512;
+}
+
+/*
+ * Return the size in bytes of the object referred to by fd.
+ *
+ * Regular files report st_size; block and character devices are sized
+ * through dev_get_size(). Any other file type, or an fstat() failure,
+ * yields 0.
+ */
+uint64_t
+file_get_size(int fd)
+{
+	struct stat info;
+
+	if (fstat(fd, &info) != 0) {
+		return 0;
+	}
+
+	switch (info.st_mode & S_IFMT) {
+	case S_IFBLK:
+	case S_IFCHR:
+		return dev_get_size(fd);
+	case S_IFREG:
+		return info.st_size;
+	default:
+		/* Symlinks, directories, FIFOs, sockets: no meaningful size */
+		return 0;
+	}
+}
diff --git a/src/spdk/lib/util/pci.c b/src/spdk/lib/util/pci.c
new file mode 100644
index 0000000..6651ece
--- /dev/null
+++ b/src/spdk/lib/util/pci.c
@@ -0,0 +1,360 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright (c) Intel Corporation.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <fcntl.h>
+#include <dirent.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include <pciaccess.h>
+
+#ifdef __FreeBSD__
+#include <sys/pciio.h>
+#endif
+
+#include "spdk/pci.h"
+
+#define SYSFS_PCI_DEVICES	"/sys/bus/pci/devices"
+#define SYSFS_PCI_DRIVERS	"/sys/bus/pci/drivers"
+#define PCI_PRI_FMT		"%04x:%02x:%02x.%1u"
+#define SPDK_PCI_PATH_MAX	256
+
+
+/*
+ * Thin wrappers over the 32-bit config-space accessors declared in
+ * <pciaccess.h>. For both macros var must be a pointer: the read wrapper
+ * passes it through as the output location, the write wrapper dereferences
+ * it to obtain the value to write.
+ */
+#define spdk_pcicfg_read32(handle, var, offset)  pci_device_cfg_read_u32(handle, var, offset)
+#define spdk_pcicfg_write32(handle, var, offset) pci_device_cfg_write_u32(handle, *var, offset)
+
+/*
+ * Read the Device Serial Number extended capability of dev and format it
+ * into sn as 16 hex digits (upper dword first) followed by a NUL.
+ *
+ * dev: device whose extended config space is walked.
+ * sn:  output buffer.
+ * len: size of sn in bytes; must be at least 17.
+ *
+ * Returns 0 on success, -1 if len is too small, a config-space read fails,
+ * or the capability is not present.
+ */
+int
+pci_device_get_serial_number(struct pci_device *dev, char *sn, int len)
+{
+	int err;
+	uint32_t pos, header = 0;
+	uint32_t i, buf[2];
+	/*
+	 * Upper bound on the number of extended capabilities: each one takes at
+	 * least 8 bytes of the 4 KiB extended config space above the first 256
+	 * bytes. This stops the walk if a broken device reports a cyclic list.
+	 */
+	int ttl = (4096 - 256) / 8;
+
+	if (len < 17)
+		return -1;
+
+	err = spdk_pcicfg_read32(dev, &header, PCI_CFG_SIZE);
+	if (err || !header)
+		return -1;
+
+	pos = PCI_CFG_SIZE;
+	while (ttl-- > 0) {
+		if ((header & 0x0000ffff) == PCI_EXT_CAP_ID_SN) {
+			/* skip the capability header; the two data dwords follow it */
+			pos += 4;
+			for (i = 0; i < 2; i++) {
+				err = spdk_pcicfg_read32(dev,
+							 &buf[i], pos + 4 * i);
+				if (err)
+					return -1;
+			}
+			/* bounded by the caller-supplied length, not by assumption */
+			snprintf(sn, len, "%08x%08x", buf[1], buf[0]);
+			return 0;
+		}
+		/* bits 31:20 of the header hold the dword-aligned next offset */
+		pos = (header >> 20) & 0xffc;
+		/* 0 if no other items exist */
+		if (pos < PCI_CFG_SIZE)
+			return -1;
+		err = spdk_pcicfg_read32(dev, &header, pos);
+		if (err)
+			return -1;
+	}
+	return -1;
+}
+
+#ifdef __linux__
+/*
+ * Linux: report whether dev is bound to a driver other than uio_pci_generic.
+ *
+ * Resolves the sysfs "driver" symlink of the device and compares the last
+ * path component with "uio_pci_generic". Returns non-zero when a different
+ * driver is bound, 0 when the driver is uio_pci_generic or when the link
+ * cannot be read (or does not fit the buffer).
+ */
+int
+pci_device_has_non_uio_driver(struct pci_device *dev)
+{
+	char link_path[SPDK_PCI_PATH_MAX];
+	char target[SPDK_PCI_PATH_MAX];
+	const char *name;
+	char *slash;
+	ssize_t n;
+
+	snprintf(link_path, sizeof(link_path),
+		 SYSFS_PCI_DEVICES "/" PCI_PRI_FMT "/driver",
+		 spdk_pci_device_get_domain(dev), spdk_pci_device_get_bus(dev),
+		 spdk_pci_device_get_dev(dev), spdk_pci_device_get_func(dev));
+
+	n = readlink(link_path, target, sizeof(target));
+	if (n < 0 || n >= SPDK_PCI_PATH_MAX) {
+		return 0;
+	}
+
+	/* readlink() leaves the buffer unterminated */
+	target[n] = '\0';
+
+	/* The driver name is whatever follows the final slash, if there is one */
+	slash = strrchr(target, '/');
+	name = slash ? slash + 1 : target;
+
+	return strcmp(name, "uio_pci_generic") != 0;
+}
+#endif
+
+#ifdef __FreeBSD__
+/*
+ * FreeBSD: report whether dev is attached to a driver other than nic_uio.
+ *
+ * Looks the device up through the PCIOCGETCONF ioctl on /dev/pci, matching
+ * on domain, bus, device and function. Returns 0 when no driver name is
+ * reported or the name is "nic_uio", 1 for any other driver, and -1 when
+ * /dev/pci cannot be opened, the ioctl fails, or the lookup does not yield
+ * exactly one match.
+ */
+int
+pci_device_has_non_uio_driver(struct pci_device *dev)
+{
+	struct pci_match_conf	match;
+	struct pci_conf_io	req;
+	struct pci_conf		found;
+	int			pci_fd;
+
+	/* Describe the one device we are interested in */
+	memset(&match, 0, sizeof(match));
+	match.pc_sel.pc_domain = spdk_pci_device_get_domain(dev);
+	match.pc_sel.pc_bus = spdk_pci_device_get_bus(dev);
+	match.pc_sel.pc_dev = spdk_pci_device_get_dev(dev);
+	match.pc_sel.pc_func = spdk_pci_device_get_func(dev);
+	match.flags = PCI_GETCONF_MATCH_DOMAIN |
+		      PCI_GETCONF_MATCH_BUS |
+		      PCI_GETCONF_MATCH_DEV |
+		      PCI_GETCONF_MATCH_FUNC;
+
+	/* One pattern in, room for one result out */
+	memset(&req, 0, sizeof(req));
+	req.patterns = &match;
+	req.pat_buf_len = sizeof(match);
+	req.num_patterns = 1;
+	req.matches = &found;
+	req.match_buf_len = sizeof(found);
+
+	pci_fd = open("/dev/pci", O_RDONLY, 0);
+	if (pci_fd < 0) {
+		fprintf(stderr, "could not open /dev/pci\n");
+		return -1;
+	}
+
+	if (ioctl(pci_fd, PCIOCGETCONF, &req) == -1) {
+		fprintf(stderr, "ioctl(PCIOCGETCONF) failed\n");
+		close(pci_fd);
+		return -1;
+	}
+
+	close(pci_fd);
+
+	if (req.num_matches != 1) {
+		fprintf(stderr, "could not find specified device\n");
+		return -1;
+	}
+
+	/* Non-zero only for a named driver that is not nic_uio */
+	return found.pd_name[0] != '\0' && strcmp(found.pd_name, "nic_uio") != 0;
+}
+#endif
+
+/*
+ * Detach dev from whatever kernel driver is currently bound to it by writing
+ * the device address (domain:bus:dev.func) to the driver's sysfs "unbind"
+ * file.
+ *
+ * Returns 0 on success, and also 0 when the unbind file cannot be opened
+ * (e.g. no driver is bound, so there is nothing to undo). Returns -1 when
+ * the write is rejected by stdio.
+ *
+ * NOTE(review): the stream is buffered, so the kernel may only see the write
+ * at fclose(), whose result is not checked here - confirm whether a failed
+ * flush should be reported.
+ */
+int
+pci_device_unbind_kernel_driver(struct pci_device *dev)
+{
+	int n, rc = 0;
+	FILE *fd;
+	char filename[SPDK_PCI_PATH_MAX];
+	char buf[256];
+
+	snprintf(filename, sizeof(filename),
+		 SYSFS_PCI_DEVICES "/" PCI_PRI_FMT "/driver/unbind",
+		 spdk_pci_device_get_domain(dev), spdk_pci_device_get_bus(dev),
+		 spdk_pci_device_get_dev(dev), spdk_pci_device_get_func(dev));
+
+	fd = fopen(filename, "w");
+	if (!fd)
+		return 0;
+
+	/*
+	 * The last field is the function number. The previous code passed the
+	 * device number twice, producing the wrong address for any device whose
+	 * function number differs from its device number.
+	 */
+	n = snprintf(buf, sizeof(buf), PCI_PRI_FMT,
+		     spdk_pci_device_get_domain(dev), spdk_pci_device_get_bus(dev),
+		     spdk_pci_device_get_dev(dev), spdk_pci_device_get_func(dev));
+
+	if (fwrite(buf, n, 1, fd) == 0)
+		rc = -1;
+
+	fclose(fd);
+	return rc;
+}
+
+/*
+ * Check whether a kernel module is listed in /proc/modules.
+ *
+ * A line counts as a hit when it contains driver_name anywhere, so this is
+ * a substring match rather than an exact module-name comparison.
+ *
+ * Returns 0 when a matching line is found, -1 when none is found or
+ * /proc/modules cannot be opened.
+ */
+static int
+check_modules(char *driver_name)
+{
+	char line[256];
+	FILE *modules;
+	int rc = -1;
+
+	modules = fopen("/proc/modules", "r");
+	if (modules == NULL)
+		return -1;
+
+	while (fgets(line, sizeof(line), modules) != NULL) {
+		if (strstr(line, driver_name) != NULL) {
+			rc = 0;
+			break;
+		}
+	}
+
+	fclose(modules);
+	return rc;
+}
+
+/*
+ * Ask the named driver to claim devices with dev's vendor/device ID by
+ * writing "<vendor> <device>" (hex) to the driver's sysfs "new_id" file.
+ *
+ * Returns 0 on success. Returns the check_modules() error when the module
+ * is not listed as loaded, and -1 when new_id cannot be opened or written.
+ */
+int
+pci_device_bind_uio_driver(struct pci_device *dev, char *driver_name)
+{
+	char new_id_path[SPDK_PCI_PATH_MAX];
+	char ids[256];
+	FILE *new_id;
+	int rc, ids_len;
+
+	/* Nothing to bind to unless the module is loaded */
+	rc = check_modules(driver_name);
+	if (rc < 0) {
+		fprintf(stderr, "No %s module loaded\n", driver_name);
+		return rc;
+	}
+
+	snprintf(new_id_path, sizeof(new_id_path),
+		 SYSFS_PCI_DRIVERS "/" "%s" "/new_id", driver_name);
+
+	new_id = fopen(new_id_path, "w");
+	if (new_id == NULL) {
+		return -1;
+	}
+
+	ids_len = snprintf(ids, sizeof(ids), "%04x %04x",
+			   spdk_pci_device_get_vendor_id(dev),
+			   spdk_pci_device_get_device_id(dev));
+
+	rc = (fwrite(ids, ids_len, 1, new_id) == 0) ? -1 : 0;
+
+	fclose(new_id);
+	return rc;
+}
+
+/*
+ * Move dev from its current kernel driver to the UIO driver named by
+ * PCI_UIO_DRIVER: unbind first, then bind.
+ *
+ * Progress and failures are reported on stdout/stderr; the messages identify
+ * the device as bus:dev:func in decimal and omit the PCI domain.
+ *
+ * Returns 0 on success, -1 if either step fails.
+ */
+int
+pci_device_switch_to_uio_driver(struct pci_device *dev)
+{
+	/* Step 1: detach from the current driver (non-zero means failure) */
+	if (pci_device_unbind_kernel_driver(dev)) {
+		fprintf(stderr, "Device %d:%d:%d unbind from "
+			"kernel driver failed\n",
+			spdk_pci_device_get_bus(dev),
+			spdk_pci_device_get_dev(dev),
+			spdk_pci_device_get_func(dev));
+		return -1;
+	}
+	/* Step 2: hand the device to the UIO driver */
+	if (pci_device_bind_uio_driver(dev, PCI_UIO_DRIVER)) {
+		fprintf(stderr, "Device %d:%d:%d bind to "
+			"uio driver failed\n",
+			spdk_pci_device_get_bus(dev),
+			spdk_pci_device_get_dev(dev),
+			spdk_pci_device_get_func(dev));
+		return -1;
+	}
+	printf("Device %d:%d:%d bind to uio driver success\n",
+	       spdk_pci_device_get_bus(dev), spdk_pci_device_get_dev(dev),
+	       spdk_pci_device_get_func(dev));
+	return 0;
+}
+
+/*
+ * Claim dev for the calling process so that other cooperating processes
+ * do not use it at the same time.
+ *
+ * A POSIX shared-memory object named after the device address is created
+ * (or opened), sized to hold one int, and write-locked with fcntl(F_SETLK).
+ * On success the caller's pid is stored in the object and the descriptor is
+ * deliberately left open, since closing it would drop the lock.
+ *
+ * Returns 0 when the claim succeeds, -1 when the object cannot be created,
+ * sized or mapped, or when the lock cannot be taken; in the last case the
+ * pid stored in the object is reported on stderr.
+ *
+ * NOTE(review): the object name has no leading '/', which POSIX recommends
+ * for shm_open(); left unchanged so existing users keep sharing one name.
+ */
+int
+pci_device_claim(struct pci_device *dev)
+{
+	int dev_fd;
+	char shm_name[64];
+	int pid;
+	void *dev_map;
+	struct flock pcidev_lock = {
+		.l_type = F_WRLCK,
+		.l_whence = SEEK_SET,
+		.l_start = 0,
+		.l_len = 0,
+	};
+
+	snprintf(shm_name, sizeof(shm_name), PCI_PRI_FMT,
+		 spdk_pci_device_get_domain(dev),
+		 spdk_pci_device_get_bus(dev), spdk_pci_device_get_dev(dev),
+		 spdk_pci_device_get_func(dev));
+
+	dev_fd = shm_open(shm_name, O_RDWR | O_CREAT, 0600);
+	if (dev_fd == -1) {
+		fprintf(stderr, "could not shm_open %s\n", shm_name);
+		return -1;
+	}
+
+	if (ftruncate(dev_fd, sizeof(int)) != 0) {
+		fprintf(stderr, "could not truncate shm %s\n", shm_name);
+		close(dev_fd);
+		return -1;
+	}
+
+	dev_map = mmap(NULL, sizeof(int), PROT_READ | PROT_WRITE,
+		       MAP_SHARED, dev_fd, 0);
+	/* mmap() signals failure with MAP_FAILED, never with NULL */
+	if (dev_map == MAP_FAILED) {
+		fprintf(stderr, "could not mmap shm %s\n", shm_name);
+		close(dev_fd);
+		return -1;
+	}
+
+	if (fcntl(dev_fd, F_SETLK, &pcidev_lock) != 0) {
+		/* Someone else holds the lock; report the pid they recorded */
+		pid = *(int *)dev_map;
+		fprintf(stderr, "Cannot create lock on device %s, probably"
+			" process %d has claimed it\n", shm_name, pid);
+		munmap(dev_map, sizeof(int));
+		close(dev_fd);
+		return -1;
+	}
+
+	*(int *)dev_map = (int)getpid();
+	munmap(dev_map, sizeof(int));
+	/* Keep dev_fd open to maintain the lock. */
+	return 0;
+}
diff --git a/src/spdk/lib/util/string.c b/src/spdk/lib/util/string.c
new file mode 100644
index 0000000..2ec50e8
--- /dev/null
+++ b/src/spdk/lib/util/string.c
@@ -0,0 +1,82 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright (c) Intel Corporation.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "spdk/string.h"
+
+/*
+ * printf-style formatting into a freshly malloc()ed string.
+ *
+ * Starts with a 32-byte buffer and doubles it until the formatted output,
+ * including its terminator, fits. The caller owns the returned buffer and
+ * must free() it. Returns NULL if allocation fails or if the result would
+ * need more than 1 MiB.
+ */
+char *
+sprintf_alloc(const char *format, ...)
+{
+	/* Cap the buffer so a misbehaving vsnprintf() cannot make us loop forever. */
+	const size_t max_size = 1024 * 1024;
+	size_t size;
+
+	for (size = 32; size <= max_size; size *= 2) {
+		va_list ap;
+		char *out;
+		int written;
+
+		out = malloc(size);
+		if (out == NULL) {
+			return NULL;
+		}
+
+		va_start(ap, format);
+		written = vsnprintf(out, size, format, ap);
+		va_end(ap);
+
+		/*
+		 * The returned count excludes the terminator, so it has to be
+		 * strictly smaller than the buffer for the output to be complete.
+		 */
+		if (written >= 0 && (size_t)written < size) {
+			return out;
+		}
+
+		/*
+		 * Some libc versions do not report the required length reliably,
+		 * so grow geometrically instead of trusting the return value. The
+		 * old contents are useless, hence free() + malloc() and no realloc().
+		 */
+		free(out);
+	}
+
+	return NULL;
+}
diff --git a/src/spdk/mk/spdk.common.mk b/src/spdk/mk/spdk.common.mk
new file mode 100644
index 0000000..16ab059
--- /dev/null
+++ b/src/spdk/mk/spdk.common.mk
@@ -0,0 +1,139 @@
+#
+#  BSD LICENSE
+#
+#  Copyright (c) Intel Corporation.
+#  All rights reserved.
+#
+#  Redistribution and use in source and binary forms, with or without
+#  modification, are permitted provided that the following conditions
+#  are met:
+#
+#    * Redistributions of source code must retain the above copyright
+#      notice, this list of conditions and the following disclaimer.
+#    * Redistributions in binary form must reproduce the above copyright
+#      notice, this list of conditions and the following disclaimer in
+#      the documentation and/or other materials provided with the
+#      distribution.
+#    * Neither the name of Intel Corporation nor the names of its
+#      contributors may be used to endorse or promote products derived
+#      from this software without specific prior written permission.
+#
+#  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+
+include $(SPDK_ROOT_DIR)/CONFIG
+
+# Extra compiler options folded into COMMON_CFLAGS; may be overridden by the caller.
+C_OPT ?= -fno-omit-frame-pointer
+# Recipe prefix: "@" keeps make from echoing the commands. Override with an
+# empty value to see the full command lines.
+Q ?= @
+# Path label printed by spdk.subdirs.mk; defaults to the current directory name.
+S ?= $(notdir $(CURDIR))
+
+# When make is run without a goal, record the default goal so that it can be
+# forwarded explicitly to sub-makes.
+ifeq ($(MAKECMDGOALS),)
+MAKECMDGOALS=$(.DEFAULT_GOAL)
+endif
+
+# Kernel name as printed by uname; compared against FreeBSD and Linux below.
+OS := $(shell uname)
+
+COMMON_CFLAGS = -g $(C_OPT) -Wall -Wextra -Wno-unused-parameter -Wno-missing-field-initializers -Wmissing-declarations -Wstrict-prototypes -Werror -fno-strict-aliasing -march=native -m64 -I$(SPDK_ROOT_DIR)/include
+
+COMMON_CFLAGS += -Wformat -Wformat-security -Wformat-nonliteral
+
+COMMON_CFLAGS += -D_GNU_SOURCE
+
+# Always build PIC code so that objects can be used in shared libs and position-independent executables
+COMMON_CFLAGS += -fPIC
+
+# Enable stack buffer overflow checking
+COMMON_CFLAGS += -fstack-protector
+
+# Enable full RELRO - no lazy relocation (resolve everything at load time).
+# This allows the GOT to be made read-only early in the loading process.
+LDFLAGS += -Wl,-z,relro,-z,now
+
+# Make the stack non-executable.
+# This is the default in most environments, but it doesn't hurt to set it explicitly.
+LDFLAGS += -Wl,-z,noexecstack
+
+ifeq ($(OS),FreeBSD)
+LIBS += -L/usr/local/lib
+COMMON_CFLAGS += -I/usr/local/include
+endif
+
+ifeq ($(CONFIG_DEBUG), y)
+COMMON_CFLAGS += -DDEBUG -O0
+else
+COMMON_CFLAGS += -DNDEBUG -O2
+# Enable _FORTIFY_SOURCE checks - these only work when optimizations are enabled.
+COMMON_CFLAGS += -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=2
+endif
+
+ifeq ($(CONFIG_COVERAGE), y)
+COMMON_CFLAGS += -fprofile-arcs -ftest-coverage
+LDFLAGS += -fprofile-arcs -ftest-coverage
+ifeq ($(OS),FreeBSD)
+LDFLAGS += --coverage
+endif
+endif
+
+CFLAGS   += $(COMMON_CFLAGS) -Wno-pointer-sign -std=gnu99
+
+MAKEFLAGS += --no-print-directory
+
+# One object file per C source listed by the including Makefile.
+OBJS = $(C_SRCS:.c=.o)
+
+# Have the compiler emit header dependencies into a temporary file; COMPILE_C
+# renames it to its final .d name only after the compile has succeeded.
+DEPFLAGS = -MMD -MP -MF $*.d.tmp
+
+# Compile first input $< (.c) into $@ (.o)
+COMPILE_C=\
+	$(Q)echo "  CC $@"; \
+	$(CC) -o $@ $(DEPFLAGS) $(CFLAGS) -c $< && \
+	mv -f $*.d.tmp $*.d
+
+# Link $(OBJS) and $(LIBS) into $@ (app)
+LINK_C=\
+	$(Q)echo "  LINK $@"; \
+	$(CC) -o $@ $(CPPFLAGS) $(LDFLAGS) $(OBJS) $(LIBS)
+
+# Archive $(OBJS) into $@ (.a)
+LIB_C=\
+	$(Q)echo "  LIB $@"; \
+	ar crDs $@ $(OBJS)
+
+# Remove the default set of generated files from the current directory
+CLEAN_C=\
+	$(Q)rm -f *.a *.o *.d *.d.tmp *.gcno *.gcda
+
+# Objects depend on their dependency file and on every makefile read so far,
+# so editing the build system triggers a rebuild.
+%.o: %.c %.d $(MAKEFILE_LIST)
+	$(COMPILE_C)
+
+# Empty recipe: a .d file that does not exist yet is not an error.
+%.d: ;
+
+DPDK_DIR ?= $(CONFIG_DPDK_DIR)
+export DPDK_DIR_ABS = $(abspath $(DPDK_DIR))
+DPDK_INC_DIR ?= $(DPDK_DIR_ABS)/include
+DPDK_LIB_DIR ?= $(DPDK_DIR_ABS)/lib
+
+DPDK_INC = -I$(DPDK_INC_DIR)
+DPDK_LIB = -L$(DPDK_LIB_DIR) -lrte_eal -lrte_mempool -lrte_ring -Wl,-rpath=$(DPDK_LIB_DIR)
+# librte_malloc was removed after DPDK 2.1.  Link this library conditionally based on its
+#  existence to maintain backward compatibility.
+ifneq ($(wildcard $(DPDK_DIR_ABS)/lib/librte_malloc.*),)
+DPDK_LIB += -lrte_malloc
+endif
+
+# DPDK requires dl library for dlopen/dlclose on Linux.
+ifeq ($(OS),Linux)
+DPDK_LIB += -ldl
+endif
+ifeq ($(OS),FreeBSD)
+DPDK_LIB += -lexecinfo
+endif
diff --git a/src/spdk/mk/spdk.deps.mk b/src/spdk/mk/spdk.deps.mk
new file mode 100644
index 0000000..237d7f9
--- /dev/null
+++ b/src/spdk/mk/spdk.deps.mk
@@ -0,0 +1,3 @@
+# Keep object files even if make is interrupted or treats them as intermediate.
+.PRECIOUS: $(OBJS)
+
+# Read the generated dependency files; the leading "-" ignores missing ones.
+-include $(OBJS:.o=.d)
diff --git a/src/spdk/mk/spdk.subdirs.mk b/src/spdk/mk/spdk.subdirs.mk
new file mode 100644
index 0000000..e806447
--- /dev/null
+++ b/src/spdk/mk/spdk.subdirs.mk
@@ -0,0 +1,3 @@
+# Recurse into each enabled subdirectory, forwarding the requested goals.
+# S carries the path label shown in the progress line, and -e lets exported
+# environment variables override assignments made in the sub-makefiles.
+$(DIRS-y) :
+	@echo "== $S/$@ ($(MAKECMDGOALS))"
+	$(Q)$(MAKE) -e -C $@ S=$S/$@ $(MAKECMDGOALS) $(MAKESUBDIRFLAGS)
diff --git a/src/test/Makefile-client.am b/src/test/Makefile-client.am
index 967dde4..4a60b47 100644
--- a/src/test/Makefile-client.am
+++ b/src/test/Makefile-client.am
@@ -29,6 +29,8 @@ endif
 
 bin_PROGRAMS += ceph-dencoder
 
+noinst_HEADERS += \
+	test/encoding/test_ceph_time.h
 
 if WITH_RADOS
 
@@ -365,6 +367,7 @@ unittest_librbd_SOURCES = \
 	test/librbd/test_mock_Journal.cc \
 	test/librbd/exclusive_lock/test_mock_AcquireRequest.cc \
 	test/librbd/exclusive_lock/test_mock_ReleaseRequest.cc \
+	test/librbd/image/test_mock_RefreshRequest.cc \
 	test/librbd/journal/test_mock_Replay.cc \
 	test/librbd/object_map/test_mock_InvalidateRequest.cc \
 	test/librbd/object_map/test_mock_LockRequest.cc \
@@ -427,6 +430,50 @@ noinst_HEADERS += \
 	test/librbd/mock/MockReadahead.h \
 	test/librbd/object_map/mock/MockInvalidateRequest.h
 
+librbd_mirror_test_la_SOURCES = \
+	test/rbd_mirror/test_ClusterWatcher.cc \
+	test/rbd_mirror/test_PoolWatcher.cc
+librbd_mirror_test_la_CXXFLAGS = $(UNITTEST_CXXFLAGS)
+noinst_LTLIBRARIES += librbd_mirror_test.la
+
+unittest_rbd_mirror_SOURCES = \
+	test/rbd_mirror/test_main.cc
+unittest_rbd_mirror_CXXFLAGS = $(UNITTEST_CXXFLAGS)
+unittest_rbd_mirror_LDADD = \
+	librbd_mirror_test.la \
+	librados_test_stub.la \
+	librbd_mirror_internal.la \
+	librbd_internal.la \
+	librbd_api.la \
+	libjournal.la \
+	librados_internal.la \
+	libcls_rbd_client.la \
+	libcls_lock_client.la \
+	libcls_journal_client.la \
+	$(LIBRBD_TYPES) \
+	$(LIBRADOS) $(LIBOSDC) $(UNITTEST_LDADD) \
+	$(CEPH_GLOBAL) $(RADOS_TEST_LDADD)
+check_PROGRAMS += unittest_rbd_mirror
+
+ceph_test_rbd_mirror_SOURCES = \
+        test/rbd_mirror/test_main.cc
+ceph_test_rbd_mirror_CXXFLAGS = $(UNITTEST_CXXFLAGS)
+ceph_test_rbd_mirror_LDADD = \
+	librbd_mirror_test.la \
+	librbd_mirror_internal.la \
+	librbd_internal.la \
+	librbd_api.la \
+	libjournal.la \
+	librados_internal.la \
+	libcls_rbd_client.la \
+	libcls_lock_client.la \
+	libcls_journal_client.la \
+	$(LIBRBD_TYPES) \
+	librados_api.la $(LIBRADOS_DEPS) \
+	$(LIBOSDC) $(UNITTEST_LDADD) \
+	$(CEPH_GLOBAL) $(RADOS_TEST_LDADD)
+bin_DEBUGPROGRAMS += ceph_test_rbd_mirror
+
 if LINUX
 ceph_test_librbd_fsx_SOURCES = test/librbd/fsx.cc
 ceph_test_librbd_fsx_LDADD = \
diff --git a/src/test/Makefile-server.am b/src/test/Makefile-server.am
index fc4d53d..fbb42e4 100644
--- a/src/test/Makefile-server.am
+++ b/src/test/Makefile-server.am
@@ -94,6 +94,11 @@ ceph_test_filestore_idempotent_sequence_SOURCES = \
 ceph_test_filestore_idempotent_sequence_LDADD = $(LIBOS) $(CEPH_GLOBAL)
 bin_DEBUGPROGRAMS += ceph_test_filestore_idempotent_sequence
 
+unittest_transaction_SOURCES = test/objectstore/test_transaction.cc
+unittest_transaction_LDADD = $(LIBOS) $(UNITTEST_LDADD) $(CEPH_GLOBAL)
+unittest_transaction_CXXFLAGS = $(UNITTEST_CXXFLAGS)
+check_TESTPROGRAMS += unittest_transaction
+
 ceph_xattr_bench_SOURCES = test/xattr_bench.cc
 ceph_xattr_bench_LDADD = $(LIBOS) $(UNITTEST_LDADD) $(CEPH_GLOBAL)
 ceph_xattr_bench_CXXFLAGS = $(UNITTEST_CXXFLAGS)
diff --git a/src/test/Makefile.am b/src/test/Makefile.am
index a9d3dbd..ed6a80d 100644
--- a/src/test/Makefile.am
+++ b/src/test/Makefile.am
@@ -76,6 +76,7 @@ check_SCRIPTS += \
 	test/mon/misc.sh \
 	test/mon/osd-crush.sh \
 	test/mon/mon-ping.sh \
+	test/mon/mon-created-time.sh \
 	test/mon/osd-erasure-code-profile.sh \
 	test/mon/mkfs.sh \
 	test/mon/mon-scrub.sh \
@@ -89,14 +90,10 @@ check_SCRIPTS += \
 	test/osd/osd-markdown.sh \
 	test/mon/mon-handle-forward.sh \
 	test/libradosstriper/rados-striper.sh \
-	test/test_objectstore_memstore.sh
-
-check_SCRIPTS += test/ceph-disk.sh
+	test/test_objectstore_memstore.sh \
+        test/test_pidfile.sh
 
 EXTRA_DIST += \
-	$(srcdir)/test/python/ceph-disk/setup.py \
-	$(srcdir)/test/python/ceph-disk/tox.ini \
-	$(srcdir)/test/python/ceph-disk/tests/test_ceph_disk.py \
 	$(srcdir)/test/python/brag-client/setup.py \
 	$(srcdir)/test/python/brag-client/tox.ini \
 	$(srcdir)/test/python/brag-client/tests/test_ceph_brag.py \
@@ -171,12 +168,26 @@ unittest_prioritized_queue_CXXFLAGS = $(UNITTEST_CXXFLAGS)
 unittest_prioritized_queue_LDADD = $(UNITTEST_LDADD) $(CEPH_GLOBAL)
 check_TESTPROGRAMS += unittest_prioritized_queue
 
+unittest_weighted_priority_queue_SOURCES = test/common/test_weighted_priority_queue.cc
+unittest_weighted_priority_queue_CXXFLAGS = $(UNITTEST_CXXFLAGS)
+unittest_weighted_priority_queue_LDADD = $(UNITTEST_LDADD) $(CEPH_GLOBAL)
+check_TESTPROGRAMS += unittest_weighted_priority_queue
 
 unittest_str_map_SOURCES = test/common/test_str_map.cc
 unittest_str_map_CXXFLAGS = $(UNITTEST_CXXFLAGS)
 unittest_str_map_LDADD = $(UNITTEST_LDADD) $(CEPH_GLOBAL)
 check_TESTPROGRAMS += unittest_str_map
 
+unittest_mutex_debug_SOURCES = test/common/test_mutex_debug.cc
+unittest_mutex_debug_CXXFLAGS = $(UNITTEST_CXXFLAGS)
+unittest_mutex_debug_LDADD = $(UNITTEST_LDADD) $(CEPH_GLOBAL) ${EXTRALIBS}
+check_TESTPROGRAMS += unittest_mutex_debug
+
+unittest_shunique_lock_SOURCES = test/common/test_shunique_lock.cc
+unittest_shunique_lock_CXXFLAGS = $(UNITTEST_CXXFLAGS)
+unittest_shunique_lock_LDADD = $(UNITTEST_LDADD) $(CEPH_GLOBAL) ${EXTRALIBS}
+check_TESTPROGRAMS += unittest_shunique_lock
+
 unittest_sharedptr_registry_SOURCES = test/common/test_sharedptr_registry.cc
 unittest_sharedptr_registry_CXXFLAGS = $(UNITTEST_CXXFLAGS)
 unittest_sharedptr_registry_LDADD = $(UNITTEST_LDADD) $(CEPH_GLOBAL)
@@ -419,6 +430,11 @@ unittest_bit_vector_CXXFLAGS = $(UNITTEST_CXXFLAGS)
 unittest_bit_vector_LDADD = $(UNITTEST_LDADD) $(CEPH_GLOBAL)
 check_TESTPROGRAMS += unittest_bit_vector
 
+unittest_interval_set_SOURCES = test/common/test_interval_set.cc
+unittest_interval_set_CXXFLAGS = $(UNITTEST_CXXFLAGS)
+unittest_interval_set_LDADD = $(UNITTEST_LDADD) $(CEPH_GLOBAL)
+check_TESTPROGRAMS += unittest_interval_set
+
 unittest_subprocess_SOURCES = test/test_subprocess.cc
 unittest_subprocess_LDADD = $(LIBCOMMON) $(UNITTEST_LDADD)
 unittest_subprocess_CXXFLAGS = $(UNITTEST_CXXFLAGS)
diff --git a/src/test/ObjectMap/KeyValueDBMemory.cc b/src/test/ObjectMap/KeyValueDBMemory.cc
index fc59b7d..a99641f 100644
--- a/src/test/ObjectMap/KeyValueDBMemory.cc
+++ b/src/test/ObjectMap/KeyValueDBMemory.cc
@@ -26,7 +26,7 @@ protected:
   map<pair<string,string>, bufferlist>::iterator it;
 
 public:
-  WholeSpaceMemIterator(KeyValueDBMemory *db) : db(db), ready(false) { }
+  explicit WholeSpaceMemIterator(KeyValueDBMemory *db) : db(db), ready(false) { }
   virtual ~WholeSpaceMemIterator() { }
 
   int seek_to_first() {
@@ -238,7 +238,7 @@ public:
    * keep it in mind.
    */
 
-  WholeSpaceSnapshotMemIterator(KeyValueDBMemory *db) :
+  explicit WholeSpaceSnapshotMemIterator(KeyValueDBMemory *db) :
     WholeSpaceMemIterator(db) { }
   ~WholeSpaceSnapshotMemIterator() {
     delete db;
diff --git a/src/test/ObjectMap/KeyValueDBMemory.h b/src/test/ObjectMap/KeyValueDBMemory.h
index 94e224b..0712fa0 100644
--- a/src/test/ObjectMap/KeyValueDBMemory.h
+++ b/src/test/ObjectMap/KeyValueDBMemory.h
@@ -16,7 +16,7 @@ public:
   std::map<std::pair<string,string>,bufferlist> db;
 
   KeyValueDBMemory() { }
-  KeyValueDBMemory(KeyValueDBMemory *db) : db(db->db) { }
+  explicit KeyValueDBMemory(KeyValueDBMemory *db) : db(db->db) { }
   virtual ~KeyValueDBMemory() { }
 
   virtual int init(string _opt) {
@@ -34,6 +34,7 @@ public:
     const std::set<string> &key,
     std::map<string, bufferlist> *out
     );
+  using KeyValueDB::get;
 
   int get_keys(
     const string &prefix,
@@ -61,7 +62,7 @@ public:
     list<Context *> on_commit;
     KeyValueDBMemory *db;
 
-    TransactionImpl_(KeyValueDBMemory *db) : db(db) {}
+    explicit TransactionImpl_(KeyValueDBMemory *db) : db(db) {}
 
 
     struct SetOp : public Context {
diff --git a/src/test/TestTimers.cc b/src/test/TestTimers.cc
index 0dc8a06..5e2b6b9 100644
--- a/src/test/TestTimers.cc
+++ b/src/test/TestTimers.cc
@@ -26,7 +26,7 @@ namespace
 class TestContext : public Context
 {
 public:
-  TestContext(int num_)
+  explicit TestContext(int num_)
     : num(num_)
   {
   }
@@ -50,7 +50,7 @@ protected:
 class StrictOrderTestContext : public TestContext
 {
 public:
-  StrictOrderTestContext (int num_)
+  explicit StrictOrderTestContext (int num_)
     : TestContext(num_)
   {
   }
diff --git a/src/test/admin_socket.cc b/src/test/admin_socket.cc
index cee215d..212d754 100644
--- a/src/test/admin_socket.cc
+++ b/src/test/admin_socket.cc
@@ -30,7 +30,7 @@
 class AdminSocketTest
 {
 public:
-  AdminSocketTest(AdminSocket *asokc)
+  explicit AdminSocketTest(AdminSocket *asokc)
     : m_asokc(asokc)
   {
   }
diff --git a/src/test/bench/bencher.cc b/src/test/bench/bencher.cc
index aebe729a..90f888d 100644
--- a/src/test/bench/bencher.cc
+++ b/src/test/bench/bencher.cc
@@ -10,7 +10,7 @@
 template<typename T>
 struct C_Holder : public Context {
   T obj;
-  C_Holder(
+  explicit C_Holder(
     T obj)
     : obj(obj) {}
   void finish(int r) {
@@ -20,13 +20,13 @@ struct C_Holder : public Context {
 
 struct OnDelete {
   Context *c;
-  OnDelete(Context *c) : c(c) {}
+  explicit OnDelete(Context *c) : c(c) {}
   ~OnDelete() { c->complete(0); }
 };
 
 struct Cleanup : public Context {
   Bencher *bench;
-  Cleanup(Bencher *bench) : bench(bench) {}
+  explicit Cleanup(Bencher *bench) : bench(bench) {}
   void finish(int r) {
     bench->complete_op();
   }
diff --git a/src/test/bench/distribution.h b/src/test/bench/distribution.h
index d3525b8..545195f 100644
--- a/src/test/bench/distribution.h
+++ b/src/test/bench/distribution.h
@@ -127,7 +127,7 @@ public:
 class Uniform : public Distribution<uint64_t> {
   uint64_t val;
 public:
-  Uniform(uint64_t val) : val(val) {}
+  explicit Uniform(uint64_t val) : val(val) {}
   virtual uint64_t operator()() {
     return val;
   }
diff --git a/src/test/bench/dumb_backend.h b/src/test/bench/dumb_backend.h
index 38e160d..941a974 100644
--- a/src/test/bench/dumb_backend.h
+++ b/src/test/bench/dumb_backend.h
@@ -42,7 +42,7 @@ class DumbBackend : public Backend {
   class SyncThread : public Thread {
     DumbBackend *backend;
   public:
-    SyncThread(DumbBackend *backend) : backend(backend) {}
+    explicit SyncThread(DumbBackend *backend) : backend(backend) {}
     void *entry() {
       backend->sync_loop();
       return 0;
@@ -84,8 +84,7 @@ class DumbBackend : public Backend {
     bool _empty() {
       return item_queue.empty();
     }
-    using ThreadPool::WorkQueue<write_item>::_process;
-    void _process(write_item *item) {
+    void _process(write_item *item, ThreadPool::TPHandle &) override {
       return backend->_write(
 	item->oid,
 	item->offset,
diff --git a/src/test/bench/rados_backend.h b/src/test/bench/rados_backend.h
index 911d6c7..a607ae7 100644
--- a/src/test/bench/rados_backend.h
+++ b/src/test/bench/rados_backend.h
@@ -10,7 +10,7 @@
 class RadosBackend : public Backend {
   librados::IoCtx *ioctx;
 public:
-  RadosBackend(
+  explicit RadosBackend(
     librados::IoCtx *ioctx)
     : ioctx(ioctx) {}
   void write(
diff --git a/src/test/bench/rbd_backend.h b/src/test/bench/rbd_backend.h
index 9c3bc8b..9994bc7 100644
--- a/src/test/bench/rbd_backend.h
+++ b/src/test/bench/rbd_backend.h
@@ -10,7 +10,7 @@
 class RBDBackend : public Backend {
   map<string, ceph::shared_ptr<librbd::Image> > *m_images;
 public:
-  RBDBackend(map<string, ceph::shared_ptr<librbd::Image> > *images)
+  explicit RBDBackend(map<string, ceph::shared_ptr<librbd::Image> > *images)
     : m_images(images) {}
   void write(
     const string &oid,
diff --git a/src/test/bench/small_io_bench_fs.cc b/src/test/bench/small_io_bench_fs.cc
index 75ec051..9466bd4 100644
--- a/src/test/bench/small_io_bench_fs.cc
+++ b/src/test/bench/small_io_bench_fs.cc
@@ -29,7 +29,7 @@ using namespace std;
 
 struct MorePrinting : public DetailedStatCollector::AdditionalPrinting {
   CephContext *cct;
-  MorePrinting(CephContext *cct) : cct(cct) {}
+  explicit MorePrinting(CephContext *cct) : cct(cct) {}
   void operator()(std::ostream *out) {
     bufferlist bl;
     Formatter *f = Formatter::create("json-pretty");
@@ -173,12 +173,12 @@ int main(int argc, char **argv)
     std::cout << "collection " << pgid << std::endl;
     ObjectStore::Transaction t;
     t.create_collection(coll_t(pgid), 0);
-    fs.apply_transaction(&osr, t);
+    fs.apply_transaction(&osr, std::move(t));
   }
   {
     ObjectStore::Transaction t;
     t.create_collection(coll_t(), 0);
-    fs.apply_transaction(&osr, t);
+    fs.apply_transaction(&osr, std::move(t));
   }
 
   vector<ceph::shared_ptr<Bencher> > benchers(
diff --git a/src/test/bench/testfilestore_backend.cc b/src/test/bench/testfilestore_backend.cc
index 2ce1f87..8a210ec 100644
--- a/src/test/bench/testfilestore_backend.cc
+++ b/src/test/bench/testfilestore_backend.cc
@@ -4,17 +4,6 @@
 #include "global/global_init.h"
 #include "os/ObjectStore.h"
 
-struct C_DeleteTransWrapper : public Context {
-  Context *c;
-  ObjectStore::Transaction *t;
-  C_DeleteTransWrapper(
-    ObjectStore::Transaction *t,
-    Context *c) : c(c), t(t) {}
-  void finish(int r) {
-    c->complete(r);
-    delete t;
-  }
-};
 
 TestFileStoreBackend::TestFileStoreBackend(
   ObjectStore *os, bool write_infos)
@@ -57,9 +46,10 @@ void TestFileStoreBackend::write(
 
   os->queue_transaction(
     osr,
-    t,
-    new C_DeleteTransWrapper(t, on_applied),
+    std::move(*t),
+    on_applied,
     on_commit);
+  delete t;
 }
 
 void TestFileStoreBackend::read(
diff --git a/src/test/bench/tp_bench.cc b/src/test/bench/tp_bench.cc
index 6bc6be1..6a50b6e 100644
--- a/src/test/bench/tp_bench.cc
+++ b/src/test/bench/tp_bench.cc
@@ -93,8 +93,7 @@ class PassAlong : public ThreadPool::WorkQueue<unsigned> {
     q.pop_front();
     return val;
   }
-  using ThreadPool::WorkQueue<unsigned>::_process;
-  void _process(unsigned *item) {
+  void _process(unsigned *item, ThreadPool::TPHandle &) override {
     next->queue(item);
   }
   void _clear() { q.clear(); }
diff --git a/src/test/bench_log.cc b/src/test/bench_log.cc
index a80e14a..39e8813 100644
--- a/src/test/bench_log.cc
+++ b/src/test/bench_log.cc
@@ -13,7 +13,7 @@ struct T : public Thread {
   int num;
   set<int> myset;
   map<int,string> mymap;
-  T(int n) : num(n) {
+  explicit T(int n) : num(n) {
     myset.insert(123);
     myset.insert(456);
     mymap[1] = "foo";
diff --git a/src/test/bufferlist.cc b/src/test/bufferlist.cc
index 77ca3ec..d98db9a 100644
--- a/src/test/bufferlist.cc
+++ b/src/test/bufferlist.cc
@@ -448,6 +448,42 @@ TEST(BufferPtr, constructors) {
     EXPECT_DEATH(bufferptr(original, 0, original.length() + 1), "");
     EXPECT_DEATH(bufferptr(bufferptr(), 0, 0), "");
   }
+  //
+  // ptr(ptr&& p)
+  //
+  {
+    const std::string str(len, 'X');
+    bufferptr original(str.c_str(), len);
+    bufferptr ptr(std::move(original));
+    EXPECT_TRUE(ptr.have_raw());
+    EXPECT_FALSE(original.have_raw());
+    EXPECT_EQ(str.compare(0, str.size(), ptr.c_str()), 0);
+    EXPECT_EQ(1, ptr.raw_nref());
+  }
+}
+
+TEST(BufferPtr, operator_assign) {
+  //
+  // ptr& operator= (const ptr& p)
+  //
+  bufferptr ptr(10);
+  ptr.copy_in(0, 3, "ABC");
+  char dest[1];
+  {
+    bufferptr copy = ptr;
+    copy.copy_out(1, 1, dest);
+    ASSERT_EQ('B', dest[0]);
+  }
+
+  //
+  // ptr& operator= (ptr&& p)
+  //
+  bufferptr move = std::move(ptr);
+  {
+    move.copy_out(1, 1, dest);
+    ASSERT_EQ('B', dest[0]);
+  }
+  EXPECT_FALSE(ptr.have_raw());
 }
 
 TEST(BufferPtr, assignment) {
@@ -1201,6 +1237,9 @@ TEST(BufferList, constructors) {
 }
 
 TEST(BufferList, operator_equal) {
+  //
+  // list& operator= (const list& other)
+  //
   bufferlist bl;
   bl.append("ABC", 3);
   {
@@ -1208,13 +1247,24 @@ TEST(BufferList, operator_equal) {
     bl.copy(1, 1, dest);
     ASSERT_EQ('B', dest[0]);
   }
-  bufferlist copy;
-  copy = bl;
   {
+    bufferlist copy = bl;
     std::string dest;
     copy.copy(1, 1, dest);
     ASSERT_EQ('B', dest[0]);
   }
+
+  //
+  // list& operator= (list&& other)
+  //
+  bufferlist move = std::move(bl);
+  {
+    std::string dest;
+    move.copy(1, 1, dest);
+    ASSERT_EQ('B', dest[0]);
+  }
+  EXPECT_TRUE(move.length());
+  EXPECT_TRUE(!bl.length());
 }
 
 TEST(BufferList, buffers) {
@@ -1518,6 +1568,27 @@ TEST(BufferList, push_front) {
     EXPECT_EQ('B', bl.buffers().front()[0]);
     EXPECT_EQ(ptr.get_raw(), bl.buffers().front().get_raw());
   }
+  //
+  // void push_front(ptr&& bp)
+  //
+  {
+    bufferlist bl;
+    bufferptr ptr;
+    bl.push_front(std::move(ptr));
+    EXPECT_EQ((unsigned)0, bl.length());
+    EXPECT_EQ((unsigned)0, bl.buffers().size());
+  }
+  {
+    bufferlist bl;
+    bl.append('A');
+    bufferptr ptr(len);
+    ptr.c_str()[0] = 'B';
+    bl.push_front(std::move(ptr));
+    EXPECT_EQ((unsigned)(1 + len), bl.length());
+    EXPECT_EQ((unsigned)2, bl.buffers().size());
+    EXPECT_EQ('B', bl.buffers().front()[0]);
+    EXPECT_FALSE(ptr.get_raw());
+  }
 }
 
 TEST(BufferList, push_back) {
@@ -1557,6 +1628,27 @@ TEST(BufferList, push_back) {
     EXPECT_EQ('B', bl.buffers().back()[0]);
     EXPECT_EQ(ptr.get_raw(), bl.buffers().back().get_raw());
   }
+  //
+  // void push_back(ptr&& bp)
+  //
+  {
+    bufferlist bl;
+    bufferptr ptr;
+    bl.push_back(std::move(ptr));
+    EXPECT_EQ((unsigned)0, bl.length());
+    EXPECT_EQ((unsigned)0, bl.buffers().size());
+  }
+  {
+    bufferlist bl;
+    bl.append('A');
+    bufferptr ptr(len);
+    ptr.c_str()[0] = 'B';
+    bl.push_back(std::move(ptr));
+    EXPECT_EQ((unsigned)(1 + len), bl.length());
+    EXPECT_EQ((unsigned)2, bl.buffers().size());
+    EXPECT_EQ('B', bl.buffers().back()[0]);
+    EXPECT_FALSE(ptr.get_raw());
+  }
 }
 
 TEST(BufferList, is_contiguous) {
@@ -1935,6 +2027,27 @@ TEST(BufferList, append) {
     EXPECT_EQ(0, ::memcmp(expected.c_str(), bl.c_str(), expected.size()));
     EXPECT_EQ(expected.size(), bl.length());
   }
+  //
+  // void append(ptr&& bp);
+  //
+  {
+    bufferlist bl;
+    EXPECT_EQ((unsigned)0, bl.buffers().size());
+    EXPECT_EQ((unsigned)0, bl.length());
+    {
+      bufferptr ptr;
+      bl.append(std::move(ptr));
+      EXPECT_EQ((unsigned)0, bl.buffers().size());
+      EXPECT_EQ((unsigned)0, bl.length());
+    }
+    {
+      bufferptr ptr(3);
+      bl.append(std::move(ptr));
+      EXPECT_EQ((unsigned)1, bl.buffers().size());
+      EXPECT_EQ((unsigned)3, bl.length());
+      EXPECT_FALSE(ptr.get_raw());
+    }
+  }
 }
 
 TEST(BufferList, append_zero) {
diff --git a/src/test/centos-6/ceph.spec.in b/src/test/centos-6/ceph.spec.in
index 487232c..498eac4 100644
--- a/src/test/centos-6/ceph.spec.in
+++ b/src/test/centos-6/ceph.spec.in
@@ -56,7 +56,7 @@ restorecon -R /var/log/ceph > /dev/null 2>&1;
 # the _with_systemd variable only implies that we'll install
 # /etc/tmpfiles.d/ceph.conf in order to set up the socket directory in
 # /var/run/ceph.
-%if 0%{?fedora} || 0%{?rhel} >= 7 || 0%{?suse_version} >= 1210
+%if 0%{?fedora} || 0%{?rhel} >= 7 || 0%{?suse_version}
 %global _with_systemd 1
 %{!?tmpfiles_create: %global tmpfiles_create systemd-tmpfiles --create}
 %endif
@@ -66,6 +66,10 @@ restorecon -R /var/log/ceph > /dev/null 2>&1;
 %global _with_lttng 1
 %endif
 
+# unify libexec for all targets
+%global _libexecdir %{_exec_prefix}/lib
+
+
 #################################################################################
 # common
 #################################################################################
@@ -86,27 +90,9 @@ Patch0:		init-ceph.in-fedora.patch
 #################################################################################
 # dependencies that apply across all distro families
 #################################################################################
-Requires:	librbd1 = %{epoch}:%{version}-%{release}
-Requires:	librados2 = %{epoch}:%{version}-%{release}
-Requires:	libcephfs1 = %{epoch}:%{version}-%{release}
-Requires:	ceph-common = %{epoch}:%{version}-%{release}
-%if 0%{with selinux}
-Requires:	ceph-selinux = %{epoch}:%{version}-%{release}
-%endif
-Requires:	python-rados = %{epoch}:%{version}-%{release}
-Requires:	python-rbd = %{epoch}:%{version}-%{release}
-Requires:	python-cephfs = %{epoch}:%{version}-%{release}
-Requires:	python
-Requires:	python-requests
-Requires:	grep
-Requires:	xfsprogs
-Requires:	logrotate
-Requires:	parted
-Requires:	util-linux
-Requires:	hdparm
-Requires:	cryptsetup
-Requires:	findutils
-Requires:	which
+Requires:       ceph-osd = %{epoch}:%{version}-%{release}
+Requires:       ceph-mds = %{epoch}:%{version}-%{release}
+Requires:       ceph-mon = %{epoch}:%{version}-%{release}
 Requires(post):	binutils
 %if 0%{with cephfs_java}
 BuildRequires:	java-devel
@@ -132,7 +118,6 @@ BuildRequires:	hdparm
 BuildRequires:	leveldb-devel > 1.2
 BuildRequires:	libaio-devel
 BuildRequires:	libcurl-devel
-BuildRequires:	libedit-devel
 BuildRequires:	libxml2-devel
 BuildRequires:	libblkid-devel >= 2.17
 BuildRequires:	libudev-devel
@@ -165,41 +150,35 @@ BuildRequires:	systemd
 %{?systemd_requires}
 %endif
 PreReq:		%fillup_prereq
-Requires:	python-Flask
 BuildRequires:	net-tools
 BuildRequires:	libbz2-devel
-%if 0%{?suse_version} > 1210
-Requires:	gptfdisk
 %if 0%{with tcmalloc}
 BuildRequires:	gperftools-devel
 %endif
-%else
-Requires:	scsirastools
-BuildRequires:	google-perftools-devel
-%endif
 BuildRequires:	mozilla-nss-devel
 BuildRequires:	keyutils-devel
 BuildRequires:	libatomic-ops-devel
-%else
+BuildRequires:  lsb-release
+%endif
+%if 0%{?fedora} || 0%{?rhel} 
 %if 0%{?_with_systemd}
 Requires:	systemd
 %endif
 BuildRequires:	nss-devel
 BuildRequires:	keyutils-libs-devel
 BuildRequires:	libatomic_ops-devel
-Requires:	gdisk
 Requires(post):	chkconfig
 Requires(preun):	chkconfig
 Requires(preun):	initscripts
 BuildRequires:	gperftools-devel
-Requires:	python-flask
+BuildRequires:  redhat-lsb-core
 %endif
 # boost
 %if 0%{?fedora} || 0%{?rhel} 
 BuildRequires:  boost-random
 %endif
 # python-argparse for distros with Python 2.6 or lower
-%if (0%{?rhel} && 0%{?rhel} <= 6) || (0%{?suse_version} && 0%{?suse_version} <= 1110)
+%if (0%{?rhel} && 0%{?rhel} <= 6)
 BuildRequires:	python-argparse
 %endif
 # lttng and babeltrace for rbd-replay-prep
@@ -238,6 +217,37 @@ on commodity hardware and delivers object, block and file system storage.
 #################################################################################
 # packages
 #################################################################################
+%package base
+Summary:       Ceph Base Package
+Group:         System Environment/Base
+Requires:      ceph-common = %{epoch}:%{version}-%{release}
+Requires:      librbd1 = %{epoch}:%{version}-%{release}
+Requires:      librados2 = %{epoch}:%{version}-%{release}
+Requires:      libcephfs1 = %{epoch}:%{version}-%{release}
+%if 0%{with selinux}
+Requires:      ceph-selinux = %{epoch}:%{version}-%{release}
+%endif
+Requires:      python
+Requires:      python-requests
+Requires:      python-setuptools
+Requires:      grep
+Requires:      xfsprogs
+Requires:      logrotate
+Requires:      parted
+Requires:      util-linux
+Requires:      hdparm
+Requires:      cryptsetup
+Requires:      findutils
+Requires:      which
+%if 0%{?suse_version}
+Requires:      lsb-release
+%endif
+%if 0%{?fedora} || 0%{?rhel}
+Requires:      redhat-lsb-core
+%endif
+%description base
+Base is the package that includes all the files shared amongst ceph servers
+
 %package -n ceph-common
 Summary:	Ceph Common
 Group:		System Environment/Base
@@ -254,11 +264,38 @@ Requires:	python-requests
 Requires(pre):	pwdutils
 %endif
 # python-argparse is only needed in distros with Python 2.6 or lower
-%if (0%{?rhel} && 0%{?rhel} <= 6) || (0%{?suse_version} && 0%{?suse_version} <= 1110)
+%if (0%{?rhel} && 0%{?rhel} <= 6)
 Requires:	python-argparse
 %endif
 %description -n ceph-common
 Common utilities to mount and interact with a ceph storage cluster.
+Comprised of files that are common to Ceph clients and servers.
+
+%package mds
+Summary:	Ceph Metadata Server Daemon
+Group:		System Environment/Base
+Requires:	ceph-base = %{epoch}:%{version}-%{release}
+%description mds
+ceph-mds is the metadata server daemon for the Ceph distributed file system.
+One or more instances of ceph-mds collectively manage the file system
+namespace, coordinating access to the shared OSD cluster.
+
+%package mon
+Summary:	Ceph Monitor Daemon
+Group:		System Environment/Base
+Requires:	ceph-base = %{epoch}:%{version}-%{release}
+# For ceph-rest-api
+%if 0%{?fedora} || 0%{?rhel}
+Requires:      python-flask
+%endif
+%if 0%{?suse_version}
+Requires:      python-Flask
+%endif
+%description mon
+ceph-mon is the cluster monitor daemon for the Ceph distributed file
+system. One or more instances of ceph-mon form a Paxos part-time
+parliament cluster that provides extremely reliable and durable storage
+of cluster membership, configuration, and state.
 
 %package fuse
 Summary:	Ceph fuse-based client
@@ -276,6 +313,16 @@ Requires:	librbd1 = %{epoch}:%{version}-%{release}
 %description -n rbd-fuse
 FUSE based client to map Ceph rbd images to files
 
+%package -n rbd-mirror
+Summary:	Ceph daemon for mirroring RBD images
+Group:		System Environment/Base
+Requires:	%{name}
+Requires:	ceph-common = %{epoch}:%{version}-%{release}
+Requires:	librados2 = %{epoch}:%{version}-%{release}
+%description -n rbd-mirror
+Daemon for mirroring RBD images between Ceph clusters, streaming
+changes asynchronously.
+
 %package -n rbd-nbd
 Summary:	Ceph RBD client base on NBD
 Group:		System Environment/Base
@@ -295,6 +342,12 @@ Requires:	ceph-selinux = %{epoch}:%{version}-%{release}
 Requires:	librados2 = %{epoch}:%{version}-%{release}
 %if 0%{?rhel} || 0%{?fedora}
 Requires:	mailcap
+# python-flask for powerdns
+Requires:	python-flask
+%endif
+%if 0%{?suse_version}
+# python-Flask for powerdns
+Requires:      python-Flask
 %endif
 %description radosgw
 This package is an S3 HTTP REST gateway for the RADOS object store. It
@@ -314,6 +367,22 @@ under Open Cluster Framework (OCF) compliant resource
 managers such as Pacemaker.
 %endif
 
+%package osd
+Summary:	Ceph Object Storage Daemon
+Group:		System Environment/Base
+Requires:	ceph-base = %{epoch}:%{version}-%{release}
+# for sgdisk, used by ceph-disk
+%if 0%{?fedora} || 0%{?rhel}
+Requires:	gdisk
+%endif
+%if 0%{?suse_version}
+Requires:	gptfdisk
+%endif
+%description osd
+ceph-osd is the object storage daemon for the Ceph distributed file
+system.  It is responsible for storing objects on a local file system
+and providing access to them over the network.
+
 %package -n librados2
 Summary:	RADOS distributed object store client library
 Group:		System Environment/Libraries
@@ -588,6 +657,7 @@ export RPM_OPT_FLAGS=`echo $RPM_OPT_FLAGS | sed -e 's/i386/i486/'`
 
 %{configure}	CPPFLAGS="$java_inc" \
 		--prefix=/usr \
+                --libexecdir=%{_libexecdir} \
 		--localstatedir=/var \
 		--sysconfdir=/etc \
 %if 0%{?_with_systemd}
@@ -606,16 +676,6 @@ export RPM_OPT_FLAGS=`echo $RPM_OPT_FLAGS | sed -e 's/i386/i486/'`
 		--with-selinux \
 %endif
 		--with-librocksdb-static=check \
-%if 0%{?rhel} || 0%{?fedora}
-		--with-systemd-libexec-dir=/usr/libexec/ceph \
-		--with-rgw-user=root \
-		--with-rgw-group=root \
-%endif
-%if 0%{?suse_version}
-		--with-systemd-libexec-dir=/usr/lib/ceph/ \
-		--with-rgw-user=wwwrun \
-		--with-rgw-group=www \
-%endif
 		--with-radosgw \
 		$CEPH_EXTRA_CONFIGURE_ARGS \
 		%{?_with_ocf} \
@@ -642,6 +702,20 @@ make %{?_smp_mflags} check-local
 
 %install
 make DESTDIR=$RPM_BUILD_ROOT install
+rm -f $RPM_BUILD_ROOT%{_libdir}/ceph/erasure-code/libec_example.so
+rm -f $RPM_BUILD_ROOT%{_libdir}/ceph/erasure-code/libec_fail_to_initialize.so
+rm -f $RPM_BUILD_ROOT%{_libdir}/ceph/erasure-code/libec_fail_to_register.so
+rm -f $RPM_BUILD_ROOT%{_libdir}/ceph/erasure-code/libec_hangs.so
+rm -f $RPM_BUILD_ROOT%{_libdir}/ceph/erasure-code/libec_missing_entry_point.so
+rm -f $RPM_BUILD_ROOT%{_libdir}/ceph/erasure-code/libec_missing_version.so
+rm -f $RPM_BUILD_ROOT%{_libdir}/ceph/erasure-code/libec_test_jerasure_generic.so
+rm -f $RPM_BUILD_ROOT%{_libdir}/ceph/erasure-code/libec_test_jerasure_neon.so
+rm -f $RPM_BUILD_ROOT%{_libdir}/ceph/erasure-code/libec_test_jerasure_sse3.so
+rm -f $RPM_BUILD_ROOT%{_libdir}/ceph/erasure-code/libec_test_jerasure_sse4.so
+rm -f $RPM_BUILD_ROOT%{_libdir}/ceph/erasure-code/libec_test_shec_generic.so
+rm -f $RPM_BUILD_ROOT%{_libdir}/ceph/erasure-code/libec_test_shec_neon.so
+rm -f $RPM_BUILD_ROOT%{_libdir}/ceph/erasure-code/libec_test_shec_sse3.so
+rm -f $RPM_BUILD_ROOT%{_libdir}/ceph/erasure-code/libec_test_shec_sse4.so
 find $RPM_BUILD_ROOT -type f -name "*.la" -exec rm -f {} ';'
 find $RPM_BUILD_ROOT -type f -name "*.a" -exec rm -f {} ';'
 install -D src/etc-rbdmap $RPM_BUILD_ROOT%{_sysconfdir}/ceph/rbdmap
@@ -718,120 +792,29 @@ mkdir -p $RPM_BUILD_ROOT%{_localstatedir}/lib/ceph/bootstrap-rgw
 %clean
 rm -rf $RPM_BUILD_ROOT
 
-%pre
-%if 0%{?_with_systemd}
-  %if 0%{?suse_version}
-    # service_add_pre and friends don't work with parameterized systemd service
-    # instances, only with single services or targets, so we always pass
-    # ceph.target to these macros
-    %service_add_pre ceph.target
-  %endif
-%endif
-
-
-%post
-/sbin/ldconfig
-%if 0%{?_with_systemd}
-  %if 0%{?suse_version}
-    %fillup_only
-    %service_add_post ceph.target
-  %endif
-%else
-  /sbin/chkconfig --add ceph
-%endif
-
-%preun
-%if 0%{?_with_systemd}
-  %if 0%{?suse_version}
-    %service_del_preun ceph.target
-  %endif
-  # Disable and stop on removal.
-  if [ $1 = 0 ] ; then
-    SERVICE_LIST=$(systemctl | grep -E '^ceph-mon@|^ceph-create-keys@|^ceph-osd@|^ceph-mds@|^ceph-disk-'  | cut -d' ' -f1)
-    if [ -n "$SERVICE_LIST" ]; then
-      for SERVICE in $SERVICE_LIST; do
-        /usr/bin/systemctl --no-reload disable $SERVICE > /dev/null 2>&1 || :
-        /usr/bin/systemctl stop $SERVICE > /dev/null 2>&1 || :
-      done
-    fi
-  fi
-%else
-  %if 0%{?rhel} || 0%{?fedora}
-    if [ $1 = 0 ] ; then
-      /sbin/service ceph stop >/dev/null 2>&1
-      /sbin/chkconfig --del ceph
-    fi
-  %endif
-%endif
-
-%postun
-/sbin/ldconfig
-%if 0%{?_with_systemd}
-  if [ $1 = 1 ] ; then
-    # Restart on upgrade, but only if "CEPH_AUTO_RESTART_ON_UPGRADE" is set to
-    # "yes". In any case: if units are not running, do not touch them.
-    SYSCONF_CEPH=/etc/sysconfig/ceph
-    if [ -f $SYSCONF_CEPH -a -r $SYSCONF_CEPH ] ; then
-      source $SYSCONF_CEPH
-    fi
-    if [ "X$CEPH_AUTO_RESTART_ON_UPGRADE" = "Xyes" ] ; then
-      SERVICE_LIST=$(systemctl | grep -E '^ceph-mon@|^ceph-create-keys@|^ceph-osd@|^ceph-mds@|^ceph-disk-'  | cut -d' ' -f1)
-      if [ -n "$SERVICE_LIST" ]; then
-        for SERVICE in $SERVICE_LIST; do
-          /usr/bin/systemctl try-restart $SERVICE > /dev/null 2>&1 || :
-        done
-      fi
-    fi
-  fi
-%endif
-
 #################################################################################
-# files
+# files and systemd scriptlets
 #################################################################################
 %files
+
+%files base
 %defattr(-,root,root,-)
 %docdir %{_docdir}
 %dir %{_docdir}/ceph
 %{_docdir}/ceph/sample.ceph.conf
 %{_docdir}/ceph/sample.fetch_config
-%{_bindir}/cephfs
-%{_bindir}/ceph-clsinfo
-%{_bindir}/ceph-rest-api
-%{python_sitelib}/ceph_rest_api.py*
 %{_bindir}/crushtool
 %{_bindir}/monmaptool
 %{_bindir}/osdmaptool
 %{_bindir}/ceph-run
-%{_bindir}/ceph-mon
-%{_bindir}/ceph-mds
-%{_bindir}/ceph-objectstore-tool
-%{_bindir}/ceph-bluefs-tool
-%{_bindir}/ceph-osd
 %{_bindir}/ceph-detect-init
-%{_bindir}/librados-config
 %{_bindir}/ceph-client-debug
-%{_bindir}/cephfs-journal-tool
-%{_bindir}/cephfs-table-tool
-%{_bindir}/cephfs-data-scan
-%{_bindir}/ceph-debugpack
-%{_bindir}/ceph-coverage
+%{_bindir}/cephfs
 %if 0%{?_with_systemd}
-%{_unitdir}/ceph-mds@.service
-%{_unitdir}/ceph-mon@.service
 %{_unitdir}/ceph-create-keys@.service
-%{_unitdir}/ceph-osd@.service
-%{_unitdir}/ceph-radosgw@.service
-%{_unitdir}/ceph-disk@.service
-%{_unitdir}/ceph.target
-%{_unitdir}/ceph-osd.target
-%{_unitdir}/ceph-mon.target
-%{_unitdir}/ceph-mds.target
-%{_unitdir}/ceph-radosgw.target
 %else
 %{_initrddir}/ceph
 %endif
-%{_sbindir}/ceph-disk
-%{_sbindir}/ceph-disk-udev
 %{_sbindir}/ceph-create-keys
 %{_sbindir}/rcceph
 %if 0%{?rhel} >= 7 || 0%{?fedora} || 0%{?suse_version}
@@ -839,25 +822,11 @@ rm -rf $RPM_BUILD_ROOT
 %else
 /sbin/mount.ceph
 %endif
-%dir %{_libdir}/ceph
-%{_libdir}/ceph/ceph_common.sh
-%{_libexecdir}/ceph/ceph-osd-prestart.sh
+%dir %{_libexecdir}/ceph
+%{_libexecdir}/ceph/ceph_common.sh
 %dir %{_libdir}/rados-classes
-%{_libdir}/rados-classes/libcls_cephfs.so*
-%{_libdir}/rados-classes/libcls_rbd.so*
-%{_libdir}/rados-classes/libcls_hello.so*
-%{_libdir}/rados-classes/libcls_numops.so*
-%{_libdir}/rados-classes/libcls_rgw.so*
-%{_libdir}/rados-classes/libcls_lock.so*
-%{_libdir}/rados-classes/libcls_kvs.so*
-%{_libdir}/rados-classes/libcls_refcount.so*
-%{_libdir}/rados-classes/libcls_log.so*
-%{_libdir}/rados-classes/libcls_replica_log.so*
-%{_libdir}/rados-classes/libcls_statelog.so*
-%{_libdir}/rados-classes/libcls_timeindex.so*
-%{_libdir}/rados-classes/libcls_user.so*
-%{_libdir}/rados-classes/libcls_version.so*
-%{_libdir}/rados-classes/libcls_journal.so*
+%{_libdir}/rados-classes/*
+%dir %{_libdir}/ceph
 %dir %{_libdir}/ceph/erasure-code
 %{_libdir}/ceph/erasure-code/libec_*.so*
 %dir %{_libdir}/ceph/compressor
@@ -866,8 +835,6 @@ rm -rf $RPM_BUILD_ROOT
 %{_libdir}/libos_tp.so*
 %{_libdir}/libosd_tp.so*
 %endif
-%{_udevrulesdir}/60-ceph-partuuid-workaround.rules
-%{_udevrulesdir}/95-ceph-osd.rules
 %config %{_sysconfdir}/bash_completion.d/ceph
 %config(noreplace) %{_sysconfdir}/logrotate.d/ceph
 %if 0%{?fedora} || 0%{?rhel}
@@ -878,29 +845,20 @@ rm -rf $RPM_BUILD_ROOT
 %config %{_sysconfdir}/sysconfig/SuSEfirewall2.d/services/ceph-mon
 %config %{_sysconfdir}/sysconfig/SuSEfirewall2.d/services/ceph-osd-mds
 %endif
+%{_unitdir}/ceph.target
 %{python_sitelib}/ceph_detect_init*
+%{python_sitelib}/ceph_disk*
 %{_mandir}/man8/ceph-deploy.8*
 %{_mandir}/man8/ceph-detect-init.8*
-%{_mandir}/man8/ceph-disk.8*
 %{_mandir}/man8/ceph-create-keys.8*
-%{_mandir}/man8/ceph-mon.8*
-%{_mandir}/man8/ceph-mds.8*
-%{_mandir}/man8/ceph-osd.8*
 %{_mandir}/man8/ceph-run.8*
-%{_mandir}/man8/ceph-rest-api.8*
 %{_mandir}/man8/crushtool.8*
 %{_mandir}/man8/osdmaptool.8*
 %{_mandir}/man8/monmaptool.8*
 %{_mandir}/man8/cephfs.8*
 %{_mandir}/man8/mount.ceph.8*
-%{_mandir}/man8/ceph-debugpack.8*
-%{_mandir}/man8/ceph-clsinfo.8*
-%{_mandir}/man8/librados-config.8*
 #set up placeholder directories
 %attr(750,ceph,ceph) %dir %{_localstatedir}/lib/ceph/tmp
-%attr(750,ceph,ceph) %dir %{_localstatedir}/lib/ceph/mon
-%attr(750,ceph,ceph) %dir %{_localstatedir}/lib/ceph/osd
-%attr(750,ceph,ceph) %dir %{_localstatedir}/lib/ceph/mds
 %attr(750,ceph,ceph) %dir %{_localstatedir}/lib/ceph/bootstrap-osd
 %attr(750,ceph,ceph) %dir %{_localstatedir}/lib/ceph/bootstrap-mds
 %attr(750,ceph,ceph) %dir %{_localstatedir}/lib/ceph/bootstrap-rgw
@@ -908,8 +866,74 @@ rm -rf $RPM_BUILD_ROOT
 %attr(770,ceph,ceph) %dir %{_localstatedir}/run/ceph
 %endif
 
+%pre base
+%if 0%{?_with_systemd}
+  %if 0%{?suse_version}
+    # service_add_pre and friends don't work with parameterized systemd service
+    # instances, only with single services or targets, so we always pass
+    # ceph.target to these macros
+    %service_add_pre ceph.target
+  %endif
+%endif
+
+%post base
+/sbin/ldconfig
+%if 0%{?_with_systemd}
+  %if 0%{?suse_version}
+    %fillup_only
+    %service_add_post ceph.target
+  %endif
+%else
+  /sbin/chkconfig --add ceph
+%endif
+
+%preun base
+%if 0%{?_with_systemd}
+  %if 0%{?suse_version}
+    %service_del_preun ceph.target
+  %endif
+  # Disable and stop on removal.
+  if [ $1 = 0 ] ; then
+    SERVICE_LIST=$(systemctl | grep -E '^ceph-mon@|^ceph-create-keys@|^ceph-osd@|^ceph-mds@|^ceph-disk-'  | cut -d' ' -f1)
+    if [ -n "$SERVICE_LIST" ]; then
+      for SERVICE in $SERVICE_LIST; do
+        /usr/bin/systemctl --no-reload disable $SERVICE > /dev/null 2>&1 || :
+        /usr/bin/systemctl stop $SERVICE > /dev/null 2>&1 || :
+      done
+    fi
+  fi
+%else
+  %if 0%{?rhel} || 0%{?fedora}
+    if [ $1 = 0 ] ; then
+      /sbin/service ceph stop >/dev/null 2>&1
+      /sbin/chkconfig --del ceph
+    fi
+  %endif
+%endif
+
+%postun base
+/sbin/ldconfig
+%if 0%{?_with_systemd}
+  if [ $1 = 1 ] ; then
+    # Restart on upgrade, but only if "CEPH_AUTO_RESTART_ON_UPGRADE" is set to
+    # "yes". In any case: if units are not running, do not touch them.
+    SYSCONF_CEPH=/etc/sysconfig/ceph
+    if [ -f $SYSCONF_CEPH -a -r $SYSCONF_CEPH ] ; then
+      source $SYSCONF_CEPH
+    fi
+    if [ "X$CEPH_AUTO_RESTART_ON_UPGRADE" = "Xyes" ] ; then
+      SERVICE_LIST=$(systemctl | grep -E '^ceph-mon@|^ceph-create-keys@|^ceph-osd@|^ceph-mds@|^ceph-disk-'  | cut -d' ' -f1)
+      if [ -n "$SERVICE_LIST" ]; then
+        for SERVICE in $SERVICE_LIST; do
+          /usr/bin/systemctl try-restart $SERVICE > /dev/null 2>&1 || :
+        done
+      fi
+    fi
+  fi
+%endif
+
 #################################################################################
-%files -n ceph-common
+%files common
 %defattr(-,root,root,-)
 %{_bindir}/ceph
 %{_bindir}/ceph-authtool
@@ -943,12 +967,11 @@ rm -rf $RPM_BUILD_ROOT
 %{_mandir}/man8/rbd-replay.8*
 %{_mandir}/man8/rbd-replay-many.8*
 %{_mandir}/man8/rbd-replay-prep.8*
+%dir %{_datadir}/ceph/
 %{_datadir}/ceph/known_hosts_drop.ceph.com
 %{_datadir}/ceph/id_dsa_drop.ceph.com
 %{_datadir}/ceph/id_dsa_drop.ceph.com.pub
 %dir %{_sysconfdir}/ceph/
-%dir %{_datarootdir}/ceph/
-%dir %{_libexecdir}/ceph/
 %config %{_sysconfdir}/bash_completion.d/rados
 %config %{_sysconfdir}/bash_completion.d/rbd
 %config(noreplace) %{_sysconfdir}/ceph/rbdmap
@@ -963,7 +986,7 @@ rm -rf $RPM_BUILD_ROOT
 %attr(3770,ceph,ceph) %dir %{_localstatedir}/log/ceph/
 %attr(750,ceph,ceph) %dir %{_localstatedir}/lib/ceph/
 
-%pre -n ceph-common
+%pre common
 CEPH_GROUP_ID=""
 CEPH_USER_ID=""
 %if 0%{?rhel} || 0%{?fedora}
@@ -980,12 +1003,12 @@ getent passwd ceph >/dev/null || useradd -r -g ceph -d %{_localstatedir}/lib/cep
 %endif
 exit 0
 
-%post -n ceph-common
+%post common
 %if 0%{?_with_systemd}
 %tmpfiles_create %{_tmpfilesdir}/ceph-common.conf
 %endif
 
-%postun -n ceph-common
+%postun common
 # Package removal cleanup
 if [ "$1" -eq "0" ] ; then
     rm -rf /var/log/ceph
@@ -993,6 +1016,36 @@ if [ "$1" -eq "0" ] ; then
 fi
 
 #################################################################################
+%files mds
+%{_bindir}/ceph-mds
+%{_bindir}/cephfs-journal-tool
+%{_bindir}/cephfs-table-tool
+%{_bindir}/cephfs-data-scan
+%{_mandir}/man8/ceph-mds.8*
+%if 0%{?_with_systemd}
+%{_unitdir}/ceph-mds@.service
+%{_unitdir}/ceph-mds.target
+%else
+%{_initrddir}/ceph
+%endif
+%attr(750,ceph,ceph) %dir %{_localstatedir}/lib/ceph/mds
+
+#################################################################################
+%files mon
+%{_bindir}/ceph-mon
+%{_bindir}/ceph-rest-api
+%{_mandir}/man8/ceph-mon.8*
+%{_mandir}/man8/ceph-rest-api.8*
+%{python_sitelib}/ceph_rest_api.py*
+%if 0%{?_with_systemd}
+%{_unitdir}/ceph-mon@.service
+%{_unitdir}/ceph-mon.target
+%else
+%{_initrddir}/ceph
+%endif
+%attr(750,ceph,ceph) %dir %{_localstatedir}/lib/ceph/mon
+
+#################################################################################
 %files fuse
 %defattr(-,root,root,-)
 %{_bindir}/ceph-fuse
@@ -1010,6 +1063,12 @@ fi
 %{_mandir}/man8/rbd-fuse.8*
 
 #################################################################################
+%files -n rbd-mirror
+%defattr(-,root,root,-)
+%{_bindir}/rbd-mirror
+%{_mandir}/man8/rbd-mirror.8*
+
+#################################################################################
 %files -n rbd-nbd
 %defattr(-,root,root,-)
 %{_bindir}/rbd-nbd
@@ -1026,6 +1085,8 @@ fi
 %config %{_sysconfdir}/bash_completion.d/radosgw-admin
 %dir %{_localstatedir}/lib/ceph/radosgw
 %if 0%{?_with_systemd}
+%{_unitdir}/ceph-radosgw@.service
+%{_unitdir}/ceph-radosgw.target
 %else
 %{_initrddir}/ceph-radosgw
 %{_sbindir}/rcceph-radosgw
@@ -1076,6 +1137,29 @@ fi
 %endif
 
 #################################################################################
+%files osd
+%{_bindir}/ceph-clsinfo
+%{_bindir}/ceph-bluefs-tool
+%{_bindir}/ceph-objectstore-tool
+%{_bindir}/ceph-osd
+%{_sbindir}/ceph-disk
+%{_sbindir}/ceph-disk-udev
+%{_libexecdir}/ceph/ceph-osd-prestart.sh
+%{_udevrulesdir}/60-ceph-partuuid-workaround.rules
+%{_udevrulesdir}/95-ceph-osd.rules
+%{_mandir}/man8/ceph-clsinfo.8*
+%{_mandir}/man8/ceph-disk.8*
+%{_mandir}/man8/ceph-osd.8*
+%if 0%{?_with_systemd}
+%{_unitdir}/ceph-osd@.service
+%{_unitdir}/ceph-osd.target
+%{_unitdir}/ceph-disk@.service
+%else
+%{_initrddir}/ceph
+%endif
+%attr(750,ceph,ceph) %dir %{_localstatedir}/lib/ceph/osd
+
+#################################################################################
 %if %{with ocf}
 %files resource-agents
 %defattr(0755,root,root,-)
@@ -1116,6 +1200,8 @@ fi
 %if 0%{?_with_lttng}
 %{_libdir}/librados_tp.so
 %endif
+%{_bindir}/librados-config
+%{_mandir}/man8/librados-config.8*
 
 #################################################################################
 %files -n python-rados
@@ -1226,9 +1312,12 @@ ln -sf %{_libdir}/librbd.so.1 /usr/lib64/qemu/librbd.so.1
 %{_bindir}/ceph_test_*
 %{_bindir}/ceph_tpbench
 %{_bindir}/ceph_xattr_bench
+%{_bindir}/ceph-coverage
 %{_bindir}/ceph-monstore-tool
 %{_bindir}/ceph-osdomap-tool
 %{_bindir}/ceph-kvstore-tool
+%{_bindir}/ceph-debugpack
+%{_mandir}/man8/ceph-debugpack.8*
 %dir %{_libdir}/ceph
 %{_libdir}/ceph/ceph-monstore-update-crush.sh
 
@@ -1377,4 +1466,5 @@ exit 0
 # We need an empty %%files list for python-ceph-compat, to tell rpmbuild to
 # actually build this meta package.
 
+
 %changelog
diff --git a/src/test/centos-7/ceph.spec.in b/src/test/centos-7/ceph.spec.in
index 487232c..498eac4 100644
--- a/src/test/centos-7/ceph.spec.in
+++ b/src/test/centos-7/ceph.spec.in
@@ -56,7 +56,7 @@ restorecon -R /var/log/ceph > /dev/null 2>&1;
 # the _with_systemd variable only implies that we'll install
 # /etc/tmpfiles.d/ceph.conf in order to set up the socket directory in
 # /var/run/ceph.
-%if 0%{?fedora} || 0%{?rhel} >= 7 || 0%{?suse_version} >= 1210
+%if 0%{?fedora} || 0%{?rhel} >= 7 || 0%{?suse_version}
 %global _with_systemd 1
 %{!?tmpfiles_create: %global tmpfiles_create systemd-tmpfiles --create}
 %endif
@@ -66,6 +66,10 @@ restorecon -R /var/log/ceph > /dev/null 2>&1;
 %global _with_lttng 1
 %endif
 
+# unify libexec for all targets
+%global _libexecdir %{_exec_prefix}/lib
+
+
 #################################################################################
 # common
 #################################################################################
@@ -86,27 +90,9 @@ Patch0:		init-ceph.in-fedora.patch
 #################################################################################
 # dependencies that apply across all distro families
 #################################################################################
-Requires:	librbd1 = %{epoch}:%{version}-%{release}
-Requires:	librados2 = %{epoch}:%{version}-%{release}
-Requires:	libcephfs1 = %{epoch}:%{version}-%{release}
-Requires:	ceph-common = %{epoch}:%{version}-%{release}
-%if 0%{with selinux}
-Requires:	ceph-selinux = %{epoch}:%{version}-%{release}
-%endif
-Requires:	python-rados = %{epoch}:%{version}-%{release}
-Requires:	python-rbd = %{epoch}:%{version}-%{release}
-Requires:	python-cephfs = %{epoch}:%{version}-%{release}
-Requires:	python
-Requires:	python-requests
-Requires:	grep
-Requires:	xfsprogs
-Requires:	logrotate
-Requires:	parted
-Requires:	util-linux
-Requires:	hdparm
-Requires:	cryptsetup
-Requires:	findutils
-Requires:	which
+Requires:       ceph-osd = %{epoch}:%{version}-%{release}
+Requires:       ceph-mds = %{epoch}:%{version}-%{release}
+Requires:       ceph-mon = %{epoch}:%{version}-%{release}
 Requires(post):	binutils
 %if 0%{with cephfs_java}
 BuildRequires:	java-devel
@@ -132,7 +118,6 @@ BuildRequires:	hdparm
 BuildRequires:	leveldb-devel > 1.2
 BuildRequires:	libaio-devel
 BuildRequires:	libcurl-devel
-BuildRequires:	libedit-devel
 BuildRequires:	libxml2-devel
 BuildRequires:	libblkid-devel >= 2.17
 BuildRequires:	libudev-devel
@@ -165,41 +150,35 @@ BuildRequires:	systemd
 %{?systemd_requires}
 %endif
 PreReq:		%fillup_prereq
-Requires:	python-Flask
 BuildRequires:	net-tools
 BuildRequires:	libbz2-devel
-%if 0%{?suse_version} > 1210
-Requires:	gptfdisk
 %if 0%{with tcmalloc}
 BuildRequires:	gperftools-devel
 %endif
-%else
-Requires:	scsirastools
-BuildRequires:	google-perftools-devel
-%endif
 BuildRequires:	mozilla-nss-devel
 BuildRequires:	keyutils-devel
 BuildRequires:	libatomic-ops-devel
-%else
+BuildRequires:  lsb-release
+%endif
+%if 0%{?fedora} || 0%{?rhel} 
 %if 0%{?_with_systemd}
 Requires:	systemd
 %endif
 BuildRequires:	nss-devel
 BuildRequires:	keyutils-libs-devel
 BuildRequires:	libatomic_ops-devel
-Requires:	gdisk
 Requires(post):	chkconfig
 Requires(preun):	chkconfig
 Requires(preun):	initscripts
 BuildRequires:	gperftools-devel
-Requires:	python-flask
+BuildRequires:  redhat-lsb-core
 %endif
 # boost
 %if 0%{?fedora} || 0%{?rhel} 
 BuildRequires:  boost-random
 %endif
 # python-argparse for distros with Python 2.6 or lower
-%if (0%{?rhel} && 0%{?rhel} <= 6) || (0%{?suse_version} && 0%{?suse_version} <= 1110)
+%if (0%{?rhel} && 0%{?rhel} <= 6)
 BuildRequires:	python-argparse
 %endif
 # lttng and babeltrace for rbd-replay-prep
@@ -238,6 +217,37 @@ on commodity hardware and delivers object, block and file system storage.
 #################################################################################
 # packages
 #################################################################################
+%package base
+Summary:       Ceph Base Package
+Group:         System Environment/Base
+Requires:      ceph-common = %{epoch}:%{version}-%{release}
+Requires:      librbd1 = %{epoch}:%{version}-%{release}
+Requires:      librados2 = %{epoch}:%{version}-%{release}
+Requires:      libcephfs1 = %{epoch}:%{version}-%{release}
+%if 0%{with selinux}
+Requires:      ceph-selinux = %{epoch}:%{version}-%{release}
+%endif
+Requires:      python
+Requires:      python-requests
+Requires:      python-setuptools
+Requires:      grep
+Requires:      xfsprogs
+Requires:      logrotate
+Requires:      parted
+Requires:      util-linux
+Requires:      hdparm
+Requires:      cryptsetup
+Requires:      findutils
+Requires:      which
+%if 0%{?suse_version}
+Requires:      lsb-release
+%endif
+%if 0%{?fedora} || 0%{?rhel}
+Requires:      redhat-lsb-core
+%endif
+%description base
+Base is the package that includes all the files shared amongst ceph servers
+
 %package -n ceph-common
 Summary:	Ceph Common
 Group:		System Environment/Base
@@ -254,11 +264,38 @@ Requires:	python-requests
 Requires(pre):	pwdutils
 %endif
 # python-argparse is only needed in distros with Python 2.6 or lower
-%if (0%{?rhel} && 0%{?rhel} <= 6) || (0%{?suse_version} && 0%{?suse_version} <= 1110)
+%if (0%{?rhel} && 0%{?rhel} <= 6)
 Requires:	python-argparse
 %endif
 %description -n ceph-common
 Common utilities to mount and interact with a ceph storage cluster.
+Comprised of files that are common to Ceph clients and servers.
+
+%package mds
+Summary:	Ceph Metadata Server Daemon
+Group:		System Environment/Base
+Requires:	ceph-base = %{epoch}:%{version}-%{release}
+%description mds
+ceph-mds is the metadata server daemon for the Ceph distributed file system.
+One or more instances of ceph-mds collectively manage the file system
+namespace, coordinating access to the shared OSD cluster.
+
+%package mon
+Summary:	Ceph Monitor Daemon
+Group:		System Environment/Base
+Requires:	ceph-base = %{epoch}:%{version}-%{release}
+# For ceph-rest-api
+%if 0%{?fedora} || 0%{?rhel}
+Requires:      python-flask
+%endif
+%if 0%{?suse_version}
+Requires:      python-Flask
+%endif
+%description mon
+ceph-mon is the cluster monitor daemon for the Ceph distributed file
+system. One or more instances of ceph-mon form a Paxos part-time
+parliament cluster that provides extremely reliable and durable storage
+of cluster membership, configuration, and state.
 
 %package fuse
 Summary:	Ceph fuse-based client
@@ -276,6 +313,16 @@ Requires:	librbd1 = %{epoch}:%{version}-%{release}
 %description -n rbd-fuse
 FUSE based client to map Ceph rbd images to files
 
+%package -n rbd-mirror
+Summary:	Ceph daemon for mirroring RBD images
+Group:		System Environment/Base
+Requires:	%{name}
+Requires:	ceph-common = %{epoch}:%{version}-%{release}
+Requires:	librados2 = %{epoch}:%{version}-%{release}
+%description -n rbd-mirror
+Daemon for mirroring RBD images between Ceph clusters, streaming
+changes asynchronously.
+
 %package -n rbd-nbd
 Summary:	Ceph RBD client base on NBD
 Group:		System Environment/Base
@@ -295,6 +342,12 @@ Requires:	ceph-selinux = %{epoch}:%{version}-%{release}
 Requires:	librados2 = %{epoch}:%{version}-%{release}
 %if 0%{?rhel} || 0%{?fedora}
 Requires:	mailcap
+# python-flask for powerdns
+Requires:	python-flask
+%endif
+%if 0%{?suse_version}
+# python-Flask for powerdns
+Requires:      python-Flask
 %endif
 %description radosgw
 This package is an S3 HTTP REST gateway for the RADOS object store. It
@@ -314,6 +367,22 @@ under Open Cluster Framework (OCF) compliant resource
 managers such as Pacemaker.
 %endif
 
+%package osd
+Summary:	Ceph Object Storage Daemon
+Group:		System Environment/Base
+Requires:	ceph-base = %{epoch}:%{version}-%{release}
+# for sgdisk, used by ceph-disk
+%if 0%{?fedora} || 0%{?rhel}
+Requires:	gdisk
+%endif
+%if 0%{?suse_version}
+Requires:	gptfdisk
+%endif
+%description osd
+ceph-osd is the object storage daemon for the Ceph distributed file
+system.  It is responsible for storing objects on a local file system
+and providing access to them over the network.
+
 %package -n librados2
 Summary:	RADOS distributed object store client library
 Group:		System Environment/Libraries
@@ -588,6 +657,7 @@ export RPM_OPT_FLAGS=`echo $RPM_OPT_FLAGS | sed -e 's/i386/i486/'`
 
 %{configure}	CPPFLAGS="$java_inc" \
 		--prefix=/usr \
+                --libexecdir=%{_libexecdir} \
 		--localstatedir=/var \
 		--sysconfdir=/etc \
 %if 0%{?_with_systemd}
@@ -606,16 +676,6 @@ export RPM_OPT_FLAGS=`echo $RPM_OPT_FLAGS | sed -e 's/i386/i486/'`
 		--with-selinux \
 %endif
 		--with-librocksdb-static=check \
-%if 0%{?rhel} || 0%{?fedora}
-		--with-systemd-libexec-dir=/usr/libexec/ceph \
-		--with-rgw-user=root \
-		--with-rgw-group=root \
-%endif
-%if 0%{?suse_version}
-		--with-systemd-libexec-dir=/usr/lib/ceph/ \
-		--with-rgw-user=wwwrun \
-		--with-rgw-group=www \
-%endif
 		--with-radosgw \
 		$CEPH_EXTRA_CONFIGURE_ARGS \
 		%{?_with_ocf} \
@@ -642,6 +702,20 @@ make %{?_smp_mflags} check-local
 
 %install
 make DESTDIR=$RPM_BUILD_ROOT install
+rm -f $RPM_BUILD_ROOT%{_libdir}/ceph/erasure-code/libec_example.so
+rm -f $RPM_BUILD_ROOT%{_libdir}/ceph/erasure-code/libec_fail_to_initialize.so
+rm -f $RPM_BUILD_ROOT%{_libdir}/ceph/erasure-code/libec_fail_to_register.so
+rm -f $RPM_BUILD_ROOT%{_libdir}/ceph/erasure-code/libec_hangs.so
+rm -f $RPM_BUILD_ROOT%{_libdir}/ceph/erasure-code/libec_missing_entry_point.so
+rm -f $RPM_BUILD_ROOT%{_libdir}/ceph/erasure-code/libec_missing_version.so
+rm -f $RPM_BUILD_ROOT%{_libdir}/ceph/erasure-code/libec_test_jerasure_generic.so
+rm -f $RPM_BUILD_ROOT%{_libdir}/ceph/erasure-code/libec_test_jerasure_neon.so
+rm -f $RPM_BUILD_ROOT%{_libdir}/ceph/erasure-code/libec_test_jerasure_sse3.so
+rm -f $RPM_BUILD_ROOT%{_libdir}/ceph/erasure-code/libec_test_jerasure_sse4.so
+rm -f $RPM_BUILD_ROOT%{_libdir}/ceph/erasure-code/libec_test_shec_generic.so
+rm -f $RPM_BUILD_ROOT%{_libdir}/ceph/erasure-code/libec_test_shec_neon.so
+rm -f $RPM_BUILD_ROOT%{_libdir}/ceph/erasure-code/libec_test_shec_sse3.so
+rm -f $RPM_BUILD_ROOT%{_libdir}/ceph/erasure-code/libec_test_shec_sse4.so
 find $RPM_BUILD_ROOT -type f -name "*.la" -exec rm -f {} ';'
 find $RPM_BUILD_ROOT -type f -name "*.a" -exec rm -f {} ';'
 install -D src/etc-rbdmap $RPM_BUILD_ROOT%{_sysconfdir}/ceph/rbdmap
@@ -718,120 +792,29 @@ mkdir -p $RPM_BUILD_ROOT%{_localstatedir}/lib/ceph/bootstrap-rgw
 %clean
 rm -rf $RPM_BUILD_ROOT
 
-%pre
-%if 0%{?_with_systemd}
-  %if 0%{?suse_version}
-    # service_add_pre and friends don't work with parameterized systemd service
-    # instances, only with single services or targets, so we always pass
-    # ceph.target to these macros
-    %service_add_pre ceph.target
-  %endif
-%endif
-
-
-%post
-/sbin/ldconfig
-%if 0%{?_with_systemd}
-  %if 0%{?suse_version}
-    %fillup_only
-    %service_add_post ceph.target
-  %endif
-%else
-  /sbin/chkconfig --add ceph
-%endif
-
-%preun
-%if 0%{?_with_systemd}
-  %if 0%{?suse_version}
-    %service_del_preun ceph.target
-  %endif
-  # Disable and stop on removal.
-  if [ $1 = 0 ] ; then
-    SERVICE_LIST=$(systemctl | grep -E '^ceph-mon@|^ceph-create-keys@|^ceph-osd@|^ceph-mds@|^ceph-disk-'  | cut -d' ' -f1)
-    if [ -n "$SERVICE_LIST" ]; then
-      for SERVICE in $SERVICE_LIST; do
-        /usr/bin/systemctl --no-reload disable $SERVICE > /dev/null 2>&1 || :
-        /usr/bin/systemctl stop $SERVICE > /dev/null 2>&1 || :
-      done
-    fi
-  fi
-%else
-  %if 0%{?rhel} || 0%{?fedora}
-    if [ $1 = 0 ] ; then
-      /sbin/service ceph stop >/dev/null 2>&1
-      /sbin/chkconfig --del ceph
-    fi
-  %endif
-%endif
-
-%postun
-/sbin/ldconfig
-%if 0%{?_with_systemd}
-  if [ $1 = 1 ] ; then
-    # Restart on upgrade, but only if "CEPH_AUTO_RESTART_ON_UPGRADE" is set to
-    # "yes". In any case: if units are not running, do not touch them.
-    SYSCONF_CEPH=/etc/sysconfig/ceph
-    if [ -f $SYSCONF_CEPH -a -r $SYSCONF_CEPH ] ; then
-      source $SYSCONF_CEPH
-    fi
-    if [ "X$CEPH_AUTO_RESTART_ON_UPGRADE" = "Xyes" ] ; then
-      SERVICE_LIST=$(systemctl | grep -E '^ceph-mon@|^ceph-create-keys@|^ceph-osd@|^ceph-mds@|^ceph-disk-'  | cut -d' ' -f1)
-      if [ -n "$SERVICE_LIST" ]; then
-        for SERVICE in $SERVICE_LIST; do
-          /usr/bin/systemctl try-restart $SERVICE > /dev/null 2>&1 || :
-        done
-      fi
-    fi
-  fi
-%endif
-
 #################################################################################
-# files
+# files and systemd scriptlets
 #################################################################################
 %files
+
+%files base
 %defattr(-,root,root,-)
 %docdir %{_docdir}
 %dir %{_docdir}/ceph
 %{_docdir}/ceph/sample.ceph.conf
 %{_docdir}/ceph/sample.fetch_config
-%{_bindir}/cephfs
-%{_bindir}/ceph-clsinfo
-%{_bindir}/ceph-rest-api
-%{python_sitelib}/ceph_rest_api.py*
 %{_bindir}/crushtool
 %{_bindir}/monmaptool
 %{_bindir}/osdmaptool
 %{_bindir}/ceph-run
-%{_bindir}/ceph-mon
-%{_bindir}/ceph-mds
-%{_bindir}/ceph-objectstore-tool
-%{_bindir}/ceph-bluefs-tool
-%{_bindir}/ceph-osd
 %{_bindir}/ceph-detect-init
-%{_bindir}/librados-config
 %{_bindir}/ceph-client-debug
-%{_bindir}/cephfs-journal-tool
-%{_bindir}/cephfs-table-tool
-%{_bindir}/cephfs-data-scan
-%{_bindir}/ceph-debugpack
-%{_bindir}/ceph-coverage
+%{_bindir}/cephfs
 %if 0%{?_with_systemd}
-%{_unitdir}/ceph-mds@.service
-%{_unitdir}/ceph-mon@.service
 %{_unitdir}/ceph-create-keys@.service
-%{_unitdir}/ceph-osd@.service
-%{_unitdir}/ceph-radosgw@.service
-%{_unitdir}/ceph-disk@.service
-%{_unitdir}/ceph.target
-%{_unitdir}/ceph-osd.target
-%{_unitdir}/ceph-mon.target
-%{_unitdir}/ceph-mds.target
-%{_unitdir}/ceph-radosgw.target
 %else
 %{_initrddir}/ceph
 %endif
-%{_sbindir}/ceph-disk
-%{_sbindir}/ceph-disk-udev
 %{_sbindir}/ceph-create-keys
 %{_sbindir}/rcceph
 %if 0%{?rhel} >= 7 || 0%{?fedora} || 0%{?suse_version}
@@ -839,25 +822,11 @@ rm -rf $RPM_BUILD_ROOT
 %else
 /sbin/mount.ceph
 %endif
-%dir %{_libdir}/ceph
-%{_libdir}/ceph/ceph_common.sh
-%{_libexecdir}/ceph/ceph-osd-prestart.sh
+%dir %{_libexecdir}/ceph
+%{_libexecdir}/ceph/ceph_common.sh
 %dir %{_libdir}/rados-classes
-%{_libdir}/rados-classes/libcls_cephfs.so*
-%{_libdir}/rados-classes/libcls_rbd.so*
-%{_libdir}/rados-classes/libcls_hello.so*
-%{_libdir}/rados-classes/libcls_numops.so*
-%{_libdir}/rados-classes/libcls_rgw.so*
-%{_libdir}/rados-classes/libcls_lock.so*
-%{_libdir}/rados-classes/libcls_kvs.so*
-%{_libdir}/rados-classes/libcls_refcount.so*
-%{_libdir}/rados-classes/libcls_log.so*
-%{_libdir}/rados-classes/libcls_replica_log.so*
-%{_libdir}/rados-classes/libcls_statelog.so*
-%{_libdir}/rados-classes/libcls_timeindex.so*
-%{_libdir}/rados-classes/libcls_user.so*
-%{_libdir}/rados-classes/libcls_version.so*
-%{_libdir}/rados-classes/libcls_journal.so*
+%{_libdir}/rados-classes/*
+%dir %{_libdir}/ceph
 %dir %{_libdir}/ceph/erasure-code
 %{_libdir}/ceph/erasure-code/libec_*.so*
 %dir %{_libdir}/ceph/compressor
@@ -866,8 +835,6 @@ rm -rf $RPM_BUILD_ROOT
 %{_libdir}/libos_tp.so*
 %{_libdir}/libosd_tp.so*
 %endif
-%{_udevrulesdir}/60-ceph-partuuid-workaround.rules
-%{_udevrulesdir}/95-ceph-osd.rules
 %config %{_sysconfdir}/bash_completion.d/ceph
 %config(noreplace) %{_sysconfdir}/logrotate.d/ceph
 %if 0%{?fedora} || 0%{?rhel}
@@ -878,29 +845,20 @@ rm -rf $RPM_BUILD_ROOT
 %config %{_sysconfdir}/sysconfig/SuSEfirewall2.d/services/ceph-mon
 %config %{_sysconfdir}/sysconfig/SuSEfirewall2.d/services/ceph-osd-mds
 %endif
+%{_unitdir}/ceph.target
 %{python_sitelib}/ceph_detect_init*
+%{python_sitelib}/ceph_disk*
 %{_mandir}/man8/ceph-deploy.8*
 %{_mandir}/man8/ceph-detect-init.8*
-%{_mandir}/man8/ceph-disk.8*
 %{_mandir}/man8/ceph-create-keys.8*
-%{_mandir}/man8/ceph-mon.8*
-%{_mandir}/man8/ceph-mds.8*
-%{_mandir}/man8/ceph-osd.8*
 %{_mandir}/man8/ceph-run.8*
-%{_mandir}/man8/ceph-rest-api.8*
 %{_mandir}/man8/crushtool.8*
 %{_mandir}/man8/osdmaptool.8*
 %{_mandir}/man8/monmaptool.8*
 %{_mandir}/man8/cephfs.8*
 %{_mandir}/man8/mount.ceph.8*
-%{_mandir}/man8/ceph-debugpack.8*
-%{_mandir}/man8/ceph-clsinfo.8*
-%{_mandir}/man8/librados-config.8*
 #set up placeholder directories
 %attr(750,ceph,ceph) %dir %{_localstatedir}/lib/ceph/tmp
-%attr(750,ceph,ceph) %dir %{_localstatedir}/lib/ceph/mon
-%attr(750,ceph,ceph) %dir %{_localstatedir}/lib/ceph/osd
-%attr(750,ceph,ceph) %dir %{_localstatedir}/lib/ceph/mds
 %attr(750,ceph,ceph) %dir %{_localstatedir}/lib/ceph/bootstrap-osd
 %attr(750,ceph,ceph) %dir %{_localstatedir}/lib/ceph/bootstrap-mds
 %attr(750,ceph,ceph) %dir %{_localstatedir}/lib/ceph/bootstrap-rgw
@@ -908,8 +866,74 @@ rm -rf $RPM_BUILD_ROOT
 %attr(770,ceph,ceph) %dir %{_localstatedir}/run/ceph
 %endif
 
+%pre base
+%if 0%{?_with_systemd}
+  %if 0%{?suse_version}
+    # service_add_pre and friends don't work with parameterized systemd service
+    # instances, only with single services or targets, so we always pass
+    # ceph.target to these macros
+    %service_add_pre ceph.target
+  %endif
+%endif
+
+%post base
+/sbin/ldconfig
+%if 0%{?_with_systemd}
+  %if 0%{?suse_version}
+    %fillup_only
+    %service_add_post ceph.target
+  %endif
+%else
+  /sbin/chkconfig --add ceph
+%endif
+
+%preun base
+%if 0%{?_with_systemd}
+  %if 0%{?suse_version}
+    %service_del_preun ceph.target
+  %endif
+  # Disable and stop on removal.
+  if [ $1 = 0 ] ; then
+    SERVICE_LIST=$(systemctl | grep -E '^ceph-mon@|^ceph-create-keys@|^ceph-osd@|^ceph-mds@|^ceph-disk-'  | cut -d' ' -f1)
+    if [ -n "$SERVICE_LIST" ]; then
+      for SERVICE in $SERVICE_LIST; do
+        /usr/bin/systemctl --no-reload disable $SERVICE > /dev/null 2>&1 || :
+        /usr/bin/systemctl stop $SERVICE > /dev/null 2>&1 || :
+      done
+    fi
+  fi
+%else
+  %if 0%{?rhel} || 0%{?fedora}
+    if [ $1 = 0 ] ; then
+      /sbin/service ceph stop >/dev/null 2>&1
+      /sbin/chkconfig --del ceph
+    fi
+  %endif
+%endif
+
+%postun base
+/sbin/ldconfig
+%if 0%{?_with_systemd}
+  if [ $1 = 1 ] ; then
+    # Restart on upgrade, but only if "CEPH_AUTO_RESTART_ON_UPGRADE" is set to
+    # "yes". In any case: if units are not running, do not touch them.
+    SYSCONF_CEPH=/etc/sysconfig/ceph
+    if [ -f $SYSCONF_CEPH -a -r $SYSCONF_CEPH ] ; then
+      source $SYSCONF_CEPH
+    fi
+    if [ "X$CEPH_AUTO_RESTART_ON_UPGRADE" = "Xyes" ] ; then
+      SERVICE_LIST=$(systemctl | grep -E '^ceph-mon@|^ceph-create-keys@|^ceph-osd@|^ceph-mds@|^ceph-disk-'  | cut -d' ' -f1)
+      if [ -n "$SERVICE_LIST" ]; then
+        for SERVICE in $SERVICE_LIST; do
+          /usr/bin/systemctl try-restart $SERVICE > /dev/null 2>&1 || :
+        done
+      fi
+    fi
+  fi
+%endif
+
 #################################################################################
-%files -n ceph-common
+%files common
 %defattr(-,root,root,-)
 %{_bindir}/ceph
 %{_bindir}/ceph-authtool
@@ -943,12 +967,11 @@ rm -rf $RPM_BUILD_ROOT
 %{_mandir}/man8/rbd-replay.8*
 %{_mandir}/man8/rbd-replay-many.8*
 %{_mandir}/man8/rbd-replay-prep.8*
+%dir %{_datadir}/ceph/
 %{_datadir}/ceph/known_hosts_drop.ceph.com
 %{_datadir}/ceph/id_dsa_drop.ceph.com
 %{_datadir}/ceph/id_dsa_drop.ceph.com.pub
 %dir %{_sysconfdir}/ceph/
-%dir %{_datarootdir}/ceph/
-%dir %{_libexecdir}/ceph/
 %config %{_sysconfdir}/bash_completion.d/rados
 %config %{_sysconfdir}/bash_completion.d/rbd
 %config(noreplace) %{_sysconfdir}/ceph/rbdmap
@@ -963,7 +986,7 @@ rm -rf $RPM_BUILD_ROOT
 %attr(3770,ceph,ceph) %dir %{_localstatedir}/log/ceph/
 %attr(750,ceph,ceph) %dir %{_localstatedir}/lib/ceph/
 
-%pre -n ceph-common
+%pre common
 CEPH_GROUP_ID=""
 CEPH_USER_ID=""
 %if 0%{?rhel} || 0%{?fedora}
@@ -980,12 +1003,12 @@ getent passwd ceph >/dev/null || useradd -r -g ceph -d %{_localstatedir}/lib/cep
 %endif
 exit 0
 
-%post -n ceph-common
+%post common
 %if 0%{?_with_systemd}
 %tmpfiles_create %{_tmpfilesdir}/ceph-common.conf
 %endif
 
-%postun -n ceph-common
+%postun common
 # Package removal cleanup
 if [ "$1" -eq "0" ] ; then
     rm -rf /var/log/ceph
@@ -993,6 +1016,36 @@ if [ "$1" -eq "0" ] ; then
 fi
 
 #################################################################################
+%files mds
+%{_bindir}/ceph-mds
+%{_bindir}/cephfs-journal-tool
+%{_bindir}/cephfs-table-tool
+%{_bindir}/cephfs-data-scan
+%{_mandir}/man8/ceph-mds.8*
+%if 0%{?_with_systemd}
+%{_unitdir}/ceph-mds@.service
+%{_unitdir}/ceph-mds.target
+%else
+%{_initrddir}/ceph
+%endif
+%attr(750,ceph,ceph) %dir %{_localstatedir}/lib/ceph/mds
+
+#################################################################################
+%files mon
+%{_bindir}/ceph-mon
+%{_bindir}/ceph-rest-api
+%{_mandir}/man8/ceph-mon.8*
+%{_mandir}/man8/ceph-rest-api.8*
+%{python_sitelib}/ceph_rest_api.py*
+%if 0%{?_with_systemd}
+%{_unitdir}/ceph-mon@.service
+%{_unitdir}/ceph-mon.target
+%else
+%{_initrddir}/ceph
+%endif
+%attr(750,ceph,ceph) %dir %{_localstatedir}/lib/ceph/mon
+
+#################################################################################
 %files fuse
 %defattr(-,root,root,-)
 %{_bindir}/ceph-fuse
@@ -1010,6 +1063,12 @@ fi
 %{_mandir}/man8/rbd-fuse.8*
 
 #################################################################################
+%files -n rbd-mirror
+%defattr(-,root,root,-)
+%{_bindir}/rbd-mirror
+%{_mandir}/man8/rbd-mirror.8*
+
+#################################################################################
 %files -n rbd-nbd
 %defattr(-,root,root,-)
 %{_bindir}/rbd-nbd
@@ -1026,6 +1085,8 @@ fi
 %config %{_sysconfdir}/bash_completion.d/radosgw-admin
 %dir %{_localstatedir}/lib/ceph/radosgw
 %if 0%{?_with_systemd}
+%{_unitdir}/ceph-radosgw@.service
+%{_unitdir}/ceph-radosgw.target
 %else
 %{_initrddir}/ceph-radosgw
 %{_sbindir}/rcceph-radosgw
@@ -1076,6 +1137,29 @@ fi
 %endif
 
 #################################################################################
+%files osd
+%{_bindir}/ceph-clsinfo
+%{_bindir}/ceph-bluefs-tool
+%{_bindir}/ceph-objectstore-tool
+%{_bindir}/ceph-osd
+%{_sbindir}/ceph-disk
+%{_sbindir}/ceph-disk-udev
+%{_libexecdir}/ceph/ceph-osd-prestart.sh
+%{_udevrulesdir}/60-ceph-partuuid-workaround.rules
+%{_udevrulesdir}/95-ceph-osd.rules
+%{_mandir}/man8/ceph-clsinfo.8*
+%{_mandir}/man8/ceph-disk.8*
+%{_mandir}/man8/ceph-osd.8*
+%if 0%{?_with_systemd}
+%{_unitdir}/ceph-osd@.service
+%{_unitdir}/ceph-osd.target
+%{_unitdir}/ceph-disk@.service
+%else
+%{_initrddir}/ceph
+%endif
+%attr(750,ceph,ceph) %dir %{_localstatedir}/lib/ceph/osd
+
+#################################################################################
 %if %{with ocf}
 %files resource-agents
 %defattr(0755,root,root,-)
@@ -1116,6 +1200,8 @@ fi
 %if 0%{?_with_lttng}
 %{_libdir}/librados_tp.so
 %endif
+%{_bindir}/librados-config
+%{_mandir}/man8/librados-config.8*
 
 #################################################################################
 %files -n python-rados
@@ -1226,9 +1312,12 @@ ln -sf %{_libdir}/librbd.so.1 /usr/lib64/qemu/librbd.so.1
 %{_bindir}/ceph_test_*
 %{_bindir}/ceph_tpbench
 %{_bindir}/ceph_xattr_bench
+%{_bindir}/ceph-coverage
 %{_bindir}/ceph-monstore-tool
 %{_bindir}/ceph-osdomap-tool
 %{_bindir}/ceph-kvstore-tool
+%{_bindir}/ceph-debugpack
+%{_mandir}/man8/ceph-debugpack.8*
 %dir %{_libdir}/ceph
 %{_libdir}/ceph/ceph-monstore-update-crush.sh
 
@@ -1377,4 +1466,5 @@ exit 0
 # We need an empty %%files list for python-ceph-compat, to tell rpmbuild to
 # actually build this meta package.
 
+
 %changelog
diff --git a/src/test/ceph-disk.sh b/src/test/ceph-disk.sh
deleted file mode 100755
index 6b63527..0000000
--- a/src/test/ceph-disk.sh
+++ /dev/null
@@ -1,363 +0,0 @@
-#!/bin/bash
-#
-# Copyright (C) 2014 Cloudwatt <libre.licensing at cloudwatt.com>
-# Copyright (C) 2014, 2015 Red Hat <contact at redhat.com>
-#
-# Author: Loic Dachary <loic at dachary.org>
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Library Public License as published by
-# the Free Software Foundation; either version 2, or (at your option)
-# any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU Library Public License for more details.
-#
-source test/test_btrfs_common.sh
-
-PS4='${BASH_SOURCE[0]}:$LINENO: ${FUNCNAME[0]}:  '
-
-export PATH=.:$PATH # make sure program from sources are prefered
-DIR=test-ceph-disk
-if virtualenv virtualenv-$DIR && test -d ceph-detect-init ; then
-    . virtualenv-$DIR/bin/activate
-    (
-	# older versions of pip will not install wrap_console scripts
-	# when using wheel packages
-	pip install --upgrade 'pip >= 6.1'
-	if test -d ceph-detect-init/wheelhouse ; then
-            wheelhouse="--no-index --use-wheel --find-links=ceph-detect-init/wheelhouse"
-	fi
-	pip --log virtualenv-$DIR/log.txt install $wheelhouse --editable ceph-detect-init
-    )
-fi
-OSD_DATA=$DIR/osd
-MON_ID=a
-MONA=127.0.0.1:7451
-TEST_POOL=rbd
-FSID=$(uuidgen)
-export CEPH_CONF=$DIR/ceph.conf
-export CEPH_ARGS="--fsid $FSID"
-CEPH_ARGS+=" --chdir="
-CEPH_ARGS+=" --journal-dio=false"
-CEPH_ARGS+=" --run-dir=$DIR"
-CEPH_ARGS+=" --osd-failsafe-full-ratio=.99"
-CEPH_ARGS+=" --mon-host=$MONA"
-CEPH_ARGS+=" --log-file=$DIR/\$name.log"
-CEPH_ARGS+=" --pid-file=$DIR/\$name.pidfile"
-if test -d .libs ; then
-    CEPH_ARGS+=" --erasure-code-dir=.libs"
-    CEPH_ARGS+=" --compression-dir=.libs"
-fi
-CEPH_ARGS+=" --auth-supported=none"
-CEPH_ARGS+=" --osd-journal-size=100"
-CEPH_DISK_ARGS=
-CEPH_DISK_ARGS+=" --statedir=$DIR"
-CEPH_DISK_ARGS+=" --sysconfdir=$DIR"
-CEPH_DISK_ARGS+=" --prepend-to-path="
-CEPH_DISK_ARGS+=" --verbose"
-TIMEOUT=360
-
-cat=$(which cat)
-timeout=$(which timeout)
-diff=$(which diff)
-mkdir=$(which mkdir)
-rm=$(which rm)
-uuidgen=$(which uuidgen)
-
-function setup() {
-    teardown
-    mkdir $DIR
-    mkdir $OSD_DATA
-    touch $DIR/ceph.conf # so ceph-disk think ceph is the cluster
-}
-
-function teardown() {
-    kill_daemons
-    if [ $(stat -f -c '%T' .) == "btrfs" ]; then
-        rm -fr $DIR/*/*db
-        teardown_btrfs $DIR
-    fi
-    grep " $(pwd)/$DIR/" < /proc/mounts | while read mounted rest ; do
-        umount $mounted
-    done
-    rm -fr $DIR
-}
-
-function run_mon() {
-    local mon_dir=$DIR/$MON_ID
-
-    ceph-mon \
-        --id $MON_ID \
-        --mkfs \
-        --mon-data=$mon_dir \
-        --mon-initial-members=$MON_ID \
-        "$@"
-
-    ceph-mon \
-        --id $MON_ID \
-        --mon-data=$mon_dir \
-        --mon-osd-full-ratio=.99 \
-        --mon-data-avail-crit=1 \
-        --mon-cluster-log-file=$mon_dir/log \
-        --public-addr $MONA \
-        "$@"
-}
-
-function kill_daemons() {
-    if ! test -e $DIR ; then
-        return
-    fi
-    for pidfile in $(find $DIR | grep pidfile) ; do
-        pid=$(cat $pidfile)
-        for try in 0 1 1 1 2 3 ; do
-            kill $pid 2>/dev/null || break
-            sleep $try
-        done
-    done
-}
-
-function command_fixture() {
-    local command=$1
-
-    [ $(which $command) = ./$command ] || [ $(which $command) = `readlink -f $(pwd)/$command` ] || return 1
-
-    cat > $DIR/$command <<EOF
-#!/bin/bash
-touch $DIR/used-$command
-exec ./$command "\$@"
-EOF
-    chmod +x $DIR/$command
-}
-
-function tweak_path() {
-    local tweaker=$1
-
-    setup
-
-    command_fixture ceph-conf || return 1
-    command_fixture ceph-osd || return 1
-
-    test_activate_dir
-
-    [ ! -f $DIR/used-ceph-conf ] || return 1
-    [ ! -f $DIR/used-ceph-osd ] || return 1
-
-    teardown
-
-    setup
-
-    command_fixture ceph-conf || return 1
-    command_fixture ceph-osd || return 1
-
-    $tweaker test_activate_dir || return 1
-
-    [ -f $DIR/used-ceph-conf ] || return 1
-    [ -f $DIR/used-ceph-osd ] || return 1
-
-    teardown
-}
-
-function use_prepend_to_path() {
-    local ceph_disk_args
-    ceph_disk_args+=" --statedir=$DIR"
-    ceph_disk_args+=" --sysconfdir=$DIR"
-    ceph_disk_args+=" --prepend-to-path=$DIR"
-    ceph_disk_args+=" --verbose"
-    CEPH_DISK_ARGS="$ceph_disk_args" \
-        "$@" || return 1
-}
-
-function test_prepend_to_path() {
-    tweak_path use_prepend_to_path || return 1
-}
-
-function use_path() {
-    PATH="$DIR:$PATH" \
-        "$@" || return 1
-}
-
-function test_path() {
-    tweak_path use_path || return 1
-}
-
-function test_no_path() {
-    ( unset PATH ; test_activate_dir ) || return 1
-}
-
-function test_mark_init() {
-    run_mon
-
-    local osd_data=$(pwd)/$DIR/dir
-    $mkdir -p $osd_data
-
-    local osd_uuid=$($uuidgen)
-
-    $mkdir -p $OSD_DATA
-
-    ceph-disk $CEPH_DISK_ARGS \
-        prepare --osd-uuid $osd_uuid $osd_data || return 1
-
-    $timeout $TIMEOUT ceph-disk $CEPH_DISK_ARGS \
-        --verbose \
-        activate \
-        --mark-init=auto \
-        --no-start-daemon \
-        $osd_data || return 1
-
-    test -f $osd_data/$(ceph-detect-init) || return 1
-
-    if test systemd = $(ceph-detect-init) ; then
-        expected=sysvinit
-    else
-        expected=systemd
-    fi
-    $timeout $TIMEOUT ceph-disk $CEPH_DISK_ARGS \
-        --verbose \
-        activate \
-        --mark-init=$expected \
-        --no-start-daemon \
-        $osd_data || return 1
-
-    ! test -f $osd_data/$(ceph-detect-init) || return 1
-    test -f $osd_data/$expected || return 1
-
-    $rm -fr $osd_data
-}
-
-function test_zap() {
-    local osd_data=$DIR/dir
-    $mkdir -p $osd_data
-
-    ceph-disk $CEPH_DISK_ARGS zap $osd_data 2>&1 | grep -q 'not full block device' || return 1
-
-    $rm -fr $osd_data
-}
-
-# ceph-disk prepare returns immediately on success if the magic file
-# exists in the --osd-data directory.
-function test_activate_dir_magic() {
-    local uuid=$($uuidgen)
-    local osd_data=$DIR/osd
-
-    echo a failure to create the fsid file implies the magic file is not created
-
-    mkdir -p $osd_data/fsid
-    CEPH_ARGS="--fsid $uuid" \
-     ceph-disk $CEPH_DISK_ARGS prepare $osd_data > $DIR/out 2>&1
-    grep --quiet 'Is a directory' $DIR/out || return 1
-    ! [ -f $osd_data/magic ] || return 1
-    rmdir $osd_data/fsid
-
-    echo successfully prepare the OSD
-
-    CEPH_ARGS="--fsid $uuid" \
-     ceph-disk $CEPH_DISK_ARGS prepare $osd_data 2>&1 | tee $DIR/out
-    grep --quiet 'Preparing osd data dir' $DIR/out || return 1
-    grep --quiet $uuid $osd_data/ceph_fsid || return 1
-    [ -f $osd_data/magic ] || return 1
-
-    echo will not override an existing OSD
-
-    CEPH_ARGS="--fsid $($uuidgen)" \
-     ceph-disk $CEPH_DISK_ARGS prepare $osd_data 2>&1 | tee $DIR/out
-    grep --quiet 'ceph-disk:Data dir .* already exists' $DIR/out || return 1
-    grep --quiet $uuid $osd_data/ceph_fsid || return 1
-}
-
-function test_pool_read_write() {
-    local osd_uuid=$1
-
-    $timeout $TIMEOUT ceph osd pool set $TEST_POOL size 1 || return 1
-
-    local id=$(ceph osd create $osd_uuid)
-    local weight=1
-    ceph osd crush add osd.$id $weight root=default host=localhost || return 1
-    echo FOO > $DIR/BAR
-    $timeout $TIMEOUT rados --pool $TEST_POOL put BAR $DIR/BAR || return 1
-    $timeout $TIMEOUT rados --pool $TEST_POOL get BAR $DIR/BAR.copy || return 1
-    $diff $DIR/BAR $DIR/BAR.copy || return 1
-}
-
-function test_activate() {
-    local to_prepare=$1
-    local to_activate=$2
-    local journal=$3
-    local osd_uuid=$($uuidgen)
-
-    $mkdir -p $OSD_DATA
-
-    ceph-disk $CEPH_DISK_ARGS \
-        prepare --osd-uuid $osd_uuid $to_prepare $journal || return 1
-
-    $timeout $TIMEOUT ceph-disk $CEPH_DISK_ARGS \
-        activate \
-        --mark-init=none \
-        $to_activate || return 1
-
-    test_pool_read_write $osd_uuid || return 1
-}
-
-function test_activate_dir() {
-    run_mon
-
-    local osd_data=$DIR/dir
-    $mkdir -p $osd_data
-    test_activate $osd_data $osd_data || return 1
-    $rm -fr $osd_data
-}
-
-function test_find_cluster_by_uuid() {
-    setup
-    test_activate_dir 2>&1 | tee $DIR/test_find
-    ! grep "No cluster conf found in $DIR" $DIR/test_find || return 1
-    teardown
-
-    setup
-    rm $DIR/ceph.conf
-    test_activate_dir > $DIR/test_find 2>&1 
-    grep --quiet "No cluster conf found in $DIR" $DIR/test_find || return 1
-    teardown
-}
-
-# http://tracker.ceph.com/issues/9653
-function test_keyring_path() {
-    test_activate_dir 2>&1 | tee $DIR/test_keyring
-    grep --quiet "keyring $DIR/bootstrap-osd/ceph.keyring" $DIR/test_keyring || return 1
-}
-
-function run() {
-    local default_actions
-    default_actions+="test_path "
-    default_actions+="test_no_path "
-    default_actions+="test_find_cluster_by_uuid "
-    default_actions+="test_prepend_to_path "
-    default_actions+="test_activate_dir_magic "
-    default_actions+="test_activate_dir "
-    default_actions+="test_keyring_path "
-    default_actions+="test_mark_init "
-    default_actions+="test_zap "
-    local actions=${@:-$default_actions}
-    local status
-    for action in $actions  ; do
-        setup
-        set -x
-        $action
-        status=$?
-        set +x
-        teardown
-        if test $status != 0 ; then
-            break
-        fi
-    done
-    rm -fr virtualenv-$DIR
-    return $status
-}
-
-run $@
-
-# Local Variables:
-# compile-command: "cd .. ; test/ceph-disk.sh # test_activate_dir"
-# End:
diff --git a/src/test/ceph_argparse.cc b/src/test/ceph_argparse.cc
index f846db7..c4f99c8 100644
--- a/src/test/ceph_argparse.cc
+++ b/src/test/ceph_argparse.cc
@@ -27,7 +27,7 @@
 class VectorContainer
 {
 public:
-  VectorContainer(const char** arr_) {
+  explicit VectorContainer(const char** arr_) {
     for (const char **a = arr_; *a; ++a) {
       const char *str = (const char*)strdup(*a);
       arr.push_back(str);
diff --git a/src/test/ceph_objectstore_tool.py b/src/test/ceph_objectstore_tool.py
index 060e144..651326e 100755
--- a/src/test/ceph_objectstore_tool.py
+++ b/src/test/ceph_objectstore_tool.py
@@ -417,7 +417,7 @@ def set_osd_weight(CFSD_PREFIX, osd_ids, osd_path, weight):
                                                                         osdmap_file=osdmap_file.name)
     output = check_output(cmd, shell=True)
     epoch = int(re.findall('#(\d+)', output)[0])
-    
+
     new_crush_file = tempfile.NamedTemporaryFile(delete=False)
     old_crush_file = tempfile.NamedTemporaryFile(delete=False)
     ret = call("./osdmaptool --export-crush {crush_file} {osdmap_file}".format(osdmap_file=osdmap_file.name,
@@ -592,7 +592,7 @@ def main(argv):
     pid = os.getpid()
     TESTDIR = "/tmp/test.{pid}".format(pid=pid)
     DATADIR = "/tmp/data.{pid}".format(pid=pid)
-    CFSD_PREFIX = "./ceph-objectstore-tool --data-path " + OSDDIR + "/{osd} --journal-path " + OSDDIR + "/{osd}.journal "
+    CFSD_PREFIX = "./ceph-objectstore-tool --data-path " + OSDDIR + "/{osd} "
     PROFNAME = "testecprofile"
 
     os.environ['CEPH_CONF'] = CEPH_CONF
@@ -872,7 +872,7 @@ def main(argv):
     cmd = (CFSD_PREFIX + "--op import --file {FOO}").format(osd=ONEOSD, FOO=OTHERFILE)
     ERRORS += test_failure(cmd, "file: {FOO}: No such file or directory".format(FOO=OTHERFILE))
 
-    cmd = "./ceph-objectstore-tool --data-path BAD_DATA_PATH --journal-path " + OSDDIR + "/{osd}.journal --op list".format(osd=ONEOSD)
+    cmd = "./ceph-objectstore-tool --data-path BAD_DATA_PATH --op list".format(osd=ONEOSD)
     ERRORS += test_failure(cmd, "data-path: BAD_DATA_PATH: No such file or directory")
 
     cmd = "./ceph-objectstore-tool --journal-path BAD_JOURNAL_PATH --op dump-journal"
@@ -889,10 +889,10 @@ def main(argv):
     # Specify a bad --type
     os.mkdir(OSDDIR + "/fakeosd")
     cmd = ("./ceph-objectstore-tool --data-path " + OSDDIR + "/{osd} --type foobar --op list --pgid {pg}").format(osd="fakeosd", pg=ONEPG)
-    ERRORS += test_failure(cmd, "Need a valid --type e.g. filestore, memstore, keyvaluestore")
+    ERRORS += test_failure(cmd, "Unable to create store of type foobar")
 
     # Don't specify a data-path
-    cmd = "./ceph-objectstore-tool --journal-path {dir}/{osd}.journal --type memstore --op list --pgid {pg}".format(dir=OSDDIR, osd=ONEOSD, pg=ONEPG)
+    cmd = "./ceph-objectstore-tool --type memstore --op list --pgid {pg}".format(dir=OSDDIR, osd=ONEOSD, pg=ONEPG)
     ERRORS += test_failure(cmd, "Must provide --data-path")
 
     cmd = (CFSD_PREFIX + "--op remove").format(osd=ONEOSD)
@@ -904,7 +904,7 @@ def main(argv):
 
     # Specify a bad --op command
     cmd = (CFSD_PREFIX + "--op oops").format(osd=ONEOSD)
-    ERRORS += test_failure(cmd, "Must provide --op (info, log, remove, fsck, export, import, list, fix-lost, list-pgs, rm-past-intervals, dump-journal, dump-super, meta-list, get-osdmap, set-osdmap, get-inc-osdmap, set-inc-osdmap, mark-complete)")
+    ERRORS += test_failure(cmd, "Must provide --op (info, log, remove, mkfs, fsck, export, import, list, fix-lost, list-pgs, rm-past-intervals, dump-journal, dump-super, meta-list, get-osdmap, set-osdmap, get-inc-osdmap, set-inc-osdmap, mark-complete)")
 
     # Provide just the object param not a command
     cmd = (CFSD_PREFIX + "object").format(osd=ONEOSD)
diff --git a/src/test/cli/radosgw-admin/help.t b/src/test/cli/radosgw-admin/help.t
index c233f61..260de6d 100644
--- a/src/test/cli/radosgw-admin/help.t
+++ b/src/test/cli/radosgw-admin/help.t
@@ -73,6 +73,8 @@
     replicalog get             get replica metadata log entry
     replicalog update          update replica metadata log entry
     replicalog delete          delete replica metadata log entry
+    orphans find               init and run search for leaked rados objects
+    orphans finish             clean up search for leaked rados objects
   options:
      --uid=<id>                user id
      --subuser=<name>          subuser name
@@ -137,6 +139,10 @@
      --max-size                specify max size (in bytes, negative value to disable)
      --quota-scope             scope of quota (bucket, user)
   
+  Orphans search options:
+     --pool                    data pool to scan for leaked rados objects in
+     --num-shards              num of shards to use for keeping the temporary scan info
+  
     --conf/-c FILE    read configuration from the given configuration file
     --id/-i ID        set ID portion of my name
     --name/-n TYPE.ID set name
diff --git a/src/test/cli/rbd/help.t b/src/test/cli/rbd/help.t
index 1654ff4..6832012 100644
--- a/src/test/cli/rbd/help.t
+++ b/src/test/cli/rbd/help.t
@@ -1019,6 +1019,7 @@
   rbd help snap rename
   usage: rbd snap rename [--pool <pool>] [--image <image>] [--snap <snap>] 
                          [--dest-pool <dest-pool>] [--dest <dest>] 
+                         [--dest-snap <dest-snap>] 
                          <source-snap-spec> <dest-snap-spec> 
   
   Rename a snapshot.
@@ -1035,6 +1036,7 @@
     --snap arg           source snapshot name
     --dest-pool arg      destination pool name
     --dest arg           destination image name
+    --dest-snap arg      destination snapshot name
   
   rbd help snap rollback
   usage: rbd snap rollback [--pool <pool>] [--image <image>] [--snap <snap>] 
diff --git a/src/test/cls_journal/test_cls_journal.cc b/src/test/cls_journal/test_cls_journal.cc
index b6405ff..3a26e8f 100644
--- a/src/test/cls_journal/test_cls_journal.cc
+++ b/src/test/cls_journal/test_cls_journal.cc
@@ -192,18 +192,36 @@ TEST_F(TestClsJournal, CreateDuplicate) {
   ASSERT_EQ(-EEXIST, client::create(ioctx, oid, 3, 5, ioctx.get_id()));
 }
 
+TEST_F(TestClsJournal, GetClient) {  // get_client: -ENOENT when unregistered, else the registered client
+  librados::IoCtx ioctx;
+  ASSERT_EQ(0, _rados.ioctx_create(_pool_name.c_str(), ioctx));
+
+  std::string oid = get_temp_image_name();
+
+  Client client;
+  ASSERT_EQ(-ENOENT, client::get_client(ioctx, oid, "id", &client));
+
+  bufferlist data;
+  data.append(std::string(128, '1'));  // fill ctor is (count, char): 128 copies of '1'
+  ASSERT_EQ(0, client::client_register(ioctx, oid, "id1", data));
+
+  ASSERT_EQ(0, client::get_client(ioctx, oid, "id1", &client));
+  Client expected_client("id1", data);
+  ASSERT_EQ(expected_client, client);
+}
+
 TEST_F(TestClsJournal, ClientRegister) {
   librados::IoCtx ioctx;
   ASSERT_EQ(0, _rados.ioctx_create(_pool_name.c_str(), ioctx));
 
   std::string oid = get_temp_image_name();
 
-  ASSERT_EQ(0, client::client_register(ioctx, oid, "id1", "desc1"));
+  ASSERT_EQ(0, client::client_register(ioctx, oid, "id1", bufferlist()));
 
   std::set<Client> clients;
   ASSERT_EQ(0, client::client_list(ioctx, oid, &clients));
 
-  std::set<Client> expected_clients = {Client("id1", "desc1")};
+  std::set<Client> expected_clients = {Client("id1", bufferlist())};
   ASSERT_EQ(expected_clients, clients);
 }
 
@@ -213,8 +231,28 @@ TEST_F(TestClsJournal, ClientRegisterDuplicate) {
 
   std::string oid = get_temp_image_name();
 
-  ASSERT_EQ(0, client::client_register(ioctx, oid, "id1", "desc1"));
-  ASSERT_EQ(-EEXIST, client::client_register(ioctx, oid, "id1", "desc2"));
+  ASSERT_EQ(0, client::client_register(ioctx, oid, "id1", bufferlist()));
+  ASSERT_EQ(-EEXIST, client::client_register(ioctx, oid, "id1", bufferlist()));
+}
+
+TEST_F(TestClsJournal, ClientUpdate) {  // client_update: -ENOENT when unregistered, else replaces client data
+  librados::IoCtx ioctx;
+  ASSERT_EQ(0, _rados.ioctx_create(_pool_name.c_str(), ioctx));
+
+  std::string oid = get_temp_image_name();
+
+  ASSERT_EQ(-ENOENT, client::client_update(ioctx, oid, "id1", bufferlist()));
+
+  ASSERT_EQ(0, client::client_register(ioctx, oid, "id1", bufferlist()));
+
+  bufferlist data;
+  data.append(std::string(128, '1'));  // fill ctor is (count, char): 128 copies of '1'
+  ASSERT_EQ(0, client::client_update(ioctx, oid, "id1", data));
+
+  Client client;
+  ASSERT_EQ(0, client::get_client(ioctx, oid, "id1", &client));
+  Client expected_client("id1", data);
+  ASSERT_EQ(expected_client, client);
 }
 
 TEST_F(TestClsJournal, ClientUnregister) {
@@ -223,7 +261,7 @@ TEST_F(TestClsJournal, ClientUnregister) {
 
   std::string oid = get_temp_image_name();
 
-  ASSERT_EQ(0, client::client_register(ioctx, oid, "id1", "desc1"));
+  ASSERT_EQ(0, client::client_register(ioctx, oid, "id1", bufferlist()));
   ASSERT_EQ(0, client::client_unregister(ioctx, oid, "id1"));
 }
 
@@ -233,11 +271,40 @@ TEST_F(TestClsJournal, ClientUnregisterDNE) {
 
   std::string oid = get_temp_image_name();
 
-  ASSERT_EQ(0, client::client_register(ioctx, oid, "id1", "desc1"));
+  ASSERT_EQ(0, client::client_register(ioctx, oid, "id1", bufferlist()));
   ASSERT_EQ(0, client::client_unregister(ioctx, oid, "id1"));
   ASSERT_EQ(-ENOENT, client::client_unregister(ioctx, oid, "id1"));
 }
 
+TEST_F(TestClsJournal, ClientUnregisterPruneTags) {
+  librados::IoCtx ioctx;
+  ASSERT_EQ(0, _rados.ioctx_create(_pool_name.c_str(), ioctx));
+
+  std::string oid = get_temp_image_name();
+
+  ASSERT_EQ(0, client::create(ioctx, oid, 2, 2, ioctx.get_id()));
+  ASSERT_EQ(0, client::client_register(ioctx, oid, "id1", bufferlist()));
+  ASSERT_EQ(0, client::client_register(ioctx, oid, "id2", bufferlist()));
+
+  ASSERT_EQ(0, client::tag_create(ioctx, oid, 0, Tag::TAG_CLASS_NEW,
+                                  bufferlist()));
+  ASSERT_EQ(0, client::tag_create(ioctx, oid, 1, Tag::TAG_CLASS_NEW,
+                                  bufferlist()));
+  ASSERT_EQ(0, client::tag_create(ioctx, oid, 2, 1, bufferlist()));
+
+  librados::ObjectWriteOperation op1;
+  client::client_commit(&op1, "id1", {1, {{2, 120}}});
+  ASSERT_EQ(0, ioctx.operate(oid, &op1));
+
+  ASSERT_EQ(0, client::client_unregister(ioctx, oid, "id2"));
+
+  std::set<Tag> expected_tags = {{0, 0, {}}, {2, 1, {}}};
+  std::set<Tag> tags;
+  ASSERT_EQ(0, client::tag_list(ioctx, oid, "id1",
+                                boost::optional<uint64_t>(), &tags));
+  ASSERT_EQ(expected_tags, tags);
+}
+
 TEST_F(TestClsJournal, ClientCommit) {
   librados::IoCtx ioctx;
   ASSERT_EQ(0, _rados.ioctx_create(_pool_name.c_str(), ioctx));
@@ -245,12 +312,12 @@ TEST_F(TestClsJournal, ClientCommit) {
   std::string oid = get_temp_image_name();
 
   ASSERT_EQ(0, client::create(ioctx, oid, 2, 2, ioctx.get_id()));
-  ASSERT_EQ(0, client::client_register(ioctx, oid, "id1", "desc1"));
+  ASSERT_EQ(0, client::client_register(ioctx, oid, "id1", bufferlist()));
 
   cls::journal::EntryPositions entry_positions;
   entry_positions = {
-    cls::journal::EntryPosition("tag1", 120),
-    cls::journal::EntryPosition("tag2", 121)};
+    cls::journal::EntryPosition(234, 120),
+    cls::journal::EntryPosition(235, 121)};
   cls::journal::ObjectSetPosition object_set_position(
     1, entry_positions);
 
@@ -262,7 +329,7 @@ TEST_F(TestClsJournal, ClientCommit) {
   ASSERT_EQ(0, client::client_list(ioctx, oid, &clients));
 
   std::set<Client> expected_clients = {
-    Client("id1", "desc1", object_set_position)};
+    Client("id1", bufferlist(), object_set_position)};
   ASSERT_EQ(expected_clients, clients);
 }
 
@@ -273,13 +340,13 @@ TEST_F(TestClsJournal, ClientCommitInvalid) {
   std::string oid = get_temp_image_name();
 
   ASSERT_EQ(0, client::create(ioctx, oid, 2, 2, ioctx.get_id()));
-  ASSERT_EQ(0, client::client_register(ioctx, oid, "id1", "desc1"));
+  ASSERT_EQ(0, client::client_register(ioctx, oid, "id1", bufferlist()));
 
   cls::journal::EntryPositions entry_positions;
   entry_positions = {
-    cls::journal::EntryPosition("tag1", 120),
-    cls::journal::EntryPosition("tag1", 121),
-    cls::journal::EntryPosition("tag2", 121)};
+    cls::journal::EntryPosition(234, 120),
+    cls::journal::EntryPosition(234, 121),
+    cls::journal::EntryPosition(235, 121)};
   cls::journal::ObjectSetPosition object_set_position(
     1, entry_positions);
 
@@ -313,8 +380,8 @@ TEST_F(TestClsJournal, ClientList) {
   librados::ObjectWriteOperation op1;
   for (uint32_t i = 0; i < 512; ++i) {
     std::string id =  "id" + stringify(i + 1);
-    expected_clients.insert(Client(id, ""));
-    client::client_register(&op1, id, "");
+    expected_clients.insert(Client(id, bufferlist()));
+    client::client_register(&op1, id, bufferlist());
   }
   ASSERT_EQ(0, ioctx.operate(oid, &op1));
 
@@ -332,6 +399,124 @@ TEST_F(TestClsJournal, ClientList) {
   ASSERT_EQ(expected_clients, read_clients);
 }
 
+TEST_F(TestClsJournal, GetNextTagTid) {
+  librados::IoCtx ioctx;
+  ASSERT_EQ(0, _rados.ioctx_create(_pool_name.c_str(), ioctx));
+
+  std::string oid = get_temp_image_name();
+
+  uint64_t tag_tid;
+  ASSERT_EQ(-ENOENT, client::get_next_tag_tid(ioctx, oid, &tag_tid));
+
+  ASSERT_EQ(0, client::create(ioctx, oid, 2, 2, ioctx.get_id()));
+  ASSERT_EQ(0, client::client_register(ioctx, oid, "id1", bufferlist()));
+
+  ASSERT_EQ(0, client::get_next_tag_tid(ioctx, oid, &tag_tid));
+  ASSERT_EQ(0U, tag_tid);
+
+  ASSERT_EQ(0, client::tag_create(ioctx, oid, 0, Tag::TAG_CLASS_NEW,
+                                  bufferlist()));
+  ASSERT_EQ(0, client::get_next_tag_tid(ioctx, oid, &tag_tid));
+  ASSERT_EQ(1U, tag_tid);
+}
+
+TEST_F(TestClsJournal, TagCreate) {
+  librados::IoCtx ioctx;
+  ASSERT_EQ(0, _rados.ioctx_create(_pool_name.c_str(), ioctx));
+
+  std::string oid = get_temp_image_name();
+
+  ASSERT_EQ(-ENOENT, client::tag_create(ioctx, oid, 0, Tag::TAG_CLASS_NEW,
+                                        bufferlist()));
+
+  ASSERT_EQ(0, client::create(ioctx, oid, 2, 2, ioctx.get_id()));
+  ASSERT_EQ(0, client::client_register(ioctx, oid, "id1", bufferlist()));
+
+  ASSERT_EQ(-ESTALE, client::tag_create(ioctx, oid, 1, Tag::TAG_CLASS_NEW,
+                                        bufferlist()));
+  ASSERT_EQ(-EINVAL, client::tag_create(ioctx, oid, 0, 1, bufferlist()));
+
+  ASSERT_EQ(0, client::tag_create(ioctx, oid, 0, Tag::TAG_CLASS_NEW,
+                                  bufferlist()));
+  ASSERT_EQ(-EEXIST, client::tag_create(ioctx, oid, 0, Tag::TAG_CLASS_NEW,
+                                        bufferlist()));
+  ASSERT_EQ(0, client::tag_create(ioctx, oid, 1, Tag::TAG_CLASS_NEW,
+                                  bufferlist()));
+  ASSERT_EQ(0, client::tag_create(ioctx, oid, 2, 1, bufferlist()));
+
+  std::set<Tag> expected_tags = {
+    {0, 0, {}}, {1, 1, {}}, {2, 1, {}}};
+  std::set<Tag> tags;
+  ASSERT_EQ(0, client::tag_list(ioctx, oid, "id1",
+                                boost::optional<uint64_t>(), &tags));
+  ASSERT_EQ(expected_tags, tags);
+}
+
+TEST_F(TestClsJournal, TagCreatePrunesTags) {
+  librados::IoCtx ioctx;
+  ASSERT_EQ(0, _rados.ioctx_create(_pool_name.c_str(), ioctx));
+
+  std::string oid = get_temp_image_name();
+
+  ASSERT_EQ(0, client::create(ioctx, oid, 2, 2, ioctx.get_id()));
+  ASSERT_EQ(0, client::client_register(ioctx, oid, "id1", bufferlist()));
+
+  ASSERT_EQ(0, client::tag_create(ioctx, oid, 0, Tag::TAG_CLASS_NEW,
+                                  bufferlist()));
+  ASSERT_EQ(0, client::tag_create(ioctx, oid, 1, Tag::TAG_CLASS_NEW,
+                                  bufferlist()));
+  ASSERT_EQ(0, client::tag_create(ioctx, oid, 2, 1, bufferlist()));
+
+  librados::ObjectWriteOperation op1;
+  client::client_commit(&op1, "id1", {1, {{2, 120}}});
+  ASSERT_EQ(0, ioctx.operate(oid, &op1));
+
+  ASSERT_EQ(0, client::tag_create(ioctx, oid, 3, 0, bufferlist()));
+
+  std::set<Tag> expected_tags = {
+    {0, 0, {}}, {2, 1, {}}, {3, 0, {}}};
+  std::set<Tag> tags;
+  ASSERT_EQ(0, client::tag_list(ioctx, oid, "id1",
+                                boost::optional<uint64_t>(), &tags));
+  ASSERT_EQ(expected_tags, tags);
+}
+
+TEST_F(TestClsJournal, TagList) {
+  librados::IoCtx ioctx;
+  ASSERT_EQ(0, _rados.ioctx_create(_pool_name.c_str(), ioctx));
+
+  std::string oid = get_temp_image_name();
+
+  ASSERT_EQ(0, client::create(ioctx, oid, 2, 2, ioctx.get_id()));
+  ASSERT_EQ(0, client::client_register(ioctx, oid, "id1", bufferlist()));
+
+  std::set<Tag> expected_all_tags;
+  std::set<Tag> expected_filtered_tags;
+  for (uint32_t i = 0; i < 96; ++i) {
+    uint64_t tag_class = Tag::TAG_CLASS_NEW;
+    if (i > 1) {
+      tag_class = i % 2 == 0 ? 0 : 1;
+    }
+
+    Tag tag(i, i % 2 == 0 ? 0 : 1, bufferlist());
+    expected_all_tags.insert(tag);
+    if (i % 2 == 0) {
+      expected_filtered_tags.insert(tag);
+    }
+    ASSERT_EQ(0, client::tag_create(ioctx, oid, i, tag_class,
+                                    bufferlist()));
+  }
+
+  std::set<Tag> tags;
+  ASSERT_EQ(0, client::tag_list(ioctx, oid, "id1", boost::optional<uint64_t>(),
+                                &tags));
+  ASSERT_EQ(expected_all_tags, tags);
+
+  ASSERT_EQ(0, client::tag_list(ioctx, oid, "id1", boost::optional<uint64_t>(0),
+                                &tags));
+  ASSERT_EQ(expected_filtered_tags, tags);
+}
+
 TEST_F(TestClsJournal, GuardAppend) {
   librados::IoCtx ioctx;
   ASSERT_EQ(0, _rados.ioctx_create(_pool_name.c_str(), ioctx));
diff --git a/src/test/cls_log/test_cls_log.cc b/src/test/cls_log/test_cls_log.cc
index ce97025..6b8a31d 100644
--- a/src/test/cls_log/test_cls_log.cc
+++ b/src/test/cls_log/test_cls_log.cc
@@ -187,6 +187,9 @@ TEST(cls_rgw, test_log_add_same_time)
   ASSERT_EQ(1, (int)truncated);
 
   delete rop;
+
+  /* destroy pool */
+  ASSERT_EQ(0, destroy_one_pool_pp(pool_name, rados));
 }
 
 TEST(cls_rgw, test_log_add_different_time)
@@ -280,6 +283,9 @@ TEST(cls_rgw, test_log_add_different_time)
 
   ASSERT_EQ(10, i);
   delete rop;
+
+  /* destroy pool */
+  ASSERT_EQ(0, destroy_one_pool_pp(pool_name, rados));
 }
 
 TEST(cls_rgw, test_log_trim)
@@ -331,4 +337,7 @@ TEST(cls_rgw, test_log_trim)
     ASSERT_EQ(0, (int)truncated);
   }
   delete rop;
+
+  /* destroy pool */
+  ASSERT_EQ(0, destroy_one_pool_pp(pool_name, rados));
 }
diff --git a/src/test/common/ObjectContents.h b/src/test/common/ObjectContents.h
index 0f467b1..3892c75 100644
--- a/src/test/common/ObjectContents.h
+++ b/src/test/common/ObjectContents.h
@@ -16,20 +16,20 @@ bool test_object_contents();
 
 class ObjectContents {
   uint64_t _size;
-  map<uint64_t, unsigned int> seeds;
+  std::map<uint64_t, unsigned int> seeds;
   interval_set<uint64_t> written;
   bool _exists;
 public:
   class Iterator {
     ObjectContents *parent;
-    map<uint64_t, unsigned int>::iterator iter;
+    std::map<uint64_t, unsigned int>::iterator iter;
     unsigned int current_state;
     int current_val;
     uint64_t pos;
   private:
     unsigned int get_state(uint64_t pos);
   public:
-    Iterator(ObjectContents *parent) :
+    explicit Iterator(ObjectContents *parent) :
       parent(parent), iter(parent->seeds.end()),
       current_state(0), current_val(0), pos(-1) {
       seek_to_first();
@@ -77,7 +77,7 @@ public:
     seeds[0] = 0;
   }
 
-  ObjectContents(bufferlist::iterator &bp) {
+  explicit ObjectContents(bufferlist::iterator &bp) {
     ::decode(_size, bp);
     ::decode(seeds, bp);
     ::decode(written, bp);
@@ -100,7 +100,7 @@ public:
   void debug(std::ostream &out) {
     out << "_size is " << _size << std::endl;
     out << "seeds is: (";
-    for (map<uint64_t, unsigned int>::iterator i = seeds.begin();
+    for (std::map<uint64_t, unsigned int>::iterator i = seeds.begin();
 	 i != seeds.end();
 	 ++i) {
       out << "[" << i->first << "," << i->second << "], ";
diff --git a/src/test/common/test_async_compressor.cc b/src/test/common/test_async_compressor.cc
index 1655596..e80a499 100644
--- a/src/test/common/test_async_compressor.cc
+++ b/src/test/common/test_async_compressor.cc
@@ -102,7 +102,7 @@ class SyntheticWorkload {
   static const uint64_t MAX_INFLIGHT = 128;
 
  public:
-  SyntheticWorkload(AsyncCompressor *ac): async_compressor(ac), rng(time(NULL)) {
+  explicit SyntheticWorkload(AsyncCompressor *ac): async_compressor(ac), rng(time(NULL)) {
     for (int i = 0; i < 100; i++) {
       bufferlist bl;
       boost::uniform_int<> u(4096, 1<<24);
diff --git a/src/test/common/test_config.cc b/src/test/common/test_config.cc
index 5f3bc3f..3286267 100644
--- a/src/test/common/test_config.cc
+++ b/src/test/common/test_config.cc
@@ -112,13 +112,15 @@ public:
 
   void test_expand_all_meta() {
     Mutex::Locker l(lock);
-    int before_count = 0;
+    int before_count = 0, data_dir = 0;
     for (int i = 0; i < NUM_CONFIG_OPTIONS; i++) {
       config_option *opt = config_optionsp + i;
       if (opt->type == OPT_STR) {
         std::string *str = (std::string *)opt->conf_ptr(this);
         if (str->find("$") != string::npos)
           before_count++;
+        if (str->find("$data_dir") != string::npos)
+          data_dir++;
       }
     }
     // if there are no meta variables in the default configuration,
@@ -143,7 +145,7 @@ public:
         }
       }
     }
-    ASSERT_EQ(0, after_count);
+    ASSERT_EQ(data_dir, after_count);
   }
 };
 
diff --git a/src/test/common/test_crc32c.cc b/src/test/common/test_crc32c.cc
index a311616..54200b3 100644
--- a/src/test/common/test_crc32c.cc
+++ b/src/test/common/test_crc32c.cc
@@ -162,14 +162,15 @@ static uint32_t crc_check_table[] = {
 
 TEST(Crc32c, Range) {
   int len = sizeof(crc_check_table) / sizeof(crc_check_table[0]);
-  const char *b = (const char *)malloc(len);
-  memset((void *)b, 1, len);
+  unsigned char *b = (unsigned char *)malloc(len);
+  memset(b, 1, len);
   uint32_t crc = 0;
   uint32_t *check = crc_check_table;
   for (int i = 0 ; i < len; i++, check++) {
-    crc = ceph_crc32c(crc, (unsigned char *)b+i, len-i);
+    crc = ceph_crc32c(crc, b+i, len-i);
     ASSERT_EQ(crc, *check);
   }
+  free(b);
 }
 
 static uint32_t crc_zero_check_table[] = {
@@ -241,15 +242,16 @@ static uint32_t crc_zero_check_table[] = {
 
 TEST(Crc32c, RangeZero) {
   int len = sizeof(crc_zero_check_table) / sizeof(crc_zero_check_table[0]);
-  const char *b = (const char *)malloc(len);
-  memset((void *)b, 0, len);
+  unsigned char *b = (unsigned char *)malloc(len);
+  memset(b, 0, len);
   uint32_t crc = 1; /* when checking zero buffer we want to start with a non zero crc, otherwise
                        all the results are going to be zero */
   uint32_t *check = crc_zero_check_table;
   for (int i = 0 ; i < len; i++, check++) {
-    crc = ceph_crc32c(crc, (unsigned char *)b+i, len-i);
+    crc = ceph_crc32c(crc, b+i, len-i);
     ASSERT_EQ(crc, *check);
   }
+  free(b);
 }
 
 TEST(Crc32c, RangeNull) {
diff --git a/src/test/common/test_interval_set.cc b/src/test/common/test_interval_set.cc
new file mode 100644
index 0000000..66a79a6
--- /dev/null
+++ b/src/test/common/test_interval_set.cc
@@ -0,0 +1,564 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2015 Mirantis, Inc.
+ *
+ * Author: Igor Fedotov <ifedotov at mirantis.com>
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation.  See file COPYING.
+ *
+ */
+
+#include <gtest/gtest.h>
+#include "include/interval_set.h"
+#include "include/btree_interval_set.h"
+
+using namespace ceph;
+
+typedef uint64_t IntervalValueType;
+
+template<typename T>  // T is the interval set type under test (exposed as ISet)
+class IntervalSetTest : public ::testing::Test {
+
+ public:
+  typedef T ISet;
+};
+
+typedef ::testing::Types< interval_set<IntervalValueType> ,  btree_interval_set<IntervalValueType> > IntervalSetTypes;
+
+TYPED_TEST_CASE(IntervalSetTest, IntervalSetTypes);
+
+TYPED_TEST(IntervalSetTest, compare) {
+  typedef typename TestFixture::ISet ISet;
+  ISet iset1, iset2;
+  ASSERT_TRUE(iset1 == iset1);
+  ASSERT_TRUE(iset1 == iset2);
+
+  iset1.insert(1);
+  ASSERT_FALSE(iset1 == iset2);
+
+  iset2.insert(1);
+  ASSERT_TRUE(iset1 == iset2);
+
+  iset1.insert(2, 3);
+  iset2.insert(2, 4);
+  ASSERT_FALSE(iset1 == iset2);
+
+  iset2.erase(2, 4);
+  iset2.erase(1);
+  iset2.insert(2, 3);
+  iset2.insert(1);
+  ASSERT_TRUE(iset1 == iset2);
+
+  iset1.insert(100, 10);
+  iset2.insert(100, 5);
+  ASSERT_FALSE(iset1 == iset2);
+  iset2.insert(105, 5);
+  ASSERT_TRUE(iset1 == iset2);
+
+  iset1.insert(200, 10);
+  iset2.insert(205, 5);
+  ASSERT_FALSE(iset1 == iset2);
+  iset2.insert(200, 1);
+  iset2.insert(202, 3);
+  ASSERT_FALSE(iset1 == iset2);
+  iset2.insert(201, 1);
+  ASSERT_TRUE(iset1 == iset2);
+
+  iset1.clear();
+  ASSERT_FALSE(iset1 == iset2);
+  iset2.clear();
+  ASSERT_TRUE(iset1 == iset2);
+}
+
+TYPED_TEST(IntervalSetTest, contains) {
+  typedef typename TestFixture::ISet ISet;
+  ISet iset1;
+  ASSERT_FALSE(iset1.contains( 1 ));
+  ASSERT_FALSE(iset1.contains( 0, 1 ));
+
+  iset1.insert(1);
+  ASSERT_TRUE(iset1.contains( 1 ));
+  ASSERT_FALSE(iset1.contains( 0 ));
+  ASSERT_FALSE(iset1.contains( 2 ));
+  ASSERT_FALSE(iset1.contains( 0, 1 ));
+  ASSERT_FALSE(iset1.contains( 0, 2 ));
+  ASSERT_TRUE(iset1.contains( 1, 1 ));
+  ASSERT_FALSE(iset1.contains( 1, 2 ));
+
+  iset1.insert(2, 3);
+  ASSERT_TRUE(iset1.contains( 1 ));
+  ASSERT_FALSE(iset1.contains( 0 ));
+  ASSERT_TRUE(iset1.contains( 2 ));
+  ASSERT_FALSE(iset1.contains( 0, 1 ));
+  ASSERT_FALSE(iset1.contains( 0, 2 ));
+  ASSERT_TRUE(iset1.contains( 1, 1 ));
+  ASSERT_TRUE(iset1.contains( 1, 2 ));
+  ASSERT_TRUE(iset1.contains( 1, 3 ));
+  ASSERT_TRUE(iset1.contains( 1, 4 ));
+  ASSERT_FALSE(iset1.contains( 1, 5 ));
+  ASSERT_TRUE(iset1.contains( 2, 1 ));
+  ASSERT_TRUE(iset1.contains( 2, 2 ));
+  ASSERT_TRUE(iset1.contains( 2, 3 ));
+  ASSERT_FALSE(iset1.contains( 2, 4 ));
+  ASSERT_TRUE(iset1.contains( 3, 2 ));
+  ASSERT_TRUE(iset1.contains( 4, 1 ));
+  ASSERT_FALSE(iset1.contains( 4, 2 ));
+
+  iset1.insert(10, 10);
+  ASSERT_TRUE(iset1.contains( 1, 4 ));
+  ASSERT_FALSE(iset1.contains( 1, 5 ));
+  ASSERT_TRUE(iset1.contains( 2, 2 ));
+  ASSERT_FALSE(iset1.contains( 2, 4 ));
+
+  ASSERT_FALSE(iset1.contains( 1, 10 ));
+  ASSERT_FALSE(iset1.contains( 9, 1 ));
+  ASSERT_FALSE(iset1.contains( 9 ));
+  ASSERT_FALSE(iset1.contains( 9, 11 ));
+  ASSERT_TRUE(iset1.contains( 10, 1 ));
+  ASSERT_TRUE(iset1.contains( 11, 9 ));
+  ASSERT_TRUE(iset1.contains( 11, 2 ));
+  ASSERT_TRUE(iset1.contains( 18, 2 ));
+  ASSERT_TRUE(iset1.contains( 18, 2 ));
+  ASSERT_TRUE(iset1.contains( 10 ));
+  ASSERT_TRUE(iset1.contains( 19 ));
+  ASSERT_FALSE(iset1.contains( 20 ));
+  ASSERT_FALSE(iset1.contains( 21 ));
+
+  ASSERT_FALSE(iset1.contains( 11, 11 ));
+  ASSERT_FALSE(iset1.contains( 18, 9 ));
+
+  iset1.clear();
+  ASSERT_FALSE(iset1.contains( 1 ));
+  ASSERT_FALSE(iset1.contains( 0 ));
+  ASSERT_FALSE(iset1.contains( 2 ));
+  ASSERT_FALSE(iset1.contains( 0, 1 ));
+  ASSERT_FALSE(iset1.contains( 0, 2 ));
+  ASSERT_FALSE(iset1.contains( 1, 1 ));
+  ASSERT_FALSE(iset1.contains( 10, 2 ));
+}
+
+TYPED_TEST(IntervalSetTest, intersects) {
+  typedef typename TestFixture::ISet ISet;
+  ISet iset1;
+  ASSERT_FALSE(iset1.intersects( 1, 1 ));
+  ASSERT_FALSE(iset1.intersects( 0, 1 ));
+  ASSERT_FALSE(iset1.intersects( 0, 10 ));
+
+  iset1.insert(1);
+  ASSERT_TRUE(iset1.intersects( 1, 1 ));
+  ASSERT_FALSE(iset1.intersects( 0, 1 ));
+  ASSERT_FALSE(iset1.intersects( 2, 1 ));
+  ASSERT_TRUE(iset1.intersects( 0, 2 ));
+  ASSERT_TRUE(iset1.intersects( 0, 20 ));
+  ASSERT_TRUE(iset1.intersects( 1, 2 ));
+  ASSERT_TRUE(iset1.intersects( 1, 20 ));
+
+  iset1.insert(2, 3);
+  ASSERT_FALSE(iset1.intersects( 0, 1 ));
+  ASSERT_TRUE(iset1.intersects( 0, 2 ));
+  ASSERT_TRUE(iset1.intersects( 0, 200 ));
+  ASSERT_TRUE(iset1.intersects( 1, 1 ));
+  ASSERT_TRUE(iset1.intersects( 1, 4 ));
+  ASSERT_TRUE(iset1.intersects( 1, 5 ));
+  ASSERT_TRUE(iset1.intersects( 2, 1 ));
+  ASSERT_TRUE(iset1.intersects( 2, 2 ));
+  ASSERT_TRUE(iset1.intersects( 2, 3 ));
+  ASSERT_TRUE(iset1.intersects( 2, 4 ));
+  ASSERT_TRUE(iset1.intersects( 3, 2 ));
+  ASSERT_TRUE(iset1.intersects( 4, 1 ));
+  ASSERT_TRUE(iset1.intersects( 4, 2 ));
+  ASSERT_FALSE(iset1.intersects( 5, 2 ));
+
+  iset1.insert(10, 10);
+  ASSERT_TRUE(iset1.intersects( 1, 4 ));
+  ASSERT_TRUE(iset1.intersects( 1, 5 ));
+  ASSERT_TRUE(iset1.intersects( 1, 10 ));
+  ASSERT_TRUE(iset1.intersects( 2, 2 ));
+  ASSERT_TRUE(iset1.intersects( 2, 4 ));
+  ASSERT_FALSE(iset1.intersects( 5, 1 ));
+  ASSERT_FALSE(iset1.intersects( 5, 2 ));
+  ASSERT_FALSE(iset1.intersects( 5, 5 ));
+  ASSERT_TRUE(iset1.intersects( 5, 12 ));
+  ASSERT_TRUE(iset1.intersects( 5, 20 ));
+
+  ASSERT_FALSE(iset1.intersects( 9, 1 ));
+  ASSERT_TRUE(iset1.intersects( 9, 2 ));
+
+  ASSERT_TRUE(iset1.intersects( 9, 11 ));
+  ASSERT_TRUE(iset1.intersects( 10, 1 ));
+  ASSERT_TRUE(iset1.intersects( 11, 9 ));
+  ASSERT_TRUE(iset1.intersects( 11, 2 ));
+  ASSERT_TRUE(iset1.intersects( 11, 11 ));
+  ASSERT_TRUE(iset1.intersects( 18, 2 ));
+  ASSERT_TRUE(iset1.intersects( 18, 9 ));
+  ASSERT_FALSE(iset1.intersects( 20, 1 ));
+  ASSERT_FALSE(iset1.intersects( 21, 12 ));
+
+  iset1.clear();
+  ASSERT_FALSE(iset1.intersects( 0, 1 ));
+  ASSERT_FALSE(iset1.intersects( 0, 2 ));
+  ASSERT_FALSE(iset1.intersects( 1, 1 ));
+  ASSERT_FALSE(iset1.intersects( 5, 2 ));
+  ASSERT_FALSE(iset1.intersects( 10, 2 ));
+}
+
+TYPED_TEST(IntervalSetTest, insert_erase) {
+  typedef typename TestFixture::ISet ISet;
+  ISet iset1, iset2;
+  IntervalValueType start, len;
+  
+  iset1.insert(3, 5, &start, &len);
+  ASSERT_TRUE( start == 3 );
+  ASSERT_TRUE( len == 5 );
+  ASSERT_TRUE( iset1.num_intervals() == 1 );
+  ASSERT_TRUE( iset1.size() == 5 );
+
+  //adding standalone interval
+  iset1.insert(15, 10, &start, &len);
+  ASSERT_TRUE( start == 15 );
+  ASSERT_TRUE( len == 10 );
+  ASSERT_TRUE( iset1.num_intervals() == 2 );
+  ASSERT_EQ( iset1.size(), 15 );
+
+  //adding leftmost standalone interval
+  iset1.insert(1, 1, &start, &len);
+  ASSERT_TRUE( start == 1 );
+  ASSERT_TRUE( len == 1 );
+  ASSERT_TRUE( iset1.num_intervals() == 3 );
+  ASSERT_EQ( iset1.size(), 16 );
+
+  //adding leftmost adjacent interval
+  iset1.insert(0, 1, &start, &len);
+  ASSERT_TRUE( start == 0 );
+  ASSERT_TRUE( len == 2 );
+  ASSERT_TRUE( iset1.num_intervals() == 3 );
+  ASSERT_EQ( iset1.size(), 17 );
+
+  //adding interim interval that merges leftmost and subsequent intervals
+  iset1.insert(2, 1, &start, &len);
+  ASSERT_TRUE( start == 0 );
+  ASSERT_TRUE( len == 8 );
+  ASSERT_TRUE( iset1.num_intervals() == 2);
+  ASSERT_EQ( iset1.size(), 18);
+
+  //adding rightmost standalone interval
+  iset1.insert(30, 5, &start, &len);
+  ASSERT_TRUE( start == 30 );
+  ASSERT_TRUE( len == 5 );
+  ASSERT_TRUE( iset1.num_intervals() == 3);
+  ASSERT_EQ( iset1.size(), 23 );
+
+  //adding rightmost adjacent interval
+  iset1.insert(35, 10, &start, &len);
+  ASSERT_TRUE( start == 30 );
+  ASSERT_TRUE( len == 15 );
+  ASSERT_TRUE( iset1.num_intervals() == 3);
+  ASSERT_EQ( iset1.size(), 33 );
+
+  //adding interim interval that merges with the interval preceding the rightmost
+  iset1.insert(25, 1, &start, &len);
+  ASSERT_TRUE( start == 15 );
+  ASSERT_TRUE( len == 11 );
+  ASSERT_TRUE( iset1.num_intervals() == 3);
+  ASSERT_EQ( iset1.size(), 34);
+
+  //adding interim interval that merges with the rightmost and preceding intervals
+  iset1.insert(26, 4, &start, &len);
+  ASSERT_TRUE( start == 15 );
+  ASSERT_TRUE( len == 30 );
+  ASSERT_TRUE( iset1.num_intervals() == 2);
+  ASSERT_EQ( iset1.size(), 38);
+
+  //and finally build a single interval by filling the gap at 8-15 using a different interval set
+  iset2.insert( 8, 1 );
+  iset2.insert( 14, 1 );
+  iset2.insert( 9, 4 );
+  iset1.insert( iset2 );
+  iset1.insert(13, 1, &start, &len);
+  ASSERT_TRUE( start == 0 );
+  ASSERT_TRUE( len == 45 );
+  ASSERT_TRUE( iset1.num_intervals() == 1);
+  ASSERT_EQ( iset1.size(), 45);
+
+  //now reverse the process using subtract & erase
+  iset1.subtract( iset2 );
+  iset1.erase(13, 1);
+  ASSERT_TRUE( iset1.num_intervals() == 2);
+  ASSERT_EQ( iset1.size(), 38);
+  ASSERT_TRUE( iset1.contains( 7, 1 ));
+  ASSERT_FALSE( iset1.contains( 8, 7 ));
+  ASSERT_TRUE( iset1.contains( 15, 1 ));
+  ASSERT_TRUE( iset1.contains( 26, 4 ));
+
+  iset1.erase(26, 4);
+  ASSERT_TRUE( iset1.num_intervals() == 3);
+  ASSERT_EQ( iset1.size(), 34);
+  ASSERT_TRUE( iset1.contains( 7, 1 ));
+  ASSERT_FALSE( iset1.intersects( 8, 7 ));
+  ASSERT_TRUE( iset1.contains( 15, 1 ));
+  ASSERT_TRUE( iset1.contains( 25, 1 ));
+  ASSERT_FALSE( iset1.contains( 26, 4 ));
+  ASSERT_TRUE( iset1.contains( 30, 1 ));
+
+  iset1.erase(25, 1);
+  ASSERT_TRUE( iset1.num_intervals() == 3);
+  ASSERT_EQ( iset1.size(), 33 );
+  ASSERT_TRUE( iset1.contains( 24, 1 ));
+  ASSERT_FALSE( iset1.contains( 25, 1 ));
+  ASSERT_FALSE( iset1.intersects( 26, 4 ));
+  ASSERT_TRUE( iset1.contains( 30, 1 ));
+  ASSERT_TRUE( iset1.contains( 35, 10 ));
+
+  iset1.erase(35, 10);
+  ASSERT_TRUE( iset1.num_intervals() == 3);
+  ASSERT_EQ( iset1.size(), 23 );
+  ASSERT_TRUE( iset1.contains( 30, 5 ));
+  ASSERT_TRUE( iset1.contains( 34, 1 ));
+  ASSERT_FALSE( iset1.contains( 35, 10 ));
+  ASSERT_FALSE(iset1.contains( 45, 1 ));
+
+  iset1.erase(30, 5);
+  ASSERT_TRUE( iset1.num_intervals() == 2);
+  ASSERT_EQ( iset1.size(), 18);
+  ASSERT_TRUE( iset1.contains( 2, 1 ));
+  ASSERT_TRUE( iset1.contains( 24, 1 ));
+  ASSERT_FALSE( iset1.contains( 25, 1 ));
+  ASSERT_FALSE( iset1.contains( 29, 1 ));
+  ASSERT_FALSE( iset1.contains( 30, 5 ));
+  ASSERT_FALSE( iset1.contains( 35, 1 ));
+
+  iset1.erase(2, 1);
+  ASSERT_TRUE( iset1.num_intervals() == 3 );
+  ASSERT_EQ( iset1.size(), 17 );
+  ASSERT_TRUE( iset1.contains( 0, 1 ));
+  ASSERT_TRUE( iset1.contains( 1, 1 ));
+  ASSERT_FALSE( iset1.contains( 2, 1 ));
+  ASSERT_TRUE( iset1.contains( 3, 1 ));
+  ASSERT_TRUE( iset1.contains( 15, 1 ));
+  ASSERT_FALSE( iset1.contains( 25, 1 ));
+
+  iset1.erase( 0, 1);
+  ASSERT_TRUE( iset1.num_intervals() == 3 );
+  ASSERT_EQ( iset1.size(), 16 );
+  ASSERT_FALSE( iset1.contains( 0, 1 ));
+  ASSERT_TRUE( iset1.contains( 1, 1 ));
+  ASSERT_FALSE( iset1.contains( 2, 1 ));
+  ASSERT_TRUE( iset1.contains( 3, 1 ));
+  ASSERT_TRUE( iset1.contains( 15, 1 ));
+
+  iset1.erase(1, 1);
+  ASSERT_TRUE( iset1.num_intervals() == 2 );
+  ASSERT_EQ( iset1.size(), 15 );
+  ASSERT_FALSE( iset1.contains( 1, 1 ));
+  ASSERT_TRUE( iset1.contains( 15, 10 ));
+  ASSERT_TRUE( iset1.contains( 3, 5 ));
+
+  iset1.erase(15, 10);
+  ASSERT_TRUE( iset1.num_intervals() == 1 );
+  ASSERT_TRUE( iset1.size() == 5 );
+  ASSERT_FALSE( iset1.contains( 1, 1 ));
+  ASSERT_FALSE( iset1.contains( 15, 10 ));
+  ASSERT_FALSE( iset1.contains( 25, 1 ));
+  ASSERT_TRUE( iset1.contains( 3, 5 ));
+
+  iset1.erase( 3, 1);
+  ASSERT_TRUE( iset1.num_intervals() == 1 );
+  ASSERT_TRUE( iset1.size() == 4 );
+  ASSERT_FALSE( iset1.contains( 1, 1 ));
+  ASSERT_FALSE( iset1.contains( 15, 10 ));
+  ASSERT_FALSE( iset1.contains( 25, 1 ));
+  ASSERT_TRUE( iset1.contains( 4, 4 ));
+  ASSERT_FALSE( iset1.contains( 3, 5 ));
+
+  iset1.erase( 4, 4);
+  ASSERT_TRUE( iset1.num_intervals() == 0);
+  ASSERT_TRUE( iset1.size() == 0);
+  ASSERT_FALSE( iset1.contains( 1, 1 ));
+  ASSERT_FALSE( iset1.contains( 15, 10 ));
+  ASSERT_FALSE( iset1.contains( 25, 1 ));
+  ASSERT_FALSE( iset1.contains( 3, 4 ));
+  ASSERT_FALSE( iset1.contains( 3, 5 ));
+  ASSERT_FALSE( iset1.contains( 4, 4 ));
+
+
+}
+
+TYPED_TEST(IntervalSetTest, intersect_of) {
+  typedef typename TestFixture::ISet ISet;
+  ISet iset1, iset2, iset3;
+  // Intersecting two empty sets must give an empty set.
+  iset1.intersection_of( iset2, iset3 );
+  ASSERT_TRUE( iset1.num_intervals() == 0);
+  ASSERT_TRUE( iset1.size() == 0);
+  // NOTE(review): reading insert(a, b) as start a, length b (the asserted sizes agree): iset2 = [0,1) [5,15) [30,40)
+  iset2.insert( 0, 1 );
+  iset2.insert( 5, 10 );
+  iset2.insert( 30, 10 );
+  // iset3 = [0,2) [15,16) [20,25) [29,32) [35,38) [39,42)
+  iset3.insert( 0, 2 );
+  iset3.insert( 15, 1 );
+  iset3.insert( 20, 5 );
+  iset3.insert( 29, 3 );
+  iset3.insert( 35, 3 );
+  iset3.insert( 39, 3 );
+  // Expected overlap: [0,1) [30,32) [35,38) [39,40) -- 4 intervals, 7 elements.
+  iset1.intersection_of( iset2, iset3 );
+  ASSERT_TRUE( iset1.num_intervals() == 4);
+  ASSERT_TRUE( iset1.size() == 7);
+
+  ASSERT_TRUE( iset1.contains( 0, 1 ));
+  ASSERT_FALSE( iset1.contains( 0, 2 ));
+
+  ASSERT_FALSE( iset1.contains( 5, 11 ));
+  ASSERT_FALSE( iset1.contains( 4, 1 ));
+  ASSERT_FALSE( iset1.contains( 16, 1 ));
+
+  ASSERT_FALSE( iset1.contains( 20, 5 ));
+
+  ASSERT_FALSE( iset1.contains( 29, 1 ));
+  ASSERT_FALSE( iset1.contains( 30, 10 ));
+
+  ASSERT_TRUE( iset1.contains( 30, 2 ));
+  ASSERT_TRUE( iset1.contains( 35, 3 ));
+  ASSERT_FALSE( iset1.contains( 35, 4 ));
+
+  ASSERT_TRUE( iset1.contains( 39, 1 ));
+  ASSERT_FALSE( iset1.contains( 38, 2 ));
+  ASSERT_FALSE( iset1.contains( 39, 2 ));
+  // Single-argument form: intersecting the result with iset2 again must not change it.
+  iset3=iset1;
+  iset1.intersection_of(iset2);
+  ASSERT_TRUE( iset1 == iset3);
+  // Intersecting with just [0,1) keeps only that one element.
+  iset2.clear();
+  iset2.insert(0,1);
+  iset1.intersection_of(iset2);
+  ASSERT_TRUE( iset1.num_intervals() == 1);
+  ASSERT_TRUE( iset1.size() == 1);
+  // Restore the saved result, then intersect with an empty set: nothing remains.
+  iset1 = iset3;
+  iset2.clear();
+  iset1.intersection_of(iset2);
+  ASSERT_TRUE( iset1.num_intervals() == 0);
+  ASSERT_TRUE( iset1.size() == 0);
+
+}
+
+TYPED_TEST(IntervalSetTest, union_of) {
+  typedef typename TestFixture::ISet ISet;
+  ISet iset1, iset2, iset3;
+  // Union of two empty sets must be empty.
+  iset1.union_of( iset2, iset3 );
+  ASSERT_TRUE( iset1.num_intervals() == 0);
+  ASSERT_TRUE( iset1.size() == 0);
+  // NOTE(review): reading insert(a, b) as start a, length b (the asserted sizes agree): iset2 = [0,1) [5,15) [30,40)
+  iset2.insert( 0, 1 );
+  iset2.insert( 5, 10 );
+  iset2.insert( 30, 10 );
+  // iset3 = [0,2) [15,16) [20,25) [29,32) [39,42)
+  iset3.insert( 0, 2 );
+  iset3.insert( 15, 1 );
+  iset3.insert( 20, 5 );
+  iset3.insert( 29, 3 );
+  iset3.insert( 39, 3 );
+  // Expected union: [0,2) [5,16) [20,25) [29,42) -- 4 intervals, 31 elements.
+  iset1.union_of( iset2, iset3 );
+  ASSERT_TRUE( iset1.num_intervals() == 4);
+  ASSERT_EQ( iset1.size(), 31);
+  ASSERT_TRUE( iset1.contains( 0, 2 ));
+  ASSERT_FALSE( iset1.contains( 0, 3 ));
+
+  ASSERT_TRUE( iset1.contains( 5, 11 ));
+  ASSERT_FALSE( iset1.contains( 4, 1 ));
+  ASSERT_FALSE( iset1.contains( 16, 1 ));
+
+  ASSERT_TRUE( iset1.contains( 20, 5 ));
+
+  ASSERT_TRUE( iset1.contains( 30, 10 ));
+  ASSERT_TRUE( iset1.contains( 29, 13 ));
+  ASSERT_FALSE( iset1.contains( 29, 14 ));
+  ASSERT_FALSE( iset1.contains( 42, 1 ));
+  // Single-argument form: merging in an empty set must leave iset1 untouched.
+  iset2.clear();
+  iset1.union_of(iset2);
+  ASSERT_TRUE( iset1.num_intervals() == 4);
+  ASSERT_EQ( iset1.size(), 31);
+  // Merge in [29,32) and [39,41), both of which iset1 already covers.
+  iset3.clear();
+  iset3.insert( 29, 3 );
+  iset3.insert( 39, 2 );
+  iset1.union_of(iset3);
+
+  ASSERT_TRUE( iset1.num_intervals() == 4);
+  ASSERT_EQ( iset1.size(), 31); //actually we added nothing
+  ASSERT_TRUE( iset1.contains( 29, 13 ));
+  ASSERT_FALSE( iset1.contains( 29, 14 ));
+  ASSERT_FALSE( iset1.contains( 42, 1 ));
+
+}
+
+TYPED_TEST(IntervalSetTest, subset_of) {
+  typedef typename TestFixture::ISet ISet;
+  ISet iset1, iset2;
+  // An empty set is a subset of an empty set.
+  ASSERT_TRUE(iset1.subset_of(iset2));
+  // NOTE(review): reading insert(a, b) as start a, length b. iset1 = [5,15) vs. empty iset2.
+  iset1.insert(5,10);
+  ASSERT_FALSE(iset1.subset_of(iset2));
+  // iset2 = [6,14): misses 5 and 14.
+  iset2.insert(6,8);
+  ASSERT_FALSE(iset1.subset_of(iset2));
+  // iset2 = [5,14): still misses 14.
+  iset2.insert(5,1);
+  ASSERT_FALSE(iset1.subset_of(iset2));
+  // iset2 = [5,24): now covers all of iset1.
+  iset2.insert(14,10);
+  ASSERT_TRUE(iset1.subset_of(iset2));
+  // iset1 gains [20,24), which is still inside iset2.
+  iset1.insert( 20, 4);
+  ASSERT_TRUE(iset1.subset_of(iset2));
+  // iset1 gains 24, which iset2 does not contain.
+  iset1.insert( 24, 1);
+  ASSERT_FALSE(iset1.subset_of(iset2));
+}
+
+TYPED_TEST(IntervalSetTest, span_of) {
+  typedef typename TestFixture::ISet ISet;
+  ISet iset1, iset2;
+  // NOTE(review): reading insert(a, b) as start a, length b, so the source set is [5,10) and [20,25).
+  iset2.insert(5,5);
+  iset2.insert(20,5);
+  // Start inside the first interval: the asserts expect [8,10) plus [20,23), 5 elements in total.
+  iset1.span_of( iset2, 8, 5 );
+  ASSERT_EQ( iset1.num_intervals(), 2);
+  ASSERT_EQ( iset1.size(), 5);
+  ASSERT_TRUE( iset1.contains( 8, 2 ));
+  ASSERT_TRUE( iset1.contains( 20, 3 ));
+  // Start before the first interval: the result is exactly [5,10).
+  iset1.span_of( iset2, 3, 5 );
+  ASSERT_EQ( iset1.num_intervals(), 1);
+  ASSERT_EQ( iset1.size(), 5);
+  ASSERT_TRUE( iset1.contains( 5, 5 ));
+  // Start in the gap with length 7: only the 5 elements of [20,25) are available.
+  iset1.span_of( iset2, 10, 7 );
+  ASSERT_EQ( iset1.num_intervals(), 1);
+  ASSERT_EQ( iset1.size(), 5);
+  ASSERT_TRUE( iset1.contains( 20, 5 ));
+  ASSERT_FALSE( iset1.contains( 20, 6 ));
+  // Length 10 starting at 5 takes both intervals in full.
+  iset1.span_of( iset2, 5, 10);
+  ASSERT_EQ( iset1.num_intervals(), 2);
+  ASSERT_EQ( iset1.size(), 10);
+  ASSERT_TRUE( iset1.contains( 5, 5 ));
+  ASSERT_TRUE( iset1.contains( 20, 5 ));
+  // Start beyond the last interval: nothing to take.
+  iset1.span_of( iset2, 100, 5 );
+  ASSERT_EQ( iset1.num_intervals(), 0);
+  ASSERT_EQ( iset1.size(), 0);
+}
\ No newline at end of file
diff --git a/src/test/common/test_lru.cc b/src/test/common/test_lru.cc
index 7483718..b363079 100644
--- a/src/test/common/test_lru.cc
+++ b/src/test/common/test_lru.cc
@@ -23,7 +23,7 @@
 class Item : public LRUObject {
 public:
   int id;
-  Item(int v) : id(v) {}
+  explicit Item(int v) : id(v) {}
 };
 
 
diff --git a/src/test/common/test_mutex_debug.cc b/src/test/common/test_mutex_debug.cc
new file mode 100644
index 0000000..49cd499
--- /dev/null
+++ b/src/test/common/test_mutex_debug.cc
@@ -0,0 +1,101 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 &smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2011 New Dream Network
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License version 2, as published by the Free Software
+ * Foundation.  See file COPYING.
+ *
+ */
+
+#include <future>
+#include <mutex>
+#include <thread>
+
+#include "common/mutex_debug.h"
+
+#include "gtest/gtest.h"
+
+
+template<typename Mutex>
+static bool test_try_lock(Mutex* m) {
+  if (!m->try_lock())
+    return false;
+  m->unlock();
+  return true;
+}
+
+template<typename Mutex>
+static void test_lock() {
+  Mutex m;
+  auto ttl = &test_try_lock<Mutex>;
+
+  m.lock();
+  ASSERT_TRUE(m.is_locked());
+  auto f1 = std::async(std::launch::async, ttl, &m);
+  ASSERT_FALSE(f1.get());
+
+  ASSERT_TRUE(m.is_locked());
+  ASSERT_TRUE(!!m);
+
+  m.unlock();
+  ASSERT_FALSE(m.is_locked());
+  ASSERT_FALSE(!!m);
+
+  auto f3 = std::async(std::launch::async, ttl, &m);
+  ASSERT_TRUE(f3.get());
+
+  ASSERT_FALSE(m.is_locked());
+  ASSERT_FALSE(!!m);
+}
+
+TEST(MutexDebug, Lock) {
+  test_lock<ceph::mutex_debug>();
+}
+
+TEST(MutexDebug, NotRecursive) {
+  ceph::mutex_debug m;
+  auto ttl = &test_try_lock<ceph::mutex_debug>;  // qualified like every other use in this file
+  // First lock succeeds; a try_lock from another thread must then fail.
+  ASSERT_NO_THROW(m.lock());
+  ASSERT_TRUE(m.is_locked());
+  ASSERT_FALSE(std::async(std::launch::async, ttl, &m).get());
+  // Locking again from the thread that already holds it must throw and leave it held.
+  ASSERT_THROW(m.lock(), std::system_error);
+  ASSERT_TRUE(m.is_locked());
+  ASSERT_FALSE(std::async(std::launch::async, ttl, &m).get());
+  // After a single unlock the mutex is available to other threads again.
+  ASSERT_NO_THROW(m.unlock());
+  ASSERT_FALSE(m.is_locked());
+  ASSERT_TRUE(std::async(std::launch::async, ttl, &m).get());
+}
+
+TEST(MutexRecursiveDebug, Lock) {
+  test_lock<ceph::mutex_recursive_debug>();
+}
+
+
+TEST(MutexRecursiveDebug, Recursive) {
+  ceph::mutex_recursive_debug m;
+  auto ttl = &test_try_lock<ceph::mutex_recursive_debug>;  // qualified like every other use in this file
+  // First acquisition: held, so a try_lock from another thread fails.
+  ASSERT_NO_THROW(m.lock());
+  ASSERT_TRUE(m.is_locked());
+  ASSERT_FALSE(std::async(std::launch::async, ttl, &m).get());
+  // Second acquisition from the same thread must not throw for the recursive variant.
+  ASSERT_NO_THROW(m.lock());
+  ASSERT_TRUE(m.is_locked());
+  ASSERT_FALSE(std::async(std::launch::async, ttl, &m).get());
+  // One unlock is not enough: the mutex is still held.
+  ASSERT_NO_THROW(m.unlock());
+  ASSERT_TRUE(m.is_locked());
+  ASSERT_FALSE(std::async(std::launch::async, ttl, &m).get());
+  // The matching second unlock finally releases it.
+  ASSERT_NO_THROW(m.unlock());
+  ASSERT_FALSE(m.is_locked());
+  ASSERT_TRUE(std::async(std::launch::async, ttl, &m).get());
+}
diff --git a/src/test/common/test_prioritized_queue.cc b/src/test/common/test_prioritized_queue.cc
index 00709a1..84ccf9e 100644
--- a/src/test/common/test_prioritized_queue.cc
+++ b/src/test/common/test_prioritized_queue.cc
@@ -6,7 +6,7 @@
 
 #include <numeric>
 #include <vector>
-
+#include <algorithm>
 
 using std::vector;
 
@@ -23,7 +23,7 @@ protected:
     for (int i = 0; i < item_size; i++) {
       items.push_back(Item(i));
     }
-    random_shuffle(items.begin(), items.end());
+    std::random_shuffle(items.begin(), items.end());
   }
   virtual void TearDown() {
     items.clear();
@@ -164,7 +164,7 @@ TEST_F(PrioritizedQueueTest, fairness_by_class) {
 template <typename T>
 struct Greater {
   const T rhs;
-  Greater(const T& v) : rhs(v)
+  explicit Greater(const T& v) : rhs(v)
   {}
   bool operator()(const T& lhs) const {
     return lhs > rhs;
diff --git a/src/test/common/test_shunique_lock.cc b/src/test/common/test_shunique_lock.cc
new file mode 100644
index 0000000..77f9708
--- /dev/null
+++ b/src/test/common/test_shunique_lock.cc
@@ -0,0 +1,576 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 &smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2011 New Dream Network
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License version 2, as published by the Free Software
+ * Foundation.  See file COPYING.
+ *
+ */
+
+#include <future>
+#include <mutex>
+#include <thread>
+
+#include <boost/thread/shared_mutex.hpp>
+
+#include "common/ceph_time.h"
+#include "common/shunique_lock.h"
+
+#include "gtest/gtest.h"
+
+template<typename SharedMutex>
+static bool test_try_lock(SharedMutex* sm) {
+  if (!sm->try_lock())
+    return false;
+  sm->unlock();
+  return true;
+}
+
+template<typename SharedMutex>
+static bool test_try_lock_shared(SharedMutex* sm) {
+  if (!sm->try_lock_shared())
+    return false;
+  sm->unlock_shared();
+  return true;
+}
+
+template<typename SharedMutex, typename AcquireType>
+static void check_conflicts(SharedMutex sm, AcquireType) {
+}
+
+template<typename SharedMutex>  // asserts sm behaves as exclusively held
+static void ensure_conflicts(SharedMutex& sm, ceph::acquire_unique_t) {
+  auto ttl = &test_try_lock<SharedMutex>;  // use the deduced mutex type, not a hard-coded one
+  auto ttls = &test_try_lock_shared<SharedMutex>;
+  ASSERT_FALSE(std::async(std::launch::async, ttl, &sm).get());  // unique try_lock from another thread fails
+  ASSERT_FALSE(std::async(std::launch::async, ttls, &sm).get());  // shared try_lock from another thread fails
+}
+
+template<typename SharedMutex>  // asserts sm behaves as held in shared mode
+static void ensure_conflicts(SharedMutex& sm, ceph::acquire_shared_t) {
+  auto ttl = &test_try_lock<SharedMutex>;  // use the deduced mutex type, not a hard-coded one
+  auto ttls = &test_try_lock_shared<SharedMutex>;
+  ASSERT_FALSE(std::async(std::launch::async, ttl, &sm).get());  // unique try_lock from another thread fails
+  ASSERT_TRUE(std::async(std::launch::async, ttls, &sm).get());  // another shared try_lock still succeeds
+}
+
+template<typename SharedMutex>  // asserts sm is not held at all
+static void ensure_free(SharedMutex& sm) {
+  auto ttl = &test_try_lock<SharedMutex>;  // use the deduced mutex type, not a hard-coded one
+  auto ttls = &test_try_lock_shared<SharedMutex>;
+  ASSERT_TRUE(std::async(std::launch::async, ttl, &sm).get());  // unique try_lock from another thread succeeds
+  ASSERT_TRUE(std::async(std::launch::async, ttls, &sm).get());  // shared try_lock from another thread succeeds
+}
+
+template<typename SharedMutex, typename AcquireType>
+static void check_owns_lock(const SharedMutex& sm,
+			    const ceph::shunique_lock<SharedMutex>& sul,
+			    AcquireType) {
+}
+
+template<typename SharedMutex>
+static void check_owns_lock(const SharedMutex& sm,
+			    const ceph::shunique_lock<SharedMutex>& sul,
+			    ceph::acquire_unique_t) {
+  ASSERT_TRUE(sul.mutex() == &sm);
+  ASSERT_TRUE(sul.owns_lock());
+  ASSERT_TRUE(!!sul);
+}
+
+template<typename SharedMutex>
+static void check_owns_lock(const SharedMutex& sm,
+			    const ceph::shunique_lock<SharedMutex>& sul,
+			    ceph::acquire_shared_t) {
+  ASSERT_TRUE(sul.owns_lock_shared());
+  ASSERT_TRUE(!!sul);
+}
+
+template<typename SharedMutex>
+static void check_abjures_lock(const SharedMutex& sm,
+			       const ceph::shunique_lock<SharedMutex>& sul) {
+  ASSERT_EQ(sul.mutex(), &sm);
+  ASSERT_FALSE(sul.owns_lock());
+  ASSERT_FALSE(sul.owns_lock_shared());
+  ASSERT_FALSE(!!sul);
+}
+
+template<typename SharedMutex>
+static void check_abjures_lock(const ceph::shunique_lock<SharedMutex>& sul) {
+  ASSERT_EQ(sul.mutex(), nullptr);
+  ASSERT_FALSE(sul.owns_lock());
+  ASSERT_FALSE(sul.owns_lock_shared());
+  ASSERT_FALSE(!!sul);
+}
+
+TEST(ShuniqueLock, DefaultConstructor) {
+  typedef ceph::shunique_lock<boost::shared_mutex> shunique_lock;
+
+  shunique_lock l;
+
+  ASSERT_EQ(l.mutex(), nullptr);
+  ASSERT_FALSE(l.owns_lock());
+  ASSERT_FALSE(!!l);
+
+  ASSERT_THROW(l.lock(), std::system_error);
+  ASSERT_THROW(l.try_lock(), std::system_error);
+
+  ASSERT_THROW(l.lock_shared(), std::system_error);
+  ASSERT_THROW(l.try_lock_shared(), std::system_error);
+
+  ASSERT_THROW(l.unlock(), std::system_error);
+
+  ASSERT_EQ(l.mutex(), nullptr);
+  ASSERT_FALSE(l.owns_lock());
+  ASSERT_FALSE(!!l);
+
+  ASSERT_EQ(l.release(), nullptr);
+
+  ASSERT_EQ(l.mutex(), nullptr);
+  ASSERT_FALSE(l.owns_lock());
+  ASSERT_FALSE(l.owns_lock_shared());
+  ASSERT_FALSE(!!l);
+}
+
+template<typename AcquireType>
+void lock_unlock(AcquireType at) {
+  boost::shared_mutex sm;
+  typedef ceph::shunique_lock<boost::shared_mutex> shunique_lock;
+
+  shunique_lock l(sm, at);
+
+  check_owns_lock(sm, l, at);
+  ensure_conflicts(sm, at);
+
+  l.unlock();
+
+  check_abjures_lock(sm, l);
+  ensure_free(sm);
+
+  l.lock(at);
+
+  check_owns_lock(sm, l, at);
+  ensure_conflicts(sm, at);
+}
+
+TEST(ShuniqueLock, LockUnlock) {
+  lock_unlock(ceph::acquire_unique);
+  lock_unlock(ceph::acquire_shared);
+}
+
+template<typename AcquireType>
+void lock_destruct(AcquireType at) {
+  boost::shared_mutex sm;
+  typedef ceph::shunique_lock<boost::shared_mutex> shunique_lock;
+
+  {
+    shunique_lock l(sm, at);
+
+    check_owns_lock(sm, l, at);
+    ensure_conflicts(sm, at);
+  }
+
+  ensure_free(sm);
+}
+
+TEST(ShuniqueLock, LockDestruct) {
+  lock_destruct(ceph::acquire_unique);
+  lock_destruct(ceph::acquire_shared);
+}
+
+template<typename AcquireType>
+void move_construct(AcquireType at) {
+  boost::shared_mutex sm;
+
+  typedef ceph::shunique_lock<boost::shared_mutex> shunique_lock;
+
+  {
+    shunique_lock l(sm, at);
+
+    check_owns_lock(sm, l, at);
+    ensure_conflicts(sm, at);
+
+    shunique_lock o(std::move(l));
+
+    check_abjures_lock(l);
+
+    check_owns_lock(sm, o, at);
+    ensure_conflicts(sm, at);
+
+    o.unlock();
+
+    shunique_lock c(std::move(o));
+
+
+    ASSERT_EQ(o.mutex(), nullptr);
+    ASSERT_FALSE(!!o);
+
+    check_abjures_lock(sm, c);
+
+    ensure_free(sm);
+  }
+}
+
+TEST(ShuniqueLock, MoveConstruct) {
+  move_construct(ceph::acquire_unique);
+  move_construct(ceph::acquire_shared);
+
+  boost::shared_mutex sm;
+  {
+    std::unique_lock<boost::shared_mutex> ul(sm);
+    ensure_conflicts(sm, ceph::acquire_unique);
+    ceph::shunique_lock<boost::shared_mutex> l(std::move(ul));
+    check_owns_lock(sm, l, ceph::acquire_unique);
+    ensure_conflicts(sm, ceph::acquire_unique);
+  }
+  {
+    std::unique_lock<boost::shared_mutex> ul(sm, std::defer_lock);
+    ensure_free(sm);
+    ceph::shunique_lock<boost::shared_mutex> l(std::move(ul));
+    check_abjures_lock(sm, l);
+    ensure_free(sm);
+  }
+  {
+    std::unique_lock<boost::shared_mutex> ul;
+    ceph::shunique_lock<boost::shared_mutex> l(std::move(ul));
+    check_abjures_lock(l);
+  }
+  {
+    boost::shared_lock<boost::shared_mutex> sl(sm);
+    ensure_conflicts(sm, ceph::acquire_shared);
+    ceph::shunique_lock<boost::shared_mutex> l(std::move(sl));
+    check_owns_lock(sm, l, ceph::acquire_shared);
+    ensure_conflicts(sm, ceph::acquire_shared);
+  }
+  {
+    boost::shared_lock<boost::shared_mutex> sl;
+    ceph::shunique_lock<boost::shared_mutex> l(std::move(sl));
+    check_abjures_lock(l);
+  }
+}
+
+template<typename AcquireType>
+void move_assign(AcquireType at) {
+  boost::shared_mutex sm;
+
+  typedef ceph::shunique_lock<boost::shared_mutex> shunique_lock;
+
+  {
+    shunique_lock l(sm, at);
+
+    check_owns_lock(sm, l, at);
+    ensure_conflicts(sm, at);
+
+    shunique_lock o;
+
+    o = std::move(l);
+
+    check_abjures_lock(l);
+
+    check_owns_lock(sm, o, at);
+    ensure_conflicts(sm, at);
+
+    o.unlock();
+
+    shunique_lock c(std::move(o));
+
+    check_abjures_lock(o);
+    check_abjures_lock(sm, c);
+
+    ensure_free(sm);
+
+    shunique_lock k;
+
+    c = std::move(k);
+
+    check_abjures_lock(k);
+    check_abjures_lock(c);
+
+    ensure_free(sm);
+  }
+}
+
+TEST(ShuniqueLock, MoveAssign) {
+  move_assign(ceph::acquire_unique);
+  move_assign(ceph::acquire_shared);
+
+  boost::shared_mutex sm;
+  {
+    std::unique_lock<boost::shared_mutex> ul(sm);
+    ensure_conflicts(sm, ceph::acquire_unique);
+    ceph::shunique_lock<boost::shared_mutex> l;
+    l = std::move(ul);
+    check_owns_lock(sm, l, ceph::acquire_unique);
+    ensure_conflicts(sm, ceph::acquire_unique);
+  }
+  {
+    std::unique_lock<boost::shared_mutex> ul(sm, std::defer_lock);
+    ensure_free(sm);
+    ceph::shunique_lock<boost::shared_mutex> l;
+    l = std::move(ul);
+    check_abjures_lock(sm, l);
+    ensure_free(sm);
+  }
+  {
+    std::unique_lock<boost::shared_mutex> ul;
+    ceph::shunique_lock<boost::shared_mutex> l;
+    l = std::move(ul);
+    check_abjures_lock(l);
+  }
+  {
+    boost::shared_lock<boost::shared_mutex> sl(sm);
+    ensure_conflicts(sm, ceph::acquire_shared);
+    ceph::shunique_lock<boost::shared_mutex> l;
+    l = std::move(sl);
+    check_owns_lock(sm, l, ceph::acquire_shared);
+    ensure_conflicts(sm, ceph::acquire_shared);
+  }
+  {
+    boost::shared_lock<boost::shared_mutex> sl;
+    ceph::shunique_lock<boost::shared_mutex> l;
+    l = std::move(sl);
+    check_abjures_lock(l);
+  }
+
+}
+
+template<typename AcquireType>
+void construct_deferred(AcquireType at) {
+  boost::shared_mutex sm;
+
+  typedef ceph::shunique_lock<boost::shared_mutex> shunique_lock;
+
+  {
+    shunique_lock l(sm, std::defer_lock);
+    check_abjures_lock(sm, l);
+    ensure_free(sm);
+
+    ASSERT_THROW(l.unlock(), std::system_error);
+
+    check_abjures_lock(sm, l);
+    ensure_free(sm);
+
+    l.lock(at);
+    check_owns_lock(sm, l, at);
+    ensure_conflicts(sm, at);
+  }
+
+  {
+    shunique_lock l(sm, std::defer_lock);
+    check_abjures_lock(sm, l);
+    ensure_free(sm);
+
+    ASSERT_THROW(l.unlock(), std::system_error);
+
+    check_abjures_lock(sm, l);
+    ensure_free(sm);
+  }
+  ensure_free(sm);
+}
+
+TEST(ShuniqueLock, ConstructDeferred) {
+  construct_deferred(ceph::acquire_unique);
+  construct_deferred(ceph::acquire_shared);
+}
+
+template<typename AcquireType>
+void construct_try(AcquireType at) {
+  boost::shared_mutex sm;
+  typedef ceph::shunique_lock<boost::shared_mutex> shunique_lock;
+
+  {
+    shunique_lock l(sm, at, std::try_to_lock);
+    check_owns_lock(sm, l, at);
+    ensure_conflicts(sm, at);
+  }
+
+  {
+    std::unique_lock<boost::shared_mutex> l(sm);
+    ensure_conflicts(sm, ceph::acquire_unique);
+
+    std::async(std::launch::async, [&sm, at]() {
+	shunique_lock l(sm, at, std::try_to_lock);
+	check_abjures_lock(sm, l);
+	ensure_conflicts(sm, ceph::acquire_unique);
+      }).get();
+
+    l.unlock();
+
+    std::async(std::launch::async, [&sm, at]() {
+	shunique_lock l(sm, at, std::try_to_lock);
+	check_owns_lock(sm, l, at);
+	ensure_conflicts(sm, at);
+      }).get();
+  }
+}
+
+TEST(ShuniqueLock, ConstructTry) {
+  construct_try(ceph::acquire_unique);
+  construct_try(ceph::acquire_shared);
+}
+
+template<typename AcquireType>
+void construct_adopt(AcquireType at) {
+  boost::shared_mutex sm;
+
+  typedef ceph::shunique_lock<boost::shared_mutex> shunique_lock;
+
+  {
+    shunique_lock d(sm, at);
+    d.release();
+  }
+
+  ensure_conflicts(sm, at);
+
+  {
+    shunique_lock l(sm, at, std::adopt_lock);
+    check_owns_lock(sm, l, at);
+    ensure_conflicts(sm, at);
+  }
+
+  ensure_free(sm);
+}
+
+TEST(ShuniqueLock, ConstructAdopt) {
+  construct_adopt(ceph::acquire_unique);
+  construct_adopt(ceph::acquire_shared);
+}
+
+template<typename AcquireType>
+void try_lock(AcquireType at) {
+  boost::shared_mutex sm;
+
+  typedef ceph::shunique_lock<boost::shared_mutex> shunique_lock;
+
+  {
+    shunique_lock l(sm, std::defer_lock);
+    l.try_lock(at);
+
+    check_owns_lock(sm, l, at);
+    ensure_conflicts(sm, at);
+  }
+
+  {
+    std::unique_lock<boost::shared_mutex> l(sm);
+
+    std::async(std::launch::async, [&sm, at]() {
+	shunique_lock l(sm, std::defer_lock);
+	l.try_lock(at);
+
+	check_abjures_lock(sm, l);
+	ensure_conflicts(sm, ceph::acquire_unique);
+      }).get();
+
+
+    l.unlock();
+    std::async(std::launch::async, [&sm, at]() {
+	shunique_lock l(sm, std::defer_lock);
+	l.try_lock(at);
+
+	check_owns_lock(sm, l, at);
+	ensure_conflicts(sm, at);
+      }).get();
+  }
+}
+
+TEST(ShuniqueLock, TryLock) {
+  try_lock(ceph::acquire_unique);
+  try_lock(ceph::acquire_shared);
+}
+
+TEST(ShuniqueLock, Release) {
+  boost::shared_mutex sm;
+  typedef ceph::shunique_lock<boost::shared_mutex> shunique_lock;
+
+  {
+    shunique_lock l(sm, ceph::acquire_unique);
+    check_owns_lock(sm, l, ceph::acquire_unique);
+    ensure_conflicts(sm, ceph::acquire_unique);
+
+    l.release();
+    check_abjures_lock(l);
+    ensure_conflicts(sm, ceph::acquire_unique);
+  }
+  ensure_conflicts(sm, ceph::acquire_unique);
+  sm.unlock();
+  ensure_free(sm);
+
+  {
+    shunique_lock l(sm, ceph::acquire_shared);
+    check_owns_lock(sm, l, ceph::acquire_shared);
+    ensure_conflicts(sm, ceph::acquire_shared);
+
+    l.release();
+    check_abjures_lock(l);
+    ensure_conflicts(sm, ceph::acquire_shared);
+  }
+  ensure_conflicts(sm, ceph::acquire_shared);
+  sm.unlock_shared();
+  ensure_free(sm);
+
+  sm.lock();
+  {
+    shunique_lock l(sm, std::defer_lock);
+    check_abjures_lock(sm, l);
+    ensure_conflicts(sm, ceph::acquire_unique);
+
+    l.release();
+    check_abjures_lock(l);
+    ensure_conflicts(sm, ceph::acquire_unique);
+  }
+  ensure_conflicts(sm, ceph::acquire_unique);
+  sm.unlock();
+
+  ensure_free(sm);
+
+  {
+    std::unique_lock<boost::shared_mutex> ul;
+    shunique_lock l(sm, std::defer_lock);
+    check_abjures_lock(sm, l);
+    ensure_free(sm);
+
+    ASSERT_NO_THROW(ul = l.release_to_unique());
+    check_abjures_lock(l);
+    ASSERT_EQ(ul.mutex(), &sm);
+    ASSERT_FALSE(ul.owns_lock());
+    ensure_free(sm);
+  }
+  ensure_free(sm);
+
+  {
+    std::unique_lock<boost::shared_mutex> ul;
+    shunique_lock l;
+    check_abjures_lock(l);
+
+    ASSERT_NO_THROW(ul = l.release_to_unique());
+    check_abjures_lock(l);
+    ASSERT_EQ(ul.mutex(), nullptr);
+    ASSERT_FALSE(ul.owns_lock());
+  }
+}
+
+TEST(ShuniqueLock, NoRecursion) {
+  boost::shared_mutex sm;
+
+  typedef ceph::shunique_lock<boost::shared_mutex> shunique_lock;
+
+  {
+    shunique_lock l(sm, ceph::acquire_unique);
+    ASSERT_THROW(l.lock(), std::system_error);
+    ASSERT_THROW(l.try_lock(), std::system_error);
+    ASSERT_THROW(l.lock_shared(), std::system_error);
+    ASSERT_THROW(l.try_lock_shared(), std::system_error);
+  }
+
+  {
+    shunique_lock l(sm, ceph::acquire_shared);
+    ASSERT_THROW(l.lock(), std::system_error);
+    ASSERT_THROW(l.try_lock(), std::system_error);
+    ASSERT_THROW(l.lock_shared(), std::system_error);
+    ASSERT_THROW(l.try_lock_shared(), std::system_error);
+  }
+}
diff --git a/src/test/common/test_str_map.cc b/src/test/common/test_str_map.cc
index 5a324ba..e96c792 100644
--- a/src/test/common/test_str_map.cc
+++ b/src/test/common/test_str_map.cc
@@ -58,7 +58,7 @@ TEST(str_map, plaintext) {
   }
   {
     map<string,string> str_map;
-    ASSERT_EQ(0, get_str_map(" key1=val1; key2=\tval2; key3\t = \t val3; \n ", "\n;", &str_map));
+    ASSERT_EQ(0, get_str_map(" key1=val1; key2=\tval2; key3\t = \t val3; \n ", &str_map, "\n;"));
     ASSERT_EQ(4u, str_map.size());
     ASSERT_EQ("val1", str_map["key1"]);
     ASSERT_EQ("val2", str_map["key2"]);
diff --git a/src/test/common/test_time.cc b/src/test/common/test_time.cc
index e363085..2e6ad4b 100644
--- a/src/test/common/test_time.cc
+++ b/src/test/common/test_time.cc
@@ -56,8 +56,7 @@ static constexpr double bd = bs + ((double)bns / 1000000000.);
 
 template<typename Clock>
 static void system_clock_sanity() {
-  static constexpr typename Clock::time_point brt(seconds(bs)
-						  + nanoseconds(bns));
+  static const typename Clock::time_point brt(seconds(bs) + nanoseconds(bns));
   const typename Clock::time_point now(Clock::now());
 
   ASSERT_GT(now, brt);
diff --git a/src/test/common/test_weighted_priority_queue.cc b/src/test/common/test_weighted_priority_queue.cc
new file mode 100644
index 0000000..d3ebc41
--- /dev/null
+++ b/src/test/common/test_weighted_priority_queue.cc
@@ -0,0 +1,287 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "gtest/gtest.h"
+#include "common/Formatter.h"
+#include "common/WeightedPriorityQueue.h"
+
+#include <numeric>
+#include <vector>
+#include <map>
+#include <list>
+#include <tuple>
+
+#define CEPH_OP_CLASS_STRICT	0
+#define CEPH_OP_CLASS_NORMAL	0
+#define CEPH_OP_QUEUE_BACK	0
+#define CEPH_OP_QUEUE_FRONT	0
+
+class WeightedPriorityQueueTest : public testing::Test
+{
+protected:
+  typedef unsigned Klass;
+  // tuple<Prio, Klass, OpID> so that we can verify the op
+  typedef std::tuple<unsigned, unsigned, unsigned> Item;
+  typedef unsigned Prio;
+  typedef unsigned Kost;
+  typedef WeightedPriorityQueue<Item, Klass> WQ;
+  // Simulate queue structure
+  typedef std::list<std::pair<Kost, Item> > ItemList;
+  typedef std::map<Klass, ItemList> KlassItem;
+  typedef std::map<Prio, KlassItem> LQ;
+  typedef std::list<Item> Removed;
+  const unsigned max_prios = 5; // (0-4) * 64
+  const unsigned klasses = 37;  // Make prime to help get good coverage
+
+  void fill_queue(WQ &wq, LQ &strictq, LQ &normq,
+      unsigned item_size, bool randomize = false) {
+    unsigned p, k, c, o, op_queue, fob;
+    for (unsigned i = 1; i <= item_size; ++i) {
+      // Choose priority, class, cost and 'op' for this op.
+      if (randomize) {
+        p = (rand() % max_prios) * 64;
+        k = rand() % klasses;
+        c = rand() % (1<<22);  // 4M cost
+        // Make some of the costs 0, but make sure small costs
+        // still work ok.
+        if (c > (1<<19) && c < (1<<20)) {
+          c = 0;
+	}
+        op_queue = rand() % 10;
+        fob = rand() % 10;
+      } else {
+        p = (i % max_prios) * 64;
+        k = i % klasses;
+        c = (i % 8 == 0 || i % 16 == 0) ? 0 : 1 << (i % 23);
+        op_queue = i % 7; // Use prime numbers to
+        fob = i % 11;     // get better coverage
+      }
+      o = rand() % (1<<16);
+      // Choose how to enqueue this op.
+      switch (op_queue) {
+      case 6 :
+        // Strict Queue
+        if (fob == 4) {
+	  // Queue to the front.
+	  strictq[p][k].push_front(std::make_pair(
+  	    c, std::make_tuple(p, k, o)));
+	  wq.enqueue_strict_front(Klass(k), p, std::make_tuple(p, k, o));
+        } else {
+	  //Queue to the back.
+	  strictq[p][k].push_back(std::make_pair(
+	    c, std::make_tuple(p, k, o)));
+	  wq.enqueue_strict(Klass(k), p, std::make_tuple(p, k, o));
+        }
+        break;
+      default:
+        // Normal queue
+        if (fob == 4) {
+	  // Queue to the front.
+	  normq[p][k].push_front(std::make_pair(
+	    c, std::make_tuple(p, k, o)));
+	  wq.enqueue_front(Klass(k), p, c, std::make_tuple(p, k, o));
+        } else {
+	  //Queue to the back.
+	  normq[p][k].push_back(std::make_pair(
+	    c, std::make_tuple(p, k, o)));
+	  wq.enqueue(Klass(k), p, c, std::make_tuple(p, k, o));
+        }
+        break;
+      }
+    }
+  }
+  void test_queue(unsigned item_size, bool randomize = false) {
+    // Due to the WRR queue having a lot of probabilistic logic
+    // we can't determine the exact order OPs will be dequeued.
+    // However, the queue should not dequeue a priority out of
+    // order. It should also dequeue the strict priority queue
+    // first and in order. In both the strict and normal queues
+    // push front and back should be respected. Here we keep
+    // track of the ops queued and make sure they dequeue
+    // correctly.
+  
+    // Set up local tracking queues
+    WQ wq(0, 0);
+    LQ strictq, normq;
+    fill_queue(wq, strictq, normq, item_size, randomize);
+    // Test that the queue is dequeuing properly.
+    typedef std::map<unsigned, unsigned> LastKlass;
+    LastKlass last_strict, last_norm;
+    while (!(wq.empty())) {
+      Item r = wq.dequeue();
+      if (!(strictq.empty())) {
+        // Check that there are no higher priorities
+        // in the strict queue.
+        LQ::reverse_iterator ri = strictq.rbegin();
+        EXPECT_EQ(std::get<0>(r), ri->first);
+        // Check that if there are multiple classes in a priority
+        // that it is not dequeueing the same class each time.
+        LastKlass::iterator si = last_strict.find(std::get<0>(r));
+        if (strictq[std::get<0>(r)].size() > 1 && si != last_strict.end()) {
+	  EXPECT_NE(std::get<1>(r), si->second);
+	}
+        last_strict[std::get<0>(r)] = std::get<1>(r);
+
+	Item t = strictq[std::get<0>(r)][std::get<1>(r)].front().second;
+        EXPECT_EQ(std::get<2>(r), std::get<2>(t));
+        strictq[std::get<0>(r)][std::get<1>(r)].pop_front();
+        if (strictq[std::get<0>(r)][std::get<1>(r)].empty()) {
+	  strictq[std::get<0>(r)].erase(std::get<1>(r));
+	}
+        if (strictq[std::get<0>(r)].empty()) {
+	  strictq.erase(std::get<0>(r));
+	}
+      } else {
+        // Check that if there are multiple classes in a priority
+        // that it is not dequeueing the same class each time.
+        LastKlass::iterator si = last_norm.find(std::get<0>(r));
+        if (normq[std::get<0>(r)].size() > 1 && si != last_norm.end()) {
+	  EXPECT_NE(std::get<1>(r), si->second);
+	}
+        last_norm[std::get<0>(r)] = std::get<1>(r);
+
+	Item t = normq[std::get<0>(r)][std::get<1>(r)].front().second;
+        EXPECT_EQ(std::get<2>(r), std::get<2>(t));
+        normq[std::get<0>(r)][std::get<1>(r)].pop_front();
+        if (normq[std::get<0>(r)][std::get<1>(r)].empty()) {
+	  normq[std::get<0>(r)].erase(std::get<1>(r));
+	}
+        if (normq[std::get<0>(r)].empty()) {
+	  normq.erase(std::get<0>(r));
+	}
+      }
+    }
+  }
+
+  virtual void SetUp() {
+    srand(time(0));
+  }
+  virtual void TearDown() {
+  }
+};
+
+TEST_F(WeightedPriorityQueueTest, wpq_size){
+  WQ wq(0, 0);
+  EXPECT_TRUE(wq.empty());
+  EXPECT_EQ(0u, wq.length());
+
+  // Test the strict queue size.
+  for (unsigned i = 1; i < 5; ++i) {
+    wq.enqueue_strict(Klass(i),i, std::make_tuple(i, i, i));
+    EXPECT_FALSE(wq.empty());
+    EXPECT_EQ(i, wq.length());
+  }
+  // Test the normal queue size.
+  for (unsigned i = 5; i < 10; ++i) {
+    wq.enqueue(Klass(i), i, i, std::make_tuple(i, i, i));
+    EXPECT_FALSE(wq.empty());
+    EXPECT_EQ(i, wq.length());
+  }
+  // Test that as both queues are emptied
+  // the size is correct.
+  for (unsigned i = 8; i >0; --i) {
+    wq.dequeue();
+    EXPECT_FALSE(wq.empty());
+    EXPECT_EQ(i, wq.length());
+  }
+  wq.dequeue();
+  EXPECT_TRUE(wq.empty());
+  EXPECT_EQ(0u, wq.length());
+}
+
+TEST_F(WeightedPriorityQueueTest, wpq_test_static) {
+  test_queue(1000);
+} 
+
+TEST_F(WeightedPriorityQueueTest, wpq_test_random) {
+  test_queue(rand() % 500 + 500, true);
+} 
+
+template <typename T>
+struct Greater {
+  const T rhs;
+  Greater(const T &v) : rhs(v) {}
+  bool operator()(const T &lhs) const {
+    return std::get<2>(lhs) > std::get<2>(rhs);
+  }
+};
+
+TEST_F(WeightedPriorityQueueTest, wpq_test_remove_by_filter) {
+  WQ wq(0, 0);
+  LQ strictq, normq;
+  unsigned num_items = 1000;
+  fill_queue(wq, strictq, normq, num_items);
+  const Greater<Item> pred(std::make_tuple(0, 0, (1 << 16) - (1 << 16)/10));
+  Removed r_strictq, r_normq;
+  unsigned num_to_remove = 0;
+  // Figure out from what has been queued what we
+  // expect to be removed
+  for (LQ::iterator pi = strictq.begin();
+       pi != strictq.end(); ++pi) {
+    for (KlassItem::iterator ki = pi->second.begin();
+	 ki != pi->second.end(); ++ki) {
+      for (ItemList::iterator li = ki->second.begin();
+	   li != ki->second.end(); ++li) {
+	if (pred(li->second)) {
+	  ++num_to_remove;
+	}
+      }
+    }
+  }
+  for (LQ::iterator pi = normq.begin();
+       pi != normq.end(); ++pi) {
+    for (KlassItem::iterator ki = pi->second.begin();
+	 ki != pi->second.end(); ++ki) {
+      for (ItemList::iterator li = ki->second.begin();
+	   li != ki->second.end(); ++li) {
+	if (pred(li->second)) {
+	  ++num_to_remove;
+	}
+      }
+    }
+  }
+  Removed wq_removed;
+  wq.remove_by_filter(pred, &wq_removed);
+  // Check that what was removed was correct
+  for (Removed::iterator it = wq_removed.begin();
+       it != wq_removed.end(); ++it) {
+    EXPECT_TRUE(pred(*it));
+  }
+  EXPECT_EQ(num_to_remove, wq_removed.size());
+  EXPECT_EQ(num_items - num_to_remove, wq.length());
+  // Make sure that none were missed
+  while (!(wq.empty())) {
+    EXPECT_FALSE(pred(wq.dequeue()));
+  }
+}
+
+TEST_F(WeightedPriorityQueueTest, wpq_test_remove_by_class) {
+  WQ wq(0, 0);
+  LQ strictq, normq;
+  unsigned num_items = 1000;
+  fill_queue(wq, strictq, normq, num_items);
+  unsigned num_to_remove = 0;
+  const Klass k = 5;
+  // Find how many ops are in the class
+  for (LQ::iterator it = strictq.begin();
+       it != strictq.end(); ++it) {
+    num_to_remove += it->second[k].size();
+  }
+  for (LQ::iterator it = normq.begin();
+       it != normq.end(); ++it) {
+    num_to_remove += it->second[k].size();
+  }
+  Removed wq_removed;
+  wq.remove_by_class(k, &wq_removed);
+  // Check that the right ops were removed.
+  EXPECT_EQ(num_to_remove, wq_removed.size());
+  EXPECT_EQ(num_items - num_to_remove, wq.length());
+  for (Removed::iterator it = wq_removed.begin();
+       it != wq_removed.end(); ++it) {
+    EXPECT_EQ(k, std::get<1>(*it));
+  }
+  // Check that none were missed
+  while (!(wq.empty())) {
+    EXPECT_NE(k, std::get<1>(wq.dequeue()));
+  }
+}
diff --git a/src/test/compressor/Makefile.am b/src/test/compressor/Makefile.am
index f5a8f4e..4a41354 100644
--- a/src/test/compressor/Makefile.am
+++ b/src/test/compressor/Makefile.am
@@ -45,5 +45,27 @@ unittest_compression_plugin_snappy_LDADD += -ldl
 endif
 check_TESTPROGRAMS += unittest_compression_plugin_snappy
 
+unittest_compression_zlib_SOURCES = \
+	test/compressor/test_compression_zlib.cc \
+	${zlib_sources}
+unittest_compression_zlib_CXXFLAGS = $(UNITTEST_CXXFLAGS)
+unittest_compression_zlib_LDADD = $(LIBOSD) $(LIBCOMMON) $(UNITTEST_LDADD) $(CEPH_GLOBAL)
+unittest_compression_zlib_LDFLAGS = -lz
+if LINUX
+unittest_compression_zlib_LDADD += -ldl
+endif
+check_TESTPROGRAMS += unittest_compression_zlib
+
+unittest_compression_plugin_zlib_SOURCES = \
+	test/compressor/test_compression_plugin_zlib.cc \
+	${zlib_sources}
+unittest_compression_plugin_zlib_CXXFLAGS = ${AM_CXXFLAGS} ${UNITTEST_CXXFLAGS}
+unittest_compression_plugin_zlib_LDADD = $(LIBOSD) $(LIBCOMMON) $(UNITTEST_LDADD) $(CEPH_GLOBAL) $(LIBCOMPRESSOR)
+unittest_compression_plugin_zlib_LDFLAGS = -lz
+if LINUX
+unittest_compression_plugin_zlib_LDADD += -ldl
+endif
+check_TESTPROGRAMS += unittest_compression_plugin_zlib
+
 endif # WITH_OSD
 endif # ENABLE_SERVER
diff --git a/src/test/compressor/compressor_example.h b/src/test/compressor/compressor_example.h
index 3a96f88..148c63a 100644
--- a/src/test/compressor/compressor_example.h
+++ b/src/test/compressor/compressor_example.h
@@ -31,13 +31,13 @@ class CompressorExample : public Compressor {
 public:
   virtual ~CompressorExample() {}
 
-  virtual int compress(bufferlist &in, bufferlist &out)
+  virtual int compress(const bufferlist &in, bufferlist &out)
   {
     out = in;
     return 0;
   }
 
-  virtual int decompress(bufferlist &in, bufferlist &out)
+  virtual int decompress(const bufferlist &in, bufferlist &out)
   {
     out = in;
     return 0;
diff --git a/src/test/compressor/compressor_plugin_example.cc b/src/test/compressor/compressor_plugin_example.cc
index 96914ee..1913553 100644
--- a/src/test/compressor/compressor_plugin_example.cc
+++ b/src/test/compressor/compressor_plugin_example.cc
@@ -23,7 +23,7 @@
 class CompressorPluginExample : public CompressionPlugin {
 public:
 
-  CompressorPluginExample(CephContext* cct) : CompressionPlugin(cct)
+  explicit CompressorPluginExample(CephContext* cct) : CompressionPlugin(cct)
   {}
 
   virtual int factory(CompressorRef *cs,
diff --git a/src/test/compressor/test_compression_snappy.cc b/src/test/compressor/test_compression_plugin_zlib.cc
similarity index 67%
copy from src/test/compressor/test_compression_snappy.cc
copy to src/test/compressor/test_compression_plugin_zlib.cc
index 69b0ebf..6cf1085 100644
--- a/src/test/compressor/test_compression_snappy.cc
+++ b/src/test/compressor/test_compression_plugin_zlib.cc
@@ -15,27 +15,19 @@
  */
 
 #include <errno.h>
-#include <string.h>
+#include <signal.h>
+#include <stdlib.h>
 #include <gtest/gtest.h>
 #include "global/global_init.h"
-#include "compressor/snappy/SnappyCompressor.h"
+#include "compressor/Compressor.h"
 #include "common/ceph_argparse.h"
 #include "global/global_context.h"
 #include "common/config.h"
 
-TEST(SnappyCompressor, compress_decompress)
+TEST(CompressionPluginZlib, all)
 {
-  SnappyCompressor sp;
-  EXPECT_EQ(sp.get_method_name(), "snappy");
-  char* test = "This is test text";
-  int len = strlen(test);
-  bufferlist in, out;
-  in.append(test, len);
-  int res = sp.compress(in, out);
-  EXPECT_EQ(res, 0);
-  bufferlist after;
-  res = sp.decompress(out, after);
-  EXPECT_EQ(res, 0);
+  CompressorRef compressor = Compressor::create(g_ceph_context, "zlib");
+  EXPECT_TRUE(compressor.get());
 }
 
 int main(int argc, char **argv) {
@@ -45,7 +37,10 @@ int main(int argc, char **argv) {
   global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, 0);
   common_init_finish(g_ceph_context);
 
-  g_conf->set_val("compression_dir", ".libs", false, false);
+  system("mkdir -p .libs/compressor");
+  system("cp .libs/libceph_zlib.so* .libs/compressor/");
+
+  g_conf->set_val("plugin_dir", ".libs", false, false);
 
   ::testing::InitGoogleTest(&argc, argv);
   return RUN_ALL_TESTS();
@@ -54,9 +49,9 @@ int main(int argc, char **argv) {
 /*
  * Local Variables:
  * compile-command: "cd ../.. ; make -j4 && 
- *   make unittest_compression_snappy && 
+ *   make unittest_compression_plugin_zlib && 
  *   valgrind --tool=memcheck \
- *      ./unittest_compression_snappy \
+ *      ./unittest_compression_plugin_zlib \
  *      --gtest_filter=*.* --log-to-stderr=true --debug-osd=20"
  * End:
  */
diff --git a/src/test/compressor/test_compression_snappy.cc b/src/test/compressor/test_compression_snappy.cc
index 69b0ebf..556390b 100644
--- a/src/test/compressor/test_compression_snappy.cc
+++ b/src/test/compressor/test_compression_snappy.cc
@@ -27,7 +27,7 @@ TEST(SnappyCompressor, compress_decompress)
 {
   SnappyCompressor sp;
   EXPECT_EQ(sp.get_method_name(), "snappy");
-  char* test = "This is test text";
+  const char* test = "This is test text";
   int len = strlen(test);
   bufferlist in, out;
   in.append(test, len);
@@ -38,6 +38,38 @@ TEST(SnappyCompressor, compress_decompress)
   EXPECT_EQ(res, 0);
 }
 
+TEST(SnappyCompressor, sharded_input_decompress)
+{
+  // Rebuild the compressed data piecewise (a 3-byte prefix, then shards of
+  // at most 2048 bytes) and check that decompressing the result succeeds.
+  const size_t small_prefix_size=3;
+  SnappyCompressor sp;
+  EXPECT_EQ(sp.get_method_name(), "snappy");
+  string test(128*1024,0);
+  int len = test.size();
+  bufferlist in, out;
+  in.append(test.c_str(), len);
+  int res = sp.compress(in, out);
+  EXPECT_EQ(res, 0);
+  // Fatal on purpose: the size_t subtraction below would wrap otherwise.
+  ASSERT_GT(out.length(), small_prefix_size);
+  bufferlist out2, tmp;
+  tmp.substr_of(out, 0, small_prefix_size );
+  out2.append( tmp );
+  size_t left = out.length()-small_prefix_size;
+  size_t offs = small_prefix_size;
+  while( left > 0 ){
+    size_t shard_size = MIN( 2048, left );
+    tmp.substr_of(out, offs, shard_size );
+    out2.append( tmp );
+    left -= shard_size;
+    offs += shard_size;
+  }
+  bufferlist after;
+  res = sp.decompress(out2, after);
+  EXPECT_EQ(res, 0);
+}
+
 int main(int argc, char **argv) {
   vector<const char*> args;
   argv_to_vec(argc, (const char **)argv, args);
diff --git a/src/test/compressor/test_compression_snappy.cc b/src/test/compressor/test_compression_zlib.cc
similarity index 66%
copy from src/test/compressor/test_compression_snappy.cc
copy to src/test/compressor/test_compression_zlib.cc
index 69b0ebf..aba2f44 100644
--- a/src/test/compressor/test_compression_snappy.cc
+++ b/src/test/compressor/test_compression_zlib.cc
@@ -18,15 +18,15 @@
 #include <string.h>
 #include <gtest/gtest.h>
 #include "global/global_init.h"
-#include "compressor/snappy/SnappyCompressor.h"
+#include "compressor/zlib/CompressionZlib.h"
 #include "common/ceph_argparse.h"
 #include "global/global_context.h"
 #include "common/config.h"
 
-TEST(SnappyCompressor, compress_decompress)
+TEST(CompressionZlib, compress_decompress)
 {
-  SnappyCompressor sp;
-  EXPECT_EQ(sp.get_method_name(), "snappy");
-  char* test = "This is test text";
+  CompressionZlib sp;
+  EXPECT_EQ(sp.get_method_name(), "zlib");
+  const char* test = "This is test text";
   int len = strlen(test);
   bufferlist in, out;
@@ -36,6 +36,25 @@ TEST(SnappyCompressor, compress_decompress)
   bufferlist after;
   res = sp.decompress(out, after);
   EXPECT_EQ(res, 0);
+  EXPECT_EQ(test, std::string(after.c_str(), after.length()));  // by length, not NUL
+}
+
+TEST(CompressionZlib, compress_decompress_chunk)
+{
+  CompressionZlib sp;
+  EXPECT_EQ(sp.get_method_name(), "zlib");
+  const char* test = "This is test text";
+  buffer::ptr test2 ("1234567890", 10);
+  int len = strlen(test);
+  bufferlist in, out, after;
+  in.append(test, len);
+  in.append(test2);
+  int res = sp.compress(in, out);
+  EXPECT_EQ(res, 0);
+  res = sp.decompress(out, after);
+  EXPECT_EQ(res, 0);
+  // Compare by length; do not rely on a trailing NUL in the buffer.
+  EXPECT_EQ("This is test text1234567890", std::string(after.c_str(), after.length()));
 }
 
 int main(int argc, char **argv) {
@@ -54,9 +73,9 @@ int main(int argc, char **argv) {
 /*
  * Local Variables:
  * compile-command: "cd ../.. ; make -j4 && 
- *   make unittest_compression_snappy && 
+ *   make unittest_compression_zlib && 
  *   valgrind --tool=memcheck \
- *      ./unittest_compression_snappy \
+ *      ./unittest_compression_zlib \
  *      --gtest_filter=*.* --log-to-stderr=true --debug-osd=20"
  * End:
  */
diff --git a/src/test/daemon_config.cc b/src/test/daemon_config.cc
index 6d32e15..6187ee5 100644
--- a/src/test/daemon_config.cc
+++ b/src/test/daemon_config.cc
@@ -339,14 +339,6 @@ TEST(DaemonConfig, ThreadSafety1) {
 
 TEST(DaemonConfig, InvalidIntegers) {
   {
-    int ret = g_ceph_context->_conf->set_val("num_client", "-1");
-    ASSERT_EQ(ret, -EINVAL);
-  }
-  {
-    int ret = g_ceph_context->_conf->set_val("num_client", "-1K");
-    ASSERT_EQ(ret, -EINVAL);
-  }
-  {
     long long bad_value = (long long)std::numeric_limits<int>::max() + 1;
     string str = boost::lexical_cast<string>(bad_value);
     int ret = g_ceph_context->_conf->set_val("num_client", str);
diff --git a/src/test/encoding.cc b/src/test/encoding.cc
index 4f2b26c..da4d6fb 100644
--- a/src/test/encoding.cc
+++ b/src/test/encoding.cc
@@ -69,7 +69,7 @@ public:
     default_ctor++;
   }
 
-  ConstructorCounter(const T& data_)
+  explicit ConstructorCounter(const T& data_)
     : data(data_)
   {
     one_arg_ctor++;
diff --git a/src/test/encoding/ceph_dencoder.cc b/src/test/encoding/ceph_dencoder.cc
index 7a30ebd..119145a 100644
--- a/src/test/encoding/ceph_dencoder.cc
+++ b/src/test/encoding/ceph_dencoder.cc
@@ -349,7 +349,6 @@ int main(int argc, const char **argv)
 	exit(1);
       }
       features = atoi(*i);
-
     } else if (*i == string("encode")) {
       if (!den) {
 	cerr << "must first select type with 'type <name>'" << std::endl;
@@ -449,6 +448,11 @@ int main(int argc, const char **argv)
       int n = atoi(*i);
       err = den->select_generated(n);
     } else if (*i == string("is_deterministic")) {
+      if (!den) {
+	cerr << "must first select type with 'type <name>'" << std::endl;
+	usage(cerr);
+	exit(1);
+      }
       if (den->is_deterministic())
 	exit(0);
       else
diff --git a/src/test/encoding/test_ceph_time.h b/src/test/encoding/test_ceph_time.h
new file mode 100644
index 0000000..7adcb8f
--- /dev/null
+++ b/src/test/encoding/test_ceph_time.h
@@ -0,0 +1,30 @@
+#ifndef TEST_CEPH_TIME_H
+#define TEST_CEPH_TIME_H
+
+#include <list>
+#include "common/ceph_time.h"
+
+// wrapper for ceph::real_time that implements the dencoder interface
+class real_time_wrapper {
+  ceph::real_time t;
+ public:
+  real_time_wrapper() = default;
+  real_time_wrapper(const ceph::real_time& t) : t(t) {}
+
+  void encode(bufferlist& bl) const {
+    ::encode(t, bl);
+  }
+  void decode(bufferlist::iterator &p) {
+    ::decode(t, p);
+  }
+  void dump(Formatter* f) {
+    auto epoch_time = ceph::real_clock::to_time_t(t);
+    f->dump_string("time", std::ctime(&epoch_time));
+  }
+  static void generate_test_instances(std::list<real_time_wrapper*>& ls) {
+    constexpr time_t t{455500800}; // Ghostbusters release date
+    ls.push_back(new real_time_wrapper(ceph::real_clock::from_time_t(t)));
+  }
+};
+
+#endif
diff --git a/src/test/encoding/types.h b/src/test/encoding/types.h
index e63dc8d..41796f4 100644
--- a/src/test/encoding/types.h
+++ b/src/test/encoding/types.h
@@ -14,6 +14,9 @@ TYPE(BitVector<2>)
 TYPE(bloom_filter)
 TYPE(compressible_bloom_filter)
 
+#include "test_ceph_time.h"
+TYPE(real_time_wrapper)
+
 #include "common/snap_types.h"
 TYPE(SnapContext)
 TYPE(SnapRealmInfo)
@@ -236,8 +239,10 @@ TYPE(ETableServer)
 TYPE(EUpdate)
 
 #ifdef WITH_RBD
-#include "librbd/journal/Entries.h"
+#include "librbd/journal/Types.h"
 TYPE(librbd::journal::EventEntry)
+TYPE(librbd::journal::ClientData)
+TYPE(librbd::journal::TagData)
 #include "librbd/WatchNotifyTypes.h"
 TYPE(librbd::watch_notify::NotifyMessage)
 TYPE(librbd::watch_notify::ResponseMessage)
diff --git a/src/test/erasure-code/Makefile.am b/src/test/erasure-code/Makefile.am
index ed0a014..bdce080 100644
--- a/src/test/erasure-code/Makefile.am
+++ b/src/test/erasure-code/Makefile.am
@@ -42,7 +42,10 @@ test/erasure-code/ErasureCodePluginExample.cc: ./ceph_ver.h
 libec_example_la_CFLAGS = ${AM_CFLAGS}
 libec_example_la_CXXFLAGS= ${AM_CXXFLAGS}
 libec_example_la_LIBADD = $(LIBCRUSH) $(PTHREAD_LIBS) $(EXTRALIBS)
-libec_example_la_LDFLAGS = ${AM_LDFLAGS} -export-symbols-regex '.*__erasure_code_.*'
+libec_example_la_LDFLAGS = ${AM_LDFLAGS} -module -avoid-version -shared
+if LINUX
+libec_example_la_LDFLAGS += -export-symbols-regex '.*__erasure_code_.*'
+endif
 erasure_codelib_LTLIBRARIES += libec_example.la
 
 libec_missing_entry_point_la_SOURCES = test/erasure-code/ErasureCodePluginMissingEntryPoint.cc
@@ -50,14 +53,20 @@ test/erasure-code/ErasureCodePluginMissingEntryPoint.cc: ./ceph_ver.h
 libec_missing_entry_point_la_CFLAGS = ${AM_CFLAGS}
 libec_missing_entry_point_la_CXXFLAGS= ${AM_CXXFLAGS}
 libec_missing_entry_point_la_LIBADD = $(PTHREAD_LIBS) $(EXTRALIBS)
-libec_missing_entry_point_la_LDFLAGS = ${AM_LDFLAGS} -export-symbols-regex '.*__erasure_code_.*'
+libec_missing_entry_point_la_LDFLAGS = ${AM_LDFLAGS} -module -avoid-version -shared
+if LINUX
+libec_missing_entry_point_la_LDFLAGS += -export-symbols-regex '.*__erasure_code_.*'
+endif
 erasure_codelib_LTLIBRARIES += libec_missing_entry_point.la
 
 libec_missing_version_la_SOURCES = test/erasure-code/ErasureCodePluginMissingVersion.cc
 libec_missing_version_la_CFLAGS = ${AM_CFLAGS}
 libec_missing_version_la_CXXFLAGS= ${AM_CXXFLAGS}
 libec_missing_version_la_LIBADD = $(PTHREAD_LIBS) $(EXTRALIBS)
-libec_missing_version_la_LDFLAGS = ${AM_LDFLAGS} -export-symbols-regex '.*__erasure_code_.*'
+libec_missing_version_la_LDFLAGS = ${AM_LDFLAGS} -module -avoid-version -shared
+if LINUX
+libec_missing_version_la_LDFLAGS += -export-symbols-regex '.*__erasure_code_.*'
+endif
 erasure_codelib_LTLIBRARIES += libec_missing_version.la
 
 libec_hangs_la_SOURCES = test/erasure-code/ErasureCodePluginHangs.cc
@@ -65,7 +74,10 @@ test/erasure-code/ErasureCodePluginHangs.cc: ./ceph_ver.h
 libec_hangs_la_CFLAGS = ${AM_CFLAGS}
 libec_hangs_la_CXXFLAGS= ${AM_CXXFLAGS}
 libec_hangs_la_LIBADD = $(PTHREAD_LIBS) $(EXTRALIBS)
-libec_hangs_la_LDFLAGS = ${AM_LDFLAGS} -export-symbols-regex '.*__erasure_code_.*'
+libec_hangs_la_LDFLAGS = ${AM_LDFLAGS} -module -avoid-version -shared
+if LINUX
+libec_hangs_la_LDFLAGS += -export-symbols-regex '.*__erasure_code_.*'
+endif
 erasure_codelib_LTLIBRARIES += libec_hangs.la
 
 libec_fail_to_initialize_la_SOURCES = test/erasure-code/ErasureCodePluginFailToInitialize.cc
@@ -73,7 +85,10 @@ test/erasure-code/ErasureCodePluginFailToInitialize.cc: ./ceph_ver.h
 libec_fail_to_initialize_la_CFLAGS = ${AM_CFLAGS}
 libec_fail_to_initialize_la_CXXFLAGS= ${AM_CXXFLAGS}
 libec_fail_to_initialize_la_LIBADD = $(PTHREAD_LIBS) $(EXTRALIBS)
-libec_fail_to_initialize_la_LDFLAGS = ${AM_LDFLAGS} -export-symbols-regex '.*__erasure_code_.*'
+libec_fail_to_initialize_la_LDFLAGS = ${AM_LDFLAGS} -module -avoid-version -shared
+if LINUX
+libec_fail_to_initialize_la_LDFLAGS += -export-symbols-regex '.*__erasure_code_.*'
+endif
 erasure_codelib_LTLIBRARIES += libec_fail_to_initialize.la
 
 libec_fail_to_register_la_SOURCES = test/erasure-code/ErasureCodePluginFailToRegister.cc
@@ -81,7 +96,10 @@ test/erasure-code/ErasureCodePluginFailToRegister.cc: ./ceph_ver.h
 libec_fail_to_register_la_CFLAGS = ${AM_CFLAGS}
 libec_fail_to_register_la_CXXFLAGS= ${AM_CXXFLAGS}
 libec_fail_to_register_la_LIBADD = $(PTHREAD_LIBS) $(EXTRALIBS)
-libec_fail_to_register_la_LDFLAGS = ${AM_LDFLAGS} -export-symbols-regex '.*__erasure_code_.*'
+libec_fail_to_register_la_LDFLAGS = ${AM_LDFLAGS} -module -avoid-version -shared
+if LINUX
+libec_fail_to_register_la_LDFLAGS += -export-symbols-regex '.*__erasure_code_.*'
+endif
 erasure_codelib_LTLIBRARIES += libec_fail_to_register.la
 
 libec_test_jerasure_neon_la_SOURCES = test/erasure-code/TestJerasurePluginNEON.cc
@@ -89,7 +107,10 @@ test/erasure-code/TestJerasurePluginNEON.cc: ./ceph_ver.h
 libec_test_jerasure_neon_la_CFLAGS = ${AM_CFLAGS}
 libec_test_jerasure_neon_la_CXXFLAGS= ${AM_CXXFLAGS}
 libec_test_jerasure_neon_la_LIBADD = $(PTHREAD_LIBS) $(EXTRALIBS)
-libec_test_jerasure_neon_la_LDFLAGS = ${AM_LDFLAGS} -export-symbols-regex '.*__erasure_code_.*'
+libec_test_jerasure_neon_la_LDFLAGS = ${AM_LDFLAGS} -module -avoid-version -shared
+if LINUX
+libec_test_jerasure_neon_la_LDFLAGS += -export-symbols-regex '.*__erasure_code_.*'
+endif
 erasure_codelib_LTLIBRARIES += libec_test_jerasure_neon.la
 
 libec_test_jerasure_sse4_la_SOURCES = test/erasure-code/TestJerasurePluginSSE4.cc
@@ -97,7 +118,10 @@ test/erasure-code/TestJerasurePluginSSE4.cc: ./ceph_ver.h
 libec_test_jerasure_sse4_la_CFLAGS = ${AM_CFLAGS}
 libec_test_jerasure_sse4_la_CXXFLAGS= ${AM_CXXFLAGS}
 libec_test_jerasure_sse4_la_LIBADD = $(PTHREAD_LIBS) $(EXTRALIBS)
-libec_test_jerasure_sse4_la_LDFLAGS = ${AM_LDFLAGS} -export-symbols-regex '.*__erasure_code_.*'
+libec_test_jerasure_sse4_la_LDFLAGS = ${AM_LDFLAGS} -module -avoid-version -shared
+if LINUX
+libec_test_jerasure_sse4_la_LDFLAGS += -export-symbols-regex '.*__erasure_code_.*'
+endif
 erasure_codelib_LTLIBRARIES += libec_test_jerasure_sse4.la
 
 libec_test_jerasure_sse3_la_SOURCES = test/erasure-code/TestJerasurePluginSSE3.cc
@@ -105,7 +129,10 @@ test/erasure-code/TestJerasurePluginSSE3.cc: ./ceph_ver.h
 libec_test_jerasure_sse3_la_CFLAGS = ${AM_CFLAGS}
 libec_test_jerasure_sse3_la_CXXFLAGS= ${AM_CXXFLAGS}
 libec_test_jerasure_sse3_la_LIBADD = $(PTHREAD_LIBS) $(EXTRALIBS)
-libec_test_jerasure_sse3_la_LDFLAGS = ${AM_LDFLAGS} -export-symbols-regex '.*__erasure_code_.*'
+libec_test_jerasure_sse3_la_LDFLAGS = ${AM_LDFLAGS} -module -avoid-version -shared
+if LINUX
+libec_test_jerasure_sse3_la_LDFLAGS += -export-symbols-regex '.*__erasure_code_.*'
+endif
 erasure_codelib_LTLIBRARIES += libec_test_jerasure_sse3.la
 
 libec_test_jerasure_generic_la_SOURCES = test/erasure-code/TestJerasurePluginGeneric.cc
@@ -113,7 +140,10 @@ test/erasure-code/TestJerasurePluginGeneric.cc: ./ceph_ver.h
 libec_test_jerasure_generic_la_CFLAGS = ${AM_CFLAGS}
 libec_test_jerasure_generic_la_CXXFLAGS= ${AM_CXXFLAGS}
 libec_test_jerasure_generic_la_LIBADD = $(PTHREAD_LIBS) $(EXTRALIBS)
-libec_test_jerasure_generic_la_LDFLAGS = ${AM_LDFLAGS} -export-symbols-regex '.*__erasure_code_.*'
+libec_test_jerasure_generic_la_LDFLAGS = ${AM_LDFLAGS} -module -avoid-version -shared
+if LINUX
+libec_test_jerasure_generic_la_LDFLAGS += -export-symbols-regex '.*__erasure_code_.*'
+endif
 erasure_codelib_LTLIBRARIES += libec_test_jerasure_generic.la
 
 unittest_erasure_code_plugin_SOURCES = \
@@ -162,7 +192,7 @@ unittest_erasure_code_isa_SOURCES = \
 	erasure-code/ErasureCode.cc \
 	test/erasure-code/TestErasureCodeIsa.cc
 unittest_erasure_code_isa_CXXFLAGS = ${AM_CXXFLAGS} ${UNITTEST_CXXFLAGS}
-unittest_erasure_code_isa_LDADD = $(LIBOSD) $(LIBCOMMON) $(UNITTEST_LDADD) $(CEPH_GLOBAL) .libs/libec_isa.la $(LIBERASURE_CODE)
+unittest_erasure_code_isa_LDADD = $(LIBOSD) $(LIBCOMMON) $(UNITTEST_LDADD) $(CEPH_GLOBAL) libisa.la $(LIBERASURE_CODE)
 if LINUX
 unittest_erasure_code_isa_LDADD += -ldl
 endif
@@ -172,7 +202,7 @@ unittest_erasure_code_plugin_isa_SOURCES = \
 	erasure-code/ErasureCode.cc \
 	test/erasure-code/TestErasureCodePluginIsa.cc
 unittest_erasure_code_plugin_isa_CXXFLAGS = ${AM_CXXFLAGS} ${UNITTEST_CXXFLAGS}
-unittest_erasure_code_plugin_isa_LDADD = $(LIBOSD) $(LIBCOMMON) $(UNITTEST_LDADD) $(CEPH_GLOBAL) .libs/libec_isa.la $(LIBERASURE_CODE)
+unittest_erasure_code_plugin_isa_LDADD = $(LIBOSD) $(LIBCOMMON) $(UNITTEST_LDADD) $(CEPH_GLOBAL) $(LIBERASURE_CODE)
 if LINUX
 unittest_erasure_code_plugin_isa_LDADD += -ldl
 endif
@@ -288,7 +318,10 @@ test/erasure-code/TestShecPluginNEON.cc: ./ceph_ver.h
 libec_test_shec_neon_la_CFLAGS = ${AM_CFLAGS}
 libec_test_shec_neon_la_CXXFLAGS= ${AM_CXXFLAGS}
 libec_test_shec_neon_la_LIBADD = $(PTHREAD_LIBS) $(EXTRALIBS)
-libec_test_shec_neon_la_LDFLAGS = ${AM_LDFLAGS} -export-symbols-regex '.*__erasure_code_.*'
+libec_test_shec_neon_la_LDFLAGS = ${AM_LDFLAGS} -module -avoid-version -shared
+if LINUX
+libec_test_shec_neon_la_LDFLAGS += -export-symbols-regex '.*__erasure_code_.*'
+endif
 erasure_codelib_LTLIBRARIES += libec_test_shec_neon.la
 
 libec_test_shec_sse4_la_SOURCES = test/erasure-code/TestShecPluginSSE4.cc
@@ -296,7 +329,10 @@ test/erasure-code/TestShecPluginSSE4.cc: ./ceph_ver.h
 libec_test_shec_sse4_la_CFLAGS = ${AM_CFLAGS}
 libec_test_shec_sse4_la_CXXFLAGS= ${AM_CXXFLAGS}
 libec_test_shec_sse4_la_LIBADD = $(PTHREAD_LIBS) $(EXTRALIBS)
-libec_test_shec_sse4_la_LDFLAGS = ${AM_LDFLAGS} -export-symbols-regex '.*__erasure_code_.*'
+libec_test_shec_sse4_la_LDFLAGS = ${AM_LDFLAGS} -module -avoid-version -shared
+if LINUX
+libec_test_shec_sse4_la_LDFLAGS += -export-symbols-regex '.*__erasure_code_.*'
+endif
 erasure_codelib_LTLIBRARIES += libec_test_shec_sse4.la
 
 libec_test_shec_sse3_la_SOURCES = test/erasure-code/TestShecPluginSSE3.cc
@@ -304,7 +340,10 @@ test/erasure-code/TestShecPluginSSE3.cc: ./ceph_ver.h
 libec_test_shec_sse3_la_CFLAGS = ${AM_CFLAGS}
 libec_test_shec_sse3_la_CXXFLAGS= ${AM_CXXFLAGS}
 libec_test_shec_sse3_la_LIBADD = $(PTHREAD_LIBS) $(EXTRALIBS)
-libec_test_shec_sse3_la_LDFLAGS = ${AM_LDFLAGS} -export-symbols-regex '.*__erasure_code_.*'
+libec_test_shec_sse3_la_LDFLAGS = ${AM_LDFLAGS} -module -avoid-version -shared
+if LINUX
+libec_test_shec_sse3_la_LDFLAGS += -export-symbols-regex '.*__erasure_code_.*'
+endif
 erasure_codelib_LTLIBRARIES += libec_test_shec_sse3.la
 
 libec_test_shec_generic_la_SOURCES = test/erasure-code/TestShecPluginGeneric.cc
@@ -312,7 +351,10 @@ test/erasure-code/TestShecPluginGeneric.cc: ./ceph_ver.h
 libec_test_shec_generic_la_CFLAGS = ${AM_CFLAGS}
 libec_test_shec_generic_la_CXXFLAGS= ${AM_CXXFLAGS}
 libec_test_shec_generic_la_LIBADD = $(PTHREAD_LIBS) $(EXTRALIBS)
-libec_test_shec_generic_la_LDFLAGS = ${AM_LDFLAGS} -export-symbols-regex '.*__erasure_code_.*'
+libec_test_shec_generic_la_LDFLAGS = ${AM_LDFLAGS} -module -avoid-version -shared
+if LINUX
+libec_test_shec_generic_la_LDFLAGS += -export-symbols-regex '.*__erasure_code_.*'
+endif
 erasure_codelib_LTLIBRARIES += libec_test_shec_generic.la
 
 unittest_erasure_code_example_SOURCES = \
diff --git a/src/test/fedora-21/ceph.spec.in b/src/test/fedora-21/ceph.spec.in
index 487232c..498eac4 100644
--- a/src/test/fedora-21/ceph.spec.in
+++ b/src/test/fedora-21/ceph.spec.in
@@ -56,7 +56,7 @@ restorecon -R /var/log/ceph > /dev/null 2>&1;
 # the _with_systemd variable only implies that we'll install
 # /etc/tmpfiles.d/ceph.conf in order to set up the socket directory in
 # /var/run/ceph.
-%if 0%{?fedora} || 0%{?rhel} >= 7 || 0%{?suse_version} >= 1210
+%if 0%{?fedora} || 0%{?rhel} >= 7 || 0%{?suse_version}
 %global _with_systemd 1
 %{!?tmpfiles_create: %global tmpfiles_create systemd-tmpfiles --create}
 %endif
@@ -66,6 +66,10 @@ restorecon -R /var/log/ceph > /dev/null 2>&1;
 %global _with_lttng 1
 %endif
 
+# unify libexec for all targets
+%global _libexecdir %{_exec_prefix}/lib
+
+
 #################################################################################
 # common
 #################################################################################
@@ -86,27 +90,9 @@ Patch0:		init-ceph.in-fedora.patch
 #################################################################################
 # dependencies that apply across all distro families
 #################################################################################
-Requires:	librbd1 = %{epoch}:%{version}-%{release}
-Requires:	librados2 = %{epoch}:%{version}-%{release}
-Requires:	libcephfs1 = %{epoch}:%{version}-%{release}
-Requires:	ceph-common = %{epoch}:%{version}-%{release}
-%if 0%{with selinux}
-Requires:	ceph-selinux = %{epoch}:%{version}-%{release}
-%endif
-Requires:	python-rados = %{epoch}:%{version}-%{release}
-Requires:	python-rbd = %{epoch}:%{version}-%{release}
-Requires:	python-cephfs = %{epoch}:%{version}-%{release}
-Requires:	python
-Requires:	python-requests
-Requires:	grep
-Requires:	xfsprogs
-Requires:	logrotate
-Requires:	parted
-Requires:	util-linux
-Requires:	hdparm
-Requires:	cryptsetup
-Requires:	findutils
-Requires:	which
+Requires:       ceph-osd = %{epoch}:%{version}-%{release}
+Requires:       ceph-mds = %{epoch}:%{version}-%{release}
+Requires:       ceph-mon = %{epoch}:%{version}-%{release}
 Requires(post):	binutils
 %if 0%{with cephfs_java}
 BuildRequires:	java-devel
@@ -132,7 +118,6 @@ BuildRequires:	hdparm
 BuildRequires:	leveldb-devel > 1.2
 BuildRequires:	libaio-devel
 BuildRequires:	libcurl-devel
-BuildRequires:	libedit-devel
 BuildRequires:	libxml2-devel
 BuildRequires:	libblkid-devel >= 2.17
 BuildRequires:	libudev-devel
@@ -165,41 +150,35 @@ BuildRequires:	systemd
 %{?systemd_requires}
 %endif
 PreReq:		%fillup_prereq
-Requires:	python-Flask
 BuildRequires:	net-tools
 BuildRequires:	libbz2-devel
-%if 0%{?suse_version} > 1210
-Requires:	gptfdisk
 %if 0%{with tcmalloc}
 BuildRequires:	gperftools-devel
 %endif
-%else
-Requires:	scsirastools
-BuildRequires:	google-perftools-devel
-%endif
 BuildRequires:	mozilla-nss-devel
 BuildRequires:	keyutils-devel
 BuildRequires:	libatomic-ops-devel
-%else
+BuildRequires:  lsb-release
+%endif
+%if 0%{?fedora} || 0%{?rhel} 
 %if 0%{?_with_systemd}
 Requires:	systemd
 %endif
 BuildRequires:	nss-devel
 BuildRequires:	keyutils-libs-devel
 BuildRequires:	libatomic_ops-devel
-Requires:	gdisk
 Requires(post):	chkconfig
 Requires(preun):	chkconfig
 Requires(preun):	initscripts
 BuildRequires:	gperftools-devel
-Requires:	python-flask
+BuildRequires:  redhat-lsb-core
 %endif
 # boost
 %if 0%{?fedora} || 0%{?rhel} 
 BuildRequires:  boost-random
 %endif
 # python-argparse for distros with Python 2.6 or lower
-%if (0%{?rhel} && 0%{?rhel} <= 6) || (0%{?suse_version} && 0%{?suse_version} <= 1110)
+%if (0%{?rhel} && 0%{?rhel} <= 6)
 BuildRequires:	python-argparse
 %endif
 # lttng and babeltrace for rbd-replay-prep
@@ -238,6 +217,37 @@ on commodity hardware and delivers object, block and file system storage.
 #################################################################################
 # packages
 #################################################################################
+%package base
+Summary:       Ceph Base Package
+Group:         System Environment/Base
+Requires:      ceph-common = %{epoch}:%{version}-%{release}
+Requires:      librbd1 = %{epoch}:%{version}-%{release}
+Requires:      librados2 = %{epoch}:%{version}-%{release}
+Requires:      libcephfs1 = %{epoch}:%{version}-%{release}
+%if 0%{with selinux}
+Requires:      ceph-selinux = %{epoch}:%{version}-%{release}
+%endif
+Requires:      python
+Requires:      python-requests
+Requires:      python-setuptools
+Requires:      grep
+Requires:      xfsprogs
+Requires:      logrotate
+Requires:      parted
+Requires:      util-linux
+Requires:      hdparm
+Requires:      cryptsetup
+Requires:      findutils
+Requires:      which
+%if 0%{?suse_version}
+Requires:      lsb-release
+%endif
+%if 0%{?fedora} || 0%{?rhel}
+Requires:      redhat-lsb-core
+%endif
+%description base
+Base is the package that includes all the files shared amongst ceph servers
+
 %package -n ceph-common
 Summary:	Ceph Common
 Group:		System Environment/Base
@@ -254,11 +264,38 @@ Requires:	python-requests
 Requires(pre):	pwdutils
 %endif
 # python-argparse is only needed in distros with Python 2.6 or lower
-%if (0%{?rhel} && 0%{?rhel} <= 6) || (0%{?suse_version} && 0%{?suse_version} <= 1110)
+%if (0%{?rhel} && 0%{?rhel} <= 6)
 Requires:	python-argparse
 %endif
 %description -n ceph-common
 Common utilities to mount and interact with a ceph storage cluster.
+Comprised of files that are common to Ceph clients and servers.
+
+%package mds
+Summary:	Ceph Metadata Server Daemon
+Group:		System Environment/Base
+Requires:	ceph-base = %{epoch}:%{version}-%{release}
+%description mds
+ceph-mds is the metadata server daemon for the Ceph distributed file system.
+One or more instances of ceph-mds collectively manage the file system
+namespace, coordinating access to the shared OSD cluster.
+
+%package mon
+Summary:	Ceph Monitor Daemon
+Group:		System Environment/Base
+Requires:	ceph-base = %{epoch}:%{version}-%{release}
+# For ceph-rest-api
+%if 0%{?fedora} || 0%{?rhel}
+Requires:      python-flask
+%endif
+%if 0%{?suse_version}
+Requires:      python-Flask
+%endif
+%description mon
+ceph-mon is the cluster monitor daemon for the Ceph distributed file
+system. One or more instances of ceph-mon form a Paxos part-time
+parliament cluster that provides extremely reliable and durable storage
+of cluster membership, configuration, and state.
 
 %package fuse
 Summary:	Ceph fuse-based client
@@ -276,6 +313,16 @@ Requires:	librbd1 = %{epoch}:%{version}-%{release}
 %description -n rbd-fuse
 FUSE based client to map Ceph rbd images to files
 
+%package -n rbd-mirror
+Summary:	Ceph daemon for mirroring RBD images
+Group:		System Environment/Base
+Requires:	%{name}
+Requires:	ceph-common = %{epoch}:%{version}-%{release}
+Requires:	librados2 = %{epoch}:%{version}-%{release}
+%description -n rbd-mirror
+Daemon for mirroring RBD images between Ceph clusters, streaming
+changes asynchronously.
+
 %package -n rbd-nbd
 Summary:	Ceph RBD client base on NBD
 Group:		System Environment/Base
@@ -295,6 +342,12 @@ Requires:	ceph-selinux = %{epoch}:%{version}-%{release}
 Requires:	librados2 = %{epoch}:%{version}-%{release}
 %if 0%{?rhel} || 0%{?fedora}
 Requires:	mailcap
+# python-flask for powerdns
+Requires:	python-flask
+%endif
+%if 0%{?suse_version}
+# python-Flask for powerdns
+Requires:      python-Flask
 %endif
 %description radosgw
 This package is an S3 HTTP REST gateway for the RADOS object store. It
@@ -314,6 +367,22 @@ under Open Cluster Framework (OCF) compliant resource
 managers such as Pacemaker.
 %endif
 
+%package osd
+Summary:	Ceph Object Storage Daemon
+Group:		System Environment/Base
+Requires:	ceph-base = %{epoch}:%{version}-%{release}
+# for sgdisk, used by ceph-disk
+%if 0%{?fedora} || 0%{?rhel}
+Requires:	gdisk
+%endif
+%if 0%{?suse_version}
+Requires:	gptfdisk
+%endif
+%description osd
+ceph-osd is the object storage daemon for the Ceph distributed file
+system.  It is responsible for storing objects on a local file system
+and providing access to them over the network.
+
 %package -n librados2
 Summary:	RADOS distributed object store client library
 Group:		System Environment/Libraries
@@ -588,6 +657,7 @@ export RPM_OPT_FLAGS=`echo $RPM_OPT_FLAGS | sed -e 's/i386/i486/'`
 
 %{configure}	CPPFLAGS="$java_inc" \
 		--prefix=/usr \
+                --libexecdir=%{_libexecdir} \
 		--localstatedir=/var \
 		--sysconfdir=/etc \
 %if 0%{?_with_systemd}
@@ -606,16 +676,6 @@ export RPM_OPT_FLAGS=`echo $RPM_OPT_FLAGS | sed -e 's/i386/i486/'`
 		--with-selinux \
 %endif
 		--with-librocksdb-static=check \
-%if 0%{?rhel} || 0%{?fedora}
-		--with-systemd-libexec-dir=/usr/libexec/ceph \
-		--with-rgw-user=root \
-		--with-rgw-group=root \
-%endif
-%if 0%{?suse_version}
-		--with-systemd-libexec-dir=/usr/lib/ceph/ \
-		--with-rgw-user=wwwrun \
-		--with-rgw-group=www \
-%endif
 		--with-radosgw \
 		$CEPH_EXTRA_CONFIGURE_ARGS \
 		%{?_with_ocf} \
@@ -642,6 +702,20 @@ make %{?_smp_mflags} check-local
 
 %install
 make DESTDIR=$RPM_BUILD_ROOT install
+rm -f $RPM_BUILD_ROOT%{_libdir}/ceph/erasure-code/libec_example.so
+rm -f $RPM_BUILD_ROOT%{_libdir}/ceph/erasure-code/libec_fail_to_initialize.so
+rm -f $RPM_BUILD_ROOT%{_libdir}/ceph/erasure-code/libec_fail_to_register.so
+rm -f $RPM_BUILD_ROOT%{_libdir}/ceph/erasure-code/libec_hangs.so
+rm -f $RPM_BUILD_ROOT%{_libdir}/ceph/erasure-code/libec_missing_entry_point.so
+rm -f $RPM_BUILD_ROOT%{_libdir}/ceph/erasure-code/libec_missing_version.so
+rm -f $RPM_BUILD_ROOT%{_libdir}/ceph/erasure-code/libec_test_jerasure_generic.so
+rm -f $RPM_BUILD_ROOT%{_libdir}/ceph/erasure-code/libec_test_jerasure_neon.so
+rm -f $RPM_BUILD_ROOT%{_libdir}/ceph/erasure-code/libec_test_jerasure_sse3.so
+rm -f $RPM_BUILD_ROOT%{_libdir}/ceph/erasure-code/libec_test_jerasure_sse4.so
+rm -f $RPM_BUILD_ROOT%{_libdir}/ceph/erasure-code/libec_test_shec_generic.so
+rm -f $RPM_BUILD_ROOT%{_libdir}/ceph/erasure-code/libec_test_shec_neon.so
+rm -f $RPM_BUILD_ROOT%{_libdir}/ceph/erasure-code/libec_test_shec_sse3.so
+rm -f $RPM_BUILD_ROOT%{_libdir}/ceph/erasure-code/libec_test_shec_sse4.so
 find $RPM_BUILD_ROOT -type f -name "*.la" -exec rm -f {} ';'
 find $RPM_BUILD_ROOT -type f -name "*.a" -exec rm -f {} ';'
 install -D src/etc-rbdmap $RPM_BUILD_ROOT%{_sysconfdir}/ceph/rbdmap
@@ -718,120 +792,29 @@ mkdir -p $RPM_BUILD_ROOT%{_localstatedir}/lib/ceph/bootstrap-rgw
 %clean
 rm -rf $RPM_BUILD_ROOT
 
-%pre
-%if 0%{?_with_systemd}
-  %if 0%{?suse_version}
-    # service_add_pre and friends don't work with parameterized systemd service
-    # instances, only with single services or targets, so we always pass
-    # ceph.target to these macros
-    %service_add_pre ceph.target
-  %endif
-%endif
-
-
-%post
-/sbin/ldconfig
-%if 0%{?_with_systemd}
-  %if 0%{?suse_version}
-    %fillup_only
-    %service_add_post ceph.target
-  %endif
-%else
-  /sbin/chkconfig --add ceph
-%endif
-
-%preun
-%if 0%{?_with_systemd}
-  %if 0%{?suse_version}
-    %service_del_preun ceph.target
-  %endif
-  # Disable and stop on removal.
-  if [ $1 = 0 ] ; then
-    SERVICE_LIST=$(systemctl | grep -E '^ceph-mon@|^ceph-create-keys@|^ceph-osd@|^ceph-mds@|^ceph-disk-'  | cut -d' ' -f1)
-    if [ -n "$SERVICE_LIST" ]; then
-      for SERVICE in $SERVICE_LIST; do
-        /usr/bin/systemctl --no-reload disable $SERVICE > /dev/null 2>&1 || :
-        /usr/bin/systemctl stop $SERVICE > /dev/null 2>&1 || :
-      done
-    fi
-  fi
-%else
-  %if 0%{?rhel} || 0%{?fedora}
-    if [ $1 = 0 ] ; then
-      /sbin/service ceph stop >/dev/null 2>&1
-      /sbin/chkconfig --del ceph
-    fi
-  %endif
-%endif
-
-%postun
-/sbin/ldconfig
-%if 0%{?_with_systemd}
-  if [ $1 = 1 ] ; then
-    # Restart on upgrade, but only if "CEPH_AUTO_RESTART_ON_UPGRADE" is set to
-    # "yes". In any case: if units are not running, do not touch them.
-    SYSCONF_CEPH=/etc/sysconfig/ceph
-    if [ -f $SYSCONF_CEPH -a -r $SYSCONF_CEPH ] ; then
-      source $SYSCONF_CEPH
-    fi
-    if [ "X$CEPH_AUTO_RESTART_ON_UPGRADE" = "Xyes" ] ; then
-      SERVICE_LIST=$(systemctl | grep -E '^ceph-mon@|^ceph-create-keys@|^ceph-osd@|^ceph-mds@|^ceph-disk-'  | cut -d' ' -f1)
-      if [ -n "$SERVICE_LIST" ]; then
-        for SERVICE in $SERVICE_LIST; do
-          /usr/bin/systemctl try-restart $SERVICE > /dev/null 2>&1 || :
-        done
-      fi
-    fi
-  fi
-%endif
-
 #################################################################################
-# files
+# files and systemd scriptlets
 #################################################################################
 %files
+
+%files base
 %defattr(-,root,root,-)
 %docdir %{_docdir}
 %dir %{_docdir}/ceph
 %{_docdir}/ceph/sample.ceph.conf
 %{_docdir}/ceph/sample.fetch_config
-%{_bindir}/cephfs
-%{_bindir}/ceph-clsinfo
-%{_bindir}/ceph-rest-api
-%{python_sitelib}/ceph_rest_api.py*
 %{_bindir}/crushtool
 %{_bindir}/monmaptool
 %{_bindir}/osdmaptool
 %{_bindir}/ceph-run
-%{_bindir}/ceph-mon
-%{_bindir}/ceph-mds
-%{_bindir}/ceph-objectstore-tool
-%{_bindir}/ceph-bluefs-tool
-%{_bindir}/ceph-osd
 %{_bindir}/ceph-detect-init
-%{_bindir}/librados-config
 %{_bindir}/ceph-client-debug
-%{_bindir}/cephfs-journal-tool
-%{_bindir}/cephfs-table-tool
-%{_bindir}/cephfs-data-scan
-%{_bindir}/ceph-debugpack
-%{_bindir}/ceph-coverage
+%{_bindir}/cephfs
 %if 0%{?_with_systemd}
-%{_unitdir}/ceph-mds@.service
-%{_unitdir}/ceph-mon@.service
 %{_unitdir}/ceph-create-keys@.service
-%{_unitdir}/ceph-osd@.service
-%{_unitdir}/ceph-radosgw@.service
-%{_unitdir}/ceph-disk@.service
-%{_unitdir}/ceph.target
-%{_unitdir}/ceph-osd.target
-%{_unitdir}/ceph-mon.target
-%{_unitdir}/ceph-mds.target
-%{_unitdir}/ceph-radosgw.target
 %else
 %{_initrddir}/ceph
 %endif
-%{_sbindir}/ceph-disk
-%{_sbindir}/ceph-disk-udev
 %{_sbindir}/ceph-create-keys
 %{_sbindir}/rcceph
 %if 0%{?rhel} >= 7 || 0%{?fedora} || 0%{?suse_version}
@@ -839,25 +822,11 @@ rm -rf $RPM_BUILD_ROOT
 %else
 /sbin/mount.ceph
 %endif
-%dir %{_libdir}/ceph
-%{_libdir}/ceph/ceph_common.sh
-%{_libexecdir}/ceph/ceph-osd-prestart.sh
+%dir %{_libexecdir}/ceph
+%{_libexecdir}/ceph/ceph_common.sh
 %dir %{_libdir}/rados-classes
-%{_libdir}/rados-classes/libcls_cephfs.so*
-%{_libdir}/rados-classes/libcls_rbd.so*
-%{_libdir}/rados-classes/libcls_hello.so*
-%{_libdir}/rados-classes/libcls_numops.so*
-%{_libdir}/rados-classes/libcls_rgw.so*
-%{_libdir}/rados-classes/libcls_lock.so*
-%{_libdir}/rados-classes/libcls_kvs.so*
-%{_libdir}/rados-classes/libcls_refcount.so*
-%{_libdir}/rados-classes/libcls_log.so*
-%{_libdir}/rados-classes/libcls_replica_log.so*
-%{_libdir}/rados-classes/libcls_statelog.so*
-%{_libdir}/rados-classes/libcls_timeindex.so*
-%{_libdir}/rados-classes/libcls_user.so*
-%{_libdir}/rados-classes/libcls_version.so*
-%{_libdir}/rados-classes/libcls_journal.so*
+%{_libdir}/rados-classes/*
+%dir %{_libdir}/ceph
 %dir %{_libdir}/ceph/erasure-code
 %{_libdir}/ceph/erasure-code/libec_*.so*
 %dir %{_libdir}/ceph/compressor
@@ -866,8 +835,6 @@ rm -rf $RPM_BUILD_ROOT
 %{_libdir}/libos_tp.so*
 %{_libdir}/libosd_tp.so*
 %endif
-%{_udevrulesdir}/60-ceph-partuuid-workaround.rules
-%{_udevrulesdir}/95-ceph-osd.rules
 %config %{_sysconfdir}/bash_completion.d/ceph
 %config(noreplace) %{_sysconfdir}/logrotate.d/ceph
 %if 0%{?fedora} || 0%{?rhel}
@@ -878,29 +845,20 @@ rm -rf $RPM_BUILD_ROOT
 %config %{_sysconfdir}/sysconfig/SuSEfirewall2.d/services/ceph-mon
 %config %{_sysconfdir}/sysconfig/SuSEfirewall2.d/services/ceph-osd-mds
 %endif
+%{_unitdir}/ceph.target
 %{python_sitelib}/ceph_detect_init*
+%{python_sitelib}/ceph_disk*
 %{_mandir}/man8/ceph-deploy.8*
 %{_mandir}/man8/ceph-detect-init.8*
-%{_mandir}/man8/ceph-disk.8*
 %{_mandir}/man8/ceph-create-keys.8*
-%{_mandir}/man8/ceph-mon.8*
-%{_mandir}/man8/ceph-mds.8*
-%{_mandir}/man8/ceph-osd.8*
 %{_mandir}/man8/ceph-run.8*
-%{_mandir}/man8/ceph-rest-api.8*
 %{_mandir}/man8/crushtool.8*
 %{_mandir}/man8/osdmaptool.8*
 %{_mandir}/man8/monmaptool.8*
 %{_mandir}/man8/cephfs.8*
 %{_mandir}/man8/mount.ceph.8*
-%{_mandir}/man8/ceph-debugpack.8*
-%{_mandir}/man8/ceph-clsinfo.8*
-%{_mandir}/man8/librados-config.8*
 #set up placeholder directories
 %attr(750,ceph,ceph) %dir %{_localstatedir}/lib/ceph/tmp
-%attr(750,ceph,ceph) %dir %{_localstatedir}/lib/ceph/mon
-%attr(750,ceph,ceph) %dir %{_localstatedir}/lib/ceph/osd
-%attr(750,ceph,ceph) %dir %{_localstatedir}/lib/ceph/mds
 %attr(750,ceph,ceph) %dir %{_localstatedir}/lib/ceph/bootstrap-osd
 %attr(750,ceph,ceph) %dir %{_localstatedir}/lib/ceph/bootstrap-mds
 %attr(750,ceph,ceph) %dir %{_localstatedir}/lib/ceph/bootstrap-rgw
@@ -908,8 +866,74 @@ rm -rf $RPM_BUILD_ROOT
 %attr(770,ceph,ceph) %dir %{_localstatedir}/run/ceph
 %endif
 
+%pre base
+%if 0%{?_with_systemd}
+  %if 0%{?suse_version}
+    # service_add_pre and friends don't work with parameterized systemd service
+    # instances, only with single services or targets, so we always pass
+    # ceph.target to these macros
+    %service_add_pre ceph.target
+  %endif
+%endif
+
+%post base
+/sbin/ldconfig
+%if 0%{?_with_systemd}
+  %if 0%{?suse_version}
+    %fillup_only
+    %service_add_post ceph.target
+  %endif
+%else
+  /sbin/chkconfig --add ceph
+%endif
+
+%preun base
+%if 0%{?_with_systemd}
+  %if 0%{?suse_version}
+    %service_del_preun ceph.target
+  %endif
+  # Disable and stop on removal.
+  if [ $1 = 0 ] ; then
+    SERVICE_LIST=$(systemctl | grep -E '^ceph-mon@|^ceph-create-keys@|^ceph-osd@|^ceph-mds@|^ceph-disk-'  | cut -d' ' -f1)
+    if [ -n "$SERVICE_LIST" ]; then
+      for SERVICE in $SERVICE_LIST; do
+        /usr/bin/systemctl --no-reload disable $SERVICE > /dev/null 2>&1 || :
+        /usr/bin/systemctl stop $SERVICE > /dev/null 2>&1 || :
+      done
+    fi
+  fi
+%else
+  %if 0%{?rhel} || 0%{?fedora}
+    if [ $1 = 0 ] ; then
+      /sbin/service ceph stop >/dev/null 2>&1
+      /sbin/chkconfig --del ceph
+    fi
+  %endif
+%endif
+
+%postun base
+/sbin/ldconfig
+%if 0%{?_with_systemd}
+  if [ $1 = 1 ] ; then
+    # Restart on upgrade, but only if "CEPH_AUTO_RESTART_ON_UPGRADE" is set to
+    # "yes". In any case: if units are not running, do not touch them.
+    SYSCONF_CEPH=/etc/sysconfig/ceph
+    if [ -f $SYSCONF_CEPH -a -r $SYSCONF_CEPH ] ; then
+      source $SYSCONF_CEPH
+    fi
+    if [ "X$CEPH_AUTO_RESTART_ON_UPGRADE" = "Xyes" ] ; then
+      SERVICE_LIST=$(systemctl | grep -E '^ceph-mon@|^ceph-create-keys@|^ceph-osd@|^ceph-mds@|^ceph-disk-'  | cut -d' ' -f1)
+      if [ -n "$SERVICE_LIST" ]; then
+        for SERVICE in $SERVICE_LIST; do
+          /usr/bin/systemctl try-restart $SERVICE > /dev/null 2>&1 || :
+        done
+      fi
+    fi
+  fi
+%endif
+
 #################################################################################
-%files -n ceph-common
+%files common
 %defattr(-,root,root,-)
 %{_bindir}/ceph
 %{_bindir}/ceph-authtool
@@ -943,12 +967,11 @@ rm -rf $RPM_BUILD_ROOT
 %{_mandir}/man8/rbd-replay.8*
 %{_mandir}/man8/rbd-replay-many.8*
 %{_mandir}/man8/rbd-replay-prep.8*
+%dir %{_datadir}/ceph/
 %{_datadir}/ceph/known_hosts_drop.ceph.com
 %{_datadir}/ceph/id_dsa_drop.ceph.com
 %{_datadir}/ceph/id_dsa_drop.ceph.com.pub
 %dir %{_sysconfdir}/ceph/
-%dir %{_datarootdir}/ceph/
-%dir %{_libexecdir}/ceph/
 %config %{_sysconfdir}/bash_completion.d/rados
 %config %{_sysconfdir}/bash_completion.d/rbd
 %config(noreplace) %{_sysconfdir}/ceph/rbdmap
@@ -963,7 +986,7 @@ rm -rf $RPM_BUILD_ROOT
 %attr(3770,ceph,ceph) %dir %{_localstatedir}/log/ceph/
 %attr(750,ceph,ceph) %dir %{_localstatedir}/lib/ceph/
 
-%pre -n ceph-common
+%pre common
 CEPH_GROUP_ID=""
 CEPH_USER_ID=""
 %if 0%{?rhel} || 0%{?fedora}
@@ -980,12 +1003,12 @@ getent passwd ceph >/dev/null || useradd -r -g ceph -d %{_localstatedir}/lib/cep
 %endif
 exit 0
 
-%post -n ceph-common
+%post common
 %if 0%{?_with_systemd}
 %tmpfiles_create %{_tmpfilesdir}/ceph-common.conf
 %endif
 
-%postun -n ceph-common
+%postun common
 # Package removal cleanup
 if [ "$1" -eq "0" ] ; then
     rm -rf /var/log/ceph
@@ -993,6 +1016,36 @@ if [ "$1" -eq "0" ] ; then
 fi
 
 #################################################################################
+%files mds
+%{_bindir}/ceph-mds
+%{_bindir}/cephfs-journal-tool
+%{_bindir}/cephfs-table-tool
+%{_bindir}/cephfs-data-scan
+%{_mandir}/man8/ceph-mds.8*
+%if 0%{?_with_systemd}
+%{_unitdir}/ceph-mds@.service
+%{_unitdir}/ceph-mds.target
+%else
+%{_initrddir}/ceph
+%endif
+%attr(750,ceph,ceph) %dir %{_localstatedir}/lib/ceph/mds
+
+#################################################################################
+%files mon
+%{_bindir}/ceph-mon
+%{_bindir}/ceph-rest-api
+%{_mandir}/man8/ceph-mon.8*
+%{_mandir}/man8/ceph-rest-api.8*
+%{python_sitelib}/ceph_rest_api.py*
+%if 0%{?_with_systemd}
+%{_unitdir}/ceph-mon@.service
+%{_unitdir}/ceph-mon.target
+%else
+%{_initrddir}/ceph
+%endif
+%attr(750,ceph,ceph) %dir %{_localstatedir}/lib/ceph/mon
+
+#################################################################################
 %files fuse
 %defattr(-,root,root,-)
 %{_bindir}/ceph-fuse
@@ -1010,6 +1063,12 @@ fi
 %{_mandir}/man8/rbd-fuse.8*
 
 #################################################################################
+%files -n rbd-mirror
+%defattr(-,root,root,-)
+%{_bindir}/rbd-mirror
+%{_mandir}/man8/rbd-mirror.8*
+
+#################################################################################
 %files -n rbd-nbd
 %defattr(-,root,root,-)
 %{_bindir}/rbd-nbd
@@ -1026,6 +1085,8 @@ fi
 %config %{_sysconfdir}/bash_completion.d/radosgw-admin
 %dir %{_localstatedir}/lib/ceph/radosgw
 %if 0%{?_with_systemd}
+%{_unitdir}/ceph-radosgw@.service
+%{_unitdir}/ceph-radosgw.target
 %else
 %{_initrddir}/ceph-radosgw
 %{_sbindir}/rcceph-radosgw
@@ -1076,6 +1137,29 @@ fi
 %endif
 
 #################################################################################
+%files osd
+%{_bindir}/ceph-clsinfo
+%{_bindir}/ceph-bluefs-tool
+%{_bindir}/ceph-objectstore-tool
+%{_bindir}/ceph-osd
+%{_sbindir}/ceph-disk
+%{_sbindir}/ceph-disk-udev
+%{_libexecdir}/ceph/ceph-osd-prestart.sh
+%{_udevrulesdir}/60-ceph-partuuid-workaround.rules
+%{_udevrulesdir}/95-ceph-osd.rules
+%{_mandir}/man8/ceph-clsinfo.8*
+%{_mandir}/man8/ceph-disk.8*
+%{_mandir}/man8/ceph-osd.8*
+%if 0%{?_with_systemd}
+%{_unitdir}/ceph-osd@.service
+%{_unitdir}/ceph-osd.target
+%{_unitdir}/ceph-disk@.service
+%else
+%{_initrddir}/ceph
+%endif
+%attr(750,ceph,ceph) %dir %{_localstatedir}/lib/ceph/osd
+
+#################################################################################
 %if %{with ocf}
 %files resource-agents
 %defattr(0755,root,root,-)
@@ -1116,6 +1200,8 @@ fi
 %if 0%{?_with_lttng}
 %{_libdir}/librados_tp.so
 %endif
+%{_bindir}/librados-config
+%{_mandir}/man8/librados-config.8*
 
 #################################################################################
 %files -n python-rados
@@ -1226,9 +1312,12 @@ ln -sf %{_libdir}/librbd.so.1 /usr/lib64/qemu/librbd.so.1
 %{_bindir}/ceph_test_*
 %{_bindir}/ceph_tpbench
 %{_bindir}/ceph_xattr_bench
+%{_bindir}/ceph-coverage
 %{_bindir}/ceph-monstore-tool
 %{_bindir}/ceph-osdomap-tool
 %{_bindir}/ceph-kvstore-tool
+%{_bindir}/ceph-debugpack
+%{_mandir}/man8/ceph-debugpack.8*
 %dir %{_libdir}/ceph
 %{_libdir}/ceph/ceph-monstore-update-crush.sh
 
@@ -1377,4 +1466,5 @@ exit 0
 # We need an empty %%files list for python-ceph-compat, to tell rpmbuild to
 # actually build this meta package.
 
+
 %changelog
diff --git a/src/test/formatter.cc b/src/test/formatter.cc
index aab7e59..3913cc6 100644
--- a/src/test/formatter.cc
+++ b/src/test/formatter.cc
@@ -14,6 +14,7 @@
 
 #include "test/unit.h"
 #include "common/Formatter.h"
+#include "common/HTMLFormatter.h"
 
 #include <sstream>
 #include <string>
@@ -130,7 +131,7 @@ TEST(XmlFormatter, DTD) {
   ostringstream oss;
   XMLFormatter fmt(false);
 
-  fmt.write_raw_data(XMLFormatter::XML_1_DTD);
+  fmt.output_header();
   fmt.open_array_section("foo");
   fmt.dump_stream("blah") << "hithere";
   fmt.dump_float("pi", 3.14);
@@ -144,7 +145,7 @@ TEST(XmlFormatter, Clear) {
   ostringstream oss;
   XMLFormatter fmt(false);
 
-  fmt.write_raw_data(XMLFormatter::XML_1_DTD);
+  fmt.output_header();
   fmt.open_array_section("foo");
   fmt.dump_stream("blah") << "hithere";
   fmt.dump_float("pi", 3.14);
@@ -167,7 +168,7 @@ TEST(XmlFormatter, NamespaceTest) {
   ostringstream oss;
   XMLFormatter fmt(false);
 
-  fmt.write_raw_data(XMLFormatter::XML_1_DTD);
+  fmt.output_header();
   fmt.open_array_section_in_ns("foo",
 			   "http://s3.amazonaws.com/doc/2006-03-01/");
   fmt.dump_stream("blah") << "hithere";
@@ -197,3 +198,145 @@ TEST(XmlFormatter, DumpFormatNameSpaceTest) {
   fmt.flush(oss2);
   ASSERT_EQ(oss2.str(),"<foo>bar</foo>");
 }
+
+TEST(HtmlFormatter, Simple1) {
+  ostringstream oss;
+  HTMLFormatter fmt(false);
+  fmt.open_object_section("foo");
+  fmt.dump_int("a", 1);
+  fmt.dump_int("b", 2);
+  fmt.dump_int("c", 3);
+  fmt.close_section();
+  fmt.flush(oss);
+  ASSERT_EQ(oss.str(), "<foo><li>a: 1</li><li>b: 2</li><li>c: 3</li></foo>");
+}
+
+TEST(HtmlFormatter, Simple2) {
+  ostringstream oss;
+  HTMLFormatter fmt(false);
+  fmt.open_object_section("foo");
+  fmt.open_object_section("bar");
+  fmt.dump_int("int", 0xf00000000000ll);
+  fmt.dump_unsigned("unsigned", 0x8000000000000001llu);
+  fmt.dump_float("float", 1.234);
+  fmt.close_section();
+  fmt.dump_string("string", "str");
+  fmt.close_section();
+  fmt.flush(oss);
+  ASSERT_EQ(oss.str(), "<foo><bar>\
+<li>int: 263882790666240</li>\
+<li>unsigned: 9223372036854775809</li>\
+<li>float: 1.234</li>\
+</bar><li>string: str</li>\
+</foo>");
+}
+
+TEST(HtmlFormatter, Empty) {
+  ostringstream oss;
+  HTMLFormatter fmt(false);
+  fmt.flush(oss);
+  ASSERT_EQ(oss.str(), "");
+}
+
+TEST(HtmlFormatter, DumpStream1) {
+  ostringstream oss;
+  HTMLFormatter fmt(false);
+  fmt.dump_stream("blah") << "hithere";
+  fmt.flush(oss);
+  ASSERT_EQ(oss.str(), "<li>blah: hithere</li>");
+}
+
+TEST(HtmlFormatter, DumpStream2) {
+  ostringstream oss;
+  HTMLFormatter fmt(false);
+
+  fmt.open_array_section("foo");
+  fmt.dump_stream("blah") << "hithere";
+  fmt.close_section();
+  fmt.flush(oss);
+  ASSERT_EQ(oss.str(), "<foo><li>blah: hithere</li></foo>");
+}
+
+TEST(HtmlFormatter, DumpStream3) {
+  ostringstream oss;
+  HTMLFormatter fmt(false);
+
+  fmt.open_array_section("foo");
+  fmt.dump_stream("blah") << "hithere";
+  fmt.dump_float("pi", 3.14);
+  fmt.close_section();
+  fmt.flush(oss);
+  ASSERT_EQ(oss.str(), "<foo><li>blah: hithere</li><li>pi: 3.14</li></foo>");
+}
+
+TEST(HtmlFormatter, DTD) {
+  ostringstream oss;
+  HTMLFormatter fmt(false);
+
+  fmt.write_raw_data(HTMLFormatter::XML_1_DTD);
+  fmt.open_array_section("foo");
+  fmt.dump_stream("blah") << "hithere";
+  fmt.dump_float("pi", 3.14);
+  fmt.close_section();
+  fmt.flush(oss);
+  ASSERT_EQ(oss.str(), "<?xml version=\"1.0\" encoding=\"UTF-8\"?>"
+    "<foo><li>blah: hithere</li><li>pi: 3.14</li></foo>");
+}
+
+TEST(HtmlFormatter, Clear) {
+  ostringstream oss;
+  HTMLFormatter fmt(false);
+
+  fmt.write_raw_data(HTMLFormatter::XML_1_DTD);
+  fmt.open_array_section("foo");
+  fmt.dump_stream("blah") << "hithere";
+  fmt.dump_float("pi", 3.14);
+  fmt.close_section();
+  fmt.flush(oss);
+  ASSERT_EQ(oss.str(), "<?xml version=\"1.0\" encoding=\"UTF-8\"?>"
+    "<foo><li>blah: hithere</li><li>pi: 3.14</li></foo>");
+
+  ostringstream oss2;
+  fmt.flush(oss2);
+  ASSERT_EQ(oss2.str(), "");
+
+  ostringstream oss3;
+  fmt.reset();
+  fmt.flush(oss3);
+  ASSERT_EQ(oss3.str(), "");
+}
+
+TEST(HtmlFormatter, NamespaceTest) {
+  ostringstream oss;
+  HTMLFormatter fmt(false);
+
+  fmt.write_raw_data(HTMLFormatter::XML_1_DTD);
+  fmt.open_array_section_in_ns("foo",
+			   "http://s3.amazonaws.com/doc/2006-03-01/");
+  fmt.dump_stream("blah") << "hithere";
+  fmt.dump_float("pi", 3.14);
+  fmt.close_section();
+  fmt.flush(oss);
+  ASSERT_EQ(oss.str(), "<?xml version=\"1.0\" encoding=\"UTF-8\"?>"
+    "<foo xmlns=\"http://s3.amazonaws.com/doc/2006-03-01/\">"
+    "<li>blah: hithere</li><li>pi: 3.14</li></foo>");
+}
+
+TEST(HtmlFormatter, DumpFormatNameSpaceTest) {
+  ostringstream oss1;
+  HTMLFormatter fmt(false);
+
+  fmt.dump_format_ns("foo",
+		     "http://s3.amazonaws.com/doc/2006-03-01/",
+		     "%s","bar");
+  fmt.flush(oss1);
+  ASSERT_EQ(oss1.str(),
+	    "<li xmlns=\"http://s3.amazonaws.com/doc/2006-03-01/\">foo: bar</li>");
+
+  // Testing with a null ns..should be same as dump format
+  ostringstream oss2;
+  fmt.reset();
+  fmt.dump_format_ns("foo",NULL,"%s","bar");
+  fmt.flush(oss2);
+  ASSERT_EQ(oss2.str(),"<li>foo: bar</li>");
+}
diff --git a/src/test/journal/RadosTestFixture.cc b/src/test/journal/RadosTestFixture.cc
index d5f1b32..c965c52 100644
--- a/src/test/journal/RadosTestFixture.cc
+++ b/src/test/journal/RadosTestFixture.cc
@@ -52,7 +52,9 @@ int RadosTestFixture::append(const std::string &oid, const bufferlist &bl) {
 int RadosTestFixture::client_register(const std::string &oid,
                                       const std::string &id,
                                       const std::string &description) {
-  return cls::journal::client::client_register(m_ioctx, oid, id, description);
+  bufferlist data;
+  data.append(description);
+  return cls::journal::client::client_register(m_ioctx, oid, id, data);
 }
 
 int RadosTestFixture::client_commit(const std::string &oid,
diff --git a/src/test/journal/test_Entry.cc b/src/test/journal/test_Entry.cc
index e042978..44fbb0f 100644
--- a/src/test/journal/test_Entry.cc
+++ b/src/test/journal/test_Entry.cc
@@ -9,8 +9,8 @@ class TestEntry : public ::testing::Test {
 
 TEST_F(TestEntry, DefaultConstructor) {
   journal::Entry entry;
-  ASSERT_EQ(0U, entry.get_tid());
-  ASSERT_EQ("", entry.get_tag());
+  ASSERT_EQ(0U, entry.get_entry_tid());
+  ASSERT_EQ(0U, entry.get_tag_tid());
 
   bufferlist data(entry.get_data());
   bufferlist expected_data;
@@ -20,7 +20,7 @@ TEST_F(TestEntry, DefaultConstructor) {
 TEST_F(TestEntry, Constructor) {
   bufferlist data;
   data.append("data");
-  journal::Entry entry("tag", 123, data);
+  journal::Entry entry(234, 123, data);
 
   data.clear();
   data = entry.get_data();
@@ -28,15 +28,15 @@ TEST_F(TestEntry, Constructor) {
   bufferlist expected_data;
   expected_data.append("data");
 
-  ASSERT_EQ(123U, entry.get_tid());
-  ASSERT_EQ("tag", entry.get_tag());
+  ASSERT_EQ(123U, entry.get_entry_tid());
+  ASSERT_EQ(234U, entry.get_tag_tid());
   ASSERT_TRUE(data.contents_equal(expected_data));
 }
 
 TEST_F(TestEntry, IsReadable) {
   bufferlist data;
   data.append("data");
-  journal::Entry entry("tag", 123, data);
+  journal::Entry entry(234, 123, data);
 
   bufferlist full_bl;
   ::encode(entry, full_bl);
@@ -58,7 +58,7 @@ TEST_F(TestEntry, IsReadable) {
 TEST_F(TestEntry, IsReadableBadPreamble) {
   bufferlist data;
   data.append("data");
-  journal::Entry entry("tag", 123, data);
+  journal::Entry entry(234, 123, data);
 
   uint64_t stray_bytes = 0x1122334455667788;
   bufferlist full_bl;
@@ -78,7 +78,7 @@ TEST_F(TestEntry, IsReadableBadPreamble) {
 TEST_F(TestEntry, IsReadableBadCRC) {
   bufferlist data;
   data.append("data");
-  journal::Entry entry("tag", 123, data);
+  journal::Entry entry(234, 123, data);
 
   bufferlist full_bl;
   ::encode(entry, full_bl);
diff --git a/src/test/journal/test_FutureImpl.cc b/src/test/journal/test_FutureImpl.cc
index 5d5bb04..3a45a8d 100644
--- a/src/test/journal/test_FutureImpl.cc
+++ b/src/test/journal/test_FutureImpl.cc
@@ -40,12 +40,12 @@ public:
     m_finisher->start();
   }
 
-  journal::FutureImplPtr create_future(const std::string &tag, uint64_t tid,
+  journal::FutureImplPtr create_future(uint64_t tag_tid, uint64_t entry_tid,
                                        uint64_t commit_tid,
                                        const journal::FutureImplPtr &prev =
                                          journal::FutureImplPtr()) {
     journal::FutureImplPtr future(new journal::FutureImpl(*m_finisher,
-                                                          tag, tid,
+                                                          tag_tid, entry_tid,
                                                           commit_tid));
     future->init(prev);
     return future;
@@ -60,20 +60,20 @@ public:
 };
 
 TEST_F(TestFutureImpl, Getters) {
-  journal::FutureImplPtr future = create_future("tag", 123, 456);
-  ASSERT_EQ("tag", future->get_tag());
-  ASSERT_EQ(123U, future->get_tid());
+  journal::FutureImplPtr future = create_future(234, 123, 456);
+  ASSERT_EQ(234U, future->get_tag_tid());
+  ASSERT_EQ(123U, future->get_entry_tid());
   ASSERT_EQ(456U, future->get_commit_tid());
 }
 
 TEST_F(TestFutureImpl, Attach) {
-  journal::FutureImplPtr future = create_future("tag", 123, 456);
+  journal::FutureImplPtr future = create_future(234, 123, 456);
   ASSERT_FALSE(future->attach(&m_flush_handler));
   ASSERT_EQ(1U, m_flush_handler.refs);
 }
 
 TEST_F(TestFutureImpl, AttachWithPendingFlush) {
-  journal::FutureImplPtr future = create_future("tag", 123, 456);
+  journal::FutureImplPtr future = create_future(234, 123, 456);
   future->flush(NULL);
 
   ASSERT_TRUE(future->attach(&m_flush_handler));
@@ -81,21 +81,21 @@ TEST_F(TestFutureImpl, AttachWithPendingFlush) {
 }
 
 TEST_F(TestFutureImpl, Detach) {
-  journal::FutureImplPtr future = create_future("tag", 123, 456);
+  journal::FutureImplPtr future = create_future(234, 123, 456);
   ASSERT_FALSE(future->attach(&m_flush_handler));
   future->detach();
   ASSERT_EQ(0U, m_flush_handler.refs);
 }
 
 TEST_F(TestFutureImpl, DetachImplicit) {
-  journal::FutureImplPtr future = create_future("tag", 123, 456);
+  journal::FutureImplPtr future = create_future(234, 123, 456);
   ASSERT_FALSE(future->attach(&m_flush_handler));
   future.reset();
   ASSERT_EQ(0U, m_flush_handler.refs);
 }
 
 TEST_F(TestFutureImpl, Flush) {
-  journal::FutureImplPtr future = create_future("tag", 123, 456);
+  journal::FutureImplPtr future = create_future(234, 123, 456);
   ASSERT_FALSE(future->attach(&m_flush_handler));
 
   C_SaferCond cond;
@@ -107,7 +107,7 @@ TEST_F(TestFutureImpl, Flush) {
 }
 
 TEST_F(TestFutureImpl, FlushWithoutContext) {
-  journal::FutureImplPtr future = create_future("tag", 123, 456);
+  journal::FutureImplPtr future = create_future(234, 123, 456);
   ASSERT_FALSE(future->attach(&m_flush_handler));
 
   future->flush(NULL);
@@ -118,9 +118,9 @@ TEST_F(TestFutureImpl, FlushWithoutContext) {
 }
 
 TEST_F(TestFutureImpl, FlushChain) {
-  journal::FutureImplPtr future1 = create_future("tag1", 123, 456);
-  journal::FutureImplPtr future2 = create_future("tag1", 124, 457, future1);
-  journal::FutureImplPtr future3 = create_future("tag2", 1, 458, future2);
+  journal::FutureImplPtr future1 = create_future(234, 123, 456);
+  journal::FutureImplPtr future2 = create_future(234, 124, 457, future1);
+  journal::FutureImplPtr future3 = create_future(235, 1, 458, future2);
   ASSERT_FALSE(future1->attach(&m_flush_handler));
   ASSERT_FALSE(future2->attach(&m_flush_handler));
   ASSERT_FALSE(future3->attach(&m_flush_handler));
@@ -144,8 +144,8 @@ TEST_F(TestFutureImpl, FlushChain) {
 }
 
 TEST_F(TestFutureImpl, FlushInProgress) {
-  journal::FutureImplPtr future1 = create_future("tag1", 123, 456);
-  journal::FutureImplPtr future2 = create_future("tag1", 124, 457, future1);
+  journal::FutureImplPtr future1 = create_future(234, 123, 456);
+  journal::FutureImplPtr future2 = create_future(234, 124, 457, future1);
   ASSERT_FALSE(future1->attach(&m_flush_handler));
   ASSERT_FALSE(future2->attach(&m_flush_handler));
 
@@ -159,7 +159,7 @@ TEST_F(TestFutureImpl, FlushInProgress) {
 }
 
 TEST_F(TestFutureImpl, FlushAlreadyComplete) {
-  journal::FutureImplPtr future = create_future("tag1", 123, 456);
+  journal::FutureImplPtr future = create_future(234, 123, 456);
   future->safe(-EIO);
 
   C_SaferCond cond;
@@ -168,7 +168,7 @@ TEST_F(TestFutureImpl, FlushAlreadyComplete) {
 }
 
 TEST_F(TestFutureImpl, Wait) {
-  journal::FutureImplPtr future = create_future("tag", 1, 456);
+  journal::FutureImplPtr future = create_future(234, 1, 456);
 
   C_SaferCond cond;
   future->wait(&cond);
@@ -177,7 +177,7 @@ TEST_F(TestFutureImpl, Wait) {
 }
 
 TEST_F(TestFutureImpl, WaitAlreadyComplete) {
-  journal::FutureImplPtr future = create_future("tag", 1, 456);
+  journal::FutureImplPtr future = create_future(234, 1, 456);
   future->safe(-EEXIST);
 
   C_SaferCond cond;
@@ -186,8 +186,8 @@ TEST_F(TestFutureImpl, WaitAlreadyComplete) {
 }
 
 TEST_F(TestFutureImpl, SafePreservesError) {
-  journal::FutureImplPtr future1 = create_future("tag1", 123, 456);
-  journal::FutureImplPtr future2 = create_future("tag1", 124, 457, future1);
+  journal::FutureImplPtr future1 = create_future(234, 123, 456);
+  journal::FutureImplPtr future2 = create_future(234, 124, 457, future1);
 
   future1->safe(-EIO);
   future2->safe(-EEXIST);
@@ -196,8 +196,8 @@ TEST_F(TestFutureImpl, SafePreservesError) {
 }
 
 TEST_F(TestFutureImpl, ConsistentPreservesError) {
-  journal::FutureImplPtr future1 = create_future("tag1", 123, 456);
-  journal::FutureImplPtr future2 = create_future("tag1", 124, 457, future1);
+  journal::FutureImplPtr future1 = create_future(234, 123, 456);
+  journal::FutureImplPtr future2 = create_future(234, 124, 457, future1);
 
   future2->safe(-EEXIST);
   future1->safe(-EIO);
diff --git a/src/test/journal/test_JournalMetadata.cc b/src/test/journal/test_JournalMetadata.cc
index e0bd918..1a97ae7 100644
--- a/src/test/journal/test_JournalMetadata.cc
+++ b/src/test/journal/test_JournalMetadata.cc
@@ -70,7 +70,7 @@ TEST_F(TestJournalMetadata, SetCommitPositions) {
 
   journal::JournalMetadata::EntryPositions entry_positions;
   entry_positions = {
-    cls::journal::EntryPosition("tag1", 122)};
+    cls::journal::EntryPosition(123, 122)};
   commit_position = journal::JournalMetadata::ObjectSetPosition(1, entry_positions);
 
   C_SaferCond cond;
diff --git a/src/test/journal/test_JournalPlayer.cc b/src/test/journal/test_JournalPlayer.cc
index c4c2f92..6636327 100644
--- a/src/test/journal/test_JournalPlayer.cc
+++ b/src/test/journal/test_JournalPlayer.cc
@@ -31,10 +31,6 @@ public:
     virtual void get() {}
     virtual void put() {}
 
-    virtual bool filter_entry(const std::string &tag) {
-      return false;
-    }
-
     virtual void handle_entries_available() {
       Mutex::Locker locker(lock);
       entries_available = true;
@@ -70,10 +66,10 @@ public:
     return RadosTestFixture::client_commit(oid, "client", position);
   }
 
-  journal::Entry create_entry(const std::string &tag, uint64_t tid) {
+  journal::Entry create_entry(uint64_t tag_tid, uint64_t entry_tid) {
     bufferlist payload_bl;
     payload_bl.append("playload");
-    return journal::Entry(tag, tid, payload_bl);
+    return journal::Entry(tag_tid, entry_tid, payload_bl);
   }
 
   journal::JournalMetadataPtr create_metadata(const std::string &oid) {
@@ -134,9 +130,9 @@ public:
   }
 
   int write_entry(const std::string &oid, uint64_t object_num,
-                  const std::string &tag, uint64_t tid) {
+                  uint64_t tag_tid, uint64_t entry_tid) {
     bufferlist bl;
-    ::encode(create_entry(tag, tid), bl);
+    ::encode(create_entry(tag_tid, entry_tid), bl);
     return append(oid + "." + stringify(object_num), bl);
   }
 
@@ -149,7 +145,7 @@ TEST_F(TestJournalPlayer, Prefetch) {
 
   journal::JournalPlayer::EntryPositions positions;
   positions = {
-    cls::journal::EntryPosition("tag1", 122) };
+    cls::journal::EntryPosition(234, 122) };
   cls::journal::ObjectSetPosition commit_position(0, positions);
 
   ASSERT_EQ(0, create(oid));
@@ -161,10 +157,10 @@ TEST_F(TestJournalPlayer, Prefetch) {
 
   journal::JournalPlayer *player = create_player(oid, metadata);
 
-  ASSERT_EQ(0, write_entry(oid, 0, "tag1", 122));
-  ASSERT_EQ(0, write_entry(oid, 1, "tag1", 123));
-  ASSERT_EQ(0, write_entry(oid, 0, "tag1", 124));
-  ASSERT_EQ(0, write_entry(oid, 1, "tag1", 125));
+  ASSERT_EQ(0, write_entry(oid, 0, 234, 122));
+  ASSERT_EQ(0, write_entry(oid, 1, 234, 123));
+  ASSERT_EQ(0, write_entry(oid, 0, 234, 124));
+  ASSERT_EQ(0, write_entry(oid, 1, 234, 125));
 
   player->prefetch();
 
@@ -174,13 +170,13 @@ TEST_F(TestJournalPlayer, Prefetch) {
 
   Entries expected_entries;
   expected_entries = {
-    create_entry("tag1", 123),
-    create_entry("tag1", 124),
-    create_entry("tag1", 125)};
+    create_entry(234, 123),
+    create_entry(234, 124),
+    create_entry(234, 125)};
   ASSERT_EQ(expected_entries, entries);
 
   uint64_t last_tid;
-  ASSERT_TRUE(metadata->get_last_allocated_tid("tag1", &last_tid));
+  ASSERT_TRUE(metadata->get_last_allocated_entry_tid(234, &last_tid));
   ASSERT_EQ(125U, last_tid);
 }
 
@@ -189,7 +185,7 @@ TEST_F(TestJournalPlayer, PrefetchSkip) {
 
   journal::JournalPlayer::EntryPositions positions;
   positions = {
-    cls::journal::EntryPosition("tag1", 125) };
+    cls::journal::EntryPosition(234, 125) };
   cls::journal::ObjectSetPosition commit_position(0, positions);
 
   ASSERT_EQ(0, create(oid));
@@ -201,10 +197,10 @@ TEST_F(TestJournalPlayer, PrefetchSkip) {
 
   journal::JournalPlayer *player = create_player(oid, metadata);
 
-  ASSERT_EQ(0, write_entry(oid, 0, "tag1", 122));
-  ASSERT_EQ(0, write_entry(oid, 1, "tag1", 123));
-  ASSERT_EQ(0, write_entry(oid, 0, "tag1", 124));
-  ASSERT_EQ(0, write_entry(oid, 1, "tag1", 125));
+  ASSERT_EQ(0, write_entry(oid, 0, 234, 122));
+  ASSERT_EQ(0, write_entry(oid, 1, 234, 123));
+  ASSERT_EQ(0, write_entry(oid, 0, 234, 124));
+  ASSERT_EQ(0, write_entry(oid, 1, 234, 125));
 
   player->prefetch();
 
@@ -213,7 +209,7 @@ TEST_F(TestJournalPlayer, PrefetchSkip) {
   ASSERT_TRUE(wait_for_complete(player));
 
   uint64_t last_tid;
-  ASSERT_TRUE(metadata->get_last_allocated_tid("tag1", &last_tid));
+  ASSERT_TRUE(metadata->get_last_allocated_entry_tid(234, &last_tid));
   ASSERT_EQ(125U, last_tid);
 }
 
@@ -231,8 +227,8 @@ TEST_F(TestJournalPlayer, PrefetchWithoutCommit) {
 
   journal::JournalPlayer *player = create_player(oid, metadata);
 
-  ASSERT_EQ(0, write_entry(oid, 0, "tag1", 122));
-  ASSERT_EQ(0, write_entry(oid, 1, "tag1", 123));
+  ASSERT_EQ(0, write_entry(oid, 0, 234, 122));
+  ASSERT_EQ(0, write_entry(oid, 1, 234, 123));
 
   player->prefetch();
 
@@ -242,8 +238,8 @@ TEST_F(TestJournalPlayer, PrefetchWithoutCommit) {
 
   Entries expected_entries;
   expected_entries = {
-    create_entry("tag1", 122),
-    create_entry("tag1", 123)};
+    create_entry(234, 122),
+    create_entry(234, 123)};
   ASSERT_EQ(expected_entries, entries);
 }
 
@@ -252,8 +248,8 @@ TEST_F(TestJournalPlayer, PrefetchMultipleTags) {
 
   journal::JournalPlayer::EntryPositions positions;
   positions = {
-    cls::journal::EntryPosition("tag1", 122),
-    cls::journal::EntryPosition("tag2", 1)};
+    cls::journal::EntryPosition(234, 122),
+    cls::journal::EntryPosition(345, 1)};
   cls::journal::ObjectSetPosition commit_position(0, positions);
 
   ASSERT_EQ(0, create(oid));
@@ -265,14 +261,14 @@ TEST_F(TestJournalPlayer, PrefetchMultipleTags) {
 
   journal::JournalPlayer *player = create_player(oid, metadata);
 
-  ASSERT_EQ(0, write_entry(oid, 0, "tag1", 120));
-  ASSERT_EQ(0, write_entry(oid, 0, "tag2", 0));
-  ASSERT_EQ(0, write_entry(oid, 1, "tag1", 121));
-  ASSERT_EQ(0, write_entry(oid, 1, "tag2", 1));
-  ASSERT_EQ(0, write_entry(oid, 0, "tag1", 122));
-  ASSERT_EQ(0, write_entry(oid, 1, "tag1", 123));
-  ASSERT_EQ(0, write_entry(oid, 0, "tag1", 124));
-  ASSERT_EQ(0, write_entry(oid, 0, "tag2", 2));
+  ASSERT_EQ(0, write_entry(oid, 0, 234, 120));
+  ASSERT_EQ(0, write_entry(oid, 0, 345, 0));
+  ASSERT_EQ(0, write_entry(oid, 1, 234, 121));
+  ASSERT_EQ(0, write_entry(oid, 1, 345, 1));
+  ASSERT_EQ(0, write_entry(oid, 0, 234, 122));
+  ASSERT_EQ(0, write_entry(oid, 1, 234, 123));
+  ASSERT_EQ(0, write_entry(oid, 0, 234, 124));
+  ASSERT_EQ(0, write_entry(oid, 0, 345, 2));
 
   player->prefetch();
 
@@ -281,9 +277,9 @@ TEST_F(TestJournalPlayer, PrefetchMultipleTags) {
   ASSERT_TRUE(wait_for_complete(player));
 
   uint64_t last_tid;
-  ASSERT_TRUE(metadata->get_last_allocated_tid("tag1", &last_tid));
+  ASSERT_TRUE(metadata->get_last_allocated_entry_tid(234, &last_tid));
   ASSERT_EQ(124U, last_tid);
-  ASSERT_TRUE(metadata->get_last_allocated_tid("tag2", &last_tid));
+  ASSERT_TRUE(metadata->get_last_allocated_entry_tid(345, &last_tid));
   ASSERT_EQ(2U, last_tid);
 }
 
@@ -301,10 +297,10 @@ TEST_F(TestJournalPlayer, PrefetchCorruptSequence) {
 
   journal::JournalPlayer *player = create_player(oid, metadata);
 
-  ASSERT_EQ(0, write_entry(oid, 0, "tag1", 120));
-  ASSERT_EQ(0, write_entry(oid, 0, "tag2", 0));
-  ASSERT_EQ(0, write_entry(oid, 1, "tag1", 121));
-  ASSERT_EQ(0, write_entry(oid, 0, "tag1", 124));
+  ASSERT_EQ(0, write_entry(oid, 0, 234, 120));
+  ASSERT_EQ(0, write_entry(oid, 0, 345, 0));
+  ASSERT_EQ(0, write_entry(oid, 1, 234, 121));
+  ASSERT_EQ(0, write_entry(oid, 0, 234, 124));
 
   player->prefetch();
   Entries entries;
@@ -322,7 +318,7 @@ TEST_F(TestJournalPlayer, PrefetchAndWatch) {
 
   journal::JournalPlayer::EntryPositions positions;
   positions = {
-    cls::journal::EntryPosition("tag1", 122)};
+    cls::journal::EntryPosition(234, 122)};
   cls::journal::ObjectSetPosition commit_position(0, positions);
 
   ASSERT_EQ(0, create(oid));
@@ -334,22 +330,22 @@ TEST_F(TestJournalPlayer, PrefetchAndWatch) {
 
   journal::JournalPlayer *player = create_player(oid, metadata);
 
-  ASSERT_EQ(0, write_entry(oid, 0, "tag1", 122));
+  ASSERT_EQ(0, write_entry(oid, 0, 234, 122));
 
   player->prefetch_and_watch(0.25);
 
   Entries entries;
-  ASSERT_EQ(0, write_entry(oid, 1, "tag1", 123));
+  ASSERT_EQ(0, write_entry(oid, 1, 234, 123));
   ASSERT_TRUE(wait_for_entries(player, 1, &entries));
 
   Entries expected_entries;
-  expected_entries = {create_entry("tag1", 123)};
+  expected_entries = {create_entry(234, 123)};
   ASSERT_EQ(expected_entries, entries);
 
-  ASSERT_EQ(0, write_entry(oid, 0, "tag1", 124));
+  ASSERT_EQ(0, write_entry(oid, 0, 234, 124));
   ASSERT_TRUE(wait_for_entries(player, 1, &entries));
 
-  expected_entries = {create_entry("tag1", 124)};
+  expected_entries = {create_entry(234, 124)};
   ASSERT_EQ(expected_entries, entries);
 }
 
@@ -368,11 +364,11 @@ TEST_F(TestJournalPlayer, PrefetchSkippedObject) {
 
   journal::JournalPlayer *player = create_player(oid, metadata);
 
-  ASSERT_EQ(0, write_entry(oid, 0, "tag1", 122));
-  ASSERT_EQ(0, write_entry(oid, 1, "tag1", 123));
-  ASSERT_EQ(0, write_entry(oid, 5, "tag1", 124));
-  ASSERT_EQ(0, write_entry(oid, 6, "tag1", 125));
-  ASSERT_EQ(0, write_entry(oid, 7, "tag1", 126));
+  ASSERT_EQ(0, write_entry(oid, 0, 234, 122));
+  ASSERT_EQ(0, write_entry(oid, 1, 234, 123));
+  ASSERT_EQ(0, write_entry(oid, 5, 234, 124));
+  ASSERT_EQ(0, write_entry(oid, 6, 234, 125));
+  ASSERT_EQ(0, write_entry(oid, 7, 234, 126));
 
   player->prefetch();
 
@@ -382,14 +378,14 @@ TEST_F(TestJournalPlayer, PrefetchSkippedObject) {
 
   Entries expected_entries;
   expected_entries = {
-    create_entry("tag1", 122),
-    create_entry("tag1", 123),
-    create_entry("tag1", 124),
-    create_entry("tag1", 125),
-    create_entry("tag1", 126)};
+    create_entry(234, 122),
+    create_entry(234, 123),
+    create_entry(234, 124),
+    create_entry(234, 125),
+    create_entry(234, 126)};
   ASSERT_EQ(expected_entries, entries);
 
   uint64_t last_tid;
-  ASSERT_TRUE(metadata->get_last_allocated_tid("tag1", &last_tid));
+  ASSERT_TRUE(metadata->get_last_allocated_entry_tid(234, &last_tid));
   ASSERT_EQ(126U, last_tid);
 }
diff --git a/src/test/journal/test_JournalRecorder.cc b/src/test/journal/test_JournalRecorder.cc
index 73b3f35..099c9a2 100644
--- a/src/test/journal/test_JournalRecorder.cc
+++ b/src/test/journal/test_JournalRecorder.cc
@@ -50,7 +50,7 @@ TEST_F(TestJournalRecorder, Append) {
 
   journal::JournalRecorder *recorder = create_recorder(oid, metadata);
 
-  journal::Future future1 = recorder->append("tag1", create_payload("payload"));
+  journal::Future future1 = recorder->append(123, create_payload("payload"));
 
   C_SaferCond cond;
   future1.flush(&cond);
@@ -68,8 +68,8 @@ TEST_F(TestJournalRecorder, AppendKnownOverflow) {
 
   journal::JournalRecorder *recorder = create_recorder(oid, metadata);
 
-  recorder->append("tag1", create_payload(std::string(1 << 12, '1')));
-  journal::Future future2 = recorder->append("tag1", create_payload(std::string(1, '2')));
+  recorder->append(123, create_payload(std::string(1 << 12, '1')));
+  journal::Future future2 = recorder->append(123, create_payload(std::string(1, '2')));
 
   C_SaferCond cond;
   future2.flush(&cond);
@@ -90,10 +90,10 @@ TEST_F(TestJournalRecorder, AppendDelayedOverflow) {
   journal::JournalRecorder *recorder1 = create_recorder(oid, metadata);
   journal::JournalRecorder *recorder2 = create_recorder(oid, metadata);
 
-  recorder1->append("tag1", create_payload(std::string(1, '1')));
-  recorder2->append("tag2", create_payload(std::string(1 << 12, '2')));
+  recorder1->append(123, create_payload(std::string(1, '1')));
+  recorder2->append(234, create_payload(std::string(1 << 12, '2')));
 
-  journal::Future future = recorder2->append("tag1", create_payload(std::string(1, '3')));
+  journal::Future future = recorder2->append(123, create_payload(std::string(1, '3')));
 
   C_SaferCond cond;
   future.flush(&cond);
@@ -112,8 +112,8 @@ TEST_F(TestJournalRecorder, FutureFlush) {
 
   journal::JournalRecorder *recorder = create_recorder(oid, metadata);
 
-  journal::Future future1 = recorder->append("tag1", create_payload("payload1"));
-  journal::Future future2 = recorder->append("tag1", create_payload("payload2"));
+  journal::Future future1 = recorder->append(123, create_payload("payload1"));
+  journal::Future future2 = recorder->append(123, create_payload("payload2"));
 
   C_SaferCond cond;
   future2.flush(&cond);
@@ -132,8 +132,8 @@ TEST_F(TestJournalRecorder, Flush) {
 
   journal::JournalRecorder *recorder = create_recorder(oid, metadata);
 
-  journal::Future future1 = recorder->append("tag1", create_payload("payload1"));
-  journal::Future future2 = recorder->append("tag1", create_payload("payload2"));
+  journal::Future future1 = recorder->append(123, create_payload("payload1"));
+  journal::Future future2 = recorder->append(123, create_payload("payload2"));
 
   C_SaferCond cond1;
   recorder->flush(&cond1);
diff --git a/src/test/journal/test_JournalTrimmer.cc b/src/test/journal/test_JournalTrimmer.cc
index 18572aa..896f80c 100644
--- a/src/test/journal/test_JournalTrimmer.cc
+++ b/src/test/journal/test_JournalTrimmer.cc
@@ -27,7 +27,7 @@ public:
                      const std::string &oid, uint64_t object_num,
                      const std::string &payload, uint64_t *commit_tid) {
     int r = append(oid + "." + stringify(object_num), create_payload(payload));
-    uint64_t tid = metadata->allocate_commit_tid(object_num, "tag", 123);
+    uint64_t tid = metadata->allocate_commit_tid(object_num, 234, 123);
     if (commit_tid != NULL) {
       *commit_tid = tid;
     }
diff --git a/src/test/journal/test_Journaler.cc b/src/test/journal/test_Journaler.cc
index 5a19910..1d45bdc 100644
--- a/src/test/journal/test_Journaler.cc
+++ b/src/test/journal/test_Journaler.cc
@@ -40,7 +40,9 @@ public:
 
   int register_client(const std::string &client_id, const std::string &desc) {
     journal::Journaler journaler(m_ioctx, m_journal_id, client_id, 5);
-    return journaler.register_client(desc);
+    bufferlist data;
+    data.append(desc);
+    return journaler.register_client(data);
   }
 
   static uint64_t _journal_id;
@@ -82,3 +84,50 @@ TEST_F(TestJournaler, RegisterClientDuplicate) {
   ASSERT_EQ(-EEXIST, register_client(CLIENT_ID, "foo2"));
 }
 
+TEST_F(TestJournaler, AllocateTag) {
+  ASSERT_EQ(0, create_journal(12, 8));
+
+  cls::journal::Tag tag;
+
+  bufferlist data;
+  data.append(std::string(128, '1'));
+
+  // allocate a new tag class
+  C_SaferCond ctx1;
+  m_journaler->allocate_tag(data, &tag, &ctx1);
+  ASSERT_EQ(0, ctx1.wait());
+  ASSERT_EQ(cls::journal::Tag(0, 0, data), tag);
+
+  // re-use an existing tag class
+  C_SaferCond ctx2;
+  m_journaler->allocate_tag(tag.tag_class, bufferlist(), &tag, &ctx2);
+  ASSERT_EQ(0, ctx2.wait());
+  ASSERT_EQ(cls::journal::Tag(1, 0, bufferlist()), tag);
+}
+
+TEST_F(TestJournaler, GetTags) {
+  ASSERT_EQ(0, create_journal(12, 8));
+  ASSERT_EQ(0, register_client(CLIENT_ID, "foo"));
+
+  std::list<cls::journal::Tag> expected_tags;
+  for (size_t i = 0; i < 256; ++i) {
+    C_SaferCond ctx;
+    cls::journal::Tag tag;
+    if (i < 2) {
+      m_journaler->allocate_tag(bufferlist(), &tag, &ctx);
+    } else {
+      m_journaler->allocate_tag(i % 2, bufferlist(), &tag, &ctx);
+    }
+    ASSERT_EQ(0, ctx.wait());
+
+    if (i % 2 == 0) {
+      expected_tags.push_back(tag);
+    }
+  }
+
+  std::list<cls::journal::Tag> tags;
+  C_SaferCond ctx;
+  m_journaler->get_tags(0, &tags, &ctx);
+  ASSERT_EQ(0, ctx.wait());
+  ASSERT_EQ(expected_tags, tags);
+}
diff --git a/src/test/journal/test_ObjectPlayer.cc b/src/test/journal/test_ObjectPlayer.cc
index 3e9a2af..86bf3ca 100644
--- a/src/test/journal/test_ObjectPlayer.cc
+++ b/src/test/journal/test_ObjectPlayer.cc
@@ -27,8 +27,8 @@ public:
 TEST_F(TestObjectPlayer, Fetch) {
   std::string oid = get_temp_oid();
 
-  journal::Entry entry1("tag1", 123, create_payload(std::string(24, '1')));
-  journal::Entry entry2("tag1", 124, create_payload(std::string(24, '1')));
+  journal::Entry entry1(234, 123, create_payload(std::string(24, '1')));
+  journal::Entry entry2(234, 124, create_payload(std::string(24, '1')));
 
   bufferlist bl;
   ::encode(entry1, bl);
@@ -52,9 +52,9 @@ TEST_F(TestObjectPlayer, Fetch) {
 TEST_F(TestObjectPlayer, FetchLarge) {
   std::string oid = get_temp_oid();
 
-  journal::Entry entry1("tag1", 123,
+  journal::Entry entry1(234, 123,
                         create_payload(std::string(8192 - 33, '1')));
-  journal::Entry entry2("tag1", 124, create_payload(""));
+  journal::Entry entry2(234, 124, create_payload(""));
 
   bufferlist bl;
   ::encode(entry1, bl);
@@ -78,8 +78,8 @@ TEST_F(TestObjectPlayer, FetchLarge) {
 TEST_F(TestObjectPlayer, FetchDeDup) {
   std::string oid = get_temp_oid();
 
-  journal::Entry entry1("tag1", 123, create_payload(std::string(24, '1')));
-  journal::Entry entry2("tag1", 123, create_payload(std::string(24, '2')));
+  journal::Entry entry1(234, 123, create_payload(std::string(24, '1')));
+  journal::Entry entry2(234, 123, create_payload(std::string(24, '2')));
 
   bufferlist bl;
   ::encode(entry1, bl);
@@ -128,8 +128,8 @@ TEST_F(TestObjectPlayer, FetchError) {
 TEST_F(TestObjectPlayer, FetchCorrupt) {
   std::string oid = get_temp_oid();
 
-  journal::Entry entry1("tag1", 123, create_payload(std::string(24, '1')));
-  journal::Entry entry2("tag1", 124, create_payload(std::string(24, '2')));
+  journal::Entry entry1(234, 123, create_payload(std::string(24, '1')));
+  journal::Entry entry2(234, 124, create_payload(std::string(24, '2')));
 
   bufferlist bl;
   ::encode(entry1, bl);
@@ -154,8 +154,8 @@ TEST_F(TestObjectPlayer, FetchCorrupt) {
 TEST_F(TestObjectPlayer, FetchAppend) {
   std::string oid = get_temp_oid();
 
-  journal::Entry entry1("tag1", 123, create_payload(std::string(24, '1')));
-  journal::Entry entry2("tag1", 124, create_payload(std::string(24, '2')));
+  journal::Entry entry1(234, 123, create_payload(std::string(24, '1')));
+  journal::Entry entry2(234, 124, create_payload(std::string(24, '2')));
 
   bufferlist bl;
   ::encode(entry1, bl);
@@ -192,8 +192,8 @@ TEST_F(TestObjectPlayer, FetchAppend) {
 TEST_F(TestObjectPlayer, PopEntry) {
   std::string oid = get_temp_oid();
 
-  journal::Entry entry1("tag1", 123, create_payload(std::string(24, '1')));
-  journal::Entry entry2("tag1", 124, create_payload(std::string(24, '1')));
+  journal::Entry entry1(234, 123, create_payload(std::string(24, '1')));
+  journal::Entry entry2(234, 124, create_payload(std::string(24, '1')));
 
   bufferlist bl;
   ::encode(entry1, bl);
@@ -227,8 +227,8 @@ TEST_F(TestObjectPlayer, Watch) {
   C_SaferCond cond1;
   object->watch(&cond1, 0.1);
 
-  journal::Entry entry1("tag1", 123, create_payload(std::string(24, '1')));
-  journal::Entry entry2("tag1", 124, create_payload(std::string(24, '1')));
+  journal::Entry entry1(234, 123, create_payload(std::string(24, '1')));
+  journal::Entry entry2(234, 124, create_payload(std::string(24, '1')));
 
   bufferlist bl;
   ::encode(entry1, bl);
diff --git a/src/test/journal/test_ObjectRecorder.cc b/src/test/journal/test_ObjectRecorder.cc
index 8c27430..9d3d0e5 100644
--- a/src/test/journal/test_ObjectRecorder.cc
+++ b/src/test/journal/test_ObjectRecorder.cc
@@ -80,11 +80,10 @@ public:
     m_flush_age = i;
   }
 
-  journal::AppendBuffer create_append_buffer(const std::string &tag,
-                                             uint64_t tid,
+  journal::AppendBuffer create_append_buffer(uint64_t tag_tid, uint64_t entry_tid,
                                              const std::string &payload) {
     journal::FutureImplPtr future(new journal::FutureImpl(*m_finisher,
-                                                          tag, tid, 456));
+                                                          tag_tid, entry_tid, 456));
     future->init(journal::FutureImplPtr());
 
     bufferlist bl;
@@ -107,14 +106,14 @@ TEST_F(TestObjectRecorder, Append) {
 
   journal::ObjectRecorderPtr object = create_object(oid, 24);
 
-  journal::AppendBuffer append_buffer1 = create_append_buffer("tag", 123,
+  journal::AppendBuffer append_buffer1 = create_append_buffer(234, 123,
                                                              "payload");
   journal::AppendBuffers append_buffers;
   append_buffers = {append_buffer1};
   ASSERT_FALSE(object->append(append_buffers));
   ASSERT_EQ(1U, object->get_pending_appends());
 
-  journal::AppendBuffer append_buffer2 = create_append_buffer("tag", 124,
+  journal::AppendBuffer append_buffer2 = create_append_buffer(234, 124,
                                                              "payload");
   append_buffers = {append_buffer2};
   ASSERT_FALSE(object->append(append_buffers));
@@ -132,14 +131,14 @@ TEST_F(TestObjectRecorder, AppendFlushByCount) {
   set_flush_interval(2);
   journal::ObjectRecorderPtr object = create_object(oid, 24);
 
-  journal::AppendBuffer append_buffer1 = create_append_buffer("tag", 123,
+  journal::AppendBuffer append_buffer1 = create_append_buffer(234, 123,
                                                              "payload");
   journal::AppendBuffers append_buffers;
   append_buffers = {append_buffer1};
   ASSERT_FALSE(object->append(append_buffers));
   ASSERT_EQ(1U, object->get_pending_appends());
 
-  journal::AppendBuffer append_buffer2 = create_append_buffer("tag", 124,
+  journal::AppendBuffer append_buffer2 = create_append_buffer(234, 124,
                                                              "payload");
   append_buffers = {append_buffer2};
   ASSERT_FALSE(object->append(append_buffers));
@@ -156,14 +155,14 @@ TEST_F(TestObjectRecorder, AppendFlushByBytes) {
   set_flush_bytes(10);
   journal::ObjectRecorderPtr object = create_object(oid, 24);
 
-  journal::AppendBuffer append_buffer1 = create_append_buffer("tag", 123,
+  journal::AppendBuffer append_buffer1 = create_append_buffer(234, 123,
                                                              "payload");
   journal::AppendBuffers append_buffers;
   append_buffers = {append_buffer1};
   ASSERT_FALSE(object->append(append_buffers));
   ASSERT_EQ(1U, object->get_pending_appends());
 
-  journal::AppendBuffer append_buffer2 = create_append_buffer("tag", 124,
+  journal::AppendBuffer append_buffer2 = create_append_buffer(234, 124,
                                                              "payload");
   append_buffers = {append_buffer2};
   ASSERT_FALSE(object->append(append_buffers));
@@ -180,13 +179,13 @@ TEST_F(TestObjectRecorder, AppendFlushByAge) {
   set_flush_age(0.1);
   journal::ObjectRecorderPtr object = create_object(oid, 24);
 
-  journal::AppendBuffer append_buffer1 = create_append_buffer("tag", 123,
+  journal::AppendBuffer append_buffer1 = create_append_buffer(234, 123,
                                                              "payload");
   journal::AppendBuffers append_buffers;
   append_buffers = {append_buffer1};
   ASSERT_FALSE(object->append(append_buffers));
 
-  journal::AppendBuffer append_buffer2 = create_append_buffer("tag", 124,
+  journal::AppendBuffer append_buffer2 = create_append_buffer(234, 124,
                                                              "payload");
   append_buffers = {append_buffer2};
   ASSERT_FALSE(object->append(append_buffers));
@@ -203,13 +202,13 @@ TEST_F(TestObjectRecorder, AppendFilledObject) {
   journal::ObjectRecorderPtr object = create_object(oid, 12);
 
   std::string payload(2048, '1');
-  journal::AppendBuffer append_buffer1 = create_append_buffer("tag", 123,
+  journal::AppendBuffer append_buffer1 = create_append_buffer(234, 123,
                                                               payload);
   journal::AppendBuffers append_buffers;
   append_buffers = {append_buffer1};
   ASSERT_FALSE(object->append(append_buffers));
 
-  journal::AppendBuffer append_buffer2 = create_append_buffer("tag", 124,
+  journal::AppendBuffer append_buffer2 = create_append_buffer(234, 124,
                                                               payload);
   append_buffers = {append_buffer2};
   ASSERT_TRUE(object->append(append_buffers));
@@ -225,7 +224,7 @@ TEST_F(TestObjectRecorder, Flush) {
 
   journal::ObjectRecorderPtr object = create_object(oid, 24);
 
-  journal::AppendBuffer append_buffer1 = create_append_buffer("tag", 123,
+  journal::AppendBuffer append_buffer1 = create_append_buffer(234, 123,
                                                              "payload");
   journal::AppendBuffers append_buffers;
   append_buffers = {append_buffer1};
@@ -247,7 +246,7 @@ TEST_F(TestObjectRecorder, FlushFuture) {
 
   journal::ObjectRecorderPtr object = create_object(oid, 24);
 
-  journal::AppendBuffer append_buffer = create_append_buffer("tag", 123,
+  journal::AppendBuffer append_buffer = create_append_buffer(234, 123,
                                                              "payload");
   journal::AppendBuffers append_buffers;
   append_buffers = {append_buffer};
@@ -267,7 +266,7 @@ TEST_F(TestObjectRecorder, FlushDetachedFuture) {
 
   journal::ObjectRecorderPtr object = create_object(oid, 24);
 
-  journal::AppendBuffer append_buffer = create_append_buffer("tag", 123,
+  journal::AppendBuffer append_buffer = create_append_buffer(234, 123,
                                                              "payload");
 
   journal::AppendBuffers append_buffers;
@@ -290,9 +289,9 @@ TEST_F(TestObjectRecorder, Overflow) {
   journal::ObjectRecorderPtr object2 = create_object(oid, 12);
 
   std::string payload(2048, '1');
-  journal::AppendBuffer append_buffer1 = create_append_buffer("tag", 123,
+  journal::AppendBuffer append_buffer1 = create_append_buffer(234, 123,
                                                               payload);
-  journal::AppendBuffer append_buffer2 = create_append_buffer("tag", 124,
+  journal::AppendBuffer append_buffer2 = create_append_buffer(234, 124,
                                                               payload);
   journal::AppendBuffers append_buffers;
   append_buffers = {append_buffer1, append_buffer2};
@@ -303,7 +302,7 @@ TEST_F(TestObjectRecorder, Overflow) {
   ASSERT_EQ(0, cond.wait());
   ASSERT_EQ(0U, object1->get_pending_appends());
 
-  journal::AppendBuffer append_buffer3 = create_append_buffer("bar", 123,
+  journal::AppendBuffer append_buffer3 = create_append_buffer(456, 123,
                                                               payload);
   append_buffers = {append_buffer3};
 
diff --git a/src/test/librados/TestCase.cc b/src/test/librados/TestCase.cc
index d52d4d9..5efb891 100644
--- a/src/test/librados/TestCase.cc
+++ b/src/test/librados/TestCase.cc
@@ -25,7 +25,9 @@ void RadosTestNS::SetUp()
 {
   cluster = RadosTestNS::s_cluster;
   ASSERT_EQ(0, rados_ioctx_create(cluster, pool_name.c_str(), &ioctx));
-  ASSERT_FALSE(rados_ioctx_pool_requires_alignment(ioctx));
+  int requires;
+  ASSERT_EQ(0, rados_ioctx_pool_requires_alignment2(ioctx, &requires));
+  ASSERT_FALSE(requires);
 }
 
 void RadosTestNS::TearDown()
@@ -71,7 +73,9 @@ void RadosTestPPNS::TearDownTestCase()
 void RadosTestPPNS::SetUp()
 {
   ASSERT_EQ(0, cluster.ioctx_create(pool_name.c_str(), ioctx));
-  ASSERT_FALSE(ioctx.pool_requires_alignment());
+  bool requires;
+  ASSERT_EQ(0, ioctx.pool_requires_alignment2(&requires));
+  ASSERT_FALSE(requires);
 }
 
 void RadosTestPPNS::TearDown()
@@ -151,7 +155,9 @@ void RadosTestParamPPNS::SetUp()
   }
 
   ASSERT_EQ(0, cluster.ioctx_create(pool_name.c_str(), ioctx));
-  ASSERT_FALSE(ioctx.pool_requires_alignment());
+  bool requires;
+  ASSERT_EQ(0, ioctx.pool_requires_alignment2(&requires));
+  ASSERT_FALSE(requires);
 }
 
 void RadosTestParamPPNS::TearDown()
@@ -191,9 +197,11 @@ void RadosTestECNS::SetUp()
 {
   cluster = RadosTestECNS::s_cluster;
   ASSERT_EQ(0, rados_ioctx_create(cluster, pool_name.c_str(), &ioctx));
-  ASSERT_TRUE(rados_ioctx_pool_requires_alignment(ioctx));
-  alignment = rados_ioctx_pool_required_alignment(ioctx);
-  ASSERT_NE((unsigned)0, alignment);
+  int requires;
+  ASSERT_EQ(0, rados_ioctx_pool_requires_alignment2(ioctx, &requires));
+  ASSERT_TRUE(requires);
+  ASSERT_EQ(0, rados_ioctx_pool_required_alignment2(ioctx, &alignment));
+  ASSERT_NE(0U, alignment);
 }
 
 void RadosTestECNS::TearDown()
@@ -219,9 +227,11 @@ void RadosTestECPPNS::TearDownTestCase()
 void RadosTestECPPNS::SetUp()
 {
   ASSERT_EQ(0, cluster.ioctx_create(pool_name.c_str(), ioctx));
-  ASSERT_TRUE(ioctx.pool_requires_alignment());
-  alignment = ioctx.pool_required_alignment();
-  ASSERT_NE((unsigned)0, alignment);
+  bool requires;
+  ASSERT_EQ(0, ioctx.pool_requires_alignment2(&requires));
+  ASSERT_TRUE(requires);
+  ASSERT_EQ(0, ioctx.pool_required_alignment2(&alignment));
+  ASSERT_NE(0U, alignment);
 }
 
 void RadosTestECPPNS::TearDown()
@@ -250,7 +260,9 @@ void RadosTest::SetUp()
   ASSERT_EQ(0, rados_ioctx_create(cluster, pool_name.c_str(), &ioctx));
   nspace = get_temp_pool_name();
   rados_ioctx_set_namespace(ioctx, nspace.c_str());
-  ASSERT_FALSE(rados_ioctx_pool_requires_alignment(ioctx));
+  int requires;
+  ASSERT_EQ(0, rados_ioctx_pool_requires_alignment2(ioctx, &requires));
+  ASSERT_FALSE(requires);
 }
 
 void RadosTest::TearDown()
@@ -303,7 +315,9 @@ void RadosTestPP::SetUp()
   ASSERT_EQ(0, cluster.ioctx_create(pool_name.c_str(), ioctx));
   nspace = get_temp_pool_name();
   ioctx.set_namespace(nspace);
-  ASSERT_FALSE(ioctx.pool_requires_alignment());
+  bool requires;
+  ASSERT_EQ(0, ioctx.pool_requires_alignment2(&requires));
+  ASSERT_FALSE(requires);
 }
 
 void RadosTestPP::TearDown()
@@ -398,7 +412,9 @@ void RadosTestParamPP::SetUp()
   ASSERT_EQ(0, cluster.ioctx_create(pool_name.c_str(), ioctx));
   nspace = get_temp_pool_name();
   ioctx.set_namespace(nspace);
-  ASSERT_FALSE(ioctx.pool_requires_alignment());
+  bool requires;
+  ASSERT_EQ(0, ioctx.pool_requires_alignment2(&requires));
+  ASSERT_FALSE(requires);
 }
 
 void RadosTestParamPP::TearDown()
@@ -446,9 +462,11 @@ void RadosTestEC::SetUp()
   ASSERT_EQ(0, rados_ioctx_create(cluster, pool_name.c_str(), &ioctx));
   nspace = get_temp_pool_name();
   rados_ioctx_set_namespace(ioctx, nspace.c_str());
-  ASSERT_TRUE(rados_ioctx_pool_requires_alignment(ioctx));
-  alignment = rados_ioctx_pool_required_alignment(ioctx);
-  ASSERT_NE((unsigned)0, alignment);
+  int requires;
+  ASSERT_EQ(0, rados_ioctx_pool_requires_alignment2(ioctx, &requires));
+  ASSERT_TRUE(requires);
+  ASSERT_EQ(0, rados_ioctx_pool_required_alignment2(ioctx, &alignment));
+  ASSERT_NE(0U, alignment);
 }
 
 void RadosTestEC::TearDown()
@@ -477,9 +495,11 @@ void RadosTestECPP::SetUp()
   ASSERT_EQ(0, cluster.ioctx_create(pool_name.c_str(), ioctx));
   nspace = get_temp_pool_name();
   ioctx.set_namespace(nspace);
-  ASSERT_TRUE(ioctx.pool_requires_alignment());
-  alignment = ioctx.pool_required_alignment();
-  ASSERT_NE((unsigned)0, alignment);
+  bool requires;
+  ASSERT_EQ(0, ioctx.pool_requires_alignment2(&requires));
+  ASSERT_TRUE(requires);
+  ASSERT_EQ(0, ioctx.pool_required_alignment2(&alignment));
+  ASSERT_NE(0U, alignment);
 }
 
 void RadosTestECPP::TearDown()
diff --git a/src/test/librados/aio.cc b/src/test/librados/aio.cc
index cbae877..1be4801 100644
--- a/src/test/librados/aio.cc
+++ b/src/test/librados/aio.cc
@@ -2208,9 +2208,12 @@ TEST(LibRadosAioEC, RoundTripAppend) {
   ASSERT_EQ("", test_data.init());
   ASSERT_EQ(0, rados_aio_create_completion((void*)&test_data,
 	      set_completion_completeEC, set_completion_safeEC, &my_completion));
-  ASSERT_TRUE(rados_ioctx_pool_requires_alignment(test_data.m_ioctx));
-  uint64_t alignment = rados_ioctx_pool_required_alignment(test_data.m_ioctx);
-  ASSERT_NE((unsigned)0, alignment);
+  int requires;
+  ASSERT_EQ(0, rados_ioctx_pool_requires_alignment2(test_data.m_ioctx, &requires));
+  ASSERT_NE(0, requires);
+  uint64_t alignment;
+  ASSERT_EQ(0, rados_ioctx_pool_required_alignment2(test_data.m_ioctx, &alignment));
+  ASSERT_NE(0U, alignment);
 
   int bsize = alignment;
   char *buf = (char *)new char[bsize];
@@ -2276,8 +2279,11 @@ TEST(LibRadosAioEC, RoundTripAppendPP) {
 	  (void*)&test_data, set_completion_completeEC, set_completion_safeEC);
   AioCompletion *my_completion_null = NULL;
   ASSERT_NE(my_completion, my_completion_null);
-  ASSERT_TRUE(test_data.m_ioctx.pool_requires_alignment());
-  uint64_t alignment = test_data.m_ioctx.pool_required_alignment();
+  bool requires;
+  ASSERT_EQ(0, test_data.m_ioctx.pool_requires_alignment2(&requires));
+  ASSERT_TRUE(requires);
+  uint64_t alignment;
+  ASSERT_EQ(0, test_data.m_ioctx.pool_required_alignment2(&alignment));
   ASSERT_NE((unsigned)0, alignment);
   int bsize = alignment;
   char *buf = (char *)new char[bsize];
diff --git a/src/test/librados/list.cc b/src/test/librados/list.cc
index eb65ded..39b02b5 100644
--- a/src/test/librados/list.cc
+++ b/src/test/librados/list.cc
@@ -808,7 +808,7 @@ TEST_F(LibRadosListPP, EnumerateObjectsPP) {
     std::vector<ObjectItem> result;
     int r = ioctx.object_list(c, end, 12, &result, &c);
     ASSERT_GE(r, 0);
-    ASSERT_EQ(r, result.size());
+    ASSERT_EQ(r, (int)result.size());
     for (int i = 0; i < r; ++i) {
       auto oid = result[i].oid;
       if (saw_obj.count(oid)) {
diff --git a/src/test/librados/test.cc b/src/test/librados/test.cc
index 469b714..5f3ff2d 100644
--- a/src/test/librados/test.cc
+++ b/src/test/librados/test.cc
@@ -104,7 +104,7 @@ int rados_pool_set(
     rados_t *cluster,
     const std::string &pool_name,
     const std::string &var,
-    const std::string val)
+    const std::string &val)
 {
   JSONFormatter cmd_f;
   cmd_f.open_object_section("command");
@@ -145,14 +145,22 @@ std::string create_one_pool(
   return "";
 }
 
-int destroy_ec_profile(rados_t *cluster, std::ostream &oss)
+int destroy_ec_profile(rados_t *cluster,
+		       const std::string& pool_name,
+		       std::ostream &oss)
 {
+  char buf[1000];
+  snprintf(buf, sizeof(buf),
+	   "{\"prefix\": \"osd erasure-code-profile rm\", \"name\": \"testprofile-%s\"}",
+	   pool_name.c_str());
   char *cmd[2];
-  cmd[0] = (char *)"{\"prefix\": \"osd erasure-code-profile rm\", \"name\": \"testprofile\"}";
+  cmd[0] = buf;
   cmd[1] = NULL;
-  int ret = rados_mon_command(*cluster, (const char **)cmd, 1, "", 0, NULL, 0, NULL, 0);
+  int ret = rados_mon_command(*cluster, (const char **)cmd, 1, "", 0, NULL,
+			      0, NULL, 0);
   if (ret)
-    oss << "rados_mon_command: erasure-code-profile rm testprofile failed with error " << ret;
+    oss << "rados_mon_command: erasure-code-profile rm testprofile-"
+	<< pool_name << " failed with error " << ret;
   return ret;
 }
 
@@ -176,7 +184,7 @@ int destroy_ec_profile_and_ruleset(rados_t *cluster,
                                    std::ostream &oss)
 {
   int ret;
-  ret = destroy_ec_profile(cluster, oss);
+  ret = destroy_ec_profile(cluster, ruleset, oss);
   if (ret)
     return ret;
   return destroy_ruleset(cluster, ruleset, oss);
@@ -228,21 +236,21 @@ std::string create_one_ec_pool(const std::string &pool_name, rados_t *cluster)
   char *cmd[2];
   cmd[1] = NULL;
 
-  std::string profile_create = "{\"prefix\": \"osd erasure-code-profile set\", \"name\": \"testprofile\", \"profile\": [ \"k=2\", \"m=1\", \"ruleset-failure-domain=osd\"]}";
+  std::string profile_create = "{\"prefix\": \"osd erasure-code-profile set\", \"name\": \"testprofile-" + pool_name + "\", \"profile\": [ \"k=2\", \"m=1\", \"ruleset-failure-domain=osd\"]}";
   cmd[0] = (char *)profile_create.c_str();
   ret = rados_mon_command(*cluster, (const char **)cmd, 1, "", 0, NULL, 0, NULL, 0);
   if (ret) {
     rados_shutdown(*cluster);
-    oss << "rados_mon_command erasure-code-profile set name:testprofile failed with error " << ret;
+    oss << "rados_mon_command erasure-code-profile set name:testprofile-" << pool_name << " failed with error " << ret;
     return oss.str();
   }
 
   std::string cmdstr = "{\"prefix\": \"osd pool create\", \"pool\": \"" +
-     pool_name + "\", \"pool_type\":\"erasure\", \"pg_num\":8, \"pgp_num\":8, \"erasure_code_profile\":\"testprofile\"}";
+     pool_name + "\", \"pool_type\":\"erasure\", \"pg_num\":8, \"pgp_num\":8, \"erasure_code_profile\":\"testprofile-" + pool_name + "\"}";
   cmd[0] = (char *)cmdstr.c_str();
   ret = rados_mon_command(*cluster, (const char **)cmd, 1, "", 0, NULL, 0, NULL, 0);
   if (ret) {
-    destroy_ec_profile(cluster, oss);
+    destroy_ec_profile(cluster, pool_name, oss);
     rados_shutdown(*cluster);
     oss << "rados_mon_command osd pool create failed with error " << ret;
     return oss.str();
@@ -279,13 +287,14 @@ int destroy_ruleset_pp(Rados &cluster,
   return ret;
 }
 
-int destroy_ec_profile_pp(Rados &cluster, std::ostream &oss)
+int destroy_ec_profile_pp(Rados &cluster, const std::string& pool_name,
+			  std::ostream &oss)
 {
   bufferlist inbl;
-  int ret = cluster.mon_command("{\"prefix\": \"osd erasure-code-profile rm\", \"name\": \"testprofile\"}",
+  int ret = cluster.mon_command("{\"prefix\": \"osd erasure-code-profile rm\", \"name\": \"testprofile-" + pool_name + "\"}",
                                 inbl, NULL, NULL);
   if (ret)
-    oss << "mon_command: osd erasure-code-profile rm testprofile failed with error " << ret << std::endl;
+    oss << "mon_command: osd erasure-code-profile rm testprofile-" << pool_name << " failed with error " << ret << std::endl;
   return ret;
 }
 
@@ -294,7 +303,7 @@ int destroy_ec_profile_and_ruleset_pp(Rados &cluster,
                                       std::ostream &oss)
 {
   int ret;
-  ret = destroy_ec_profile_pp(cluster, oss);
+  ret = destroy_ec_profile_pp(cluster, ruleset, oss);
   if (ret)
     return ret;
   return destroy_ruleset_pp(cluster, ruleset, oss);
@@ -315,20 +324,20 @@ std::string create_one_ec_pool_pp(const std::string &pool_name, Rados &cluster)
 
   bufferlist inbl;
   ret = cluster.mon_command(
-    "{\"prefix\": \"osd erasure-code-profile set\", \"name\": \"testprofile\", \"profile\": [ \"k=2\", \"m=1\", \"ruleset-failure-domain=osd\"]}",
+    "{\"prefix\": \"osd erasure-code-profile set\", \"name\": \"testprofile-" + pool_name + "\", \"profile\": [ \"k=2\", \"m=1\", \"ruleset-failure-domain=osd\"]}",
     inbl, NULL, NULL);
   if (ret) {
     cluster.shutdown();
-    oss << "mon_command erasure-code-profile set name:testprofile failed with error " << ret;
+    oss << "mon_command erasure-code-profile set name:testprofile-" << pool_name << " failed with error " << ret;
     return oss.str();
   }
     
   ret = cluster.mon_command(
-    "{\"prefix\": \"osd pool create\", \"pool\": \"" + pool_name + "\", \"pool_type\":\"erasure\", \"pg_num\":8, \"pgp_num\":8, \"erasure_code_profile\":\"testprofile\"}",
+    "{\"prefix\": \"osd pool create\", \"pool\": \"" + pool_name + "\", \"pool_type\":\"erasure\", \"pg_num\":8, \"pgp_num\":8, \"erasure_code_profile\":\"testprofile-" + pool_name + "\"}",
     inbl, NULL, NULL);
   if (ret) {
     bufferlist inbl;
-    destroy_ec_profile_pp(cluster, oss);
+    destroy_ec_profile_pp(cluster, pool_name, oss);
     cluster.shutdown();
     oss << "mon_command osd pool create pool:" << pool_name << " pool_type:erasure failed with error " << ret;
     return oss.str();
diff --git a/src/test/librados/tier.cc b/src/test/librados/tier.cc
old mode 100644
new mode 100755
index 6517f82..3c57c7f
--- a/src/test/librados/tier.cc
+++ b/src/test/librados/tier.cc
@@ -2158,7 +2158,8 @@ TEST_F(LibRadosTwoPoolsPP, HitSetRead) {
     ASSERT_TRUE(now < hard_stop);
 
     string name = "foo";
-    uint32_t hash = cache_ioctx.get_object_hash_position(name);
+    uint32_t hash; 
+    ASSERT_EQ(0, cache_ioctx.get_object_hash_position2(name, &hash));
     hobject_t oid(sobject_t(name, CEPH_NOSNAP), "", hash,
 		  cluster.pool_lookup(cache_pool_name.c_str()), "");
 
@@ -2284,7 +2285,8 @@ TEST_F(LibRadosTwoPoolsPP, HitSetWrite) {
 
   for (int i=0; i<num; ++i) {
     string n = stringify(i);
-    uint32_t hash = cache_ioctx.get_object_hash_position(n);
+    uint32_t hash;
+    ASSERT_EQ(0, cache_ioctx.get_object_hash_position2(n, &hash));
     hobject_t oid(sobject_t(n, CEPH_NOSNAP), "", hash,
 		  cluster.pool_lookup(cache_pool_name.c_str()), "");
     std::cout << "checking for " << oid << std::endl;
@@ -2333,7 +2335,8 @@ TEST_F(LibRadosTwoPoolsPP, HitSetTrim) {
   time_t first = 0;
   while (true) {
     string name = "foo";
-    uint32_t hash = cache_ioctx.get_object_hash_position(name);
+    uint32_t hash; 
+    ASSERT_EQ(0, cache_ioctx.get_object_hash_position2(name, &hash));
     hobject_t oid(sobject_t(name, CEPH_NOSNAP), "", hash, -1, "");
 
     bufferlist bl;
@@ -3025,10 +3028,12 @@ TEST_F(LibRadosTwoPoolsECPP, PromoteSnap) {
     for (int tries = 0; tries < 5; ++tries) {
       IoCtx cache_ioctx;
       ASSERT_EQ(0, cluster.ioctx_create(cache_pool_name.c_str(), cache_ioctx));
+      uint32_t hash;
+      ASSERT_EQ(0, ioctx.get_object_pg_hash_position2("foo", &hash));
       ostringstream ss;
       ss << "{\"prefix\": \"pg scrub\", \"pgid\": \""
 	 << cache_ioctx.get_id() << "."
-	 << ioctx.get_object_pg_hash_position("foo")
+	 << hash
 	 << "\"}";
       int r = cluster.mon_command(ss.str(), inbl, NULL, NULL);
       if (r == -EAGAIN)
@@ -4547,7 +4552,8 @@ TEST_F(LibRadosTwoPoolsECPP, HitSetRead) {
     ASSERT_TRUE(now < hard_stop);
 
     string name = "foo";
-    uint32_t hash = cache_ioctx.get_object_hash_position(name);
+    uint32_t hash;
+    ASSERT_EQ(0, cache_ioctx.get_object_hash_position2(name, &hash));
     hobject_t oid(sobject_t(name, CEPH_NOSNAP), "", hash,
 		  cluster.pool_lookup(cache_pool_name.c_str()), "");
 
@@ -4691,7 +4697,8 @@ TEST_F(LibRadosTwoPoolsECPP, HitSetTrim) {
 
   while (true) {
     string name = "foo";
-    uint32_t hash = cache_ioctx.get_object_hash_position(name);
+    uint32_t hash;
+    ASSERT_EQ(0, cache_ioctx.get_object_hash_position2(name, &hash));
     hobject_t oid(sobject_t(name, CEPH_NOSNAP), "", hash, -1, "");
 
     bufferlist bl;
diff --git a/src/test/librados_test_stub/TestRadosClient.cc b/src/test/librados_test_stub/TestRadosClient.cc
index 1a9792c..c72aa28 100644
--- a/src/test/librados_test_stub/TestRadosClient.cc
+++ b/src/test/librados_test_stub/TestRadosClient.cc
@@ -84,7 +84,8 @@ private:
 
 TestRadosClient::TestRadosClient(CephContext *cct)
   : m_cct(cct->get()),
-    m_watch_notify(m_cct),
+    m_aio_finisher(new Finisher(m_cct)),
+    m_watch_notify(m_cct, m_aio_finisher),
     m_transaction_lock("TestRadosClient::m_transaction_lock")
 {
   get();
@@ -97,7 +98,6 @@ TestRadosClient::TestRadosClient(CephContext *cct)
   }
 
   // replicate AIO callback processing
-  m_aio_finisher = new Finisher(m_cct);
   m_aio_finisher->start();
 }
 
diff --git a/src/test/librados_test_stub/TestWatchNotify.cc b/src/test/librados_test_stub/TestWatchNotify.cc
index ef8f537..7bd2ef7 100644
--- a/src/test/librados_test_stub/TestWatchNotify.cc
+++ b/src/test/librados_test_stub/TestWatchNotify.cc
@@ -9,40 +9,28 @@
 
 namespace librados {
 
-TestWatchNotify::TestWatchNotify(CephContext *cct)
-  : m_cct(cct), m_finisher(new Finisher(cct)), m_handle(), m_notify_id(),
-    m_file_watcher_lock("librados::TestWatchNotify::m_file_watcher_lock"),
+TestWatchNotify::TestWatchNotify(CephContext *cct, Finisher *finisher)
+  : m_cct(cct), m_finisher(finisher), m_handle(), m_notify_id(),
+    m_lock("librados::TestWatchNotify::m_lock"),
     m_pending_notifies(0) {
   m_cct->get();
-  m_finisher->start();
 }
 
 TestWatchNotify::~TestWatchNotify() {
-  m_finisher->stop();
-  delete m_finisher;
   m_cct->put();
 }
 
-TestWatchNotify::NotifyHandle::NotifyHandle()
-  : pbl(NULL), pending_responses(),
-    lock("TestWatchNotify::NotifyHandle::lock") {
-}
-
-TestWatchNotify::Watcher::Watcher()
-  : lock("TestWatchNotify::Watcher::lock") {
-}
-
 void TestWatchNotify::flush() {
-  Mutex::Locker file_watcher_locker(m_file_watcher_lock);
+  Mutex::Locker locker(m_lock);
   while (m_pending_notifies > 0) {
-    m_file_watcher_cond.Wait(m_file_watcher_lock);
+    m_file_watcher_cond.Wait(m_lock);
   }
 }
 
 int TestWatchNotify::list_watchers(const std::string& o,
                                    std::list<obj_watch_t> *out_watchers) {
+  Mutex::Locker lock(m_lock);
   SharedWatcher watcher = get_watcher(o);
-  RWLock::RLocker l(watcher->lock);
 
   out_watchers->clear();
   for (TestWatchNotify::WatchHandles::iterator it =
@@ -50,7 +38,7 @@ int TestWatchNotify::list_watchers(const std::string& o,
        it != watcher->watch_handles.end(); ++it) {
     obj_watch_t obj;
     strcpy(obj.addr, ":/0");
-    obj.watcher_id = static_cast<int64_t>(it->second.instance_id);
+    obj.watcher_id = static_cast<int64_t>(it->second.gid);
     obj.cookie = it->second.handle;
     obj.timeout_seconds = 30;
     out_watchers->push_back(obj);
@@ -61,20 +49,24 @@ int TestWatchNotify::list_watchers(const std::string& o,
 void TestWatchNotify::aio_notify(const std::string& oid, bufferlist& bl,
                                  uint64_t timeout_ms, bufferlist *pbl,
                                  Context *on_notify) {
-  SharedWatcher watcher = get_watcher(oid);
-  RWLock::WLocker watcher_locker(watcher->lock);
-  Mutex::Locker file_watcher_lock(m_file_watcher_lock);
+  Mutex::Locker lock(m_lock);
   ++m_pending_notifies;
   uint64_t notify_id = ++m_notify_id;
 
+  SharedWatcher watcher = get_watcher(oid);
+
   SharedNotifyHandle notify_handle(new NotifyHandle());
   notify_handle->pbl = pbl;
-
+  notify_handle->on_notify = on_notify;
+  for (auto &watch_handle_pair : watcher->watch_handles) {
+    WatchHandle &watch_handle = watch_handle_pair.second;
+    notify_handle->pending_watcher_ids.insert(std::make_pair(
+      watch_handle.gid, watch_handle.handle));
+  }
   watcher->notify_handles[notify_id] = notify_handle;
 
   FunctionContext *ctx = new FunctionContext(
-      boost::bind(&TestWatchNotify::execute_notify, this,
-                  oid, bl, notify_id, on_notify));
+    boost::bind(&TestWatchNotify::execute_notify, this, oid, bl, notify_id));
   m_finisher->queue(ctx);
 }
 
@@ -88,32 +80,20 @@ int TestWatchNotify::notify(const std::string& oid, bufferlist& bl,
 void TestWatchNotify::notify_ack(const std::string& o, uint64_t notify_id,
                                  uint64_t handle, uint64_t gid,
                                  bufferlist& bl) {
-  SharedWatcher watcher = get_watcher(o);
-
-  RWLock::RLocker l(watcher->lock);
-  NotifyHandles::iterator it = watcher->notify_handles.find(notify_id);
-  if (it == watcher->notify_handles.end()) {
-    return;
-  }
-
-  bufferlist response;
-  response.append(bl);
-
-  SharedNotifyHandle notify_handle = it->second;
-  Mutex::Locker l2(notify_handle->lock);
-  --notify_handle->pending_responses;
-  notify_handle->notify_responses[std::make_pair(gid, handle)] = response;
-  notify_handle->cond.Signal();
+  Mutex::Locker lock(m_lock);
+  WatcherID watcher_id = std::make_pair(gid, handle);
+  ack_notify(o, notify_id, watcher_id, bl);
+  finish_notify(o, notify_id);
 }
 
-int TestWatchNotify::watch(const std::string& o, uint64_t instance_id,
+int TestWatchNotify::watch(const std::string& o, uint64_t gid,
                            uint64_t *handle, librados::WatchCtx *ctx,
                            librados::WatchCtx2 *ctx2) {
+  Mutex::Locker lock(m_lock);
   SharedWatcher watcher = get_watcher(o);
 
-  RWLock::WLocker l(watcher->lock);
   WatchHandle watch_handle;
-  watch_handle.instance_id = instance_id;
+  watch_handle.gid = gid;
   watch_handle.handle = ++m_handle;
   watch_handle.watch_ctx = ctx;
   watch_handle.watch_ctx2 = ctx2;
@@ -124,35 +104,26 @@ int TestWatchNotify::watch(const std::string& o, uint64_t instance_id,
 }
 
 int TestWatchNotify::unwatch(uint64_t handle) {
-
-  SharedWatcher watcher;
-  {
-    Mutex::Locker l(m_file_watcher_lock);
-    for (FileWatchers::iterator it = m_file_watchers.begin();
-         it != m_file_watchers.end(); ++it) {
-      if (it->second->watch_handles.find(handle) !=
-            it->second->watch_handles.end()) {
-        watcher = it->second;
-        break;
+  Mutex::Locker locker(m_lock);
+  for (FileWatchers::iterator it = m_file_watchers.begin();
+       it != m_file_watchers.end(); ++it) {
+    SharedWatcher watcher = it->second;
+
+    WatchHandles::iterator w_it = watcher->watch_handles.find(handle);
+    if (w_it != watcher->watch_handles.end()) {
+      watcher->watch_handles.erase(w_it);
+      if (watcher->watch_handles.empty() && watcher->notify_handles.empty()) {
+        m_file_watchers.erase(it);
       }
+      break;
     }
   }
-
-  if (watcher) {
-    RWLock::WLocker l(watcher->lock);
-    watcher->watch_handles.erase(handle);
-  }
   return 0;
 }
 
 TestWatchNotify::SharedWatcher TestWatchNotify::get_watcher(
     const std::string& oid) {
-  Mutex::Locker l(m_file_watcher_lock);
-  return _get_watcher(oid);
-}
-
-TestWatchNotify::SharedWatcher TestWatchNotify::_get_watcher(
-    const std::string& oid) {
+  assert(m_lock.is_locked());
   SharedWatcher &watcher = m_file_watchers[oid];
   if (!watcher) {
     watcher.reset(new Watcher());
@@ -161,58 +132,101 @@ TestWatchNotify::SharedWatcher TestWatchNotify::_get_watcher(
 }
 
 void TestWatchNotify::execute_notify(const std::string &oid,
-                                     bufferlist &bl, uint64_t notify_id,
-                                     Context *on_notify) {
-  WatchHandles watch_handles;
-  SharedNotifyHandle notify_handle;
-
-  {
-    SharedWatcher watcher = get_watcher(oid);
-    RWLock::RLocker l(watcher->lock);
-
-    NotifyHandles::iterator n_it = watcher->notify_handles.find(notify_id);
-    if (n_it == watcher->notify_handles.end()) {
-      return;
-    }
+                                     bufferlist &bl, uint64_t notify_id) {
+  Mutex::Locker lock(m_lock);
+  SharedWatcher watcher = get_watcher(oid);
+  WatchHandles &watch_handles = watcher->watch_handles;
 
-    watch_handles = watcher->watch_handles;
-    notify_handle = n_it->second;
+  NotifyHandles::iterator n_it = watcher->notify_handles.find(notify_id);
+  if (n_it == watcher->notify_handles.end()) {
+    return;
   }
 
-  utime_t timeout;
-  timeout.set_from_double(ceph_clock_now(m_cct) + 15);
-
-  for (WatchHandles::iterator w_it = watch_handles.begin();
-       w_it != watch_handles.end(); ++w_it) {
-    WatchHandle &watch_handle = w_it->second;
+  SharedNotifyHandle notify_handle = n_it->second;
+  WatcherIDs watcher_ids(notify_handle->pending_watcher_ids);
+  for (WatcherIDs::iterator w_id_it = watcher_ids.begin();
+       w_id_it != watcher_ids.end(); ++w_id_it) {
+    WatcherID watcher_id = *w_id_it;
+    WatchHandles::iterator w_it = watch_handles.find(watcher_id.second);
+    if (w_it == watch_handles.end()) {
+      // client disconnected before notification processed
+      notify_handle->pending_watcher_ids.erase(watcher_id);
+    } else {
+      WatchHandle watch_handle = w_it->second;
+      assert(watch_handle.gid == watcher_id.first);
+      assert(watch_handle.handle == watcher_id.second);
+
+      bufferlist notify_bl;
+      notify_bl.append(bl);
+
+      m_lock.Unlock();
+      if (watch_handle.watch_ctx2 != NULL) {
+        watch_handle.watch_ctx2->handle_notify(notify_id, w_it->first, 0,
+                                               notify_bl);
+      } else if (watch_handle.watch_ctx != NULL) {
+        watch_handle.watch_ctx->notify(0, 0, notify_bl);
+      }
+      m_lock.Lock();
 
-    bufferlist notify_bl;
-    notify_bl.append(bl);
-    if (watch_handle.watch_ctx2 != NULL) {
-      {
-        Mutex::Locker l2(notify_handle->lock);
-        ++notify_handle->pending_responses;
+      if (watch_handle.watch_ctx2 == NULL) {
+        // auto ack old-style watch/notify clients
+        ack_notify(oid, notify_id, watcher_id, bufferlist());
       }
-      watch_handle.watch_ctx2->handle_notify(notify_id, w_it->first, 0,
-                                             notify_bl);
-    } else if (watch_handle.watch_ctx != NULL) {
-      watch_handle.watch_ctx->notify(0, 0, notify_bl);
     }
   }
 
-  {
-    Mutex::Locker l2(notify_handle->lock);
-    while (notify_handle->pending_responses > 0) {
-      notify_handle->cond.WaitUntil(notify_handle->lock, timeout);
-    }
-    if (notify_handle->pbl != NULL) {
-      ::encode(notify_handle->notify_responses, *notify_handle->pbl);
-    }
+  finish_notify(oid, notify_id);
+}
+
+void TestWatchNotify::ack_notify(const std::string &oid,
+                                 uint64_t notify_id,
+                                 const WatcherID &watcher_id,
+                                 const bufferlist &bl) {
+  assert(m_lock.is_locked());
+  SharedWatcher watcher = get_watcher(oid);
+
+  NotifyHandles::iterator it = watcher->notify_handles.find(notify_id);
+  if (it == watcher->notify_handles.end()) {
+    return;
+  }
+
+  bufferlist response;
+  response.append(bl);
+
+  SharedNotifyHandle notify_handle = it->second;
+  notify_handle->notify_responses[watcher_id] = response;
+  notify_handle->pending_watcher_ids.erase(watcher_id);
+}
+
+void TestWatchNotify::finish_notify(const std::string &oid,
+                                    uint64_t notify_id) {
+  assert(m_lock.is_locked());
+  SharedWatcher watcher = get_watcher(oid);
+
+  NotifyHandles::iterator it = watcher->notify_handles.find(notify_id);
+  if (it == watcher->notify_handles.end()) {
+    return;
   }
 
-  on_notify->complete(0);
+  SharedNotifyHandle notify_handle = it->second;
+  if (!notify_handle->pending_watcher_ids.empty()) {
+    return;
+  }
+
+  if (notify_handle->pbl != NULL) {
+    ::encode(notify_handle->notify_responses, *notify_handle->pbl);
+    ::encode(notify_handle->pending_watcher_ids, *notify_handle->pbl);
+  }
+
+  m_lock.Unlock();
+  notify_handle->on_notify->complete(0);
+  m_lock.Lock();
+
+  watcher->notify_handles.erase(notify_id);
+  if (watcher->watch_handles.empty() && watcher->notify_handles.empty()) {
+    m_file_watchers.erase(oid);
+  }
 
-  Mutex::Locker file_watcher_locker(m_file_watcher_lock);
   if (--m_pending_notifies == 0) {
     m_file_watcher_cond.Signal();
   }
diff --git a/src/test/librados_test_stub/TestWatchNotify.h b/src/test/librados_test_stub/TestWatchNotify.h
index 6f99704..a40f560 100644
--- a/src/test/librados_test_stub/TestWatchNotify.h
+++ b/src/test/librados_test_stub/TestWatchNotify.h
@@ -1,4 +1,4 @@
-
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
 // vim: ts=8 sw=2 smarttab
 
 #ifndef CEPH_TEST_WATCH_NOTIFY_H
@@ -7,7 +7,6 @@
 #include "include/rados/librados.hpp"
 #include "common/Cond.h"
 #include "common/Mutex.h"
-#include "common/RWLock.h"
 #include <boost/noncopyable.hpp>
 #include <boost/shared_ptr.hpp>
 #include <list>
@@ -21,21 +20,21 @@ namespace librados {
 
 class TestWatchNotify : boost::noncopyable {
 public:
+  typedef std::pair<uint64_t, uint64_t> WatcherID;
+  typedef std::set<WatcherID> WatcherIDs;
   typedef std::map<std::pair<uint64_t, uint64_t>, bufferlist> NotifyResponses;
 
   struct NotifyHandle {
-    NotifyHandle();
+    WatcherIDs pending_watcher_ids;
     NotifyResponses notify_responses;
-    bufferlist *pbl;
-    size_t pending_responses;
-    Mutex lock;
-    Cond cond;
+    bufferlist *pbl = nullptr;
+    Context *on_notify = nullptr;
   };
   typedef boost::shared_ptr<NotifyHandle> SharedNotifyHandle;
   typedef std::map<uint64_t, SharedNotifyHandle> NotifyHandles;
 
   struct WatchHandle {
-    uint64_t instance_id;
+    uint64_t gid;
     uint64_t handle;
     librados::WatchCtx* watch_ctx;
     librados::WatchCtx2* watch_ctx2;
@@ -44,14 +43,12 @@ public:
   typedef std::map<uint64_t, WatchHandle> WatchHandles;
 
   struct Watcher {
-    Watcher();
     WatchHandles watch_handles;
     NotifyHandles notify_handles;
-    RWLock lock;
   };
   typedef boost::shared_ptr<Watcher> SharedWatcher;
 
-  TestWatchNotify(CephContext *cct);
+  TestWatchNotify(CephContext *cct, Finisher *finisher);
   ~TestWatchNotify();
 
   void flush();
@@ -63,7 +60,7 @@ public:
              uint64_t timeout_ms, bufferlist *pbl);
   void notify_ack(const std::string& o, uint64_t notify_id,
                   uint64_t handle, uint64_t gid, bufferlist& bl);
-  int watch(const std::string& o, uint64_t instance_id, uint64_t *handle,
+  int watch(const std::string& o, uint64_t gid, uint64_t *handle,
             librados::WatchCtx *ctx, librados::WatchCtx2 *ctx2);
   int unwatch(uint64_t handle);
 
@@ -77,17 +74,19 @@ private:
   uint64_t m_handle;
   uint64_t m_notify_id;
 
-  Mutex m_file_watcher_lock;
-  Cond m_file_watcher_cond;
+  Mutex m_lock;
   uint64_t m_pending_notifies;
 
+  Cond m_file_watcher_cond;
   FileWatchers	m_file_watchers;
 
   SharedWatcher get_watcher(const std::string& oid);
-  SharedWatcher _get_watcher(const std::string& oid);
-  void execute_notify(const std::string &oid, bufferlist &bl,
-                      uint64_t notify_id, Context *on_notify);
 
+  void execute_notify(const std::string &oid, bufferlist &bl,
+                      uint64_t notify_id);
+  void ack_notify(const std::string &oid, uint64_t notify_id,
+                  const WatcherID &watcher_id, const bufferlist &bl);
+  void finish_notify(const std::string &oid, uint64_t notify_id);
 };
 
 } // namespace librados
diff --git a/src/test/librbd/exclusive_lock/test_mock_AcquireRequest.cc b/src/test/librbd/exclusive_lock/test_mock_AcquireRequest.cc
index 2732bd8..e899491 100644
--- a/src/test/librbd/exclusive_lock/test_mock_AcquireRequest.cc
+++ b/src/test/librbd/exclusive_lock/test_mock_AcquireRequest.cc
@@ -61,15 +61,9 @@ public:
                   .WillOnce(CompleteContext(0, mock_image_ctx.image_ctx->op_work_queue));
   }
 
-  void expect_lock_object_map(MockImageCtx &mock_image_ctx,
+  void expect_close_object_map(MockImageCtx &mock_image_ctx,
                               MockObjectMap &mock_object_map) {
-    EXPECT_CALL(mock_object_map, lock(_))
-                  .WillOnce(CompleteContext(0, mock_image_ctx.image_ctx->op_work_queue));
-  }
-
-  void expect_unlock_object_map(MockImageCtx &mock_image_ctx,
-                              MockObjectMap &mock_object_map) {
-    EXPECT_CALL(mock_object_map, unlock(_))
+    EXPECT_CALL(mock_object_map, close(_))
                   .WillOnce(CompleteContext(0, mock_image_ctx.image_ctx->op_work_queue));
   }
 
@@ -165,7 +159,6 @@ TEST_F(TestMockExclusiveLockAcquireRequest, Success) {
   expect_test_features(mock_image_ctx, RBD_FEATURE_OBJECT_MAP, true);
   expect_create_object_map(mock_image_ctx, &mock_object_map);
   expect_open_object_map(mock_image_ctx, mock_object_map);
-  expect_lock_object_map(mock_image_ctx, mock_object_map);
 
   MockJournal mock_journal;
   expect_test_features(mock_image_ctx, RBD_FEATURE_JOURNALING, true);
@@ -198,7 +191,6 @@ TEST_F(TestMockExclusiveLockAcquireRequest, SuccessJournalDisabled) {
   expect_test_features(mock_image_ctx, RBD_FEATURE_OBJECT_MAP, true);
   expect_create_object_map(mock_image_ctx, &mock_object_map);
   expect_open_object_map(mock_image_ctx, mock_object_map);
-  expect_lock_object_map(mock_image_ctx, mock_object_map);
 
   expect_test_features(mock_image_ctx, RBD_FEATURE_JOURNALING, false);
 
@@ -257,13 +249,12 @@ TEST_F(TestMockExclusiveLockAcquireRequest, JournalError) {
   expect_test_features(mock_image_ctx, RBD_FEATURE_OBJECT_MAP, true);
   expect_create_object_map(mock_image_ctx, mock_object_map);
   expect_open_object_map(mock_image_ctx, *mock_object_map);
-  expect_lock_object_map(mock_image_ctx, *mock_object_map);
 
   MockJournal *mock_journal = new MockJournal();
   expect_test_features(mock_image_ctx, RBD_FEATURE_JOURNALING, true);
   expect_create_journal(mock_image_ctx, mock_journal);
   expect_open_journal(mock_image_ctx, *mock_journal, -EINVAL);
-  expect_unlock_object_map(mock_image_ctx, *mock_object_map);
+  expect_close_object_map(mock_image_ctx, *mock_object_map);
 
   C_SaferCond acquire_ctx;
   C_SaferCond ctx;
diff --git a/src/test/librbd/exclusive_lock/test_mock_ReleaseRequest.cc b/src/test/librbd/exclusive_lock/test_mock_ReleaseRequest.cc
index c99b361..b32deee 100644
--- a/src/test/librbd/exclusive_lock/test_mock_ReleaseRequest.cc
+++ b/src/test/librbd/exclusive_lock/test_mock_ReleaseRequest.cc
@@ -55,9 +55,9 @@ public:
                   .WillOnce(CompleteContext(r, mock_image_ctx.image_ctx->op_work_queue));
   }
 
-  void expect_unlock_object_map(MockImageCtx &mock_image_ctx,
-                                MockObjectMap &mock_object_map) {
-    EXPECT_CALL(mock_object_map, unlock(_))
+  void expect_close_object_map(MockImageCtx &mock_image_ctx,
+                               MockObjectMap &mock_object_map) {
+    EXPECT_CALL(mock_object_map, close(_))
                   .WillOnce(CompleteContext(0, mock_image_ctx.image_ctx->op_work_queue));
   }
 };
@@ -81,7 +81,7 @@ TEST_F(TestMockExclusiveLockReleaseRequest, Success) {
 
   MockObjectMap *mock_object_map = new MockObjectMap();
   mock_image_ctx.object_map = mock_object_map;
-  expect_unlock_object_map(mock_image_ctx, *mock_object_map);
+  expect_close_object_map(mock_image_ctx, *mock_object_map);
 
   expect_unlock(mock_image_ctx, 0);
 
@@ -110,7 +110,7 @@ TEST_F(TestMockExclusiveLockReleaseRequest, SuccessJournalDisabled) {
 
   MockObjectMap *mock_object_map = new MockObjectMap();
   mock_image_ctx.object_map = mock_object_map;
-  expect_unlock_object_map(mock_image_ctx, *mock_object_map);
+  expect_close_object_map(mock_image_ctx, *mock_object_map);
 
   expect_unlock(mock_image_ctx, 0);
 
diff --git a/src/test/librbd/fsx.cc b/src/test/librbd/fsx.cc
index 7330b99..55a2ba3 100644
--- a/src/test/librbd/fsx.cc
+++ b/src/test/librbd/fsx.cc
@@ -1123,7 +1123,7 @@ check_buffers(char *good_buf, char *temp_buf, unsigned offset, unsigned size)
 					unsigned bad = short_at(&temp_buf[i]);
 				        prt("0x%5x\t0x%04x\t0x%04x", offset,
 				            short_at(&good_buf[offset]), bad);
-					unsigned op = temp_buf[offset & 1 ? i+1 : i];
+					unsigned op = temp_buf[(offset & 1) ? i+1 : i];
 				        prt("\t0x%5x\n", n);
 					if (op)
 						prt("operation# (mod 256) for "
@@ -1397,12 +1397,12 @@ dowrite(unsigned offset, unsigned size)
 		       (debug &&
 		       (monitorstart == -1 ||
 			(offset + size > monitorstart &&
-			(monitorend == -1 || offset <= monitorend))))))
+			 (monitorend == -1 || (long)offset <= monitorend))))))
 		prt("%lu write\t0x%x thru\t0x%x\t(0x%x bytes)\n", testcalls,
 		    offset, offset + size - 1, size);
 
 	ret = ops->write(&ctx, offset, size, good_buf + offset);
-	if (ret != size) {
+	if (ret != (ssize_t)size) {
 		if (ret < 0)
 			prterrcode("dowrite: ops->write", ret);
 		else
@@ -1442,7 +1442,7 @@ dotruncate(unsigned size)
 
 	if ((progressinterval && testcalls % progressinterval == 0) ||
 	    (debug && (monitorstart == -1 || monitorend == -1 ||
-		      size <= monitorend)))
+		       (long)size <= monitorend)))
 		prt("%lu trunc\tfrom 0x%x to 0x%x\n", testcalls, oldsize, size);
 
 	ret = ops->resize(&ctx, size);
@@ -1485,7 +1485,7 @@ do_punch_hole(unsigned offset, unsigned length)
 
 	if ((progressinterval && testcalls % progressinterval == 0) ||
 	    (debug && (monitorstart == -1 || monitorend == -1 ||
-		      end_offset <= monitorend))) {
+		       (long)end_offset <= monitorend))) {
 		prt("%lu punch\tfrom 0x%x to 0x%x, (0x%x bytes)\n", testcalls,
 			offset, offset+length, length);
 	}
diff --git a/src/test/librbd/image/test_mock_RefreshRequest.cc b/src/test/librbd/image/test_mock_RefreshRequest.cc
new file mode 100644
index 0000000..6d8791f
--- /dev/null
+++ b/src/test/librbd/image/test_mock_RefreshRequest.cc
@@ -0,0 +1,711 @@
+// -*- mode:C; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "test/librbd/test_mock_fixture.h"
+#include "test/librbd/test_support.h"
+#include "test/librbd/mock/MockImageCtx.h"
+#include "test/librbd/mock/MockJournal.h"
+#include "test/librbd/mock/MockObjectMap.h"
+#include "test/librados_test_stub/MockTestMemIoCtxImpl.h"
+#include "test/librados_test_stub/MockTestMemRadosClient.h"
+#include "librbd/internal.h"
+#include "librbd/Operations.h"
+#include "librbd/image/RefreshRequest.h"
+#include "librbd/image/RefreshParentRequest.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include <arpa/inet.h>
+#include <list>
+#include <boost/scope_exit.hpp>
+
+namespace librbd {
+namespace image {
+
+template <>
+struct RefreshParentRequest<MockImageCtx> {
+  static RefreshParentRequest* s_instance;
+  static RefreshParentRequest* create(MockImageCtx &mock_image_ctx,
+                                      const parent_info& parent_md,
+                                      Context *on_finish) {
+    assert(s_instance != nullptr);
+    s_instance->on_finish = on_finish;
+    return s_instance;
+  }
+  static bool is_refresh_required(MockImageCtx &mock_image_ctx,
+                                  const parent_info& parent_md) {
+    assert(s_instance != nullptr);
+    return s_instance->is_refresh_required();
+  }
+
+  Context *on_finish = nullptr;
+
+  RefreshParentRequest() {
+    s_instance = this;
+  }
+
+  MOCK_CONST_METHOD0(is_refresh_required, bool());
+  MOCK_METHOD0(send, void());
+  MOCK_METHOD0(apply, void());
+  MOCK_METHOD1(finalize, void(Context *));
+};
+
+RefreshParentRequest<MockImageCtx>* RefreshParentRequest<MockImageCtx>::s_instance = nullptr;
+
+} // namespace image
+} // namespace librbd
+
+// template definitions
+#include "librbd/image/RefreshRequest.cc"
+template class librbd::image::RefreshRequest<librbd::MockImageCtx>;
+
+ACTION_P(TestFeatures, image_ctx) {
+  return ((image_ctx->features & arg0) != 0);
+}
+
+ACTION_P(ShutDownExclusiveLock, image_ctx) {
+  // shutting down exclusive lock will close object map and journal
+  image_ctx->object_map = nullptr;
+  image_ctx->journal = nullptr;
+}
+
+namespace librbd {
+namespace image {
+
+using ::testing::_;
+using ::testing::DoAll;
+using ::testing::DoDefault;
+using ::testing::InSequence;
+using ::testing::Return;
+
+class TestMockImageRefreshRequest : public TestMockFixture {
+public:
+  typedef RefreshRequest<MockImageCtx> MockRefreshRequest;
+  typedef RefreshParentRequest<MockImageCtx> MockRefreshParentRequest;
+
+  void expect_v1_read_header(MockImageCtx &mock_image_ctx, int r) {
+    auto &expect = EXPECT_CALL(get_mock_io_ctx(mock_image_ctx.md_ctx),
+                               read(mock_image_ctx.header_oid, _, _, _));
+    if (r < 0) {
+      expect.WillOnce(Return(r));
+    } else {
+      expect.WillOnce(DoDefault());
+    }
+  }
+
+  void expect_v1_get_snapshots(MockImageCtx &mock_image_ctx, int r) {
+    auto &expect = EXPECT_CALL(get_mock_io_ctx(mock_image_ctx.md_ctx),
+                               exec(mock_image_ctx.header_oid, _, "rbd", "snap_list", _, _, _));
+    if (r < 0) {
+      expect.WillOnce(Return(r));
+    } else {
+      expect.WillOnce(DoDefault());
+    }
+  }
+
+  void expect_v1_get_locks(MockImageCtx &mock_image_ctx, int r) {
+    auto &expect = EXPECT_CALL(get_mock_io_ctx(mock_image_ctx.md_ctx),
+                               exec(mock_image_ctx.header_oid, _, "lock", "get_info", _, _, _));
+    if (r < 0) {
+      expect.WillOnce(Return(r));
+    } else {
+      expect.WillOnce(DoDefault());
+    }
+  }
+
+  void expect_get_mutable_metadata(MockImageCtx &mock_image_ctx, int r) {
+    auto &expect = EXPECT_CALL(get_mock_io_ctx(mock_image_ctx.md_ctx),
+                               exec(mock_image_ctx.header_oid, _, "rbd", "get_size", _, _, _));
+    if (r < 0) {
+      expect.WillOnce(Return(r));
+    } else {
+      expect.WillOnce(DoDefault());
+      EXPECT_CALL(get_mock_io_ctx(mock_image_ctx.md_ctx),
+                  exec(mock_image_ctx.header_oid, _, "rbd", "get_features", _, _, _))
+                    .WillOnce(DoDefault());
+      EXPECT_CALL(get_mock_io_ctx(mock_image_ctx.md_ctx),
+                  exec(mock_image_ctx.header_oid, _, "rbd", "get_snapcontext", _, _, _))
+                    .WillOnce(DoDefault());
+      EXPECT_CALL(get_mock_io_ctx(mock_image_ctx.md_ctx),
+                  exec(mock_image_ctx.header_oid, _, "rbd", "get_parent", _, _, _))
+                    .WillOnce(DoDefault());
+      EXPECT_CALL(get_mock_io_ctx(mock_image_ctx.md_ctx),
+                  exec(mock_image_ctx.header_oid, _, "lock", "get_info", _, _, _))
+                    .WillOnce(DoDefault());
+    }
+  }
+
+  void expect_get_flags(MockImageCtx &mock_image_ctx, int r) {
+    auto &expect = EXPECT_CALL(get_mock_io_ctx(mock_image_ctx.md_ctx),
+                               exec(mock_image_ctx.header_oid, _, "rbd", "get_flags", _, _, _));
+    if (r < 0) {
+      expect.WillOnce(Return(r));
+    } else {
+      expect.WillOnce(DoDefault());
+    }
+  }
+
+  void expect_get_snapshots(MockImageCtx &mock_image_ctx, int r) {
+    auto &expect = EXPECT_CALL(get_mock_io_ctx(mock_image_ctx.md_ctx),
+                               exec(mock_image_ctx.header_oid, _, "rbd", "get_snapshot_name", _, _, _));
+    if (r < 0) {
+      expect.WillOnce(Return(r));
+    } else {
+      expect.WillOnce(DoDefault());
+      EXPECT_CALL(get_mock_io_ctx(mock_image_ctx.md_ctx),
+                  exec(mock_image_ctx.header_oid, _, "rbd", "get_size", _, _, _))
+                    .WillOnce(DoDefault());
+      EXPECT_CALL(get_mock_io_ctx(mock_image_ctx.md_ctx),
+                  exec(mock_image_ctx.header_oid, _, "rbd", "get_parent", _, _, _))
+                    .WillOnce(DoDefault());
+      EXPECT_CALL(get_mock_io_ctx(mock_image_ctx.md_ctx),
+                  exec(mock_image_ctx.header_oid, _, "rbd", "get_protection_status", _, _, _))
+                    .WillOnce(DoDefault());
+    }
+  }
+
+  void expect_add_snap(MockImageCtx &mock_image_ctx,
+                       const std::string &snap_name, uint64_t snap_id) {
+    EXPECT_CALL(mock_image_ctx, add_snap(snap_name, snap_id, _, _, _, _));
+  }
+
+  void expect_init_exclusive_lock(MockImageCtx &mock_image_ctx,
+                                  MockExclusiveLock &mock_exclusive_lock,
+                                  int r) {
+    EXPECT_CALL(mock_image_ctx, create_exclusive_lock())
+                  .WillOnce(Return(&mock_exclusive_lock));
+    EXPECT_CALL(mock_exclusive_lock, init(_))
+                  .WillOnce(CompleteContext(r, mock_image_ctx.image_ctx->op_work_queue));
+  }
+
+  void expect_shut_down_exclusive_lock(MockImageCtx &mock_image_ctx,
+                                       MockExclusiveLock &mock_exclusive_lock,
+                                       int r) {
+    EXPECT_CALL(mock_exclusive_lock, shut_down(_))
+                  .WillOnce(DoAll(ShutDownExclusiveLock(&mock_image_ctx),
+                                  CompleteContext(r, mock_image_ctx.image_ctx->op_work_queue)));
+  }
+
+  void expect_init_layout(MockImageCtx &mock_image_ctx) {
+    EXPECT_CALL(mock_image_ctx, init_layout());
+  }
+
+  void expect_test_features(MockImageCtx &mock_image_ctx) {
+    EXPECT_CALL(mock_image_ctx, test_features(_, _))
+                  .WillRepeatedly(TestFeatures(&mock_image_ctx));
+  }
+
+  void expect_refresh_parent_is_required(MockRefreshParentRequest &mock_refresh_parent_request,
+                                         bool required) {
+    EXPECT_CALL(mock_refresh_parent_request, is_refresh_required())
+                  .WillRepeatedly(Return(required));
+  }
+
+  void expect_refresh_parent_send(MockImageCtx &mock_image_ctx,
+                                  MockRefreshParentRequest &mock_refresh_parent_request,
+                                  int r) {
+    EXPECT_CALL(mock_refresh_parent_request, send())
+                  .WillOnce(FinishRequest(&mock_refresh_parent_request, r,
+                                          &mock_image_ctx));
+  }
+
+  void expect_refresh_parent_apply(MockRefreshParentRequest &mock_refresh_parent_request) {
+    EXPECT_CALL(mock_refresh_parent_request, apply());
+  }
+
+  void expect_refresh_parent_finalize(MockImageCtx &mock_image_ctx,
+                                      MockRefreshParentRequest &mock_refresh_parent_request,
+                                      int r) {
+    EXPECT_CALL(mock_refresh_parent_request, finalize(_))
+                  .WillOnce(CompleteContext(r, mock_image_ctx.image_ctx->op_work_queue));
+  }
+
+  void expect_is_exclusive_lock_owner(MockExclusiveLock &mock_exclusive_lock,
+                                      bool is_owner) {
+    EXPECT_CALL(mock_exclusive_lock, is_lock_owner()).WillOnce(Return(is_owner));
+  }
+
+  void expect_open_journal(MockImageCtx &mock_image_ctx,
+                           MockJournal &mock_journal, int r) {
+    EXPECT_CALL(mock_image_ctx, create_journal())
+                  .WillOnce(Return(&mock_journal));
+    EXPECT_CALL(mock_journal, open(_))
+                  .WillOnce(CompleteContext(r, mock_image_ctx.image_ctx->op_work_queue));
+  }
+
+  void expect_close_journal(MockImageCtx &mock_image_ctx,
+                            MockJournal &mock_journal, int r) {
+    EXPECT_CALL(mock_journal, close(_))
+                  .WillOnce(CompleteContext(r, mock_image_ctx.image_ctx->op_work_queue));
+  }
+
+  void expect_open_object_map(MockImageCtx &mock_image_ctx,
+                              MockObjectMap &mock_object_map, int r) {
+    EXPECT_CALL(mock_image_ctx, create_object_map(_))
+                  .WillOnce(Return(&mock_object_map));
+    EXPECT_CALL(mock_object_map, open(_))
+                  .WillOnce(CompleteContext(r, mock_image_ctx.image_ctx->op_work_queue));
+  }
+
+  void expect_close_object_map(MockImageCtx &mock_image_ctx,
+                               MockObjectMap &mock_object_map, int r) {
+    EXPECT_CALL(mock_object_map, close(_))
+                  .WillOnce(CompleteContext(r, mock_image_ctx.image_ctx->op_work_queue));
+  }
+
+  void expect_get_snap_id(MockImageCtx &mock_image_ctx,
+                          const std::string &snap_name, uint64_t snap_id) {
+    EXPECT_CALL(mock_image_ctx, get_snap_id(snap_name)).WillOnce(Return(snap_id));
+  }
+};
+
+TEST_F(TestMockImageRefreshRequest, SuccessV1) {
+  REQUIRE_FORMAT_V1();
+
+  librbd::ImageCtx *ictx;
+  ASSERT_EQ(0, open_image(m_image_name, &ictx));
+
+  MockImageCtx mock_image_ctx(*ictx);
+  expect_op_work_queue(mock_image_ctx);
+  expect_test_features(mock_image_ctx);
+
+  InSequence seq;
+  expect_v1_read_header(mock_image_ctx, 0);
+  expect_v1_get_snapshots(mock_image_ctx, 0);
+  expect_v1_get_locks(mock_image_ctx, 0);
+  expect_init_layout(mock_image_ctx);
+
+  C_SaferCond ctx;
+  MockRefreshRequest *req = new MockRefreshRequest(mock_image_ctx, &ctx);
+  req->send();
+
+  ASSERT_EQ(0, ctx.wait());
+}
+
+TEST_F(TestMockImageRefreshRequest, SuccessSnapshotV1) {
+  REQUIRE_FORMAT_V1();
+
+  librbd::ImageCtx *ictx;
+  ASSERT_EQ(0, open_image(m_image_name, &ictx));
+  ASSERT_EQ(0, snap_create(*ictx, "snap"));
+
+  MockImageCtx mock_image_ctx(*ictx);
+  expect_op_work_queue(mock_image_ctx);
+  expect_test_features(mock_image_ctx);
+
+  InSequence seq;
+  expect_v1_read_header(mock_image_ctx, 0);
+  expect_v1_get_snapshots(mock_image_ctx, 0);
+  expect_v1_get_locks(mock_image_ctx, 0);
+  expect_init_layout(mock_image_ctx);
+  expect_add_snap(mock_image_ctx, "snap", ictx->snap_ids.begin()->second);
+
+  C_SaferCond ctx;
+  MockRefreshRequest *req = new MockRefreshRequest(mock_image_ctx, &ctx);
+  req->send();
+
+  ASSERT_EQ(0, ctx.wait());
+}
+
+TEST_F(TestMockImageRefreshRequest, SuccessV2) {
+  REQUIRE_FORMAT_V2();
+
+  librbd::ImageCtx *ictx;
+  ASSERT_EQ(0, open_image(m_image_name, &ictx));
+
+  MockImageCtx mock_image_ctx(*ictx);
+  MockRefreshParentRequest mock_refresh_parent_request;
+  MockExclusiveLock mock_exclusive_lock;
+  expect_op_work_queue(mock_image_ctx);
+  expect_test_features(mock_image_ctx);
+
+  InSequence seq;
+  expect_get_mutable_metadata(mock_image_ctx, 0);
+  expect_get_flags(mock_image_ctx, 0);
+  expect_refresh_parent_is_required(mock_refresh_parent_request, false);
+  if (ictx->test_features(RBD_FEATURE_EXCLUSIVE_LOCK)) {
+    expect_init_exclusive_lock(mock_image_ctx, mock_exclusive_lock, 0);
+  }
+
+  C_SaferCond ctx;
+  MockRefreshRequest *req = new MockRefreshRequest(mock_image_ctx, &ctx);
+  req->send();
+
+  ASSERT_EQ(0, ctx.wait());
+}
+
+TEST_F(TestMockImageRefreshRequest, SuccessSnapshotV2) {
+  REQUIRE_FORMAT_V2();
+
+  librbd::ImageCtx *ictx;
+  ASSERT_EQ(0, open_image(m_image_name, &ictx));
+  ASSERT_EQ(0, snap_create(*ictx, "snap"));
+
+  MockImageCtx mock_image_ctx(*ictx);
+  MockRefreshParentRequest mock_refresh_parent_request;
+  MockExclusiveLock mock_exclusive_lock;
+  expect_op_work_queue(mock_image_ctx);
+  expect_test_features(mock_image_ctx);
+
+  InSequence seq;
+  expect_get_mutable_metadata(mock_image_ctx, 0);
+  expect_get_flags(mock_image_ctx, 0);
+  expect_get_flags(mock_image_ctx, 0);
+  expect_get_snapshots(mock_image_ctx, 0);
+  expect_refresh_parent_is_required(mock_refresh_parent_request, false);
+  if (ictx->test_features(RBD_FEATURE_EXCLUSIVE_LOCK)) {
+    expect_init_exclusive_lock(mock_image_ctx, mock_exclusive_lock, 0);
+  }
+  expect_add_snap(mock_image_ctx, "snap", ictx->snap_ids.begin()->second);
+
+  C_SaferCond ctx;
+  MockRefreshRequest *req = new MockRefreshRequest(mock_image_ctx, &ctx);
+  req->send();
+
+  ASSERT_EQ(0, ctx.wait());
+}
+
+TEST_F(TestMockImageRefreshRequest, SuccessSetSnapshotV2) {
+  REQUIRE_FORMAT_V2();
+
+  librbd::ImageCtx *ictx;
+  ASSERT_EQ(0, open_image(m_image_name, &ictx));
+  ASSERT_EQ(0, snap_create(*ictx, "snap"));
+  ASSERT_EQ(0, librbd::snap_set(ictx, "snap"));
+
+  MockImageCtx mock_image_ctx(*ictx);
+  MockRefreshParentRequest mock_refresh_parent_request;
+  MockObjectMap mock_object_map;
+  expect_op_work_queue(mock_image_ctx);
+  expect_test_features(mock_image_ctx);
+
+  InSequence seq;
+  expect_get_mutable_metadata(mock_image_ctx, 0);
+  expect_get_flags(mock_image_ctx, 0);
+  expect_get_flags(mock_image_ctx, 0);
+  expect_get_snapshots(mock_image_ctx, 0);
+  expect_refresh_parent_is_required(mock_refresh_parent_request, false);
+  if (ictx->test_features(RBD_FEATURE_OBJECT_MAP)) {
+    expect_open_object_map(mock_image_ctx, mock_object_map, 0);
+  }
+  expect_add_snap(mock_image_ctx, "snap", ictx->snap_ids.begin()->second);
+  expect_get_snap_id(mock_image_ctx, "snap", ictx->snap_ids.begin()->second);
+
+  C_SaferCond ctx;
+  MockRefreshRequest *req = new MockRefreshRequest(mock_image_ctx, &ctx);
+  req->send();
+
+  ASSERT_EQ(0, ctx.wait());
+}
+
+TEST_F(TestMockImageRefreshRequest, SuccessChild) {
+  REQUIRE_FEATURE(RBD_FEATURE_LAYERING);
+
+  librbd::ImageCtx *ictx;
+  librbd::ImageCtx *ictx2 = nullptr;
+  std::string clone_name = get_temp_image_name();
+
+  ASSERT_EQ(0, open_image(m_image_name, &ictx));
+  ASSERT_EQ(0, snap_create(*ictx, "snap"));
+  ASSERT_EQ(0, snap_protect(*ictx, "snap"));
+  BOOST_SCOPE_EXIT_ALL((&)) {
+    if (ictx2 != nullptr) {
+      close_image(ictx2);
+    }
+
+    librbd::NoOpProgressContext no_op;
+    ASSERT_EQ(0, librbd::remove(m_ioctx, clone_name.c_str(), no_op));
+    ASSERT_EQ(0, ictx->operations->snap_unprotect("snap"));
+  };
+
+  int order = ictx->order;
+  ASSERT_EQ(0, librbd::clone(m_ioctx, m_image_name.c_str(), "snap", m_ioctx,
+                             clone_name.c_str(), ictx->features, &order, 0, 0));
+
+  ASSERT_EQ(0, open_image(clone_name, &ictx2));
+
+  MockImageCtx mock_image_ctx(*ictx2);
+  MockRefreshParentRequest *mock_refresh_parent_request = new MockRefreshParentRequest();
+  MockExclusiveLock mock_exclusive_lock;
+  expect_op_work_queue(mock_image_ctx);
+  expect_test_features(mock_image_ctx);
+
+  InSequence seq;
+  expect_get_mutable_metadata(mock_image_ctx, 0);
+  expect_get_flags(mock_image_ctx, 0);
+  expect_refresh_parent_is_required(*mock_refresh_parent_request, true);
+  expect_refresh_parent_send(mock_image_ctx, *mock_refresh_parent_request, 0);
+  if (ictx->test_features(RBD_FEATURE_EXCLUSIVE_LOCK)) {
+    expect_init_exclusive_lock(mock_image_ctx, mock_exclusive_lock, 0);
+  }
+  expect_refresh_parent_apply(*mock_refresh_parent_request);
+  expect_refresh_parent_finalize(mock_image_ctx, *mock_refresh_parent_request, 0);
+
+  C_SaferCond ctx;
+  MockRefreshRequest *req = new MockRefreshRequest(mock_image_ctx, &ctx);
+  req->send();
+
+  ASSERT_EQ(0, ctx.wait());
+}
+
+TEST_F(TestMockImageRefreshRequest, DisableExclusiveLock) {
+  REQUIRE_FEATURE(RBD_FEATURE_EXCLUSIVE_LOCK);
+
+  librbd::ImageCtx *ictx;
+  ASSERT_EQ(0, open_image(m_image_name, &ictx));
+
+  MockImageCtx mock_image_ctx(*ictx);
+  MockRefreshParentRequest mock_refresh_parent_request;
+
+  MockExclusiveLock *mock_exclusive_lock = new MockExclusiveLock();
+  mock_image_ctx.exclusive_lock = mock_exclusive_lock;
+
+  MockObjectMap mock_object_map;
+  if (ictx->test_features(RBD_FEATURE_OBJECT_MAP)) {
+    mock_image_ctx.object_map = &mock_object_map;
+  }
+
+  MockJournal mock_journal;
+  if (ictx->test_features(RBD_FEATURE_JOURNALING)) {
+    mock_image_ctx.journal = &mock_journal;
+  }
+
+  ASSERT_EQ(0, update_features(ictx,
+                               RBD_FEATURE_EXCLUSIVE_LOCK |
+                               RBD_FEATURE_OBJECT_MAP |
+                               RBD_FEATURE_FAST_DIFF |
+                               RBD_FEATURE_JOURNALING, false));
+
+  expect_op_work_queue(mock_image_ctx);
+  expect_test_features(mock_image_ctx);
+
+  // verify that exclusive lock is properly handled when object map
+  // and journaling were never enabled (or active)
+  InSequence seq;
+  expect_get_mutable_metadata(mock_image_ctx, 0);
+  expect_get_flags(mock_image_ctx, 0);
+  expect_refresh_parent_is_required(mock_refresh_parent_request, false);
+  expect_shut_down_exclusive_lock(mock_image_ctx, *mock_exclusive_lock, 0);
+
+  C_SaferCond ctx;
+  MockRefreshRequest *req = new MockRefreshRequest(mock_image_ctx, &ctx);
+  req->send();
+
+  ASSERT_EQ(0, ctx.wait());
+}
+
+TEST_F(TestMockImageRefreshRequest, EnableJournalWithExclusiveLock) {
+  REQUIRE_FEATURE(RBD_FEATURE_JOURNALING);
+
+  librbd::ImageCtx *ictx;
+  ASSERT_EQ(0, open_image(m_image_name, &ictx));
+
+  ASSERT_EQ(0, update_features(ictx,
+                               RBD_FEATURE_OBJECT_MAP |
+                               RBD_FEATURE_FAST_DIFF, false));
+
+  MockImageCtx mock_image_ctx(*ictx);
+  MockRefreshParentRequest mock_refresh_parent_request;
+
+  MockExclusiveLock mock_exclusive_lock;
+  mock_image_ctx.exclusive_lock = &mock_exclusive_lock;
+
+  MockJournal mock_journal;
+
+  expect_op_work_queue(mock_image_ctx);
+  expect_test_features(mock_image_ctx);
+  expect_is_exclusive_lock_owner(mock_exclusive_lock, true);
+
+  // journal should be immediately opened if exclusive lock owned
+  InSequence seq;
+  expect_get_mutable_metadata(mock_image_ctx, 0);
+  expect_get_flags(mock_image_ctx, 0);
+  expect_refresh_parent_is_required(mock_refresh_parent_request, false);
+  expect_open_journal(mock_image_ctx, mock_journal, 0);
+
+  C_SaferCond ctx;
+  MockRefreshRequest *req = new MockRefreshRequest(mock_image_ctx, &ctx);
+  req->send();
+
+  ASSERT_EQ(0, ctx.wait());
+}
+
+TEST_F(TestMockImageRefreshRequest, EnableJournalWithoutExclusiveLock) {
+  REQUIRE_FEATURE(RBD_FEATURE_JOURNALING);
+
+  librbd::ImageCtx *ictx;
+  ASSERT_EQ(0, open_image(m_image_name, &ictx));
+
+  ASSERT_EQ(0, update_features(ictx,
+                               RBD_FEATURE_OBJECT_MAP |
+                               RBD_FEATURE_FAST_DIFF, false));
+
+  MockImageCtx mock_image_ctx(*ictx);
+  MockRefreshParentRequest mock_refresh_parent_request;
+
+  MockExclusiveLock mock_exclusive_lock;
+  mock_image_ctx.exclusive_lock = &mock_exclusive_lock;
+
+  expect_op_work_queue(mock_image_ctx);
+  expect_test_features(mock_image_ctx);
+  expect_is_exclusive_lock_owner(mock_exclusive_lock, false);
+
+  // do not open the journal if exclusive lock is not owned
+  InSequence seq;
+  expect_get_mutable_metadata(mock_image_ctx, 0);
+  expect_get_flags(mock_image_ctx, 0);
+  expect_refresh_parent_is_required(mock_refresh_parent_request, false);
+
+  C_SaferCond ctx;
+  MockRefreshRequest *req = new MockRefreshRequest(mock_image_ctx, &ctx);
+  req->send();
+
+  ASSERT_EQ(0, ctx.wait());
+}
+
+TEST_F(TestMockImageRefreshRequest, DisableJournal) {
+  REQUIRE_FEATURE(RBD_FEATURE_JOURNALING);
+
+  librbd::ImageCtx *ictx;
+  ASSERT_EQ(0, open_image(m_image_name, &ictx));
+
+  MockImageCtx mock_image_ctx(*ictx);
+  MockRefreshParentRequest mock_refresh_parent_request;
+
+  MockExclusiveLock mock_exclusive_lock;
+  mock_image_ctx.exclusive_lock = &mock_exclusive_lock;
+
+  MockObjectMap mock_object_map;
+  if (ictx->test_features(RBD_FEATURE_OBJECT_MAP)) {
+    mock_image_ctx.object_map = &mock_object_map;
+  }
+
+  MockJournal *mock_journal = new MockJournal();
+  mock_image_ctx.journal = mock_journal;
+
+  ASSERT_EQ(0, update_features(ictx, RBD_FEATURE_JOURNALING, false));
+
+  expect_op_work_queue(mock_image_ctx);
+  expect_test_features(mock_image_ctx);
+
+  // verify journal is closed if feature disabled
+  InSequence seq;
+  expect_get_mutable_metadata(mock_image_ctx, 0);
+  expect_get_flags(mock_image_ctx, 0);
+  expect_refresh_parent_is_required(mock_refresh_parent_request, false);
+  expect_close_journal(mock_image_ctx, *mock_journal, 0);
+
+  C_SaferCond ctx;
+  MockRefreshRequest *req = new MockRefreshRequest(mock_image_ctx, &ctx);
+  req->send();
+
+  ASSERT_EQ(0, ctx.wait());
+}
+
+TEST_F(TestMockImageRefreshRequest, EnableObjectMapWithExclusiveLock) {
+  REQUIRE_FEATURE(RBD_FEATURE_OBJECT_MAP);
+
+  librbd::ImageCtx *ictx;
+  ASSERT_EQ(0, open_image(m_image_name, &ictx));
+
+  ASSERT_EQ(0, update_features(ictx, RBD_FEATURE_JOURNALING, false));
+
+  MockImageCtx mock_image_ctx(*ictx);
+  MockRefreshParentRequest mock_refresh_parent_request;
+
+  MockExclusiveLock mock_exclusive_lock;
+  mock_image_ctx.exclusive_lock = &mock_exclusive_lock;
+
+  MockObjectMap mock_object_map;
+
+  expect_op_work_queue(mock_image_ctx);
+  expect_test_features(mock_image_ctx);
+  expect_is_exclusive_lock_owner(mock_exclusive_lock, true);
+
+  // object map should be immediately opened if exclusive lock owned
+  InSequence seq;
+  expect_get_mutable_metadata(mock_image_ctx, 0);
+  expect_get_flags(mock_image_ctx, 0);
+  expect_refresh_parent_is_required(mock_refresh_parent_request, false);
+  expect_open_object_map(mock_image_ctx, mock_object_map, 0);
+
+  C_SaferCond ctx;
+  MockRefreshRequest *req = new MockRefreshRequest(mock_image_ctx, &ctx);
+  req->send();
+
+  ASSERT_EQ(0, ctx.wait());
+}
+
+TEST_F(TestMockImageRefreshRequest, EnableObjectMapWithoutExclusiveLock) {
+  REQUIRE_FEATURE(RBD_FEATURE_OBJECT_MAP);
+
+  librbd::ImageCtx *ictx;
+  ASSERT_EQ(0, open_image(m_image_name, &ictx));
+
+  ASSERT_EQ(0, update_features(ictx, RBD_FEATURE_JOURNALING, false));
+
+  MockImageCtx mock_image_ctx(*ictx);
+  MockRefreshParentRequest mock_refresh_parent_request;
+
+  MockExclusiveLock mock_exclusive_lock;
+  mock_image_ctx.exclusive_lock = &mock_exclusive_lock;
+
+  expect_op_work_queue(mock_image_ctx);
+  expect_test_features(mock_image_ctx);
+  expect_is_exclusive_lock_owner(mock_exclusive_lock, false);
+
+  // do not open the object map if exclusive lock is not owned
+  InSequence seq;
+  expect_get_mutable_metadata(mock_image_ctx, 0);
+  expect_get_flags(mock_image_ctx, 0);
+  expect_refresh_parent_is_required(mock_refresh_parent_request, false);
+
+  C_SaferCond ctx;
+  MockRefreshRequest *req = new MockRefreshRequest(mock_image_ctx, &ctx);
+  req->send();
+
+  ASSERT_EQ(0, ctx.wait());
+}
+
+TEST_F(TestMockImageRefreshRequest, DisableObjectMap) {
+  REQUIRE_FEATURE(RBD_FEATURE_OBJECT_MAP);
+
+  librbd::ImageCtx *ictx;
+  ASSERT_EQ(0, open_image(m_image_name, &ictx));
+
+  MockImageCtx mock_image_ctx(*ictx);
+  MockRefreshParentRequest mock_refresh_parent_request;
+
+  MockExclusiveLock mock_exclusive_lock;
+  mock_image_ctx.exclusive_lock = &mock_exclusive_lock;
+
+  MockObjectMap *mock_object_map = new MockObjectMap();
+  mock_image_ctx.object_map = mock_object_map;
+
+  MockJournal mock_journal;
+  if (ictx->test_features(RBD_FEATURE_JOURNALING)) {
+    mock_image_ctx.journal = &mock_journal;
+  }
+
+  ASSERT_EQ(0, update_features(ictx,
+                               RBD_FEATURE_OBJECT_MAP |
+                               RBD_FEATURE_FAST_DIFF, false));
+
+  expect_op_work_queue(mock_image_ctx);
+  expect_test_features(mock_image_ctx);
+
+  // verify object map is closed if feature disabled
+  InSequence seq;
+  expect_get_mutable_metadata(mock_image_ctx, 0);
+  expect_get_flags(mock_image_ctx, 0);
+  expect_refresh_parent_is_required(mock_refresh_parent_request, false);
+  expect_close_object_map(mock_image_ctx, *mock_object_map, 0);
+
+  C_SaferCond ctx;
+  MockRefreshRequest *req = new MockRefreshRequest(mock_image_ctx, &ctx);
+  req->send();
+
+  ASSERT_EQ(0, ctx.wait());
+}
+
+} // namespace image
+} // namespace librbd
diff --git a/src/test/librbd/journal/test_Entries.cc b/src/test/librbd/journal/test_Entries.cc
index d651d6f..91c5c78 100644
--- a/src/test/librbd/journal/test_Entries.cc
+++ b/src/test/librbd/journal/test_Entries.cc
@@ -7,7 +7,7 @@
 #include "librbd/AioImageRequestWQ.h"
 #include "librbd/internal.h"
 #include "librbd/Journal.h"
-#include "librbd/journal/Entries.h"
+#include "librbd/journal/Types.h"
 #include "journal/Journaler.h"
 #include "journal/ReplayEntry.h"
 #include "journal/ReplayHandler.h"
@@ -67,7 +67,7 @@ public:
     journal::Journaler *journaler = new journal::Journaler(
       ictx->md_ctx, ictx->id, "dummy client", 1);
 
-    int r = journaler->register_client("unit test client");
+    int r = journaler->register_client(bufferlist());
     if (r < 0) {
       ADD_FAILURE() << "failed to register journal client";
       delete journaler;
diff --git a/src/test/librbd/journal/test_Replay.cc b/src/test/librbd/journal/test_Replay.cc
index f0f0409..c144cbf 100644
--- a/src/test/librbd/journal/test_Replay.cc
+++ b/src/test/librbd/journal/test_Replay.cc
@@ -10,7 +10,7 @@
 #include "librbd/ImageCtx.h"
 #include "librbd/ImageWatcher.h"
 #include "librbd/Journal.h"
-#include "librbd/journal/Entries.h"
+#include "librbd/journal/Types.h"
 
 void register_test_journal_replay() {
 }
diff --git a/src/test/librbd/journal/test_mock_Replay.cc b/src/test/librbd/journal/test_mock_Replay.cc
index 81e8dd1..5302b8f 100644
--- a/src/test/librbd/journal/test_mock_Replay.cc
+++ b/src/test/librbd/journal/test_mock_Replay.cc
@@ -5,8 +5,8 @@
 #include "test/librbd/test_support.h"
 #include "test/librbd/mock/MockImageCtx.h"
 #include "librbd/AioImageRequest.h"
-#include "librbd/journal/Entries.h"
 #include "librbd/journal/Replay.h"
+#include "librbd/journal/Types.h"
 #include "gmock/gmock.h"
 #include "gtest/gtest.h"
 #include <boost/scope_exit.hpp>
diff --git a/src/test/librbd/mock/MockExclusiveLock.h b/src/test/librbd/mock/MockExclusiveLock.h
index 8227d3e..430dbc7 100644
--- a/src/test/librbd/mock/MockExclusiveLock.h
+++ b/src/test/librbd/mock/MockExclusiveLock.h
@@ -17,6 +17,7 @@ struct MockExclusiveLock {
 
   MOCK_METHOD1(assert_header_locked, void(librados::ObjectWriteOperation *));
 
+  MOCK_METHOD1(init, void(Context*));
   MOCK_METHOD1(shut_down, void(Context*));
 };
 
diff --git a/src/test/librbd/mock/MockImageCtx.h b/src/test/librbd/mock/MockImageCtx.h
index 7d198f5..0c9b7a1 100644
--- a/src/test/librbd/mock/MockImageCtx.h
+++ b/src/test/librbd/mock/MockImageCtx.h
@@ -27,21 +27,31 @@ struct MockImageCtx {
   MockImageCtx(librbd::ImageCtx &image_ctx)
     : image_ctx(&image_ctx),
       cct(image_ctx.cct),
+      snap_name(image_ctx.snap_name),
       snap_id(image_ctx.snap_id),
+      snap_exists(image_ctx.snap_exists),
       snapc(image_ctx.snapc),
       snaps(image_ctx.snaps),
       snap_info(image_ctx.snap_info),
+      snap_ids(image_ctx.snap_ids),
       object_cacher(image_ctx.object_cacher),
       old_format(image_ctx.old_format),
       read_only(image_ctx.read_only),
+      lockers(image_ctx.lockers),
+      exclusive_locked(image_ctx.exclusive_locked),
+      lock_tag(image_ctx.lock_tag),
       owner_lock("owner_lock"),
       md_lock("md_lock"),
+      cache_lock("cache_lock"),
       snap_lock("snap_lock"),
       parent_lock("parent_lock"),
       object_map_lock("object_map_lock"),
       async_ops_lock("async_ops_lock"),
+      order(image_ctx.order),
       size(image_ctx.size),
       features(image_ctx.features),
+      flags(image_ctx.flags),
+      object_prefix(image_ctx.object_prefix),
       header_oid(image_ctx.header_oid),
       id(image_ctx.id),
       parent_md(image_ctx.parent_md),
@@ -95,6 +105,8 @@ struct MockImageCtx {
     ctx.wait();
   }
 
+  MOCK_METHOD0(init_layout, void());
+
   MOCK_CONST_METHOD1(get_object_name, std::string(uint64_t));
   MOCK_CONST_METHOD0(get_current_size, uint64_t());
   MOCK_CONST_METHOD1(get_image_size, uint64_t(librados::snap_t));
@@ -120,37 +132,53 @@ struct MockImageCtx {
   MOCK_METHOD1(shut_down_cache, void(Context *));
 
   MOCK_CONST_METHOD1(test_features, bool(uint64_t test_features));
+  MOCK_CONST_METHOD2(test_features, bool(uint64_t test_features,
+                                         const RWLock &in_snap_lock));
 
   MOCK_METHOD1(cancel_async_requests, void(Context*));
 
+  MOCK_METHOD0(create_exclusive_lock, MockExclusiveLock*());
   MOCK_METHOD1(create_object_map, MockObjectMap*(uint64_t));
   MOCK_METHOD0(create_journal, MockJournal*());
 
   ImageCtx *image_ctx;
   CephContext *cct;
 
+  std::string snap_name;
   uint64_t snap_id;
+  bool snap_exists;
+
   ::SnapContext snapc;
   std::vector<librados::snap_t> snaps;
   std::map<librados::snap_t, SnapInfo> snap_info;
+  std::map<std::string, librados::snap_t> snap_ids;
 
   ObjectCacher *object_cacher;
 
   bool old_format;
   bool read_only;
 
+  std::map<rados::cls::lock::locker_id_t,
+           rados::cls::lock::locker_info_t> lockers;
+  bool exclusive_locked;
+  std::string lock_tag;
+
   librados::IoCtx md_ctx;
   librados::IoCtx data_ctx;
 
   RWLock owner_lock;
   RWLock md_lock;
+  Mutex cache_lock;
   RWLock snap_lock;
   RWLock parent_lock;
   RWLock object_map_lock;
   Mutex async_ops_lock;
 
+  uint8_t order;
   uint64_t size;
   uint64_t features;
+  uint64_t flags;
+  std::string object_prefix;
   std::string header_oid;
   std::string id;
   parent_info parent_md;
diff --git a/src/test/librbd/mock/MockJournal.h b/src/test/librbd/mock/MockJournal.h
index b71505e..9fe3518 100644
--- a/src/test/librbd/mock/MockJournal.h
+++ b/src/test/librbd/mock/MockJournal.h
@@ -6,7 +6,7 @@
 
 #include "gmock/gmock.h"
 #include "librbd/Journal.h"
-#include "librbd/journal/Entries.h"
+#include "librbd/journal/Types.h"
 
 namespace librbd {
 
diff --git a/src/test/librbd/mock/MockObjectMap.h b/src/test/librbd/mock/MockObjectMap.h
index 5a0aeec..ae452a5 100644
--- a/src/test/librbd/mock/MockObjectMap.h
+++ b/src/test/librbd/mock/MockObjectMap.h
@@ -12,9 +12,7 @@ struct MockObjectMap {
   MOCK_CONST_METHOD1(enabled, bool(const RWLock &object_map_lock));
 
   MOCK_METHOD1(open, void(Context *on_finish));
-
-  MOCK_METHOD1(lock, void(Context *on_finish));
-  MOCK_METHOD1(unlock, void(Context *on_finish));
+  MOCK_METHOD1(close, void(Context *on_finish));
 
   MOCK_METHOD3(aio_resize, void(uint64_t new_size, uint8_t default_object_state,
                                 Context *on_finish));
diff --git a/src/test/librbd/object_map/test_mock_RefreshRequest.cc b/src/test/librbd/object_map/test_mock_RefreshRequest.cc
index 2af4517..2abfd8e 100644
--- a/src/test/librbd/object_map/test_mock_RefreshRequest.cc
+++ b/src/test/librbd/object_map/test_mock_RefreshRequest.cc
@@ -9,9 +9,38 @@
 #include "common/bit_vector.hpp"
 #include "librbd/ObjectMap.h"
 #include "librbd/object_map/RefreshRequest.h"
+#include "librbd/object_map/LockRequest.h"
+
+namespace librbd {
+namespace object_map {
+
+template <>
+class LockRequest<MockImageCtx> {
+public:
+  static LockRequest *s_instance;
+  static LockRequest *create(MockImageCtx &image_ctx, Context *on_finish) {
+    assert(s_instance != nullptr);
+    s_instance->on_finish = on_finish;
+    return s_instance;
+  }
+
+  Context *on_finish = nullptr;
+
+  LockRequest() {
+    s_instance = this;
+  }
+
+  MOCK_METHOD0(send, void());
+};
+
+LockRequest<MockImageCtx> *LockRequest<MockImageCtx>::s_instance = nullptr;
+
+} // namespace object_map
+} // namespace librbd
 
 // template definitions
 #include "librbd/object_map/RefreshRequest.cc"
+#include "librbd/object_map/LockRequest.cc"
 
 namespace librbd {
 namespace object_map {
@@ -28,10 +57,19 @@ public:
   static const uint64_t TEST_SNAP_ID = 123;
 
   typedef RefreshRequest<MockImageCtx> MockRefreshRequest;
+  typedef LockRequest<MockImageCtx> MockLockRequest;
+
+  void expect_object_map_lock(MockImageCtx &mock_image_ctx,
+                              MockLockRequest &mock_lock_request) {
+    EXPECT_CALL(mock_lock_request, send())
+                  .WillOnce(FinishRequest(&mock_lock_request, 0,
+                                          &mock_image_ctx));
+  }
 
   void expect_object_map_load(MockImageCtx &mock_image_ctx,
-                              ceph::BitVector<2> *object_map, int r) {
-    std::string oid(ObjectMap::object_map_name(mock_image_ctx.id, TEST_SNAP_ID));
+                              ceph::BitVector<2> *object_map, uint64_t snap_id,
+                              int r) {
+    std::string oid(ObjectMap::object_map_name(mock_image_ctx.id, snap_id));
     auto &expect = EXPECT_CALL(get_mock_io_ctx(mock_image_ctx.md_ctx),
                                exec(oid, _, "rbd", "object_map_load", _, _, _));
     if (r < 0) {
@@ -48,8 +86,9 @@ public:
     }
   }
 
-  void expect_get_image_size(MockImageCtx &mock_image_ctx, uint64_t size) {
-    EXPECT_CALL(mock_image_ctx, get_image_size(TEST_SNAP_ID))
+  void expect_get_image_size(MockImageCtx &mock_image_ctx, uint64_t snap_id,
+                             uint64_t size) {
+    EXPECT_CALL(mock_image_ctx, get_image_size(snap_id))
                   .WillOnce(Return(size));
   }
 
@@ -85,7 +124,35 @@ public:
   }
 };
 
-TEST_F(TestMockObjectMapRefreshRequest, Success) {
+TEST_F(TestMockObjectMapRefreshRequest, SuccessHead) {
+  librbd::ImageCtx *ictx;
+  ASSERT_EQ(0, open_image(m_image_name, &ictx));
+
+  MockImageCtx mock_image_ctx(*ictx);
+
+  ceph::BitVector<2> on_disk_object_map;
+  init_object_map(mock_image_ctx, &on_disk_object_map);
+
+  C_SaferCond ctx;
+  ceph::BitVector<2> object_map;
+  MockLockRequest mock_lock_request;
+  MockRefreshRequest *req = new MockRefreshRequest(mock_image_ctx, &object_map,
+                                                   CEPH_NOSNAP, &ctx);
+
+  InSequence seq;
+  expect_get_image_size(mock_image_ctx, CEPH_NOSNAP,
+                        mock_image_ctx.image_ctx->size);
+  expect_object_map_lock(mock_image_ctx, mock_lock_request);
+  expect_object_map_load(mock_image_ctx, &on_disk_object_map, CEPH_NOSNAP, 0);
+  expect_get_image_size(mock_image_ctx, CEPH_NOSNAP,
+                        mock_image_ctx.image_ctx->size);
+  req->send();
+  ASSERT_EQ(0, ctx.wait());
+
+  ASSERT_EQ(on_disk_object_map, object_map);
+}
+
+TEST_F(TestMockObjectMapRefreshRequest, SuccessSnapshot) {
   librbd::ImageCtx *ictx;
   ASSERT_EQ(0, open_image(m_image_name, &ictx));
 
@@ -100,9 +167,11 @@ TEST_F(TestMockObjectMapRefreshRequest, Success) {
                                                    TEST_SNAP_ID, &ctx);
 
   InSequence seq;
-  expect_get_image_size(mock_image_ctx, mock_image_ctx.image_ctx->size);
-  expect_object_map_load(mock_image_ctx, &on_disk_object_map, 0);
-  expect_get_image_size(mock_image_ctx, mock_image_ctx.image_ctx->size);
+  expect_get_image_size(mock_image_ctx, TEST_SNAP_ID,
+                        mock_image_ctx.image_ctx->size);
+  expect_object_map_load(mock_image_ctx, &on_disk_object_map, TEST_SNAP_ID, 0);
+  expect_get_image_size(mock_image_ctx, TEST_SNAP_ID,
+                        mock_image_ctx.image_ctx->size);
   req->send();
   ASSERT_EQ(0, ctx.wait());
 
@@ -124,12 +193,14 @@ TEST_F(TestMockObjectMapRefreshRequest, LoadError) {
                                                    TEST_SNAP_ID, &ctx);
 
   InSequence seq;
-  expect_get_image_size(mock_image_ctx, mock_image_ctx.image_ctx->size);
-  expect_object_map_load(mock_image_ctx, nullptr, -ENOENT);
+  expect_get_image_size(mock_image_ctx, TEST_SNAP_ID,
+                        mock_image_ctx.image_ctx->size);
+  expect_object_map_load(mock_image_ctx, nullptr, TEST_SNAP_ID, -ENOENT);
 
   MockInvalidateRequest invalidate_request;
   expect_invalidate_request(mock_image_ctx, invalidate_request);
-  expect_get_image_size(mock_image_ctx, mock_image_ctx.image_ctx->size);
+  expect_get_image_size(mock_image_ctx, TEST_SNAP_ID,
+                        mock_image_ctx.image_ctx->size);
 
   req->send();
   ASSERT_EQ(0, ctx.wait());
@@ -150,14 +221,16 @@ TEST_F(TestMockObjectMapRefreshRequest, LoadCorrupt) {
                                                    TEST_SNAP_ID, &ctx);
 
   InSequence seq;
-  expect_get_image_size(mock_image_ctx, mock_image_ctx.image_ctx->size);
-  expect_object_map_load(mock_image_ctx, nullptr, -EINVAL);
+  expect_get_image_size(mock_image_ctx, TEST_SNAP_ID,
+                        mock_image_ctx.image_ctx->size);
+  expect_object_map_load(mock_image_ctx, nullptr, TEST_SNAP_ID, -EINVAL);
 
   MockInvalidateRequest invalidate_request;
   expect_invalidate_request(mock_image_ctx, invalidate_request);
   expect_truncate_request(mock_image_ctx);
   expect_object_map_resize(mock_image_ctx, on_disk_object_map.size(), 0);
-  expect_get_image_size(mock_image_ctx, mock_image_ctx.image_ctx->size);
+  expect_get_image_size(mock_image_ctx, TEST_SNAP_ID,
+                        mock_image_ctx.image_ctx->size);
 
   req->send();
   ASSERT_EQ(0, ctx.wait());
@@ -180,13 +253,15 @@ TEST_F(TestMockObjectMapRefreshRequest, TooSmall) {
                                                    TEST_SNAP_ID, &ctx);
 
   InSequence seq;
-  expect_get_image_size(mock_image_ctx, mock_image_ctx.image_ctx->size);
-  expect_object_map_load(mock_image_ctx, &small_object_map, 0);
+  expect_get_image_size(mock_image_ctx, TEST_SNAP_ID,
+                        mock_image_ctx.image_ctx->size);
+  expect_object_map_load(mock_image_ctx, &small_object_map, TEST_SNAP_ID, 0);
 
   MockInvalidateRequest invalidate_request;
   expect_invalidate_request(mock_image_ctx, invalidate_request);
   expect_object_map_resize(mock_image_ctx, on_disk_object_map.size(), 0);
-  expect_get_image_size(mock_image_ctx, mock_image_ctx.image_ctx->size);
+  expect_get_image_size(mock_image_ctx, TEST_SNAP_ID,
+                        mock_image_ctx.image_ctx->size);
 
   req->send();
   ASSERT_EQ(0, ctx.wait());
@@ -210,9 +285,11 @@ TEST_F(TestMockObjectMapRefreshRequest, TooLarge) {
                                                    TEST_SNAP_ID, &ctx);
 
   InSequence seq;
-  expect_get_image_size(mock_image_ctx, mock_image_ctx.image_ctx->size);
-  expect_object_map_load(mock_image_ctx, &large_object_map, 0);
-  expect_get_image_size(mock_image_ctx, mock_image_ctx.image_ctx->size);
+  expect_get_image_size(mock_image_ctx, TEST_SNAP_ID,
+                        mock_image_ctx.image_ctx->size);
+  expect_object_map_load(mock_image_ctx, &large_object_map, TEST_SNAP_ID, 0);
+  expect_get_image_size(mock_image_ctx, TEST_SNAP_ID,
+                        mock_image_ctx.image_ctx->size);
   req->send();
   ASSERT_EQ(0, ctx.wait());
 }
@@ -234,13 +311,15 @@ TEST_F(TestMockObjectMapRefreshRequest, ResizeError) {
                                                    TEST_SNAP_ID, &ctx);
 
   InSequence seq;
-  expect_get_image_size(mock_image_ctx, mock_image_ctx.image_ctx->size);
-  expect_object_map_load(mock_image_ctx, &small_object_map, 0);
+  expect_get_image_size(mock_image_ctx, TEST_SNAP_ID,
+                        mock_image_ctx.image_ctx->size);
+  expect_object_map_load(mock_image_ctx, &small_object_map, TEST_SNAP_ID, 0);
 
   MockInvalidateRequest invalidate_request;
   expect_invalidate_request(mock_image_ctx, invalidate_request);
   expect_object_map_resize(mock_image_ctx, on_disk_object_map.size(), -ESTALE);
-  expect_get_image_size(mock_image_ctx, mock_image_ctx.image_ctx->size);
+  expect_get_image_size(mock_image_ctx, TEST_SNAP_ID,
+                        mock_image_ctx.image_ctx->size);
 
   req->send();
   ASSERT_EQ(0, ctx.wait());
diff --git a/src/test/librbd/operation/test_mock_SnapshotCreateRequest.cc b/src/test/librbd/operation/test_mock_SnapshotCreateRequest.cc
index d401943..96fe0cd 100644
--- a/src/test/librbd/operation/test_mock_SnapshotCreateRequest.cc
+++ b/src/test/librbd/operation/test_mock_SnapshotCreateRequest.cc
@@ -105,6 +105,8 @@ public:
 };
 
 TEST_F(TestMockOperationSnapshotCreateRequest, Success) {
+  REQUIRE_FORMAT_V2();
+
   librbd::ImageCtx *ictx;
   ASSERT_EQ(0, open_image(m_image_name, &ictx));
 
@@ -127,8 +129,10 @@ TEST_F(TestMockOperationSnapshotCreateRequest, Success) {
   expect_block_writes(mock_image_ctx);
   expect_allocate_snap_id(mock_image_ctx, 0);
   expect_snap_create(mock_image_ctx, 0);
-  expect_update_snap_context(mock_image_ctx);
-  expect_object_map_snap_create(mock_image_ctx);
+  if (!mock_image_ctx.old_format) {
+    expect_update_snap_context(mock_image_ctx);
+    expect_object_map_snap_create(mock_image_ctx);
+  }
   expect_unblock_writes(mock_image_ctx);
 
   C_SaferCond cond_ctx;
@@ -192,8 +196,10 @@ TEST_F(TestMockOperationSnapshotCreateRequest, CreateSnapStale) {
   expect_block_writes(mock_image_ctx);
   expect_allocate_snap_id(mock_image_ctx, -ESTALE);
   expect_snap_create(mock_image_ctx, -ESTALE);
-  expect_update_snap_context(mock_image_ctx);
-  expect_object_map_snap_create(mock_image_ctx);
+  if (!mock_image_ctx.old_format) {
+    expect_update_snap_context(mock_image_ctx);
+    expect_object_map_snap_create(mock_image_ctx);
+  }
   expect_unblock_writes(mock_image_ctx);
 
   C_SaferCond cond_ctx;
diff --git a/src/test/librbd/operation/test_mock_SnapshotRollbackRequest.cc b/src/test/librbd/operation/test_mock_SnapshotRollbackRequest.cc
index 365d086..dab3418 100644
--- a/src/test/librbd/operation/test_mock_SnapshotRollbackRequest.cc
+++ b/src/test/librbd/operation/test_mock_SnapshotRollbackRequest.cc
@@ -127,12 +127,6 @@ public:
                   .WillOnce(CompleteContext(0, mock_image_ctx.image_ctx->op_work_queue));
   }
 
-  void expect_lock_object_map(MockImageCtx &mock_image_ctx,
-                              MockObjectMap &mock_object_map) {
-    EXPECT_CALL(mock_object_map, lock(_))
-                  .WillOnce(CompleteContext(0, mock_image_ctx.image_ctx->op_work_queue));
-  }
-
   void expect_refresh_object_map(MockImageCtx &mock_image_ctx,
                                  MockObjectMap &mock_object_map) {
     if (mock_image_ctx.object_map != nullptr) {
diff --git a/src/test/librbd/test_ImageWatcher.cc b/src/test/librbd/test_ImageWatcher.cc
index c816853..7ae6643 100644
--- a/src/test/librbd/test_ImageWatcher.cc
+++ b/src/test/librbd/test_ImageWatcher.cc
@@ -47,7 +47,7 @@ public:
 
   class WatchCtx : public librados::WatchCtx2 {
   public:
-    WatchCtx(TestImageWatcher &parent) : m_parent(parent), m_handle(0) {}
+    explicit WatchCtx(TestImageWatcher &parent) : m_parent(parent), m_handle(0) {}
 
     int watch(const librbd::ImageCtx &ictx) {
       m_header_oid = ictx.header_oid;
diff --git a/src/test/librbd/test_librbd.cc b/src/test/librbd/test_librbd.cc
index 15f5084..38e8609 100644
--- a/src/test/librbd/test_librbd.cc
+++ b/src/test/librbd/test_librbd.cc
@@ -285,6 +285,134 @@ TEST_F(TestLibRBD, CreateAndStatPP)
   ioctx.close();
 }
 
+TEST_F(TestLibRBD, OpenAio)
+{
+  rados_ioctx_t ioctx;
+  ASSERT_EQ(0, rados_ioctx_create(_cluster, m_pool_name.c_str(), &ioctx));
+
+  rbd_image_info_t info;
+  rbd_image_t image;
+  int order = 0;
+  std::string name = get_temp_image_name();
+  uint64_t size = 2 << 20;
+
+  ASSERT_EQ(0, create_image(ioctx, name.c_str(), size, &order));
+
+  rbd_completion_t open_comp;
+  ASSERT_EQ(0, rbd_aio_create_completion(NULL, NULL, &open_comp));
+  ASSERT_EQ(0, rbd_aio_open(ioctx, name.c_str(), &image, NULL, open_comp));
+  ASSERT_EQ(0, rbd_aio_wait_for_complete(open_comp));
+  ASSERT_EQ(1, rbd_aio_is_complete(open_comp));
+  ASSERT_EQ(0, rbd_aio_get_return_value(open_comp));
+  rbd_aio_release(open_comp);
+
+  ASSERT_EQ(0, rbd_stat(image, &info, sizeof(info)));
+  printf("image has size %llu and order %d\n", (unsigned long long) info.size, info.order);
+  ASSERT_EQ(info.size, size);
+  ASSERT_EQ(info.order, order);
+
+  rbd_completion_t close_comp;
+  ASSERT_EQ(0, rbd_aio_create_completion(NULL, NULL, &close_comp));
+  ASSERT_EQ(0, rbd_aio_close(image, close_comp));
+  ASSERT_EQ(0, rbd_aio_wait_for_complete(close_comp));
+  ASSERT_EQ(1, rbd_aio_is_complete(close_comp));
+  ASSERT_EQ(0, rbd_aio_get_return_value(close_comp));
+  rbd_aio_release(close_comp);
+
+  rados_ioctx_destroy(ioctx);
+}
+
+TEST_F(TestLibRBD, OpenAioFail)
+{
+  rados_ioctx_t ioctx;
+  ASSERT_EQ(0, rados_ioctx_create(_cluster, m_pool_name.c_str(), &ioctx));
+
+  std::string name = get_temp_image_name();
+  rbd_image_t image;
+  rbd_completion_t open_comp;
+  ASSERT_EQ(0, rbd_aio_create_completion(NULL, NULL, &open_comp));
+  ASSERT_EQ(0, rbd_aio_open(ioctx, name.c_str(), &image, NULL, open_comp));
+  ASSERT_EQ(0, rbd_aio_wait_for_complete(open_comp));
+  ASSERT_EQ(1, rbd_aio_is_complete(open_comp));
+  ASSERT_EQ(-ENOENT, rbd_aio_get_return_value(open_comp));
+  rbd_aio_release(open_comp);
+
+  rados_ioctx_destroy(ioctx);
+}
+
+TEST_F(TestLibRBD, OpenAioPP)
+{
+  librados::IoCtx ioctx;
+  ASSERT_EQ(0, _rados.ioctx_create(m_pool_name.c_str(), ioctx));
+
+  librbd::RBD rbd;
+  librbd::image_info_t info;
+  librbd::Image image;
+  int order = 0;
+  std::string name = get_temp_image_name();
+  uint64_t size = 2 << 20;
+
+  ASSERT_EQ(0, create_image_pp(rbd, ioctx, name.c_str(), size, &order));
+
+  librbd::RBD::AioCompletion *open_comp =
+      new librbd::RBD::AioCompletion(NULL, NULL);
+  ASSERT_EQ(0, rbd.aio_open(ioctx, image, name.c_str(), NULL, open_comp));
+  ASSERT_EQ(0, open_comp->wait_for_complete());
+  ASSERT_EQ(1, open_comp->is_complete());
+  ASSERT_EQ(0, open_comp->get_return_value());
+  open_comp->release();
+
+  ASSERT_EQ(0, image.stat(info, sizeof(info)));
+  ASSERT_EQ(info.size, size);
+  ASSERT_EQ(info.order, order);
+
+  // reopen
+  open_comp = new librbd::RBD::AioCompletion(NULL, NULL);
+  ASSERT_EQ(0, rbd.aio_open(ioctx, image, name.c_str(), NULL, open_comp));
+  ASSERT_EQ(0, open_comp->wait_for_complete());
+  ASSERT_EQ(1, open_comp->is_complete());
+  ASSERT_EQ(0, open_comp->get_return_value());
+  open_comp->release();
+
+  // close
+  librbd::RBD::AioCompletion *close_comp =
+      new librbd::RBD::AioCompletion(NULL, NULL);
+  ASSERT_EQ(0, image.aio_close(close_comp));
+  ASSERT_EQ(0, close_comp->wait_for_complete());
+  ASSERT_EQ(1, close_comp->is_complete());
+  ASSERT_EQ(0, close_comp->get_return_value());
+  close_comp->release();
+
+  // close closed image
+  close_comp = new librbd::RBD::AioCompletion(NULL, NULL);
+  ASSERT_EQ(-EINVAL, image.aio_close(close_comp));
+  close_comp->release();
+
+  ioctx.close();
+}
+
+TEST_F(TestLibRBD, OpenAioFailPP)
+{
+  librados::IoCtx ioctx;
+  ASSERT_EQ(0, _rados.ioctx_create(m_pool_name.c_str(), ioctx));
+
+  {
+    librbd::RBD rbd;
+    librbd::Image image;
+    std::string name = get_temp_image_name();
+
+    librbd::RBD::AioCompletion *open_comp =
+      new librbd::RBD::AioCompletion(NULL, NULL);
+    ASSERT_EQ(0, rbd.aio_open(ioctx, image, name.c_str(), NULL, open_comp));
+    ASSERT_EQ(0, open_comp->wait_for_complete());
+    ASSERT_EQ(1, open_comp->is_complete());
+    ASSERT_EQ(-ENOENT, open_comp->get_return_value());
+    open_comp->release();
+  }
+
+  ioctx.close();
+}
+
 TEST_F(TestLibRBD, ResizeAndStat)
 {
   rados_ioctx_t ioctx;
@@ -2911,8 +3039,12 @@ TEST_F(TestLibRBD, SnapCreateViaLockOwner)
   librbd::Image image1;
   ASSERT_EQ(0, rbd.open(ioctx, image1, name.c_str(), NULL));
 
+  // switch to writeback cache
+  ASSERT_EQ(0, image1.flush());
+
   bufferlist bl;
-  ASSERT_EQ(0, image1.write(0, 0, bl));
+  bl.append(std::string(4096, '1'));
+  ASSERT_EQ(bl.length(), image1.write(0, bl.length(), bl));
 
   bool lock_owner;
   ASSERT_EQ(0, image1.is_exclusive_lock_owner(&lock_owner));
@@ -3188,7 +3320,7 @@ TEST_F(TestLibRBD, ResizeViaLockOwner)
 
 class RBDWriter : public Thread {
  public:
-   RBDWriter(librbd::Image &image) : m_image(image) {};
+   explicit RBDWriter(librbd::Image &image) : m_image(image) {};
  protected:
   void *entry() {
     librbd::image_info_t info;
diff --git a/src/test/librbd/test_mock_Journal.cc b/src/test/librbd/test_mock_Journal.cc
index 0de97a3..c849e60 100644
--- a/src/test/librbd/test_mock_Journal.cc
+++ b/src/test/librbd/test_mock_Journal.cc
@@ -6,10 +6,11 @@
 #include "test/librbd/mock/MockImageCtx.h"
 #include "common/Cond.h"
 #include "common/Mutex.h"
+#include "cls/journal/cls_journal_types.h"
 #include "librbd/Journal.h"
 #include "librbd/Utils.h"
-#include "librbd/journal/Entries.h"
 #include "librbd/journal/Replay.h"
+#include "librbd/journal/Types.h"
 #include "gmock/gmock.h"
 #include "gtest/gtest.h"
 #include <functional>
@@ -91,7 +92,7 @@ struct MockJournaler {
 
   MOCK_METHOD3(start_append, void(int flush_interval, uint64_t flush_bytes,
                                   double flush_age));
-  MOCK_METHOD2(append, MockFutureProxy(const std::string &tag,
+  MOCK_METHOD2(append, MockFutureProxy(uint64_t tag_id,
                                        const bufferlist &bl));
   MOCK_METHOD1(flush, void(Context *on_safe));
   MOCK_METHOD1(stop_append, void(Context *on_safe));
@@ -116,9 +117,13 @@ struct MockJournalerProxy {
   int remove(bool force) {
     return -EINVAL;
   }
-  int register_client(const std::string &description) {
+  int register_client(const bufferlist &data) {
     return -EINVAL;
   }
+  void allocate_tag(uint64_t, const bufferlist &,
+                    cls::journal::Tag*, Context *on_finish) {
+    on_finish->complete(-EINVAL);
+  }
 
   void get_metadata(uint8_t *order, uint8_t *splay_width, int64_t *pool_id) {
     MockJournaler::get_instance().get_metadata(order, splay_width, pool_id);
@@ -145,8 +150,8 @@ struct MockJournalerProxy {
                                                flush_age);
   }
 
-  MockFutureProxy append(const std::string &tag, const bufferlist &bl) {
-    return MockJournaler::get_instance().append(tag, bl);
+  MockFutureProxy append(uint64_t tag_id, const bufferlist &bl) {
+    return MockJournaler::get_instance().append(tag_id, bl);
   }
 
   void flush(Context *on_safe) {
@@ -328,8 +333,8 @@ public:
 
   void expect_replay_process(MockJournalReplay &mock_journal_replay) {
     EXPECT_CALL(mock_journal_replay, process(_, _, _))
-                  .WillOnce(DoAll(WithArg<2>(Invoke(this, &TestMockJournal::save_commit_context)),
-                                  WithArg<1>(CompleteContext(0, NULL))));
+                  .WillOnce(DoAll(WithArg<1>(CompleteContext(0, NULL)),
+                                  WithArg<2>(Invoke(this, &TestMockJournal::save_commit_context))));
   }
 
   void expect_start_append(::journal::MockJournaler &mock_journaler) {
@@ -392,6 +397,11 @@ public:
     m_cond.Signal();
   }
 
+  void wake_up() {
+    Mutex::Locker locker(m_lock);
+    m_cond.Signal();
+  }
+
   void commit_replay(MockImageCtx &mock_image_ctx, Context *on_flush, int r) {
     Contexts commit_contexts;
     std::swap(commit_contexts, m_commit_contexts);
@@ -718,8 +728,10 @@ TEST_F(TestMockJournal, ReplayOnDiskPostFlushError) {
   expect_try_pop_front(mock_journaler, false, mock_replay_entry);
   expect_stop_replay(mock_journaler);
 
-  Context *on_flush;
-  EXPECT_CALL(mock_journal_replay, flush(_)).WillOnce(SaveArg<0>(&on_flush));
+  Context *on_flush = nullptr;
+  EXPECT_CALL(mock_journal_replay, flush(_))
+    .WillOnce(DoAll(SaveArg<0>(&on_flush),
+                    InvokeWithoutArgs(this, &TestMockJournal::wake_up)));
 
   // replay write-to-disk failure should result in replay-restart
   expect_construct_journaler(mock_journaler);
@@ -747,6 +759,13 @@ TEST_F(TestMockJournal, ReplayOnDiskPostFlushError) {
   m_commit_contexts.clear();
 
   // proceed with the flush
+  {
+    // wait for on_flush callback
+    Mutex::Locker locker(m_lock);
+    while (on_flush == nullptr) {
+      m_cond.Wait(m_lock);
+    }
+  }
   on_flush->complete(0);
 
   ASSERT_EQ(0, ctx.wait());
diff --git a/src/test/librbd/test_support.cc b/src/test/librbd/test_support.cc
index 318b5be..db573ef 100644
--- a/src/test/librbd/test_support.cc
+++ b/src/test/librbd/test_support.cc
@@ -23,8 +23,17 @@ bool is_feature_enabled(uint64_t feature) {
 
 int create_image_pp(librbd::RBD &rbd, librados::IoCtx &ioctx,
                     const std::string &name, uint64_t size) {
-  uint64_t features = 0;
-  get_features(&features);
   int order = 0;
-  return rbd.create2(ioctx, name.c_str(), size, features, &order);
+  uint64_t features = 0;
+  if (!get_features(&features)) {
+    // ensure old-format tests actually use the old format
+    librados::Rados rados(ioctx);
+    int r = rados.conf_set("rbd_default_format", "1");
+    if (r < 0) {
+      return r;
+    }
+    return rbd.create(ioctx, name.c_str(), size, &order);
+  } else {
+    return rbd.create2(ioctx, name.c_str(), size, features, &order);
+  }
 }
diff --git a/src/test/librbd/test_support.h b/src/test/librbd/test_support.h
index 5bdd958..63c5e3a 100644
--- a/src/test/librbd/test_support.h
+++ b/src/test/librbd/test_support.h
@@ -17,3 +17,11 @@ int create_image_pp(librbd::RBD &rbd, librados::IoCtx &ioctx,
   } 					  \
 }
 
+#define REQUIRE_FORMAT_V1() { 	          \
+  if (is_feature_enabled(0)) { 	          \
+    std::cout << "SKIPPING" << std::endl; \
+    return SUCCEED(); 			  \
+  } 					  \
+}
+
+#define REQUIRE_FORMAT_V2() REQUIRE_FEATURE(0)
diff --git a/src/test/messenger/simple_dispatcher.h b/src/test/messenger/simple_dispatcher.h
index e239790..0c08003 100644
--- a/src/test/messenger/simple_dispatcher.h
+++ b/src/test/messenger/simple_dispatcher.h
@@ -24,7 +24,7 @@ private:
   Messenger *messenger;
   uint64_t dcount;
 public:
-  SimpleDispatcher(Messenger *msgr);
+  explicit SimpleDispatcher(Messenger *msgr);
   virtual ~SimpleDispatcher();
 
   uint64_t get_dcount() { return dcount; }
diff --git a/src/test/messenger/xio_dispatcher.h b/src/test/messenger/xio_dispatcher.h
index f8b76d3..29c71a0 100644
--- a/src/test/messenger/xio_dispatcher.h
+++ b/src/test/messenger/xio_dispatcher.h
@@ -24,7 +24,7 @@ private:
   Messenger *messenger;
   uint64_t dcount;
 public:
-  XioDispatcher(Messenger *msgr);
+  explicit XioDispatcher(Messenger *msgr);
   virtual ~XioDispatcher();
 
   uint64_t get_dcount() { return dcount; }
diff --git a/src/test/mon/mon-created-time.sh b/src/test/mon/mon-created-time.sh
new file mode 100755
index 0000000..b4f7d1a
--- /dev/null
+++ b/src/test/mon/mon-created-time.sh
@@ -0,0 +1,54 @@
+#!/bin/bash
+#
+# Copyright (C) 2015 SUSE LINUX GmbH
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Library Public License for more details.
+#
+source ../qa/workunits/ceph-helpers.sh
+
+function run() {
+    local dir=$1
+    shift
+
+    export CEPH_MON="127.0.0.1:7122" # git grep '\<7122\>' : there must be only one
+    export CEPH_ARGS
+    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
+    CEPH_ARGS+="--mon-host=$CEPH_MON "
+
+    local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
+    for func in $funcs ; do
+        setup $dir || return 1
+        $func $dir || return 1
+        teardown $dir || return 1
+    done
+}
+
+function TEST_mon_created_time() {
+    local dir=$1
+
+    run_mon $dir a || return 1
+
+    ./ceph mon dump || return 1
+
+    if test "$(./ceph mon dump 2>/dev/null | sed -n '/created/p' | awk '{print $NF}')"x = ""x ; then
+        return 1
+    fi
+ 
+    if test "$(./ceph mon dump 2>/dev/null | sed -n '/created/p' | awk '{print $NF}')"x = "0.000000"x ; then
+        return 1
+    fi
+}
+
+main mon-created-time "$@"
+
+# Local Variables:
+# compile-command: "cd ../.. ; make -j4 && test/mon/mon-created-time.sh"
+# End:
diff --git a/src/test/mon/test-mon-msg.cc b/src/test/mon/test-mon-msg.cc
index a20741a..3e4e050 100644
--- a/src/test/mon/test-mon-msg.cc
+++ b/src/test/mon/test-mon-msg.cc
@@ -59,7 +59,7 @@ protected:
 
 public:
 
-  MonClientHelper(CephContext *cct_)
+  explicit MonClientHelper(CephContext *cct_)
     : Dispatcher(cct_),
       cct(cct_),
       monc(cct_),
diff --git a/src/test/mon/test_mon_workloadgen.cc b/src/test/mon/test_mon_workloadgen.cc
index 64575fe..4ad26c3 100644
--- a/src/test/mon/test_mon_workloadgen.cc
+++ b/src/test/mon/test_mon_workloadgen.cc
@@ -90,7 +90,7 @@ class TestStub : public Dispatcher
 
   struct C_Tick : public Context {
     TestStub *s;
-    C_Tick(TestStub *stub) : s(stub) {}
+    explicit C_Tick(TestStub *stub) : s(stub) {}
     void finish(int r) {
       generic_dout(20) << "C_Tick::" << __func__ << dendl;
       if (r == -ECANCELED) {
@@ -230,7 +230,7 @@ class ClientStub : public TestStub
   }
 
  public:
-  ClientStub(CephContext *cct)
+  explicit ClientStub(CephContext *cct)
     : TestStub(cct, "client"),
       gen((int) time(NULL))
   { }
@@ -328,7 +328,7 @@ class OSDStub : public TestStub
 
   struct C_CreatePGs : public Context {
     OSDStub *s;
-    C_CreatePGs(OSDStub *stub) : s(stub) {}
+    explicit C_CreatePGs(OSDStub *stub) : s(stub) {}
     void finish(int r) {
       if (r == -ECANCELED) {
 	generic_dout(20) << "C_CreatePGs::" << __func__
diff --git a/src/test/msgr/perf_msgr_server.cc b/src/test/msgr/perf_msgr_server.cc
index 3989cd2..3eae9a5 100644
--- a/src/test/msgr/perf_msgr_server.cc
+++ b/src/test/msgr/perf_msgr_server.cc
@@ -58,7 +58,7 @@ class ServerDispatcher : public Dispatcher {
       messages.pop_front();
       return m;
     }
-    void _process(Message *m, ThreadPool::TPHandle &handle) {
+    void _process(Message *m, ThreadPool::TPHandle &handle) override {
       MOSDOp *osd_op = static_cast<MOSDOp*>(m);
       MOSDOpReply *reply = new MOSDOpReply(osd_op, 0, 0, 0, false);
       m->get_connection()->send_message(reply);
diff --git a/src/test/msgr/test_async_driver.cc b/src/test/msgr/test_async_driver.cc
index fb46374..59ac8bf 100644
--- a/src/test/msgr/test_async_driver.cc
+++ b/src/test/msgr/test_async_driver.cc
@@ -272,7 +272,7 @@ class Worker : public Thread {
 
  public:
   EventCenter center;
-  Worker(CephContext *c): cct(c), done(false), center(c) {
+  explicit Worker(CephContext *c): cct(c), done(false), center(c) {
     center.init(100);
   }
   void stop() {
@@ -280,7 +280,7 @@ class Worker : public Thread {
     center.wakeup();
   }
   void* entry() {
-    center.set_owner(pthread_self());
+    center.set_owner();
     while (!done)
       center.process_events(1000000);
     return 0;
diff --git a/src/test/msgr/test_msgr.cc b/src/test/msgr/test_msgr.cc
index 8829c1e..b195349 100644
--- a/src/test/msgr/test_msgr.cc
+++ b/src/test/msgr/test_msgr.cc
@@ -76,7 +76,7 @@ class FakeDispatcher : public Dispatcher {
     uint64_t count;
     ConnectionRef con;
 
-    Session(ConnectionRef c): RefCountedObject(g_ceph_context), lock("FakeDispatcher::Session::lock"), count(0), con(c) {
+    explicit Session(ConnectionRef c): RefCountedObject(g_ceph_context), lock("FakeDispatcher::Session::lock"), count(0), con(c) {
     }
     uint64_t get_count() { return count; }
   };
@@ -89,7 +89,7 @@ class FakeDispatcher : public Dispatcher {
   bool got_connect;
   bool loopback;
 
-  FakeDispatcher(bool s): Dispatcher(g_ceph_context), lock("FakeDispatcher::lock"),
+  explicit FakeDispatcher(bool s): Dispatcher(g_ceph_context), lock("FakeDispatcher::lock"),
                           is_server(s), got_new(false), got_remote_reset(false),
                           got_connect(false), loopback(false) {}
   bool ms_can_fast_dispatch_any() const { return true; }
@@ -1238,7 +1238,7 @@ class MarkdownDispatcher : public Dispatcher {
   bool last_mark;
  public:
   atomic_t count;
-  MarkdownDispatcher(bool s): Dispatcher(g_ceph_context), lock("MarkdownDispatcher::lock"),
+  explicit MarkdownDispatcher(bool s): Dispatcher(g_ceph_context), lock("MarkdownDispatcher::lock"),
                               last_mark(false), count(0) {}
   bool ms_can_fast_dispatch_any() const { return false; }
   bool ms_can_fast_dispatch(Message *m) const {
diff --git a/src/test/objectstore/DeterministicOpSequence.cc b/src/test/objectstore/DeterministicOpSequence.cc
index c26173f..4f0f65d 100644
--- a/src/test/objectstore/DeterministicOpSequence.cc
+++ b/src/test/objectstore/DeterministicOpSequence.cc
@@ -452,7 +452,7 @@ void DeterministicOpSequence::_do_coll_create(coll_t cid, uint32_t pg_num, uint6
   dout(0) << "Give collection: " << cid << " a hint, pg_num is: " << pg_num << ", num_objs is: "
     << num_objs << dendl;
 
-  m_store->apply_transaction(&m_osr, t);
+  m_store->apply_transaction(&m_osr, std::move(t));
 }
 
 void DeterministicOpSequence::_do_touch(coll_t coll, hobject_t& obj)
@@ -460,7 +460,7 @@ void DeterministicOpSequence::_do_touch(coll_t coll, hobject_t& obj)
   ObjectStore::Transaction t;
   note_txn(&t);
   t.touch(coll, ghobject_t(obj));
-  m_store->apply_transaction(&m_osr, t);
+  m_store->apply_transaction(&m_osr, std::move(t));
 }
 
 void DeterministicOpSequence::_do_remove(coll_t coll, hobject_t& obj)
@@ -468,7 +468,7 @@ void DeterministicOpSequence::_do_remove(coll_t coll, hobject_t& obj)
   ObjectStore::Transaction t;
   note_txn(&t);
   t.remove(coll, ghobject_t(obj));
-  m_store->apply_transaction(&m_osr, t);
+  m_store->apply_transaction(&m_osr, std::move(t));
 }
 
 void DeterministicOpSequence::_do_set_attrs(coll_t coll,
@@ -478,7 +478,7 @@ void DeterministicOpSequence::_do_set_attrs(coll_t coll,
   ObjectStore::Transaction t;
   note_txn(&t);
   t.omap_setkeys(coll, ghobject_t(obj), attrs);
-  m_store->apply_transaction(&m_osr, t);
+  m_store->apply_transaction(&m_osr, std::move(t));
 }
 
 void DeterministicOpSequence::_do_write(coll_t coll, hobject_t& obj,
@@ -487,7 +487,7 @@ void DeterministicOpSequence::_do_write(coll_t coll, hobject_t& obj,
   ObjectStore::Transaction t;
   note_txn(&t);
   t.write(coll, ghobject_t(obj), off, len, data);
-  m_store->apply_transaction(&m_osr, t);
+  m_store->apply_transaction(&m_osr, std::move(t));
 }
 
 void DeterministicOpSequence::_do_clone(coll_t coll, hobject_t& orig_obj,
@@ -496,7 +496,7 @@ void DeterministicOpSequence::_do_clone(coll_t coll, hobject_t& orig_obj,
   ObjectStore::Transaction t;
   note_txn(&t);
   t.clone(coll, ghobject_t(orig_obj), ghobject_t(new_obj));
-  m_store->apply_transaction(&m_osr, t);
+  m_store->apply_transaction(&m_osr, std::move(t));
 }
 
 void DeterministicOpSequence::_do_clone_range(coll_t coll,
@@ -507,7 +507,7 @@ void DeterministicOpSequence::_do_clone_range(coll_t coll,
   note_txn(&t);
   t.clone_range(coll, ghobject_t(orig_obj), ghobject_t(new_obj),
 		srcoff, srclen, dstoff);
-  m_store->apply_transaction(&m_osr, t);
+  m_store->apply_transaction(&m_osr, std::move(t));
 }
 
 void DeterministicOpSequence::_do_write_and_clone_range(coll_t coll,
@@ -523,7 +523,7 @@ void DeterministicOpSequence::_do_write_and_clone_range(coll_t coll,
   t.write(coll, ghobject_t(orig_obj), srcoff, bl.length(), bl);
   t.clone_range(coll, ghobject_t(orig_obj), ghobject_t(new_obj),
 		srcoff, srclen, dstoff);
-  m_store->apply_transaction(&m_osr, t);
+  m_store->apply_transaction(&m_osr, std::move(t));
 }
 
 void DeterministicOpSequence::_do_coll_move(coll_t orig_coll, coll_t new_coll,
@@ -533,6 +533,6 @@ void DeterministicOpSequence::_do_coll_move(coll_t orig_coll, coll_t new_coll,
   note_txn(&t);
   t.remove(new_coll, ghobject_t(obj));
   t.collection_move_rename(orig_coll, ghobject_t(obj), new_coll, ghobject_t(obj));
-  m_store->apply_transaction(&m_osr, t);
+  m_store->apply_transaction(&m_osr, std::move(t));
 }
 
diff --git a/src/test/objectstore/FileStoreTracker.cc b/src/test/objectstore/FileStoreTracker.cc
index 3e4cf97..dd46fb5 100644
--- a/src/test/objectstore/FileStoreTracker.cc
+++ b/src/test/objectstore/FileStoreTracker.cc
@@ -9,12 +9,10 @@
 class OnApplied : public Context {
   FileStoreTracker *tracker;
   list<pair<pair<coll_t, string>, uint64_t> > in_flight;
-  ObjectStore::Transaction *t;
 public:
   OnApplied(FileStoreTracker *tracker,
-	    list<pair<pair<coll_t, string>, uint64_t> > in_flight,
-	    ObjectStore::Transaction *t)
-    : tracker(tracker), in_flight(in_flight), t(t) {}
+	    list<pair<pair<coll_t, string>, uint64_t> > in_flight)
+    : tracker(tracker), in_flight(in_flight) {}
 
   void finish(int r) {
     for (list<pair<pair<coll_t, string>, uint64_t> >::iterator i =
@@ -23,7 +21,6 @@ public:
 	 ++i) {
       tracker->applied(i->first, i->second);
     }
-    delete t;
   }
 };
 
@@ -77,9 +74,10 @@ void FileStoreTracker::submit_transaction(Transaction &t)
     (**i)(this, &out);
   }
   store->queue_transaction(
-    0, out.t,
-    new OnApplied(this, in_flight, out.t),
+    0, std::move(*out.t),
+    new OnApplied(this, in_flight),
     new OnCommitted(this, in_flight));
+  delete out.t;
 }
 
 void FileStoreTracker::write(const pair<coll_t, string> &obj,
diff --git a/src/test/objectstore/ObjectStoreTransactionBenchmark.cc b/src/test/objectstore/ObjectStoreTransactionBenchmark.cc
index 5b624da..7c0dc09 100644
--- a/src/test/objectstore/ObjectStoreTransactionBenchmark.cc
+++ b/src/test/objectstore/ObjectStoreTransactionBenchmark.cc
@@ -92,7 +92,6 @@ class Transaction {
       switch (op->op) {
       case ObjectStore::Transaction::OP_WRITE:
         {
-          coll_t cid = i.get_cid(op->cid);
           ghobject_t oid = i.get_oid(op->oid);
           bufferlist bl;
           i.decode_bl(bl);
@@ -100,7 +99,6 @@ class Transaction {
         break;
       case ObjectStore::Transaction::OP_SETATTR:
         {
-          coll_t cid = i.get_cid(op->cid);
           ghobject_t oid = i.get_oid(op->oid);
           string name = i.decode_string();
           bufferlist bl;
@@ -111,7 +109,6 @@ class Transaction {
         break;
       case ObjectStore::Transaction::OP_OMAP_SETKEYS:
         {
-          coll_t cid = i.get_cid(op->cid);
           ghobject_t oid = i.get_oid(op->oid);
           map<string, bufferptr> aset;
           i.decode_attrset(aset);
@@ -119,7 +116,6 @@ class Transaction {
         break;
       case ObjectStore::Transaction::OP_OMAP_RMKEYS:
         {
-          coll_t cid = i.get_cid(op->cid);
           ghobject_t oid = i.get_oid(op->oid);
           set<string> keys;
           i.decode_keyset(keys);
diff --git a/src/test/objectstore/TestObjectStoreState.cc b/src/test/objectstore/TestObjectStoreState.cc
index e4252ce..873c68a 100644
--- a/src/test/objectstore/TestObjectStoreState.cc
+++ b/src/test/objectstore/TestObjectStoreState.cc
@@ -34,11 +34,10 @@ void TestObjectStoreState::init(int colls, int objs)
   dout(5) << "init " << colls << " colls " << objs << " objs" << dendl;
 
   ObjectStore::Sequencer osr(__func__);
-  ObjectStore::Transaction *t;
-  t = new ObjectStore::Transaction;
+  ObjectStore::Transaction t;
 
-  t->create_collection(coll_t::meta(), 0);
-  m_store->apply_transaction(&osr, *t);
+  t.create_collection(coll_t::meta(), 0);
+  m_store->apply_transaction(&osr, std::move(t));
 
   wait_for_ready();
 
@@ -49,7 +48,7 @@ void TestObjectStoreState::init(int colls, int objs)
     dout(5) << "init create collection " << entry->m_coll.to_str()
         << " meta " << entry->m_meta_obj << dendl;
 
-    t = new ObjectStore::Transaction;
+    ObjectStore::Transaction *t = new ObjectStore::Transaction;
     t->create_collection(entry->m_coll, 32);
     bufferlist hint;
     uint32_t pg_num = colls;
@@ -68,8 +67,9 @@ void TestObjectStoreState::init(int colls, int objs)
     }
     baseid += objs;
 
-    m_store->queue_transaction(&(entry->m_osr), t,
-        new C_OnFinished(this, t));
+    m_store->queue_transaction(&(entry->m_osr), std::move(*t),
+        new C_OnFinished(this));
+    delete t;
     inc_in_flight();
 
     m_collections.insert(make_pair(coll_id, entry));
diff --git a/src/test/objectstore/TestObjectStoreState.h b/src/test/objectstore/TestObjectStoreState.h
index bd13e15..80164c3 100644
--- a/src/test/objectstore/TestObjectStoreState.h
+++ b/src/test/objectstore/TestObjectStoreState.h
@@ -98,7 +98,7 @@ public:
   int m_next_pool;
 
  public:
-  TestObjectStoreState(ObjectStore *store) :
+  explicit TestObjectStoreState(ObjectStore *store) :
     m_next_coll_nr(0), m_num_objs_per_coll(10), m_num_objects(0),
     m_max_in_flight(0), m_finished_lock("Finished Lock"), m_next_pool(1) {
     m_in_flight.set(0);
@@ -131,18 +131,15 @@ public:
   class C_OnFinished: public Context {
    protected:
     TestObjectStoreState *m_state;
-    ObjectStore::Transaction *m_tx;
 
    public:
-    C_OnFinished(TestObjectStoreState *state,
-        ObjectStore::Transaction *t) : m_state(state), m_tx(t) { }
+    explicit C_OnFinished(TestObjectStoreState *state) : m_state(state) { }
 
     void finish(int r) {
       Mutex::Locker locker(m_state->m_finished_lock);
       m_state->dec_in_flight();
       m_state->m_finished_cond.Signal();
 
-      delete m_tx;
     }
   };
 };
diff --git a/src/test/objectstore/store_test.cc b/src/test/objectstore/store_test.cc
index d57e82c..36cb2ae 100644
--- a/src/test/objectstore/store_test.cc
+++ b/src/test/objectstore/store_test.cc
@@ -104,8 +104,6 @@ TEST_P(StoreTest, collect_metadata) {
     ASSERT_NE(pm.count("filestore_f_type"), 0u);
     ASSERT_NE(pm.count("backend_filestore_partition_path"), 0u);
     ASSERT_NE(pm.count("backend_filestore_dev_node"), 0u);
-  } else if (GetParam() == string("keyvaluestore")) {
-    ASSERT_NE(pm.count("keyvaluestore_backend"), 0u);
   }
 }
 
@@ -131,7 +129,7 @@ TEST_P(StoreTest, SimpleRemount) {
     ObjectStore::Transaction t;
     t.create_collection(cid, 0);
     t.write(cid, hoid, 0, bl.length(), bl);
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     ASSERT_EQ(r, 0);
   }
   store->umount();
@@ -140,7 +138,7 @@ TEST_P(StoreTest, SimpleRemount) {
   {
     ObjectStore::Transaction t;
     t.write(cid, hoid2, 0, bl.length(), bl);
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     ASSERT_EQ(r, 0);
   }
   {
@@ -149,7 +147,7 @@ TEST_P(StoreTest, SimpleRemount) {
     t.remove(cid, hoid2);
     t.remove_collection(cid);
     cerr << "remove collection" << std::endl;
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     ASSERT_EQ(r, 0);
   }
   store->umount();
@@ -158,7 +156,7 @@ TEST_P(StoreTest, SimpleRemount) {
   {
     ObjectStore::Transaction t;
     t.create_collection(cid, 0);
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     ASSERT_EQ(r, 0);
     bool exists = store->exists(cid, hoid);
     ASSERT_TRUE(!exists);
@@ -167,7 +165,7 @@ TEST_P(StoreTest, SimpleRemount) {
     ObjectStore::Transaction t;
     t.remove_collection(cid);
     cerr << "remove collection" << std::endl;
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     ASSERT_EQ(r, 0);
   }
 }
@@ -186,7 +184,7 @@ TEST_P(StoreTest, IORemount) {
       ghobject_t hoid(hobject_t(sobject_t("Object " + stringify(n), CEPH_NOSNAP)));
       t.write(cid, hoid, 0, bl.length(), bl);
     }
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     ASSERT_EQ(r, 0);
   }
   // overwrites
@@ -196,7 +194,7 @@ TEST_P(StoreTest, IORemount) {
       ObjectStore::Transaction t;
       ghobject_t hoid(hobject_t(sobject_t("Object " + stringify(n), CEPH_NOSNAP)));
       t.write(cid, hoid, 1, bl.length(), bl);
-      r = store->apply_transaction(&osr, t);
+      r = store->apply_transaction(&osr, std::move(t));
       ASSERT_EQ(r, 0);
     }
   }
@@ -210,7 +208,7 @@ TEST_P(StoreTest, IORemount) {
       t.remove(cid, hoid);
     }
     t.remove_collection(cid);
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     ASSERT_EQ(r, 0);
   }
 }
@@ -225,7 +223,7 @@ TEST_P(StoreTest, FiemapEmpty) {
     t.create_collection(cid, 0);
     t.touch(cid, oid);
     t.truncate(cid, oid, 100000);
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     ASSERT_EQ(r, 0);
   }
   {
@@ -243,7 +241,7 @@ TEST_P(StoreTest, FiemapEmpty) {
     t.remove(cid, oid);
     t.remove_collection(cid);
     cerr << "remove collection" << std::endl;
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     ASSERT_EQ(r, 0);
   }
 }
@@ -262,18 +260,18 @@ TEST_P(StoreTest, FiemapHoles) {
     t.write(cid, oid, 0, 3, bl);
     t.write(cid, oid, 1048576, 3, bl);
     t.write(cid, oid, 4194304, 3, bl);
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     ASSERT_EQ(r, 0);
   }
   {
     bufferlist bl;
-    store->fiemap(cid, oid, 0, 0, bl);
+    store->fiemap(cid, oid, 0, 4194307, bl);
     map<uint64_t,uint64_t> m, e;
     bufferlist::iterator p = bl.begin();
     ::decode(m, p);
     cout << " got " << m << std::endl;
     ASSERT_TRUE(!m.empty());
-    ASSERT_GE(m[0], 3);
+    ASSERT_GE(m[0], 3u);
     ASSERT_TRUE((m.size() == 1 &&
 		 m[0] > 4194304u) ||
 		(m.size() == 3 &&
@@ -286,7 +284,7 @@ TEST_P(StoreTest, FiemapHoles) {
     t.remove(cid, oid);
     t.remove_collection(cid);
     cerr << "remove collection" << std::endl;
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     ASSERT_EQ(r, 0);
   }
 }
@@ -299,28 +297,28 @@ TEST_P(StoreTest, SimpleMetaColTest) {
     ObjectStore::Transaction t;
     t.create_collection(cid, 0);
     cerr << "create collection" << std::endl;
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     ASSERT_EQ(r, 0);
   }
   {
     ObjectStore::Transaction t;
     t.remove_collection(cid);
     cerr << "remove collection" << std::endl;
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     ASSERT_EQ(r, 0);
   }
   {
     ObjectStore::Transaction t;
     t.create_collection(cid, 0);
     cerr << "add collection" << std::endl;
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     ASSERT_EQ(r, 0);
   }
   {
     ObjectStore::Transaction t;
     t.remove_collection(cid);
     cerr << "remove collection" << std::endl;
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     ASSERT_EQ(r, 0);
   }
 }
@@ -333,28 +331,28 @@ TEST_P(StoreTest, SimplePGColTest) {
     ObjectStore::Transaction t;
     t.create_collection(cid, 4);
     cerr << "create collection" << std::endl;
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     ASSERT_EQ(r, 0);
   }
   {
     ObjectStore::Transaction t;
     t.remove_collection(cid);
     cerr << "remove collection" << std::endl;
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     ASSERT_EQ(r, 0);
   }
   {
     ObjectStore::Transaction t;
     t.create_collection(cid, 4);
     cerr << "add collection" << std::endl;
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     ASSERT_EQ(r, 0);
   }
   {
     ObjectStore::Transaction t;
     t.remove_collection(cid);
     cerr << "remove collection" << std::endl;
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     ASSERT_EQ(r, 0);
   }
 }
@@ -392,7 +390,7 @@ TEST_P(StoreTest, SimpleColPreHashTest) {
     ::encode(expected_num_objs, hint);
     t.collection_hint(cid, ObjectStore::Transaction::COLL_HINT_EXPECTED_NUM_OBJECTS, hint);
     cerr << "collection hint" << std::endl;
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     ASSERT_EQ(r, 0);
   }
   {
@@ -400,7 +398,7 @@ TEST_P(StoreTest, SimpleColPreHashTest) {
     ObjectStore::Transaction t;
     t.remove_collection(cid);
     cerr << "remove collection" << std::endl;
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     ASSERT_EQ(r, 0);
   }
   // Revert the config change so that it does not affect the split/merge tests
@@ -425,7 +423,7 @@ TEST_P(StoreTest, SimpleObjectTest) {
     ObjectStore::Transaction t;
     t.create_collection(cid, 0);
     cerr << "Creating collection " << cid << std::endl;
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     ASSERT_EQ(r, 0);
   }
   {
@@ -435,7 +433,7 @@ TEST_P(StoreTest, SimpleObjectTest) {
     ObjectStore::Transaction t;
     t.touch(cid, hoid);
     cerr << "Creating object " << hoid << std::endl;
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     ASSERT_EQ(r, 0);
 
     exists = store->exists(cid, hoid);
@@ -446,7 +444,7 @@ TEST_P(StoreTest, SimpleObjectTest) {
     t.remove(cid, hoid);
     t.touch(cid, hoid);
     cerr << "Remove then create" << std::endl;
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     ASSERT_EQ(r, 0);
   }
   {
@@ -457,7 +455,7 @@ TEST_P(StoreTest, SimpleObjectTest) {
     t.remove(cid, hoid);
     t.write(cid, hoid, 0, 5, bl);
     cerr << "Remove then create" << std::endl;
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     ASSERT_EQ(r, 0);
 
     bufferlist in;
@@ -473,7 +471,7 @@ TEST_P(StoreTest, SimpleObjectTest) {
     exp.append(bl);
     t.write(cid, hoid, 5, 5, bl);
     cerr << "Append" << std::endl;
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     ASSERT_EQ(r, 0);
 
     bufferlist in;
@@ -488,7 +486,7 @@ TEST_P(StoreTest, SimpleObjectTest) {
     exp = bl;
     t.write(cid, hoid, 0, 10, bl);
     cerr << "Full overwrite" << std::endl;
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     ASSERT_EQ(r, 0);
 
     bufferlist in;
@@ -502,7 +500,7 @@ TEST_P(StoreTest, SimpleObjectTest) {
     bl.append("abcde");
     t.write(cid, hoid, 3, 5, bl);
     cerr << "Partial overwrite" << std::endl;
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     ASSERT_EQ(r, 0);
 
     bufferlist in, exp;
@@ -518,7 +516,7 @@ TEST_P(StoreTest, SimpleObjectTest) {
     bl.append("abcde01234012340123401234abcde01234012340123401234abcde01234012340123401234abcde01234012340123401234");
     t.write(cid, hoid, 0, bl.length(), bl);
     cerr << "larger overwrite" << std::endl;
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     ASSERT_EQ(r, 0);
 
     bufferlist in;
@@ -532,7 +530,7 @@ TEST_P(StoreTest, SimpleObjectTest) {
     t.remove(cid, hoid);
     t.remove_collection(cid);
     cerr << "Cleaning" << std::endl;
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     ASSERT_EQ(r, 0);
   }
 }
@@ -547,7 +545,7 @@ TEST_P(StoreTest, ManySmallWrite) {
     ObjectStore::Transaction t;
     t.create_collection(cid, 0);
     cerr << "Creating collection " << cid << std::endl;
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     ASSERT_EQ(r, 0);
   }
   bufferlist bl;
@@ -557,13 +555,13 @@ TEST_P(StoreTest, ManySmallWrite) {
   for (int i=0; i<100; ++i) {
     ObjectStore::Transaction t;
     t.write(cid, a, i*4096, 4096, bl, 0);
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     ASSERT_EQ(r, 0);
   }
   for (int i=0; i<100; ++i) {
     ObjectStore::Transaction t;
     t.write(cid, b, (rand() % 1024)*4096, 4096, bl, 0);
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     ASSERT_EQ(r, 0);
   }
   {
@@ -572,7 +570,7 @@ TEST_P(StoreTest, ManySmallWrite) {
     t.remove(cid, b);
     t.remove_collection(cid);
     cerr << "Cleaning" << std::endl;
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     ASSERT_EQ(r, 0);
   }
 }
@@ -586,14 +584,14 @@ TEST_P(StoreTest, SmallSkipFront) {
     ObjectStore::Transaction t;
     t.create_collection(cid, 0);
     cerr << "Creating collection " << cid << std::endl;
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     ASSERT_EQ(r, 0);
   }
   {
     ObjectStore::Transaction t;
     t.touch(cid, a);
     t.truncate(cid, a, 3000);
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     ASSERT_EQ(r, 0);
   }
   {
@@ -603,7 +601,7 @@ TEST_P(StoreTest, SmallSkipFront) {
     bl.append(bp);
     ObjectStore::Transaction t;
     t.write(cid, a, 4096, 4096, bl);
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     ASSERT_EQ(r, 0);
   }
   {
@@ -619,7 +617,7 @@ TEST_P(StoreTest, SmallSkipFront) {
     t.remove(cid, a);
     t.remove_collection(cid);
     cerr << "Cleaning" << std::endl;
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     ASSERT_EQ(r, 0);
   }
 }
@@ -633,7 +631,7 @@ TEST_P(StoreTest, SmallSequentialUnaligned) {
     ObjectStore::Transaction t;
     t.create_collection(cid, 0);
     cerr << "Creating collection " << cid << std::endl;
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     ASSERT_EQ(r, 0);
   }
   bufferlist bl;
@@ -644,7 +642,7 @@ TEST_P(StoreTest, SmallSequentialUnaligned) {
   for (int i=0; i<1000; ++i) {
     ObjectStore::Transaction t;
     t.write(cid, a, i*len, len, bl, 0);
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     ASSERT_EQ(r, 0);
   }
   {
@@ -652,7 +650,7 @@ TEST_P(StoreTest, SmallSequentialUnaligned) {
     t.remove(cid, a);
     t.remove_collection(cid);
     cerr << "Cleaning" << std::endl;
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     ASSERT_EQ(r, 0);
   }
 }
@@ -667,7 +665,7 @@ TEST_P(StoreTest, ManyBigWrite) {
     ObjectStore::Transaction t;
     t.create_collection(cid, 0);
     cerr << "Creating collection " << cid << std::endl;
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     ASSERT_EQ(r, 0);
   }
   bufferlist bl;
@@ -677,28 +675,28 @@ TEST_P(StoreTest, ManyBigWrite) {
   for (int i=0; i<10; ++i) {
     ObjectStore::Transaction t;
     t.write(cid, a, i*4*1048586, 4*1048576, bl, 0);
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     ASSERT_EQ(r, 0);
   }
   // aligned
   for (int i=0; i<10; ++i) {
     ObjectStore::Transaction t;
     t.write(cid, b, (rand() % 256)*4*1048576, 4*1048576, bl, 0);
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     ASSERT_EQ(r, 0);
   }
   // unaligned
   for (int i=0; i<10; ++i) {
     ObjectStore::Transaction t;
     t.write(cid, b, (rand() % (256*4096))*1024, 4*1048576, bl, 0);
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     ASSERT_EQ(r, 0);
   }
   // do some zeros
   for (int i=0; i<10; ++i) {
     ObjectStore::Transaction t;
     t.zero(cid, b, (rand() % (256*4096))*1024, 16*1048576);
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     ASSERT_EQ(r, 0);
   }
   {
@@ -707,7 +705,7 @@ TEST_P(StoreTest, ManyBigWrite) {
     t.remove(cid, b);
     t.remove_collection(cid);
     cerr << "Cleaning" << std::endl;
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     ASSERT_EQ(r, 0);
   }
 }
@@ -721,7 +719,7 @@ TEST_P(StoreTest, MiscFragmentTests) {
     ObjectStore::Transaction t;
     t.create_collection(cid, 0);
     cerr << "Creating collection " << cid << std::endl;
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     ASSERT_EQ(r, 0);
   }
   bufferlist bl;
@@ -731,13 +729,13 @@ TEST_P(StoreTest, MiscFragmentTests) {
   {
     ObjectStore::Transaction t;
     t.write(cid, a, 0, 524288, bl, 0);
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     ASSERT_EQ(r, 0);
   }
   {
     ObjectStore::Transaction t;
     t.write(cid, a, 1048576, 524288, bl, 0);
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     ASSERT_EQ(r, 0);
   }
   {
@@ -750,7 +748,7 @@ TEST_P(StoreTest, MiscFragmentTests) {
   {
     ObjectStore::Transaction t;
     t.write(cid, a, 1048576 - 4096, 524288, bl, 0);
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     ASSERT_EQ(r, 0);
   }
   {
@@ -758,7 +756,7 @@ TEST_P(StoreTest, MiscFragmentTests) {
     t.remove(cid, a);
     t.remove_collection(cid);
     cerr << "Cleaning" << std::endl;
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     ASSERT_EQ(r, 0);
   }
 
@@ -783,7 +781,7 @@ TEST_P(StoreTest, SimpleAttrTest) {
   {
     ObjectStore::Transaction t;
     t.create_collection(cid, 0);
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     ASSERT_EQ(r, 0);
   }
   {
@@ -800,7 +798,7 @@ TEST_P(StoreTest, SimpleAttrTest) {
     t.touch(cid, hoid);
     t.setattr(cid, hoid, "foo", val);
     t.setattr(cid, hoid, "bar", val2);
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     ASSERT_EQ(r, 0);
   }
   {
@@ -827,7 +825,7 @@ TEST_P(StoreTest, SimpleAttrTest) {
     ObjectStore::Transaction t;
     t.remove(cid, hoid);
     t.remove_collection(cid);
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     ASSERT_EQ(r, 0);
   }
 }
@@ -840,7 +838,7 @@ TEST_P(StoreTest, SimpleListTest) {
     ObjectStore::Transaction t;
     t.create_collection(cid, 0);
     cerr << "Creating collection " << cid << std::endl;
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     ASSERT_EQ(r, 0);
   }
   set<ghobject_t, ghobject_t::BitwiseComparator> all;
@@ -856,7 +854,7 @@ TEST_P(StoreTest, SimpleListTest) {
       t.touch(cid, hoid);
       cerr << "Creating object " << hoid << std::endl;
     }
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     ASSERT_EQ(r, 0);
   }
   for (int bitwise=0; bitwise<2; ++bitwise) {
@@ -895,7 +893,7 @@ TEST_P(StoreTest, SimpleListTest) {
       t.remove(cid, *p);
     t.remove_collection(cid);
     cerr << "Cleaning" << std::endl;
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     ASSERT_EQ(r, 0);
   }
 }
@@ -937,7 +935,7 @@ TEST_P(StoreTest, MultipoolListTest) {
     ObjectStore::Transaction t;
     t.create_collection(cid, 0);
     cerr << "Creating collection " << cid << std::endl;
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     ASSERT_EQ(r, 0);
   }
   set<ghobject_t, ghobject_t::BitwiseComparator> all, saw;
@@ -955,7 +953,7 @@ TEST_P(StoreTest, MultipoolListTest) {
       t.touch(cid, hoid);
       cerr << "Creating object " << hoid << std::endl;
     }
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     ASSERT_EQ(r, 0);
   }
   {
@@ -982,7 +980,7 @@ TEST_P(StoreTest, MultipoolListTest) {
       t.remove(cid, *p);
     t.remove_collection(cid);
     cerr << "Cleaning" << std::endl;
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     ASSERT_EQ(r, 0);
   }
 }
@@ -995,7 +993,7 @@ TEST_P(StoreTest, SimpleCloneTest) {
     ObjectStore::Transaction t;
     t.create_collection(cid, 0);
     cerr << "Creating collection " << cid << std::endl;
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     ASSERT_EQ(r, 0);
   }
   ghobject_t hoid(hobject_t(sobject_t("Object 1", CEPH_NOSNAP),
@@ -1013,7 +1011,7 @@ TEST_P(StoreTest, SimpleCloneTest) {
     t.write(cid, hoid, 0, small.length(), small);
     t.write(cid, hoid, 10, small.length(), small);
     cerr << "Creating object and set attr " << hoid << std::endl;
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     ASSERT_EQ(r, 0);
   }
   ghobject_t hoid2(hobject_t(sobject_t("Object 2", CEPH_NOSNAP),
@@ -1027,7 +1025,7 @@ TEST_P(StoreTest, SimpleCloneTest) {
     t.setattr(cid, hoid, "attr1", large);
     t.setattr(cid, hoid, "attr2", small);
     cerr << "Clone object and rm attr" << std::endl;
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     ASSERT_EQ(r, 0);
 
     r = store->read(cid, hoid, 10, 5, newdata);
@@ -1062,7 +1060,7 @@ TEST_P(StoreTest, SimpleCloneTest) {
     ObjectStore::Transaction t;
     t.remove(cid, hoid);
     t.remove(cid, hoid2);
-    ASSERT_EQ(0u, store->apply_transaction(&osr, t));
+    ASSERT_EQ(0u, store->apply_transaction(&osr, std::move(t)));
   }
   {
     bufferlist final;
@@ -1080,7 +1078,7 @@ TEST_P(StoreTest, SimpleCloneTest) {
     al.append(a);
     final.append(a);
     t.write(cid, hoid, pl.length(), a.length(), al);
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     ASSERT_EQ(r, 0);
     bufferlist rl;
     ASSERT_EQ((int)final.length(),
@@ -1091,7 +1089,7 @@ TEST_P(StoreTest, SimpleCloneTest) {
     ObjectStore::Transaction t;
     t.remove(cid, hoid);
     t.remove(cid, hoid2);
-    ASSERT_EQ(0u, store->apply_transaction(&osr, t));
+    ASSERT_EQ(0u, store->apply_transaction(&osr, std::move(t)));
   }
   {
     bufferlist final;
@@ -1112,7 +1110,7 @@ TEST_P(StoreTest, SimpleCloneTest) {
     al.append(a);
     final.append(a);
     t.write(cid, hoid, pl.length() + z.length(), a.length(), al);
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     ASSERT_EQ(r, 0);
     bufferlist rl;
     ASSERT_EQ((int)final.length(),
@@ -1123,7 +1121,7 @@ TEST_P(StoreTest, SimpleCloneTest) {
     ObjectStore::Transaction t;
     t.remove(cid, hoid);
     t.remove(cid, hoid2);
-    ASSERT_EQ(0u, store->apply_transaction(&osr, t));
+    ASSERT_EQ(0u, store->apply_transaction(&osr, std::move(t)));
   }
   {
     bufferlist final;
@@ -1144,7 +1142,7 @@ TEST_P(StoreTest, SimpleCloneTest) {
     al.append(a);
     final.append(a);
     t.write(cid, hoid, 17000, a.length(), al);
-    ASSERT_EQ(0u, store->apply_transaction(&osr, t));
+    ASSERT_EQ(0u, store->apply_transaction(&osr, std::move(t)));
     bufferlist rl;
     ASSERT_EQ((int)final.length(),
 	      store->read(cid, hoid, 0, final.length(), rl));
@@ -1158,7 +1156,7 @@ TEST_P(StoreTest, SimpleCloneTest) {
     ObjectStore::Transaction t;
     t.remove(cid, hoid);
     t.remove(cid, hoid2);
-    ASSERT_EQ(0u, store->apply_transaction(&osr, t));
+    ASSERT_EQ(0u, store->apply_transaction(&osr, std::move(t)));
   }
   {
     bufferptr p(1048576);
@@ -1173,7 +1171,7 @@ TEST_P(StoreTest, SimpleCloneTest) {
     bufferlist al;
     al.append(a);
     t.write(cid, hoid, a.length(), a.length(), al);
-    ASSERT_EQ(0u, store->apply_transaction(&osr, t));
+    ASSERT_EQ(0u, store->apply_transaction(&osr, std::move(t)));
     bufferlist rl;
     bufferlist final;
     final.substr_of(pl, 0, al.length());
@@ -1193,7 +1191,7 @@ TEST_P(StoreTest, SimpleCloneTest) {
     ObjectStore::Transaction t;
     t.remove(cid, hoid);
     t.remove(cid, hoid2);
-    ASSERT_EQ(0u, store->apply_transaction(&osr, t));
+    ASSERT_EQ(0u, store->apply_transaction(&osr, std::move(t)));
   }
   {
     bufferptr p(65536);
@@ -1208,7 +1206,7 @@ TEST_P(StoreTest, SimpleCloneTest) {
     bufferlist al;
     al.append(a);
     t.write(cid, hoid, 32768, a.length(), al);
-    ASSERT_EQ(0u, store->apply_transaction(&osr, t));
+    ASSERT_EQ(0u, store->apply_transaction(&osr, std::move(t)));
     bufferlist rl;
     bufferlist final;
     final.substr_of(pl, 0, 32768);
@@ -1228,7 +1226,7 @@ TEST_P(StoreTest, SimpleCloneTest) {
     ObjectStore::Transaction t;
     t.remove(cid, hoid);
     t.remove(cid, hoid2);
-    ASSERT_EQ(0u, store->apply_transaction(&osr, t));
+    ASSERT_EQ(0u, store->apply_transaction(&osr, std::move(t)));
   }
   {
     bufferptr p(65536);
@@ -1243,7 +1241,7 @@ TEST_P(StoreTest, SimpleCloneTest) {
     bufferlist al;
     al.append(a);
     t.write(cid, hoid, 33768, a.length(), al);
-    ASSERT_EQ(0u, store->apply_transaction(&osr, t));
+    ASSERT_EQ(0u, store->apply_transaction(&osr, std::move(t)));
     bufferlist rl;
     bufferlist final;
     final.substr_of(pl, 0, 33768);
@@ -1265,7 +1263,7 @@ TEST_P(StoreTest, SimpleCloneTest) {
     t.remove(cid, hoid2);
     t.remove_collection(cid);
     cerr << "Cleaning" << std::endl;
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     ASSERT_EQ(r, 0);
   }
 }
@@ -1278,7 +1276,7 @@ TEST_P(StoreTest, OmapSimple) {
     ObjectStore::Transaction t;
     t.create_collection(cid, 0);
     cerr << "Creating collection " << cid << std::endl;
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     ASSERT_EQ(r, 0);
   }
   ghobject_t hoid(hobject_t(sobject_t("omap_obj", CEPH_NOSNAP),
@@ -1296,7 +1294,7 @@ TEST_P(StoreTest, OmapSimple) {
     t.omap_setkeys(cid, hoid, km);
     t.omap_setheader(cid, hoid, header);
     cerr << "Creating object and set omap " << hoid << std::endl;
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     ASSERT_EQ(r, 0);
   }
   // get header, keys
@@ -1333,7 +1331,7 @@ TEST_P(StoreTest, OmapSimple) {
     t.remove(cid, hoid);
     t.remove_collection(cid);
     cerr << "Cleaning" << std::endl;
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     ASSERT_EQ(r, 0);
   }
 }
@@ -1346,7 +1344,7 @@ TEST_P(StoreTest, OmapCloneTest) {
     ObjectStore::Transaction t;
     t.create_collection(cid, 0);
     cerr << "Creating collection " << cid << std::endl;
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     ASSERT_EQ(r, 0);
   }
   ghobject_t hoid(hobject_t(sobject_t("Object 1", CEPH_NOSNAP),
@@ -1364,7 +1362,7 @@ TEST_P(StoreTest, OmapCloneTest) {
     t.omap_setkeys(cid, hoid, km);
     t.omap_setheader(cid, hoid, header);
     cerr << "Creating object and set omap " << hoid << std::endl;
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     ASSERT_EQ(r, 0);
   }
   ghobject_t hoid2(hobject_t(sobject_t("Object 2", CEPH_NOSNAP),
@@ -1373,7 +1371,7 @@ TEST_P(StoreTest, OmapCloneTest) {
     ObjectStore::Transaction t;
     t.clone(cid, hoid, hoid2);
     cerr << "Clone object" << std::endl;
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     ASSERT_EQ(r, 0);
   }
   {
@@ -1389,7 +1387,7 @@ TEST_P(StoreTest, OmapCloneTest) {
     t.remove(cid, hoid2);
     t.remove_collection(cid);
     cerr << "Cleaning" << std::endl;
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     ASSERT_EQ(r, 0);
   }
 }
@@ -1402,7 +1400,7 @@ TEST_P(StoreTest, SimpleCloneRangeTest) {
     ObjectStore::Transaction t;
     t.create_collection(cid, 0);
     cerr << "Creating collection " << cid << std::endl;
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     ASSERT_EQ(r, 0);
   }
   ghobject_t hoid(hobject_t(sobject_t("Object 1", CEPH_NOSNAP)));
@@ -1413,7 +1411,7 @@ TEST_P(StoreTest, SimpleCloneRangeTest) {
     ObjectStore::Transaction t;
     t.write(cid, hoid, 10, 5, small);
     cerr << "Creating object and write bl " << hoid << std::endl;
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     ASSERT_EQ(r, 0);
   }
   ghobject_t hoid2(hobject_t(sobject_t("Object 2", CEPH_NOSNAP)));
@@ -1422,7 +1420,7 @@ TEST_P(StoreTest, SimpleCloneRangeTest) {
     ObjectStore::Transaction t;
     t.clone_range(cid, hoid, hoid2, 10, 5, 0);
     cerr << "Clone range object" << std::endl;
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     ASSERT_EQ(r, 0);
     r = store->read(cid, hoid2, 0, 5, newdata);
     ASSERT_EQ(r, 5);
@@ -1433,7 +1431,7 @@ TEST_P(StoreTest, SimpleCloneRangeTest) {
     t.truncate(cid, hoid, 1024*1024);
     t.clone_range(cid, hoid, hoid2, 0, 1024*1024, 0);
     cerr << "Clone range object" << std::endl;
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     ASSERT_EQ(r, 0);
     struct stat stat, stat2;
     r = store->stat(cid, hoid, &stat);
@@ -1447,7 +1445,7 @@ TEST_P(StoreTest, SimpleCloneRangeTest) {
     t.remove(cid, hoid2);
     t.remove_collection(cid);
     cerr << "Cleaning" << std::endl;
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     ASSERT_EQ(r, 0);
   }
 }
@@ -1461,7 +1459,7 @@ TEST_P(StoreTest, SimpleObjectLongnameTest) {
     ObjectStore::Transaction t;
     t.create_collection(cid, 0);
     cerr << "Creating collection " << cid << std::endl;
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     ASSERT_EQ(r, 0);
   }
   ghobject_t hoid(hobject_t(sobject_t("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaObjectaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa 1", CEPH_NOSNAP)));
@@ -1469,7 +1467,7 @@ TEST_P(StoreTest, SimpleObjectLongnameTest) {
     ObjectStore::Transaction t;
     t.touch(cid, hoid);
     cerr << "Creating object " << hoid << std::endl;
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     ASSERT_EQ(r, 0);
   }
   {
@@ -1477,7 +1475,7 @@ TEST_P(StoreTest, SimpleObjectLongnameTest) {
     t.remove(cid, hoid);
     t.remove_collection(cid);
     cerr << "Cleaning" << std::endl;
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     ASSERT_EQ(r, 0);
   }
 }
@@ -1493,7 +1491,7 @@ TEST_P(StoreTest, ManyObjectTest) {
   {
     ObjectStore::Transaction t;
     t.create_collection(cid, 0);
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     ASSERT_EQ(r, 0);
   }
   for (int i = 0; i < NUM_OBJS; ++i) {
@@ -1506,7 +1504,7 @@ TEST_P(StoreTest, ManyObjectTest) {
     ghobject_t hoid(hobject_t(sobject_t(string(buf) + base, CEPH_NOSNAP)));
     t.touch(cid, hoid);
     created.insert(hoid);
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     ASSERT_EQ(r, 0);
   }
 
@@ -1596,14 +1594,14 @@ TEST_P(StoreTest, ManyObjectTest) {
        ++i) {
     ObjectStore::Transaction t;
     t.remove(cid, *i);
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     ASSERT_EQ(r, 0);
   }
   cerr << "cleaning up" << std::endl;
   {
     ObjectStore::Transaction t;
     t.remove_collection(cid);
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     ASSERT_EQ(r, 0);
   }
 }
@@ -1619,7 +1617,7 @@ class MixedGenerator : public ObjectGenerator {
 public:
   unsigned seq;
   int64_t poolid;
-  MixedGenerator(int64_t p) : seq(0), poolid(p) {}
+  explicit MixedGenerator(int64_t p) : seq(0), poolid(p) {}
   ghobject_t create_object(gen_type *gen) {
     char buf[100];
     snprintf(buf, sizeof(buf), "OBJ_%u", seq);
@@ -1674,7 +1672,7 @@ public:
 
   struct EnterExit {
     const char *msg;
-    EnterExit(const char *m) : msg(m) {
+    explicit EnterExit(const char *m) : msg(m) {
       //cout << pthread_self() << " enter " << msg << std::endl;
     }
     ~EnterExit() {
@@ -1773,7 +1771,7 @@ public:
   int init() {
     ObjectStore::Transaction t;
     t.create_collection(cid, 0);
-    return store->apply_transaction(osr, t);
+    return store->apply_transaction(osr, std::move(t));
   }
   void shutdown() {
     while (1) {
@@ -1788,11 +1786,11 @@ public:
 	   p != objects.end(); ++p) {
 	t.remove(cid, *p);
       }
-      store->apply_transaction(osr, t);
+      store->apply_transaction(osr, std::move(t));
     }
     ObjectStore::Transaction t;
     t.remove_collection(cid);
-    store->apply_transaction(osr, t);
+    store->apply_transaction(osr, std::move(t));
   }
 
   ghobject_t get_uniform_random_object() {
@@ -1840,7 +1838,9 @@ public:
     in_flight_objects.insert(new_obj);
     if (!contents.count(new_obj))
       contents[new_obj] = Object();
-    return store->queue_transaction(osr, t, new C_SyntheticOnReadable(this, t, new_obj));
+    int status = store->queue_transaction(osr, std::move(*t), new C_SyntheticOnReadable(this, t, new_obj));
+    delete t;
+    return status;
   }
 
   int clone() {
@@ -1873,7 +1873,9 @@ public:
     contents[new_obj].data.clear();
     contents[new_obj].data.append(contents[old_obj].data.c_str(),
 				  contents[old_obj].data.length());
-    return store->queue_transaction(osr, t, new C_SyntheticOnClone(this, t, old_obj, new_obj));
+    int status = store->queue_transaction(osr, std::move(*t), new C_SyntheticOnClone(this, t, old_obj, new_obj));
+    delete t;
+    return status;
   }
 
   int setattrs() {
@@ -1919,7 +1921,9 @@ public:
     t->setattrs(cid, obj, attrs);
     ++in_flight;
     in_flight_objects.insert(obj);
-    return store->queue_transaction(osr, t, new C_SyntheticOnReadable(this, t, obj));
+    int status = store->queue_transaction(osr, std::move(*t), new C_SyntheticOnReadable(this, t, obj));
+    delete t;
+    return status;
   }
 
   void getattrs() {
@@ -2016,7 +2020,9 @@ public:
     contents[obj].attrs.erase(it->first);
     ++in_flight;
     in_flight_objects.insert(obj);
-    return store->queue_transaction(osr, t, new C_SyntheticOnReadable(this, t, obj));
+    int status = store->queue_transaction(osr, std::move(*t), new C_SyntheticOnReadable(this, t, obj));
+    delete t;
+    return status;
   }
 
   int write() {
@@ -2058,7 +2064,9 @@ public:
     t->write(cid, new_obj, offset, len, bl);
     ++in_flight;
     in_flight_objects.insert(new_obj);
-    return store->queue_transaction(osr, t, new C_SyntheticOnReadable(this, t, new_obj));
+    int status = store->queue_transaction(osr, std::move(*t), new C_SyntheticOnReadable(this, t, new_obj));
+    delete t;
+    return status;
   }
 
   void read() {
@@ -2137,7 +2145,9 @@ public:
       bl.swap(data);
     }
 
-    return store->queue_transaction(osr, t, new C_SyntheticOnReadable(this, t, obj));
+    int status = store->queue_transaction(osr, std::move(*t), new C_SyntheticOnReadable(this, t, obj));
+    delete t;
+    return status;
   }
 
   void scan() {
@@ -2241,7 +2251,9 @@ public:
     available_objects.erase(to_remove);
     in_flight_objects.insert(to_remove);
     contents.erase(to_remove);
-    return store->queue_transaction(osr, t, new C_SyntheticOnReadable(this, t, to_remove));
+    int status = store->queue_transaction(osr, std::move(*t), new C_SyntheticOnReadable(this, t, to_remove));
+    delete t;
+    return status;
   }
 
   int zero() {
@@ -2270,7 +2282,9 @@ public:
     t->zero(cid, new_obj, offset, len);
     ++in_flight;
     in_flight_objects.insert(new_obj);
-    return store->queue_transaction(osr, t, new C_SyntheticOnReadable(this, t, new_obj));
+    int status = store->queue_transaction(osr, std::move(*t), new C_SyntheticOnReadable(this, t, new_obj));
+    delete t;
+    return status;
   }
 
   void print_internal_state() {
@@ -2370,7 +2384,7 @@ TEST_P(StoreTest, HashCollisionTest) {
   {
     ObjectStore::Transaction t;
     t.create_collection(cid, 0);
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     ASSERT_EQ(r, 0);
   }
   string base = "";
@@ -2389,7 +2403,7 @@ TEST_P(StoreTest, HashCollisionTest) {
     {
       ObjectStore::Transaction t;
       t.touch(cid, hoid);
-      r = store->apply_transaction(&osr, t);
+      r = store->apply_transaction(&osr, std::move(t));
       ASSERT_EQ(r, 0);
     }
     created.insert(hoid);
@@ -2436,12 +2450,12 @@ TEST_P(StoreTest, HashCollisionTest) {
        ++i) {
     ObjectStore::Transaction t;
     t.remove(cid, *i);
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     ASSERT_EQ(r, 0);
   }
   ObjectStore::Transaction t;
   t.remove_collection(cid);
-  r = store->apply_transaction(&osr, t);
+  r = store->apply_transaction(&osr, std::move(t));
   ASSERT_EQ(r, 0);
 }
 
@@ -2453,7 +2467,7 @@ TEST_P(StoreTest, ScrubTest) {
   {
     ObjectStore::Transaction t;
     t.create_collection(cid, 0);
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     ASSERT_EQ(r, 0);
   }
   string base = "aaaaa";
@@ -2470,7 +2484,7 @@ TEST_P(StoreTest, ScrubTest) {
     {
       ObjectStore::Transaction t;
       t.touch(cid, hoid);
-      r = store->apply_transaction(&osr, t);
+      r = store->apply_transaction(&osr, std::move(t));
       ASSERT_EQ(r, 0);
     }
     created.insert(hoid);
@@ -2486,7 +2500,7 @@ TEST_P(StoreTest, ScrubTest) {
     t.touch(cid, hoid1);
     t.touch(cid, hoid2);
     t.touch(cid, hoid3);
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     created.insert(hoid1);
     created.insert(hoid2);
     created.insert(hoid3);
@@ -2534,12 +2548,12 @@ TEST_P(StoreTest, ScrubTest) {
        ++i) {
     ObjectStore::Transaction t;
     t.remove(cid, *i);
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     ASSERT_EQ(r, 0);
   }
   ObjectStore::Transaction t;
   t.remove_collection(cid);
-  r = store->apply_transaction(&osr, t);
+  r = store->apply_transaction(&osr, std::move(t));
   ASSERT_EQ(r, 0);
 }
 
@@ -2552,7 +2566,7 @@ TEST_P(StoreTest, OMapTest) {
   {
     ObjectStore::Transaction t;
     t.create_collection(cid, 0);
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     ASSERT_EQ(r, 0);
   }
 
@@ -2563,7 +2577,7 @@ TEST_P(StoreTest, OMapTest) {
     t.omap_clear(cid, hoid);
     map<string, bufferlist> start_set;
     t.omap_setkeys(cid, hoid, start_set);
-    store->apply_transaction(&osr, t);
+    store->apply_transaction(&osr, std::move(t));
   }
 
   for (int i = 0; i < 100; i++) {
@@ -2598,7 +2612,7 @@ TEST_P(StoreTest, OMapTest) {
     to_add.insert(pair<string, bufferlist>("key-" + string(buf), bl));
     attrs.insert(pair<string, bufferlist>("key-" + string(buf), bl));
     t.omap_setkeys(cid, hoid, to_add);
-    store->apply_transaction(&osr, t);
+    store->apply_transaction(&osr, std::move(t));
   }
 
   int i = 0;
@@ -2628,7 +2642,7 @@ TEST_P(StoreTest, OMapTest) {
     set<string> keys_to_remove;
     keys_to_remove.insert(to_remove);
     t.omap_rmkeys(cid, hoid, keys_to_remove);
-    store->apply_transaction(&osr, t);
+    store->apply_transaction(&osr, std::move(t));
 
     attrs.erase(to_remove);
 
@@ -2640,14 +2654,15 @@ TEST_P(StoreTest, OMapTest) {
     bl1.append("omap_header");
     ObjectStore::Transaction t;
     t.omap_setheader(cid, hoid, bl1);
-    store->apply_transaction(&osr, t);
-
+    store->apply_transaction(&osr, std::move(t));
+    t = ObjectStore::Transaction();
+ 
     bufferlist bl2;
     bl2.append("value");
     map<string, bufferlist> to_add;
     to_add.insert(pair<string, bufferlist>("key", bl2));
     t.omap_setkeys(cid, hoid, to_add);
-    store->apply_transaction(&osr, t);
+    store->apply_transaction(&osr, std::move(t));
 
     bufferlist bl3;
     map<string, bufferlist> cur_attrs;
@@ -2676,12 +2691,12 @@ TEST_P(StoreTest, OMapTest) {
       t.touch(cid, hoid);
       t.omap_setheader(cid, hoid, h);
       t.omap_setkeys(cid, hoid, to_set);
-      store->apply_transaction(&osr, t);
+      store->apply_transaction(&osr, std::move(t));
     }
     {
       ObjectStore::Transaction t;
       t.omap_rmkeyrange(cid, hoid, "3", "7");
-      store->apply_transaction(&osr, t);
+      store->apply_transaction(&osr, std::move(t));
     }
     {
       bufferlist hdr;
@@ -2699,7 +2714,7 @@ TEST_P(StoreTest, OMapTest) {
     {
       ObjectStore::Transaction t;
       t.omap_clear(cid, hoid);
-      store->apply_transaction(&osr, t);
+      store->apply_transaction(&osr, std::move(t));
     }
     {
       bufferlist hdr;
@@ -2713,7 +2728,7 @@ TEST_P(StoreTest, OMapTest) {
   ObjectStore::Transaction t;
   t.remove(cid, hoid);
   t.remove_collection(cid);
-  r = store->apply_transaction(&osr, t);
+  r = store->apply_transaction(&osr, std::move(t));
   ASSERT_EQ(r, 0);
 }
 
@@ -2726,7 +2741,7 @@ TEST_P(StoreTest, OMapIterator) {
   {
     ObjectStore::Transaction t;
     t.create_collection(cid, 0);
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     ASSERT_EQ(r, 0);
   }
 
@@ -2737,7 +2752,7 @@ TEST_P(StoreTest, OMapIterator) {
     t.omap_clear(cid, hoid);
     map<string, bufferlist> start_set;
     t.omap_setkeys(cid, hoid, start_set);
-    store->apply_transaction(&osr, t);
+    store->apply_transaction(&osr, std::move(t));
   }
   ObjectMap::ObjectMapIterator iter;
   bool correct;
@@ -2780,7 +2795,7 @@ TEST_P(StoreTest, OMapIterator) {
     attrs.insert(pair<string, bufferlist>("key-" + string(buf), bl));
     ObjectStore::Transaction t;
     t.omap_setkeys(cid, hoid, to_add);
-    store->apply_transaction(&osr, t);
+    store->apply_transaction(&osr, std::move(t));
   }
 
   iter = store->get_omap_iterator(cid, hoid);
@@ -2806,7 +2821,7 @@ TEST_P(StoreTest, OMapIterator) {
     ObjectStore::Transaction t;
     t.remove(cid, hoid);
     t.remove_collection(cid);
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     ASSERT_EQ(r, 0);
   }
 }
@@ -2828,7 +2843,7 @@ TEST_P(StoreTest, XattrTest) {
     ObjectStore::Transaction t;
     t.create_collection(cid, 0);
     t.touch(cid, hoid);
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     ASSERT_EQ(r, 0);
   }
 
@@ -2847,7 +2862,7 @@ TEST_P(StoreTest, XattrTest) {
     attrs["attr4"] = big;
     t.setattr(cid, hoid, "attr3", big);
     attrs["attr3"] = big;
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     ASSERT_EQ(r, 0);
   }
 
@@ -2866,7 +2881,7 @@ TEST_P(StoreTest, XattrTest) {
     ObjectStore::Transaction t;
     t.rmattr(cid, hoid, "attr2");
     attrs.erase("attr2");
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     ASSERT_EQ(r, 0);
   }
 
@@ -2894,7 +2909,7 @@ TEST_P(StoreTest, XattrTest) {
   ObjectStore::Transaction t;
   t.remove(cid, hoid);
   t.remove_collection(cid);
-  r = store->apply_transaction(&osr, t);
+  r = store->apply_transaction(&osr, std::move(t));
   ASSERT_EQ(r, 0);
 }
 
@@ -2910,7 +2925,7 @@ void colsplittest(
   {
     ObjectStore::Transaction t;
     t.create_collection(cid, common_suffix_size);
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     ASSERT_EQ(r, 0);
   }
   {
@@ -2925,14 +2940,14 @@ void colsplittest(
 	  i<<common_suffix_size,
 	  52, "")));
     }
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     ASSERT_EQ(r, 0);
   }
   {
     ObjectStore::Transaction t;
     t.create_collection(tid, common_suffix_size + 1);
     t.split_collection(cid, common_suffix_size+1, 1<<common_suffix_size, tid);
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     ASSERT_EQ(r, 0);
   }
 
@@ -2963,7 +2978,7 @@ void colsplittest(
 
   t.remove_collection(cid);
   t.remove_collection(tid);
-  r = store->apply_transaction(&osr, t);
+  r = store->apply_transaction(&osr, std::move(t));
   ASSERT_EQ(r, 0);
 }
 
@@ -2994,7 +3009,7 @@ TEST_P(StoreTest, TwoHash) {
   {
     ObjectStore::Transaction t;
     t.create_collection(cid, 0);
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     ASSERT_EQ(r, 0);
   }
   std::cout << "Making objects" << std::endl;
@@ -3008,7 +3023,7 @@ TEST_P(StoreTest, TwoHash) {
     }
     o.hobj.set_hash((i << 16) | 0xB1);
     t.touch(cid, o);
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     ASSERT_EQ(r, 0);
   }
   std::cout << "Removing half" << std::endl;
@@ -3018,7 +3033,7 @@ TEST_P(StoreTest, TwoHash) {
     o.hobj.pool = -1;
     o.hobj.set_hash((i << 16) | 0xA1);
     t.remove(cid, o);
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     ASSERT_EQ(r, 0);
   }
   std::cout << "Checking" << std::endl;
@@ -3046,12 +3061,12 @@ TEST_P(StoreTest, TwoHash) {
     t.remove(cid, o);
     o.hobj.set_hash((i << 16) | 0xB1);
     t.remove(cid, o);
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     ASSERT_EQ(r, 0);
   }
   ObjectStore::Transaction t;
   t.remove_collection(cid);
-  r = store->apply_transaction(&osr, t);
+  r = store->apply_transaction(&osr, std::move(t));
   ASSERT_EQ(r, 0);
 }
 
@@ -3067,7 +3082,7 @@ TEST_P(StoreTest, Rename) {
     ObjectStore::Transaction t;
     t.create_collection(cid, 0);
     t.write(cid, srcoid, 0, data.length(), data);
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     ASSERT_EQ(r, 0);
   }
   ASSERT_TRUE(store->exists(cid, srcoid));
@@ -3076,7 +3091,7 @@ TEST_P(StoreTest, Rename) {
     t.collection_move_rename(cid, srcoid, cid, dstoid);
     t.write(cid, srcoid, 0, data.length(), data);
     t.setattr(cid, srcoid, "attr", data);
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     ASSERT_EQ(r, 0);
   }
   ASSERT_TRUE(store->exists(cid, srcoid));
@@ -3086,7 +3101,7 @@ TEST_P(StoreTest, Rename) {
     t.remove(cid, dstoid);
     t.remove(cid, srcoid);
     t.remove_collection(cid);
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     ASSERT_EQ(r, 0);
   }
 }
@@ -3101,7 +3116,7 @@ TEST_P(StoreTest, MoveRename) {
     ObjectStore::Transaction t;
     t.create_collection(cid, 0);
     t.touch(cid, oid);
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     ASSERT_EQ(r, 0);
   }
   ASSERT_TRUE(store->exists(cid, oid));
@@ -3116,7 +3131,7 @@ TEST_P(StoreTest, MoveRename) {
     t.write(cid, temp_oid, 0, data.length(), data);
     t.setattr(cid, temp_oid, "attr", attr);
     t.omap_setkeys(cid, temp_oid, omap);
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     ASSERT_EQ(r, 0);
   }
   ASSERT_TRUE(store->exists(cid, temp_oid));
@@ -3124,7 +3139,7 @@ TEST_P(StoreTest, MoveRename) {
     ObjectStore::Transaction t;
     t.remove(cid, oid);
     t.collection_move_rename(cid, temp_oid, cid, oid);
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     ASSERT_EQ(r, 0);
   }
   ASSERT_TRUE(store->exists(cid, oid));
@@ -3151,7 +3166,7 @@ TEST_P(StoreTest, MoveRename) {
     ObjectStore::Transaction t;
     t.remove(cid, oid);
     t.remove_collection(cid);
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     ASSERT_EQ(r, 0);
   }
 }
@@ -3182,14 +3197,14 @@ TEST_P(StoreTest, BigRGWObjectName) {
     t.collection_move_rename(cid, oidhead, cid, oid);
     t.touch(cid, oidhead);
     t.collection_move_rename(cid, oidhead, cid, oid2);
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     ASSERT_EQ(r, 0);
   }
 
   {
     ObjectStore::Transaction t;
     t.remove(cid, oid);
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     ASSERT_EQ(r, 0);
   }
 
@@ -3208,7 +3223,7 @@ TEST_P(StoreTest, BigRGWObjectName) {
     ObjectStore::Transaction t;
     t.remove(cid, oid2);
     t.remove_collection(cid);
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     ASSERT_EQ(r, 0);
 
   }
@@ -3223,31 +3238,31 @@ TEST_P(StoreTest, SetAllocHint) {
     ObjectStore::Transaction t;
     t.create_collection(cid, 0);
     t.touch(cid, hoid);
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     ASSERT_EQ(r, 0);
   }
   {
     ObjectStore::Transaction t;
     t.set_alloc_hint(cid, hoid, 4*1024*1024, 1024*4);
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     ASSERT_EQ(r, 0);
   }
   {
     ObjectStore::Transaction t;
     t.remove(cid, hoid);
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     ASSERT_EQ(r, 0);
   }
   {
     ObjectStore::Transaction t;
     t.set_alloc_hint(cid, hoid, 4*1024*1024, 1024*4);
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     ASSERT_EQ(r, 0);
   }
   {
     ObjectStore::Transaction t;
     t.remove_collection(cid);
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     ASSERT_EQ(r, 0);
   }
 }
@@ -3258,7 +3273,6 @@ INSTANTIATE_TEST_CASE_P(
   ::testing::Values(
     "memstore",
     "filestore",
-    "keyvaluestore",
     "bluestore",
     "kstore"));
 
@@ -3274,64 +3288,6 @@ TEST(DummyTest, ValueParameterizedTestsAreNotSupportedOnThisPlatform) {}
 
 #endif
 
-
-//
-// support tests for qa/workunits/filestore/filestore.sh
-//
-TEST(EXT4StoreTest, _detect_fs) {
-  if (::getenv("DISK") == NULL || ::getenv("MOUNTPOINT") == NULL) {
-    cerr << "SKIP because DISK and MOUNTPOINT environment variables are not set. It is meant to run from qa/workunits/filestore/filestore.sh " << std::endl;
-    return;
-  }
-  const string disk(::getenv("DISK"));
-  EXPECT_LT((unsigned)0, disk.size());
-  const string mnt(::getenv("MOUNTPOINT"));
-  EXPECT_LT((unsigned)0, mnt.size());
-  ::umount(mnt.c_str());
-
-  const string dir("store_test_temp_dir");
-  const string journal("store_test_temp_journal");
-
-  //
-  // without user_xattr, ext4 fails
-  //
-  {
-    g_ceph_context->_conf->set_val("filestore_xattr_use_omap", "true");
-    EXPECT_EQ(::system((string("mount -o loop,nouser_xattr ") + disk + " " + mnt).c_str()), 0);
-    EXPECT_EQ(::chdir(mnt.c_str()), 0);
-    EXPECT_EQ(::mkdir(dir.c_str(), 0755), 0);
-    FileStore store(dir, journal);
-    EXPECT_EQ(store._detect_fs(), -ENOTSUP);
-    EXPECT_EQ(::chdir(".."), 0);
-    EXPECT_EQ(::umount(mnt.c_str()), 0);
-  }
-  //
-  // mounted with user_xattr, ext4 fails if filestore_xattr_use_omap is false
-  //
-  {
-    g_ceph_context->_conf->set_val("filestore_xattr_use_omap", "false");
-    EXPECT_EQ(::system((string("mount -o loop,user_xattr ") + disk + " " + mnt).c_str()), 0);
-    EXPECT_EQ(::chdir(mnt.c_str()), 0);
-    FileStore store(dir, journal);
-    EXPECT_EQ(store._detect_fs(), -ENOTSUP);
-    EXPECT_EQ(::chdir(".."), 0);
-    EXPECT_EQ(::umount(mnt.c_str()), 0);
-  }
-  //
-  // mounted with user_xattr, ext4 succeeds if filestore_xattr_use_omap is true
-  //
-  {
-    g_ceph_context->_conf->set_val("filestore_xattr_use_omap", "true");
-    EXPECT_EQ(::system((string("mount -o loop,user_xattr ") + disk + " " + mnt).c_str()), 0);
-    EXPECT_EQ(::chdir(mnt.c_str()), 0);
-    FileStore store(dir, journal);
-    EXPECT_EQ(store._detect_fs(), 0);
-    EXPECT_EQ(::chdir(".."), 0);
-    EXPECT_EQ(::umount(mnt.c_str()), 0);
-  }
-}
-
-
 int main(int argc, char **argv) {
   vector<const char*> args;
   argv_to_vec(argc, (const char **)argv, args);
diff --git a/src/test/objectstore/test_idempotent.cc b/src/test/objectstore/test_idempotent.cc
index 8c663a6..edb8e9b 100644
--- a/src/test/objectstore/test_idempotent.cc
+++ b/src/test/objectstore/test_idempotent.cc
@@ -76,7 +76,7 @@ int main(int argc, char **argv) {
     ObjectStore::Transaction t;
     assert(!store->mount());
     t.create_collection(coll, 0);
-    store->apply_transaction(&osr, t);
+    store->apply_transaction(&osr, std::move(t));
   } else {
     assert(!store->mount());
   }
diff --git a/src/test/objectstore/test_transaction.cc b/src/test/objectstore/test_transaction.cc
new file mode 100644
index 0000000..6e12b5d
--- /dev/null
+++ b/src/test/objectstore/test_transaction.cc
@@ -0,0 +1,75 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2016 Casey Bodley <cbodley at redhat.com>
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation.  See file COPYING.
+ *
+ */
+
+#include "os/ObjectStore.h"
+#include <gtest/gtest.h>
+
+TEST(Transaction, MoveConstruct)
+{
+  auto a = ObjectStore::Transaction{};
+  a.nop();
+  ASSERT_FALSE(a.empty());
+
+  // move-construct in b
+  auto b = std::move(a);
+  ASSERT_TRUE(a.empty());
+  ASSERT_FALSE(b.empty());
+}
+
+TEST(Transaction, MoveAssign)
+{
+  auto a = ObjectStore::Transaction{};
+  a.nop();
+  ASSERT_FALSE(a.empty());
+
+  auto b = ObjectStore::Transaction{};
+  b = std::move(a); // move-assign to b
+  ASSERT_TRUE(a.empty());
+  ASSERT_FALSE(b.empty());
+}
+
+TEST(Transaction, CopyConstruct)
+{
+  auto a = ObjectStore::Transaction{};
+  a.nop();
+  ASSERT_FALSE(a.empty());
+
+  auto b = a; // copy-construct in b
+  ASSERT_FALSE(a.empty());
+  ASSERT_FALSE(b.empty());
+}
+
+TEST(Transaction, CopyAssign)
+{
+  auto a = ObjectStore::Transaction{};
+  a.nop();
+  ASSERT_FALSE(a.empty());
+
+  auto b = ObjectStore::Transaction{};
+  b = a; // copy-assign to b
+  ASSERT_FALSE(a.empty());
+  ASSERT_FALSE(b.empty());
+}
+
+TEST(Transaction, Swap)
+{
+  auto a = ObjectStore::Transaction{};
+  a.nop();
+  ASSERT_FALSE(a.empty());
+
+  auto b = ObjectStore::Transaction{};
+  std::swap(a, b); // swap a and b
+  ASSERT_TRUE(a.empty());
+  ASSERT_FALSE(b.empty());
+}
diff --git a/src/test/objectstore/workload_generator.cc b/src/test/objectstore/workload_generator.cc
index 25047b5..bbe3c74 100644
--- a/src/test/objectstore/workload_generator.cc
+++ b/src/test/objectstore/workload_generator.cc
@@ -459,7 +459,7 @@ void WorkloadGenerator::run()
         break;
       }
 
-      c = new C_OnReadable(this, t);
+      c = new C_OnReadable(this);
       goto queue_tx;
     }
 
@@ -469,7 +469,7 @@ void WorkloadGenerator::run()
 
     if (destroy_collection) {
       do_destroy_collection(t, entry, stat_state);
-      c = new C_OnDestroyed(this, t, entry);
+      c = new C_OnDestroyed(this, entry);
       if (!m_num_ops)
         create_coll = true;
     } else {
@@ -480,7 +480,7 @@ void WorkloadGenerator::run()
       do_pgmeta_omap_set(t, entry->m_pgid, entry->m_coll, stat_state);
       do_append_log(t, entry, stat_state);
 
-      c = new C_OnReadable(this, t);
+      c = new C_OnReadable(this);
     }
 
 queue_tx:
@@ -490,7 +490,8 @@ queue_tx:
       c = new C_StatWrapper(stat_state, tmp);
     }
 
-    m_store->queue_transaction(&(entry->m_osr), t, c);
+    m_store->queue_transaction(&(entry->m_osr), std::move(*t), c);
+    delete t;
 
     inc_in_flight();
 
diff --git a/src/test/objectstore/workload_generator.h b/src/test/objectstore/workload_generator.h
index 6591659..0d5360e 100644
--- a/src/test/objectstore/workload_generator.h
+++ b/src/test/objectstore/workload_generator.h
@@ -120,7 +120,7 @@ class WorkloadGenerator : public TestObjectStoreState {
   void do_stats();
 
 public:
-  WorkloadGenerator(vector<const char*> args);
+  explicit WorkloadGenerator(vector<const char*> args);
   ~WorkloadGenerator() {
     m_store->umount();
   }
@@ -129,9 +129,8 @@ public:
     WorkloadGenerator *wrkldgen_state;
 
   public:
-    C_OnReadable(WorkloadGenerator *state,
-                                  ObjectStore::Transaction *t)
-     :TestObjectStoreState::C_OnFinished(state, t), wrkldgen_state(state) { }
+    explicit C_OnReadable(WorkloadGenerator *state)
+     :TestObjectStoreState::C_OnFinished(state), wrkldgen_state(state) { }
 
     void finish(int r)
     {
@@ -144,9 +143,8 @@ public:
     coll_entry_t *m_entry;
 
   public:
-    C_OnDestroyed(WorkloadGenerator *state,
-        ObjectStore::Transaction *t, coll_entry_t *entry) :
-          C_OnReadable(state, t), m_entry(entry) {}
+    C_OnDestroyed(WorkloadGenerator *state, coll_entry_t *entry) :
+          C_OnReadable(state), m_entry(entry) {}
 
     void finish(int r) {
       C_OnReadable::finish(r);
diff --git a/src/test/objectstore_bench.cc b/src/test/objectstore_bench.cc
index 097e406..d8cd166 100644
--- a/src/test/objectstore_bench.cc
+++ b/src/test/objectstore_bench.cc
@@ -36,6 +36,7 @@ static void usage()
 // helper class for bytes with units
 struct byte_units {
   size_t v;
+  // cppcheck-suppress noExplicitConstructor
   byte_units(size_t v) : v(v) {}
 
   bool parse(const std::string &val, std::string *err);
@@ -111,7 +112,7 @@ void osbench_worker(ObjectStore *os, const Config &cfg,
     uint64_t offset = starting_offset;
     size_t len = cfg.size;
 
-    list<ObjectStore::Transaction*> tls;
+    vector<ObjectStore::Transaction> tls;
 
     std::cout << "Write cycle " << i << std::endl;
     while (len) {
@@ -119,7 +120,8 @@ void osbench_worker(ObjectStore *os, const Config &cfg,
 
       auto t = new ObjectStore::Transaction;
       t->write(cid, oid, offset, count, data);
-      tls.push_back(t);
+      tls.push_back(std::move(*t));
+      delete t;
 
       offset += count;
       if (offset > cfg.size)
@@ -139,11 +141,7 @@ void osbench_worker(ObjectStore *os, const Config &cfg,
     cond.wait(lock, [&done](){ return done; });
     lock.unlock();
 
-    while (!tls.empty()) {
-      auto t = tls.front();
-      tls.pop_front();
-      delete t;
-    }
+
   }
   sequencer.flush();
 }
@@ -261,7 +259,7 @@ int main(int argc, const char *argv[])
     ObjectStore::Sequencer osr(__func__);
     ObjectStore::Transaction t;
     t.create_collection(cid, 0);
-    os->apply_transaction(&osr, t);
+    os->apply_transaction(&osr, std::move(t));
   }
 
   // create the objects
@@ -276,7 +274,7 @@ int main(int argc, const char *argv[])
       ObjectStore::Sequencer osr(__func__);
       ObjectStore::Transaction t;
       t.touch(cid, oids[i]);
-      int r = os->apply_transaction(&osr, t);
+      int r = os->apply_transaction(&osr, std::move(t));
       assert(r == 0);
     }
   } else {
@@ -285,7 +283,7 @@ int main(int argc, const char *argv[])
     ObjectStore::Sequencer osr(__func__);
     ObjectStore::Transaction t;
     t.touch(cid, oids.back());
-    int r = os->apply_transaction(&osr, t);
+    int r = os->apply_transaction(&osr, std::move(t));
     assert(r == 0);
   }
 
@@ -318,7 +316,7 @@ int main(int argc, const char *argv[])
   ObjectStore::Transaction t;
   for (const auto &oid : oids)
     t.remove(cid, oid);
-  os->apply_transaction(&osr,t);
+  os->apply_transaction(&osr,std::move(t));
 
   os->umount();
   return 0;
diff --git a/src/test/opensuse-13.2/ceph.spec.in b/src/test/opensuse-13.2/ceph.spec.in
index 487232c..498eac4 100644
--- a/src/test/opensuse-13.2/ceph.spec.in
+++ b/src/test/opensuse-13.2/ceph.spec.in
@@ -56,7 +56,7 @@ restorecon -R /var/log/ceph > /dev/null 2>&1;
 # the _with_systemd variable only implies that we'll install
 # /etc/tmpfiles.d/ceph.conf in order to set up the socket directory in
 # /var/run/ceph.
-%if 0%{?fedora} || 0%{?rhel} >= 7 || 0%{?suse_version} >= 1210
+%if 0%{?fedora} || 0%{?rhel} >= 7 || 0%{?suse_version}
 %global _with_systemd 1
 %{!?tmpfiles_create: %global tmpfiles_create systemd-tmpfiles --create}
 %endif
@@ -66,6 +66,10 @@ restorecon -R /var/log/ceph > /dev/null 2>&1;
 %global _with_lttng 1
 %endif
 
+# unify libexec for all targets
+%global _libexecdir %{_exec_prefix}/lib
+
+
 #################################################################################
 # common
 #################################################################################
@@ -86,27 +90,9 @@ Patch0:		init-ceph.in-fedora.patch
 #################################################################################
 # dependencies that apply across all distro families
 #################################################################################
-Requires:	librbd1 = %{epoch}:%{version}-%{release}
-Requires:	librados2 = %{epoch}:%{version}-%{release}
-Requires:	libcephfs1 = %{epoch}:%{version}-%{release}
-Requires:	ceph-common = %{epoch}:%{version}-%{release}
-%if 0%{with selinux}
-Requires:	ceph-selinux = %{epoch}:%{version}-%{release}
-%endif
-Requires:	python-rados = %{epoch}:%{version}-%{release}
-Requires:	python-rbd = %{epoch}:%{version}-%{release}
-Requires:	python-cephfs = %{epoch}:%{version}-%{release}
-Requires:	python
-Requires:	python-requests
-Requires:	grep
-Requires:	xfsprogs
-Requires:	logrotate
-Requires:	parted
-Requires:	util-linux
-Requires:	hdparm
-Requires:	cryptsetup
-Requires:	findutils
-Requires:	which
+Requires:       ceph-osd = %{epoch}:%{version}-%{release}
+Requires:       ceph-mds = %{epoch}:%{version}-%{release}
+Requires:       ceph-mon = %{epoch}:%{version}-%{release}
 Requires(post):	binutils
 %if 0%{with cephfs_java}
 BuildRequires:	java-devel
@@ -132,7 +118,6 @@ BuildRequires:	hdparm
 BuildRequires:	leveldb-devel > 1.2
 BuildRequires:	libaio-devel
 BuildRequires:	libcurl-devel
-BuildRequires:	libedit-devel
 BuildRequires:	libxml2-devel
 BuildRequires:	libblkid-devel >= 2.17
 BuildRequires:	libudev-devel
@@ -165,41 +150,35 @@ BuildRequires:	systemd
 %{?systemd_requires}
 %endif
 PreReq:		%fillup_prereq
-Requires:	python-Flask
 BuildRequires:	net-tools
 BuildRequires:	libbz2-devel
-%if 0%{?suse_version} > 1210
-Requires:	gptfdisk
 %if 0%{with tcmalloc}
 BuildRequires:	gperftools-devel
 %endif
-%else
-Requires:	scsirastools
-BuildRequires:	google-perftools-devel
-%endif
 BuildRequires:	mozilla-nss-devel
 BuildRequires:	keyutils-devel
 BuildRequires:	libatomic-ops-devel
-%else
+BuildRequires:  lsb-release
+%endif
+%if 0%{?fedora} || 0%{?rhel} 
 %if 0%{?_with_systemd}
 Requires:	systemd
 %endif
 BuildRequires:	nss-devel
 BuildRequires:	keyutils-libs-devel
 BuildRequires:	libatomic_ops-devel
-Requires:	gdisk
 Requires(post):	chkconfig
 Requires(preun):	chkconfig
 Requires(preun):	initscripts
 BuildRequires:	gperftools-devel
-Requires:	python-flask
+BuildRequires:  redhat-lsb-core
 %endif
 # boost
 %if 0%{?fedora} || 0%{?rhel} 
 BuildRequires:  boost-random
 %endif
 # python-argparse for distros with Python 2.6 or lower
-%if (0%{?rhel} && 0%{?rhel} <= 6) || (0%{?suse_version} && 0%{?suse_version} <= 1110)
+%if (0%{?rhel} && 0%{?rhel} <= 6)
 BuildRequires:	python-argparse
 %endif
 # lttng and babeltrace for rbd-replay-prep
@@ -238,6 +217,37 @@ on commodity hardware and delivers object, block and file system storage.
 #################################################################################
 # packages
 #################################################################################
+%package base
+Summary:       Ceph Base Package
+Group:         System Environment/Base
+Requires:      ceph-common = %{epoch}:%{version}-%{release}
+Requires:      librbd1 = %{epoch}:%{version}-%{release}
+Requires:      librados2 = %{epoch}:%{version}-%{release}
+Requires:      libcephfs1 = %{epoch}:%{version}-%{release}
+%if 0%{with selinux}
+Requires:      ceph-selinux = %{epoch}:%{version}-%{release}
+%endif
+Requires:      python
+Requires:      python-requests
+Requires:      python-setuptools
+Requires:      grep
+Requires:      xfsprogs
+Requires:      logrotate
+Requires:      parted
+Requires:      util-linux
+Requires:      hdparm
+Requires:      cryptsetup
+Requires:      findutils
+Requires:      which
+%if 0%{?suse_version}
+Requires:      lsb-release
+%endif
+%if 0%{?fedora} || 0%{?rhel}
+Requires:      redhat-lsb-core
+%endif
+%description base
+Base is the package that includes all the files shared amongst ceph servers
+
 %package -n ceph-common
 Summary:	Ceph Common
 Group:		System Environment/Base
@@ -254,11 +264,38 @@ Requires:	python-requests
 Requires(pre):	pwdutils
 %endif
 # python-argparse is only needed in distros with Python 2.6 or lower
-%if (0%{?rhel} && 0%{?rhel} <= 6) || (0%{?suse_version} && 0%{?suse_version} <= 1110)
+%if (0%{?rhel} && 0%{?rhel} <= 6)
 Requires:	python-argparse
 %endif
 %description -n ceph-common
 Common utilities to mount and interact with a ceph storage cluster.
+Comprised of files that are common to Ceph clients and servers.
+
+%package mds
+Summary:	Ceph Metadata Server Daemon
+Group:		System Environment/Base
+Requires:	ceph-base = %{epoch}:%{version}-%{release}
+%description mds
+ceph-mds is the metadata server daemon for the Ceph distributed file system.
+One or more instances of ceph-mds collectively manage the file system
+namespace, coordinating access to the shared OSD cluster.
+
+%package mon
+Summary:	Ceph Monitor Daemon
+Group:		System Environment/Base
+Requires:	ceph-base = %{epoch}:%{version}-%{release}
+# For ceph-rest-api
+%if 0%{?fedora} || 0%{?rhel}
+Requires:      python-flask
+%endif
+%if 0%{?suse_version}
+Requires:      python-Flask
+%endif
+%description mon
+ceph-mon is the cluster monitor daemon for the Ceph distributed file
+system. One or more instances of ceph-mon form a Paxos part-time
+parliament cluster that provides extremely reliable and durable storage
+of cluster membership, configuration, and state.
 
 %package fuse
 Summary:	Ceph fuse-based client
@@ -276,6 +313,16 @@ Requires:	librbd1 = %{epoch}:%{version}-%{release}
 %description -n rbd-fuse
 FUSE based client to map Ceph rbd images to files
 
+%package -n rbd-mirror
+Summary:	Ceph daemon for mirroring RBD images
+Group:		System Environment/Base
+Requires:	%{name}
+Requires:	ceph-common = %{epoch}:%{version}-%{release}
+Requires:	librados2 = %{epoch}:%{version}-%{release}
+%description -n rbd-mirror
+Daemon for mirroring RBD images between Ceph clusters, streaming
+changes asynchronously.
+
 %package -n rbd-nbd
 Summary:	Ceph RBD client base on NBD
 Group:		System Environment/Base
@@ -295,6 +342,12 @@ Requires:	ceph-selinux = %{epoch}:%{version}-%{release}
 Requires:	librados2 = %{epoch}:%{version}-%{release}
 %if 0%{?rhel} || 0%{?fedora}
 Requires:	mailcap
+# python-flask for powerdns
+Requires:	python-flask
+%endif
+%if 0%{?suse_version}
+# python-Flask for powerdns
+Requires:      python-Flask
 %endif
 %description radosgw
 This package is an S3 HTTP REST gateway for the RADOS object store. It
@@ -314,6 +367,22 @@ under Open Cluster Framework (OCF) compliant resource
 managers such as Pacemaker.
 %endif
 
+%package osd
+Summary:	Ceph Object Storage Daemon
+Group:		System Environment/Base
+Requires:	ceph-base = %{epoch}:%{version}-%{release}
+# for sgdisk, used by ceph-disk
+%if 0%{?fedora} || 0%{?rhel}
+Requires:	gdisk
+%endif
+%if 0%{?suse_version}
+Requires:	gptfdisk
+%endif
+%description osd
+ceph-osd is the object storage daemon for the Ceph distributed file
+system.  It is responsible for storing objects on a local file system
+and providing access to them over the network.
+
 %package -n librados2
 Summary:	RADOS distributed object store client library
 Group:		System Environment/Libraries
@@ -588,6 +657,7 @@ export RPM_OPT_FLAGS=`echo $RPM_OPT_FLAGS | sed -e 's/i386/i486/'`
 
 %{configure}	CPPFLAGS="$java_inc" \
 		--prefix=/usr \
+                --libexecdir=%{_libexecdir} \
 		--localstatedir=/var \
 		--sysconfdir=/etc \
 %if 0%{?_with_systemd}
@@ -606,16 +676,6 @@ export RPM_OPT_FLAGS=`echo $RPM_OPT_FLAGS | sed -e 's/i386/i486/'`
 		--with-selinux \
 %endif
 		--with-librocksdb-static=check \
-%if 0%{?rhel} || 0%{?fedora}
-		--with-systemd-libexec-dir=/usr/libexec/ceph \
-		--with-rgw-user=root \
-		--with-rgw-group=root \
-%endif
-%if 0%{?suse_version}
-		--with-systemd-libexec-dir=/usr/lib/ceph/ \
-		--with-rgw-user=wwwrun \
-		--with-rgw-group=www \
-%endif
 		--with-radosgw \
 		$CEPH_EXTRA_CONFIGURE_ARGS \
 		%{?_with_ocf} \
@@ -642,6 +702,20 @@ make %{?_smp_mflags} check-local
 
 %install
 make DESTDIR=$RPM_BUILD_ROOT install
+rm -f $RPM_BUILD_ROOT%{_libdir}/ceph/erasure-code/libec_example.so
+rm -f $RPM_BUILD_ROOT%{_libdir}/ceph/erasure-code/libec_fail_to_initialize.so
+rm -f $RPM_BUILD_ROOT%{_libdir}/ceph/erasure-code/libec_fail_to_register.so
+rm -f $RPM_BUILD_ROOT%{_libdir}/ceph/erasure-code/libec_hangs.so
+rm -f $RPM_BUILD_ROOT%{_libdir}/ceph/erasure-code/libec_missing_entry_point.so
+rm -f $RPM_BUILD_ROOT%{_libdir}/ceph/erasure-code/libec_missing_version.so
+rm -f $RPM_BUILD_ROOT%{_libdir}/ceph/erasure-code/libec_test_jerasure_generic.so
+rm -f $RPM_BUILD_ROOT%{_libdir}/ceph/erasure-code/libec_test_jerasure_neon.so
+rm -f $RPM_BUILD_ROOT%{_libdir}/ceph/erasure-code/libec_test_jerasure_sse3.so
+rm -f $RPM_BUILD_ROOT%{_libdir}/ceph/erasure-code/libec_test_jerasure_sse4.so
+rm -f $RPM_BUILD_ROOT%{_libdir}/ceph/erasure-code/libec_test_shec_generic.so
+rm -f $RPM_BUILD_ROOT%{_libdir}/ceph/erasure-code/libec_test_shec_neon.so
+rm -f $RPM_BUILD_ROOT%{_libdir}/ceph/erasure-code/libec_test_shec_sse3.so
+rm -f $RPM_BUILD_ROOT%{_libdir}/ceph/erasure-code/libec_test_shec_sse4.so
 find $RPM_BUILD_ROOT -type f -name "*.la" -exec rm -f {} ';'
 find $RPM_BUILD_ROOT -type f -name "*.a" -exec rm -f {} ';'
 install -D src/etc-rbdmap $RPM_BUILD_ROOT%{_sysconfdir}/ceph/rbdmap
@@ -718,120 +792,29 @@ mkdir -p $RPM_BUILD_ROOT%{_localstatedir}/lib/ceph/bootstrap-rgw
 %clean
 rm -rf $RPM_BUILD_ROOT
 
-%pre
-%if 0%{?_with_systemd}
-  %if 0%{?suse_version}
-    # service_add_pre and friends don't work with parameterized systemd service
-    # instances, only with single services or targets, so we always pass
-    # ceph.target to these macros
-    %service_add_pre ceph.target
-  %endif
-%endif
-
-
-%post
-/sbin/ldconfig
-%if 0%{?_with_systemd}
-  %if 0%{?suse_version}
-    %fillup_only
-    %service_add_post ceph.target
-  %endif
-%else
-  /sbin/chkconfig --add ceph
-%endif
-
-%preun
-%if 0%{?_with_systemd}
-  %if 0%{?suse_version}
-    %service_del_preun ceph.target
-  %endif
-  # Disable and stop on removal.
-  if [ $1 = 0 ] ; then
-    SERVICE_LIST=$(systemctl | grep -E '^ceph-mon@|^ceph-create-keys@|^ceph-osd@|^ceph-mds@|^ceph-disk-'  | cut -d' ' -f1)
-    if [ -n "$SERVICE_LIST" ]; then
-      for SERVICE in $SERVICE_LIST; do
-        /usr/bin/systemctl --no-reload disable $SERVICE > /dev/null 2>&1 || :
-        /usr/bin/systemctl stop $SERVICE > /dev/null 2>&1 || :
-      done
-    fi
-  fi
-%else
-  %if 0%{?rhel} || 0%{?fedora}
-    if [ $1 = 0 ] ; then
-      /sbin/service ceph stop >/dev/null 2>&1
-      /sbin/chkconfig --del ceph
-    fi
-  %endif
-%endif
-
-%postun
-/sbin/ldconfig
-%if 0%{?_with_systemd}
-  if [ $1 = 1 ] ; then
-    # Restart on upgrade, but only if "CEPH_AUTO_RESTART_ON_UPGRADE" is set to
-    # "yes". In any case: if units are not running, do not touch them.
-    SYSCONF_CEPH=/etc/sysconfig/ceph
-    if [ -f $SYSCONF_CEPH -a -r $SYSCONF_CEPH ] ; then
-      source $SYSCONF_CEPH
-    fi
-    if [ "X$CEPH_AUTO_RESTART_ON_UPGRADE" = "Xyes" ] ; then
-      SERVICE_LIST=$(systemctl | grep -E '^ceph-mon@|^ceph-create-keys@|^ceph-osd@|^ceph-mds@|^ceph-disk-'  | cut -d' ' -f1)
-      if [ -n "$SERVICE_LIST" ]; then
-        for SERVICE in $SERVICE_LIST; do
-          /usr/bin/systemctl try-restart $SERVICE > /dev/null 2>&1 || :
-        done
-      fi
-    fi
-  fi
-%endif
-
 #################################################################################
-# files
+# files and systemd scriptlets
 #################################################################################
 %files
+
+%files base
 %defattr(-,root,root,-)
 %docdir %{_docdir}
 %dir %{_docdir}/ceph
 %{_docdir}/ceph/sample.ceph.conf
 %{_docdir}/ceph/sample.fetch_config
-%{_bindir}/cephfs
-%{_bindir}/ceph-clsinfo
-%{_bindir}/ceph-rest-api
-%{python_sitelib}/ceph_rest_api.py*
 %{_bindir}/crushtool
 %{_bindir}/monmaptool
 %{_bindir}/osdmaptool
 %{_bindir}/ceph-run
-%{_bindir}/ceph-mon
-%{_bindir}/ceph-mds
-%{_bindir}/ceph-objectstore-tool
-%{_bindir}/ceph-bluefs-tool
-%{_bindir}/ceph-osd
 %{_bindir}/ceph-detect-init
-%{_bindir}/librados-config
 %{_bindir}/ceph-client-debug
-%{_bindir}/cephfs-journal-tool
-%{_bindir}/cephfs-table-tool
-%{_bindir}/cephfs-data-scan
-%{_bindir}/ceph-debugpack
-%{_bindir}/ceph-coverage
+%{_bindir}/cephfs
 %if 0%{?_with_systemd}
-%{_unitdir}/ceph-mds@.service
-%{_unitdir}/ceph-mon@.service
 %{_unitdir}/ceph-create-keys@.service
-%{_unitdir}/ceph-osd@.service
-%{_unitdir}/ceph-radosgw@.service
-%{_unitdir}/ceph-disk@.service
-%{_unitdir}/ceph.target
-%{_unitdir}/ceph-osd.target
-%{_unitdir}/ceph-mon.target
-%{_unitdir}/ceph-mds.target
-%{_unitdir}/ceph-radosgw.target
 %else
 %{_initrddir}/ceph
 %endif
-%{_sbindir}/ceph-disk
-%{_sbindir}/ceph-disk-udev
 %{_sbindir}/ceph-create-keys
 %{_sbindir}/rcceph
 %if 0%{?rhel} >= 7 || 0%{?fedora} || 0%{?suse_version}
@@ -839,25 +822,11 @@ rm -rf $RPM_BUILD_ROOT
 %else
 /sbin/mount.ceph
 %endif
-%dir %{_libdir}/ceph
-%{_libdir}/ceph/ceph_common.sh
-%{_libexecdir}/ceph/ceph-osd-prestart.sh
+%dir %{_libexecdir}/ceph
+%{_libexecdir}/ceph/ceph_common.sh
 %dir %{_libdir}/rados-classes
-%{_libdir}/rados-classes/libcls_cephfs.so*
-%{_libdir}/rados-classes/libcls_rbd.so*
-%{_libdir}/rados-classes/libcls_hello.so*
-%{_libdir}/rados-classes/libcls_numops.so*
-%{_libdir}/rados-classes/libcls_rgw.so*
-%{_libdir}/rados-classes/libcls_lock.so*
-%{_libdir}/rados-classes/libcls_kvs.so*
-%{_libdir}/rados-classes/libcls_refcount.so*
-%{_libdir}/rados-classes/libcls_log.so*
-%{_libdir}/rados-classes/libcls_replica_log.so*
-%{_libdir}/rados-classes/libcls_statelog.so*
-%{_libdir}/rados-classes/libcls_timeindex.so*
-%{_libdir}/rados-classes/libcls_user.so*
-%{_libdir}/rados-classes/libcls_version.so*
-%{_libdir}/rados-classes/libcls_journal.so*
+%{_libdir}/rados-classes/*
+%dir %{_libdir}/ceph
 %dir %{_libdir}/ceph/erasure-code
 %{_libdir}/ceph/erasure-code/libec_*.so*
 %dir %{_libdir}/ceph/compressor
@@ -866,8 +835,6 @@ rm -rf $RPM_BUILD_ROOT
 %{_libdir}/libos_tp.so*
 %{_libdir}/libosd_tp.so*
 %endif
-%{_udevrulesdir}/60-ceph-partuuid-workaround.rules
-%{_udevrulesdir}/95-ceph-osd.rules
 %config %{_sysconfdir}/bash_completion.d/ceph
 %config(noreplace) %{_sysconfdir}/logrotate.d/ceph
 %if 0%{?fedora} || 0%{?rhel}
@@ -878,29 +845,20 @@ rm -rf $RPM_BUILD_ROOT
 %config %{_sysconfdir}/sysconfig/SuSEfirewall2.d/services/ceph-mon
 %config %{_sysconfdir}/sysconfig/SuSEfirewall2.d/services/ceph-osd-mds
 %endif
+%{_unitdir}/ceph.target
 %{python_sitelib}/ceph_detect_init*
+%{python_sitelib}/ceph_disk*
 %{_mandir}/man8/ceph-deploy.8*
 %{_mandir}/man8/ceph-detect-init.8*
-%{_mandir}/man8/ceph-disk.8*
 %{_mandir}/man8/ceph-create-keys.8*
-%{_mandir}/man8/ceph-mon.8*
-%{_mandir}/man8/ceph-mds.8*
-%{_mandir}/man8/ceph-osd.8*
 %{_mandir}/man8/ceph-run.8*
-%{_mandir}/man8/ceph-rest-api.8*
 %{_mandir}/man8/crushtool.8*
 %{_mandir}/man8/osdmaptool.8*
 %{_mandir}/man8/monmaptool.8*
 %{_mandir}/man8/cephfs.8*
 %{_mandir}/man8/mount.ceph.8*
-%{_mandir}/man8/ceph-debugpack.8*
-%{_mandir}/man8/ceph-clsinfo.8*
-%{_mandir}/man8/librados-config.8*
 #set up placeholder directories
 %attr(750,ceph,ceph) %dir %{_localstatedir}/lib/ceph/tmp
-%attr(750,ceph,ceph) %dir %{_localstatedir}/lib/ceph/mon
-%attr(750,ceph,ceph) %dir %{_localstatedir}/lib/ceph/osd
-%attr(750,ceph,ceph) %dir %{_localstatedir}/lib/ceph/mds
 %attr(750,ceph,ceph) %dir %{_localstatedir}/lib/ceph/bootstrap-osd
 %attr(750,ceph,ceph) %dir %{_localstatedir}/lib/ceph/bootstrap-mds
 %attr(750,ceph,ceph) %dir %{_localstatedir}/lib/ceph/bootstrap-rgw
@@ -908,8 +866,74 @@ rm -rf $RPM_BUILD_ROOT
 %attr(770,ceph,ceph) %dir %{_localstatedir}/run/ceph
 %endif
 
+%pre base
+%if 0%{?_with_systemd}
+  %if 0%{?suse_version}
+    # service_add_pre and friends don't work with parameterized systemd service
+    # instances, only with single services or targets, so we always pass
+    # ceph.target to these macros
+    %service_add_pre ceph.target
+  %endif
+%endif
+
+%post base
+/sbin/ldconfig
+%if 0%{?_with_systemd}
+  %if 0%{?suse_version}
+    %fillup_only
+    %service_add_post ceph.target
+  %endif
+%else
+  /sbin/chkconfig --add ceph
+%endif
+
+%preun base
+%if 0%{?_with_systemd}
+  %if 0%{?suse_version}
+    %service_del_preun ceph.target
+  %endif
+  # Disable and stop on removal.
+  if [ $1 = 0 ] ; then
+    SERVICE_LIST=$(systemctl | grep -E '^ceph-mon@|^ceph-create-keys@|^ceph-osd@|^ceph-mds@|^ceph-disk-'  | cut -d' ' -f1)
+    if [ -n "$SERVICE_LIST" ]; then
+      for SERVICE in $SERVICE_LIST; do
+        /usr/bin/systemctl --no-reload disable $SERVICE > /dev/null 2>&1 || :
+        /usr/bin/systemctl stop $SERVICE > /dev/null 2>&1 || :
+      done
+    fi
+  fi
+%else
+  %if 0%{?rhel} || 0%{?fedora}
+    if [ $1 = 0 ] ; then
+      /sbin/service ceph stop >/dev/null 2>&1
+      /sbin/chkconfig --del ceph
+    fi
+  %endif
+%endif
+
+%postun base
+/sbin/ldconfig
+%if 0%{?_with_systemd}
+  if [ $1 = 1 ] ; then
+    # Restart on upgrade, but only if "CEPH_AUTO_RESTART_ON_UPGRADE" is set to
+    # "yes". In any case: if units are not running, do not touch them.
+    SYSCONF_CEPH=/etc/sysconfig/ceph
+    if [ -f $SYSCONF_CEPH -a -r $SYSCONF_CEPH ] ; then
+      source $SYSCONF_CEPH
+    fi
+    if [ "X$CEPH_AUTO_RESTART_ON_UPGRADE" = "Xyes" ] ; then
+      SERVICE_LIST=$(systemctl | grep -E '^ceph-mon@|^ceph-create-keys@|^ceph-osd@|^ceph-mds@|^ceph-disk-'  | cut -d' ' -f1)
+      if [ -n "$SERVICE_LIST" ]; then
+        for SERVICE in $SERVICE_LIST; do
+          /usr/bin/systemctl try-restart $SERVICE > /dev/null 2>&1 || :
+        done
+      fi
+    fi
+  fi
+%endif
+
 #################################################################################
-%files -n ceph-common
+%files common
 %defattr(-,root,root,-)
 %{_bindir}/ceph
 %{_bindir}/ceph-authtool
@@ -943,12 +967,11 @@ rm -rf $RPM_BUILD_ROOT
 %{_mandir}/man8/rbd-replay.8*
 %{_mandir}/man8/rbd-replay-many.8*
 %{_mandir}/man8/rbd-replay-prep.8*
+%dir %{_datadir}/ceph/
 %{_datadir}/ceph/known_hosts_drop.ceph.com
 %{_datadir}/ceph/id_dsa_drop.ceph.com
 %{_datadir}/ceph/id_dsa_drop.ceph.com.pub
 %dir %{_sysconfdir}/ceph/
-%dir %{_datarootdir}/ceph/
-%dir %{_libexecdir}/ceph/
 %config %{_sysconfdir}/bash_completion.d/rados
 %config %{_sysconfdir}/bash_completion.d/rbd
 %config(noreplace) %{_sysconfdir}/ceph/rbdmap
@@ -963,7 +986,7 @@ rm -rf $RPM_BUILD_ROOT
 %attr(3770,ceph,ceph) %dir %{_localstatedir}/log/ceph/
 %attr(750,ceph,ceph) %dir %{_localstatedir}/lib/ceph/
 
-%pre -n ceph-common
+%pre common
 CEPH_GROUP_ID=""
 CEPH_USER_ID=""
 %if 0%{?rhel} || 0%{?fedora}
@@ -980,12 +1003,12 @@ getent passwd ceph >/dev/null || useradd -r -g ceph -d %{_localstatedir}/lib/cep
 %endif
 exit 0
 
-%post -n ceph-common
+%post common
 %if 0%{?_with_systemd}
 %tmpfiles_create %{_tmpfilesdir}/ceph-common.conf
 %endif
 
-%postun -n ceph-common
+%postun common
 # Package removal cleanup
 if [ "$1" -eq "0" ] ; then
     rm -rf /var/log/ceph
@@ -993,6 +1016,36 @@ if [ "$1" -eq "0" ] ; then
 fi
 
 #################################################################################
+%files mds
+%{_bindir}/ceph-mds
+%{_bindir}/cephfs-journal-tool
+%{_bindir}/cephfs-table-tool
+%{_bindir}/cephfs-data-scan
+%{_mandir}/man8/ceph-mds.8*
+%if 0%{?_with_systemd}
+%{_unitdir}/ceph-mds@.service
+%{_unitdir}/ceph-mds.target
+%else
+%{_initrddir}/ceph
+%endif
+%attr(750,ceph,ceph) %dir %{_localstatedir}/lib/ceph/mds
+
+#################################################################################
+%files mon
+%{_bindir}/ceph-mon
+%{_bindir}/ceph-rest-api
+%{_mandir}/man8/ceph-mon.8*
+%{_mandir}/man8/ceph-rest-api.8*
+%{python_sitelib}/ceph_rest_api.py*
+%if 0%{?_with_systemd}
+%{_unitdir}/ceph-mon@.service
+%{_unitdir}/ceph-mon.target
+%else
+%{_initrddir}/ceph
+%endif
+%attr(750,ceph,ceph) %dir %{_localstatedir}/lib/ceph/mon
+
+#################################################################################
 %files fuse
 %defattr(-,root,root,-)
 %{_bindir}/ceph-fuse
@@ -1010,6 +1063,12 @@ fi
 %{_mandir}/man8/rbd-fuse.8*
 
 #################################################################################
+%files -n rbd-mirror
+%defattr(-,root,root,-)
+%{_bindir}/rbd-mirror
+%{_mandir}/man8/rbd-mirror.8*
+
+#################################################################################
 %files -n rbd-nbd
 %defattr(-,root,root,-)
 %{_bindir}/rbd-nbd
@@ -1026,6 +1085,8 @@ fi
 %config %{_sysconfdir}/bash_completion.d/radosgw-admin
 %dir %{_localstatedir}/lib/ceph/radosgw
 %if 0%{?_with_systemd}
+%{_unitdir}/ceph-radosgw@.service
+%{_unitdir}/ceph-radosgw.target
 %else
 %{_initrddir}/ceph-radosgw
 %{_sbindir}/rcceph-radosgw
@@ -1076,6 +1137,29 @@ fi
 %endif
 
 #################################################################################
+%files osd
+%{_bindir}/ceph-clsinfo
+%{_bindir}/ceph-bluefs-tool
+%{_bindir}/ceph-objectstore-tool
+%{_bindir}/ceph-osd
+%{_sbindir}/ceph-disk
+%{_sbindir}/ceph-disk-udev
+%{_libexecdir}/ceph/ceph-osd-prestart.sh
+%{_udevrulesdir}/60-ceph-partuuid-workaround.rules
+%{_udevrulesdir}/95-ceph-osd.rules
+%{_mandir}/man8/ceph-clsinfo.8*
+%{_mandir}/man8/ceph-disk.8*
+%{_mandir}/man8/ceph-osd.8*
+%if 0%{?_with_systemd}
+%{_unitdir}/ceph-osd@.service
+%{_unitdir}/ceph-osd.target
+%{_unitdir}/ceph-disk@.service
+%else
+%{_initrddir}/ceph
+%endif
+%attr(750,ceph,ceph) %dir %{_localstatedir}/lib/ceph/osd
+
+#################################################################################
 %if %{with ocf}
 %files resource-agents
 %defattr(0755,root,root,-)
@@ -1116,6 +1200,8 @@ fi
 %if 0%{?_with_lttng}
 %{_libdir}/librados_tp.so
 %endif
+%{_bindir}/librados-config
+%{_mandir}/man8/librados-config.8*
 
 #################################################################################
 %files -n python-rados
@@ -1226,9 +1312,12 @@ ln -sf %{_libdir}/librbd.so.1 /usr/lib64/qemu/librbd.so.1
 %{_bindir}/ceph_test_*
 %{_bindir}/ceph_tpbench
 %{_bindir}/ceph_xattr_bench
+%{_bindir}/ceph-coverage
 %{_bindir}/ceph-monstore-tool
 %{_bindir}/ceph-osdomap-tool
 %{_bindir}/ceph-kvstore-tool
+%{_bindir}/ceph-debugpack
+%{_mandir}/man8/ceph-debugpack.8*
 %dir %{_libdir}/ceph
 %{_libdir}/ceph/ceph-monstore-update-crush.sh
 
@@ -1377,4 +1466,5 @@ exit 0
 # We need an empty %%files list for python-ceph-compat, to tell rpmbuild to
 # actually build this meta package.
 
+
 %changelog
diff --git a/src/test/osd/Object.cc b/src/test/osd/Object.cc
index 6990053..587e37e 100644
--- a/src/test/osd/Object.cc
+++ b/src/test/osd/Object.cc
@@ -30,7 +30,7 @@ void ContDesc::decode(bufferlist::iterator &bl)
   DECODE_FINISH(bl);
 }
 
-ostream &operator<<(ostream &out, const ContDesc &rhs)
+std::ostream &operator<<(std::ostream &out, const ContDesc &rhs)
 {
   return out << "(ObjNum " << rhs.objnum
 	     << " snap " << rhs.cursnap
@@ -40,7 +40,7 @@ ostream &operator<<(ostream &out, const ContDesc &rhs)
 }
 
 void AppendGenerator::get_ranges_map(
-  const ContDesc &cont, map<uint64_t, uint64_t> &out) {
+  const ContDesc &cont, std::map<uint64_t, uint64_t> &out) {
   RandWrap rand(cont.seqnum);
   uint64_t pos = off;
   uint64_t limit = off + get_append_size(cont);
@@ -54,13 +54,13 @@ void AppendGenerator::get_ranges_map(
     }
     if (alignment)
       assert(segment_length % alignment == 0);
-    out.insert(pair<uint64_t, uint64_t>(pos, segment_length));
+    out.insert(std::pair<uint64_t, uint64_t>(pos, segment_length));
     pos += segment_length;
   }
 }
 
 void VarLenGenerator::get_ranges_map(
-  const ContDesc &cont, map<uint64_t, uint64_t> &out) {
+  const ContDesc &cont, std::map<uint64_t, uint64_t> &out) {
   RandWrap rand(cont.seqnum);
   uint64_t pos = 0;
   uint64_t limit = get_length(cont);
@@ -73,7 +73,7 @@ void VarLenGenerator::get_ranges_map(
       segment_length = limit - pos;
     }
     if (include) {
-      out.insert(pair<uint64_t, uint64_t>(pos, segment_length));
+      out.insert(std::pair<uint64_t, uint64_t>(pos, segment_length));
       include = false;
     } else {
       include = true;
@@ -82,68 +82,44 @@ void VarLenGenerator::get_ranges_map(
   }
 }
 
-ObjectDesc::iterator &ObjectDesc::iterator::advance(bool init) {
-  assert(pos < limit);
-  assert(!end());
-  if (!init) {
-    pos++;
-  }
-  if (end()) {
-    return *this;
-  }
-  while (pos == limit) {
-    cur_cont = stack.begin()->first;
-    limit = stack.begin()->second;
+void ObjectDesc::iterator::adjust_stack() {
+  while (!stack.empty() && pos >= stack.front().second.next) {
+    assert(pos == stack.front().second.next);
+    size = stack.front().second.size;
+    current = stack.front().first;
     stack.pop_front();
   }
 
-  if (cur_cont == obj.layers.end()) {
-    return *this;
+  if (stack.empty()) {
+    cur_valid_till = std::numeric_limits<uint64_t>::max();
+  } else {
+    cur_valid_till = stack.front().second.next;
   }
 
-  interval_set<uint64_t> ranges;
-  cur_cont->first->get_ranges(cur_cont->second, ranges);
-  while (!ranges.contains(pos)) {
-    stack.push_front(pair<list<pair<ceph::shared_ptr<ContentsGenerator>,
-				    ContDesc> >::iterator,
-		     uint64_t>(cur_cont, limit));
-    uint64_t length = cur_cont->first->get_length(cur_cont->second);
-    uint64_t next;
-    if (pos >= length) {
-      next = limit;
-      cur_cont = obj.layers.end();
-    } else if (ranges.empty() || pos >= ranges.range_end()) {
-      next = length;
-      ++cur_cont;
-    } else {
-      next = ranges.start_after(pos);
-      ++cur_cont;
-    }
-    if (next < limit) {
-      limit = next;
+  while (current != layers.end() && !current->covers(pos)) {
+    uint64_t next = current->next(pos);
+    if (next < cur_valid_till) {
+      stack.push_front(
+	make_pair(
+	  current,
+	  StackState{next, size}
+	  )
+	);
+      cur_valid_till = next;
     }
-    if (cur_cont == obj.layers.end()) {
-      break;
-    }
-
-    ranges.clear();
-    cur_cont->first->get_ranges(cur_cont->second, ranges);
-  }
 
-  if (cur_cont == obj.layers.end()) {
-    return *this;
+    ++current;
   }
 
-  if (!cont_iters.count(cur_cont->second)) {
-    cont_iters.insert(pair<ContDesc,ContentsGenerator::iterator>(
-			cur_cont->second,
-			cur_cont->first->get_iterator(cur_cont->second)));
+  if (current == layers.end()) {
+    size = 0;
+  } else {
+    current->iter.seek(pos);
+    size = std::min(size, current->get_size());
+    cur_valid_till = std::min(
+      current->valid_till(pos),
+      cur_valid_till);
   }
-  map<ContDesc,ContentsGenerator::iterator>::iterator j = cont_iters.find(
-    cur_cont->second);
-  assert(j != cont_iters.end());
-  j->second.seek(pos);
-  return *this;
 }
 
 const ContDesc &ObjectDesc::most_recent() {
@@ -151,73 +127,80 @@ const ContDesc &ObjectDesc::most_recent() {
 }
 
 void ObjectDesc::update(ContentsGenerator *gen, const ContDesc &next) {
-  layers.push_front(pair<ceph::shared_ptr<ContentsGenerator>, ContDesc>(ceph::shared_ptr<ContentsGenerator>(gen), next));
+  layers.push_front(std::pair<ceph::shared_ptr<ContentsGenerator>, ContDesc>(ceph::shared_ptr<ContentsGenerator>(gen), next));
   return;
 }
 
 bool ObjectDesc::check(bufferlist &to_check) {
-  iterator i = begin();
-  uint64_t pos = 0;
-  for (bufferlist::iterator p = to_check.begin();
-       !p.end();
-       ++p, ++i, ++pos) {
-    if (i.end()) {
-      std::cout << "reached end of iterator first" << std::endl;
-      return false;
-    }
-    if (*i != *p) {
-      std::cout << "incorrect buffer at pos " << pos << std::endl;
-      return false;
-    }
+  iterator objiter = begin();
+  uint64_t error_at = 0;
+  if (!objiter.check_bl_advance(to_check, &error_at)) {
+    std::cout << "incorrect buffer at pos " << error_at << std::endl;
+    return false;
   }
-  uint64_t size = layers.empty() ? 0 : 
-    most_recent_gen()->get_length(most_recent());
-  if (pos != size) {
-    std::cout << "only read " << pos << " out of size " << size << std::endl;
+
+  uint64_t size = layers.begin()->first->get_length(layers.begin()->second);
+  if (to_check.length() < size) {
+    std::cout << "only read " << to_check.length()
+	      << " out of size " << size << std::endl;
     return false;
   }
   return true;
 }
 
 bool ObjectDesc::check_sparse(const std::map<uint64_t, uint64_t>& extents,
-			      bufferlist &to_check) {
-  auto i = begin();
-  auto p = to_check.begin();
+			      bufferlist &to_check)
+{
+  uint64_t off = 0;
   uint64_t pos = 0;
-  for (auto extent : extents) {
-    const uint64_t start = extent.first;
-    const uint64_t end = start + extent.second;
-    for (; pos < end; ++i, ++pos) {
-      if (i.end()) {
-	std::cout << "reached end of iterator first" << std::endl;
+  auto objiter = begin();
+  for (auto &&extiter : extents) {
+    // verify hole
+    {
+      bufferlist bl;
+      bl.append_zero(extiter.first - pos);
+      uint64_t error_at = 0;
+      if (!objiter.check_bl_advance(bl, &error_at)) {
+	std::cout << "sparse read omitted non-zero data at "
+		  << error_at << std::endl;
 	return false;
       }
-      if (pos < start) {
-	// check the hole
-	if (*i != '\0') {
-	  std::cout << "incorrect buffer at pos " << pos << std::endl;
-	  return false;
-	}
-      } else {
-	// then the extent
-	if (*i != *p) {
-	  std::cout << "incorrect buffer at pos " << pos << std::endl;
-	  return false;
-	}
-	++p;
+    }
+
+    assert(off <= to_check.length());
+    pos = extiter.first;
+    objiter.seek(pos);
+
+    {
+      bufferlist bl;
+      bl.substr_of(
+	to_check,
+	off,
+	std::min(to_check.length() - off, extiter.second));
+      uint64_t error_at = 0;
+      if (!objiter.check_bl_advance(bl, &error_at)) {
+	std::cout << "incorrect buffer at pos " << error_at << std::endl;
+	return false;
       }
+      off += extiter.second;
+      pos += extiter.second;
     }
-  }
-  uint64_t size = layers.empty() ? 0 :
-    most_recent_gen()->get_length(most_recent());
-  while (pos < size) {
-    if (*i != '\0') {
-      std::cout << "sparse read omitted non-zero data at " << pos << std::endl;
+
+    if (pos < extiter.first + extiter.second) {
+      std::cout << "reached end of iterator first" << std::endl;
       return false;
     }
-    ++i;
-    ++pos;
   }
-  assert(pos == size);
+
+  // final hole
+  bufferlist bl;
+  uint64_t size = layers.begin()->first->get_length(layers.begin()->second);
+  bl.append_zero(size - pos);
+  uint64_t error_at;
+  if (!objiter.check_bl_advance(bl, &error_at)) {
+    std::cout << "sparse read omitted non-zero data at "
+	      << error_at << std::endl;
+    return false;
+  }
   return true;
 }
diff --git a/src/test/osd/Object.h b/src/test/osd/Object.h
index feeefeb..a0cc628 100644
--- a/src/test/osd/Object.h
+++ b/src/test/osd/Object.h
@@ -5,6 +5,7 @@
 #include <list>
 #include <map>
 #include <set>
+#include <random>
 
 #ifndef OBJECT_H
 #define OBJECT_H
@@ -14,8 +15,8 @@ public:
   int objnum;
   int cursnap;
   unsigned seqnum;
-  string prefix;
-  string oid;
+  std::string prefix;
+  std::string oid;
 
   ContDesc() :
     objnum(0), cursnap(0),
@@ -24,7 +25,7 @@ public:
   ContDesc(int objnum,
 	   int cursnap,
 	   unsigned seqnum,
-	   const string &prefix) :
+	   const std::string &prefix) :
     objnum(objnum), cursnap(cursnap),
     seqnum(seqnum), prefix(prefix) {}
 
@@ -48,7 +49,7 @@ public:
 };
 WRITE_CLASS_ENCODER(ContDesc)
 
-ostream &operator<<(ostream &out, const ContDesc &rhs);
+std::ostream &operator<<(std::ostream &out, const ContDesc &rhs);
 
 class ContentsGenerator {
 public:
@@ -61,6 +62,28 @@ public:
     virtual bool end() = 0;
     virtual ContDesc get_cont() const = 0;
     virtual uint64_t get_pos() const = 0;
+    virtual bufferlist gen_bl_advance(uint64_t s) {
+      bufferptr ret = buffer::create(s);
+      for (uint64_t i = 0; i < s; ++i, ++(*this)) {
+	ret[i] = **this;
+      }
+      bufferlist _ret;
+      _ret.push_back(ret);
+      return _ret;
+    }
+    virtual bool check_bl_advance(bufferlist &bl, uint64_t *off = nullptr) {
+      uint64_t _off = 0;
+      for (bufferlist::iterator i = bl.begin();
+	   !i.end();
+	   ++i, ++_off, ++(*this)) {
+	if (*i != **this) {
+	  if (off)
+	    *off = _off;
+	  return false;
+	}
+      }
+      return true;
+    }
     virtual ~iterator_impl() {};
   };
 
@@ -90,6 +113,12 @@ public:
       other.impl = impl;
       impl = otherimpl;
     }
+    bufferlist gen_bl_advance(uint64_t s) {
+      return impl->gen_bl_advance(s);
+    }
+    bool check_bl_advance(bufferlist &bl, uint64_t *off = nullptr) {
+      return impl->check_bl_advance(bl, off);
+    }
     iterator(ContentsGenerator *parent, iterator_impl *impl) :
       parent(parent), impl(impl) {}
   };
@@ -97,11 +126,11 @@ public:
   virtual uint64_t get_length(const ContDesc &in) = 0;
 
   virtual void get_ranges_map(
-    const ContDesc &cont, map<uint64_t, uint64_t> &out) = 0;
+    const ContDesc &cont, std::map<uint64_t, uint64_t> &out) = 0;
   void get_ranges(const ContDesc &cont, interval_set<uint64_t> &out) {
-    map<uint64_t, uint64_t> ranges;
+    std::map<uint64_t, uint64_t> ranges;
     get_ranges_map(cont, ranges);
-    for (map<uint64_t, uint64_t>::iterator i = ranges.begin();
+    for (std::map<uint64_t, uint64_t>::iterator i = ranges.begin();
 	 i != ranges.end();
 	 ++i) {
       out.insert(i->first, i->second);
@@ -124,19 +153,7 @@ public:
 
 class RandGenerator : public ContentsGenerator {
 public:
-  class RandWrap {
-  public:
-    unsigned int state;
-    RandWrap(unsigned int seed)
-    {
-      state = seed;
-    }
-
-    int operator()()
-    {
-      return rand_r(&state);
-    }
-  };
+  typedef std::minstd_rand0 RandWrap;
 
   class iterator_impl : public ContentsGenerator::iterator_impl {
   public:
@@ -207,7 +224,7 @@ public:
     min_stride_size(min_stride_size),
     max_stride_size(max_stride_size) {}
   void get_ranges_map(
-    const ContDesc &cont, map<uint64_t, uint64_t> &out);
+    const ContDesc &cont, std::map<uint64_t, uint64_t> &out);
   uint64_t get_length(const ContDesc &in) {
     RandWrap rand(in.seqnum);
     if (max_length == 0)
@@ -223,8 +240,8 @@ public:
   AttrGenerator(uint64_t max_len, uint64_t big_max_len)
     : max_len(max_len), big_max_len(big_max_len) {}
   void get_ranges_map(
-    const ContDesc &cont, map<uint64_t, uint64_t> &out) {
-    out.insert(pair<uint64_t, uint64_t>(0, get_length(cont)));
+    const ContDesc &cont, std::map<uint64_t, uint64_t> &out) {
+    out.insert(std::pair<uint64_t, uint64_t>(0, get_length(cont)));
   }
   uint64_t get_length(const ContDesc &in) {
     RandWrap rand(in.seqnum);
@@ -279,7 +296,7 @@ public:
     return off + get_append_size(in);
   }
   void get_ranges_map(
-    const ContDesc &cont, map<uint64_t, uint64_t> &out);
+    const ContDesc &cont, std::map<uint64_t, uint64_t> &out);
 };
 
 class ObjectDesc {
@@ -290,47 +307,97 @@ public:
   ObjectDesc(const ContDesc &init, ContentsGenerator *cont_gen)
     : exists(false), dirty(false),
       version(0) {
-    layers.push_front(pair<ceph::shared_ptr<ContentsGenerator>, ContDesc>(ceph::shared_ptr<ContentsGenerator>(cont_gen), init));
+    layers.push_front(std::pair<ceph::shared_ptr<ContentsGenerator>, ContDesc>(ceph::shared_ptr<ContentsGenerator>(cont_gen), init));
   }
 
   class iterator {
   public:
     uint64_t pos;
-    ObjectDesc &obj;
-    list<pair<list<pair<ceph::shared_ptr<ContentsGenerator>,
-			ContDesc> >::iterator,
-	      uint64_t> > stack;
-    map<ContDesc,ContentsGenerator::iterator> cont_iters;
-    uint64_t limit;
-    list<pair<ceph::shared_ptr<ContentsGenerator>,
-	      ContDesc> >::iterator cur_cont;
-    
-    iterator(ObjectDesc &obj) :
-      pos(0), obj(obj) {
-      limit = obj.layers.begin()->first->get_length(obj.layers.begin()->second);
-      cur_cont = obj.layers.begin();
-      advance(true);
+    uint64_t size;
+    uint64_t cur_valid_till;
+
+    class ContState {
+      interval_set<uint64_t> ranges;
+      const uint64_t size;
+
+    public:
+      ContDesc cont;
+      ceph::shared_ptr<ContentsGenerator> gen;
+      ContentsGenerator::iterator iter;
+
+      ContState(
+	ContDesc _cont,
+	ceph::shared_ptr<ContentsGenerator> _gen,
+	ContentsGenerator::iterator _iter)
+	: size(_gen->get_length(_cont)), cont(_cont), gen(_gen), iter(_iter) {
+	gen->get_ranges(cont, ranges);
+      }
+
+      const interval_set<uint64_t> &get_ranges() {
+	return ranges;
+      }
+
+      uint64_t get_size() {
+	return gen->get_length(cont);
+      }
+
+      bool covers(uint64_t pos) {
+	return ranges.contains(pos) || (!ranges.starts_after(pos) && pos >= size);
+      }
+
+      uint64_t next(uint64_t pos) {
+	assert(!covers(pos));
+	return ranges.starts_after(pos) ? ranges.start_after(pos) : size;
+      }
+
+      uint64_t valid_till(uint64_t pos) {
+	assert(covers(pos));
+	return ranges.contains(pos) ?
+	  ranges.end_after(pos) :
+	  std::numeric_limits<uint64_t>::max();
+      }
+    };
+    std::list<ContState> layers;
+
+    struct StackState {
+      const uint64_t next;
+      const uint64_t size;
+    };
+    std::list<std::pair<std::list<ContState>::iterator, StackState> > stack;
+    std::list<ContState>::iterator current;
+
+    explicit iterator(ObjectDesc &obj) :
+      pos(0),
+      size(obj.layers.begin()->first->get_length(obj.layers.begin()->second)),
+      cur_valid_till(0) {
+      for (auto &&i : obj.layers) {
+	layers.push_back({i.second, i.first, i.first->get_iterator(i.second)});
+      }
+      current = layers.begin();
+
+      adjust_stack();
     }
 
-    iterator &advance(bool init);
+    void adjust_stack();
     iterator &operator++() {
-      return advance(false);
+      assert(cur_valid_till >= pos);
+      ++pos;
+      if (pos >= cur_valid_till) {
+	adjust_stack();
+      }
+      return *this;
     }
 
     char operator*() {
-      if (cur_cont == obj.layers.end()) {
+      if (current == layers.end()) {
 	return '\0';
       } else {
-	map<ContDesc,ContentsGenerator::iterator>::iterator j = cont_iters.find(
-	  cur_cont->second);
-	assert(j != cont_iters.end());
-	return *(j->second);
+	return pos >= size ? '\0' : *(current->iter);
       }
     }
 
     bool end() {
-      return pos >= obj.layers.begin()->first->get_length(
-	obj.layers.begin()->second);
+      return pos >= size;
     }
 
     void seek(uint64_t _pos) {
@@ -338,8 +405,77 @@ public:
 	assert(0);
       }
       while (pos < _pos) {
-	++(*this);
+	assert(cur_valid_till >= pos);
+	uint64_t next = std::min(_pos - pos, cur_valid_till - pos);
+	pos += next;
+
+	if (pos >= cur_valid_till) {
+	  assert(pos == cur_valid_till);
+	  adjust_stack();
+	}
+      }
+      assert(pos == _pos);
+    }
+
+    bufferlist gen_bl_advance(uint64_t s) {
+      bufferlist ret;
+      while (s > 0) {
+	assert(cur_valid_till >= pos);
+	uint64_t next = std::min(s, cur_valid_till - pos);
+	if (current != layers.end() && pos < size) {
+	  ret.append(current->iter.gen_bl_advance(next));
+	} else {
+	  ret.append_zero(next);
+	}
+
+	pos += next;
+	assert(next <= s);
+	s -= next;
+
+	if (pos >= cur_valid_till) {
+	  assert(cur_valid_till == pos);
+	  adjust_stack();
+	}
+      }
+      return ret;
+    }
+
+    bool check_bl_advance(bufferlist &bl, uint64_t *error_at = nullptr) {
+      uint64_t off = 0;
+      while (off < bl.length()) {
+	assert(cur_valid_till >= pos);
+	uint64_t next = std::min(bl.length() - off, cur_valid_till - pos);
+
+	bufferlist to_check;
+	to_check.substr_of(bl, off, next);
+	if (current != layers.end() && pos < size) {
+	  if (!current->iter.check_bl_advance(to_check, error_at)) {
+	    if (error_at)
+	      *error_at += off;
+	    return false;
+	  }
+	} else {
+	  uint64_t at = pos;
+	  for (auto i = to_check.begin(); !i.end(); ++i, ++at) {
+	    if (*i) {
+	      if (error_at)
+		*error_at = at;
+	      return false;
+	    }
+	  }
+	}
+
+	pos += next;
+	off += next;
+	assert(off <= bl.length());
+
+	if (pos >= cur_valid_till) {
+	  assert(cur_valid_till == pos);
+	  adjust_stack();
+	}
       }
+      assert(off == bl.length());
+      return true;
     }
   };
     
@@ -364,14 +500,14 @@ public:
   ContentsGenerator *most_recent_gen() {
     return layers.begin()->first.get();
   }
-  map<string, ContDesc> attrs; // Both omap and xattrs
+  std::map<std::string, ContDesc> attrs; // Both omap and xattrs
   bufferlist header;
   bool exists;
   bool dirty;
 
   uint64_t version;
 private:
-  list<pair<ceph::shared_ptr<ContentsGenerator>, ContDesc> > layers;
+  std::list<std::pair<ceph::shared_ptr<ContentsGenerator>, ContDesc> > layers;
 };
 
 #endif
diff --git a/src/test/osd/RadosModel.h b/src/test/osd/RadosModel.h
index 81c825a..b66c4db 100644
--- a/src/test/osd/RadosModel.h
+++ b/src/test/osd/RadosModel.h
@@ -111,7 +111,7 @@ public:
     : num(n),
       context(context),
       stat(stat),
-      done(0)
+      done(false)
   {}
 
   virtual ~TestOp() {};
@@ -122,7 +122,7 @@ public:
    */
   struct CallbackInfo {
     uint64_t id;
-    CallbackInfo(uint64_t id) : id(id) {}
+    explicit CallbackInfo(uint64_t id) : id(id) {}
     virtual ~CallbackInfo() {};
   };
 
@@ -526,11 +526,10 @@ public:
   string oid;
   librados::ObjectWriteOperation op;
   librados::AioCompletion *comp;
-  bool done;
   RemoveAttrsOp(int n, RadosTestContext *context,
 	       const string &oid,
 	       TestOpStat *stat)
-    : TestOp(n, context, stat), oid(oid), comp(NULL), done(false)
+    : TestOp(n, context, stat), oid(oid), comp(NULL)
   {}
 
   void _begin()
@@ -620,13 +619,12 @@ public:
   string oid;
   librados::ObjectWriteOperation op;
   librados::AioCompletion *comp;
-  bool done;
   SetAttrsOp(int n,
 	     RadosTestContext *context,
 	     const string &oid,
 	     TestOpStat *stat)
     : TestOp(n, context, stat),
-      oid(oid), comp(NULL), done(false)
+      oid(oid), comp(NULL)
   {}
 
   void _begin()
@@ -756,10 +754,18 @@ public:
       uint64_t prev_length = found && old_value.has_contents() ?
 	old_value.most_recent_gen()->get_length(old_value.most_recent()) :
 	0;
+      bool requires;
+      int r = context->io_ctx.pool_requires_alignment2(&requires);
+      assert(r == 0);
+      uint64_t alignment = 0;
+      if (requires) {
+        r = context->io_ctx.pool_required_alignment2(&alignment);
+        assert(r == 0);
+        assert(alignment != 0);
+      }
       cont_gen = new AppendGenerator(
 	prev_length,
-	(context->io_ctx.pool_requires_alignment() ?
-	 context->io_ctx.pool_required_alignment() : 0),
+	alignment,
 	context->min_stride_size,
 	context->max_stride_size,
 	3);
@@ -785,11 +791,8 @@ public:
     for (map<uint64_t, uint64_t>::iterator i = ranges.begin(); 
 	 i != ranges.end();
 	 ++i, ++tid) {
-      bufferlist to_write;
       gen_pos.seek(i->first);
-      for (uint64_t k = 0; k != i->second; ++k, ++gen_pos) {
-	to_write.append(*gen_pos);
-      }
+      bufferlist to_write = gen_pos.gen_bl_advance(i->second);
       assert(to_write.length() == i->second);
       assert(to_write.length() > 0);
       std::cout << num << ":  writing " << context->prefix+oid
@@ -1474,7 +1477,6 @@ class RollbackOp : public TestOp {
 public:
   string oid;
   int roll_back_to;
-  bool done;
   librados::ObjectWriteOperation zero_write_op1;
   librados::ObjectWriteOperation zero_write_op2;
   librados::ObjectWriteOperation op;
@@ -1489,7 +1491,6 @@ public:
 	     TestOpStat *stat = 0)
     : TestOp(n, context, stat),
       oid(_oid), roll_back_to(-1), 
-      done(false),
       comps(3, NULL),
       last_finished(-1), outstanding(3)
   {}
@@ -1739,7 +1740,6 @@ public:
 };
 
 class HitSetListOp : public TestOp {
-  bool done;
   librados::AioCompletion *comp1, *comp2;
   uint32_t hash;
   std::list< std::pair<time_t, time_t> > ls;
@@ -1751,7 +1751,7 @@ public:
 	       uint32_t hash,
 	       TestOpStat *stat = 0)
     : TestOp(n, context, stat),
-      done(false), comp1(NULL), comp2(NULL),
+      comp1(NULL), comp2(NULL),
       hash(hash)
   {}
 
diff --git a/src/test/osd/TestPGLog.cc b/src/test/osd/TestPGLog.cc
index a7f074e..f3c6121 100644
--- a/src/test/osd/TestPGLog.cc
+++ b/src/test/osd/TestPGLog.cc
@@ -265,7 +265,7 @@ public:
 
 struct TestHandler : public PGLog::LogEntryHandler {
   list<hobject_t> &removed;
-  TestHandler(list<hobject_t> &removed) : removed(removed) {}
+  explicit TestHandler(list<hobject_t> &removed) : removed(removed) {}
 
   void rollback(
     const pg_log_entry_t &entry) {}
diff --git a/src/test/osd/hitset.cc b/src/test/osd/hitset.cc
index 7fab186..4140640 100644
--- a/src/test/osd/hitset.cc
+++ b/src/test/osd/hitset.cc
@@ -18,7 +18,7 @@ class HitSetTestStrap {
 public:
   HitSet *hitset;
 
-  HitSetTestStrap(HitSet *h) : hitset(h) {}
+  explicit HitSetTestStrap(HitSet *h) : hitset(h) {}
 
   void fill(unsigned count) {
     char buf[50];
diff --git a/src/test/osd/osd-scrub-snaps.sh b/src/test/osd/osd-scrub-snaps.sh
index bf59570..71eeb19 100755
--- a/src/test/osd/osd-scrub-snaps.sh
+++ b/src/test/osd/osd-scrub-snaps.sh
@@ -175,28 +175,28 @@ function TEST_scrub_snaps() {
     kill_daemons $dir || return 1
 
     declare -a err_strings
-    err_strings[0]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 [0-9]*/2acecc8b/obj10/1 is missing in clone_overlap"
-    err_strings[1]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 [0-9]*/666934a3/obj5/7 no '_' attr"
-    err_strings[2]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 [0-9]*/666934a3/obj5/7 is an unexpected clone"
-    err_strings[3]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 [0-9]*/666934a3/obj5/4 on disk size [(]4608[)] does not match object info size [(]512[)] adjusted for ondisk to [(]512[)]"
-    err_strings[4]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 [0-9]*/666934a3/obj5/head expected clone [0-9]*/666934a3/obj5/2"
-    err_strings[5]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 [0-9]*/666934a3/obj5/head expected clone [0-9]*/666934a3/obj5/1"
-    err_strings[6]="log_channel[(]cluster[)] log [[]INF[]] : scrub [0-9]*[.]0 [0-9]*/666934a3/obj5/head 2 missing clone[(]s[)]"
-    err_strings[7]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 [0-9]*/d3a9faf5/obj12/head snapset.head_exists=false, but head exists"
-    err_strings[8]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 [0-9]*/8df7eaa5/obj8/head snaps.seq not set"
-    err_strings[9]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 [0-9]*/5c889059/obj7/head snapset.head_exists=false, but head exists"
-    err_strings[10]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 [0-9]*/5c889059/obj7/1 is an unexpected clone"
-    err_strings[11]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 [0-9]*/61f68bb1/obj3/head on disk size [(]3840[)] does not match object info size [(]768[)] adjusted for ondisk to [(]768[)]"
-    err_strings[12]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 [0-9]*/83425cc4/obj6/1 is an unexpected clone"
-    err_strings[13]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 [0-9]*/3f1ee208/obj2/snapdir no 'snapset' attr"
-    err_strings[14]="log_channel[(]cluster[)] log [[]INF[]] : scrub [0-9]*[.]0 [0-9]*/3f1ee208/obj2/7 clone ignored due to missing snapset"
-    err_strings[15]="log_channel[(]cluster[)] log [[]INF[]] : scrub [0-9]*[.]0 [0-9]*/3f1ee208/obj2/4 clone ignored due to missing snapset"
-    err_strings[16]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 [0-9]*/a8759770/obj4/snapdir expected clone [0-9]*/a8759770/obj4/7"
-    err_strings[17]="log_channel[(]cluster[)] log [[]INF[]] : scrub [0-9]*[.]0 [0-9]*/a8759770/obj4/snapdir 1 missing clone[(]s[)]"
-    err_strings[18]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 [0-9]*/6cf8deff/obj1/1 is an unexpected clone"
-    err_strings[19]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 [0-9]*/e478ac7f/obj9/1 is missing in clone_size"
-    err_strings[20]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 [0-9]*/29547577/obj11/1 is an unexpected clone"
-    err_strings[21]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 [0-9]*/94122507/obj14/1 size 1032 != clone_size 1033"
+    err_strings[0]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*::obj10:.* is missing in clone_overlap"
+    err_strings[1]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*::obj5:7 no '_' attr"
+    err_strings[2]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*::obj5:7 is an unexpected clone"
+    err_strings[3]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*::obj5:4 on disk size [(]4608[)] does not match object info size [(]512[)] adjusted for ondisk to [(]512[)]"
+    err_strings[4]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj5:head expected clone .*:::obj5:2"
+    err_strings[5]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj5:head expected clone .*:::obj5:1"
+    err_strings[6]="log_channel[(]cluster[)] log [[]INF[]] : scrub [0-9]*[.]0 .*:::obj5:head 2 missing clone[(]s[)]"
+    err_strings[7]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj12:head snapset.head_exists=false, but head exists"
+    err_strings[8]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj8:head snaps.seq not set"
+    err_strings[9]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj7:head snapset.head_exists=false, but head exists"
+    err_strings[10]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj7:1 is an unexpected clone"
+    err_strings[11]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj3:head on disk size [(]3840[)] does not match object info size [(]768[)] adjusted for ondisk to [(]768[)]"
+    err_strings[12]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj6:1 is an unexpected clone"
+    err_strings[13]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj2:snapdir no 'snapset' attr"
+    err_strings[14]="log_channel[(]cluster[)] log [[]INF[]] : scrub [0-9]*[.]0 .*:::obj2:7 clone ignored due to missing snapset"
+    err_strings[15]="log_channel[(]cluster[)] log [[]INF[]] : scrub [0-9]*[.]0 .*:::obj2:4 clone ignored due to missing snapset"
+    err_strings[16]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj4:snapdir expected clone .*:::obj4:7"
+    err_strings[17]="log_channel[(]cluster[)] log [[]INF[]] : scrub [0-9]*[.]0 .*:::obj4:snapdir 1 missing clone[(]s[)]"
+    err_strings[18]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj1:1 is an unexpected clone"
+    err_strings[19]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj9:1 is missing in clone_size"
+    err_strings[20]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj11:1 is an unexpected clone"
+    err_strings[21]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj14:1 size 1032 != clone_size 1033"
     err_strings[22]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 scrub 19 errors"
 
     for i in `seq 0 ${#err_strings[@]}`
diff --git a/src/test/osd/types.cc b/src/test/osd/types.cc
index 67d6ac8..ca1e56a 100644
--- a/src/test/osd/types.cc
+++ b/src/test/osd/types.cc
@@ -1074,7 +1074,7 @@ protected:
   public:
     ObjectContext &obc;
 
-    Thread_read_lock(ObjectContext& _obc) :
+    explicit Thread_read_lock(ObjectContext& _obc) :
       obc(_obc)
     {
     }
@@ -1089,7 +1089,7 @@ protected:
   public:
     ObjectContext &obc;
 
-    Thread_write_lock(ObjectContext& _obc) :
+    explicit Thread_write_lock(ObjectContext& _obc) :
       obc(_obc)
     {
     }
@@ -1391,6 +1391,37 @@ TEST(coll_t, assigment) {
   ASSERT_NE(left.c_str(), middle.c_str());
 }
 
+TEST(hobject_t, parse) {
+  const char *v[] = {
+    "MIN",
+    "MAX",
+    "-1:60c2fa6d:::inc_osdmap.1:0",
+    "-1:60c2fa6d:::inc_osdmap.1:333",
+    "0:00000000::::head",
+    "1:00000000:nspace:key:obj:head",
+    "-40:00000000:nspace::obj:head",
+    "20:00000000::key:obj:head",
+    "20:00000000:::o%fdj:head",
+    "20:00000000:::o%02fdj:head",
+    "20:00000000:::_zero_%00_:head",
+    NULL
+  };
+
+  for (unsigned i=0; v[i]; ++i) {
+    hobject_t o;
+    bool b = o.parse(v[i]);
+    if (!b) {
+      cout << "failed to parse " << v[i] << std::endl;
+      ASSERT_TRUE(false);
+    }
+    string s = stringify(o);
+    if (s != v[i]) {
+      cout << v[i] << " -> " << o << " -> " << s << std::endl;
+      ASSERT_EQ(s, string(v[i]));
+    }
+  }
+}
+
 TEST(ghobject_t, cmp) {
   ghobject_t min;
   ghobject_t sep;
@@ -1407,6 +1438,37 @@ TEST(ghobject_t, cmp) {
   ASSERT_TRUE(cmp_bitwise(o, sep) > 0);
 }
 
+TEST(ghobject_t, parse) {
+  const char *v[] = {
+    "GHMIN",
+    "GHMAX",
+    "13 at 0:00000000::::head@",
+    "13 at 0:00000000::::head at deadbeef",
+    "@-1:60c2fa6d:::inc_osdmap.1:333 at deadbeef",
+    "@-1:60c2fa6d:::inc%02osdmap.1:333 at deadbeef",
+    "@-1:60c2fa6d:::inc_osdmap.1:333@",
+    "1 at MIN@deadbeefff",
+    "1 at MAX@",
+    "@MAX at 123",
+    "@-40:00000000:nspace::obj:head@",
+    NULL
+  };
+
+  for (unsigned i=0; v[i]; ++i) {
+    ghobject_t o;
+    bool b = o.parse(v[i]);
+    if (!b) {
+      cout << "failed to parse " << v[i] << std::endl;
+      ASSERT_TRUE(false);
+    }
+    string s = stringify(o);
+    if (s != v[i]) {
+      cout << v[i] << " -> " << o << " -> " << s << std::endl;
+      ASSERT_EQ(s, string(v[i]));
+    }
+  }
+}
+
 TEST(pool_opts_t, invalid_opt) {
   EXPECT_FALSE(pool_opts_t::is_opt_name("INVALID_OPT"));
   EXPECT_DEATH(pool_opts_t::get_opt_desc("INVALID_OPT"), "");
diff --git a/src/test/osdc/FakeWriteback.h b/src/test/osdc/FakeWriteback.h
index 2d77e92..8ab665c 100644
--- a/src/test/osdc/FakeWriteback.h
+++ b/src/test/osdc/FakeWriteback.h
@@ -29,6 +29,8 @@ public:
 			   __u32 trunc_seq, ceph_tid_t journal_tid,
 			   Context *oncommit);
 
+  using WritebackHandler::write;
+
   virtual bool may_copy_on_write(const object_t&, uint64_t, uint64_t,
 				 snapid_t);
 private:
diff --git a/src/test/perf_local.cc b/src/test/perf_local.cc
index fe64258..49440c2 100644
--- a/src/test/perf_local.cc
+++ b/src/test/perf_local.cc
@@ -329,7 +329,7 @@ class CondPingPong {
   class Consumer : public Thread {
     CondPingPong *p;
    public:
-    Consumer(CondPingPong *p): p(p) {}
+    explicit Consumer(CondPingPong *p): p(p) {}
     void* entry() {
       p->consume();
       return 0;
@@ -451,7 +451,7 @@ double eventcenter_poll()
   int count = 1000000;
   EventCenter center(g_ceph_context);
   center.init(1000);
-  center.set_owner(pthread_self());
+  center.set_owner();
   uint64_t start = Cycles::rdtsc();
   for (int i = 0; i < count; i++) {
     center.process_events(0);
@@ -466,7 +466,7 @@ class CenterWorker : public Thread {
 
  public:
   EventCenter center;
-  CenterWorker(CephContext *c): cct(c), done(false), center(c) {
+  explicit CenterWorker(CephContext *c): cct(c), done(false), center(c) {
     center.init(100);
   }
   void stop() {
@@ -474,7 +474,7 @@ class CenterWorker : public Thread {
     center.wakeup();
   }
   void* entry() {
-    center.set_owner(pthread_self());
+    center.set_owner();
     bind_thread_to_cpu(2);
     while (!done)
       center.process_events(1000);
@@ -486,7 +486,7 @@ class CountEvent: public EventCallback {
   atomic_t *count;
 
  public:
-  CountEvent(atomic_t *atomic): count(atomic) {}
+  explicit CountEvent(atomic_t *atomic): count(atomic) {}
   void do_request(int id) {
     count->dec();
   }
diff --git a/src/test/python/ceph-disk/setup.py b/src/test/python/ceph-disk/setup.py
deleted file mode 100644
index 91652ba..0000000
--- a/src/test/python/ceph-disk/setup.py
+++ /dev/null
@@ -1,27 +0,0 @@
-import os
-from setuptools import setup, find_packages
-
-# link ceph-disk script here so we can "install" it
-current_dir = os.path.abspath(os.path.dirname(__file__))
-src_dir = os.path.dirname(os.path.dirname(os.path.dirname(current_dir)))
-script_path = os.path.join(src_dir, 'ceph-disk')
-
-
-def link_target(source, destination):
-    if not os.path.exists(destination):
-        try:
-            os.symlink(source, destination)
-        except (IOError, OSError) as error:
-            print 'Ignoring linking of target: %s' % str(error)
-
-link_target(script_path, 'ceph_disk.py')
-
-setup(
-    name='ceph_disk',
-    version='0.1',
-    description='',
-    author='',
-    author_email='',
-    zip_safe=False,
-    packages=find_packages(),
-)
diff --git a/src/test/python/ceph-disk/tox.ini b/src/test/python/ceph-disk/tox.ini
deleted file mode 100644
index a89f0e3..0000000
--- a/src/test/python/ceph-disk/tox.ini
+++ /dev/null
@@ -1,19 +0,0 @@
-[tox]
-envlist = py27, flake8
-skipsdist=True
-
-[testenv]
-deps=
-  pytest
-  mock
-  pytest-cov==1.6
-  coverage==3.7.1
-
-commands=
-  python setup.py develop
-  py.test -vv --cov=ceph_disk.py --cov-report=term-missing {posargs:tests}
-
-[testenv:flake8]
-deps=
-  flake8
-commands=flake8 --select=F,E9 ceph_disk.py
diff --git a/src/test/rbd_mirror/test_ClusterWatcher.cc b/src/test/rbd_mirror/test_ClusterWatcher.cc
new file mode 100644
index 0000000..97f4859
--- /dev/null
+++ b/src/test/rbd_mirror/test_ClusterWatcher.cc
@@ -0,0 +1,190 @@
+// -*- mode:C; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+#include "include/rados/librados.hpp"
+#include "common/Cond.h"
+#include "common/errno.h"
+#include "common/Mutex.h"
+#include "librbd/internal.h"
+#include "tools/rbd_mirror/ClusterWatcher.h"
+#include "tools/rbd_mirror/types.h"
+#include "test/librados/test.h"
+#include "gtest/gtest.h"
+#include <boost/scope_exit.hpp>
+#include <iostream>
+#include <map>
+#include <memory>
+#include <set>
+
+using rbd::mirror::ClusterWatcher;
+using rbd::mirror::peer_t;
+using rbd::mirror::RadosRef;
+using std::map;
+using std::set;
+using std::string;
+
+void register_test_cluster_watcher() {  // intentionally empty; main() in test_main.cc calls it (presumably to force this object file to be linked in -- confirm)
+}
+
+class TestClusterWatcher : public ::testing::Test {  // gtest fixture: runs a ClusterWatcher on the cluster reached through connect_cluster_pp()
+public:
+
+  TestClusterWatcher() : m_lock("TestClusterWatcherLock")
+  {
+    m_cluster = std::make_shared<librados::Rados>();
+    EXPECT_EQ("", connect_cluster_pp(*m_cluster));  // an empty string is the expected (success) result
+    m_cluster_watcher.reset(new ClusterWatcher(m_cluster, m_lock));  // the watcher is given the fixture's cluster handle and lock
+  }
+
+  ~TestClusterWatcher() {  // deletes every pool still recorded in m_pools
+    m_cluster->wait_for_latest_osdmap();
+    for (auto& pool : m_pools) {
+      EXPECT_EQ(0, m_cluster->pool_delete(pool.c_str()));
+    }
+  }
+
+  void create_pool(bool enable_mirroring, const peer_t &peer, string *name=nullptr) {  // creates a temp pool; optionally enables mirroring with `peer` and records the expectation
+    string pool_name = get_temp_pool_name();
+    ASSERT_EQ("", create_one_pool_pp(pool_name, *m_cluster));
+    int64_t pool_id = m_cluster->pool_lookup(pool_name.c_str());
+    ASSERT_GE(pool_id, 0);
+    m_pools.insert(pool_name);  // remembered so the destructor deletes it
+    if (enable_mirroring) {
+      librados::IoCtx ioctx;
+      ASSERT_EQ(0, m_cluster->ioctx_create2(pool_id, ioctx));
+      ASSERT_EQ(0, librbd::mirror_set_enabled(ioctx, true));
+      ASSERT_EQ(0, librbd::mirror_peer_add(ioctx, peer.cluster_uuid,
+					   peer.cluster_name,
+					   peer.client_name));
+      m_peer_configs[peer].insert(pool_id);  // value check_peers() expects from get_peer_configs()
+      m_mirrored_pools.insert(pool_name);  // value check_peers() expects from get_pool_names()
+    }
+    if (name != nullptr) {
+      *name = pool_name;  // optional out-parameter: the generated pool name
+    }
+  }
+
+  void delete_pool(const string &name, const peer_t &peer) {  // deletes pool `name` and removes it from the expected peer/pool bookkeeping
+    int64_t pool_id = m_cluster->pool_lookup(name.c_str());
+    ASSERT_GE(pool_id, 0);
+    if (m_peer_configs.find(peer) != m_peer_configs.end()) {
+      m_peer_configs[peer].erase(pool_id);
+      m_mirrored_pools.erase(name);
+      if (m_peer_configs[peer].empty()) {
+	m_peer_configs.erase(peer);  // no pool ids left for this peer: drop the whole entry
+      }
+    }
+    m_pools.erase(name);  // already gone, so the destructor must not delete it again
+    ASSERT_EQ(0, m_cluster->pool_delete(name.c_str()));
+  }
+
+  void create_cache_pool(const string &base_pool, string *cache_pool_name) {  // creates a temp pool and attaches it to base_pool as a writeback tier; not recorded in m_pools
+    bufferlist inbl;
+    *cache_pool_name = get_temp_pool_name();
+    ASSERT_EQ("", create_one_pool_pp(*cache_pool_name, *m_cluster));
+    ASSERT_EQ(0, m_cluster->mon_command(
+      "{\"prefix\": \"osd tier add\", \"pool\": \"" + base_pool +
+      "\", \"tierpool\": \"" + *cache_pool_name +
+      "\", \"force_nonempty\": \"--force-nonempty\" }",
+      inbl, NULL, NULL));
+    ASSERT_EQ(0, m_cluster->mon_command(
+      "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + base_pool +
+      "\", \"overlaypool\": \"" + *cache_pool_name + "\"}",
+      inbl, NULL, NULL));
+    ASSERT_EQ(0, m_cluster->mon_command(
+      "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + *cache_pool_name +
+      "\", \"mode\": \"writeback\"}",
+      inbl, NULL, NULL));
+    m_cluster->wait_for_latest_osdmap();
+  }
+
+  void remove_cache_pool(const string &base_pool, const string &cache_pool) {  // undoes create_cache_pool(): removes overlay and tier, then deletes the cache pool
+    bufferlist inbl;
+    // tear down tiers
+    ASSERT_EQ(0, m_cluster->mon_command(
+      "{\"prefix\": \"osd tier remove-overlay\", \"pool\": \"" + base_pool +
+      "\"}",
+      inbl, NULL, NULL));
+    ASSERT_EQ(0, m_cluster->mon_command(
+      "{\"prefix\": \"osd tier remove\", \"pool\": \"" + base_pool +
+      "\", \"tierpool\": \"" + cache_pool + "\"}",
+      inbl, NULL, NULL));
+    m_cluster->wait_for_latest_osdmap();
+    m_cluster->pool_delete(cache_pool.c_str());  // NOTE(review): the return value is not checked here
+  }
+
+  void check_peers() {  // refreshes the watcher, then compares its results with the expected sets while holding m_lock
+    m_cluster_watcher->refresh_pools();
+    Mutex::Locker l(m_lock);
+    ASSERT_EQ(m_peer_configs, m_cluster_watcher->get_peer_configs());
+    ASSERT_EQ(m_mirrored_pools, m_cluster_watcher->get_pool_names());
+  }
+
+  Mutex m_lock;  // handed to the ClusterWatcher; held in check_peers() while reading its results
+  RadosRef m_cluster;
+  unique_ptr<ClusterWatcher> m_cluster_watcher;
+
+  set<string> m_pools;  // pools made by create_pool() and not yet deleted
+  set<string> m_mirrored_pools;  // expected get_pool_names() result
+  map<peer_t, set<int64_t> > m_peer_configs;  // expected get_peer_configs() result: peer -> pool ids
+};
+
+TEST_F(TestClusterWatcher, NoPools) {  // nothing created by the fixture: the watcher must report empty peer configs and pool names
+  check_peers();
+}
+
+TEST_F(TestClusterWatcher, NoMirroredPools) {  // pools created without mirroring leave the expected sets empty
+  check_peers();
+  create_pool(false, peer_t());  // enable_mirroring=false: the peer argument is not used
+  check_peers();
+  create_pool(false, peer_t());
+  check_peers();
+  create_pool(false, peer_t());
+  check_peers();
+}
+
+TEST_F(TestClusterWatcher, ReplicatedPools) {  // mixes mirrored and plain pools for two peers, re-checking the watcher after every change
+  string uuid1 = "00000000-0000-0000-0000-000000000001";
+  string uuid2 = "20000000-2222-2222-2222-000000000002";
+  peer_t site1(uuid1, "site1", "mirror1");
+  peer_t site2(uuid2, "site2", "mirror2");
+  string first_pool, last_pool;
+  check_peers();
+  create_pool(true, site1, &first_pool);  // the only pool mirrored to site1
+  check_peers();
+  create_pool(false, peer_t());
+  check_peers();
+  create_pool(false, peer_t());
+  check_peers();
+  create_pool(false, peer_t());
+  check_peers();
+  create_pool(true, site2);
+  check_peers();
+  create_pool(true, site2);
+  check_peers();
+  create_pool(true, site2, &last_pool);  // third pool mirrored to site2
+  check_peers();
+  delete_pool(first_pool, site1);  // site1 has no pools left, so delete_pool() drops its expected entry
+  check_peers();
+  delete_pool(last_pool, site2);  // site2 still has two pools after this
+  check_peers();
+}
+
+TEST_F(TestClusterWatcher, CachePools) {  // create_cache_pool() leaves the expected sets untouched, so cache tiers must not be reported
+  peer_t site1("11111111-1111-1111-1111-111111111111", "site1", "mirror1");
+  string base1, base2, cache1, cache2;
+  create_pool(true, site1, &base1);
+  check_peers();
+
+  create_cache_pool(base1, &cache1);
+  BOOST_SCOPE_EXIT( base1, cache1, this_ ) {  // tear the tier down when the test body is left
+    this_->remove_cache_pool(base1, cache1);
+  } BOOST_SCOPE_EXIT_END;
+  check_peers();
+
+  create_pool(false, peer_t(), &base2);  // second base pool, without mirroring
+  create_cache_pool(base2, &cache2);
+  BOOST_SCOPE_EXIT( base2, cache2, this_ ) {
+    this_->remove_cache_pool(base2, cache2);
+  } BOOST_SCOPE_EXIT_END;
+  check_peers();
+}
diff --git a/src/test/rbd_mirror/test_PoolWatcher.cc b/src/test/rbd_mirror/test_PoolWatcher.cc
new file mode 100644
index 0000000..d176828
--- /dev/null
+++ b/src/test/rbd_mirror/test_PoolWatcher.cc
@@ -0,0 +1,260 @@
+// -*- mode:C; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+#include "include/rados/librados.hpp"
+#include "include/rbd/librbd.hpp"
+#include "include/stringify.h"
+#include "cls/rbd/cls_rbd_client.h"
+#include "include/rbd_types.h"
+#include "librbd/internal.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/ImageState.h"
+#include "librbd/Operations.h"
+#include "librbd/Utils.h"
+#include "common/Cond.h"
+#include "common/errno.h"
+#include "common/Mutex.h"
+#include "tools/rbd_mirror/PoolWatcher.h"
+#include "tools/rbd_mirror/types.h"
+#include "test/librados/test.h"
+#include "gtest/gtest.h"
+#include <boost/scope_exit.hpp>
+#include <iostream>
+#include <map>
+#include <memory>
+#include <set>
+#include <vector>
+
+using rbd::mirror::PoolWatcher;
+using rbd::mirror::peer_t;
+using rbd::mirror::RadosRef;
+using std::map;
+using std::set;
+using std::string;
+
+void register_test_pool_watcher() {  // intentionally empty; main() in test_main.cc calls it (presumably to force this object file to be linked in -- confirm)
+}
+
+class TestPoolWatcher : public ::testing::Test {  // gtest fixture: runs a PoolWatcher on the cluster reached through connect_cluster_pp()
+public:
+
+TestPoolWatcher() : m_lock("TestPoolWatcherLock"),
+    m_image_number(0), m_snap_number(0)
+  {
+    m_cluster = std::make_shared<librados::Rados>();
+    EXPECT_EQ("", connect_cluster_pp(*m_cluster));  // an empty string is the expected (success) result
+    m_pool_watcher.reset(new PoolWatcher(m_cluster, 30, m_lock, m_cond));
+  }
+
+  ~TestPoolWatcher() {  // deletes every pool still recorded in m_pools
+    m_cluster->wait_for_latest_osdmap();
+    for (auto& pool : m_pools) {
+      EXPECT_EQ(0, m_cluster->pool_delete(pool.c_str()));
+    }
+  }
+
+  void create_pool(bool enable_mirroring, const peer_t &peer, string *name=nullptr) {  // creates a temp pool; optionally enables mirroring and adds `peer`
+    string pool_name = get_temp_pool_name();
+    ASSERT_EQ("", create_one_pool_pp(pool_name, *m_cluster));
+    int64_t pool_id = m_cluster->pool_lookup(pool_name.c_str());
+    ASSERT_GE(pool_id, 0);
+    m_pools.insert(pool_name);  // remembered so the destructor deletes it
+    if (enable_mirroring) {
+      librados::IoCtx ioctx;
+      ASSERT_EQ(0, m_cluster->ioctx_create2(pool_id, ioctx));
+      ASSERT_EQ(0, librbd::mirror_set_enabled(ioctx, true));
+      ASSERT_EQ(0, librbd::mirror_peer_add(ioctx, peer.cluster_uuid,
+					   peer.cluster_name,
+					   peer.client_name));
+    }
+    if (name != nullptr) {
+      *name = pool_name;  // optional out-parameter: the generated pool name
+    }
+  }
+
+  void delete_pool(const string &name, const peer_t &peer) {  // deletes pool `name` and its expected images; `peer` is not used in the body
+    int64_t pool_id = m_cluster->pool_lookup(name.c_str());
+    ASSERT_GE(pool_id, 0);
+    m_pools.erase(name);  // already gone, so the destructor must not delete it again
+    ASSERT_EQ(0, m_cluster->pool_delete(name.c_str()));
+    m_mirrored_images.erase(pool_id);
+  }
+
+  void create_cache_pool(const string &base_pool, string *cache_pool_name) {  // creates a temp pool and attaches it to base_pool as a writeback tier; not recorded in m_pools
+    bufferlist inbl;
+    *cache_pool_name = get_temp_pool_name();
+    ASSERT_EQ("", create_one_pool_pp(*cache_pool_name, *m_cluster));
+    ASSERT_EQ(0, m_cluster->mon_command(
+      "{\"prefix\": \"osd tier add\", \"pool\": \"" + base_pool +
+      "\", \"tierpool\": \"" + *cache_pool_name +
+      "\", \"force_nonempty\": \"--force-nonempty\" }",
+      inbl, NULL, NULL));
+    ASSERT_EQ(0, m_cluster->mon_command(
+      "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + base_pool +
+      "\", \"overlaypool\": \"" + *cache_pool_name + "\"}",
+      inbl, NULL, NULL));
+    ASSERT_EQ(0, m_cluster->mon_command(
+      "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + *cache_pool_name +
+      "\", \"mode\": \"writeback\"}",
+      inbl, NULL, NULL));
+    m_cluster->wait_for_latest_osdmap();
+  }
+
+  void remove_cache_pool(const string &base_pool, const string &cache_pool) {  // undoes create_cache_pool(): removes overlay and tier, then deletes the cache pool
+    bufferlist inbl;
+    // tear down tiers
+    ASSERT_EQ(0, m_cluster->mon_command(
+      "{\"prefix\": \"osd tier remove-overlay\", \"pool\": \"" + base_pool +
+      "\"}",
+      inbl, NULL, NULL));
+    ASSERT_EQ(0, m_cluster->mon_command(
+      "{\"prefix\": \"osd tier remove\", \"pool\": \"" + base_pool +
+      "\", \"tierpool\": \"" + cache_pool + "\"}",
+      inbl, NULL, NULL));
+    m_cluster->wait_for_latest_osdmap();
+    m_cluster->pool_delete(cache_pool.c_str());  // NOTE(review): the return value is not checked here
+  }
+
+  string get_image_id(librados::IoCtx *ioctx, const string &image_name) {  // looks up the id stored in the image's id object; returns "" if the lookup left it unset
+    string obj = librbd::util::id_obj_name(image_name);
+    string id;
+    EXPECT_EQ(0, librbd::cls_client::get_id(ioctx, obj, &id));
+    return id;
+  }
+
+  void create_image(const string &pool_name, bool mirrored=true,
+		    string *image_name=nullptr) {  // creates "image<N>" in pool_name; when mirrored, records its id as expected
+    uint64_t features = g_ceph_context->_conf->rbd_default_features;
+    string name = "image" + stringify(++m_image_number);
+    if (mirrored) {
+      features |= RBD_FEATURE_EXCLUSIVE_LOCK | RBD_FEATURE_JOURNALING;
+    }
+
+    librados::IoCtx ioctx;
+    ASSERT_EQ(0, m_cluster->ioctx_create(pool_name.c_str(), ioctx));
+    int order = 0;
+    ASSERT_EQ(0, librbd::create(ioctx, name.c_str(), 1 << 22, false,
+				features, &order, 0, 0));
+    if (mirrored)
+      m_mirrored_images[ioctx.get_id()].insert(get_image_id(&ioctx, name));  // value check_images() expects from get_images()
+    if (image_name != nullptr)
+      *image_name = name;
+  }
+
+  void clone_image(const string &parent_pool_name,
+		   const string &parent_image_name,
+		   const string &clone_pool_name,
+		   bool mirrored=true,
+		   string *image_name=nullptr) {  // snapshots and protects the parent, then clones it as "clone<N>" into clone_pool_name
+    librados::IoCtx pioctx, cioctx;
+    ASSERT_EQ(0, m_cluster->ioctx_create(parent_pool_name.c_str(), pioctx));
+    ASSERT_EQ(0, m_cluster->ioctx_create(clone_pool_name.c_str(), cioctx));
+
+    string snap_name = "snap" + stringify(++m_snap_number);
+    {
+      librbd::ImageCtx *ictx = new librbd::ImageCtx(parent_image_name.c_str(),
+						    "", "", pioctx, false);
+      ictx->state->open();  // NOTE(review): the result of open() is not checked here
+      EXPECT_EQ(0, ictx->operations->snap_create(snap_name.c_str()));
+      EXPECT_EQ(0, ictx->operations->snap_protect(snap_name.c_str()));
+      ictx->state->close();
+    }
+
+    uint64_t features = g_ceph_context->_conf->rbd_default_features;
+    string name = "clone" + stringify(++m_image_number);
+    if (mirrored) {
+      features |= RBD_FEATURE_EXCLUSIVE_LOCK | RBD_FEATURE_JOURNALING;
+    }
+    int order = 0;
+    ASSERT_EQ(0, librbd::clone(pioctx, parent_image_name.c_str(), snap_name.c_str(),
+			       cioctx, name.c_str(), features, &order, 0, 0));  // stop here on failure instead of recording a bogus image id
+    if (mirrored)
+      m_mirrored_images[cioctx.get_id()].insert(get_image_id(&cioctx, name));  // value check_images() expects from get_images()
+    if (image_name != nullptr)
+      *image_name = name;
+  }
+
+  void check_images() {  // refreshes the watcher, then compares get_images() with the expected map while holding m_lock
+    m_pool_watcher->refresh_images(false);
+    Mutex::Locker l(m_lock);
+    ASSERT_EQ(m_mirrored_images, m_pool_watcher->get_images());
+  }
+
+  Mutex m_lock;  // handed to the PoolWatcher; held in check_images() while reading its results
+  Cond m_cond;  // handed to the PoolWatcher constructor
+  RadosRef m_cluster;
+  unique_ptr<PoolWatcher> m_pool_watcher;
+
+  set<string> m_pools;  // pools made by create_pool() and not yet deleted
+  std::map<int64_t, std::set<std::string> > m_mirrored_images;  // expected get_images() result: pool id -> image ids
+
+  uint64_t m_image_number;  // counter behind the generated "image<N>" / "clone<N>" names
+  uint64_t m_snap_number;  // counter behind the generated "snap<N>" names
+};
+
+TEST_F(TestPoolWatcher, NoPools) {  // nothing created by the fixture: get_images() must equal the empty expected map
+  check_images();
+}
+
+TEST_F(TestPoolWatcher, ReplicatedPools) {  // creates images and clones across mirrored and plain pools, re-checking the watcher after every change
+  string uuid1 = "00000000-0000-0000-0000-000000000001";
+  string uuid2 = "20000000-2222-2222-2222-000000000002";
+  peer_t site1(uuid1, "site1", "mirror1");
+  peer_t site2(uuid2, "site2", "mirror2");
+  string first_pool, local_pool, last_pool;
+  check_images();
+  create_pool(true, site1, &first_pool);
+  check_images();
+  create_image(first_pool);
+  check_images();
+  string parent_image, parent_image2;
+  create_image(first_pool, true, &parent_image);
+  check_images();
+  clone_image(first_pool, parent_image, first_pool);  // clone within the same pool
+  check_images();
+  clone_image(first_pool, parent_image, first_pool, true, &parent_image2);  // this clone is the parent of the later clones
+  check_images();
+
+  create_pool(false, peer_t(), &local_pool);  // pool without mirroring
+  check_images();
+  create_image(local_pool, false);  // mirrored=false: not added to the expected map
+  check_images();
+  clone_image(first_pool, parent_image2, local_pool, false);  // mirrored=false: not added to the expected map
+  check_images();
+  create_pool(true, site2);  // mirrored pool that stays empty
+  check_images();
+
+  create_pool(true, site2, &last_pool);
+  check_images();
+  clone_image(first_pool, parent_image2, last_pool);  // clone from first_pool into last_pool
+  check_images();
+  create_image(last_pool);
+  check_images();
+  delete_pool(last_pool, site2);  // also erases last_pool's expected images
+  check_images();
+  delete_pool(first_pool, site1);
+  check_images();
+}
+
+TEST_F(TestPoolWatcher, CachePools) {  // images are expected under the base pool's id; create_cache_pool() adds no expectations
+  peer_t site1("11111111-1111-1111-1111-111111111111", "site1", "mirror1");
+  string base1, base2, cache1, cache2;
+  create_pool(true, site1, &base1);
+  check_images();
+
+  create_cache_pool(base1, &cache1);
+  BOOST_SCOPE_EXIT( base1, cache1, this_ ) {  // tear the tier down when the test body is left
+    this_->remove_cache_pool(base1, cache1);
+  } BOOST_SCOPE_EXIT_END;
+  check_images();
+  create_image(base1);  // created through the base pool's name while the cache overlay is set
+  check_images();
+
+  create_pool(false, peer_t(), &base2);  // second base pool, without mirroring
+  create_cache_pool(base2, &cache2);
+  BOOST_SCOPE_EXIT( base2, cache2, this_ ) {
+    this_->remove_cache_pool(base2, cache2);
+  } BOOST_SCOPE_EXIT_END;
+  check_images();
+  create_image(base2, false);  // mirrored=false: not added to the expected map
+  check_images();
+}
diff --git a/src/test/rbd_mirror/test_main.cc b/src/test/rbd_mirror/test_main.cc
new file mode 100644
index 0000000..ca24d94
--- /dev/null
+++ b/src/test/rbd_mirror/test_main.cc
@@ -0,0 +1,36 @@
+// -*- mode:C; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "include/rados/librados.hpp"
+#include "global/global_context.h"
+#include "test/librados/test.h"
+#include "gtest/gtest.h"
+#include <iostream>
+#include <string>
+
+extern void register_test_cluster_watcher();
+extern void register_test_pool_watcher();
+
+int main(int argc, char **argv)  // test runner: connects to the cluster, publishes its CephContext, enables lockdep, runs all gtest cases
+{
+  register_test_cluster_watcher();
+  register_test_pool_watcher();
+
+  ::testing::InitGoogleTest(&argc, argv);
+
+  librados::Rados rados;
+  std::string result = connect_cluster_pp(rados);
+  if (result != "" ) {  // a non-empty result is printed as the error and ends the run
+    std::cerr << result << std::endl;
+    return 1;
+  }
+
+  g_ceph_context = reinterpret_cast<CephContext*>(rados.cct());  // test_PoolWatcher.cc reads g_ceph_context->_conf
+
+  int r = rados.conf_set("lockdep", "true");
+  if (r < 0) {
+    std::cerr << "failed to enable lockdep" << std::endl;
+    return -r;  // r is negative here, so the exit status is positive
+  }
+  return RUN_ALL_TESTS();
+}
diff --git a/src/test/system/cross_process_sem.h b/src/test/system/cross_process_sem.h
index d087d29..0cbedf4 100644
--- a/src/test/system/cross_process_sem.h
+++ b/src/test/system/cross_process_sem.h
@@ -35,6 +35,6 @@ public:
   int reinit(int dval);
 
 private:
-  CrossProcessSem(struct cross_process_sem_data_t *data);
+  explicit CrossProcessSem(struct cross_process_sem_data_t *data);
   struct cross_process_sem_data_t *m_data;
 };
diff --git a/src/test/test_filejournal.cc b/src/test/test_filejournal.cc
index d9dfb30..0d88eed 100644
--- a/src/test/test_filejournal.cc
+++ b/src/test/test_filejournal.cc
@@ -138,7 +138,7 @@ TEST(TestFileJournal, WriteSmall) {
     ASSERT_EQ(0, j.create());
     j.make_writeable();
 
-    list<ObjectStore::Transaction*> tls;
+    vector<ObjectStore::Transaction> tls;
     bufferlist bl;
     bl.append("small");
     int orig_len = j.prepare_entry(tls, &bl);
@@ -168,7 +168,7 @@ TEST(TestFileJournal, WriteBig) {
       memset(foo, 1, sizeof(foo));
       bl.append(foo, sizeof(foo));
     }
-    list<ObjectStore::Transaction*> tls;
+    vector<ObjectStore::Transaction> tls;
     int orig_len = j.prepare_entry(tls, &bl);
     j.submit_entry(1, bl, orig_len, new C_SafeCond(&wait_lock, &cond, &done));
     wait();
@@ -191,7 +191,7 @@ TEST(TestFileJournal, WriteMany) {
 
     C_GatherBuilder gb(g_ceph_context, new C_SafeCond(&wait_lock, &cond, &done));
 
-    list<ObjectStore::Transaction*> tls;
+    vector<ObjectStore::Transaction> tls;
     bufferlist bl;
     bl.append("small");
     uint64_t seq = 1;
@@ -225,7 +225,7 @@ TEST(TestFileJournal, WriteManyVecs) {
 
     bufferlist first;
     first.append("small");
-    list<ObjectStore::Transaction*> tls;
+    vector<ObjectStore::Transaction> tls;
     int orig_len = j.prepare_entry(tls, &first);
     j.submit_entry(1, first, orig_len, gb.new_sub());
 
@@ -261,7 +261,7 @@ TEST(TestFileJournal, ReplaySmall) {
   g_ceph_context->_conf->set_val("journal_write_header_frequency", "0");
   g_ceph_context->_conf->apply_changes(NULL);
 
-  list<ObjectStore::Transaction*> tls;
+  vector<ObjectStore::Transaction> tls;
 
   for (unsigned i = 0 ; i < 3; ++i) {
     SCOPED_TRACE(subtests[i].description);
@@ -319,7 +319,7 @@ TEST(TestFileJournal, ReplayCorrupt) {
   g_ceph_context->_conf->set_val("journal_write_header_frequency", "0");
   g_ceph_context->_conf->apply_changes(NULL);
 
-  list<ObjectStore::Transaction*> tls;
+  vector<ObjectStore::Transaction> tls;
   for (unsigned i = 0 ; i < 3; ++i) {
     SCOPED_TRACE(subtests[i].description);
     fsid.generate_random();
@@ -416,7 +416,7 @@ TEST(TestFileJournal, WriteTrim) {
     memset(foo, 1, sizeof(foo));
 
     uint64_t seq = 1, committed = 0;
-    list<ObjectStore::Transaction*> tls;
+    vector<ObjectStore::Transaction> tls;
 
     for (unsigned i=0; i<size_mb*2; i++) {
       bl.clear();
@@ -450,7 +450,7 @@ TEST(TestFileJournal, WriteTrimSmall) {
   g_ceph_context->_conf->set_val("journal_ignore_corruption", "false");
   g_ceph_context->_conf->set_val("journal_write_header_frequency", "0");
   g_ceph_context->_conf->apply_changes(NULL);
-  list<ObjectStore::Transaction*> tls;
+  vector<ObjectStore::Transaction> tls;
 
   for (unsigned i = 0 ; i < 3; ++i) {
     SCOPED_TRACE(subtests[i].description);
@@ -500,7 +500,7 @@ TEST(TestFileJournal, ReplayDetectCorruptFooterMagic) {
   g_ceph_context->_conf->set_val("journal_write_header_frequency", "1");
   g_ceph_context->_conf->apply_changes(NULL);
 
-  list<ObjectStore::Transaction*> tls;
+  vector<ObjectStore::Transaction> tls;
   for (unsigned i = 0 ; i < 3; ++i) {
     SCOPED_TRACE(subtests[i].description);
     fsid.generate_random();
@@ -557,7 +557,7 @@ TEST(TestFileJournal, ReplayDetectCorruptPayload) {
   g_ceph_context->_conf->set_val("journal_write_header_frequency", "1");
   g_ceph_context->_conf->apply_changes(NULL);
 
-  list<ObjectStore::Transaction*> tls;
+  vector<ObjectStore::Transaction> tls;
   for (unsigned i = 0 ; i < 3; ++i) {
     SCOPED_TRACE(subtests[i].description);
     fsid.generate_random();
@@ -614,7 +614,7 @@ TEST(TestFileJournal, ReplayDetectCorruptHeader) {
   g_ceph_context->_conf->set_val("journal_write_header_frequency", "1");
   g_ceph_context->_conf->apply_changes(NULL);
 
-  list<ObjectStore::Transaction*> tls;
+  vector<ObjectStore::Transaction> tls;
   for (unsigned i = 0 ; i < 3; ++i) {
     SCOPED_TRACE(subtests[i].description);
     fsid.generate_random();
diff --git a/src/test/test_ipaddr.cc b/src/test/test_ipaddr.cc
index 1f58130..240a3a7 100644
--- a/src/test/test_ipaddr.cc
+++ b/src/test/test_ipaddr.cc
@@ -19,6 +19,9 @@ static void ipv6(struct sockaddr_in6 *addr, const char *s) {
   ASSERT_EQ(1, err);
 }
 
+static char eth0[] = "eth0";
+static char eth1[] = "eth1";
+
 TEST(CommonIPAddr, TestNotFound)
 {
   struct ifaddrs one, two;
@@ -29,11 +32,11 @@ TEST(CommonIPAddr, TestNotFound)
 
   one.ifa_next = &two;
   one.ifa_addr = (struct sockaddr*)&a_one;
-  one.ifa_name = "eth0";
+  one.ifa_name = eth0;
 
   two.ifa_next = NULL;
   two.ifa_addr = (struct sockaddr*)&a_two;
-  two.ifa_name = "eth1";
+  two.ifa_name = eth1;
 
   ipv4(&a_one, "10.11.12.13");
   ipv6(&a_two, "2001:1234:5678:90ab::cdef");
@@ -53,11 +56,11 @@ TEST(CommonIPAddr, TestV4_Simple)
 
   one.ifa_next = &two;
   one.ifa_addr = (struct sockaddr*)&a_one;
-  one.ifa_name = "eth0";
+  one.ifa_name = eth0;
 
   two.ifa_next = NULL;
   two.ifa_addr = (struct sockaddr*)&a_two;
-  two.ifa_name = "eth1";
+  two.ifa_name = eth1;
 
   ipv4(&a_one, "10.11.12.13");
   ipv6(&a_two, "2001:1234:5678:90ab::cdef");
@@ -77,11 +80,11 @@ TEST(CommonIPAddr, TestV4_Prefix25)
 
   one.ifa_next = &two;
   one.ifa_addr = (struct sockaddr*)&a_one;
-  one.ifa_name = "eth0";
+  one.ifa_name = eth0;
 
   two.ifa_next = NULL;
   two.ifa_addr = (struct sockaddr*)&a_two;
-  two.ifa_name = "eth1";
+  two.ifa_name = eth1;
 
   ipv4(&a_one, "10.11.12.13");
   ipv4(&a_two, "10.11.12.129");
@@ -101,11 +104,11 @@ TEST(CommonIPAddr, TestV4_Prefix16)
 
   one.ifa_next = &two;
   one.ifa_addr = (struct sockaddr*)&a_one;
-  one.ifa_name = "eth0";
+  one.ifa_name = eth0;
 
   two.ifa_next = NULL;
   two.ifa_addr = (struct sockaddr*)&a_two;
-  two.ifa_name = "eth1";
+  two.ifa_name = eth1;
 
   ipv4(&a_one, "10.1.1.2");
   ipv4(&a_two, "10.2.1.123");
@@ -124,7 +127,7 @@ TEST(CommonIPAddr, TestV4_PrefixTooLong)
 
   one.ifa_next = NULL;
   one.ifa_addr = (struct sockaddr*)&a_one;
-  one.ifa_name = "eth0";
+  one.ifa_name = eth0;
 
   ipv4(&a_one, "10.11.12.13");
   ipv4(&net, "10.11.12.12");
@@ -143,11 +146,11 @@ TEST(CommonIPAddr, TestV4_PrefixZero)
 
   one.ifa_next = &two;
   one.ifa_addr = (struct sockaddr*)&a_one;
-  one.ifa_name = "eth0";
+  one.ifa_name = eth0;
 
   two.ifa_next = NULL;
   two.ifa_addr = (struct sockaddr*)&a_two;
-  two.ifa_name = "eth1";
+  two.ifa_name = eth1;
 
   ipv6(&a_one, "2001:1234:5678:900F::cdef");
   ipv4(&a_two, "10.1.2.3");
@@ -167,11 +170,11 @@ TEST(CommonIPAddr, TestV6_Simple)
 
   one.ifa_next = &two;
   one.ifa_addr = (struct sockaddr*)&a_one;
-  one.ifa_name = "eth0";
+  one.ifa_name = eth0;
 
   two.ifa_next = NULL;
   two.ifa_addr = (struct sockaddr*)&a_two;
-  two.ifa_name = "eth1";
+  two.ifa_name = eth1;
 
   ipv4(&a_one, "10.11.12.13");
   ipv6(&a_two, "2001:1234:5678:90ab::cdef");
@@ -191,11 +194,11 @@ TEST(CommonIPAddr, TestV6_Prefix57)
 
   one.ifa_next = &two;
   one.ifa_addr = (struct sockaddr*)&a_one;
-  one.ifa_name = "eth0";
+  one.ifa_name = eth0;
 
   two.ifa_next = NULL;
   two.ifa_addr = (struct sockaddr*)&a_two;
-  two.ifa_name = "eth1";
+  two.ifa_name = eth1;
 
   ipv6(&a_one, "2001:1234:5678:900F::cdef");
   ipv6(&a_two, "2001:1234:5678:90ab::cdef");
@@ -214,7 +217,7 @@ TEST(CommonIPAddr, TestV6_PrefixTooLong)
 
   one.ifa_next = NULL;
   one.ifa_addr = (struct sockaddr*)&a_one;
-  one.ifa_name = "eth0";
+  one.ifa_name = eth0;
 
   ipv6(&a_one, "2001:1234:5678:900F::cdef");
   ipv6(&net, "2001:1234:5678:900F::cdee");
@@ -233,11 +236,11 @@ TEST(CommonIPAddr, TestV6_PrefixZero)
 
   one.ifa_next = &two;
   one.ifa_addr = (struct sockaddr*)&a_one;
-  one.ifa_name = "eth0";
+  one.ifa_name = eth0;
 
   two.ifa_next = NULL;
   two.ifa_addr = (struct sockaddr*)&a_two;
-  two.ifa_name = "eth1";
+  two.ifa_name = eth1;
 
   ipv4(&a_one, "10.2.3.4");
   ipv6(&a_two, "2001:f00b::1");
diff --git a/src/test/test_pidfile.sh b/src/test/test_pidfile.sh
new file mode 100755
index 0000000..2dec1f7
--- /dev/null
+++ b/src/test/test_pidfile.sh
@@ -0,0 +1,102 @@
+#!/bin/bash 
+
+#
+# test pidfile here 
+#
+
+# Includes
+source ../qa/workunits/ceph-helpers.sh
+
+function run() { # usage: run DIR [FUNC...] -- exports the mon address/args, then calls each test function with DIR
+    local dir=$1
+    shift
+
+    export CEPH_MON="127.0.0.1:7124" # git grep '\<7124\>' : there must be only one
+    export CEPH_ARGS
+    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
+    CEPH_ARGS+="--mon-host=$CEPH_MON "
+
+    local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} # default: every TEST_* function name found in the output of `set`
+    for func in $funcs ; do
+        $func $dir || return 1 # stop at the first failing test
+    done
+}
+
+function TEST_without_pidfile() { # starts a mon and an osd with an empty --pid-file=, then tears them down without pidfiles
+    local dir=$1
+    setup $dir
+    local RUNID=`uuidgen` # put on the daemon command lines so teardown_unexist_pidfile can find them with ps
+    run_mon $dir a --pid-file= --daemonize=$RUNID || { teardown_unexist_pidfile $dir $RUNID; return 1; }
+    run_osd $dir 0 --pid-file= --daemonize=$RUNID || { teardown_unexist_pidfile $dir $RUNID; return 1; }
+    teardown_unexist_pidfile $dir $RUNID || return 1
+}
+
+function teardown_unexist_pidfile() { # usage: DIR RUNID [DELAYS] -- kills processes found via ps (no pidfile), then removes DIR
+    local dir=$1
+    shift
+    local RUNID=$1
+    shift
+    local delays=${1:-0 0 1 1 1 2 3 5 5 5 10 10 20 60 60 60 120} # after the two shifts the optional delay list is $1
+    local pids=$(ps aux|awk "/cep[h].*$RUNID.*/ {print \$2}") # cep[h] does not match awk's own command line
+    local status=0
+    for i in $pids ; do
+        local kill_complete=false
+        for try in $delays ; do  
+            if kill $i 2> /dev/null ; then
+                kill_complete=false
+                sleep $try
+            else
+                kill_complete=true # kill failed, which is taken to mean the process has exited
+                break
+            fi
+       done
+       if ! $kill_complete ; then
+            status=1 # still accepting signals after the last delay
+       fi   
+    done
+    if [ "$(stat -f -c '%T' .)" == "btrfs" ]; then # quoted so that empty stat output is not a [ syntax error
+         __teardown_btrfs $dir
+    fi
+    rm -fr $dir
+    return $status
+}
+
+function TEST_pidfile() { # checks pidfile locking, reuse and removal for a mon and an osd started under DIR
+    local dir=$1
+    setup $dir 
+
+    # no daemon can use a pidfile that is owned by another daemon
+    run_mon $dir a || return 1
+    run_mon $dir a 2>&1 | grep "failed to lock pidfile" || return 1 # the second start must report the lock failure
+
+    run_osd $dir 0 || return 1
+    run_osd $dir 0 2>&1 | grep "failed to lock pidfile" || return 1
+
+    # when a daemon shuts down, it will not unlink a path different from
+    # the one it owns
+    mv $dir/osd.0.pid $dir/osd.0.pid.old || return 1
+    cp $dir/osd.0.pid.old $dir/osd.0.pid || return 1 # same content, but a different file at the original path
+    kill_daemons $dir TERM osd.0 || return 1
+    test -f $dir/osd.0.pid || return 1
+
+    # when a daemon starts, it re-uses the pid file if no other daemon
+    # has it locked
+    run_osd $dir 0 || return 1
+    ! cmp $dir/osd.0.pid $dir/osd.0.pid.old || return 1 # the content must differ from the saved copy
+
+    # if the pid in the file is different from the pid of the daemon
+    # the file is not removed because it is assumed to be owned by
+    # another daemon
+    echo 123 > $dir/osd.0.pid
+    kill_daemons $dir TERM osd.0 || return 1
+    test -f $dir/osd.0.pid || return 1
+
+    # when the daemon shuts down, it removes its own pid file
+    test -f $dir/mon.a.pid || return 1
+    kill_daemons $dir TERM mon.a || return 1
+    ! test -f $dir/mon.a.pid || return 1
+
+    teardown $dir || return 1
+}
+
+main pidfile # main is not defined in this script; presumably it comes from the sourced ceph-helpers.sh -- confirm
diff --git a/src/test/test_snap_mapper.cc b/src/test/test_snap_mapper.cc
index 17cb898..93c87ab 100644
--- a/src/test/test_snap_mapper.cc
+++ b/src/test/test_snap_mapper.cc
@@ -45,7 +45,7 @@ class PausyAsyncMap : public MapCacher::StoreDriver<string, bufferlist> {
   typedef ceph::shared_ptr<_Op> Op;
   struct Remove : public _Op {
     set<string> to_remove;
-    Remove(const set<string> &to_remove) : to_remove(to_remove) {}
+    explicit Remove(const set<string> &to_remove) : to_remove(to_remove) {}
     void operate(map<string, bufferlist> *store) {
       for (set<string>::iterator i = to_remove.begin();
 	   i != to_remove.end();
@@ -56,7 +56,7 @@ class PausyAsyncMap : public MapCacher::StoreDriver<string, bufferlist> {
   };
   struct Insert : public _Op {
     map<string, bufferlist> to_insert;
-    Insert(const map<string, bufferlist> &to_insert) : to_insert(to_insert) {}
+    explicit Insert(const map<string, bufferlist> &to_insert) : to_insert(to_insert) {}
     void operate(map<string, bufferlist> *store) {
       for (map<string, bufferlist>::iterator i = to_insert.begin();
 	   i != to_insert.end();
@@ -68,7 +68,7 @@ class PausyAsyncMap : public MapCacher::StoreDriver<string, bufferlist> {
   };
   struct Callback : public _Op {
     Context *context;
-    Callback(Context *c) : context(c) {}
+    explicit Callback(Context *c) : context(c) {}
     void operate(map<string, bufferlist> *store) {
       context->complete(0);
     }
@@ -103,7 +103,7 @@ private:
     bool paused;
     list<Op> queue;
   public:
-    Doer(PausyAsyncMap *parent) :
+    explicit Doer(PausyAsyncMap *parent) :
       parent(parent), lock("Doer lock"), stopping(0), paused(false) {}
     virtual void *entry() {
       while (1) {
diff --git a/src/test/test_stress_watch.cc b/src/test/test_stress_watch.cc
index cafdc20..1dd3738 100644
--- a/src/test/test_stress_watch.cc
+++ b/src/test/test_stress_watch.cc
@@ -40,7 +40,7 @@ public:
 
 struct WatcherUnwatcher : public Thread {
   string pool;
-  WatcherUnwatcher(string& _pool) : pool(_pool) {}
+  explicit WatcherUnwatcher(string& _pool) : pool(_pool) {}
 
   void *entry() {
     Rados cluster;
diff --git a/src/test/test_trans.cc b/src/test/test_trans.cc
index 415dea3..e671e47 100644
--- a/src/test/test_trans.cc
+++ b/src/test/test_trans.cc
@@ -72,7 +72,7 @@ int main(int argc, const char **argv)
   dout(0) << "starting thread" << dendl;
   foo.create("foo");
   dout(0) << "starting op" << dendl;
-  fs->apply_transaction(&osr, t);
+  fs->apply_transaction(&osr, std::move(t));
 
 }
 
diff --git a/src/test/test_xlist.cc b/src/test/test_xlist.cc
index 9d5eadd..c34f8c5 100644
--- a/src/test/test_xlist.cc
+++ b/src/test/test_xlist.cc
@@ -10,7 +10,7 @@ struct Item {
   xlist<Item*>::item xitem;
   int val;
 
-  Item(int v) :
+  explicit Item(int v) :
     xitem(this),
     val(v)
   {}
diff --git a/src/test/xattr_bench.cc b/src/test/xattr_bench.cc
index 544bb34..5c57b89 100644
--- a/src/test/xattr_bench.cc
+++ b/src/test/xattr_bench.cc
@@ -106,7 +106,7 @@ uint64_t do_run(ObjectStore *store, int attrsize, int numattrs,
     }
     collections[coll] = make_pair(objects, new ObjectStore::Sequencer(coll.to_str()));
   }
-  store->apply_transaction(&osr, t);
+  store->apply_transaction(&osr, std::move(t));
 
   bufferlist bl;
   for (int i = 0; i < attrsize; ++i) {
@@ -135,9 +135,10 @@ uint64_t do_run(ObjectStore *store, int attrsize, int numattrs,
 		   bl);
       }
     }
-    store->queue_transaction(iter->second.second, t,
+    store->queue_transaction(iter->second.second, std::move(*t),
 			     new OnApplied(&lock, &cond, &in_flight,
 					   t));
+    delete t;
   }
   {
     Mutex::Locker l(lock);
@@ -153,14 +154,6 @@ int main(int argc, char **argv) {
 
   global_init(0, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, 0);
   common_init_finish(g_ceph_context);
-  if (args[0] == string("omap")) {
-    std::cerr << "using omap xattrs" << std::endl;
-    g_ceph_context->_conf->set_val("filestore_xattr_use_omap", "true");
-  } else {
-    std::cerr << "not using omap xattrs" << std::endl;
-    g_ceph_context->_conf->set_val("filestore_xattr_use_omap", "false");
-  }
-  g_ceph_context->_conf->apply_changes(NULL);
 
   std::cerr << "args: " << args << std::endl;
   if (args.size() < 3) {
diff --git a/src/tools/Makefile-client.am b/src/tools/Makefile-client.am
index 72d1058..0a31d5c 100644
--- a/src/tools/Makefile-client.am
+++ b/src/tools/Makefile-client.am
@@ -79,6 +79,39 @@ bin_PROGRAMS += rbd-nbd
 
 endif # LINUX
 
+# library for unit tests
+librbd_mirror_internal_la_SOURCES = \
+	tools/rbd_mirror/ClusterWatcher.cc \
+	tools/rbd_mirror/ImageReplayer.cc \
+	tools/rbd_mirror/Mirror.cc \
+	tools/rbd_mirror/PoolWatcher.cc \
+	tools/rbd_mirror/Replayer.cc \
+	tools/rbd_mirror/types.cc
+noinst_LTLIBRARIES += librbd_mirror_internal.la
+noinst_HEADERS += \
+	tools/rbd_mirror/ClusterWatcher.h \
+	tools/rbd_mirror/ImageReplayer.h \
+	tools/rbd_mirror/Mirror.h \
+	tools/rbd_mirror/PoolWatcher.h \
+	tools/rbd_mirror/Replayer.h \
+	tools/rbd_mirror/types.h
+
+rbd_mirror_SOURCES = \
+	tools/rbd_mirror/main.cc
+rbd_mirror_LDADD = \
+	librbd_mirror_internal.la \
+	librbd_internal.la \
+	librbd_api.la \
+	$(LIBRBD_TYPES) \
+	libjournal.la \
+	$(LIBRADOS) $(LIBOSDC) \
+	librados_internal.la \
+	libcls_rbd_client.la \
+	libcls_lock_client.la \
+	libcls_journal_client.la \
+	$(CEPH_GLOBAL)
+bin_PROGRAMS += rbd-mirror
+
 endif # WITH_RBD
 
 if WITH_CEPHFS
diff --git a/src/tools/Makefile.am b/src/tools/Makefile.am
index e14f3f8..0976bee 100644
--- a/src/tools/Makefile.am
+++ b/src/tools/Makefile.am
@@ -45,3 +45,5 @@ noinst_HEADERS += \
 	tools/rados/PoolDump.h \
 	tools/cephfs/DataScan.h
 
+EXTRA_DIST += \
+	tools/setup-virtualenv.sh
diff --git a/src/tools/RadosDump.h b/src/tools/RadosDump.h
index 92c0eeb..54cec99 100644
--- a/src/tools/RadosDump.h
+++ b/src/tools/RadosDump.h
@@ -161,7 +161,7 @@ struct object_begin {
   // of object processing.
   object_info_t oi;
 
-  object_begin(const ghobject_t &hoid): hoid(hoid) { }
+  explicit object_begin(const ghobject_t &hoid): hoid(hoid) { }
   object_begin() { }
 
   // If superblock doesn't include CEPH_FS_FEATURE_INCOMPAT_SHARDS then
@@ -218,8 +218,8 @@ struct data_section {
 
 struct attr_section {
   map<string,bufferlist> data;
-  attr_section(const map<string,bufferlist> &data) : data(data) { }
-  attr_section(map<string, bufferptr> &data_)
+  explicit attr_section(const map<string,bufferlist> &data) : data(data) { }
+  explicit attr_section(map<string, bufferptr> &data_)
   {
     for (std::map<std::string, bufferptr>::iterator i = data_.begin();
          i != data_.end(); ++i) {
@@ -245,7 +245,7 @@ struct attr_section {
 
 struct omap_hdr_section {
   bufferlist hdr;
-  omap_hdr_section(bufferlist hdr) : hdr(hdr) { }
+  explicit omap_hdr_section(bufferlist hdr) : hdr(hdr) { }
   omap_hdr_section() { }
 
   void encode(bufferlist& bl) const {
@@ -262,7 +262,7 @@ struct omap_hdr_section {
 
 struct omap_section {
   map<string, bufferlist> omap;
-  omap_section(const map<string, bufferlist> &omap) :
+  explicit omap_section(const map<string, bufferlist> &omap) :
     omap(omap) { }
   omap_section() { }
 
diff --git a/src/tools/ceph_monstore_tool.cc b/src/tools/ceph_monstore_tool.cc
index c37c3af..f9b4696 100644
--- a/src/tools/ceph_monstore_tool.cc
+++ b/src/tools/ceph_monstore_tool.cc
@@ -37,7 +37,7 @@ class TraceIter {
   unsigned idx;
   MonitorDBStore::TransactionRef t;
 public:
-  TraceIter(string fname) : fd(-1), idx(-1) {
+  explicit TraceIter(string fname) : fd(-1), idx(-1) {
     fd = ::open(fname.c_str(), O_RDONLY);
     t.reset(new MonitorDBStore::Transaction);
   }
diff --git a/src/tools/ceph_objectstore_tool.cc b/src/tools/ceph_objectstore_tool.cc
index 7f39f92..9a44cfc 100644
--- a/src/tools/ceph_objectstore_tool.cc
+++ b/src/tools/ceph_objectstore_tool.cc
@@ -26,6 +26,9 @@
 
 #include "os/ObjectStore.h"
 #include "os/filestore/FileJournal.h"
+#ifdef HAVE_LIBFUSE
+#include "os/FuseStore.h"
+#endif
 
 #include "osd/PGLog.h"
 #include "osd/OSD.h"
@@ -340,12 +343,6 @@ void myexit(int ret)
   exit(ret);
 }
 
-static void invalid_filestore_path(string &path)
-{
-  cerr << "Invalid filestore path specified: " << path << "\n";
-  myexit(1);
-}
-
 int get_log(ObjectStore *fs, __u8 struct_ver,
    coll_t coll, spg_t pgid, const pg_info_t &info,
    PGLog::IndexedLog &log, pg_missing_t &missing,
@@ -484,7 +481,7 @@ int initiate_new_remove_pg(ObjectStore *store, spg_t r_pgid,
   if (r < 0) {
     return r;
   }
-  store->apply_transaction(&osr, rmt);
+  store->apply_transaction(&osr, std::move(rmt));
   finish_remove_pgs(store);
   return r;
 }
@@ -703,7 +700,7 @@ int set_inc_osdmap(ObjectStore *store, epoch_t e, bufferlist& bl, bool force,
   ObjectStore::Transaction t;
   t.write(coll_t::meta(), inc_oid, 0, bl.length(), bl);
   t.truncate(coll_t::meta(), inc_oid, bl.length());
-  int ret = store->apply_transaction(&osr, t);
+  int ret = store->apply_transaction(&osr, std::move(t));
   if (ret) {
     cerr << "Failed to set inc-osdmap (" << inc_oid << "): " << ret << std::endl;
   } else {
@@ -750,7 +747,7 @@ int set_osdmap(ObjectStore *store, epoch_t e, bufferlist& bl, bool force,
   ObjectStore::Transaction t;
   t.write(coll_t::meta(), full_oid, 0, bl.length(), bl);
   t.truncate(coll_t::meta(), full_oid, bl.length());
-  int ret = store->apply_transaction(&osr, t);
+  int ret = store->apply_transaction(&osr, std::move(t));
   if (ret) {
     cerr << "Failed to set osdmap (" << full_oid << "): " << ret << std::endl;
   } else {
@@ -1002,7 +999,7 @@ int ObjectStoreTool::get_object(ObjectStore *store, coll_t coll,
     }
   }
   if (!dry_run)
-    store->apply_transaction(&osr, *t);
+    store->apply_transaction(&osr, std::move(*t));
   return 0;
 }
 
@@ -1308,7 +1305,7 @@ int ObjectStoreTool::do_import(ObjectStore *store, OSDSuperblock& sb,
     ::encode((char)1, values["_remove"]);
     t.omap_setkeys(coll, pgid.make_pgmeta_oid(), values);
 
-    store->apply_transaction(&osr, t);
+    store->apply_transaction(&osr, std::move(t));
   }
 
   cout << "Importing pgid " << pgid;
@@ -1404,7 +1401,7 @@ int ObjectStoreTool::do_import(ObjectStore *store, OSDSuperblock& sb,
     set<string> remove;
     remove.insert("_remove");
     t.omap_rmkeys(coll, pgid.make_pgmeta_oid(), remove);
-    store->apply_transaction(&osr, t);
+    store->apply_transaction(&osr, std::move(t));
   }
 
   return 0;
@@ -1471,7 +1468,7 @@ int do_remove_object(ObjectStore *store, coll_t coll,
 
   t.remove(coll, ghobj);
 
-  store->apply_transaction(&osr, t);
+  store->apply_transaction(&osr, std::move(t));
   return 0;
 }
 
@@ -1598,7 +1595,7 @@ int do_set_bytes(ObjectStore *store, coll_t coll,
   } while(true);
 
   if (!dry_run)
-    store->apply_transaction(&osr, *t);
+    store->apply_transaction(&osr, std::move(*t));
   return 0;
 }
 
@@ -1644,7 +1641,7 @@ int do_set_attr(ObjectStore *store, coll_t coll,
 
   t->setattr(coll, ghobj, key,  bl);
 
-  store->apply_transaction(&osr, *t);
+  store->apply_transaction(&osr, std::move(*t));
   return 0;
 }
 
@@ -1663,7 +1660,7 @@ int do_rm_attr(ObjectStore *store, coll_t coll,
 
   t->rmattr(coll, ghobj, key);
 
-  store->apply_transaction(&osr, *t);
+  store->apply_transaction(&osr, std::move(*t));
   return 0;
 }
 
@@ -1723,7 +1720,7 @@ int do_set_omap(ObjectStore *store, coll_t coll,
 
   t->omap_setkeys(coll, ghobj, attrset);
 
-  store->apply_transaction(&osr, *t);
+  store->apply_transaction(&osr, std::move(*t));
   return 0;
 }
 
@@ -1745,7 +1742,7 @@ int do_rm_omap(ObjectStore *store, coll_t coll,
 
   t->omap_rmkeys(coll, ghobj, keys);
 
-  store->apply_transaction(&osr, *t);
+  store->apply_transaction(&osr, std::move(*t));
   return 0;
 }
 
@@ -1791,14 +1788,14 @@ int do_set_omaphdr(ObjectStore *store, coll_t coll,
 
   t->omap_setheader(coll, ghobj, hdrbl);
 
-  store->apply_transaction(&osr, *t);
+  store->apply_transaction(&osr, std::move(*t));
   return 0;
 }
 
 struct do_fix_lost : public action_on_object_t {
   ObjectStore::Sequencer *osr;
 
-  do_fix_lost(ObjectStore::Sequencer *_osr) : osr(_osr) {}
+  explicit do_fix_lost(ObjectStore::Sequencer *_osr) : osr(_osr) {}
 
   virtual int call(ObjectStore *store, coll_t coll,
 		   ghobject_t &ghobj, object_info_t &oi) {
@@ -1814,7 +1811,7 @@ struct do_fix_lost : public action_on_object_t {
       ::encode(oi, bl);
       ObjectStore::Transaction t;
       t.setattr(coll, ghobj, OI_ATTR, bl);
-      int r = store->apply_transaction(osr, t);
+      int r = store->apply_transaction(osr, std::move(t));
       if (r < 0) {
 	cerr << "Error getting fixing attr on : " << make_pair(coll, ghobj)
 	     << ", "
@@ -1993,7 +1990,7 @@ int set_size(ObjectStore *store, coll_t coll, ghobject_t &ghobj, uint64_t setsiz
       ::encode(ss, snapattr);
       t.setattr(coll, head, SS_ATTR, snapattr);
     }
-    r = store->apply_transaction(&osr, t);
+    r = store->apply_transaction(&osr, std::move(t));
     if (r < 0) {
       cerr << "Error writing object info: " << make_pair(coll, ghobj) << ", "
          << cpp_strerror(r) << std::endl;
@@ -2047,7 +2044,7 @@ int clear_snapset(ObjectStore *store, coll_t coll, ghobject_t &ghobj,
     ::encode(ss, bl);
     ObjectStore::Transaction t;
     t.setattr(coll, ghobj, SS_ATTR, bl);
-    int r = store->apply_transaction(&osr, t);
+    int r = store->apply_transaction(&osr, std::move(t));
     if (r < 0) {
       cerr << "Error setting snapset on : " << make_pair(coll, ghobj) << ", "
 	   << cpp_strerror(r) << std::endl;
@@ -2145,7 +2142,7 @@ int remove_clone(ObjectStore *store, coll_t coll, ghobject_t &ghobj, snapid_t cl
   ::encode(snapset, bl);
   ObjectStore::Transaction t;
   t.setattr(coll, ghobj, SS_ATTR, bl);
-  int r = store->apply_transaction(&osr, t);
+  int r = store->apply_transaction(&osr, std::move(t));
   if (r < 0) {
     cerr << "Error setting snapset on : " << make_pair(coll, ghobj) << ", "
 	 << cpp_strerror(r) << std::endl;
@@ -2207,7 +2204,7 @@ int mydump_journal(Formatter *f, string journalpath, bool m_journal_dio)
 
 int main(int argc, char **argv)
 {
-  string dpath, jpath, pgidstr, op, file, object, objcmd, arg1, arg2, type, format;
+  string dpath, jpath, pgidstr, op, file, mountpoint, object, objcmd, arg1, arg2, type, format;
   spg_t pgid;
   unsigned epoch = 0;
   ghobject_t ghobj;
@@ -2220,7 +2217,7 @@ int main(int argc, char **argv)
   desc.add_options()
     ("help", "produce help message")
     ("type", po::value<string>(&type),
-     "Arg is one of [filestore (default), memstore, keyvaluestore]")
+     "Arg is one of [filestore (default), memstore]")
     ("data-path", po::value<string>(&dpath),
      "path to object store, mandatory")
     ("journal-path", po::value<string>(&jpath),
@@ -2228,12 +2225,14 @@ int main(int argc, char **argv)
     ("pgid", po::value<string>(&pgidstr),
      "PG id, mandatory for info, log, remove, export, rm-past-intervals, mark-complete")
     ("op", po::value<string>(&op),
-     "Arg is one of [info, log, remove, fsck, export, import, list, fix-lost, list-pgs, rm-past-intervals, dump-journal, dump-super, meta-list, "
+     "Arg is one of [info, log, remove, mkfs, fsck, fuse, export, import, list, fix-lost, list-pgs, rm-past-intervals, dump-journal, dump-super, meta-list, "
 	 "get-osdmap, set-osdmap, get-inc-osdmap, set-inc-osdmap, mark-complete]")
     ("epoch", po::value<unsigned>(&epoch),
      "epoch# for get-osdmap and get-inc-osdmap, the current epoch in use if not specified")
     ("file", po::value<string>(&file),
      "path of file to export, import, get-osdmap, set-osdmap, get-inc-osdmap or set-inc-osdmap")
+    ("mountpoint", po::value<string>(&mountpoint),
+     "fuse mountpoint")
     ("format", po::value<string>(&format)->default_value("json-pretty"),
      "Output format which may be json, json-pretty, xml, xml-pretty")
     ("debug", "Enable diagnostic output to stderr")
@@ -2342,7 +2341,8 @@ int main(int argc, char **argv)
     usage(desc);
     myexit(1);
   }
-  if (op != "list" && op != "fsck" && vm.count("op") && vm.count("object")) {
+  if (op != "list" &&
+      vm.count("op") && vm.count("object")) {
     cerr << "Can't specify both --op and object command syntax" << std::endl;
     usage(desc);
     myexit(1);
@@ -2352,6 +2352,11 @@ int main(int argc, char **argv)
     usage(desc);
     myexit(1);
   }
+  if (op == "fuse" && mountpoint.length() == 0) {
+    cerr << "Missing fuse mountpoint" << std::endl;
+    usage(desc);
+    myexit(1);
+  }
   outistty = isatty(STDOUT_FILENO);
 
   file_fd = fd_none;
@@ -2436,28 +2441,6 @@ int main(int argc, char **argv)
     perror(err.c_str());
     myexit(1);
   }
-  //Verify data data-path really is a filestore
-  if (type == "filestore") {
-    if (!S_ISDIR(st.st_mode)) {
-      invalid_filestore_path(dpath);
-    }
-    string check = dpath + "/whoami";
-    if (::stat(check.c_str(), &st) == -1) {
-       perror("whoami");
-       invalid_filestore_path(dpath);
-    }
-    if (!S_ISREG(st.st_mode)) {
-      invalid_filestore_path(dpath);
-    }
-    check = dpath + "/current";
-    if (::stat(check.c_str(), &st) == -1) {
-       perror("current");
-       invalid_filestore_path(dpath);
-    }
-    if (!S_ISDIR(st.st_mode)) {
-      invalid_filestore_path(dpath);
-    }
-  }
 
   if (pgidstr.length() && !pgid.parse(pgidstr.c_str())) {
     cerr << "Invalid pgid '" << pgidstr << "' specified" << std::endl;
@@ -2466,12 +2449,7 @@ int main(int argc, char **argv)
 
   ObjectStore *fs = ObjectStore::create(g_ceph_context, type, dpath, jpath, flags);
   if (fs == NULL) {
-    cerr << "Need a valid --type e.g. filestore, memstore, keyvaluestore" << std::endl;
-    if (type == "keyvaluestore") {
-      cerr << "Add \"keyvaluestore\" to "
-           << "enable_experimental_unrecoverable_data_corrupting_features"
-           << std::endl;
-    }
+    cerr << "Unable to create store of type " << type << std::endl;
     myexit(1);
   }
 
@@ -2479,15 +2457,23 @@ int main(int argc, char **argv)
     int r = fs->fsck();
     if (r < 0) {
       cerr << "fsck failed: " << cpp_strerror(r) << std::endl;
-      exit(1);
+      myexit(1);
     }
     if (r > 0) {
       cerr << "fsck found " << r << " errors" << std::endl;
-      exit(1);
+      myexit(1);
     }
     cout << "fsck found no errors" << std::endl;
-    exit(0);
+    myexit(0);
   }
+  if (op == "mkfs") {
+    int r = fs->mkfs();
+    if (r < 0) {
+      cerr << "mkfs failed: " << cpp_strerror(r) << std::endl;
+      myexit(1);
+    }
+    myexit(0);
+  }
 
   ObjectStore::Sequencer *osr = new ObjectStore::Sequencer(__func__);
   int ret = fs->mount();
@@ -2500,6 +2486,21 @@ int main(int argc, char **argv)
     myexit(1);
   }
 
+  if (op == "fuse") {
+#ifdef HAVE_LIBFUSE
+    FuseStore fuse(fs, mountpoint);
+    cout << "mounting fuse at " << mountpoint << " ..." << std::endl;
+    int r = fuse.main();
+    if (r < 0) {
+      cerr << "failed to mount fuse: " << cpp_strerror(r) << std::endl;
+      myexit(1);
+    }
+#else
+    cerr << "fuse support not enabled" << std::endl;
+#endif
+    myexit(0);
+  }
+
   vector<coll_t> ls;
   vector<coll_t>::iterator it;
   CompatSet supported;
@@ -2823,7 +2824,7 @@ int main(int argc, char **argv)
   // If not an object command nor any of the ops handled below, then output this usage
   // before complaining about a bad pgid
   if (!vm.count("objcmd") && op != "export" && op != "info" && op != "log" && op != "rm-past-intervals" && op != "mark-complete") {
-    cerr << "Must provide --op (info, log, remove, fsck, export, import, list, fix-lost, list-pgs, rm-past-intervals, dump-journal, dump-super, meta-list, "
+    cerr << "Must provide --op (info, log, remove, mkfs, fsck, fuse, export, import, list, fix-lost, list-pgs, rm-past-intervals, dump-journal, dump-super, meta-list, "
       "get-osdmap, set-osdmap, get-inc-osdmap, set-inc-osdmap, mark-complete)"
 	 << std::endl;
     usage(desc);
@@ -3136,7 +3137,7 @@ int main(int argc, char **argv)
       ret = write_info(*t, map_epoch, info, past_intervals);
 
       if (ret == 0) {
-        fs->apply_transaction(osr, *t);
+        fs->apply_transaction(osr, std::move(*t));
         cout << "Removal succeeded" << std::endl;
       }
     } else if (op == "mark-complete") {
@@ -3164,7 +3165,7 @@ int main(int argc, char **argv)
 	ret = write_info(*t, map_epoch, info, past_intervals);
 	if (ret != 0)
 	  goto out;
-	fs->apply_transaction(osr, *t);
+	fs->apply_transaction(osr, std::move(*t));
       }
       cout << "Marking complete succeeded" << std::endl;
     } else {
diff --git a/src/tools/cephfs/DataScan.cc b/src/tools/cephfs/DataScan.cc
index 965ab52..ce6a4e0 100644
--- a/src/tools/cephfs/DataScan.cc
+++ b/src/tools/cephfs/DataScan.cc
@@ -1406,7 +1406,7 @@ int MetadataDriver::find_or_create_dirfrag(
     bufferlist fnode_bl;
     fnode_t blank_fnode;
     blank_fnode.version = 1;
-    blank_fnode.damage_flags |= (DAMAGE_RSTATS | DAMAGE_RSTATS);
+    blank_fnode.damage_flags |= (DAMAGE_STATS | DAMAGE_RSTATS);
     blank_fnode.encode(fnode_bl);
 
 
diff --git a/src/tools/cephfs/TableTool.cc b/src/tools/cephfs/TableTool.cc
index ebfcd0a..2d14a4f 100644
--- a/src/tools/cephfs/TableTool.cc
+++ b/src/tools/cephfs/TableTool.cc
@@ -277,7 +277,7 @@ public:
 class InoTableHandler : public TableHandler<InoTable>
 {
   public:
-  InoTableHandler(mds_rank_t r)
+  explicit InoTableHandler(mds_rank_t r)
     : TableHandler(r, "inotable", true)
   {}
 
diff --git a/src/tools/rados/PoolDump.h b/src/tools/rados/PoolDump.h
index 6b4eae5..0415050 100644
--- a/src/tools/rados/PoolDump.h
+++ b/src/tools/rados/PoolDump.h
@@ -25,7 +25,7 @@ namespace librados {
 class PoolDump : public RadosDump
 {
   public:
-    PoolDump(int file_fd_) : RadosDump(file_fd_, false) {}
+    explicit PoolDump(int file_fd_) : RadosDump(file_fd_, false) {}
     int dump(librados::IoCtx *io_ctx);
 };
 
diff --git a/src/tools/rados/RadosImport.cc b/src/tools/rados/RadosImport.cc
index b4b397b..4f72142 100644
--- a/src/tools/rados/RadosImport.cc
+++ b/src/tools/rados/RadosImport.cc
@@ -243,8 +243,22 @@ int RadosImport::get_object_rados(librados::IoCtx &ioctx, bufferlist &bl, bool n
     need_align = true;
     alignment = align;
   } else {
-    if ((need_align = ioctx.pool_requires_alignment()))
-      alignment = ioctx.pool_required_alignment();
+    int ret = ioctx.pool_requires_alignment2(&need_align);
+    if (ret < 0) {
+      cerr << "pool_requires_alignment2 failed: " << cpp_strerror(ret)
+        << std::endl;
+      return ret;
+    }
+
+    if (need_align) {
+      ret = ioctx.pool_required_alignment2(&alignment);
+      if (ret < 0) {
+        cerr << "pool_required_alignment2 failed: " << cpp_strerror(ret)
+	  << std::endl;
+	return ret;
+      }
+      assert(alignment != 0);
+    }
   }
 
   if (need_align) {
diff --git a/src/tools/rados/rados.cc b/src/tools/rados/rados.cc
index ad75e9f..fb4967f 100644
--- a/src/tools/rados/rados.cc
+++ b/src/tools/rados/rados.cc
@@ -27,6 +27,7 @@ using namespace libradosstriper;
 #include "common/errno.h"
 #include "common/Formatter.h"
 #include "common/obj_bencher.h"
+#include "include/stringify.h"
 #include "mds/inode_backtrace.h"
 #include "auth/Crypto.h"
 #include <iostream>
@@ -159,6 +160,10 @@ void usage(ostream& out)
 "        select target pool by name\n"
 "   -b op_size\n"
 "        set the block size for put/get ops and for write benchmarking\n"
+"   -o object_size\n"
+"        set the object size for put/get ops and for write benchmarking\n"
+"   --max-objects\n"
+"        set the max number of objects for write benchmarking\n"
 "   -s name\n"
 "   --snap name\n"
 "        select given snap name for (read) IO\n"
@@ -537,7 +542,7 @@ public:
     librados::AioCompletion *completion;
 
     LoadGenOp() : id(0), type(0), off(0), len(0), lg(NULL), completion(NULL) {}
-    LoadGenOp(LoadGen *_lg) : id(0), type(0), off(0), len(0), lg(_lg), completion(NULL) {}
+    explicit LoadGenOp(LoadGen *_lg) : id(0), type(0), off(0), len(0), lg(_lg), completion(NULL) {}
   };
 
   int max_op;
@@ -564,7 +569,7 @@ public:
     utime_t now = ceph_clock_now(g_ceph_context);
     now -= start_time;
     uint64_t ns = now.nsec();
-    float total = ns / 1000000000;
+    float total = (float) ns / 1000000000.0;
     total += now.sec();
     return total;
   }
@@ -572,7 +577,7 @@ public:
   Mutex lock;
   Cond cond;
 
-  LoadGen(Rados *_rados) : rados(_rados), going_down(false), lock("LoadGen") {
+  explicit LoadGen(Rados *_rados) : rados(_rados), going_down(false), lock("LoadGen") {
     read_percent = 80;
     min_obj_len = 1024;
     max_obj_len = 5ull * 1024ull * 1024ull * 1024ull;
@@ -593,10 +598,10 @@ public:
   void cleanup();
 
   void io_cb(completion_t c, LoadGenOp *op) {
-    total_completed += op->len;
-
     Mutex::Locker l(lock);
 
+    total_completed += op->len;
+
     double rate = (double)cur_completed_rate() / (1024 * 1024);
     std::streamsize original_precision = cout.precision();
     cout.precision(3);
@@ -864,25 +869,29 @@ protected:
     completions[slot] = 0;
   }
 
-  int aio_read(const std::string& oid, int slot, bufferlist *pbl, size_t len) {
+  int aio_read(const std::string& oid, int slot, bufferlist *pbl, size_t len,
+	       size_t offset) {
-    return io_ctx.aio_read(oid, completions[slot], pbl, len, 0);
+    return io_ctx.aio_read(oid, completions[slot], pbl, len, offset);
   }
 
-  int aio_write(const std::string& oid, int slot, bufferlist& bl, size_t len) {
+  int aio_write(const std::string& oid, int slot, bufferlist& bl, size_t len,
+		size_t offset) {
     librados::ObjectWriteOperation op;
 
     if (write_destination & OP_WRITE_DEST_OBJ) {
-      op.write(0, bl);
+      op.write(offset, bl);
     }
 
     if (write_destination & OP_WRITE_DEST_OMAP) {
       std::map<std::string, librados::bufferlist> omap;
-      omap["bench-omap-key"] = bl;
+      omap[string("bench-omap-key-") + stringify(offset)] = bl;
       op.omap_set(omap);
     }
 
     if (write_destination & OP_WRITE_DEST_XATTR) {
-      op.setxattr("bench-xattr-key", bl);
+      char key[80];
+      snprintf(key, sizeof(key), "bench-xattr-key-%d", (int)offset);
+      op.setxattr(key, bl);
     }
 
     return io_ctx.aio_operate(oid, completions[slot], &op);
@@ -1214,6 +1223,8 @@ static int rados_tool_common(const std::map < std::string, std::string > &opts,
   string oloc, target_oloc, nspace, target_nspace;
   int concurrent_ios = 16;
   unsigned op_size = default_op_size;
+  unsigned object_size = 0;
+  unsigned max_objects = 0;
   bool block_size_specified = false;
   int bench_write_dest = 0;
   bool cleanup = true;
@@ -1298,6 +1309,19 @@ static int rados_tool_common(const std::map < std::string, std::string > &opts,
     }
     block_size_specified = true;
   }
+  i = opts.find("object-size");
+  if (i != opts.end()) {
+    if (rados_sistrtoll(i, &object_size)) {
+      return -EINVAL;
+    }
+    block_size_specified = true;
+  }
+  i = opts.find("max-objects");
+  if (i != opts.end()) {
+    if (rados_sistrtoll(i, &max_objects)) {
+      return -EINVAL;
+    }
+  }
   i = opts.find("snap");
   if (i != opts.end()) {
     snapname = i->second.c_str();
@@ -2518,8 +2542,11 @@ static int rados_tool_common(const std::map < std::string, std::string > &opts,
         outstream = &cout;
       bencher.set_outstream(*outstream);
     }
+    if (!object_size)
+      object_size = op_size;
     ret = bencher.aio_bench(operation, seconds,
-			    concurrent_ios, op_size, cleanup, run_name, no_verify);
+			    concurrent_ios, op_size, object_size,
+			    max_objects, cleanup, run_name, no_verify);
     if (ret != 0)
       cerr << "error during benchmark: " << ret << std::endl;
     if (formatter && output)
@@ -2996,6 +3023,12 @@ int main(int argc, const char **argv)
       opts["block-size"] = val;
     } else if (ceph_argparse_witharg(args, i, &val, "-b", (char*)NULL)) {
       opts["block-size"] = val;
+    } else if (ceph_argparse_witharg(args, i, &val, "--object-size", (char*)NULL)) {
+      opts["object-size"] = val;
+    } else if (ceph_argparse_witharg(args, i, &val, "--max-objects", (char*)NULL)) {
+      opts["max-objects"] = val;
+    } else if (ceph_argparse_witharg(args, i, &val, "-o", (char*)NULL)) {
+      opts["object-size"] = val;
     } else if (ceph_argparse_witharg(args, i, &val, "-s", "--snap", (char*)NULL)) {
       opts["snap"] = val;
     } else if (ceph_argparse_witharg(args, i, &val, "-S", "--snapid", (char*)NULL)) {
diff --git a/src/tools/rbd/ArgumentTypes.cc b/src/tools/rbd/ArgumentTypes.cc
index a403f01..698a643 100644
--- a/src/tools/rbd/ArgumentTypes.cc
+++ b/src/tools/rbd/ArgumentTypes.cc
@@ -104,15 +104,15 @@ void add_image_option(po::options_description *opt,
 
 void add_snap_option(po::options_description *opt,
                       ArgumentModifier modifier) {
-  if (modifier == ARGUMENT_MODIFIER_DEST) {
-    return;
-  }
 
   std::string name = SNAPSHOT_NAME;
   std::string description = "snapshot name";
   switch (modifier) {
   case ARGUMENT_MODIFIER_NONE:
+    break;
   case ARGUMENT_MODIFIER_DEST:
+    name = DEST_SNAPSHOT_NAME;
+    description = "destination " + description;
     break;
   case ARGUMENT_MODIFIER_SOURCE:
     description = "source " + description;
diff --git a/src/tools/rbd/ArgumentTypes.h b/src/tools/rbd/ArgumentTypes.h
index 8313cf0..50c74aa 100644
--- a/src/tools/rbd/ArgumentTypes.h
+++ b/src/tools/rbd/ArgumentTypes.h
@@ -50,6 +50,7 @@ static const std::string DEST_POOL_NAME("dest-pool");
 static const std::string IMAGE_NAME("image");
 static const std::string DEST_IMAGE_NAME("dest");
 static const std::string SNAPSHOT_NAME("snap");
+static const std::string DEST_SNAPSHOT_NAME("dest-snap");
 static const std::string JOURNAL_NAME("journal");
 static const std::string DEST_JOURNAL_NAME("dest-journal");
 static const std::string PATH("path");
diff --git a/src/tools/rbd/Utils.cc b/src/tools/rbd/Utils.cc
index 4f4b2e8..c4fbee4 100644
--- a/src/tools/rbd/Utils.cc
+++ b/src/tools/rbd/Utils.cc
@@ -144,6 +144,8 @@ int get_pool_image_snapshot_names(const po::variables_map &vm,
     at::DEST_POOL_NAME : at::POOL_NAME);
   std::string image_key = (mod == at::ARGUMENT_MODIFIER_DEST ?
     at::DEST_IMAGE_NAME : at::IMAGE_NAME);
+  std::string snap_key = (mod == at::ARGUMENT_MODIFIER_DEST ?
+	at::DEST_SNAPSHOT_NAME : at::SNAPSHOT_NAME);
 
   if (vm.count(pool_key) && pool_name != nullptr) {
     *pool_name = vm[pool_key].as<std::string>();
@@ -151,11 +153,10 @@ int get_pool_image_snapshot_names(const po::variables_map &vm,
   if (vm.count(image_key) && image_name != nullptr) {
     *image_name = vm[image_key].as<std::string>();
   }
-  if (vm.count(at::SNAPSHOT_NAME) && snap_name != nullptr &&
-      mod != at::ARGUMENT_MODIFIER_DEST) {
-    *snap_name = vm[at::SNAPSHOT_NAME].as<std::string>();
-  }
-
+  if (vm.count(snap_key) && snap_name != nullptr) {
+    *snap_name = vm[snap_key].as<std::string>();
+  }
+
   if (image_name != nullptr && !image_name->empty()) {
     // despite the separate pool and snapshot name options,
     // we can also specify them via the image option
@@ -591,7 +592,7 @@ int init_and_open_image(const std::string &pool_name,
   return 0;
 }
 
-int snap_set(librbd::Image &image, const std::string snap_name) {
+int snap_set(librbd::Image &image, const std::string &snap_name) {
   int r = image.snap_set(snap_name.c_str());
   if (r < 0) {
     std::cerr << "error setting snapshot context: " << cpp_strerror(r)
diff --git a/src/tools/rbd/Utils.h b/src/tools/rbd/Utils.h
index 0f290df..7caf43d 100644
--- a/src/tools/rbd/Utils.h
+++ b/src/tools/rbd/Utils.h
@@ -96,7 +96,7 @@ int init_and_open_image(const std::string &pool_name,
                         librados::Rados *rados, librados::IoCtx *io_ctx,
                         librbd::Image *image);
 
-int snap_set(librbd::Image &image, const std::string snap_name);
+int snap_set(librbd::Image &image, const std::string &snap_name);
 
 std::string image_id(librbd::Image& image);
 
diff --git a/src/tools/rbd/action/BenchWrite.cc b/src/tools/rbd/action/BenchWrite.cc
index d767c08..849340b 100644
--- a/src/tools/rbd/action/BenchWrite.cc
+++ b/src/tools/rbd/action/BenchWrite.cc
@@ -64,7 +64,7 @@ struct rbd_bencher {
   Cond cond;
   int in_flight;
 
-  rbd_bencher(librbd::Image *i)
+  explicit rbd_bencher(librbd::Image *i)
     : image(i),
       lock("rbd_bencher::lock"),
       in_flight(0)
diff --git a/src/tools/rbd/action/Journal.cc b/src/tools/rbd/action/Journal.cc
index cd4213a..2c556a6 100644
--- a/src/tools/rbd/action/Journal.cc
+++ b/src/tools/rbd/action/Journal.cc
@@ -21,7 +21,7 @@
 #include "journal/ReplayEntry.h"
 #include "journal/ReplayHandler.h"
 //#include "librbd/Journal.h" // XXXMG: for librbd::Journal::reset()
-#include "librbd/journal/Entries.h"
+#include "librbd/journal/Types.h"
 
 namespace rbd {
 namespace action {
@@ -113,7 +113,7 @@ static int do_show_journal_status(librados::IoCtx& io_ctx,
     f->dump_unsigned("active_set", active_set);
     f->open_object_section("registered_clients");
     for (std::set<cls::journal::Client>::iterator c =
-          registered_clients.begin(); c != registered_clients.end(); c++) {
+          registered_clients.begin(); c != registered_clients.end(); ++c) {
       c->dump(f);
     }
     f->close_section();
@@ -124,7 +124,7 @@ static int do_show_journal_status(librados::IoCtx& io_ctx,
     std::cout << "active_set: " << active_set << std::endl;
     std::cout << "registered clients: " << std::endl;
     for (std::set<cls::journal::Client>::iterator c =
-          registered_clients.begin(); c != registered_clients.end(); c++) {
+          registered_clients.begin(); c != registered_clients.end(); ++c) {
       std::cout << "\t" << *c << std::endl;
     }
   }
@@ -164,10 +164,8 @@ static int do_reset_journal(librados::IoCtx& io_ctx,
     return r;
   }
 
-  // XXXMG
-  const std::string CLIENT_DESCRIPTION = "master image";
-
-  r = journaler.register_client(CLIENT_DESCRIPTION);
+  // TODO register with librbd payload
+  r = journaler.register_client(bufferlist());
   if (r < 0) {
     std::cerr << "failed to register client: " << cpp_strerror(r) << std::endl;
     return r;
@@ -185,7 +183,8 @@ public:
   int init() {
     int r;
 
-    r = register_client("rbd journal");
+    // TODO register with librbd payload
+    r = register_client(bufferlist());
     if (r < 0) {
       std::cerr << "failed to register client: " << cpp_strerror(r)
 		<< std::endl;
@@ -262,7 +261,7 @@ public:
 protected:
   struct ReplayHandler : public ::journal::ReplayHandler {
     JournalPlayer *journal;
-    ReplayHandler(JournalPlayer *_journal) : journal(_journal) {}
+    explicit ReplayHandler(JournalPlayer *_journal) : journal(_journal) {}
 
     virtual void get() {}
     virtual void put() {}
@@ -279,12 +278,12 @@ protected:
     int r = 0;
     while (true) {
       ::journal::ReplayEntry replay_entry;
-      std::string tag;
-      if (!m_journaler.try_pop_front(&replay_entry, &tag)) {
+      uint64_t tag_id;
+      if (!m_journaler.try_pop_front(&replay_entry, &tag_id)) {
 	break;
       }
 
-      r = process_entry(replay_entry, tag);
+      r = process_entry(replay_entry, tag_id);
       if (r < 0) {
 	break;
       }
@@ -292,7 +291,7 @@ protected:
   }
 
   virtual int process_entry(::journal::ReplayEntry replay_entry,
-			    std::string& tag) = 0;
+			    uint64_t tag_id) = 0;
 
   void handle_replay_complete(int r) {
     m_journaler.stop_replay();
@@ -354,10 +353,10 @@ private:
   };
 
   int process_entry(::journal::ReplayEntry replay_entry,
-		    std::string& tag) {
+		    uint64_t tag_id) {
     m_s.total++;
     if (m_verbose) {
-      std::cout << "Entry: tag=" << tag << ", commit_tid="
+      std::cout << "Entry: tag_id=" << tag_id << ", commit_tid="
 		<< replay_entry.get_commit_tid() << std::endl;
     }
     bufferlist data = replay_entry.get_data();
@@ -381,27 +380,27 @@ static int do_inspect_journal(librados::IoCtx& io_ctx,
 }
 
 struct ExportEntry {
-  std::string tag;
+  uint64_t tag_id;
   uint64_t commit_tid;
   int type;
   bufferlist entry;
 
-  ExportEntry() : tag(), commit_tid(0), type(0), entry() {}
+  ExportEntry() : tag_id(0), commit_tid(0), type(0), entry() {}
 
-  ExportEntry(const std::string& tag, uint64_t commit_tid, int type,
+  ExportEntry(uint64_t tag_id, uint64_t commit_tid, int type,
 	      const bufferlist& entry)
-    : tag(tag), commit_tid(commit_tid), type(type), entry(entry) {
+    : tag_id(tag_id), commit_tid(commit_tid), type(type), entry(entry) {
   }
 
   void dump(Formatter *f) const {
-    ::encode_json("tag", tag, f);
+    ::encode_json("tag_id", tag_id, f);
     ::encode_json("commit_tid", commit_tid, f);
     ::encode_json("type", type, f);
     ::encode_json("entry", entry, f);
   }
 
   void decode_json(JSONObj *obj) {
-    JSONDecoder::decode_json("tag", tag, obj);
+    JSONDecoder::decode_json("tag_id", tag_id, obj);
     JSONDecoder::decode_json("commit_tid", commit_tid, obj);
     JSONDecoder::decode_json("type", type, obj);
     JSONDecoder::decode_json("entry", entry, obj);
@@ -448,7 +447,7 @@ private:
   };
 
   int process_entry(::journal::ReplayEntry replay_entry,
-		    std::string& tag) {
+		    uint64_t tag_id) {
     m_s.total++;
     int type = -1;
     bufferlist entry = replay_entry.get_data();
@@ -461,7 +460,8 @@ private:
     } else {
       type = event_entry.get_event_type();
     }
-    ExportEntry export_entry(tag, replay_entry.get_commit_tid(), type, entry);
+    ExportEntry export_entry(tag_id, replay_entry.get_commit_tid(), type,
+                             entry);
     JSONFormatter f;
     ::encode_json("event_entry", export_entry, &f);
     std::ostringstream oss;
@@ -651,7 +651,7 @@ public:
       librbd::journal::EventEntry event_entry;
       r = inspect_entry(e.entry, event_entry, m_verbose);
       if (r < 0) {
-	std::cerr << "rbd: corrupted entry " << n << ": tag=" << e.tag
+	std::cerr << "rbd: corrupted entry " << n << ": tag_id=" << e.tag_id
 		  << ", commit_tid=" << e.commit_tid << std::endl;
 	if (m_no_error) {
 	  r1 = r;
@@ -660,7 +660,7 @@ public:
 	  break;
 	}
       }
-      m_journaler.append(e.tag, e.entry);
+      m_journaler.append(e.tag_id, e.entry);
       error_count--;
     }
 
diff --git a/src/tools/rbd/action/Kernel.cc b/src/tools/rbd/action/Kernel.cc
index 541da95..3c552d9 100644
--- a/src/tools/rbd/action/Kernel.cc
+++ b/src/tools/rbd/action/Kernel.cc
@@ -62,12 +62,12 @@ static std::string map_option_int_cb(const char *value_char)
   return stringify(d);
 }
 
-static void put_map_option(const std::string key, std::string val)
+static void put_map_option(const std::string &key, std::string val)
 {
   map_options[key] = val;
 }
 
-static int put_map_option_value(const std::string opt, const char *value_char,
+static int put_map_option_value(const std::string &opt, const char *value_char,
                                 std::string (*parse_cb)(const char *))
 {
   if (!value_char || *value_char == '\0') {
diff --git a/src/tools/rbd/action/Lock.cc b/src/tools/rbd/action/Lock.cc
index c39a4c5..f9f4a22 100644
--- a/src/tools/rbd/action/Lock.cc
+++ b/src/tools/rbd/action/Lock.cc
@@ -189,7 +189,7 @@ int execute_add(const po::variables_map &vm) {
   if (r < 0) {
     if (r == -EBUSY || r == -EEXIST) {
       if (!lock_tag.empty()) {
-        std::cerr << "rbd: lock is alrady held by someone else"
+        std::cerr << "rbd: lock is already held by someone else"
                   << " with a different tag" << std::endl;
       } else {
         std::cerr << "rbd: lock is already held by someone else" << std::endl;
diff --git a/src/tools/rbd/action/MergeDiff.cc b/src/tools/rbd/action/MergeDiff.cc
index f56ec1f..7289e11 100644
--- a/src/tools/rbd/action/MergeDiff.cc
+++ b/src/tools/rbd/action/MergeDiff.cc
@@ -377,7 +377,7 @@ static int do_merge_diff(const char *first, const char *second,
 done:
   if (pd > 2)
     close(pd);
-  if (sd > 2)
+  if (sd > 2)
     close(sd);
   if (fd > 2)
     close(fd);
diff --git a/src/tools/rbd/action/Nbd.cc b/src/tools/rbd/action/Nbd.cc
index be42173..81ff183 100644
--- a/src/tools/rbd/action/Nbd.cc
+++ b/src/tools/rbd/action/Nbd.cc
@@ -66,7 +66,7 @@ static int call_nbd_cmd(const po::variables_map &vm,
   }
 
   for (std::vector<const char*>::const_iterator p = args.begin();
-       p != args.end(); p++)
+       p != args.end(); ++p)
     process.add_cmd_arg(*p);
 
   if (process.spawn()) {
diff --git a/src/tools/rbd/action/Snap.cc b/src/tools/rbd/action/Snap.cc
index e20e878..571853c 100644
--- a/src/tools/rbd/action/Snap.cc
+++ b/src/tools/rbd/action/Snap.cc
@@ -446,7 +446,7 @@ int execute_rename(const po::variables_map &vm) {
               << std::endl;
     return -EINVAL;
   }
-
+  
   librados::Rados rados;
   librados::IoCtx io_ctx;
   librbd::Image image;
diff --git a/src/tools/rbd_mirror/ClusterWatcher.cc b/src/tools/rbd_mirror/ClusterWatcher.cc
new file mode 100644
index 0000000..91a7c5a
--- /dev/null
+++ b/src/tools/rbd_mirror/ClusterWatcher.cc
@@ -0,0 +1,130 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "common/debug.h"
+#include "common/errno.h"
+
+#include "librbd/internal.h"
+
+#include "ClusterWatcher.h"
+
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd-mirror: "
+
+using std::list;
+using std::map;
+using std::set;
+using std::string;
+using std::unique_ptr;
+using std::vector;
+
+using librados::Rados;
+using librados::IoCtx;
+
+namespace rbd {
+namespace mirror {
+
+// Remember the cluster handle to query and the caller-owned lock that
+// protects the cached configuration.
+ClusterWatcher::ClusterWatcher(RadosRef cluster, Mutex &lock)
+  : m_lock(lock), m_cluster(cluster)
+{
+}
+
+// Return the cached peer -> pool id mapping.  The caller must already hold
+// the lock passed to the constructor.
+const std::map<peer_t, std::set<int64_t> >&
+ClusterWatcher::get_peer_configs() const
+{
+  assert(m_lock.is_locked());
+  return m_peer_configs;
+}
+
+// Return the cached names of the pools recorded by the last refresh.  The
+// caller must already hold the lock passed to the constructor.
+const set<string>& ClusterWatcher::get_pool_names() const
+{
+  assert(m_lock.is_locked());
+  return m_pool_names;
+}
+
+// Re-read the mirroring configuration of every pool and publish the result.
+// The cluster is queried without the lock held; only the final assignment
+// to the cached members happens under m_lock.
+void ClusterWatcher::refresh_pools()
+{
+  dout(20) << __func__ << dendl;
+
+  map<peer_t, set<int64_t> > fresh_peer_configs;
+  set<string> fresh_pool_names;
+  read_configs(&fresh_peer_configs, &fresh_pool_names);
+
+  // TODO: perhaps use a workqueue instead, once we get notifications
+  // about config changes for existing pools
+  Mutex::Locker locker(m_lock);
+  m_peer_configs = fresh_peer_configs;
+  m_pool_names = fresh_pool_names;
+}
+
+// Walk every pool in the cluster and collect its mirroring configuration.
+//
+// peer_configs: out, peer -> ids of the pools that list that peer
+// pool_names:   out, names of the pools that have mirroring enabled
+//
+// Cache-tier pools, pools that vanish while being inspected and pools whose
+// configuration cannot be read are skipped; failures are logged and do not
+// abort the scan.  If the pool list itself cannot be read the outputs are
+// left untouched.
+void ClusterWatcher::read_configs(map<peer_t, set<int64_t> > *peer_configs,
+                                  set<string> *pool_names)
+{
+  list<std::pair<int64_t, string> > pools;
+  int r = m_cluster->pool_list2(pools);
+  if (r < 0) {
+    derr << "error listing pools: " << cpp_strerror(r) << dendl;
+    return;
+  }
+
+  for (const auto &kv : pools) {
+    const int64_t pool_id = kv.first;
+    const string &pool_name = kv.second;
+
+    int64_t base_tier;
+    r = m_cluster->pool_get_base_tier(pool_id, &base_tier);
+    if (r == -ENOENT) {
+      dout(10) << "pool " << pool_name << " no longer exists" << dendl;
+      continue;
+    } else if (r < 0) {
+      derr << "Error retrieving base tier for pool " << pool_name
+           << " : " << cpp_strerror(r) << dendl;
+      continue;
+    }
+    if (pool_id != base_tier) {
+      // pool is a cache; skip it
+      continue;
+    }
+
+    IoCtx ioctx;
+    r = m_cluster->ioctx_create2(pool_id, ioctx);
+    if (r == -ENOENT) {
+      dout(10) << "pool " << pool_name << " no longer exists" << dendl;
+      continue;
+    } else if (r < 0) {
+      derr << "Error accessing pool " << pool_name
+           << " : " << cpp_strerror(r) << dendl;
+      continue;
+    }
+
+    bool enabled = false;
+    r = librbd::mirror_is_enabled(ioctx, &enabled);
+    if (r < 0) {
+      derr << "could not tell whether mirroring was enabled for " << pool_name
+           << " : " << cpp_strerror(r) << dendl;
+      continue;
+    }
+    if (!enabled) {
+      dout(10) << "mirroring is disabled for pool " << pool_name << dendl;
+      continue;
+    }
+
+    vector<librbd::mirror_peer_t> configs;
+    r = librbd::mirror_peer_list(ioctx, &configs);
+    if (r == -ENOENT)
+      continue; // raced with disabling mirroring
+    if (r < 0) {
+      derr << "error reading mirroring config for pool " << pool_name
+           << " : " << cpp_strerror(r) << dendl;
+      continue;
+    }
+
+    for (const auto &config : configs) {
+      // converts the librbd peer description into our own key type
+      peer_t peer(config);
+      dout(20) << "pool " << pool_name << " has mirroring enabled for peer "
+               << peer << dendl;
+      (*peer_configs)[peer].insert(pool_id);
+    }
+
+    pool_names->insert(ioctx.get_pool_name());
+  }
+}
+
+} // namespace mirror
+} // namespace rbd
diff --git a/src/tools/rbd_mirror/ClusterWatcher.h b/src/tools/rbd_mirror/ClusterWatcher.h
new file mode 100644
index 0000000..d708748
--- /dev/null
+++ b/src/tools/rbd_mirror/ClusterWatcher.h
@@ -0,0 +1,48 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_CLUSTER_WATCHER_H
+#define CEPH_RBD_MIRROR_CLUSTER_WATCHER_H
+
+#include <map>
+#include <memory>
+#include <set>
+
+#include "common/ceph_context.h"
+#include "common/Mutex.h"
+#include "common/Timer.h"
+#include "include/rados/librados.hpp"
+#include "types.h"
+
+namespace rbd {
+namespace mirror {
+
+/**
+ * Tracks mirroring configuration for pools in a single
+ * cluster.
+ *
+ * The cached results are protected by a mutex supplied by the caller:
+ * refresh_pools() takes it while publishing a new snapshot, and the getters
+ * assert that the caller already holds it.
+ */
+class ClusterWatcher {
+public:
+  // cluster: handle used for every pool query
+  // lock:    caller-owned mutex guarding the cached configuration
+  ClusterWatcher(RadosRef cluster, Mutex &lock);
+  ~ClusterWatcher() = default;
+  // not copyable
+  ClusterWatcher(const ClusterWatcher&) = delete;
+  ClusterWatcher& operator=(const ClusterWatcher&) = delete;
+  // Caller controls frequency of calls
+  void refresh_pools();
+  // Both getters return references into the cache; the lock must be held.
+  const std::map<peer_t, std::set<int64_t> >& get_peer_configs() const;
+  const std::set<std::string>& get_pool_names() const;
+
+private:
+  // Scans all pools and fills the two output containers.
+  void read_configs(std::map<peer_t, std::set<int64_t> > *peer_configs,
+		    std::set<std::string> *pool_names);
+
+  Mutex &m_lock;
+  RadosRef m_cluster;
+  // peer -> ids of the pools that list that peer
+  std::map<peer_t, std::set<int64_t> > m_peer_configs;
+  // names of the pools with mirroring enabled
+  std::set<std::string> m_pool_names;
+};
+
+} // namespace mirror
+} // namespace rbd
+
+#endif // CEPH_RBD_MIRROR_CLUSTER_WATCHER_H
diff --git a/src/tools/rbd_mirror/ImageReplayer.cc b/src/tools/rbd_mirror/ImageReplayer.cc
new file mode 100644
index 0000000..af83d01
--- /dev/null
+++ b/src/tools/rbd_mirror/ImageReplayer.cc
@@ -0,0 +1,62 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "common/debug.h"
+#include "common/errno.h"
+#include "include/stringify.h"
+#include "ImageReplayer.h"
+
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd-mirror: "
+
+using std::map;
+using std::string;
+using std::unique_ptr;
+using std::vector;
+
+namespace rbd {
+namespace mirror {
+
+// Record which remote image to replay.  No cluster I/O happens here; the
+// io contexts are opened by start().  The lock name embeds the remote pool
+// id and image id.
+ImageReplayer::ImageReplayer(RadosRef local, RadosRef remote,
+                             int64_t remote_pool_id,
+                             const string &remote_image_id)
+  : m_lock(string("rbd::mirror::ImageReplayer ") +
+           stringify(remote_pool_id) + " " + remote_image_id),
+    m_remote_pool_id(remote_pool_id),
+    m_image_id(remote_image_id),
+    m_local(local),
+    m_remote(remote)
+{
+}
+
+// Nothing to release explicitly; members clean up after themselves.
+ImageReplayer::~ImageReplayer()
+{
+}
+
+// Open the remote pool by id, then the local pool that has the same name.
+// Returns 0 on success or the negative error code of the step that failed.
+int ImageReplayer::start()
+{
+  const int remote_r = m_remote->ioctx_create2(m_remote_pool_id,
+                                               m_remote_ioctx);
+  if (remote_r < 0) {
+    derr << "error opening ioctx for remote pool " << m_remote_pool_id
+         << " : " << cpp_strerror(remote_r) << dendl;
+    return remote_r;
+  }
+
+  // the local pool is looked up by the remote pool's name
+  m_pool_name = m_remote_ioctx.get_pool_name();
+
+  const int local_r = m_local->ioctx_create(m_pool_name.c_str(),
+                                            m_local_ioctx);
+  if (local_r < 0) {
+    derr << "error opening ioctx for local pool " << m_pool_name
+         << " : " << cpp_strerror(local_r) << dendl;
+    return local_r;
+  }
+
+  return 0;
+}
+
+// Close both io contexts opened by start(), remote first.
+void ImageReplayer::stop()
+{
+  m_remote_ioctx.close();
+  m_local_ioctx.close();
+}
+
+} // namespace mirror
+} // namespace rbd
diff --git a/src/tools/rbd_mirror/ImageReplayer.h b/src/tools/rbd_mirror/ImageReplayer.h
new file mode 100644
index 0000000..311f96e
--- /dev/null
+++ b/src/tools/rbd_mirror/ImageReplayer.h
@@ -0,0 +1,45 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_IMAGE_REPLAYER_H
+#define CEPH_RBD_MIRROR_IMAGE_REPLAYER_H
+
+#include <map>
+#include <string>
+#include <vector>
+
+#include "common/Mutex.h"
+#include "common/WorkQueue.h"
+#include "include/rados/librados.hpp"
+#include "types.h"
+
+namespace rbd {
+namespace mirror {
+
+/**
+ * Replays changes from a remote cluster for a single image.
+ *
+ * In the code visible here, start() only opens an io context on the remote
+ * pool and on the local pool of the same name, and stop() closes them.
+ */
+class ImageReplayer {
+public:
+  // local/remote:    cluster handles
+  // remote_pool_id:  id of the pool on the remote cluster
+  // remote_image_id: id of the image inside that pool
+  ImageReplayer(RadosRef local, RadosRef remote,
+		int64_t remote_pool_id, const std::string &remote_image_id);
+  ~ImageReplayer();
+  // not copyable
+  ImageReplayer(const ImageReplayer&) = delete;
+  ImageReplayer& operator=(const ImageReplayer&) = delete;
+
+  // Returns 0 on success, a negative error code otherwise.
+  int start();
+  void stop();
+
+private:
+  Mutex m_lock;
+  int64_t m_remote_pool_id;
+  // filled in by start() from the remote io context
+  std::string m_pool_name;
+  std::string m_image_id;
+  RadosRef m_local, m_remote;
+  librados::IoCtx m_local_ioctx, m_remote_ioctx;
+};
+
+} // namespace mirror
+} // namespace rbd
+
+#endif // CEPH_RBD_MIRROR_IMAGE_REPLAYER_H
diff --git a/src/tools/rbd_mirror/Mirror.cc b/src/tools/rbd_mirror/Mirror.cc
new file mode 100644
index 0000000..67111b0
--- /dev/null
+++ b/src/tools/rbd_mirror/Mirror.cc
@@ -0,0 +1,101 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include <boost/range/adaptor/map.hpp>
+
+#include "common/debug.h"
+#include "common/errno.h"
+#include "Mirror.h"
+
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd-mirror: "
+
+using std::chrono::seconds;
+using std::list;
+using std::map;
+using std::set;
+using std::string;
+using std::unique_ptr;
+using std::vector;
+
+using librados::Rados;
+using librados::IoCtx;
+using librbd::mirror_peer_t;
+
+namespace rbd {
+namespace mirror {
+
+// Create the daemon state.  The local cluster handle is allocated here but
+// not initialised or connected until init().
+Mirror::Mirror(CephContext *cct)
+  : m_cct(cct),
+    m_lock("rbd::mirror::Mirror"),
+    m_local(new librados::Rados())
+{
+}
+
+// Ask run() to leave its loop.  The signal number is not inspected, and the
+// flag is only re-read by run() at the top of its next iteration.
+void Mirror::handle_signal(int signum)
+{
+  m_stopping.set(1);
+}
+
+// Initialise and connect the local cluster handle, then create the watcher
+// for the local mirroring configuration.  Returns a negative error code if
+// either librados step fails, otherwise the result of connect().
+int Mirror::init()
+{
+  const int init_r = m_local->init_with_context(m_cct);
+  if (init_r < 0) {
+    derr << "could not initialize rados handle" << dendl;
+    return init_r;
+  }
+
+  const int connect_r = m_local->connect();
+  if (connect_r < 0) {
+    derr << "error connecting to local cluster" << dendl;
+    return connect_r;
+  }
+
+  // TODO: make interval configurable
+  m_local_cluster_watcher.reset(new ClusterWatcher(m_local, m_lock));
+
+  return connect_r;
+}
+
+// Main loop: refresh the local configuration, reconcile the replayers with
+// it, then wait up to 30 seconds on m_cond before the next pass.  Exits once
+// m_stopping has been set.
+void Mirror::run()
+{
+  while (m_stopping.read() == 0) {
+    // the watcher takes m_lock itself, so refresh before locking here
+    m_local_cluster_watcher->refresh_pools();
+
+    Mutex::Locker locker(m_lock);
+    update_replayers(m_local_cluster_watcher->get_peer_configs());
+    // TODO: make interval configurable
+    m_cond.WaitInterval(g_ceph_context, m_lock, seconds(30));
+  }
+}
+
+// Reconcile m_replayers with the configured peers.
+//
+// peer_configs: peer -> ids of the local pools mirroring from that peer.
+//
+// A Replayer is created (and initialised) for every configured peer that
+// does not have one yet; peers whose init() fails are skipped and retried
+// on the next call.  Replayers whose peer is no longer configured are
+// destroyed.  The caller must hold m_lock.
+void Mirror::update_replayers(const map<peer_t, set<int64_t> > &peer_configs)
+{
+  assert(m_lock.is_locked());
+
+  for (const auto &kv : peer_configs) {
+    const peer_t &peer = kv.first;
+    if (m_replayers.find(peer) != m_replayers.end()) {
+      continue;
+    }
+
+    unique_ptr<Replayer> replayer(new Replayer(m_local, peer));
+    // TODO: make async, and retry connecting within replayer
+    int r = replayer->init();
+    if (r < 0) {
+      continue;
+    }
+    m_replayers.insert(std::make_pair(peer, std::move(replayer)));
+  }
+
+  // TODO: make async
+  // Prune against peer_configs itself.  The previous code compared against
+  // a local set that was never filled, so every replayer (including the
+  // ones created just above) was erased on each call.
+  for (auto it = m_replayers.begin(); it != m_replayers.end();) {
+    if (peer_configs.find(it->first) == peer_configs.end()) {
+      it = m_replayers.erase(it);
+    } else {
+      ++it;
+    }
+  }
+}
+
+} // namespace mirror
+} // namespace rbd
diff --git a/src/tools/rbd_mirror/Mirror.h b/src/tools/rbd_mirror/Mirror.h
new file mode 100644
index 0000000..cafbdd8
--- /dev/null
+++ b/src/tools/rbd_mirror/Mirror.h
@@ -0,0 +1,56 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_H
+#define CEPH_RBD_MIRROR_H
+
+#include <map>
+#include <memory>
+#include <set>
+
+#include "common/ceph_context.h"
+#include "common/Mutex.h"
+#include "include/atomic.h"
+#include "include/rados/librados.hpp"
+#include "ClusterWatcher.h"
+#include "Replayer.h"
+#include "types.h"
+
+namespace rbd {
+namespace mirror {
+
+/**
+ * Contains the main loop and overall state for rbd-mirror.
+ *
+ * Sets up mirroring, and coordinates between noticing config
+ * changes and applying them.
+ */
+class Mirror {
+public:
+  Mirror(CephContext *cct);
+  // not copyable
+  Mirror(const Mirror&) = delete;
+  Mirror& operator=(const Mirror&) = delete;
+
+  // Connects to the local cluster; returns a negative error code on failure.
+  int init();
+  // Runs the refresh/reconcile loop until handle_signal() has been called.
+  void run();
+  // Requests that run() stop; the signal number itself is not used.
+  void handle_signal(int signum);
+
+private:
+  // Container types are spelled with std:: so that this header does not
+  // depend on using-declarations made by whoever includes it.
+  // NOTE(review): refresh_peers() has no definition in Mirror.cc -- confirm
+  // whether it is still needed.
+  void refresh_peers(const std::set<peer_t> &peers);
+  // Requires m_lock to be held.
+  void update_replayers(
+    const std::map<peer_t, std::set<int64_t> > &peer_configs);
+
+  CephContext *m_cct;
+  Mutex m_lock;
+  Cond m_cond;
+  RadosRef m_local;
+
+  // monitor local cluster for config changes in peers
+  std::unique_ptr<ClusterWatcher> m_local_cluster_watcher;
+  // one replayer per configured peer
+  std::map<peer_t, std::unique_ptr<Replayer> > m_replayers;
+  // set to 1 by handle_signal(), polled by run()
+  atomic_t m_stopping;
+};
+
+} // namespace mirror
+} // namespace rbd
+
+#endif // CEPH_RBD_MIRROR_H
diff --git a/src/tools/rbd_mirror/PoolWatcher.cc b/src/tools/rbd_mirror/PoolWatcher.cc
new file mode 100644
index 0000000..480df57
--- /dev/null
+++ b/src/tools/rbd_mirror/PoolWatcher.cc
@@ -0,0 +1,152 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include <boost/bind.hpp>
+
+#include "common/debug.h"
+#include "common/errno.h"
+
+#include "cls/rbd/cls_rbd_client.h"
+#include "include/rbd_types.h"
+#include "librbd/internal.h"
+
+#include "PoolWatcher.h"
+
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd-mirror: "
+
+using std::list;
+using std::map;
+using std::set;
+using std::string;
+using std::unique_ptr;
+using std::vector;
+
+using librados::Rados;
+using librados::IoCtx;
+using librbd::cls_client::dir_list;
+
+namespace rbd {
+namespace mirror {
+
+// Store the collaborators and start the timer used to reschedule
+// refresh_images().  The lock and condition variable are owned by the
+// caller; the timer is constructed with the same lock.
+PoolWatcher::PoolWatcher(RadosRef cluster, double interval_seconds,
+                         Mutex &lock, Cond &cond)
+  : m_lock(lock),
+    m_refresh_cond(cond),
+    m_stopping(false),
+    m_cluster(cluster),
+    m_timer(g_ceph_context, m_lock),
+    m_interval(interval_seconds)
+{
+  m_timer.init();
+}
+
+// Mark the watcher as stopping so refresh_images() stops rescheduling
+// itself, then shut the timer down.  Both steps run with m_lock held.
+PoolWatcher::~PoolWatcher()
+{
+  Mutex::Locker locker(m_lock);
+  m_stopping = true;
+  m_timer.shutdown();
+}
+
+// Return the cached pool id -> image ids mapping.  The caller must already
+// hold the lock passed to the constructor.
+const std::map<int64_t, std::set<std::string> >&
+PoolWatcher::get_images() const
+{
+  assert(m_lock.is_locked());
+  return m_images;
+}
+
+// Rebuild the pool id -> image ids map from the cluster and publish it.
+//
+// reschedule: when true (and the watcher is not stopping) another refresh
+//             is queued on the timer after m_interval seconds.
+//
+// Cache-tier pools, pools that vanish while being inspected, pools with
+// mirroring disabled and pools whose directory cannot be listed completely
+// are left out of the result.  The cluster is queried without m_lock; the
+// lock is taken only to publish the result, reschedule and signal the
+// condition variable.
+void PoolWatcher::refresh_images(bool reschedule)
+{
+  dout(20) << __func__ << dendl;
+  map<int64_t, set<string> > images;
+  list<std::pair<int64_t, string> > pools;
+  int r = m_cluster->pool_list2(pools);
+  if (r < 0) {
+    derr << "error listing pools: " << cpp_strerror(r) << dendl;
+    // NOTE(review): returning here also skips the reschedule below, so a
+    // failed pool listing ends the periodic refresh -- confirm intended.
+    return;
+  }
+
+  for (const auto &kv : pools) {
+    const int64_t pool_id = kv.first;
+    const string &pool_name = kv.second;
+
+    int64_t base_tier;
+    r = m_cluster->pool_get_base_tier(pool_id, &base_tier);
+    if (r == -ENOENT) {
+      dout(10) << "pool " << pool_name << " no longer exists" << dendl;
+      continue;
+    } else if (r < 0) {
+      derr << "Error retrieving base tier for pool " << pool_name
+           << " : " << cpp_strerror(r) << dendl;
+      continue;
+    }
+    if (pool_id != base_tier) {
+      // pool is a cache; skip it
+      continue;
+    }
+
+    IoCtx ioctx;
+    r = m_cluster->ioctx_create2(pool_id, ioctx);
+    if (r == -ENOENT) {
+      dout(10) << "pool " << pool_name << " no longer exists" << dendl;
+      continue;
+    } else if (r < 0) {
+      derr << "Error accessing pool " << pool_name
+           << " : " << cpp_strerror(r) << dendl;
+      continue;
+    }
+
+    // TODO: read mirrored images from mirroring settings object. For
+    // now just treat all images in a pool with mirroring enabled as mirrored
+    bool enabled = false;
+    r = librbd::mirror_is_enabled(ioctx, &enabled);
+    if (r < 0) {
+      derr << "could not tell whether mirroring was enabled for " << pool_name
+           << " : " << cpp_strerror(r) << dendl;
+      continue;
+    }
+    if (!enabled) {
+      dout(20) << "pool " << pool_name << " has mirroring disabled" << dendl;
+      continue;
+    }
+
+    set<string> image_ids;
+
+    // only format 2 images can be mirrored, so only check the format
+    // 2 rbd_directory structure
+    const int max_read = 1024;
+    string last_read = "";
+    bool listing_failed = false;
+    bool more = true;
+    while (more) {
+      map<string, string> pool_images;
+      r = dir_list(&ioctx, RBD_DIRECTORY,
+                   last_read, max_read, &pool_images);
+      if (r < 0) {
+        derr << "error listing images in pool " << pool_name << ": "
+             << cpp_strerror(r) << dendl;
+        listing_failed = true;
+        break;
+      }
+      for (const auto &entry : pool_images) {
+        image_ids.insert(entry.second);
+      }
+      if (!pool_images.empty()) {
+        last_read = pool_images.rbegin()->first;
+      }
+      // a full page means there may be more entries after last_read
+      more = (pool_images.size() == static_cast<size_t>(max_read));
+    }
+
+    // Decide on what was collected, not on the size of the last page: when
+    // the image count is an exact multiple of max_read the final page is
+    // empty, and testing it used to discard the whole pool.
+    if (!listing_failed && !image_ids.empty()) {
+      images[pool_id] = std::move(image_ids);
+    }
+  }
+
+  Mutex::Locker l(m_lock);
+  m_images = std::move(images);
+  if (!m_stopping && reschedule) {
+    FunctionContext *ctx = new FunctionContext(
+      boost::bind(&PoolWatcher::refresh_images, this, true));
+    m_timer.add_event_after(m_interval, ctx);
+  }
+  m_refresh_cond.Signal();
+  // TODO: perhaps use a workqueue instead, once we get notifications
+  // about new/removed mirrored images
+}
+
+} // namespace mirror
+} // namespace rbd
diff --git a/src/tools/rbd_mirror/PoolWatcher.h b/src/tools/rbd_mirror/PoolWatcher.h
new file mode 100644
index 0000000..1358539
--- /dev/null
+++ b/src/tools/rbd_mirror/PoolWatcher.h
@@ -0,0 +1,50 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_POOL_WATCHER_H
+#define CEPH_RBD_MIRROR_POOL_WATCHER_H
+
+#include <map>
+#include <memory>
+#include <set>
+#include <string>
+
+#include "common/ceph_context.h"
+#include "common/Mutex.h"
+#include "common/Timer.h"
+#include "include/rados/librados.hpp"
+#include "types.h"
+
+namespace rbd {
+namespace mirror {
+
+/**
+ * Keeps track of images that have mirroring enabled within all
+ * pools.
+ *
+ * The lock and condition variable are supplied by the owner:
+ * refresh_images() publishes its result under the lock and then signals
+ * the condition variable; get_images() asserts that the lock is held.
+ */
+class PoolWatcher {
+public:
+  // cluster:          handle used for all pool and image queries
+  // interval_seconds: delay before a rescheduled refresh runs
+  // lock, cond:       caller-owned synchronisation objects (see above)
+  PoolWatcher(RadosRef cluster, double interval_seconds,
+	      Mutex &lock, Cond &cond);
+  ~PoolWatcher();
+  // not copyable
+  PoolWatcher(const PoolWatcher&) = delete;
+  PoolWatcher& operator=(const PoolWatcher&) = delete;
+  // Returns a reference into the cache; the lock must be held.
+  const std::map<int64_t, std::set<std::string> >& get_images() const;
+  // Rescans the cluster; with reschedule=true it re-arms itself on the timer.
+  void refresh_images(bool reschedule=true);
+
+private:
+  Mutex &m_lock;
+  Cond &m_refresh_cond;
+  // set by the destructor so that no further refresh is scheduled
+  bool m_stopping;
+
+  RadosRef m_cluster;
+  SafeTimer m_timer;
+  double m_interval;
+  // pool id -> image id
+  std::map<int64_t, std::set<std::string> > m_images;
+};
+
+} // namespace mirror
+} // namespace rbd
+
+#endif // CEPH_RBD_MIRROR_POOL_WATCHER_H
diff --git a/src/tools/rbd_mirror/Replayer.cc b/src/tools/rbd_mirror/Replayer.cc
new file mode 100644
index 0000000..e787187
--- /dev/null
+++ b/src/tools/rbd_mirror/Replayer.cc
@@ -0,0 +1,146 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include <boost/bind.hpp>
+
+#include "common/debug.h"
+#include "common/errno.h"
+#include "include/stringify.h"
+#include "Replayer.h"
+
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd-mirror: "
+
+using std::chrono::seconds;
+using std::map;
+using std::string;
+using std::unique_ptr;
+using std::vector;
+
+namespace rbd {
+namespace mirror {
+
+// Record the peer to replay from and allocate (but do not connect) the
+// remote cluster handle.  The lock name embeds the printed peer.
+Replayer::Replayer(RadosRef local_cluster, const peer_t &peer)
+  : m_lock(stringify("rbd::mirror::Replayer ") + stringify(peer)),
+    m_peer(peer),
+    m_local(local_cluster),
+    m_remote(new librados::Rados),
+    m_replayer_thread(this)
+{
+}
+
+// Stop the replayer: raise the stop flag, wake run() if it is waiting on
+// m_cond, then wait for the thread to finish.
+// NOTE(review): nothing visible in this file starts m_replayer_thread --
+// confirm that join() is valid on a thread that was never created.
+Replayer::~Replayer()
+{
+  m_stopping.set(1);
+
+  m_lock.Lock();
+  m_cond.Signal();
+  m_lock.Unlock();
+
+  m_replayer_thread.join();
+}
+
+// Connect to the peer cluster and start watching its pools.
+//
+// Steps: create the remote handle for the peer's client and cluster name,
+// read that cluster's ceph.conf, connect, and verify that the cluster's
+// fsid matches the uuid configured for the peer.  On success a PoolWatcher
+// is created and asked for an initial refresh.
+//
+// Returns 0 on success, the negative error code of the failing librados
+// call, or -EINVAL when the cluster uuid does not match.
+int Replayer::init()
+{
+  // separate the function name from the message text
+  dout(20) << __func__ << ": Replaying for " << m_peer << dendl;
+
+  int r = m_remote->init2(m_peer.client_name.c_str(),
+                          m_peer.cluster_name.c_str(), 0);
+  if (r < 0) {
+    derr << "error initializing remote cluster handle for " << m_peer
+         << " : " << cpp_strerror(r) << dendl;
+    return r;
+  }
+
+  r = m_remote->conf_read_file(nullptr);
+  if (r < 0) {
+    derr << "could not read ceph conf for " << m_peer
+         << " : " << cpp_strerror(r) << dendl;
+    return r;
+  }
+
+  r = m_remote->connect();
+  if (r < 0) {
+    derr << "error connecting to remote cluster " << m_peer
+         << " : " << cpp_strerror(r) << dendl;
+    return r;
+  }
+
+  string cluster_uuid;
+  r = m_remote->cluster_fsid(&cluster_uuid);
+  if (r < 0) {
+    derr << "error reading cluster uuid from remote cluster " << m_peer
+         << " : " << cpp_strerror(r) << dendl;
+    return r;
+  }
+
+  // refuse to replay from a cluster other than the one that was configured
+  if (cluster_uuid != m_peer.cluster_uuid) {
+    derr << "configured cluster uuid does not match actual cluster uuid. "
+         << "expected: " << m_peer.cluster_uuid
+         << " observed: " << cluster_uuid << dendl;
+    return -EINVAL;
+  }
+
+  dout(20) << __func__ << ": connected to " << m_peer << dendl;
+
+  // TODO: make interval configurable
+  m_pool_watcher.reset(new PoolWatcher(m_remote, 30, m_lock, m_cond));
+  m_pool_watcher->refresh_images();
+
+  return 0;
+}
+
+// Thread body: under m_lock, apply the pool watcher's current image list,
+// then wait up to 30 seconds on m_cond.  Repeats until m_stopping is set.
+void Replayer::run()
+{
+  while (m_stopping.read() == 0) {
+    Mutex::Locker locker(m_lock);
+    set_sources(m_pool_watcher->get_images());
+    m_cond.WaitInterval(g_ceph_context, m_lock, seconds(30));
+  }
+}
+
+// Make the set of running ImageReplayers match `images` (pool id -> ids of
+// the images to mirror).  Replayers for pools or images that are no longer
+// listed are destroyed; missing ones are created and started, and a
+// replayer whose start() fails is dropped.  The caller must hold m_lock.
+void Replayer::set_sources(const map<int64_t, set<string> > &images)
+{
+  assert(m_lock.is_locked());
+  // TODO: make stopping and starting ImageReplayers async
+
+  // pass 1: remove what is no longer wanted
+  auto pool_it = m_images.begin();
+  while (pool_it != m_images.end()) {
+    auto wanted_it = images.find(pool_it->first);
+    if (wanted_it == images.end()) {
+      pool_it = m_images.erase(pool_it);
+      continue;
+    }
+
+    const auto &wanted_ids = wanted_it->second;
+    auto &running = pool_it->second;
+    auto image_it = running.begin();
+    while (image_it != running.end()) {
+      if (wanted_ids.count(image_it->first) == 0) {
+        image_it = running.erase(image_it);
+      } else {
+        ++image_it;
+      }
+    }
+    ++pool_it;
+  }
+
+  // pass 2: add what is missing
+  for (const auto &pool : images) {
+    const int64_t pool_id = pool.first;
+    // create entry for pool if it doesn't exist
+    auto &pool_replayers = m_images[pool_id];
+    for (const auto &image_id : pool.second) {
+      if (pool_replayers.count(image_id) != 0) {
+        continue;
+      }
+      unique_ptr<ImageReplayer> image_replayer(
+        new ImageReplayer(m_local, m_remote, pool_id, image_id));
+      if (image_replayer->start() < 0) {
+        continue;
+      }
+      pool_replayers.insert(
+        std::make_pair(image_id, std::move(image_replayer)));
+    }
+  }
+}
+
+} // namespace mirror
+} // namespace rbd
diff --git a/src/tools/rbd_mirror/Replayer.h b/src/tools/rbd_mirror/Replayer.h
new file mode 100644
index 0000000..ca4d3e7
--- /dev/null
+++ b/src/tools/rbd_mirror/Replayer.h
@@ -0,0 +1,69 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_REPLAYER_H
+#define CEPH_RBD_MIRROR_REPLAYER_H
+
+#include <map>
+#include <memory>
+#include <set>
+#include <string>
+
+#include "common/Cond.h"
+#include "common/Mutex.h"
+#include "common/WorkQueue.h"
+#include "include/atomic.h"
+#include "include/rados/librados.hpp"
+
+#include "ClusterWatcher.h"
+#include "ImageReplayer.h"
+#include "PoolWatcher.h"
+#include "types.h"
+
+namespace rbd {
+namespace mirror {
+
+/**
+ * Controls mirroring for a single remote cluster.
+ *
+ * Owns the connection to the peer, a PoolWatcher that lists the peer's
+ * mirrored images, and one ImageReplayer per image.
+ */
+class Replayer {
+public:
+  Replayer(RadosRef local_cluster, const peer_t &peer);
+  ~Replayer();
+  // not copyable
+  Replayer(const Replayer&) = delete;
+  Replayer& operator=(const Replayer&) = delete;
+
+  // Connects to the peer; returns 0 or a negative error code.
+  int init();
+  // Loop executed by m_replayer_thread until the destructor sets m_stopping.
+  void run();
+  // NOTE(review): no definition of shutdown() appears in Replayer.cc.
+  void shutdown();
+
+private:
+  // Requires m_lock to be held.
+  void set_sources(const std::map<int64_t, std::set<std::string> > &images);
+
+  Mutex m_lock;
+  Cond m_cond;
+  atomic_t m_stopping;
+
+  peer_t m_peer;
+  RadosRef m_local, m_remote;
+  std::unique_ptr<PoolWatcher> m_pool_watcher;
+  // index by pool so it's easy to tell what is affected
+  // when a pool's configuration changes
+  std::map<int64_t, std::map<std::string,
+			     std::unique_ptr<ImageReplayer> > > m_images;
+
+  // Thin Thread wrapper whose entry point simply calls Replayer::run().
+  class ReplayerThread : public Thread {
+    Replayer *m_replayer;
+  public:
+    ReplayerThread(Replayer *replayer) : m_replayer(replayer) {}
+    void *entry() {
+      m_replayer->run();
+      return 0;
+    }
+  } m_replayer_thread;
+};
+
+} // namespace mirror
+} // namespace rbd
+
+#endif // CEPH_RBD_MIRROR_REPLAYER_H
diff --git a/src/tools/rbd_mirror/main.cc b/src/tools/rbd_mirror/main.cc
new file mode 100644
index 0000000..1236bf2
--- /dev/null
+++ b/src/tools/rbd_mirror/main.cc
@@ -0,0 +1,80 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "common/ceph_argparse.h"
+#include "common/config.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "global/global_init.h"
+#include "global/signal_handler.h"
+#include "Mirror.h"
+
+#include <vector>
+
+rbd::mirror::Mirror *mirror = nullptr;
+
+// Print the command-line help, followed by the generic server options.
+void usage() {
+  std::cout << "usage: rbd-mirror [options...]" << std::endl
+            << "options:\n"
+            << "  -m monaddress[:port]      connect to specified monitor\n"
+            << "  --keyring=<path>          path to keyring for local cluster\n"
+            << "  --log-file=<logfile>       file to log debug output\n"
+            << "  --debug-rbd-mirror=<log-level>/<memory-level>  set rbd-mirror debug level\n";
+  generic_server_usage();
+}
+
+// Forward SIGINT/SIGTERM to the Mirror instance, if one exists yet.
+static void handle_signal(int signum)
+{
+  if (mirror == nullptr) {
+    return;
+  }
+  mirror->handle_signal(signum);
+}
+
+// Entry point of the rbd-mirror daemon.
+//
+// Parses the standard Ceph arguments, optionally daemonizes, installs the
+// signal handlers, then initialises and runs the Mirror until it is asked
+// to stop.  Returns EXIT_FAILURE when initialisation fails and
+// EXIT_SUCCESS otherwise (including for -h/--help).
+int main(int argc, const char **argv)
+{
+  std::vector<const char*> args;
+  argv_to_vec(argc, argv, args);
+  env_to_vec(args);
+
+  global_init(nullptr, args, CEPH_ENTITY_TYPE_CLIENT,
+              CODE_ENVIRONMENT_DAEMON,
+              CINIT_FLAG_UNPRIVILEGED_DAEMON_DEFAULTS);
+
+  for (auto i = args.begin(); i != args.end(); ++i) {
+    if (ceph_argparse_flag(args, i, "-h", "--help", (char*)NULL)) {
+      usage();
+      return EXIT_SUCCESS;
+    }
+  }
+
+  if (g_conf->daemonize) {
+    global_init_daemonize(g_ceph_context);
+  }
+  g_ceph_context->enable_perf_counter();
+
+  common_init_finish(g_ceph_context);
+
+  init_async_signal_handler();
+  register_async_signal_handler(SIGHUP, sighup_handler);
+  register_async_signal_handler_oneshot(SIGINT, handle_signal);
+  register_async_signal_handler_oneshot(SIGTERM, handle_signal);
+
+  mirror = new rbd::mirror::Mirror(g_ceph_context);
+  int r = mirror->init();
+  if (r < 0) {
+    std::cerr << "failed to initialize: " << cpp_strerror(r) << std::endl;
+  } else {
+    mirror->run();
+  }
+
+  // common teardown for both the failure and the normal path
+  unregister_async_signal_handler(SIGHUP, sighup_handler);
+  unregister_async_signal_handler(SIGINT, handle_signal);
+  unregister_async_signal_handler(SIGTERM, handle_signal);
+  shutdown_async_signal_handler();
+
+  delete mirror;
+  g_ceph_context->put();
+
+  // a negative r means init() failed; the status used to be inverted
+  return r < 0 ? EXIT_FAILURE : EXIT_SUCCESS;
+}
diff --git a/src/tools/rbd_mirror/types.cc b/src/tools/rbd_mirror/types.cc
new file mode 100644
index 0000000..88ad3b2
--- /dev/null
+++ b/src/tools/rbd_mirror/types.cc
@@ -0,0 +1,11 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "types.h"
+
+std::ostream& operator<<(std::ostream& lhs, const rbd::mirror::peer_t &peer)
+{  // Writes "name: <cluster_name> uuid: <cluster_uuid> client: <client_name>".
+  lhs << "name: " << peer.cluster_name;
+  lhs << " uuid: " << peer.cluster_uuid;
+  return lhs << " client: " << peer.client_name;
+}
diff --git a/src/tools/rbd_mirror/types.h b/src/tools/rbd_mirror/types.h
new file mode 100644
index 0000000..bdfc23b
--- /dev/null
+++ b/src/tools/rbd_mirror/types.h
@@ -0,0 +1,53 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_TYPES_H
+#define CEPH_RBD_MIRROR_TYPES_H
+
+#include <iostream>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "include/rbd/librbd.hpp"
+
+namespace rbd {
+namespace mirror {
+
+typedef shared_ptr<librados::Rados> RadosRef;
+typedef shared_ptr<librados::IoCtx> IoCtxRef;
+typedef shared_ptr<librbd::Image> ImageRef;
+
+struct peer_t {
+  // Describes one peer cluster; ordering and equality consider cluster_uuid only.
+  std::string cluster_uuid;
+  std::string cluster_name;
+  std::string client_name;
+
+  peer_t() = default;
+  peer_t(const std::string &uuid, const std::string &cluster_name,
+	 const std::string &client_name)
+    : cluster_uuid(uuid), cluster_name(cluster_name), client_name(client_name) {}
+  // Copies the three identifying fields out of a librbd mirror peer record.
+  peer_t(const librbd::mirror_peer_t &peer)
+    : cluster_uuid(peer.cluster_uuid),
+      cluster_name(peer.cluster_name),
+      client_name(peer.client_name) {}
+  bool operator<(const peer_t &rhs) const {
+    return cluster_uuid < rhs.cluster_uuid;
+  }
+  // Two-argument call operator applying the same uuid ordering to lhs and rhs.
+  bool operator()(const peer_t &lhs, const peer_t &rhs) const {
+    return lhs.cluster_uuid < rhs.cluster_uuid;
+  }
+  bool operator==(const peer_t &rhs) const {
+    return cluster_uuid == rhs.cluster_uuid;
+  }
+};
+
+} // namespace mirror
+} // namespace rbd
+
+std::ostream& operator<<(std::ostream& lhs, const rbd::mirror::peer_t &peer);
+
+#endif // CEPH_RBD_MIRROR_TYPES_H
diff --git a/src/tools/rbd_nbd/rbd-nbd.cc b/src/tools/rbd_nbd/rbd-nbd.cc
index 41fcfc2..e0c0284 100644
--- a/src/tools/rbd_nbd/rbd-nbd.cc
+++ b/src/tools/rbd_nbd/rbd-nbd.cc
@@ -751,13 +751,22 @@ static int rbd_nbd(int argc, const char *argv[])
               CINIT_FLAG_UNPRIVILEGED_DAEMON_DEFAULTS);
 
   std::vector<const char*>::iterator i;
+  std::ostringstream err;
 
   for (i = args.begin(); i != args.end(); ) {
     if (ceph_argparse_flag(args, i, "-h", "--help", (char*)NULL)) {
       usage();
       return 0;
     } else if (ceph_argparse_witharg(args, i, &devpath, "--device", (char *)NULL)) {
-    } else if (ceph_argparse_witharg(args, i, &nbds_max, cerr, "--nbds_max", (char *)NULL)) {
+    } else if (ceph_argparse_witharg(args, i, &nbds_max, err, "--nbds_max", (char *)NULL)) {
+      if (!err.str().empty()) {
+        cerr << err.str() << std::endl;
+        return EXIT_FAILURE;
+      }
+      if (nbds_max < 0) {
+        cerr << "rbd-nbd: Invalid argument for nbds_max!" << std::endl;
+        return EXIT_FAILURE;
+      }
     } else if (ceph_argparse_flag(args, i, "--read-only", (char *)NULL)) {
       readonly = true;
     } else {
diff --git a/src/ceph-detect-init/run-tox.sh b/src/tools/setup-virtualenv.sh
similarity index 57%
copy from src/ceph-detect-init/run-tox.sh
copy to src/tools/setup-virtualenv.sh
index 206938e..f1c3f9a 100755
--- a/src/ceph-detect-init/run-tox.sh
+++ b/src/tools/setup-virtualenv.sh
@@ -1,9 +1,7 @@
 #!/bin/bash
 #
-# Copyright (C) 2015 SUSE LINUX GmbH
-# Copyright (C) 2015 <contact at redhat.com>
+# Copyright (C) 2016 <contact at redhat.com>
 #
-# Author: Owen Synge <osynge at suse.com>
 # Author: Loic Dachary <loic at dachary.org>
 #
 # This program is free software; you can redistribute it and/or modify
@@ -17,20 +15,17 @@
 # GNU Library Public License for more details.
 #
 
-# run from the ceph-detect-init directory or from its parent
-test -d ceph-detect-init && cd ceph-detect-init
-trap "rm -fr make-check" EXIT
-virtualenv make-check
-. make-check/bin/activate
+rm -fr virtualenv
+virtualenv virtualenv
+. virtualenv/bin/activate
 # older versions of pip will not install wrap_console scripts
 # when using wheel packages
-pip --log make-check/log.txt install --upgrade 'pip >= 6.1'
+pip --log virtualenv/log.txt install --upgrade 'pip >= 6.1'
 if test -d wheelhouse ; then
     export NO_INDEX=--no-index
 fi
-pip --log make-check/log.txt install $NO_INDEX --use-wheel --find-links=file://$(pwd)/wheelhouse --upgrade distribute
-pip --log make-check/log.txt install $NO_INDEX --use-wheel --find-links=file://$(pwd)/wheelhouse 'tox >=1.9' 
-tox > make-check/tox.out 2>&1 
-status=$?
-grep -v InterpreterNotFound < make-check/tox.out
-exit $status
+pip --log virtualenv/log.txt install $NO_INDEX --use-wheel --find-links=file://$(pwd)/wheelhouse --upgrade distribute
+pip --log virtualenv/log.txt install $NO_INDEX --use-wheel --find-links=file://$(pwd)/wheelhouse 'tox >=1.9' 
+if test -f requirements.txt ; then
+    pip --log virtualenv/log.txt install $NO_INDEX --use-wheel --find-links=file://$(pwd)/wheelhouse -r requirements.txt
+fi
diff --git a/src/tracing/Makefile.am b/src/tracing/Makefile.am
index 5c6a4e2..1c2349e 100644
--- a/src/tracing/Makefile.am
+++ b/src/tracing/Makefile.am
@@ -24,7 +24,7 @@ nodist_libosd_tp_la_SOURCES = \
 	tracing/osd.h \
 	tracing/pg.h
 endif
-libosd_tp_la_LIBADD = -llttng-ust -ldl
+libosd_tp_la_LIBADD = -ldl -llttng-ust
 libosd_tp_la_CFLAGS = -I$(top_srcdir)/src/tracing -I$(top_srcdir)/src $(AM_CFLAGS) -fpic
 libosd_tp_la_LDFLAGS = -version-info 1:0:0
 
@@ -34,7 +34,7 @@ librados_tp_la_SOURCES = \
 nodist_librados_tp_la_SOURCES = \
 	tracing/librados.h
 endif
-librados_tp_la_LIBADD = -llttng-ust -ldl
+librados_tp_la_LIBADD = -ldl -llttng-ust
 librados_tp_la_CFLAGS = -I$(top_srcdir)/src/tracing -I$(top_srcdir)/src $(AM_CFLAGS) -fpic
 librados_tp_la_LDFLAGS = -version-info 2:0:0
 
@@ -44,7 +44,7 @@ librbd_tp_la_SOURCES = \
 nodist_librbd_tp_la_SOURCES = \
 	tracing/librbd.h
 endif
-librbd_tp_la_LIBADD = -llttng-ust -ldl
+librbd_tp_la_LIBADD = -ldl -llttng-ust
 librbd_tp_la_CFLAGS = -I$(top_srcdir)/src/tracing -I$(top_srcdir)/src $(AM_CFLAGS) -fpic
 librbd_tp_la_LDFLAGS = -version-info 1:0:0
 
@@ -54,7 +54,7 @@ libos_tp_la_SOURCES = \
 nodist_libos_tp_la_SOURCES = \
 	tracing/objectstore.h
 endif
-libos_tp_la_LIBADD = -llttng-ust -ldl
+libos_tp_la_LIBADD = -ldl -llttng-ust
 libos_tp_la_CFLAGS = -I$(top_srcdir)/src/tracing -I$(top_srcdir)/src $(AM_CFLAGS) -fpic
 libos_tp_la_LDFLAGS = -version-info 1:0:0
 
diff --git a/src/tracing/librbd.tp b/src/tracing/librbd.tp
index 58cbc13..f91e4e3 100644
--- a/src/tracing/librbd.tp
+++ b/src/tracing/librbd.tp
@@ -172,6 +172,32 @@ TRACEPOINT_EVENT(librbd, open_image_exit,
     )
 )
 
+TRACEPOINT_EVENT(librbd, aio_open_image_enter,
+    TP_ARGS(
+        void*, imagectx,
+        const char*, name,
+        const char*, id,
+        const char*, snap_name,
+        int, read_only,
+        const void*, completion),
+    TP_FIELDS(
+        ctf_integer_hex(void*, imagectx, imagectx)
+        ctf_string(name, name)
+        ctf_string(id, id)
+        ctf_string(snap_name, snap_name)
+        ctf_integer(uint8_t, read_only, read_only ? 1 : 0)
+        ctf_integer_hex(const void*, completion, completion)
+    )
+)
+
+TRACEPOINT_EVENT(librbd, aio_open_image_exit,
+    TP_ARGS(
+        int, retval),
+    TP_FIELDS(
+        ctf_integer(int, retval, retval)
+    )
+)
+
 TRACEPOINT_EVENT(librbd, close_image_enter,
     TP_ARGS(
         void*, imagectx,
@@ -191,6 +217,27 @@ TRACEPOINT_EVENT(librbd, close_image_exit,
 	ctf_integer(int, retval, retval))
 )
 
+TRACEPOINT_EVENT(librbd, aio_close_image_enter,
+    TP_ARGS(
+        void*, imagectx,
+        const char*, name,
+        const char*, id,
+        const void*, completion),
+    TP_FIELDS(
+        ctf_integer_hex(void*, imagectx, imagectx)
+        ctf_string(name, name)
+        ctf_string(id, id)
+        ctf_integer_hex(const void*, completion, completion)
+    )
+)
+
+TRACEPOINT_EVENT(librbd, aio_close_image_exit,
+    TP_ARGS(
+	int, retval),
+    TP_FIELDS(
+	ctf_integer(int, retval, retval))
+)
+
 TRACEPOINT_EVENT(librbd, list_enter,
     TP_ARGS(
         const char*, pool_name,
diff --git a/src/upstart/ceph-osd.conf b/src/upstart/ceph-osd.conf
index 02ca238..60b7b82 100644
--- a/src/upstart/ceph-osd.conf
+++ b/src/upstart/ceph-osd.conf
@@ -15,11 +15,15 @@ pre-start script
 
     install -d -m0770 -o ceph -g ceph /var/run/ceph
 
-    /usr/libexec/ceph/ceph-osd-prestart.sh --cluster="${cluster:-ceph}" -i "$id"
+    /usr/lib/ceph/ceph-osd-prestart.sh --cluster="${cluster:-ceph}" -i "$id"
 end script
 
 instance ${cluster:-ceph}/$id
 export cluster
 export id
 
-exec /usr/bin/ceph-osd --cluster="${cluster:-ceph}" -i "$id" -f --setuser ceph --setgroup ceph
+script
+    test -f /etc/default/ceph && . /etc/default/ceph
+    export TCMALLOC_MAX_TOTAL_THREAD_CACHE_BYTES
+    exec /usr/bin/ceph-osd --cluster="${cluster:-ceph}" -i "$id" -f --setuser ceph --setgroup ceph
+end script
diff --git a/src/vstart.sh b/src/vstart.sh
index e515479..cccab88 100755
--- a/src/vstart.sh
+++ b/src/vstart.sh
@@ -106,7 +106,6 @@ cephx=1 #turn cephx on by default
 cache=""
 memstore=0
 bluestore=0
-journal=1
 
 MON_ADDR=""
 
@@ -133,11 +132,13 @@ usage=$usage"\t-X disable cephx\n"
 usage=$usage"\t--hitset <pool> <hit_set_type>: enable hitset tracking\n"
 usage=$usage"\t-e : create an erasure pool\n";
 usage=$usage"\t-o config\t\t add extra config parameters to all sections\n"
-usage=$usage"\t-J no journal\t\tdisable filestore journal\n"
 usage=$usage"\t--mon_num specify ceph monitor count\n"
 usage=$usage"\t--osd_num specify ceph osd count\n"
 usage=$usage"\t--mds_num specify ceph mds count\n"
 usage=$usage"\t--rgw_port specify ceph rgw http listen port\n"
+usage=$usage"\t--bluestore use bluestore as the osd objectstore backend\n"
+usage=$usage"\t--memstore use memstore as the osd objectstore backend\n"
+usage=$usage"\t--cache <pool>: enable cache tiering on pool\n"
 
 usage_exit() {
 	printf "$usage"
@@ -235,9 +236,6 @@ case $1 in
     -X )
 	    cephx=0
 	    ;;
-    -J )
-	    journal=0
-	    ;;
     -k )
 	    overwrite_conf=0
 	    ;;
@@ -350,10 +348,7 @@ if [ "$memstore" -eq 1 ]; then
 fi
 if [ "$bluestore" -eq 1 ]; then
     COSDMEMSTORE='
-	osd objectstore = bluestore
-	bluestore fsck on mount = true
-	bluestore block db size = 67108864
-	bluestore block wal size = 134217728'
+	osd objectstore = bluestore'
 fi
 
 # lockdep everywhere?
@@ -454,6 +449,7 @@ if [ "$start_mon" -eq 1 ]; then
         filestore fd cache size = 32
         run dir = $CEPH_OUT_DIR
         enable experimental unrecoverable data corrupting features = *
+        lockdep = true
 EOF
 if [ "$cephx" -eq 1 ] ; then
 cat <<EOF >> $conf_fn
@@ -466,11 +462,6 @@ cat <<EOF >> $conf_fn
 	auth client required = none
 EOF
 fi
-                        if [ $journal -eq 1 ]; then
-			    journal_path="$CEPH_DEV_DIR/osd\$id.journal"
-			else
-			    journal_path=""
-			fi
 			cat <<EOF >> $conf_fn
 
 [client]
@@ -491,7 +482,7 @@ $extra_conf
 [osd]
 $DAEMONOPTS
         osd data = $CEPH_DEV_DIR/osd\$id
-        osd journal = $journal_path
+        osd journal = $CEPH_DEV_DIR/osd\$id/journal
         osd journal size = 100
         osd class tmp = out
         osd class dir = $OBJCLASS_PATH
@@ -503,6 +494,12 @@ $DAEMONOPTS
         filestore wbthrottle btrfs ios start flusher = 10
         filestore wbthrottle btrfs ios hard limit = 20
         filestore wbthrottle btrfs inodes hard limit = 30
+	bluestore fsck on mount = true
+	bluestore block create = true
+	bluestore block db size = 67108864
+	bluestore block db create = true
+	bluestore block wal size = 134217728
+	bluestore block wal create = true
 $COSDDEBUG
 $COSDMEMSTORE
 $extra_conf
@@ -593,12 +590,12 @@ if [ "$start_osd" -eq 1 ]; then
 [osd.$osd]
         host = $HOSTNAME
 EOF
-		    rm -rf $CEPH_DEV_DIR/osd$osd || true
-		    for f in $CEPH_DEV_DIR/osd$osd/* ; do btrfs sub delete $f || true ; done || true
-		    mkdir -p $CEPH_DEV_DIR/osd$osd
-
 	    fi
 
+	    rm -rf $CEPH_DEV_DIR/osd$osd || true
+	    for f in $CEPH_DEV_DIR/osd$osd/* ; do btrfs sub delete $f || true ; done || true
+	    mkdir -p $CEPH_DEV_DIR/osd$osd
+
 	    uuid=`uuidgen`
 	    echo "add osd$osd $uuid"
 	    $SUDO $CEPH_ADM osd create $uuid
diff --git a/src/yasm-wrapper b/src/yasm-wrapper
index 3695192..9500d2f 100755
--- a/src/yasm-wrapper
+++ b/src/yasm-wrapper
@@ -12,7 +12,7 @@ while [ -n "$*" ]; do
 	    new="$new -f $1"
 	    shift
 	    ;;
-	-g* | -f* | -W* | -MD | -MP | -fPIC | -c | -D* | --param* | -O* | -m* | -pipe )
+	-g* | -f* | -W* | -MD | -MP | -fPIC | -c | -D* | --param* | -O* | -m* | -pipe | ggc-min* )
 	    shift
 	    ;;
 	-I )
@@ -41,4 +41,4 @@ yasm $new
 
 [ -n "$touch" ] && touch $touch
 
-true
\ No newline at end of file
+true
diff --git a/systemd/Makefile.am b/systemd/Makefile.am
index 02dee06..6d93c3c 100644
--- a/systemd/Makefile.am
+++ b/systemd/Makefile.am
@@ -19,5 +19,4 @@ unit_DATA = $(unitfiles)
 EXTRA_DIST = \
 	$(unitfiles) \
 	ceph \
-	ceph.tmpfiles.d \
-	ceph-radosgw-prestart.sh
+	ceph.tmpfiles.d
diff --git a/systemd/Makefile.in b/systemd/Makefile.in
index c15b42b..49ec69d 100644
--- a/systemd/Makefile.in
+++ b/systemd/Makefile.in
@@ -235,12 +235,14 @@ JDK_CPPFLAGS = @JDK_CPPFLAGS@
 KEYUTILS_LIB = @KEYUTILS_LIB@
 LD = @LD@
 LDFLAGS = @LDFLAGS@
-LIBEDIT_CFLAGS = @LIBEDIT_CFLAGS@
-LIBEDIT_LIBS = @LIBEDIT_LIBS@
+LIBDPDK_CFLAGS = @LIBDPDK_CFLAGS@
+LIBDPDK_LIBS = @LIBDPDK_LIBS@
 LIBFUSE_CFLAGS = @LIBFUSE_CFLAGS@
 LIBFUSE_LIBS = @LIBFUSE_LIBS@
 LIBJEMALLOC = @LIBJEMALLOC@
 LIBOBJS = @LIBOBJS@
+LIBPCIACCESS_CFLAGS = @LIBPCIACCESS_CFLAGS@
+LIBPCIACCESS_LIBS = @LIBPCIACCESS_LIBS@
 LIBROCKSDB_CFLAGS = @LIBROCKSDB_CFLAGS@
 LIBROCKSDB_LIBS = @LIBROCKSDB_LIBS@
 LIBS = @LIBS@
@@ -325,7 +327,6 @@ datarootdir = @datarootdir@
 docdir = @docdir@
 dvidir = @dvidir@
 exec_prefix = @exec_prefix@
-group_rgw = @group_rgw@
 host = @host@
 host_alias = @host_alias@
 host_cpu = @host_cpu@
@@ -356,7 +357,6 @@ sharedstatedir = @sharedstatedir@
 srcdir = @srcdir@
 subdirs = @subdirs@
 sysconfdir = @sysconfdir@
-systemd_libexec_dir = @systemd_libexec_dir@
 systemd_unit_dir = @systemd_unit_dir@
 target = @target@
 target_alias = @target_alias@
@@ -366,7 +366,6 @@ target_vendor = @target_vendor@
 top_build_prefix = @top_build_prefix@
 top_builddir = @top_builddir@
 top_srcdir = @top_srcdir@
-user_rgw = @user_rgw@
 unitfiles = \
 	ceph.target \
         ceph-osd.target \
@@ -386,8 +385,7 @@ unit_DATA = $(unitfiles)
 EXTRA_DIST = \
 	$(unitfiles) \
 	ceph \
-	ceph.tmpfiles.d \
-	ceph-radosgw-prestart.sh
+	ceph.tmpfiles.d
 
 all: all-am
 
diff --git a/systemd/ceph-mds at .service b/systemd/ceph-mds at .service
index 708f42c..e122580 100644
--- a/systemd/ceph-mds at .service
+++ b/systemd/ceph-mds at .service
@@ -11,6 +11,10 @@ EnvironmentFile=-/etc/sysconfig/ceph
 Environment=CLUSTER=ceph
 ExecStart=/usr/bin/ceph-mds -f --cluster ${CLUSTER} --id %i --setuser ceph --setgroup ceph
 ExecReload=/bin/kill -HUP $MAINPID
+PrivateDevices=yes
+ProtectHome=true
+ProtectSystem=full
+PrivateTmp=true
 
 [Install]
 WantedBy=ceph-mds.target
diff --git a/systemd/ceph-mon at .service b/systemd/ceph-mon at .service
index 03a9b6c..a8d427b 100644
--- a/systemd/ceph-mon at .service
+++ b/systemd/ceph-mon at .service
@@ -17,6 +17,10 @@ EnvironmentFile=-/etc/sysconfig/ceph
 Environment=CLUSTER=ceph
 ExecStart=/usr/bin/ceph-mon -f --cluster ${CLUSTER} --id %i --setuser ceph --setgroup ceph
 ExecReload=/bin/kill -HUP $MAINPID
+PrivateDevices=yes
+ProtectHome=true
+ProtectSystem=full
+PrivateTmp=true
 
 [Install]
 WantedBy=ceph-mon.target
diff --git a/systemd/ceph-osd at .service b/systemd/ceph-osd at .service
index 82dabdf..0d73afb 100644
--- a/systemd/ceph-osd at .service
+++ b/systemd/ceph-osd at .service
@@ -10,8 +10,11 @@ LimitNPROC=1048576
 EnvironmentFile=-/etc/sysconfig/ceph
 Environment=CLUSTER=ceph
 ExecStart=/usr/bin/ceph-osd -f --cluster ${CLUSTER} --id %i --setuser ceph --setgroup ceph
-ExecStartPre=/usr/libexec/ceph/ceph-osd-prestart.sh --cluster ${CLUSTER} --id %i --setuser ceph --setgroup ceph
+ExecStartPre=/usr/lib/ceph/ceph-osd-prestart.sh --cluster ${CLUSTER} --id %i --setuser ceph --setgroup ceph
 ExecReload=/bin/kill -HUP $MAINPID
+ProtectHome=true
+ProtectSystem=full
+PrivateTmp=true
 
 [Install]
 WantedBy=ceph-osd.target
diff --git a/systemd/ceph-radosgw-prestart.sh b/systemd/ceph-radosgw-prestart.sh
deleted file mode 100644
index be0a95b..0000000
--- a/systemd/ceph-radosgw-prestart.sh
+++ /dev/null
@@ -1,100 +0,0 @@
-#!/bin/bash
-
-eval set -- "$(getopt -o n: --long name:,cluster: -- $@)"
-
-while true ; do
-  case "$1" in
-    -n|--name) name=$2; shift 2 ;;
-    --cluster) cluster=$2; shift 2 ;;
-    --) shift ; break ;;
-  esac
-done
-
-CEPHCONF=`which ceph-conf`
-RADOSGW=`which radosgw`
-
-if [ -z "${CEPHCONF}"  ]; then
-  CEPHCONF=/usr/bin/${CEPHCONF}
-fi
-
-if [ ! -x "${CEPHCONF}" ]; then
-  echo "${CEPHCONF} could not start, it is not executable."
-  exit 1
-fi
-
-if [ -z "$RADOSGW"  ]; then
-  RADOSGW=/usr/bin/radosgw
-fi
-
-if [ ! -x "$RADOSGW" ]; then
-  echo "$RADOSGW could not start, it is not executable."
-  exit 1
-fi
-
-# prefix for radosgw instances in ceph.conf
-PREFIX='client.radosgw.'
-
-if [ -z "$name"  ]; then
-  echo "no name paramter"
-  exit 1
-fi
-
-if [ -z "$cluster"  ]; then
-  cluster="ceph"
-fi
-
-ceph_conf_file="/etc/ceph/${cluster}.conf"
-
-if [ ! -f "${ceph_conf_file}" ] ; then
-  echo "ceph config file not found: $ceph_conf_file"
-  exit 1
-fi
-
-longname=${PREFIX}${name}
-testname=$(${CEPHCONF} -c ${ceph_conf_file} --list-sections $PREFIX | grep $longname )
-
-if [ -z "$testname"  ]; then
-  echo "error parsing '$name' : valid types are: $(echo $(${CEPHCONF} -c ${ceph_conf_file} --list-sections $PREFIX | sed s/$PREFIX//))"
-  exit 1
-fi
-
-auto_start=`${CEPHCONF} -c ${ceph_conf_file} -n $longname 'auto start'`
-if [ "$auto_start" = "no" ] || [ "$auto_start" = "false" ] || [ "$auto_start" = "0" ]; then
-  echo "ceph.conf:[$longname], says not to start."
-  exit 1
-fi
-
-# is the socket defined?  if it's not, this instance shouldn't run as a daemon.
-rgw_socket=`$RADOSGW -c ${ceph_conf_file} -n $longname --show-config-value rgw_socket_path`
-if [ -z "$rgw_socket" ]; then
-  echo "socket $rgw_socket could not be found in ceph.conf:[$longname], not starting."
-  exit 1
-fi
-
-# mapped to this host?
-host=`${CEPHCONF} -c ${ceph_conf_file} -n $longname host`
-hostname=`hostname -s`
-if [ "$host" != "$hostname" ]; then
-  echo "hostname $hostname could not be found in ceph.conf:[$longname], not starting."
-  exit 1
-fi
-
-user=`${CEPHCONF} -c ${ceph_conf_file} -n $longname user`
-if [ -n "$user" ]; then
-  if [ "$USER" != "$user" ]; then
-    echo "enviroment \$USER '$USER' does not match '$longname' user '$user'"
-    exit 1
-  fi
-fi
-
-
-log_file=`$RADOSGW -c ${ceph_conf_file} -n $longname --show-config-value log_file`
-if [ -n "$log_file" ]; then
-  if [ ! -f "$log_file" ]; then
-    touch "$log_file"
-    touchrc=$?
-    if [ 0 != $touchrc ] ; then
-      exit $touchrc
-    fi
-  fi
-fi
diff --git a/systemd/ceph-radosgw at .service b/systemd/ceph-radosgw at .service
index fb09e19..66d9eb8 100644
--- a/systemd/ceph-radosgw at .service
+++ b/systemd/ceph-radosgw at .service
@@ -10,6 +10,10 @@ LimitNPROC=1048576
 EnvironmentFile=-/etc/sysconfig/ceph
 Environment=CLUSTER=ceph
 ExecStart=/usr/bin/radosgw -f --cluster ${CLUSTER} --name client.%i --setuser ceph --setgroup ceph
+PrivateDevices=yes
+ProtectHome=true
+ProtectSystem=full
+PrivateTmp=true
 
 [Install]
 WantedBy=ceph-radosgw.target
diff --git a/udev/95-ceph-osd.rules b/udev/95-ceph-osd.rules
index d8db85d..808436f 100644
--- a/udev/95-ceph-osd.rules
+++ b/udev/95-ceph-osd.rules
@@ -18,6 +18,16 @@ ACTION=="change", SUBSYSTEM=="block", \
   ENV{ID_PART_ENTRY_TYPE}=="45b0969e-9b03-4f30-b4c6-b4b80ceff106", \
   OWNER="ceph", GROUP="ceph", MODE="660"
 
+# BLOCK_UUID
+ACTION=="add", SUBSYSTEM=="block", \
+  ENV{DEVTYPE}=="partition", \
+  ENV{ID_PART_ENTRY_TYPE}=="cafecafe-9b03-4f30-b4c6-b4b80ceff106", \
+  OWNER:="ceph", GROUP:="ceph", MODE:="660", \
+  RUN+="/usr/sbin/ceph-disk --log-stdout -v trigger /dev/$name"
+ACTION=="change", SUBSYSTEM=="block", \
+  ENV{ID_PART_ENTRY_TYPE}=="cafecafe-9b03-4f30-b4c6-b4b80ceff106", \
+  OWNER="ceph", GROUP="ceph", MODE="660"
+
 # MPATH_OSD_UUID
 ACTION=="add", SUBSYSTEM=="block", \
   ENV{ID_PART_ENTRY_TYPE}=="4fbd7e29-8ae0-4982-bf9d-5a8d867af560", \
@@ -36,6 +46,15 @@ ACTION=="change", SUBSYSTEM=="block", \
   ENV{ID_PART_ENTRY_TYPE}=="45b0969e-8ae0-4982-bf9d-5a8d867af560", \
   OWNER="ceph", GROUP="ceph", MODE="660"
 
+# MPATH_BLOCK_UUID
+ACTION=="add", SUBSYSTEM=="block", \
+  ENV{ID_PART_ENTRY_TYPE}=="cafecafe-8ae0-4982-bf9d-5a8d867af560", \
+  OWNER:="ceph", GROUP:="ceph", MODE:="660", \
+  RUN+="/usr/sbin/ceph-disk --log-stdout -v trigger /dev/$name"
+ACTION=="change", SUBSYSTEM=="block", \
+  ENV{ID_PART_ENTRY_TYPE}=="cafecafe-8ae0-4982-bf9d-5a8d867af560", \
+  OWNER="ceph", GROUP="ceph", MODE="660"
+
 # DMCRYPT_JOURNAL_UUID
 ACTION=="add" SUBSYSTEM=="block", \
   ENV{DEVTYPE}=="partition", \
@@ -46,6 +65,16 @@ ACTION=="change", SUBSYSTEM=="block", \
   ENV{ID_PART_ENTRY_TYPE}=="45b0969e-9b03-4f30-b4c6-5ec00ceff106", \
   OWNER="ceph", GROUP="ceph", MODE="660"
 
+# DMCRYPT_BLOCK_UUID
+ACTION=="add" SUBSYSTEM=="block", \
+  ENV{DEVTYPE}=="partition", \
+  ENV{ID_PART_ENTRY_TYPE}=="cafecafe-9b03-4f30-b4c6-5ec00ceff106", \
+  OWNER:="ceph", GROUP:="ceph", MODE:="660", \
+  RUN+="/usr/sbin/ceph-disk --log-stdout -v trigger /dev/$name"
+ACTION=="change", SUBSYSTEM=="block", \
+  ENV{ID_PART_ENTRY_TYPE}=="cafecafe-9b03-4f30-b4c6-5ec00ceff106", \
+  OWNER="ceph", GROUP="ceph", MODE="660"
+
 # DMCRYPT_LUKS_JOURNAL_UUID
 ACTION=="add" SUBSYSTEM=="block", \
   ENV{DEVTYPE}=="partition", \
@@ -56,6 +85,16 @@ ACTION=="change", SUBSYSTEM=="block", \
   ENV{ID_PART_ENTRY_TYPE}=="45b0969e-9b03-4f30-b4c6-35865ceff106", \
   OWNER="ceph", GROUP="ceph", MODE="660"
 
+# DMCRYPT_LUKS_BLOCK_UUID
+ACTION=="add" SUBSYSTEM=="block", \
+  ENV{DEVTYPE}=="partition", \
+  ENV{ID_PART_ENTRY_TYPE}=="cafecafe-9b03-4f30-b4c6-35865ceff106", \
+  OWNER:="ceph", GROUP:="ceph", MODE:="660", \
+  RUN+="/usr/sbin/ceph-disk --log-stdout -v trigger /dev/$name"
+ACTION=="change", SUBSYSTEM=="block", \
+  ENV{ID_PART_ENTRY_TYPE}=="cafecafe-9b03-4f30-b4c6-35865ceff106", \
+  OWNER="ceph", GROUP="ceph", MODE="660"
+
 # DMCRYPT_OID_UUID
 ACTION=="add" SUBSYSTEM=="block", \
   ENV{DEVTYPE}=="partition", \

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-ceph/ceph.git



More information about the Pkg-ceph-commits mailing list