[Pkg-ceph-commits] [ceph] 02/05: Imported Upstream version 10.1.2
James Downing Page
jamespage at moszumanska.debian.org
Thu Apr 14 09:10:53 UTC 2016
This is an automated email from the git hooks/post-receive script.
jamespage pushed a commit to branch ubuntu-xenial
in repository ceph.
commit e35cc1756373df95a7f0e30dbdff6fccdef24385
Author: James Page <james.page at ubuntu.com>
Date: Thu Apr 14 10:04:52 2016 +0100
Imported Upstream version 10.1.2
---
AUTHORS | 7 +-
ChangeLog | 124 +++++++++++-
ceph.spec | 2 +-
configure | 20 +-
configure.ac | 2 +-
doc/man/8/ceph-authtool.rst | 40 +++-
doc/man/8/radosgw-admin.rst | 2 +-
man/ceph-authtool.8 | 46 ++++-
man/ceph-clsinfo.8 | 2 +-
man/ceph-conf.8 | 2 +-
man/ceph-create-keys.8 | 2 +-
man/ceph-debugpack.8 | 2 +-
man/ceph-dencoder.8 | 2 +-
man/ceph-deploy.8 | 2 +-
man/ceph-detect-init.8 | 2 +-
man/ceph-disk.8 | 2 +-
man/ceph-fuse.8 | 2 +-
man/ceph-mds.8 | 2 +-
man/ceph-mon.8 | 2 +-
man/ceph-osd.8 | 2 +-
man/ceph-post-file.8 | 2 +-
man/ceph-rbdnamer.8 | 2 +-
man/ceph-rest-api.8 | 2 +-
man/ceph-run.8 | 2 +-
man/ceph-syn.8 | 2 +-
man/ceph.8 | 2 +-
man/cephfs.8 | 2 +-
man/crushtool.8 | 2 +-
man/librados-config.8 | 2 +-
man/monmaptool.8 | 2 +-
man/mount.ceph.8 | 2 +-
man/osdmaptool.8 | 2 +-
man/rados.8 | 2 +-
man/radosgw-admin.8 | 4 +-
man/radosgw.8 | 2 +-
man/rbd-fuse.8 | 2 +-
man/rbd-mirror.8 | 2 +-
man/rbd-nbd.8 | 2 +-
man/rbd-replay-many.8 | 2 +-
man/rbd-replay-prep.8 | 2 +-
man/rbd-replay.8 | 2 +-
man/rbd.8 | 2 +-
man/rbdmap.8 | 2 +-
src/.git_version | 4 +-
src/Makefile-env.am | 4 +-
src/Makefile.in | 8 +-
src/ceph-disk/ceph_disk/main.py | 18 +-
src/ceph_fuse.cc | 7 +
src/ceph_osd.cc | 3 +-
src/client/Client.cc | 22 ++-
src/client/Client.h | 2 +-
src/cls/rgw/cls_rgw_ops.h | 6 +-
src/common/Thread.cc | 5 +-
src/common/buffer.cc | 10 +-
src/common/ceph_time.h | 7 +-
src/common/config_opts.h | 14 ++
src/common/fs_types.cc | 16 +-
src/crush/CrushWrapper.cc | 2 +-
src/include/ceph_fs.h | 5 +
src/include/fs_types.h | 2 +
src/include/rados.h | 5 -
src/journal/FutureImpl.cc | 14 +-
src/journal/FutureImpl.h | 6 +-
src/journal/JournalRecorder.cc | 3 +-
src/journal/JournalTrimmer.cc | 3 +-
src/journal/ObjectPlayer.cc | 15 +-
src/librados/IoCtxImpl.cc | 109 +++++++----
src/librbd/ImageCtx.cc | 2 +-
src/librbd/Journal.cc | 10 +-
src/librbd/internal.cc | 16 +-
src/logrotate.conf | 2 +-
src/mds/Beacon.cc | 7 +
src/mds/CInode.cc | 47 ++---
src/mds/CInode.h | 9 +
src/mds/FSMap.cc | 50 ++++-
src/mds/FSMap.h | 9 +-
src/mds/Locker.cc | 1 +
src/mds/MDBalancer.cc | 2 +
src/mds/MDCache.cc | 5 +
src/mds/MDSDaemon.cc | 26 ++-
src/mds/MDSMap.cc | 32 +++-
src/mds/MDSMap.h | 30 ++-
src/mds/MDSRank.cc | 48 +++--
src/mds/events/ESessions.h | 2 +-
src/mds/journal.cc | 19 ++
src/messages/MMDSBeacon.h | 3 +-
src/mon/MDSMonitor.cc | 101 +++++++++-
src/mon/MonCommands.h | 11 +-
src/mon/OSDMonitor.cc | 13 +-
src/os/ObjectStore.cc | 11 +-
src/os/ObjectStore.h | 16 +-
src/os/bluestore/BlueFS.cc | 244 +++++++++++++++++-------
src/os/bluestore/BlueFS.h | 59 ++++--
src/os/bluestore/BlueStore.cc | 63 ++++---
src/os/bluestore/BlueStore.h | 4 +-
src/os/filestore/FileStore.cc | 53 +++++-
src/os/filestore/FileStore.h | 8 +-
src/os/filestore/IndexManager.cc | 5 +-
src/os/filestore/LFNIndex.cc | 211 +++++++++++----------
src/os/filestore/LFNIndex.h | 25 ++-
src/os/filestore/chain_xattr.cc | 110 ++++-------
src/os/filestore/chain_xattr.h | 99 +++++++++-
src/os/kstore/KStore.h | 5 +-
src/os/memstore/MemStore.h | 4 +-
src/osd/OSD.cc | 24 +--
src/osd/OSD.h | 8 +-
src/osd/PG.cc | 19 +-
src/osd/ReplicatedPG.cc | 70 +++++--
src/osdc/Objecter.cc | 26 +--
src/osdc/Objecter.h | 290 ++++++++++++++++++++---------
src/pybind/ceph_rest_api.py | 9 -
src/rgw/librgw.cc | 15 +-
src/rgw/rgw_admin.cc | 17 +-
src/rgw/rgw_common.h | 1 +
src/rgw/rgw_ldap.h | 2 +
src/rgw/rgw_op.cc | 6 +-
src/rgw/rgw_op.h | 2 +
src/rgw/rgw_rest.cc | 5 +
src/rgw/rgw_rest_conn.cc | 2 +-
src/rgw/rgw_rest_s3.cc | 18 +-
src/rgw/rgw_sync.cc | 25 ++-
src/test/cli/ceph-authtool/help.t | 11 +-
src/test/cli/ceph-authtool/manpage.t | 11 +-
src/test/cli/ceph-authtool/simple.t | 11 +-
src/test/encoding/check-generated.sh | 2 +-
src/test/encoding/types.h | 4 +
src/test/journal/test_FutureImpl.cc | 43 +++--
src/test/journal/test_ObjectRecorder.cc | 55 ++----
src/test/librados/misc.cc | 108 ++++++++++-
src/test/librbd/test_mock_Journal.cc | 3 +
src/test/librgw_file_nfsns.cc | 15 ++
src/test/objectstore/chain_xattr.cc | 114 ++++++++++++
src/test/objectstore/test_bluefs.cc | 20 +-
src/test/os/TestLFNIndex.cc | 2 +-
src/test/pybind/test_ceph_argparse.py | 5 +
src/test/rbd_mirror/test_ClusterWatcher.cc | 6 +-
src/test/rbd_mirror/test_ImageReplayer.cc | 11 +-
src/test/rbd_mirror/test_ImageSync.cc | 5 +
src/test/rbd_mirror/test_PoolWatcher.cc | 6 +-
src/test/rbd_mirror/test_fixture.cc | 6 +-
src/tools/Makefile-client.am | 1 +
src/tools/ceph_authtool.cc | 15 +-
src/tools/rbd/action/Journal.cc | 42 ++---
src/tools/rbd_mirror/Replayer.cc | 56 ++++--
src/vstart.sh | 2 +-
systemd/ceph-mds@.service | 1 +
systemd/ceph-mon@.service | 1 +
systemd/ceph-osd@.service | 1 +
systemd/ceph-radosgw@.service | 1 +
systemd/ceph-rbd-mirror@.service | 1 +
150 files changed, 2165 insertions(+), 900 deletions(-)
diff --git a/AUTHORS b/AUTHORS
index f0a01f6..eba7075 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -2,6 +2,7 @@ Aaron Bassett <abassett at gmail.com>
Abhishek Dixit <dixitabhi at gmail.com>
Abhishek Lekshmanan <abhishek.lekshmanan at ril.com>
Abhishek Lekshmanan <abhishek at suse.com>
+Abhishek Lekshmanan <alekshmanan at suse.com>
Abhishek Varshney <abhishek.varshney at flipkart.com>
Accela Zhao <accelazh at gmail.com>
Adam C. Emerson <aemerson at linuxbox.com>
@@ -228,7 +229,7 @@ Jean-Rémi Deveaux <jeanremi.deveaux at gmail.com>
Jeff Epstein <jepst79 at gmail.com>
Jeffrey Lu <lzhng2000 at aliyun.com>
Jeff Weber <jweber at cofront.net>
-Jenkins Build Slave User <jenkins-build at trusty-small-unique--5c6e9c4e-81af-43d3-957d-c650c692c441.localdomain>
+Jenkins Build Slave User <jenkins-build at trusty-small-unique--a7f82f5f-8832-433e-a632-928924f47e04.localdomain>
Jenkins <jenkins at ceph.com>
Jens-Christian Fischer <jens-christian.fischer at switch.ch>
Jeremy Qian <vanpire110 at 163.com>
@@ -289,6 +290,7 @@ Kevin Jones <k.j.jonez at gmail.com>
Kim Vandry <vandry at TZoNE.ORG>
Kiseleva Alyona <akiselyova at mirantis.com>
Kongming Wu <wu.kongming at h3c.com>
+Kris Jurka <kjurka at locatortechnologies.com>
Krzysztof Kosiński <krzysztof.kosinski at intel.com>
Kuan Kai Chiu <big.chiu at bigtera.com>
Kun Huang <academicgareth at gmail.com>
@@ -417,6 +419,7 @@ Ross Turk <rturk at redhat.com>
Ruben Kerkhof <ruben at rubenkerkhof.com>
Ruifeng Yang <yangruifeng.09209 at h3c.com>
runsisi <runsisi at hust.edu.cn>
+runsisi <runsisi at zte.com.cn>
Rust Shen <rustinpeace at 163.com>
Rutger ter Borg <rutger at terborg.net>
Sage Weil <sage at inktank.com>
@@ -430,7 +433,6 @@ Sandon Van Ness <sandon at inktank.com>
Sandon Van Ness <svanness at redhat.com>
Sangdi Xu <xu.sangdi at h3c.com>
Sarthak Munshi <sarthakmunshi at gmail.com>
-scienceluo <luo.kexue at zte.com.cn>
Scott A. Brandt <scott at cs.ucsc.edu>
Scott Devoid <devoid at anl.gov>
Sean Channel <pentabular at gmail.com>
@@ -516,6 +518,7 @@ Weijun Duan <duanweijun at h3c.com>
Wei Luo <luowei at yahoo-inc.com>
Wei Luo <weilluo at tencent.com>
Wei Qian <weiq at dtdream.com>
+weiqiaomiao <wei.qiaomiao at zte.com.cn>
Wenjun Huang <wenjunhuang at tencent.com>
Wesley Spikes <wesley.spikes at dreamhost.com>
Wido den Hollander <wido at 42on.com>
diff --git a/ChangeLog b/ChangeLog
index 4c75c68..55473c6 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,107 @@
-ce50389 (HEAD, tag: v10.1.1, origin/jewel) 10.1.1
+4a2a6f7 (HEAD, tag: v10.1.2, origin/jewel) 10.1.2
+8b98556 PG: set epoch_created and parent_split_bits for child pg
+bd1c548 test: fix ut test failure caused by lfn change
+45219e0 Fixed ceph-common install.
+fd2f455 mds: take standby_for_fscid into account in FSMap::find_unused()
+b6d8c32 librbd: Fixed bug in disabling non-primary image mirroring
+14a66f6 ceph-disk: fix PrepareData.set_type() args should be class member.
+4c203b3 ceph-disk: fix spelling mistake geattr to getattr.
+5b098ca ceph-disk: fix lockbox set_or_create_partition()
+b7708da radosgw-admin: fix name checking
+770846b radosgw-admin: allow setting zone when there is no realm
+49886d5 check-generated.sh: can't source bash from sh
+8bc8085 tests: add Ubuntu 16.04 xenial dockerfile
+0e4a92e crush: fix typo
+0a622e6 doc: rgw admin uses "region list" not "regions list"
+3c77292 journal: fix final result for JournalTrimmer::C_RemoveSet
+3b54d5d test/rados/misc.cc: add long locator key and namespace tests
+cb03d4d LFNIndex: use chain_getxattr_buf
+c7db303 chain_xattr: add chain_getxattr_buf
+755c685 LFNIndex::lfn_parse_object_name: return int rather than bool
+e4916f6 LFNIndex::list_objects: lfn_translate does not set errno
+25f937e FileStore::set_xattr_limits_via_conf: add warning if max xattr size smaller than max name
+18b9f95 test/objectstore/chain_xattr.cc: add test for ensure_single_attr
+73778f1 LFNIndex: ensure that lfn and directory attrs are written atomically
+ac750ce chain_[f]getxattr: always use size, no reaon to consider CHAIN_XATTR_MAX_BLOCK_LEN
+8dc0330 chain_xattr: s/onechunk/skip_chain_cleanup, add ensure_single_attr
+21487fd os/,osd/: restructure the rados name length check
+8770043 ceph-disk: fix set_data_partition() when data is partition.
+a330078 rgw-admin: fix period delete error message
+3320f8f rgw-admin: remove unused iterator
+64a8a6a rbd-mirror: fixed bug that caused infinite loop when disabling image mirroring
+a651598 mailmap: Luo Kexue name normalization
+c36c5d4 mailmap: Ning Yao affiliation
+eb536bb mailmap: Vitja Makarov affiliation
+323276e release-notes: v10.1.1 release notes
+18d06ef release-notes: v10.1.1 release notes (draft)
+1be8dba mailmap: sunspot affiliation
+c256a9c mailmap: Qinghua Jin affiliation
+c312b52 mailmap: Richard W.M. Jones affiliation
+b441722 mailmap: Ira Cooper affiliation
+f4f9d91 mailmap: Star Guo affiliation
+ceb2a72 mailmap: Xiaoxi Chen affiliation
+885db95 mailmap: Jenkins name normalization
+fb63721 releases: firefly was EOL december 2015
+fc29dc2 releases: update the v10.1.0 release link
+d82c497 doc: add a page for CephFS' experimental features describing lockout rules
+6974ed4 doc: update cephfs administration page for enable_multiple safety check
+4b3802e mdsmonitor: unify experimental warnings
+ea1b276 test: update tests for new flag requirements
+4bc4e74 MDSMap: lock out multimds clusters and directory fragmentation by default
+15d1fd4 MDSMap: switch from booleans to flags for feature enablement logging
+cda1c1a FSMap: add output for enabled_multiple flags
+3130132 MDSMonitor: make enabling multiple FSes a lot scarier
+3b9371a FSMap: print out compat instead of erroneously repeating enable_multiple
+2429463 ceph-dencoder: add FSMap
+15cabdc rgw_ldap: make ldap.h inclusion conditional
+d58e5fe rgw: fix problem deleting objects begining with double underscore
+492a572 test: fix memory leaks in rbd-mirror test cases
+acfc2b1 test: avoid leaking librados connections when creating pools
+88e244c rbd: journal reset should disable/re-enable journaling feature
+2fa4147 osd/ReplicatedPG: clean up temp object if copy-from fails
+a0bb575 ceph_test_rados_api_misc: make CopyFrom omap test be a big object
+134416a Revert "rados: Add new field flags for ceph_osd_op.copy_get."
+017d830 rbd-mirror: workaround for intermingled lockdep singletons
+98744fd logrotate.conf: poke ceph-fuse after log rotation
+91e0be0 ceph-fuse: reopen log file on SIGHUP
+60679fc librbd: restore out-of-band future callbacks to avoid lock cycles
+e98d046 MDS: unregister command add in clean_up_admin_socket
+2f4bc84 rgw: aws4 subdomain calling bugfix
+dab0b55 rgw: the map 'headers' is assigned a wrong value
+f01261f authtool: fix test output validation & minor whitespace.
+408964e journal: fix context memory leak when shutting down live replay
+f931066 journal: Future does not require metadata shared pointer
+d3dbd85 mon: warn if 'sortbitwise' flag is not set and no legacy OSDs are present
+d544e44 mds: validate file layouts during replay
+9414bef debian/rules: include ceph-mds-*.conf upstart files in ceph-mds
+45a0bc1 mds: add operator<< for file_layout_t
+693f46a mds: skip size recovery on non-file inodes
+0f09642 mds: tidy up backtrace pool handling
+4ddcf41 mds: health metric for being read only
+79b19a6 osd: cancel scrub if noscrub is set for pool or all
+4d3aef7 osd: reset tp handle when search for boundary of chunky-scrub
+25d8007 os/bluestore: use short, relative paths with bluefs
+ce50389 (tag: v10.1.1) 10.1.1
+6c0ab75 librbd: disallow unsafe rbd_op_threads values
+bb4c2ca librgw/rgw_file: correctly handle object permissions
+6851822 rgw_file: print DIRS1 read parameters at verbose
+d84f55f rgw_file: fix attributes for "special" test cases
+1bd1ffd rgw_file unit tests: validate Unix owners in DIRS1
+8e2c804 authtool: update --help and manpage to match code.
+dffd867 build: Respect TMPDIR for virtualenv.
+048251b common/fs_types: dump pool_id signed
+cd41ca2 mds: fix legacy layout decode with pool 0
+b8e0458 rgw: retry read_log_info() while master is down
+05cafcf Drop any systemd imposed process/thread limits
02ab8a2 mrun: update path to cmake binaries
+67f8f1f os/bluestore/BlueFS: add some perfcounters
+75ddd73 os/bluestore/BlueFS: revamp bdev ids
+a5564a6 os/ObjectStore: make device uuid probe output something friendly
+4f6523d rgw: aws4 uri encoding bugfix
+bc9607b mon/OSDMonitor: fix off-by-one for osd_map_message_max
+81cc288 osd: improve full map requests
+2e22f54 osd: create rerequest_full_maps() helper
+961a46f client: fix pool permisson check
d248128 config: fix setuser_match_path typo
d5ec33f tests: Removing one ceph-dencoder call in check-generated.sh
4af1aa6 tests: Fixing python statement in ceph_objectstore_tool.py
@@ -18,10 +120,24 @@ d66c852 tests: Adding parallelism for sequential ceph-dencoder calls
8b6be11 tests: Adding parallelism to encoding/readable.sh
db31cc6 tests: Adding parallelism helpers in ceph-helpers.sh
93ace63 cmake: fix the build of test_rados_api_list
+524f8e6 mds: unregister newly added commands correctly
+da27c33 mds: avoid key renew storm on clock skew
+7dae094 mds: drop message reference on error exit
+1f54f73 mds: fix message leak during handle_core_message()
b7a5f8b test: TestMirroringWatcher test cases were not closing images
+ae39517 rgw: fix a typo in error message
8231208 global/global_init: expand metavariables in setuser_match_path
+1a6c686 mds: Add cmapv to ESessions default constructor initializer list
dd167cf crush: fix error log
+3562323 librados: use Objecter::prepare_*_op helpers to set c->tid safely
+cd167c0 osdc/Objecter: create more prepare_foo_op() helpers
f47e06b tests: Fixing broken test/cephtool-test-mon.sh test
+8f9e7b0 rgw: Do not try to encode or decode time_t
+778506e time: Change temporary variable types in time decode
+4a88a7f librados: fix narrow race with tid return value assignment
+b7eb86f osdc/Objecter: fix narrow race with tid assignment
+1de73d7 qa/workunits/rest/test.py: fs flag set enable_multiple true
+42e692a ceph-rest-api: do not include single-option CephChoices in prefix
9565a50 set 128MB tcmalloc cache size by bytes
ff9843b Striper: reduce assemble_result log level
f812199 qa/workunits/rbd: qemu tests need to wait for image to be created
@@ -36,6 +152,7 @@ a92fa83 osdmap: rm nonused variable
c432691 os/ObjectStore: add noexcept to ensure move ctor is used
1c2831a common/Cycles: Do not initialize Cycles globally.
ec79b64 unittest_erasure_code_plugin: fix deadlock caused by locked mutex in cancelled thread
+554d1b4 ReplicatedPG::_rollback_to: update the OMAP flag
aedc529 test: Fix test to run with btrfs which has snap_### dirs
3dd5249 librbd: avoid throwing error if mirroring is unsupported
280b8a1 rgw: add exclusive flag to set_as_default()
@@ -44,7 +161,9 @@ aedc529 test: Fix test to run with btrfs which has snap_### dirs
c4efef5 rgw: add a few missing cmdline switches in help
09b5356 cls_journal: fix -EEXIST checking
2c0f03a rgw_admin: remove unused parent_period arg
+85229f6 mds: fix potential null pointer access
a29b96a debian/rules: put init-ceph in /etc/init.d/ceph, not ceph-base
+09c4195 MDSMonitor: introduce command 'fs set_default <fs_name>'
602425a configure: Add -D_LARGEFILE64_SOURCE to Linux build.
639f158 mon: remove unnecessary comment for update_from_paxos
f5ef4d4 cmake: add missing librbd/MirrorWatcher.cc and librd/ObjectWatcher.cc
@@ -161,6 +280,7 @@ f3ebe46 rbd: rbd-mirroring: Updated rbd mirroring unit tests to reflect the new
bc254c8 rbd: rbd-mirroring: Enabling image mirroring depends on pool mirroring mode
668c8f9 script: subscription-manager support (part 3)
97b74bd osd/ClassHandler: only dlclose() the classes not missing
+dad3b84 client: pass 'newly issued caps' to Client::check_cap_issue()
349c81f ceph_test_rados_api_pool: fix command for readonly cache-mode
ad2e6f4 ceph.in: update for cmake path changes
5da6ae8 vstart: update for cmake build path changes
@@ -592,6 +712,8 @@ a0a8dcc rgw: free components on shutdown
bdcff15 mds: fix FSMap upgrade on mixed mon versions
38fd3f1 rgw: LDAP pass-through authentication
e52f7b4 mds: fix FSMap upgrade with daemons in the map
+7b33156 common: thread: allow set_affinity() to return a error code
+e10c6e4 common: buffer: put a guard for stat() syscall during read_file
1ea1735 osd: fix wrong counter for batch objects removal during remove_dir()
12d151f osd: initialize last_recalibrate field at construction
f1a4490 ceph.spec.in: disable lttng and babeltrace explicitly
diff --git a/ceph.spec b/ceph.spec
index 5190979..265a7c9 100644
--- a/ceph.spec
+++ b/ceph.spec
@@ -74,7 +74,7 @@ restorecon -R /var/log/radosgw > /dev/null 2>&1;
# common
#################################################################################
Name: ceph
-Version: 10.1.1
+Version: 10.1.2
Release: 0%{?dist}
Epoch: 1
Summary: User space components of the Ceph file system
diff --git a/configure b/configure
index 01072fd..1a373da 100755
--- a/configure
+++ b/configure
@@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.69 for ceph 10.1.1.
+# Generated by GNU Autoconf 2.69 for ceph 10.1.2.
#
# Report bugs to <ceph-devel at vger.kernel.org>.
#
@@ -590,8 +590,8 @@ MAKEFLAGS=
# Identity of this package.
PACKAGE_NAME='ceph'
PACKAGE_TARNAME='ceph'
-PACKAGE_VERSION='10.1.1'
-PACKAGE_STRING='ceph 10.1.1'
+PACKAGE_VERSION='10.1.2'
+PACKAGE_STRING='ceph 10.1.2'
PACKAGE_BUGREPORT='ceph-devel at vger.kernel.org'
PACKAGE_URL=''
@@ -1582,7 +1582,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
-\`configure' configures ceph 10.1.1 to adapt to many kinds of systems.
+\`configure' configures ceph 10.1.2 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1653,7 +1653,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
- short | recursive ) echo "Configuration of ceph 10.1.1:";;
+ short | recursive ) echo "Configuration of ceph 10.1.2:";;
esac
cat <<\_ACEOF
@@ -1837,7 +1837,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
-ceph configure 10.1.1
+ceph configure 10.1.2
generated by GNU Autoconf 2.69
Copyright (C) 2012 Free Software Foundation, Inc.
@@ -2913,7 +2913,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
-It was created by ceph $as_me 10.1.1, which was
+It was created by ceph $as_me 10.1.2, which was
generated by GNU Autoconf 2.69. Invocation command line was
$ $0 $@
@@ -16408,7 +16408,7 @@ fi
# Define the identity of the package.
PACKAGE='ceph'
- VERSION='10.1.1'
+ VERSION='10.1.2'
cat >>confdefs.h <<_ACEOF
@@ -26100,7 +26100,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
-This file was extended by ceph $as_me 10.1.1, which was
+This file was extended by ceph $as_me 10.1.2, which was
generated by GNU Autoconf 2.69. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
@@ -26166,7 +26166,7 @@ _ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
ac_cs_version="\\
-ceph config.status 10.1.1
+ceph config.status 10.1.2
configured by $0, generated by GNU Autoconf 2.69,
with options \\"\$ac_cs_config\\"
diff --git a/configure.ac b/configure.ac
index efd760a..0d6427f 100644
--- a/configure.ac
+++ b/configure.ac
@@ -8,7 +8,7 @@ AC_PREREQ(2.59)
# VERSION define is not used by the code. It gets a version string
# from 'git describe'; see src/ceph_ver.[ch]
-AC_INIT([ceph], [10.1.1], [ceph-devel at vger.kernel.org])
+AC_INIT([ceph], [10.1.2], [ceph-devel at vger.kernel.org])
AX_CXX_COMPILE_STDCXX_11(, mandatory)
diff --git a/doc/man/8/ceph-authtool.rst b/doc/man/8/ceph-authtool.rst
index 8565eba..0187d89 100644
--- a/doc/man/8/ceph-authtool.rst
+++ b/doc/man/8/ceph-authtool.rst
@@ -9,9 +9,18 @@
Synopsis
========
-| **ceph-authtool** *keyringfile* [ -l | --list ] [ -C | --create-keyring
- ] [ -p | --print ] [ -n | --name *entityname* ] [ --gen-key ] [ -a |
- --add-key *base64_key* ] [ --caps *capfile* ]
+| **ceph-authtool** *keyringfile*
+ [ -l | --list ]
+ [ -p | --print ]
+ [ -C | --create-keyring ]
+ [ -g | --gen-key ]
+ [ --gen-print-key ]
+ [ --import-keyring *otherkeyringfile* ]
+ [ -n | --name *entityname* ]
+ [ -u | --set-uid *auid* ]
+ [ -a | --add-key *base64_key* ]
+ [ --cap *subsystem* *capability* ]
+ [ --caps *capfile* ]
Description
@@ -45,19 +54,36 @@ Options
will create a new keyring, overwriting any existing keyringfile
-.. option:: --gen-key
+.. option:: -g, --gen-key
will generate a new secret key for the specified entityname
-.. option:: --add-key
+.. option:: --gen-print-key
+
+ will generate a new secret key for the specified entityname,
+ without altering the keyringfile, printing the secret to stdout
+
+.. option:: --import-keyring *secondkeyringfile*
+
+ will import the content of a given keyring to the keyringfile
+
+.. option:: -n, --name *name*
+
+ specify entityname to operate on
+
+.. option:: -u, --set-uid *auid*
+
+ sets the auid (authenticated user id) for the specified entityname
+
+.. option:: -a, --add-key *base64_key*
will add an encoded key to the keyring
-.. option:: --cap subsystem capability
+.. option:: --cap *subsystem* *capability*
will set the capability for given subsystem
-.. option:: --caps capsfile
+.. option:: --caps *capsfile*
will set all of capabilities associated with a given key, for all subsystems
diff --git a/doc/man/8/radosgw-admin.rst b/doc/man/8/radosgw-admin.rst
index 54d690e..b4d75ff 100644
--- a/doc/man/8/radosgw-admin.rst
+++ b/doc/man/8/radosgw-admin.rst
@@ -107,7 +107,7 @@ which are as follows:
:command:`region get`
Show region info.
-:command:`regions list`
+:command:`region list`
List all regions set on this cluster.
:command:`region set`
diff --git a/man/ceph-authtool.8 b/man/ceph-authtool.8
index 075efe3..44c7610 100644
--- a/man/ceph-authtool.8
+++ b/man/ceph-authtool.8
@@ -1,6 +1,6 @@
.\" Man page generated from reStructuredText.
.
-.TH "CEPH-AUTHTOOL" "8" "April 06, 2016" "dev" "Ceph"
+.TH "CEPH-AUTHTOOL" "8" "April 12, 2016" "dev" "Ceph"
.SH NAME
ceph-authtool \- ceph keyring manipulation tool
.
@@ -32,9 +32,18 @@ level margin: \\n[rst2man-indent\\n[rst2man-indent-level]]
..
.SH SYNOPSIS
.nf
-\fBceph\-authtool\fP \fIkeyringfile\fP [ \-l | \-\-list ] [ \-C | \-\-create\-keyring
-] [ \-p | \-\-print ] [ \-n | \-\-name \fIentityname\fP ] [ \-\-gen\-key ] [ \-a |
-\-\-add\-key \fIbase64_key\fP ] [ \-\-caps \fIcapfile\fP ]
+\fBceph\-authtool\fP \fIkeyringfile\fP
+[ \-l | \-\-list ]
+[ \-p | \-\-print ]
+[ \-C | \-\-create\-keyring ]
+[ \-g | \-\-gen\-key ]
+[ \-\-gen\-print\-key ]
+[ \-\-import\-keyring \fIotherkeyringfile\fP ]
+[ \-n | \-\-name \fIentityname\fP ]
+[ \-u | \-\-set\-uid \fIauid\fP ]
+[ \-a | \-\-add\-key \fIbase64_key\fP ]
+[ \-\-cap \fIsubsystem\fP \fIcapability\fP ]
+[ \-\-caps \fIcapfile\fP ]
.fi
.sp
.SH DESCRIPTION
@@ -69,22 +78,43 @@ will create a new keyring, overwriting any existing keyringfile
.UNINDENT
.INDENT 0.0
.TP
-.B \-\-gen\-key
+.B \-g, \-\-gen\-key
will generate a new secret key for the specified entityname
.UNINDENT
.INDENT 0.0
.TP
-.B \-\-add\-key
+.B \-\-gen\-print\-key
+will generate a new secret key for the specified entityname,
+without altering the keyringfile, printing the secret to stdout
+.UNINDENT
+.INDENT 0.0
+.TP
+.B \-\-import\-keyring *secondkeyringfile*
+will import the content of a given keyring to the keyringfile
+.UNINDENT
+.INDENT 0.0
+.TP
+.B \-n, \-\-name *name*
+specify entityname to operate on
+.UNINDENT
+.INDENT 0.0
+.TP
+.B \-u, \-\-set\-uid *auid*
+sets the auid (authenticated user id) for the specified entityname
+.UNINDENT
+.INDENT 0.0
+.TP
+.B \-a, \-\-add\-key *base64_key*
will add an encoded key to the keyring
.UNINDENT
.INDENT 0.0
.TP
-.B \-\-cap subsystem capability
+.B \-\-cap *subsystem* *capability*
will set the capability for given subsystem
.UNINDENT
.INDENT 0.0
.TP
-.B \-\-caps capsfile
+.B \-\-caps *capsfile*
will set all of capabilities associated with a given key, for all subsystems
.UNINDENT
.SH CAPABILITIES
diff --git a/man/ceph-clsinfo.8 b/man/ceph-clsinfo.8
index d1974b0..62dfccb 100644
--- a/man/ceph-clsinfo.8
+++ b/man/ceph-clsinfo.8
@@ -1,6 +1,6 @@
.\" Man page generated from reStructuredText.
.
-.TH "CEPH-CLSINFO" "8" "April 06, 2016" "dev" "Ceph"
+.TH "CEPH-CLSINFO" "8" "April 12, 2016" "dev" "Ceph"
.SH NAME
ceph-clsinfo \- show class object information
.
diff --git a/man/ceph-conf.8 b/man/ceph-conf.8
index 73ea29c..ec78d67 100644
--- a/man/ceph-conf.8
+++ b/man/ceph-conf.8
@@ -1,6 +1,6 @@
.\" Man page generated from reStructuredText.
.
-.TH "CEPH-CONF" "8" "April 06, 2016" "dev" "Ceph"
+.TH "CEPH-CONF" "8" "April 12, 2016" "dev" "Ceph"
.SH NAME
ceph-conf \- ceph conf file tool
.
diff --git a/man/ceph-create-keys.8 b/man/ceph-create-keys.8
index d9c5ea4..04d8248 100644
--- a/man/ceph-create-keys.8
+++ b/man/ceph-create-keys.8
@@ -1,6 +1,6 @@
.\" Man page generated from reStructuredText.
.
-.TH "CEPH-CREATE-KEYS" "8" "April 06, 2016" "dev" "Ceph"
+.TH "CEPH-CREATE-KEYS" "8" "April 12, 2016" "dev" "Ceph"
.SH NAME
ceph-create-keys \- ceph keyring generate tool
.
diff --git a/man/ceph-debugpack.8 b/man/ceph-debugpack.8
index f6efc37..b130dc6 100644
--- a/man/ceph-debugpack.8
+++ b/man/ceph-debugpack.8
@@ -1,6 +1,6 @@
.\" Man page generated from reStructuredText.
.
-.TH "CEPH-DEBUGPACK" "8" "April 06, 2016" "dev" "Ceph"
+.TH "CEPH-DEBUGPACK" "8" "April 12, 2016" "dev" "Ceph"
.SH NAME
ceph-debugpack \- ceph debug packer utility
.
diff --git a/man/ceph-dencoder.8 b/man/ceph-dencoder.8
index afe930d..3637d95 100644
--- a/man/ceph-dencoder.8
+++ b/man/ceph-dencoder.8
@@ -1,6 +1,6 @@
.\" Man page generated from reStructuredText.
.
-.TH "CEPH-DENCODER" "8" "April 06, 2016" "dev" "Ceph"
+.TH "CEPH-DENCODER" "8" "April 12, 2016" "dev" "Ceph"
.SH NAME
ceph-dencoder \- ceph encoder/decoder utility
.
diff --git a/man/ceph-deploy.8 b/man/ceph-deploy.8
index 0ca7600..e3d5d4a 100644
--- a/man/ceph-deploy.8
+++ b/man/ceph-deploy.8
@@ -1,6 +1,6 @@
.\" Man page generated from reStructuredText.
.
-.TH "CEPH-DEPLOY" "8" "April 06, 2016" "dev" "Ceph"
+.TH "CEPH-DEPLOY" "8" "April 12, 2016" "dev" "Ceph"
.SH NAME
ceph-deploy \- Ceph deployment tool
.
diff --git a/man/ceph-detect-init.8 b/man/ceph-detect-init.8
index 21a65f4..ccd3e6c 100644
--- a/man/ceph-detect-init.8
+++ b/man/ceph-detect-init.8
@@ -1,6 +1,6 @@
.\" Man page generated from reStructuredText.
.
-.TH "CEPH-DETECT-INIT" "8" "April 06, 2016" "dev" "Ceph"
+.TH "CEPH-DETECT-INIT" "8" "April 12, 2016" "dev" "Ceph"
.SH NAME
ceph-detect-init \- display the init system Ceph should use
.
diff --git a/man/ceph-disk.8 b/man/ceph-disk.8
index fa4728e..c59faf9 100644
--- a/man/ceph-disk.8
+++ b/man/ceph-disk.8
@@ -1,6 +1,6 @@
.\" Man page generated from reStructuredText.
.
-.TH "CEPH-DISK" "8" "April 06, 2016" "dev" "Ceph"
+.TH "CEPH-DISK" "8" "April 12, 2016" "dev" "Ceph"
.SH NAME
ceph-disk \- Ceph disk utility for OSD
.
diff --git a/man/ceph-fuse.8 b/man/ceph-fuse.8
index 0a03a44..3cea297 100644
--- a/man/ceph-fuse.8
+++ b/man/ceph-fuse.8
@@ -1,6 +1,6 @@
.\" Man page generated from reStructuredText.
.
-.TH "CEPH-FUSE" "8" "April 06, 2016" "dev" "Ceph"
+.TH "CEPH-FUSE" "8" "April 12, 2016" "dev" "Ceph"
.SH NAME
ceph-fuse \- FUSE-based client for ceph
.
diff --git a/man/ceph-mds.8 b/man/ceph-mds.8
index ac0cc19..e77e3c2 100644
--- a/man/ceph-mds.8
+++ b/man/ceph-mds.8
@@ -1,6 +1,6 @@
.\" Man page generated from reStructuredText.
.
-.TH "CEPH-MDS" "8" "April 06, 2016" "dev" "Ceph"
+.TH "CEPH-MDS" "8" "April 12, 2016" "dev" "Ceph"
.SH NAME
ceph-mds \- ceph metadata server daemon
.
diff --git a/man/ceph-mon.8 b/man/ceph-mon.8
index d2a8707..a8c744f 100644
--- a/man/ceph-mon.8
+++ b/man/ceph-mon.8
@@ -1,6 +1,6 @@
.\" Man page generated from reStructuredText.
.
-.TH "CEPH-MON" "8" "April 06, 2016" "dev" "Ceph"
+.TH "CEPH-MON" "8" "April 12, 2016" "dev" "Ceph"
.SH NAME
ceph-mon \- ceph monitor daemon
.
diff --git a/man/ceph-osd.8 b/man/ceph-osd.8
index a7c6345..94dd69c 100644
--- a/man/ceph-osd.8
+++ b/man/ceph-osd.8
@@ -1,6 +1,6 @@
.\" Man page generated from reStructuredText.
.
-.TH "CEPH-OSD" "8" "April 06, 2016" "dev" "Ceph"
+.TH "CEPH-OSD" "8" "April 12, 2016" "dev" "Ceph"
.SH NAME
ceph-osd \- ceph object storage daemon
.
diff --git a/man/ceph-post-file.8 b/man/ceph-post-file.8
index b49d538..a762e4e 100644
--- a/man/ceph-post-file.8
+++ b/man/ceph-post-file.8
@@ -1,6 +1,6 @@
.\" Man page generated from reStructuredText.
.
-.TH "CEPH-POST-FILE" "8" "April 06, 2016" "dev" "Ceph"
+.TH "CEPH-POST-FILE" "8" "April 12, 2016" "dev" "Ceph"
.SH NAME
ceph-post-file \- post files for ceph developers
.
diff --git a/man/ceph-rbdnamer.8 b/man/ceph-rbdnamer.8
index 28278a7..64749e8 100644
--- a/man/ceph-rbdnamer.8
+++ b/man/ceph-rbdnamer.8
@@ -1,6 +1,6 @@
.\" Man page generated from reStructuredText.
.
-.TH "CEPH-RBDNAMER" "8" "April 06, 2016" "dev" "Ceph"
+.TH "CEPH-RBDNAMER" "8" "April 12, 2016" "dev" "Ceph"
.SH NAME
ceph-rbdnamer \- udev helper to name RBD devices
.
diff --git a/man/ceph-rest-api.8 b/man/ceph-rest-api.8
index 6d42a39..62ea726 100644
--- a/man/ceph-rest-api.8
+++ b/man/ceph-rest-api.8
@@ -1,6 +1,6 @@
.\" Man page generated from reStructuredText.
.
-.TH "CEPH-REST-API" "8" "April 06, 2016" "dev" "Ceph"
+.TH "CEPH-REST-API" "8" "April 12, 2016" "dev" "Ceph"
.SH NAME
ceph-rest-api \- ceph RESTlike administration server
.
diff --git a/man/ceph-run.8 b/man/ceph-run.8
index f1f9743..1c78647 100644
--- a/man/ceph-run.8
+++ b/man/ceph-run.8
@@ -1,6 +1,6 @@
.\" Man page generated from reStructuredText.
.
-.TH "CEPH-RUN" "8" "April 06, 2016" "dev" "Ceph"
+.TH "CEPH-RUN" "8" "April 12, 2016" "dev" "Ceph"
.SH NAME
ceph-run \- restart daemon on core dump
.
diff --git a/man/ceph-syn.8 b/man/ceph-syn.8
index 05f5a91..0fc4b59 100644
--- a/man/ceph-syn.8
+++ b/man/ceph-syn.8
@@ -1,6 +1,6 @@
.\" Man page generated from reStructuredText.
.
-.TH "CEPH-SYN" "8" "April 06, 2016" "dev" "Ceph"
+.TH "CEPH-SYN" "8" "April 12, 2016" "dev" "Ceph"
.SH NAME
ceph-syn \- ceph synthetic workload generator
.
diff --git a/man/ceph.8 b/man/ceph.8
index 9ee403b..1a33b1b 100644
--- a/man/ceph.8
+++ b/man/ceph.8
@@ -1,6 +1,6 @@
.\" Man page generated from reStructuredText.
.
-.TH "CEPH" "8" "April 06, 2016" "dev" "Ceph"
+.TH "CEPH" "8" "April 12, 2016" "dev" "Ceph"
.SH NAME
ceph \- ceph administration tool
.
diff --git a/man/cephfs.8 b/man/cephfs.8
index 2772697..8e234ad 100644
--- a/man/cephfs.8
+++ b/man/cephfs.8
@@ -1,6 +1,6 @@
.\" Man page generated from reStructuredText.
.
-.TH "CEPHFS" "8" "April 06, 2016" "dev" "Ceph"
+.TH "CEPHFS" "8" "April 12, 2016" "dev" "Ceph"
.SH NAME
cephfs \- ceph file system options utility
.
diff --git a/man/crushtool.8 b/man/crushtool.8
index c7ebbd7..4859e14 100644
--- a/man/crushtool.8
+++ b/man/crushtool.8
@@ -1,6 +1,6 @@
.\" Man page generated from reStructuredText.
.
-.TH "CRUSHTOOL" "8" "April 06, 2016" "dev" "Ceph"
+.TH "CRUSHTOOL" "8" "April 12, 2016" "dev" "Ceph"
.SH NAME
crushtool \- CRUSH map manipulation tool
.
diff --git a/man/librados-config.8 b/man/librados-config.8
index d6567e5..fc0d539 100644
--- a/man/librados-config.8
+++ b/man/librados-config.8
@@ -1,6 +1,6 @@
.\" Man page generated from reStructuredText.
.
-.TH "LIBRADOS-CONFIG" "8" "April 06, 2016" "dev" "Ceph"
+.TH "LIBRADOS-CONFIG" "8" "April 12, 2016" "dev" "Ceph"
.SH NAME
librados-config \- display information about librados
.
diff --git a/man/monmaptool.8 b/man/monmaptool.8
index ac6f7f2..a505fe6 100644
--- a/man/monmaptool.8
+++ b/man/monmaptool.8
@@ -1,6 +1,6 @@
.\" Man page generated from reStructuredText.
.
-.TH "MONMAPTOOL" "8" "April 06, 2016" "dev" "Ceph"
+.TH "MONMAPTOOL" "8" "April 12, 2016" "dev" "Ceph"
.SH NAME
monmaptool \- ceph monitor cluster map manipulation tool
.
diff --git a/man/mount.ceph.8 b/man/mount.ceph.8
index eb9ba29..cf0cf68 100644
--- a/man/mount.ceph.8
+++ b/man/mount.ceph.8
@@ -1,6 +1,6 @@
.\" Man page generated from reStructuredText.
.
-.TH "MOUNT.CEPH" "8" "April 06, 2016" "dev" "Ceph"
+.TH "MOUNT.CEPH" "8" "April 12, 2016" "dev" "Ceph"
.SH NAME
mount.ceph \- mount a ceph file system
.
diff --git a/man/osdmaptool.8 b/man/osdmaptool.8
index c369b14..1d5e4a7 100644
--- a/man/osdmaptool.8
+++ b/man/osdmaptool.8
@@ -1,6 +1,6 @@
.\" Man page generated from reStructuredText.
.
-.TH "OSDMAPTOOL" "8" "April 06, 2016" "dev" "Ceph"
+.TH "OSDMAPTOOL" "8" "April 12, 2016" "dev" "Ceph"
.SH NAME
osdmaptool \- ceph osd cluster map manipulation tool
.
diff --git a/man/rados.8 b/man/rados.8
index 5cf4aac..670e5ff 100644
--- a/man/rados.8
+++ b/man/rados.8
@@ -1,6 +1,6 @@
.\" Man page generated from reStructuredText.
.
-.TH "RADOS" "8" "April 06, 2016" "dev" "Ceph"
+.TH "RADOS" "8" "April 12, 2016" "dev" "Ceph"
.SH NAME
rados \- rados object storage utility
.
diff --git a/man/radosgw-admin.8 b/man/radosgw-admin.8
index d77aff4..8da8cc2 100644
--- a/man/radosgw-admin.8
+++ b/man/radosgw-admin.8
@@ -1,6 +1,6 @@
.\" Man page generated from reStructuredText.
.
-.TH "RADOSGW-ADMIN" "8" "April 06, 2016" "dev" "Ceph"
+.TH "RADOSGW-ADMIN" "8" "April 12, 2016" "dev" "Ceph"
.SH NAME
radosgw-admin \- rados REST gateway user administration utility
.
@@ -127,7 +127,7 @@ Disable quota.
.B \fBregion get\fP
Show region info.
.TP
-.B \fBregions list\fP
+.B \fBregion list\fP
List all regions set on this cluster.
.TP
.B \fBregion set\fP
diff --git a/man/radosgw.8 b/man/radosgw.8
index 1402859..5daa2c8 100644
--- a/man/radosgw.8
+++ b/man/radosgw.8
@@ -1,6 +1,6 @@
.\" Man page generated from reStructuredText.
.
-.TH "RADOSGW" "8" "April 06, 2016" "dev" "Ceph"
+.TH "RADOSGW" "8" "April 12, 2016" "dev" "Ceph"
.SH NAME
radosgw \- rados REST gateway
.
diff --git a/man/rbd-fuse.8 b/man/rbd-fuse.8
index 744f90b..eaff3ef 100644
--- a/man/rbd-fuse.8
+++ b/man/rbd-fuse.8
@@ -1,6 +1,6 @@
.\" Man page generated from reStructuredText.
.
-.TH "RBD-FUSE" "8" "April 06, 2016" "dev" "Ceph"
+.TH "RBD-FUSE" "8" "April 12, 2016" "dev" "Ceph"
.SH NAME
rbd-fuse \- expose rbd images as files
.
diff --git a/man/rbd-mirror.8 b/man/rbd-mirror.8
index 7f99e32..df2c65d 100644
--- a/man/rbd-mirror.8
+++ b/man/rbd-mirror.8
@@ -1,6 +1,6 @@
.\" Man page generated from reStructuredText.
.
-.TH "RBD-MIRROR" "8" "April 06, 2016" "dev" "Ceph"
+.TH "RBD-MIRROR" "8" "April 12, 2016" "dev" "Ceph"
.SH NAME
rbd-mirror \- Ceph daemon for mirroring RBD images
.
diff --git a/man/rbd-nbd.8 b/man/rbd-nbd.8
index 38b46eb..fafa198 100644
--- a/man/rbd-nbd.8
+++ b/man/rbd-nbd.8
@@ -1,6 +1,6 @@
.\" Man page generated from reStructuredText.
.
-.TH "RBD-NBD" "8" "April 06, 2016" "dev" "Ceph"
+.TH "RBD-NBD" "8" "April 12, 2016" "dev" "Ceph"
.SH NAME
rbd-nbd \- map rbd images to nbd device
.
diff --git a/man/rbd-replay-many.8 b/man/rbd-replay-many.8
index ea3f0dd..0bb3f1c 100644
--- a/man/rbd-replay-many.8
+++ b/man/rbd-replay-many.8
@@ -1,6 +1,6 @@
.\" Man page generated from reStructuredText.
.
-.TH "RBD-REPLAY-MANY" "8" "April 06, 2016" "dev" "Ceph"
+.TH "RBD-REPLAY-MANY" "8" "April 12, 2016" "dev" "Ceph"
.SH NAME
rbd-replay-many \- replay a rados block device (RBD) workload on several clients
.
diff --git a/man/rbd-replay-prep.8 b/man/rbd-replay-prep.8
index c0a080f..49af438 100644
--- a/man/rbd-replay-prep.8
+++ b/man/rbd-replay-prep.8
@@ -1,6 +1,6 @@
.\" Man page generated from reStructuredText.
.
-.TH "RBD-REPLAY-PREP" "8" "April 06, 2016" "dev" "Ceph"
+.TH "RBD-REPLAY-PREP" "8" "April 12, 2016" "dev" "Ceph"
.SH NAME
rbd-replay-prep \- prepare captured rados block device (RBD) workloads for replay
.
diff --git a/man/rbd-replay.8 b/man/rbd-replay.8
index 593563b..d876089 100644
--- a/man/rbd-replay.8
+++ b/man/rbd-replay.8
@@ -1,6 +1,6 @@
.\" Man page generated from reStructuredText.
.
-.TH "RBD-REPLAY" "8" "April 06, 2016" "dev" "Ceph"
+.TH "RBD-REPLAY" "8" "April 12, 2016" "dev" "Ceph"
.SH NAME
rbd-replay \- replay rados block device (RBD) workloads
.
diff --git a/man/rbd.8 b/man/rbd.8
index a0e5603..dc19338 100644
--- a/man/rbd.8
+++ b/man/rbd.8
@@ -1,6 +1,6 @@
.\" Man page generated from reStructuredText.
.
-.TH "RBD" "8" "April 06, 2016" "dev" "Ceph"
+.TH "RBD" "8" "April 12, 2016" "dev" "Ceph"
.SH NAME
rbd \- manage rados block device (RBD) images
.
diff --git a/man/rbdmap.8 b/man/rbdmap.8
index b746517..ebb7d5d 100644
--- a/man/rbdmap.8
+++ b/man/rbdmap.8
@@ -1,6 +1,6 @@
.\" Man page generated from reStructuredText.
.
-.TH "RBDMAP" "8" "April 06, 2016" "dev" "Ceph"
+.TH "RBDMAP" "8" "April 12, 2016" "dev" "Ceph"
.SH NAME
rbdmap \- map RBD devices at boot time
.
diff --git a/src/.git_version b/src/.git_version
index 2f3b43d..17a9430 100644
--- a/src/.git_version
+++ b/src/.git_version
@@ -1,2 +1,2 @@
-ce50389b773fe7f72fca40a3dd69cfe6613eaeb1
-v10.1.1
+4a2a6f72640d6b74a3bbd92798bb913ed380dcd4
+v10.1.2
diff --git a/src/Makefile-env.am b/src/Makefile-env.am
index 2fb22a6..df225d6 100644
--- a/src/Makefile-env.am
+++ b/src/Makefile-env.am
@@ -299,6 +299,8 @@ DENCODER_DEPS =
# put virtualenvs in this directory
# otherwise it may overflow #! 80 kernel limit
-export CEPH_BUILD_VIRTUALENV = /tmp
+# beware that some build environments might not be able to write to /tmp
+export TMPDIR ?= /tmp
+export CEPH_BUILD_VIRTUALENV = $(TMPDIR)
radoslibdir = $(libdir)/rados-classes
diff --git a/src/Makefile.in b/src/Makefile.in
index 1c8f334..4cf18ab 100644
--- a/src/Makefile.in
+++ b/src/Makefile.in
@@ -5153,6 +5153,8 @@ am__rbd_SOURCES_DIST = tools/rbd/rbd.cc tools/rbd/ArgumentTypes.cc \
rbd_OBJECTS = $(am_rbd_OBJECTS)
@ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@rbd_DEPENDENCIES = libjournal.la \
@ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@ libcls_journal_client.la \
+@ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@ libcls_rbd_client.la \
+@ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@ libcls_lock_client.la \
@ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@ $(LIBRBD) \
@ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@ $(LIBRBD_TYPES) \
@ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@ $(LIBRADOS) \
@@ -12261,6 +12263,8 @@ ceph_test_cfuse_cache_invalidate_SOURCES = test/test_cfuse_cache_invalidate.cc
@ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@ $(am__append_267)
@ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@rbd_LDADD = libjournal.la \
@ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@ libcls_journal_client.la \
+@ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@ libcls_rbd_client.la \
+@ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@ libcls_lock_client.la \
@ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@ $(LIBRBD) \
@ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@ $(LIBRBD_TYPES) \
@ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@ $(LIBRADOS) \
@@ -32388,7 +32392,9 @@ export PYTHONPATH=$(top_srcdir)/src/pybind
# put virtualenvs in this directory
# otherwise it may overflow #! 80 kernel limit
-export CEPH_BUILD_VIRTUALENV = /tmp
+# beware that some build environments might not be able to write to /tmp
+export TMPDIR ?= /tmp
+export CEPH_BUILD_VIRTUALENV = $(TMPDIR)
@NO_GIT_VERSION_TRUE@export NO_VERSION="yes"
diff --git a/src/ceph-disk/ceph_disk/main.py b/src/ceph-disk/ceph_disk/main.py
index d0ec596..45e9bb2 100755
--- a/src/ceph-disk/ceph_disk/main.py
+++ b/src/ceph-disk/ceph_disk/main.py
@@ -122,7 +122,7 @@ class Ptype(object):
@staticmethod
def get_ready_by_name(name):
- return [x[name]['ready'] for x in PTYPE.values()]
+ return [x[name]['ready'] for x in PTYPE.values() if name in x]
@staticmethod
def is_regular_space(ptype):
@@ -1876,18 +1876,18 @@ class PrepareSpace(object):
if stat.S_ISBLK(mode):
if getattr(args, name + '_file'):
raise Error('%s is not a regular file' % name.capitalize,
- geattr(args, name))
+ getattr(args, name))
self.type = self.DEVICE
return
if stat.S_ISREG(mode):
if getattr(args, name + '_dev'):
raise Error('%s is not a block device' % name.capitalize,
- geattr(args, name))
+ getattr(args, name))
self.type = self.FILE
raise Error('%s %s is neither a block device nor regular file' %
- (name.capitalize, geattr(args, name)))
+ (name.capitalize, getattr(args, name)))
def is_none(self):
return self.type == self.NONE
@@ -2241,8 +2241,8 @@ class Lockbox(object):
self.args.lockbox)
self.partition = DevicePartition.factory(
path=None, dev=self.args.lockbox, args=self.args)
- ptype = partition.get_ptype()
- ready = Ptype.get_ready_by_type('lockbox')
+ ptype = self.partition.get_ptype()
+ ready = Ptype.get_ready_by_name('lockbox')
if ptype not in ready:
LOG.warning('incorrect partition UUID: %s, expected %s'
% (ptype, str(ready)))
@@ -2384,7 +2384,7 @@ class PrepareData(object):
elif stat.S_ISBLK(dmode):
self.type = self.DEVICE
else:
- raise Error('not a dir or block device', args.data)
+ raise Error('not a dir or block device', self.args.data)
def is_file(self):
return self.type == self.FILE
@@ -2539,8 +2539,8 @@ class PrepareData(object):
self.args.data)
self.partition = DevicePartition.factory(
path=None, dev=self.args.data, args=self.args)
- ptype = partition.get_ptype()
- ready = Ptype.get_ready_by_type('osd')
+ ptype = self.partition.get_ptype()
+ ready = Ptype.get_ready_by_name('osd')
if ptype not in ready:
LOG.warning('incorrect partition UUID: %s, expected %s'
% (ptype, str(ready)))
diff --git a/src/ceph_fuse.cc b/src/ceph_fuse.cc
index 1b72288..a682448 100644
--- a/src/ceph_fuse.cc
+++ b/src/ceph_fuse.cc
@@ -34,6 +34,7 @@ using namespace std;
#include "common/linux_version.h"
#endif
#include "global/global_init.h"
+#include "global/signal_handler.h"
#include "common/safe_io.h"
#include <sys/types.h>
@@ -223,6 +224,9 @@ int main(int argc, const char **argv, const char *envp[]) {
goto out_messenger_start_failed;
}
+ init_async_signal_handler();
+ register_async_signal_handler(SIGHUP, sighup_handler);
+
// start client
r = client->init();
if (r < 0) {
@@ -268,6 +272,9 @@ int main(int argc, const char **argv, const char *envp[]) {
out_shutdown:
client->shutdown();
out_init_failed:
+ unregister_async_signal_handler(SIGHUP, sighup_handler);
+ shutdown_async_signal_handler();
+
// wait for messenger to finish
messenger->shutdown();
messenger->wait();
diff --git a/src/ceph_osd.cc b/src/ceph_osd.cc
index 0c25fb6..7deb5a1 100644
--- a/src/ceph_osd.cc
+++ b/src/ceph_osd.cc
@@ -189,7 +189,8 @@ int main(int argc, const char **argv)
}
if (get_device_fsid) {
uuid_d uuid;
- int r = ObjectStore::probe_block_device_fsid(device_path, &uuid);
+ int r = ObjectStore::probe_block_device_fsid(g_ceph_context, device_path,
+ &uuid);
if (r < 0) {
cerr << "failed to get device fsid for " << device_path
<< ": " << cpp_strerror(r) << std::endl;
diff --git a/src/client/Client.cc b/src/client/Client.cc
index 177a61e..7fcd907 100644
--- a/src/client/Client.cc
+++ b/src/client/Client.cc
@@ -4807,7 +4807,7 @@ void Client::handle_cap_grant(MetaSession *session, Inode *in, Cap *cap, MClient
if (m->get_op() == CEPH_CAP_OP_IMPORT && m->get_wanted() != wanted)
check = true;
- check_cap_issue(in, cap, issued);
+ check_cap_issue(in, cap, new_caps);
// update caps
if (old_caps & ~new_caps) {
@@ -12099,10 +12099,12 @@ int Client::check_pool_perm(Inode *in, int need)
if (!cct->_conf->client_check_pool_perm)
return 0;
- int64_t pool = in->layout.pool_id;
+ int64_t pool_id = in->layout.pool_id;
+ std::string pool_ns = in->layout.pool_ns;
+ std::pair<int64_t, std::string> perm_key(pool_id, pool_ns);
int have = 0;
while (true) {
- std::map<int64_t, int>::iterator it = pool_perms.find(pool);
+ auto it = pool_perms.find(perm_key);
if (it == pool_perms.end())
break;
if (it->second == POOL_CHECKING) {
@@ -12123,7 +12125,7 @@ int Client::check_pool_perm(Inode *in, int need)
return 0;
}
- pool_perms[pool] = POOL_CHECKING;
+ pool_perms[perm_key] = POOL_CHECKING;
char oid_buf[32];
snprintf(oid_buf, sizeof(oid_buf), "%llx.00000000", (unsigned long long)in->ino);
@@ -12155,7 +12157,7 @@ int Client::check_pool_perm(Inode *in, int need)
if (rd_ret == 0 || rd_ret == -ENOENT)
have |= POOL_READ;
else if (rd_ret != -EPERM) {
- ldout(cct, 10) << "check_pool_perm on pool " << pool
+ ldout(cct, 10) << "check_pool_perm on pool " << pool_id << " ns " << pool_ns
<< " rd_err = " << rd_ret << " wr_err = " << wr_ret << dendl;
errored = true;
}
@@ -12163,7 +12165,7 @@ int Client::check_pool_perm(Inode *in, int need)
if (wr_ret == 0 || wr_ret == -EEXIST)
have |= POOL_WRITE;
else if (wr_ret != -EPERM) {
- ldout(cct, 10) << "check_pool_perm on pool " << pool
+ ldout(cct, 10) << "check_pool_perm on pool " << pool_id << " ns " << pool_ns
<< " rd_err = " << rd_ret << " wr_err = " << wr_ret << dendl;
errored = true;
}
@@ -12172,22 +12174,22 @@ int Client::check_pool_perm(Inode *in, int need)
// Indeterminate: erase CHECKING state so that subsequent calls re-check.
// Raise EIO because actual error code might be misleading for
// userspace filesystem user.
- pool_perms.erase(pool);
+ pool_perms.erase(perm_key);
signal_cond_list(waiting_for_pool_perm);
return -EIO;
}
- pool_perms[pool] = have | POOL_CHECKED;
+ pool_perms[perm_key] = have | POOL_CHECKED;
signal_cond_list(waiting_for_pool_perm);
}
if ((need & CEPH_CAP_FILE_RD) && !(have & POOL_READ)) {
- ldout(cct, 10) << "check_pool_perm on pool " << pool
+ ldout(cct, 10) << "check_pool_perm on pool " << pool_id << " ns " << pool_ns
<< " need " << ccap_string(need) << ", but no read perm" << dendl;
return -EPERM;
}
if ((need & CEPH_CAP_FILE_WR) && !(have & POOL_WRITE)) {
- ldout(cct, 10) << "check_pool_perm on pool " << pool
+ ldout(cct, 10) << "check_pool_perm on pool " << pool_id << " ns " << pool_ns
<< " need " << ccap_string(need) << ", but no write perm" << dendl;
return -EPERM;
}
diff --git a/src/client/Client.h b/src/client/Client.h
index d53ca1d..d912db0 100644
--- a/src/client/Client.h
+++ b/src/client/Client.h
@@ -541,7 +541,7 @@ protected:
bool is_quota_bytes_exceeded(Inode *in, int64_t new_bytes);
bool is_quota_bytes_approaching(Inode *in);
- std::map<int64_t, int> pool_perms;
+ std::map<std::pair<int64_t,std::string>, int> pool_perms;
list<Cond*> waiting_for_pool_perm;
int check_pool_perm(Inode *in, int need);
diff --git a/src/cls/rgw/cls_rgw_ops.h b/src/cls/rgw/cls_rgw_ops.h
index e8a7661..7d8ad2d 100644
--- a/src/cls/rgw/cls_rgw_ops.h
+++ b/src/cls/rgw/cls_rgw_ops.h
@@ -180,7 +180,7 @@ struct rgw_cls_link_olh_op {
::encode(olh_epoch, bl);
::encode(log_op, bl);
::encode(bilog_flags, bl);
- time_t t = ceph::real_clock::to_time_t(unmod_since);
+ uint64_t t = ceph::real_clock::to_time_t(unmod_since);
::encode(t, bl);
::encode(unmod_since, bl);
::encode(high_precision_time, bl);
@@ -198,9 +198,9 @@ struct rgw_cls_link_olh_op {
::decode(log_op, bl);
::decode(bilog_flags, bl);
if (struct_v == 2) {
- time_t t;
+ uint64_t t;
::decode(t, bl);
- unmod_since = ceph::real_clock::from_time_t(t);
+ unmod_since = ceph::real_clock::from_time_t(static_cast<time_t>(t));
}
if (struct_v >= 3) {
::decode(unmod_since, bl);
diff --git a/src/common/Thread.cc b/src/common/Thread.cc
index 1f716f9..c1c3be5 100644
--- a/src/common/Thread.cc
+++ b/src/common/Thread.cc
@@ -200,8 +200,9 @@ int Thread::set_ioprio(int cls, int prio)
int Thread::set_affinity(int id)
{
+ int r = 0;
cpuid = id;
if (pid && ceph_gettid() == pid)
- _set_affinity(id);
- return 0;
+ r = _set_affinity(id);
+ return r;
}
diff --git a/src/common/buffer.cc b/src/common/buffer.cc
index 0368979..63339ea 100644
--- a/src/common/buffer.cc
+++ b/src/common/buffer.cc
@@ -1982,7 +1982,15 @@ int buffer::list::read_file(const char *fn, std::string *error)
struct stat st;
memset(&st, 0, sizeof(st));
- ::fstat(fd, &st);
+ if (::fstat(fd, &st) < 0) {
+ int err = errno;
+ std::ostringstream oss;
+ oss << "bufferlist::read_file(" << fn << "): stat error: "
+ << cpp_strerror(err);
+ *error = oss.str();
+ VOID_TEMP_FAILURE_RETRY(::close(fd));
+ return -err;
+ }
ssize_t ret = read_fd(fd, st.st_size);
if (ret < 0) {
diff --git a/src/common/ceph_time.h b/src/common/ceph_time.h
index ef187c0..2c7061a 100644
--- a/src/common/ceph_time.h
+++ b/src/common/ceph_time.h
@@ -382,10 +382,13 @@ void encode(const std::chrono::time_point<Clock, Duration>& t,
template<typename Clock, typename Duration>
void decode(std::chrono::time_point<Clock, Duration>& t,
bufferlist::iterator& p) {
- uint32_t s, ns;
+ uint32_t s;
+ uint32_t ns;
::decode(s, p);
::decode(ns, p);
- struct timespec ts = {s, ns};
+ struct timespec ts = {
+ static_cast<time_t>(s),
+ static_cast<long int>(ns)};
t = Clock::from_timespec(ts);
}
diff --git a/src/common/config_opts.h b/src/common/config_opts.h
index 50356c7..c2a577f 100644
--- a/src/common/config_opts.h
+++ b/src/common/config_opts.h
@@ -271,6 +271,7 @@ OPTION(mon_crush_min_required_version, OPT_STR, "firefly")
OPTION(mon_warn_on_crush_straw_calc_version_zero, OPT_BOOL, true) // warn if crush straw_calc_version==0
OPTION(mon_warn_on_osd_down_out_interval_zero, OPT_BOOL, true) // warn if 'mon_osd_down_out_interval == 0'
OPTION(mon_warn_on_cache_pools_without_hit_sets, OPT_BOOL, true)
+OPTION(mon_warn_on_no_sortbitwise, OPT_BOOL, true) // warn when sortbitwise not set
OPTION(mon_min_osdmap_epochs, OPT_INT, 500)
OPTION(mon_max_pgmap_epochs, OPT_INT, 500)
OPTION(mon_max_log_epochs, OPT_INT, 500)
@@ -869,6 +870,7 @@ OPTION(osd_mon_shutdown_timeout, OPT_DOUBLE, 5)
OPTION(osd_max_object_size, OPT_U64, 100*1024L*1024L*1024L) // OSD's maximum object size
OPTION(osd_max_object_name_len, OPT_U32, 2048) // max rados object name len
+OPTION(osd_max_object_namespace_len, OPT_U32, 256) // max rados object namespace len
OPTION(osd_max_attr_name_len, OPT_U32, 100) // max rados attr name len; cannot go higher than 100 chars for file system backends
OPTION(osd_max_attr_size, OPT_U64, 0)
@@ -1026,6 +1028,18 @@ OPTION(filestore_max_inline_xattrs_xfs, OPT_U32, 10)
OPTION(filestore_max_inline_xattrs_btrfs, OPT_U32, 10)
OPTION(filestore_max_inline_xattrs_other, OPT_U32, 2)
+// max xattr value size
+OPTION(filestore_max_xattr_value_size, OPT_U32, 0) //Override
+OPTION(filestore_max_xattr_value_size_xfs, OPT_U32, 64<<10)
+OPTION(filestore_max_xattr_value_size_btrfs, OPT_U32, 64<<10)
+// ext4 allows 4k xattrs total including some smallish extra fields and the
+// keys. We're allowing 2 512 inline attrs in addition to some filestore
+// replay attrs. After accounting for those, we still need to fit up to
+// two attrs of this value. That means we need this value to be around 1k
+// to be safe. This is hacky, but it's not worth complicating the code
+// to work around ext4's total xattr limit.
+OPTION(filestore_max_xattr_value_size_other, OPT_U32, 1<<10)
+
OPTION(filestore_sloppy_crc, OPT_BOOL, false) // track sloppy crcs
OPTION(filestore_sloppy_crc_block_size, OPT_INT, 65536)
diff --git a/src/common/fs_types.cc b/src/common/fs_types.cc
index 741e4f1..929f3f2 100644
--- a/src/common/fs_types.cc
+++ b/src/common/fs_types.cc
@@ -46,8 +46,9 @@ void file_layout_t::from_legacy(const ceph_file_layout& fl)
stripe_count = fl.fl_stripe_count;
object_size = fl.fl_object_size;
pool_id = (int32_t)fl.fl_pg_pool;
- // in the legacy encoding, pool 0 was undefined.
- if (pool_id == 0)
+ // in the legacy encoding, a zeroed structure was the default and
+ // would have pool 0 instead of -1.
+ if (pool_id == 0 && stripe_unit == 0 && stripe_count == 0 && object_size == 0)
pool_id = -1;
pool_ns.clear();
}
@@ -108,7 +109,7 @@ void file_layout_t::dump(Formatter *f) const
f->dump_unsigned("stripe_unit", stripe_unit);
f->dump_unsigned("stripe_count", stripe_count);
f->dump_unsigned("object_size", object_size);
- f->dump_unsigned("pool_id", pool_id);
+ f->dump_int("pool_id", pool_id);
f->dump_string("pool_ns", pool_ns);
}
@@ -122,3 +123,12 @@ void file_layout_t::generate_test_instances(list<file_layout_t*>& o)
o.back()->pool_id = 3;
o.back()->pool_ns = "myns";
}
+
+ostream& operator<<(ostream& out, const file_layout_t &layout)
+{
+ JSONFormatter f;
+ layout.dump(&f);
+ f.flush(out);
+ return out;
+}
+
diff --git a/src/crush/CrushWrapper.cc b/src/crush/CrushWrapper.cc
index 5748078..3450b48 100644
--- a/src/crush/CrushWrapper.cc
+++ b/src/crush/CrushWrapper.cc
@@ -353,7 +353,7 @@ int CrushWrapper::remove_item_under(CephContext *cct, int item, int ancestor, bo
if (item < 0 && !unlink_only) {
crush_bucket *t = get_bucket(item);
if (t && t->size) {
- ldout(cct, 1) << "remove_item_undef bucket " << item << " has " << t->size
+ ldout(cct, 1) << "remove_item_under bucket " << item << " has " << t->size
<< " items, not empty" << dendl;
return -ENOTEMPTY;
}
diff --git a/src/include/ceph_fs.h b/src/include/ceph_fs.h
index 593fd5f..fe0a8d5 100644
--- a/src/include/ceph_fs.h
+++ b/src/include/ceph_fs.h
@@ -234,6 +234,11 @@ struct ceph_mon_subscribe_ack {
*/
#define CEPH_MDSMAP_DOWN (1<<0) /* cluster deliberately down */
#define CEPH_MDSMAP_ALLOW_SNAPS (1<<1) /* cluster allowed to create snapshots */
+#define CEPH_MDSMAP_ALLOW_MULTIMDS (1<<2) /* cluster allowed to have >1 active MDS */
+#define CEPH_MDSMAP_ALLOW_DIRFRAGS (1<<3) /* cluster allowed to fragment directories */
+
+#define CEPH_MDSMAP_ALLOW_CLASSICS (CEPH_MDSMAP_ALLOW_SNAPS | CEPH_MDSMAP_ALLOW_MULTIMDS | \
+ CEPH_MDSMAP_ALLOW_DIRFRAGS)
/*
* mds states
diff --git a/src/include/fs_types.h b/src/include/fs_types.h
index 388508b..c9271cc 100644
--- a/src/include/fs_types.h
+++ b/src/include/fs_types.h
@@ -103,4 +103,6 @@ WRITE_CLASS_ENCODER_FEATURES(file_layout_t)
WRITE_EQ_OPERATORS_5(file_layout_t, stripe_unit, stripe_count, object_size, pool_id, pool_ns);
+ostream& operator<<(ostream& out, const file_layout_t &layout);
+
#endif
diff --git a/src/include/rados.h b/src/include/rados.h
index f14d677..c58277f 100644
--- a/src/include/rados.h
+++ b/src/include/rados.h
@@ -446,10 +446,6 @@ enum {
};
enum {
- CEPH_OSD_COPY_GET_FLAG_NOTSUPP_OMAP = 1, /* mean dest pool don't support omap*/
-};
-
-enum {
CEPH_OSD_TMAP2OMAP_NULLOK = 1,
};
@@ -516,7 +512,6 @@ struct ceph_osd_op {
} __attribute__ ((packed)) clonerange;
struct {
__le64 max; /* max data in reply */
- __le32 flags;
} __attribute__ ((packed)) copy_get;
struct {
__le64 snapid;
diff --git a/src/journal/FutureImpl.cc b/src/journal/FutureImpl.cc
index 11eda44..aebfe12 100644
--- a/src/journal/FutureImpl.cc
+++ b/src/journal/FutureImpl.cc
@@ -2,15 +2,14 @@
// vim: ts=8 sw=2 smarttab
#include "journal/FutureImpl.h"
-#include "journal/JournalMetadata.h"
#include "journal/Utils.h"
namespace journal {
-FutureImpl::FutureImpl(JournalMetadataPtr journal_metadata, uint64_t tag_tid,
- uint64_t entry_tid, uint64_t commit_tid)
- : RefCountedObject(NULL, 0), m_journal_metadata(journal_metadata),
- m_tag_tid(tag_tid), m_entry_tid(entry_tid), m_commit_tid(commit_tid),
+FutureImpl::FutureImpl(uint64_t tag_tid, uint64_t entry_tid,
+ uint64_t commit_tid)
+ : RefCountedObject(NULL, 0), m_tag_tid(tag_tid), m_entry_tid(entry_tid),
+ m_commit_tid(commit_tid),
m_lock(utils::unique_lock_name("FutureImpl::m_lock", this)), m_safe(false),
m_consistent(false), m_return_value(0), m_flush_state(FLUSH_STATE_NONE),
m_consistent_ack(this) {
@@ -51,7 +50,7 @@ void FutureImpl::flush(Context *on_safe) {
}
if (complete && on_safe != NULL) {
- m_journal_metadata->queue(on_safe, m_return_value);
+ on_safe->complete(m_return_value);
} else if (flush_handler) {
// attached to journal object -- instruct it to flush all entries through
// this one. possible to become detached while lock is released, so flush
@@ -69,7 +68,8 @@ void FutureImpl::wait(Context *on_safe) {
return;
}
}
- m_journal_metadata->queue(on_safe, m_return_value);
+
+ on_safe->complete(m_return_value);
}
bool FutureImpl::is_complete() const {
diff --git a/src/journal/FutureImpl.h b/src/journal/FutureImpl.h
index 0a9eba5..5c11c4b 100644
--- a/src/journal/FutureImpl.h
+++ b/src/journal/FutureImpl.h
@@ -18,7 +18,6 @@ class Context;
namespace journal {
class FutureImpl;
-class JournalMetadata;
typedef boost::intrusive_ptr<FutureImpl> FutureImplPtr;
class FutureImpl : public RefCountedObject, boost::noncopyable {
@@ -29,11 +28,9 @@ public:
virtual void get() = 0;
virtual void put() = 0;
};
- typedef boost::intrusive_ptr<JournalMetadata> JournalMetadataPtr;
typedef boost::intrusive_ptr<FlushHandler> FlushHandlerPtr;
- FutureImpl(JournalMetadataPtr journal_metadata, uint64_t tag_tid,
- uint64_t entry_tid, uint64_t commit_tid);
+ FutureImpl(uint64_t tag_tid, uint64_t entry_tid, uint64_t commit_tid);
void init(const FutureImplPtr &prev_future);
@@ -96,7 +93,6 @@ private:
virtual void finish(int r) {}
};
- JournalMetadataPtr m_journal_metadata;
uint64_t m_tag_tid;
uint64_t m_entry_tid;
uint64_t m_commit_tid;
diff --git a/src/journal/JournalRecorder.cc b/src/journal/JournalRecorder.cc
index 065f692..b730b26 100644
--- a/src/journal/JournalRecorder.cc
+++ b/src/journal/JournalRecorder.cc
@@ -80,8 +80,7 @@ Future JournalRecorder::append(uint64_t tag_tid,
ObjectRecorderPtr object_ptr = get_object(splay_offset);
uint64_t commit_tid = m_journal_metadata->allocate_commit_tid(
object_ptr->get_object_number(), tag_tid, entry_tid);
- FutureImplPtr future(new FutureImpl(m_journal_metadata, tag_tid, entry_tid,
- commit_tid));
+ FutureImplPtr future(new FutureImpl(tag_tid, entry_tid, commit_tid));
future->init(m_prev_future);
m_prev_future = future;
diff --git a/src/journal/JournalTrimmer.cc b/src/journal/JournalTrimmer.cc
index 68ba5f4..74df78a 100644
--- a/src/journal/JournalTrimmer.cc
+++ b/src/journal/JournalTrimmer.cc
@@ -194,7 +194,8 @@ JournalTrimmer::C_RemoveSet::C_RemoveSet(JournalTrimmer *_journal_trimmer,
void JournalTrimmer::C_RemoveSet::complete(int r) {
lock.Lock();
- if (r < 0 && r != -ENOENT && return_value == -ENOENT) {
+ if (r < 0 && r != -ENOENT &&
+ (return_value == -ENOENT || return_value == 0)) {
return_value = r;
} else if (r == 0 && return_value == -ENOENT) {
return_value = 0;
diff --git a/src/journal/ObjectPlayer.cc b/src/journal/ObjectPlayer.cc
index e890dfa..db49d46 100644
--- a/src/journal/ObjectPlayer.cc
+++ b/src/journal/ObjectPlayer.cc
@@ -70,9 +70,15 @@ void ObjectPlayer::watch(Context *on_fetch, double interval) {
void ObjectPlayer::unwatch() {
ldout(m_cct, 20) << __func__ << ": " << m_oid << " unwatch" << dendl;
Mutex::Locker timer_locker(m_timer_lock);
+
cancel_watch();
- m_watch_ctx = NULL;
+ Context *watch_ctx = nullptr;
+ std::swap(watch_ctx, m_watch_ctx);
+ if (watch_ctx != nullptr) {
+ delete watch_ctx;
+ }
+
while (m_watch_in_progress) {
m_watch_in_progress_cond.Wait(m_timer_lock);
}
@@ -202,18 +208,17 @@ void ObjectPlayer::handle_watch_fetched(int r) {
ldout(m_cct, 10) << __func__ << ": " << m_oid << " poll complete, r=" << r
<< dendl;
- Context *on_finish = NULL;
+ Context *on_finish = nullptr;
{
Mutex::Locker timer_locker(m_timer_lock);
assert(m_watch_in_progress);
if (r == -ENOENT) {
r = 0;
}
- on_finish = m_watch_ctx;
- m_watch_ctx = NULL;
+ std::swap(on_finish, m_watch_ctx);
}
- if (on_finish != NULL) {
+ if (on_finish != nullptr) {
on_finish->complete(r);
}
diff --git a/src/librados/IoCtxImpl.cc b/src/librados/IoCtxImpl.cc
index a1479d1..ce1a220 100644
--- a/src/librados/IoCtxImpl.cc
+++ b/src/librados/IoCtxImpl.cc
@@ -678,7 +678,8 @@ int librados::IoCtxImpl::operate(const object_t& oid, ::ObjectOperation *o,
Context *oncommit = new C_SafeCond(&mylock, &cond, &done, &r);
int op = o->ops[0].op.op;
- ldout(client->cct, 10) << ceph_osd_op_name(op) << " oid=" << oid << " nspace=" << oloc.nspace << dendl;
+ ldout(client->cct, 10) << ceph_osd_op_name(op) << " oid=" << oid
+ << " nspace=" << oloc.nspace << dendl;
Objecter::Op *objecter_op = objecter->prepare_mutate_op(oid, oloc,
*o, snapc, ut, flags,
NULL, oncommit, &ver);
@@ -745,7 +746,7 @@ int librados::IoCtxImpl::aio_operate_read(const object_t &oid,
Objecter::Op *objecter_op = objecter->prepare_read_op(oid, oloc,
*o, snap_seq, pbl, flags,
onack, &c->objver);
- c->tid = objecter->op_submit(objecter_op);
+ objecter->op_submit(objecter_op, &c->tid);
return 0;
}
@@ -764,8 +765,10 @@ int librados::IoCtxImpl::aio_operate(const object_t& oid,
c->io = this;
queue_aio_write(c);
- c->tid = objecter->mutate(oid, oloc, *o, snap_context, ut, flags, onack,
- oncommit, &c->objver);
+ Objecter::Op *op = objecter->prepare_mutate_op(
+ oid, oloc, *o, snap_context, ut, flags, onack,
+ oncommit, &c->objver);
+ objecter->op_submit(op, &c->tid);
return 0;
}
@@ -783,9 +786,11 @@ int librados::IoCtxImpl::aio_read(const object_t oid, AioCompletionImpl *c,
c->io = this;
c->blp = pbl;
- c->tid = objecter->read(oid, oloc,
- off, len, snapid, pbl, 0,
- onack, &c->objver);
+ Objecter::Op *o = objecter->prepare_read_op(
+ oid, oloc,
+ off, len, snapid, pbl, 0,
+ onack, &c->objver);
+ objecter->op_submit(o, &c->tid);
return 0;
}
@@ -804,10 +809,11 @@ int librados::IoCtxImpl::aio_read(const object_t oid, AioCompletionImpl *c,
c->bl.push_back(buffer::create_static(len, buf));
c->blp = &c->bl;
- c->tid = objecter->read(oid, oloc,
- off, len, snapid, &c->bl, 0,
- onack, &c->objver);
-
+ Objecter::Op *o = objecter->prepare_read_op(
+ oid, oloc,
+ off, len, snapid, &c->bl, 0,
+ onack, &c->objver);
+ objecter->op_submit(o, &c->tid);
return 0;
}
@@ -839,9 +845,11 @@ int librados::IoCtxImpl::aio_sparse_read(const object_t oid,
onack->m_ops.sparse_read(off, len, m, data_bl, NULL);
- c->tid = objecter->read(oid, oloc,
- onack->m_ops, snap_seq, NULL, 0,
- onack, &c->objver);
+ Objecter::Op *o = objecter->prepare_read_op(
+ oid, oloc,
+ onack->m_ops, snap_seq, NULL, 0,
+ onack, &c->objver);
+ objecter->op_submit(o, &c->tid);
return 0;
}
@@ -864,9 +872,11 @@ int librados::IoCtxImpl::aio_write(const object_t &oid, AioCompletionImpl *c,
c->io = this;
queue_aio_write(c);
- c->tid = objecter->write(oid, oloc,
- off, len, snapc, bl, ut, 0,
- onack, onsafe, &c->objver);
+ Objecter::Op *o = objecter->prepare_write_op(
+ oid, oloc,
+ off, len, snapc, bl, ut, 0,
+ onack, onsafe, &c->objver);
+ objecter->op_submit(o, &c->tid);
return 0;
}
@@ -888,9 +898,11 @@ int librados::IoCtxImpl::aio_append(const object_t &oid, AioCompletionImpl *c,
c->io = this;
queue_aio_write(c);
- c->tid = objecter->append(oid, oloc,
- len, snapc, bl, ut, 0,
- onack, onsafe, &c->objver);
+ Objecter::Op *o = objecter->prepare_append_op(
+ oid, oloc,
+ len, snapc, bl, ut, 0,
+ onack, onsafe, &c->objver);
+ objecter->op_submit(o, &c->tid);
return 0;
}
@@ -913,9 +925,11 @@ int librados::IoCtxImpl::aio_write_full(const object_t &oid,
c->io = this;
queue_aio_write(c);
- c->tid = objecter->write_full(oid, oloc,
- snapc, bl, ut, 0,
- onack, onsafe, &c->objver);
+ Objecter::Op *o = objecter->prepare_write_full_op(
+ oid, oloc,
+ snapc, bl, ut, 0,
+ onack, onsafe, &c->objver);
+ objecter->op_submit(o, &c->tid);
return 0;
}
@@ -934,9 +948,11 @@ int librados::IoCtxImpl::aio_remove(const object_t &oid, AioCompletionImpl *c)
c->io = this;
queue_aio_write(c);
- c->tid = objecter->remove(oid, oloc,
- snapc, ut, 0,
- onack, onsafe, &c->objver);
+ Objecter::Op *o = objecter->prepare_remove_op(
+ oid, oloc,
+ snapc, ut, 0,
+ onack, onsafe, &c->objver);
+ objecter->op_submit(o, &c->tid);
return 0;
}
@@ -948,9 +964,11 @@ int librados::IoCtxImpl::aio_stat(const object_t& oid, AioCompletionImpl *c,
C_aio_stat_Ack *onack = new C_aio_stat_Ack(c, pmtime);
c->io = this;
- c->tid = objecter->stat(oid, oloc,
- snap_seq, psize, &onack->mtime, 0,
- onack, &c->objver);
+ Objecter::Op *o = objecter->prepare_stat_op(
+ oid, oloc,
+ snap_seq, psize, &onack->mtime, 0,
+ onack, &c->objver);
+ objecter->op_submit(o, &c->tid);
return 0;
}
@@ -961,9 +979,11 @@ int librados::IoCtxImpl::aio_stat2(const object_t& oid, AioCompletionImpl *c,
C_aio_stat2_Ack *onack = new C_aio_stat2_Ack(c, pts);
c->io = this;
- c->tid = objecter->stat(oid, oloc,
- snap_seq, psize, &onack->mtime, 0,
- onack, &c->objver);
+ Objecter::Op *o = objecter->prepare_stat_op(
+ oid, oloc,
+ snap_seq, psize, &onack->mtime, 0,
+ onack, &c->objver);
+ objecter->op_submit(o, &c->tid);
return 0;
}
@@ -984,7 +1004,9 @@ int librados::IoCtxImpl::hit_set_list(uint32_t hash, AioCompletionImpl *c,
::ObjectOperation rd;
rd.hit_set_ls(pls, NULL);
object_locator_t oloc(poolid);
- c->tid = objecter->pg_read(hash, oloc, rd, NULL, 0, onack, NULL, NULL);
+ Objecter::Op *o = objecter->prepare_pg_read_op(
+ hash, oloc, rd, NULL, 0, onack, NULL, NULL);
+ objecter->op_submit(o, &c->tid);
return 0;
}
@@ -999,7 +1021,9 @@ int librados::IoCtxImpl::hit_set_get(uint32_t hash, AioCompletionImpl *c,
::ObjectOperation rd;
rd.hit_set_get(ceph::real_clock::from_time_t(stamp), pbl, 0);
object_locator_t oloc(poolid);
- c->tid = objecter->pg_read(hash, oloc, rd, NULL, 0, onack, NULL, NULL);
+ Objecter::Op *o = objecter->prepare_pg_read_op(
+ hash, oloc, rd, NULL, 0, onack, NULL, NULL);
+ objecter->op_submit(o, &c->tid);
return 0;
}
@@ -1041,8 +1065,10 @@ int librados::IoCtxImpl::get_inconsistent_objects(const pg_t& pg,
::ObjectOperation op;
op.scrub_ls(start_after, max_to_get, objects, interval, nullptr);
object_locator_t oloc{poolid, pg.ps()};
- c->tid = objecter->pg_read(oloc.hash, oloc, op, nullptr, CEPH_OSD_FLAG_PGOP, onack,
- nullptr, nullptr);
+ Objecter::Op *o = objecter->prepare_pg_read_op(
+ oloc.hash, oloc, op, nullptr, CEPH_OSD_FLAG_PGOP, onack,
+ nullptr, nullptr);
+ objecter->op_submit(o, &c->tid);
return 0;
}
@@ -1060,8 +1086,10 @@ int librados::IoCtxImpl::get_inconsistent_snapsets(const pg_t& pg,
::ObjectOperation op;
op.scrub_ls(start_after, max_to_get, snapsets, interval, nullptr);
object_locator_t oloc{poolid, pg.ps()};
- c->tid = objecter->pg_read(oloc.hash, oloc, op, nullptr, CEPH_OSD_FLAG_PGOP, onack,
- nullptr, nullptr);
+ Objecter::Op *o = objecter->prepare_pg_read_op(
+ oloc.hash, oloc, op, nullptr, CEPH_OSD_FLAG_PGOP, onack,
+ nullptr, nullptr);
+ objecter->op_submit(o, &c->tid);
return 0;
}
@@ -1119,8 +1147,9 @@ int librados::IoCtxImpl::aio_exec(const object_t& oid, AioCompletionImpl *c,
::ObjectOperation rd;
prepare_assert_ops(&rd);
rd.call(cls, method, inbl);
- c->tid = objecter->read(oid, oloc, rd, snap_seq, outbl, 0, onack, &c->objver);
-
+ Objecter::Op *o = objecter->prepare_read_op(
+ oid, oloc, rd, snap_seq, outbl, 0, onack, &c->objver);
+ objecter->op_submit(o, &c->tid);
return 0;
}
diff --git a/src/librbd/ImageCtx.cc b/src/librbd/ImageCtx.cc
index 770e871..fa05103 100644
--- a/src/librbd/ImageCtx.cc
+++ b/src/librbd/ImageCtx.cc
@@ -52,7 +52,7 @@ namespace {
class ThreadPoolSingleton : public ThreadPool {
public:
explicit ThreadPoolSingleton(CephContext *cct)
- : ThreadPool(cct, "librbd::thread_pool", "tp_librbd", cct->_conf->rbd_op_threads,
+ : ThreadPool(cct, "librbd::thread_pool", "tp_librbd", 1,
"rbd_op_threads") {
start();
}
diff --git a/src/librbd/Journal.cc b/src/librbd/Journal.cc
index df30cb3..265a869 100644
--- a/src/librbd/Journal.cc
+++ b/src/librbd/Journal.cc
@@ -827,7 +827,8 @@ uint64_t Journal<I>::append_io_event(AioCompletion *aio_comp,
<< "length=" << length << ", "
<< "flush=" << flush_entry << ", tid=" << tid << dendl;
- Context *on_safe = new C_IOEventSafe(this, tid);
+ Context *on_safe = create_async_context_callback(
+ m_image_ctx, new C_IOEventSafe(this, tid));
if (flush_entry) {
future.flush(on_safe);
} else {
@@ -942,8 +943,9 @@ void Journal<I>::commit_op_event(uint64_t op_tid, int r) {
op_finish_future = m_journaler->append(m_tag_tid, bl);
}
- op_finish_future.flush(new C_OpEventSafe(this, op_tid, op_start_future,
- op_finish_future));
+ op_finish_future.flush(create_async_context_callback(
+ m_image_ctx, new C_OpEventSafe(this, op_tid, op_start_future,
+ op_finish_future)));
}
template <typename I>
@@ -971,7 +973,7 @@ void Journal<I>::flush_event(uint64_t tid, Context *on_safe) {
}
if (future.is_valid()) {
- future.flush(NULL);
+ future.flush(nullptr);
}
}
diff --git a/src/librbd/internal.cc b/src/librbd/internal.cc
index 6ace24f..983e622 100644
--- a/src/librbd/internal.cc
+++ b/src/librbd/internal.cc
@@ -312,13 +312,10 @@ int mirror_image_disable_internal(ImageCtx *ictx, bool force) {
return r;
}
- if (!is_primary) {
- if (!force) {
- lderr(cct) << "Mirrored image is not the primary, add force option to"
- " disable mirroring" << dendl;
- return -EINVAL;
- }
- goto remove_mirroring_image;
+ if (!is_primary && !force) {
+ lderr(cct) << "Mirrored image is not the primary, add force option to"
+ " disable mirroring" << dendl;
+ return -EINVAL;
}
mirror_image_internal.state = cls::rbd::MIRROR_IMAGE_STATE_DISABLING;
@@ -329,6 +326,10 @@ int mirror_image_disable_internal(ImageCtx *ictx, bool force) {
return r;
}
+ if (!is_primary) {
+ goto remove_mirroring_image;
+ }
+
r = MirroringWatcher<>::notify_image_updated(
ictx->md_ctx, cls::rbd::MIRROR_IMAGE_STATE_DISABLING,
ictx->id, mirror_image_internal.global_image_id);
@@ -341,6 +342,7 @@ int mirror_image_disable_internal(ImageCtx *ictx, bool force) {
header_oid = ::journal::Journaler::header_oid(ictx->id);
while(true) {
+ clients.clear();
r = cls::journal::client::client_list(ictx->md_ctx, header_oid, &clients);
if (r < 0) {
lderr(cct) << "cannot disable mirroring: " << cpp_strerror(r) << dendl;
diff --git a/src/logrotate.conf b/src/logrotate.conf
index 08ad4b4..061965b 100644
--- a/src/logrotate.conf
+++ b/src/logrotate.conf
@@ -4,7 +4,7 @@
compress
sharedscripts
postrotate
- killall -q -1 ceph-mon ceph-mds ceph-osd radosgw || true
+ killall -q -1 ceph-mon ceph-mds ceph-osd ceph-fuse radosgw || true
endscript
missingok
notifempty
diff --git a/src/mds/Beacon.cc b/src/mds/Beacon.cc
index 16f20ba..9a07b91 100644
--- a/src/mds/Beacon.cc
+++ b/src/mds/Beacon.cc
@@ -443,6 +443,13 @@ void Beacon::notify_health(MDSRank const *mds)
large_completed_requests_metrics.clear();
}
}
+
+ // Report a health warning if we are readonly
+ if (mds->mdcache->is_readonly()) {
+ MDSHealthMetric m(MDS_HEALTH_READ_ONLY, HEALTH_WARN,
+ "MDS in read-only mode");
+ health.metrics.push_back(m);
+ }
}
MDSMap::DaemonState Beacon::get_want_state() const
diff --git a/src/mds/CInode.cc b/src/mds/CInode.cc
index 423be96..718c0bc 100644
--- a/src/mds/CInode.cc
+++ b/src/mds/CInode.cc
@@ -1143,13 +1143,7 @@ void CInode::store_backtrace(MDSInternalContextBase *fin, int op_prio)
auth_pin(this);
- int64_t pool;
- if (is_dir()) {
- pool = mdcache->mds->mdsmap->get_metadata_pool();
- } else {
- pool = inode.layout.pool_id;
- }
-
+ const int64_t pool = get_backtrace_pool();
inode_backtrace_t bt;
build_backtrace(pool, bt);
bufferlist parent_bl;
@@ -1212,8 +1206,10 @@ void CInode::_stored_backtrace(int r, version_t v, Context *fin)
{
if (r < 0) {
dout(1) << "store backtrace error " << r << " v " << v << dendl;
- mdcache->mds->clog->error() << "failed to store backtrace on dir ino "
- << ino() << " object, errno " << r << "\n";
+ mdcache->mds->clog->error() << "failed to store backtrace on ino "
+ << ino() << " object"
+ << ", pool " << get_backtrace_pool()
+ << ", errno " << r << "\n";
mdcache->mds->handle_write_error(r);
return;
}
@@ -1229,13 +1225,7 @@ void CInode::_stored_backtrace(int r, version_t v, Context *fin)
void CInode::fetch_backtrace(Context *fin, bufferlist *backtrace)
{
- int64_t pool;
- if (is_dir())
- pool = mdcache->mds->mdsmap->get_metadata_pool();
- else
- pool = inode.layout.pool_id;
-
- mdcache->fetch_backtrace(inode.ino, pool, *backtrace, fin);
+ mdcache->fetch_backtrace(inode.ino, get_backtrace_pool(), *backtrace, fin);
}
void CInode::_mark_dirty_parent(LogSegment *ls, bool dirty_pool)
@@ -3725,12 +3715,7 @@ void CInode::validate_disk_state(CInode::validated_data *results,
void fetch_backtrace_and_tag(CInode *in, std::string tag,
Context *fin, int *bt_r, bufferlist *bt)
{
- int64_t pool;
- if (in->is_dir())
- pool = in->mdcache->mds->mdsmap->get_metadata_pool();
- else
- pool = in->inode.layout.pool_id;
-
+ const int64_t pool = in->get_backtrace_pool();
object_t oid = CInode::get_object_name(in->ino(), frag_t(), "");
ObjectOperation fetch;
@@ -3788,11 +3773,7 @@ void CInode::validate_disk_state(CInode::validated_data *results,
results->performed_validation = true; // at least, some of it!
results->backtrace.checked = true;
- int64_t pool;
- if (in->is_dir())
- pool = in->mdcache->mds->mdsmap->get_metadata_pool();
- else
- pool = in->inode.layout.pool_id;
+ const int64_t pool = in->get_backtrace_pool();
inode_backtrace_t& memory_backtrace = results->backtrace.memory_value;
in->build_backtrace(pool, memory_backtrace);
bool equivalent, divergent;
@@ -4302,3 +4283,15 @@ void CInode::scrub_finished(MDSInternalContextBase **c) {
clog->info() << "scrub complete with tag '" << scrub_infop->header->tag << "'";
}
}
+
+int64_t CInode::get_backtrace_pool() const
+{
+  // Directories use the metadata pool reported by the MDS map.
+  if (is_dir())
+    return mdcache->mds->mdsmap->get_metadata_pool();
+
+  // Files are required to have an explicit layout that specifies a pool.
+  const int64_t layout_pool = inode.layout.pool_id;
+  assert(layout_pool != -1);
+  return layout_pool;
+}
diff --git a/src/mds/CInode.h b/src/mds/CInode.h
index 01f6797..8f27bf0 100644
--- a/src/mds/CInode.h
+++ b/src/mds/CInode.h
@@ -769,6 +769,15 @@ public:
void store_backtrace(MDSInternalContextBase *fin, int op_prio=-1);
void _stored_backtrace(int r, version_t v, Context *fin);
void fetch_backtrace(Context *fin, bufferlist *backtrace);
+protected:
+ /**
+ * Return the pool ID where we currently write backtraces for
+ * this inode (in addition to inode.old_pools)
+ *
+ * @returns a pool ID >=0
+ */
+ int64_t get_backtrace_pool() const;
+public:
void _mark_dirty_parent(LogSegment *ls, bool dirty_pool=false);
void clear_dirty_parent();
void verify_diri_backtrace(bufferlist &bl, int err);
diff --git a/src/mds/FSMap.cc b/src/mds/FSMap.cc
index 29f94dc..73e425d 100644
--- a/src/mds/FSMap.cc
+++ b/src/mds/FSMap.cc
@@ -35,6 +35,11 @@ void FSMap::dump(Formatter *f) const
compat.dump(f);
f->close_section();
+ f->open_object_section("feature flags");
+ f->dump_bool("enable_multiple", enable_multiple);
+ f->dump_bool("ever_enabled_multiple", ever_enabled_multiple);
+ f->close_section();
+
f->open_array_section("standbys");
for (const auto &i : standby_daemons) {
f->open_object_section("info");
@@ -76,8 +81,9 @@ void FSMap::generate_test_instances(list<FSMap*>& ls)
void FSMap::print(ostream& out) const
{
out << "e" << epoch << std::endl;
- out << "enable_multiple: " << enable_multiple << std::endl;
- out << "compat: " << enable_multiple << std::endl;
+ out << "enable_multiple, ever_enabled_multiple: " << enable_multiple << ","
+ << ever_enabled_multiple << std::endl;
+ out << "compat: " << compat << std::endl;
out << " " << std::endl;
if (filesystems.empty()) {
@@ -231,7 +237,7 @@ void FSMap::get_health(list<pair<health_status_t,string> >& summary,
void FSMap::encode(bufferlist& bl, uint64_t features) const
{
if (features & CEPH_FEATURE_SERVER_JEWEL) {
- ENCODE_START(6, 6, bl);
+ ENCODE_START(7, 6, bl);
::encode(epoch, bl);
::encode(next_filesystem_id, bl);
::encode(legacy_client_fscid, bl);
@@ -245,6 +251,7 @@ void FSMap::encode(bufferlist& bl, uint64_t features) const
::encode(mds_roles, bl);
::encode(standby_daemons, bl, features);
::encode(standby_epochs, bl);
+ ::encode(ever_enabled_multiple, bl);
ENCODE_FINISH(bl);
} else {
if (filesystems.empty()) {
@@ -280,7 +287,7 @@ void FSMap::decode(bufferlist::iterator& p)
// MDSMonitor to store an FSMap instead of an MDSMap was
// 5, so anything older than 6 is decoded as an MDSMap,
// and anything newer is decoded as an FSMap.
- DECODE_START_LEGACY_COMPAT_LEN_16(6, 4, 4, p);
+ DECODE_START_LEGACY_COMPAT_LEN_16(7, 4, 4, p);
if (struct_v < 6) {
// Decoding an MDSMap (upgrade)
::decode(epoch, p);
@@ -334,11 +341,28 @@ void FSMap::decode(bufferlist::iterator& p)
if (ev >= 4)
::decode(legacy_mds_map.last_failure_osd_epoch, p);
if (ev >= 6) {
- ::decode(legacy_mds_map.ever_allowed_snaps, p);
- ::decode(legacy_mds_map.explicitly_allowed_snaps, p);
+ if (ev < 10) {
+ // previously this was a bool about snaps, not a flag map
+ bool flag;
+ ::decode(flag, p);
+ legacy_mds_map.ever_allowed_features = flag ?
+ CEPH_MDSMAP_ALLOW_SNAPS : 0;
+ ::decode(flag, p);
+ legacy_mds_map.explicitly_allowed_features = flag ?
+ CEPH_MDSMAP_ALLOW_SNAPS : 0;
+ if (legacy_mds_map.max_mds > 1) {
+ legacy_mds_map.set_multimds_allowed();
+ }
+ } else {
+ ::decode(legacy_mds_map.ever_allowed_features, p);
+ ::decode(legacy_mds_map.explicitly_allowed_features, p);
+ }
} else {
- legacy_mds_map.ever_allowed_snaps = true;
- legacy_mds_map.explicitly_allowed_snaps = false;
+ legacy_mds_map.ever_allowed_features = CEPH_MDSMAP_ALLOW_CLASSICS;
+ legacy_mds_map.explicitly_allowed_features = 0;
+ if (legacy_mds_map.max_mds > 1) {
+ legacy_mds_map.set_multimds_allowed();
+ }
}
if (ev >= 7)
::decode(legacy_mds_map.inline_data_enabled, p);
@@ -416,6 +440,7 @@ void FSMap::decode(bufferlist::iterator& p)
::decode(mds_roles, p);
::decode(standby_daemons, p);
::decode(standby_epochs, p);
+ if (struct_v >= 7) ::decode(ever_enabled_multiple, p); // field added in v7; v6 FSMap payloads end before it
}
DECODE_FINISH(p);
@@ -512,7 +537,8 @@ mds_gid_t FSMap::find_standby_for(mds_role_t role, const std::string& name) cons
return result;
}
-mds_gid_t FSMap::find_unused(bool force_standby_active) const {
+mds_gid_t FSMap::find_unused(fs_cluster_id_t fscid,
+ bool force_standby_active) const {
for (const auto &i : standby_daemons) {
const auto &gid = i.first;
const auto &info = i.second;
@@ -521,6 +547,10 @@ mds_gid_t FSMap::find_unused(bool force_standby_active) const {
if (info.laggy() || info.rank >= 0)
continue;
+ if (info.standby_for_fscid != FS_CLUSTER_ID_NONE &&
+ info.standby_for_fscid != fscid)
+ continue;
+
if ((info.standby_for_rank == MDSMap::MDS_NO_STANDBY_PREF ||
info.standby_for_rank == MDSMap::MDS_MATCHED_ACTIVE ||
(info.standby_for_rank == MDSMap::MDS_STANDBY_ANY
@@ -537,7 +567,7 @@ mds_gid_t FSMap::find_replacement_for(mds_role_t role, const std::string& name,
if (standby)
return standby;
else
- return find_unused(force_standby_active);
+ return find_unused(role.fscid, force_standby_active);
}
void FSMap::sanity() const
diff --git a/src/mds/FSMap.h b/src/mds/FSMap.h
index 1f6b069..d14e365 100644
--- a/src/mds/FSMap.h
+++ b/src/mds/FSMap.h
@@ -95,6 +95,7 @@ protected:
fs_cluster_id_t legacy_client_fscid;
CompatSet compat;
bool enable_multiple;
+ bool ever_enabled_multiple; //< set once enable_multiple (multiple filesystems) has been turned on; never cleared by set_enable_multiple()
std::map<fs_cluster_id_t, std::shared_ptr<Filesystem> > filesystems;
@@ -115,7 +116,7 @@ public:
next_filesystem_id(FS_CLUSTER_ID_ANONYMOUS + 1),
legacy_client_fscid(FS_CLUSTER_ID_NONE),
compat(get_mdsmap_compat_set_default()),
- enable_multiple(false)
+ enable_multiple(false), ever_enabled_multiple(false)
{ }
FSMap(const FSMap &rhs)
@@ -125,6 +126,7 @@ public:
legacy_client_fscid(rhs.legacy_client_fscid),
compat(rhs.compat),
enable_multiple(rhs.enable_multiple),
+ ever_enabled_multiple(rhs.ever_enabled_multiple),
mds_roles(rhs.mds_roles),
standby_daemons(rhs.standby_daemons),
standby_epochs(rhs.standby_epochs)
@@ -159,6 +161,9 @@ public:
void set_enable_multiple(const bool v)
{
enable_multiple = v;
+ if (true == v) {
+ ever_enabled_multiple = true;
+ }
}
bool get_enable_multiple() const
@@ -411,7 +416,7 @@ public:
mds_gid_t find_standby_for(mds_role_t mds, const std::string& name) const;
- mds_gid_t find_unused(bool force_standby_active) const;
+ mds_gid_t find_unused(fs_cluster_id_t fscid, bool force_standby_active) const;
mds_gid_t find_replacement_for(mds_role_t mds, const std::string& name,
bool force_standby_active) const;
diff --git a/src/mds/Locker.cc b/src/mds/Locker.cc
index ddd99b2..84e0e48 100644
--- a/src/mds/Locker.cc
+++ b/src/mds/Locker.cc
@@ -2187,6 +2187,7 @@ bool Locker::check_inode_max_size(CInode *in, bool force_wrlock,
utime_t new_mtime)
{
assert(in->is_auth());
+ assert(in->is_file());
inode_t *latest = in->get_projected_inode();
map<client_t, client_writeable_range_t> new_ranges;
diff --git a/src/mds/MDBalancer.cc b/src/mds/MDBalancer.cc
index 4c42568..99845fb 100644
--- a/src/mds/MDBalancer.cc
+++ b/src/mds/MDBalancer.cc
@@ -333,6 +333,7 @@ double MDBalancer::try_match(mds_rank_t ex, double& maxex,
void MDBalancer::queue_split(CDir *dir)
{
+ assert(mds->mdsmap->allows_dirfrags());
split_queue.insert(dir->dirfrag());
}
@@ -984,6 +985,7 @@ void MDBalancer::hit_dir(utime_t now, CDir *dir, int type, int who, double amoun
// split
if (g_conf->mds_bal_split_size > 0 &&
+ mds->mdsmap->allows_dirfrags() &&
(dir->should_split() ||
(v > g_conf->mds_bal_split_rd && type == META_POP_IRD) ||
(v > g_conf->mds_bal_split_wr && type == META_POP_IWR)) &&
diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc
index d9e5901..152b47b 100644
--- a/src/mds/MDCache.cc
+++ b/src/mds/MDCache.cc
@@ -6038,6 +6038,11 @@ void MDCache::identify_files_to_recover(vector<CInode*>& recover_q, vector<CInod
CInode *in = p->second;
if (!in->is_auth())
continue;
+
+ // Only normal files need file size recovery
+ if (!in->is_file()) {
+ continue;
+ }
bool recover = false;
for (map<client_t,client_writeable_range_t>::iterator p = in->inode.client_ranges.begin();
diff --git a/src/mds/MDSDaemon.cc b/src/mds/MDSDaemon.cc
index daa0cb0..a950b0b 100644
--- a/src/mds/MDSDaemon.cc
+++ b/src/mds/MDSDaemon.cc
@@ -337,11 +337,19 @@ void MDSDaemon::clean_up_admin_socket()
admin_socket->unregister_command("dump_blocked_ops");
admin_socket->unregister_command("dump_historic_ops");
admin_socket->unregister_command("scrub_path");
+ admin_socket->unregister_command("tag path");
admin_socket->unregister_command("flush_path");
+ admin_socket->unregister_command("export dir");
+ admin_socket->unregister_command("dump cache");
admin_socket->unregister_command("session evict");
+ admin_socket->unregister_command("osdmap barrier");
admin_socket->unregister_command("session ls");
admin_socket->unregister_command("flush journal");
admin_socket->unregister_command("force_readonly");
+ admin_socket->unregister_command("get subtrees");
+ admin_socket->unregister_command("dirfrag split");
+ admin_socket->unregister_command("dirfrag merge");
+ admin_socket->unregister_command("dirfrag ls");
delete asok_hook;
asok_hook = NULL;
}
@@ -474,8 +482,21 @@ int MDSDaemon::init(MDSMap::DaemonState wanted_state)
mds_lock.Unlock();
return r;
}
+
+ int rotating_auth_attempts = 0;
+ const int max_rotating_auth_attempts = 10;
+
while (monc->wait_auth_rotating(30.0) < 0) {
- derr << "unable to obtain rotating service keys; retrying" << dendl;
+ if (++rotating_auth_attempts <= max_rotating_auth_attempts) {
+ derr << "unable to obtain rotating service keys; retrying" << dendl;
+ continue;
+ }
+ derr << "ERROR: failed to refresh rotating keys, "
+ << "maximum retry time reached." << dendl;
+ mds_lock.Lock();
+ suicide();
+ mds_lock.Unlock();
+ return -ETIMEDOUT;
}
objecter->start();
@@ -801,6 +822,7 @@ int MDSDaemon::_handle_command(
if (mds_rank == NULL) {
r = -EINVAL;
ss << "MDS not active";
+ goto out;
}
// FIXME harmonize `session kill` with admin socket session evict
int64_t session_id = 0;
@@ -988,6 +1010,7 @@ void MDSDaemon::handle_mds_map(MMDSMap *m)
// has taken our ID, we don't want to keep restarting and
// fighting them for the ID.
suicide();
+ m->put();
return;
}
}
@@ -1244,6 +1267,7 @@ bool MDSDaemon::handle_core_message(Message *m)
if (mds_rank) {
mds_rank->handle_osd_map();
}
+ m->put();
break;
default:
diff --git a/src/mds/MDSMap.cc b/src/mds/MDSMap.cc
index 9a7c26f..f66fc7a 100644
--- a/src/mds/MDSMap.cc
+++ b/src/mds/MDSMap.cc
@@ -134,6 +134,8 @@ void MDSMap::dump(Formatter *f) const
{
f->dump_int("epoch", epoch);
f->dump_unsigned("flags", flags);
+ f->dump_unsigned("ever_allowed_features", ever_allowed_features);
+ f->dump_unsigned("explicitly_allowed_features", explicitly_allowed_features);
f->dump_stream("created") << created;
f->dump_stream("modified") << modified;
f->dump_int("tableserver", tableserver);
@@ -547,7 +549,7 @@ void MDSMap::encode(bufferlist& bl, uint64_t features) const
::encode(cas_pool, bl);
// kclient ignores everything from here
- __u16 ev = 9;
+ __u16 ev = 10;
::encode(ev, bl);
::encode(compat, bl);
::encode(metadata_pool, bl);
@@ -560,8 +562,8 @@ void MDSMap::encode(bufferlist& bl, uint64_t features) const
::encode(failed, bl);
::encode(stopped, bl);
::encode(last_failure_osd_epoch, bl);
- ::encode(ever_allowed_snaps, bl);
- ::encode(explicitly_allowed_snaps, bl);
+ ::encode(ever_allowed_features, bl);
+ ::encode(explicitly_allowed_features, bl);
::encode(inline_data_enabled, bl);
::encode(enabled, bl);
::encode(fs_name, bl);
@@ -624,11 +626,27 @@ void MDSMap::decode(bufferlist::iterator& p)
if (ev >= 4)
::decode(last_failure_osd_epoch, p);
if (ev >= 6) {
- ::decode(ever_allowed_snaps, p);
- ::decode(explicitly_allowed_snaps, p);
+ if (ev < 10) {
+ // previously this was a bool about snaps, not a flag map
+ bool flag;
+ ::decode(flag, p);
+ ever_allowed_features = flag ? CEPH_MDSMAP_ALLOW_SNAPS : 0;
+ ever_allowed_features |= CEPH_MDSMAP_ALLOW_MULTIMDS|CEPH_MDSMAP_ALLOW_DIRFRAGS;
+ ::decode(flag, p);
+ explicitly_allowed_features = flag ? CEPH_MDSMAP_ALLOW_SNAPS : 0;
+ if (max_mds > 1) {
+ set_multimds_allowed();
+ }
+ } else {
+ ::decode(ever_allowed_features, p);
+ ::decode(explicitly_allowed_features, p);
+ }
} else {
- ever_allowed_snaps = true;
- explicitly_allowed_snaps = false;
+ ever_allowed_features = CEPH_MDSMAP_ALLOW_CLASSICS;
+ explicitly_allowed_features = 0;
+ if (max_mds > 1) {
+ set_multimds_allowed();
+ }
}
if (ev >= 7)
::decode(inline_data_enabled, p);
diff --git a/src/mds/MDSMap.h b/src/mds/MDSMap.h
index 2842f93..bb69a75 100644
--- a/src/mds/MDSMap.h
+++ b/src/mds/MDSMap.h
@@ -209,8 +209,8 @@ protected:
std::map<mds_rank_t, mds_gid_t> up; // who is in those roles
std::map<mds_gid_t, mds_info_t> mds_info;
- bool ever_allowed_snaps; //< the cluster has ever allowed snap creation
- bool explicitly_allowed_snaps; //< the user has explicitly enabled snap creation
+ uint8_t ever_allowed_features; //< bitmap of features the cluster has allowed
+ uint8_t explicitly_allowed_features; //< bitmap of features explicitly enabled
bool inline_data_enabled;
@@ -235,8 +235,8 @@ public:
cas_pool(-1),
metadata_pool(0),
max_mds(0),
- ever_allowed_snaps(false),
- explicitly_allowed_snaps(false),
+ ever_allowed_features(0),
+ explicitly_allowed_features(0),
inline_data_enabled(false),
cached_up_features(0)
{ }
@@ -259,11 +259,27 @@ public:
void set_snaps_allowed() {
set_flag(CEPH_MDSMAP_ALLOW_SNAPS);
- ever_allowed_snaps = true;
- explicitly_allowed_snaps = true;
+ ever_allowed_features |= CEPH_MDSMAP_ALLOW_SNAPS;
+ explicitly_allowed_features |= CEPH_MDSMAP_ALLOW_SNAPS;
}
- bool allows_snaps() { return test_flag(CEPH_MDSMAP_ALLOW_SNAPS); }
void clear_snaps_allowed() { clear_flag(CEPH_MDSMAP_ALLOW_SNAPS); }
+ bool allows_snaps() const { return test_flag(CEPH_MDSMAP_ALLOW_SNAPS); }
+
+ void set_multimds_allowed() {
+ set_flag(CEPH_MDSMAP_ALLOW_MULTIMDS);
+ ever_allowed_features |= CEPH_MDSMAP_ALLOW_MULTIMDS;
+ explicitly_allowed_features |= CEPH_MDSMAP_ALLOW_MULTIMDS;
+ }
+ void clear_multimds_allowed() { clear_flag(CEPH_MDSMAP_ALLOW_MULTIMDS); }
+ bool allows_multimds() const { return test_flag(CEPH_MDSMAP_ALLOW_MULTIMDS); }
+
+ void set_dirfrags_allowed() {
+ set_flag(CEPH_MDSMAP_ALLOW_DIRFRAGS);
+ ever_allowed_features |= CEPH_MDSMAP_ALLOW_DIRFRAGS;
+ explicitly_allowed_features |= CEPH_MDSMAP_ALLOW_DIRFRAGS;
+ }
+ void clear_dirfrags_allowed() { clear_flag(CEPH_MDSMAP_ALLOW_DIRFRAGS); }
+ bool allows_dirfrags() const { return test_flag(CEPH_MDSMAP_ALLOW_DIRFRAGS); }
epoch_t get_epoch() const { return epoch; }
void inc_epoch() { epoch++; }
diff --git a/src/mds/MDSRank.cc b/src/mds/MDSRank.cc
index 71e4925..feb4897 100644
--- a/src/mds/MDSRank.cc
+++ b/src/mds/MDSRank.cc
@@ -502,7 +502,8 @@ bool MDSRank::_dispatch(Message *m, bool new_msg)
if (!dir->get_parent_dir()) continue; // must be linked.
if (!dir->is_auth()) continue; // must be auth.
frag_t fg = dir->get_frag();
- if (fg == frag_t() || (rand() % (1 << fg.bits()) == 0))
+ if (mdsmap->allows_dirfrags() &&
+ (fg == frag_t() || (rand() % (1 << fg.bits()) == 0)))
mdcache->split_dir(dir, 1);
else
balancer->queue_merge(dir);
@@ -2166,6 +2167,11 @@ bool MDSRank::command_dirfrag_split(
cmdmap_t cmdmap,
std::ostream &ss)
{
+ if (!mdsmap->allows_dirfrags()) {
+ ss << "dirfrags are disallowed by the mds map!";
+ return false;
+ }
+
int64_t by = 0;
if (!cmd_getval(g_ceph_context, cmdmap, "bits", by)) {
ss << "missing bits argument";
@@ -2467,25 +2473,27 @@ bool MDSRankDispatcher::handle_command_legacy(std::vector<std::string> args)
dout(20) << "try_eval(" << inum << ", " << mask << ")" << dendl;
} else dout(15) << "inode " << inum << " not in mdcache!" << dendl;
} else if (args[0] == "fragment_dir") {
- if (args.size() == 4) {
- filepath fp(args[1].c_str());
- CInode *in = mdcache->cache_traverse(fp);
- if (in) {
- frag_t fg;
- if (fg.parse(args[2].c_str())) {
- CDir *dir = in->get_dirfrag(fg);
- if (dir) {
- if (dir->is_auth()) {
- int by = atoi(args[3].c_str());
- if (by)
- mdcache->split_dir(dir, by);
- else
- dout(0) << "need to split by >0 bits" << dendl;
- } else dout(0) << "dir " << dir->dirfrag() << " not auth" << dendl;
- } else dout(0) << "dir " << in->ino() << " " << fg << " dne" << dendl;
- } else dout(0) << " frag " << args[2] << " does not parse" << dendl;
- } else dout(0) << "path " << fp << " not found" << dendl;
- } else dout(0) << "bad syntax" << dendl;
+ if (mdsmap->allows_dirfrags()) { // split only when the MDS map permits dirfrags; the final else reports the refusal
+ if (args.size() == 4) {
+ filepath fp(args[1].c_str());
+ CInode *in = mdcache->cache_traverse(fp);
+ if (in) {
+ frag_t fg;
+ if (fg.parse(args[2].c_str())) {
+ CDir *dir = in->get_dirfrag(fg);
+ if (dir) {
+ if (dir->is_auth()) {
+ int by = atoi(args[3].c_str());
+ if (by)
+ mdcache->split_dir(dir, by);
+ else
+ dout(0) << "need to split by >0 bits" << dendl;
+ } else dout(0) << "dir " << dir->dirfrag() << " not auth" << dendl;
+ } else dout(0) << "dir " << in->ino() << " " << fg << " dne" << dendl;
+ } else dout(0) << " frag " << args[2] << " does not parse" << dendl;
+ } else dout(0) << "path " << fp << " not found" << dendl;
+ } else dout(0) << "bad syntax" << dendl;
+ } else dout(0) << "dirfrags are disallowed by the mds map!" << dendl;
} else if (args[0] == "merge_dir") {
if (args.size() == 3) {
filepath fp(args[1].c_str());
diff --git a/src/mds/events/ESessions.h b/src/mds/events/ESessions.h
index 35a6ce7..a9a834c 100644
--- a/src/mds/events/ESessions.h
+++ b/src/mds/events/ESessions.h
@@ -28,7 +28,7 @@ public:
map<client_t,entity_inst_t> client_map;
bool old_style_encode;
- ESessions() : LogEvent(EVENT_SESSIONS), old_style_encode(false) { }
+ ESessions() : LogEvent(EVENT_SESSIONS), cmapv(0), old_style_encode(false) { }
ESessions(version_t pv, map<client_t,entity_inst_t>& cm) :
LogEvent(EVENT_SESSIONS),
cmapv(pv),
diff --git a/src/mds/journal.cc b/src/mds/journal.cc
index 64d4d2a..0f27971 100644
--- a/src/mds/journal.cc
+++ b/src/mds/journal.cc
@@ -572,6 +572,25 @@ void EMetaBlob::fullbit::update_inode(MDSRank *mds, CInode *in)
*/
in->oldest_snap = oldest_snap;
in->decode_snap_blob(snapbl);
+
+ /*
+ * In case there was anything malformed in the journal that we are
+ * replaying, do sanity checks on the inodes we're replaying and
+ * go damaged instead of letting any trash into a live cache
+ */
+ if (in->is_file()) {
+ // Files must have valid layouts with a pool set
+ if (in->inode.layout.pool_id == -1 || !in->inode.layout.is_valid()) {
+ dout(0) << "EMetaBlob.replay invalid layout on ino " << *in
+ << ": " << in->inode.layout << dendl;
+ std::ostringstream oss;
+ oss << "Invalid layout for inode 0x" << std::hex << in->inode.ino
+ << std::dec << " in journal";
+ mds->clog->error() << oss.str();
+ mds->damaged();
+ assert(0); // Should be unreachable because damaged() calls respawn()
+ }
+ }
}
// EMetaBlob::remotebit
diff --git a/src/messages/MMDSBeacon.h b/src/messages/MMDSBeacon.h
index a155075..727aaad 100644
--- a/src/messages/MMDSBeacon.h
+++ b/src/messages/MMDSBeacon.h
@@ -37,7 +37,8 @@ enum mds_metric_t {
MDS_HEALTH_CLIENT_LATE_RELEASE_MANY,
MDS_HEALTH_CLIENT_OLDEST_TID,
MDS_HEALTH_CLIENT_OLDEST_TID_MANY,
- MDS_HEALTH_DAMAGE
+ MDS_HEALTH_DAMAGE,
+ MDS_HEALTH_READ_ONLY
};
/**
diff --git a/src/mon/MDSMonitor.cc b/src/mon/MDSMonitor.cc
index c5ed1a2..00f06a0 100644
--- a/src/mon/MDSMonitor.cc
+++ b/src/mon/MDSMonitor.cc
@@ -70,6 +70,11 @@ template<> bool cmd_getval(CephContext *cct, const cmdmap_t& cmdmap,
return cmd_getval(cct, cmdmap, k, (int64_t&)val);
}
+static const string EXPERIMENTAL_WARNING("Warning! This feature is experimental. "
+"It may cause problems up to and including data loss. "
+"Consult the documentation at ceph.com, and if unsure, do not proceed. "
+"Add --yes-i-really-mean-it if you are certain."); // adjacent literals are concatenated verbatim, so each sentence carries its own trailing space
+
static const string MDS_METADATA_PREFIX("mds_metadata");
@@ -1508,6 +1513,9 @@ class FlagSetHandler : public FileSystemCommandHandler
string flag_val;
cmd_getval(g_ceph_context, cmdmap, "val", flag_val);
+ string confirm;
+ cmd_getval(g_ceph_context, cmdmap, "confirm", confirm);
+
if (flag_name == "enable_multiple") {
bool flag_bool = false;
int r = parse_bool(flag_val, &flag_bool, ss);
@@ -1521,7 +1529,9 @@ class FlagSetHandler : public FileSystemCommandHandler
ss << "Multiple-filesystems are forbidden until all mons are updated";
return -EINVAL;
}
-
+ if (confirm != "--yes-i-really-mean-it") {
+ ss << EXPERIMENTAL_WARNING;
+ }
fsmap.set_enable_multiple(flag_bool);
return 0;
} else {
@@ -1740,6 +1750,17 @@ int MDSMonitor::management_command(
// Persist the new FSMap
pending_fsmap.filesystems[new_fs->fscid] = new_fs;
return 0;
+ } else if (prefix == "fs set_default") {
+ string fs_name;
+ cmd_getval(g_ceph_context, cmdmap, "fs_name", fs_name);
+ auto fs = pending_fsmap.get_filesystem(fs_name);
+ if (fs == nullptr) {
+ ss << "filesystem '" << fs_name << "' does not exist";
+ return -ENOENT;
+ }
+
+ pending_fsmap.legacy_client_fscid = fs->fscid;
+ return 0;
} else {
return -ENOSYS;
}
@@ -1812,6 +1833,11 @@ public:
if (interr.length()) {
return -EINVAL;
}
+ if (!fs->mds_map.allows_multimds() && n > fs->mds_map.get_max_mds() &&
+ n > 1) {
+ ss << "multi-MDS clusters are not enabled; set 'allow_multimds' to enable";
+ return -EINVAL;
+ }
if (n > MAX_MDS) {
ss << "may not have more than " << MAX_MDS << " MDS ranks";
return -EINVAL;
@@ -1833,7 +1859,7 @@ public:
string confirm;
if (!cmd_getval(g_ceph_context, cmdmap, "confirm", confirm) ||
confirm != "--yes-i-really-mean-it") {
- ss << "inline data is new and experimental; you must specify --yes-i-really-mean-it";
+ ss << EXPERIMENTAL_WARNING;
return -EPERM;
}
ss << "inline data enabled";
@@ -1892,7 +1918,7 @@ public:
string confirm;
if (!cmd_getval(g_ceph_context, cmdmap, "confirm", confirm) ||
confirm != "--yes-i-really-mean-it") {
- ss << "Snapshots are unstable and will probably break your FS! Set to --yes-i-really-mean-it if you are sure you want to enable them";
+ ss << EXPERIMENTAL_WARNING;
return -EPERM;
}
fsmap.modify_filesystem(
@@ -1903,6 +1929,64 @@ public:
});
ss << "enabled new snapshots";
}
+ } else if (var == "allow_multimds") {
+ bool enable_multimds = false;
+ int r = parse_bool(val, &enable_multimds, ss);
+ if (r != 0) {
+ return r;
+ }
+
+ if (!enable_multimds) {
+ fsmap.modify_filesystem(fs->fscid,
+ [](std::shared_ptr<Filesystem> fs)
+ {
+ fs->mds_map.clear_multimds_allowed();
+ });
+ ss << "disallowed increasing the cluster size past 1";
+ } else {
+ string confirm;
+ if (!cmd_getval(g_ceph_context, cmdmap, "confirm", confirm) ||
+ confirm != "--yes-i-really-mean-it") {
+ ss << EXPERIMENTAL_WARNING;
+ return -EPERM;
+ }
+ fsmap.modify_filesystem(
+ fs->fscid,
+ [](std::shared_ptr<Filesystem> fs)
+ {
+ fs->mds_map.set_multimds_allowed();
+ });
+ ss << "enabled creation of more than 1 active MDS";
+ }
+ } else if (var == "allow_dirfrags") {
+ bool enable_dirfrags = false;
+ int r = parse_bool(val, &enable_dirfrags, ss);
+ if (r != 0) {
+ return r;
+ }
+
+ if (!enable_dirfrags) {
+ fsmap.modify_filesystem(fs->fscid,
+ [](std::shared_ptr<Filesystem> fs)
+ {
+ fs->mds_map.clear_dirfrags_allowed();
+ });
+ ss << "disallowed new directory fragmentation";
+ } else {
+ string confirm;
+ if (!cmd_getval(g_ceph_context, cmdmap, "confirm", confirm) ||
+ confirm != "--yes-i-really-mean-it") {
+ ss << EXPERIMENTAL_WARNING;
+ return -EPERM;
+ }
+ fsmap.modify_filesystem(
+ fs->fscid,
+ [](std::shared_ptr<Filesystem> fs)
+ {
+ fs->mds_map.set_dirfrags_allowed();
+ });
+ ss << "enabled directory fragmentation";
+ }
} else if (var == "cluster_down") {
bool is_down = false;
int r = parse_bool(val, &is_down, ss);
@@ -2329,6 +2413,17 @@ int MDSMonitor::legacy_filesystem_command(
if (!cmd_getval(g_ceph_context, cmdmap, "maxmds", maxmds) || maxmds < 0) {
return -EINVAL;
}
+
+ const MDSMap& mdsmap =
+ pending_fsmap.filesystems.at(pending_fsmap.legacy_client_fscid)->mds_map;
+
+ if (!mdsmap.allows_multimds() &&
+ maxmds > mdsmap.get_max_mds() &&
+ maxmds > 1) {
+ ss << "multi-MDS clusters are not enabled; set 'allow_multimds' to enable";
+ return -EINVAL;
+ }
+
if (maxmds > MAX_MDS) {
ss << "may not have more than " << MAX_MDS << " MDS ranks";
return -EINVAL;
diff --git a/src/mon/MonCommands.h b/src/mon/MonCommands.h
index c7e923f..b3d8e14 100644
--- a/src/mon/MonCommands.h
+++ b/src/mon/MonCommands.h
@@ -329,7 +329,8 @@ COMMAND("mds set_max_mds " \
"name=maxmds,type=CephInt,range=0", \
"set max MDS index", "mds", "rw", "cli,rest")
COMMAND("mds set " \
- "name=var,type=CephChoices,strings=max_mds|max_file_size|allow_new_snaps|inline_data " \
+ "name=var,type=CephChoices,strings=max_mds|max_file_size"
+ "|allow_new_snaps|inline_data|allow_multimds|allow_dirfrags " \
"name=val,type=CephString " \
"name=confirm,type=CephString,req=false", \
"set mds parameter <var> to <val>", "mds", "rw", "cli,rest")
@@ -397,12 +398,13 @@ COMMAND("fs get name=fs_name,type=CephString", \
COMMAND("fs set " \
"name=fs_name,type=CephString " \
"name=var,type=CephChoices,strings=max_mds|max_file_size"
- "|allow_new_snaps|inline_data|cluster_down " \
+ "|allow_new_snaps|inline_data|cluster_down|allow_multimds|allow_dirfrags " \
"name=val,type=CephString " \
"name=confirm,type=CephString,req=false", \
"set mds parameter <var> to <val>", "mds", "rw", "cli,rest")
COMMAND("fs flag set name=flag_name,type=CephChoices,strings=enable_multiple "
- "name=val,type=CephString", \
+ "name=val,type=CephString " \
+ "name=confirm,type=CephChoices,strings=--yes-i-really-mean-it,req=false", \
"Set a global CephFS flag", \
"fs", "rw", "cli,rest")
COMMAND("fs add_data_pool name=fs_name,type=CephString " \
@@ -411,6 +413,9 @@ COMMAND("fs add_data_pool name=fs_name,type=CephString " \
COMMAND("fs rm_data_pool name=fs_name,type=CephString " \
"name=pool,type=CephString", \
"remove data pool <pool>", "mds", "rw", "cli,rest")
+COMMAND("fs set_default name=fs_name,type=CephString", \
+ "set the default to the named filesystem", \
+ "fs", "rw", "cli,rest")
/*
* Monmap commands
diff --git a/src/mon/OSDMonitor.cc b/src/mon/OSDMonitor.cc
index 8114154..f9cf0fd 100644
--- a/src/mon/OSDMonitor.cc
+++ b/src/mon/OSDMonitor.cc
@@ -2564,7 +2564,8 @@ void OSDMonitor::send_incremental(epoch_t first,
}
while (first <= osdmap.get_epoch()) {
- epoch_t last = MIN(first + g_conf->osd_map_message_max, osdmap.get_epoch());
+ epoch_t last = MIN(first + g_conf->osd_map_message_max - 1,
+ osdmap.get_epoch());
MOSDMap *m = build_incremental(first, last);
if (req) {
@@ -2955,6 +2956,16 @@ void OSDMonitor::get_health(list<pair<health_status_t,string> >& summary,
}
}
+ // Not using 'sortbitwise' and should be?
+ if (g_conf->mon_warn_on_no_sortbitwise &&
+ !osdmap.test_flag(CEPH_OSDMAP_SORTBITWISE) &&
+ (osdmap.get_features(CEPH_ENTITY_TYPE_OSD, NULL) &
+ CEPH_FEATURE_OSD_BITWISE_HOBJ_SORT)) {
+ ostringstream ss;
+ ss << "no legacy OSD present but 'sortbitwise' flag is not set";
+ summary.push_back(make_pair(HEALTH_WARN, ss.str()));
+ }
+
// Warn if 'mon_osd_down_out_interval' is set to zero.
// Having this option set to zero on the leader acts much like the
// 'noout' flag. It's hard to figure out what's going wrong with clusters
diff --git a/src/os/ObjectStore.cc b/src/os/ObjectStore.cc
index f319e76..d03ab3b 100644
--- a/src/os/ObjectStore.cc
+++ b/src/os/ObjectStore.cc
@@ -86,6 +86,7 @@ ObjectStore *ObjectStore::create(CephContext *cct,
}
int ObjectStore::probe_block_device_fsid(
+ CephContext *cct,
const string& path,
uuid_d *fsid)
{
@@ -95,14 +96,20 @@ int ObjectStore::probe_block_device_fsid(
// first try bluestore -- it has a crc on its header and will fail
// reliably.
r = BlueStore::get_block_device_fsid(path, fsid);
- if (r == 0)
+ if (r == 0) {
+ lgeneric_dout(cct, 0) << __func__ << " " << path << " is bluestore, "
+ << *fsid << dendl;
return r;
+ }
#endif
// okay, try FileStore (journal).
r = FileStore::get_block_device_fsid(path, fsid);
- if (r == 0)
+ if (r == 0) {
+ lgeneric_dout(cct, 0) << __func__ << " " << path << " is filestore, "
+ << *fsid << dendl;
return r;
+ }
return -EINVAL;
}
diff --git a/src/os/ObjectStore.h b/src/os/ObjectStore.h
index c561d31..93ae4bb 100644
--- a/src/os/ObjectStore.h
+++ b/src/os/ObjectStore.h
@@ -119,8 +119,10 @@ public:
* @param path path to device
* @param fsid [out] osd uuid
*/
- static int probe_block_device_fsid(const string& path,
- uuid_d *fsid);
+ static int probe_block_device_fsid(
+ CephContext *cct,
+ const string& path,
+ uuid_d *fsid);
Logger *logger;
@@ -1926,7 +1928,15 @@ public:
virtual int fsck() {
return -EOPNOTSUPP;
}
- virtual unsigned get_max_object_name_length() = 0;
+
+ /**
+ * Returns 0 if the hobject is valid, -error otherwise
+ *
+ * Errors:
+ * -ENAMETOOLONG: locator/namespace/name too large
+ */
+ virtual int validate_hobject_key(const hobject_t &obj) const = 0;
+
virtual unsigned get_max_attr_name_length() = 0;
virtual int mkfs() = 0; // wipe
virtual int mkjournal() = 0; // journal only
diff --git a/src/os/bluestore/BlueFS.cc b/src/os/bluestore/BlueFS.cc
index 8a9118f..41ae52a 100644
--- a/src/os/bluestore/BlueFS.cc
+++ b/src/os/bluestore/BlueFS.cc
@@ -5,6 +5,7 @@
#include "common/debug.h"
#include "common/errno.h"
+#include "common/perf_counters.h"
#include "BlockDevice.h"
#include "Allocator.h"
#include "StupidAllocator.h"
@@ -15,23 +16,76 @@
#define dout_prefix *_dout << "bluefs "
BlueFS::BlueFS()
- : ino_last(0),
+ : logger(NULL),
+ ino_last(0),
log_seq(0),
- log_writer(NULL)
+ log_writer(NULL),
+ bdev(MAX_BDEV),
+ ioc(MAX_BDEV),
+ block_all(MAX_BDEV),
+ block_total(MAX_BDEV, 0)
{
}
BlueFS::~BlueFS()
{
for (auto p : bdev) {
- p->close();
- delete p;
+ if (p) {
+ p->close();
+ delete p;
+ }
}
for (auto p : ioc) {
delete p;
}
}
+// Create the "BlueFS" perf counter set (indices l_bluefs_first ..
+// l_bluefs_last), store it in `logger`, and register it with the
+// perf counters collection of g_ceph_context.
+void BlueFS::_init_logger()
+{
+  PerfCountersBuilder builder(g_ceph_context, "BlueFS",
+                              l_bluefs_first, l_bluefs_last);
+
+  // space moved between BlueStore and BlueFS
+  builder.add_u64_counter(l_bluefs_gift_bytes,
+                          "gift_bytes", "Bytes gifted from BlueStore");
+  builder.add_u64_counter(l_bluefs_reclaim_bytes,
+                          "reclaim_bytes", "Bytes reclaimed by BlueStore");
+
+  // per-device capacity
+  builder.add_u64(l_bluefs_db_total_bytes,
+                  "db_total_bytes", "Total bytes (main db device)");
+  builder.add_u64(l_bluefs_db_free_bytes,
+                  "db_free_bytes", "Free bytes (main db device)");
+  builder.add_u64(l_bluefs_wal_total_bytes,
+                  "wal_total_bytes", "Total bytes (wal device)");
+  builder.add_u64(l_bluefs_wal_free_bytes,
+                  "wal_free_bytes", "Free bytes (wal device)");
+  builder.add_u64(l_bluefs_slow_total_bytes,
+                  "slow_total_bytes", "Total bytes (slow device)");
+  builder.add_u64(l_bluefs_slow_free_bytes,
+                  "slow_free_bytes", "Free bytes (slow device)");
+
+  // files and metadata log
+  builder.add_u64(l_bluefs_num_files, "num_files", "File count");
+  builder.add_u64(l_bluefs_log_bytes,
+                  "log_bytes", "Size of the metadata log");
+  builder.add_u64_counter(l_bluefs_log_compactions,
+                          "log_compactions",
+                          "Compactions of the metadata log");
+  builder.add_u64_counter(l_bluefs_logged_bytes,
+                          "logged_bytes",
+                          "Bytes written to the metadata log");
+
+  logger = builder.create_perf_counters();
+  g_ceph_context->get_perfcounters_collection()->add(logger);
+}
+
+// Unregister the BlueFS perf counters from the perf counters
+// collection of g_ceph_context and free them.
+void BlueFS::_shutdown_logger()
+{
+  g_ceph_context->get_perfcounters_collection()->remove(logger);
+  delete logger;
+  // add_block_extent() and reclaim_blocks() test `if (logger)` before
+  // touching the counters.  Clear the pointer so that, once mkfs() or
+  // umount() has shut the logger down, they see "no logger" instead of
+  // dereferencing freed memory.
+  logger = NULL;
+}
+
+// Refresh the gauge-style perf counters (file count, log size, and
+// per-device total/free bytes) from current in-memory state.
+void BlueFS::_update_logger_stats()
+{
+  // the caller is expected to hold the lock
+  logger->set(l_bluefs_num_files, file_map.size());
+  logger->set(l_bluefs_log_bytes, log_writer->file->fnode.size);
+
+  // device slot -> (total counter, free counter); devices that have no
+  // allocator are left untouched.
+  static const struct {
+    unsigned dev;
+    int total_counter;
+    int free_counter;
+  } per_device[] = {
+    { BDEV_WAL,  l_bluefs_wal_total_bytes,  l_bluefs_wal_free_bytes },
+    { BDEV_DB,   l_bluefs_db_total_bytes,   l_bluefs_db_free_bytes },
+    { BDEV_SLOW, l_bluefs_slow_total_bytes, l_bluefs_slow_free_bytes },
+  };
+  for (const auto& d : per_device) {
+    if (!alloc[d.dev])
+      continue;
+    logger->set(d.total_counter, block_total[d.dev]);
+    logger->set(d.free_counter, alloc[d.dev]->get_free());
+  }
+}
+
/*static void aio_cb(void *priv, void *priv2)
{
BlueFS *fs = static_cast<BlueFS*>(priv);
@@ -42,7 +96,8 @@ BlueFS::~BlueFS()
int BlueFS::add_block_device(unsigned id, string path)
{
dout(10) << __func__ << " bdev " << id << " path " << path << dendl;
- assert(id == bdev.size());
+ assert(id < bdev.size());
+ assert(bdev[id] == NULL);
BlockDevice *b = BlockDevice::create(path, NULL, NULL); //aio_cb, this);
int r = b->open(path);
if (r < 0) {
@@ -51,15 +106,16 @@ int BlueFS::add_block_device(unsigned id, string path)
}
dout(1) << __func__ << " bdev " << id << " path " << path
<< " size " << pretty_si_t(b->get_size()) << "B" << dendl;
- bdev.push_back(b);
- ioc.push_back(new IOContext(NULL));
- block_all.resize(bdev.size());
+ bdev[id] = b;
+ ioc[id] = new IOContext(NULL);
return 0;
}
uint64_t BlueFS::get_block_device_size(unsigned id)
{
- return bdev[id]->get_size();
+ if (bdev[id])
+ return bdev[id]->get_size();
+ return 0;
}
void BlueFS::add_block_extent(unsigned id, uint64_t offset, uint64_t length)
@@ -68,8 +124,10 @@ void BlueFS::add_block_extent(unsigned id, uint64_t offset, uint64_t length)
dout(1) << __func__ << " bdev " << id << " " << offset << "~" << length
<< dendl;
assert(id < bdev.size());
+ assert(bdev[id]);
assert(bdev[id]->get_size() >= offset + length);
block_all[id].insert(offset, length);
+ block_total[id] += length;
if (alloc.size()) {
log_t.op_alloc_add(id, offset, length);
@@ -77,6 +135,9 @@ void BlueFS::add_block_extent(unsigned id, uint64_t offset, uint64_t length)
assert(r == 0);
alloc[id]->init_add_free(offset, length);
}
+
+ if (logger)
+ logger->inc(l_bluefs_gift_bytes, length);
dout(10) << __func__ << " done" << dendl;
}
@@ -86,6 +147,7 @@ int BlueFS::reclaim_blocks(unsigned id, uint64_t want,
std::lock_guard<std::mutex> l(lock);
dout(1) << __func__ << " bdev " << id << " want " << want << dendl;
assert(id < alloc.size());
+ assert(alloc[id]);
int r = alloc[id]->reserve(want);
assert(r == 0); // caller shouldn't ask for more than they can get
@@ -96,10 +158,13 @@ int BlueFS::reclaim_blocks(unsigned id, uint64_t want,
alloc[id]->unreserve(want - *length);
block_all[id].erase(*offset, *length);
+ block_total[id] -= *length;
log_t.op_alloc_rm(id, *offset, *length);
r = _flush_log();
assert(r == 0);
+ if (logger)
+ logger->inc(l_bluefs_reclaim_bytes, *length);
dout(1) << __func__ << " bdev " << id << " want " << want
<< " got " << *offset << "~" << *length << dendl;
return 0;
@@ -109,12 +174,7 @@ uint64_t BlueFS::get_total(unsigned id)
{
std::lock_guard<std::mutex> l(lock);
assert(id < block_all.size());
- uint64_t r = 0;
- interval_set<uint64_t>& p = block_all[id];
- for (interval_set<uint64_t>::iterator q = p.begin(); q != p.end(); ++q) {
- r += q.get_len();
- }
- return r;
+ return block_total[id];
}
uint64_t BlueFS::get_free(unsigned id)
@@ -129,14 +189,14 @@ void BlueFS::get_usage(vector<pair<uint64_t,uint64_t>> *usage)
std::lock_guard<std::mutex> l(lock);
usage->resize(bdev.size());
for (unsigned id = 0; id < bdev.size(); ++id) {
- uint64_t total = 0;
- interval_set<uint64_t>& p = block_all[id];
- for (interval_set<uint64_t>::iterator q = p.begin(); q != p.end(); ++q) {
- total += q.get_len();
+ if (!bdev[id]) {
+ (*usage)[id] = make_pair(0, 0);
+ continue;
}
(*usage)[id].first = alloc[id]->get_free();
- (*usage)[id].second = total;
- uint64_t used = (total - (*usage)[id].first) * 100 / total;
+ (*usage)[id].second = block_total[id];
+ uint64_t used =
+ (block_total[id] - (*usage)[id].first) * 100 / block_total[id];
dout(10) << __func__ << " bdev " << id
<< " free " << (*usage)[id].first
<< " (" << pretty_si_t((*usage)[id].first) << "B)"
@@ -162,12 +222,12 @@ int BlueFS::mkfs(uuid_d osd_uuid)
dout(1) << __func__
<< " osd_uuid " << osd_uuid
<< dendl;
- assert(bdev.size() >= 1);
_init_alloc();
+ _init_logger();
super.version = 1;
- super.block_size = bdev[0]->get_block_size();
+ super.block_size = bdev[BDEV_DB]->get_block_size();
super.osd_uuid = osd_uuid;
super.uuid.generate_random();
dout(1) << __func__ << " uuid " << super.uuid << dendl;
@@ -175,17 +235,20 @@ int BlueFS::mkfs(uuid_d osd_uuid)
// init log
FileRef log_file = new File;
log_file->fnode.ino = 1;
- log_file->fnode.prefer_bdev = bdev.size() - 1;
- int r = _allocate(log_file->fnode.prefer_bdev,
- g_conf->bluefs_max_log_runway,
- &log_file->fnode.extents);
+ log_file->fnode.prefer_bdev = BDEV_WAL;
+ int r = _allocate(
+ log_file->fnode.prefer_bdev,
+ g_conf->bluefs_max_log_runway,
+ &log_file->fnode.extents);
assert(r == 0);
- log_writer = new FileWriter(log_file, bdev.size());
+ log_writer = _create_writer(log_file);
// initial txn
log_t.op_init();
- for (unsigned bdev = 0; bdev < block_all.size(); ++bdev) {
+ for (unsigned bdev = 0; bdev < MAX_BDEV; ++bdev) {
interval_set<uint64_t>& p = block_all[bdev];
+ if (p.empty())
+ continue;
for (interval_set<uint64_t>::iterator q = p.begin(); q != p.end(); ++q) {
dout(20) << __func__ << " op_alloc_add " << bdev << " " << q.get_start()
<< "~" << q.get_len() << dendl;
@@ -204,7 +267,9 @@ int BlueFS::mkfs(uuid_d osd_uuid)
_close_writer(log_writer);
log_writer = NULL;
block_all.clear();
+ block_total.clear();
_stop_alloc();
+ _shutdown_logger();
dout(10) << __func__ << " success" << dendl;
return 0;
@@ -213,8 +278,10 @@ int BlueFS::mkfs(uuid_d osd_uuid)
void BlueFS::_init_alloc()
{
dout(20) << __func__ << dendl;
- alloc.resize(bdev.size());
+ alloc.resize(MAX_BDEV);
for (unsigned id = 0; id < bdev.size(); ++id) {
+ if (!bdev[id])
+ continue;
alloc[id] = new StupidAllocator;
interval_set<uint64_t>& p = block_all[id];
for (interval_set<uint64_t>::iterator q = p.begin(); q != p.end(); ++q) {
@@ -235,7 +302,6 @@ void BlueFS::_stop_alloc()
int BlueFS::mount()
{
dout(1) << __func__ << dendl;
- assert(!bdev.empty());
int r = _open_super();
if (r < 0) {
@@ -244,7 +310,9 @@ int BlueFS::mount()
}
block_all.clear();
- block_all.resize(bdev.size());
+ block_all.resize(MAX_BDEV);
+ block_total.clear();
+ block_total.resize(MAX_BDEV, 0);
_init_alloc();
r = _replay();
@@ -263,10 +331,12 @@ int BlueFS::mount()
}
// set up the log for future writes
- log_writer = new FileWriter(_get_file(1), bdev.size());
+ log_writer = _create_writer(_get_file(1));
assert(log_writer->file->fnode.ino == 1);
log_writer->pos = log_writer->file->fnode.size;
dout(10) << __func__ << " log write pos set to " << log_writer->pos << dendl;
+
+ _init_logger();
return 0;
out:
@@ -283,12 +353,12 @@ void BlueFS::umount()
_close_writer(log_writer);
log_writer = NULL;
- block_all.clear();
_stop_alloc();
file_map.clear();
dir_map.clear();
super = bluefs_super_t();
log_t.clear();
+ _shutdown_logger();
}
int BlueFS::fsck()
@@ -311,8 +381,8 @@ int BlueFS::_write_super()
bl.rebuild();
IOContext ioc(NULL);
- bdev[0]->aio_write(get_super_offset(), bl, &ioc, false);
- bdev[0]->aio_submit(&ioc);
+ bdev[BDEV_DB]->aio_write(get_super_offset(), bl, &ioc, false);
+ bdev[BDEV_DB]->aio_submit(&ioc);
ioc.aio_wait();
dout(20) << __func__ << " v " << super.version << " crc " << crc
<< " offset " << get_super_offset() << dendl;
@@ -328,8 +398,8 @@ int BlueFS::_open_super()
int r;
// always the second block
- r = bdev[0]->read(get_super_offset(), get_super_length(),
- &bl, ioc[0], false);
+ r = bdev[BDEV_DB]->read(get_super_offset(), get_super_length(),
+ &bl, ioc[BDEV_DB], false);
if (r < 0)
return r;
@@ -458,6 +528,7 @@ int BlueFS::_replay()
dout(20) << __func__ << " " << pos << ": op_alloc_add "
<< " " << (int)id << ":" << offset << "~" << length << dendl;
block_all[id].insert(offset, length);
+ block_total[id] += length;
alloc[id]->init_add_free(offset, length);
}
break;
@@ -472,6 +543,7 @@ int BlueFS::_replay()
dout(20) << __func__ << " " << pos << ": op_alloc_rm "
<< " " << (int)id << ":" << offset << "~" << length << dendl;
block_all[id].erase(offset, length);
+ block_total[id] -= length;
alloc[id]->init_rm_free(offset, length);
}
break;
@@ -823,7 +895,7 @@ void BlueFS::_compact_log()
t.uuid = super.uuid;
dout(20) << __func__ << " op_init" << dendl;
t.op_init();
- for (unsigned bdev = 0; bdev < block_all.size(); ++bdev) {
+ for (unsigned bdev = 0; bdev < MAX_BDEV; ++bdev) {
interval_set<uint64_t>& p = block_all[bdev];
for (interval_set<uint64_t>::iterator q = p.begin(); q != p.end(); ++q) {
dout(20) << __func__ << " op_alloc_add " << bdev << " " << q.get_start()
@@ -868,7 +940,7 @@ void BlueFS::_compact_log()
_close_writer(log_writer);
log_file->fnode.size = bl.length();
- log_writer = new FileWriter(log_file, bdev.size());
+ log_writer = _create_writer(log_file);
log_writer->append(bl);
int r = _flush(log_writer, true);
assert(r == 0);
@@ -884,6 +956,8 @@ void BlueFS::_compact_log()
for (auto& r : old_extents) {
alloc[r.bdev]->release(r.offset, r.length);
}
+
+ logger->inc(l_bluefs_log_compactions);
}
void BlueFS::_pad_bl(bufferlist& bl)
@@ -922,6 +996,8 @@ int BlueFS::_flush_log()
_pad_bl(bl);
log_writer->append(bl);
+ logger->inc(l_bluefs_logged_bytes, bl.length());
+
log_t.clear();
log_t.seq = 0; // just so debug output is less confusing
@@ -941,6 +1017,8 @@ int BlueFS::_flush_log()
dirty_files.erase(p++);
}
+ _update_logger_stats();
+
return 0;
}
@@ -1004,7 +1082,9 @@ int BlueFS::_flush_range(FileWriter *h, uint64_t offset, uint64_t length)
length += partial;
dout(20) << __func__ << " waiting for previous aio to complete" << dendl;
for (auto p : h->iocv) {
- p->aio_wait();
+ if (p) {
+ p->aio_wait();
+ }
}
}
if (length == partial + h->buffer.length()) {
@@ -1044,8 +1124,9 @@ int BlueFS::_flush_range(FileWriter *h, uint64_t offset, uint64_t length)
++p;
x_off = 0;
}
- for (unsigned i = 0; i < bdev.size(); ++i) {
- if (h->iocv[i]->has_aios()) {
+ for (unsigned i = 0; i < MAX_BDEV; ++i) {
+ if (bdev[i] && h->iocv[i]->has_aios()) {
+ assert(h->iocv[i]);
bdev[i]->aio_submit(h->iocv[i]);
}
}
@@ -1058,7 +1139,9 @@ void BlueFS::_flush_wait(FileWriter *h)
dout(10) << __func__ << " " << h << dendl;
utime_t start = ceph_clock_now(NULL);
for (auto p : h->iocv) {
- p->aio_wait();
+ if (p) {
+ p->aio_wait();
+ }
}
utime_t end = ceph_clock_now(NULL);
utime_t dur = end - start;
@@ -1135,7 +1218,8 @@ void BlueFS::_flush_bdev()
{
dout(20) << __func__ << dendl;
for (auto p : bdev) {
- p->flush();
+ if (p)
+ p->flush();
}
}
@@ -1145,16 +1229,24 @@ int BlueFS::_allocate(unsigned id, uint64_t len, vector<bluefs_extent_t> *ev)
assert(id < alloc.size());
uint64_t left = ROUND_UP_TO(len, g_conf->bluefs_alloc_size);
- int r = alloc[id]->reserve(left);
+ int r = -ENOSPC;
+ if (alloc[id]) {
+ r = alloc[id]->reserve(left);
+ }
if (r < 0) {
- if (id) {
- derr << __func__ << " failed to allocate " << left << " on bdev " << id
- << ", free " << alloc[id]->get_free()
- << "; fallback to bdev 0" << dendl;
- return _allocate(0, len, ev);
+ if (id != BDEV_SLOW) {
+ if (bdev[id])
+ derr << __func__ << " failed to allocate " << left << " on bdev " << id
+ << ", free " << alloc[id]->get_free()
+ << "; fallback to bdev " << id + 1 << dendl;
+ return _allocate(id + 1, len, ev);
}
- derr << __func__ << " failed to allocate " << left << " on bdev " << id
- << ", free " << alloc[id]->get_free() << dendl;
+ if (bdev[id])
+ derr << __func__ << " failed to allocate " << left << " on bdev " << id
+ << ", free " << alloc[id]->get_free() << dendl;
+ else
+ derr << __func__ << " failed to allocate " << left << " on bdev " << id
+ << ", dne" << dendl;
return r;
}
@@ -1208,11 +1300,15 @@ void BlueFS::sync_metadata()
dout(10) << __func__ << dendl;
utime_t start = ceph_clock_now(NULL);
for (auto p : alloc) {
- p->commit_start();
+ if (p) {
+ p->commit_start();
+ }
}
_flush_log();
for (auto p : alloc) {
- p->commit_finish();
+ if (p) {
+ p->commit_finish();
+ }
}
_maybe_compact_log();
utime_t end = ceph_clock_now(NULL);
@@ -1276,41 +1372,53 @@ int BlueFS::open_for_write(
file->fnode.mtime = ceph_clock_now(NULL);
}
+ file->fnode.prefer_bdev = BlueFS::BDEV_DB;
if (dirname.length() > 5) {
// the "db.slow" and "db.wal" directory names are hard-coded to
// match up with bluestore. files in a "*.slow" directory prefer
// BDEV_SLOW, files in a "*.wal" directory prefer BDEV_WAL, and
// everything else prefers BDEV_DB.
if (strcmp(dirname.c_str() + dirname.length() - 5, ".slow") == 0) {
- assert(bdev.size() > 1);
- dout(20) << __func__ << " mapping " << dirname << "/" << filename
- << " to bdev 1" << dendl;
- file->fnode.prefer_bdev = 1;
+ file->fnode.prefer_bdev = BlueFS::BDEV_SLOW;
} else if (strcmp(dirname.c_str() + dirname.length() - 4, ".wal") == 0) {
- assert(bdev.size() > 1);
- file->fnode.prefer_bdev = bdev.size() - 1;
- dout(20) << __func__ << " mapping " << dirname << "/" << filename
- << " to bdev " << (int)file->fnode.prefer_bdev << dendl;
+ file->fnode.prefer_bdev = BlueFS::BDEV_WAL;
}
}
+ dout(20) << __func__ << " mapping " << dirname << "/" << filename
+ << " to bdev " << (int)file->fnode.prefer_bdev << dendl;
log_t.op_file_update(file->fnode);
if (create)
log_t.op_dir_link(dirname, filename, file->fnode.ino);
- *h = new FileWriter(file, bdev.size());
+ *h = _create_writer(file);
dout(10) << __func__ << " h " << *h << " on " << file->fnode << dendl;
return 0;
}
+// Allocate a FileWriter for `f` and fill in its per-device IOContext
+// slots: a fresh IOContext for every device slot that has a block
+// device, NULL for the slots that do not.
+BlueFS::FileWriter *BlueFS::_create_writer(FileRef f)
+{
+  FileWriter *writer = new FileWriter(f);
+  for (unsigned dev = 0; dev < MAX_BDEV; ++dev)
+    writer->iocv[dev] = bdev[dev] ? new IOContext(NULL) : NULL;
+  return writer;
+}
+
+
void BlueFS::_close_writer(FileWriter *h)
{
dout(10) << __func__ << " " << h << dendl;
- for (unsigned i=0; i<bdev.size(); ++i) {
- h->iocv[i]->aio_wait();
- bdev[i]->queue_reap_ioc(h->iocv[i]);
+ for (unsigned i=0; i<MAX_BDEV; ++i) {
+ if (bdev[i]) {
+ assert(h->iocv[i]);
+ h->iocv[i]->aio_wait();
+ bdev[i]->queue_reap_ioc(h->iocv[i]);
+ }
}
- h->iocv.clear();
delete h;
}
diff --git a/src/os/bluestore/BlueFS.h b/src/os/bluestore/BlueFS.h
index b665bb7..0f9ce7e 100644
--- a/src/os/bluestore/BlueFS.h
+++ b/src/os/bluestore/BlueFS.h
@@ -13,10 +13,34 @@
#include "boost/intrusive/list.hpp"
#include <boost/intrusive_ptr.hpp>
+class PerfCounters;
+
class Allocator;
+// Indices of the BlueFS perf counters.  l_bluefs_first and
+// l_bluefs_last are the bounds handed to PerfCountersBuilder in
+// BlueFS::_init_logger(); the entries in between are the counters
+// registered there.
+enum {
+ l_bluefs_first = 732600,
+ l_bluefs_gift_bytes, // bytes gifted from BlueStore
+ l_bluefs_reclaim_bytes, // bytes reclaimed by BlueStore
+ l_bluefs_db_total_bytes, // total bytes (main db device)
+ l_bluefs_db_free_bytes, // free bytes (main db device)
+ l_bluefs_wal_total_bytes, // total bytes (wal device)
+ l_bluefs_wal_free_bytes, // free bytes (wal device)
+ l_bluefs_slow_total_bytes, // total bytes (slow device)
+ l_bluefs_slow_free_bytes, // free bytes (slow device)
+ l_bluefs_num_files, // file count
+ l_bluefs_log_bytes, // size of the metadata log
+ l_bluefs_log_compactions, // compactions of the metadata log
+ l_bluefs_logged_bytes, // bytes written to the metadata log
+ l_bluefs_last,
+};
+
class BlueFS {
public:
+ static constexpr unsigned MAX_BDEV = 3;
+ static constexpr unsigned BDEV_WAL = 0;
+ static constexpr unsigned BDEV_DB = 1;
+ static constexpr unsigned BDEV_SLOW = 2;
+
struct File : public RefCountedObject {
bluefs_fnode_t fnode;
int refs;
@@ -80,22 +104,17 @@ public:
bufferlist tail_block; ///< existing partial block at end of file, if any
std::mutex lock;
- vector<IOContext*> iocv; ///< one for each bdev
+ std::array<IOContext*,MAX_BDEV> iocv; ///< for each bdev
- FileWriter(FileRef f, unsigned num_bdev)
+ FileWriter(FileRef f)
: file(f),
pos(0) {
++file->num_writers;
- iocv.resize(num_bdev);
- for (unsigned i = 0; i < num_bdev; ++i) {
- iocv[i] = new IOContext(NULL);
- }
}
+ // NOTE: caller must call BlueFS::_close_writer()
~FileWriter() {
--file->num_writers;
- assert(iocv.empty()); // caller must call BlueFS::close_writer()
}
-
void append(const char *buf, size_t len) {
buffer.append(buf, len);
}
@@ -161,6 +180,8 @@ public:
private:
std::mutex lock;
+ PerfCounters *logger;
+
// cache
map<string, DirRef> dir_map; ///< dirname -> Dir
ceph::unordered_map<uint64_t,FileRef> file_map; ///< ino -> File
@@ -173,25 +194,22 @@ private:
bluefs_transaction_t log_t; ///< pending, unwritten log transaction
/*
- * - there can be from 1 to 3 block devices.
- *
- * - the first device always has the superblock.
- *
- * - if there is a dedicated db device, it is the first device, and the
- * second device is shared with bluestore. the first device will be
- * db/, and the second device will be db.slow/.
+ * There are up to 3 block devices:
*
- * - if there is no dedicated db device, then the first device is shared, and
- * maps to the db/ directory.
- *
- * - a wal device, if present, it always the last device. it should be
- * used for any files in the db.wal/ directory.
+ * BDEV_DB db/ - the primary db device
+ * BDEV_WAL db.wal/ - a small, fast device, specifically for the WAL
+ * BDEV_SLOW db.slow/ - a big, slow device, to spill over to as BDEV_DB fills
*/
vector<BlockDevice*> bdev; ///< block devices we can use
vector<IOContext*> ioc; ///< IOContexts for bdevs
vector<interval_set<uint64_t> > block_all; ///< extents in bdev we own
+ vector<uint64_t> block_total; ///< sum of block_all
vector<Allocator*> alloc; ///< allocators for bdevs
+ void _init_logger();
+ void _shutdown_logger();
+ void _update_logger_stats();
+
void _init_alloc();
void _stop_alloc();
@@ -237,6 +255,7 @@ private:
int _write_super();
int _replay(); ///< replay journal
+ FileWriter *_create_writer(FileRef f);
void _close_writer(FileWriter *h);
// always put the super in the second 4k block. FIXME should this be
diff --git a/src/os/bluestore/BlueStore.cc b/src/os/bluestore/BlueStore.cc
index 6f5710c..451262d 100644
--- a/src/os/bluestore/BlueStore.cc
+++ b/src/os/bluestore/BlueStore.cc
@@ -1145,7 +1145,8 @@ int BlueStore::_open_db(bool create)
} else if (s == "0") {
do_bluefs = false;
} else {
- derr << __func__ << " bluefs = " << s << " : not 0 or 1, aborting" << dendl;
+ derr << __func__ << " bluefs = " << s << " : not 0 or 1, aborting"
+ << dendl;
return -EIO;
}
}
@@ -1162,36 +1163,42 @@ int BlueStore::_open_db(bool create)
char bfn[PATH_MAX];
struct stat st;
- int id = 0;
snprintf(bfn, sizeof(bfn), "%s/block.db", path.c_str());
if (::stat(bfn, &st) == 0) {
- r = bluefs->add_block_device(id, bfn);
+ r = bluefs->add_block_device(BlueFS::BDEV_DB, bfn);
if (r < 0) {
derr << __func__ << " add block device(" << bfn << ") returned: "
<< cpp_strerror(r) << dendl;
goto free_bluefs;
}
- r = _check_or_set_bdev_label(bfn, bluefs->get_block_device_size(id),
+ r = _check_or_set_bdev_label(
+ bfn,
+ bluefs->get_block_device_size(BlueFS::BDEV_DB),
"bluefs db", create);
if (r < 0) {
- derr << __func__ << " check block device(" << bfn << ") label returned: "
+ derr << __func__
+ << " check block device(" << bfn << ") label returned: "
<< cpp_strerror(r) << dendl;
goto free_bluefs;
}
if (create) {
bluefs->add_block_extent(
- id, BLUEFS_START,
- bluefs->get_block_device_size(id) - BLUEFS_START);
+ BlueFS::BDEV_DB,
+ BLUEFS_START,
+ bluefs->get_block_device_size(BlueFS::BDEV_DB) - BLUEFS_START);
}
- ++id;
+ bluefs_shared_bdev = BlueFS::BDEV_SLOW;
+ } else {
+ bluefs_shared_bdev = BlueFS::BDEV_DB;
}
+ // shared device
snprintf(bfn, sizeof(bfn), "%s/block", path.c_str());
- r = bluefs->add_block_device(id, bfn);
+ r = bluefs->add_block_device(bluefs_shared_bdev, bfn);
if (r < 0) {
derr << __func__ << " add block device(" << bfn << ") returned: "
- << cpp_strerror(r) << dendl;
+ << cpp_strerror(r) << dendl;
goto free_bluefs;
}
if (create) {
@@ -1204,21 +1211,23 @@ int BlueStore::_open_db(bool create)
// align to bluefs's alloc_size
initial = ROUND_UP_TO(initial, g_conf->bluefs_alloc_size);
initial += g_conf->bluefs_alloc_size - BLUEFS_START;
- bluefs->add_block_extent(id, BLUEFS_START, initial);
+ bluefs->add_block_extent(bluefs_shared_bdev, BLUEFS_START, initial);
bluefs_extents.insert(BLUEFS_START, initial);
}
- bluefs_shared_bdev = id;
- ++id;
- if (id == 2) {
+
+ // use a short, relative path, if it's bluefs.
+ strcpy(fn, "db");
+
+ if (bluefs_shared_bdev == BlueFS::BDEV_SLOW) {
// we have both block.db and block; tell rocksdb!
// note: the second (last) size value doesn't really matter
char db_paths[PATH_MAX*3];
snprintf(
- db_paths, sizeof(db_paths), "%s/db,%lld %s/db.slow,%lld",
- path.c_str(),
- (unsigned long long)bluefs->get_block_device_size(0) * 95 / 100,
- path.c_str(),
- (unsigned long long)bluefs->get_block_device_size(1) * 95 / 100);
+ db_paths, sizeof(db_paths), "db,%lld db.slow,%lld",
+ (unsigned long long)bluefs->get_block_device_size(BlueFS::BDEV_DB) *
+ 95 / 100,
+ (unsigned long long)bluefs->get_block_device_size(BlueFS::BDEV_SLOW) *
+ 95 / 100);
g_conf->set_val("rocksdb_db_paths", db_paths, false, false);
dout(10) << __func__ << " set rocksdb_db_paths to "
<< g_conf->rocksdb_db_paths << dendl;
@@ -1226,23 +1235,26 @@ int BlueStore::_open_db(bool create)
snprintf(bfn, sizeof(bfn), "%s/block.wal", path.c_str());
if (::stat(bfn, &st) == 0) {
- r = bluefs->add_block_device(id, bfn);
+ r = bluefs->add_block_device(BlueFS::BDEV_WAL, bfn);
if (r < 0) {
derr << __func__ << " add block device(" << bfn << ") returned: "
<< cpp_strerror(r) << dendl;
goto free_bluefs;
}
- r = _check_or_set_bdev_label(bfn, bluefs->get_block_device_size(id),
+ r = _check_or_set_bdev_label(
+ bfn,
+ bluefs->get_block_device_size(BlueFS::BDEV_WAL),
"bluefs wal", create);
if (r < 0) {
- derr << __func__ << " check block device(" << bfn << ") label returned: "
+ derr << __func__ << " check block device(" << bfn << ") label returned: "
<< cpp_strerror(r) << dendl;
goto free_bluefs;
}
if (create) {
bluefs->add_block_extent(
- id, BDEV_LABEL_BLOCK_SIZE,
- bluefs->get_block_device_size(id) - BDEV_LABEL_BLOCK_SIZE);
+ BlueFS::BDEV_WAL, BDEV_LABEL_BLOCK_SIZE,
+ bluefs->get_block_device_size(BlueFS::BDEV_WAL) -
+ BDEV_LABEL_BLOCK_SIZE);
}
g_conf->set_val("rocksdb_separate_wal_dir", "true");
} else {
@@ -1320,7 +1332,8 @@ int BlueStore::_open_db(bool create)
delete bluefs;
bluefs = NULL;
}
- // delete env manually here since we can't depend on db to do this under this case
+ // delete env manually here since we can't depend on db to do this
+ // under this case
delete env;
env = NULL;
return -EIO;
diff --git a/src/os/bluestore/BlueStore.h b/src/os/bluestore/BlueStore.h
index 828dd91..91aec62 100644
--- a/src/os/bluestore/BlueStore.h
+++ b/src/os/bluestore/BlueStore.h
@@ -667,8 +667,8 @@ public:
int fsck() override;
- unsigned get_max_object_name_length() override {
- return 4096;
+ int validate_hobject_key(const hobject_t &obj) const override {
+ return 0;
}
unsigned get_max_attr_name_length() override {
return 256; // arbitrary; there is no real limit internally
diff --git a/src/os/filestore/FileStore.cc b/src/os/filestore/FileStore.cc
index 97b952e..95a4f5f 100644
--- a/src/os/filestore/FileStore.cc
+++ b/src/os/filestore/FileStore.cc
@@ -123,6 +123,12 @@ static CompatSet get_fs_supported_compat_set() {
return compat;
}
+int FileStore::validate_hobject_key(const hobject_t &obj) const
+{
+ unsigned len = LFNIndex::get_max_escaped_name_len(obj);
+ return len > m_filestore_max_xattr_value_size ? -ENAMETOOLONG : 0;
+}
+
int FileStore::get_block_device_fsid(const string& path, uuid_d *fsid)
{
// make sure we don't try to use aio or direct_io (and get annoying
@@ -300,8 +306,9 @@ int FileStore::lfn_open(const coll_t& cid,
<< ") in index: " << cpp_strerror(-r) << dendl;
goto fail;
}
- r = chain_fsetxattr(fd, XATTR_SPILL_OUT_NAME,
- XATTR_NO_SPILL_OUT, sizeof(XATTR_NO_SPILL_OUT), true);
+ r = chain_fsetxattr<true, true>(
+ fd, XATTR_SPILL_OUT_NAME,
+ XATTR_NO_SPILL_OUT, sizeof(XATTR_NO_SPILL_OUT));
if (r < 0) {
VOID_TEMP_FAILURE_RETRY(::close(fd));
derr << "error setting spillout xattr for oid " << oid << " (" << (*path)->path()
@@ -559,7 +566,8 @@ FileStore::FileStore(const std::string &base, const std::string &jdev, osflagbit
m_filestore_max_alloc_hint_size(g_conf->filestore_max_alloc_hint_size),
m_fs_type(0),
m_filestore_max_inline_xattr_size(0),
- m_filestore_max_inline_xattrs(0)
+ m_filestore_max_inline_xattrs(0),
+ m_filestore_max_xattr_value_size(0)
{
m_filestore_kill_at.set(g_conf->filestore_kill_at);
for (int i = 0; i < m_ondisk_finisher_num; ++i) {
@@ -2146,7 +2154,8 @@ void FileStore::_set_global_replay_guard(const coll_t& cid,
// then record that we did it
bufferlist v;
::encode(spos, v);
- int r = chain_fsetxattr(fd, GLOBAL_REPLAY_GUARD_XATTR, v.c_str(), v.length(), true);
+ int r = chain_fsetxattr<true, true>(
+ fd, GLOBAL_REPLAY_GUARD_XATTR, v.c_str(), v.length());
if (r < 0) {
derr << __func__ << ": fsetxattr " << GLOBAL_REPLAY_GUARD_XATTR
<< " got " << cpp_strerror(r) << dendl;
@@ -2236,7 +2245,8 @@ void FileStore::_set_replay_guard(int fd,
bufferlist v(40);
::encode(spos, v);
::encode(in_progress, v);
- int r = chain_fsetxattr(fd, REPLAY_GUARD_XATTR, v.c_str(), v.length(), true);
+ int r = chain_fsetxattr<true, true>(
+ fd, REPLAY_GUARD_XATTR, v.c_str(), v.length());
if (r < 0) {
derr << "fsetxattr " << REPLAY_GUARD_XATTR << " got " << cpp_strerror(r) << dendl;
assert(0 == "fsetxattr failed");
@@ -2279,7 +2289,8 @@ void FileStore::_close_replay_guard(int fd, const SequencerPosition& spos)
::encode(spos, v);
bool in_progress = false;
::encode(in_progress, v);
- int r = chain_fsetxattr(fd, REPLAY_GUARD_XATTR, v.c_str(), v.length(), true);
+ int r = chain_fsetxattr<true, true>(
+ fd, REPLAY_GUARD_XATTR, v.c_str(), v.length());
if (r < 0) {
derr << "fsetxattr " << REPLAY_GUARD_XATTR << " got " << cpp_strerror(r) << dendl;
assert(0 == "fsetxattr failed");
@@ -3391,11 +3402,11 @@ int FileStore::_clone(const coll_t& cid, const ghobject_t& oldoid, const ghobjec
r = chain_fgetxattr(**o, XATTR_SPILL_OUT_NAME, buf, sizeof(buf));
if (r >= 0 && !strncmp(buf, XATTR_NO_SPILL_OUT, sizeof(XATTR_NO_SPILL_OUT))) {
- r = chain_fsetxattr(**n, XATTR_SPILL_OUT_NAME, XATTR_NO_SPILL_OUT,
- sizeof(XATTR_NO_SPILL_OUT), true);
+ r = chain_fsetxattr<true, true>(**n, XATTR_SPILL_OUT_NAME, XATTR_NO_SPILL_OUT,
+ sizeof(XATTR_NO_SPILL_OUT));
} else {
- r = chain_fsetxattr(**n, XATTR_SPILL_OUT_NAME, XATTR_SPILL_OUT,
- sizeof(XATTR_SPILL_OUT), true);
+ r = chain_fsetxattr<true, true>(**n, XATTR_SPILL_OUT_NAME, XATTR_SPILL_OUT,
+ sizeof(XATTR_SPILL_OUT));
}
if (r < 0)
goto out3;
@@ -5663,21 +5674,25 @@ void FileStore::set_xattr_limits_via_conf()
{
uint32_t fs_xattr_size;
uint32_t fs_xattrs;
+ uint32_t fs_xattr_max_value_size;
switch (m_fs_type) {
#if defined(__linux__)
case XFS_SUPER_MAGIC:
fs_xattr_size = g_conf->filestore_max_inline_xattr_size_xfs;
fs_xattrs = g_conf->filestore_max_inline_xattrs_xfs;
+ fs_xattr_max_value_size = g_conf->filestore_max_xattr_value_size_xfs;
break;
case BTRFS_SUPER_MAGIC:
fs_xattr_size = g_conf->filestore_max_inline_xattr_size_btrfs;
fs_xattrs = g_conf->filestore_max_inline_xattrs_btrfs;
+ fs_xattr_max_value_size = g_conf->filestore_max_xattr_value_size_btrfs;
break;
#endif
default:
fs_xattr_size = g_conf->filestore_max_inline_xattr_size_other;
fs_xattrs = g_conf->filestore_max_inline_xattrs_other;
+ fs_xattr_max_value_size = g_conf->filestore_max_xattr_value_size_other;
break;
}
@@ -5692,6 +5707,24 @@ void FileStore::set_xattr_limits_via_conf()
m_filestore_max_inline_xattrs = g_conf->filestore_max_inline_xattrs;
else
m_filestore_max_inline_xattrs = fs_xattrs;
+
+ // Use override value if set
+ if (g_conf->filestore_max_xattr_value_size)
+ m_filestore_max_xattr_value_size = g_conf->filestore_max_xattr_value_size;
+ else
+ m_filestore_max_xattr_value_size = fs_xattr_max_value_size;
+
+ if (m_filestore_max_xattr_value_size < g_conf->osd_max_object_name_len) {
+ derr << "WARNING: max attr value size ("
+ << m_filestore_max_xattr_value_size
+ << ") is smaller than osd_max_object_name_len ("
+ << g_conf->osd_max_object_name_len
+ << "). Your backend filesystem appears to not support attrs large "
+ << "enough to handle the configured max rados name size. You may get "
+ << "unexpected ENAMETOOLONG errors on rados operations or buggy "
+ << "behavior"
+ << dendl;
+ }
}
// -- FSSuperblock --
diff --git a/src/os/filestore/FileStore.h b/src/os/filestore/FileStore.h
index d81f8b0..a5cd75d 100644
--- a/src/os/filestore/FileStore.h
+++ b/src/os/filestore/FileStore.h
@@ -432,10 +432,9 @@ public:
int write_op_seq(int, uint64_t seq);
int mount();
int umount();
- unsigned get_max_object_name_length() {
- // not safe for all file systems, btw! use the tunable to limit this.
- return 4096;
- }
+
+ int validate_hobject_key(const hobject_t &obj) const override;
+
unsigned get_max_attr_name_length() {
// xattr limit is 128; leave room for our prefixes (user.ceph._),
// some margin, and cap at 100
@@ -739,6 +738,7 @@ private:
void set_xattr_limits_via_conf();
uint32_t m_filestore_max_inline_xattr_size;
uint32_t m_filestore_max_inline_xattrs;
+ uint32_t m_filestore_max_xattr_value_size;
FSSuperblock superblock;
diff --git a/src/os/filestore/IndexManager.cc b/src/os/filestore/IndexManager.cc
index 3a3e5c9..078550d 100644
--- a/src/os/filestore/IndexManager.cc
+++ b/src/os/filestore/IndexManager.cc
@@ -36,8 +36,9 @@
static int set_version(const char *path, uint32_t version) {
bufferlist bl;
::encode(version, bl);
- return chain_setxattr(path, "user.cephos.collection_version", bl.c_str(),
- bl.length(), true);
+ return chain_setxattr<true, true>(
+ path, "user.cephos.collection_version", bl.c_str(),
+ bl.length());
}
static int get_version(const char *path, uint32_t *version) {
diff --git a/src/os/filestore/LFNIndex.cc b/src/os/filestore/LFNIndex.cc
index 47436ea..1994d5a 100644
--- a/src/os/filestore/LFNIndex.cc
+++ b/src/os/filestore/LFNIndex.cc
@@ -74,6 +74,14 @@ struct FDCloser {
/* Public methods */
+uint64_t LFNIndex::get_max_escaped_name_len(const hobject_t &obj)
+{
+ ghobject_t ghobj(obj);
+ ghobj.shard_id = shard_id_t(0);
+ ghobj.generation = 0;
+ ghobj.hobj.snap = 0;
+ return lfn_generate_object_name_current(ghobj).size();
+}
int LFNIndex::init()
{
@@ -375,14 +383,18 @@ static int get_hobject_from_oinfo(const char *dir, const char *file,
ghobject_t *o)
{
char path[PATH_MAX];
- bufferptr bp(PATH_MAX);
snprintf(path, sizeof(path), "%s/%s", dir, file);
// Hack, user.ceph._ is the attribute used to store the object info
- int r = chain_getxattr(path, "user.ceph._", bp.c_str(), bp.length());
+ bufferptr bp;
+ int r = chain_getxattr_buf(
+ path,
+ "user.ceph._",
+ &bp);
if (r < 0)
return r;
bufferlist bl;
- bl.push_back(bp);
+ if (r > 0)
+ bl.push_back(bp);
object_info_t oi(bl);
*o = ghobject_t(oi.soid);
return 0;
@@ -421,10 +433,11 @@ int LFNIndex::list_objects(const vector<string> &to_list, int max_objs,
ghobject_t obj;
if (lfn_is_object(short_name)) {
r = lfn_translate(to_list, short_name, &obj);
- if (r < 0) {
- r = -errno;
+ if (r == -EINVAL) {
+ continue;
+ } else if (r < 0) {
goto cleanup;
- } else if (r > 0) {
+ } else {
string long_name = lfn_generate_object_name(obj);
if (!lfn_must_hash(long_name)) {
assert(long_name == short_name);
@@ -434,8 +447,6 @@ int LFNIndex::list_objects(const vector<string> &to_list, int max_objs,
out->insert(pair<string, ghobject_t>(short_name, obj));
++listed;
- } else {
- continue;
}
}
}
@@ -521,9 +532,10 @@ int LFNIndex::add_attr_path(const vector<string> &path,
{
string full_path = get_full_path_subdir(path);
maybe_inject_failure();
- return chain_setxattr(full_path.c_str(), mangle_attr_name(attr_name).c_str(),
- reinterpret_cast<void *>(attr_value.c_str()),
- attr_value.length());
+ return chain_setxattr<false, true>(
+ full_path.c_str(), mangle_attr_name(attr_name).c_str(),
+ reinterpret_cast<void *>(attr_value.c_str()),
+ attr_value.length());
}
int LFNIndex::get_attr_path(const vector<string> &path,
@@ -531,26 +543,14 @@ int LFNIndex::get_attr_path(const vector<string> &path,
bufferlist &attr_value)
{
string full_path = get_full_path_subdir(path);
- size_t size = 1024; // Initial
- while (1) {
- bufferptr buf(size);
- int r = chain_getxattr(full_path.c_str(), mangle_attr_name(attr_name).c_str(),
- reinterpret_cast<void *>(buf.c_str()),
- size);
- if (r > 0) {
- buf.set_length(r);
- attr_value.push_back(buf);
- break;
- } else {
- r = -errno;
- if (r == -ERANGE) {
- size *= 2;
- } else {
- return r;
- }
- }
- }
- return 0;
+ bufferptr bp;
+ int r = chain_getxattr_buf(
+ full_path.c_str(),
+ mangle_attr_name(attr_name).c_str(),
+ &bp);
+ if (r > 0)
+ attr_value.push_back(bp);
+ return r;
}
int LFNIndex::remove_attr_path(const vector<string> &path,
@@ -621,13 +621,8 @@ static void append_escaped(string::const_iterator begin,
}
}
-string LFNIndex::lfn_generate_object_name(const ghobject_t &oid)
+string LFNIndex::lfn_generate_object_name_current(const ghobject_t &oid)
{
- if (index_version == HASH_INDEX_TAG)
- return lfn_generate_object_name_keyless(oid);
- if (index_version == HASH_INDEX_TAG_2)
- return lfn_generate_object_name_poolless(oid);
-
string full_name;
string::const_iterator i = oid.hobj.oid.name.begin();
if (oid.hobj.oid.name.substr(0, 4) == "DIR_") {
@@ -754,12 +749,14 @@ int LFNIndex::lfn_get_name(const vector<string> &path,
int i = 0;
string candidate;
string candidate_path;
- char buf[FILENAME_MAX_LEN + 1];
for ( ; ; ++i) {
candidate = lfn_get_short_name(oid, i);
candidate_path = get_full_path(path, candidate);
- r = chain_getxattr(candidate_path.c_str(), get_lfn_attr().c_str(),
- buf, sizeof(buf));
+ bufferptr bp;
+ r = chain_getxattr_buf(
+ candidate_path.c_str(),
+ get_lfn_attr().c_str(),
+ &bp);
if (r < 0) {
if (errno != ENODATA && errno != ENOENT)
return -errno;
@@ -780,8 +777,8 @@ int LFNIndex::lfn_get_name(const vector<string> &path,
return 0;
}
assert(r > 0);
- buf[MIN((int)sizeof(buf) - 1, r)] = '\0';
- if (!strcmp(buf, full_name.c_str())) {
+ string lfn(bp.c_str(), bp.length());
+ if (lfn == full_name) {
if (mangled_name)
*mangled_name = candidate;
if (out_path)
@@ -793,8 +790,11 @@ int LFNIndex::lfn_get_name(const vector<string> &path,
}
return 0;
}
- r = chain_getxattr(candidate_path.c_str(), get_alt_lfn_attr().c_str(),
- buf, sizeof(buf));
+ bp = bufferptr();
+ r = chain_getxattr_buf(
+ candidate_path.c_str(),
+ get_alt_lfn_attr().c_str(),
+ &bp);
if (r > 0) {
// only consider alt name if nlink > 1
struct stat st;
@@ -805,7 +805,7 @@ int LFNIndex::lfn_get_name(const vector<string> &path,
// left over from incomplete unlink, remove
maybe_inject_failure();
dout(20) << __func__ << " found extra alt attr for " << candidate_path
- << ", long name " << string(buf, r) << dendl;
+ << ", long name " << string(bp.c_str(), bp.length()) << dendl;
rc = chain_removexattr(candidate_path.c_str(),
get_alt_lfn_attr().c_str());
maybe_inject_failure();
@@ -813,8 +813,8 @@ int LFNIndex::lfn_get_name(const vector<string> &path,
return rc;
continue;
}
- buf[MIN((int)sizeof(buf) - 1, r)] = '\0';
- if (!strcmp(buf, full_name.c_str())) {
+ string lfn(bp.c_str(), bp.length());
+ if (lfn == full_name) {
dout(20) << __func__ << " used alt attr for " << full_name << dendl;
if (mangled_name)
*mangled_name = candidate;
@@ -841,23 +841,29 @@ int LFNIndex::lfn_created(const vector<string> &path,
maybe_inject_failure();
// if the main attr exists and is different, move it to the alt attr.
- char buf[FILENAME_MAX_LEN + 1];
- int r = chain_getxattr(full_path.c_str(), get_lfn_attr().c_str(),
- buf, sizeof(buf));
- if (r >= 0 && (r != (int)full_name.length() ||
- memcmp(buf, full_name.c_str(), full_name.length()))) {
- dout(20) << __func__ << " " << mangled_name
- << " moving old name to alt attr "
- << string(buf, r)
- << ", new name is " << full_name << dendl;
- r = chain_setxattr(full_path.c_str(), get_alt_lfn_attr().c_str(),
- buf, r);
- if (r < 0)
- return r;
+ bufferptr bp;
+ int r = chain_getxattr_buf(
+ full_path.c_str(),
+ get_lfn_attr().c_str(),
+ &bp);
+ if (r > 0) {
+ string lfn(bp.c_str(), bp.length());
+ if (lfn != full_name) {
+ dout(20) << __func__ << " " << mangled_name
+ << " moving old name to alt attr "
+ << lfn
+ << ", new name is " << full_name << dendl;
+ r = chain_setxattr<false, true>(
+ full_path.c_str(), get_alt_lfn_attr().c_str(),
+ bp.c_str(), bp.length());
+ if (r < 0)
+ return r;
+ }
}
- return chain_setxattr(full_path.c_str(), get_lfn_attr().c_str(),
- full_name.c_str(), full_name.size());
+ return chain_setxattr<false, true>(
+ full_path.c_str(), get_lfn_attr().c_str(),
+ full_name.c_str(), full_name.size());
}
int LFNIndex::lfn_unlink(const vector<string> &path,
@@ -936,31 +942,32 @@ int LFNIndex::lfn_translate(const vector<string> &path,
return lfn_parse_object_name(short_name, out);
}
string full_path = get_full_path(path, short_name);
- char attr[PATH_MAX];
// First, check alt attr
- int r = chain_getxattr(
+ bufferptr bp;
+ int r = chain_getxattr_buf(
full_path.c_str(),
get_alt_lfn_attr().c_str(),
- attr,
- sizeof(attr) - 1);
- if (r >= 0) {
+ &bp);
+ if (r > 0) {
// There is an alt attr, does it match?
- if (r < (int)sizeof(attr))
- attr[r] = '\0';
- if (short_name_matches(short_name.c_str(), attr)) {
- string long_name(attr);
- return lfn_parse_object_name(long_name, out);
+ string lfn(bp.c_str(), bp.length());
+ if (short_name_matches(short_name.c_str(), lfn.c_str())) {
+ return lfn_parse_object_name(lfn, out);
}
}
// Get lfn_attr
- r = chain_getxattr(full_path.c_str(), get_lfn_attr().c_str(), attr, sizeof(attr) - 1);
+ bp = bufferptr();
+ r = chain_getxattr_buf(
+ full_path.c_str(),
+ get_lfn_attr().c_str(),
+ &bp);
if (r < 0)
- return -errno;
- if (r < (int)sizeof(attr))
- attr[r] = '\0';
+ return r;
+ if (r == 0)
+ return -EINVAL;
- string long_name(attr);
+ string long_name(bp.c_str(), bp.length());
return lfn_parse_object_name(long_name, out);
}
@@ -1032,7 +1039,7 @@ static int parse_object(const char *s, ghobject_t& o)
return 0;
}
-bool LFNIndex::lfn_parse_object_name_keyless(const string &long_name, ghobject_t *out)
+int LFNIndex::lfn_parse_object_name_keyless(const string &long_name, ghobject_t *out)
{
bool r = parse_object(long_name.c_str(), *out);
int64_t pool = -1;
@@ -1042,7 +1049,7 @@ bool LFNIndex::lfn_parse_object_name_keyless(const string &long_name, ghobject_t
out->hobj.pool = pool;
if (!r) return r;
string temp = lfn_generate_object_name(*out);
- return r;
+ return r ? 0 : -EINVAL;
}
static bool append_unescaped(string::const_iterator begin,
@@ -1069,8 +1076,8 @@ static bool append_unescaped(string::const_iterator begin,
return true;
}
-bool LFNIndex::lfn_parse_object_name_poolless(const string &long_name,
- ghobject_t *out)
+int LFNIndex::lfn_parse_object_name_poolless(const string &long_name,
+ ghobject_t *out)
{
string name;
string key;
@@ -1081,7 +1088,7 @@ bool LFNIndex::lfn_parse_object_name_poolless(const string &long_name,
if (*current == '\\') {
++current;
if (current == long_name.end()) {
- return false;
+ return -EINVAL;
} else if (*current == 'd') {
name.append("DIR_");
++current;
@@ -1096,27 +1103,27 @@ bool LFNIndex::lfn_parse_object_name_poolless(const string &long_name,
string::const_iterator end = current;
for ( ; end != long_name.end() && *end != '_'; ++end) ;
if (end == long_name.end())
- return false;
+ return -EINVAL;
if (!append_unescaped(current, end, &name))
- return false;
+ return -EINVAL;
current = ++end;
for ( ; end != long_name.end() && *end != '_'; ++end) ;
if (end == long_name.end())
- return false;
+ return -EINVAL;
if (!append_unescaped(current, end, &key))
- return false;
+ return -EINVAL;
current = ++end;
for ( ; end != long_name.end() && *end != '_'; ++end) ;
if (end == long_name.end())
- return false;
+ return -EINVAL;
string snap_str(current, end);
current = ++end;
for ( ; end != long_name.end() && *end != '_'; ++end) ;
if (end != long_name.end())
- return false;
+ return -EINVAL;
string hash_str(current, end);
if (snap_str == "head")
@@ -1133,11 +1140,11 @@ bool LFNIndex::lfn_parse_object_name_poolless(const string &long_name,
if (coll().is_pg_prefix(&pg))
pool = (int64_t)pg.pgid.pool();
(*out) = ghobject_t(hobject_t(name, key, snap, hash, pool, ""));
- return true;
+ return 0;
}
-bool LFNIndex::lfn_parse_object_name(const string &long_name, ghobject_t *out)
+int LFNIndex::lfn_parse_object_name(const string &long_name, ghobject_t *out)
{
string name;
string key;
@@ -1157,7 +1164,7 @@ bool LFNIndex::lfn_parse_object_name(const string &long_name, ghobject_t *out)
if (*current == '\\') {
++current;
if (current == long_name.end()) {
- return false;
+ return -EINVAL;
} else if (*current == 'd') {
name.append("DIR_");
++current;
@@ -1172,35 +1179,35 @@ bool LFNIndex::lfn_parse_object_name(const string &long_name, ghobject_t *out)
string::const_iterator end = current;
for ( ; end != long_name.end() && *end != '_'; ++end) ;
if (end == long_name.end())
- return false;
+ return -EINVAL;
if (!append_unescaped(current, end, &name))
- return false;
+ return -EINVAL;
current = ++end;
for ( ; end != long_name.end() && *end != '_'; ++end) ;
if (end == long_name.end())
- return false;
+ return -EINVAL;
if (!append_unescaped(current, end, &key))
- return false;
+ return -EINVAL;
current = ++end;
for ( ; end != long_name.end() && *end != '_'; ++end) ;
if (end == long_name.end())
- return false;
+ return -EINVAL;
string snap_str(current, end);
current = ++end;
for ( ; end != long_name.end() && *end != '_'; ++end) ;
if (end == long_name.end())
- return false;
+ return -EINVAL;
string hash_str(current, end);
current = ++end;
for ( ; end != long_name.end() && *end != '_'; ++end) ;
if (end == long_name.end())
- return false;
+ return -EINVAL;
if (!append_unescaped(current, end, &ns))
- return false;
+ return -EINVAL;
current = ++end;
for ( ; end != long_name.end() && *end != '_'; ++end) ;
@@ -1212,7 +1219,7 @@ bool LFNIndex::lfn_parse_object_name(const string &long_name, ghobject_t *out)
current = ++end;
for ( ; end != long_name.end() && *end != '_'; ++end) ;
if (end == long_name.end())
- return false;
+ return -EINVAL;
genstring = string(current, end);
generation = (gen_t)strtoull(genstring.c_str(), NULL, 16);
@@ -1220,7 +1227,7 @@ bool LFNIndex::lfn_parse_object_name(const string &long_name, ghobject_t *out)
current = ++end;
for ( ; end != long_name.end() && *end != '_'; ++end) ;
if (end != long_name.end())
- return false;
+ return -EINVAL;
shardstring = string(current, end);
shard_id = (shard_id_t)strtoul(shardstring.c_str(), NULL, 16);
@@ -1240,7 +1247,7 @@ bool LFNIndex::lfn_parse_object_name(const string &long_name, ghobject_t *out)
pool = strtoull(pstring.c_str(), NULL, 16);
(*out) = ghobject_t(hobject_t(name, key, snap, hash, (int64_t)pool, ns), generation, shard_id);
- return true;
+ return 0;
}
bool LFNIndex::lfn_is_hashed_filename(const string &name)
diff --git a/src/os/filestore/LFNIndex.h b/src/os/filestore/LFNIndex.h
index 1cf4f0b..4efc313 100644
--- a/src/os/filestore/LFNIndex.h
+++ b/src/os/filestore/LFNIndex.h
@@ -212,6 +212,11 @@ public:
);
}
+ /**
+ * Returns the length of the longest escaped name which could result
+ * from any clone, shard, or rollback object of this object
+ */
+ static uint64_t get_max_escaped_name_len(const hobject_t &obj);
protected:
virtual int _init() = 0;
@@ -480,24 +485,36 @@ private:
); ///< @return Generated object name.
/// Generate object name
- string lfn_generate_object_name(
+ static string lfn_generate_object_name_current(
const ghobject_t &oid ///< [in] Object for which to generate.
); ///< @return Generated object name.
+ /// Generate object name
+ string lfn_generate_object_name(
+ const ghobject_t &oid ///< [in] Object for which to generate.
+ ) {
+ if (index_version == HASH_INDEX_TAG)
+ return lfn_generate_object_name_keyless(oid);
+ if (index_version == HASH_INDEX_TAG_2)
+ return lfn_generate_object_name_poolless(oid);
+ else
+ return lfn_generate_object_name_current(oid);
+ } ///< @return Generated object name.
+
/// Parse object name
- bool lfn_parse_object_name_keyless(
+ int lfn_parse_object_name_keyless(
const string &long_name, ///< [in] Name to parse
ghobject_t *out ///< [out] Resulting Object
); ///< @return True if successfull, False otherwise.
/// Parse object name
- bool lfn_parse_object_name_poolless(
+ int lfn_parse_object_name_poolless(
const string &long_name, ///< [in] Name to parse
ghobject_t *out ///< [out] Resulting Object
); ///< @return True if successfull, False otherwise.
/// Parse object name
- bool lfn_parse_object_name(
+ int lfn_parse_object_name(
const string &long_name, ///< [in] Name to parse
ghobject_t *out ///< [out] Resulting Object
); ///< @return True if successfull, False otherwise.
diff --git a/src/os/filestore/chain_xattr.cc b/src/os/filestore/chain_xattr.cc
index 28bb87b..0461c19 100644
--- a/src/os/filestore/chain_xattr.cc
+++ b/src/os/filestore/chain_xattr.cc
@@ -37,7 +37,7 @@
* where <id> marks the num of xattr in the chain.
*/
-static void get_raw_xattr_name(const char *name, int i, char *raw_name, int raw_len)
+void get_raw_xattr_name(const char *name, int i, char *raw_name, int raw_len)
{
int pos = 0;
@@ -135,7 +135,7 @@ int chain_getxattr(const char *fn, const char *name, void *val, size_t size)
return getxattr_len(fn, name);
do {
- chunk_size = (size < CHAIN_XATTR_MAX_BLOCK_LEN ? size : CHAIN_XATTR_MAX_BLOCK_LEN);
+ chunk_size = size;
get_raw_xattr_name(name, i, raw_name, sizeof(raw_name));
r = sys_getxattr(fn, raw_name, (char *)val + pos, chunk_size);
@@ -173,6 +173,35 @@ int chain_getxattr(const char *fn, const char *name, void *val, size_t size)
return ret;
}
+int chain_getxattr_buf(const char *fn, const char *name, bufferptr *bp)
+{
+ size_t size = 1024; // Initial
+ while (1) {
+ bufferptr buf(size);
+ int r = chain_getxattr(
+ fn,
+ name,
+ buf.c_str(),
+ size);
+ if (r > 0) {
+ buf.set_length(r);
+ if (bp)
+ bp->swap(buf);
+ return r;
+ } else if (r == 0) {
+ return 0;
+ } else {
+ if (r == -ERANGE) {
+ size *= 2;
+ } else {
+ return r;
+ }
+ }
+ }
+ assert(0 == "unreachable");
+ return 0;
+}
+
static int chain_fgetxattr_len(int fd, const char *name)
{
int i = 0, total = 0;
@@ -206,7 +235,7 @@ int chain_fgetxattr(int fd, const char *name, void *val, size_t size)
return chain_fgetxattr_len(fd, name);
do {
- chunk_size = (size < CHAIN_XATTR_MAX_BLOCK_LEN ? size : CHAIN_XATTR_MAX_BLOCK_LEN);
+ chunk_size = size;
get_raw_xattr_name(name, i, raw_name, sizeof(raw_name));
r = sys_fgetxattr(fd, raw_name, (char *)val + pos, chunk_size);
@@ -247,7 +276,7 @@ int chain_fgetxattr(int fd, const char *name, void *val, size_t size)
// setxattr
-static int get_xattr_block_size(size_t size)
+int get_xattr_block_size(size_t size)
{
if (size <= CHAIN_XATTR_SHORT_LEN_THRESHOLD)
// this may fit in the inode; stripe over short attrs so that XFS
@@ -256,79 +285,6 @@ static int get_xattr_block_size(size_t size)
return CHAIN_XATTR_MAX_BLOCK_LEN;
}
-int chain_setxattr(const char *fn, const char *name, const void *val, size_t size, bool onechunk)
-{
- int i = 0, pos = 0;
- char raw_name[CHAIN_XATTR_MAX_NAME_LEN * 2 + 16];
- int ret = 0;
- size_t max_chunk_size = get_xattr_block_size(size);
-
- do {
- size_t chunk_size = (size < max_chunk_size ? size : max_chunk_size);
- get_raw_xattr_name(name, i, raw_name, sizeof(raw_name));
- size -= chunk_size;
-
- int r = sys_setxattr(fn, raw_name, (char *)val + pos, chunk_size);
- if (r < 0) {
- ret = r;
- break;
- }
- pos += chunk_size;
- ret = pos;
- i++;
- } while (size);
-
- if (ret >= 0 && !onechunk) {
- int r;
- do {
- get_raw_xattr_name(name, i, raw_name, sizeof(raw_name));
- r = sys_removexattr(fn, raw_name);
- if (r < 0 && r != -ENODATA)
- ret = r;
- i++;
- } while (r != -ENODATA);
- }
-
- return ret;
-}
-
-int chain_fsetxattr(int fd, const char *name, const void *val, size_t size, bool onechunk)
-{
- int i = 0, pos = 0;
- char raw_name[CHAIN_XATTR_MAX_NAME_LEN * 2 + 16];
- int ret = 0;
- size_t max_chunk_size = get_xattr_block_size(size);
-
- do {
- size_t chunk_size = (size < max_chunk_size ? size : max_chunk_size);
- get_raw_xattr_name(name, i, raw_name, sizeof(raw_name));
- size -= chunk_size;
-
- int r = sys_fsetxattr(fd, raw_name, (char *)val + pos, chunk_size);
- if (r < 0) {
- ret = r;
- break;
- }
- pos += chunk_size;
- ret = pos;
- i++;
- } while (size);
-
- if (ret >= 0 && !onechunk) {
- int r;
- do {
- get_raw_xattr_name(name, i, raw_name, sizeof(raw_name));
- r = sys_fremovexattr(fd, raw_name);
- if (r < 0 && r != -ENODATA)
- ret = r;
- i++;
- } while (r != -ENODATA);
- }
-
- return ret;
-}
-
-
// removexattr
int chain_removexattr(const char *fn, const char *name)
diff --git a/src/os/filestore/chain_xattr.h b/src/os/filestore/chain_xattr.h
index 6ee8050..54a8568 100644
--- a/src/os/filestore/chain_xattr.h
+++ b/src/os/filestore/chain_xattr.h
@@ -5,6 +5,10 @@
#define __CEPH_OSD_CHAIN_XATTR_H
#include "common/xattr.h"
+#include "include/assert.h"
+#include "include/buffer.h"
+#include <string.h>
+#include <stdio.h>
#include <errno.h>
@@ -77,9 +81,100 @@ static inline int sys_fremovexattr(int fd, const char *name)
// wrappers to chain large values across multiple xattrs
int chain_getxattr(const char *fn, const char *name, void *val, size_t size);
+int chain_getxattr_buf(const char *fn, const char *name, bufferptr *bp);
int chain_fgetxattr(int fd, const char *name, void *val, size_t size);
-int chain_setxattr(const char *fn, const char *name, const void *val, size_t size, bool onechunk=false);
-int chain_fsetxattr(int fd, const char *name, const void *val, size_t size, bool onechunk=false);
+
+int get_xattr_block_size(size_t size);
+void get_raw_xattr_name(const char *name, int i, char *raw_name, int raw_len);
+
+template <bool skip_chain_cleanup=false, bool ensure_single_attr=false>
+int chain_setxattr(
+ const char *fn, const char *name, const void *val, size_t size)
+{
+ int i = 0, pos = 0;
+ char raw_name[CHAIN_XATTR_MAX_NAME_LEN * 2 + 16];
+ int ret = 0;
+ size_t max_chunk_size =
+ ensure_single_attr ? size : get_xattr_block_size(size);
+
+ static_assert(
+ !skip_chain_cleanup || ensure_single_attr,
+ "skip_chain_cleanup must imply ensure_single_attr");
+
+ do {
+ size_t chunk_size = (size < max_chunk_size ? size : max_chunk_size);
+ get_raw_xattr_name(name, i, raw_name, sizeof(raw_name));
+ size -= chunk_size;
+
+ int r = sys_setxattr(fn, raw_name, (char *)val + pos, chunk_size);
+ if (r < 0) {
+ ret = r;
+ break;
+ }
+ pos += chunk_size;
+ ret = pos;
+ i++;
+ assert(size == 0 || !ensure_single_attr);
+ } while (size);
+
+ if (ret >= 0 && !skip_chain_cleanup) {
+ int r;
+ do {
+ get_raw_xattr_name(name, i, raw_name, sizeof(raw_name));
+ r = sys_removexattr(fn, raw_name);
+ if (r < 0 && r != -ENODATA)
+ ret = r;
+ i++;
+ } while (r != -ENODATA);
+ }
+
+ return ret;
+}
+
+template <bool skip_chain_cleanup=false, bool ensure_single_attr=false>
+int chain_fsetxattr(
+ int fd, const char *name, const void *val, size_t size)
+{
+ int i = 0, pos = 0;
+ char raw_name[CHAIN_XATTR_MAX_NAME_LEN * 2 + 16];
+ int ret = 0;
+ size_t max_chunk_size =
+ ensure_single_attr ? size : get_xattr_block_size(size);
+
+ static_assert(
+ !skip_chain_cleanup || ensure_single_attr,
+ "skip_chain_cleanup must imply ensure_single_attr");
+
+ do {
+ size_t chunk_size = (size < max_chunk_size ? size : max_chunk_size);
+ get_raw_xattr_name(name, i, raw_name, sizeof(raw_name));
+ size -= chunk_size;
+
+ int r = sys_fsetxattr(fd, raw_name, (char *)val + pos, chunk_size);
+ if (r < 0) {
+ ret = r;
+ break;
+ }
+ pos += chunk_size;
+ ret = pos;
+ i++;
+ assert(size == 0 || !ensure_single_attr);
+ } while (size);
+
+ if (ret >= 0 && !skip_chain_cleanup) {
+ int r;
+ do {
+ get_raw_xattr_name(name, i, raw_name, sizeof(raw_name));
+ r = sys_fremovexattr(fd, raw_name);
+ if (r < 0 && r != -ENODATA)
+ ret = r;
+ i++;
+ } while (r != -ENODATA);
+ }
+
+ return ret;
+}
+
int chain_listxattr(const char *fn, char *names, size_t len);
int chain_flistxattr(int fd, char *names, size_t len);
int chain_removexattr(const char *fn, const char *name);
diff --git a/src/os/kstore/KStore.h b/src/os/kstore/KStore.h
index 4b00663..09483de 100644
--- a/src/os/kstore/KStore.h
+++ b/src/os/kstore/KStore.h
@@ -411,8 +411,9 @@ public:
int fsck();
- unsigned get_max_object_name_length() {
- return 4096;
+
+ int validate_hobject_key(const hobject_t &obj) const override {
+ return 0;
}
unsigned get_max_attr_name_length() {
return 256; // arbitrary; there is no real limit internally
diff --git a/src/os/memstore/MemStore.h b/src/os/memstore/MemStore.h
index 2d809f3..64f9afc 100644
--- a/src/os/memstore/MemStore.h
+++ b/src/os/memstore/MemStore.h
@@ -365,8 +365,8 @@ public:
int mount();
int umount();
- unsigned get_max_object_name_length() {
- return 4096;
+ int validate_hobject_key(const hobject_t &obj) const override {
+ return 0;
}
unsigned get_max_attr_name_length() {
return 256; // arbitrary; there is no real limit internally
diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc
index 1ed664f..7c260ff 100644
--- a/src/osd/OSD.cc
+++ b/src/osd/OSD.cc
@@ -4615,11 +4615,7 @@ void OSD::ms_handle_connect(Connection *con)
// full map requests may happen while active or pre-boot
if (requested_full_first) {
- epoch_t first = requested_full_first;
- epoch_t last = requested_full_last;
- requested_full_first = 0;
- requested_full_last = 0;
- request_full_map(first, last);
+ rerequest_full_maps();
}
}
}
@@ -4918,17 +4914,6 @@ void OSD::request_full_map(epoch_t first, epoch_t last)
monc->send_mon_message(req);
}
-void OSD::finish_full_map_request()
-{
- if (requested_full_first == 0 && requested_full_last == 0)
- return;
- //Had requested some map but didn't receive in this message,
- //This might because monitor capping the message to osd_map_message_max
- dout(10) << __func__ << "still missing " << requested_full_first
- << ".." << requested_full_last << ", but now give up." << dendl;
- requested_full_first = requested_full_last = 0;
-}
-
void OSD::got_full_map(epoch_t e)
{
assert(requested_full_first <= requested_full_last);
@@ -6693,8 +6678,11 @@ void OSD::handle_osd_map(MOSDMap *m)
// even if this map isn't from a mon, we may have satisfied our subscription
monc->sub_got("osdmap", last);
- if (!m->maps.empty())
- finish_full_map_request();
+ if (!m->maps.empty() && requested_full_first) {
+ dout(10) << __func__ << " still missing full maps " << requested_full_first
+ << ".." << requested_full_last << dendl;
+ rerequest_full_maps();
+ }
if (last <= superblock.newest_map) {
dout(10) << " no new maps here, dropping" << dendl;
diff --git a/src/osd/OSD.h b/src/osd/OSD.h
index 9c4d68c..8a62ebb 100644
--- a/src/osd/OSD.h
+++ b/src/osd/OSD.h
@@ -2050,7 +2050,13 @@ protected:
epoch_t requested_full_first, requested_full_last;
void request_full_map(epoch_t first, epoch_t last);
- void finish_full_map_request();
+ void rerequest_full_maps() {
+ epoch_t first = requested_full_first;
+ epoch_t last = requested_full_last;
+ requested_full_first = 0;
+ requested_full_last = 0;
+ request_full_map(first, last);
+ }
void got_full_map(epoch_t e);
// -- failures --
diff --git a/src/osd/PG.cc b/src/osd/PG.cc
index 2804c73..58d2cc5 100644
--- a/src/osd/PG.cc
+++ b/src/osd/PG.cc
@@ -2298,6 +2298,7 @@ void PG::split_into(pg_t child_pgid, PG *child, unsigned split_bits)
// Info
child->info.history = info.history;
+ child->info.history.epoch_created = get_osdmap()->get_epoch();
child->info.purged_snaps = info.purged_snaps;
if (info.last_backfill.is_max()) {
@@ -2313,6 +2314,7 @@ void PG::split_into(pg_t child_pgid, PG *child, unsigned split_bits)
}
child->info.stats = info.stats;
+ child->info.stats.parent_split_bits = split_bits;
info.stats.stats_invalid = true;
child->info.stats.stats_invalid = true;
child->info.last_epoch_started = info.last_epoch_started;
@@ -3357,8 +3359,16 @@ bool PG::sched_scrub()
//NOSCRUB so skip regular scrubs
if ((osd->osd->get_osdmap()->test_flag(CEPH_OSDMAP_NOSCRUB) ||
- pool.info.has_flag(pg_pool_t::FLAG_NOSCRUB)) && !time_for_deep)
+ pool.info.has_flag(pg_pool_t::FLAG_NOSCRUB)) && !time_for_deep) {
+ if (scrubber.reserved) {
+ // cancel scrub if it is still in scheduling,
+ // so pgs from other pools where scrub are still legal
+ // have a chance to go ahead with scrubbing.
+ clear_scrub_reserved();
+ scrub_unreserve_replicas();
+ }
return false;
+ }
}
if (cct->_conf->osd_scrub_auto_repair
@@ -4108,6 +4118,7 @@ void PG::chunky_scrub(ThreadPool::TPHandle &handle)
bool boundary_found = false;
hobject_t start = scrubber.start;
+ unsigned loop = 0;
while (!boundary_found) {
vector<hobject_t> objects;
ret = get_pgbackend()->objects_list_partial(
@@ -4137,6 +4148,12 @@ void PG::chunky_scrub(ThreadPool::TPHandle &handle)
boundary_found = true;
}
}
+
+ // reset handle once in a while, the search maybe takes long.
+ if (++loop >= g_conf->osd_loop_before_reset_tphandle) {
+ handle.reset_tp_timeout();
+ loop = 0;
+ }
}
if (!_range_available_for_scrub(scrubber.start, candidate_end)) {
diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc
index 3958f89..052d6c7 100644
--- a/src/osd/ReplicatedPG.cc
+++ b/src/osd/ReplicatedPG.cc
@@ -1627,16 +1627,41 @@ void ReplicatedPG::do_op(OpRequestRef& op)
return;
}
+ hobject_t head(m->get_oid(), m->get_object_locator().key,
+ CEPH_NOSNAP, m->get_pg().ps(),
+ info.pgid.pool(), m->get_object_locator().nspace);
+
// object name too long?
- unsigned max_name_len = MIN(g_conf->osd_max_object_name_len,
- osd->osd->store->get_max_object_name_length());
- if (m->get_oid().name.size() > max_name_len) {
- dout(4) << "do_op '" << m->get_oid().name << "' is longer than "
- << max_name_len << " bytes" << dendl;
+ if (m->get_oid().name.size() > g_conf->osd_max_object_name_len) {
+ dout(4) << "do_op name is longer than "
+ << g_conf->osd_max_object_name_len
+ << " bytes" << dendl;
+ osd->reply_op_error(op, -ENAMETOOLONG);
+ return;
+ }
+ if (m->get_object_locator().key.size() > g_conf->osd_max_object_name_len) {
+ dout(4) << "do_op locator is longer than "
+ << g_conf->osd_max_object_name_len
+ << " bytes" << dendl;
+ osd->reply_op_error(op, -ENAMETOOLONG);
+ return;
+ }
+ if (m->get_object_locator().nspace.size() >
+ g_conf->osd_max_object_namespace_len) {
+ dout(4) << "do_op namespace is longer than "
+ << g_conf->osd_max_object_namespace_len
+ << " bytes" << dendl;
osd->reply_op_error(op, -ENAMETOOLONG);
return;
}
+ if (int r = osd->store->validate_hobject_key(head)) {
+ dout(4) << "do_op object " << head << " invalid for backing store: "
+ << r << dendl;
+ osd->reply_op_error(op, r);
+ return;
+ }
+
// blacklisted?
if (get_osdmap()->is_blacklisted(m->get_source_addr())) {
dout(10) << "do_op " << m->get_source_addr() << " is blacklisted" << dendl;
@@ -1702,11 +1727,6 @@ void ReplicatedPG::do_op(OpRequestRef& op)
<< " flags " << ceph_osd_flag_string(m->get_flags())
<< dendl;
- hobject_t head(m->get_oid(), m->get_object_locator().key,
- CEPH_NOSNAP, m->get_pg().ps(),
- info.pgid.pool(), m->get_object_locator().nspace);
-
-
if (write_ordered &&
scrubber.write_blocked_by_scrub(head, get_sort_bitwise())) {
dout(20) << __func__ << ": waiting for scrub" << dendl;
@@ -6199,6 +6219,15 @@ int ReplicatedPG::_rollback_to(OpContext *ctx, ceph_osd_op& op)
obs.oi.set_omap_digest(rollback_to->obs.oi.omap_digest);
else
obs.oi.clear_omap_digest();
+
+ if (rollback_to->obs.oi.is_omap()) {
+ dout(10) << __func__ << " setting omap flag on " << obs.oi.soid << dendl;
+ obs.oi.set_flag(object_info_t::FLAG_OMAP);
+ } else {
+ dout(10) << __func__ << " clearing omap flag on " << obs.oi.soid << dendl;
+ obs.oi.clear_flag(object_info_t::FLAG_OMAP);
+ }
+
snapset.head_exists = true;
}
}
@@ -6919,10 +6948,6 @@ int ReplicatedPG::fill_in_copy_get(
return result;
}
- if ((osd_op.op.copy_get.flags & CEPH_OSD_COPY_GET_FLAG_NOTSUPP_OMAP) &&
- oi.is_omap())
- return -EOPNOTSUPP;
-
MOSDOp *op = reinterpret_cast<MOSDOp*>(ctx->op->get_req());
uint64_t features = op->get_features();
@@ -7162,12 +7187,7 @@ void ReplicatedPG::_copy_some(ObjectContextRef obc, CopyOpRef cop)
// it already!
assert(cop->cursor.is_initial());
}
-
- uint32_t copyget_flags = 0;
- if (!pool.info.supports_omap())
- copyget_flags |= CEPH_OSD_COPY_GET_FLAG_NOTSUPP_OMAP;
-
- op.copy_get(&cop->cursor, get_copy_chunk_size(), copyget_flags,
+ op.copy_get(&cop->cursor, get_copy_chunk_size(),
&cop->results.object_size, &cop->results.mtime,
&cop->attrs, &cop->data, &cop->omap_header, &cop->omap_data,
&cop->results.snaps, &cop->results.snap_seq,
@@ -7330,6 +7350,16 @@ void ReplicatedPG::process_copy_chunk(hobject_t oid, ceph_tid_t tid, int r)
copy_ops.erase(cobc->obs.oi.soid);
cobc->stop_block();
+ if (r < 0 && cop->results.started_temp_obj) {
+ dout(10) << __func__ << " deleting partial temp object "
+ << cop->results.temp_oid << dendl;
+ ObjectContextRef tempobc = get_object_context(cop->results.temp_oid, true);
+ OpContextUPtr ctx = simple_opc_create(tempobc);
+ ctx->op_t->remove(cop->results.temp_oid);
+ ctx->discard_temp_oid = cop->results.temp_oid;
+ simple_opc_submit(std::move(ctx));
+ }
+
// cancel and requeue proxy ops on this object
if (!r) {
for (map<ceph_tid_t, ProxyReadOpRef>::iterator it = proxyread_ops.begin();
diff --git a/src/osdc/Objecter.cc b/src/osdc/Objecter.cc
index b208254..92a8698 100644
--- a/src/osdc/Objecter.cc
+++ b/src/osdc/Objecter.cc
@@ -542,10 +542,10 @@ void Objecter::_send_linger(LingerOp *info,
}
sl.unlock();
- info->register_tid = _op_submit(o, sul);
+ _op_submit(o, sul, &info->register_tid);
} else {
// first send
- info->register_tid = _op_submit_with_budget(o, sul);
+ _op_submit_with_budget(o, sul, &info->register_tid);
}
logger->inc(l_osdc_linger_send);
@@ -2125,14 +2125,18 @@ void Objecter::resend_mon_ops()
// read | write ---------------------------
-ceph_tid_t Objecter::op_submit(Op *op, int *ctx_budget)
+void Objecter::op_submit(Op *op, ceph_tid_t *ptid, int *ctx_budget)
{
shunique_lock rl(rwlock, ceph::acquire_shared);
- return _op_submit_with_budget(op, rl, ctx_budget);
+ ceph_tid_t tid = 0;
+ if (!ptid)
+ ptid = &tid;
+ _op_submit_with_budget(op, rl, ptid, ctx_budget);
}
-ceph_tid_t Objecter::_op_submit_with_budget(Op *op, shunique_lock& sul,
- int *ctx_budget)
+void Objecter::_op_submit_with_budget(Op *op, shunique_lock& sul,
+ ceph_tid_t *ptid,
+ int *ctx_budget)
{
assert(initialized.read());
@@ -2160,7 +2164,7 @@ ceph_tid_t Objecter::_op_submit_with_budget(Op *op, shunique_lock& sul,
op_cancel(tid, -ETIMEDOUT); });
}
- return _op_submit(op, sul);
+ _op_submit(op, sul, ptid);
}
void Objecter::_send_op_account(Op *op)
@@ -2242,7 +2246,7 @@ void Objecter::_send_op_account(Op *op)
}
}
-ceph_tid_t Objecter::_op_submit(Op *op, shunique_lock& sul)
+void Objecter::_op_submit(Op *op, shunique_lock& sul, ceph_tid_t *ptid)
{
// rwlock is locked
@@ -2335,6 +2339,8 @@ ceph_tid_t Objecter::_op_submit(Op *op, shunique_lock& sul)
if (check_for_latest_map) {
_send_op_map_check(op);
}
+ if (ptid)
+ *ptid = tid;
op = NULL;
sl.unlock();
@@ -2342,8 +2348,6 @@ ceph_tid_t Objecter::_op_submit(Op *op, shunique_lock& sul)
ldout(cct, 5) << num_unacked.read() << " unacked, " << num_uncommitted.read()
<< " uncommitted" << dendl;
-
- return tid;
}
int Objecter::op_cancel(OSDSession *s, ceph_tid_t tid, int r)
@@ -3219,7 +3223,7 @@ void Objecter::handle_osd_op_reply(MOSDOpReply *m)
m->get_redirect().combine_with_locator(op->target.target_oloc,
op->target.target_oid.name);
op->target.flags |= CEPH_OSD_FLAG_REDIRECTED;
- _op_submit(op, sul);
+ _op_submit(op, sul, NULL);
m->put();
return;
}
diff --git a/src/osdc/Objecter.h b/src/osdc/Objecter.h
index 05b29bb..6b519ff 100644
--- a/src/osdc/Objecter.h
+++ b/src/osdc/Objecter.h
@@ -747,7 +747,6 @@ struct ObjectOperation {
void copy_get(object_copy_cursor_t *cursor,
uint64_t max,
- uint32_t copyget_flags,
uint64_t *out_size,
ceph::real_time *out_mtime,
std::map<std::string,bufferlist> *out_attrs,
@@ -765,7 +764,6 @@ struct ObjectOperation {
int *prval) {
OSDOp& osd_op = add_op(CEPH_OSD_OP_COPY_GET);
osd_op.op.copy_get.max = max;
- osd_op.op.copy_get.flags = copyget_flags;
::encode(*cursor, osd_op.indata);
::encode(max, osd_op.indata);
unsigned p = ops.size() - 1;
@@ -2074,14 +2072,15 @@ private:
private:
// low-level
- ceph_tid_t _op_submit(Op *op, shunique_lock& lc);
- ceph_tid_t _op_submit_with_budget(Op *op, shunique_lock& lc,
- int *ctx_budget = NULL);
+ void _op_submit(Op *op, shunique_lock& lc, ceph_tid_t *ptid);
+ void _op_submit_with_budget(Op *op, shunique_lock& lc,
+ ceph_tid_t *ptid,
+ int *ctx_budget = NULL);
inline void unregister_op(Op *op);
// public interface
public:
- ceph_tid_t op_submit(Op *op, int *ctx_budget = NULL);
+ void op_submit(Op *op, ceph_tid_t *ptid = NULL, int *ctx_budget = NULL);
bool is_active() {
shared_lock l(rwlock);
return !((!inflight_ops.read()) && linger_ops.empty() &&
@@ -2173,11 +2172,12 @@ public:
}
// mid-level helpers
- Op *prepare_mutate_op(const object_t& oid, const object_locator_t& oloc,
- ObjectOperation& op, const SnapContext& snapc,
- ceph::real_time mtime, int flags, Context *onack,
- Context *oncommit, version_t *objver = NULL,
- osd_reqid_t reqid = osd_reqid_t()) {
+ Op *prepare_mutate_op(
+ const object_t& oid, const object_locator_t& oloc,
+ ObjectOperation& op, const SnapContext& snapc,
+ ceph::real_time mtime, int flags, Context *onack,
+ Context *oncommit, version_t *objver = NULL,
+ osd_reqid_t reqid = osd_reqid_t()) {
Op *o = new Op(oid, oloc, op.ops, flags | global_op_flags.read() |
CEPH_OSD_FLAG_WRITE, onack, oncommit, objver);
o->priority = op.priority;
@@ -2187,20 +2187,25 @@ public:
o->reqid = reqid;
return o;
}
- ceph_tid_t mutate(const object_t& oid, const object_locator_t& oloc,
- ObjectOperation& op, const SnapContext& snapc,
- ceph::real_time mtime, int flags, Context *onack,
- Context *oncommit, version_t *objver = NULL,
- osd_reqid_t reqid = osd_reqid_t()) {
+ ceph_tid_t mutate(
+ const object_t& oid, const object_locator_t& oloc,
+ ObjectOperation& op, const SnapContext& snapc,
+ ceph::real_time mtime, int flags, Context *onack,
+ Context *oncommit, version_t *objver = NULL,
+ osd_reqid_t reqid = osd_reqid_t()) {
Op *o = prepare_mutate_op(oid, oloc, op, snapc, mtime, flags, onack,
oncommit, objver, reqid);
- return op_submit(o);
- }
- Op *prepare_read_op(const object_t& oid, const object_locator_t& oloc,
- ObjectOperation& op,
- snapid_t snapid, bufferlist *pbl, int flags,
- Context *onack, version_t *objver = NULL,
- int *data_offset = NULL) {
+ ceph_tid_t tid;
+ op_submit(o, &tid);
+ return tid;
+ }
+ Op *prepare_read_op(
+ const object_t& oid, const object_locator_t& oloc,
+ ObjectOperation& op,
+ snapid_t snapid, bufferlist *pbl, int flags,
+ Context *onack, version_t *objver = NULL,
+ int *data_offset = NULL,
+ uint64_t features = 0) {
Op *o = new Op(oid, oloc, op.ops, flags | global_op_flags.read() |
CEPH_OSD_FLAG_READ, onack, NULL, objver, data_offset);
o->priority = op.priority;
@@ -2213,22 +2218,26 @@ public:
o->out_rval.swap(op.out_rval);
return o;
}
- ceph_tid_t read(const object_t& oid, const object_locator_t& oloc,
- ObjectOperation& op,
- snapid_t snapid, bufferlist *pbl, int flags,
- Context *onack, version_t *objver = NULL,
- int *data_offset = NULL,
- uint64_t features = 0) {
+ ceph_tid_t read(
+ const object_t& oid, const object_locator_t& oloc,
+ ObjectOperation& op,
+ snapid_t snapid, bufferlist *pbl, int flags,
+ Context *onack, version_t *objver = NULL,
+ int *data_offset = NULL,
+ uint64_t features = 0) {
Op *o = prepare_read_op(oid, oloc, op, snapid, pbl, flags, onack, objver,
data_offset);
if (features)
o->features = features;
- return op_submit(o);
- }
- ceph_tid_t pg_read(uint32_t hash, object_locator_t oloc,
- ObjectOperation& op, bufferlist *pbl, int flags,
- Context *onack, epoch_t *reply_epoch,
- int *ctx_budget) {
+ ceph_tid_t tid;
+ op_submit(o, &tid);
+ return tid;
+ }
+ Op *prepare_pg_read_op(
+ uint32_t hash, object_locator_t oloc,
+ ObjectOperation& op, bufferlist *pbl, int flags,
+ Context *onack, epoch_t *reply_epoch,
+ int *ctx_budget) {
Op *o = new Op(object_t(), oloc,
op.ops, flags | global_op_flags.read() | CEPH_OSD_FLAG_READ,
onack, NULL, NULL);
@@ -2245,7 +2254,18 @@ public:
// budget is tracked by listing context
o->ctx_budgeted = true;
}
- return op_submit(o, ctx_budget);
+ return o;
+ }
+ ceph_tid_t pg_read(
+ uint32_t hash, object_locator_t oloc,
+ ObjectOperation& op, bufferlist *pbl, int flags,
+ Context *onack, epoch_t *reply_epoch,
+ int *ctx_budget) {
+ Op *o = prepare_pg_read_op(hash, oloc, op, pbl, flags,
+ onack, reply_epoch, ctx_budget);
+ ceph_tid_t tid;
+ op_submit(o, &tid, ctx_budget);
+ return tid;
}
// caller owns a ref
@@ -2297,10 +2317,11 @@ public:
// high-level helpers
- ceph_tid_t stat(const object_t& oid, const object_locator_t& oloc,
- snapid_t snap, uint64_t *psize, ceph::real_time *pmtime,
- int flags, Context *onfinish, version_t *objver = NULL,
- ObjectOperation *extra_ops = NULL) {
+ Op *prepare_stat_op(
+ const object_t& oid, const object_locator_t& oloc,
+ snapid_t snap, uint64_t *psize, ceph::real_time *pmtime,
+ int flags, Context *onfinish, version_t *objver = NULL,
+ ObjectOperation *extra_ops = NULL) {
vector<OSDOp> ops;
int i = init_ops(ops, 1, extra_ops);
ops[i].op.op = CEPH_OSD_OP_STAT;
@@ -2309,13 +2330,25 @@ public:
CEPH_OSD_FLAG_READ, fin, 0, objver);
o->snapid = snap;
o->outbl = &fin->bl;
- return op_submit(o);
+ return o;
+ }
+ ceph_tid_t stat(
+ const object_t& oid, const object_locator_t& oloc,
+ snapid_t snap, uint64_t *psize, ceph::real_time *pmtime,
+ int flags, Context *onfinish, version_t *objver = NULL,
+ ObjectOperation *extra_ops = NULL) {
+ Op *o = prepare_stat_op(oid, oloc, snap, psize, pmtime, flags,
+ onfinish, objver, extra_ops);
+ ceph_tid_t tid;
+ op_submit(o, &tid);
+ return tid;
}
- ceph_tid_t read(const object_t& oid, const object_locator_t& oloc,
- uint64_t off, uint64_t len, snapid_t snap, bufferlist *pbl,
- int flags, Context *onfinish, version_t *objver = NULL,
- ObjectOperation *extra_ops = NULL, int op_flags = 0) {
+ Op *prepare_read_op(
+ const object_t& oid, const object_locator_t& oloc,
+ uint64_t off, uint64_t len, snapid_t snap, bufferlist *pbl,
+ int flags, Context *onfinish, version_t *objver = NULL,
+ ObjectOperation *extra_ops = NULL, int op_flags = 0) {
vector<OSDOp> ops;
int i = init_ops(ops, 1, extra_ops);
ops[i].op.op = CEPH_OSD_OP_READ;
@@ -2328,7 +2361,18 @@ public:
CEPH_OSD_FLAG_READ, onfinish, 0, objver);
o->snapid = snap;
o->outbl = pbl;
- return op_submit(o);
+ return o;
+ }
+ ceph_tid_t read(
+ const object_t& oid, const object_locator_t& oloc,
+ uint64_t off, uint64_t len, snapid_t snap, bufferlist *pbl,
+ int flags, Context *onfinish, version_t *objver = NULL,
+ ObjectOperation *extra_ops = NULL, int op_flags = 0) {
+ Op *o = prepare_read_op(oid, oloc, off, len, snap, pbl, flags,
+ onfinish, objver, extra_ops, op_flags);
+ ceph_tid_t tid;
+ op_submit(o, &tid);
+ return tid;
}
ceph_tid_t read_trunc(const object_t& oid, const object_locator_t& oloc,
@@ -2349,7 +2393,9 @@ public:
CEPH_OSD_FLAG_READ, onfinish, 0, objver);
o->snapid = snap;
o->outbl = pbl;
- return op_submit(o);
+ ceph_tid_t tid;
+ op_submit(o, &tid);
+ return tid;
}
ceph_tid_t mapext(const object_t& oid, const object_locator_t& oloc,
uint64_t off, uint64_t len, snapid_t snap, bufferlist *pbl,
@@ -2366,7 +2412,9 @@ public:
CEPH_OSD_FLAG_READ, onfinish, 0, objver);
o->snapid = snap;
o->outbl = pbl;
- return op_submit(o);
+ ceph_tid_t tid;
+ op_submit(o, &tid);
+ return tid;
}
ceph_tid_t getxattr(const object_t& oid, const object_locator_t& oloc,
const char *name, snapid_t snap, bufferlist *pbl, int flags,
@@ -2383,7 +2431,9 @@ public:
CEPH_OSD_FLAG_READ, onfinish, 0, objver);
o->snapid = snap;
o->outbl = pbl;
- return op_submit(o);
+ ceph_tid_t tid;
+ op_submit(o, &tid);
+ return tid;
}
ceph_tid_t getxattrs(const object_t& oid, const object_locator_t& oloc,
@@ -2398,7 +2448,9 @@ public:
CEPH_OSD_FLAG_READ, fin, 0, objver);
o->snapid = snap;
o->outbl = &fin->bl;
- return op_submit(o);
+ ceph_tid_t tid;
+ op_submit(o, &tid);
+ return tid;
}
ceph_tid_t read_full(const object_t& oid, const object_locator_t& oloc,
@@ -2420,13 +2472,16 @@ public:
CEPH_OSD_FLAG_WRITE, onack, oncommit, objver);
o->mtime = mtime;
o->snapc = snapc;
- return op_submit(o);
- }
- ceph_tid_t write(const object_t& oid, const object_locator_t& oloc,
- uint64_t off, uint64_t len, const SnapContext& snapc,
- const bufferlist &bl, ceph::real_time mtime, int flags,
- Context *onack, Context *oncommit, version_t *objver = NULL,
- ObjectOperation *extra_ops = NULL, int op_flags = 0) {
+ ceph_tid_t tid;
+ op_submit(o, &tid);
+ return tid;
+ }
+ Op *prepare_write_op(
+ const object_t& oid, const object_locator_t& oloc,
+ uint64_t off, uint64_t len, const SnapContext& snapc,
+ const bufferlist &bl, ceph::real_time mtime, int flags,
+ Context *onack, Context *oncommit, version_t *objver = NULL,
+ ObjectOperation *extra_ops = NULL, int op_flags = 0) {
vector<OSDOp> ops;
int i = init_ops(ops, 1, extra_ops);
ops[i].op.op = CEPH_OSD_OP_WRITE;
@@ -2440,14 +2495,27 @@ public:
CEPH_OSD_FLAG_WRITE, onack, oncommit, objver);
o->mtime = mtime;
o->snapc = snapc;
- return op_submit(o);
+ return o;
}
- ceph_tid_t append(const object_t& oid, const object_locator_t& oloc,
- uint64_t len, const SnapContext& snapc,
- const bufferlist &bl, ceph::real_time mtime, int flags,
- Context *onack, Context *oncommit,
- version_t *objver = NULL,
- ObjectOperation *extra_ops = NULL) {
+ ceph_tid_t write(
+ const object_t& oid, const object_locator_t& oloc,
+ uint64_t off, uint64_t len, const SnapContext& snapc,
+ const bufferlist &bl, ceph::real_time mtime, int flags,
+ Context *onack, Context *oncommit, version_t *objver = NULL,
+ ObjectOperation *extra_ops = NULL, int op_flags = 0) {
+ Op *o = prepare_write_op(oid, oloc, off, len, snapc, bl, mtime, flags,
+ onack, oncommit, objver, extra_ops, op_flags);
+ ceph_tid_t tid;
+ op_submit(o, &tid);
+ return tid;
+ }
+ Op *prepare_append_op(
+ const object_t& oid, const object_locator_t& oloc,
+ uint64_t len, const SnapContext& snapc,
+ const bufferlist &bl, ceph::real_time mtime, int flags,
+ Context *onack, Context *oncommit,
+ version_t *objver = NULL,
+ ObjectOperation *extra_ops = NULL) {
vector<OSDOp> ops;
int i = init_ops(ops, 1, extra_ops);
ops[i].op.op = CEPH_OSD_OP_APPEND;
@@ -2460,7 +2528,20 @@ public:
CEPH_OSD_FLAG_WRITE, onack, oncommit, objver);
o->mtime = mtime;
o->snapc = snapc;
- return op_submit(o);
+ return o;
+ }
+ ceph_tid_t append(
+ const object_t& oid, const object_locator_t& oloc,
+ uint64_t len, const SnapContext& snapc,
+ const bufferlist &bl, ceph::real_time mtime, int flags,
+ Context *onack, Context *oncommit,
+ version_t *objver = NULL,
+ ObjectOperation *extra_ops = NULL) {
+ Op *o = prepare_append_op(oid, oloc, len, snapc, bl, mtime, flags,
+ onack, oncommit, objver, extra_ops);
+ ceph_tid_t tid;
+ op_submit(o, &tid);
+ return tid;
}
ceph_tid_t write_trunc(const object_t& oid, const object_locator_t& oloc,
uint64_t off, uint64_t len, const SnapContext& snapc,
@@ -2482,13 +2563,16 @@ public:
CEPH_OSD_FLAG_WRITE, onack, oncommit, objver);
o->mtime = mtime;
o->snapc = snapc;
- return op_submit(o);
- }
- ceph_tid_t write_full(const object_t& oid, const object_locator_t& oloc,
- const SnapContext& snapc, const bufferlist &bl,
- ceph::real_time mtime, int flags, Context *onack,
- Context *oncommit, version_t *objver = NULL,
- ObjectOperation *extra_ops = NULL, int op_flags = 0) {
+ ceph_tid_t tid;
+ op_submit(o, &tid);
+ return tid;
+ }
+ Op *prepare_write_full_op(
+ const object_t& oid, const object_locator_t& oloc,
+ const SnapContext& snapc, const bufferlist &bl,
+ ceph::real_time mtime, int flags, Context *onack,
+ Context *oncommit, version_t *objver = NULL,
+ ObjectOperation *extra_ops = NULL, int op_flags = 0) {
vector<OSDOp> ops;
int i = init_ops(ops, 1, extra_ops);
ops[i].op.op = CEPH_OSD_OP_WRITEFULL;
@@ -2500,7 +2584,19 @@ public:
CEPH_OSD_FLAG_WRITE, onack, oncommit, objver);
o->mtime = mtime;
o->snapc = snapc;
- return op_submit(o);
+ return o;
+ }
+ ceph_tid_t write_full(
+ const object_t& oid, const object_locator_t& oloc,
+ const SnapContext& snapc, const bufferlist &bl,
+ ceph::real_time mtime, int flags, Context *onack,
+ Context *oncommit, version_t *objver = NULL,
+ ObjectOperation *extra_ops = NULL, int op_flags = 0) {
+ Op *o = prepare_write_full_op(oid, oloc, snapc, bl, mtime, flags,
+ onack, oncommit, objver, extra_ops, op_flags);
+ ceph_tid_t tid;
+ op_submit(o, &tid);
+ return tid;
}
ceph_tid_t trunc(const object_t& oid, const object_locator_t& oloc,
const SnapContext& snapc, ceph::real_time mtime, int flags,
@@ -2517,7 +2613,9 @@ public:
CEPH_OSD_FLAG_WRITE, onack, oncommit, objver);
o->mtime = mtime;
o->snapc = snapc;
- return op_submit(o);
+ ceph_tid_t tid;
+ op_submit(o, &tid);
+ return tid;
}
ceph_tid_t zero(const object_t& oid, const object_locator_t& oloc,
uint64_t off, uint64_t len, const SnapContext& snapc,
@@ -2532,7 +2630,9 @@ public:
CEPH_OSD_FLAG_WRITE, onack, oncommit, objver);
o->mtime = mtime;
o->snapc = snapc;
- return op_submit(o);
+ ceph_tid_t tid;
+ op_submit(o, &tid);
+ return tid;
}
ceph_tid_t rollback_object(const object_t& oid, const object_locator_t& oloc,
const SnapContext& snapc, snapid_t snapid,
@@ -2547,7 +2647,9 @@ public:
objver);
o->mtime = mtime;
o->snapc = snapc;
- return op_submit(o);
+ ceph_tid_t tid;
+ op_submit(o, &tid);
+ return tid;
}
ceph_tid_t create(const object_t& oid, const object_locator_t& oloc,
const SnapContext& snapc, ceph::real_time mtime, int global_flags,
@@ -2562,12 +2664,15 @@ public:
CEPH_OSD_FLAG_WRITE, onack, oncommit, objver);
o->mtime = mtime;
o->snapc = snapc;
- return op_submit(o);
- }
- ceph_tid_t remove(const object_t& oid, const object_locator_t& oloc,
- const SnapContext& snapc, ceph::real_time mtime, int flags,
- Context *onack, Context *oncommit,
- version_t *objver = NULL, ObjectOperation *extra_ops = NULL) {
+ ceph_tid_t tid;
+ op_submit(o, &tid);
+ return tid;
+ }
+ Op *prepare_remove_op(
+ const object_t& oid, const object_locator_t& oloc,
+ const SnapContext& snapc, ceph::real_time mtime, int flags,
+ Context *onack, Context *oncommit,
+ version_t *objver = NULL, ObjectOperation *extra_ops = NULL) {
vector<OSDOp> ops;
int i = init_ops(ops, 1, extra_ops);
ops[i].op.op = CEPH_OSD_OP_DELETE;
@@ -2575,7 +2680,18 @@ public:
CEPH_OSD_FLAG_WRITE, onack, oncommit, objver);
o->mtime = mtime;
o->snapc = snapc;
- return op_submit(o);
+ return o;
+ }
+ ceph_tid_t remove(
+ const object_t& oid, const object_locator_t& oloc,
+ const SnapContext& snapc, ceph::real_time mtime, int flags,
+ Context *onack, Context *oncommit,
+ version_t *objver = NULL, ObjectOperation *extra_ops = NULL) {
+ Op *o = prepare_remove_op(oid, oloc, snapc, mtime, flags,
+ onack, oncommit, objver, extra_ops);
+ ceph_tid_t tid;
+ op_submit(o, &tid);
+ return tid;
}
ceph_tid_t setxattr(const object_t& oid, const object_locator_t& oloc,
@@ -2595,7 +2711,9 @@ public:
CEPH_OSD_FLAG_WRITE, onack, oncommit, objver);
o->mtime = mtime;
o->snapc = snapc;
- return op_submit(o);
+ ceph_tid_t tid;
+ op_submit(o, &tid);
+ return tid;
}
ceph_tid_t removexattr(const object_t& oid, const object_locator_t& oloc,
const char *name, const SnapContext& snapc,
@@ -2613,7 +2731,9 @@ public:
CEPH_OSD_FLAG_WRITE, onack, oncommit, objver);
o->mtime = mtime;
o->snapc = snapc;
- return op_submit(o);
+ ceph_tid_t tid;
+ op_submit(o, &tid);
+ return tid;
}
void list_nobjects(NListContext *p, Context *onfinish);
diff --git a/src/pybind/ceph_rest_api.py b/src/pybind/ceph_rest_api.py
index 7792013..2dfe6b6 100755
--- a/src/pybind/ceph_rest_api.py
+++ b/src/pybind/ceph_rest_api.py
@@ -224,15 +224,6 @@ def generate_url_and_params(app, sig, flavor):
# prefixes go in the URL path
if desc.t == CephPrefix:
url += '/' + desc.instance.prefix
- # CephChoices with 1 required string (not --) do too, unless
- # we've already started collecting params, in which case they
- # too are params
- elif (desc.t == CephChoices and
- len(desc.instance.strings) == 1 and
- desc.req and
- not str(desc.instance).startswith('--') and
- not params):
- url += '/' + str(desc.instance)
else:
# tell/<target> is a weird case; the URL includes what
# would everywhere else be a parameter
diff --git a/src/rgw/librgw.cc b/src/rgw/librgw.cc
index 220d024..37414fc 100644
--- a/src/rgw/librgw.cc
+++ b/src/rgw/librgw.cc
@@ -545,16 +545,29 @@ namespace rgw {
}
int RGWLibRequest::read_permissions(RGWOp* op) {
+ /* bucket and object ops */
int ret =
rgw_build_bucket_policies(rgwlib.get_store(), get_state());
if (ret < 0) {
- ldout(get_state()->cct, 10) << "read_permissions on "
+ ldout(get_state()->cct, 10) << "read_permissions (bucket policy) on "
<< get_state()->bucket << ":"
<< get_state()->object
<< " only_bucket=" << only_bucket()
<< " ret=" << ret << dendl;
if (ret == -ENODATA)
ret = -EACCES;
+ } else if (! only_bucket()) {
+ /* object ops */
+ ret = rgw_build_object_policies(rgwlib.get_store(), get_state(),
+ op->prefetch_data());
+ if (ret < 0) {
+ ldout(get_state()->cct, 10) << "read_permissions (object policy) on"
+ << get_state()->bucket << ":"
+ << get_state()->object
+ << " ret=" << ret << dendl;
+ if (ret == -ENODATA)
+ ret = -EACCES;
+ }
}
return ret;
} /* RGWLibRequest::read_permissions */
diff --git a/src/rgw/rgw_admin.cc b/src/rgw/rgw_admin.cc
index 1f3cb61..f95afc6 100644
--- a/src/rgw/rgw_admin.cc
+++ b/src/rgw/rgw_admin.cc
@@ -2293,7 +2293,6 @@ int main(int argc, char **argv)
} else if (ceph_argparse_witharg(args, i, &val, "--zone-new-name", (char*)NULL)) {
zone_new_name = val;
} else if (ceph_argparse_witharg(args, i, &val, "--endpoints", (char*)NULL)) {
- list<string>::iterator iter;
get_str_list(val, endpoints);
} else if (ceph_argparse_witharg(args, i, &val, "--source-zone", (char*)NULL)) {
source_zone_name = val;
@@ -2308,7 +2307,7 @@ int main(int argc, char **argv)
tenant = user_id.tenant;
} else {
if (user_id.empty()) {
- cerr << "ERROR: --tennant is set, but there's no user ID" << std::endl;
+ cerr << "ERROR: --tenant is set, but there's no user ID" << std::endl;
return EINVAL;
}
user_id.tenant = tenant;
@@ -2453,7 +2452,7 @@ int main(int argc, char **argv)
case OPT_PERIOD_DELETE:
{
if (period_id.empty()) {
- cerr << "missing realm name or id" << std::endl;
+ cerr << "missing period id" << std::endl;
return -EINVAL;
}
RGWPeriod period(period_id);
@@ -3315,13 +3314,18 @@ int main(int argc, char **argv)
if(zone.realm_id.empty()) {
RGWRealm realm(realm_id, realm_name);
int ret = realm.init(g_ceph_context, store);
- if (ret < 0) {
+ if (ret < 0 && ret != -ENOENT) {
cerr << "failed to init realm: " << cpp_strerror(-ret) << std::endl;
return -ret;
}
zone.realm_id = realm.get_id();
}
+ if( !zone_name.empty() && !zone.get_name().empty() && zone.get_name() != zone_name) {
+ cerr << "Error: zone name" << zone_name << " is different than the zone name " << zone.get_name() << " in the provided json " << std::endl;
+ return -EINVAL;
+ }
+
if (zone.get_name().empty()) {
zone.set_name(zone_name);
if (zone.get_name().empty()) {
@@ -3330,11 +3334,6 @@ int main(int argc, char **argv)
}
}
- if(zone.get_name() != zone_name) {
- cerr << "Error: zone name" << zone_name << " is different than the zone name " << zone.get_name() << " in the provided json " << std::endl;
- return -EINVAL;
- }
-
zone_name = zone.get_name();
if (zone.get_id().empty()) {
diff --git a/src/rgw/rgw_common.h b/src/rgw/rgw_common.h
index 90d2ae5..037703e 100644
--- a/src/rgw/rgw_common.h
+++ b/src/rgw/rgw_common.h
@@ -1081,6 +1081,7 @@ struct req_info {
const char *method;
string script_uri;
string request_uri;
+ string request_uri_aws4;
string effective_uri;
string request_params;
string domain;
diff --git a/src/rgw/rgw_ldap.h b/src/rgw/rgw_ldap.h
index 46b05ff..02eb61e 100644
--- a/src/rgw/rgw_ldap.h
+++ b/src/rgw/rgw_ldap.h
@@ -4,8 +4,10 @@
#ifndef RGW_LDAP_H
#define RGW_LDAP_H
+#if defined(HAVE_OPENLDAP)
#define LDAP_DEPRECATED 1
#include "ldap.h"
+#endif
#include <stdint.h>
#include <tuple>
diff --git a/src/rgw/rgw_op.cc b/src/rgw/rgw_op.cc
index 66ecd55..c781eb9 100644
--- a/src/rgw/rgw_op.cc
+++ b/src/rgw/rgw_op.cc
@@ -398,7 +398,8 @@ int rgw_build_bucket_policies(RGWRados* store, struct req_state* s)
* only_bucket: If true, reads the bucket ACL rather than the object ACL.
* Returns: 0 on success, -ERR# otherwise.
*/
-static int rgw_build_object_policies(RGWRados *store, struct req_state *s, bool prefetch_data)
+int rgw_build_object_policies(RGWRados *store, struct req_state *s,
+ bool prefetch_data)
{
int ret = 0;
@@ -2994,7 +2995,8 @@ void RGWDeleteObj::execute()
return;
}
- rgw_obj obj(s->bucket, s->object);
+ rgw_obj obj(s->bucket, s->object.name);
+ obj.set_instance(s->object.instance);
map<string, bufferlist> attrs;
diff --git a/src/rgw/rgw_op.h b/src/rgw/rgw_op.h
index e3ecd60..825dd93 100644
--- a/src/rgw/rgw_op.h
+++ b/src/rgw/rgw_op.h
@@ -1338,6 +1338,8 @@ public:
};
extern int rgw_build_bucket_policies(RGWRados* store, struct req_state* s);
+extern int rgw_build_object_policies(RGWRados *store, struct req_state *s,
+ bool prefetch_data);
static inline int put_data_and_throttle(RGWPutObjProcessor *processor,
bufferlist& data, off_t ofs,
diff --git a/src/rgw/rgw_rest.cc b/src/rgw/rgw_rest.cc
index b60999f..a165b65 100644
--- a/src/rgw/rgw_rest.cc
+++ b/src/rgw/rgw_rest.cc
@@ -1590,6 +1590,11 @@ int RGWREST::preprocess(struct req_state *s, RGWClientIO* cio)
{
req_info& info = s->info;
+ /* save the request uri used to hash on the client side. request_uri may suffer
+ modifications as part of the bucket encoding in the subdomain calling format.
+ request_uri_aws4 will be used under aws4 auth */
+ s->info.request_uri_aws4 = s->info.request_uri;
+
s->cio = cio;
if (info.host.size()) {
ldout(s->cct, 10) << "host=" << info.host << dendl;
diff --git a/src/rgw/rgw_rest_conn.cc b/src/rgw/rgw_rest_conn.cc
index f45e94a..78fe0ae 100644
--- a/src/rgw/rgw_rest_conn.cc
+++ b/src/rgw/rgw_rest_conn.cc
@@ -199,7 +199,7 @@ int RGWRESTConn::get_resource(const string& resource,
map<string, string> headers;
if (extra_headers) {
- headers.insert(extra_params->begin(), extra_params->end());
+ headers.insert(extra_headers->begin(), extra_headers->end());
}
ret = req.get_resource(key, headers, resource, mgr);
diff --git a/src/rgw/rgw_rest_s3.cc b/src/rgw/rgw_rest_s3.cc
index e4731ba..c912fc2 100644
--- a/src/rgw/rgw_rest_s3.cc
+++ b/src/rgw/rgw_rest_s3.cc
@@ -3409,7 +3409,7 @@ int RGW_Auth_S3::authorize_v4(RGWRados *store, struct req_state *s)
* that SigV4 typically does. this code follows the same approach that boto library
* see auth.py:canonical_uri(...) */
- s->aws4_auth->canonical_uri = s->info.request_uri;
+ s->aws4_auth->canonical_uri = s->info.request_uri_aws4;
if (s->aws4_auth->canonical_uri.empty()) {
s->aws4_auth->canonical_uri = "/";
@@ -3437,8 +3437,20 @@ int RGW_Auth_S3::authorize_v4(RGWRados *store, struct req_state *s)
string encoded_key;
string encoded_val;
if (key != "X-Amz-Credential") {
- aws4_uri_encode(key, encoded_key);
- aws4_uri_encode(val, encoded_val);
+ string key_decoded;
+ url_decode(key, key_decoded);
+ if (key.length() != key_decoded.length()) {
+ encoded_key = key;
+ } else {
+ aws4_uri_encode(key, encoded_key);
+ }
+ string val_decoded;
+ url_decode(val, val_decoded);
+ if (val.length() != val_decoded.length()) {
+ encoded_val = val;
+ } else {
+ aws4_uri_encode(val, encoded_val);
+ }
} else {
encoded_key = key;
encoded_val = val;
diff --git a/src/rgw/rgw_sync.cc b/src/rgw/rgw_sync.cc
index 38f92ed..922a904 100644
--- a/src/rgw/rgw_sync.cc
+++ b/src/rgw/rgw_sync.cc
@@ -1868,13 +1868,28 @@ int RGWRemoteMetaLog::run_sync()
}
RGWObjectCtx obj_ctx(store, NULL);
+ int r = 0;
// get shard count and oldest log period from master
rgw_mdlog_info mdlog_info;
- int r = read_log_info(&mdlog_info);
- if (r < 0) {
- lderr(store->ctx()) << "ERROR: fail to fetch master log info (r=" << r << ")" << dendl;
- return r;
+ for (;;) {
+ if (going_down.read()) {
+ ldout(store->ctx(), 1) << __func__ << "(): going down" << dendl;
+ return 0;
+ }
+ r = read_log_info(&mdlog_info);
+ if (r == -EIO) {
+ // keep retrying if master isn't alive
+ ldout(store->ctx(), 10) << __func__ << "(): waiting for master.." << dendl;
+ backoff.backoff_sleep();
+ continue;
+ }
+ backoff.reset();
+ if (r < 0) {
+ lderr(store->ctx()) << "ERROR: fail to fetch master log info (r=" << r << ")" << dendl;
+ return r;
+ }
+ break;
}
do {
@@ -1924,7 +1939,7 @@ int RGWRemoteMetaLog::run_sync()
auto num_shards = sync_status.sync_info.num_shards;
if (num_shards != mdlog_info.num_shards) {
lderr(store->ctx()) << "ERROR: can't sync, mismatch between num shards, master num_shards=" << mdlog_info.num_shards << " local num_shards=" << num_shards << dendl;
- return r;
+ return -EINVAL;
}
RGWPeriodHistory::Cursor cursor;
diff --git a/src/test/cli/ceph-authtool/help.t b/src/test/cli/ceph-authtool/help.t
index 062c967..9a6c883 100644
--- a/src/test/cli/ceph-authtool/help.t
+++ b/src/test/cli/ceph-authtool/help.t
@@ -14,12 +14,13 @@
specified entityname
--gen-print-key will generate a new secret key without set it
to the keyringfile, prints the secret to stdout
- --import-keyring will import the content of a given keyring
+ --import-keyring FILE will import the content of a given keyring
into the keyringfile
- -u, --set-uid sets the auid (authenticated user id) for the
+ -n NAME, --name NAME specify entityname to operate on
+ -u AUID, --set-uid AUID sets the auid (authenticated user id) for the
specified entityname
- -a, --add-key will add an encoded key to the keyring
- --cap subsystem capability will set the capability for given subsystem
- --caps capsfile will set all of capabilities associated with a
+ -a BASE64, --add-key BASE64 will add an encoded key to the keyring
+ --cap SUBSYSTEM CAPABILITY will set the capability for given subsystem
+ --caps CAPSFILE will set all of capabilities associated with a
given key, for all subsystems
[1]
diff --git a/src/test/cli/ceph-authtool/manpage.t b/src/test/cli/ceph-authtool/manpage.t
index a9e1408..f84b794 100644
--- a/src/test/cli/ceph-authtool/manpage.t
+++ b/src/test/cli/ceph-authtool/manpage.t
@@ -13,13 +13,14 @@
specified entityname
--gen-print-key will generate a new secret key without set it
to the keyringfile, prints the secret to stdout
- --import-keyring will import the content of a given keyring
+ --import-keyring FILE will import the content of a given keyring
into the keyringfile
- -u, --set-uid sets the auid (authenticated user id) for the
+ -n NAME, --name NAME specify entityname to operate on
+ -u AUID, --set-uid AUID sets the auid (authenticated user id) for the
specified entityname
- -a, --add-key will add an encoded key to the keyring
- --cap subsystem capability will set the capability for given subsystem
- --caps capsfile will set all of capabilities associated with a
+ -a BASE64, --add-key BASE64 will add an encoded key to the keyring
+ --cap SUBSYSTEM CAPABILITY will set the capability for given subsystem
+ --caps CAPSFILE will set all of capabilities associated with a
given key, for all subsystems
[1]
diff --git a/src/test/cli/ceph-authtool/simple.t b/src/test/cli/ceph-authtool/simple.t
index b86476a..35905ad 100644
--- a/src/test/cli/ceph-authtool/simple.t
+++ b/src/test/cli/ceph-authtool/simple.t
@@ -13,12 +13,13 @@
specified entityname
--gen-print-key will generate a new secret key without set it
to the keyringfile, prints the secret to stdout
- --import-keyring will import the content of a given keyring
+ --import-keyring FILE will import the content of a given keyring
into the keyringfile
- -u, --set-uid sets the auid (authenticated user id) for the
+ -n NAME, --name NAME specify entityname to operate on
+ -u AUID, --set-uid AUID sets the auid (authenticated user id) for the
specified entityname
- -a, --add-key will add an encoded key to the keyring
- --cap subsystem capability will set the capability for given subsystem
- --caps capsfile will set all of capabilities associated with a
+ -a BASE64, --add-key BASE64 will add an encoded key to the keyring
+ --cap SUBSYSTEM CAPABILITY will set the capability for given subsystem
+ --caps CAPSFILE will set all of capabilities associated with a
given key, for all subsystems
[1]
diff --git a/src/test/encoding/check-generated.sh b/src/test/encoding/check-generated.sh
index ee55fab..c34fce8 100755
--- a/src/test/encoding/check-generated.sh
+++ b/src/test/encoding/check-generated.sh
@@ -1,4 +1,4 @@
-#!/bin/sh -e
+#!/bin/bash -e
source ../qa/workunits/ceph-helpers.sh
diff --git a/src/test/encoding/types.h b/src/test/encoding/types.h
index 11dfc50..22919bd 100644
--- a/src/test/encoding/types.h
+++ b/src/test/encoding/types.h
@@ -192,6 +192,10 @@ TYPE_FEATUREFUL(InodeStore)
TYPE_FEATUREFUL(MDSMap)
TYPE_FEATUREFUL(MDSMap::mds_info_t)
+#include "mds/FSMap.h"
+//TYPE_FEATUREFUL(Filesystem)
+TYPE_FEATUREFUL(FSMap)
+
#include "mds/Capability.h"
TYPE_NOCOPY(Capability)
diff --git a/src/test/journal/test_FutureImpl.cc b/src/test/journal/test_FutureImpl.cc
index 51e19cf..eb5f806 100644
--- a/src/test/journal/test_FutureImpl.cc
+++ b/src/test/journal/test_FutureImpl.cc
@@ -25,12 +25,11 @@ public:
}
};
- journal::FutureImplPtr create_future(journal::JournalMetadataPtr metadata,
- uint64_t tag_tid, uint64_t entry_tid,
+ journal::FutureImplPtr create_future(uint64_t tag_tid, uint64_t entry_tid,
uint64_t commit_tid,
const journal::FutureImplPtr &prev =
journal::FutureImplPtr()) {
- journal::FutureImplPtr future(new journal::FutureImpl(metadata, tag_tid,
+ journal::FutureImplPtr future(new journal::FutureImpl(tag_tid,
entry_tid,
commit_tid));
future->init(prev);
@@ -50,7 +49,7 @@ TEST_F(TestFutureImpl, Getters) {
journal::JournalMetadataPtr metadata = create_metadata(oid);
ASSERT_EQ(0, init_metadata(metadata));
- journal::FutureImplPtr future = create_future(metadata, 234, 123, 456);
+ journal::FutureImplPtr future = create_future(234, 123, 456);
ASSERT_EQ(234U, future->get_tag_tid());
ASSERT_EQ(123U, future->get_entry_tid());
ASSERT_EQ(456U, future->get_commit_tid());
@@ -63,7 +62,7 @@ TEST_F(TestFutureImpl, Attach) {
journal::JournalMetadataPtr metadata = create_metadata(oid);
ASSERT_EQ(0, init_metadata(metadata));
- journal::FutureImplPtr future = create_future(metadata, 234, 123, 456);
+ journal::FutureImplPtr future = create_future(234, 123, 456);
ASSERT_FALSE(future->attach(&m_flush_handler));
ASSERT_EQ(1U, m_flush_handler.refs);
}
@@ -75,7 +74,7 @@ TEST_F(TestFutureImpl, AttachWithPendingFlush) {
journal::JournalMetadataPtr metadata = create_metadata(oid);
ASSERT_EQ(0, init_metadata(metadata));
- journal::FutureImplPtr future = create_future(metadata, 234, 123, 456);
+ journal::FutureImplPtr future = create_future(234, 123, 456);
future->flush(NULL);
ASSERT_TRUE(future->attach(&m_flush_handler));
@@ -89,7 +88,7 @@ TEST_F(TestFutureImpl, Detach) {
journal::JournalMetadataPtr metadata = create_metadata(oid);
ASSERT_EQ(0, init_metadata(metadata));
- journal::FutureImplPtr future = create_future(metadata, 234, 123, 456);
+ journal::FutureImplPtr future = create_future(234, 123, 456);
ASSERT_FALSE(future->attach(&m_flush_handler));
future->detach();
ASSERT_EQ(0U, m_flush_handler.refs);
@@ -102,7 +101,7 @@ TEST_F(TestFutureImpl, DetachImplicit) {
journal::JournalMetadataPtr metadata = create_metadata(oid);
ASSERT_EQ(0, init_metadata(metadata));
- journal::FutureImplPtr future = create_future(metadata, 234, 123, 456);
+ journal::FutureImplPtr future = create_future(234, 123, 456);
ASSERT_FALSE(future->attach(&m_flush_handler));
future.reset();
ASSERT_EQ(0U, m_flush_handler.refs);
@@ -115,7 +114,7 @@ TEST_F(TestFutureImpl, Flush) {
journal::JournalMetadataPtr metadata = create_metadata(oid);
ASSERT_EQ(0, init_metadata(metadata));
- journal::FutureImplPtr future = create_future(metadata, 234, 123, 456);
+ journal::FutureImplPtr future = create_future(234, 123, 456);
ASSERT_FALSE(future->attach(&m_flush_handler));
C_SaferCond cond;
@@ -133,7 +132,7 @@ TEST_F(TestFutureImpl, FlushWithoutContext) {
journal::JournalMetadataPtr metadata = create_metadata(oid);
ASSERT_EQ(0, init_metadata(metadata));
- journal::FutureImplPtr future = create_future(metadata, 234, 123, 456);
+ journal::FutureImplPtr future = create_future(234, 123, 456);
ASSERT_FALSE(future->attach(&m_flush_handler));
future->flush(NULL);
@@ -150,10 +149,10 @@ TEST_F(TestFutureImpl, FlushChain) {
journal::JournalMetadataPtr metadata = create_metadata(oid);
ASSERT_EQ(0, init_metadata(metadata));
- journal::FutureImplPtr future1 = create_future(metadata, 234, 123, 456);
- journal::FutureImplPtr future2 = create_future(metadata, 234, 124, 457,
+ journal::FutureImplPtr future1 = create_future(234, 123, 456);
+ journal::FutureImplPtr future2 = create_future(234, 124, 457,
future1);
- journal::FutureImplPtr future3 = create_future(metadata, 235, 1, 458,
+ journal::FutureImplPtr future3 = create_future(235, 1, 458,
future2);
ASSERT_FALSE(future1->attach(&m_flush_handler));
ASSERT_FALSE(future2->attach(&m_flush_handler));
@@ -184,8 +183,8 @@ TEST_F(TestFutureImpl, FlushInProgress) {
journal::JournalMetadataPtr metadata = create_metadata(oid);
ASSERT_EQ(0, init_metadata(metadata));
- journal::FutureImplPtr future1 = create_future(metadata, 234, 123, 456);
- journal::FutureImplPtr future2 = create_future(metadata, 234, 124, 457,
+ journal::FutureImplPtr future1 = create_future(234, 123, 456);
+ journal::FutureImplPtr future2 = create_future(234, 124, 457,
future1);
ASSERT_FALSE(future1->attach(&m_flush_handler));
ASSERT_FALSE(future2->attach(&m_flush_handler));
@@ -206,7 +205,7 @@ TEST_F(TestFutureImpl, FlushAlreadyComplete) {
journal::JournalMetadataPtr metadata = create_metadata(oid);
ASSERT_EQ(0, init_metadata(metadata));
- journal::FutureImplPtr future = create_future(metadata, 234, 123, 456);
+ journal::FutureImplPtr future = create_future(234, 123, 456);
future->safe(-EIO);
C_SaferCond cond;
@@ -221,7 +220,7 @@ TEST_F(TestFutureImpl, Wait) {
journal::JournalMetadataPtr metadata = create_metadata(oid);
ASSERT_EQ(0, init_metadata(metadata));
- journal::FutureImplPtr future = create_future(metadata, 234, 1, 456);
+ journal::FutureImplPtr future = create_future(234, 1, 456);
C_SaferCond cond;
future->wait(&cond);
@@ -236,7 +235,7 @@ TEST_F(TestFutureImpl, WaitAlreadyComplete) {
journal::JournalMetadataPtr metadata = create_metadata(oid);
ASSERT_EQ(0, init_metadata(metadata));
- journal::FutureImplPtr future = create_future(metadata, 234, 1, 456);
+ journal::FutureImplPtr future = create_future(234, 1, 456);
future->safe(-EEXIST);
C_SaferCond cond;
@@ -251,8 +250,8 @@ TEST_F(TestFutureImpl, SafePreservesError) {
journal::JournalMetadataPtr metadata = create_metadata(oid);
ASSERT_EQ(0, init_metadata(metadata));
- journal::FutureImplPtr future1 = create_future(metadata, 234, 123, 456);
- journal::FutureImplPtr future2 = create_future(metadata, 234, 124, 457,
+ journal::FutureImplPtr future1 = create_future(234, 123, 456);
+ journal::FutureImplPtr future2 = create_future(234, 124, 457,
future1);
future1->safe(-EIO);
@@ -268,8 +267,8 @@ TEST_F(TestFutureImpl, ConsistentPreservesError) {
journal::JournalMetadataPtr metadata = create_metadata(oid);
ASSERT_EQ(0, init_metadata(metadata));
- journal::FutureImplPtr future1 = create_future(metadata, 234, 123, 456);
- journal::FutureImplPtr future2 = create_future(metadata, 234, 124, 457,
+ journal::FutureImplPtr future1 = create_future(234, 123, 456);
+ journal::FutureImplPtr future2 = create_future(234, 124, 457,
future1);
future2->safe(-EEXIST);
diff --git a/src/test/journal/test_ObjectRecorder.cc b/src/test/journal/test_ObjectRecorder.cc
index 65d74b6..f26e526 100644
--- a/src/test/journal/test_ObjectRecorder.cc
+++ b/src/test/journal/test_ObjectRecorder.cc
@@ -67,11 +67,10 @@ public:
m_flush_age = i;
}
- journal::AppendBuffer create_append_buffer(journal::JournalMetadataPtr metadata,
- uint64_t tag_tid, uint64_t entry_tid,
+ journal::AppendBuffer create_append_buffer(uint64_t tag_tid, uint64_t entry_tid,
const std::string &payload) {
- journal::FutureImplPtr future(new journal::FutureImpl(metadata, tag_tid,
- entry_tid, 456));
+ journal::FutureImplPtr future(new journal::FutureImpl(tag_tid, entry_tid,
+ 456));
future->init(journal::FutureImplPtr());
bufferlist bl;
@@ -98,16 +97,14 @@ TEST_F(TestObjectRecorder, Append) {
journal::ObjectRecorderPtr object = create_object(oid, 24);
- journal::AppendBuffer append_buffer1 = create_append_buffer(metadata,
- 234, 123,
+ journal::AppendBuffer append_buffer1 = create_append_buffer(234, 123,
"payload");
journal::AppendBuffers append_buffers;
append_buffers = {append_buffer1};
ASSERT_FALSE(object->append(append_buffers));
ASSERT_EQ(1U, object->get_pending_appends());
- journal::AppendBuffer append_buffer2 = create_append_buffer(metadata,
- 234, 124,
+ journal::AppendBuffer append_buffer2 = create_append_buffer(234, 124,
"payload");
append_buffers = {append_buffer2};
ASSERT_FALSE(object->append(append_buffers));
@@ -129,16 +126,14 @@ TEST_F(TestObjectRecorder, AppendFlushByCount) {
set_flush_interval(2);
journal::ObjectRecorderPtr object = create_object(oid, 24);
- journal::AppendBuffer append_buffer1 = create_append_buffer(metadata,
- 234, 123,
+ journal::AppendBuffer append_buffer1 = create_append_buffer(234, 123,
"payload");
journal::AppendBuffers append_buffers;
append_buffers = {append_buffer1};
ASSERT_FALSE(object->append(append_buffers));
ASSERT_EQ(1U, object->get_pending_appends());
- journal::AppendBuffer append_buffer2 = create_append_buffer(metadata,
- 234, 124,
+ journal::AppendBuffer append_buffer2 = create_append_buffer(234, 124,
"payload");
append_buffers = {append_buffer2};
ASSERT_FALSE(object->append(append_buffers));
@@ -159,16 +154,14 @@ TEST_F(TestObjectRecorder, AppendFlushByBytes) {
set_flush_bytes(10);
journal::ObjectRecorderPtr object = create_object(oid, 24);
- journal::AppendBuffer append_buffer1 = create_append_buffer(metadata,
- 234, 123,
+ journal::AppendBuffer append_buffer1 = create_append_buffer(234, 123,
"payload");
journal::AppendBuffers append_buffers;
append_buffers = {append_buffer1};
ASSERT_FALSE(object->append(append_buffers));
ASSERT_EQ(1U, object->get_pending_appends());
- journal::AppendBuffer append_buffer2 = create_append_buffer(metadata,
- 234, 124,
+ journal::AppendBuffer append_buffer2 = create_append_buffer(234, 124,
"payload");
append_buffers = {append_buffer2};
ASSERT_FALSE(object->append(append_buffers));
@@ -189,15 +182,13 @@ TEST_F(TestObjectRecorder, AppendFlushByAge) {
set_flush_age(0.1);
journal::ObjectRecorderPtr object = create_object(oid, 24);
- journal::AppendBuffer append_buffer1 = create_append_buffer(metadata,
- 234, 123,
+ journal::AppendBuffer append_buffer1 = create_append_buffer(234, 123,
"payload");
journal::AppendBuffers append_buffers;
append_buffers = {append_buffer1};
ASSERT_FALSE(object->append(append_buffers));
- journal::AppendBuffer append_buffer2 = create_append_buffer(metadata,
- 234, 124,
+ journal::AppendBuffer append_buffer2 = create_append_buffer(234, 124,
"payload");
append_buffers = {append_buffer2};
ASSERT_FALSE(object->append(append_buffers));
@@ -218,15 +209,13 @@ TEST_F(TestObjectRecorder, AppendFilledObject) {
journal::ObjectRecorderPtr object = create_object(oid, 12);
std::string payload(2048, '1');
- journal::AppendBuffer append_buffer1 = create_append_buffer(metadata,
- 234, 123,
+ journal::AppendBuffer append_buffer1 = create_append_buffer(234, 123,
payload);
journal::AppendBuffers append_buffers;
append_buffers = {append_buffer1};
ASSERT_FALSE(object->append(append_buffers));
- journal::AppendBuffer append_buffer2 = create_append_buffer(metadata,
- 234, 124,
+ journal::AppendBuffer append_buffer2 = create_append_buffer(234, 124,
payload);
append_buffers = {append_buffer2};
ASSERT_TRUE(object->append(append_buffers));
@@ -246,8 +235,7 @@ TEST_F(TestObjectRecorder, Flush) {
journal::ObjectRecorderPtr object = create_object(oid, 24);
- journal::AppendBuffer append_buffer1 = create_append_buffer(metadata,
- 234, 123,
+ journal::AppendBuffer append_buffer1 = create_append_buffer(234, 123,
"payload");
journal::AppendBuffers append_buffers;
append_buffers = {append_buffer1};
@@ -273,8 +261,7 @@ TEST_F(TestObjectRecorder, FlushFuture) {
journal::ObjectRecorderPtr object = create_object(oid, 24);
- journal::AppendBuffer append_buffer = create_append_buffer(metadata,
- 234, 123,
+ journal::AppendBuffer append_buffer = create_append_buffer(234, 123,
"payload");
journal::AppendBuffers append_buffers;
append_buffers = {append_buffer};
@@ -298,8 +285,7 @@ TEST_F(TestObjectRecorder, FlushDetachedFuture) {
journal::ObjectRecorderPtr object = create_object(oid, 24);
- journal::AppendBuffer append_buffer = create_append_buffer(metadata,
- 234, 123,
+ journal::AppendBuffer append_buffer = create_append_buffer(234, 123,
"payload");
journal::AppendBuffers append_buffers;
@@ -326,11 +312,9 @@ TEST_F(TestObjectRecorder, Overflow) {
journal::ObjectRecorderPtr object2 = create_object(oid, 12);
std::string payload(2048, '1');
- journal::AppendBuffer append_buffer1 = create_append_buffer(metadata,
- 234, 123,
+ journal::AppendBuffer append_buffer1 = create_append_buffer(234, 123,
payload);
- journal::AppendBuffer append_buffer2 = create_append_buffer(metadata,
- 234, 124,
+ journal::AppendBuffer append_buffer2 = create_append_buffer(234, 124,
payload);
journal::AppendBuffers append_buffers;
append_buffers = {append_buffer1, append_buffer2};
@@ -341,8 +325,7 @@ TEST_F(TestObjectRecorder, Overflow) {
ASSERT_EQ(0, cond.wait());
ASSERT_EQ(0U, object1->get_pending_appends());
- journal::AppendBuffer append_buffer3 = create_append_buffer(metadata,
- 456, 123,
+ journal::AppendBuffer append_buffer3 = create_append_buffer(456, 123,
payload);
append_buffers = {append_buffer3};
diff --git a/src/test/librados/misc.cc b/src/test/librados/misc.cc
index ed19e00..7d89b21 100644
--- a/src/test/librados/misc.cc
+++ b/src/test/librados/misc.cc
@@ -95,6 +95,88 @@ TEST_F(LibRadosMiscPP, LongNamePP) {
ASSERT_EQ(-ENAMETOOLONG, ioctx.write(string(maxlen*2, 'a').c_str(), bl, bl.length(), 0));
}
+TEST_F(LibRadosMiscPP, LongLocatorPP) {
+  // Writes object "a" under locator keys of increasing length. The test
+  // expects keys of up to osd_max_object_name_len bytes to succeed and
+  // anything longer to fail with -ENAMETOOLONG.
+  bufferlist bl;
+  bl.append("content");
+  const int maxlen = g_conf->osd_max_object_name_len;
+  const struct {
+    int key_len;    // length of the locator key to set
+    int expected;   // return value expected from ioctx.write()
+  } cases[] = {
+    { maxlen / 2, 0 },
+    { maxlen - 1, 0 },
+    { maxlen,     0 },
+    { maxlen + 1, -ENAMETOOLONG },
+    { maxlen * 2, -ENAMETOOLONG },
+  };
+  for (const auto &c : cases) {
+    ioctx.locator_set_key(string(c.key_len, 'a'));
+    ASSERT_EQ(c.expected, ioctx.write("a", bl, bl.length(), 0));
+  }
+}
+
+TEST_F(LibRadosMiscPP, LongNSpacePP) {
+  // Writes object "a" under namespaces of increasing length. The test
+  // expects namespaces of up to osd_max_object_namespace_len bytes to
+  // succeed and anything longer to fail with -ENAMETOOLONG.
+  bufferlist bl;
+  bl.append("content");
+  const int maxlen = g_conf->osd_max_object_namespace_len;
+  const struct {
+    int ns_len;     // length of the namespace to set
+    int expected;   // return value expected from ioctx.write()
+  } cases[] = {
+    { maxlen / 2, 0 },
+    { maxlen - 1, 0 },
+    { maxlen,     0 },
+    { maxlen + 1, -ENAMETOOLONG },
+    { maxlen * 2, -ENAMETOOLONG },
+  };
+  for (const auto &c : cases) {
+    ioctx.set_namespace(string(c.ns_len, 'a'));
+    ASSERT_EQ(c.expected, ioctx.write("a", bl, bl.length(), 0));
+  }
+}
+
TEST_F(LibRadosMiscPP, LongAttrNamePP) {
bufferlist bl;
bl.append("content");
@@ -734,15 +816,29 @@ std::string LibRadosTwoPoolsECPP::src_pool_name;
//copy_from between ecpool and no-ecpool.
TEST_F(LibRadosTwoPoolsECPP, CopyFrom) {
- //create object w/ omapheader
+ bufferlist z;
+ z.append_zero(4194304*2);
bufferlist b;
b.append("copyfrom");
- ASSERT_EQ(0, src_ioctx.omap_set_header("foo", b));
- version_t uv = src_ioctx.get_last_version();
- ObjectWriteOperation op;
- op.copy_from("foo", src_ioctx, uv);
- ASSERT_EQ(-EOPNOTSUPP, ioctx.operate("foo.copy", &op));
+ // create big object w/ omapheader
+ {
+ ASSERT_EQ(0, src_ioctx.write_full("foo", z));
+ ASSERT_EQ(0, src_ioctx.omap_set_header("foo", b));
+ version_t uv = src_ioctx.get_last_version();
+ ObjectWriteOperation op;
+ op.copy_from("foo", src_ioctx, uv);
+ ASSERT_EQ(-EOPNOTSUPP, ioctx.operate("foo.copy", &op));
+ }
+
+ // same with small object
+ {
+ ASSERT_EQ(0, src_ioctx.omap_set_header("bar", b));
+ version_t uv = src_ioctx.get_last_version();
+ ObjectWriteOperation op;
+ op.copy_from("bar", src_ioctx, uv);
+ ASSERT_EQ(-EOPNOTSUPP, ioctx.operate("bar.copy", &op));
+ }
}
TEST_F(LibRadosMiscPP, CopyScrubPP) {
diff --git a/src/test/librbd/test_mock_Journal.cc b/src/test/librbd/test_mock_Journal.cc
index 881ac16..9ebea8f 100644
--- a/src/test/librbd/test_mock_Journal.cc
+++ b/src/test/librbd/test_mock_Journal.cc
@@ -944,6 +944,7 @@ TEST_F(TestMockJournal, EventAndIOCommitOrder) {
// commit journal event followed by IO event (standard)
on_journal_safe1->complete(0);
+ ictx->op_work_queue->drain();
expect_future_committed(mock_journaler);
mock_journal.commit_io_event(1U, 0);
@@ -954,6 +955,7 @@ TEST_F(TestMockJournal, EventAndIOCommitOrder) {
C_SaferCond event_ctx;
mock_journal.wait_event(2U, &event_ctx);
on_journal_safe2->complete(0);
+ ictx->op_work_queue->drain();
ASSERT_EQ(0, event_ctx.wait());
}
@@ -1054,6 +1056,7 @@ TEST_F(TestMockJournal, IOCommitError) {
// failed IO remains uncommitted in journal
on_journal_safe->complete(0);
+ ictx->op_work_queue->drain();
mock_journal.commit_io_event(1U, -EINVAL);
}
diff --git a/src/test/librgw_file_nfsns.cc b/src/test/librgw_file_nfsns.cc
index b838cb1..7f52c80 100644
--- a/src/test/librgw_file_nfsns.cc
+++ b/src/test/librgw_file_nfsns.cc
@@ -331,6 +331,14 @@ TEST(LibRGW, SETUP_DIRS1) {
ASSERT_EQ(rc, 0);
sf.sync();
ASSERT_TRUE(sf.rgw_fh->is_file());
+
+ /* because we made it the hard way, fixup attributes */
+ struct stat st;
+ st.st_uid = owner_uid;
+ st.st_gid = owner_gid;
+ /* permission bits must be an octal literal: decimal 644 is 01204,
+  * not the intended rw-r--r-- */
+ st.st_mode = 0644;
+ sf.rgw_fh->create_stat(&st, create_mask);
+
/* open handle */
rc = rgw_open(fs, sf.fh, 0 /* flags */);
ASSERT_EQ(rc, 0);
@@ -523,6 +531,9 @@ TEST(LibRGW, GETATTR_DIRS1)
ASSERT_TRUE(sobj.rgw_fh->is_dir());
ASSERT_TRUE(S_ISDIR(st.st_mode));
}
+ /* validate Unix owners */
+ ASSERT_EQ(st.st_uid, owner_uid);
+ ASSERT_EQ(st.st_gid, owner_gid);
if (verbose) {
obj_rec_st rec_st{sobj, st};
std::cout << "\t"
@@ -557,6 +568,10 @@ TEST(LibRGW, READ_DIRS1)
ASSERT_EQ(sobj.rgw_fh->get_size(), 16UL);
// do it
memset(buf, 0, 256);
+ if (verbose) {
+ std::cout << "reading 0,256 " << sobj.rgw_fh->relative_object_name()
+ << std::endl;
+ }
rc = rgw_read(fs, sobj.fh, 0, 256, &nread, buf, RGW_READ_FLAG_NONE);
ASSERT_EQ(rc, 0);
if (verbose) {
diff --git a/src/test/objectstore/chain_xattr.cc b/src/test/objectstore/chain_xattr.cc
index 5080321..d3b4b1e 100644
--- a/src/test/objectstore/chain_xattr.cc
+++ b/src/test/objectstore/chain_xattr.cc
@@ -259,6 +259,120 @@ TEST(chain_xattr, listxattr) {
::unlink(file);
}
+list<string> get_xattrs(int fd)
+{
+  // Returns the attribute names that sys_flistxattr() reports for fd,
+  // splitting its NUL-separated output. Any error from the call (negative
+  // return) yields an empty list. Only a fixed 1024-byte buffer is used.
+  char raw[1024];
+  list<string> names;
+  int remaining = sys_flistxattr(fd, raw, sizeof(raw));
+  if (remaining < 0)
+    return names;
+  for (char *cur = raw; remaining > 0; ) {
+    size_t name_len = strlen(cur);
+    names.push_back(string(cur, name_len));
+    // each entry consumes its bytes plus the terminating NUL
+    assert(remaining >= (int)(name_len + 1));
+    cur += name_len + 1;
+    remaining -= name_len + 1;
+  }
+  return names;
+}
+
+list<string> get_xattrs(string fn)
+{
+  // Path-based wrapper: opens fn read-only, delegates to the fd overload
+  // and closes the descriptor again. An open failure yields an empty list.
+  list<string> names;
+  int fd = ::open(fn.c_str(), O_RDONLY);
+  if (fd >= 0) {
+    names = get_xattrs(fd);
+    ::close(fd);
+  }
+  return names;
+}
+
+TEST(chain_xattr, fskip_chain_cleanup_and_ensure_single_attr)
+{
+  // fd-based variant. A value stored with the default chain_fsetxattr() is
+  // expected to be spread over more than one xattr; rewriting it with
+  // chain_fsetxattr<false, true>() is expected to leave exactly one xattr.
+  // The payload must read back unchanged after each write.
+  // NOTE(review): going by the test name, the template arguments are
+  // <skip_chain_cleanup, ensure_single_attr> -- confirm against chain_xattr.h.
+  const char *name = "user.foo";
+  const char *file = FILENAME;
+  ::unlink(file);
+  int fd = ::open(file, O_CREAT|O_RDWR|O_TRUNC, 0700);
+  ASSERT_GE(fd, 0);
+
+  char buf[800];
+  // memset takes (dest, value, count); the arguments were swapped, which
+  // left most of buf uninitialized.
+  memset(buf, 0x1F, sizeof(buf));
+  // set chunked without either
+  {
+    int r = chain_fsetxattr(fd, name, buf, sizeof(buf));
+    ASSERT_EQ((int)sizeof(buf), r);
+    ASSERT_GT(get_xattrs(fd).size(), 1U);
+  }
+
+  // verify
+  {
+    char buf2[sizeof(buf)*2];
+    int r = chain_fgetxattr(fd, name, buf2, sizeof(buf2));
+    ASSERT_EQ((int)sizeof(buf), r);
+    ASSERT_EQ(memcmp(buf, buf2, sizeof(buf)), 0);
+  }
+
+  // overwrite
+  {
+    int r = chain_fsetxattr<false, true>(fd, name, buf, sizeof(buf));
+    ASSERT_EQ((int)sizeof(buf), r);
+    ASSERT_EQ(get_xattrs(fd).size(), 1U);
+  }
+
+  // verify
+  {
+    char buf2[sizeof(buf)*2];
+    int r = chain_fgetxattr(fd, name, buf2, sizeof(buf2));
+    ASSERT_EQ((int)sizeof(buf), r);
+    ASSERT_EQ(memcmp(buf, buf2, sizeof(buf)), 0);
+  }
+
+  ::close(fd);
+  ::unlink(file);
+}
+
+TEST(chain_xattr, skip_chain_cleanup_and_ensure_single_attr)
+{
+  // Path-based variant. A value stored with the default chain_setxattr() is
+  // expected to be spread over more than one xattr; rewriting it with
+  // chain_setxattr<false, true>() is expected to leave exactly one xattr.
+  // The payload must read back unchanged after each write.
+  // NOTE(review): going by the test name, the template arguments are
+  // <skip_chain_cleanup, ensure_single_attr> -- confirm against chain_xattr.h.
+  const char *name = "user.foo";
+  const char *file = FILENAME;
+  ::unlink(file);
+  int fd = ::open(file, O_CREAT|O_RDWR|O_TRUNC, 0700);
+  ASSERT_GE(fd, 0);
+  ::close(fd);
+
+  char buf[3000];
+  // memset takes (dest, value, count); the arguments were swapped, which
+  // left most of buf uninitialized.
+  memset(buf, 0x1F, sizeof(buf));
+  // set chunked without either
+  {
+    int r = chain_setxattr(file, name, buf, sizeof(buf));
+    ASSERT_EQ((int)sizeof(buf), r);
+    ASSERT_GT(get_xattrs(file).size(), 1U);
+  }
+
+  // verify
+  {
+    char buf2[sizeof(buf)*2];
+    int r = chain_getxattr(file, name, buf2, sizeof(buf2));
+    ASSERT_EQ((int)sizeof(buf), r);
+    ASSERT_EQ(memcmp(buf, buf2, sizeof(buf)), 0);
+  }
+
+  // overwrite
+  {
+    int r = chain_setxattr<false, true>(file, name, buf, sizeof(buf));
+    ASSERT_EQ((int)sizeof(buf), r);
+    ASSERT_EQ(get_xattrs(file).size(), 1U);
+  }
+
+  // verify
+  {
+    char buf2[sizeof(buf)*2];
+    int r = chain_getxattr(file, name, buf2, sizeof(buf2));
+    ASSERT_EQ((int)sizeof(buf), r);
+    ASSERT_EQ(memcmp(buf, buf2, sizeof(buf)), 0);
+  }
+
+  ::unlink(file);
+}
+
int main(int argc, char **argv) {
vector<const char*> args;
argv_to_vec(argc, (const char **)argv, args);
diff --git a/src/test/objectstore/test_bluefs.cc b/src/test/objectstore/test_bluefs.cc
index b1f9013..41bcf99 100644
--- a/src/test/objectstore/test_bluefs.cc
+++ b/src/test/objectstore/test_bluefs.cc
@@ -38,8 +38,8 @@ TEST(BlueFS, mkfs) {
string fn = get_temp_bdev(size);
uuid_d fsid;
BlueFS fs;
- fs.add_block_device(0, fn);
- fs.add_block_extent(0, 1048576, size - 1048576);
+ fs.add_block_device(BlueFS::BDEV_DB, fn);
+ fs.add_block_extent(BlueFS::BDEV_DB, 1048576, size - 1048576);
fs.mkfs(fsid);
rm_temp_bdev(fn);
}
@@ -48,13 +48,13 @@ TEST(BlueFS, mkfs_mount) {
uint64_t size = 1048476 * 128;
string fn = get_temp_bdev(size);
BlueFS fs;
- ASSERT_EQ(0, fs.add_block_device(0, fn));
- fs.add_block_extent(0, 1048576, size - 1048576);
+ ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, fn));
+ fs.add_block_extent(BlueFS::BDEV_DB, 1048576, size - 1048576);
uuid_d fsid;
ASSERT_EQ(0, fs.mkfs(fsid));
ASSERT_EQ(0, fs.mount());
- ASSERT_EQ(fs.get_total(0), size - 1048576);
- ASSERT_LT(fs.get_free(0), size - 1048576);
+ ASSERT_EQ(fs.get_total(BlueFS::BDEV_DB), size - 1048576);
+ ASSERT_LT(fs.get_free(BlueFS::BDEV_DB), size - 1048576);
fs.umount();
rm_temp_bdev(fn);
}
@@ -63,8 +63,8 @@ TEST(BlueFS, write_read) {
uint64_t size = 1048476 * 128;
string fn = get_temp_bdev(size);
BlueFS fs;
- ASSERT_EQ(0, fs.add_block_device(0, fn));
- fs.add_block_extent(0, 1048576, size - 1048576);
+ ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, fn));
+ fs.add_block_extent(BlueFS::BDEV_DB, 1048576, size - 1048576);
uuid_d fsid;
ASSERT_EQ(0, fs.mkfs(fsid));
ASSERT_EQ(0, fs.mount());
@@ -99,8 +99,8 @@ TEST(BlueFS, small_appends) {
uint64_t size = 1048476 * 128;
string fn = get_temp_bdev(size);
BlueFS fs;
- ASSERT_EQ(0, fs.add_block_device(0, fn));
- fs.add_block_extent(0, 1048576, size - 1048576);
+ ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, fn));
+ fs.add_block_extent(BlueFS::BDEV_DB, 1048576, size - 1048576);
uuid_d fsid;
ASSERT_EQ(0, fs.mkfs(fsid));
ASSERT_EQ(0, fs.mount());
diff --git a/src/test/os/TestLFNIndex.cc b/src/test/os/TestLFNIndex.cc
index ad4cb75..1ff2e4d 100644
--- a/src/test/os/TestLFNIndex.cc
+++ b/src/test/os/TestLFNIndex.cc
@@ -49,7 +49,7 @@ public:
const std::string mangled_name = lfn_generate_object_name(hoid);
EXPECT_EQ(mangled_expected, mangled_name);
ghobject_t hoid_parsed;
- EXPECT_TRUE(lfn_parse_object_name(mangled_name, &hoid_parsed));
+ EXPECT_EQ(0, lfn_parse_object_name(mangled_name, &hoid_parsed));
EXPECT_EQ(hoid, hoid_parsed);
}
diff --git a/src/test/pybind/test_ceph_argparse.py b/src/test/pybind/test_ceph_argparse.py
index a649731..4bf6c27 100755
--- a/src/test/pybind/test_ceph_argparse.py
+++ b/src/test/pybind/test_ceph_argparse.py
@@ -491,6 +491,11 @@ class TestFS(TestArgparse):
self.assert_valid_command(['fs', 'ls'])
assert_equal({}, validate_command(sigdict, ['fs', 'ls', 'toomany']))
+    def test_fs_set_default(self):
+        # 'fs set_default' is expected to validate with exactly one
+        # filesystem name; a missing or surplus argument must be rejected.
+        prefix = ['fs', 'set_default']
+        self.assert_valid_command(prefix + ['cephfs'])
+        assert_equal({}, validate_command(sigdict, prefix))
+        assert_equal({}, validate_command(sigdict, prefix + ['cephfs', 'toomany']))
+
class TestMon(TestArgparse):
def test_dump(self):
diff --git a/src/test/rbd_mirror/test_ClusterWatcher.cc b/src/test/rbd_mirror/test_ClusterWatcher.cc
index 1629a16..2d7d3f2 100644
--- a/src/test/rbd_mirror/test_ClusterWatcher.cc
+++ b/src/test/rbd_mirror/test_ClusterWatcher.cc
@@ -45,7 +45,8 @@ public:
void create_pool(bool enable_mirroring, const peer_t &peer,
string *uuid = nullptr, string *name=nullptr) {
string pool_name = get_temp_pool_name("test-rbd-mirror-");
- ASSERT_EQ("", create_one_pool_pp(pool_name, *m_cluster));
+ ASSERT_EQ(0, m_cluster->pool_create(pool_name.c_str()));
+
int64_t pool_id = m_cluster->pool_lookup(pool_name.c_str());
ASSERT_GE(pool_id, 0);
m_pools.insert(pool_name);
@@ -84,7 +85,8 @@ public:
void create_cache_pool(const string &base_pool, string *cache_pool_name) {
bufferlist inbl;
*cache_pool_name = get_temp_pool_name("test-rbd-mirror-");
- ASSERT_EQ("", create_one_pool_pp(*cache_pool_name, *m_cluster));
+ ASSERT_EQ(0, m_cluster->pool_create(cache_pool_name->c_str()));
+
ASSERT_EQ(0, m_cluster->mon_command(
"{\"prefix\": \"osd tier add\", \"pool\": \"" + base_pool +
"\", \"tierpool\": \"" + *cache_pool_name +
diff --git a/src/test/rbd_mirror/test_ImageReplayer.cc b/src/test/rbd_mirror/test_ImageReplayer.cc
index 18f1441..84e4afc 100644
--- a/src/test/rbd_mirror/test_ImageReplayer.cc
+++ b/src/test/rbd_mirror/test_ImageReplayer.cc
@@ -76,14 +76,14 @@ public:
EXPECT_EQ("", connect_cluster_pp(m_local_cluster));
m_local_pool_name = get_temp_pool_name();
- EXPECT_EQ("", create_one_pool_pp(m_local_pool_name, m_local_cluster));
+ EXPECT_EQ(0, m_local_cluster.pool_create(m_local_pool_name.c_str()));
EXPECT_EQ(0, m_local_cluster.ioctx_create(m_local_pool_name.c_str(),
m_local_ioctx));
EXPECT_EQ("", connect_cluster_pp(m_remote_cluster));
m_remote_pool_name = get_temp_pool_name();
- EXPECT_EQ("", create_one_pool_pp(m_remote_pool_name, m_remote_cluster));
+ EXPECT_EQ(0, m_remote_cluster.pool_create(m_remote_pool_name.c_str()));
m_remote_pool_id = m_remote_cluster.pool_lookup(m_remote_pool_name.c_str());
EXPECT_GE(m_remote_pool_id, 0);
@@ -104,6 +104,13 @@ public:
~TestImageReplayer()
{
+ if (m_watch_handle != 0) {
+ m_remote_ioctx.unwatch2(m_watch_handle);
+ delete m_watch_ctx;
+ m_watch_ctx = nullptr;
+ m_watch_handle = 0;
+ }
+
delete m_replayer;
delete m_threads;
diff --git a/src/test/rbd_mirror/test_ImageSync.cc b/src/test/rbd_mirror/test_ImageSync.cc
index e9b234e..5e6b0a9 100644
--- a/src/test/rbd_mirror/test_ImageSync.cc
+++ b/src/test/rbd_mirror/test_ImageSync.cc
@@ -58,6 +58,11 @@ public:
ASSERT_EQ(0, m_remote_journaler->register_client(client_data_bl));
}
+ virtual void TearDown() {
+ TestFixture::TearDown();
+ delete m_remote_journaler;
+ }
+
void create_and_open(librados::IoCtx &io_ctx, librbd::ImageCtx **image_ctx) {
librbd::RBD rbd;
ASSERT_EQ(0, create_image(rbd, io_ctx, m_image_name, m_image_size));
diff --git a/src/test/rbd_mirror/test_PoolWatcher.cc b/src/test/rbd_mirror/test_PoolWatcher.cc
index b8ff311..5d131d3 100644
--- a/src/test/rbd_mirror/test_PoolWatcher.cc
+++ b/src/test/rbd_mirror/test_PoolWatcher.cc
@@ -54,7 +54,8 @@ TestPoolWatcher() : m_lock("TestPoolWatcherLock"),
void create_pool(bool enable_mirroring, const peer_t &peer, string *name=nullptr) {
string pool_name = get_temp_pool_name("test-rbd-mirror-");
- ASSERT_EQ("", create_one_pool_pp(pool_name, *m_cluster));
+ ASSERT_EQ(0, m_cluster->pool_create(pool_name.c_str()));
+
int64_t pool_id = m_cluster->pool_lookup(pool_name.c_str());
ASSERT_GE(pool_id, 0);
m_pools.insert(pool_name);
@@ -83,7 +84,8 @@ TestPoolWatcher() : m_lock("TestPoolWatcherLock"),
void create_cache_pool(const string &base_pool, string *cache_pool_name) {
bufferlist inbl;
*cache_pool_name = get_temp_pool_name("test-rbd-mirror-");
- ASSERT_EQ("", create_one_pool_pp(*cache_pool_name, *m_cluster));
+ ASSERT_EQ(0, m_cluster->pool_create(cache_pool_name->c_str()));
+
ASSERT_EQ(0, m_cluster->mon_command(
"{\"prefix\": \"osd tier add\", \"pool\": \"" + base_pool +
"\", \"tierpool\": \"" + *cache_pool_name +
diff --git a/src/test/rbd_mirror/test_fixture.cc b/src/test/rbd_mirror/test_fixture.cc
index 56620db..34981ea 100644
--- a/src/test/rbd_mirror/test_fixture.cc
+++ b/src/test/rbd_mirror/test_fixture.cc
@@ -22,11 +22,13 @@ TestFixture::TestFixture() {
}
void TestFixture::SetUpTestCase() {
+ ASSERT_EQ("", connect_cluster_pp(_rados));
+
_local_pool_name = get_temp_pool_name("test-rbd-mirror-");
- ASSERT_EQ("", create_one_pool_pp(_local_pool_name, _rados));
+ ASSERT_EQ(0, _rados.pool_create(_local_pool_name.c_str()));
_remote_pool_name = get_temp_pool_name("test-rbd-mirror-");
- ASSERT_EQ("", create_one_pool_pp(_remote_pool_name, _rados));
+ ASSERT_EQ(0, _rados.pool_create(_remote_pool_name.c_str()));
}
void TestFixture::TearDownTestCase() {
diff --git a/src/tools/Makefile-client.am b/src/tools/Makefile-client.am
index 509cba3..0b81549 100644
--- a/src/tools/Makefile-client.am
+++ b/src/tools/Makefile-client.am
@@ -71,6 +71,7 @@ noinst_HEADERS += \
tools/rbd/Utils.h
rbd_LDADD = \
libjournal.la libcls_journal_client.la \
+ libcls_rbd_client.la libcls_lock_client.la \
$(LIBRBD) $(LIBRBD_TYPES) $(LIBRADOS) $(CEPH_GLOBAL) \
$(BOOST_REGEX_LIBS) $(BOOST_PROGRAM_OPTIONS_LIBS)
if LINUX
diff --git a/src/tools/ceph_authtool.cc b/src/tools/ceph_authtool.cc
index 469c9f3..a213391 100644
--- a/src/tools/ceph_authtool.cc
+++ b/src/tools/ceph_authtool.cc
@@ -37,13 +37,14 @@ void usage()
<< " specified entityname\n"
<< " --gen-print-key will generate a new secret key without set it\n"
<< " to the keyringfile, prints the secret to stdout\n"
- << " --import-keyring will import the content of a given keyring\n"
+ << " --import-keyring FILE will import the content of a given keyring\n"
<< " into the keyringfile\n"
- << " -u, --set-uid sets the auid (authenticated user id) for the\n"
+ << " -n NAME, --name NAME specify entityname to operate on\n"
+ << " -u AUID, --set-uid AUID sets the auid (authenticated user id) for the\n"
<< " specified entityname\n"
- << " -a, --add-key will add an encoded key to the keyring\n"
- << " --cap subsystem capability will set the capability for given subsystem\n"
- << " --caps capsfile will set all of capabilities associated with a\n"
+ << " -a BASE64, --add-key BASE64 will add an encoded key to the keyring\n"
+ << " --cap SUBSYSTEM CAPABILITY will set the capability for given subsystem\n"
+ << " --caps CAPSFILE will set all of capabilities associated with a\n"
<< " given key, for all subsystems"
<< std::endl;
exit(1);
@@ -73,6 +74,9 @@ int main(int argc, const char **argv)
bool set_auid = false;
std::vector<const char*>::iterator i;
+ /* Handle options unique to ceph-authtool
+ * -n NAME, --name NAME is handled by global_init
+ * */
for (i = args.begin(); i != args.end(); ) {
std::string val;
if (ceph_argparse_double_dash(args, i)) {
@@ -117,6 +121,7 @@ int main(int argc, const char **argv)
usage();
}
}
+
if (fn.empty() && !gen_print_key) {
cerr << argv[0] << ": must specify filename" << std::endl;
usage();
diff --git a/src/tools/rbd/action/Journal.cc b/src/tools/rbd/action/Journal.cc
index 2995684..e82265c 100644
--- a/src/tools/rbd/action/Journal.cc
+++ b/src/tools/rbd/action/Journal.cc
@@ -13,14 +13,13 @@
#include <fstream>
#include <sstream>
#include <boost/program_options.hpp>
-
+#include "cls/rbd/cls_rbd_client.h"
#include "cls/journal/cls_journal_types.h"
#include "cls/journal/cls_journal_client.h"
#include "journal/Journaler.h"
#include "journal/ReplayEntry.h"
#include "journal/ReplayHandler.h"
-//#include "librbd/Journal.h" // XXXMG: for librbd::Journal::reset()
#include "librbd/journal/Types.h"
namespace rbd {
@@ -134,40 +133,35 @@ static int do_show_journal_status(librados::IoCtx& io_ctx,
static int do_reset_journal(librados::IoCtx& io_ctx,
const std::string& journal_id)
{
- // XXXMG: does not work due to a linking issue
- //return librbd::Journal::reset(io_ctx, journal_id);
-
- ::journal::Journaler journaler(io_ctx, journal_id, "", 5);
-
- C_SaferCond cond;
- journaler.init(&cond);
-
- int r = cond.wait();
+ // disable/re-enable journaling to delete/re-create the journal
+ // to properly handle mirroring constraints
+ std::string image_name;
+ int r = librbd::cls_client::dir_get_name(&io_ctx, RBD_DIRECTORY, journal_id,
+ &image_name);
if (r < 0) {
- std::cerr << "failed to initialize journal: " << cpp_strerror(r)
- << std::endl;
+ std::cerr << "failed to locate journal's image: " << cpp_strerror(r)
+ << std::endl;
return r;
}
- uint8_t order, splay_width;
- int64_t pool_id;
- journaler.get_metadata(&order, &splay_width, &pool_id);
-
- r = journaler.remove(true);
+ librbd::Image image;
+ r = utils::open_image(io_ctx, image_name, false, &image);
if (r < 0) {
- std::cerr << "failed to reset journal: " << cpp_strerror(r) << std::endl;
+ std::cerr << "failed to open image: " << cpp_strerror(r) << std::endl;
return r;
}
- r = journaler.create(order, splay_width, pool_id);
+
+ r = image.update_features(RBD_FEATURE_JOURNALING, false);
if (r < 0) {
- std::cerr << "failed to create journal: " << cpp_strerror(r) << std::endl;
+ std::cerr << "failed to disable image journaling: " << cpp_strerror(r)
+ << std::endl;
return r;
}
- // TODO register with librbd payload
- r = journaler.register_client(bufferlist());
+ r = image.update_features(RBD_FEATURE_JOURNALING, true);
if (r < 0) {
- std::cerr << "failed to register client: " << cpp_strerror(r) << std::endl;
+ std::cerr << "failed to re-enable image journaling: " << cpp_strerror(r)
+ << std::endl;
return r;
}
return 0;
diff --git a/src/tools/rbd_mirror/Replayer.cc b/src/tools/rbd_mirror/Replayer.cc
index ce76f99..d73bc10 100644
--- a/src/tools/rbd_mirror/Replayer.cc
+++ b/src/tools/rbd_mirror/Replayer.cc
@@ -5,6 +5,9 @@
#include "common/Formatter.h"
#include "common/admin_socket.h"
+#include "common/ceph_argparse.h"
+#include "common/code_environment.h"
+#include "common/common_init.h"
#include "common/debug.h"
#include "common/errno.h"
#include "include/stringify.h"
@@ -143,37 +146,60 @@ int Replayer::init()
{
dout(20) << "replaying for " << m_peer << dendl;
- int r = m_remote->init2(m_peer.client_name.c_str(),
- m_peer.cluster_name.c_str(), 0);
- if (r < 0) {
- derr << "error initializing remote cluster handle for " << m_peer
- << " : " << cpp_strerror(r) << dendl;
- return r;
+ // NOTE: manually bootstrap a CephContext here instead of via
+ // the librados API to avoid mixing global singletons between
+ // the librados shared library and the daemon
+ // TODO: eliminate intermingling of global singletons within Ceph APIs
+ CephInitParameters iparams(CEPH_ENTITY_TYPE_CLIENT);
+ if (m_peer.client_name.empty() ||
+ !iparams.name.from_str(m_peer.client_name)) {
+ derr << "error initializing remote cluster handle for " << m_peer << dendl;
+ return -EINVAL;
}
- r = m_remote->conf_read_file(nullptr);
+ CephContext *cct = common_preinit(iparams, CODE_ENVIRONMENT_LIBRARY,
+ CINIT_FLAG_UNPRIVILEGED_DAEMON_DEFAULTS);
+ cct->_conf->cluster = m_peer.cluster_name;
+
+ // librados::Rados::conf_read_file
+ int r = cct->_conf->parse_config_files(nullptr, nullptr, 0);
if (r < 0) {
- derr << "could not read ceph conf for " << m_peer
- << " : " << cpp_strerror(r) << dendl;
+ derr << "could not read ceph conf for " << m_peer << ": "
+ << cpp_strerror(r) << dendl;
+ cct->put();
return r;
}
+ cct->_conf->parse_env();
- r = m_remote->conf_parse_env(nullptr);
+ // librados::Rados::conf_parse_env
+ std::vector<const char*> args;
+ env_to_vec(args, nullptr);
+ r = cct->_conf->parse_argv(args);
if (r < 0) {
- derr << "could not parse environment for " << m_peer
- << " : " << cpp_strerror(r) << dendl;
+ derr << "could not parse environment for " << m_peer << ":"
+ << cpp_strerror(r) << dendl;
+ cct->put();
return r;
}
if (!m_args.empty()) {
- r = m_remote->conf_parse_argv(m_args.size(), &m_args[0]);
+ // librados::Rados::conf_parse_argv
+ r = cct->_conf->parse_argv(m_args);
if (r < 0) {
- derr << "could not parse command line args for " << m_peer
- << " : " << cpp_strerror(r) << dendl;
+ derr << "could not parse command line args for " << m_peer << ": "
+ << cpp_strerror(r) << dendl;
+ cct->put();
return r;
}
}
+ cct->_conf->apply_changes(nullptr);
+ cct->_conf->complain_about_parse_errors(cct);
+
+ r = m_remote->init_with_context(cct);
+ assert(r == 0);
+ cct->put();
+
r = m_remote->connect();
if (r < 0) {
derr << "error connecting to remote cluster " << m_peer
diff --git a/src/vstart.sh b/src/vstart.sh
index c81a793..9d225ef 100755
--- a/src/vstart.sh
+++ b/src/vstart.sh
@@ -625,7 +625,7 @@ fi
if [ "$start_mds" -eq 1 -a "$CEPH_NUM_MDS" -gt 0 ]; then
if [ "$CEPH_NUM_FS" -gt "1" ] ; then
- $CEPH_ADM fs flag set enable_multiple true
+ $CEPH_ADM fs flag set enable_multiple true --yes-i-really-mean-it
fi
fs=0
diff --git a/systemd/ceph-mds@.service b/systemd/ceph-mds@.service
index e122580..f13cef4 100644
--- a/systemd/ceph-mds@.service
+++ b/systemd/ceph-mds@.service
@@ -15,6 +15,7 @@ PrivateDevices=yes
ProtectHome=true
ProtectSystem=full
PrivateTmp=true
+TasksMax=infinity
[Install]
WantedBy=ceph-mds.target
diff --git a/systemd/ceph-mon@.service b/systemd/ceph-mon@.service
index a8d427b..b9501d6 100644
--- a/systemd/ceph-mon@.service
+++ b/systemd/ceph-mon@.service
@@ -21,6 +21,7 @@ PrivateDevices=yes
ProtectHome=true
ProtectSystem=full
PrivateTmp=true
+TasksMax=infinity
[Install]
WantedBy=ceph-mon.target
diff --git a/systemd/ceph-osd@.service b/systemd/ceph-osd@.service
index 0d73afb..1778db7 100644
--- a/systemd/ceph-osd@.service
+++ b/systemd/ceph-osd@.service
@@ -15,6 +15,7 @@ ExecReload=/bin/kill -HUP $MAINPID
ProtectHome=true
ProtectSystem=full
PrivateTmp=true
+TasksMax=infinity
[Install]
WantedBy=ceph-osd.target
diff --git a/systemd/ceph-radosgw@.service b/systemd/ceph-radosgw@.service
index 66d9eb8..cfa5788 100644
--- a/systemd/ceph-radosgw@.service
+++ b/systemd/ceph-radosgw@.service
@@ -14,6 +14,7 @@ PrivateDevices=yes
ProtectHome=true
ProtectSystem=full
PrivateTmp=true
+TasksMax=infinity
[Install]
WantedBy=ceph-radosgw.target
diff --git a/systemd/ceph-rbd-mirror@.service b/systemd/ceph-rbd-mirror@.service
index 4c2e2f0..d38aec5 100644
--- a/systemd/ceph-rbd-mirror@.service
+++ b/systemd/ceph-rbd-mirror@.service
@@ -17,6 +17,7 @@ PrivateTmp=true
Restart=on-failure
StartLimitInterval=30min
StartLimitBurst=3
+TasksMax=infinity
[Install]
WantedBy=ceph-rbd-mirror.target
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-ceph/ceph.git
More information about the Pkg-ceph-commits
mailing list