[Pkg-ceph-commits] [ceph] 01/02: Imported Upstream version 0.80.9
Gaudenz Steinlin
gaudenz at moszumanska.debian.org
Fri May 1 22:18:44 UTC 2015
This is an automated email from the git hooks/post-receive script.
gaudenz pushed a commit to branch master
in repository ceph.
commit 4b4ee2c92a3d179818d3af4d41b3d266da95acd8
Author: Gaudenz Steinlin <gaudenz at debian.org>
Date: Sat May 2 00:18:08 2015 +0200
Imported Upstream version 0.80.9
---
ceph.spec | 65 +-
ceph.spec.in | 63 +-
configure | 22 +-
configure.ac | 2 +-
man/ceph.8 | 2307 +++++++++++++++++++-
src/.git_version | 4 +-
src/Makefile-env.am | 1 +
src/Makefile.am | 1 +
src/Makefile.in | 358 ++-
src/ceph-disk | 114 +-
src/ceph.in | 6 +-
src/ceph_mon.cc | 20 +
src/civetweb/civetweb.h | 24 +-
src/civetweb/include/civetweb.h | 24 +-
src/civetweb/include/civetweb_conf.h | 6 +
src/civetweb/src/civetweb.c | 771 +++++--
src/client/Client.cc | 374 +++-
src/client/Client.h | 23 +-
src/client/Fh.h | 8 +-
src/client/Inode.h | 11 +-
src/client/MetaRequest.h | 14 +-
src/client/fuse_ll.cc | 77 +-
src/common/Makefile.am | 3 +-
src/common/Thread.cc | 3 +-
src/common/Thread.h | 1 +
src/common/TrackedOp.cc | 2 +-
src/common/WorkQueue.cc | 4 +
src/common/ceph_crypto.h | 8 +-
src/common/config.cc | 4 +-
src/common/config_opts.h | 7 +-
src/common/crc32c_intel_fast_asm.S | 2 +
src/common/crc32c_intel_fast_zero_asm.S | 2 +
src/common/hobject.cc | 4 +-
src/common/io_priority.cc | 4 +-
src/common/util.cc | 22 +
src/crush/CrushCompiler.cc | 4 +
src/crush/CrushTester.cc | 16 +-
src/crush/CrushTester.h | 9 +
src/crush/CrushWrapper.cc | 108 +-
src/crush/CrushWrapper.h | 75 +-
src/crush/builder.c | 243 ++-
src/crush/builder.h | 11 +-
src/crush/crush.h | 8 +
src/crush/mapper.c | 16 +-
src/include/ceph_features.h | 2 +
src/include/ceph_fs.h | 6 +-
src/include/util.h | 24 +
src/init-radosgw.sysv | 6 +-
src/librbd/AioCompletion.h | 12 +
src/librbd/ImageCtx.cc | 14 +-
src/librbd/ImageCtx.h | 10 +-
src/librbd/internal.cc | 81 +-
src/mds/CInode.h | 1 +
src/mds/Dumper.cc | 23 +-
src/mds/Locker.cc | 20 +-
src/mds/MDCache.cc | 8 +-
src/mds/Makefile.am | 1 -
src/mds/Server.cc | 21 +-
src/mds/flock.cc | 69 +-
src/mds/flock.h | 2 +
src/mds/mdstypes.h | 4 +-
src/messages/MClientReconnect.h | 6 +-
src/mon/DataHealthService.cc | 39 +-
src/mon/MonCommands.h | 12 +
src/mon/Monitor.cc | 27 +-
src/mon/MonitorDBStore.h | 27 +-
src/mon/OSDMonitor.cc | 169 +-
src/mon/PGMap.cc | 37 +-
src/mon/PGMonitor.cc | 1 +
src/mon/Paxos.cc | 169 +-
src/mon/Paxos.h | 44 +
src/mon/mon_types.h | 45 +-
src/os/FileJournal.cc | 89 +-
src/osd/ECBackend.cc | 50 +-
src/osd/OSD.cc | 108 +-
src/osd/OSD.h | 6 +-
src/osd/OSDMap.h | 12 +
src/osd/PG.cc | 106 +-
src/osd/PG.h | 19 +-
src/osd/ReplicatedPG.cc | 112 +-
src/osd/ReplicatedPG.h | 6 +-
src/osd/Watch.h | 7 +-
src/osd/osd_types.cc | 14 +-
src/osdc/ObjectCacher.cc | 47 +-
src/osdc/ObjectCacher.h | 3 +
src/osdc/Objecter.cc | 9 +-
src/pybind/rados.py | 8 +-
src/rgw/Makefile.am | 20 +-
src/rgw/rgw_admin.cc | 14 +-
src/rgw/rgw_civetweb.cc | 6 +-
src/rgw/rgw_civetweb_log.cc | 14 +
src/rgw/rgw_civetweb_log.h | 6 +
src/rgw/rgw_common.cc | 5 +-
src/rgw/rgw_common.h | 2 +-
src/rgw/rgw_fcgi.cc | 4 +-
src/rgw/rgw_http_client.cc | 52 +-
src/rgw/rgw_json_enc.cc | 2 +-
src/rgw/rgw_main.cc | 35 +-
src/rgw/rgw_op.cc | 88 +-
src/rgw/rgw_op.h | 4 +-
src/rgw/rgw_rados.cc | 161 +-
src/rgw/rgw_rados.h | 19 +-
src/rgw/rgw_rest.cc | 38 +-
src/rgw/rgw_rest.h | 1 -
src/rgw/rgw_rest_s3.cc | 47 +-
src/rgw/rgw_rest_swift.cc | 30 +-
src/rgw/rgw_swift.cc | 32 +-
src/rgw/rgw_swift.h | 1 +
src/rgw/rgw_swift_auth.cc | 9 +-
src/rgw/rgw_swift_auth.h | 2 +-
src/rgw/rgw_user.cc | 11 +-
src/test/Makefile.am | 10 +-
src/test/cli/crushtool/add-item-in-tree.t | 10 +
src/test/cli/crushtool/adjust-item-weight.t | 17 +
src/test/cli/crushtool/build.t | 2 +-
src/test/cli/crushtool/help.t | 1 +
src/test/cli/crushtool/set-choose.t | 9 +-
src/test/cli/crushtool/simple.template.adj.one | 56 +
src/test/cli/crushtool/simple.template.adj.three | 64 +
src/test/cli/crushtool/simple.template.adj.two | 64 +
src/test/cli/crushtool/test-map-bobtail-tunables.t | 2 +-
src/test/cli/crushtool/test-map-firefly-tunables.t | 2 +-
src/test/cli/crushtool/test-map-firstn-indep.t | 14 +
src/test/cli/crushtool/test-map-firstn-indep.txt | 443 ++++
src/test/cli/crushtool/test-map-indep.t | 2 +-
src/test/cli/crushtool/test-map-legacy-tunables.t | 2 +-
src/test/cli/crushtool/test-map-tries-vs-retries.t | 2 +-
src/test/cli/crushtool/test-map-vary-r-0.t | 2 +-
src/test/cli/crushtool/test-map-vary-r-1.t | 2 +-
src/test/cli/crushtool/test-map-vary-r-2.t | 2 +-
src/test/cli/crushtool/test-map-vary-r-3.t | 2 +-
src/test/cli/crushtool/test-map-vary-r-4.t | 2 +-
src/test/cli/crushtool/tree.template | Bin 0 -> 376 bytes
src/test/cli/crushtool/tree.template.final | 70 +
src/test/cli/osdmaptool/create-print.t | 1 +
src/test/cli/osdmaptool/create-racks.t | 1 +
src/test/cli/osdmaptool/crush.t | 2 +-
src/test/cli/osdmaptool/help.t | 1 +
src/test/cli/osdmaptool/missing-argument.t | 1 +
src/test/cli/osdmaptool/test-map-pgs.t | 4 +-
src/test/common/histogram.cc | 113 +-
src/test/common/test_io_priority.cc | 51 +
src/test/crush/TestCrushWrapper.cc | 270 +++
src/test/crush/indep.cc | 33 +-
src/test/erasure-code/Makefile.am | 8 +
src/test/erasure-code/TestErasureCodeJerasure.cc | 30 -
.../ceph_erasure_code_non_regression.cc | 325 +++
src/test/libcephfs/test.cc | 1 +
src/test/librados/misc.cc | 6 +-
src/test/librados/snapshots.cc | 18 +
src/test/librados/tier.cc | 76 +
src/test/librbd/test_librbd.cc | 106 +
src/test/mon/mon-test-helpers.sh | 3 +-
src/tools/crushtool.cc | 16 +-
src/tools/osdmaptool.cc | 11 +-
155 files changed, 7297 insertions(+), 1490 deletions(-)
diff --git a/ceph.spec b/ceph.spec
index 1e9a2a6..4984c05 100644
--- a/ceph.spec
+++ b/ceph.spec
@@ -9,13 +9,16 @@
# common
#################################################################################
Name: ceph
-Version: 0.80.7
+Version: 0.80.9
Release: 0%{?dist}
Summary: User space components of the Ceph file system
License: GPL-2.0
Group: System Environment/Base
URL: http://ceph.com/
Source0: http://ceph.com/download/%{name}-%{version}.tar.bz2
+%if 0%{?fedora} || 0%{?centos} || 0%{?rhel}
+Patch0: init-ceph.in-fedora.patch
+%endif
Requires: librbd1 = %{version}-%{release}
Requires: librados2 = %{version}-%{release}
Requires: libcephfs1 = %{version}-%{release}
@@ -24,13 +27,13 @@ Requires: python
Requires: python-argparse
Requires: python-ceph
Requires: python-requests
+Requires: python-flask
Requires: xfsprogs
Requires: cryptsetup
Requires: parted
Requires: util-linux
Requires: hdparm
Requires(post): binutils
-BuildRoot: %{_tmppath}/%{name}-%{version}-build
BuildRequires: make
BuildRequires: gcc-c++
BuildRequires: libtool
@@ -50,14 +53,14 @@ BuildRequires: libblkid-devel >= 2.17
BuildRequires: leveldb-devel > 1.2
BuildRequires: xfsprogs-devel
BuildRequires: yasm
-%if 0%{?rhel_version} || 0%{?centos_version} || 0%{?fedora}
+%if 0%{?rhel} || 0%{?centos} || 0%{?fedora}
BuildRequires: snappy-devel
%endif
#################################################################################
# specific
#################################################################################
-%if ! 0%{?rhel}
+%if ! 0%{?rhel} || 0%{?fedora}
BuildRequires: sharutils
%endif
@@ -173,8 +176,8 @@ managers such as Pacemaker.
Summary: RADOS distributed object store client library
Group: System Environment/Libraries
License: LGPL-2.0
-%if 0%{?rhel_version} || 0%{?centos_version} || 0%{?fedora}
-Obsoletes: ceph-libs
+%if 0%{?rhel} || 0%{?centos} || 0%{?fedora}
+Obsoletes: ceph-libs < %{version}-%{release}
%endif
%description -n librados2
RADOS is a reliable, autonomic distributed object storage cluster
@@ -187,8 +190,8 @@ Summary: RADOS block device client library
Group: System Environment/Libraries
License: LGPL-2.0
Requires: librados2 = %{version}-%{release}
-%if 0%{?rhel_version} || 0%{?centos_version} || 0%{?fedora}
-Obsoletes: ceph-libs
+%if 0%{?rhel} || 0%{?centos} || 0%{?fedora}
+Obsoletes: ceph-libs < %{version}-%{release}
%endif
%description -n librbd1
RBD is a block device striped across multiple distributed objects in
@@ -200,8 +203,9 @@ shared library allowing applications to manage these block devices.
Summary: Ceph distributed file system client library
Group: System Environment/Libraries
License: LGPL-2.0
-%if 0%{?rhel_version} || 0%{?centos_version} || 0%{?fedora}
-Obsoletes: ceph-libs
+%if 0%{?rhel} || 0%{?centos} || 0%{?fedora}
+Obsoletes: ceph-libs < %{version}-%{release}
+Obsoletes: ceph-libcephfs
%endif
%description -n libcephfs1
Ceph is a distributed network file system designed to provide excellent
@@ -215,7 +219,6 @@ Group: System Environment/Libraries
License: LGPL-2.0
Requires: librados2 = %{version}-%{release}
Requires: librbd1 = %{version}-%{release}
-Requires: python-flask
%if 0%{defined suse_version}
%py_requires
%endif
@@ -264,6 +267,23 @@ BuildRequires: junit4
%description -n cephfs-java
This package contains the Java libraries for the Ceph File System.
+%package libs-compat
+Summary: Meta package to include ceph libraries.
+Group: System Environment/Libraries
+License: LGPL-2.0
+Obsoletes: ceph-libs
+Requires: librados2 = %{version}-%{release}
+Requires: librbd1 = %{version}-%{release}
+Requires: libcephfs1 = %{version}-%{release}
+Provides: ceph-libs
+
+%description libs-compat
+This is a meta package, that pulls in librados2, librbd1 and libcephfs1. It
+is included for backwards compatibility with distributions that depend on the
+former ceph-libs package, which is now split up into these three subpackages.
+Packages still depending on ceph-libs should be fixed to depend on librados2,
+librbd1 or libcephfs1 instead.
+
%if 0%{?opensuse} || 0%{?suse_version}
%debug_package
%endif
@@ -273,6 +293,9 @@ This package contains the Java libraries for the Ceph File System.
#################################################################################
%prep
%setup -q
+%if 0%{?fedora} || 0%{?rhel} || 0%{?centos}
+%patch0 -p1 -b .init
+%endif
%build
# Find jni.h
@@ -329,7 +352,7 @@ chmod 0644 $RPM_BUILD_ROOT%{_docdir}/ceph/sample.ceph.conf
chmod 0644 $RPM_BUILD_ROOT%{_docdir}/ceph/sample.fetch_config
# udev rules
-%if 0%{?rhel} >= 7
+%if 0%{?rhel} >= 7 || 0%{?fedora}
install -m 0644 -D udev/50-rbd.rules $RPM_BUILD_ROOT/usr/lib/udev/rules.d/50-rbd.rules
install -m 0644 -D udev/60-ceph-partuuid-workaround.rules $RPM_BUILD_ROOT/usr/lib/udev/rules.d/60-ceph-partuuid-workaround.rules
%else
@@ -337,13 +360,13 @@ install -m 0644 -D udev/50-rbd.rules $RPM_BUILD_ROOT/lib/udev/rules.d/50-rbd.rul
install -m 0644 -D udev/60-ceph-partuuid-workaround.rules $RPM_BUILD_ROOT/lib/udev/rules.d/60-ceph-partuuid-workaround.rules
%endif
-%if (0%{?rhel} || 0%{?rhel} < 7)
+%if (0%{?rhel} && 0%{?rhel} < 7)
install -m 0644 -D udev/95-ceph-osd-alt.rules $RPM_BUILD_ROOT/lib/udev/rules.d/95-ceph-osd.rules
%else
install -m 0644 -D udev/95-ceph-osd.rules $RPM_BUILD_ROOT/lib/udev/rules.d/95-ceph-osd.rules
%endif
-%if 0%{?rhel} >= 7
+%if 0%{?rhel} >= 7 || 0%{?fedora}
mv $RPM_BUILD_ROOT/lib/udev/rules.d/95-ceph-osd.rules $RPM_BUILD_ROOT/usr/lib/udev/rules.d/95-ceph-osd.rules
mv $RPM_BUILD_ROOT/sbin/mkcephfs $RPM_BUILD_ROOT/usr/sbin/mkcephfs
mv $RPM_BUILD_ROOT/sbin/mount.ceph $RPM_BUILD_ROOT/usr/sbin/mount.ceph
@@ -404,6 +427,7 @@ fi
%{_bindir}/cephfs
%{_bindir}/ceph-clsinfo
%{_bindir}/ceph-rest-api
+%{python_sitelib}/ceph_rest_api.py*
%{_bindir}/crushtool
%{_bindir}/monmaptool
%{_bindir}/osdmaptool
@@ -424,7 +448,7 @@ fi
%{_sbindir}/ceph-disk-udev
%{_sbindir}/ceph-create-keys
%{_sbindir}/rcceph
-%if 0%{?rhel} >= 7
+%if 0%{?rhel} >= 7 || 0%{?fedora}
%{_sbindir}/mount.ceph
%else
/sbin/mount.ceph
@@ -451,7 +475,7 @@ fi
%{_libdir}/ceph/erasure-code/libec_jerasure*.so*
%{_libdir}/ceph/erasure-code/libec_test_jerasure*.so*
%{_libdir}/ceph/erasure-code/libec_missing_entry_point.so*
-%if 0%{?rhel} >= 7
+%if 0%{?rhel} >= 7 || 0%{?fedora}
/usr/lib/udev/rules.d/60-ceph-partuuid-workaround.rules
/usr/lib/udev/rules.d/95-ceph-osd.rules
%else
@@ -529,7 +553,7 @@ fi
%defattr(-,root,root,-)
%{_bindir}/ceph-fuse
%{_mandir}/man8/ceph-fuse.8*
-%if 0%{?rhel} >= 7
+%if 0%{?rhel} >= 7 || 0%{?fedora}
%{_sbindir}/mount.fuse.ceph
%else
/sbin/mount.fuse.ceph
@@ -624,7 +648,7 @@ fi
%files -n librbd1
%defattr(-,root,root,-)
%{_libdir}/librbd.so.*
-%if 0%{?rhel} >= 7
+%if 0%{?rhel} >= 7 || 0%{?fedora}
/usr/lib/udev/rules.d/50-rbd.rules
%else
/lib/udev/rules.d/50-rbd.rules
@@ -656,7 +680,6 @@ ln -sf %{_libdir}/librbd.so.1 /usr/lib64/qemu/librbd.so.1
%{python_sitelib}/rbd.py*
%{python_sitelib}/cephfs.py*
%{python_sitelib}/ceph_argparse.py*
-%{python_sitelib}/ceph_rest_api.py*
#################################################################################
%files -n rest-bench
@@ -702,4 +725,8 @@ ln -sf %{_libdir}/librbd.so.1 /usr/lib64/qemu/librbd.so.1
%{_javadir}/libcephfs.jar
%{_javadir}/libcephfs-test.jar
+%files libs-compat
+# We need an empty %files list for ceph-libs-compat, to tell rpmbuild to actually
+# build this meta package.
+
%changelog
diff --git a/ceph.spec.in b/ceph.spec.in
index 5454454..02b300b 100644
--- a/ceph.spec.in
+++ b/ceph.spec.in
@@ -16,6 +16,9 @@ License: GPL-2.0
Group: System Environment/Base
URL: http://ceph.com/
Source0: http://ceph.com/download/%{name}-%{version}.tar.bz2
+%if 0%{?fedora} || 0%{?centos} || 0%{?rhel}
+Patch0: init-ceph.in-fedora.patch
+%endif
Requires: librbd1 = %{version}-%{release}
Requires: librados2 = %{version}-%{release}
Requires: libcephfs1 = %{version}-%{release}
@@ -24,13 +27,13 @@ Requires: python
Requires: python-argparse
Requires: python-ceph
Requires: python-requests
+Requires: python-flask
Requires: xfsprogs
Requires: cryptsetup
Requires: parted
Requires: util-linux
Requires: hdparm
Requires(post): binutils
-BuildRoot: %{_tmppath}/%{name}-%{version}-build
BuildRequires: make
BuildRequires: gcc-c++
BuildRequires: libtool
@@ -50,14 +53,14 @@ BuildRequires: libblkid-devel >= 2.17
BuildRequires: leveldb-devel > 1.2
BuildRequires: xfsprogs-devel
BuildRequires: yasm
-%if 0%{?rhel_version} || 0%{?centos_version} || 0%{?fedora}
+%if 0%{?rhel} || 0%{?centos} || 0%{?fedora}
BuildRequires: snappy-devel
%endif
#################################################################################
# specific
#################################################################################
-%if ! 0%{?rhel}
+%if ! 0%{?rhel} || 0%{?fedora}
BuildRequires: sharutils
%endif
@@ -173,8 +176,8 @@ managers such as Pacemaker.
Summary: RADOS distributed object store client library
Group: System Environment/Libraries
License: LGPL-2.0
-%if 0%{?rhel_version} || 0%{?centos_version} || 0%{?fedora}
-Obsoletes: ceph-libs
+%if 0%{?rhel} || 0%{?centos} || 0%{?fedora}
+Obsoletes: ceph-libs < %{version}-%{release}
%endif
%description -n librados2
RADOS is a reliable, autonomic distributed object storage cluster
@@ -187,8 +190,8 @@ Summary: RADOS block device client library
Group: System Environment/Libraries
License: LGPL-2.0
Requires: librados2 = %{version}-%{release}
-%if 0%{?rhel_version} || 0%{?centos_version} || 0%{?fedora}
-Obsoletes: ceph-libs
+%if 0%{?rhel} || 0%{?centos} || 0%{?fedora}
+Obsoletes: ceph-libs < %{version}-%{release}
%endif
%description -n librbd1
RBD is a block device striped across multiple distributed objects in
@@ -200,8 +203,9 @@ shared library allowing applications to manage these block devices.
Summary: Ceph distributed file system client library
Group: System Environment/Libraries
License: LGPL-2.0
-%if 0%{?rhel_version} || 0%{?centos_version} || 0%{?fedora}
-Obsoletes: ceph-libs
+%if 0%{?rhel} || 0%{?centos} || 0%{?fedora}
+Obsoletes: ceph-libs < %{version}-%{release}
+Obsoletes: ceph-libcephfs
%endif
%description -n libcephfs1
Ceph is a distributed network file system designed to provide excellent
@@ -215,7 +219,6 @@ Group: System Environment/Libraries
License: LGPL-2.0
Requires: librados2 = %{version}-%{release}
Requires: librbd1 = %{version}-%{release}
-Requires: python-flask
%if 0%{defined suse_version}
%py_requires
%endif
@@ -264,6 +267,23 @@ BuildRequires: junit4
%description -n cephfs-java
This package contains the Java libraries for the Ceph File System.
+%package libs-compat
+Summary: Meta package to include ceph libraries.
+Group: System Environment/Libraries
+License: LGPL-2.0
+Obsoletes: ceph-libs
+Requires: librados2 = %{version}-%{release}
+Requires: librbd1 = %{version}-%{release}
+Requires: libcephfs1 = %{version}-%{release}
+Provides: ceph-libs
+
+%description libs-compat
+This is a meta package, that pulls in librados2, librbd1 and libcephfs1. It
+is included for backwards compatibility with distributions that depend on the
+former ceph-libs package, which is now split up into these three subpackages.
+Packages still depending on ceph-libs should be fixed to depend on librados2,
+librbd1 or libcephfs1 instead.
+
%if 0%{?opensuse} || 0%{?suse_version}
%debug_package
%endif
@@ -273,6 +293,9 @@ This package contains the Java libraries for the Ceph File System.
#################################################################################
%prep
%setup -q
+%if 0%{?fedora} || 0%{?rhel} || 0%{?centos}
+%patch0 -p1 -b .init
+%endif
%build
# Find jni.h
@@ -329,7 +352,7 @@ chmod 0644 $RPM_BUILD_ROOT%{_docdir}/ceph/sample.ceph.conf
chmod 0644 $RPM_BUILD_ROOT%{_docdir}/ceph/sample.fetch_config
# udev rules
-%if 0%{?rhel} >= 7
+%if 0%{?rhel} >= 7 || 0%{?fedora}
install -m 0644 -D udev/50-rbd.rules $RPM_BUILD_ROOT/usr/lib/udev/rules.d/50-rbd.rules
install -m 0644 -D udev/60-ceph-partuuid-workaround.rules $RPM_BUILD_ROOT/usr/lib/udev/rules.d/60-ceph-partuuid-workaround.rules
%else
@@ -337,13 +360,13 @@ install -m 0644 -D udev/50-rbd.rules $RPM_BUILD_ROOT/lib/udev/rules.d/50-rbd.rul
install -m 0644 -D udev/60-ceph-partuuid-workaround.rules $RPM_BUILD_ROOT/lib/udev/rules.d/60-ceph-partuuid-workaround.rules
%endif
-%if (0%{?rhel} || 0%{?rhel} < 7)
+%if (0%{?rhel} && 0%{?rhel} < 7)
install -m 0644 -D udev/95-ceph-osd-alt.rules $RPM_BUILD_ROOT/lib/udev/rules.d/95-ceph-osd.rules
%else
install -m 0644 -D udev/95-ceph-osd.rules $RPM_BUILD_ROOT/lib/udev/rules.d/95-ceph-osd.rules
%endif
-%if 0%{?rhel} >= 7
+%if 0%{?rhel} >= 7 || 0%{?fedora}
mv $RPM_BUILD_ROOT/lib/udev/rules.d/95-ceph-osd.rules $RPM_BUILD_ROOT/usr/lib/udev/rules.d/95-ceph-osd.rules
mv $RPM_BUILD_ROOT/sbin/mkcephfs $RPM_BUILD_ROOT/usr/sbin/mkcephfs
mv $RPM_BUILD_ROOT/sbin/mount.ceph $RPM_BUILD_ROOT/usr/sbin/mount.ceph
@@ -404,6 +427,7 @@ fi
%{_bindir}/cephfs
%{_bindir}/ceph-clsinfo
%{_bindir}/ceph-rest-api
+%{python_sitelib}/ceph_rest_api.py*
%{_bindir}/crushtool
%{_bindir}/monmaptool
%{_bindir}/osdmaptool
@@ -424,7 +448,7 @@ fi
%{_sbindir}/ceph-disk-udev
%{_sbindir}/ceph-create-keys
%{_sbindir}/rcceph
-%if 0%{?rhel} >= 7
+%if 0%{?rhel} >= 7 || 0%{?fedora}
%{_sbindir}/mount.ceph
%else
/sbin/mount.ceph
@@ -451,7 +475,7 @@ fi
%{_libdir}/ceph/erasure-code/libec_jerasure*.so*
%{_libdir}/ceph/erasure-code/libec_test_jerasure*.so*
%{_libdir}/ceph/erasure-code/libec_missing_entry_point.so*
-%if 0%{?rhel} >= 7
+%if 0%{?rhel} >= 7 || 0%{?fedora}
/usr/lib/udev/rules.d/60-ceph-partuuid-workaround.rules
/usr/lib/udev/rules.d/95-ceph-osd.rules
%else
@@ -529,7 +553,7 @@ fi
%defattr(-,root,root,-)
%{_bindir}/ceph-fuse
%{_mandir}/man8/ceph-fuse.8*
-%if 0%{?rhel} >= 7
+%if 0%{?rhel} >= 7 || 0%{?fedora}
%{_sbindir}/mount.fuse.ceph
%else
/sbin/mount.fuse.ceph
@@ -624,7 +648,7 @@ fi
%files -n librbd1
%defattr(-,root,root,-)
%{_libdir}/librbd.so.*
-%if 0%{?rhel} >= 7
+%if 0%{?rhel} >= 7 || 0%{?fedora}
/usr/lib/udev/rules.d/50-rbd.rules
%else
/lib/udev/rules.d/50-rbd.rules
@@ -656,7 +680,6 @@ ln -sf %{_libdir}/librbd.so.1 /usr/lib64/qemu/librbd.so.1
%{python_sitelib}/rbd.py*
%{python_sitelib}/cephfs.py*
%{python_sitelib}/ceph_argparse.py*
-%{python_sitelib}/ceph_rest_api.py*
#################################################################################
%files -n rest-bench
@@ -702,4 +725,8 @@ ln -sf %{_libdir}/librbd.so.1 /usr/lib64/qemu/librbd.so.1
%{_javadir}/libcephfs.jar
%{_javadir}/libcephfs-test.jar
+%files libs-compat
+# We need an empty %files list for ceph-libs-compat, to tell rpmbuild to actually
+# build this meta package.
+
%changelog
diff --git a/configure b/configure
index d8d4d5c..1953007 100755
--- a/configure
+++ b/configure
@@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.68 for ceph 0.80.7.
+# Generated by GNU Autoconf 2.68 for ceph 0.80.9.
#
# Report bugs to <ceph-devel at vger.kernel.org>.
#
@@ -570,8 +570,8 @@ MAKEFLAGS=
# Identity of this package.
PACKAGE_NAME='ceph'
PACKAGE_TARNAME='ceph'
-PACKAGE_VERSION='0.80.7'
-PACKAGE_STRING='ceph 0.80.7'
+PACKAGE_VERSION='0.80.9'
+PACKAGE_STRING='ceph 0.80.9'
PACKAGE_BUGREPORT='ceph-devel at vger.kernel.org'
PACKAGE_URL=''
@@ -1441,7 +1441,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
-\`configure' configures ceph 0.80.7 to adapt to many kinds of systems.
+\`configure' configures ceph 0.80.9 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1512,7 +1512,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
- short | recursive ) echo "Configuration of ceph 0.80.7:";;
+ short | recursive ) echo "Configuration of ceph 0.80.9:";;
esac
cat <<\_ACEOF
@@ -1657,7 +1657,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
-ceph configure 0.80.7
+ceph configure 0.80.9
generated by GNU Autoconf 2.68
Copyright (C) 2010 Free Software Foundation, Inc.
@@ -2682,7 +2682,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
-It was created by ceph $as_me 0.80.7, which was
+It was created by ceph $as_me 0.80.9, which was
generated by GNU Autoconf 2.68. Invocation command line was
$ $0 $@
@@ -4682,7 +4682,7 @@ fi
# Define the identity of the package.
PACKAGE='ceph'
- VERSION='0.80.7'
+ VERSION='0.80.9'
cat >>confdefs.h <<_ACEOF
@@ -12660,7 +12660,7 @@ fi
# Define the identity of the package.
PACKAGE='ceph'
- VERSION='0.80.7'
+ VERSION='0.80.9'
cat >>confdefs.h <<_ACEOF
@@ -22464,7 +22464,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
-This file was extended by ceph $as_me 0.80.7, which was
+This file was extended by ceph $as_me 0.80.9, which was
generated by GNU Autoconf 2.68. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
@@ -22530,7 +22530,7 @@ _ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
ac_cs_version="\\
-ceph config.status 0.80.7
+ceph config.status 0.80.9
configured by $0, generated by GNU Autoconf 2.68,
with options \\"\$ac_cs_config\\"
diff --git a/configure.ac b/configure.ac
index 7255c7c..4621738 100644
--- a/configure.ac
+++ b/configure.ac
@@ -8,7 +8,7 @@ AC_PREREQ(2.59)
# VERSION define is not used by the code. It gets a version string
# from 'git describe'; see src/ceph_ver.[ch]
-AC_INIT([ceph], [0.80.7], [ceph-devel at vger.kernel.org])
+AC_INIT([ceph], [0.80.9], [ceph-devel at vger.kernel.org])
# Create release string. Used with VERSION for RPMs.
RPM_RELEASE=0
diff --git a/man/ceph.8 b/man/ceph.8
index 9bb903c..5f7b8dc 100644
--- a/man/ceph.8
+++ b/man/ceph.8
@@ -1,8 +1,8 @@
.\" Man page generated from reStructuredText.
.
-.TH "CEPH" "8" "January 12, 2014" "dev" "Ceph"
+.TH "CEPH" "8" "December 18, 2014" "dev" "Ceph"
.SH NAME
-ceph \- ceph file system control utility
+ceph \- ceph administration tool
.
.nr rst2man-indent-level 0
.
@@ -59,103 +59,2320 @@ level margin: \\n[rst2man-indent\\n[rst2man-indent-level]]
..
.SH SYNOPSIS
.nf
-\fBceph\fP [ \-m \fImonaddr\fP ] [ \-w | \fIcommand\fP ... ]
+\fBceph\fP \fBauth\fP \fIadd\fP \fI<entity>\fP {\fI<caps>\fP [\fI<caps>\fP\&...]}
+.fi
+.sp
+.nf
+\fBceph\fP \fBauth\fP \fIexport\fP \fI<entity>\fP
+.fi
+.sp
+.nf
+\fBceph\fP \fBconfig\-key\fP \fIget\fP \fI<key>\fP
+.fi
+.sp
+.nf
+\fBceph\fP \fBmds\fP \fIadd_data_pool\fP \fI<pool>\fP
+.fi
+.sp
+.nf
+\fBceph\fP \fBmds\fP \fIgetmap\fP {\fI<int[0\-]>\fP}
+.fi
+.sp
+.nf
+\fBceph\fP \fBmon\fP \fIadd\fP \fI<name>\fP <\fIIPaddr[:port]\fP>
+.fi
+.sp
+.nf
+\fBceph\fP \fBmon_status\fP
+.fi
+.sp
+.nf
+\fBceph\fP \fBosd\fP \fIcreate\fP {\fI<uuid>\fP}
+.fi
+.sp
+.nf
+\fBceph\fP \fBosd\fP \fBcrush\fP \fIadd\fP \fI<osdname (id|osd.id)>\fP
+.fi
+.sp
+.sp
+\fI<float[0.0\-]>\fP \fI<args>\fP [\fI<args>\fP\&...]
+.nf
+\fBceph\fP \fBpg\fP \fIforce_create_pg\fP \fI<pgid>\fP
+.fi
+.sp
+.nf
+\fBceph\fP \fBpg\fP \fIstat\fP
+.fi
+.sp
+.nf
+\fBceph\fP \fBquorum_status\fP
.fi
.sp
.SH DESCRIPTION
.sp
-\fBceph\fP is a control utility for communicating with the monitor
-cluster of a running Ceph distributed storage system.
+\fBceph\fP is a control utility which is used for manual deployment and maintenance
+of a Ceph cluster. It provides a diverse set of commands that allows deployment of
+monitors, OSDs, placement groups, MDS and overall maintenance, administration
+of the cluster.
+.SH COMMANDS
+.SS auth
.sp
-There are three basic modes of operation.
-.SS Interactive mode
+Manage authentication keys. It is used for adding, removing, exporting
+or updating of authentication keys for a particular entity such as a monitor or
+OSD. It uses some additional subcommands.
.sp
-To start in interactive mode, no arguments are necessary. Control\-d or
-\(aqquit\(aq will exit.
-.SS Watch mode
+Subcommand \fBadd\fP adds authentication info for a particular entity from input
+file, or random key if no input given and/or any caps specified in the command.
.sp
-Watch mode shows cluster state changes as they occur. For example:
+Usage:
.INDENT 0.0
.INDENT 3.5
.sp
.nf
.ft C
-ceph \-w
+ceph auth add <entity> {<caps> [<caps>...]}
.ft P
.fi
.UNINDENT
.UNINDENT
-.SS Command line mode
.sp
-Finally, to send a single instruction to the monitor cluster (and wait
-for a response), the command can be specified on the command line.
-.SH OPTIONS
+Subcommand \fBcaps\fP updates caps for \fBname\fP from caps specified in the command.
+.sp
+Usage:
.INDENT 0.0
-.TP
-.B \-i infile
-will specify an input file to be passed along as a payload with the
-command to the monitor cluster. This is only used for specific
-monitor commands.
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph auth caps <entity> <caps> [<caps>...]
+.ft P
+.fi
+.UNINDENT
.UNINDENT
+.sp
+Subcommand \fBdel\fP deletes all caps for \fBname\fP\&.
+.sp
+Usage:
.INDENT 0.0
-.TP
-.B \-o outfile
-will write any payload returned by the monitor cluster with its
-reply to outfile. Only specific monitor commands (e.g. osd getmap)
-return a payload.
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph auth del <entity>
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBexport\fP writes keyring for requested entity, or master keyring if
+none given.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph auth export {<entity>}
+.ft P
+.fi
.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBget\fP writes keyring file with requested key.
+.sp
+Usage:
.INDENT 0.0
-.TP
-.B \-c ceph.conf, \-\-conf=ceph.conf
-Use ceph.conf configuration file instead of the default
-/etc/ceph/ceph.conf to determine monitor addresses during startup.
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph auth get <entity>
+.ft P
+.fi
.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBget\-key\fP displays requested key.
+.sp
+Usage:
.INDENT 0.0
-.TP
-.B \-m monaddress[:port]
-Connect to specified monitor (instead of looking through ceph.conf).
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph auth get\-key <entity>
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBget\-or\-create\fP adds authentication info for a particular entity
+from input file, or random key if no input given and/or any caps specified in the
+command.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph auth get\-or\-create <entity> {<caps> [<caps>...]}
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBget\-or\-create\-key\fP gets or adds key for \fBname\fP from system/caps
+pairs specified in the command. If key already exists, any given caps must match
+the existing caps for that key.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph auth get\-or\-create\-key <entity> {<caps> [<caps>...]}
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBimport\fP reads keyring from input file.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph auth import
+.ft P
+.fi
+.UNINDENT
.UNINDENT
-.SH EXAMPLES
.sp
-To grab a copy of the current OSD map:
+Subcommand \fBlist\fP lists authentication state.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph auth list
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBprint\-key\fP displays requested key.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph auth print\-key <entity>
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBprint_key\fP displays requested key.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph auth print_key <entity>
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SS compact
+.sp
+Causes compaction of monitor\(aqs leveldb storage.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph compact
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SS config\-key
+.sp
+Manage configuration key. It uses some additional subcommands.
+.sp
+Subcommand \fBget\fP gets the configuration key.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph config\-key get <key>
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBput\fP puts configuration key and values.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph config\-key put <key> {<val>}
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBexists\fP checks for configuration keys existence.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph config\-key exists <key>
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBlist\fP lists configuration keys.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph config\-key list
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBdel\fP deletes configuration key.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph config\-key del <key>
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SS df
+.sp
+Show cluster\(aqs free space status.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph df
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SS fsid
+.sp
+Show cluster\(aqs FSID/UUID.
+.sp
+Usage:
.INDENT 0.0
.INDENT 3.5
.sp
.nf
.ft C
-ceph \-m 1.2.3.4:6789 osd getmap \-o osdmap
+ceph fsid
.ft P
.fi
.UNINDENT
.UNINDENT
+.SS health
.sp
-To get a dump of placement group (PG) state:
+Show cluster\(aqs health.
+.sp
+Usage:
.INDENT 0.0
.INDENT 3.5
.sp
.nf
.ft C
-ceph pg dump \-o pg.txt
+ceph health
.ft P
.fi
.UNINDENT
.UNINDENT
-.SH MONITOR COMMANDS
+.SS heap
+.sp
+Show heap usage info (available only if compiled with tcmalloc)
.sp
-A more complete summary of commands understood by the monitor cluster can be found in the
-online documentation, at
+Usage:
.INDENT 0.0
.INDENT 3.5
-\fI\%http://ceph.com/docs/master/rados/operations/control\fP
+.sp
+.nf
+.ft C
+ceph heap dump|start_profiler|stop_profiler|release|stats
+.ft P
+.fi
+.UNINDENT
.UNINDENT
+.SS injectargs
+.sp
+Inject configuration arguments into monitor.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph injectargs <injected_args> [<injected_args>...]
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SS log
+.sp
+Log supplied text to the monitor log.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph log <logtext> [<logtext>...]
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SS mds
+.sp
+Manage metadata server configuration and administration. It uses some
+additional subcommands.
+.sp
+Subcommand \fBadd_data_pool\fP adds data pool.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph mds add_data_pool <pool>
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBcluster_down\fP takes mds cluster down.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph mds cluster_down
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBcluster_up\fP brings mds cluster up.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph mds cluster_up
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBcompat\fP manages compatible features. It uses some additional
+subcommands.
+.sp
+Subcommand \fBrm_compat\fP removes compatible feature.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph mds compat rm_compat <int[0\-]>
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBrm_incompat\fP removes incompatible feature.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph mds compat rm_incompat <int[0\-]>
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBshow\fP shows mds compatibility settings.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph mds compat show
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBdeactivate\fP stops mds.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph mds deactivate <who>
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBdump\fP dumps information, optionally from epoch.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph mds dump {<int[0\-]>}
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBfail\fP forces mds to status fail.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph mds fail <who>
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBgetmap\fP gets MDS map, optionally from epoch.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph mds getmap {<int[0\-]>}
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBnewfs\fP makes new filesystem using pools <metadata> and <data>.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph mds newfs <int[0\-]> <int[0\-]> {\-\-yes\-i\-really\-mean\-it}
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBremove_data_pool\fP removes data pool.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph mds remove_data_pool <pool>
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBrm\fP removes inactive mds.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph mds rm <int[0\-]> <name (type.id)>
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBrmfailed\fP removes failed mds.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph mds rmfailed <int[0\-]>
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBset_max_mds\fP sets max MDS index.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph mds set_max_mds <int[0\-]>
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBset_state\fP sets mds state of <gid> to <numeric\-state>.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph mds set_state <int[0\-]> <int[0\-20]>
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBsetmap\fP sets mds map; must supply correct epoch number.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph mds setmap <int[0\-]>
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBstat\fP shows MDS status.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph mds stat
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBstop\fP stops mds.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph mds stop <who>
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBtell\fP sends command to particular mds.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph mds tell <who> <args> [<args>...]
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SS mon
+.sp
+Manage monitor configuration and administration. It uses some additional
+subcommands.
+.sp
+Subcommand \fBadd\fP adds new monitor named <name> at <addr>.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph mon add <name> <IPaddr[:port]>
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBdump\fP dumps formatted monmap (optionally from epoch)
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph mon dump {<int[0\-]>}
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBgetmap\fP gets monmap.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph mon getmap {<int[0\-]>}
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBremove\fP removes monitor named <name>.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph mon remove <name>
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBstat\fP summarizes monitor status.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph mon stat
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBmon_status\fP reports status of monitors.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph mon_status
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SS osd
+.sp
+Manage OSD configuration and administration. It uses some additional
+subcommands.
+.sp
+Subcommand \fBcreate\fP creates new osd (with optional UUID).
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph osd create {<uuid>}
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBcrush\fP is used for CRUSH management. It uses some additional
+subcommands.
+.sp
+Subcommand \fBadd\fP adds or updates crushmap position and weight for <name> with
+<weight> and location <args>.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph osd crush add <osdname (id|osd.id)> <float[0.0\-]> <args> [<args>...]
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBadd\-bucket\fP adds no\-parent (probably root) crush bucket <name> of
+type <type>.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph osd crush add\-bucket <name> <type>
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBcreate\-or\-move\fP creates entry or moves existing entry for <name>
+<weight> at/to location <args>.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph osd crush create\-or\-move <osdname (id|osd.id)> <float[0.0\-]> <args>
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+[<args>...]
+.sp
+Subcommand \fBdump\fP dumps crush map.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph osd crush dump
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBlink\fP links existing entry for <name> under location <args>.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph osd crush link <name> <args> [<args>...]
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBmove\fP moves existing entry for <name> to location <args>.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph osd crush move <name> <args> [<args>...]
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBremove\fP removes <name> from crush map (everywhere, or just at
+<ancestor>).
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph osd crush remove <name> {<ancestor>}
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBreweight\fP change <name>\(aqs weight to <weight> in crush map.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph osd crush reweight <name> <float[0.0\-]>
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBrm\fP removes <name> from crush map (everywhere, or just at
+<ancestor>).
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph osd crush rm <name> {<ancestor>}
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBrule\fP is used for creating crush rules. It uses some additional
+subcommands.
+.sp
+Subcommand \fBcreate\-erasure\fP creates crush rule <name> for erasure coded pool
+created with <profile> (default default).
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph osd crush rule create\-erasure <name> {<profile>}
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBcreate\-simple\fP creates crush rule <name> to start from <root>,
+replicate across buckets of type <type>, using a choose mode of <firstn|indep>
+(default firstn; indep best for erasure pools).
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph osd crush rule create\-simple <name> <root> <type> {firstn|indep}
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBdump\fP dumps crush rule <name> (default all).
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph osd crush rule dump {<name>}
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBlist\fP lists crush rules.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph osd crush rule list
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBls\fP lists crush rules.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph osd crush rule ls
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBrm\fP removes crush rule <name>.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph osd crush rule rm <name>
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBset\fP sets crush map from input file.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph osd crush set
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBset\fP with osdname/osd.id update crushmap position and weight
+for <name> to <weight> with location <args>.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph osd crush set <osdname (id|osd.id)> <float[0.0\-]> <args> [<args>...]
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBshow\-tunables\fP shows current crush tunables.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph osd crush show\-tunables
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBtunables\fP sets crush tunables values to <profile>.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph osd crush tunables legacy|argonaut|bobtail|firefly|optimal|default
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBunlink\fP unlinks <name> from crush map (everywhere, or just at
+<ancestor>).
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph osd crush unlink <name> {<ancestor>}
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBdeep\-scrub\fP initiates deep scrub on specified osd.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph osd deep\-scrub <who>
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBdown\fP sets osd(s) <id> [<id>...] down.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph osd down <ids> [<ids>...]
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBdump\fP prints summary of OSD map.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph osd dump {<int[0\-]>}
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBerasure\-code\-profile\fP is used for managing the erasure code
+profiles. It uses some additional subcommands.
+.sp
+Subcommand \fBget\fP gets erasure code profile <name>.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph osd erasure\-code\-profile get <name>
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBls\fP lists all erasure code profiles.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph osd erasure\-code\-profile ls
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBrm\fP removes erasure code profile <name>.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph osd erasure\-code\-profile rm <name>
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBset\fP creates erasure code profile <name> with [<key[=value]> ...]
+pairs. Add a \-\-force at the end to override an existing profile (IT IS RISKY).
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph osd erasure\-code\-profile set <name> {<profile> [<profile>...]}
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBfind\fP finds osd <id> in the CRUSH map and shows its location.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph osd find <int[0\-]>
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBgetcrushmap\fP gets CRUSH map.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph osd getcrushmap {<int[0\-]>}
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBgetmap\fP gets OSD map.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph osd getmap {<int[0\-]>}
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBgetmaxosd\fP shows largest OSD id.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph osd getmaxosd
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBin\fP sets osd(s) <id> [<id>...] in.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph osd in <ids> [<ids>...]
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBlost\fP marks osd as permanently lost. THIS DESTROYS DATA IF NO
+MORE REPLICAS EXIST, BE CAREFUL.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph osd lost <int[0\-]> {\-\-yes\-i\-really\-mean\-it}
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBls\fP shows all OSD ids.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph osd ls {<int[0\-]>}
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBlspools\fP lists pools.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph osd lspools {<int>}
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBmap\fP finds pg for <object> in <pool>.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph osd map <poolname> <objectname>
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBmetadata\fP fetches metadata for osd <id>.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph osd metadata <int[0\-]>
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBout\fP sets osd(s) <id> [<id>...] out.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph osd out <ids> [<ids>...]
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBpause\fP pauses osd.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph osd pause
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBperf\fP prints dump of OSD perf summary stats.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph osd perf
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBpg\-temp\fP sets pg_temp mapping pgid:[<id> [<id>...]] (developers
+only).
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph osd pg\-temp <pgid> {<id> [<id>...]}
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBpool\fP is used for managing data pools. It uses some additional
+subcommands.
+.sp
+Subcommand \fBcreate\fP creates pool.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph osd pool create <poolname> <int[0\-]> {<int[0\-]>} {replicated|erasure}
+{<erasure_code_profile>} {<ruleset>}
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBdelete\fP deletes pool.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph osd pool delete <poolname> {<poolname>} {\-\-yes\-i\-really\-really\-mean\-it}
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBget\fP gets pool parameter <var>.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph osd pool get <poolname> size|min_size|crash_replay_interval|pg_num|
+pgp_num|crush_ruleset|hit_set_type|hit_set_period|hit_set_count|hit_set_fpp|
+
+ceph osd pool get <poolname> auid|target_max_objects|target_max_bytes
+
+ceph osd pool get <poolname> cache_target_dirty_ratio|cache_target_full_ratio
+
+ceph osd pool get <poolname> cache_min_flush_age|cache_min_evict_age|
+erasure_code_profile
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBget\-quota\fP obtains object or byte limits for pool.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph osd pool get\-quota <poolname>
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBmksnap\fP makes snapshot <snap> in <pool>.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph osd pool mksnap <poolname> <snap>
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBrename\fP renames <srcpool> to <destpool>.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph osd pool rename <poolname> <poolname>
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBrmsnap\fP removes snapshot <snap> from <pool>.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph osd pool rmsnap <poolname> <snap>
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBset\fP sets pool parameter <var> to <val>.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph osd pool set <poolname> size|min_size|crash_replay_interval|pg_num|
+pgp_num|crush_ruleset|hashpspool|hit_set_type|hit_set_period|
+
+ceph osd pool set <poolname> hit_set_count|hit_set_fpp|debug_fake_ec_pool
+
+ceph osd pool set <poolname> target_max_bytes|target_max_objects
+
+ceph osd pool set <poolname> cache_target_dirty_ratio|cache_target_full_ratio
+
+ceph osd pool set <poolname> cache_min_flush_age
+
+ceph osd pool set <poolname> cache_min_evict_age|auid <val>
+{\-\-yes\-i\-really\-mean\-it}
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBset\-quota\fP sets object or byte limit on pool.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph osd pool set\-quota <poolname> max_objects|max_bytes <val>
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBstats\fP obtains stats from all pools, or from specified pool.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph osd pool stats {<name>}
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBprimary\-affinity\fP adjusts osd primary\-affinity from 0.0 <=
+<weight> <= 1.0
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph osd primary\-affinity <osdname (id|osd.id)> <float[0.0\-1.0]>
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBprimary\-temp\fP sets primary_temp mapping pgid:<id>|\-1 (developers
+only).
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph osd primary\-temp <pgid> <id>
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBrepair\fP initiates repair on a specified osd.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph osd repair <who>
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBreweight\fP reweights osd to 0.0 < <weight> < 1.0.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph osd reweight <int[0\-]> <float[0.0\-1.0]>
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBreweight\-by\-utilization\fP reweights OSDs by utilization
+[overload\-percentage\-for\-consideration, default 120].
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph osd reweight\-by\-utilization {<int[100\-]>}
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBrm\fP removes osd(s) <id> [<id>...] in the cluster.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph osd rm <ids> [<ids>...]
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBscrub\fP initiates scrub on specified osd.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph osd scrub <who>
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBset\fP sets <key>.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph osd set pause|noup|nodown|noout|noin|nobackfill|norecover|noscrub|
+nodeep\-scrub|notieragent
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBsetcrushmap\fP sets crush map from input file.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph osd setcrushmap
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBsetmaxosd\fP sets new maximum osd value.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph osd setmaxosd <int[0\-]>
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBstat\fP prints summary of OSD map.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph osd stat
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBthrash\fP thrashes OSDs for <num_epochs>.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph osd thrash <int[0\-]>
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBtier\fP is used for managing tiers. It uses some additional
+subcommands.
+.sp
+Subcommand \fBadd\fP adds the tier <tierpool> (the second one) to base pool <pool>
+(the first one).
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph osd tier add <poolname> <poolname> {\-\-force\-nonempty}
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBadd\-cache\fP adds a cache <tierpool> (the second one) of size <size>
+to existing pool <pool> (the first one).
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph osd tier add\-cache <poolname> <poolname> <int[0\-]>
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBcache\-mode\fP specifies the caching mode for cache tier <pool>.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph osd tier cache\-mode <poolname> none|writeback|forward|readonly
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBremove\fP removes the tier <tierpool> (the second one) from base pool
+<pool> (the first one).
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph osd tier remove <poolname> <poolname>
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBremove\-overlay\fP removes the overlay pool for base pool <pool>.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph osd tier remove\-overlay <poolname>
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBset\-overlay\fP sets the overlay pool for base pool <pool> to be
+<overlaypool>.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph osd tier set\-overlay <poolname> <poolname>
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBtree\fP prints OSD tree.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph osd tree {<int[0\-]>}
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBunpause\fP unpauses osd.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph osd unpause
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBunset\fP unsets <key>.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph osd unset pause|noup|nodown|noout|noin|nobackfill|norecover|noscrub|
+nodeep\-scrub|notieragent
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SS pg
+.sp
+It is used for managing the placement groups in OSDs. It uses some
+additional subcommands.
+.sp
+Subcommand \fBdebug\fP shows debug info about pgs.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph pg debug unfound_objects_exist|degraded_pgs_exist
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBdeep\-scrub\fP starts deep\-scrub on <pgid>.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph pg deep\-scrub <pgid>
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBdump\fP shows human\-readable versions of pg map (only \(aqall\(aq valid
+with plain).
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph pg dump {all|summary|sum|delta|pools|osds|pgs|pgs_brief}
+
+ceph pg dump {all|summary|sum|delta|pools|osds|pgs|pgs_brief...}
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBdump_json\fP shows human\-readable version of pg map in json only.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph pg dump_json {all|summary|sum|pools|osds|pgs[all|summary|sum|pools|
+osds|pgs...]}
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBdump_pools_json\fP shows pg pools info in json only.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph pg dump_pools_json
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBdump_stuck\fP shows information about stuck pgs.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph pg dump_stuck {inactive|unclean|stale[inactive|unclean|stale...]}
+{<int>}
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBforce_create_pg\fP forces creation of pg <pgid>.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph pg force_create_pg <pgid>
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBgetmap\fP gets binary pg map to \-o/stdout.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph pg getmap
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBmap\fP shows mapping of pg to osds.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph pg map <pgid>
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBrepair\fP starts repair on <pgid>.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph pg repair <pgid>
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBscrub\fP starts scrub on <pgid>.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph pg scrub <pgid>
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBsend_pg_creates\fP triggers pg creates to be issued.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph pg send_pg_creates
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBset_full_ratio\fP sets ratio at which pgs are considered full.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph pg set_full_ratio <float[0.0\-1.0]>
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBset_nearfull_ratio\fP sets ratio at which pgs are considered nearly
+full.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph pg set_nearfull_ratio <float[0.0\-1.0]>
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Subcommand \fBstat\fP shows placement group status.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph pg stat
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SS quorum
+.sp
+Enter or exit quorum.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph quorum enter|exit
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SS quorum_status
+.sp
+Reports status of monitor quorum.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph quorum_status
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SS report
+.sp
+Reports full status of cluster, with optional title tag strings.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph report {<tags> [<tags>...]}
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SS scrub
+.sp
+Scrubs the monitor stores.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph scrub
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SS status
+.sp
+Shows cluster status.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph status
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SS sync force
+.sp
+Forces sync of and clears the monitor store.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph sync force {\-\-yes\-i\-really\-mean\-it} {\-\-i\-know\-what\-i\-am\-doing}
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SS tell
+.sp
+Sends a command to a specific daemon.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ceph tell <name (type.id)> <args> [<args>...]
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SH OPTIONS
+.INDENT 0.0
+.TP
+.B \-i infile
+will specify an input file to be passed along as a payload with the
+command to the monitor cluster. This is only used for specific
+monitor commands.
+.UNINDENT
+.INDENT 0.0
+.TP
+.B \-o outfile
+will write any payload returned by the monitor cluster with its
+reply to outfile. Only specific monitor commands (e.g. osd getmap)
+return a payload.
+.UNINDENT
+.INDENT 0.0
+.TP
+.B \-c ceph.conf, \-\-conf=ceph.conf
+Use ceph.conf configuration file instead of the default
+\fB/etc/ceph/ceph.conf\fP to determine monitor addresses during startup.
+.UNINDENT
+.INDENT 0.0
+.TP
+.B \-\-id CLIENT_ID, \-\-user CLIENT_ID
+Client id for authentication.
+.UNINDENT
+.INDENT 0.0
+.TP
+.B \-\-name CLIENT_NAME, \-n CLIENT_NAME
+Client name for authentication.
+.UNINDENT
+.INDENT 0.0
+.TP
+.B \-\-cluster CLUSTER
+Name of the Ceph cluster.
+.UNINDENT
+.INDENT 0.0
+.TP
+.B \-\-admin\-daemon ADMIN_SOCKET
+Submit admin\-socket commands.
+.UNINDENT
+.INDENT 0.0
+.TP
+.B \-\-admin\-socket ADMIN_SOCKET_NOPE
+You probably mean \-\-admin\-daemon
+.UNINDENT
+.INDENT 0.0
+.TP
+.B \-s, \-\-status
+Show cluster status.
+.UNINDENT
+.INDENT 0.0
+.TP
+.B \-w, \-\-watch
+Watch live cluster changes.
+.UNINDENT
+.INDENT 0.0
+.TP
+.B \-\-watch\-debug
+Watch debug events.
+.UNINDENT
+.INDENT 0.0
+.TP
+.B \-\-watch\-info
+Watch info events.
+.UNINDENT
+.INDENT 0.0
+.TP
+.B \-\-watch\-sec
+Watch security events.
+.UNINDENT
+.INDENT 0.0
+.TP
+.B \-\-watch\-warn
+Watch warning events.
+.UNINDENT
+.INDENT 0.0
+.TP
+.B \-\-watch\-error
+Watch error events.
+.UNINDENT
+.INDENT 0.0
+.TP
+.B \-\-version, \-v
+Display version.
+.UNINDENT
+.INDENT 0.0
+.TP
+.B \-\-verbose
+Make verbose.
+.UNINDENT
+.INDENT 0.0
+.TP
+.B \-\-concise
+Make less verbose.
+.UNINDENT
+.INDENT 0.0
+.TP
+.B \-f {json,json\-pretty,xml,xml\-pretty,plain}, \-\-format
+Format of output.
+.UNINDENT
+.INDENT 0.0
+.TP
+.B \-\-connect\-timeout CLUSTER_TIMEOUT
+Set a timeout for connecting to the cluster.
.UNINDENT
.SH AVAILABILITY
.sp
-\fBceph\fP is part of the Ceph distributed storage system. Please refer to the Ceph documentation at
-\fI\%http://ceph.com/docs\fP for more information.
+\fBceph\fP is a part of the Ceph distributed storage system. Please refer to
+the Ceph documentation at \fI\%http://ceph.com/docs\fP for more information.
.SH SEE ALSO
.sp
-\fBceph\fP(8),
+\fBceph\-mon\fP(8),
+\fBceph\-osd\fP(8),
+\fBceph\-mds\fP(8)
.SH COPYRIGHT
2010-2014, Inktank Storage, Inc. and contributors. Licensed under Creative Commons BY-SA
.\" Generated by docutils manpage writer.
diff --git a/src/.git_version b/src/.git_version
index 1727fed..b5dcc6d 100644
--- a/src/.git_version
+++ b/src/.git_version
@@ -1,2 +1,2 @@
-6c0127fcb58008793d3c8b62d925bc91963672a3
-v0.80.7
+b5a67f0e1d15385bc0d60a6da6e7fc810bde6047
+v0.80.9
diff --git a/src/Makefile-env.am b/src/Makefile-env.am
index d62247b..b45b156 100644
--- a/src/Makefile-env.am
+++ b/src/Makefile-env.am
@@ -150,6 +150,7 @@ LIBCLIENT = libclient.la
LIBCLIENT_FUSE = libclient_fuse.la
LIBRADOS = librados.la
LIBRGW = librgw.la
+LIBCIVETWEB = libcivetweb.la
LIBRBD = librbd.la
LIBCEPHFS = libcephfs.la
LIBERASURE_CODE = liberasure_code.la
diff --git a/src/Makefile.am b/src/Makefile.am
index edec05e..9c394e8 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -258,6 +258,7 @@ TESTS = \
$(check_SCRIPTS)
check-local:
+ $(top_srcdir)/qa/workunits/erasure-code/encode-decode-non-regression.sh
$(srcdir)/test/encoding/readable.sh ../ceph-object-corpus
diff --git a/src/Makefile.in b/src/Makefile.in
index afa524b..5d9ea8b 100644
--- a/src/Makefile.in
+++ b/src/Makefile.in
@@ -69,7 +69,8 @@ bin_PROGRAMS = $(am__EXEEXT_9) $(am__EXEEXT_10) ceph-dencoder$(EXEEXT) \
ceph-mon$(EXEEXT) ceph-osd$(EXEEXT) ceph-mds$(EXEEXT) \
cephfs$(EXEEXT) librados-config$(EXEEXT) ceph-syn$(EXEEXT) \
$(am__EXEEXT_12) $(am__EXEEXT_13)
-noinst_PROGRAMS = get_command_descriptions$(EXEEXT)
+noinst_PROGRAMS = ceph_erasure_code_non_regression$(EXEEXT) \
+ get_command_descriptions$(EXEEXT)
sbin_PROGRAMS =
su_sbin_PROGRAMS = $(am__EXEEXT_14)
check_PROGRAMS = unittest_erasure_code_plugin$(EXEEXT) \
@@ -88,20 +89,21 @@ check_PROGRAMS = unittest_erasure_code_plugin$(EXEEXT) \
unittest_base64$(EXEEXT) unittest_ceph_argparse$(EXEEXT) \
unittest_ceph_compatset$(EXEEXT) unittest_osd_types$(EXEEXT) \
unittest_pglog$(EXEEXT) unittest_ecbackend$(EXEEXT) \
- unittest_hitset$(EXEEXT) unittest_gather$(EXEEXT) \
- unittest_run_cmd$(EXEEXT) unittest_signals$(EXEEXT) \
- unittest_simple_spin$(EXEEXT) unittest_librados$(EXEEXT) \
- unittest_bufferlist$(EXEEXT) unittest_crc32c$(EXEEXT) \
- unittest_arch$(EXEEXT) unittest_crypto$(EXEEXT) \
- unittest_crypto_init$(EXEEXT) unittest_perf_counters$(EXEEXT) \
- unittest_admin_socket$(EXEEXT) unittest_ceph_crypto$(EXEEXT) \
- unittest_utf8$(EXEEXT) unittest_mime$(EXEEXT) \
- unittest_escape$(EXEEXT) unittest_chain_xattr$(EXEEXT) \
- unittest_flatindex$(EXEEXT) unittest_strtol$(EXEEXT) \
- unittest_confutils$(EXEEXT) unittest_config$(EXEEXT) \
- unittest_context$(EXEEXT) unittest_heartbeatmap$(EXEEXT) \
- unittest_formatter$(EXEEXT) unittest_libcephfs_config$(EXEEXT) \
- unittest_lfnindex$(EXEEXT) unittest_librados_config$(EXEEXT) \
+ unittest_hitset$(EXEEXT) unittest_io_priority$(EXEEXT) \
+ unittest_gather$(EXEEXT) unittest_run_cmd$(EXEEXT) \
+ unittest_signals$(EXEEXT) unittest_simple_spin$(EXEEXT) \
+ unittest_librados$(EXEEXT) unittest_bufferlist$(EXEEXT) \
+ unittest_crc32c$(EXEEXT) unittest_arch$(EXEEXT) \
+ unittest_crypto$(EXEEXT) unittest_crypto_init$(EXEEXT) \
+ unittest_perf_counters$(EXEEXT) unittest_admin_socket$(EXEEXT) \
+ unittest_ceph_crypto$(EXEEXT) unittest_utf8$(EXEEXT) \
+ unittest_mime$(EXEEXT) unittest_escape$(EXEEXT) \
+ unittest_chain_xattr$(EXEEXT) unittest_flatindex$(EXEEXT) \
+ unittest_strtol$(EXEEXT) unittest_confutils$(EXEEXT) \
+ unittest_config$(EXEEXT) unittest_context$(EXEEXT) \
+ unittest_heartbeatmap$(EXEEXT) unittest_formatter$(EXEEXT) \
+ unittest_libcephfs_config$(EXEEXT) unittest_lfnindex$(EXEEXT) \
+ unittest_librados_config$(EXEEXT) \
unittest_daemon_config$(EXEEXT) unittest_osd_osdcap$(EXEEXT) \
unittest_mon_moncap$(EXEEXT) unittest_mon_pgmap$(EXEEXT) \
unittest_ipaddr$(EXEEXT) unittest_texttable$(EXEEXT) \
@@ -142,7 +144,7 @@ check_PROGRAMS = unittest_erasure_code_plugin$(EXEEXT) \
@LINUX_TRUE at am__append_32 = -lrt
@LINUX_TRUE at am__append_33 = -export-symbols-regex '^rados_.*'
@LINUX_TRUE at am__append_34 = -export-symbols-regex '^rbd_.*'
- at WITH_RADOSGW_TRUE@am__append_35 = librgw.la
+ at WITH_RADOSGW_TRUE@am__append_35 = librgw.la libcivetweb.la
@WITH_RADOSGW_TRUE at am__append_36 = \
@WITH_RADOSGW_TRUE@ $(LIBRADOS) \
@WITH_RADOSGW_TRUE@ libcls_rgw_client.la \
@@ -177,36 +179,37 @@ check_PROGRAMS = unittest_erasure_code_plugin$(EXEEXT) \
@LINUX_TRUE at am__append_43 = -ldl
@LINUX_TRUE at am__append_44 = -ldl
@LINUX_TRUE at am__append_45 = -ldl
- at COMPILER_HAS_VTA_TRUE@am__append_46 = -fno-var-tracking-assignments
+ at LINUX_TRUE@am__append_46 = -ldl
@COMPILER_HAS_VTA_TRUE at am__append_47 = -fno-var-tracking-assignments
- at WITH_BUILD_TESTS_TRUE@am__append_48 = test_build_libcommon \
+ at COMPILER_HAS_VTA_TRUE@am__append_48 = -fno-var-tracking-assignments
+ at WITH_BUILD_TESTS_TRUE@am__append_49 = test_build_libcommon \
@WITH_BUILD_TESTS_TRUE@ test_build_librados test_build_librgw \
@WITH_BUILD_TESTS_TRUE@ test_build_libcephfs
- at LINUX_TRUE@am__append_49 = ceph_kvstorebench \
+ at LINUX_TRUE@am__append_50 = ceph_kvstorebench \
@LINUX_TRUE@ ceph_test_rados_list_parallel \
@LINUX_TRUE@ ceph_test_rados_open_pools_parallel \
@LINUX_TRUE@ ceph_test_rados_delete_pools_parallel \
@LINUX_TRUE@ ceph_test_rados_watch_notify
- at LINUX_TRUE@am__append_50 = libsystest.la
- at LINUX_TRUE@am__append_51 = -ldl
- at WITH_RADOSGW_TRUE@am__append_52 = ceph_test_cors \
+ at LINUX_TRUE@am__append_51 = libsystest.la
+ at LINUX_TRUE@am__append_52 = -ldl
+ at WITH_RADOSGW_TRUE@am__append_53 = ceph_test_cors \
@WITH_RADOSGW_TRUE@ ceph_test_rgw_manifest \
@WITH_RADOSGW_TRUE@ ceph_test_cls_rgw_meta \
@WITH_RADOSGW_TRUE@ ceph_test_cls_rgw_log \
@WITH_RADOSGW_TRUE@ ceph_test_cls_rgw_opstate
- at LINUX_TRUE@am__append_53 = ceph_test_librbd_fsx
- at WITH_RADOSGW_TRUE@am__append_54 = ceph_test_cls_rgw
- at LINUX_TRUE@am__append_55 = ceph_test_objectstore
- at LINUX_TRUE@am__append_56 = -ldl
+ at LINUX_TRUE@am__append_54 = ceph_test_librbd_fsx
+ at WITH_RADOSGW_TRUE@am__append_55 = ceph_test_cls_rgw
+ at LINUX_TRUE@am__append_56 = ceph_test_objectstore
@LINUX_TRUE at am__append_57 = -ldl
- at WITH_REST_BENCH_TRUE@am__append_58 = rest-bench
- at WITH_REST_BENCH_TRUE@@WITH_SYSTEM_LIBS3_TRUE at am__append_59 = -ls3
- at WITH_REST_BENCH_TRUE@@WITH_SYSTEM_LIBS3_FALSE at am__append_60 = libs3/build/lib/libs3.a -lcurl -lxml2
- at WITH_REST_BENCH_TRUE@@WITH_SYSTEM_LIBS3_FALSE at am__append_61 = libs3
- at LINUX_TRUE@am__append_62 = mount.ceph
- at LINUX_TRUE@am__append_63 = rbd
- at WITH_FUSE_TRUE@am__append_64 = ceph-fuse rbd-fuse
- at ENABLE_CEPHFS_JAVA_TRUE@am__append_65 = libcephfs_jni.la
+ at LINUX_TRUE@am__append_58 = -ldl
+ at WITH_REST_BENCH_TRUE@am__append_59 = rest-bench
+ at WITH_REST_BENCH_TRUE@@WITH_SYSTEM_LIBS3_TRUE at am__append_60 = -ls3
+ at WITH_REST_BENCH_TRUE@@WITH_SYSTEM_LIBS3_FALSE at am__append_61 = libs3/build/lib/libs3.a -lcurl -lxml2
+ at WITH_REST_BENCH_TRUE@@WITH_SYSTEM_LIBS3_FALSE at am__append_62 = libs3
+ at LINUX_TRUE@am__append_63 = mount.ceph
+ at LINUX_TRUE@am__append_64 = rbd
+ at WITH_FUSE_TRUE@am__append_65 = ceph-fuse rbd-fuse
+ at ENABLE_CEPHFS_JAVA_TRUE@am__append_66 = libcephfs_jni.la
subdir = src
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
am__aclocal_m4_deps = $(top_srcdir)/m4/ac_check_classpath.m4 \
@@ -357,6 +360,19 @@ libcephfs_jni_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
$(AM_CXXFLAGS) $(CXXFLAGS) $(libcephfs_jni_la_LDFLAGS) \
$(LDFLAGS) -o $@
@ENABLE_CEPHFS_JAVA_TRUE at am_libcephfs_jni_la_rpath = -rpath $(libdir)
+libcivetweb_la_LIBADD =
+am__libcivetweb_la_SOURCES_DIST = rgw/rgw_civetweb.cc \
+ rgw/rgw_civetweb_log.cc civetweb/src/civetweb.c
+ at WITH_RADOSGW_TRUE@am_libcivetweb_la_OBJECTS = \
+ at WITH_RADOSGW_TRUE@ rgw/libcivetweb_la-rgw_civetweb.lo \
+ at WITH_RADOSGW_TRUE@ rgw/libcivetweb_la-rgw_civetweb_log.lo \
+ at WITH_RADOSGW_TRUE@ civetweb/src/libcivetweb_la-civetweb.lo
+libcivetweb_la_OBJECTS = $(am_libcivetweb_la_OBJECTS)
+libcivetweb_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
+ $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
+ $(libcivetweb_la_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
+ $(LDFLAGS) -o $@
+ at WITH_RADOSGW_TRUE@am_libcivetweb_la_rpath =
libclient_la_DEPENDENCIES = $(LIBOSDC) $(am__DEPENDENCIES_1)
am_libclient_la_OBJECTS = client/Client.lo client/Inode.lo \
client/Dentry.lo client/MetaRequest.lo \
@@ -509,7 +525,7 @@ am_libcommon_la_OBJECTS = ceph_ver.lo common/DecayCounter.lo \
common/bloom_filter.lo common/linux_version.lo mon/MonCap.lo \
mon/MonClient.lo mon/MonMap.lo osd/OSDMap.lo osd/osd_types.lo \
osd/ECMsgTypes.lo osd/HitSet.lo mds/MDSMap.lo \
- mds/inode_backtrace.lo mds/mdstypes.lo
+ mds/inode_backtrace.lo mds/mdstypes.lo mds/flock.lo
libcommon_la_OBJECTS = $(am_libcommon_la_OBJECTS)
libcommon_crc_la_LIBADD =
am__libcommon_crc_la_SOURCES_DIST = common/sctp_crc32.c \
@@ -707,11 +723,11 @@ am_liblog_la_OBJECTS = log/Log.lo log/SubsystemMap.lo
liblog_la_OBJECTS = $(am_liblog_la_OBJECTS)
libmds_la_DEPENDENCIES = $(LIBOSDC)
am_libmds_la_OBJECTS = mds/Anchor.lo mds/Capability.lo mds/Dumper.lo \
- mds/Resetter.lo mds/MDS.lo mds/flock.lo mds/locks.lo \
- mds/journal.lo mds/Server.lo mds/Mutation.lo mds/MDCache.lo \
- mds/Locker.lo mds/Migrator.lo mds/MDBalancer.lo mds/CDentry.lo \
- mds/CDir.lo mds/CInode.lo mds/LogEvent.lo mds/MDSTable.lo \
- mds/InoTable.lo mds/MDSTableClient.lo mds/MDSTableServer.lo \
+ mds/Resetter.lo mds/MDS.lo mds/locks.lo mds/journal.lo \
+ mds/Server.lo mds/Mutation.lo mds/MDCache.lo mds/Locker.lo \
+ mds/Migrator.lo mds/MDBalancer.lo mds/CDentry.lo mds/CDir.lo \
+ mds/CInode.lo mds/LogEvent.lo mds/MDSTable.lo mds/InoTable.lo \
+ mds/MDSTableClient.lo mds/MDSTableServer.lo \
mds/AnchorServer.lo mds/AnchorClient.lo mds/SnapRealm.lo \
mds/SnapServer.lo mds/snap.lo mds/SessionMap.lo mds/MDLog.lo \
mds/MDSUtility.lo
@@ -1073,6 +1089,13 @@ ceph_erasure_code_benchmark_OBJECTS = \
ceph_erasure_code_benchmark_DEPENDENCIES = $(am__DEPENDENCIES_10) \
$(LIBCOMMON) $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_6) \
$(am__DEPENDENCIES_1)
+am_ceph_erasure_code_non_regression_OBJECTS = \
+ test/erasure-code/ceph_erasure_code_non_regression.$(OBJEXT)
+ceph_erasure_code_non_regression_OBJECTS = \
+ $(am_ceph_erasure_code_non_regression_OBJECTS)
+ceph_erasure_code_non_regression_DEPENDENCIES = \
+ $(am__DEPENDENCIES_10) $(LIBCOMMON) $(am__DEPENDENCIES_1) \
+ $(am__DEPENDENCIES_6) $(am__DEPENDENCIES_1)
am_ceph_filestore_dump_OBJECTS = tools/ceph_filestore_dump.$(OBJEXT)
ceph_filestore_dump_OBJECTS = $(am_ceph_filestore_dump_OBJECTS)
ceph_filestore_dump_DEPENDENCIES = $(am__DEPENDENCIES_10) \
@@ -1774,8 +1797,7 @@ am__radosgw_SOURCES_DIST = rgw/rgw_resolve.cc rgw/rgw_rest.cc \
rgw/rgw_rest_log.cc rgw/rgw_rest_opstate.cc \
rgw/rgw_rest_replica_log.cc rgw/rgw_rest_config.cc \
rgw/rgw_http_client.cc rgw/rgw_swift.cc rgw/rgw_swift_auth.cc \
- rgw/rgw_loadgen.cc rgw/rgw_civetweb.cc civetweb/src/civetweb.c \
- rgw/rgw_main.cc
+ rgw/rgw_loadgen.cc rgw/rgw_main.cc
@WITH_RADOSGW_TRUE at am_radosgw_OBJECTS = rgw/rgw_resolve.$(OBJEXT) \
@WITH_RADOSGW_TRUE@ rgw/rgw_rest.$(OBJEXT) \
@WITH_RADOSGW_TRUE@ rgw/rgw_rest_swift.$(OBJEXT) \
@@ -1793,11 +1815,9 @@ am__radosgw_SOURCES_DIST = rgw/rgw_resolve.cc rgw/rgw_rest.cc \
@WITH_RADOSGW_TRUE@ rgw/rgw_swift.$(OBJEXT) \
@WITH_RADOSGW_TRUE@ rgw/rgw_swift_auth.$(OBJEXT) \
@WITH_RADOSGW_TRUE@ rgw/rgw_loadgen.$(OBJEXT) \
- at WITH_RADOSGW_TRUE@ rgw/rgw_civetweb.$(OBJEXT) \
- at WITH_RADOSGW_TRUE@ civetweb/src/radosgw-civetweb.$(OBJEXT) \
@WITH_RADOSGW_TRUE@ rgw/rgw_main.$(OBJEXT)
radosgw_OBJECTS = $(am_radosgw_OBJECTS)
- at WITH_RADOSGW_TRUE@radosgw_DEPENDENCIES = $(LIBRGW) \
+ at WITH_RADOSGW_TRUE@radosgw_DEPENDENCIES = $(LIBRGW) $(LIBCIVETWEB) \
@WITH_RADOSGW_TRUE@ $(am__DEPENDENCIES_12) \
@WITH_RADOSGW_TRUE@ $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_6)
am__radosgw_admin_SOURCES_DIST = rgw/rgw_admin.cc
@@ -1876,7 +1896,7 @@ am__test_build_libcommon_SOURCES_DIST = test/buildtest_skeleton.cc \
common/bloom_filter.cc common/linux_version.c mon/MonCap.cc \
mon/MonClient.cc mon/MonMap.cc osd/OSDMap.cc osd/osd_types.cc \
osd/ECMsgTypes.cc osd/HitSet.cc mds/MDSMap.cc \
- mds/inode_backtrace.cc mds/mdstypes.cc
+ mds/inode_backtrace.cc mds/mdstypes.cc mds/flock.cc
am__objects_15 = test_build_libcommon-ceph_ver.$(OBJEXT) \
common/test_build_libcommon-DecayCounter.$(OBJEXT) \
common/test_build_libcommon-LogClient.$(OBJEXT) \
@@ -1958,7 +1978,8 @@ am__objects_15 = test_build_libcommon-ceph_ver.$(OBJEXT) \
osd/test_build_libcommon-HitSet.$(OBJEXT) \
mds/test_build_libcommon-MDSMap.$(OBJEXT) \
mds/test_build_libcommon-inode_backtrace.$(OBJEXT) \
- mds/test_build_libcommon-mdstypes.$(OBJEXT)
+ mds/test_build_libcommon-mdstypes.$(OBJEXT) \
+ mds/test_build_libcommon-flock.$(OBJEXT)
@WITH_BUILD_TESTS_TRUE at am_test_build_libcommon_OBJECTS = test/test_build_libcommon-buildtest_skeleton.$(OBJEXT) \
@WITH_BUILD_TESTS_TRUE@ $(am__objects_15)
test_build_libcommon_OBJECTS = $(am_test_build_libcommon_OBJECTS)
@@ -2356,6 +2377,15 @@ unittest_hitset_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
$(unittest_hitset_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
$(LDFLAGS) -o $@
+am_unittest_io_priority_OBJECTS = \
+ test/common/unittest_io_priority-test_io_priority.$(OBJEXT)
+unittest_io_priority_OBJECTS = $(am_unittest_io_priority_OBJECTS)
+unittest_io_priority_DEPENDENCIES = $(am__DEPENDENCIES_13) \
+ $(am__DEPENDENCIES_6)
+unittest_io_priority_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
+ $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
+ $(unittest_io_priority_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
+ $(LDFLAGS) -o $@
am_unittest_ipaddr_OBJECTS = \
test/unittest_ipaddr-test_ipaddr.$(OBJEXT)
unittest_ipaddr_OBJECTS = $(am_unittest_ipaddr_OBJECTS)
@@ -2668,11 +2698,12 @@ SOURCES = $(libcls_log_client_a_SOURCES) \
$(libcls_version_client_a_SOURCES) $(libos_zfs_a_SOURCES) \
$(libarch_la_SOURCES) $(libauth_la_SOURCES) \
$(libcephfs_la_SOURCES) $(libcephfs_jni_la_SOURCES) \
- $(libclient_la_SOURCES) $(libclient_fuse_la_SOURCES) \
- $(libcls_hello_la_SOURCES) $(libcls_kvs_la_SOURCES) \
- $(libcls_lock_la_SOURCES) $(libcls_lock_client_la_SOURCES) \
- $(libcls_log_la_SOURCES) $(libcls_rbd_la_SOURCES) \
- $(libcls_rbd_client_la_SOURCES) $(libcls_refcount_la_SOURCES) \
+ $(libcivetweb_la_SOURCES) $(libclient_la_SOURCES) \
+ $(libclient_fuse_la_SOURCES) $(libcls_hello_la_SOURCES) \
+ $(libcls_kvs_la_SOURCES) $(libcls_lock_la_SOURCES) \
+ $(libcls_lock_client_la_SOURCES) $(libcls_log_la_SOURCES) \
+ $(libcls_rbd_la_SOURCES) $(libcls_rbd_client_la_SOURCES) \
+ $(libcls_refcount_la_SOURCES) \
$(libcls_refcount_client_la_SOURCES) \
$(libcls_replica_log_la_SOURCES) $(libcls_rgw_la_SOURCES) \
$(libcls_rgw_client_la_SOURCES) $(libcls_statelog_la_SOURCES) \
@@ -2707,6 +2738,7 @@ SOURCES = $(libcls_log_client_a_SOURCES) \
$(ceph_syn_SOURCES) $(ceph_bench_log_SOURCES) \
$(ceph_dupstore_SOURCES) $(ceph_erasure_code_SOURCES) \
$(ceph_erasure_code_benchmark_SOURCES) \
+ $(ceph_erasure_code_non_regression_SOURCES) \
$(ceph_filestore_dump_SOURCES) $(ceph_filestore_tool_SOURCES) \
$(ceph_kvstorebench_SOURCES) \
$(ceph_mon_store_converter_SOURCES) \
@@ -2794,8 +2826,8 @@ SOURCES = $(libcls_log_client_a_SOURCES) \
$(unittest_escape_SOURCES) $(unittest_flatindex_SOURCES) \
$(unittest_formatter_SOURCES) $(unittest_gather_SOURCES) \
$(unittest_heartbeatmap_SOURCES) $(unittest_histogram_SOURCES) \
- $(unittest_hitset_SOURCES) $(unittest_ipaddr_SOURCES) \
- $(unittest_lfnindex_SOURCES) \
+ $(unittest_hitset_SOURCES) $(unittest_io_priority_SOURCES) \
+ $(unittest_ipaddr_SOURCES) $(unittest_lfnindex_SOURCES) \
$(unittest_libcephfs_config_SOURCES) \
$(unittest_librados_SOURCES) \
$(unittest_librados_config_SOURCES) $(unittest_log_SOURCES) \
@@ -2821,7 +2853,8 @@ DIST_SOURCES = $(libcls_log_client_a_SOURCES) \
$(libcls_version_client_a_SOURCES) \
$(am__libos_zfs_a_SOURCES_DIST) $(libarch_la_SOURCES) \
$(libauth_la_SOURCES) $(libcephfs_la_SOURCES) \
- $(am__libcephfs_jni_la_SOURCES_DIST) $(libclient_la_SOURCES) \
+ $(am__libcephfs_jni_la_SOURCES_DIST) \
+ $(am__libcivetweb_la_SOURCES_DIST) $(libclient_la_SOURCES) \
$(am__libclient_fuse_la_SOURCES_DIST) \
$(libcls_hello_la_SOURCES) $(am__libcls_kvs_la_SOURCES_DIST) \
$(libcls_lock_la_SOURCES) $(libcls_lock_client_la_SOURCES) \
@@ -2862,6 +2895,7 @@ DIST_SOURCES = $(libcls_log_client_a_SOURCES) \
$(ceph_bench_log_SOURCES) $(ceph_dupstore_SOURCES) \
$(ceph_erasure_code_SOURCES) \
$(ceph_erasure_code_benchmark_SOURCES) \
+ $(ceph_erasure_code_non_regression_SOURCES) \
$(ceph_filestore_dump_SOURCES) $(ceph_filestore_tool_SOURCES) \
$(am__ceph_kvstorebench_SOURCES_DIST) \
$(ceph_mon_store_converter_SOURCES) \
@@ -2955,8 +2989,8 @@ DIST_SOURCES = $(libcls_log_client_a_SOURCES) \
$(unittest_escape_SOURCES) $(unittest_flatindex_SOURCES) \
$(unittest_formatter_SOURCES) $(unittest_gather_SOURCES) \
$(unittest_heartbeatmap_SOURCES) $(unittest_histogram_SOURCES) \
- $(unittest_hitset_SOURCES) $(unittest_ipaddr_SOURCES) \
- $(unittest_lfnindex_SOURCES) \
+ $(unittest_hitset_SOURCES) $(unittest_io_priority_SOURCES) \
+ $(unittest_ipaddr_SOURCES) $(unittest_lfnindex_SOURCES) \
$(unittest_libcephfs_config_SOURCES) \
$(unittest_librados_SOURCES) \
$(unittest_librados_config_SOURCES) $(unittest_log_SOURCES) \
@@ -3219,8 +3253,9 @@ am__noinst_HEADERS_DIST = arch/intel.h arch/neon.h arch/probe.h \
rgw/rgw_rest_log.h rgw/rgw_rest_opstate.h \
rgw/rgw_rest_replica_log.h rgw/rgw_rest_config.h \
rgw/rgw_usage.h rgw/rgw_user.h rgw/rgw_bucket.h \
- rgw/rgw_keystone.h rgw/rgw_civetweb.h civetweb/civetweb.h \
- civetweb/include/civetweb.h civetweb/src/md5.h \
+ rgw/rgw_keystone.h rgw/rgw_civetweb.h rgw/rgw_civetweb_log.h \
+ civetweb/civetweb.h civetweb/include/civetweb.h \
+ civetweb/include/civetweb_conf.h civetweb/src/md5.h \
cls/lock/cls_lock_types.h cls/lock/cls_lock_ops.h \
cls/lock/cls_lock_client.h cls/rbd/cls_rbd.h \
cls/rbd/cls_rbd_client.h cls/refcount/cls_refcount_ops.h \
@@ -3485,7 +3520,7 @@ top_build_prefix = @top_build_prefix@
top_builddir = @top_builddir@
top_srcdir = @top_srcdir@
AUTOMAKE_OPTIONS = gnu subdir-objects
-SUBDIRS = ocf java $(am__append_61)
+SUBDIRS = ocf java $(am__append_62)
DIST_SUBDIRS = gtest ocf libs3 java
BUILT_SOURCES = init-ceph
@@ -3763,8 +3798,9 @@ noinst_HEADERS = arch/intel.h arch/neon.h arch/probe.h \
rgw/rgw_rest_log.h rgw/rgw_rest_opstate.h \
rgw/rgw_rest_replica_log.h rgw/rgw_rest_config.h \
rgw/rgw_usage.h rgw/rgw_user.h rgw/rgw_bucket.h \
- rgw/rgw_keystone.h rgw/rgw_civetweb.h civetweb/civetweb.h \
- civetweb/include/civetweb.h civetweb/src/md5.h \
+ rgw/rgw_keystone.h rgw/rgw_civetweb.h rgw/rgw_civetweb_log.h \
+ civetweb/civetweb.h civetweb/include/civetweb.h \
+ civetweb/include/civetweb_conf.h civetweb/src/md5.h \
cls/lock/cls_lock_types.h cls/lock/cls_lock_ops.h \
cls/lock/cls_lock_client.h cls/rbd/cls_rbd.h \
cls/rbd/cls_rbd_client.h cls/refcount/cls_refcount_ops.h \
@@ -3819,7 +3855,7 @@ bin_SCRIPTS = brag/client/ceph-brag ceph ceph-run ceph-rest-api \
sbin_SCRIPTS =
su_sbin_SCRIPTS = mount.fuse.ceph mkcephfs
dist_bin_SCRIPTS =
-lib_LTLIBRARIES = librados.la librbd.la libcephfs.la $(am__append_65)
+lib_LTLIBRARIES = librados.la librbd.la libcephfs.la $(am__append_66)
noinst_LTLIBRARIES = libarch.la libauth.la libcrush.la libmon_types.la \
libmon.la libmds.la libos_types.la libos.la libosd_types.la \
libosd.la liberasure_code.la libosdc.la libclient.la \
@@ -3827,7 +3863,7 @@ noinst_LTLIBRARIES = libarch.la libauth.la libcrush.la libmon_types.la \
libperfglue.la libcommon_crc.la libcommon.la libmsg.la \
$(am__append_35) libcls_lock_client.la \
libcls_refcount_client.la libcls_rgw_client.la \
- libcls_rbd_client.la $(am__append_50) libradostest.la
+ libcls_rbd_client.la $(am__append_51) libradostest.la
noinst_LIBRARIES = $(am__append_17) libcls_version_client.a \
libcls_log_client.a libcls_statelog_client.a \
libcls_replica_log_client.a libcls_user_client.a
@@ -3842,14 +3878,14 @@ bin_DEBUGPROGRAMS = ceph_test_ioctls $(am__append_38) \
ceph_test_signal_handlers ceph_test_rados ceph_test_mutate \
ceph_test_rewrite_latency ceph_test_msgr ceph_streamtest \
ceph_test_trans ceph_test_crypto ceph_test_keys \
- $(am__append_48) ceph_smalliobench ceph_smalliobenchfs \
+ $(am__append_49) ceph_smalliobench ceph_smalliobenchfs \
ceph_smalliobenchdumb ceph_smalliobenchrbd ceph_tpbench \
- ceph_omapbench $(am__append_49) ceph_bench_log \
- $(am__append_52) ceph_multi_stress_watch ceph_test_librbd \
- $(am__append_53) ceph_test_cls_rbd ceph_test_cls_refcount \
+ ceph_omapbench $(am__append_50) ceph_bench_log \
+ $(am__append_53) ceph_multi_stress_watch ceph_test_librbd \
+ $(am__append_54) ceph_test_cls_rbd ceph_test_cls_refcount \
ceph_test_cls_version ceph_test_cls_log ceph_test_cls_statelog \
ceph_test_cls_replica_log ceph_test_cls_lock \
- ceph_test_cls_hello $(am__append_54) ceph_test_mon_workloadgen \
+ ceph_test_cls_hello $(am__append_55) ceph_test_mon_workloadgen \
ceph_test_rados_api_cmd ceph_test_rados_api_io \
ceph_test_rados_api_c_write_operations \
ceph_test_rados_api_c_read_operations ceph_test_rados_api_aio \
@@ -3857,7 +3893,7 @@ bin_DEBUGPROGRAMS = ceph_test_ioctls $(am__append_38) \
ceph_test_rados_api_stat ceph_test_rados_api_watch_notify \
ceph_test_rados_api_snapshots ceph_test_rados_api_cls \
ceph_test_rados_api_misc ceph_test_rados_api_tier \
- ceph_test_rados_api_lock ceph_test_libcephfs $(am__append_55) \
+ ceph_test_rados_api_lock ceph_test_libcephfs $(am__append_56) \
ceph_test_objectstore_workloadgen \
ceph_test_filestore_idempotent \
ceph_test_filestore_idempotent_sequence ceph_xattr_bench \
@@ -3951,6 +3987,7 @@ LIBCLIENT = libclient.la
LIBCLIENT_FUSE = libclient_fuse.la
LIBRADOS = librados.la
LIBRGW = librgw.la
+LIBCIVETWEB = libcivetweb.la
LIBRBD = librbd.la
LIBCEPHFS = libcephfs.la
LIBERASURE_CODE = liberasure_code.la
@@ -4031,7 +4068,6 @@ libmds_la_SOURCES = \
mds/Dumper.cc \
mds/Resetter.cc \
mds/MDS.cc \
- mds/flock.cc \
mds/locks.c \
mds/journal.cc \
mds/Server.cc \
@@ -4265,7 +4301,7 @@ libcommon_la_SOURCES = ceph_ver.c common/DecayCounter.cc \
common/bloom_filter.cc common/linux_version.c mon/MonCap.cc \
mon/MonClient.cc mon/MonMap.cc osd/OSDMap.cc osd/osd_types.cc \
osd/ECMsgTypes.cc osd/HitSet.cc mds/MDSMap.cc \
- mds/inode_backtrace.cc mds/mdstypes.cc
+ mds/inode_backtrace.cc mds/mdstypes.cc mds/flock.cc
# inject crc in common
libcommon_crc_la_SOURCES = common/sctp_crc32.c common/crc32c.cc \
@@ -4365,6 +4401,14 @@ librbd_la_LDFLAGS = ${AM_LDFLAGS} -version-info 1:0:0 $(am__append_34)
@WITH_RADOSGW_TRUE@ rgw/rgw_dencoder.cc
@WITH_RADOSGW_TRUE at librgw_la_CXXFLAGS = -Woverloaded-virtual ${AM_CXXFLAGS}
+ at WITH_RADOSGW_TRUE@CIVETWEB_INCLUDE = --include civetweb/include/civetweb_conf.h
+ at WITH_RADOSGW_TRUE@libcivetweb_la_SOURCES = \
+ at WITH_RADOSGW_TRUE@ rgw/rgw_civetweb.cc \
+ at WITH_RADOSGW_TRUE@ rgw/rgw_civetweb_log.cc \
+ at WITH_RADOSGW_TRUE@ civetweb/src/civetweb.c
+
+ at WITH_RADOSGW_TRUE@libcivetweb_la_CXXFLAGS = ${CIVETWEB_INCLUDE} -Woverloaded-virtual ${AM_CXXFLAGS}
+ at WITH_RADOSGW_TRUE@libcivetweb_la_CFLAGS = -Icivetweb/include ${CIVETWEB_INCLUDE}
@WITH_RADOSGW_TRUE at radosgw_SOURCES = \
@WITH_RADOSGW_TRUE@ rgw/rgw_resolve.cc \
@WITH_RADOSGW_TRUE@ rgw/rgw_rest.cc \
@@ -4383,12 +4427,10 @@ librbd_la_LDFLAGS = ${AM_LDFLAGS} -version-info 1:0:0 $(am__append_34)
@WITH_RADOSGW_TRUE@ rgw/rgw_swift.cc \
@WITH_RADOSGW_TRUE@ rgw/rgw_swift_auth.cc \
@WITH_RADOSGW_TRUE@ rgw/rgw_loadgen.cc \
- at WITH_RADOSGW_TRUE@ rgw/rgw_civetweb.cc \
- at WITH_RADOSGW_TRUE@ civetweb/src/civetweb.c \
@WITH_RADOSGW_TRUE@ rgw/rgw_main.cc
- at WITH_RADOSGW_TRUE@radosgw_CFLAGS = -Icivetweb/include
- at WITH_RADOSGW_TRUE@radosgw_LDADD = $(LIBRGW) $(LIBRGW_DEPS) $(RESOLV_LIBS) $(CEPH_GLOBAL)
+ at WITH_RADOSGW_TRUE@radosgw_CFLAGS = -I$(srcdir)/civetweb/include
+ at WITH_RADOSGW_TRUE@radosgw_LDADD = $(LIBRGW) $(LIBCIVETWEB) $(LIBRGW_DEPS) $(RESOLV_LIBS) $(CEPH_GLOBAL)
@WITH_RADOSGW_TRUE at radosgw_admin_SOURCES = rgw/rgw_admin.cc
@WITH_RADOSGW_TRUE at radosgw_admin_LDADD = $(LIBRGW) $(LIBRGW_DEPS) $(CEPH_GLOBAL)
@WITH_RADOSGW_TRUE at ceph_rgw_multiparser_SOURCES = rgw/rgw_multiparser.cc
@@ -4477,11 +4519,16 @@ ceph_erasure_code_benchmark_SOURCES = \
ceph_erasure_code_benchmark_LDADD = $(LIBOSD) $(LIBCOMMON) \
$(BOOST_PROGRAM_OPTIONS_LIBS) $(CEPH_GLOBAL) $(am__append_41)
+ceph_erasure_code_non_regression_SOURCES = \
+ test/erasure-code/ceph_erasure_code_non_regression.cc
+
+ceph_erasure_code_non_regression_LDADD = $(LIBOSD) $(LIBCOMMON) \
+ $(BOOST_PROGRAM_OPTIONS_LIBS) $(CEPH_GLOBAL) $(am__append_42)
ceph_erasure_code_SOURCES = \
test/erasure-code/ceph_erasure_code.cc
ceph_erasure_code_LDADD = $(LIBOSD) $(LIBCOMMON) \
- $(BOOST_PROGRAM_OPTIONS_LIBS) $(CEPH_GLOBAL) $(am__append_42)
+ $(BOOST_PROGRAM_OPTIONS_LIBS) $(CEPH_GLOBAL) $(am__append_43)
libec_example_la_SOURCES = test/erasure-code/ErasureCodePluginExample.cc
libec_example_la_CFLAGS = ${AM_CFLAGS}
libec_example_la_CXXFLAGS = ${AM_CXXFLAGS}
@@ -4525,7 +4572,7 @@ libec_test_jerasure_generic_la_LDFLAGS = ${AM_LDFLAGS} -export-symbols-regex '.*
unittest_erasure_code_plugin_SOURCES = test/erasure-code/TestErasureCodePlugin.cc
unittest_erasure_code_plugin_CXXFLAGS = $(UNITTEST_CXXFLAGS)
unittest_erasure_code_plugin_LDADD = $(LIBOSD) $(LIBCOMMON) \
- $(UNITTEST_LDADD) $(CEPH_GLOBAL) $(am__append_43)
+ $(UNITTEST_LDADD) $(CEPH_GLOBAL) $(am__append_44)
unittest_erasure_code_jerasure_SOURCES = \
test/erasure-code/TestErasureCodeJerasure.cc \
${jerasure_sources}
@@ -4539,13 +4586,13 @@ unittest_erasure_code_jerasure_CXXFLAGS = $(UNITTEST_CXXFLAGS) \
-Ierasure-code/jerasure/jerasure/include
unittest_erasure_code_jerasure_LDADD = $(LIBOSD) $(LIBCOMMON) \
- $(UNITTEST_LDADD) $(CEPH_GLOBAL) $(am__append_44)
+ $(UNITTEST_LDADD) $(CEPH_GLOBAL) $(am__append_45)
unittest_erasure_code_plugin_jerasure_SOURCES = \
test/erasure-code/TestErasureCodePluginJerasure.cc
unittest_erasure_code_plugin_jerasure_CXXFLAGS = ${AM_CXXFLAGS} ${UNITTEST_CXXFLAGS}
unittest_erasure_code_plugin_jerasure_LDADD = $(LIBOSD) $(LIBCOMMON) \
- $(UNITTEST_LDADD) $(CEPH_GLOBAL) $(am__append_45)
+ $(UNITTEST_LDADD) $(CEPH_GLOBAL) $(am__append_46)
unittest_erasure_code_example_SOURCES = test/erasure-code/TestErasureCodeExample.cc
unittest_erasure_code_example_CXXFLAGS = $(UNITTEST_CXXFLAGS)
unittest_erasure_code_example_LDADD = $(LIBOSD) $(LIBCOMMON) $(UNITTEST_LDADD) $(CEPH_GLOBAL)
@@ -4588,8 +4635,8 @@ ceph_dencoder_LDADD = \
# These should always use explicit _CFLAGS/_CXXFLAGS so avoid basename conflicts
-ceph_dencoder_CFLAGS = ${AM_CFLAGS} $(am__append_46)
-ceph_dencoder_CXXFLAGS = ${AM_CXXFLAGS} $(am__append_47)
+ceph_dencoder_CFLAGS = ${AM_CFLAGS} $(am__append_47)
+ceph_dencoder_CXXFLAGS = ${AM_CXXFLAGS} $(am__append_48)
get_command_descriptions_SOURCES = test/common/get_command_descriptions.cc
get_command_descriptions_LDADD = $(LIBMON) $(LIBCOMMON) $(CEPH_GLOBAL)
@@ -4792,13 +4839,16 @@ unittest_osd_types_LDADD = $(UNITTEST_LDADD) $(CEPH_GLOBAL)
unittest_pglog_SOURCES = test/osd/TestPGLog.cc
unittest_pglog_CXXFLAGS = $(UNITTEST_CXXFLAGS)
unittest_pglog_LDADD = $(LIBOSD) $(UNITTEST_LDADD) $(CEPH_GLOBAL) \
- $(am__append_51)
+ $(am__append_52)
unittest_ecbackend_SOURCES = test/osd/TestECBackend.cc
unittest_ecbackend_CXXFLAGS = $(UNITTEST_CXXFLAGS)
unittest_ecbackend_LDADD = $(LIBOSD) $(UNITTEST_LDADD) $(CEPH_GLOBAL)
unittest_hitset_SOURCES = test/osd/hitset.cc
unittest_hitset_CXXFLAGS = $(UNITTEST_CXXFLAGS)
unittest_hitset_LDADD = $(LIBOSD) $(UNITTEST_LDADD) $(CEPH_GLOBAL)
+unittest_io_priority_SOURCES = test/common/test_io_priority.cc
+unittest_io_priority_CXXFLAGS = $(UNITTEST_CXXFLAGS)
+unittest_io_priority_LDADD = $(UNITTEST_LDADD) $(CEPH_GLOBAL)
unittest_gather_SOURCES = test/gather.cc
unittest_gather_LDADD = $(UNITTEST_LDADD) $(CEPH_GLOBAL)
unittest_gather_CXXFLAGS = $(UNITTEST_CXXFLAGS)
@@ -5136,10 +5186,10 @@ ceph_kvstore_tool_LDADD = $(LIBOS) $(CEPH_GLOBAL)
ceph_kvstore_tool_CXXFLAGS = $(UNITTEST_CXXFLAGS)
ceph_filestore_tool_SOURCES = tools/ceph_filestore_tool.cc
ceph_filestore_tool_LDADD = $(LIBOSD) $(LIBOS) $(CEPH_GLOBAL) \
- -lboost_program_options $(am__append_56)
+ -lboost_program_options $(am__append_57)
ceph_filestore_dump_SOURCES = tools/ceph_filestore_dump.cc
ceph_filestore_dump_LDADD = $(LIBOSD) $(LIBOS) $(CEPH_GLOBAL) \
- $(BOOST_PROGRAM_OPTIONS_LIBS) $(am__append_57)
+ $(BOOST_PROGRAM_OPTIONS_LIBS) $(am__append_58)
monmaptool_SOURCES = tools/monmaptool.cc
monmaptool_LDADD = $(CEPH_GLOBAL) $(LIBCOMMON)
crushtool_SOURCES = tools/crushtool.cc
@@ -5167,7 +5217,7 @@ rados_LDADD = libcls_lock_client.la $(LIBRADOS) $(CEPH_GLOBAL)
@WITH_REST_BENCH_TRUE@ common/obj_bencher.cc # needs cleanup so \
@WITH_REST_BENCH_TRUE@ it can go in libcommon.la
@WITH_REST_BENCH_TRUE at rest_bench_LDADD = $(CEPH_GLOBAL) \
- at WITH_REST_BENCH_TRUE@ $(am__append_59) $(am__append_60)
+ at WITH_REST_BENCH_TRUE@ $(am__append_60) $(am__append_61)
@WITH_REST_BENCH_TRUE@@WITH_SYSTEM_LIBS3_FALSE at rest_bench_CXXFLAGS = ${AM_CXXFLAGS} -I$(top_srcdir)/src/libs3/inc
ceph_conf_SOURCES = tools/ceph_conf.cc
ceph_conf_LDADD = $(CEPH_GLOBAL) $(LIBCOMMON)
@@ -5600,6 +5650,26 @@ java/native/libcephfs_jni_la-JniConstants.lo: \
java/native/$(DEPDIR)/$(am__dirstamp)
libcephfs_jni.la: $(libcephfs_jni_la_OBJECTS) $(libcephfs_jni_la_DEPENDENCIES) $(EXTRA_libcephfs_jni_la_DEPENDENCIES)
$(AM_V_CXXLD)$(libcephfs_jni_la_LINK) $(am_libcephfs_jni_la_rpath) $(libcephfs_jni_la_OBJECTS) $(libcephfs_jni_la_LIBADD) $(LIBS)
+rgw/$(am__dirstamp):
+ @$(MKDIR_P) rgw
+ @: > rgw/$(am__dirstamp)
+rgw/$(DEPDIR)/$(am__dirstamp):
+ @$(MKDIR_P) rgw/$(DEPDIR)
+ @: > rgw/$(DEPDIR)/$(am__dirstamp)
+rgw/libcivetweb_la-rgw_civetweb.lo: rgw/$(am__dirstamp) \
+ rgw/$(DEPDIR)/$(am__dirstamp)
+rgw/libcivetweb_la-rgw_civetweb_log.lo: rgw/$(am__dirstamp) \
+ rgw/$(DEPDIR)/$(am__dirstamp)
+civetweb/src/$(am__dirstamp):
+ @$(MKDIR_P) civetweb/src
+ @: > civetweb/src/$(am__dirstamp)
+civetweb/src/$(DEPDIR)/$(am__dirstamp):
+ @$(MKDIR_P) civetweb/src/$(DEPDIR)
+ @: > civetweb/src/$(DEPDIR)/$(am__dirstamp)
+civetweb/src/libcivetweb_la-civetweb.lo: civetweb/src/$(am__dirstamp) \
+ civetweb/src/$(DEPDIR)/$(am__dirstamp)
+libcivetweb.la: $(libcivetweb_la_OBJECTS) $(libcivetweb_la_DEPENDENCIES) $(EXTRA_libcivetweb_la_DEPENDENCIES)
+ $(AM_V_CXXLD)$(libcivetweb_la_LINK) $(am_libcivetweb_la_rpath) $(libcivetweb_la_OBJECTS) $(libcivetweb_la_LIBADD) $(LIBS)
client/$(am__dirstamp):
@$(MKDIR_P) client
@: > client/$(am__dirstamp)
@@ -5907,6 +5977,7 @@ mds/MDSMap.lo: mds/$(am__dirstamp) mds/$(DEPDIR)/$(am__dirstamp)
mds/inode_backtrace.lo: mds/$(am__dirstamp) \
mds/$(DEPDIR)/$(am__dirstamp)
mds/mdstypes.lo: mds/$(am__dirstamp) mds/$(DEPDIR)/$(am__dirstamp)
+mds/flock.lo: mds/$(am__dirstamp) mds/$(DEPDIR)/$(am__dirstamp)
libcommon.la: $(libcommon_la_OBJECTS) $(libcommon_la_DEPENDENCIES) $(EXTRA_libcommon_la_DEPENDENCIES)
$(AM_V_CXXLD)$(CXXLINK) $(libcommon_la_OBJECTS) $(libcommon_la_LIBADD) $(LIBS)
common/libcommon_crc_la-sctp_crc32.lo: common/$(am__dirstamp) \
@@ -6232,7 +6303,6 @@ mds/Capability.lo: mds/$(am__dirstamp) mds/$(DEPDIR)/$(am__dirstamp)
mds/Dumper.lo: mds/$(am__dirstamp) mds/$(DEPDIR)/$(am__dirstamp)
mds/Resetter.lo: mds/$(am__dirstamp) mds/$(DEPDIR)/$(am__dirstamp)
mds/MDS.lo: mds/$(am__dirstamp) mds/$(DEPDIR)/$(am__dirstamp)
-mds/flock.lo: mds/$(am__dirstamp) mds/$(DEPDIR)/$(am__dirstamp)
mds/locks.lo: mds/$(am__dirstamp) mds/$(DEPDIR)/$(am__dirstamp)
mds/journal.lo: mds/$(am__dirstamp) mds/$(DEPDIR)/$(am__dirstamp)
mds/Server.lo: mds/$(am__dirstamp) mds/$(DEPDIR)/$(am__dirstamp)
@@ -6477,12 +6547,6 @@ librbd/WatchCtx.lo: librbd/$(am__dirstamp) \
librbd/$(DEPDIR)/$(am__dirstamp)
librbd.la: $(librbd_la_OBJECTS) $(librbd_la_DEPENDENCIES) $(EXTRA_librbd_la_DEPENDENCIES)
$(AM_V_CXXLD)$(librbd_la_LINK) -rpath $(libdir) $(librbd_la_OBJECTS) $(librbd_la_LIBADD) $(LIBS)
-rgw/$(am__dirstamp):
- @$(MKDIR_P) rgw
- @: > rgw/$(am__dirstamp)
-rgw/$(DEPDIR)/$(am__dirstamp):
- @$(MKDIR_P) rgw/$(DEPDIR)
- @: > rgw/$(DEPDIR)/$(am__dirstamp)
rgw/librgw_la-librgw.lo: rgw/$(am__dirstamp) \
rgw/$(DEPDIR)/$(am__dirstamp)
rgw/librgw_la-rgw_acl.lo: rgw/$(am__dirstamp) \
@@ -6817,6 +6881,12 @@ test/erasure-code/ceph_erasure_code_benchmark.$(OBJEXT): \
ceph_erasure_code_benchmark$(EXEEXT): $(ceph_erasure_code_benchmark_OBJECTS) $(ceph_erasure_code_benchmark_DEPENDENCIES) $(EXTRA_ceph_erasure_code_benchmark_DEPENDENCIES)
@rm -f ceph_erasure_code_benchmark$(EXEEXT)
$(AM_V_CXXLD)$(CXXLINK) $(ceph_erasure_code_benchmark_OBJECTS) $(ceph_erasure_code_benchmark_LDADD) $(LIBS)
+test/erasure-code/ceph_erasure_code_non_regression.$(OBJEXT): \
+ test/erasure-code/$(am__dirstamp) \
+ test/erasure-code/$(DEPDIR)/$(am__dirstamp)
+ceph_erasure_code_non_regression$(EXEEXT): $(ceph_erasure_code_non_regression_OBJECTS) $(ceph_erasure_code_non_regression_DEPENDENCIES) $(EXTRA_ceph_erasure_code_non_regression_DEPENDENCIES)
+ @rm -f ceph_erasure_code_non_regression$(EXEEXT)
+ $(AM_V_CXXLD)$(CXXLINK) $(ceph_erasure_code_non_regression_OBJECTS) $(ceph_erasure_code_non_regression_LDADD) $(LIBS)
tools/ceph_filestore_dump.$(OBJEXT): tools/$(am__dirstamp) \
tools/$(DEPDIR)/$(am__dirstamp)
ceph_filestore_dump$(EXEEXT): $(ceph_filestore_dump_OBJECTS) $(ceph_filestore_dump_DEPENDENCIES) $(EXTRA_ceph_filestore_dump_DEPENDENCIES)
@@ -7533,16 +7603,6 @@ rgw/rgw_swift_auth.$(OBJEXT): rgw/$(am__dirstamp) \
rgw/$(DEPDIR)/$(am__dirstamp)
rgw/rgw_loadgen.$(OBJEXT): rgw/$(am__dirstamp) \
rgw/$(DEPDIR)/$(am__dirstamp)
-rgw/rgw_civetweb.$(OBJEXT): rgw/$(am__dirstamp) \
- rgw/$(DEPDIR)/$(am__dirstamp)
-civetweb/src/$(am__dirstamp):
- @$(MKDIR_P) civetweb/src
- @: > civetweb/src/$(am__dirstamp)
-civetweb/src/$(DEPDIR)/$(am__dirstamp):
- @$(MKDIR_P) civetweb/src/$(DEPDIR)
- @: > civetweb/src/$(DEPDIR)/$(am__dirstamp)
-civetweb/src/radosgw-civetweb.$(OBJEXT): civetweb/src/$(am__dirstamp) \
- civetweb/src/$(DEPDIR)/$(am__dirstamp)
rgw/rgw_main.$(OBJEXT): rgw/$(am__dirstamp) \
rgw/$(DEPDIR)/$(am__dirstamp)
radosgw$(EXEEXT): $(radosgw_OBJECTS) $(radosgw_DEPENDENCIES) $(EXTRA_radosgw_DEPENDENCIES)
@@ -7753,6 +7813,8 @@ mds/test_build_libcommon-inode_backtrace.$(OBJEXT): \
mds/$(am__dirstamp) mds/$(DEPDIR)/$(am__dirstamp)
mds/test_build_libcommon-mdstypes.$(OBJEXT): mds/$(am__dirstamp) \
mds/$(DEPDIR)/$(am__dirstamp)
+mds/test_build_libcommon-flock.$(OBJEXT): mds/$(am__dirstamp) \
+ mds/$(DEPDIR)/$(am__dirstamp)
test_build_libcommon$(EXEEXT): $(test_build_libcommon_OBJECTS) $(test_build_libcommon_DEPENDENCIES) $(EXTRA_test_build_libcommon_DEPENDENCIES)
@rm -f test_build_libcommon$(EXEEXT)
$(AM_V_CXXLD)$(test_build_libcommon_LINK) $(test_build_libcommon_OBJECTS) $(test_build_libcommon_LDADD) $(LIBS)
@@ -8082,6 +8144,12 @@ test/osd/unittest_hitset-hitset.$(OBJEXT): test/osd/$(am__dirstamp) \
unittest_hitset$(EXEEXT): $(unittest_hitset_OBJECTS) $(unittest_hitset_DEPENDENCIES) $(EXTRA_unittest_hitset_DEPENDENCIES)
@rm -f unittest_hitset$(EXEEXT)
$(AM_V_CXXLD)$(unittest_hitset_LINK) $(unittest_hitset_OBJECTS) $(unittest_hitset_LDADD) $(LIBS)
+test/common/unittest_io_priority-test_io_priority.$(OBJEXT): \
+ test/common/$(am__dirstamp) \
+ test/common/$(DEPDIR)/$(am__dirstamp)
+unittest_io_priority$(EXEEXT): $(unittest_io_priority_OBJECTS) $(unittest_io_priority_DEPENDENCIES) $(EXTRA_unittest_io_priority_DEPENDENCIES)
+ @rm -f unittest_io_priority$(EXEEXT)
+ $(AM_V_CXXLD)$(unittest_io_priority_LINK) $(unittest_io_priority_OBJECTS) $(unittest_io_priority_LDADD) $(LIBS)
test/unittest_ipaddr-test_ipaddr.$(OBJEXT): test/$(am__dirstamp) \
test/$(DEPDIR)/$(am__dirstamp)
unittest_ipaddr$(EXEEXT): $(unittest_ipaddr_OBJECTS) $(unittest_ipaddr_DEPENDENCIES) $(EXTRA_unittest_ipaddr_DEPENDENCIES)
@@ -8472,7 +8540,8 @@ mostlyclean-compile:
-rm -f auth/none/AuthNoneAuthorizeHandler.lo
-rm -f auth/unknown/AuthUnknownAuthorizeHandler.$(OBJEXT)
-rm -f auth/unknown/AuthUnknownAuthorizeHandler.lo
- -rm -f civetweb/src/radosgw-civetweb.$(OBJEXT)
+ -rm -f civetweb/src/libcivetweb_la-civetweb.$(OBJEXT)
+ -rm -f civetweb/src/libcivetweb_la-civetweb.lo
-rm -f client/Client.$(OBJEXT)
-rm -f client/Client.lo
-rm -f client/ClientSnapRealm.$(OBJEXT)
@@ -9032,6 +9101,7 @@ mostlyclean-compile:
-rm -f mds/snap.$(OBJEXT)
-rm -f mds/snap.lo
-rm -f mds/test_build_libcommon-MDSMap.$(OBJEXT)
+ -rm -f mds/test_build_libcommon-flock.$(OBJEXT)
-rm -f mds/test_build_libcommon-inode_backtrace.$(OBJEXT)
-rm -f mds/test_build_libcommon-mdstypes.$(OBJEXT)
-rm -f mon/AuthMonitor.$(OBJEXT)
@@ -9209,6 +9279,10 @@ mostlyclean-compile:
-rm -f rgw/ceph_dencoder-rgw_dencoder.$(OBJEXT)
-rm -f rgw/ceph_dencoder-rgw_env.$(OBJEXT)
-rm -f rgw/ceph_dencoder-rgw_json_enc.$(OBJEXT)
+ -rm -f rgw/libcivetweb_la-rgw_civetweb.$(OBJEXT)
+ -rm -f rgw/libcivetweb_la-rgw_civetweb.lo
+ -rm -f rgw/libcivetweb_la-rgw_civetweb_log.$(OBJEXT)
+ -rm -f rgw/libcivetweb_la-rgw_civetweb_log.lo
-rm -f rgw/librgw_la-librgw.$(OBJEXT)
-rm -f rgw/librgw_la-librgw.lo
-rm -f rgw/librgw_la-rgw_acl.$(OBJEXT)
@@ -9278,7 +9352,6 @@ mostlyclean-compile:
-rm -f rgw/librgw_la-rgw_xml.$(OBJEXT)
-rm -f rgw/librgw_la-rgw_xml.lo
-rm -f rgw/rgw_admin.$(OBJEXT)
- -rm -f rgw/rgw_civetweb.$(OBJEXT)
-rm -f rgw/rgw_common.$(OBJEXT)
-rm -f rgw/rgw_env.$(OBJEXT)
-rm -f rgw/rgw_http_client.$(OBJEXT)
@@ -9380,6 +9453,7 @@ mostlyclean-compile:
-rm -f test/common/unittest_context-test_context.$(OBJEXT)
-rm -f test/common/unittest_crc32c-test_crc32c.$(OBJEXT)
-rm -f test/common/unittest_histogram-histogram.$(OBJEXT)
+ -rm -f test/common/unittest_io_priority-test_io_priority.$(OBJEXT)
-rm -f test/common/unittest_sharedptr_registry-test_sharedptr_registry.$(OBJEXT)
-rm -f test/common/unittest_sloppy_crc_map-test_sloppy_crc_map.$(OBJEXT)
-rm -f test/common/unittest_str_map-test_str_map.$(OBJEXT)
@@ -9390,6 +9464,7 @@ mostlyclean-compile:
-rm -f test/encoding/ceph_dencoder-ceph_dencoder.$(OBJEXT)
-rm -f test/erasure-code/ceph_erasure_code.$(OBJEXT)
-rm -f test/erasure-code/ceph_erasure_code_benchmark.$(OBJEXT)
+ -rm -f test/erasure-code/ceph_erasure_code_non_regression.$(OBJEXT)
-rm -f test/erasure-code/libec_example_la-ErasureCodePluginExample.$(OBJEXT)
-rm -f test/erasure-code/libec_example_la-ErasureCodePluginExample.lo
-rm -f test/erasure-code/libec_fail_to_initialize_la-ErasureCodePluginFailToInitialize.$(OBJEXT)
@@ -9584,7 +9659,7 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote at auth/cephx/$(DEPDIR)/CephxSessionHandler.Plo at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at auth/none/$(DEPDIR)/AuthNoneAuthorizeHandler.Plo at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at auth/unknown/$(DEPDIR)/AuthUnknownAuthorizeHandler.Plo at am__quote@
- at AMDEP_TRUE@@am__include@ @am__quote at civetweb/src/$(DEPDIR)/radosgw-civetweb.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at civetweb/src/$(DEPDIR)/libcivetweb_la-civetweb.Plo at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at client/$(DEPDIR)/Client.Plo at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at client/$(DEPDIR)/ClientSnapRealm.Plo at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at client/$(DEPDIR)/Dentry.Plo at am__quote@
@@ -9920,6 +9995,7 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote at mds/$(DEPDIR)/mdstypes.Plo at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at mds/$(DEPDIR)/snap.Plo at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at mds/$(DEPDIR)/test_build_libcommon-MDSMap.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at mds/$(DEPDIR)/test_build_libcommon-flock.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at mds/$(DEPDIR)/test_build_libcommon-inode_backtrace.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at mds/$(DEPDIR)/test_build_libcommon-mdstypes.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at mon/$(DEPDIR)/AuthMonitor.Plo at am__quote@
@@ -10020,6 +10096,8 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote at rgw/$(DEPDIR)/ceph_dencoder-rgw_dencoder.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at rgw/$(DEPDIR)/ceph_dencoder-rgw_env.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at rgw/$(DEPDIR)/ceph_dencoder-rgw_json_enc.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at rgw/$(DEPDIR)/libcivetweb_la-rgw_civetweb.Plo at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at rgw/$(DEPDIR)/libcivetweb_la-rgw_civetweb_log.Plo at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at rgw/$(DEPDIR)/librgw_la-librgw.Plo at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at rgw/$(DEPDIR)/librgw_la-rgw_acl.Plo at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at rgw/$(DEPDIR)/librgw_la-rgw_acl_s3.Plo at am__quote@
@@ -10055,7 +10133,6 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote at rgw/$(DEPDIR)/librgw_la-rgw_user.Plo at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at rgw/$(DEPDIR)/librgw_la-rgw_xml.Plo at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at rgw/$(DEPDIR)/rgw_admin.Po at am__quote@
- at AMDEP_TRUE@@am__include@ @am__quote at rgw/$(DEPDIR)/rgw_civetweb.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at rgw/$(DEPDIR)/rgw_common.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at rgw/$(DEPDIR)/rgw_env.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at rgw/$(DEPDIR)/rgw_http_client.Po at am__quote@
@@ -10206,6 +10283,7 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote at test/common/$(DEPDIR)/unittest_context-test_context.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at test/common/$(DEPDIR)/unittest_crc32c-test_crc32c.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at test/common/$(DEPDIR)/unittest_histogram-histogram.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at test/common/$(DEPDIR)/unittest_io_priority-test_io_priority.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at test/common/$(DEPDIR)/unittest_sharedptr_registry-test_sharedptr_registry.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at test/common/$(DEPDIR)/unittest_sloppy_crc_map-test_sloppy_crc_map.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at test/common/$(DEPDIR)/unittest_str_map-test_str_map.Po at am__quote@
@@ -10216,6 +10294,7 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote at test/encoding/$(DEPDIR)/ceph_dencoder-ceph_dencoder.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at test/erasure-code/$(DEPDIR)/ceph_erasure_code.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at test/erasure-code/$(DEPDIR)/ceph_erasure_code_benchmark.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at test/erasure-code/$(DEPDIR)/ceph_erasure_code_non_regression.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at test/erasure-code/$(DEPDIR)/libec_example_la-ErasureCodePluginExample.Plo at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at test/erasure-code/$(DEPDIR)/libec_fail_to_initialize_la-ErasureCodePluginFailToInitialize.Plo at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at test/erasure-code/$(DEPDIR)/libec_fail_to_register_la-ErasureCodePluginFailToRegister.Plo at am__quote@
@@ -10377,6 +10456,13 @@ common/libcommon_crc_la-crc32c_intel_fast_zero_asm.lo: common/crc32c_intel_fast_
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC at am__nodep@)$(LTCOMPILE) -c -o $@ $<
+civetweb/src/libcivetweb_la-civetweb.lo: civetweb/src/civetweb.c
+ at am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libcivetweb_la_CFLAGS) $(CFLAGS) -MT civetweb/src/libcivetweb_la-civetweb.lo -MD -MP -MF civetweb/src/$(DEPDIR)/libcivetweb_la-civetweb.Tpo -c -o civetweb/src/libcivetweb_la-civetweb.lo `test -f 'civetweb/src/civetweb.c' || echo '$(srcdir)/'`civetweb/src/civetweb.c
+ at am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) civetweb/src/$(DEPDIR)/libcivetweb_la-civetweb.Tpo civetweb/src/$(DEPDIR)/libcivetweb_la-civetweb.Plo
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='civetweb/src/civetweb.c' object='civetweb/src/libcivetweb_la-civetweb.lo' libtool=yes @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(AM_V_CC at am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libcivetweb_la_CFLAGS) $(CFLAGS) -c -o civetweb/src/libcivetweb_la-civetweb.lo `test -f 'civetweb/src/civetweb.c' || echo '$(srcdir)/'`civetweb/src/civetweb.c
+
common/libcommon_crc_la-sctp_crc32.lo: common/sctp_crc32.c
@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(libcommon_crc_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT common/libcommon_crc_la-sctp_crc32.lo -MD -MP -MF common/$(DEPDIR)/libcommon_crc_la-sctp_crc32.Tpo -c -o common/libcommon_crc_la-sctp_crc32.lo `test -f 'common/sctp_crc32.c' || echo '$(srcdir)/'`common/sctp_crc32.c
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) common/$(DEPDIR)/libcommon_crc_la-sctp_crc32.Tpo common/$(DEPDIR)/libcommon_crc_la-sctp_crc32.Plo
@@ -10748,20 +10834,6 @@ test/librbd/ceph_test_librbd_fsx-fsx.obj: test/librbd/fsx.c
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(ceph_test_librbd_fsx_CFLAGS) $(CFLAGS) -c -o test/librbd/ceph_test_librbd_fsx-fsx.obj `if test -f 'test/librbd/fsx.c'; then $(CYGPATH_W) 'test/librbd/fsx.c'; else $(CYGPATH_W) '$(srcdir)/test/librbd/fsx.c'; fi`
-civetweb/src/radosgw-civetweb.o: civetweb/src/civetweb.c
- at am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(radosgw_CFLAGS) $(CFLAGS) -MT civetweb/src/radosgw-civetweb.o -MD -MP -MF civetweb/src/$(DEPDIR)/radosgw-civetweb.Tpo -c -o civetweb/src/radosgw-civetweb.o `test -f 'civetweb/src/civetweb.c' || echo '$(srcdir)/'`civetweb/src/civetweb.c
- at am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) civetweb/src/$(DEPDIR)/radosgw-civetweb.Tpo civetweb/src/$(DEPDIR)/radosgw-civetweb.Po
- at AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='civetweb/src/civetweb.c' object='civetweb/src/radosgw-civetweb.o' libtool=no @AMDEPBACKSLASH@
- at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
- at am__fastdepCC_FALSE@ $(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(radosgw_CFLAGS) $(CFLAGS) -c -o civetweb/src/radosgw-civetweb.o `test -f 'civetweb/src/civetweb.c' || echo '$(srcdir)/'`civetweb/src/civetweb.c
-
-civetweb/src/radosgw-civetweb.obj: civetweb/src/civetweb.c
- at am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(radosgw_CFLAGS) $(CFLAGS) -MT civetweb/src/radosgw-civetweb.obj -MD -MP -MF civetweb/src/$(DEPDIR)/radosgw-civetweb.Tpo -c -o civetweb/src/radosgw-civetweb.obj `if test -f 'civetweb/src/civetweb.c'; then $(CYGPATH_W) 'civetweb/src/civetweb.c'; else $(CYGPATH_W) '$(srcdir)/civetweb/src/civetweb.c'; fi`
- at am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) civetweb/src/$(DEPDIR)/radosgw-civetweb.Tpo civetweb/src/$(DEPDIR)/radosgw-civetweb.Po
- at AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='civetweb/src/civetweb.c' object='civetweb/src/radosgw-civetweb.obj' libtool=no @AMDEPBACKSLASH@
- at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
- at am__fastdepCC_FALSE@ $(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(radosgw_CFLAGS) $(CFLAGS) -c -o civetweb/src/radosgw-civetweb.obj `if test -f 'civetweb/src/civetweb.c'; then $(CYGPATH_W) 'civetweb/src/civetweb.c'; else $(CYGPATH_W) '$(srcdir)/civetweb/src/civetweb.c'; fi`
-
test_build_libcommon-ceph_ver.o: ceph_ver.c
@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(test_build_libcommon_CFLAGS) $(CFLAGS) -MT test_build_libcommon-ceph_ver.o -MD -MP -MF $(DEPDIR)/test_build_libcommon-ceph_ver.Tpo -c -o test_build_libcommon-ceph_ver.o `test -f 'ceph_ver.c' || echo '$(srcdir)/'`ceph_ver.c
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/test_build_libcommon-ceph_ver.Tpo $(DEPDIR)/test_build_libcommon-ceph_ver.Po
@@ -11178,6 +11250,20 @@ java/native/libcephfs_jni_la-JniConstants.lo: java/native/JniConstants.cpp
@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCXX_FALSE@ $(AM_V_CXX at am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libcephfs_jni_la_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -c -o java/native/libcephfs_jni_la-JniConstants.lo `test -f 'java/native/JniConstants.cpp' || echo '$(srcdir)/'`java/native/JniConstants.cpp
+rgw/libcivetweb_la-rgw_civetweb.lo: rgw/rgw_civetweb.cc
+ at am__fastdepCXX_TRUE@ $(AM_V_CXX)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libcivetweb_la_CXXFLAGS) $(CXXFLAGS) -MT rgw/libcivetweb_la-rgw_civetweb.lo -MD -MP -MF rgw/$(DEPDIR)/libcivetweb_la-rgw_civetweb.Tpo -c -o rgw/libcivetweb_la-rgw_civetweb.lo `test -f 'rgw/rgw_civetweb.cc' || echo '$(srcdir)/'`rgw/rgw_civetweb.cc
+ at am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) rgw/$(DEPDIR)/libcivetweb_la-rgw_civetweb.Tpo rgw/$(DEPDIR)/libcivetweb_la-rgw_civetweb.Plo
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='rgw/rgw_civetweb.cc' object='rgw/libcivetweb_la-rgw_civetweb.lo' libtool=yes @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCXX_FALSE@ $(AM_V_CXX at am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libcivetweb_la_CXXFLAGS) $(CXXFLAGS) -c -o rgw/libcivetweb_la-rgw_civetweb.lo `test -f 'rgw/rgw_civetweb.cc' || echo '$(srcdir)/'`rgw/rgw_civetweb.cc
+
+rgw/libcivetweb_la-rgw_civetweb_log.lo: rgw/rgw_civetweb_log.cc
+ at am__fastdepCXX_TRUE@ $(AM_V_CXX)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libcivetweb_la_CXXFLAGS) $(CXXFLAGS) -MT rgw/libcivetweb_la-rgw_civetweb_log.lo -MD -MP -MF rgw/$(DEPDIR)/libcivetweb_la-rgw_civetweb_log.Tpo -c -o rgw/libcivetweb_la-rgw_civetweb_log.lo `test -f 'rgw/rgw_civetweb_log.cc' || echo '$(srcdir)/'`rgw/rgw_civetweb_log.cc
+ at am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) rgw/$(DEPDIR)/libcivetweb_la-rgw_civetweb_log.Tpo rgw/$(DEPDIR)/libcivetweb_la-rgw_civetweb_log.Plo
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='rgw/rgw_civetweb_log.cc' object='rgw/libcivetweb_la-rgw_civetweb_log.lo' libtool=yes @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCXX_FALSE@ $(AM_V_CXX at am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libcivetweb_la_CXXFLAGS) $(CXXFLAGS) -c -o rgw/libcivetweb_la-rgw_civetweb_log.lo `test -f 'rgw/rgw_civetweb_log.cc' || echo '$(srcdir)/'`rgw/rgw_civetweb_log.cc
+
common/libcommon_crc_la-crc32c.lo: common/crc32c.cc
@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(libcommon_crc_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -MT common/libcommon_crc_la-crc32c.lo -MD -MP -MF common/$(DEPDIR)/libcommon_crc_la-crc32c.Tpo -c -o common/libcommon_crc_la-crc32c.lo `test -f 'common/crc32c.cc' || echo '$(srcdir)/'`common/crc32c.cc
@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) common/$(DEPDIR)/libcommon_crc_la-crc32c.Tpo common/$(DEPDIR)/libcommon_crc_la-crc32c.Plo
@@ -13705,6 +13791,20 @@ mds/test_build_libcommon-mdstypes.obj: mds/mdstypes.cc
@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCXX_FALSE@ $(AM_V_CXX at am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(test_build_libcommon_CXXFLAGS) $(CXXFLAGS) -c -o mds/test_build_libcommon-mdstypes.obj `if test -f 'mds/mdstypes.cc'; then $(CYGPATH_W) 'mds/mdstypes.cc'; else $(CYGPATH_W) '$(srcdir)/mds/mdstypes.cc'; fi`
+mds/test_build_libcommon-flock.o: mds/flock.cc
+ at am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(test_build_libcommon_CXXFLAGS) $(CXXFLAGS) -MT mds/test_build_libcommon-flock.o -MD -MP -MF mds/$(DEPDIR)/test_build_libcommon-flock.Tpo -c -o mds/test_build_libcommon-flock.o `test -f 'mds/flock.cc' || echo '$(srcdir)/'`mds/flock.cc
+ at am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) mds/$(DEPDIR)/test_build_libcommon-flock.Tpo mds/$(DEPDIR)/test_build_libcommon-flock.Po
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='mds/flock.cc' object='mds/test_build_libcommon-flock.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCXX_FALSE@ $(AM_V_CXX at am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(test_build_libcommon_CXXFLAGS) $(CXXFLAGS) -c -o mds/test_build_libcommon-flock.o `test -f 'mds/flock.cc' || echo '$(srcdir)/'`mds/flock.cc
+
+mds/test_build_libcommon-flock.obj: mds/flock.cc
+ at am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(test_build_libcommon_CXXFLAGS) $(CXXFLAGS) -MT mds/test_build_libcommon-flock.obj -MD -MP -MF mds/$(DEPDIR)/test_build_libcommon-flock.Tpo -c -o mds/test_build_libcommon-flock.obj `if test -f 'mds/flock.cc'; then $(CYGPATH_W) 'mds/flock.cc'; else $(CYGPATH_W) '$(srcdir)/mds/flock.cc'; fi`
+ at am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) mds/$(DEPDIR)/test_build_libcommon-flock.Tpo mds/$(DEPDIR)/test_build_libcommon-flock.Po
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='mds/flock.cc' object='mds/test_build_libcommon-flock.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCXX_FALSE@ $(AM_V_CXX at am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(test_build_libcommon_CXXFLAGS) $(CXXFLAGS) -c -o mds/test_build_libcommon-flock.obj `if test -f 'mds/flock.cc'; then $(CYGPATH_W) 'mds/flock.cc'; else $(CYGPATH_W) '$(srcdir)/mds/flock.cc'; fi`
+
test/test_build_librados-buildtest_skeleton.o: test/buildtest_skeleton.cc
@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(test_build_librados_CXXFLAGS) $(CXXFLAGS) -MT test/test_build_librados-buildtest_skeleton.o -MD -MP -MF test/$(DEPDIR)/test_build_librados-buildtest_skeleton.Tpo -c -o test/test_build_librados-buildtest_skeleton.o `test -f 'test/buildtest_skeleton.cc' || echo '$(srcdir)/'`test/buildtest_skeleton.cc
@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) test/$(DEPDIR)/test_build_librados-buildtest_skeleton.Tpo test/$(DEPDIR)/test_build_librados-buildtest_skeleton.Po
@@ -14755,6 +14855,20 @@ test/osd/unittest_hitset-hitset.obj: test/osd/hitset.cc
@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCXX_FALSE@ $(AM_V_CXX at am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(unittest_hitset_CXXFLAGS) $(CXXFLAGS) -c -o test/osd/unittest_hitset-hitset.obj `if test -f 'test/osd/hitset.cc'; then $(CYGPATH_W) 'test/osd/hitset.cc'; else $(CYGPATH_W) '$(srcdir)/test/osd/hitset.cc'; fi`
+test/common/unittest_io_priority-test_io_priority.o: test/common/test_io_priority.cc
+ at am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(unittest_io_priority_CXXFLAGS) $(CXXFLAGS) -MT test/common/unittest_io_priority-test_io_priority.o -MD -MP -MF test/common/$(DEPDIR)/unittest_io_priority-test_io_priority.Tpo -c -o test/common/unittest_io_priority-test_io_priority.o `test -f 'test/common/test_io_priority.cc' || echo '$(srcdir)/'`test/common/test_io_priority.cc
+ at am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) test/common/$(DEPDIR)/unittest_io_priority-test_io_priority.Tpo test/common/$(DEPDIR)/unittest_io_priority-test_io_priority.Po
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='test/common/test_io_priority.cc' object='test/common/unittest_io_priority-test_io_priority.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCXX_FALSE@ $(AM_V_CXX at am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(unittest_io_priority_CXXFLAGS) $(CXXFLAGS) -c -o test/common/unittest_io_priority-test_io_priority.o `test -f 'test/common/test_io_priority.cc' || echo '$(srcdir)/'`test/common/test_io_priority.cc
+
+test/common/unittest_io_priority-test_io_priority.obj: test/common/test_io_priority.cc
+ at am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(unittest_io_priority_CXXFLAGS) $(CXXFLAGS) -MT test/common/unittest_io_priority-test_io_priority.obj -MD -MP -MF test/common/$(DEPDIR)/unittest_io_priority-test_io_priority.Tpo -c -o test/common/unittest_io_priority-test_io_priority.obj `if test -f 'test/common/test_io_priority.cc'; then $(CYGPATH_W) 'test/common/test_io_priority.cc'; else $(CYGPATH_W) '$(srcdir)/test/common/test_ [...]
+ at am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) test/common/$(DEPDIR)/unittest_io_priority-test_io_priority.Tpo test/common/$(DEPDIR)/unittest_io_priority-test_io_priority.Po
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='test/common/test_io_priority.cc' object='test/common/unittest_io_priority-test_io_priority.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCXX_FALSE@ $(AM_V_CXX at am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(unittest_io_priority_CXXFLAGS) $(CXXFLAGS) -c -o test/common/unittest_io_priority-test_io_priority.obj `if test -f 'test/common/test_io_priority.cc'; then $(CYGPATH_W) 'test/common/test_io_priority.cc'; else $(CYGPATH_W) '$(srcdir)/test/common/test_io_priority.cc'; fi`
+
test/unittest_ipaddr-test_ipaddr.o: test/test_ipaddr.cc
@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(unittest_ipaddr_CXXFLAGS) $(CXXFLAGS) -MT test/unittest_ipaddr-test_ipaddr.o -MD -MP -MF test/$(DEPDIR)/unittest_ipaddr-test_ipaddr.Tpo -c -o test/unittest_ipaddr-test_ipaddr.o `test -f 'test/test_ipaddr.cc' || echo '$(srcdir)/'`test/test_ipaddr.cc
@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) test/$(DEPDIR)/unittest_ipaddr-test_ipaddr.Tpo test/$(DEPDIR)/unittest_ipaddr-test_ipaddr.Po
@@ -15195,6 +15309,7 @@ clean-libtool:
-rm -rf auth/cephx/.libs auth/cephx/_libs
-rm -rf auth/none/.libs auth/none/_libs
-rm -rf auth/unknown/.libs auth/unknown/_libs
+ -rm -rf civetweb/src/.libs civetweb/src/_libs
-rm -rf client/.libs client/_libs
-rm -rf cls/hello/.libs cls/hello/_libs
-rm -rf cls/lock/.libs cls/lock/_libs
@@ -15994,6 +16109,7 @@ $(shell_scripts): %: %.in
docdir ?= ${datadir}/doc/ceph
check-local:
+ $(top_srcdir)/qa/workunits/erasure-code/encode-decode-non-regression.sh
$(srcdir)/test/encoding/readable.sh ../ceph-object-corpus
# base targets
diff --git a/src/ceph-disk b/src/ceph-disk
index 5d6071d..6bd0220 100755
--- a/src/ceph-disk
+++ b/src/ceph-disk
@@ -89,7 +89,7 @@ MOUNT_OPTIONS = dict(
# issues with ext4 before the xatts-in-leveldb work, and it seemed
# that user_xattr helped
ext4='noatime,user_xattr',
- xfs='noatime',
+ xfs='noatime,inode64',
)
MKFS_ARGS = dict(
@@ -791,11 +791,13 @@ def get_or_create_dmcrypt_key(
# make a new key
try:
if not os.path.exists(key_dir):
- os.makedirs(key_dir)
+ os.makedirs(key_dir, stat.S_IRUSR|stat.S_IWUSR|stat.S_IXUSR)
with file('/dev/urandom', 'rb') as i:
key = i.read(256)
- with file(path, 'wb') as key_file:
- key_file.write(key)
+ fd = os.open(path, os.O_WRONLY|os.O_CREAT,
+ stat.S_IRUSR|stat.S_IWUSR)
+ assert os.write(fd, key) == len(key)
+ os.close(fd)
return path
except:
raise Error('unable to read or create dm-crypt key', path)
@@ -968,6 +970,35 @@ def get_free_partition_index(dev):
return num
+def update_partition(action, dev, description):
+ # try to make sure the kernel refreshes the table. note
+ # that if this gets ebusy, we are probably racing with
+ # udev because it already updated it.. ignore failure here.
+
+ # On RHEL and CentOS distros, calling partprobe forces a reboot of the
+ # server. Since we are not resizing partitons so we rely on calling
+ # partx
+ if platform_distro().startswith(('centos', 'red', 'scientific')):
+ LOG.info('calling partx on %s device %s', description, dev)
+ LOG.info('re-reading known partitions will display errors')
+ command(
+ [
+ 'partx',
+ action,
+ dev,
+ ],
+ )
+
+ else:
+ LOG.debug('Calling partprobe on %s device %s', description, dev)
+ command(
+ [
+ 'partprobe',
+ dev,
+ ],
+ )
+
+
def zap(dev):
"""
Destroy the partition table and content of a given disk.
@@ -993,6 +1024,9 @@ def zap(dev):
dev,
],
)
+
+ update_partition('-d', dev, 'zapped')
+
except subprocess.CalledProcessError as e:
raise Error(e)
@@ -1068,32 +1102,7 @@ def prepare_journal_dev(
],
)
- # try to make sure the kernel refreshes the table. note
- # that if this gets ebusy, we are probably racing with
- # udev because it already updated it.. ignore failure here.
-
- # On RHEL and CentOS distros, calling partprobe forces a reboot of the
- # server. Since we are not resizing partitons so we rely on calling
- # partx
- if platform_distro().startswith(('centos', 'red')):
- LOG.info('calling partx on prepared device %s', journal)
- LOG.info('re-reading known partitions will display errors')
- command(
- [
- 'partx',
- '-a',
- journal,
- ],
- )
-
- else:
- LOG.debug('Calling partprobe on prepared device %s', journal)
- command(
- [
- 'partprobe',
- journal,
- ],
- )
+ update_partition('-a', journal, 'prepared')
# wait for udev event queue to clear
command(
@@ -1118,7 +1127,6 @@ def prepare_journal_dev(
except subprocess.CalledProcessError as e:
raise Error(e)
-
def prepare_journal_file(
journal):
@@ -1279,12 +1287,7 @@ def prepare_dev(
data,
],
)
- command(
- [
- 'partprobe',
- data,
- ],
- )
+ update_partition('-a', data, 'created')
command(
[
# wait for udev event queue to clear
@@ -1500,33 +1503,7 @@ def main_prepare(args):
prepare_lock.release() # noqa
if stat.S_ISBLK(dmode):
- # try to make sure the kernel refreshes the table. note
- # that if this gets ebusy, we are probably racing with
- # udev because it already updated it.. ignore failure here.
-
- # On RHEL and CentOS distros, calling partprobe forces a reboot of
- # the server. Since we are not resizing partitons so we rely on
- # calling partx
- if platform_distro().startswith(('centos', 'red')):
- LOG.info('calling partx on prepared device %s', args.data)
- LOG.info('re-reading known partitions will display errors')
-
- command(
- [
- 'partx',
- '-a',
- args.data,
- ],
- )
-
- else:
- LOG.debug('Calling partprobe on prepared device %s', args.data)
- command(
- [
- 'partprobe',
- args.data,
- ],
- )
+ update_partition('-a', args.data, 'prepared')
except Error as e:
if journal_dm_keypath:
@@ -1918,7 +1895,8 @@ def activate(
raise Error('No OSD uuid assigned.')
LOG.debug('OSD uuid is %s', fsid)
- keyring = activate_key_template.format(cluster=cluster)
+ keyring = activate_key_template.format(cluster=cluster,
+ statedir=STATEDIR)
osd_id = get_osd_id(path)
if osd_id is None:
@@ -2657,7 +2635,7 @@ def parse_args():
help='path to block device or directory',
)
activate_parser.set_defaults(
- activate_key_template=STATEDIR + '/bootstrap-osd/{cluster}.keyring',
+ activate_key_template='{statedir}/bootstrap-osd/{cluster}.keyring',
func=main_activate,
)
@@ -2681,7 +2659,7 @@ def parse_args():
choices=INIT_SYSTEMS,
)
activate_journal_parser.set_defaults(
- activate_key_template=STATEDIR + '/bootstrap-osd/{cluster}.keyring',
+ activate_key_template='{statedir}/bootstrap-osd/{cluster}.keyring',
func=main_activate_journal,
)
@@ -2700,7 +2678,7 @@ def parse_args():
choices=INIT_SYSTEMS,
)
activate_all_parser.set_defaults(
- activate_key_template=STATEDIR + '/bootstrap-osd/{cluster}.keyring',
+ activate_key_template='{statedir}/bootstrap-osd/{cluster}.keyring',
func=main_activate_all,
)
diff --git a/src/ceph.in b/src/ceph.in
index 82c9085..c5b97ef 100755
--- a/src/ceph.in
+++ b/src/ceph.in
@@ -841,4 +841,8 @@ def main():
return 0
if __name__ == '__main__':
- sys.exit(main())
+ retval = main()
+ # shutdown explicitly; Rados() does not
+ if cluster_handle:
+ cluster_handle.shutdown()
+ sys.exit(retval)
diff --git a/src/ceph_mon.cc b/src/ceph_mon.cc
index 80b17a1..1b52f58 100644
--- a/src/ceph_mon.cc
+++ b/src/ceph_mon.cc
@@ -419,6 +419,26 @@ int main(int argc, const char **argv)
return 0;
}
+ {
+ // check fs stats. don't start if it's critically close to full.
+ ceph_data_stats_t stats;
+ int err = get_fs_stats(stats, g_conf->mon_data.c_str());
+ if (err < 0) {
+ cerr << "error checking monitor data's fs stats: " << cpp_strerror(err)
+ << std::endl;
+ exit(-err);
+ }
+ if (stats.avail_percent <= g_conf->mon_data_avail_crit) {
+ cerr << "error: monitor data filesystem reached concerning levels of"
+ << " available storage space (available: "
+ << stats.avail_percent << "% " << prettybyte_t(stats.byte_avail)
+ << ")\nyou may adjust 'mon data avail crit' to a lower value"
+ << " to make this go away (default: " << g_conf->mon_data_avail_crit
+ << "%)\n" << std::endl;
+ exit(ENOSPC);
+ }
+ }
+
// we fork early to prevent leveldb's environment static state from
// screwing us over
Preforker prefork;
diff --git a/src/civetweb/civetweb.h b/src/civetweb/civetweb.h
index a6ca3e7..5da8a73 100644
--- a/src/civetweb/civetweb.h
+++ b/src/civetweb/civetweb.h
@@ -24,7 +24,7 @@
#define CIVETWEB_HEADER_INCLUDED
#ifndef CIVETWEB_VERSION
-#define CIVETWEB_VERSION "1.6"
+#define CIVETWEB_VERSION "1.7"
#endif
#ifndef CIVETWEB_API
@@ -77,7 +77,7 @@ struct mg_request_info {
/* This structure needs to be passed to mg_start(), to let civetweb know
which callbacks to invoke. For detailed description, see
- https://github.com/sunsetbrew/civetweb/blob/master/docs/UserManual.md */
+ https://github.com/bel2125/civetweb/blob/master/docs/UserManual.md */
struct mg_callbacks {
/* Called when civetweb has received new HTTP request.
If callback returns non-zero,
@@ -94,6 +94,10 @@ struct mg_callbacks {
non-zero, civetweb does not log anything. */
int (*log_message)(const struct mg_connection *, const char *message);
+ /* Called when civetweb is about to log access. If callback returns
+ non-zero, civetweb does not log anything. */
+ int (*log_access)(const struct mg_connection *, const char *message);
+
/* Called when civetweb initializes SSL library. */
int (*init_ssl)(void *ssl_context, void *user_data);
@@ -176,7 +180,7 @@ struct mg_callbacks {
};
struct mg_context *ctx = mg_start(&my_func, NULL, options);
- Refer to https://github.com/sunsetbrew/civetweb/blob/master/docs/UserManual.md
+ Refer to https://github.com/bel2125/civetweb/blob/master/docs/UserManual.md
for the list of valid option and their possible values.
Return:
@@ -330,8 +334,18 @@ CIVETWEB_API int mg_websocket_write(struct mg_connection* conn, int opcode,
Invoke this before mg_write or mg_printf when communicating with a
websocket if your code has server-initiated communication as well as
communication in direct response to a message. */
-CIVETWEB_API void mg_lock(struct mg_connection* conn);
-CIVETWEB_API void mg_unlock(struct mg_connection* conn);
+CIVETWEB_API void mg_lock_connection(struct mg_connection* conn);
+CIVETWEB_API void mg_unlock_connection(struct mg_connection* conn);
+
+#if defined(MG_LEGACY_INTERFACE)
+#define mg_lock mg_lock_connection
+#define mg_unlock mg_unlock_connection
+#endif
+
+/* Lock server context. This lock may be used to protect ressources
+ that are shared between different connection/worker threads. */
+CIVETWEB_API void mg_lock_context(struct mg_context* ctx);
+CIVETWEB_API void mg_unlock_context(struct mg_context* ctx);
/* Opcodes, from http://tools.ietf.org/html/rfc6455 */
diff --git a/src/civetweb/include/civetweb.h b/src/civetweb/include/civetweb.h
index a6ca3e7..5da8a73 100644
--- a/src/civetweb/include/civetweb.h
+++ b/src/civetweb/include/civetweb.h
@@ -24,7 +24,7 @@
#define CIVETWEB_HEADER_INCLUDED
#ifndef CIVETWEB_VERSION
-#define CIVETWEB_VERSION "1.6"
+#define CIVETWEB_VERSION "1.7"
#endif
#ifndef CIVETWEB_API
@@ -77,7 +77,7 @@ struct mg_request_info {
/* This structure needs to be passed to mg_start(), to let civetweb know
which callbacks to invoke. For detailed description, see
- https://github.com/sunsetbrew/civetweb/blob/master/docs/UserManual.md */
+ https://github.com/bel2125/civetweb/blob/master/docs/UserManual.md */
struct mg_callbacks {
/* Called when civetweb has received new HTTP request.
If callback returns non-zero,
@@ -94,6 +94,10 @@ struct mg_callbacks {
non-zero, civetweb does not log anything. */
int (*log_message)(const struct mg_connection *, const char *message);
+ /* Called when civetweb is about to log access. If callback returns
+ non-zero, civetweb does not log anything. */
+ int (*log_access)(const struct mg_connection *, const char *message);
+
/* Called when civetweb initializes SSL library. */
int (*init_ssl)(void *ssl_context, void *user_data);
@@ -176,7 +180,7 @@ struct mg_callbacks {
};
struct mg_context *ctx = mg_start(&my_func, NULL, options);
- Refer to https://github.com/sunsetbrew/civetweb/blob/master/docs/UserManual.md
+ Refer to https://github.com/bel2125/civetweb/blob/master/docs/UserManual.md
for the list of valid option and their possible values.
Return:
@@ -330,8 +334,18 @@ CIVETWEB_API int mg_websocket_write(struct mg_connection* conn, int opcode,
Invoke this before mg_write or mg_printf when communicating with a
websocket if your code has server-initiated communication as well as
communication in direct response to a message. */
-CIVETWEB_API void mg_lock(struct mg_connection* conn);
-CIVETWEB_API void mg_unlock(struct mg_connection* conn);
+CIVETWEB_API void mg_lock_connection(struct mg_connection* conn);
+CIVETWEB_API void mg_unlock_connection(struct mg_connection* conn);
+
+#if defined(MG_LEGACY_INTERFACE)
+#define mg_lock mg_lock_connection
+#define mg_unlock mg_unlock_connection
+#endif
+
+/* Lock server context. This lock may be used to protect ressources
+ that are shared between different connection/worker threads. */
+CIVETWEB_API void mg_lock_context(struct mg_context* ctx);
+CIVETWEB_API void mg_unlock_context(struct mg_context* ctx);
/* Opcodes, from http://tools.ietf.org/html/rfc6455 */
diff --git a/src/civetweb/include/civetweb_conf.h b/src/civetweb/include/civetweb_conf.h
new file mode 100644
index 0000000..578143f
--- /dev/null
+++ b/src/civetweb/include/civetweb_conf.h
@@ -0,0 +1,6 @@
+#ifndef CIVETWEB_CONF_H
+#define CIVETWEB_CONF_H
+
+#define USE_IPV6 1
+
+#endif
diff --git a/src/civetweb/src/civetweb.c b/src/civetweb/src/civetweb.c
index 4aa8a02..3567df3 100644
--- a/src/civetweb/src/civetweb.c
+++ b/src/civetweb/src/civetweb.c
@@ -20,6 +20,8 @@
* THE SOFTWARE.
*/
+#define RGW 1
+
#if defined(_WIN32)
#if !defined(_CRT_SECURE_NO_WARNINGS)
#define _CRT_SECURE_NO_WARNINGS /* Disable deprecation warning in VS2005 */
@@ -181,6 +183,10 @@ typedef long off_t;
#define sleep(x) Sleep((x) * 1000)
#define rmdir(x) _rmdir(x)
+#if defined(USE_LUA) && defined(USE_WEBSOCKET)
+#define USE_TIMERS
+#endif
+
#if !defined(va_copy)
#define va_copy(x, y) x = y
#endif /* !va_copy MINGW #defines va_copy */
@@ -309,6 +315,40 @@ typedef int SOCKET;
#endif /* End of Windows and UNIX specific includes */
+#ifdef _WIN32
+static CRITICAL_SECTION global_log_file_lock;
+static DWORD pthread_self(void)
+{
+ return GetCurrentThreadId();
+}
+
+int pthread_key_create(pthread_key_t *key, void (*_must_be_zero)(void*) /* destructor function not supported for windows */)
+{
+ assert(_must_be_zero == NULL);
+ if ((key!=0) && (_must_be_zero == NULL)) {
+ *key = TlsAlloc();
+ return (*key != TLS_OUT_OF_INDEXES) ? 0 : -1;
+ }
+ return -2;
+}
+
+int pthread_key_delete(pthread_key_t key)
+{
+ return TlsFree(key) ? 0 : 1;
+}
+
+int pthread_setspecific(pthread_key_t key, void * value)
+{
+ return TlsSetValue(key, value) ? 0 : 1;
+}
+
+void *pthread_getspecific(pthread_key_t key)
+{
+ return TlsGetValue(key);
+}
+#endif /* _WIN32 */
+
+
#include "civetweb.h"
#define PASSWORDS_FILE_NAME ".htpasswd"
@@ -320,23 +360,30 @@ typedef int SOCKET;
#endif
#define ARRAY_SIZE(array) (sizeof(array) / sizeof(array[0]))
-#ifdef DEBUG_TRACE
-#undef DEBUG_TRACE
-#define DEBUG_TRACE(x)
-#else
+#if !defined(DEBUG_TRACE)
#if defined(DEBUG)
-#define DEBUG_TRACE(x) do { \
- flockfile(stdout); \
- printf("*** %lu.%p.%s.%d: ", \
- (unsigned long) time(NULL), (void *) pthread_self(), \
- __func__, __LINE__); \
- printf x; \
- putchar('\n'); \
- fflush(stdout); \
- funlockfile(stdout); \
-} while (0)
+
+static void DEBUG_TRACE_FUNC(const char *func, unsigned line, PRINTF_FORMAT_STRING(const char *fmt), ...) PRINTF_ARGS(3, 4);
+
+static void DEBUG_TRACE_FUNC(const char *func, unsigned line, const char *fmt, ...) {
+
+ va_list args;
+ flockfile(stdout);
+ printf("*** %lu.%p.%s.%u: ",
+ (unsigned long) time(NULL), (void *) pthread_self(),
+ func, line);
+ va_start(args, fmt);
+ vprintf(fmt, args);
+ va_end(args);
+ putchar('\n');
+ fflush(stdout);
+ funlockfile(stdout);
+}
+
+#define DEBUG_TRACE(fmt, ...) DEBUG_TRACE_FUNC(__func__, __LINE__, fmt, __VA_ARGS__)
+
#else
-#define DEBUG_TRACE(x)
+#define DEBUG_TRACE(fmt, ...)
#endif /* DEBUG */
#endif /* DEBUG_TRACE */
@@ -357,7 +404,7 @@ static void * mg_malloc_ex(size_t size, const char * file, unsigned line) {
memory = (void *)(((char*)data)+sizeof(size_t));
}
- sprintf(mallocStr, "MEM: %p %5u alloc %7u %4u --- %s:%u\n", memory, size, totalMemUsed, blockCount, file, line);
+ sprintf(mallocStr, "MEM: %p %5lu alloc %7lu %4lu --- %s:%u\n", memory, (unsigned long)size, totalMemUsed, blockCount, file, line);
#if defined(_WIN32)
OutputDebugStringA(mallocStr);
#else
@@ -385,7 +432,7 @@ static void mg_free_ex(void * memory, const char * file, unsigned line) {
size = *(size_t*)data;
totalMemUsed -= size;
blockCount--;
- sprintf(mallocStr, "MEM: %p %5u free %7u %4u --- %s:%u\n", memory, size, totalMemUsed, blockCount, file, line);
+ sprintf(mallocStr, "MEM: %p %5lu free %7lu %4lu --- %s:%u\n", memory, (unsigned long)size, totalMemUsed, blockCount, file, line);
#if defined(_WIN32)
OutputDebugStringA(mallocStr);
#else
@@ -400,23 +447,25 @@ static void * mg_realloc_ex(void * memory, size_t newsize, const char * file, un
char mallocStr[256];
void * data;
+ void * _realloc;
size_t oldsize;
if (newsize) {
if (memory) {
data = (void *)(((char*)memory)-sizeof(size_t));
oldsize = *(size_t*)data;
- data = realloc(data, newsize+sizeof(size_t));
- if (data) {
+ _realloc = realloc(data, newsize+sizeof(size_t));
+ if (_realloc) {
+ data = _realloc;
totalMemUsed -= oldsize;
- sprintf(mallocStr, "MEM: %p %5u r-free %7u %4u --- %s:%u\n", memory, oldsize, totalMemUsed, blockCount, file, line);
+ sprintf(mallocStr, "MEM: %p %5lu r-free %7lu %4lu --- %s:%u\n", memory, (unsigned long)oldsize, totalMemUsed, blockCount, file, line);
#if defined(_WIN32)
OutputDebugStringA(mallocStr);
#else
DEBUG_TRACE("%s", mallocStr);
#endif
totalMemUsed += newsize;
- sprintf(mallocStr, "MEM: %p %5u r-alloc %7u %4u --- %s:%u\n", memory, newsize, totalMemUsed, blockCount, file, line);
+ sprintf(mallocStr, "MEM: %p %5lu r-alloc %7lu %4lu --- %s:%u\n", memory, (unsigned long)newsize, totalMemUsed, blockCount, file, line);
#if defined(_WIN32)
OutputDebugStringA(mallocStr);
#else
@@ -430,6 +479,7 @@ static void * mg_realloc_ex(void * memory, size_t newsize, const char * file, un
#else
DEBUG_TRACE("MEM: realloc failed\n");
#endif
+ return _realloc;
}
} else {
data = mg_malloc_ex(newsize, file, line);
@@ -454,43 +504,24 @@ static __inline void * mg_realloc(void * a, size_t b) {return realloc(a, b);}
static __inline void mg_free(void * a) {free(a);}
#endif
+/* This following lines are just meant as a reminder to use the mg-functions for memory management */
+#ifdef malloc
+ #undef malloc
+#endif
+#ifdef calloc
+ #undef calloc
+#endif
+#ifdef realloc
+ #undef realloc
+#endif
+#ifdef free
+ #undef free
+#endif
#define malloc DO_NOT_USE_THIS_FUNCTION__USE_mg_malloc
#define calloc DO_NOT_USE_THIS_FUNCTION__USE_mg_calloc
#define realloc DO_NOT_USE_THIS_FUNCTION__USE_mg_realloc
#define free DO_NOT_USE_THIS_FUNCTION__USE_mg_free
-#ifdef _WIN32
-static CRITICAL_SECTION global_log_file_lock;
-static DWORD pthread_self(void)
-{
- return GetCurrentThreadId();
-}
-
-int pthread_key_create(pthread_key_t *key, void (*_must_be_zero)(void*) /* destructor function not supported for windows */)
-{
- assert(_must_be_zero == NULL);
- if ((key!=0) && (_must_be_zero == NULL)) {
- *key = TlsAlloc();
- return (*key != TLS_OUT_OF_INDEXES) ? 0 : -1;
- }
- return -2;
-}
-
-int pthread_key_delete(pthread_key_t key)
-{
- return TlsFree(key) ? 0 : 1;
-}
-
-int pthread_setspecific(pthread_key_t key, void * value)
-{
- return TlsSetValue(key, value) ? 0 : 1;
-}
-
-void *pthread_getspecific(pthread_key_t key)
-{
- return TlsGetValue(key);
-}
-#endif /* _WIN32 */
#define MD5_STATIC static
#include "md5.h"
@@ -668,6 +699,7 @@ enum {
GLOBAL_PASSWORDS_FILE, INDEX_FILES, ENABLE_KEEP_ALIVE, ACCESS_CONTROL_LIST,
EXTRA_MIME_TYPES, LISTENING_PORTS, DOCUMENT_ROOT, SSL_CERTIFICATE,
NUM_THREADS, RUN_AS_USER, REWRITE, HIDE_FILES, REQUEST_TIMEOUT,
+ DECODE_URL,
#if defined(USE_LUA)
LUA_PRELOAD_FILE, LUA_SCRIPT_EXTENSIONS, LUA_SERVER_PAGE_EXTENSIONS,
@@ -714,6 +746,7 @@ static struct mg_option config_options[] = {
{"url_rewrite_patterns", 12345, NULL},
{"hide_files_patterns", 12345, NULL},
{"request_timeout_ms", CONFIG_TYPE_NUMBER, "30000"},
+ {"decode_url", CONFIG_TYPE_BOOLEAN, "yes"},
#if defined(USE_LUA)
{"lua_preload_file", CONFIG_TYPE_FILE, NULL},
@@ -753,60 +786,64 @@ struct mg_context {
in_port_t *listening_ports;
int num_listening_sockets;
- volatile int num_threads; /* Number of threads */
- pthread_mutex_t mutex; /* Protects (max|num)_threads */
- pthread_cond_t cond; /* Condvar for tracking workers terminations */
+ volatile int num_threads; /* Number of threads */
+ pthread_mutex_t thread_mutex; /* Protects (max|num)_threads */
+ pthread_cond_t thread_cond; /* Condvar for tracking workers terminations */
struct socket queue[MGSQLEN]; /* Accepted sockets */
- volatile int sq_head; /* Head of the socket queue */
- volatile int sq_tail; /* Tail of the socket queue */
- pthread_cond_t sq_full; /* Signaled when socket is produced */
- pthread_cond_t sq_empty; /* Signaled when socket is consumed */
- pthread_t masterthreadid; /* The master thread ID. */
- int workerthreadcount; /* The amount of worker threads. */
- pthread_t *workerthreadids;/* The worker thread IDs. */
+ volatile int sq_head; /* Head of the socket queue */
+ volatile int sq_tail; /* Tail of the socket queue */
+ pthread_cond_t sq_full; /* Signaled when socket is produced */
+ pthread_cond_t sq_empty; /* Signaled when socket is consumed */
+ pthread_t masterthreadid; /* The master thread ID */
+ int workerthreadcount; /* The amount of worker threads. */
+ pthread_t *workerthreadids; /* The worker thread IDs */
- unsigned long start_time; /* Server start time, used for authentication */
- unsigned long nonce_count; /* Used nonces, used for authentication */
+ unsigned long start_time; /* Server start time, used for authentication */
+ pthread_mutex_t nonce_mutex; /* Protects nonce_count */
+ unsigned long nonce_count; /* Used nonces, used for authentication */
- char *systemName; /* What operating system is running */
+ char *systemName; /* What operating system is running */
/* linked list of uri handlers */
struct mg_request_handler_info *request_handlers;
#if defined(USE_LUA) && defined(USE_WEBSOCKET)
/* linked list of shared lua websockets */
- struct mg_shared_lua_websocket *shared_lua_websockets;
+ struct mg_shared_lua_websocket_list *shared_lua_websockets;
+#endif
+
+#ifdef USE_TIMERS
+ struct timers * timers;
#endif
};
struct mg_connection {
struct mg_request_info request_info;
struct mg_context *ctx;
- SSL *ssl; /* SSL descriptor */
- SSL_CTX *client_ssl_ctx; /* SSL context for client connections */
- struct socket client; /* Connected client */
- time_t birth_time; /* Time when request was received */
- int64_t num_bytes_sent; /* Total bytes sent to client */
- int64_t content_len; /* Content-Length header value */
- int64_t consumed_content; /* How many bytes of content have been read */
- char *buf; /* Buffer for received data */
- char *path_info; /* PATH_INFO part of the URL */
- int must_close; /* 1 if connection must be closed */
- int in_error_handler; /* 1 if in handler for user defined error pages */
- int buf_size; /* Buffer size */
- int request_len; /* Size of the request + headers in a buffer */
- int data_len; /* Total size of data in a buffer */
- int status_code; /* HTTP reply status code, e.g. 200 */
- int throttle; /* Throttling, bytes/sec. <= 0 means no
- throttle */
- time_t last_throttle_time; /* Last time throttled data was sent */
- int64_t last_throttle_bytes;/* Bytes sent this second */
- pthread_mutex_t mutex; /* Used by mg_lock/mg_unlock to ensure atomic
- transmissions for websockets */
+ SSL *ssl; /* SSL descriptor */
+ SSL_CTX *client_ssl_ctx; /* SSL context for client connections */
+ struct socket client; /* Connected client */
+ time_t birth_time; /* Time when request was received */
+ int64_t num_bytes_sent; /* Total bytes sent to client */
+ int64_t content_len; /* Content-Length header value */
+ int64_t consumed_content; /* How many bytes of content have been read */
+ char *buf; /* Buffer for received data */
+ char *path_info; /* PATH_INFO part of the URL */
+ int must_close; /* 1 if connection must be closed */
+ int in_error_handler; /* 1 if in handler for user defined error pages */
+ int buf_size; /* Buffer size */
+ int request_len; /* Size of the request + headers in a buffer */
+ int data_len; /* Total size of data in a buffer */
+ int status_code; /* HTTP reply status code, e.g. 200 */
+ int throttle; /* Throttling, bytes/sec. <= 0 means no throttle */
+ time_t last_throttle_time; /* Last time throttled data was sent */
+ int64_t last_throttle_bytes; /* Bytes sent this second */
+ pthread_mutex_t mutex; /* Used by mg_lock_connection/mg_unlock_connection to ensure atomic transmissions for websockets */
#if defined(USE_LUA) && defined(USE_WEBSOCKET)
- void * lua_websocket_state; /* Lua_State for a websocket connection */
+ void * lua_websocket_state; /* Lua_State for a websocket connection */
#endif
+ int is_chunked; /* transfer-encoding is chunked */
};
static pthread_key_t sTlsKey; /* Thread local storage index */
@@ -1253,7 +1290,6 @@ static int match_prefix(const char *pattern, int pattern_len, const char *str)
}
i = j = 0;
- res = -1;
for (; i < pattern_len; i++, j++) {
if (pattern[i] == '?' && str[j] != '\0') {
continue;
@@ -1298,6 +1334,11 @@ static int should_keep_alive(const struct mg_connection *conn)
return 1;
}
+static int should_decode_url(const struct mg_connection *conn)
+{
+ return (mg_strcasecmp(conn->ctx->config[DECODE_URL], "yes") == 0);
+}
+
static const char *suggest_connection_header(const struct mg_connection *conn)
{
return should_keep_alive(conn) ? "keep-alive" : "close";
@@ -1381,7 +1422,7 @@ static void send_http_error(struct mg_connection *conn, int status,
len += mg_vsnprintf(conn, buf + len, sizeof(buf) - len, fmt, ap);
va_end(ap);
}
- DEBUG_TRACE(("[%s]", buf));
+ DEBUG_TRACE("[%s]", buf);
mg_printf(conn, "HTTP/1.1 %d %s\r\n"
"Content-Length: %d\r\n"
@@ -1408,7 +1449,18 @@ static int pthread_mutex_destroy(pthread_mutex_t *mutex)
static int pthread_mutex_lock(pthread_mutex_t *mutex)
{
- return WaitForSingleObject(*mutex, INFINITE) == WAIT_OBJECT_0? 0 : -1;
+ return WaitForSingleObject(*mutex, INFINITE) == WAIT_OBJECT_0 ? 0 : -1;
+}
+
+static int pthread_mutex_trylock(pthread_mutex_t *mutex)
+{
+ switch (WaitForSingleObject(*mutex, 0)) {
+ case WAIT_OBJECT_0:
+ return 0;
+ case WAIT_TIMEOUT:
+ return -2; /* EBUSY */
+ }
+ return -1;
}
static int pthread_mutex_unlock(pthread_mutex_t *mutex)
@@ -1853,7 +1905,7 @@ static int mg_join_thread(pthread_t threadid)
int err;
err = GetLastError();
- DEBUG_TRACE(("WaitForSingleObject() failed, error %d", err));
+ DEBUG_TRACE("WaitForSingleObject() failed, error %d", err);
} else {
if (dwevent == WAIT_OBJECT_0) {
CloseHandle(threadid);
@@ -1959,7 +2011,7 @@ static pid_t spawn_process(struct mg_connection *conn, const char *prog,
mg_snprintf(conn, cmdline, sizeof(cmdline), "%s%s\"%s\\%s\"",
interp, interp[0] == '\0' ? "" : " ", full_dir, prog);
- DEBUG_TRACE(("Running [%s]", cmdline));
+ DEBUG_TRACE("Running [%s]", cmdline);
if (CreateProcessA(NULL, cmdline, NULL, NULL, TRUE,
CREATE_NEW_PROCESS_GROUP, envblk, NULL, &si, &pi) == 0) {
mg_cry(conn, "%s: CreateProcess(%s): %ld",
@@ -2002,9 +2054,9 @@ static int mg_stat(struct mg_connection *conn, const char *path,
static void set_close_on_exec(int fd, struct mg_connection *conn /* may be null */)
{
if (fcntl(fd, F_SETFD, FD_CLOEXEC) != 0) {
- if (conn)
- mg_cry(conn, "%s: fcntl(F_SETFD FD_CLOEXEC) failed: %s",
- __func__, strerror(ERRNO));
+ if (conn) {
+ mg_cry(conn, "%s: fcntl(F_SETFD FD_CLOEXEC) failed: %s", __func__, strerror(ERRNO));
+ }
}
}
@@ -2031,8 +2083,7 @@ int mg_start_thread(mg_thread_func_t func, void *param)
/* Start a thread storing the thread context. */
-static int mg_start_thread_with_id(mg_thread_func_t func, void *param,
- pthread_t *threadidptr)
+static int mg_start_thread_with_id(mg_thread_func_t func, void *param, pthread_t *threadidptr)
{
pthread_t thread_id;
pthread_attr_t attr;
@@ -2128,8 +2179,7 @@ static int set_non_blocking_mode(SOCKET sock)
/* Write data to the IO channel - opened file descriptor, socket or SSL
descriptor. Return number of bytes written. */
-static int64_t push(FILE *fp, SOCKET sock, SSL *ssl, const char *buf,
- int64_t len)
+static int64_t push(FILE *fp, SOCKET sock, SSL *ssl, const char *buf, int64_t len)
{
int64_t sent;
int n, k;
@@ -2207,7 +2257,25 @@ static int pull_all(FILE *fp, struct mg_connection *conn, char *buf, int len)
return nread;
}
-int mg_read(struct mg_connection *conn, void *buf, size_t len)
+static void fast_forward_request(struct mg_connection *conn)
+{
+ char buf[MG_BUF_LEN];
+ int to_read, nread;
+
+ while (conn->consumed_content < conn->content_len) {
+ to_read = sizeof(buf);
+ if ((int64_t) to_read > conn->content_len - conn->consumed_content) {
+ to_read = (int) (conn->content_len - conn->consumed_content);
+ }
+
+ nread = mg_read(conn, buf, to_read);
+ if (nread <= 0) {
+ break;
+ }
+ }
+}
+
+int mg_read_inner(struct mg_connection *conn, void *buf, size_t len)
{
int64_t n, buffered_len, nread;
const char *body;
@@ -2248,6 +2316,43 @@ int mg_read(struct mg_connection *conn, void *buf, size_t len)
return nread;
}
+static int mg_getc(struct mg_connection *conn) {
+ char c;
+ conn->content_len++;
+ if ( mg_read_inner(conn,&c,1) <= 0 ) return EOF;
+ return c;
+}
+
+int mg_read(struct mg_connection *conn, void *buf, size_t len) {
+ if ( conn->is_chunked ) {
+ if (conn->content_len <= 0 ) conn->content_len = 0;
+ if (conn->consumed_content < conn->content_len) return mg_read_inner(conn,buf,len);
+ int i = 0;
+ char str[64];
+ while (1) {
+ int c = mg_getc(conn);
+ if (c == EOF) return EOF;
+ if ( ! ( c == '\n' || c == '\r' ) ) {
+ str[i++] = c;
+ break;
+ }
+ }
+ for (; i < (int)sizeof(str); i++) {
+ int c = mg_getc(conn);
+ if ( c == EOF ) return -1;
+ str[i] = (char) c;
+ if ( i > 0 && str[i] == '\n' && str[i-1] == '\r' ) break;
+ }
+ char *end = 0;
+ long chunkSize = strtol(str,&end,16);
+ if ( end != str+(i-1) ) return -1;
+ if ( chunkSize == 0 ) return 0;
+ conn->content_len += chunkSize;
+ }
+ return mg_read_inner(conn,buf,len);
+}
+
+
int mg_write(struct mg_connection *conn, const void *buf, size_t len)
{
time_t now;
@@ -2381,7 +2486,7 @@ int mg_url_decode(const char *src, int src_len, char *dst,
#define HEXTOI(x) (isdigit(x) ? x - '0' : x - 'W')
for (i = j = 0; i < src_len && j < dst_len - 1; i++, j++) {
- if (src[i] == '%' && i < src_len - 2 &&
+ if (i < src_len - 2 && src[i] == '%' &&
isxdigit(* (const unsigned char *) (src + i + 1)) &&
isxdigit(* (const unsigned char *) (src + i + 2))) {
a = tolower(* (const unsigned char *) (src + i + 1));
@@ -2656,10 +2761,11 @@ static int get_request_len(const char *buf, int buflen)
{
const char *s, *e;
int len = 0;
+ int in_content = 0;
- for (s = buf, e = s + buflen - 1; len <= 0 && s < e; s++)
+ for (s = buf, e = s + buflen - 1; len <= 0 && s < e; s++) {
/* Control characters are not allowed but >=128 is. */
- if (!isprint(* (const unsigned char *) s) && *s != '\r' &&
+ if (!in_content && !isprint(* (const unsigned char *) s) && *s != '\r' &&
*s != '\n' && * (const unsigned char *) s < 128) {
len = -1;
break; /* [i_a] abort scan as soon as one malformed character is
@@ -2670,8 +2776,14 @@ static int get_request_len(const char *buf, int buflen)
} else if (s[0] == '\n' && &s[1] < e &&
s[1] == '\r' && s[2] == '\n') {
len = (int) (s - buf) + 3;
+ in_content = 0;
}
+ if (!in_content && *s == ':') {
+ in_content = 1;
+ }
+ }
+
return len;
}
@@ -2724,6 +2836,7 @@ static time_t parse_date_string(const char *datetime)
return result;
}
+#ifndef RGW
/* Protect against directory disclosure attack by removing '..',
excessive '/' and '\' characters */
static void remove_double_dots_and_double_slashes(char *s)
@@ -2747,6 +2860,7 @@ static void remove_double_dots_and_double_slashes(char *s)
}
*p = '\0';
}
+#endif
static const struct {
const char *extension;
@@ -2976,9 +3090,11 @@ static void open_auth_file(struct mg_connection *conn, const char *path,
}
} else {
/* Try to find .htpasswd in requested directory. */
- for (p = path, e = p + strlen(p) - 1; e > p; e--)
- if (e[0] == '/')
+ for (p = path, e = p + strlen(p) - 1; e > p; e--) {
+ if (e[0] == '/') {
break;
+ }
+ }
mg_snprintf(conn, name, sizeof(name), "%.*s%c%s",
(int) (e - p), p, '/', PASSWORDS_FILE_NAME);
if (!mg_fopen(conn, name, "r", filep)) {
@@ -3108,33 +3224,98 @@ static char *mg_fgets(char *buf, size_t size, struct file *filep, char **p)
}
}
-/* Authorize against the opened passwords file. Return 1 if authorized. */
-static int authorize(struct mg_connection *conn, struct file *filep)
-{
+struct read_auth_file_struct {
+ struct mg_connection *conn;
struct ah ah;
- char line[256], f_user[256] = "", ha1[256] = "", f_domain[256] = "", buf[MG_BUF_LEN], *p;
+ char *domain;
+ char buf[256+256+40];
+ char *f_user;
+ char *f_domain;
+ char *f_ha1;
+};
- if (!parse_auth_header(conn, buf, sizeof(buf), &ah)) {
- return 0;
- }
+static int read_auth_file(struct file *filep, struct read_auth_file_struct * workdata)
+{
+ char *p;
+ int is_authorized = 0;
+ struct file fp;
+ int l;
/* Loop over passwords file */
p = (char *) filep->membuf;
- while (mg_fgets(line, sizeof(line), filep, &p) != NULL) {
- if (sscanf(line, "%255[^:]:%255[^:]:%255s", f_user, f_domain, ha1) != 3) {
+ while (mg_fgets(workdata->buf, sizeof(workdata->buf), filep, &p) != NULL) {
+
+ l = strlen(workdata->buf);
+ while (l>0) {
+ if (isspace(workdata->buf[l-1]) || iscntrl(workdata->buf[l-1])) {
+ l--;
+ workdata->buf[l] = 0;
+ } else break;
+ }
+ if (l<1) continue;
+
+ workdata->f_user = workdata->buf;
+
+ if (workdata->f_user[0]==':') {
+ /* user names may not contain a ':' and may not be empty,
+ so lines starting with ':' may be used for a special purpose */
+ if (workdata->f_user[1]=='#') {
+ /* :# is a comment */
+ continue;
+ } else if (!strncmp(workdata->f_user+1,"include=",8)) {
+ if (mg_fopen(workdata->conn, workdata->f_user+9, "r", &fp)) {
+ is_authorized = read_auth_file(&fp, workdata);
+ mg_fclose(&fp);
+ } else {
+ mg_cry(workdata->conn, "%s: cannot open authorization file: %s", __func__, workdata->buf);
+ }
+ continue;
+ }
+ /* everything is invalid for the moment (might change in the future) */
+ mg_cry(workdata->conn, "%s: syntax error in authorization file: %s", __func__, workdata->buf);
continue;
}
- f_user[255]=0;
- f_domain[255]=0;
- ha1[255]=0;
- if (!strcmp(ah.user, f_user) &&
- !strcmp(conn->ctx->config[AUTHENTICATION_DOMAIN], f_domain))
- return check_password(conn->request_info.request_method, ha1, ah.uri,
- ah.nonce, ah.nc, ah.cnonce, ah.qop, ah.response);
+ workdata->f_domain = strchr(workdata->f_user, ':');
+ if (workdata->f_domain == NULL) {
+ mg_cry(workdata->conn, "%s: syntax error in authorization file: %s", __func__, workdata->buf);
+ continue;
+ }
+ *(workdata->f_domain) = 0;
+ (workdata->f_domain)++;
+
+ workdata->f_ha1 = strchr(workdata->f_domain, ':');
+ if (workdata->f_ha1 == NULL) {
+ mg_cry(workdata->conn, "%s: syntax error in authorization file: %s", __func__, workdata->buf);
+ continue;
+ }
+ *(workdata->f_ha1) = 0;
+ (workdata->f_ha1)++;
+
+ if (!strcmp(workdata->ah.user, workdata->f_user) && !strcmp(workdata->domain, workdata->f_domain)) {
+ return check_password(workdata->conn->request_info.request_method, workdata->f_ha1, workdata->ah.uri,
+ workdata->ah.nonce, workdata->ah.nc, workdata->ah.cnonce, workdata->ah.qop, workdata->ah.response);
+ }
}
- return 0;
+ return is_authorized;
+}
+
+/* Authorize against the opened passwords file. Return 1 if authorized. */
+static int authorize(struct mg_connection *conn, struct file *filep)
+{
+ struct read_auth_file_struct workdata;
+ char buf[MG_BUF_LEN];
+
+ memset(&workdata,0,sizeof(workdata));
+ workdata.conn = conn;
+
+ if (!parse_auth_header(conn, buf, sizeof(buf), &workdata.ah)) {
+ return 0;
+ }
+ workdata.domain = conn->ctx->config[AUTHENTICATION_DOMAIN];
+
+ return read_auth_file(filep, &workdata);
}
/* Return 1 if request is authorised, 0 otherwise. */
@@ -3172,16 +3353,16 @@ static int check_authorization(struct mg_connection *conn, const char *path)
static void send_authorization_request(struct mg_connection *conn)
{
- char date[64];
- time_t curtime = time(NULL);
- unsigned long nonce = (unsigned long)(conn->ctx->start_time);
-
- (void)pthread_mutex_lock(&conn->ctx->mutex);
+ char date[64];
+ time_t curtime = time(NULL);
+ unsigned long nonce = (unsigned long)(conn->ctx->start_time);
+
+ (void)pthread_mutex_lock(&conn->ctx->nonce_mutex);
nonce += conn->ctx->nonce_count;
++conn->ctx->nonce_count;
- (void)pthread_mutex_unlock(&conn->ctx->mutex);
-
- nonce ^= (unsigned long)(conn->ctx);
+ (void)pthread_mutex_unlock(&conn->ctx->nonce_mutex);
+
+ nonce ^= (unsigned long)(conn->ctx);
conn->status_code = 401;
conn->must_close = 1;
@@ -3215,8 +3396,8 @@ static int is_authorized_for_put(struct mg_connection *conn)
int mg_modify_passwords_file(const char *fname, const char *domain,
const char *user, const char *pass)
{
- int found;
- char line[512], u[512] = "", d[512] ="", ha1[33], tmp[PATH_MAX+1];
+ int found, i;
+ char line[512], u[512] = "", d[512] ="", ha1[33], tmp[PATH_MAX+8];
FILE *fp, *fp2;
found = 0;
@@ -3227,6 +3408,25 @@ int mg_modify_passwords_file(const char *fname, const char *domain,
pass = NULL;
}
+ /* Other arguments must not be empty */
+ if (fname == NULL || domain == NULL || user == NULL) return 0;
+
+ /* Using the given file format, user name and domain must not contain ':' */
+ if (strchr(user, ':') != NULL) return 0;
+ if (strchr(domain, ':') != NULL) return 0;
+
+ /* Do not allow control characters like newline in user name and domain.
+ Do not allow excessively long names either. */
+ for (i=0; i<255 && user[i]!=0; i++) {
+ if (iscntrl(user[i])) return 0;
+ }
+ if (user[i]) return 0;
+ for (i=0; i<255 && domain[i]!=0; i++) {
+ if (iscntrl(domain[i])) return 0;
+ }
+ if (domain[i]) return 0;
+
+ /* Create a temporary file name */
(void) snprintf(tmp, sizeof(tmp) - 1, "%s.tmp", fname);
tmp[sizeof(tmp) - 1] = 0;
@@ -3822,6 +4022,7 @@ static int parse_http_message(char *buf, int len, struct mg_request_info *ri)
/* HTTP message could be either HTTP request or HTTP response, e.g.
"GET / HTTP/1.0 ...." or "HTTP/1.0 200 OK ..." */
+#ifndef RGW
is_request = is_valid_http_method(ri->request_method);
if ((is_request && memcmp(ri->http_version, "HTTP/", 5) != 0) ||
(!is_request && memcmp(ri->request_method, "HTTP/", 5) != 0)) {
@@ -3832,6 +4033,17 @@ static int parse_http_message(char *buf, int len, struct mg_request_info *ri)
}
parse_http_headers(&buf, ri);
}
+#else
+ is_request = (memcmp(ri->http_version, "HTTP/", 5) == 0);
+ if (is_request) {
+ ri->http_version += 5;
+ }
+ if (is_request || memcmp(ri->request_method, "HTTP/", 5) == 0) {
+ parse_http_headers(&buf, ri);
+ } else {
+ request_length = -1;
+ }
+#endif
}
return request_length;
}
@@ -3928,7 +4140,7 @@ static int forward_body_data(struct mg_connection *conn, FILE *fp,
expect = mg_get_header(conn, "Expect");
assert(fp != NULL);
- if (conn->content_len == -1) {
+ if (conn->content_len == -1 && !conn->is_chunked) {
send_http_error(conn, 411, "Length Required", "%s", "");
} else if (expect != NULL && mg_strcasecmp(expect, "100-continue")) {
send_http_error(conn, 417, "Expectation Failed", "%s", "");
@@ -4235,7 +4447,7 @@ static void handle_cgi_request(struct mg_connection *conn, const char *prog)
Do not send anything back to client, until we buffer in all
HTTP headers. */
data_len = 0;
- buf = mg_malloc(buflen);
+ buf = (char *)mg_malloc(buflen);
if (buf == NULL) {
send_http_error(conn, 500, http_500_error,
"Not enough memory for buffer (%u bytes)",
@@ -4343,7 +4555,7 @@ static int put_dir(struct mg_connection *conn, const char *path)
buf[len] = '\0';
/* Try to create intermediate directory */
- DEBUG_TRACE(("mkdir(%s)", buf));
+ DEBUG_TRACE("mkdir(%s)", buf);
if (!mg_stat(conn, buf, &file) && mg_mkdir(buf, 0755) != 0) {
res = -1;
break;
@@ -4362,7 +4574,7 @@ static void mkcol(struct mg_connection *conn, const char *path)
{
int rc, body_len;
struct de de;
- char date[64];
+ char date[64];
time_t curtime = time(NULL);
memset(&de.file, 0, sizeof(de.file));
@@ -4413,7 +4625,7 @@ static void put_file(struct mg_connection *conn, const char *path)
const char *range;
int64_t r1, r2;
int rc;
- char date[64];
+ char date[64];
time_t curtime = time(NULL);
conn->status_code = mg_stat(conn, path, &file) ? 200 : 201;
@@ -4687,7 +4899,7 @@ static void handle_propfind(struct mg_connection *conn, const char *path,
struct file *filep)
{
const char *depth = mg_get_header(conn, "Depth");
- char date[64];
+ char date[64];
time_t curtime = time(NULL);
gmt_time_string(date, sizeof(date), &curtime);
@@ -4717,16 +4929,30 @@ static void handle_propfind(struct mg_connection *conn, const char *path,
conn->num_bytes_sent += mg_printf(conn, "%s\n", "</d:multistatus>");
}
-void mg_lock(struct mg_connection* conn)
+void mg_lock_connection(struct mg_connection* conn)
{
(void) pthread_mutex_lock(&conn->mutex);
}
-void mg_unlock(struct mg_connection* conn)
+void mg_unlock_connection(struct mg_connection* conn)
{
(void) pthread_mutex_unlock(&conn->mutex);
}
+void mg_lock_context(struct mg_context* ctx)
+{
+ (void) pthread_mutex_lock(&ctx->nonce_mutex);
+}
+
+void mg_unlock_context(struct mg_context* ctx)
+{
+ (void) pthread_mutex_unlock(&ctx->nonce_mutex);
+}
+
+#if defined(USE_TIMERS)
+#include "timer.inl"
+#endif /* USE_TIMERS */
+
#ifdef USE_LUA
#include "mod_lua.inl"
#endif /* USE_LUA */
@@ -5015,9 +5241,9 @@ static void read_websocket(struct mg_connection *conn)
/* Copy the mask before we shift the queue and destroy it */
if (mask_len > 0) {
- *(uint32_t*)mask = *(uint32_t*)(buf + header_len - mask_len);
+ memcpy(mask, buf + header_len - mask_len, sizeof(mask));
} else {
- *(uint32_t*)mask = 0;
+ memset(mask, 0, sizeof(mask));
}
/* Read frame payload from the first message in the queue into
@@ -5075,7 +5301,7 @@ static void read_websocket(struct mg_connection *conn)
!conn->ctx->callbacks.websocket_data(conn, mop, data, data_len)) ||
#ifdef USE_LUA
(conn->lua_websocket_state &&
- !lua_websocket_data(conn, mop, data, data_len)) ||
+ !lua_websocket_data(conn, conn->lua_websocket_state, mop, data, data_len)) ||
#endif
(buf[0] & 0xf) == WEBSOCKET_OPCODE_CONNECTION_CLOSE) { /* Opcode == 8, connection close */
break;
@@ -5130,10 +5356,10 @@ int mg_websocket_write(struct mg_connection* conn, int opcode, const char* data,
but mongoose's mg_printf/mg_write is not (because of the loop in
push(), although that is only a problem if the packet is large or
outgoing buffer is full). */
- (void) mg_lock(conn);
+ (void) mg_lock_connection(conn);
retval = mg_write(conn, header, headerLen);
retval = mg_write(conn, data, dataLen);
- mg_unlock(conn);
+ mg_unlock_connection(conn);
return retval;
}
@@ -5142,7 +5368,7 @@ static void handle_websocket_request(struct mg_connection *conn, const char *pat
{
const char *version = mg_get_header(conn, "Sec-WebSocket-Version");
#ifdef USE_LUA
- int lua_websock, shared_lua_websock = 0;
+ int lua_websock = 0;
/* TODO: A websocket script may be shared between several clients, allowing them to communicate
directly instead of writing to a data base and polling the data base. */
#endif
@@ -5155,17 +5381,17 @@ static void handle_websocket_request(struct mg_connection *conn, const char *pat
/* The C callback is called before Lua and may prevent Lua from handling the websocket. */
} else {
#ifdef USE_LUA
- lua_websock = conn->ctx->config[LUA_WEBSOCKET_EXTENSIONS] ?
- match_prefix(conn->ctx->config[LUA_WEBSOCKET_EXTENSIONS],
+ if (conn->ctx->config[LUA_WEBSOCKET_EXTENSIONS]) {
+ lua_websock = match_prefix(conn->ctx->config[LUA_WEBSOCKET_EXTENSIONS],
(int)strlen(conn->ctx->config[LUA_WEBSOCKET_EXTENSIONS]),
- path) : 0;
+ path);
+ }
- if (lua_websock || shared_lua_websock) {
- /* TODO */ shared_lua_websock = 0;
- conn->lua_websocket_state = lua_websocket_new(path, conn, !!shared_lua_websock);
+ if (lua_websock) {
+ conn->lua_websocket_state = lua_websocket_new(path, conn);
if (conn->lua_websocket_state) {
send_websocket_handshake(conn);
- if (lua_websocket_ready(conn)) {
+ if (lua_websocket_ready(conn, conn->lua_websocket_state)) {
read_websocket(conn);
}
}
@@ -5295,6 +5521,7 @@ int mg_upload(struct mg_connection *conn, const char *destination_dir)
assert(len >= 0 && len <= (int) sizeof(buf));
while ((n = mg_read(conn, buf + len, sizeof(buf) - len)) > 0) {
len += n;
+ assert(len <= (int) sizeof(buf));
}
if ((headers_len = get_request_len(buf, len)) <= 0) {
break;
@@ -5422,7 +5649,7 @@ static void redirect_to_https_port(struct mg_connection *conn, int ssl_index)
void mg_set_request_handler(struct mg_context *ctx, const char *uri, mg_request_handler handler, void *cbdata)
{
- struct mg_request_handler_info *tmp_rh, *lastref = 0;
+ struct mg_request_handler_info *tmp_rh, *lastref = NULL;
size_t urilen = strlen(uri);
/* first see it the uri exists */
@@ -5507,10 +5734,10 @@ static int use_request_handler(struct mg_connection *conn)
return tmp_rh->handler(conn, tmp_rh->cbdata);
}
-
- /* try for pattern match */
- if (match_prefix(tmp_rh->uri, tmp_rh->uri_len, uri) > 0) {
- return tmp_rh->handler(conn, tmp_rh->cbdata);
+
+ /* try for pattern match */
+ if (match_prefix(tmp_rh->uri, tmp_rh->uri_len, uri) > 0) {
+ return tmp_rh->handler(conn, tmp_rh->cbdata);
}
}
@@ -5528,21 +5755,28 @@ static void handle_request(struct mg_connection *conn)
char path[PATH_MAX];
int uri_len, ssl_index, is_script_resource;
struct file file = STRUCT_FILE_INITIALIZER;
- char date[64];
+ char date[64];
time_t curtime = time(NULL);
if ((conn->request_info.query_string = strchr(ri->uri, '?')) != NULL) {
* ((char *) conn->request_info.query_string++) = '\0';
}
uri_len = (int) strlen(ri->uri);
- mg_url_decode(ri->uri, uri_len, (char *) ri->uri, uri_len + 1, 0);
+
+ if (should_decode_url(conn)) {
+ mg_url_decode(ri->uri, uri_len, (char *) ri->uri, uri_len + 1, 0);
+ }
+
+#ifndef RGW
remove_double_dots_and_double_slashes((char *) ri->uri);
+#endif
+
path[0] = '\0';
convert_uri_to_file_name(conn, path, sizeof(path), &file, &is_script_resource);
conn->throttle = set_throttle(conn->ctx->config[THROTTLE],
get_remote_ip(conn), ri->uri);
- DEBUG_TRACE(("%s", ri->uri));
+ DEBUG_TRACE("%s", ri->uri);
/* Perform redirect and auth checks before calling begin_request() handler.
Otherwise, begin_request() would need to perform auth checks and
redirects. */
@@ -5555,6 +5789,7 @@ static void handle_request(struct mg_connection *conn)
} else if (conn->ctx->callbacks.begin_request != NULL &&
conn->ctx->callbacks.begin_request(conn)) {
/* Do nothing, callback has served the request */
+ fast_forward_request(conn);
#if defined(USE_WEBSOCKET)
} else if (is_websocket_request(conn)) {
handle_websocket_request(conn, path, is_script_resource);
@@ -5630,7 +5865,7 @@ static void handle_file_based_request(struct mg_connection *conn, const char *pa
(int)strlen(conn->ctx->config[LUA_SERVER_PAGE_EXTENSIONS]),
path) > 0) {
/* Lua server page: an SSI like page containing mostly plain html code plus some tags with server generated contents. */
- handle_lsp_request(conn, path, &file, NULL);
+ handle_lsp_request(conn, path, file, NULL);
} else if (match_prefix(conn->ctx->config[LUA_SCRIPT_EXTENSIONS],
(int)strlen(conn->ctx->config[LUA_SCRIPT_EXTENSIONS]),
path) > 0) {
@@ -5663,9 +5898,9 @@ static void close_all_listening_sockets(struct mg_context *ctx)
ctx->listening_sockets[i].sock = INVALID_SOCKET;
}
mg_free(ctx->listening_sockets);
- ctx->listening_sockets=0;
+ ctx->listening_sockets = NULL;
mg_free(ctx->listening_ports);
- ctx->listening_ports=0;
+ ctx->listening_ports = NULL;
}
static int is_valid_port(unsigned int port)
@@ -5695,7 +5930,7 @@ static int parse_port_string(const struct vec *vec, struct socket *so)
so->lsa.sin.sin_addr.s_addr = htonl((a << 24) | (b << 16) | (c << 8) | d);
so->lsa.sin.sin_port = htons((uint16_t) port);
#if defined(USE_IPV6)
- } else if (sscanf(vec->ptr, "[%49[^]]]:%d%n", buf, &port, &len) == 2 &&
+ } else if (sscanf(vec->ptr, "[%49[^]]]:%u%n", buf, &port, &len) == 2 &&
inet_pton(AF_INET6, buf, &so->lsa.sin6.sin6_addr)) {
/* IPv6 address, e.g. [3ffe:2a00:100:7031::1]:8080 */
so->lsa.sin6.sin6_family = AF_INET6;
@@ -5776,6 +6011,7 @@ static int set_ports_option(struct mg_context *ctx)
sizeof(ctx->listening_ports[0]))) == NULL) {
closesocket(so.sock);
so.sock = INVALID_SOCKET;
+ mg_free(ptr);
success = 0;
}
else {
@@ -5795,15 +6031,14 @@ static int set_ports_option(struct mg_context *ctx)
return success;
}
-static void log_header(const struct mg_connection *conn, const char *header,
- FILE *fp)
+static const char* header_val(const struct mg_connection *conn, const char *header)
{
const char *header_value;
if ((header_value = mg_get_header(conn, header)) == NULL) {
- (void) fprintf(fp, "%s", " -");
+ return "-";
} else {
- (void) fprintf(fp, " \"%s\"", header_value);
+ return header_value;
}
}
@@ -5814,10 +6049,15 @@ static void log_access(const struct mg_connection *conn)
char date[64], src_addr[IP_ADDR_STR_LEN];
struct tm *tm;
+ const char *referer;
+ const char *user_agent;
+
+ char buf[4096];
+
fp = conn->ctx->config[ACCESS_LOG_FILE] == NULL ? NULL :
fopen(conn->ctx->config[ACCESS_LOG_FILE], "a+");
- if (fp == NULL)
+ if (fp == NULL && conn->ctx->callbacks.log_message == NULL)
return;
tm = localtime(&conn->birth_time);
@@ -5829,21 +6069,30 @@ static void log_access(const struct mg_connection *conn)
}
ri = &conn->request_info;
- flockfile(fp);
sockaddr_to_string(src_addr, sizeof(src_addr), &conn->client.rsa);
- fprintf(fp, "%s - %s [%s] \"%s %s HTTP/%s\" %d %" INT64_FMT,
+ referer = header_val(conn, "Referer");
+ user_agent = header_val(conn, "User-Agent");
+
+ snprintf(buf, sizeof(buf), "%s - %s [%s] \"%s %s HTTP/%s\" %d %" INT64_FMT " %s %s",
src_addr, ri->remote_user == NULL ? "-" : ri->remote_user, date,
ri->request_method ? ri->request_method : "-",
ri->uri ? ri->uri : "-", ri->http_version,
- conn->status_code, conn->num_bytes_sent);
- log_header(conn, "Referer", fp);
- log_header(conn, "User-Agent", fp);
- fputc('\n', fp);
- fflush(fp);
+ conn->status_code, conn->num_bytes_sent,
+ referer, user_agent);
- funlockfile(fp);
- fclose(fp);
+ if (conn->ctx->callbacks.log_access) {
+ conn->ctx->callbacks.log_access(conn, buf);
+ }
+
+ if (fp) {
+ flockfile(fp);
+ fprintf(fp, "%s", buf);
+ fputc('\n', fp);
+ fflush(fp);
+ funlockfile(fp);
+ fclose(fp);
+ }
}
/* Verify given socket address against the ACL.
@@ -6072,6 +6321,7 @@ static void reset_per_request_attributes(struct mg_connection *conn)
conn->num_bytes_sent = conn->consumed_content = 0;
conn->status_code = -1;
conn->must_close = conn->request_len = conn->throttle = 0;
+ conn->is_chunked = 0;
}
static void close_socket_gracefully(struct mg_connection *conn)
@@ -6116,7 +6366,8 @@ static void close_connection(struct mg_connection *conn)
{
#if defined(USE_LUA) && defined(USE_WEBSOCKET)
if (conn->lua_websocket_state) {
- lua_websocket_close(conn);
+ lua_websocket_close(conn, conn->lua_websocket_state);
+ conn->lua_websocket_state = NULL;
}
#endif
@@ -6124,7 +6375,7 @@ static void close_connection(struct mg_connection *conn)
if (conn->ctx->callbacks.connection_close != NULL)
conn->ctx->callbacks.connection_close(conn);
- mg_lock(conn);
+ mg_lock_connection(conn);
conn->must_close = 1;
@@ -6141,7 +6392,7 @@ static void close_connection(struct mg_connection *conn)
conn->client.sock = INVALID_SOCKET;
}
- mg_unlock(conn);
+ mg_unlock_connection(conn);
}
void mg_close_connection(struct mg_connection *conn)
@@ -6156,7 +6407,7 @@ void mg_close_connection(struct mg_connection *conn)
mg_free(conn);
}
-struct mg_connection *mg_connect(const char *host, int port, int use_ssl,
+static struct mg_connection *mg_connect(const char *host, int port, int use_ssl,
char *ebuf, size_t ebuf_len)
{
static struct mg_context fake_ctx;
@@ -6169,13 +6420,11 @@ struct mg_connection *mg_connect(const char *host, int port, int use_ssl,
mg_calloc(1, sizeof(*conn) + MAX_REQUEST_SIZE)) == NULL) {
snprintf(ebuf, ebuf_len, "calloc(): %s", strerror(ERRNO));
closesocket(sock);
- sock = INVALID_SOCKET;
#ifndef NO_SSL
} else if (use_ssl && (conn->client_ssl_ctx =
SSL_CTX_new(SSLv23_client_method())) == NULL) {
snprintf(ebuf, ebuf_len, "SSL_CTX_new error");
closesocket(sock);
- sock = INVALID_SOCKET;
mg_free(conn);
conn = NULL;
#endif /* NO_SSL */
@@ -6213,11 +6462,12 @@ static int is_valid_uri(const char *uri)
return uri[0] == '/' || (uri[0] == '*' && uri[1] == '\0');
}
-static int getreq(struct mg_connection *conn, char *ebuf, size_t ebuf_len)
+static int getreq(struct mg_connection *conn, char *ebuf, size_t ebuf_len, int *err)
{
const char *cl;
ebuf[0] = '\0';
+ *err = 0;
reset_per_request_attributes(conn);
conn->request_len = read_request(NULL, conn, conn->buf, conn->buf_size,
&conn->data_len);
@@ -6225,16 +6475,30 @@ static int getreq(struct mg_connection *conn, char *ebuf, size_t ebuf_len)
if (conn->request_len == 0 && conn->data_len == conn->buf_size) {
snprintf(ebuf, ebuf_len, "%s", "Request Too Large");
+ *err = 400;
+ return 0;
} else if (conn->request_len <= 0) {
snprintf(ebuf, ebuf_len, "%s", "Client closed connection");
+ return 0;
} else if (parse_http_message(conn->buf, conn->buf_size,
&conn->request_info) <= 0) {
snprintf(ebuf, ebuf_len, "Bad request: [%.*s]", conn->data_len, conn->buf);
+ *err = 400;
+ return 0;
} else {
/* Message is a valid request or response */
- if ((cl = get_header(&conn->request_info, "Content-Length")) != NULL) {
+ if (( cl = get_header(&conn->request_info, "Transfer-encoding")) != NULL && strcmp(cl,"chunked") == 0) {
+ conn->is_chunked = 1;
+ conn->content_len = 0;
+ } else if ((cl = get_header(&conn->request_info, "Content-Length")) != NULL) {
/* Request/response has content length set */
- conn->content_len = strtoll(cl, NULL, 10);
+ char *endptr;
+ conn->content_len = strtoll(cl, &endptr, 10);
+ if (endptr == cl) {
+ snprintf(ebuf, ebuf_len, "%s", "Bad Request");
+ *err = 400;
+ return 0;
+ }
} else if (!mg_strcasecmp(conn->request_info.request_method, "POST") ||
!mg_strcasecmp(conn->request_info.request_method, "PUT")) {
/* POST or PUT request without content length set */
@@ -6248,7 +6512,7 @@ static int getreq(struct mg_connection *conn, char *ebuf, size_t ebuf_len)
}
conn->birth_time = time(NULL);
}
- return ebuf[0] == '\0';
+ return 1;
}
struct mg_connection *mg_download(const char *host, int port, int use_ssl,
@@ -6264,7 +6528,8 @@ struct mg_connection *mg_download(const char *host, int port, int use_ssl,
} else if (mg_vprintf(conn, fmt, ap) <= 0) {
snprintf(ebuf, ebuf_len, "%s", "Error sending request");
} else {
- getreq(conn, ebuf, ebuf_len);
+ int err;
+ getreq(conn, ebuf, ebuf_len, &err);
}
if (ebuf[0] != '\0' && conn != NULL) {
mg_close_connection(conn);
@@ -6282,14 +6547,16 @@ static void process_new_connection(struct mg_connection *conn)
char ebuf[100];
keep_alive_enabled = !strcmp(conn->ctx->config[ENABLE_KEEP_ALIVE], "yes");
- keep_alive = 0;
/* Important: on new connection, reset the receiving buffer. Credit goes
to crule42. */
conn->data_len = 0;
do {
- if (!getreq(conn, ebuf, sizeof(ebuf))) {
- send_http_error(conn, 500, "Server Error", "%s", ebuf);
+ int err;
+ if (!getreq(conn, ebuf, sizeof(ebuf), &err)) {
+ if (err > 0) {
+ send_http_error(conn, err, "Bad Request", "%s", ebuf);
+ }
conn->must_close = 1;
} else if (!is_valid_uri(conn->request_info.uri)) {
snprintf(ebuf, sizeof(ebuf), "Invalid URI: [%s]", ri->uri);
@@ -6336,12 +6603,12 @@ static void process_new_connection(struct mg_connection *conn)
/* Worker threads take accepted socket from the queue */
static int consume_socket(struct mg_context *ctx, struct socket *sp)
{
- (void) pthread_mutex_lock(&ctx->mutex);
- DEBUG_TRACE(("going idle"));
+ (void) pthread_mutex_lock(&ctx->thread_mutex);
+ DEBUG_TRACE("going idle");
/* If the queue is empty, wait. We're idle at this point. */
while (ctx->sq_head == ctx->sq_tail && ctx->stop_flag == 0) {
- pthread_cond_wait(&ctx->sq_full, &ctx->mutex);
+ pthread_cond_wait(&ctx->sq_full, &ctx->thread_mutex);
}
/* If we're stopping, sq_head may be equal to sq_tail. */
@@ -6349,7 +6616,7 @@ static int consume_socket(struct mg_context *ctx, struct socket *sp)
/* Copy socket from the queue and increment tail */
*sp = ctx->queue[ctx->sq_tail % ARRAY_SIZE(ctx->queue)];
ctx->sq_tail++;
- DEBUG_TRACE(("grabbed socket %d, going busy", sp->sock));
+ DEBUG_TRACE("grabbed socket %d, going busy", sp->sock);
/* Wrap pointers if needed */
while (ctx->sq_tail > (int) ARRAY_SIZE(ctx->queue)) {
@@ -6359,7 +6626,7 @@ static int consume_socket(struct mg_context *ctx, struct socket *sp)
}
(void) pthread_cond_signal(&ctx->sq_empty);
- (void) pthread_mutex_unlock(&ctx->mutex);
+ (void) pthread_mutex_unlock(&ctx->thread_mutex);
return !ctx->stop_flag;
}
@@ -6417,19 +6684,19 @@ static void *worker_thread_run(void *thread_func_param)
}
/* Signal master that we're done with connection and exiting */
- (void) pthread_mutex_lock(&ctx->mutex);
+ (void) pthread_mutex_lock(&ctx->thread_mutex);
ctx->num_threads--;
- (void) pthread_cond_signal(&ctx->cond);
+ (void) pthread_cond_signal(&ctx->thread_cond);
assert(ctx->num_threads >= 0);
- (void) pthread_mutex_unlock(&ctx->mutex);
+ (void) pthread_mutex_unlock(&ctx->thread_mutex);
- pthread_setspecific(sTlsKey, 0);
+ pthread_setspecific(sTlsKey, NULL);
#if defined(_WIN32) && !defined(__SYMBIAN32__)
CloseHandle(tls.pthread_cond_helper_mutex);
#endif
mg_free(conn);
- DEBUG_TRACE(("exiting"));
+ DEBUG_TRACE("exiting");
return NULL;
}
@@ -6452,23 +6719,23 @@ static void *worker_thread(void *thread_func_param)
/* Master thread adds accepted socket to a queue */
static void produce_socket(struct mg_context *ctx, const struct socket *sp)
{
- (void) pthread_mutex_lock(&ctx->mutex);
+ (void) pthread_mutex_lock(&ctx->thread_mutex);
/* If the queue is full, wait */
while (ctx->stop_flag == 0 &&
ctx->sq_head - ctx->sq_tail >= (int) ARRAY_SIZE(ctx->queue)) {
- (void) pthread_cond_wait(&ctx->sq_empty, &ctx->mutex);
+ (void) pthread_cond_wait(&ctx->sq_empty, &ctx->thread_mutex);
}
if (ctx->sq_head - ctx->sq_tail < (int) ARRAY_SIZE(ctx->queue)) {
/* Copy socket to the queue and increment head */
ctx->queue[ctx->sq_head % ARRAY_SIZE(ctx->queue)] = *sp;
ctx->sq_head++;
- DEBUG_TRACE(("queued socket %d", sp->sock));
+ DEBUG_TRACE("queued socket %d", sp->sock);
}
(void) pthread_cond_signal(&ctx->sq_full);
- (void) pthread_mutex_unlock(&ctx->mutex);
+ (void) pthread_mutex_unlock(&ctx->thread_mutex);
}
static int set_sock_timeout(SOCKET sock, int milliseconds)
@@ -6500,7 +6767,7 @@ static void accept_new_connection(const struct socket *listener,
so.sock = INVALID_SOCKET;
} else {
/* Put so socket structure into the queue */
- DEBUG_TRACE(("Accepted socket %d", (int) so.sock));
+ DEBUG_TRACE("Accepted socket %d", (int) so.sock);
set_close_on_exec(so.sock, fc(ctx));
so.is_ssl = listener->is_ssl;
so.ssl_redir = listener->ssl_redir;
@@ -6581,7 +6848,7 @@ static void master_thread_run(void *thread_func_param)
}
}
mg_free(pfd);
- DEBUG_TRACE(("stopping workers"));
+ DEBUG_TRACE("stopping workers");
/* Stop signal received: somebody called mg_stop. Quit. */
close_all_listening_sockets(ctx);
@@ -6590,11 +6857,11 @@ static void master_thread_run(void *thread_func_param)
pthread_cond_broadcast(&ctx->sq_full);
/* Wait until all threads finish */
- (void) pthread_mutex_lock(&ctx->mutex);
+ (void) pthread_mutex_lock(&ctx->thread_mutex);
while (ctx->num_threads > 0) {
- (void) pthread_cond_wait(&ctx->cond, &ctx->mutex);
+ (void) pthread_cond_wait(&ctx->thread_cond, &ctx->thread_mutex);
}
- (void) pthread_mutex_unlock(&ctx->mutex);
+ (void) pthread_mutex_unlock(&ctx->thread_mutex);
/* Join all worker threads to avoid leaking threads. */
workerthreadcount = ctx->workerthreadcount;
@@ -6605,12 +6872,12 @@ static void master_thread_run(void *thread_func_param)
#if !defined(NO_SSL)
uninitialize_ssl(ctx);
#endif
- DEBUG_TRACE(("exiting"));
+ DEBUG_TRACE("exiting");
#if defined(_WIN32) && !defined(__SYMBIAN32__)
CloseHandle(tls.pthread_cond_helper_mutex);
#endif
- pthread_setspecific(sTlsKey, 0);
+ pthread_setspecific(sTlsKey, NULL);
/* Signal mg_stop() that we're done.
WARNING: This must be the very last thing this
@@ -6619,7 +6886,6 @@ static void master_thread_run(void *thread_func_param)
}
/* Threads have different return types on Windows and Unix. */
-
#ifdef _WIN32
static unsigned __stdcall master_thread(void *thread_func_param)
{
@@ -6642,12 +6908,19 @@ static void free_context(struct mg_context *ctx)
if (ctx == NULL)
return;
- /* All threads exited, no sync is needed. Destroy mutex and condvars */
- (void) pthread_mutex_destroy(&ctx->mutex);
- (void) pthread_cond_destroy(&ctx->cond);
+ /* All threads exited, no sync is needed. Destroy thread mutex and condvars */
+ (void) pthread_mutex_destroy(&ctx->thread_mutex);
+ (void) pthread_cond_destroy(&ctx->thread_cond);
(void) pthread_cond_destroy(&ctx->sq_empty);
(void) pthread_cond_destroy(&ctx->sq_full);
+ /* Destroy other context global data structures mutex */
+ (void) pthread_mutex_destroy(&ctx->nonce_mutex);
+
+#if defined(USE_TIMERS)
+ timers_exit(ctx);
+#endif
+
/* Deallocate config parameters */
for (i = 0; i < NUM_OPTIONS; i++) {
if (ctx->config[i] != NULL)
@@ -6710,20 +6983,20 @@ void mg_stop(struct mg_context *ctx)
#endif /* _WIN32 && !__SYMBIAN32__ */
}
-void get_system_name(char **sysName)
+static void get_system_name(char **sysName)
{
#if defined(_WIN32)
#if !defined(__SYMBIAN32__)
char name[128];
- DWORD dwVersion = 0;
- DWORD dwMajorVersion = 0;
- DWORD dwMinorVersion = 0;
+ DWORD dwVersion = 0;
+ DWORD dwMajorVersion = 0;
+ DWORD dwMinorVersion = 0;
DWORD dwBuild = 0;
- dwVersion = GetVersion();
-
- dwMajorVersion = (DWORD)(LOBYTE(LOWORD(dwVersion)));
- dwMinorVersion = (DWORD)(HIBYTE(LOWORD(dwVersion)));
+ dwVersion = GetVersion();
+
+ dwMajorVersion = (DWORD)(LOBYTE(LOWORD(dwVersion)));
+ dwMinorVersion = (DWORD)(HIBYTE(LOWORD(dwVersion)));
dwBuild = ((dwVersion < 0x80000000) ? (DWORD)(HIWORD(dwVersion)) : 0);
sprintf(name, "Windows %d.%d", dwMajorVersion, dwMinorVersion);
@@ -6745,7 +7018,7 @@ struct mg_context *mg_start(const struct mg_callbacks *callbacks,
{
struct mg_context *ctx;
const char *name, *value, *default_value;
- int i;
+ int i, ok;
int workerthreadcount;
#if defined(_WIN32) && !defined(__SYMBIAN32__)
@@ -6767,6 +7040,7 @@ struct mg_context *mg_start(const struct mg_callbacks *callbacks,
if (sTlsInit==0) {
if (0 != pthread_key_create(&sTlsKey, NULL)) {
+ /* Fatal error - abort start. However, this situation should never occur in practice. */
mg_cry(fc(ctx), "Cannot initialize thread local storage");
mg_free(ctx);
return NULL;
@@ -6774,11 +7048,23 @@ struct mg_context *mg_start(const struct mg_callbacks *callbacks,
sTlsInit++;
}
+ ok = 0==pthread_mutex_init(&ctx->thread_mutex, NULL);
+ ok &= 0==pthread_cond_init(&ctx->thread_cond, NULL);
+ ok &= 0==pthread_cond_init(&ctx->sq_empty, NULL);
+ ok &= 0==pthread_cond_init(&ctx->sq_full, NULL);
+ ok &= 0==pthread_mutex_init(&ctx->nonce_mutex, NULL);
+ if (!ok) {
+ /* Fatal error - abort start. However, this situation should never occur in practice. */
+ mg_cry(fc(ctx), "Cannot initialize thread synchronization objects");
+ mg_free(ctx);
+ return NULL;
+ }
+
if (callbacks) {
ctx->callbacks = *callbacks;
}
ctx->user_data = user_data;
- ctx->request_handlers = 0;
+ ctx->request_handlers = NULL;
#if defined(USE_LUA) && defined(USE_WEBSOCKET)
ctx->shared_lua_websockets = 0;
@@ -6799,7 +7085,7 @@ struct mg_context *mg_start(const struct mg_callbacks *callbacks,
mg_free(ctx->config[i]);
}
ctx->config[i] = mg_strdup(value);
- DEBUG_TRACE(("[%s] -> [%s]", name, value));
+ DEBUG_TRACE("[%s] -> [%s]", name, value);
}
/* Set default value if needed */
@@ -6833,11 +7119,6 @@ struct mg_context *mg_start(const struct mg_callbacks *callbacks,
(void) signal(SIGPIPE, SIG_IGN);
#endif /* !_WIN32 && !__SYMBIAN32__ */
- (void) pthread_mutex_init(&ctx->mutex, NULL);
- (void) pthread_cond_init(&ctx->cond, NULL);
- (void) pthread_cond_init(&ctx->sq_empty, NULL);
- (void) pthread_cond_init(&ctx->sq_full, NULL);
-
workerthreadcount = atoi(ctx->config[NUM_THREADS]);
if (workerthreadcount > MAX_WORKER_THREADS) {
@@ -6848,7 +7129,7 @@ struct mg_context *mg_start(const struct mg_callbacks *callbacks,
if (workerthreadcount > 0) {
ctx->workerthreadcount = workerthreadcount;
- ctx->workerthreadids = mg_calloc(workerthreadcount, sizeof(pthread_t));
+ ctx->workerthreadids = (pthread_t *)mg_calloc(workerthreadcount, sizeof(pthread_t));
if (ctx->workerthreadids == NULL) {
mg_cry(fc(ctx), "Not enough memory for worker thread ID array");
free_context(ctx);
@@ -6856,19 +7137,27 @@ struct mg_context *mg_start(const struct mg_callbacks *callbacks,
}
}
+#if defined(USE_TIMERS)
+ if (timers_init(ctx) != 0) {
+ mg_cry(fc(ctx), "Error creating timers");
+ free_context(ctx);
+ return NULL;
+ }
+#endif
+
/* Start master (listening) thread */
mg_start_thread_with_id(master_thread, ctx, &ctx->masterthreadid);
/* Start worker threads */
for (i = 0; i < workerthreadcount; i++) {
- (void) pthread_mutex_lock(&ctx->mutex);
+ (void) pthread_mutex_lock(&ctx->thread_mutex);
ctx->num_threads++;
- (void) pthread_mutex_unlock(&ctx->mutex);
+ (void) pthread_mutex_unlock(&ctx->thread_mutex);
if (mg_start_thread_with_id(worker_thread, ctx,
&ctx->workerthreadids[i]) != 0) {
- (void) pthread_mutex_lock(&ctx->mutex);
+ (void) pthread_mutex_lock(&ctx->thread_mutex);
ctx->num_threads--;
- (void) pthread_mutex_unlock(&ctx->mutex);
+ (void) pthread_mutex_unlock(&ctx->thread_mutex);
mg_cry(fc(ctx), "Cannot start worker thread: %ld", (long) ERRNO);
}
}
diff --git a/src/client/Client.cc b/src/client/Client.cc
index 47d1c1d..6f57225 100644
--- a/src/client/Client.cc
+++ b/src/client/Client.cc
@@ -53,7 +53,7 @@ using namespace std;
#include "mon/MonClient.h"
-#include "mds/MDSMap.h"
+#include "mds/flock.h"
#include "osd/OSDMap.h"
#include "mon/MonMap.h"
@@ -149,6 +149,7 @@ Client::Client(Messenger *m, MonClient *mc)
logger(NULL),
m_command_hook(this),
timer(m->cct, client_lock),
+ switch_interrupt_cb(NULL),
ino_invalidate_cb(NULL),
ino_invalidate_cb_handle(NULL),
dentry_invalidate_cb(NULL),
@@ -157,6 +158,7 @@ Client::Client(Messenger *m, MonClient *mc)
getgroups_cb_handle(NULL),
async_ino_invalidator(m->cct),
async_dentry_invalidator(m->cct),
+ interrupt_finisher(m->cct),
tick_event(NULL),
monclient(mc), messenger(m), whoami(m->get_myname().num()),
initialized(false), mounted(false), unmounting(false),
@@ -443,6 +445,12 @@ void Client::shutdown()
async_dentry_invalidator.stop();
}
+ if (switch_interrupt_cb) {
+ ldout(cct, 10) << "shutdown stopping interrupt finisher" << dendl;
+ interrupt_finisher.wait_for_empty();
+ interrupt_finisher.stop();
+ }
+
objectcacher->stop(); // outside of client_lock! this does a join.
client_lock.Lock();
@@ -1437,15 +1445,15 @@ int Client::make_request(MetaRequest *request,
void Client::put_request(MetaRequest *request)
{
- if (request->get_num_ref() == 1) {
+ if (request->_put()) {
if (request->inode())
put_inode(request->take_inode());
if (request->old_inode())
put_inode(request->take_old_inode());
if (request->other_inode())
put_inode(request->take_other_inode());
+ delete request;
}
- request->_put();
}
int Client::encode_inode_release(Inode *in, MetaRequest *req,
@@ -2034,6 +2042,9 @@ void Client::send_reconnect(MetaSession *session)
in->make_long_path(path);
ldout(cct, 10) << " path " << path << dendl;
+ bufferlist flockbl;
+ _encode_filelocks(in, flockbl);
+
in->caps[mds]->seq = 0; // reset seq.
in->caps[mds]->issue_seq = 0; // reset seq.
in->caps[mds]->mseq = 0; // reset seq.
@@ -2042,7 +2053,8 @@ void Client::send_reconnect(MetaSession *session)
path.get_ino(), path.get_path(), // ino
in->caps_wanted(), // wanted
in->caps[mds]->issued, // issued
- in->snaprealm->ino);
+ in->snaprealm->ino,
+ flockbl);
if (did_snaprealm.count(in->snaprealm->ino) == 0) {
ldout(cct, 10) << " snaprealm " << *in->snaprealm << dendl;
@@ -2182,6 +2194,8 @@ void Client::put_inode(Inode *in, int n)
in->snaprealm_item.remove_myself();
if (in == root)
root = 0;
+ delete in->fcntl_locks;
+ delete in->flock_locks;
delete in;
}
}
@@ -3659,9 +3673,10 @@ void Client::handle_cap_flush_ack(MetaSession *session, Inode *in, Cap *cap, MCl
int mds = session->mds_num;
int dirty = m->get_dirty();
int cleaned = 0;
+ uint16_t flush_ack_tid = static_cast<uint16_t>(m->get_client_tid());
for (int i = 0; i < CEPH_CAP_BITS; ++i) {
if ((dirty & (1 << i)) &&
- (m->get_client_tid() == in->flushing_cap_tid[i]))
+ (flush_ack_tid == in->flushing_cap_tid[i]))
cleaned |= 1 << i;
}
@@ -5869,6 +5884,8 @@ int Client::_release_fh(Fh *f)
in->snap_cap_refs--;
}
+ _release_filelocks(f);
+
put_inode(in);
delete f;
@@ -6857,6 +6874,290 @@ int Client::statfs(const char *path, struct statvfs *stbuf)
return rval;
}
+int Client::_do_filelock(Inode *in, Fh *fh, int lock_type, int op, int sleep,
+ struct flock *fl, uint64_t owner, void *fuse_req)
+{
+ ldout(cct, 10) << "_do_filelock ino " << in->ino
+ << (lock_type == CEPH_LOCK_FCNTL ? " fcntl" : " flock")
+ << " type " << fl->l_type << " owner " << owner
+ << " " << fl->l_start << "~" << fl->l_len << dendl;
+
+ int lock_cmd;
+ if (F_RDLCK == fl->l_type)
+ lock_cmd = CEPH_LOCK_SHARED;
+ else if (F_WRLCK == fl->l_type)
+ lock_cmd = CEPH_LOCK_EXCL;
+ else if (F_UNLCK == fl->l_type)
+ lock_cmd = CEPH_LOCK_UNLOCK;
+ else
+ return -EIO;
+
+ if (op != CEPH_MDS_OP_SETFILELOCK || lock_cmd == CEPH_LOCK_UNLOCK)
+ sleep = 0;
+
+ /*
+ * Set the most significant bit, so that MDS knows the 'owner'
+ * is sufficient to identify the owner of lock. (old code uses
+ * both 'owner' and 'pid')
+ */
+ owner |= (1ULL << 63);
+
+ MetaRequest *req = new MetaRequest(op);
+ filepath path;
+ in->make_nosnap_relative_path(path);
+ req->set_filepath(path);
+ req->set_inode(in);
+
+ req->head.args.filelock_change.rule = lock_type;
+ req->head.args.filelock_change.type = lock_cmd;
+ req->head.args.filelock_change.owner = owner;
+ req->head.args.filelock_change.pid = fl->l_pid;
+ req->head.args.filelock_change.start = fl->l_start;
+ req->head.args.filelock_change.length = fl->l_len;
+ req->head.args.filelock_change.wait = sleep;
+
+ int ret;
+ bufferlist bl;
+
+ if (sleep && switch_interrupt_cb && fuse_req) {
+ // enable interrupt
+ switch_interrupt_cb(fuse_req, req->get());
+
+ ret = make_request(req, -1, -1, NULL, NULL, -1, &bl);
+
+ // disable interrupt
+ switch_interrupt_cb(fuse_req, NULL);
+ put_request(req);
+ } else {
+ ret = make_request(req, -1, -1, NULL, NULL, -1, &bl);
+ }
+
+ if (ret == 0) {
+ if (op == CEPH_MDS_OP_GETFILELOCK) {
+ ceph_filelock filelock;
+ bufferlist::iterator p = bl.begin();
+ ::decode(filelock, p);
+
+ if (CEPH_LOCK_SHARED == filelock.type)
+ fl->l_type = F_RDLCK;
+ else if (CEPH_LOCK_EXCL == filelock.type)
+ fl->l_type = F_WRLCK;
+ else
+ fl->l_type = F_UNLCK;
+
+ fl->l_whence = SEEK_SET;
+ fl->l_start = filelock.start;
+ fl->l_len = filelock.length;
+ fl->l_pid = filelock.pid;
+ } else if (op == CEPH_MDS_OP_SETFILELOCK) {
+ ceph_lock_state_t *lock_state;
+ if (lock_type == CEPH_LOCK_FCNTL) {
+ if (!in->fcntl_locks)
+ in->fcntl_locks = new ceph_lock_state_t(cct);
+ lock_state = in->fcntl_locks;
+ } else if (lock_type == CEPH_LOCK_FLOCK) {
+ if (!in->flock_locks)
+ in->flock_locks = new ceph_lock_state_t(cct);
+ lock_state = in->flock_locks;
+ } else
+ assert(0);
+ _update_lock_state(fl, owner, lock_state);
+
+ if (fh) {
+ if (lock_type == CEPH_LOCK_FCNTL) {
+ if (!fh->fcntl_locks)
+ fh->fcntl_locks = new ceph_lock_state_t(cct);
+ lock_state = fh->fcntl_locks;
+ } else {
+ if (!fh->flock_locks)
+ fh->flock_locks = new ceph_lock_state_t(cct);
+ lock_state = fh->flock_locks;
+ }
+ _update_lock_state(fl, owner, lock_state);
+ }
+ } else
+ assert(0);
+ }
+ return ret;
+}
+
+int Client::_interrupt_filelock(MetaRequest *req)
+{
+ Inode *in = req->inode();
+
+ int lock_type;
+ if (req->head.args.filelock_change.rule == CEPH_LOCK_FLOCK)
+ lock_type = CEPH_LOCK_FLOCK_INTR;
+ else if (req->head.args.filelock_change.rule == CEPH_LOCK_FCNTL)
+ lock_type = CEPH_LOCK_FCNTL_INTR;
+ else
+ assert(0);
+
+ MetaRequest *intr_req = new MetaRequest(CEPH_MDS_OP_SETFILELOCK);
+ filepath path;
+ in->make_nosnap_relative_path(path);
+ intr_req->set_filepath(path);
+ intr_req->set_inode(in);
+ intr_req->head.args.filelock_change = req->head.args.filelock_change;
+ intr_req->head.args.filelock_change.rule = lock_type;
+ intr_req->head.args.filelock_change.type = CEPH_LOCK_UNLOCK;
+
+ return make_request(intr_req, -1, -1, NULL, NULL, -1);
+}
+
+void Client::_encode_filelocks(Inode *in, bufferlist& bl)
+{
+ if (!in->fcntl_locks && !in->flock_locks)
+ return;
+
+ unsigned nr_fcntl_locks = in->fcntl_locks ? in->fcntl_locks->held_locks.size() : 0;
+ ::encode(nr_fcntl_locks, bl);
+ if (nr_fcntl_locks) {
+ ceph_lock_state_t* lock_state = in->fcntl_locks;
+ for(multimap<uint64_t, ceph_filelock>::iterator p = lock_state->held_locks.begin();
+ p != lock_state->held_locks.end();
+ ++p)
+ ::encode(p->second, bl);
+ }
+
+ unsigned nr_flock_locks = in->flock_locks ? in->flock_locks->held_locks.size() : 0;
+ ::encode(nr_flock_locks, bl);
+ if (nr_flock_locks) {
+ ceph_lock_state_t* lock_state = in->flock_locks;
+ for(multimap<uint64_t, ceph_filelock>::iterator p = lock_state->held_locks.begin();
+ p != lock_state->held_locks.end();
+ ++p)
+ ::encode(p->second, bl);
+ }
+
+ ldout(cct, 10) << "_encode_filelocks ino " << in->ino << ", " << nr_fcntl_locks
+ << " fcntl locks, " << nr_flock_locks << " flock locks" << dendl;
+}
+
+void Client::_release_filelocks(Fh *fh)
+{
+ if (!fh->fcntl_locks && !fh->flock_locks)
+ return;
+
+ Inode *in = fh->inode;
+ ldout(cct, 10) << "_release_filelocks " << fh << " ino " << in->ino << dendl;
+
+ list<pair<int, ceph_filelock> > to_release;
+
+ if (fh->fcntl_locks) {
+ ceph_lock_state_t* lock_state = fh->fcntl_locks;
+ for(multimap<uint64_t, ceph_filelock>::iterator p = lock_state->held_locks.begin();
+ p != lock_state->held_locks.end();
+ ++p)
+ to_release.push_back(pair<int, ceph_filelock>(CEPH_LOCK_FCNTL, p->second));
+ delete fh->fcntl_locks;
+ }
+ if (fh->flock_locks) {
+ ceph_lock_state_t* lock_state = fh->flock_locks;
+ for(multimap<uint64_t, ceph_filelock>::iterator p = lock_state->held_locks.begin();
+ p != lock_state->held_locks.end();
+ ++p)
+ to_release.push_back(pair<int, ceph_filelock>(CEPH_LOCK_FLOCK, p->second));
+ delete fh->flock_locks;
+ }
+
+ if (to_release.empty())
+ return;
+
+ struct flock fl;
+ memset(&fl, 0, sizeof(fl));
+ fl.l_whence = SEEK_SET;
+ fl.l_type = F_UNLCK;
+
+ for (list<pair<int, ceph_filelock> >::iterator p = to_release.begin();
+ p != to_release.end();
+ ++p) {
+ fl.l_start = p->second.start;
+ fl.l_len = p->second.length;
+ fl.l_pid = p->second.pid;
+ _do_filelock(in, NULL, p->first, CEPH_MDS_OP_SETFILELOCK, 0, &fl, p->second.owner);
+ }
+}
+
+void Client::_update_lock_state(struct flock *fl, uint64_t owner,
+ ceph_lock_state_t *lock_state)
+{
+ int lock_cmd;
+ if (F_RDLCK == fl->l_type)
+ lock_cmd = CEPH_LOCK_SHARED;
+ else if (F_WRLCK == fl->l_type)
+ lock_cmd = CEPH_LOCK_EXCL;
+ else
+ lock_cmd = CEPH_LOCK_UNLOCK;
+
+ ceph_filelock filelock;
+ filelock.start = fl->l_start;
+ filelock.length = fl->l_len;
+ filelock.client = 0;
+ // see comment in _do_filelock()
+ filelock.owner = owner | (1ULL << 63);
+ filelock.pid = fl->l_pid;
+ filelock.type = lock_cmd;
+
+ if (filelock.type == CEPH_LOCK_UNLOCK) {
+ list<ceph_filelock> activated_locks;
+ lock_state->remove_lock(filelock, activated_locks);
+ } else {
+ bool r = lock_state->add_lock(filelock, false, false);
+ assert(r);
+ }
+}
+
+int Client::_getlk(Fh *fh, struct flock *fl, uint64_t owner)
+{
+ Inode *in = fh->inode;
+ ldout(cct, 10) << "_getlk " << fh << " ino " << in->ino << dendl;
+ int ret = _do_filelock(in, fh, CEPH_LOCK_FCNTL, CEPH_MDS_OP_GETFILELOCK, 0, fl, owner);
+ return ret;
+}
+
+int Client::_setlk(Fh *fh, struct flock *fl, uint64_t owner, int sleep, void *fuse_req)
+{
+ Inode *in = fh->inode;
+ ldout(cct, 10) << "_setlk " << fh << " ino " << in->ino << dendl;
+ int ret = _do_filelock(in, fh, CEPH_LOCK_FCNTL, CEPH_MDS_OP_SETFILELOCK, sleep, fl, owner, fuse_req);
+ ldout(cct, 10) << "_setlk " << fh << " ino " << in->ino << " result=" << ret << dendl;
+ return ret;
+}
+
+int Client::_flock(Fh *fh, int cmd, uint64_t owner, void *fuse_req)
+{
+ Inode *in = fh->inode;
+ ldout(cct, 10) << "_flock " << fh << " ino " << in->ino << dendl;
+
+ int sleep = !(cmd & LOCK_NB);
+ cmd &= ~LOCK_NB;
+
+ int type;
+ switch (cmd) {
+ case LOCK_SH:
+ type = F_RDLCK;
+ break;
+ case LOCK_EX:
+ type = F_WRLCK;
+ break;
+ case LOCK_UN:
+ type = F_UNLCK;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ struct flock fl;
+ memset(&fl, 0, sizeof(fl));
+ fl.l_type = type;
+ fl.l_whence = SEEK_SET;
+
+ int ret = _do_filelock(in, fh, CEPH_LOCK_FLOCK, CEPH_MDS_OP_SETFILELOCK, sleep, &fl, owner, fuse_req);
+ ldout(cct, 10) << "_flock " << fh << " ino " << in->ino << " result=" << ret << dendl;
+ return ret;
+}
+
int Client::ll_statfs(Inode *in, struct statvfs *stbuf)
{
/* Since the only thing this does is wrap a call to statfs, and
@@ -6887,6 +7188,16 @@ void Client::ll_register_dentry_invalidate_cb(client_dentry_callback_t cb, void
async_dentry_invalidator.start();
}
+void Client::ll_register_switch_interrupt_cb(client_switch_interrupt_callback_t cb)
+{
+ Mutex::Locker l(client_lock);
+ ldout(cct, 10) << "ll_register_switch_interrupt_cb cb " << (void*)cb << dendl;
+ if (cb == NULL)
+ return;
+ switch_interrupt_cb = cb;
+ interrupt_finisher.start();
+}
+
void Client::ll_register_getgroups_cb(client_getgroups_callback_t cb, void *handle)
{
Mutex::Locker l(client_lock);
@@ -8739,6 +9050,59 @@ int Client::ll_release(Fh *fh)
return 0;
}
+int Client::ll_getlk(Fh *fh, struct flock *fl, uint64_t owner)
+{
+ Mutex::Locker lock(client_lock);
+
+ ldout(cct, 3) << "ll_getlk (fh)" << fh << " " << fh->inode->ino << dendl;
+ tout(cct) << "ll_getlk (fh)" << (unsigned long)fh << std::endl;
+
+ return _getlk(fh, fl, owner);
+}
+
+int Client::ll_setlk(Fh *fh, struct flock *fl, uint64_t owner, int sleep, void *fuse_req)
+{
+ Mutex::Locker lock(client_lock);
+
+ ldout(cct, 3) << "ll_setlk (fh) " << fh << " " << fh->inode->ino << dendl;
+ tout(cct) << "ll_setlk (fh)" << (unsigned long)fh << std::endl;
+
+ return _setlk(fh, fl, owner, sleep, fuse_req);
+}
+
+int Client::ll_flock(Fh *fh, int cmd, uint64_t owner, void *fuse_req)
+{
+ Mutex::Locker lock(client_lock);
+
+ ldout(cct, 3) << "ll_flock (fh) " << fh << " " << fh->inode->ino << dendl;
+ tout(cct) << "ll_flock (fh)" << (unsigned long)fh << std::endl;
+
+ return _flock(fh, cmd, owner, fuse_req);
+}
+
+class C_Client_RequestInterrupt : public Context {
+private:
+ Client *client;
+ MetaRequest *req;
+public:
+ C_Client_RequestInterrupt(Client *c, MetaRequest *r) : client(c), req(r) {
+ req->get();
+ }
+ void finish(int r) {
+ Mutex::Locker l(client->client_lock);
+ assert(req->head.op == CEPH_MDS_OP_SETFILELOCK);
+ client->_interrupt_filelock(req);
+ client->put_request(req);
+ }
+};
+
+void Client::ll_interrupt(void *d)
+{
+ MetaRequest *req = static_cast<MetaRequest*>(d);
+ ldout(cct, 3) << "ll_interrupt tid " << req->get_tid() << dendl;
+ tout(cct) << "ll_interrupt tid " << req->get_tid() << std::endl;
+ interrupt_finisher.queue(new C_Client_RequestInterrupt(this, req));
+}
// =========================================
// layout
diff --git a/src/client/Client.h b/src/client/Client.h
index e31e90a..8e1741d 100644
--- a/src/client/Client.h
+++ b/src/client/Client.h
@@ -119,6 +119,7 @@ struct CapSnap;
struct MetaSession;
struct MetaRequest;
+class ceph_lock_state_t;
typedef void (*client_ino_callback_t)(void *handle, vinodeno_t ino, int64_t off, int64_t len);
@@ -127,6 +128,7 @@ typedef void (*client_dentry_callback_t)(void *handle, vinodeno_t dirino,
vinodeno_t ino, string& name);
typedef int (*client_getgroups_callback_t)(void *handle, uid_t uid, gid_t **sgids);
+typedef void(*client_switch_interrupt_callback_t)(void *req, void *data);
// ========================================================
// client interface
@@ -214,6 +216,8 @@ class Client : public Dispatcher {
SafeTimer timer;
+ client_switch_interrupt_callback_t switch_interrupt_cb;
+
client_ino_callback_t ino_invalidate_cb;
void *ino_invalidate_cb_handle;
@@ -225,6 +229,7 @@ class Client : public Dispatcher {
Finisher async_ino_invalidator;
Finisher async_dentry_invalidator;
+ Finisher interrupt_finisher;
Context *tick_event;
utime_t last_cap_renew;
@@ -374,6 +379,7 @@ protected:
friend class C_Client_CacheInvalidate; // calls ino_invalidate_cb
friend class C_Client_DentryInvalidate; // calls dentry_invalidate_cb
friend class C_Block_Sync; // Calls block map and protected helpers
+ friend class C_Client_RequestInterrupt;
//int get_cache_size() { return lru.lru_get_size(); }
//void set_cache_size(int m) { lru.lru_set_max(m); }
@@ -604,6 +610,9 @@ private:
int _fsync(Fh *fh, bool syncdataonly);
int _sync_fs();
int _fallocate(Fh *fh, int mode, int64_t offset, int64_t length);
+ int _getlk(Fh *fh, struct flock *fl, uint64_t owner);
+ int _setlk(Fh *fh, struct flock *fl, uint64_t owner, int sleep, void *fuse_req=NULL);
+ int _flock(Fh *fh, int cmd, uint64_t owner, void *fuse_req=NULL);
int get_or_create(Inode *dir, const char* name,
Dentry **pdn, bool expect_null=false);
@@ -613,6 +622,12 @@ private:
vinodeno_t _get_vino(Inode *in);
inodeno_t _get_inodeno(Inode *in);
+ int _do_filelock(Inode *in, Fh *fh, int lock_type, int op, int sleep,
+ struct flock *fl, uint64_t owner, void *fuse_req=NULL);
+ int _interrupt_filelock(MetaRequest *req);
+ void _encode_filelocks(Inode *in, bufferlist& bl);
+ void _release_filelocks(Fh *fh);
+ void _update_lock_state(struct flock *fl, uint64_t owner, ceph_lock_state_t *lock_state);
public:
int mount(const std::string &mount_root);
void unmount();
@@ -818,6 +833,10 @@ public:
int ll_fsync(Fh *fh, bool syncdataonly);
int ll_fallocate(Fh *fh, int mode, loff_t offset, loff_t length);
int ll_release(Fh *fh);
+ int ll_getlk(Fh *fh, struct flock *fl, uint64_t owner);
+ int ll_setlk(Fh *fh, struct flock *fl, uint64_t owner, int sleep, void *fuse_req);
+ int ll_flock(Fh *fh, int cmd, uint64_t owner, void *fuse_req);
+ void ll_interrupt(void *d);
int ll_get_stripe_osd(struct Inode *in, uint64_t blockno,
ceph_file_layout* layout);
uint64_t ll_get_internal_offset(struct Inode *in, uint64_t blockno);
@@ -825,11 +844,11 @@ public:
int ll_num_osds(void);
int ll_osdaddr(int osd, uint32_t *addr);
int ll_osdaddr(int osd, char* buf, size_t size);
- void ll_register_ino_invalidate_cb(client_ino_callback_t cb, void *handle);
+ void ll_register_ino_invalidate_cb(client_ino_callback_t cb, void *handle);
void ll_register_dentry_invalidate_cb(client_dentry_callback_t cb, void *handle);
-
void ll_register_getgroups_cb(client_getgroups_callback_t cb, void *handle);
+ void ll_register_switch_interrupt_cb(client_switch_interrupt_callback_t cb);
};
#endif
diff --git a/src/client/Fh.h b/src/client/Fh.h
index 083ccd1..237a6d8 100644
--- a/src/client/Fh.h
+++ b/src/client/Fh.h
@@ -5,6 +5,7 @@
class Inode;
class Cond;
+class ceph_lock_state_t;
// file handle for any open file state
@@ -23,8 +24,13 @@ struct Fh {
loff_t consec_read_bytes;
int nr_consec_read;
+ // file lock
+ ceph_lock_state_t *fcntl_locks;
+ ceph_lock_state_t *flock_locks;
+
Fh() : inode(0), pos(0), mds(0), mode(0), flags(0), pos_locked(false),
- last_pos(0), consec_read_bytes(0), nr_consec_read(0) {}
+ last_pos(0), consec_read_bytes(0), nr_consec_read(0),
+ fcntl_locks(NULL), flock_locks(NULL) {}
};
diff --git a/src/client/Inode.h b/src/client/Inode.h
index 221a91a..91ba2fc 100644
--- a/src/client/Inode.h
+++ b/src/client/Inode.h
@@ -17,7 +17,8 @@ struct MetaSession;
class Dentry;
class Dir;
struct SnapRealm;
-class Inode;
+struct Inode;
+class ceph_lock_state_t;
struct Cap {
MetaSession *session;
@@ -210,6 +211,10 @@ class Inode {
ll_ref -= n;
}
+ // file locks
+ ceph_lock_state_t *fcntl_locks;
+ ceph_lock_state_t *flock_locks;
+
Inode(CephContext *cct_, vinodeno_t vino, ceph_file_layout *newlayout)
: cct(cct_), ino(vino.ino), snapid(vino.snapid),
rdev(0), mode(0), uid(0), gid(0), nlink(0),
@@ -224,8 +229,8 @@ class Inode {
snaprealm(0), snaprealm_item(this), snapdir_parent(0),
oset((void *)this, newlayout->fl_pg_pool, ino),
reported_size(0), wanted_max_size(0), requested_max_size(0),
- _ref(0), ll_ref(0),
- dir(0), dn_set()
+ _ref(0), ll_ref(0), dir(0), dn_set(),
+ fcntl_locks(NULL), flock_locks(NULL)
{
memset(&dir_layout, 0, sizeof(dir_layout));
memset(&layout, 0, sizeof(layout));
diff --git a/src/client/MetaRequest.h b/src/client/MetaRequest.h
index 45a90dc..6f82b5c 100644
--- a/src/client/MetaRequest.h
+++ b/src/client/MetaRequest.h
@@ -9,6 +9,7 @@
#include "msg/msg_types.h"
#include "include/xlist.h"
#include "include/filepath.h"
+#include "include/atomic.h"
#include "mds/mdstypes.h"
#include "common/Mutex.h"
@@ -47,7 +48,7 @@ public:
__u32 sent_on_mseq; // mseq at last submission of this request
int num_fwd; // # of times i've been forwarded
int retry_attempt;
- int ref;
+ atomic_t ref;
MClientReply *reply; // the reply
bool kick;
@@ -126,17 +127,14 @@ public:
Dentry *old_dentry();
MetaRequest* get() {
- ++ref;
+ ref.inc();
return this;
}
/// pseudo-private put method; use Client::put_request()
- void _put() {
- if (--ref == 0)
- delete this;
- }
- int get_num_ref() {
- return ref;
+ bool _put() {
+ int v = ref.dec();
+ return v == 0;
}
// normal fields
diff --git a/src/client/fuse_ll.cc b/src/client/fuse_ll.cc
index 7f419c3..4733912 100644
--- a/src/client/fuse_ll.cc
+++ b/src/client/fuse_ll.cc
@@ -636,6 +636,69 @@ static void fuse_ll_statfs(fuse_req_t req, fuse_ino_t ino)
cfuse->iput(in); // iput required
}
+static void fuse_ll_getlk(fuse_req_t req, fuse_ino_t ino,
+ struct fuse_file_info *fi, struct flock *lock)
+{
+ CephFuse::Handle *cfuse = (CephFuse::Handle *)fuse_req_userdata(req);
+ Fh *fh = (Fh*)fi->fh;
+
+ int r = cfuse->client->ll_getlk(fh, lock, fi->lock_owner);
+ if (r == 0)
+ fuse_reply_lock(req, lock);
+ else
+ fuse_reply_err(req, -r);
+}
+
+static void fuse_ll_setlk(fuse_req_t req, fuse_ino_t ino,
+ struct fuse_file_info *fi, struct flock *lock, int sleep)
+{
+ CephFuse::Handle *cfuse = (CephFuse::Handle *)fuse_req_userdata(req);
+ Fh *fh = (Fh*)fi->fh;
+
+ // must use multithread if operation may block
+ if (!cfuse->client->cct->_conf->fuse_multithreaded &&
+ sleep && lock->l_type != F_UNLCK) {
+ fuse_reply_err(req, EDEADLK);
+ return;
+ }
+
+ int r = cfuse->client->ll_setlk(fh, lock, fi->lock_owner, sleep, req);
+ fuse_reply_err(req, -r);
+}
+
+static void fuse_ll_interrupt(fuse_req_t req, void* data)
+{
+ CephFuse::Handle *cfuse = (CephFuse::Handle *)fuse_req_userdata(req);
+ cfuse->client->ll_interrupt(data);
+}
+
+static void switch_interrupt_cb(void *req, void* data)
+{
+ if (data)
+ fuse_req_interrupt_func((fuse_req_t)req, fuse_ll_interrupt, data);
+ else
+ fuse_req_interrupt_func((fuse_req_t)req, NULL, NULL);
+}
+
+#if FUSE_VERSION >= FUSE_MAKE_VERSION(2, 9)
+static void fuse_ll_flock(fuse_req_t req, fuse_ino_t ino,
+ struct fuse_file_info *fi, int cmd)
+{
+ CephFuse::Handle *cfuse = (CephFuse::Handle *)fuse_req_userdata(req);
+ Fh *fh = (Fh*)fi->fh;
+
+ // must use multithread if operation may block
+ if (!cfuse->client->cct->_conf->fuse_multithreaded &&
+ !(cmd & (LOCK_NB | LOCK_UN))) {
+ fuse_reply_err(req, EDEADLK);
+ return;
+ }
+
+ int r = cfuse->client->ll_flock(fh, cmd, fi->lock_owner, req);
+ fuse_reply_err(req, -r);
+}
+#endif
+
#if 0
static int getgroups_cb(void *handle, uid_t uid, gid_t **sgids)
{
@@ -742,8 +805,8 @@ const static struct fuse_lowlevel_ops fuse_ll_oper = {
removexattr: fuse_ll_removexattr,
access: fuse_ll_access,
create: fuse_ll_create,
- getlk: 0,
- setlk: 0,
+ getlk: fuse_ll_getlk,
+ setlk: fuse_ll_setlk,
bmap: 0,
#if FUSE_VERSION >= FUSE_MAKE_VERSION(2, 8)
#ifdef FUSE_IOCTL_COMPAT
@@ -752,13 +815,15 @@ const static struct fuse_lowlevel_ops fuse_ll_oper = {
ioctl: 0,
#endif
poll: 0,
-#if FUSE_VERSION > FUSE_MAKE_VERSION(2, 9)
+#endif
+#if FUSE_VERSION >= FUSE_MAKE_VERSION(2, 9)
write_buf: 0,
retrieve_reply: 0,
forget_multi: 0,
- flock: 0,
- fallocate: fuse_ll_fallocate
+ flock: fuse_ll_flock,
#endif
+#if FUSE_VERSION > FUSE_MAKE_VERSION(2, 9)
+ fallocate: fuse_ll_fallocate
#endif
};
@@ -859,6 +924,8 @@ int CephFuse::Handle::init(int argc, const char *argv[])
fuse_session_add_chan(se, ch);
+ client->ll_register_switch_interrupt_cb(switch_interrupt_cb);
+
/*
* this is broken:
*
diff --git a/src/common/Makefile.am b/src/common/Makefile.am
index 69e5ad3..29a3135 100644
--- a/src/common/Makefile.am
+++ b/src/common/Makefile.am
@@ -83,7 +83,8 @@ libcommon_la_SOURCES += \
osd/HitSet.cc \
mds/MDSMap.cc \
mds/inode_backtrace.cc \
- mds/mdstypes.cc
+ mds/mdstypes.cc \
+ mds/flock.cc
# inject crc in common
libcommon_crc_la_SOURCES = \
diff --git a/src/common/Thread.cc b/src/common/Thread.cc
index 7be0013..a962e06 100644
--- a/src/common/Thread.cc
+++ b/src/common/Thread.cc
@@ -51,7 +51,8 @@ void *Thread::entry_wrapper()
int p = ceph_gettid(); // may return -ENOSYS on other platforms
if (p > 0)
pid = p;
- if (ioprio_class >= 0 &&
+ if (pid &&
+ ioprio_class >= 0 &&
ioprio_priority >= 0) {
ceph_ioprio_set(IOPRIO_WHO_PROCESS,
pid,
diff --git a/src/common/Thread.h b/src/common/Thread.h
index 95f63b4..8173ca5 100644
--- a/src/common/Thread.h
+++ b/src/common/Thread.h
@@ -41,6 +41,7 @@ class Thread {
public:
const pthread_t &get_thread_id();
+ pid_t get_pid() const { return pid; }
bool is_started();
bool am_self();
int kill(int signal);
diff --git a/src/common/TrackedOp.cc b/src/common/TrackedOp.cc
index ddb2f91..5a76f64 100644
--- a/src/common/TrackedOp.cc
+++ b/src/common/TrackedOp.cc
@@ -121,10 +121,10 @@ void OpTracker::unregister_inflight_op(TrackedOp *i)
// caller checks;
assert(tracking_enabled);
+ Mutex::Locker locker(ops_in_flight_lock);
i->request->clear_data();
i->request->clear_payload();
- Mutex::Locker locker(ops_in_flight_lock);
assert(i->xitem.get_list() == &ops_in_flight);
utime_t now = ceph_clock_now(cct);
i->xitem.remove_myself();
diff --git a/src/common/WorkQueue.cc b/src/common/WorkQueue.cc
index 42f402f..0f8bc9d 100644
--- a/src/common/WorkQueue.cc
+++ b/src/common/WorkQueue.cc
@@ -271,6 +271,10 @@ void ThreadPool::set_ioprio(int cls, int priority)
for (set<WorkThread*>::iterator p = _threads.begin();
p != _threads.end();
++p) {
+ ldout(cct,10) << __func__
+ << " class " << cls << " priority " << priority
+ << " pid " << (*p)->get_pid()
+ << dendl;
int r = (*p)->set_ioprio(cls, priority);
if (r < 0)
lderr(cct) << " set_ioprio got " << cpp_strerror(r) << dendl;
diff --git a/src/common/ceph_crypto.h b/src/common/ceph_crypto.h
index c553594..686efb4 100644
--- a/src/common/ceph_crypto.h
+++ b/src/common/ceph_crypto.h
@@ -78,9 +78,11 @@ namespace ceph {
assert(s == SECSuccess);
}
void Update (const byte *input, size_t length) {
- SECStatus s;
- s = PK11_DigestOp(ctx, input, length);
- assert(s == SECSuccess);
+ if (length) {
+ SECStatus s;
+ s = PK11_DigestOp(ctx, input, length);
+ assert(s == SECSuccess);
+ }
}
void Final (byte *digest) {
SECStatus s;
diff --git a/src/common/config.cc b/src/common/config.cc
index 23bfe35..fc47083 100644
--- a/src/common/config.cc
+++ b/src/common/config.cc
@@ -947,7 +947,7 @@ int md_config_t::set_val_raw(const char *val, const config_option *opt)
}
static const char *CONF_METAVARIABLES[] =
- { "cluster", "type", "name", "host", "num", "id", "pid" };
+ { "cluster", "type", "name", "host", "num", "id", "pid", "cctid" };
static const int NUM_CONF_METAVARIABLES =
(sizeof(CONF_METAVARIABLES) / sizeof(CONF_METAVARIABLES[0]));
@@ -1059,6 +1059,8 @@ bool md_config_t::expand_meta(std::string &origval,
out += name.get_id().c_str();
else if (var == "pid")
out += stringify(getpid());
+ else if (var == "cctid")
+ out += stringify((unsigned long long)this);
else
assert(0); // unreachable
expanded = true;
diff --git a/src/common/config_opts.h b/src/common/config_opts.h
index fe00c76..0307441 100644
--- a/src/common/config_opts.h
+++ b/src/common/config_opts.h
@@ -92,6 +92,7 @@ SUBSYS(finisher, 1, 1)
SUBSYS(heartbeatmap, 1, 5)
SUBSYS(perfcounter, 1, 5)
SUBSYS(rgw, 1, 5) // log level for the Rados gateway
+SUBSYS(civetweb, 1, 10)
SUBSYS(javaclient, 1, 5)
SUBSYS(asok, 1, 5)
SUBSYS(throttle, 1, 1)
@@ -283,7 +284,7 @@ OPTION(fuse_default_permissions, OPT_BOOL, true)
OPTION(fuse_big_writes, OPT_BOOL, true)
OPTION(fuse_atomic_o_trunc, OPT_BOOL, true)
OPTION(fuse_debug, OPT_BOOL, false)
-OPTION(fuse_multithreaded, OPT_BOOL, false)
+OPTION(fuse_multithreaded, OPT_BOOL, true)
OPTION(crush_location, OPT_STR, "") // whitespace-separated list of key=value pairs describing crush location
@@ -489,6 +490,9 @@ OPTION(osd_heartbeat_interval, OPT_INT, 6) // (seconds) how often we ping
OPTION(osd_heartbeat_grace, OPT_INT, 20) // (seconds) how long before we decide a peer has failed
OPTION(osd_heartbeat_min_peers, OPT_INT, 10) // minimum number of peers
+// max number of parallel snap trims/pg
+OPTION(osd_pg_max_concurrent_snap_trims, OPT_U64, 2)
+
// minimum number of peers tha tmust be reachable to mark ourselves
// back up after being wrongly marked down.
OPTION(osd_heartbeat_min_healthy_ratio, OPT_FLOAT, .33)
@@ -766,6 +770,7 @@ OPTION(rgw_cache_lru_size, OPT_INT, 10000) // num of entries in rgw cache
OPTION(rgw_socket_path, OPT_STR, "") // path to unix domain socket, if not specified, rgw will not run as external fcgi
OPTION(rgw_host, OPT_STR, "") // host for radosgw, can be an IP, default is 0.0.0.0
OPTION(rgw_port, OPT_STR, "") // port to listen, format as "8080" "5000", if not specified, rgw will not run external fcgi
+OPTION(rgw_fcgi_explicit_free, OPT_BOOL, true) // whether to call FCGX_Free explicitly on every complete request
OPTION(rgw_dns_name, OPT_STR, "")
OPTION(rgw_script_uri, OPT_STR, "") // alternative value for SCRIPT_URI if not set in request
OPTION(rgw_request_uri, OPT_STR, "") // alternative value for REQUEST_URI if not set in request
diff --git a/src/common/crc32c_intel_fast_asm.S b/src/common/crc32c_intel_fast_asm.S
index 4ca5d65..2189684 100644
--- a/src/common/crc32c_intel_fast_asm.S
+++ b/src/common/crc32c_intel_fast_asm.S
@@ -662,3 +662,5 @@ global %1_slver
%endmacro
;;; func core, ver, snum
slversion crc32_iscsi_00, 00, 02, 0014
+; inform linker that this doesn't require executable stack
+section .note.GNU-stack noalloc noexec nowrite progbits
diff --git a/src/common/crc32c_intel_fast_zero_asm.S b/src/common/crc32c_intel_fast_zero_asm.S
index b7246f2..34b7f48 100644
--- a/src/common/crc32c_intel_fast_zero_asm.S
+++ b/src/common/crc32c_intel_fast_zero_asm.S
@@ -644,3 +644,5 @@ global %1_slver
%endmacro
;;; func core, ver, snum
slversion crc32_iscsi_zero_00, 00, 02, 0014
+; inform linker that this doesn't require executable stack
+section .note.GNU-stack noalloc noexec nowrite progbits
diff --git a/src/common/hobject.cc b/src/common/hobject.cc
index ecc8cfd..28cb86a 100644
--- a/src/common/hobject.cc
+++ b/src/common/hobject.cc
@@ -238,10 +238,10 @@ void ghobject_t::decode(bufferlist::iterator& bl)
void ghobject_t::dump(Formatter *f) const
{
hobj.dump(f);
- if (generation != NO_GEN) {
+ if (generation != NO_GEN)
f->dump_int("generation", generation);
+ if (shard_id != ghobject_t::NO_SHARD)
f->dump_int("shard_id", shard_id);
- }
}
void ghobject_t::generate_test_instances(list<ghobject_t*>& o)
diff --git a/src/common/io_priority.cc b/src/common/io_priority.cc
index b9eeae8..be4dc2a 100644
--- a/src/common/io_priority.cc
+++ b/src/common/io_priority.cc
@@ -41,8 +41,8 @@ int ceph_ioprio_set(int whence, int who, int ioprio)
int ceph_ioprio_string_to_class(const std::string& s)
{
- std::string l;
- std::transform(s.begin(), s.end(), l.begin(), ::tolower);
+ std::string l = s;
+ std::transform(l.begin(), l.end(), l.begin(), ::tolower);
if (l == "idle")
return IOPRIO_CLASS_IDLE;
diff --git a/src/common/util.cc b/src/common/util.cc
index ab417be..212384b 100644
--- a/src/common/util.cc
+++ b/src/common/util.cc
@@ -18,6 +18,10 @@
#include "common/errno.h"
#include "common/strtol.h"
+#ifdef HAVE_SYS_VFS_H
+#include <sys/vfs.h>
+#endif
+
// test if an entire buf is zero in 8-byte chunks
bool buf_is_zero(const char *buf, size_t len)
{
@@ -104,3 +108,21 @@ int64_t unit_to_bytesize(string val, ostream *pss)
}
return (r * (1LL << modifier));
}
+
+int get_fs_stats(ceph_data_stats_t &stats, const char *path)
+{
+ if (!path)
+ return -EINVAL;
+
+ struct statfs stbuf;
+ int err = ::statfs(path, &stbuf);
+ if (err < 0) {
+ return -errno;
+ }
+
+ stats.byte_total = stbuf.f_blocks * stbuf.f_bsize;
+ stats.byte_used = (stbuf.f_blocks - stbuf.f_bfree) * stbuf.f_bsize;
+ stats.byte_avail = stbuf.f_bavail * stbuf.f_bsize;
+ stats.avail_percent = (((float)stats.byte_avail/stats.byte_total)*100);
+ return 0;
+}
diff --git a/src/crush/CrushCompiler.cc b/src/crush/CrushCompiler.cc
index b52a55a..33ed1db 100644
--- a/src/crush/CrushCompiler.cc
+++ b/src/crush/CrushCompiler.cc
@@ -191,6 +191,8 @@ int CrushCompiler::decompile(ostream &out)
out << "tunable chooseleaf_descend_once " << crush.get_chooseleaf_descend_once() << "\n";
if (crush.get_chooseleaf_vary_r() != 0)
out << "tunable chooseleaf_vary_r " << crush.get_chooseleaf_vary_r() << "\n";
+ if (crush.get_straw_calc_version() != 0)
+ out << "tunable straw_calc_version " << crush.get_straw_calc_version() << "\n";
out << "\n# devices\n";
for (int i=0; i<crush.get_max_devices(); i++) {
@@ -368,6 +370,8 @@ int CrushCompiler::parse_tunable(iter_t const& i)
crush.set_chooseleaf_descend_once(val);
else if (name == "chooseleaf_vary_r")
crush.set_chooseleaf_vary_r(val);
+ else if (name == "straw_calc_version")
+ crush.set_straw_calc_version(val);
else {
err << "tunable " << name << " not recognized" << std::endl;
return -1;
diff --git a/src/crush/CrushTester.cc b/src/crush/CrushTester.cc
index 23f1a7d..3b45e53 100644
--- a/src/crush/CrushTester.cc
+++ b/src/crush/CrushTester.cc
@@ -487,18 +487,18 @@ int CrushTester::test()
vector<int> out;
if (use_crush) {
- if (output_statistics)
- err << "CRUSH"; // prepend CRUSH to placement output
+ if (output_mappings)
+ err << "CRUSH"; // prepend CRUSH to placement output
crush.do_rule(r, x, out, nr, weight);
} else {
- if (output_statistics)
- err << "RNG"; // prepend RNG to placement output to denote simulation
+ if (output_mappings)
+ err << "RNG"; // prepend RNG to placement output to denote simulation
// test our new monte carlo placement generator
random_placement(r, out, nr, weight);
}
- if (output_statistics)
- err << " rule " << r << " x " << x << " " << out << std::endl;
+ if (output_mappings)
+ err << " rule " << r << " x " << x << " " << out << std::endl;
if (output_data_file)
write_integer_indexed_vector_data_string(tester_data.placement_information, x, out);
@@ -539,14 +539,14 @@ int CrushTester::test()
if (output_statistics)
for (unsigned i = 0; i < per.size(); i++) {
- if (output_utilization && num_batches > 1){
+ if (output_utilization) {
if (num_objects_expected[i] > 0 && per[i] > 0) {
err << " device " << i << ":\t"
<< "\t" << " stored " << ": " << per[i]
<< "\t" << " expected " << ": " << num_objects_expected[i]
<< std::endl;
}
- } else if (output_utilization_all && num_batches > 1) {
+ } else if (output_utilization_all) {
err << " device " << i << ":\t"
<< "\t" << " stored " << ": " << per[i]
<< "\t" << " expected " << ": " << num_objects_expected[i]
diff --git a/src/crush/CrushTester.h b/src/crush/CrushTester.h
index df5a157..8de70f9 100644
--- a/src/crush/CrushTester.h
+++ b/src/crush/CrushTester.h
@@ -27,6 +27,7 @@ class CrushTester {
bool output_utilization;
bool output_utilization_all;
bool output_statistics;
+ bool output_mappings;
bool output_bad_mappings;
bool output_choose_tries;
@@ -176,6 +177,7 @@ public:
output_utilization(false),
output_utilization_all(false),
output_statistics(false),
+ output_mappings(false),
output_bad_mappings(false),
output_choose_tries(false),
output_data_file(false),
@@ -226,6 +228,13 @@ public:
return output_statistics;
}
+ void set_output_mappings(bool b) {
+ output_mappings = b;
+ }
+ bool get_output_mappings() const {
+ return output_mappings;
+ }
+
void set_output_bad_mappings(bool b) {
output_bad_mappings = b;
}
diff --git a/src/crush/CrushWrapper.cc b/src/crush/CrushWrapper.cc
index 31da4f5..805a0c6 100644
--- a/src/crush/CrushWrapper.cc
+++ b/src/crush/CrushWrapper.cc
@@ -150,10 +150,10 @@ int CrushWrapper::remove_item(CephContext *cct, int item, bool unlink_only)
for (unsigned i=0; i<b->size; ++i) {
int id = b->items[i];
if (id == item) {
- adjust_item_weight(cct, item, 0);
ldout(cct, 5) << "remove_item removing item " << item
<< " from bucket " << b->id << dendl;
- crush_bucket_remove_item(b, item);
+ crush_bucket_remove_item(crush, b, item);
+ adjust_item_weight(cct, b->id, b->weight);
ret = 0;
}
}
@@ -171,8 +171,8 @@ bool CrushWrapper::_search_item_exists(int item) const
if (!crush->buckets[i])
continue;
crush_bucket *b = crush->buckets[i];
- for (unsigned i=0; i<b->size; ++i) {
- if (b->items[i] == item)
+ for (unsigned j=0; j<b->size; ++j) {
+ if (b->items[j] == item)
return true;
}
}
@@ -197,9 +197,9 @@ int CrushWrapper::_remove_item_under(CephContext *cct, int item, int ancestor, b
for (unsigned i=0; i<b->size; ++i) {
int id = b->items[i];
if (id == item) {
- adjust_item_weight(cct, item, 0);
ldout(cct, 5) << "_remove_item_under removing item " << item << " from bucket " << b->id << dendl;
- crush_bucket_remove_item(b, item);
+ crush_bucket_remove_item(crush, b, item);
+ adjust_item_weight(cct, b->id, b->weight);
ret = 0;
} else if (id < 0) {
int r = remove_item_under(cct, item, id, unlink_only);
@@ -459,6 +459,8 @@ int CrushWrapper::insert_item(CephContext *cct, int item, float weight, string n
int cur = item;
+ // create locations if locations don't exist and add child in location with 0 weight
+ // the more detail in the insert_item method declaration in CrushWrapper.h
for (map<int,string>::iterator p = type_map.begin(); p != type_map.end(); ++p) {
// ignore device type
if (p->first == 0)
@@ -518,17 +520,17 @@ int CrushWrapper::insert_item(CephContext *cct, int item, float weight, string n
ldout(cct, 5) << "insert_item adding " << cur << " weight " << weight
<< " to bucket " << id << dendl;
- int r = crush_bucket_add_item(b, cur, 0);
+ int r = crush_bucket_add_item(crush, b, cur, 0);
assert (!r);
+ break;
+ }
- // now that we've added the (0-weighted) item and any parent buckets, adjust the weight.
- adjust_item_weightf(cct, item, weight);
-
+ // adjust the item's weight in location
+ if(adjust_item_weightf_in_loc(cct, item, weight, loc) > 0) {
if (item >= crush->max_devices) {
crush->max_devices = item + 1;
ldout(cct, 5) << "insert_item max_devices now " << crush->max_devices << dendl;
}
-
return 0;
}
@@ -585,7 +587,7 @@ int CrushWrapper::create_or_move_item(CephContext *cct, int item, float weight,
if (check_item_loc(cct, item, loc, &old_iweight)) {
ldout(cct, 5) << "create_or_move_item " << item << " already at " << loc << dendl;
} else {
- if (item_exists(item)) {
+ if (_search_item_exists(item)) {
weight = get_item_weightf(item);
ldout(cct, 10) << "create_or_move_item " << item << " exists with weight " << weight << dendl;
remove_item(cct, item, true);
@@ -620,7 +622,7 @@ int CrushWrapper::update_item(CephContext *cct, int item, float weight, string n
if (old_iweight != iweight) {
ldout(cct, 5) << "update_item " << item << " adjusting weight "
<< ((float)old_iweight/(float)0x10000) << " -> " << weight << dendl;
- adjust_item_weight(cct, item, iweight);
+ adjust_item_weight_in_loc(cct, item, iweight, loc);
ret = 1;
}
if (get_item_name(item) != name) {
@@ -641,7 +643,7 @@ int CrushWrapper::update_item(CephContext *cct, int item, float weight, string n
return ret;
}
-int CrushWrapper::get_item_weight(int id)
+int CrushWrapper::get_item_weight(int id) const
{
for (int bidx = 0; bidx < crush->max_buckets; bidx++) {
crush_bucket *b = crush->buckets[bidx];
@@ -654,6 +656,24 @@ int CrushWrapper::get_item_weight(int id)
return -ENOENT;
}
+int CrushWrapper::get_item_weight_in_loc(int id, const map<string,string> &loc)
+{
+ for (map<string,string>::const_iterator l = loc.begin(); l != loc.end(); l++) {
+ int bid = get_item_id(l->second);
+ if (!bucket_exists(bid))
+ continue;
+ crush_bucket *b = get_bucket(bid);
+ if ( b == NULL)
+ continue;
+ for (unsigned int i = 0; i < b->size; i++) {
+ if (b->items[i] == id) {
+ return crush_get_bucket_item_weight(b, i);
+ }
+ }
+ }
+ return -ENOENT;
+}
+
int CrushWrapper::adjust_item_weight(CephContext *cct, int id, int weight)
{
ldout(cct, 5) << "adjust_item_weight " << id << " weight " << weight << dendl;
@@ -664,7 +684,7 @@ int CrushWrapper::adjust_item_weight(CephContext *cct, int id, int weight)
continue;
for (unsigned i = 0; i < b->size; i++) {
if (b->items[i] == id) {
- int diff = crush_bucket_adjust_item_weight(b, id, weight);
+ int diff = crush_bucket_adjust_item_weight(crush, b, id, weight);
ldout(cct, 5) << "adjust_item_weight " << id << " diff " << diff << " in bucket " << bidx << dendl;
adjust_item_weight(cct, -1 - bidx, b->weight);
changed++;
@@ -676,7 +696,33 @@ int CrushWrapper::adjust_item_weight(CephContext *cct, int id, int weight)
return changed;
}
-bool CrushWrapper::check_item_present(int id)
+int CrushWrapper::adjust_item_weight_in_loc(CephContext *cct, int id, int weight, const map<string,string>& loc)
+{
+ ldout(cct, 5) << "adjust_item_weight_in_loc " << id << " weight " << weight << " in " << loc << dendl;
+ int changed = 0;
+
+ for (map<string,string>::const_iterator l = loc.begin(); l != loc.end(); l++) {
+ int bid = get_item_id(l->second);
+ if (!bucket_exists(bid))
+ continue;
+ crush_bucket *b = get_bucket(bid);
+ if ( b == NULL)
+ continue;
+ for (unsigned int i = 0; i < b->size; i++) {
+ if (b->items[i] == id) {
+ int diff = crush_bucket_adjust_item_weight(crush, b, id, weight);
+ ldout(cct, 5) << "adjust_item_weight_in_loc " << id << " diff " << diff << " in bucket " << bid << dendl;
+ adjust_item_weight(cct, bid, b->weight);
+ changed++;
+ }
+ }
+ }
+ if (!changed)
+ return -ENOENT;
+ return changed;
+}
+
+bool CrushWrapper::check_item_present(int id) const
{
bool found = false;
@@ -778,20 +824,18 @@ int CrushWrapper::add_simple_ruleset(string name, string root_name,
return -EINVAL;
}
- int ruleset = 0;
- for (int i = 0; i < get_max_rules(); i++) {
- if (rule_exists(i) &&
- get_rule_mask_ruleset(i) >= ruleset) {
- ruleset = get_rule_mask_ruleset(i) + 1;
- }
+ int rno = -1;
+ for (rno = 0; rno < get_max_rules(); rno++) {
+ if (!rule_exists(rno) && !ruleset_exists(rno))
+ break;
}
-
int steps = 3;
if (mode == "indep")
steps = 4;
int min_rep = mode == "firstn" ? 1 : 3;
int max_rep = mode == "firstn" ? 10 : 20;
- crush_rule *rule = crush_make_rule(steps, ruleset, rule_type, min_rep, max_rep);
+ //set the ruleset the same as rule_id(rno)
+ crush_rule *rule = crush_make_rule(steps, rno, rule_type, min_rep, max_rep);
assert(rule);
int step = 0;
if (mode == "indep")
@@ -810,7 +854,12 @@ int CrushWrapper::add_simple_ruleset(string name, string root_name,
CRUSH_CHOOSE_N,
0);
crush_rule_set_step(rule, step++, CRUSH_RULE_EMIT, 0, 0);
- int rno = crush_add_rule(crush, rule, -1);
+
+ int ret = crush_add_rule(crush, rule, rno);
+ if(ret < 0) {
+ *err << "failed to add rule " << rno << " because " << cpp_strerror(ret);
+ return ret;
+ }
set_rule_name(rno, name);
have_rmaps = false;
return rno;
@@ -965,6 +1014,7 @@ void CrushWrapper::encode(bufferlist& bl, bool lean) const
::encode(crush->choose_total_tries, bl);
::encode(crush->chooseleaf_descend_once, bl);
::encode(crush->chooseleaf_vary_r, bl);
+ ::encode(crush->straw_calc_version, bl);
}
static void decode_32_or_64_string_map(map<int32_t,string>& m, bufferlist::iterator& blp)
@@ -1048,6 +1098,9 @@ void CrushWrapper::decode(bufferlist::iterator& blp)
if (!blp.end()) {
::decode(crush->chooseleaf_vary_r, blp);
}
+ if (!blp.end()) {
+ ::decode(crush->straw_calc_version, blp);
+ }
finalize();
}
catch (...) {
@@ -1231,6 +1284,8 @@ void CrushWrapper::dump_tunables(Formatter *f) const
f->dump_int("choose_local_fallback_tries", get_choose_local_fallback_tries());
f->dump_int("choose_total_tries", get_choose_total_tries());
f->dump_int("chooseleaf_descend_once", get_chooseleaf_descend_once());
+ f->dump_int("chooseleaf_vary_r", get_chooseleaf_vary_r());
+ f->dump_int("straw_calc_version", get_straw_calc_version());
// be helpful about it
if (has_firefly_tunables())
@@ -1246,6 +1301,9 @@ void CrushWrapper::dump_tunables(Formatter *f) const
f->dump_int("require_feature_tunables", (int)has_nondefault_tunables());
f->dump_int("require_feature_tunables2", (int)has_nondefault_tunables2());
+ f->dump_int("require_feature_tunables3", (int)has_nondefault_tunables3());
+ f->dump_int("has_v2_rules", (int)has_v2_rules());
+ f->dump_int("has_v3_rules", (int)has_v3_rules());
}
void CrushWrapper::dump_rules(Formatter *f) const
diff --git a/src/crush/CrushWrapper.h b/src/crush/CrushWrapper.h
index d5d4f4f..9fac2fe 100644
--- a/src/crush/CrushWrapper.h
+++ b/src/crush/CrushWrapper.h
@@ -52,24 +52,23 @@ using namespace std;
class CrushWrapper {
mutable Mutex mapper_lock;
public:
- struct crush_map *crush;
std::map<int32_t, string> type_map; /* bucket/device type names */
std::map<int32_t, string> name_map; /* bucket/device names */
std::map<int32_t, string> rule_name_map;
- /* reverse maps */
- bool have_rmaps;
- std::map<string, int> type_rmap, name_rmap, rule_name_rmap;
-
private:
- void build_rmaps() {
+ struct crush_map *crush;
+ /* reverse maps */
+ mutable bool have_rmaps;
+ mutable std::map<string, int> type_rmap, name_rmap, rule_name_rmap;
+ void build_rmaps() const {
if (have_rmaps) return;
build_rmap(type_map, type_rmap);
build_rmap(name_map, name_rmap);
build_rmap(rule_name_map, rule_name_rmap);
have_rmaps = true;
}
- void build_rmap(const map<int, string> &f, std::map<string, int> &r) {
+ void build_rmap(const map<int, string> &f, std::map<string, int> &r) const {
r.clear();
for (std::map<int, string>::const_iterator p = f.begin(); p != f.end(); ++p)
r[p->second] = p->first;
@@ -88,6 +87,8 @@ public:
crush_destroy(crush);
}
+ crush_map *get_crush_map() { return crush; }
+
/* building */
void create() {
if (crush)
@@ -124,12 +125,15 @@ public:
void set_tunables_legacy() {
set_tunables_argonaut();
+ crush->straw_calc_version = 0;
}
void set_tunables_optimal() {
set_tunables_firefly();
+ crush->straw_calc_version = 1;
}
void set_tunables_default() {
set_tunables_bobtail();
+ crush->straw_calc_version = 1;
}
int get_choose_local_tries() const {
@@ -167,13 +171,21 @@ public:
crush->chooseleaf_vary_r = n;
}
+ int get_straw_calc_version() const {
+ return crush->straw_calc_version;
+ }
+ void set_straw_calc_version(int n) {
+ crush->straw_calc_version = n;
+ }
+
bool has_argonaut_tunables() const {
return
crush->choose_local_tries == 2 &&
crush->choose_local_fallback_tries == 5 &&
crush->choose_total_tries == 19 &&
crush->chooseleaf_descend_once == 0 &&
- crush->chooseleaf_vary_r == 0;
+ crush->chooseleaf_vary_r == 0 &&
+ crush->straw_calc_version == 0;
}
bool has_bobtail_tunables() const {
return
@@ -181,7 +193,8 @@ public:
crush->choose_local_fallback_tries == 0 &&
crush->choose_total_tries == 50 &&
crush->chooseleaf_descend_once == 1 &&
- crush->chooseleaf_vary_r == 0;
+ crush->chooseleaf_vary_r == 0 &&
+ crush->straw_calc_version == 0;
}
bool has_firefly_tunables() const {
return
@@ -189,7 +202,8 @@ public:
crush->choose_local_fallback_tries == 0 &&
crush->choose_total_tries == 50 &&
crush->chooseleaf_descend_once == 1 &&
- crush->chooseleaf_vary_r == 1;
+ crush->chooseleaf_vary_r == 1 &&
+ crush->straw_calc_version == 0;
}
bool has_optimal_tunables() const {
@@ -223,7 +237,7 @@ public:
int get_num_type_names() const {
return type_map.size();
}
- int get_type_id(const string& name) {
+ int get_type_id(const string& name) const {
build_rmaps();
if (type_rmap.count(name))
return type_rmap[name];
@@ -242,14 +256,14 @@ public:
}
// item/bucket names
- bool name_exists(const string& name) {
+ bool name_exists(const string& name) const {
build_rmaps();
return name_rmap.count(name);
}
bool item_exists(int i) {
return name_map.count(i);
}
- int get_item_id(const string& name) {
+ int get_item_id(const string& name) const {
build_rmaps();
if (name_rmap.count(name))
return name_rmap[name];
@@ -271,11 +285,11 @@ public:
}
// rule names
- bool rule_exists(string name) {
+ bool rule_exists(string name) const {
build_rmaps();
return rule_name_rmap.count(name);
}
- int get_rule_id(string name) {
+ int get_rule_id(string name) const {
build_rmaps();
if (rule_name_rmap.count(name))
return rule_name_rmap[name];
@@ -542,19 +556,27 @@ public:
* @param id item id to check
* @return weight of item
*/
- int get_item_weight(int id);
- float get_item_weightf(int id) {
+ int get_item_weight(int id) const;
+ float get_item_weightf(int id) const {
return (float)get_item_weight(id) / (float)0x10000;
}
+ int get_item_weight_in_loc(int id, const map<string,string> &loc);
+ float get_item_weightf_in_loc(int id, const map<string,string> &loc) {
+ return (float)get_item_weight_in_loc(id, loc) / (float)0x10000;
+ }
int adjust_item_weight(CephContext *cct, int id, int weight);
int adjust_item_weightf(CephContext *cct, int id, float weight) {
return adjust_item_weight(cct, id, (int)(weight * (float)0x10000));
}
+ int adjust_item_weight_in_loc(CephContext *cct, int id, int weight, const map<string,string>& loc);
+ int adjust_item_weightf_in_loc(CephContext *cct, int id, float weight, const map<string,string>& loc) {
+ return adjust_item_weight_in_loc(cct, id, (int)(weight * (float)0x10000), loc);
+ }
void reweight(CephContext *cct);
/// check if item id is present in the map hierarchy
- bool check_item_present(int id);
+ bool check_item_present(int id) const;
/*** devices ***/
@@ -745,9 +767,6 @@ private:
crush_bucket *b = get_bucket(item);
unsigned bucket_weight = b->weight;
- // zero out the bucket weight
- adjust_item_weight(cct, item, 0);
-
// get where the bucket is located
pair<string, string> bucket_location = get_immediate_parent(item);
@@ -758,8 +777,12 @@ private:
crush_bucket *parent_bucket = get_bucket(parent_id);
if (!IS_ERR(parent_bucket)) {
+ // zero out the bucket weight
+ crush_bucket_adjust_item_weight(crush, parent_bucket, item, 0);
+ adjust_item_weight(cct, parent_bucket->id, parent_bucket->weight);
+
// remove the bucket from the parent
- crush_bucket_remove_item(parent_bucket, item);
+ crush_bucket_remove_item(crush, parent_bucket, item);
} else if (PTR_ERR(parent_bucket) != -ENOENT) {
return PTR_ERR(parent_bucket);
}
@@ -839,7 +862,7 @@ public:
int *items, int *weights, int *idout) {
if (type == 0)
return -EINVAL;
- crush_bucket *b = crush_make_bucket(alg, hash, type, size, items, weights);
+ crush_bucket *b = crush_make_bucket(crush, alg, hash, type, size, items, weights);
assert(b);
return crush_add_bucket(crush, bucketno, b, idout);
}
@@ -880,9 +903,9 @@ public:
bool ruleset_exists(int ruleset) const {
for (size_t i = 0; i < crush->max_rules; ++i) {
- if (crush->rules[i]->mask.ruleset == ruleset) {
- return true;
- }
+ if (rule_exists(i) && crush->rules[i]->mask.ruleset == ruleset) {
+ return true;
+ }
}
return false;
diff --git a/src/crush/builder.c b/src/crush/builder.c
index eff0bf6..f081562 100644
--- a/src/crush/builder.c
+++ b/src/crush/builder.c
@@ -11,6 +11,8 @@
#include "builder.h"
#include "hash.h"
+#define dprintk(args...) /* printf(args) */
+
#define BUG_ON(x) assert(!(x))
struct crush_map *crush_create()
@@ -27,6 +29,7 @@ struct crush_map *crush_create()
m->choose_total_tries = 19;
m->chooseleaf_descend_once = 0;
m->chooseleaf_vary_r = 0;
+ m->straw_calc_version = 0;
return m;
}
@@ -63,7 +66,7 @@ int crush_add_rule(struct crush_map *map, struct crush_rule *rule, int ruleno)
for (r=0; r < map->max_rules; r++)
if (map->rules[r] == 0)
break;
- assert(r <= INT_MAX);
+ assert(r < CRUSH_MAX_RULES);
}
else
r = ruleno;
@@ -72,6 +75,8 @@ int crush_add_rule(struct crush_map *map, struct crush_rule *rule, int ruleno)
/* expand array */
int oldsize;
void *_realloc = NULL;
+ if (map->max_rules +1 > CRUSH_MAX_RULES)
+ return -ENOSPC;
oldsize = map->max_rules;
map->max_rules = r+1;
if ((_realloc = realloc(map->rules, map->max_rules * sizeof(map->rules[0]))) == NULL) {
@@ -263,7 +268,7 @@ crush_make_list_bucket(int hash, int type, int size,
w += weights[i];
bucket->sum_weights[i] = w;
- /*printf("pos %d item %d weight %d sum %d\n",
+ /*dprintk("pos %d item %d weight %d sum %d\n",
i, items[i], weights[i], bucket->sum_weights[i]);*/
}
@@ -304,6 +309,10 @@ static int parent(int n)
static int calc_depth(int size)
{
+ if (size == 0) {
+ return 0;
+ }
+
int depth = 1;
int t = size - 1;
while (t) {
@@ -332,6 +341,16 @@ crush_make_tree_bucket(int hash, int type, int size,
bucket->h.type = type;
bucket->h.size = size;
+ if (size == 0) {
+ bucket->h.items = NULL;
+ bucket->h.perm = NULL;
+ bucket->h.weight = 0;
+ bucket->node_weights = NULL;
+ bucket->num_nodes = 0;
+ /* printf("size 0 depth 0 nodes 0\n"); */
+ return bucket;
+ }
+
bucket->h.items = malloc(sizeof(__s32)*size);
if (!bucket->h.items)
goto err;
@@ -342,7 +361,7 @@ crush_make_tree_bucket(int hash, int type, int size,
/* calc tree depth */
depth = calc_depth(size);
bucket->num_nodes = 1 << depth;
- printf("size %d depth %d nodes %d\n", size, depth, bucket->num_nodes);
+ dprintk("size %d depth %d nodes %d\n", size, depth, bucket->num_nodes);
bucket->node_weights = malloc(sizeof(__u32)*bucket->num_nodes);
if (!bucket->node_weights)
@@ -354,7 +373,7 @@ crush_make_tree_bucket(int hash, int type, int size,
for (i=0; i<size; i++) {
bucket->h.items[i] = items[i];
node = crush_calc_tree_node(i);
- printf("item %d node %d weight %d\n", i, node, weights[i]);
+ dprintk("item %d node %d weight %d\n", i, node, weights[i]);
bucket->node_weights[node] = weights[i];
if (crush_addition_is_unsafe(bucket->h.weight, weights[i]))
@@ -368,7 +387,7 @@ crush_make_tree_bucket(int hash, int type, int size,
goto err;
bucket->node_weights[node] += weights[i];
- printf(" node %d weight %d\n", node, bucket->node_weights[node]);
+ dprintk(" node %d weight %d\n", node, bucket->node_weights[node]);
}
}
BUG_ON(bucket->node_weights[bucket->num_nodes/2] != bucket->h.weight);
@@ -386,7 +405,34 @@ err:
/* straw bucket */
-int crush_calc_straw(struct crush_bucket_straw *bucket)
+/*
+ * this code was written 8 years ago. i have a vague recollection of
+ * drawing boxes underneath bars of different lengths, where the bar
+ * length represented the probability/weight, and that there was some
+ * trial and error involved in arriving at this implementation.
+ * however, reading the code now after all this time, the intuition
+ * that motivated it is lost on me. lame. my only excuse is that I now
+ * know that the approach is fundamentally flawed and am not
+ * particularly motivated to reconstruct the flawed reasoning.
+ *
+ * as best as i can remember, the idea is: sort the weights, and start
+ * with the smallest. arbitrarily scale it at 1.0 (16-bit fixed
+ * point). look at the next larger weight, and calculate the scaling
+ * factor for that straw based on the relative difference in weight so
+ * far. what's not clear to me now is why we are looking at wnext
+ * (the delta to the next bigger weight) for all remaining weights,
+ * and slicing things horizontally instead of considering just the
+ * next item or set of items. or why pow() is used the way it is.
+ *
+ * note that the original version 1 of this function made special
+ * accommodation for the case where straw lengths were identical. this
+ * is also flawed in a non-obvious way; version 2 drops the special
+ * handling and appears to work just as well.
+ *
+ * moral of the story: if you do something clever, write down why it
+ * works.
+ */
+int crush_calc_straw(struct crush_map *map, struct crush_bucket_straw *bucket)
{
int *reverse;
int i, j, k;
@@ -422,41 +468,82 @@ int crush_calc_straw(struct crush_bucket_straw *bucket)
i=0;
while (i < size) {
- /* zero weight items get 0 length straws! */
- if (weights[reverse[i]] == 0) {
- bucket->straws[reverse[i]] = 0;
+ if (map->straw_calc_version == 0) {
+ /* zero weight items get 0 length straws! */
+ if (weights[reverse[i]] == 0) {
+ bucket->straws[reverse[i]] = 0;
+ i++;
+ continue;
+ }
+
+ /* set this item's straw */
+ bucket->straws[reverse[i]] = straw * 0x10000;
+ dprintk("item %d at %d weight %d straw %d (%lf)\n",
+ bucket->h.items[reverse[i]],
+ reverse[i], weights[reverse[i]],
+ bucket->straws[reverse[i]], straw);
i++;
- continue;
- }
+ if (i == size)
+ break;
- /* set this item's straw */
- bucket->straws[reverse[i]] = straw * 0x10000;
- /*printf("item %d at %d weight %d straw %d (%lf)\n",
- items[reverse[i]],
- reverse[i], weights[reverse[i]], bucket->straws[reverse[i]], straw);*/
- i++;
- if (i == size) break;
-
- /* same weight as previous? */
- if (weights[reverse[i]] == weights[reverse[i-1]]) {
- /*printf("same as previous\n");*/
- continue;
- }
+ /* same weight as previous? */
+ if (weights[reverse[i]] == weights[reverse[i-1]]) {
+ dprintk("same as previous\n");
+ continue;
+ }
- /* adjust straw for next guy */
- wbelow += ((double)weights[reverse[i-1]] - lastw) * numleft;
- for (j=i; j<size; j++)
- if (weights[reverse[j]] == weights[reverse[i]])
+ /* adjust straw for next guy */
+ wbelow += ((double)weights[reverse[i-1]] - lastw) *
+ numleft;
+ for (j=i; j<size; j++)
+ if (weights[reverse[j]] == weights[reverse[i]])
+ numleft--;
+ else
+ break;
+ wnext = numleft * (weights[reverse[i]] -
+ weights[reverse[i-1]]);
+ pbelow = wbelow / (wbelow + wnext);
+ dprintk("wbelow %lf wnext %lf pbelow %lf numleft %d\n",
+ wbelow, wnext, pbelow, numleft);
+
+ straw *= pow((double)1.0 / pbelow, (double)1.0 /
+ (double)numleft);
+
+ lastw = weights[reverse[i-1]];
+ } else if (map->straw_calc_version >= 1) {
+ /* zero weight items get 0 length straws! */
+ if (weights[reverse[i]] == 0) {
+ bucket->straws[reverse[i]] = 0;
+ i++;
numleft--;
- else
+ continue;
+ }
+
+ /* set this item's straw */
+ bucket->straws[reverse[i]] = straw * 0x10000;
+ dprintk("item %d at %d weight %d straw %d (%lf)\n",
+ bucket->h.items[reverse[i]],
+ reverse[i], weights[reverse[i]],
+ bucket->straws[reverse[i]], straw);
+ i++;
+ if (i == size)
break;
- wnext = numleft * (weights[reverse[i]] - weights[reverse[i-1]]);
- pbelow = wbelow / (wbelow + wnext);
- /*printf("wbelow %lf wnext %lf pbelow %lf\n", wbelow, wnext, pbelow);*/
- straw *= pow((double)1.0 / pbelow, (double)1.0 / (double)numleft);
+ /* adjust straw for next guy */
+ wbelow += ((double)weights[reverse[i-1]] - lastw) *
+ numleft;
+ numleft--;
+ wnext = numleft * (weights[reverse[i]] -
+ weights[reverse[i-1]]);
+ pbelow = wbelow / (wbelow + wnext);
+ dprintk("wbelow %lf wnext %lf pbelow %lf numleft %d\n",
+ wbelow, wnext, pbelow, numleft);
+
+ straw *= pow((double)1.0 / pbelow, (double)1.0 /
+ (double)numleft);
- lastw = weights[reverse[i-1]];
+ lastw = weights[reverse[i-1]];
+ }
}
free(reverse);
@@ -464,7 +551,8 @@ int crush_calc_straw(struct crush_bucket_straw *bucket)
}
struct crush_bucket_straw *
-crush_make_straw_bucket(int hash,
+crush_make_straw_bucket(struct crush_map *map,
+ int hash,
int type,
int size,
int *items,
@@ -502,7 +590,7 @@ crush_make_straw_bucket(int hash,
bucket->item_weights[i] = weights[i];
}
- if (crush_calc_straw(bucket) < 0)
+ if (crush_calc_straw(map, bucket) < 0)
goto err;
return bucket;
@@ -518,7 +606,8 @@ err:
struct crush_bucket*
-crush_make_bucket(int alg, int hash, int type, int size,
+crush_make_bucket(struct crush_map *map,
+ int alg, int hash, int type, int size,
int *items,
int *weights)
{
@@ -539,7 +628,7 @@ crush_make_bucket(int alg, int hash, int type, int size,
return (struct crush_bucket *)crush_make_tree_bucket(hash, type, size, items, weights);
case CRUSH_BUCKET_STRAW:
- return (struct crush_bucket *)crush_make_straw_bucket(hash, type, size, items, weights);
+ return (struct crush_bucket *)crush_make_straw_bucket(map, hash, type, size, items, weights);
}
return 0;
}
@@ -648,27 +737,39 @@ int crush_add_tree_bucket_item(struct crush_bucket_tree *bucket, int item, int w
node = crush_calc_tree_node(newsize-1);
bucket->node_weights[node] = weight;
+ /* if the depth increases, we need to initialize the new root node's weight before adding the bucket item */
+ int root = bucket->num_nodes/2;
+ if (depth >= 2 && (node - 1) == root) {
+ /* if the new item is the first node in right sub tree, so
+ * the root node initial weight is left sub tree's weight
+ */
+ bucket->node_weights[root] = bucket->node_weights[root/2];
+ }
+
for (j=1; j<depth; j++) {
node = parent(node);
- if (!crush_addition_is_unsafe(bucket->node_weights[node], weight))
+ if (crush_addition_is_unsafe(bucket->node_weights[node], weight))
return -ERANGE;
bucket->node_weights[node] += weight;
- printf(" node %d weight %d\n", node, bucket->node_weights[node]);
+ dprintk(" node %d weight %d\n", node, bucket->node_weights[node]);
}
if (crush_addition_is_unsafe(bucket->h.weight, weight))
return -ERANGE;
+ bucket->h.items[newsize-1] = item;
bucket->h.weight += weight;
bucket->h.size++;
return 0;
}
-int crush_add_straw_bucket_item(struct crush_bucket_straw *bucket, int item, int weight)
+int crush_add_straw_bucket_item(struct crush_map *map,
+ struct crush_bucket_straw *bucket,
+ int item, int weight)
{
int newsize = bucket->h.size + 1;
@@ -701,13 +802,14 @@ int crush_add_straw_bucket_item(struct crush_bucket_straw *bucket, int item, int
if (crush_addition_is_unsafe(bucket->h.weight, weight))
return -ERANGE;
- bucket->h.weight += weight;
- bucket->h.size++;
+ bucket->h.weight += weight;
+ bucket->h.size++;
- return crush_calc_straw(bucket);
+ return crush_calc_straw(map, bucket);
}
-int crush_bucket_add_item(struct crush_bucket *b, int item, int weight)
+int crush_bucket_add_item(struct crush_map *map,
+ struct crush_bucket *b, int item, int weight)
{
/* invalidate perm cache */
b->perm_n = 0;
@@ -720,7 +822,7 @@ int crush_bucket_add_item(struct crush_bucket *b, int item, int weight)
case CRUSH_BUCKET_TREE:
return crush_add_tree_bucket_item((struct crush_bucket_tree *)b, item, weight);
case CRUSH_BUCKET_STRAW:
- return crush_add_straw_bucket_item((struct crush_bucket_straw *)b, item, weight);
+ return crush_add_straw_bucket_item(map, (struct crush_bucket_straw *)b, item, weight);
default:
return -1;
}
@@ -744,7 +846,10 @@ int crush_remove_uniform_bucket_item(struct crush_bucket_uniform *bucket, int it
for (j = i; j < bucket->h.size; j++)
bucket->h.items[j] = bucket->h.items[j+1];
newsize = --bucket->h.size;
- bucket->h.weight -= bucket->item_weight;
+ if (bucket->item_weight < bucket->h.weight)
+ bucket->h.weight -= bucket->item_weight;
+ else
+ bucket->h.weight = 0;
if ((_realloc = realloc(bucket->h.items, sizeof(__s32)*newsize)) == NULL) {
return -ENOMEM;
@@ -763,7 +868,7 @@ int crush_remove_list_bucket_item(struct crush_bucket_list *bucket, int item)
{
unsigned i, j;
int newsize;
- int weight;
+ unsigned weight;
for (i = 0; i < bucket->h.size; i++)
if (bucket->h.items[i] == item)
@@ -777,7 +882,10 @@ int crush_remove_list_bucket_item(struct crush_bucket_list *bucket, int item)
bucket->item_weights[j] = bucket->item_weights[j+1];
bucket->sum_weights[j] = bucket->sum_weights[j+1] - weight;
}
- bucket->h.weight -= weight;
+ if (weight < bucket->h.weight)
+ bucket->h.weight -= weight;
+ else
+ bucket->h.weight = 0;
newsize = --bucket->h.size;
void *_realloc = NULL;
@@ -812,7 +920,7 @@ int crush_remove_tree_bucket_item(struct crush_bucket_tree *bucket, int item)
for (i = 0; i < bucket->h.size; i++) {
int node;
- int weight;
+ unsigned weight;
int j;
int depth = calc_depth(bucket->h.size);
@@ -826,9 +934,12 @@ int crush_remove_tree_bucket_item(struct crush_bucket_tree *bucket, int item)
for (j = 1; j < depth; j++) {
node = parent(node);
bucket->node_weights[node] -= weight;
- printf(" node %d weight %d\n", node, bucket->node_weights[node]);
+ dprintk(" node %d weight %d\n", node, bucket->node_weights[node]);
}
- bucket->h.weight -= weight;
+ if (weight < bucket->h.weight)
+ bucket->h.weight -= weight;
+ else
+ bucket->h.weight = 0;
break;
}
if (i == bucket->h.size)
@@ -875,7 +986,8 @@ int crush_remove_tree_bucket_item(struct crush_bucket_tree *bucket, int item)
return 0;
}
-int crush_remove_straw_bucket_item(struct crush_bucket_straw *bucket, int item)
+int crush_remove_straw_bucket_item(struct crush_map *map,
+ struct crush_bucket_straw *bucket, int item)
{
int newsize = bucket->h.size - 1;
unsigned i, j;
@@ -883,7 +995,10 @@ int crush_remove_straw_bucket_item(struct crush_bucket_straw *bucket, int item)
for (i = 0; i < bucket->h.size; i++) {
if (bucket->h.items[i] == item) {
bucket->h.size--;
- bucket->h.weight -= bucket->item_weights[i];
+ if (bucket->item_weights[i] < bucket->h.weight)
+ bucket->h.weight -= bucket->item_weights[i];
+ else
+ bucket->h.weight = 0;
for (j = i; j < bucket->h.size; j++) {
bucket->h.items[j] = bucket->h.items[j+1];
bucket->item_weights[j] = bucket->item_weights[j+1];
@@ -917,10 +1032,10 @@ int crush_remove_straw_bucket_item(struct crush_bucket_straw *bucket, int item)
bucket->straws = _realloc;
}
- return crush_calc_straw(bucket);
+ return crush_calc_straw(map, bucket);
}
-int crush_bucket_remove_item(struct crush_bucket *b, int item)
+int crush_bucket_remove_item(struct crush_map *map, struct crush_bucket *b, int item)
{
/* invalidate perm cache */
b->perm_n = 0;
@@ -933,7 +1048,7 @@ int crush_bucket_remove_item(struct crush_bucket *b, int item)
case CRUSH_BUCKET_TREE:
return crush_remove_tree_bucket_item((struct crush_bucket_tree *)b, item);
case CRUSH_BUCKET_STRAW:
- return crush_remove_straw_bucket_item((struct crush_bucket_straw *)b, item);
+ return crush_remove_straw_bucket_item(map, (struct crush_bucket_straw *)b, item);
default:
return -1;
}
@@ -1002,7 +1117,9 @@ int crush_adjust_tree_bucket_item_weight(struct crush_bucket_tree *bucket, int i
return diff;
}
-int crush_adjust_straw_bucket_item_weight(struct crush_bucket_straw *bucket, int item, int weight)
+int crush_adjust_straw_bucket_item_weight(struct crush_map *map,
+ struct crush_bucket_straw *bucket,
+ int item, int weight)
{
unsigned idx;
int diff;
@@ -1018,14 +1135,16 @@ int crush_adjust_straw_bucket_item_weight(struct crush_bucket_straw *bucket, int
bucket->item_weights[idx] = weight;
bucket->h.weight += diff;
- r = crush_calc_straw(bucket);
+ r = crush_calc_straw(map, bucket);
if (r < 0)
return r;
return diff;
}
-int crush_bucket_adjust_item_weight(struct crush_bucket *b, int item, int weight)
+int crush_bucket_adjust_item_weight(struct crush_map *map,
+ struct crush_bucket *b,
+ int item, int weight)
{
switch (b->alg) {
case CRUSH_BUCKET_UNIFORM:
@@ -1038,7 +1157,8 @@ int crush_bucket_adjust_item_weight(struct crush_bucket *b, int item, int weight
return crush_adjust_tree_bucket_item_weight((struct crush_bucket_tree *)b,
item, weight);
case CRUSH_BUCKET_STRAW:
- return crush_adjust_straw_bucket_item_weight((struct crush_bucket_straw *)b,
+ return crush_adjust_straw_bucket_item_weight(map,
+ (struct crush_bucket_straw *)b,
item, weight);
default:
return -1;
@@ -1141,6 +1261,7 @@ static int crush_reweight_straw_bucket(struct crush_map *crush, struct crush_buc
bucket->h.weight += bucket->item_weights[i];
}
+ crush_calc_straw(crush, bucket);
return 0;
}
diff --git a/src/crush/builder.h b/src/crush/builder.h
index 1003c35..efd7c8a 100644
--- a/src/crush/builder.h
+++ b/src/crush/builder.h
@@ -16,12 +16,12 @@ extern int crush_get_next_bucket_id(struct crush_map *map);
extern int crush_add_bucket(struct crush_map *map,
int bucketno,
struct crush_bucket *bucket, int *idout);
-struct crush_bucket *crush_make_bucket(int alg, int hash, int type, int size, int *items, int *weights);
-extern int crush_bucket_add_item(struct crush_bucket *bucket, int item, int weight);
-extern int crush_bucket_adjust_item_weight(struct crush_bucket *bucket, int item, int weight);
+struct crush_bucket *crush_make_bucket(struct crush_map *map, int alg, int hash, int type, int size, int *items, int *weights);
+extern int crush_bucket_add_item(struct crush_map *map, struct crush_bucket *bucket, int item, int weight);
+extern int crush_bucket_adjust_item_weight(struct crush_map *map, struct crush_bucket *bucket, int item, int weight);
extern int crush_reweight_bucket(struct crush_map *crush, struct crush_bucket *bucket);
extern int crush_remove_bucket(struct crush_map *map, struct crush_bucket *bucket);
-extern int crush_bucket_remove_item(struct crush_bucket *bucket, int item);
+extern int crush_bucket_remove_item(struct crush_map *map, struct crush_bucket *bucket, int item);
struct crush_bucket_uniform *
crush_make_uniform_bucket(int hash, int type, int size,
@@ -36,7 +36,8 @@ crush_make_tree_bucket(int hash, int type, int size,
int *items, /* in leaf order */
int *weights);
struct crush_bucket_straw *
-crush_make_straw_bucket(int hash, int type, int size,
+crush_make_straw_bucket(struct crush_map *map,
+ int hash, int type, int size,
int *items,
int *weights);
diff --git a/src/crush/crush.h b/src/crush/crush.h
index 8bac92a..712d534 100644
--- a/src/crush/crush.h
+++ b/src/crush/crush.h
@@ -26,6 +26,8 @@
#define CRUSH_MAGIC 0x00010000ul /* for detecting algorithm revisions */
#define CRUSH_MAX_DEPTH 10 /* max crush hierarchy depth */
+#define CRUSH_MAX_RULESET (1<<8) /*max crush ruleset number*/
+#define CRUSH_MAX_RULES CRUSH_MAX_RULESET /*max crush rules, should be the same as max rulesets*/
#define CRUSH_MAX_DEVICE_WEIGHT (100u * 0x10000u)
#define CRUSH_MAX_BUCKET_WEIGHT (65535u * 0x10000u)
@@ -189,6 +191,12 @@ struct crush_map {
* mappings line up a bit better with previous mappings. */
__u8 chooseleaf_vary_r;
+ /*
+ * version 0 (original) of straw_calc has various flaws. version 1
+ * fixes a few of them.
+ */
+ __u8 straw_calc_version;
+
__u32 *choose_tries;
};
diff --git a/src/crush/mapper.c b/src/crush/mapper.c
index 22cde51..327668f 100644
--- a/src/crush/mapper.c
+++ b/src/crush/mapper.c
@@ -291,6 +291,7 @@ static int is_out(const struct crush_map *map,
* @type: the type of item to choose
* @out: pointer to output vector
* @outpos: our position in that vector
+ * @out_size: size of the out vector
* @tries: number of attempts to make
* @recurse_tries: number of attempts to have recursive chooseleaf make
* @local_retries: localized retries
@@ -305,6 +306,7 @@ static int crush_choose_firstn(const struct crush_map *map,
const __u32 *weight, int weight_max,
int x, int numrep, int type,
int *out, int outpos,
+ int out_size,
unsigned int tries,
unsigned int recurse_tries,
unsigned int local_retries,
@@ -323,6 +325,7 @@ static int crush_choose_firstn(const struct crush_map *map,
int item = 0;
int itemtype;
int collide, reject;
+ int count = out_size;
dprintk("CHOOSE%s bucket %d x %d outpos %d numrep %d tries %d recurse_tries %d local_retries %d local_fallback_retries %d parent_r %d\n",
recurse_to_leaf ? "_LEAF" : "",
@@ -330,7 +333,7 @@ static int crush_choose_firstn(const struct crush_map *map,
tries, recurse_tries, local_retries, local_fallback_retries,
parent_r);
- for (rep = outpos; rep < numrep; rep++) {
+ for (rep = outpos; rep < numrep && count > 0 ; rep++) {
/* keep trying until we get a non-out, non-colliding item */
ftotal = 0;
skip_rep = 0;
@@ -404,7 +407,7 @@ static int crush_choose_firstn(const struct crush_map *map,
map->buckets[-1-item],
weight, weight_max,
x, outpos+1, 0,
- out2, outpos,
+ out2, outpos, count,
recurse_tries, 0,
local_retries,
local_fallback_retries,
@@ -464,6 +467,7 @@ reject:
dprintk("CHOOSE got %d\n", item);
out[outpos] = item;
outpos++;
+ count--;
if (map->choose_tries && ftotal <= map->choose_total_tries)
map->choose_tries[ftotal]++;
@@ -686,6 +690,7 @@ int crush_do_rule(const struct crush_map *map,
__u32 step;
int i, j;
int numrep;
+ int out_size;
/*
* the original choose_total_tries value was off by one (it
* counted "retries" and not "tries"). add one.
@@ -793,6 +798,7 @@ int crush_do_rule(const struct crush_map *map,
x, numrep,
curstep->arg2,
o+osize, j,
+ result_max-osize,
choose_tries,
recurse_tries,
choose_local_retries,
@@ -802,11 +808,13 @@ int crush_do_rule(const struct crush_map *map,
c+osize,
0);
} else {
+ out_size = ((numrep < (result_max-osize)) ?
+ numrep : (result_max-osize));
crush_choose_indep(
map,
map->buckets[-1-w[i]],
weight, weight_max,
- x, numrep, numrep,
+ x, out_size, numrep,
curstep->arg2,
o+osize, j,
choose_tries,
@@ -815,7 +823,7 @@ int crush_do_rule(const struct crush_map *map,
recurse_to_leaf,
c+osize,
0);
- osize += numrep;
+ osize += out_size;
}
}
diff --git a/src/include/ceph_features.h b/src/include/ceph_features.h
index 6b2a5fb..c3dfcab 100644
--- a/src/include/ceph_features.h
+++ b/src/include/ceph_features.h
@@ -52,6 +52,7 @@
#define CEPH_FEATURE_OSD_PRIMARY_AFFINITY (1ULL<<41) /* overlap w/ tunables3 */
#define CEPH_FEATURE_MSGR_KEEPALIVE2 (1ULL<<42)
#define CEPH_FEATURE_OSD_POOLRESEND (1ULL<<43)
+#define CEPH_FEATURE_OSD_SET_ALLOC_HINT (1ULL<<45)
/*
* The introduction of CEPH_FEATURE_OSD_SNAPMAPPER caused the feature
@@ -124,6 +125,7 @@ static inline unsigned long long ceph_sanitize_features(unsigned long long f) {
CEPH_FEATURE_OSD_PRIMARY_AFFINITY | \
CEPH_FEATURE_MSGR_KEEPALIVE2 | \
CEPH_FEATURE_OSD_POOLRESEND | \
+ CEPH_FEATURE_OSD_SET_ALLOC_HINT | \
0ULL)
#define CEPH_FEATURES_SUPPORTED_DEFAULT CEPH_FEATURES_ALL
diff --git a/src/include/ceph_fs.h b/src/include/ceph_fs.h
index 10b52a5..d16df62 100644
--- a/src/include/ceph_fs.h
+++ b/src/include/ceph_fs.h
@@ -505,8 +505,10 @@ struct ceph_mds_reply_dirfrag {
__le32 dist[];
} __attribute__ ((packed));
-#define CEPH_LOCK_FCNTL 1
-#define CEPH_LOCK_FLOCK 2
+#define CEPH_LOCK_FCNTL 1
+#define CEPH_LOCK_FLOCK 2
+#define CEPH_LOCK_FCNTL_INTR 3
+#define CEPH_LOCK_FLOCK_INTR 4
#define CEPH_LOCK_SHARED 1
#define CEPH_LOCK_EXCL 2
diff --git a/src/include/util.h b/src/include/util.h
index b30132e..4e4476a 100644
--- a/src/include/util.h
+++ b/src/include/util.h
@@ -4,17 +4,41 @@
* Ceph - scalable distributed file system
*
* Copyright (C) 2012 Inktank Storage, Inc.
+ * Copyright (C) 2014 Red Hat <contact at redhat.com>
*
* This is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License version 2.1, as published by the Free Software
* Foundation. See file COPYING.
*/
+#ifndef CEPH_UTIL_H
+#define CEPH_UTIL_H
// is buf~len completely zero (in 8-byte chunks)
+#include "common/Formatter.h"
#include "include/types.h"
bool buf_is_zero(const char *buf, size_t len);
int64_t unit_to_bytesize(string val, ostream *pss);
+
+struct ceph_data_stats
+{
+ uint64_t byte_total;
+ uint64_t byte_used;
+ uint64_t byte_avail;
+ int avail_percent;
+
+ void dump(Formatter *f) const {
+ assert(f != NULL);
+ f->dump_int("total", byte_total);
+ f->dump_int("used", byte_used);
+ f->dump_int("avail", byte_avail);
+ f->dump_int("avail_percent", avail_percent);
+ }
+};
+typedef struct ceph_data_stats ceph_data_stats_t;
+
+int get_fs_stats(ceph_data_stats_t &stats, const char *path);
+#endif /* CEPH_UTIL_H */
diff --git a/src/init-radosgw.sysv b/src/init-radosgw.sysv
index dd3dbb0..2486539 100644
--- a/src/init-radosgw.sysv
+++ b/src/init-radosgw.sysv
@@ -85,10 +85,10 @@ case "$1" in
fi
if [ $SYSTEMD -eq 1 ]; then
- systemd-run -r bash -c "ulimit -n 32768; $RADOSGW -n $name"
+ systemd-run -r sudo -u "$user" bash -c "ulimit -n 32768; $RADOSGW -n $name"
else
- #start-stop-daemon --start -u $user -x $RADOSGW -- -n $name
- daemon --user="$user" "ulimit -n 32768; $RADOSGW -n $name"
+ ulimit -n 32768
+ daemon --user="$user" "$RADOSGW -n $name"
fi
echo "Starting $name..."
done
diff --git a/src/librbd/AioCompletion.h b/src/librbd/AioCompletion.h
index aaccefe..e28cd6a 100644
--- a/src/librbd/AioCompletion.h
+++ b/src/librbd/AioCompletion.h
@@ -93,6 +93,10 @@ namespace librbd {
void init_time(ImageCtx *i, aio_type_t t) {
ictx = i;
+ {
+ Mutex::Locker l(ictx->aio_lock);
+ ++ictx->pending_aio;
+ }
aio_type = t;
start_time = ceph_clock_now(ictx->cct);
}
@@ -114,6 +118,14 @@ namespace librbd {
lderr(ictx->cct) << "completed invalid aio_type: " << aio_type << dendl;
break;
}
+
+ {
+ Mutex::Locker l(ictx->aio_lock);
+ assert(ictx->pending_aio != 0);
+ --ictx->pending_aio;
+ ictx->pending_aio_cond.Signal();
+ }
+
if (complete_cb) {
complete_cb(rbd_comp, complete_arg);
}
diff --git a/src/librbd/ImageCtx.cc b/src/librbd/ImageCtx.cc
index b5c2db6..8fb8e37 100644
--- a/src/librbd/ImageCtx.cc
+++ b/src/librbd/ImageCtx.cc
@@ -45,13 +45,15 @@ namespace librbd {
snap_lock("librbd::ImageCtx::snap_lock"),
parent_lock("librbd::ImageCtx::parent_lock"),
refresh_lock("librbd::ImageCtx::refresh_lock"),
+ aio_lock("librbd::ImageCtx::aio_lock"),
extra_read_flags(0),
old_format(true),
order(0), size(0), features(0),
format_string(NULL),
id(image_id), parent(NULL),
stripe_unit(0), stripe_count(0),
- object_cacher(NULL), writeback_handler(NULL), object_set(NULL)
+ object_cacher(NULL), writeback_handler(NULL), object_set(NULL),
+ pending_aio(0)
{
md_ctx.dup(p);
data_ctx.dup(p);
@@ -586,6 +588,7 @@ namespace librbd {
int r = flush_cache();
if (r)
lderr(cct) << "flush_cache returned " << r << dendl;
+ wait_for_pending_aio();
cache_lock.Lock();
bool unclean = object_cacher->release_set(object_set);
cache_lock.Unlock();
@@ -655,5 +658,12 @@ namespace librbd {
<< ", object overlap " << len
<< " from image extents " << objectx << dendl;
return len;
- }
+ }
+
+ void ImageCtx::wait_for_pending_aio() {
+ Mutex::Locker l(aio_lock);
+ while (pending_aio > 0) {
+ pending_aio_cond.Wait(aio_lock);
+ }
+ }
}
diff --git a/src/librbd/ImageCtx.h b/src/librbd/ImageCtx.h
index 83ed044..5a0d637 100644
--- a/src/librbd/ImageCtx.h
+++ b/src/librbd/ImageCtx.h
@@ -10,6 +10,7 @@
#include <string>
#include <vector>
+#include "common/Cond.h"
#include "common/Mutex.h"
#include "common/RWLock.h"
#include "common/snap_types.h"
@@ -59,7 +60,8 @@ namespace librbd {
/**
* Lock ordering:
- * md_lock, cache_lock, snap_lock, parent_lock, refresh_lock
+ * md_lock, cache_lock, snap_lock, parent_lock, refresh_lock,
+ * aio_lock
*/
RWLock md_lock; // protects access to the mutable image metadata that
// isn't guarded by other locks below
@@ -68,6 +70,7 @@ namespace librbd {
RWLock snap_lock; // protects snapshot-related member variables:
RWLock parent_lock; // protects parent_md and parent
Mutex refresh_lock; // protects refresh_seq and last_refresh
+ Mutex aio_lock; // protects pending_aio and pending_aio_cond
unsigned extra_read_flags;
@@ -89,6 +92,9 @@ namespace librbd {
LibrbdWriteback *writeback_handler;
ObjectCacher::ObjectSet *object_set;
+ Cond pending_aio_cond;
+ uint64_t pending_aio;
+
/**
* Either image_name or image_id must be set.
* If id is not known, pass the empty std::string,
@@ -147,7 +153,7 @@ namespace librbd {
librados::snap_t in_snap_id);
uint64_t prune_parent_extents(vector<pair<uint64_t,uint64_t> >& objectx,
uint64_t overlap);
-
+ void wait_for_pending_aio();
};
}
diff --git a/src/librbd/internal.cc b/src/librbd/internal.cc
index afa4660..598d515 100644
--- a/src/librbd/internal.cc
+++ b/src/librbd/internal.cc
@@ -419,7 +419,15 @@ namespace librbd {
for (std::list<string>::const_iterator it = pools.begin();
it != pools.end(); ++it) {
IoCtx ioctx;
- rados.ioctx_create(it->c_str(), ioctx);
+ r = rados.ioctx_create(it->c_str(), ioctx);
+ if (r == -ENOENT) {
+ ldout(cct, 1) << "pool " << *it << " no longer exists" << dendl;
+ continue;
+ } else if (r < 0) {
+ lderr(cct) << "Error accessing child image pool " << *it << dendl;
+ return r;
+ }
+
set<string> image_ids;
int r = cls_client::get_children(&ioctx, RBD_CHILDREN,
parent_spec, image_ids);
@@ -633,32 +641,46 @@ namespace librbd {
parent_spec pspec(ictx->md_ctx.get_id(), ictx->id, snap_id);
// search all pools for children depending on this snapshot
Rados rados(ictx->md_ctx);
- std::list<std::string> pools;
- rados.pool_list(pools);
- std::set<std::string> children;
- for (std::list<std::string>::const_iterator it = pools.begin(); it != pools.end(); ++it) {
- IoCtx pool_ioctx;
- r = rados.ioctx_create(it->c_str(), pool_ioctx);
- if (r < 0) {
- lderr(ictx->cct) << "snap_unprotect: can't create ioctx for pool "
- << *it << dendl;
- goto reprotect_and_return_err;
- }
- r = cls_client::get_children(&pool_ioctx, RBD_CHILDREN, pspec, children);
- // key should not exist for this parent if there is no entry
- if (((r < 0) && (r != -ENOENT))) {
- lderr(ictx->cct) << "can't get children for pool " << *it << dendl;
- goto reprotect_and_return_err;
- }
- // if we found a child, can't unprotect
- if (r == 0) {
- lderr(ictx->cct) << "snap_unprotect: can't unprotect; at least "
- << children.size() << " child(ren) in pool " << it->c_str() << dendl;
- r = -EBUSY;
- goto reprotect_and_return_err;
+
+ // protect against pools being renamed/deleted
+ bool retry_pool_check;
+ do {
+ retry_pool_check = false;
+
+ std::list<std::string> pools;
+ rados.pool_list(pools);
+ for (std::list<std::string>::const_iterator it = pools.begin(); it != pools.end(); ++it) {
+ IoCtx pool_ioctx;
+ r = rados.ioctx_create(it->c_str(), pool_ioctx);
+ if (r == -ENOENT) {
+ ldout(ictx->cct, 1) << "pool " << *it << " no longer exists" << dendl;
+ retry_pool_check = true;
+ break;
+ } else if (r < 0) {
+ lderr(ictx->cct) << "snap_unprotect: can't create ioctx for pool "
+ << *it << dendl;
+ goto reprotect_and_return_err;
+ }
+
+ std::set<std::string> children;
+ r = cls_client::get_children(&pool_ioctx, RBD_CHILDREN, pspec, children);
+ // key should not exist for this parent if there is no entry
+ if (((r < 0) && (r != -ENOENT))) {
+ lderr(ictx->cct) << "can't get children for pool " << *it << dendl;
+ goto reprotect_and_return_err;
+ }
+ // if we found a child, can't unprotect
+ if (r == 0) {
+ lderr(ictx->cct) << "snap_unprotect: can't unprotect; at least "
+ << children.size() << " child(ren) in pool "
+ << it->c_str() << dendl;
+ r = -EBUSY;
+ goto reprotect_and_return_err;
+ }
+ pool_ioctx.close(); // last one out will self-destruct
}
- pool_ioctx.close(); // last one out will self-destruct
- }
+ } while(retry_pool_check);
+
// didn't find any child in any pool, go ahead with unprotect
r = cls_client::set_protection_status(&ictx->md_ctx,
ictx->header_oid,
@@ -1260,7 +1282,6 @@ reprotect_and_return_err:
if (r < 0) {
lderr(ictx->cct) << "error opening parent image: " << cpp_strerror(r)
<< dendl;
- close_image(ictx->parent);
ictx->parent = NULL;
return r;
}
@@ -2118,10 +2139,12 @@ reprotect_and_return_err:
void close_image(ImageCtx *ictx)
{
ldout(ictx->cct, 20) << "close_image " << ictx << dendl;
- if (ictx->object_cacher)
+ if (ictx->object_cacher) {
ictx->shutdown_cache(); // implicitly flushes
- else
+ } else {
flush(ictx);
+ ictx->wait_for_pending_aio();
+ }
if (ictx->parent) {
close_image(ictx->parent);
diff --git a/src/mds/CInode.h b/src/mds/CInode.h
index cb1add3..dce2dfe 100644
--- a/src/mds/CInode.h
+++ b/src/mds/CInode.h
@@ -442,6 +442,7 @@ private:
parent(0),
inode_auth(CDIR_AUTH_DEFAULT),
replica_caps_wanted(0),
+ fcntl_locks(g_ceph_context), flock_locks(g_ceph_context),
item_dirty(this), item_caps(this), item_open_file(this), item_dirty_parent(this),
item_dirty_dirfrag_dir(this),
item_dirty_dirfrag_nest(this),
diff --git a/src/mds/Dumper.cc b/src/mds/Dumper.cc
index f7f18c9..a1b393e 100644
--- a/src/mds/Dumper.cc
+++ b/src/mds/Dumper.cc
@@ -160,7 +160,7 @@ void Dumper::undump(const char *dump_file)
inodeno_t ino = MDS_INO_LOG_OFFSET + rank;
Journaler::Header h;
- h.trimmed_pos = start;
+ h.trimmed_pos = start - (start % g_default_file_layout.fl_object_size);
h.expire_pos = start;
h.write_pos = start+len;
h.magic = CEPH_FS_ONDISK_MAGIC;
@@ -175,18 +175,14 @@ void Dumper::undump(const char *dump_file)
object_locator_t oloc(mdsmap->get_metadata_pool());
SnapContext snapc;
- bool done = false;
- Cond cond;
-
cout << "writing header " << oid << std::endl;
+ C_SaferCond header_cond;
+ lock.Lock();
objecter->write_full(oid, oloc, snapc, hbl, ceph_clock_now(g_ceph_context), 0,
NULL,
- new C_SafeCond(&lock, &cond, &done));
-
- lock.Lock();
- while (!done)
- cond.Wait(lock);
+ &header_cond);
lock.Unlock();
+ header_cond.wait();
// read
Filer filer(objecter);
@@ -198,13 +194,12 @@ void Dumper::undump(const char *dump_file)
uint64_t l = MIN(left, 1024*1024);
j.read_fd(fd, l);
cout << " writing " << pos << "~" << l << std::endl;
- filer.write(ino, &h.layout, snapc, pos, l, j, ceph_clock_now(g_ceph_context), 0, NULL, new C_SafeCond(&lock, &cond, &done));
-
+ C_SaferCond body_cond;
lock.Lock();
- while (!done)
- cond.Wait(lock);
+ filer.write(ino, &h.layout, snapc, pos, l, j, ceph_clock_now(g_ceph_context), 0, NULL, &body_cond);
lock.Unlock();
-
+ body_cond.wait();
+
pos += l;
left -= l;
}
diff --git a/src/mds/Locker.cc b/src/mds/Locker.cc
index 74305b9..19907b3 100644
--- a/src/mds/Locker.cc
+++ b/src/mds/Locker.cc
@@ -2689,6 +2689,9 @@ static uint64_t calc_bounding(uint64_t t)
return t + 1;
}
+/**
+ * m and ack might be NULL, so don't dereference them unless dirty != 0
+ */
void Locker::_do_snap_update(CInode *in, snapid_t snap, int dirty, snapid_t follows, client_t client, MClientCaps *m, MClientCaps *ack)
{
dout(10) << "_do_snap_update dirty " << ccap_string(dirty)
@@ -2766,14 +2769,22 @@ void Locker::_do_snap_update(CInode *in, snapid_t snap, int dirty, snapid_t foll
client, NULL, ack));
}
-
+/**
+ * m might be NULL, so don't dereference it unless dirty != 0.
+ */
void Locker::_update_cap_fields(CInode *in, int dirty, MClientCaps *m, inode_t *pi)
{
+
+ if (dirty && m->get_ctime() > pi->ctime) {
+ dout(7) << " ctime " << pi->ctime << " -> " << m->get_ctime()
+ << " for " << *in << dendl;
+ pi->ctime = m->get_ctime();
+ }
+
// file
if (dirty & (CEPH_CAP_FILE_EXCL|CEPH_CAP_FILE_WR)) {
utime_t atime = m->get_atime();
utime_t mtime = m->get_mtime();
- utime_t ctime = m->get_ctime();
uint64_t size = m->get_size();
version_t inline_version = m->inline_version;
@@ -2783,11 +2794,6 @@ void Locker::_update_cap_fields(CInode *in, int dirty, MClientCaps *m, inode_t *
<< " for " << *in << dendl;
pi->mtime = mtime;
}
- if (ctime > pi->ctime) {
- dout(7) << " ctime " << pi->ctime << " -> " << ctime
- << " for " << *in << dendl;
- pi->ctime = ctime;
- }
if (in->inode.is_file() && // ONLY if regular file
size > pi->size) {
dout(7) << " size " << pi->size << " -> " << size
diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc
index d6cfebd..6c52fbd 100644
--- a/src/mds/MDCache.cc
+++ b/src/mds/MDCache.cc
@@ -369,6 +369,8 @@ void MDCache::create_mydir_hierarchy(C_Gather *gather)
CDir *mydir = my->get_or_open_dirfrag(this, frag_t());
adjust_subtree_auth(mydir, mds->whoami);
+ LogSegment *ls = mds->mdlog->get_current_segment();
+
// stray dir
for (int i = 0; i < NUM_STRAY; ++i) {
CInode *stray = create_system_inode(MDS_INO_STRAY(mds->whoami, i), S_IFDIR);
@@ -384,8 +386,10 @@ void MDCache::create_mydir_hierarchy(C_Gather *gather)
mydir->fnode.fragstat.nsubdirs++;
// save them
straydir->mark_complete();
- straydir->mark_dirty(straydir->pre_dirty(), mds->mdlog->get_current_segment());
+ straydir->mark_dirty(straydir->pre_dirty(), ls);
straydir->commit(0, gather->new_sub());
+ stray->_mark_dirty_parent(ls, true);
+ stray->store_backtrace(gather->new_sub());
}
CInode *journal = create_system_inode(MDS_INO_LOG_OFFSET + mds->whoami, S_IFREG);
@@ -405,7 +409,7 @@ void MDCache::create_mydir_hierarchy(C_Gather *gather)
mydir->mark_complete();
- mydir->mark_dirty(mydir->pre_dirty(), mds->mdlog->get_current_segment());
+ mydir->mark_dirty(mydir->pre_dirty(), ls);
mydir->commit(0, gather->new_sub());
myin->store(gather->new_sub());
diff --git a/src/mds/Makefile.am b/src/mds/Makefile.am
index 4ee3500..dd2a2f3 100644
--- a/src/mds/Makefile.am
+++ b/src/mds/Makefile.am
@@ -4,7 +4,6 @@ libmds_la_SOURCES = \
mds/Dumper.cc \
mds/Resetter.cc \
mds/MDS.cc \
- mds/flock.cc \
mds/locks.c \
mds/journal.cc \
mds/Server.cc \
diff --git a/src/mds/Server.cc b/src/mds/Server.cc
index 64004b2..80a0fba 100644
--- a/src/mds/Server.cc
+++ b/src/mds/Server.cc
@@ -3085,20 +3085,28 @@ void Server::handle_client_file_setlock(MDRequestRef& mdr)
dout(0) << "handle_client_file_setlock: " << set_lock << dendl;
ceph_lock_state_t *lock_state = NULL;
+ bool interrupt = false;
// get the appropriate lock state
switch (req->head.args.filelock_change.rule) {
+ case CEPH_LOCK_FLOCK_INTR:
+ interrupt = true;
+ // fall-thru
case CEPH_LOCK_FLOCK:
lock_state = &cur->flock_locks;
break;
+ case CEPH_LOCK_FCNTL_INTR:
+ interrupt = true;
+ // fall-thru
case CEPH_LOCK_FCNTL:
lock_state = &cur->fcntl_locks;
break;
default:
- dout(0) << "got unknown lock type " << set_lock.type
- << ", dropping request!" << dendl;
+ dout(10) << "got unknown lock type " << set_lock.type
+ << ", dropping request!" << dendl;
+ reply_request(mdr, -EOPNOTSUPP);
return;
}
@@ -3109,16 +3117,15 @@ void Server::handle_client_file_setlock(MDRequestRef& mdr)
if (lock_state->is_waiting(set_lock)) {
dout(10) << " unlock removing waiting lock " << set_lock << dendl;
lock_state->remove_waiting(set_lock);
- } else {
+ cur->take_waiting(CInode::WAIT_FLOCK, waiters);
+ } else if (!interrupt) {
dout(10) << " unlock attempt on " << set_lock << dendl;
lock_state->remove_lock(set_lock, activated_locks);
cur->take_waiting(CInode::WAIT_FLOCK, waiters);
}
- reply_request(mdr, 0);
- /* For now we're ignoring the activated locks because their responses
- * will be sent when the lock comes up again in rotation by the MDS.
- * It's a cheap hack, but it's easy to code. */
mds->queue_waiters(waiters);
+
+ reply_request(mdr, 0);
} else {
dout(10) << " lock attempt on " << set_lock << dendl;
if (mdr->more()->flock_was_waiting &&
diff --git a/src/mds/flock.cc b/src/mds/flock.cc
index 4e825c9..2849c1c 100644
--- a/src/mds/flock.cc
+++ b/src/mds/flock.cc
@@ -44,33 +44,33 @@ void ceph_lock_state_t::remove_waiting(ceph_filelock& fl)
bool ceph_lock_state_t::add_lock(ceph_filelock& new_lock,
bool wait_on_fail, bool replay)
{
- dout(15) << "add_lock " << new_lock << dendl;
+ ldout(cct,15) << "add_lock " << new_lock << dendl;
bool ret = false;
list<multimap<uint64_t, ceph_filelock>::iterator>
overlapping_locks, self_overlapping_locks, neighbor_locks;
// first, get any overlapping locks and split them into owned-by-us and not
if (get_overlapping_locks(new_lock, overlapping_locks, &neighbor_locks)) {
- dout(15) << "got overlapping lock, splitting by owner" << dendl;
+ ldout(cct,15) << "got overlapping lock, splitting by owner" << dendl;
split_by_owner(new_lock, overlapping_locks, self_overlapping_locks);
}
if (!overlapping_locks.empty()) { //overlapping locks owned by others :(
if (CEPH_LOCK_EXCL == new_lock.type) {
//can't set, we want an exclusive
- dout(15) << "overlapping lock, and this lock is exclusive, can't set"
+ ldout(cct,15) << "overlapping lock, and this lock is exclusive, can't set"
<< dendl;
if (wait_on_fail && !replay) {
waiting_locks.insert(pair<uint64_t, ceph_filelock>(new_lock.start, new_lock));
}
} else { //shared lock, check for any exclusive locks blocking us
if (contains_exclusive_lock(overlapping_locks)) { //blocked :(
- dout(15) << " blocked by exclusive lock in overlapping_locks" << dendl;
+ ldout(cct,15) << " blocked by exclusive lock in overlapping_locks" << dendl;
if (wait_on_fail && !replay) {
waiting_locks.insert(pair<uint64_t, ceph_filelock>(new_lock.start, new_lock));
}
} else {
//yay, we can insert a shared lock
- dout(15) << "inserting shared lock" << dendl;
+ ldout(cct,15) << "inserting shared lock" << dendl;
remove_waiting(new_lock);
adjust_locks(self_overlapping_locks, new_lock, neighbor_locks);
held_locks.insert(pair<uint64_t, ceph_filelock>(new_lock.start, new_lock));
@@ -80,7 +80,7 @@ bool ceph_lock_state_t::add_lock(ceph_filelock& new_lock,
} else { //no overlapping locks except our own
remove_waiting(new_lock);
adjust_locks(self_overlapping_locks, new_lock, neighbor_locks);
- dout(15) << "no conflicts, inserting " << new_lock << dendl;
+ ldout(cct,15) << "no conflicts, inserting " << new_lock << dendl;
held_locks.insert(pair<uint64_t, ceph_filelock>
(new_lock.start, new_lock));
ret = true;
@@ -123,9 +123,9 @@ void ceph_lock_state_t::remove_lock(ceph_filelock removal_lock,
list<multimap<uint64_t, ceph_filelock>::iterator> overlapping_locks,
self_overlapping_locks;
if (get_overlapping_locks(removal_lock, overlapping_locks)) {
- dout(15) << "splitting by owner" << dendl;
+ ldout(cct,15) << "splitting by owner" << dendl;
split_by_owner(removal_lock, overlapping_locks, self_overlapping_locks);
- } else dout(15) << "attempt to remove lock at " << removal_lock.start
+ } else ldout(cct,15) << "attempt to remove lock at " << removal_lock.start
<< " but no locks there!" << dendl;
bool remove_to_end = (0 == removal_lock.length);
uint64_t removal_start = removal_lock.start;
@@ -134,13 +134,13 @@ void ceph_lock_state_t::remove_lock(ceph_filelock removal_lock,
__s64 old_lock_client = 0;
ceph_filelock *old_lock;
- dout(15) << "examining " << self_overlapping_locks.size()
+ ldout(cct,15) << "examining " << self_overlapping_locks.size()
<< " self-overlapping locks for removal" << dendl;
for (list<multimap<uint64_t, ceph_filelock>::iterator>::iterator
iter = self_overlapping_locks.begin();
iter != self_overlapping_locks.end();
++iter) {
- dout(15) << "self overlapping lock " << (*iter)->second << dendl;
+ ldout(cct,15) << "self overlapping lock " << (*iter)->second << dendl;
old_lock = &(*iter)->second;
bool old_lock_to_end = (0 == old_lock->length);
old_lock_end = old_lock->start + old_lock->length - 1;
@@ -149,7 +149,7 @@ void ceph_lock_state_t::remove_lock(ceph_filelock removal_lock,
if (old_lock->start < removal_start) {
old_lock->length = removal_start - old_lock->start;
} else {
- dout(15) << "erasing " << (*iter)->second << dendl;
+ ldout(cct,15) << "erasing " << (*iter)->second << dendl;
held_locks.erase(*iter);
--client_held_lock_counts[old_lock_client];
}
@@ -160,7 +160,7 @@ void ceph_lock_state_t::remove_lock(ceph_filelock removal_lock,
(append_lock.start, append_lock));
++client_held_lock_counts[(client_t)old_lock->client];
if (old_lock->start >= removal_start) {
- dout(15) << "erasing " << (*iter)->second << dendl;
+ ldout(cct,15) << "erasing " << (*iter)->second << dendl;
held_locks.erase(*iter);
--client_held_lock_counts[old_lock_client];
} else old_lock->length = removal_start - old_lock->start;
@@ -176,7 +176,7 @@ void ceph_lock_state_t::remove_lock(ceph_filelock removal_lock,
if (old_lock->start < removal_start) {
old_lock->length = removal_start - old_lock->start;
} else {
- dout(15) << "erasing " << (*iter)->second << dendl;
+ ldout(cct,15) << "erasing " << (*iter)->second << dendl;
held_locks.erase(*iter);
--client_held_lock_counts[old_lock_client];
}
@@ -207,7 +207,7 @@ void ceph_lock_state_t::adjust_locks(list<multimap<uint64_t, ceph_filelock>::ite
list<multimap<uint64_t, ceph_filelock>::iterator>
neighbor_locks)
{
- dout(15) << "adjust_locks" << dendl;
+ ldout(cct,15) << "adjust_locks" << dendl;
bool new_lock_to_end = (0 == new_lock.length);
uint64_t new_lock_start = new_lock.start;
uint64_t new_lock_end = new_lock.start + new_lock.length - 1;
@@ -219,7 +219,7 @@ void ceph_lock_state_t::adjust_locks(list<multimap<uint64_t, ceph_filelock>::ite
iter != old_locks.end();
++iter) {
old_lock = &(*iter)->second;
- dout(15) << "adjusting lock: " << *old_lock << dendl;
+ ldout(cct,15) << "adjusting lock: " << *old_lock << dendl;
bool old_lock_to_end = (0 == old_lock->length);
old_lock_start = old_lock->start;
old_lock_end = old_lock->start + old_lock->length - 1;
@@ -228,17 +228,17 @@ void ceph_lock_state_t::adjust_locks(list<multimap<uint64_t, ceph_filelock>::ite
old_lock_client = old_lock->client;
if (new_lock_to_end || old_lock_to_end) {
//special code path to deal with a length set at 0
- dout(15) << "one lock extends forever" << dendl;
+ ldout(cct,15) << "one lock extends forever" << dendl;
if (old_lock->type == new_lock.type) {
//just unify them in new lock, remove old lock
- dout(15) << "same lock type, unifying" << dendl;
+ ldout(cct,15) << "same lock type, unifying" << dendl;
new_lock.start = (new_lock_start < old_lock_start) ? new_lock_start :
old_lock_start;
new_lock.length = 0;
held_locks.erase(*iter);
--client_held_lock_counts[old_lock_client];
} else { //not same type, have to keep any remains of old lock around
- dout(15) << "shrinking old lock" << dendl;
+ ldout(cct,15) << "shrinking old lock" << dendl;
if (new_lock_to_end) {
if (old_lock_start < new_lock_start) {
old_lock->length = new_lock_start - old_lock_start;
@@ -262,17 +262,17 @@ void ceph_lock_state_t::adjust_locks(list<multimap<uint64_t, ceph_filelock>::ite
}
} else {
if (old_lock->type == new_lock.type) { //just merge them!
- dout(15) << "merging locks, they're the same type" << dendl;
+ ldout(cct,15) << "merging locks, they're the same type" << dendl;
new_lock.start = (old_lock_start < new_lock_start ) ? old_lock_start :
new_lock_start;
int new_end = (new_lock_end > old_lock_end) ? new_lock_end :
old_lock_end;
new_lock.length = new_end - new_lock.start + 1;
- dout(15) << "erasing lock " << (*iter)->second << dendl;
+ ldout(cct,15) << "erasing lock " << (*iter)->second << dendl;
held_locks.erase(*iter);
--client_held_lock_counts[old_lock_client];
} else { //we'll have to update sizes and maybe make new locks
- dout(15) << "locks aren't same type, changing sizes" << dendl;
+ ldout(cct,15) << "locks aren't same type, changing sizes" << dendl;
if (old_lock_end > new_lock_end) { //add extra lock after new_lock
ceph_filelock appended_lock = *old_lock;
appended_lock.start = new_lock_end + 1;
@@ -302,7 +302,7 @@ void ceph_lock_state_t::adjust_locks(list<multimap<uint64_t, ceph_filelock>::ite
++iter) {
old_lock = &(*iter)->second;
old_lock_client = old_lock->client;
- dout(15) << "lock to coalesce: " << *old_lock << dendl;
+ ldout(cct,15) << "lock to coalesce: " << *old_lock << dendl;
/* because if it's a neighboring lock there can't be any self-overlapping
locks that covered it */
if (old_lock->type == new_lock.type) { //merge them
@@ -354,8 +354,8 @@ ceph_lock_state_t::get_lower_bound(uint64_t start,
&& (start != 0)
&& (lower_bound != lock_map.begin())) --lower_bound;
if (lock_map.end() == lower_bound)
- dout(15) << "get_lower_dout(15)eturning end()" << dendl;
- else dout(15) << "get_lower_bound returning iterator pointing to "
+ ldout(cct,15) << "get_lower_dout(15)eturning end()" << dendl;
+ else ldout(cct,15) << "get_lower_bound returning iterator pointing to "
<< lower_bound->second << dendl;
return lower_bound;
}
@@ -368,8 +368,8 @@ ceph_lock_state_t::get_last_before(uint64_t end,
lock_map.upper_bound(end);
if (last != lock_map.begin()) --last;
if (lock_map.end() == last)
- dout(15) << "get_last_before returning end()" << dendl;
- else dout(15) << "get_last_before returning iterator pointing to "
+ ldout(cct,15) << "get_last_before returning end()" << dendl;
+ else ldout(cct,15) << "get_last_before returning iterator pointing to "
<< last->second << dendl;
return last;
}
@@ -382,7 +382,7 @@ bool ceph_lock_state_t::share_space(
((iter->first < start) &&
(((iter->first + iter->second.length - 1) >= start) ||
(0 == iter->second.length))));
- dout(15) << "share_space got start: " << start << ", end: " << end
+ ldout(cct,15) << "share_space got start: " << start << ", end: " << end
<< ", lock: " << iter->second << ", returning " << ret << dendl;
return ret;
}
@@ -393,7 +393,7 @@ bool ceph_lock_state_t::get_overlapping_locks(ceph_filelock& lock,
list<multimap<uint64_t,
ceph_filelock>::iterator> *self_neighbors)
{
- dout(15) << "get_overlapping_locks" << dendl;
+ ldout(cct,15) << "get_overlapping_locks" << dendl;
// create a lock starting one earlier and ending one later
// to check for neighbors
ceph_filelock neighbor_check_lock = lock;
@@ -419,8 +419,7 @@ bool ceph_lock_state_t::get_overlapping_locks(ceph_filelock& lock,
if (share_space(iter, lock)) {
overlaps.push_front(iter);
} else if (self_neighbors &&
- (neighbor_check_lock.client == iter->second.client) &&
- (neighbor_check_lock.pid == iter->second.pid) &&
+ ceph_filelock_owner_equal(neighbor_check_lock, iter->second) &&
share_space(iter, neighbor_check_lock)) {
self_neighbors->push_front(iter);
}
@@ -438,7 +437,7 @@ bool ceph_lock_state_t::get_waiting_overlaps(ceph_filelock& lock,
ceph_filelock>::iterator>&
overlaps)
{
- dout(15) << "get_waiting_overlaps" << dendl;
+ ldout(cct,15) << "get_waiting_overlaps" << dendl;
multimap<uint64_t, ceph_filelock>::iterator iter =
get_last_before(lock.start + lock.length - 1, waiting_locks);
bool cont = iter != waiting_locks.end();
@@ -459,15 +458,15 @@ void ceph_lock_state_t::split_by_owner(ceph_filelock& owner,
{
list<multimap<uint64_t, ceph_filelock>::iterator>::iterator
iter = locks.begin();
- dout(15) << "owner lock: " << owner << dendl;
+ ldout(cct,15) << "owner lock: " << owner << dendl;
while (iter != locks.end()) {
- dout(15) << "comparing to " << (*iter)->second << dendl;
+ ldout(cct,15) << "comparing to " << (*iter)->second << dendl;
if (ceph_filelock_owner_equal((*iter)->second, owner)) {
- dout(15) << "success, pushing to owned_locks" << dendl;
+ ldout(cct,15) << "success, pushing to owned_locks" << dendl;
owned_locks.push_back(*iter);
iter = locks.erase(iter);
} else {
- dout(15) << "failure, something not equal in this group "
+ ldout(cct,15) << "failure, something not equal in this group "
<< (*iter)->second.client << ":" << owner.client << ","
<< (*iter)->second.owner << ":" << owner.owner << ","
<< (*iter)->second.pid << ":" << owner.pid << dendl;
diff --git a/src/mds/flock.h b/src/mds/flock.h
index 4791b85..bf3980d 100644
--- a/src/mds/flock.h
+++ b/src/mds/flock.h
@@ -37,7 +37,9 @@ inline bool operator==(ceph_filelock& l, ceph_filelock& r) {
}
class ceph_lock_state_t {
+ CephContext *cct;
public:
+ ceph_lock_state_t(CephContext *cct_) : cct(cct_) {}
multimap<uint64_t, ceph_filelock> held_locks; // current locks
multimap<uint64_t, ceph_filelock> waiting_locks; // locks waiting for other locks
// both of the above are keyed by starting offset
diff --git a/src/mds/mdstypes.h b/src/mds/mdstypes.h
index 184cf70..73f1ba2 100644
--- a/src/mds/mdstypes.h
+++ b/src/mds/mdstypes.h
@@ -703,7 +703,8 @@ struct cap_reconnect_t {
cap_reconnect_t() {
memset(&capinfo, 0, sizeof(capinfo));
}
- cap_reconnect_t(uint64_t cap_id, inodeno_t pino, const string& p, int w, int i, inodeno_t sr) :
+ cap_reconnect_t(uint64_t cap_id, inodeno_t pino, const string& p, int w, int i,
+ inodeno_t sr, bufferlist& lb) :
path(p) {
capinfo.cap_id = cap_id;
capinfo.wanted = w;
@@ -711,6 +712,7 @@ struct cap_reconnect_t {
capinfo.snaprealm = sr;
capinfo.pathbase = pino;
capinfo.flock_len = 0;
+ flockbl.claim(lb);
}
void encode(bufferlist& bl) const;
void decode(bufferlist::iterator& bl);
diff --git a/src/messages/MClientReconnect.h b/src/messages/MClientReconnect.h
index 4e2839c..1b072a3 100644
--- a/src/messages/MClientReconnect.h
+++ b/src/messages/MClientReconnect.h
@@ -40,9 +40,9 @@ public:
}
void add_cap(inodeno_t ino, uint64_t cap_id, inodeno_t pathbase, const string& path,
- int wanted, int issued,
- inodeno_t sr) {
- caps[ino] = cap_reconnect_t(cap_id, pathbase, path, wanted, issued, sr);
+ int wanted, int issued, inodeno_t sr, bufferlist& lb)
+ {
+ caps[ino] = cap_reconnect_t(cap_id, pathbase, path, wanted, issued, sr, lb);
}
void add_snaprealm(inodeno_t ino, snapid_t seq, inodeno_t parent) {
ceph_mds_snaprealm_reconnect r;
diff --git a/src/mon/DataHealthService.cc b/src/mon/DataHealthService.cc
index 6c6ed29..a2bbb1f 100644
--- a/src/mon/DataHealthService.cc
+++ b/src/mon/DataHealthService.cc
@@ -86,10 +86,10 @@ void DataHealthService::get_health(
health_status_t health_status = HEALTH_OK;
string health_detail;
- if (stats.latest_avail_percent <= g_conf->mon_data_avail_crit) {
+ if (stats.fs_stats.avail_percent <= g_conf->mon_data_avail_crit) {
health_status = HEALTH_ERR;
health_detail = "low disk space, shutdown imminent";
- } else if (stats.latest_avail_percent <= g_conf->mon_data_avail_warn) {
+ } else if (stats.fs_stats.avail_percent <= g_conf->mon_data_avail_warn) {
health_status = HEALTH_WARN;
health_detail = "low disk space";
}
@@ -110,7 +110,7 @@ void DataHealthService::get_health(
stringstream ss;
ss << "mon." << mon_name << " " << health_detail;
summary.push_back(make_pair(health_status, ss.str()));
- ss << " -- " << stats.latest_avail_percent << "% avail";
+ ss << " -- " << stats.fs_stats.avail_percent << "% avail";
if (detail)
detail->push_back(make_pair(health_status, ss.str()));
}
@@ -151,23 +151,18 @@ int DataHealthService::update_store_stats(DataStats &ours)
int DataHealthService::update_stats()
{
- struct statfs stbuf;
- int err = ::statfs(g_conf->mon_data.c_str(), &stbuf);
- if (err < 0) {
- derr << __func__ << " statfs error: " << cpp_strerror(errno) << dendl;
- return -errno;
- }
-
entity_inst_t our_inst = mon->messenger->get_myinst();
DataStats& ours = stats[our_inst];
- ours.kb_total = stbuf.f_blocks * stbuf.f_bsize / 1024;
- ours.kb_used = (stbuf.f_blocks - stbuf.f_bfree) * stbuf.f_bsize / 1024;
- ours.kb_avail = stbuf.f_bavail * stbuf.f_bsize / 1024;
- ours.latest_avail_percent = (((float)ours.kb_avail/ours.kb_total)*100);
- dout(0) << __func__ << " avail " << ours.latest_avail_percent << "%"
- << " total " << ours.kb_total << " used " << ours.kb_used << " avail " << ours.kb_avail
- << dendl;
+ int err = get_fs_stats(ours.fs_stats, g_conf->mon_data.c_str());
+ if (err < 0) {
+ derr << __func__ << " get_fs_stats error: " << cpp_strerror(err) << dendl;
+ return err;
+ }
+ dout(0) << __func__ << " avail " << ours.fs_stats.avail_percent << "%"
+ << " total " << prettybyte_t(ours.fs_stats.byte_total)
+ << ", used " << prettybyte_t(ours.fs_stats.byte_used)
+ << ", avail " << prettybyte_t(ours.fs_stats.byte_avail) << dendl;
ours.last_update = ceph_clock_now(g_ceph_context);
return update_store_stats(ours);
@@ -213,7 +208,7 @@ void DataHealthService::service_tick()
DataStats &ours = stats[mon->messenger->get_myinst()];
- if (ours.latest_avail_percent <= g_conf->mon_data_avail_crit) {
+ if (ours.fs_stats.avail_percent <= g_conf->mon_data_avail_crit) {
derr << "reached critical levels of available space on local monitor storage"
<< " -- shutdown!" << dendl;
force_shutdown();
@@ -224,12 +219,12 @@ void DataHealthService::service_tick()
// consumed in-between reports to assess if it's worth to log this info,
// otherwise we may very well contribute to the consumption of the
// already low available disk space.
- if (ours.latest_avail_percent <= g_conf->mon_data_avail_warn) {
- if (ours.latest_avail_percent != last_warned_percent)
+ if (ours.fs_stats.avail_percent <= g_conf->mon_data_avail_warn) {
+ if (ours.fs_stats.avail_percent != last_warned_percent)
mon->clog.warn()
<< "reached concerning levels of available space on local monitor storage"
- << " (" << ours.latest_avail_percent << "% free)\n";
- last_warned_percent = ours.latest_avail_percent;
+ << " (" << ours.fs_stats.avail_percent << "% free)\n";
+ last_warned_percent = ours.fs_stats.avail_percent;
} else {
last_warned_percent = 0;
}
diff --git a/src/mon/MonCommands.h b/src/mon/MonCommands.h
index bd9dd2e..461b3f2 100644
--- a/src/mon/MonCommands.h
+++ b/src/mon/MonCommands.h
@@ -425,6 +425,9 @@ COMMAND("osd crush unlink " \
"name=ancestor,type=CephString,req=false,goodchars=[A-Za-z0-9-_.]", \
"unlink <name> from crush map (everywhere, or just at <ancestor>)", \
"osd", "rw", "cli,rest")
+COMMAND("osd crush reweight-all",
+ "recalculate the weights for the tree to ensure they sum correctly",
+ "osd", "rw", "cli,rest")
COMMAND("osd crush reweight " \
"name=name,type=CephString,goodchars=[A-Za-z0-9-_.] " \
"name=weight,type=CephFloat,range=0.0", \
@@ -433,6 +436,15 @@ COMMAND("osd crush reweight " \
COMMAND("osd crush tunables " \
"name=profile,type=CephChoices,strings=legacy|argonaut|bobtail|firefly|optimal|default", \
"set crush tunables values to <profile>", "osd", "rw", "cli,rest")
+COMMAND("osd crush set-tunable " \
+ "name=tunable,type=CephChoices,strings=straw_calc_version " \
+ "name=value,type=CephInt",
+ "set crush tunable <tunable> to <value>",
+ "osd", "rw", "cli,rest")
+COMMAND("osd crush get-tunable " \
+ "name=tunable,type=CephChoices,strings=straw_calc_version",
+ "get crush tunable <tunable>",
+ "osd", "rw", "cli,rest")
COMMAND("osd crush show-tunables", \
"show current crush tunables", "osd", "r", "cli,rest")
COMMAND("osd crush rule create-simple " \
diff --git a/src/mon/Monitor.cc b/src/mon/Monitor.cc
index fd3a358..ad35e5e 100644
--- a/src/mon/Monitor.cc
+++ b/src/mon/Monitor.cc
@@ -428,6 +428,8 @@ int Monitor::preinit()
cluster_logger = pcb.create_perf_counters();
}
+ paxos->init_logger();
+
// verify cluster_uuid
{
int r = check_fsid();
@@ -1406,6 +1408,8 @@ void Monitor::handle_probe(MMonProbe *m)
*/
void Monitor::handle_probe_probe(MMonProbe *m)
{
+ MMonProbe *r;
+
dout(10) << "handle_probe_probe " << m->get_source_inst() << *m
<< " features " << m->get_connection()->get_features() << dendl;
uint64_t missing = required_features & ~m->get_connection()->get_features();
@@ -1418,12 +1422,26 @@ void Monitor::handle_probe_probe(MMonProbe *m)
m->required_features = required_features;
messenger->send_message(r, m->get_connection());
}
- m->put();
- return;
+ goto out;
+ }
+
+ if (!is_probing() && !is_synchronizing()) {
+ // If the probing mon is way ahead of us, we need to re-bootstrap.
+ // Normally we capture this case when we initially bootstrap, but
+ // it is possible we pass those checks (we overlap with
+ // quorum-to-be) but fail to join a quorum before it moves past
+ // us. We need to be kicked back to bootstrap so we can
+ // synchronize, not keep calling elections.
+ if (paxos->get_version() + 1 < m->paxos_first_version) {
+ dout(1) << " peer " << m->get_source_addr() << " has first_committed "
+ << "ahead of us, re-bootstrapping" << dendl;
+ bootstrap();
+ goto out;
+
+ }
}
- MMonProbe *r = new MMonProbe(monmap->fsid, MMonProbe::OP_REPLY,
- name, has_ever_joined);
+ r = new MMonProbe(monmap->fsid, MMonProbe::OP_REPLY, name, has_ever_joined);
r->name = name;
r->quorum = quorum;
monmap->encode(r->monmap_bl, m->get_connection()->get_features());
@@ -1438,6 +1456,7 @@ void Monitor::handle_probe_probe(MMonProbe *m)
extra_probe_peers.insert(m->get_source_addr());
}
+ out:
m->put();
}
diff --git a/src/mon/MonitorDBStore.h b/src/mon/MonitorDBStore.h
index 88c4f93..1576db7 100644
--- a/src/mon/MonitorDBStore.h
+++ b/src/mon/MonitorDBStore.h
@@ -87,6 +87,9 @@ class MonitorDBStore
struct Transaction {
list<Op> ops;
+ uint64_t bytes, keys;
+
+ Transaction() : bytes(0), keys(0) {}
enum {
OP_PUT = 1,
@@ -96,6 +99,8 @@ class MonitorDBStore
void put(string prefix, string key, bufferlist& bl) {
ops.push_back(Op(OP_PUT, prefix, key, bl));
+ ++keys;
+ bytes += prefix.length() + key.length() + bl.length();
}
void put(string prefix, version_t ver, bufferlist& bl) {
@@ -112,6 +117,8 @@ class MonitorDBStore
void erase(string prefix, string key) {
ops.push_back(Op(OP_ERASE, prefix, key));
+ ++keys;
+ bytes += prefix.length() + key.length();
}
void erase(string prefix, version_t ver) {
@@ -129,14 +136,20 @@ class MonitorDBStore
}
void encode(bufferlist& bl) const {
- ENCODE_START(1, 1, bl);
+ ENCODE_START(2, 1, bl);
::encode(ops, bl);
+ ::encode(bytes, bl);
+ ::encode(keys, bl);
ENCODE_FINISH(bl);
}
void decode(bufferlist::iterator& bl) {
- DECODE_START(1, bl);
+ DECODE_START(2, bl);
::decode(ops, bl);
+ if (struct_v >= 2) {
+ ::decode(bytes, bl);
+ ::decode(keys, bl);
+ }
DECODE_FINISH(bl);
}
@@ -153,6 +166,8 @@ class MonitorDBStore
void append(Transaction& other) {
ops.splice(ops.end(), other.ops);
+ keys += other.keys;
+ bytes += other.bytes;
}
void append_from_encoded(bufferlist& bl) {
@@ -169,6 +184,12 @@ class MonitorDBStore
bool size() {
return ops.size();
}
+ uint64_t get_keys() const {
+ return keys;
+ }
+ uint64_t get_bytes() const {
+ return bytes;
+ }
void dump(ceph::Formatter *f, bool dump_val=false) const {
f->open_object_section("transaction");
@@ -218,6 +239,8 @@ class MonitorDBStore
f->close_section();
}
f->close_section();
+ f->dump_unsigned("num_keys", keys);
+ f->dump_unsigned("num_bytes", bytes);
f->close_section();
}
};
diff --git a/src/mon/OSDMonitor.cc b/src/mon/OSDMonitor.cc
index 7e469b2..da06b86 100644
--- a/src/mon/OSDMonitor.cc
+++ b/src/mon/OSDMonitor.cc
@@ -1211,7 +1211,7 @@ bool OSDMonitor::preprocess_boot(MOSDBoot *m)
osdmap.get_info(from).up_from > m->version) {
dout(7) << "prepare_boot msg from before last up_from, ignoring" << dendl;
send_latest(m, m->sb.current_epoch+1);
- goto ignore;
+ return true;
}
// noup?
@@ -2465,6 +2465,31 @@ bool OSDMonitor::preprocess_command(MMonCommand *m)
}
ss << "listed " << osdmap.blacklist.size() << " entries";
+ } else if (prefix == "osd crush get-tunable") {
+ string tunable;
+ cmd_getval(g_ceph_context, cmdmap, "tunable", tunable);
+ int value;
+ cmd_getval(g_ceph_context, cmdmap, "value", value);
+ ostringstream rss;
+ if (f)
+ f->open_object_section("tunable");
+ if (tunable == "straw_calc_version") {
+ if (f)
+ f->dump_int(tunable.c_str(), osdmap.crush->get_straw_calc_version());
+ else
+ rss << osdmap.crush->get_straw_calc_version() << "\n";
+ } else {
+ r = -EINVAL;
+ goto reply;
+ }
+ if (f) {
+ f->close_section();
+ f->flush(rdata);
+ } else {
+ rdata.append(rss.str());
+ }
+ r = 0;
+
} else if (prefix == "osd pool get") {
string poolstr;
cmd_getval(g_ceph_context, cmdmap, "pool", poolstr);
@@ -3279,11 +3304,38 @@ int OSDMonitor::prepare_pool_crush_ruleset(const unsigned pool_type,
if (*crush_ruleset < 0) {
switch (pool_type) {
case pg_pool_t::TYPE_REPLICATED:
- *crush_ruleset = osdmap.crush->get_osd_pool_default_crush_replicated_ruleset(g_ceph_context);
- if (*crush_ruleset < 0) {
- // Errors may happen e.g. if no valid ruleset is available
- ss << "No suitable CRUSH ruleset exists";
- return *crush_ruleset;
+ {
+ if (ruleset_name == "") {
+ //Use default ruleset
+ *crush_ruleset = osdmap.crush->get_osd_pool_default_crush_replicated_ruleset(g_ceph_context);
+ if (*crush_ruleset < 0) {
+ // Errors may happen e.g. if no valid ruleset is available
+ ss << "No suitable CRUSH ruleset exists";
+ return *crush_ruleset;
+ }
+ } else {
+ int ret;
+ ret = osdmap.crush->get_rule_id(ruleset_name);
+ if (ret != -ENOENT) {
+ // found it, use it
+ *crush_ruleset = ret;
+ } else {
+ CrushWrapper newcrush;
+ _get_pending_crush(newcrush);
+
+ ret = newcrush.get_rule_id(ruleset_name);
+ if (ret != -ENOENT) {
+ // found it, wait for it to be proposed
+ dout(20) << "prepare_pool_crush_ruleset: ruleset "
+ << ruleset_name << " is pending, try again" << dendl;
+ return -EAGAIN;
+ } else {
+ //Cannot find it, return error
+ ss << "Specified ruleset " << ruleset_name << " doesn't exist";
+ return ret;
+ }
+ }
+ }
}
break;
case pg_pool_t::TYPE_ERASURE:
@@ -4115,6 +4167,19 @@ bool OSDMonitor::prepare_command_impl(MMonCommand *m,
}
} while (false);
+ } else if (prefix == "osd crush reweight-all") {
+ // osd crush reweight-all: recalculate weights for the whole tree
+ CrushWrapper newcrush;
+ _get_pending_crush(newcrush);
+
+ newcrush.reweight(g_ceph_context);
+ pending_inc.crush.clear();
+ newcrush.encode(pending_inc.crush);
+ ss << "reweighted crush hierarchy";
+ getline(ss, rs);
+ wait_for_finished_proposal(new Monitor::C_Command(mon, m, 0, rs,
+ get_last_committed() + 1));
+ return true;
} else if (prefix == "osd crush reweight") {
do {
// osd crush reweight <name> <weight>
@@ -4190,6 +4255,46 @@ bool OSDMonitor::prepare_command_impl(MMonCommand *m,
wait_for_finished_proposal(new Monitor::C_Command(mon, m, 0, rs,
get_last_committed() + 1));
return true;
+ } else if (prefix == "osd crush set-tunable") {
+ CrushWrapper newcrush;
+ _get_pending_crush(newcrush);
+
+ err = 0;
+ string tunable;
+ cmd_getval(g_ceph_context, cmdmap, "tunable", tunable);
+
+ int64_t value = -1;
+ if (!cmd_getval(g_ceph_context, cmdmap, "value", value)) {
+ err = -EINVAL;
+ ss << "failed to parse integer value " << cmd_vartype_stringify(cmdmap["value"]);
+ goto reply;
+ }
+
+ if (tunable == "straw_calc_version") {
+ if (value < 0 || value > 2) {
+ ss << "value must be 0, 1, or 2; got " << value;
+ err = -EINVAL;
+ goto reply;
+ }
+ newcrush.set_straw_calc_version(value);
+ } else {
+ ss << "unrecognized tunable '" << tunable << "'";
+ err = -EINVAL;
+ goto reply;
+ }
+
+ if (!validate_crush_against_features(&newcrush, ss)) {
+ err = -EINVAL;
+ goto reply;
+ }
+
+ pending_inc.crush.clear();
+ newcrush.encode(pending_inc.crush);
+ ss << "adjusted tunable " << tunable << " to " << value;
+ getline(ss, rs);
+ wait_for_finished_proposal(new Monitor::C_Command(mon, m, 0, rs,
+ get_last_committed() + 1));
+ return true;
} else if (prefix == "osd crush rule create-simple") {
string name, root, type, mode;
@@ -5004,35 +5109,41 @@ done:
cmd_getval(g_ceph_context, cmdmap, "ruleset", ruleset_name);
string erasure_code_profile;
cmd_getval(g_ceph_context, cmdmap, "erasure_code_profile", erasure_code_profile);
- if (erasure_code_profile == "")
- erasure_code_profile = "default";
- if (erasure_code_profile == "default") {
- if (!osdmap.has_erasure_code_profile(erasure_code_profile)) {
- if (pending_inc.has_erasure_code_profile(erasure_code_profile)) {
- dout(20) << "erasure code profile " << erasure_code_profile << " already pending" << dendl;
- goto wait;
- }
- map<string,string> profile_map;
- err = osdmap.get_erasure_code_profile_default(g_ceph_context,
+ if (pool_type == pg_pool_t::TYPE_ERASURE) {
+ if (erasure_code_profile == "")
+ erasure_code_profile = "default";
+ //handle the erasure code profile
+ if (erasure_code_profile == "default") {
+ if (!osdmap.has_erasure_code_profile(erasure_code_profile)) {
+ if (pending_inc.has_erasure_code_profile(erasure_code_profile)) {
+ dout(20) << "erasure code profile " << erasure_code_profile << " already pending" << dendl;
+ goto wait;
+ }
+
+ map<string,string> profile_map;
+ err = osdmap.get_erasure_code_profile_default(g_ceph_context,
profile_map,
&ss);
- if (err)
- goto reply;
- dout(20) << "erasure code profile " << erasure_code_profile << " set" << dendl;
- pending_inc.set_erasure_code_profile(erasure_code_profile, profile_map);
- goto wait;
+ if (err)
+ goto reply;
+ dout(20) << "erasure code profile " << erasure_code_profile << " set" << dendl;
+ pending_inc.set_erasure_code_profile(erasure_code_profile, profile_map);
+ goto wait;
+ }
}
- }
-
- if (ruleset_name == "") {
- if (erasure_code_profile == "default") {
- ruleset_name = "erasure-code";
- } else {
- dout(1) << "implicitly use ruleset named after the pool: "
+ if (ruleset_name == "") {
+ if (erasure_code_profile == "default") {
+ ruleset_name = "erasure-code";
+ } else {
+ dout(1) << "implicitly use ruleset named after the pool: "
<< poolstr << dendl;
- ruleset_name = poolstr;
+ ruleset_name = poolstr;
+ }
}
+ } else {
+ //NOTE: for replicated pools, cmd_map puts ruleset_name into the erasure_code_profile field
+ ruleset_name = erasure_code_profile;
}
err = prepare_new_pool(poolstr, 0, // auid=0 for admin created pool
diff --git a/src/mon/PGMap.cc b/src/mon/PGMap.cc
index 5ec8ee2..59b6a03 100644
--- a/src/mon/PGMap.cc
+++ b/src/mon/PGMap.cc
@@ -379,17 +379,31 @@ void PGMap::update_pg(pg_t pgid, bufferlist& bl)
{
bufferlist::iterator p = bl.begin();
ceph::unordered_map<pg_t,pg_stat_t>::iterator s = pg_stat.find(pgid);
- if (s != pg_stat.end())
+ epoch_t old_lec = 0;
+ if (s != pg_stat.end()) {
+ old_lec = s->second.get_effective_last_epoch_clean();
stat_pg_sub(pgid, s->second);
+ }
pg_stat_t& r = pg_stat[pgid];
::decode(r, p);
stat_pg_add(pgid, r);
+
+ epoch_t lec = r.get_effective_last_epoch_clean();
+ if (min_last_epoch_clean &&
+ (lec < min_last_epoch_clean || // we did
+ (lec > min_last_epoch_clean && // we might
+ old_lec == min_last_epoch_clean)
+ ))
+ min_last_epoch_clean = 0;
}
void PGMap::remove_pg(pg_t pgid)
{
ceph::unordered_map<pg_t,pg_stat_t>::iterator s = pg_stat.find(pgid);
if (s != pg_stat.end()) {
+ if (min_last_epoch_clean &&
+ s->second.get_effective_last_epoch_clean() == min_last_epoch_clean)
+ min_last_epoch_clean = 0;
stat_pg_sub(pgid, s->second);
pg_stat.erase(s);
}
@@ -399,14 +413,33 @@ void PGMap::update_osd(int osd, bufferlist& bl)
{
bufferlist::iterator p = bl.begin();
ceph::unordered_map<int32_t,osd_stat_t>::iterator o = osd_stat.find(osd);
- if (o != osd_stat.end())
+ epoch_t old_lec = 0;
+ if (o != osd_stat.end()) {
+ ceph::unordered_map<int32_t,epoch_t>::iterator i = osd_epochs.find(osd);
+ if (i != osd_epochs.end())
+ old_lec = i->second;
stat_osd_sub(o->second);
+ }
osd_stat_t& r = osd_stat[osd];
::decode(r, p);
stat_osd_add(r);
// adjust [near]full status
register_nearfull_status(osd, r);
+
+ // epoch?
+ if (!p.end()) {
+ epoch_t e;
+ ::decode(e, p);
+
+ if (e < min_last_epoch_clean ||
+ (e > min_last_epoch_clean &&
+ old_lec == min_last_epoch_clean))
+ min_last_epoch_clean = 0;
+ } else {
+ // WARNING: we are not refreshing min_last_epoch_clean! must be old store
+ // or old mon running.
+ }
}
void PGMap::remove_osd(int osd)
diff --git a/src/mon/PGMonitor.cc b/src/mon/PGMonitor.cc
index 15f6746..364ad20 100644
--- a/src/mon/PGMonitor.cc
+++ b/src/mon/PGMonitor.cc
@@ -545,6 +545,7 @@ void PGMonitor::encode_pending(MonitorDBStore::Transaction *t)
::encode(p->first, dirty);
bufferlist bl;
::encode(p->second, bl, features);
+ ::encode(pending_inc.get_osd_epochs().find(p->first)->second, bl);
t->put(prefix, stringify(p->first), bl);
}
for (set<int32_t>::const_iterator p =
diff --git a/src/mon/Paxos.cc b/src/mon/Paxos.cc
index b38b111..2e41eb8 100644
--- a/src/mon/Paxos.cc
+++ b/src/mon/Paxos.cc
@@ -73,6 +73,44 @@ void Paxos::init()
assert(is_consistent());
}
+void Paxos::init_logger()
+{
+ PerfCountersBuilder pcb(g_ceph_context, "paxos", l_paxos_first, l_paxos_last);
+ pcb.add_u64_counter(l_paxos_start_leader, "start_leader");
+ pcb.add_u64_counter(l_paxos_start_peon, "start_peon");
+ pcb.add_u64_counter(l_paxos_restart, "restart");
+ pcb.add_u64_counter(l_paxos_refresh, "refresh");
+ pcb.add_time_avg(l_paxos_refresh_latency, "refresh_latency");
+ pcb.add_u64_counter(l_paxos_begin, "begin");
+ pcb.add_u64_avg(l_paxos_begin_keys, "begin_keys");
+ pcb.add_u64_avg(l_paxos_begin_bytes, "begin_bytes");
+ pcb.add_time_avg(l_paxos_begin_latency, "begin_latency");
+ pcb.add_u64_counter(l_paxos_commit, "commit");
+ pcb.add_u64_avg(l_paxos_commit_keys, "commit_keys");
+ pcb.add_u64_avg(l_paxos_commit_bytes, "commit_bytes");
+ pcb.add_time_avg(l_paxos_commit_latency, "commit_latency");
+ pcb.add_u64_counter(l_paxos_collect, "collect");
+ pcb.add_u64_avg(l_paxos_collect_keys, "collect_keys");
+ pcb.add_u64_avg(l_paxos_collect_bytes, "collect_bytes");
+ pcb.add_time_avg(l_paxos_collect_latency, "collect_latency");
+ pcb.add_u64_counter(l_paxos_collect_uncommitted, "collect_uncommitted");
+ pcb.add_u64_counter(l_paxos_collect_timeout, "collect_timeout");
+ pcb.add_u64_counter(l_paxos_accept_timeout, "accept_timeout");
+ pcb.add_u64_counter(l_paxos_lease_ack_timeout, "lease_ack_timeout");
+ pcb.add_u64_counter(l_paxos_lease_timeout, "lease_timeout");
+ pcb.add_u64_counter(l_paxos_store_state, "store_state");
+ pcb.add_u64_avg(l_paxos_store_state_keys, "store_state_keys");
+ pcb.add_u64_avg(l_paxos_store_state_bytes, "store_state_bytes");
+ pcb.add_time_avg(l_paxos_store_state_latency, "store_state_latency");
+ pcb.add_u64_counter(l_paxos_share_state, "share_state");
+ pcb.add_u64_avg(l_paxos_share_state_keys, "share_state_keys");
+ pcb.add_u64_avg(l_paxos_share_state_bytes, "share_state_bytes");
+ pcb.add_u64_counter(l_paxos_new_pn, "new_pn");
+ pcb.add_time_avg(l_paxos_new_pn_latency, "new_pn_latency");
+ logger = pcb.create_perf_counters();
+ g_ceph_context->get_perfcounters_collection()->add(logger);
+}
+
void Paxos::dump_info(Formatter *f)
{
f->open_object_section("paxos");
@@ -120,6 +158,8 @@ void Paxos::collect(version_t oldpn)
<< " pn " << uncommitted_pn
<< " (" << uncommitted_value.length() << " bytes) from myself"
<< dendl;
+
+ logger->inc(l_paxos_collect_uncommitted);
}
// pick new pn
@@ -193,7 +233,15 @@ void Paxos::handle_collect(MMonPaxos *collect)
f.flush(*_dout);
*_dout << dendl;
+ logger->inc(l_paxos_collect);
+ logger->inc(l_paxos_collect_keys, t.get_keys());
+ logger->inc(l_paxos_collect_bytes, t.get_bytes());
+ utime_t start = ceph_clock_now(NULL);
+
get_store()->apply_transaction(t);
+
+ utime_t end = ceph_clock_now(NULL);
+ logger->tinc(l_paxos_collect_latency, end - start);
} else {
// don't accept!
dout(10) << "NOT accepting pn " << collect->pn << " from " << collect->pn_from
@@ -229,6 +277,8 @@ void Paxos::handle_collect(MMonPaxos *collect)
<< " and crossing our fingers" << dendl;
last->uncommitted_pn = previous_pn;
}
+
+ logger->inc(l_paxos_collect_uncommitted);
}
// send reply
@@ -258,14 +308,19 @@ void Paxos::share_state(MMonPaxos *m, version_t peer_first_committed,
version_t v = peer_last_committed + 1;
// include incrementals
+ uint64_t bytes = 0;
for ( ; v <= last_committed; v++) {
if (get_store()->exists(get_name(), v)) {
get_store()->get(get_name(), v, m->values[v]);
assert(m->values[v].length());
dout(10) << " sharing " << v << " ("
<< m->values[v].length() << " bytes)" << dendl;
+ bytes += m->values[v].length() + 16; // paxos_ + 10 digits = 16
}
}
+ logger->inc(l_paxos_share_state);
+ logger->inc(l_paxos_share_state_keys, m->values.size());
+ logger->inc(l_paxos_share_state_bytes, bytes);
m->last_committed = last_committed;
}
@@ -318,6 +373,7 @@ bool Paxos::store_state(MMonPaxos *m)
dout(10) << "store_state [" << start->first << ".."
<< last_committed << "]" << dendl;
t.put(get_name(), "last_committed", last_committed);
+
// we should apply the state here -- decode every single bufferlist in the
// map and append the transactions to 't'.
map<version_t,bufferlist>::iterator it;
@@ -345,8 +401,16 @@ bool Paxos::store_state(MMonPaxos *m)
f.flush(*_dout);
*_dout << dendl;
+ logger->inc(l_paxos_store_state);
+ logger->inc(l_paxos_store_state_bytes, t.get_bytes());
+ logger->inc(l_paxos_store_state_keys, t.get_keys());
+ utime_t start = ceph_clock_now(NULL);
+
get_store()->apply_transaction(t);
+ utime_t end = ceph_clock_now(NULL);
+ logger->tinc(l_paxos_store_state_latency, end - start);
+
// refresh first_committed; this txn may have trimmed.
first_committed = get_store()->get(get_name(), "first_committed");
@@ -385,6 +449,7 @@ void Paxos::_sanity_check_store()
void Paxos::handle_last(MMonPaxos *last)
{
bool need_refresh = false;
+ int from = last->get_source().num();
dout(10) << "handle_last " << *last << dendl;
@@ -396,12 +461,13 @@ void Paxos::handle_last(MMonPaxos *last)
// note peer's first_ and last_committed, in case we learn a new
// commit and need to push it to them.
- peer_first_committed[last->get_source().num()] = last->first_committed;
- peer_last_committed[last->get_source().num()] = last->last_committed;
+ peer_first_committed[from] = last->first_committed;
+ peer_last_committed[from] = last->last_committed;
- if (last->first_committed > last_committed+1) {
+ if (last->first_committed > last_committed + 1) {
dout(5) << __func__
- << " peon's lowest version is too high for our last committed"
+ << " mon." << from
+ << " lowest version is too high for our last committed"
<< " (theirs: " << last->first_committed
<< "; ours: " << last_committed << ") -- bootstrap!" << dendl;
last->put();
@@ -416,6 +482,31 @@ void Paxos::handle_last(MMonPaxos *last)
assert(g_conf->paxos_kill_at != 2);
+ // is everyone contiguous and up to date?
+ for (map<int,version_t>::iterator p = peer_last_committed.begin();
+ p != peer_last_committed.end();
+ ++p) {
+ if (p->second + 1 < first_committed && first_committed > 1) {
+ dout(5) << __func__
+ << " peon " << p->first
+ << " last_committed (" << p->second
+ << ") is too low for our first_committed (" << first_committed
+ << ") -- bootstrap!" << dendl;
+ last->put();
+ mon->bootstrap();
+ return;
+ }
+ if (p->second < last_committed) {
+ // share committed values
+ dout(10) << " sending commit to mon." << p->first << dendl;
+ MMonPaxos *commit = new MMonPaxos(mon->get_epoch(),
+ MMonPaxos::OP_COMMIT,
+ ceph_clock_now(g_ceph_context));
+ share_state(commit, peer_first_committed[p->first], p->second);
+ mon->messenger->send_message(commit, mon->monmap->get_inst(p->first));
+ }
+ }
+
// do they accept your pn?
if (last->pn > accepted_pn) {
// no, try again.
@@ -457,21 +548,6 @@ void Paxos::handle_last(MMonPaxos *last)
// cancel timeout event
mon->timer.cancel_event(collect_timeout_event);
collect_timeout_event = 0;
-
- // share committed values?
- for (map<int,version_t>::iterator p = peer_last_committed.begin();
- p != peer_last_committed.end();
- ++p) {
- if (p->second < last_committed) {
- // share committed values
- dout(10) << " sending commit to mon." << p->first << dendl;
- MMonPaxos *commit = new MMonPaxos(mon->get_epoch(),
- MMonPaxos::OP_COMMIT,
- ceph_clock_now(g_ceph_context));
- share_state(commit, peer_first_committed[p->first], p->second);
- mon->messenger->send_message(commit, mon->monmap->get_inst(p->first));
- }
- }
peer_first_committed.clear();
peer_last_committed.clear();
@@ -513,6 +589,7 @@ void Paxos::collect_timeout()
{
dout(1) << "collect timeout, calling fresh election" << dendl;
collect_timeout_event = 0;
+ logger->inc(l_paxos_collect_timeout);
assert(mon->is_leader());
mon->bootstrap();
}
@@ -534,7 +611,7 @@ void Paxos::begin(bufferlist& v)
// and no value, yet.
assert(new_value.length() == 0);
-
+
// accept it ourselves
accepted.clear();
accepted.insert(mon->rank);
@@ -573,8 +650,16 @@ void Paxos::begin(bufferlist& v)
f.flush(*_dout);
*_dout << dendl;
+ logger->inc(l_paxos_begin);
+ logger->inc(l_paxos_begin_keys, t.get_keys());
+ logger->inc(l_paxos_begin_bytes, t.get_bytes());
+ utime_t start = ceph_clock_now(NULL);
+
get_store()->apply_transaction(t);
+ utime_t end = ceph_clock_now(NULL);
+ logger->tinc(l_paxos_begin_latency, end - start);
+
assert(g_conf->paxos_kill_at != 3);
if (mon->get_quorum().size() == 1) {
@@ -629,6 +714,8 @@ void Paxos::handle_begin(MMonPaxos *begin)
assert(g_conf->paxos_kill_at != 4);
+ logger->inc(l_paxos_begin);
+
// set state.
state = STATE_UPDATING;
lease_expire = utime_t(); // cancel lease
@@ -651,8 +738,14 @@ void Paxos::handle_begin(MMonPaxos *begin)
f.flush(*_dout);
*_dout << dendl;
+ logger->inc(l_paxos_begin_bytes, t.get_bytes());
+ utime_t start = ceph_clock_now(NULL);
+
get_store()->apply_transaction(t);
+ utime_t end = ceph_clock_now(NULL);
+ logger->tinc(l_paxos_begin_latency, end - start);
+
assert(g_conf->paxos_kill_at != 5);
// reply
@@ -733,6 +826,7 @@ void Paxos::accept_timeout()
accept_timeout_event = 0;
assert(mon->is_leader());
assert(is_updating() || is_updating_previous());
+ logger->inc(l_paxos_accept_timeout);
mon->bootstrap();
}
@@ -764,8 +858,16 @@ void Paxos::commit()
f.flush(*_dout);
*_dout << dendl;
+ logger->inc(l_paxos_commit);
+ logger->inc(l_paxos_commit_keys, t.get_keys());
+ logger->inc(l_paxos_commit_bytes, t.get_bytes());
+ utime_t start = ceph_clock_now(NULL);
+
get_store()->apply_transaction(t);
+ utime_t end = ceph_clock_now(NULL);
+ logger->tinc(l_paxos_commit_latency, end - start);
+
assert(g_conf->paxos_kill_at != 8);
// refresh first_committed; this txn may have trimmed.
@@ -802,6 +904,8 @@ void Paxos::handle_commit(MMonPaxos *commit)
{
dout(10) << "handle_commit on " << commit->last_committed << dendl;
+ logger->inc(l_paxos_commit);
+
if (!mon->is_peon()) {
dout(10) << "not a peon, dropping" << dendl;
assert(0);
@@ -883,9 +987,15 @@ bool Paxos::do_refresh()
{
bool need_bootstrap = false;
+ utime_t start = ceph_clock_now(NULL);
+
// make sure we have the latest state loaded up
mon->refresh_from_paxos(&need_bootstrap);
+ utime_t end = ceph_clock_now(NULL);
+ logger->inc(l_paxos_refresh);
+ logger->tinc(l_paxos_refresh_latency, end - start);
+
if (need_bootstrap) {
dout(10) << " doing requested bootstrap" << dendl;
mon->bootstrap();
@@ -1019,7 +1129,7 @@ void Paxos::lease_ack_timeout()
dout(1) << "lease_ack_timeout -- calling new election" << dendl;
assert(mon->is_leader());
assert(is_active());
-
+ logger->inc(l_paxos_lease_ack_timeout);
lease_ack_timeout_event = 0;
mon->bootstrap();
}
@@ -1037,7 +1147,7 @@ void Paxos::lease_timeout()
{
dout(1) << "lease_timeout -- calling new election" << dendl;
assert(mon->is_peon());
-
+ logger->inc(l_paxos_lease_timeout);
lease_timeout_event = 0;
mon->bootstrap();
}
@@ -1112,8 +1222,14 @@ version_t Paxos::get_new_proposal_number(version_t gt)
f.flush(*_dout);
*_dout << dendl;
+ logger->inc(l_paxos_new_pn);
+ utime_t start = ceph_clock_now(NULL);
+
get_store()->apply_transaction(t);
+ utime_t end = ceph_clock_now(NULL);
+ logger->tinc(l_paxos_new_pn_latency, end - start);
+
dout(10) << "get_new_proposal_number = " << last_pn << dendl;
return last_pn;
}
@@ -1150,6 +1266,9 @@ void Paxos::shutdown() {
finish_contexts(g_ceph_context, waiting_for_readable, -ECANCELED);
finish_contexts(g_ceph_context, waiting_for_active, -ECANCELED);
finish_contexts(g_ceph_context, proposals, -ECANCELED);
+ if (logger)
+ g_ceph_context->get_perfcounters_collection()->remove(logger);
+ delete logger;
}
void Paxos::leader_init()
@@ -1159,6 +1278,8 @@ void Paxos::leader_init()
finish_contexts(g_ceph_context, proposals, -EAGAIN);
+ logger->inc(l_paxos_start_leader);
+
if (mon->get_quorum().size() == 1) {
state = STATE_ACTIVE;
return;
@@ -1186,6 +1307,8 @@ void Paxos::peon_init()
finish_contexts(g_ceph_context, waiting_for_writeable, -EAGAIN);
finish_contexts(g_ceph_context, waiting_for_commit, -EAGAIN);
finish_contexts(g_ceph_context, proposals, -EAGAIN);
+
+ logger->inc(l_paxos_start_peon);
}
void Paxos::restart()
@@ -1199,6 +1322,8 @@ void Paxos::restart()
finish_contexts(g_ceph_context, proposals, -EAGAIN);
finish_contexts(g_ceph_context, waiting_for_commit, -EAGAIN);
finish_contexts(g_ceph_context, waiting_for_active, -EAGAIN);
+
+ logger->inc(l_paxos_restart);
}
diff --git a/src/mon/Paxos.h b/src/mon/Paxos.h
index b9e43a1..b1ecedc 100644
--- a/src/mon/Paxos.h
+++ b/src/mon/Paxos.h
@@ -118,6 +118,7 @@ e 12v
#include "include/Context.h"
#include "common/Timer.h"
+#include "common/perf_counters.h"
#include <errno.h>
#include "MonitorDBStore.h"
@@ -126,6 +127,43 @@ class Monitor;
class MMonPaxos;
class Paxos;
+enum {
+ l_paxos_first = 45800,
+ l_paxos_start_leader,
+ l_paxos_start_peon,
+ l_paxos_restart,
+ l_paxos_refresh,
+ l_paxos_refresh_latency,
+ l_paxos_begin,
+ l_paxos_begin_keys,
+ l_paxos_begin_bytes,
+ l_paxos_begin_latency,
+ l_paxos_commit,
+ l_paxos_commit_keys,
+ l_paxos_commit_bytes,
+ l_paxos_commit_latency,
+ l_paxos_collect,
+ l_paxos_collect_keys,
+ l_paxos_collect_bytes,
+ l_paxos_collect_latency,
+ l_paxos_collect_uncommitted,
+ l_paxos_collect_timeout,
+ l_paxos_accept_timeout,
+ l_paxos_lease_ack_timeout,
+ l_paxos_lease_timeout,
+ l_paxos_store_state,
+ l_paxos_store_state_keys,
+ l_paxos_store_state_bytes,
+ l_paxos_store_state_latency,
+ l_paxos_share_state,
+ l_paxos_share_state_keys,
+ l_paxos_share_state_bytes,
+ l_paxos_new_pn,
+ l_paxos_new_pn_latency,
+ l_paxos_last,
+};
+
+
// i am one state machine.
/**
* This libary is based on the Paxos algorithm, but varies in a few key ways:
@@ -147,6 +185,11 @@ class Paxos {
*/
Monitor *mon;
+ /// perf counter for internal instrumentations
+ PerfCounters *logger;
+
+ void init_logger();
+
// my state machine info
const string paxos_name;
@@ -1004,6 +1047,7 @@ public:
*/
Paxos(Monitor *m, const string &name)
: mon(m),
+ logger(NULL),
paxos_name(name),
state(STATE_RECOVERING),
first_committed(0),
diff --git a/src/mon/mon_types.h b/src/mon/mon_types.h
index 0ae1aaf..cc68ffb 100644
--- a/src/mon/mon_types.h
+++ b/src/mon/mon_types.h
@@ -16,6 +16,7 @@
#define CEPH_MON_TYPES_H
#include "include/utime.h"
+#include "include/util.h"
#include "common/Formatter.h"
#define PAXOS_PGMAP 0 // before osd, for pg kick to behave
@@ -89,44 +90,50 @@ WRITE_CLASS_ENCODER(LevelDBStoreStats);
// data stats
struct DataStats {
+ ceph_data_stats_t fs_stats;
// data dir
- uint64_t kb_total;
- uint64_t kb_used;
- uint64_t kb_avail;
- int latest_avail_percent;
utime_t last_update;
-
LevelDBStoreStats store_stats;
void dump(Formatter *f) const {
assert(f != NULL);
- f->dump_int("kb_total", kb_total);
- f->dump_int("kb_used", kb_used);
- f->dump_int("kb_avail", kb_avail);
- f->dump_int("avail_percent", latest_avail_percent);
+ f->dump_int("kb_total", (fs_stats.byte_total/1024));
+ f->dump_int("kb_used", (fs_stats.byte_used/1024));
+ f->dump_int("kb_avail", (fs_stats.byte_avail/1024));
+ f->dump_int("avail_percent", fs_stats.avail_percent);
f->dump_stream("last_updated") << last_update;
-
f->open_object_section("store_stats");
store_stats.dump(f);
f->close_section();
}
void encode(bufferlist &bl) const {
- ENCODE_START(2, 1, bl);
- ::encode(kb_total, bl);
- ::encode(kb_used, bl);
- ::encode(kb_avail, bl);
- ::encode(latest_avail_percent, bl);
+ ENCODE_START(3, 1, bl);
+ ::encode(fs_stats.byte_total, bl);
+ ::encode(fs_stats.byte_used, bl);
+ ::encode(fs_stats.byte_avail, bl);
+ ::encode(fs_stats.avail_percent, bl);
::encode(last_update, bl);
::encode(store_stats, bl);
ENCODE_FINISH(bl);
}
void decode(bufferlist::iterator &p) {
DECODE_START(1, p);
- ::decode(kb_total, p);
- ::decode(kb_used, p);
- ::decode(kb_avail, p);
- ::decode(latest_avail_percent, p);
+ // we moved from having fields in kb to fields in byte
+ if (struct_v > 2) {
+ ::decode(fs_stats.byte_total, p);
+ ::decode(fs_stats.byte_used, p);
+ ::decode(fs_stats.byte_avail, p);
+ } else {
+ uint64_t t;
+ ::decode(t, p);
+ fs_stats.byte_total = t*1024;
+ ::decode(t, p);
+ fs_stats.byte_used = t*1024;
+ ::decode(t, p);
+ fs_stats.byte_avail = t*1024;
+ }
+ ::decode(fs_stats.avail_percent, p);
::decode(last_update, p);
if (struct_v > 1)
::decode(store_stats, p);
diff --git a/src/os/FileJournal.cc b/src/os/FileJournal.cc
index 7eb7927..f3f244a 100644
--- a/src/os/FileJournal.cc
+++ b/src/os/FileJournal.cc
@@ -103,12 +103,14 @@ int FileJournal::_open(bool forwrite, bool create)
goto out_fd;
#ifdef HAVE_LIBAIO
- aio_ctx = 0;
- ret = io_setup(128, &aio_ctx);
- if (ret < 0) {
- ret = errno;
- derr << "FileJournal::_open: unable to setup io_context " << cpp_strerror(ret) << dendl;
- goto out_fd;
+ if (aio) {
+ aio_ctx = 0;
+ ret = io_setup(128, &aio_ctx);
+ if (ret < 0) {
+ ret = errno;
+ derr << "FileJournal::_open: unable to setup io_context " << cpp_strerror(ret) << dendl;
+ goto out_fd;
+ }
}
#endif
@@ -544,6 +546,7 @@ void FileJournal::close()
// close
assert(writeq_empty());
+ assert(!must_write_header);
assert(fd >= 0);
VOID_TEMP_FAILURE_RETRY(::close(fd));
fd = -1;
@@ -564,9 +567,9 @@ int FileJournal::dump(ostream& out)
JSONFormatter f(true);
f.open_array_section("journal");
+ uint64_t seq = 0;
while (1) {
bufferlist bl;
- uint64_t seq = 0;
uint64_t pos = read_pos;
if (!read_entry(bl, seq)) {
dout(3) << "journal_replay: end of journal, done." << dendl;
@@ -604,7 +607,8 @@ void FileJournal::start_writer()
write_stop = false;
write_thread.create();
#ifdef HAVE_LIBAIO
- write_finish_thread.create();
+ if (aio)
+ write_finish_thread.create();
#endif
}
@@ -613,19 +617,25 @@ void FileJournal::stop_writer()
{
Mutex::Locker l(write_lock);
#ifdef HAVE_LIBAIO
- Mutex::Locker q(aio_lock);
+ if (aio)
+ aio_lock.Lock();
#endif
Mutex::Locker p(writeq_lock);
write_stop = true;
writeq_cond.Signal();
#ifdef HAVE_LIBAIO
- aio_cond.Signal();
- write_finish_cond.Signal();
+ if (aio) {
+ aio_cond.Signal();
+ write_finish_cond.Signal();
+ aio_lock.Unlock();
+ }
#endif
}
write_thread.join();
#ifdef HAVE_LIBAIO
- write_finish_thread.join();
+ if (aio) {
+ write_finish_thread.join();
+ }
#endif
}
@@ -649,6 +659,13 @@ int FileJournal::read_header()
buffer::ptr bp = buffer::create_page_aligned(block_size);
bp.zero();
int r = ::pread(fd, bp.c_str(), bp.length(), 0);
+
+ if (r < 0) {
+ int err = errno;
+ dout(0) << "read_header got " << cpp_strerror(err) << dendl;
+ return -err;
+ }
+
bl.push_back(bp);
try {
@@ -660,11 +677,6 @@ int FileJournal::read_header()
return -EINVAL;
}
- if (r < 0) {
- int err = errno;
- dout(0) << "read_header got " << cpp_strerror(err) << dendl;
- return -err;
- }
/*
* Unfortunately we weren't initializing the flags field for new
@@ -794,7 +806,8 @@ int FileJournal::prepare_multi_write(bufferlist& bl, uint64_t& orig_ops, uint64_
}
dout(20) << "prepare_multi_write queue_pos now " << queue_pos << dendl;
- //assert(write_pos + bl.length() == queue_pos);
+ assert((write_pos + bl.length() == queue_pos) ||
+ (write_pos + bl.length() - header.max_size + get_top() == queue_pos));
return 0;
}
@@ -993,22 +1006,32 @@ void FileJournal::do_write(bufferlist& bl)
dout(10) << "do_write wrapping, first bit at " << pos << " len " << first.length()
<< " second bit len " << second.length() << " (orig len " << bl.length() << ")" << dendl;
- if (write_bl(pos, first)) {
- derr << "FileJournal::do_write: write_bl(pos=" << pos
- << ") failed" << dendl;
- ceph_abort();
- }
- assert(pos == get_top());
+ //Save pos to write first piece second
+ off64_t first_pos = pos;
+ off64_t orig_pos;
+ pos = get_top();
+ // header too?
if (hbp.length()) {
// be sneaky: include the header in the second fragment
second.push_front(hbp);
pos = 0; // we included the header
}
+ // Write the second portion first possible with the header, so
+ // do_read_entry() won't even get a valid entry_header_t if there
+ // is a crash between the two writes.
+ orig_pos = pos;
if (write_bl(pos, second)) {
- derr << "FileJournal::do_write: write_bl(pos=" << pos
+ derr << "FileJournal::do_write: write_bl(pos=" << orig_pos
+ << ") failed" << dendl;
+ ceph_abort();
+ }
+ orig_pos = first_pos;
+ if (write_bl(first_pos, first)) {
+ derr << "FileJournal::do_write: write_bl(pos=" << orig_pos
<< ") failed" << dendl;
ceph_abort();
}
+ assert(first_pos == get_top());
} else {
// header too?
if (hbp.length()) {
@@ -1102,7 +1125,7 @@ void FileJournal::write_thread_entry()
while (1) {
{
Mutex::Locker locker(writeq_lock);
- if (writeq.empty()) {
+ if (writeq.empty() && !must_write_header) {
if (write_stop)
break;
dout(20) << "write_thread_entry going to sleep" << dendl;
@@ -1113,7 +1136,9 @@ void FileJournal::write_thread_entry()
}
#ifdef HAVE_LIBAIO
- if (aio) {
+ //We hope write_finish_thread_entry return until the last aios complete
+ //when set write_stop. But it can't. So don't use aio mode when shutdown.
+ if (aio && !write_stop) {
Mutex::Locker locker(aio_lock);
// should we back off to limit aios in flight? try to do this
// adaptively so that we submit larger aios once we have lots of
@@ -1164,7 +1189,7 @@ void FileJournal::write_thread_entry()
}
#ifdef HAVE_LIBAIO
- if (aio)
+ if (aio && !write_stop)
do_aio_write(bl);
else
do_write(bl);
@@ -1353,7 +1378,7 @@ void FileJournal::write_finish_thread_entry()
aio_info *ai = (aio_info *)event[i].obj;
if (event[i].res != ai->len) {
derr << "aio to " << ai->off << "~" << ai->len
- << " got " << cpp_strerror(event[i].res) << dendl;
+ << " wrote " << event[i].res << dendl;
assert(0 == "unexpected aio error");
}
dout(10) << "write_finish_thread_entry aio " << ai->off
@@ -1376,7 +1401,7 @@ void FileJournal::check_aio_completion()
assert(aio_lock.is_locked());
dout(20) << "check_aio_completion" << dendl;
- bool completed_something = false;
+ bool completed_something = false, signal = false;
uint64_t new_journaled_seq = 0;
list<aio_info>::iterator p = aio_queue.begin();
@@ -1390,6 +1415,7 @@ void FileJournal::check_aio_completion()
aio_num--;
aio_bytes -= p->len;
aio_queue.erase(p++);
+ signal = true;
}
if (completed_something) {
@@ -1409,7 +1435,8 @@ void FileJournal::check_aio_completion()
queue_completions_thru(journaled_seq);
}
}
-
+ }
+ if (signal) {
// maybe write queue was waiting for aio count to drop?
aio_cond.Signal();
}
diff --git a/src/osd/ECBackend.cc b/src/osd/ECBackend.cc
index aefbb5e..39e3429 100644
--- a/src/osd/ECBackend.cc
+++ b/src/osd/ECBackend.cc
@@ -1245,10 +1245,18 @@ void ECBackend::submit_transaction(
for (set<hobject_t>::iterator i = need_hinfos.begin();
i != need_hinfos.end();
++i) {
+ ECUtil::HashInfoRef ref = get_hash_info(*i);
+ if (!ref) {
+ derr << __func__ << ": get_hash_info(" << *i << ")"
+ << " returned a null pointer and there is no "
+ << " way to recover from such an error in this "
+ << " context" << dendl;
+ assert(0);
+ }
op->unstable_hash_infos.insert(
make_pair(
*i,
- get_hash_info(*i)));
+ ref));
}
for (vector<pg_log_entry_t>::iterator i = op->log_entries.begin();
@@ -1458,7 +1466,7 @@ ECUtil::HashInfoRef ECBackend::get_hash_info(
::decode(hinfo, bp);
assert(hinfo.get_total_chunk_size() == (unsigned)st.st_size);
} else {
- assert(0 == "missing hash attr");
+ return ECUtil::HashInfoRef();
}
}
ref = unstable_hashinfo_registry.lookup_or_create(hoid, hinfo);
@@ -1754,31 +1762,37 @@ void ECBackend::be_deep_scrub(
break;
}
- ECUtil::HashInfoRef hinfo = get_hash_info(poid);
if (r == -EIO) {
dout(0) << "_scan_list " << poid << " got "
<< r << " on read, read_error" << dendl;
o.read_error = true;
}
- if (hinfo->get_chunk_hash(get_parent()->whoami_shard().shard) != h.digest()) {
- dout(0) << "_scan_list " << poid << " got incorrect hash on read" << dendl;
+ ECUtil::HashInfoRef hinfo = get_hash_info(poid);
+ if (!hinfo) {
+ dout(0) << "_scan_list " << poid << " could not retrieve hash info" << dendl;
o.read_error = true;
- }
+ o.digest_present = false;
+ } else {
+ if (hinfo->get_chunk_hash(get_parent()->whoami_shard().shard) != h.digest()) {
+ dout(0) << "_scan_list " << poid << " got incorrect hash on read" << dendl;
+ o.read_error = true;
+ }
- if (hinfo->get_total_chunk_size() != pos) {
- dout(0) << "_scan_list " << poid << " got incorrect size on read" << dendl;
- o.read_error = true;
- }
+ if (hinfo->get_total_chunk_size() != pos) {
+ dout(0) << "_scan_list " << poid << " got incorrect size on read" << dendl;
+ o.read_error = true;
+ }
- /* We checked above that we match our own stored hash. We cannot
- * send a hash of the actual object, so instead we simply send
- * our locally stored hash of shard 0 on the assumption that if
- * we match our chunk hash and our recollection of the hash for
- * chunk 0 matches that of our peers, there is likely no corruption.
- */
- o.digest = hinfo->get_chunk_hash(0);
- o.digest_present = true;
+ /* We checked above that we match our own stored hash. We cannot
+ * send a hash of the actual object, so instead we simply send
+ * our locally stored hash of shard 0 on the assumption that if
+ * we match our chunk hash and our recollection of the hash for
+ * chunk 0 matches that of our peers, there is likely no corruption.
+ */
+ o.digest = hinfo->get_chunk_hash(0);
+ o.digest_present = true;
+ }
o.omap_digest = 0;
o.omap_digest_present = true;
diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc
index dc67fdd..77ed17a 100644
--- a/src/osd/OSD.cc
+++ b/src/osd/OSD.cc
@@ -3762,6 +3762,53 @@ void OSD::_send_boot()
monc->send_mon_message(mboot);
}
+bool OSD::_lsb_release_set (char *buf, const char *str, map<string,string> *pm, const char *key)
+{
+ if (strncmp (buf, str, strlen (str)) == 0) {
+ char *value;
+
+ if (buf[strlen(buf)-1] == '\n')
+ buf[strlen(buf)-1] = '\0';
+
+ value = buf + strlen (str) + 1;
+ (*pm)[key] = value;
+
+ return true;
+ }
+ return false;
+}
+
+void OSD::_lsb_release_parse (map<string,string> *pm)
+{
+ FILE *fp = NULL;
+ char buf[512];
+
+ fp = popen("lsb_release -idrc", "r");
+ if (!fp) {
+ int ret = -errno;
+ derr << "lsb_release_parse - failed to call lsb_release binary with error: " << cpp_strerror(ret) << dendl;
+ return;
+ }
+
+ while (fgets(buf, sizeof(buf) - 1, fp) != NULL) {
+ if (_lsb_release_set(buf, "Distributor ID:", pm, "distro"))
+ continue;
+ if (_lsb_release_set(buf, "Description:", pm, "distro_description"))
+ continue;
+ if (_lsb_release_set(buf, "Release:", pm, "distro_version"))
+ continue;
+ if (_lsb_release_set(buf, "Codename:", pm, "distro_codename"))
+ continue;
+
+ derr << "unhandled output: " << buf << dendl;
+ }
+
+ if (pclose(fp)) {
+ int ret = -errno;
+ derr << "lsb_release_parse - pclose failed: " << cpp_strerror(ret) << dendl;
+ }
+}
+
void OSD::_collect_metadata(map<string,string> *pm)
{
(*pm)["ceph_version"] = pretty_version_to_str();
@@ -3831,34 +3878,7 @@ void OSD::_collect_metadata(map<string,string> *pm)
}
// distro info
- f = fopen("/etc/lsb-release", "r");
- if (f) {
- char buf[100];
- while (!feof(f)) {
- char *line = fgets(buf, sizeof(buf), f);
- if (!line)
- break;
- char *eq = strchr(buf, '=');
- if (!eq)
- break;
- *eq = '\0';
- ++eq;
- while (*eq == '\"')
- ++eq;
- while (*eq && (eq[strlen(eq)-1] == '\n' ||
- eq[strlen(eq)-1] == '\"'))
- eq[strlen(eq)-1] = '\0';
- if (strcmp(buf, "DISTRIB_ID") == 0)
- (*pm)["distro"] = eq;
- else if (strcmp(buf, "DISTRIB_RELEASE") == 0)
- (*pm)["distro_version"] = eq;
- else if (strcmp(buf, "DISTRIB_CODENAME") == 0)
- (*pm)["distro_codename"] = eq;
- else if (strcmp(buf, "DISTRIB_DESCRIPTION") == 0)
- (*pm)["distro_description"] = eq;
- }
- fclose(f);
- }
+ _lsb_release_parse(pm);
dout(10) << __func__ << " " << *pm << dendl;
}
@@ -5784,8 +5804,13 @@ bool OSD::advance_pg(
next_epoch <= osd_epoch && next_epoch <= max;
++next_epoch) {
OSDMapRef nextmap = service.try_get_map(next_epoch);
- if (!nextmap)
+ if (!nextmap) {
+ dout(20) << __func__ << " missing map " << next_epoch << dendl;
+ // make sure max is bumped up so that we can get past any
+ // gap in maps
+ max = MAX(max, next_epoch + g_conf->osd_map_max_advance);
continue;
+ }
vector<int> newup, newacting;
int up_primary, acting_primary;
@@ -5816,7 +5841,7 @@ bool OSD::advance_pg(
service.pg_update_epoch(pg->info.pgid, lastmap->get_epoch());
pg->handle_activate_map(rctx);
if (next_epoch <= osd_epoch) {
- dout(10) << __func__ << " advanced by max " << g_conf->osd_map_max_advance
+ dout(10) << __func__ << " advanced to max " << max
<< " past min epoch " << min_epoch
<< " ... will requeue " << *pg << dendl;
return false;
@@ -5874,10 +5899,7 @@ void OSD::advance_map(ObjectStore::Transaction& t, C_Contexts *tfin)
while (p != waiting_for_pg.end()) {
spg_t pgid = p->first;
- vector<int> acting;
- int nrep = osdmap->pg_to_acting_osds(pgid.pgid, acting);
- int role = osdmap->calc_pg_role(whoami, acting, nrep);
- if (role >= 0) {
+ if (osdmap->osd_is_valid_op_target(pgid.pgid, whoami)) {
++p; // still me
} else {
dout(10) << " discarding waiting ops for " << pgid << dendl;
@@ -6732,7 +6754,8 @@ void OSD::handle_pg_notify(OpRequestRef op)
PG::CephPeeringEvtRef(
new PG::CephPeeringEvt(
it->first.epoch_sent, it->first.query_epoch,
- PG::MNotifyRec(pg_shard_t(from, it->first.from), it->first)))
+ PG::MNotifyRec(pg_shard_t(from, it->first.from), it->first,
+ op->get_req()->get_connection()->get_features())))
);
}
}
@@ -7564,7 +7587,7 @@ void OSD::handle_op(OpRequestRef op)
if (!pg) {
dout(7) << "hit non-existent pg " << pgid << dendl;
- if (osdmap->get_pg_acting_role(pgid.pgid, whoami) >= 0) {
+ if (osdmap->osd_is_valid_op_target(pgid.pgid, whoami)) {
dout(7) << "we are valid target for op, waiting" << dendl;
waiting_for_pg[pgid].push_back(op);
op->mark_delayed("waiting for pg to exist locally");
@@ -7578,7 +7601,7 @@ void OSD::handle_op(OpRequestRef op)
}
OSDMapRef send_map = get_map(m->get_map_epoch());
- if (send_map->get_pg_acting_role(pgid.pgid, whoami) >= 0) {
+ if (send_map->osd_is_valid_op_target(pgid.pgid, whoami)) {
dout(7) << "dropping request; client will resend when they get new map" << dendl;
} else if (!send_map->have_pg_pool(pgid.pool())) {
dout(7) << "dropping request; pool did not exist" << dendl;
@@ -7835,7 +7858,9 @@ void OSD::process_peering_events(
continue;
}
if (!advance_pg(curmap->get_epoch(), pg, handle, &rctx, &split_pgs)) {
- pg->queue_null(curmap->get_epoch(), curmap->get_epoch());
+ // we need to requeue the PG explicitly since we didn't actually
+ // handle an event
+ peering_wq.queue(pg);
} else if (!pg->peering_queue.empty()) {
PG::CephPeeringEvtRef evt = pg->peering_queue.front();
pg->peering_queue.pop_front();
@@ -7931,7 +7956,12 @@ void OSD::set_disk_tp_priority()
<< dendl;
int cls =
ceph_ioprio_string_to_class(cct->_conf->osd_disk_thread_ioprio_class);
- disk_tp.set_ioprio(cls, cct->_conf->osd_disk_thread_ioprio_priority);
+ if (cls < 0)
+ derr << __func__ << cpp_strerror(cls) << ": "
+ << "osd_disk_thread_ioprio_class is " << cct->_conf->osd_disk_thread_ioprio_class
+ << " but only the following values are allowed: idle, be or rt" << dendl;
+ else
+ disk_tp.set_ioprio(cls, cct->_conf->osd_disk_thread_ioprio_priority);
}
// --------------------------------
diff --git a/src/osd/OSD.h b/src/osd/OSD.h
index e2a3c8e..dea216d 100644
--- a/src/osd/OSD.h
+++ b/src/osd/OSD.h
@@ -1472,6 +1472,8 @@ protected:
void _maybe_boot(epoch_t oldest, epoch_t newest);
void _send_boot();
void _collect_metadata(map<string,string> *pmeta);
+ bool _lsb_release_set(char *buf, const char *str, map<string,string> *pm, const char *key);
+ void _lsb_release_parse (map<string,string> *pm);
void start_waiting_for_healthy();
bool _is_healthy();
@@ -1738,7 +1740,9 @@ protected:
pg->put("SnapTrimWQ");
}
void _clear() {
- osd->snap_trim_queue.clear();
+ while (PG *pg = _dequeue()) {
+ pg->put("SnapTrimWQ");
+ }
}
} snap_trim_wq;
diff --git a/src/osd/OSDMap.h b/src/osd/OSDMap.h
index a347583..b3b7ab6 100644
--- a/src/osd/OSDMap.h
+++ b/src/osd/OSDMap.h
@@ -777,6 +777,18 @@ public:
return calc_pg_role(osd, group, nrep);
}
+ bool osd_is_valid_op_target(pg_t pg, int osd) const {
+ int primary;
+ vector<int> group;
+ int nrep = pg_to_acting_osds(pg, &group, &primary);
+ if (osd == primary)
+ return true;
+ if (pg_is_ec(pg))
+ return false;
+
+ return calc_pg_role(osd, group, nrep) >= 0;
+ }
+
/*
* handy helpers to build simple maps...
diff --git a/src/osd/PG.cc b/src/osd/PG.cc
index 9356df4..ebc2020 100644
--- a/src/osd/PG.cc
+++ b/src/osd/PG.cc
@@ -194,7 +194,8 @@ PG::PG(OSDService *o, OSDMapRef curmap,
finish_sync_event(NULL),
scrub_after_recovery(false),
active_pushes(0),
- recovery_state(this)
+ recovery_state(this),
+ peer_features((uint64_t)-1)
{
#ifdef PG_DEBUG_REFS
osd->add_pgid(p, this);
@@ -1552,6 +1553,9 @@ void PG::activate(ObjectStore::Transaction& t,
pi.hit_set = info.hit_set;
pi.stats.stats.clear();
+ // initialize peer with our purged_snaps.
+ pi.purged_snaps = info.purged_snaps;
+
m = new MOSDPGLog(
i->shard, pg_whoami.shard,
get_osdmap()->get_epoch(), pi);
@@ -4685,7 +4689,10 @@ bool PG::old_peering_msg(epoch_t reply_epoch, epoch_t query_epoch)
void PG::set_last_peering_reset()
{
dout(20) << "set_last_peering_reset " << get_osdmap()->get_epoch() << dendl;
- last_peering_reset = get_osdmap()->get_epoch();
+ if (last_peering_reset != get_osdmap()->get_epoch()) {
+ last_peering_reset = get_osdmap()->get_epoch();
+ reset_interval_flush();
+ }
}
struct FlushState {
@@ -4739,7 +4746,6 @@ void PG::start_peering_interval(
const OSDMapRef osdmap = get_osdmap();
set_last_peering_reset();
- reset_interval_flush();
vector<int> oldacting, oldup;
int oldrole = get_role();
@@ -5277,37 +5283,6 @@ void PG::queue_peering_event(CephPeeringEvtRef evt)
osd->queue_for_peering(this);
}
-void PG::queue_notify(epoch_t msg_epoch,
- epoch_t query_epoch,
- pg_shard_t from, pg_notify_t& i)
-{
- dout(10) << "notify " << i << " from replica " << from << dendl;
- queue_peering_event(
- CephPeeringEvtRef(new CephPeeringEvt(msg_epoch, query_epoch,
- MNotifyRec(from, i))));
-}
-
-void PG::queue_info(epoch_t msg_epoch,
- epoch_t query_epoch,
- pg_shard_t from, pg_info_t& i)
-{
- dout(10) << "info " << i << " from replica " << from << dendl;
- queue_peering_event(
- CephPeeringEvtRef(new CephPeeringEvt(msg_epoch, query_epoch,
- MInfoRec(from, i, msg_epoch))));
-}
-
-void PG::queue_log(epoch_t msg_epoch,
- epoch_t query_epoch,
- pg_shard_t from,
- MOSDPGLog *msg)
-{
- dout(10) << "log " << *msg << " from replica " << from << dendl;
- queue_peering_event(
- CephPeeringEvtRef(new CephPeeringEvt(msg_epoch, query_epoch,
- MLogRec(from, msg))));
-}
-
void PG::queue_null(epoch_t msg_epoch,
epoch_t query_epoch)
{
@@ -5810,8 +5785,29 @@ PG::RecoveryState::Backfilling::react(const RemoteReservationRejected &)
pg->osd->local_reserver.cancel_reservation(pg->info.pgid);
pg->state_set(PG_STATE_BACKFILL_TOOFULL);
+ for (set<pg_shard_t>::iterator it = pg->backfill_targets.begin();
+ it != pg->backfill_targets.end();
+ ++it) {
+ assert(*it != pg->pg_whoami);
+ ConnectionRef con = pg->osd->get_con_osd_cluster(
+ it->osd, pg->get_osdmap()->get_epoch());
+ if (con) {
+ if (con->has_feature(CEPH_FEATURE_BACKFILL_RESERVATION)) {
+ pg->osd->send_message_osd_cluster(
+ new MBackfillReserve(
+ MBackfillReserve::REJECT,
+ spg_t(pg->info.pgid.pgid, it->shard),
+ pg->get_osdmap()->get_epoch()),
+ con.get());
+ }
+ }
+ }
+
pg->osd->recovery_wq.dequeue(pg);
+ pg->waiting_on_backfill.clear();
+ pg->finish_recovery_op(hobject_t::get_max());
+
pg->schedule_backfill_full_retry();
return transit<NotBackfilling>();
}
@@ -6066,14 +6062,33 @@ boost::statechart::result
PG::RecoveryState::RepWaitBackfillReserved::react(const RemoteBackfillReserved &evt)
{
PG *pg = context< RecoveryMachine >().pg;
- pg->osd->send_message_osd_cluster(
- pg->primary.osd,
- new MBackfillReserve(
- MBackfillReserve::GRANT,
- spg_t(pg->info.pgid.pgid, pg->primary.shard),
- pg->get_osdmap()->get_epoch()),
- pg->get_osdmap()->get_epoch());
- return transit<RepRecovering>();
+
+ double ratio, max_ratio;
+ if (g_conf->osd_debug_reject_backfill_probability > 0 &&
+ (rand()%1000 < (g_conf->osd_debug_reject_backfill_probability*1000.0))) {
+ dout(10) << "backfill reservation rejected after reservation: "
+ << "failure injection" << dendl;
+ pg->osd->remote_reserver.cancel_reservation(pg->info.pgid);
+ post_event(RemoteReservationRejected());
+ return discard_event();
+ } else if (pg->osd->too_full_for_backfill(&ratio, &max_ratio) &&
+ !pg->cct->_conf->osd_debug_skip_full_check_in_backfill_reservation) {
+ dout(10) << "backfill reservation rejected after reservation: full ratio is "
+ << ratio << ", which is greater than max allowed ratio "
+ << max_ratio << dendl;
+ pg->osd->remote_reserver.cancel_reservation(pg->info.pgid);
+ post_event(RemoteReservationRejected());
+ return discard_event();
+ } else {
+ pg->osd->send_message_osd_cluster(
+ pg->primary.osd,
+ new MBackfillReserve(
+ MBackfillReserve::GRANT,
+ spg_t(pg->info.pgid.pgid, pg->primary.shard),
+ pg->get_osdmap()->get_epoch()),
+ pg->get_osdmap()->get_epoch());
+ return transit<RepRecovering>();
+ }
}
boost::statechart::result
@@ -6097,7 +6112,7 @@ PG::RecoveryState::RepRecovering::react(const BackfillTooFull &)
{
PG *pg = context< RecoveryMachine >().pg;
pg->reject_reservation();
- return transit<RepNotRecovering>();
+ return discard_event();
}
void PG::RecoveryState::RepRecovering::exit()
@@ -6839,6 +6854,7 @@ PG::RecoveryState::GetInfo::GetInfo(my_context ctx)
pg->publish_stats_to_osd();
+ pg->reset_peer_features();
get_infos();
if (peer_info_requested.empty() && !prior_set->pg_down) {
post_event(GotInfo());
@@ -6906,6 +6922,9 @@ boost::statechart::result PG::RecoveryState::GetInfo::react(const MNotifyRec& in
}
get_infos();
}
+ dout(20) << "Adding osd: " << infoevt.from.osd << " features: "
+ << hex << infoevt.features << dec << dendl;
+ pg->apply_peer_features(infoevt.features);
// are we done getting everything?
if (peer_info_requested.empty() && !prior_set->pg_down) {
@@ -6964,6 +6983,7 @@ boost::statechart::result PG::RecoveryState::GetInfo::react(const MNotifyRec& in
break;
}
}
+ dout(20) << "Common features: " << hex << pg->get_min_peer_features() << dec << dendl;
post_event(GotInfo());
}
}
diff --git a/src/osd/PG.h b/src/osd/PG.h
index 1aadaf0..e319477 100644
--- a/src/osd/PG.h
+++ b/src/osd/PG.h
@@ -1308,10 +1308,12 @@ public:
struct MNotifyRec : boost::statechart::event< MNotifyRec > {
pg_shard_t from;
pg_notify_t notify;
- MNotifyRec(pg_shard_t from, pg_notify_t ¬ify) :
- from(from), notify(notify) {}
+ uint64_t features;
+ MNotifyRec(pg_shard_t from, pg_notify_t ¬ify, uint64_t f) :
+ from(from), notify(notify), features(f) {}
void print(std::ostream *out) const {
- *out << "MNotifyRec from " << from << " notify: " << notify;
+ *out << "MNotifyRec from " << from << " notify: " << notify
+ << " features: 0x" << hex << features << dec;
}
};
@@ -1993,11 +1995,16 @@ public:
// Prevent copying
PG(const PG& rhs);
PG& operator=(const PG& rhs);
+ uint64_t peer_features;
public:
spg_t get_pgid() const { return info.pgid; }
int get_nrep() const { return acting.size(); }
+ void reset_peer_features() { peer_features = (uint64_t)-1; }
+ uint64_t get_min_peer_features() { return peer_features; }
+ void apply_peer_features(uint64_t f) { peer_features &= f; }
+
void init_primary_up_acting(
const vector<int> &newup,
const vector<int> &newacting,
@@ -2189,12 +2196,6 @@ public:
void take_waiters();
void queue_peering_event(CephPeeringEvtRef evt);
void handle_peering_event(CephPeeringEvtRef evt, RecoveryCtx *rctx);
- void queue_notify(epoch_t msg_epoch, epoch_t query_epoch,
- pg_shard_t from, pg_notify_t& i);
- void queue_info(epoch_t msg_epoch, epoch_t query_epoch,
- pg_shard_t from, pg_info_t& i);
- void queue_log(epoch_t msg_epoch, epoch_t query_epoch, pg_shard_t from,
- MOSDPGLog *msg);
void queue_query(epoch_t msg_epoch, epoch_t query_epoch,
pg_shard_t from, const pg_query_t& q);
void queue_null(epoch_t msg_epoch, epoch_t query_epoch);
diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc
index d23e6fc..f1911c1 100644
--- a/src/osd/ReplicatedPG.cc
+++ b/src/osd/ReplicatedPG.cc
@@ -595,6 +595,7 @@ int ReplicatedPG::do_command(cmdmap_t cmdmap, ostream& ss,
if (command == "query") {
f->open_object_section("pg");
f->dump_string("state", pg_state_string(get_state()));
+ f->dump_stream("snap_trimq") << snap_trimq;
f->dump_unsigned("epoch", get_osdmap()->get_epoch());
f->open_array_section("up");
for (vector<int>::iterator p = up.begin(); p != up.end(); ++p)
@@ -2072,12 +2073,16 @@ void ReplicatedPG::do_scan(
}
peer_backfill_info[from] = bi;
- assert(waiting_on_backfill.find(from) != waiting_on_backfill.end());
- waiting_on_backfill.erase(from);
+ if (waiting_on_backfill.find(from) != waiting_on_backfill.end()) {
+ waiting_on_backfill.erase(from);
- if (waiting_on_backfill.empty()) {
- assert(peer_backfill_info.size() == backfill_targets.size());
- finish_recovery_op(hobject_t::get_max());
+ if (waiting_on_backfill.empty()) {
+ assert(peer_backfill_info.size() == backfill_targets.size());
+ finish_recovery_op(hobject_t::get_max());
+ }
+ } else {
+ // we canceled backfill for a while due to a too full, and this
+ // is an extra response from a non-too-full peer
}
}
break;
@@ -2560,10 +2565,6 @@ void ReplicatedPG::snap_trimmer()
// replica collection trimming
snap_trimmer_machine.process_event(SnapTrim());
}
- if (snap_trimmer_machine.requeue) {
- dout(10) << "snap_trimmer requeue" << dendl;
- queue_snap_trim();
- }
unlock();
return;
}
@@ -3353,6 +3354,11 @@ int ReplicatedPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops)
break;
}
result = _delete_oid(ctx, true);
+ if (result >= 0) {
+ // mark that this is a cache eviction to avoid triggering normal
+ // make_writeable() clone or snapdir object creation in finish_ctx()
+ ctx->cache_evict = true;
+ }
osd->logger->inc(l_osd_tier_evict);
}
break;
@@ -3645,6 +3651,10 @@ int ReplicatedPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops)
case CEPH_OSD_OP_SETALLOCHINT:
++ctx->num_write;
{
+ if (!(get_min_peer_features() & CEPH_FEATURE_OSD_SET_ALLOC_HINT)) {
+ result = -EOPNOTSUPP;
+ break;
+ }
if (!obs.exists) {
ctx->mod_desc.create();
t->touch(soid);
@@ -4836,6 +4846,7 @@ void ReplicatedPG::make_writeable(OpContext *ctx)
if ((ctx->obs->exists && !ctx->obs->oi.is_whiteout()) && // head exist(ed)
snapc.snaps.size() && // there are snaps
+ !ctx->cache_evict &&
snapc.snaps[0] > ctx->new_snapset.seq) { // existing object is old
// clone
hobject_t coid = soid;
@@ -5156,7 +5167,9 @@ void ReplicatedPG::finish_ctx(OpContext *ctx, int log_op_type, bool maintain_ssc
ctx->snapset_obc->obs.exists = false;
}
}
- } else if (ctx->new_snapset.clones.size()) {
+ } else if (ctx->new_snapset.clones.size() &&
+ !ctx->cache_evict &&
+ (!ctx->snapset_obc || !ctx->snapset_obc->obs.exists)) {
// save snapset on _snap
hobject_t snapoid(soid.oid, soid.get_key(), CEPH_SNAPDIR, soid.hash,
info.pgid.pool(), soid.get_namespace());
@@ -5167,7 +5180,8 @@ void ReplicatedPG::finish_ctx(OpContext *ctx, int log_op_type, bool maintain_ssc
eversion_t(),
0, osd_reqid_t(), ctx->mtime));
- ctx->snapset_obc = get_object_context(snapoid, true);
+ if (!ctx->snapset_obc)
+ ctx->snapset_obc = get_object_context(snapoid, true);
bool got = ctx->snapset_obc->get_write_greedy(ctx->op);
assert(got);
dout(20) << " got greedy write on snapset_obc " << *ctx->snapset_obc << dendl;
@@ -6968,6 +6982,7 @@ void ReplicatedPG::check_blacklisted_obc_watchers(ObjectContextRef obc)
if (get_osdmap()->is_blacklisted(ea)) {
dout(10) << "watch: Found blacklisted watcher for " << ea << dendl;
assert(j->second->get_pg() == this);
+ j->second->unregister_cb();
handle_watch_timeout(j->second);
}
}
@@ -9334,6 +9349,13 @@ void ReplicatedPG::on_removal(ObjectStore::Transaction *t)
// adjust info to backfill
info.last_backfill = hobject_t();
dirty_info = true;
+
+
+ // clear log
+ PGLogEntryHandler rollbacker;
+ pg_log.clear_can_rollback_to(&rollbacker);
+ rollbacker.apply(this, t);
+
write_if_dirty(*t);
on_shutdown();
@@ -10952,7 +10974,7 @@ void ReplicatedPG::hit_set_persist()
pg_log_entry_t::MODIFY,
oid,
ctx->at_version,
- ctx->obs->oi.version,
+ eversion_t(),
0,
osd_reqid_t(),
ctx->mtime)
@@ -11974,13 +11996,11 @@ ReplicatedPG::NotTrimming::NotTrimming(my_context ctx)
: my_base(ctx),
NamedState(context< SnapTrimmer >().pg->cct, "NotTrimming")
{
- context< SnapTrimmer >().requeue = false;
context< SnapTrimmer >().log_enter(state_name);
}
void ReplicatedPG::NotTrimming::exit()
{
- context< SnapTrimmer >().requeue = true;
context< SnapTrimmer >().log_exit(state_name, enter_time);
}
@@ -12040,32 +12060,45 @@ boost::statechart::result ReplicatedPG::TrimmingObjects::react(const SnapTrim&)
dout(10) << "TrimmingObjects: trimming snap " << snap_to_trim << dendl;
- // Get next
- hobject_t old_pos = pos;
- int r = pg->snap_mapper.get_next_object_to_trim(snap_to_trim, &pos);
- if (r != 0 && r != -ENOENT) {
- derr << __func__ << ": get_next returned " << cpp_strerror(r) << dendl;
- assert(0);
- } else if (r == -ENOENT) {
- // Done!
- dout(10) << "TrimmingObjects: got ENOENT" << dendl;
- post_event(SnapTrim());
- return transit< WaitingOnReplicas >();
+ for (set<RepGather *>::iterator i = repops.begin();
+ i != repops.end();
+ ) {
+ if ((*i)->all_applied && (*i)->all_committed) {
+ (*i)->put();
+ repops.erase(i++);
+ } else {
+ ++i;
+ }
}
- dout(10) << "TrimmingObjects react trimming " << pos << dendl;
- RepGather *repop = pg->trim_object(pos);
- if (!repop) {
- dout(10) << __func__ << " could not get write lock on obj "
- << pos << dendl;
- pos = old_pos;
- return discard_event();
- }
- assert(repop);
- repop->queue_snap_trimmer = true;
+ while (repops.size() < g_conf->osd_pg_max_concurrent_snap_trims) {
+ // Get next
+ hobject_t old_pos = pos;
+ int r = pg->snap_mapper.get_next_object_to_trim(snap_to_trim, &pos);
+ if (r != 0 && r != -ENOENT) {
+ derr << __func__ << ": get_next returned " << cpp_strerror(r) << dendl;
+ assert(0);
+ } else if (r == -ENOENT) {
+ // Done!
+ dout(10) << "TrimmingObjects: got ENOENT" << dendl;
+ post_event(SnapTrim());
+ return transit< WaitingOnReplicas >();
+ }
+
+ dout(10) << "TrimmingObjects react trimming " << pos << dendl;
+ RepGather *repop = pg->trim_object(pos);
+ if (!repop) {
+ dout(10) << __func__ << " could not get write lock on obj "
+ << pos << dendl;
+ pos = old_pos;
+ return discard_event();
+ }
+ assert(repop);
+ repop->queue_snap_trimmer = true;
- repops.insert(repop->get());
- pg->simple_repop_submit(repop);
+ repops.insert(repop->get());
+ pg->simple_repop_submit(repop);
+ }
return discard_event();
}
/* WaitingOnReplicasObjects */
@@ -12074,7 +12107,6 @@ ReplicatedPG::WaitingOnReplicas::WaitingOnReplicas(my_context ctx)
NamedState(context< SnapTrimmer >().pg->cct, "Trimming/WaitingOnReplicas")
{
context< SnapTrimmer >().log_enter(state_name);
- context< SnapTrimmer >().requeue = false;
}
void ReplicatedPG::WaitingOnReplicas::exit()
@@ -12099,7 +12131,7 @@ boost::statechart::result ReplicatedPG::WaitingOnReplicas::react(const SnapTrim&
for (set<RepGather *>::iterator i = repops.begin();
i != repops.end();
repops.erase(i++)) {
- if (!(*i)->all_applied) {
+ if (!(*i)->all_applied || !(*i)->all_committed) {
return discard_event();
} else {
(*i)->put();
@@ -12124,7 +12156,7 @@ boost::statechart::result ReplicatedPG::WaitingOnReplicas::react(const SnapTrim&
context<SnapTrimmer>().need_share_pg_info = true;
// Back to the start
- post_event(SnapTrim());
+ pg->queue_snap_trim();
return transit< NotTrimming >();
}
diff --git a/src/osd/ReplicatedPG.h b/src/osd/ReplicatedPG.h
index 9ef131c..dc8ee62 100644
--- a/src/osd/ReplicatedPG.h
+++ b/src/osd/ReplicatedPG.h
@@ -438,6 +438,7 @@ public:
bool modify; // (force) modification (even if op_t is empty)
bool user_modify; // user-visible modification
bool undirty; // user explicitly un-dirtying this object
+ bool cache_evict; ///< true if this is a cache eviction
// side effects
list<watch_info_t> watch_connects;
@@ -539,7 +540,7 @@ public:
ReplicatedPG *_pg) :
op(_op), reqid(_reqid), ops(_ops), obs(_obs), snapset(0),
new_obs(_obs->oi, _obs->exists),
- modify(false), user_modify(false), undirty(false),
+ modify(false), user_modify(false), undirty(false), cache_evict(false),
bytes_written(0), bytes_read(0), user_at_version(0),
current_osd_subop_num(0),
op_t(NULL),
@@ -1331,8 +1332,7 @@ private:
set<RepGather *> repops;
snapid_t snap_to_trim;
bool need_share_pg_info;
- bool requeue;
- SnapTrimmer(ReplicatedPG *pg) : pg(pg), need_share_pg_info(false), requeue(false) {}
+ SnapTrimmer(ReplicatedPG *pg) : pg(pg), need_share_pg_info(false) {}
~SnapTrimmer();
void log_enter(const char *state_name);
void log_exit(const char *state_name, utime_t duration);
diff --git a/src/osd/Watch.h b/src/osd/Watch.h
index e2cbfc1..91a4574 100644
--- a/src/osd/Watch.h
+++ b/src/osd/Watch.h
@@ -98,6 +98,7 @@ class Notify {
/// removes the timeout callback, called on completion or cancellation
void unregister_cb();
public:
+
string gen_dbg_prefix() {
stringstream ss;
ss << "Notify(" << make_pair(cookie, notify_id) << " "
@@ -172,15 +173,15 @@ class Watch {
/// Registers the timeout callback with watch_timer
void register_cb();
- /// Unregisters the timeout callback
- void unregister_cb();
-
/// send a Notify message when connected for notif
void send_notify(NotifyRef notif);
/// Cleans up state on discard or remove (including Connection state, obc)
void discard_state();
public:
+ /// Unregisters the timeout callback
+ void unregister_cb();
+
/// NOTE: must be called with pg lock held
~Watch();
diff --git a/src/osd/osd_types.cc b/src/osd/osd_types.cc
index 16bdbaf..d08e9b7 100644
--- a/src/osd/osd_types.cc
+++ b/src/osd/osd_types.cc
@@ -2179,9 +2179,9 @@ void pg_interval_t::dump(Formatter *f) const
f->open_array_section("acting");
for (vector<int>::const_iterator p = acting.begin(); p != acting.end(); ++p)
f->dump_int("osd", *p);
+ f->close_section();
f->dump_int("primary", primary);
f->dump_int("up_primary", up_primary);
- f->close_section();
}
void pg_interval_t::generate_test_instances(list<pg_interval_t*>& o)
@@ -2235,9 +2235,15 @@ bool pg_interval_t::check_new_interval(
i.primary = old_acting_primary;
i.up_primary = old_up_primary;
- if (!i.acting.empty() && i.primary != -1 &&
- i.acting.size() >=
- lastmap->get_pools().find(pool_id)->second.min_size) {
+ unsigned num_acting = 0;
+ for (vector<int>::const_iterator p = i.acting.begin(); p != i.acting.end();
+ ++p)
+ if (*p != CRUSH_ITEM_NONE)
+ ++num_acting;
+
+ if (num_acting &&
+ i.primary != -1 &&
+ num_acting >= lastmap->get_pools().find(pgid.pool())->second.min_size) {
if (out)
*out << "generate_past_intervals " << i
<< ": not rw,"
diff --git a/src/osdc/ObjectCacher.cc b/src/osdc/ObjectCacher.cc
index e1499b4..95abee1 100644
--- a/src/osdc/ObjectCacher.cc
+++ b/src/osdc/ObjectCacher.cc
@@ -796,6 +796,8 @@ void ObjectCacher::bh_read_finish(int64_t poolid, sobject_t oid, ceph_tid_t tid,
ldout(cct, 20) << "finishing waiters " << ls << dendl;
finish_contexts(cct, ls, err);
+ retry_waiting_reads();
+
--reads_outstanding;
read_cond.Signal();
}
@@ -1105,18 +1107,35 @@ int ObjectCacher::_readx(OSDRead *rd, ObjectSet *oset, Context *onfinish,
// TODO: make read path not call _readx for every completion
hits.insert(errors.begin(), errors.end());
}
-
+
if (!missing.empty() || !rx.empty()) {
// read missing
for (map<loff_t, BufferHead*>::iterator bh_it = missing.begin();
bh_it != missing.end();
++bh_it) {
- bh_read(bh_it->second);
- if (success && onfinish) {
- ldout(cct, 10) << "readx missed, waiting on " << *bh_it->second
- << " off " << bh_it->first << dendl;
- bh_it->second->waitfor_read[bh_it->first].push_back( new C_RetryRead(this, rd, oset, onfinish) );
- }
+ uint64_t rx_bytes = static_cast<uint64_t>(
+ stat_rx + bh_it->second->length());
+ if (!waitfor_read.empty() || rx_bytes > max_size) {
+ // cache is full with concurrent reads -- wait for rx's to complete
+ // to constrain memory growth (especially during copy-ups)
+ if (success) {
+ ldout(cct, 10) << "readx missed, waiting on cache to complete "
+ << waitfor_read.size() << " blocked reads, "
+ << (MAX(rx_bytes, max_size) - max_size)
+ << " read bytes" << dendl;
+ waitfor_read.push_back(new C_RetryRead(this, rd, oset, onfinish));
+ }
+
+ bh_remove(o, bh_it->second);
+ delete bh_it->second;
+ } else {
+ bh_read(bh_it->second);
+ if (success && onfinish) {
+ ldout(cct, 10) << "readx missed, waiting on " << *bh_it->second
+ << " off " << bh_it->first << dendl;
+ bh_it->second->waitfor_read[bh_it->first].push_back( new C_RetryRead(this, rd, oset, onfinish) );
+ }
+ }
bytes_not_in_cache += bh_it->second->length();
success = false;
}
@@ -1230,7 +1249,7 @@ int ObjectCacher::_readx(OSDRead *rd, ObjectSet *oset, Context *onfinish,
// no misses... success! do the read.
assert(!hit_ls.empty());
ldout(cct, 10) << "readx has all buffers" << dendl;
-
+
// ok, assemble into result buffer.
uint64_t pos = 0;
if (rd->bl && !error) {
@@ -1263,6 +1282,18 @@ int ObjectCacher::_readx(OSDRead *rd, ObjectSet *oset, Context *onfinish,
return ret;
}
+void ObjectCacher::retry_waiting_reads()
+{
+ list<Context *> ls;
+ ls.swap(waitfor_read);
+
+ while (!ls.empty() && waitfor_read.empty()) {
+ Context *ctx = ls.front();
+ ls.pop_front();
+ ctx->complete(0);
+ }
+ waitfor_read.splice(waitfor_read.end(), ls);
+}
int ObjectCacher::writex(OSDWrite *wr, ObjectSet *oset, Mutex& wait_on_lock,
Context *onfreespace)
diff --git a/src/osdc/ObjectCacher.h b/src/osdc/ObjectCacher.h
index d2aebe9..b48f8ac 100644
--- a/src/osdc/ObjectCacher.h
+++ b/src/osdc/ObjectCacher.h
@@ -341,6 +341,8 @@ class ObjectCacher {
vector<ceph::unordered_map<sobject_t, Object*> > objects; // indexed by pool_id
+ list<Context*> waitfor_read;
+
ceph_tid_t last_read_tid;
set<BufferHead*> dirty_bh;
@@ -457,6 +459,7 @@ class ObjectCacher {
int _readx(OSDRead *rd, ObjectSet *oset, Context *onfinish,
bool external_call);
+ void retry_waiting_reads();
public:
void bh_read_finish(int64_t poolid, sobject_t oid, ceph_tid_t tid,
diff --git a/src/osdc/Objecter.cc b/src/osdc/Objecter.cc
index d82b3e1..57954a5 100644
--- a/src/osdc/Objecter.cc
+++ b/src/osdc/Objecter.cc
@@ -1615,7 +1615,10 @@ void Objecter::finish_op(Op *op)
ops.erase(op->tid);
logger->set(l_osdc_op_active, ops.size());
- assert(check_latest_map_ops.find(op->tid) == check_latest_map_ops.end());
+
+ // our reply may have raced with pool deletion resulting in a map
+ // check in flight.
+ op_cancel_map_check(op);
if (op->ontimeout)
timer.cancel_event(op->ontimeout);
@@ -1640,7 +1643,9 @@ void Objecter::send_op(Op *op)
ldout(cct, 20) << " revoking rx buffer for " << op->tid << " on " << op->con << dendl;
op->con->revoke_rx_buffer(op->tid);
}
- if (op->outbl && op->outbl->length()) {
+ if (op->outbl &&
+ op->ontimeout == NULL && // only post rx_buffer if no timeout; see #9582
+ op->outbl->length()) {
ldout(cct, 20) << " posting rx buffer for " << op->tid << " on " << op->session->con << dendl;
op->con = op->session->con;
op->con->post_rx_buffer(op->tid, *op->outbl);
diff --git a/src/pybind/rados.py b/src/pybind/rados.py
index 0fbd10e..ec68919 100644
--- a/src/pybind/rados.py
+++ b/src/pybind/rados.py
@@ -246,7 +246,8 @@ Rados object in state %s." % (self.state))
def shutdown(self):
"""
- Disconnects from the cluster.
+ Disconnects from the cluster. Call this explicitly when a
+ Rados.connect()ed object is no longer used.
"""
if (self.__dict__.has_key("state") and self.state != "shutdown"):
run_in_thread(self.librados.rados_shutdown, (self.cluster,))
@@ -260,9 +261,6 @@ Rados object in state %s." % (self.state))
self.shutdown()
return False
- def __del__(self):
- self.shutdown()
-
def version(self):
"""
Get the version number of the ``librados`` C library.
@@ -410,7 +408,7 @@ Rados object in state %s." % (self.state))
def connect(self, timeout=0):
"""
- Connect to the cluster.
+ Connect to the cluster. Use shutdown() to release resources.
"""
self.require_state("configuring")
ret = run_in_thread(self.librados.rados_connect, (self.cluster,),
diff --git a/src/rgw/Makefile.am b/src/rgw/Makefile.am
index 3d6886d..78c022b 100644
--- a/src/rgw/Makefile.am
+++ b/src/rgw/Makefile.am
@@ -53,6 +53,18 @@ LIBRGW_DEPS += \
-lfcgi \
-ldl
+CIVETWEB_INCLUDE = --include civetweb/include/civetweb_conf.h
+
+libcivetweb_la_SOURCES = \
+ rgw/rgw_civetweb.cc \
+ rgw/rgw_civetweb_log.cc \
+ civetweb/src/civetweb.c
+
+libcivetweb_la_CXXFLAGS = ${CIVETWEB_INCLUDE} -Woverloaded-virtual ${AM_CXXFLAGS}
+libcivetweb_la_CFLAGS = -Icivetweb/include ${CIVETWEB_INCLUDE}
+
+noinst_LTLIBRARIES += libcivetweb.la
+
radosgw_SOURCES = \
rgw/rgw_resolve.cc \
rgw/rgw_rest.cc \
@@ -71,11 +83,9 @@ radosgw_SOURCES = \
rgw/rgw_swift.cc \
rgw/rgw_swift_auth.cc \
rgw/rgw_loadgen.cc \
- rgw/rgw_civetweb.cc \
- civetweb/src/civetweb.c \
rgw/rgw_main.cc
-radosgw_CFLAGS = -Icivetweb/include
-radosgw_LDADD = $(LIBRGW) $(LIBRGW_DEPS) $(RESOLV_LIBS) $(CEPH_GLOBAL)
+radosgw_CFLAGS = -I$(srcdir)/civetweb/include
+radosgw_LDADD = $(LIBRGW) $(LIBCIVETWEB) $(LIBRGW_DEPS) $(RESOLV_LIBS) $(CEPH_GLOBAL)
bin_PROGRAMS += radosgw
radosgw_admin_SOURCES = rgw/rgw_admin.cc
@@ -158,7 +168,9 @@ noinst_HEADERS += \
rgw/rgw_bucket.h \
rgw/rgw_keystone.h \
rgw/rgw_civetweb.h \
+ rgw/rgw_civetweb_log.h \
civetweb/civetweb.h \
civetweb/include/civetweb.h \
+ civetweb/include/civetweb_conf.h \
civetweb/src/md5.h
diff --git a/src/rgw/rgw_admin.cc b/src/rgw/rgw_admin.cc
index e31a28a..f6c5619 100644
--- a/src/rgw/rgw_admin.cc
+++ b/src/rgw/rgw_admin.cc
@@ -679,7 +679,11 @@ void set_quota_info(RGWQuotaInfo& quota, int opt_cmd, int64_t max_size, int64_t
quota.max_objects = max_objects;
}
if (have_max_size) {
- quota.max_size_kb = rgw_rounded_kb(max_size);
+ if (max_size < 0) {
+ quota.max_size_kb = -1;
+ } else {
+ quota.max_size_kb = rgw_rounded_kb(max_size);
+ }
}
break;
case OPT_QUOTA_DISABLE:
@@ -1364,7 +1368,13 @@ int main(int argc, char **argv)
cerr << "could not create user: " << err_msg << std::endl;
return -ret;
}
-
+ if (!subuser.empty()) {
+ ret = user.subusers.add(user_op, &err_msg);
+ if (ret < 0) {
+ cerr << "could not create subuser: " << err_msg << std::endl;
+ return -ret;
+ }
+ }
break;
case OPT_USER_RM:
ret = user.remove(user_op, &err_msg);
diff --git a/src/rgw/rgw_civetweb.cc b/src/rgw/rgw_civetweb.cc
index a31177f..b44a40c 100644
--- a/src/rgw/rgw_civetweb.cc
+++ b/src/rgw/rgw_civetweb.cc
@@ -42,7 +42,7 @@ int RGWMongoose::complete_request()
if (0 && data.length() == 0) {
has_content_length = true;
- print("Transfer-Enconding: %s\n", "chunked");
+ print("Transfer-Enconding: %s\r\n", "chunked");
data.append("0\r\n\r\n", sizeof("0\r\n\r\n")-1);
} else {
int r = send_content_length(data.length());
@@ -128,7 +128,7 @@ int RGWMongoose::send_status(const char *status, const char *status_name)
if (!status_name)
status_name = "";
- snprintf(buf, sizeof(buf), "HTTP/1.1 %s %s\n", status, status_name);
+ snprintf(buf, sizeof(buf), "HTTP/1.1 %s %s\r\n", status, status_name);
bufferlist bl;
bl.append(buf);
@@ -168,5 +168,5 @@ int RGWMongoose::send_content_length(uint64_t len)
has_content_length = true;
char buf[21];
snprintf(buf, sizeof(buf), "%"PRIu64, len);
- return print("Content-Length: %s\n", buf);
+ return print("Content-Length: %s\r\n", buf);
}
diff --git a/src/rgw/rgw_civetweb_log.cc b/src/rgw/rgw_civetweb_log.cc
new file mode 100644
index 0000000..720bab5
--- /dev/null
+++ b/src/rgw/rgw_civetweb_log.cc
@@ -0,0 +1,14 @@
+#include "common/config.h"
+#include "rgw_common.h"
+
+#include "civetweb/civetweb.h"
+
+#define dout_subsys ceph_subsys_civetweb
+
+
+int rgw_civetweb_log_callback(const struct mg_connection *conn, const char *buf) {
+ dout(10) << "civetweb: " << (void *)conn << ": " << buf << dendl;
+ return 0;
+}
+
+
diff --git a/src/rgw/rgw_civetweb_log.h b/src/rgw/rgw_civetweb_log.h
new file mode 100644
index 0000000..6c6b2c0
--- /dev/null
+++ b/src/rgw/rgw_civetweb_log.h
@@ -0,0 +1,6 @@
+#ifndef CEPH_RGW_CIVETWEB_LOG_H
+#define CEPH_RGW_CIVETWEB_LOG_H
+
+int rgw_civetweb_log_callback(const struct mg_connection *conn, const char *buf);
+
+#endif
diff --git a/src/rgw/rgw_common.cc b/src/rgw/rgw_common.cc
index 5a1043f..36b8ed3 100644
--- a/src/rgw/rgw_common.cc
+++ b/src/rgw/rgw_common.cc
@@ -506,7 +506,7 @@ int XMLArgs::parse()
}
string substr, nameval;
substr = str.substr(pos, fpos - pos);
- url_decode(substr, nameval);
+ url_decode(substr, nameval, true);
NameVal nv(nameval);
int ret = nv.parse();
if (ret >= 0) {
@@ -690,14 +690,13 @@ static char hex_to_num(char c)
return hex_table.to_num(c);
}
-bool url_decode(string& src_str, string& dest_str)
+bool url_decode(string& src_str, string& dest_str, bool in_query)
{
const char *src = src_str.c_str();
char dest[src_str.size() + 1];
int pos = 0;
char c;
- bool in_query = false;
while (*src) {
if (*src != '%') {
if (!in_query || *src != '+') {
diff --git a/src/rgw/rgw_common.h b/src/rgw/rgw_common.h
index 4d7a118..432e82a 100644
--- a/src/rgw/rgw_common.h
+++ b/src/rgw/rgw_common.h
@@ -1343,7 +1343,7 @@ extern bool verify_object_permission(struct req_state *s, RGWAccessControlPolicy
extern bool verify_object_permission(struct req_state *s, int perm);
/** Convert an input URL into a sane object name
* by converting %-escaped strings into characters, etc*/
-extern bool url_decode(string& src_str, string& dest_str);
+extern bool url_decode(string& src_str, string& dest_str, bool in_query = false);
extern void url_encode(const string& src, string& dst);
extern void calc_hmac_sha1(const char *key, int key_len,
diff --git a/src/rgw/rgw_fcgi.cc b/src/rgw/rgw_fcgi.cc
index 4b24dab..a9af45c 100644
--- a/src/rgw/rgw_fcgi.cc
+++ b/src/rgw/rgw_fcgi.cc
@@ -32,7 +32,7 @@ void RGWFCGX::init_env(CephContext *cct)
int RGWFCGX::send_status(const char *status, const char *status_name)
{
- return print("Status: %s\n", status);
+ return print("Status: %s %s\r\n", status, status_name);
}
int RGWFCGX::send_100_continue()
@@ -48,7 +48,7 @@ int RGWFCGX::send_content_length(uint64_t len)
{
char buf[21];
snprintf(buf, sizeof(buf), "%"PRIu64, len);
- return print("Content-Length: %s\n", buf);
+ return print("Content-Length: %s\r\n", buf);
}
int RGWFCGX::complete_header()
diff --git a/src/rgw/rgw_http_client.cc b/src/rgw/rgw_http_client.cc
index 1c6b6d4..3adc0ae 100644
--- a/src/rgw/rgw_http_client.cc
+++ b/src/rgw/rgw_http_client.cc
@@ -42,17 +42,8 @@ static size_t send_http_data(void *ptr, size_t size, size_t nmemb, void *_info)
return ret;
}
-int RGWHTTPClient::process(const char *method, const char *url)
+static curl_slist *headers_to_slist(list<pair<string, string> >& headers)
{
- int ret = 0;
- CURL *curl_handle;
-
- char error_buf[CURL_ERROR_SIZE];
-
- curl_handle = curl_easy_init();
-
- dout(20) << "sending request to " << url << dendl;
-
curl_slist *h = NULL;
list<pair<string, string> >::iterator iter;
@@ -63,11 +54,37 @@ int RGWHTTPClient::process(const char *method, const char *url)
if (strncmp(val.c_str(), "HTTP_", 5) == 0) {
val = val.substr(5);
}
+
+ /* we need to convert all underscores into dashes as some web servers forbid them
+ * in the http header field names
+ */
+ for (size_t i = 0; i < val.size(); i++) {
+ if (val[i] == '_') {
+ val[i] = '-';
+ }
+ }
+
val.append(": ");
val.append(p.second);
h = curl_slist_append(h, val.c_str());
}
+ return h;
+}
+
+int RGWHTTPClient::process(const char *method, const char *url)
+{
+ int ret = 0;
+ CURL *curl_handle;
+
+ char error_buf[CURL_ERROR_SIZE];
+
+ curl_handle = curl_easy_init();
+
+ dout(20) << "sending request to " << url << dendl;
+
+ curl_slist *h = headers_to_slist(headers);
+
curl_easy_setopt(curl_handle, CURLOPT_CUSTOMREQUEST, method);
curl_easy_setopt(curl_handle, CURLOPT_URL, url);
curl_easy_setopt(curl_handle, CURLOPT_NOPROGRESS, 1L);
@@ -139,20 +156,7 @@ int RGWHTTPClient::init_async(const char *method, const char *url, void **handle
dout(20) << "sending request to " << url << dendl;
- curl_slist *h = NULL;
-
- list<pair<string, string> >::iterator iter;
- for (iter = headers.begin(); iter != headers.end(); ++iter) {
- pair<string, string>& p = *iter;
- string val = p.first;
-
- if (strncmp(val.c_str(), "HTTP_", 5) == 0) {
- val = val.substr(5);
- }
- val.append(": ");
- val.append(p.second);
- h = curl_slist_append(h, val.c_str());
- }
+ curl_slist *h = headers_to_slist(headers);
req_data->h = h;
diff --git a/src/rgw/rgw_json_enc.cc b/src/rgw/rgw_json_enc.cc
index a198678..c0f8311 100644
--- a/src/rgw/rgw_json_enc.cc
+++ b/src/rgw/rgw_json_enc.cc
@@ -429,7 +429,7 @@ static void decode_swift_keys(map<string, RGWAccessKey>& m, JSONObj *o)
{
RGWAccessKey k;
k.decode_json(o, true);
- m[k.subuser] = k;
+ m[k.id] = k;
}
static void decode_subusers(map<string, RGWSubUser>& m, JSONObj *o)
diff --git a/src/rgw/rgw_main.cc b/src/rgw/rgw_main.cc
index 9614b07..fc40b64 100644
--- a/src/rgw/rgw_main.cc
+++ b/src/rgw/rgw_main.cc
@@ -54,6 +54,7 @@
#include "rgw_resolve.h"
#include "rgw_loadgen.h"
#include "rgw_civetweb.h"
+#include "rgw_civetweb_log.h"
#include "civetweb/civetweb.h"
@@ -93,6 +94,8 @@ struct RGWRequest
RGWRequest() : id(0), s(NULL), op(NULL) {
}
+ virtual ~RGWRequest() {}
+
void init_state(req_state *_s) {
s = _s;
}
@@ -141,6 +144,8 @@ public:
bool get_val(const string& key, const string& def_val, string *out);
bool get_val(const string& key, int def_val, int *out);
+ map<string, string>& get_config_map() { return config_map; }
+
string get_framework() { return framework; }
};
@@ -636,6 +641,10 @@ void RGWFCGXProcess::handle_request(RGWRequest *r)
FCGX_Finish_r(fcgx);
+ if (store->ctx()->_conf->rgw_fcgi_explicit_free) {
+ FCGX_Free(fcgx, 1);
+ }
+
delete req;
}
@@ -909,6 +918,12 @@ class RGWMongooseFrontend : public RGWFrontend {
struct mg_context *ctx;
RGWProcessEnv env;
+ void set_conf_default(map<string, string>& m, const string& key, const string& def_val) {
+ if (m.find(key) == m.end()) {
+ m[key] = def_val;
+ }
+ }
+
public:
RGWMongooseFrontend(RGWProcessEnv& pe, RGWFrontendConfig *_conf) : conf(_conf), ctx(NULL), env(pe) {
}
@@ -921,12 +936,28 @@ public:
char thread_pool_buf[32];
snprintf(thread_pool_buf, sizeof(thread_pool_buf), "%d", (int)g_conf->rgw_thread_pool_size);
string port_str;
+ map<string, string> conf_map = conf->get_config_map();
conf->get_val("port", "80", &port_str);
- const char *options[] = {"listening_ports", port_str.c_str(), "enable_keep_alive", "yes", "num_threads", thread_pool_buf, NULL};
+ conf_map.erase("port");
+ conf_map["listening_ports"] = port_str;
+ set_conf_default(conf_map, "enable_keep_alive", "yes");
+ set_conf_default(conf_map, "num_threads", thread_pool_buf);
+ set_conf_default(conf_map, "decode_url", "no");
+
+ const char *options[conf_map.size() * 2 + 1];
+ int i = 0;
+ for (map<string, string>::iterator iter = conf_map.begin(); iter != conf_map.end(); ++iter) {
+ options[i] = iter->first.c_str();
+ options[i + 1] = iter->second.c_str();
+ dout(20)<< "civetweb config: " << options[i] << ": " << (options[i + 1] ? options[i + 1] : "<null>") << dendl;
+ i += 2;
+ }
+ options[i] = NULL;
struct mg_callbacks cb;
memset((void *)&cb, 0, sizeof(cb));
cb.begin_request = civetweb_callback;
+ cb.log_message = rgw_civetweb_log_callback;
ctx = mg_start(&cb, &env, (const char **)&options);
if (!ctx) {
@@ -965,7 +996,7 @@ int main(int argc, const char **argv)
vector<const char *> def_args;
def_args.push_back("--debug-rgw=1/5");
def_args.push_back("--keyring=$rgw_data/keyring");
- def_args.push_back("--log-file=/var/log/radosgw/$cluster-$name");
+ def_args.push_back("--log-file=/var/log/radosgw/$cluster-$name.log");
vector<const char*> args;
argv_to_vec(argc, argv, args);
diff --git a/src/rgw/rgw_op.cc b/src/rgw/rgw_op.cc
index 7694748..4cc12ea 100644
--- a/src/rgw/rgw_op.cc
+++ b/src/rgw/rgw_op.cc
@@ -33,7 +33,8 @@ using ceph::crypto::MD5;
static string mp_ns = RGW_OBJ_NS_MULTIPART;
static string shadow_ns = RGW_OBJ_NS_SHADOW;
-#define MULTIPART_UPLOAD_ID_PREFIX "2/" // must contain a unique char that may not come up in gen_rand_alpha()
+#define MULTIPART_UPLOAD_ID_PREFIX_LEGACY "2/"
+#define MULTIPART_UPLOAD_ID_PREFIX "2~" // must contain a unique char that may not come up in gen_rand_alpha()
class MultipartMetaFilter : public RGWAccessListFilter {
public:
@@ -1438,7 +1439,8 @@ static bool is_v2_upload_id(const string& upload_id)
{
const char *uid = upload_id.c_str();
- return (strncmp(uid, MULTIPART_UPLOAD_ID_PREFIX, sizeof(MULTIPART_UPLOAD_ID_PREFIX) - 1) == 0);
+ return (strncmp(uid, MULTIPART_UPLOAD_ID_PREFIX, sizeof(MULTIPART_UPLOAD_ID_PREFIX) - 1) == 0) ||
+ (strncmp(uid, MULTIPART_UPLOAD_ID_PREFIX_LEGACY, sizeof(MULTIPART_UPLOAD_ID_PREFIX_LEGACY) - 1) == 0);
}
int RGWPutObjProcessor_Multipart::do_complete(string& etag, time_t *mtime, time_t set_mtime, map<string, bufferlist>& attrs)
@@ -1524,64 +1526,18 @@ void RGWPutObj::pre_exec()
rgw_bucket_object_pre_exec(s);
}
-static int put_obj_user_manifest_iterate_cb(rgw_bucket& bucket, RGWObjEnt& ent, RGWAccessControlPolicy *bucket_policy, off_t start_ofs, off_t end_ofs,
- void *param)
-{
- RGWPutObj *op = (RGWPutObj *)param;
- return op->user_manifest_iterate_cb(bucket, ent, bucket_policy, start_ofs, end_ofs);
-}
-
-int RGWPutObj::user_manifest_iterate_cb(rgw_bucket& bucket, RGWObjEnt& ent, RGWAccessControlPolicy *bucket_policy, off_t start_ofs, off_t end_ofs)
-{
- rgw_obj part(bucket, ent.name);
-
- map<string, bufferlist> attrs;
-
- int ret = get_obj_attrs(store, s, part, attrs, NULL, NULL);
- if (ret < 0) {
- return ret;
- }
- map<string, bufferlist>::iterator iter = attrs.find(RGW_ATTR_ETAG);
- if (iter == attrs.end()) {
- return 0;
- }
- bufferlist& bl = iter->second;
- const char *buf = bl.c_str();
- int len = bl.length();
- while (len > 0 && buf[len - 1] == '\0') {
- len--;
- }
- if (len > 0) {
- user_manifest_parts_hash->Update((const byte *)bl.c_str(), len);
- }
-
- if (s->cct->_conf->subsys.should_gather(ceph_subsys_rgw, 20)) {
- string e(bl.c_str(), bl.length());
- ldout(s->cct, 20) << __func__ << ": appending user manifest etag: " << e << dendl;
- }
-
- return 0;
-}
-
static int put_data_and_throttle(RGWPutObjProcessor *processor, bufferlist& data, off_t ofs,
MD5 *hash, bool need_to_wait)
{
- const unsigned char *data_ptr = (hash ? (const unsigned char *)data.c_str() : NULL);
bool again;
- uint64_t len = data.length();
do {
void *handle;
- int ret = processor->handle_data(data, ofs, &handle, &again);
+ int ret = processor->handle_data(data, ofs, hash, &handle, &again);
if (ret < 0)
return ret;
- if (hash) {
- hash->Update(data_ptr, len);
- hash = NULL; /* only calculate hash once */
- }
-
ret = processor->throttle_data(handle, need_to_wait);
if (ret < 0)
return ret;
@@ -1719,6 +1675,7 @@ void RGWPutObj::execute()
}
if (need_calc_md5) {
+ processor->complete_hash(&hash);
hash.Final(m);
buf_to_hex(m, CEPH_CRYPTO_MD5_DIGESTSIZE, calc_md5);
@@ -1737,7 +1694,6 @@ void RGWPutObj::execute()
bufferlist manifest_bl;
string manifest_obj_prefix;
string manifest_bucket;
- RGWBucketInfo bucket_info;
char etag_buf[CEPH_CRYPTO_MD5_DIGESTSIZE];
char etag_buf_str[CEPH_CRYPTO_MD5_DIGESTSIZE * 2 + 16];
@@ -1755,16 +1711,6 @@ void RGWPutObj::execute()
manifest_bucket = prefix_str.substr(0, pos);
manifest_obj_prefix = prefix_str.substr(pos + 1);
- ret = store->get_bucket_info(NULL, manifest_bucket, bucket_info, NULL, NULL);
- if (ret < 0) {
- ldout(s->cct, 0) << "could not get bucket info for bucket=" << manifest_bucket << dendl;
- }
- ret = iterate_user_manifest_parts(s->cct, store, 0, -1, bucket_info.bucket, manifest_obj_prefix,
- NULL, NULL, put_obj_user_manifest_iterate_cb, (void *)this);
- if (ret < 0) {
- goto done;
- }
-
hash.Final((byte *)etag_buf);
buf_to_hex((const unsigned char *)etag_buf, CEPH_CRYPTO_MD5_DIGESTSIZE, etag_buf_str);
@@ -1940,10 +1886,14 @@ void RGWPutMetadata::execute()
/* no need to track object versioning, need it for bucket's data only */
RGWObjVersionTracker *ptracker = (s->object ? NULL : &s->bucket_info.objv_tracker);
- /* check if obj exists, read orig attrs */
- ret = get_obj_attrs(store, s, obj, orig_attrs, NULL, ptracker);
- if (ret < 0)
- return;
+ if (s->object) {
+ /* check if obj exists, read orig attrs */
+ ret = get_obj_attrs(store, s, obj, orig_attrs, NULL, ptracker);
+ if (ret < 0)
+ return;
+ } else {
+ orig_attrs = s->bucket_attrs;
+ }
/* only remove meta attrs */
for (iter = orig_attrs.begin(); iter != orig_attrs.end(); ++iter) {
@@ -2214,6 +2164,7 @@ void RGWCopyObj::execute()
replace_attrs,
attrs, RGW_OBJ_CATEGORY_MAIN,
&s->req_id, /* use req_id as tag */
+ &etag,
&s->err,
copy_obj_progress_cb, (void *)this
);
@@ -2277,7 +2228,6 @@ void RGWPutACLs::execute()
RGWAccessControlPolicy_S3 new_policy(s->cct);
stringstream ss;
char *new_data = NULL;
- ACLOwner owner;
rgw_obj obj;
ret = 0;
@@ -2287,8 +2237,10 @@ void RGWPutACLs::execute()
return;
}
- owner.set_id(s->user.user_id);
- owner.set_name(s->user.display_name);
+
+ RGWAccessControlPolicy *existing_policy = (s->object == NULL? s->bucket_acl : s->object_acl);
+
+ owner = existing_policy->get_owner();
ret = get_params();
if (ret < 0)
@@ -2536,7 +2488,7 @@ void RGWInitMultipart::execute()
do {
char buf[33];
gen_rand_alphanumeric(s->cct, buf, sizeof(buf) - 1);
- upload_id = "2/"; /* v2 upload id */
+ upload_id = MULTIPART_UPLOAD_ID_PREFIX; /* v2 upload id */
upload_id.append(buf);
string tmp_obj_name;
diff --git a/src/rgw/rgw_op.h b/src/rgw/rgw_op.h
index b141ed5..bd6f964 100644
--- a/src/rgw/rgw_op.h
+++ b/src/rgw/rgw_op.h
@@ -345,8 +345,6 @@ public:
RGWPutObjProcessor *select_processor(bool *is_multipart);
void dispose_processor(RGWPutObjProcessor *processor);
- int user_manifest_iterate_cb(rgw_bucket& bucket, RGWObjEnt& ent, RGWAccessControlPolicy *bucket_policy, off_t start_ofs, off_t end_ofs);
-
int verify_permission();
void pre_exec();
void execute();
@@ -490,6 +488,7 @@ protected:
string source_zone;
string client_id;
string op_id;
+ string etag;
off_t last_ofs;
@@ -556,6 +555,7 @@ protected:
int ret;
size_t len;
char *data;
+ ACLOwner owner;
public:
RGWPutACLs() {
diff --git a/src/rgw/rgw_rados.cc b/src/rgw/rgw_rados.cc
index e22bef0..52f8a70 100644
--- a/src/rgw/rgw_rados.cc
+++ b/src/rgw/rgw_rados.cc
@@ -876,6 +876,11 @@ int RGWPutObjProcessor::complete(string& etag, time_t *mtime, time_t set_mtime,
return 0;
}
+CephContext *RGWPutObjProcessor::ctx()
+{
+ return store->ctx();
+}
+
RGWPutObjProcessor::~RGWPutObjProcessor()
{
if (is_complete)
@@ -900,8 +905,10 @@ int RGWPutObjProcessor_Plain::prepare(RGWRados *store, void *obj_ctx, string *oi
return 0;
};
-int RGWPutObjProcessor_Plain::handle_data(bufferlist& bl, off_t _ofs, void **phandle, bool *again)
+int RGWPutObjProcessor_Plain::handle_data(bufferlist& bl, off_t _ofs, MD5 *hash, void **phandle, bool *again)
{
+ assert(!hash);
+
*again = false;
if (ofs != _ofs)
@@ -1028,7 +1035,7 @@ int RGWPutObjProcessor_Atomic::write_data(bufferlist& bl, off_t ofs, void **phan
return RGWPutObjProcessor_Aio::handle_obj_data(cur_obj, bl, ofs - cur_part_ofs, ofs, phandle, exclusive);
}
-int RGWPutObjProcessor_Atomic::handle_data(bufferlist& bl, off_t ofs, void **phandle, bool *again)
+int RGWPutObjProcessor_Atomic::handle_data(bufferlist& bl, off_t ofs, MD5 *hash, void **phandle, bool *again)
{
*again = false;
@@ -1062,7 +1069,10 @@ int RGWPutObjProcessor_Atomic::handle_data(bufferlist& bl, off_t ofs, void **pha
if (!data_ofs && !immutable_head()) {
first_chunk.claim(bl);
obj_len = (uint64_t)first_chunk.length();
- int r = prepare_next_part(first_chunk.length());
+ if (hash) {
+ hash->Update((const byte *)first_chunk.c_str(), obj_len);
+ }
+ int r = prepare_next_part(obj_len);
if (r < 0) {
return r;
}
@@ -1074,7 +1084,19 @@ int RGWPutObjProcessor_Atomic::handle_data(bufferlist& bl, off_t ofs, void **pha
bool exclusive = (!write_ofs && immutable_head()); /* immutable head object, need to verify nothing exists there
we could be racing with another upload, to the same
object and cleanup can be messy */
- return write_data(bl, write_ofs, phandle, exclusive);
+ int ret = write_data(bl, write_ofs, phandle, exclusive);
+ if (ret >= 0) { /* we might return, need to clear bl as it was already sent */
+ if (hash) {
+ hash->Update((const byte *)bl.c_str(), bl.length());
+ }
+ bl.clear();
+ }
+ return ret;
+}
+
+void RGWPutObjProcessor_Atomic::complete_hash(MD5 *hash)
+{
+ hash->Update((const byte *)pending_data_bl.c_str(), pending_data_bl.length());
}
@@ -3019,7 +3041,7 @@ public:
do {
void *handle;
- int ret = processor->handle_data(bl, ofs, &handle, &again);
+ int ret = processor->handle_data(bl, ofs, NULL, &handle, &again);
if (ret < 0)
return ret;
@@ -3029,6 +3051,11 @@ public:
*/
ret = opstate->renew_state();
if (ret < 0) {
+ ldout(processor->ctx(), 0) << "ERROR: RGWRadosPutObj::handle_data(): failed to renew op state ret=" << ret << dendl;
+ int r = processor->throttle_data(handle, false);
+ if (r < 0) {
+ ldout(processor->ctx(), 0) << "ERROR: RGWRadosPutObj::handle_data(): processor->throttle_data() returned " << r << dendl;
+ }
/* could not renew state! might have been marked as cancelled */
return ret;
}
@@ -3114,6 +3141,7 @@ int RGWRados::copy_obj(void *ctx,
map<string, bufferlist>& attrs,
RGWObjCategory category,
string *ptag,
+ string *petag,
struct rgw_err *err,
void (*progress_cb)(off_t, void *),
void *progress_data)
@@ -3210,6 +3238,10 @@ int RGWRados::copy_obj(void *ctx,
if (ret < 0)
goto set_err_state;
+ if (petag) {
+ *petag = etag;
+ }
+
{ /* opening scope so that we can do goto, sorry */
bufferlist& extra_data_bl = processor.get_extra_data();
if (extra_data_bl.length()) {
@@ -3275,6 +3307,10 @@ set_err_state:
if (ret < 0)
return ret;
+ if (petag) {
+ *petag = etag;
+ }
+
return 0;
}
@@ -3286,7 +3322,7 @@ set_err_state:
return ret;
}
- bool copy_data = !astate->has_manifest;
+ bool copy_data = !astate->has_manifest || (src_obj.bucket.data_pool != dest_obj.bucket.data_pool);
bool copy_first = false;
if (astate->has_manifest) {
if (!astate->manifest.has_tail()) {
@@ -3304,7 +3340,7 @@ set_err_state:
}
if (copy_data) { /* refcounting tail wouldn't work here, just copy the data */
- return copy_obj_data(ctx, dest_bucket_info.owner, &handle, end, dest_obj, src_obj, max_chunk_size, mtime, src_attrs, category, ptag, err);
+ return copy_obj_data(ctx, dest_bucket_info.owner, &handle, end, dest_obj, src_obj, max_chunk_size, mtime, src_attrs, category, ptag, petag, err);
}
RGWObjManifest::obj_iterator miter = astate->manifest.obj_begin();
@@ -3383,6 +3419,14 @@ set_err_state:
if (mtime)
obj_stat(ctx, dest_obj, NULL, mtime, NULL, NULL, NULL, NULL);
+ if (petag) {
+ map<string, bufferlist>::iterator iter = src_attrs.find(RGW_ATTR_ETAG);
+ if (iter != src_attrs.end()) {
+ bufferlist& etagbl = iter->second;
+ *petag = string(etagbl.c_str(), etagbl.length());
+ }
+ }
+
return 0;
done_ret:
@@ -3419,23 +3463,23 @@ int RGWRados::copy_obj_data(void *ctx,
map<string, bufferlist>& attrs,
RGWObjCategory category,
string *ptag,
+ string *petag,
struct rgw_err *err)
{
bufferlist first_chunk;
RGWObjManifest manifest;
map<uint64_t, RGWObjManifestPart> objs;
- RGWObjManifestPart *first_part;
- map<string, bufferlist>::iterator iter;
- rgw_obj shadow_obj = dest_obj;
- string shadow_oid;
+ string tag;
+ append_rand_alpha(cct, tag, tag, 32);
- append_rand_alpha(cct, dest_obj.object, shadow_oid, 32);
- shadow_obj.init_ns(dest_obj.bucket, shadow_oid, shadow_ns);
+ RGWPutObjProcessor_Atomic processor(owner, dest_obj.bucket, dest_obj.object,
+ cct->_conf->rgw_obj_stripe_size, tag);
+ int ret = processor.prepare(this, ctx, NULL);
+ if (ret < 0)
+ return ret;
- int ret, r;
off_t ofs = 0;
- PutObjMetaExtraParams ep;
do {
bufferlist bl;
@@ -3443,55 +3487,40 @@ int RGWRados::copy_obj_data(void *ctx,
if (ret < 0)
return ret;
- const char *data = bl.c_str();
+ uint64_t read_len = ret;
+ bool again;
- if ((uint64_t)ofs < max_chunk_size) {
- uint64_t len = min(max_chunk_size - ofs, (uint64_t)ret);
- first_chunk.append(data, len);
- ofs += len;
- ret -= len;
- data += len;
- }
+ do {
+ void *handle;
- // In the first call to put_obj_data, we pass ofs == -1 so that it will do
- // a write_full, wiping out whatever was in the object before this
- r = 0;
- if (ret > 0) {
- r = put_obj_data(ctx, shadow_obj, data, ((ofs == 0) ? -1 : ofs), ret, false);
- }
- if (r < 0)
- goto done_err;
+ ret = processor.handle_data(bl, ofs, NULL, &handle, &again);
+ if (ret < 0) {
+ return ret;
+ }
+ ret = processor.throttle_data(handle, false);
+ if (ret < 0)
+ return ret;
+ } while (again);
- ofs += ret;
+ ofs += read_len;
} while (ofs <= end);
- first_part = &objs[0];
- first_part->loc = dest_obj;
- first_part->loc_ofs = 0;
- first_part->size = first_chunk.length();
-
- if ((uint64_t)ofs > max_chunk_size) {
- RGWObjManifestPart& tail = objs[max_chunk_size];
- tail.loc = shadow_obj;
- tail.loc_ofs = max_chunk_size;
- tail.size = ofs - max_chunk_size;
+ string etag;
+ map<string, bufferlist>::iterator iter = attrs.find(RGW_ATTR_ETAG);
+ if (iter != attrs.end()) {
+ bufferlist& bl = iter->second;
+ etag = string(bl.c_str(), bl.length());
+ if (petag) {
+ *petag = etag;
+ }
}
- manifest.set_explicit(ofs, objs);
-
- ep.data = &first_chunk;
- ep.manifest = &manifest;
- ep.ptag = ptag;
- ep.owner = owner;
+ ret = processor.complete(etag, NULL, 0, attrs);
- ret = put_obj_meta(ctx, dest_obj, end + 1, attrs, category, PUT_OBJ_CREATE, ep);
if (mtime)
obj_stat(ctx, dest_obj, NULL, mtime, NULL, NULL, NULL, NULL);
return ret;
-done_err:
- delete_obj(ctx, owner, shadow_obj);
- return r;
}
/**
@@ -4132,7 +4161,33 @@ int RGWRados::set_attrs(void *ctx, rgw_obj& obj,
if (!op.size())
return 0;
+ string tag;
+ if (state) {
+ r = prepare_update_index(state, bucket, CLS_RGW_OP_ADD, obj, tag);
+ if (r < 0)
+ return r;
+ }
+
r = ref.ioctx.operate(ref.oid, &op);
+ if (state) {
+ if (r >= 0) {
+ bufferlist acl_bl = attrs[RGW_ATTR_ACL];
+ bufferlist etag_bl = attrs[RGW_ATTR_ETAG];
+ bufferlist content_type_bl = attrs[RGW_ATTR_CONTENT_TYPE];
+ string etag(etag_bl.c_str(), etag_bl.length());
+ string content_type(content_type_bl.c_str(), content_type_bl.length());
+ uint64_t epoch = ref.ioctx.get_last_version();
+ int64_t poolid = ref.ioctx.get_id();
+ utime_t mtime = ceph_clock_now(cct);
+ r = complete_update_index(bucket, obj.object, tag, poolid, epoch, state->size,
+ mtime, etag, content_type, &acl_bl, RGW_OBJ_CATEGORY_MAIN, NULL);
+ } else {
+ int ret = complete_update_index_cancel(bucket, obj.object, tag);
+ if (ret < 0) {
+ ldout(cct, 0) << "ERROR: comlete_update_index_cancel() returned r=" << r << dendl;
+ }
+ }
+ }
if (r < 0)
return r;
@@ -4636,7 +4691,7 @@ int RGWRados::get_obj(void *ctx, RGWObjVersionTracker *objv_tracker, void **hand
bl.append(read_bl);
done:
- if (bl.length() > 0) {
+ if (r >= 0) {
r = bl.length();
}
if (r < 0 || !len || ((off_t)(ofs + len - 1) == end)) {
diff --git a/src/rgw/rgw_rados.h b/src/rgw/rgw_rados.h
index d811b49..ff161b8 100644
--- a/src/rgw/rgw_rados.h
+++ b/src/rgw/rgw_rados.h
@@ -298,6 +298,11 @@ public:
bool has_tail() {
if (explicit_objs) {
+ if (objs.size() == 1) {
+ map<uint64_t, RGWObjManifestPart>::iterator iter = objs.begin();
+ rgw_obj& obj = iter->second.loc;
+ return head_obj.object != obj.object;
+ }
return (objs.size() >= 2);
}
return (obj_size > head_size);
@@ -548,9 +553,14 @@ public:
obj_ctx = _o;
return 0;
};
- virtual int handle_data(bufferlist& bl, off_t ofs, void **phandle, bool *again) = 0;
+ virtual int handle_data(bufferlist& bl, off_t ofs, MD5 *hash, void **phandle, bool *again) = 0;
virtual int throttle_data(void *handle, bool need_to_wait) = 0;
+ virtual void complete_hash(MD5 *hash) {
+ assert(0);
+ }
virtual int complete(string& etag, time_t *mtime, time_t set_mtime, map<string, bufferlist>& attrs);
+
+ CephContext *ctx();
};
class RGWPutObjProcessor_Plain : public RGWPutObjProcessor
@@ -564,7 +574,7 @@ class RGWPutObjProcessor_Plain : public RGWPutObjProcessor
protected:
int prepare(RGWRados *store, void *obj_ctx, string *oid_rand);
- int handle_data(bufferlist& bl, off_t ofs, void **phandle, bool *again);
+ int handle_data(bufferlist& bl, off_t ofs, MD5 *hash /* NULL expected */, void **phandle, bool *again);
int do_complete(string& etag, time_t *mtime, time_t set_mtime, map<string, bufferlist>& attrs);
public:
@@ -654,7 +664,8 @@ public:
void set_extra_data_len(uint64_t len) {
extra_data_len = len;
}
- virtual int handle_data(bufferlist& bl, off_t ofs, void **phandle, bool *again);
+ virtual int handle_data(bufferlist& bl, off_t ofs, MD5 *hash, void **phandle, bool *again);
+ virtual void complete_hash(MD5 *hash);
bufferlist& get_extra_data() { return extra_data_bl; }
};
@@ -1555,6 +1566,7 @@ public:
map<std::string, bufferlist>& attrs,
RGWObjCategory category,
string *ptag,
+ string *petag,
struct rgw_err *err,
void (*progress_cb)(off_t, void *),
void *progress_data);
@@ -1569,6 +1581,7 @@ public:
map<string, bufferlist>& attrs,
RGWObjCategory category,
string *ptag,
+ string *petag,
struct rgw_err *err);
/**
* Delete a bucket.
diff --git a/src/rgw/rgw_rest.cc b/src/rgw/rgw_rest.cc
index b74002d..59026fb 100644
--- a/src/rgw/rgw_rest.cc
+++ b/src/rgw/rgw_rest.cc
@@ -244,7 +244,7 @@ void dump_content_length(struct req_state *s, uint64_t len)
if (r < 0) {
ldout(s->cct, 0) << "ERROR: s->cio->print() returned err=" << r << dendl;
}
- r = s->cio->print("Accept-Ranges: %s\n", "bytes");
+ r = s->cio->print("Accept-Ranges: %s\r\n", "bytes");
if (r < 0) {
ldout(s->cct, 0) << "ERROR: s->cio->print() returned err=" << r << dendl;
}
@@ -254,9 +254,9 @@ void dump_etag(struct req_state *s, const char *etag)
{
int r;
if (s->prot_flags & RGW_REST_SWIFT)
- r = s->cio->print("etag: %s\n", etag);
+ r = s->cio->print("etag: %s\r\n", etag);
else
- r = s->cio->print("ETag: \"%s\"\n", etag);
+ r = s->cio->print("ETag: \"%s\"\r\n", etag);
if (r < 0) {
ldout(s->cct, 0) << "ERROR: s->cio->print() returned err=" << r << dendl;
}
@@ -265,7 +265,7 @@ void dump_etag(struct req_state *s, const char *etag)
void dump_pair(struct req_state *s, const char *key, const char *value)
{
if ( (strlen(key) > 0) && (strlen(value) > 0))
- s->cio->print("%s: %s\n", key, value);
+ s->cio->print("%s: %s\r\n", key, value);
}
void dump_bucket_from_state(struct req_state *s)
@@ -273,16 +273,10 @@ void dump_bucket_from_state(struct req_state *s)
int expose_bucket = g_conf->rgw_expose_bucket;
if (expose_bucket) {
if (!s->bucket_name_str.empty())
- s->cio->print("Bucket: \"%s\"\n", s->bucket_name_str.c_str());
+ s->cio->print("Bucket: \"%s\"\r\n", s->bucket_name_str.c_str());
}
}
-void dump_object_from_state(struct req_state *s)
-{
- if (!s->object_str.empty())
- s->cio->print("Key: \"%s\"\n", s->object_str.c_str());
-}
-
void dump_uri_from_state(struct req_state *s)
{
if (strcmp(s->info.request_uri.c_str(), "/") == 0) {
@@ -296,12 +290,12 @@ void dump_uri_from_state(struct req_state *s)
location += "/";
if (!s->object_str.empty()) {
location += s->object_str;
- s->cio->print("Location: %s\n", location.c_str());
+ s->cio->print("Location: %s\r\n", location.c_str());
}
}
}
else {
- s->cio->print("Location: \"%s\"\n", s->info.request_uri.c_str());
+ s->cio->print("Location: \"%s\"\r\n", s->info.request_uri.c_str());
}
}
@@ -310,7 +304,7 @@ void dump_redirect(struct req_state *s, const string& redirect)
if (redirect.empty())
return;
- s->cio->print("Location: %s\n", redirect.c_str());
+ s->cio->print("Location: %s\r\n", redirect.c_str());
}
static void dump_time_header(struct req_state *s, const char *name, time_t t)
@@ -325,7 +319,7 @@ static void dump_time_header(struct req_state *s, const char *name, time_t t)
if (strftime(timestr, sizeof(timestr), "%a, %d %b %Y %H:%M:%S %Z", tmp) == 0)
return;
- int r = s->cio->print("%s: %s\n", name, timestr);
+ int r = s->cio->print("%s: %s\r\n", name, timestr);
if (r < 0) {
ldout(s->cct, 0) << "ERROR: s->cio->print() returned err=" << r << dendl;
}
@@ -341,7 +335,7 @@ void dump_epoch_header(struct req_state *s, const char *name, time_t t)
char buf[32];
snprintf(buf, sizeof(buf), "%lld", (long long)t);
- int r = s->cio->print("%s: %s\n", name, buf);
+ int r = s->cio->print("%s: %s\r\n", name, buf);
if (r < 0) {
ldout(s->cct, 0) << "ERROR: s->cio->print() returned err=" << r << dendl;
}
@@ -374,16 +368,16 @@ void dump_owner(struct req_state *s, string& id, string& name, const char *secti
void dump_access_control(struct req_state *s, const char *origin, const char *meth,
const char *hdr, const char *exp_hdr, uint32_t max_age) {
if (origin && (origin[0] != '\0')) {
- s->cio->print("Access-Control-Allow-Origin: %s\n", origin);
+ s->cio->print("Access-Control-Allow-Origin: %s\r\n", origin);
if (meth && (meth[0] != '\0'))
- s->cio->print("Access-Control-Allow-Methods: %s\n", meth);
+ s->cio->print("Access-Control-Allow-Methods: %s\r\n", meth);
if (hdr && (hdr[0] != '\0'))
- s->cio->print("Access-Control-Allow-Headers: %s\n", hdr);
+ s->cio->print("Access-Control-Allow-Headers: %s\r\n", hdr);
if (exp_hdr && (exp_hdr[0] != '\0')) {
- s->cio->print("Access-Control-Expose-Headers: %s\n", exp_hdr);
+ s->cio->print("Access-Control-Expose-Headers: %s\r\n", exp_hdr);
}
if (max_age != CORS_MAX_AGE_INVALID) {
- s->cio->print("Access-Control-Max-Age: %d\n", max_age);
+ s->cio->print("Access-Control-Max-Age: %d\r\n", max_age);
}
}
}
@@ -483,7 +477,7 @@ void dump_range(struct req_state *s, uint64_t ofs, uint64_t end, uint64_t total)
/* dumping range into temp buffer first, as libfcgi will fail to digest %lld */
snprintf(range_buf, sizeof(range_buf), "%lld-%lld/%lld", (long long)ofs, (long long)end, (long long)total);
- int r = s->cio->print("Content-Range: bytes %s\n", range_buf);
+ int r = s->cio->print("Content-Range: bytes %s\r\n", range_buf);
if (r < 0) {
ldout(s->cct, 0) << "ERROR: s->cio->print() returned err=" << r << dendl;
}
diff --git a/src/rgw/rgw_rest.h b/src/rgw/rgw_rest.h
index 38ffd8c..d42ec8d 100644
--- a/src/rgw/rgw_rest.h
+++ b/src/rgw/rgw_rest.h
@@ -369,7 +369,6 @@ extern void dump_continue(struct req_state *s);
extern void list_all_buckets_end(struct req_state *s);
extern void dump_time(struct req_state *s, const char *name, time_t *t);
extern void dump_bucket_from_state(struct req_state *s);
-extern void dump_object_from_state(struct req_state *s);
extern void dump_uri_from_state(struct req_state *s);
extern void dump_redirect(struct req_state *s, const string& redirect);
extern void dump_pair(struct req_state *s, const char *key, const char *value);
diff --git a/src/rgw/rgw_rest_s3.cc b/src/rgw/rgw_rest_s3.cc
index c7961f4..6fcecf7 100644
--- a/src/rgw/rgw_rest_s3.cc
+++ b/src/rgw/rgw_rest_s3.cc
@@ -161,7 +161,7 @@ done:
dump_errno(s);
for (riter = response_attrs.begin(); riter != response_attrs.end(); ++riter) {
- s->cio->print("%s: %s\n", riter->first.c_str(), riter->second.c_str());
+ s->cio->print("%s: %s\r\n", riter->first.c_str(), riter->second.c_str());
}
if (!content_type)
@@ -303,9 +303,9 @@ static void dump_bucket_metadata(struct req_state *s, RGWBucketEnt& bucket)
{
char buf[32];
snprintf(buf, sizeof(buf), "%lld", (long long)bucket.count);
- s->cio->print("X-RGW-Object-Count: %s\n", buf);
+ s->cio->print("X-RGW-Object-Count: %s\r\n", buf);
snprintf(buf, sizeof(buf), "%lld", (long long)bucket.size);
- s->cio->print("X-RGW-Bytes-Used: %s\n", buf);
+ s->cio->print("X-RGW-Bytes-Used: %s\r\n", buf);
}
void RGWStatBucket_ObjStore_S3::send_response()
@@ -321,16 +321,16 @@ void RGWStatBucket_ObjStore_S3::send_response()
dump_start(s);
}
-static int create_s3_policy(struct req_state *s, RGWRados *store, RGWAccessControlPolicy_S3& s3policy)
+static int create_s3_policy(struct req_state *s, RGWRados *store, RGWAccessControlPolicy_S3& s3policy, ACLOwner& owner)
{
if (s->has_acl_header) {
if (!s->canned_acl.empty())
return -ERR_INVALID_REQUEST;
- return s3policy.create_from_headers(store, s->info.env, s->owner);
+ return s3policy.create_from_headers(store, s->info.env, owner);
}
- return s3policy.create_canned(s->owner, s->bucket_owner, s->canned_acl);
+ return s3policy.create_canned(owner, s->bucket_owner, s->canned_acl);
}
class RGWLocationConstraint : public XMLObj
@@ -386,7 +386,7 @@ int RGWCreateBucket_ObjStore_S3::get_params()
{
RGWAccessControlPolicy_S3 s3policy(s->cct);
- int r = create_s3_policy(s, store, s3policy);
+ int r = create_s3_policy(s, store, s3policy, s->owner);
if (r < 0)
return r;
@@ -487,7 +487,7 @@ int RGWPutObj_ObjStore_S3::get_params()
if (!s->length)
return -ERR_LENGTH_REQUIRED;
- int r = create_s3_policy(s, store, s3policy);
+ int r = create_s3_policy(s, store, s3policy, s->owner);
if (r < 0)
return r;
@@ -1198,7 +1198,7 @@ int RGWCopyObj_ObjStore_S3::init_dest_policy()
RGWAccessControlPolicy_S3 s3policy(s->cct);
/* build a policy for the target object */
- int r = create_s3_policy(s, store, s3policy);
+ int r = create_s3_policy(s, store, s3policy, s->owner);
if (r < 0)
return r;
@@ -1264,7 +1264,7 @@ void RGWCopyObj_ObjStore_S3::send_partial_response(off_t ofs)
set_req_state_err(s, ret);
dump_errno(s);
- end_header(s, this, "binary/octet-stream");
+ end_header(s, this, "application/xml");
if (ret == 0) {
s->formatter->open_object_section("CopyObjectResult");
}
@@ -1285,13 +1285,8 @@ void RGWCopyObj_ObjStore_S3::send_response()
if (ret == 0) {
dump_time(s, "LastModified", &mtime);
- map<string, bufferlist>::iterator iter = attrs.find(RGW_ATTR_ETAG);
- if (iter != attrs.end()) {
- bufferlist& bl = iter->second;
- if (bl.length()) {
- char *etag = bl.c_str();
- s->formatter->dump_string("ETag", etag);
- }
+ if (!etag.empty()) {
+ s->formatter->dump_string("ETag", etag);
}
s->formatter->close_section();
rgw_flush_formatter_and_reset(s, s->formatter);
@@ -1318,7 +1313,7 @@ int RGWPutACLs_ObjStore_S3::get_policy_from_state(RGWRados *store, struct req_st
s->canned_acl.clear();
}
- int r = create_s3_policy(s, store, s3policy);
+ int r = create_s3_policy(s, store, s3policy, owner);
if (r < 0)
return r;
@@ -1460,7 +1455,7 @@ void RGWOptionsCORS_ObjStore_S3::send_response()
int RGWInitMultipart_ObjStore_S3::get_params()
{
RGWAccessControlPolicy_S3 s3policy(s->cct);
- ret = create_s3_policy(s, store, s3policy);
+ ret = create_s3_policy(s, store, s3policy, s->owner);
if (ret < 0)
return ret;
@@ -2047,6 +2042,12 @@ int RGW_Auth_S3_Keystone_ValidateToken::validate_s3token(const string& auth_id,
return 0;
}
+static void init_anon_user(struct req_state *s)
+{
+ rgw_get_anon_user(s->user);
+ s->perm_mask = RGW_PERM_FULL_CONTROL;
+}
+
/*
* verify that a signed request comes from the keyholder
* by checking the signature against our locally-computed version
@@ -2067,6 +2068,11 @@ int RGW_Auth_S3::authorize(RGWRados *store, struct req_state *s)
return -EPERM;
}
+ if (s->op == OP_OPTIONS) {
+ init_anon_user(s);
+ return 0;
+ }
+
if (!s->http_auth || !(*s->http_auth)) {
auth_id = s->info.args.get("AWSAccessKeyId");
if (auth_id.size()) {
@@ -2080,8 +2086,7 @@ int RGW_Auth_S3::authorize(RGWRados *store, struct req_state *s)
qsr = true;
} else {
/* anonymous access */
- rgw_get_anon_user(s->user);
- s->perm_mask = RGW_PERM_FULL_CONTROL;
+ init_anon_user(s);
return 0;
}
} else {
diff --git a/src/rgw/rgw_rest_swift.cc b/src/rgw/rgw_rest_swift.cc
index b562079..36544db 100644
--- a/src/rgw/rgw_rest_swift.cc
+++ b/src/rgw/rgw_rest_swift.cc
@@ -223,11 +223,11 @@ static void dump_container_metadata(struct req_state *s, RGWBucketEnt& bucket)
{
char buf[32];
snprintf(buf, sizeof(buf), "%lld", (long long)bucket.count);
- s->cio->print("X-Container-Object-Count: %s\n", buf);
+ s->cio->print("X-Container-Object-Count: %s\r\n", buf);
snprintf(buf, sizeof(buf), "%lld", (long long)bucket.size);
- s->cio->print("X-Container-Bytes-Used: %s\n", buf);
+ s->cio->print("X-Container-Bytes-Used: %s\r\n", buf);
snprintf(buf, sizeof(buf), "%lld", (long long)bucket.size_rounded);
- s->cio->print("X-Container-Bytes-Used-Actual: %s\n", buf);
+ s->cio->print("X-Container-Bytes-Used-Actual: %s\r\n", buf);
if (!s->object) {
RGWAccessControlPolicy_SWIFT *swift_policy = static_cast<RGWAccessControlPolicy_SWIFT *>(s->bucket_acl);
@@ -247,13 +247,13 @@ static void dump_account_metadata(struct req_state *s, uint32_t buckets_count,
{
char buf[32];
snprintf(buf, sizeof(buf), "%lld", (long long)buckets_count);
- s->cio->print("X-Account-Container-Count: %s\n", buf);
+ s->cio->print("X-Account-Container-Count: %s\r\n", buf);
snprintf(buf, sizeof(buf), "%lld", (long long)buckets_object_count);
- s->cio->print("X-Account-Object-Count: %s\n", buf);
+ s->cio->print("X-Account-Object-Count: %s\r\n", buf);
snprintf(buf, sizeof(buf), "%lld", (long long)buckets_size);
- s->cio->print("X-Account-Bytes-Used: %s\n", buf);
+ s->cio->print("X-Account-Bytes-Used: %s\r\n", buf);
snprintf(buf, sizeof(buf), "%lld", (long long)buckets_size_rounded);
- s->cio->print("X-Account-Bytes-Used-Actual: %s\n", buf);
+ s->cio->print("X-Account-Bytes-Used-Actual: %s\r\n", buf);
}
void RGWStatAccount_ObjStore_SWIFT::send_response()
@@ -552,7 +552,6 @@ void RGWCopyObj_ObjStore_SWIFT::send_response()
int RGWGetObj_ObjStore_SWIFT::send_response_data(bufferlist& bl, off_t bl_ofs, off_t bl_len)
{
const char *content_type = NULL;
- int orig_ret = ret;
map<string, string> response_attrs;
map<string, string>::iterator riter;
@@ -593,15 +592,11 @@ int RGWGetObj_ObjStore_SWIFT::send_response_data(bufferlist& bl, off_t bl_ofs, o
}
}
- if (partial_content && !ret)
- ret = -STATUS_PARTIAL_CONTENT;
-
- if (ret)
- set_req_state_err(s, ret);
+ set_req_state_err(s, (partial_content && !ret) ? STATUS_PARTIAL_CONTENT : ret);
dump_errno(s);
for (riter = response_attrs.begin(); riter != response_attrs.end(); ++riter) {
- s->cio->print("%s: %s\n", riter->first.c_str(), riter->second.c_str());
+ s->cio->print("%s: %s\r\n", riter->first.c_str(), riter->second.c_str());
}
if (!content_type)
@@ -611,7 +606,7 @@ int RGWGetObj_ObjStore_SWIFT::send_response_data(bufferlist& bl, off_t bl_ofs, o
sent_header = true;
send_data:
- if (get_data && !orig_ret) {
+ if (get_data && !ret) {
int r = s->cio->write(bl.c_str() + bl_ofs, bl_len);
if (r < 0)
return r;
@@ -776,7 +771,8 @@ RGWOp *RGWHandler_ObjStore_Obj_SWIFT::op_options()
int RGWHandler_ObjStore_SWIFT::authorize()
{
- if (!s->os_auth_token && s->info.args.get("temp_url_sig").empty()) {
+ if ((!s->os_auth_token && s->info.args.get("temp_url_sig").empty()) ||
+ (s->op == OP_OPTIONS)) {
/* anonymous access */
rgw_get_anon_user(s->user);
s->perm_mask = RGW_PERM_FULL_CONTROL;
@@ -787,8 +783,6 @@ int RGWHandler_ObjStore_SWIFT::authorize()
if (!authorized)
return -EPERM;
- s->perm_mask = RGW_PERM_FULL_CONTROL;
-
return 0;
}
diff --git a/src/rgw/rgw_swift.cc b/src/rgw/rgw_swift.cc
index d9654a7..46c45bd 100644
--- a/src/rgw/rgw_swift.cc
+++ b/src/rgw/rgw_swift.cc
@@ -505,6 +505,8 @@ int RGWSwift::validate_keystone_token(RGWRados *store, const string& token, stru
validate.append_header("X-Auth-Token", admin_token);
+ validate.set_send_length(0);
+
int ret = validate.process(url.c_str());
if (ret < 0)
return ret;
@@ -609,13 +611,41 @@ int authenticate_temp_url(RGWRados *store, req_state *s)
bool RGWSwift::verify_swift_token(RGWRados *store, req_state *s)
{
+ if (!do_verify_swift_token(store, s)) {
+ return false;
+ }
+
+ if (!s->swift_user.empty()) {
+ string subuser;
+ ssize_t pos = s->swift_user.find(':');
+ if (pos < 0) {
+ subuser = s->swift_user;
+ } else {
+ subuser = s->swift_user.substr(pos + 1);
+ }
+ s->perm_mask = 0;
+ map<string, RGWSubUser>::iterator iter = s->user.subusers.find(subuser);
+ if (iter != s->user.subusers.end()) {
+ RGWSubUser& subuser = iter->second;
+ s->perm_mask = subuser.perm_mask;
+ }
+ } else {
+ s->perm_mask = RGW_PERM_FULL_CONTROL;
+ }
+
+ return true;
+
+}
+
+bool RGWSwift::do_verify_swift_token(RGWRados *store, req_state *s)
+{
if (!s->os_auth_token) {
int ret = authenticate_temp_url(store, s);
return (ret >= 0);
}
if (strncmp(s->os_auth_token, "AUTH_rgwtk", 10) == 0) {
- int ret = rgw_swift_verify_signed_token(s->cct, store, s->os_auth_token, s->user);
+ int ret = rgw_swift_verify_signed_token(s->cct, store, s->os_auth_token, s->user, &s->swift_user);
if (ret < 0)
return false;
diff --git a/src/rgw/rgw_swift.h b/src/rgw/rgw_swift.h
index 97347e8..300b5eb 100644
--- a/src/rgw/rgw_swift.h
+++ b/src/rgw/rgw_swift.h
@@ -53,6 +53,7 @@ class RGWSwift {
bool supports_keystone() {
return !cct->_conf->rgw_keystone_url.empty();
}
+ bool do_verify_swift_token(RGWRados *store, req_state *s);
protected:
int check_revoked();
public:
diff --git a/src/rgw/rgw_swift_auth.cc b/src/rgw/rgw_swift_auth.cc
index 9c800c4..1384bb0 100644
--- a/src/rgw/rgw_swift_auth.cc
+++ b/src/rgw/rgw_swift_auth.cc
@@ -56,7 +56,7 @@ static int encode_token(CephContext *cct, string& swift_user, string& key, buffe
return ret;
}
-int rgw_swift_verify_signed_token(CephContext *cct, RGWRados *store, const char *token, RGWUserInfo& info)
+int rgw_swift_verify_signed_token(CephContext *cct, RGWRados *store, const char *token, RGWUserInfo& info, string *pswift_user)
{
if (strncmp(token, "AUTH_rgwtk", 10) != 0)
return -EINVAL;
@@ -123,6 +123,7 @@ int rgw_swift_verify_signed_token(CephContext *cct, RGWRados *store, const char
dout(0) << "NOTICE: tokens mismatch tok=" << buf << dendl;
return -EPERM;
}
+ *pswift_user = swift_user;
return 0;
}
@@ -205,7 +206,7 @@ void RGW_SWIFT_Auth_Get::execute()
tenant_path.append(g_conf->rgw_swift_tenant_name);
}
- s->cio->print("X-Storage-Url: %s/%s/v1%s\n", swift_url.c_str(),
+ s->cio->print("X-Storage-Url: %s/%s/v1%s\r\n", swift_url.c_str(),
swift_prefix.c_str(), tenant_path.c_str());
if ((ret = encode_token(s->cct, swift_key->id, swift_key->key, bl)) < 0)
@@ -215,8 +216,8 @@ void RGW_SWIFT_Auth_Get::execute()
char buf[bl.length() * 2 + 1];
buf_to_hex((const unsigned char *)bl.c_str(), bl.length(), buf);
- s->cio->print("X-Storage-Token: AUTH_rgwtk%s\n", buf);
- s->cio->print("X-Auth-Token: AUTH_rgwtk%s\n", buf);
+ s->cio->print("X-Storage-Token: AUTH_rgwtk%s\r\n", buf);
+ s->cio->print("X-Auth-Token: AUTH_rgwtk%s\r\n", buf);
}
ret = STATUS_NO_CONTENT;
diff --git a/src/rgw/rgw_swift_auth.h b/src/rgw/rgw_swift_auth.h
index 6d50d94..61050d8 100644
--- a/src/rgw/rgw_swift_auth.h
+++ b/src/rgw/rgw_swift_auth.h
@@ -6,7 +6,7 @@
#define RGW_SWIFT_TOKEN_EXPIRATION (15 * 60)
-extern int rgw_swift_verify_signed_token(CephContext *cct, RGWRados *store, const char *token, RGWUserInfo& info);
+extern int rgw_swift_verify_signed_token(CephContext *cct, RGWRados *store, const char *token, RGWUserInfo& info, string *pswift_user);
class RGW_SWIFT_Auth_Get : public RGWOp {
public:
diff --git a/src/rgw/rgw_user.cc b/src/rgw/rgw_user.cc
index 23575d8..55d1d6a 100644
--- a/src/rgw/rgw_user.cc
+++ b/src/rgw/rgw_user.cc
@@ -666,15 +666,6 @@ bool RGWAccessKeyPool::check_existing_key(RGWUserAdminOpState& op_state)
switch (key_type) {
case KEY_TYPE_SWIFT:
- kiter = swift_keys->find(kid);
-
- existing_key = (kiter != swift_keys->end());
- if (existing_key)
- break;
-
- if (swift_kid.empty())
- return false;
-
kiter = swift_keys->find(swift_kid);
existing_key = (kiter != swift_keys->end());
@@ -845,7 +836,7 @@ int RGWAccessKeyPool::generate_key(RGWUserAdminOpState& op_state, std::string *e
} while (!rgw_get_user_info_by_access_key(store, id, duplicate_check));
}
- if (key_type == KEY_TYPE_SWIFT && gen_access) {
+ if (key_type == KEY_TYPE_SWIFT) {
id = op_state.build_default_swift_kid();
if (id.empty()) {
set_err_msg(err_msg, "empty swift access key");
diff --git a/src/test/Makefile.am b/src/test/Makefile.am
index f527b96..69f9e84 100644
--- a/src/test/Makefile.am
+++ b/src/test/Makefile.am
@@ -368,6 +368,9 @@ unittest_pglog_SOURCES = test/osd/TestPGLog.cc
unittest_pglog_CXXFLAGS = $(UNITTEST_CXXFLAGS)
unittest_pglog_LDADD = $(LIBOSD) $(UNITTEST_LDADD) $(CEPH_GLOBAL)
check_PROGRAMS += unittest_pglog
+if LINUX
+unittest_pglog_LDADD += -ldl
+endif # LINUX
unittest_ecbackend_SOURCES = test/osd/TestECBackend.cc
unittest_ecbackend_CXXFLAGS = $(UNITTEST_CXXFLAGS)
@@ -379,9 +382,10 @@ unittest_hitset_CXXFLAGS = $(UNITTEST_CXXFLAGS)
unittest_hitset_LDADD = $(LIBOSD) $(UNITTEST_LDADD) $(CEPH_GLOBAL)
check_PROGRAMS += unittest_hitset
-if LINUX
-unittest_pglog_LDADD += -ldl
-endif # LINUX
+unittest_io_priority_SOURCES = test/common/test_io_priority.cc
+unittest_io_priority_CXXFLAGS = $(UNITTEST_CXXFLAGS)
+unittest_io_priority_LDADD = $(UNITTEST_LDADD) $(CEPH_GLOBAL)
+check_PROGRAMS += unittest_io_priority
unittest_gather_SOURCES = test/gather.cc
unittest_gather_LDADD = $(UNITTEST_LDADD) $(CEPH_GLOBAL)
diff --git a/src/test/cli/crushtool/add-item-in-tree.t b/src/test/cli/crushtool/add-item-in-tree.t
new file mode 100644
index 0000000..8790977
--- /dev/null
+++ b/src/test/cli/crushtool/add-item-in-tree.t
@@ -0,0 +1,10 @@
+ $ crushtool -i "$TESTDIR/tree.template" --add-item 0 1.0 device0 --loc host host0 --loc cluster cluster0 -o one > /dev/null
+ $ crushtool -i one --add-item 1 1.0 device1 --loc host host0 --loc cluster cluster0 -o two > /dev/null
+ $ crushtool -i two --add-item 2 1.0 device2 --loc host host0 --loc cluster cluster0 -o tree > /dev/null
+ $ crushtool -i tree --add-item 3 1.0 device3 --loc host host0 --loc cluster cluster0 -o four > /dev/null
+ $ crushtool -i four --add-item 4 1.0 device4 --loc host host0 --loc cluster cluster0 -o five > /dev/null
+ $ crushtool -i five --add-item 5 1.0 device5 --loc host host0 --loc cluster cluster0 -o six > /dev/null
+ $ crushtool -i six --add-item 6 1.0 device6 --loc host host0 --loc cluster cluster0 -o seven > /dev/null
+ $ crushtool -i seven --add-item 7 1.0 device7 --loc host host0 --loc cluster cluster0 -o eight > /dev/null
+ $ crushtool -d eight -o final
+ $ cmp final "$TESTDIR/tree.template.final"
diff --git a/src/test/cli/crushtool/adjust-item-weight.t b/src/test/cli/crushtool/adjust-item-weight.t
new file mode 100644
index 0000000..16d7135
--- /dev/null
+++ b/src/test/cli/crushtool/adjust-item-weight.t
@@ -0,0 +1,17 @@
+ $ crushtool -i "$TESTDIR/simple.template" --add-item 0 1.0 device0 --loc host host0 --loc cluster cluster0 -o one > /dev/null
+
+#
+# add device0 into host=fake, the weight of device0 in host=host0 is 1.0, the weight of device0 in host=fake is 2.0
+#
+
+ $ crushtool -i one --add-item 0 2.0 device0 --loc host fake --loc cluster cluster0 -o two > /dev/null
+ $ crushtool -d two -o final
+ $ cmp final "$TESTDIR/simple.template.adj.two"
+
+#
+# update the weight of device0 in host=host0, it will not affect the weight of device0 in host=fake
+#
+
+ $ crushtool -i two --update-item 0 3.0 device0 --loc host host0 --loc cluster cluster0 -o three > /dev/null
+ $ crushtool -d three -o final
+ $ cmp final "$TESTDIR/simple.template.adj.three"
diff --git a/src/test/cli/crushtool/build.t b/src/test/cli/crushtool/build.t
index ca0804d..f016737 100644
--- a/src/test/cli/crushtool/build.t
+++ b/src/test/cli/crushtool/build.t
@@ -52,7 +52,7 @@
#
# crush rulesets are generated using the OSDMap helpers
#
- $ CEPH_ARGS="--debug-crush 0" crushtool --outfn "$map" --build --num_osds 1 root straw 0
+ $ CEPH_ARGS="--debug-crush 0" crushtool --outfn "$map" --set-straw-calc-version 0 --build --num_osds 1 root straw 0
$ crushtool -o "$map.txt" -d "$map"
$ cat "$map.txt"
# begin crush map
diff --git a/src/test/cli/crushtool/help.t b/src/test/cli/crushtool/help.t
index 3b48930..a9c6222 100644
--- a/src/test/cli/crushtool/help.t
+++ b/src/test/cli/crushtool/help.t
@@ -33,6 +33,7 @@
--show utilization-all
include zero weight items
--show-statistics show chi squared statistics
+ --show-mappings show mappings
--show-bad-mappings show bad mappings
--show-choose-tries show choose tries histogram
--set-choose-local-tries N
diff --git a/src/test/cli/crushtool/set-choose.t b/src/test/cli/crushtool/set-choose.t
index e160ad7..b40494d 100644
--- a/src/test/cli/crushtool/set-choose.t
+++ b/src/test/cli/crushtool/set-choose.t
@@ -1,5 +1,6 @@
$ crushtool -c "$TESTDIR/set-choose.crushmap.txt" -o set-choose.crushmap
- $ crushtool -i set-choose.crushmap --test --show-statistics
+ $ crushtool -i set-choose.crushmap --test --show-mappings --show-statistics --set-straw-calc-version 0
+ crushtool successfully built or modified map. Use '-o <file>' to write it out.
rule 0 (choose), x = 0..1023, numrep = 2..3
CRUSH rule 0 x 0 [0,3]
CRUSH rule 0 x 1 [0,8]
@@ -12306,7 +12307,8 @@
CRUSH rule 5 x 1022 [1,6,4]
CRUSH rule 5 x 1023 [3,2,8]
rule 5 (chooseleaf-set) num_rep 3 result size == 3:\t1024/1024 (esc)
- $ crushtool -i set-choose.crushmap --test --show-statistics --weight 0 0 --weight 1 0 --weight 3 0 --weight 4 0
+ $ crushtool -i set-choose.crushmap --test --show-mappings --show-statistics --weight 0 0 --weight 1 0 --weight 3 0 --weight 4 0 --set-straw-calc-version 0
+ crushtool successfully built or modified map. Use '-o <file>' to write it out.
rule 0 (choose), x = 0..1023, numrep = 2..3
CRUSH rule 0 x 0 [2,5]
CRUSH rule 0 x 1 [2,8]
@@ -24618,7 +24620,8 @@
CRUSH rule 5 x 1022 [2,6,5]
CRUSH rule 5 x 1023 [5,2,8]
rule 5 (chooseleaf-set) num_rep 3 result size == 3:\t1024/1024 (esc)
- $ crushtool -i set-choose.crushmap --test --show-statistics --weight 0 0 --weight 3 0 --weight 4 .5 --weight 5 0 --weight 6 .1 --weight 7 0
+ $ crushtool -i set-choose.crushmap --test --show-mappings --show-statistics --weight 0 0 --weight 3 0 --weight 4 .5 --weight 5 0 --weight 6 .1 --weight 7 0 --set-straw-calc-version 0
+ crushtool successfully built or modified map. Use '-o <file>' to write it out.
rule 0 (choose), x = 0..1023, numrep = 2..3
CRUSH rule 0 x 0 [2,4]
CRUSH rule 0 x 1 [2,8]
diff --git a/src/test/cli/crushtool/simple.template.adj.one b/src/test/cli/crushtool/simple.template.adj.one
new file mode 100644
index 0000000..aa16bbd
--- /dev/null
+++ b/src/test/cli/crushtool/simple.template.adj.one
@@ -0,0 +1,56 @@
+# begin crush map
+
+# devices
+device 0 device0
+
+# types
+type 0 device
+type 1 host
+type 2 cluster
+
+# buckets
+host host0 {
+ id -2 # do not change unnecessarily
+ # weight 1.000
+ alg straw
+ hash 0 # rjenkins1
+ item device0 weight 1.000
+}
+cluster cluster0 {
+ id -1 # do not change unnecessarily
+ # weight 1.000
+ alg straw
+ hash 0 # rjenkins1
+ item host0 weight 1.000
+}
+
+# rules
+rule data {
+ ruleset 0
+ type replicated
+ min_size 1
+ max_size 10
+ step take cluster0
+ step chooseleaf firstn 0 type host
+ step emit
+}
+rule metadata {
+ ruleset 1
+ type replicated
+ min_size 1
+ max_size 10
+ step take cluster0
+ step chooseleaf firstn 0 type host
+ step emit
+}
+rule rbd {
+ ruleset 2
+ type replicated
+ min_size 1
+ max_size 10
+ step take cluster0
+ step chooseleaf firstn 0 type host
+ step emit
+}
+
+# end crush map
diff --git a/src/test/cli/crushtool/simple.template.adj.three b/src/test/cli/crushtool/simple.template.adj.three
new file mode 100644
index 0000000..fca0fe1
--- /dev/null
+++ b/src/test/cli/crushtool/simple.template.adj.three
@@ -0,0 +1,64 @@
+# begin crush map
+
+# devices
+device 0 device0
+
+# types
+type 0 device
+type 1 host
+type 2 cluster
+
+# buckets
+host host0 {
+ id -2 # do not change unnecessarily
+ # weight 3.000
+ alg straw
+ hash 0 # rjenkins1
+ item device0 weight 3.000
+}
+host fake {
+ id -3 # do not change unnecessarily
+ # weight 2.000
+ alg straw
+ hash 0 # rjenkins1
+ item device0 weight 2.000
+}
+cluster cluster0 {
+ id -1 # do not change unnecessarily
+ # weight 5.000
+ alg straw
+ hash 0 # rjenkins1
+ item host0 weight 3.000
+ item fake weight 2.000
+}
+
+# rules
+rule data {
+ ruleset 0
+ type replicated
+ min_size 1
+ max_size 10
+ step take cluster0
+ step chooseleaf firstn 0 type host
+ step emit
+}
+rule metadata {
+ ruleset 1
+ type replicated
+ min_size 1
+ max_size 10
+ step take cluster0
+ step chooseleaf firstn 0 type host
+ step emit
+}
+rule rbd {
+ ruleset 2
+ type replicated
+ min_size 1
+ max_size 10
+ step take cluster0
+ step chooseleaf firstn 0 type host
+ step emit
+}
+
+# end crush map
diff --git a/src/test/cli/crushtool/simple.template.adj.two b/src/test/cli/crushtool/simple.template.adj.two
new file mode 100644
index 0000000..21c29a3
--- /dev/null
+++ b/src/test/cli/crushtool/simple.template.adj.two
@@ -0,0 +1,64 @@
+# begin crush map
+
+# devices
+device 0 device0
+
+# types
+type 0 device
+type 1 host
+type 2 cluster
+
+# buckets
+host host0 {
+ id -2 # do not change unnecessarily
+ # weight 1.000
+ alg straw
+ hash 0 # rjenkins1
+ item device0 weight 1.000
+}
+host fake {
+ id -3 # do not change unnecessarily
+ # weight 2.000
+ alg straw
+ hash 0 # rjenkins1
+ item device0 weight 2.000
+}
+cluster cluster0 {
+ id -1 # do not change unnecessarily
+ # weight 3.000
+ alg straw
+ hash 0 # rjenkins1
+ item host0 weight 1.000
+ item fake weight 2.000
+}
+
+# rules
+rule data {
+ ruleset 0
+ type replicated
+ min_size 1
+ max_size 10
+ step take cluster0
+ step chooseleaf firstn 0 type host
+ step emit
+}
+rule metadata {
+ ruleset 1
+ type replicated
+ min_size 1
+ max_size 10
+ step take cluster0
+ step chooseleaf firstn 0 type host
+ step emit
+}
+rule rbd {
+ ruleset 2
+ type replicated
+ min_size 1
+ max_size 10
+ step take cluster0
+ step chooseleaf firstn 0 type host
+ step emit
+}
+
+# end crush map
diff --git a/src/test/cli/crushtool/test-map-bobtail-tunables.t b/src/test/cli/crushtool/test-map-bobtail-tunables.t
index 7c38260..77f2ce8 100644
--- a/src/test/cli/crushtool/test-map-bobtail-tunables.t
+++ b/src/test/cli/crushtool/test-map-bobtail-tunables.t
@@ -1,4 +1,4 @@
- $ crushtool -i "$TESTDIR/test-map-a.crushmap" --test --show-statistics --rule 0 --set-choose-local-tries 0 --set-choose-local-fallback-tries 0 --set-choose-total-tries 50 --set-chooseleaf-descend-once 1
+ $ crushtool -i "$TESTDIR/test-map-a.crushmap" --test --show-mappings --show-statistics --rule 0 --set-choose-local-tries 0 --set-choose-local-fallback-tries 0 --set-choose-total-tries 50 --set-chooseleaf-descend-once 1
crushtool successfully built or modified map. Use '-o <file>' to write it out.
rule 0 (data), x = 0..1023, numrep = 1..10
CRUSH rule 0 x 0 [36]
diff --git a/src/test/cli/crushtool/test-map-firefly-tunables.t b/src/test/cli/crushtool/test-map-firefly-tunables.t
index 481b6fd..a75e89f 100644
--- a/src/test/cli/crushtool/test-map-firefly-tunables.t
+++ b/src/test/cli/crushtool/test-map-firefly-tunables.t
@@ -1,4 +1,4 @@
- $ crushtool -i "$TESTDIR/test-map-vary-r.crushmap" --test --show-statistics --rule 0 --set-choose-local-tries 0 --set-choose-local-fallback-tries 0 --set-choose-total-tries 50 --set-chooseleaf-descend-once 1 --set-chooseleaf-vary-r 1 --weight 12 0 --weight 20 0 --weight 30 0
+ $ crushtool -i "$TESTDIR/test-map-vary-r.crushmap" --test --show-mappings --show-statistics --rule 0 --set-choose-local-tries 0 --set-choose-local-fallback-tries 0 --set-choose-total-tries 50 --set-chooseleaf-descend-once 1 --set-chooseleaf-vary-r 1 --weight 12 0 --weight 20 0 --weight 30 0
crushtool successfully built or modified map. Use '-o <file>' to write it out.
rule 0 (data), x = 0..1023, numrep = 1..10
CRUSH rule 0 x 0 [101]
diff --git a/src/test/cli/crushtool/test-map-firstn-indep.t b/src/test/cli/crushtool/test-map-firstn-indep.t
new file mode 100644
index 0000000..1b8c736
--- /dev/null
+++ b/src/test/cli/crushtool/test-map-firstn-indep.t
@@ -0,0 +1,14 @@
+ $ crushtool -c "$TESTDIR/test-map-firstn-indep.txt" -o "$TESTDIR/test-map-firstn-indep.crushmap"
+ $ crushtool -i "$TESTDIR/test-map-firstn-indep.crushmap" --test --rule 0 --x 1 --show-bad-mappings
+ bad mapping rule 0 x 1 num_rep 9 result [93,80,88,87,56,50,53,72]
+ bad mapping rule 0 x 1 num_rep 10 result [93,80,88,87,56,50,53,72]
+ $ crushtool -i "$TESTDIR/test-map-firstn-indep.crushmap" --test --rule 1 --x 1 --show-bad-mappings
+ bad mapping rule 1 x 1 num_rep 3 result [93,56]
+ bad mapping rule 1 x 1 num_rep 4 result [93,56]
+ bad mapping rule 1 x 1 num_rep 5 result [93,56]
+ bad mapping rule 1 x 1 num_rep 6 result [93,56]
+ bad mapping rule 1 x 1 num_rep 7 result [93,56]
+ bad mapping rule 1 x 1 num_rep 8 result [93,56]
+ bad mapping rule 1 x 1 num_rep 9 result [93,56]
+ bad mapping rule 1 x 1 num_rep 10 result [93,56]
+ $ rm -f "$TESTDIR/test-map-firstn-indep.crushmap"
diff --git a/src/test/cli/crushtool/test-map-firstn-indep.txt b/src/test/cli/crushtool/test-map-firstn-indep.txt
new file mode 100644
index 0000000..4534eab
--- /dev/null
+++ b/src/test/cli/crushtool/test-map-firstn-indep.txt
@@ -0,0 +1,443 @@
+# begin crush map
+tunable choose_local_tries 0
+tunable choose_local_fallback_tries 0
+tunable choose_total_tries 50
+tunable chooseleaf_descend_once 1
+
+# devices
+device 0 device0
+device 1 device1
+device 2 device2
+device 3 device3
+device 4 device4
+device 5 device5
+device 6 device6
+device 7 device7
+device 8 device8
+device 9 device9
+device 10 device10
+device 11 device11
+device 12 device12
+device 13 device13
+device 14 device14
+device 15 device15
+device 16 device16
+device 17 device17
+device 18 device18
+device 19 device19
+device 20 device20
+device 21 device21
+device 22 device22
+device 23 device23
+device 24 device24
+device 25 device25
+device 26 device26
+device 27 device27
+device 28 device28
+device 29 device29
+device 30 device30
+device 31 device31
+device 32 device32
+device 33 device33
+device 34 device34
+device 35 device35
+device 36 device36
+device 37 device37
+device 38 device38
+device 39 device39
+device 40 device40
+device 41 device41
+device 42 device42
+device 43 device43
+device 44 device44
+device 45 device45
+device 46 device46
+device 47 device47
+device 48 device48
+device 49 device49
+device 50 device50
+device 51 device51
+device 52 device52
+device 53 device53
+device 54 device54
+device 55 device55
+device 56 device56
+device 57 device57
+device 58 device58
+device 59 device59
+device 60 device60
+device 61 device61
+device 62 device62
+device 63 device63
+device 64 device64
+device 65 device65
+device 66 device66
+device 67 device67
+device 68 device68
+device 69 device69
+device 70 device70
+device 71 device71
+device 72 device72
+device 73 device73
+device 74 device74
+device 75 device75
+device 76 device76
+device 77 device77
+device 78 device78
+device 79 device79
+device 80 device80
+device 81 device81
+device 82 device82
+device 83 device83
+device 84 device84
+device 85 device85
+device 86 device86
+device 87 device87
+device 88 device88
+device 89 device89
+device 90 device90
+device 91 device91
+device 92 device92
+device 93 device93
+device 94 device94
+device 95 device95
+device 96 device96
+device 97 device97
+device 98 device98
+device 99 device99
+
+# types
+type 0 device
+type 1 host
+type 2 rack
+type 3 default
+
+# buckets
+host host0 {
+ id -1 # do not change unnecessarily
+ # weight 4.000
+ alg straw
+ hash 0 # rjenkins1
+ item device0 weight 1.000
+ item device1 weight 1.000
+ item device2 weight 1.000
+ item device3 weight 1.000
+}
+host host1 {
+ id -2 # do not change unnecessarily
+ # weight 4.000
+ alg straw
+ hash 0 # rjenkins1
+ item device4 weight 1.000
+ item device5 weight 1.000
+ item device6 weight 1.000
+ item device7 weight 1.000
+}
+host host2 {
+ id -3 # do not change unnecessarily
+ # weight 4.000
+ alg straw
+ hash 0 # rjenkins1
+ item device8 weight 1.000
+ item device9 weight 1.000
+ item device10 weight 1.000
+ item device11 weight 1.000
+}
+host host3 {
+ id -4 # do not change unnecessarily
+ # weight 4.000
+ alg straw
+ hash 0 # rjenkins1
+ item device12 weight 1.000
+ item device13 weight 1.000
+ item device14 weight 1.000
+ item device15 weight 1.000
+}
+host host4 {
+ id -5 # do not change unnecessarily
+ # weight 4.000
+ alg straw
+ hash 0 # rjenkins1
+ item device16 weight 1.000
+ item device17 weight 1.000
+ item device18 weight 1.000
+ item device19 weight 1.000
+}
+host host5 {
+ id -6 # do not change unnecessarily
+ # weight 4.000
+ alg straw
+ hash 0 # rjenkins1
+ item device20 weight 1.000
+ item device21 weight 1.000
+ item device22 weight 1.000
+ item device23 weight 1.000
+}
+host host6 {
+ id -7 # do not change unnecessarily
+ # weight 4.000
+ alg straw
+ hash 0 # rjenkins1
+ item device24 weight 1.000
+ item device25 weight 1.000
+ item device26 weight 1.000
+ item device27 weight 1.000
+}
+host host7 {
+ id -8 # do not change unnecessarily
+ # weight 4.000
+ alg straw
+ hash 0 # rjenkins1
+ item device28 weight 1.000
+ item device29 weight 1.000
+ item device30 weight 1.000
+ item device31 weight 1.000
+}
+host host8 {
+ id -9 # do not change unnecessarily
+ # weight 4.000
+ alg straw
+ hash 0 # rjenkins1
+ item device32 weight 1.000
+ item device33 weight 1.000
+ item device34 weight 1.000
+ item device35 weight 1.000
+}
+host host9 {
+ id -10 # do not change unnecessarily
+ # weight 4.000
+ alg straw
+ hash 0 # rjenkins1
+ item device36 weight 1.000
+ item device37 weight 1.000
+ item device38 weight 1.000
+ item device39 weight 1.000
+}
+host host10 {
+ id -11 # do not change unnecessarily
+ # weight 4.000
+ alg straw
+ hash 0 # rjenkins1
+ item device40 weight 1.000
+ item device41 weight 1.000
+ item device42 weight 1.000
+ item device43 weight 1.000
+}
+host host11 {
+ id -12 # do not change unnecessarily
+ # weight 4.000
+ alg straw
+ hash 0 # rjenkins1
+ item device44 weight 1.000
+ item device45 weight 1.000
+ item device46 weight 1.000
+ item device47 weight 1.000
+}
+host host12 {
+ id -13 # do not change unnecessarily
+ # weight 4.000
+ alg straw
+ hash 0 # rjenkins1
+ item device48 weight 1.000
+ item device49 weight 1.000
+ item device50 weight 1.000
+ item device51 weight 1.000
+}
+host host13 {
+ id -14 # do not change unnecessarily
+ # weight 4.000
+ alg straw
+ hash 0 # rjenkins1
+ item device52 weight 1.000
+ item device53 weight 1.000
+ item device54 weight 1.000
+ item device55 weight 1.000
+}
+host host14 {
+ id -15 # do not change unnecessarily
+ # weight 4.000
+ alg straw
+ hash 0 # rjenkins1
+ item device56 weight 1.000
+ item device57 weight 1.000
+ item device58 weight 1.000
+ item device59 weight 1.000
+}
+host host15 {
+ id -16 # do not change unnecessarily
+ # weight 4.000
+ alg straw
+ hash 0 # rjenkins1
+ item device60 weight 1.000
+ item device61 weight 1.000
+ item device62 weight 1.000
+ item device63 weight 1.000
+}
+host host16 {
+ id -17 # do not change unnecessarily
+ # weight 4.000
+ alg straw
+ hash 0 # rjenkins1
+ item device64 weight 1.000
+ item device65 weight 1.000
+ item device66 weight 1.000
+ item device67 weight 1.000
+}
+host host17 {
+ id -18 # do not change unnecessarily
+ # weight 4.000
+ alg straw
+ hash 0 # rjenkins1
+ item device68 weight 1.000
+ item device69 weight 1.000
+ item device70 weight 1.000
+ item device71 weight 1.000
+}
+host host18 {
+ id -19 # do not change unnecessarily
+ # weight 4.000
+ alg straw
+ hash 0 # rjenkins1
+ item device72 weight 1.000
+ item device73 weight 1.000
+ item device74 weight 1.000
+ item device75 weight 1.000
+}
+host host19 {
+ id -20 # do not change unnecessarily
+ # weight 4.000
+ alg straw
+ hash 0 # rjenkins1
+ item device76 weight 1.000
+ item device77 weight 1.000
+ item device78 weight 1.000
+ item device79 weight 1.000
+}
+host host20 {
+ id -21 # do not change unnecessarily
+ # weight 4.000
+ alg straw
+ hash 0 # rjenkins1
+ item device80 weight 1.000
+ item device81 weight 1.000
+ item device82 weight 1.000
+ item device83 weight 1.000
+}
+host host21 {
+ id -22 # do not change unnecessarily
+ # weight 4.000
+ alg straw
+ hash 0 # rjenkins1
+ item device84 weight 1.000
+ item device85 weight 1.000
+ item device86 weight 1.000
+ item device87 weight 1.000
+}
+host host22 {
+ id -23 # do not change unnecessarily
+ # weight 4.000
+ alg straw
+ hash 0 # rjenkins1
+ item device88 weight 1.000
+ item device89 weight 1.000
+ item device90 weight 1.000
+ item device91 weight 1.000
+}
+host host23 {
+ id -24 # do not change unnecessarily
+ # weight 4.000
+ alg straw
+ hash 0 # rjenkins1
+ item device92 weight 1.000
+ item device93 weight 1.000
+ item device94 weight 1.000
+ item device95 weight 1.000
+}
+host host24 {
+ id -25 # do not change unnecessarily
+ # weight 4.000
+ alg straw
+ hash 0 # rjenkins1
+ item device96 weight 1.000
+ item device97 weight 1.000
+ item device98 weight 1.000
+ item device99 weight 1.000
+}
+rack rack0 {
+ id -26 # do not change unnecessarily
+ # weight 40.000
+ alg straw
+ hash 0 # rjenkins1
+ item host0 weight 4.000
+ item host1 weight 4.000
+ item host2 weight 4.000
+ item host3 weight 4.000
+ item host4 weight 4.000
+ item host5 weight 4.000
+ item host6 weight 4.000
+ item host7 weight 4.000
+ item host8 weight 4.000
+ item host9 weight 4.000
+}
+rack rack1 {
+ id -27 # do not change unnecessarily
+ # weight 40.000
+ alg straw
+ hash 0 # rjenkins1
+ item host10 weight 4.000
+ item host11 weight 4.000
+ item host12 weight 4.000
+ item host13 weight 4.000
+ item host14 weight 4.000
+ item host15 weight 4.000
+ item host16 weight 4.000
+ item host17 weight 4.000
+ item host18 weight 4.000
+ item host19 weight 4.000
+}
+rack rack2 {
+ id -28 # do not change unnecessarily
+ # weight 20.000
+ alg straw
+ hash 0 # rjenkins1
+ item host20 weight 4.000
+ item host21 weight 4.000
+ item host22 weight 4.000
+ item host23 weight 4.000
+ item host24 weight 4.000
+}
+
+default root {
+ id -31 # do not change unnecessarily
+ # weight 100.000
+ alg straw
+ hash 0 # rjenkins1
+ item rack1 weight 40.000
+ item rack1 weight 40.000
+ item rack2 weight 20.000
+}
+
+# rules
+rule myrule {
+ ruleset 0
+ type replicated
+ min_size 1
+ max_size 10
+ step take root
+ step choose firstn 2 type rack
+ step chooseleaf indep 4 type host
+ step emit
+}
+
+rule myrule1 {
+ ruleset 1
+ type replicated
+ min_size 1
+ max_size 10
+ step take root
+ step choose firstn 2 type rack
+ step chooseleaf indep 1 type host
+ step emit
+}
+
+# end crush map
diff --git a/src/test/cli/crushtool/test-map-indep.t b/src/test/cli/crushtool/test-map-indep.t
index 5f6dbb3..f4ee371 100644
--- a/src/test/cli/crushtool/test-map-indep.t
+++ b/src/test/cli/crushtool/test-map-indep.t
@@ -1,4 +1,4 @@
- $ crushtool -i "$TESTDIR/test-map-indep.crushmap" --test --show-statistics --rule 1 --set-choose-local-tries 0 --set-choose-local-fallback-tries 0 --set-choose-total-tries 50 --set-chooseleaf-descend-once 2
+ $ crushtool -i "$TESTDIR/test-map-indep.crushmap" --test --show-mappings --show-statistics --rule 1 --set-choose-local-tries 0 --set-choose-local-fallback-tries 0 --set-choose-total-tries 50 --set-chooseleaf-descend-once 2
crushtool successfully built or modified map. Use '-o <file>' to write it out.
rule 1 (metadata), x = 0..1023, numrep = 1..10
CRUSH rule 1 x 0 [36]
diff --git a/src/test/cli/crushtool/test-map-legacy-tunables.t b/src/test/cli/crushtool/test-map-legacy-tunables.t
index 12bf604..fe28c70 100644
--- a/src/test/cli/crushtool/test-map-legacy-tunables.t
+++ b/src/test/cli/crushtool/test-map-legacy-tunables.t
@@ -1,4 +1,4 @@
- $ crushtool -i "$TESTDIR/test-map-a.crushmap" --test --show-statistics --rule 0
+ $ crushtool -i "$TESTDIR/test-map-a.crushmap" --test --show-mappings --show-statistics --rule 0
rule 0 (data), x = 0..1023, numrep = 1..10
CRUSH rule 0 x 0 [36]
CRUSH rule 0 x 1 [876]
diff --git a/src/test/cli/crushtool/test-map-tries-vs-retries.t b/src/test/cli/crushtool/test-map-tries-vs-retries.t
index 8eac255..2a49838 100644
--- a/src/test/cli/crushtool/test-map-tries-vs-retries.t
+++ b/src/test/cli/crushtool/test-map-tries-vs-retries.t
@@ -1,4 +1,4 @@
- $ crushtool -i "$TESTDIR/test-map-tries-vs-retries.crushmap" --test --show-statistics --weight 0 0 --weight 8 0
+ $ crushtool -i "$TESTDIR/test-map-tries-vs-retries.crushmap" --test --show-mappings --show-statistics --weight 0 0 --weight 8 0
rule 0 (replicated_ruleset), x = 0..1023, numrep = 1..10
CRUSH rule 0 x 0 [7]
CRUSH rule 0 x 1 [10]
diff --git a/src/test/cli/crushtool/test-map-vary-r-0.t b/src/test/cli/crushtool/test-map-vary-r-0.t
index 663ef65..eefd862 100644
--- a/src/test/cli/crushtool/test-map-vary-r-0.t
+++ b/src/test/cli/crushtool/test-map-vary-r-0.t
@@ -1,4 +1,4 @@
- $ crushtool -i "$TESTDIR/test-map-vary-r.crushmap" --test --show-statistics --rule 3 --set-chooseleaf-vary-r 0 --weight 0 0 --weight 4 0 --weight 9 0
+ $ crushtool -i "$TESTDIR/test-map-vary-r.crushmap" --test --show-mappings --show-statistics --rule 3 --set-chooseleaf-vary-r 0 --weight 0 0 --weight 4 0 --weight 9 0
crushtool successfully built or modified map. Use '-o <file>' to write it out.
rule 3 (delltestrule), x = 0..1023, numrep = 2..4
CRUSH rule 3 x 0 [94,85]
diff --git a/src/test/cli/crushtool/test-map-vary-r-1.t b/src/test/cli/crushtool/test-map-vary-r-1.t
index 4ac4c22..a21b9d5 100644
--- a/src/test/cli/crushtool/test-map-vary-r-1.t
+++ b/src/test/cli/crushtool/test-map-vary-r-1.t
@@ -1,4 +1,4 @@
- $ crushtool -i "$TESTDIR/test-map-vary-r.crushmap" --test --show-statistics --rule 3 --set-chooseleaf-vary-r 1 --weight 0 0 --weight 4 0 --weight 9 0
+ $ crushtool -i "$TESTDIR/test-map-vary-r.crushmap" --test --show-mappings --show-statistics --rule 3 --set-chooseleaf-vary-r 1 --weight 0 0 --weight 4 0 --weight 9 0
crushtool successfully built or modified map. Use '-o <file>' to write it out.
rule 3 (delltestrule), x = 0..1023, numrep = 2..4
CRUSH rule 3 x 0 [94,6]
diff --git a/src/test/cli/crushtool/test-map-vary-r-2.t b/src/test/cli/crushtool/test-map-vary-r-2.t
index c9e78c6..eaf0542 100644
--- a/src/test/cli/crushtool/test-map-vary-r-2.t
+++ b/src/test/cli/crushtool/test-map-vary-r-2.t
@@ -1,4 +1,4 @@
- $ crushtool -i "$TESTDIR/test-map-vary-r.crushmap" --test --show-statistics --rule 3 --set-chooseleaf-vary-r 2 --weight 0 0 --weight 4 0 --weight 9 0
+ $ crushtool -i "$TESTDIR/test-map-vary-r.crushmap" --test --show-mappings --show-statistics --rule 3 --set-chooseleaf-vary-r 2 --weight 0 0 --weight 4 0 --weight 9 0
crushtool successfully built or modified map. Use '-o <file>' to write it out.
rule 3 (delltestrule), x = 0..1023, numrep = 2..4
CRUSH rule 3 x 0 [94,45]
diff --git a/src/test/cli/crushtool/test-map-vary-r-3.t b/src/test/cli/crushtool/test-map-vary-r-3.t
index ad02e73..31943b2 100644
--- a/src/test/cli/crushtool/test-map-vary-r-3.t
+++ b/src/test/cli/crushtool/test-map-vary-r-3.t
@@ -1,4 +1,4 @@
- $ crushtool -i "$TESTDIR/test-map-vary-r.crushmap" --test --show-statistics --rule 3 --set-chooseleaf-vary-r 3 --weight 0 0 --weight 4 0 --weight 9 0
+ $ crushtool -i "$TESTDIR/test-map-vary-r.crushmap" --test --show-mappings --show-statistics --rule 3 --set-chooseleaf-vary-r 3 --weight 0 0 --weight 4 0 --weight 9 0
crushtool successfully built or modified map. Use '-o <file>' to write it out.
rule 3 (delltestrule), x = 0..1023, numrep = 2..4
CRUSH rule 3 x 0 [94,85]
diff --git a/src/test/cli/crushtool/test-map-vary-r-4.t b/src/test/cli/crushtool/test-map-vary-r-4.t
index 059da77..24cf0ba 100644
--- a/src/test/cli/crushtool/test-map-vary-r-4.t
+++ b/src/test/cli/crushtool/test-map-vary-r-4.t
@@ -1,4 +1,4 @@
- $ crushtool -i "$TESTDIR/test-map-vary-r.crushmap" --test --show-statistics --rule 3 --set-chooseleaf-vary-r 4 --weight 0 0 --weight 4 0 --weight 9 0
+ $ crushtool -i "$TESTDIR/test-map-vary-r.crushmap" --test --show-mappings --show-statistics --rule 3 --set-chooseleaf-vary-r 4 --weight 0 0 --weight 4 0 --weight 9 0
crushtool successfully built or modified map. Use '-o <file>' to write it out.
rule 3 (delltestrule), x = 0..1023, numrep = 2..4
CRUSH rule 3 x 0 [94,85]
diff --git a/src/test/cli/crushtool/tree.template b/src/test/cli/crushtool/tree.template
new file mode 100644
index 0000000..9808578
Binary files /dev/null and b/src/test/cli/crushtool/tree.template differ
diff --git a/src/test/cli/crushtool/tree.template.final b/src/test/cli/crushtool/tree.template.final
new file mode 100644
index 0000000..6af0701
--- /dev/null
+++ b/src/test/cli/crushtool/tree.template.final
@@ -0,0 +1,70 @@
+# begin crush map
+
+# devices
+device 0 device0
+device 1 device1
+device 2 device2
+device 3 device3
+device 4 device4
+device 5 device5
+device 6 device6
+device 7 device7
+
+# types
+type 0 device
+type 1 host
+type 2 cluster
+
+# buckets
+host host0 {
+ id -2 # do not change unnecessarily
+ # weight 8.000
+ alg tree # do not change pos for existing items unnecessarily
+ hash 0 # rjenkins1
+ item device0 weight 1.000 pos 0
+ item device1 weight 1.000 pos 1
+ item device2 weight 1.000 pos 2
+ item device3 weight 1.000 pos 3
+ item device4 weight 1.000 pos 4
+ item device5 weight 1.000 pos 5
+ item device6 weight 1.000 pos 6
+ item device7 weight 1.000 pos 7
+}
+cluster cluster0 {
+ id -1 # do not change unnecessarily
+ # weight 8.000
+ alg tree # do not change pos for existing items unnecessarily
+ hash 0 # rjenkins1
+ item host0 weight 8.000 pos 0
+}
+
+# rules
+rule data {
+ ruleset 0
+ type replicated
+ min_size 1
+ max_size 10
+ step take cluster0
+ step chooseleaf firstn 0 type host
+ step emit
+}
+rule metadata {
+ ruleset 1
+ type replicated
+ min_size 1
+ max_size 10
+ step take cluster0
+ step chooseleaf firstn 0 type host
+ step emit
+}
+rule rbd {
+ ruleset 2
+ type replicated
+ min_size 1
+ max_size 10
+ step take cluster0
+ step chooseleaf firstn 0 type host
+ step emit
+}
+
+# end crush map
diff --git a/src/test/cli/osdmaptool/create-print.t b/src/test/cli/osdmaptool/create-print.t
index 9ebd274..b642cfb 100644
--- a/src/test/cli/osdmaptool/create-print.t
+++ b/src/test/cli/osdmaptool/create-print.t
@@ -11,6 +11,7 @@
tunable choose_local_fallback_tries 0
tunable choose_total_tries 50
tunable chooseleaf_descend_once 1
+ tunable straw_calc_version 1
# devices
device 0 osd.0
diff --git a/src/test/cli/osdmaptool/create-racks.t b/src/test/cli/osdmaptool/create-racks.t
index 33fa9ee..11e3223 100644
--- a/src/test/cli/osdmaptool/create-racks.t
+++ b/src/test/cli/osdmaptool/create-racks.t
@@ -10,6 +10,7 @@
tunable choose_local_fallback_tries 0
tunable choose_total_tries 50
tunable chooseleaf_descend_once 1
+ tunable straw_calc_version 1
# devices
device 0 device0
diff --git a/src/test/cli/osdmaptool/crush.t b/src/test/cli/osdmaptool/crush.t
index 5833da8..584da09 100644
--- a/src/test/cli/osdmaptool/crush.t
+++ b/src/test/cli/osdmaptool/crush.t
@@ -6,5 +6,5 @@
osdmaptool: exported crush map to oc
$ osdmaptool --import-crush oc myosdmap
osdmaptool: osdmap file 'myosdmap'
- osdmaptool: imported 486 byte crush map from oc
+ osdmaptool: imported 487 byte crush map from oc
osdmaptool: writing epoch 3 to myosdmap
diff --git a/src/test/cli/osdmaptool/help.t b/src/test/cli/osdmaptool/help.t
index 2c5a41d..02f56ef 100644
--- a/src/test/cli/osdmaptool/help.t
+++ b/src/test/cli/osdmaptool/help.t
@@ -4,6 +4,7 @@
--export-crush <file> write osdmap's crush map to <file>
--import-crush <file> replace osdmap's crush map with <file>
--test-map-pgs [--pool <poolid>] map all pgs
+ --test-map-pgs-dump [--pool <poolid>] map all pgs
--mark-up-in mark osds up and in (but do not persist)
--clear-temp clear pg_temp and primary_temp
--test-random do random placements
diff --git a/src/test/cli/osdmaptool/missing-argument.t b/src/test/cli/osdmaptool/missing-argument.t
index d0740ab..87ab3eb 100644
--- a/src/test/cli/osdmaptool/missing-argument.t
+++ b/src/test/cli/osdmaptool/missing-argument.t
@@ -4,6 +4,7 @@
--export-crush <file> write osdmap's crush map to <file>
--import-crush <file> replace osdmap's crush map with <file>
--test-map-pgs [--pool <poolid>] map all pgs
+ --test-map-pgs-dump [--pool <poolid>] map all pgs
--mark-up-in mark osds up and in (but do not persist)
--clear-temp clear pg_temp and primary_temp
--test-random do random placements
diff --git a/src/test/cli/osdmaptool/test-map-pgs.t b/src/test/cli/osdmaptool/test-map-pgs.t
index b64f2d9..222bd76 100644
--- a/src/test/cli/osdmaptool/test-map-pgs.t
+++ b/src/test/cli/osdmaptool/test-map-pgs.t
@@ -24,7 +24,7 @@
pool 1 pg_num 8000
pool 2 pg_num 8000
$ TOTAL=$((POOL_COUNT * $PG_NUM))
- $ PATTERN=$(echo "size $SIZE\t$TOTAL")
+ $ PATTERN=$(echo "size $SIZE.$TOTAL")
$ grep "$PATTERN" $OUT || cat "$OUT"
size 3\t24000 (esc)
$ STATS_CRUSH=$(grep '^ avg ' "$OUT")
@@ -39,7 +39,7 @@
pool 1 pg_num 8000
pool 2 pg_num 8000
$ TOTAL=$((POOL_COUNT * $PG_NUM))
- $ PATTERN=$(echo "size $SIZE\t$TOTAL")
+ $ PATTERN=$(echo "size $SIZE.$TOTAL")
$ grep "$PATTERN" $OUT || cat "$OUT"
size 3\t24000 (esc)
$ STATS_RANDOM=$(grep '^ avg ' "$OUT")
diff --git a/src/test/common/histogram.cc b/src/test/common/histogram.cc
index 2fd3cfe..765f4c9 100644
--- a/src/test/common/histogram.cc
+++ b/src/test/common/histogram.cc
@@ -47,61 +47,64 @@ TEST(Histogram, Set) {
}
TEST(Histogram, Position) {
- {
- pow2_hist_t h;
- uint64_t lb, ub;
- h.add(0);
- ASSERT_EQ(-1, h.get_position_micro(-20, &lb, &ub));
- }
- {
- pow2_hist_t h;
- h.add(0);
- uint64_t lb, ub;
- h.get_position_micro(0, &lb, &ub);
- ASSERT_EQ(0u, lb);
- ASSERT_EQ(1000000u, ub);
- h.add(0);
- h.add(0);
- h.add(0);
- h.get_position_micro(0, &lb, &ub);
- ASSERT_EQ(0u, lb);
- ASSERT_EQ(1000000u, ub);
- }
- {
- pow2_hist_t h;
- h.add(1);
- h.add(1);
- uint64_t lb, ub;
- h.get_position_micro(0, &lb, &ub);
- ASSERT_EQ(0u, lb);
- ASSERT_EQ(0u, ub);
- h.add(0);
- h.get_position_micro(0, &lb, &ub);
- ASSERT_EQ(0u, lb);
- ASSERT_EQ(333333u, ub);
- h.get_position_micro(1, &lb, &ub);
- ASSERT_EQ(333333u, lb);
- ASSERT_EQ(1000000u, ub);
- }
- {
- pow2_hist_t h;
- h.h.resize(10, 0);
- h.h[0] = 1;
- h.h[5] = 1;
- uint64_t lb, ub;
- h.get_position_micro(4, &lb, &ub);
- ASSERT_EQ(500000u, lb);
- ASSERT_EQ(500000u, ub);
- }
- {
- pow2_hist_t h;
- h.h.resize(10, 0);
- h.h[0] = UINT_MAX;
- h.h[5] = UINT_MAX;
- uint64_t lb, ub;
- ASSERT_EQ(500000u, lb);
- ASSERT_EQ(500000u, ub);
- }
+ pow2_hist_t h;
+ uint64_t lb, ub;
+ h.add(0);
+ ASSERT_EQ(-1, h.get_position_micro(-20, &lb, &ub));
+}
+
+TEST(Histogram, Position1) {
+ pow2_hist_t h;
+ h.add(0);
+ uint64_t lb, ub;
+ h.get_position_micro(0, &lb, &ub);
+ ASSERT_EQ(0u, lb);
+ ASSERT_EQ(1000000u, ub);
+ h.add(0);
+ h.add(0);
+ h.add(0);
+ h.get_position_micro(0, &lb, &ub);
+ ASSERT_EQ(0u, lb);
+ ASSERT_EQ(1000000u, ub);
+}
+
+TEST(Histogram, Position2) {
+ pow2_hist_t h;
+ h.add(1);
+ h.add(1);
+ uint64_t lb, ub;
+ h.get_position_micro(0, &lb, &ub);
+ ASSERT_EQ(0u, lb);
+ ASSERT_EQ(0u, ub);
+ h.add(0);
+ h.get_position_micro(0, &lb, &ub);
+ ASSERT_EQ(0u, lb);
+ ASSERT_EQ(333333u, ub);
+ h.get_position_micro(1, &lb, &ub);
+ ASSERT_EQ(333333u, lb);
+ ASSERT_EQ(1000000u, ub);
+}
+
+TEST(Histogram, Position3) {
+ pow2_hist_t h;
+ h.h.resize(10, 0);
+ h.h[0] = 1;
+ h.h[5] = 1;
+ uint64_t lb, ub;
+ h.get_position_micro(4, &lb, &ub);
+ ASSERT_EQ(500000u, lb);
+ ASSERT_EQ(500000u, ub);
+}
+
+TEST(Histogram, Position4) {
+ pow2_hist_t h;
+ h.h.resize(10, 0);
+ h.h[0] = UINT_MAX;
+ h.h[5] = UINT_MAX;
+ uint64_t lb, ub;
+ h.get_position_micro(4, &lb, &ub);
+ ASSERT_EQ(0u, lb);
+ ASSERT_EQ(0u, ub);
}
TEST(Histogram, Decay) {
diff --git a/src/test/common/test_io_priority.cc b/src/test/common/test_io_priority.cc
new file mode 100644
index 0000000..b2d4e26
--- /dev/null
+++ b/src/test/common/test_io_priority.cc
@@ -0,0 +1,51 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2014 Red Hat <contact at redhat.com>
+ *
+ * Author: Loic Dachary <loic at dachary.org>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <errno.h>
+#include <gtest/gtest.h>
+
+#include "common/io_priority.h"
+
+TEST(io_priority, ceph_ioprio_string_to_class) {
+ ASSERT_EQ(IOPRIO_CLASS_IDLE, ceph_ioprio_string_to_class("idle"));
+ ASSERT_EQ(IOPRIO_CLASS_IDLE, ceph_ioprio_string_to_class("IDLE"));
+
+ ASSERT_EQ(IOPRIO_CLASS_BE, ceph_ioprio_string_to_class("be"));
+ ASSERT_EQ(IOPRIO_CLASS_BE, ceph_ioprio_string_to_class("BE"));
+ ASSERT_EQ(IOPRIO_CLASS_BE, ceph_ioprio_string_to_class("besteffort"));
+ ASSERT_EQ(IOPRIO_CLASS_BE, ceph_ioprio_string_to_class("BESTEFFORT"));
+ ASSERT_EQ(IOPRIO_CLASS_BE, ceph_ioprio_string_to_class("best effort"));
+ ASSERT_EQ(IOPRIO_CLASS_BE, ceph_ioprio_string_to_class("BEST EFFORT"));
+
+ ASSERT_EQ(IOPRIO_CLASS_RT, ceph_ioprio_string_to_class("rt"));
+ ASSERT_EQ(IOPRIO_CLASS_RT, ceph_ioprio_string_to_class("RT"));
+ ASSERT_EQ(IOPRIO_CLASS_RT, ceph_ioprio_string_to_class("realtime"));
+ ASSERT_EQ(IOPRIO_CLASS_RT, ceph_ioprio_string_to_class("REALTIME"));
+ ASSERT_EQ(IOPRIO_CLASS_RT, ceph_ioprio_string_to_class("real time"));
+ ASSERT_EQ(IOPRIO_CLASS_RT, ceph_ioprio_string_to_class("REAL TIME"));
+
+ ASSERT_EQ(-EINVAL, ceph_ioprio_string_to_class("invalid"));
+}
+
+/*
+ * Local Variables:
+ * compile-command: "cd ../.. ;
+ * make -j4 unittest_io_priority &&
+ * libtool --mode=execute valgrind --tool=memcheck --leak-check=full \
+ * ./unittest_io_priority
+ * "
+ * End:
+ */
diff --git a/src/test/crush/TestCrushWrapper.cc b/src/test/crush/TestCrushWrapper.cc
index 34d6401..f5dcfa1 100644
--- a/src/test/crush/TestCrushWrapper.cc
+++ b/src/test/crush/TestCrushWrapper.cc
@@ -67,6 +67,166 @@ TEST(CrushWrapper, get_immediate_parent) {
delete c;
}
+TEST(CrushWrapper, straw_zero) {
+ // zero weight items should have no effect on placement.
+
+ CrushWrapper *c = new CrushWrapper;
+ const int ROOT_TYPE = 1;
+ c->set_type_name(ROOT_TYPE, "root");
+ const int OSD_TYPE = 0;
+ c->set_type_name(OSD_TYPE, "osd");
+
+ int n = 5;
+ int items[n], weights[n];
+ for (int i=0; i <n; ++i) {
+ items[i] = i;
+ weights[i] = 0x10000 * (n-i-1);
+ }
+
+ c->set_max_devices(n);
+
+ string root_name0("root0");
+ int root0;
+ EXPECT_EQ(0, c->add_bucket(0, CRUSH_BUCKET_STRAW, CRUSH_HASH_RJENKINS1,
+ ROOT_TYPE, n, items, weights, &root0));
+ EXPECT_EQ(0, c->set_item_name(root0, root_name0));
+
+ string name0("rule0");
+ int ruleset0 = c->add_simple_ruleset(name0, root_name0, "osd",
+ "firstn", pg_pool_t::TYPE_REPLICATED);
+ EXPECT_EQ(0, ruleset0);
+
+ string root_name1("root1");
+ int root1;
+ EXPECT_EQ(0, c->add_bucket(0, CRUSH_BUCKET_STRAW, CRUSH_HASH_RJENKINS1,
+ ROOT_TYPE, n-1, items, weights, &root1));
+ EXPECT_EQ(0, c->set_item_name(root1, root_name1));
+
+ string name1("rule1");
+ int ruleset1 = c->add_simple_ruleset(name1, root_name1, "osd",
+ "firstn", pg_pool_t::TYPE_REPLICATED);
+ EXPECT_EQ(1, ruleset1);
+
+ vector<unsigned> reweight(n, 0x10000);
+ for (int i=0; i<10000; ++i) {
+ vector<int> out0, out1;
+ c->do_rule(ruleset0, i, out0, 1, reweight);
+ ASSERT_EQ(1u, out0.size());
+ c->do_rule(ruleset1, i, out1, 1, reweight);
+ ASSERT_EQ(1u, out1.size());
+ ASSERT_EQ(out0[0], out1[0]);
+ //cout << i << "\t" << out0 << "\t" << out1 << std::endl;
+ }
+}
+
+TEST(CrushWrapper, straw_same) {
+ // items with the same weight should map about the same as items
+ // with very similar weights.
+ //
+ // give the 0 vector a paired stair pattern, with dup weights. note
+ // that the original straw flaw does not appear when there are 2 of
+ // the initial weight, but it does when there is just 1.
+ //
+ // give the 1 vector a similar stair pattern, but make the same
+ // steps weights slightly different (no dups). this works.
+ //
+ // compare the result and verify that the resulting mapping is
+ // almost identical.
+
+ CrushWrapper *c = new CrushWrapper;
+ const int ROOT_TYPE = 1;
+ c->set_type_name(ROOT_TYPE, "root");
+ const int OSD_TYPE = 0;
+ c->set_type_name(OSD_TYPE, "osd");
+
+ int n = 10;
+ int items[n], weights[n];
+ for (int i=0; i <n; ++i) {
+ items[i] = i;
+ weights[i] = 0x10000 * ((i+1)/2 + 1);
+ }
+
+ c->set_max_devices(n);
+
+ string root_name0("root0");
+ int root0;
+ EXPECT_EQ(0, c->add_bucket(0, CRUSH_BUCKET_STRAW, CRUSH_HASH_RJENKINS1,
+ ROOT_TYPE, n, items, weights, &root0));
+ EXPECT_EQ(0, c->set_item_name(root0, root_name0));
+
+ string name0("rule0");
+ int ruleset0 = c->add_simple_ruleset(name0, root_name0, "osd",
+ "firstn", pg_pool_t::TYPE_REPLICATED);
+ EXPECT_EQ(0, ruleset0);
+
+ for (int i=0; i <n; ++i) {
+ items[i] = i;
+ weights[i] = 0x10000 * ((i+1)/2 + 1) + (i%2)*100;
+ }
+
+ string root_name1("root1");
+ int root1;
+ EXPECT_EQ(0, c->add_bucket(0, CRUSH_BUCKET_STRAW, CRUSH_HASH_RJENKINS1,
+ ROOT_TYPE, n, items, weights, &root1));
+ EXPECT_EQ(0, c->set_item_name(root1, root_name1));
+
+ string name1("rule1");
+ int ruleset1 = c->add_simple_ruleset(name1, root_name1, "osd",
+ "firstn", pg_pool_t::TYPE_REPLICATED);
+ EXPECT_EQ(1, ruleset1);
+
+ if (0) {
+ crush_bucket_straw *sb0 = reinterpret_cast<crush_bucket_straw*>(c->get_crush_map()->buckets[-1-root0]);
+ crush_bucket_straw *sb1 = reinterpret_cast<crush_bucket_straw*>(c->get_crush_map()->buckets[-1-root1]);
+
+ for (int i=0; i<n; ++i) {
+ cout << i
+ << "\t" << sb0->item_weights[i]
+ << "\t" << sb1->item_weights[i]
+ << "\t"
+ << "\t" << sb0->straws[i]
+ << "\t" << sb1->straws[i]
+ << std::endl;
+ }
+ }
+
+ if (0) {
+ JSONFormatter jf(true);
+ jf.open_object_section("crush");
+ c->dump(&jf);
+ jf.close_section();
+ jf.flush(cout);
+ }
+
+ vector<int> sum0(n, 0), sum1(n, 0);
+ vector<unsigned> reweight(n, 0x10000);
+ int different = 0;
+ int max = 100000;
+ for (int i=0; i<max; ++i) {
+ vector<int> out0, out1;
+ c->do_rule(ruleset0, i, out0, 1, reweight);
+ ASSERT_EQ(1u, out0.size());
+ c->do_rule(ruleset1, i, out1, 1, reweight);
+ ASSERT_EQ(1u, out1.size());
+ sum0[out0[0]]++;
+ sum1[out1[0]]++;
+ if (out0[0] != out1[0])
+ different++;
+ }
+ for (int i=0; i<n; ++i) {
+ cout << i
+ << "\t" << ((double)weights[i] / (double)weights[0])
+ << "\t" << sum0[i] << "\t" << ((double)sum0[i]/(double)sum0[0])
+ << "\t" << sum1[i] << "\t" << ((double)sum1[i]/(double)sum1[0])
+ << std::endl;
+ }
+ double ratio = ((double)different / (double)max);
+ cout << different << " of " << max << " = "
+ << ratio
+ << " different" << std::endl;
+ ASSERT_LT(ratio, .001);
+}
+
TEST(CrushWrapper, move_bucket) {
CrushWrapper *c = new CrushWrapper;
@@ -290,6 +450,116 @@ TEST(CrushWrapper, update_item) {
delete c;
}
+TEST(CrushWrapper, adjust_item_weight) {
+ CrushWrapper *c = new CrushWrapper;
+
+ const int ROOT_TYPE = 2;
+ c->set_type_name(ROOT_TYPE, "root");
+ const int HOST_TYPE = 1;
+ c->set_type_name(HOST_TYPE, "host");
+ const int OSD_TYPE = 0;
+ c->set_type_name(OSD_TYPE, "osd");
+
+ int rootno;
+ c->add_bucket(0, CRUSH_BUCKET_STRAW, CRUSH_HASH_RJENKINS1,
+ ROOT_TYPE, 0, NULL, NULL, &rootno);
+ c->set_item_name(rootno, "default");
+
+ const string HOST0("host0");
+ int host0;
+ c->add_bucket(0, CRUSH_BUCKET_STRAW, CRUSH_HASH_RJENKINS1,
+ HOST_TYPE, 0, NULL, NULL, &host0);
+ c->set_item_name(host0, HOST0);
+
+ const string FAKE("fake");
+ int hostfake;
+ c->add_bucket(0, CRUSH_BUCKET_STRAW, CRUSH_HASH_RJENKINS1,
+ HOST_TYPE, 0, NULL, NULL, &hostfake);
+ c->set_item_name(hostfake, FAKE);
+
+ int item = 0;
+
+ // construct crush map
+
+ {
+ map<string,string> loc;
+ loc["host"] = "host0";
+ float host_weight = 2.0;
+ int bucket_id = 0;
+
+ item = 0;
+ EXPECT_EQ(0, c->insert_item(g_ceph_context, item, 1.0,
+ "osd." + stringify(item), loc));
+ item = 1;
+ EXPECT_EQ(0, c->insert_item(g_ceph_context, item, 1.0,
+ "osd." + stringify(item), loc));
+
+ bucket_id = c->get_item_id("host0");
+ EXPECT_EQ(true, c->bucket_exists(bucket_id));
+ EXPECT_EQ(host_weight, c->get_bucket_weightf(bucket_id));
+
+ }
+
+ {
+ map<string,string> loc;
+ loc["host"] = "fake";
+ float host_weight = 2.0;
+ int bucket_id = 0;
+
+ item = 0;
+ EXPECT_EQ(0, c->insert_item(g_ceph_context, item, 1.0,
+ "osd." + stringify(item), loc));
+ item = 1;
+ EXPECT_EQ(0, c->insert_item(g_ceph_context, item, 1.0,
+ "osd." + stringify(item), loc));
+
+ bucket_id = c->get_item_id("fake");
+ EXPECT_EQ(true, c->bucket_exists(bucket_id));
+ EXPECT_EQ(host_weight, c->get_bucket_weightf(bucket_id));
+ }
+
+ //
+ // When there is:
+ //
+ // default --> host0 --> osd.0 1.0
+ // | |
+ // | +-> osd.1 1.0
+ // |
+ // +-> fake --> osd.0 1.0
+ // |
+ // +-> osd.1 1.0
+ //
+ // Trying to adjust osd.0 weight to 2.0 in all buckets
+ // Trying to adjust osd.1 weight to 2.0 in host=fake
+ //
+ // So the crush map will be:
+ //
+ // default --> host0 --> osd.0 2.0
+ // | |
+ // | +-> osd.1 1.0
+ // |
+ // +-> fake --> osd.0 2.0
+ // |
+ // +-> osd.1 2.0
+ //
+
+ float original_weight = 1.0;
+ float modified_weight = 2.0;
+ map<string,string> loc_one, loc_two;
+ loc_one["host"] = "host0";
+ loc_two["host"] = "fake";
+
+ item = 0;
+ EXPECT_EQ(2, c->adjust_item_weightf(g_ceph_context, item, modified_weight));
+ EXPECT_EQ(modified_weight, c->get_item_weightf_in_loc(item, loc_one));
+ EXPECT_EQ(modified_weight, c->get_item_weightf_in_loc(item, loc_two));
+
+ item = 1;
+ EXPECT_EQ(1, c->adjust_item_weightf_in_loc(g_ceph_context, item, modified_weight, loc_two));
+ EXPECT_EQ(original_weight, c->get_item_weightf_in_loc(item, loc_one));
+ EXPECT_EQ(modified_weight, c->get_item_weightf_in_loc(item, loc_two));
+}
+
TEST(CrushWrapper, insert_item) {
CrushWrapper *c = new CrushWrapper;
diff --git a/src/test/crush/indep.cc b/src/test/crush/indep.cc
index 896e58f..dd0b542 100644
--- a/src/test/crush/indep.cc
+++ b/src/test/crush/indep.cc
@@ -51,18 +51,21 @@ CrushWrapper *build_indep_map(CephContext *cct, int num_rack, int num_host,
}
}
}
-
- crush_rule *rule = crush_make_rule(4, 0, 123, 1, 20);
- assert(rule);
- crush_rule_set_step(rule, 0, CRUSH_RULE_SET_CHOOSELEAF_TRIES, 10, 0);
- crush_rule_set_step(rule, 1, CRUSH_RULE_TAKE, rootno, 0);
- crush_rule_set_step(rule, 2,
- CRUSH_RULE_CHOOSELEAF_INDEP,
- CRUSH_CHOOSE_N,
- 1);
- crush_rule_set_step(rule, 3, CRUSH_RULE_EMIT, 0, 0);
- int rno = crush_add_rule(c->crush, rule, -1);
- c->set_rule_name(rno, "data");
+ int ret;
+ int ruleno = 0;
+ int ruleset = 0;
+ ruleno = ruleset;
+ ret = c->add_rule(4, ruleset, 123, 1, 20, ruleno);
+ assert(ret == ruleno);
+ ret = c->set_rule_step(ruleno, 0, CRUSH_RULE_SET_CHOOSELEAF_TRIES, 10, 0);
+ assert(ret == 0);
+ ret = c->set_rule_step(ruleno, 1, CRUSH_RULE_TAKE, rootno, 0);
+ assert(ret == 0);
+ ret = c->set_rule_step(ruleno, 2, CRUSH_RULE_CHOOSELEAF_INDEP, CRUSH_CHOOSE_N, 1);
+ assert(ret == 0);
+ ret = c->set_rule_step(ruleno, 3, CRUSH_RULE_EMIT, 0, 0);
+ assert(ret == 0);
+ c->set_rule_name(ruleno, "data");
if (false) {
Formatter *f = new_formatter("json-pretty");
@@ -140,7 +143,7 @@ TEST(CRUSH, indep_out_alt) {
c->dump_tree(weight, &cout, NULL);
// need more retries to get 9/9 hosts for x in 0..99
- c->crush->choose_total_tries = 100;
+ c->set_choose_total_tries(100);
for (int x = 0; x < 100; ++x) {
vector<int> out;
c->do_rule(0, x, out, 9, weight);
@@ -166,7 +169,7 @@ TEST(CRUSH, indep_out_contig) {
weight[i] = 0;
c->dump_tree(weight, &cout, NULL);
- c->crush->choose_total_tries = 100;
+ c->set_choose_total_tries(100);
for (int x = 0; x < 100; ++x) {
vector<int> out;
c->do_rule(0, x, out, 7, weight);
@@ -185,7 +188,7 @@ TEST(CRUSH, indep_out_contig) {
TEST(CRUSH, indep_out_progressive) {
CrushWrapper *c = build_indep_map(g_ceph_context, 3, 3, 3);
- c->crush->choose_total_tries = 100;
+ c->set_choose_total_tries(100);
vector<__u32> tweight(c->get_max_devices(), 0x10000);
c->dump_tree(tweight, &cout, NULL);
diff --git a/src/test/erasure-code/Makefile.am b/src/test/erasure-code/Makefile.am
index fdbe003..c91eef9 100644
--- a/src/test/erasure-code/Makefile.am
+++ b/src/test/erasure-code/Makefile.am
@@ -9,6 +9,14 @@ ceph_erasure_code_benchmark_LDADD += -ldl
endif
bin_DEBUGPROGRAMS += ceph_erasure_code_benchmark
+ceph_erasure_code_non_regression_SOURCES = \
+ test/erasure-code/ceph_erasure_code_non_regression.cc
+ceph_erasure_code_non_regression_LDADD = $(LIBOSD) $(LIBCOMMON) $(BOOST_PROGRAM_OPTIONS_LIBS) $(CEPH_GLOBAL)
+if LINUX
+ceph_erasure_code_non_regression_LDADD += -ldl
+endif
+noinst_PROGRAMS += ceph_erasure_code_non_regression
+
ceph_erasure_code_SOURCES = \
test/erasure-code/ceph_erasure_code.cc
ceph_erasure_code_LDADD = $(LIBOSD) $(LIBCOMMON) $(BOOST_PROGRAM_OPTIONS_LIBS) $(CEPH_GLOBAL)
diff --git a/src/test/erasure-code/TestErasureCodeJerasure.cc b/src/test/erasure-code/TestErasureCodeJerasure.cc
index 5c637da..4b768a8 100644
--- a/src/test/erasure-code/TestErasureCodeJerasure.cc
+++ b/src/test/erasure-code/TestErasureCodeJerasure.cc
@@ -288,36 +288,6 @@ TEST(ErasureCodeTest, create_ruleset)
}
}
- //
- // The ruleid may be different from the ruleset when a crush rule is
- // removed because the removed ruleid will be reused but the removed
- // ruleset will not be reused.
- //
- // This also asserts that the create_ruleset() method returns a
- // ruleset and not a ruleid http://tracker.ceph.com/issues/9044
- //
- {
- stringstream ss;
- ErasureCodeJerasureReedSolomonVandermonde jerasure;
- map<std::string,std::string> parameters;
- parameters["k"] = "2";
- parameters["m"] = "2";
- parameters["w"] = "8";
- jerasure.init(parameters);
- int FIRST = jerasure.create_ruleset("FIRST", *c, &ss);
- int SECOND = jerasure.create_ruleset("SECOND", *c, &ss);
- int FIRST_ruleid = c->get_rule_id("FIRST");
- EXPECT_EQ(0, c->remove_rule(FIRST_ruleid));
- int ruleset = jerasure.create_ruleset("myrule", *c, &ss);
- EXPECT_NE(FIRST, ruleset);
- EXPECT_NE(SECOND, ruleset);
- EXPECT_NE(ruleset, c->get_rule_id("myrule"));
- int SECOND_ruleid = c->get_rule_id("SECOND");
- EXPECT_EQ(0, c->remove_rule(SECOND_ruleid));
- int myrule_ruleid = c->get_rule_id("myrule");
- EXPECT_EQ(0, c->remove_rule(myrule_ruleid));
- }
-
{
stringstream ss;
ErasureCodeJerasureReedSolomonVandermonde jerasure;
diff --git a/src/test/erasure-code/ceph_erasure_code_non_regression.cc b/src/test/erasure-code/ceph_erasure_code_non_regression.cc
new file mode 100644
index 0000000..c04accf
--- /dev/null
+++ b/src/test/erasure-code/ceph_erasure_code_non_regression.cc
@@ -0,0 +1,325 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph distributed storage system
+ *
+ * Red Hat (C) 2014 Red Hat <contact at redhat.com>
+ *
+ * Author: Loic Dachary <loic at dachary.org>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <errno.h>
+#include <boost/scoped_ptr.hpp>
+#include <boost/lexical_cast.hpp>
+#include <boost/program_options/option.hpp>
+#include <boost/program_options/options_description.hpp>
+#include <boost/program_options/variables_map.hpp>
+#include <boost/program_options/cmdline.hpp>
+#include <boost/program_options/parsers.hpp>
+#include <boost/algorithm/string.hpp>
+
+#include "global/global_context.h"
+#include "global/global_init.h"
+#include "common/errno.h"
+#include "common/ceph_argparse.h"
+#include "common/config.h"
+#include "erasure-code/ErasureCodePlugin.h"
+
+namespace po = boost::program_options;
+using namespace std;
+
+class ErasureCodeNonRegression {
+ unsigned stripe_width;
+ string plugin;
+ bool create;
+ bool check;
+ string base;
+ string directory;
+ map<string,string> parameters;
+public:
+ int setup(int argc, char** argv);
+ int run();
+ int run_create();
+ int run_check();
+ int decode_erasures(ErasureCodeInterfaceRef erasure_code,
+ set<int> erasures,
+ map<int,bufferlist> chunks);
+ string content_path();
+ string chunk_path(unsigned int chunk);
+};
+
+int ErasureCodeNonRegression::setup(int argc, char** argv) {
+
+ po::options_description desc("Allowed options");
+ desc.add_options()
+ ("help,h", "produce help message")
+ ("stripe-width,s", po::value<int>()->default_value(4 * 1024),
+ "stripe_width, i.e. the size of the buffer to be encoded")
+ ("plugin,p", po::value<string>()->default_value("jerasure"),
+ "erasure code plugin name")
+ ("base", po::value<string>()->default_value("."),
+ "prefix all paths with base")
+ ("parameter,P", po::value<vector<string> >(),
+ "parameters")
+ ("create", "create the erasure coded content in the directory")
+ ("check", "check the content in the directory matches the chunks and vice versa")
+ ;
+
+ po::variables_map vm;
+ po::parsed_options parsed =
+ po::command_line_parser(argc, argv).options(desc).allow_unregistered().run();
+ po::store(
+ parsed,
+ vm);
+ po::notify(vm);
+
+ vector<const char *> ceph_options, def_args;
+ vector<string> ceph_option_strings = po::collect_unrecognized(
+ parsed.options, po::include_positional);
+ ceph_options.reserve(ceph_option_strings.size());
+ for (vector<string>::iterator i = ceph_option_strings.begin();
+ i != ceph_option_strings.end();
+ ++i) {
+ ceph_options.push_back(i->c_str());
+ }
+
+ global_init(
+ &def_args, ceph_options, CEPH_ENTITY_TYPE_CLIENT,
+ CODE_ENVIRONMENT_UTILITY,
+ CINIT_FLAG_NO_DEFAULT_CONFIG_FILE);
+ common_init_finish(g_ceph_context);
+ g_ceph_context->_conf->apply_changes(NULL);
+
+ if (vm.count("help")) {
+ cout << desc << std::endl;
+ return 1;
+ }
+
+ stripe_width = vm["stripe-width"].as<int>();
+ plugin = vm["plugin"].as<string>();
+ base = vm["base"].as<string>();
+ check = vm.count("check") > 0;
+ create = vm.count("create") > 0;
+
+ if (!check && !create) {
+ cerr << "must specify either --check or --create" << endl;
+ return 1;
+ }
+
+ {
+ stringstream path;
+ path << base << "/" << "plugin=" << plugin << " stripe-width=" << stripe_width;
+ directory = path.str();
+ }
+
+ if (vm.count("parameter")) {
+ const vector<string> &p = vm["parameter"].as< vector<string> >();
+ for (vector<string>::const_iterator i = p.begin();
+ i != p.end();
+ ++i) {
+ std::vector<std::string> strs;
+ boost::split(strs, *i, boost::is_any_of("="));
+ if (strs.size() != 2) {
+ cerr << "--parameter " << *i << " ignored because it does not contain exactly one =" << endl;
+ } else {
+ parameters[strs[0]] = strs[1];
+ }
+ if (strs[0] != "directory")
+ directory += " " + *i;
+ }
+ }
+ if (parameters.count("directory") == 0)
+ parameters["directory"] = ".libs";
+
+ return 0;
+}
+
+int ErasureCodeNonRegression::run()
+ {
+ int ret = 0;
+ if(create && (ret = run_create()))
+ return ret;
+ if(check && (ret = run_check()))
+ return ret;
+ return ret;
+}
+
+int ErasureCodeNonRegression::run_create()
+{
+ ErasureCodePluginRegistry &instance = ErasureCodePluginRegistry::instance();
+ ErasureCodeInterfaceRef erasure_code;
+ stringstream messages;
+ int code = instance.factory(plugin, parameters, &erasure_code, messages);
+ if (code) {
+ cerr << messages.str() << endl;
+ return code;
+ }
+
+ if (::mkdir(directory.c_str(), 0755)) {
+ cerr << "mkdir(" << directory << "): " << cpp_strerror(errno) << endl;
+ return 1;
+ }
+ unsigned payload_chunk_size = 37;
+ string payload;
+ for (unsigned j = 0; j < payload_chunk_size; ++j)
+ payload.push_back('a' + (rand() % 26));
+ bufferlist in;
+ for (unsigned j = 0; j < stripe_width; j += payload_chunk_size)
+ in.append(payload);
+ if (stripe_width < in.length())
+ in.splice(stripe_width, in.length() - stripe_width);
+ if (in.write_file(content_path().c_str()))
+ return 1;
+ set<int> want_to_encode;
+ for (unsigned int i = 0; i < erasure_code->get_chunk_count(); i++) {
+ want_to_encode.insert(i);
+ }
+ map<int,bufferlist> encoded;
+ code = erasure_code->encode(want_to_encode, in, &encoded);
+ if (code)
+ return code;
+ for (map<int,bufferlist>::iterator chunk = encoded.begin();
+ chunk != encoded.end();
+ chunk++) {
+ if (chunk->second.write_file(chunk_path(chunk->first).c_str()))
+ return 1;
+ }
+ return 0;
+}
+
+int ErasureCodeNonRegression::decode_erasures(ErasureCodeInterfaceRef erasure_code,
+ set<int> erasures,
+ map<int,bufferlist> chunks)
+{
+ map<int,bufferlist> available;
+ for (map<int,bufferlist>::iterator chunk = chunks.begin();
+ chunk != chunks.end();
+ ++chunk) {
+ if (erasures.count(chunk->first) == 0)
+ available[chunk->first] = chunk->second;
+
+ }
+ map<int,bufferlist> decoded;
+ int code = erasure_code->decode(erasures, available, &decoded);
+ if (code)
+ return code;
+ for (set<int>::iterator erasure = erasures.begin();
+ erasure != erasures.end();
+ ++erasure) {
+ if (!chunks[*erasure].contents_equal(decoded[*erasure])) {
+ cerr << "chunk " << *erasure << " incorrectly recovered" << endl;
+ return 1;
+ }
+ }
+ return 0;
+}
+
+int ErasureCodeNonRegression::run_check()
+{
+ ErasureCodePluginRegistry &instance = ErasureCodePluginRegistry::instance();
+ ErasureCodeInterfaceRef erasure_code;
+ stringstream messages;
+ int code = instance.factory(plugin, parameters, &erasure_code, messages);
+ if (code) {
+ cerr << messages.str() << endl;
+ return code;
+ }
+ string errors;
+ bufferlist in;
+ if (in.read_file(content_path().c_str(), &errors)) {
+ cerr << errors << endl;
+ return 1;
+ }
+ set<int> want_to_encode;
+ for (unsigned int i = 0; i < erasure_code->get_chunk_count(); i++) {
+ want_to_encode.insert(i);
+ }
+
+ map<int,bufferlist> encoded;
+ code = erasure_code->encode(want_to_encode, in, &encoded);
+ if (code)
+ return code;
+
+ for (map<int,bufferlist>::iterator chunk = encoded.begin();
+ chunk != encoded.end();
+ chunk++) {
+ bufferlist existing;
+ if (existing.read_file(chunk_path(chunk->first).c_str(), &errors)) {
+ cerr << errors << endl;
+ return 1;
+ }
+ bufferlist &old = chunk->second;
+ if (existing.length() != old.length() ||
+ memcmp(existing.c_str(), old.c_str(), old.length())) {
+ cerr << "chunk " << chunk->first << " encodes differently" << endl;
+ return 1;
+ }
+ }
+
+ // erasing a single chunk is likely to use a specific code path in every plugin
+ set<int> erasures;
+ erasures.clear();
+ erasures.insert(0);
+ code = decode_erasures(erasure_code, erasures, encoded);
+ if (code)
+ return code;
+
+ if (erasure_code->get_chunk_count() - erasure_code->get_data_chunk_count() > 1) {
+ // erasing two chunks is likely to be the general case
+ erasures.clear();
+ erasures.insert(0);
+ erasures.insert(erasure_code->get_chunk_count() - 1);
+ code = decode_erasures(erasure_code, erasures, encoded);
+ if (code)
+ return code;
+ }
+
+ return 0;
+}
+
+string ErasureCodeNonRegression::content_path()
+{
+ stringstream path;
+ path << directory << "/content";
+ return path.str();
+}
+
+string ErasureCodeNonRegression::chunk_path(unsigned int chunk)
+{
+ stringstream path;
+ path << directory << "/" << chunk;
+ return path.str();
+}
+
+int main(int argc, char** argv) {
+ ErasureCodeNonRegression non_regression;
+ int err = non_regression.setup(argc, argv);
+ if (err)
+ return err;
+ return non_regression.run();
+}
+
+/*
+ * Local Variables:
+ * compile-command: "cd ../.. ; make -j4 &&
+ * make ceph_erasure_code_non_regression &&
+ * libtool --mode=execute valgrind --tool=memcheck --leak-check=full \
+ * ./ceph_erasure_code_non_regression \
+ * --plugin jerasure \
+ * --parameter directory=.libs \
+ * --parameter technique=reed_sol_van \
+ * --parameter k=2 \
+ * --parameter m=2 \
+ * --directory /tmp/ceph_erasure_code_non_regression \
+ * --stripe-width 3181 \
+ * --create \
+ * --check
+ * "
+ * End:
+ */
diff --git a/src/test/libcephfs/test.cc b/src/test/libcephfs/test.cc
index 9d917f5..6baadae 100644
--- a/src/test/libcephfs/test.cc
+++ b/src/test/libcephfs/test.cc
@@ -1138,6 +1138,7 @@ TEST(LibCephFS, GetOsdCrushLocation) {
}
}
+ ceph_close(cmount, fd);
ceph_shutdown(cmount);
}
diff --git a/src/test/librados/misc.cc b/src/test/librados/misc.cc
index ea990b5..38443ce 100644
--- a/src/test/librados/misc.cc
+++ b/src/test/librados/misc.cc
@@ -299,7 +299,8 @@ TEST_F(LibRadosMisc, Exec) {
bufferlist::iterator iter = bl.begin();
uint64_t all_features;
::decode(all_features, iter);
- ASSERT_EQ(all_features, (uint64_t)RBD_FEATURES_ALL);
+ // make sure *some* features are specified; don't care which ones
+ ASSERT_NE(all_features, 0);
}
TEST_F(LibRadosMiscPP, ExecPP) {
@@ -311,7 +312,8 @@ TEST_F(LibRadosMiscPP, ExecPP) {
bufferlist::iterator iter = out.begin();
uint64_t all_features;
::decode(all_features, iter);
- ASSERT_EQ(all_features, (uint64_t)RBD_FEATURES_ALL);
+ // make sure *some* features are specified; don't care which ones
+ ASSERT_NE(all_features, 0);
}
TEST_F(LibRadosMiscPP, Operate1PP) {
diff --git a/src/test/librados/snapshots.cc b/src/test/librados/snapshots.cc
index 020af11..01ab62e 100644
--- a/src/test/librados/snapshots.cc
+++ b/src/test/librados/snapshots.cc
@@ -145,6 +145,24 @@ TEST_F(LibRadosSnapshotsPP, SnapGetNamePP) {
EXPECT_EQ(0, ioctx.snap_remove("snapfoo"));
}
+TEST_F(LibRadosSnapshotsPP, SnapCreateRemovePP) {
+ // reproduces http://tracker.ceph.com/issues/10262
+ bufferlist bl;
+ bl.append("foo");
+ ASSERT_EQ(0, ioctx.write("foo", bl, bl.length(), 0));
+ ASSERT_EQ(0, ioctx.snap_create("snapfoo"));
+ ASSERT_EQ(0, ioctx.remove("foo"));
+ ASSERT_EQ(0, ioctx.snap_create("snapbar"));
+
+ librados::ObjectWriteOperation *op = new librados::ObjectWriteOperation();
+ op->create(false);
+ op->remove();
+ ASSERT_EQ(0, ioctx.operate("foo", op));
+
+ EXPECT_EQ(0, ioctx.snap_remove("snapfoo"));
+ EXPECT_EQ(0, ioctx.snap_remove("snapbar"));
+}
+
TEST_F(LibRadosSnapshotsSelfManaged, Snap) {
std::vector<uint64_t> my_snaps;
my_snaps.push_back(-2);
diff --git a/src/test/librados/tier.cc b/src/test/librados/tier.cc
index 4267389..a89d68b 100644
--- a/src/test/librados/tier.cc
+++ b/src/test/librados/tier.cc
@@ -966,6 +966,82 @@ TEST_F(LibRadosTwoPoolsPP, EvictSnap) {
}
}
+// this test case reproduces http://tracker.ceph.com/issues/8629
+TEST_F(LibRadosTwoPoolsPP, EvictSnap2) {
+ // create object
+ {
+ bufferlist bl;
+ bl.append("hi there");
+ ObjectWriteOperation op;
+ op.write_full(bl);
+ ASSERT_EQ(0, ioctx.operate("foo", &op));
+ }
+ // create a snapshot, clone
+ vector<uint64_t> my_snaps(1);
+ ASSERT_EQ(0, ioctx.selfmanaged_snap_create(&my_snaps[0]));
+ ASSERT_EQ(0, ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0],
+ my_snaps));
+ {
+ bufferlist bl;
+ bl.append("ciao!");
+ ObjectWriteOperation op;
+ op.write_full(bl);
+ ASSERT_EQ(0, ioctx.operate("foo", &op));
+ }
+ // configure cache
+ bufferlist inbl;
+ ASSERT_EQ(0, cluster.mon_command(
+ "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
+ "\", \"tierpool\": \"" + cache_pool_name +
+ "\", \"force_nonempty\": \"--force-nonempty\" }",
+ inbl, NULL, NULL));
+ ASSERT_EQ(0, cluster.mon_command(
+ "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
+ "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
+ inbl, NULL, NULL));
+ ASSERT_EQ(0, cluster.mon_command(
+ "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
+ "\", \"mode\": \"writeback\"}",
+ inbl, NULL, NULL));
+
+ // wait for maps to settle
+ cluster.wait_for_latest_osdmap();
+
+ // read, trigger a promote on the head
+ {
+ bufferlist bl;
+ ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
+ ASSERT_EQ('c', bl[0]);
+ }
+
+ // evict
+ {
+ ObjectReadOperation op;
+ op.cache_evict();
+ librados::AioCompletion *completion = cluster.aio_create_completion();
+ ASSERT_EQ(0, cache_ioctx.aio_operate(
+ "foo", completion, &op,
+ librados::OPERATION_IGNORE_CACHE, NULL));
+ completion->wait_for_safe();
+ ASSERT_EQ(0, completion->get_return_value());
+ completion->release();
+ }
+
+ // verify the snapdir is not present in the cache pool
+ {
+ ObjectReadOperation op;
+ librados::snap_set_t snapset;
+ op.list_snaps(&snapset, NULL);
+ ioctx.snap_set_read(librados::SNAP_DIR);
+ librados::AioCompletion *completion = cluster.aio_create_completion();
+ ASSERT_EQ(0, ioctx.aio_operate("foo", completion, &op,
+ librados::OPERATION_IGNORE_CACHE, NULL));
+ completion->wait_for_safe();
+ ASSERT_EQ(-ENOENT, completion->get_return_value());
+ completion->release();
+ }
+}
+
TEST_F(LibRadosTwoPoolsPP, TryFlush) {
// configure cache
bufferlist inbl;
diff --git a/src/test/librbd/test_librbd.cc b/src/test/librbd/test_librbd.cc
index 7f35418..c37d884 100644
--- a/src/test/librbd/test_librbd.cc
+++ b/src/test/librbd/test_librbd.cc
@@ -21,6 +21,7 @@
#include "global/global_context.h"
#include "global/global_init.h"
#include "common/ceph_argparse.h"
+#include "common/config.h"
#include "gtest/gtest.h"
@@ -40,6 +41,8 @@
#include "include/interval_set.h"
#include "include/stringify.h"
+#include <boost/scope_exit.hpp>
+
using namespace std;
static int get_features(bool *old_format, uint64_t *features)
@@ -67,6 +70,8 @@ static int create_image_full(rados_ioctx_t ioctx, const char *name,
{
if (old_format) {
return rbd_create(ioctx, name, size, order);
+ } else if ((features & RBD_FEATURE_STRIPINGV2) != 0) {
+ return rbd_create3(ioctx, name, size, features, order, 65536, 16);
} else {
return rbd_create2(ioctx, name, size, features, order);
}
@@ -1859,6 +1864,107 @@ TEST(LibRBD, ZeroLengthRead)
ASSERT_EQ(0, destroy_one_pool(pool_name, &cluster));
}
+TEST(LibRBD, LargeCacheRead)
+{
+ if (!g_conf->rbd_cache) {
+ std::cout << "SKIPPING due to disabled cache" << std::endl;
+ return;
+ }
+
+ rados_t cluster;
+ rados_ioctx_t ioctx;
+ string pool_name = get_temp_pool_name();
+ ASSERT_EQ("", create_one_pool(pool_name, &cluster));
+ rados_ioctx_create(cluster, pool_name.c_str(), &ioctx);
+
+ uint64_t orig_cache_size = g_conf->rbd_cache_size;
+ g_conf->set_val("rbd_cache_size", "16777216");
+ BOOST_SCOPE_EXIT( (orig_cache_size) ) {
+ g_conf->set_val("rbd_cache_size", stringify(orig_cache_size).c_str());
+ } BOOST_SCOPE_EXIT_END;
+ ASSERT_EQ(16777216, g_conf->rbd_cache_size);
+
+ rbd_image_t image;
+ int order = 0;
+ const char *name = "testimg";
+ uint64_t size = g_conf->rbd_cache_size + 1;
+
+ ASSERT_EQ(0, create_image(ioctx, name, size, &order));
+ ASSERT_EQ(0, rbd_open(ioctx, name, &image, NULL));
+
+ std::string buffer(1 << order, '1');
+ for (size_t offs = 0; offs < size; offs += buffer.size()) {
+ size_t len = std::min<uint64_t>(buffer.size(), size - offs);
+ ASSERT_EQ(static_cast<ssize_t>(len),
+ rbd_write(image, offs, len, buffer.c_str()));
+ }
+
+ ASSERT_EQ(0, rbd_invalidate_cache(image));
+
+ buffer.resize(size);
+ ASSERT_EQ(static_cast<ssize_t>(size-1024), rbd_read(image, 1024, size, &buffer[0]));
+
+ ASSERT_EQ(0, rbd_close(image));
+
+ rados_ioctx_destroy(ioctx);
+ ASSERT_EQ(0, destroy_one_pool(pool_name, &cluster));
+}
+
+TEST(LibRBD, TestPendingAio)
+{
+ rados_t cluster;
+ rados_ioctx_t ioctx;
+ string pool_name = get_temp_pool_name();
+ ASSERT_EQ("", create_one_pool(pool_name, &cluster));
+ rados_ioctx_create(cluster, pool_name.c_str(), &ioctx);
+
+ int features = RBD_FEATURE_LAYERING;
+ rbd_image_t image;
+ int order = 0;
+
+ std::string name = "testimg";
+
+ uint64_t size = 4 << 20;
+ ASSERT_EQ(0, create_image_full(ioctx, name.c_str(), size, &order,
+ false, features));
+ ASSERT_EQ(0, rbd_open(ioctx, name.c_str(), &image, NULL));
+
+ char test_data[TEST_IO_SIZE];
+ for (size_t i = 0; i < TEST_IO_SIZE; ++i) {
+ test_data[i] = (char) (rand() % (126 - 33) + 33);
+ }
+
+ size_t num_aios = 256;
+ rbd_completion_t comps[num_aios];
+ for (size_t i = 0; i < num_aios; ++i) {
+ ASSERT_EQ(0, rbd_aio_create_completion(NULL, NULL, &comps[i]));
+ uint64_t offset = rand() % (size - TEST_IO_SIZE);
+ ASSERT_EQ(0, rbd_aio_write(image, offset, TEST_IO_SIZE, test_data,
+ comps[i]));
+ }
+ for (size_t i = 0; i < num_aios; ++i) {
+ ASSERT_EQ(0, rbd_aio_wait_for_complete(comps[i]));
+ rbd_aio_release(comps[i]);
+ }
+ ASSERT_EQ(0, rbd_invalidate_cache(image));
+
+ for (size_t i = 0; i < num_aios; ++i) {
+ ASSERT_EQ(0, rbd_aio_create_completion(NULL, NULL, &comps[i]));
+ uint64_t offset = rand() % (size - TEST_IO_SIZE);
+ ASSERT_LE(0, rbd_aio_read(image, offset, TEST_IO_SIZE, test_data,
+ comps[i]));
+ }
+
+ ASSERT_EQ(0, rbd_close(image));
+ for (size_t i = 0; i < num_aios; ++i) {
+ ASSERT_EQ(1, rbd_aio_is_complete(comps[i]));
+ rbd_aio_release(comps[i]);
+ }
+
+ rados_ioctx_destroy(ioctx);
+ ASSERT_EQ(0, destroy_one_pool(pool_name, &cluster));
+}
+
int main(int argc, char **argv)
{
::testing::InitGoogleTest(&argc, argv);
diff --git a/src/test/mon/mon-test-helpers.sh b/src/test/mon/mon-test-helpers.sh
index d228569..052b1ca 100644
--- a/src/test/mon/mon-test-helpers.sh
+++ b/src/test/mon/mon-test-helpers.sh
@@ -59,8 +59,9 @@ function run_mon() {
function kill_daemons() {
local dir=$1
for pidfile in $(find $dir | grep pidfile) ; do
+ pid=$(cat $pidfile)
for try in 0 1 1 1 2 3 ; do
- kill -9 $(cat $pidfile 2> /dev/null) 2> /dev/null || break
+ kill -9 $pid 2> /dev/null || break
sleep $try
done
done
diff --git a/src/tools/crushtool.cc b/src/tools/crushtool.cc
index 8dcd79c..863bd94 100644
--- a/src/tools/crushtool.cc
+++ b/src/tools/crushtool.cc
@@ -118,6 +118,7 @@ void usage()
cout << " --show utilization-all\n";
cout << " include zero weight items\n";
cout << " --show-statistics show chi squared statistics\n";
+ cout << " --show-mappings show mappings\n";
cout << " --show-bad-mappings show bad mappings\n";
cout << " --show-choose-tries show choose tries histogram\n";
cout << " --set-choose-local-tries N\n";
@@ -190,6 +191,7 @@ int main(int argc, const char **argv)
int choose_total_tries = -1;
int chooseleaf_descend_once = -1;
int chooseleaf_vary_r = -1;
+ int straw_calc_version = -1;
CrushWrapper crush;
@@ -233,6 +235,9 @@ int main(int argc, const char **argv)
} else if (ceph_argparse_flag(args, i, "--show_statistics", (char*)NULL)) {
display = true;
tester.set_output_statistics(true);
+ } else if (ceph_argparse_flag(args, i, "--show_mappings", (char*)NULL)) {
+ display = true;
+ tester.set_output_mappings(true);
} else if (ceph_argparse_flag(args, i, "--show_bad_mappings", (char*)NULL)) {
display = true;
tester.set_output_bad_mappings(true);
@@ -263,6 +268,9 @@ int main(int argc, const char **argv)
} else if (ceph_argparse_withint(args, i, &chooseleaf_vary_r, &err,
"--set_chooseleaf_vary_r", (char*)NULL)) {
adjust = true;
+ } else if (ceph_argparse_withint(args, i, &straw_calc_version, &err,
+ "--set_straw_calc_version", (char*)NULL)) {
+ adjust = true;
} else if (ceph_argparse_flag(args, i, "--reweight", (char*)NULL)) {
reweight = true;
} else if (ceph_argparse_withint(args, i, &add_item, &err, "--add_item", (char*)NULL)) {
@@ -581,10 +589,8 @@ int main(int argc, const char **argv)
dout(2) << " item " << items[j] << " weight " << weights[j] << dendl;
}
- crush_bucket *b = crush_make_bucket(buckettype, CRUSH_HASH_DEFAULT, type, j, items, weights);
- assert(b);
int id;
- int r = crush_add_bucket(crush.crush, 0, b, &id);
+ int r = crush.add_bucket(0, buckettype, CRUSH_HASH_DEFAULT, type, j, items, weights, &id);
if (r < 0) {
dout(2) << "Couldn't add bucket: " << cpp_strerror(r) << dendl;
}
@@ -712,6 +718,10 @@ int main(int argc, const char **argv)
crush.set_chooseleaf_vary_r(chooseleaf_vary_r);
modified = true;
}
+ if (straw_calc_version >= 0) {
+ crush.set_straw_calc_version(straw_calc_version);
+ modified = true;
+ }
if (modified) {
crush.finalize();
diff --git a/src/tools/osdmaptool.cc b/src/tools/osdmaptool.cc
index 0db39da..bfeae4f 100644
--- a/src/tools/osdmaptool.cc
+++ b/src/tools/osdmaptool.cc
@@ -35,6 +35,7 @@ void usage()
cout << " --export-crush <file> write osdmap's crush map to <file>" << std::endl;
cout << " --import-crush <file> replace osdmap's crush map with <file>" << std::endl;
cout << " --test-map-pgs [--pool <poolid>] map all pgs" << std::endl;
+ cout << " --test-map-pgs-dump [--pool <poolid>] map all pgs" << std::endl;
cout << " --mark-up-in mark osds up and in (but do not persist)" << std::endl;
cout << " --clear-temp clear pg_temp and primary_temp" << std::endl;
cout << " --test-random do random placements" << std::endl;
@@ -75,6 +76,7 @@ int main(int argc, const char **argv)
bool mark_up_in = false;
bool clear_temp = false;
bool test_map_pgs = false;
+ bool test_map_pgs_dump = false;
bool test_random = false;
std::string val;
@@ -104,6 +106,8 @@ int main(int argc, const char **argv)
clear_temp = true;
} else if (ceph_argparse_flag(args, i, "--test-map-pgs", (char*)NULL)) {
test_map_pgs = true;
+ } else if (ceph_argparse_flag(args, i, "--test-map-pgs-dump", (char*)NULL)) {
+ test_map_pgs_dump = true;
} else if (ceph_argparse_flag(args, i, "--test-random", (char*)NULL)) {
test_random = true;
} else if (ceph_argparse_flag(args, i, "--clobber", (char*)NULL)) {
@@ -313,7 +317,7 @@ int main(int argc, const char **argv)
<< ") acting (" << acting << ", p" << acting_primary << ")"
<< std::endl;
}
- if (test_map_pgs) {
+ if (test_map_pgs || test_map_pgs_dump) {
if (pool != -1 && !osdmap.have_pg_pool(pool)) {
cerr << "There is no pool " << pool << std::endl;
exit(1);
@@ -348,6 +352,9 @@ int main(int argc, const char **argv)
}
size[osds.size()]++;
+ if (test_map_pgs_dump)
+ cout << pgid << "\t" << osds << "\t" << primary << std::endl;
+
for (unsigned i=0; i<osds.size(); i++) {
//cout << " rep " << i << " on " << osds[i] << std::endl;
count[osds[i]]++;
@@ -452,7 +459,7 @@ int main(int argc, const char **argv)
if (!print && !print_json && !tree && !modified &&
export_crush.empty() && import_crush.empty() &&
test_map_pg.empty() && test_map_object.empty() &&
- !test_map_pgs) {
+ !test_map_pgs && !test_map_pgs_dump) {
cerr << me << ": no action specified?" << std::endl;
usage();
}
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-ceph/ceph.git
More information about the Pkg-ceph-commits
mailing list