[Pkg-ceph-commits] [ceph] 01/01: Imported Upstream version 0.80.1
Dmitry Smirnov
onlyjob at moszumanska.debian.org
Tue May 13 03:34:40 UTC 2014
This is an automated email from the git hooks/post-receive script.
onlyjob pushed a commit to branch upstream
in repository ceph.
commit 42624ce (upstream)
Author: Dmitry Smirnov <onlyjob at member.fsf.org>
Date: Tue May 13 03:08:55 2014
Imported Upstream version 0.80.1
---
ceph.spec | 2 +-
configure | 22 +++----
configure.ac | 2 +-
src/.git_version | 4 +-
src/client/Client.cc | 31 +++++++++-
src/client/Client.h | 1 +
src/client/MetaSession.cc | 1 +
src/client/MetaSession.h | 1 +
src/common/config_opts.h | 2 +
src/mon/MonClient.cc | 4 --
src/os/KeyValueStore.cc | 5 +-
src/osd/OSD.cc | 30 +++++++++-
src/osd/OSD.h | 3 +
src/osd/PG.cc | 17 ++----
src/osd/PG.h | 11 +---
src/osd/ReplicatedPG.cc | 144 +++++++++++++++++++++++++++++++---------------
src/osd/ReplicatedPG.h | 8 +--
src/osd/TierAgentState.h | 13 ++++-
src/osd/osd_types.h | 3 +-
src/rgw/rgw_rados.cc | 14 ++++-
src/rgw/rgw_rest.cc | 1 +
21 files changed, 219 insertions(+), 100 deletions(-)
diff --git a/ceph.spec b/ceph.spec
index 7105d13..fca0c34 100644
--- a/ceph.spec
+++ b/ceph.spec
@@ -9,7 +9,7 @@
# common
#################################################################################
Name: ceph
-Version: 0.80
+Version: 0.80.1
Release: 0%{?dist}
Summary: User space components of the Ceph file system
License: GPL-2.0
diff --git a/configure b/configure
index b882d94..961026d 100755
--- a/configure
+++ b/configure
@@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.68 for ceph 0.80.
+# Generated by GNU Autoconf 2.68 for ceph 0.80.1.
#
# Report bugs to <ceph-devel at vger.kernel.org>.
#
@@ -570,8 +570,8 @@ MAKEFLAGS=
# Identity of this package.
PACKAGE_NAME='ceph'
PACKAGE_TARNAME='ceph'
-PACKAGE_VERSION='0.80'
-PACKAGE_STRING='ceph 0.80'
+PACKAGE_VERSION='0.80.1'
+PACKAGE_STRING='ceph 0.80.1'
PACKAGE_BUGREPORT='ceph-devel at vger.kernel.org'
PACKAGE_URL=''
@@ -1441,7 +1441,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
-\`configure' configures ceph 0.80 to adapt to many kinds of systems.
+\`configure' configures ceph 0.80.1 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1512,7 +1512,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
- short | recursive ) echo "Configuration of ceph 0.80:";;
+ short | recursive ) echo "Configuration of ceph 0.80.1:";;
esac
cat <<\_ACEOF
@@ -1657,7 +1657,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
-ceph configure 0.80
+ceph configure 0.80.1
generated by GNU Autoconf 2.68
Copyright (C) 2010 Free Software Foundation, Inc.
@@ -2504,7 +2504,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
-It was created by ceph $as_me 0.80, which was
+It was created by ceph $as_me 0.80.1, which was
generated by GNU Autoconf 2.68. Invocation command line was
$ $0 $@
@@ -4504,7 +4504,7 @@ fi
# Define the identity of the package.
PACKAGE='ceph'
- VERSION='0.80'
+ VERSION='0.80.1'
cat >>confdefs.h <<_ACEOF
@@ -12482,7 +12482,7 @@ fi
# Define the identity of the package.
PACKAGE='ceph'
- VERSION='0.80'
+ VERSION='0.80.1'
cat >>confdefs.h <<_ACEOF
@@ -22258,7 +22258,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
-This file was extended by ceph $as_me 0.80, which was
+This file was extended by ceph $as_me 0.80.1, which was
generated by GNU Autoconf 2.68. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
@@ -22324,7 +22324,7 @@ _ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
ac_cs_version="\\
-ceph config.status 0.80
+ceph config.status 0.80.1
configured by $0, generated by GNU Autoconf 2.68,
with options \\"\$ac_cs_config\\"
diff --git a/configure.ac b/configure.ac
index f9482d6..36312cf 100644
--- a/configure.ac
+++ b/configure.ac
@@ -8,7 +8,7 @@ AC_PREREQ(2.59)
# VERSION define is not used by the code. It gets a version string
# from 'git describe'; see src/ceph_ver.[ch]
-AC_INIT([ceph], [0.80], [ceph-devel at vger.kernel.org])
+AC_INIT([ceph], [0.80.1], [ceph-devel at vger.kernel.org])
# Create release string. Used with VERSION for RPMs.
RPM_RELEASE=0
diff --git a/src/.git_version b/src/.git_version
index 8e07215..2ca1a25 100644
--- a/src/.git_version
+++ b/src/.git_version
@@ -1,2 +1,2 @@
-b78644e7dee100e48dfeca32c9270a6b210d3003
-v0.80
+a38fe1169b6d2ac98b427334c12d7cf81f809b74
+v0.80.1
diff --git a/src/client/Client.cc b/src/client/Client.cc
index 03b6438..47d1c1d 100644
--- a/src/client/Client.cc
+++ b/src/client/Client.cc
@@ -120,6 +120,8 @@ bool Client::CommandHook::call(std::string command, cmdmap_t& cmdmap,
m_client->dump_mds_sessions(f);
else if (command == "dump_cache")
m_client->dump_cache(f);
+ else if (command == "kick_stale_sessions")
+ m_client->_kick_stale_sessions();
else
assert(0 == "bad command registered");
m_client->client_lock.Unlock();
@@ -404,6 +406,14 @@ int Client::init()
lderr(cct) << "error registering admin socket command: "
<< cpp_strerror(-ret) << dendl;
}
+ ret = admin_socket->register_command("kick_stale_sessions",
+ "kick_stale_sessions",
+ &m_command_hook,
+ "kick sessions that were remote reset");
+ if (ret < 0) {
+ lderr(cct) << "error registering admin socket command: "
+ << cpp_strerror(-ret) << dendl;
+ }
client_lock.Lock();
initialized = true;
@@ -419,6 +429,7 @@ void Client::shutdown()
admin_socket->unregister_command("mds_requests");
admin_socket->unregister_command("mds_sessions");
admin_socket->unregister_command("dump_cache");
+ admin_socket->unregister_command("kick_stale_sessions");
if (ino_invalidate_cb) {
ldout(cct, 10) << "shutdown stopping cache invalidator finisher" << dendl;
@@ -1538,7 +1549,8 @@ bool Client::have_open_session(int mds)
{
return
mds_sessions.count(mds) &&
- mds_sessions[mds]->state == MetaSession::STATE_OPEN;
+ (mds_sessions[mds]->state == MetaSession::STATE_OPEN ||
+ mds_sessions[mds]->state == MetaSession::STATE_STALE);
}
MetaSession *Client::_get_mds_session(int mds, Connection *con)
@@ -1649,6 +1661,19 @@ void Client::handle_client_session(MClientSession *m)
m->put();
}
+void Client::_kick_stale_sessions()
+{
+ ldout(cct, 1) << "kick_stale_sessions" << dendl;
+
+ for (map<int,MetaSession*>::iterator p = mds_sessions.begin();
+ p != mds_sessions.end(); ) {
+ MetaSession *s = p->second;
+ ++p;
+ if (s->state == MetaSession::STATE_STALE)
+ _closed_mds_session(s);
+ }
+}
+
void Client::send_request(MetaRequest *request, MetaSession *session)
{
// make the request
@@ -8960,6 +8985,10 @@ void Client::ms_handle_remote_reset(Connection *con)
break;
case MetaSession::STATE_OPEN:
+ ldout(cct, 1) << "reset from mds we were open; mark session as stale" << dendl;
+ s->state = MetaSession::STATE_STALE;
+ break;
+
case MetaSession::STATE_NEW:
case MetaSession::STATE_CLOSED:
default:
diff --git a/src/client/Client.h b/src/client/Client.h
index 4a3d753..e31e90a 100644
--- a/src/client/Client.h
+++ b/src/client/Client.h
@@ -250,6 +250,7 @@ public:
MetaSession *_open_mds_session(int mds);
void _close_mds_session(MetaSession *s);
void _closed_mds_session(MetaSession *s);
+ void _kick_stale_sessions();
void handle_client_session(MClientSession *m);
void send_reconnect(MetaSession *s);
void resend_unsafe_requests(MetaSession *s);
diff --git a/src/client/MetaSession.cc b/src/client/MetaSession.cc
index ee38db3..9f2a136 100644
--- a/src/client/MetaSession.cc
+++ b/src/client/MetaSession.cc
@@ -15,6 +15,7 @@ const char *MetaSession::get_state_name() const
case STATE_OPEN: return "open";
case STATE_CLOSING: return "closing";
case STATE_CLOSED: return "closed";
+ case STATE_STALE: return "stale";
default: return "unknown";
}
}
diff --git a/src/client/MetaSession.h b/src/client/MetaSession.h
index d9fcbb8..5525f07 100644
--- a/src/client/MetaSession.h
+++ b/src/client/MetaSession.h
@@ -33,6 +33,7 @@ struct MetaSession {
STATE_OPEN,
STATE_CLOSING,
STATE_CLOSED,
+ STATE_STALE,
} state;
list<Context*> waiting_for_open;
diff --git a/src/common/config_opts.h b/src/common/config_opts.h
index a065a77..2c65e6c 100644
--- a/src/common/config_opts.h
+++ b/src/common/config_opts.h
@@ -399,6 +399,7 @@ OPTION(osd_backfill_retry_interval, OPT_DOUBLE, 10.0)
OPTION(osd_agent_max_ops, OPT_INT, 4)
OPTION(osd_agent_min_evict_effort, OPT_FLOAT, .1)
OPTION(osd_agent_quantize_effort, OPT_FLOAT, .1)
+OPTION(osd_agent_delay_time, OPT_FLOAT, 5.0)
// decay atime and hist histograms after how many objects go by
OPTION(osd_agent_hist_halflife, OPT_INT, 1000)
@@ -469,6 +470,7 @@ OPTION(osd_backfill_scan_max, OPT_INT, 512)
OPTION(osd_op_thread_timeout, OPT_INT, 15)
OPTION(osd_recovery_thread_timeout, OPT_INT, 30)
OPTION(osd_snap_trim_thread_timeout, OPT_INT, 60*60*1)
+OPTION(osd_snap_trim_sleep, OPT_FLOAT, 0)
OPTION(osd_scrub_thread_timeout, OPT_INT, 60)
OPTION(osd_scrub_finalize_thread_timeout, OPT_INT, 60*10)
OPTION(osd_remove_thread_timeout, OPT_INT, 60*60)
diff --git a/src/mon/MonClient.cc b/src/mon/MonClient.cc
index f30be1b..3a6dda4 100644
--- a/src/mon/MonClient.cc
+++ b/src/mon/MonClient.cc
@@ -327,8 +327,6 @@ void MonClient::handle_monmap(MMonMap *m)
if (!monmap.get_addr_name(cur_con->get_peer_addr(), cur_mon)) {
ldout(cct, 10) << "mon." << cur_mon << " went away" << dendl;
_reopen_session(); // can't find the mon we were talking to (above)
- } else {
- _finish_hunting();
}
map_cond.Signal();
@@ -756,8 +754,6 @@ void MonClient::_renew_subs()
void MonClient::handle_subscribe_ack(MMonSubscribeAck *m)
{
- _finish_hunting();
-
if (sub_renew_sent != utime_t()) {
sub_renew_after = sub_renew_sent;
sub_renew_after += m->interval / 2.0;
diff --git a/src/os/KeyValueStore.cc b/src/os/KeyValueStore.cc
index cc117fa..fb459b2 100644
--- a/src/os/KeyValueStore.cc
+++ b/src/os/KeyValueStore.cc
@@ -204,11 +204,14 @@ void StripObjectMap::clone_wrap(StripObjectHeader &old_header,
if (target_header)
*target_header = old_header;
+ if (origin_header)
+ *origin_header = old_header;
clone(old_header.header, cid, oid, t, &new_origin_header,
&target_header->header);
- old_header.header = new_origin_header;
+ if(origin_header)
+ origin_header->header = new_origin_header;
if (target_header) {
target_header->oid = oid;
diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc
index 8e71f47..4240ba8 100644
--- a/src/osd/OSD.cc
+++ b/src/osd/OSD.cc
@@ -201,6 +201,8 @@ OSDService::OSDService(OSD *osd) :
agent_active(true),
agent_thread(this),
agent_stop_flag(false),
+ agent_timer_lock("OSD::agent_timer_lock"),
+ agent_timer(osd->client_messenger->cct, agent_timer_lock),
objecter_lock("OSD::objecter_lock"),
objecter_timer(osd->client_messenger->cct, objecter_lock),
objecter(new Objecter(osd->client_messenger->cct, osd->objecter_messenger, osd->monc, &objecter_osdmap,
@@ -435,6 +437,10 @@ void OSDService::shutdown()
Mutex::Locker l(backfill_request_lock);
backfill_request_timer.shutdown();
}
+ {
+ Mutex::Locker l(agent_timer_lock);
+ agent_timer.shutdown();
+ }
osdmap = OSDMapRef();
next_osdmap = OSDMapRef();
}
@@ -451,6 +457,7 @@ void OSDService::init()
objecter->init_locked();
}
watch_timer.init();
+ agent_timer.init();
agent_thread.create();
}
@@ -466,6 +473,15 @@ void OSDService::activate_map()
agent_lock.Unlock();
}
+class AgentTimeoutCB : public Context {
+ PGRef pg;
+public:
+ AgentTimeoutCB(PGRef _pg) : pg(_pg) {}
+ void finish(int) {
+ pg->agent_choose_mode_restart();
+ }
+};
+
void OSDService::agent_entry()
{
dout(10) << __func__ << " start" << dendl;
@@ -501,7 +517,18 @@ void OSDService::agent_entry()
PGRef pg = *agent_queue_pos;
int max = g_conf->osd_agent_max_ops - agent_ops;
agent_lock.Unlock();
- pg->agent_work(max);
+ if (!pg->agent_work(max)) {
+ dout(10) << __func__ << " " << *pg
+ << " no agent_work, delay for " << g_conf->osd_agent_delay_time
+ << " seconds" << dendl;
+
+ osd->logger->inc(l_osd_tier_delay);
+ // Queue a timer to call agent_choose_mode_restart for this pg after osd_agent_delay_time seconds (5 by default)
+ agent_timer_lock.Lock();
+ Context *cb = new AgentTimeoutCB(pg);
+ agent_timer.add_event_after(g_conf->osd_agent_delay_time, cb);
+ agent_timer_lock.Unlock();
+ }
agent_lock.Lock();
}
agent_lock.Unlock();
@@ -1478,6 +1505,7 @@ void OSD::create_logger()
osd_plb.add_u64_counter(l_osd_tier_whiteout, "tier_whiteout");
osd_plb.add_u64_counter(l_osd_tier_dirty, "tier_dirty");
osd_plb.add_u64_counter(l_osd_tier_clean, "tier_clean");
+ osd_plb.add_u64_counter(l_osd_tier_delay, "tier_delay");
osd_plb.add_u64_counter(l_osd_agent_wake, "agent_wake");
osd_plb.add_u64_counter(l_osd_agent_skip, "agent_skip");
diff --git a/src/osd/OSD.h b/src/osd/OSD.h
index 6b3c89d..ce8b74c 100644
--- a/src/osd/OSD.h
+++ b/src/osd/OSD.h
@@ -133,6 +133,7 @@ enum {
l_osd_tier_whiteout,
l_osd_tier_dirty,
l_osd_tier_clean,
+ l_osd_tier_delay,
l_osd_agent_wake,
l_osd_agent_skip,
@@ -466,6 +467,8 @@ public:
}
} agent_thread;
bool agent_stop_flag;
+ Mutex agent_timer_lock;
+ SafeTimer agent_timer;
void agent_entry();
void agent_stop();
diff --git a/src/osd/PG.cc b/src/osd/PG.cc
index 2c86f3b..6deb099 100644
--- a/src/osd/PG.cc
+++ b/src/osd/PG.cc
@@ -3880,7 +3880,6 @@ void PG::chunky_scrub(ThreadPool::TPHandle &handle)
scrubber.received_maps.clear();
{
- hobject_t end;
// get the start and end of our scrub chunk
//
@@ -3899,11 +3898,11 @@ void PG::chunky_scrub(ThreadPool::TPHandle &handle)
cct->_conf->osd_scrub_chunk_max,
0,
&objects,
- &end);
+ &scrubber.end);
assert(ret >= 0);
// in case we don't find a boundary: start again at the end
- start = end;
+ start = scrubber.end;
// special case: reached end of file store, implicitly a boundary
if (objects.empty()) {
@@ -3911,25 +3910,19 @@ void PG::chunky_scrub(ThreadPool::TPHandle &handle)
}
// search backward from the end looking for a boundary
- objects.push_back(end);
+ objects.push_back(scrubber.end);
while (!boundary_found && objects.size() > 1) {
hobject_t end = objects.back().get_boundary();
objects.pop_back();
if (objects.back().get_filestore_key() != end.get_filestore_key()) {
- end = end;
+ scrubber.end = end;
boundary_found = true;
}
}
}
-
- if (!_range_available_for_scrub(scrubber.start, end)) {
- // we'll be requeued by whatever made us unavailable for scrub
- done = true;
- break;
- }
- scrubber.end = end;
}
+
scrubber.block_writes = true;
// walk the log to find the latest update that affects our chunk
diff --git a/src/osd/PG.h b/src/osd/PG.h
index 8967a56..1fce297 100644
--- a/src/osd/PG.h
+++ b/src/osd/PG.h
@@ -1118,13 +1118,6 @@ public:
void build_scrub_map(ScrubMap &map, ThreadPool::TPHandle &handle);
void build_inc_scrub_map(
ScrubMap &map, eversion_t v, ThreadPool::TPHandle &handle);
- /**
- * returns true if [begin, end) is good to scrub at this time
- * a false return value obliges the implementer to requeue scrub when the
- * condition preventing scrub clears
- */
- virtual bool _range_available_for_scrub(
- const hobject_t &begin, const hobject_t &end) = 0;
virtual void _scrub(ScrubMap &map) { }
virtual void _scrub_clear_state() { }
virtual void _scrub_finish() { }
@@ -2131,9 +2124,11 @@ public:
virtual void check_blacklisted_watchers() = 0;
virtual void get_watchers(std::list<obj_watch_item_t>&) = 0;
- virtual void agent_work(int max) = 0;
+ virtual bool agent_work(int max) = 0;
virtual void agent_stop() = 0;
+ virtual void agent_delay() = 0;
virtual void agent_clear() = 0;
+ virtual void agent_choose_mode_restart() = 0;
};
ostream& operator<<(ostream& out, const PG& pg);
diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc
index 95b61a3..94eec05 100644
--- a/src/osd/ReplicatedPG.cc
+++ b/src/osd/ReplicatedPG.cc
@@ -2516,7 +2516,15 @@ ReplicatedPG::RepGather *ReplicatedPG::trim_object(const hobject_t &coid)
void ReplicatedPG::snap_trimmer()
{
- lock();
+ if (g_conf->osd_snap_trim_sleep > 0) {
+ utime_t t;
+ t.set_from_double(g_conf->osd_snap_trim_sleep);
+ t.sleep();
+ lock();
+ dout(20) << __func__ << " slept for " << t << dendl;
+ } else {
+ lock();
+ }
if (deleting) {
unlock();
return;
@@ -4611,8 +4619,8 @@ inline int ReplicatedPG::_delete_oid(OpContext *ctx, bool no_whiteout)
}
oi.size = 0;
- // cache: writeback: set whiteout on delete?
- if (pool.info.cache_mode == pg_pool_t::CACHEMODE_WRITEBACK && !no_whiteout) {
+ // cache: cache: set whiteout on delete?
+ if (pool.info.cache_mode != pg_pool_t::CACHEMODE_NONE && !no_whiteout) {
dout(20) << __func__ << " setting whiteout on " << soid << dendl;
oi.set_flag(object_info_t::FLAG_WHITEOUT);
ctx->delta_stats.num_whiteouts++;
@@ -5080,7 +5088,7 @@ int ReplicatedPG::prepare_transaction(OpContext *ctx)
}
// cache: clear whiteout?
- if (pool.info.cache_mode == pg_pool_t::CACHEMODE_WRITEBACK) {
+ if (pool.info.cache_mode != pg_pool_t::CACHEMODE_NONE) {
if (ctx->user_modify &&
ctx->obc->obs.oi.is_whiteout()) {
dout(10) << __func__ << " clearing whiteout on " << soid << dendl;
@@ -7431,9 +7439,6 @@ void ReplicatedPG::kick_object_context_blocked(ObjectContextRef obc)
dout(10) << __func__ << " " << soid << " requeuing " << ls.size() << " requests" << dendl;
requeue_ops(ls);
waiting_for_blocked_object.erase(p);
-
- if (obc->requeue_scrub_on_unblock)
- osd->queue_for_scrub(this);
}
SnapSetContext *ReplicatedPG::create_snapset_context(const hobject_t& oid)
@@ -10930,15 +10935,11 @@ void ReplicatedPG::hit_set_trim(RepGather *repop, unsigned max)
agent_state->remove_oldest_hit_set();
updated_hit_set_hist.history.pop_front();
- struct stat st;
- int r = osd->store->stat(
- coll,
- ghobject_t(oid, ghobject_t::NO_GEN, pg_whoami.shard),
- &st);
- assert(r == 0);
+ ObjectContextRef obc = get_object_context(oid, false);
+ assert(obc);
--repop->ctx->delta_stats.num_objects;
--repop->ctx->delta_stats.num_objects_hit_set_archive;
- repop->ctx->delta_stats.num_bytes -= st.st_size;
+ repop->ctx->delta_stats.num_bytes -= obc->obs.oi.size;
}
}
@@ -10966,6 +10967,7 @@ void ReplicatedPG::agent_setup()
agent_state->position.hash = pool.info.get_random_pg_position(
info.pgid.pgid,
rand());
+ agent_state->start = agent_state->position;
dout(10) << __func__ << " allocated new state, position "
<< agent_state->position << dendl;
@@ -10986,13 +10988,14 @@ void ReplicatedPG::agent_clear()
agent_state.reset(NULL);
}
-void ReplicatedPG::agent_work(int start_max)
+// Return false if no objects operated on since start of object hash space
+bool ReplicatedPG::agent_work(int start_max)
{
lock();
if (!agent_state) {
dout(10) << __func__ << " no agent state, stopping" << dendl;
unlock();
- return;
+ return true;
}
assert(!deleting);
@@ -11000,7 +11003,7 @@ void ReplicatedPG::agent_work(int start_max)
if (agent_state->is_idle()) {
dout(10) << __func__ << " idle, stopping" << dendl;
unlock();
- return;
+ return true;
}
osd->logger->inc(l_osd_agent_wake);
@@ -11090,8 +11093,12 @@ void ReplicatedPG::agent_work(int start_max)
if (agent_state->evict_mode != TierAgentState::EVICT_MODE_IDLE &&
agent_maybe_evict(obc))
++started;
- if (started >= start_max)
+ if (started >= start_max) {
+ // If finishing early, set "next" to the next object
+ if (++p != ls.end())
+ next = *p;
break;
+ }
}
if (++agent_state->hist_age > g_conf->osd_agent_hist_halflife) {
@@ -11101,13 +11108,42 @@ void ReplicatedPG::agent_work(int start_max)
agent_state->temp_hist.decay();
}
+ // Total objects operated on so far
+ int total_started = agent_state->started + started;
+ bool need_delay = false;
+
+ dout(20) << __func__ << " start pos " << agent_state->position
+ << " next start pos " << next
+ << " started " << total_started << dendl;
+
+ // See if we've made a full pass over the object hash space
+ // This might check at most ls_max objects a second time to notice that
+ // we've checked every object at least once.
+ if (agent_state->position < agent_state->start && next >= agent_state->start) {
+ dout(20) << __func__ << " wrap around " << agent_state->start << dendl;
+ if (total_started == 0)
+ need_delay = true;
+ else
+ total_started = 0;
+ agent_state->start = next;
+ }
+ agent_state->started = total_started;
+
+ // See if we are starting from beginning
if (next.is_max())
agent_state->position = hobject_t();
else
agent_state->position = next;
- dout(20) << __func__ << " final position " << agent_state->position << dendl;
+
+ if (need_delay) {
+ assert(agent_state->delaying == false);
+ agent_delay();
+ unlock();
+ return false;
+ }
agent_choose_mode();
unlock();
+ return true;
}
void ReplicatedPG::agent_load_hit_sets()
@@ -11309,9 +11345,37 @@ void ReplicatedPG::agent_stop()
}
}
-void ReplicatedPG::agent_choose_mode()
+void ReplicatedPG::agent_delay()
{
+ dout(20) << __func__ << dendl;
+ if (agent_state && !agent_state->is_idle()) {
+ assert(agent_state->delaying == false);
+ agent_state->delaying = true;
+ osd->agent_disable_pg(this, agent_state->evict_effort);
+ }
+}
+
+void ReplicatedPG::agent_choose_mode_restart()
+{
+ dout(20) << __func__ << dendl;
+ lock();
+ if (agent_state && agent_state->delaying) {
+ agent_state->delaying = false;
+ agent_choose_mode(true);
+ }
+ unlock();
+}
+
+void ReplicatedPG::agent_choose_mode(bool restart)
+{
+ // Let delay play out
+ if (agent_state->delaying) {
+ dout(20) << __func__ << this << " delaying, ignored" << dendl;
+ return;
+ }
+
uint64_t divisor = pool.info.get_pg_num_divisor(info.pgid.pgid);
+ assert(divisor > 0);
uint64_t num_user_objects = info.stats.stats.sum.num_objects;
@@ -11360,19 +11424,20 @@ void ReplicatedPG::agent_choose_mode()
info.stats.stats.sum.num_objects;
dirty_micro =
num_dirty * avg_size * 1000000 /
- (pool.info.target_max_bytes / divisor);
+ MAX(pool.info.target_max_bytes / divisor, 1);
full_micro =
num_user_objects * avg_size * 1000000 /
- (pool.info.target_max_bytes / divisor);
+ MAX(pool.info.target_max_bytes / divisor, 1);
}
if (pool.info.target_max_objects) {
uint64_t dirty_objects_micro =
num_dirty * 1000000 /
- (pool.info.target_max_objects / divisor);
+ MAX(pool.info.target_max_objects / divisor, 1);
if (dirty_objects_micro > dirty_micro)
dirty_micro = dirty_objects_micro;
uint64_t full_objects_micro =
- num_user_objects * 1000000 / (pool.info.target_max_objects / divisor);
+ num_user_objects * 1000000 /
+ MAX(pool.info.target_max_objects / divisor, 1);
if (full_objects_micro > full_micro)
full_micro = full_objects_micro;
}
@@ -11384,7 +11449,7 @@ void ReplicatedPG::agent_choose_mode()
TierAgentState::flush_mode_t flush_mode = TierAgentState::FLUSH_MODE_IDLE;
uint64_t flush_target = pool.info.cache_target_dirty_ratio_micro;
uint64_t flush_slop = (float)flush_target * g_conf->osd_agent_slop;
- if (agent_state->flush_mode == TierAgentState::FLUSH_MODE_IDLE)
+ if (restart || agent_state->flush_mode == TierAgentState::FLUSH_MODE_IDLE)
flush_target += flush_slop;
else
flush_target -= MIN(flush_target, flush_slop);
@@ -11401,7 +11466,7 @@ void ReplicatedPG::agent_choose_mode()
unsigned evict_effort = 0;
uint64_t evict_target = pool.info.cache_target_full_ratio_micro;
uint64_t evict_slop = (float)evict_target * g_conf->osd_agent_slop;
- if (agent_state->evict_mode == TierAgentState::EVICT_MODE_IDLE)
+ if (restart || agent_state->evict_mode == TierAgentState::EVICT_MODE_IDLE)
evict_target += evict_slop;
else
evict_target -= MIN(evict_target, evict_slop);
@@ -11416,7 +11481,11 @@ void ReplicatedPG::agent_choose_mode()
// set effort in [0..1] range based on where we are between
evict_mode = TierAgentState::EVICT_MODE_SOME;
uint64_t over = full_micro - evict_target;
- uint64_t span = 1000000 - evict_target;
+ uint64_t span;
+ if (evict_target >= 1000000)
+ span = 1;
+ else
+ span = 1000000 - evict_target;
evict_effort = MAX(over * 1000000 / span,
(unsigned)(1000000.0 * g_conf->osd_agent_min_evict_effort));
@@ -11465,11 +11534,11 @@ void ReplicatedPG::agent_choose_mode()
// (including flush). This is probably fine (they should be
// correlated) but it is not precisely correct.
if (agent_state->is_idle()) {
- if (!old_idle) {
+ if (!restart && !old_idle) {
osd->agent_disable_pg(this, old_effort);
}
} else {
- if (old_idle) {
+ if (restart || old_idle) {
osd->agent_enable_pg(this, agent_state->evict_effort);
} else if (old_effort != agent_state->evict_effort) {
osd->agent_adjust_pg(this, old_effort, agent_state->evict_effort);
@@ -11511,23 +11580,6 @@ void ReplicatedPG::agent_estimate_atime_temp(const hobject_t& oid,
// SCRUB
-bool ReplicatedPG::_range_available_for_scrub(
- const hobject_t &begin, const hobject_t &end)
-{
- pair<hobject_t, ObjectContextRef> next;
- next.second = object_contexts.lookup(begin);
- next.first = begin;
- bool more = true;
- while (more && next.first < end) {
- if (next.second && next.second->is_blocked()) {
- next.second->requeue_scrub_on_unblock = true;
- return true;
- }
- more = object_contexts.get_next(next.first, &next);
- }
- return false;
-}
-
void ReplicatedPG::_scrub(ScrubMap& scrubmap)
{
dout(10) << "_scrub" << dendl;
diff --git a/src/osd/ReplicatedPG.h b/src/osd/ReplicatedPG.h
index abee8bf..3ea4721 100644
--- a/src/osd/ReplicatedPG.h
+++ b/src/osd/ReplicatedPG.h
@@ -809,7 +809,7 @@ protected:
friend class C_HitSetFlushing;
void agent_setup(); ///< initialize agent state
- void agent_work(int max); ///< entry point to do some agent work
+ bool agent_work(int max); ///< entry point to do some agent work
bool agent_maybe_flush(ObjectContextRef& obc); ///< maybe flush
bool agent_maybe_evict(ObjectContextRef& obc); ///< maybe evict
@@ -825,11 +825,13 @@ protected:
/// stop the agent
void agent_stop();
+ void agent_delay();
/// clear agent state
void agent_clear();
- void agent_choose_mode(); ///< choose (new) agent mode(s)
+ void agent_choose_mode(bool restart = false); ///< choose (new) agent mode(s)
+ void agent_choose_mode_restart();
/// true if we can send an ondisk/commit for v
bool already_complete(eversion_t v) {
@@ -1241,8 +1243,6 @@ protected:
friend struct C_Flush;
// -- scrub --
- virtual bool _range_available_for_scrub(
- const hobject_t &begin, const hobject_t &end);
virtual void _scrub(ScrubMap& map);
virtual void _scrub_clear_state();
virtual void _scrub_finish();
diff --git a/src/osd/TierAgentState.h b/src/osd/TierAgentState.h
index b5f7910..e9c22b2 100644
--- a/src/osd/TierAgentState.h
+++ b/src/osd/TierAgentState.h
@@ -17,6 +17,10 @@
struct TierAgentState {
/// current position iterating across pool
hobject_t position;
+ /// Count of agent_work since "start" position of object hash space
+ int started;
+ hobject_t start;
+ bool delaying;
/// histogram of ages we've encountered
pow2_hist_t atime_hist;
@@ -66,7 +70,9 @@ struct TierAgentState {
unsigned evict_effort;
TierAgentState()
- : hist_age(0),
+ : started(0),
+ delaying(false),
+ hist_age(0),
flush_mode(FLUSH_MODE_IDLE),
evict_mode(EVICT_MODE_IDLE),
evict_effort(0)
@@ -75,8 +81,9 @@ struct TierAgentState {
/// false if we have any work to do
bool is_idle() const {
return
- flush_mode == FLUSH_MODE_IDLE &&
- evict_mode == EVICT_MODE_IDLE;
+ delaying ||
+ (flush_mode == FLUSH_MODE_IDLE &&
+ evict_mode == EVICT_MODE_IDLE);
}
/// add archived HitSet
diff --git a/src/osd/osd_types.h b/src/osd/osd_types.h
index 3289620..092d6cc 100644
--- a/src/osd/osd_types.h
+++ b/src/osd/osd_types.h
@@ -2690,7 +2690,6 @@ public:
// set if writes for this object are blocked on another objects recovery
ObjectContextRef blocked_by; // object blocking our writes
set<ObjectContextRef> blocking; // objects whose writes we block
- bool requeue_scrub_on_unblock; // true if we need to requeue scrub on unblock
// any entity in obs.oi.watchers MUST be in either watchers or unconnected_watchers.
map<pair<uint64_t, entity_name_t>, WatchRef> watchers;
@@ -2863,7 +2862,7 @@ public:
destructor_callback(0),
lock("ReplicatedPG::ObjectContext::lock"),
unstable_writes(0), readers(0), writers_waiting(0), readers_waiting(0),
- blocked(false), requeue_scrub_on_unblock(false) {}
+ blocked(false) {}
~ObjectContext() {
assert(rwstate.empty());
diff --git a/src/rgw/rgw_rados.cc b/src/rgw/rgw_rados.cc
index 405f00f..20602d5 100644
--- a/src/rgw/rgw_rados.cc
+++ b/src/rgw/rgw_rados.cc
@@ -597,8 +597,8 @@ void RGWObjManifest::obj_iterator::seek(uint64_t o)
stripe_size = rule.stripe_max_size;
stripe_size = MIN(manifest->get_obj_size() - stripe_ofs, stripe_size);
} else {
- stripe_size = rule.part_size - (ofs - stripe_ofs);
- stripe_size = MIN(stripe_size, rule.stripe_max_size);
+ uint64_t next = MIN(stripe_ofs + rule.stripe_max_size, part_ofs + rule.part_size);
+ stripe_size = next - stripe_ofs;
}
update_location();
@@ -4817,8 +4817,16 @@ void RGWRados::get_obj_aio_completion_cb(completion_t c, void *arg)
ldout(cct, 20) << "get_obj_aio_completion_cb: io completion ofs=" << ofs << " len=" << len << dendl;
d->throttle.put(len);
- if (d->is_cancelled())
+ r = rados_aio_get_return_value(c);
+ if (r < 0) {
+ ldout(cct, 0) << "ERROR: got unexpected error when trying to read object: " << r << dendl;
+ d->set_cancelled(r);
goto done;
+ }
+
+ if (d->is_cancelled()) {
+ goto done;
+ }
d->data_lock.Lock();
diff --git a/src/rgw/rgw_rest.cc b/src/rgw/rgw_rest.cc
index 6473c61..69948a6 100644
--- a/src/rgw/rgw_rest.cc
+++ b/src/rgw/rgw_rest.cc
@@ -36,6 +36,7 @@ static struct rgw_http_attr rgw_to_http_attr_list[] = {
{ RGW_ATTR_CACHE_CONTROL, "Cache-Control"},
{ RGW_ATTR_CONTENT_DISP, "Content-Disposition"},
{ RGW_ATTR_CONTENT_ENC, "Content-Encoding"},
+ { RGW_ATTR_USER_MANIFEST, "X-Object-Manifest"},
{ NULL, NULL},
};
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-ceph/ceph.git
More information about the Pkg-ceph-commits
mailing list