[Pkg-ceph-commits] [ceph] 01/04: patchworks: added new backported patches; removed unused patch.
Dmitry Smirnov
onlyjob at moszumanska.debian.org
Fri Apr 18 09:20:45 UTC 2014
This is an automated email from the git hooks/post-receive script.
onlyjob pushed a commit to branch experimental
in repository ceph.
commit 90c8e25
Author: Dmitry Smirnov <onlyjob at member.fsf.org>
Date: Fri Apr 18 08:19:33 2014
patchworks: added new backported patches; removed unused patch.
---
debian/patches/5469.patch | 182 ++++++++++++++++++++++++++++
debian/patches/8008.patch | 41 +++++++
debian/patches/_1606.patch | 193 ++++++++++++++++++++++++++++++
debian/patches/defaults-leveldb-osd.patch | 26 ----
debian/patches/series | 4 +-
5 files changed, 419 insertions(+), 27 deletions(-)
diff --git a/debian/patches/5469.patch b/debian/patches/5469.patch
new file mode 100644
index 0000000..69bf0c9
--- /dev/null
+++ b/debian/patches/5469.patch
@@ -0,0 +1,182 @@
+Last-Update: 2014-04-17
+Forwarded: not-needed
+Origin: upstream, http://tracker.ceph.com/projects/ceph/repository/revisions/a8330f5cfddaab853a1844afe43ee9a71f96d0c3
+Author: Josh Durgin <josh.durgin at inktank.com>
+Bug-Ceph: http://tracker.ceph.com/issues/5469
+Description:
+ librbd: fix zero length request handling
+
+ Zero-length writes would hang because the completion was never
+ called. Reads would hit an assert about zero length in
+ Striper::file_to_extents().
+
+ Fix all of these cases by skipping zero-length extents. The completion
+ is created and finished when finish_adding_requests() is called. This
+ is slightly different from usual completions since it comes from the
+ same thread as the one scheduling the request, but zero-length aio
+ requests should never happen from things that might care about this,
+ like QEMU.
+
+ Writes and discards have had this bug since the beginning of
+ librbd. Reads might have avoided it until stripingv2 was added.
+
+ Fixes: #5469
+ Signed-off-by: Josh Durgin <josh.durgin at inktank.com>
+
+diff --git a/src/librbd/internal.cc b/src/librbd/internal.cc
+index 8056fab..127be38 100644
+--- a/src/librbd/internal.cc
++++ b/src/librbd/internal.cc
+@@ -2884,9 +2884,6 @@ reprotect_and_return_err:
+ ldout(cct, 20) << "aio_write " << ictx << " off = " << off << " len = "
+ << len << " buf = " << (void*)buf << dendl;
+
+- if (!len)
+- return 0;
+-
+ int r = ictx_check(ictx);
+ if (r < 0)
+ return r;
+@@ -2912,14 +2909,16 @@ reprotect_and_return_err:
+
+ // map
+ vector<ObjectExtent> extents;
+- Striper::file_to_extents(ictx->cct, ictx->format_string, &ictx->layout, off, mylen, 0, extents);
++ if (len > 0) {
++ Striper::file_to_extents(ictx->cct, ictx->format_string,
++ &ictx->layout, off, mylen, 0, extents);
++ }
+
+ c->get();
+ c->init_time(ictx, AIO_TYPE_WRITE);
+ for (vector<ObjectExtent>::iterator p = extents.begin(); p != extents.end(); ++p) {
+ ldout(cct, 20) << " oid " << p->oid << " " << p->offset << "~" << p->length
+ << " from " << p->buffer_extents << dendl;
+-
+ // assemble extent
+ bufferlist bl;
+ for (vector<pair<uint64_t,uint64_t> >::iterator q = p->buffer_extents.begin();
+@@ -2966,9 +2965,6 @@ reprotect_and_return_err:
+ ldout(cct, 20) << "aio_discard " << ictx << " off = " << off << " len = "
+ << len << dendl;
+
+- if (!len)
+- return 0;
+-
+ int r = ictx_check(ictx);
+ if (r < 0)
+ return r;
+@@ -2992,7 +2988,10 @@ reprotect_and_return_err:
+
+ // map
+ vector<ObjectExtent> extents;
+- Striper::file_to_extents(ictx->cct, ictx->format_string, &ictx->layout, off, len, 0, extents);
++ if (len > 0) {
++ Striper::file_to_extents(ictx->cct, ictx->format_string,
++ &ictx->layout, off, len, 0, extents);
++ }
+
+ c->get();
+ c->init_time(ictx, AIO_TYPE_DISCARD);
+@@ -3086,6 +3085,8 @@ reprotect_and_return_err:
+ r = clip_io(ictx, p->first, &len);
+ if (r < 0)
+ return r;
++ if (len == 0)
++ continue;
+
+ Striper::file_to_extents(ictx->cct, ictx->format_string, &ictx->layout,
+ p->first, len, 0, object_extents, buffer_ofs);
+diff --git a/src/test/librbd/test_librbd.cc b/src/test/librbd/test_librbd.cc
+index d0b9c99..7f35418 100644
+--- a/src/test/librbd/test_librbd.cc
++++ b/src/test/librbd/test_librbd.cc
+@@ -1777,6 +1777,88 @@ TEST(LibRBD, DiffIterateStress)
+ ASSERT_EQ(0, destroy_one_pool_pp(pool_name, rados));
+ }
+
++TEST(LibRBD, ZeroLengthWrite)
++{
++ rados_t cluster;
++ rados_ioctx_t ioctx;
++ string pool_name = get_temp_pool_name();
++ ASSERT_EQ("", create_one_pool(pool_name, &cluster));
++ rados_ioctx_create(cluster, pool_name.c_str(), &ioctx);
++
++ rbd_image_t image;
++ int order = 0;
++ const char *name = "testimg";
++ uint64_t size = 2 << 20;
++
++ ASSERT_EQ(0, create_image(ioctx, name, size, &order));
++ ASSERT_EQ(0, rbd_open(ioctx, name, &image, NULL));
++
++ char read_data[1];
++ ASSERT_EQ(0, rbd_write(image, 0, 0, NULL));
++ ASSERT_EQ(1, rbd_read(image, 0, 1, read_data));
++ ASSERT_EQ('\0', read_data[0]);
++
++ ASSERT_EQ(0, rbd_close(image));
++
++ rados_ioctx_destroy(ioctx);
++ ASSERT_EQ(0, destroy_one_pool(pool_name, &cluster));
++}
++
++
++TEST(LibRBD, ZeroLengthDiscard)
++{
++ rados_t cluster;
++ rados_ioctx_t ioctx;
++ string pool_name = get_temp_pool_name();
++ ASSERT_EQ("", create_one_pool(pool_name, &cluster));
++ rados_ioctx_create(cluster, pool_name.c_str(), &ioctx);
++
++ rbd_image_t image;
++ int order = 0;
++ const char *name = "testimg";
++ uint64_t size = 2 << 20;
++
++ ASSERT_EQ(0, create_image(ioctx, name, size, &order));
++ ASSERT_EQ(0, rbd_open(ioctx, name, &image, NULL));
++
++ const char *data = "blah";
++ char read_data[strlen(data)];
++ ASSERT_EQ((int)strlen(data), rbd_write(image, 0, strlen(data), data));
++ ASSERT_EQ(0, rbd_discard(image, 0, 0));
++ ASSERT_EQ((int)strlen(data), rbd_read(image, 0, strlen(data), read_data));
++ ASSERT_EQ(0, memcmp(data, read_data, strlen(data)));
++
++ ASSERT_EQ(0, rbd_close(image));
++
++ rados_ioctx_destroy(ioctx);
++ ASSERT_EQ(0, destroy_one_pool(pool_name, &cluster));
++}
++
++TEST(LibRBD, ZeroLengthRead)
++{
++ rados_t cluster;
++ rados_ioctx_t ioctx;
++ string pool_name = get_temp_pool_name();
++ ASSERT_EQ("", create_one_pool(pool_name, &cluster));
++ rados_ioctx_create(cluster, pool_name.c_str(), &ioctx);
++
++ rbd_image_t image;
++ int order = 0;
++ const char *name = "testimg";
++ uint64_t size = 2 << 20;
++
++ ASSERT_EQ(0, create_image(ioctx, name, size, &order));
++ ASSERT_EQ(0, rbd_open(ioctx, name, &image, NULL));
++
++ char read_data[1];
++ ASSERT_EQ(0, rbd_read(image, 0, 0, read_data));
++
++ ASSERT_EQ(0, rbd_close(image));
++
++ rados_ioctx_destroy(ioctx);
++ ASSERT_EQ(0, destroy_one_pool(pool_name, &cluster));
++}
++
+ int main(int argc, char **argv)
+ {
+ ::testing::InitGoogleTest(&argc, argv);
diff --git a/debian/patches/8008.patch b/debian/patches/8008.patch
new file mode 100644
index 0000000..d333a60
--- /dev/null
+++ b/debian/patches/8008.patch
@@ -0,0 +1,41 @@
+Last-Update: 2014-04-17
+Forwarded: not-needed
+Origin: upstream, http://tracker.ceph.com/projects/ceph/repository/revisions/6ff645f592cd82f888b3646e10438aea781370a2
+Bug-Ceph: http://tracker.ceph.com/issues/8008
+Description:
+ osd/PG: fix repair_object when missing on primary
+
+ If the object is missing on the primary, we need to fully populate the
+ missing_loc.needs_recovery_map. This broke with the recent refactoring of
+ recovery for EC, somewhere around 84e2f39c557c79e9ca7c3c3f0eb0bfa4860bf899.
+
+ Fixes: #8008
+ Signed-off-by: Sage Weil <sage at inktank.com>
+
+--- a/src/osd/PG.cc
++++ b/src/osd/PG.cc
+@@ -3444,8 +3444,9 @@
+ // We should only be scrubbing if the PG is clean.
+ assert(waiting_for_unreadable_object.empty());
+
+ pg_log.missing_add(soid, oi.version, eversion_t());
++ missing_loc.add_missing(soid, oi.version, eversion_t());
+ missing_loc.add_location(soid, ok_peer);
+
+ pg_log.set_last_requested(0);
+ }
+--- a/src/osd/PG.h
++++ b/src/osd/PG.h
+@@ -378,8 +378,12 @@
+ assert(i->second.need == j->second.need);
+ }
+ }
+ }
++
++ void add_missing(const hobject_t &hoid, eversion_t need, eversion_t have) {
++ needs_recovery_map[hoid] = pg_missing_t::item(need, have);
++ }
+ void revise_need(const hobject_t &hoid, eversion_t need) {
+ assert(needs_recovery(hoid));
+ needs_recovery_map[hoid].need = need;
+ }
diff --git a/debian/patches/_1606.patch b/debian/patches/_1606.patch
new file mode 100644
index 0000000..1f95a6c
--- /dev/null
+++ b/debian/patches/_1606.patch
@@ -0,0 +1,193 @@
+Last-Update: 2014-04-18
+Forwarded: not-needed
+Origin: upstream, https://github.com/ceph/ceph/pull/1606
+From: "Yan, Zheng" <zheng.z.yan at intel.com>
+Description: client: try shrinking kernel inode cache when trimming session caps
+
+ Notify the kernel to invalidate top-level directory entries. As a side
+ effect, the kernel inode cache gets shrunk.
+
+--- a/src/client/Client.cc
++++ b/src/client/Client.cc
+@@ -2202,10 +2202,14 @@
+
+ if (in) { // link to inode
+ dn->inode = in;
+ in->get();
+- if (in->dir)
+- dn->get(); // dir -> dn pin
++ if (in->is_dir()) {
++ if (in->dir)
++ dn->get(); // dir -> dn pin
++ if (in->ll_ref)
++ dn->get(); // ll_ref -> dn pin
++ }
+
+ assert(in->dn_set.count(dn) == 0);
+
+ // only one parent for directories!
+@@ -2230,10 +2234,14 @@
+ << " inode " << dn->inode << dendl;
+
+ // unlink from inode
+ if (in) {
+- if (in->dir)
+- dn->put(); // dir -> dn pin
++ if (in->is_dir()) {
++ if (in->dir)
++ dn->put(); // dir -> dn pin
++ if (in->ll_ref)
++ dn->put(); // ll_ref -> dn pin
++ }
+ dn->inode = 0;
+ assert(in->dn_set.count(dn));
+ in->dn_set.erase(dn);
+ ldout(cct, 20) << "unlink inode " << in << " parents now " << in->dn_set << dendl;
+@@ -3073,8 +3081,19 @@
+ delete cap;
+ }
+ }
+ s->s_cap_iterator = NULL;
++
++ // notify kernel to invalidate top level directory entries. As a side effect,
++ // unused inodes underneath these entries get pruned.
++ if (dentry_invalidate_cb && s->caps.size() > max) {
++ for (ceph::unordered_map<string, Dentry*>::iterator p = root->dir->dentries.begin();
++ p != root->dir->dentries.end();
++ ++p) {
++ if (p->second->inode)
++ _schedule_invalidate_dentry_callback(p->second, false);
++ }
++ }
+ }
+
+ void Client::mark_caps_dirty(Inode *in, int caps)
+ {
+@@ -3663,11 +3682,16 @@
+ vinodeno_t dirino;
+ vinodeno_t ino;
+ string name;
+ public:
+- C_Client_DentryInvalidate(Client *c, Dentry *dn) :
+- client(c), dirino(dn->dir->parent_inode->vino()),
+- ino(dn->inode->vino()), name(dn->name) { }
++ C_Client_DentryInvalidate(Client *c, Dentry *dn, bool del) :
++ client(c), name(dn->name) {
++ dirino = dn->dir->parent_inode->vino();
++ if (del)
++ ino = dn->inode->vino();
++ else
++ ino.ino = inodeno_t();
++ }
+ void finish(int r) {
+ client->_async_dentry_invalidate(dirino, ino, name);
+ }
+ };
+@@ -3678,12 +3702,12 @@
+ << " in dir " << dirino << dendl;
+ dentry_invalidate_cb(dentry_invalidate_cb_handle, dirino, ino, name);
+ }
+
+-void Client::_schedule_invalidate_dentry_callback(Dentry *dn)
++void Client::_schedule_invalidate_dentry_callback(Dentry *dn, bool del)
+ {
+ if (dentry_invalidate_cb && dn->inode->ll_ref > 0)
+- async_dentry_invalidator.queue(new C_Client_DentryInvalidate(this, dn));
++ async_dentry_invalidator.queue(new C_Client_DentryInvalidate(this, dn, del));
+ }
+
+ void Client::_invalidate_inode_parents(Inode *in)
+ {
+@@ -3691,9 +3715,9 @@
+ while (q != in->dn_set.end()) {
+ Dentry *dn = *q++;
+ // FIXME: we play lots of unlink/link tricks when handling MDS replies,
+ // so in->dn_set doesn't always reflect the state of kernel's dcache.
+- _schedule_invalidate_dentry_callback(dn);
++ _schedule_invalidate_dentry_callback(dn, true);
+ unlink(dn, false);
+ }
+ }
+
+@@ -3723,9 +3747,9 @@
+ in->uid = m->head.uid;
+ in->gid = m->head.gid;
+ }
+ bool deleted_inode = false;
+- if ((issued & CEPH_CAP_LINK_EXCL) == 0) {
++ if ((issued & CEPH_CAP_LINK_EXCL) == 0 && in->nlink != (int32_t)m->head.nlink) {
+ in->nlink = m->head.nlink;
+ if (in->nlink == 0 &&
+ (new_caps & (CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL)))
+ deleted_inode = true;
+@@ -7014,10 +7038,15 @@
+
+
+ void Client::_ll_get(Inode *in)
+ {
+- if (in->ll_ref == 0)
++ if (in->ll_ref == 0) {
+ in->get();
++ if (in->is_dir() && !in->dn_set.empty()) {
++ assert(in->dn_set.size() == 1); // dirs can't be hard-linked
++ in->get_first_parent()->get(); // pin dentry
++ }
++ }
+ in->ll_get();
+ ldout(cct, 20) << "_ll_get " << in << " " << in->ino << " -> " << in->ll_ref << dendl;
+ }
+
+@@ -7025,8 +7054,12 @@
+ {
+ in->ll_put(num);
+ ldout(cct, 20) << "_ll_put " << in << " " << in->ino << " " << num << " -> " << in->ll_ref << dendl;
+ if (in->ll_ref == 0) {
++ if (in->is_dir() && !in->dn_set.empty()) {
++ assert(in->dn_set.size() == 1); // dirs can't be hard-linked
++ in->get_first_parent()->put(); // unpin dentry
++ }
+ put_inode(in);
+ return 0;
+ } else {
+ return in->ll_ref;
+@@ -7064,10 +7097,10 @@
+ if (in->ll_ref < count) {
+ ldout(cct, 1) << "WARNING: ll_forget on " << ino << " " << count
+ << ", which only has ll_ref=" << in->ll_ref << dendl;
+ _ll_put(in, in->ll_ref);
+- last = true;
+- } else {
++ last = true;
++ } else {
+ if (_ll_put(in, count) == 0)
+ last = true;
+ }
+
+--- a/src/client/Client.h
++++ b/src/client/Client.h
+@@ -478,9 +478,9 @@
+ void queue_cap_snap(Inode *in, snapid_t seq=0);
+ void finish_cap_snap(Inode *in, CapSnap *capsnap, int used);
+ void _flushed_cap_snap(Inode *in, snapid_t seq);
+
+- void _schedule_invalidate_dentry_callback(Dentry *dn);
++ void _schedule_invalidate_dentry_callback(Dentry *dn, bool del);
+ void _async_dentry_invalidate(vinodeno_t dirino, vinodeno_t ino, string& name);
+ void _invalidate_inode_parents(Inode *in);
+
+ void _schedule_invalidate_callback(Inode *in, int64_t off, int64_t len, bool keep_caps);
+--- a/src/client/fuse_ll.cc
++++ b/src/client/fuse_ll.cc
+@@ -679,9 +679,11 @@
+ {
+ CephFuse::Handle *cfuse = (CephFuse::Handle *)handle;
+ fuse_ino_t fdirino = cfuse->make_fake_ino(dirino.ino, dirino.snapid);
+ #if FUSE_VERSION >= FUSE_MAKE_VERSION(2, 9)
+- fuse_ino_t fino = cfuse->make_fake_ino(ino.ino, ino.snapid);
++ fuse_ino_t fino = 0;
++ if (ino.ino != inodeno_t())
++ fino = cfuse->make_fake_ino(ino.ino, ino.snapid);
+ fuse_lowlevel_notify_delete(cfuse->ch, fdirino, fino, name.c_str(), name.length());
+ #elif FUSE_VERSION >= FUSE_MAKE_VERSION(2, 8)
+ fuse_lowlevel_notify_inval_entry(cfuse->ch, fdirino, name.c_str(), name.length());
+ #endif
diff --git a/debian/patches/defaults-leveldb-osd.patch b/debian/patches/defaults-leveldb-osd.patch
deleted file mode 100644
index 4ac5c6a..0000000
--- a/debian/patches/defaults-leveldb-osd.patch
+++ /dev/null
@@ -1,26 +0,0 @@
-Last-Update: 2014-03-27
-Forwarded: no
-Author: Dmitry Smirnov <onlyjob at member.fsf.org>
-Description: increase OSD's leveldb defaults
- The OSD's leveldb currently uses libleveldb's defaults for cache and
- write buffer size, which are both 4 MB.
- Increase the cache size to 256MB and the write buffer to 8MB.
-
- Similar change for filestore was introduced in
- https://github.com/ceph/ceph/pull/1160
-
---- a/src/common/config_opts.h
-+++ b/src/common/config_opts.h
-@@ -536,10 +536,10 @@
- OPTION(osd_op_history_duration, OPT_U32, 600) // Oldest completed op to track
- OPTION(osd_target_transaction_size, OPT_INT, 30) // to adjust various transactions that batch smaller items
- OPTION(osd_failsafe_full_ratio, OPT_FLOAT, .97) // what % full makes an OSD "full" (failsafe)
- OPTION(osd_failsafe_nearfull_ratio, OPT_FLOAT, .90) // what % full makes an OSD near full (failsafe)
--OPTION(osd_leveldb_write_buffer_size, OPT_U64, 0) // OSD's leveldb write buffer size
--OPTION(osd_leveldb_cache_size, OPT_U64, 0) // OSD's leveldb cache size
-+OPTION(osd_leveldb_write_buffer_size, OPT_U64, 8*1024*1024) // OSD's leveldb write buffer size
-+OPTION(osd_leveldb_cache_size, OPT_U64, 256*1024*1024) // OSD's leveldb cache size
- OPTION(osd_leveldb_block_size, OPT_U64, 0) // OSD's leveldb block size
- OPTION(osd_leveldb_bloom_size, OPT_INT, 0) // OSD's leveldb bloom bits per entry
- OPTION(osd_leveldb_max_open_files, OPT_INT, 0) // OSD's leveldb max open files
- OPTION(osd_leveldb_compression, OPT_BOOL, true) // OSD's leveldb uses compression
diff --git a/debian/patches/series b/debian/patches/series
index 1b95214..33117ac 100644
--- a/debian/patches/series
+++ b/debian/patches/series
@@ -1,4 +1,6 @@
-#defaults-leveldb-osd.patch
+5469.patch
+8008.patch
+_1606.patch
gcj.patch
modules.patch
virtualenv-never-download.patch
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-ceph/ceph.git
More information about the Pkg-ceph-commits
mailing list