[Pkg-ceph-commits] [ceph] 01/04: patchworks: added new backported patches; removed unused patch.

Dmitry Smirnov onlyjob at moszumanska.debian.org
Fri Apr 18 09:20:45 UTC 2014


This is an automated email from the git hooks/post-receive script.

onlyjob pushed a commit to branch experimental
in repository ceph.

commit 90c8e25
Author: Dmitry Smirnov <onlyjob at member.fsf.org>
Date:   Fri Apr 18 08:19:33 2014

    patchworks: added new backported patches; removed unused patch.
---
 debian/patches/5469.patch                 | 182 ++++++++++++++++++++++++++++
 debian/patches/8008.patch                 |  41 +++++++
 debian/patches/_1606.patch                | 193 ++++++++++++++++++++++++++++++
 debian/patches/defaults-leveldb-osd.patch |  26 ----
 debian/patches/series                     |   4 +-
 5 files changed, 419 insertions(+), 27 deletions(-)

diff --git a/debian/patches/5469.patch b/debian/patches/5469.patch
new file mode 100644
index 0000000..69bf0c9
--- /dev/null
+++ b/debian/patches/5469.patch
@@ -0,0 +1,182 @@
+Last-Update: 2014-04-17
+Forwarded: not-needed
+Origin: upstream, http://tracker.ceph.com/projects/ceph/repository/revisions/a8330f5cfddaab853a1844afe43ee9a71f96d0c3
+Author: Josh Durgin <josh.durgin at inktank.com>
+Bug-Ceph: http://tracker.ceph.com/issues/5469
+Description:
+    librbd: fix zero length request handling
+    
+    Zero-length writes would hang because the completion was never
+    called. Reads would hit an assert about zero length in
+    Striper::file_to_extents().
+    
+    Fix all of these cases by skipping zero-length extents. The completion
+    is created and finished when finish_adding_requests() is called. This
+    is slightly different from usual completions since it comes from the
+    same thread as the one scheduling the request, but zero-length aio
+    requests should never happen from things that might care about this,
+    like QEMU.
+    
+    Writes and discards have had this bug since the beginning of
+    librbd. Reads might have avoided it until stripingv2 was added.
+    
+    Fixes: #5469
+    Signed-off-by: Josh Durgin <josh.durgin at inktank.com>
+
+diff --git a/src/librbd/internal.cc b/src/librbd/internal.cc
+index 8056fab..127be38 100644
+--- a/src/librbd/internal.cc
++++ b/src/librbd/internal.cc
+@@ -2884,9 +2884,6 @@ reprotect_and_return_err:
+     ldout(cct, 20) << "aio_write " << ictx << " off = " << off << " len = "
+ 		   << len << " buf = " << (void*)buf << dendl;
+ 
+-    if (!len)
+-      return 0;
+-
+     int r = ictx_check(ictx);
+     if (r < 0)
+       return r;
+@@ -2912,14 +2909,16 @@ reprotect_and_return_err:
+ 
+     // map
+     vector<ObjectExtent> extents;
+-    Striper::file_to_extents(ictx->cct, ictx->format_string, &ictx->layout, off, mylen, 0, extents);
++    if (len > 0) {
++      Striper::file_to_extents(ictx->cct, ictx->format_string,
++			       &ictx->layout, off, mylen, 0, extents);
++    }
+ 
+     c->get();
+     c->init_time(ictx, AIO_TYPE_WRITE);
+     for (vector<ObjectExtent>::iterator p = extents.begin(); p != extents.end(); ++p) {
+       ldout(cct, 20) << " oid " << p->oid << " " << p->offset << "~" << p->length
+ 		     << " from " << p->buffer_extents << dendl;
+-
+       // assemble extent
+       bufferlist bl;
+       for (vector<pair<uint64_t,uint64_t> >::iterator q = p->buffer_extents.begin();
+@@ -2966,9 +2965,6 @@ reprotect_and_return_err:
+     ldout(cct, 20) << "aio_discard " << ictx << " off = " << off << " len = "
+ 		   << len << dendl;
+ 
+-    if (!len)
+-      return 0;
+-
+     int r = ictx_check(ictx);
+     if (r < 0)
+       return r;
+@@ -2992,7 +2988,10 @@ reprotect_and_return_err:
+ 
+     // map
+     vector<ObjectExtent> extents;
+-    Striper::file_to_extents(ictx->cct, ictx->format_string, &ictx->layout, off, len, 0, extents);
++    if (len > 0) {
++      Striper::file_to_extents(ictx->cct, ictx->format_string,
++			       &ictx->layout, off, len, 0, extents);
++    }
+ 
+     c->get();
+     c->init_time(ictx, AIO_TYPE_DISCARD);
+@@ -3086,6 +3085,8 @@ reprotect_and_return_err:
+       r = clip_io(ictx, p->first, &len);
+       if (r < 0)
+ 	return r;
++      if (len == 0)
++	continue;
+ 
+       Striper::file_to_extents(ictx->cct, ictx->format_string, &ictx->layout,
+ 			       p->first, len, 0, object_extents, buffer_ofs);
+diff --git a/src/test/librbd/test_librbd.cc b/src/test/librbd/test_librbd.cc
+index d0b9c99..7f35418 100644
+--- a/src/test/librbd/test_librbd.cc
++++ b/src/test/librbd/test_librbd.cc
+@@ -1777,6 +1777,88 @@ TEST(LibRBD, DiffIterateStress)
+   ASSERT_EQ(0, destroy_one_pool_pp(pool_name, rados));
+ }
+ 
++TEST(LibRBD, ZeroLengthWrite)
++{
++  rados_t cluster;
++  rados_ioctx_t ioctx;
++  string pool_name = get_temp_pool_name();
++  ASSERT_EQ("", create_one_pool(pool_name, &cluster));
++  rados_ioctx_create(cluster, pool_name.c_str(), &ioctx);
++
++  rbd_image_t image;
++  int order = 0;
++  const char *name = "testimg";
++  uint64_t size = 2 << 20;
++
++  ASSERT_EQ(0, create_image(ioctx, name, size, &order));
++  ASSERT_EQ(0, rbd_open(ioctx, name, &image, NULL));
++
++  char read_data[1];
++  ASSERT_EQ(0, rbd_write(image, 0, 0, NULL));
++  ASSERT_EQ(1, rbd_read(image, 0, 1, read_data));
++  ASSERT_EQ('\0', read_data[0]);
++
++  ASSERT_EQ(0, rbd_close(image));
++
++  rados_ioctx_destroy(ioctx);
++  ASSERT_EQ(0, destroy_one_pool(pool_name, &cluster));
++}
++
++
++TEST(LibRBD, ZeroLengthDiscard)
++{
++  rados_t cluster;
++  rados_ioctx_t ioctx;
++  string pool_name = get_temp_pool_name();
++  ASSERT_EQ("", create_one_pool(pool_name, &cluster));
++  rados_ioctx_create(cluster, pool_name.c_str(), &ioctx);
++
++  rbd_image_t image;
++  int order = 0;
++  const char *name = "testimg";
++  uint64_t size = 2 << 20;
++
++  ASSERT_EQ(0, create_image(ioctx, name, size, &order));
++  ASSERT_EQ(0, rbd_open(ioctx, name, &image, NULL));
++
++  const char *data = "blah";
++  char read_data[strlen(data)];
++  ASSERT_EQ((int)strlen(data), rbd_write(image, 0, strlen(data), data));
++  ASSERT_EQ(0, rbd_discard(image, 0, 0));
++  ASSERT_EQ((int)strlen(data), rbd_read(image, 0, strlen(data), read_data));
++  ASSERT_EQ(0, memcmp(data, read_data, strlen(data)));
++
++  ASSERT_EQ(0, rbd_close(image));
++
++  rados_ioctx_destroy(ioctx);
++  ASSERT_EQ(0, destroy_one_pool(pool_name, &cluster));
++}
++
++TEST(LibRBD, ZeroLengthRead)
++{
++  rados_t cluster;
++  rados_ioctx_t ioctx;
++  string pool_name = get_temp_pool_name();
++  ASSERT_EQ("", create_one_pool(pool_name, &cluster));
++  rados_ioctx_create(cluster, pool_name.c_str(), &ioctx);
++
++  rbd_image_t image;
++  int order = 0;
++  const char *name = "testimg";
++  uint64_t size = 2 << 20;
++
++  ASSERT_EQ(0, create_image(ioctx, name, size, &order));
++  ASSERT_EQ(0, rbd_open(ioctx, name, &image, NULL));
++
++  char read_data[1];
++  ASSERT_EQ(0, rbd_read(image, 0, 0, read_data));
++
++  ASSERT_EQ(0, rbd_close(image));
++
++  rados_ioctx_destroy(ioctx);
++  ASSERT_EQ(0, destroy_one_pool(pool_name, &cluster));
++}
++
+ int main(int argc, char **argv)
+ {
+   ::testing::InitGoogleTest(&argc, argv);
diff --git a/debian/patches/8008.patch b/debian/patches/8008.patch
new file mode 100644
index 0000000..d333a60
--- /dev/null
+++ b/debian/patches/8008.patch
@@ -0,0 +1,41 @@
+Last-Update: 2014-04-17
+Forwarded: not-needed
+Origin: upstream, http://tracker.ceph.com/projects/ceph/repository/revisions/6ff645f592cd82f888b3646e10438aea781370a2
+Bug-Ceph: http://tracker.ceph.com/issues/8008
+Description:
+    osd/PG: fix repair_object when missing on primary
+    
+    If the object is missing on the primary, we need to fully populate the
+    missing_loc.needs_recovery_map.  This broke with the recent refactoring of
+    recovery for EC, somewhere around 84e2f39c557c79e9ca7c3c3f0eb0bfa4860bf899.
+    
+    Fixes: #8008
+    Signed-off-by: Sage Weil <sage at inktank.com>
+
+--- a/src/osd/PG.cc
++++ b/src/osd/PG.cc
+@@ -3444,8 +3444,9 @@
+     // We should only be scrubbing if the PG is clean.
+     assert(waiting_for_unreadable_object.empty());
+ 
+     pg_log.missing_add(soid, oi.version, eversion_t());
++    missing_loc.add_missing(soid, oi.version, eversion_t());
+     missing_loc.add_location(soid, ok_peer);
+ 
+     pg_log.set_last_requested(0);
+   }
+--- a/src/osd/PG.h
++++ b/src/osd/PG.h
+@@ -378,8 +378,12 @@
+ 	  assert(i->second.need == j->second.need);
+ 	}
+       }
+     }
++
++    void add_missing(const hobject_t &hoid, eversion_t need, eversion_t have) {
++      needs_recovery_map[hoid] = pg_missing_t::item(need, have);
++    }
+     void revise_need(const hobject_t &hoid, eversion_t need) {
+       assert(needs_recovery(hoid));
+       needs_recovery_map[hoid].need = need;
+     }
diff --git a/debian/patches/_1606.patch b/debian/patches/_1606.patch
new file mode 100644
index 0000000..1f95a6c
--- /dev/null
+++ b/debian/patches/_1606.patch
@@ -0,0 +1,193 @@
+Last-Update: 2014-04-18
+Forwarded: not-needed
+Origin: upstream, https://github.com/ceph/ceph/pull/1606
+From: "Yan, Zheng" <zheng.z.yan at intel.com>
+Description: client: try shrinking kernel inode cache when trimming session caps
+ 
+ Notify the kernel to invalidate top-level directory entries. As a side
+ effect, the kernel inode cache gets shrunk.
+
+--- a/src/client/Client.cc
++++ b/src/client/Client.cc
+@@ -2202,10 +2202,14 @@
+ 
+   if (in) {    // link to inode
+     dn->inode = in;
+     in->get();
+-    if (in->dir)
+-      dn->get();  // dir -> dn pin
++    if (in->is_dir()) {
++      if (in->dir)
++	dn->get(); // dir -> dn pin
++      if (in->ll_ref)
++	dn->get(); // ll_ref -> dn pin
++    }
+ 
+     assert(in->dn_set.count(dn) == 0);
+ 
+     // only one parent for directories!
+@@ -2230,10 +2234,14 @@
+ 		 << " inode " << dn->inode << dendl;
+ 
+   // unlink from inode
+   if (in) {
+-    if (in->dir)
+-      dn->put();        // dir -> dn pin
++    if (in->is_dir()) {
++      if (in->dir)
++	dn->put(); // dir -> dn pin
++      if (in->ll_ref)
++	dn->put(); // ll_ref -> dn pin
++    }
+     dn->inode = 0;
+     assert(in->dn_set.count(dn));
+     in->dn_set.erase(dn);
+     ldout(cct, 20) << "unlink  inode " << in << " parents now " << in->dn_set << dendl; 
+@@ -3073,8 +3081,19 @@
+       delete cap;
+     }
+   }
+   s->s_cap_iterator = NULL;
++
++  // notify kernel to invalidate top level directory entries. As a side effect,
++  // unused inodes underneath these entries get pruned.
++  if (dentry_invalidate_cb && s->caps.size() > max) {
++    for (ceph::unordered_map<string, Dentry*>::iterator p = root->dir->dentries.begin();
++	 p != root->dir->dentries.end();
++	 ++p) {
++      if (p->second->inode)
++	_schedule_invalidate_dentry_callback(p->second, false);
++    }
++  }
+ }
+ 
+ void Client::mark_caps_dirty(Inode *in, int caps)
+ {
+@@ -3663,11 +3682,16 @@
+   vinodeno_t dirino;
+   vinodeno_t ino;
+   string name;
+ public:
+-  C_Client_DentryInvalidate(Client *c, Dentry *dn) :
+-			    client(c), dirino(dn->dir->parent_inode->vino()),
+-			    ino(dn->inode->vino()), name(dn->name) { }
++  C_Client_DentryInvalidate(Client *c, Dentry *dn, bool del) :
++    client(c), name(dn->name) {
++      dirino = dn->dir->parent_inode->vino();
++      if (del)
++	ino = dn->inode->vino();
++      else
++	ino.ino = inodeno_t();
++  }
+   void finish(int r) {
+     client->_async_dentry_invalidate(dirino, ino, name);
+   }
+ };
+@@ -3678,12 +3702,12 @@
+ 		 << " in dir " << dirino << dendl;
+   dentry_invalidate_cb(dentry_invalidate_cb_handle, dirino, ino, name);
+ }
+ 
+-void Client::_schedule_invalidate_dentry_callback(Dentry *dn)
++void Client::_schedule_invalidate_dentry_callback(Dentry *dn, bool del)
+ {
+   if (dentry_invalidate_cb && dn->inode->ll_ref > 0)
+-    async_dentry_invalidator.queue(new C_Client_DentryInvalidate(this, dn));
++    async_dentry_invalidator.queue(new C_Client_DentryInvalidate(this, dn, del));
+ }
+ 
+ void Client::_invalidate_inode_parents(Inode *in)
+ {
+@@ -3691,9 +3715,9 @@
+   while (q != in->dn_set.end()) {
+     Dentry *dn = *q++;
+     // FIXME: we play lots of unlink/link tricks when handling MDS replies,
+     //        so in->dn_set doesn't always reflect the state of kernel's dcache.
+-    _schedule_invalidate_dentry_callback(dn);
++    _schedule_invalidate_dentry_callback(dn, true);
+     unlink(dn, false);
+   }
+ }
+ 
+@@ -3723,9 +3747,9 @@
+     in->uid = m->head.uid;
+     in->gid = m->head.gid;
+   }
+   bool deleted_inode = false;
+-  if ((issued & CEPH_CAP_LINK_EXCL) == 0) {
++  if ((issued & CEPH_CAP_LINK_EXCL) == 0 && in->nlink != (int32_t)m->head.nlink) {
+     in->nlink = m->head.nlink;
+     if (in->nlink == 0 &&
+ 	(new_caps & (CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL)))
+       deleted_inode = true;
+@@ -7014,10 +7038,15 @@
+ 
+ 
+ void Client::_ll_get(Inode *in)
+ {
+-  if (in->ll_ref == 0)
++  if (in->ll_ref == 0) {
+     in->get();
++    if (in->is_dir() && !in->dn_set.empty()) {
++      assert(in->dn_set.size() == 1); // dirs can't be hard-linked
++      in->get_first_parent()->get(); // pin dentry
++    }
++  }
+   in->ll_get();
+   ldout(cct, 20) << "_ll_get " << in << " " << in->ino << " -> " << in->ll_ref << dendl;
+ }
+ 
+@@ -7025,8 +7054,12 @@
+ {
+   in->ll_put(num);
+   ldout(cct, 20) << "_ll_put " << in << " " << in->ino << " " << num << " -> " << in->ll_ref << dendl;
+   if (in->ll_ref == 0) {
++    if (in->is_dir() && !in->dn_set.empty()) {
++      assert(in->dn_set.size() == 1); // dirs can't be hard-linked
++      in->get_first_parent()->put(); // unpin dentry
++    }
+     put_inode(in);
+     return 0;
+   } else {
+     return in->ll_ref;
+@@ -7064,10 +7097,10 @@
+   if (in->ll_ref < count) {
+     ldout(cct, 1) << "WARNING: ll_forget on " << ino << " " << count
+ 		  << ", which only has ll_ref=" << in->ll_ref << dendl;
+     _ll_put(in, in->ll_ref);
+-      last = true;
+-    } else {
++    last = true;
++  } else {
+     if (_ll_put(in, count) == 0)
+       last = true;
+   }
+ 
+--- a/src/client/Client.h
++++ b/src/client/Client.h
+@@ -478,9 +478,9 @@
+   void queue_cap_snap(Inode *in, snapid_t seq=0);
+   void finish_cap_snap(Inode *in, CapSnap *capsnap, int used);
+   void _flushed_cap_snap(Inode *in, snapid_t seq);
+ 
+-  void _schedule_invalidate_dentry_callback(Dentry *dn);
++  void _schedule_invalidate_dentry_callback(Dentry *dn, bool del);
+   void _async_dentry_invalidate(vinodeno_t dirino, vinodeno_t ino, string& name);
+   void _invalidate_inode_parents(Inode *in);
+ 
+   void _schedule_invalidate_callback(Inode *in, int64_t off, int64_t len, bool keep_caps);
+--- a/src/client/fuse_ll.cc
++++ b/src/client/fuse_ll.cc
+@@ -679,9 +679,11 @@
+ {
+   CephFuse::Handle *cfuse = (CephFuse::Handle *)handle;
+   fuse_ino_t fdirino = cfuse->make_fake_ino(dirino.ino, dirino.snapid);
+ #if FUSE_VERSION >= FUSE_MAKE_VERSION(2, 9)
+-  fuse_ino_t fino = cfuse->make_fake_ino(ino.ino, ino.snapid);
++  fuse_ino_t fino = 0;
++  if (ino.ino != inodeno_t())
++    fino = cfuse->make_fake_ino(ino.ino, ino.snapid);
+   fuse_lowlevel_notify_delete(cfuse->ch, fdirino, fino, name.c_str(), name.length());
+ #elif FUSE_VERSION >= FUSE_MAKE_VERSION(2, 8)
+   fuse_lowlevel_notify_inval_entry(cfuse->ch, fdirino, name.c_str(), name.length());
+ #endif
diff --git a/debian/patches/defaults-leveldb-osd.patch b/debian/patches/defaults-leveldb-osd.patch
deleted file mode 100644
index 4ac5c6a..0000000
--- a/debian/patches/defaults-leveldb-osd.patch
+++ /dev/null
@@ -1,26 +0,0 @@
-Last-Update: 2014-03-27
-Forwarded: no
-Author: Dmitry Smirnov <onlyjob at member.fsf.org>
-Description: increase OSD's leveldb defaults
- The OSD's leveldb currently uses libleveldb's defaults for cache and
- write buffer size, which are both 4 MB.
- Increase the cache size to 128MB and the write buffer to 8MB.
-
-  Similar change for filestore was introduced in
-    https://github.com/ceph/ceph/pull/1160
-
---- a/src/common/config_opts.h
-+++ b/src/common/config_opts.h
-@@ -536,10 +536,10 @@
- OPTION(osd_op_history_duration, OPT_U32, 600) // Oldest completed op to track
- OPTION(osd_target_transaction_size, OPT_INT, 30)     // to adjust various transactions that batch smaller items
- OPTION(osd_failsafe_full_ratio, OPT_FLOAT, .97) // what % full makes an OSD "full" (failsafe)
- OPTION(osd_failsafe_nearfull_ratio, OPT_FLOAT, .90) // what % full makes an OSD near full (failsafe)
--OPTION(osd_leveldb_write_buffer_size, OPT_U64, 0) // OSD's leveldb write buffer size
--OPTION(osd_leveldb_cache_size, OPT_U64, 0) // OSD's leveldb cache size
-+OPTION(osd_leveldb_write_buffer_size, OPT_U64, 8*1024*1024) // OSD's leveldb write buffer size
-+OPTION(osd_leveldb_cache_size, OPT_U64, 256*1024*1024) // OSD's leveldb cache size
- OPTION(osd_leveldb_block_size, OPT_U64, 0) // OSD's leveldb block size
- OPTION(osd_leveldb_bloom_size, OPT_INT, 0) // OSD's leveldb bloom bits per entry
- OPTION(osd_leveldb_max_open_files, OPT_INT, 0) // OSD's leveldb max open files
- OPTION(osd_leveldb_compression, OPT_BOOL, true) // OSD's leveldb uses compression
diff --git a/debian/patches/series b/debian/patches/series
index 1b95214..33117ac 100644
--- a/debian/patches/series
+++ b/debian/patches/series
@@ -1,4 +1,6 @@
-#defaults-leveldb-osd.patch
+5469.patch
+8008.patch
+_1606.patch
 gcj.patch
 modules.patch
 virtualenv-never-download.patch
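
For reference, a minimal standalone sketch of the zero-length I/O behaviour
that 5469.patch backports, written against the librbd C API in the same way
as the patch's own tests. The default config path, the pool name "rbd" and
the image name "testimg" are illustrative assumptions; any existing image
works, and error handling is reduced to bare asserts for brevity.

    // zero_len.cc -- build with: g++ zero_len.cc -lrbd -lrados
    #include <cassert>
    #include <rados/librados.h>
    #include <rbd/librbd.h>

    int main() {
      rados_t cluster;
      rados_ioctx_t ioctx;
      rbd_image_t image;
      char buf[1];

      assert(rados_create(&cluster, NULL) == 0);
      assert(rados_conf_read_file(cluster, NULL) == 0);        // default ceph.conf
      assert(rados_connect(cluster) == 0);
      assert(rados_ioctx_create(cluster, "rbd", &ioctx) == 0); // assumed pool
      assert(rbd_open(ioctx, "testimg", &image, NULL) == 0);   // assumed image

      // With 5469.patch applied, all three calls return 0; before it the
      // zero-length write and discard hung (the completion never fired) and
      // the zero-length read hit the assert in Striper::file_to_extents().
      assert(rbd_write(image, 0, 0, NULL) == 0);
      assert(rbd_discard(image, 0, 0) == 0);
      assert(rbd_read(image, 0, 0, buf) == 0);

      rbd_close(image);
      rados_ioctx_destroy(ioctx);
      rados_shutdown(cluster);
      return 0;
    }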

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-ceph/ceph.git