[geneagrapher] 205/226: Adding CacheGrabber -- a grabber class that caches results.

Doug Torrance dtorrance-guest at moszumanska.debian.org
Sat Jul 11 17:11:11 UTC 2015


This is an automated email from the git hooks/post-receive script.

dtorrance-guest pushed a commit to branch master
in repository geneagrapher.

commit 8c12590a56afb379f99cf8435b5179875a569e94
Author: David Alber <alber.david at gmail.com>
Date:   Wed Dec 28 21:08:35 2011 -0800

    Adding CacheGrabber -- a grabber class that caches results.
    
    The CacheGrabber uses the standard library's shelve module to persist
    cached records. The cache stores all of the information extracted
    from a Math Genealogy Project web page, in addition to a timestamp
    that records when the record was obtained. The timestamp is used to
    determine when a cached value is invalid due to being stale.
    
    This changeset is part of #8.
---
 src/geneagrapher/cache_grabber.py        |  59 +++++++++
 tests/geneagrapher/test_cache_grabber.py | 207 +++++++++++++++++++++++++++++++
 2 files changed, 266 insertions(+)

diff --git a/src/geneagrapher/cache_grabber.py b/src/geneagrapher/cache_grabber.py
new file mode 100644
index 0000000..78be0e2
--- /dev/null
+++ b/src/geneagrapher/cache_grabber.py
@@ -0,0 +1,59 @@
+import shelve
+from time import time
+from grabber import Grabber
+
+
+class CacheGrabber():
+    def __init__(self, filename='geneacache', record_grabber=Grabber,
+                 expiration_interval=604800.):
+        self.filename = filename
+        self.grabber = record_grabber()
+        self.expiration_interval = float(expiration_interval)
+        self.cache = shelve.open(filename)
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_value, traceback):
+        self.close()
+
+    def close(self):
+        """Close the cache. All methods after calling this will raise
+        ValueError."""
+        self.cache.close()
+
+    def is_expired(self, record):
+        """Returns True if the given record is expired."""
+        return time() - record['timestamp'] > self.expiration_interval
+
+    def get_record(self, id):
+        """Return information for the mathematician associated with the given
+        id."""
+        id_str = str(id)
+        if self.is_cached(id_str):
+            d = self.cache[id_str]
+            return [d['name'], d['institution'], d['year'], d['advisors'],
+                    d['descendants']]
+        else:
+            [name, institution, year, advisors,
+             descendants] = self.grabber.get_record(id)
+            self.load_into_cache(id, name, institution, year, advisors,
+                                 descendants)
+            return [name, institution, year, advisors, descendants]
+
+    def is_cached(self, id):
+        """Return True if an item with the given id is in the cache and has
+        not expired."""
+        return str(id) in self.cache and \
+               not self.is_expired(self.cache[str(id)])
+
+    def load_into_cache(self, id, name, institution, year, advisors,
+                        descendants):
+        """Insert a new record into the cache.
+
+        If the record already exists, its values are replaced with the values
+        provided as input to this method."""
+        d = {'name': name, 'institution': institution, 'year': year,
+             'advisors': advisors, 'descendants': descendants,
+             'timestamp': time()}
+        self.cache[str(id)] = d
diff --git a/tests/geneagrapher/test_cache_grabber.py b/tests/geneagrapher/test_cache_grabber.py
new file mode 100644
index 0000000..25d622b
--- /dev/null
+++ b/tests/geneagrapher/test_cache_grabber.py
@@ -0,0 +1,207 @@
+import os
+from time import time
+import unittest
+from local_data_grabber import LocalDataGrabber
+from geneagrapher.cache_grabber import CacheGrabber
+from geneagrapher.grabber import Grabber
+
+
+class TestCacheGrabberMethods(unittest.TestCase):
+    """Unit tests for the geneagrapher.CacheGrabber class."""
+    def setUp(self):
+        self.name = u"Carl Friedrich Gau\xdf"
+        self.institution = u"Universit\xe4t Helmstedt"
+        self.year = 1799
+        self.advisors = set([18230])
+        self.descendants = set([18603, 18233, 62547, 29642, 55175,
+                                29458, 19953, 18232, 151876])
+
+    def tearDown(self):
+        try:
+            os.remove('geneacache')
+        except OSError:
+            pass
+
+    def test_init1(self):
+        """Test constructor."""
+        cache = CacheGrabber()
+        self.assertEqual(cache.filename, 'geneacache')
+        self.assertEqual(len(cache.cache), 0)
+        self.assertIsInstance(cache.grabber, Grabber)
+        self.assertEqual(cache.expiration_interval, 604800.)
+
+    def test_init2(self):
+        """Test constructor with non-default filename."""
+        cache = CacheGrabber('mycachename')
+        self.assertEqual(cache.filename, 'mycachename')
+        self.assertEqual(len(cache.cache), 0)
+        self.assertIsInstance(cache.grabber, Grabber)
+        self.assertEqual(cache.expiration_interval, 604800.)
+        os.remove('mycachename')
+
+    def test_init3(self):
+        """Test constructor with non-default record grabber."""
+        cache = CacheGrabber(record_grabber=LocalDataGrabber)
+        self.assertEqual(cache.filename, 'geneacache')
+        self.assertEqual(len(cache.cache), 0)
+        self.assertIsInstance(cache.grabber, LocalDataGrabber)
+        self.assertEqual(cache.expiration_interval, 604800.)
+
+    def test_init4(self):
+        """Test constructor with non-default expiration interval."""
+        cache = CacheGrabber(expiration_interval=1209600.)
+        self.assertEqual(cache.filename, 'geneacache')
+        self.assertEqual(len(cache.cache), 0)
+        self.assertIsInstance(cache.grabber, Grabber)
+        self.assertEqual(cache.expiration_interval, 1209600.)
+
+    def test_close(self):
+        """Test the close method."""
+        cache = CacheGrabber(record_grabber=LocalDataGrabber)
+        self.assertEqual(len(cache.cache), 0)
+        cache.close()
+        self.assertRaisesRegexp(ValueError,
+                                'invalid operation on closed shelf',
+                                len, cache.cache)
+
+    def test_is_expired_false(self):
+        """Test the is_expired method."""
+        t = time()
+        d = {'name': self.name, 'institution': self.institution,
+             'year': self.year, 'advisors': self.advisors,
+             'descendants': self.descendants, 'timestamp': t}
+        with CacheGrabber() as cache:
+            self.assertFalse(cache.is_expired(d))
+            d['timestamp'] = time() - cache.expiration_interval + 20
+            self.assertFalse(cache.is_expired(d))
+
+    def test_is_expired_true(self):
+        """Test the is_expired method."""
+        with CacheGrabber() as cache:
+            t = time() - cache.expiration_interval - 1
+            d = {'name': self.name, 'institution': self.institution,
+                 'year': self.year, 'advisors': self.advisors,
+                 'descendants': self.descendants, 'timestamp': t}
+            self.assertTrue(cache.is_expired(d))
+
+    def test_context_manager(self):
+        """Test the context manager methods."""
+        with CacheGrabber(record_grabber=LocalDataGrabber) as cache:
+            self.assertEqual(len(cache.cache), 0)
+        self.assertRaisesRegexp(ValueError,
+                                'invalid operation on closed shelf',
+                                len, cache.cache)
+
+    def test_get_record_bad(self):
+        """Test the get_record method for a bad id."""
+        with CacheGrabber(record_grabber=LocalDataGrabber) as cache:
+            self.assertEqual(len(cache.cache), 0)
+            self.assertRaisesRegexp(ValueError, 'Invalid id 999999999',
+                                    cache.get_record, 999999999)
+            self.assertEqual(len(cache.cache), 0)
+
+    def test_get_record(self):
+        """Test the get_record method for a good id."""
+        with CacheGrabber(record_grabber=LocalDataGrabber) as cache:
+            [name, institution, year, advisors,
+             descendents] = cache.get_record(18231)
+            self.assertEqual(name, u"Carl Friedrich Gau\xdf")
+            self.assertEqual(institution, u"Universit\xe4t Helmstedt")
+            self.assertEqual(year, 1799)
+            self.assertEqual(advisors, set([18230]))
+            self.assertEqual(descendents, set([18603, 18233, 62547, 29642,
+                                               55175, 29458, 19953, 18232,
+                                               151876]))
+            self.assertEqual(len(cache.cache), 1)
+
+            # Make the request again and verify the cached version is returned.
+            d = cache.cache['18231']
+            d['institution'] = u'Rigged for test'
+            cache.cache['18231'] = d
+            [name, institution, year, advisors,
+             descendents] = cache.get_record(18231)
+            self.assertEqual(name, u"Carl Friedrich Gau\xdf")
+            self.assertEqual(institution, u"Rigged for test")
+            self.assertEqual(year, 1799)
+            self.assertEqual(advisors, set([18230]))
+            self.assertEqual(descendents, set([18603, 18233, 62547, 29642,
+                                               55175, 29458, 19953, 18232,
+                                               151876]))
+            self.assertEqual(len(cache.cache), 1)
+
+        with CacheGrabber(record_grabber=LocalDataGrabber) as cache:
+            # Redo the last request, this time with a newly-loaded instance
+            # of the cache from disk.
+            self.assertEqual(len(cache.cache), 1)
+            d = cache.cache['18231']
+            d['institution'] = u'Rigged for test'
+            cache.cache['18231'] = d
+            [name, institution, year, advisors,
+             descendents] = cache.get_record(18231)
+            self.assertEqual(name, u"Carl Friedrich Gau\xdf")
+            self.assertEqual(institution, u"Rigged for test")
+            self.assertEqual(year, 1799)
+            self.assertEqual(advisors, set([18230]))
+            self.assertEqual(descendents, set([18603, 18233, 62547, 29642,
+                                               55175, 29458, 19953, 18232,
+                                               151876]))
+            self.assertEqual(len(cache.cache), 1)
+
+            # Make another request, this time with the cached entry expired,
+            # and verify a new version is retrieved.
+            d['timestamp'] = time() - cache.expiration_interval - 1
+            cache.cache['18231'] = d
+            [name, institution, year, advisors,
+             descendents] = cache.get_record(18231)
+            self.assertEqual(name, u"Carl Friedrich Gau\xdf")
+            self.assertEqual(institution, u"Universit\xe4t Helmstedt")
+            self.assertEqual(year, 1799)
+            self.assertEqual(advisors, set([18230]))
+            self.assertEqual(descendents, set([18603, 18233, 62547, 29642,
+                                               55175, 29458, 19953, 18232,
+                                               151876]))
+            self.assertEqual(len(cache.cache), 1)
+
+    def test_is_in_cache(self):
+        """Test the is_in_cache method."""
+        d = {'name': self.name, 'institution': self.institution,
+             'year': self.year, 'advisors': self.advisors,
+             'descendants': self.descendants, 'timestamp': time()}
+        with CacheGrabber(record_grabber=LocalDataGrabber) as cache:
+            self.assertFalse(cache.is_cached(self.id))
+            cache.cache[str(self.id)] = d
+            self.assertTrue(cache.is_cached(self.id))
+            new_timestamp = time() - cache.expiration_interval - 1
+            d['timestamp'] = new_timestamp
+            cache.cache[str(self.id)] = d
+            self.assertFalse(cache.is_cached(self.id))
+
+    def test_load_into_cache(self):
+        """Test the load_into_cache method."""
+        with CacheGrabber(record_grabber=LocalDataGrabber) as cache:
+            self.assertEqual(len(cache.cache), 0)
+            cache.load_into_cache(self.id, self.name, self.institution,
+                                  self.year, self.advisors, self.descendants)
+            self.assertEqual(len(cache.cache), 1)
+            record = cache.cache[str(self.id)]
+            self.assertEqual(record['name'], self.name)
+            self.assertEqual(record['institution'], self.institution)
+            self.assertEqual(record['year'], self.year)
+            self.assertEqual(record['advisors'], self.advisors)
+            self.assertEqual(record['descendants'], self.descendants)
+            self.assertTrue(time() - record['timestamp'] < 20)
+
+            # Insert the same record a second time to verify replacement
+            # behavior.
+            self.assertEqual(len(cache.cache), 1)
+            record = cache.cache[str(self.id)]
+            self.assertEqual(record['name'], self.name)
+            self.assertEqual(record['institution'], self.institution)
+            self.assertEqual(record['year'], self.year)
+            self.assertEqual(record['advisors'], self.advisors)
+            self.assertEqual(record['descendants'], self.descendants)
+            self.assertTrue(time() - record['timestamp'] < 20)
+
+
+if __name__ == '__main__':
+    unittest.main()

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/geneagrapher.git



More information about the debian-science-commits mailing list