[geneagrapher] 205/226: Adding CacheGrabber -- a grabber class that caches results.
Doug Torrance
dtorrance-guest at moszumanska.debian.org
Sat Jul 11 17:11:11 UTC 2015
This is an automated email from the git hooks/post-receive script.
dtorrance-guest pushed a commit to branch master
in repository geneagrapher.
commit 8c12590a56afb379f99cf8435b5179875a569e94
Author: David Alber <alber.david at gmail.com>
Date: Wed Dec 28 21:08:35 2011 -0800
Adding CacheGrabber -- a grabber class that caches results.
The CacheGrabber uses the standard library's shelve module to persist
cached records. The cache stores all of the information extracted
from a Math Genealogy Project web page, in addition to a timestamp
that records when the record was obtained. The timestamp is used to
determine when a cached value is invalid due to being stale.
This changeset is part of #8.
---
src/geneagrapher/cache_grabber.py | 59 +++++++++
tests/geneagrapher/test_cache_grabber.py | 207 +++++++++++++++++++++++++++++++
2 files changed, 266 insertions(+)
diff --git a/src/geneagrapher/cache_grabber.py b/src/geneagrapher/cache_grabber.py
new file mode 100644
index 0000000..78be0e2
--- /dev/null
+++ b/src/geneagrapher/cache_grabber.py
@@ -0,0 +1,59 @@
+import shelve
+from time import time
+from grabber import Grabber
+
+
+class CacheGrabber(object):
+    """Grabber wrapper that persists fetched records in a shelve cache.
+
+    Each cache entry is a dict holding the record fields ('name',
+    'institution', 'year', 'advisors', 'descendants') plus a 'timestamp'
+    taken with time() when the entry was stored. Entries older than
+    expiration_interval seconds are treated as absent and are fetched
+    again through the wrapped grabber.
+
+    Instances are context managers; leaving the with-block closes the
+    cache.
+    """
+
+    def __init__(self, filename='geneacache', record_grabber=Grabber,
+                 expiration_interval=604800.):
+        """Open the cache and instantiate the record grabber.
+
+        filename -- name handed to shelve.open.
+        record_grabber -- class (or other zero-argument callable) whose
+            result provides the get_record method used on cache misses.
+        expiration_interval -- age in seconds beyond which a cached
+            entry is stale (the default, 604800, is seven days).
+        """
+        self.filename = filename
+        self.grabber = record_grabber()
+        self.expiration_interval = float(expiration_interval)
+        self.cache = shelve.open(filename)
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_value, traceback):
+        # Returns None, so an exception raised inside the with-block
+        # still propagates after the cache has been closed.
+        self.close()
+
+    def close(self):
+        """Close the cache. All methods after calling this will raise
+        ValueError."""
+        self.cache.close()
+
+    def is_expired(self, record):
+        """Returns True if the given record is expired."""
+        return time() - record['timestamp'] > self.expiration_interval
+
+    def _fresh_record(self, id_str):
+        """Return the unexpired cache entry stored under id_str, or None."""
+        # A single get() reads (and unpickles) the entry once, instead of
+        # a membership test followed by repeated item lookups.
+        record = self.cache.get(id_str)
+        if record is None or self.is_expired(record):
+            return None
+        return record
+
+    def get_record(self, id):
+        """Return information for the mathematician associated with the given
+        id.
+
+        The result is the list [name, institution, year, advisors,
+        descendants]. An unexpired cache entry is used when there is one;
+        otherwise the record is obtained from the wrapped grabber and
+        stored in the cache before being returned. Exceptions raised by
+        the grabber propagate and leave the cache unchanged."""
+        record = self._fresh_record(str(id))
+        if record is not None:
+            return [record['name'], record['institution'], record['year'],
+                    record['advisors'], record['descendants']]
+        [name, institution, year, advisors,
+         descendants] = self.grabber.get_record(id)
+        self.load_into_cache(id, name, institution, year, advisors,
+                             descendants)
+        return [name, institution, year, advisors, descendants]
+
+    def is_cached(self, id):
+        """Return True if an item with the given id is in the cache and has
+        not expired."""
+        return self._fresh_record(str(id)) is not None
+
+    def load_into_cache(self, id, name, institution, year, advisors,
+                        descendants):
+        """Insert a new record into the cache.
+
+        If the record already exists, its values are replaced with the values
+        provided as input to this method. The entry is keyed by str(id) and
+        stamped with the current time."""
+        d = {'name': name, 'institution': institution, 'year': year,
+             'advisors': advisors, 'descendants': descendants,
+             'timestamp': time()}
+        self.cache[str(id)] = d
diff --git a/tests/geneagrapher/test_cache_grabber.py b/tests/geneagrapher/test_cache_grabber.py
new file mode 100644
index 0000000..25d622b
--- /dev/null
+++ b/tests/geneagrapher/test_cache_grabber.py
@@ -0,0 +1,207 @@
+import os
+from time import time
+import unittest
+from local_data_grabber import LocalDataGrabber
+from geneagrapher.cache_grabber import CacheGrabber
+from geneagrapher.grabber import Grabber
+
+
+class TestCacheGrabberMethods(unittest.TestCase):
+    """Unit tests for the geneagrapher.CacheGrabber class."""
+    def setUp(self):
+        # Fixture describing the record requested throughout these tests.
+        # The id gets its own attribute name because TestCase already
+        # defines an id() method; reading self.id would silently yield
+        # that bound method rather than a record id.
+        self.record_id = 18231
+        self.name = u"Carl Friedrich Gau\xdf"
+        self.institution = u"Universit\xe4t Helmstedt"
+        self.year = 1799
+        self.advisors = set([18230])
+        self.descendants = set([18603, 18233, 62547, 29642, 55175,
+                                29458, 19953, 18232, 151876])
+
+    def tearDown(self):
+        # Remove the default cache file so every test starts empty.
+        try:
+            os.remove('geneacache')
+        except OSError:
+            pass
+
+    def _assert_record(self, record, institution):
+        """Check a get_record result against the fixture, expecting the
+        given institution."""
+        [name, rec_institution, year, advisors, descendants] = record
+        self.assertEqual(name, self.name)
+        self.assertEqual(rec_institution, institution)
+        self.assertEqual(year, self.year)
+        self.assertEqual(advisors, self.advisors)
+        self.assertEqual(descendants, self.descendants)
+
+    def test_init1(self):
+        """Test constructor."""
+        # The with-block closes the shelf before tearDown deletes it.
+        with CacheGrabber() as cache:
+            self.assertEqual(cache.filename, 'geneacache')
+            self.assertEqual(len(cache.cache), 0)
+            self.assertIsInstance(cache.grabber, Grabber)
+            self.assertEqual(cache.expiration_interval, 604800.)
+
+    def test_init2(self):
+        """Test constructor with non-default filename."""
+        try:
+            with CacheGrabber('mycachename') as cache:
+                self.assertEqual(cache.filename, 'mycachename')
+                self.assertEqual(len(cache.cache), 0)
+                self.assertIsInstance(cache.grabber, Grabber)
+                self.assertEqual(cache.expiration_interval, 604800.)
+        finally:
+            # Delete the file only once the shelf is closed, and even if
+            # an assertion above failed.
+            os.remove('mycachename')
+
+    def test_init3(self):
+        """Test constructor with non-default record grabber."""
+        with CacheGrabber(record_grabber=LocalDataGrabber) as cache:
+            self.assertEqual(cache.filename, 'geneacache')
+            self.assertEqual(len(cache.cache), 0)
+            self.assertIsInstance(cache.grabber, LocalDataGrabber)
+            self.assertEqual(cache.expiration_interval, 604800.)
+
+    def test_init4(self):
+        """Test constructor with non-default expiration interval."""
+        with CacheGrabber(expiration_interval=1209600.) as cache:
+            self.assertEqual(cache.filename, 'geneacache')
+            self.assertEqual(len(cache.cache), 0)
+            self.assertIsInstance(cache.grabber, Grabber)
+            self.assertEqual(cache.expiration_interval, 1209600.)
+
+    def test_close(self):
+        """Test the close method."""
+        cache = CacheGrabber(record_grabber=LocalDataGrabber)
+        self.assertEqual(len(cache.cache), 0)
+        cache.close()
+        self.assertRaisesRegexp(ValueError,
+                                'invalid operation on closed shelf',
+                                len, cache.cache)
+
+    def test_is_expired_false(self):
+        """Test the is_expired method."""
+        t = time()
+        d = {'name': self.name, 'institution': self.institution,
+             'year': self.year, 'advisors': self.advisors,
+             'descendants': self.descendants, 'timestamp': t}
+        with CacheGrabber() as cache:
+            self.assertFalse(cache.is_expired(d))
+            d['timestamp'] = time() - cache.expiration_interval + 20
+            self.assertFalse(cache.is_expired(d))
+
+    def test_is_expired_true(self):
+        """Test the is_expired method."""
+        with CacheGrabber() as cache:
+            t = time() - cache.expiration_interval - 1
+            d = {'name': self.name, 'institution': self.institution,
+                 'year': self.year, 'advisors': self.advisors,
+                 'descendants': self.descendants, 'timestamp': t}
+            self.assertTrue(cache.is_expired(d))
+
+    def test_context_manager(self):
+        """Test the context manager methods."""
+        with CacheGrabber(record_grabber=LocalDataGrabber) as cache:
+            self.assertEqual(len(cache.cache), 0)
+        self.assertRaisesRegexp(ValueError,
+                                'invalid operation on closed shelf',
+                                len, cache.cache)
+
+    def test_get_record_bad(self):
+        """Test the get_record method for a bad id."""
+        with CacheGrabber(record_grabber=LocalDataGrabber) as cache:
+            self.assertEqual(len(cache.cache), 0)
+            self.assertRaisesRegexp(ValueError, 'Invalid id 999999999',
+                                    cache.get_record, 999999999)
+            # A failed lookup must not leave an entry behind.
+            self.assertEqual(len(cache.cache), 0)
+
+    def test_get_record(self):
+        """Test the get_record method for a good id."""
+        key = str(self.record_id)
+        with CacheGrabber(record_grabber=LocalDataGrabber) as cache:
+            self._assert_record(cache.get_record(self.record_id),
+                                self.institution)
+            self.assertEqual(len(cache.cache), 1)
+
+            # Make the request again and verify the cached version is
+            # returned: the stored entry is altered so it can be told
+            # apart from a fresh result of the grabber.
+            d = cache.cache[key]
+            d['institution'] = u'Rigged for test'
+            cache.cache[key] = d
+            self._assert_record(cache.get_record(self.record_id),
+                                u"Rigged for test")
+            self.assertEqual(len(cache.cache), 1)
+
+        with CacheGrabber(record_grabber=LocalDataGrabber) as cache:
+            # Redo the last request, this time with a newly-loaded instance
+            # of the cache from disk.
+            self.assertEqual(len(cache.cache), 1)
+            d = cache.cache[key]
+            d['institution'] = u'Rigged for test'
+            cache.cache[key] = d
+            self._assert_record(cache.get_record(self.record_id),
+                                u"Rigged for test")
+            self.assertEqual(len(cache.cache), 1)
+
+            # Make another request, this time with the cached entry expired,
+            # and verify a new version is retrieved.
+            d['timestamp'] = time() - cache.expiration_interval - 1
+            cache.cache[key] = d
+            self._assert_record(cache.get_record(self.record_id),
+                                self.institution)
+            self.assertEqual(len(cache.cache), 1)
+
+    def test_is_in_cache(self):
+        """Test the is_cached method."""
+        key = str(self.record_id)
+        d = {'name': self.name, 'institution': self.institution,
+             'year': self.year, 'advisors': self.advisors,
+             'descendants': self.descendants, 'timestamp': time()}
+        with CacheGrabber(record_grabber=LocalDataGrabber) as cache:
+            self.assertFalse(cache.is_cached(self.record_id))
+            cache.cache[key] = d
+            self.assertTrue(cache.is_cached(self.record_id))
+            # An entry that is present but stale counts as not cached.
+            new_timestamp = time() - cache.expiration_interval - 1
+            d['timestamp'] = new_timestamp
+            cache.cache[key] = d
+            self.assertFalse(cache.is_cached(self.record_id))
+
+    def test_load_into_cache(self):
+        """Test the load_into_cache method."""
+        key = str(self.record_id)
+        with CacheGrabber(record_grabber=LocalDataGrabber) as cache:
+            self.assertEqual(len(cache.cache), 0)
+            cache.load_into_cache(self.record_id, self.name,
+                                  self.institution, self.year,
+                                  self.advisors, self.descendants)
+            self.assertEqual(len(cache.cache), 1)
+            record = cache.cache[key]
+            self.assertEqual(record['name'], self.name)
+            self.assertEqual(record['institution'], self.institution)
+            self.assertEqual(record['year'], self.year)
+            self.assertEqual(record['advisors'], self.advisors)
+            self.assertEqual(record['descendants'], self.descendants)
+            self.assertLess(time() - record['timestamp'], 20)
+
+            # Insert a record under the same id a second time, with
+            # different values, to verify replacement behavior.
+            cache.load_into_cache(self.record_id, self.name,
+                                  u'Rigged for test', 1800,
+                                  self.advisors, self.descendants)
+            self.assertEqual(len(cache.cache), 1)
+            record = cache.cache[key]
+            self.assertEqual(record['name'], self.name)
+            self.assertEqual(record['institution'], u'Rigged for test')
+            self.assertEqual(record['year'], 1800)
+            self.assertEqual(record['advisors'], self.advisors)
+            self.assertEqual(record['descendants'], self.descendants)
+            self.assertLess(time() - record['timestamp'], 20)
+
+
+# Run the test cases defined in this module when it is executed as a script.
+if __name__ == '__main__':
+ unittest.main()
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/geneagrapher.git
More information about the debian-science-commits
mailing list