From 6f39c2d6b790001f65d28ccf75fdfdcbbdf56919 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Mon, 18 Sep 2017 19:25:15 -0500 Subject: [PATCH 01/17] Add an LRU cache to PersistentDict. This adds an optional in-memory LRU cache. To use the cache, you supply a *in_mem_cache_size* parameter to the PersistentDict. In order to properly support cache invalidation this also implements version tracking of files. This change is backwards compatible with existing persistent dict caches. --- pytools/persistent_dict.py | 244 +++++++++++++++++++++++++++++++---- test/test_persistent_dict.py | 57 ++++++++ 2 files changed, 278 insertions(+), 23 deletions(-) diff --git a/pytools/persistent_dict.py b/pytools/persistent_dict.py index 3fe6948..6a1a707 100644 --- a/pytools/persistent_dict.py +++ b/pytools/persistent_dict.py @@ -54,6 +54,15 @@ except ImportError: new_hash = sha.new +def _make_dir_recursively(dir): + try: + os.makedirs(dir) + except OSError as e: + from errno import EEXIST + if e.errno != EEXIST: + raise + + def _erase_dir(dir): from os import listdir, unlink, rmdir from os.path import join, isdir @@ -274,12 +283,127 @@ class NoSuchEntryError(KeyError): pass +class _LinkedList(object): + """The list operates on nodes of the form [value, leftptr, rightpr]. To create a + node of this form you can use `LinkedList.new_node().` + + Supports inserting at the left and deleting from an arbitrary location. + """ + + def __init__(self): + self.count = 0 + self.head = None + self.end = None + + @staticmethod + def new_node(element): + return [element, None, None] + + def __len__(self): + return self.count + + def appendleft_node(self, node): + self.count += 1 + + if self.head is None: + self.head = self.end = node + return + + self.head[1] = node + node[2] = self.head + + self.head = node + + def pop_node(self): + end = self.end + self.remove_node(end) + return end + + def remove_node(self, node): + self.count -= 1 + + if self.head is self.end: + self.head = self.end = None + return + + left = node[1] + right = node[2] + + if left is None: + self.head = right + else: + left[2] = right + + if right is None: + self.tail = left + else: + right[1] = left + + node[1] = node[2] = None + + +class _LRUCache(object): + """A mapping that keeps at most *maxsize* items with an LRU replacement policy. + """ + + def __init__(self, maxsize): + self.lru_order = _LinkedList() + self.maxsize = maxsize + self.cache = {} + + def __delitem__(self, item): + node = self.cache[item] + self.lru_order.remove_node(node) + del self.cache[item] + + def __getitem__(self, item): + node = self.cache[item] + self.lru_order.remove_node(node) + self.lru_order.appendleft_node(node) + # A linked list node contains a tuple of the form (item, value). + return node[0][1] + + def __contains__(self, item): + return item in self.cache + + def discard(self, item): + if item in self.cache: + del self[item] + + def clear(self): + self.cache.clear() + del self.lru_order + self.lru_order = _LinkedList() + + def __setitem__(self, item, value): + if self.maxsize < 1: + return + + try: + node = self.cache[item] + self.lru_order.remove_node(node) + except KeyError: + if len(self.lru_order) >= self.maxsize: + # Make room for new elements. 
+ end_node = self.lru_order.pop_node() + del self.cache[end_node[0][0]] + + node = self.lru_order.new_node((item, value)) + self.cache[item] = node + + self.lru_order.appendleft_node(node) + return node[0] + + class PersistentDict(object): - def __init__(self, identifier, key_builder=None, container_dir=None): + def __init__(self, identifier, key_builder=None, container_dir=None, + in_mem_cache_size=None): """ :arg identifier: a file-name-compatible string identifying this dictionary :arg key_builder: a subclass of :class:`KeyBuilder` + :arg in_mem_cache_size: If not *None*, retain an in-memory cache of + *in_mem_cache_size* items. The replacement policy is LRU. .. automethod:: __getitem__ .. automethod:: __setitem__ @@ -304,20 +428,20 @@ class PersistentDict(object): ".".join(str(i) for i in sys.version_info),)) self.container_dir = container_dir + self.version_dir = join(container_dir, "version") - self._make_container_dir() + self._make_container_dirs() - def _make_container_dir(self): - # {{{ ensure container directory exists + if in_mem_cache_size is None: + in_mem_cache_size = 0 - try: - os.makedirs(self.container_dir) - except OSError as e: - from errno import EEXIST - if e.errno != EEXIST: - raise + self._use_cache = (in_mem_cache_size >= 1) + self._read_key_cache = _LRUCache(in_mem_cache_size) + self._read_contents_cache = _LRUCache(in_mem_cache_size) - # }}} + def _make_container_dirs(self): + _make_dir_recursively(self.container_dir) + _make_dir_recursively(self.version_dir) def store(self, key, value, info_files={}): hexdigest_key = self.key_builder(key) @@ -348,6 +472,8 @@ class PersistentDict(object): logger.debug("%s: cache store [key=%s]" % ( self.identifier, hexdigest_key)) + self._tick_version(hexdigest_key) + # Write key last, so that if the reader below key_path = item_dir_m.sub("key") with open(key_path, "wb") as outf: @@ -359,6 +485,39 @@ class PersistentDict(object): finally: cleanup_m.clean_up() + def _tick_version(self, hexdigest_key): + from os.path import join + version_path = join(self.version_dir, hexdigest_key) + + from six.moves.cPickle import load, dump, HIGHEST_PROTOCOL + + try: + with open(version_path, "r+b") as versionf: + version = 1 + load(versionf) + versionf.seek(0) + dump(version, versionf, protocol=HIGHEST_PROTOCOL) + + except (IOError, EOFError): + _make_dir_recursively(self.version_dir) + with open(version_path, "wb") as versionf: + dump(0, versionf, protocol=HIGHEST_PROTOCOL) + + def _read_cached(self, file_name, version, cache): + try: + value, cached_version = cache[file_name] + if version == cached_version: + return value + except KeyError: + pass + + with open(file_name, "rb") as inf: + from six.moves.cPickle import load + value = load(inf) + + cache[file_name] = (value, version) + + return value + def fetch(self, key): hexdigest_key = self.key_builder(key) @@ -377,19 +536,51 @@ class PersistentDict(object): item_dir_m = ItemDirManager(cleanup_m, item_dir) key_path = item_dir_m.sub("key") value_path = item_dir_m.sub("contents") + version_path = join(self.version_dir, hexdigest_key) - from six.moves.cPickle import load + # {{{ read version + + version = None + exc = None + + try: + with open(version_path, "rb") as versionf: + from six.moves.cPickle import load + version = load(versionf) + except IOError: + # Not a fatal error - but we won't be able to use the cache. 
+ self._read_key_cache.discard(key_path) + self._read_contents_cache.discard(value_path) + except (OSError, EOFError) as e: + exc = e + + if version is None: + try: + # If the version doesn't exist, reset the version + # counter. + self._tick_version(hexdigest_key) + except (OSError, IOError, EOFError) as e: + exc = e + + if exc is not None: + item_dir_m.reset() + from warnings import warn + warn("pytools.persistent_dict.PersistentDict(%s) " + "encountered an invalid " + "key file for key %s. Entry deleted." + % (self.identifier, hexdigest_key)) + raise NoSuchEntryError(key) + + # }}} # {{{ load key file exc = None try: - with open(key_path, "rb") as inf: - read_key = load(inf) - except IOError as e: - exc = e - except EOFError as e: + read_key = self._read_cached(key_path, version, + self._read_key_cache) + except (OSError, IOError, EOFError) as e: exc = e if exc is not None: @@ -423,11 +614,9 @@ class PersistentDict(object): exc = None try: - with open(value_path, "rb") as inf: - read_contents = load(inf) - except IOError as e: - exc = e - except EOFError as e: + read_contents = self._read_cached(value_path, version, + self._read_contents_cache) + except (OSError, IOError, EOFError) as e: exc = e if exc is not None: @@ -457,6 +646,9 @@ class PersistentDict(object): if not isdir(item_dir): raise NoSuchEntryError(key) + key_file = join(item_dir, "key") + contents_file = join(item_dir, "contents") + cleanup_m = CleanupManager() try: try: @@ -471,6 +663,9 @@ class PersistentDict(object): finally: cleanup_m.clean_up() + self._read_key_cache.discard(key_file) + self._read_contents_cache.discard(contents_file) + def __getitem__(self, key): return self.fetch(key) @@ -487,7 +682,10 @@ class PersistentDict(object): if e.errno != errno.ENOENT: raise - self._make_container_dir() + self._make_container_dirs() + + self._read_key_cache.clear() + self._read_contents_cache.clear() # }}} diff --git a/test/test_persistent_dict.py b/test/test_persistent_dict.py index 3da5748..7904a03 100644 --- a/test/test_persistent_dict.py +++ b/test/test_persistent_dict.py @@ -37,6 +37,63 @@ def test_persistent_dict(): assert d[k] + 1 == pdict[k] +class PDictTestValue(object): + + def __init__(self, val): + self.val = val + + def __getstate__(self): + return {"val": self.val} + + def update_persistent_hash(self, key_hash, key_builder): + key_builder.rec(key_hash, self.val) + + +def test_persistent_dict_in_memory_cache(): + from pytools.persistent_dict import PersistentDict + pdict = PersistentDict("pytools-in-memory-cache-test", in_mem_cache_size=3) + pdict.clear() + + pdict[1] = PDictTestValue(1) + pdict[2] = PDictTestValue(2) + pdict[3] = PDictTestValue(3) + pdict[4] = PDictTestValue(4) + + # {{{ test LRU policy + + val1 = pdict[1] + val1.val = 0 + + assert pdict[1].val == 0 + pdict[2] + assert pdict[1].val == 0 + pdict[3] + assert pdict[1].val == 0 + pdict[2] + assert pdict[1].val == 0 + pdict[4] + assert pdict[1].val == 1 + + # }}} + + # {{{ test cache invalidation by versioning + + assert pdict[1].val == 1 + pdict2 = PersistentDict("pytools-in-memory-cache-test") + pdict2[1] = PDictTestValue(5) + assert pdict[1].val == 5 + + # }}} + + # {{{ test cache invalidation by deletion + + del pdict2[1] + pdict2[1] = PDictTestValue(10) + assert pdict[1].val == 10 + + # }}} + + if __name__ == "__main__": if len(sys.argv) > 1: exec(sys.argv[1]) -- GitLab From ac1bd1b2e462a5cd59dcc8fc469df85b149ce8ab Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Mon, 18 Sep 2017 19:31:07 -0500 Subject: [PATCH 02/17] Bump version. 
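This release makes the optional in-memory LRU cache from the previous patch available to users. A minimal usage sketch of the new parameter (the identifier and cache size below are illustrative placeholders, not part of this series):

    from pytools.persistent_dict import PersistentDict

    # Keep up to 128 recently used entries in memory; entries are still
    # pickled to disk as before, and each store ticks a per-key version
    # counter so other processes can invalidate their in-memory copies.
    pdict = PersistentDict("my-cache", in_mem_cache_size=128)

    pdict["answer"] = 42
    print(pdict["answer"])  # first read: loaded from disk, then cached
    print(pdict["answer"])  # second read: served from the LRU cache

Omitting *in_mem_cache_size* (or passing *None*) keeps the previous, purely on-disk behavior.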
--- pytools/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytools/version.py b/pytools/version.py index c66785c..a63be76 100644 --- a/pytools/version.py +++ b/pytools/version.py @@ -1,3 +1,3 @@ -VERSION = (2017, 4) +VERSION = (2017, 5) VERSION_STATUS = "" VERSION_TEXT = ".".join(str(x) for x in VERSION) + VERSION_STATUS -- GitLab From fb07563671088a7a5eb715fed7485066336a370c Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Mon, 18 Sep 2017 20:35:21 -0500 Subject: [PATCH 03/17] Disable automagic version ticking on read. Add a note that skipping the version read is for backwards compatibility. --- pytools/persistent_dict.py | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/pytools/persistent_dict.py b/pytools/persistent_dict.py index 6a1a707..11a6cef 100644 --- a/pytools/persistent_dict.py +++ b/pytools/persistent_dict.py @@ -502,19 +502,21 @@ class PersistentDict(object): with open(version_path, "wb") as versionf: dump(0, versionf, protocol=HIGHEST_PROTOCOL) - def _read_cached(self, file_name, version, cache): - try: - value, cached_version = cache[file_name] - if version == cached_version: - return value - except KeyError: - pass + def _read_cached(self, file_name, version, cache, use_cache): + if use_cache: + try: + value, cached_version = cache[file_name] + if version == cached_version: + return value + except KeyError: + pass with open(file_name, "rb") as inf: from six.moves.cPickle import load value = load(inf) - cache[file_name] = (value, version) + if use_cache: + cache[file_name] = (value, version) return value @@ -542,26 +544,24 @@ class PersistentDict(object): version = None exc = None + use_cache = True try: with open(version_path, "rb") as versionf: from six.moves.cPickle import load version = load(versionf) except IOError: - # Not a fatal error - but we won't be able to use the cache. + # Not a fatal error - but we won't be able to use the + # LRU cache. + # + # This allows us to still read cache directory hiearchies + # created by previous version (< 2017.5). self._read_key_cache.discard(key_path) self._read_contents_cache.discard(value_path) + use_cache = False except (OSError, EOFError) as e: exc = e - if version is None: - try: - # If the version doesn't exist, reset the version - # counter. - self._tick_version(hexdigest_key) - except (OSError, IOError, EOFError) as e: - exc = e - if exc is not None: item_dir_m.reset() from warnings import warn @@ -579,7 +579,7 @@ class PersistentDict(object): try: read_key = self._read_cached(key_path, version, - self._read_key_cache) + self._read_key_cache, use_cache) except (OSError, IOError, EOFError) as e: exc = e @@ -615,7 +615,7 @@ class PersistentDict(object): try: read_contents = self._read_cached(value_path, version, - self._read_contents_cache) + self._read_contents_cache, use_cache) except (OSError, IOError, EOFError) as e: exc = e -- GitLab From bbc5bbc2146f1ddf57abb44360f34ee4a98bc4a2 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Wed, 20 Sep 2017 10:53:47 -0500 Subject: [PATCH 04/17] [ci skip] Make LRU test check object identity. 
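The previous version of the LRU test mutated the fetched value and watched for the mutation to disappear after eviction; comparing with `is` states the intent directly: a hit in the in-memory cache returns the very object that was stored there, while a miss re-unpickles from disk and therefore yields a distinct (merely equal) object. A standard-library-only sketch of that distinction, for illustration:

    import pickle

    class Value(object):
        def __init__(self, val):
            self.val = val

    original = Value(1)

    # A pickle round trip (which is what a disk read amounts to) produces
    # a new, distinct object ...
    from_disk = pickle.loads(pickle.dumps(original))
    assert from_disk is not original

    # ... whereas an in-memory cache hit hands back the identical object.
    cache = {"key": original}
    assert cache["key"] is original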
--- test/test_persistent_dict.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/test/test_persistent_dict.py b/test/test_persistent_dict.py index 7904a03..2627232 100644 --- a/test/test_persistent_dict.py +++ b/test/test_persistent_dict.py @@ -62,17 +62,16 @@ def test_persistent_dict_in_memory_cache(): # {{{ test LRU policy val1 = pdict[1] - val1.val = 0 - assert pdict[1].val == 0 + assert pdict[1] is val1 pdict[2] - assert pdict[1].val == 0 + assert pdict[1] is val1 pdict[3] - assert pdict[1].val == 0 + assert pdict[1] is val1 pdict[2] - assert pdict[1].val == 0 + assert pdict[1] is val1 pdict[4] - assert pdict[1].val == 1 + assert pdict[1] is not val1 # }}} -- GitLab From deba49f3375afed3366400d583e9251a24087db7 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Sat, 23 Sep 2017 12:26:09 -0500 Subject: [PATCH 05/17] Add more tests for PersistentDict and WriteOncePersistentDict. --- test/test_persistent_dict.py | 260 +++++++++++++++++++++++++++-------- 1 file changed, 200 insertions(+), 60 deletions(-) diff --git a/test/test_persistent_dict.py b/test/test_persistent_dict.py index 7904a03..775d3b6 100644 --- a/test/test_persistent_dict.py +++ b/test/test_persistent_dict.py @@ -2,96 +2,236 @@ from __future__ import division, with_statement, absolute_import import pytest # noqa import sys # noqa +import tempfile +import shutil +import warnings + from six.moves import range from six.moves import zip +from pytools.persistent_dict import (PersistentDict, + WriteOncePersistentDict, + NoSuchEntryError, + ReadOnlyEntryError) -def test_persistent_dict(): - from pytools.persistent_dict import PersistentDict - pdict = PersistentDict("pytools-test") - pdict.clear() - from random import randrange +# {{{ type for testing - def rand_str(n=20): - return "".join( - chr(65+randrange(26)) - for i in range(n)) +class PDictTestingKeyOrValue(object): - keys = [(randrange(2000), rand_str(), None) for i in range(20)] - values = [randrange(2000) for i in range(20)] + def __init__(self, val, hash_key=None): + self.val = val + if hash_key is None: + hash_key = val + self.hash_key = hash_key - d = dict(list(zip(keys, values))) + def __getstate__(self): + return {"val": self.val, "hash_key": self.hash_key} - for k, v in zip(keys, values): - pdict[k] = v - pdict.store(k, v, info_files={"hey": str(v)}) + def __eq__(self, other): + return self.val == other.val - for k, v in list(d.items()): - assert d[k] == pdict[k] + def __ne__(self, other): + return not self.__eq__(other) - for k, v in zip(keys, values): - pdict.store(k, v+1, info_files={"hey": str(v)}) + def update_persistent_hash(self, key_hash, key_builder): + key_builder.rec(key_hash, self.hash_key) - for k, v in list(d.items()): - assert d[k] + 1 == pdict[k] +# }}} -class PDictTestValue(object): +def test_persistent_dict_storage_and_lookup(): + try: + tmpdir = tempfile.mkdtemp() + pdict = PersistentDict(tmpdir) - def __init__(self, val): - self.val = val + from random import randrange - def __getstate__(self): - return {"val": self.val} + def rand_str(n=20): + return "".join( + chr(65+randrange(26)) + for i in range(n)) - def update_persistent_hash(self, key_hash, key_builder): - key_builder.rec(key_hash, self.val) + keys = [(randrange(2000), rand_str(), None) for i in range(20)] + values = [randrange(2000) for i in range(20)] + + d = dict(list(zip(keys, values))) + + # {{{ check lookup + + for k, v in zip(keys, values): + pdict[k] = v + + for k, v in d.items(): + assert d[k] == pdict[k] + + # }}} + + # {{{ check updating + + for k, v in 
zip(keys, values): + pdict[k] = v + 1 + + for k, v in d.items(): + assert d[k] + 1 == pdict[k] + + # }}} + + # {{{ check not found + + with pytest.raises(NoSuchEntryError): + pdict[3000] + + # }}} + + finally: + shutil.rmtree(tmpdir) + + +def test_persistent_dict_deletion(): + try: + tmpdir = tempfile.mkdtemp() + pdict = PersistentDict(tmpdir) + + pdict[0] = 0 + del pdict[0] + + with pytest.raises(NoSuchEntryError): + pdict[0] + + with pytest.raises(NoSuchEntryError): + del pdict[1] + + finally: + shutil.rmtree(tmpdir) + + +def test_persistent_dict_synchronization(): + try: + tmpdir = tempfile.mkdtemp() + pdict1 = PersistentDict(tmpdir) + pdict2 = PersistentDict(tmpdir) + + # check lookup + pdict1[0] = 1 + assert pdict2[0] == 1 + + # check updating + pdict1[0] = 2 + assert pdict2[0] == 2 + + # check deletion + del pdict1[0] + with pytest.raises(NoSuchEntryError): + pdict2[0] + + finally: + shutil.rmtree(tmpdir) + + +def test_persistent_dict_cache_collisions(): + try: + tmpdir = tempfile.mkdtemp() + pdict = PersistentDict(tmpdir) + + key1 = PDictTestingKeyOrValue(1, hash_key=0) + key2 = PDictTestingKeyOrValue(2, hash_key=0) + + pdict[key1] = 1 + + # Suppress pdict collision warnings. + with warnings.catch_warnings(): + # check lookup + with pytest.raises(NoSuchEntryError): + pdict[key2] + + # check deletion + with pytest.raises(NoSuchEntryError): + del pdict[key2] + + finally: + shutil.rmtree(tmpdir) + + +def test_write_once_persistent_dict_storage_and_lookup(): + try: + tmpdir = tempfile.mkdtemp() + pdict = WriteOncePersistentDict(tmpdir) + + # check lookup + pdict[0] = 1 + assert pdict[0] == 1 + + # check updating + with pytest.raises(ReadOnlyEntryError): + pdict[0] = 2 + + finally: + shutil.rmtree(tmpdir) + + +def test_write_once_persistent_dict_lru_policy(): + try: + tmpdir = tempfile.mkdtemp() + pdict = WriteOncePersistentDict(tmpdir, in_mem_cache_size=3) + + pdict[1] = PDictTestingKeyOrValue(1) + pdict[2] = PDictTestingKeyOrValue(2) + pdict[3] = PDictTestingKeyOrValue(3) + pdict[4] = PDictTestingKeyOrValue(4) + + val1 = pdict[1] + assert pdict[1] is val1 + pdict[2] + assert pdict[1] is val1 + pdict[3] + assert pdict[1] is val1 + pdict[2] + assert pdict[1] is val1 + pdict[4] + assert pdict[1] is not val1 -def test_persistent_dict_in_memory_cache(): - from pytools.persistent_dict import PersistentDict - pdict = PersistentDict("pytools-in-memory-cache-test", in_mem_cache_size=3) - pdict.clear() + finally: + shutil.rmtree(tmpdir) - pdict[1] = PDictTestValue(1) - pdict[2] = PDictTestValue(2) - pdict[3] = PDictTestValue(3) - pdict[4] = PDictTestValue(4) - # {{{ test LRU policy +def test_write_once_persistent_dict_synchronization(): + try: + tmpdir = tempfile.mkdtemp() + pdict1 = WriteOncePersistentDict(tmpdir) + pdict2 = WriteOncePersistentDict(tmpdir) - val1 = pdict[1] - val1.val = 0 + # check lookup + pdict1[1] = 0 + assert pdict2[1] == 0 - assert pdict[1].val == 0 - pdict[2] - assert pdict[1].val == 0 - pdict[3] - assert pdict[1].val == 0 - pdict[2] - assert pdict[1].val == 0 - pdict[4] - assert pdict[1].val == 1 + # check updating + with pytest.raises(ReadOnlyEntryError): + pdict2[1] = 1 - # }}} + finally: + shutil.rmtree(tmpdir) - # {{{ test cache invalidation by versioning - assert pdict[1].val == 1 - pdict2 = PersistentDict("pytools-in-memory-cache-test") - pdict2[1] = PDictTestValue(5) - assert pdict[1].val == 5 +def test_write_once_persistent_dict_cache_collisions(): + try: + tmpdir = tempfile.mkdtemp() + pdict = WriteOncePersistentDict(tmpdir) - # }}} + key1 = 
PDictTestingKeyOrValue(1, hash_key=0) + key2 = PDictTestingKeyOrValue(2, hash_key=0) - # {{{ test cache invalidation by deletion + pdict[key1] = 1 - del pdict2[1] - pdict2[1] = PDictTestValue(10) - assert pdict[1].val == 10 + # Suppress pdict collision warnings. + with warnings.catch_warnings(): + # check lookup + with pytest.raises(NoSuchEntryError): + pdict[key2] - # }}} + finally: + shutil.rmtree(tmpdir) if __name__ == "__main__": -- GitLab From 8adbccb6f36a62f40e77428b1c1e1e6cc2c5454d Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Sat, 23 Sep 2017 16:36:07 -0500 Subject: [PATCH 06/17] Improve the test suite a bit. --- test/test_persistent_dict.py | 83 +++++++++++++++++++++++++++--------- 1 file changed, 63 insertions(+), 20 deletions(-) diff --git a/test/test_persistent_dict.py b/test/test_persistent_dict.py index 775d3b6..b990c7e 100644 --- a/test/test_persistent_dict.py +++ b/test/test_persistent_dict.py @@ -4,15 +4,13 @@ import pytest # noqa import sys # noqa import tempfile import shutil -import warnings from six.moves import range from six.moves import zip -from pytools.persistent_dict import (PersistentDict, - WriteOncePersistentDict, - NoSuchEntryError, - ReadOnlyEntryError) +from pytools.persistent_dict import ( + PersistentDict, WriteOncePersistentDict, NoSuchEntryError, + ReadOnlyEntryError) # {{{ type for testing @@ -37,13 +35,19 @@ class PDictTestingKeyOrValue(object): def update_persistent_hash(self, key_hash, key_builder): key_builder.rec(key_hash, self.hash_key) + def __repr__(self): + return "PDictTestingKeyOrValue(val=%r,hash_key=%r)" % ( + (self.val, self.hash_key)) + + __str__ = __repr__ + # }}} def test_persistent_dict_storage_and_lookup(): try: tmpdir = tempfile.mkdtemp() - pdict = PersistentDict(tmpdir) + pdict = PersistentDict("pytools-test", container_dir=tmpdir) from random import randrange @@ -91,7 +95,7 @@ def test_persistent_dict_storage_and_lookup(): def test_persistent_dict_deletion(): try: tmpdir = tempfile.mkdtemp() - pdict = PersistentDict(tmpdir) + pdict = PersistentDict("pytools-test", container_dir=tmpdir) pdict[0] = 0 del pdict[0] @@ -109,8 +113,8 @@ def test_persistent_dict_deletion(): def test_persistent_dict_synchronization(): try: tmpdir = tempfile.mkdtemp() - pdict1 = PersistentDict(tmpdir) - pdict2 = PersistentDict(tmpdir) + pdict1 = PersistentDict("pytools-test", container_dir=tmpdir) + pdict2 = PersistentDict("pytools-test", container_dir=tmpdir) # check lookup pdict1[0] = 1 @@ -139,16 +143,35 @@ def test_persistent_dict_cache_collisions(): pdict[key1] = 1 - # Suppress pdict collision warnings. 
- with warnings.catch_warnings(): - # check lookup + # check lookup + with pytest.warns(UserWarning): with pytest.raises(NoSuchEntryError): pdict[key2] - # check deletion + # check deletion + with pytest.warns(UserWarning): with pytest.raises(NoSuchEntryError): del pdict[key2] + # check presence after deletion + pdict[key1] + + finally: + shutil.rmtree(tmpdir) + + +def test_persistent_dict_clear(): + try: + tmpdir = tempfile.mkdtemp() + pdict = PersistentDict("pytools-test", container_dir=tmpdir) + + pdict[0] = 1 + pdict[0] + pdict.clear() + + with pytest.raises(NoSuchEntryError): + pdict[0] + finally: shutil.rmtree(tmpdir) @@ -156,7 +179,7 @@ def test_persistent_dict_cache_collisions(): def test_write_once_persistent_dict_storage_and_lookup(): try: tmpdir = tempfile.mkdtemp() - pdict = WriteOncePersistentDict(tmpdir) + pdict = WriteOncePersistentDict("pytools-test", container_dir=tmpdir) # check lookup pdict[0] = 1 @@ -173,7 +196,8 @@ def test_write_once_persistent_dict_storage_and_lookup(): def test_write_once_persistent_dict_lru_policy(): try: tmpdir = tempfile.mkdtemp() - pdict = WriteOncePersistentDict(tmpdir, in_mem_cache_size=3) + pdict = WriteOncePersistentDict( + "pytools-test", container_dir=tmpdir, in_mem_cache_size=3) pdict[1] = PDictTestingKeyOrValue(1) pdict[2] = PDictTestingKeyOrValue(2) @@ -185,10 +209,15 @@ def test_write_once_persistent_dict_lru_policy(): assert pdict[1] is val1 pdict[2] assert pdict[1] is val1 + pdict[2] pdict[3] assert pdict[1] is val1 pdict[2] + pdict[3] + pdict[2] assert pdict[1] is val1 + pdict[2] + pdict[3] pdict[4] assert pdict[1] is not val1 @@ -199,8 +228,8 @@ def test_write_once_persistent_dict_lru_policy(): def test_write_once_persistent_dict_synchronization(): try: tmpdir = tempfile.mkdtemp() - pdict1 = WriteOncePersistentDict(tmpdir) - pdict2 = WriteOncePersistentDict(tmpdir) + pdict1 = WriteOncePersistentDict("pytools-test", container_dir=tmpdir) + pdict2 = WriteOncePersistentDict("pytools-test", container_dir=tmpdir) # check lookup pdict1[1] = 0 @@ -217,15 +246,14 @@ def test_write_once_persistent_dict_synchronization(): def test_write_once_persistent_dict_cache_collisions(): try: tmpdir = tempfile.mkdtemp() - pdict = WriteOncePersistentDict(tmpdir) + pdict = WriteOncePersistentDict("pytools-test", container_dir=tmpdir) key1 = PDictTestingKeyOrValue(1, hash_key=0) key2 = PDictTestingKeyOrValue(2, hash_key=0) pdict[key1] = 1 - # Suppress pdict collision warnings. - with warnings.catch_warnings(): + with pytest.warns(UserWarning): # check lookup with pytest.raises(NoSuchEntryError): pdict[key2] @@ -234,6 +262,21 @@ def test_write_once_persistent_dict_cache_collisions(): shutil.rmtree(tmpdir) +def test_write_once_persistent_dict_clear(): + try: + tmpdir = tempfile.mkdtemp() + pdict = WriteOncePersistentDict("pytools-test", container_dir=tmpdir) + + pdict[0] = 1 + pdict[0] + pdict.clear() + + with pytest.raises(NoSuchEntryError): + pdict[0] + finally: + shutil.rmtree(tmpdir) + + if __name__ == "__main__": if len(sys.argv) > 1: exec(sys.argv[1]) -- GitLab From 73d42e3dd422c3d8e4dc795b0da21cd64c2fa7a8 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Sat, 23 Sep 2017 16:39:16 -0500 Subject: [PATCH 07/17] Update PersistentDict and create WriteOncePersistentDict. Major changes to PersistentDict: * Uses a per-file lock instead of a per-container-dir lock. * Removed info files from store(). * Checks for cache collision on delete. * Added more tests. This change also implements WriteOncePersistentDict, which has a write-once policy. 
WriteOncePersistentDict uses a writer lock but allows for unlocked reads, which makes reads from disk faster. It also uses an in-memory LRU cache to speed up accesses. --- pytools/persistent_dict.py | 578 ++++++++++++++++++++++--------------- 1 file changed, 338 insertions(+), 240 deletions(-) diff --git a/pytools/persistent_dict.py b/pytools/persistent_dict.py index 11a6cef..b728c67 100644 --- a/pytools/persistent_dict.py +++ b/pytools/persistent_dict.py @@ -2,7 +2,10 @@ from __future__ import division, with_statement, absolute_import -__copyright__ = "Copyright (C) 2011,2014 Andreas Kloeckner" +__copyright__ = """ +Copyright (C) 2011,2014 Andreas Kloeckner +Copyright (C) 2017 Matt Wala +""" __license__ = """ Permission is hereby granted, free of charge, to any person obtaining a copy @@ -28,9 +31,11 @@ import logging logger = logging.getLogger(__name__) +import collections import six import sys import os +import shutil import errno __doc__ = """ @@ -41,8 +46,11 @@ This module contains functionality that allows hashing with keys that remain valid across interpreter invocations, unlike Python's built-in hashes. .. autoexception:: NoSuchEntryError +.. autoexception:: ReadOnlyEntryError + .. autoclass:: KeyBuilder .. autoclass:: PersistentDict +.. autoclass:: WriteOncePersistentDict """ try: @@ -63,19 +71,6 @@ def _make_dir_recursively(dir): raise -def _erase_dir(dir): - from os import listdir, unlink, rmdir - from os.path import join, isdir - for name in listdir(dir): - sub_name = join(dir, name) - if isdir(sub_name): - _erase_dir(sub_name) - else: - unlink(sub_name) - - rmdir(dir) - - def update_checksum(checksum, obj): if isinstance(obj, six.text_type): checksum.update(obj.encode("utf8")) @@ -106,34 +101,33 @@ class CleanupManager(CleanupBase): class LockManager(CleanupBase): - def __init__(self, cleanup_m, container_dir): - if container_dir is not None: - self.lock_file = os.path.join(container_dir, "lock") + def __init__(self, cleanup_m, lock_file): + self.lock_file = lock_file - attempts = 0 - while True: - try: - self.fd = os.open(self.lock_file, - os.O_CREAT | os.O_WRONLY | os.O_EXCL) - break - except OSError: - pass + attempts = 0 + while True: + try: + self.fd = os.open(self.lock_file, + os.O_CREAT | os.O_WRONLY | os.O_EXCL) + break + except OSError: + pass - from time import sleep - sleep(1) + from time import sleep + sleep(1) - attempts += 1 + attempts += 1 - if attempts > 10: - from warnings import warn - warn("could not obtain lock--delete '%s' if necessary" - % self.lock_file) - if attempts > 3 * 60: - raise RuntimeError("waited more than three minutes " - "on the lock file '%s'" - "--something is wrong" % self.lock_file) + if attempts > 10: + from warnings import warn + warn("could not obtain lock--delete '%s' if necessary" + % self.lock_file) + if attempts > 3 * 60: + raise RuntimeError("waited more than three minutes " + "on the lock file '%s'" + "--something is wrong" % self.lock_file) - cleanup_m.register(self) + cleanup_m.register(self) def clean_up(self): import os @@ -145,34 +139,26 @@ class LockManager(CleanupBase): class ItemDirManager(CleanupBase): - def __init__(self, cleanup_m, path): - from os import mkdir - import errno + def __init__(self, cleanup_m, path, delete_on_error): + from os.path import isdir + self.existed = isdir(path) self.path = path - try: - mkdir(self.path) - except OSError as e: - if e.errno != errno.EEXIST: - raise - self.existed = True - else: - cleanup_m.register(self) - self.existed = False + self.delete_on_error = delete_on_error - def 
sub(self, n): - from os.path import join - return join(self.path, n) + cleanup_m.register(self) def reset(self): try: - _erase_dir(self.path) + shutil.rmtree(self.path) except OSError as e: if e.errno != errno.ENOENT: raise + def mkdir(self): + from os import mkdir try: - os.mkdir(self.path) + mkdir(self.path) except OSError as e: if e.errno != errno.EEXIST: raise @@ -181,7 +167,8 @@ class ItemDirManager(CleanupBase): pass def error_clean_up(self): - _erase_dir(self.path) + if self.delete_on_error: + self.reset() # }}} @@ -277,11 +264,7 @@ class KeyBuilder(object): # }}} -# {{{ top-level - -class NoSuchEntryError(KeyError): - pass - +# {{{ lru cache class _LinkedList(object): """The list operates on nodes of the form [value, leftptr, rightpr]. To create a @@ -323,6 +306,7 @@ class _LinkedList(object): self.count -= 1 if self.head is self.end: + assert node is self.head self.head = self.end = None return @@ -335,14 +319,14 @@ class _LinkedList(object): left[2] = right if right is None: - self.tail = left + self.end = left else: right[1] = left node[1] = node[2] = None -class _LRUCache(object): +class _LRUCache(collections.MutableMapping): """A mapping that keeps at most *maxsize* items with an LRU replacement policy. """ @@ -366,9 +350,11 @@ class _LRUCache(object): def __contains__(self, item): return item in self.cache - def discard(self, item): - if item in self.cache: - del self[item] + def __iter__(self): + return iter(self.cache) + + def __len__(self): + return len(self.cache) def clear(self): self.cache.clear() @@ -392,25 +378,28 @@ class _LRUCache(object): self.cache[item] = node self.lru_order.appendleft_node(node) + + assert len(self.cache) == len(self.lru_order), \ + (len(self.cache), len(self.lru_order)) + assert len(self.lru_order) <= self.maxsize + return node[0] +# }}} -class PersistentDict(object): - def __init__(self, identifier, key_builder=None, container_dir=None, - in_mem_cache_size=None): - """ - :arg identifier: a file-name-compatible string identifying this - dictionary - :arg key_builder: a subclass of :class:`KeyBuilder` - :arg in_mem_cache_size: If not *None*, retain an in-memory cache of - *in_mem_cache_size* items. The replacement policy is LRU. - .. automethod:: __getitem__ - .. automethod:: __setitem__ - .. automethod:: __delitem__ - .. 
automethod:: clear - """ +# {{{ top-level +class NoSuchEntryError(KeyError): + pass + + +class ReadOnlyEntryError(KeyError): + pass + + +class _PersistentDictBase(object): + def __init__(self, identifier, key_builder=None, container_dir=None): self.identifier = identifier if key_builder is None: @@ -428,162 +417,292 @@ class PersistentDict(object): ".".join(str(i) for i in sys.version_info),)) self.container_dir = container_dir - self.version_dir = join(container_dir, "version") - self._make_container_dirs() + self._make_container_dir() + + def store(self, key, value): + raise NotImplementedError() + + def fetch(self, key): + raise NotImplementedError() + + def _read(self, path): + from six.moves.cPickle import load + with open(path, "rb") as inf: + return load(inf) + + def _write(self, path, value): + from six.moves.cPickle import dump, HIGHEST_PROTOCOL + with open(path, "wb") as outf: + dump(value, outf, protocol=HIGHEST_PROTOCOL) + + def _item_dir(self, hexdigest_key): + from os.path import join + return join(self.container_dir, hexdigest_key) - if in_mem_cache_size is None: - in_mem_cache_size = 0 + def _key_file(self, hexdigest_key): + from os.path import join + return join(self._item_dir(hexdigest_key), "key") - self._use_cache = (in_mem_cache_size >= 1) - self._read_key_cache = _LRUCache(in_mem_cache_size) - self._read_contents_cache = _LRUCache(in_mem_cache_size) + def _contents_file(self, hexdigest_key): + from os.path import join + return join(self._item_dir(hexdigest_key), "contents") - def _make_container_dirs(self): + def _lock_file(self, hexdigest_key): + from os.path import join + return join(self.container_dir, str(hexdigest_key) + ".lock") + + def _make_container_dir(self): _make_dir_recursively(self.container_dir) - _make_dir_recursively(self.version_dir) - def store(self, key, value, info_files={}): - hexdigest_key = self.key_builder(key) + def _collision_check(self, key, stored_key): + if stored_key != key: + # Key collision, oh well. + from warnings import warn + warn("%s: key collision in cache at '%s' -- these are " + "sufficiently unlikely that they're often " + "indicative of a broken implementation " + "of equality comparison" + % (self.identifier, self.container_dir)) + # This is here so we can debug the equality comparison + stored_key == key + raise NoSuchEntryError(key) - cleanup_m = CleanupManager() + def __getitem__(self, key): + return self.fetch(key) + + def __setitem__(self, key, value): + self.store(key, value) + + def __delitem__(self, key): + raise NotImplementedError() + + def clear(self): try: - try: - LockManager(cleanup_m, self.container_dir) + shutil.rmtree(self.container_dir) + except OSError as e: + if e.errno != errno.ENOENT: + raise - from os.path import join - item_dir_m = ItemDirManager(cleanup_m, - join(self.container_dir, hexdigest_key)) + self._make_container_dir() - if item_dir_m.existed: - item_dir_m.reset() - for info_name, info_value in six.iteritems(info_files): - info_path = item_dir_m.sub("info_"+info_name) +class WriteOncePersistentDict(_PersistentDictBase): + def __init__(self, identifier, key_builder=None, container_dir=None, + in_mem_cache_size=256): + """ + :arg identifier: a file-name-compatible string identifying this + dictionary + :arg key_builder: a subclass of :class:`KeyBuilder` + :arg in_mem_cache_size: retain an in-memory cache of up to + *in_mem_cache_size* items + + .. automethod:: __getitem__ + .. automethod:: __setitem__ + .. 
automethod:: clear + """ + _PersistentDictBase.__init__(self, identifier, key_builder, container_dir) + self._cache = _LRUCache(in_mem_cache_size) - with open(info_path, "wt") as outf: - outf.write(info_value) + def _spin_until_removed(self, lock_file): + from os.path import exists - from six.moves.cPickle import dump, HIGHEST_PROTOCOL - value_path = item_dir_m.sub("contents") - with open(value_path, "wb") as outf: - dump(value, outf, protocol=HIGHEST_PROTOCOL) + attempts = 0 + while exists(lock_file): + from time import sleep + sleep(1) - logger.debug("%s: cache store [key=%s]" % ( - self.identifier, hexdigest_key)) + attempts += 1 - self._tick_version(hexdigest_key) + if attempts > 10: + from warnings import warn + warn("waiting until unlocked--delete '%s' if necessary" + % lock_file) - # Write key last, so that if the reader below - key_path = item_dir_m.sub("key") - with open(key_path, "wb") as outf: - dump(key, outf, protocol=HIGHEST_PROTOCOL) + if attempts > 3 * 60: + raise RuntimeError("waited more than three minutes " + "on the lock file '%s'" + "--something is wrong" % lock_file) + def store(self, key, value): + hexdigest_key = self.key_builder(key) + + cleanup_m = CleanupManager() + try: + try: + LockManager(cleanup_m, self._lock_file(hexdigest_key)) + item_dir_m = ItemDirManager( + cleanup_m, self._item_dir(hexdigest_key), + delete_on_error=False) + + if item_dir_m.existed: + raise ReadOnlyEntryError(key) + + item_dir_m.mkdir() + + key_path = self._key_file(hexdigest_key) + value_path = self._contents_file(hexdigest_key) + + self._write(value_path, value) + self._write(key_path, key) + + logger.debug("%s: disk cache store [key=%s]" % ( + self.identifier, hexdigest_key)) except: cleanup_m.error_clean_up() raise finally: cleanup_m.clean_up() - def _tick_version(self, hexdigest_key): - from os.path import join - version_path = join(self.version_dir, hexdigest_key) + def fetch(self, key): + hexdigest_key = self.key_builder(key) - from six.moves.cPickle import load, dump, HIGHEST_PROTOCOL + # {{{ in memory cache try: - with open(version_path, "r+b") as versionf: - version = 1 + load(versionf) - versionf.seek(0) - dump(version, versionf, protocol=HIGHEST_PROTOCOL) - - except (IOError, EOFError): - _make_dir_recursively(self.version_dir) - with open(version_path, "wb") as versionf: - dump(0, versionf, protocol=HIGHEST_PROTOCOL) - - def _read_cached(self, file_name, version, cache, use_cache): - if use_cache: - try: - value, cached_version = cache[file_name] - if version == cached_version: - return value - except KeyError: - pass - - with open(file_name, "rb") as inf: - from six.moves.cPickle import load - value = load(inf) + stored_key, stored_value = self._cache[hexdigest_key] + except KeyError: + pass + else: + logger.debug("%s: in mem cache hit [key=%s]" % ( + self.identifier, hexdigest_key)) + self._collision_check(key, stored_key) + return stored_value - if use_cache: - cache[file_name] = (value, version) + # }}} - return value + # {{{ check path exists and is unlocked - def fetch(self, key): - hexdigest_key = self.key_builder(key) + item_dir = self._item_dir(hexdigest_key) - from os.path import join, isdir - item_dir = join(self.container_dir, hexdigest_key) + from os.path import isdir if not isdir(item_dir): - logger.debug("%s: cache miss [key=%s]" % ( + logger.debug("%s: disk cache miss [key=%s]" % ( + self.identifier, hexdigest_key)) + raise NoSuchEntryError(key) + + lock_file = self._lock_file(hexdigest_key) + self._spin_until_removed(lock_file) + + # }}} + + key_file = 
self._key_file(hexdigest_key) + contents_file = self._contents_file(hexdigest_key) + + # Note: Unlike PersistentDict, this doesn't autodelete invalid entires, + # because that would lead to a race condition. + + # {{{ load key file and do equality check + + try: + read_key = self._read(key_file) + except: + from warnings import warn + warn("pytools.persistent_dict.WriteOncePersistentDict(%s) " + "encountered an invalid " + "key file for key %s. Remove the directory " + "'%s' if necessary." + % (self.identifier, hexdigest_key, item_dir)) + raise NoSuchEntryError(key) + + self._collision_check(key, read_key) + + # }}} + + logger.debug("%s: disk cache hit [key=%s]" % ( self.identifier, hexdigest_key)) + + # {{{ load contents + + try: + read_contents = self._read(contents_file) + except: + warn("pytools.persistent_dict.WriteOncePersistentDict(%s) " + "encountered an invalid " + "key file for key %s. Remove the directory " + "'%s' if necessary." + % (self.identifier, hexdigest_key, item_dir)) raise NoSuchEntryError(key) + # }}} + + self._cache[hexdigest_key] = (key, read_contents) + return read_contents + + def clear(self): + _PersistentDictBase.clear(self) + self._cache.clear() + + +class PersistentDict(_PersistentDictBase): + def __init__(self, identifier, key_builder=None, container_dir=None): + """ + :arg identifier: a file-name-compatible string identifying this + dictionary + :arg key_builder: a subclass of :class:`KeyBuilder` + + .. automethod:: __getitem__ + .. automethod:: __setitem__ + .. automethod:: __delitem__ + .. automethod:: clear + """ + _PersistentDictBase.__init__(self, identifier, key_builder, container_dir) + + def store(self, key, value): + hexdigest_key = self.key_builder(key) + cleanup_m = CleanupManager() try: try: - LockManager(cleanup_m, self.container_dir) + LockManager(cleanup_m, self._lock_file(hexdigest_key)) + item_dir_m = ItemDirManager( + cleanup_m, self._item_dir(hexdigest_key), + delete_on_error=True) - item_dir_m = ItemDirManager(cleanup_m, item_dir) - key_path = item_dir_m.sub("key") - value_path = item_dir_m.sub("contents") - version_path = join(self.version_dir, hexdigest_key) + if item_dir_m.existed: + item_dir_m.reset() - # {{{ read version + item_dir_m.mkdir() - version = None - exc = None - use_cache = True + key_path = self._key_file(hexdigest_key) + value_path = self._contents_file(hexdigest_key) - try: - with open(version_path, "rb") as versionf: - from six.moves.cPickle import load - version = load(versionf) - except IOError: - # Not a fatal error - but we won't be able to use the - # LRU cache. - # - # This allows us to still read cache directory hiearchies - # created by previous version (< 2017.5). - self._read_key_cache.discard(key_path) - self._read_contents_cache.discard(value_path) - use_cache = False - except (OSError, EOFError) as e: - exc = e - - if exc is not None: - item_dir_m.reset() - from warnings import warn - warn("pytools.persistent_dict.PersistentDict(%s) " - "encountered an invalid " - "key file for key %s. Entry deleted." 
- % (self.identifier, hexdigest_key)) - raise NoSuchEntryError(key) + self._write(value_path, value) + self._write(key_path, key) - # }}} + logger.debug("%s: cache store [key=%s]" % ( + self.identifier, hexdigest_key)) + except: + cleanup_m.error_clean_up() + raise + finally: + cleanup_m.clean_up() + + def fetch(self, key): + hexdigest_key = self.key_builder(key) + item_dir = self._item_dir(hexdigest_key) + + from os.path import isdir + if not isdir(item_dir): + logger.debug("%s: cache miss [key=%s]" % ( + self.identifier, hexdigest_key)) + raise NoSuchEntryError(key) - # {{{ load key file + cleanup_m = CleanupManager() + try: + try: + LockManager(cleanup_m, self._lock_file(hexdigest_key)) + item_dir_m = ItemDirManager( + cleanup_m, item_dir, delete_on_error=False) - exc = None + key_path = self._key_file(hexdigest_key) + value_path = self._contents_file(hexdigest_key) - try: - read_key = self._read_cached(key_path, version, - self._read_key_cache, use_cache) - except (OSError, IOError, EOFError) as e: - exc = e + # {{{ load key - if exc is not None: + try: + stored_key = self._read(key_path) + except: item_dir_m.reset() from warnings import warn warn("pytools.persistent_dict.PersistentDict(%s) " @@ -592,34 +711,18 @@ class PersistentDict(object): % (self.identifier, hexdigest_key)) raise NoSuchEntryError(key) - # }}} + self._collision_check(key, stored_key) - if read_key != key: - # Key collision, oh well. - from warnings import warn - warn("%s: key collision in cache at '%s' -- these are " - "sufficiently unlikely that they're often " - "indicative of a broken implementation " - "of equality comparison" - % (self.identifier, self.container_dir)) - # This is here so we can debug the equality comparison - read_key == key - raise NoSuchEntryError(key) + # }}} logger.debug("%s: cache hit [key=%s]" % ( - self.identifier, hexdigest_key)) + self.identifier, hexdigest_key)) # {{{ load value - exc = None - try: - read_contents = self._read_cached(value_path, version, - self._read_contents_cache, use_cache) - except (OSError, IOError, EOFError) as e: - exc = e - - if exc is not None: + read_contents = self._read(value_path) + except: item_dir_m.reset() from warnings import warn warn("pytools.persistent_dict.PersistentDict(%s) " @@ -628,10 +731,10 @@ class PersistentDict(object): % (self.identifier, hexdigest_key)) raise NoSuchEntryError(key) - # }}} - return read_contents + # }}} + except: cleanup_m.error_clean_up() raise @@ -641,20 +744,36 @@ class PersistentDict(object): def remove(self, key): hexdigest_key = self.key_builder(key) - from os.path import join, isdir - item_dir = join(self.container_dir, hexdigest_key) + item_dir = self._item_dir(hexdigest_key) + from os.path import isdir if not isdir(item_dir): raise NoSuchEntryError(key) - key_file = join(item_dir, "key") - contents_file = join(item_dir, "contents") - cleanup_m = CleanupManager() try: try: - LockManager(cleanup_m, self.container_dir) + LockManager(cleanup_m, self._lock_file(hexdigest_key)) + item_dir_m = ItemDirManager( + cleanup_m, item_dir, delete_on_error=False) + key_file = self._key_file(hexdigest_key) + + # {{{ load key + + try: + read_key = self._read(key_file) + except: + item_dir_m.reset() + from warnings import warn + warn("pytools.persistent_dict.PersistentDict(%s) " + "encountered an invalid " + "key file for key %s. Entry deleted." 
+ % (self.identifier, hexdigest_key)) + raise NoSuchEntryError(key) + + self._collision_check(key, read_key) + + # }}} - item_dir_m = ItemDirManager(cleanup_m, item_dir) item_dir_m.reset() except: @@ -663,30 +782,9 @@ class PersistentDict(object): finally: cleanup_m.clean_up() - self._read_key_cache.discard(key_file) - self._read_contents_cache.discard(contents_file) - - def __getitem__(self, key): - return self.fetch(key) - - def __setitem__(self, key, value): - return self.store(key, value) - def __delitem__(self, key): self.remove(key) - def clear(self): - try: - _erase_dir(self.container_dir) - except OSError as e: - if e.errno != errno.ENOENT: - raise - - self._make_container_dirs() - - self._read_key_cache.clear() - self._read_contents_cache.clear() - # }}} # vim: foldmethod=marker -- GitLab From 40c636c557b23e119af835c9b82e51d9c3049467 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Sat, 23 Sep 2017 16:47:42 -0500 Subject: [PATCH 08/17] Fix flake8 error. --- test/test_persistent_dict.py | 1 - 1 file changed, 1 deletion(-) diff --git a/test/test_persistent_dict.py b/test/test_persistent_dict.py index 065f979..d6fbbc7 100644 --- a/test/test_persistent_dict.py +++ b/test/test_persistent_dict.py @@ -250,7 +250,6 @@ def test_write_once_persistent_dict_cache_collisions(): key1 = PDictTestingKeyOrValue(1, hash_key=0) key2 = PDictTestingKeyOrValue(2, hash_key=0) - pdict[key1] = 1 with pytest.warns(UserWarning): -- GitLab From 1c3120e1e289618828bea5dbab7ece3a809b56e8 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Sat, 23 Sep 2017 16:53:43 -0500 Subject: [PATCH 09/17] Test empty cache. --- test/test_persistent_dict.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/test/test_persistent_dict.py b/test/test_persistent_dict.py index d6fbbc7..339f0e5 100644 --- a/test/test_persistent_dict.py +++ b/test/test_persistent_dict.py @@ -176,10 +176,13 @@ def test_persistent_dict_clear(): shutil.rmtree(tmpdir) -def test_write_once_persistent_dict_storage_and_lookup(): +@pytest.mark.parametrize("in_mem_cache_size", (0, 256)) +def test_write_once_persistent_dict_storage_and_lookup(in_mem_cache_size): try: tmpdir = tempfile.mkdtemp() - pdict = WriteOncePersistentDict("pytools-test", container_dir=tmpdir) + pdict = WriteOncePersistentDict( + "pytools-test", container_dir=tmpdir, + in_mem_cache_size=in_mem_cache_size) # check lookup pdict[0] = 1 -- GitLab From e4439e56193feef5e52ebcf110cfc848f5eb49ec Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Sat, 23 Sep 2017 17:04:21 -0500 Subject: [PATCH 10/17] Variable naming consistency. --- pytools/persistent_dict.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pytools/persistent_dict.py b/pytools/persistent_dict.py index b728c67..cb44d0a 100644 --- a/pytools/persistent_dict.py +++ b/pytools/persistent_dict.py @@ -701,7 +701,7 @@ class PersistentDict(_PersistentDictBase): # {{{ load key try: - stored_key = self._read(key_path) + read_key = self._read(key_path) except: item_dir_m.reset() from warnings import warn @@ -711,7 +711,7 @@ class PersistentDict(_PersistentDictBase): % (self.identifier, hexdigest_key)) raise NoSuchEntryError(key) - self._collision_check(key, stored_key) + self._collision_check(key, read_key) # }}} -- GitLab From 1b45b8c799d50b1413631399c2c63a5bceb6e688 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Sat, 23 Sep 2017 17:08:02 -0500 Subject: [PATCH 11/17] Fix directory. 
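test_persistent_dict_cache_collisions was still constructing its dict as PersistentDict(tmpdir), which passes the temporary directory as the *identifier*, so the on-disk cache ends up under the default container location rather than inside tmpdir and is not removed by the shutil.rmtree cleanup. Passing it as *container_dir* matches the other tests. Roughly (the path below is an illustrative placeholder):

    from pytools.persistent_dict import PersistentDict

    # Before: tmpdir used as the identifier; the cache files land in the
    # default platform cache location, not in the temporary directory.
    pdict = PersistentDict("/tmp/pytools-test-XXXX")

    # After: the cache actually lives under tmpdir and is cleaned up
    # together with it.
    pdict = PersistentDict("pytools-test",
            container_dir="/tmp/pytools-test-XXXX")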
--- test/test_persistent_dict.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_persistent_dict.py b/test/test_persistent_dict.py index 339f0e5..53afd77 100644 --- a/test/test_persistent_dict.py +++ b/test/test_persistent_dict.py @@ -136,7 +136,7 @@ def test_persistent_dict_synchronization(): def test_persistent_dict_cache_collisions(): try: tmpdir = tempfile.mkdtemp() - pdict = PersistentDict(tmpdir) + pdict = PersistentDict("pytools-test", container_dir=tmpdir) key1 = PDictTestingKeyOrValue(1, hash_key=0) key2 = PDictTestingKeyOrValue(2, hash_key=0) -- GitLab From 9ea56ee13e8bb1815ba35bcadb3264746c7b56e5 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Sat, 23 Sep 2017 17:15:38 -0500 Subject: [PATCH 12/17] Check not found in write once persistent dict. --- test/test_persistent_dict.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/test/test_persistent_dict.py b/test/test_persistent_dict.py index 53afd77..b8954b4 100644 --- a/test/test_persistent_dict.py +++ b/test/test_persistent_dict.py @@ -192,6 +192,10 @@ def test_write_once_persistent_dict_storage_and_lookup(in_mem_cache_size): with pytest.raises(ReadOnlyEntryError): pdict[0] = 2 + # check not found + with pytest.raises(NoSuchEntryError): + pdict[1] + finally: shutil.rmtree(tmpdir) -- GitLab From 3fec170c8f6971216017810c31a6c68777735030 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Sat, 23 Sep 2017 17:22:39 -0500 Subject: [PATCH 13/17] Newline consistency. --- pytools/persistent_dict.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pytools/persistent_dict.py b/pytools/persistent_dict.py index cb44d0a..9afcb65 100644 --- a/pytools/persistent_dict.py +++ b/pytools/persistent_dict.py @@ -272,7 +272,6 @@ class _LinkedList(object): Supports inserting at the left and deleting from an arbitrary location. """ - def __init__(self): self.count = 0 self.head = None @@ -329,7 +328,6 @@ class _LinkedList(object): class _LRUCache(collections.MutableMapping): """A mapping that keeps at most *maxsize* items with an LRU replacement policy. """ - def __init__(self, maxsize): self.lru_order = _LinkedList() self.maxsize = maxsize -- GitLab From 99d752217f2075722a7efcf887ca23ece42da742 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Sat, 23 Sep 2017 17:34:55 -0500 Subject: [PATCH 14/17] Do two lookups in the test to exercise the cache. 
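In WriteOncePersistentDict, the first fetch of a key reads from disk and only then places the result in the in-memory LRU cache, so a single lookup never exercises the in-memory path. Doing the lookup twice covers both branches. A sketch of the pattern the test now follows (container_dir is an illustrative placeholder):

    from pytools.persistent_dict import WriteOncePersistentDict

    pdict = WriteOncePersistentDict(
            "pytools-test", container_dir="/tmp/pdict-demo")

    pdict[0] = 1
    assert pdict[0] == 1  # disk read; the result lands in the LRU cache
    assert pdict[0] == 1  # this time served from the in-memory cache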
--- test/test_persistent_dict.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test/test_persistent_dict.py b/test/test_persistent_dict.py index b8954b4..61338d2 100644 --- a/test/test_persistent_dict.py +++ b/test/test_persistent_dict.py @@ -187,6 +187,8 @@ def test_write_once_persistent_dict_storage_and_lookup(in_mem_cache_size): # check lookup pdict[0] = 1 assert pdict[0] == 1 + # do two lookups to test the cache + assert pdict[0] == 1 # check updating with pytest.raises(ReadOnlyEntryError): -- GitLab From 4a216308d8b0295dd63733a4f6d661c39ef017bb Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Sat, 23 Sep 2017 17:42:43 -0500 Subject: [PATCH 15/17] check cache collision on update in WriteOncePersistentDict --- test/test_persistent_dict.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/test/test_persistent_dict.py b/test/test_persistent_dict.py index 61338d2..bea8808 100644 --- a/test/test_persistent_dict.py +++ b/test/test_persistent_dict.py @@ -261,11 +261,15 @@ def test_write_once_persistent_dict_cache_collisions(): key2 = PDictTestingKeyOrValue(2, hash_key=0) pdict[key1] = 1 + # check lookup with pytest.warns(UserWarning): - # check lookup with pytest.raises(NoSuchEntryError): pdict[key2] + # check update + with pytest.raises(ReadOnlyEntryError): + pdict[key2] = 1 + finally: shutil.rmtree(tmpdir) -- GitLab From 88086e2fa121227be1a732f4749f33c02a282edb Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Sat, 23 Sep 2017 21:09:30 -0500 Subject: [PATCH 16/17] Remove extraneous del. --- pytools/persistent_dict.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pytools/persistent_dict.py b/pytools/persistent_dict.py index 9afcb65..06e0f00 100644 --- a/pytools/persistent_dict.py +++ b/pytools/persistent_dict.py @@ -356,7 +356,6 @@ class _LRUCache(collections.MutableMapping): def clear(self): self.cache.clear() - del self.lru_order self.lru_order = _LinkedList() def __setitem__(self, item, value): -- GitLab From 227ae5e80f8e6cbb0c33400c69fd02443047e664 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Sat, 23 Sep 2017 21:12:25 -0500 Subject: [PATCH 17/17] flake8 fix --- test/test_persistent_dict.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_persistent_dict.py b/test/test_persistent_dict.py index bea8808..dfb1cdc 100644 --- a/test/test_persistent_dict.py +++ b/test/test_persistent_dict.py @@ -261,7 +261,7 @@ def test_write_once_persistent_dict_cache_collisions(): key2 = PDictTestingKeyOrValue(2, hash_key=0) pdict[key1] = 1 - # check lookup + # check lookup with pytest.warns(UserWarning): with pytest.raises(NoSuchEntryError): pdict[key2] -- GitLab
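The write-once semantics introduced in patch 07 boil down to: every key may be stored exactly once, a second store raises ReadOnlyEntryError, and a lookup of an absent key raises NoSuchEntryError. A self-contained sketch of that interface (identifier, directory, and values below are illustrative placeholders):

    from pytools.persistent_dict import (
            WriteOncePersistentDict, NoSuchEntryError, ReadOnlyEntryError)

    pdict = WriteOncePersistentDict(
            "demo-cache", container_dir="/tmp/pdict-demo",
            in_mem_cache_size=256)

    pdict["config"] = {"tol": 1e-6}
    assert pdict["config"] == {"tol": 1e-6}

    try:
        pdict["config"] = {"tol": 1e-8}
    except ReadOnlyEntryError:
        pass  # entries can only be written once

    try:
        pdict["missing"]
    except NoSuchEntryError:
        pass  # unknown keys raise rather than returning a default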