From 5facd423d57885fdfeb231df22d621c1fbc5680c Mon Sep 17 00:00:00 2001 From: Matthias Diener <mdiener@illinois.edu> Date: Tue, 4 Jun 2024 17:25:18 -0600 Subject: [PATCH] PersistentDict: allow setting sync (#229) Co-authored-by: Andreas Kloeckner <inform@tiker.net> --- pytools/persistent_dict.py | 38 ++++++++++++++++------ pytools/test/test_persistent_dict.py | 47 +++++++++++++++++----------- 2 files changed, 57 insertions(+), 28 deletions(-) diff --git a/pytools/persistent_dict.py b/pytools/persistent_dict.py index 3e6ca3e..27860dd 100644 --- a/pytools/persistent_dict.py +++ b/pytools/persistent_dict.py @@ -431,7 +431,8 @@ class _PersistentDictBase(Mapping[K, V]): def __init__(self, identifier: str, key_builder: Optional[KeyBuilder] = None, container_dir: Optional[str] = None, - enable_wal: bool = False) -> None: + enable_wal: bool = False, + safe_sync: Optional[bool] = None) -> None: self.identifier = identifier self.conn = None @@ -471,17 +472,30 @@ class _PersistentDictBase(Mapping[K, V]): if enable_wal: self.conn.execute("PRAGMA journal_mode = 'WAL'") - # Note: the following configuration values were taken from litedict: + # Note: the following configuration values were taken mostly from litedict: # https://github.com/litements/litedict/blob/377603fa597453ffd9997186a493ed4fd23e5399/litedict.py#L67-L70 - # They result in fast operations while maintaining database integrity - # even in the face of concurrent accesses and power loss. - # temp_store=2: use in-memory temp store + # Use in-memory temp store # https://www.sqlite.org/pragma.html#pragma_temp_store - self.conn.execute("PRAGMA temp_store = 2") + self.conn.execute("PRAGMA temp_store = 'MEMORY'") + # fsync() can be extremely slow on some systems. + # See https://github.com/inducer/pytools/issues/227 for context. # https://www.sqlite.org/pragma.html#pragma_synchronous - self.conn.execute("PRAGMA synchronous = NORMAL") + if safe_sync is None or safe_sync: + if safe_sync is None: + from warnings import warn + warn(f"pytools.persistent_dict '{identifier}': " + "enabling safe_sync as default. " + "This provides strong protection against data loss, " + "but can be unnecessarily expensive for use cases such as " + "caches." + "Pass 'safe_sync=False' if occasional data loss is tolerable. " + "Pass 'safe_sync=True' to suppress this warning.", + stacklevel=3) + self.conn.execute("PRAGMA synchronous = 'NORMAL'") + else: + self.conn.execute("PRAGMA synchronous = 'OFF'") # 64 MByte of cache # https://www.sqlite.org/pragma.html#pragma_cache_size @@ -595,7 +609,9 @@ class WriteOncePersistentDict(_PersistentDictBase[K, V]): def __init__(self, identifier: str, key_builder: Optional[KeyBuilder] = None, container_dir: Optional[str] = None, + *, enable_wal: bool = False, + safe_sync: Optional[bool] = None, in_mem_cache_size: int = 256) -> None: """ :arg identifier: a filename-compatible string identifying this @@ -611,7 +627,7 @@ class WriteOncePersistentDict(_PersistentDictBase[K, V]): *in_mem_cache_size* items (with an LRU replacement policy) """ _PersistentDictBase.__init__(self, identifier, key_builder, - container_dir, enable_wal) + container_dir, enable_wal, safe_sync) from functools import lru_cache self._fetch = lru_cache(maxsize=in_mem_cache_size)(self._fetch) @@ -683,7 +699,9 @@ class PersistentDict(_PersistentDictBase[K, V]): identifier: str, key_builder: Optional[KeyBuilder] = None, container_dir: Optional[str] = None, - enable_wal: bool = False) -> None: + *, + enable_wal: bool = False, + safe_sync: Optional[bool] = None) -> None: """ :arg identifier: a filename-compatible string identifying this dictionary @@ -696,7 +714,7 @@ class PersistentDict(_PersistentDictBase[K, V]): not compatible with network filesystems. """ _PersistentDictBase.__init__(self, identifier, key_builder, - container_dir, enable_wal) + container_dir, enable_wal, safe_sync) def store(self, key: K, value: V, _skip_if_present: bool = False) -> None: keyhash = self.key_builder(key) diff --git a/pytools/test/test_persistent_dict.py b/pytools/test/test_persistent_dict.py index 5e8dfd4..bc67e66 100644 --- a/pytools/test/test_persistent_dict.py +++ b/pytools/test/test_persistent_dict.py @@ -64,7 +64,8 @@ def test_persistent_dict_storage_and_lookup() -> None: try: tmpdir = tempfile.mkdtemp() pdict: PersistentDict[Any, int] = PersistentDict("pytools-test", - container_dir=tmpdir) + container_dir=tmpdir, + safe_sync=False) from random import randrange @@ -163,7 +164,8 @@ def test_persistent_dict_deletion() -> None: try: tmpdir = tempfile.mkdtemp() pdict: PersistentDict[int, int] = PersistentDict("pytools-test", - container_dir=tmpdir) + container_dir=tmpdir, + safe_sync=False) pdict[0] = 0 del pdict[0] @@ -185,9 +187,11 @@ def test_persistent_dict_synchronization() -> None: try: tmpdir = tempfile.mkdtemp() pdict1: PersistentDict[int, int] = PersistentDict("pytools-test", - container_dir=tmpdir) + container_dir=tmpdir, + safe_sync=False) pdict2: PersistentDict[int, int] = PersistentDict("pytools-test", - container_dir=tmpdir) + container_dir=tmpdir, + safe_sync=False) # check lookup pdict1[0] = 1 @@ -210,7 +214,7 @@ def test_persistent_dict_cache_collisions() -> None: try: tmpdir = tempfile.mkdtemp() pdict: PersistentDict[PDictTestingKeyOrValue, int] = \ - PersistentDict("pytools-test", container_dir=tmpdir) + PersistentDict("pytools-test", container_dir=tmpdir, safe_sync=False) key1 = PDictTestingKeyOrValue(1, hash_key=0) key2 = PDictTestingKeyOrValue(2, hash_key=0) @@ -242,7 +246,8 @@ def test_persistent_dict_clear() -> None: try: tmpdir = tempfile.mkdtemp() pdict: PersistentDict[int, int] = PersistentDict("pytools-test", - container_dir=tmpdir) + container_dir=tmpdir, + safe_sync=False) pdict[0] = 1 pdict.fetch(0) @@ -261,7 +266,7 @@ def test_write_once_persistent_dict_storage_and_lookup(in_mem_cache_size) -> Non tmpdir = tempfile.mkdtemp() pdict: WriteOncePersistentDict[int, int] = WriteOncePersistentDict( "pytools-test", container_dir=tmpdir, - in_mem_cache_size=in_mem_cache_size) + in_mem_cache_size=in_mem_cache_size, safe_sync=False) # check lookup pdict[0] = 1 @@ -291,7 +296,8 @@ def test_write_once_persistent_dict_lru_policy() -> None: try: tmpdir = tempfile.mkdtemp() pdict: WriteOncePersistentDict[Any, Any] = WriteOncePersistentDict( - "pytools-test", container_dir=tmpdir, in_mem_cache_size=3) + "pytools-test", container_dir=tmpdir, in_mem_cache_size=3, + safe_sync=False) pdict[1] = PDictTestingKeyOrValue(1) pdict[2] = PDictTestingKeyOrValue(2) @@ -331,9 +337,11 @@ def test_write_once_persistent_dict_synchronization() -> None: try: tmpdir = tempfile.mkdtemp() pdict1: WriteOncePersistentDict[int, int] = \ - WriteOncePersistentDict("pytools-test", container_dir=tmpdir) + WriteOncePersistentDict("pytools-test", container_dir=tmpdir, + safe_sync=False) pdict2: WriteOncePersistentDict[int, int] = \ - WriteOncePersistentDict("pytools-test", container_dir=tmpdir) + WriteOncePersistentDict("pytools-test", container_dir=tmpdir, + safe_sync=False) # check lookup pdict1[1] = 0 @@ -351,7 +359,8 @@ def test_write_once_persistent_dict_cache_collisions() -> None: try: tmpdir = tempfile.mkdtemp() pdict: WriteOncePersistentDict[Any, int] = \ - WriteOncePersistentDict("pytools-test", container_dir=tmpdir) + WriteOncePersistentDict("pytools-test", container_dir=tmpdir, + safe_sync=False) key1 = PDictTestingKeyOrValue(1, hash_key=0) key2 = PDictTestingKeyOrValue(2, hash_key=0) @@ -378,7 +387,8 @@ def test_write_once_persistent_dict_clear() -> None: try: tmpdir = tempfile.mkdtemp() pdict: WriteOncePersistentDict[int, int] = \ - WriteOncePersistentDict("pytools-test", container_dir=tmpdir) + WriteOncePersistentDict("pytools-test", container_dir=tmpdir, + safe_sync=False) pdict[0] = 1 pdict.fetch(0) @@ -710,7 +720,7 @@ def test_xdg_cache_home() -> None: try: os.environ["XDG_CACHE_HOME"] = xdg_dir - PersistentDict("pytools-test") + PersistentDict("pytools-test", safe_sync=False) assert os.path.exists(xdg_dir) finally: @@ -726,7 +736,8 @@ def test_speed(): import time tmpdir = tempfile.mkdtemp() - pdict = WriteOncePersistentDict("pytools-test", container_dir=tmpdir) + pdict = WriteOncePersistentDict("pytools-test", container_dir=tmpdir, + safe_sync=False) start = time.time() for i in range(10000): @@ -747,7 +758,7 @@ def test_speed(): def test_size(): try: tmpdir = tempfile.mkdtemp() - pdict = PersistentDict("pytools-test", container_dir=tmpdir) + pdict = PersistentDict("pytools-test", container_dir=tmpdir, safe_sync=False) for i in range(10000): pdict[f"foobarbazfoobbb{i}"] = i @@ -762,7 +773,7 @@ def test_size(): def test_len(): try: tmpdir = tempfile.mkdtemp() - pdict = PersistentDict("pytools-test", container_dir=tmpdir) + pdict = PersistentDict("pytools-test", container_dir=tmpdir, safe_sync=False) assert len(pdict) == 0 @@ -781,7 +792,7 @@ def test_len(): def test_repr(): try: tmpdir = tempfile.mkdtemp() - pdict = PersistentDict("pytools-test", container_dir=tmpdir) + pdict = PersistentDict("pytools-test", container_dir=tmpdir, safe_sync=False) assert repr(pdict)[:15] == "PersistentDict(" finally: @@ -791,7 +802,7 @@ def test_repr(): def test_keys_values_items(): try: tmpdir = tempfile.mkdtemp() - pdict = PersistentDict("pytools-test", container_dir=tmpdir) + pdict = PersistentDict("pytools-test", container_dir=tmpdir, safe_sync=False) for i in range(10000): pdict[i] = i -- GitLab