From 7204c0ab09d7f033f61757a0a1ae9c3162e88c59 Mon Sep 17 00:00:00 2001
From: Matthias Diener <mdiener@illinois.edu>
Date: Mon, 13 Dec 2021 14:35:03 -0600
Subject: [PATCH] Cache: reduce sleep time (#504)

* Cache: reduce sleep time

* Update cache.py

* print warnings fix

* restructure

* add comment about our choice for the timeout
---
 pyopencl/cache.py | 22 ++++++++++++++++++----
 1 file changed, 18 insertions(+), 4 deletions(-)

diff --git a/pyopencl/cache.py b/pyopencl/cache.py
index 6e01c90e..582a7908 100644
--- a/pyopencl/cache.py
+++ b/pyopencl/cache.py
@@ -88,18 +88,32 @@ class CacheLockManager(CleanupBase):
                 except OSError:
                     pass
 
+                # This value was chosen based on the py-filelock package:
+                # https://github.com/tox-dev/py-filelock/blob/a6c8fabc4192fa7a4ae19b1875ee842ec5eb4f61/src/filelock/_api.py#L113
+                # When running pyopencl in an application with multiple ranks
+                # that share a cache_dir, higher timeouts can lead to
+                # application stalls even with low numbers of ranks.
+                # cf. https://github.com/inducer/pyopencl/pull/504
+                wait_time_seconds = 0.05
+
+                # Warn every 10 seconds if not able to acquire lock
+                warn_attempts = int(10/wait_time_seconds)
+
+                # Exit after 60 seconds if not able to acquire lock
+                exit_attempts = int(60/wait_time_seconds)
+
                 from time import sleep
-                sleep(1)
+                sleep(wait_time_seconds)
 
                 attempts += 1
 
-                if attempts > 10:
+                if attempts % warn_attempts == 0:
                     from warnings import warn
                     warn("could not obtain cache lock--delete '%s' if necessary"
                             % self.lock_file)
 
-                if attempts > 3 * 60:
-                    raise RuntimeError("waited more than three minutes "
+                if attempts > exit_attempts:
+                    raise RuntimeError("waited more than one minute "
                             "on the lock file '%s'"
                             "--something is wrong" % self.lock_file)
 
-- 
GitLab