From a5efddf54fd4467d729ec4118faa1a059b309d0b Mon Sep 17 00:00:00 2001
From: Matthias Diener <mdiener@illinois.edu>
Date: Wed, 24 Apr 2024 12:44:25 -0500
Subject: [PATCH] KeyBuilder: add datetime hashing (#219)

* KeyBuilder: add datetime hashing

* (try to) take naive/aware, timezones into account

* add another test

* fix datetime timezones

* another time fix
---
 pytools/persistent_dict.py           | 39 +++++++++--
 pytools/test/test_persistent_dict.py | 99 ++++++++++++++++++++++++++++
 2 files changed, 134 insertions(+), 4 deletions(-)

diff --git a/pytools/persistent_dict.py b/pytools/persistent_dict.py
index 9515f37..fb77d4b 100644
--- a/pytools/persistent_dict.py
+++ b/pytools/persistent_dict.py
@@ -402,7 +402,7 @@ class KeyBuilder:
         key_hash.update(b"<None>")
 
     @staticmethod
-    def update_for_dtype(key_hash, key):
+    def update_for_dtype(key_hash: Hash, key: Any) -> None:
         key_hash.update(key.str.encode("utf8"))
 
     # Handling numpy >= 1.20, for which
@@ -410,11 +410,11 @@ class KeyBuilder:
     # Introducing this method allows subclasses to specially handle all those
     # dtypes.
     @staticmethod
-    def update_for_specific_dtype(key_hash, key):
+    def update_for_specific_dtype(key_hash: Hash, key: Any) -> None:
         key_hash.update(key.str.encode("utf8"))
 
     @staticmethod
-    def update_for_numpy_scalar(key_hash: Hash, key) -> None:
+    def update_for_numpy_scalar(key_hash: Hash, key: Any) -> None:
         import numpy as np
         if hasattr(np, "complex256") and key.dtype == np.dtype("complex256"):
             key_hash.update(repr(complex(key)).encode("utf8"))
@@ -430,7 +430,7 @@ class KeyBuilder:
             self.rec(key_hash, fld.name)
             self.rec(key_hash, getattr(key, fld.name, None))
 
-    def update_for_attrs(self, key_hash: Hash, key) -> None:
+    def update_for_attrs(self, key_hash: Hash, key: Any) -> None:
         self.rec(key_hash, f"{type(key).__qualname__}.{type(key).__name__}")
 
         for fld in attrs.fields(key.__class__):
@@ -449,6 +449,37 @@ class KeyBuilder:
     update_for_PMap = update_for_frozendict  # noqa: N815
     update_for_Map = update_for_frozendict  # noqa: N815
 
+    # {{{ date, time, datetime, timezone
+
+    def update_for_date(self, key_hash: Hash, key: Any) -> None:
+        # A 'date' has no timezone (it is always naive): hash its ISO string
+        self.rec(key_hash, key.isoformat())
+
+    def update_for_time(self, key_hash: Hash, key: Any) -> None:
+        # 'time' should differentiate between naive and aware
+        import datetime
+
+        # Anchor on a fixed date, not today(), so the key is stable across days
+        self.rec(key_hash, datetime.datetime.combine(datetime.date.min, key))
+        self.rec(key_hash, "<time>")
+
+    def update_for_datetime(self, key_hash: Hash, key: Any) -> None:
+        # Hash the timestamp, then a "<naive>"/"<aware>" tag to tell them apart
+        # https://docs.python.org/3.11/library/datetime.html#determining-if-an-object-is-aware-or-naive
+        tz = key.tzinfo
+        if tz is None or tz.utcoffset(key) is None:
+            from datetime import timezone
+            self.rec(key_hash, key.replace(tzinfo=timezone.utc).timestamp())
+            self.rec(key_hash, "<naive>")
+        else:
+            self.rec(key_hash, key.timestamp())
+            self.rec(key_hash, "<aware>")
+
+    def update_for_timezone(self, key_hash: Hash, key: Any) -> None:
+        self.rec(key_hash, repr(key))  # the key is derived from repr() alone
+
+    # }}}
+
     # }}}
 
 # }}}
diff --git a/pytools/test/test_persistent_dict.py b/pytools/test/test_persistent_dict.py
index 1bebf61..649bd00 100644
--- a/pytools/test/test_persistent_dict.py
+++ b/pytools/test/test_persistent_dict.py
@@ -598,6 +598,105 @@ def test_attrs_hashing() -> None:
             != keyb(MyAttrs("hi", 1)))  # type: ignore[call-arg]
 
 
+def test_datetime_hashing() -> None:
+    """Check KeyBuilder keys for date, time, datetime and timezone objects."""
+    keyb = KeyBuilder()
+
+    import datetime
+
+    # {{{ date
+    # No timezone info; date is always naive
+    assert (keyb(datetime.date(2020, 1, 1))
+            == keyb(datetime.date(2020, 1, 1))
+            == "9fb97d7faabc3603f3e334ca5eb1eb0fe0c92665e5611cb1b5aa77fa0f70f5e3")
+    assert keyb(datetime.date(2020, 1, 1)) != keyb(datetime.date(2020, 1, 2))
+    # }}}
+
+    # {{{ time
+
+    # Must distinguish between naive and aware time objects
+
+    # Naive time
+    assert (keyb(datetime.time(12, 0))
+            == keyb(datetime.time(12, 0))
+            == keyb(datetime.time(12, 0, 0))
+            == keyb(datetime.time(12, 0, 0, 0))
+            == "bf73f48b2f2666b5c42f6993e628fdc15e0b6c3127186c3ab44ce08ed83d0472")
+    assert keyb(datetime.time(12, 0)) != keyb(datetime.time(12, 1))
+
+    # Aware time
+    t1 = datetime.time(12, 0, tzinfo=datetime.timezone.utc)
+    t2 = datetime.time(7, 0,
+                            tzinfo=datetime.timezone(datetime.timedelta(hours=-5)))
+    t3 = datetime.time(7, 0,
+                            tzinfo=datetime.timezone(datetime.timedelta(hours=-4)))
+
+    assert t1 == t2  # same time of day once converted to UTC
+    assert (keyb(t1)
+            == keyb(t2)
+            == "c0947587c92ab6e2df90475dd497aff1d83df55fbd5af6c55b2a0a221b2437a4")
+
+    assert t1 != t3
+    assert keyb(t1) != keyb(t3)
+
+    # }}}
+
+    # {{{ datetime
+
+    # Must distinguish between naive and aware datetime objects
+
+    # Aware datetime
+    dt1 = datetime.datetime(2020, 1, 1, 12, tzinfo=datetime.timezone.utc)
+    dt2 = datetime.datetime(2020, 1, 1, 7,
+                            tzinfo=datetime.timezone(datetime.timedelta(hours=-5)))
+
+    assert dt1 == dt2  # same instant, expressed with different UTC offsets
+    assert (keyb(dt1)
+            == keyb(dt2)
+            == "cd35722af47e42cb3bc81c389b87eb2e78ee8e20298bb1d8a193b30940d1c142")
+
+    dt3 = datetime.datetime(2020, 1, 1, 7,
+                            tzinfo=datetime.timezone(datetime.timedelta(hours=-4)))
+
+    assert dt1 != dt3
+    assert keyb(dt1) != keyb(dt3)
+
+    # Naive datetime
+    dt4 = datetime.datetime(2020, 1, 1, 6)  # no tzinfo attached
+    assert dt1 != dt4  # naive and aware datetime objects are never equal
+    assert keyb(dt1) != keyb(dt4)
+
+    assert (keyb(datetime.datetime(2020, 1, 1))
+            == keyb(datetime.datetime(2020, 1, 1))
+            == keyb(datetime.datetime(2020, 1, 1, 0, 0, 0, 0))
+            == "8f3b843d7b9176afd8e2ce97ebc19789098a1c7774c4ec00d4054ec954ce2b88"
+            )
+    assert keyb(datetime.datetime(2020, 1, 1)) != keyb(datetime.datetime(2020, 1, 2))
+    assert (keyb(datetime.datetime(2020, 1, 1))
+            != keyb(datetime.datetime(2020, 1, 1, tzinfo=datetime.timezone.utc)))
+
+    # }}}
+
+    # {{{ timezone
+
+    tz1 = datetime.timezone(datetime.timedelta(hours=-4))
+    tz2 = datetime.timezone(datetime.timedelta(hours=0))
+    tz3 = datetime.timezone.utc
+
+    assert tz1 != tz2
+    assert keyb(tz1) != keyb(tz2)
+
+    assert tz1 != tz3
+    assert keyb(tz1) != keyb(tz3)
+
+    assert tz2 == tz3
+    assert (keyb(tz2)
+            == keyb(tz3)
+            == "89bd615f32c1f209b0853b1fc7d06ddb6fda7f367a00a8621d60337d52cb8d10")
+
+    # }}}
+
+
 def test_xdg_cache_home() -> None:
     import os
     xdg_dir = "tmpdir_pytools_xdg_test"
-- 
GitLab