From 45b29f7a3c97760ccba104e6e189522b1587b552 Mon Sep 17 00:00:00 2001
From: jdsteve2 <jdsteve2@illinois.edu>
Date: Mon, 20 Apr 2020 20:34:04 -0500
Subject: [PATCH 001/140] adding lexicographic_order_map.py (creates isl maps
 defining lex orderings and statement instance orderings)

---
 .../checker/lexicographic_order_map.py        | 159 ++++++++++++++++++
 1 file changed, 159 insertions(+)
 create mode 100644 loopy/schedule/checker/lexicographic_order_map.py

diff --git a/loopy/schedule/checker/lexicographic_order_map.py b/loopy/schedule/checker/lexicographic_order_map.py
new file mode 100644
index 000000000..2e063e7d7
--- /dev/null
+++ b/loopy/schedule/checker/lexicographic_order_map.py
@@ -0,0 +1,159 @@
+__copyright__ = "Copyright (C) 2019 James Stevens"
+
+__license__ = """
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+"""
+
+import islpy as isl
+
+
+def get_statement_ordering_map(
+        sched_map_before, sched_map_after, lex_map, before_marker="'"):
+    """Return a mapping that maps each statement instance to
+        all statement instances occurring later.
+
+    :arg sched_map_before: An :class:`islpy.Map` representing instruction
+        instance order for the dependee as a mapping from each statement
+        instance to a point in the lexicographic ordering.
+
+    :arg sched_map_after: An :class:`islpy.Map` representing instruction
+        instance order for the depender as a mapping from each statement
+        instance to a point in the lexicographic ordering.
+
+    :arg lex_map: An :class:`islpy.Map` representing a lexicographic
+        ordering as a mapping from each point in lexicographic time
+        to every point that occurs later in lexicographic time. E.g.::
+
+            {[i0', i1', i2', ...] -> [i0, i1, i2, ...] :
+                i0' < i0 or (i0' = i0 and i1' < i1)
+                or (i0' = i0 and i1' = i1 and i2' < i2) ...}
+
+    :returns: An :class:`islpy.Map` representing the lex schedule as
+        a mapping from each statement instance to all statement instances
+        occurring later. I.e., we compose B -> L -> A^-1, where B
+        is sched_map_before, A is sched_map_after, and L is lex_map;
+        before_marker is then appended to the input dimension names.
+
+    """
+
+    sio = sched_map_before.apply_range(
+        lex_map).apply_range(sched_map_after.reverse())
+    # append marker to in names
+    for i in range(sio.dim(isl.dim_type.in_)):
+        sio = sio.set_dim_name(isl.dim_type.in_, i, sio.get_dim_name(
+            isl.dim_type.in_, i)+before_marker)
+    return sio
+
+
+def get_lex_order_constraint(islvars, before_names, after_names):
+    """Return a constraint represented as an :class:`islpy.Set`
+        defining a 'happens before' relationship in a lexicographic
+        ordering.
+
+    :arg islvars: A dictionary from variable names to :class:`islpy.PwAff`
+        instances that represent each of the variables
+        (islvars may be produced by `islpy.make_zero_and_vars`). The key
+        '0' is also included and represents a :class:`islpy.PwAff` zero constant.
+        This dictionary defines the space to be used for the set.
+
+    :arg before_names: A list of :class:`str` variable names representing
+        the lexicographic space dimensions for a point in lexicographic
+        time that occurs before. (see example below)
+
+    :arg after_names: A list of :class:`str` variable names representing
+        the lexicographic space dimensions for a point in lexicographic
+        time that occurs after. (see example below)
+
+    :returns: An :class:`islpy.Set` representing a constraint that enforces a
+        lexicographic ordering. E.g., if ``before_names = [i0', i1', i2']`` and
+        ``after_names = [i0, i1, i2]``, return the set::
+
+            {[i0', i1', i2', i0, i1, i2] :
+                i0' < i0 or (i0' = i0 and i1' < i1)
+                or (i0' = i0 and i1' = i1 and i2' < i2)}
+
+    """
+
+    lex_order_constraint = islvars[before_names[0]].lt_set(islvars[after_names[0]])
+    for i in range(1, len(before_names)):
+        lex_order_constraint_conj = islvars[before_names[i]].lt_set(
+            islvars[after_names[i]])
+        for j in range(i):
+            lex_order_constraint_conj = lex_order_constraint_conj & \
+                islvars[before_names[j]].eq_set(islvars[after_names[j]])
+        lex_order_constraint = lex_order_constraint | lex_order_constraint_conj
+    return lex_order_constraint
+
+
+def create_lex_order_map(
+        n_dims,
+        before_names=None,
+        after_names=None,
+        ):
+    """Return a mapping that maps each point in a lexicographic
+        ordering to every point that occurs later in lexicographic
+        time.
+
+    :arg n_dims: An :class:`int` representing the number of dimensions
+        in the lexicographic ordering.
+
+    :arg before_names: A list of :class:`str` variable names representing
+        the lexicographic space dimensions for a point in lexicographic
+        time that occurs before; defaults to ``i0, i1, ...`` (see example below)
+
+    :arg after_names: A list of :class:`str` variable names representing
+        the lexicographic space dimensions for a point in lexicographic
+        time that occurs after; defaults to before_names with "_" appended.
+
+    :returns: An :class:`islpy.Map` representing a lexicographic
+        ordering as a mapping from each point in lexicographic time
+        to every point that occurs later in lexicographic time.
+        E.g., if ``before_names = [i0', i1', i2']`` and
+        ``after_names = [i0, i1, i2]``, return the map::
+
+            {[i0', i1', i2'] -> [i0, i1, i2] :
+                i0' < i0 or (i0' = i0 and i1' < i1)
+                or (i0' = i0 and i1' = i1 and i2' < i2)}
+
+    """
+
+    if before_names is None:
+        before_names = ["i%s" % (i) for i in range(n_dims)]
+    if after_names is None:
+        from loopy.schedule.checker.utils import (
+            append_marker_to_strings,
+        )
+        after_names = append_marker_to_strings(before_names, marker="_")
+
+    assert len(before_names) == len(after_names) == n_dims
+    dim_type = isl.dim_type
+
+    islvars = isl.make_zero_and_vars(
+            before_names+after_names,
+            [])
+
+    lex_order_constraint = get_lex_order_constraint(
+        islvars, before_names, after_names)
+
+    lex_map = isl.Map.from_domain(lex_order_constraint)
+    lex_map = lex_map.move_dims(
+        dim_type.out, 0, dim_type.in_,
+        len(before_names), len(after_names))
+
+    return lex_map
-- 
GitLab


From 782dde2330328a0716bda113efc1526257c3fcbe Mon Sep 17 00:00:00 2001
From: jdsteve2 <jdsteve2@illinois.edu>
Date: Mon, 20 Apr 2020 20:35:41 -0500
Subject: [PATCH 002/140] add get_lex_order_map_for_sched_space() to schedule
 (gets an isl map defining the lexicographic ordering)

---
 loopy/schedule/checker/schedule.py | 13 +++++++++++++
 loopy/schedule/checker/utils.py    |  7 +++++++
 2 files changed, 20 insertions(+)

diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py
index 0aca588c3..305d1f74f 100644
--- a/loopy/schedule/checker/schedule.py
+++ b/loopy/schedule/checker/schedule.py
@@ -405,6 +405,19 @@ class LexSchedule(object):
         return [self.lex_var_prefix+str(i)
                 for i in range(self.max_lex_dims())]
 
+    def get_lex_order_map_for_sched_space(self):
+        """Return an :class:`islpy.Map` that maps each point in a
+            lexicographic ordering to every point that is
+            lexicographically greater.
+        """
+
+        from loopy.schedule.checker.lexicographic_order_map import (
+            create_lex_order_map,
+        )
+        n_dims = self.max_lex_dims()
+        return create_lex_order_map(
+            n_dims, before_names=self.get_lex_var_names())
+
     def __str__(self):
 
         def stringify_sched_stmt_instance(stmt_inst):
diff --git a/loopy/schedule/checker/utils.py b/loopy/schedule/checker/utils.py
index cb933de6f..8757406b7 100644
--- a/loopy/schedule/checker/utils.py
+++ b/loopy/schedule/checker/utils.py
@@ -143,6 +143,13 @@ def align_isl_maps_by_var_names(input_map, target_map):
     return aligned_input_map
 
 
+def append_marker_to_strings(strings, marker="'"):
+    if not isinstance(strings, list):
+        raise ValueError("append_marker_to_strings did not receive a list")
+    else:
+        return [s+marker for s in strings]
+
+
 def _union_of_isl_sets_or_maps(set_list):
     union = set_list[0]
     for s in set_list[1:]:
-- 
GitLab


From 0e664550837299ff697d5f6947fed9d90d2cc095 Mon Sep 17 00:00:00 2001
From: jdsteve2 <jdsteve2@illinois.edu>
Date: Mon, 20 Apr 2020 22:13:50 -0500
Subject: [PATCH 003/140] add function append_marker_to_in_dim_names(islmap)

---
 loopy/schedule/checker/lexicographic_order_map.py | 8 ++++----
 loopy/schedule/checker/utils.py                   | 8 ++++++++
 2 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/loopy/schedule/checker/lexicographic_order_map.py b/loopy/schedule/checker/lexicographic_order_map.py
index 2e063e7d7..61f191247 100644
--- a/loopy/schedule/checker/lexicographic_order_map.py
+++ b/loopy/schedule/checker/lexicographic_order_map.py
@@ -55,10 +55,10 @@ def get_statement_ordering_map(
     sio = sched_map_before.apply_range(
         lex_map).apply_range(sched_map_after.reverse())
     # append marker to in names
-    for i in range(sio.dim(isl.dim_type.in_)):
-        sio = sio.set_dim_name(isl.dim_type.in_, i, sio.get_dim_name(
-            isl.dim_type.in_, i)+before_marker)
-    return sio
+    from loopy.schedule.checker.utils import (
+        append_marker_to_in_dim_names,
+    )
+    return append_marker_to_in_dim_names(sio, before_marker)
 
 
 def get_lex_order_constraint(islvars, before_names, after_names):
diff --git a/loopy/schedule/checker/utils.py b/loopy/schedule/checker/utils.py
index 8757406b7..96aa007c7 100644
--- a/loopy/schedule/checker/utils.py
+++ b/loopy/schedule/checker/utils.py
@@ -150,6 +150,14 @@ def append_marker_to_strings(strings, marker="'"):
         return [s+marker for s in strings]
 
 
+def append_marker_to_in_dim_names(islmap, marker="'"):
+    # append marker to in names
+    for i in range(islmap.dim(isl.dim_type.in_)):
+        islmap = islmap.set_dim_name(isl.dim_type.in_, i, islmap.get_dim_name(
+            isl.dim_type.in_, i)+marker)
+    return islmap
+
+
 def _union_of_isl_sets_or_maps(set_list):
     union = set_list[0]
     for s in set_list[1:]:
-- 
GitLab


From ceb9015a1a18d16f0615c8f3deb9cf35f0cb9ca2 Mon Sep 17 00:00:00 2001
From: jdsteve2 <jdsteve2@illinois.edu>
Date: Mon, 20 Apr 2020 22:14:38 -0500
Subject: [PATCH 004/140] test lexicographic order map creation and statement
 instance order creation

---
 test/test_linearization_checker.py | 203 +++++++++++++++++++++++++++++
 1 file changed, 203 insertions(+)

diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py
index c112b40ae..5a05bdd8e 100644
--- a/test/test_linearization_checker.py
+++ b/test/test_linearization_checker.py
@@ -46,6 +46,8 @@ else:
     faulthandler.enable()
 
 
+# {{{ test LexSchedule and isl map creation
+
 def test_lexschedule_and_islmap_creation():
     import islpy as isl
     from loopy.schedule.checker import (
@@ -362,6 +364,207 @@ def test_lexschedule_and_islmap_creation():
     else:
         perform_insn_cd_checks_with(1, 0)
 
+# }}}
+
+
+# {{{ test statement instance ordering creation
+
+def test_statement_instance_ordering_creation():
+    import islpy as isl
+    from loopy.schedule.checker import (
+        get_schedule_for_statement_pair,
+        get_isl_maps_for_LexSchedule,
+    )
+    from loopy.schedule.checker.utils import (
+        align_isl_maps_by_var_names,
+        append_marker_to_in_dim_names,
+    )
+    from loopy.schedule.checker.lexicographic_order_map import (
+        get_statement_ordering_map,
+    )
+
+    # example kernel (add deps to fix loop order)
+    knl = lp.make_kernel(
+        [
+            "{[i]: 0<=i<pi}",
+            "{[k]: 0<=k<pk}",
+            "{[j]: 0<=j<pj}",
+            "{[t]: 0<=t<pt}",
+        ],
+        """
+        for i
+            for k
+                <>temp = b[i,k]  {id=insn_a}
+            end
+            for j
+                a[i,j] = temp + 1  {id=insn_b,dep=insn_a}
+                c[i,j] = d[i,j]  {id=insn_c,dep=insn_b}
+            end
+        end
+        for t
+            e[t] = f[t]  {id=insn_d, dep=insn_c}
+        end
+        """,
+        name="example",
+        assumptions="pi,pj,pk,pt >= 1",
+        lang_version=(2018, 2)
+        )
+    knl = lp.add_and_infer_dtypes(
+            knl,
+            {"b": np.float32, "d": np.float32, "f": np.float32})
+    knl = lp.prioritize_loops(knl, "i,k")
+    knl = lp.prioritize_loops(knl, "i,j")
+
+    # get a linearization
+    knl = preprocess_kernel(knl)
+    knl = get_one_linearized_kernel(knl)
+    linearization_items = knl.linearization
+
+    def check_sio_for_insn_pair(
+            insn_id_before,
+            insn_id_after,
+            expected_lex_order_map,
+            expected_sio,
+            ):
+
+        lex_sched = get_schedule_for_statement_pair(
+            knl,
+            linearization_items,
+            insn_id_before,
+            insn_id_after,
+            )
+
+        # Get two isl maps representing the LexSchedule
+        isl_sched_map_before, isl_sched_map_after = \
+             get_isl_maps_for_LexSchedule(lex_sched, knl, insn_id_before, insn_id_after)
+
+        # get map representing lexicographic ordering
+        sched_lex_order_map = lex_sched.get_lex_order_map_for_sched_space()
+
+        assert sched_lex_order_map == expected_lex_order_map
+
+        # create statement instance ordering,
+        # maps each statement instance to all statement instances occurring later
+        sio = get_statement_ordering_map(
+            isl_sched_map_before,
+            isl_sched_map_after,
+            sched_lex_order_map,
+            )
+
+        print(sio)
+        print(expected_sio)
+
+        sio_aligned = align_isl_maps_by_var_names(sio, expected_sio)
+
+        print(sio_aligned)
+        print(expected_sio)
+
+        assert sio_aligned == expected_sio
+
+    expected_lex_order_map = isl.Map(
+        "{ "
+        "[l0, l1, l2, l3, l4] -> [l0_, l1_, l2_, l3_, l4_] : l0_ > l0; "
+        "[l0, l1, l2, l3, l4] -> [l0_= l0, l1_, l2_, l3_, l4_] : l1_ > l1; "
+        "[l0, l1, l2, l3, l4] -> [l0_= l0, l1_= l1, l2_, l3_, l4_] : l2_ > l2; "
+        "[l0, l1, l2, l3, l4] -> [l0_= l0, l1_= l1, l2_= l2, l3_, l4_] : l3_ > l3; "
+        "[l0, l1, l2, l3, l4] -> [l0_= l0, l1_= l1, l2_= l2, l3_= l3, l4_] : l4_ > l4 "
+        "}"
+        )
+
+    # Relationship between insn_a and insn_b ---------------------------------------
+
+    expected_sio = isl.Map(
+        "[pi, pj, pk] -> { "
+        "[statement' = 0, i', k'] -> [statement = 1, i, j] : "
+        "0 <= i' < pi and 0 <= k' < pk and 0 <= j < pj and 0 <= i < pi and i > i'; "
+        "[statement' = 0, i', k'] -> [statement = 1, i = i', j] : "
+        "0 <= i' < pi and 0 <= k' < pk and 0 <= j < pj "
+        "}"
+        )
+    # isl ignores these apostrophes, so explicitly add them
+    expected_sio = append_marker_to_in_dim_names(expected_sio, "'")
+
+    check_sio_for_insn_pair(
+        "insn_a", "insn_b", expected_lex_order_map, expected_sio)
+
+    # Relationship between insn_a and insn_c ---------------------------------------
+
+    expected_sio = isl.Map(
+        "[pi, pj, pk] -> { "
+        "[statement' = 0, i', k'] -> [statement = 1, i, j] : "
+        "0 <= i' < pi and 0 <= k' < pk and 0 <= j < pj and 0 <= i < pi and i > i'; "
+        "[statement' = 0, i', k'] -> [statement = 1, i = i', j] : "
+        "0 <= i' < pi and 0 <= k' < pk and 0 <= j < pj "
+        "}"
+        )
+    # isl ignores these apostrophes, so explicitly add them
+    expected_sio = append_marker_to_in_dim_names(expected_sio, "'")
+
+    check_sio_for_insn_pair(
+        "insn_a", "insn_c", expected_lex_order_map, expected_sio)
+
+    # Relationship between insn_a and insn_d ---------------------------------------
+
+    expected_sio = isl.Map(
+        "[pt, pi, pk] -> { "
+        "[statement' = 0, i', k'] -> [statement = 1, t] : "
+        "0 <= i' < pi and 0 <= k' < pk and 0 <= t < pt "
+        "}"
+        )
+    # isl ignores these apostrophes, so explicitly add them
+    expected_sio = append_marker_to_in_dim_names(expected_sio, "'")
+
+    check_sio_for_insn_pair(
+        "insn_a", "insn_d", expected_lex_order_map, expected_sio)
+
+    # Relationship between insn_b and insn_c ---------------------------------------
+
+    expected_sio = isl.Map(
+        "[pi, pj] -> { "
+        "[statement' = 0, i', j'] -> [statement = 1, i, j] : "
+        "0 <= i' < pi and 0 <= j' < pj and i > i' and 0 <= i < pi and 0 <= j < pj; "
+        "[statement' = 0, i', j'] -> [statement = 1, i = i', j] : "
+        "0 <= i' < pi and 0 <= j' < pj and j > j' and 0 <= j < pj; "
+        "[statement' = 0, i', j'] -> [statement = 1, i = i', j = j'] : "
+        "0 <= i' < pi and 0 <= j' < pj "
+        "}"
+        )
+    # isl ignores these apostrophes, so explicitly add them
+    expected_sio = append_marker_to_in_dim_names(expected_sio, "'")
+
+    check_sio_for_insn_pair(
+        "insn_b", "insn_c", expected_lex_order_map, expected_sio)
+
+    # Relationship between insn_b and insn_d ---------------------------------------
+
+    expected_sio = isl.Map(
+        "[pt, pi, pj] -> { "
+        "[statement' = 0, i', j'] -> [statement = 1, t] : "
+        "0 <= i' < pi and 0 <= j' < pj and 0 <= t < pt "
+        "}"
+        )
+    # isl ignores these apostrophes, so explicitly add them
+    expected_sio = append_marker_to_in_dim_names(expected_sio, "'")
+
+    check_sio_for_insn_pair(
+        "insn_b", "insn_d", expected_lex_order_map, expected_sio)
+
+    # Relationship between insn_c and insn_d ---------------------------------------
+
+    expected_sio = isl.Map(
+        "[pt, pi, pj] -> { "
+        "[statement' = 0, i', j'] -> [statement = 1, t] : "
+        "0 <= i' < pi and 0 <= j' < pj and 0 <= t < pt "
+        "}"
+        )
+    # isl ignores these apostrophes, so explicitly add them
+    expected_sio = append_marker_to_in_dim_names(expected_sio, "'")
+
+    check_sio_for_insn_pair(
+        "insn_c", "insn_d", expected_lex_order_map, expected_sio)
+
+# }}}
+
 
 if __name__ == "__main__":
     if len(sys.argv) > 1:
-- 
GitLab


From 6f109f979f39a4ab2cc7839ea582b1457c538ac6 Mon Sep 17 00:00:00 2001
From: jdsteve2 <jdsteve2@illinois.edu>
Date: Mon, 20 Apr 2020 22:28:38 -0500
Subject: [PATCH 005/140] fixing flake8 issues

---
 test/test_linearization_checker.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py
index 5a05bdd8e..52145915d 100644
--- a/test/test_linearization_checker.py
+++ b/test/test_linearization_checker.py
@@ -435,8 +435,8 @@ def test_statement_instance_ordering_creation():
             )
 
         # Get two isl maps representing the LexSchedule
-        isl_sched_map_before, isl_sched_map_after = \
-             get_isl_maps_for_LexSchedule(lex_sched, knl, insn_id_before, insn_id_after)
+        isl_sched_map_before, isl_sched_map_after = get_isl_maps_for_LexSchedule(
+            lex_sched, knl, insn_id_before, insn_id_after)
 
         # get map representing lexicographic ordering
         sched_lex_order_map = lex_sched.get_lex_order_map_for_sched_space()
@@ -463,11 +463,11 @@ def test_statement_instance_ordering_creation():
 
     expected_lex_order_map = isl.Map(
         "{ "
-        "[l0, l1, l2, l3, l4] -> [l0_, l1_, l2_, l3_, l4_] : l0_ > l0; "
-        "[l0, l1, l2, l3, l4] -> [l0_= l0, l1_, l2_, l3_, l4_] : l1_ > l1; "
-        "[l0, l1, l2, l3, l4] -> [l0_= l0, l1_= l1, l2_, l3_, l4_] : l2_ > l2; "
-        "[l0, l1, l2, l3, l4] -> [l0_= l0, l1_= l1, l2_= l2, l3_, l4_] : l3_ > l3; "
-        "[l0, l1, l2, l3, l4] -> [l0_= l0, l1_= l1, l2_= l2, l3_= l3, l4_] : l4_ > l4 "
+        "[l0, l1, l2, l3, l4] -> [l0_, l1_, l2_, l3_, l4_]: l0_ > l0; "
+        "[l0, l1, l2, l3, l4] -> [l0_= l0, l1_, l2_, l3_, l4_]: l1_ > l1; "
+        "[l0, l1, l2, l3, l4] -> [l0_= l0, l1_= l1, l2_, l3_, l4_]: l2_ > l2; "
+        "[l0, l1, l2, l3, l4] -> [l0_= l0, l1_= l1, l2_= l2, l3_, l4_]: l3_ > l3; "
+        "[l0, l1, l2, l3, l4] -> [l0_= l0, l1_= l1, l2_= l2, l3_= l3, l4_]: l4_ > l4"
         "}"
         )
 
-- 
GitLab


From ae7f906a83159796f0ae21929f7dd8d08d518279 Mon Sep 17 00:00:00 2001
From: jdsteve2 <jdsteve2@illinois.edu>
Date: Tue, 21 Apr 2020 03:57:15 -0500
Subject: [PATCH 006/140] replace append_marker_to_in_dim_names() with more
 generic append_marker_to_isl_map_var_names() that allows dim specification

---
 .../checker/lexicographic_order_map.py        |  5 ++--
 loopy/schedule/checker/utils.py               | 29 ++++++++++++++-----
 test/test_linearization_checker.py            | 20 ++++++++-----
 3 files changed, 37 insertions(+), 17 deletions(-)

diff --git a/loopy/schedule/checker/lexicographic_order_map.py b/loopy/schedule/checker/lexicographic_order_map.py
index 61f191247..ddc320ed9 100644
--- a/loopy/schedule/checker/lexicographic_order_map.py
+++ b/loopy/schedule/checker/lexicographic_order_map.py
@@ -56,9 +56,10 @@ def get_statement_ordering_map(
         lex_map).apply_range(sched_map_after.reverse())
     # append marker to in names
     from loopy.schedule.checker.utils import (
-        append_marker_to_in_dim_names,
+        append_marker_to_isl_map_var_names,
     )
-    return append_marker_to_in_dim_names(sio, before_marker)
+    return append_marker_to_isl_map_var_names(
+        sio, isl.dim_type.in_, before_marker)
 
 
 def get_lex_order_constraint(islvars, before_names, after_names):
diff --git a/loopy/schedule/checker/utils.py b/loopy/schedule/checker/utils.py
index 96aa007c7..46c33ed3b 100644
--- a/loopy/schedule/checker/utils.py
+++ b/loopy/schedule/checker/utils.py
@@ -143,6 +143,27 @@ def align_isl_maps_by_var_names(input_map, target_map):
     return aligned_input_map
 
 
+def append_marker_to_isl_map_var_names(old_isl_map, dim_type, marker="'"):
+    """Return an isl_map with marker appended to
+        dim_type dimension names.
+
+    :arg old_isl_map: A :class:`islpy.Map`.
+
+    :arg dim_type: A :class:`islpy.dim_type`, i.e., an :class:`int`,
+        specifying the dimension to be marked.
+
+    :returns: A :class:`islpy.Map` matching `old_isl_map` with
+        `marker` appended to dim_type dimension names.
+
+    """
+
+    new_map = old_isl_map.copy()
+    for i in range(len(old_isl_map.get_var_names(dim_type))):
+        new_map = new_map.set_dim_name(dim_type, i, old_isl_map.get_dim_name(
+            dim_type, i)+marker)
+    return new_map
+
+
 def append_marker_to_strings(strings, marker="'"):
     if not isinstance(strings, list):
         raise ValueError("append_marker_to_strings did not receive a list")
@@ -150,14 +171,6 @@ def append_marker_to_strings(strings, marker="'"):
         return [s+marker for s in strings]
 
 
-def append_marker_to_in_dim_names(islmap, marker="'"):
-    # append marker to in names
-    for i in range(islmap.dim(isl.dim_type.in_)):
-        islmap = islmap.set_dim_name(isl.dim_type.in_, i, islmap.get_dim_name(
-            isl.dim_type.in_, i)+marker)
-    return islmap
-
-
 def _union_of_isl_sets_or_maps(set_list):
     union = set_list[0]
     for s in set_list[1:]:
diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py
index 52145915d..a15d48d1c 100644
--- a/test/test_linearization_checker.py
+++ b/test/test_linearization_checker.py
@@ -377,7 +377,7 @@ def test_statement_instance_ordering_creation():
     )
     from loopy.schedule.checker.utils import (
         align_isl_maps_by_var_names,
-        append_marker_to_in_dim_names,
+        append_marker_to_isl_map_var_names,
     )
     from loopy.schedule.checker.lexicographic_order_map import (
         get_statement_ordering_map,
@@ -482,7 +482,8 @@ def test_statement_instance_ordering_creation():
         "}"
         )
     # isl ignores these apostrophes, so explicitly add them
-    expected_sio = append_marker_to_in_dim_names(expected_sio, "'")
+    expected_sio = append_marker_to_isl_map_var_names(
+        expected_sio, isl.dim_type.in_, "'")
 
     check_sio_for_insn_pair(
         "insn_a", "insn_b", expected_lex_order_map, expected_sio)
@@ -498,7 +499,8 @@ def test_statement_instance_ordering_creation():
         "}"
         )
     # isl ignores these apostrophes, so explicitly add them
-    expected_sio = append_marker_to_in_dim_names(expected_sio, "'")
+    expected_sio = append_marker_to_isl_map_var_names(
+        expected_sio, isl.dim_type.in_, "'")
 
     check_sio_for_insn_pair(
         "insn_a", "insn_c", expected_lex_order_map, expected_sio)
@@ -512,7 +514,8 @@ def test_statement_instance_ordering_creation():
         "}"
         )
     # isl ignores these apostrophes, so explicitly add them
-    expected_sio = append_marker_to_in_dim_names(expected_sio, "'")
+    expected_sio = append_marker_to_isl_map_var_names(
+        expected_sio, isl.dim_type.in_, "'")
 
     check_sio_for_insn_pair(
         "insn_a", "insn_d", expected_lex_order_map, expected_sio)
@@ -530,7 +533,8 @@ def test_statement_instance_ordering_creation():
         "}"
         )
     # isl ignores these apostrophes, so explicitly add them
-    expected_sio = append_marker_to_in_dim_names(expected_sio, "'")
+    expected_sio = append_marker_to_isl_map_var_names(
+        expected_sio, isl.dim_type.in_, "'")
 
     check_sio_for_insn_pair(
         "insn_b", "insn_c", expected_lex_order_map, expected_sio)
@@ -544,7 +548,8 @@ def test_statement_instance_ordering_creation():
         "}"
         )
     # isl ignores these apostrophes, so explicitly add them
-    expected_sio = append_marker_to_in_dim_names(expected_sio, "'")
+    expected_sio = append_marker_to_isl_map_var_names(
+        expected_sio, isl.dim_type.in_, "'")
 
     check_sio_for_insn_pair(
         "insn_b", "insn_d", expected_lex_order_map, expected_sio)
@@ -558,7 +563,8 @@ def test_statement_instance_ordering_creation():
         "}"
         )
     # isl ignores these apostrophes, so explicitly add them
-    expected_sio = append_marker_to_in_dim_names(expected_sio, "'")
+    expected_sio = append_marker_to_isl_map_var_names(
+        expected_sio, isl.dim_type.in_, "'")
 
     check_sio_for_insn_pair(
         "insn_c", "insn_d", expected_lex_order_map, expected_sio)
-- 
GitLab


From 2556e7590f6724b1a49c8370925dc9701aab6097 Mon Sep 17 00:00:00 2001
From: jdsteve2 <jdsteve2@illinois.edu>
Date: Mon, 27 Apr 2020 18:16:23 -0500
Subject: [PATCH 007/140] remove extra args from get_isl_maps_for_LexSchedule()

---
 test/test_linearization_checker.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py
index c6f8d56dc..f51b050ac 100644
--- a/test/test_linearization_checker.py
+++ b/test/test_linearization_checker.py
@@ -436,7 +436,7 @@ def test_statement_instance_ordering_creation():
 
         # Get two isl maps representing the LexSchedule
         isl_sched_map_before, isl_sched_map_after = get_isl_maps_for_LexSchedule(
-            lex_sched, knl, insn_id_before, insn_id_after)
+            lex_sched, knl)
 
         # get map representing lexicographic ordering
         sched_lex_order_map = lex_sched.get_lex_order_map_for_sched_space()
-- 
GitLab


From f38f3027c1b575c6cbce1849b80a37292accbb85 Mon Sep 17 00:00:00 2001
From: jdsteve2 <jdsteve2@illinois.edu>
Date: Tue, 12 May 2020 00:47:46 -0500
Subject: [PATCH 008/140] add new reserved prefix to map vars

---
 test/test_linearization_checker.py | 55 +++++++++++++++++++-----------
 1 file changed, 36 insertions(+), 19 deletions(-)

diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py
index 9ce2f981e..1e5457b94 100644
--- a/test/test_linearization_checker.py
+++ b/test/test_linearization_checker.py
@@ -485,23 +485,40 @@ def test_statement_instance_ordering_creation():
 
         assert sio_aligned == expected_sio
 
-    expected_lex_order_map = isl.Map(
-        "{ "
-        "[l0, l1, l2, l3, l4] -> [l0_, l1_, l2_, l3_, l4_]: l0_ > l0; "
-        "[l0, l1, l2, l3, l4] -> [l0_= l0, l1_, l2_, l3_, l4_]: l1_ > l1; "
-        "[l0, l1, l2, l3, l4] -> [l0_= l0, l1_= l1, l2_, l3_, l4_]: l2_ > l2; "
-        "[l0, l1, l2, l3, l4] -> [l0_= l0, l1_= l1, l2_= l2, l3_, l4_]: l3_ > l3; "
-        "[l0, l1, l2, l3, l4] -> [l0_= l0, l1_= l1, l2_= l2, l3_= l3, l4_]: l4_ > l4"
-        "}"
-        )
+    expected_lex_order_map = isl.Map("{ "
+        "[_lp_sched_l0, _lp_sched_l1, _lp_sched_l2, _lp_sched_l3, _lp_sched_l4] -> "
+        "[_lp_sched_l0_, _lp_sched_l1_, _lp_sched_l2_, _lp_sched_l3_, _lp_sched_l4_]"
+        ":"
+        "("
+        "_lp_sched_l0_ > _lp_sched_l0 "
+        ") or ("
+        "_lp_sched_l0_= _lp_sched_l0 and "
+        "_lp_sched_l1_ > _lp_sched_l1 "
+        ") or ("
+        "_lp_sched_l0_= _lp_sched_l0 and "
+        "_lp_sched_l1_= _lp_sched_l1 and "
+        "_lp_sched_l2_ > _lp_sched_l2 "
+        ") or ("
+        "_lp_sched_l0_= _lp_sched_l0 and "
+        "_lp_sched_l1_= _lp_sched_l1 and "
+        "_lp_sched_l2_= _lp_sched_l2 and "
+        "_lp_sched_l3_ > _lp_sched_l3 "
+        ") or ("
+        "_lp_sched_l0_= _lp_sched_l0 and "
+        "_lp_sched_l1_= _lp_sched_l1 and "
+        "_lp_sched_l2_= _lp_sched_l2 and "
+        "_lp_sched_l3_= _lp_sched_l3 and "
+        "_lp_sched_l4_ > _lp_sched_l4"
+        ")"
+        "}")
 
     # Relationship between insn_a and insn_b ---------------------------------------
 
     expected_sio = isl.Map(
         "[pi, pj, pk] -> { "
-        "[statement' = 0, i', k'] -> [statement = 1, i, j] : "
+        "[_lp_sched_statement'=0, i', k'] -> [_lp_sched_statement=1, i, j]:"
         "0 <= i' < pi and 0 <= k' < pk and 0 <= j < pj and 0 <= i < pi and i > i'; "
-        "[statement' = 0, i', k'] -> [statement = 1, i = i', j] : "
+        "[_lp_sched_statement'=0, i', k'] -> [_lp_sched_statement=1, i=i', j]:"
         "0 <= i' < pi and 0 <= k' < pk and 0 <= j < pj "
         "}"
         )
@@ -516,9 +533,9 @@ def test_statement_instance_ordering_creation():
 
     expected_sio = isl.Map(
         "[pi, pj, pk] -> { "
-        "[statement' = 0, i', k'] -> [statement = 1, i, j] : "
+        "[_lp_sched_statement'=0, i', k'] -> [_lp_sched_statement=1, i, j]:"
         "0 <= i' < pi and 0 <= k' < pk and 0 <= j < pj and 0 <= i < pi and i > i'; "
-        "[statement' = 0, i', k'] -> [statement = 1, i = i', j] : "
+        "[_lp_sched_statement'=0, i', k'] -> [_lp_sched_statement=1, i=i', j]:"
         "0 <= i' < pi and 0 <= k' < pk and 0 <= j < pj "
         "}"
         )
@@ -533,7 +550,7 @@ def test_statement_instance_ordering_creation():
 
     expected_sio = isl.Map(
         "[pt, pi, pk] -> { "
-        "[statement' = 0, i', k'] -> [statement = 1, t] : "
+        "[_lp_sched_statement'=0, i', k'] -> [_lp_sched_statement=1, t]:"
         "0 <= i' < pi and 0 <= k' < pk and 0 <= t < pt "
         "}"
         )
@@ -548,11 +565,11 @@ def test_statement_instance_ordering_creation():
 
     expected_sio = isl.Map(
         "[pi, pj] -> { "
-        "[statement' = 0, i', j'] -> [statement = 1, i, j] : "
+        "[_lp_sched_statement'=0, i', j'] -> [_lp_sched_statement=1, i, j]:"
         "0 <= i' < pi and 0 <= j' < pj and i > i' and 0 <= i < pi and 0 <= j < pj; "
-        "[statement' = 0, i', j'] -> [statement = 1, i = i', j] : "
+        "[_lp_sched_statement'=0, i', j'] -> [_lp_sched_statement=1, i=i', j]:"
         "0 <= i' < pi and 0 <= j' < pj and j > j' and 0 <= j < pj; "
-        "[statement' = 0, i', j'] -> [statement = 1, i = i', j = j'] : "
+        "[_lp_sched_statement'=0, i', j'] -> [_lp_sched_statement=1, i=i', j=j']:"
         "0 <= i' < pi and 0 <= j' < pj "
         "}"
         )
@@ -567,7 +584,7 @@ def test_statement_instance_ordering_creation():
 
     expected_sio = isl.Map(
         "[pt, pi, pj] -> { "
-        "[statement' = 0, i', j'] -> [statement = 1, t] : "
+        "[_lp_sched_statement'=0, i', j'] -> [_lp_sched_statement=1, t]:"
         "0 <= i' < pi and 0 <= j' < pj and 0 <= t < pt "
         "}"
         )
@@ -582,7 +599,7 @@ def test_statement_instance_ordering_creation():
 
     expected_sio = isl.Map(
         "[pt, pi, pj] -> { "
-        "[statement' = 0, i', j'] -> [statement = 1, t] : "
+        "[_lp_sched_statement'=0, i', j'] -> [_lp_sched_statement=1, t]:"
         "0 <= i' < pi and 0 <= j' < pj and 0 <= t < pt "
         "}"
         )
-- 
GitLab


From d4506a0ef3d0f8bf3adf3efbe231f4be6d1cbc09 Mon Sep 17 00:00:00 2001
From: jdsteve2 <jdsteve2@illinois.edu>
Date: Tue, 12 May 2020 01:08:24 -0500
Subject: [PATCH 009/140] =?UTF-8?q?use=20composition=20symbol=20=E2=97=A6?=
 =?UTF-8?q?=20in=20docstring=20for=20get=5Fstatement=5Fordering=5Fmap?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 loopy/schedule/checker/lexicographic_order_map.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/loopy/schedule/checker/lexicographic_order_map.py b/loopy/schedule/checker/lexicographic_order_map.py
index ddc320ed9..f42e8e610 100644
--- a/loopy/schedule/checker/lexicographic_order_map.py
+++ b/loopy/schedule/checker/lexicographic_order_map.py
@@ -46,7 +46,7 @@ def get_statement_ordering_map(
 
     :returns: An :class:`islpy.Map` representing the lex schedule as
         a mapping from each statement instance to all statement instances
-        occuring later. I.e., we compose B -> L -> A^-1, where B
+        occuring later. I.e., we compose B ◦ L ◦ A^-1, where B
         is sched_map_before, A is sched_map_after, and L is the
         lexicographic ordering map.
 
-- 
GitLab


From 1568d79dd0d36a33e77efb6ad94d997e6fa2e217 Mon Sep 17 00:00:00 2001
From: jdsteve2 <jdsteve2@illinois.edu>
Date: Tue, 12 May 2020 01:12:18 -0500
Subject: [PATCH 010/140] in docstring for get_statement_ordering_map(),
 clarify that we are composing relations

---
 loopy/schedule/checker/lexicographic_order_map.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/loopy/schedule/checker/lexicographic_order_map.py b/loopy/schedule/checker/lexicographic_order_map.py
index f42e8e610..ce8808119 100644
--- a/loopy/schedule/checker/lexicographic_order_map.py
+++ b/loopy/schedule/checker/lexicographic_order_map.py
@@ -46,9 +46,9 @@ def get_statement_ordering_map(
 
     :returns: An :class:`islpy.Map` representing the lex schedule as
         a mapping from each statement instance to all statement instances
-        occuring later. I.e., we compose B ◦ L ◦ A^-1, where B
-        is sched_map_before, A is sched_map_after, and L is the
-        lexicographic ordering map.
+        occuring later. I.e., we compose relations B, L, and A as
+        B ◦ L ◦ A^-1, where B is sched_map_before, A is sched_map_after,
+        and L is the lexicographic ordering map.
 
     """
 
-- 
GitLab


From a2c007b2f6908d72ccbd1c125347ee1e0f5e1c7a Mon Sep 17 00:00:00 2001
From: jdsteve2 <jdsteve2@illinois.edu>
Date: Tue, 19 May 2020 00:04:56 -0500
Subject: [PATCH 011/140] try a slightly different function composition symbol
 (to address 'Non-ASCII character' syntax error)

---
 loopy/schedule/checker/lexicographic_order_map.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/loopy/schedule/checker/lexicographic_order_map.py b/loopy/schedule/checker/lexicographic_order_map.py
index ce8808119..9807d293f 100644
--- a/loopy/schedule/checker/lexicographic_order_map.py
+++ b/loopy/schedule/checker/lexicographic_order_map.py
@@ -47,7 +47,7 @@ def get_statement_ordering_map(
     :returns: An :class:`islpy.Map` representing the lex schedule as
         a mapping from each statement instance to all statement instances
         occuring later. I.e., we compose relations B, L, and A as
-        B ◦ L ◦ A^-1, where B is sched_map_before, A is sched_map_after,
+        B ∘ L ∘ A^-1, where B is sched_map_before, A is sched_map_after,
         and L is the lexicographic ordering map.
 
     """
-- 
GitLab


From 11f8edd708ada13db5f81aa6b2d87638978155ca Mon Sep 17 00:00:00 2001
From: jdsteve2 <jdsteve2@illinois.edu>
Date: Tue, 19 May 2020 00:11:31 -0500
Subject: [PATCH 012/140] add 'coding: utf-8' at top of file to allow
 composition character

---
 loopy/schedule/checker/lexicographic_order_map.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/loopy/schedule/checker/lexicographic_order_map.py b/loopy/schedule/checker/lexicographic_order_map.py
index 9807d293f..5ce2bb4a5 100644
--- a/loopy/schedule/checker/lexicographic_order_map.py
+++ b/loopy/schedule/checker/lexicographic_order_map.py
@@ -1,3 +1,4 @@
+# coding: utf-8
 __copyright__ = "Copyright (C) 2019 James Stevens"
 
 __license__ = """
-- 
GitLab


From db5fefe4c803947855484b96ce3132a3dc0a4a45 Mon Sep 17 00:00:00 2001
From: jdsteve2 <jdsteve2@illinois.edu>
Date: Tue, 19 May 2020 01:57:43 -0500
Subject: [PATCH 013/140] improve time complexity of get_lex_order_constraint()

---
 .../checker/lexicographic_order_map.py        | 30 +++++++++++++++----
 1 file changed, 24 insertions(+), 6 deletions(-)

diff --git a/loopy/schedule/checker/lexicographic_order_map.py b/loopy/schedule/checker/lexicographic_order_map.py
index 5ce2bb4a5..d783bac76 100644
--- a/loopy/schedule/checker/lexicographic_order_map.py
+++ b/loopy/schedule/checker/lexicographic_order_map.py
@@ -92,14 +92,32 @@ def get_lex_order_constraint(islvars, before_names, after_names):
 
     """
 
+    # Initialize constraint with i0' < i0
     lex_order_constraint = islvars[before_names[0]].lt_set(islvars[after_names[0]])
+
+    # Initialize conjunction constraint with True.
+    # For each dim d, starting with d=1, this conjunction will have d equalities,
+    # e.g., (i0' = i0 and i1' = i1 and ... i(d-1)' = i(d-1))
+    equality_constraint_conj = islvars[0].eq_set(islvars[0])
+
     for i in range(1, len(before_names)):
-        lex_order_constraint_conj = islvars[before_names[i]].lt_set(
-            islvars[after_names[i]])
-        for j in range(i):
-            lex_order_constraint_conj = lex_order_constraint_conj & \
-                islvars[before_names[j]].eq_set(islvars[after_names[j]])
-        lex_order_constraint = lex_order_constraint | lex_order_constraint_conj
+
+        # Add the next equality constraint to equality_constraint_conj
+        equality_constraint_conj = equality_constraint_conj & \
+            islvars[before_names[i-1]].eq_set(islvars[after_names[i-1]])
+
+        # Create a conjunction constraint by combining a less-than
+        # constraint for this dim, e.g., (i1' < i1), with the current
+        # equality constraint conjunction.
+        # For each dim d, starting with d=1, this conjunction will have d equalities,
+        # and one inequality,
+        # e.g., (i0' = i0 and i1' = i1 and ... i(d-1)' = i(d-1) and id' < id)
+        full_conj_constraint = islvars[before_names[i]].lt_set(
+            islvars[after_names[i]]) & equality_constraint_conj
+
+        # Union this new constraint with the current lex_order_constraint
+        lex_order_constraint = lex_order_constraint | full_conj_constraint
+
     return lex_order_constraint
 
 
-- 
GitLab


From 97e90820c5c232b845bf5063bfe2a71bd3bee01b Mon Sep 17 00:00:00 2001
From: jdsteve2 <jdsteve2@illinois.edu>
Date: Tue, 19 May 2020 02:22:12 -0500
Subject: [PATCH 014/140] have create_lex_order_map() put apostrophes on
 'before' vars for consistency with other logic

---
 .../checker/lexicographic_order_map.py        |  6 +--
 loopy/schedule/checker/schedule.py            |  2 +-
 test/test_linearization_checker.py            | 40 +++++++++++--------
 3 files changed, 27 insertions(+), 21 deletions(-)

diff --git a/loopy/schedule/checker/lexicographic_order_map.py b/loopy/schedule/checker/lexicographic_order_map.py
index d783bac76..17b6616ca 100644
--- a/loopy/schedule/checker/lexicographic_order_map.py
+++ b/loopy/schedule/checker/lexicographic_order_map.py
@@ -153,13 +153,13 @@ def create_lex_order_map(
 
     """
 
-    if before_names is None:
-        before_names = ["i%s" % (i) for i in range(n_dims)]
     if after_names is None:
+        after_names = ["i%s" % (i) for i in range(n_dims)]
+    if before_names is None:
         from loopy.schedule.checker.utils import (
             append_marker_to_strings,
         )
-        after_names = append_marker_to_strings(before_names, marker="_")
+        before_names = append_marker_to_strings(after_names, marker="'")
 
     assert len(before_names) == len(after_names) == n_dims
     dim_type = isl.dim_type
diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py
index ea0829199..a87723480 100644
--- a/loopy/schedule/checker/schedule.py
+++ b/loopy/schedule/checker/schedule.py
@@ -409,7 +409,7 @@ class LexSchedule(object):
         )
         n_dims = self.max_lex_dims()
         return create_lex_order_map(
-            n_dims, before_names=self.get_lex_var_names())
+            n_dims, after_names=self.get_lex_var_names())
 
     def __str__(self):
 
diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py
index 1e5457b94..e57df9ac8 100644
--- a/test/test_linearization_checker.py
+++ b/test/test_linearization_checker.py
@@ -486,32 +486,38 @@ def test_statement_instance_ordering_creation():
         assert sio_aligned == expected_sio
 
     expected_lex_order_map = isl.Map("{ "
-        "[_lp_sched_l0, _lp_sched_l1, _lp_sched_l2, _lp_sched_l3, _lp_sched_l4] -> "
-        "[_lp_sched_l0_, _lp_sched_l1_, _lp_sched_l2_, _lp_sched_l3_, _lp_sched_l4_]"
+        "[_lp_sched_l0', _lp_sched_l1', _lp_sched_l2', _lp_sched_l3', _lp_sched_l4']"
+        " -> [_lp_sched_l0, _lp_sched_l1, _lp_sched_l2, _lp_sched_l3, _lp_sched_l4]"
         ":"
         "("
-        "_lp_sched_l0_ > _lp_sched_l0 "
+        "_lp_sched_l0' < _lp_sched_l0 "
         ") or ("
-        "_lp_sched_l0_= _lp_sched_l0 and "
-        "_lp_sched_l1_ > _lp_sched_l1 "
+        "_lp_sched_l0'= _lp_sched_l0 and "
+        "_lp_sched_l1' < _lp_sched_l1 "
         ") or ("
-        "_lp_sched_l0_= _lp_sched_l0 and "
-        "_lp_sched_l1_= _lp_sched_l1 and "
-        "_lp_sched_l2_ > _lp_sched_l2 "
+        "_lp_sched_l0'= _lp_sched_l0 and "
+        "_lp_sched_l1'= _lp_sched_l1 and "
+        "_lp_sched_l2' < _lp_sched_l2 "
         ") or ("
-        "_lp_sched_l0_= _lp_sched_l0 and "
-        "_lp_sched_l1_= _lp_sched_l1 and "
-        "_lp_sched_l2_= _lp_sched_l2 and "
-        "_lp_sched_l3_ > _lp_sched_l3 "
+        "_lp_sched_l0'= _lp_sched_l0 and "
+        "_lp_sched_l1'= _lp_sched_l1 and "
+        "_lp_sched_l2'= _lp_sched_l2 and "
+        "_lp_sched_l3' < _lp_sched_l3 "
         ") or ("
-        "_lp_sched_l0_= _lp_sched_l0 and "
-        "_lp_sched_l1_= _lp_sched_l1 and "
-        "_lp_sched_l2_= _lp_sched_l2 and "
-        "_lp_sched_l3_= _lp_sched_l3 and "
-        "_lp_sched_l4_ > _lp_sched_l4"
+        "_lp_sched_l0'= _lp_sched_l0 and "
+        "_lp_sched_l1'= _lp_sched_l1 and "
+        "_lp_sched_l2'= _lp_sched_l2 and "
+        "_lp_sched_l3'= _lp_sched_l3 and "
+        "_lp_sched_l4' < _lp_sched_l4"
         ")"
         "}")
 
+    # Isl ignores these apostrophes, but test would still pass since it ignores
+    # variable names when checking for equality. Even so, explicitly add apostrophes
+    # for sanity.
+    expected_lex_order_map = append_marker_to_isl_map_var_names(
+        expected_lex_order_map, isl.dim_type.in_, "'")
+
     # Relationship between insn_a and insn_b ---------------------------------------
 
     expected_sio = isl.Map(
-- 
GitLab


From 3b5d4caa5a5f1e272172370f949bcd19a54d9b0a Mon Sep 17 00:00:00 2001
From: jdsteve2 <jdsteve2@illinois.edu>
Date: Tue, 26 May 2020 10:27:36 -0500
Subject: [PATCH 015/140] rename
 LexScheduleStatement->PairwiseScheduleStatement,
 get_isl_maps_for_LexSchedule->get_isl_maps_from_PairwiseScheduleBuilder,
 LexSchedule->PairwiseScheduleBuilder; also rename other variables for
 consistency

---
 test/test_linearization_checker.py | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py
index df40c1dd5..255d2b0a6 100644
--- a/test/test_linearization_checker.py
+++ b/test/test_linearization_checker.py
@@ -46,9 +46,9 @@ else:
     faulthandler.enable()
 
 
-# {{{ test LexSchedule and isl map creation
+# {{{ test PairwiseScheduleBuilder and isl map creation
 
-def test_lexschedule_and_islmap_creation():
+def test_pairwise_schedule_and_islmap_creation():
     import islpy as isl
     from loopy.schedule.checker import (
         get_schedule_for_statement_pair,
@@ -397,7 +397,7 @@ def test_statement_instance_ordering_creation():
     import islpy as isl
     from loopy.schedule.checker import (
         get_schedule_for_statement_pair,
-        get_isl_maps_for_LexSchedule,
+        get_isl_maps_from_PairwiseScheduleBuilder,
     )
     from loopy.schedule.checker.utils import (
         align_isl_maps_by_var_names,
@@ -451,19 +451,19 @@ def test_statement_instance_ordering_creation():
             expected_sio,
             ):
 
-        lex_sched = get_schedule_for_statement_pair(
+        sched_builder = get_schedule_for_statement_pair(
             knl,
             linearization_items,
             insn_id_before,
             insn_id_after,
             )
 
-        # Get two isl maps representing the LexSchedule
-        isl_sched_map_before, isl_sched_map_after = get_isl_maps_for_LexSchedule(
-            lex_sched, knl)
+        # Get two isl maps from the PairwiseScheduleBuilder
+        isl_sched_map_before, isl_sched_map_after = \
+            get_isl_maps_from_PairwiseScheduleBuilder(sched_builder, knl)
 
         # get map representing lexicographic ordering
-        sched_lex_order_map = lex_sched.get_lex_order_map_for_sched_space()
+        sched_lex_order_map = sched_builder.get_lex_order_map_for_sched_space()
 
         assert sched_lex_order_map == expected_lex_order_map
 
-- 
GitLab


From ba46ade4f5b002e72451d593162cac22cfa10553 Mon Sep 17 00:00:00 2001
From: jdsteve2 <jdsteve2@illinois.edu>
Date: Mon, 1 Jun 2020 22:30:23 -0500
Subject: [PATCH 016/140] update identifier prefix for loopy.schedule.checker
 from _lp_sched_->_lp_linchk_

---
 test/test_linearization_checker.py | 57 ++++++++++++++++--------------
 1 file changed, 30 insertions(+), 27 deletions(-)

diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py
index 6841072ff..01e28f24a 100644
--- a/test/test_linearization_checker.py
+++ b/test/test_linearization_checker.py
@@ -486,29 +486,32 @@ def test_statement_instance_ordering_creation():
         assert sio_aligned == expected_sio
 
     expected_lex_order_map = isl.Map("{ "
-        "[_lp_sched_l0', _lp_sched_l1', _lp_sched_l2', _lp_sched_l3', _lp_sched_l4']"
-        " -> [_lp_sched_l0, _lp_sched_l1, _lp_sched_l2, _lp_sched_l3, _lp_sched_l4]"
+        "[_lp_linchk_l0', _lp_linchk_l1', _lp_linchk_l2', _lp_linchk_l3', "
+        "_lp_linchk_l4']"
+        " -> "
+        "[_lp_linchk_l0, _lp_linchk_l1, _lp_linchk_l2, _lp_linchk_l3, "
+        "_lp_linchk_l4]"
         ":"
         "("
-        "_lp_sched_l0' < _lp_sched_l0 "
+        "_lp_linchk_l0' < _lp_linchk_l0 "
         ") or ("
-        "_lp_sched_l0'= _lp_sched_l0 and "
-        "_lp_sched_l1' < _lp_sched_l1 "
+        "_lp_linchk_l0'= _lp_linchk_l0 and "
+        "_lp_linchk_l1' < _lp_linchk_l1 "
         ") or ("
-        "_lp_sched_l0'= _lp_sched_l0 and "
-        "_lp_sched_l1'= _lp_sched_l1 and "
-        "_lp_sched_l2' < _lp_sched_l2 "
+        "_lp_linchk_l0'= _lp_linchk_l0 and "
+        "_lp_linchk_l1'= _lp_linchk_l1 and "
+        "_lp_linchk_l2' < _lp_linchk_l2 "
         ") or ("
-        "_lp_sched_l0'= _lp_sched_l0 and "
-        "_lp_sched_l1'= _lp_sched_l1 and "
-        "_lp_sched_l2'= _lp_sched_l2 and "
-        "_lp_sched_l3' < _lp_sched_l3 "
+        "_lp_linchk_l0'= _lp_linchk_l0 and "
+        "_lp_linchk_l1'= _lp_linchk_l1 and "
+        "_lp_linchk_l2'= _lp_linchk_l2 and "
+        "_lp_linchk_l3' < _lp_linchk_l3 "
         ") or ("
-        "_lp_sched_l0'= _lp_sched_l0 and "
-        "_lp_sched_l1'= _lp_sched_l1 and "
-        "_lp_sched_l2'= _lp_sched_l2 and "
-        "_lp_sched_l3'= _lp_sched_l3 and "
-        "_lp_sched_l4' < _lp_sched_l4"
+        "_lp_linchk_l0'= _lp_linchk_l0 and "
+        "_lp_linchk_l1'= _lp_linchk_l1 and "
+        "_lp_linchk_l2'= _lp_linchk_l2 and "
+        "_lp_linchk_l3'= _lp_linchk_l3 and "
+        "_lp_linchk_l4' < _lp_linchk_l4"
         ")"
         "}")
 
@@ -522,9 +525,9 @@ def test_statement_instance_ordering_creation():
 
     expected_sio = isl.Map(
         "[pi, pj, pk] -> { "
-        "[_lp_sched_statement'=0, i', k'] -> [_lp_sched_statement=1, i, j]:"
+        "[_lp_linchk_statement'=0, i', k'] -> [_lp_linchk_statement=1, i, j]:"
         "0 <= i' < pi and 0 <= k' < pk and 0 <= j < pj and 0 <= i < pi and i > i'; "
-        "[_lp_sched_statement'=0, i', k'] -> [_lp_sched_statement=1, i=i', j]:"
+        "[_lp_linchk_statement'=0, i', k'] -> [_lp_linchk_statement=1, i=i', j]:"
         "0 <= i' < pi and 0 <= k' < pk and 0 <= j < pj "
         "}"
         )
@@ -539,9 +542,9 @@ def test_statement_instance_ordering_creation():
 
     expected_sio = isl.Map(
         "[pi, pj, pk] -> { "
-        "[_lp_sched_statement'=0, i', k'] -> [_lp_sched_statement=1, i, j]:"
+        "[_lp_linchk_statement'=0, i', k'] -> [_lp_linchk_statement=1, i, j]:"
         "0 <= i' < pi and 0 <= k' < pk and 0 <= j < pj and 0 <= i < pi and i > i'; "
-        "[_lp_sched_statement'=0, i', k'] -> [_lp_sched_statement=1, i=i', j]:"
+        "[_lp_linchk_statement'=0, i', k'] -> [_lp_linchk_statement=1, i=i', j]:"
         "0 <= i' < pi and 0 <= k' < pk and 0 <= j < pj "
         "}"
         )
@@ -556,7 +559,7 @@ def test_statement_instance_ordering_creation():
 
     expected_sio = isl.Map(
         "[pt, pi, pk] -> { "
-        "[_lp_sched_statement'=0, i', k'] -> [_lp_sched_statement=1, t]:"
+        "[_lp_linchk_statement'=0, i', k'] -> [_lp_linchk_statement=1, t]:"
         "0 <= i' < pi and 0 <= k' < pk and 0 <= t < pt "
         "}"
         )
@@ -571,11 +574,11 @@ def test_statement_instance_ordering_creation():
 
     expected_sio = isl.Map(
         "[pi, pj] -> { "
-        "[_lp_sched_statement'=0, i', j'] -> [_lp_sched_statement=1, i, j]:"
+        "[_lp_linchk_statement'=0, i', j'] -> [_lp_linchk_statement=1, i, j]:"
         "0 <= i' < pi and 0 <= j' < pj and i > i' and 0 <= i < pi and 0 <= j < pj; "
-        "[_lp_sched_statement'=0, i', j'] -> [_lp_sched_statement=1, i=i', j]:"
+        "[_lp_linchk_statement'=0, i', j'] -> [_lp_linchk_statement=1, i=i', j]:"
         "0 <= i' < pi and 0 <= j' < pj and j > j' and 0 <= j < pj; "
-        "[_lp_sched_statement'=0, i', j'] -> [_lp_sched_statement=1, i=i', j=j']:"
+        "[_lp_linchk_statement'=0, i', j'] -> [_lp_linchk_statement=1, i=i', j=j']:"
         "0 <= i' < pi and 0 <= j' < pj "
         "}"
         )
@@ -590,7 +593,7 @@ def test_statement_instance_ordering_creation():
 
     expected_sio = isl.Map(
         "[pt, pi, pj] -> { "
-        "[_lp_sched_statement'=0, i', j'] -> [_lp_sched_statement=1, t]:"
+        "[_lp_linchk_statement'=0, i', j'] -> [_lp_linchk_statement=1, t]:"
         "0 <= i' < pi and 0 <= j' < pj and 0 <= t < pt "
         "}"
         )
@@ -605,7 +608,7 @@ def test_statement_instance_ordering_creation():
 
     expected_sio = isl.Map(
         "[pt, pi, pj] -> { "
-        "[_lp_sched_statement'=0, i', j'] -> [_lp_sched_statement=1, t]:"
+        "[_lp_linchk_statement'=0, i', j'] -> [_lp_linchk_statement=1, t]:"
         "0 <= i' < pi and 0 <= j' < pj and 0 <= t < pt "
         "}"
         )
-- 
GitLab


From a4c97513effa690b7c3a66f67caf54ed565490ad Mon Sep 17 00:00:00 2001
From: jdsteve2 <jdsteve2@illinois.edu>
Date: Tue, 2 Jun 2020 03:30:13 -0500
Subject: [PATCH 017/140] don't require islvars be passed to
 get_lex_order_constraint(); islvars default: create islvars from
 before_names+after_names

---
 .../checker/lexicographic_order_map.py        | 27 ++++++++++---------
 1 file changed, 14 insertions(+), 13 deletions(-)

diff --git a/loopy/schedule/checker/lexicographic_order_map.py b/loopy/schedule/checker/lexicographic_order_map.py
index 17b6616ca..b547e1d94 100644
--- a/loopy/schedule/checker/lexicographic_order_map.py
+++ b/loopy/schedule/checker/lexicographic_order_map.py
@@ -63,17 +63,11 @@ def get_statement_ordering_map(
         sio, isl.dim_type.in_, before_marker)
 
 
-def get_lex_order_constraint(islvars, before_names, after_names):
+def get_lex_order_constraint(before_names, after_names, islvars=None):
     """Return a constraint represented as an :class:`islpy.Set`
         defining a 'happens before' relationship in a lexicographic
         ordering.
 
-    :arg islvars: A dictionary from variable names to :class:`islpy.PwAff`
-        instances that represent each of the variables
-        (islvars may be produced by `islpy.make_zero_and_vars`). The key
-        '0' is also include and represents a :class:`islpy.PwAff` zero constant.
-        This dictionary defines the space to be used for the set.
-
     :arg before_names: A list of :class:`str` variable names representing
         the lexicographic space dimensions for a point in lexicographic
         time that occurs before. (see example below)
@@ -82,6 +76,14 @@ def get_lex_order_constraint(islvars, before_names, after_names):
         the lexicographic space dimensions for a point in lexicographic
         time that occurs after. (see example below)
 
+    :arg islvars: A dictionary from variable names to :class:`islpy.PwAff`
+        instances that represent each of the variables
+        (islvars may be produced by `islpy.make_zero_and_vars`). The key
+        '0' is also include and represents a :class:`islpy.PwAff` zero constant.
+        This dictionary defines the space to be used for the set. If no
+        value is passed, the dictionary will be made using ``before_names``
+        and ``after_names``.
+
     :returns: An :class:`islpy.Set` representing a constraint that enforces a
         lexicographic ordering. E.g., if ``before_names = [i0', i1', i2']`` and
         ``after_names = [i0, i1, i2]``, return the set::
@@ -92,6 +94,10 @@ def get_lex_order_constraint(islvars, before_names, after_names):
 
     """
 
+    # If no islvars passed, make them using the names provided
+    if islvars is None:
+        islvars = isl.make_zero_and_vars(before_names+after_names, [])
+
     # Initialize constraint with i0' < i0
     lex_order_constraint = islvars[before_names[0]].lt_set(islvars[after_names[0]])
 
@@ -164,12 +170,7 @@ def create_lex_order_map(
     assert len(before_names) == len(after_names) == n_dims
     dim_type = isl.dim_type
 
-    islvars = isl.make_zero_and_vars(
-            before_names+after_names,
-            [])
-
-    lex_order_constraint = get_lex_order_constraint(
-        islvars, before_names, after_names)
+    lex_order_constraint = get_lex_order_constraint(before_names, after_names)
 
     lex_map = isl.Map.from_domain(lex_order_constraint)
     lex_map = lex_map.move_dims(
-- 
GitLab


From ed8c8fa252fc895c3e7ce254111227d981d1b94c Mon Sep 17 00:00:00 2001
From: jdsteve2 <jdsteve2@illinois.edu>
Date: Tue, 2 Jun 2020 04:16:23 -0500
Subject: [PATCH 018/140] delete stray print statements in
 test_statement_instance_ordering_creation()

---
 test/test_linearization_checker.py | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py
index 01e28f24a..58884b443 100644
--- a/test/test_linearization_checker.py
+++ b/test/test_linearization_checker.py
@@ -475,14 +475,8 @@ def test_statement_instance_ordering_creation():
             sched_lex_order_map,
             )
 
-        print(sio)
-        print(expected_sio)
-
         sio_aligned = align_isl_maps_by_var_names(sio, expected_sio)
 
-        print(sio_aligned)
-        print(expected_sio)
-
         assert sio_aligned == expected_sio
 
     expected_lex_order_map = isl.Map("{ "
-- 
GitLab


From d345c21fc0b6cc4c6c4de3b403c1565f4f35ec17 Mon Sep 17 00:00:00 2001
From: jdsteve2 <jdsteve2@illinois.edu>
Date: Sun, 7 Jun 2020 16:00:32 -0500
Subject: [PATCH 019/140] update based on func change:
 get_isl_maps_from_PairwiseScheduleBuilder(sched_builder,
 knl)->sched_builder.build_maps(knl)

---
 test/test_linearization_checker.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py
index 9511da729..15d022144 100644
--- a/test/test_linearization_checker.py
+++ b/test/test_linearization_checker.py
@@ -390,7 +390,6 @@ def test_statement_instance_ordering_creation():
     import islpy as isl
     from loopy.schedule.checker import (
         get_schedule_for_statement_pair,
-        get_isl_maps_from_PairwiseScheduleBuilder,
     )
     from loopy.schedule.checker.utils import (
         align_isl_maps_by_var_names,
@@ -452,8 +451,7 @@ def test_statement_instance_ordering_creation():
             )
 
         # Get two isl maps from the PairwiseScheduleBuilder
-        isl_sched_map_before, isl_sched_map_after = \
-            get_isl_maps_from_PairwiseScheduleBuilder(sched_builder, knl)
+        isl_sched_map_before, isl_sched_map_after = sched_builder.build_maps(knl)
 
         # get map representing lexicographic ordering
         sched_lex_order_map = sched_builder.get_lex_order_map_for_sched_space()
-- 
GitLab


From 7c2309ab23db59413b5fb3dbdf3cb58325087941 Mon Sep 17 00:00:00 2001
From: jdsteve2 <jdsteve2@illinois.edu>
Date: Mon, 8 Jun 2020 14:42:59 -0500
Subject: [PATCH 020/140] rename local vars isl_sched_map_*->sched_map_*

---
 test/test_linearization_checker.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py
index 3745564d2..5f7329ba1 100644
--- a/test/test_linearization_checker.py
+++ b/test/test_linearization_checker.py
@@ -451,7 +451,7 @@ def test_statement_instance_ordering_creation():
             )
 
         # Get two isl maps from the PairwiseScheduleBuilder
-        isl_sched_map_before, isl_sched_map_after = sched_builder.build_maps(knl)
+        sched_map_before, sched_map_after = sched_builder.build_maps(knl)
 
         # get map representing lexicographic ordering
         sched_lex_order_map = sched_builder.get_lex_order_map_for_sched_space()
@@ -461,8 +461,8 @@ def test_statement_instance_ordering_creation():
         # create statement instance ordering,
         # maps each statement instance to all statement instances occuring later
         sio = get_statement_ordering_map(
-            isl_sched_map_before,
-            isl_sched_map_after,
+            sched_map_before,
+            sched_map_after,
             sched_lex_order_map,
             )
 
-- 
GitLab


From 0f4269b86ae1d7b1863184b731d007bb8463324f Mon Sep 17 00:00:00 2001
From: jdsteve2 <jdsteve2@illinois.edu>
Date: Mon, 8 Jun 2020 16:50:25 -0500
Subject: [PATCH 021/140] update after renaming of
 align_isl_maps_by_var_names()->ensure_dim_names_match_and_align()

---
 test/test_linearization_checker.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py
index 84decedca..5640da8b8 100644
--- a/test/test_linearization_checker.py
+++ b/test/test_linearization_checker.py
@@ -392,7 +392,7 @@ def test_statement_instance_ordering_creation():
         get_schedule_for_statement_pair,
     )
     from loopy.schedule.checker.utils import (
-        align_isl_maps_by_var_names,
+        ensure_dim_names_match_and_align,
         append_marker_to_isl_map_var_names,
     )
     from loopy.schedule.checker.lexicographic_order_map import (
@@ -466,7 +466,7 @@ def test_statement_instance_ordering_creation():
             sched_lex_order_map,
             )
 
-        sio_aligned = align_isl_maps_by_var_names(sio, expected_sio)
+        sio_aligned = ensure_dim_names_match_and_align(sio, expected_sio)
 
         assert sio_aligned == expected_sio
 
-- 
GitLab


From c549f652e739af191d0297e5b2621bdbe33d44a2 Mon Sep 17 00:00:00 2001
From: jdsteve2 <jdsteve2@illinois.edu>
Date: Fri, 10 Jul 2020 07:33:43 -0500
Subject: [PATCH 022/140] use STATEMENT_VAR_NAME and LEX_VAR_PREFIX constants
 when building test maps

---
 test/test_linearization_checker.py | 76 ++++++++++++------------------
 1 file changed, 31 insertions(+), 45 deletions(-)

diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py
index 2dc12b451..208d9350e 100644
--- a/test/test_linearization_checker.py
+++ b/test/test_linearization_checker.py
@@ -494,35 +494,21 @@ def test_statement_instance_ordering_creation():
 
         assert sio_aligned == expected_sio
 
-    expected_lex_order_map = isl.Map("{ "
-        "[_lp_linchk_l0', _lp_linchk_l1', _lp_linchk_l2', _lp_linchk_l3', "
-        "_lp_linchk_l4']"
-        " -> "
-        "[_lp_linchk_l0, _lp_linchk_l1, _lp_linchk_l2, _lp_linchk_l3, "
-        "_lp_linchk_l4]"
-        ":"
+    expected_lex_order_map = isl.Map(
+        "{{ "
+        "[{0}0', {0}1', {0}2', {0}3', {0}4'] -> [{0}0, {0}1, {0}2, {0}3, {0}4] :"
         "("
-        "_lp_linchk_l0' < _lp_linchk_l0 "
+        "{0}0' < {0}0 "
         ") or ("
-        "_lp_linchk_l0'= _lp_linchk_l0 and "
-        "_lp_linchk_l1' < _lp_linchk_l1 "
+        "{0}0'={0}0 and {0}1' < {0}1 "
         ") or ("
-        "_lp_linchk_l0'= _lp_linchk_l0 and "
-        "_lp_linchk_l1'= _lp_linchk_l1 and "
-        "_lp_linchk_l2' < _lp_linchk_l2 "
+        "{0}0'={0}0 and {0}1'={0}1 and {0}2' < {0}2 "
         ") or ("
-        "_lp_linchk_l0'= _lp_linchk_l0 and "
-        "_lp_linchk_l1'= _lp_linchk_l1 and "
-        "_lp_linchk_l2'= _lp_linchk_l2 and "
-        "_lp_linchk_l3' < _lp_linchk_l3 "
+        "{0}0'={0}0 and {0}1'={0}1 and {0}2'={0}2 and {0}3' < {0}3 "
         ") or ("
-        "_lp_linchk_l0'= _lp_linchk_l0 and "
-        "_lp_linchk_l1'= _lp_linchk_l1 and "
-        "_lp_linchk_l2'= _lp_linchk_l2 and "
-        "_lp_linchk_l3'= _lp_linchk_l3 and "
-        "_lp_linchk_l4' < _lp_linchk_l4"
+        "{0}0'={0}0 and {0}1'={0}1 and {0}2'={0}2 and {0}3'={0}3 and {0}4' < {0}4"
         ")"
-        "}")
+        "}}".format(LEX_VAR_PREFIX))
 
     # Isl ignores these apostrophes, but test would still pass since it ignores
     # variable names when checking for equality. Even so, explicitly add apostrophes
@@ -533,12 +519,12 @@ def test_statement_instance_ordering_creation():
     # Relationship between insn_a and insn_b ---------------------------------------
 
     expected_sio = isl.Map(
-        "[pi, pj, pk] -> { "
-        "[_lp_linchk_statement'=0, i', k'] -> [_lp_linchk_statement=1, i, j]:"
+        "[pi, pj, pk] -> {{ "
+        "[{0}'=0, i', k'] -> [{0}=1, i, j] : "
         "0 <= i' < pi and 0 <= k' < pk and 0 <= j < pj and 0 <= i < pi and i > i'; "
-        "[_lp_linchk_statement'=0, i', k'] -> [_lp_linchk_statement=1, i=i', j]:"
+        "[{0}'=0, i', k'] -> [{0}=1, i=i', j] : "
         "0 <= i' < pi and 0 <= k' < pk and 0 <= j < pj "
-        "}"
+        "}}".format(STATEMENT_VAR_NAME)
         )
     # isl ignores these apostrophes, so explicitly add them
     expected_sio = append_marker_to_isl_map_var_names(
@@ -550,12 +536,12 @@ def test_statement_instance_ordering_creation():
     # Relationship between insn_a and insn_c ---------------------------------------
 
     expected_sio = isl.Map(
-        "[pi, pj, pk] -> { "
-        "[_lp_linchk_statement'=0, i', k'] -> [_lp_linchk_statement=1, i, j]:"
+        "[pi, pj, pk] -> {{ "
+        "[{0}'=0, i', k'] -> [{0}=1, i, j] : "
         "0 <= i' < pi and 0 <= k' < pk and 0 <= j < pj and 0 <= i < pi and i > i'; "
-        "[_lp_linchk_statement'=0, i', k'] -> [_lp_linchk_statement=1, i=i', j]:"
+        "[{0}'=0, i', k'] -> [{0}=1, i=i', j] : "
         "0 <= i' < pi and 0 <= k' < pk and 0 <= j < pj "
-        "}"
+        "}}".format(STATEMENT_VAR_NAME)
         )
     # isl ignores these apostrophes, so explicitly add them
     expected_sio = append_marker_to_isl_map_var_names(
@@ -567,10 +553,10 @@ def test_statement_instance_ordering_creation():
     # Relationship between insn_a and insn_d ---------------------------------------
 
     expected_sio = isl.Map(
-        "[pt, pi, pk] -> { "
-        "[_lp_linchk_statement'=0, i', k'] -> [_lp_linchk_statement=1, t]:"
+        "[pt, pi, pk] -> {{ "
+        "[{0}'=0, i', k'] -> [{0}=1, t] : "
         "0 <= i' < pi and 0 <= k' < pk and 0 <= t < pt "
-        "}"
+        "}}".format(STATEMENT_VAR_NAME)
         )
     # isl ignores these apostrophes, so explicitly add them
     expected_sio = append_marker_to_isl_map_var_names(
@@ -582,14 +568,14 @@ def test_statement_instance_ordering_creation():
     # Relationship between insn_b and insn_c ---------------------------------------
 
     expected_sio = isl.Map(
-        "[pi, pj] -> { "
-        "[_lp_linchk_statement'=0, i', j'] -> [_lp_linchk_statement=1, i, j]:"
+        "[pi, pj] -> {{ "
+        "[{0}'=0, i', j'] -> [{0}=1, i, j] : "
         "0 <= i' < pi and 0 <= j' < pj and i > i' and 0 <= i < pi and 0 <= j < pj; "
-        "[_lp_linchk_statement'=0, i', j'] -> [_lp_linchk_statement=1, i=i', j]:"
+        "[{0}'=0, i', j'] -> [{0}=1, i=i', j] : "
         "0 <= i' < pi and 0 <= j' < pj and j > j' and 0 <= j < pj; "
-        "[_lp_linchk_statement'=0, i', j'] -> [_lp_linchk_statement=1, i=i', j=j']:"
+        "[{0}'=0, i', j'] -> [{0}=1, i=i', j=j'] : "
         "0 <= i' < pi and 0 <= j' < pj "
-        "}"
+        "}}".format(STATEMENT_VAR_NAME)
         )
     # isl ignores these apostrophes, so explicitly add them
     expected_sio = append_marker_to_isl_map_var_names(
@@ -601,10 +587,10 @@ def test_statement_instance_ordering_creation():
     # Relationship between insn_b and insn_d ---------------------------------------
 
     expected_sio = isl.Map(
-        "[pt, pi, pj] -> { "
-        "[_lp_linchk_statement'=0, i', j'] -> [_lp_linchk_statement=1, t]:"
+        "[pt, pi, pj] -> {{ "
+        "[{0}'=0, i', j'] -> [{0}=1, t] : "
         "0 <= i' < pi and 0 <= j' < pj and 0 <= t < pt "
-        "}"
+        "}}".format(STATEMENT_VAR_NAME)
         )
     # isl ignores these apostrophes, so explicitly add them
     expected_sio = append_marker_to_isl_map_var_names(
@@ -616,10 +602,10 @@ def test_statement_instance_ordering_creation():
     # Relationship between insn_c and insn_d ---------------------------------------
 
     expected_sio = isl.Map(
-        "[pt, pi, pj] -> { "
-        "[_lp_linchk_statement'=0, i', j'] -> [_lp_linchk_statement=1, t]:"
+        "[pt, pi, pj] -> {{ "
+        "[{0}'=0, i', j'] -> [{0}=1, t] : "
         "0 <= i' < pi and 0 <= j' < pj and 0 <= t < pt "
-        "}"
+        "}}".format(STATEMENT_VAR_NAME)
         )
     # isl ignores these apostrophes, so explicitly add them
     expected_sio = append_marker_to_isl_map_var_names(
-- 
GitLab


From 200eed41de56f90bec1a8c3f85d6a3ef9ddc05bc Mon Sep 17 00:00:00 2001
From: jdsteve2 <jdsteve2@illinois.edu>
Date: Tue, 14 Jul 2020 09:07:08 -0500
Subject: [PATCH 023/140] update tests after removal of
 PairwiseScheduleBuilder class

---
 test/test_linearization_checker.py | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py
index 82658bc01..9ad268edb 100644
--- a/test/test_linearization_checker.py
+++ b/test/test_linearization_checker.py
@@ -50,7 +50,7 @@ else:
     faulthandler.enable()
 
 
-# {{{ test PairwiseScheduleBuilder and map creation
+# {{{ test pairwise schedule map creation
 
 def test_pairwise_schedule_and_map_creation():
     import islpy as isl
@@ -379,6 +379,9 @@ def test_statement_instance_ordering_creation():
     from loopy.schedule.checker import (
         get_schedule_for_statement_pair,
     )
+    from loopy.schedule.checker.schedule import (
+        get_lex_order_map_for_sched_space,
+    )
     from loopy.schedule.checker.utils import (
         ensure_dim_names_match_and_align,
         append_marker_to_isl_map_var_names,
@@ -431,18 +434,16 @@ def test_statement_instance_ordering_creation():
             expected_sio,
             ):
 
-        sched_builder = get_schedule_for_statement_pair(
+        # Get pairwise schedule
+        sched_map_before, sched_map_after = get_schedule_for_statement_pair(
             knl,
             linearization_items,
             insn_id_before,
             insn_id_after,
             )
 
-        # Get two isl maps from the PairwiseScheduleBuilder
-        sched_map_before, sched_map_after = sched_builder.build_maps(knl)
-
         # get map representing lexicographic ordering
-        sched_lex_order_map = sched_builder.get_lex_order_map_for_sched_space()
+        sched_lex_order_map = get_lex_order_map_for_sched_space(sched_map_before)
 
         assert sched_lex_order_map == expected_lex_order_map
 
-- 
GitLab


From cd1c1310b88d4f22157e6f9b5b79774f0e5f397f Mon Sep 17 00:00:00 2001
From: jdsteve2 <jdsteve2@illinois.edu>
Date: Tue, 14 Jul 2020 09:07:49 -0500
Subject: [PATCH 024/140] in create_lex_order_map(), make n_dims arg optional

---
 loopy/schedule/checker/lexicographic_order_map.py | 4 +++-
 loopy/schedule/checker/schedule.py                | 3 +--
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/loopy/schedule/checker/lexicographic_order_map.py b/loopy/schedule/checker/lexicographic_order_map.py
index b547e1d94..0966cba99 100644
--- a/loopy/schedule/checker/lexicographic_order_map.py
+++ b/loopy/schedule/checker/lexicographic_order_map.py
@@ -128,7 +128,7 @@ def get_lex_order_constraint(before_names, after_names, islvars=None):
 
 
 def create_lex_order_map(
-        n_dims,
+        n_dims=None,
         before_names=None,
         after_names=None,
         ):
@@ -166,6 +166,8 @@ def create_lex_order_map(
             append_marker_to_strings,
         )
         before_names = append_marker_to_strings(after_names, marker="'")
+    if n_dims is None:
+        n_dims = len(after_names)
 
     assert len(before_names) == len(after_names) == n_dims
     dim_type = isl.dim_type
diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py
index ad2ecefc6..a73c72cb2 100644
--- a/loopy/schedule/checker/schedule.py
+++ b/loopy/schedule/checker/schedule.py
@@ -341,5 +341,4 @@ def get_lex_order_map_for_sched_space(schedule):
     )
 
     lex_dim_names = schedule.space.get_var_names(isl.dim_type.out)
-    return create_lex_order_map(
-        len(lex_dim_names), after_names=lex_dim_names)
+    return create_lex_order_map(after_names=lex_dim_names)
-- 
GitLab


From 67887d36ed9eb1b1a229833b4590cac030f7d2b1 Mon Sep 17 00:00:00 2001
From: jdsteve2 <jdsteve2@illinois.edu>
Date: Sun, 26 Jul 2020 21:02:18 -0500
Subject: [PATCH 025/140] update sio test to deal with new output from
 get_schedules_for_statement_pairs(); don't hardcode expected lex order maps,
 instead create them to match expected dim size

---
 test/test_linearization_checker.py | 79 ++++++++++++++----------------
 1 file changed, 36 insertions(+), 43 deletions(-)

diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py
index 7a1723d47..f081e2184 100644
--- a/test/test_linearization_checker.py
+++ b/test/test_linearization_checker.py
@@ -360,7 +360,7 @@ def test_pairwise_schedule_creation():
 def test_statement_instance_ordering_creation():
     import islpy as isl
     from loopy.schedule.checker import (
-        get_schedule_for_statement_pair,
+        get_schedules_for_statement_pairs,
     )
     from loopy.schedule.checker.schedule import (
         get_lex_order_map_for_sched_space,
@@ -371,6 +371,7 @@ def test_statement_instance_ordering_creation():
     )
     from loopy.schedule.checker.lexicographic_order_map import (
         get_statement_ordering_map,
+        create_lex_order_map,
     )
 
     # example kernel (add deps to fix loop order)
@@ -410,24 +411,44 @@ def test_statement_instance_ordering_creation():
     knl = get_one_linearized_kernel(knl)
     linearization_items = knl.linearization
 
+    # Get pairwise schedules
+    insn_id_pairs = [
+        ("insn_a", "insn_b"),
+        ("insn_a", "insn_c"),
+        ("insn_a", "insn_d"),
+        ("insn_b", "insn_c"),
+        ("insn_b", "insn_d"),
+        ("insn_c", "insn_d"),
+        ]
+    sched_maps = get_schedules_for_statement_pairs(
+        knl,
+        linearization_items,
+        insn_id_pairs,
+        )
+
     def check_sio_for_insn_pair(
             insn_id_before,
             insn_id_after,
-            expected_lex_order_map,
+            expected_lex_dims,
             expected_sio,
             ):
 
         # Get pairwise schedule
-        sched_map_before, sched_map_after = get_schedule_for_statement_pair(
-            knl,
-            linearization_items,
-            insn_id_before,
-            insn_id_after,
-            )
+        sched_map_before, sched_map_after = sched_maps[
+            (insn_id_before, insn_id_after)]
 
-        # get map representing lexicographic ordering
+        # Get map representing lexicographic ordering
         sched_lex_order_map = get_lex_order_map_for_sched_space(sched_map_before)
 
+        # Get expected lex order map
+        expected_lex_order_map = create_lex_order_map(
+            n_dims=expected_lex_dims,
+            before_names=["%s%d'" % (LEX_VAR_PREFIX, i)
+                for i in range(expected_lex_dims)],
+            after_names=["%s%d" % (LEX_VAR_PREFIX, i)
+                for i in range(expected_lex_dims)],
+            )
+
         assert sched_lex_order_map == expected_lex_order_map
 
         # create statement instance ordering,
@@ -442,28 +463,6 @@ def test_statement_instance_ordering_creation():
 
         assert sio_aligned == expected_sio
 
-    expected_lex_order_map = isl.Map(
-        "{{ "
-        "[{0}0', {0}1', {0}2', {0}3', {0}4'] -> [{0}0, {0}1, {0}2, {0}3, {0}4] :"
-        "("
-        "{0}0' < {0}0 "
-        ") or ("
-        "{0}0'={0}0 and {0}1' < {0}1 "
-        ") or ("
-        "{0}0'={0}0 and {0}1'={0}1 and {0}2' < {0}2 "
-        ") or ("
-        "{0}0'={0}0 and {0}1'={0}1 and {0}2'={0}2 and {0}3' < {0}3 "
-        ") or ("
-        "{0}0'={0}0 and {0}1'={0}1 and {0}2'={0}2 and {0}3'={0}3 and {0}4' < {0}4"
-        ")"
-        "}}".format(LEX_VAR_PREFIX))
-
-    # Isl ignores these apostrophes, but test would still pass since it ignores
-    # variable names when checking for equality. Even so, explicitly add apostrophes
-    # for sanity.
-    expected_lex_order_map = append_marker_to_isl_map_var_names(
-        expected_lex_order_map, isl.dim_type.in_, "'")
-
     # Relationship between insn_a and insn_b ---------------------------------------
 
     expected_sio = isl.Map(
@@ -478,8 +477,7 @@ def test_statement_instance_ordering_creation():
     expected_sio = append_marker_to_isl_map_var_names(
         expected_sio, isl.dim_type.in_, "'")
 
-    check_sio_for_insn_pair(
-        "insn_a", "insn_b", expected_lex_order_map, expected_sio)
+    check_sio_for_insn_pair("insn_a", "insn_b", 3, expected_sio)
 
     # Relationship between insn_a and insn_c ---------------------------------------
 
@@ -495,8 +493,7 @@ def test_statement_instance_ordering_creation():
     expected_sio = append_marker_to_isl_map_var_names(
         expected_sio, isl.dim_type.in_, "'")
 
-    check_sio_for_insn_pair(
-        "insn_a", "insn_c", expected_lex_order_map, expected_sio)
+    check_sio_for_insn_pair("insn_a", "insn_c", 3, expected_sio)
 
     # Relationship between insn_a and insn_d ---------------------------------------
 
@@ -510,8 +507,7 @@ def test_statement_instance_ordering_creation():
     expected_sio = append_marker_to_isl_map_var_names(
         expected_sio, isl.dim_type.in_, "'")
 
-    check_sio_for_insn_pair(
-        "insn_a", "insn_d", expected_lex_order_map, expected_sio)
+    check_sio_for_insn_pair("insn_a", "insn_d", 3, expected_sio)
 
     # Relationship between insn_b and insn_c ---------------------------------------
 
@@ -529,8 +525,7 @@ def test_statement_instance_ordering_creation():
     expected_sio = append_marker_to_isl_map_var_names(
         expected_sio, isl.dim_type.in_, "'")
 
-    check_sio_for_insn_pair(
-        "insn_b", "insn_c", expected_lex_order_map, expected_sio)
+    check_sio_for_insn_pair("insn_b", "insn_c", 3, expected_sio)
 
     # Relationship between insn_b and insn_d ---------------------------------------
 
@@ -544,8 +539,7 @@ def test_statement_instance_ordering_creation():
     expected_sio = append_marker_to_isl_map_var_names(
         expected_sio, isl.dim_type.in_, "'")
 
-    check_sio_for_insn_pair(
-        "insn_b", "insn_d", expected_lex_order_map, expected_sio)
+    check_sio_for_insn_pair("insn_b", "insn_d", 3, expected_sio)
 
     # Relationship between insn_c and insn_d ---------------------------------------
 
@@ -559,8 +553,7 @@ def test_statement_instance_ordering_creation():
     expected_sio = append_marker_to_isl_map_var_names(
         expected_sio, isl.dim_type.in_, "'")
 
-    check_sio_for_insn_pair(
-        "insn_c", "insn_d", expected_lex_order_map, expected_sio)
+    check_sio_for_insn_pair("insn_c", "insn_d", 3, expected_sio)
 
 # }}}
 
-- 
GitLab


From 81dd0eee59b577edc58c41be83e425f110a2e1b3 Mon Sep 17 00:00:00 2001
From: jdsteve2 <jdsteve2@illinois.edu>
Date: Sun, 26 Jul 2020 21:14:55 -0500
Subject: [PATCH 026/140] add independent test for lex order map creation

---
 test/test_linearization_checker.py | 61 ++++++++++++++++++++++++++++++
 1 file changed, 61 insertions(+)

diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py
index f081e2184..6070909c5 100644
--- a/test/test_linearization_checker.py
+++ b/test/test_linearization_checker.py
@@ -355,6 +355,67 @@ def test_pairwise_schedule_creation():
 # }}}
 
 
+# {{{ test lex order map creation
+
+def test_lex_order_map_creation():
+    import islpy as isl
+    from loopy.schedule.checker.lexicographic_order_map import (
+        create_lex_order_map,
+    )
+    from loopy.schedule.checker.utils import (
+        append_marker_to_isl_map_var_names,
+    )
+
+    def _check_lex_map(expected_lex_order_map, n_dims):
+        # Isl ignores the apostrophes, so explicitly add them
+        expected_lex_order_map = append_marker_to_isl_map_var_names(
+            expected_lex_order_map, isl.dim_type.in_, "'")
+
+        lex_order_map = create_lex_order_map(
+            n_dims=n_dims,
+            before_names=["%s%d'" % (LEX_VAR_PREFIX, i) for i in range(n_dims)],
+            after_names=["%s%d" % (LEX_VAR_PREFIX, i) for i in range(n_dims)],
+            )
+
+        assert lex_order_map == expected_lex_order_map
+        assert (
+            lex_order_map.get_var_names(isl.dim_type.in_) ==
+            expected_lex_order_map.get_var_names(isl.dim_type.in_))
+        assert (
+            lex_order_map.get_var_names(isl.dim_type.out) ==
+            expected_lex_order_map.get_var_names(isl.dim_type.out))
+
+    expected_lex_order_map = isl.Map(
+        "{{ "
+        "[{0}0', {0}1', {0}2', {0}3', {0}4'] -> [{0}0, {0}1, {0}2, {0}3, {0}4] :"
+        "("
+        "{0}0' < {0}0 "
+        ") or ("
+        "{0}0'={0}0 and {0}1' < {0}1 "
+        ") or ("
+        "{0}0'={0}0 and {0}1'={0}1 and {0}2' < {0}2 "
+        ") or ("
+        "{0}0'={0}0 and {0}1'={0}1 and {0}2'={0}2 and {0}3' < {0}3 "
+        ") or ("
+        "{0}0'={0}0 and {0}1'={0}1 and {0}2'={0}2 and {0}3'={0}3 and {0}4' < {0}4"
+        ")"
+        "}}".format(LEX_VAR_PREFIX))
+
+    _check_lex_map(expected_lex_order_map, 5)
+
+    expected_lex_order_map = isl.Map(
+        "{{ "
+        "[{0}0'] -> [{0}0] :"
+        "("
+        "{0}0' < {0}0 "
+        ")"
+        "}}".format(LEX_VAR_PREFIX))
+
+    _check_lex_map(expected_lex_order_map, 1)
+
+# }}}
+
+
 # {{{ test statement instance ordering creation
 
 def test_statement_instance_ordering_creation():
-- 
GitLab


From 5f060a84d96cf960c50a528b0b37b18ec355c170 Mon Sep 17 00:00:00 2001
From: jdsteve2 <jdsteve2@illinois.edu>
Date: Sun, 2 Aug 2020 21:32:55 -0500
Subject: [PATCH 027/140] reduce the number of dims expected in lex maps after
 update that simplified lex maps

---
 test/test_linearization_checker.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py
index a3a95b624..bf33bebb2 100644
--- a/test/test_linearization_checker.py
+++ b/test/test_linearization_checker.py
@@ -538,7 +538,7 @@ def test_statement_instance_ordering_creation():
     expected_sio = append_marker_to_isl_map_var_names(
         expected_sio, isl.dim_type.in_, "'")
 
-    check_sio_for_insn_pair("insn_a", "insn_b", 3, expected_sio)
+    check_sio_for_insn_pair("insn_a", "insn_b", 2, expected_sio)
 
     # Relationship between insn_a and insn_c ---------------------------------------
 
@@ -554,7 +554,7 @@ def test_statement_instance_ordering_creation():
     expected_sio = append_marker_to_isl_map_var_names(
         expected_sio, isl.dim_type.in_, "'")
 
-    check_sio_for_insn_pair("insn_a", "insn_c", 3, expected_sio)
+    check_sio_for_insn_pair("insn_a", "insn_c", 2, expected_sio)
 
     # Relationship between insn_a and insn_d ---------------------------------------
 
@@ -568,7 +568,7 @@ def test_statement_instance_ordering_creation():
     expected_sio = append_marker_to_isl_map_var_names(
         expected_sio, isl.dim_type.in_, "'")
 
-    check_sio_for_insn_pair("insn_a", "insn_d", 3, expected_sio)
+    check_sio_for_insn_pair("insn_a", "insn_d", 1, expected_sio)
 
     # Relationship between insn_b and insn_c ---------------------------------------
 
@@ -600,7 +600,7 @@ def test_statement_instance_ordering_creation():
     expected_sio = append_marker_to_isl_map_var_names(
         expected_sio, isl.dim_type.in_, "'")
 
-    check_sio_for_insn_pair("insn_b", "insn_d", 3, expected_sio)
+    check_sio_for_insn_pair("insn_b", "insn_d", 1, expected_sio)
 
     # Relationship between insn_c and insn_d ---------------------------------------
 
@@ -614,7 +614,7 @@ def test_statement_instance_ordering_creation():
     expected_sio = append_marker_to_isl_map_var_names(
         expected_sio, isl.dim_type.in_, "'")
 
-    check_sio_for_insn_pair("insn_c", "insn_d", 3, expected_sio)
+    check_sio_for_insn_pair("insn_c", "insn_d", 1, expected_sio)
 
 # }}}
 
-- 
GitLab


From 9ab0a22d1232f8dabeb0ae7bb3b2e880f808c225 Mon Sep 17 00:00:00 2001
From: jdsteve2 <jdsteve2@illinois.edu>
Date: Sun, 27 Sep 2020 21:24:01 -0500
Subject: [PATCH 028/140] rename get_lex_order_constraint->get_lex_order_set;
 lots of documentation/naming/comment improvements for clarity

---
 .../checker/lexicographic_order_map.py        | 168 ++++++++++--------
 loopy/schedule/checker/schedule.py            |  12 +-
 loopy/schedule/checker/utils.py               |  21 +--
 3 files changed, 109 insertions(+), 92 deletions(-)

diff --git a/loopy/schedule/checker/lexicographic_order_map.py b/loopy/schedule/checker/lexicographic_order_map.py
index 0966cba99..d9066030f 100644
--- a/loopy/schedule/checker/lexicographic_order_map.py
+++ b/loopy/schedule/checker/lexicographic_order_map.py
@@ -25,17 +25,19 @@ import islpy as isl
 
 
 def get_statement_ordering_map(
-        sched_map_before, sched_map_after, lex_map, before_marker="'"):
-    """Return a mapping that maps each statement instance to
-        all statement instances occuring later.
+        sched_before, sched_after, lex_map, before_marker="'"):
+    """Return a statement ordering represented as a map from each statement
+        instance to all statement instances occurring later.
 
-    :arg sched_map_before: An :class:`islpy.Map` representing instruction
-        instance order for the dependee as a mapping from each statement
-        instance to a point in the lexicographic ordering.
+    :arg sched_before: An :class:`islpy.Map` representing a schedule
+        as a mapping from statement instances (for one particular statement)
+        to lexicographic time. The statement represented will typically
+        be the dependee in a dependency relationship.
 
-    :arg sched_map_after: An :class:`islpy.Map` representing instruction
-        instance order for the depender as a mapping from each statement
-        instance to a point in the lexicographic ordering.
+    :arg sched_after: An :class:`islpy.Map` representing a schedule
+        as a mapping from statement instances (for one particular statement)
+        to lexicographic time. The statement represented will typically
+        be the depender in a dependency relationship.
 
     :arg lex_map: An :class:`islpy.Map` representing a lexicographic
         ordering as a mapping from each point in lexicographic time
@@ -45,17 +47,23 @@ def get_statement_ordering_map(
                 i0' < i0 or (i0' = i0 and i1' < i1)
                 or (i0' = i0 and i1' = i1 and i2' < i2) ...}
 
-    :returns: An :class:`islpy.Map` representing the lex schedule as
+    :arg before_marker: A :class:`str` to be appended to the names of the
+        map dimensions representing the 'before' statement in the
+        'happens before' relationship.
+
+    :returns: An :class:`islpy.Map` representing the statement odering as
         a mapping from each statement instance to all statement instances
-        occuring later. I.e., we compose relations B, L, and A as
-        B ∘ L ∘ A^-1, where B is sched_map_before, A is sched_map_after,
-        and L is the lexicographic ordering map.
+        occurring later. I.e., we compose relations B, L, and A as
+        B ∘ L ∘ A^-1, where B is `sched_before`, A is `sched_after`,
+        and L is `lex_map`.
 
     """
 
-    sio = sched_map_before.apply_range(
-        lex_map).apply_range(sched_map_after.reverse())
-    # append marker to in names
+    # Perform the composition of relations
+    sio = sched_before.apply_range(
+        lex_map).apply_range(sched_after.reverse())
+
+    # Append marker to in_ dims
     from loopy.schedule.checker.utils import (
         append_marker_to_isl_map_var_names,
     )
@@ -63,30 +71,38 @@ def get_statement_ordering_map(
         sio, isl.dim_type.in_, before_marker)
 
 
-def get_lex_order_constraint(before_names, after_names, islvars=None):
-    """Return a constraint represented as an :class:`islpy.Set`
-        defining a 'happens before' relationship in a lexicographic
-        ordering.
-
-    :arg before_names: A list of :class:`str` variable names representing
-        the lexicographic space dimensions for a point in lexicographic
-        time that occurs before. (see example below)
-
-    :arg after_names: A list of :class:`str` variable names representing
-        the lexicographic space dimensions for a point in lexicographic
-        time that occurs after. (see example below)
-
-    :arg islvars: A dictionary from variable names to :class:`islpy.PwAff`
-        instances that represent each of the variables
-        (islvars may be produced by `islpy.make_zero_and_vars`). The key
-        '0' is also include and represents a :class:`islpy.PwAff` zero constant.
-        This dictionary defines the space to be used for the set. If no
-        value is passed, the dictionary will be made using ``before_names``
-        and ``after_names``.
-
-    :returns: An :class:`islpy.Set` representing a constraint that enforces a
-        lexicographic ordering. E.g., if ``before_names = [i0', i1', i2']`` and
-        ``after_names = [i0, i1, i2]``, return the set::
+def get_lex_order_set(before_names, after_names, islvars=None):
+    """Return an :class:`islpy.Set` representing a lexicographic ordering
+        with the number of dimensions provided in `before_names`
+        (equal to the number of dimensions in `after_names`).
+
+    :arg before_names: A list of :class:`str` variable names to be used
+        to describe lexicographic space dimensions for a point in a lexicographic
+        ordering that occurs before another point, which will be represented using
+        `after_names`. (see example below)
+
+    :arg after_names: A list of :class:`str` variable names to be used
+        to describe lexicographic space dimensions for a point in a lexicographic
+        ordering that occurs after another point, which will be represented using
+        `before_names`. (see example below)
+
+    :arg islvars: A dictionary mapping variable names in `before_names` and
+        `after_names` to :class:`islpy.PwAff` instances that represent each
+        of the variables (islvars may be produced by `islpy.make_zero_and_vars`).
+        The key '0' is also include and represents a :class:`islpy.PwAff` zero
+        constant. This dictionary defines the space to be used for the set. If no
+        value is passed, the dictionary will be made using `before_names`
+        and `after_names`.
+
+    :returns: An :class:`islpy.Set` representing a big-endian lexicographic ordering
+        with the number of dimensions provided in `before_names`. The set
+        has one dimension for each name in *both* `before_names` and
+        `after_names`, and contains all points which meet a 'happens before'
+        constraint defining the lexicographic ordering. E.g., if
+        `before_names = [i0', i1', i2']` and `after_names = [i0, i1, i2]`,
+        return the set containing all points in a 3-dimensional, big-endian
+        lexicographic ordering such that point
+        `[i0', i1', i2']` happens before `[i0, i1, i2]`. I.e., return::
 
             {[i0', i1', i2', i0, i1, i2] :
                 i0' < i0 or (i0' = i0 and i1' < i1)
@@ -98,33 +114,31 @@ def get_lex_order_constraint(before_names, after_names, islvars=None):
     if islvars is None:
         islvars = isl.make_zero_and_vars(before_names+after_names, [])
 
-    # Initialize constraint with i0' < i0
-    lex_order_constraint = islvars[before_names[0]].lt_set(islvars[after_names[0]])
+    # Initialize set with constraint i0' < i0
+    lex_order_set = islvars[before_names[0]].lt_set(islvars[after_names[0]])
 
-    # Initialize conjunction constraint with True.
-    # For each dim d, starting with d=1, this conjunction will have d equalities,
-    # e.g., (i0' = i0 and i1' = i1 and ... i(d-1)' = i(d-1))
-    equality_constraint_conj = islvars[0].eq_set(islvars[0])
+    # For each dim d, starting with d=1, equality_conj_set will be constrained
+    # by d equalities, e.g., (i0' = i0 and i1' = i1 and ... i(d-1)' = i(d-1)).
+    equality_conj_set = islvars[0].eq_set(islvars[0])  # initialize to 'true'
 
     for i in range(1, len(before_names)):
 
-        # Add the next equality constraint to equality_constraint_conj
-        equality_constraint_conj = equality_constraint_conj & \
+        # Add the next equality constraint to equality_conj_set
+        equality_conj_set = equality_conj_set & \
             islvars[before_names[i-1]].eq_set(islvars[after_names[i-1]])
 
-        # Create a conjunction constraint by combining a less-than
-        # constraint for this dim, e.g., (i1' < i1), with the current
-        # equality constraint conjunction.
-        # For each dim d, starting with d=1, this conjunction will have d equalities,
-        # and one inequality,
-        # e.g., (i0' = i0 and i1' = i1 and ... i(d-1)' = i(d-1) and id' < id)
-        full_conj_constraint = islvars[before_names[i]].lt_set(
-            islvars[after_names[i]]) & equality_constraint_conj
+        # Create a set constrained by adding a less-than constraint for this dim,
+        # e.g., (i1' < i1), to the current equality conjunction set.
+        # For each dim d, starting with d=1, this full conjunction will have
+        # d equalities and one inequality, e.g.,
+        # (i0' = i0 and i1' = i1 and ... i(d-1)' = i(d-1) and id' < id)
+        full_conj_set = islvars[before_names[i]].lt_set(
+            islvars[after_names[i]]) & equality_conj_set
 
-        # Union this new constraint with the current lex_order_constraint
-        lex_order_constraint = lex_order_constraint | full_conj_constraint
+        # Union this new constraint with the current lex_order_set
+        lex_order_set = lex_order_set | full_conj_set
 
-    return lex_order_constraint
+    return lex_order_set
 
 
 def create_lex_order_map(
@@ -132,26 +146,28 @@ def create_lex_order_map(
         before_names=None,
         after_names=None,
         ):
-    """Return a mapping that maps each point in a lexicographic
-        ordering to every point that occurs later in lexicographic
-        time.
+    """Return a map from each point in a lexicographic ordering to every
+        point that occurs later in the lexicographic ordering.
 
     :arg n_dims: An :class:`int` representing the number of dimensions
-        in the lexicographic ordering.
+        in the lexicographic ordering. If not provided, `n_dims` will be
+        set to length of `after_names`.
 
-    :arg before_names: A list of :class:`str` variable names representing
-        the lexicographic space dimensions for a point in lexicographic
-        time that occurs before. (see example below)
+    :arg before_names: A list of :class:`str` variable names to be used
+        to describe lexicographic space dimensions for a point in a lexicographic
+        ordering that occurs before another point, which will be represented using
+        `after_names`. (see example below)
 
-    :arg after_names: A list of :class:`str` variable names representing
-        the lexicographic space dimensions for a point in lexicographic
-        time that occurs after. (see example below)
+    :arg after_names: A list of :class:`str` variable names to be used
+        to describe lexicographic space dimensions for a point in a lexicographic
+        ordering that occurs after another point, which will be represented using
+        `before_names`. (see example below)
 
     :returns: An :class:`islpy.Map` representing a lexicographic
         ordering as a mapping from each point in lexicographic time
         to every point that occurs later in lexicographic time.
-        E.g., if ``before_names = [i0', i1', i2']`` and
-        ``after_names = [i0, i1, i2]``, return the map::
+        E.g., if `before_names = [i0', i1', i2']` and
+        `after_names = [i0, i1, i2]`, return the map::
 
             {[i0', i1', i2'] -> [i0, i1, i2] :
                 i0' < i0 or (i0' = i0 and i1' < i1)
@@ -172,11 +188,11 @@ def create_lex_order_map(
     assert len(before_names) == len(after_names) == n_dims
     dim_type = isl.dim_type
 
-    lex_order_constraint = get_lex_order_constraint(before_names, after_names)
+    # First, get a set representing the lexicographic ordering.
+    lex_order_set = get_lex_order_set(before_names, after_names)
 
-    lex_map = isl.Map.from_domain(lex_order_constraint)
-    lex_map = lex_map.move_dims(
+    # Now convert that set to a map.
+    lex_map = isl.Map.from_domain(lex_order_set)
+    return lex_map.move_dims(
         dim_type.out, 0, dim_type.in_,
         len(before_names), len(after_names))
-
-    return lex_map
diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py
index 97764a5e2..a947da3ac 100644
--- a/loopy/schedule/checker/schedule.py
+++ b/loopy/schedule/checker/schedule.py
@@ -317,17 +317,17 @@ def generate_pairwise_schedules(
 
 def get_lex_order_map_for_sched_space(schedule):
     """Return an :class:`islpy.BasicMap` that maps each point in a
-        lexicographic ordering to every point that is
-        lexocigraphically greater.
+        lexicographic ordering to every point that occurs later.
 
     :arg schedule: A :class:`islpy.Map` representing the ordering of
         statement instances as a mapping from statement instances to
         lexicographic time.
 
-    :returns: An :class:`islpy.BasicMap` that maps each point in a
-        lexicographic ordering to every point that is
-        lexocigraphically greater with the dimension number and names
-        matching the output dimension of `schedule`.
+    :returns: An :class:`islpy.BasicMap` representing a lexicographic
+        ordering as a mapping from each point in lexicographic time
+        to every point that occurs later in lexicographic time, with
+        the dimension count and names matching the output dimension
+        of `schedule`.
 
     """
 
diff --git a/loopy/schedule/checker/utils.py b/loopy/schedule/checker/utils.py
index 959c2116d..db1d861c8 100644
--- a/loopy/schedule/checker/utils.py
+++ b/loopy/schedule/checker/utils.py
@@ -88,16 +88,19 @@ def ensure_dim_names_match_and_align(obj_map, tgt_map):
 
 
 def append_marker_to_isl_map_var_names(old_isl_map, dim_type, marker="'"):
-    """Return an isl_map with marker appended to
-        dim_type dimension names.
+    """Return an :class:`islpy.Map` with a marker appended to the specified
+    dimension names.
 
-    :arg old_isl_map: A :class:`islpy.Map`.
+    :arg old_isl_map: An :class:`islpy.Map`.
 
-    :arg dim_type: A :class:`islpy.dim_type`, i.e., an :class:`int`,
+    :arg dim_type: An :class:`islpy.dim_type`, i.e., an :class:`int`,
         specifying the dimension to be marked.
 
-    :returns: A :class:`islpy.Map` matching `old_isl_map` with
-        apostrophes appended to dim_type dimension names.
+    :arg marker: A :class:`str` to be appended to the specified dimension
+        names. If not provided, `marker` defaults to an apostrophe.
+
+    :returns: An :class:`islpy.Map` matching `old_isl_map` with
+        `marker` appended to the `dim_type` dimension names.
 
     """
 
@@ -109,10 +112,8 @@ def append_marker_to_isl_map_var_names(old_isl_map, dim_type, marker="'"):
 
 
 def append_marker_to_strings(strings, marker="'"):
-    if not isinstance(strings, list):
-        raise ValueError("append_marker_to_strings did not receive a list")
-    else:
-        return [s+marker for s in strings]
+    assert isinstance(strings, list)
+    return [s+marker for s in strings]
 
 
 def sorted_union_of_names_in_isl_sets(
-- 
GitLab


From 48a040ad3b766fd5dff418966d550abea0ec682a Mon Sep 17 00:00:00 2001
From: Kaushik Kulkarni <kaushikcfd@gmail.com>
Date: Wed, 14 Oct 2020 01:24:50 -0500
Subject: [PATCH 029/140] complete the implementation of
 loopy.transform.data.rename_argument

- handle the case where the argument appears in a domain
- handle the case where the argument is part of a variable's shape expression
- add a test to check such use cases
---
 loopy/transform/data.py | 38 ++++++++++++++++++++++++++++++++++++--
 test/test_transform.py  | 12 ++++++++++++
 2 files changed, 48 insertions(+), 2 deletions(-)

diff --git a/loopy/transform/data.py b/loopy/transform/data.py
index 5356d4903..fb55251fa 100644
--- a/loopy/transform/data.py
+++ b/loopy/transform/data.py
@@ -635,12 +635,15 @@ def rename_argument(kernel, old_name, new_name, existing_ok=False):
         raise LoopyError("argument name '%s' conflicts with an existing identifier"
                 "--cannot rename" % new_name)
 
+    # {{{ instructions
+
     from pymbolic import var
     subst_dict = {old_name: var(new_name)}
 
     from loopy.symbolic import (
             RuleAwareSubstitutionMapper,
-            SubstitutionRuleMappingContext)
+            SubstitutionRuleMappingContext,
+            SubstitutionMapper)
     from pymbolic.mapper.substitutor import make_subst_func
     rule_mapping_context = SubstitutionRuleMappingContext(
             kernel.substitutions, var_name_gen)
@@ -650,14 +653,45 @@ def rename_argument(kernel, old_name, new_name, existing_ok=False):
 
     kernel = smap.map_kernel(kernel)
 
+    # }}}
+
+    # {{{ args, temporary_variables
+
+    from loopy.kernel.array import ArrayBase
+    subst_mapper = SubstitutionMapper(make_subst_func(subst_dict))
+
     new_args = []
     for arg in kernel.args:
         if arg.name == old_name:
             arg = arg.copy(name=new_name)
+        if isinstance(arg, ArrayBase) and arg.shape:
+            arg = arg.copy(shape=subst_mapper(arg.shape))
 
         new_args.append(arg)
 
-    return kernel.copy(args=new_args)
+    new_tvs = {}
+    for tv_name, tv in kernel.temporary_variables.items():
+        if tv.shape:
+            tv = tv.copy(shape=subst_mapper(tv.shape))
+
+        new_tvs[tv_name] = tv
+
+    # }}}
+
+    # {{{ domain
+
+    new_domains = []
+    for dom in kernel.domains:
+        if old_name in dom.get_var_dict():
+            dt, pos = dom.get_var_dict()[old_name]
+            dom = dom.set_dim_name(dt, pos, new_name)
+
+        new_domains.append(dom)
+
+    # }}}
+
+    return kernel.copy(domains=new_domains, args=new_args,
+            temporary_variables=new_tvs)
 
 # }}}
 
diff --git a/test/test_transform.py b/test/test_transform.py
index e4ca2af0d..32cdb3710 100644
--- a/test/test_transform.py
+++ b/test/test_transform.py
@@ -672,6 +672,18 @@ def test_add_inames_for_unused_hw_axes(ctx_factory):
             parameters={"n": n})
 
 
+def test_rename_argument_of_domain_params():
+    knl = lp.make_kernel(
+            "{[i]: 0<=i<n}",
+            """
+            y[i] = 2.0f
+            """)
+
+    knl = lp.rename_argument(knl, "n", "N")
+
+    print(lp.generate_code_v2(knl).device_code())
+
+
 if __name__ == "__main__":
     if len(sys.argv) > 1:
         exec(sys.argv[1])
-- 
GitLab


From 30cb9fbbb99057790b468ab0d48d41920d19bf8b Mon Sep 17 00:00:00 2001
From: Kaushik Kulkarni <kaushikcfd@gmail.com>
Date: Wed, 14 Oct 2020 14:06:13 -0500
Subject: [PATCH 030/140] stride expression must also be substituted

---
 loopy/transform/data.py | 10 ++++++++--
 test/test_transform.py  |  5 +++--
 2 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/loopy/transform/data.py b/loopy/transform/data.py
index fb55251fa..8368655a4 100644
--- a/loopy/transform/data.py
+++ b/loopy/transform/data.py
@@ -665,14 +665,20 @@ def rename_argument(kernel, old_name, new_name, existing_ok=False):
         if arg.name == old_name:
             arg = arg.copy(name=new_name)
         if isinstance(arg, ArrayBase) and arg.shape:
-            arg = arg.copy(shape=subst_mapper(arg.shape))
+            arg = arg.copy(
+                    shape=subst_mapper(arg.shape),
+                    dim_tags=[dim_tag.map_expr(subst_mapper)
+                              for dim_tag in arg.dim_tags])
 
         new_args.append(arg)
 
     new_tvs = {}
     for tv_name, tv in kernel.temporary_variables.items():
         if tv.shape:
-            tv = tv.copy(shape=subst_mapper(tv.shape))
+            tv = tv.copy(
+                    shape=subst_mapper(tv.shape),
+                    dim_tags=[dim_tag.map_expr(subst_mapper)
+                              for dim_tag in tv.dim_tags])
 
         new_tvs[tv_name] = tv
 
diff --git a/test/test_transform.py b/test/test_transform.py
index 32cdb3710..d8c20dc21 100644
--- a/test/test_transform.py
+++ b/test/test_transform.py
@@ -674,12 +674,13 @@ def test_add_inames_for_unused_hw_axes(ctx_factory):
 
 def test_rename_argument_of_domain_params():
     knl = lp.make_kernel(
-            "{[i]: 0<=i<n}",
+            "{[i, j]: 0<=i<n and 0<=j<m}",
             """
-            y[i] = 2.0f
+            y[i, j] = 2.0f
             """)
 
     knl = lp.rename_argument(knl, "n", "N")
+    knl = lp.rename_argument(knl, "m", "M")
 
     print(lp.generate_code_v2(knl).device_code())
 
-- 
GitLab


From aba3a811cbadeace18a24a5fd607b778b78bcefc Mon Sep 17 00:00:00 2001
From: Kaushik Kulkarni <15399010+kaushikcfd@users.noreply.github.com>
Date: Wed, 14 Oct 2020 14:10:37 -0500
Subject: [PATCH 031/140] rephrase to reduce ISL calls
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Andreas Klöckner <inform@tiker.net>
---
 loopy/transform/data.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/loopy/transform/data.py b/loopy/transform/data.py
index 8368655a4..eee23e984 100644
--- a/loopy/transform/data.py
+++ b/loopy/transform/data.py
@@ -688,8 +688,9 @@ def rename_argument(kernel, old_name, new_name, existing_ok=False):
 
     new_domains = []
     for dom in kernel.domains:
-        if old_name in dom.get_var_dict():
-            dt, pos = dom.get_var_dict()[old_name]
+        dom_var_dict = dom.get_var_dict()
+        if old_name in dom_var_dict:
+            dt, pos = dom_var_dict[old_name]
             dom = dom.set_dim_name(dt, pos, new_name)
 
         new_domains.append(dom)
-- 
GitLab


From 2e8eed1fcdaf8597805e460a636071c82558c9af Mon Sep 17 00:00:00 2001
From: Nicholas Christensen <njchris2@illinois.edu>
Date: Wed, 14 Oct 2020 18:20:29 -0500
Subject: [PATCH 032/140] add tags variable to KernelArgument

---
 loopy/kernel/data.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/loopy/kernel/data.py b/loopy/kernel/data.py
index 43770ffb6..b0136a36e 100644
--- a/loopy/kernel/data.py
+++ b/loopy/kernel/data.py
@@ -263,7 +263,7 @@ def parse_tag(tag):
 # }}}
 
 
-# {{{ memory address space
+# {{{ memory address space 
 
 class AddressSpace:
     """Storage location of a variable.
@@ -339,6 +339,8 @@ class KernelArgument(ImmutableRecord):
 
         dtype = kwargs.pop("dtype", None)
 
+ 	    tags = kwargs.pop("tags", None)
+
         if "for_atomic" in kwargs:
             for_atomic = kwargs["for_atomic"]
         else:
-- 
GitLab


From 1f06347d133e395456201703b6f4518c797bd158 Mon Sep 17 00:00:00 2001
From: Nicholas Christensen <njchris2@illinois.edu>
Date: Wed, 14 Oct 2020 18:25:13 -0500
Subject: [PATCH 033/140] bump pytools requirement

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index bd03daac8..0e562ff1f 100644
--- a/setup.py
+++ b/setup.py
@@ -84,7 +84,7 @@ setup(name="loo.py",
 
       python_requires="~=3.6",
       install_requires=[
-          "pytools>=2020.4",
+          "pytools>=2020.4.2",
           "pymbolic>=2019.2",
           "genpy>=2016.1.2",
           "cgen>=2016.1",
-- 
GitLab


From d7e86afa04ba85091b8ad8e079b982594e8da0e4 Mon Sep 17 00:00:00 2001
From: Nicholas Christensen <njchris2@illinois.edu>
Date: Wed, 14 Oct 2020 18:26:10 -0500
Subject: [PATCH 034/140] bump loopy version

---
 loopy/version.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/loopy/version.py b/loopy/version.py
index fc6408dd7..fddd44479 100644
--- a/loopy/version.py
+++ b/loopy/version.py
@@ -42,7 +42,7 @@ else:
 # }}}
 
 
-VERSION = (2020, 2)
+VERSION = (2020, 2, 1)
 VERSION_STATUS = ""
 VERSION_TEXT = ".".join(str(x) for x in VERSION) + VERSION_STATUS
 
-- 
GitLab


From 1feb883c84ff9400dc76d854587698f5e2ace75e Mon Sep 17 00:00:00 2001
From: Nicholas Christensen <njchris2@illinois.edu>
Date: Wed, 14 Oct 2020 18:39:17 -0500
Subject: [PATCH 035/140] flake8 fixes

---
 loopy/kernel/data.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/loopy/kernel/data.py b/loopy/kernel/data.py
index b0136a36e..3bbe52bb2 100644
--- a/loopy/kernel/data.py
+++ b/loopy/kernel/data.py
@@ -263,7 +263,7 @@ def parse_tag(tag):
 # }}}
 
 
-# {{{ memory address space 
+# {{{ memory address space
 
 class AddressSpace:
     """Storage location of a variable.
@@ -339,7 +339,7 @@ class KernelArgument(ImmutableRecord):
 
         dtype = kwargs.pop("dtype", None)
 
- 	    tags = kwargs.pop("tags", None)
+        tags = kwargs.pop("tags", None)
 
         if "for_atomic" in kwargs:
             for_atomic = kwargs["for_atomic"]
-- 
GitLab


From e51134d6078abda7bdae2cc4f2f00299a0ea19a6 Mon Sep 17 00:00:00 2001
From: Nicholas Christensen <njchris2@illinois.edu>
Date: Wed, 14 Oct 2020 18:45:38 -0500
Subject: [PATCH 036/140] noqa on tags variable

---
 loopy/kernel/data.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/loopy/kernel/data.py b/loopy/kernel/data.py
index 3bbe52bb2..70be4ccee 100644
--- a/loopy/kernel/data.py
+++ b/loopy/kernel/data.py
@@ -339,7 +339,7 @@ class KernelArgument(ImmutableRecord):
 
         dtype = kwargs.pop("dtype", None)
 
-        tags = kwargs.pop("tags", None)
+        tags = kwargs.pop("tags", None) # noqa: F841
 
         if "for_atomic" in kwargs:
             for_atomic = kwargs["for_atomic"]
-- 
GitLab


From f08b5e1e4e9ce1f4d24399a2bbc06e3eaae86dfe Mon Sep 17 00:00:00 2001
From: Nicholas Christensen <njchris2@illinois.edu>
Date: Wed, 14 Oct 2020 18:48:20 -0500
Subject: [PATCH 037/140] additional space

---
 loopy/kernel/data.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/loopy/kernel/data.py b/loopy/kernel/data.py
index 70be4ccee..ce401d647 100644
--- a/loopy/kernel/data.py
+++ b/loopy/kernel/data.py
@@ -339,7 +339,7 @@ class KernelArgument(ImmutableRecord):
 
         dtype = kwargs.pop("dtype", None)
 
-        tags = kwargs.pop("tags", None) # noqa: F841
+        tags = kwargs.pop("tags", None)  # noqa: F841
 
         if "for_atomic" in kwargs:
             for_atomic = kwargs["for_atomic"]
-- 
GitLab


From 95968507ba5c12f42546821966707321626ada88 Mon Sep 17 00:00:00 2001
From: Kaushik Kulkarni <kaushikcfd@gmail.com>
Date: Thu, 15 Oct 2020 13:28:14 -0500
Subject: [PATCH 038/140] RuleAwareIdentityMapper.map_kernel should also map
 shape/stride expressions of variables

---
 loopy/symbolic.py       | 42 +++++++++++++++++++++++++++++++++++------
 loopy/transform/data.py | 26 +++----------------------
 2 files changed, 39 insertions(+), 29 deletions(-)

diff --git a/loopy/symbolic.py b/loopy/symbolic.py
index 19ff83431..165c09a4e 100644
--- a/loopy/symbolic.py
+++ b/loopy/symbolic.py
@@ -980,6 +980,10 @@ class RuleAwareIdentityMapper(IdentityMapper):
             return sym
 
     def __call__(self, expr, kernel, insn):
+        """
+        :arg insn: A :class:`~loopy.kernel.InstructionBase` of which *expr* is
+            a part of, or *None* if *expr*'s source is not an instruction.
+        """
         from loopy.kernel.data import InstructionBase
         assert insn is None or isinstance(insn, InstructionBase)
 
@@ -1003,7 +1007,32 @@ class RuleAwareIdentityMapper(IdentityMapper):
                         lambda expr: self(expr, kernel, insn)))
                 for insn in kernel.instructions]
 
-        return kernel.copy(instructions=new_insns)
+        from loopy.kernel.array import ArrayBase
+        from functools import partial
+        non_insn_self = partial(self, kernel=kernel, insn=None)
+
+        new_args = []
+        for arg in kernel.args:
+            if isinstance(arg, ArrayBase) and arg.shape:
+                arg = arg.copy(
+                        shape=non_insn_self(arg.shape),
+                        dim_tags=[dim_tag.map_expr(non_insn_self)
+                                  for dim_tag in arg.dim_tags])
+
+            new_args.append(arg)
+
+        new_tvs = {}
+        for tv_name, tv in kernel.temporary_variables.items():
+            if tv.shape:
+                tv = tv.copy(
+                        shape=non_insn_self(tv.shape),
+                        dim_tags=[dim_tag.map_expr(non_insn_self)
+                                  for dim_tag in tv.dim_tags])
+
+            new_tvs[tv_name] = tv
+
+        return kernel.copy(instructions=new_insns, args=new_args,
+                           temporary_variables=new_tvs)
 
 
 class RuleAwareSubstitutionMapper(RuleAwareIdentityMapper):
@@ -1014,11 +1043,12 @@ class RuleAwareSubstitutionMapper(RuleAwareIdentityMapper):
         self.within = within
 
     def map_variable(self, expr, expn_state):
-        if (expr.name in expn_state.arg_context
-                or not self.within(
-                    expn_state.kernel, expn_state.instruction, expn_state.stack)):
-            return super(RuleAwareSubstitutionMapper, self).map_variable(
-                    expr, expn_state)
+        if expn_state.instruction is not None:
+            if (expr.name in expn_state.arg_context
+                    or not self.within(expn_state.kernel, expn_state.instruction,
+                                       expn_state.stack)):
+                return super(RuleAwareSubstitutionMapper, self).map_variable(
+                        expr, expn_state)
 
         result = self.subst_func(expr)
         if result is not None:
diff --git a/loopy/transform/data.py b/loopy/transform/data.py
index eee23e984..b915dcc62 100644
--- a/loopy/transform/data.py
+++ b/loopy/transform/data.py
@@ -642,8 +642,7 @@ def rename_argument(kernel, old_name, new_name, existing_ok=False):
 
     from loopy.symbolic import (
             RuleAwareSubstitutionMapper,
-            SubstitutionRuleMappingContext,
-            SubstitutionMapper)
+            SubstitutionRuleMappingContext)
     from pymbolic.mapper.substitutor import make_subst_func
     rule_mapping_context = SubstitutionRuleMappingContext(
             kernel.substitutions, var_name_gen)
@@ -655,33 +654,15 @@ def rename_argument(kernel, old_name, new_name, existing_ok=False):
 
     # }}}
 
-    # {{{ args, temporary_variables
-
-    from loopy.kernel.array import ArrayBase
-    subst_mapper = SubstitutionMapper(make_subst_func(subst_dict))
+    # {{{ args
 
     new_args = []
     for arg in kernel.args:
         if arg.name == old_name:
             arg = arg.copy(name=new_name)
-        if isinstance(arg, ArrayBase) and arg.shape:
-            arg = arg.copy(
-                    shape=subst_mapper(arg.shape),
-                    dim_tags=[dim_tag.map_expr(subst_mapper)
-                              for dim_tag in arg.dim_tags])
 
         new_args.append(arg)
 
-    new_tvs = {}
-    for tv_name, tv in kernel.temporary_variables.items():
-        if tv.shape:
-            tv = tv.copy(
-                    shape=subst_mapper(tv.shape),
-                    dim_tags=[dim_tag.map_expr(subst_mapper)
-                              for dim_tag in tv.dim_tags])
-
-        new_tvs[tv_name] = tv
-
     # }}}
 
     # {{{ domain
@@ -697,8 +678,7 @@ def rename_argument(kernel, old_name, new_name, existing_ok=False):
 
     # }}}
 
-    return kernel.copy(domains=new_domains, args=new_args,
-            temporary_variables=new_tvs)
+    return kernel.copy(domains=new_domains, args=new_args)
 
 # }}}
 
-- 
GitLab


From 9f3c7d22fcd23fefcb3770383559177a0bbd76a2 Mon Sep 17 00:00:00 2001
From: Kaushik Kulkarni <kaushikcfd@gmail.com>
Date: Thu, 15 Oct 2020 14:11:59 -0500
Subject: [PATCH 039/140] use ArrayBase.map_exprs

---
 loopy/symbolic.py | 16 +++++-----------
 1 file changed, 5 insertions(+), 11 deletions(-)

diff --git a/loopy/symbolic.py b/loopy/symbolic.py
index a8a8f3402..db06026d4 100644
--- a/loopy/symbolic.py
+++ b/loopy/symbolic.py
@@ -1008,27 +1008,21 @@ class RuleAwareIdentityMapper(IdentityMapper):
 
         from loopy.kernel.array import ArrayBase
         from functools import partial
+
         non_insn_self = partial(self, kernel=kernel, insn=None)
 
         new_args = []
         for arg in kernel.args:
-            if isinstance(arg, ArrayBase) and arg.shape:
-                arg = arg.copy(
-                        shape=non_insn_self(arg.shape),
-                        dim_tags=[dim_tag.map_expr(non_insn_self)
-                                  for dim_tag in arg.dim_tags])
+            if isinstance(arg, ArrayBase):
+                arg = arg.map_exprs(non_insn_self)
 
             new_args.append(arg)
 
         new_tvs = {}
         for tv_name, tv in kernel.temporary_variables.items():
-            if tv.shape:
-                tv = tv.copy(
-                        shape=non_insn_self(tv.shape),
-                        dim_tags=[dim_tag.map_expr(non_insn_self)
-                                  for dim_tag in tv.dim_tags])
+            new_tvs[tv_name] = tv.map_exprs(non_insn_self)
 
-            new_tvs[tv_name] = tv
+        # variables names, domain dim names not expressions => do not map
 
         return kernel.copy(instructions=new_insns, args=new_args,
                            temporary_variables=new_tvs)
-- 
GitLab


From f0e9708b63af3b81e80fd2fdb09e469da38b814e Mon Sep 17 00:00:00 2001
From: Kaushik Kulkarni <kaushikcfd@gmail.com>
Date: Fri, 30 Oct 2020 11:02:07 -0500
Subject: [PATCH 040/140] infer insn predicates while performing check_bounds

---
 loopy/check.py     | 50 ++++++++++++++++++++++++++++++++++------------
 test/test_loopy.py | 12 +++++++++++
 2 files changed, 49 insertions(+), 13 deletions(-)

diff --git a/loopy/check.py b/loopy/check.py
index e66af04d2..a19ed8634 100644
--- a/loopy/check.py
+++ b/loopy/check.py
@@ -375,6 +375,25 @@ def check_for_data_dependent_parallel_bounds(kernel):
 
 # {{{ check access bounds
 
+def _condition_to_set(space, expr):
+    """
+    Returns an instance of :class:`islpy.Set` is *expr* can be expressed as an
+    ISL-set on *space*, if not then returns *None*.
+    """
+    from loopy.symbolic import get_dependencies
+    if get_dependencies(expr) <= frozenset(
+            space.get_var_dict()):
+        try:
+            from loopy.symbolic import isl_set_from_expr
+            return isl_set_from_expr(space, expr)
+        except ExpressionToAffineConversionError:
+            # non-affine condition: can't do much
+            return None
+    else:
+        # data-dependent condition: can't do much
+        return None
+
+
 class _AccessCheckMapper(WalkMapper):
     def __init__(self, kernel, insn_id):
         self.kernel = kernel
@@ -445,19 +464,11 @@ class _AccessCheckMapper(WalkMapper):
                         % (expr, self.insn_id, access_range, shape_domain))
 
     def map_if(self, expr, domain):
-        from loopy.symbolic import get_dependencies
-        if get_dependencies(expr.condition) <= frozenset(
-                domain.space.get_var_dict()):
-            try:
-                from loopy.symbolic import isl_set_from_expr
-                then_set = isl_set_from_expr(domain.space, expr.condition)
-                else_set = then_set.complement()
-            except ExpressionToAffineConversionError:
-                # non-affine condition: can't do much
-                then_set = else_set = isl.BasicSet.universe(domain.space)
-        else:
-            # data-dependent condition: can't do much
+        then_set = _condition_to_set(domain.space, expr.condition)
+        if then_set is None:
             then_set = else_set = isl.BasicSet.universe(domain.space)
+        else:
+            else_set = then_set.complement()
 
         self.rec(expr.then, domain & then_set)
         self.rec(expr.else_, domain & else_set)
@@ -479,8 +490,21 @@ def check_bounds(kernel):
         domain, assumptions = isl.align_two(domain, kernel.assumptions)
         domain_with_assumptions = domain & assumptions
 
+        # {{{ handle insns predicates
+
+        insn_preds_set = isl.BasicSet.universe(domain.space)
+
+        for predicate in insn.predicates:
+            predicate_as_isl_set = _condition_to_set(domain.space, predicate)
+            if predicate_as_isl_set is not None:
+                insn_preds_set = insn_preds_set & predicate_as_isl_set
+
+        # }}}
+
+        domain_with_assumptions_with_pred = domain_with_assumptions & insn_preds_set
+
         def run_acm(expr):
-            acm(expr, domain_with_assumptions)
+            acm(expr, domain_with_assumptions_with_pred)
             return expr
 
         insn.with_transformed_expressions(run_acm)
diff --git a/test/test_loopy.py b/test/test_loopy.py
index 41b5315e8..c31d008b7 100644
--- a/test/test_loopy.py
+++ b/test/test_loopy.py
@@ -2920,6 +2920,18 @@ def test_access_check_with_conditionals():
         lp.generate_code_v2(legal_but_nonaffine_condition_knl)
 
 
+def test_access_check_with_insn_predicates():
+    knl = lp.make_kernel(
+            "{[i]: 0<i<10}",
+            """
+            if i < 4
+              y[i] = 2*x[i]
+            end
+            """, [lp.GlobalArg("x", dtype=float, shape=(4,)), ...])
+
+    print(lp.generate_code_v2(knl).device_code())
+
+
 if __name__ == "__main__":
     if len(sys.argv) > 1:
         exec(sys.argv[1])
-- 
GitLab


From f3f50d07c92796f1bd92a878320b18290afc5fb1 Mon Sep 17 00:00:00 2001
From: Kaushik Kulkarni <kaushikcfd@gmail.com>
Date: Fri, 30 Oct 2020 12:10:10 -0500
Subject: [PATCH 041/140] fixes typo in _condition_to_set docs

---
 loopy/check.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/loopy/check.py b/loopy/check.py
index a19ed8634..e16c43c53 100644
--- a/loopy/check.py
+++ b/loopy/check.py
@@ -377,7 +377,7 @@ def check_for_data_dependent_parallel_bounds(kernel):
 
 def _condition_to_set(space, expr):
     """
-    Returns an instance of :class:`islpy.Set` is *expr* can be expressed as an
+    Returns an instance of :class:`islpy.Set` if *expr* can be expressed as an
     ISL-set on *space*, if not then returns *None*.
     """
     from loopy.symbolic import get_dependencies
-- 
GitLab


From 70f4980b2a16b0b648f9c18a6f4ed2d0f102aefb Mon Sep 17 00:00:00 2001
From: Nick <nicholas.curtis@uconn.edu>
Date: Thu, 13 Sep 2018 12:07:59 -0400
Subject: [PATCH 042/140] more complicated example w/ parameters, previously
 broken w/ space conflict

---
 test/test_loopy.py | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/test/test_loopy.py b/test/test_loopy.py
index c31d008b7..aa10f6334 100644
--- a/test/test_loopy.py
+++ b/test/test_loopy.py
@@ -2932,6 +2932,26 @@ def test_access_check_with_insn_predicates():
     print(lp.generate_code_v2(knl).device_code())
 
 
+def test_conditional_access_range_with_parameters(ctx_factory):
+    ctx = ctx_factory()
+    queue = cl.CommandQueue(ctx)
+
+    knl = lp.make_kernel(
+            ["{[i]: 0 <= i < 10}",
+             "{[j]: 0 <= j < problem_size}"],
+            """
+            if i < 8 and j < problem_size
+                tmp[j, i] = tmp[j, i] + 1
+            end
+           """,
+            [lp.GlobalArg("tmp", shape=("problem_size", 8,), dtype=np.int64),
+             lp.ValueArg("problem_size", dtype=np.int64)])
+
+    assert np.array_equal(knl(queue, tmp=np.arange(80).reshape((10, 8)),
+                              problem_size=10)[1][0], np.arange(1, 81).reshape(
+                                (10, 8)))
+
+
 if __name__ == "__main__":
     if len(sys.argv) > 1:
         exec(sys.argv[1])
-- 
GitLab


From 228a5ba6b321522a86da607217d3f5a4934ecb6f Mon Sep 17 00:00:00 2001
From: Nick <nicholas.curtis@uconn.edu>
Date: Thu, 13 Sep 2018 13:05:40 -0400
Subject: [PATCH 043/140] Add test where half of a logical-and predicate is
 data-dependent (and will fail) but the other half will succeed

---
 test/test_loopy.py | 21 ++++++++++++++++++++-
 1 file changed, 20 insertions(+), 1 deletion(-)

diff --git a/test/test_loopy.py b/test/test_loopy.py
index aa10f6334..e3b4829cc 100644
--- a/test/test_loopy.py
+++ b/test/test_loopy.py
@@ -2938,7 +2938,7 @@ def test_conditional_access_range_with_parameters(ctx_factory):
 
     knl = lp.make_kernel(
             ["{[i]: 0 <= i < 10}",
-             "{[j]: 0 <= j < problem_size}"],
+             "{[j]: 0 <= j < problem_size+2}"],
             """
             if i < 8 and j < problem_size
                 tmp[j, i] = tmp[j, i] + 1
@@ -2951,6 +2951,25 @@ def test_conditional_access_range_with_parameters(ctx_factory):
                               problem_size=10)[1][0], np.arange(1, 81).reshape(
                                 (10, 8)))
 
+    # test a conditional that's only _half_ data-dependent to ensure the other
+    # half works
+    knl = lp.make_kernel(
+            ["{[i]: 0 <= i < 10}",
+             "{[j]: 0 <= j < problem_size}"],
+            """
+            if i < 8 and (j + offset) < problem_size
+                tmp[j, i] = tmp[j, i] + 1
+            end
+           """,
+            [lp.GlobalArg("tmp", shape=("problem_size", 8,), dtype=np.int64),
+             lp.ValueArg("problem_size", dtype=np.int64),
+             lp.ValueArg("offset", dtype=np.int64)])
+
+    assert np.array_equal(knl(queue, tmp=np.arange(80).reshape((10, 8)),
+                              problem_size=10,
+                              offset=0)[1][0], np.arange(1, 81).reshape(
+                                (10, 8)))
+
 
 if __name__ == "__main__":
     if len(sys.argv) > 1:
-- 
GitLab


From 5f7595ac877001d010f0d7c5aeb2716e39e060a7 Mon Sep 17 00:00:00 2001
From: Kaushik Kulkarni <kaushikcfd@gmail.com>
Date: Fri, 30 Oct 2020 17:30:56 -0500
Subject: [PATCH 044/140] add insn's dependency value args to the domain's
 space

---
 loopy/check.py | 20 +++++++++++++++++++-
 1 file changed, 19 insertions(+), 1 deletion(-)

diff --git a/loopy/check.py b/loopy/check.py
index e16c43c53..bb3080945 100644
--- a/loopy/check.py
+++ b/loopy/check.py
@@ -481,11 +481,29 @@ def check_bounds(kernel):
     temp_var_names = set(kernel.temporary_variables)
     for insn in kernel.instructions:
         domain = kernel.get_inames_domain(kernel.insn_inames(insn))
+        domain_param_names = set(domain.get_var_names(dim_type.param))
 
         # data-dependent bounds? can't do much
-        if set(domain.get_var_names(dim_type.param)) & temp_var_names:
+        if domain_param_names & temp_var_names:
             continue
 
+        # {{{ add read-only ValueArgs to domain
+
+        from loopy.kernel.data import ValueArg
+
+        valueargs_to_add = ({arg.name for arg in kernel.args
+                             if isinstance(arg, ValueArg)
+                             and arg.name not in kernel.get_written_variables()}
+                            - domain_param_names) & insn.read_dependency_names()
+
+        while valueargs_to_add:
+            arg_to_add = valueargs_to_add.pop()
+            idim = domain.dim(isl.dim_type.param)
+            domain = domain.add_dims(isl.dim_type.param, 1)
+            domain = domain.set_dim_name(isl.dim_type.param, idim, arg_to_add)
+
+        # }}}
+
         acm = _AccessCheckMapper(kernel, insn.id)
         domain, assumptions = isl.align_two(domain, kernel.assumptions)
         domain_with_assumptions = domain & assumptions
-- 
GitLab


From ceeab80e631033cad3a12d4fea94630c6b906cf5 Mon Sep 17 00:00:00 2001
From: Kaushik Kulkarni <kaushikcfd@gmail.com>
Date: Sat, 31 Oct 2020 00:23:12 -0500
Subject: [PATCH 045/140] use comprehensions instead of loops

---
 loopy/symbolic.py | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/loopy/symbolic.py b/loopy/symbolic.py
index db06026d4..3fd3fcf06 100644
--- a/loopy/symbolic.py
+++ b/loopy/symbolic.py
@@ -1011,16 +1011,13 @@ class RuleAwareIdentityMapper(IdentityMapper):
 
         non_insn_self = partial(self, kernel=kernel, insn=None)
 
-        new_args = []
-        for arg in kernel.args:
-            if isinstance(arg, ArrayBase):
-                arg = arg.map_exprs(non_insn_self)
+        new_args = [
+                arg.map_exprs(non_insn_self) if isinstance(arg, ArrayBase) else arg
+                for arg in kernel.args]
 
-            new_args.append(arg)
-
-        new_tvs = {}
-        for tv_name, tv in kernel.temporary_variables.items():
-            new_tvs[tv_name] = tv.map_exprs(non_insn_self)
+        new_tvs = {
+                tv_name: tv.map_exprs(non_insn_self)
+                for tv_name, tv in kernel.temporary_variables.items()}
 
         # variables names, domain dim names not expressions => do not map
 
-- 
GitLab


From 0b8589a23b38ff369483f30e2d5992835f56dc9c Mon Sep 17 00:00:00 2001
From: Kaushik Kulkarni <kaushikcfd@gmail.com>
Date: Sat, 31 Oct 2020 00:24:23 -0500
Subject: [PATCH 046/140] test_rename_arguments: actually test renamed argument
 not present in the gen code

---
 test/test_transform.py | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/test/test_transform.py b/test/test_transform.py
index aa9572de9..ccaaebc19 100644
--- a/test/test_transform.py
+++ b/test/test_transform.py
@@ -670,7 +670,7 @@ def test_add_inames_for_unused_hw_axes(ctx_factory):
             parameters={"n": n})
 
 
-def test_rename_argument_of_domain_params():
+def test_rename_argument_of_domain_params(ctx_factory):
     knl = lp.make_kernel(
             "{[i, j]: 0<=i<n and 0<=j<m}",
             """
@@ -680,7 +680,14 @@ def test_rename_argument_of_domain_params():
     knl = lp.rename_argument(knl, "n", "N")
     knl = lp.rename_argument(knl, "m", "M")
 
-    print(lp.generate_code_v2(knl).device_code())
+    # renamed variables should not appear in the code
+    code_str = lp.generate_code_v2(knl).device_code()
+    assert code_str.find("int const n") == -1
+    assert code_str.find("int const m") == -1
+    assert code_str.find("int const N") != -1
+    assert code_str.find("int const M") != -1
+
+    lp.auto_test_vs_ref(knl, ctx_factory(), knl, parameters={"M": 10, "N": 4})
 
 
 if __name__ == "__main__":
-- 
GitLab


From 5737a4cc6da6ff556e125fcfc68c4f08e538889d Mon Sep 17 00:00:00 2001
From: Kaushik Kulkarni <kaushikcfd@gmail.com>
Date: Sat, 31 Oct 2020 00:44:43 -0500
Subject: [PATCH 047/140] explain a code branch

---
 loopy/symbolic.py | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/loopy/symbolic.py b/loopy/symbolic.py
index 3fd3fcf06..e03e27a48 100644
--- a/loopy/symbolic.py
+++ b/loopy/symbolic.py
@@ -1033,12 +1033,15 @@ class RuleAwareSubstitutionMapper(RuleAwareIdentityMapper):
         self.within = within
 
     def map_variable(self, expr, expn_state):
-        if expn_state.instruction is not None:
-            if (expr.name in expn_state.arg_context
-                    or not self.within(expn_state.kernel, expn_state.instruction,
-                                       expn_state.stack)):
-                return super().map_variable(
-                        expr, expn_state)
+        if expn_state.instruction is None:
+            # expr not a part of instruction => mimic SubstitutionMapper
+            return SubstitutionMapper.map_variable(self, expr)
+
+        if (expr.name in expn_state.arg_context
+                or not self.within(expn_state.kernel, expn_state.instruction,
+                                   expn_state.stack)):
+            return super().map_variable(
+                    expr, expn_state)
 
         result = self.subst_func(expr)
         if result is not None:
-- 
GitLab


From a5cc253720e421b70e8dfb04e02a0e2a9d89fffd Mon Sep 17 00:00:00 2001
From: Kaushik Kulkarni <kaushikcfd@gmail.com>
Date: Sat, 31 Oct 2020 00:47:44 -0500
Subject: [PATCH 048/140] get rid of spurious diff

---
 loopy/symbolic.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/loopy/symbolic.py b/loopy/symbolic.py
index e03e27a48..6a6f55196 100644
--- a/loopy/symbolic.py
+++ b/loopy/symbolic.py
@@ -1038,8 +1038,8 @@ class RuleAwareSubstitutionMapper(RuleAwareIdentityMapper):
             return SubstitutionMapper.map_variable(self, expr)
 
         if (expr.name in expn_state.arg_context
-                or not self.within(expn_state.kernel, expn_state.instruction,
-                                   expn_state.stack)):
+                or not self.within(
+                    expn_state.kernel, expn_state.instruction, expn_state.stack)):
             return super().map_variable(
                     expr, expn_state)
 
-- 
GitLab


From 14aba3bf1bd09763541df0db4fa3460202b02f2b Mon Sep 17 00:00:00 2001
From: Kaushik Kulkarni <kaushikcfd@gmail.com>
Date: Sat, 31 Oct 2020 16:23:14 -0500
Subject: [PATCH 049/140] RuleAwareSubstitutionMapper is not a SubstitutionMapper
 for within=False => implement the argument expression mappings in an adhoc
 manner

---
 loopy/symbolic.py       | 27 ++-------------------------
 loopy/transform/data.py | 21 +++++++++++++++++++--
 2 files changed, 21 insertions(+), 27 deletions(-)

diff --git a/loopy/symbolic.py b/loopy/symbolic.py
index 6a6f55196..dfacb4438 100644
--- a/loopy/symbolic.py
+++ b/loopy/symbolic.py
@@ -979,10 +979,6 @@ class RuleAwareIdentityMapper(IdentityMapper):
             return sym
 
     def __call__(self, expr, kernel, insn):
-        """
-        :arg insn: A :class:`~loopy.kernel.InstructionBase` of which *expr* is
-            a part of, or *None* if *expr*'s source is not an instruction.
-        """
         from loopy.kernel.data import InstructionBase
         assert insn is None or isinstance(insn, InstructionBase)
 
@@ -1006,23 +1002,7 @@ class RuleAwareIdentityMapper(IdentityMapper):
                         lambda expr: self(expr, kernel, insn)))
                 for insn in kernel.instructions]
 
-        from loopy.kernel.array import ArrayBase
-        from functools import partial
-
-        non_insn_self = partial(self, kernel=kernel, insn=None)
-
-        new_args = [
-                arg.map_exprs(non_insn_self) if isinstance(arg, ArrayBase) else arg
-                for arg in kernel.args]
-
-        new_tvs = {
-                tv_name: tv.map_exprs(non_insn_self)
-                for tv_name, tv in kernel.temporary_variables.items()}
-
-        # variables names, domain dim names not expressions => do not map
-
-        return kernel.copy(instructions=new_insns, args=new_args,
-                           temporary_variables=new_tvs)
+        return kernel.copy(instructions=new_insns)
 
 
 class RuleAwareSubstitutionMapper(RuleAwareIdentityMapper):
@@ -1033,13 +1013,10 @@ class RuleAwareSubstitutionMapper(RuleAwareIdentityMapper):
         self.within = within
 
     def map_variable(self, expr, expn_state):
-        if expn_state.instruction is None:
-            # expr not a part of instruction => mimic SubstitutionMapper
-            return SubstitutionMapper.map_variable(self, expr)
-
         if (expr.name in expn_state.arg_context
                 or not self.within(
                     expn_state.kernel, expn_state.instruction, expn_state.stack)):
+            # expr not in within => do nothing (call IdentityMapper)
             return super().map_variable(
                     expr, expn_state)
 
diff --git a/loopy/transform/data.py b/loopy/transform/data.py
index 82d770808..9c4725c0d 100644
--- a/loopy/transform/data.py
+++ b/loopy/transform/data.py
@@ -637,6 +637,7 @@ def rename_argument(kernel, old_name, new_name, existing_ok=False):
     subst_dict = {old_name: var(new_name)}
 
     from loopy.symbolic import (
+            SubstitutionMapper,
             RuleAwareSubstitutionMapper,
             SubstitutionRuleMappingContext)
     from pymbolic.mapper.substitutor import make_subst_func
@@ -646,21 +647,36 @@ def rename_argument(kernel, old_name, new_name, existing_ok=False):
                     make_subst_func(subst_dict),
                     within=lambda kernel, insn, stack: True)
 
-    kernel = smap.map_kernel(kernel)
+    kernel = rule_mapping_context.finish_kernel(smap.map_kernel(kernel))
 
     # }}}
 
+    subst_mapper = SubstitutionMapper(make_subst_func(subst_dict))
+
     # {{{ args
 
+    from loopy.kernel.array import ArrayBase
+
     new_args = []
     for arg in kernel.args:
         if arg.name == old_name:
             arg = arg.copy(name=new_name)
 
+        if isinstance(arg, ArrayBase):
+            arg = arg.map_exprs(subst_mapper)
+
         new_args.append(arg)
 
     # }}}
 
+    # {{{ tvs
+
+    new_tvs = {
+            tv_name: tv.map_exprs(subst_mapper)
+            for tv_name, tv in kernel.temporary_variables.items()}
+
+    # }}}
+
     # {{{ domain
 
     new_domains = []
@@ -674,7 +690,8 @@ def rename_argument(kernel, old_name, new_name, existing_ok=False):
 
     # }}}
 
-    return kernel.copy(domains=new_domains, args=new_args)
+    return kernel.copy(domains=new_domains, args=new_args,
+            temporary_variables=new_tvs)
 
 # }}}
 
-- 
GitLab


From 9fb45d266e1484262cb29b1e168031ca1ba738d9 Mon Sep 17 00:00:00 2001
From: Kaushik Kulkarni <kaushikcfd@gmail.com>
Date: Tue, 3 Nov 2020 02:00:12 -0600
Subject: [PATCH 050/140] move _condition_to_set to loopy.symbolic

---
 loopy/check.py    | 28 +++++-----------------------
 loopy/symbolic.py | 19 +++++++++++++++++++
 2 files changed, 24 insertions(+), 23 deletions(-)

diff --git a/loopy/check.py b/loopy/check.py
index bb3080945..24d8dba93 100644
--- a/loopy/check.py
+++ b/loopy/check.py
@@ -24,8 +24,7 @@ THE SOFTWARE.
 from islpy import dim_type
 import islpy as isl
 from loopy.symbolic import WalkMapper
-from loopy.diagnostic import (LoopyError, WriteRaceConditionWarning,
-        warn_with_kernel, ExpressionToAffineConversionError)
+from loopy.diagnostic import LoopyError, WriteRaceConditionWarning, warn_with_kernel
 from loopy.type_inference import TypeInferenceMapper
 from loopy.kernel.instruction import (MultiAssignmentBase, CallInstruction,
         CInstruction, _DataObliviousInstruction)
@@ -375,25 +374,6 @@ def check_for_data_dependent_parallel_bounds(kernel):
 
 # {{{ check access bounds
 
-def _condition_to_set(space, expr):
-    """
-    Returns an instance of :class:`islpy.Set` if *expr* can be expressed as an
-    ISL-set on *space*, if not then returns *None*.
-    """
-    from loopy.symbolic import get_dependencies
-    if get_dependencies(expr) <= frozenset(
-            space.get_var_dict()):
-        try:
-            from loopy.symbolic import isl_set_from_expr
-            return isl_set_from_expr(space, expr)
-        except ExpressionToAffineConversionError:
-            # non-affine condition: can't do much
-            return None
-    else:
-        # data-dependent condition: can't do much
-        return None
-
-
 class _AccessCheckMapper(WalkMapper):
     def __init__(self, kernel, insn_id):
         self.kernel = kernel
@@ -464,7 +444,8 @@ class _AccessCheckMapper(WalkMapper):
                         % (expr, self.insn_id, access_range, shape_domain))
 
     def map_if(self, expr, domain):
-        then_set = _condition_to_set(domain.space, expr.condition)
+        from loopy.symbolic import condition_to_set
+        then_set = condition_to_set(domain.space, expr.condition)
         if then_set is None:
             then_set = else_set = isl.BasicSet.universe(domain.space)
         else:
@@ -513,7 +494,8 @@ def check_bounds(kernel):
         insn_preds_set = isl.BasicSet.universe(domain.space)
 
         for predicate in insn.predicates:
-            predicate_as_isl_set = _condition_to_set(domain.space, predicate)
+            from loopy.symbolic import condition_to_set
+            predicate_as_isl_set = condition_to_set(domain.space, predicate)
             if predicate_as_isl_set is not None:
                 insn_preds_set = insn_preds_set & predicate_as_isl_set
 
diff --git a/loopy/symbolic.py b/loopy/symbolic.py
index 7e5de3164..6d428d606 100644
--- a/loopy/symbolic.py
+++ b/loopy/symbolic.py
@@ -1679,6 +1679,25 @@ def isl_set_from_expr(space, expr):
 
     return set_
 
+
+def condition_to_set(space, expr):
+    """
+    Returns an instance of :class:`islpy.Set` if *expr* can be expressed as an
+    ISL-set on *space*, if not then returns *None*.
+    """
+    from loopy.symbolic import get_dependencies
+    if get_dependencies(expr) <= frozenset(
+            space.get_var_dict()):
+        try:
+            from loopy.symbolic import isl_set_from_expr
+            return isl_set_from_expr(space, expr)
+        except ExpressionToAffineConversionError:
+            # non-affine condition: can't do much
+            return None
+    else:
+        # data-dependent condition: can't do much
+        return None
+
 # }}}
 
 
-- 
GitLab


From 74211876ac6a6424372464dab90f6ff331d87669 Mon Sep 17 00:00:00 2001
From: Kaushik Kulkarni <kaushikcfd@gmail.com>
Date: Tue, 3 Nov 2020 02:01:34 -0600
Subject: [PATCH 051/140] docs: justifies what we do when condition not
 expressible as ISL set

---
 loopy/check.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/loopy/check.py b/loopy/check.py
index 24d8dba93..a8bb7612c 100644
--- a/loopy/check.py
+++ b/loopy/check.py
@@ -447,6 +447,8 @@ class _AccessCheckMapper(WalkMapper):
         from loopy.symbolic import condition_to_set
         then_set = condition_to_set(domain.space, expr.condition)
         if then_set is None:
+            # condition cannot be expressed as an ISL set => ignore
+            # any domain restriction it would impose
             then_set = else_set = isl.BasicSet.universe(domain.space)
         else:
             else_set = then_set.complement()
-- 
GitLab


From 82e3d6d827e286ced59d440024d0f653a4b36931 Mon Sep 17 00:00:00 2001
From: Kaushik Kulkarni <kaushikcfd@gmail.com>
Date: Tue, 3 Nov 2020 02:02:53 -0600
Subject: [PATCH 052/140] formatting: while -> for

---
 loopy/check.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/loopy/check.py b/loopy/check.py
index a8bb7612c..221914657 100644
--- a/loopy/check.py
+++ b/loopy/check.py
@@ -479,8 +479,7 @@ def check_bounds(kernel):
                              and arg.name not in kernel.get_written_variables()}
                             - domain_param_names) & insn.read_dependency_names()
 
-        while valueargs_to_add:
-            arg_to_add = valueargs_to_add.pop()
+        for arg_to_add in valueargs_to_add:
             idim = domain.dim(isl.dim_type.param)
             domain = domain.add_dims(isl.dim_type.param, 1)
             domain = domain.set_dim_name(isl.dim_type.param, idim, arg_to_add)
-- 
GitLab


From b7f397d5526fc36c3d446cfabece2997b4150961 Mon Sep 17 00:00:00 2001
From: Kaushik Kulkarni <kaushikcfd@gmail.com>
Date: Tue, 3 Nov 2020 02:44:01 -0600
Subject: [PATCH 053/140] re-organize check_bounds and move parts of code to
 InstructionBase.get_domain

---
 loopy/check.py              | 37 +++--------------------------
 loopy/kernel/instruction.py | 46 +++++++++++++++++++++++++++++++++++++
 2 files changed, 49 insertions(+), 34 deletions(-)

diff --git a/loopy/check.py b/loopy/check.py
index 221914657..e8a5f9dca 100644
--- a/loopy/check.py
+++ b/loopy/check.py
@@ -463,49 +463,18 @@ def check_bounds(kernel):
     """
     temp_var_names = set(kernel.temporary_variables)
     for insn in kernel.instructions:
-        domain = kernel.get_inames_domain(kernel.insn_inames(insn))
-        domain_param_names = set(domain.get_var_names(dim_type.param))
+        domain = insn.get_domain(kernel)
 
         # data-dependent bounds? can't do much
-        if domain_param_names & temp_var_names:
+        if set(domain.get_var_names(dim_type.param)) & temp_var_names:
             continue
 
-        # {{{ add read-only ValueArgs to domain
-
-        from loopy.kernel.data import ValueArg
-
-        valueargs_to_add = ({arg.name for arg in kernel.args
-                             if isinstance(arg, ValueArg)
-                             and arg.name not in kernel.get_written_variables()}
-                            - domain_param_names) & insn.read_dependency_names()
-
-        for arg_to_add in valueargs_to_add:
-            idim = domain.dim(isl.dim_type.param)
-            domain = domain.add_dims(isl.dim_type.param, 1)
-            domain = domain.set_dim_name(isl.dim_type.param, idim, arg_to_add)
-
-        # }}}
-
         acm = _AccessCheckMapper(kernel, insn.id)
         domain, assumptions = isl.align_two(domain, kernel.assumptions)
         domain_with_assumptions = domain & assumptions
 
-        # {{{ handle insns predicates
-
-        insn_preds_set = isl.BasicSet.universe(domain.space)
-
-        for predicate in insn.predicates:
-            from loopy.symbolic import condition_to_set
-            predicate_as_isl_set = condition_to_set(domain.space, predicate)
-            if predicate_as_isl_set is not None:
-                insn_preds_set = insn_preds_set & predicate_as_isl_set
-
-        # }}}
-
-        domain_with_assumptions_with_pred = domain_with_assumptions & insn_preds_set
-
         def run_acm(expr):
-            acm(expr, domain_with_assumptions_with_pred)
+            acm(expr, domain_with_assumptions)
             return expr
 
         insn.with_transformed_expressions(run_acm)
diff --git a/loopy/kernel/instruction.py b/loopy/kernel/instruction.py
index 791ea89a6..8471d39f0 100644
--- a/loopy/kernel/instruction.py
+++ b/loopy/kernel/instruction.py
@@ -25,6 +25,7 @@ from pytools import ImmutableRecord, memoize_method
 from loopy.diagnostic import LoopyError
 from loopy.tools import Optional
 from warnings import warn
+import islpy as isl
 
 
 # {{{ instructions: base class
@@ -146,6 +147,7 @@ class InstructionBase(ImmutableRecord):
     .. automethod:: with_transformed_expressions
     .. automethod:: write_dependency_names
     .. automethod:: dependency_names
+    .. automethod:: get_domain
     .. automethod:: copy
     """
 
@@ -409,6 +411,50 @@ class InstructionBase(ImmutableRecord):
         self.within_inames = (
                 intern_frozenset_of_ids(self.within_inames))
 
+    def get_domain(self, kernel):
+        """
+        Returns an instance of :class:`islpy.Set` for the instruction's domain.
+
+        .. note::
+
+            Does not take into account additional hints available through
+            :attr:`loopy.LoopKernel.assumptions`.
+        """
+        domain = kernel.get_inames_domain(self.within_inames)
+
+        # {{{ add read-only ValueArgs to domain
+
+        from loopy.kernel.data import ValueArg
+
+        valueargs_to_add = ({arg.name for arg in kernel.args
+                             if isinstance(arg, ValueArg)
+                             and arg.name not in kernel.get_written_variables()}
+                            - set(domain.get_var_names(isl.dim_type.param)))
+
+        # only consider valueargs relevant to *self*
+        valueargs_to_add = valueargs_to_add & self.read_dependency_names()
+
+        for arg_to_add in valueargs_to_add:
+            idim = domain.dim(isl.dim_type.param)
+            domain = domain.add_dims(isl.dim_type.param, 1)
+            domain = domain.set_dim_name(isl.dim_type.param, idim, arg_to_add)
+
+        # }}}
+
+        # {{{ enforce restriction from predicates
+
+        insn_preds_set = isl.BasicSet.universe(domain.space)
+
+        for predicate in self.predicates:
+            from loopy.symbolic import condition_to_set
+            predicate_as_isl_set = condition_to_set(domain.space, predicate)
+            if predicate_as_isl_set is not None:
+                insn_preds_set = insn_preds_set & predicate_as_isl_set
+
+        # }}}
+
+        return domain & insn_preds_set
+
 # }}}
 
 
-- 
GitLab


From 430d54246e0b54a39b49305dee85584aa56626ab Mon Sep 17 00:00:00 2001
From: Andreas Kloeckner <inform@tiker.net>
Date: Wed, 4 Nov 2020 11:32:40 -0600
Subject: [PATCH 054/140] simplify_using_aff: Restrict usage of inames to those
 that are already there

---
 loopy/symbolic.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/loopy/symbolic.py b/loopy/symbolic.py
index 7e5de3164..cda89aa70 100644
--- a/loopy/symbolic.py
+++ b/loopy/symbolic.py
@@ -1525,7 +1525,13 @@ def qpolynomial_from_expr(space, expr):
 def simplify_using_aff(kernel, expr):
     inames = get_dependencies(expr) & kernel.all_inames()
 
-    domain = kernel.get_inames_domain(inames)
+    # FIXME: Ideally, we should find out what inames are usable and allow
+    # the simplification to use all of those. For now, fall back to making
+    # sure that the simplification only uses inames that were already there.
+    domain = (
+            kernel
+            .get_inames_domain(inames)
+            .project_out_except(inames, [dim_type.set]))
 
     try:
         aff = guarded_aff_from_expr(domain.space, expr)
-- 
GitLab


From c8212b6a3c38bd6d94f54e1abd76e5d2054a75ce Mon Sep 17 00:00:00 2001
From: Andreas Kloeckner <inform@tiker.net>
Date: Wed, 4 Nov 2020 13:39:53 -0600
Subject: [PATCH 055/140] Factor set processing in split_iname into separate
 function, rename variable split_iname -> iname_to_split

---
 loopy/transform/iname.py | 138 +++++++++++++++++++++------------------
 1 file changed, 74 insertions(+), 64 deletions(-)

diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py
index 241c1492d..4c47abc39 100644
--- a/loopy/transform/iname.py
+++ b/loopy/transform/iname.py
@@ -118,25 +118,25 @@ def prioritize_loops(kernel, loop_priority):
 
 class _InameSplitter(RuleAwareIdentityMapper):
     def __init__(self, rule_mapping_context, within,
-            split_iname, outer_iname, inner_iname, replacement_index):
+            iname_to_split, outer_iname, inner_iname, replacement_index):
         super().__init__(rule_mapping_context)
 
         self.within = within
 
-        self.split_iname = split_iname
+        self.iname_to_split = iname_to_split
         self.outer_iname = outer_iname
         self.inner_iname = inner_iname
 
         self.replacement_index = replacement_index
 
     def map_reduction(self, expr, expn_state):
-        if (self.split_iname in expr.inames
-                and self.split_iname not in expn_state.arg_context
+        if (self.iname_to_split in expr.inames
+                and self.iname_to_split not in expn_state.arg_context
                 and self.within(
                     expn_state.kernel,
                     expn_state.instruction)):
             new_inames = list(expr.inames)
-            new_inames.remove(self.split_iname)
+            new_inames.remove(self.iname_to_split)
             new_inames.extend([self.outer_iname, self.inner_iname])
 
             from loopy.symbolic import Reduction
@@ -147,8 +147,8 @@ class _InameSplitter(RuleAwareIdentityMapper):
             return super().map_reduction(expr, expn_state)
 
     def map_variable(self, expr, expn_state):
-        if (expr.name == self.split_iname
-                and self.split_iname not in expn_state.arg_context
+        if (expr.name == self.iname_to_split
+                and self.iname_to_split not in expn_state.arg_context
                 and self.within(
                     expn_state.kernel,
                     expn_state.instruction)):
@@ -157,7 +157,49 @@ class _InameSplitter(RuleAwareIdentityMapper):
             return super().map_variable(expr, expn_state)
 
 
-def _split_iname_backend(kernel, split_iname,
+def _split_iname_in_set(s, iname_to_split, inner_iname, outer_iname, fixed_length,
+        fixed_length_is_inner, split_iname_should_remain):
+    var_dict = s.get_var_dict()
+
+    if iname_to_split not in var_dict:
+        return s
+
+    orig_dim_type, _ = var_dict[iname_to_split]
+
+    outer_var_nr = s.dim(orig_dim_type)
+    inner_var_nr = s.dim(orig_dim_type)+1
+
+    s = s.add_dims(orig_dim_type, 2)
+    s = s.set_dim_name(orig_dim_type, outer_var_nr, outer_iname)
+    s = s.set_dim_name(orig_dim_type, inner_var_nr, inner_iname)
+
+    from loopy.isl_helpers import make_slab
+
+    if fixed_length_is_inner:
+        fixed_iname, var_length_iname = inner_iname, outer_iname
+    else:
+        fixed_iname, var_length_iname = outer_iname, inner_iname
+
+    space = s.get_space()
+    fixed_constraint_set = (
+            make_slab(space, fixed_iname, 0, fixed_length)
+            # name = fixed_iname + fixed_length*var_length_iname
+            .add_constraint(isl.Constraint.eq_from_names(
+                space, {
+                    iname_to_split: 1,
+                    fixed_iname: -1,
+                    var_length_iname: -fixed_length})))
+
+    name_dim_type, name_idx = space.get_var_dict()[iname_to_split]
+    s = s.intersect(fixed_constraint_set)
+
+    if split_iname_should_remain:
+        return s
+    else:
+        return s.project_out(name_dim_type, name_idx, 1)
+
+
+def _split_iname_backend(kernel, iname_to_split,
         fixed_length, fixed_length_is_inner,
         make_new_loop_index,
         outer_iname=None, inner_iname=None,
@@ -186,88 +228,55 @@ def _split_iname_backend(kernel, split_iname,
 
     # }}}
 
-    existing_tags = kernel.iname_tags(split_iname)
+    existing_tags = kernel.iname_tags(iname_to_split)
     from loopy.kernel.data import ForceSequentialTag, filter_iname_tags_by_type
     if (do_tagged_check and existing_tags
             and not filter_iname_tags_by_type(existing_tags, ForceSequentialTag)):
-        raise LoopyError("cannot split already tagged iname '%s'" % split_iname)
+        raise LoopyError(f"cannot split already tagged iname '{iname_to_split}'")
 
-    if split_iname not in kernel.all_inames():
-        raise ValueError("cannot split loop for unknown variable '%s'" % split_iname)
+    if iname_to_split not in kernel.all_inames():
+        raise ValueError(
+                f"cannot split loop for unknown variable '{iname_to_split}'")
 
     applied_iname_rewrites = kernel.applied_iname_rewrites[:]
 
     vng = kernel.get_var_name_generator()
 
     if outer_iname is None:
-        outer_iname = vng(split_iname+"_outer")
+        outer_iname = vng(iname_to_split+"_outer")
     if inner_iname is None:
-        inner_iname = vng(split_iname+"_inner")
-
-    def process_set(s):
-        var_dict = s.get_var_dict()
-
-        if split_iname not in var_dict:
-            return s
-
-        orig_dim_type, _ = var_dict[split_iname]
-
-        outer_var_nr = s.dim(orig_dim_type)
-        inner_var_nr = s.dim(orig_dim_type)+1
-
-        s = s.add_dims(orig_dim_type, 2)
-        s = s.set_dim_name(orig_dim_type, outer_var_nr, outer_iname)
-        s = s.set_dim_name(orig_dim_type, inner_var_nr, inner_iname)
-
-        from loopy.isl_helpers import make_slab
-
-        if fixed_length_is_inner:
-            fixed_iname, var_length_iname = inner_iname, outer_iname
-        else:
-            fixed_iname, var_length_iname = outer_iname, inner_iname
-
-        space = s.get_space()
-        fixed_constraint_set = (
-                make_slab(space, fixed_iname, 0, fixed_length)
-                # name = fixed_iname + fixed_length*var_length_iname
-                .add_constraint(isl.Constraint.eq_from_names(
-                    space, {
-                        split_iname: 1,
-                        fixed_iname: -1,
-                        var_length_iname: -fixed_length})))
-
-        name_dim_type, name_idx = space.get_var_dict()[split_iname]
-        s = s.intersect(fixed_constraint_set)
+        inner_iname = vng(iname_to_split+"_inner")
 
-        def _project_out_only_if_all_instructions_in_within():
-            for insn in kernel.instructions:
-                if split_iname in insn.within_inames and (
-                        not within(kernel, insn)):
-                    return s
-
-            return s.project_out(name_dim_type, name_idx, 1)
-
-        return _project_out_only_if_all_instructions_in_within()
+    all_insns_using_iname_in_within = all(
+            # "does not use iname or is targeted by the within"
+            # <=>
+            # "'uses iname' implies within"
+            iname_to_split not in insn.within_inames or within(kernel, insn)
+            for insn in kernel.instructions)
 
-    new_domains = [process_set(dom) for dom in kernel.domains]
+    new_domains = [
+            _split_iname_in_set(dom, iname_to_split, inner_iname, outer_iname,
+                fixed_length, fixed_length_is_inner,
+                split_iname_should_remain=not all_insns_using_iname_in_within)
+            for dom in kernel.domains]
 
     from pymbolic import var
     inner = var(inner_iname)
     outer = var(outer_iname)
     new_loop_index = make_new_loop_index(inner, outer)
 
-    subst_map = {var(split_iname): new_loop_index}
+    subst_map = {var(iname_to_split): new_loop_index}
     applied_iname_rewrites.append(subst_map)
 
     # {{{ update within_inames
 
     new_insns = []
     for insn in kernel.instructions:
-        if split_iname in insn.within_inames and (
+        if iname_to_split in insn.within_inames and (
                 within(kernel, insn)):
             new_within_inames = (
                     (insn.within_inames.copy()
-                    - frozenset([split_iname]))
+                    - frozenset([iname_to_split]))
                     | frozenset([outer_iname, inner_iname]))
         else:
             new_within_inames = insn.within_inames
@@ -286,7 +295,7 @@ def _split_iname_backend(kernel, split_iname,
     for prio in kernel.loop_priority:
         new_prio = ()
         for prio_iname in prio:
-            if prio_iname == split_iname:
+            if prio_iname == iname_to_split:
                 new_prio = new_prio + (outer_iname, inner_iname)
             else:
                 new_prio = new_prio + (prio_iname,)
@@ -302,7 +311,7 @@ def _split_iname_backend(kernel, split_iname,
     rule_mapping_context = SubstitutionRuleMappingContext(
             kernel.substitutions, kernel.get_var_name_generator())
     ins = _InameSplitter(rule_mapping_context, within,
-            split_iname, outer_iname, inner_iname, new_loop_index)
+            iname_to_split, outer_iname, inner_iname, new_loop_index)
 
     kernel = ins.map_kernel(kernel)
     kernel = rule_mapping_context.finish_kernel(kernel)
@@ -319,6 +328,7 @@ def _split_iname_backend(kernel, split_iname,
 # {{{ split iname
 
 def split_iname(kernel, split_iname, inner_length,
+        *,
         outer_iname=None, inner_iname=None,
         outer_tag=None, inner_tag=None,
         slabs=(0, 0), do_tagged_check=True,
-- 
GitLab


From 0a18085ff2016fe4e71edc68f3f6bf54041d68bb Mon Sep 17 00:00:00 2001
From: Andreas Kloeckner <inform@tiker.net>
Date: Wed, 4 Nov 2020 14:01:34 -0600
Subject: [PATCH 056/140] Fix split_iname logic to use temporary duplicate
 iname for split, independent of original

---
 loopy/transform/iname.py | 31 ++++++++++++++++++++++---------
 1 file changed, 22 insertions(+), 9 deletions(-)

diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py
index 4c47abc39..f52969669 100644
--- a/loopy/transform/iname.py
+++ b/loopy/transform/iname.py
@@ -165,13 +165,25 @@ def _split_iname_in_set(s, iname_to_split, inner_iname, outer_iname, fixed_lengt
         return s
 
     orig_dim_type, _ = var_dict[iname_to_split]
+    assert orig_dim_type == dim_type.set
+    del orig_dim_type
 
-    outer_var_nr = s.dim(orig_dim_type)
-    inner_var_nr = s.dim(orig_dim_type)+1
+    # NB: dup_iname_to_split is not a globally valid identifier: only unique
+    # wrt the set s.
+    from pytools import generate_unique_names
+    for dup_iname_to_split in generate_unique_names(f"dup_{iname_to_split}"):
+        if dup_iname_to_split not in var_dict:
+            break
 
-    s = s.add_dims(orig_dim_type, 2)
-    s = s.set_dim_name(orig_dim_type, outer_var_nr, outer_iname)
-    s = s.set_dim_name(orig_dim_type, inner_var_nr, inner_iname)
+    from loopy.isl_helpers import duplicate_axes
+    s = duplicate_axes(s, (iname_to_split,), (dup_iname_to_split,))
+
+    outer_var_nr = s.dim(dim_type.set)
+    inner_var_nr = s.dim(dim_type.set)+1
+
+    s = s.add_dims(dim_type.set, 2)
+    s = s.set_dim_name(dim_type.set, outer_var_nr, outer_iname)
+    s = s.set_dim_name(dim_type.set, inner_var_nr, inner_iname)
 
     from loopy.isl_helpers import make_slab
 
@@ -181,21 +193,22 @@ def _split_iname_in_set(s, iname_to_split, inner_iname, outer_iname, fixed_lengt
         fixed_iname, var_length_iname = outer_iname, inner_iname
 
     space = s.get_space()
-    fixed_constraint_set = (
+    s = s & (
             make_slab(space, fixed_iname, 0, fixed_length)
             # name = fixed_iname + fixed_length*var_length_iname
             .add_constraint(isl.Constraint.eq_from_names(
                 space, {
-                    iname_to_split: 1,
+                    dup_iname_to_split: 1,
                     fixed_iname: -1,
                     var_length_iname: -fixed_length})))
 
-    name_dim_type, name_idx = space.get_var_dict()[iname_to_split]
-    s = s.intersect(fixed_constraint_set)
+    _, dup_name_idx = space.get_var_dict()[dup_iname_to_split]
+    s = s.project_out(dim_type.set, dup_name_idx, 1)
 
     if split_iname_should_remain:
         return s
     else:
+        name_dim_type, name_idx = space.get_var_dict()[iname_to_split]
         return s.project_out(name_dim_type, name_idx, 1)
 
 
-- 
GitLab


From 355a0c37913ff92c11bec8f6723c59fff7989b18 Mon Sep 17 00:00:00 2001
From: Andreas Kloeckner <inform@tiker.net>
Date: Wed, 4 Nov 2020 14:02:16 -0600
Subject: [PATCH 057/140] Add test_split_iname_within (gh-163)

---
 test/test_loopy.py | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/test/test_loopy.py b/test/test_loopy.py
index 41b5315e8..753a8df2e 100644
--- a/test/test_loopy.py
+++ b/test/test_loopy.py
@@ -2920,6 +2920,33 @@ def test_access_check_with_conditionals():
         lp.generate_code_v2(legal_but_nonaffine_condition_knl)
 
 
+def test_split_iname_within(ctx_factory):
+    # https://github.com/inducer/loopy/issues/163
+    ctx = ctx_factory()
+
+    # Two bugs:
+    # - simplify_using_aff introduces variables that have no business being there
+    # - independent copies of i/j should remain
+    knl = lp.make_kernel(
+        "{ [i, j]: 0<=i<n and 0<=j<n }",
+        """
+        x[i, j] = 3 {id=a}
+        y[i, j] = 2 * y[i, j] {id=b}
+        """,
+        options=dict(write_code=True))
+
+    ref_knl = knl
+
+    knl = lp.split_iname(knl, "j", 4,
+                         outer_tag="g.0", inner_tag="l.0",
+                         within="id:a")
+    knl = lp.split_iname(knl, "i", 4,
+                         outer_tag="g.0", inner_tag="l.0",
+                         within="id:b")
+
+    lp.auto_test_vs_ref(ref_knl, ctx, knl, parameters=dict(n=5))
+
+
 if __name__ == "__main__":
     if len(sys.argv) > 1:
         exec(sys.argv[1])
-- 
GitLab


From 6a93094d278224dd7ec086eb0c0702c46bc907a7 Mon Sep 17 00:00:00 2001
From: Andreas Kloeckner <inform@tiker.net>
Date: Wed, 4 Nov 2020 14:04:07 -0600
Subject: [PATCH 058/140] Remove stray temp comment from
 test_split_iname_within

---
 test/test_loopy.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/test/test_loopy.py b/test/test_loopy.py
index 753a8df2e..2ac08f8c4 100644
--- a/test/test_loopy.py
+++ b/test/test_loopy.py
@@ -2924,9 +2924,6 @@ def test_split_iname_within(ctx_factory):
     # https://github.com/inducer/loopy/issues/163
     ctx = ctx_factory()
 
-    # Two bugs:
-    # - simplify_using_aff introduces variables that have no business being there
-    # - independent copies of i/j should remain
     knl = lp.make_kernel(
         "{ [i, j]: 0<=i<n and 0<=j<n }",
         """
-- 
GitLab


From fabd9c9f5ded8ea84a0aba265b8efec6ad1da0ca Mon Sep 17 00:00:00 2001
From: Andreas Kloeckner <inform@tiker.net>
Date: Mon, 9 Nov 2020 15:07:36 -0600
Subject: [PATCH 059/140] _split_iname_in_set: Remove assertion that
 orig_dim_type is set

---
 loopy/transform/iname.py | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py
index f52969669..05d6562f5 100644
--- a/loopy/transform/iname.py
+++ b/loopy/transform/iname.py
@@ -165,7 +165,8 @@ def _split_iname_in_set(s, iname_to_split, inner_iname, outer_iname, fixed_lengt
         return s
 
     orig_dim_type, _ = var_dict[iname_to_split]
-    assert orig_dim_type == dim_type.set
+    # orig_dim_type may be set or param (the latter if the iname is
+    # used as a parameter in a subdomain).
     del orig_dim_type
 
     # NB: dup_iname_to_split is not a globally valid identifier: only uniqure
@@ -178,12 +179,12 @@ def _split_iname_in_set(s, iname_to_split, inner_iname, outer_iname, fixed_lengt
     from loopy.isl_helpers import duplicate_axes
     s = duplicate_axes(s, (iname_to_split,), (dup_iname_to_split,))
 
-    outer_var_nr = s.dim(dim_type.set)
-    inner_var_nr = s.dim(dim_type.set)+1
+    outer_var_nr = s.dim(orig_dim_type)
+    inner_var_nr = s.dim(orig_dim_type)+1
 
-    s = s.add_dims(dim_type.set, 2)
-    s = s.set_dim_name(dim_type.set, outer_var_nr, outer_iname)
-    s = s.set_dim_name(dim_type.set, inner_var_nr, inner_iname)
+    s = s.add_dims(orig_dim_type, 2)
+    s = s.set_dim_name(orig_dim_type, outer_var_nr, outer_iname)
+    s = s.set_dim_name(orig_dim_type, inner_var_nr, inner_iname)
 
     from loopy.isl_helpers import make_slab
 
@@ -203,7 +204,7 @@ def _split_iname_in_set(s, iname_to_split, inner_iname, outer_iname, fixed_lengt
                     var_length_iname: -fixed_length})))
 
     _, dup_name_idx = space.get_var_dict()[dup_iname_to_split]
-    s = s.project_out(dim_type.set, dup_name_idx, 1)
+    s = s.project_out(orig_dim_type, dup_name_idx, 1)
 
     if split_iname_should_remain:
         return s
-- 
GitLab


From d3cd7487d2cb92f1246f1032af08934e1d9dd32a Mon Sep 17 00:00:00 2001
From: Andreas Kloeckner <inform@tiker.net>
Date: Mon, 9 Nov 2020 15:09:27 -0600
Subject: [PATCH 060/140] Fix some comment typos relating to gh-167

---
 loopy/symbolic.py        | 2 +-
 loopy/transform/iname.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/loopy/symbolic.py b/loopy/symbolic.py
index cda89aa70..3cdc0708d 100644
--- a/loopy/symbolic.py
+++ b/loopy/symbolic.py
@@ -1527,7 +1527,7 @@ def simplify_using_aff(kernel, expr):
 
     # FIXME: Ideally, we should find out what inames are usable and allow
     # the simplification to use all of those. For now, fall back to making
-    # sure that the sipmlification only uses inames that were already there.
+    # sure that the simplification only uses inames that were already there.
     domain = (
             kernel
             .get_inames_domain(inames)
diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py
index 05d6562f5..cb52b48bb 100644
--- a/loopy/transform/iname.py
+++ b/loopy/transform/iname.py
@@ -169,7 +169,7 @@ def _split_iname_in_set(s, iname_to_split, inner_iname, outer_iname, fixed_lengt
     # used as a parameter in a subdomain).
     del orig_dim_type
 
-    # NB: dup_iname_to_split is not a globally valid identifier: only uniqure
+    # NB: dup_iname_to_split is not a globally valid identifier: only unique
     # wrt the set s.
     from pytools import generate_unique_names
     for dup_iname_to_split in generate_unique_names(f"dup_{iname_to_split}"):
-- 
GitLab


From 3974763c8c1b004adf654cc9ef83aa45d1e4d60d Mon Sep 17 00:00:00 2001
From: Andreas Kloeckner <inform@tiker.net>
Date: Mon, 9 Nov 2020 15:13:03 -0600
Subject: [PATCH 061/140] Delete stray del orig_dim_type in _split_iname_in_set

---
 loopy/transform/iname.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py
index cb52b48bb..372f972e5 100644
--- a/loopy/transform/iname.py
+++ b/loopy/transform/iname.py
@@ -167,7 +167,6 @@ def _split_iname_in_set(s, iname_to_split, inner_iname, outer_iname, fixed_lengt
     orig_dim_type, _ = var_dict[iname_to_split]
     # orig_dim_type may be set or param (the latter if the iname is
     # used as a parameter in a subdomain).
-    del orig_dim_type
 
     # NB: dup_iname_to_split is not a globally valid identifier: only unique
     # wrt the set s.
-- 
GitLab


From 750687c66640c7eb7f020d61e2c205c3a7ea782e Mon Sep 17 00:00:00 2001
From: Nicholas Christensen <njchris2@illinois.edu>
Date: Tue, 10 Nov 2020 05:15:45 -0600
Subject: [PATCH 062/140] add tags to loopy

---
 loopy/kernel/array.py |  3 ++-
 loopy/kernel/data.py  | 11 ++++-------
 2 files changed, 6 insertions(+), 8 deletions(-)

diff --git a/loopy/kernel/array.py b/loopy/kernel/array.py
index 6b0248f4f..9033ebb14 100644
--- a/loopy/kernel/array.py
+++ b/loopy/kernel/array.py
@@ -652,7 +652,7 @@ class ArrayBase(ImmutableRecord):
 
     def __init__(self, name, dtype=None, shape=None, dim_tags=None, offset=0,
             dim_names=None, strides=None, order=None, for_atomic=False,
-            target=None, alignment=None,
+            target=None, alignment=None, tags=None,
             **kwargs):
         """
         All of the following (except *name*) are optional.
@@ -848,6 +848,7 @@ class ArrayBase(ImmutableRecord):
                 order=order,
                 alignment=alignment,
                 for_atomic=for_atomic,
+                tags=tags,
                 **kwargs)
 
     def __eq__(self, other):
diff --git a/loopy/kernel/data.py b/loopy/kernel/data.py
index ce401d647..e7f7cd731 100644
--- a/loopy/kernel/data.py
+++ b/loopy/kernel/data.py
@@ -339,8 +339,6 @@ class KernelArgument(ImmutableRecord):
 
         dtype = kwargs.pop("dtype", None)
 
-        tags = kwargs.pop("tags", None)  # noqa: F841
-
         if "for_atomic" in kwargs:
             for_atomic = kwargs["for_atomic"]
         else:
@@ -359,7 +357,6 @@ class KernelArgument(ImmutableRecord):
                     DeprecationWarning, stacklevel=2)
 
             dtype = None
-
         kwargs["dtype"] = dtype
 
         ImmutableRecord.__init__(self, **kwargs)
@@ -381,13 +378,13 @@ class ArrayArg(ArrayBase, KernelArgument):
 
     allowed_extra_kwargs = [
             "address_space",
-            "is_output_only"]
+            "is_output_only",
+            "tags"]
 
     def __init__(self, *args, **kwargs):
         if "address_space" not in kwargs:
             raise TypeError("'address_space' must be specified")
         kwargs["is_output_only"] = kwargs.pop("is_output_only", False)
-
         super().__init__(*args, **kwargs)
 
     min_target_axes = 0
@@ -455,13 +452,13 @@ class ImageArg(ArrayBase, KernelArgument):
 
 class ValueArg(KernelArgument):
     def __init__(self, name, dtype=None, approximately=1000, target=None,
-            is_output_only=False):
+            is_output_only=False,tags=None):
 
         KernelArgument.__init__(self, name=name,
                 dtype=dtype,
                 approximately=approximately,
                 target=target,
-                is_output_only=is_output_only)
+                is_output_only=is_output_only,tags=tags)
 
     def __str__(self):
         import loopy as lp
-- 
GitLab


From 70231657027019bebd18440099d07038163de5fb Mon Sep 17 00:00:00 2001
From: Nicholas Christensen <njchris2@illinois.edu>
Date: Tue, 10 Nov 2020 05:21:10 -0600
Subject: [PATCH 063/140] placate flake8

---
 loopy/kernel/data.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/loopy/kernel/data.py b/loopy/kernel/data.py
index e7f7cd731..82cf2f4c7 100644
--- a/loopy/kernel/data.py
+++ b/loopy/kernel/data.py
@@ -452,13 +452,13 @@ class ImageArg(ArrayBase, KernelArgument):
 
 class ValueArg(KernelArgument):
     def __init__(self, name, dtype=None, approximately=1000, target=None,
-            is_output_only=False,tags=None):
+            is_output_only=False, tags=None):
 
         KernelArgument.__init__(self, name=name,
                 dtype=dtype,
                 approximately=approximately,
                 target=target,
-                is_output_only=is_output_only,tags=tags)
+                is_output_only=is_output_only, tags=tags)
 
     def __str__(self):
         import loopy as lp
-- 
GitLab


From c36ab97fe07d43f344410454955cae8edfca275d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andreas=20Kl=C3=B6ckner?= <inform@tiker.net>
Date: Wed, 11 Nov 2020 18:56:57 +0100
Subject: [PATCH 064/140] Require avx2 node tag for Gitlab examples CI

---
 .gitlab-ci.yml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index d69f0b8c4..f0e9aa0e5 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -89,6 +89,8 @@ Python 3 POCL Examples:
   - python3
   - pocl
   - large-node
+  # For examples/python/ispc-stream-harness.py
+  - avx2
   except:
   - tags
 
-- 
GitLab


From e6d7d6b12b48abe3d6ddf313f2018cdef7b18f71 Mon Sep 17 00:00:00 2001
From: Kaushik Kulkarni <kaushikcfd@gmail.com>
Date: Wed, 11 Nov 2020 20:18:14 -0600
Subject: [PATCH 065/140] define the scope of RuleAwareSubstitutionMapper

---
 loopy/symbolic.py | 32 +++++++++++++++++++++++++++++++-
 1 file changed, 31 insertions(+), 1 deletion(-)

diff --git a/loopy/symbolic.py b/loopy/symbolic.py
index dfacb4438..e170a7854 100644
--- a/loopy/symbolic.py
+++ b/loopy/symbolic.py
@@ -1006,11 +1006,41 @@ class RuleAwareIdentityMapper(IdentityMapper):
 
 
 class RuleAwareSubstitutionMapper(RuleAwareIdentityMapper):
+    """
+    Mapper to substitute expressions and record any divergence of substitution
+    rule expressions of :class:`loopy.LoopKernel`.
+
+    .. attribute:: rule_mapping_context
+
+        An instance of :class:`SubstitutionRuleMappingContext` to record
+        divergence of substitution rules.
+
+    .. attribute:: within
+
+        An instance of :class:`loopy.match.StackMatchComponent`.
+        :class:`RuleAwareSubstitutionMapper` would perform
+        substitutions in the expression if the stack match is ``True`` or
+        if the expression does not arise from an :class:`~loopy.InstructionBase`.
+
+    .. note::
+
+        The mapped kernel should be passed through
+        :meth:`SubstitutionRuleMappingContext.finish_kernel` to perform any
+        renaming mandated by the rule expression divergences.
+    """
     def __init__(self, rule_mapping_context, subst_func, within):
         super().__init__(rule_mapping_context)
 
         self.subst_func = subst_func
-        self.within = within
+        self._within = within
+
+    def within(self, kernel, instruction, stack):
+        if instruction is None:
+            # always perform substitutions on expressions not coming from
+            # instructions.
+            return True
+        else:
+            return self._within(kernel, instruction, stack)
 
     def map_variable(self, expr, expn_state):
         if (expr.name in expn_state.arg_context
-- 
GitLab


From 6e34b689c06f352b39ebacdd08b2829f436cdf0e Mon Sep 17 00:00:00 2001
From: Kaushik Kulkarni <kaushikcfd@gmail.com>
Date: Wed, 11 Nov 2020 20:27:13 -0600
Subject: [PATCH 066/140] argument shape expressions should be handled in
 RuleAwareSubstitutionMapper.map_kernel

---
 loopy/symbolic.py       | 28 +++++++++++++++++++++++++++-
 loopy/transform/data.py | 19 +------------------
 2 files changed, 28 insertions(+), 19 deletions(-)

diff --git a/loopy/symbolic.py b/loopy/symbolic.py
index e170a7854..ccfc1723a 100644
--- a/loopy/symbolic.py
+++ b/loopy/symbolic.py
@@ -1002,7 +1002,33 @@ class RuleAwareIdentityMapper(IdentityMapper):
                         lambda expr: self(expr, kernel, insn)))
                 for insn in kernel.instructions]
 
-        return kernel.copy(instructions=new_insns)
+        from functools import partial
+
+        non_insn_self = partial(self, kernel=kernel, insn=None)
+
+        from loopy.kernel.array import ArrayBase
+
+        # {{{ args
+
+        new_args = [
+                arg.map_exprs(non_insn_self) if isinstance(arg, ArrayBase) else arg
+                for arg in kernel.args]
+
+        # }}}
+
+        # {{{ tvs
+
+        new_tvs = {
+                tv_name: tv.map_exprs(non_insn_self)
+                for tv_name, tv in kernel.temporary_variables.items()}
+
+        # }}}
+
+        # domains, var names: not exprs => do not map
+
+        return kernel.copy(instructions=new_insns,
+                           args=new_args,
+                           temporary_variables=new_tvs)
 
 
 class RuleAwareSubstitutionMapper(RuleAwareIdentityMapper):
diff --git a/loopy/transform/data.py b/loopy/transform/data.py
index 9c4725c0d..e946a67c0 100644
--- a/loopy/transform/data.py
+++ b/loopy/transform/data.py
@@ -637,7 +637,6 @@ def rename_argument(kernel, old_name, new_name, existing_ok=False):
     subst_dict = {old_name: var(new_name)}
 
     from loopy.symbolic import (
-            SubstitutionMapper,
             RuleAwareSubstitutionMapper,
             SubstitutionRuleMappingContext)
     from pymbolic.mapper.substitutor import make_subst_func
@@ -651,32 +650,17 @@ def rename_argument(kernel, old_name, new_name, existing_ok=False):
 
     # }}}
 
-    subst_mapper = SubstitutionMapper(make_subst_func(subst_dict))
-
     # {{{ args
 
-    from loopy.kernel.array import ArrayBase
-
     new_args = []
     for arg in kernel.args:
         if arg.name == old_name:
             arg = arg.copy(name=new_name)
 
-        if isinstance(arg, ArrayBase):
-            arg = arg.map_exprs(subst_mapper)
-
         new_args.append(arg)
 
     # }}}
 
-    # {{{ tvs
-
-    new_tvs = {
-            tv_name: tv.map_exprs(subst_mapper)
-            for tv_name, tv in kernel.temporary_variables.items()}
-
-    # }}}
-
     # {{{ domain
 
     new_domains = []
@@ -690,8 +674,7 @@ def rename_argument(kernel, old_name, new_name, existing_ok=False):
 
     # }}}
 
-    return kernel.copy(domains=new_domains, args=new_args,
-            temporary_variables=new_tvs)
+    return kernel.copy(domains=new_domains, args=new_args)
 
 # }}}
 
-- 
GitLab


From 039a687459745eea22e00182cd2252a8a590084e Mon Sep 17 00:00:00 2001
From: Andreas Kloeckner <inform@tiker.net>
Date: Wed, 11 Nov 2020 21:37:41 -0600
Subject: [PATCH 067/140] Track iname dim_types in rewritten
 _split_iname_in_set

---
 loopy/transform/iname.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py
index 372f972e5..d2704a024 100644
--- a/loopy/transform/iname.py
+++ b/loopy/transform/iname.py
@@ -202,13 +202,13 @@ def _split_iname_in_set(s, iname_to_split, inner_iname, outer_iname, fixed_lengt
                     fixed_iname: -1,
                     var_length_iname: -fixed_length})))
 
-    _, dup_name_idx = space.get_var_dict()[dup_iname_to_split]
-    s = s.project_out(orig_dim_type, dup_name_idx, 1)
+    dup_iname_dim_type, dup_name_idx = space.get_var_dict()[dup_iname_to_split]
+    s = s.project_out(dup_iname_dim_type, dup_name_idx, 1)
 
     if split_iname_should_remain:
         return s
     else:
-        name_dim_type, name_idx = space.get_var_dict()[iname_to_split]
+        name_dim_type, name_idx = s.space.get_var_dict()[iname_to_split]
         return s.project_out(name_dim_type, name_idx, 1)
 
 
-- 
GitLab


From 2376269efb84f0d38d7a389176c864c23d3c9f96 Mon Sep 17 00:00:00 2001
From: Andreas Kloeckner <inform@tiker.net>
Date: Wed, 11 Nov 2020 22:37:50 -0600
Subject: [PATCH 068/140] remove_unused_inames: Project inames out of all
 domains instead of using DomainChanger

---
 loopy/transform/iname.py | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py
index d2704a024..eba1d5612 100644
--- a/loopy/transform/iname.py
+++ b/loopy/transform/iname.py
@@ -1220,16 +1220,22 @@ def remove_unused_inames(kernel, inames=None):
 
     # {{{ remove them
 
-    from loopy.kernel.tools import DomainChanger
-
+    domains = kernel.domains
     for iname in unused_inames:
-        domch = DomainChanger(kernel, (iname,))
+        new_domains = []
+
+        for dom in domains:
+            try:
+                dt, idx = dom.get_var_dict()[iname]
+            except KeyError:
+                pass
+            else:
+                dom = dom.project_out(dt, idx, 1)
+            new_domains.append(dom)
 
-        dom = domch.domain
-        dt, idx = dom.get_var_dict()[iname]
-        dom = dom.project_out(dt, idx, 1)
+        domains = new_domains
 
-        kernel = kernel.copy(domains=domch.get_domains_with(dom))
+    kernel = kernel.copy(domains=domains)
 
     # }}}
 
-- 
GitLab


From 8e4d3ebe50cfa91484cfcfd0cae8a8701c488811 Mon Sep 17 00:00:00 2001
From: Andreas Kloeckner <inform@tiker.net>
Date: Wed, 11 Nov 2020 22:38:38 -0600
Subject: [PATCH 069/140] split_iname: use remove_unused_inames

---
 loopy/transform/iname.py | 23 +++++++----------------
 1 file changed, 7 insertions(+), 16 deletions(-)

diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py
index eba1d5612..fefa7ed5f 100644
--- a/loopy/transform/iname.py
+++ b/loopy/transform/iname.py
@@ -158,7 +158,7 @@ class _InameSplitter(RuleAwareIdentityMapper):
 
 
 def _split_iname_in_set(s, iname_to_split, inner_iname, outer_iname, fixed_length,
-        fixed_length_is_inner, split_iname_should_remain):
+        fixed_length_is_inner):
     var_dict = s.get_var_dict()
 
     if iname_to_split not in var_dict:
@@ -205,11 +205,7 @@ def _split_iname_in_set(s, iname_to_split, inner_iname, outer_iname, fixed_lengt
     dup_iname_dim_type, dup_name_idx = space.get_var_dict()[dup_iname_to_split]
     s = s.project_out(dup_iname_dim_type, dup_name_idx, 1)
 
-    if split_iname_should_remain:
-        return s
-    else:
-        name_dim_type, name_idx = s.space.get_var_dict()[iname_to_split]
-        return s.project_out(name_dim_type, name_idx, 1)
+    return s
 
 
 def _split_iname_backend(kernel, iname_to_split,
@@ -260,17 +256,9 @@ def _split_iname_backend(kernel, iname_to_split,
     if inner_iname is None:
         inner_iname = vng(iname_to_split+"_inner")
 
-    all_insns_using_iname_in_within = all(
-            # "does not use iname or is targeted by the within"
-            # <=>
-            # "'uses iname' implies within"
-            iname_to_split not in insn.within_inames or within(kernel, insn)
-            for insn in kernel.instructions)
-
     new_domains = [
             _split_iname_in_set(dom, iname_to_split, inner_iname, outer_iname,
-                fixed_length, fixed_length_is_inner,
-                split_iname_should_remain=not all_insns_using_iname_in_within)
+                fixed_length, fixed_length_is_inner)
             for dom in kernel.domains]
 
     from pymbolic import var
@@ -333,7 +321,10 @@ def _split_iname_backend(kernel, iname_to_split,
         kernel = tag_inames(kernel,
                 {outer_iname: existing_tag, inner_iname: existing_tag})
 
-    return tag_inames(kernel, {outer_iname: outer_tag, inner_iname: inner_tag})
+    kernel = tag_inames(kernel, {outer_iname: outer_tag, inner_iname: inner_tag})
+    kernel = remove_unused_inames(kernel, [iname_to_split])
+
+    return kernel
 
 # }}}
 
-- 
GitLab


From a445efd38e4679bcb9c4b66e3c3891bc0f7ca4fd Mon Sep 17 00:00:00 2001
From: Nicholas Christensen <njchris2@illinois.edu>
Date: Fri, 13 Nov 2020 01:32:10 -0600
Subject: [PATCH 070/140] document tags attribute

---
 loopy/kernel/array.py | 4 +++-
 loopy/kernel/data.py  | 5 +++++
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/loopy/kernel/array.py b/loopy/kernel/array.py
index 9033ebb14..b4468fa15 100644
--- a/loopy/kernel/array.py
+++ b/loopy/kernel/array.py
@@ -691,7 +691,9 @@ class ArrayBase(ImmutableRecord):
             using atomic-capable data types.
         :arg offset: (See :attr:`offset`)
         :arg alignment: memory alignment in bytes
-
+        :arg tags: A metadata tag or list of metadata tags intended for
+            consumption by an application. These could be strings or instances
+            of :class:`pytools.tag` for example.
         """
 
         for kwarg_name in kwargs:
diff --git a/loopy/kernel/data.py b/loopy/kernel/data.py
index 82cf2f4c7..ff4e8f218 100644
--- a/loopy/kernel/data.py
+++ b/loopy/kernel/data.py
@@ -453,6 +453,11 @@ class ImageArg(ArrayBase, KernelArgument):
 class ValueArg(KernelArgument):
     def __init__(self, name, dtype=None, approximately=1000, target=None,
             is_output_only=False, tags=None):
+        """
+        :arg tags: A metadata tag or list of metadata tags intended for
+            consumption by an application. These could be strings or instances
+            of :class:`pytools.tag` for example.
+        """
 
         KernelArgument.__init__(self, name=name,
                 dtype=dtype,
-- 
GitLab


From af807eaab1189b61e990ebe7d8222920b3de0b6e Mon Sep 17 00:00:00 2001
From: Nicholas Christensen <njchris2@illinois.edu>
Date: Fri, 13 Nov 2020 01:51:16 -0600
Subject: [PATCH 071/140] fully specify class name

---
 loopy/kernel/array.py | 2 +-
 loopy/kernel/data.py  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/loopy/kernel/array.py b/loopy/kernel/array.py
index b4468fa15..3bd8d227a 100644
--- a/loopy/kernel/array.py
+++ b/loopy/kernel/array.py
@@ -693,7 +693,7 @@ class ArrayBase(ImmutableRecord):
         :arg alignment: memory alignment in bytes
         :arg tags: A metadata tag or list of metadata tags intended for
             consumption by an application. These could be strings or instances
-            of :class:`pytools.tag` for example.
+            of :class:`pytools.tag.Tag` for example.
         """
 
         for kwarg_name in kwargs:
diff --git a/loopy/kernel/data.py b/loopy/kernel/data.py
index ff4e8f218..504109e23 100644
--- a/loopy/kernel/data.py
+++ b/loopy/kernel/data.py
@@ -456,7 +456,7 @@ class ValueArg(KernelArgument):
         """
         :arg tags: A metadata tag or list of metadata tags intended for
             consumption by an application. These could be strings or instances
-            of :class:`pytools.tag` for example.
+            of :class:`pytools.tag.Tag` for example.
         """
 
         KernelArgument.__init__(self, name=name,
-- 
GitLab


From 171a47416edf3e60bd0e98853a7d7f1567c5a091 Mon Sep 17 00:00:00 2001
From: Andreas Kloeckner <inform@tiker.net>
Date: Fri, 13 Nov 2020 14:31:29 -0600
Subject: [PATCH 072/140] Switch install docs to miniforge

---
 doc/misc.rst | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/doc/misc.rst b/doc/misc.rst
index 4c8c9867f..e8bcefc65 100644
--- a/doc/misc.rst
+++ b/doc/misc.rst
@@ -49,21 +49,18 @@ MacOS support computers:
     Everywhere else, just making sure you have the ``g++`` package should be
     enough.
 
-#.  Install `miniconda <https://conda.io/miniconda.html>`_.
-    (Both Python 2 and 3 should work. In the absence of other constraints, prefer Python 3.)
+#.  Install `miniforge <https://github.com/conda-forge/miniforge>`_.
 
-#.  ``export CONDA=/WHERE/YOU/INSTALLED/miniconda3``
+#.  ``export CONDA=/WHERE/YOU/INSTALLED/miniforge3``
 
     If you accepted the default location, this should work:
 
-    ``export CONDA=$HOME/miniconda3``
+    ``export CONDA=$HOME/miniforge3``
 
 #.  ``$CONDA/bin/conda create -n dev``
 
 #.  ``source $CONDA/bin/activate dev``
 
-#.  ``conda config --add channels conda-forge``
-
 #.  ``conda install git pip pocl islpy pyopencl`` (Linux)
 
     or
@@ -76,7 +73,7 @@ MacOS support computers:
 
 Next time you want to use :mod:`loopy`, just run the following command::
 
-    source /WHERE/YOU/INSTALLED/miniconda3/bin/activate dev
+    source /WHERE/YOU/INSTALLED/miniforge3/bin/activate dev
 
 You may also like to add this to a startup file (like :file:`$HOME/.bashrc`) or create an alias for it.
 
-- 
GitLab


From d4428fa90f216df6df5410cdbcf3dae87a5f9dd6 Mon Sep 17 00:00:00 2001
From: Kaushik Kulkarni <kaushikcfd@gmail.com>
Date: Sun, 15 Nov 2020 13:23:51 -0600
Subject: [PATCH 073/140] FixedStrideArrayDimTag.map_expr: handle
 stride==lp.auto

---
 loopy/kernel/array.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/loopy/kernel/array.py b/loopy/kernel/array.py
index 6b0248f4f..e1b12eeae 100644
--- a/loopy/kernel/array.py
+++ b/loopy/kernel/array.py
@@ -136,6 +136,12 @@ class FixedStrideArrayDimTag(_StrideArrayDimTagBase):
         return self.stringify(True)
 
     def map_expr(self, mapper):
+        from loopy.kernel.data import auto
+
+        if self.stride is auto:
+            # lp.auto not an expr => do not map
+            return self
+
         return self.copy(stride=mapper(self.stride))
 
 
-- 
GitLab


From c94b2c3bdf25c0328777e4c2fb956414bd900ed6 Mon Sep 17 00:00:00 2001
From: Kaushik Kulkarni <kaushikcfd@gmail.com>
Date: Sun, 15 Nov 2020 13:24:46 -0600
Subject: [PATCH 074/140] add test_rename_argument_with_auto_stride

---
 test/test_transform.py | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/test/test_transform.py b/test/test_transform.py
index ccaaebc19..daa659808 100644
--- a/test/test_transform.py
+++ b/test/test_transform.py
@@ -690,6 +690,29 @@ def test_rename_argument_of_domain_params(ctx_factory):
     lp.auto_test_vs_ref(knl, ctx_factory(), knl, parameters={"M": 10, "N": 4})
 
 
+def test_rename_argument_with_auto_stride(ctx_factory):
+    from loopy.kernel.array import FixedStrideArrayDimTag
+
+    ctx = ctx_factory()
+    queue = cl.CommandQueue(ctx)
+
+    knl = lp.make_kernel(
+            "{[i]: 0<=i<10}",
+            """
+            y[i] = x[i]
+            """, [lp.GlobalArg("x", dtype=float,
+                               shape=lp.auto,
+                               dim_tags=[FixedStrideArrayDimTag(lp.auto)]), ...])
+
+    knl = lp.rename_argument(knl, "x", "x_new")
+
+    code_str = lp.generate_code_v2(knl).device_code()
+    assert code_str.find("double const *__restrict__ x_new,") != -1
+    assert code_str.find("double const *__restrict__ x,") == -1
+
+    evt, (out, ) = knl(queue, x_new=np.random.rand(10))
+
+
 if __name__ == "__main__":
     if len(sys.argv) > 1:
         exec(sys.argv[1])
-- 
GitLab


From dc57c07d53452fa8e6747d3d3814d9cf521db3c9 Mon Sep 17 00:00:00 2001
From: Kaushik Kulkarni <kaushikcfd@gmail.com>
Date: Mon, 16 Nov 2020 00:26:11 -0600
Subject: [PATCH 075/140] InstructionBase.get_domain -> get_insn_domain

---
 loopy/check.py              |  3 +-
 loopy/kernel/instruction.py | 90 ++++++++++++++++++-------------------
 2 files changed, 47 insertions(+), 46 deletions(-)

diff --git a/loopy/check.py b/loopy/check.py
index e8a5f9dca..910327850 100644
--- a/loopy/check.py
+++ b/loopy/check.py
@@ -461,9 +461,10 @@ def check_bounds(kernel):
     """
     Performs out-of-bound check for every array access.
     """
+    from loopy.kernel.instruction import get_insn_domain
     temp_var_names = set(kernel.temporary_variables)
     for insn in kernel.instructions:
-        domain = insn.get_domain(kernel)
+        domain = get_insn_domain(insn, kernel)
 
         # data-dependent bounds? can't do much
         if set(domain.get_var_names(dim_type.param)) & temp_var_names:
diff --git a/loopy/kernel/instruction.py b/loopy/kernel/instruction.py
index 8471d39f0..101d16624 100644
--- a/loopy/kernel/instruction.py
+++ b/loopy/kernel/instruction.py
@@ -147,7 +147,6 @@ class InstructionBase(ImmutableRecord):
     .. automethod:: with_transformed_expressions
     .. automethod:: write_dependency_names
     .. automethod:: dependency_names
-    .. automethod:: get_domain
     .. automethod:: copy
     """
 
@@ -411,50 +410,6 @@ class InstructionBase(ImmutableRecord):
         self.within_inames = (
                 intern_frozenset_of_ids(self.within_inames))
 
-    def get_domain(self, kernel):
-        """
-        Returns an instance of :class:`islpy.Set` for the instruction's domain.
-
-        .. note::
-
-            Does not take into account additional hints available through
-            :attr:`loopy.LoopKernel.assumptions`.
-        """
-        domain = kernel.get_inames_domain(self.within_inames)
-
-        # {{{ add read-only ValueArgs to domain
-
-        from loopy.kernel.data import ValueArg
-
-        valueargs_to_add = ({arg.name for arg in kernel.args
-                             if isinstance(arg, ValueArg)
-                             and arg.name not in kernel.get_written_variables()}
-                            - set(domain.get_var_names(isl.dim_type.param)))
-
-        # only consider valueargs relevant to *self*
-        valueargs_to_add = valueargs_to_add & self.read_dependency_names()
-
-        for arg_to_add in valueargs_to_add:
-            idim = domain.dim(isl.dim_type.param)
-            domain = domain.add_dims(isl.dim_type.param, 1)
-            domain = domain.set_dim_name(isl.dim_type.param, idim, arg_to_add)
-
-        # }}}
-
-        # {{{ enforce restriction from predicates
-
-        insn_preds_set = isl.BasicSet.universe(domain.space)
-
-        for predicate in self.predicates:
-            from loopy.symbolic import condition_to_set
-            predicate_as_isl_set = condition_to_set(domain.space, predicate)
-            if predicate_as_isl_set is not None:
-                insn_preds_set = insn_preds_set & predicate_as_isl_set
-
-        # }}}
-
-        return domain & insn_preds_set
-
 # }}}
 
 
@@ -1484,4 +1439,49 @@ def _check_and_fix_temp_var_type(temp_var_type, stacklevel=2):
 # }}}
 
 
+def get_insn_domain(insn, kernel):
+    """
+    Returns an instance of :class:`islpy.Set` for the *insn*'s domain.
+
+    .. note::
+
+        Does not take into account additional hints available through
+        :attr:`loopy.LoopKernel.assumptions`.
+    """
+    domain = kernel.get_inames_domain(insn.within_inames)
+
+    # {{{ add read-only ValueArgs to domain
+
+    from loopy.kernel.data import ValueArg
+
+    valueargs_to_add = ({arg.name for arg in kernel.args
+                         if isinstance(arg, ValueArg)
+                         and arg.name not in kernel.get_written_variables()}
+                        - set(domain.get_var_names(isl.dim_type.param)))
+
+    # only consider valueargs relevant to *insn*
+    valueargs_to_add = valueargs_to_add & insn.read_dependency_names()
+
+    for arg_to_add in valueargs_to_add:
+        idim = domain.dim(isl.dim_type.param)
+        domain = domain.add_dims(isl.dim_type.param, 1)
+        domain = domain.set_dim_name(isl.dim_type.param, idim, arg_to_add)
+
+    # }}}
+
+    # {{{ enforce restriction from predicates
+
+    insn_preds_set = isl.BasicSet.universe(domain.space)
+
+    for predicate in insn.predicates:
+        from loopy.symbolic import condition_to_set
+        predicate_as_isl_set = condition_to_set(domain.space, predicate)
+        if predicate_as_isl_set is not None:
+            insn_preds_set = insn_preds_set & predicate_as_isl_set
+
+    # }}}
+
+    return domain & insn_preds_set
+
+
 # vim: foldmethod=marker
-- 
GitLab


From 14afe584b7ff70d8b3c54eb6f05e1dda9908176f Mon Sep 17 00:00:00 2001
From: Nicholas Christensen <njchris2@illinois.edu>
Date: Mon, 16 Nov 2020 00:34:00 -0600
Subject: [PATCH 076/140] only mention pytools.tag.Tag

---
 loopy/kernel/array.py | 4 ++--
 loopy/kernel/data.py  | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/loopy/kernel/array.py b/loopy/kernel/array.py
index 3bd8d227a..2ae45a5e8 100644
--- a/loopy/kernel/array.py
+++ b/loopy/kernel/array.py
@@ -692,8 +692,8 @@ class ArrayBase(ImmutableRecord):
         :arg offset: (See :attr:`offset`)
         :arg alignment: memory alignment in bytes
         :arg tags: A metadata tag or list of metadata tags intended for
-            consumption by an application. These could be strings or instances
-            of :class:`pytools.tag.Tag` for example.
+            consumption by an application. It is intended these tags be
+            instances of :class:`pytools.tag.Tag.
         """
 
         for kwarg_name in kwargs:
diff --git a/loopy/kernel/data.py b/loopy/kernel/data.py
index 504109e23..45021c2d1 100644
--- a/loopy/kernel/data.py
+++ b/loopy/kernel/data.py
@@ -455,8 +455,8 @@ class ValueArg(KernelArgument):
             is_output_only=False, tags=None):
         """
         :arg tags: A metadata tag or list of metadata tags intended for
-            consumption by an application. These could be strings or instances
-            of :class:`pytools.tag.Tag` for example.
+            consumption by an application. It is intended these tags be
+            instances of :class:`pytools.tag.Tag`.
         """
 
         KernelArgument.__init__(self, name=name,
-- 
GitLab


From 61610e65af206da51eaac08fdabad0562dd312dc Mon Sep 17 00:00:00 2001
From: Nicholas Christensen <njchris2@illinois.edu>
Date: Mon, 16 Nov 2020 01:07:11 -0600
Subject: [PATCH 077/140] fix doc generation

---
 loopy/kernel/array.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/loopy/kernel/array.py b/loopy/kernel/array.py
index 2ae45a5e8..c1687bb03 100644
--- a/loopy/kernel/array.py
+++ b/loopy/kernel/array.py
@@ -693,7 +693,7 @@ class ArrayBase(ImmutableRecord):
         :arg alignment: memory alignment in bytes
         :arg tags: A metadata tag or list of metadata tags intended for
             consumption by an application. It is intended these tags be
-            instances of :class:`pytools.tag.Tag.
+            instances of :class:`pytools.tag.Tag`.
         """
 
         for kwarg_name in kwargs:
-- 
GitLab


From d88204e1aa4fbf6c16d393cf061ae387e4eab7c5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andreas=20Kl=C3=B6ckner?= <inform@tiker.net>
Date: Tue, 17 Nov 2020 05:45:29 +0100
Subject: [PATCH 078/140] Specify unit of ArrayBase.offset

---
 loopy/kernel/array.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/loopy/kernel/array.py b/loopy/kernel/array.py
index c004d69ec..4254171db 100644
--- a/loopy/kernel/array.py
+++ b/loopy/kernel/array.py
@@ -606,7 +606,8 @@ class ArrayBase(ImmutableRecord):
     .. attribute:: offset
 
         Offset from the beginning of the buffer to the point from
-            which the strides are counted. May be one of
+        which the strides are counted, in units of the :attr:`dtype`.
+        May be one of
 
             * 0 or None
             * a string (that is interpreted as an argument name).
-- 
GitLab


From ba9880a5c366a17966d6e6077e577b8251f31b44 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andreas=20Kl=C3=B6ckner?= <inform@tiker.net>
Date: Tue, 17 Nov 2020 14:42:24 -0600
Subject: [PATCH 079/140] Revert "Add tags to Loopy"

---
 loopy/kernel/array.py |  7 ++-----
 loopy/kernel/data.py  | 14 +++++---------
 setup.py              |  2 +-
 3 files changed, 8 insertions(+), 15 deletions(-)

diff --git a/loopy/kernel/array.py b/loopy/kernel/array.py
index 4254171db..d5b4284b8 100644
--- a/loopy/kernel/array.py
+++ b/loopy/kernel/array.py
@@ -659,7 +659,7 @@ class ArrayBase(ImmutableRecord):
 
     def __init__(self, name, dtype=None, shape=None, dim_tags=None, offset=0,
             dim_names=None, strides=None, order=None, for_atomic=False,
-            target=None, alignment=None, tags=None,
+            target=None, alignment=None,
             **kwargs):
         """
         All of the following (except *name*) are optional.
@@ -698,9 +698,7 @@ class ArrayBase(ImmutableRecord):
             using atomic-capable data types.
         :arg offset: (See :attr:`offset`)
         :arg alignment: memory alignment in bytes
-        :arg tags: A metadata tag or list of metadata tags intended for
-            consumption by an application. It is intended these tags be
-            instances of :class:`pytools.tag.Tag`.
+
         """
 
         for kwarg_name in kwargs:
@@ -857,7 +855,6 @@ class ArrayBase(ImmutableRecord):
                 order=order,
                 alignment=alignment,
                 for_atomic=for_atomic,
-                tags=tags,
                 **kwargs)
 
     def __eq__(self, other):
diff --git a/loopy/kernel/data.py b/loopy/kernel/data.py
index 45021c2d1..43770ffb6 100644
--- a/loopy/kernel/data.py
+++ b/loopy/kernel/data.py
@@ -357,6 +357,7 @@ class KernelArgument(ImmutableRecord):
                     DeprecationWarning, stacklevel=2)
 
             dtype = None
+
         kwargs["dtype"] = dtype
 
         ImmutableRecord.__init__(self, **kwargs)
@@ -378,13 +379,13 @@ class ArrayArg(ArrayBase, KernelArgument):
 
     allowed_extra_kwargs = [
             "address_space",
-            "is_output_only",
-            "tags"]
+            "is_output_only"]
 
     def __init__(self, *args, **kwargs):
         if "address_space" not in kwargs:
             raise TypeError("'address_space' must be specified")
         kwargs["is_output_only"] = kwargs.pop("is_output_only", False)
+
         super().__init__(*args, **kwargs)
 
     min_target_axes = 0
@@ -452,18 +453,13 @@ class ImageArg(ArrayBase, KernelArgument):
 
 class ValueArg(KernelArgument):
     def __init__(self, name, dtype=None, approximately=1000, target=None,
-            is_output_only=False, tags=None):
-        """
-        :arg tags: A metadata tag or list of metadata tags intended for
-            consumption by an application. It is intended these tags be
-            instances of :class:`pytools.tag.Tag`.
-        """
+            is_output_only=False):
 
         KernelArgument.__init__(self, name=name,
                 dtype=dtype,
                 approximately=approximately,
                 target=target,
-                is_output_only=is_output_only, tags=tags)
+                is_output_only=is_output_only)
 
     def __str__(self):
         import loopy as lp
diff --git a/setup.py b/setup.py
index 084aaeab5..ddc47fefc 100644
--- a/setup.py
+++ b/setup.py
@@ -84,7 +84,7 @@ setup(name="loopy",
 
       python_requires="~=3.6",
       install_requires=[
-          "pytools>=2020.4.2",
+          "pytools>=2020.4",
           "pymbolic>=2019.2",
           "genpy>=2016.1.2",
           "cgen>=2016.1",
-- 
GitLab


From 32c262687d7951603e45a02b9b0887d9909b0f1c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andreas=20Kl=C3=B6ckner?= <inform@tiker.net>
Date: Tue, 17 Nov 2020 16:13:16 -0600
Subject: [PATCH 080/140] Revert "Revert "Add tags to Loopy""

---
 loopy/kernel/array.py |  7 +++++--
 loopy/kernel/data.py  | 14 +++++++++-----
 setup.py              |  2 +-
 3 files changed, 15 insertions(+), 8 deletions(-)

diff --git a/loopy/kernel/array.py b/loopy/kernel/array.py
index d5b4284b8..4254171db 100644
--- a/loopy/kernel/array.py
+++ b/loopy/kernel/array.py
@@ -659,7 +659,7 @@ class ArrayBase(ImmutableRecord):
 
     def __init__(self, name, dtype=None, shape=None, dim_tags=None, offset=0,
             dim_names=None, strides=None, order=None, for_atomic=False,
-            target=None, alignment=None,
+            target=None, alignment=None, tags=None,
             **kwargs):
         """
         All of the following (except *name*) are optional.
@@ -698,7 +698,9 @@ class ArrayBase(ImmutableRecord):
             using atomic-capable data types.
         :arg offset: (See :attr:`offset`)
         :arg alignment: memory alignment in bytes
-
+        :arg tags: A metadata tag or list of metadata tags intended for
+            consumption by an application. It is intended these tags be
+            instances of :class:`pytools.tag.Tag`.
         """
 
         for kwarg_name in kwargs:
@@ -855,6 +857,7 @@ class ArrayBase(ImmutableRecord):
                 order=order,
                 alignment=alignment,
                 for_atomic=for_atomic,
+                tags=tags,
                 **kwargs)
 
     def __eq__(self, other):
diff --git a/loopy/kernel/data.py b/loopy/kernel/data.py
index 43770ffb6..45021c2d1 100644
--- a/loopy/kernel/data.py
+++ b/loopy/kernel/data.py
@@ -357,7 +357,6 @@ class KernelArgument(ImmutableRecord):
                     DeprecationWarning, stacklevel=2)
 
             dtype = None
-
         kwargs["dtype"] = dtype
 
         ImmutableRecord.__init__(self, **kwargs)
@@ -379,13 +378,13 @@ class ArrayArg(ArrayBase, KernelArgument):
 
     allowed_extra_kwargs = [
             "address_space",
-            "is_output_only"]
+            "is_output_only",
+            "tags"]
 
     def __init__(self, *args, **kwargs):
         if "address_space" not in kwargs:
             raise TypeError("'address_space' must be specified")
         kwargs["is_output_only"] = kwargs.pop("is_output_only", False)
-
         super().__init__(*args, **kwargs)
 
     min_target_axes = 0
@@ -453,13 +452,18 @@ class ImageArg(ArrayBase, KernelArgument):
 
 class ValueArg(KernelArgument):
     def __init__(self, name, dtype=None, approximately=1000, target=None,
-            is_output_only=False):
+            is_output_only=False, tags=None):
+        """
+        :arg tags: A metadata tag or list of metadata tags intended for
+            consumption by an application. It is intended these tags be
+            instances of :class:`pytools.tag.Tag`.
+        """
 
         KernelArgument.__init__(self, name=name,
                 dtype=dtype,
                 approximately=approximately,
                 target=target,
-                is_output_only=is_output_only)
+                is_output_only=is_output_only, tags=tags)
 
     def __str__(self):
         import loopy as lp
diff --git a/setup.py b/setup.py
index ddc47fefc..084aaeab5 100644
--- a/setup.py
+++ b/setup.py
@@ -84,7 +84,7 @@ setup(name="loopy",
 
       python_requires="~=3.6",
       install_requires=[
-          "pytools>=2020.4",
+          "pytools>=2020.4.2",
           "pymbolic>=2019.2",
           "genpy>=2016.1.2",
           "cgen>=2016.1",
-- 
GitLab


From c7f2c7f4808f7c1d13f0ed2bc0f280cda1bf590a Mon Sep 17 00:00:00 2001
From: Nicholas Christensen <njchris2@illinois.edu>
Date: Tue, 24 Nov 2020 13:58:46 -0600
Subject: [PATCH 081/140] Use Taggable class with ArrayBase and ValueArg

---
 loopy/kernel/array.py | 18 +++++++++++++-----
 loopy/kernel/data.py  | 19 +++++++++++++------
 loopy/version.py      |  2 +-
 setup.py              |  2 +-
 4 files changed, 28 insertions(+), 13 deletions(-)

diff --git a/loopy/kernel/array.py b/loopy/kernel/array.py
index 4254171db..ba97c9088 100644
--- a/loopy/kernel/array.py
+++ b/loopy/kernel/array.py
@@ -26,6 +26,7 @@ THE SOFTWARE.
 import re
 
 from pytools import ImmutableRecord, memoize_method
+from pytools.tag import Taggable
 
 import numpy as np  # noqa
 
@@ -563,7 +564,7 @@ def _parse_shape_or_strides(x):
     return tuple(_pymbolic_parse_if_necessary(xi) for xi in x)
 
 
-class ArrayBase(ImmutableRecord):
+class ArrayBase(ImmutableRecord, Taggable):
     """
     .. attribute :: name
 
@@ -643,6 +644,14 @@ class ArrayBase(ImmutableRecord):
 
         .. versionadded:: 2018.1
 
+    .. attribute:: tags
+
+        A (possibly empty) frozenset of instances of
+        :class:`pytools.tag.Tag` intended for
+        consumption by an application.
+
+        ..versionadded: 2020.2.2
+
     .. automethod:: __init__
     .. automethod:: __eq__
     .. automethod:: num_user_axes
@@ -659,7 +668,7 @@ class ArrayBase(ImmutableRecord):
 
     def __init__(self, name, dtype=None, shape=None, dim_tags=None, offset=0,
             dim_names=None, strides=None, order=None, for_atomic=False,
-            target=None, alignment=None, tags=None,
+            target=None, alignment=None, tags=frozenset(),
             **kwargs):
         """
         All of the following (except *name*) are optional.
@@ -698,9 +707,8 @@ class ArrayBase(ImmutableRecord):
             using atomic-capable data types.
         :arg offset: (See :attr:`offset`)
         :arg alignment: memory alignment in bytes
-        :arg tags: A metadata tag or list of metadata tags intended for
-            consumption by an application. It is intended these tags be
-            instances of :class:`pytools.tag.Tag`.
+        :arg tags: An instance of or an Iterable of instances of
+        :class:`pytools.tag.Tag`.
         """
 
         for kwarg_name in kwargs:
diff --git a/loopy/kernel/data.py b/loopy/kernel/data.py
index 45021c2d1..0702ea618 100644
--- a/loopy/kernel/data.py
+++ b/loopy/kernel/data.py
@@ -27,6 +27,7 @@ THE SOFTWARE.
 from sys import intern
 import numpy as np  # noqa
 from pytools import ImmutableRecord
+from pytools.tag import Taggable
 from loopy.kernel.array import ArrayBase
 from loopy.diagnostic import LoopyError
 from loopy.kernel.instruction import (  # noqa
@@ -449,14 +450,20 @@ class ImageArg(ArrayBase, KernelArgument):
         return ast_builder.get_image_arg_decl(self.name + name_suffix, shape,
                 self.num_target_axes(), dtype, is_written)
 
-
-class ValueArg(KernelArgument):
+"""
+    :attribute tags: A (possibly empty) frozenset of instances of
+        :class:`pytools.tag.Tag` intended for consumption by an
+        application.
+        
+        ..versionadded: 2020.2.2
+"""
+class ValueArg(KernelArgument, Taggable):
     def __init__(self, name, dtype=None, approximately=1000, target=None,
-            is_output_only=False, tags=None):
+            is_output_only=False, tags=frozenset()):
         """
-        :arg tags: A metadata tag or list of metadata tags intended for
-            consumption by an application. It is intended these tags be
-            instances of :class:`pytools.tag.Tag`.
+        :arg tags: A an instance of or Iterable of instances of 
+            :class:`pytools.tag.Tag` intended for consumption by an
+            application.
         """
 
         KernelArgument.__init__(self, name=name,
diff --git a/loopy/version.py b/loopy/version.py
index fddd44479..6f66c5347 100644
--- a/loopy/version.py
+++ b/loopy/version.py
@@ -42,7 +42,7 @@ else:
 # }}}
 
 
-VERSION = (2020, 2, 1)
+VERSION = (2020, 2, 2)
 VERSION_STATUS = ""
 VERSION_TEXT = ".".join(str(x) for x in VERSION) + VERSION_STATUS
 
diff --git a/setup.py b/setup.py
index 084aaeab5..bd76d293a 100644
--- a/setup.py
+++ b/setup.py
@@ -84,7 +84,7 @@ setup(name="loopy",
 
       python_requires="~=3.6",
       install_requires=[
-          "pytools>=2020.4.2",
+          "pytools>=2020.4.4",
           "pymbolic>=2019.2",
           "genpy>=2016.1.2",
           "cgen>=2016.1",
-- 
GitLab


From 32b0cd9393e8650ebf43022db9f6e1db4c3595cb Mon Sep 17 00:00:00 2001
From: Nicholas Christensen <njchris2@illinois.edu>
Date: Tue, 24 Nov 2020 14:02:27 -0600
Subject: [PATCH 082/140] placate flake8

---
 loopy/kernel/data.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/loopy/kernel/data.py b/loopy/kernel/data.py
index 0702ea618..be7ccc699 100644
--- a/loopy/kernel/data.py
+++ b/loopy/kernel/data.py
@@ -450,18 +450,21 @@ class ImageArg(ArrayBase, KernelArgument):
         return ast_builder.get_image_arg_decl(self.name + name_suffix, shape,
                 self.num_target_axes(), dtype, is_written)
 
+
 """
     :attribute tags: A (possibly empty) frozenset of instances of
         :class:`pytools.tag.Tag` intended for consumption by an
         application.
-        
+
         ..versionadded: 2020.2.2
 """
+
+
 class ValueArg(KernelArgument, Taggable):
     def __init__(self, name, dtype=None, approximately=1000, target=None,
             is_output_only=False, tags=frozenset()):
         """
-        :arg tags: A an instance of or Iterable of instances of 
+        :arg tags: A an instance of or Iterable of instances of
             :class:`pytools.tag.Tag` intended for consumption by an
             application.
         """
-- 
GitLab


From 87acfe3477d1abc21cf88474ab80a40ef59e3fd1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andreas=20Kl=C3=B6ckner?= <inform@tiker.net>
Date: Mon, 30 Nov 2020 10:52:45 -0600
Subject: [PATCH 083/140] Add '#egg=' tag to f2py requirement

h/t @matthiasdiener https://github.com/illinois-ceesd/mirgecom/pull/162
---
 requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index 2105aede0..1072cdec0 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -6,7 +6,7 @@ git+https://github.com/inducer/pymbolic.git#egg=pymbolic
 git+https://github.com/inducer/genpy.git#egg=genpy
 git+https://github.com/inducer/codepy.git#egg=codepy
 
-git+https://github.com/inducer/f2py
+git+https://github.com/inducer/f2py#egg=f2py
 
 # Optional, needed for using the C preprocessor on Fortran
 ply>=3.6
-- 
GitLab


From 130c76658101795678196a55e2fc438e6613c511 Mon Sep 17 00:00:00 2001
From: Nicholas Christensen <njchris2@illinois.edu>
Date: Mon, 30 Nov 2020 17:35:37 -0600
Subject: [PATCH 084/140] change requirements.txt

---
 requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index 2105aede0..d64f33279 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,4 @@
-git+https://github.com/inducer/pytools.git#egg=pytools
+git+https://github.com/nchristensen/pytools.git@master#egg=pytools == 2020.4.4
 git+https://github.com/inducer/islpy.git#egg=islpy
 git+https://github.com/inducer/cgen.git#egg=cgen
 git+https://github.com/inducer/pyopencl.git#egg=pyopencl
-- 
GitLab


From e9ebf7df739d5ee6a3b58da6a403c8a40334930f Mon Sep 17 00:00:00 2001
From: Nicholas Christensen <njchris2@illinois.edu>
Date: Mon, 30 Nov 2020 18:07:54 -0600
Subject: [PATCH 085/140] Trigger

-- 
GitLab


From 23a9b1a7c973026bf9bbf6d127163f159047790e Mon Sep 17 00:00:00 2001
From: Nicholas Christensen <njchris2@illinois.edu>
Date: Mon, 30 Nov 2020 19:55:27 -0600
Subject: [PATCH 086/140] Trigger CI

-- 
GitLab


From fa44cd08c6d1af3054e7534110603ce6ab4981e3 Mon Sep 17 00:00:00 2001
From: Nicholas Christensen <njchris2@illinois.edu>
Date: Mon, 30 Nov 2020 20:48:12 -0600
Subject: [PATCH 087/140] missing colon

---
 loopy/kernel/array.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/loopy/kernel/array.py b/loopy/kernel/array.py
index ba97c9088..fc0ac7e87 100644
--- a/loopy/kernel/array.py
+++ b/loopy/kernel/array.py
@@ -650,7 +650,7 @@ class ArrayBase(ImmutableRecord, Taggable):
         :class:`pytools.tag.Tag` intended for
         consumption by an application.
 
-        ..versionadded: 2020.2.2
+        ..versionadded:: 2020.2.2
 
     .. automethod:: __init__
     .. automethod:: __eq__
-- 
GitLab


From 6b7bc1e2827f498366d3d12317ec5b308435abaa Mon Sep 17 00:00:00 2001
From: Nicholas Christensen <njchris2@illinois.edu>
Date: Mon, 30 Nov 2020 21:18:43 -0600
Subject: [PATCH 088/140] Trigger CI

-- 
GitLab


From 8c15ab2812731dae7f76bf92c674ac16b65270b3 Mon Sep 17 00:00:00 2001
From: Nicholas Christensen <njchris2@illinois.edu>
Date: Mon, 30 Nov 2020 21:31:56 -0600
Subject: [PATCH 089/140] missing tab

---
 loopy/kernel/array.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/loopy/kernel/array.py b/loopy/kernel/array.py
index fc0ac7e87..b2982598a 100644
--- a/loopy/kernel/array.py
+++ b/loopy/kernel/array.py
@@ -650,7 +650,7 @@ class ArrayBase(ImmutableRecord, Taggable):
         :class:`pytools.tag.Tag` intended for
         consumption by an application.
 
-        ..versionadded:: 2020.2.2
+        .. versionadded:: 2020.2.2
 
     .. automethod:: __init__
     .. automethod:: __eq__
@@ -708,7 +708,7 @@ class ArrayBase(ImmutableRecord, Taggable):
         :arg offset: (See :attr:`offset`)
         :arg alignment: memory alignment in bytes
         :arg tags: An instance of or an Iterable of instances of
-        :class:`pytools.tag.Tag`.
+            :class:`pytools.tag.Tag`.
         """
 
         for kwarg_name in kwargs:
-- 
GitLab


From 4bee34179506fdb899cd7f4ffe077134ea62a10e Mon Sep 17 00:00:00 2001
From: Nicholas Christensen <njchris2@illinois.edu>
Date: Mon, 30 Nov 2020 22:04:26 -0600
Subject: [PATCH 090/140] default to None

---
 loopy/kernel/array.py | 3 +--
 loopy/kernel/data.py  | 2 +-
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/loopy/kernel/array.py b/loopy/kernel/array.py
index b2982598a..9fd166ab8 100644
--- a/loopy/kernel/array.py
+++ b/loopy/kernel/array.py
@@ -668,8 +668,7 @@ class ArrayBase(ImmutableRecord, Taggable):
 
     def __init__(self, name, dtype=None, shape=None, dim_tags=None, offset=0,
             dim_names=None, strides=None, order=None, for_atomic=False,
-            target=None, alignment=None, tags=frozenset(),
-            **kwargs):
+            target=None, alignment=None, tags=None, **kwargs):
         """
         All of the following (except *name*) are optional.
         Specify either strides or shape.
diff --git a/loopy/kernel/data.py b/loopy/kernel/data.py
index be7ccc699..6e454d925 100644
--- a/loopy/kernel/data.py
+++ b/loopy/kernel/data.py
@@ -462,7 +462,7 @@ class ImageArg(ArrayBase, KernelArgument):
 
 class ValueArg(KernelArgument, Taggable):
     def __init__(self, name, dtype=None, approximately=1000, target=None,
-            is_output_only=False, tags=frozenset()):
+            is_output_only=False, tags=None):
         """
         :arg tags: A an instance of or Iterable of instances of
             :class:`pytools.tag.Tag` intended for consumption by an
-- 
GitLab


From 77f9036574f32fa33de0e0052da695d392c65eb3 Mon Sep 17 00:00:00 2001
From: Andreas Kloeckner <inform@tiker.net>
Date: Tue, 1 Dec 2020 22:13:11 -0600
Subject: [PATCH 091/140] Switch to furo doc theme

---
 doc/conf.py   | 79 +++++++++++++++++++++++----------------------------
 doc/index.rst |  2 ++
 2 files changed, 37 insertions(+), 44 deletions(-)

diff --git a/doc/conf.py b/doc/conf.py
index 942afcd3c..7912290e1 100644
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -21,32 +21,33 @@ import os
 # -- General configuration -----------------------------------------------------
 
 # If your documentation needs a minimal Sphinx version, state it here.
-#needs_sphinx = '1.0'
+#needs_sphinx = "1.0"
 
 # Add any Sphinx extension module names here, as strings. They can be extensions
-# coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
+# coming with Sphinx (named "sphinx.ext.*") or your custom ones.
 extensions = [
-        'sphinx.ext.autodoc',
-        'sphinx.ext.intersphinx',
-        #'sphinx.ext.viewcode',
-        'sphinx.ext.doctest',
+        "sphinx.ext.autodoc",
+        "sphinx.ext.intersphinx",
+        #"sphinx.ext.viewcode",
+        "sphinx.ext.doctest",
+        "sphinx_copybutton",
         ]
 
 # Add any paths that contain templates here, relative to this directory.
-templates_path = ['_templates']
+templates_path = ["_templates"]
 
 # The suffix of source filenames.
-source_suffix = '.rst'
+source_suffix = ".rst"
 
 # The encoding of source files.
-#source_encoding = 'utf-8-sig'
+#source_encoding = "utf-8-sig"
 
 # The master toctree document.
-master_doc = 'index'
+master_doc = "index"
 
 # General information about the project.
-project = 'loopy'
-copyright = '2016, Andreas Klöckner'
+project = "loopy"
+copyright = "2016, Andreas Klöckner"
 
 # The version info for the project you're documenting, acts as replacement for
 # |version| and |release|, also used in various other places throughout the
@@ -59,7 +60,7 @@ with open(_version_source) as vpy_file:
     version_py = vpy_file.read()
 
 os.environ["AKPYTHON_EXEC_IMPORT_UNAVAILABLE"] = "1"
-exec(compile(version_py, _version_source, 'exec'), ver_dic)
+exec(compile(version_py, _version_source, "exec"), ver_dic)
 version = ".".join(str(x) for x in ver_dic["VERSION"])
 # The full version, including alpha/beta/rc tags.
 release = ver_dic["VERSION_TEXT"]
@@ -77,7 +78,7 @@ del os.environ["AKPYTHON_EXEC_IMPORT_UNAVAILABLE"]
 
 # List of patterns, relative to source directory, that match files and
 # directories to ignore when looking for source files.
-exclude_patterns = ['_build']
+exclude_patterns = ["_build"]
 
 # The reST default role (used for this markup: `text`) to use for all documents.
 #default_role = None
@@ -94,7 +95,7 @@ exclude_patterns = ['_build']
 #show_authors = False
 
 # The name of the Pygments (syntax highlighting) style to use.
-pygments_style = 'sphinx'
+pygments_style = "sphinx"
 
 # A list of ignored prefixes for module index sorting.
 #modindex_common_prefix = []
@@ -102,23 +103,13 @@ pygments_style = 'sphinx'
 
 # -- Options for HTML output ---------------------------------------------------
 
-html_theme = "alabaster"
+html_theme = "furo"
 
 html_theme_options = {
-        "extra_nav_links": {
-            "🚀 Github": "https://github.com/inducer/loopy",
-            "💾 Download Releases": "https://pypi.org/project/loopy",
-            }
         }
 
 html_sidebars = {
-    '**': [
-        'about.html',
-        'navigation.html',
-        'relations.html',
-        'searchbox.html',
-    ]
-}
+        }
 
 # Theme options are theme-specific and customize the look and feel of a theme
 # further.  For a list of options available for each theme, see the
@@ -149,7 +140,7 @@ html_sidebars = {
 # so a file named "default.css" will overwrite the builtin "default.css".
 # html_static_path = ['_static']
 
-# If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
+# If not '', a "Last updated on:" timestamp is inserted at every page bottom,
 # using the given strftime format.
 #html_last_updated_fmt = '%b %d, %Y'
 
@@ -191,22 +182,22 @@ html_show_sourcelink = False
 #html_file_suffix = None
 
 # Output file base name for HTML help builder.
-htmlhelp_basename = 'loopydoc'
+htmlhelp_basename = "loopydoc"
 
 
 # -- Options for LaTeX output --------------------------------------------------
 
-# The paper size ('letter' or 'a4').
-#latex_paper_size = 'letter'
+# The paper size ("letter" or "a4").
+#latex_paper_size = "letter"
 
-# The font size ('10pt', '11pt' or '12pt').
-#latex_font_size = '10pt'
+# The font size ("10pt", "11pt" or "12pt").
+#latex_font_size = "10pt"
 
 # Grouping the document tree into LaTeX files. List of tuples
 # (source start file, target name, title, author, documentclass [howto/manual]).
 latex_documents = [
-        ('index', 'loopy.tex', 'loopy Documentation',
-            'Andreas Kloeckner', 'manual'),
+        ("index", "loopy.tex", "loopy Documentation",
+            "Andreas Kloeckner", "manual"),
 ]
 
 # The name of an image file (relative to this directory) to place at the top of
@@ -238,20 +229,20 @@ latex_documents = [
 # One entry per manual page. List of tuples
 # (source start file, name, description, authors, manual section).
 man_pages = [
-    ('index', 'loopy', 'loopy Documentation',
-     ['Andreas Kloeckner'], 1)
+    ("index", "loopy", "loopy Documentation",
+     ["Andreas Kloeckner"], 1)
 ]
 
 
 # Example configuration for intersphinx: refer to the Python standard library.
 intersphinx_mapping = {
-    'https://docs.python.org/3': None,
-    'https://documen.tician.de/islpy': None,
-    'https://documen.tician.de/pyopencl': None,
-    'https://documen.tician.de/cgen': None,
-    'https://docs.scipy.org/doc/numpy/': None,
-    'https://documen.tician.de/pymbolic': None,
-    'https://documen.tician.de/pytools': None,
+    "https://docs.python.org/3": None,
+    "https://documen.tician.de/islpy": None,
+    "https://documen.tician.de/pyopencl": None,
+    "https://documen.tician.de/cgen": None,
+    "https://docs.scipy.org/doc/numpy/": None,
+    "https://documen.tician.de/pymbolic": None,
+    "https://documen.tician.de/pytools": None,
     }
 
 autoclass_content = "class"
diff --git a/doc/index.rst b/doc/index.rst
index 8eb996f6b..7baff3249 100644
--- a/doc/index.rst
+++ b/doc/index.rst
@@ -46,6 +46,8 @@ Please check :ref:`installation` to get started.
     ref_other
     misc
     ref_internals
+    🚀 Github <https://github.com/inducer/loopy>
+    💾 Download Releases <https://pypi.org/project/loopy>
 
 Indices and tables
 ==================
-- 
GitLab


From 458d82d89183a6a96c0e063389d7491b9bdda2f1 Mon Sep 17 00:00:00 2001
From: Andreas Kloeckner <inform@tiker.net>
Date: Tue, 1 Dec 2020 22:15:24 -0600
Subject: [PATCH 092/140] Fix numpy intersphinx link

---
 doc/conf.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/conf.py b/doc/conf.py
index 7912290e1..1e5deb5f3 100644
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -237,10 +237,10 @@ man_pages = [
 # Example configuration for intersphinx: refer to the Python standard library.
 intersphinx_mapping = {
     "https://docs.python.org/3": None,
+    "https://numpy.org/doc/stable/": None,
     "https://documen.tician.de/islpy": None,
     "https://documen.tician.de/pyopencl": None,
     "https://documen.tician.de/cgen": None,
-    "https://docs.scipy.org/doc/numpy/": None,
     "https://documen.tician.de/pymbolic": None,
     "https://documen.tician.de/pytools": None,
     }
-- 
GitLab


From affa83bbf20bc0993f01743353e0f17a99a2b933 Mon Sep 17 00:00:00 2001
From: Kaushik Kulkarni <kaushikcfd@gmail.com>
Date: Wed, 2 Dec 2020 12:36:00 -0600
Subject: [PATCH 093/140] LoopKernel.insn_inames(insn: InstructionBase) ->
 insn.within_inames

---
 loopy/check.py               | 20 ++++++++++----------
 loopy/codegen/instruction.py |  2 +-
 loopy/kernel/__init__.py     |  2 +-
 loopy/kernel/creation.py     |  4 ++--
 loopy/kernel/tools.py        | 12 ++++++------
 loopy/preprocess.py          | 16 ++++++++--------
 loopy/schedule/__init__.py   |  8 ++++----
 loopy/statistics.py          |  4 ++--
 loopy/symbolic.py            |  2 +-
 loopy/transform/iname.py     |  2 +-
 loopy/transform/privatize.py |  2 +-
 11 files changed, 37 insertions(+), 37 deletions(-)

diff --git a/loopy/check.py b/loopy/check.py
index 910327850..0bf02f7cf 100644
--- a/loopy/check.py
+++ b/loopy/check.py
@@ -215,7 +215,7 @@ def check_for_double_use_of_hw_axes(kernel):
 
     for insn in kernel.instructions:
         insn_tag_keys = set()
-        for iname in kernel.insn_inames(insn):
+        for iname in insn.within_inames:
             for tag in kernel.iname_tags_of_type(iname, UniqueTag):
                 key = tag.key
                 if key in insn_tag_keys:
@@ -232,12 +232,12 @@ def check_for_inactive_iname_access(kernel):
     for insn in kernel.instructions:
         expression_inames = insn.read_dependency_names() & kernel.all_inames()
 
-        if not expression_inames <= kernel.insn_inames(insn):
+        if not expression_inames <= insn.within_inames:
             raise LoopyError(
                     "instruction '%s' references "
                     "inames '%s' that the instruction does not depend on"
                     % (insn.id,
-                        ", ".join(expression_inames - kernel.insn_inames(insn))))
+                        ", ".join(expression_inames - insn.within_inames)))
 
 
 def check_for_unused_inames(kernel):
@@ -293,7 +293,7 @@ def check_for_write_races(kernel):
                 insn.assignee_var_names(),
                 insn.assignee_subscript_deps()):
             assignee_inames = assignee_indices & kernel.all_inames()
-            if not assignee_inames <= kernel.insn_inames(insn):
+            if not assignee_inames <= insn.within_inames:
                 raise LoopyError(
                         "assignee of instructions '%s' references "
                         "iname that the instruction does not depend on"
@@ -304,13 +304,13 @@ def check_for_write_races(kernel):
                 # will cause write races.
 
                 raceable_parallel_insn_inames = {
-                    iname for iname in kernel.insn_inames(insn)
+                    iname for iname in insn.within_inames
                     if kernel.iname_tags_of_type(iname, ConcurrentTag)}
 
             elif assignee_name in kernel.temporary_variables:
                 temp_var = kernel.temporary_variables[assignee_name]
                 raceable_parallel_insn_inames = {
-                        iname for iname in kernel.insn_inames(insn)
+                        iname for iname in insn.within_inames
                         if any(_is_racing_iname_tag(temp_var, tag)
                             for tag in kernel.iname_tags(iname))}
 
@@ -491,7 +491,7 @@ def check_write_destinations(kernel):
             if wvar in kernel.all_inames():
                 raise LoopyError("iname '%s' may not be written" % wvar)
 
-            insn_domain = kernel.get_inames_domain(kernel.insn_inames(insn))
+            insn_domain = kernel.get_inames_domain(insn.within_inames)
             insn_params = set(insn_domain.get_var_names(dim_type.param))
 
             if wvar in kernel.all_params():
@@ -936,7 +936,7 @@ def _check_for_unused_hw_axes_in_kernel_chunk(kernel, sched_index=None):
             group_axes_used = set()
             local_axes_used = set()
 
-            for iname in kernel.insn_inames(insn):
+            for iname in insn.within_inames:
                 ltags = kernel.iname_tags_of_type(iname, LocalIndexTag, max_num=1)
                 gtags = kernel.iname_tags_of_type(iname, GroupIndexTag, max_num=1)
                 altags = kernel.iname_tags_of_type(
@@ -1192,7 +1192,7 @@ def check_implemented_domains(kernel, implemented_domains, code=None):
 
         assert idomains
 
-        insn_inames = kernel.insn_inames(insn)
+        insn_inames = insn.within_inames
 
         # {{{ if we've checked the same thing before, no need to check it again
 
@@ -1269,7 +1269,7 @@ def check_implemented_domains(kernel, implemented_domains, code=None):
 
                 iname_to_dim = pt.get_space().get_var_dict()
                 point_axes = []
-                for iname in kernel.insn_inames(insn) | parameter_inames:
+                for iname in insn_inames | parameter_inames:
                     tp, dim = iname_to_dim[iname]
                     point_axes.append("%s=%d" % (
                         iname, pt.get_coordinate_val(tp, dim).to_python()))
diff --git a/loopy/codegen/instruction.py b/loopy/codegen/instruction.py
index 71133ef7c..14efb64f4 100644
--- a/loopy/codegen/instruction.py
+++ b/loopy/codegen/instruction.py
@@ -89,7 +89,7 @@ def generate_instruction_code(codegen_state, insn):
     else:
         raise RuntimeError("unexpected instruction type")
 
-    insn_inames = kernel.insn_inames(insn)
+    insn_inames = insn.within_inames
 
     return to_codegen_result(
             codegen_state,
diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py
index 9088f3bfe..72a9f0c2e 100644
--- a/loopy/kernel/__init__.py
+++ b/loopy/kernel/__init__.py
@@ -824,7 +824,7 @@ class LoopKernel(ImmutableRecordWithoutPickling):
         result = {
                 iname: set() for iname in self.all_inames()}
         for insn in self.instructions:
-            for iname in self.insn_inames(insn):
+            for iname in insn.within_inames:
                 result[iname].add(insn.id)
 
         return result
diff --git a/loopy/kernel/creation.py b/loopy/kernel/creation.py
index a22fef9e8..94534382f 100644
--- a/loopy/kernel/creation.py
+++ b/loopy/kernel/creation.py
@@ -1523,7 +1523,7 @@ def determine_shapes_of_temporaries(knl):
     def feed_all_expressions(receiver):
         for insn in knl.instructions:
             insn.with_transformed_expressions(
-                lambda expr: receiver(expr, knl.insn_inames(insn)))
+                lambda expr: receiver(expr, insn.within_inames))
 
     var_to_base_indices, var_to_shape, var_to_error = (
         find_shapes_of_vars(
@@ -1543,7 +1543,7 @@ def determine_shapes_of_temporaries(knl):
         def feed_assignee_of_instruction(receiver):
             for insn in knl.instructions:
                 for assignee in insn.assignees:
-                    receiver(assignee, knl.insn_inames(insn))
+                    receiver(assignee, insn.within_inames)
 
         var_to_base_indices_fallback, var_to_shape_fallback, var_to_error = (
             find_shapes_of_vars(
diff --git a/loopy/kernel/tools.py b/loopy/kernel/tools.py
index 0b8d9841e..541bb45ce 100644
--- a/loopy/kernel/tools.py
+++ b/loopy/kernel/tools.py
@@ -685,7 +685,7 @@ def get_auto_axis_iname_ranking_by_stride(kernel, insn):
 
     from loopy.kernel.data import AutoLocalIndexTagBase
     auto_axis_inames = {
-        iname for iname in kernel.insn_inames(insn)
+        iname for iname in insn.within_inames
         if kernel.iname_tags_of_type(iname, AutoLocalIndexTagBase)}
 
     # }}}
@@ -744,7 +744,7 @@ def get_auto_axis_iname_ranking_by_stride(kernel, insn):
     if aggregate_strides:
         very_large_stride = int(np.iinfo(np.int32).max)
 
-        return sorted((iname for iname in kernel.insn_inames(insn)),
+        return sorted((iname for iname in insn.within_inames),
                 key=lambda iname: (
                     aggregate_strides.get(iname, very_large_stride),
                     iname))
@@ -885,7 +885,7 @@ def assign_automatic_axes(kernel, axis=0, local_size=None):
             continue
 
         auto_axis_inames = [
-            iname for iname in kernel.insn_inames(insn)
+            iname for iname in insn.within_inames
             if kernel.iname_tags_of_type(iname, AutoLocalIndexTagBase)]
 
         if not auto_axis_inames:
@@ -893,7 +893,7 @@ def assign_automatic_axes(kernel, axis=0, local_size=None):
 
         assigned_local_axes = set()
 
-        for iname in kernel.insn_inames(insn):
+        for iname in insn.within_inames:
             tags = kernel.iname_tags_of_type(iname, LocalIndexTag, max_num=1)
             if tags:
                 tag, = tags
@@ -1000,7 +1000,7 @@ def guess_var_shape(kernel, var_name):
     submap = SubstitutionRuleExpander(kernel.substitutions)
 
     def run_through_armap(expr):
-        armap(submap(expr), kernel.insn_inames(insn))
+        armap(submap(expr), insn.within_inames)
         return expr
 
     try:
@@ -1533,7 +1533,7 @@ def stringify_instruction_list(kernel):
             raise LoopyError("unexpected instruction type: %s"
                     % type(insn).__name__)
 
-        adapt_to_new_inames_list(kernel.insn_inames(insn))
+        adapt_to_new_inames_list(insn.within_inames)
 
         options = ["id="+Fore.GREEN+insn.id+Style.RESET_ALL]
         if insn.priority:
diff --git a/loopy/preprocess.py b/loopy/preprocess.py
index 12f1cb469..ab5e703e4 100644
--- a/loopy/preprocess.py
+++ b/loopy/preprocess.py
@@ -1004,7 +1004,7 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True,
 
     def map_reduction_seq(expr, rec, nresults, arg_dtypes,
             reduction_dtypes):
-        outer_insn_inames = temp_kernel.insn_inames(insn)
+        outer_insn_inames = insn.within_inames
 
         from loopy.kernel.data import AddressSpace
         acc_var_names = make_temporaries(
@@ -1041,7 +1041,7 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True,
         update_id = insn_id_gen(
                 based_on="{}_{}_update".format(insn.id, "_".join(expr.inames)))
 
-        update_insn_iname_deps = temp_kernel.insn_inames(insn) | set(expr.inames)
+        update_insn_iname_deps = insn.within_inames | set(expr.inames)
         if insn.within_inames_is_final:
             update_insn_iname_deps = insn.within_inames | set(expr.inames)
 
@@ -1126,7 +1126,7 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True,
 
         size = _get_int_iname_size(red_iname)
 
-        outer_insn_inames = temp_kernel.insn_inames(insn)
+        outer_insn_inames = insn.within_inames
 
         from loopy.kernel.data import LocalIndexTagBase
         outer_local_inames = tuple(oiname for oiname in outer_insn_inames
@@ -1363,7 +1363,7 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True,
     def map_scan_seq(expr, rec, nresults, arg_dtypes,
             reduction_dtypes, sweep_iname, scan_iname, sweep_min_value,
             scan_min_value, stride):
-        outer_insn_inames = temp_kernel.insn_inames(insn)
+        outer_insn_inames = insn.within_inames
         inames_to_remove.add(scan_iname)
 
         track_iname = var_name_gen(
@@ -1417,7 +1417,7 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True,
         update_id = insn_id_gen(
                 based_on="{}_{}_update".format(insn.id, "_".join(expr.inames)))
 
-        update_insn_iname_deps = temp_kernel.insn_inames(insn) | {track_iname}
+        update_insn_iname_deps = insn.within_inames | {track_iname}
         if insn.within_inames_is_final:
             update_insn_iname_deps = insn.within_inames | {track_iname}
 
@@ -1461,7 +1461,7 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True,
             return map_reduction_seq(
                     expr, rec, nresults, arg_dtypes, reduction_dtypes)
 
-        outer_insn_inames = temp_kernel.insn_inames(insn)
+        outer_insn_inames = insn.within_inames
 
         from loopy.kernel.data import LocalIndexTagBase
         outer_local_inames = tuple(oiname for oiname in outer_insn_inames
@@ -1668,7 +1668,7 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True,
                 infer_arg_and_reduction_dtypes_for_reduction_expression(
                         temp_kernel, expr, unknown_types_ok))
 
-        outer_insn_inames = temp_kernel.insn_inames(insn)
+        outer_insn_inames = insn.within_inames
         bad_inames = frozenset(expr.inames) & outer_insn_inames
         if bad_inames:
             raise LoopyError("reduction used within loop(s) that it was "
@@ -1854,7 +1854,7 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True,
                     no_sync_with=insn.no_sync_with
                     | frozenset(new_insn_add_no_sync_with),
                     within_inames=(
-                        temp_kernel.insn_inames(insn)
+                        insn.within_inames
                         | new_insn_add_within_inames))
 
             kwargs.pop("id")
diff --git a/loopy/schedule/__init__.py b/loopy/schedule/__init__.py
index 936c7c4d6..0eae1c4cc 100644
--- a/loopy/schedule/__init__.py
+++ b/loopy/schedule/__init__.py
@@ -296,7 +296,7 @@ def find_loop_insn_dep_map(kernel, loop_nest_with_map, loop_nest_around_map):
                     continue
 
                 dep_insn = kernel.id_to_insn[dep_insn_id]
-                dep_insn_inames = kernel.insn_inames(dep_insn)
+                dep_insn_inames = dep_insn.within_inames
 
                 if iname in dep_insn_inames:
                     # Nothing to be learned, dependency is in loop over iname
@@ -940,7 +940,7 @@ def generate_loop_schedules_internal(
         if not is_ready:
             continue
 
-        want = kernel.insn_inames(insn) - sched_state.parallel_inames
+        want = insn.within_inames - sched_state.parallel_inames
         have = active_inames_set - sched_state.parallel_inames
 
         if want != have:
@@ -1106,7 +1106,7 @@ def generate_loop_schedules_internal(
 
             for insn_id in sched_state.unscheduled_insn_ids:
                 insn = kernel.id_to_insn[insn_id]
-                if last_entered_loop in kernel.insn_inames(insn):
+                if last_entered_loop in insn.within_inames:
                     if debug_mode:
                         print("cannot leave '%s' because '%s' still depends on it"
                                 % (last_entered_loop, format_insn(kernel, insn.id)))
@@ -1294,7 +1294,7 @@ def generate_loop_schedules_internal(
             for insn_id in reachable_insn_ids:
                 insn = kernel.id_to_insn[insn_id]
 
-                want = kernel.insn_inames(insn)
+                want = insn.within_inames
 
                 if hypothetically_active_loops <= want:
                     if usefulness is None:
diff --git a/loopy/statistics.py b/loopy/statistics.py
index eda750120..a0a0f9c7e 100755
--- a/loopy/statistics.py
+++ b/loopy/statistics.py
@@ -1239,7 +1239,7 @@ def get_unused_hw_axes_factor(knl, insn, disregard_local_axes, space=None):
     l_used = set()
 
     from loopy.kernel.data import LocalIndexTag, GroupIndexTag
-    for iname in knl.insn_inames(insn):
+    for iname in insn.within_inames:
         tags = knl.iname_tags_of_type(iname,
                               (LocalIndexTag, GroupIndexTag), max_num=1)
         if tags:
@@ -1273,7 +1273,7 @@ def get_unused_hw_axes_factor(knl, insn, disregard_local_axes, space=None):
 
 def count_insn_runs(knl, insn, count_redundant_work, disregard_local_axes=False):
 
-    insn_inames = knl.insn_inames(insn)
+    insn_inames = insn.within_inames
 
     if disregard_local_axes:
         from loopy.kernel.data import LocalIndexTag
diff --git a/loopy/symbolic.py b/loopy/symbolic.py
index 2a89d7dc5..77f8228b6 100644
--- a/loopy/symbolic.py
+++ b/loopy/symbolic.py
@@ -2118,7 +2118,7 @@ class AccessRangeOverlapChecker:
         arm = BatchedAccessRangeMapper(self.kernel, self.vars, overestimate=True)
 
         for expr in exprs:
-            arm(expr, self.kernel.insn_inames(insn))
+            arm(expr, insn.within_inames)
 
         for name, arange in arm.access_ranges.items():
             if arm.bad_subscripts[name]:
diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py
index fefa7ed5f..fb5e8d781 100644
--- a/loopy/transform/iname.py
+++ b/loopy/transform/iname.py
@@ -1609,7 +1609,7 @@ def find_unused_axis_tag(kernel, kind, insn_match=None):
     insns = [insn for insn in kernel.instructions if match(kernel, insn)]
 
     for insn in insns:
-        for iname in kernel.insn_inames(insn):
+        for iname in insn.within_inames:
             if kernel.iname_tags_of_type(iname, kind):
                 used_axes.add(kind.axis)
 
diff --git a/loopy/transform/privatize.py b/loopy/transform/privatize.py
index 8527023bc..ce2d7942b 100644
--- a/loopy/transform/privatize.py
+++ b/loopy/transform/privatize.py
@@ -124,7 +124,7 @@ def privatize_temporaries_with_inames(
         for writer_insn_id in wmap.get(tv.name, []):
             writer_insn = kernel.id_to_insn[writer_insn_id]
 
-            priv_axis_inames = kernel.insn_inames(writer_insn) & privatizing_inames
+            priv_axis_inames = writer_insn.within_inames & privatizing_inames
 
             referenced_priv_axis_inames = (priv_axis_inames
                     & writer_insn.write_dependency_names())
-- 
GitLab


From 1519e06e615d6c2e0cf20c42e2954e1912043bb2 Mon Sep 17 00:00:00 2001
From: Andreas Kloeckner <inform@tiker.net>
Date: Mon, 7 Dec 2020 13:24:58 -0600
Subject: [PATCH 094/140] Stop using pytools.persistent_dict.new_hash

---
 loopy/kernel/__init__.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py
index 72a9f0c2e..72b7db07f 100644
--- a/loopy/kernel/__init__.py
+++ b/loopy/kernel/__init__.py
@@ -1563,8 +1563,8 @@ class LoopKernel(ImmutableRecordWithoutPickling):
 
     def __hash__(self):
         from loopy.tools import LoopyKeyBuilder
-        from pytools.persistent_dict import new_hash
-        key_hash = new_hash()
+        import hashlib
+        key_hash = hashlib.sha256()
         self.update_persistent_hash(key_hash, LoopyKeyBuilder())
         return hash(key_hash.digest())
 
-- 
GitLab


From ac2eb5bf356a5435c31b9f32ca71ac8a144fee58 Mon Sep 17 00:00:00 2001
From: Andreas Kloeckner <inform@tiker.net>
Date: Mon, 7 Dec 2020 17:55:50 -0600
Subject: [PATCH 095/140] Add link to canonical name of TargetBase

---
 loopy/target/__init__.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/loopy/target/__init__.py b/loopy/target/__init__.py
index 6bad214ec..8af47c412 100644
--- a/loopy/target/__init__.py
+++ b/loopy/target/__init__.py
@@ -39,6 +39,14 @@ __doc__ = """
 .. autoclass:: NumbaTarget
 .. autoclass:: NumbaCudaTarget
 
+References to Canonical Names
+-----------------------------
+
+.. currentmodule:: loopy.target
+
+.. class:: TargetBase
+
+    See :class:`loopy.TargetBase`.
 """
 
 
-- 
GitLab


From 87c398ae6c00f3db8e4559824059752d254ce81f Mon Sep 17 00:00:00 2001
From: Andreas Kloeckner <inform@tiker.net>
Date: Mon, 7 Dec 2020 18:15:46 -0600
Subject: [PATCH 096/140] Remove unnecessary any/all imports

---
 loopy/preprocess.py        | 1 -
 loopy/schedule/__init__.py | 1 -
 2 files changed, 2 deletions(-)

diff --git a/loopy/preprocess.py b/loopy/preprocess.py
index ab5e703e4..40b582734 100644
--- a/loopy/preprocess.py
+++ b/loopy/preprocess.py
@@ -256,7 +256,6 @@ def find_temporary_address_space(kernel):
 
         overall_aspace = max(desired_aspace_per_insn)
 
-        from pytools import all
         if not all(iaspace == overall_aspace for iaspace in desired_aspace_per_insn):
             raise LoopyError("not all instructions agree on the "
                     "the desired address space (private/local/global) of  the "
diff --git a/loopy/schedule/__init__.py b/loopy/schedule/__init__.py
index 0eae1c4cc..fde967c65 100644
--- a/loopy/schedule/__init__.py
+++ b/loopy/schedule/__init__.py
@@ -182,7 +182,6 @@ def has_barrier_within(kernel, sched_index):
     if isinstance(sched_item, BeginBlockItem):
         loop_contents, _ = gather_schedule_block(
                 kernel.schedule, sched_index)
-        from pytools import any
         return any(isinstance(subsched_item, Barrier)
                 for subsched_item in loop_contents)
     elif isinstance(sched_item, Barrier):
-- 
GitLab


From 9d628326d9c1310cdd86813bcc2ee51f0358711c Mon Sep 17 00:00:00 2001
From: Nicholas Christensen <njchris2@illinois.edu>
Date: Mon, 14 Dec 2020 22:41:04 -0600
Subject: [PATCH 097/140] bump required pytools version

---
 requirements.txt | 2 +-
 setup.py         | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index 1c266ff39..641c75970 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,4 @@
-git+https://github.com/nchristensen/pytools.git@master#egg=pytools == 2020.4.4
+git+https://github.com/nchristensen/pytools.git@master#egg=pytools == 2020.4.5
 git+https://github.com/inducer/islpy.git#egg=islpy
 git+https://github.com/inducer/cgen.git#egg=cgen
 git+https://github.com/inducer/pyopencl.git#egg=pyopencl
diff --git a/setup.py b/setup.py
index bd76d293a..c580fabb1 100644
--- a/setup.py
+++ b/setup.py
@@ -84,7 +84,7 @@ setup(name="loopy",
 
       python_requires="~=3.6",
       install_requires=[
-          "pytools>=2020.4.4",
+          "pytools>=2020.4.5",
           "pymbolic>=2019.2",
           "genpy>=2016.1.2",
           "cgen>=2016.1",
-- 
GitLab


From 24045f9ad105a35d9beb722670bf9358c6a8fa5f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andreas=20Kl=C3=B6ckner?= <inform@tiker.net>
Date: Wed, 16 Dec 2020 16:31:18 -0600
Subject: [PATCH 098/140] Memoize LoopKernel.__hash__

---
 loopy/kernel/__init__.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py
index 72b7db07f..b24cde2c4 100644
--- a/loopy/kernel/__init__.py
+++ b/loopy/kernel/__init__.py
@@ -1561,6 +1561,7 @@ class LoopKernel(ImmutableRecordWithoutPickling):
         for field_name in self.hash_fields:
             key_builder.rec(key_hash, getattr(self, field_name))
 
+    @memoize_method
     def __hash__(self):
         from loopy.tools import LoopyKeyBuilder
         import hashlib
-- 
GitLab


From b736f1fdd9a8108f9857721f0caf4eb973688a56 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andreas=20Kl=C3=B6ckner?= <inform@tiker.net>
Date: Mon, 4 Jan 2021 19:28:03 +0100
Subject: [PATCH 099/140] Standardize, shorten doc/conf

---
 doc/conf.py | 130 +---------------------------------------------------
 1 file changed, 2 insertions(+), 128 deletions(-)

diff --git a/doc/conf.py b/doc/conf.py
index 1e5deb5f3..9b8cf81e1 100644
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -1,23 +1,5 @@
-#
-# loopy documentation build configuration file, created by
-# sphinx-quickstart on Tue Aug  9 13:40:49 2011.
-#
-# This file is execfile()d with the current directory set to its containing dir.
-#
-# Note that not all possible configuration values are present in this
-# autogenerated file.
-#
-# All configuration values have a default; values that are commented out
-# serve to show the default.
-
-#import sys
 import os
 
-# If extensions (or modules to document with autodoc) are in another directory,
-# add these directories to sys.path here. If the directory is relative to the
-# documentation root, use os.path.abspath to make it absolute, like shown here.
-#sys.path.insert(0, os.path.abspath('.'))
-
 # -- General configuration -----------------------------------------------------
 
 # If your documentation needs a minimal Sphinx version, state it here.
@@ -111,117 +93,8 @@ html_theme_options = {
 html_sidebars = {
         }
 
-# Theme options are theme-specific and customize the look and feel of a theme
-# further.  For a list of options available for each theme, see the
-# documentation.
-#html_theme_options = {}
-
-# Add any paths that contain custom themes here, relative to this directory.
-#html_theme_path = []
-
-# The name for this set of Sphinx documents.  If None, it defaults to
-# "<project> v<release> documentation".
-#html_title = None
-
-# A shorter title for the navigation bar.  Default is the same as html_title.
-#html_short_title = None
-
-# The name of an image file (relative to this directory) to place at the top
-# of the sidebar.
-#html_logo = None
-
-# The name of an image file (within the static path) to use as favicon of the
-# docs.  This file should be a Windows icon file (.ico) being 16x16 or 32x32
-# pixels large.
-#html_favicon = None
-
-# Add any paths that contain custom static files (such as style sheets) here,
-# relative to this directory. They are copied after the builtin static files,
-# so a file named "default.css" will overwrite the builtin "default.css".
-# html_static_path = ['_static']
-
-# If not '', a "Last updated on:" timestamp is inserted at every page bottom,
-# using the given strftime format.
-#html_last_updated_fmt = '%b %d, %Y'
-
-# If true, SmartyPants will be used to convert quotes and dashes to
-# typographically correct entities.
-#html_use_smartypants = True
-
-# Custom sidebar templates, maps document names to template names.
-#html_sidebars = {}
-
-# Additional templates that should be rendered to pages, maps page names to
-# template names.
-#html_additional_pages = {}
-
-# If false, no module index is generated.
-#html_domain_indices = True
-
-# If false, no index is generated.
-#html_use_index = True
-
-# If true, the index is split into individual pages for each letter.
-#html_split_index = False
-
 # If true, links to the reST sources are added to the pages.
-html_show_sourcelink = False
-
-# If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
-#html_show_sphinx = True
-
-# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
-#html_show_copyright = True
-
-# If true, an OpenSearch description file will be output, and all pages will
-# contain a <link> tag referring to it.  The value of this option must be the
-# base URL from which the finished HTML is served.
-#html_use_opensearch = ''
-
-# This is the file name suffix for HTML files (e.g. ".xhtml").
-#html_file_suffix = None
-
-# Output file base name for HTML help builder.
-htmlhelp_basename = "loopydoc"
-
-
-# -- Options for LaTeX output --------------------------------------------------
-
-# The paper size ("letter" or "a4").
-#latex_paper_size = "letter"
-
-# The font size ("10pt", "11pt" or "12pt").
-#latex_font_size = "10pt"
-
-# Grouping the document tree into LaTeX files. List of tuples
-# (source start file, target name, title, author, documentclass [howto/manual]).
-latex_documents = [
-        ("index", "loopy.tex", "loopy Documentation",
-            "Andreas Kloeckner", "manual"),
-]
-
-# The name of an image file (relative to this directory) to place at the top of
-# the title page.
-#latex_logo = None
-
-# For "manual" documents, if this is true, then toplevel headings are parts,
-# not chapters.
-#latex_use_parts = False
-
-# If true, show page references after internal links.
-#latex_show_pagerefs = False
-
-# If true, show URL addresses after external links.
-#latex_show_urls = False
-
-# Additional stuff for the LaTeX preamble.
-#latex_preamble = ''
-
-# Documents to append as an appendix to all manuals.
-#latex_appendices = []
-
-# If false, no module index is generated.
-#latex_domain_indices = True
+html_show_sourcelink = True
 
 
 # -- Options for manual page output --------------------------------------------
@@ -246,3 +119,4 @@ intersphinx_mapping = {
     }
 
 autoclass_content = "class"
+autodoc_typehints = "description"
-- 
GitLab


From a625e8fe0c211ad8e2132d4219ca12a2fd997a59 Mon Sep 17 00:00:00 2001
From: Kaushik Kulkarni <kaushikcfd@gmail.com>
Date: Tue, 5 Jan 2021 10:07:30 -0600
Subject: [PATCH 100/140] check that terms aren't raised to signed int powers

---
 loopy/check.py | 36 ++++++++++++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)

diff --git a/loopy/check.py b/loopy/check.py
index 0bf02f7cf..3ef2804bb 100644
--- a/loopy/check.py
+++ b/loopy/check.py
@@ -130,6 +130,41 @@ def check_for_integer_subscript_indices(kernel):
                 type(insn).__name__))
 
 
+class ExponentIsUnsignedChecker(TypeInferenceMapper):
+    def map_power(self, expr):
+        res_dtype = super().map_power(expr)
+        exp_dtype = self.rec(expr.exponent)
+        if not res_dtype:
+            raise LoopyError(
+                "When checking for unsigned exponents for int-int"
+                f"pow expressions, type inference did not find type of {expr}.")
+
+        if res_dtype[0].is_integral():
+            if exp_dtype[0].numpy_dtype.kind == "i":
+                raise LoopyError("Integers to signed integer powers are not"
+                        " allowed.")
+
+        return res_dtype
+
+
+def check_int_pow_has_unsigned_exponent(kernel):
+    """
+    Checks that all expressions of the ``a**b``, where both ``a``
+    and ``b`` are integers (signed or unsigned) have exponents of type
+    unsigned.
+    """
+    exp_is_uint_checker = ExponentIsUnsignedChecker(kernel)
+    for insn in kernel.instructions:
+        if isinstance(insn, MultiAssignmentBase):
+            exp_is_uint_checker(insn.expression, return_tuple=isinstance(insn,
+                CallInstruction), return_dtype_set=True)
+        elif isinstance(insn, (CInstruction, _DataObliviousInstruction)):
+            pass
+        else:
+            raise NotImplementedError("Unknown insn type %s." % (
+                type(insn).__name__))
+
+
 def check_insn_attributes(kernel):
     """
     Check for legality of attributes of every instruction in *kernel*.
@@ -801,6 +836,7 @@ def pre_schedule_checks(kernel):
         logger.debug("%s: pre-schedule check: start" % kernel.name)
 
         check_for_integer_subscript_indices(kernel)
+        check_int_pow_has_unsigned_exponent(kernel)
         check_for_duplicate_insn_ids(kernel)
         check_for_orphaned_user_hardware_axes(kernel)
         check_for_double_use_of_hw_axes(kernel)
-- 
GitLab


From 891b1d3bdd826cd84d7dcb6e28ed2b04c75ca725 Mon Sep 17 00:00:00 2001
From: Kaushik Kulkarni <kaushikcfd@gmail.com>
Date: Tue, 5 Jan 2021 10:07:58 -0600
Subject: [PATCH 101/140] adds support for integer exponentiation in loopy

---
 loopy/target/c/__init__.py           | 25 +++++++++++++++++
 loopy/target/c/codegen/expression.py | 42 ++++++++++++++++++++++------
 2 files changed, 58 insertions(+), 9 deletions(-)

diff --git a/loopy/target/c/__init__.py b/loopy/target/c/__init__.py
index 3234da45d..dff194049 100644
--- a/loopy/target/c/__init__.py
+++ b/loopy/target/c/__init__.py
@@ -172,6 +172,31 @@ def _preamble_generator(preamble_info):
             yield ("04_%s" % func_name, func_body)
             yield undef_integer_types_macro
 
+    for func in preamble_info.seen_functions:
+        if func.name == "int_pow":
+            base_ctype = preamble_info.kernel.target.dtype_to_typename(
+                    func.arg_dtypes[0])
+            exp_ctype = preamble_info.kernel.target.dtype_to_typename(
+                    func.arg_dtypes[1])
+
+            yield("07_int_pow", f"""
+            inline {base_ctype} {func.c_name}({base_ctype} b, {exp_ctype} n) {{
+              if (n == 0)
+                return 1
+
+              {base_ctype} y = 1;
+
+              while (n > 1) {{
+                if (n % 2) {{
+                  x = x * x;
+                  y = x * y;
+                }}
+                else
+                  x = x * x;
+                n = n / 2;
+              }}
+            }}""")
+
 # }}}
 
 
diff --git a/loopy/target/c/codegen/expression.py b/loopy/target/c/codegen/expression.py
index 74f1ead8b..f200a1594 100644
--- a/loopy/target/c/codegen/expression.py
+++ b/loopy/target/c/codegen/expression.py
@@ -701,6 +701,10 @@ class ExpressionToCExpressionMapper(IdentityMapper):
                     self.rec(expr.denominator, type_context, tgt_dtype))
 
     def map_power(self, expr, type_context):
+        tgt_dtype = self.infer_type(expr)
+        base_dtype = self.infer_type(expr.base)
+        exponent_dtype = self.infer_type(expr.exponent)
+
         def base_impl(expr, type_context):
             from pymbolic.primitives import is_constant, is_zero
             if is_constant(expr.exponent):
@@ -711,14 +715,35 @@ class ExpressionToCExpressionMapper(IdentityMapper):
                 elif is_zero(expr.exponent - 2):
                     return self.rec(expr.base*expr.base, type_context)
 
-            return type(expr)(
-                    self.rec(expr.base, type_context),
-                    self.rec(expr.exponent, type_context))
+            if exponent_dtype.numpy_dtype.kind == "u":
+                # FIXME: need to add this to the seen functions
+
+                from loopy.codegen import SeenFunction
+                func_name = ("loopy_pow_"
+                        f"{base_dtype.numpy_dtype}_{exponent_dtype.numpy_dtype}")
+
+                self.codegen_state.seen_functions.add(
+                        SeenFunction(
+                            "int_pow", func_name,
+                            (base_dtype, exponent_dtype)))
+                return var("loopy_pow_"
+                        f"{base_dtype.numpy_dtype}_{exponent_dtype.numpy_dtype}")(
+                                self.rec(expr.base), self.rec(expr.exponent))
+            else:
+                from loopy.types import to_loopy_type
+                loopy_f64_dtype = to_loopy_type(np.float64,
+                        target=self.kernel.target)
+                return self.wrap_in_typecast(
+                        loopy_f64_dtype,
+                        tgt_dtype,
+                        var("pow")(self.rec(expr.base, type_context,
+                                            loopy_f64_dtype),
+                                   self.rec(expr.base, type_context,
+                                            loopy_f64_dtype)))
 
         if not self.allow_complex:
             return base_impl(expr, type_context)
 
-        tgt_dtype = self.infer_type(expr)
         if tgt_dtype.is_complex():
             if expr.exponent in [2, 3, 4]:
                 value = expr.base
@@ -726,8 +751,8 @@ class ExpressionToCExpressionMapper(IdentityMapper):
                     value = value * expr.base
                 return self.rec(value, type_context)
             else:
-                b_complex = self.infer_type(expr.base).is_complex()
-                e_complex = self.infer_type(expr.exponent).is_complex()
+                b_complex = base_dtype.is_complex()
+                e_complex = exponent_dtype.is_complex()
 
                 if b_complex and not e_complex:
                     return var("%s_powr" % self.complex_type_name(tgt_dtype))(
@@ -754,6 +779,7 @@ class ExpressionToCExpressionMapper(IdentityMapper):
 # {{{ C expression to code mapper
 
 class CExpressionToCodeMapper(RecursiveMapper):
+
     # {{{ helpers
 
     def parenthesize_if_needed(self, s, enclosing_prec, my_prec):
@@ -954,9 +980,7 @@ class CExpressionToCodeMapper(RecursiveMapper):
         return self._map_division_operator("%", expr, enclosing_prec)
 
     def map_power(self, expr, enclosing_prec):
-        return "pow({}, {})".format(
-                self.rec(expr.base, PREC_NONE),
-                self.rec(expr.exponent, PREC_NONE))
+        raise NotImplementedError()
 
     def map_array_literal(self, expr, enclosing_prec):
         return "{ %s }" % self.join_rec(", ", expr.children, PREC_NONE)
-- 
GitLab


From e1eb0bf6a211d49744a36a6c67ba9d796ad1f3eb Mon Sep 17 00:00:00 2001
From: Kaushik Kulkarni <kaushikcfd@gmail.com>
Date: Tue, 5 Jan 2021 10:08:27 -0600
Subject: [PATCH 102/140] test loopy pown

---
 test/test_loopy.py | 43 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 43 insertions(+)

diff --git a/test/test_loopy.py b/test/test_loopy.py
index 9bc532d53..149afd079 100644
--- a/test/test_loopy.py
+++ b/test/test_loopy.py
@@ -2995,6 +2995,49 @@ def test_split_iname_within(ctx_factory):
     lp.auto_test_vs_ref(ref_knl, ctx, knl, parameters=dict(n=5))
 
 
+@pytest.mark.parametrize("basetype,exptype", [(np.int32, np.uint32), (np.int64,
+    np.uint64), (np.int, np.float), (np.float, np.int)])
+def test_int_int_pow(ctx_factory, basetype, exptype):
+    ctx = ctx_factory()
+    queue = cl.CommandQueue(ctx)
+
+    def _make_random_np_array(shape, dtype):
+        from numpy.random import default_rng
+        rng = default_rng()
+        if isinstance(shape, int):
+            shape = (shape,)
+
+        dtype = np.dtype(dtype)
+        if dtype.kind in ["u", "i"]:
+            # choosing numbers so that we don't have overflow, to not trigger
+            # undefined behavior
+            low = 0 if dtype.kind == "u" else -6
+            high = 6
+            return rng.integers(low=low, high=high, size=shape, dtype=dtype)
+        elif dtype.kind == "f":
+            return rng.random(*shape).astype(dtype)
+        else:
+            raise NotImplementedError()
+
+    base = _make_random_np_array(10, basetype)
+    power = _make_random_np_array(10, exptype)
+    expected_result = base ** power
+
+    knl = lp.make_kernel(
+            "{[i]: 0<=i<n}",
+            """
+            res[i] = base[i] ** power[i]
+            """, [lp.GlobalArg("base", dtype=basetype, shape=lp.auto),
+                  lp.GlobalArg("power", dtype=exptype, shape=lp.auto),
+                  ...])
+
+    evt, (result,) = knl(queue, base=base, power=power)
+
+    assert result.dtype == expected_result.dtype
+
+    np.testing.assert_allclose(expected_result, result)
+
+
 if __name__ == "__main__":
     if len(sys.argv) > 1:
         exec(sys.argv[1])
-- 
GitLab


From 6d51618308724ad4c96caa12c567bad2c8717741 Mon Sep 17 00:00:00 2001
From: Kaushik Kulkarni <kaushikcfd@gmail.com>
Date: Tue, 5 Jan 2021 12:41:14 -0600
Subject: [PATCH 103/140] re-enable int exponents

---
 loopy/check.py | 36 ------------------------------------
 1 file changed, 36 deletions(-)

diff --git a/loopy/check.py b/loopy/check.py
index 3ef2804bb..0bf02f7cf 100644
--- a/loopy/check.py
+++ b/loopy/check.py
@@ -130,41 +130,6 @@ def check_for_integer_subscript_indices(kernel):
                 type(insn).__name__))
 
 
-class ExponentIsUnsignedChecker(TypeInferenceMapper):
-    def map_power(self, expr):
-        res_dtype = super().map_power(expr)
-        exp_dtype = self.rec(expr.exponent)
-        if not res_dtype:
-            raise LoopyError(
-                "When checking for unsigned exponents for int-int"
-                f"pow expressions, type inference did not find type of {expr}.")
-
-        if res_dtype[0].is_integral():
-            if exp_dtype[0].numpy_dtype.kind == "i":
-                raise LoopyError("Integers to signed integer powers are not"
-                        " allowed.")
-
-        return res_dtype
-
-
-def check_int_pow_has_unsigned_exponent(kernel):
-    """
-    Checks that all expressions of the ``a**b``, where both ``a``
-    and ``b`` are integers (signed or unsigned) have exponents of type
-    unsigned.
-    """
-    exp_is_uint_checker = ExponentIsUnsignedChecker(kernel)
-    for insn in kernel.instructions:
-        if isinstance(insn, MultiAssignmentBase):
-            exp_is_uint_checker(insn.expression, return_tuple=isinstance(insn,
-                CallInstruction), return_dtype_set=True)
-        elif isinstance(insn, (CInstruction, _DataObliviousInstruction)):
-            pass
-        else:
-            raise NotImplementedError("Unknown insn type %s." % (
-                type(insn).__name__))
-
-
 def check_insn_attributes(kernel):
     """
     Check for legality of attributes of every instruction in *kernel*.
@@ -836,7 +801,6 @@ def pre_schedule_checks(kernel):
         logger.debug("%s: pre-schedule check: start" % kernel.name)
 
         check_for_integer_subscript_indices(kernel)
-        check_int_pow_has_unsigned_exponent(kernel)
         check_for_duplicate_insn_ids(kernel)
         check_for_orphaned_user_hardware_axes(kernel)
         check_for_double_use_of_hw_axes(kernel)
-- 
GitLab


From 12fd89d59662beb9d7b641883be3afac778f3f38 Mon Sep 17 00:00:00 2001
From: Kaushik Kulkarni <kaushikcfd@gmail.com>
Date: Tue, 5 Jan 2021 12:42:46 -0600
Subject: [PATCH 104/140] also record return dtype in SeenFunction

---
 loopy/codegen/__init__.py            | 9 +++++++--
 loopy/target/c/codegen/expression.py | 9 ++++++---
 2 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/loopy/codegen/__init__.py b/loopy/codegen/__init__.py
index cbae4eac5..54924295a 100644
--- a/loopy/codegen/__init__.py
+++ b/loopy/codegen/__init__.py
@@ -146,13 +146,18 @@ class SeenFunction(ImmutableRecord):
     .. attribute:: arg_dtypes
 
         a tuple of arg dtypes
+
+    .. attribute:: res_dtypes
+
+        a tuple of result dtypes
     """
 
-    def __init__(self, name, c_name, arg_dtypes):
+    def __init__(self, name, c_name, arg_dtypes, res_dtypes=()):
         ImmutableRecord.__init__(self,
                 name=name,
                 c_name=c_name,
-                arg_dtypes=arg_dtypes)
+                arg_dtypes=arg_dtypes,
+                res_dtypes=res_dtypes)
 
 
 class CodeGenerationState:
diff --git a/loopy/target/c/codegen/expression.py b/loopy/target/c/codegen/expression.py
index f200a1594..6f48f4de5 100644
--- a/loopy/target/c/codegen/expression.py
+++ b/loopy/target/c/codegen/expression.py
@@ -325,7 +325,8 @@ class ExpressionToCExpressionMapper(IdentityMapper):
             self.codegen_state.seen_functions.add(
                     SeenFunction(
                         name, f"{name}_{suffix}",
-                        (result_dtype, result_dtype)))
+                        (result_dtype, result_dtype),
+                        (result_dtype,)))
 
         if den_nonneg:
             if num_nonneg:
@@ -538,7 +539,8 @@ class ExpressionToCExpressionMapper(IdentityMapper):
         self.codegen_state.seen_functions.add(
                 SeenFunction(identifier,
                     mangle_result.target_name,
-                    mangle_result.arg_dtypes or par_dtypes))
+                    mangle_result.arg_dtypes or par_dtypes,
+                    mangle_result.result_dtypes))
 
         return var(mangle_result.target_name)(*processed_parameters)
 
@@ -725,7 +727,8 @@ class ExpressionToCExpressionMapper(IdentityMapper):
                 self.codegen_state.seen_functions.add(
                         SeenFunction(
                             "int_pow", func_name,
-                            (base_dtype, exponent_dtype)))
+                            (base_dtype, exponent_dtype),
+                            (tgt_dtype, )))
                 return var("loopy_pow_"
                         f"{base_dtype.numpy_dtype}_{exponent_dtype.numpy_dtype}")(
                                 self.rec(expr.base), self.rec(expr.exponent))
-- 
GitLab


From c58c420c8a88883617a36700ecef78fdd4c77b75 Mon Sep 17 00:00:00 2001
From: Kaushik Kulkarni <kaushikcfd@gmail.com>
Date: Tue, 5 Jan 2021 12:46:20 -0600
Subject: [PATCH 105/140] minor fixes in T**int

---
 loopy/target/c/__init__.py           | 22 ++++++++++++++++++----
 loopy/target/c/codegen/expression.py |  6 ++----
 2 files changed, 20 insertions(+), 8 deletions(-)

diff --git a/loopy/target/c/__init__.py b/loopy/target/c/__init__.py
index dff194049..cae5b2335 100644
--- a/loopy/target/c/__init__.py
+++ b/loopy/target/c/__init__.py
@@ -178,23 +178,37 @@ def _preamble_generator(preamble_info):
                     func.arg_dtypes[0])
             exp_ctype = preamble_info.kernel.target.dtype_to_typename(
                     func.arg_dtypes[1])
+            res_ctype = preamble_info.kernel.target.dtype_to_typename(
+                    func.res_dtypes[0])
+
+            if func.arg_dtypes[1].numpy_dtype.kind == "u":
+                signed_exponent_preamble = ""
+            else:
+                signed_exponent_preamble = """
+              if (n < 0) {
+                x = 1.0/x;
+                n =  -n;
+              }"""
 
             yield("07_int_pow", f"""
-            inline {base_ctype} {func.c_name}({base_ctype} b, {exp_ctype} n) {{
+            inline {res_ctype} {func.c_name}({base_ctype} x, {exp_ctype} n) {{
               if (n == 0)
-                return 1
+                return 1;
+              {signed_exponent_preamble}
 
-              {base_ctype} y = 1;
+              {res_ctype} y = 1;
 
               while (n > 1) {{
                 if (n % 2) {{
-                  x = x * x;
                   y = x * y;
+                  x = x * x;
                 }}
                 else
                   x = x * x;
                 n = n / 2;
               }}
+
+              return x*y;
             }}""")
 
 # }}}
diff --git a/loopy/target/c/codegen/expression.py b/loopy/target/c/codegen/expression.py
index 6f48f4de5..4971d2bab 100644
--- a/loopy/target/c/codegen/expression.py
+++ b/loopy/target/c/codegen/expression.py
@@ -717,9 +717,7 @@ class ExpressionToCExpressionMapper(IdentityMapper):
                 elif is_zero(expr.exponent - 2):
                     return self.rec(expr.base*expr.base, type_context)
 
-            if exponent_dtype.numpy_dtype.kind == "u":
-                # FIXME: need to add this to the seen functions
-
+            if exponent_dtype.is_integral():
                 from loopy.codegen import SeenFunction
                 func_name = ("loopy_pow_"
                         f"{base_dtype.numpy_dtype}_{exponent_dtype.numpy_dtype}")
@@ -741,7 +739,7 @@ class ExpressionToCExpressionMapper(IdentityMapper):
                         tgt_dtype,
                         var("pow")(self.rec(expr.base, type_context,
                                             loopy_f64_dtype),
-                                   self.rec(expr.base, type_context,
+                                   self.rec(expr.exponent, type_context,
                                             loopy_f64_dtype)))
 
         if not self.allow_complex:
-- 
GitLab


From ba6615e9b0469b20e85fcd54749156f38c1eef8e Mon Sep 17 00:00:00 2001
From: Kaushik Kulkarni <kaushikcfd@gmail.com>
Date: Tue, 5 Jan 2021 12:46:40 -0600
Subject: [PATCH 106/140] formatting

---
 test/test_loopy.py | 24 +++++++++++-------------
 1 file changed, 11 insertions(+), 13 deletions(-)

diff --git a/test/test_loopy.py b/test/test_loopy.py
index 149afd079..48484141a 100644
--- a/test/test_loopy.py
+++ b/test/test_loopy.py
@@ -2995,41 +2995,39 @@ def test_split_iname_within(ctx_factory):
     lp.auto_test_vs_ref(ref_knl, ctx, knl, parameters=dict(n=5))
 
 
-@pytest.mark.parametrize("basetype,exptype", [(np.int32, np.uint32), (np.int64,
-    np.uint64), (np.int, np.float), (np.float, np.int)])
-def test_int_int_pow(ctx_factory, basetype, exptype):
+@pytest.mark.parametrize("base_type,exp_type", [(np.int32, np.uint32), (np.int64,
+    np.uint64), (np.int, np.float), (np.float, np.int), (np.int, np.int)])
+def test_int_int_pow(ctx_factory, base_type, exp_type):
     ctx = ctx_factory()
     queue = cl.CommandQueue(ctx)
 
     def _make_random_np_array(shape, dtype):
         from numpy.random import default_rng
-        rng = default_rng()
+        rng = default_rng(0)
         if isinstance(shape, int):
             shape = (shape,)
 
         dtype = np.dtype(dtype)
         if dtype.kind in ["u", "i"]:
-            # choosing numbers so that we don't have overflow, to not trigger
-            # undefined behavior
-            low = 0 if dtype.kind == "u" else -6
-            high = 6
+            low = 0  # numpy might trigger error for -ve int exponents
+            high = 6  # choosing numbers to avoid overflow (undefined behavior)
             return rng.integers(low=low, high=high, size=shape, dtype=dtype)
         elif dtype.kind == "f":
             return rng.random(*shape).astype(dtype)
         else:
             raise NotImplementedError()
 
-    base = _make_random_np_array(10, basetype)
-    power = _make_random_np_array(10, exptype)
+    base = _make_random_np_array(10, base_type)
+    power = _make_random_np_array(10, exp_type)
     expected_result = base ** power
 
     knl = lp.make_kernel(
             "{[i]: 0<=i<n}",
             """
             res[i] = base[i] ** power[i]
-            """, [lp.GlobalArg("base", dtype=basetype, shape=lp.auto),
-                  lp.GlobalArg("power", dtype=exptype, shape=lp.auto),
-                  ...])
+            """)
+
+    knl = lp.add_dtypes(knl, {"base": base_type, "power": exp_type})
 
     evt, (result,) = knl(queue, base=base, power=power)
 
-- 
GitLab


From b0dec244996e5b9f8be0b741826e0b0aac93ee48 Mon Sep 17 00:00:00 2001
From: Kaushik Kulkarni <kaushikcfd@gmail.com>
Date: Tue, 5 Jan 2021 13:04:12 -0600
Subject: [PATCH 107/140] comment: explain CExpressionToCodeMapper.map_power is
 no longer supported

---
 loopy/target/c/codegen/expression.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/loopy/target/c/codegen/expression.py b/loopy/target/c/codegen/expression.py
index 4971d2bab..047902ef6 100644
--- a/loopy/target/c/codegen/expression.py
+++ b/loopy/target/c/codegen/expression.py
@@ -981,7 +981,9 @@ class CExpressionToCodeMapper(RecursiveMapper):
         return self._map_division_operator("%", expr, enclosing_prec)
 
     def map_power(self, expr, enclosing_prec):
-        raise NotImplementedError()
+        # No trivial "**" operator for C-like targets, should have been preprocessed
+        # into other expression types.
+        raise RuntimeError()
 
     def map_array_literal(self, expr, enclosing_prec):
         return "{ %s }" % self.join_rec(", ", expr.children, PREC_NONE)
-- 
GitLab


From 5831d8facbb5baf20b90b39844ee8c0e113e28b1 Mon Sep 17 00:00:00 2001
From: Kaushik Kulkarni <kaushikcfd@gmail.com>
Date: Wed, 6 Jan 2021 23:39:56 -0600
Subject: [PATCH 108/140] res_dtypes->result_dtypes

---
 loopy/codegen/__init__.py  | 6 +++---
 loopy/target/c/__init__.py | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/loopy/codegen/__init__.py b/loopy/codegen/__init__.py
index 54924295a..75ea33bbd 100644
--- a/loopy/codegen/__init__.py
+++ b/loopy/codegen/__init__.py
@@ -147,17 +147,17 @@ class SeenFunction(ImmutableRecord):
 
         a tuple of arg dtypes
 
-    .. attribute:: res_dtypes
+    .. attribute:: result_dtypes
 
         a tuple of result dtypes
     """
 
-    def __init__(self, name, c_name, arg_dtypes, res_dtypes=()):
+    def __init__(self, name, c_name, arg_dtypes, result_dtypes=()):
         ImmutableRecord.__init__(self,
                 name=name,
                 c_name=c_name,
                 arg_dtypes=arg_dtypes,
-                res_dtypes=res_dtypes)
+                result_dtypes=result_dtypes)
 
 
 class CodeGenerationState:
diff --git a/loopy/target/c/__init__.py b/loopy/target/c/__init__.py
index cae5b2335..1aff14627 100644
--- a/loopy/target/c/__init__.py
+++ b/loopy/target/c/__init__.py
@@ -179,7 +179,7 @@ def _preamble_generator(preamble_info):
             exp_ctype = preamble_info.kernel.target.dtype_to_typename(
                     func.arg_dtypes[1])
             res_ctype = preamble_info.kernel.target.dtype_to_typename(
-                    func.res_dtypes[0])
+                    func.result_dtypes[0])
 
             if func.arg_dtypes[1].numpy_dtype.kind == "u":
                 signed_exponent_preamble = ""
-- 
GitLab


From 8b5590cbfc4d292dd57da915e6d718a984a7e669 Mon Sep 17 00:00:00 2001
From: Kaushik Kulkarni <kaushikcfd@gmail.com>
Date: Wed, 6 Jan 2021 23:46:47 -0600
Subject: [PATCH 109/140] remove default initialization of
 SeenFunction.result_dtypes

---
 loopy/codegen/__init__.py  | 2 +-
 loopy/target/c/__init__.py | 3 ++-
 loopy/target/python.py     | 3 ++-
 3 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/loopy/codegen/__init__.py b/loopy/codegen/__init__.py
index 75ea33bbd..0f5d824cc 100644
--- a/loopy/codegen/__init__.py
+++ b/loopy/codegen/__init__.py
@@ -152,7 +152,7 @@ class SeenFunction(ImmutableRecord):
         a tuple of result dtypes
     """
 
-    def __init__(self, name, c_name, arg_dtypes, result_dtypes=()):
+    def __init__(self, name, c_name, arg_dtypes, result_dtypes):
         ImmutableRecord.__init__(self,
                 name=name,
                 c_name=c_name,
diff --git a/loopy/target/c/__init__.py b/loopy/target/c/__init__.py
index 1aff14627..0d65da1b6 100644
--- a/loopy/target/c/__init__.py
+++ b/loopy/target/c/__init__.py
@@ -981,7 +981,8 @@ class CFamilyASTBuilder(ASTBuilderBase):
         codegen_state.seen_functions.add(
                 SeenFunction(func_id,
                     mangle_result.target_name,
-                    mangle_result.arg_dtypes))
+                    mangle_result.arg_dtypes,
+                    mangle_result.result_dtypes))
 
         from pymbolic import var
         for i, (a, tgt_dtype) in enumerate(
diff --git a/loopy/target/python.py b/loopy/target/python.py
index e54aa622f..a1557e47b 100644
--- a/loopy/target/python.py
+++ b/loopy/target/python.py
@@ -118,7 +118,8 @@ class ExpressionToPythonMapper(StringifyMapper):
         self.codegen_state.seen_functions.add(
                 SeenFunction(identifier,
                     mangle_result.target_name,
-                    mangle_result.arg_dtypes or par_dtypes))
+                    mangle_result.arg_dtypes or par_dtypes,
+                    mangle_result.result_dtypes))
 
         return "{}({})".format(mangle_result.target_name, ", ".join(str_parameters))
 
-- 
GitLab


From 3cb6522b7bc5d2b770f295d0ec0ad0a8b4e5e6c3 Mon Sep 17 00:00:00 2001
From: Kaushik Kulkarni <kaushikcfd@gmail.com>
Date: Thu, 7 Jan 2021 01:00:26 -0600
Subject: [PATCH 110/140] respect python indentation

---
 loopy/target/c/__init__.py | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/loopy/target/c/__init__.py b/loopy/target/c/__init__.py
index 0d65da1b6..5126172fb 100644
--- a/loopy/target/c/__init__.py
+++ b/loopy/target/c/__init__.py
@@ -34,6 +34,9 @@ from loopy.symbolic import IdentityMapper
 from loopy.types import NumpyType
 import pymbolic.primitives as p
 
+from loopy.tools import remove_common_indentation
+import re
+
 from pytools import memoize_method
 
 __doc__ = """
@@ -184,17 +187,18 @@ def _preamble_generator(preamble_info):
             if func.arg_dtypes[1].numpy_dtype.kind == "u":
                 signed_exponent_preamble = ""
             else:
-                signed_exponent_preamble = """
-              if (n < 0) {
-                x = 1.0/x;
-                n =  -n;
-              }"""
+                signed_exponent_preamble = "\n" + remove_common_indentation(
+                        """
+                        if (n < 0) {
+                          x = 1.0/x;
+                          n =  -n;
+                        }""")
 
             yield("07_int_pow", f"""
             inline {res_ctype} {func.c_name}({base_ctype} x, {exp_ctype} n) {{
               if (n == 0)
                 return 1;
-              {signed_exponent_preamble}
+              {re.sub("^", 14*" ", signed_exponent_preamble, flags=re.M)}
 
               {res_ctype} y = 1;
 
-- 
GitLab


From db8d71ec323e6158070bc035050fdb5cc4707e38 Mon Sep 17 00:00:00 2001
From: Kaushik Kulkarni <kaushikcfd@gmail.com>
Date: Thu, 7 Jan 2021 01:49:12 -0600
Subject: [PATCH 111/140] let ExpressionToCExpressionMapper.map_call handle
 pow(.,.)

---
 loopy/target/c/__init__.py           | 13 ++++++++++++-
 loopy/target/c/codegen/expression.py | 15 ++-------------
 loopy/target/cuda.py                 | 12 ++++++++++++
 loopy/target/opencl.py               |  1 +
 test/test_loopy.py                   |  2 +-
 5 files changed, 28 insertions(+), 15 deletions(-)

diff --git a/loopy/target/c/__init__.py b/loopy/target/c/__init__.py
index 5126172fb..b8cd47b4a 100644
--- a/loopy/target/c/__init__.py
+++ b/loopy/target/c/__init__.py
@@ -497,7 +497,7 @@ def c_math_mangler(target, name, arg_dtypes, modify_name=True):
             [], [dtype.numpy_dtype for dtype in arg_dtypes])
 
         if dtype.kind == "c":
-            raise LoopyTypeError("%s does not support complex numbers")
+            raise LoopyTypeError(f"{name} does not support complex numbers")
 
         elif dtype.kind == "f":
             if modify_name:
@@ -517,6 +517,17 @@ def c_math_mangler(target, name, arg_dtypes, modify_name=True):
                     result_dtypes=(result_dtype,),
                     arg_dtypes=2*(result_dtype,))
 
+    if name == "pow" and len(arg_dtypes) == 2:
+        if any(dtype.is_complex() == "c" for dtype in arg_dtypes):
+            raise LoopyTypeError(f"{name} does not support complex numbers")
+
+        f64_dtype = NumpyType(np.float64)
+
+        # math.h only provides double pow(double, double)
+        return CallMangleInfo(target_name=name,
+                              arg_dtypes=(f64_dtype, f64_dtype),
+                              result_dtypes=(f64_dtype,))
+
     return None
 
 # }}}
diff --git a/loopy/target/c/codegen/expression.py b/loopy/target/c/codegen/expression.py
index 047902ef6..180ee1611 100644
--- a/loopy/target/c/codegen/expression.py
+++ b/loopy/target/c/codegen/expression.py
@@ -731,16 +731,7 @@ class ExpressionToCExpressionMapper(IdentityMapper):
                         f"{base_dtype.numpy_dtype}_{exponent_dtype.numpy_dtype}")(
                                 self.rec(expr.base), self.rec(expr.exponent))
             else:
-                from loopy.types import to_loopy_type
-                loopy_f64_dtype = to_loopy_type(np.float64,
-                        target=self.kernel.target)
-                return self.wrap_in_typecast(
-                        loopy_f64_dtype,
-                        tgt_dtype,
-                        var("pow")(self.rec(expr.base, type_context,
-                                            loopy_f64_dtype),
-                                   self.rec(expr.exponent, type_context,
-                                            loopy_f64_dtype)))
+                return self.rec(var("pow")(expr.base, expr.exponent), type_context)
 
         if not self.allow_complex:
             return base_impl(expr, type_context)
@@ -981,9 +972,7 @@ class CExpressionToCodeMapper(RecursiveMapper):
         return self._map_division_operator("%", expr, enclosing_prec)
 
     def map_power(self, expr, enclosing_prec):
-        # No trivial "**" operator for C-like targets, should have been preprocessed
-        # into other expression types.
-        raise RuntimeError()
+        raise RuntimeError(f"'{expr}' should have been transformed to 'Call' expression node.")
 
     def map_array_literal(self, expr, enclosing_prec):
         return "{ %s }" % self.join_rec(", ", expr.children, PREC_NONE)
diff --git a/loopy/target/cuda.py b/loopy/target/cuda.py
index 2023077bf..67dc1fe24 100644
--- a/loopy/target/cuda.py
+++ b/loopy/target/cuda.py
@@ -127,6 +127,18 @@ def cuda_function_mangler(kernel, name, arg_dtypes):
 
         return dtype, name
 
+    if name in ["pow"] and len(arg_dtypes) == 2:
+        dtype = np.find_common_type([], arg_dtypes)
+
+        if dtype == np.float64:
+            pass  # pow
+        elif dtype == np.float32:
+            name = name + "f"  # powf
+        else:
+            raise RuntimeError(f"{name} does not support type {dtype}")
+
+        return dtype, name
+
     if name in "atan2" and len(arg_dtypes) == 2:
         return arg_dtypes[0], name
 
diff --git a/loopy/target/opencl.py b/loopy/target/opencl.py
index 2ff9ede55..230c73c6f 100644
--- a/loopy/target/opencl.py
+++ b/loopy/target/opencl.py
@@ -144,6 +144,7 @@ _CL_SIMPLE_MULTI_ARG_FUNCTIONS = {
         "rsqrt": 1,
         "clamp": 3,
         "atan2": 2,
+        "pow": 2,
         }
 
 
diff --git a/test/test_loopy.py b/test/test_loopy.py
index 48484141a..18956dbc6 100644
--- a/test/test_loopy.py
+++ b/test/test_loopy.py
@@ -2997,7 +2997,7 @@ def test_split_iname_within(ctx_factory):
 
 @pytest.mark.parametrize("base_type,exp_type", [(np.int32, np.uint32), (np.int64,
     np.uint64), (np.int, np.float), (np.float, np.int), (np.int, np.int)])
-def test_int_int_pow(ctx_factory, base_type, exp_type):
+def test_int_pow(ctx_factory, base_type, exp_type):
     ctx = ctx_factory()
     queue = cl.CommandQueue(ctx)
 
-- 
GitLab


From 0e44c6f42544380db9f47a748a0f9e1d43d4cbe7 Mon Sep 17 00:00:00 2001
From: Kaushik Kulkarni <kaushikcfd@gmail.com>
Date: Thu, 7 Jan 2021 01:52:08 -0600
Subject: [PATCH 112/140] formatting: line length < 85

---
 loopy/target/c/codegen/expression.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/loopy/target/c/codegen/expression.py b/loopy/target/c/codegen/expression.py
index 180ee1611..4705c9c35 100644
--- a/loopy/target/c/codegen/expression.py
+++ b/loopy/target/c/codegen/expression.py
@@ -972,7 +972,8 @@ class CExpressionToCodeMapper(RecursiveMapper):
         return self._map_division_operator("%", expr, enclosing_prec)
 
     def map_power(self, expr, enclosing_prec):
-        raise RuntimeError(f"'{expr}' should have been transformed to 'Call' expression node.")
+        raise RuntimeError(f"'{expr}' should have been transformed to 'Call'"
+                           " expression node.")
 
     def map_array_literal(self, expr, enclosing_prec):
         return "{ %s }" % self.join_rec(", ", expr.children, PREC_NONE)
-- 
GitLab


From efa0670aabfe6c5cce314296d490a76f1890a16b Mon Sep 17 00:00:00 2001
From: Kaushik Kulkarni <kaushikcfd@gmail.com>
Date: Thu, 7 Jan 2021 02:02:44 -0600
Subject: [PATCH 113/140] C supports pow[fl]?

---
 loopy/target/c/__init__.py | 13 +------------
 1 file changed, 1 insertion(+), 12 deletions(-)

diff --git a/loopy/target/c/__init__.py b/loopy/target/c/__init__.py
index b8cd47b4a..2ca08d6bf 100644
--- a/loopy/target/c/__init__.py
+++ b/loopy/target/c/__init__.py
@@ -490,7 +490,7 @@ def c_math_mangler(target, name, arg_dtypes, modify_name=True):
                 arg_dtypes=arg_dtypes)
 
     # binary functions
-    if (name in ["fmax", "fmin", "copysign"]
+    if (name in ["fmax", "fmin", "copysign", "pow"]
             and len(arg_dtypes) == 2):
 
         dtype = np.find_common_type(
@@ -517,17 +517,6 @@ def c_math_mangler(target, name, arg_dtypes, modify_name=True):
                     result_dtypes=(result_dtype,),
                     arg_dtypes=2*(result_dtype,))
 
-    if name == "pow" and len(arg_dtypes) == 2:
-        if any(dtype.is_complex() == "c" for dtype in arg_dtypes):
-            raise LoopyTypeError(f"{name} does not support complex numbers")
-
-        f64_dtype = NumpyType(np.float64)
-
-        # math.h only provides double pow(double, double)
-        return CallMangleInfo(target_name=name,
-                              arg_dtypes=(f64_dtype, f64_dtype),
-                              result_dtypes=(f64_dtype,))
-
     return None
 
 # }}}
-- 
GitLab


From e37f34157ca5e21b6b09d8c45ca75d880e1368f7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andreas=20Kl=C3=B6ckner?= <inform@tiker.net>
Date: Fri, 8 Jan 2021 18:10:50 -0600
Subject: [PATCH 114/140] Point pytools back to master, for Taggable

---
 requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index 641c75970..1072cdec0 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,4 @@
-git+https://github.com/nchristensen/pytools.git@master#egg=pytools == 2020.4.5
+git+https://github.com/inducer/pytools.git#egg=pytools
 git+https://github.com/inducer/islpy.git#egg=islpy
 git+https://github.com/inducer/cgen.git#egg=cgen
 git+https://github.com/inducer/pyopencl.git#egg=pyopencl
-- 
GitLab


From 3c9f798c8c3000381acd26b1637888ace45b8148 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andreas=20Kl=C3=B6ckner?= <inform@tiker.net>
Date: Fri, 8 Jan 2021 18:12:02 -0600
Subject: [PATCH 115/140] Bump pytools dep version for Taggable

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index c580fabb1..fcf284bc8 100644
--- a/setup.py
+++ b/setup.py
@@ -84,7 +84,7 @@ setup(name="loopy",
 
       python_requires="~=3.6",
       install_requires=[
-          "pytools>=2020.4.5",
+          "pytools>=2021.1",
           "pymbolic>=2019.2",
           "genpy>=2016.1.2",
           "cgen>=2016.1",
-- 
GitLab


From 2e6cea3b5280a0fd8dc5042802a050353a8f2506 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andreas=20Kl=C3=B6ckner?= <inform@tiker.net>
Date: Fri, 8 Jan 2021 18:21:03 -0600
Subject: [PATCH 116/140] Add version requirement to requirements.txt for
 pytools

---
 requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index 1072cdec0..8016ee7a8 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,4 @@
-git+https://github.com/inducer/pytools.git#egg=pytools
+git+https://github.com/inducer/pytools.git#egg=pytools >= 2021.1
 git+https://github.com/inducer/islpy.git#egg=islpy
 git+https://github.com/inducer/cgen.git#egg=cgen
 git+https://github.com/inducer/pyopencl.git#egg=pyopencl
-- 
GitLab


From ca744aeaa01c956a4c5edbfc043e8bcb760f6e07 Mon Sep 17 00:00:00 2001
From: Kaushik Kulkarni <kaushikcfd@gmail.com>
Date: Sun, 10 Jan 2021 16:33:41 -0600
Subject: [PATCH 117/140] fixes a bug when there are multiple pown's in a
 kernel

---
 loopy/target/c/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/loopy/target/c/__init__.py b/loopy/target/c/__init__.py
index 2ca08d6bf..d1e474c20 100644
--- a/loopy/target/c/__init__.py
+++ b/loopy/target/c/__init__.py
@@ -194,7 +194,7 @@ def _preamble_generator(preamble_info):
                           n =  -n;
                         }""")
 
-            yield("07_int_pow", f"""
+            yield(f"07_{func.c_name}", f"""
             inline {res_ctype} {func.c_name}({base_ctype} x, {exp_ctype} n) {{
               if (n == 0)
                 return 1;
-- 
GitLab


From c09fc4163af4c610764b75ac1a096bd222db8182 Mon Sep 17 00:00:00 2001
From: Kaushik Kulkarni <kaushikcfd@gmail.com>
Date: Sun, 10 Jan 2021 16:53:47 -0600
Subject: [PATCH 118/140] base type must be cast to tgt_type

---
 loopy/target/c/codegen/expression.py | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/loopy/target/c/codegen/expression.py b/loopy/target/c/codegen/expression.py
index 4705c9c35..c50d89dc6 100644
--- a/loopy/target/c/codegen/expression.py
+++ b/loopy/target/c/codegen/expression.py
@@ -720,16 +720,14 @@ class ExpressionToCExpressionMapper(IdentityMapper):
             if exponent_dtype.is_integral():
                 from loopy.codegen import SeenFunction
                 func_name = ("loopy_pow_"
-                        f"{base_dtype.numpy_dtype}_{exponent_dtype.numpy_dtype}")
+                        f"{tgt_dtype.numpy_dtype}_{exponent_dtype.numpy_dtype}")
 
                 self.codegen_state.seen_functions.add(
                         SeenFunction(
                             "int_pow", func_name,
-                            (base_dtype, exponent_dtype),
+                            (tgt_dtype, exponent_dtype),
                             (tgt_dtype, )))
-                return var("loopy_pow_"
-                        f"{base_dtype.numpy_dtype}_{exponent_dtype.numpy_dtype}")(
-                                self.rec(expr.base), self.rec(expr.exponent))
+                return var(func_name)(self.rec(expr.base), self.rec(expr.exponent))
             else:
                 return self.rec(var("pow")(expr.base, expr.exponent), type_context)
 
-- 
GitLab


From 1b43d409d2eaeb4ad52d70b7a7f7252b44f21913 Mon Sep 17 00:00:00 2001
From: Kaushik Kulkarni <kaushikcfd@gmail.com>
Date: Sun, 10 Jan 2021 18:07:22 -0600
Subject: [PATCH 119/140] tests float ** double

---
 test/test_loopy.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/test/test_loopy.py b/test/test_loopy.py
index 18956dbc6..be595aaa5 100644
--- a/test/test_loopy.py
+++ b/test/test_loopy.py
@@ -2996,8 +2996,9 @@ def test_split_iname_within(ctx_factory):
 
 
 @pytest.mark.parametrize("base_type,exp_type", [(np.int32, np.uint32), (np.int64,
-    np.uint64), (np.int, np.float), (np.float, np.int), (np.int, np.int)])
-def test_int_pow(ctx_factory, base_type, exp_type):
+    np.uint64), (np.int, np.float), (np.float, np.int), (np.int, np.int),
+    (np.float32, np.float64), (np.float64, np.float32)])
+def test_pow(ctx_factory, base_type, exp_type):
     ctx = ctx_factory()
     queue = cl.CommandQueue(ctx)
 
-- 
GitLab


From 7e37f5effc01b4e685de2b0475600e18e3729049 Mon Sep 17 00:00:00 2001
From: Kaushik Kulkarni <kaushikcfd@gmail.com>
Date: Sun, 10 Jan 2021 18:08:20 -0600
Subject: [PATCH 120/140] emits opencl pow to explicitly typed pow variants

---
 loopy/target/opencl.py | 32 ++++++++++++++++++++++++++++++--
 1 file changed, 30 insertions(+), 2 deletions(-)

diff --git a/loopy/target/opencl.py b/loopy/target/opencl.py
index 230c73c6f..c409df380 100644
--- a/loopy/target/opencl.py
+++ b/loopy/target/opencl.py
@@ -28,7 +28,7 @@ import numpy as np
 from loopy.target.c import CFamilyTarget, CFamilyASTBuilder
 from loopy.target.c.codegen.expression import ExpressionToCExpressionMapper
 from pytools import memoize_method
-from loopy.diagnostic import LoopyError
+from loopy.diagnostic import LoopyError, LoopyTypeError
 from loopy.types import NumpyType
 from loopy.target.c import DTypeRegistryWrapper, c_math_mangler
 from loopy.kernel.data import AddressSpace, CallMangleInfo
@@ -144,7 +144,6 @@ _CL_SIMPLE_MULTI_ARG_FUNCTIONS = {
         "rsqrt": 1,
         "clamp": 3,
         "atan2": 2,
-        "pow": 2,
         }
 
 
@@ -182,6 +181,22 @@ def opencl_function_mangler(kernel, name, arg_dtypes):
                     result_dtypes=(result_dtype,),
                     arg_dtypes=2*(result_dtype,))
 
+    if name == "pow" and len(arg_dtypes) == 2:
+        dtype = np.find_common_type(
+                [], [dtype.numpy_dtype for dtype in arg_dtypes])
+        if dtype == np.float64:
+            name = "powf64"
+        elif dtype == np.float32:
+            name = "powf32"
+        else:
+            raise LoopyTypeError(f"'pow' does not support type {dtype}.")
+
+        result_dtype = NumpyType(dtype)
+        return CallMangleInfo(
+                target_name=name,
+                result_dtypes=(result_dtype,),
+                arg_dtypes=2*(result_dtype,))
+
     if name == "dot":
         scalar_dtype, offset, field_name = arg_dtypes[0].numpy_dtype.fields["s0"]
         return CallMangleInfo(
@@ -287,6 +302,19 @@ def opencl_preamble_generator(preamble_info):
                 """ % dict(idx_ctype=kernel.target.dtype_to_typename(
                     kernel.index_dtype))))
 
+    for func in preamble_info.seen_functions:
+        if func.name == "pow" and func.c_name == "powf32":
+            yield("08_clpowf32", """
+            inline float powf32(float x, float y) {
+              return pow(x, y);
+            }""")
+
+        if func.name == "pow" and func.c_name == "powf64":
+            yield("08_clpowf64", """
+            inline double powf64(double x, double y) {
+              return pow(x, y);
+            }""")
+
 # }}}
 
 
-- 
GitLab


From cf2c43bb36890d94e9d4e0e750b4bc0e90900974 Mon Sep 17 00:00:00 2001
From: Kaushik Kulkarni <kaushikcfd@gmail.com>
Date: Sun, 10 Jan 2021 19:08:57 -0600
Subject: [PATCH 121/140] pass in the type context

---
 loopy/target/c/codegen/expression.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/loopy/target/c/codegen/expression.py b/loopy/target/c/codegen/expression.py
index c50d89dc6..9ec99c784 100644
--- a/loopy/target/c/codegen/expression.py
+++ b/loopy/target/c/codegen/expression.py
@@ -727,7 +727,8 @@ class ExpressionToCExpressionMapper(IdentityMapper):
                             "int_pow", func_name,
                             (tgt_dtype, exponent_dtype),
                             (tgt_dtype, )))
-                return var(func_name)(self.rec(expr.base), self.rec(expr.exponent))
+                return var(func_name)(self.rec(expr.base, type_context),
+                                      self.rec(expr.exponent, type_context))
             else:
                 return self.rec(var("pow")(expr.base, expr.exponent), type_context)
 
-- 
GitLab


From 290ee93128d9862a7d354df04fe1dfb3df3083e8 Mon Sep 17 00:00:00 2001
From: Andreas Kloeckner <inform@tiker.net>
Date: Mon, 11 Jan 2021 00:08:37 -0600
Subject: [PATCH 122/140] Draft description of computations expressible in
 Loopy

---
 doc/ref_kernel.rst | 59 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 59 insertions(+)

diff --git a/doc/ref_kernel.rst b/doc/ref_kernel.rst
index d339e1b19..b9f8cced0 100644
--- a/doc/ref_kernel.rst
+++ b/doc/ref_kernel.rst
@@ -3,6 +3,65 @@
 Reference: Loopy's Model of a Kernel
 ====================================
 
+What Types of Computation can a Loopy Kernel Express?
+-----------------------------------------------------
+
+Loopy kernels consist of an a-priori unordered set of statements, operating
+on :math:`n`-dimensional arrays.
+
+Arrays consist of "plain old data" and structures thereof, as describable
+by a :class:`numpy.dtype`.  The n-dimensional shape of these arrays is
+given by a tuple of expressions at most affine in parameters that are
+fixed for the duration of program execution.
+
+A statement (still called 'instruction' in some places, cf.
+:class:`loopy.Instruction`) encodes an assignment to an entry of an array.
+The right-hand side of an assignment consists of an expression that may
+consist of arithmetic and calls to functions.
+If the outermost operation of the RHS expression is a function call,
+the RHS value may be a tuple, and multiple (still scalar) arrays appear
+as LHS values. (This is the only sense in which tuple types are supported.)
+Each statement is parametrized by zero or more loop variables ("inames").
+A statement is executed once for each integer point defined by the domain
+forest for the iname tuple. Each execution is called a statement instance.
+Dependencies between these instances as well as instances of other
+statements are encoded in the program representation specify permissible
+execution orderings.  (The semantics of the dependencies are `being
+sharpened <https://github.com/inducer/loopy/pull/168>`__.) Assignments
+(comprising the evaluation of the RHS and the assignment to the LHS) may
+be specified to be atomic.
+
+The basic building blocks of the domain forest are sets given as
+conjunctions of equalities and inequalities of quasi-affine expressions on
+integer tuples, called domains. The entries of each integer tuple are
+either *parameters* or *inames*. Each domain may optionally have a *parent
+domain*. Parameters of parent-less domains are given by value arguments
+supplied to the program that will remain unchanged during program
+execution. Parameters of domains with parents may be
+
+- run-time-constant value arguments to the program, or
+- inames from parent domains, or
+- scalar, integer temporary variables that are written by statements
+  with iteration domains controlled by a parent domain.
+
+For each tuple of parameter values, the set of iname tuples must be
+finite. Each iname is defined by exactly one domain.
+
+For a tuple of inames, the domain forest defines an iteration domain
+by finding all the domains defining the inames involved, along with their
+parent domains. The resulting tree of domains may contain multiple roots,
+but no branches. The iteration domain is then constructed by intersecting
+these domains and constructing the projection of that set onto the space
+given by the required iname tuple. Observe that, via the parent-child
+domain mechanism, imperfectly-nested and data-dependent loops become
+expressible.
+
+The set of functions callable from the language is predefined by the system.
+Additional functions may be defined by the user by registering them. It is
+not currently possible to define functions from within Loopy; however, work
+is progressing on permitting this. Even once this is allowed, recursion
+will not be permitted.
+
 .. _domain-tree:
 
 Loop Domain Forest
-- 
GitLab


From 9144a4059333675f7f82715498c9fa51060b4c05 Mon Sep 17 00:00:00 2001
From: Andreas Kloeckner <inform@tiker.net>
Date: Mon, 11 Jan 2021 00:10:23 -0600
Subject: [PATCH 123/140] Un-ignore doc PRs for Github CI

---
 .github/workflows/ci.yml | 2 --
 1 file changed, 2 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 7d8101763..05b2e3237 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -4,8 +4,6 @@ on:
         branches:
         - master
     pull_request:
-        paths-ignore:
-        - 'doc/*.rst'
     schedule:
         - cron:  '17 3 * * 0'
 
-- 
GitLab


From 2ff9c5ebc3cdd0b5ec3ee1d1fa4fd9b1dad3780b Mon Sep 17 00:00:00 2001
From: Andreas Kloeckner <inform@tiker.net>
Date: Mon, 11 Jan 2021 10:22:47 -0600
Subject: [PATCH 124/140] Fix InstructionBase reference in
 expressible-computations description

---
 doc/ref_kernel.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/ref_kernel.rst b/doc/ref_kernel.rst
index b9f8cced0..068ccbe73 100644
--- a/doc/ref_kernel.rst
+++ b/doc/ref_kernel.rst
@@ -15,7 +15,7 @@ given by a tuple of expressions at most affine in parameters that are
 fixed for the duration of program execution.
 
 A statement (still called 'instruction' in some places, cf.
-:class:`loopy.Instruction`) encodes an assignment to an entry of an array.
+:class:`loopy.InstructionBase`) encodes an assignment to an entry of an array.
 The right-hand side of an assignment consists of an expression that may
 consist of arithmetic and calls to functions.
 If the outermost operation of the RHS expression is a function call,
-- 
GitLab


From 76bcc1fc8ac89f2340ec4f44ec0798dcd99767cc Mon Sep 17 00:00:00 2001
From: Andreas Kloeckner <inform@tiker.net>
Date: Mon, 11 Jan 2021 10:25:07 -0600
Subject: [PATCH 125/140] Describe variables in expressible-computations
 (thanks @kaushikcfd)

---
 doc/ref_kernel.rst | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/doc/ref_kernel.rst b/doc/ref_kernel.rst
index 068ccbe73..aff99c7c6 100644
--- a/doc/ref_kernel.rst
+++ b/doc/ref_kernel.rst
@@ -3,16 +3,20 @@
 Reference: Loopy's Model of a Kernel
 ====================================
 
-What Types of Computation can a Loopy Kernel Express?
------------------------------------------------------
+What Types of Computation can a Loopy Program Express?
+------------------------------------------------------
 
-Loopy kernels consist of an a-priori unordered set of statements, operating
-on :math:`n`-dimensional arrays.
+Loopy programs consist of an a-priori unordered set of statements, operating
+on :math:`n`-dimensional array variables.
 
 Arrays consist of "plain old data" and structures thereof, as describable
 by a :class:`numpy.dtype`.  The n-dimensional shape of these arrays is
 given by a tuple of expressions at most affine in parameters that are
 fixed for the duration of program execution.
+Each array variable in the program is either an argument or a temporary
+variable.  A temporary variable is only live within the program, while
+argument variables are accessible outside the program and constitute the
+program's inputs and outputs.
 
 A statement (still called 'instruction' in some places, cf.
 :class:`loopy.InstructionBase`) encodes an assignment to an entry of an array.
-- 
GitLab


From 25af6b96a08bed5fa4b0de983ec26873365287a2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andreas=20Kl=C3=B6ckner?= <inform@tiker.net>
Date: Mon, 11 Jan 2021 10:25:57 -0600
Subject: [PATCH 126/140] Expressible computations: arithmetic -> arithmetic
 operations

Co-authored-by: Kaushik Kulkarni <15399010+kaushikcfd@users.noreply.github.com>
---
 doc/ref_kernel.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/ref_kernel.rst b/doc/ref_kernel.rst
index aff99c7c6..392ebe726 100644
--- a/doc/ref_kernel.rst
+++ b/doc/ref_kernel.rst
@@ -21,7 +21,7 @@ program's inputs and outputs.
 A statement (still called 'instruction' in some places, cf.
 :class:`loopy.InstructionBase`) encodes an assignment to an entry of an array.
 The right-hand side of an assignment consists of an expression that may
-consist of arithmetic and calls to functions.
+consist of arithmetic operations and calls to functions.
 If the outermost operation of the RHS expression is a function call,
 the RHS value may be a tuple, and multiple (still scalar) arrays appear
 as LHS values. (This is the only sense in which tuple types are supported.)
-- 
GitLab


From 3f5e253a00af958fdeee7ae74b3854e99329b235 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andreas=20Kl=C3=B6ckner?= <inform@tiker.net>
Date: Tue, 12 Jan 2021 09:08:50 -0600
Subject: [PATCH 127/140] Fix constant type inference in float context

---
 loopy/type_inference.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/loopy/type_inference.py b/loopy/type_inference.py
index 64337864f..787966efc 100644
--- a/loopy/type_inference.py
+++ b/loopy/type_inference.py
@@ -216,8 +216,12 @@ class TypeInferenceMapper(CombineMapper):
             # Numpy types are sized
             return [NumpyType(np.dtype(type(expr)))]
         elif dt.kind == "f":
-            # deduce the smaller type by default
-            return [NumpyType(np.dtype(np.float32))]
+            if np.float32(expr) == np.float64(expr):
+                # No precision is lost by 'guessing' single precision, use that.
+                # This at least covers simple cases like '1.0' or '0.5'.
+                return [NumpyType(np.dtype(np.float32))]
+
+            return [NumpyType(np.dtype(np.float64))]
         elif dt.kind == "c":
             if np.complex64(expr) == np.complex128(expr):
                 # (COMPLEX_GUESS_LOGIC)
-- 
GitLab


From a541c8b2f3e034cbd394d40872e0aa357b35406d Mon Sep 17 00:00:00 2001
From: Kaushik Kulkarni <kaushikcfd@gmail.com>
Date: Tue, 12 Jan 2021 11:24:09 -0600
Subject: [PATCH 128/140] explicitly specify 3.14 to be f32

---
 test/test_statistics.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/test_statistics.py b/test/test_statistics.py
index 757f59e86..bcdc542cb 100644
--- a/test/test_statistics.py
+++ b/test/test_statistics.py
@@ -1070,7 +1070,7 @@ def test_floor_div_coefficient_collector():
         [
             "for i_outer",
             "for j_outer",
-            "<> loc[i_inner,j_inner] = 3.14  {id=loc_init}",
+            "<> loc[i_inner,j_inner] = 3.14f  {id=loc_init}",
             "loc[i_inner,(j_inner+r+4) %% %d] = loc[i_inner,(j_inner+r) %% %d]"
             "  {id=add,dep=loc_init}" % (bsize, bsize),
             "out0[i_outer*16+i_inner,j_outer*16+j_inner] = loc[i_inner,j_inner]"
-- 
GitLab


From 73db73a8a78a376682ce06de1772ff2e35210312 Mon Sep 17 00:00:00 2001
From: Andreas Kloeckner <inform@tiker.net>
Date: Wed, 13 Jan 2021 17:23:25 -0600
Subject: [PATCH 129/140] Small tweaks to the description of expressible
 computations

---
 doc/ref_kernel.rst | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/doc/ref_kernel.rst b/doc/ref_kernel.rst
index 392ebe726..f399d812e 100644
--- a/doc/ref_kernel.rst
+++ b/doc/ref_kernel.rst
@@ -27,9 +27,11 @@ the RHS value may be a tuple, and multiple (still scalar) arrays appear
 as LHS values. (This is the only sense in which tuple types are supported.)
 Each statement is parametrized by zero or more loop variables ("inames").
 A statement is executed once for each integer point defined by the domain
-forest for the iname tuple. Each execution is called a statement instance.
-Dependencies between these instances as well as instances of other
-statements are encoded in the program representation specify permissible
+forest for the iname tuple given for that statement
+(:attr:`loopy.InstructionBase.within_inames`). Each execution of a
+statement (with specific values of the inames) is called a *statement
+instance*.  Dependencies between these instances as well as instances of
+other statements are encoded in the program representation and specify permissible
 execution orderings.  (The semantics of the dependencies are `being
 sharpened <https://github.com/inducer/loopy/pull/168>`__.) Assignments
 (comprising the evaluation of the RHS and the assignment to the LHS) may
@@ -37,7 +39,8 @@ be specified to be atomic.
 
 The basic building blocks of the domain forest are sets given as
 conjunctions of equalities and inequalities of quasi-affine expressions on
-integer tuples, called domains. The entries of each integer tuple are
+integer tuples, called domains, and represented as instances of
+:class:`islpy.BasicSet`. The entries of each integer tuple are
 either *parameters* or *inames*. Each domain may optionally have a *parent
 domain*. Parameters of parent-less domains are given by value arguments
 supplied to the program that will remain unchanged during program
@@ -48,7 +51,7 @@ execution. Parameters of domains with parents may be
 - scalar, integer temporary variables that are written by statements
   with iteration domains controlled by a parent domain.
 
-For each tuple of parameter values, the set of iname tuples must be
+For each tuple of concrete parameter values, the set of iname tuples must be
 finite. Each iname is defined by exactly one domain.
 
 For a tuple of inames, the domain forest defines an iteration domain
-- 
GitLab


From a985bf7b8437e3e175a790cb957082309928dd1d Mon Sep 17 00:00:00 2001
From: Andreas Kloeckner <inform@tiker.net>
Date: Sun, 17 Jan 2021 18:12:30 -0600
Subject: [PATCH 130/140] Drop an old BPL workaround for integers in invoker
 generation

---
 loopy/target/pyopencl.py | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/loopy/target/pyopencl.py b/loopy/target/pyopencl.py
index a17416c47..8b329c1a1 100644
--- a/loopy/target/pyopencl.py
+++ b/loopy/target/pyopencl.py
@@ -509,14 +509,6 @@ def generate_value_arg_setup(kernel, devices, implemented_data_info):
                 Raise('RuntimeError("input argument \'{name}\' '
                         'must be supplied")'.format(name=idi.name))))
 
-        if idi.dtype.is_integral():
-            gen(Comment("cast to Python int to avoid trouble "
-                "with struct packing or Boost.Python"))
-            py_type = "int"
-
-            gen(Assign(idi.name, f"{py_type}({idi.name})"))
-            gen(Line())
-
         if idi.dtype.is_composite():
             gen(S("_lpy_knl.set_arg(%d, %s)" % (cl_arg_idx, idi.name)))
             cl_arg_idx += 1
-- 
GitLab


From b967d74209bd8de0a06aa1411876763fabdbe3ff Mon Sep 17 00:00:00 2001
From: Andreas Kloeckner <inform@tiker.net>
Date: Sun, 17 Jan 2021 18:13:01 -0600
Subject: [PATCH 131/140] Use PyOpenCL's internal _set_arg_buf interface to
 speed up ValueArg passing in invoker

---
 loopy/target/pyopencl.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/loopy/target/pyopencl.py b/loopy/target/pyopencl.py
index 8b329c1a1..2bd72efa8 100644
--- a/loopy/target/pyopencl.py
+++ b/loopy/target/pyopencl.py
@@ -570,7 +570,7 @@ def generate_value_arg_setup(kernel, devices, implemented_data_info):
                 fp_arg_count += 1
 
             gen(S(
-                "_lpy_knl.set_arg(%d, _lpy_pack('%s', %s))"
+                "_lpy_knl._set_arg_buf(%d, _lpy_pack('%s', %s))"
                 % (cl_arg_idx, idi.dtype.dtype.char, idi.name)))
 
             cl_arg_idx += 1
-- 
GitLab


From 77860f0762f9d5fed189ef082860cec6cc365874 Mon Sep 17 00:00:00 2001
From: Andreas Kloeckner <inform@tiker.net>
Date: Mon, 18 Jan 2021 00:37:38 -0600
Subject: [PATCH 132/140] Drop redundant future import in invoker

---
 loopy/target/execution.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/loopy/target/execution.py b/loopy/target/execution.py
index 74819b939..d22d020b7 100644
--- a/loopy/target/execution.py
+++ b/loopy/target/execution.py
@@ -639,8 +639,6 @@ class ExecutionWrapperGeneratorBase:
                     if issubclass(idi.arg_class, KernelArgument)
                     ])
 
-        gen.add_to_preamble("from __future__ import division")
-        gen.add_to_preamble("")
         self.target_specific_preamble(gen)
         gen.add_to_preamble("")
         self.generate_host_code(gen, codegen_result)
-- 
GitLab


From d8967594c56273d477cca9b381cacb447c1c2cf7 Mon Sep 17 00:00:00 2001
From: Andreas Kloeckner <inform@tiker.net>
Date: Mon, 18 Jan 2021 00:39:08 -0600
Subject: [PATCH 133/140] Globalize imports in pyopencl invoker

---
 loopy/target/pyopencl.py           | 6 +-----
 loopy/target/pyopencl_execution.py | 1 +
 2 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/loopy/target/pyopencl.py b/loopy/target/pyopencl.py
index 2bd72efa8..4bc768b56 100644
--- a/loopy/target/pyopencl.py
+++ b/loopy/target/pyopencl.py
@@ -624,15 +624,11 @@ class PyOpenCLPythonASTBuilder(PythonASTBuilderBase):
                     if not issubclass(idi.arg_class, TemporaryVariable)]
                 + ["wait_for=None", "allocator=None"])
 
-        from genpy import (For, Function, Suite, Import, ImportAs, Return,
-                FromImport, Line, Statement as S)
+        from genpy import (For, Function, Suite, Return, Line, Statement as S)
         return Function(
                 codegen_result.current_program(codegen_state).name,
                 args,
                 Suite([
-                    FromImport("struct", ["pack as _lpy_pack"]),
-                    ImportAs("pyopencl", "_lpy_cl"),
-                    Import("pyopencl.tools"),
                     Line(),
                     ] + [
                     Line(),
diff --git a/loopy/target/pyopencl_execution.py b/loopy/target/pyopencl_execution.py
index 7fc20f191..cdee5600b 100644
--- a/loopy/target/pyopencl_execution.py
+++ b/loopy/target/pyopencl_execution.py
@@ -142,6 +142,7 @@ class PyOpenCLExecutionWrapperGenerator(ExecutionWrapperGeneratorBase):
         gen.add_to_preamble("import pyopencl as _lpy_cl")
         gen.add_to_preamble("import pyopencl.array as _lpy_cl_array")
         gen.add_to_preamble("import pyopencl.tools as _lpy_cl_tools")
+        gen.add_to_preamble("from struct import pack as _lpy_pack")
 
     def initialize_system_args(self, gen):
         """
-- 
GitLab


From ba57b237e4866b2288ccc7a167dfe081f8cf95f3 Mon Sep 17 00:00:00 2001
From: Andreas Kloeckner <inform@tiker.net>
Date: Mon, 18 Jan 2021 00:39:45 -0600
Subject: [PATCH 134/140] Do not emit global temporaries handling in invoker if
 no global temporaries exist

---
 loopy/target/pyopencl.py | 29 ++++++++++++++++-------------
 1 file changed, 16 insertions(+), 13 deletions(-)

diff --git a/loopy/target/pyopencl.py b/loopy/target/pyopencl.py
index 4bc768b56..ed44daa2f 100644
--- a/loopy/target/pyopencl.py
+++ b/loopy/target/pyopencl.py
@@ -634,11 +634,12 @@ class PyOpenCLPythonASTBuilder(PythonASTBuilderBase):
                     Line(),
                     function_body,
                     Line(),
-                    ] + [
-                    For("_tv", "_global_temporaries",
-                        # free global temporaries
-                        S("_tv.release()"))
-                    ] + [
+                    ] + ([
+                        For("_tv", "_global_temporaries",
+                            # free global temporaries
+                            S("_tv.release()"))
+                        ] if self._get_global_temporaries(codegen_state) else []
+                    ) + [
                     Line(),
                     Return("_lpy_evt"),
                     ]))
@@ -648,6 +649,14 @@ class PyOpenCLPythonASTBuilder(PythonASTBuilderBase):
         # no such thing in Python
         return None
 
+    def _get_global_temporaries(self, codegen_state):
+        from loopy.kernel.data import AddressSpace
+
+        return sorted(
+            (tv for tv in codegen_state.kernel.temporary_variables.values()
+            if tv.address_space == AddressSpace.GLOBAL),
+            key=lambda tv: tv.name)
+
     def get_temporary_decls(self, codegen_state, schedule_state):
         from genpy import Assign, Comment, Line
 
@@ -656,18 +665,12 @@ class PyOpenCLPythonASTBuilder(PythonASTBuilderBase):
             from operator import mul
             return tv.dtype.numpy_dtype.itemsize * reduce(mul, tv.shape, 1)
 
-        from loopy.kernel.data import AddressSpace
-
-        global_temporaries = sorted(
-            (tv for tv in codegen_state.kernel.temporary_variables.values()
-            if tv.address_space == AddressSpace.GLOBAL),
-            key=lambda tv: tv.name)
-
         from pymbolic.mapper.stringifier import PREC_NONE
         ecm = self.get_expression_to_code_mapper(codegen_state)
 
+        global_temporaries = self._get_global_temporaries(codegen_state)
         if not global_temporaries:
-            return [Assign("_global_temporaries", "[]"), Line()]
+            return []
 
         return [
             Comment("{{{ allocate global temporaries"),
-- 
GitLab


From f5dcf9d6febbad98c98ec9405f739a56a279e924 Mon Sep 17 00:00:00 2001
From: Andreas Kloeckner <inform@tiker.net>
Date: Mon, 18 Jan 2021 11:54:20 -0600
Subject: [PATCH 135/140] Use positional args in enqueue_nd_range_kernel in
 invoker

---
 loopy/target/pyopencl.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/loopy/target/pyopencl.py b/loopy/target/pyopencl.py
index ed44daa2f..8d0c309b0 100644
--- a/loopy/target/pyopencl.py
+++ b/loopy/target/pyopencl.py
@@ -725,8 +725,13 @@ class PyOpenCLPythonASTBuilder(PythonASTBuilderBase):
             arry_arg_code,
             Assign("_lpy_evt", "%(pyopencl_module_name)s.enqueue_nd_range_kernel("
                 "queue, _lpy_knl, "
-                "%(gsize)s, %(lsize)s,  wait_for=wait_for, "
-                "g_times_l=True, allow_empty_ndrange=True)"
+                "%(gsize)s, %(lsize)s, "
+                # using positional args because pybind is slow with kwargs
+                "None, "  # offset
+                "wait_for, "
+                "True, "  # g_times_l
+                "True, "  # allow_empty_ndrange
+                ")"
                 % dict(
                     pyopencl_module_name=self.target.pyopencl_module_name,
                     gsize=ecm(gsize, prec=PREC_NONE, type_context="i"),
-- 
GitLab


From cc47cedc341c036bcbe5ab5079097c89fb6f5b26 Mon Sep 17 00:00:00 2001
From: Andreas Kloeckner <inform@tiker.net>
Date: Tue, 19 Jan 2021 00:45:12 -0600
Subject: [PATCH 136/140] Fix handling of skip_arg_checks in stride value
 finding

---
 loopy/target/execution.py | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/loopy/target/execution.py b/loopy/target/execution.py
index d22d020b7..74887155b 100644
--- a/loopy/target/execution.py
+++ b/loopy/target/execution.py
@@ -281,20 +281,20 @@ class ExecutionWrapperGeneratorBase:
                                     'passed array")'
                                     % (arg.name, impl_array_name))
 
-                        base_arg = kernel.impl_arg_to_arg[impl_array_name]
-
-                        if not options.skip_arg_checks:
-                            gen("%s, _lpy_remdr = divmod(%s.strides[%d], %d)"
-                                    % (arg.name, impl_array_name, stride_impl_axis,
-                                        base_arg.dtype.dtype.itemsize))
+                    base_arg = kernel.impl_arg_to_arg[impl_array_name]
 
-                            gen("assert _lpy_remdr == 0, \"Stride %d of array '%s' "
-                                    ' is not divisible by its dtype itemsize"'
-                                    % (stride_impl_axis, impl_array_name))
-                            gen("del _lpy_remdr")
-                        else:
-                            gen("%s = _lpy_offset // %d"
-                                    % (arg.name, base_arg.dtype.itemsize))
+                    if not options.skip_arg_checks:
+                        gen("%s, _lpy_remdr = divmod(%s.strides[%d], %d)"
+                                % (arg.name, impl_array_name, stride_impl_axis,
+                                    base_arg.dtype.dtype.itemsize))
+
+                        gen("assert _lpy_remdr == 0, \"Stride %d of array '%s' "
+                                ' is not divisible by its dtype itemsize"'
+                                % (stride_impl_axis, impl_array_name))
+                        gen("del _lpy_remdr")
+                    else:
+                        gen("%s = _lpy_offset // %d"
+                                % (arg.name, base_arg.dtype.itemsize))
 
         gen("# }}}")
         gen("")
-- 
GitLab


From 5ca67b040f1bcb4c7d35c981c55d6b93ccd71fbb Mon Sep 17 00:00:00 2001
From: Andreas Kloeckner <inform@tiker.net>
Date: Tue, 19 Jan 2021 00:45:48 -0600
Subject: [PATCH 137/140] Determine skip_arg_checks default according to value
 of python -O flag

---
 loopy/options.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/loopy/options.py b/loopy/options.py
index 2dc8f22cd..46ff37947 100644
--- a/loopy/options.py
+++ b/loopy/options.py
@@ -98,6 +98,12 @@ class Options(ImmutableRecord):
         Do not do any checking (data type, data layout, shape,
         etc.) on arguments for a minor performance gain.
 
+        .. versionchanged:: 2021.1
+
+            This now defaults to the value of the ``optimize``
+            sub-flag from :data:`sys.flags`. That sub-flag is nonzero
+            (i.e. truthy) when Python is run with the ``-O`` flag.
+
     .. attribute:: no_numpy
 
         Do not check for or accept :mod:`numpy` arrays as
@@ -196,6 +202,7 @@ class Options(ImmutableRecord):
         allow_terminal_colors_def = (
                 ALLOW_TERMINAL_COLORS and allow_terminal_colors_def)
 
+        import sys
         ImmutableRecord.__init__(
                 self,
 
@@ -203,7 +210,7 @@ class Options(ImmutableRecord):
                 trace_assignments=kwargs.get("trace_assignments", False),
                 trace_assignment_values=kwargs.get("trace_assignment_values", False),
 
-                skip_arg_checks=kwargs.get("skip_arg_checks", False),
+                skip_arg_checks=kwargs.get("skip_arg_checks", sys.flags.optimize),
                 no_numpy=kwargs.get("no_numpy", False),
                 cl_exec_manage_array_events=kwargs.get("no_numpy", True),
                 return_dict=kwargs.get("return_dict", False),
-- 
GitLab


From d88b1e9fa9bc8c0832060424cb0a3d1390a5692c Mon Sep 17 00:00:00 2001
From: Isuru Fernando <isuruf@gmail.com>
Date: Sat, 23 Jan 2021 20:09:15 -0600
Subject: [PATCH 138/140] Compare insns by id only for list remove

---
 loopy/schedule/__init__.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/loopy/schedule/__init__.py b/loopy/schedule/__init__.py
index fde967c65..3fcdc4bdf 100644
--- a/loopy/schedule/__init__.py
+++ b/loopy/schedule/__init__.py
@@ -1046,7 +1046,8 @@ def generate_loop_schedules_internal(
                 new_insn_ids_to_try = None
 
             new_toposorted_insns = sched_state.insns_in_topologically_sorted_order[:]
-            new_toposorted_insns.remove(insn)
+            new_toposorted_insns = \
+                list(filter(lambda x: x.id != insn.id, new_toposorted_insns))
 
             # }}}
 
-- 
GitLab


From b9cb3a0f1d64a4a9844469c997270c4c73a194f4 Mon Sep 17 00:00:00 2001
From: Isuru Fernando <isuruf@gmail.com>
Date: Sat, 23 Jan 2021 20:55:08 -0600
Subject: [PATCH 139/140] Add a comment and remove unnecessary copy

---
 loopy/schedule/__init__.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/loopy/schedule/__init__.py b/loopy/schedule/__init__.py
index 3fcdc4bdf..bb52430d6 100644
--- a/loopy/schedule/__init__.py
+++ b/loopy/schedule/__init__.py
@@ -1045,9 +1045,10 @@ def generate_loop_schedules_internal(
                     sched_state.active_group_counts.keys()):
                 new_insn_ids_to_try = None
 
-            new_toposorted_insns = sched_state.insns_in_topologically_sorted_order[:]
+            # explicitly use id to compare to avoid performance issues like #199
             new_toposorted_insns = \
-                list(filter(lambda x: x.id != insn.id, new_toposorted_insns))
+                list(filter(lambda x: x.id != insn.id,
+                            sched_state.insns_in_topologically_sorted_order))
 
             # }}}
 
-- 
GitLab


From e35486bcc4aa5d1ddabf2ff9427eb88b09389e35 Mon Sep 17 00:00:00 2001
From: Isuru Fernando <isuruf@gmail.com>
Date: Sun, 24 Jan 2021 13:04:19 -0600
Subject: [PATCH 140/140] Use list comprehension

---
 loopy/schedule/__init__.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/loopy/schedule/__init__.py b/loopy/schedule/__init__.py
index bb52430d6..ccfe0d5ff 100644
--- a/loopy/schedule/__init__.py
+++ b/loopy/schedule/__init__.py
@@ -1046,9 +1046,8 @@ def generate_loop_schedules_internal(
                 new_insn_ids_to_try = None
 
             # explicitly use id to compare to avoid performance issues like #199
-            new_toposorted_insns = \
-                list(filter(lambda x: x.id != insn.id,
-                            sched_state.insns_in_topologically_sorted_order))
+            new_toposorted_insns = [x for x in
+                sched_state.insns_in_topologically_sorted_order if x.id != insn.id]
 
             # }}}
 
-- 
GitLab