From 2410fe2cf5c3ec621821618ae4fcb888253681c3 Mon Sep 17 00:00:00 2001
From: Andreas Kloeckner <inform@tiker.net>
Date: Mon, 18 Jan 2016 13:44:23 -0600
Subject: [PATCH] Implement chunk_iname

---
 doc/reference.rst        |   2 +
 loopy/__init__.py        |   5 +-
 loopy/transform/iname.py | 152 ++++++++++++++++++++++++++++++++++++---
 test/test_loopy.py       |  18 +++++
 4 files changed, 166 insertions(+), 11 deletions(-)

diff --git a/doc/reference.rst b/doc/reference.rst
index 5499f9255..839851d17 100644
--- a/doc/reference.rst
+++ b/doc/reference.rst
@@ -418,6 +418,8 @@ Wrangling inames
 
 .. autofunction:: split_iname
 
+.. autofunction:: chunk_iname
+
 .. autofunction:: join_inames
 
 .. autofunction:: tag_inames
diff --git a/loopy/__init__.py b/loopy/__init__.py
index 79ea1ee95..fce380f8a 100644
--- a/loopy/__init__.py
+++ b/loopy/__init__.py
@@ -55,7 +55,7 @@ from loopy.library.reduction import register_reduction_parser
 
 from loopy.transform.iname import (
         assume, set_loop_priority,
-        split_iname, join_inames, tag_inames, duplicate_inames,
+        split_iname, chunk_iname, join_inames, tag_inames, duplicate_inames,
         rename_iname, link_inames, remove_unused_inames,
         affine_map_inames)
 
@@ -127,7 +127,8 @@ __all__ = [
         # {{{ transforms
 
         "assume", "set_loop_priority",
-        "split_iname", "join_inames", "tag_inames", "duplicate_inames",
+        "split_iname", "chunk_iname", "join_inames", "tag_inames",
+        "duplicate_inames",
         "rename_iname", "link_inames", "remove_unused_inames",
         "affine_map_inames",
 
diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py
index d92a30615..8927098cf 100644
--- a/loopy/transform/iname.py
+++ b/loopy/transform/iname.py
@@ -76,7 +76,9 @@ def set_loop_priority(kernel, loop_priority):
 # }}}
 
 
-# {{{ split inames
+# {{{ split/chunk inames
+
+# {{{ backend
 
 class _InameSplitter(RuleAwareIdentityMapper):
     def __init__(self, rule_mapping_context, within,
@@ -120,7 +122,9 @@ class _InameSplitter(RuleAwareIdentityMapper):
             return super(_InameSplitter, self).map_variable(expr, expn_state)
 
 
-def split_iname(kernel, split_iname, inner_length,
+def _split_iname_backend(kernel, split_iname,
+        fixed_length, fixed_length_is_inner,
+        make_new_loop_index,
         outer_iname=None, inner_iname=None,
         outer_tag=None, inner_tag=None,
         slabs=(0, 0), do_tagged_check=True,
@@ -166,18 +170,23 @@ def split_iname(kernel, split_iname, inner_length,
 
         from loopy.isl_helpers import make_slab
 
+        if fixed_length_is_inner:
+            fixed_iname, var_length_iname = inner_iname, outer_iname
+        else:
+            fixed_iname, var_length_iname = outer_iname, inner_iname
+
         space = s.get_space()
-        inner_constraint_set = (
-                make_slab(space, inner_iname, 0, inner_length)
-                # name = inner + length*outer
+        fixed_constraint_set = (
+                make_slab(space, fixed_iname, 0, fixed_length)
+                # name = fixed_iname + fixed_length*var_length_iname
                 .add_constraint(isl.Constraint.eq_from_names(
                     space, {
                         split_iname: 1,
-                        inner_iname: -1,
-                        outer_iname: -inner_length})))
+                        fixed_iname: -1,
+                        var_length_iname: -fixed_length})))
 
         name_dim_type, name_idx = space.get_var_dict()[split_iname]
-        s = s.intersect(inner_constraint_set)
+        s = s.intersect(fixed_constraint_set)
 
         if within is None:
             s = s.project_out(name_dim_type, name_idx, 1)
@@ -189,7 +198,7 @@ def split_iname(kernel, split_iname, inner_length,
     from pymbolic import var
     inner = var(inner_iname)
     outer = var(outer_iname)
-    new_loop_index = inner + outer*inner_length
+    new_loop_index = make_new_loop_index(inner, outer)
 
     subst_map = {var(split_iname): new_loop_index}
     applied_iname_rewrites.append(subst_map)
@@ -251,6 +260,131 @@ def split_iname(kernel, split_iname, inner_length,
 # }}}
 
 
+# {{{ split iname
+
+def split_iname(kernel, split_iname, inner_length,
+        outer_iname=None, inner_iname=None,
+        outer_tag=None, inner_tag=None,
+        slabs=(0, 0), do_tagged_check=True,
+        within=None):
+    """Split *split_iname* into two inames (an 'inner' one and an 'outer' one)
+    so that ``split_iname == inner + outer*inner_length`` and *inner* is of
+    fixed length *inner_length*.
+
+    :arg within: a stack match as understood by
+        :func:`loopy.context_matching.parse_stack_match`.
+    """
+    def make_new_loop_index(inner, outer):
+        return inner + outer*inner_length
+
+    return _split_iname_backend(kernel, split_iname,
+            fixed_length=inner_length, fixed_length_is_inner=True,
+            make_new_loop_index=make_new_loop_index,
+            outer_iname=outer_iname, inner_iname=inner_iname,
+            outer_tag=outer_tag, inner_tag=inner_tag,
+            slabs=slabs, do_tagged_check=do_tagged_check,
+            within=within)
+
+# }}}
+
+
+# {{{ chunk iname
+
+def chunk_iname(kernel, split_iname, num_chunks,
+        outer_iname=None, inner_iname=None,
+        outer_tag=None, inner_tag=None,
+        slabs=(0, 0), do_tagged_check=True,
+        within=None):
+    """
+    Split *split_iname* into two inames (an 'inner' one and an 'outer' one)
+    so that ``split_iname == inner + outer*chunk_length`` and *outer* is of
+    fixed length *num_chunks*.
+
+    :arg within: a stack match as understood by
+        :func:`loopy.context_matching.parse_stack_match`.
+
+    .. versionadded:: 2016.2
+    """
+
+    size = kernel.get_iname_bounds(split_iname).size
+    k0 = isl.Aff.zero_on_domain(size.domain().space)
+    chunk_ceil = size.div(k0+num_chunks).ceil()
+    chunk_floor = size.div(k0+num_chunks).floor()
+    chunk_diff = chunk_ceil - chunk_floor
+    chunk_mod = size.mod_val(num_chunks)
+
+    from loopy.symbolic import pw_aff_to_expr
+    from pymbolic.primitives import Min
+
+    def make_new_loop_index(inner, outer):
+        # These two expressions are equivalent. Benchmarking between the
+        # two was inconclusive, although one is shorter.
+
+        if 0:
+            # Triggers isl issues in check pass.
+            return (
+                    inner +
+                    pw_aff_to_expr(chunk_floor) * outer
+                    +
+                    pw_aff_to_expr(chunk_diff) * Min(
+                        (outer, pw_aff_to_expr(chunk_mod))))
+        else:
+            return (
+                    inner +
+                    pw_aff_to_expr(chunk_ceil) * Min(
+                        (outer, pw_aff_to_expr(chunk_mod)))
+                    +
+                    pw_aff_to_expr(chunk_floor) * (
+                        outer - Min((outer, pw_aff_to_expr(chunk_mod)))))
+
+    # {{{ check that iname is a box iname
+
+    # Since the linearization used in the constraint used to map the domain
+    # does not match the linearization in make_new_loop_index, we can't really
+    # tolerate if the iname in question has constraints that make it non-boxy,
+    # since these sub-indices would end up in the wrong spot.
+
+    for dom in kernel.domains:
+        var_dict = dom.get_var_dict()
+        if split_iname not in var_dict:
+            continue
+
+        dt, idx = var_dict[split_iname]
+        assert dt == dim_type.set
+
+        aff_zero = isl.Aff.zero_on_domain(dom.space)
+        aff_split_iname = aff_zero.set_coefficient_val(dim_type.in_, idx, 1)
+        aligned_size = isl.align_spaces(size, aff_zero)
+        box_dom = (
+                dom
+                .eliminate(dt, idx, 1)
+                & aff_zero.le_set(aff_split_iname)
+                & aff_split_iname.lt_set(aligned_size)
+                )
+
+        if not (
+                box_dom <= dom
+                and
+                dom <= box_dom):
+            raise LoopyError("domain '%s' is not box-shape about iname "
+                    "'%s', cannot use chunk_iname()"
+                    % (dom, split_iname))
+
+    # }}}
+
+    return _split_iname_backend(kernel, split_iname,
+            fixed_length=num_chunks, fixed_length_is_inner=False,
+            make_new_loop_index=make_new_loop_index,
+            outer_iname=outer_iname, inner_iname=inner_iname,
+            outer_tag=outer_tag, inner_tag=inner_tag,
+            slabs=slabs, do_tagged_check=do_tagged_check,
+            within=within)
+
+# }}}
+
+# }}}
+
+
 # {{{ join inames
 
 class _InameJoiner(RuleAwareSubstitutionMapper):
diff --git a/test/test_loopy.py b/test/test_loopy.py
index aa1f7b09a..a23782c3c 100644
--- a/test/test_loopy.py
+++ b/test/test_loopy.py
@@ -2357,6 +2357,24 @@ def test_cuda_target():
                     lp.preprocess_kernel(knl)))[0])
 
 
+def test_chunk_iname(ctx_factory):
+    ctx = ctx_factory()
+
+    knl = lp.make_kernel(
+            "{ [i]: 0<=i<n }",
+            "out[i] = 2*a[i]",
+            [
+                lp.GlobalArg("out,a", np.float32, shape=lp.auto),
+                "..."
+                ],
+            assumptions="n>0")
+
+    ref_knl = knl
+    knl = lp.chunk_iname(knl, "i", 3, inner_tag="l.0")
+    knl = lp.set_loop_priority(knl, "i_outer, i_inner")
+    lp.auto_test_vs_ref(ref_knl, ctx, knl, parameters=dict(n=130))
+
+
 if __name__ == "__main__":
     if len(sys.argv) > 1:
         exec(sys.argv[1])
-- 
GitLab