diff --git a/loopy/isl_helpers.py b/loopy/isl_helpers.py index 3e841ba875480466c45a31e85bf0ea91548c59c4..d67df115474ff6e97796ac44bd5ee8fa17f15e08 100644 --- a/loopy/isl_helpers.py +++ b/loopy/isl_helpers.py @@ -60,29 +60,34 @@ def dump_space(ls): # {{{ make_slab -def make_slab(space, iname, start, stop, step=1): +def make_slab(space, iname, start, stop, iname_multiplier=1): """ Returns an instance of :class:`islpy._isl.BasicSet`, which satisfies the - constraint ``start <= step*iname < stop``. + constraint ``start <= iname_multiplier*iname < stop``. :arg space: An instance of :class:`islpy._isl.Space`. :arg iname: + Either an instance of :class:`str` as a name of the ``iname`` or a tuple of ``(iname_dt, iname_dx)`` indicating the *iname* in the space. :arg start: + An instance of :class:`int` or an instance of :class:`islpy._isl.Aff` indicating the lower bound of - ``step*iname``(inclusive). + ``iname_multiplier*iname``(inclusive). :arg stop: + An instance of :class:`int` or an instance of :class:`islpy._isl.Aff` indicating the upper bound of - ``step*iname``. + ``iname_multiplier*iname``. + + :arg iname_multiplier: - :arg step: - An instance of :class:`int`. + A strictly positive :class:`int` denoting *iname*'s coefficient in the + above inequality expression. """ zero = isl.Aff.zero_on_domain(space) @@ -112,25 +117,16 @@ def make_slab(space, iname, start, stop, step=1): iname_aff = zero.add_coefficient_val(iname_dt, iname_idx, 1) - if step > 0: - result = (isl.BasicSet.universe(space) - # start <= step*iname - .add_constraint(isl.Constraint.inequality_from_aff( - step*iname_aff - start)) - # step*iname < stop - .add_constraint(isl.Constraint.inequality_from_aff( - stop-1 - step*iname_aff))) - elif step < 0: + if iname_multiplier > 0: result = (isl.BasicSet.universe(space) - # start >= (-step)*iname + # start <= iname_multiplier*iname .add_constraint(isl.Constraint.inequality_from_aff( - step*iname_aff + start)) - # (-step)*iname > stop + iname_multiplier*iname_aff - start)) + # iname_multiplier*iname < stop .add_constraint(isl.Constraint.inequality_from_aff( - -stop-1 - step*iname_aff))) + stop-1 - iname_multiplier*iname_aff))) else: - # step = 0 - raise LoopyError("0 step not allowed in make_slab.") + raise LoopyError("iname_multiplier must be strictly positive") return result diff --git a/loopy/kernel/creation.py b/loopy/kernel/creation.py index 4d1e86ca732a62b246310a9eed437407c878b312..b9cf234c6cf116d569f578d5941e66b37ebc8b38 100644 --- a/loopy/kernel/creation.py +++ b/loopy/kernel/creation.py @@ -1899,6 +1899,8 @@ def normalize_slice_params(slice, dimension_length): :arg dimension_length: Length of the axis being sliced. """ from pymbolic.primitives import Slice + from numbers import Integral + assert isinstance(slice, Slice) start, stop, step = slice.start, slice.stop, slice.step @@ -1924,6 +1926,10 @@ def normalize_slice_params(slice, dimension_length): # }}} + if not isinstance(step, Integral): + raise LoopyError("Non-integral step sizes lead to non-affine domains =>" + " not supported") + return start, stop, step @@ -2063,7 +2069,12 @@ class SliceToInameReplacer(IdentityMapper): from loopy.isl_helpers import make_slab for iname, (start, stop, step) in sar_bounds.items(): - iname_set = iname_set & make_slab(space, iname, start, stop, step) + if step > 0: + iname_set = iname_set & make_slab(space, iname, 0, + stop-start, step) + else: + iname_set = iname_set & make_slab(space, iname, 0, + start-stop, -step) subarray_ref_domains.append(iname_set) diff --git a/test/test_callables.py b/test/test_callables.py index 4bc37aeba7c53ef68a95e84cb681da1216b2ef03..c19c7f1d058b55dba927fed33d44e6b54320fc03 100644 --- a/test/test_callables.py +++ b/test/test_callables.py @@ -855,6 +855,57 @@ def test_kc_with_floor_div_in_expr(ctx_factory, inline): lp.auto_test_vs_ref(knl, ctx, knl) +@pytest.mark.parametrize("start", [5, 6, 7]) +@pytest.mark.parametrize("inline", [True, False]) +def test_non1_step_slices(ctx_factory, start, inline): + # See https://github.com/inducer/loopy/pull/222#discussion_r645905188 + + ctx = ctx_factory() + cq = cl.CommandQueue(ctx) + + callee = lp.make_function( + "{[i]: 0<=i<n}", + """ + y[i] = i**2 + """, + [lp.ValueArg("n"), ...], + name="squared_arange") + + t_unit = lp.make_kernel( + "{[i_init, j_init]: 0<=i_init, j_init<40}", + f""" + X[i_init] = 42 + X[{start}:40:3] = squared_arange({len(range(start, 40, 3))}) + + Y[j_init] = 1729 + Y[39:{start}:-3] = squared_arange({len(range(39, start, -3))}) + """, + [lp.GlobalArg("X,Y", shape=40)], + seq_dependencies=True) + + expected_out1 = 42*np.ones(40, dtype=np.int64) + expected_out1[start:40:3] = np.arange(len(range(start, 40, 3)))**2 + + expected_out2 = 1729*np.ones(40, dtype=np.int64) + expected_out2[39:start:-3] = np.arange(len(range(39, start, -3)))**2 + + t_unit = lp.merge([t_unit, callee]) + + t_unit = lp.set_options(t_unit, "return_dict") + + # check_bounds isn't smart enough to pass assumptions from caller to callee + # during check_bounds + t_unit = lp.set_options(t_unit, enforce_array_accesses_within_bounds="no_check") + + if inline: + t_unit = lp.inline_callable_kernel(t_unit, "squared_arange") + + evt, out_dict = t_unit(cq) + + np.testing.assert_allclose(out_dict["X"].get(), expected_out1) + np.testing.assert_allclose(out_dict["Y"].get(), expected_out2) + + if __name__ == "__main__": if len(sys.argv) > 1: exec(sys.argv[1])