diff --git a/loopy/check.py b/loopy/check.py index c31304d87494cbda2a15300b42c6503bceed53d1..38539641a3e6ff9f0d12dd0d352a5206d1bbdfb8 100644 --- a/loopy/check.py +++ b/loopy/check.py @@ -276,6 +276,7 @@ class _AccessCheckMapper(WalkMapper): WalkMapper.map_subscript(self, expr) from pymbolic.primitives import Variable + from pymbolic.mapper.evaluator import UnknownVariableError assert isinstance(expr.aggregate, Variable) shape = None @@ -312,8 +313,32 @@ class _AccessCheckMapper(WalkMapper): expr.aggregate.name, expr, len(subscript), len(shape))) + # apply predicates + access_range = self.domain + insn = self.kernel.id_to_insn[self.insn_id] + possible_warns = [] + if insn.predicates: + from loopy.symbolic import constraints_from_expr + for pred in insn.predicates: + if insn.within_inames & get_dependencies(pred): + with isl.SuppressedWarnings(self.domain.get_ctx()): + try: + constraints = constraints_from_expr( + self.domain.space, pred) + for constraint in constraints: + access_range = access_range.add_constraint( + constraint) + + except isl.Error: + # non-affine predicate - store for warning if we fail + # this check + possible_warns += [pred] + except UnknownVariableError: + # data dependent bounds + pass + try: - access_range = get_access_range(self.domain, subscript, + access_range = get_access_range(access_range, subscript, self.kernel.assumptions) except UnableToDetermineAccessRange: # Likely: index was non-affine, nothing we can do. @@ -332,6 +357,13 @@ class _AccessCheckMapper(WalkMapper): shape_domain = shape_domain.intersect(slab) if not access_range.is_subset(shape_domain): + if possible_warns: + import logging + logger = logging.getLogger(__name__) + logger.info("Predicates: ({}) are are expressed in a " + "non-affine manner, and were not considered " + "for out-of-bounds array checking.".format( + ', '.join(str(x) for x in possible_warns))) raise LoopyError("'%s' in instruction '%s' " "accesses out-of-bounds array element" % (expr, self.insn_id)) diff --git a/loopy/symbolic.py b/loopy/symbolic.py index f4d46854b8dd15c8c1e9a716017ce2724b4db2fc..fbb5701eda41762cc4dc588cf99daa1bdc780b8e 100644 --- a/loopy/symbolic.py +++ b/loopy/symbolic.py @@ -1339,6 +1339,63 @@ class PwAffEvaluationMapper(EvaluationMapperBase, IdentityMapperMixin): return num.mod_val(denom) +class ConditionalMapper(EvaluationMapperBase, IdentityMapperMixin): + def __init__(self, space, vars_to_zero): + self.pw_map = PwAffEvaluationMapper(space, vars_to_zero) + super(ConditionalMapper, self).__init__(self.pw_map.context.copy()) + + def map_logical_not(self, expr): + constraints = self.rec(expr.child) + out = [] + for constraint in constraints: + negated = constraint.get_aff().neg() + if constraint.is_equality(): + out.append(isl.Constraint.equality_from_aff(negated)) + else: + # since we're flipping a >= need to account for the ='s + val = int(str(constraint.get_constant_val())) + if val > 0: + val = 1 + elif val < 0: + val = -1 + out.append(isl.Constraint.inequality_from_aff(negated + val)) + return out + + def map_logical_and(self, expr): + from pymbolic.mapper.evaluator import UnknownVariableError + constraints = [] + for child in expr.children: + try: + constraints += [c for c in self.rec(child)] + except UnknownVariableError: + # the child contained data-dependent conditionals -> can't apply + pass + return constraints + + map_logical_or = map_logical_and + + def map_comparison(self, expr): + left = self.rec(expr.left) + right = self.rec(expr.right) + _, aff = (left - right).get_pieces()[-1] + if expr.operator == "==": + return [isl.Constraint.equality_from_aff(aff)] + elif expr.operator == "!=": + # piecewise + return [isl.Constraint.inequality_from_aff(aff + 1), + isl.Constraint.inequality_from_aff(aff - 1)] + elif expr.operator == "<": + return [isl.Constraint.inequality_from_aff((aff + 1).neg())] + elif expr.operator == "<=": + return [isl.Constraint.inequality_from_aff((aff).neg())] + elif expr.operator == ">": + return [isl.Constraint.inequality_from_aff((aff - 1))] + elif expr.operator == ">=": + return [isl.Constraint.inequality_from_aff((aff))] + else: + raise ValueError("invalid comparison operator") + + def aff_from_expr(space, expr, vars_to_zero=None): if vars_to_zero is None: vars_to_zero = frozenset() @@ -1416,14 +1473,11 @@ def simplify_using_aff(kernel, expr): # }}} -# {{{ expression/set <-> constraint conversion - -def eq_constraint_from_expr(space, expr): - return isl.Constraint.equality_from_aff(aff_from_expr(space, expr)) +# {{{ expression/set <-> constraints conversion - -def ineq_constraint_from_expr(space, expr): - return isl.Constraint.inequality_from_aff(aff_from_expr(space, expr)) +def constraints_from_expr(space, expr): + with isl.SuppressedWarnings(space.get_ctx()): + return ConditionalMapper(space, vars_to_zero=[None])(expr) def constraint_to_cond_expr(cns): diff --git a/test/test_loopy.py b/test/test_loopy.py index 38d1cd6b0e5f2e9ccd64c6ddb41b161040e515e4..9a9d1b9071029ffb62ab9237fc4db33b73892fc6 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -2908,6 +2908,107 @@ def test_dep_cycle_printing_and_error(): print(lp.generate_code(knl)[0]) +@pytest.mark.parametrize("op", ['>', '>=', '<', '<=', '==', '!=']) +def test_conditional_access_range(ctx_factory, op): + ctx = ctx_factory() + queue = cl.CommandQueue(ctx) + + def get_condition(): + if op == '>': + return 'not (i > 7)' + elif op == '>=': + return 'not (i >= 8)' + elif op == '<': + return 'i < 8' + elif op == '<=': + return 'i <=7' + elif op == '==': + return ' or '.join(['i == {}'.format(i) for i in range(8)]) + elif op == '!=': + return ' and '.join(['i != {}'.format(i) for i in range(8, 10)]) + + condition = get_condition() + knl = lp.make_kernel( + "{[i]: 0 <= i < 10}", + """ + if {condition} + tmp[i] = tmp[i] + 1 + end + """.format(condition=condition), + [lp.GlobalArg('tmp', shape=(8,), dtype=np.int64)]) + + assert np.array_equal(knl(queue, tmp=np.arange(8))[1][0], np.arange(1, 9)) + + +def test_conditional_access_range_with_parameters(ctx_factory): + ctx = ctx_factory() + queue = cl.CommandQueue(ctx) + + # test that conditional on parameter works, otherwise the tmp[j, i] will show + # as OOB + knl = lp.make_kernel( + ["{[i]: 0 <= i < 10}", + "{[j]: 0 <= j < problem_size + 2}"], + """ + if i < 8 and j < problem_size + tmp[j, i] = tmp[j, i] + 1 + end + """, + [lp.GlobalArg('tmp', shape=('problem_size', 8,), dtype=np.int64), + lp.ValueArg('problem_size', dtype=np.int64)]) + + assert np.array_equal(knl(queue, tmp=np.arange(80).reshape((10, 8)), + problem_size=10)[1][0], np.arange(1, 81).reshape( + (10, 8))) + + # test a conditional that's only _half_ data-dependent to ensure the other + # half works + knl = lp.make_kernel( + ["{[i]: 0 <= i < 10}", + "{[j]: 0 <= j < problem_size}"], + """ + if i < 8 and (j + offset) < problem_size + tmp[j, i] = tmp[j, i] + 1 + end + """, + [lp.GlobalArg('tmp', shape=('problem_size', 8,), dtype=np.int64), + lp.ValueArg('problem_size', dtype=np.int64), + lp.ValueArg('offset', dtype=np.int64)]) + + assert np.array_equal(knl(queue, tmp=np.arange(80).reshape((10, 8)), + problem_size=10, + offset=0)[1][0], np.arange(1, 81).reshape( + (10, 8))) + + +def test_conditional_access_range_failure(ctx_factory): + # predicate doesn't actually limit access_range + knl = lp.make_kernel( + "{[i,j]: 0 <= i,j < 10}", + """ + if j < 8 + tmp[i] = tmp[i] + end + """, [lp.GlobalArg('tmp', shape=(8,), dtype=np.int32)]) + + from loopy.diagnostic import LoopyError + with pytest.raises(LoopyError): + lp.generate_code_v2(knl).device_code() + + # predicate non affine + knl = lp.make_kernel( + "{[i,j]: 0 <= i,j < 10}", + """ + if (i+3)*i < 15 + tmp[i] = tmp[i] + end + """, [lp.GlobalArg('tmp', shape=(2,), dtype=np.int32)]) + + from loopy.diagnostic import LoopyError + with pytest.raises(LoopyError): + lp.generate_code_v2(knl).device_code() + + def test_backwards_dep_printing_and_error(): knl = lp.make_kernel( "{[i]: 0<=i