diff --git a/doc/ref_kernel.rst b/doc/ref_kernel.rst
index ff27d84f55ea1b8b4222f18ba271723f450b1a42..33d40385b529f72e54da65238304e87bdb2cddab 100644
--- a/doc/ref_kernel.rst
+++ b/doc/ref_kernel.rst
@@ -270,9 +270,10 @@ Expressions
 Loopy's expressions are a slight superset of the expressions supported by
 :mod:`pymbolic`.
 
-* `if`
-* `reductions`
+* ``if``
+* ``reductions``
     * duplication of reduction inames
+    * ``reduce`` vs ``simul_reduce``
 * complex-valued arithmetic
 * tagging of array access and substitution rule use ("$")
 * ``indexof``, ``indexof_vec``
diff --git a/loopy/preprocess.py b/loopy/preprocess.py
index fe88118e018d5829e9ff2104b70940a39cb95ade..1aca332ff973c672a9c8c3631961f232d4d24f56 100644
--- a/loopy/preprocess.py
+++ b/loopy/preprocess.py
@@ -66,6 +66,48 @@ def prepare_for_caching(kernel):
 # }}}
 
 
+# {{{ check reduction iname uniqueness
+
+def check_reduction_iname_uniqueness(kernel):
+    """Raise :class:`LoopyError` if an iname is used in more than one
+    reduction and at least one of those reductions was not written as
+    ``simul_reduce(...)``.
+    """
+    iname_to_reduction_count = {}
+    iname_to_nonsimultaneous_reduction_count = {}
+
+    def map_reduction(expr, rec):
+        rec(expr.expr)
+        for iname in expr.inames:
+            iname_to_reduction_count[iname] = (
+                    iname_to_reduction_count.get(iname, 0) + 1)
+            if not expr.allow_simultaneous:
+                iname_to_nonsimultaneous_reduction_count[iname] = (
+                        iname_to_nonsimultaneous_reduction_count.get(iname, 0) + 1)
+
+        return expr
+
+    from loopy.symbolic import ReductionCallbackMapper
+    cb_mapper = ReductionCallbackMapper(map_reduction)
+
+    for insn in kernel.instructions:
+        insn.with_transformed_expressions(cb_mapper)
+
+    for iname, count in six.iteritems(iname_to_reduction_count):
+        nonsimul_count = iname_to_nonsimultaneous_reduction_count.get(iname, 0)
+
+        if nonsimul_count and count > 1:
+            raise LoopyError("iname '%s' used in more than one reduction. "
+                    "(%d of them, to be precise.) "
+                    "Since this usage can easily cause loop scheduling "
+                    "problems, this is prohibited by default. "
+                    "If you are sure that this is OK, write the reduction "
+                    "as 'simul_reduce(...)' instead of 'reduce(...)'"
+                    % (iname, count))
+
+# }}}
+
+
 # {{{ infer types
 
 def _infer_var_type(kernel, var_name, type_inf_mapper, subst_expander):
@@ -677,11 +719,13 @@ def preprocess_kernel(kernel, device=None):
     kernel = expand_subst(kernel)
 
     # Ordering restriction:
-    # Type inference doesn't handle substitutions. Get them out of the
-    # way.
+    # Type inference and reduction iname uniqueness don't handle substitutions.
+    # Get them out of the way.
 
     kernel = infer_unknown_types(kernel, expect_completion=False)
 
+    check_reduction_iname_uniqueness(kernel)
+
     kernel = add_default_dependencies(kernel)
 
     # Ordering restrictions:
diff --git a/loopy/symbolic.py b/loopy/symbolic.py
index 7adab80c68c38f900976eb1adcd90226f40a7d9b..64e6384bc1231bf7e92c1951bd02c1c6b37c0256 100644
--- a/loopy/symbolic.py
+++ b/loopy/symbolic.py
@@ -79,7 +79,8 @@ class IdentityMapperMixin(object):
         return expr
 
     def map_reduction(self, expr, *args):
-        return Reduction(expr.operation, expr.inames, self.rec(expr.expr, *args))
+        return Reduction(expr.operation, expr.inames, self.rec(expr.expr, *args),
+                allow_simultaneous=expr.allow_simultaneous)
 
     def map_tagged_variable(self, expr, *args):
         # leaf, doesn't change
@@ -146,7 +147,8 @@ class StringifyMapper(StringifyMapperBase):
         return "loc.%d" % expr.index
 
     def map_reduction(self, expr, prec):
-        return "reduce(%s, [%s], %s)" % (
+        return "%sreduce(%s, [%s], %s)" % (
+                "simul_" if expr.allow_simultaneous else "",
                 expr.operation, ", ".join(expr.inames), expr.expr)
 
     def map_tagged_variable(self, expr, prec):
@@ -346,11 +348,16 @@ class Reduction(AlgebraicLeaf):
 
         The expression (as a :class:`pymbolic.primitives.Expression`)
         on which reduction is performed.
+
+    .. attribute:: allow_simultaneous
+
+        A :class:`bool`. If not *True*, an iname is allowed to be used
+        in precisely one reduction, to avoid mis-nesting errors.
     """
 
-    init_arg_names = ("operation", "inames", "expr")
+    init_arg_names = ("operation", "inames", "expr", "allow_simultaneous")
 
-    def __init__(self, operation, inames, expr):
+    def __init__(self, operation, inames, expr, allow_simultaneous=False):
         if isinstance(inames, str):
             inames = tuple(iname.strip() for iname in inames.split(","))
 
@@ -378,9 +385,10 @@ class Reduction(AlgebraicLeaf):
         self.operation = operation
         self.inames = inames
         self.expr = expr
+        self.allow_simultaneous = allow_simultaneous
 
     def __getinitargs__(self):
-        return (self.operation, self.inames, self.expr)
+        return (self.operation, self.inames, self.expr, self.allow_simultaneous)
 
     def get_hash(self):
         return hash((self.__class__, self.operation, self.inames,
@@ -779,7 +787,8 @@ class FunctionToPrimitiveMapper(IdentityMapper):
     turns those into the actual pymbolic primitives used for that.
     """
 
-    def _parse_reduction(self, operation, inames, red_expr):
+    def _parse_reduction(self, operation, inames, red_expr,
+            allow_simultaneous=False):
         if isinstance(inames, Variable):
             inames = (inames,)
 
@@ -795,7 +804,8 @@ class FunctionToPrimitiveMapper(IdentityMapper):
 
             processed_inames.append(iname.name)
 
-        return Reduction(operation, tuple(processed_inames), red_expr)
+        return Reduction(operation, tuple(processed_inames), red_expr,
+                allow_simultaneous=allow_simultaneous)
 
     def map_call(self, expr):
         from loopy.library.reduction import parse_reduction_op
@@ -820,7 +830,7 @@ class FunctionToPrimitiveMapper(IdentityMapper):
             else:
                 raise TypeError("cse takes two arguments")
 
-        elif name == "reduce":
+        elif name in ["reduce", "simul_reduce"]:
             if len(expr.parameters) == 3:
                 operation, inames, red_expr = expr.parameters
 
@@ -829,7 +839,8 @@ class FunctionToPrimitiveMapper(IdentityMapper):
                             "must be a symbol")
 
                 operation = parse_reduction_op(operation.name)
-                return self._parse_reduction(operation, inames, self.rec(red_expr))
+                return self._parse_reduction(operation, inames, self.rec(red_expr),
+                        allow_simultaneous=(name == "simul_reduce"))
             else:
                 raise TypeError("invalid 'reduce' calling sequence")
 
diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py
index c98ed26b41d69189edc8253449b226524a365367..9c882b98dc40595f84ca93189302a25c92e8233a 100644
--- a/loopy/transform/iname.py
+++ b/loopy/transform/iname.py
@@ -119,7 +119,8 @@ class _InameSplitter(RuleAwareIdentityMapper):
 
             from loopy.symbolic import Reduction
             return Reduction(expr.operation, tuple(new_inames),
-                        self.rec(expr.expr, expn_state))
+                        self.rec(expr.expr, expn_state),
+                        expr.allow_simultaneous)
         else:
             return super(_InameSplitter, self).map_reduction(expr, expn_state)
 
@@ -444,7 +445,8 @@ class _InameJoiner(RuleAwareSubstitutionMapper):
 
             from loopy.symbolic import Reduction
             return Reduction(expr.operation, tuple(new_inames),
-                        self.rec(expr.expr, expn_state))
+                        self.rec(expr.expr, expn_state),
+                        expr.allow_simultaneous)
         else:
             return super(_InameJoiner, self).map_reduction(expr, expn_state)
 
@@ -676,7 +678,8 @@ class _InameDuplicator(RuleAwareIdentityMapper):
 
             from loopy.symbolic import Reduction
             return Reduction(expr.operation, new_inames,
-                        self.rec(expr.expr, expn_state))
+                        self.rec(expr.expr, expn_state),
+                        expr.allow_simultaneous)
         else:
             return super(_InameDuplicator, self).map_reduction(expr, expn_state)
 
@@ -1074,11 +1077,15 @@ class _ReductionSplitter(RuleAwareIdentityMapper):
             if self.direction == "in":
                 return Reduction(expr.operation, tuple(leftover_inames),
                         Reduction(expr.operation, tuple(self.inames),
-                            self.rec(expr.expr, expn_state)))
+                            self.rec(expr.expr, expn_state),
+                            expr.allow_simultaneous),
+                        expr.allow_simultaneous)
             elif self.direction == "out":
                 return Reduction(expr.operation, tuple(self.inames),
                         Reduction(expr.operation, tuple(leftover_inames),
-                            self.rec(expr.expr, expn_state)))
+                            self.rec(expr.expr, expn_state),
+                            expr.allow_simultaneous),
+                        expr.allow_simultaneous)
             else:
                 assert False
         else:
diff --git a/loopy/version.py b/loopy/version.py
index 9ad8ac19bebff7a712e91900815057155205ae57..adc069663503b200bcdd1638c05ae0ffae5f14df 100644
--- a/loopy/version.py
+++ b/loopy/version.py
@@ -32,4 +32,4 @@ except ImportError:
 else:
     _islpy_version = islpy.version.VERSION_TEXT
 
-DATA_MODEL_VERSION = "v18-islpy%s" % _islpy_version
+DATA_MODEL_VERSION = "v19-islpy%s" % _islpy_version