From 969ce8ddb707ed778f535a72a96ee7b48772f95c Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Fri, 7 Apr 2017 15:39:28 -0500 Subject: [PATCH 01/15] Add support for tuple-typed reductions. * Eliminates inames arguments to reduction functions (closes #32). * Changes the argmax function to accept two arguments. See also: #62 --- loopy/kernel/instruction.py | 6 +- loopy/library/reduction.py | 246 +++++++++++++++++++++++++++++++----- loopy/preprocess.py | 92 +++++++------- loopy/symbolic.py | 110 +++++++++++----- loopy/transform/data.py | 22 +++- loopy/transform/iname.py | 29 ++++- loopy/type_inference.py | 62 +++++++-- test/test_reduction.py | 18 ++- 8 files changed, 443 insertions(+), 142 deletions(-) diff --git a/loopy/kernel/instruction.py b/loopy/kernel/instruction.py index fdd8f1d37..752e3e4da 100644 --- a/loopy/kernel/instruction.py +++ b/loopy/kernel/instruction.py @@ -658,7 +658,11 @@ class MultiAssignmentBase(InstructionBase): @memoize_method def reduction_inames(self): def map_reduction(expr, rec): - rec(expr.expr) + if expr.is_plain_tuple: + for sub_expr in expr.exprs: + rec(sub_expr) + else: + rec(expr.exprs) for iname in expr.inames: result.add(iname) diff --git a/loopy/library/reduction.py b/loopy/library/reduction.py index d24b61c12..e3c7e6099 100644 --- a/loopy/library/reduction.py +++ b/loopy/library/reduction.py @@ -36,15 +36,19 @@ class ReductionOperation(object): equality-comparable. """ - def result_dtypes(self, target, arg_dtype, inames): + def result_dtypes(self, target, *arg_dtypes): """ - :arg arg_dtype: may be None if not known + :arg arg_dtypes: may be None if not known :returns: None if not known, otherwise the returned type """ raise NotImplementedError - def neutral_element(self, dtype, inames): + @property + def arg_count(self): + raise NotImplementedError + + def neutral_element(self, *dtypes): raise NotImplementedError def __hash__(self): @@ -55,12 +59,16 @@ class ReductionOperation(object): # Force subclasses to override raise NotImplementedError - def __call__(self, dtype, operand1, operand2, inames): + def __call__(self, dtype, operand1, operand2): raise NotImplementedError def __ne__(self, other): return not self.__eq__(other) + @property + def is_segmented(self): + raise NotImplementedError + @staticmethod def parse_result_type(target, op_type): try: @@ -87,7 +95,11 @@ class ScalarReductionOperation(ReductionOperation): """ self.forced_result_type = forced_result_type - def result_dtypes(self, kernel, arg_dtype, inames): + @property + def arg_count(self): + return 1 + + def result_dtypes(self, kernel, arg_dtype): if self.forced_result_type is not None: return (self.parse_result_type( kernel.target, self.forced_result_type),) @@ -114,18 +126,18 @@ class ScalarReductionOperation(ReductionOperation): class SumReductionOperation(ScalarReductionOperation): - def neutral_element(self, dtype, inames): + def neutral_element(self, dtype): return 0 - def __call__(self, dtype, operand1, operand2, inames): + def __call__(self, dtype, operand1, operand2): return operand1 + operand2 class ProductReductionOperation(ScalarReductionOperation): - def neutral_element(self, dtype, inames): + def neutral_element(self, dtype): return 1 - def __call__(self, dtype, operand1, operand2, inames): + def __call__(self, dtype, operand1, operand2): return operand1 * operand2 @@ -166,32 +178,144 @@ def get_ge_neutral(dtype): class MaxReductionOperation(ScalarReductionOperation): - def neutral_element(self, dtype, inames): + def neutral_element(self, dtype): return get_ge_neutral(dtype) - def __call__(self, dtype, operand1, operand2, inames): + def __call__(self, dtype, operand1, operand2): return var("max")(operand1, operand2) class MinReductionOperation(ScalarReductionOperation): - def neutral_element(self, dtype, inames): + def neutral_element(self, dtype): return get_le_neutral(dtype) - def __call__(self, dtype, operand1, operand2, inames): + def __call__(self, dtype, operand1, operand2): return var("min")(operand1, operand2) +# {{{ segmented reduction + +class _SegmentedScalarReductionOperation(ReductionOperation): + def __init__(self, **kwargs): + self.inner_reduction = self.base_reduction_class(**kwargs) + + @property + def arg_count(self): + return 2 + + def prefix(self, scalar_dtype, segment_flag_dtype): + return "loopy_segmented_%s_%s_%s" % (self.which, + scalar_dtype.numpy_dtype.type.__name__, + segment_flag_dtype.numpy_dtype.type.__name__) + + def neutral_element(self, scalar_dtype, segment_flag_dtype): + return SegmentedFunction(self, (scalar_dtype, segment_flag_dtype), "init")() + + def result_dtypes(self, kernel, scalar_dtype, segment_flag_dtype): + return (self.inner_reduction.result_dtypes(kernel, scalar_dtype) + + (segment_flag_dtype,)) + + def __str__(self): + return "segmented(%s)" % self.which + + def __hash__(self): + return hash(type(self)) + + def __eq__(self, other): + return type(self) == type(other) + + def __call__(self, dtypes, operand1, operand2): + return SegmentedFunction(self, dtypes, "update")(*(operand1 + operand2)) + + +class SegmentedSumReductionOperation(_SegmentedScalarReductionOperation): + base_reduction_class = SumReductionOperation + which = "sum" + op = "((%s) + (%s))" + + +class SegmentedProductReductionOperation(_SegmentedScalarReductionOperation): + base_reduction_class = ProductReductionOperation + op = "((%s) * (%s))" + which = "product" + + +class SegmentedFunction(FunctionIdentifier): + init_arg_names = ("reduction_op", "dtypes", "name") + + def __init__(self, reduction_op, dtypes, name): + """ + :arg dtypes: A :class:`tuple` of `(scalar_dtype, segment_flag_dtype)` + """ + self.reduction_op = reduction_op + self.dtypes = dtypes + self.name = name + + @property + def scalar_dtype(self): + return self.dtypes[0] + + @property + def segment_flag_dtype(self): + return self.dtypes[1] + + def __getinitargs__(self): + return (self.reduction_op, self.dtypes, self.name) + + +def get_segmented_function_preamble(kernel, func_id): + op = func_id.reduction_op + prefix = op.prefix(func_id.scalar_dtype, func_id.segment_flag_dtype) + + from pymbolic.mapper.c_code import CCodeMapper + + c_code_mapper = CCodeMapper() + + return (prefix, """ + inline %(scalar_t)s %(prefix)s_init(%(segment_flag_t)s *segment_flag_out) + { + *segment_flag_out = 0; + return %(neutral)s; + } + + inline %(scalar_t)s %(prefix)s_update( + %(scalar_t)s op1, %(segment_flag_t)s segment_flag1, + %(scalar_t)s op2, %(segment_flag_t)s segment_flag2, + %(segment_flag_t)s *segment_flag_out) + { + *segment_flag_out = segment_flag1 | segment_flag2; + return segment_flag2 ? op2 : %(combined)s; + } + """ % dict( + scalar_t=kernel.target.dtype_to_typename(func_id.scalar_dtype), + prefix=prefix, + segment_flag_t=kernel.target.dtype_to_typename( + func_id.segment_flag_dtype), + neutral=c_code_mapper( + op.inner_reduction.neutral_element(func_id.scalar_dtype)), + combined=op.op % ("op1", "op2"), + )) + + +# }}} + + # {{{ argmin/argmax class _ArgExtremumReductionOperation(ReductionOperation): - def prefix(self, dtype): - return "loopy_arg%s_%s" % (self.which, dtype.numpy_dtype.type.__name__) + def prefix(self, scalar_dtype, index_dtype): + return "loopy_arg%s_%s_%s" % (self.which, + index_dtype.numpy_dtype.type.__name__, + scalar_dtype.numpy_dtype.type.__name__) + + def result_dtypes(self, kernel, scalar_dtype, index_dtype): + return (scalar_dtype, index_dtype) - def result_dtypes(self, kernel, dtype, inames): - return (dtype, kernel.index_dtype) + def neutral_element(self, scalar_dtype, index_dtype): + return ArgExtFunction(self, (scalar_dtype, index_dtype), "init")() - def neutral_element(self, dtype, inames): - return ArgExtFunction(self, dtype, "init", inames)() + def __str__(self): + return self.which def __hash__(self): return hash(type(self)) @@ -199,11 +323,12 @@ class _ArgExtremumReductionOperation(ReductionOperation): def __eq__(self, other): return type(self) == type(other) - def __call__(self, dtype, operand1, operand2, inames): - iname, = inames + @property + def arg_count(self): + return 2 - return ArgExtFunction(self, dtype, "update", inames)( - *(operand1 + (operand2, var(iname)))) + def __call__(self, dtypes, operand1, operand2): + return ArgExtFunction(self, dtypes, "update")(*(operand1 + operand2)) class ArgMaxReductionOperation(_ArgExtremumReductionOperation): @@ -219,21 +344,28 @@ class ArgMinReductionOperation(_ArgExtremumReductionOperation): class ArgExtFunction(FunctionIdentifier): - init_arg_names = ("reduction_op", "scalar_dtype", "name", "inames") + init_arg_names = ("reduction_op", "dtypes", "name") - def __init__(self, reduction_op, scalar_dtype, name, inames): + def __init__(self, reduction_op, dtypes, name): self.reduction_op = reduction_op - self.scalar_dtype = scalar_dtype + self.dtypes = dtypes self.name = name - self.inames = inames + + @property + def scalar_dtype(self): + return self.dtypes[0] + + @property + def index_dtype(self): + return self.dtypes[1] def __getinitargs__(self): - return (self.reduction_op, self.scalar_dtype, self.name, self.inames) + return (self.reduction_op, self.dtypes, self.name) def get_argext_preamble(kernel, func_id): op = func_id.reduction_op - prefix = op.prefix(func_id.scalar_dtype) + prefix = op.prefix(func_id.scalar_dtype, func_id.index_dtype) from pymbolic.mapper.c_code import CCodeMapper @@ -267,7 +399,7 @@ def get_argext_preamble(kernel, func_id): """ % dict( scalar_t=kernel.target.dtype_to_typename(func_id.scalar_dtype), prefix=prefix, - index_t=kernel.target.dtype_to_typename(kernel.index_dtype), + index_t=kernel.target.dtype_to_typename(func_id.index_dtype), neutral=c_code_mapper(neutral(func_id.scalar_dtype)), comp=op.update_comparison, )) @@ -284,6 +416,8 @@ _REDUCTION_OPS = { "min": MinReductionOperation, "argmax": ArgMaxReductionOperation, "argmin": ArgMinReductionOperation, + "segmented(sum)": SegmentedSumReductionOperation, + "segmented(product)": SegmentedProductReductionOperation, } _REDUCTION_OP_PARSERS = [ @@ -333,9 +467,10 @@ def reduction_function_mangler(kernel, func_id, arg_dtypes): from loopy.kernel.data import CallMangleInfo return CallMangleInfo( - target_name="%s_init" % op.prefix(func_id.scalar_dtype), + target_name="%s_init" % op.prefix( + func_id.scalar_dtype, func_id.index_dtype), result_dtypes=op.result_dtypes( - kernel, func_id.scalar_dtype, func_id.inames), + kernel, func_id.scalar_dtype, func_id.index_dtype), arg_dtypes=(), ) @@ -348,9 +483,10 @@ def reduction_function_mangler(kernel, func_id, arg_dtypes): from loopy.kernel.data import CallMangleInfo return CallMangleInfo( - target_name="%s_update" % op.prefix(func_id.scalar_dtype), + target_name="%s_update" % op.prefix( + func_id.scalar_dtype, func_id.index_dtype), result_dtypes=op.result_dtypes( - kernel, func_id.scalar_dtype, func_id.inames), + kernel, func_id.scalar_dtype, func_id.index_dtype), arg_dtypes=( func_id.scalar_dtype, kernel.index_dtype, @@ -358,6 +494,42 @@ def reduction_function_mangler(kernel, func_id, arg_dtypes): kernel.index_dtype), ) + elif isinstance(func_id, SegmentedFunction) and func_id.name == "init": + from loopy.target.opencl import OpenCLTarget + if not isinstance(kernel.target, OpenCLTarget): + raise LoopyError("only OpenCL supported for now") + + op = func_id.reduction_op + + from loopy.kernel.data import CallMangleInfo + return CallMangleInfo( + target_name="%s_init" % op.prefix( + func_id.scalar_dtype, func_id.segment_flag_dtype), + result_dtypes=op.result_dtypes( + kernel, func_id.scalar_dtype, func_id.segment_flag_dtype), + arg_dtypes=(), + ) + + elif isinstance(func_id, SegmentedFunction) and func_id.name == "update": + from loopy.target.opencl import OpenCLTarget + if not isinstance(kernel.target, OpenCLTarget): + raise LoopyError("only OpenCL supported for now") + + op = func_id.reduction_op + + from loopy.kernel.data import CallMangleInfo + return CallMangleInfo( + target_name="%s_update" % op.prefix( + func_id.scalar_dtype, func_id.segment_flag_dtype), + result_dtypes=op.result_dtypes( + kernel, func_id.scalar_dtype, func_id.segment_flag_dtype), + arg_dtypes=( + func_id.scalar_dtype, + func_id.segment_flag_dtype, + func_id.scalar_dtype, + func_id.segment_flag_dtype), + ) + return None @@ -371,4 +543,10 @@ def reduction_preamble_generator(preamble_info): yield get_argext_preamble(preamble_info.kernel, func.name) + elif isinstance(func.name, SegmentedFunction): + if not isinstance(preamble_info.kernel.target, OpenCLTarget): + raise LoopyError("only OpenCL supported for now") + + yield get_segmented_function_preamble(preamble_info.kernel, func.name) + # vim: fdm=marker diff --git a/loopy/preprocess.py b/loopy/preprocess.py index 2b6d97c38..a5c9b0e4f 100644 --- a/loopy/preprocess.py +++ b/loopy/preprocess.py @@ -26,7 +26,7 @@ THE SOFTWARE. import six from loopy.diagnostic import ( LoopyError, WriteRaceConditionWarning, warn_with_kernel, - LoopyAdvisory, DependencyTypeInferenceFailure) + LoopyAdvisory) import islpy as isl @@ -97,7 +97,12 @@ def check_reduction_iname_uniqueness(kernel): iname_to_nonsimultaneous_reduction_count = {} def map_reduction(expr, rec): - rec(expr.expr) + if expr.is_plain_tuple: + for sub_expr in expr.exprs: + rec(sub_expr) + else: + rec(expr.exprs) + for iname in expr.inames: iname_to_reduction_count[iname] = ( iname_to_reduction_count.get(iname, 0) + 1) @@ -295,12 +300,19 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True): var_name_gen = kernel.get_var_name_generator() new_temporary_variables = kernel.temporary_variables.copy() - from loopy.type_inference import TypeInferenceMapper - type_inf_mapper = TypeInferenceMapper(kernel) + # {{{ helpers + + def _strip_if_scalar(reference, val): + if len(reference) == 1: + return val[0] + else: + return val + + # }}} # {{{ sequential - def map_reduction_seq(expr, rec, nresults, arg_dtype, + def map_reduction_seq(expr, rec, nresults, arg_dtypes, reduction_dtypes): outer_insn_inames = temp_kernel.insn_inames(insn) @@ -328,7 +340,7 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True): within_inames=outer_insn_inames - frozenset(expr.inames), within_inames_is_final=insn.within_inames_is_final, depends_on=frozenset(), - expression=expr.operation.neutral_element(arg_dtype, expr.inames)) + expression=expr.operation.neutral_element(*arg_dtypes)) generated_insns.append(init_insn) @@ -343,9 +355,9 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True): id=update_id, assignees=acc_vars, expression=expr.operation( - arg_dtype, - acc_vars if len(acc_vars) > 1 else acc_vars[0], - expr.expr, expr.inames), + arg_dtypes, + _strip_if_scalar(acc_vars, acc_vars), + _strip_if_scalar(acc_vars, expr.exprs)), depends_on=frozenset([init_insn.id]) | insn.depends_on, within_inames=update_insn_iname_deps, within_inames_is_final=insn.within_inames_is_final) @@ -382,7 +394,15 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True): v[iname].lt_set(v[0] + size)).get_basic_sets() return bs - def map_reduction_local(expr, rec, nresults, arg_dtype, + def _make_slab_set_from_range(iname, lbound, ubound): + v = isl.make_zero_and_vars([iname]) + bs, = ( + v[iname].ge_set(v[0] + lbound) + & + v[iname].lt_set(v[0] + ubound)).get_basic_sets() + return bs + + def map_reduction_local(expr, rec, nresults, arg_dtypes, reduction_dtypes): red_iname, = expr.inames @@ -441,7 +461,7 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True): base_iname_deps = outer_insn_inames - frozenset(expr.inames) - neutral = expr.operation.neutral_element(arg_dtype, expr.inames) + neutral = expr.operation.neutral_element(*arg_dtypes) init_id = insn_id_gen("%s_%s_init" % (insn.id, red_iname)) init_insn = make_assignment( @@ -455,12 +475,6 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True): depends_on=frozenset()) generated_insns.append(init_insn) - def _strip_if_scalar(c): - if len(acc_vars) == 1: - return c[0] - else: - return c - init_neutral_id = insn_id_gen("%s_%s_init_neutral" % (insn.id, red_iname)) init_neutral_insn = make_assignment( id=init_neutral_id, @@ -478,9 +492,11 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True): acc_var[outer_local_iname_vars + (var(red_iname),)] for acc_var in acc_vars), expression=expr.operation( - arg_dtype, - _strip_if_scalar(tuple(var(nvn) for nvn in neutral_var_names)), - expr.expr, expr.inames), + arg_dtypes, + _strip_if_scalar( + expr.exprs, + tuple(var(nvn) for nvn in neutral_var_names)), + _strip_if_scalar(expr.exprs, expr.exprs)), within_inames=( (outer_insn_inames - frozenset(expr.inames)) | frozenset([red_iname])), @@ -513,17 +529,16 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True): acc_var[outer_local_iname_vars + (var(stage_exec_iname),)] for acc_var in acc_vars), expression=expr.operation( - arg_dtype, - _strip_if_scalar(tuple( + arg_dtypes, + _strip_if_scalar(acc_vars, tuple( acc_var[ outer_local_iname_vars + (var(stage_exec_iname),)] for acc_var in acc_vars)), - _strip_if_scalar(tuple( + _strip_if_scalar(acc_vars, tuple( acc_var[ outer_local_iname_vars + ( var(stage_exec_iname) + new_size,)] - for acc_var in acc_vars)), - expr.inames), + for acc_var in acc_vars))), within_inames=( base_iname_deps | frozenset([stage_exec_iname])), within_inames_is_final=insn.within_inames_is_final, @@ -554,24 +569,11 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True): # Only expand one level of reduction at a time, going from outermost to # innermost. Otherwise we get the (iname + insn) dependencies wrong. - try: - arg_dtype = type_inf_mapper(expr.expr) - except DependencyTypeInferenceFailure: - if unknown_types_ok: - arg_dtype = lp.auto - - reduction_dtypes = (lp.auto,)*nresults - - else: - raise LoopyError("failed to determine type of accumulator for " - "reduction '%s'" % expr) - else: - arg_dtype = arg_dtype.with_target(kernel.target) - - reduction_dtypes = expr.operation.result_dtypes( - kernel, arg_dtype, expr.inames) - reduction_dtypes = tuple( - dt.with_target(kernel.target) for dt in reduction_dtypes) + from loopy.type_inference import ( + infer_arg_and_reduction_dtypes_for_reduction_expression) + arg_dtypes, reduction_dtypes = ( + infer_arg_and_reduction_dtypes_for_reduction_expression( + temp_kernel, expr, unknown_types_ok)) outer_insn_inames = temp_kernel.insn_inames(insn) bad_inames = frozenset(expr.inames) & outer_insn_inames @@ -621,10 +623,10 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True): if n_sequential: assert n_local_par == 0 - return map_reduction_seq(expr, rec, nresults, arg_dtype, + return map_reduction_seq(expr, rec, nresults, arg_dtypes, reduction_dtypes) elif n_local_par: - return map_reduction_local(expr, rec, nresults, arg_dtype, + return map_reduction_local(expr, rec, nresults, arg_dtypes, reduction_dtypes) else: from loopy.diagnostic import warn_with_kernel diff --git a/loopy/symbolic.py b/loopy/symbolic.py index 50c891be4..8876e2950 100644 --- a/loopy/symbolic.py +++ b/loopy/symbolic.py @@ -95,7 +95,10 @@ class IdentityMapperMixin(object): new_inames.append(new_sym_iname.name) return Reduction( - expr.operation, tuple(new_inames), self.rec(expr.expr, *args), + expr.operation, tuple(new_inames), + (tuple(self.rec(e, *args) for e in expr.exprs) + if expr.is_plain_tuple + else self.rec(expr.exprs, *args)), allow_simultaneous=expr.allow_simultaneous) def map_tagged_variable(self, expr, *args): @@ -144,7 +147,11 @@ class WalkMapper(WalkMapperBase): if not self.visit(expr): return - self.rec(expr.expr, *args) + if expr.is_plain_tuple: + for sub_expr in expr.exprs: + self.rec(sub_expr, *args) + else: + self.rec(expr.exprs, *args) map_tagged_variable = WalkMapperBase.map_variable @@ -162,7 +169,10 @@ class CallbackMapper(CallbackMapperBase, IdentityMapper): class CombineMapper(CombineMapperBase): def map_reduction(self, expr): - return self.rec(expr.expr) + if expr.is_plain_tuple: + return self.combine(self.rec(sub_expr) for sub_expr in expr.exprs) + else: + return self.rec(expr.exprs) map_linear_subscript = CombineMapperBase.map_subscript @@ -192,9 +202,13 @@ class StringifyMapper(StringifyMapperBase): return "loc.%d" % expr.index def map_reduction(self, expr, prec): + from pymbolic.mapper.stringifier import PREC_NONE return "%sreduce(%s, [%s], %s)" % ( "simul_" if expr.allow_simultaneous else "", - expr.operation, ", ".join(expr.inames), expr.expr) + expr.operation, ", ".join(expr.inames), + (", ".join(self.rec(e, PREC_NONE) for e in expr.exprs) + if expr.is_plain_tuple + else self.rec(expr.exprs, PREC_NONE))) def map_tagged_variable(self, expr, prec): return "%s$%s" % (expr.name, expr.tag) @@ -224,8 +238,17 @@ class UnidirectionalUnifier(UnidirectionalUnifierBase): or type(expr.operation) != type(other.operation) # noqa ): return [] + if expr.is_plain_tuple != other.is_plain_tuple: + return [] + + if expr.is_plain_tuple: + for sub_expr_l, sub_expr_r in zip(expr.exprs, other.exprs): + unis = self.rec(sub_expr_l, sub_expr_r, unis) + if not unis: + break + return unis - return self.rec(expr.expr, other.expr, unis) + return self.rec(expr.exprs, other.exprs, unis) def map_tagged_variable(self, expr, other, urecs): new_uni_record = self.unification_record_from_equation( @@ -258,8 +281,11 @@ class DependencyMapper(DependencyMapperBase): self.rec(child, *args) for child in expr.parameters) def map_reduction(self, expr): - return (self.rec(expr.expr) - - set(p.Variable(iname) for iname in expr.inames)) + if expr.is_plain_tuple: + deps = self.combine(self.rec(sub_expr) for sub_expr in expr.exprs) + else: + deps = self.rec(expr.exprs) + return deps - set(p.Variable(iname) for iname in expr.inames) def map_tagged_variable(self, expr): return set([expr]) @@ -428,7 +454,7 @@ class TaggedVariable(p.Variable): class Reduction(p.Expression): - """Represents a reduction operation on :attr:`expr` + """Represents a reduction operation on :attr:`exprs` across :attr:`inames`. .. attribute:: operation @@ -440,10 +466,12 @@ class Reduction(p.Expression): a list of inames across which reduction on :attr:`expr` is being carried out. - .. attribute:: expr + .. attribute:: exprs - The expression (as a :class:`pymbolic.primitives.Expression`) - on which reduction is performed. + A (tuple-typed) expression which currently may be one of + * a :class:`tuple` of :class:`pymbolic.primitives.Expression`, or + * a :class:`loopy.symbolic.Reduction`, or + * a substitution rule invocation. .. attribute:: allow_simultaneous @@ -451,9 +479,9 @@ class Reduction(p.Expression): in precisely one reduction, to avoid mis-nesting errors. """ - init_arg_names = ("operation", "inames", "expr", "allow_simultaneous") + init_arg_names = ("operation", "inames", "exprs", "allow_simultaneous") - def __init__(self, operation, inames, expr, allow_simultaneous=False): + def __init__(self, operation, inames, exprs, allow_simultaneous=False): if isinstance(inames, str): inames = tuple(iname.strip() for iname in inames.split(",")) @@ -475,30 +503,40 @@ class Reduction(p.Expression): from loopy.library.reduction import parse_reduction_op operation = parse_reduction_op(operation) + if not isinstance(exprs, tuple): + exprs = (exprs,) + from loopy.library.reduction import ReductionOperation assert isinstance(operation, ReductionOperation) self.operation = operation self.inames = inames - self.expr = expr + self.exprs = exprs self.allow_simultaneous = allow_simultaneous def __getinitargs__(self): - return (self.operation, self.inames, self.expr, self.allow_simultaneous) + return (self.operation, self.inames, self.exprs, self.allow_simultaneous) def get_hash(self): - return hash((self.__class__, self.operation, self.inames, - self.expr)) + return hash((self.__class__, self.operation, self.inames, self.exprs)) def is_equal(self, other): return (other.__class__ == self.__class__ and other.operation == self.operation and other.inames == self.inames - and other.expr == self.expr) + and other.exprs == self.exprs) def stringifier(self): return StringifyMapper + @property + def is_plain_tuple(self): + """ + :return: True if the reduction expression is a tuple, False if otherwise + (the inner expression will still have a tuple type) + """ + return isinstance(self.exprs, tuple) + @property @memoize_method def inames_set(self): @@ -924,7 +962,7 @@ class FunctionToPrimitiveMapper(IdentityMapper): turns those into the actual pymbolic primitives used for that. """ - def _parse_reduction(self, operation, inames, red_expr, + def _parse_reduction(self, operation, inames, red_exprs, allow_simultaneous=False): if isinstance(inames, p.Variable): inames = (inames,) @@ -941,7 +979,7 @@ class FunctionToPrimitiveMapper(IdentityMapper): processed_inames.append(iname.name) - return Reduction(operation, tuple(processed_inames), red_expr, + return Reduction(operation, tuple(processed_inames), red_exprs, allow_simultaneous=allow_simultaneous) def map_call(self, expr): @@ -966,15 +1004,13 @@ class FunctionToPrimitiveMapper(IdentityMapper): raise TypeError("cse takes two arguments") elif name in ["reduce", "simul_reduce"]: - if len(expr.parameters) == 3: - operation, inames, red_expr = expr.parameters - - if not isinstance(operation, p.Variable): - raise TypeError("operation argument to reduce() " - "must be a symbol") + if len(expr.parameters) >= 3: + operation, inames = expr.parameters[:2] + red_exprs = expr.parameters[2:] - operation = parse_reduction_op(operation.name) - return self._parse_reduction(operation, inames, self.rec(red_expr), + operation = parse_reduction_op(str(operation)) + return self._parse_reduction(operation, inames, + tuple(self.rec(red_expr) for red_expr in red_exprs), allow_simultaneous=(name == "simul_reduce")) else: raise TypeError("invalid 'reduce' calling sequence") @@ -991,12 +1027,17 @@ class FunctionToPrimitiveMapper(IdentityMapper): operation = parse_reduction_op(name) if operation: - if len(expr.parameters) != 2: + # arg_count counts arguments but not inames + if len(expr.parameters) != 1 + operation.arg_count: raise RuntimeError("invalid invocation of " - "reduction operation '%s'" % expr.function.name) + "reduction operation '%s': expected %d arguments, " + "got %d instead" % (expr.function.name, + 1 + operation.arg_count, + len(expr.parameters))) - inames, red_expr = expr.parameters - return self._parse_reduction(operation, inames, self.rec(red_expr)) + inames = expr.parameters[0] + red_exprs = tuple(self.rec(param) for param in expr.parameters[1:]) + return self._parse_reduction(operation, inames, red_exprs) else: return IdentityMapper.map_call(self, expr) @@ -1385,7 +1426,10 @@ class IndexVariableFinder(CombineMapper): return result def map_reduction(self, expr): - result = self.rec(expr.expr) + if expr.is_plain_tuple: + result = self.combine(self.rec(sub_expr) for sub_expr in expr.exprs) + else: + result = self.rec(expr.exprs) if not (expr.inames_set & result): raise RuntimeError("reduction '%s' does not depend on " diff --git a/loopy/transform/data.py b/loopy/transform/data.py index 575311b11..a1948b615 100644 --- a/loopy/transform/data.py +++ b/loopy/transform/data.py @@ -683,7 +683,8 @@ def set_temporary_scope(kernel, temp_var_names, scope): # {{{ reduction_arg_to_subst_rule -def reduction_arg_to_subst_rule(knl, inames, insn_match=None, subst_rule_name=None): +def reduction_arg_to_subst_rule( + knl, inames, insn_match=None, subst_rule_name=None, arg_number=0): if isinstance(inames, str): inames = [s.strip() for s in inames.split(",")] @@ -695,10 +696,15 @@ def reduction_arg_to_subst_rule(knl, inames, insn_match=None, subst_rule_name=No def map_reduction(expr, rec, nresults=1): if frozenset(expr.inames) != inames_set: + if expr.is_plain_tuple: + rec_result = tuple(rec(sub_expr) for sub_expr in expr.exprs) + else: + rec_result = rec(expr.exprs) + return type(expr)( operation=expr.operation, inames=expr.inames, - expr=rec(expr.expr), + exprs=rec_result, allow_simultaneous=expr.allow_simultaneous) if subst_rule_name is None: @@ -711,19 +717,27 @@ def reduction_arg_to_subst_rule(knl, inames, insn_match=None, subst_rule_name=No raise LoopyError("substitution rule '%s' already exists" % my_subst_rule_name) + if not expr.is_plain_tuple: + raise NotImplemented("non-tuple reduction arguments not supported") + from loopy.kernel.data import SubstitutionRule substs[my_subst_rule_name] = SubstitutionRule( name=my_subst_rule_name, arguments=tuple(inames), - expression=expr.expr) + expression=expr.exprs[arg_number]) from pymbolic import var iname_vars = [var(iname) for iname in inames] + new_exprs = tuple(sub_expr + if i != arg_number + else var(my_subst_rule_name)(*iname_vars) + for i, sub_expr in enumerate(expr.exprs)) + return type(expr)( operation=expr.operation, inames=expr.inames, - expr=var(my_subst_rule_name)(*iname_vars), + exprs=new_exprs, allow_simultaneous=expr.allow_simultaneous) from loopy.symbolic import ReductionCallbackMapper diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index c35b50643..81db51a7e 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -145,7 +145,10 @@ class _InameSplitter(RuleAwareIdentityMapper): from loopy.symbolic import Reduction return Reduction(expr.operation, tuple(new_inames), - self.rec(expr.expr, expn_state), + (tuple(self.rec(sub_expr, expn_state) + for sub_expr in expr.exprs) + if expr.is_plain_tuple + else self.rec(expr.exprs, expn_state)), expr.allow_simultaneous) else: return super(_InameSplitter, self).map_reduction(expr, expn_state) @@ -1191,13 +1194,19 @@ class _ReductionSplitter(RuleAwareIdentityMapper): if self.direction == "in": return Reduction(expr.operation, tuple(leftover_inames), Reduction(expr.operation, tuple(self.inames), - self.rec(expr.expr, expn_state), + (tuple(self.rec(sub_expr, expn_state) + for sub_expr in expr.exprs) + if expr.is_plain_tuple + else self.rec(expr.exprs, expn_state)), expr.allow_simultaneous), expr.allow_simultaneous) elif self.direction == "out": return Reduction(expr.operation, tuple(self.inames), Reduction(expr.operation, tuple(leftover_inames), - self.rec(expr.expr, expn_state), + (tuple(self.rec(sub_expr, expn_state) + for sub_expr in expr.exprs) + if expr.is_plain_tuple + else self.rec(expr.exprs, expn_state)), expr.allow_simultaneous)) else: assert False @@ -1589,10 +1598,16 @@ class _ReductionInameUniquifier(RuleAwareIdentityMapper): from loopy.symbolic import Reduction return Reduction(expr.operation, tuple(new_inames), - self.rec( - SubstitutionMapper(make_subst_func(subst_dict))( - expr.expr), - expn_state), + (tuple(self.rec( + SubstitutionMapper(make_subst_func(subst_dict))( + sub_expr), + expn_state) + for sub_expr in expr.exprs) + if expr.is_plain_tuple + else self.rec( + SubstitutionMapper(make_subst_func(subst_dict))( + expr.exprs), + expn_state)), expr.allow_simultaneous) else: return super(_ReductionInameUniquifier, self).map_reduction( diff --git a/loopy/type_inference.py b/loopy/type_inference.py index 4c1e423e9..cdba4a5cb 100644 --- a/loopy/type_inference.py +++ b/loopy/type_inference.py @@ -352,28 +352,30 @@ class TypeInferenceMapper(CombineMapper): return [self.kernel.index_dtype] def map_reduction(self, expr, return_tuple=False): - rec_result = self.rec(expr.expr) - - if rec_result: - rec_result, = rec_result - result = expr.operation.result_dtypes( - self.kernel, rec_result, expr.inames) - else: - result = expr.operation.result_dtypes( - self.kernel, None, expr.inames) + """ + :arg return_tuple: If *True*, treat the type of the reduction expression + as a tuple type. Otherwise, the number of expressions being reduced over + must equal 1, and the type of the first expression is returned. + """ + rec_results = tuple(self.rec(sub_expr) for sub_expr in expr.exprs) - if result is None: + if any(len(rec_result) == 0 for rec_result in rec_results): return [] if return_tuple: - return [result] + from itertools import product + return list( + expr.operation.result_dtypes(self.kernel, *product_element) + for product_element in product(*rec_results)) else: - if len(result) != 1 and not return_tuple: + if len(rec_results) != 1: raise LoopyError("reductions with more or fewer than one " "return value may only be used in direct assignments") - return [result[0]] + return list( + expr.operation.result_dtypes(self.kernel, rec_result)[0] + for rec_result in rec_results[0]) # }}} @@ -617,4 +619,38 @@ def infer_unknown_types(kernel, expect_completion=False): # }}} + +# {{{ reduction expression helper + +def infer_arg_and_reduction_dtypes_for_reduction_expression( + kernel, expr, unknown_types_ok): + arg_dtypes = [] + + type_inf_mapper = TypeInferenceMapper(kernel) + import loopy as lp + + for sub_expr in expr.exprs: + try: + arg_dtype = type_inf_mapper(sub_expr) + except DependencyTypeInferenceFailure: + if unknown_types_ok: + arg_dtype = lp.auto + else: + raise LoopyError("failed to determine type of accumulator for " + "reduction sub-expression '%s'" % sub_expr) + else: + arg_dtype = arg_dtype.with_target(kernel.target) + + arg_dtypes.append(arg_dtype) + + reduction_dtypes = expr.operation.result_dtypes(kernel, *arg_dtypes) + reduction_dtypes = tuple( + dt.with_target(kernel.target) + if dt is not lp.auto else dt + for dt in reduction_dtypes) + + return tuple(arg_dtypes), reduction_dtypes + +# }}} + # vim: foldmethod=marker diff --git a/test/test_reduction.py b/test/test_reduction.py index 5887df7a6..1dd11b492 100644 --- a/test/test_reduction.py +++ b/test/test_reduction.py @@ -297,7 +297,7 @@ def test_argmax(ctx_factory): knl = lp.make_kernel( "{[i]: 0<=i<%d}" % n, """ - max_val, max_idx = argmax(i, fabs(a[i])) + max_val, max_idx = argmax(i, fabs(a[i]), i) """) knl = lp.add_and_infer_dtypes(knl, {"a": np.float32}) @@ -393,16 +393,24 @@ def test_double_sum_made_unique(ctx_factory): assert b.get() == ref -def test_parallel_multi_output_reduction(): +def test_parallel_multi_output_reduction(ctx_factory): knl = lp.make_kernel( "{[i]: 0<=i<128}", """ - max_val, max_indices = argmax(i, fabs(a[i])) + max_val, max_indices = argmax(i, fabs(a[i]), i) """) knl = lp.tag_inames(knl, dict(i="l.0")) + knl = lp.add_dtypes(knl, dict(a=np.float64)) knl = lp.realize_reduction(knl) - print(knl) - # TODO: Add functional test + + ctx = ctx_factory() + + with cl.CommandQueue(ctx) as queue: + a = np.random.rand(128) + out, (max_index, max_val) = knl(queue, a=a) + + assert max_val == np.max(a) + assert max_index == np.argmax(np.abs(a)) if __name__ == "__main__": -- GitLab From d0888035b7ac728be58daab17766ccdc8e533d4e Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Fri, 7 Apr 2017 15:44:17 -0500 Subject: [PATCH 02/15] Add private scoping of multiple return values hack (see: #34) --- loopy/preprocess.py | 189 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 189 insertions(+) diff --git a/loopy/preprocess.py b/loopy/preprocess.py index a5c9b0e4f..6d6494b5e 100644 --- a/loopy/preprocess.py +++ b/loopy/preprocess.py @@ -277,6 +277,191 @@ def find_temporary_scope(kernel): # {{{ rewrite reduction to imperative form + +# {{{ reduction utils + +def _hackily_ensure_multi_assignment_return_values_are_scoped_private(kernel): + """ + Multi assignment function calls are currently lowered into OpenCL so that + the function call:: + + a, b = segmented_sum(x, y, z, w) + + becomes:: + + a = segmented_sum_mangled(x, y, z, w, &b). + + For OpenCL, the scope of "b" is significant, and the preamble generation + currently assumes the scope is always private. This function forces that to + be the case by introducing temporary assignments into the kernel. + """ + + insn_id_gen = kernel.get_instruction_id_generator() + var_name_gen = kernel.get_var_name_generator() + + new_or_updated_instructions = {} + new_temporaries = {} + + dep_map = dict( + (insn.id, insn.depends_on) for insn in kernel.instructions) + + inverse_dep_map = dict((insn.id, set()) for insn in kernel.instructions) + + import six + for insn_id, deps in six.iteritems(dep_map): + for dep in deps: + inverse_dep_map[dep].add(insn_id) + + del dep_map + + # {{{ utils + + def _add_to_no_sync_with(insn_id, new_no_sync_with_params): + insn = kernel.id_to_insn.get(insn_id) + insn = new_or_updated_instructions.get(insn_id, insn) + new_or_updated_instructions[insn_id] = ( + insn.copy( + no_sync_with=( + insn.no_sync_with | frozenset(new_no_sync_with_params)))) + + def _add_to_depends_on(insn_id, new_depends_on_params): + insn = kernel.id_to_insn.get(insn_id) + insn = new_or_updated_instructions.get(insn_id, insn) + new_or_updated_instructions[insn_id] = ( + insn.copy( + depends_on=insn.depends_on | frozenset(new_depends_on_params))) + + # }}} + + from loopy.kernel.instruction import CallInstruction + for insn in kernel.instructions: + if not isinstance(insn, CallInstruction): + continue + + if len(insn.assignees) <= 1: + continue + + assignees = insn.assignees + assignee_var_names = insn.assignee_var_names() + + new_assignees = [assignees[0]] + newly_added_assignments_ids = set() + needs_replacement = False + + last_added_insn_id = insn.id + + from loopy.kernel.data import temp_var_scope, TemporaryVariable + + FIRST_POINTER_ASSIGNEE_IDX = 1 # noqa + + for assignee_nr, assignee_var_name, assignee in zip( + range(FIRST_POINTER_ASSIGNEE_IDX, len(assignees)), + assignee_var_names[FIRST_POINTER_ASSIGNEE_IDX:], + assignees[FIRST_POINTER_ASSIGNEE_IDX:]): + + if ( + assignee_var_name in kernel.temporary_variables + and + (kernel.temporary_variables[assignee_var_name].scope + == temp_var_scope.PRIVATE)): + new_assignees.append(assignee) + continue + + needs_replacement = True + + # {{{ generate a new assignent instruction + + new_assignee_name = var_name_gen( + "{insn_id}_retval_{assignee_nr}" + .format(insn_id=insn.id, assignee_nr=assignee_nr)) + + new_assignment_id = insn_id_gen( + "{insn_id}_assign_retval_{assignee_nr}" + .format(insn_id=insn.id, assignee_nr=assignee_nr)) + + newly_added_assignments_ids.add(new_assignment_id) + + import loopy as lp + new_temporaries[new_assignee_name] = ( + TemporaryVariable( + name=new_assignee_name, + dtype=lp.auto, + scope=temp_var_scope.PRIVATE)) + + from pymbolic import var + new_assignee = var(new_assignee_name) + new_assignees.append(new_assignee) + + new_or_updated_instructions[new_assignment_id] = ( + make_assignment( + assignees=(assignee,), + expression=new_assignee, + id=new_assignment_id, + depends_on=frozenset([last_added_insn_id]), + depends_on_is_final=True, + no_sync_with=( + insn.no_sync_with | frozenset([(insn.id, "any")])), + predicates=insn.predicates, + within_inames=insn.within_inames)) + + last_added_insn_id = new_assignment_id + + # }}} + + if not needs_replacement: + continue + + # {{{ update originating instruction + + orig_insn = new_or_updated_instructions.get(insn.id, insn) + + new_or_updated_instructions[insn.id] = ( + orig_insn.copy(assignees=tuple(new_assignees))) + + _add_to_no_sync_with(insn.id, + [(id, "any") for id in newly_added_assignments_ids]) + + # }}} + + # {{{ squash spurious memory dependencies amongst new assignments + + for new_insn_id in newly_added_assignments_ids: + _add_to_no_sync_with(new_insn_id, + [(id, "any") + for id in newly_added_assignments_ids + if id != new_insn_id]) + + # }}} + + # {{{ update instructions that depend on the originating instruction + + for inverse_dep in inverse_dep_map[insn.id]: + _add_to_depends_on(inverse_dep, newly_added_assignments_ids) + + for insn_id, scope in ( + new_or_updated_instructions[inverse_dep].no_sync_with): + if insn_id == insn.id: + _add_to_no_sync_with( + inverse_dep, + [(id, scope) for id in newly_added_assignments_ids]) + + # }}} + + new_temporary_variables = kernel.temporary_variables.copy() + new_temporary_variables.update(new_temporaries) + + new_instructions = ( + list(new_or_updated_instructions.values()) + + list(insn + for insn in kernel.instructions + if insn.id not in new_or_updated_instructions)) + + return kernel.copy(temporary_variables=new_temporary_variables, + instructions=new_instructions) + +# }}} + + def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True): """Rewrites reductions into their imperative form. With *insn_id_filter* specified, operate only on the instruction with an instruction id matching @@ -741,6 +926,10 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True): kernel = lp.tag_inames(kernel, new_iname_tags) + kernel = ( + _hackily_ensure_multi_assignment_return_values_are_scoped_private( + kernel)) + return kernel # }}} -- GitLab From 8270bbe73ac69d8d69085018c7e59dcd51e9c57d Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Fri, 7 Apr 2017 15:45:07 -0500 Subject: [PATCH 03/15] Bump kernel version for tuple-typed reduction changes. --- loopy/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/loopy/version.py b/loopy/version.py index 5c6ad47f8..77d0e21bd 100644 --- a/loopy/version.py +++ b/loopy/version.py @@ -32,4 +32,4 @@ except ImportError: else: _islpy_version = islpy.version.VERSION_TEXT -DATA_MODEL_VERSION = "v59-islpy%s" % _islpy_version +DATA_MODEL_VERSION = "v60-islpy%s" % _islpy_version -- GitLab From 2c1d6bcbb79488ea6d48a6b3e0d691a48560d03a Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Fri, 7 Apr 2017 16:03:35 -0500 Subject: [PATCH 04/15] Fix type inferences for reductions with inner calls. --- loopy/type_inference.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/loopy/type_inference.py b/loopy/type_inference.py index cdba4a5cb..3c77c9882 100644 --- a/loopy/type_inference.py +++ b/loopy/type_inference.py @@ -357,25 +357,25 @@ class TypeInferenceMapper(CombineMapper): as a tuple type. Otherwise, the number of expressions being reduced over must equal 1, and the type of the first expression is returned. """ - rec_results = tuple(self.rec(sub_expr) for sub_expr in expr.exprs) + if expr.is_plain_tuple: + rec_results = [self.rec(sub_expr) for sub_expr in expr.exprs] + else: + rec_results = [self.rec(expr.exprs, return_tuple=return_tuple)] if any(len(rec_result) == 0 for rec_result in rec_results): return [] if return_tuple: from itertools import product - return list( - expr.operation.result_dtypes(self.kernel, *product_element) - for product_element in product(*rec_results)) + return [expr.operation.result_dtypes(self.kernel, *product_element) + for product_element in product(*rec_results)] - else: - if len(rec_results) != 1: - raise LoopyError("reductions with more or fewer than one " - "return value may only be used in direct assignments") + if len(rec_results) != 1: + raise LoopyError("reductions with more or fewer than one " + "return value may only be used in direct assignments") - return list( - expr.operation.result_dtypes(self.kernel, rec_result)[0] - for rec_result in rec_results[0]) + return [expr.operation.result_dtypes(self.kernel, rec_result)[0] + for rec_result in rec_results[0]] # }}} -- GitLab From 4938c8964f62dcf1892cab5a9494822e06663782 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Fri, 7 Apr 2017 17:26:16 -0500 Subject: [PATCH 05/15] Change tuple typed reductions to be stricter (constructor takes either a tuple, reduction, or a substitution invocation). Also, rewrite the reduction mapping code to rely on recursive calls map_tuple(). This cleans up a lot of reduction mapping code substantially. --- loopy/kernel/instruction.py | 6 +-- loopy/preprocess.py | 93 ++++++++++++++++++++++++++++++----- loopy/symbolic.py | 64 +++++++++--------------- loopy/transform/data.py | 23 +++------ loopy/transform/iname.py | 31 +++--------- loopy/transform/precompute.py | 28 ++++++++++- loopy/type_inference.py | 19 +++++-- test/test_loopy.py | 22 +++++++++ 8 files changed, 186 insertions(+), 100 deletions(-) diff --git a/loopy/kernel/instruction.py b/loopy/kernel/instruction.py index 752e3e4da..581f09054 100644 --- a/loopy/kernel/instruction.py +++ b/loopy/kernel/instruction.py @@ -658,11 +658,7 @@ class MultiAssignmentBase(InstructionBase): @memoize_method def reduction_inames(self): def map_reduction(expr, rec): - if expr.is_plain_tuple: - for sub_expr in expr.exprs: - rec(sub_expr) - else: - rec(expr.exprs) + rec(expr.exprs) for iname in expr.inames: result.add(iname) diff --git a/loopy/preprocess.py b/loopy/preprocess.py index 6d6494b5e..5ece0db1d 100644 --- a/loopy/preprocess.py +++ b/loopy/preprocess.py @@ -97,11 +97,7 @@ def check_reduction_iname_uniqueness(kernel): iname_to_nonsimultaneous_reduction_count = {} def map_reduction(expr, rec): - if expr.is_plain_tuple: - for sub_expr in expr.exprs: - rec(sub_expr) - else: - rec(expr.exprs) + rec(expr.exprs) for iname in expr.inames: iname_to_reduction_count[iname] = ( @@ -493,6 +489,39 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True): else: return val + def expand_inner_reduction(id, expr, nresults, depends_on, within_inames, + within_inames_is_final): + from pymbolic.primitives import Call + from loopy.symbolic import Reduction + assert isinstance(expr, (Call, Reduction)) + + temp_var_names = [ + var_name_gen(id + "_arg" + str(i)) + for i in range(nresults)] + + for name in temp_var_names: + from loopy.kernel.data import TemporaryVariable, temp_var_scope + new_temporary_variables[name] = TemporaryVariable( + name=name, + shape=(), + dtype=lp.auto, + scope=temp_var_scope.PRIVATE) + + from pymbolic import var + temp_vars = tuple(var(n) for n in temp_var_names) + + call_insn = make_assignment( + id=id, + assignees=temp_vars, + expression=expr, + depends_on=depends_on, + within_inames=within_inames, + within_inames_is_final=within_inames_is_final) + + generated_insns.append(call_insn) + + return temp_vars + # }}} # {{{ sequential @@ -536,14 +565,32 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True): if insn.within_inames_is_final: update_insn_iname_deps = insn.within_inames | set(expr.inames) + reduction_insn_depends_on = set([init_id]) + + if not isinstance(expr.exprs, tuple): + get_args_insn_id = insn_id_gen( + "%s_%s_get" % (insn.id, "_".join(expr.inames))) + + reduction_expr = expand_inner_reduction( + id=get_args_insn_id, + expr=expr.exprs, + nresults=nresults, + depends_on=insn.depends_on, + within_inames=update_insn_iname_deps, + within_inames_is_final=insn.within_inames_is_final) + + reduction_insn_depends_on.add(get_args_insn_id) + else: + reduction_expr = expr.exprs + reduction_insn = make_assignment( id=update_id, assignees=acc_vars, expression=expr.operation( arg_dtypes, _strip_if_scalar(acc_vars, acc_vars), - _strip_if_scalar(acc_vars, expr.exprs)), - depends_on=frozenset([init_insn.id]) | insn.depends_on, + _strip_if_scalar(acc_vars, reduction_expr)), + depends_on=frozenset(reduction_insn_depends_on) | insn.depends_on, within_inames=update_insn_iname_deps, within_inames_is_final=insn.within_inames_is_final) @@ -670,6 +717,26 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True): depends_on=frozenset()) generated_insns.append(init_neutral_insn) + transfer_depends_on = set([init_neutral_id, init_id]) + + if not isinstance(expr.exprs, tuple): + get_args_insn_id = insn_id_gen( + "%s_%s_get" % (insn.id, red_iname)) + + reduction_expr = expand_inner_reduction( + id=get_args_insn_id, + expr=expr.exprs, + nresults=nresults, + depends_on=insn.depends_on, + within_inames=( + (outer_insn_inames - frozenset(expr.inames)) + | frozenset([red_iname])), + within_inames_is_final=insn.within_inames_is_final) + + transfer_depends_on.add(get_args_insn_id) + else: + reduction_expr = expr.exprs + transfer_id = insn_id_gen("%s_%s_transfer" % (insn.id, red_iname)) transfer_insn = make_assignment( id=transfer_id, @@ -679,15 +746,16 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True): expression=expr.operation( arg_dtypes, _strip_if_scalar( - expr.exprs, + neutral_var_names, tuple(var(nvn) for nvn in neutral_var_names)), - _strip_if_scalar(expr.exprs, expr.exprs)), + _strip_if_scalar(neutral_var_names, reduction_expr)), within_inames=( (outer_insn_inames - frozenset(expr.inames)) | frozenset([red_iname])), within_inames_is_final=insn.within_inames_is_final, - depends_on=frozenset([init_id, init_neutral_id]) | insn.depends_on, - no_sync_with=frozenset([(init_id, "any")])) + depends_on=frozenset(transfer_depends_on) | insn.depends_on, + no_sync_with=frozenset( + [(insn_id, "any") for insn_id in transfer_depends_on])) generated_insns.append(transfer_insn) cur_size = 1 @@ -699,7 +767,6 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True): istage = 0 while cur_size > 1: - new_size = cur_size // 2 assert new_size * 2 == cur_size @@ -926,6 +993,8 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True): kernel = lp.tag_inames(kernel, new_iname_tags) + print(kernel) + kernel = ( _hackily_ensure_multi_assignment_return_values_are_scoped_private( kernel)) diff --git a/loopy/symbolic.py b/loopy/symbolic.py index 8876e2950..89ac05f70 100644 --- a/loopy/symbolic.py +++ b/loopy/symbolic.py @@ -96,9 +96,7 @@ class IdentityMapperMixin(object): return Reduction( expr.operation, tuple(new_inames), - (tuple(self.rec(e, *args) for e in expr.exprs) - if expr.is_plain_tuple - else self.rec(expr.exprs, *args)), + self.rec(expr.exprs, *args), allow_simultaneous=expr.allow_simultaneous) def map_tagged_variable(self, expr, *args): @@ -147,11 +145,7 @@ class WalkMapper(WalkMapperBase): if not self.visit(expr): return - if expr.is_plain_tuple: - for sub_expr in expr.exprs: - self.rec(sub_expr, *args) - else: - self.rec(expr.exprs, *args) + self.rec(expr.exprs, *args) map_tagged_variable = WalkMapperBase.map_variable @@ -169,10 +163,7 @@ class CallbackMapper(CallbackMapperBase, IdentityMapper): class CombineMapper(CombineMapperBase): def map_reduction(self, expr): - if expr.is_plain_tuple: - return self.combine(self.rec(sub_expr) for sub_expr in expr.exprs) - else: - return self.rec(expr.exprs) + return self.rec(expr.exprs) map_linear_subscript = CombineMapperBase.map_subscript @@ -203,12 +194,16 @@ class StringifyMapper(StringifyMapperBase): def map_reduction(self, expr, prec): from pymbolic.mapper.stringifier import PREC_NONE + + if isinstance(expr.exprs, tuple): + inner_expr = ", ".join(self.rec(e, PREC_NONE) for e in expr.exprs) + else: + inner_expr = self.rec(expr.exprs, PREC_NONE) + return "%sreduce(%s, [%s], %s)" % ( "simul_" if expr.allow_simultaneous else "", expr.operation, ", ".join(expr.inames), - (", ".join(self.rec(e, PREC_NONE) for e in expr.exprs) - if expr.is_plain_tuple - else self.rec(expr.exprs, PREC_NONE))) + inner_expr) def map_tagged_variable(self, expr, prec): return "%s$%s" % (expr.name, expr.tag) @@ -238,15 +233,6 @@ class UnidirectionalUnifier(UnidirectionalUnifierBase): or type(expr.operation) != type(other.operation) # noqa ): return [] - if expr.is_plain_tuple != other.is_plain_tuple: - return [] - - if expr.is_plain_tuple: - for sub_expr_l, sub_expr_r in zip(expr.exprs, other.exprs): - unis = self.rec(sub_expr_l, sub_expr_r, unis) - if not unis: - break - return unis return self.rec(expr.exprs, other.exprs, unis) @@ -281,10 +267,7 @@ class DependencyMapper(DependencyMapperBase): self.rec(child, *args) for child in expr.parameters) def map_reduction(self, expr): - if expr.is_plain_tuple: - deps = self.combine(self.rec(sub_expr) for sub_expr in expr.exprs) - else: - deps = self.rec(expr.exprs) + deps = self.rec(expr.exprs) return deps - set(p.Variable(iname) for iname in expr.inames) def map_tagged_variable(self, expr): @@ -503,8 +486,13 @@ class Reduction(p.Expression): from loopy.library.reduction import parse_reduction_op operation = parse_reduction_op(operation) - if not isinstance(exprs, tuple): - exprs = (exprs,) + from pymbolic.primitives import Call + if not isinstance(exprs, (tuple, Reduction, Call)): + from loopy.diagnostic import LoopyError + print(exprs) + raise LoopyError( + "reduction argument must be a tuple, reduction, or substitution " + "invocation, got '%s'" % type(exprs).__name__) from loopy.library.reduction import ReductionOperation assert isinstance(operation, ReductionOperation) @@ -530,12 +518,11 @@ class Reduction(p.Expression): return StringifyMapper @property - def is_plain_tuple(self): - """ - :return: True if the reduction expression is a tuple, False if otherwise - (the inner expression will still have a tuple type) - """ - return isinstance(self.exprs, tuple) + def exprs_stripped_if_scalar(self): + if isinstance(self.exprs, tuple) and len(self.exprs) == 1: + return self.exprs[0] + else: + return self.exprs @property @memoize_method @@ -1426,10 +1413,7 @@ class IndexVariableFinder(CombineMapper): return result def map_reduction(self, expr): - if expr.is_plain_tuple: - result = self.combine(self.rec(sub_expr) for sub_expr in expr.exprs) - else: - result = self.rec(expr.exprs) + result = self.rec(expr.exprs) if not (expr.inames_set & result): raise RuntimeError("reduction '%s' does not depend on " diff --git a/loopy/transform/data.py b/loopy/transform/data.py index a1948b615..ee5ffb6bc 100644 --- a/loopy/transform/data.py +++ b/loopy/transform/data.py @@ -684,7 +684,8 @@ def set_temporary_scope(kernel, temp_var_names, scope): # {{{ reduction_arg_to_subst_rule def reduction_arg_to_subst_rule( - knl, inames, insn_match=None, subst_rule_name=None, arg_number=0): + knl, inames, insn_match=None, subst_rule_name=None, + strip_if_scalar=False): if isinstance(inames, str): inames = [s.strip() for s in inames.split(",")] @@ -696,10 +697,7 @@ def reduction_arg_to_subst_rule( def map_reduction(expr, rec, nresults=1): if frozenset(expr.inames) != inames_set: - if expr.is_plain_tuple: - rec_result = tuple(rec(sub_expr) for sub_expr in expr.exprs) - else: - rec_result = rec(expr.exprs) + rec_result = rec(expr.exprs) return type(expr)( operation=expr.operation, @@ -717,27 +715,22 @@ def reduction_arg_to_subst_rule( raise LoopyError("substitution rule '%s' already exists" % my_subst_rule_name) - if not expr.is_plain_tuple: - raise NotImplemented("non-tuple reduction arguments not supported") - from loopy.kernel.data import SubstitutionRule substs[my_subst_rule_name] = SubstitutionRule( name=my_subst_rule_name, arguments=tuple(inames), - expression=expr.exprs[arg_number]) + expression=( + expr.exprs_stripped_if_scalar + if strip_if_scalar + else expr.exprs)) from pymbolic import var iname_vars = [var(iname) for iname in inames] - new_exprs = tuple(sub_expr - if i != arg_number - else var(my_subst_rule_name)(*iname_vars) - for i, sub_expr in enumerate(expr.exprs)) - return type(expr)( operation=expr.operation, inames=expr.inames, - exprs=new_exprs, + exprs=var(my_subst_rule_name)(*iname_vars), allow_simultaneous=expr.allow_simultaneous) from loopy.symbolic import ReductionCallbackMapper diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index 81db51a7e..b9a386b2b 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -145,10 +145,7 @@ class _InameSplitter(RuleAwareIdentityMapper): from loopy.symbolic import Reduction return Reduction(expr.operation, tuple(new_inames), - (tuple(self.rec(sub_expr, expn_state) - for sub_expr in expr.exprs) - if expr.is_plain_tuple - else self.rec(expr.exprs, expn_state)), + self.rec(expr.exprs, expn_state), expr.allow_simultaneous) else: return super(_InameSplitter, self).map_reduction(expr, expn_state) @@ -1194,20 +1191,15 @@ class _ReductionSplitter(RuleAwareIdentityMapper): if self.direction == "in": return Reduction(expr.operation, tuple(leftover_inames), Reduction(expr.operation, tuple(self.inames), - (tuple(self.rec(sub_expr, expn_state) - for sub_expr in expr.exprs) - if expr.is_plain_tuple - else self.rec(expr.exprs, expn_state)), + self.rec(expr.exprs, expn_state), expr.allow_simultaneous), expr.allow_simultaneous) elif self.direction == "out": return Reduction(expr.operation, tuple(self.inames), Reduction(expr.operation, tuple(leftover_inames), - (tuple(self.rec(sub_expr, expn_state) - for sub_expr in expr.exprs) - if expr.is_plain_tuple - else self.rec(expr.exprs, expn_state)), - expr.allow_simultaneous)) + self.rec(expr.exprs, expn_state), + expr.allow_simultaneous), + expr.allow_simultaneous) else: assert False else: @@ -1598,16 +1590,9 @@ class _ReductionInameUniquifier(RuleAwareIdentityMapper): from loopy.symbolic import Reduction return Reduction(expr.operation, tuple(new_inames), - (tuple(self.rec( - SubstitutionMapper(make_subst_func(subst_dict))( - sub_expr), - expn_state) - for sub_expr in expr.exprs) - if expr.is_plain_tuple - else self.rec( - SubstitutionMapper(make_subst_func(subst_dict))( - expr.exprs), - expn_state)), + self.rec( + SubstitutionMapper(make_subst_func(subst_dict))(expr.exprs), + expn_state), expr.allow_simultaneous) else: return super(_ReductionInameUniquifier, self).map_reduction( diff --git a/loopy/transform/precompute.py b/loopy/transform/precompute.py index a19e06ecd..7e70f8c77 100644 --- a/loopy/transform/precompute.py +++ b/loopy/transform/precompute.py @@ -59,9 +59,33 @@ def storage_axis_exprs(storage_axis_sources, args): return result +# {{{ identity mapper + +class PrecomputeIdentityMapper(RuleAwareIdentityMapper): + + def map_reduction(self, expr, expn_state): + from pymbolic.primitives import Call + new_exprs = self.rec(expr.exprs, expn_state) + + # If the substitution rule was replaced, precompute turned it into a + # scalar, but since reduction only takes tuple types we turn it into a + # tuple here. + if isinstance(expr.exprs, Call) and not isinstance(new_exprs, Call): + new_exprs = (new_exprs,) + + from loopy.symbolic import Reduction + return Reduction( + expr.operation, + expr.inames, + new_exprs, + expr.allow_simultaneous) + +# }}} + + # {{{ gather rule invocations -class RuleInvocationGatherer(RuleAwareIdentityMapper): +class RuleInvocationGatherer(PrecomputeIdentityMapper): def __init__(self, rule_mapping_context, kernel, subst_name, subst_tag, within): super(RuleInvocationGatherer, self).__init__(rule_mapping_context) @@ -131,7 +155,7 @@ class RuleInvocationGatherer(RuleAwareIdentityMapper): # {{{ replace rule invocation -class RuleInvocationReplacer(RuleAwareIdentityMapper): +class RuleInvocationReplacer(PrecomputeIdentityMapper): def __init__(self, rule_mapping_context, subst_name, subst_tag, within, access_descriptors, array_base_map, storage_axis_names, storage_axis_sources, diff --git a/loopy/type_inference.py b/loopy/type_inference.py index 3c77c9882..b6aa5d1ad 100644 --- a/loopy/type_inference.py +++ b/loopy/type_inference.py @@ -357,10 +357,18 @@ class TypeInferenceMapper(CombineMapper): as a tuple type. Otherwise, the number of expressions being reduced over must equal 1, and the type of the first expression is returned. """ - if expr.is_plain_tuple: + from loopy.symbolic import Reduction + from pymbolic.primitives import Call + + if isinstance(expr.exprs, tuple): rec_results = [self.rec(sub_expr) for sub_expr in expr.exprs] + elif isinstance(expr.exprs, Reduction): + rec_results = [self.rec(expr.exprs, return_tuple=True)] + elif isinstance(expr.exprs, Call): + rec_results = [self.map_call(expr.exprs, return_tuple=return_tuple)] else: - rec_results = [self.rec(expr.exprs, return_tuple=return_tuple)] + raise LoopyError("unknown reduction type: '%s'" + % type(expr.exprs).__name__) if any(len(rec_result) == 0 for rec_result in rec_results): return [] @@ -629,7 +637,12 @@ def infer_arg_and_reduction_dtypes_for_reduction_expression( type_inf_mapper = TypeInferenceMapper(kernel) import loopy as lp - for sub_expr in expr.exprs: + if isinstance(expr.exprs, tuple): + exprs = expr.exprs + else: + exprs = (expr.exprs,) + + for sub_expr in exprs: try: arg_dtype = type_inf_mapper(sub_expr) except DependencyTypeInferenceFailure: diff --git a/test/test_loopy.py b/test/test_loopy.py index 851a7f076..d5d1a1f31 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -2108,6 +2108,28 @@ def test_barrier_insertion_near_bottom_of_loop(): assert_barrier_between(knl, "ainit", "aupdate", ignore_barriers_in_levels=[1]) +def test_multi_argument_reduction_type_inference(): + from loopy.type_inference import TypeInferenceMapper + from loopy.library.reduction import SegmentedSumReductionOperation + from loopy.types import to_loopy_type + op = SegmentedSumReductionOperation() + + knl = lp.make_kernel("{[i]: 0<=i<10}", "") + + int32 = to_loopy_type(np.int32) + + expr = lp.symbolic.Reduction( + operation=op, + inames=("i",), + exprs=op.neutral_element(int32, int32), + allow_simultaneous=True) + + t_inf_mapper = TypeInferenceMapper(knl) + + print(t_inf_mapper(expr, return_tuple=True)) + 1/0 + + if __name__ == "__main__": if len(sys.argv) > 1: exec(sys.argv[1]) -- GitLab From dfaf84fd8fe03856cc7aa985c163295e8f6f70a1 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Fri, 7 Apr 2017 19:12:20 -0500 Subject: [PATCH 06/15] Fix reduction test for argmax change. --- test/test_loopy.py | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/test/test_loopy.py b/test/test_loopy.py index d5d1a1f31..b92161ac7 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -1987,19 +1987,28 @@ def test_integer_reduction(ctx_factory): dtype=to_loopy_type(vtype), shape=lp.auto) - reductions = [('max', lambda x: x == np.max(var_int)), - ('min', lambda x: x == np.min(var_int)), - ('sum', lambda x: x == np.sum(var_int)), - ('product', lambda x: x == np.prod(var_int)), - ('argmax', lambda x: (x[0] == np.max(var_int) and - var_int[out[1]] == np.max(var_int))), - ('argmin', lambda x: (x[0] == np.min(var_int) and - var_int[out[1]] == np.min(var_int)))] - - for reduction, function in reductions: + from collections import namedtuple + ReductionTest = namedtuple('ReductionTest', 'kind, check, args') + + reductions = [ + ReductionTest('max', lambda x: x == np.max(var_int), args='var[k]'), + ReductionTest('min', lambda x: x == np.min(var_int), args='var[k]'), + ReductionTest('sum', lambda x: x == np.sum(var_int), args='var[k]'), + ReductionTest('product', lambda x: x == np.prod(var_int), args='var[k]'), + ReductionTest('argmax', + lambda x: ( + x[0] == np.max(var_int) and var_int[out[1]] == np.max(var_int)), + args='var[k], k'), + ReductionTest('argmin', + lambda x: ( + x[0] == np.min(var_int) and var_int[out[1]] == np.min(var_int)), + args='var[k], k') + ] + + for reduction, function, args in reductions: kstr = ("out" if 'arg' not in reduction else "out[0], out[1]") - kstr += ' = {0}(k, var[k])'.format(reduction) + kstr += ' = {0}(k, {1})'.format(reduction, args) knl = lp.make_kernel('{[k]: 0<=k Date: Fri, 7 Apr 2017 19:38:33 -0500 Subject: [PATCH 07/15] Fix type inference. --- loopy/type_inference.py | 30 +++++++++++++++--------------- test/test_loopy.py | 7 ++++--- 2 files changed, 19 insertions(+), 18 deletions(-) diff --git a/loopy/type_inference.py b/loopy/type_inference.py index b6aa5d1ad..bd5a230dc 100644 --- a/loopy/type_inference.py +++ b/loopy/type_inference.py @@ -362,28 +362,28 @@ class TypeInferenceMapper(CombineMapper): if isinstance(expr.exprs, tuple): rec_results = [self.rec(sub_expr) for sub_expr in expr.exprs] + if return_tuple: + from itertools import product + rec_results = product(*rec_results) + else: + rec_results = rec_results[0] elif isinstance(expr.exprs, Reduction): - rec_results = [self.rec(expr.exprs, return_tuple=True)] + rec_results = self.rec(expr.exprs, return_tuple=return_tuple) elif isinstance(expr.exprs, Call): - rec_results = [self.map_call(expr.exprs, return_tuple=return_tuple)] + rec_results = self.map_call(expr.exprs, return_tuple=return_tuple) else: raise LoopyError("unknown reduction type: '%s'" % type(expr.exprs).__name__) - if any(len(rec_result) == 0 for rec_result in rec_results): - return [] - - if return_tuple: - from itertools import product - return [expr.operation.result_dtypes(self.kernel, *product_element) - for product_element in product(*rec_results)] - - if len(rec_results) != 1: - raise LoopyError("reductions with more or fewer than one " - "return value may only be used in direct assignments") + if not return_tuple: + if any(isinstance(rec_result, tuple) for rec_result in rec_results): + raise LoopyError("reductions with more or fewer than one " + "return value may only be used in direct assignments") + return [expr.operation.result_dtypes(self.kernel, rec_result)[0] + for rec_result in rec_results] - return [expr.operation.result_dtypes(self.kernel, rec_result)[0] - for rec_result in rec_results[0]] + return [expr.operation.result_dtypes(self.kernel, *rec_result) + for rec_result in rec_results] # }}} diff --git a/test/test_loopy.py b/test/test_loopy.py index b92161ac7..1cd025c99 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -1026,7 +1026,7 @@ def test_within_inames_and_reduction(): from pymbolic.primitives import Subscript, Variable i2 = lp.Assignment("a", - lp.Reduction("sum", "j", Subscript(Variable("phi"), Variable("j"))), + lp.Reduction("sum", "j", (Subscript(Variable("phi"), Variable("j")),)), within_inames=frozenset(), within_inames_is_final=True) @@ -2135,8 +2135,9 @@ def test_multi_argument_reduction_type_inference(): t_inf_mapper = TypeInferenceMapper(knl) - print(t_inf_mapper(expr, return_tuple=True)) - 1/0 + assert ( + t_inf_mapper(expr, return_tuple=True, return_dtype_set=True) + == [(int32, int32)]) if __name__ == "__main__": -- GitLab From 38e82d6eb632d9e673cd1de94177161985301827 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Fri, 7 Apr 2017 19:47:39 -0500 Subject: [PATCH 08/15] Fix line length. --- loopy/type_inference.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/loopy/type_inference.py b/loopy/type_inference.py index bd5a230dc..34e41740e 100644 --- a/loopy/type_inference.py +++ b/loopy/type_inference.py @@ -378,7 +378,8 @@ class TypeInferenceMapper(CombineMapper): if not return_tuple: if any(isinstance(rec_result, tuple) for rec_result in rec_results): raise LoopyError("reductions with more or fewer than one " - "return value may only be used in direct assignments") + "return value may only be used in direct " + "assignments") return [expr.operation.result_dtypes(self.kernel, rec_result)[0] for rec_result in rec_results] -- GitLab From 7913e68ab317a8be5339ed95a1bfc7d7cf4fe7ee Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Fri, 7 Apr 2017 19:51:04 -0500 Subject: [PATCH 09/15] More reduction fixes. --- test/test_target.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/test_target.py b/test/test_target.py index b656383e7..92aaf0f8e 100644 --- a/test/test_target.py +++ b/test/test_target.py @@ -115,8 +115,8 @@ def test_generate_c_snippet(): knl = lp.make_kernel( "{[I, k]: 0<=I Date: Sat, 8 Apr 2017 15:36:16 -0500 Subject: [PATCH 10/15] Revert "More reduction fixes." This reverts commit 7913e68ab317a8be5339ed95a1bfc7d7cf4fe7ee. --- test/test_target.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/test_target.py b/test/test_target.py index 92aaf0f8e..b656383e7 100644 --- a/test/test_target.py +++ b/test/test_target.py @@ -115,8 +115,8 @@ def test_generate_c_snippet(): knl = lp.make_kernel( "{[I, k]: 0<=I Date: Sat, 8 Apr 2017 15:36:59 -0500 Subject: [PATCH 11/15] Avoid treating a single argument tuple type as the same as a scalar type. Now, the argument to a reduction can be either a scalar or a tuple. --- loopy/kernel/instruction.py | 2 +- loopy/library/reduction.py | 4 -- loopy/preprocess.py | 28 +++++-------- loopy/symbolic.py | 74 +++++++++++++++++++--------------- loopy/transform/data.py | 15 ++----- loopy/transform/iname.py | 9 +++-- loopy/transform/precompute.py | 28 +------------ loopy/type_inference.py | 76 +++++++++++++++++------------------ test/test_loopy.py | 18 +++++++-- 9 files changed, 115 insertions(+), 139 deletions(-) diff --git a/loopy/kernel/instruction.py b/loopy/kernel/instruction.py index 85c501929..0d22dbb88 100644 --- a/loopy/kernel/instruction.py +++ b/loopy/kernel/instruction.py @@ -664,7 +664,7 @@ class MultiAssignmentBase(InstructionBase): @memoize_method def reduction_inames(self): def map_reduction(expr, rec): - rec(expr.exprs) + rec(expr.expr) for iname in expr.inames: result.add(iname) diff --git a/loopy/library/reduction.py b/loopy/library/reduction.py index e3c7e6099..7037de994 100644 --- a/loopy/library/reduction.py +++ b/loopy/library/reduction.py @@ -65,10 +65,6 @@ class ReductionOperation(object): def __ne__(self, other): return not self.__eq__(other) - @property - def is_segmented(self): - raise NotImplementedError - @staticmethod def parse_result_type(target, op_type): try: diff --git a/loopy/preprocess.py b/loopy/preprocess.py index 5f62d1a9d..17226b63a 100644 --- a/loopy/preprocess.py +++ b/loopy/preprocess.py @@ -97,7 +97,7 @@ def check_reduction_iname_uniqueness(kernel): iname_to_nonsimultaneous_reduction_count = {} def map_reduction(expr, rec): - rec(expr.exprs) + rec(expr.expr) for iname in expr.inames: iname_to_reduction_count[iname] = ( @@ -567,13 +567,13 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True): reduction_insn_depends_on = set([init_id]) - if not isinstance(expr.exprs, tuple): + if nresults > 1 and not isinstance(expr.expr, tuple): get_args_insn_id = insn_id_gen( "%s_%s_get" % (insn.id, "_".join(expr.inames))) reduction_expr = expand_inner_reduction( id=get_args_insn_id, - expr=expr.exprs, + expr=expr.expr, nresults=nresults, depends_on=insn.depends_on, within_inames=update_insn_iname_deps, @@ -581,7 +581,7 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True): reduction_insn_depends_on.add(get_args_insn_id) else: - reduction_expr = expr.exprs + reduction_expr = expr.expr reduction_insn = make_assignment( id=update_id, @@ -589,7 +589,7 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True): expression=expr.operation( arg_dtypes, _strip_if_scalar(acc_vars, acc_vars), - _strip_if_scalar(acc_vars, reduction_expr)), + reduction_expr), depends_on=frozenset(reduction_insn_depends_on) | insn.depends_on, within_inames=update_insn_iname_deps, within_inames_is_final=insn.within_inames_is_final) @@ -626,14 +626,6 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True): v[iname].lt_set(v[0] + size)).get_basic_sets() return bs - def _make_slab_set_from_range(iname, lbound, ubound): - v = isl.make_zero_and_vars([iname]) - bs, = ( - v[iname].ge_set(v[0] + lbound) - & - v[iname].lt_set(v[0] + ubound)).get_basic_sets() - return bs - def map_reduction_local(expr, rec, nresults, arg_dtypes, reduction_dtypes): red_iname, = expr.inames @@ -719,13 +711,13 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True): transfer_depends_on = set([init_neutral_id, init_id]) - if not isinstance(expr.exprs, tuple): + if nresults > 1 and not isinstance(expr.expr, tuple): get_args_insn_id = insn_id_gen( "%s_%s_get" % (insn.id, red_iname)) reduction_expr = expand_inner_reduction( id=get_args_insn_id, - expr=expr.exprs, + expr=expr.expr, nresults=nresults, depends_on=insn.depends_on, within_inames=( @@ -735,7 +727,7 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True): transfer_depends_on.add(get_args_insn_id) else: - reduction_expr = expr.exprs + reduction_expr = expr.expr transfer_id = insn_id_gen("%s_%s_transfer" % (insn.id, red_iname)) transfer_insn = make_assignment( @@ -748,7 +740,7 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True): _strip_if_scalar( neutral_var_names, tuple(var(nvn) for nvn in neutral_var_names)), - _strip_if_scalar(neutral_var_names, reduction_expr)), + reduction_expr), within_inames=( (outer_insn_inames - frozenset(expr.inames)) | frozenset([red_iname])), @@ -993,8 +985,6 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True): kernel = lp.tag_inames(kernel, new_iname_tags) - print(kernel) - kernel = ( _hackily_ensure_multi_assignment_return_values_are_scoped_private( kernel)) diff --git a/loopy/symbolic.py b/loopy/symbolic.py index 89ac05f70..3a4623166 100644 --- a/loopy/symbolic.py +++ b/loopy/symbolic.py @@ -96,7 +96,7 @@ class IdentityMapperMixin(object): return Reduction( expr.operation, tuple(new_inames), - self.rec(expr.exprs, *args), + self.rec(expr.expr, *args), allow_simultaneous=expr.allow_simultaneous) def map_tagged_variable(self, expr, *args): @@ -145,7 +145,7 @@ class WalkMapper(WalkMapperBase): if not self.visit(expr): return - self.rec(expr.exprs, *args) + self.rec(expr.expr, *args) map_tagged_variable = WalkMapperBase.map_variable @@ -163,7 +163,7 @@ class CallbackMapper(CallbackMapperBase, IdentityMapper): class CombineMapper(CombineMapperBase): def map_reduction(self, expr): - return self.rec(expr.exprs) + return self.rec(expr.expr) map_linear_subscript = CombineMapperBase.map_subscript @@ -195,15 +195,10 @@ class StringifyMapper(StringifyMapperBase): def map_reduction(self, expr, prec): from pymbolic.mapper.stringifier import PREC_NONE - if isinstance(expr.exprs, tuple): - inner_expr = ", ".join(self.rec(e, PREC_NONE) for e in expr.exprs) - else: - inner_expr = self.rec(expr.exprs, PREC_NONE) - return "%sreduce(%s, [%s], %s)" % ( "simul_" if expr.allow_simultaneous else "", expr.operation, ", ".join(expr.inames), - inner_expr) + self.rec(expr.expr, PREC_NONE)) def map_tagged_variable(self, expr, prec): return "%s$%s" % (expr.name, expr.tag) @@ -234,7 +229,7 @@ class UnidirectionalUnifier(UnidirectionalUnifierBase): ): return [] - return self.rec(expr.exprs, other.exprs, unis) + return self.rec(expr.expr, other.expr, unis) def map_tagged_variable(self, expr, other, urecs): new_uni_record = self.unification_record_from_equation( @@ -267,7 +262,7 @@ class DependencyMapper(DependencyMapperBase): self.rec(child, *args) for child in expr.parameters) def map_reduction(self, expr): - deps = self.rec(expr.exprs) + deps = self.rec(expr.expr) return deps - set(p.Variable(iname) for iname in expr.inames) def map_tagged_variable(self, expr): @@ -449,9 +444,10 @@ class Reduction(p.Expression): a list of inames across which reduction on :attr:`expr` is being carried out. - .. attribute:: exprs + .. attribute:: expr - A (tuple-typed) expression which currently may be one of + An expression which may have tuple type. If the expression has tuple + type, it must be one of the following: * a :class:`tuple` of :class:`pymbolic.primitives.Expression`, or * a :class:`loopy.symbolic.Reduction`, or * a substitution rule invocation. @@ -462,9 +458,9 @@ class Reduction(p.Expression): in precisely one reduction, to avoid mis-nesting errors. """ - init_arg_names = ("operation", "inames", "exprs", "allow_simultaneous") + init_arg_names = ("operation", "inames", "expr", "allow_simultaneous") - def __init__(self, operation, inames, exprs, allow_simultaneous=False): + def __init__(self, operation, inames, expr, allow_simultaneous=False): if isinstance(inames, str): inames = tuple(iname.strip() for iname in inames.split(",")) @@ -486,43 +482,48 @@ class Reduction(p.Expression): from loopy.library.reduction import parse_reduction_op operation = parse_reduction_op(operation) - from pymbolic.primitives import Call - if not isinstance(exprs, (tuple, Reduction, Call)): - from loopy.diagnostic import LoopyError - print(exprs) - raise LoopyError( - "reduction argument must be a tuple, reduction, or substitution " - "invocation, got '%s'" % type(exprs).__name__) - from loopy.library.reduction import ReductionOperation assert isinstance(operation, ReductionOperation) + from loopy.diagnostic import LoopyError + + if operation.arg_count > 1: + from pymbolic.primitives import Call + + if not isinstance(expr, (tuple, Reduction, Call)): + raise LoopyError("reduction argument must be one of " + "a tuple, reduction, or substitution rule " + "invocation, got '%s'" % type(expr).__name__) + else: + # Sanity checks + if isinstance(expr, tuple): + raise LoopyError("got a tuple argument to a scalar reduction") + elif isinstance(expr, Reduction) and expr.is_tuple_typed: + raise LoopyError("got a tuple typed argument to a scalar reduction") + self.operation = operation self.inames = inames - self.exprs = exprs + self.expr = expr self.allow_simultaneous = allow_simultaneous def __getinitargs__(self): - return (self.operation, self.inames, self.exprs, self.allow_simultaneous) + return (self.operation, self.inames, self.expr, self.allow_simultaneous) def get_hash(self): - return hash((self.__class__, self.operation, self.inames, self.exprs)) + return hash((self.__class__, self.operation, self.inames, self.expr)) def is_equal(self, other): return (other.__class__ == self.__class__ and other.operation == self.operation and other.inames == self.inames - and other.exprs == self.exprs) + and other.expr == self.expr) def stringifier(self): return StringifyMapper @property - def exprs_stripped_if_scalar(self): - if isinstance(self.exprs, tuple) and len(self.exprs) == 1: - return self.exprs[0] - else: - return self.exprs + def is_tuple_typed(self): + return self.operation.arg_count > 1 @property @memoize_method @@ -966,6 +967,11 @@ class FunctionToPrimitiveMapper(IdentityMapper): processed_inames.append(iname.name) + if len(red_exprs) == 1: + red_exprs = red_exprs[0] + + print("RED EXPRS ARE", red_exprs) + return Reduction(operation, tuple(processed_inames), red_exprs, allow_simultaneous=allow_simultaneous) @@ -991,6 +997,8 @@ class FunctionToPrimitiveMapper(IdentityMapper): raise TypeError("cse takes two arguments") elif name in ["reduce", "simul_reduce"]: + + if len(expr.parameters) >= 3: operation, inames = expr.parameters[:2] red_exprs = expr.parameters[2:] @@ -1413,7 +1421,7 @@ class IndexVariableFinder(CombineMapper): return result def map_reduction(self, expr): - result = self.rec(expr.exprs) + result = self.rec(expr.expr) if not (expr.inames_set & result): raise RuntimeError("reduction '%s' does not depend on " diff --git a/loopy/transform/data.py b/loopy/transform/data.py index ee5ffb6bc..575311b11 100644 --- a/loopy/transform/data.py +++ b/loopy/transform/data.py @@ -683,9 +683,7 @@ def set_temporary_scope(kernel, temp_var_names, scope): # {{{ reduction_arg_to_subst_rule -def reduction_arg_to_subst_rule( - knl, inames, insn_match=None, subst_rule_name=None, - strip_if_scalar=False): +def reduction_arg_to_subst_rule(knl, inames, insn_match=None, subst_rule_name=None): if isinstance(inames, str): inames = [s.strip() for s in inames.split(",")] @@ -697,12 +695,10 @@ def reduction_arg_to_subst_rule( def map_reduction(expr, rec, nresults=1): if frozenset(expr.inames) != inames_set: - rec_result = rec(expr.exprs) - return type(expr)( operation=expr.operation, inames=expr.inames, - exprs=rec_result, + expr=rec(expr.expr), allow_simultaneous=expr.allow_simultaneous) if subst_rule_name is None: @@ -719,10 +715,7 @@ def reduction_arg_to_subst_rule( substs[my_subst_rule_name] = SubstitutionRule( name=my_subst_rule_name, arguments=tuple(inames), - expression=( - expr.exprs_stripped_if_scalar - if strip_if_scalar - else expr.exprs)) + expression=expr.expr) from pymbolic import var iname_vars = [var(iname) for iname in inames] @@ -730,7 +723,7 @@ def reduction_arg_to_subst_rule( return type(expr)( operation=expr.operation, inames=expr.inames, - exprs=var(my_subst_rule_name)(*iname_vars), + expr=var(my_subst_rule_name)(*iname_vars), allow_simultaneous=expr.allow_simultaneous) from loopy.symbolic import ReductionCallbackMapper diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index 35c127008..ea90abfe2 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -145,7 +145,7 @@ class _InameSplitter(RuleAwareIdentityMapper): from loopy.symbolic import Reduction return Reduction(expr.operation, tuple(new_inames), - self.rec(expr.exprs, expn_state), + self.rec(expr.expr, expn_state), expr.allow_simultaneous) else: return super(_InameSplitter, self).map_reduction(expr, expn_state) @@ -1192,13 +1192,13 @@ class _ReductionSplitter(RuleAwareIdentityMapper): if self.direction == "in": return Reduction(expr.operation, tuple(leftover_inames), Reduction(expr.operation, tuple(self.inames), - self.rec(expr.exprs, expn_state), + self.rec(expr.expr, expn_state), expr.allow_simultaneous), expr.allow_simultaneous) elif self.direction == "out": return Reduction(expr.operation, tuple(self.inames), Reduction(expr.operation, tuple(leftover_inames), - self.rec(expr.exprs, expn_state), + self.rec(expr.expr, expn_state), expr.allow_simultaneous), expr.allow_simultaneous) else: @@ -1592,7 +1592,8 @@ class _ReductionInameUniquifier(RuleAwareIdentityMapper): from loopy.symbolic import Reduction return Reduction(expr.operation, tuple(new_inames), self.rec( - SubstitutionMapper(make_subst_func(subst_dict))(expr.exprs), + SubstitutionMapper(make_subst_func(subst_dict))( + expr.expr), expn_state), expr.allow_simultaneous) else: diff --git a/loopy/transform/precompute.py b/loopy/transform/precompute.py index 7e70f8c77..a19e06ecd 100644 --- a/loopy/transform/precompute.py +++ b/loopy/transform/precompute.py @@ -59,33 +59,9 @@ def storage_axis_exprs(storage_axis_sources, args): return result -# {{{ identity mapper - -class PrecomputeIdentityMapper(RuleAwareIdentityMapper): - - def map_reduction(self, expr, expn_state): - from pymbolic.primitives import Call - new_exprs = self.rec(expr.exprs, expn_state) - - # If the substitution rule was replaced, precompute turned it into a - # scalar, but since reduction only takes tuple types we turn it into a - # tuple here. - if isinstance(expr.exprs, Call) and not isinstance(new_exprs, Call): - new_exprs = (new_exprs,) - - from loopy.symbolic import Reduction - return Reduction( - expr.operation, - expr.inames, - new_exprs, - expr.allow_simultaneous) - -# }}} - - # {{{ gather rule invocations -class RuleInvocationGatherer(PrecomputeIdentityMapper): +class RuleInvocationGatherer(RuleAwareIdentityMapper): def __init__(self, rule_mapping_context, kernel, subst_name, subst_tag, within): super(RuleInvocationGatherer, self).__init__(rule_mapping_context) @@ -155,7 +131,7 @@ class RuleInvocationGatherer(PrecomputeIdentityMapper): # {{{ replace rule invocation -class RuleInvocationReplacer(PrecomputeIdentityMapper): +class RuleInvocationReplacer(RuleAwareIdentityMapper): def __init__(self, rule_mapping_context, subst_name, subst_tag, within, access_descriptors, array_base_map, storage_axis_names, storage_axis_sources, diff --git a/loopy/type_inference.py b/loopy/type_inference.py index 34e41740e..34d3fc5e2 100644 --- a/loopy/type_inference.py +++ b/loopy/type_inference.py @@ -353,38 +353,38 @@ class TypeInferenceMapper(CombineMapper): def map_reduction(self, expr, return_tuple=False): """ - :arg return_tuple: If *True*, treat the type of the reduction expression - as a tuple type. Otherwise, the number of expressions being reduced over - must equal 1, and the type of the first expression is returned. + :arg return_tuple: If *True*, treat the reduction as having tuple type. + Otherwise, if *False*, the reduction must have scalar type. """ from loopy.symbolic import Reduction from pymbolic.primitives import Call - if isinstance(expr.exprs, tuple): - rec_results = [self.rec(sub_expr) for sub_expr in expr.exprs] + if not return_tuple and expr.is_tuple_typed: + raise LoopyError("reductions with more or fewer than one " + "return value may only be used in direct " + "assignments") + + if isinstance(expr.expr, tuple): + rec_results = [self.rec(sub_expr) for sub_expr in expr.expr] + from itertools import product + rec_results = product(*rec_results) + elif isinstance(expr.expr, Reduction): + rec_results = self.rec(expr.expr, return_tuple=return_tuple) + elif isinstance(expr.expr, Call): + rec_results = self.map_call(expr.expr, return_tuple=return_tuple) + else: if return_tuple: - from itertools import product - rec_results = product(*rec_results) + raise LoopyError("unknown reduction type for tuple reduction: '%s'" + % type(expr.expr).__name__) else: - rec_results = rec_results[0] - elif isinstance(expr.exprs, Reduction): - rec_results = self.rec(expr.exprs, return_tuple=return_tuple) - elif isinstance(expr.exprs, Call): - rec_results = self.map_call(expr.exprs, return_tuple=return_tuple) + rec_results = self.rec(expr.expr) + + if return_tuple: + return [expr.operation.result_dtypes(self.kernel, *rec_result) + for rec_result in rec_results] else: - raise LoopyError("unknown reduction type: '%s'" - % type(expr.exprs).__name__) - - if not return_tuple: - if any(isinstance(rec_result, tuple) for rec_result in rec_results): - raise LoopyError("reductions with more or fewer than one " - "return value may only be used in direct " - "assignments") return [expr.operation.result_dtypes(self.kernel, rec_result)[0] - for rec_result in rec_results] - - return [expr.operation.result_dtypes(self.kernel, *rec_result) - for rec_result in rec_results] + for rec_result in rec_results] # }}} @@ -633,29 +633,29 @@ def infer_unknown_types(kernel, expect_completion=False): def infer_arg_and_reduction_dtypes_for_reduction_expression( kernel, expr, unknown_types_ok): - arg_dtypes = [] - type_inf_mapper = TypeInferenceMapper(kernel) import loopy as lp - if isinstance(expr.exprs, tuple): - exprs = expr.exprs - else: - exprs = (expr.exprs,) + if expr.is_tuple_typed: + arg_dtypes_result = type_inf_mapper(expr, return_tuple=True, return_dtype_set=True) - for sub_expr in exprs: + if len(arg_dtypes_result) == 1: + arg_dtypes = arg_dtypes_result[0] + else: + if unknown_types_ok: + arg_dtypes = [lp.auto] * expr.operation.arg_count + else: + raise LoopyError("failed to determine types of accumulators for " + "reduction '%s'" % expr) + else: try: - arg_dtype = type_inf_mapper(sub_expr) + arg_dtypes = [type_inf_mapper(expr)] except DependencyTypeInferenceFailure: if unknown_types_ok: - arg_dtype = lp.auto + arg_dtypes = [lp.auto] else: raise LoopyError("failed to determine type of accumulator for " - "reduction sub-expression '%s'" % sub_expr) - else: - arg_dtype = arg_dtype.with_target(kernel.target) - - arg_dtypes.append(arg_dtype) + "reduction '%s'" % expr) reduction_dtypes = expr.operation.result_dtypes(kernel, *arg_dtypes) reduction_dtypes = tuple( diff --git a/test/test_loopy.py b/test/test_loopy.py index 771960783..b535ec6fe 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -1026,7 +1026,7 @@ def test_within_inames_and_reduction(): from pymbolic.primitives import Subscript, Variable i2 = lp.Assignment("a", - lp.Reduction("sum", "j", (Subscript(Variable("phi"), Variable("j")),)), + lp.Reduction("sum", "j", Subscript(Variable("phi"), Variable("j"))), within_inames=frozenset(), within_inames_is_final=True) @@ -2123,14 +2123,18 @@ def test_multi_argument_reduction_type_inference(): from loopy.types import to_loopy_type op = SegmentedSumReductionOperation() - knl = lp.make_kernel("{[i]: 0<=i<10}", "") + knl = lp.make_kernel("{[i,j]: 0<=i<10 and 0<=j Date: Sat, 8 Apr 2017 15:41:14 -0500 Subject: [PATCH 12/15] Flake8 fixes. --- loopy/symbolic.py | 1 - loopy/type_inference.py | 3 ++- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/loopy/symbolic.py b/loopy/symbolic.py index 3a4623166..44be6f9c9 100644 --- a/loopy/symbolic.py +++ b/loopy/symbolic.py @@ -998,7 +998,6 @@ class FunctionToPrimitiveMapper(IdentityMapper): elif name in ["reduce", "simul_reduce"]: - if len(expr.parameters) >= 3: operation, inames = expr.parameters[:2] red_exprs = expr.parameters[2:] diff --git a/loopy/type_inference.py b/loopy/type_inference.py index 34d3fc5e2..b8b0cbcbf 100644 --- a/loopy/type_inference.py +++ b/loopy/type_inference.py @@ -637,7 +637,8 @@ def infer_arg_and_reduction_dtypes_for_reduction_expression( import loopy as lp if expr.is_tuple_typed: - arg_dtypes_result = type_inf_mapper(expr, return_tuple=True, return_dtype_set=True) + arg_dtypes_result = type_inf_mapper( + expr, return_tuple=True, return_dtype_set=True) if len(arg_dtypes_result) == 1: arg_dtypes = arg_dtypes_result[0] -- GitLab From 5250162b129288b78bfead87aa99a9639a8a9280 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Sat, 8 Apr 2017 15:50:41 -0500 Subject: [PATCH 13/15] Allow function calls as args to tuple typed reductions. --- loopy/symbolic.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/loopy/symbolic.py b/loopy/symbolic.py index 44be6f9c9..0d8543270 100644 --- a/loopy/symbolic.py +++ b/loopy/symbolic.py @@ -450,7 +450,7 @@ class Reduction(p.Expression): type, it must be one of the following: * a :class:`tuple` of :class:`pymbolic.primitives.Expression`, or * a :class:`loopy.symbolic.Reduction`, or - * a substitution rule invocation. + * a function call or substitution rule invocation. .. attribute:: allow_simultaneous @@ -492,8 +492,8 @@ class Reduction(p.Expression): if not isinstance(expr, (tuple, Reduction, Call)): raise LoopyError("reduction argument must be one of " - "a tuple, reduction, or substitution rule " - "invocation, got '%s'" % type(expr).__name__) + "a tuple, reduction, or call; " + "got '%s'" % type(expr).__name__) else: # Sanity checks if isinstance(expr, tuple): -- GitLab From c4872f22669b6b94c9a2f2360bfd4b1bd0b23517 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Sat, 8 Apr 2017 15:54:24 -0500 Subject: [PATCH 14/15] Remove debugging print. --- loopy/symbolic.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/loopy/symbolic.py b/loopy/symbolic.py index 0d8543270..f1a494f30 100644 --- a/loopy/symbolic.py +++ b/loopy/symbolic.py @@ -970,8 +970,6 @@ class FunctionToPrimitiveMapper(IdentityMapper): if len(red_exprs) == 1: red_exprs = red_exprs[0] - print("RED EXPRS ARE", red_exprs) - return Reduction(operation, tuple(processed_inames), red_exprs, allow_simultaneous=allow_simultaneous) -- GitLab From 76d25a69430b53d6f048cec4d16ba089affaf193 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Fri, 21 Apr 2017 21:16:37 -0500 Subject: [PATCH 15/15] Generalize argext and segmented reductions to C-like targets --- loopy/library/reduction.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/loopy/library/reduction.py b/loopy/library/reduction.py index 7037de994..f9648bde7 100644 --- a/loopy/library/reduction.py +++ b/loopy/library/reduction.py @@ -455,9 +455,9 @@ def parse_reduction_op(name): def reduction_function_mangler(kernel, func_id, arg_dtypes): if isinstance(func_id, ArgExtFunction) and func_id.name == "init": - from loopy.target.opencl import OpenCLTarget - if not isinstance(kernel.target, OpenCLTarget): - raise LoopyError("only OpenCL supported for now") + from loopy.target.opencl import CTarget + if not isinstance(kernel.target, CTarget): + raise LoopyError("%s: only C-like targets supported for now" % func_id) op = func_id.reduction_op @@ -471,9 +471,9 @@ def reduction_function_mangler(kernel, func_id, arg_dtypes): ) elif isinstance(func_id, ArgExtFunction) and func_id.name == "update": - from loopy.target.opencl import OpenCLTarget - if not isinstance(kernel.target, OpenCLTarget): - raise LoopyError("only OpenCL supported for now") + from loopy.target.opencl import CTarget + if not isinstance(kernel.target, CTarget): + raise LoopyError("%s: only C-like targets supported for now" % func_id) op = func_id.reduction_op @@ -491,9 +491,9 @@ def reduction_function_mangler(kernel, func_id, arg_dtypes): ) elif isinstance(func_id, SegmentedFunction) and func_id.name == "init": - from loopy.target.opencl import OpenCLTarget - if not isinstance(kernel.target, OpenCLTarget): - raise LoopyError("only OpenCL supported for now") + from loopy.target.opencl import CTarget + if not isinstance(kernel.target, CTarget): + raise LoopyError("%s: only C-like targets supported for now" % func_id) op = func_id.reduction_op @@ -507,9 +507,9 @@ def reduction_function_mangler(kernel, func_id, arg_dtypes): ) elif isinstance(func_id, SegmentedFunction) and func_id.name == "update": - from loopy.target.opencl import OpenCLTarget - if not isinstance(kernel.target, OpenCLTarget): - raise LoopyError("only OpenCL supported for now") + from loopy.target.opencl import CTarget + if not isinstance(kernel.target, CTarget): + raise LoopyError("%s: only C-like targets supported for now" % func_id) op = func_id.reduction_op -- GitLab