From 2728158191a0c960866d5e21bcefa9003edb4383 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner <inform@tiker.net> Date: Wed, 11 May 2016 21:42:32 -0500 Subject: [PATCH] Support functions with multiple return values --- doc/ref_kernel.rst | 28 +++ doc/ref_transform.rst | 2 + loopy/__init__.py | 25 ++- loopy/codegen/instruction.py | 28 ++- loopy/expression.py | 29 ++- loopy/kernel/__init__.py | 34 +++- loopy/kernel/creation.py | 224 +++++++++++++--------- loopy/kernel/data.py | 277 +++++++++++++++++++++++---- loopy/kernel/tools.py | 13 +- loopy/library/function.py | 4 +- loopy/library/reduction.py | 82 ++++---- loopy/maxima.py | 2 +- loopy/preprocess.py | 124 ++++++++---- loopy/schedule.py | 6 +- loopy/statistics.py | 7 +- loopy/symbolic.py | 44 ++++- loopy/target/__init__.py | 3 + loopy/target/c/__init__.py | 64 +++++++ loopy/target/c/codegen/expression.py | 77 ++++---- loopy/target/opencl.py | 24 ++- loopy/target/pyopencl.py | 14 +- loopy/transform/instruction.py | 21 ++ loopy/version.py | 2 +- setup.py | 1 + test/test_loopy.py | 23 +-- 25 files changed, 852 insertions(+), 306 deletions(-) diff --git a/doc/ref_kernel.rst b/doc/ref_kernel.rst index a323fff52..560facd63 100644 --- a/doc/ref_kernel.rst +++ b/doc/ref_kernel.rst @@ -293,6 +293,11 @@ Loopy's expressions are a slight superset of the expressions supported by TODO: Functions TODO: Reductions +Function Call Instructions +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. autoclass:: CallInstruction + C Block Instructions ^^^^^^^^^^^^^^^^^^^^ @@ -468,6 +473,8 @@ Targets .. automodule:: loopy.target +.. currentmodule:: loopy + Helper values ------------- @@ -479,6 +486,27 @@ Helper values .. }}} +Libraries: Extending and Interfacing with External Functionality +---------------------------------------------------------------- + +.. _symbols: + +Symbols +^^^^^^^ + +.. _functions: + +Functions +^^^^^^^^^ + +.. autoclass:: CallMangleInfo + +.. _reductions: + +Reductions +^^^^^^^^^^ + + The Kernel Object ----------------- diff --git a/doc/ref_transform.rst b/doc/ref_transform.rst index f92cfbf67..d085c1215 100644 --- a/doc/ref_transform.rst +++ b/doc/ref_transform.rst @@ -68,6 +68,8 @@ Manipulating Instructions .. autofunction:: remove_instructions +.. autofunction:: replace_instruction_ids + .. autofunction:: tag_instructions Registering Library Routines diff --git a/loopy/__init__.py b/loopy/__init__.py index 22022c0f6..424aa522f 100644 --- a/loopy/__init__.py +++ b/loopy/__init__.py @@ -41,9 +41,12 @@ from loopy.kernel.data import ( KernelArgument, ValueArg, GlobalArg, ConstantArg, ImageArg, memory_ordering, memory_scope, VarAtomicity, AtomicInit, AtomicUpdate, - InstructionBase, Assignment, ExpressionInstruction, CInstruction, + InstructionBase, + MultiAssignmentBase, Assignment, ExpressionInstruction, + CallInstruction, CInstruction, temp_var_scope, TemporaryVariable, - SubstitutionRule) + SubstitutionRule, + CallMangleInfo) from loopy.kernel import LoopKernel, kernel_state from loopy.kernel.tools import ( @@ -67,7 +70,9 @@ from loopy.transform.iname import ( from loopy.transform.instruction import ( find_instructions, map_instructions, set_instruction_priority, add_dependency, - remove_instructions, tag_instructions) + remove_instructions, + replace_instruction_ids, + tag_instructions) from loopy.transform.data import ( add_prefetch, change_arg_to_image, tag_data_axes, @@ -131,8 +136,11 @@ __all__ = [ "ValueArg", "GlobalArg", "ConstantArg", "ImageArg", "temp_var_scope", "TemporaryVariable", "SubstitutionRule", + "CallMangleInfo", - "InstructionBase", "Assignment", "ExpressionInstruction", "CInstruction", + "InstructionBase", + "MultiAssignmentBase", "Assignment", "ExpressionInstruction", + "CallInstruction", "CInstruction", "default_function_mangler", "single_arg_function_mangler", @@ -157,7 +165,9 @@ __all__ = [ "find_instructions", "map_instructions", "set_instruction_priority", "add_dependency", - "remove_instructions", "tag_instructions", + "remove_instructions", + "replace_instruction_ids", + "tag_instructions", "extract_subst", "expand_subst", "assignment_to_subst", "find_rules_matching", "find_one_rule_matching", @@ -278,6 +288,11 @@ def register_symbol_manglers(kernel, manglers): def register_function_manglers(kernel, manglers): + """ + :arg manglers: list of functions of signature ``(target, name, arg_dtypes)`` + returning a :class:`loopy.CallMangleInfo`. + :returns: *kernel* with *manglers* registered + """ new_manglers = kernel.function_manglers[:] for m in manglers: if m not in new_manglers: diff --git a/loopy/codegen/instruction.py b/loopy/codegen/instruction.py index d3a7ae42c..7b95f5948 100644 --- a/loopy/codegen/instruction.py +++ b/loopy/codegen/instruction.py @@ -61,10 +61,12 @@ def wrap_in_conditionals(codegen_state, domain, check_inames, required_preds, st def generate_instruction_code(kernel, insn, codegen_state): - from loopy.kernel.data import Assignment, CInstruction + from loopy.kernel.data import Assignment, CallInstruction, CInstruction if isinstance(insn, Assignment): result = generate_expr_instruction_code(kernel, insn, codegen_state) + elif isinstance(insn, CallInstruction): + result = generate_call_code(kernel, insn, codegen_state) elif isinstance(insn, CInstruction): result = generate_c_instruction_code(kernel, insn, codegen_state) else: @@ -218,12 +220,32 @@ def generate_expr_instruction_code(kernel, insn, codegen_state): return result +def generate_call_code(kernel, insn, codegen_state): + # {{{ vectorization handling + + if codegen_state.vectorization_info: + if insn.atomicity: + raise Unvectorizable("function call") + + # }}} + + result = kernel.target.generate_multiple_assignment( + codegen_state, insn) + + # {{{ tracing + + if kernel.options.trace_assignments or kernel.options.trace_assignment_values: + raise NotImplementedError("tracing of multi-output function calls") + + # }}} + + return result + + def generate_c_instruction_code(kernel, insn, codegen_state): if codegen_state.vectorization_info is not None: raise Unvectorizable("C instructions cannot be vectorized") - ecm = codegen_state.expression_to_code_mapper - body = [] from loopy.codegen import POD diff --git a/loopy/expression.py b/loopy/expression.py index 16c09b828..42c54af71 100644 --- a/loopy/expression.py +++ b/loopy/expression.py @@ -199,7 +199,7 @@ class TypeInferenceMapper(CombineMapper): def map_linear_subscript(self, expr): return self.rec(expr.aggregate) - def map_call(self, expr): + def map_call(self, expr, multiple_types_ok=False): from pymbolic.primitives import Variable identifier = expr.function @@ -212,8 +212,15 @@ class TypeInferenceMapper(CombineMapper): arg_dtypes = tuple(self.rec(par) for par in expr.parameters) mangle_result = self.kernel.mangle_function(identifier, arg_dtypes) - if mangle_result is not None: - return mangle_result[0] + if multiple_types_ok: + return mangle_result.result_dtypes + else: + if len(mangle_result.result_dtypes) != 1 and not multiple_types_ok: + raise LoopyError("functions with more or fewer than one " + "return value may only be used in direct assignments") + + if mangle_result is not None: + return mangle_result.result_dtypes[0] raise RuntimeError("no type inference information on " "function '%s'" % identifier) @@ -285,9 +292,19 @@ class TypeInferenceMapper(CombineMapper): def map_local_hw_index(self, expr, *args): return self.kernel.index_dtype - def map_reduction(self, expr): - return expr.operation.result_dtype( - self.kernel.target, self.rec(expr.expr), expr.inames) + def map_reduction(self, expr, multiple_types_ok=False): + result = expr.operation.result_dtypes( + self.kernel, self.rec(expr.expr), expr.inames) + + if multiple_types_ok: + return result + + else: + if len(result) != 1 and not multiple_types_ok: + raise LoopyError("reductions with more or fewer than one " + "return value may only be used in direct assignments") + + return result[0] # }}} diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py index d7009900b..7baea3243 100644 --- a/loopy/kernel/__init__.py +++ b/loopy/kernel/__init__.py @@ -313,7 +313,35 @@ class LoopKernel(RecordWithoutPickling): for mangler in manglers: mangle_result = mangler(self, identifier, arg_dtypes) if mangle_result is not None: - return mangle_result + from loopy.kernel.data import CallMangleInfo + if isinstance(mangle_result, CallMangleInfo): + assert len(mangle_result.arg_dtypes) == len(arg_dtypes) + return mangle_result + + assert isinstance(mangle_result, tuple) + + from warnings import warn + warn("'%s' returned a tuple instead of a CallMangleInfo instance. " + "This is deprecated." % mangler.__name__, + DeprecationWarning) + + if len(mangle_result) == 2: + result_dtype, target_name = mangle_result + return CallMangleInfo( + target_name=target_name, + result_dtypes=(result_dtype,), + arg_dtypes=None) + + elif len(mangle_result) == 3: + result_dtype, target_name, actual_arg_dtypes = mangle_result + return CallMangleInfo( + target_name=target_name, + result_dtypes=(result_dtype,), + arg_dtypes=actual_arg_dtypes) + + else: + raise ValueError("unexpected size of tuple returned by '%s'" + % mangler.__name__) return None @@ -1027,8 +1055,8 @@ class LoopKernel(RecordWithoutPickling): for dep_id in sorted(insn.depends_on): print_insn(kernel.id_to_insn[dep_id]) - if isinstance(insn, lp.Assignment): - lhs = str(insn.assignee) + if isinstance(insn, lp.MultiAssignmentBase): + lhs = ", ".join(str(a) for a in insn.assignees) rhs = str(insn.expression) trailing = [] elif isinstance(insn, lp.CInstruction): diff --git a/loopy/kernel/creation.py b/loopy/kernel/creation.py index 034c9dd82..9fe0f5b79 100644 --- a/loopy/kernel/creation.py +++ b/loopy/kernel/creation.py @@ -29,7 +29,9 @@ import numpy as np from loopy.tools import intern_frozenset_of_ids from loopy.symbolic import IdentityMapper, WalkMapper from loopy.kernel.data import ( - InstructionBase, Assignment, SubstitutionRule) + InstructionBase, + MultiAssignmentBase, Assignment, + SubstitutionRule) from loopy.diagnostic import LoopyError import islpy as isl from islpy import dim_type @@ -147,7 +149,6 @@ def expand_defines_in_expr(expr, defines): # {{{ parse instructions INSN_RE = re.compile( - "\s*(?:\<(?P<temp_var_type>.*?)\>)?" "\s*(?P<lhs>.+?)\s*(?<!\:)=\s*(?P<rhs>.+?)" "\s*?(?:\{(?P<options>.+)\}\s*)?$" ) @@ -159,7 +160,8 @@ SUBST_RE = re.compile( def parse_insn(insn): """ :return: a tuple ``(insn, inames_to_dup)``, where insn is a - :class:`Assignment` or a :class:`SubstitutionRule` + :class:`Assignment`, a :class:`CallInstruction`, + or a :class:`SubstitutionRule` and *inames_to_dup* is None or a list of tuples `(old, new)`. """ @@ -192,17 +194,69 @@ def parse_insn(insn): "the following error occurred:" % groups["rhs"]) raise - from pymbolic.primitives import Variable, Subscript, Call - if isinstance(lhs, Variable): - assignee_name = lhs.name - elif isinstance(lhs, Subscript): - assignee_name = lhs.aggregate.name - elif isinstance(lhs, Call): - assignee_name = None - assert subst_match is not None - else: - raise LoopyError("left hand side of assignment '%s' must " - "be variable or subscript" % lhs) + from pymbolic.primitives import Variable, Call, Subscript + from loopy.symbolic import TypeAnnotation + + # {{{ deal with subst rules + + if subst_match is not None: + assert insn_match is None + if isinstance(lhs, Variable): + subst_name = lhs.name + arg_names = [] + elif isinstance(lhs, Call): + if not isinstance(lhs.function, Variable): + raise RuntimeError("Invalid substitution rule left-hand side") + subst_name = lhs.function.name + arg_names = [] + + for i, arg in enumerate(lhs.parameters): + if not isinstance(arg, Variable): + raise RuntimeError("Invalid substitution rule " + "left-hand side: %s--arg number %d " + "is not a variable" % (lhs, i)) + arg_names.append(arg.name) + else: + raise RuntimeError("Invalid substitution rule left-hand side") + + return SubstitutionRule( + name=subst_name, + arguments=tuple(arg_names), + expression=rhs), [] + + # }}} + + if not isinstance(lhs, tuple): + lhs = (lhs,) + + temp_var_types = [] + new_lhs = [] + assignee_names = [] + + for lhs_i in lhs: + if isinstance(lhs_i, TypeAnnotation): + if lhs_i.type is None: + temp_var_types.append(lp.auto) + else: + temp_var_types.append(lhs_i.type) + + lhs_i = lhs_i.child + else: + temp_var_types.append(None) + + if isinstance(lhs_i, Variable): + assignee_names.append(lhs_i.name) + elif isinstance(lhs_i, Subscript): + assignee_names.append(lhs_i.aggregate.name) + else: + raise LoopyError("left hand side of assignment '%s' must " + "be variable or subscript" % (lhs_i,)) + + new_lhs.append(lhs_i) + + lhs = tuple(new_lhs) + temp_var_types = tuple(temp_var_types) + del new_lhs if insn_match is not None: depends_on = None @@ -290,6 +344,11 @@ def parse_insn(insn): if tag.strip()) elif opt_key == "atomic": + if len(assignee_names) != 1: + raise LoopyError("atomic operations with more than one " + "left-hand side not supported") + assignee_name, = assignee_names + if opt_value is None: atomicity = atomicity + ( lp.AtomicUpdate(assignee_name),) @@ -302,6 +361,7 @@ def parse_insn(insn): raise LoopyError("atomicity directive not " "understood: %s" % v) + del assignee_name else: raise ValueError( @@ -309,16 +369,7 @@ def parse_insn(insn): "(maybe a missing/extraneous =value?)" % opt_key) - if groups["temp_var_type"] is not None: - if groups["temp_var_type"]: - temp_var_type = np.dtype(groups["temp_var_type"]) - else: - import loopy as lp - temp_var_type = lp.auto - else: - temp_var_type = None - - return Assignment( + kwargs = dict( id=( intern(insn_id) if isinstance(insn_id, str) @@ -330,38 +381,15 @@ def parse_insn(insn): conflicts_with_groups=conflicts_with_groups, forced_iname_deps_is_final=forced_iname_deps_is_final, forced_iname_deps=forced_iname_deps, - assignee=lhs, expression=rhs, - temp_var_type=temp_var_type, - atomicity=atomicity, priority=priority, predicates=predicates, - tags=tags), inames_to_dup + tags=tags, + atomicity=atomicity) - elif subst_match is not None: - from pymbolic.primitives import Variable, Call - - if isinstance(lhs, Variable): - subst_name = lhs.name - arg_names = [] - elif isinstance(lhs, Call): - if not isinstance(lhs.function, Variable): - raise RuntimeError("Invalid substitution rule left-hand side") - subst_name = lhs.function.name - arg_names = [] - - for i, arg in enumerate(lhs.parameters): - if not isinstance(arg, Variable): - raise RuntimeError("Invalid substitution rule " - "left-hand side: %s--arg number %d " - "is not a variable" % (lhs, i)) - arg_names.append(arg.name) - else: - raise RuntimeError("Invalid substitution rule left-hand side") - - return SubstitutionRule( - name=subst_name, - arguments=tuple(arg_names), - expression=rhs), [] + from loopy.kernel.data import make_assignment + return make_assignment( + lhs, rhs, temp_var_types, **kwargs + ), inames_to_dup def parse_if_necessary(insn, defines): @@ -522,13 +550,13 @@ class ArgumentGuesser: self.all_written_names = set() from loopy.symbolic import get_dependencies for insn in instructions: - if isinstance(insn, Assignment): - (assignee_var_name, _), = insn.assignees_and_indices() - self.all_written_names.add(assignee_var_name) - self.all_names.update(get_dependencies( - self.submap(insn.assignee))) - self.all_names.update(get_dependencies( - self.submap(insn.expression))) + if isinstance(insn, MultiAssignmentBase): + for assignee_var_name, _ in insn.assignees_and_indices(): + self.all_written_names.add(assignee_var_name) + self.all_names.update(get_dependencies( + self.submap(insn.assignees))) + self.all_names.update(get_dependencies( + self.submap(insn.expression))) def find_index_rank(self, name): irf = IndexRankFinder(name) @@ -590,10 +618,12 @@ class ArgumentGuesser: temp_var_names = set(six.iterkeys(self.temporary_variables)) for insn in self.instructions: - if isinstance(insn, Assignment): - if insn.temp_var_type is not None: - (assignee_var_name, _), = insn.assignees_and_indices() - temp_var_names.add(assignee_var_name) + if isinstance(insn, MultiAssignmentBase): + for (assignee_var_name, _), temp_var_type in zip( + insn.assignees_and_indices(), + insn.temp_var_types): + if temp_var_type is not None: + temp_var_names.add(assignee_var_name) # }}} @@ -787,7 +817,7 @@ def expand_cses(instructions, cse_prefix="cse_expr"): new_temp_vars = [] for insn in instructions: - if isinstance(insn, Assignment): + if isinstance(insn, MultiAssignmentBase): new_insns.append(insn.copy(expression=cseam(insn.expression))) else: new_insns.append(insn) @@ -806,29 +836,36 @@ def create_temporaries(knl, default_order): import loopy as lp for insn in knl.instructions: - if isinstance(insn, Assignment) \ - and insn.temp_var_type is not None: - (assignee_name, _), = insn.assignees_and_indices() - - if assignee_name in new_temp_vars: - raise RuntimeError("cannot create temporary variable '%s'--" - "already exists" % assignee_name) - if assignee_name in knl.arg_dict: - raise RuntimeError("cannot create temporary variable '%s'--" - "already exists as argument" % assignee_name) - - logger.debug("%s: creating temporary %s" - % (knl.name, assignee_name)) - - new_temp_vars[assignee_name] = lp.TemporaryVariable( - name=assignee_name, - dtype=insn.temp_var_type, - is_local=lp.auto, - base_indices=lp.auto, - shape=lp.auto, - order=default_order) - - insn = insn.copy(temp_var_type=None) + if isinstance(insn, MultiAssignmentBase): + for (assignee_name, _), temp_var_type in zip( + insn.assignees_and_indices(), + insn.temp_var_types): + + if temp_var_type is None: + continue + + if assignee_name in new_temp_vars: + raise RuntimeError("cannot create temporary variable '%s'--" + "already exists" % assignee_name) + if assignee_name in knl.arg_dict: + raise RuntimeError("cannot create temporary variable '%s'--" + "already exists as argument" % assignee_name) + + logger.debug("%s: creating temporary %s" + % (knl.name, assignee_name)) + + new_temp_vars[assignee_name] = lp.TemporaryVariable( + name=assignee_name, + dtype=temp_var_type, + is_local=lp.auto, + base_indices=lp.auto, + shape=lp.auto, + order=default_order) + + if isinstance(insn, Assignment): + insn = insn.copy(temp_var_type=None) + else: + insn = insn.copy(temp_var_types=None) new_insns.append(insn) @@ -932,8 +969,8 @@ def guess_arg_shape_if_requested(kernel, default_order): try: for insn in kernel.instructions: - if isinstance(insn, lp.Assignment): - armap(submap(insn.assignee), kernel.insn_inames(insn)) + if isinstance(insn, lp.MultiAssignmentBase): + armap(submap(insn.assignees), kernel.insn_inames(insn)) armap(submap(insn.expression), kernel.insn_inames(insn)) except TypeError as e: from traceback import print_exc @@ -1133,10 +1170,9 @@ def make_kernel(domains, instructions, kernel_data=["..."], **kwargs): :arg default_offset: 0 or :class:`loopy.auto`. The default value of *offset* in :attr:`GlobalArg` for guessed arguments. Defaults to 0. - :arg function_manglers: list of functions of signature (name, arg_dtypes) - returning a tuple (result_dtype, c_name) - or a tuple (result_dtype, c_name, arg_dtypes), - where c_name is the C-level function to be called. + :arg function_manglers: list of functions of signature + ``(target, name, arg_dtypes)`` + returning a :class:`loopy.CallMangleInfo`. :arg symbol_manglers: list of functions of signature (name) returning a tuple (result_dtype, c_name), where c_name is the C-level symbol to be evaluated. diff --git a/loopy/kernel/data.py b/loopy/kernel/data.py index 30f9f3a10..b477b5fee 100644 --- a/loopy/kernel/data.py +++ b/loopy/kernel/data.py @@ -703,7 +703,7 @@ class InstructionBase(Record): result.append("priority=%d" % self.priority) if self.tags: result.append("tags=%s" % ":".join(self.tags)) - if self.atomicity: + if hasattr(self, "atomicity"): result.append("atomic=%s" % ":".join(str(a) for a in self.atomicity)) return result @@ -930,9 +930,48 @@ class AtomicUpdate(VarAtomicity): # }}} +# {{{ instruction base class: expression rhs + +class MultiAssignmentBase(InstructionBase): + """An assignment instruction with an expression as a right-hand side.""" + + fields = InstructionBase.fields | set(["expression"]) + + @memoize_method + def read_dependency_names(self): + from loopy.symbolic import get_dependencies + result = get_dependencies(self.expression) + for _, subscript in self.assignees_and_indices(): + result = result | get_dependencies(subscript) + + processed_predicates = frozenset( + pred.lstrip("!") for pred in self.predicates) + + result = result | processed_predicates + + return result + + @memoize_method + def reduction_inames(self): + def map_reduction(expr, rec): + rec(expr.expr) + for iname in expr.inames: + result.add(iname) + + from loopy.symbolic import ReductionCallbackMapper + cb_mapper = ReductionCallbackMapper(map_reduction) + + result = set() + cb_mapper(self.expression) + + return result + +# }}} + + # {{{ instruction: assignment -class Assignment(InstructionBase): +class Assignment(MultiAssignmentBase): """ .. attribute:: assignee @@ -983,8 +1022,8 @@ class Assignment(InstructionBase): .. automethod:: __init__ """ - fields = InstructionBase.fields | \ - set("assignee expression temp_var_type atomicity".split()) + fields = MultiAssignmentBase.fields | \ + set("assignee temp_var_type atomicity".split()) def __init__(self, assignee, expression, @@ -1001,7 +1040,7 @@ class Assignment(InstructionBase): priority=0, predicates=frozenset(), insn_deps=None, insn_deps_is_final=None): - InstructionBase.__init__(self, + super(Assignment, self).__init__( id=id, depends_on=depends_on, depends_on_is_final=depends_on_is_final, @@ -1039,35 +1078,6 @@ class Assignment(InstructionBase): # {{{ implement InstructionBase interface - @memoize_method - def read_dependency_names(self): - from loopy.symbolic import get_dependencies - result = get_dependencies(self.expression) - for _, subscript in self.assignees_and_indices(): - result = result | get_dependencies(subscript) - - processed_predicates = frozenset( - pred.lstrip("!") for pred in self.predicates) - - result = result | processed_predicates - - return result - - @memoize_method - def reduction_inames(self): - def map_reduction(expr, rec): - rec(expr.expr) - for iname in expr.inames: - result.add(iname) - - from loopy.symbolic import ReductionCallbackMapper - cb_mapper = ReductionCallbackMapper(map_reduction) - - result = set() - cb_mapper(self.expression) - - return result - @memoize_method def assignees_and_indices(self): return [_get_assignee_and_index(self.assignee)] @@ -1106,6 +1116,18 @@ class Assignment(InstructionBase): else: key_builder.rec(key_hash, getattr(self, field_name)) + # {{{ for interface uniformity with CallInstruction + + @property + def temp_var_types(self): + return (self.temp_var_type,) + + @property + def assignees(self): + return (self.assignee,) + + # }}} + class ExpressionInstruction(Assignment): def __init__(self, *args, **kwargs): @@ -1118,6 +1140,162 @@ class ExpressionInstruction(Assignment): # }}} +# {{{ instruction: function call + +class CallInstruction(MultiAssignmentBase): + """An instruction capturing a function call. Unlike :class:`Assignment`, + this instruction supports functions with multiple return values. + + .. attribute:: assignees + + .. attribute:: expression + + The following attributes are only used until + :func:`loopy.make_kernel` is finished: + + .. attribute:: temp_var_types + + if not *None*, a type that will be assigned to the new temporary variable + created from the assignee + + .. automethod:: __init__ + """ + + fields = MultiAssignmentBase.fields | \ + set("assignees temp_var_types".split()) + + def __init__(self, + assignees, expression, + id=None, + depends_on=None, + depends_on_is_final=None, + groups=None, + conflicts_with_groups=None, + no_sync_with=None, + forced_iname_deps_is_final=None, + forced_iname_deps=frozenset(), + boostable=None, boostable_into=None, tags=None, + temp_var_types=None, + priority=0, predicates=frozenset(), + insn_deps=None, insn_deps_is_final=None): + + super(CallInstruction, self).__init__( + id=id, + depends_on=depends_on, + depends_on_is_final=depends_on_is_final, + groups=groups, + conflicts_with_groups=conflicts_with_groups, + no_sync_with=no_sync_with, + forced_iname_deps_is_final=forced_iname_deps_is_final, + forced_iname_deps=forced_iname_deps, + boostable=boostable, + boostable_into=boostable_into, + priority=priority, + predicates=predicates, + tags=tags, + insn_deps=insn_deps, + insn_deps_is_final=insn_deps_is_final) + + from pymbolic.primitives import Call + from loopy.symbolic import Reduction + if not isinstance(expression, (Call, Reduction)) and expression is not None: + raise LoopyError("'expression' argument to CallInstruction " + "must be a function call") + + from loopy.symbolic import parse + if isinstance(assignees, str): + assignees = parse(assignees) + if isinstance(expression, str): + expression = parse(expression) + + # FIXME: It may be worth it to enable this check eventually. + # For now, it causes grief with certain 'checky' uses of the + # with_transformed_expressions(). (notably the access checker) + # + # from pymbolic.primitives import Variable, Subscript + # if not isinstance(assignee, (Variable, Subscript)): + # raise LoopyError("invalid lvalue '%s'" % assignee) + + self.assignees = assignees + self.expression = expression + self.temp_var_types = temp_var_types + + # {{{ implement InstructionBase interface + + @memoize_method + def assignees_and_indices(self): + return [_get_assignee_and_index(a) for a in self.assignees] + + def with_transformed_expressions(self, f, *args): + return self.copy( + assignees=f(self.assignees, *args), + expression=f(self.expression, *args)) + + # }}} + + def __str__(self): + result = "%s: %s <- %s" % (self.id, + ", ".join(str(a) for a in self.assignees), + self.expression) + + options = self.get_str_options() + if options: + result += " (%s)" % (": ".join(options)) + + if self.predicates: + result += "\n" + 10*" " + "if (%s)" % " && ".join(self.predicates) + return result + + def update_persistent_hash(self, key_hash, key_builder): + """Custom hash computation function for use with + :class:`pytools.persistent_dict.PersistentDict`. + + Only works in conjunction with :class:`loopy.tools.KeyBuilder`. + """ + + # Order matters for hash forming--sort the fields. + for field_name in sorted(self.fields): + if field_name in ["assignees", "expression"]: + key_builder.update_for_pymbolic_expression( + key_hash, getattr(self, field_name)) + else: + key_builder.rec(key_hash, getattr(self, field_name)) + +# }}} + + +def make_assignment(assignees, expression, temp_var_types=None, **kwargs): + if len(assignees) < 1: + raise LoopyError("every instruction must have a left-hand side") + elif len(assignees) > 1: + atomicity = kwargs.pop("atomicity", ()) + if atomicity: + raise LoopyError("atomic operations with more than one " + "left-hand side not supported") + + from pymbolic.primitives import Call + from loopy.symbolic import Reduction + if not isinstance(expression, (Call, Reduction)): + raise LoopyError("right-hand side in multiple assignment must be " + "function call or reduction") + + return CallInstruction( + assignees=assignees, + expression=expression, + temp_var_types=temp_var_types, + **kwargs) + + else: + return Assignment( + assignee=assignees[0], + expression=expression, + temp_var_type=( + temp_var_types[0] + if temp_var_types is not None + else None), + **kwargs) + + # {{{ c instruction class CInstruction(InstructionBase): @@ -1294,4 +1472,35 @@ class CInstruction(InstructionBase): # }}} + +# {{{ function call mangling + +class CallMangleInfo(Record): + """ + .. attribute:: target_name + + A string. The name of the function to be called in the + generated target code. + + .. attribute:: result_dtypes + + A tuple of :class:`LoopyType` instances indicating what + types of values the function returns. + + .. attribute:: arg_dtypes + + A tuple of :class:`LoopyType` instances indicating what + types of arguments the function actually receives. + """ + + def __init__(self, target_name, result_dtypes, arg_dtypes): + assert isinstance(result_dtypes, tuple) + + super(CallMangleInfo, self).__init__( + target_name=target_name, + result_dtypes=result_dtypes, + arg_dtypes=arg_dtypes) + +# }}} + # vim: foldmethod=marker diff --git a/loopy/kernel/tools.py b/loopy/kernel/tools.py index ef3cb3c8a..0a02979ab 100644 --- a/loopy/kernel/tools.py +++ b/loopy/kernel/tools.py @@ -423,11 +423,11 @@ def get_dot_dependency_graph(kernel, iname_cluster=True, use_insn_id=False): dep_graph = {} lines = [] - from loopy.kernel.data import Assignment, CInstruction + from loopy.kernel.data import MultiAssignmentBase, CInstruction for insn in kernel.instructions: - if isinstance(insn, Assignment): - op = "%s <- %s" % (insn.assignee, insn.expression) + if isinstance(insn, MultiAssignmentBase): + op = "%s <- %s" % (insn.assignees, insn.expression) if len(op) > 200: op = op[:200] + "..." @@ -657,8 +657,9 @@ def get_auto_axis_iname_ranking_by_stride(kernel, insn): from pymbolic.primitives import Subscript - if isinstance(insn.assignee, Subscript): - ary_acc_exprs.append(insn.assignee) + for assignee in insn.assignees: + if isinstance(assignee, Subscript): + ary_acc_exprs.append(assignee) # }}} @@ -855,7 +856,7 @@ def assign_automatic_axes(kernel, axis=0, local_size=None): import loopy as lp for insn in kernel.instructions: - if not isinstance(insn, lp.Assignment): + if not isinstance(insn, lp.MultiAssignmentBase): continue auto_axis_inames = [ diff --git a/loopy/library/function.py b/loopy/library/function.py index df623a477..efa590371 100644 --- a/loopy/library/function.py +++ b/loopy/library/function.py @@ -38,7 +38,9 @@ def default_function_mangler(kernel, name, arg_dtypes): def single_arg_function_mangler(kernel, name, arg_dtypes): if len(arg_dtypes) == 1: dtype, = arg_dtypes - return dtype, name + + from loopy.kernel.data import CallMangleInfo + return CallMangleInfo(name, (dtype,), (dtype,)) return None diff --git a/loopy/library/reduction.py b/loopy/library/reduction.py index b39115a35..1540222b2 100644 --- a/loopy/library/reduction.py +++ b/loopy/library/reduction.py @@ -36,7 +36,7 @@ class ReductionOperation(object): equality-comparable. """ - def result_dtype(self, target, arg_dtype, inames): + def result_dtypes(self, target, arg_dtype, inames): raise NotImplementedError def neutral_element(self, dtype, inames): @@ -82,11 +82,12 @@ class ScalarReductionOperation(ReductionOperation): """ self.forced_result_type = forced_result_type - def result_dtype(self, target, arg_dtype, inames): + def result_dtypes(self, kernel, arg_dtype, inames): if self.forced_result_type is not None: - return self.parse_result_type(target, self.forced_result_type) + return self.parse_result_type( + kernel.target, self.forced_result_type) - return arg_dtype + return (arg_dtype,) def __hash__(self): return hash((type(self), self.forced_result_type)) @@ -148,23 +149,12 @@ class MinReductionOperation(ScalarReductionOperation): # {{{ argmin/argmax -ARGEXT_STRUCT_DTYPES = {} - - class _ArgExtremumReductionOperation(ReductionOperation): def prefix(self, dtype): return "loopy_arg%s_%s" % (self.which, dtype.numpy_dtype.type.__name__) - def result_dtype(self, target, dtype, inames): - try: - return ARGEXT_STRUCT_DTYPES[dtype] - except KeyError: - struct_dtype = np.dtype([("value", dtype), ("index", np.int32)]) - ARGEXT_STRUCT_DTYPES[dtype] = NumpyType(struct_dtype, target) - - target.get_or_register_dtype(self.prefix(dtype)+"_result", - NumpyType(struct_dtype)) - return ARGEXT_STRUCT_DTYPES[dtype] + def result_dtypes(self, kernel, dtype, inames): + return (dtype, kernel.index_dtype) def neutral_element(self, dtype, inames): return ArgExtFunction(self, dtype, "init", inames)() @@ -179,7 +169,7 @@ class _ArgExtremumReductionOperation(ReductionOperation): iname, = inames return ArgExtFunction(self, dtype, "update", inames)( - operand1, operand2, var(iname)) + *(operand1 + (operand2, var(iname)))) class ArgMaxReductionOperation(_ArgExtremumReductionOperation): @@ -207,7 +197,7 @@ class ArgExtFunction(FunctionIdentifier): return (self.reduction_op, self.scalar_dtype, self.name, self.inames) -def get_argext_preamble(target, func_id): +def get_argext_preamble(kernel, func_id): op = func_id.reduction_op prefix = op.prefix(func_id.scalar_dtype) @@ -216,35 +206,32 @@ def get_argext_preamble(target, func_id): c_code_mapper = CCodeMapper() return (prefix, """ - typedef struct { - %(scalar_type)s value; - int index; - } %(type_name)s; - - inline %(type_name)s %(prefix)s_init() + inline %(scalar_t)s %(prefix)s_init(%(index_t)s *index_out) { - %(type_name)s result; - result.value = %(neutral)s; - result.index = INT_MIN; - return result; + *index_out = INT_MIN; + return %(neutral)s; } - inline %(type_name)s %(prefix)s_update( - %(type_name)s state, %(scalar_type)s op2, int index) + inline %(scalar_t)s %(prefix)s_update( + %(scalar_t)s op1, %(index_t)s index1, + %(scalar_t)s op2, %(index_t)s index2, + %(index_t)s *index_out) { - %(type_name)s result; - if (op2 %(comp)s state.value) + if (op2 %(comp)s op1) + { + *index_out = index2; + return op2; + } + else { - result.value = op2; - result.index = index; - return result; + *index_out = index1; + return op1; } - else return state; } """ % dict( - type_name=prefix+"_result", - scalar_type=target.dtype_to_typename(func_id.scalar_dtype), + scalar_t=kernel.target.dtype_to_typename(func_id.scalar_dtype), prefix=prefix, + index_t=kernel.target.dtype_to_typename(kernel.index_dtype), neutral=c_code_mapper( op.neutral_sign*get_le_neutral(func_id.scalar_dtype)), comp=op.update_comparison, @@ -308,8 +295,19 @@ def reduction_function_mangler(kernel, func_id, arg_dtypes): raise LoopyError("only OpenCL supported for now") op = func_id.reduction_op - return (op.result_dtype(kernel.target, func_id.scalar_dtype, func_id.inames), - "%s_%s" % (op.prefix(func_id.scalar_dtype), func_id.name)) + + from loopy.kernel.data import CallMangleInfo + return CallMangleInfo( + target_name="%s_%s" % ( + op.prefix(func_id.scalar_dtype), func_id.name), + result_dtypes=op.result_dtypes( + kernel, func_id.scalar_dtype, func_id.inames), + arg_dtypes=( + func_id.scalar_dtype, + kernel.index_dtype, + func_id.scalar_dtype, + kernel.index_dtype), + ) return None @@ -322,6 +320,6 @@ def reduction_preamble_generator(preamble_info): if not isinstance(preamble_info.kernel.target, OpenCLTarget): raise LoopyError("only OpenCL supported for now") - yield get_argext_preamble(preamble_info.kernel.target, func.name) + yield get_argext_preamble(preamble_info.kernel, func.name) # vim: fdm=marker diff --git a/loopy/maxima.py b/loopy/maxima.py index 1a2d0770c..29f974ff9 100644 --- a/loopy/maxima.py +++ b/loopy/maxima.py @@ -82,7 +82,7 @@ def get_loopy_instructions_as_maxima(kernel, prefix): if not isinstance(insn, InstructionBase): insn = kernel.id_to_insn[insn] if not isinstance(insn, Assignment): - raise RuntimeError("non-expression instructions not supported " + raise RuntimeError("non-single-output assignment not supported " "in maxima export") for dep in insn.depends_on: diff --git a/loopy/preprocess.py b/loopy/preprocess.py index e25c1516b..51d588ef5 100644 --- a/loopy/preprocess.py +++ b/loopy/preprocess.py @@ -31,6 +31,7 @@ from loopy.diagnostic import ( from pytools.persistent_dict import PersistentDict from loopy.tools import LoopyKeyBuilder from loopy.version import DATA_MODEL_VERSION +from loopy.kernel.data import make_assignment import logging logger = logging.getLogger(__name__) @@ -123,14 +124,26 @@ def _infer_var_type(kernel, var_name, type_inf_mapper, subst_expander): from loopy.diagnostic import DependencyTypeInferenceFailure for writer_insn_id in kernel.writer_map().get(var_name, []): writer_insn = kernel.id_to_insn[writer_insn_id] - if not isinstance(writer_insn, lp.Assignment): + if not isinstance(writer_insn, lp.MultiAssignmentBase): continue expr = subst_expander(writer_insn.expression) try: debug(" via expr %s" % expr) - result = type_inf_mapper(expr) + if isinstance(writer_insn, lp.Assignment): + result = type_inf_mapper(expr) + elif isinstance(writer_insn, lp.CallInstruction): + result_dtypes = type_inf_mapper(expr, multiple_types_ok=True) + + result = None + for (assignee, _), comp_dtype in zip( + writer_insn.assignees_and_indices(), result_dtypes): + if assignee == var_name: + result = comp_dtype + break + + assert result is not None debug(" result: %s" % result) @@ -438,35 +451,44 @@ def realize_reduction(kernel, insn_id_filter=None): new_insns = [] + insn_id_gen = kernel.get_instruction_id_generator() + var_name_gen = kernel.get_var_name_generator() new_temporary_variables = kernel.temporary_variables.copy() from loopy.expression import TypeInferenceMapper type_inf_mapper = TypeInferenceMapper(kernel) - def map_reduction(expr, rec): + def map_reduction(expr, rec, multiple_values_ok=False): # Only expand one level of reduction at a time, going from outermost to # innermost. Otherwise we get the (iname + insn) dependencies wrong. - from pymbolic import var - - target_var_name = var_name_gen("acc_"+"_".join(expr.inames)) - target_var = var(target_var_name) - try: arg_dtype = type_inf_mapper(expr.expr) except DependencyTypeInferenceFailure: raise LoopyError("failed to determine type of accumulator for " "reduction '%s'" % expr) - from loopy.kernel.data import Assignment, TemporaryVariable + reduction_dtypes = expr.operation.result_dtypes( + kernel, arg_dtype, expr.inames) + + ncomp = len(reduction_dtypes) + + from pymbolic import var + + acc_var_names = [ + var_name_gen("acc_"+"_".join(expr.inames)) + for i in range(ncomp)] + acc_vars = tuple(var(n) for n in acc_var_names) + + from loopy.kernel.data import TemporaryVariable - new_temporary_variables[target_var_name] = TemporaryVariable( - name=target_var_name, - shape=(), - dtype=expr.operation.result_dtype( - kernel.target, arg_dtype, expr.inames), - is_local=False) + for name, dtype in zip(acc_var_names, reduction_dtypes): + new_temporary_variables[name] = TemporaryVariable( + name=name, + shape=(), + dtype=dtype, + is_local=False) outer_insn_inames = temp_kernel.insn_inames(insn) bad_inames = frozenset(expr.inames) & outer_insn_inames @@ -474,13 +496,12 @@ def realize_reduction(kernel, insn_id_filter=None): raise LoopyError("reduction used within loop(s) that it was " "supposed to reduce over: " + ", ".join(bad_inames)) - init_id = temp_kernel.make_unique_instruction_id( - based_on="%s_%s_init" % (insn.id, "_".join(expr.inames)), - extra_used_ids=set(i.id for i in generated_insns)) + init_id = insn_id_gen( + "%s_%s_init" % (insn.id, "_".join(expr.inames))) - init_insn = Assignment( + init_insn = make_assignment( id=init_id, - assignee=target_var, + assignees=acc_vars, forced_iname_deps=outer_insn_inames - frozenset(expr.inames), forced_iname_deps_is_final=insn.forced_iname_deps_is_final, depends_on=frozenset(), @@ -488,19 +509,20 @@ def realize_reduction(kernel, insn_id_filter=None): generated_insns.append(init_insn) - update_id = temp_kernel.make_unique_instruction_id( - based_on="%s_%s_update" % (insn.id, "_".join(expr.inames)), - extra_used_ids=set(i.id for i in generated_insns)) + update_id = insn_id_gen( + based_on="%s_%s_update" % (insn.id, "_".join(expr.inames))) update_insn_iname_deps = temp_kernel.insn_inames(insn) | set(expr.inames) if insn.forced_iname_deps_is_final: update_insn_iname_deps = insn.forced_iname_deps | set(expr.inames) - reduction_insn = Assignment( + reduction_insn = make_assignment( id=update_id, - assignee=target_var, + assignees=acc_vars, expression=expr.operation( - arg_dtype, target_var, expr.expr, expr.inames), + arg_dtype, + acc_vars if len(acc_vars) > 1 else acc_vars[0], + expr.expr, expr.inames), depends_on=frozenset([init_insn.id]) | insn.depends_on, forced_iname_deps=update_insn_iname_deps, forced_iname_deps_is_final=insn.forced_iname_deps_is_final) @@ -509,12 +531,17 @@ def realize_reduction(kernel, insn_id_filter=None): new_insn_depends_on.add(reduction_insn.id) - return target_var + if multiple_values_ok: + return acc_vars + else: + assert len(acc_vars) == 1 + return acc_vars[0] from loopy.symbolic import ReductionCallbackMapper cb_mapper = ReductionCallbackMapper(map_reduction) insn_queue = kernel.instructions[:] + insn_id_replacements = {} temp_kernel = kernel @@ -526,24 +553,47 @@ def realize_reduction(kernel, insn_id_filter=None): insn = insn_queue.pop(0) if insn_id_filter is not None and insn.id != insn_id_filter \ - or not isinstance(insn, lp.Assignment): + or not isinstance(insn, lp.MultiAssignmentBase): new_insns.append(insn) continue # Run reduction expansion. - new_expression = cb_mapper(insn.expression) + from loopy.symbolic import Reduction + if isinstance(insn.expression, Reduction): + new_expressions = cb_mapper(insn.expression, multiple_values_ok=True) + else: + new_expressions = (cb_mapper(insn.expression),) if generated_insns: # An expansion happened, so insert the generated stuff plus # ourselves back into the queue. - insn = insn.copy( - expression=new_expression, - depends_on=insn.depends_on - | frozenset(new_insn_depends_on), - forced_iname_deps=temp_kernel.insn_inames(insn)) + kwargs = insn.get_copy_kwargs( + depends_on=insn.depends_on + | frozenset(new_insn_depends_on), + forced_iname_deps=temp_kernel.insn_inames(insn)) + + kwargs.pop("id") + kwargs.pop("expression") + kwargs.pop("assignee", None) + kwargs.pop("assignees", None) + kwargs.pop("temp_var_type", None) + kwargs.pop("temp_var_types", None) - insn_queue = generated_insns + [insn] + insn_queue + replacement_insns = [ + lp.Assignment( + id=insn_id_gen(insn.id), + assignee=assignee, + expression=new_expr, + **kwargs) + for assignee, new_expr in zip(insn.assignees, new_expressions)] + + insn_id_replacements[insn.id] = [ + rinsn.id for rinsn in replacement_insns] + + # FIXME: Track dep rename + + insn_queue = generated_insns + replacement_insns + insn_queue # The reduction expander needs an up-to-date kernel # object to find dependencies. Keep temp_kernel up-to-date. @@ -558,10 +608,12 @@ def realize_reduction(kernel, insn_id_filter=None): new_insns.append(insn) - return kernel.copy( + kernel = kernel.copy( instructions=new_insns, temporary_variables=new_temporary_variables) + return lp.replace_instruction_ids(kernel, insn_id_replacements) + # }}} diff --git a/loopy/schedule.py b/loopy/schedule.py index b606ba360..8094995d7 100644 --- a/loopy/schedule.py +++ b/loopy/schedule.py @@ -347,7 +347,7 @@ def format_insn(kernel, insn_id): Style = kernel.options._style return "[%s] %s%s%s <- %s%s%s" % ( format_insn_id(kernel, insn_id), - Fore.BLUE, str(insn.assignee), Style.RESET_ALL, + Fore.BLUE, ", ".join(str(a) for a in insn.assignees), Style.RESET_ALL, Fore.MAGENTA, str(insn.expression), Style.RESET_ALL) @@ -355,7 +355,7 @@ def dump_schedule(kernel, schedule): lines = [] indent = "" - from loopy.kernel.data import Assignment + from loopy.kernel.data import MultiAssignmentBase for sched_item in schedule: if isinstance(sched_item, EnterLoop): lines.append(indent + "LOOP %s" % sched_item.iname) @@ -365,7 +365,7 @@ def dump_schedule(kernel, schedule): lines.append(indent + "ENDLOOP %s" % sched_item.iname) elif isinstance(sched_item, RunInstruction): insn = kernel.id_to_insn[sched_item.insn_id] - if isinstance(insn, Assignment): + if isinstance(insn, MultiAssignmentBase): insn_str = format_insn(kernel, sched_item.insn_id) else: insn_str = sched_item.insn_id diff --git a/loopy/statistics.py b/loopy/statistics.py index c5eb3142d..b0e5ad701 100755 --- a/loopy/statistics.py +++ b/loopy/statistics.py @@ -31,7 +31,7 @@ import islpy as isl from pytools import memoize_in from pymbolic.mapper import CombineMapper from functools import reduce -from loopy.kernel.data import Assignment +from loopy.kernel.data import MultiAssignmentBase from loopy.diagnostic import warn @@ -849,7 +849,7 @@ def gather_access_footprints(kernel): read_footprints = [] for insn in kernel.instructions: - if not isinstance(insn, Assignment): + if not isinstance(insn, MultiAssignmentBase): warn(kernel, "count_non_assignment", "Non-assignment instruction encountered in " "gather_access_footprints, not counted") @@ -861,7 +861,8 @@ def gather_access_footprints(kernel): afg = AccessFootprintGatherer(kernel, domain) - write_footprints.append(afg(insn.assignee)) + for assignee in insn.assignees: + write_footprints.append(afg(insn.assignees)) read_footprints.append(afg(insn.expression)) write_footprints = AccessFootprintGatherer.combine(write_footprints) diff --git a/loopy/symbolic.py b/loopy/symbolic.py index b887c7034..219d66d49 100644 --- a/loopy/symbolic.py +++ b/loopy/symbolic.py @@ -33,7 +33,7 @@ from pytools import memoize, memoize_method, Record import pytools.lex from pymbolic.primitives import ( - Leaf, AlgebraicLeaf, Variable, + Leaf, AlgebraicLeaf, Expression, Variable, CommonSubexpression) from pymbolic.mapper import ( @@ -98,6 +98,9 @@ class IdentityMapperMixin(object): # leaf, doesn't change return expr + def map_type_annotation(self, expr, *args): + return TypeAnnotation(expr.type, self.rec(expr.child)) + map_linear_subscript = IdentityMapperBase.map_subscript @@ -321,6 +324,18 @@ class TypedCSE(CommonSubexpression): return dict(dtype=self.dtype) +class TypeAnnotation(Expression): + def __init__(self, type, child): + super(TypeAnnotation, self).__init__() + self.type = type + self.child = child + + def __getinitargs__(self): + return (self.type, self.child) + + mapper_method = intern("map_type_annotation") + + class TaggedVariable(Variable): """This is an identifier with a tag, such as 'matrix$one', where 'one' identifies this specific use of the identifier. This mechanism @@ -882,7 +897,6 @@ class FunctionToPrimitiveMapper(IdentityMapper): # {{{ customization to pymbolic parser _open_dbl_bracket = intern("open_dbl_bracket") -_close_dbl_bracket = intern("close_dbl_bracket") TRAILING_FLOAT_TAG_RE = re.compile("^(.*?)([a-zA-Z]*)$") @@ -908,6 +922,26 @@ class LoopyParser(ParserBase): else: return float(val) # generic float + def parse_prefix(self, pstate): + from pymbolic.parser import _PREC_UNARY, _less, _greater, _identifier + if pstate.is_next(_less): + pstate.advance() + if pstate.is_next(_greater): + typename = None + pstate.advance() + else: + pstate.expect(_identifier) + typename = pstate.next_str() + pstate.advance() + pstate.expect(_greater) + pstate.advance() + + return TypeAnnotation( + typename, + self.parse_expression(pstate, _PREC_UNARY)) + else: + return super(LoopyParser, self).parse_prefix(pstate) + def parse_postfix(self, pstate, min_precedence, left_exp): from pymbolic.parser import _PREC_CALL, _closebracket if pstate.next_tag() is _open_dbl_bracket and _PREC_CALL > min_precedence: @@ -1079,10 +1113,10 @@ class ReductionCallbackMapper(IdentityMapper): def __init__(self, callback): self.callback = callback - def map_reduction(self, expr): - result = self.callback(expr, self.rec) + def map_reduction(self, expr, **kwargs): + result = self.callback(expr, self.rec, **kwargs) if result is None: - return IdentityMapper.map_reduction(self, expr) + return IdentityMapper.map_reduction(self, expr, **kwargs) return result # }}} diff --git a/loopy/target/__init__.py b/loopy/target/__init__.py index 42a9e5a3d..cad451c2e 100644 --- a/loopy/target/__init__.py +++ b/loopy/target/__init__.py @@ -133,6 +133,9 @@ class TargetBase(object): def get_image_arg_decl(self, name, shape, num_target_axes, dtype, is_written): raise NotImplementedError() + def generate_multiple_assignment(self, codegen_state, insn): + raise NotImplementedError() + def generate_atomic_update(self, kernel, codegen_state, lhs_atomicity, lhs_var, lhs_expr, rhs_expr, lhs_dtype): raise NotImplementedError("atomic update in target %s" % type(self).__name__) diff --git a/loopy/target/c/__init__.py b/loopy/target/c/__init__.py index 822ee1838..c6f956253 100644 --- a/loopy/target/c/__init__.py +++ b/loopy/target/c/__init__.py @@ -341,6 +341,70 @@ class CTarget(TargetBase): "++%s" % iname, inner) + def generate_multiple_assignment(self, codegen_state, insn): + ecm = codegen_state.expression_to_code_mapper + + from pymbolic.primitives import Variable + from pymbolic.mapper.stringifier import PREC_NONE + + func_id = insn.expression.function + parameters = insn.expression.parameters + + if isinstance(func_id, Variable): + func_id = func_id.name + + assignee_var_descriptors = [codegen_state.kernel.get_var_descriptor(a) + for a, _ in insn.assignees_and_indices()] + + par_dtypes = tuple(ecm.infer_type(par) for par in parameters) + + str_parameters = None + + mangle_result = codegen_state.kernel.mangle_function(func_id, par_dtypes) + if mangle_result is None: + raise RuntimeError("function '%s' unknown--" + "maybe you need to register a function mangler?" + % func_id) + + assert mangle_result.arg_dtypes is not None + + from loopy.expression import dtype_to_type_context + str_parameters = [ + ecm(par, PREC_NONE, + dtype_to_type_context(self, tgt_dtype), + tgt_dtype) + for par, par_dtype, tgt_dtype in zip( + parameters, par_dtypes, mangle_result.arg_dtypes)] + + from loopy.codegen import SeenFunction + codegen_state.seen_functions.add( + SeenFunction(func_id, + mangle_result.target_name, + mangle_result.arg_dtypes)) + + for a, tgt_dtype in zip(insn.assignees[1:], mangle_result.result_dtypes[1:]): + if tgt_dtype != ecm.infer_type(a): + raise LoopyError("type mismatch in %d'th (1-based) left-hand " + "side of instruction '%s'" % (insn.id)) + str_parameters.append( + "&(%s)" % ecm(a, PREC_NONE, + dtype_to_type_context(self, tgt_dtype), + tgt_dtype)) + + result = "%s(%s)" % (mangle_result.target_name, ", ".join(str_parameters)) + + result = ecm.wrap_in_typecast( + mangle_result.result_dtypes[0], + assignee_var_descriptors[0].dtype, + result) + + lhs_code = ecm(insn.assignees[0], prec=PREC_NONE, type_context=None) + + from cgen import Assign + return Assign( + lhs_code, + result) + # }}} # vim: foldmethod=marker diff --git a/loopy/target/c/codegen/expression.py b/loopy/target/c/codegen/expression.py index 63a053c58..11686dcdc 100644 --- a/loopy/target/c/codegen/expression.py +++ b/loopy/target/c/codegen/expression.py @@ -34,7 +34,7 @@ import islpy as isl from loopy.expression import dtype_to_type_context, TypeInferenceMapper -from loopy.diagnostic import LoopyError +from loopy.diagnostic import LoopyError, LoopyWarning from loopy.tools import is_integer from loopy.types import LoopyType @@ -94,22 +94,23 @@ class LoopyCCodeMapper(RecursiveMapper): return ary - def rec(self, expr, prec, type_context=None, needed_dtype=None): - if needed_dtype is None: - return RecursiveMapper.rec(self, expr, prec, type_context) - - actual_type = self.infer_type(expr) - + def wrap_in_typecast(self, actual_type, needed_dtype, s): if (actual_type.is_complex() and needed_dtype.is_complex() and actual_type != needed_dtype): - result = RecursiveMapper.rec(self, expr, PREC_NONE, type_context) - return "%s_cast(%s)" % (self.complex_type_name(needed_dtype), result) + return "%s_cast(%s)" % (self.complex_type_name(needed_dtype), s) elif not actual_type.is_complex() and needed_dtype.is_complex(): - result = RecursiveMapper.rec(self, expr, PREC_NONE, type_context) - return "%s_fromreal(%s)" % (self.complex_type_name(needed_dtype), result) + return "%s_fromreal(%s)" % (self.complex_type_name(needed_dtype), s) else: + return s + + def rec(self, expr, prec, type_context=None, needed_dtype=None): + if needed_dtype is None: return RecursiveMapper.rec(self, expr, prec, type_context) + return self.wrap_in_typecast( + self.infer_type(expr), needed_dtype, + RecursiveMapper.rec(self, expr, PREC_NONE, type_context)) + __call__ = rec # }}} @@ -419,37 +420,32 @@ class LoopyCCodeMapper(RecursiveMapper): # }}} - c_name = None if isinstance(identifier, Variable): identifier = identifier.name - c_name = identifier par_dtypes = tuple(self.infer_type(par) for par in expr.parameters) str_parameters = None mangle_result = self.kernel.mangle_function(identifier, par_dtypes) - if mangle_result is not None: - if len(mangle_result) == 2: - result_dtype, c_name = mangle_result - elif len(mangle_result) == 3: - result_dtype, c_name, arg_tgt_dtypes = mangle_result - - str_parameters = [ - self.rec(par, PREC_NONE, - dtype_to_type_context(self.kernel.target, tgt_dtype), - tgt_dtype) - for par, par_dtype, tgt_dtype in zip( - expr.parameters, par_dtypes, arg_tgt_dtypes)] - else: - raise RuntimeError("result of function mangler " - "for function '%s' not understood" - % identifier) + if mangle_result is None: + raise RuntimeError("function '%s' unknown--" + "maybe you need to register a function mangler?" + % identifier) - from loopy.codegen import SeenFunction - self.codegen_state.seen_functions.add( - SeenFunction(identifier, c_name, par_dtypes)) - if str_parameters is None: + if len(mangle_result.result_dtypes) != 1: + raise LoopyError("functions with more or fewer than one return value " + "may not be used in an expression") + + if mangle_result.arg_dtypes is not None: + str_parameters = [ + self.rec(par, PREC_NONE, + dtype_to_type_context(self.kernel.target, tgt_dtype), + tgt_dtype) + for par, par_dtype, tgt_dtype in zip( + expr.parameters, par_dtypes, mangle_result.arg_dtypes)] + + else: # /!\ FIXME For some functions (e.g. 'sin'), it makes sense to # propagate the type context here. But for many others, it does # not. Using the inferred type as a stopgap for now. @@ -459,11 +455,18 @@ class LoopyCCodeMapper(RecursiveMapper): self.kernel.target, par_dtype)) for par, par_dtype in zip(expr.parameters, par_dtypes)] - if c_name is None: - raise RuntimeError("unable to find C name for function identifier '%s'" - % identifier) + from warnings import warn + warn("Calling function '%s' with unknown C signature--" + "return CallMangleInfo.arg_dtypes" + % identifier, LoopyWarning) + + from loopy.codegen import SeenFunction + self.codegen_state.seen_functions.add( + SeenFunction(identifier, + mangle_result.target_name, + mangle_result.arg_dtypes or par_dtypes)) - return "%s(%s)" % (c_name, ", ".join(str_parameters)) + return "%s(%s)" % (mangle_result.target_name, ", ".join(str_parameters)) # {{{ deal with complex-valued variables diff --git a/loopy/target/opencl.py b/loopy/target/opencl.py index 5cb6fbc19..362cbb79a 100644 --- a/loopy/target/opencl.py +++ b/loopy/target/opencl.py @@ -32,7 +32,7 @@ from pytools import memoize_method from loopy.diagnostic import LoopyError from loopy.types import NumpyType from loopy.target.c import DTypeRegistryWrapper -from loopy.kernel.data import temp_var_scope +from loopy.kernel.data import temp_var_scope, CallMangleInfo # {{{ dtype registry wrappers @@ -146,7 +146,7 @@ def opencl_function_mangler(kernel, name, arg_dtypes): if not isinstance(name, str): return None - if name in ["max", "min"] and len(arg_dtypes) == 2: + if name in ["max", "min", "atan2"] and len(arg_dtypes) == 2: dtype = np.find_common_type( [], [dtype.numpy_dtype for dtype in arg_dtypes]) @@ -156,14 +156,18 @@ def opencl_function_mangler(kernel, name, arg_dtypes): if dtype.kind == "f": name = "f" + name - return NumpyType(dtype), name - - if name in "atan2" and len(arg_dtypes) == 2: - return arg_dtypes[0], name + result_dtype = NumpyType(dtype) + return CallMangleInfo( + target_name=name, + result_dtypes=(result_dtype,), + arg_dtypes=2*(result_dtype,)) if name == "dot": scalar_dtype, offset, field_name = arg_dtypes[0].numpy_dtype.fields["s0"] - return NumpyType(scalar_dtype), name + return CallMangleInfo( + target_name=name, + result_dtypes=(NumpyType(scalar_dtype),), + arg_dtypes=(arg_dtypes[0],)*2) if name in _CL_SIMPLE_MULTI_ARG_FUNCTIONS: num_args = _CL_SIMPLE_MULTI_ARG_FUNCTIONS[name] @@ -178,7 +182,11 @@ def opencl_function_mangler(kernel, name, arg_dtypes): raise LoopyError("%s does not support complex numbers" % name) - return NumpyType(dtype), name + result_dtype = NumpyType(dtype) + return CallMangleInfo( + target_name=name, + result_dtypes=(result_dtype,), + arg_dtypes=(result_dtype,)*3) return None diff --git a/loopy/target/pyopencl.py b/loopy/target/pyopencl.py index aea247397..6097cd7a2 100644 --- a/loopy/target/pyopencl.py +++ b/loopy/target/pyopencl.py @@ -29,6 +29,7 @@ from six.moves import range import numpy as np +from loopy.kernel.data import CallMangleInfo from loopy.target.opencl import OpenCLTarget from loopy.types import NumpyType @@ -194,13 +195,18 @@ def pyopencl_function_mangler(target, name, arg_dtypes): "sin", "cos", "tan", "sinh", "cosh", "tanh", "conj"]: - return arg_dtype, "%s_%s" % (tpname, name) + return CallMangleInfo( + target_name="%s_%s" % (tpname, name), + result_dtypes=(arg_dtype,), + arg_dtypes=(arg_dtype,)) if name in ["real", "imag", "abs"]: - return ( - NumpyType( + return CallMangleInfo( + target_name="%s_%s" % (tpname, name), + result_dtypes=(NumpyType( np.dtype(arg_dtype.numpy_dtype.type(0).real)), - "%s_%s" % (tpname, name)) + ), + arg_dtypes=(arg_dtype,)) return None diff --git a/loopy/transform/instruction.py b/loopy/transform/instruction.py index 25634c919..8b2fa370a 100644 --- a/loopy/transform/instruction.py +++ b/loopy/transform/instruction.py @@ -124,6 +124,27 @@ def remove_instructions(kernel, insn_ids): return kernel.copy( instructions=new_insns) + +def replace_instruction_ids(kernel, replacements): + new_insns = [] + + for insn in kernel.instructions: + changed = False + new_depends_on = [] + + for dep in insn.depends_on: + if dep in replacements: + new_depends_on.extend(replacements[dep]) + changed = True + else: + new_depends_on.append(dep) + + new_insns.append( + insn.copy(depends_on=frozenset(new_depends_on)) + if changed else insn) + + return kernel.copy(instructions=new_insns) + # }}} diff --git a/loopy/version.py b/loopy/version.py index cd9f45ac3..7716feea3 100644 --- a/loopy/version.py +++ b/loopy/version.py @@ -32,4 +32,4 @@ except ImportError: else: _islpy_version = islpy.version.VERSION_TEXT -DATA_MODEL_VERSION = "v25-islpy%s" % _islpy_version +DATA_MODEL_VERSION = "v26-islpy%s" % _islpy_version diff --git a/setup.py b/setup.py index 5ed095315..30d6dfb63 100644 --- a/setup.py +++ b/setup.py @@ -43,6 +43,7 @@ setup(name="loo.py", "islpy>=2016.1.2", "six>=1.8.0", "colorama", + "Mako", ], extras_require={ diff --git a/test/test_loopy.py b/test/test_loopy.py index f7ef0db33..7474b5128 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -262,7 +262,7 @@ def test_join_inames(ctx_factory): knl = lp.add_prefetch(knl, "a", sweep_inames=["i", "j"]) knl = lp.join_inames(knl, ["a_dim_0", "a_dim_1"]) - lp.auto_test_vs_ref(ref_knl, ctx, knl) + lp.auto_test_vs_ref(ref_knl, ctx, knl, print_ref_code=True) def test_divisibility_assumption(ctx_factory): @@ -439,26 +439,21 @@ def test_argmax(ctx_factory): dtype = np.dtype(np.float32) ctx = ctx_factory() queue = cl.CommandQueue(ctx) - order = "C" n = 10000 knl = lp.make_kernel( "{[i]: 0<=i<%d}" % n, - [ - "<> result = argmax(i, fabs(a[i]))", - "max_idx = result.index", - "max_val = result.value", - ], - [ - lp.GlobalArg("a", dtype, shape=(n,), order=order), - lp.GlobalArg("max_idx", np.int32, shape=(), order=order), - lp.GlobalArg("max_val", dtype, shape=(), order=order), - ]) + """ + max_val, max_idx = argmax(i, fabs(a[i])) + """) + + knl = lp.add_and_infer_dtypes(knl, {"a": np.float32}) + print(lp.preprocess_kernel(knl)) + knl = lp.set_options(knl, write_cl=True, highlight_cl=True) a = np.random.randn(10000).astype(dtype) - cknl = lp.CompiledKernel(ctx, knl) - evt, (max_idx, max_val) = cknl(queue, a=a, out_host=True) + evt, (max_idx, max_val) = knl(queue, a=a, out_host=True) assert max_val == np.max(np.abs(a)) assert max_idx == np.where(np.abs(a) == max_val)[-1] -- GitLab