diff --git a/loopy/__init__.py b/loopy/__init__.py index aa29bbe1d25e92194ab782f3aebdfcc762410bfa..451a175835d31d4fd241ce6f3713a63edeecaa98 100644 --- a/loopy/__init__.py +++ b/loopy/__init__.py @@ -61,53 +61,12 @@ def make_kernel(*args, **kwargs): newly_created_vars = set() - # {{{ reduction iname duplication helper function - - def duplicate_reduction_inames(reduction_expr, rec): - duplicate_inames = [iname - for iname, tag in insn.duplicate_inames_and_tags] - - child = rec(reduction_expr.expr) - new_red_inames = [] - did_something = False - - for iname in reduction_expr.inames: - if iname in duplicate_inames: - new_iname = knl.make_unique_var_name(iname+"_"+insn.id, - newly_created_vars) - - old_insn_inames.append(iname) - new_insn_inames.append(new_iname) - newly_created_vars.add(new_iname) - new_red_inames.append(new_iname) - did_something = True - else: - new_red_inames.append(iname) - - if did_something: - from loopy.symbolic import SubstitutionMapper - from pymbolic.mapper.substitutor import make_subst_func - from pymbolic import var - subst_dict = dict( - (old_iname, var(new_iname)) - for old_iname, new_iname in zip( - reduction_expr.inames, new_red_inames)) - subst_map = SubstitutionMapper(make_subst_func(subst_dict)) - - child = subst_map(child) - - for old_iname, new_iname in zip(reduction_expr.inames, new_red_inames): - new_iname_to_tag[new_iname] = insn_dup_iname_to_tag[old_iname] - - from loopy.symbolic import Reduction - return Reduction( - operation=reduction_expr.operation, - inames=tuple(new_red_inames), - expr=child) - - # }}} + from loopy.symbolic import CSESubstitutor + cse_sub = CSESubstitutor(knl.cses) for insn in knl.instructions: + insn = insn.copy(expression=cse_sub(insn.expression)) + # {{{ sanity checking if not set(insn.forced_iname_deps) <= knl.all_inames(): @@ -137,7 +96,7 @@ def make_kernel(*args, **kwargs): reduction_inames = insn.reduction_inames() - duplicate_inames = [iname + inames_to_duplicate = [iname for iname, tag in 
insn.duplicate_inames_and_tags if iname not in reduction_inames] @@ -146,9 +105,9 @@ def make_kernel(*args, **kwargs): based_on=iname+"_"+insn.id, extra_used_vars= newly_created_vars) - for iname in duplicate_inames] + for iname in inames_to_duplicate] - for old_iname, new_iname in zip(duplicate_inames, new_inames): + for old_iname, new_iname in zip(inames_to_duplicate, new_inames): new_tag = insn_dup_iname_to_tag[old_iname] if new_tag is None: new_tag = AutoFitLocalIndexTag() @@ -157,43 +116,29 @@ def make_kernel(*args, **kwargs): newly_created_vars.update(new_inames) from loopy.isl_helpers import duplicate_axes - new_domain = duplicate_axes(new_domain, duplicate_inames, new_inames) + new_domain = duplicate_axes(new_domain, inames_to_duplicate, new_inames) from loopy.symbolic import SubstitutionMapper from pymbolic.mapper.substitutor import make_subst_func from pymbolic import var old_to_new = dict( (old_iname, var(new_iname)) - for old_iname, new_iname in zip(duplicate_inames, new_inames)) + for old_iname, new_iname in zip(inames_to_duplicate, new_inames)) subst_map = SubstitutionMapper(make_subst_func(old_to_new)) new_expression = subst_map(insn.expression) # }}} - # {{{ duplicate reduction inames - - if len(duplicate_inames) < len(insn.duplicate_inames_and_tags): - # there must've been requests to duplicate reduction inames - old_insn_inames = [] - new_insn_inames = [] - - from loopy.symbolic import ReductionCallbackMapper - new_expression = ( - ReductionCallbackMapper(duplicate_reduction_inames) - (new_expression)) - - from loopy.isl_helpers import duplicate_axes - for old, new in zip(old_insn_inames, new_insn_inames): - new_domain = duplicate_axes(new_domain, [old], [new]) - - # }}} + if len(inames_to_duplicate) < len(insn.duplicate_inames_and_tags): + raise RuntimeError("cannot use [|...] 
syntax to rename reduction " + "inames") insn = insn.copy( assignee=subst_map(insn.assignee), expression=new_expression, forced_iname_deps=set( old_to_new.get(iname, iname) for iname in insn.forced_iname_deps), - ) + duplicate_inames_and_tags=[]) # }}} @@ -248,7 +193,9 @@ def make_kernel(*args, **kwargs): instructions=new_insns, domain=new_domain, temporary_variables=new_temp_vars, - iname_to_tag=new_iname_to_tag) + iname_to_tag=new_iname_to_tag, + iname_to_tag_requests=[], + cses={}) # }}} diff --git a/loopy/kernel.py b/loopy/kernel.py index 52974a83c359113596b9365d8a3b19c397f49037..23649cb498464a530b9522c63b152d4a05555be5 100644 --- a/loopy/kernel.py +++ b/loopy/kernel.py @@ -446,9 +446,11 @@ class LoopKernel(Record): workgroup axes to ther sizes, e.g. *{0: 16}* forces axis 0 to be length 16. - The following two instance variables are only used until :func:`loopy.kernel.make_kernel` is + The following instance variables are only used until :func:`loopy.kernel.make_kernel` is finished: + :ivar iname_to_tag_requests: + :ivar cses: a mapping from CSE names to tuples (arg_names, expr). """ def __init__(self, device, domain, instructions, args=None, schedule=None, @@ -457,7 +459,7 @@ class LoopKernel(Record): iname_slab_increments={}, temporary_variables={}, local_sizes={}, - iname_to_tag={}, iname_to_tag_requests=None): + iname_to_tag={}, iname_to_tag_requests=None, cses={}): """ :arg domain: a :class:`islpy.BasicSet`, or a string parseable to a basic set by the isl. 
Example: "{[i,j]: 0<=i < 10 and 0<= j < 9}" @@ -511,21 +513,35 @@ class LoopKernel(Record): return result + # {{{ instruction parser + def parse_if_necessary(insn): from pymbolic import parse if isinstance(insn, Instruction): - return insn - if isinstance(insn, str): - label_dep_match = LABEL_DEP_RE.match(insn) - if label_dep_match is None: - raise RuntimeError("insn parse error") - - groups = label_dep_match.groupdict() - if groups["label"] is not None: - label = groups["label"] - else: - label = "insn" + insns.append(insn) + return + + if not isinstance(insn, str): + raise TypeError("Instructions must be either an Instruction " + "instance or a parseable string. got '%s' instead." + % type(insn)) + + label_dep_match = LABEL_DEP_RE.match(insn) + if label_dep_match is None: + raise RuntimeError("insn parse error") + + groups = label_dep_match.groupdict() + if groups["label"] is not None: + label = groups["label"] + else: + label = "insn" + + lhs = parse(groups["lhs"]) + from loopy.symbolic import FunctionToPrimitiveMapper + rhs = FunctionToPrimitiveMapper()(parse(groups["rhs"])) + + if label.lower() != "cse": if groups["insn_deps"] is not None: insn_deps = set(dep.strip() for dep in groups["insn_deps"].split(",")) else: @@ -550,22 +566,49 @@ class LoopKernel(Record): else: temp_var_type = None - lhs = parse(groups["lhs"]) - from loopy.symbolic import FunctionToPrimitiveMapper - rhs = FunctionToPrimitiveMapper()(parse(groups["rhs"])) + insns.append( + Instruction( + id=self.make_unique_instruction_id(insns, based_on=label), + insn_deps=insn_deps, + forced_iname_deps=forced_iname_deps, + assignee=lhs, expression=rhs, + temp_var_type=temp_var_type, + duplicate_inames_and_tags=duplicate_inames_and_tags)) + else: + if groups["iname_deps_and_tags"] is not None: + raise RuntimeError("CSEs cannot declare iname dependencies") + if groups["insn_deps"] is not None: + raise RuntimeError("CSEs cannot declare instruction dependencies") + if groups["temp_var_type"] is not None: + 
def duplicate_reduction_inames(kernel):
    """Give every reduction iname marked with a leading "@" its own copy.

    For each instruction of *kernel*, every reduction in the instruction's
    expression is visited. A reduction iname that starts with "@" is
    replaced by a freshly generated iname (based on the un-marked name and
    the instruction id), the reduction's operand is rewritten to use the
    new iname, and the corresponding axis of the kernel domain is
    duplicated under the new name. Reduction inames without the marker
    are left untouched.

    :arg kernel: the kernel to process. Only its *domain* and
        *instructions* attributes and its *make_unique_var_name* and
        *copy* methods are used here.
    :returns: a copy of *kernel* with the rewritten instructions and the
        extended domain.
    """

    from loopy.isl_helpers import duplicate_axes
    from loopy.symbolic import (Reduction, ReductionCallbackMapper,
            SubstitutionMapper)
    from pymbolic import var
    from pymbolic.mapper.substitutor import make_subst_func

    # Names handed out so far. They are passed to make_unique_var_name so
    # that inames generated for different reductions/instructions cannot
    # collide with each other.
    newly_created_vars = set()

    # {{{ helper function

    def make_reduction_callback(insn_id, renames):
        """Return a callback for :class:`ReductionCallbackMapper` that
        appends an (old_iname, new_iname) pair to *renames* for every
        iname it duplicates.
        """

        def rename_marked_inames(reduction_expr, rec):
            # Handle reductions nested inside the operand first.
            child = rec(reduction_expr.expr)

            new_red_inames = []
            old_to_new = {}

            for iname in reduction_expr.inames:
                if not iname.startswith("@"):
                    new_red_inames.append(iname)
                    continue

                # Derive *everything* from the same stripped name, so
                # that the generated name, the domain axis being
                # duplicated and the substitution all agree.
                old_iname = iname.lstrip("@")
                new_iname = kernel.make_unique_var_name(
                        old_iname+"_"+insn_id, newly_created_vars)
                newly_created_vars.add(new_iname)

                renames.append((old_iname, new_iname))
                old_to_new[old_iname] = var(new_iname)
                new_red_inames.append(new_iname)

            if old_to_new:
                subst_map = SubstitutionMapper(make_subst_func(old_to_new))
                child = subst_map(child)

            return Reduction(
                    operation=reduction_expr.operation,
                    inames=tuple(new_red_inames),
                    expr=child)

        return rename_marked_inames

    # }}}

    new_domain = kernel.domain
    new_insns = []

    for insn in kernel.instructions:
        # (old_iname, new_iname) pairs created while rewriting this
        # instruction, in order of creation.
        renames = []

        mapper = ReductionCallbackMapper(
                make_reduction_callback(insn.id, renames))
        new_insns.append(insn.copy(expression=mapper(insn.expression)))

        for old_iname, new_iname in renames:
            new_domain = duplicate_axes(new_domain, [old_iname], [new_iname])

    return kernel.copy(
            instructions=new_insns,
            domain=new_domain)
class CSESubstitutor(IdentityMapper):
    """Replaces uses of named CSEs by :class:`CommonSubexpression` nodes.

    A variable whose name is a key of *cses* is replaced by a
    :class:`CommonSubexpression` wrapping the stored expression. A call
    whose function is such a variable is replaced by a
    :class:`CommonSubexpression` wrapping the stored expression with the
    CSE's argument names substituted by the call's parameters. All other
    expressions are traversed as by :class:`IdentityMapper`.
    """

    def __init__(self, cses):
        """
        :arg cses: a mapping from CSE names to tuples (arg_names, expr).
        """
        self.cses = cses

    def map_variable(self, expr):
        """Expand *expr* if it names an argument-less CSE.

        :raises RuntimeError: if the named CSE takes arguments.
        """
        if expr.name not in self.cses:
            return IdentityMapper.map_variable(self, expr)

        arg_names, cse_expr = self.cses[expr.name]
        if arg_names:
            raise RuntimeError("CSE '%s' must be invoked with %d arguments"
                    % (expr.name, len(arg_names)))

        from pymbolic.primitives import CommonSubexpression
        return CommonSubexpression(cse_expr, expr.name)

    def map_call(self, expr):
        """Expand *expr* if it invokes a CSE by name.

        :raises RuntimeError: if the number of parameters does not match
            the number of argument names the CSE was declared with.
        """
        from pymbolic.primitives import Variable, CommonSubexpression
        if (not isinstance(expr.function, Variable)
                or expr.function.name not in self.cses):
            # Not a CSE invocation: fall back to the generic call handler
            # so that the call's parameters are still traversed and any
            # CSE uses inside them get expanded.
            return IdentityMapper.map_call(self, expr)

        cse_name = expr.function.name
        arg_names, cse_expr = self.cses[cse_name]
        if len(arg_names) != len(expr.parameters):
            raise RuntimeError("CSE '%s' invoked with %d arguments (needs %d)"
                    % (cse_name, len(expr.parameters), len(arg_names)))

        # Map the parameters first so that CSE uses nested inside them
        # are expanded as well.
        parameters = [self.rec(par) for par in expr.parameters]

        from pymbolic.mapper.substitutor import make_subst_func
        subst_map = SubstitutionMapper(make_subst_func(
            dict(zip(arg_names, parameters))))

        return CommonSubexpression(subst_map(cse_expr), cse_name)