diff --git a/MEMO b/MEMO index dd00893d8c7085ccd78cdd6adb103eed194f0059..07db1b9a910b3596da8fd4df751cc9b1e19554d7 100644 --- a/MEMO +++ b/MEMO @@ -86,6 +86,10 @@ Future ideas Dealt with ^^^^^^^^^^ +- Restrict-to-sequential and tagging have nothing to do with each other. + -> Removed SequentialTag and turned it into a separate computed kernel + property. + - Just touching a variable written to by a non-idempotent instruction makes that instruction also not idempotent -> Idempotent renamed to boostable. diff --git a/loopy/__init__.py b/loopy/__init__.py index f0fe3f0fda31cbc07d0d3b295dc21d8c0d12f011..017999e7c64f52dc89ed6901ba0a6aee9a8eb140 100644 --- a/loopy/__init__.py +++ b/loopy/__init__.py @@ -127,15 +127,25 @@ def tag_dimensions(kernel, iname_to_tag): iname_to_tag = dict((iname, parse_tag(tag)) for iname, tag in iname_to_tag.iteritems()) + from loopy.kernel import ParallelTag + new_iname_to_tag = kernel.iname_to_tag.copy() for iname, new_tag in iname_to_tag.iteritems(): + old_tag = kernel.iname_to_tag.get(iname) + + if old_tag is not None and new_tag is None: + raise ValueError("cannot untag iname '%s'" % iname) + if new_tag is None: continue if iname not in kernel.all_inames(): raise ValueError("cannot tag '%s'--not known" % iname) - old_tag = kernel.iname_to_tag.get(iname) + if isinstance(new_tag, ParallelTag) and iname in kernel.sequential_inames: + raise ValueError("cannot tag '%s' as parallel--" + "iname requires sequential execution" % iname) + if old_tag is not None and (old_tag != new_tag): raise RuntimeError("'%s' is already tagged '%s'--cannot retag" % (iname, old_tag)) diff --git a/loopy/codegen/control.py b/loopy/codegen/control.py index f4e1d1c0a619ae6fdcd863bc30c719f43b1de57e..07bb62c3c7ce1967f7bfb3c00186ca9abd5b68e9 100644 --- a/loopy/codegen/control.py +++ b/loopy/codegen/control.py @@ -41,10 +41,10 @@ def generate_code_for_sched_index(kernel, sched_index, codegen_state): generate_unroll_loop, generate_sequential_loop_dim_code) - from loopy.kernel import UnrollTag, SequentialTag + from loopy.kernel import UnrollTag if isinstance(tag, UnrollTag): func = generate_unroll_loop - elif tag is None or isinstance(tag, SequentialTag): + elif tag is None: func = generate_sequential_loop_dim_code else: raise RuntimeError("encountered (invalid) EnterLoop for '%s', tagged '%s'" diff --git a/loopy/kernel.py b/loopy/kernel.py index 2eff255384969f11d7b37818e7f383da36c3c6c1..052a45c2f4c102f10ce74d8c1cf0b055e4584cee 100644 --- a/loopy/kernel.py +++ b/loopy/kernel.py @@ -23,10 +23,6 @@ class IndexTag(Record): -class SequentialTag(IndexTag): - def __str__(self): - return "seq" - class ParallelTag(IndexTag): pass @@ -254,17 +250,6 @@ class Instruction(Record): return index_vars | set(self.forced_iname_deps) - @memoize_method - def sequential_inames(self, iname_to_tag): - result = set() - - for iname in self.all_inames(): - tag = iname_to_tag.get(iname) - if isinstance(tag, SequentialTag): - result.add(iname) - - return result - def __str__(self): result = "%s: %s <- %s\n [%s]" % (self.id, self.assignee, self.expression, ", ".join(sorted(self.all_inames()))) @@ -444,7 +429,8 @@ class LoopKernel(Record): temporary_variables={}, workgroup_size=None, iname_to_dim=None, - iname_to_tag={}): + iname_to_tag={}, + ): """ :arg domain: a :class:`islpy.BasicSet`, or a string parseable to a basic set by the isl. Example: "{[i,j]: 0<=i < 10 and 0<= j < 9}" @@ -515,7 +501,7 @@ class LoopKernel(Record): dup_groups = dup_entry_match.groupdict() dup_iname = dup_groups["iname"] assert dup_iname - dup_tag = AutoFitLocalIndexTag() + dup_tag = None if dup_groups["tag"] is not None: dup_tag = parse_tag(dup_groups["tag"]) @@ -548,32 +534,6 @@ class LoopKernel(Record): if len(set(insn.id for insn in insns)) != len(insns): raise RuntimeError("instruction ids do not appear to be unique") - # {{{ find and properly tag reduction inames - - reduction_inames = set() - - from loopy.symbolic import ReductionCallbackMapper - - def map_reduction(expr, rec): - rec(expr.expr) - reduction_inames.update(expr.inames) - - for insn in insns: - ReductionCallbackMapper(map_reduction)(insn.expression) - - iname_to_tag = iname_to_tag.copy() - - if reduction_inames: - for iname in reduction_inames: - tag = iname_to_tag.get(iname) - if not (tag is None or isinstance(tag, SequentialTag)): - raise RuntimeError("inconsistency detected: " - "sequential/reduction iname '%s' was " - "tagged otherwise" % iname) - - iname_to_tag[iname] = SequentialTag() - - # }}} if assumptions is None: assumptions_space = domain.get_space().params() @@ -610,6 +570,28 @@ class LoopKernel(Record): if id_str not in used_ids: return id_str + @property + @memoize_method + def sequential_inames(self): + result = set() + + def map_reduction(red_expr, rec): + rec(red_expr.expr) + result.update(red_expr.inames) + + from loopy.symbolic import ReductionCallbackMapper + for insn in self.instructions: + ReductionCallbackMapper(map_reduction)(insn.expression) + + for iname in result: + tag = self.iname_to_tag.get(iname) + if tag is not None and isinstance(tag, ParallelTag): + raise RuntimeError("inconsistency detected: " + "sequential/reduction iname '%s' has " + "a parallel tag" % iname) + + return result + @property @memoize_method def iname_to_dim(self): @@ -850,9 +832,12 @@ def count_reduction_iname_uses(insn): return reduction_iname_uses +# }}} +# {{{ pass 2 of kernel creation + def make_kernel(*args, **kwargs): """Second pass of kernel creation. Think about requests for iname duplication and temporary variable declaration received as part of string instructions. @@ -902,6 +887,9 @@ def make_kernel(*args, **kwargs): child = subst_map(child) + for old_iname, new_iname in zip(reduction_expr.inames, new_red_inames): + new_iname_to_tag[new_iname] = insn_dup_iname_to_tag[old_iname] + from loopy.symbolic import Reduction return Reduction( operation=reduction_expr.operation, @@ -914,6 +902,9 @@ def make_kernel(*args, **kwargs): # {{{ iname duplication if insn.duplicate_inames_and_tags: + + insn_dup_iname_to_tag = dict(insn.duplicate_inames_and_tags) + # {{{ duplicate non-reduction inames reduction_iname_uses = count_reduction_iname_uses(insn) @@ -921,8 +912,6 @@ def make_kernel(*args, **kwargs): duplicate_inames = [iname for iname, tag in insn.duplicate_inames_and_tags if iname not in reduction_iname_uses] - new_iname_tags = [tag for iname, tag in insn.duplicate_inames_and_tags - if iname not in reduction_iname_uses] new_inames = [ knl.make_unique_var_name( @@ -931,8 +920,11 @@ def make_kernel(*args, **kwargs): newly_created_vars) for iname in duplicate_inames] - for iname, tag in zip(new_inames, new_iname_tags): - new_iname_to_tag[iname] = tag + for old_iname, new_iname in zip(duplicate_inames, new_inames): + new_tag = insn_dup_iname_to_tag[old_iname] + if new_tag is None: + new_tag = AutoFitLocalIndexTag() + new_iname_to_tag[new_iname] = new_tag newly_created_vars.update(new_inames) @@ -1025,6 +1017,8 @@ def make_kernel(*args, **kwargs): temporary_variables=new_temp_vars, iname_to_tag=new_iname_to_tag) +# }}} +