From e3fcfca6193e91f18ff2c5cb5004fba4a716a7b8 Mon Sep 17 00:00:00 2001
From: Andreas Kloeckner
Date: Sat, 24 Sep 2011 20:19:17 -0400
Subject: [PATCH] Automatic dependency finding.

---
Reviewer notes (placed below the "---" marker, so they are commentary and
not part of the commit message):

* Line structure: this patch had been flattened onto four physical lines.
  Line breaks and blank context lines were rebuilt from the hunk headers
  and the diffstat, which are all self-consistent (101 insertions, 19
  deletions). Leading whitespace of every line, the wrap points of a few
  long context lines, and runs of spaces inside string literals could NOT
  be recovered from the flattened text and are a best guess -- check them
  against the tree (or apply with whitespace tolerance) before relying on
  this file.
* add_automatic_dependencies(): the "read, but never written" warning was
  guarded by `var not in var_names`. read_vars is already intersected with
  var_names, so that test was always false and the warning could never
  fire. It now tests `arg_names` (previously computed but otherwise
  unused), so the warning fires for a temporary that is read but has no
  writer.
* add_automatic_dependencies(): when an instruction was the only writer of
  a variable it also reads, it was given a dependency on its own id. The
  instruction's own id is now excluded from the automatic dependencies.
* Both code edits replace one added line in place, so hunk sizes and the
  diffstat are unchanged. The blob ids on the "index" line for
  loopy/schedule.py and the commit id on the first line predate them.

 MEMO              |  2 ++
 loopy/__init__.py | 11 +++---
 loopy/kernel.py   | 15 ++++-----
 loopy/schedule.py | 85 ++++++++++++++++++++++++++++++++++++++++++++---
 loopy/symbolic.py |  7 +++-
 5 files changed, 101 insertions(+), 19 deletions(-)

diff --git a/MEMO b/MEMO
index 942ac822c..d3c511f4c 100644
--- a/MEMO
+++ b/MEMO
@@ -57,6 +57,8 @@ Things to consider
 
 - FIXME: Deal with insns losing a seq iname dep in a CSE realization
 
+- Every loop in loopy is opened at most once.
+
 Dealt with
 ^^^^^^^^^^
 
diff --git a/loopy/__init__.py b/loopy/__init__.py
index 6c7136304..d8c688f0a 100644
--- a/loopy/__init__.py
+++ b/loopy/__init__.py
@@ -452,14 +452,13 @@ def realize_reduction(kernel, inames=None, reduction_tag=None):
                         extra_used_ids=set(ni.id for ni in new_insns)),
                 assignee=target_var,
                 expression=expr.operation(target_var, sub_expr),
-                forced_insn_deps=[init_insn.id],
-                use_auto_dependencies=False,
+                insn_deps=[init_insn.id],
                 forced_iname_deps=list(insn.all_inames()),
                 iname_to_tag=insn.iname_to_tag)
 
         new_insns.append(reduction_insn)
 
-        new_insn_forced_insn_deps.append(reduction_insn.id)
+        new_insn_insn_deps.append(reduction_insn.id)
         new_insn_removed_inames.extend(expr.inames)
 
         return target_var
@@ -468,7 +467,7 @@ def realize_reduction(kernel, inames=None, reduction_tag=None):
     cb_mapper = ReductionCallbackMapper(map_reduction)
 
     for insn in kernel.instructions:
-        new_insn_forced_insn_deps = []
+        new_insn_insn_deps = []
         new_insn_removed_inames = []
 
         new_expression = cb_mapper(insn.expression)
@@ -480,8 +479,8 @@ def realize_reduction(kernel, inames=None, reduction_tag=None):
         new_insns.append(
                 insn.copy(
                     expression=new_expression,
-                    forced_insn_deps=insn.forced_insn_deps
-                        + new_insn_forced_insn_deps,
+                    insn_deps=insn.insn_deps
+                        + new_insn_insn_deps,
                     iname_to_tag=new_iname_to_tag,
                     ))
 
diff --git a/loopy/kernel.py b/loopy/kernel.py
index c3a0ca0ec..a9140a618 100644
--- a/loopy/kernel.py
+++ b/loopy/kernel.py
@@ -209,20 +209,17 @@ class Instruction(Record):
         a :class:`LoopKernel`.
     :ivar assignee:
     :ivar expression:
-    :ivar use_auto_dependencies:
     :ivar forced_iname_deps: a list of inames that are added to the list of iname
         dependencies
-    :ivar forced_insn_deps: a list of ids of :class:`Instruction` instances that
+    :ivar insn_deps: a list of ids of :class:`Instruction` instances that
         *must* be executed before this one. Note that loop scheduling augments this
-        by adding dependencies on any writes to temporaries read by this instruction
-        *if* use_auto_dependencies is True.
+        by adding dependencies on any writes to temporaries read by this instruction.
     :ivar iname_to_tag: a map from loop domain variables to subclasses of
         :class:`IndexTag`
     """
     def __init__(self,
             id, assignee, expression,
-            use_auto_dependencies=True,
-            forced_iname_deps=[], forced_insn_deps=[],
+            forced_iname_deps=[], insn_deps=[],
             iname_to_tag={}):
 
         # {{{ find and properly tag reduction inames
@@ -253,9 +250,8 @@ class Instruction(Record):
 
         Record.__init__(self,
                 id=id, assignee=assignee, expression=expression,
-                use_auto_dependencies=use_auto_dependencies,
                 forced_iname_deps=forced_iname_deps,
-                forced_insn_deps=forced_insn_deps,
+                insn_deps=insn_deps,
                 iname_to_tag=dict(
                     (iname, parse_tag(tag))
                     for iname, tag in iname_to_tag.iteritems()))
@@ -297,6 +293,9 @@ class Instruction(Record):
         result = "%s: %s <- %s\n [%s]" % (self.id,
                 self.assignee, self.expression, ", ".join(loop_descrs))
 
+        if self.insn_deps:
+            result += "\n : " + ", ".join(self.insn_deps)
+
         return result
 
     # }}}
diff --git a/loopy/schedule.py b/loopy/schedule.py
index 7a3cc5239..1cc50e59a 100644
--- a/loopy/schedule.py
+++ b/loopy/schedule.py
@@ -37,6 +37,7 @@ def generate_loop_dep_graph(kernel):
     :return: a dict mapping an iname to the ones that need to be entered
         before it.
     """
+    # FIXME likely not useful
     result = {}
 
     print "------------------------------------------------------"
@@ -67,6 +68,82 @@ def generate_loop_dep_graph(kernel):
 
 
 
+def find_writers(kernel):
+    """
+    :return: a dict that maps variable names to ids of insns that
+        write to that variable.
+    """
+    writer_insn_ids = {}
+
+    admissible_write_vars = (
+            set(arg.name for arg in kernel.args)
+            | set(tv.name for tv in kernel.temporary_variables))
+
+    from pymbolic.primitives import Variable, Subscript
+    for insn in kernel.instructions:
+        if isinstance(insn.assignee, Variable):
+            var_name = insn.assignee.name
+        elif isinstance(insn.assignee, Subscript):
+            var = insn.assignee.aggregate
+            assert isinstance(var, Variable)
+            var_name = var.name
+        else:
+            raise RuntimeError("invalid lvalue '%s'" % insn.assignee)
+
+        if var_name not in admissible_write_vars:
+            raise RuntimeError("writing to '%s' is not allowed" % var_name)
+
+        writer_insn_ids.setdefault(var_name, set()).add(insn.id)
+
+    return writer_insn_ids
+
+
+
+
+def add_automatic_dependencies(kernel):
+    writer_map = find_writers(kernel)
+
+    arg_names = set(arg.name for arg in kernel.args)
+
+    var_names = arg_names | set(tv.name for tv in kernel.temporary_variables)
+
+    from loopy.symbolic import DependencyMapper
+    dep_map = DependencyMapper(composite_leaves=False)
+    new_insns = []
+    for insn in kernel.instructions:
+        read_vars = (
+                set(var.name for var in dep_map(insn.expression))
+                & var_names)
+
+        auto_deps = []
+        for var in read_vars:
+            var_writers = writer_map.get(var, set())
+
+            if not var_writers and var not in arg_names:  # was var_names: never true
+                from warnings import warn
+                warn("'%s' is read, but never written." % var)
+
+            if len(var_writers) > 1 and not var_writers & set(insn.insn_deps):
+                from warnings import warn
+                warn("'%s' is written from more than one place, "
+                        "but instruction '%s' (which reads this variable) "
+                        "does not specify a dependency on any of the writers."
+                        % (var, insn.id))
+
+            if len(var_writers) == 1:
+                auto_deps.extend(var_writers - set([insn.id]))  # no self-dependency
+
+        new_insns.append(
+                insn.copy(
+                    insn_deps=insn.insn_deps + auto_deps))
+
+    return kernel.copy(instructions=new_insns)
+
+
+
+
+
+
 def generate_loop_schedules_internal(kernel, entered_loops=[]):
     scheduled_insn_ids = set(sched_item.id for sched_item in kernel.schedule
             if isinstance(sched_item, RunInstruction))
@@ -92,9 +169,6 @@ def generate_loop_schedules(kernel):
 
     # }}}
 
-    for i, insn_a in enumerate(kernel.instructions):
-        print i, insn_a
-
     kernel = fix_grid_sizes(kernel)
 
     if 0:
@@ -103,7 +177,10 @@ def generate_loop_schedules(kernel):
             print "%s: %s" % (k, ",".join(v))
         1/0
 
-    kernel = find_automatic_dependencies(kernel)
+    kernel = add_automatic_dependencies(kernel)
+
+    for insn_a in kernel.instructions:
+        print insn_a
 
     #grid_size, group_size = find_known_grid_and_group_sizes(kernel)
 
diff --git a/loopy/symbolic.py b/loopy/symbolic.py
index f44da7b03..e3e599e8b 100644
--- a/loopy/symbolic.py
+++ b/loopy/symbolic.py
@@ -13,6 +13,8 @@ from pymbolic.mapper.substitutor import \
         SubstitutionMapper as SubstitutionMapperBase
 from pymbolic.mapper.stringifier import \
         StringifyMapper as StringifyMapperBase
+from pymbolic.mapper.dependency import \
+        DependencyMapper as DependencyMapperBase
 import numpy as np
 import islpy as isl
 from islpy import dim_type
@@ -77,6 +79,10 @@ class StringifyMapper(StringifyMapperBase):
         return "reduce(%s, [%s], %s, tag=%s)" % (
                 expr.operation, ", ".join(expr.inames), expr.expr, expr.tag)
 
+class DependencyMapper(DependencyMapperBase):
+    def map_reduction(self, expr):
+        return set(expr.inames) | self.rec(expr.expr)
+
 # }}}
 
 # {{{ functions to primitives
@@ -478,7 +484,6 @@ class IndexVariableFinder(CombineMapper):
         return set()
 
     def map_subscript(self, expr):
-        from pymbolic.mapper.dependency import DependencyMapper
         idx_vars = DependencyMapper()(expr.index)
 
         from pymbolic.primitives import Variable
-- 
GitLab