diff --git a/loopy/kernel.py b/loopy/kernel.py index ededdc5308c0d576c1ece0e8f7462501e334932e..5add79c782e12ca14ca223903c5f759f2398e4c9 100644 --- a/loopy/kernel.py +++ b/loopy/kernel.py @@ -1304,10 +1304,7 @@ class LoopKernel(Record): they should be run. """ - return find_all_insn_inames( - self.instructions, self.all_inames(), - writer_map=self.writer_map(), - temporary_variables=self.temporary_variables) + return find_all_insn_inames(self) @memoize_method def all_referenced_inames(self): @@ -1653,26 +1650,27 @@ class LoopKernel(Record): # {{{ find_all_insn_inames fixed point iteration -def find_all_insn_inames(instructions, all_inames, - writer_map, temporary_variables): +def find_all_insn_inames(kernel): from loopy.symbolic import get_dependencies + writer_map = kernel.writer_map() + insn_id_to_inames = {} insn_assignee_inames = {} - for insn in instructions: + for insn in kernel.instructions: read_deps = get_dependencies(insn.expression) write_deps = get_dependencies(insn.assignee) deps = read_deps | write_deps iname_deps = ( - deps & all_inames + deps & kernel.all_inames() | insn.forced_iname_deps) insn_id_to_inames[insn.id] = iname_deps - insn_assignee_inames[insn.id] = write_deps & all_inames + insn_assignee_inames[insn.id] = write_deps & kernel.all_inames() - temp_var_names = set(temporary_variables.iterkeys()) + temp_var_names = set(kernel.temporary_variables.iterkeys()) # fixed point iteration until all iname dep sets have converged @@ -1689,7 +1687,9 @@ def find_all_insn_inames(instructions, all_inames, while True: did_something = False - for insn in instructions: + for insn in kernel.instructions: + + # {{{ dependency-based propagation # For all variables that insn depends on, find the intersection # of iname deps of all writers, and add those to insn's @@ -1717,6 +1717,29 @@ def find_all_insn_inames(instructions, all_inames, if inames_new != inames_old: did_something = True + # }}} + + # {{{ domain-based propagation + + # Add all inames occurring in 
parameters of domains that my current + # inames refer to. + + inames_old = insn_id_to_inames[insn.id] + inames_new = set(insn_id_to_inames[insn.id]) + + for iname in inames_old: + home_domain = kernel.domains[kernel.get_home_domain_index(iname)] + + for par in home_domain.get_var_names(dim_type.param): + if par in kernel.all_inames(): + inames_new.add(par) + + if inames_new != inames_old: + did_something = True + insn_id_to_inames[insn.id] = frozenset(inames_new) + + # }}} + if not did_something: break diff --git a/loopy/schedule.py b/loopy/schedule.py index 4286b1078d94ad8184f2849556b3963725c46b64..11979e694a641130774eac5801bc42986e2ba033 100644 --- a/loopy/schedule.py +++ b/loopy/schedule.py @@ -2,6 +2,7 @@ from __future__ import division from pytools import Record import sys +import islpy as isl @@ -185,6 +186,14 @@ def loop_nest_map(kernel): if iname_to_insns[inner_iname] < iname_to_insns[outer_iname]: result[inner_iname].add(outer_iname) + for dom_idx, dom in enumerate(kernel.domains): + for outer_iname in dom.get_var_names(isl.dim_type.param): + if outer_iname not in kernel.all_inames(): + continue + + for inner_iname in dom.get_var_names(isl.dim_type.set): + result[inner_iname].add(outer_iname) + return result # }}} @@ -828,7 +837,7 @@ def generate_loop_schedules(kernel, loop_priority=[], debug_args={}): print debug.debug_length = len(debug.longest_rejected_schedule) - for _ in generate_loop_schedules_internal(kernel, loop_priority, + for _ in generate_loop_schedules_internal(sched_state, loop_priority, debug=debug): pass