diff --git a/loopy/schedule.py b/loopy/schedule.py index 471c2c29db9a05316b27d484d7c0bbb03538c004..08af4b6b26a5f7c367722956e1cc9ee31d854050 100644 --- a/loopy/schedule.py +++ b/loopy/schedule.py @@ -249,6 +249,109 @@ def check_for_double_use_of_hw_axes(kernel): insn_tag_keys.add(key) + + + +def check_for_inactive_iname_access(kernel): + from loopy.symbolic import DependencyMapper + depmap = DependencyMapper() + + for insn in kernel.instructions: + expression_indices = depmap(insn.assignee.index) + expression_inames = expression_indices & kernel.all_inames() + + if not expression_inames <= insn.all_inames(): + raise RuntimeError( + "instructiosn '%s' references " + "inames that the instruction does not depend on" + % insn.id) + + + + +def check_for_write_races(kernel): + from pymbolic.primitives import Subscript, Variable + from loopy.symbolic import DependencyMapper + from loopy.kernel import ParallelTag, GroupIndexTag, IlpTag + depmap = DependencyMapper() + + for insn in kernel.instructions: + if isinstance(insn.assignee, Subscript): + assert isinstance(insn.assignee, Subscript) + + var = insn.assignee.aggregate + assert isinstance(var, Variable) + assignee_name = var.name + assignee_indices = depmap(insn.assignee.index) + elif isinstance(insn.assignee, Variable): + assignee_name = insn.assignee.name + assignee_indices = set() + else: + raise RuntimeError("assignee for instruction '%s' not understood" + % insn.id) + + def strip_var(expr): + assert isinstance(expr, Variable) + return expr.name + + assignee_indices = set(strip_var(index) for index in assignee_indices) + + assignee_inames = assignee_indices & kernel.all_inames() + if not assignee_inames <= insn.all_inames(): + raise RuntimeError( + "assignee of instructiosn '%s' references " + "iname that the instruction does not depend on" + % insn.id) + + inames_without_write_dep = None + + if assignee_name in kernel.arg_dict: + # Any parallel tags that are not depended upon by the assignee + # will cause write races. + + parallel_insn_inames = set( + iname + for iname in insn.all_inames() + if isinstance(kernel.iname_to_tag.get(iname), ParallelTag)) + + inames_without_write_dep = parallel_insn_inames - ( + assignee_inames & parallel_insn_inames) + + elif assignee_name in kernel.temporary_variables: + temp_var = kernel.temporary_variables[assignee_name] + if temp_var.is_local: + local_parallel_insn_inames = set( + iname + for iname in insn.all_inames() + if isinstance(kernel.iname_to_tag.get(iname), ParallelTag) + and not isinstance(kernel.iname_to_tag.get(iname), GroupIndexTag)) + + inames_without_write_dep = local_parallel_insn_inames - ( + assignee_inames & local_parallel_insn_inames) + + else: + ilp_inames = set( + iname + for iname in insn.all_inames() + if isinstance(kernel.iname_to_tag.get(iname), IlpTag)) + + inames_without_write_dep = ilp_inames - ( + assignee_inames & ilp_inames) + + + else: + raise RuntimeError("invalid assignee name in instruction '%s'" + % insn.id) + + assert inames_without_write_dep is not None + + if inames_without_write_dep: + raise RuntimeError( + "instruction '%s' contains a write race: " + "instruction will be run across parallel iname(s) '%s', which " + "is/are not referenced in the assignee index" + % (insn.id, ",".join(inames_without_write_dep))) + # }}} # {{{ temp storage adjust for bank conflict @@ -891,6 +994,8 @@ def generate_loop_schedules(kernel, loop_priority=[]): check_for_double_use_of_hw_axes(kernel) check_for_unused_hw_axes(kernel) + check_for_inactive_iname_access(kernel) + check_for_write_races(kernel) schedule_count = 0