diff --git a/loopy/check.py b/loopy/check.py index 600f5670d7fea3bc201c231061f4442201aee5cb..c264123973fc579ce2d676c2cf99b73b62c3bddb 100644 --- a/loopy/check.py +++ b/loopy/check.py @@ -514,11 +514,11 @@ def _check_variable_access_ordered_inner(kernel): overlap_checker = AccessRangeOverlapChecker(kernel) aliasing_equiv_classes = find_aliasing_equivalence_classes(kernel) - logger.debug("%s: check_variable_access_ordered: start" % kernel.name) - - # dep_reqs_to_vars: A mapping from (writer_id, dep_req_id) between whom - # dependency must be established to the variables which prompted the - # dependency requirement. + # dep_reqs_to_vars: A mapping (writer_id, dep_req_id) -> set of variable names, + # where the tuple denotes a pair of instructions IDs, and the variable + # names are the ones that necessitate a dependency. + # + # Note: This can be worst-case O(n^2) in the number of instructions. dep_reqs_to_vars = {} wmap = kernel.writer_map() @@ -537,8 +537,9 @@ def _check_variable_access_ordered_inner(kernel): for writer in writers: required_deps = (readers | writers) - set([writer]) - required_deps = set([req_dep for req_dep in required_deps if not - declares_nosync_with(kernel, address_space, writer, + required_deps = set([req_dep + for req_dep in required_deps + if not declares_nosync_with(kernel, address_space, writer, req_dep)]) for req_dep in required_deps: @@ -559,8 +560,11 @@ def _check_variable_access_ordered_inner(kernel): depends_on[insn.id].update(insn.depends_on) for dep in insn.depends_on: rev_depends[dep].add(insn.id) + # }}} + # {{{ remove pairs from dep_reqs_to_vars for which dependencies exist + topological_order = _get_topological_order(kernel) def discard_dep_reqs_in_order(dep_reqs_to_vars, edges, order): @@ -572,28 +576,39 @@ def _check_variable_access_ordered_inner(kernel): :arg order: An instance of :class:`list` of instruction ids in which the *edges* graph is to be traversed. """ - # memoized_predecessors: mapping from insn_id to its direct/indirect + # predecessors: mapping from insn_id to its direct/indirect # predecessors - memoized_predecessors = {} + predecessors = {} - # reverse postorder traversal of dependency graph for insn_id in order: - # accumulated_predecessors:insn_id's direct+indirect predecessors - accumulated_predecessors = memoized_predecessors.pop(insn_id, set()) + # insn_predecessors:insn_id's direct+indirect predecessors - for pred in accumulated_predecessors: - dep_reqs_to_vars.pop((insn_id, pred), None) + # This set of predecessors is complete because we're + # traversing in topological order: No predecessor + # can occur after the instruction itself. + insn_predecessors = predecessors.pop(insn_id, set()) + + for pred in insn_predecessors: + dep_reqs_to_vars.pop( + (insn_id, pred), + # don't fail if pair doesn't exist + None) for successor in edges[insn_id]: - memoized_predecessors.setdefault(successor, set()).update( - accumulated_predecessors | set([insn_id])) + predecessors.setdefault(successor, set()).update( + insn_predecessors | set([insn_id])) # forward dep. graph traversal in reverse topological sort order + # (proceeds "end of program" -> "beginning of program") discard_dep_reqs_in_order(dep_reqs_to_vars, depends_on, topological_order[::-1]) + # reverse dep. graph traversal in topological sort order + # (proceeds "beginning of program" -> "end of program") discard_dep_reqs_in_order(dep_reqs_to_vars, rev_depends, topological_order) + # }}} + # {{{ handle dependency requirements that weren't satisfied for (writer_id, other_id), variables in six.iteritems(dep_reqs_to_vars): @@ -650,8 +665,6 @@ def _check_variable_access_ordered_inner(kernel): # }}} - logger.debug("%s: check_variable_access_ordered: done" % kernel.name) - def check_variable_access_ordered(kernel): """Checks that between each write to a variable and all other accesses to @@ -673,15 +686,17 @@ def check_variable_access_ordered(kernel): if kernel.options.enforce_variable_access_ordered == "no_check": return - if kernel.options.enforce_variable_access_ordered: - _check_variable_access_ordered_inner(kernel) - else: - from loopy.diagnostic import VariableAccessNotOrdered - try: + from pytools import ProcessLogger + with ProcessLogger(logger, "%s: check variable access ordered" % kernel.name): + if kernel.options.enforce_variable_access_ordered: _check_variable_access_ordered_inner(kernel) - except VariableAccessNotOrdered as e: - from loopy.diagnostic import warn_with_kernel - warn_with_kernel(kernel, "variable_access_ordered", str(e)) + else: + from loopy.diagnostic import VariableAccessNotOrdered + try: + _check_variable_access_ordered_inner(kernel) + except VariableAccessNotOrdered as e: + from loopy.diagnostic import warn_with_kernel + warn_with_kernel(kernel, "variable_access_ordered", str(e)) # }}} diff --git a/loopy/schedule/__init__.py b/loopy/schedule/__init__.py index d0efb98e8e91fc74997e6a57e10b5925b7dee27e..8a83ae84c5623544301b1656499606884b67d37f 100644 --- a/loopy/schedule/__init__.py +++ b/loopy/schedule/__init__.py @@ -561,7 +561,7 @@ class ScheduleDebugInput(Exception): # }}} -# {{{ scheduling algorithm +# {{{ scheduler state class SchedulerState(ImmutableRecord): """ @@ -659,6 +659,8 @@ class SchedulerState(ImmutableRecord): else: return None +# }}} + def get_insns_in_topologically_sorted_order(kernel): from pytools.graph import compute_topological_order @@ -672,6 +674,8 @@ def get_insns_in_topologically_sorted_order(kernel): return [kernel.id_to_insn[insn_id] for insn_id in ids] +# {{{ schedule_as_many_run_insns_as_possible + def schedule_as_many_run_insns_as_possible(sched_state): """ Returns an instance of :class:`loopy.schedule.SchedulerState`, by appending @@ -680,7 +684,7 @@ def schedule_as_many_run_insns_as_possible(sched_state): next_preschedule_item = ( sched_state.preschedule[0] - if len(sched_state.preschedule) > 0 + if sched_state.preschedule else None) if isinstance(next_preschedule_item, (CallKernel, ReturnFromKernel, Barrier)): @@ -706,7 +710,7 @@ def schedule_as_many_run_insns_as_possible(sched_state): for insn in toposorted_insns: if insn.id in sched_state.scheduled_insn_ids: continue - if not insn.within_inames >= have_inames: + if insn.within_inames < have_inames: ignored_unscheduled_insn_ids.add(insn.id) continue if isinstance(insn, MultiAssignmentBase): @@ -746,6 +750,10 @@ def schedule_as_many_run_insns_as_possible(sched_state): return updated_sched_state +# }}} + + +# {{{ scheduling algorithm def generate_loop_schedules_internal( sched_state, allow_boost=False, debug=None): @@ -768,7 +776,7 @@ def generate_loop_schedules_internal( next_preschedule_item = ( sched_state.preschedule[0] - if len(sched_state.preschedule) > 0 + if sched_state.preschedule else None) # {{{ decide about debug mode