From 484534ae44e01123b0b5fcbf0fe452a777761042 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner <inform@tiker.net> Date: Fri, 7 Aug 2015 15:47:47 -0500 Subject: [PATCH] Implement instruction groups (and IG exclusions) in scheduler, revamp scheduler to recompute less info --- doc/reference.rst | 13 ++- loopy/__init__.py | 4 +- loopy/kernel/__init__.py | 4 + loopy/kernel/creation.py | 14 +++ loopy/kernel/data.py | 49 +++++++-- loopy/schedule.py | 230 ++++++++++++++++++++++++++++----------- loopy/version.py | 2 +- 7 files changed, 243 insertions(+), 73 deletions(-) diff --git a/doc/reference.rst b/doc/reference.rst index eb10788cb..6a42ed944 100644 --- a/doc/reference.rst +++ b/doc/reference.rst @@ -20,7 +20,6 @@ Domain Tree Inames ^^^^^^ - Loops are (by default) entered exactly once. This is necessary to preserve dependency semantics--otherwise e.g. a fetch could happen inside one loop nest, and then the instruction using that fetch could be inside a wholly different @@ -191,6 +190,8 @@ Instructions .. autoclass:: UniqueName +.. autoclass:: InstructionBase + .. _assignments: Assignments @@ -299,8 +300,16 @@ These are usually key-value pairs. The following attributes are recognized: * ``tags=tag1:tag2`` Apply tags to this instruction that can then be used for :ref:`context-matching`. -.. autoclass:: ExpressionInstruction +* ``groups=group1:group2`` Make this instruction part of the given + instruction groups. See :attr:`InstructionBase.groups`. +* ``conflicts=group1:group2`` Make this instruction conflict with the + given instruction groups. See + :attr:`InstructionBase.conflicts_with_groups`. + +Assignment instructions are expressed as instances of the following class: + +.. autoclass:: ExpressionInstruction .. 
_expression-syntax: diff --git a/loopy/__init__.py b/loopy/__init__.py index f6f611f65..a161e5478 100644 --- a/loopy/__init__.py +++ b/loopy/__init__.py @@ -43,7 +43,7 @@ from loopy.library.function import ( from loopy.kernel.data import ( auto, ValueArg, GlobalArg, ConstantArg, ImageArg, - ExpressionInstruction, CInstruction, + InstructionBase, ExpressionInstruction, CInstruction, TemporaryVariable) from loopy.kernel import LoopKernel @@ -81,7 +81,7 @@ __all__ = [ "ValueArg", "ScalarArg", "GlobalArg", "ArrayArg", "ConstantArg", "ImageArg", "TemporaryVariable", - "ExpressionInstruction", "CInstruction", + "InstructionBase", "ExpressionInstruction", "CInstruction", "default_function_mangler", "single_arg_function_mangler", diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py index 485de9ac2..16695e0ad 100644 --- a/loopy/kernel/__init__.py +++ b/loopy/kernel/__init__.py @@ -1004,6 +1004,10 @@ class LoopKernel(RecordWithoutPickling): options.append("priority=%d" % insn.priority) if insn.tags: options.append("tags=%s" % ":".join(insn.tags)) + if insn.groups: + options.append("groups=%s" % ":".join(insn.groups)) + if insn.conflicts_with_groups: + options.append("conflicts=%s" % ":".join(insn.conflicts_with_groups)) if len(loop_list) > loop_list_width: lines.append("[%s]" % loop_list) diff --git a/loopy/kernel/creation.py b/loopy/kernel/creation.py index 7442609a2..e891f0626 100644 --- a/loopy/kernel/creation.py +++ b/loopy/kernel/creation.py @@ -191,6 +191,8 @@ def parse_insn(insn): if insn_match is not None: insn_deps = None insn_deps_is_final = False + insn_groups = None + conflicts_with_groups = None insn_id = None inames_to_dup = [] priority = 0 @@ -236,6 +238,16 @@ def parse_insn(insn): insn_deps = frozenset(dep.strip() for dep in opt_value.split(":") if dep.strip()) + elif opt_key == "groups": + insn_groups = frozenset( + grp.strip() for grp in opt_value.split(":") + if grp.strip()) + + elif opt_key == "conflicts": + conflicts_with_groups = frozenset( 
+ grp.strip() for grp in opt_value.split(":") + if grp.strip()) + elif opt_key == "inames": if opt_value.startswith("+"): forced_iname_deps_is_final = False @@ -275,6 +287,8 @@ def parse_insn(insn): id=insn_id, insn_deps=insn_deps, insn_deps_is_final=insn_deps_is_final, + groups=insn_groups, + conflicts_with_groups=conflicts_with_groups, forced_iname_deps_is_final=forced_iname_deps_is_final, forced_iname_deps=forced_iname_deps, assignee=lhs, expression=rhs, diff --git a/loopy/kernel/data.py b/loopy/kernel/data.py index 248516905..b88929358 100644 --- a/loopy/kernel/data.py +++ b/loopy/kernel/data.py @@ -447,6 +447,19 @@ class InstructionBase(Record): Defaults to *False*. + .. attribute:: groups + + A :class:`frozenset` of strings indicating the names of 'instruction + groups' of which this instruction is a part. An instruction group is + considered 'active' as long as one (but not all) instructions of the + group have been executed. + + .. attribute:: conflicts_with_groups + + A :class:`frozenset` of strings indicating which instruction groups + (see :class:`InstructionBase.groups`) may not be active when this + instruction is scheduled. + .. attribute:: predicates a :class:`frozenset` of variable names the conjunction (logical and) of @@ -488,14 +501,23 @@ class InstructionBase(Record): of statements. 
""" - fields = set("id insn_deps insn_deps_is_final predicates " + fields = set("id insn_deps insn_deps_is_final " + "groups conflicts_with_groups " + "predicates " "forced_iname_deps_is_final forced_iname_deps " "priority boostable boostable_into".split()) def __init__(self, id, insn_deps, insn_deps_is_final, + groups, conflicts_with_groups, forced_iname_deps_is_final, forced_iname_deps, priority, boostable, boostable_into, predicates, tags): + if groups is None: + groups = frozenset() + + if conflicts_with_groups is None: + conflicts_with_groups = frozenset() + if forced_iname_deps_is_final is None: forced_iname_deps_is_final = False @@ -511,11 +533,14 @@ class InstructionBase(Record): assert isinstance(forced_iname_deps, frozenset) assert isinstance(insn_deps, frozenset) or insn_deps is None + assert isinstance(groups, frozenset) + assert isinstance(conflicts_with_groups, frozenset) Record.__init__(self, id=id, insn_deps=insn_deps, insn_deps_is_final=insn_deps_is_final, + groups=groups, conflicts_with_groups=conflicts_with_groups, forced_iname_deps_is_final=forced_iname_deps_is_final, forced_iname_deps=forced_iname_deps, priority=priority, @@ -586,6 +611,10 @@ class InstructionBase(Record): if self.insn_deps: result.append("deps="+":".join(self.insn_deps)) + if self.groups: + result.append("groups=%s" % ":".join(self.groups)) + if self.conflicts_with_groups: + result.append("conflicts=%s" % ":".join(self.conflicts_with_groups)) if self.priority: result.append("priority=%d" % self.priority) if self.tags: @@ -664,19 +693,23 @@ class ExpressionInstruction(InstructionBase): def __init__(self, assignee, expression, id=None, - forced_iname_deps_is_final=None, - forced_iname_deps=frozenset(), insn_deps=None, insn_deps_is_final=None, + groups=None, + conflicts_with_groups=None, + forced_iname_deps_is_final=None, + forced_iname_deps=frozenset(), boostable=None, boostable_into=None, tags=None, temp_var_type=None, priority=0, predicates=frozenset()): 
InstructionBase.__init__(self, id=id, - forced_iname_deps_is_final=forced_iname_deps_is_final, - forced_iname_deps=forced_iname_deps, insn_deps=insn_deps, insn_deps_is_final=insn_deps_is_final, + groups=groups, + conflicts_with_groups=conflicts_with_groups, + forced_iname_deps_is_final=forced_iname_deps_is_final, + forced_iname_deps=forced_iname_deps, boostable=boostable, boostable_into=boostable_into, priority=priority, @@ -808,6 +841,7 @@ class CInstruction(InstructionBase): iname_exprs, code, read_variables=frozenset(), assignees=frozenset(), id=None, insn_deps=None, insn_deps_is_final=None, + groups=None, conflicts_with_groups=None, forced_iname_deps_is_final=None, forced_iname_deps=frozenset(), priority=0, boostable=None, boostable_into=None, predicates=frozenset(), tags=None): @@ -823,10 +857,11 @@ class CInstruction(InstructionBase): InstructionBase.__init__(self, id=id, - forced_iname_deps_is_final=forced_iname_deps_is_final, - forced_iname_deps=forced_iname_deps, insn_deps=insn_deps, insn_deps_is_final=insn_deps_is_final, + groups=groups, conflicts_with_groups=conflicts_with_groups, + forced_iname_deps_is_final=forced_iname_deps_is_final, + forced_iname_deps=forced_iname_deps, boostable=boostable, boostable_into=boostable_into, priority=priority, predicates=predicates, tags=tags) diff --git a/loopy/schedule.py b/loopy/schedule.py index 298b6e513..b44569b97 100644 --- a/loopy/schedule.py +++ b/loopy/schedule.py @@ -211,6 +211,16 @@ def loop_nest_map(kernel): return result + +def group_insn_counts(kernel): + result = {} + + for insn in kernel.instructions: + for grp in insn.groups: + result[grp] = result.get(grp, 0) + 1 + + return result + # }}} @@ -296,46 +306,78 @@ class ScheduleDebugger: # {{{ scheduling algorithm class SchedulerState(Record): - pass + """ + .. attribute:: kernel + .. 
attribute:: loop_nest_map -def generate_loop_schedules_internal(sched_state, loop_priority, schedule=[], - allow_boost=False, allow_insn=False, debug=None): - # allow_insn is set to False initially and after entering each loop - # to give loops containing high-priority instructions a chance. + .. attribute:: loop_priority - kernel = sched_state.kernel - all_insn_ids = set(insn.id for insn in kernel.instructions) + See :func:`loop_nest_map`. - scheduled_insn_ids = set(sched_item.insn_id for sched_item in schedule - if isinstance(sched_item, RunInstruction)) + .. attribute:: breakable_inames - unscheduled_insn_ids = all_insn_ids - scheduled_insn_ids + .. attribute:: ilp_inames - if allow_boost is None: - rec_allow_boost = None - else: - rec_allow_boost = False + .. attribute:: vec_inames - # {{{ find active and entered loops + .. attribute:: parallel_inames - active_inames = [] - entered_inames = set() + *Note:* ``ilp`` and ``vec`` are not 'parallel' for the purposes of the + scheduler. See :attr:`ilp_inames`, :attr:`vec_inames`. + + .. attribute:: loop_priority + + + .. rubric:: Time-varying scheduler state + + .. attribute:: active_inames + + A tuple of active inames. + + .. attribute:: entered_inames + + A :class:`frozenset` of all inames ever entered. + + .. attribute:: schedule + + .. attribute:: scheduled_insn_ids + + .. attribute:: unscheduled_insn_ids + + .. attribute:: group_insn_counts + + A mapping from instruction group names to the number of instructions + contained in them. + + .. attribute:: active_group_counts + + A mapping from instruction group names to the number of instructions + in them that are left to schedule. If a group name occurs in this + mapping, that group is considered active. 
+ """ + + @property + def last_entered_loop(self): + if self.active_inames: + return self.active_inames[-1] + else: + return None - for sched_item in schedule: - if isinstance(sched_item, EnterLoop): - active_inames.append(sched_item.iname) - entered_inames.add(sched_item.iname) - if isinstance(sched_item, LeaveLoop): - active_inames.pop() - if active_inames: - last_entered_loop = active_inames[-1] +def generate_loop_schedules_internal( + sched_state, allow_boost=False, allow_insn=False, debug=None): + # allow_insn is set to False initially and after entering each loop + # to give loops containing high-priority instructions a chance. + + kernel = sched_state.kernel + + if allow_boost is None: + rec_allow_boost = None else: - last_entered_loop = None - active_inames_set = set(active_inames) + rec_allow_boost = False - # }}} + active_inames_set = frozenset(sched_state.active_inames) # {{{ decide about debug mode @@ -343,7 +385,7 @@ def generate_loop_schedules_internal(sched_state, loop_priority, schedule=[], if debug is not None: if (debug.debug_length is not None - and len(schedule) >= debug.debug_length): + and len(sched_state.schedule) >= debug.debug_length): debug_mode = True if debug_mode: @@ -354,7 +396,9 @@ def generate_loop_schedules_internal(sched_state, loop_priority, schedule=[], print(kernel) print(75*"=") print("CURRENT SCHEDULE:") - print("%s (length: %d)" % (dump_schedule(schedule), len(schedule))) + print("%s (length: %d)" % ( + dump_schedule(sched_state.schedule), + len(sched_state.schedule))) print("(LEGEND: entry into loop: <iname>, exit from loop: </iname>, " "instructions w/ no delimiters)") #print("boost allowed:", allow_boost) @@ -376,19 +420,21 @@ def generate_loop_schedules_internal(sched_state, loop_priority, schedule=[], # the current loop nest, in this set: reachable_insn_ids = set() + active_groups = frozenset(sched_state.active_group_counts) - for insn_id in sorted(unscheduled_insn_ids, + for insn_id in 
sorted(sched_state.unscheduled_insn_ids, key=lambda insn_id: kernel.id_to_insn[insn_id].priority, reverse=True): insn = kernel.id_to_insn[insn_id] - is_ready = set(insn.insn_deps) <= scheduled_insn_ids + is_ready = insn.insn_deps <= sched_state.scheduled_insn_ids if not is_ready: if debug_mode: print("instruction '%s' is missing insn depedencies '%s'" % ( - insn.id, ",".join(set(insn.insn_deps) - scheduled_insn_ids))) + insn.id, ",".join( + insn.insn_deps - sched_state.scheduled_insn_ids))) continue want = kernel.insn_inames(insn) - sched_state.parallel_inames @@ -413,6 +459,18 @@ def generate_loop_schedules_internal(sched_state, loop_priority, schedule=[], print("instruction '%s' won't work under inames '%s'" % (insn.id, ",".join(have-want))) + # {{{ determine group-based readiness + + if insn.conflicts_with_groups & active_groups: + is_ready = False + + if debug_mode: + print("instruction '%s' conflicts with active group(s) '%s'" + % (insn.id, ",".join( + active_groups & insn.conflicts_with_groups))) + + # }}} + # {{{ determine reachability if (not is_ready and have <= want): @@ -423,15 +481,43 @@ def generate_loop_schedules_internal(sched_state, loop_priority, schedule=[], if is_ready and allow_insn: if debug_mode: print("scheduling '%s'" % insn.id) - scheduled_insn_ids.add(insn.id) - schedule = schedule + [RunInstruction(insn_id=insn.id)] + + iid_set = frozenset([insn.id]) + + # {{{ update active group counts for added instruction + + if insn.groups: + new_active_group_counts = sched_state.active_group_counts.copy() + + for grp in insn.groups: + if grp in new_active_group_counts: + new_active_group_counts[grp] -= 1 + if new_active_group_counts[grp] == 0: + del new_active_group_counts[grp] + + else: + new_active_group_counts[grp] = ( + sched_state.group_insn_counts[grp]) + + else: + new_active_group_counts = sched_state.active_group_counts + + # }}} + + new_sched_state = sched_state.copy( + scheduled_insn_ids=sched_state.scheduled_insn_ids | iid_set, + 
unscheduled_insn_ids=sched_state.unscheduled_insn_ids - iid_set, + schedule=( + sched_state.schedule + (RunInstruction(insn_id=insn.id),)), + active_group_counts=new_active_group_counts, + ) # Don't be eager about entering/leaving loops--if progress has been # made, revert to top of scheduler and see if more progress can be # made. for sub_sched in generate_loop_schedules_internal( - sched_state, loop_priority, schedule, + new_sched_state, allow_boost=rec_allow_boost, debug=debug, allow_insn=True): yield sub_sched @@ -442,6 +528,8 @@ def generate_loop_schedules_internal(sched_state, loop_priority, schedule=[], # {{{ see if we're ready to leave the innermost loop + last_entered_loop = sched_state.last_entered_loop + if last_entered_loop is not None: can_leave = True @@ -449,7 +537,7 @@ def generate_loop_schedules_internal(sched_state, loop_priority, schedule=[], # If the iname is not breakable, then check that we've # scheduled all the instructions that require it. - for insn_id in unscheduled_insn_ids: + for insn_id in sched_state.unscheduled_insn_ids: insn = kernel.id_to_insn[insn_id] if last_entered_loop in kernel.insn_inames(insn): if debug_mode: @@ -466,7 +554,7 @@ def generate_loop_schedules_internal(sched_state, loop_priority, schedule=[], seen_an_insn = False ignore_count = 0 - for sched_item in schedule[::-1]: + for sched_item in sched_state.schedule[::-1]: if isinstance(sched_item, RunInstruction): seen_an_insn = True elif isinstance(sched_item, LeaveLoop): @@ -481,10 +569,12 @@ def generate_loop_schedules_internal(sched_state, loop_priority, schedule=[], break if can_leave: - schedule = schedule + [LeaveLoop(iname=last_entered_loop)] - for sub_sched in generate_loop_schedules_internal( - sched_state, loop_priority, schedule, + sched_state.copy( + schedule=( + sched_state.schedule + + (LeaveLoop(iname=last_entered_loop),)), + active_inames=sched_state.active_inames[:-1]), allow_boost=rec_allow_boost, debug=debug, allow_insn=allow_insn): yield sub_sched @@ 
-497,7 +587,7 @@ def generate_loop_schedules_internal(sched_state, loop_priority, schedule=[], # Find inames that are being referenced by as yet unscheduled instructions. needed_inames = set() - for insn_id in unscheduled_insn_ids: + for insn_id in sched_state.unscheduled_insn_ids: needed_inames.update(kernel.insn_inames(insn_id)) needed_inames = (needed_inames @@ -510,8 +600,8 @@ def generate_loop_schedules_internal(sched_state, loop_priority, schedule=[], if debug_mode: print(75*"-") print("inames still needed :", ",".join(needed_inames)) - print("active inames :", ",".join(active_inames)) - print("inames entered so far :", ",".join(entered_inames)) + print("active inames :", ",".join(sched_state.active_inames)) + print("inames entered so far :", ",".join(sched_state.entered_inames)) print("reachable insns:", ",".join(reachable_insn_ids)) print(75*"-") @@ -549,7 +639,7 @@ def generate_loop_schedules_internal(sched_state, loop_priority, schedule=[], & set(kernel.temporary_variables)): writer_insn, = kernel.writer_map()[domain_par] - if writer_insn not in scheduled_insn_ids: + if writer_insn not in sched_state.scheduled_insn_ids: data_dep_written = False break @@ -588,14 +678,14 @@ def generate_loop_schedules_internal(sched_state, loop_priority, schedule=[], # Build priority tiers. If a schedule is found in the first tier, then # loops in the second are not even tried (and so on). 
- loop_priority_set = set(loop_priority) + loop_priority_set = set(sched_state.kernel.loop_priority) useful_loops_set = set(six.iterkeys(iname_to_usefulness)) useful_and_desired = useful_loops_set & loop_priority_set if useful_and_desired: priority_tiers = [ [iname] - for iname in loop_priority + for iname in sched_state.kernel.loop_priority if iname in useful_and_desired and iname not in sched_state.ilp_inames and iname not in sched_state.vec_inames @@ -638,10 +728,17 @@ def generate_loop_schedules_internal(sched_state, loop_priority, schedule=[], for iname in sorted(tier, key=lambda iname: iname_to_usefulness.get(iname, 0), reverse=True): - new_schedule = schedule + [EnterLoop(iname=iname)] for sub_sched in generate_loop_schedules_internal( - sched_state, loop_priority, new_schedule, + sched_state.copy( + schedule=( + sched_state.schedule + + (EnterLoop(iname=iname),)), + active_inames=( + sched_state.active_inames + (iname,)), + entered_inames=( + sched_state.entered_inames | frozenset((iname,))), + ), allow_boost=rec_allow_boost, debug=debug): found_viable_schedule = True @@ -656,17 +753,17 @@ def generate_loop_schedules_internal(sched_state, loop_priority, schedule=[], print(75*"=") six.moves.input("Hit Enter for next schedule:") - if not active_inames and not unscheduled_insn_ids: + if not sched_state.active_inames and not sched_state.unscheduled_insn_ids: # if done, yield result - debug.log_success(schedule) + debug.log_success(sched_state.schedule) - yield schedule + yield sched_state.schedule else: if not allow_insn: # try again with boosting allowed for sub_sched in generate_loop_schedules_internal( - sched_state, loop_priority, schedule=schedule, + sched_state, allow_boost=allow_boost, debug=debug, allow_insn=True): yield sub_sched @@ -674,14 +771,14 @@ def generate_loop_schedules_internal(sched_state, loop_priority, schedule=[], if not allow_boost and allow_boost is not None: # try again with boosting allowed for sub_sched in 
generate_loop_schedules_internal( - sched_state, loop_priority, schedule=schedule, + sched_state, allow_boost=True, debug=debug, allow_insn=allow_insn): yield sub_sched else: # dead end if debug is not None: - debug.log_dead_end(schedule) + debug.log_dead_end(sched_state.schedule) # }}} @@ -1036,8 +1133,6 @@ def generate_loop_schedules(kernel, debug_args={}): raise LoopyError("cannot schedule a kernel that has not been " "preprocessed") - loop_priority = kernel.loop_priority - from loopy.check import pre_schedule_checks pre_schedule_checks(kernel) @@ -1064,13 +1159,26 @@ def generate_loop_schedules(kernel, debug_args={}): breakable_inames=ilp_inames, ilp_inames=ilp_inames, vec_inames=vec_inames, + + # time-varying part + active_inames=(), + entered_inames=frozenset(), + + schedule=(), + + unscheduled_insn_ids=set(insn.id for insn in kernel.instructions), + scheduled_insn_ids=frozenset(), + # ilp and vec are not parallel for the purposes of the scheduler - parallel_inames=parallel_inames - ilp_inames - vec_inames) + parallel_inames=parallel_inames - ilp_inames - vec_inames, + + group_insn_counts=group_insn_counts(kernel), + active_group_counts={}) generators = [ - generate_loop_schedules_internal(sched_state, loop_priority, + generate_loop_schedules_internal(sched_state, debug=debug, allow_boost=None), - generate_loop_schedules_internal(sched_state, loop_priority, + generate_loop_schedules_internal(sched_state, debug=debug)] for gen in generators: for gen_sched in gen: @@ -1118,7 +1226,7 @@ def generate_loop_schedules(kernel, debug_args={}): print() debug.debug_length = len(debug.longest_rejected_schedule) - for _ in generate_loop_schedules_internal(sched_state, loop_priority, + for _ in generate_loop_schedules_internal(sched_state, debug=debug): pass diff --git a/loopy/version.py b/loopy/version.py index 31f470bae..9f1378f16 100644 --- a/loopy/version.py +++ b/loopy/version.py @@ -32,4 +32,4 @@ except ImportError: else: _islpy_version = islpy.version.VERSION_TEXT 
-DATA_MODEL_VERSION = "v9-islpy%s" % _islpy_version +DATA_MODEL_VERSION = "v10-islpy%s" % _islpy_version -- GitLab