diff --git a/loopy/schedule/__init__.py b/loopy/schedule/__init__.py index 850f0a61fcdc2878d43895bc0e024032532aa680..771ba737816e39d496d395dde98b5791cb57369e 100644 --- a/loopy/schedule/__init__.py +++ b/loopy/schedule/__init__.py @@ -657,7 +657,6 @@ def generate_loop_schedules_internal( sched_state, allow_boost=False, debug=None): # allow_insn is set to False initially and after entering each loop # to give loops containing high-priority instructions a chance. - kernel = sched_state.kernel Fore = kernel.options._fore # noqa Style = kernel.options._style # noqa @@ -776,10 +775,14 @@ def generate_loop_schedules_internal( # schedule generation order. return (insn.priority, len(active_groups & insn.groups), insn.id) - insn_ids_to_try = sorted( - # Non-prescheduled instructions go first. - sched_state.unscheduled_insn_ids - sched_state.prescheduled_insn_ids, - key=insn_sort_key, reverse=True) + # Use previous instruction sorting result if it is available + if sched_state.insn_ids_to_try is None: + insn_ids_to_try = sorted( + # Non-prescheduled instructions go first. + sched_state.unscheduled_insn_ids - sched_state.prescheduled_insn_ids, + key=insn_sort_key, reverse=True) + else: + insn_ids_to_try = sched_state.insn_ids_to_try insn_ids_to_try.extend( insn_id @@ -898,12 +901,23 @@ def generate_loop_schedules_internal( else: new_active_group_counts[grp] = ( sched_state.group_insn_counts[grp] - 1) - else: new_active_group_counts = sched_state.active_group_counts # }}} + # {{{ update instruction_ids_to_try + + new_insn_ids_to_try = list(insn_ids_to_try) + new_insn_ids_to_try.remove(insn.id) + + # invalidate instruction_ids_to_try when active group changes + if set(new_active_group_counts.keys()) != set( + sched_state.active_group_counts.keys()): + new_insn_ids_to_try = None + + # }}} + new_uses_of_boostability = [] if allow_boost: if orig_have & insn.boostable_into: @@ -913,6 +927,7 @@ def generate_loop_schedules_internal( new_sched_state = sched_state.copy( scheduled_insn_ids=sched_state.scheduled_insn_ids | iid_set, unscheduled_insn_ids=sched_state.unscheduled_insn_ids - iid_set, + insn_ids_to_try=new_insn_ids_to_try, schedule=( sched_state.schedule + (RunInstruction(insn_id=insn.id),)), preschedule=( @@ -928,7 +943,6 @@ def generate_loop_schedules_internal( # Don't be eager about entering/leaving loops--if progress has been # made, revert to top of scheduler and see if more progress can be # made. - for sub_sched in generate_loop_schedules_internal( new_sched_state, allow_boost=rec_allow_boost, debug=debug): @@ -1847,6 +1861,7 @@ def generate_loop_schedules_inner(kernel, debug_args={}): may_schedule_global_barriers=True, preschedule=preschedule, + insn_ids_to_try=None, # ilp and vec are not parallel for the purposes of the scheduler parallel_inames=parallel_inames - ilp_inames - vec_inames,