diff --git a/loopy/schedule/__init__.py b/loopy/schedule/__init__.py index 850f0a61fcdc2878d43895bc0e024032532aa680..448dd6b64631f9ed1d491de1a7142529f30dc045 100644 --- a/loopy/schedule/__init__.py +++ b/loopy/schedule/__init__.py @@ -657,7 +657,6 @@ def generate_loop_schedules_internal( sched_state, allow_boost=False, debug=None): # allow_insn is set to False initially and after entering each loop # to give loops containing high-priority instructions a chance. - kernel = sched_state.kernel Fore = kernel.options._fore # noqa Style = kernel.options._style # noqa @@ -776,10 +775,13 @@ def generate_loop_schedules_internal( # schedule generation order. return (insn.priority, len(active_groups & insn.groups), insn.id) - insn_ids_to_try = sorted( - # Non-prescheduled instructions go first. - sched_state.unscheduled_insn_ids - sched_state.prescheduled_insn_ids, - key=insn_sort_key, reverse=True) + if sched_state.insn_ids_to_try is None: + insn_ids_to_try = sorted( + # Non-prescheduled instructions go first. + sched_state.unscheduled_insn_ids - sched_state.prescheduled_insn_ids, + key=insn_sort_key, reverse=True) + else: + insn_ids_to_try = sched_state.insn_ids_to_try insn_ids_to_try.extend( insn_id @@ -910,9 +912,13 @@ def generate_loop_schedules_internal( new_uses_of_boostability.append( (insn.id, orig_have & insn.boostable_into)) + new_insn_ids_to_try = list(insn_ids_to_try) + new_insn_ids_to_try.remove(insn.id) + new_sched_state = sched_state.copy( scheduled_insn_ids=sched_state.scheduled_insn_ids | iid_set, unscheduled_insn_ids=sched_state.unscheduled_insn_ids - iid_set, + insn_ids_to_try=new_insn_ids_to_try, schedule=( sched_state.schedule + (RunInstruction(insn_id=insn.id),)), preschedule=( @@ -928,7 +934,6 @@ def generate_loop_schedules_internal( # Don't be eager about entering/leaving loops--if progress has been # made, revert to top of scheduler and see if more progress can be # made. - for sub_sched in generate_loop_schedules_internal( new_sched_state, allow_boost=rec_allow_boost, debug=debug): @@ -1847,6 +1852,7 @@ def generate_loop_schedules_inner(kernel, debug_args={}): may_schedule_global_barriers=True, preschedule=preschedule, + insn_ids_to_try=None, # ilp and vec are not parallel for the purposes of the scheduler parallel_inames=parallel_inames - ilp_inames - vec_inames,