diff --git a/MEMO b/MEMO index fc33a978c1d7dcad9ca3282d302fe87cb3822161..eb389e7a9c6f31fe35f9fb3564969215e439208d 100644 --- a/MEMO +++ b/MEMO @@ -87,6 +87,8 @@ Future ideas Dealt with ^^^^^^^^^^ +- Scheduler tries too many boostability-related options + - Automatically generate testing code vs. sequential. - If isl can prove that all operands are positive, may use '/' instead of @@ -209,4 +211,3 @@ g: is tagged as group idx Raise error if dl is targeting a private variable, regardless of whether it's a dependency or not. - diff --git a/loopy/kernel.py b/loopy/kernel.py index 09b5c9b9cb7d7334d4eda8109143f0484d866c8d..b301ac9856e4a42928d553f88bab2d31906e98f1 100644 --- a/loopy/kernel.py +++ b/loopy/kernel.py @@ -738,6 +738,16 @@ class LoopKernel(Record): else: return self.all_insn_inames()[insn.id] + @memoize_method + def iname_to_insns(self): + result = dict( + (iname, set()) for iname in self.all_inames()) + for insn in self.instructions: + for iname in self.insn_inames(insn): + result[iname].add(insn.id) + + return result + @property @memoize_method def sequential_inames(self): @@ -990,6 +1000,22 @@ class LoopKernel(Record): return sum(lv.nbytes for lv in self.temporary_variables.itervalues() if lv.is_local) + @memoize_method + def loop_nest_map(self): + """Returns a dictionary mapping inames to other inames that are + always nested around them. + """ + result = {} + iname_to_insns = self.iname_to_insns() + + for inner_iname in self.all_inames(): + result[inner_iname] = set() + for outer_iname in self.all_inames(): + if iname_to_insns[inner_iname] < iname_to_insns[outer_iname]: + result[inner_iname].add(outer_iname) + + return result + def __str__(self): lines = [] diff --git a/loopy/schedule.py b/loopy/schedule.py index 5523583dfacf48f4cae835ed4c54fb5f26c80138..929792fdab4d404be7a78d9a52155a9fd2ec7379 100644 --- a/loopy/schedule.py +++ b/loopy/schedule.py @@ -266,12 +266,12 @@ def generate_loop_schedules_internal(kernel, loop_priority, schedule=[], allow_b and len(schedule) >= debug.debug_length): debug_mode = True - #print dump_schedule(schedule), len(schedule) if debug_mode: print kernel print "--------------------------------------------" print "CURRENT SCHEDULE:" - print dump_schedule(schedule) + print dump_schedule(schedule), len(schedule) + print "boost allowed:", allow_boost print "--------------------------------------------" #if len(schedule) == 2: @@ -289,6 +289,8 @@ def generate_loop_schedules_internal(kernel, loop_priority, schedule=[], allow_b reachable_insn_ids = set() for insn_id in all_insn_ids - scheduled_insn_ids: + if debug_mode: + print insn_id insn = kernel.id_to_insn[insn_id] schedule_now = set(insn.insn_deps) <= scheduled_insn_ids @@ -299,52 +301,28 @@ def generate_loop_schedules_internal(kernel, loop_priority, schedule=[], allow_b insn.id, ",".join(set(insn.insn_deps) - scheduled_insn_ids)) continue + want = kernel.insn_inames(insn) - parallel_inames + have = active_inames_set - parallel_inames + # If insn is boostable, it may be placed inside a more deeply # nested loop without harm. - # But if it can be scheduled on the way *out* of the currently - # active loops, now is not the right moment. - - schedulable_at_loop_levels = [] - if allow_boost: - try_loop_counts = xrange(len(active_inames), -1, -1) - else: - try_loop_counts = [len(active_inames)] - - for active_loop_count in try_loop_counts: - outer_active_inames = set(active_inames[:active_loop_count]) - - want = kernel.insn_inames(insn) - parallel_inames - have = outer_active_inames - parallel_inames - insn.boostable_into - - if allow_boost: - have -= insn.boostable_into + have = have - insn.boostable_into - if want == have: - schedulable_at_loop_levels.append(active_loop_count) - - if schedulable_at_loop_levels != [len(active_inames)]: + if want != have: schedule_now = False + if debug_mode: - if schedulable_at_loop_levels: - print ("instruction '%s' will be scheduled when more " - "loops have been exited" % insn.id) - else: - want = (kernel.insn_inames(insn) - parallel_inames) - have = (active_inames_set - parallel_inames) - if want-have: - print ("instruction '%s' is missing inames '%s'" - % (insn.id, ",".join(want-have))) - if have-want: - print ("instruction '%s' won't work under inames '%s'" - % (insn.id, ",".join(have-want))) + if want-have: + print ("instruction '%s' is missing inames '%s'" + % (insn.id, ",".join(want-have))) + if have-want: + print ("instruction '%s' won't work under inames '%s'" + % (insn.id, ",".join(have-want))) # {{{ determine reachability - want = kernel.insn_inames(insn) - parallel_inames - have = active_inames_set - parallel_inames - if (not schedule_now and have <= want): reachable_insn_ids.add(insn_id) else: @@ -355,6 +333,8 @@ def generate_loop_schedules_internal(kernel, loop_priority, schedule=[], allow_b # }}} if schedule_now: + if debug_mode: + print "scheduling '%s'" % insn.id scheduled_insn_ids.add(insn.id) schedule = schedule + [RunInstruction(insn_id=insn.id)] made_progress = True @@ -408,6 +388,9 @@ def generate_loop_schedules_internal(kernel, loop_priority, schedule=[], allow_b useful_loops = [] for iname in available_loops: + if not kernel.loop_nest_map()[iname] <= active_inames_set | parallel_inames: + continue + # {{{ determine if that gets us closer to being able to schedule an insn useful = False @@ -415,8 +398,10 @@ def generate_loop_schedules_internal(kernel, loop_priority, schedule=[], allow_b hypothetically_active_loops = active_inames_set | set([iname]) for insn_id in reachable_insn_ids: insn = kernel.id_to_insn[insn_id] - if (hypothetically_active_loops - <= (kernel.insn_inames(insn) | insn.boostable_into)): + + want = kernel.insn_inames(insn) | insn.boostable_into + + if hypothetically_active_loops <= want: useful = True break