From 73b991f1282dc2987efb5c61741ce7827e7254ae Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner <inform@tiker.net> Date: Wed, 9 Nov 2011 09:54:20 -0500 Subject: [PATCH] Try to schedule entirely without boosting first before allowing it. --- MEMO | 6 ++++++ loopy/schedule.py | 52 +++++++++++++++++++++++++++++---------------- test/test_linalg.py | 3 +-- 3 files changed, 41 insertions(+), 20 deletions(-) diff --git a/MEMO b/MEMO index f09d76315..3111326d3 100644 --- a/MEMO +++ b/MEMO @@ -39,6 +39,8 @@ Things to consider To-do ^^^^^ +- CSE should be more like variable assignment + - dim_max caching - Fix all tests @@ -54,6 +56,7 @@ Future ideas - Float4 joining on fetch/store? - How can one automatically generate something like microblocks? + -> Some sort of axis-adding transform? - Better for loop bound generation -> Try a triangular loop @@ -85,6 +88,9 @@ Future ideas Dealt with ^^^^^^^^^^ +- Exhaust the search for a no-boost solution first, before looking + for a schedule with boosts. + - Pick not just axis 0, but all axes by lowest available stride - Scheduler tries too many boostability-related options diff --git a/loopy/schedule.py b/loopy/schedule.py index 67ffbb645..0b1fb08fb 100644 --- a/loopy/schedule.py +++ b/loopy/schedule.py @@ -232,6 +232,11 @@ def generate_loop_schedules_internal(kernel, loop_priority, schedule=[], allow_b scheduled_insn_ids = set(sched_item.insn_id for sched_item in schedule if isinstance(sched_item, RunInstruction)) + if allow_boost is None: + rec_allow_boost = None + else: + rec_allow_boost = False + # {{{ find active and entered loops active_inames = [] @@ -441,8 +446,10 @@ def generate_loop_schedules_internal(kernel, loop_priority, schedule=[], allow_b for iname in tier: new_schedule = schedule + [EnterLoop(iname=iname)] + for sub_sched in generate_loop_schedules_internal( kernel, loop_priority, new_schedule, + allow_boost=rec_allow_boost, debug=debug): found_viable_schedule = True yield sub_sched @@ -465,10 +472,10 @@ def generate_loop_schedules_internal(kernel, loop_priority, schedule=[], allow_b # if not done, but made some progress--try from the top for sub_sched in generate_loop_schedules_internal( kernel, loop_priority, schedule, - debug=debug): + allow_boost=rec_allow_boost, debug=debug): yield sub_sched else: - if not allow_boost: + if not allow_boost and allow_boost is not None: # try again with boosting allowed for sub_sched in generate_loop_schedules_internal( kernel, loop_priority, schedule=schedule, @@ -619,22 +626,31 @@ def generate_loop_schedules(kernel, loop_priority=[], debug=None): debug = SchedulerDebugger(debug) - for gen_sched in generate_loop_schedules_internal(kernel, loop_priority, - debug=debug): - gen_sched, owed_barriers = insert_barriers(kernel, gen_sched) - if owed_barriers: - from warnings import warn - from loopy import LoopyAdvisory - warn("Barrier insertion finished without inserting barriers for " - "local memory writes in these instructions: '%s'. " - "This often means that local memory was " - "written, but never read." % ",".join(owed_barriers), LoopyAdvisory) - - debug.stop() - yield kernel.copy(schedule=gen_sched) - debug.start() - - schedule_count += 1 + generators = [ + generate_loop_schedules_internal(kernel, loop_priority, + debug=debug, allow_boost=None), + generate_loop_schedules_internal(kernel, loop_priority, + debug=debug)] + for gen in generators: + for gen_sched in gen: + gen_sched, owed_barriers = insert_barriers(kernel, gen_sched) + if owed_barriers: + from warnings import warn + from loopy import LoopyAdvisory + warn("Barrier insertion finished without inserting barriers for " + "local memory writes in these instructions: '%s'. " + "This often means that local memory was " + "written, but never read." % ",".join(owed_barriers), LoopyAdvisory) + + debug.stop() + yield kernel.copy(schedule=gen_sched) + debug.start() + + schedule_count += 1 + + # if no-boost mode yielded a viable schedule, stop now + if schedule_count: + break debug.done_scheduling() diff --git a/test/test_linalg.py b/test/test_linalg.py index 07fcde54b..f5d9c2f00 100644 --- a/test/test_linalg.py +++ b/test/test_linalg.py @@ -368,8 +368,7 @@ def test_rank_one(ctx_factory): seq_knl = knl - #for variant in [variant_1, variant_2, variant_4]: - for variant in [variant_4]: + for variant in [variant_1, variant_2, variant_4]: kernel_gen = lp.generate_loop_schedules(variant(knl)) kernel_gen = lp.check_kernels(kernel_gen, dict(n=n)) -- GitLab