Skip to content
Snippets Groups Projects
Commit 73b991f1 authored by Andreas Klöckner
Browse files

Try to schedule entirely without boosting first before allowing it.

parent 5d0d02c8
No related branches found
No related tags found
No related merge requests found
......@@ -39,6 +39,8 @@ Things to consider
To-do
^^^^^
- CSE should be more like variable assignment
- dim_max caching
- Fix all tests
......@@ -54,6 +56,7 @@ Future ideas
- Float4 joining on fetch/store?
- How can one automatically generate something like microblocks?
-> Some sort of axis-adding transform?
- Better for loop bound generation
-> Try a triangular loop
......@@ -85,6 +88,9 @@ Future ideas
Dealt with
^^^^^^^^^^
- Exhaust the search for a no-boost solution first, before looking
for a schedule with boosts.
- Pick not just axis 0, but all axes by lowest available stride
- Scheduler tries too many boostability-related options
......
......@@ -232,6 +232,11 @@ def generate_loop_schedules_internal(kernel, loop_priority, schedule=[], allow_b
scheduled_insn_ids = set(sched_item.insn_id for sched_item in schedule
if isinstance(sched_item, RunInstruction))
if allow_boost is None:
rec_allow_boost = None
else:
rec_allow_boost = False
# {{{ find active and entered loops
active_inames = []
......@@ -441,8 +446,10 @@ def generate_loop_schedules_internal(kernel, loop_priority, schedule=[], allow_b
for iname in tier:
new_schedule = schedule + [EnterLoop(iname=iname)]
for sub_sched in generate_loop_schedules_internal(
kernel, loop_priority, new_schedule,
allow_boost=rec_allow_boost,
debug=debug):
found_viable_schedule = True
yield sub_sched
......@@ -465,10 +472,10 @@ def generate_loop_schedules_internal(kernel, loop_priority, schedule=[], allow_b
# if not done, but made some progress--try from the top
for sub_sched in generate_loop_schedules_internal(
kernel, loop_priority, schedule,
debug=debug):
allow_boost=rec_allow_boost, debug=debug):
yield sub_sched
else:
if not allow_boost:
if not allow_boost and allow_boost is not None:
# try again with boosting allowed
for sub_sched in generate_loop_schedules_internal(
kernel, loop_priority, schedule=schedule,
......@@ -619,22 +626,31 @@ def generate_loop_schedules(kernel, loop_priority=[], debug=None):
debug = SchedulerDebugger(debug)
for gen_sched in generate_loop_schedules_internal(kernel, loop_priority,
debug=debug):
gen_sched, owed_barriers = insert_barriers(kernel, gen_sched)
if owed_barriers:
from warnings import warn
from loopy import LoopyAdvisory
warn("Barrier insertion finished without inserting barriers for "
"local memory writes in these instructions: '%s'. "
"This often means that local memory was "
"written, but never read." % ",".join(owed_barriers), LoopyAdvisory)
debug.stop()
yield kernel.copy(schedule=gen_sched)
debug.start()
schedule_count += 1
generators = [
generate_loop_schedules_internal(kernel, loop_priority,
debug=debug, allow_boost=None),
generate_loop_schedules_internal(kernel, loop_priority,
debug=debug)]
for gen in generators:
for gen_sched in gen:
gen_sched, owed_barriers = insert_barriers(kernel, gen_sched)
if owed_barriers:
from warnings import warn
from loopy import LoopyAdvisory
warn("Barrier insertion finished without inserting barriers for "
"local memory writes in these instructions: '%s'. "
"This often means that local memory was "
"written, but never read." % ",".join(owed_barriers), LoopyAdvisory)
debug.stop()
yield kernel.copy(schedule=gen_sched)
debug.start()
schedule_count += 1
# if no-boost mode yielded a viable schedule, stop now
if schedule_count:
break
debug.done_scheduling()
......
......@@ -368,8 +368,7 @@ def test_rank_one(ctx_factory):
seq_knl = knl
#for variant in [variant_1, variant_2, variant_4]:
for variant in [variant_4]:
for variant in [variant_1, variant_2, variant_4]:
kernel_gen = lp.generate_loop_schedules(variant(knl))
kernel_gen = lp.check_kernels(kernel_gen, dict(n=n))
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment