diff --git a/MEMO b/MEMO index ebd410f4f9f333041fafb805a743cc18b6a87b56..ec9b4ee1766846b4f8be94303d97b18a6f344c22 100644 --- a/MEMO +++ b/MEMO @@ -36,17 +36,14 @@ Things to consider - We won't generate WAW barrier-needing dependencies from one instruction to itself. +- Loopy is semi-interactive. + To-do ^^^^^ - What if no universally valid precompute base index expression is found? (test_intel_matrix_mul with n = 6*16, e.g.?) -- "No schedule found" debug help: - - - Find longest dead-end - - Automatically report on what hinders progress there - - When duplicating, use iname aliases to relieve burden on isl - Differentiate ilp.unr from ilp.seq @@ -98,6 +95,11 @@ Future ideas Dealt with ^^^^^^^^^^ +- "No schedule found" debug help: + + - Find longest dead-end + - Automatically report on what hinders progress there + - CSE should be more like variable assignment - Deal with equality constraints. diff --git a/loopy/compiled.py b/loopy/compiled.py index c449d1850626f4745048f0fe4efc8e79e9a8e0f6..07bfab181954cdda0042ea5578fd216c1a530525 100644 --- a/loopy/compiled.py +++ b/loopy/compiled.py @@ -321,11 +321,11 @@ def auto_test_vs_ref(ref_knl, ctx, kernel_gen, op_count, op_label, parameters, ref_compiled = CompiledKernel(ref_ctx, ref_sched_kernel, with_annotation=with_annotation) if print_ref_code: - print "----------------------------------------------------------" + print 75*"-" print "Reference Code:" - print "----------------------------------------------------------" + print 75*"-" print_highlighted_code(ref_compiled.code) - print "----------------------------------------------------------" + print 75*"-" ref_args, ref_input_arrays, ref_output_arrays = \ make_ref_args(ref_sched_kernel, ref_queue, parameters, @@ -366,16 +366,16 @@ def auto_test_vs_ref(ref_knl, ctx, kernel_gen, op_count, op_label, parameters, compiled = CompiledKernel(ctx, kernel, edit_code=edit_code, with_annotation=with_annotation) - print "----------------------------------------------------------" + print 75*"-" print "Kernel #%d:" % i - print "----------------------------------------------------------" + print 75*"-" if print_code: print_highlighted_code(compiled.code) - print "----------------------------------------------------------" + print 75*"-" if dump_binary: print type(compiled.cl_program) print compiled.cl_program.binaries[0] - print "----------------------------------------------------------" + print 75*"-" do_check = True diff --git a/loopy/schedule.py b/loopy/schedule.py index 9bdf9ca5c8fb6bf285a631509eea4fbd4850440d..4814f972923739aeb5d0986f5cd15c2b1b06ba50 100644 --- a/loopy/schedule.py +++ b/loopy/schedule.py @@ -174,12 +174,13 @@ def dump_schedule(schedule): return " ".join(entries) -class SchedulerDebugger: - def __init__(self, debug_length): +class ScheduleDebugger: + def __init__(self, debug_length=None, interactive=True): self.longest_rejected_schedule = [] self.success_counter = 0 self.dead_end_counter = 0 self.debug_length = debug_length + self.interactive = interactive self.elapsed_store = 0 self.start() @@ -267,8 +268,6 @@ def generate_loop_schedules_internal(kernel, loop_priority, schedule=[], allow_b # {{{ decide about debug mode debug_mode = False - #if len(schedule) == 15: - #debug_mode = True if debug is not None: if (debug.debug_length is not None @@ -277,12 +276,16 @@ def generate_loop_schedules_internal(kernel, loop_priority, schedule=[], allow_b #print dump_schedule(schedule), len(schedule) if debug_mode: + print 75*"=" + print "KERNEL:" print kernel - print "--------------------------------------------" + print 75*"=" print "CURRENT SCHEDULE:" print dump_schedule(schedule), len(schedule) - print "boost allowed:", allow_boost - print "--------------------------------------------" + print "(entry into loop: <iname>, exit from loop: </iname>, instruction names without delimiters)" + #print "boost allowed:", allow_boost + print 75*"=" + print "WHY IS THIS A DEAD-END SCHEDULE?" #if len(schedule) == 2: #from pudb import set_trace; set_trace() @@ -378,13 +381,12 @@ def generate_loop_schedules_internal(kernel, loop_priority, schedule=[], allow_b ) if debug_mode: - print "--------------------------------------------" - print "available :", ",".join(available_loops) - print "active:", ",".join(active_inames) - print "entered:", ",".join(entered_inames) - print "--------------------------------------------" + print 75*"-" + print "available inames :", ",".join(available_loops) + print "active inames :", ",".join(active_inames) + print "inames entered so far :", ",".join(entered_inames) print "reachable insns:", ",".join(reachable_insn_ids) - print "--------------------------------------------" + print 75*"-" # Don't be eager about scheduling new loops--if progress has been made, # revert to top of scheduler and see if more progress can be made another @@ -462,7 +464,8 @@ def generate_loop_schedules_internal(kernel, loop_priority, schedule=[], allow_b # }}} if debug_mode: - raw_input("Enter:") + print 75*"=" + raw_input("Hit Enter for next schedule:") if not active_inames and not available_loops and not unscheduled_insn_ids: # if done, yield result @@ -605,7 +608,7 @@ def insert_barriers(kernel, schedule, level=0): # {{{ main scheduling entrypoint -def generate_loop_schedules(kernel, loop_priority=[], debug=None): +def generate_loop_schedules(kernel, loop_priority=[], debug_args={}): from loopy.preprocess import preprocess_kernel kernel = preprocess_kernel(kernel) @@ -614,7 +617,7 @@ def generate_loop_schedules(kernel, loop_priority=[], debug=None): schedule_count = 0 - debug = SchedulerDebugger(debug) + debug = ScheduleDebugger(**debug_args) generators = [ generate_loop_schedules_internal(kernel, loop_priority, @@ -645,6 +648,28 @@ def generate_loop_schedules(kernel, loop_priority=[], debug=None): debug.done_scheduling() if not schedule_count: + if debug.interactive: + print 75*"-" + print "ERROR: Sorry--loo.py did not find a schedule for your kernel." + print 75*"-" + print "Loo.py will now show you the scheduler state at the point" + print "where the longest (dead-end) schedule was generated, in the" + print "the hope that some of this makes sense and helps you find" + print "the issue." + print + print "To disable this interactive behavior, pass" + print " debug_args=dict(interactive=False)" + print "to generate_loop_schedules()." + print 75*"-" + raw_input("Enter:") + print + print + + debug.debug_length = len(debug.longest_rejected_schedule) + for _ in generate_loop_schedules_internal(kernel, loop_priority, + debug=debug): + pass + raise RuntimeError("no valid schedules found") # }}}