diff --git a/loopy/compiled.py b/loopy/compiled.py index 164e7db09c0b28799fe4f574a4e5d4fa6b0be3f2..b22eec088fa032fd4aa2c6e21152f79cf46a9804 100644 --- a/loopy/compiled.py +++ b/loopy/compiled.py @@ -277,7 +277,7 @@ def _default_check_result(result, ref_result): def auto_test_vs_ref(ref_knl, ctx, kernel_gen, op_count, op_label, parameters, - print_ref_code=False, print_code=True, warmup_rounds=2, timing_rounds=100, + print_ref_code=False, print_code=True, warmup_rounds=2, edit_code=False, dump_binary=False, with_annotation=False, fills_entire_output=True, check_result=None): """ @@ -405,38 +405,47 @@ def auto_test_vs_ref(ref_knl, ctx, kernel_gen, op_count, op_label, parameters, events = [] queue.finish() - from time import time - start_time = time() + timing_rounds = warmup_rounds - evt_start = cl.enqueue_marker(queue) + while True: + from time import time + start_time = time() - for i in range(timing_rounds): - events.append( - compiled.cl_kernel(queue, gsize, lsize, *args, g_times_l=True)) + evt_start = cl.enqueue_marker(queue) - evt_end = cl.enqueue_marker(queue) + for i in range(timing_rounds): + events.append( + compiled.cl_kernel(queue, gsize, lsize, *args, g_times_l=True)) - queue.finish() - stop_time = time() + evt_end = cl.enqueue_marker(queue) - for evt in events: - evt.wait() - evt_start.wait() - evt_end.wait() + queue.finish() + stop_time = time() - elapsed = (1e-9*events[-1].profile.END-1e-9*events[0].profile.SUBMIT) \ - / timing_rounds - try: - elapsed_evt_2 = "%g" % \ - ((1e-9*evt_end.profile.START-1e-9*evt_start.profile.START) \ - / timing_rounds) - except cl.RuntimeError: - elapsed_evt_2 = "<unavailable>" + for evt in events: + evt.wait() + evt_start.wait() + evt_end.wait() + + elapsed = (1e-9*events[-1].profile.END-1e-9*events[0].profile.SUBMIT) \ + / timing_rounds + try: + elapsed_evt_2 = "%g" % \ + ((1e-9*evt_end.profile.START-1e-9*evt_start.profile.START) \ + / timing_rounds) + except cl.RuntimeError: + elapsed_evt_2 = "<unavailable>" - elapsed_wall = (stop_time-start_time)/timing_rounds + elapsed_wall = (stop_time-start_time)/timing_rounds + + if elapsed_wall * timing_rounds < 0.3: + timing_rounds *= 4 + else: + break - print "elapsed: %g s event, %s s other-event %g s wall, rate: %g %s/s" % ( - elapsed, elapsed_evt_2, elapsed_wall, op_count/elapsed, op_label) + print "elapsed: %g s event, %s s other-event %g s wall, rate: %g %s/s (%d rounds)" % ( + elapsed, elapsed_evt_2, elapsed_wall, op_count/elapsed, op_label, + timing_rounds) print "ref: elapsed: %g s event, %g s wall, rate: %g %s/s" % ( ref_elapsed, ref_elapsed_wall, op_count/ref_elapsed, op_label)