diff --git a/loopy/compiled.py b/loopy/compiled.py index c8b733d3d5e460618807999ee6c64f6f36309d5e..c8337f8b705b226a35fb026f896d60a53f0b548a 100644 --- a/loopy/compiled.py +++ b/loopy/compiled.py @@ -475,7 +475,7 @@ def auto_test_vs_ref(ref_knl, ctx, kernel_gen, op_count=[], op_label=[], paramet print_ref_code=False, print_code=True, warmup_rounds=2, edit_code=False, dump_binary=False, codegen_kwargs={}, options=[], - fills_entire_output=True, check_result=None): + fills_entire_output=True, do_check=True, check_result=None): """Compare results of `ref_knl` to the kernels generated by the generator `kernel_gen`. @@ -505,7 +505,7 @@ def auto_test_vs_ref(ref_knl, ctx, kernel_gen, op_count=[], op_label=[], paramet fill_value_ref = -17 fill_value = fill_value_ref - # {{{ set up CL context for reference run + # {{{ find candidate devices for reference run all_devs = [] cpu_devs = [] @@ -548,6 +548,17 @@ def auto_test_vs_ref(ref_knl, ctx, kernel_gen, op_count=[], op_label=[], paramet ref_sched_kernel = knl break + try: + ref_args, arg_descriptors = \ + make_ref_args(ref_sched_kernel, ref_queue, parameters, + fill_value=fill_value_ref) + except cl.RuntimeError, e: + if e.code == cl.status_code.IMAGE_FORMAT_NOT_SUPPORTED: + continue + + if not do_check: + break + ref_compiled = CompiledKernel(ref_ctx, ref_sched_kernel, options=options, codegen_kwargs=codegen_kwargs) @@ -558,13 +569,6 @@ def auto_test_vs_ref(ref_knl, ctx, kernel_gen, op_count=[], op_label=[], paramet print get_highlighted_code(ref_compiled.code) print 75*"-" - try: - ref_args, arg_descriptors = \ - make_ref_args(ref_sched_kernel, ref_queue, parameters, - fill_value=fill_value_ref) - except cl.RuntimeError, e: - if e.code == cl.status_code.IMAGE_FORMAT_NOT_SUPPORTED: - continue ref_queue.finish() ref_start = time() @@ -586,6 +590,8 @@ def auto_test_vs_ref(ref_knl, ctx, kernel_gen, op_count=[], op_label=[], paramet # {{{ compile and run parallel code + need_check = do_check + queue = cl.CommandQueue(ctx, properties=cl.command_queue_properties.PROFILING_ENABLE) @@ -610,12 +616,10 @@ def auto_test_vs_ref(ref_knl, ctx, kernel_gen, op_count=[], op_label=[], paramet print compiled.cl_program.binaries[0] print 75*"-" - do_check = True - for i in range(warmup_rounds): evt, _ = compiled(queue, **args) - if do_check: + if need_check: for arg_desc in arg_descriptors: if arg_desc is None: continue @@ -637,7 +641,7 @@ def auto_test_vs_ref(ref_knl, ctx, kernel_gen, op_count=[], op_label=[], paramet error_is_small, error = check_result(test_ary, ref_ary) assert error_is_small, error - do_check = False + need_check = False events = [] queue.finish() @@ -687,11 +691,12 @@ def auto_test_vs_ref(ref_knl, ctx, kernel_gen, op_count=[], op_label=[], paramet print "elapsed: %g s event, %s s other-event %g s wall (%d rounds)%s" % ( elapsed, elapsed_evt_2, elapsed_wall, timing_rounds, rates) - ref_rates = "" - for cnt, lbl in zip(op_count, op_label): - ref_rates += " %g %s/s" % (cnt/ref_elapsed, lbl) - print "ref: elapsed: %g s event, %g s wall%s" % ( - ref_elapsed, ref_elapsed_wall, ref_rates) + if do_check: + ref_rates = "" + for cnt, lbl in zip(op_count, op_label): + ref_rates += " %g %s/s" % (cnt/ref_elapsed, lbl) + print "ref: elapsed: %g s event, %g s wall%s" % ( + ref_elapsed, ref_elapsed_wall, ref_rates) # }}}