From 7e40fdca06d48f5c774ff3ff0e0651a72b7bd14e Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner <inform@tiker.net> Date: Sun, 19 Apr 2015 22:53:06 -0500 Subject: [PATCH] Allow auto_test to deal with variables that are both read and written --- loopy/auto_test.py | 156 +++++++++++++++++------------------------- loopy/diagnostic.py | 4 ++ loopy/kernel/array.py | 3 +- test/test_fortran.py | 17 +++-- test/test_loopy.py | 22 ++++++ 5 files changed, 98 insertions(+), 104 deletions(-) diff --git a/loopy/auto_test.py b/loopy/auto_test.py index ecd74c1a6..2a44853dd 100644 --- a/loopy/auto_test.py +++ b/loopy/auto_test.py @@ -26,11 +26,12 @@ THE SOFTWARE. """ from pytools import Record +from warnings import warn import numpy as np import loopy as lp -from loopy.diagnostic import LoopyError +from loopy.diagnostic import LoopyError, AutomaticTestFailure AUTO_TEST_SKIP_RUN = False @@ -76,7 +77,7 @@ class TestArgInfo(Record): # {{{ "reference" arguments -def make_ref_args(kernel, impl_arg_info, queue, parameters, fill_value): +def make_ref_args(kernel, impl_arg_info, queue, parameters): import pyopencl as cl import pyopencl.array as cl_array @@ -143,36 +144,36 @@ def make_ref_args(kernel, impl_arg_info, queue, parameters, fill_value): numpy_strides = [itemsize*s for s in strides] storage_array = cl_array.empty(queue, alloc_size, dtype) - ary = cl_array.as_strided(storage_array, shape, numpy_strides) - if is_output: - if arg.arg_class is ImageArg: - raise LoopyError("write-mode images not supported in " - "automatic testing") + if is_output and arg.arg_class is ImageArg: + raise LoopyError("write-mode images not supported in " + "automatic testing") - if is_dtype_supported(dtype): - storage_array.fill(fill_value) - else: - from warnings import warn - warn("Cannot pre-fill array of dtype '%s' with set " - "value--zeroing instead" % dtype) - storage_array.view(np.uint8).fill(0) + fill_rand(storage_array) - ref_args[arg.name] = ary + if arg.arg_class is ImageArg: + # must be contiguous + pre_run_ary = pre_run_storage_array = storage_array.copy() + + ref_args[arg.name] = cl.image_from_array( + queue.context, ary.get()) else: - fill_rand(storage_array) - if arg.arg_class is ImageArg: - # must be contiguous - ref_args[arg.name] = cl.image_from_array( - queue.context, ary.get()) - else: - ref_args[arg.name] = ary + pre_run_storage_array = storage_array.copy() + + ary = cl_array.as_strided(storage_array, shape, numpy_strides) + pre_run_ary = cl_array.as_strided( + pre_run_storage_array, shape, numpy_strides) + ref_args[arg.name] = ary ref_arg_data.append( TestArgInfo( name=arg.name, ref_array=ary, ref_storage_array=storage_array, + + ref_pre_run_array=pre_run_ary, + ref_pre_run_storage_array=pre_run_storage_array, + ref_shape=shape, ref_strides=strides, ref_alloc_size=alloc_size, @@ -188,8 +189,7 @@ def make_ref_args(kernel, impl_arg_info, queue, parameters, fill_value): # {{{ "full-scale" arguments -def make_args(kernel, impl_arg_info, queue, ref_arg_data, parameters, - fill_value): +def make_args(kernel, impl_arg_info, queue, ref_arg_data, parameters): import pyopencl as cl import pyopencl.array as cl_array @@ -224,7 +224,7 @@ def make_args(kernel, impl_arg_info, queue, ref_arg_data, parameters, # must be contiguous args[arg.name] = cl.image_from_array( - queue.context, arg_desc.ref_array.get()) + queue.context, arg_desc.ref_pre_run_array.get()) elif arg.arg_class is GlobalArg: shape = evaluate(arg.unvec_shape, parameters) @@ -238,50 +238,37 @@ def make_args(kernel, impl_arg_info, queue, ref_arg_data, parameters, alloc_size = sum(astrd*(alen-1) for alen, astrd in zip(shape, strides)) + 1 - if arg.base_name in kernel.get_written_variables(): - storage_array = cl_array.empty(queue, alloc_size, dtype) - ary = cl_array.as_strided(storage_array, shape, numpy_strides) + # use contiguous array to transfer to host + host_ref_contig_array = arg_desc.ref_pre_run_storage_array.get() - if is_dtype_supported(dtype): - storage_array.fill(fill_value) - else: - from warnings import warn - warn("Cannot pre-fill array of dtype '%s'" % dtype) - storage_array.view(np.uint8).fill(0) + # use device shape/strides + from pyopencl.compyte.array import as_strided + host_ref_array = as_strided(host_ref_contig_array, + arg_desc.ref_shape, arg_desc.ref_numpy_strides) - args[arg.name] = ary - else: - # use contiguous array to transfer to host - host_ref_contig_array = arg_desc.ref_storage_array.get() - - # use device shape/strides - from pyopencl.compyte.array import as_strided - host_ref_array = as_strided(host_ref_contig_array, - arg_desc.ref_shape, arg_desc.ref_numpy_strides) - - # flatten the thing - host_ref_flat_array = host_ref_array.flatten() - - # create host array with test shape (but not strides) - host_contig_array = np.empty(shape, dtype=dtype) - - common_len = min( - len(host_ref_flat_array), - len(host_contig_array.ravel())) - host_contig_array.ravel()[:common_len] = \ - host_ref_flat_array[:common_len] - - # create host array with test shape and storage layout - host_storage_array = np.empty(alloc_size, dtype) - host_array = as_strided( - host_storage_array, shape, numpy_strides) - host_array[:] = host_contig_array - - host_contig_array = arg_desc.ref_storage_array.get() - storage_array = cl_array.to_device(queue, host_storage_array) - ary = cl_array.as_strided(storage_array, shape, numpy_strides) + # flatten the thing + host_ref_flat_array = host_ref_array.flatten() + + # create host array with test shape (but not strides) + host_contig_array = np.empty(shape, dtype=dtype) + + common_len = min( + len(host_ref_flat_array), + len(host_contig_array.ravel())) + host_contig_array.ravel()[:common_len] = \ + host_ref_flat_array[:common_len] + + # create host array with test shape and storage layout + host_storage_array = np.empty(alloc_size, dtype) + host_array = as_strided( + host_storage_array, shape, numpy_strides) + host_array[:] = host_contig_array - args[arg.name] = ary + host_contig_array = arg_desc.ref_storage_array.get() + storage_array = cl_array.to_device(queue, host_storage_array) + ary = cl_array.as_strided(storage_array, shape, numpy_strides) + + args[arg.name] = ary arg_desc.test_storage_array = storage_array arg_desc.test_array = ary @@ -324,7 +311,7 @@ def _default_check_result(result, ref_result): # }}} -# {{{ ref device finder +# {{{ find device for reference test def _enumerate_cl_devices_for_ref_test(): import pyopencl as cl @@ -332,8 +319,6 @@ def _enumerate_cl_devices_for_ref_test(): noncpu_devs = [] cpu_devs = [] - from warnings import warn - for pf in cl.get_platforms(): if pf.name == "Portable Computing Language": # pocl not mature enough yet, sadly @@ -368,7 +353,7 @@ def auto_test_vs_ref( ref_knl, ctx, test_knl, op_count=[], op_label=[], parameters={}, print_ref_code=False, print_code=True, warmup_rounds=2, dump_binary=False, - fills_entire_output=True, do_check=True, check_result=None + fills_entire_output=None, do_check=True, check_result=None ): """Compare results of `ref_knl` to the kernels generated by scheduling *test_knl*. @@ -396,36 +381,19 @@ def auto_test_vs_ref( from loopy.compiled import CompiledKernel, get_highlighted_cl_code if isinstance(op_count, (int, float)): - from warnings import warn warn("op_count should be a list", stacklevel=2) op_count = [op_count] if isinstance(op_label, str): - from warnings import warn warn("op_label should be a list", stacklevel=2) op_label = [op_label] - read_and_written_args = ( - ref_knl.get_read_variables() - & ref_knl.get_written_variables() - & set(ref_knl.arg_dict)) - - if read_and_written_args: - # FIXME: In principle, that's possible to test - raise LoopyError("kernel reads *and* writes argument(s) '%s' " - "and therefore cannot be automatically tested" - % ", ".join(read_and_written_args)) - from time import time if check_result is None: check_result = _default_check_result - if fills_entire_output: - fill_value_ref = -17 - fill_value = -18 - else: - fill_value_ref = -17 - fill_value = fill_value_ref + if fills_entire_output is not None: + warn("fills_entire_output is deprecated", DeprecationWarning, stacklevel=2) # {{{ compile and run reference code @@ -460,8 +428,7 @@ def auto_test_vs_ref( try: ref_args, ref_arg_data = \ make_ref_args(ref_sched_kernel, ref_cl_kernel_info.impl_arg_info, - ref_queue, parameters, - fill_value=fill_value_ref) + ref_queue, parameters) ref_args["out_host"] = False except cl.RuntimeError as e: if e.code == cl.status_code.IMAGE_FORMAT_NOT_SUPPORTED: @@ -523,7 +490,6 @@ def auto_test_vs_ref( args = None from loopy.kernel import LoopKernel if not isinstance(test_knl, LoopKernel): - from warnings import warn warn("Passing an iterable of kernels to auto_test_vs_ref " "is deprecated--just pass the kernel instead. " "Scheduling will be performed in auto_test_vs_ref.", @@ -552,7 +518,7 @@ def auto_test_vs_ref( cl_kernel_info = compiled.cl_kernel_info(frozenset()) args = make_args(kernel, cl_kernel_info.impl_arg_info, - queue, ref_arg_data, parameters, fill_value=fill_value) + queue, ref_arg_data, parameters) args["out_host"] = False print(75*"-") @@ -593,7 +559,9 @@ def auto_test_vs_ref( test_ary = test_ary[:common_len] error_is_small, error = check_result(test_ary, ref_ary) - assert error_is_small, error + if not error_is_small: + raise AutomaticTestFailure(error) + need_check = False events = [] diff --git a/loopy/diagnostic.py b/loopy/diagnostic.py index dc0d0d453..56d7f6706 100644 --- a/loopy/diagnostic.py +++ b/loopy/diagnostic.py @@ -74,6 +74,10 @@ class TypeInferenceFailure(LoopyError): pass +class AutomaticTestFailure(LoopyError): + pass + + class DependencyTypeInferenceFailure(TypeInferenceFailure): def __init__(self, message, symbol): TypeInferenceFailure.__init__(self, message) diff --git a/loopy/kernel/array.py b/loopy/kernel/array.py index 233d1a0f7..3f5340d64 100644 --- a/loopy/kernel/array.py +++ b/loopy/kernel/array.py @@ -345,7 +345,8 @@ def parse_array_dim_tags(dim_tags, use_increasing_target_axes=False): ta_nesting_level_increment = -min(ta_nesting_levels) for i in range(len(result)): if (isinstance(result[i], _StrideArrayDimTagBase) - and result[i].target_axis == target_axis): + and result[i].target_axis == target_axis + and result[i].layout_nesting_level is not None): result[i] = result[i].copy( layout_nesting_level=result[i].layout_nesting_level + ta_nesting_level_increment) diff --git a/test/test_fortran.py b/test/test_fortran.py index 8582cb117..4e8de305a 100644 --- a/test/test_fortran.py +++ b/test/test_fortran.py @@ -275,14 +275,13 @@ def test_tagged(ctx_factory): @pytest.mark.parametrize("buffer_inames", [ "", - "i_inner", "i_inner,j_inner", ]) def test_matmul(ctx_factory, buffer_inames): fortran_src = """ subroutine dgemm(m,n,l,a,b,c) implicit none - real*8 temp, a(m,l),b(l,n),c(m,n) + real*8 a(m,l),b(l,n),c(m,n) integer m,n,k,i,j,l do j = 1,n @@ -319,14 +318,14 @@ def test_matmul(ctx_factory, buffer_inames): knl = lp.buffer_array(knl, "c", buffer_inames=buffer_inames, init_expression="0", store_expression="base+buffer") - #ctx = ctx_factory() - #lp.auto_test_vs_ref(ref_knl, ctx, knl, parameters=dict(n=5, m=7, l=10)) + ctx = ctx_factory() + lp.auto_test_vs_ref(ref_knl, ctx, knl, parameters=dict(n=128, m=128, l=128)) - # FIXME: Make r/w tests possible, reactivate the above - knl = lp.preprocess_kernel(knl) - for k in lp.generate_loop_schedules(knl): - code, _ = lp.generate_code(k) - print(code) + # # FIXME: Make r/w tests possible, reactivate the above + # knl = lp.preprocess_kernel(knl) + # for k in lp.generate_loop_schedules(knl): + # code, _ = lp.generate_code(k) + # print(code) @pytest.mark.xfail diff --git a/test/test_loopy.py b/test/test_loopy.py index e713dd7d4..f99c0b3b3 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -1936,6 +1936,28 @@ def test_poisson(ctx_factory): parameters=dict(n=5, nels=15, nbf=5, sdim=2, nqp=7)) +def test_auto_test_can_detect_problems(ctx_factory): + ctx = ctx_factory() + + knl = lp.make_kernel( + "{[i,j]: 0<=i,j<n}", + """ + a[i,j] = 25 + """) + + knl = lp.add_and_infer_dtypes(knl, dict(a=np.float32)) + + ref_knl = knl + + knl = lp.link_inames(knl, "i,j", "i0") + + from loopy.diagnostic import AutomaticTestFailure + with pytest.raises(AutomaticTestFailure): + lp.auto_test_vs_ref( + ref_knl, ctx, knl, + parameters=dict(n=123)) + + if __name__ == "__main__": if len(sys.argv) > 1: exec(sys.argv[1]) -- GitLab