From 0a77e4b647d944de89123751b40a0a2e906614b0 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Mon, 15 Jul 2013 17:53:01 -0400 Subject: [PATCH] Fix, test vector dim tags --- loopy/auto_test.py | 48 ++++++++++++----------- loopy/codegen/__init__.py | 14 ++++++- loopy/compiled.py | 13 ++++--- loopy/kernel/array.py | 81 +++++++++++++++++++++++++++++---------- test/test_loopy.py | 35 +++++++++++++++++ 5 files changed, 141 insertions(+), 50 deletions(-) diff --git a/loopy/auto_test.py b/loopy/auto_test.py index 49f8c582e..2a4a074e6 100644 --- a/loopy/auto_test.py +++ b/loopy/auto_test.py @@ -61,6 +61,8 @@ def make_ref_args(kernel, impl_arg_info, queue, parameters, fill_value): ref_arg_data = [] for arg in impl_arg_info: + kernel_arg = kernel.impl_arg_to_arg.get(arg.name) + if arg.arg_class is ValueArg: if arg.offset_for_name: continue @@ -84,25 +86,25 @@ def make_ref_args(kernel, impl_arg_info, queue, parameters, fill_value): raise ValueError("arrays need known shape to use automatic " "testing") - shape = evaluate(arg.shape, parameters) + shape = evaluate(arg.unvec_shape, parameters) + dtype = kernel_arg.dtype is_output = arg.base_name in kernel.get_written_variables() if arg.arg_class is ImageArg: storage_array = ary = cl_array.empty( - queue, shape, arg.dtype, order="C") + queue, shape, dtype, order="C") numpy_strides = None alloc_size = None strides = None else: - strides = evaluate(arg.strides, parameters) + strides = evaluate(arg.unvec_strides, parameters) from pytools import all assert all(s > 0 for s in strides) alloc_size = sum(astrd*(alen-1) for alen, astrd in zip(shape, strides)) + 1 - dtype = arg.dtype if dtype is None: raise RuntimeError("dtype for argument '%s' is not yet " "known. Perhaps you want to use " @@ -125,7 +127,9 @@ def make_ref_args(kernel, impl_arg_info, queue, parameters, fill_value): storage_array.fill(fill_value) else: from warnings import warn - warn("Cannot pre-fill array of dtype '%s'" % dtype) + warn("Cannot pre-fill array of dtype '%s' with set " + "value--zeroing instead" % dtype) + storage_array.view(np.uint8).fill(0) ref_args[arg.name] = ary else: @@ -165,6 +169,8 @@ def make_args(kernel, impl_arg_info, queue, ref_arg_data, parameters, args = {} for arg, arg_desc in zip(impl_arg_info, ref_arg_data): + kernel_arg = kernel.impl_arg_to_arg.get(arg.name) + if arg.arg_class is ValueArg: arg_value = parameters[arg.name] @@ -183,7 +189,7 @@ def make_args(kernel, impl_arg_info, queue, ref_arg_data, parameters, raise NotImplementedError("write-mode images not supported in " "automatic testing") - shape = evaluate(arg.shape, parameters) + shape = evaluate(arg.unvec_shape, parameters) assert shape == arg_desc.ref_shape # must be contiguous @@ -191,10 +197,11 @@ def make_args(kernel, impl_arg_info, queue, ref_arg_data, parameters, queue.context, arg_desc.ref_array.get()) elif arg.arg_class is GlobalArg: - shape = evaluate(arg.shape, parameters) - strides = evaluate(arg.strides, parameters) + shape = evaluate(arg.unvec_shape, parameters) + strides = evaluate(arg.unvec_strides, parameters) - itemsize = arg.dtype.itemsize + dtype = kernel_arg.dtype + itemsize = dtype.itemsize numpy_strides = [itemsize*s for s in strides] assert all(s > 0 for s in strides) @@ -202,14 +209,15 @@ def make_args(kernel, impl_arg_info, queue, ref_arg_data, parameters, for alen, astrd in zip(shape, strides)) + 1 if arg.base_name in kernel.get_written_variables(): - storage_array = cl_array.empty(queue, alloc_size, arg.dtype) + storage_array = cl_array.empty(queue, alloc_size, dtype) ary = cl_array.as_strided(storage_array, shape, numpy_strides) - if arg.dtype.isbuiltin: + if dtype.isbuiltin: storage_array.fill(fill_value) else: from warnings import warn - warn("Cannot pre-fill array of dtype '%s'" % arg.dtype) + warn("Cannot pre-fill array of dtype '%s'" % dtype) + storage_array.view(np.uint8).fill(0) args[arg.name] = ary else: @@ -225,7 +233,7 @@ def make_args(kernel, impl_arg_info, queue, ref_arg_data, parameters, host_ref_flat_array = host_ref_array.flatten() # create host array with test shape (but not strides) - host_contig_array = np.empty(shape, dtype=arg.dtype) + host_contig_array = np.empty(shape, dtype=dtype) common_len = min( len(host_ref_flat_array), @@ -234,7 +242,7 @@ def make_args(kernel, impl_arg_info, queue, ref_arg_data, parameters, host_ref_flat_array[:common_len] # create host array with test shape and storage layout - host_storage_array = np.empty(alloc_size, arg.dtype) + host_storage_array = np.empty(alloc_size, dtype) host_array = as_strided( host_storage_array, shape, numpy_strides) host_array[:] = host_contig_array @@ -265,6 +273,9 @@ def make_args(kernel, impl_arg_info, queue, ref_arg_data, parameters, # {{{ default array comparison def _default_check_result(result, ref_result): + if not result.dtype.isbuiltin and not (result == ref_result).all(): + return (False, "results do not match exactly") + if not np.allclose(ref_result, result, rtol=1e-3, atol=1e-3): l2_err = ( np.sum(np.abs(ref_result-result)**2) @@ -275,7 +286,7 @@ def _default_check_result(result, ref_result): / np.max(np.abs(ref_result-result))) return (False, - "results do not match(rel) l_2 err: %g, l_inf err: %g" + "results do not match -- (rel) l_2 err: %g, l_inf err: %g" % (l2_err, linf_err)) else: return True, None @@ -292,13 +303,6 @@ def _enumerate_cl_devices_for_ref_test(): from warnings import warn for pf in cl.get_platforms(): - if pf.name == "Portable OpenCL": - # That implementation [1] isn't quite good enough yet. - # [1] https://launchpad.net/pocl - # FIXME remove when no longer true. - warn("Skipping 'Portable OpenCL' for lack of maturity.") - continue - for dev in pf.get_devices(): if dev.type & cl.device_type.CPU: cpu_devs.append(dev) diff --git a/loopy/codegen/__init__.py b/loopy/codegen/__init__.py index e326f5f3c..ba6eb129b 100644 --- a/loopy/codegen/__init__.py +++ b/loopy/codegen/__init__.py @@ -230,6 +230,14 @@ class ImplementedDataInfo(Record): Strides in multiples of ``dtype.itemsize``. + .. attribute:: unvec_shape + .. attribute:: unvec_strides + + Strides in multiples of ``dtype.itemsize`` that accounts for + :class:`loopy.kernel.array.VectorArrayDimTag` in a scalar + manner + + .. attribute:: offset_for_name .. attribute:: stride_for_name_and_axis @@ -241,7 +249,9 @@ class ImplementedDataInfo(Record): """ def __init__(self, name, dtype, cgen_declarator, arg_class, - base_name=None, shape=None, strides=None, + base_name=None, + shape=None, strides=None, + unvec_shape=None, unvec_strides=None, offset_for_name=None, stride_for_name_and_axis=None, allows_offset=None): Record.__init__(self, @@ -252,6 +262,8 @@ class ImplementedDataInfo(Record): base_name=base_name, shape=shape, strides=strides, + unvec_shape=unvec_shape, + unvec_strides=unvec_strides, offset_for_name=offset_for_name, stride_for_name_and_axis=stride_for_name_and_axis, allows_offset=allows_offset) diff --git a/loopy/compiled.py b/loopy/compiled.py index 7156f3c1a..e64bc0f5a 100644 --- a/loopy/compiled.py +++ b/loopy/compiled.py @@ -402,12 +402,12 @@ def generate_array_arg_setup(gen, kernel, impl_arg_info, flags): with Indentation(gen): num_axes = len(arg.strides) for i in xrange(num_axes): - gen("_lpy_shape_%d = %s" % (i, strify(arg.shape[i]))) + gen("_lpy_shape_%d = %s" % (i, strify(arg.unvec_shape[i]))) itemsize = kernel_arg.dtype.itemsize for i in xrange(num_axes): gen("_lpy_strides_%d = %s" % (i, strify( - itemsize*arg.strides[i]))) + itemsize*arg.unvec_strides[i]))) if not flags.skip_checks: for i in xrange(num_axes): @@ -459,7 +459,7 @@ def generate_array_arg_setup(gen, kernel, impl_arg_info, flags): with Indentation(gen): gen("if %s.dtype != %s:" - % (arg.name, python_dtype_str(arg.dtype))) + % (arg.name, python_dtype_str(kernel_arg.dtype))) with Indentation(gen): gen("raise TypeError(\"dtype mismatch on argument '%s' " "(got: %%s, expected: %s)\" %% %s.dtype)" @@ -467,16 +467,17 @@ def generate_array_arg_setup(gen, kernel, impl_arg_info, flags): if arg.shape is not None: gen("if %s.shape != %s:" - % (arg.name, strify(arg.shape))) + % (arg.name, strify(arg.unvec_shape))) with Indentation(gen): gen("raise TypeError(\"shape mismatch on argument '%s' " "(got: %%s, expected: %%s)\" " "%% (%s.shape, %s))" - % (arg.name, arg.name, strify(arg.shape))) + % (arg.name, arg.name, strify(arg.unvec_shape))) if arg.strides is not None: itemsize = kernel_arg.dtype.itemsize - sym_strides = tuple(itemsize*s_i for s_i in arg.strides) + sym_strides = tuple( + itemsize*s_i for s_i in arg.unvec_strides) gen("if %s.strides != %s:" % (arg.name, strify(sym_strides))) with Indentation(gen): diff --git a/loopy/kernel/array.py b/loopy/kernel/array.py index 92afb4494..5771fd87e 100644 --- a/loopy/kernel/array.py +++ b/loopy/kernel/array.py @@ -277,7 +277,22 @@ def convert_computed_to_fixed_dim_tags(name, num_user_axes, num_target_axes, raise ValueError(error_msg) - stride_so_far = 1 + if vector_dim is None: + stride_so_far = 1 + else: + if shape is None or shape is lp.auto: + # unable to normalize without known shape + return None + + if not isinstance(shape[i], int): + raise TypeError("shape along vector axis %d of array '%s' " + "must be an integer, not an expression" + % (i, name)) + + stride_so_far = shape[i] + # FIXME: OpenCL-specific + if stride_so_far == 3: + stride_so_far = 4 if fixed_stride_dim_tags[target_axis]: for i in fixed_stride_dim_tags[target_axis]: @@ -668,12 +683,19 @@ class ArrayBase(Record): from loopy.codegen import ImplementedDataInfo from loopy.kernel.data import ValueArg - vector_size = self.vector_size() - - def gen_decls(name_suffix, shape, strides, stride_arg_axes, + def gen_decls(name_suffix, + shape, strides, + unvec_shape, unvec_strides, + stride_arg_axes, dtype, user_index): """ - :arg stride_arg_axes: a tuple *(user_axis, impl_axis)* + :arg unvec_shape: shape tuple + that accounts for :class:`loopy.kernel.array.VectorArrayDimTag` + in a scalar manner + :arg unvec_strides: strides tuple + that accounts for :class:`loopy.kernel.array.VectorArrayDimTag` + in a scalar manner + :arg stride_arg_axes: a tuple *(user_axis, impl_axis, unvec_impl_axis)* :arg user_index: A tuple representing a (user-facing) multi-dimensional subscript. This is filled in with concrete integers when known (such as for separate-array @@ -695,14 +717,18 @@ class ArrayBase(Record): stride_args = [] strides = list(strides) + unvec_strides = list(unvec_strides) # generate stride arguments, yielded later to keep array first - for stride_user_axis, stride_impl_axis in stride_arg_axes: + for stride_user_axis, stride_impl_axis, stride_unvec_impl_axis \ + in stride_arg_axes: from cgen import Const, POD stride_name = full_name+"_stride%d" % stride_user_axis from pymbolic import var - strides[stride_impl_axis] = var(stride_name) + strides[stride_impl_axis] = \ + unvec_strides[stride_unvec_impl_axis] = \ + var(stride_name) stride_args.append( ImplementedDataInfo( @@ -725,6 +751,8 @@ class ArrayBase(Record): dtype=dtype, shape=shape, strides=tuple(strides), + unvec_shape=unvec_shape, + unvec_strides=tuple(unvec_strides), allows_offset=bool(self.offset), ) @@ -749,23 +777,26 @@ class ArrayBase(Record): if isinstance(dim_tag, FixedStrideArrayDimTag): if self.shape is None: - new_shape = shape + (None,) + new_shape_axis = None else: - new_shape = shape + (self.shape[user_axis],) + new_shape_axis = self.shape[user_axis] import loopy as lp if dim_tag.stride is lp.auto: new_stride_arg_axes = stride_arg_axes \ - + ((user_axis, len(strides)),) + + ((user_axis, len(strides), len(unvec_strides)),) - # fixed above when final array name is known - new_strides = strides + (None,) + # repaired above when final array name is known + # (and stride argument is created) + new_stride_axis = None else: new_stride_arg_axes = stride_arg_axes - new_strides = strides + (dim_tag.stride // vector_size,) + new_stride_axis = dim_tag.stride - for res in gen_decls(name_suffix, new_shape, - new_strides, + for res in gen_decls(name_suffix, + shape + (new_shape_axis,), strides + (new_stride_axis,), + unvec_shape + (new_shape_axis,), + unvec_strides + (new_stride_axis,), new_stride_arg_axes, dtype, user_index + (None,)): yield res @@ -779,7 +810,8 @@ class ArrayBase(Record): for i in xrange(shape_i): for res in gen_decls(name_suffix + "_s%d" % i, - shape, strides, stride_arg_axes, dtype, + shape, strides, unvec_shape, unvec_strides, + stride_arg_axes, dtype, user_index + (i,)): yield res @@ -790,7 +822,11 @@ class ArrayBase(Record): "integer axis %d (0-based)" % ( self.name, user_axis)) - for res in gen_decls(name_suffix, shape, strides, + for res in gen_decls(name_suffix, + shape, strides, + unvec_shape + (shape_i,), + # vectors always have stride 1 + unvec_strides + (1,), stride_arg_axes, cl.array.vec.types[dtype, shape_i], user_index + (None,)): @@ -800,7 +836,9 @@ class ArrayBase(Record): raise RuntimeError("unsupported array dim implementation tag '%s' " "in array '%s'" % (dim_tag, self.name)) - for res in gen_decls(name_suffix="", shape=(), strides=(), + for res in gen_decls(name_suffix="", + shape=(), strides=(), + unvec_shape=(), unvec_strides=(), stride_arg_axes=(), dtype=self.dtype, user_index=()): yield res @@ -892,9 +930,10 @@ def get_access_info(ary, index, eval_expr): if isinstance(stride, int): if not dim_tag.stride % vector_size == 0: - raise RuntimeError("stride of axis %d of array '%s' " - "is not a multiple of the vector axis" - % (i, ary.name)) + raise RuntimeError("array '%s' has axis %d stride of " + "%d, which is not divisible by the size of the " + "vector (%d)" + % (ary.name, i, dim_tag.stride, vector_size)) elif stride is lp.auto: from pymbolic import var diff --git a/test/test_loopy.py b/test/test_loopy.py index 0fe765391..8c4ecb2dc 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -33,6 +33,13 @@ import pytest import logging logger = logging.getLogger(__name__) +try: + import faulthandler +except ImportError: + pass +else: + faulthandler.enable() + from pyopencl.tools import pytest_generate_tests_for_pyopencl \ as pytest_generate_tests @@ -1349,6 +1356,34 @@ def test_rob_stroud_bernstein(ctx_factory): )) +@pytest.mark.parametrize("vec_len", [2, 3, 4, 8]) +def test_vector_types(ctx_factory, vec_len): + ctx = cl.create_some_context() + + knl = lp.make_kernel(ctx.devices[0], + "{ [i,j]: 0<=i 1: exec(sys.argv[1]) -- GitLab