diff --git a/MEMO b/MEMO index 38f88de62a33bc0ef8b83e1483780ada4bc377c6..170019de51d8514a051de9c13a5b55e00b50a709 100644 --- a/MEMO +++ b/MEMO @@ -53,12 +53,8 @@ To-do - rename IndexTag -> InameTag - Data implementation tags - TODO initial bringup: - - Adapt padding - - Adapt automatic padding of temp variables - - turn base_indices into offset - TODO further: + - turn base_indices into offset - vectorization - automatic copies - write_image() diff --git a/loopy/cse.py b/loopy/cse.py index 8d8c7477baabe2b8f65c50d33330452c1ae0e592..6d9cc491308d220faee337fe4344799b51754a33 100644 --- a/loopy/cse.py +++ b/loopy/cse.py @@ -864,7 +864,7 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None, name=target_var_name, dtype=dtype, base_indices=(0,)*len(non1_storage_shape), - shape=non1_storage_shape, + shape=tuple(non1_storage_shape), is_local=None) new_temporary_variables[target_var_name] = temp_var diff --git a/loopy/kernel/array.py b/loopy/kernel/array.py index beb2b6d9f8e072489c9d2290d0af981defe32a13..831996ba210c7b6ae8104afbb9dd31029e5898aa 100644 --- a/loopy/kernel/array.py +++ b/loopy/kernel/array.py @@ -285,6 +285,7 @@ def _parse_shape_or_strides(x): x = _pymbolic_parse_if_necessary(x) if isinstance(x, lp.auto): return x + assert not isinstance(x, list) if not isinstance(x, tuple): assert x is not lp.auto x = (x,) diff --git a/loopy/kernel/data.py b/loopy/kernel/data.py index 6d8e27c4f1528c74bc85e6f1b86154346b477a3f..60cba66f0eb808e281970f250096b5c94572a376 100644 --- a/loopy/kernel/data.py +++ b/loopy/kernel/data.py @@ -262,8 +262,9 @@ class TemporaryVariable(ArrayBase): # FIXME take into account storage_shape, or something like it storage_shape = self.shape - for l in storage_shape: - temp_var_decl = ArrayOf(temp_var_decl, l) + if storage_shape: + temp_var_decl = ArrayOf(temp_var_decl, + " * ".join(str(s) for s in storage_shape)) if self.is_local: temp_var_decl = CLLocal(temp_var_decl) diff --git a/loopy/padding.py b/loopy/padding.py index 65d90a8f48c86579e0d444306e8de3e6d10dfa85..ee43f2590c915c582ab11d5891e2a23d0fdbf266 100644 --- a/loopy/padding.py +++ b/loopy/padding.py @@ -108,24 +108,34 @@ def split_arg_axis(kernel, args_and_axes, count): # }}} - # {{{ adjust strides + # {{{ adjust dim tags - new_strides = list(arg.strides) - old_stride = new_strides[axis] + if arg.dim_tags is None: + raise RuntimeError("dim_tags of '%s' are not known" % arg.name) + new_dim_tags = list(arg.dim_tags) + + old_dim_tag = arg.dim_tags[axis] + + from loopy.kernel.array import FixedStrideArrayDimTag + if not isinstance(old_dim_tag, FixedStrideArrayDimTag): + raise RuntimeError("axis %d of '%s' is not tagged fixed-stride" + % (axis, arg.name)) + + old_stride = old_dim_tag.stride outer_stride = count*old_stride if order == "F": - new_strides.insert(axis+1, outer_stride) + new_dim_tags.insert(axis+1, FixedStrideArrayDimTag(outer_stride)) elif order == "C": - new_strides.insert(axis, outer_stride) + new_dim_tags.insert(axis, FixedStrideArrayDimTag(outer_stride)) else: raise RuntimeError("order '%s' not understood" % order) - new_strides = tuple(new_strides) + new_dim_tags = tuple(new_dim_tags) # }}} - new_args[arg_idx] = arg.copy(shape=new_shape, strides=new_strides) + new_args[arg_idx] = arg.copy(shape=new_shape, dim_tags=new_dim_tags) # }}} @@ -187,9 +197,22 @@ def split_arg_axis(kernel, args_and_axes, count): def find_padding_multiple(kernel, variable, axis, align_bytes, allowed_waste=0.1): arg = kernel.arg_dict[variable] - stride = arg.strides[axis] + if arg.dim_tags is None: + raise RuntimeError("cannot find padding multiple--dim_tags of '%s' " + "are not known" % variable) + + dim_tag = arg.dim_tags[axis] + + from loopy.kernel.array import FixedStrideArrayDimTag + if not isinstance(dim_tag, FixedStrideArrayDimTag): + raise RuntimeError("cannot find padding multiple--" + "axis %d of '%s' is not tagged fixed-stride" + % (axis, variable)) + + stride = dim_tag.stride + if not isinstance(stride, int): - raise RuntimeError("cannot find padding multi--stride is not a " + raise RuntimeError("cannot find padding multiple--stride is not a " "known integer") from pytools import div_ceil @@ -212,21 +235,31 @@ def add_padding(kernel, variable, axis, align_bytes): new_args = kernel.args[:] arg = new_args[arg_idx] - new_strides = list(arg.strides) - stride = new_strides[axis] + if arg.dim_tags is None: + raise RuntimeError("cannot add padding--dim_tags of '%s' " + "are not known" % variable) + + new_dim_tags = list(arg.dim_tags) + dim_tag = new_dim_tags[axis] + + from loopy.kernel.array import FixedStrideArrayDimTag + if not isinstance(dim_tag, FixedStrideArrayDimTag): + raise RuntimeError("cannot find padding multiple--" + "axis %d of '%s' is not tagged fixed-stride" + % (axis, variable)) + + stride = dim_tag.stride if not isinstance(stride, int): raise RuntimeError("cannot find split granularity--stride is not a " "known integer") + from pytools import div_ceil - new_strides[axis] = div_ceil(stride, align_bytes) * align_bytes + new_dim_tags[axis] = FixedStrideArrayDimTag( + div_ceil(stride, align_bytes) * align_bytes) - new_args[arg_idx] = arg.copy(strides=tuple(new_strides)) + new_args[arg_idx] = arg.copy(dim_tags=tuple(new_dim_tags)) return kernel.copy(args=new_args) - - - - # vim: foldmethod=marker diff --git a/loopy/preprocess.py b/loopy/preprocess.py index 09adb1278fb45bfafd91a2603c5253a163f421ff..dc32e01454e361b96d9bb5c16d1e994d4bf7bdde 100644 --- a/loopy/preprocess.py +++ b/loopy/preprocess.py @@ -710,14 +710,24 @@ def get_auto_axis_iname_ranking_by_stride(kernel, insn): ary_name = aae.aggregate.name arg = kernel.arg_dict.get(ary_name) - ary_strides = arg.strides - if ary_strides is None and len(index_expr) == 1: - ary_strides = (1,) + if arg.dim_tags is None: + from warnings import warn + warn("Strides for '%s' are not known. Local axis assignment " + "is likely suboptimal." % arg.name) + ary_strides = [1] * len(index_expr) + else: + ary_strides = [] + from loopy.kernel.array import FixedStrideArrayDimTag + for dim_tag in arg.dim_tags: + if isinstance(dim_tag, FixedStrideArrayDimTag): + ary_strides.append(dim_tag.stride) # {{{ construct iname_to_stride_expr iname_to_stride_expr = {} for iexpr_i, stride in zip(index_expr, ary_strides): + if stride is None: + continue coeffs = CoefficientCollector()(iexpr_i) for var_name, coeff in coeffs.iteritems(): if var_name in auto_axis_inames: # excludes '1', i.e. the constant