diff --git a/doc/reference.rst b/doc/reference.rst index cbd96f67ea4edc9827325a64e33fca3f88fd0fa2..5499f92557efdd2a407685f940f4de2cd9182092 100644 --- a/doc/reference.rst +++ b/doc/reference.rst @@ -480,10 +480,12 @@ Influencing data access .. autofunction:: remove_unused_arguments +.. autofunction:: set_array_dim_names + Padding ^^^^^^^ -.. autofunction:: split_arg_axis +.. autofunction:: split_array_dim .. autofunction:: find_padding_multiple diff --git a/loopy/__init__.py b/loopy/__init__.py index c8b9d570a3ddbfc57b8c6fc7c15ace0f14c248c2..b50a4ecf1b610f303dbfc5569822f2aa8ae3a2a7 100644 --- a/loopy/__init__.py +++ b/loopy/__init__.py @@ -58,7 +58,7 @@ from loopy.subst import extract_subst, expand_subst, assignment_to_subst from loopy.precompute import precompute from loopy.buffer import buffer_array from loopy.fusion import fuse_kernels -from loopy.padding import (split_arg_axis, find_padding_multiple, +from loopy.padding import (split_array_dim, split_arg_axis, find_padding_multiple, add_padding) from loopy.preprocess import (preprocess_kernel, realize_reduction, infer_unknown_types) @@ -94,7 +94,8 @@ __all__ = [ "extract_subst", "expand_subst", "assignment_to_subst", "precompute", "buffer_array", "fuse_kernels", - "split_arg_axis", "find_padding_multiple", "add_padding", + "split_array_dim", "split_arg_axis", "find_padding_multiple", + "add_padding", "get_dot_dependency_graph", "show_dependency_graph", @@ -146,6 +147,7 @@ __all__ = [ "find_rules_matching", "find_one_rule_matching", "realize_ilp", + "set_array_dim_names", # }}} ] @@ -1121,6 +1123,8 @@ def add_prefetch(kernel, var_name, sweep_inames=[], dim_arg_names=None, parameters = [] for i in range(arg.num_user_axes()): based_on = "%s_dim_%d" % (c_name, i) + if arg.dim_names is not None: + based_on = "%s_dim_%s" % (c_name, arg.dim_names[i]) if dim_arg_names is not None and i < len(dim_arg_names): based_on = dim_arg_names[i] @@ -1303,14 +1307,14 @@ def change_arg_to_image(knl, name): # {{{ tag data axes def tag_data_axes(knl, ary_names, dim_tags): - for ary_name in ary_names.split(","): - ary_name = ary_name.strip() - if ary_name in knl.temporary_variables: - ary = knl.temporary_variables[ary_name] - elif ary_name in knl.arg_dict: - ary = knl.arg_dict[ary_name] - else: - raise NameError("array '%s' was not found" % ary_name) + from loopy.kernel.tools import ArrayChanger + + if isinstance(ary_names, str): + ary_names = ary_names.split(",") + + for ary_name in ary_names: + achng = ArrayChanger(knl, ary_name) + ary = achng.get() from loopy.kernel.array import parse_array_dim_tags new_dim_tags = parse_array_dim_tags(dim_tags, @@ -1319,23 +1323,7 @@ def tag_data_axes(knl, ary_names, dim_tags): ary = ary.copy(dim_tags=tuple(new_dim_tags)) - if ary_name in knl.temporary_variables: - new_tv = knl.temporary_variables.copy() - new_tv[ary_name] = ary - knl = knl.copy(temporary_variables=new_tv) - - elif ary_name in knl.arg_dict: - new_args = [] - for arg in knl.args: - if arg.name == ary_name: - new_args.append(ary) - else: - new_args.append(arg) - - knl = knl.copy(args=new_args) - - else: - raise NameError("array '%s' was not found" % ary_name) + knl = achng.with_changed_array(ary) return knl @@ -2198,4 +2186,27 @@ def realize_ilp(kernel, iname): # }}} +# {{{ set_array_dim_names + +def set_array_dim_names(kernel, ary_names, dim_names): + from loopy.kernel.tools import ArrayChanger + if isinstance(ary_names, str): + ary_names = ary_names.split(",") + + if isinstance(dim_names, str): + dim_names = tuple(dim_names.split(",")) + + for ary_name in ary_names: + achng = ArrayChanger(kernel, ary_name) + ary = achng.get() + + ary = ary.copy(dim_names=dim_names) + + kernel = achng.with_changed_array(ary) + + return kernel + +# }}} + + # vim: foldmethod=marker diff --git a/loopy/buffer.py b/loopy/buffer.py index 5677421de24ad2319ffcc7abf712f32c5b38132b..d91ec1a9ab912f1e3300c450bc4accda6cbf6f8d 100644 --- a/loopy/buffer.py +++ b/loopy/buffer.py @@ -230,8 +230,12 @@ def buffer_array(kernel, var_name, buffer_inames, init_expression=None, new_iname_to_tag = {} for i in range(len(var_shape)): - init_iname = var_name_gen("%s_init_%d" % (var_name, i)) - store_iname = var_name_gen("%s_store_%d" % (var_name, i)) + dim_name = str(i) + if isinstance(var_descr, ArrayBase) and var_descr.dim_names is not None: + dim_name = var_descr.dim_names[i] + + init_iname = var_name_gen("%s_init_%s" % (var_name, dim_name)) + store_iname = var_name_gen("%s_store_%s" % (var_name, dim_name)) new_iname_to_tag[init_iname] = default_tag new_iname_to_tag[store_iname] = default_tag diff --git a/loopy/kernel/array.py b/loopy/kernel/array.py index 29155904938261e3c9386c7dbd3274b4876049cd..6deb2a348b606d7d717b290e900d7bda6169fb4e 100644 --- a/loopy/kernel/array.py +++ b/loopy/kernel/array.py @@ -534,6 +534,15 @@ class ArrayBase(Record): .. attribute:: offset + .. attribute:: dim_names + + A tuple of strings providing names for the array axes, or *None*. + If given, must have the same number of entries as :attr:`dim_tags` + and :attr:`dim_tags`. These do not live in any particular namespace + (i.e. collide with no other names) and serve a purely + informational/documentational purpose. On occasion, they are used + to generate more informative names than could be achieved by + axis numbers. """ # Note that order may also wind up in attributes, if the @@ -542,7 +551,7 @@ class ArrayBase(Record): allowed_extra_kwargs = [] def __init__(self, name, dtype=None, shape=None, dim_tags=None, offset=0, - strides=None, order=None, **kwargs): + dim_names=None, strides=None, order=None, **kwargs): """ All of the following are optional. Specify either strides or shape. @@ -646,6 +655,19 @@ class ArrayBase(Record): if shape_known: shape = _parse_shape_or_strides(shape) + # {{{ check dim_names + + if dim_names is not None: + if len(dim_names) != len(set(dim_names)): + raise LoopyError("dim_names are not unique") + + for n in dim_names: + if not isinstance(n, str): + raise LoopyError("found non-string '%s' in dim_names" + % type(n).__name__) + + # }}} + # {{{ convert strides to dim_tags (Note: strides override order) if dim_tags is not None and strides_known: @@ -667,18 +689,20 @@ class ArrayBase(Record): num_user_axes = None if shape_known: num_user_axes = len(shape) - if dim_tags is not None: - new_num_user_axes = len(dim_tags) + for dim_iterable in [dim_tags, dim_names]: + if dim_iterable is not None: + new_num_user_axes = len(dim_iterable) - if num_user_axes is None: - num_user_axes = new_num_user_axes - else: - if new_num_user_axes != num_user_axes: - raise LoopyError("contradictory values for number of dimensions " - "of array '%s' from shape, strides, or dim_tags" - % name) + if num_user_axes is None: + num_user_axes = new_num_user_axes + else: + if new_num_user_axes != num_user_axes: + raise LoopyError("contradictory values for number of " + "dimensions of array '%s' from shape, strides, " + "dim_tags, or dim_names" + % name) - del new_num_user_axes + del new_num_user_axes # }}} @@ -746,6 +770,7 @@ class ArrayBase(Record): shape=shape, dim_tags=dim_tags, offset=offset, + dim_names=dim_names, order=order, **kwargs) @@ -760,6 +785,7 @@ class ArrayBase(Record): and istoee(self.shape, other.shape) and self.dim_tags == other.dim_tags and isee(self.offset, other.offset) + and self.dim_names == other.dim_names and self.order == other.order ) @@ -804,8 +830,15 @@ class ArrayBase(Record): elif self.shape is lp.auto: info_entries.append("shape: auto") else: - info_entries.append("shape: (%s)" - % ", ".join(str(i) for i in self.shape)) + # shape is iterable + if self.dim_names is not None: + info_entries.append("shape: (%s)" + % ", ".join( + "%s:%s" % (n, i) + for n, i in zip(self.dim_names, self.shape))) + else: + info_entries.append("shape: (%s)" + % ", ".join(str(i) for i in self.shape)) if self.dim_tags is not None and self.dim_tags != (): info_entries.append("dim_tags: (%s)" @@ -833,6 +866,7 @@ class ArrayBase(Record): key_builder.update_for_pymbolic_expression(key_hash, self.shape) key_builder.rec(key_hash, self.dim_tags) key_builder.rec(key_hash, self.offset) + key_builder.rec(key_hash, self.dim_names) @property @memoize_method diff --git a/loopy/kernel/data.py b/loopy/kernel/data.py index 647eb5e36c2d05748c4882cb8b3e3d3d84d981ae..ed3c40ce9c9fea5e08a7a97027e4ec7ef8ae59d8 100644 --- a/loopy/kernel/data.py +++ b/loopy/kernel/data.py @@ -333,7 +333,7 @@ class TemporaryVariable(ArrayBase): ] def __init__(self, name, dtype=None, shape=(), is_local=auto, - dim_tags=None, offset=0, strides=None, order=None, + dim_tags=None, offset=0, dim_names=None, strides=None, order=None, base_indices=None, storage_shape=None, base_storage=None): """ @@ -351,7 +351,8 @@ class TemporaryVariable(ArrayBase): ArrayBase.__init__(self, name=intern(name), dtype=dtype, shape=shape, - dim_tags=dim_tags, order="C", + dim_tags=dim_tags, offset=offset, dim_names=dim_names, + order="C", base_indices=base_indices, is_local=is_local, storage_shape=storage_shape, base_storage=base_storage) diff --git a/loopy/kernel/tools.py b/loopy/kernel/tools.py index dd93ab2f368f0f4ac560f3ac73e336a8448af487..4e88192715c58f0bd9c94ba5230a46754eef401a 100644 --- a/loopy/kernel/tools.py +++ b/loopy/kernel/tools.py @@ -893,4 +893,45 @@ def assign_automatic_axes(kernel, axis=0, local_size=None): # }}} +# {{{ array modifier + +class ArrayChanger(object): + def __init__(self, kernel, array_name): + self.kernel = kernel + self.array_name = array_name + + def get(self): + ary_name = self.array_name + if ary_name in self.kernel.temporary_variables: + return self.kernel.temporary_variables[ary_name] + elif ary_name in self.kernel.arg_dict: + return self.kernel.arg_dict[ary_name] + else: + raise NameError("array '%s' was not found" % ary_name) + + def with_changed_array(self, new_array): + knl = self.kernel + ary_name = self.array_name + + if ary_name in knl.temporary_variables: + new_tv = knl.temporary_variables.copy() + new_tv[ary_name] = new_array + return knl.copy(temporary_variables=new_tv) + + elif ary_name in knl.arg_dict: + new_args = [] + for arg in knl.args: + if arg.name == ary_name: + new_args.append(new_array) + else: + new_args.append(arg) + + return knl.copy(args=new_args) + + else: + raise NameError("array '%s' was not found" % ary_name) + +# }}} + + # vim: foldmethod=marker diff --git a/loopy/padding.py b/loopy/padding.py index 9aed22598517cb423d978e43f41e98dc8ab43cfd..021c5bfab31584a19cc7da87964f9f8cdc0e08ec 100644 --- a/loopy/padding.py +++ b/loopy/padding.py @@ -25,12 +25,13 @@ THE SOFTWARE. """ +from pytools import MovedFunctionDeprecationWrapper from loopy.symbolic import RuleAwareIdentityMapper, SubstitutionRuleMappingContext -class ArgAxisSplitHelper(RuleAwareIdentityMapper): +class ArrayAxisSplitHelper(RuleAwareIdentityMapper): def __init__(self, rule_mapping_context, arg_names, handler): - super(ArgAxisSplitHelper, self).__init__(rule_mapping_context) + super(ArrayAxisSplitHelper, self).__init__(rule_mapping_context) self.arg_names = arg_names self.handler = handler @@ -38,18 +39,20 @@ class ArgAxisSplitHelper(RuleAwareIdentityMapper): if expr.aggregate.name in self.arg_names: return self.handler(expr) else: - return super(ArgAxisSplitHelper, self).map_subscript(expr, expn_state) + return super(ArrayAxisSplitHelper, self).map_subscript(expr, expn_state) -def split_arg_axis(kernel, args_and_axes, count, auto_split_inames=True, +def split_array_dim(kernel, arrays_and_axes, count, auto_split_inames=True, split_kwargs=None): """ - :arg args_and_axes: a list of tuples *(arg, axis_nr)* indicating + :arg arrays_and_axes: a list of tuples *(array, axis_nr)* indicating that the index in *axis_nr* should be split. The tuples may - also be *(arg, axis_nr, "F")*, indicating that the index will + also be *(array, axis_nr, "F")*, indicating that the index will be split as it would be according to Fortran order. - If *args_and_axes* is a :class:`tuple`, it is automatically + *array* may name a temporary variable or an argument. + + If *arrays_and_axes* is a :class:`tuple`, it is automatically wrapped in a list, to make single splits easier. :arg count: The group size to use in the split. @@ -68,7 +71,7 @@ def split_arg_axis(kernel, args_and_axes, count, auto_split_inames=True, if split_kwargs is None: split_kwargs = {} - # {{{ process input into arg_to_rest + # {{{ process input into array_to_rest # where "rest" is the non-argument-name part of the input tuples # in args_and_axes @@ -80,38 +83,32 @@ def split_arg_axis(kernel, args_and_axes, count, auto_split_inames=True, else: raise RuntimeError("split instruction '%s' not understood" % rest) - if isinstance(args_and_axes, tuple): - args_and_axes = [args_and_axes] + if isinstance(arrays_and_axes, tuple): + arrays_and_axes = [arrays_and_axes] - arg_to_rest = dict((tup[0], normalize_rest(tup[1:])) for tup in args_and_axes) + array_to_rest = dict( + (tup[0], normalize_rest(tup[1:])) for tup in arrays_and_axes) - if len(args_and_axes) != len(arg_to_rest): + if len(arrays_and_axes) != len(array_to_rest): raise RuntimeError("cannot split multiple axes of the same variable") - del args_and_axes + del arrays_and_axes # }}} - from loopy.kernel.data import GlobalArg - for arg_name in arg_to_rest: - if not isinstance(kernel.arg_dict[arg_name], GlobalArg): - raise RuntimeError("only GlobalArg axes may be split") - - arg_to_idx = dict((arg.name, i) for i, arg in enumerate(kernel.args)) - - # {{{ adjust args + # {{{ adjust arrays - new_args = kernel.args[:] - for arg_name, (axis, order) in six.iteritems(arg_to_rest): - arg_idx = arg_to_idx[arg_name] + from loopy.kernel.tools import ArrayChanger - arg = new_args[arg_idx] + for array_name, (axis, order) in six.iteritems(array_to_rest): + achng = ArrayChanger(kernel, array_name) + ary = achng.get() from pytools import div_ceil # {{{ adjust shape - new_shape = arg.shape + new_shape = ary.shape if new_shape is not None: new_shape = list(new_shape) axis_len = new_shape[axis] @@ -130,16 +127,16 @@ def split_arg_axis(kernel, args_and_axes, count, auto_split_inames=True, # {{{ adjust dim tags - if arg.dim_tags is None: - raise RuntimeError("dim_tags of '%s' are not known" % arg.name) - new_dim_tags = list(arg.dim_tags) + if ary.dim_tags is None: + raise RuntimeError("dim_tags of '%s' are not known" % array_name) + new_dim_tags = list(ary.dim_tags) - old_dim_tag = arg.dim_tags[axis] + old_dim_tag = ary.dim_tags[axis] from loopy.kernel.array import FixedStrideArrayDimTag if not isinstance(old_dim_tag, FixedStrideArrayDimTag): raise RuntimeError("axis %d of '%s' is not tagged fixed-stride" - % (axis, arg.name)) + % (axis, array_name)) old_stride = old_dim_tag.stride outer_stride = count*old_stride @@ -155,7 +152,27 @@ def split_arg_axis(kernel, args_and_axes, count, auto_split_inames=True, # }}} - new_args[arg_idx] = arg.copy(shape=new_shape, dim_tags=new_dim_tags) + # {{{ adjust dim_names + + new_dim_names = ary.dim_names + if new_dim_names is not None: + new_dim_names = list(new_dim_names) + existing_name = new_dim_names[axis] + new_dim_names[axis] = existing_name + "_inner" + outer_name = existing_name + "_outer" + + if order == "F": + new_dim_names.insert(axis+1, outer_name) + elif order == "C": + new_dim_names.insert(axis, outer_name) + else: + raise RuntimeError("order '%s' not understood" % order) + new_dim_names = tuple(new_dim_names) + + # }}} + + kernel = achng.with_changed_array(ary.copy( + shape=new_shape, dim_tags=new_dim_tags, dim_names=new_dim_names)) # }}} @@ -164,7 +181,7 @@ def split_arg_axis(kernel, args_and_axes, count, auto_split_inames=True, var_name_gen = kernel.get_var_name_generator() def split_access_axis(expr): - axis_nr, order = arg_to_rest[expr.aggregate.name] + axis_nr, order = array_to_rest[expr.aggregate.name] idx = expr.index if not isinstance(idx, tuple): @@ -212,12 +229,10 @@ def split_arg_axis(kernel, args_and_axes, count, auto_split_inames=True, rule_mapping_context = SubstitutionRuleMappingContext( kernel.substitutions, var_name_gen) - aash = ArgAxisSplitHelper(rule_mapping_context, - set(six.iterkeys(arg_to_rest)), split_access_axis) + aash = ArrayAxisSplitHelper(rule_mapping_context, + set(six.iterkeys(array_to_rest)), split_access_axis) kernel = rule_mapping_context.finish_kernel(aash.map_kernel(kernel)) - kernel = kernel.copy(args=new_args) - if auto_split_inames: from loopy import split_iname for iname, (outer_iname, inner_iname) in six.iteritems(split_vars): @@ -227,6 +242,8 @@ def split_arg_axis(kernel, args_and_axes, count, auto_split_inames=True, return kernel +split_arg_axis = MovedFunctionDeprecationWrapper(split_array_dim) + def find_padding_multiple(kernel, variable, axis, align_bytes, allowed_waste=0.1): arg = kernel.arg_dict[variable] diff --git a/loopy/version.py b/loopy/version.py index 389659b86ac35b00cf14267b33b17b0d841d19c1..6ecd8b490c26bb44c16809011aa5b832db2e2fa3 100644 --- a/loopy/version.py +++ b/loopy/version.py @@ -32,4 +32,4 @@ except ImportError: else: _islpy_version = islpy.version.VERSION_TEXT -DATA_MODEL_VERSION = "v12-islpy%s" % _islpy_version +DATA_MODEL_VERSION = "v13-islpy%s" % _islpy_version diff --git a/test/test_dg.py b/test/test_dg.py index 0eb5be224d23fb295b229b3913ef479dc519e9fa..63a961423d2f750a4c9a25fdcb5fb56a479d8a35 100644 --- a/test/test_dg.py +++ b/test/test_dg.py @@ -140,7 +140,7 @@ def test_dg_volume(ctx_factory): for name in ["u", "v", "w", "p"] for prefix in ["", "rhs"]] - knl = lp.split_arg_axis(knl, [(nm, 0) for nm in arg_names], pad_mult) + knl = lp.split_array_dim(knl, [(nm, 0) for nm in arg_names], pad_mult) return knl diff --git a/test/test_linalg.py b/test/test_linalg.py index c61d963903ff738b62924e31428a928a18afad60..9c6803e93f2e53a0c071e0372bf71256854de38a 100644 --- a/test/test_linalg.py +++ b/test/test_linalg.py @@ -613,7 +613,7 @@ def test_small_batched_matvec(ctx_factory): align_bytes = 64 knl = lp.add_prefetch(knl, 'd[:,:]') pad_mult = lp.find_padding_multiple(knl, "f", 0, align_bytes) - knl = lp.split_arg_axis(knl, ("f", 0), pad_mult) + knl = lp.split_array_dim(knl, ("f", 0), pad_mult) knl = lp.add_padding(knl, "f", 0, align_bytes) lp.auto_test_vs_ref(seq_knl, ctx, knl, diff --git a/test/test_loopy.py b/test/test_loopy.py index 0142fd6d4962da8c555412ee43a2a349c3e6d521..de64c820ed20f22701be4df70e38a0e4d6401ae6 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -2078,7 +2078,8 @@ def test_vectorize(ctx_factory): a[i] = temp """) knl = lp.add_and_infer_dtypes(knl, dict(b=np.float32)) - knl = lp.split_arg_axis(knl, [("a", 0), ("b", 0)], 4, + knl = lp.set_array_dim_names(knl, "a,b", "i") + knl = lp.split_array_dim(knl, [("a", 0), ("b", 0)], 4, split_kwargs=dict(slabs=(0, 1))) knl = lp.tag_data_axes(knl, "a,b", "c,vec")