From 049424df12da70a5ae8d2411c6d12dc2269ff626 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Tue, 19 Jun 2018 18:14:52 -0500 Subject: [PATCH 1/4] Bump version --- loopy/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/loopy/version.py b/loopy/version.py index 2f5006be3..da28a3f0a 100644 --- a/loopy/version.py +++ b/loopy/version.py @@ -42,7 +42,7 @@ else: # }}} -VERSION = (2017, 2, 1) +VERSION = (2018, 1) VERSION_STATUS = "" VERSION_TEXT = ".".join(str(x) for x in VERSION) + VERSION_STATUS -- GitLab From bb5708fa9eb6d9014ab3e417ef1e39092031dc81 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Tue, 19 Jun 2018 18:16:40 -0500 Subject: [PATCH 2/4] Do not use defaultdict (which is mutable and messes up caching) in kernel.iname_to_tags --- loopy/check.py | 41 +++++---- loopy/codegen/bounds.py | 12 +-- loopy/codegen/control.py | 6 +- loopy/codegen/loop.py | 14 ++- loopy/kernel/__init__.py | 172 +++++++++++++++++++++++++---------- loopy/kernel/data.py | 7 +- loopy/kernel/tools.py | 34 +++---- loopy/preprocess.py | 28 +++--- loopy/schedule/__init__.py | 18 ++-- loopy/statistics.py | 17 ++-- loopy/target/ispc.py | 9 +- loopy/transform/iname.py | 32 ++++--- loopy/transform/privatize.py | 4 +- loopy/transform/save.py | 8 +- 14 files changed, 234 insertions(+), 168 deletions(-) diff --git a/loopy/check.py b/loopy/check.py index d94e9d3bb..c8a36a5e7 100644 --- a/loopy/check.py +++ b/loopy/check.py @@ -128,13 +128,12 @@ def check_multiple_tags_allowed(kernel): def check_for_double_use_of_hw_axes(kernel): - from loopy.kernel.data import UniqueTag, filter_iname_tags_by_type + from loopy.kernel.data import UniqueTag for insn in kernel.instructions: insn_tag_keys = set() for iname in kernel.insn_inames(insn): - tags = kernel.iname_to_tags[iname] - for tag in filter_iname_tags_by_type(tags, UniqueTag): + for tag in kernel.iname_tags_of_type(iname, UniqueTag): key = tag.key if key in insn_tag_keys: raise LoopyError("instruction '%s' has 
multiple " @@ -182,7 +181,7 @@ def _is_racing_iname_tag(tv, tag): def check_for_write_races(kernel): - from loopy.kernel.data import ConcurrentTag, filter_iname_tags_by_type + from loopy.kernel.data import ConcurrentTag for insn in kernel.instructions: for assignee_name, assignee_indices in zip( @@ -201,15 +200,14 @@ def check_for_write_races(kernel): raceable_parallel_insn_inames = set( iname for iname in kernel.insn_inames(insn) - if filter_iname_tags_by_type(kernel.iname_to_tags[iname], - ConcurrentTag)) + if kernel.iname_tags_of_type(iname, ConcurrentTag)) elif assignee_name in kernel.temporary_variables: temp_var = kernel.temporary_variables[assignee_name] raceable_parallel_insn_inames = set( iname for iname in kernel.insn_inames(insn) if any(_is_racing_iname_tag(temp_var, tag) - for tag in kernel.iname_to_tags[iname])) + for tag in kernel.iname_tags(iname))) else: raise LoopyError("invalid assignee name in instruction '%s'" @@ -245,12 +243,13 @@ def check_for_orphaned_user_hardware_axes(kernel): def check_for_data_dependent_parallel_bounds(kernel): - from loopy.kernel.data import ConcurrentTag, filter_iname_tags_by_type + from loopy.kernel.data import ConcurrentTag for i, dom in enumerate(kernel.domains): dom_inames = set(dom.get_var_names(dim_type.set)) - par_inames = set(iname for iname in dom_inames - if filter_iname_tags_by_type(kernel.iname_to_tags[iname], ConcurrentTag)) + par_inames = set( + iname for iname in dom_inames + if kernel.iname_tags_of_type(iname, ConcurrentTag)) if not par_inames: continue @@ -677,7 +676,7 @@ def _check_for_unused_hw_axes_in_kernel_chunk(kernel, sched_index=None): # alternative: just disregard length-1 dimensions? 
from loopy.kernel.data import (LocalIndexTag, AutoLocalIndexTagBase, - GroupIndexTag, filter_iname_tags_by_type) + GroupIndexTag) while i < loop_end_i: sched_item = kernel.schedule[i] @@ -695,15 +694,18 @@ def _check_for_unused_hw_axes_in_kernel_chunk(kernel, sched_index=None): local_axes_used = set() for iname in kernel.insn_inames(insn): - tags = kernel.iname_to_tags[iname] + ltags = kernel.iname_tags_of_type(iname, LocalIndexTag, max_num=1) + gtags = kernel.iname_tags_of_type(iname, GroupIndexTag, max_num=1) + altags = kernel.iname_tags_of_type( + iname, AutoLocalIndexTagBase, max_num=1) - if filter_iname_tags_by_type(tags, LocalIndexTag): - tag, = filter_iname_tags_by_type(tags, LocalIndexTag, 1) + if ltags: + tag, = ltags local_axes_used.add(tag.axis) - elif filter_iname_tags_by_type(tags, GroupIndexTag): - tag, = filter_iname_tags_by_type(tags, GroupIndexTag, 1) + elif gtags: + tag, = gtags group_axes_used.add(tag.axis) - elif filter_iname_tags_by_type(tags, AutoLocalIndexTagBase): + elif altags: raise LoopyError("auto local tag encountered") if group_axes != group_axes_used: @@ -948,12 +950,11 @@ def check_implemented_domains(kernel, implemented_domains, code=None): .project_out_except(insn_inames, [dim_type.set])) from loopy.kernel.instruction import BarrierInstruction - from loopy.kernel.data import LocalIndexTag, filter_iname_tags_by_type + from loopy.kernel.data import LocalIndexTag if isinstance(insn, BarrierInstruction): # project out local-id-mapped inames, solves #94 on gitlab non_lid_inames = frozenset(iname for iname in insn_inames - if not filter_iname_tags_by_type( - kernel.iname_to_tags[iname], LocalIndexTag)) + if not kernel.iname_tags_of_type(iname, LocalIndexTag)) insn_impl_domain = insn_impl_domain.project_out_except( non_lid_inames, [dim_type.set]) diff --git a/loopy/codegen/bounds.py b/loopy/codegen/bounds.py index a6b70359a..c946e09a0 100644 --- a/loopy/codegen/bounds.py +++ b/loopy/codegen/bounds.py @@ -59,7 +59,7 @@ def 
get_usable_inames_for_conditional(kernel, sched_index): from loopy.schedule import ( find_active_inames_at, get_insn_ids_for_block_at, has_barrier_within) from loopy.kernel.data import (ConcurrentTag, LocalIndexTagBase, - IlpBaseTag, filter_iname_tags_by_type) + IlpBaseTag) result = find_active_inames_at(kernel, sched_index) crosses_barrier = has_barrier_within(kernel, sched_index) @@ -88,8 +88,6 @@ def get_usable_inames_for_conditional(kernel, sched_index): for iname in kernel.insn_inames(insn)) for iname in inames_for_subkernel: - tags = kernel.iname_to_tags[iname] - # Parallel inames are defined within a subkernel, BUT: # # - local indices may not be used in conditionals that cross barriers. @@ -98,10 +96,10 @@ def get_usable_inames_for_conditional(kernel, sched_index): # at the innermost level of nesting. if ( - filter_iname_tags_by_type(tags, ConcurrentTag) - and not (filter_iname_tags_by_type(tags, LocalIndexTagBase) - and crosses_barrier) - and not filter_iname_tags_by_type(tags, IlpBaseTag) + kernel.iname_tags_of_type(iname, ConcurrentTag) + and not (kernel.iname_tags_of_type(iname, LocalIndexTagBase) + and crosses_barrier) + and not kernel.iname_tags_of_type(iname, IlpBaseTag) ): result.add(iname) diff --git a/loopy/codegen/control.py b/loopy/codegen/control.py index fcf8ea3b4..966fd1e07 100644 --- a/loopy/codegen/control.py +++ b/loopy/codegen/control.py @@ -128,7 +128,7 @@ def generate_code_for_sched_index(codegen_state, sched_index): ]) elif isinstance(sched_item, EnterLoop): - tags = kernel.iname_to_tags[sched_item.iname] + tags = kernel.iname_tags(sched_item.iname) tags = tuple(tag for tag in tags if tag) from loopy.codegen.loop import ( @@ -143,7 +143,7 @@ def generate_code_for_sched_index(codegen_state, sched_index): func = generate_unroll_loop elif filter_iname_tags_by_type(tags, VectorizeTag): func = generate_vectorize_loop - elif len(tags) == 0 or filter_iname_tags_by_type(tags, (LoopedIlpTag, + elif not tags or filter_iname_tags_by_type(tags, 
(LoopedIlpTag, ForceSequentialTag, InOrderSequentialSequentialTag)): func = generate_sequential_loop_dim_code else: @@ -423,7 +423,7 @@ def build_loop_nest(codegen_state, schedule_index): # }}} - only_unshared_inames = kernel.remove_inames_for_shared_hw_axes( + only_unshared_inames = kernel._remove_inames_for_shared_hw_axes( current_iname_set & used_inames) bounds_checks = bounds_check_cache(only_unshared_inames) diff --git a/loopy/codegen/loop.py b/loopy/codegen/loop.py index 7b44fd7b2..ebddf3153 100644 --- a/loopy/codegen/loop.py +++ b/loopy/codegen/loop.py @@ -231,7 +231,7 @@ def set_up_hw_parallel_loops(codegen_state, schedule_index, next_func, kernel = codegen_state.kernel from loopy.kernel.data import (UniqueTag, HardwareConcurrentTag, - LocalIndexTag, GroupIndexTag, filter_iname_tags_by_type) + LocalIndexTag, GroupIndexTag) from loopy.schedule import get_insn_ids_for_block_at insn_ids_for_block = get_insn_ids_for_block_at(kernel.schedule, schedule_index) @@ -242,8 +242,7 @@ def set_up_hw_parallel_loops(codegen_state, schedule_index, next_func, all_inames_by_insns |= kernel.insn_inames(insn_id) hw_inames_left = [iname for iname in all_inames_by_insns - if filter_iname_tags_by_type(kernel.iname_to_tags[iname], - HardwareConcurrentTag)] + if kernel.iname_tags_of_type(iname, HardwareConcurrentTag)] if not hw_inames_left: return next_func(codegen_state) @@ -254,11 +253,9 @@ def set_up_hw_parallel_loops(codegen_state, schedule_index, next_func, hw_inames_left = hw_inames_left[:] iname = hw_inames_left.pop() - tags = kernel.iname_to_tags[iname] - from loopy.symbolic import GroupHardwareAxisIndex, LocalHardwareAxisIndex - tag, = filter_iname_tags_by_type(tags, UniqueTag, max_num=1, min_num=1) + tag, = kernel.iname_tags_of_type(iname, UniqueTag, max_num=1, min_num=1) if isinstance(tag, GroupIndexTag): hw_axis_expr = GroupHardwareAxisIndex(tag.axis) @@ -269,10 +266,11 @@ def set_up_hw_parallel_loops(codegen_state, schedule_index, next_func, other_inames_with_same_tag 
= [ other_iname for other_iname in kernel.all_inames() - if (filter_iname_tags_by_type(kernel.iname_to_tags[other_iname], UniqueTag) + if (kernel.iname_tags_of_type(other_iname, UniqueTag) and other_iname != iname and any(_tag.key == tag.key - for _tag in kernel.iname_to_tags[other_iname] if _tag))] + for _tag in kernel.iname_tags(other_iname) + if _tag))] # {{{ 'implement' hardware axis boundaries diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py index 429961a71..748875516 100644 --- a/loopy/kernel/__init__.py +++ b/loopy/kernel/__init__.py @@ -108,6 +108,12 @@ class LoopKernel(ImmutableRecordWithoutPickling): """These correspond more or less directly to arguments of :func:`loopy.make_kernel`. + .. note:: + + This data structure and its attributes should be considered immutable, + even if it contains mutable data types. See :meth:`copy` for an easy + way of producing a modified copy. + .. attribute:: domains a list of :class:`islpy.BasicSet` instances @@ -191,26 +197,23 @@ class LoopKernel(ImmutableRecordWithoutPickling): # {{{ constructor - def __init__(self, domains, instructions, args=[], schedule=None, + def __init__(self, domains, instructions, args=None, schedule=None, name="loopy_kernel", - preambles=[], - preamble_generators=[], + preambles=None, + preamble_generators=None, assumptions=None, - local_sizes={}, - temporary_variables={}, - iname_to_tags=defaultdict(set), - substitutions={}, - function_manglers=[ - default_function_mangler, - single_arg_function_mangler, - ], + local_sizes=None, + temporary_variables=None, + iname_to_tags=None, + substitutions=None, + function_manglers=None, symbol_manglers=[], - iname_slab_increments={}, + iname_slab_increments=None, loop_priority=frozenset(), - silenced_warnings=[], + silenced_warnings=None, - applied_iname_rewrites=[], + applied_iname_rewrites=None, cache_manager=None, index_dtype=np.int32, options=None, @@ -226,10 +229,46 @@ class LoopKernel(ImmutableRecordWithoutPickling): change. 
This provides a way to forward sub-kernel grid size requests. """ + # {{{ process constructor arguments + + if args is None: + args = [] + if preambles is None: + preambles = [] + if preamble_generators is None: + preamble_generators = [] + if local_sizes is None: + local_sizes = {} + if temporary_variables is None: + temporary_variables = {} + if iname_to_tags is None: + iname_to_tags = {} + if substitutions is None: + substitutions = {} + if function_manglers is None: + function_manglers = [ + default_function_mangler, + single_arg_function_mangler, + ] + if symbol_manglers is None: + function_manglers = [ + default_function_mangler, + single_arg_function_mangler, + ] + if iname_slab_increments is None: + iname_slab_increments = {} + + if silenced_warnings is None: + silenced_warnings = [] + if applied_iname_rewrites is None: + applied_iname_rewrites = [] + if cache_manager is None: from loopy.kernel.tools import SetOperationCacheManager cache_manager = SetOperationCacheManager() + # }}} + # {{{ process assumptions if assumptions is None: @@ -267,6 +306,14 @@ class LoopKernel(ImmutableRecordWithoutPickling): ]: raise ValueError("invalid value for 'state'") + from collections import defaultdict + assert not isinstance(iname_to_tags, defaultdict) + + for iname, tags in six.iteritems(iname_to_tags): + # don't tolerate empty sets + assert tags + assert isinstance(tags, frozenset) + assert all(dom.get_ctx() == isl.DEFAULT_CONTEXT for dom in domains) assert assumptions.get_ctx() == isl.DEFAULT_CONTEXT @@ -302,24 +349,6 @@ class LoopKernel(ImmutableRecordWithoutPickling): # }}} - # {{{ compatibility wrapper for iname_to_tag.get("iname") - - @property - def iname_to_tag(self): - from warnings import warn - warn("Since version 2018.1, inames can hold multiple tags. Use " - "iname_to_tags['iname'] instead. 
iname_to_tag.get('iname') will be " - "deprecated at version 2019.0.", DeprecationWarning) - for iname, tags in six.iteritems(self.iname_to_tags): - if len(tags) > 1: - raise LoopyError( - "iname {0} has multiple tags: {1}. " - "Use iname_to_tags['iname'] instead.".format(iname, tags)) - return dict((k, next(iter(v))) - for k, v in six.iteritems(self.iname_to_tags) if v) - - # }}} - # {{{ function mangling def mangle_function(self, identifier, arg_dtypes, ast_builder=None): @@ -651,6 +680,26 @@ class LoopKernel(ImmutableRecordWithoutPickling): # {{{ iname wrangling + def iname_tags(self, iname): + return self.iname_to_tags.get(iname, frozenset()) + + def iname_tags_of_type(self, iname, tag_type_or_types, + max_num=None, min_num=None): + """Return the subset of the tags of *iname* that match + *tag_type_or_types*. Raises an exception if the number of tags found is + greater than *max_num* or less than *min_num*. + + :arg iname: the name of the iname whose tags are to be filtered. + :arg tag_type_or_types: a subclass (or a tuple of subclasses) of :class:`loopy.kernel.data.IndexTag`. + :arg max_num: the maximum number of tags expected to be found. + :arg min_num: the minimum number of tags expected to be found. + """ + + from loopy.kernel.data import filter_iname_tags_by_type + return filter_iname_tags_by_type( + self.iname_to_tags.get(iname, frozenset()), + tag_type_or_types, max_num=max_num, min_num=min_num) + @memoize_method def all_inames(self): result = set() @@ -717,7 +766,7 @@ class LoopKernel(ImmutableRecordWithoutPickling): return result @memoize_method - def remove_inames_for_shared_hw_axes(self, cond_inames): + def _remove_inames_for_shared_hw_axes(self, cond_inames): """ See if cond_inames contains references to two (or more) inames that boil down to the same tag. If so, exclude them. 
(We shouldn't be writing @@ -730,8 +779,7 @@ class LoopKernel(ImmutableRecordWithoutPickling): from loopy.kernel.data import HardwareConcurrentTag for iname in cond_inames: - tags = filter_iname_tags_by_type(self.iname_to_tags[iname], - HardwareConcurrentTag, 1) + tags = self.iname_tags_of_type(iname, HardwareConcurrentTag, max_num=1) if tags: tag, = tags tag_key_uses[tag.key].append(iname) @@ -742,8 +790,7 @@ class LoopKernel(ImmutableRecordWithoutPickling): multi_use_inames = set() for iname in cond_inames: - tags = filter_iname_tags_by_type(self.iname_to_tags[iname], - HardwareConcurrentTag) + tags = self.iname_tags_of_type(iname, HardwareConcurrentTag) if tags: tag, = filter_iname_tags_by_type(tags, HardwareConcurrentTag, 1) if tag.key in multi_use_keys: @@ -751,6 +798,24 @@ class LoopKernel(ImmutableRecordWithoutPickling): return frozenset(cond_inames - multi_use_inames) + # {{{ compatibility wrapper for iname_to_tag.get("iname") + + @property + def iname_to_tag(self): + from warnings import warn + warn("Since version 2018.1, inames can hold multiple tags. Use " + "iname_to_tags['iname'] instead. iname_to_tag.get('iname') will be " + "removed at version 2019.0.", DeprecationWarning) + for iname, tags in six.iteritems(self.iname_to_tags): + if len(tags) > 1: + raise LoopyError( + "iname {0} has multiple tags: {1}. 
" + "Use iname_to_tags['iname'] instead.".format(iname, tags)) + return dict((k, next(iter(v))) + for k, v in six.iteritems(self.iname_to_tags) if v) + + # }}} + # }}} # {{{ dependency wrangling @@ -977,21 +1042,25 @@ class LoopKernel(ImmutableRecordWithoutPickling): AutoLocalIndexTagBase) for iname in all_inames_by_insns: - tags = self.iname_to_tags[iname] + tags = self.iname_tags_of_type( + iname, + (AutoLocalIndexTagBase, GroupIndexTag, LocalIndexTag), max_num=1) - if filter_iname_tags_by_type(tags, GroupIndexTag): - tgt_dict = global_sizes - elif filter_iname_tags_by_type(tags, LocalIndexTag): - tgt_dict = local_sizes - elif (filter_iname_tags_by_type(tags, AutoLocalIndexTagBase) - and not ignore_auto): + if not tags: + continue + + tag, = tags + + if isinstance(tag, AutoLocalIndexTagBase) and not ignore_auto: raise RuntimeError("cannot find grid sizes if automatic " "local index tags are present") + elif isinstance(tag, GroupIndexTag): + tgt_dict = global_sizes + elif isinstance(tag, LocalIndexTag): + tgt_dict = local_sizes else: continue - tag, = filter_iname_tags_by_type(tags, (GroupIndexTag, LocalIndexTag), 1) - size = self.get_iname_bounds(iname).size if tag.axis in tgt_dict: @@ -1197,11 +1266,14 @@ class LoopKernel(ImmutableRecordWithoutPickling): if show_labels: lines.append("INAME IMPLEMENTATION TAGS:") for iname in natsorted(kernel.all_inames()): - if not kernel.iname_to_tags[iname]: - tags = "None" + tags = kernel.iname_tags(iname) + + if not tags: + tags_str = "None" else: - tags = ", ".join(str(tag) for tag in kernel.iname_to_tags[iname]) - line = "%s: %s" % (iname, tags) + tags_str = ", ".join(str(tag) for tag in tags) + + line = "%s: %s" % (iname, tags_str) lines.append(line) if "variables" in what and kernel.temporary_variables: diff --git a/loopy/kernel/data.py b/loopy/kernel/data.py index 35a8e3b1d..54e37391f 100644 --- a/loopy/kernel/data.py +++ b/loopy/kernel/data.py @@ -58,7 +58,7 @@ class auto(object): # noqa def 
filter_iname_tags_by_type(tags, tag_type, max_num=None, min_num=None): """Return a subset of *tags* that matches type *tag_type*. Raises exception if the number of tags found were greater than *max_num* or less than - *min_num*W. + *min_num*. :arg tags: An iterable of tags. :arg tag_type: a subclass of :class:`loopy.kernel.data.IndexTag`. @@ -67,11 +67,12 @@ def filter_iname_tags_by_type(tags, tag_type, max_num=None, min_num=None): """ result = set(tag for tag in tags if isinstance(tag, tag_type)) - if max_num: + + if max_num is not None: if len(result) > max_num: raise LoopyError("cannot have more than {0} tags" "of type(s): {1}".format(max_num, tag_type)) - if min_num: + if min_num is not None: if len(result) < min_num: raise LoopyError("must have more than {0} tags" "of type(s): {1}".format(max_num, tag_type)) diff --git a/loopy/kernel/tools.py b/loopy/kernel/tools.py index 908ab9ec3..ac2d1b034 100644 --- a/loopy/kernel/tools.py +++ b/loopy/kernel/tools.py @@ -36,7 +36,6 @@ from islpy import dim_type from loopy.diagnostic import LoopyError, warn_with_kernel from pytools import memoize_on_first_arg from loopy.tools import natsorted -from loopy.kernel.data import filter_iname_tags_by_type import logging logger = logging.getLogger(__name__) @@ -632,7 +631,7 @@ def is_domain_dependent_on_inames(kernel, domain_index, inames): # {{{ rank inames by stride def get_auto_axis_iname_ranking_by_stride(kernel, insn): - from loopy.kernel.data import ImageArg, ValueArg, filter_iname_tags_by_type + from loopy.kernel.data import ImageArg, ValueArg approximate_arg_values = {} for arg in kernel.args: @@ -678,8 +677,7 @@ def get_auto_axis_iname_ranking_by_stride(kernel, insn): from loopy.kernel.data import AutoLocalIndexTagBase auto_axis_inames = set( iname for iname in kernel.insn_inames(insn) - if filter_iname_tags_by_type( - kernel.iname_to_tags[iname], AutoLocalIndexTagBase)) + if kernel.iname_tags_of_type(iname, AutoLocalIndexTagBase)) # }}} @@ -780,7 +778,6 @@ def 
assign_automatic_axes(kernel, axis=0, local_size=None): # Likely unbounded, automatic assignment is not # going to happen for this iname. new_iname_to_tags = kernel.iname_to_tags.copy() - new_iname_to_tags[iname] = set() return assign_automatic_axes( kernel.copy(iname_to_tags=new_iname_to_tags), axis=recursion_axis) @@ -835,16 +832,23 @@ def assign_automatic_axes(kernel, axis=0, local_size=None): do_tagged_check=False), axis=recursion_axis, local_size=local_size) - if not filter_iname_tags_by_type(kernel.iname_to_tags[iname], - AutoLocalIndexTagBase): + if not kernel.iname_tags_of_type(iname, AutoLocalIndexTagBase): raise LoopyError("trying to reassign '%s'" % iname) if new_tag: - new_tag = set([new_tag]) + new_tag_set = frozenset([new_tag]) else: - new_tag = set() + new_tag_set = frozenset() new_iname_to_tags = kernel.iname_to_tags.copy() - new_iname_to_tags[iname] = new_tag + new_tags = ( + new_iname_to_tags.get(iname, frozenset()) + | new_tag_set) - frozenset([AutoLocalIndexTagBase()]) + + if new_tags: + new_iname_to_tags[iname] = new_tags + else: + del new_iname_to_tags[iname] + return assign_automatic_axes(kernel.copy(iname_to_tags=new_iname_to_tags), axis=recursion_axis, local_size=local_size) @@ -863,8 +867,7 @@ def assign_automatic_axes(kernel, axis=0, local_size=None): auto_axis_inames = [ iname for iname in kernel.insn_inames(insn) - if filter_iname_tags_by_type(kernel.iname_to_tags[iname], - AutoLocalIndexTagBase)] + if kernel.iname_tags_of_type(iname, AutoLocalIndexTagBase)] if not auto_axis_inames: continue @@ -872,8 +875,7 @@ def assign_automatic_axes(kernel, axis=0, local_size=None): assigned_local_axes = set() for iname in kernel.insn_inames(insn): - tags = filter_iname_tags_by_type( - kernel.iname_to_tags[iname], LocalIndexTag) + tags = kernel.iname_tags_of_type(iname, LocalIndexTag) if tags: if len(tags) > 1: raise LoopyError("cannot have more than one LocalIndexTags") @@ -887,7 +889,7 @@ def assign_automatic_axes(kernel, axis=0, local_size=None): 
iname_ranking = get_auto_axis_iname_ranking_by_stride(kernel, insn) if iname_ranking is not None: for iname in iname_ranking: - prev_tags = kernel.iname_to_tags[iname] + prev_tags = kernel.iname_tags[iname] if filter_iname_tags_by_type( prev_tags, AutoLocalIndexTagBase): return assign_axis(axis, iname, axis) @@ -1145,7 +1147,7 @@ def get_visual_iname_order_embedding(kernel): # nest. ilp_inames = frozenset(iname for iname in kernel.iname_to_tags - if filter_iname_tags_by_type(kernel.iname_to_tags[iname], IlpBaseTag)) + if kernel.iname_tags_of_type(iname, IlpBaseTag)) iname_trie = SetTrie() diff --git a/loopy/preprocess.py b/loopy/preprocess.py index 3e8c70e53..91d71a3d5 100644 --- a/loopy/preprocess.py +++ b/loopy/preprocess.py @@ -136,7 +136,7 @@ def check_reduction_iname_uniqueness(kernel): def _get_compute_inames_tagged(kernel, insn, tag_base): return set(iname for iname in kernel.insn_inames(insn.id) - if filter_iname_tags_by_type(kernel.iname_to_tags[iname], tag_base)) + if kernel.iname_tags_of_type(iname, tag_base)) def _get_assignee_inames_tagged(kernel, insn, tag_base, tv_names): @@ -146,7 +146,7 @@ def _get_assignee_inames_tagged(kernel, insn, tag_base, tv_names): insn.assignee_subscript_deps()) for iname in adeps & kernel.all_inames() if aname in tv_names - if filter_iname_tags_by_type(kernel.iname_to_tags[iname], tag_base)) + if kernel.iname_tags_of_type(iname, tag_base)) def find_temporary_scope(kernel): @@ -294,7 +294,7 @@ def _classify_reduction_inames(kernel, inames): ConcurrentTag, filter_iname_tags_by_type) for iname in inames: - iname_tags = kernel.iname_to_tags[iname] + iname_tags = kernel.iname_tags(iname) if filter_iname_tags_by_type(iname_tags, (UnrollTag, UnrolledIlpTag)): # These are nominally parallel, but we can live with @@ -1134,10 +1134,9 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True, outer_insn_inames = temp_kernel.insn_inames(insn) - from loopy.kernel.data import LocalIndexTagBase, 
filter_iname_tags_by_type + from loopy.kernel.data import LocalIndexTagBase outer_local_inames = tuple(oiname for oiname in outer_insn_inames - if filter_iname_tags_by_type( - kernel.iname_to_tags[oiname], LocalIndexTagBase)) + if kernel.iname_tags_of_type(oiname, LocalIndexTagBase)) from pymbolic import var outer_local_iname_vars = tuple( @@ -1172,7 +1171,7 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True, base_exec_iname = var_name_gen("red_"+red_iname) domains.append(_make_slab_set(base_exec_iname, size)) - new_iname_tags[base_exec_iname] = kernel.iname_to_tags[red_iname] + new_iname_tags[base_exec_iname] = kernel.iname_tags(red_iname) # }}} @@ -1267,7 +1266,7 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True, stage_exec_iname = var_name_gen("red_%s_s%d" % (red_iname, istage)) domains.append(_make_slab_set(stage_exec_iname, bound-new_size)) - new_iname_tags[stage_exec_iname] = kernel.iname_to_tags[red_iname] + new_iname_tags[stage_exec_iname] = kernel.iname_tags(red_iname) stage_id = insn_id_gen("red_%s_stage_%d" % (red_iname, istage)) stage_insn = make_assignment( @@ -1470,10 +1469,9 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True, outer_insn_inames = temp_kernel.insn_inames(insn) - from loopy.kernel.data import LocalIndexTagBase, filter_iname_tags_by_type + from loopy.kernel.data import LocalIndexTagBase outer_local_inames = tuple(oiname for oiname in outer_insn_inames - if filter_iname_tags_by_type(kernel.iname_to_tags[oiname], - LocalIndexTagBase) + if kernel.iname_tags_of_type(oiname, LocalIndexTagBase) and oiname != sweep_iname) from pymbolic import var @@ -1499,7 +1497,7 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True, base_exec_iname = var_name_gen(sweep_iname + "__scan") domains.append(_make_slab_set(base_exec_iname, scan_size)) - new_iname_tags[base_exec_iname] = kernel.iname_to_tags[sweep_iname] + new_iname_tags[base_exec_iname] = 
kernel.iname_tags(sweep_iname) # }}} @@ -1590,7 +1588,7 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True, stage_exec_iname = var_name_gen("%s__scan_s%d" % (sweep_iname, istage)) domains.append( _make_slab_set_from_range(stage_exec_iname, cur_size, scan_size)) - new_iname_tags[stage_exec_iname] = kernel.iname_to_tags[sweep_iname] + new_iname_tags[stage_exec_iname] = kernel.iname_tags(sweep_iname) for read_var, acc_var in zip(read_vars, acc_vars): read_stage_id = insn_id_gen( @@ -1740,7 +1738,7 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True, "by reductions is 'local'--found iname(s) '%s' " "respectively tagged '%s'" % (", ".join(bad_inames), - ", ".join(str(kernel.iname_to_tags[iname]) + ", ".join(str(kernel.iname_tags(iname)) for iname in bad_inames))) if n_local_par == 0 and n_sequential == 0: @@ -1780,7 +1778,7 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True, "- the only parallelism allowed is 'local'." % (sweep_iname, ", ".join(tag.key - for tag in temp_kernel.iname_to_tags[sweep_iname]))) + for tag in temp_kernel.iname_tags(sweep_iname)))) elif parallel: return map_scan_local( expr, rec, nresults, arg_dtypes, reduction_dtypes, diff --git a/loopy/schedule/__init__.py b/loopy/schedule/__init__.py index 616c8e62a..440ac22cb 100644 --- a/loopy/schedule/__init__.py +++ b/loopy/schedule/__init__.py @@ -212,12 +212,11 @@ def find_loop_nest_with_map(kernel): """ result = {} - from loopy.kernel.data import (ConcurrentTag, IlpBaseTag, VectorizeTag, - filter_iname_tags_by_type) + from loopy.kernel.data import ConcurrentTag, IlpBaseTag, VectorizeTag all_nonpar_inames = set( iname for iname in kernel.all_inames() - if not filter_iname_tags_by_type(kernel.iname_to_tags[iname], + if not kernel.iname_tags_of_type(iname, (ConcurrentTag, IlpBaseTag, VectorizeTag))) iname_to_insns = kernel.iname_to_insns() @@ -241,15 +240,14 @@ def find_loop_nest_around_map(kernel): iname_to_insns = 
kernel.iname_to_insns() # examine pairs of all inames--O(n**2), I know. - from loopy.kernel.data import IlpBaseTag, filter_iname_tags_by_type + from loopy.kernel.data import IlpBaseTag for inner_iname in all_inames: result[inner_iname] = set() for outer_iname in all_inames: if inner_iname == outer_iname: continue - tags = kernel.iname_to_tags[outer_iname] - if filter_iname_tags_by_type(tags, IlpBaseTag): + if kernel.iname_tags_of_type(outer_iname, IlpBaseTag): # ILP tags are special because they are parallel tags # and therefore 'in principle' nest around everything. # But they're realized by the scheduler as a loop @@ -278,11 +276,10 @@ def find_loop_insn_dep_map(kernel, loop_nest_with_map, loop_nest_around_map): result = {} - from loopy.kernel.data import (ConcurrentTag, IlpBaseTag, VectorizeTag, - filter_iname_tags_by_type) + from loopy.kernel.data import ConcurrentTag, IlpBaseTag, VectorizeTag for insn in kernel.instructions: for iname in kernel.insn_inames(insn): - if filter_iname_tags_by_type(kernel.iname_to_tags[iname], ConcurrentTag): + if kernel.iname_tags_of_type(iname, ConcurrentTag): continue iname_dep = result.setdefault(iname, set()) @@ -312,8 +309,7 @@ def find_loop_insn_dep_map(kernel, loop_nest_with_map, loop_nest_around_map): # -> safe. continue - tags = kernel.iname_to_tags[dep_insn_iname] - if filter_iname_tags_by_type(tags, + if kernel.iname_tags_of_type(dep_insn_iname, (ConcurrentTag, IlpBaseTag, VectorizeTag)): # Parallel tags don't really nest, so we'll disregard # them here. 
diff --git a/loopy/statistics.py b/loopy/statistics.py index e5c05b5ec..b8e0a8448 100755 --- a/loopy/statistics.py +++ b/loopy/statistics.py @@ -840,8 +840,7 @@ def _get_lid_and_gid_strides(knl, array, index): lid_to_iname = {} gid_to_iname = {} for iname in my_inames: - tags = filter_iname_tags_by_type(knl.iname_to_tags[iname], - (GroupIndexTag, LocalIndexTag)) + tags = knl.iname_tags_of_type(iname, (GroupIndexTag, LocalIndexTag)) if tags: tag, = filter_iname_tags_by_type( tags, (GroupIndexTag, LocalIndexTag), 1) @@ -1196,11 +1195,10 @@ def get_unused_hw_axes_factor(knl, insn, disregard_local_axes, space=None): g_used = set() l_used = set() - from loopy.kernel.data import (LocalIndexTag, GroupIndexTag, - filter_iname_tags_by_type) + from loopy.kernel.data import LocalIndexTag, GroupIndexTag for iname in knl.insn_inames(insn): - tags = filter_iname_tags_by_type(knl.iname_to_tags[iname], - (LocalIndexTag, GroupIndexTag), 1) + tags = knl.iname_tags_of_type(iname, + (LocalIndexTag, GroupIndexTag), max_num=1) if tags: tag, = tags if isinstance(tag, LocalIndexTag): @@ -1235,9 +1233,10 @@ def count_insn_runs(knl, insn, count_redundant_work, disregard_local_axes=False) insn_inames = knl.insn_inames(insn) if disregard_local_axes: - from loopy.kernel.data import LocalIndexTag, filter_iname_tags_by_type - insn_inames = [iname for iname in insn_inames if not - filter_iname_tags_by_type(knl.iname_to_tags[iname], LocalIndexTag)] + from loopy.kernel.data import LocalIndexTag + insn_inames = [iname + for iname in insn_inames + if not knl.iname_tags_of_type(iname, LocalIndexTag)] inames_domain = knl.get_inames_domain(insn_inames) domain = (inames_domain.project_out_except( diff --git a/loopy/target/ispc.py b/loopy/target/ispc.py index 8e07eb692..261475eb4 100644 --- a/loopy/target/ispc.py +++ b/loopy/target/ispc.py @@ -424,10 +424,9 @@ class ISPCASTBuilder(CASTBuilder): saw_l0 = False for term in terms: if (isinstance(term, Variable) - and filter_iname_tags_by_type( - 
kernel.iname_to_tags[term.name], LocalIndexTag)): - tag, = filter_iname_tags_by_type( - kernel.iname_to_tags[term.name], LocalIndexTag, 1) + and kernel.iname_tags_of_type(term.name, LocalIndexTag)): + tag, = kernel.iname_tags_of_type( + term.name, LocalIndexTag, min_num=1, max_num=1) if tag.axis == 0: if saw_l0: raise LoopyError( @@ -458,7 +457,7 @@ class ISPCASTBuilder(CASTBuilder): rhs_has_programindex = any( isinstance(tag, LocalIndexTag) and tag.axis == 0 - for tag in kernel.iname_to_tags[dep] + for tag in kernel.iname_tags(dep) for dep in get_dependencies(insn.expression)) if not rhs_has_programindex: diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index 423ccfb55..2b618a464 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -177,7 +177,7 @@ def _split_iname_backend(kernel, split_iname, for syntax. """ - existing_tags = kernel.iname_to_tags[split_iname] + existing_tags = kernel.iname_tags(split_iname) from loopy.kernel.data import ForceSequentialTag, filter_iname_tags_by_type if (do_tagged_check and existing_tags and not filter_iname_tags_by_type(existing_tags, ForceSequentialTag)): @@ -610,9 +610,13 @@ def untag_inames(kernel, iname_to_untag, tag_type): """ knl_iname_to_tags = kernel.iname_to_tags.copy() - old_tags = knl_iname_to_tags[iname_to_untag] + old_tags = knl_iname_to_tags.get(iname_to_untag, frozenset()) old_tags = set(tag for tag in old_tags if not isinstance(tag, tag_type)) - knl_iname_to_tags[iname_to_untag] = old_tags + + if old_tags: + knl_iname_to_tags[iname_to_untag] = old_tags + else: + del knl_iname_to_tags[iname_to_untag] return kernel.copy(iname_to_tags=knl_iname_to_tags) @@ -671,7 +675,7 @@ def tag_inames(kernel, iname_to_tag, force=False, ignore_nonexistent=False): def parse_tag(tag): if isinstance(tag, str): if tag.startswith("like."): - tags = kernel.iname_to_tags[tag[5:]] + tags = kernel.iname_tags(tag[5:]) if len(tags) == 0: return None if len(tags) == 1: @@ -722,7 +726,7 @@ def 
tag_inames(kernel, iname_to_tag, force=False, ignore_nonexistent=False): if not new_tag: continue - old_tags = kernel.iname_to_tags[iname] + old_tags = kernel.iname_tags(iname) if iname not in kernel.all_inames(): raise ValueError("cannot tag '%s'--not known" % iname) @@ -739,7 +743,7 @@ def tag_inames(kernel, iname_to_tag, force=False, ignore_nonexistent=False): "(likely because of participation in a precompute or " "a reduction)" % iname) - knl_iname_to_tags[iname] = old_tags.union([new_tag]) + knl_iname_to_tags[iname] = old_tags | frozenset([new_tag]) return kernel.copy(iname_to_tags=knl_iname_to_tags) @@ -992,12 +996,12 @@ def get_iname_duplication_options(knl, use_boostable_into=False): Use :func:`has_schedulable_iname_nesting` to decide whether an iname needs to be duplicated in a given kernel. """ - from loopy.kernel.data import ConcurrentTag, filter_iname_tags_by_type + from loopy.kernel.data import ConcurrentTag concurrent_inames = set( iname - for iname in knl.all_inames() if filter_iname_tags_by_type( - knl.iname_to_tags[iname], ConcurrentTag)) + for iname in knl.all_inames() + if knl.iname_tags_of_type(iname, ConcurrentTag)) # First we extract the minimal necessary information from the kernel if use_boostable_into: @@ -1021,8 +1025,8 @@ def get_iname_duplication_options(knl, use_boostable_into=False): # Get the duplication options as a tuple of iname and a set for iname, insns in _get_iname_duplication_options(insn_iname_sets): # Check whether this iname has a parallel tag and discard it if so - if (iname in knl.iname_to_tags and filter_iname_tags_by_type( - knl.iname_to_tags[iname], ConcurrentTag)): + if (iname in knl.iname_to_tags + and knl.iname_tags_of_type(iname, ConcurrentTag)): continue # If we find a duplication option and to not use boostable_into @@ -1539,8 +1543,7 @@ def find_unused_axis_tag(kernel, kind, insn_match=None): """ used_axes = set() - from loopy.kernel.data import (GroupIndexTag, LocalIndexTag, - filter_iname_tags_by_type) + from 
loopy.kernel.data import GroupIndexTag, LocalIndexTag if isinstance(kind, str): found = False @@ -1559,8 +1562,7 @@ def find_unused_axis_tag(kernel, kind, insn_match=None): for insn in insns: for iname in kernel.insn_inames(insn): - dim_tags = kernel.iname_to_tags[iname] - if filter_iname_tags_by_type(dim_tags, kind): + if kernel.iname_tags_of_type(iname, kind): used_axes.add(kind.axis) i = 0 diff --git a/loopy/transform/privatize.py b/loopy/transform/privatize.py index c953c1cee..d4128bd11 100644 --- a/loopy/transform/privatize.py +++ b/loopy/transform/privatize.py @@ -174,7 +174,7 @@ def privatize_temporaries_with_inames( # {{{ change temporary variables - from loopy.kernel.data import VectorizeTag, filter_iname_tags_by_type + from loopy.kernel.data import VectorizeTag new_temp_vars = kernel.temporary_variables.copy() for tv_name, inames in six.iteritems(var_to_new_priv_axis_iname): @@ -187,7 +187,7 @@ def privatize_temporaries_with_inames( dim_tags = ["c"] * (len(shape) + len(extra_shape)) for i, iname in enumerate(inames): - if filter_iname_tags_by_type(kernel.iname_to_tags[iname], VectorizeTag): + if kernel.iname_tags_of_type(iname, VectorizeTag): dim_tags[len(shape) + i] = "vec" new_temp_vars[tv.name] = tv.copy(shape=shape + extra_shape, diff --git a/loopy/transform/save.py b/loopy/transform/save.py index dfdd7a154..dcda3c572 100644 --- a/loopy/transform/save.py +++ b/loopy/transform/save.py @@ -245,7 +245,7 @@ class TemporarySaver(object): self.insns_to_insert = [] self.insns_to_update = {} self.extra_args_to_add = {} - self.updated_iname_to_tags = defaultdict(set) + self.updated_iname_to_tags = {} self.updated_temporary_variables = {} # temporary name -> save or reload insn ids @@ -397,7 +397,7 @@ class TemporarySaver(object): my_local_tags = [] for iname in insn.within_inames: - tags = self.kernel.iname_to_tags[iname] + tags = self.kernel.iname_tags(iname) if not tags: continue @@ -677,7 +677,7 @@ class TemporarySaver(object): # If the temporary has local 
scope, then loads / stores can # be done in parallel. from loopy.kernel.data import AutoFitLocalIndexTag - iname_to_tags[new_iname] = set([AutoFitLocalIndexTag()]) + iname_to_tags[new_iname] = frozenset([AutoFitLocalIndexTag()]) dim_inames.append(new_iname) @@ -707,7 +707,7 @@ class TemporarySaver(object): & aff[new_iname].lt_set(aff_from_expr(domain.space, dim))) - self.updated_iname_to_tags[new_iname] = set([hw_tag]) + self.updated_iname_to_tags[new_iname] = frozenset([hw_tag]) hw_inames.append(new_iname) # The operations on the domain above return a Set object, but the -- GitLab From 008114f9c40eb84da6c1293f199e3bc535f70d18 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Tue, 19 Jun 2018 18:36:43 -0500 Subject: [PATCH 3/4] Fix assign_automatic_axes after iname_to_tags move away from defaultdict --- loopy/kernel/tools.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/loopy/kernel/tools.py b/loopy/kernel/tools.py index ac2d1b034..7e8b69791 100644 --- a/loopy/kernel/tools.py +++ b/loopy/kernel/tools.py @@ -778,6 +778,15 @@ def assign_automatic_axes(kernel, axis=0, local_size=None): # Likely unbounded, automatic assignment is not # going to happen for this iname. 
new_iname_to_tags = kernel.iname_to_tags.copy() + new_tags = new_iname_to_tags.get(iname, frozenset()) + new_tags = frozenset(tag for tag in new_tags + if not isinstance(tag, AutoLocalIndexTagBase)) + + if new_tags: + new_iname_to_tags[iname] = new_tags + else: + del new_iname_to_tags[iname] + return assign_automatic_axes( kernel.copy(iname_to_tags=new_iname_to_tags), axis=recursion_axis) @@ -889,7 +898,7 @@ def assign_automatic_axes(kernel, axis=0, local_size=None): iname_ranking = get_auto_axis_iname_ranking_by_stride(kernel, insn) if iname_ranking is not None: for iname in iname_ranking: - prev_tags = kernel.iname_tags[iname] + prev_tags = kernel.iname_tags(iname) if filter_iname_tags_by_type( prev_tags, AutoLocalIndexTagBase): return assign_axis(axis, iname, axis) -- GitLab From b9080c30a187e721034941e80a98d843bf747ada Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Wed, 20 Jun 2018 13:01:53 -0500 Subject: [PATCH 4/4] Fix assign_automatic_axes after iname_to_tags move away from defaultdict --- loopy/kernel/data.py | 14 ++++++++++---- loopy/kernel/tools.py | 9 ++++----- 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/loopy/kernel/data.py b/loopy/kernel/data.py index 54e37391f..9b746bb99 100644 --- a/loopy/kernel/data.py +++ b/loopy/kernel/data.py @@ -68,14 +68,20 @@ def filter_iname_tags_by_type(tags, tag_type, max_num=None, min_num=None): result = set(tag for tag in tags if isinstance(tag, tag_type)) + def strify_tag_type(): + if isinstance(tag_type, tuple): + return ", ".join(t.__name__ for t in tag_type) + else: + return tag_type.__name__ + if max_num is not None: if len(result) > max_num: - raise LoopyError("cannot have more than {0} tags" - "of type(s): {1}".format(max_num, tag_type)) + raise LoopyError("cannot have more than {0} tags " + "of type(s): {1}".format(max_num, strify_tag_type())) if min_num is not None: if len(result) < min_num: - raise LoopyError("must have more than {0} tags" - "of type(s): {1}".format(max_num, tag_type)) 
+ raise LoopyError("must have at least {0} tags " + "of type(s): {1}".format(min_num, strify_tag_type())) return result diff --git a/loopy/kernel/tools.py b/loopy/kernel/tools.py index 7e8b69791..df89e6c6c 100644 --- a/loopy/kernel/tools.py +++ b/loopy/kernel/tools.py @@ -850,8 +850,9 @@ def assign_automatic_axes(kernel, axis=0, local_size=None): new_tag_set = frozenset() new_iname_to_tags = kernel.iname_to_tags.copy() new_tags = ( - new_iname_to_tags.get(iname, frozenset()) - | new_tag_set) - frozenset([AutoLocalIndexTagBase()]) + frozenset(tag for tag in new_iname_to_tags.get(iname, frozenset()) + if not isinstance(tag, AutoLocalIndexTagBase)) + | new_tag_set) if new_tags: new_iname_to_tags[iname] = new_tags @@ -884,10 +885,8 @@ def assign_automatic_axes(kernel, axis=0, local_size=None): assigned_local_axes = set() for iname in kernel.insn_inames(insn): - tags = kernel.iname_tags_of_type(iname, LocalIndexTag) + tags = kernel.iname_tags_of_type(iname, LocalIndexTag, max_num=1) if tags: - if len(tags) > 1: - raise LoopyError("cannot have more than one LocalIndexTags") tag, = tags assigned_local_axes.add(tag.axis) -- GitLab