From 8734898181feb996ca635a16726f08467535f923 Mon Sep 17 00:00:00 2001 From: Tianjiao Sun Date: Tue, 3 Apr 2018 08:25:41 +0100 Subject: [PATCH 01/20] start working on allowing multiple tags --- loopy/check.py | 6 ++--- loopy/kernel/__init__.py | 49 ++++++++++++++++++++---------------- loopy/kernel/data.py | 9 +++++++ loopy/kernel/tools.py | 7 +++--- loopy/preprocess.py | 12 ++++----- loopy/transform/iname.py | 26 ++++++------------- loopy/transform/privatize.py | 1 + 7 files changed, 58 insertions(+), 52 deletions(-) diff --git a/loopy/check.py b/loopy/check.py index 146391bf2..c0b419b5c 100644 --- a/loopy/check.py +++ b/loopy/check.py @@ -114,13 +114,13 @@ def check_loop_priority_inames_known(kernel): def check_for_double_use_of_hw_axes(kernel): - from loopy.kernel.data import UniqueTag + from loopy.kernel.data import UniqueTag, get_iname_tags for insn in kernel.instructions: insn_tag_keys = set() for iname in kernel.insn_inames(insn): - tag = kernel.iname_to_tag.get(iname) - if isinstance(tag, UniqueTag): + tags = kernel.iname_to_tags.get(iname, set()) + for tag in get_iname_tags(tags, UniqueTag): key = tag.key if key in insn_tag_keys: raise LoopyError("instruction '%s' has multiple " diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py index 72d3a7dba..f9878aecf 100644 --- a/loopy/kernel/__init__.py +++ b/loopy/kernel/__init__.py @@ -27,6 +27,8 @@ THE SOFTWARE. import six from six.moves import range, zip, intern +from collections import defaultdict + import numpy as np from pytools import ImmutableRecordWithoutPickling, ImmutableRecord, memoize_method import islpy as isl @@ -42,6 +44,7 @@ from loopy.library.function import ( from loopy.diagnostic import CannotBranchDomainTree, LoopyError from loopy.tools import natsorted from loopy.diagnostic import StaticValueFindingError +from loopy.kernel.data import check_iname_tags, get_iname_tags # {{{ unique var names @@ -137,10 +140,10 @@ class LoopKernel(ImmutableRecordWithoutPickling): :class:`loopy.TemporaryVariable` instances. - .. attribute:: iname_to_tag + .. attribute:: iname_to_tags A :class:`dict` mapping inames (as strings) - to instances of :class:`loopy.kernel.data.IndexTag`. + to set of instances of :class:`loopy.kernel.data.IndexTag`. .. attribute:: function_manglers .. attribute:: symbol_manglers @@ -194,7 +197,7 @@ class LoopKernel(ImmutableRecordWithoutPickling): assumptions=None, local_sizes={}, temporary_variables={}, - iname_to_tag={}, + iname_to_tags={}, substitutions={}, function_manglers=[ default_function_mangler, @@ -280,7 +283,7 @@ class LoopKernel(ImmutableRecordWithoutPickling): silenced_warnings=silenced_warnings, temporary_variables=temporary_variables, local_sizes=local_sizes, - iname_to_tag=iname_to_tag, + iname_to_tags=iname_to_tags, substitutions=substitutions, cache_manager=cache_manager, applied_iname_rewrites=applied_iname_rewrites, @@ -703,15 +706,14 @@ class LoopKernel(ImmutableRecordWithoutPickling): the other inames as well.) """ - tag_key_uses = {} + tag_key_uses = defaultdict(list) from loopy.kernel.data import HardwareConcurrentTag for iname in cond_inames: - tag = self.iname_to_tag.get(iname) - - if isinstance(tag, HardwareConcurrentTag): - tag_key_uses.setdefault(tag.key, []).append(iname) + tags = self.iname_to_tags.get(iname, set()) + if check_iname_tags(tags, HardwareConcurrentTag): + tag_key_uses[tag.key].append(iname) multi_use_keys = set( key for key, user_inames in six.iteritems(tag_key_uses) @@ -719,9 +721,10 @@ class LoopKernel(ImmutableRecordWithoutPickling): multi_use_inames = set() for iname in cond_inames: - tag = self.iname_to_tag.get(iname) - if isinstance(tag, HardwareConcurrentTag) and tag.key in multi_use_keys: - multi_use_inames.add(iname) + for tag in self.iname_to_tags.get(iname, set()): + if isinstance(tag, HardwareConcurrentTag) and tag.key in multi_use_keys: + multi_use_inames.add(iname) + break return frozenset(cond_inames - multi_use_inames) @@ -951,21 +954,25 @@ class LoopKernel(ImmutableRecordWithoutPickling): AutoLocalIndexTagBase) for iname in all_inames_by_insns: - tag = self.iname_to_tag.get(iname) + tags = self.iname_to_tags.get(iname, set()) - if isinstance(tag, GroupIndexTag): + if check_iname_tags(tags, GroupIndexTag): tgt_dict = global_sizes - elif isinstance(tag, LocalIndexTag): + elif check_iname_tags(tags, LocalIndexTag): tgt_dict = local_sizes - elif isinstance(tag, AutoLocalIndexTagBase) and not ignore_auto: + elif check_iname_tags(tags, AutoLocalIndexTagBase) and not ignore_auto: raise RuntimeError("cannot find grid sizes if automatic " "local index tags are present") else: - tgt_dict = None - - if tgt_dict is None: continue + tags = get_iname_tags(tags, (GroupIndexTag, LocalIndexTag)) + + if len(tags) != 1: + raise LoopyError("Multiple axis tag not allowed") + + tag, = tags + size = self.get_iname_bounds(iname).size if tag.axis in tgt_dict: @@ -1171,7 +1178,7 @@ class LoopKernel(ImmutableRecordWithoutPickling): if show_labels: lines.append("INAME IMPLEMENTATION TAGS:") for iname in natsorted(kernel.all_inames()): - line = "%s: %s" % (iname, kernel.iname_to_tag.get(iname)) + line = "%s: %s" % (iname, ", ".join(kernel.iname_to_tags.get(iname, set()))) lines.append(line) if "variables" in what and kernel.temporary_variables: @@ -1349,7 +1356,7 @@ class LoopKernel(ImmutableRecordWithoutPickling): "assumptions", "local_sizes", "temporary_variables", - "iname_to_tag", + "iname_to_tags", "substitutions", "iname_slab_increments", "loop_priority", diff --git a/loopy/kernel/data.py b/loopy/kernel/data.py index c90e8a64b..9b66088e5 100644 --- a/loopy/kernel/data.py +++ b/loopy/kernel/data.py @@ -54,6 +54,15 @@ class auto(object): # noqa # {{{ iname tags + +def check_iname_tags(tags, tag_type): + return any([isinstance(tag, tag_type) for tag in tags]) + + +def get_iname_tags(tags, tag_type): + return tuple(tag for tag in tags if isinstance(tag, tag_type)) + + class IndexTag(ImmutableRecord): __slots__ = [] diff --git a/loopy/kernel/tools.py b/loopy/kernel/tools.py index ec26916f3..4ba2976a7 100644 --- a/loopy/kernel/tools.py +++ b/loopy/kernel/tools.py @@ -36,6 +36,7 @@ from islpy import dim_type from loopy.diagnostic import LoopyError, warn_with_kernel from pytools import memoize_on_first_arg from loopy.tools import natsorted +from loopy.kernel.data import check_iname_tags import logging logger = logging.getLogger(__name__) @@ -1129,9 +1130,9 @@ def get_visual_iname_order_embedding(kernel): from loopy.kernel.data import IlpBaseTag # Ignore ILP tagged inames, since they do not have to form a strict loop # nest. - ilp_inames = frozenset( - iname for iname in kernel.iname_to_tag - if isinstance(kernel.iname_to_tag[iname], IlpBaseTag)) + ilp_inames = frozenset(iname + for iname in kernel.iname_to_tags + if check_iname_tags(kernel.iname_to_tags.get(iname, set()), IlpBaseTag)) iname_trie = SetTrie() diff --git a/loopy/preprocess.py b/loopy/preprocess.py index a5284dc74..a9cfdd679 100644 --- a/loopy/preprocess.py +++ b/loopy/preprocess.py @@ -34,7 +34,7 @@ from pytools.persistent_dict import WriteOncePersistentDict from loopy.tools import LoopyKeyBuilder from loopy.version import DATA_MODEL_VERSION -from loopy.kernel.data import make_assignment +from loopy.kernel.data import make_assignment, check_iname_tags, get_iname_tags # for the benefit of loopy.statistics, for now from loopy.type_inference import infer_unknown_types @@ -135,9 +135,9 @@ def check_reduction_iname_uniqueness(kernel): # {{{ decide temporary scope def _get_compute_inames_tagged(kernel, insn, tag_base): - return set(iname - for iname in kernel.insn_inames(insn.id) - if isinstance(kernel.iname_to_tag.get(iname), tag_base)) + return set(iname for iname in kernel.insn_inames(insn.id) + if check_iname_tags(kernel.iname_to_tags.get(iname, set()), + tag_base)) def _get_assignee_inames_tagged(kernel, insn, tag_base, tv_names): @@ -2154,8 +2154,8 @@ def preprocess_kernel(kernel, device=None): # {{{ check that there are no l.auto-tagged inames from loopy.kernel.data import AutoLocalIndexTagBase - for iname, tag in six.iteritems(kernel.iname_to_tag): - if (isinstance(tag, AutoLocalIndexTagBase) + for iname, tags in six.iteritems(kernel.iname_to_tags): + if (check_iname_tags(tags, AutoLocalIndexTagBase) and iname in kernel.all_inames()): raise LoopyError("kernel with automatically-assigned " "local axes passed to preprocessing") diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index 45d0dc9da..761bbced4 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -33,6 +33,7 @@ from loopy.symbolic import ( RuleAwareIdentityMapper, RuleAwareSubstitutionMapper, SubstitutionRuleMappingContext) from loopy.diagnostic import LoopyError +from loopy.kernel.data import check_iname_tags, get_iname_tags __doc__ = """ @@ -671,41 +672,28 @@ def tag_inames(kernel, iname_to_tag, force=False, ignore_nonexistent=False): # }}} - knl_iname_to_tag = kernel.iname_to_tag.copy() + knl_iname_to_tags = kernel.iname_to_tags.copy() for iname, new_tag in six.iteritems(iname_to_tag): - old_tag = kernel.iname_to_tag.get(iname) - - retag_ok = False - - if isinstance(old_tag, (AutoLocalIndexTagBase, ForceSequentialTag)): - retag_ok = True - - if not retag_ok and old_tag is not None and new_tag is None: - raise ValueError("cannot untag iname '%s'" % iname) + old_tags = kernel.iname_to_tags.get(iname, set()) if iname not in kernel.all_inames(): raise ValueError("cannot tag '%s'--not known" % iname) if isinstance(new_tag, ConcurrentTag) \ - and isinstance(old_tag, ForceSequentialTag): + and check_iname_tags(old_tags, ForceSequentialTag): raise ValueError("cannot tag '%s' as parallel--" "iname requires sequential execution" % iname) if isinstance(new_tag, ForceSequentialTag) \ - and isinstance(old_tag, ConcurrentTag): + and check_iname_tags(old_tags, ConcurrentTag): raise ValueError("'%s' is already tagged as parallel, " "but is now prohibited from being parallel " "(likely because of participation in a precompute or " "a reduction)" % iname) - if (not retag_ok) and (not force) \ - and old_tag is not None and (old_tag != new_tag): - raise LoopyError("'%s' is already tagged '%s'--cannot retag" - % (iname, old_tag)) - - knl_iname_to_tag[iname] = new_tag + knl_iname_to_tags[iname] = old_tags.union([new_tag]) - return kernel.copy(iname_to_tag=knl_iname_to_tag) + return kernel.copy(iname_to_tags=knl_iname_to_tags) # }}} diff --git a/loopy/transform/privatize.py b/loopy/transform/privatize.py index 47f64815e..9abb92fb0 100644 --- a/loopy/transform/privatize.py +++ b/loopy/transform/privatize.py @@ -41,6 +41,7 @@ __doc__ = """ # {{{ privatize temporaries with iname from loopy.symbolic import IdentityMapper +from loopy.kernel.data import check_iname_tags, get_iname_tags class ExtraInameIndexInserter(IdentityMapper): -- GitLab From 4df1ca8e2280ab30881cb9b4d569e22611c45639 Mon Sep 17 00:00:00 2001 From: tj-sun Date: Tue, 3 Apr 2018 12:36:53 +0100 Subject: [PATCH 02/20] Some more changes --- loopy/check.py | 41 ++++++++++++++++++++++------------- loopy/codegen/bounds.py | 13 +++++------ loopy/codegen/control.py | 19 ++++++++-------- loopy/codegen/loop.py | 10 ++++----- loopy/schedule/__init__.py | 44 +++++++++++++++++++++----------------- 5 files changed, 72 insertions(+), 55 deletions(-) diff --git a/loopy/check.py b/loopy/check.py index c0b419b5c..f1e461b63 100644 --- a/loopy/check.py +++ b/loopy/check.py @@ -168,9 +168,9 @@ def _is_racing_iname_tag(tv, tag): def check_for_write_races(kernel): - from loopy.kernel.data import ConcurrentTag + from loopy.kernel.data import ConcurrentTag, check_iname_tags - iname_to_tag = kernel.iname_to_tag.get + iname_to_tag = kernel.iname_to_tags.get for insn in kernel.instructions: for assignee_name, assignee_indices in zip( insn.assignee_var_names(), @@ -187,16 +187,17 @@ def check_for_write_races(kernel): # will cause write races. raceable_parallel_insn_inames = set( - iname - for iname in kernel.insn_inames(insn) - if isinstance(iname_to_tag(iname), ConcurrentTag)) + iname for iname in kernel.insn_inames(insn) + if check_iname_tags( + kernel.iname_to_tags.get(iname, set()), + ConcurrentTag)) elif assignee_name in kernel.temporary_variables: temp_var = kernel.temporary_variables[assignee_name] raceable_parallel_insn_inames = set( - iname - for iname in kernel.insn_inames(insn) - if _is_racing_iname_tag(temp_var, iname_to_tag(iname))) + iname for iname in kernel.insn_inames(insn) + if any(_is_racing_iname_tag(temp_var, tag) + for tag in kernel.iname_to_tags.get(iname, set()))) else: raise LoopyError("invalid assignee name in instruction '%s'" @@ -229,13 +230,14 @@ def check_for_orphaned_user_hardware_axes(kernel): def check_for_data_dependent_parallel_bounds(kernel): - from loopy.kernel.data import ConcurrentTag + from loopy.kernel.data import ConcurrentTag, check_iname_tags for i, dom in enumerate(kernel.domains): dom_inames = set(dom.get_var_names(dim_type.set)) par_inames = set(iname for iname in dom_inames - if isinstance(kernel.iname_to_tag.get(iname), ConcurrentTag)) + if check_iname_tags( + kernel.iname_to_tags.get(iname, set()), ConcurrentTag)) if not par_inames: continue @@ -650,7 +652,8 @@ def _check_for_unused_hw_axes_in_kernel_chunk(kernel, sched_index=None): # alternative: just disregard length-1 dimensions? - from loopy.kernel.data import LocalIndexTag, AutoLocalIndexTagBase, GroupIndexTag + from loopy.kernel.data import (LocalIndexTag, AutoLocalIndexTagBase, + GroupIndexTag, check_iname_tags, get_iname_tags) while i < loop_end_i: sched_item = kernel.schedule[i] @@ -668,13 +671,21 @@ def _check_for_unused_hw_axes_in_kernel_chunk(kernel, sched_index=None): local_axes_used = set() for iname in kernel.insn_inames(insn): - tag = kernel.iname_to_tag.get(iname) + tags = kernel.iname_to_tags.get(iname, set()) - if isinstance(tag, LocalIndexTag): + if check_iname_tags(tags, LocalIndexTag): + tags = get_iname_tags(tags, LocalIndexTag) + if len(tags) > 1: + raise LoopyError("Can only have one LocalIndexTag") + tag, = tags local_axes_used.add(tag.axis) - elif isinstance(tag, GroupIndexTag): + elif check_iname_tags(tags, GroupIndexTag): + tags = get_iname_tags(tags, GroupIndexTag) + if len(tags) > 1: + raise LoopyError("Can only have one GroupIndexTag") + tag, = tags group_axes_used.add(tag.axis) - elif isinstance(tag, AutoLocalIndexTagBase): + elif check_iname_tags(tags, AutoLocalIndexTagBase): raise LoopyError("auto local tag encountered") if group_axes != group_axes_used: diff --git a/loopy/codegen/bounds.py b/loopy/codegen/bounds.py index f398a063d..5b5732770 100644 --- a/loopy/codegen/bounds.py +++ b/loopy/codegen/bounds.py @@ -58,7 +58,8 @@ def get_approximate_convex_bounds_checks(domain, check_inames, implemented_domai def get_usable_inames_for_conditional(kernel, sched_index): from loopy.schedule import ( find_active_inames_at, get_insn_ids_for_block_at, has_barrier_within) - from loopy.kernel.data import ConcurrentTag, LocalIndexTagBase, IlpBaseTag + from loopy.kernel.data import (ConcurrentTag, LocalIndexTagBase, + IlpBaseTag, check_iname_tags) result = find_active_inames_at(kernel, sched_index) crosses_barrier = has_barrier_within(kernel, sched_index) @@ -87,7 +88,7 @@ def get_usable_inames_for_conditional(kernel, sched_index): for iname in kernel.insn_inames(insn)) for iname in inames_for_subkernel: - tag = kernel.iname_to_tag.get(iname) + tags = kernel.iname_to_tags.get(iname, set()) # Parallel inames are defined within a subkernel, BUT: # @@ -97,10 +98,10 @@ def get_usable_inames_for_conditional(kernel, sched_index): # at the innermost level of nesting. if ( - isinstance(tag, ConcurrentTag) - and not (isinstance(tag, LocalIndexTagBase) and crosses_barrier) - and not isinstance(tag, IlpBaseTag) - ): + check_iname_tags(tags, ConcurrentTag) + and not (check_iname_tags(tags, LocalIndexTagBase) + and crosses_barrier) and not check_iname_tags(tags, IlpBaseTag) + ): result.add(iname) return frozenset(result) diff --git a/loopy/codegen/control.py b/loopy/codegen/control.py index e3e209726..d215c74c0 100644 --- a/loopy/codegen/control.py +++ b/loopy/codegen/control.py @@ -40,15 +40,16 @@ def get_admissible_conditional_inames_for(codegen_state, sched_index): kernel = codegen_state.kernel - from loopy.kernel.data import LocalIndexTag, HardwareConcurrentTag + from loopy.kernel.data import (LocalIndexTag, HardwareConcurrentTag, + check_iname_tags) from loopy.schedule import find_active_inames_at, has_barrier_within result = find_active_inames_at(kernel, sched_index) has_barrier = has_barrier_within(kernel, sched_index) - for iname, tag in six.iteritems(kernel.iname_to_tag): - if (isinstance(tag, HardwareConcurrentTag) + for iname, tags in six.iteritems(kernel.iname_to_tags): + if (check_iname_tags(tags, HardwareConcurrentTag) and codegen_state.is_generating_device_code): if not has_barrier or not isinstance(tag, LocalIndexTag): result.add(iname) @@ -127,7 +128,7 @@ def generate_code_for_sched_index(codegen_state, sched_index): ]) elif isinstance(sched_item, EnterLoop): - tag = kernel.iname_to_tag.get(sched_item.iname) + tags = kernel.iname_to_tags.get(sched_item.iname, set()) from loopy.codegen.loop import ( generate_unroll_loop, @@ -135,13 +136,13 @@ def generate_code_for_sched_index(codegen_state, sched_index): generate_sequential_loop_dim_code) from loopy.kernel.data import (UnrolledIlpTag, UnrollTag, ForceSequentialTag, - LoopedIlpTag, VectorizeTag, InOrderSequentialSequentialTag) - if isinstance(tag, (UnrollTag, UnrolledIlpTag)): + LoopedIlpTag, VectorizeTag, InOrderSequentialSequentialTag, check_iname_tags) + if check_iname_tags(tags, (UnrollTag, UnrolledIlpTag)): func = generate_unroll_loop - elif isinstance(tag, VectorizeTag): + elif check_iname_tags(tags, VectorizeTag): func = generate_vectorize_loop - elif tag is None or isinstance(tag, ( - LoopedIlpTag, ForceSequentialTag, InOrderSequentialSequentialTag)): + elif len(tags) == 0 or check_iname_tags(tags, (LoopedIlpTag, + ForceSequentialTag, InOrderSequentialSequentialTag)): func = generate_sequential_loop_dim_code else: raise RuntimeError("encountered (invalid) EnterLoop " diff --git a/loopy/codegen/loop.py b/loopy/codegen/loop.py index 1db7b0445..3698395a6 100644 --- a/loopy/codegen/loop.py +++ b/loopy/codegen/loop.py @@ -230,8 +230,8 @@ def set_up_hw_parallel_loops(codegen_state, schedule_index, next_func, hw_inames_left=None): kernel = codegen_state.kernel - from loopy.kernel.data import ( - UniqueTag, HardwareConcurrentTag, LocalIndexTag, GroupIndexTag) + from loopy.kernel.data import (UniqueTag, HardwareConcurrentTag, + LocalIndexTag, GroupIndexTag, check_iname_tags) from loopy.schedule import get_insn_ids_for_block_at insn_ids_for_block = get_insn_ids_for_block_at(kernel.schedule, schedule_index) @@ -241,9 +241,9 @@ def set_up_hw_parallel_loops(codegen_state, schedule_index, next_func, for insn_id in insn_ids_for_block: all_inames_by_insns |= kernel.insn_inames(insn_id) - hw_inames_left = [iname - for iname in all_inames_by_insns - if isinstance(kernel.iname_to_tag.get(iname), HardwareConcurrentTag)] + hw_inames_left = [iname for iname in all_inames_by_insns + if check_iname_tags(kernel.iname_to_tags.get(iname, set()), + HardwareConcurrentTag)] if not hw_inames_left: return next_func(codegen_state) diff --git a/loopy/schedule/__init__.py b/loopy/schedule/__init__.py index 3c9a6baed..db81fca62 100644 --- a/loopy/schedule/__init__.py +++ b/loopy/schedule/__init__.py @@ -212,13 +212,13 @@ def find_loop_nest_with_map(kernel): """ result = {} - from loopy.kernel.data import ConcurrentTag, IlpBaseTag, VectorizeTag + from loopy.kernel.data import (ConcurrentTag, IlpBaseTag, VectorizeTag, + check_iname_tags) - all_nonpar_inames = set([ - iname - for iname in kernel.all_inames() - if not isinstance(kernel.iname_to_tag.get(iname), - (ConcurrentTag, IlpBaseTag, VectorizeTag))]) + all_nonpar_inames = set( + iname for iname, tags in kernel.iname_to_tags + if not check_iname_tags(tags, + (ConcurrentTag, IlpBaseTag, VectorizeTag))) iname_to_insns = kernel.iname_to_insns() @@ -243,15 +243,15 @@ def find_loop_nest_around_map(kernel): iname_to_insns = kernel.iname_to_insns() # examine pairs of all inames--O(n**2), I know. - from loopy.kernel.data import IlpBaseTag + from loopy.kernel.data import IlpBaseTag, check_iname_tags for inner_iname in all_inames: result[inner_iname] = set() for outer_iname in all_inames: if inner_iname == outer_iname: continue - tag = kernel.iname_to_tag.get(outer_iname) - if isinstance(tag, IlpBaseTag): + tags = kernel.iname_to_tags.get(outer_iname, set()) + if check_iname_tags(tags, IlpBaseTag): # ILP tags are special because they are parallel tags # and therefore 'in principle' nest around everything. # But they're realized by the scheduler as a loop @@ -280,10 +280,12 @@ def find_loop_insn_dep_map(kernel, loop_nest_with_map, loop_nest_around_map): result = {} - from loopy.kernel.data import ConcurrentTag, IlpBaseTag, VectorizeTag + from loopy.kernel.data import (ConcurrentTag, IlpBaseTag, VectorizeTag, + check_iname_tags) for insn in kernel.instructions: for iname in kernel.insn_inames(insn): - if isinstance(kernel.iname_to_tag.get(iname), ConcurrentTag): + if check_iname_tags(kernel.iname_to_tags.get(iname, set()), + ConcurrentTag): continue iname_dep = result.setdefault(iname, set()) @@ -313,8 +315,8 @@ def find_loop_insn_dep_map(kernel, loop_nest_with_map, loop_nest_around_map): # -> safe. continue - tag = kernel.iname_to_tag.get(dep_insn_iname) - if isinstance(tag, (ConcurrentTag, IlpBaseTag, VectorizeTag)): + tags = kernel.iname_to_tags.get(dep_insn_iname, set()) + if check_iname_tags(tags, (ConcurrentTag, IlpBaseTag, VectorizeTag)): # Parallel tags don't really nest, so we'll disregard # them here. continue @@ -1878,18 +1880,20 @@ def generate_loop_schedules_inner(kernel, debug_args={}): for item in preschedule for insn_id in sched_item_to_insn_id(item)) - from loopy.kernel.data import IlpBaseTag, ConcurrentTag, VectorizeTag + from loopy.kernel.data import (IlpBaseTag, ConcurrentTag, VectorizeTag, + check_iname_tags) ilp_inames = set( iname - for iname in kernel.all_inames() - if isinstance(kernel.iname_to_tag.get(iname), IlpBaseTag)) + for iname, tags in kernel.iname_to_tags + if check_iname_tags(tags, IlpBaseTag)) vec_inames = set( iname - for iname in kernel.all_inames() - if isinstance(kernel.iname_to_tag.get(iname), VectorizeTag)) + for iname, tags in kernel.iname_to_tags + if check_iname_tags(tags, VectorizeTag)) parallel_inames = set( - iname for iname in kernel.all_inames() - if isinstance(kernel.iname_to_tag.get(iname), ConcurrentTag)) + iname + for iname, tags in kernel.iname_to_tags + if check_iname_tags(tags, ConcurrentTag)) loop_nest_with_map = find_loop_nest_with_map(kernel) loop_nest_around_map = find_loop_nest_around_map(kernel) -- GitLab From fb29388549886a8b2548ee6f3a510c2b32c51b9b Mon Sep 17 00:00:00 2001 From: tj-sun Date: Tue, 3 Apr 2018 16:35:18 +0100 Subject: [PATCH 03/20] change tags from set to tuple --- loopy/check.py | 11 +++++------ loopy/codegen/bounds.py | 2 +- loopy/codegen/control.py | 2 +- loopy/codegen/loop.py | 2 +- loopy/kernel/__init__.py | 9 +++++---- loopy/kernel/tools.py | 2 +- loopy/preprocess.py | 2 +- loopy/schedule/__init__.py | 14 +++++++------- loopy/transform/iname.py | 7 +++++-- loopy/transform/privatize.py | 7 ++++++- 10 files changed, 33 insertions(+), 25 deletions(-) diff --git a/loopy/check.py b/loopy/check.py index f1e461b63..012d69be0 100644 --- a/loopy/check.py +++ b/loopy/check.py @@ -119,7 +119,7 @@ def check_for_double_use_of_hw_axes(kernel): for insn in kernel.instructions: insn_tag_keys = set() for iname in kernel.insn_inames(insn): - tags = kernel.iname_to_tags.get(iname, set()) + tags = kernel.iname_to_tags.get(iname, tuple()) for tag in get_iname_tags(tags, UniqueTag): key = tag.key if key in insn_tag_keys: @@ -170,7 +170,6 @@ def _is_racing_iname_tag(tv, tag): def check_for_write_races(kernel): from loopy.kernel.data import ConcurrentTag, check_iname_tags - iname_to_tag = kernel.iname_to_tags.get for insn in kernel.instructions: for assignee_name, assignee_indices in zip( insn.assignee_var_names(), @@ -189,7 +188,7 @@ def check_for_write_races(kernel): raceable_parallel_insn_inames = set( iname for iname in kernel.insn_inames(insn) if check_iname_tags( - kernel.iname_to_tags.get(iname, set()), + kernel.iname_to_tags.get(iname, tuple()), ConcurrentTag)) elif assignee_name in kernel.temporary_variables: @@ -197,7 +196,7 @@ def check_for_write_races(kernel): raceable_parallel_insn_inames = set( iname for iname in kernel.insn_inames(insn) if any(_is_racing_iname_tag(temp_var, tag) - for tag in kernel.iname_to_tags.get(iname, set()))) + for tag in kernel.iname_to_tags.get(iname, tuple()))) else: raise LoopyError("invalid assignee name in instruction '%s'" @@ -237,7 +236,7 @@ def check_for_data_dependent_parallel_bounds(kernel): par_inames = set(iname for iname in dom_inames if check_iname_tags( - kernel.iname_to_tags.get(iname, set()), ConcurrentTag)) + kernel.iname_to_tags.get(iname, tuple()), ConcurrentTag)) if not par_inames: continue @@ -671,7 +670,7 @@ def _check_for_unused_hw_axes_in_kernel_chunk(kernel, sched_index=None): local_axes_used = set() for iname in kernel.insn_inames(insn): - tags = kernel.iname_to_tags.get(iname, set()) + tags = kernel.iname_to_tags.get(iname, tuple()) if check_iname_tags(tags, LocalIndexTag): tags = get_iname_tags(tags, LocalIndexTag) diff --git a/loopy/codegen/bounds.py b/loopy/codegen/bounds.py index 5b5732770..2ea8d5b68 100644 --- a/loopy/codegen/bounds.py +++ b/loopy/codegen/bounds.py @@ -88,7 +88,7 @@ def get_usable_inames_for_conditional(kernel, sched_index): for iname in kernel.insn_inames(insn)) for iname in inames_for_subkernel: - tags = kernel.iname_to_tags.get(iname, set()) + tags = kernel.iname_to_tags.get(iname, tuple()) # Parallel inames are defined within a subkernel, BUT: # diff --git a/loopy/codegen/control.py b/loopy/codegen/control.py index d215c74c0..7aad0287c 100644 --- a/loopy/codegen/control.py +++ b/loopy/codegen/control.py @@ -128,7 +128,7 @@ def generate_code_for_sched_index(codegen_state, sched_index): ]) elif isinstance(sched_item, EnterLoop): - tags = kernel.iname_to_tags.get(sched_item.iname, set()) + tags = kernel.iname_to_tags.get(sched_item.iname, tuple()) from loopy.codegen.loop import ( generate_unroll_loop, diff --git a/loopy/codegen/loop.py b/loopy/codegen/loop.py index 3698395a6..8e6e89a18 100644 --- a/loopy/codegen/loop.py +++ b/loopy/codegen/loop.py @@ -242,7 +242,7 @@ def set_up_hw_parallel_loops(codegen_state, schedule_index, next_func, all_inames_by_insns |= kernel.insn_inames(insn_id) hw_inames_left = [iname for iname in all_inames_by_insns - if check_iname_tags(kernel.iname_to_tags.get(iname, set()), + if check_iname_tags(kernel.iname_to_tags.get(iname, tuple()), HardwareConcurrentTag)] if not hw_inames_left: diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py index f9878aecf..3912bc924 100644 --- a/loopy/kernel/__init__.py +++ b/loopy/kernel/__init__.py @@ -711,7 +711,7 @@ class LoopKernel(ImmutableRecordWithoutPickling): from loopy.kernel.data import HardwareConcurrentTag for iname in cond_inames: - tags = self.iname_to_tags.get(iname, set()) + tags = self.iname_to_tags.get(iname, tuple()) if check_iname_tags(tags, HardwareConcurrentTag): tag_key_uses[tag.key].append(iname) @@ -721,7 +721,7 @@ class LoopKernel(ImmutableRecordWithoutPickling): multi_use_inames = set() for iname in cond_inames: - for tag in self.iname_to_tags.get(iname, set()): + for tag in self.iname_to_tags.get(iname, tuple()): if isinstance(tag, HardwareConcurrentTag) and tag.key in multi_use_keys: multi_use_inames.add(iname) break @@ -954,7 +954,7 @@ class LoopKernel(ImmutableRecordWithoutPickling): AutoLocalIndexTagBase) for iname in all_inames_by_insns: - tags = self.iname_to_tags.get(iname, set()) + tags = self.iname_to_tags.get(iname, tuple()) if check_iname_tags(tags, GroupIndexTag): tgt_dict = global_sizes @@ -1178,7 +1178,8 @@ class LoopKernel(ImmutableRecordWithoutPickling): if show_labels: lines.append("INAME IMPLEMENTATION TAGS:") for iname in natsorted(kernel.all_inames()): - line = "%s: %s" % (iname, ", ".join(kernel.iname_to_tags.get(iname, set()))) + line = "%s: %s" % (iname, ", ".join( + tag.key for tag in kernel.iname_to_tags.get(iname, tuple()))) lines.append(line) if "variables" in what and kernel.temporary_variables: diff --git a/loopy/kernel/tools.py b/loopy/kernel/tools.py index 4ba2976a7..76eccb591 100644 --- a/loopy/kernel/tools.py +++ b/loopy/kernel/tools.py @@ -1132,7 +1132,7 @@ def get_visual_iname_order_embedding(kernel): # nest. ilp_inames = frozenset(iname for iname in kernel.iname_to_tags - if check_iname_tags(kernel.iname_to_tags.get(iname, set()), IlpBaseTag)) + if check_iname_tags(kernel.iname_to_tags.get(iname, tuple()), IlpBaseTag)) iname_trie = SetTrie() diff --git a/loopy/preprocess.py b/loopy/preprocess.py index a9cfdd679..a3952b812 100644 --- a/loopy/preprocess.py +++ b/loopy/preprocess.py @@ -147,7 +147,7 @@ def _get_assignee_inames_tagged(kernel, insn, tag_base, tv_names): insn.assignee_subscript_deps()) for iname in adeps & kernel.all_inames() if aname in tv_names - if isinstance(kernel.iname_to_tag.get(iname), tag_base)) + if check_iname_tags(kernel.iname_to_tags.get(iname, tuple()), tag_base)) def find_temporary_scope(kernel): diff --git a/loopy/schedule/__init__.py b/loopy/schedule/__init__.py index db81fca62..b189d3414 100644 --- a/loopy/schedule/__init__.py +++ b/loopy/schedule/__init__.py @@ -216,7 +216,7 @@ def find_loop_nest_with_map(kernel): check_iname_tags) all_nonpar_inames = set( - iname for iname, tags in kernel.iname_to_tags + iname for iname, tags in six.iteritems(kernel.iname_to_tags) if not check_iname_tags(tags, (ConcurrentTag, IlpBaseTag, VectorizeTag))) @@ -250,7 +250,7 @@ def find_loop_nest_around_map(kernel): if inner_iname == outer_iname: continue - tags = kernel.iname_to_tags.get(outer_iname, set()) + tags = kernel.iname_to_tags.get(outer_iname, tuple()) if check_iname_tags(tags, IlpBaseTag): # ILP tags are special because they are parallel tags # and therefore 'in principle' nest around everything. @@ -284,7 +284,7 @@ def find_loop_insn_dep_map(kernel, loop_nest_with_map, loop_nest_around_map): check_iname_tags) for insn in kernel.instructions: for iname in kernel.insn_inames(insn): - if check_iname_tags(kernel.iname_to_tags.get(iname, set()), + if check_iname_tags(kernel.iname_to_tags.get(iname, tuple()), ConcurrentTag): continue @@ -315,7 +315,7 @@ def find_loop_insn_dep_map(kernel, loop_nest_with_map, loop_nest_around_map): # -> safe. continue - tags = kernel.iname_to_tags.get(dep_insn_iname, set()) + tags = kernel.iname_to_tags.get(dep_insn_iname, tuple()) if check_iname_tags(tags, (ConcurrentTag, IlpBaseTag, VectorizeTag)): # Parallel tags don't really nest, so we'll disregard # them here. @@ -1884,15 +1884,15 @@ def generate_loop_schedules_inner(kernel, debug_args={}): check_iname_tags) ilp_inames = set( iname - for iname, tags in kernel.iname_to_tags + for iname, tags in six.iteritems(kernel.iname_to_tags) if check_iname_tags(tags, IlpBaseTag)) vec_inames = set( iname - for iname, tags in kernel.iname_to_tags + for iname, tags in six.iteritems(kernel.iname_to_tags) if check_iname_tags(tags, VectorizeTag)) parallel_inames = set( iname - for iname, tags in kernel.iname_to_tags + for iname, tags in six.iteritems(kernel.iname_to_tags) if check_iname_tags(tags, ConcurrentTag)) loop_nest_with_map = find_loop_nest_with_map(kernel) diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index 761bbced4..9f1212fc1 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -674,7 +674,7 @@ def tag_inames(kernel, iname_to_tag, force=False, ignore_nonexistent=False): knl_iname_to_tags = kernel.iname_to_tags.copy() for iname, new_tag in six.iteritems(iname_to_tag): - old_tags = kernel.iname_to_tags.get(iname, set()) + old_tags = kernel.iname_to_tags.get(iname, tuple()) if iname not in kernel.all_inames(): raise ValueError("cannot tag '%s'--not known" % iname) @@ -691,7 +691,10 @@ def tag_inames(kernel, iname_to_tag, force=False, ignore_nonexistent=False): "(likely because of participation in a precompute or " "a reduction)" % iname) - knl_iname_to_tags[iname] = old_tags.union([new_tag]) + if all(tag.key != new_tag.key for tag in old_tags): + old_tags = old_tags + (new_tag,) + + knl_iname_to_tags[iname] = old_tags return kernel.copy(iname_to_tags=knl_iname_to_tags) diff --git a/loopy/transform/privatize.py b/loopy/transform/privatize.py index 9abb92fb0..d0d15cabf 100644 --- a/loopy/transform/privatize.py +++ b/loopy/transform/privatize.py @@ -85,12 +85,16 @@ def privatize_temporaries_with_inames( Example:: +<<<<<<< HEAD:loopy/transform/privatize.py for imatrix, i acc = 0 for k acc = acc + a[imatrix, i, k] * vec[k] end end +======= + from loopy.kernel.data import IlpBaseTag, VectorizeTag, check_iname_tags +>>>>>>> d4c1d2e... change tags from set to tuple:loopy/transform/ilp.py might become:: @@ -188,7 +192,8 @@ def privatize_temporaries_with_inames( dim_tags = ["c"] * (len(shape) + len(extra_shape)) for i, iname in enumerate(inames): - if isinstance(kernel.iname_to_tag.get(iname), VectorizeTag): + if check_iname_tags(kernel.iname_to_tags.get(iname, tuple()), + VectorizeTag): dim_tags[len(shape) + i] = "vec" new_temp_vars[tv.name] = tv.copy(shape=shape + extra_shape, -- GitLab From a47e423c6deb85d33923e5e9e52bef3f165a4bec Mon Sep 17 00:00:00 2001 From: tj-sun Date: Tue, 3 Apr 2018 16:48:58 +0100 Subject: [PATCH 04/20] dict -> defaultdict --- loopy/check.py | 16 ++++++---------- loopy/codegen/bounds.py | 2 +- loopy/codegen/control.py | 2 +- loopy/codegen/loop.py | 2 +- loopy/kernel/__init__.py | 12 ++++++------ loopy/kernel/tools.py | 2 +- loopy/preprocess.py | 5 ++--- loopy/schedule/__init__.py | 7 +++---- loopy/transform/iname.py | 2 +- loopy/transform/privatize.py | 3 +-- 10 files changed, 23 insertions(+), 30 deletions(-) diff --git a/loopy/check.py b/loopy/check.py index 012d69be0..611043d53 100644 --- a/loopy/check.py +++ b/loopy/check.py @@ -119,7 +119,7 @@ def check_for_double_use_of_hw_axes(kernel): for insn in kernel.instructions: insn_tag_keys = set() for iname in kernel.insn_inames(insn): - tags = kernel.iname_to_tags.get(iname, tuple()) + tags = kernel.iname_to_tags[iname] for tag in get_iname_tags(tags, UniqueTag): key = tag.key if key in insn_tag_keys: @@ -187,16 +187,14 @@ def check_for_write_races(kernel): raceable_parallel_insn_inames = set( iname for iname in kernel.insn_inames(insn) - if check_iname_tags( - kernel.iname_to_tags.get(iname, tuple()), - ConcurrentTag)) + if check_iname_tags(kernel.iname_to_tags[iname], ConcurrentTag)) elif assignee_name in kernel.temporary_variables: temp_var = kernel.temporary_variables[assignee_name] raceable_parallel_insn_inames = set( iname for iname in kernel.insn_inames(insn) if any(_is_racing_iname_tag(temp_var, tag) - for tag in kernel.iname_to_tags.get(iname, tuple()))) + for tag in kernel.iname_to_tags[iname])) else: raise LoopyError("invalid assignee name in instruction '%s'" @@ -233,10 +231,8 @@ def check_for_data_dependent_parallel_bounds(kernel): for i, dom in enumerate(kernel.domains): dom_inames = set(dom.get_var_names(dim_type.set)) - par_inames = set(iname - for iname in dom_inames - if check_iname_tags( - kernel.iname_to_tags.get(iname, tuple()), ConcurrentTag)) + par_inames = set(iname for iname in dom_inames + if check_iname_tags(kernel.iname_to_tags[iname], ConcurrentTag)) if not par_inames: continue @@ -670,7 +666,7 @@ def _check_for_unused_hw_axes_in_kernel_chunk(kernel, sched_index=None): local_axes_used = set() for iname in kernel.insn_inames(insn): - tags = kernel.iname_to_tags.get(iname, tuple()) + tags = kernel.iname_to_tags[iname] if check_iname_tags(tags, LocalIndexTag): tags = get_iname_tags(tags, LocalIndexTag) diff --git a/loopy/codegen/bounds.py b/loopy/codegen/bounds.py index 2ea8d5b68..886f305a4 100644 --- a/loopy/codegen/bounds.py +++ b/loopy/codegen/bounds.py @@ -88,7 +88,7 @@ def get_usable_inames_for_conditional(kernel, sched_index): for iname in kernel.insn_inames(insn)) for iname in inames_for_subkernel: - tags = kernel.iname_to_tags.get(iname, tuple()) + tags = kernel.iname_to_tags[iname] # Parallel inames are defined within a subkernel, BUT: # diff --git a/loopy/codegen/control.py b/loopy/codegen/control.py index 7aad0287c..fb92ef750 100644 --- a/loopy/codegen/control.py +++ b/loopy/codegen/control.py @@ -128,7 +128,7 @@ def generate_code_for_sched_index(codegen_state, sched_index): ]) elif isinstance(sched_item, EnterLoop): - tags = kernel.iname_to_tags.get(sched_item.iname, tuple()) + tags = kernel.iname_to_tags[sched_item.iname] from loopy.codegen.loop import ( generate_unroll_loop, diff --git a/loopy/codegen/loop.py b/loopy/codegen/loop.py index 8e6e89a18..89e832127 100644 --- a/loopy/codegen/loop.py +++ b/loopy/codegen/loop.py @@ -242,7 +242,7 @@ def set_up_hw_parallel_loops(codegen_state, schedule_index, next_func, all_inames_by_insns |= kernel.insn_inames(insn_id) hw_inames_left = [iname for iname in all_inames_by_insns - if check_iname_tags(kernel.iname_to_tags.get(iname, tuple()), + if check_iname_tags(kernel.iname_to_tags[iname], HardwareConcurrentTag)] if not hw_inames_left: diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py index 3912bc924..07f17b6c5 100644 --- a/loopy/kernel/__init__.py +++ b/loopy/kernel/__init__.py @@ -143,7 +143,7 @@ class LoopKernel(ImmutableRecordWithoutPickling): .. attribute:: iname_to_tags A :class:`dict` mapping inames (as strings) - to set of instances of :class:`loopy.kernel.data.IndexTag`. + to tuple of instances of :class:`loopy.kernel.data.IndexTag`. .. attribute:: function_manglers .. attribute:: symbol_manglers @@ -197,7 +197,7 @@ class LoopKernel(ImmutableRecordWithoutPickling): assumptions=None, local_sizes={}, temporary_variables={}, - iname_to_tags={}, + iname_to_tags=defaultdict(tuple), substitutions={}, function_manglers=[ default_function_mangler, @@ -711,7 +711,7 @@ class LoopKernel(ImmutableRecordWithoutPickling): from loopy.kernel.data import HardwareConcurrentTag for iname in cond_inames: - tags = self.iname_to_tags.get(iname, tuple()) + tags = self.iname_to_tags[iname] if check_iname_tags(tags, HardwareConcurrentTag): tag_key_uses[tag.key].append(iname) @@ -721,7 +721,7 @@ class LoopKernel(ImmutableRecordWithoutPickling): multi_use_inames = set() for iname in cond_inames: - for tag in self.iname_to_tags.get(iname, tuple()): + for tag in self.iname_to_tags[iname]: if isinstance(tag, HardwareConcurrentTag) and tag.key in multi_use_keys: multi_use_inames.add(iname) break @@ -954,7 +954,7 @@ class LoopKernel(ImmutableRecordWithoutPickling): AutoLocalIndexTagBase) for iname in all_inames_by_insns: - tags = self.iname_to_tags.get(iname, tuple()) + tags = self.iname_to_tags[iname] if check_iname_tags(tags, GroupIndexTag): tgt_dict = global_sizes @@ -1179,7 +1179,7 @@ class LoopKernel(ImmutableRecordWithoutPickling): lines.append("INAME IMPLEMENTATION TAGS:") for iname in natsorted(kernel.all_inames()): line = "%s: %s" % (iname, ", ".join( - tag.key for tag in kernel.iname_to_tags.get(iname, tuple()))) + tag.key for tag in kernel.iname_to_tags[iname])) lines.append(line) if "variables" in what and kernel.temporary_variables: diff --git a/loopy/kernel/tools.py b/loopy/kernel/tools.py index 76eccb591..c1d17371f 100644 --- a/loopy/kernel/tools.py +++ b/loopy/kernel/tools.py @@ -1132,7 +1132,7 @@ def get_visual_iname_order_embedding(kernel): # nest. ilp_inames = frozenset(iname for iname in kernel.iname_to_tags - if check_iname_tags(kernel.iname_to_tags.get(iname, tuple()), IlpBaseTag)) + if check_iname_tags(kernel.iname_to_tags[iname], IlpBaseTag)) iname_trie = SetTrie() diff --git a/loopy/preprocess.py b/loopy/preprocess.py index a3952b812..bd0a7c8dd 100644 --- a/loopy/preprocess.py +++ b/loopy/preprocess.py @@ -136,8 +136,7 @@ def check_reduction_iname_uniqueness(kernel): def _get_compute_inames_tagged(kernel, insn, tag_base): return set(iname for iname in kernel.insn_inames(insn.id) - if check_iname_tags(kernel.iname_to_tags.get(iname, set()), - tag_base)) + if check_iname_tags(kernel.iname_to_tags[iname], tag_base)) def _get_assignee_inames_tagged(kernel, insn, tag_base, tv_names): @@ -147,7 +146,7 @@ def _get_assignee_inames_tagged(kernel, insn, tag_base, tv_names): insn.assignee_subscript_deps()) for iname in adeps & kernel.all_inames() if aname in tv_names - if check_iname_tags(kernel.iname_to_tags.get(iname, tuple()), tag_base)) + if check_iname_tags(kernel.iname_to_tags[iname], tag_base)) def find_temporary_scope(kernel): diff --git a/loopy/schedule/__init__.py b/loopy/schedule/__init__.py index b189d3414..1611d5364 100644 --- a/loopy/schedule/__init__.py +++ b/loopy/schedule/__init__.py @@ -250,7 +250,7 @@ def find_loop_nest_around_map(kernel): if inner_iname == outer_iname: continue - tags = kernel.iname_to_tags.get(outer_iname, tuple()) + tags = kernel.iname_to_tags[outer_iname] if check_iname_tags(tags, IlpBaseTag): # ILP tags are special because they are parallel tags # and therefore 'in principle' nest around everything. @@ -284,8 +284,7 @@ def find_loop_insn_dep_map(kernel, loop_nest_with_map, loop_nest_around_map): check_iname_tags) for insn in kernel.instructions: for iname in kernel.insn_inames(insn): - if check_iname_tags(kernel.iname_to_tags.get(iname, tuple()), - ConcurrentTag): + if check_iname_tags(kernel.iname_to_tags[iname], ConcurrentTag): continue iname_dep = result.setdefault(iname, set()) @@ -315,7 +314,7 @@ def find_loop_insn_dep_map(kernel, loop_nest_with_map, loop_nest_around_map): # -> safe. continue - tags = kernel.iname_to_tags.get(dep_insn_iname, tuple()) + tags = kernel.iname_to_tags[dep_insn_iname] if check_iname_tags(tags, (ConcurrentTag, IlpBaseTag, VectorizeTag)): # Parallel tags don't really nest, so we'll disregard # them here. diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index 9f1212fc1..faa6ca3d9 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -674,7 +674,7 @@ def tag_inames(kernel, iname_to_tag, force=False, ignore_nonexistent=False): knl_iname_to_tags = kernel.iname_to_tags.copy() for iname, new_tag in six.iteritems(iname_to_tag): - old_tags = kernel.iname_to_tags.get(iname, tuple()) + old_tags = kernel.iname_to_tags[iname] if iname not in kernel.all_inames(): raise ValueError("cannot tag '%s'--not known" % iname) diff --git a/loopy/transform/privatize.py b/loopy/transform/privatize.py index d0d15cabf..d1c112eca 100644 --- a/loopy/transform/privatize.py +++ b/loopy/transform/privatize.py @@ -192,8 +192,7 @@ def privatize_temporaries_with_inames( dim_tags = ["c"] * (len(shape) + len(extra_shape)) for i, iname in enumerate(inames): - if check_iname_tags(kernel.iname_to_tags.get(iname, tuple()), - VectorizeTag): + if check_iname_tags(kernel.iname_to_tags[iname], VectorizeTag): dim_tags[len(shape) + i] = "vec" new_temp_vars[tv.name] = tv.copy(shape=shape + extra_shape, -- GitLab From 6a0cc21d0fa2ab1001e4543b134d3e8b99bf555e Mon Sep 17 00:00:00 2001 From: tj-sun Date: Tue, 3 Apr 2018 17:37:49 +0100 Subject: [PATCH 05/20] half way through iname_to_tag -> iname_to_tags --- loopy/check.py | 18 +++++++++------- loopy/codegen/loop.py | 17 +++++++++------ loopy/kernel/tools.py | 48 ++++++++++++++++++++++--------------------- loopy/preprocess.py | 46 +++++++++++++++++++---------------------- loopy/statistics.py | 38 ++++++++++++++++++++-------------- 5 files changed, 91 insertions(+), 76 deletions(-) diff --git a/loopy/check.py b/loopy/check.py index 611043d53..9a9ff1fd5 100644 --- a/loopy/check.py +++ b/loopy/check.py @@ -213,12 +213,15 @@ def check_for_write_races(kernel): def check_for_orphaned_user_hardware_axes(kernel): - from loopy.kernel.data import LocalIndexTag + from loopy.kernel.data import LocalIndexTag, check_iname_tags for axis in kernel.local_sizes: found = False - for tag in six.itervalues(kernel.iname_to_tag): - if isinstance(tag, LocalIndexTag) and tag.axis == axis: - found = True + for tags in six.itervalues(kernel.iname_to_tags): + for tag in tags: + if isinstance(tag, LocalIndexTag) and tag.axis == axis: + found = True + break + if found: break if not found: @@ -893,6 +896,8 @@ def check_implemented_domains(kernel, implemented_domains, code=None): from islpy import align_two + from loopy.kernel.data import check_iname_tags + last_idomains = None last_insn_inames = None @@ -928,9 +933,8 @@ def check_implemented_domains(kernel, implemented_domains, code=None): from loopy.kernel.data import LocalIndexTag if isinstance(insn, BarrierInstruction): # project out local-id-mapped inames, solves #94 on gitlab - non_lid_inames = frozenset( - [iname for iname in insn_inames if not isinstance( - kernel.iname_to_tag.get(iname), LocalIndexTag)]) + non_lid_inames = frozenset(iname for iname in insn_inames + if not check_iname_tags(kernel.iname_to_tags[iname], LocalIndexTag)) insn_impl_domain = insn_impl_domain.project_out_except( non_lid_inames, [dim_type.set]) diff --git a/loopy/codegen/loop.py b/loopy/codegen/loop.py index 89e832127..01f8a8255 100644 --- a/loopy/codegen/loop.py +++ b/loopy/codegen/loop.py @@ -254,11 +254,16 @@ def set_up_hw_parallel_loops(codegen_state, schedule_index, next_func, hw_inames_left = hw_inames_left[:] iname = hw_inames_left.pop() - tag = kernel.iname_to_tag.get(iname) + tags = kernel.iname_to_tags[iname] from loopy.symbolic import GroupHardwareAxisIndex, LocalHardwareAxisIndex - assert isinstance(tag, UniqueTag) + assert check_iname_tags(tags, UniqueTag) + + if len(tags) > 1: + raise LoopyError("cannot have more than one UniqueTag") + + tag, = tags if isinstance(tag, GroupIndexTag): hw_axis_expr = GroupHardwareAxisIndex(tag.axis) elif isinstance(tag, LocalIndexTag): @@ -267,10 +272,10 @@ def set_up_hw_parallel_loops(codegen_state, schedule_index, next_func, raise RuntimeError("unexpected hw tag type") other_inames_with_same_tag = [ - other_iname for other_iname in kernel.all_inames() - if isinstance(kernel.iname_to_tag.get(other_iname), UniqueTag) - and kernel.iname_to_tag.get(other_iname).key == tag.key - and other_iname != iname] + other_iname for other_iname in kernel.all_inames() + if check_iname_tags(kernel.iname_to_tags[other_iname], UniqueTag) + and any(_tag.key == tag.key for _tag in kernel.iname_to_tags[other_iname]) + and other_iname != iname] # {{{ 'implement' hardware axis boundaries diff --git a/loopy/kernel/tools.py b/loopy/kernel/tools.py index c1d17371f..7282542dc 100644 --- a/loopy/kernel/tools.py +++ b/loopy/kernel/tools.py @@ -632,7 +632,7 @@ def is_domain_dependent_on_inames(kernel, domain_index, inames): # {{{ rank inames by stride def get_auto_axis_iname_ranking_by_stride(kernel, insn): - from loopy.kernel.data import ImageArg, ValueArg + from loopy.kernel.data import ImageArg, ValueArg, check_iname_tags approximate_arg_values = {} for arg in kernel.args: @@ -677,10 +677,8 @@ def get_auto_axis_iname_ranking_by_stride(kernel, insn): from loopy.kernel.data import AutoLocalIndexTagBase auto_axis_inames = set( - iname - for iname in kernel.insn_inames(insn) - if isinstance(kernel.iname_to_tag.get(iname), - AutoLocalIndexTagBase)) + iname for iname in kernel.insn_inames(insn) + if check_iname_tags(kernel.iname_to_tags[iname], AutoLocalIndexTagBase)) # }}} @@ -752,8 +750,11 @@ def get_auto_axis_iname_ranking_by_stride(kernel, insn): def assign_automatic_axes(kernel, axis=0, local_size=None): logger.debug("%s: assign automatic axes" % kernel.name) + # TODO: do the tag removal rigorously, might be easier after switching + # to set() from tuple() - from loopy.kernel.data import (AutoLocalIndexTagBase, LocalIndexTag) + from loopy.kernel.data import (AutoLocalIndexTagBase, LocalIndexTag, + check_iname_tags, get_iname_tags) # Realize that at this point in time, axis lengths are already # fixed. So we compute them once and pass them to our recursive @@ -777,10 +778,10 @@ def assign_automatic_axes(kernel, axis=0, local_size=None): except isl.Error: # Likely unbounded, automatic assignment is not # going to happen for this iname. - new_iname_to_tag = kernel.iname_to_tag.copy() - new_iname_to_tag[iname] = None + new_iname_to_tags = kernel.iname_to_tags.copy() + new_iname_to_tags[iname] = tuple() return assign_automatic_axes( - kernel.copy(iname_to_tag=new_iname_to_tag), + kernel.copy(iname_to_tags=new_iname_to_tags), axis=recursion_axis) if axis is None: @@ -816,9 +817,9 @@ def assign_automatic_axes(kernel, axis=0, local_size=None): # }}} if axis is None: - new_tag = None + new_tag = tuple() else: - new_tag = LocalIndexTag(axis) + new_tag = (LocalIndexTag(axis),) if desired_length > local_size[axis]: from loopy import split_iname @@ -831,12 +832,12 @@ def assign_automatic_axes(kernel, axis=0, local_size=None): do_tagged_check=False), axis=recursion_axis, local_size=local_size) - if not isinstance(kernel.iname_to_tag.get(iname), AutoLocalIndexTagBase): + if not check_iname_tags(kernel.iname_to_tags[iname], AutoLocalIndexTagBase): raise LoopyError("trying to reassign '%s'" % iname) - new_iname_to_tag = kernel.iname_to_tag.copy() - new_iname_to_tag[iname] = new_tag - return assign_automatic_axes(kernel.copy(iname_to_tag=new_iname_to_tag), + new_iname_to_tags = kernel.iname_to_tags.copy() + new_iname_to_tags[iname] = new_tag + return assign_automatic_axes(kernel.copy(iname_to_tags=new_iname_to_tags), axis=recursion_axis, local_size=local_size) # }}} @@ -853,10 +854,8 @@ def assign_automatic_axes(kernel, axis=0, local_size=None): continue auto_axis_inames = [ - iname - for iname in kernel.insn_inames(insn) - if isinstance(kernel.iname_to_tag.get(iname), - AutoLocalIndexTagBase)] + iname for iname in kernel.insn_inames(insn) + if check_iname_tags(kernel.iname_to_tags[iname], AutoLocalIndexTagBase)] if not auto_axis_inames: continue @@ -864,8 +863,11 @@ def assign_automatic_axes(kernel, axis=0, local_size=None): assigned_local_axes = set() for iname in kernel.insn_inames(insn): - tag = kernel.iname_to_tag.get(iname) - if isinstance(tag, LocalIndexTag): + tags = get(kernel.iname_to_tags[iname], LocalIndexTag) + if tags: + if len(tags) > 1: + raise LoopyError("cannot have more than one LocalIndexTags") + tag, = tags assigned_local_axes.add(tag.axis) if axis < len(local_size): @@ -875,8 +877,8 @@ def assign_automatic_axes(kernel, axis=0, local_size=None): iname_ranking = get_auto_axis_iname_ranking_by_stride(kernel, insn) if iname_ranking is not None: for iname in iname_ranking: - prev_tag = kernel.iname_to_tag.get(iname) - if isinstance(prev_tag, AutoLocalIndexTagBase): + prev_tags = kernel.iname_to_tags[iname] + if check_iname_tags(prev_tags, AutoLocalIndexTagBase): return assign_axis(axis, iname, axis) else: diff --git a/loopy/preprocess.py b/loopy/preprocess.py index bd0a7c8dd..b20fbef91 100644 --- a/loopy/preprocess.py +++ b/loopy/preprocess.py @@ -291,20 +291,20 @@ def _classify_reduction_inames(kernel, inames): from loopy.kernel.data import ( LocalIndexTagBase, UnrolledIlpTag, UnrollTag, VectorizeTag, - ConcurrentTag) + ConcurrentTag, check_iname_tags) for iname in inames: - iname_tag = kernel.iname_to_tag.get(iname) + iname_tags = kernel.iname_to_tags[iname] - if isinstance(iname_tag, (UnrollTag, UnrolledIlpTag)): + if check_iname_tags(iname_tags, (UnrollTag, UnrolledIlpTag)): # These are nominally parallel, but we can live with # them as sequential. sequential.append(iname) - elif isinstance(iname_tag, LocalIndexTagBase): + elif check_iname_tags(iname_tags, LocalIndexTagBase): local_par.append(iname) - elif isinstance(iname_tag, (ConcurrentTag, VectorizeTag)): + elif check_iname_tags(iname_tags, (ConcurrentTag, VectorizeTag)): nonlocal_par.append(iname) else: @@ -912,6 +912,8 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True, outer (sweep) iname. """ + # TODO: reassigning tags needs some thinking here + logger.debug("%s: realize reduction" % kernel.name) new_insns = [] @@ -1134,13 +1136,9 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True, outer_insn_inames = temp_kernel.insn_inames(insn) - from loopy.kernel.data import LocalIndexTagBase - outer_local_inames = tuple( - oiname - for oiname in outer_insn_inames - if isinstance( - kernel.iname_to_tag.get(oiname), - LocalIndexTagBase)) + from loopy.kernel.data import LocalIndexTagBase, check_iname_tags + outer_local_inames = tuple(oiname for oiname in outer_insn_inames + if check_iname_tags(kernel.iname_to_tags[oiname], LocalIndexTagBase)) from pymbolic import var outer_local_iname_vars = tuple( @@ -1175,7 +1173,7 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True, base_exec_iname = var_name_gen("red_"+red_iname) domains.append(_make_slab_set(base_exec_iname, size)) - new_iname_tags[base_exec_iname] = kernel.iname_to_tag[red_iname] + new_iname_tags[base_exec_iname] = kernel.iname_to_tags[red_iname] # }}} @@ -1270,7 +1268,7 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True, stage_exec_iname = var_name_gen("red_%s_s%d" % (red_iname, istage)) domains.append(_make_slab_set(stage_exec_iname, bound-new_size)) - new_iname_tags[stage_exec_iname] = kernel.iname_to_tag[red_iname] + new_iname_tags[stage_exec_iname] = kernel.iname_to_tags[red_iname] stage_id = insn_id_gen("red_%s_stage_%d" % (red_iname, istage)) stage_insn = make_assignment( @@ -1473,13 +1471,9 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True, outer_insn_inames = temp_kernel.insn_inames(insn) - from loopy.kernel.data import LocalIndexTagBase - outer_local_inames = tuple( - oiname - for oiname in outer_insn_inames - if isinstance( - kernel.iname_to_tag.get(oiname), - LocalIndexTagBase) + from loopy.kernel.data import LocalIndexTagBase, check_iname_tags + outer_local_inames = tuple(oiname for oiname in outer_insn_inames + if check_iname_tags(kernel.iname_to_tags[oiname], LocalIndexTagBase) and oiname != sweep_iname) from pymbolic import var @@ -1505,7 +1499,7 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True, base_exec_iname = var_name_gen(sweep_iname + "__scan") domains.append(_make_slab_set(base_exec_iname, scan_size)) - new_iname_tags[base_exec_iname] = kernel.iname_to_tag[sweep_iname] + new_iname_tags[base_exec_iname] = kernel.iname_to_tags[sweep_iname] # }}} @@ -1596,7 +1590,7 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True, stage_exec_iname = var_name_gen("%s__scan_s%d" % (sweep_iname, istage)) domains.append( _make_slab_set_from_range(stage_exec_iname, cur_size, scan_size)) - new_iname_tags[stage_exec_iname] = kernel.iname_to_tag[sweep_iname] + new_iname_tags[stage_exec_iname] = kernel.iname_to_tags[sweep_iname] for read_var, acc_var in zip(read_vars, acc_vars): read_stage_id = insn_id_gen( @@ -1746,7 +1740,7 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True, "by reductions is 'local'--found iname(s) '%s' " "respectively tagged '%s'" % (", ".join(bad_inames), - ", ".join(kernel.iname_to_tag[iname] + ", ".join(tag.key for tag in kernel.iname_to_tags[iname] for iname in bad_inames))) if n_local_par == 0 and n_sequential == 0: @@ -1784,7 +1778,9 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True, _error_if_force_scan_on(LoopyError, "Sweep iname '%s' has an unsupported parallel tag '%s' " "- the only parallelism allowed is 'local'." % - (sweep_iname, temp_kernel.iname_to_tag[sweep_iname])) + (sweep_iname, + ", ".join(tag.key + for tag in temp_kernel.iname_to_tags[sweep_iname]))) elif parallel: return map_scan_local( expr, rec, nresults, arg_dtypes, reduction_dtypes, diff --git a/loopy/statistics.py b/loopy/statistics.py index 6f4cc78b7..77c638128 100755 --- a/loopy/statistics.py +++ b/loopy/statistics.py @@ -917,18 +917,22 @@ class GlobalMemAccessCounter(MemAccessCounter): index = (index,) from loopy.symbolic import get_dependencies - from loopy.kernel.data import LocalIndexTag, GroupIndexTag + from loopy.kernel.data import LocalIndexTag, GroupIndexTag, get_iname_tags + my_inames = get_dependencies(index) & self.knl.all_inames() # find all local and global index tags and corresponding inames lid_to_iname = {} gid_to_iname = {} for iname in my_inames: - tag = self.knl.iname_to_tag.get(iname) - if isinstance(tag, LocalIndexTag): - lid_to_iname[tag.axis] = iname - elif isinstance(tag, GroupIndexTag): - gid_to_iname[tag.axis] = iname + tags = get_iname_tags(self.knl.iname_to_tags[iname], + (GroupIndexTag, LocalIndexTag)) + if tags: + tag, = get_iname_tags(tags, (GroupIndexTag, LocalIndexTag), 1) + if isinstance(tag, LocalIndexTag): + lid_to_iname[tag.axis] = iname + else: + gid_to_iname[tag.axis] = iname # create lid_strides and gid_strides dicts @@ -1177,14 +1181,18 @@ def get_unused_hw_axes_factor(knl, insn, disregard_local_axes, space=None): g_used = set() l_used = set() - from loopy.kernel.data import LocalIndexTag, GroupIndexTag + from loopy.kernel.data import (LocalIndexTag, GroupIndexTag, + get_iname_tags, check_iname_tags) for iname in knl.insn_inames(insn): - tag = knl.iname_to_tag.get(iname) - - if isinstance(tag, LocalIndexTag): - l_used.add(tag.axis) - elif isinstance(tag, GroupIndexTag): - g_used.add(tag.axis) + tags = get_iname_tags(knl.iname_to_tags[iname], (LocalIndexTag, GroupIndexTag)) + if tags: + if len(tags) > 1: + raise LoopyError("cannot have more than one UniqueTags") + tag, = tags + if isinstance(tag, LocalIndexTag): + l_used.add(tag.axis) + elif isinstance(tag, GroupIndexTag): + g_used.add(tag.axis) def mult_grid_factor(used_axes, size): result = 1 @@ -1213,9 +1221,9 @@ def count_insn_runs(knl, insn, count_redundant_work, disregard_local_axes=False) insn_inames = knl.insn_inames(insn) if disregard_local_axes: - from loopy.kernel.data import LocalIndexTag + from loopy.kernel.data import LocalIndexTag, check_iname_tags insn_inames = [iname for iname in insn_inames if not - isinstance(knl.iname_to_tag.get(iname), LocalIndexTag)] + check_iname_tags(kernel.iname_to_tags[iname], LocalIndexTag)] inames_domain = knl.get_inames_domain(insn_inames) domain = (inames_domain.project_out_except( -- GitLab From d79e19d1eab87255c734f55ae2fbbdce842b91e5 Mon Sep 17 00:00:00 2001 From: tj-sun Date: Wed, 4 Apr 2018 11:45:19 +0100 Subject: [PATCH 06/20] finished rewriting with tuples --- loopy/target/ispc.py | 42 +++++++++++++++++++++------------------ loopy/transform/fusion.py | 6 +++--- loopy/transform/iname.py | 33 +++++++++++++++++------------- loopy/transform/save.py | 42 +++++++++++++++++++++++---------------- 4 files changed, 70 insertions(+), 53 deletions(-) diff --git a/loopy/target/ispc.py b/loopy/target/ispc.py index 45a59847b..3fd2bc1ae 100644 --- a/loopy/target/ispc.py +++ b/loopy/target/ispc.py @@ -418,28 +418,33 @@ class ISPCASTBuilder(CASTBuilder): new_terms = [] - from loopy.kernel.data import LocalIndexTag + from loopy.kernel.data import LocalIndexTag, check_iname_tags, get_iname_tags from loopy.symbolic import get_dependencies saw_l0 = False for term in terms: if (isinstance(term, Variable) - and isinstance( - kernel.iname_to_tag.get(term.name), LocalIndexTag) - and kernel.iname_to_tag.get(term.name).axis == 0): - if saw_l0: - raise LoopyError("streaming store must have stride 1 " - "in local index, got: %s" % subscript) - saw_l0 = True - continue + and check_iname_tags(kernel.iname_to_tags[term.name], LocalIndexTag)): + tags = get_iname_tags(kernel.iname_to_tags[term.name], LocalIndexTag) + if len(tags) > 1: + raise LoopyError("cannot have more than one LocalIndexTags") + tag, = tags + if tag.axis == 0: + if saw_l0: + raise LoopyError("streaming store must have stride 1 " + "in local index, got: %s" % subscript) + saw_l0 = True + continue else: for dep in get_dependencies(term): - if ( - isinstance( - kernel.iname_to_tag.get(dep), LocalIndexTag) - and kernel.iname_to_tag.get(dep).axis == 0): - raise LoopyError("streaming store must have stride 1 " - "in local index, got: %s" % subscript) + if check_iname_tags(kernel.iname_to_tags[dep], LocalIndexTag): + tags = get_iname_tags(kernel.iname_to_tags[dep], LocalIndexTag) + if len(tags) > 1: + raise LoopyError("cannot have more than one LocalIndexTags") + tag, = tags + if tag.axis == 0: + raise LoopyError("streaming store must have stride 1 " + "in local index, got: %s" % subscript) new_terms.append(term) @@ -452,10 +457,9 @@ class ISPCASTBuilder(CASTBuilder): "data type") rhs_has_programindex = any( - isinstance( - kernel.iname_to_tag.get(dep), LocalIndexTag) - and kernel.iname_to_tag.get(dep).axis == 0 - for dep in get_dependencies(insn.expression)) + isinstance(tag, LocalIndexTag) and tag.axis == 0 + for tag in kernel.iname_to_tags[dep] + for dep in get_dependencies(insn.expression)) if not rhs_has_programindex: rhs_code = "broadcast(%s, 0)" % rhs_code diff --git a/loopy/transform/fusion.py b/loopy/transform/fusion.py index 77c2d3ade..8f8593c2c 100644 --- a/loopy/transform/fusion.py +++ b/loopy/transform/fusion.py @@ -248,10 +248,10 @@ def _fuse_two_kernels(knla, knlb): local_sizes=_merge_dicts( "local size", knla.local_sizes, knlb.local_sizes), temporary_variables=new_temporaries, - iname_to_tag=_merge_dicts( + iname_to_tags=_merge_dicts( "iname-to-tag mapping", - knla.iname_to_tag, - knlb.iname_to_tag), + knla.iname_to_tags, + knlb.iname_to_tags), substitutions=_merge_dicts( "substitution", knla.substitutions, diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index faa6ca3d9..7ebba8edf 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -176,11 +176,10 @@ def _split_iname_backend(kernel, split_iname, for syntax. """ - existing_tag = kernel.iname_to_tag.get(split_iname) - from loopy.kernel.data import ForceSequentialTag - if do_tagged_check and ( - existing_tag is not None - and not isinstance(existing_tag, ForceSequentialTag)): + existing_tags = kernel.iname_to_tags[split_iname] + from loopy.kernel.data import ForceSequentialTag, check_iname_tags + if (do_tagged_check and existing_tags + and not check_iname_tags(existing_tags, ForceSequentialTag)): raise LoopyError("cannot split already tagged iname '%s'" % split_iname) if split_iname not in kernel.all_inames(): @@ -295,9 +294,9 @@ def _split_iname_backend(kernel, split_iname, kernel = ins.map_kernel(kernel) kernel = rule_mapping_context.finish_kernel(kernel) - if existing_tag is not None: + if existing_tags: kernel = tag_inames(kernel, - {outer_iname: existing_tag, inner_iname: existing_tag}) + {outer_iname: existing_tags, inner_iname: existing_tags}) return tag_inames(kernel, {outer_iname: outer_tag, inner_iname: inner_tag}) @@ -632,7 +631,13 @@ def tag_inames(kernel, iname_to_tag, force=False, ignore_nonexistent=False): def parse_tag(tag): if isinstance(tag, str): if tag.startswith("like."): - return kernel.iname_to_tag.get(tag[5:]) + tags = kernel.iname_to_tags[tag[5:]] + if len(tags) == 0: + return None + if len(tags) == 1: + return tags[0] + else: + raise LoopyError("cannot use like for multiple tags (for now)") elif tag == "unused.g": return find_unused_axis_tag(kernel, "g") elif tag == "unused.l": @@ -976,8 +981,9 @@ def get_iname_duplication_options(knl, use_boostable_into=False): # Get the duplication options as a tuple of iname and a set for iname, insns in _get_iname_duplication_options(insn_iname_sets): # Check whether this iname has a parallel tag and discard it if so - if (iname in knl.iname_to_tag - and isinstance(knl.iname_to_tag[iname], ConcurrentTag)): + from loopy.kernel.data import ConcurrentTag, check_iname_tags + if (iname in knl.iname_to_tags + and check_iname_tags(knl.iname_to_tags[iname], ConcurrentTag)): continue # If we find a duplication option and to not use boostable_into @@ -1494,7 +1500,7 @@ def find_unused_axis_tag(kernel, kind, insn_match=None): """ used_axes = set() - from loopy.kernel.data import GroupIndexTag, LocalIndexTag + from loopy.kernel.data import GroupIndexTag, LocalIndexTag, check_iname_tags if isinstance(kind, str): found = False @@ -1513,9 +1519,8 @@ def find_unused_axis_tag(kernel, kind, insn_match=None): for insn in insns: for iname in kernel.insn_inames(insn): - dim_tag = kernel.iname_to_tag.get(iname) - - if isinstance(dim_tag, kind): + dim_tags = kernel.iname_to_tags[iname] + if check_iname_tags(dim_tags, kind): used_axes.add(kind.axis) i = 0 diff --git a/loopy/transform/save.py b/loopy/transform/save.py index e3d8368a7..962a83cd1 100644 --- a/loopy/transform/save.py +++ b/loopy/transform/save.py @@ -244,7 +244,7 @@ class TemporarySaver(object): self.insns_to_insert = [] self.insns_to_update = {} self.extra_args_to_add = {} - self.updated_iname_to_tag = {} + self.updated_iname_to_tags = {} self.updated_temporary_variables = {} # temporary name -> save or reload insn ids @@ -397,24 +397,32 @@ class TemporarySaver(object): my_local_tags = [] for iname in insn.within_inames: - tag = self.kernel.iname_to_tag.get(iname) + tags = self.kernel.iname_to_tags[iname] - if tag is None: + if not tags: continue - from loopy.kernel.data import ( - GroupIndexTag, LocalIndexTag, ConcurrentTag) + from loopy.kernel.data import (GroupIndexTag, LocalIndexTag, + ConcurrentTag, get_iname_tags, check_iname_tags) - if isinstance(tag, GroupIndexTag): + if check_iname_tags(tags, GroupIndexTag): + tags = get_iname_tags(tags, GroupIndexTag) + if len(tags) > 1: + raise LoopyError("cannot have more than one GroupIndexTags") + tag, = tags my_group_tags.append(tag) - elif isinstance(tag, LocalIndexTag): + elif check_iname_tags(tags, LocalIndexTag): + tags = get_iname_tags(tags, LocalIndexTag) + if len(tags) > 1: + raise LoopyError("cannot have more than one LocalIndexTags") + tag, = tags my_local_tags.append(tag) - elif isinstance(tag, ConcurrentTag): + elif check_iname_tags(tags, ConcurrentTag): raise LoopyError( "iname '%s' is tagged with '%s' - only " "group and local tags are supported for " "auto save/reload of temporaries" % - (iname, tag)) + (iname, ", ".join(str(tag) for tag in tags))) if group_tags is None: group_tags = _sortedtags(my_group_tags) @@ -501,7 +509,7 @@ class TemporarySaver(object): if promoted_temporary is None: return - new_subdomain, hw_inames, dim_inames, iname_to_tag = ( + new_subdomain, hw_inames, dim_inames, iname_to_tags = ( self.augment_domain_for_save_or_reload( self.new_subdomain, promoted_temporary, mode, subkernel)) @@ -581,7 +589,7 @@ class TemporarySaver(object): self.updated_temporary_variables[promoted_temporary.name] = ( promoted_temporary.as_kernel_temporary(self.kernel)) - self.updated_iname_to_tag.update(iname_to_tag) + self.updated_iname_to_tags.update(iname_to_tags) @memoize_method def finish(self): @@ -597,7 +605,7 @@ class TemporarySaver(object): new_instructions.extend( sorted(insns_to_insert.values(), key=lambda insn: insn.id)) - self.updated_iname_to_tag.update(self.kernel.iname_to_tag) + self.updated_iname_to_tags.update(self.kernel.iname_to_tags) self.updated_temporary_variables.update(self.kernel.temporary_variables) new_domains = list(self.kernel.domains) @@ -608,7 +616,7 @@ class TemporarySaver(object): kernel = self.kernel.copy( domains=new_domains, instructions=new_instructions, - iname_to_tag=self.updated_iname_to_tag, + iname_to_tags=self.updated_iname_to_tags, temporary_variables=self.updated_temporary_variables, overridden_get_grid_sizes_for_insn_ids=None) @@ -650,7 +658,7 @@ class TemporarySaver(object): orig_dim = domain.dim(isl.dim_type.set) # Tags for newly added inames - iname_to_tag = {} + iname_to_tags = {} from loopy.symbolic import aff_from_expr @@ -675,7 +683,7 @@ class TemporarySaver(object): # If the temporary has local scope, then loads / stores can # be done in parallel. from loopy.kernel.data import AutoFitLocalIndexTag - iname_to_tag[new_iname] = AutoFitLocalIndexTag() + iname_to_tags[new_iname] = (AutoFitLocalIndexTag(),) dim_inames.append(new_iname) @@ -705,7 +713,7 @@ class TemporarySaver(object): & aff[new_iname].lt_set(aff_from_expr(domain.space, dim))) - self.updated_iname_to_tag[new_iname] = hw_tag + self.updated_iname_to_tags[new_iname] = (hw_tag,) hw_inames.append(new_iname) # The operations on the domain above return a Set object, but the @@ -713,7 +721,7 @@ class TemporarySaver(object): domain_list = domain.get_basic_set_list() assert domain_list.n_basic_set() == 1 domain = domain_list.get_basic_set(0) - return domain, hw_inames, dim_inames, iname_to_tag + return domain, hw_inames, dim_inames, iname_to_tags # }}} -- GitLab From b87110701371928e9d680c7daa1c41c2a894cc08 Mon Sep 17 00:00:00 2001 From: tj-sun Date: Wed, 4 Apr 2018 13:29:51 +0100 Subject: [PATCH 07/20] fix bug in tagging with tuples --- loopy/codegen/control.py | 6 ++++-- loopy/kernel/__init__.py | 6 +++++- loopy/kernel/tools.py | 8 ++++---- loopy/schedule/__init__.py | 8 +++----- loopy/transform/iname.py | 7 ++++--- 5 files changed, 20 insertions(+), 15 deletions(-) diff --git a/loopy/codegen/control.py b/loopy/codegen/control.py index fb92ef750..22f187608 100644 --- a/loopy/codegen/control.py +++ b/loopy/codegen/control.py @@ -51,7 +51,7 @@ def get_admissible_conditional_inames_for(codegen_state, sched_index): for iname, tags in six.iteritems(kernel.iname_to_tags): if (check_iname_tags(tags, HardwareConcurrentTag) and codegen_state.is_generating_device_code): - if not has_barrier or not isinstance(tag, LocalIndexTag): + if not has_barrier or not check_iname_tags(tags, LocalIndexTag): result.add(iname) return frozenset(result) @@ -129,6 +129,7 @@ def generate_code_for_sched_index(codegen_state, sched_index): elif isinstance(sched_item, EnterLoop): tags = kernel.iname_to_tags[sched_item.iname] + tags = tuple(tag for tag in tags if tag) from loopy.codegen.loop import ( generate_unroll_loop, @@ -146,7 +147,8 @@ def generate_code_for_sched_index(codegen_state, sched_index): func = generate_sequential_loop_dim_code else: raise RuntimeError("encountered (invalid) EnterLoop " - "for '%s', tagged '%s'" % (sched_item.iname, tag)) + "for '%s', tagged '%s'" + % (sched_item.iname, ", ".join(str(tag) for tag in tags))) return func(codegen_state, sched_index) diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py index 07f17b6c5..a69f0727d 100644 --- a/loopy/kernel/__init__.py +++ b/loopy/kernel/__init__.py @@ -713,6 +713,10 @@ class LoopKernel(ImmutableRecordWithoutPickling): for iname in cond_inames: tags = self.iname_to_tags[iname] if check_iname_tags(tags, HardwareConcurrentTag): + tags = get_iname_tags(tags, HardwareConcurrentTag) + if len(tags) > 1: + raise LoopyError("cannot have more than one HardwareConcurentTags") + tag, = tags tag_key_uses[tag.key].append(iname) multi_use_keys = set( @@ -1179,7 +1183,7 @@ class LoopKernel(ImmutableRecordWithoutPickling): lines.append("INAME IMPLEMENTATION TAGS:") for iname in natsorted(kernel.all_inames()): line = "%s: %s" % (iname, ", ".join( - tag.key for tag in kernel.iname_to_tags[iname])) + str(tag) for tag in kernel.iname_to_tags[iname])) lines.append(line) if "variables" in what and kernel.temporary_variables: diff --git a/loopy/kernel/tools.py b/loopy/kernel/tools.py index 7282542dc..505482dea 100644 --- a/loopy/kernel/tools.py +++ b/loopy/kernel/tools.py @@ -817,9 +817,9 @@ def assign_automatic_axes(kernel, axis=0, local_size=None): # }}} if axis is None: - new_tag = tuple() + new_tag = None else: - new_tag = (LocalIndexTag(axis),) + new_tag = LocalIndexTag(axis) if desired_length > local_size[axis]: from loopy import split_iname @@ -836,7 +836,7 @@ def assign_automatic_axes(kernel, axis=0, local_size=None): raise LoopyError("trying to reassign '%s'" % iname) new_iname_to_tags = kernel.iname_to_tags.copy() - new_iname_to_tags[iname] = new_tag + new_iname_to_tags[iname] = (new_tag,) return assign_automatic_axes(kernel.copy(iname_to_tags=new_iname_to_tags), axis=recursion_axis, local_size=local_size) @@ -863,7 +863,7 @@ def assign_automatic_axes(kernel, axis=0, local_size=None): assigned_local_axes = set() for iname in kernel.insn_inames(insn): - tags = get(kernel.iname_to_tags[iname], LocalIndexTag) + tags = get_iname_tags(kernel.iname_to_tags[iname], LocalIndexTag) if tags: if len(tags) > 1: raise LoopyError("cannot have more than one LocalIndexTags") diff --git a/loopy/schedule/__init__.py b/loopy/schedule/__init__.py index 1611d5364..68b3fb0cb 100644 --- a/loopy/schedule/__init__.py +++ b/loopy/schedule/__init__.py @@ -217,17 +217,15 @@ def find_loop_nest_with_map(kernel): all_nonpar_inames = set( iname for iname, tags in six.iteritems(kernel.iname_to_tags) - if not check_iname_tags(tags, + if tags and not check_iname_tags(tags, (ConcurrentTag, IlpBaseTag, VectorizeTag))) iname_to_insns = kernel.iname_to_insns() for iname in all_nonpar_inames: - result[iname] = set([ - other_iname + result[iname] = set(other_iname for insn in iname_to_insns[iname] - for other_iname in kernel.insn_inames(insn) & all_nonpar_inames - ]) + for other_iname in kernel.insn_inames(insn) & all_nonpar_inames) return result diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index 7ebba8edf..21c2f7eea 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -295,8 +295,9 @@ def _split_iname_backend(kernel, split_iname, kernel = rule_mapping_context.finish_kernel(kernel) if existing_tags: - kernel = tag_inames(kernel, - {outer_iname: existing_tags, inner_iname: existing_tags}) + for existing_tag in existing_tags: + kernel = tag_inames(kernel, + {outer_iname: existing_tag, inner_iname: existing_tag}) return tag_inames(kernel, {outer_iname: outer_tag, inner_iname: inner_tag}) @@ -696,7 +697,7 @@ def tag_inames(kernel, iname_to_tag, force=False, ignore_nonexistent=False): "(likely because of participation in a precompute or " "a reduction)" % iname) - if all(tag.key != new_tag.key for tag in old_tags): + if new_tag and all(tag.key != new_tag.key for tag in old_tags): old_tags = old_tags + (new_tag,) knl_iname_to_tags[iname] = old_tags -- GitLab From c3b942e7918e8cd993341712d7fe476bca898dbc Mon Sep 17 00:00:00 2001 From: tj-sun Date: Wed, 4 Apr 2018 15:08:50 +0100 Subject: [PATCH 08/20] change tags from tuple to set --- loopy/check.py | 37 +++++++++++++++--------------------- loopy/codegen/bounds.py | 8 ++++---- loopy/codegen/control.py | 17 +++++++++-------- loopy/codegen/loop.py | 19 +++++++++--------- loopy/kernel/__init__.py | 31 +++++++++++++----------------- loopy/kernel/data.py | 19 +++++++++++------- loopy/kernel/tools.py | 18 +++++++++--------- loopy/preprocess.py | 26 ++++++++++++------------- loopy/schedule/__init__.py | 27 +++++++++++++------------- loopy/statistics.py | 11 +++++------ loopy/target/ispc.py | 30 ++++++++++++++--------------- loopy/transform/iname.py | 33 ++++++++++++++++---------------- loopy/transform/privatize.py | 7 +++++-- loopy/transform/save.py | 20 +++++++------------ 14 files changed, 146 insertions(+), 157 deletions(-) diff --git a/loopy/check.py b/loopy/check.py index 9a9ff1fd5..bebd86fff 100644 --- a/loopy/check.py +++ b/loopy/check.py @@ -168,7 +168,7 @@ def _is_racing_iname_tag(tv, tag): def check_for_write_races(kernel): - from loopy.kernel.data import ConcurrentTag, check_iname_tags + from loopy.kernel.data import ConcurrentTag, get_iname_tags for insn in kernel.instructions: for assignee_name, assignee_indices in zip( @@ -186,8 +186,9 @@ def check_for_write_races(kernel): # will cause write races. raceable_parallel_insn_inames = set( - iname for iname in kernel.insn_inames(insn) - if check_iname_tags(kernel.iname_to_tags[iname], ConcurrentTag)) + iname for iname in kernel.insn_inames(insn) + if get_iname_tags(kernel.iname_to_tags[iname], + ConcurrentTag)) elif assignee_name in kernel.temporary_variables: temp_var = kernel.temporary_variables[assignee_name] @@ -213,7 +214,7 @@ def check_for_write_races(kernel): def check_for_orphaned_user_hardware_axes(kernel): - from loopy.kernel.data import LocalIndexTag, check_iname_tags + from loopy.kernel.data import LocalIndexTag for axis in kernel.local_sizes: found = False for tags in six.itervalues(kernel.iname_to_tags): @@ -230,12 +231,12 @@ def check_for_orphaned_user_hardware_axes(kernel): def check_for_data_dependent_parallel_bounds(kernel): - from loopy.kernel.data import ConcurrentTag, check_iname_tags + from loopy.kernel.data import ConcurrentTag, get_iname_tags for i, dom in enumerate(kernel.domains): dom_inames = set(dom.get_var_names(dim_type.set)) par_inames = set(iname for iname in dom_inames - if check_iname_tags(kernel.iname_to_tags[iname], ConcurrentTag)) + if get_iname_tags(kernel.iname_to_tags[iname], ConcurrentTag)) if not par_inames: continue @@ -651,7 +652,7 @@ def _check_for_unused_hw_axes_in_kernel_chunk(kernel, sched_index=None): # alternative: just disregard length-1 dimensions? from loopy.kernel.data import (LocalIndexTag, AutoLocalIndexTagBase, - GroupIndexTag, check_iname_tags, get_iname_tags) + GroupIndexTag, get_iname_tags) while i < loop_end_i: sched_item = kernel.schedule[i] @@ -671,19 +672,13 @@ def _check_for_unused_hw_axes_in_kernel_chunk(kernel, sched_index=None): for iname in kernel.insn_inames(insn): tags = kernel.iname_to_tags[iname] - if check_iname_tags(tags, LocalIndexTag): - tags = get_iname_tags(tags, LocalIndexTag) - if len(tags) > 1: - raise LoopyError("Can only have one LocalIndexTag") - tag, = tags + if get_iname_tags(tags, LocalIndexTag): + tag, = get_iname_tags(tags, LocalIndexTag, 1) local_axes_used.add(tag.axis) - elif check_iname_tags(tags, GroupIndexTag): - tags = get_iname_tags(tags, GroupIndexTag) - if len(tags) > 1: - raise LoopyError("Can only have one GroupIndexTag") - tag, = tags + elif get_iname_tags(tags, GroupIndexTag): + tag, = get_iname_tags(tags, GroupIndexTag, 1) group_axes_used.add(tag.axis) - elif check_iname_tags(tags, AutoLocalIndexTagBase): + elif get_iname_tags(tags, AutoLocalIndexTagBase): raise LoopyError("auto local tag encountered") if group_axes != group_axes_used: @@ -896,8 +891,6 @@ def check_implemented_domains(kernel, implemented_domains, code=None): from islpy import align_two - from loopy.kernel.data import check_iname_tags - last_idomains = None last_insn_inames = None @@ -930,11 +923,11 @@ def check_implemented_domains(kernel, implemented_domains, code=None): .project_out_except(insn_inames, [dim_type.set])) from loopy.kernel.instruction import BarrierInstruction - from loopy.kernel.data import LocalIndexTag + from loopy.kernel.data import LocalIndexTag, get_iname_tags if isinstance(insn, BarrierInstruction): # project out local-id-mapped inames, solves #94 on gitlab non_lid_inames = frozenset(iname for iname in insn_inames - if not check_iname_tags(kernel.iname_to_tags[iname], LocalIndexTag)) + if not get_iname_tags(kernel.iname_to_tags[iname], LocalIndexTag)) insn_impl_domain = insn_impl_domain.project_out_except( non_lid_inames, [dim_type.set]) diff --git a/loopy/codegen/bounds.py b/loopy/codegen/bounds.py index 886f305a4..240df24e5 100644 --- a/loopy/codegen/bounds.py +++ b/loopy/codegen/bounds.py @@ -59,7 +59,7 @@ def get_usable_inames_for_conditional(kernel, sched_index): from loopy.schedule import ( find_active_inames_at, get_insn_ids_for_block_at, has_barrier_within) from loopy.kernel.data import (ConcurrentTag, LocalIndexTagBase, - IlpBaseTag, check_iname_tags) + IlpBaseTag, get_iname_tags) result = find_active_inames_at(kernel, sched_index) crosses_barrier = has_barrier_within(kernel, sched_index) @@ -98,9 +98,9 @@ def get_usable_inames_for_conditional(kernel, sched_index): # at the innermost level of nesting. if ( - check_iname_tags(tags, ConcurrentTag) - and not (check_iname_tags(tags, LocalIndexTagBase) - and crosses_barrier) and not check_iname_tags(tags, IlpBaseTag) + get_iname_tags(tags, ConcurrentTag) + and not (get_iname_tags(tags, LocalIndexTagBase) + and crosses_barrier) and not get_iname_tags(tags, IlpBaseTag) ): result.add(iname) diff --git a/loopy/codegen/control.py b/loopy/codegen/control.py index 22f187608..41b04e172 100644 --- a/loopy/codegen/control.py +++ b/loopy/codegen/control.py @@ -41,7 +41,7 @@ def get_admissible_conditional_inames_for(codegen_state, sched_index): kernel = codegen_state.kernel from loopy.kernel.data import (LocalIndexTag, HardwareConcurrentTag, - check_iname_tags) + get_iname_tags) from loopy.schedule import find_active_inames_at, has_barrier_within result = find_active_inames_at(kernel, sched_index) @@ -49,9 +49,9 @@ def get_admissible_conditional_inames_for(codegen_state, sched_index): has_barrier = has_barrier_within(kernel, sched_index) for iname, tags in six.iteritems(kernel.iname_to_tags): - if (check_iname_tags(tags, HardwareConcurrentTag) + if (get_iname_tags(tags, HardwareConcurrentTag) and codegen_state.is_generating_device_code): - if not has_barrier or not check_iname_tags(tags, LocalIndexTag): + if not has_barrier or not get_iname_tags(tags, LocalIndexTag): result.add(iname) return frozenset(result) @@ -136,13 +136,14 @@ def generate_code_for_sched_index(codegen_state, sched_index): generate_vectorize_loop, generate_sequential_loop_dim_code) - from loopy.kernel.data import (UnrolledIlpTag, UnrollTag, ForceSequentialTag, - LoopedIlpTag, VectorizeTag, InOrderSequentialSequentialTag, check_iname_tags) - if check_iname_tags(tags, (UnrollTag, UnrolledIlpTag)): + from loopy.kernel.data import (UnrolledIlpTag, UnrollTag, + ForceSequentialTag, LoopedIlpTag, VectorizeTag, + InOrderSequentialSequentialTag, get_iname_tags) + if get_iname_tags(tags, (UnrollTag, UnrolledIlpTag)): func = generate_unroll_loop - elif check_iname_tags(tags, VectorizeTag): + elif get_iname_tags(tags, VectorizeTag): func = generate_vectorize_loop - elif len(tags) == 0 or check_iname_tags(tags, (LoopedIlpTag, + elif len(tags) == 0 or get_iname_tags(tags, (LoopedIlpTag, ForceSequentialTag, InOrderSequentialSequentialTag)): func = generate_sequential_loop_dim_code else: diff --git a/loopy/codegen/loop.py b/loopy/codegen/loop.py index 01f8a8255..0efa96f96 100644 --- a/loopy/codegen/loop.py +++ b/loopy/codegen/loop.py @@ -231,7 +231,7 @@ def set_up_hw_parallel_loops(codegen_state, schedule_index, next_func, kernel = codegen_state.kernel from loopy.kernel.data import (UniqueTag, HardwareConcurrentTag, - LocalIndexTag, GroupIndexTag, check_iname_tags) + LocalIndexTag, GroupIndexTag, get_iname_tags) from loopy.schedule import get_insn_ids_for_block_at insn_ids_for_block = get_insn_ids_for_block_at(kernel.schedule, schedule_index) @@ -242,7 +242,7 @@ def set_up_hw_parallel_loops(codegen_state, schedule_index, next_func, all_inames_by_insns |= kernel.insn_inames(insn_id) hw_inames_left = [iname for iname in all_inames_by_insns - if check_iname_tags(kernel.iname_to_tags[iname], + if get_iname_tags(kernel.iname_to_tags[iname], HardwareConcurrentTag)] if not hw_inames_left: @@ -258,12 +258,8 @@ def set_up_hw_parallel_loops(codegen_state, schedule_index, next_func, from loopy.symbolic import GroupHardwareAxisIndex, LocalHardwareAxisIndex - assert check_iname_tags(tags, UniqueTag) + tag, = get_iname_tags(tags, UniqueTag, max_num=1, min_num=1) - if len(tags) > 1: - raise LoopyError("cannot have more than one UniqueTag") - - tag, = tags if isinstance(tag, GroupIndexTag): hw_axis_expr = GroupHardwareAxisIndex(tag.axis) elif isinstance(tag, LocalIndexTag): @@ -271,11 +267,14 @@ def set_up_hw_parallel_loops(codegen_state, schedule_index, next_func, else: raise RuntimeError("unexpected hw tag type") + # TODO: get rid of None + other_inames_with_same_tag = [ other_iname for other_iname in kernel.all_inames() - if check_iname_tags(kernel.iname_to_tags[other_iname], UniqueTag) - and any(_tag.key == tag.key for _tag in kernel.iname_to_tags[other_iname]) - and other_iname != iname] + if (get_iname_tags(kernel.iname_to_tags[other_iname], UniqueTag) + and other_iname != iname + and any(_tag.key == tag.key + for _tag in kernel.iname_to_tags[other_iname]))] # {{{ 'implement' hardware axis boundaries diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py index a69f0727d..872365fca 100644 --- a/loopy/kernel/__init__.py +++ b/loopy/kernel/__init__.py @@ -44,7 +44,7 @@ from loopy.library.function import ( from loopy.diagnostic import CannotBranchDomainTree, LoopyError from loopy.tools import natsorted from loopy.diagnostic import StaticValueFindingError -from loopy.kernel.data import check_iname_tags, get_iname_tags +from loopy.kernel.data import get_iname_tags # {{{ unique var names @@ -197,7 +197,7 @@ class LoopKernel(ImmutableRecordWithoutPickling): assumptions=None, local_sizes={}, temporary_variables={}, - iname_to_tags=defaultdict(tuple), + iname_to_tags=defaultdict(set), substitutions={}, function_manglers=[ default_function_mangler, @@ -711,11 +711,9 @@ class LoopKernel(ImmutableRecordWithoutPickling): from loopy.kernel.data import HardwareConcurrentTag for iname in cond_inames: - tags = self.iname_to_tags[iname] - if check_iname_tags(tags, HardwareConcurrentTag): - tags = get_iname_tags(tags, HardwareConcurrentTag) - if len(tags) > 1: - raise LoopyError("cannot have more than one HardwareConcurentTags") + tags = get_iname_tags(self.iname_to_tags[iname], + HardwareConcurrentTag, 1) + if tags: tag, = tags tag_key_uses[tag.key].append(iname) @@ -725,8 +723,10 @@ class LoopKernel(ImmutableRecordWithoutPickling): multi_use_inames = set() for iname in cond_inames: - for tag in self.iname_to_tags[iname]: - if isinstance(tag, HardwareConcurrentTag) and tag.key in multi_use_keys: + tags = get_iname_tags(self.iname_to_tags[iname], HardwareConcurrentTag) + if tags: + tag, = get_iname_tags(tags, HardwareConcurrentTag, 1) + if tag.key in multi_use_keys: multi_use_inames.add(iname) break @@ -960,22 +960,17 @@ class LoopKernel(ImmutableRecordWithoutPickling): for iname in all_inames_by_insns: tags = self.iname_to_tags[iname] - if check_iname_tags(tags, GroupIndexTag): + if get_iname_tags(tags, GroupIndexTag): tgt_dict = global_sizes - elif check_iname_tags(tags, LocalIndexTag): + elif get_iname_tags(tags, LocalIndexTag): tgt_dict = local_sizes - elif check_iname_tags(tags, AutoLocalIndexTagBase) and not ignore_auto: + elif get_iname_tags(tags, AutoLocalIndexTagBase) and not ignore_auto: raise RuntimeError("cannot find grid sizes if automatic " "local index tags are present") else: continue - tags = get_iname_tags(tags, (GroupIndexTag, LocalIndexTag)) - - if len(tags) != 1: - raise LoopyError("Multiple axis tag not allowed") - - tag, = tags + tag, = get_iname_tags(tags, (GroupIndexTag, LocalIndexTag), 1) size = self.get_iname_bounds(iname).size diff --git a/loopy/kernel/data.py b/loopy/kernel/data.py index 9b66088e5..9250c5acf 100644 --- a/loopy/kernel/data.py +++ b/loopy/kernel/data.py @@ -55,19 +55,24 @@ class auto(object): # noqa # {{{ iname tags -def check_iname_tags(tags, tag_type): - return any([isinstance(tag, tag_type) for tag in tags]) - - -def get_iname_tags(tags, tag_type): - return tuple(tag for tag in tags if isinstance(tag, tag_type)) +def get_iname_tags(tags, tag_type, max_num=None, min_num=None): + result = set(tag for tag in tags if isinstance(tag, tag_type)) + if max_num: + if len(result) > max_num: + raise LoopyError("cannot have more than {0} tags" + "of type(s): {1}".format(max_num, tag_type)) + if min_num: + if len(result) < min_num: + raise LoopyError("must have more than {0} tags" + "of type(s): {1}".format(max_num, tag_type)) + return result class IndexTag(ImmutableRecord): __slots__ = [] def __hash__(self): - raise RuntimeError("use .key to hash index tags") + return hash(self.key) def update_persistent_hash(self, key_hash, key_builder): """Custom hash computation function for use with diff --git a/loopy/kernel/tools.py b/loopy/kernel/tools.py index 505482dea..5be337537 100644 --- a/loopy/kernel/tools.py +++ b/loopy/kernel/tools.py @@ -36,7 +36,7 @@ from islpy import dim_type from loopy.diagnostic import LoopyError, warn_with_kernel from pytools import memoize_on_first_arg from loopy.tools import natsorted -from loopy.kernel.data import check_iname_tags +from loopy.kernel.data import get_iname_tags import logging logger = logging.getLogger(__name__) @@ -632,7 +632,7 @@ def is_domain_dependent_on_inames(kernel, domain_index, inames): # {{{ rank inames by stride def get_auto_axis_iname_ranking_by_stride(kernel, insn): - from loopy.kernel.data import ImageArg, ValueArg, check_iname_tags + from loopy.kernel.data import ImageArg, ValueArg, get_iname_tags approximate_arg_values = {} for arg in kernel.args: @@ -677,8 +677,8 @@ def get_auto_axis_iname_ranking_by_stride(kernel, insn): from loopy.kernel.data import AutoLocalIndexTagBase auto_axis_inames = set( - iname for iname in kernel.insn_inames(insn) - if check_iname_tags(kernel.iname_to_tags[iname], AutoLocalIndexTagBase)) + iname for iname in kernel.insn_inames(insn) + if get_iname_tags(kernel.iname_to_tags[iname], AutoLocalIndexTagBase)) # }}} @@ -754,7 +754,7 @@ def assign_automatic_axes(kernel, axis=0, local_size=None): # to set() from tuple() from loopy.kernel.data import (AutoLocalIndexTagBase, LocalIndexTag, - check_iname_tags, get_iname_tags) + get_iname_tags) # Realize that at this point in time, axis lengths are already # fixed. So we compute them once and pass them to our recursive @@ -832,7 +832,7 @@ def assign_automatic_axes(kernel, axis=0, local_size=None): do_tagged_check=False), axis=recursion_axis, local_size=local_size) - if not check_iname_tags(kernel.iname_to_tags[iname], AutoLocalIndexTagBase): + if not get_iname_tags(kernel.iname_to_tags[iname], AutoLocalIndexTagBase): raise LoopyError("trying to reassign '%s'" % iname) new_iname_to_tags = kernel.iname_to_tags.copy() @@ -855,7 +855,7 @@ def assign_automatic_axes(kernel, axis=0, local_size=None): auto_axis_inames = [ iname for iname in kernel.insn_inames(insn) - if check_iname_tags(kernel.iname_to_tags[iname], AutoLocalIndexTagBase)] + if get_iname_tags(kernel.iname_to_tags[iname], AutoLocalIndexTagBase)] if not auto_axis_inames: continue @@ -878,7 +878,7 @@ def assign_automatic_axes(kernel, axis=0, local_size=None): if iname_ranking is not None: for iname in iname_ranking: prev_tags = kernel.iname_to_tags[iname] - if check_iname_tags(prev_tags, AutoLocalIndexTagBase): + if get_iname_tags(prev_tags, AutoLocalIndexTagBase): return assign_axis(axis, iname, axis) else: @@ -1134,7 +1134,7 @@ def get_visual_iname_order_embedding(kernel): # nest. ilp_inames = frozenset(iname for iname in kernel.iname_to_tags - if check_iname_tags(kernel.iname_to_tags[iname], IlpBaseTag)) + if get_iname_tags(kernel.iname_to_tags[iname], IlpBaseTag)) iname_trie = SetTrie() diff --git a/loopy/preprocess.py b/loopy/preprocess.py index b20fbef91..38b213c05 100644 --- a/loopy/preprocess.py +++ b/loopy/preprocess.py @@ -34,7 +34,7 @@ from pytools.persistent_dict import WriteOncePersistentDict from loopy.tools import LoopyKeyBuilder from loopy.version import DATA_MODEL_VERSION -from loopy.kernel.data import make_assignment, check_iname_tags, get_iname_tags +from loopy.kernel.data import make_assignment, get_iname_tags # for the benefit of loopy.statistics, for now from loopy.type_inference import infer_unknown_types @@ -136,7 +136,7 @@ def check_reduction_iname_uniqueness(kernel): def _get_compute_inames_tagged(kernel, insn, tag_base): return set(iname for iname in kernel.insn_inames(insn.id) - if check_iname_tags(kernel.iname_to_tags[iname], tag_base)) + if get_iname_tags(kernel.iname_to_tags[iname], tag_base)) def _get_assignee_inames_tagged(kernel, insn, tag_base, tv_names): @@ -146,7 +146,7 @@ def _get_assignee_inames_tagged(kernel, insn, tag_base, tv_names): insn.assignee_subscript_deps()) for iname in adeps & kernel.all_inames() if aname in tv_names - if check_iname_tags(kernel.iname_to_tags[iname], tag_base)) + if get_iname_tags(kernel.iname_to_tags[iname], tag_base)) def find_temporary_scope(kernel): @@ -291,20 +291,20 @@ def _classify_reduction_inames(kernel, inames): from loopy.kernel.data import ( LocalIndexTagBase, UnrolledIlpTag, UnrollTag, VectorizeTag, - ConcurrentTag, check_iname_tags) + ConcurrentTag, get_iname_tags) for iname in inames: iname_tags = kernel.iname_to_tags[iname] - if check_iname_tags(iname_tags, (UnrollTag, UnrolledIlpTag)): + if get_iname_tags(iname_tags, (UnrollTag, UnrolledIlpTag)): # These are nominally parallel, but we can live with # them as sequential. sequential.append(iname) - elif check_iname_tags(iname_tags, LocalIndexTagBase): + elif get_iname_tags(iname_tags, LocalIndexTagBase): local_par.append(iname) - elif check_iname_tags(iname_tags, (ConcurrentTag, VectorizeTag)): + elif get_iname_tags(iname_tags, (ConcurrentTag, VectorizeTag)): nonlocal_par.append(iname) else: @@ -1136,9 +1136,9 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True, outer_insn_inames = temp_kernel.insn_inames(insn) - from loopy.kernel.data import LocalIndexTagBase, check_iname_tags + from loopy.kernel.data import LocalIndexTagBase, get_iname_tags outer_local_inames = tuple(oiname for oiname in outer_insn_inames - if check_iname_tags(kernel.iname_to_tags[oiname], LocalIndexTagBase)) + if get_iname_tags(kernel.iname_to_tags[oiname], LocalIndexTagBase)) from pymbolic import var outer_local_iname_vars = tuple( @@ -1471,9 +1471,9 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True, outer_insn_inames = temp_kernel.insn_inames(insn) - from loopy.kernel.data import LocalIndexTagBase, check_iname_tags + from loopy.kernel.data import LocalIndexTagBase, get_iname_tags outer_local_inames = tuple(oiname for oiname in outer_insn_inames - if check_iname_tags(kernel.iname_to_tags[oiname], LocalIndexTagBase) + if get_iname_tags(kernel.iname_to_tags[oiname], LocalIndexTagBase) and oiname != sweep_iname) from pymbolic import var @@ -1740,7 +1740,7 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True, "by reductions is 'local'--found iname(s) '%s' " "respectively tagged '%s'" % (", ".join(bad_inames), - ", ".join(tag.key for tag in kernel.iname_to_tags[iname] + ", ".join(str(kernel.iname_to_tags[iname]) for iname in bad_inames))) if n_local_par == 0 and n_sequential == 0: @@ -2150,7 +2150,7 @@ def preprocess_kernel(kernel, device=None): from loopy.kernel.data import AutoLocalIndexTagBase for iname, tags in six.iteritems(kernel.iname_to_tags): - if (check_iname_tags(tags, AutoLocalIndexTagBase) + if (get_iname_tags(tags, AutoLocalIndexTagBase) and iname in kernel.all_inames()): raise LoopyError("kernel with automatically-assigned " "local axes passed to preprocessing") diff --git a/loopy/schedule/__init__.py b/loopy/schedule/__init__.py index 68b3fb0cb..bda316bc1 100644 --- a/loopy/schedule/__init__.py +++ b/loopy/schedule/__init__.py @@ -213,12 +213,12 @@ def find_loop_nest_with_map(kernel): result = {} from loopy.kernel.data import (ConcurrentTag, IlpBaseTag, VectorizeTag, - check_iname_tags) + get_iname_tags) all_nonpar_inames = set( - iname for iname, tags in six.iteritems(kernel.iname_to_tags) - if tags and not check_iname_tags(tags, - (ConcurrentTag, IlpBaseTag, VectorizeTag))) + iname for iname in kernel.all_inames() + if not get_iname_tags(kernel.iname_to_tags[iname], + (ConcurrentTag, IlpBaseTag, VectorizeTag))) iname_to_insns = kernel.iname_to_insns() @@ -241,7 +241,7 @@ def find_loop_nest_around_map(kernel): iname_to_insns = kernel.iname_to_insns() # examine pairs of all inames--O(n**2), I know. - from loopy.kernel.data import IlpBaseTag, check_iname_tags + from loopy.kernel.data import IlpBaseTag, get_iname_tags for inner_iname in all_inames: result[inner_iname] = set() for outer_iname in all_inames: @@ -249,7 +249,7 @@ def find_loop_nest_around_map(kernel): continue tags = kernel.iname_to_tags[outer_iname] - if check_iname_tags(tags, IlpBaseTag): + if get_iname_tags(tags, IlpBaseTag): # ILP tags are special because they are parallel tags # and therefore 'in principle' nest around everything. # But they're realized by the scheduler as a loop @@ -279,10 +279,10 @@ def find_loop_insn_dep_map(kernel, loop_nest_with_map, loop_nest_around_map): result = {} from loopy.kernel.data import (ConcurrentTag, IlpBaseTag, VectorizeTag, - check_iname_tags) + get_iname_tags) for insn in kernel.instructions: for iname in kernel.insn_inames(insn): - if check_iname_tags(kernel.iname_to_tags[iname], ConcurrentTag): + if get_iname_tags(kernel.iname_to_tags[iname], ConcurrentTag): continue iname_dep = result.setdefault(iname, set()) @@ -313,7 +313,8 @@ def find_loop_insn_dep_map(kernel, loop_nest_with_map, loop_nest_around_map): continue tags = kernel.iname_to_tags[dep_insn_iname] - if check_iname_tags(tags, (ConcurrentTag, IlpBaseTag, VectorizeTag)): + if get_iname_tags(tags, + (ConcurrentTag, IlpBaseTag, VectorizeTag)): # Parallel tags don't really nest, so we'll disregard # them here. continue @@ -1878,19 +1879,19 @@ def generate_loop_schedules_inner(kernel, debug_args={}): for insn_id in sched_item_to_insn_id(item)) from loopy.kernel.data import (IlpBaseTag, ConcurrentTag, VectorizeTag, - check_iname_tags) + get_iname_tags) ilp_inames = set( iname for iname, tags in six.iteritems(kernel.iname_to_tags) - if check_iname_tags(tags, IlpBaseTag)) + if get_iname_tags(tags, IlpBaseTag)) vec_inames = set( iname for iname, tags in six.iteritems(kernel.iname_to_tags) - if check_iname_tags(tags, VectorizeTag)) + if get_iname_tags(tags, VectorizeTag)) parallel_inames = set( iname for iname, tags in six.iteritems(kernel.iname_to_tags) - if check_iname_tags(tags, ConcurrentTag)) + if get_iname_tags(tags, ConcurrentTag)) loop_nest_with_map = find_loop_nest_with_map(kernel) loop_nest_around_map = find_loop_nest_around_map(kernel) diff --git a/loopy/statistics.py b/loopy/statistics.py index 77c638128..4b0643873 100755 --- a/loopy/statistics.py +++ b/loopy/statistics.py @@ -1182,12 +1182,11 @@ def get_unused_hw_axes_factor(knl, insn, disregard_local_axes, space=None): l_used = set() from loopy.kernel.data import (LocalIndexTag, GroupIndexTag, - get_iname_tags, check_iname_tags) + get_iname_tags) for iname in knl.insn_inames(insn): - tags = get_iname_tags(knl.iname_to_tags[iname], (LocalIndexTag, GroupIndexTag)) + tags = get_iname_tags(knl.iname_to_tags[iname], + (LocalIndexTag, GroupIndexTag), 1) if tags: - if len(tags) > 1: - raise LoopyError("cannot have more than one UniqueTags") tag, = tags if isinstance(tag, LocalIndexTag): l_used.add(tag.axis) @@ -1221,9 +1220,9 @@ def count_insn_runs(knl, insn, count_redundant_work, disregard_local_axes=False) insn_inames = knl.insn_inames(insn) if disregard_local_axes: - from loopy.kernel.data import LocalIndexTag, check_iname_tags + from loopy.kernel.data import LocalIndexTag, get_iname_tags insn_inames = [iname for iname in insn_inames if not - check_iname_tags(kernel.iname_to_tags[iname], LocalIndexTag)] + get_iname_tags(knl.iname_to_tags[iname], LocalIndexTag)] inames_domain = knl.get_inames_domain(insn_inames) domain = (inames_domain.project_out_except( diff --git a/loopy/target/ispc.py b/loopy/target/ispc.py index 3fd2bc1ae..b7edc517b 100644 --- a/loopy/target/ispc.py +++ b/loopy/target/ispc.py @@ -418,33 +418,33 @@ class ISPCASTBuilder(CASTBuilder): new_terms = [] - from loopy.kernel.data import LocalIndexTag, check_iname_tags, get_iname_tags + from loopy.kernel.data import LocalIndexTag, get_iname_tags from loopy.symbolic import get_dependencies saw_l0 = False for term in terms: if (isinstance(term, Variable) - and check_iname_tags(kernel.iname_to_tags[term.name], LocalIndexTag)): - tags = get_iname_tags(kernel.iname_to_tags[term.name], LocalIndexTag) - if len(tags) > 1: - raise LoopyError("cannot have more than one LocalIndexTags") - tag, = tags + and get_iname_tags( + kernel.iname_to_tags[term.name], LocalIndexTag)): + tag, = get_iname_tags(kernel.iname_to_tags[term.name], + LocalIndexTag, 1) if tag.axis == 0: if saw_l0: - raise LoopyError("streaming store must have stride 1 " - "in local index, got: %s" % subscript) + raise LoopyError( + "streaming store must have stride 1 in " + "local index, got: %s" % subscript) saw_l0 = True continue else: for dep in get_dependencies(term): - if check_iname_tags(kernel.iname_to_tags[dep], LocalIndexTag): - tags = get_iname_tags(kernel.iname_to_tags[dep], LocalIndexTag) - if len(tags) > 1: - raise LoopyError("cannot have more than one LocalIndexTags") - tag, = tags + if get_iname_tags( + kernel.iname_to_tags[dep], LocalIndexTag): + tag, = get_iname_tags(kernel.iname_to_tags[dep], + LocalIndexTag, 1) if tag.axis == 0: - raise LoopyError("streaming store must have stride 1 " - "in local index, got: %s" % subscript) + raise LoopyError( + "streaming store must have stride 1 in " + "local index, got: %s" % subscript) new_terms.append(term) diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index 21c2f7eea..365f2db77 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -33,7 +33,6 @@ from loopy.symbolic import ( RuleAwareIdentityMapper, RuleAwareSubstitutionMapper, SubstitutionRuleMappingContext) from loopy.diagnostic import LoopyError -from loopy.kernel.data import check_iname_tags, get_iname_tags __doc__ = """ @@ -177,9 +176,9 @@ def _split_iname_backend(kernel, split_iname, """ existing_tags = kernel.iname_to_tags[split_iname] - from loopy.kernel.data import ForceSequentialTag, check_iname_tags + from loopy.kernel.data import ForceSequentialTag, get_iname_tags if (do_tagged_check and existing_tags - and not check_iname_tags(existing_tags, ForceSequentialTag)): + and not get_iname_tags(existing_tags, ForceSequentialTag)): raise LoopyError("cannot split already tagged iname '%s'" % split_iname) if split_iname not in kernel.all_inames(): @@ -648,8 +647,8 @@ def tag_inames(kernel, iname_to_tag, force=False, ignore_nonexistent=False): iname_to_tag = [(iname, parse_tag(tag)) for iname, tag in iname_to_tag] - from loopy.kernel.data import (ConcurrentTag, AutoLocalIndexTagBase, - ForceSequentialTag) + from loopy.kernel.data import (ConcurrentTag, ForceSequentialTag, + get_iname_tags) # {{{ globbing @@ -680,27 +679,27 @@ def tag_inames(kernel, iname_to_tag, force=False, ignore_nonexistent=False): knl_iname_to_tags = kernel.iname_to_tags.copy() for iname, new_tag in six.iteritems(iname_to_tag): + if not new_tag: + continue + old_tags = kernel.iname_to_tags[iname] if iname not in kernel.all_inames(): raise ValueError("cannot tag '%s'--not known" % iname) - if isinstance(new_tag, ConcurrentTag) \ - and check_iname_tags(old_tags, ForceSequentialTag): + if (isinstance(new_tag, ConcurrentTag) + and get_iname_tags(old_tags, ForceSequentialTag)): raise ValueError("cannot tag '%s' as parallel--" "iname requires sequential execution" % iname) - if isinstance(new_tag, ForceSequentialTag) \ - and check_iname_tags(old_tags, ConcurrentTag): + if (isinstance(new_tag, ForceSequentialTag) + and get_iname_tags(old_tags, ConcurrentTag)): raise ValueError("'%s' is already tagged as parallel, " "but is now prohibited from being parallel " "(likely because of participation in a precompute or " "a reduction)" % iname) - if new_tag and all(tag.key != new_tag.key for tag in old_tags): - old_tags = old_tags + (new_tag,) - - knl_iname_to_tags[iname] = old_tags + knl_iname_to_tags[iname] = old_tags.union([new_tag]) return kernel.copy(iname_to_tags=knl_iname_to_tags) @@ -982,9 +981,9 @@ def get_iname_duplication_options(knl, use_boostable_into=False): # Get the duplication options as a tuple of iname and a set for iname, insns in _get_iname_duplication_options(insn_iname_sets): # Check whether this iname has a parallel tag and discard it if so - from loopy.kernel.data import ConcurrentTag, check_iname_tags + from loopy.kernel.data import ConcurrentTag, get_iname_tags if (iname in knl.iname_to_tags - and check_iname_tags(knl.iname_to_tags[iname], ConcurrentTag)): + and get_iname_tags(knl.iname_to_tags[iname], ConcurrentTag)): continue # If we find a duplication option and to not use boostable_into @@ -1501,7 +1500,7 @@ def find_unused_axis_tag(kernel, kind, insn_match=None): """ used_axes = set() - from loopy.kernel.data import GroupIndexTag, LocalIndexTag, check_iname_tags + from loopy.kernel.data import GroupIndexTag, LocalIndexTag, get_iname_tags if isinstance(kind, str): found = False @@ -1521,7 +1520,7 @@ def find_unused_axis_tag(kernel, kind, insn_match=None): for insn in insns: for iname in kernel.insn_inames(insn): dim_tags = kernel.iname_to_tags[iname] - if check_iname_tags(dim_tags, kind): + if get_iname_tags(dim_tags, kind): used_axes.add(kind.axis) i = 0 diff --git a/loopy/transform/privatize.py b/loopy/transform/privatize.py index d1c112eca..481078403 100644 --- a/loopy/transform/privatize.py +++ b/loopy/transform/privatize.py @@ -41,7 +41,6 @@ __doc__ = """ # {{{ privatize temporaries with iname from loopy.symbolic import IdentityMapper -from loopy.kernel.data import check_iname_tags, get_iname_tags class ExtraInameIndexInserter(IdentityMapper): @@ -85,6 +84,7 @@ def privatize_temporaries_with_inames( Example:: +<<<<<<< HEAD:loopy/transform/privatize.py <<<<<<< HEAD:loopy/transform/privatize.py for imatrix, i acc = 0 @@ -95,6 +95,9 @@ def privatize_temporaries_with_inames( ======= from loopy.kernel.data import IlpBaseTag, VectorizeTag, check_iname_tags >>>>>>> d4c1d2e... change tags from set to tuple:loopy/transform/ilp.py +======= + from loopy.kernel.data import IlpBaseTag, VectorizeTag, get_iname_tags +>>>>>>> 38a4424... change tags from tuple to set:loopy/transform/ilp.py might become:: @@ -192,7 +195,7 @@ def privatize_temporaries_with_inames( dim_tags = ["c"] * (len(shape) + len(extra_shape)) for i, iname in enumerate(inames): - if check_iname_tags(kernel.iname_to_tags[iname], VectorizeTag): + if get_iname_tags(kernel.iname_to_tags[iname], VectorizeTag): dim_tags[len(shape) + i] = "vec" new_temp_vars[tv.name] = tv.copy(shape=shape + extra_shape, diff --git a/loopy/transform/save.py b/loopy/transform/save.py index 962a83cd1..cd4c10272 100644 --- a/loopy/transform/save.py +++ b/loopy/transform/save.py @@ -403,26 +403,20 @@ class TemporarySaver(object): continue from loopy.kernel.data import (GroupIndexTag, LocalIndexTag, - ConcurrentTag, get_iname_tags, check_iname_tags) + ConcurrentTag, get_iname_tags) - if check_iname_tags(tags, GroupIndexTag): - tags = get_iname_tags(tags, GroupIndexTag) - if len(tags) > 1: - raise LoopyError("cannot have more than one GroupIndexTags") - tag, = tags + if get_iname_tags(tags, GroupIndexTag): + tag, = get_iname_tags(tags, GroupIndexTag, 1) my_group_tags.append(tag) - elif check_iname_tags(tags, LocalIndexTag): - tags = get_iname_tags(tags, LocalIndexTag) - if len(tags) > 1: - raise LoopyError("cannot have more than one LocalIndexTags") - tag, = tags + elif get_iname_tags(tags, LocalIndexTag): + tag, = get_iname_tags(tags, LocalIndexTag, 1) my_local_tags.append(tag) - elif check_iname_tags(tags, ConcurrentTag): + elif get_iname_tags(tags, ConcurrentTag): raise LoopyError( "iname '%s' is tagged with '%s' - only " "group and local tags are supported for " "auto save/reload of temporaries" % - (iname, ", ".join(str(tag) for tag in tags))) + (iname, tags)) if group_tags is None: group_tags = _sortedtags(my_group_tags) -- GitLab From a4da27723b6da14679b829d9178fde6df845ec11 Mon Sep 17 00:00:00 2001 From: tj-sun Date: Wed, 4 Apr 2018 17:17:37 +0100 Subject: [PATCH 09/20] method to untag inames --- loopy/__init__.py | 4 ++-- loopy/kernel/tools.py | 10 ++++++---- loopy/tools.py | 2 ++ loopy/transform/iname.py | 23 +++++++++++++++++++---- 4 files changed, 29 insertions(+), 10 deletions(-) diff --git a/loopy/__init__.py b/loopy/__init__.py index 6e221b24c..54c3523d5 100644 --- a/loopy/__init__.py +++ b/loopy/__init__.py @@ -68,7 +68,7 @@ from loopy.library.reduction import register_reduction_parser from loopy.version import VERSION, MOST_RECENT_LANGUAGE_VERSION from loopy.transform.iname import ( - set_loop_priority, prioritize_loops, + set_loop_priority, prioritize_loops, untag_inames, split_iname, chunk_iname, join_inames, tag_inames, duplicate_inames, rename_iname, remove_unused_inames, split_reduction_inward, split_reduction_outward, @@ -177,7 +177,7 @@ __all__ = [ # {{{ transforms - "set_loop_priority", "prioritize_loops", + "set_loop_priority", "prioritize_loops", "untag_inames", "split_iname", "chunk_iname", "join_inames", "tag_inames", "duplicate_inames", "rename_iname", "remove_unused_inames", diff --git a/loopy/kernel/tools.py b/loopy/kernel/tools.py index 5be337537..74f3cab9f 100644 --- a/loopy/kernel/tools.py +++ b/loopy/kernel/tools.py @@ -779,7 +779,7 @@ def assign_automatic_axes(kernel, axis=0, local_size=None): # Likely unbounded, automatic assignment is not # going to happen for this iname. new_iname_to_tags = kernel.iname_to_tags.copy() - new_iname_to_tags[iname] = tuple() + new_iname_to_tags[iname] = set() return assign_automatic_axes( kernel.copy(iname_to_tags=new_iname_to_tags), axis=recursion_axis) @@ -821,13 +821,15 @@ def assign_automatic_axes(kernel, axis=0, local_size=None): else: new_tag = LocalIndexTag(axis) if desired_length > local_size[axis]: - from loopy import split_iname + from loopy import split_iname, untag_inames # Don't be tempted to switch the outer tag to unroll--this may # generate tons of code on some examples. return assign_automatic_axes( - split_iname(kernel, iname, inner_length=local_size[axis], + split_iname( + lp.untag_inames(kernel, iname, AutoLocalIndexTagBase), + iname, inner_length=local_size[axis], outer_tag=None, inner_tag=new_tag, do_tagged_check=False), axis=recursion_axis, local_size=local_size) @@ -836,7 +838,7 @@ def assign_automatic_axes(kernel, axis=0, local_size=None): raise LoopyError("trying to reassign '%s'" % iname) new_iname_to_tags = kernel.iname_to_tags.copy() - new_iname_to_tags[iname] = (new_tag,) + new_iname_to_tags[iname] = set([new_tag]) return assign_automatic_axes(kernel.copy(iname_to_tags=new_iname_to_tags), axis=recursion_axis, local_size=local_size) diff --git a/loopy/tools.py b/loopy/tools.py index 288e0c3c4..15d2a859a 100644 --- a/loopy/tools.py +++ b/loopy/tools.py @@ -75,6 +75,8 @@ class LoopyKeyBuilder(KeyBuilderBase): for dict_key in sorted(six.iterkeys(key)): self.rec(key_hash, (dict_key, key[dict_key])) + update_for_defaultdict = update_for_dict + def update_for_BasicSet(self, key_hash, key): # noqa from islpy import Printer prn = Printer.to_str(key.get_ctx()) diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index 365f2db77..0b91e9f87 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -44,6 +44,8 @@ __doc__ = """ .. autofunction:: join_inames +.. autofunction:: untag_inames + .. autofunction:: tag_inames .. autofunction:: duplicate_inames @@ -293,10 +295,9 @@ def _split_iname_backend(kernel, split_iname, kernel = ins.map_kernel(kernel) kernel = rule_mapping_context.finish_kernel(kernel) - if existing_tags: - for existing_tag in existing_tags: - kernel = tag_inames(kernel, - {outer_iname: existing_tag, inner_iname: existing_tag}) + for existing_tag in existing_tags: + kernel = tag_inames(kernel, + {outer_iname: existing_tag, inner_iname: existing_tag}) return tag_inames(kernel, {outer_iname: outer_tag, inner_iname: inner_tag}) @@ -596,6 +597,20 @@ def join_inames(kernel, inames, new_iname=None, tag=None, within=None): # }}} +# {{{ untag inames + +def untag_inames(kernel, iname_to_untag, tag_type): + + knl_iname_to_tags = kernel.iname_to_tags.copy() + old_tags = knl_iname_to_tags[iname_to_untag] + old_tags = set(tag for tag in old_tags if not isinstance(tag, tag_type)) + knl_iname_to_tags[iname_to_untag] = old_tags + + return kernel.copy(iname_to_tags=knl_iname_to_tags) + +# }}} + + # {{{ tag inames def tag_inames(kernel, iname_to_tag, force=False, ignore_nonexistent=False): -- GitLab From 722c9daf9cdd7a5aa24b456de30dab21110ed3a4 Mon Sep 17 00:00:00 2001 From: tj-sun Date: Wed, 4 Apr 2018 17:44:02 +0100 Subject: [PATCH 10/20] bug in temporaries saver --- loopy/kernel/tools.py | 2 +- loopy/transform/save.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/loopy/kernel/tools.py b/loopy/kernel/tools.py index 74f3cab9f..599f584d4 100644 --- a/loopy/kernel/tools.py +++ b/loopy/kernel/tools.py @@ -828,7 +828,7 @@ def assign_automatic_axes(kernel, axis=0, local_size=None): return assign_automatic_axes( split_iname( - lp.untag_inames(kernel, iname, AutoLocalIndexTagBase), + untag_inames(kernel, iname, AutoLocalIndexTagBase), iname, inner_length=local_size[axis], outer_tag=None, inner_tag=new_tag, do_tagged_check=False), diff --git a/loopy/transform/save.py b/loopy/transform/save.py index cd4c10272..6e4d592a3 100644 --- a/loopy/transform/save.py +++ b/loopy/transform/save.py @@ -241,14 +241,14 @@ class TemporarySaver(object): self.insn_name_gen = kernel.get_instruction_id_generator() # These fields keep track of updates to the kernel. + from collections import defaultdict self.insns_to_insert = [] self.insns_to_update = {} self.extra_args_to_add = {} - self.updated_iname_to_tags = {} + self.updated_iname_to_tags = defaultdict(set) self.updated_temporary_variables = {} # temporary name -> save or reload insn ids - from collections import defaultdict self.temporary_to_save_ids = defaultdict(set) self.temporary_to_reload_ids = defaultdict(set) self.subkernel_to_newly_added_insn_ids = defaultdict(set) @@ -677,7 +677,7 @@ class TemporarySaver(object): # If the temporary has local scope, then loads / stores can # be done in parallel. from loopy.kernel.data import AutoFitLocalIndexTag - iname_to_tags[new_iname] = (AutoFitLocalIndexTag(),) + iname_to_tags[new_iname] = set(AutoFitLocalIndexTag()) dim_inames.append(new_iname) @@ -707,7 +707,7 @@ class TemporarySaver(object): & aff[new_iname].lt_set(aff_from_expr(domain.space, dim))) - self.updated_iname_to_tags[new_iname] = (hw_tag,) + self.updated_iname_to_tags[new_iname] = set([hw_tag]) hw_inames.append(new_iname) # The operations on the domain above return a Set object, but the -- GitLab From 16d8799274e3cd22964c8b1b231992ab7db285b8 Mon Sep 17 00:00:00 2001 From: tj-sun Date: Thu, 5 Apr 2018 10:22:38 +0100 Subject: [PATCH 11/20] allow tagging with set --- loopy/transform/iname.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index 0b91e9f87..46cd36a0f 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -628,7 +628,14 @@ def tag_inames(kernel, iname_to_tag, force=False, ignore_nonexistent=False): """ if isinstance(iname_to_tag, dict): - iname_to_tag = list(six.iteritems(iname_to_tag)) + unpack_iname_to_tag = [] + for iname, tags in six.iteritems(iname_to_tag): + if isinstance(tags, set): + for tag in tags: + unpack_iname_to_tag.append((iname, tag)) + else: + unpack_iname_to_tag.append((iname, tags)) + iname_to_tag = unpack_iname_to_tag elif isinstance(iname_to_tag, str): def parse_kv(s): colon_index = s.find(":") -- GitLab From 41f0b8ef74dc65c5e5babe55f0433ddeed18dc1a Mon Sep 17 00:00:00 2001 From: tj-sun Date: Thu, 5 Apr 2018 13:49:18 +0100 Subject: [PATCH 12/20] small bug in removing inames with shared axes --- loopy/kernel/__init__.py | 1 - loopy/transform/save.py | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py index 872365fca..8e569e0de 100644 --- a/loopy/kernel/__init__.py +++ b/loopy/kernel/__init__.py @@ -728,7 +728,6 @@ class LoopKernel(ImmutableRecordWithoutPickling): tag, = get_iname_tags(tags, HardwareConcurrentTag, 1) if tag.key in multi_use_keys: multi_use_inames.add(iname) - break return frozenset(cond_inames - multi_use_inames) diff --git a/loopy/transform/save.py b/loopy/transform/save.py index 6e4d592a3..6e6db328e 100644 --- a/loopy/transform/save.py +++ b/loopy/transform/save.py @@ -677,7 +677,7 @@ class TemporarySaver(object): # If the temporary has local scope, then loads / stores can # be done in parallel. from loopy.kernel.data import AutoFitLocalIndexTag - iname_to_tags[new_iname] = set(AutoFitLocalIndexTag()) + iname_to_tags[new_iname] = set([AutoFitLocalIndexTag()]) dim_inames.append(new_iname) -- GitLab From 9706470b8e79f727d9398cc4615bd95a5a99d321 Mon Sep 17 00:00:00 2001 From: tj-sun Date: Thu, 5 Apr 2018 14:44:14 +0100 Subject: [PATCH 13/20] small bug in assigning automatic axis --- loopy/kernel/tools.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/loopy/kernel/tools.py b/loopy/kernel/tools.py index 599f584d4..b806b2e2b 100644 --- a/loopy/kernel/tools.py +++ b/loopy/kernel/tools.py @@ -837,8 +837,12 @@ def assign_automatic_axes(kernel, axis=0, local_size=None): if not get_iname_tags(kernel.iname_to_tags[iname], AutoLocalIndexTagBase): raise LoopyError("trying to reassign '%s'" % iname) + if new_tag: + new_tag = set([new_tag]) + else: + new_tag = set() new_iname_to_tags = kernel.iname_to_tags.copy() - new_iname_to_tags[iname] = set([new_tag]) + new_iname_to_tags[iname] = new_tag return assign_automatic_axes(kernel.copy(iname_to_tags=new_iname_to_tags), axis=recursion_axis, local_size=local_size) -- GitLab From 8df92b838e68ffb3e531b2c8a3827a86f135bf64 Mon Sep 17 00:00:00 2001 From: tj-sun Date: Thu, 5 Apr 2018 15:47:01 +0100 Subject: [PATCH 14/20] add check and docstring --- doc/tutorial.rst | 4 ++-- loopy/check.py | 14 ++++++++++++++ loopy/codegen/loop.py | 4 +--- loopy/kernel/data.py | 10 ++++++++++ loopy/transform/iname.py | 8 ++++++++ 5 files changed, 35 insertions(+), 5 deletions(-) diff --git a/doc/tutorial.rst b/doc/tutorial.rst index e08c88598..2cd221bc0 100644 --- a/doc/tutorial.rst +++ b/doc/tutorial.rst @@ -120,7 +120,7 @@ always see loopy's view of a kernel by printing it. [n] -> { [i] : 0 <= i < n } --------------------------------------------------------------------------- INAME IMPLEMENTATION TAGS: - i: None + i: --------------------------------------------------------------------------- INSTRUCTIONS: for i @@ -693,7 +693,7 @@ Iname implementation tags are also printed along with the entire kernel: ... INAME IMPLEMENTATION TAGS: i_inner: unr - i_outer: None + i_outer: --------------------------------------------------------------------------- ... diff --git a/loopy/check.py b/loopy/check.py index bebd86fff..e504e50fd 100644 --- a/loopy/check.py +++ b/loopy/check.py @@ -113,6 +113,19 @@ def check_loop_priority_inames_known(kernel): raise LoopyError("unknown iname '%s' in loop priorities" % iname) +def check_multiple_tags_allowed(kernel): + from loopy.kernel.data import (GroupIndexTag, LocalIndexTag, + get_iname_tags) + illegal_combinations = [ + (GroupIndexTag, LocalIndexTag) + ] + for iname, tags in six.iteritems(kernel.iname_to_tags): + for comb in illegal_combinations: + if len(get_iname_tags(tags, comb)) > 1: + raise LoopyError("iname {0} has illegal combination of " + "tags: {1}".format(iname, tags)) + + def check_for_double_use_of_hw_axes(kernel): from loopy.kernel.data import UniqueTag, get_iname_tags @@ -601,6 +614,7 @@ def pre_schedule_checks(kernel): check_for_double_use_of_hw_axes(kernel) check_insn_attributes(kernel) check_loop_priority_inames_known(kernel) + check_multiple_tags_allowed(kernel) check_for_inactive_iname_access(kernel) check_for_write_races(kernel) check_for_data_dependent_parallel_bounds(kernel) diff --git a/loopy/codegen/loop.py b/loopy/codegen/loop.py index 0efa96f96..7036d25e7 100644 --- a/loopy/codegen/loop.py +++ b/loopy/codegen/loop.py @@ -267,14 +267,12 @@ def set_up_hw_parallel_loops(codegen_state, schedule_index, next_func, else: raise RuntimeError("unexpected hw tag type") - # TODO: get rid of None - other_inames_with_same_tag = [ other_iname for other_iname in kernel.all_inames() if (get_iname_tags(kernel.iname_to_tags[other_iname], UniqueTag) and other_iname != iname and any(_tag.key == tag.key - for _tag in kernel.iname_to_tags[other_iname]))] + for _tag in kernel.iname_to_tags[other_iname] if _tag))] # {{{ 'implement' hardware axis boundaries diff --git a/loopy/kernel/data.py b/loopy/kernel/data.py index 9250c5acf..289d13ebd 100644 --- a/loopy/kernel/data.py +++ b/loopy/kernel/data.py @@ -56,6 +56,16 @@ class auto(object): # noqa def get_iname_tags(tags, tag_type, max_num=None, min_num=None): + """Return a subset of *tags* that matches type *tag_type*. Raises exception + if the number of tags found were greater than *max_num* or less than + *min_num*W. + + :arg tags: An iterable of tags. + :arg tag_type: a subclass of :class:`loopy.kernel.data.IndexTag`. + :arg max_num: the maximum number of tags expected to be found. + :arg min_num: the minimum number of tags expected to be found. + """ + result = set(tag for tag in tags if isinstance(tag, tag_type)) if max_num: if len(result) > max_num: diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index 46cd36a0f..87a488bb9 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -600,6 +600,14 @@ def join_inames(kernel, inames, new_iname=None, tag=None, within=None): # {{{ untag inames def untag_inames(kernel, iname_to_untag, tag_type): + """ + Remove tags on *iname_to_untag* which matches *tag_type*. + + :arg iname_to_untag: iname as string. + :arg tag_type: a subclass of :class:`loopy.kernel.data.IndexTag`. + + .. versionadded:: 2018.1 + """ knl_iname_to_tags = kernel.iname_to_tags.copy() old_tags = knl_iname_to_tags[iname_to_untag] -- GitLab From aff111a1a7a5be79cae242e0d384bc56f40ef358 Mon Sep 17 00:00:00 2001 From: tj-sun Date: Fri, 6 Apr 2018 17:52:37 +0100 Subject: [PATCH 15/20] need to be able to compare tags --- loopy/kernel/data.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/loopy/kernel/data.py b/loopy/kernel/data.py index 289d13ebd..defadd632 100644 --- a/loopy/kernel/data.py +++ b/loopy/kernel/data.py @@ -84,6 +84,9 @@ class IndexTag(ImmutableRecord): def __hash__(self): return hash(self.key) + def __lt__(self, other): + return self.__hash__() < other.__hash__() + def update_persistent_hash(self, key_hash, key_builder): """Custom hash computation function for use with :class:`pytools.persistent_dict.PersistentDict`. -- GitLab From c89855fa04fc5766eefe250d1cec68366b4eed7f Mon Sep 17 00:00:00 2001 From: tj-sun Date: Thu, 19 Apr 2018 19:01:12 +0100 Subject: [PATCH 16/20] update based on feedback on MR --- doc/tutorial.rst | 4 ++-- loopy/check.py | 37 +++++++++++++++---------------- loopy/codegen/bounds.py | 8 +++---- loopy/codegen/control.py | 14 ++++++------ loopy/codegen/loop.py | 10 ++++----- loopy/kernel/__init__.py | 42 +++++++++++++++++++++++++----------- loopy/kernel/data.py | 2 +- loopy/kernel/tools.py | 20 +++++++++-------- loopy/preprocess.py | 26 +++++++++++----------- loopy/schedule/__init__.py | 22 +++++++++---------- loopy/statistics.py | 15 +++++++------ loopy/target/ispc.py | 10 ++++----- loopy/transform/iname.py | 18 ++++++++-------- loopy/transform/privatize.py | 6 +++++- loopy/transform/save.py | 12 +++++------ 15 files changed, 137 insertions(+), 109 deletions(-) diff --git a/doc/tutorial.rst b/doc/tutorial.rst index 2cd221bc0..e08c88598 100644 --- a/doc/tutorial.rst +++ b/doc/tutorial.rst @@ -120,7 +120,7 @@ always see loopy's view of a kernel by printing it. [n] -> { [i] : 0 <= i < n } --------------------------------------------------------------------------- INAME IMPLEMENTATION TAGS: - i: + i: None --------------------------------------------------------------------------- INSTRUCTIONS: for i @@ -693,7 +693,7 @@ Iname implementation tags are also printed along with the entire kernel: ... INAME IMPLEMENTATION TAGS: i_inner: unr - i_outer: + i_outer: None --------------------------------------------------------------------------- ... diff --git a/loopy/check.py b/loopy/check.py index e504e50fd..8b94e7836 100644 --- a/loopy/check.py +++ b/loopy/check.py @@ -114,26 +114,26 @@ def check_loop_priority_inames_known(kernel): def check_multiple_tags_allowed(kernel): - from loopy.kernel.data import (GroupIndexTag, LocalIndexTag, - get_iname_tags) + from loopy.kernel.data import (GroupIndexTag, LocalIndexTag, VectorizeTag, + UnrollTag, ForceSequentialTag, filter_iname_by_type) illegal_combinations = [ - (GroupIndexTag, LocalIndexTag) + (GroupIndexTag, LocalIndexTag, VectorizeTag, UnrollTag, ForceSequentialTag) ] for iname, tags in six.iteritems(kernel.iname_to_tags): for comb in illegal_combinations: - if len(get_iname_tags(tags, comb)) > 1: + if len(filter_iname_by_type(tags, comb)) > 1: raise LoopyError("iname {0} has illegal combination of " "tags: {1}".format(iname, tags)) def check_for_double_use_of_hw_axes(kernel): - from loopy.kernel.data import UniqueTag, get_iname_tags + from loopy.kernel.data import UniqueTag, filter_iname_by_type for insn in kernel.instructions: insn_tag_keys = set() for iname in kernel.insn_inames(insn): tags = kernel.iname_to_tags[iname] - for tag in get_iname_tags(tags, UniqueTag): + for tag in filter_iname_by_type(tags, UniqueTag): key = tag.key if key in insn_tag_keys: raise LoopyError("instruction '%s' has multiple " @@ -181,7 +181,7 @@ def _is_racing_iname_tag(tv, tag): def check_for_write_races(kernel): - from loopy.kernel.data import ConcurrentTag, get_iname_tags + from loopy.kernel.data import ConcurrentTag, filter_iname_by_type for insn in kernel.instructions: for assignee_name, assignee_indices in zip( @@ -200,7 +200,7 @@ def check_for_write_races(kernel): raceable_parallel_insn_inames = set( iname for iname in kernel.insn_inames(insn) - if get_iname_tags(kernel.iname_to_tags[iname], + if filter_iname_by_type(kernel.iname_to_tags[iname], ConcurrentTag)) elif assignee_name in kernel.temporary_variables: @@ -244,12 +244,12 @@ def check_for_orphaned_user_hardware_axes(kernel): def check_for_data_dependent_parallel_bounds(kernel): - from loopy.kernel.data import ConcurrentTag, get_iname_tags + from loopy.kernel.data import ConcurrentTag, filter_iname_by_type for i, dom in enumerate(kernel.domains): dom_inames = set(dom.get_var_names(dim_type.set)) par_inames = set(iname for iname in dom_inames - if get_iname_tags(kernel.iname_to_tags[iname], ConcurrentTag)) + if filter_iname_by_type(kernel.iname_to_tags[iname], ConcurrentTag)) if not par_inames: continue @@ -666,7 +666,7 @@ def _check_for_unused_hw_axes_in_kernel_chunk(kernel, sched_index=None): # alternative: just disregard length-1 dimensions? from loopy.kernel.data import (LocalIndexTag, AutoLocalIndexTagBase, - GroupIndexTag, get_iname_tags) + GroupIndexTag, filter_iname_by_type) while i < loop_end_i: sched_item = kernel.schedule[i] @@ -686,13 +686,13 @@ def _check_for_unused_hw_axes_in_kernel_chunk(kernel, sched_index=None): for iname in kernel.insn_inames(insn): tags = kernel.iname_to_tags[iname] - if get_iname_tags(tags, LocalIndexTag): - tag, = get_iname_tags(tags, LocalIndexTag, 1) + if filter_iname_by_type(tags, LocalIndexTag): + tag, = filter_iname_by_type(tags, LocalIndexTag, 1) local_axes_used.add(tag.axis) - elif get_iname_tags(tags, GroupIndexTag): - tag, = get_iname_tags(tags, GroupIndexTag, 1) + elif filter_iname_by_type(tags, GroupIndexTag): + tag, = filter_iname_by_type(tags, GroupIndexTag, 1) group_axes_used.add(tag.axis) - elif get_iname_tags(tags, AutoLocalIndexTagBase): + elif filter_iname_by_type(tags, AutoLocalIndexTagBase): raise LoopyError("auto local tag encountered") if group_axes != group_axes_used: @@ -937,11 +937,12 @@ def check_implemented_domains(kernel, implemented_domains, code=None): .project_out_except(insn_inames, [dim_type.set])) from loopy.kernel.instruction import BarrierInstruction - from loopy.kernel.data import LocalIndexTag, get_iname_tags + from loopy.kernel.data import LocalIndexTag, filter_iname_by_type if isinstance(insn, BarrierInstruction): # project out local-id-mapped inames, solves #94 on gitlab non_lid_inames = frozenset(iname for iname in insn_inames - if not get_iname_tags(kernel.iname_to_tags[iname], LocalIndexTag)) + if not filter_iname_by_type( + kernel.iname_to_tags[iname], LocalIndexTag)) insn_impl_domain = insn_impl_domain.project_out_except( non_lid_inames, [dim_type.set]) diff --git a/loopy/codegen/bounds.py b/loopy/codegen/bounds.py index 240df24e5..284cd1c53 100644 --- a/loopy/codegen/bounds.py +++ b/loopy/codegen/bounds.py @@ -59,7 +59,7 @@ def get_usable_inames_for_conditional(kernel, sched_index): from loopy.schedule import ( find_active_inames_at, get_insn_ids_for_block_at, has_barrier_within) from loopy.kernel.data import (ConcurrentTag, LocalIndexTagBase, - IlpBaseTag, get_iname_tags) + IlpBaseTag, filter_iname_by_type) result = find_active_inames_at(kernel, sched_index) crosses_barrier = has_barrier_within(kernel, sched_index) @@ -98,9 +98,9 @@ def get_usable_inames_for_conditional(kernel, sched_index): # at the innermost level of nesting. if ( - get_iname_tags(tags, ConcurrentTag) - and not (get_iname_tags(tags, LocalIndexTagBase) - and crosses_barrier) and not get_iname_tags(tags, IlpBaseTag) + filter_iname_by_type(tags, ConcurrentTag) + and not (filter_iname_by_type(tags, LocalIndexTagBase) + and crosses_barrier) and not filter_iname_by_type(tags, IlpBaseTag) ): result.add(iname) diff --git a/loopy/codegen/control.py b/loopy/codegen/control.py index 41b04e172..b8cedd1a5 100644 --- a/loopy/codegen/control.py +++ b/loopy/codegen/control.py @@ -41,7 +41,7 @@ def get_admissible_conditional_inames_for(codegen_state, sched_index): kernel = codegen_state.kernel from loopy.kernel.data import (LocalIndexTag, HardwareConcurrentTag, - get_iname_tags) + filter_iname_by_type) from loopy.schedule import find_active_inames_at, has_barrier_within result = find_active_inames_at(kernel, sched_index) @@ -49,9 +49,9 @@ def get_admissible_conditional_inames_for(codegen_state, sched_index): has_barrier = has_barrier_within(kernel, sched_index) for iname, tags in six.iteritems(kernel.iname_to_tags): - if (get_iname_tags(tags, HardwareConcurrentTag) + if (filter_iname_by_type(tags, HardwareConcurrentTag) and codegen_state.is_generating_device_code): - if not has_barrier or not get_iname_tags(tags, LocalIndexTag): + if not has_barrier or not filter_iname_by_type(tags, LocalIndexTag): result.add(iname) return frozenset(result) @@ -138,12 +138,12 @@ def generate_code_for_sched_index(codegen_state, sched_index): from loopy.kernel.data import (UnrolledIlpTag, UnrollTag, ForceSequentialTag, LoopedIlpTag, VectorizeTag, - InOrderSequentialSequentialTag, get_iname_tags) - if get_iname_tags(tags, (UnrollTag, UnrolledIlpTag)): + InOrderSequentialSequentialTag, filter_iname_by_type) + if filter_iname_by_type(tags, (UnrollTag, UnrolledIlpTag)): func = generate_unroll_loop - elif get_iname_tags(tags, VectorizeTag): + elif filter_iname_by_type(tags, VectorizeTag): func = generate_vectorize_loop - elif len(tags) == 0 or get_iname_tags(tags, (LoopedIlpTag, + elif len(tags) == 0 or filter_iname_by_type(tags, (LoopedIlpTag, ForceSequentialTag, InOrderSequentialSequentialTag)): func = generate_sequential_loop_dim_code else: diff --git a/loopy/codegen/loop.py b/loopy/codegen/loop.py index 7036d25e7..015dc8d76 100644 --- a/loopy/codegen/loop.py +++ b/loopy/codegen/loop.py @@ -231,7 +231,7 @@ def set_up_hw_parallel_loops(codegen_state, schedule_index, next_func, kernel = codegen_state.kernel from loopy.kernel.data import (UniqueTag, HardwareConcurrentTag, - LocalIndexTag, GroupIndexTag, get_iname_tags) + LocalIndexTag, GroupIndexTag, filter_iname_by_type) from loopy.schedule import get_insn_ids_for_block_at insn_ids_for_block = get_insn_ids_for_block_at(kernel.schedule, schedule_index) @@ -242,8 +242,8 @@ def set_up_hw_parallel_loops(codegen_state, schedule_index, next_func, all_inames_by_insns |= kernel.insn_inames(insn_id) hw_inames_left = [iname for iname in all_inames_by_insns - if get_iname_tags(kernel.iname_to_tags[iname], - HardwareConcurrentTag)] + if filter_iname_by_type(kernel.iname_to_tags[iname], + HardwareConcurrentTag)] if not hw_inames_left: return next_func(codegen_state) @@ -258,7 +258,7 @@ def set_up_hw_parallel_loops(codegen_state, schedule_index, next_func, from loopy.symbolic import GroupHardwareAxisIndex, LocalHardwareAxisIndex - tag, = get_iname_tags(tags, UniqueTag, max_num=1, min_num=1) + tag, = filter_iname_by_type(tags, UniqueTag, max_num=1, min_num=1) if isinstance(tag, GroupIndexTag): hw_axis_expr = GroupHardwareAxisIndex(tag.axis) @@ -269,7 +269,7 @@ def set_up_hw_parallel_loops(codegen_state, schedule_index, next_func, other_inames_with_same_tag = [ other_iname for other_iname in kernel.all_inames() - if (get_iname_tags(kernel.iname_to_tags[other_iname], UniqueTag) + if (filter_iname_by_type(kernel.iname_to_tags[other_iname], UniqueTag) and other_iname != iname and any(_tag.key == tag.key for _tag in kernel.iname_to_tags[other_iname] if _tag))] diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py index 8e569e0de..ec2cb4064 100644 --- a/loopy/kernel/__init__.py +++ b/loopy/kernel/__init__.py @@ -44,7 +44,7 @@ from loopy.library.function import ( from loopy.diagnostic import CannotBranchDomainTree, LoopyError from loopy.tools import natsorted from loopy.diagnostic import StaticValueFindingError -from loopy.kernel.data import get_iname_tags +from loopy.kernel.data import filter_iname_by_type # {{{ unique var names @@ -143,7 +143,8 @@ class LoopKernel(ImmutableRecordWithoutPickling): .. attribute:: iname_to_tags A :class:`dict` mapping inames (as strings) - to tuple of instances of :class:`loopy.kernel.data.IndexTag`. + to set of instances of :class:`loopy.kernel.data.IndexTag`. + .. versionadded:: 2018.1 .. attribute:: function_manglers .. attribute:: symbol_manglers @@ -301,6 +302,18 @@ class LoopKernel(ImmutableRecordWithoutPickling): # }}} + # {{{ compatibility wrapper for iname_to_tag.get("iname") + + @property + def iname_to_tag(self): + from warnings import warn + warn("Since version 2018.1, inames can hold multiple tags. Use " + "iname_to_tags['iname'] instead. iname_to_tag.get('iname') will be " + "deprecated at version 2019.0.", DeprecationWarning) + return dict((k, list(v)[0]) for k, v in six.iteritems(self.iname_to_tags)) + + # }}} + # {{{ function mangling def mangle_function(self, identifier, arg_dtypes, ast_builder=None): @@ -711,8 +724,8 @@ class LoopKernel(ImmutableRecordWithoutPickling): from loopy.kernel.data import HardwareConcurrentTag for iname in cond_inames: - tags = get_iname_tags(self.iname_to_tags[iname], - HardwareConcurrentTag, 1) + tags = filter_iname_by_type(self.iname_to_tags[iname], + HardwareConcurrentTag, 1) if tags: tag, = tags tag_key_uses[tag.key].append(iname) @@ -723,9 +736,10 @@ class LoopKernel(ImmutableRecordWithoutPickling): multi_use_inames = set() for iname in cond_inames: - tags = get_iname_tags(self.iname_to_tags[iname], HardwareConcurrentTag) + tags = filter_iname_by_type(self.iname_to_tags[iname], + HardwareConcurrentTag) if tags: - tag, = get_iname_tags(tags, HardwareConcurrentTag, 1) + tag, = filter_iname_by_type(tags, HardwareConcurrentTag, 1) if tag.key in multi_use_keys: multi_use_inames.add(iname) @@ -959,17 +973,18 @@ class LoopKernel(ImmutableRecordWithoutPickling): for iname in all_inames_by_insns: tags = self.iname_to_tags[iname] - if get_iname_tags(tags, GroupIndexTag): + if filter_iname_by_type(tags, GroupIndexTag): tgt_dict = global_sizes - elif get_iname_tags(tags, LocalIndexTag): + elif filter_iname_by_type(tags, LocalIndexTag): tgt_dict = local_sizes - elif get_iname_tags(tags, AutoLocalIndexTagBase) and not ignore_auto: + elif (filter_iname_by_type(tags, AutoLocalIndexTagBase) + and not ignore_auto): raise RuntimeError("cannot find grid sizes if automatic " "local index tags are present") else: continue - tag, = get_iname_tags(tags, (GroupIndexTag, LocalIndexTag), 1) + tag, = filter_iname_by_type(tags, (GroupIndexTag, LocalIndexTag), 1) size = self.get_iname_bounds(iname).size @@ -1176,8 +1191,11 @@ class LoopKernel(ImmutableRecordWithoutPickling): if show_labels: lines.append("INAME IMPLEMENTATION TAGS:") for iname in natsorted(kernel.all_inames()): - line = "%s: %s" % (iname, ", ".join( - str(tag) for tag in kernel.iname_to_tags[iname])) + if not kernel.iname_to_tags[iname]: + tags = "None" + else: + tags = ", ".join(str(tag) for tag in kernel.iname_to_tags[iname]) + line = "%s: %s" % (iname, tags) lines.append(line) if "variables" in what and kernel.temporary_variables: diff --git a/loopy/kernel/data.py b/loopy/kernel/data.py index defadd632..1abb3cc90 100644 --- a/loopy/kernel/data.py +++ b/loopy/kernel/data.py @@ -55,7 +55,7 @@ class auto(object): # noqa # {{{ iname tags -def get_iname_tags(tags, tag_type, max_num=None, min_num=None): +def filter_iname_by_type(tags, tag_type, max_num=None, min_num=None): """Return a subset of *tags* that matches type *tag_type*. Raises exception if the number of tags found were greater than *max_num* or less than *min_num*W. diff --git a/loopy/kernel/tools.py b/loopy/kernel/tools.py index b806b2e2b..65bd8070d 100644 --- a/loopy/kernel/tools.py +++ b/loopy/kernel/tools.py @@ -36,7 +36,7 @@ from islpy import dim_type from loopy.diagnostic import LoopyError, warn_with_kernel from pytools import memoize_on_first_arg from loopy.tools import natsorted -from loopy.kernel.data import get_iname_tags +from loopy.kernel.data import filter_iname_by_type import logging logger = logging.getLogger(__name__) @@ -632,7 +632,7 @@ def is_domain_dependent_on_inames(kernel, domain_index, inames): # {{{ rank inames by stride def get_auto_axis_iname_ranking_by_stride(kernel, insn): - from loopy.kernel.data import ImageArg, ValueArg, get_iname_tags + from loopy.kernel.data import ImageArg, ValueArg, filter_iname_by_type approximate_arg_values = {} for arg in kernel.args: @@ -678,7 +678,7 @@ def get_auto_axis_iname_ranking_by_stride(kernel, insn): from loopy.kernel.data import AutoLocalIndexTagBase auto_axis_inames = set( iname for iname in kernel.insn_inames(insn) - if get_iname_tags(kernel.iname_to_tags[iname], AutoLocalIndexTagBase)) + if filter_iname_by_type(kernel.iname_to_tags[iname], AutoLocalIndexTagBase)) # }}} @@ -754,7 +754,7 @@ def assign_automatic_axes(kernel, axis=0, local_size=None): # to set() from tuple() from loopy.kernel.data import (AutoLocalIndexTagBase, LocalIndexTag, - get_iname_tags) + filter_iname_by_type) # Realize that at this point in time, axis lengths are already # fixed. So we compute them once and pass them to our recursive @@ -834,7 +834,8 @@ def assign_automatic_axes(kernel, axis=0, local_size=None): do_tagged_check=False), axis=recursion_axis, local_size=local_size) - if not get_iname_tags(kernel.iname_to_tags[iname], AutoLocalIndexTagBase): + if not filter_iname_by_type(kernel.iname_to_tags[iname], + AutoLocalIndexTagBase): raise LoopyError("trying to reassign '%s'" % iname) if new_tag: @@ -861,7 +862,8 @@ def assign_automatic_axes(kernel, axis=0, local_size=None): auto_axis_inames = [ iname for iname in kernel.insn_inames(insn) - if get_iname_tags(kernel.iname_to_tags[iname], AutoLocalIndexTagBase)] + if filter_iname_by_type(kernel.iname_to_tags[iname], + AutoLocalIndexTagBase)] if not auto_axis_inames: continue @@ -869,7 +871,7 @@ def assign_automatic_axes(kernel, axis=0, local_size=None): assigned_local_axes = set() for iname in kernel.insn_inames(insn): - tags = get_iname_tags(kernel.iname_to_tags[iname], LocalIndexTag) + tags = filter_iname_by_type(kernel.iname_to_tags[iname], LocalIndexTag) if tags: if len(tags) > 1: raise LoopyError("cannot have more than one LocalIndexTags") @@ -884,7 +886,7 @@ def assign_automatic_axes(kernel, axis=0, local_size=None): if iname_ranking is not None: for iname in iname_ranking: prev_tags = kernel.iname_to_tags[iname] - if get_iname_tags(prev_tags, AutoLocalIndexTagBase): + if filter_iname_by_type(prev_tags, AutoLocalIndexTagBase): return assign_axis(axis, iname, axis) else: @@ -1140,7 +1142,7 @@ def get_visual_iname_order_embedding(kernel): # nest. ilp_inames = frozenset(iname for iname in kernel.iname_to_tags - if get_iname_tags(kernel.iname_to_tags[iname], IlpBaseTag)) + if filter_iname_by_type(kernel.iname_to_tags[iname], IlpBaseTag)) iname_trie = SetTrie() diff --git a/loopy/preprocess.py b/loopy/preprocess.py index 38b213c05..ffa43bdb4 100644 --- a/loopy/preprocess.py +++ b/loopy/preprocess.py @@ -34,7 +34,7 @@ from pytools.persistent_dict import WriteOncePersistentDict from loopy.tools import LoopyKeyBuilder from loopy.version import DATA_MODEL_VERSION -from loopy.kernel.data import make_assignment, get_iname_tags +from loopy.kernel.data import make_assignment, filter_iname_by_type # for the benefit of loopy.statistics, for now from loopy.type_inference import infer_unknown_types @@ -136,7 +136,7 @@ def check_reduction_iname_uniqueness(kernel): def _get_compute_inames_tagged(kernel, insn, tag_base): return set(iname for iname in kernel.insn_inames(insn.id) - if get_iname_tags(kernel.iname_to_tags[iname], tag_base)) + if filter_iname_by_type(kernel.iname_to_tags[iname], tag_base)) def _get_assignee_inames_tagged(kernel, insn, tag_base, tv_names): @@ -146,7 +146,7 @@ def _get_assignee_inames_tagged(kernel, insn, tag_base, tv_names): insn.assignee_subscript_deps()) for iname in adeps & kernel.all_inames() if aname in tv_names - if get_iname_tags(kernel.iname_to_tags[iname], tag_base)) + if filter_iname_by_type(kernel.iname_to_tags[iname], tag_base)) def find_temporary_scope(kernel): @@ -291,20 +291,20 @@ def _classify_reduction_inames(kernel, inames): from loopy.kernel.data import ( LocalIndexTagBase, UnrolledIlpTag, UnrollTag, VectorizeTag, - ConcurrentTag, get_iname_tags) + ConcurrentTag, filter_iname_by_type) for iname in inames: iname_tags = kernel.iname_to_tags[iname] - if get_iname_tags(iname_tags, (UnrollTag, UnrolledIlpTag)): + if filter_iname_by_type(iname_tags, (UnrollTag, UnrolledIlpTag)): # These are nominally parallel, but we can live with # them as sequential. sequential.append(iname) - elif get_iname_tags(iname_tags, LocalIndexTagBase): + elif filter_iname_by_type(iname_tags, LocalIndexTagBase): local_par.append(iname) - elif get_iname_tags(iname_tags, (ConcurrentTag, VectorizeTag)): + elif filter_iname_by_type(iname_tags, (ConcurrentTag, VectorizeTag)): nonlocal_par.append(iname) else: @@ -1136,9 +1136,10 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True, outer_insn_inames = temp_kernel.insn_inames(insn) - from loopy.kernel.data import LocalIndexTagBase, get_iname_tags + from loopy.kernel.data import LocalIndexTagBase, filter_iname_by_type outer_local_inames = tuple(oiname for oiname in outer_insn_inames - if get_iname_tags(kernel.iname_to_tags[oiname], LocalIndexTagBase)) + if filter_iname_by_type( + kernel.iname_to_tags[oiname], LocalIndexTagBase)) from pymbolic import var outer_local_iname_vars = tuple( @@ -1471,9 +1472,10 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True, outer_insn_inames = temp_kernel.insn_inames(insn) - from loopy.kernel.data import LocalIndexTagBase, get_iname_tags + from loopy.kernel.data import LocalIndexTagBase, filter_iname_by_type outer_local_inames = tuple(oiname for oiname in outer_insn_inames - if get_iname_tags(kernel.iname_to_tags[oiname], LocalIndexTagBase) + if filter_iname_by_type(kernel.iname_to_tags[oiname], + LocalIndexTagBase) and oiname != sweep_iname) from pymbolic import var @@ -2150,7 +2152,7 @@ def preprocess_kernel(kernel, device=None): from loopy.kernel.data import AutoLocalIndexTagBase for iname, tags in six.iteritems(kernel.iname_to_tags): - if (get_iname_tags(tags, AutoLocalIndexTagBase) + if (filter_iname_by_type(tags, AutoLocalIndexTagBase) and iname in kernel.all_inames()): raise LoopyError("kernel with automatically-assigned " "local axes passed to preprocessing") diff --git a/loopy/schedule/__init__.py b/loopy/schedule/__init__.py index bda316bc1..fd3cbbe92 100644 --- a/loopy/schedule/__init__.py +++ b/loopy/schedule/__init__.py @@ -213,11 +213,11 @@ def find_loop_nest_with_map(kernel): result = {} from loopy.kernel.data import (ConcurrentTag, IlpBaseTag, VectorizeTag, - get_iname_tags) + filter_iname_by_type) all_nonpar_inames = set( iname for iname in kernel.all_inames() - if not get_iname_tags(kernel.iname_to_tags[iname], + if not filter_iname_by_type(kernel.iname_to_tags[iname], (ConcurrentTag, IlpBaseTag, VectorizeTag))) iname_to_insns = kernel.iname_to_insns() @@ -241,7 +241,7 @@ def find_loop_nest_around_map(kernel): iname_to_insns = kernel.iname_to_insns() # examine pairs of all inames--O(n**2), I know. - from loopy.kernel.data import IlpBaseTag, get_iname_tags + from loopy.kernel.data import IlpBaseTag, filter_iname_by_type for inner_iname in all_inames: result[inner_iname] = set() for outer_iname in all_inames: @@ -249,7 +249,7 @@ def find_loop_nest_around_map(kernel): continue tags = kernel.iname_to_tags[outer_iname] - if get_iname_tags(tags, IlpBaseTag): + if filter_iname_by_type(tags, IlpBaseTag): # ILP tags are special because they are parallel tags # and therefore 'in principle' nest around everything. # But they're realized by the scheduler as a loop @@ -279,10 +279,10 @@ def find_loop_insn_dep_map(kernel, loop_nest_with_map, loop_nest_around_map): result = {} from loopy.kernel.data import (ConcurrentTag, IlpBaseTag, VectorizeTag, - get_iname_tags) + filter_iname_by_type) for insn in kernel.instructions: for iname in kernel.insn_inames(insn): - if get_iname_tags(kernel.iname_to_tags[iname], ConcurrentTag): + if filter_iname_by_type(kernel.iname_to_tags[iname], ConcurrentTag): continue iname_dep = result.setdefault(iname, set()) @@ -313,7 +313,7 @@ def find_loop_insn_dep_map(kernel, loop_nest_with_map, loop_nest_around_map): continue tags = kernel.iname_to_tags[dep_insn_iname] - if get_iname_tags(tags, + if filter_iname_by_type(tags, (ConcurrentTag, IlpBaseTag, VectorizeTag)): # Parallel tags don't really nest, so we'll disregard # them here. @@ -1879,19 +1879,19 @@ def generate_loop_schedules_inner(kernel, debug_args={}): for insn_id in sched_item_to_insn_id(item)) from loopy.kernel.data import (IlpBaseTag, ConcurrentTag, VectorizeTag, - get_iname_tags) + filter_iname_by_type) ilp_inames = set( iname for iname, tags in six.iteritems(kernel.iname_to_tags) - if get_iname_tags(tags, IlpBaseTag)) + if filter_iname_by_type(tags, IlpBaseTag)) vec_inames = set( iname for iname, tags in six.iteritems(kernel.iname_to_tags) - if get_iname_tags(tags, VectorizeTag)) + if filter_iname_by_type(tags, VectorizeTag)) parallel_inames = set( iname for iname, tags in six.iteritems(kernel.iname_to_tags) - if get_iname_tags(tags, ConcurrentTag)) + if filter_iname_by_type(tags, ConcurrentTag)) loop_nest_with_map = find_loop_nest_with_map(kernel) loop_nest_around_map = find_loop_nest_around_map(kernel) diff --git a/loopy/statistics.py b/loopy/statistics.py index 4b0643873..d8805069b 100755 --- a/loopy/statistics.py +++ b/loopy/statistics.py @@ -917,7 +917,8 @@ class GlobalMemAccessCounter(MemAccessCounter): index = (index,) from loopy.symbolic import get_dependencies - from loopy.kernel.data import LocalIndexTag, GroupIndexTag, get_iname_tags + from loopy.kernel.data import (LocalIndexTag, GroupIndexTag, + filter_iname_by_type) my_inames = get_dependencies(index) & self.knl.all_inames() @@ -925,10 +926,10 @@ class GlobalMemAccessCounter(MemAccessCounter): lid_to_iname = {} gid_to_iname = {} for iname in my_inames: - tags = get_iname_tags(self.knl.iname_to_tags[iname], + tags = filter_iname_by_type(self.knl.iname_to_tags[iname], (GroupIndexTag, LocalIndexTag)) if tags: - tag, = get_iname_tags(tags, (GroupIndexTag, LocalIndexTag), 1) + tag, = filter_iname_by_type(tags, (GroupIndexTag, LocalIndexTag), 1) if isinstance(tag, LocalIndexTag): lid_to_iname[tag.axis] = iname else: @@ -1182,9 +1183,9 @@ def get_unused_hw_axes_factor(knl, insn, disregard_local_axes, space=None): l_used = set() from loopy.kernel.data import (LocalIndexTag, GroupIndexTag, - get_iname_tags) + filter_iname_by_type) for iname in knl.insn_inames(insn): - tags = get_iname_tags(knl.iname_to_tags[iname], + tags = filter_iname_by_type(knl.iname_to_tags[iname], (LocalIndexTag, GroupIndexTag), 1) if tags: tag, = tags @@ -1220,9 +1221,9 @@ def count_insn_runs(knl, insn, count_redundant_work, disregard_local_axes=False) insn_inames = knl.insn_inames(insn) if disregard_local_axes: - from loopy.kernel.data import LocalIndexTag, get_iname_tags + from loopy.kernel.data import LocalIndexTag, filter_iname_by_type insn_inames = [iname for iname in insn_inames if not - get_iname_tags(knl.iname_to_tags[iname], LocalIndexTag)] + filter_iname_by_type(knl.iname_to_tags[iname], LocalIndexTag)] inames_domain = knl.get_inames_domain(insn_inames) domain = (inames_domain.project_out_except( diff --git a/loopy/target/ispc.py b/loopy/target/ispc.py index b7edc517b..70befdfb4 100644 --- a/loopy/target/ispc.py +++ b/loopy/target/ispc.py @@ -418,15 +418,15 @@ class ISPCASTBuilder(CASTBuilder): new_terms = [] - from loopy.kernel.data import LocalIndexTag, get_iname_tags + from loopy.kernel.data import LocalIndexTag, filter_iname_by_type from loopy.symbolic import get_dependencies saw_l0 = False for term in terms: if (isinstance(term, Variable) - and get_iname_tags( + and filter_iname_by_type( kernel.iname_to_tags[term.name], LocalIndexTag)): - tag, = get_iname_tags(kernel.iname_to_tags[term.name], + tag, = filter_iname_by_type(kernel.iname_to_tags[term.name], LocalIndexTag, 1) if tag.axis == 0: if saw_l0: @@ -437,9 +437,9 @@ class ISPCASTBuilder(CASTBuilder): continue else: for dep in get_dependencies(term): - if get_iname_tags( + if filter_iname_by_type( kernel.iname_to_tags[dep], LocalIndexTag): - tag, = get_iname_tags(kernel.iname_to_tags[dep], + tag, = filter_iname_by_type(kernel.iname_to_tags[dep], LocalIndexTag, 1) if tag.axis == 0: raise LoopyError( diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index 87a488bb9..505e918d3 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -178,9 +178,9 @@ def _split_iname_backend(kernel, split_iname, """ existing_tags = kernel.iname_to_tags[split_iname] - from loopy.kernel.data import ForceSequentialTag, get_iname_tags + from loopy.kernel.data import ForceSequentialTag, filter_iname_by_type if (do_tagged_check and existing_tags - and not get_iname_tags(existing_tags, ForceSequentialTag)): + and not filter_iname_by_type(existing_tags, ForceSequentialTag)): raise LoopyError("cannot split already tagged iname '%s'" % split_iname) if split_iname not in kernel.all_inames(): @@ -678,7 +678,7 @@ def tag_inames(kernel, iname_to_tag, force=False, ignore_nonexistent=False): iname_to_tag = [(iname, parse_tag(tag)) for iname, tag in iname_to_tag] from loopy.kernel.data import (ConcurrentTag, ForceSequentialTag, - get_iname_tags) + filter_iname_by_type) # {{{ globbing @@ -718,12 +718,12 @@ def tag_inames(kernel, iname_to_tag, force=False, ignore_nonexistent=False): raise ValueError("cannot tag '%s'--not known" % iname) if (isinstance(new_tag, ConcurrentTag) - and get_iname_tags(old_tags, ForceSequentialTag)): + and filter_iname_by_type(old_tags, ForceSequentialTag)): raise ValueError("cannot tag '%s' as parallel--" "iname requires sequential execution" % iname) if (isinstance(new_tag, ForceSequentialTag) - and get_iname_tags(old_tags, ConcurrentTag)): + and filter_iname_by_type(old_tags, ConcurrentTag)): raise ValueError("'%s' is already tagged as parallel, " "but is now prohibited from being parallel " "(likely because of participation in a precompute or " @@ -1011,9 +1011,9 @@ def get_iname_duplication_options(knl, use_boostable_into=False): # Get the duplication options as a tuple of iname and a set for iname, insns in _get_iname_duplication_options(insn_iname_sets): # Check whether this iname has a parallel tag and discard it if so - from loopy.kernel.data import ConcurrentTag, get_iname_tags + from loopy.kernel.data import ConcurrentTag, filter_iname_by_type if (iname in knl.iname_to_tags - and get_iname_tags(knl.iname_to_tags[iname], ConcurrentTag)): + and filter_iname_by_type(knl.iname_to_tags[iname], ConcurrentTag)): continue # If we find a duplication option and to not use boostable_into @@ -1530,7 +1530,7 @@ def find_unused_axis_tag(kernel, kind, insn_match=None): """ used_axes = set() - from loopy.kernel.data import GroupIndexTag, LocalIndexTag, get_iname_tags + from loopy.kernel.data import GroupIndexTag, LocalIndexTag, filter_iname_by_type if isinstance(kind, str): found = False @@ -1550,7 +1550,7 @@ def find_unused_axis_tag(kernel, kind, insn_match=None): for insn in insns: for iname in kernel.insn_inames(insn): dim_tags = kernel.iname_to_tags[iname] - if get_iname_tags(dim_tags, kind): + if filter_iname_by_type(dim_tags, kind): used_axes.add(kind.axis) i = 0 diff --git a/loopy/transform/privatize.py b/loopy/transform/privatize.py index 481078403..b3d3878e0 100644 --- a/loopy/transform/privatize.py +++ b/loopy/transform/privatize.py @@ -101,6 +101,7 @@ def privatize_temporaries_with_inames( might become:: +<<<<<<< HEAD:loopy/transform/privatize.py for imatrix, i acc[imatrix] = 0 for k @@ -123,6 +124,9 @@ def privatize_temporaries_with_inames( for s in only_var_names.split(",")) wmap = kernel.writer_map() +======= + from loopy.kernel.data import IlpBaseTag, VectorizeTag, filter_iname_by_type +>>>>>>> dcbda4e... update based on feedback on MR:loopy/transform/ilp.py var_to_new_priv_axis_iname = {} @@ -195,7 +199,7 @@ def privatize_temporaries_with_inames( dim_tags = ["c"] * (len(shape) + len(extra_shape)) for i, iname in enumerate(inames): - if get_iname_tags(kernel.iname_to_tags[iname], VectorizeTag): + if filter_iname_by_type(kernel.iname_to_tags[iname], VectorizeTag): dim_tags[len(shape) + i] = "vec" new_temp_vars[tv.name] = tv.copy(shape=shape + extra_shape, diff --git a/loopy/transform/save.py b/loopy/transform/save.py index 6e6db328e..75ac16ae9 100644 --- a/loopy/transform/save.py +++ b/loopy/transform/save.py @@ -403,15 +403,15 @@ class TemporarySaver(object): continue from loopy.kernel.data import (GroupIndexTag, LocalIndexTag, - ConcurrentTag, get_iname_tags) + ConcurrentTag, filter_iname_by_type) - if get_iname_tags(tags, GroupIndexTag): - tag, = get_iname_tags(tags, GroupIndexTag, 1) + if filter_iname_by_type(tags, GroupIndexTag): + tag, = filter_iname_by_type(tags, GroupIndexTag, 1) my_group_tags.append(tag) - elif get_iname_tags(tags, LocalIndexTag): - tag, = get_iname_tags(tags, LocalIndexTag, 1) + elif filter_iname_by_type(tags, LocalIndexTag): + tag, = filter_iname_by_type(tags, LocalIndexTag, 1) my_local_tags.append(tag) - elif get_iname_tags(tags, ConcurrentTag): + elif filter_iname_by_type(tags, ConcurrentTag): raise LoopyError( "iname '%s' is tagged with '%s' - only " "group and local tags are supported for " -- GitLab From 798cb8e6040042467c61aeffdac6120284debeb2 Mon Sep 17 00:00:00 2001 From: tj-sun Date: Fri, 20 Apr 2018 13:18:32 +0100 Subject: [PATCH 17/20] rebase to master --- loopy/kernel/__init__.py | 3 ++- loopy/transform/iname.py | 7 +++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py index ec2cb4064..6e48e1d24 100644 --- a/loopy/kernel/__init__.py +++ b/loopy/kernel/__init__.py @@ -310,7 +310,8 @@ class LoopKernel(ImmutableRecordWithoutPickling): warn("Since version 2018.1, inames can hold multiple tags. Use " "iname_to_tags['iname'] instead. iname_to_tag.get('iname') will be " "deprecated at version 2019.0.", DeprecationWarning) - return dict((k, list(v)[0]) for k, v in six.iteritems(self.iname_to_tags)) + return dict((k, list(v)[0]) + for k, v in six.iteritems(self.iname_to_tags) if v) # }}} diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index 505e918d3..e41ff1f44 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -982,12 +982,12 @@ def get_iname_duplication_options(knl, use_boostable_into=False): Use :func:`has_schedulable_iname_nesting` to decide whether an iname needs to be duplicated in a given kernel. """ - from loopy.kernel.data import ConcurrentTag + from loopy.kernel.data import ConcurrentTag, filter_iname_by_type concurrent_inames = set( iname - for iname in knl.all_inames() - if isinstance(knl.iname_to_tag.get(iname), ConcurrentTag)) + for iname in knl.all_inames() if filter_iname_by_type( + knl.iname_to_tags[iname], ConcurrentTag)) # First we extract the minimal necessary information from the kernel if use_boostable_into: @@ -1011,7 +1011,6 @@ def get_iname_duplication_options(knl, use_boostable_into=False): # Get the duplication options as a tuple of iname and a set for iname, insns in _get_iname_duplication_options(insn_iname_sets): # Check whether this iname has a parallel tag and discard it if so - from loopy.kernel.data import ConcurrentTag, filter_iname_by_type if (iname in knl.iname_to_tags and filter_iname_by_type(knl.iname_to_tags[iname], ConcurrentTag)): continue -- GitLab From 234c340995d0bc2638419b07b49adfd960a9e722 Mon Sep 17 00:00:00 2001 From: tj-sun Date: Wed, 25 Apr 2018 12:00:55 +0100 Subject: [PATCH 18/20] rebase to master --- loopy/check.py | 5 +++-- loopy/kernel/__init__.py | 2 +- loopy/transform/privatize.py | 16 ++-------------- 3 files changed, 6 insertions(+), 17 deletions(-) diff --git a/loopy/check.py b/loopy/check.py index 8b94e7836..615b3751f 100644 --- a/loopy/check.py +++ b/loopy/check.py @@ -115,9 +115,10 @@ def check_loop_priority_inames_known(kernel): def check_multiple_tags_allowed(kernel): from loopy.kernel.data import (GroupIndexTag, LocalIndexTag, VectorizeTag, - UnrollTag, ForceSequentialTag, filter_iname_by_type) + UnrollTag, ForceSequentialTag, IlpBaseTag, filter_iname_by_type) illegal_combinations = [ - (GroupIndexTag, LocalIndexTag, VectorizeTag, UnrollTag, ForceSequentialTag) + (GroupIndexTag, LocalIndexTag, VectorizeTag, UnrollTag, ForceSequentialTag), + (IlpBaseTag, ForceSequentialTag) ] for iname, tags in six.iteritems(kernel.iname_to_tags): for comb in illegal_combinations: diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py index 6e48e1d24..906d3a3a3 100644 --- a/loopy/kernel/__init__.py +++ b/loopy/kernel/__init__.py @@ -310,7 +310,7 @@ class LoopKernel(ImmutableRecordWithoutPickling): warn("Since version 2018.1, inames can hold multiple tags. Use " "iname_to_tags['iname'] instead. iname_to_tag.get('iname') will be " "deprecated at version 2019.0.", DeprecationWarning) - return dict((k, list(v)[0]) + return dict((k, next(iter(v))) for k, v in six.iteritems(self.iname_to_tags) if v) # }}} diff --git a/loopy/transform/privatize.py b/loopy/transform/privatize.py index b3d3878e0..bef421006 100644 --- a/loopy/transform/privatize.py +++ b/loopy/transform/privatize.py @@ -84,24 +84,15 @@ def privatize_temporaries_with_inames( Example:: -<<<<<<< HEAD:loopy/transform/privatize.py -<<<<<<< HEAD:loopy/transform/privatize.py for imatrix, i acc = 0 for k acc = acc + a[imatrix, i, k] * vec[k] end end -======= - from loopy.kernel.data import IlpBaseTag, VectorizeTag, check_iname_tags ->>>>>>> d4c1d2e... change tags from set to tuple:loopy/transform/ilp.py -======= - from loopy.kernel.data import IlpBaseTag, VectorizeTag, get_iname_tags ->>>>>>> 38a4424... change tags from tuple to set:loopy/transform/ilp.py might become:: -<<<<<<< HEAD:loopy/transform/privatize.py for imatrix, i acc[imatrix] = 0 for k @@ -110,9 +101,9 @@ def privatize_temporaries_with_inames( end facilitating loop interchange of the *imatrix* loop. - .. versionadded:: 2018.1 """ + if isinstance(privatizing_inames, str): privatizing_inames = frozenset( s.strip() @@ -124,9 +115,6 @@ def privatize_temporaries_with_inames( for s in only_var_names.split(",")) wmap = kernel.writer_map() -======= - from loopy.kernel.data import IlpBaseTag, VectorizeTag, filter_iname_by_type ->>>>>>> dcbda4e... update based on feedback on MR:loopy/transform/ilp.py var_to_new_priv_axis_iname = {} @@ -186,7 +174,7 @@ def privatize_temporaries_with_inames( # {{{ change temporary variables - from loopy.kernel.data import VectorizeTag + from loopy.kernel.data import VectorizeTag, filter_iname_by_type new_temp_vars = kernel.temporary_variables.copy() for tv_name, inames in six.iteritems(var_to_new_priv_axis_iname): -- GitLab From 8ca78fc214adc5006f30eb56c026726839308e63 Mon Sep 17 00:00:00 2001 From: tj-sun Date: Wed, 2 May 2018 12:34:09 +0100 Subject: [PATCH 19/20] update based on feedback on MR --- loopy/check.py | 32 +++++++++---------- loopy/codegen/bounds.py | 9 +++--- loopy/codegen/control.py | 14 ++++----- loopy/codegen/loop.py | 8 ++--- loopy/kernel/__init__.py | 16 +++++----- loopy/kernel/data.py | 2 +- loopy/kernel/tools.py | 21 +++++++------ loopy/preprocess.py | 26 +++++++-------- loopy/schedule/__init__.py | 22 ++++++------- loopy/statistics.py | 15 ++++----- loopy/target/ispc.py | 14 ++++----- loopy/transform/iname.py | 61 +++++++++++++++++++++--------------- loopy/transform/privatize.py | 4 +-- loopy/transform/save.py | 12 +++---- 14 files changed, 135 insertions(+), 121 deletions(-) diff --git a/loopy/check.py b/loopy/check.py index 615b3751f..17b1186ab 100644 --- a/loopy/check.py +++ b/loopy/check.py @@ -115,26 +115,26 @@ def check_loop_priority_inames_known(kernel): def check_multiple_tags_allowed(kernel): from loopy.kernel.data import (GroupIndexTag, LocalIndexTag, VectorizeTag, - UnrollTag, ForceSequentialTag, IlpBaseTag, filter_iname_by_type) + UnrollTag, ForceSequentialTag, IlpBaseTag, filter_iname_tags_by_type) illegal_combinations = [ (GroupIndexTag, LocalIndexTag, VectorizeTag, UnrollTag, ForceSequentialTag), (IlpBaseTag, ForceSequentialTag) ] for iname, tags in six.iteritems(kernel.iname_to_tags): for comb in illegal_combinations: - if len(filter_iname_by_type(tags, comb)) > 1: + if len(filter_iname_tags_by_type(tags, comb)) > 1: raise LoopyError("iname {0} has illegal combination of " "tags: {1}".format(iname, tags)) def check_for_double_use_of_hw_axes(kernel): - from loopy.kernel.data import UniqueTag, filter_iname_by_type + from loopy.kernel.data import UniqueTag, filter_iname_tags_by_type for insn in kernel.instructions: insn_tag_keys = set() for iname in kernel.insn_inames(insn): tags = kernel.iname_to_tags[iname] - for tag in filter_iname_by_type(tags, UniqueTag): + for tag in filter_iname_tags_by_type(tags, UniqueTag): key = tag.key if key in insn_tag_keys: raise LoopyError("instruction '%s' has multiple " @@ -182,7 +182,7 @@ def _is_racing_iname_tag(tv, tag): def check_for_write_races(kernel): - from loopy.kernel.data import ConcurrentTag, filter_iname_by_type + from loopy.kernel.data import ConcurrentTag, filter_iname_tags_by_type for insn in kernel.instructions: for assignee_name, assignee_indices in zip( @@ -201,7 +201,7 @@ def check_for_write_races(kernel): raceable_parallel_insn_inames = set( iname for iname in kernel.insn_inames(insn) - if filter_iname_by_type(kernel.iname_to_tags[iname], + if filter_iname_tags_by_type(kernel.iname_to_tags[iname], ConcurrentTag)) elif assignee_name in kernel.temporary_variables: @@ -245,12 +245,12 @@ def check_for_orphaned_user_hardware_axes(kernel): def check_for_data_dependent_parallel_bounds(kernel): - from loopy.kernel.data import ConcurrentTag, filter_iname_by_type + from loopy.kernel.data import ConcurrentTag, filter_iname_tags_by_type for i, dom in enumerate(kernel.domains): dom_inames = set(dom.get_var_names(dim_type.set)) par_inames = set(iname for iname in dom_inames - if filter_iname_by_type(kernel.iname_to_tags[iname], ConcurrentTag)) + if filter_iname_tags_by_type(kernel.iname_to_tags[iname], ConcurrentTag)) if not par_inames: continue @@ -667,7 +667,7 @@ def _check_for_unused_hw_axes_in_kernel_chunk(kernel, sched_index=None): # alternative: just disregard length-1 dimensions? from loopy.kernel.data import (LocalIndexTag, AutoLocalIndexTagBase, - GroupIndexTag, filter_iname_by_type) + GroupIndexTag, filter_iname_tags_by_type) while i < loop_end_i: sched_item = kernel.schedule[i] @@ -687,13 +687,13 @@ def _check_for_unused_hw_axes_in_kernel_chunk(kernel, sched_index=None): for iname in kernel.insn_inames(insn): tags = kernel.iname_to_tags[iname] - if filter_iname_by_type(tags, LocalIndexTag): - tag, = filter_iname_by_type(tags, LocalIndexTag, 1) + if filter_iname_tags_by_type(tags, LocalIndexTag): + tag, = filter_iname_tags_by_type(tags, LocalIndexTag, 1) local_axes_used.add(tag.axis) - elif filter_iname_by_type(tags, GroupIndexTag): - tag, = filter_iname_by_type(tags, GroupIndexTag, 1) + elif filter_iname_tags_by_type(tags, GroupIndexTag): + tag, = filter_iname_tags_by_type(tags, GroupIndexTag, 1) group_axes_used.add(tag.axis) - elif filter_iname_by_type(tags, AutoLocalIndexTagBase): + elif filter_iname_tags_by_type(tags, AutoLocalIndexTagBase): raise LoopyError("auto local tag encountered") if group_axes != group_axes_used: @@ -938,11 +938,11 @@ def check_implemented_domains(kernel, implemented_domains, code=None): .project_out_except(insn_inames, [dim_type.set])) from loopy.kernel.instruction import BarrierInstruction - from loopy.kernel.data import LocalIndexTag, filter_iname_by_type + from loopy.kernel.data import LocalIndexTag, filter_iname_tags_by_type if isinstance(insn, BarrierInstruction): # project out local-id-mapped inames, solves #94 on gitlab non_lid_inames = frozenset(iname for iname in insn_inames - if not filter_iname_by_type( + if not filter_iname_tags_by_type( kernel.iname_to_tags[iname], LocalIndexTag)) insn_impl_domain = insn_impl_domain.project_out_except( non_lid_inames, [dim_type.set]) diff --git a/loopy/codegen/bounds.py b/loopy/codegen/bounds.py index 284cd1c53..a6b70359a 100644 --- a/loopy/codegen/bounds.py +++ b/loopy/codegen/bounds.py @@ -59,7 +59,7 @@ def get_usable_inames_for_conditional(kernel, sched_index): from loopy.schedule import ( find_active_inames_at, get_insn_ids_for_block_at, has_barrier_within) from loopy.kernel.data import (ConcurrentTag, LocalIndexTagBase, - IlpBaseTag, filter_iname_by_type) + IlpBaseTag, filter_iname_tags_by_type) result = find_active_inames_at(kernel, sched_index) crosses_barrier = has_barrier_within(kernel, sched_index) @@ -98,9 +98,10 @@ def get_usable_inames_for_conditional(kernel, sched_index): # at the innermost level of nesting. if ( - filter_iname_by_type(tags, ConcurrentTag) - and not (filter_iname_by_type(tags, LocalIndexTagBase) - and crosses_barrier) and not filter_iname_by_type(tags, IlpBaseTag) + filter_iname_tags_by_type(tags, ConcurrentTag) + and not (filter_iname_tags_by_type(tags, LocalIndexTagBase) + and crosses_barrier) + and not filter_iname_tags_by_type(tags, IlpBaseTag) ): result.add(iname) diff --git a/loopy/codegen/control.py b/loopy/codegen/control.py index b8cedd1a5..fcf8ea3b4 100644 --- a/loopy/codegen/control.py +++ b/loopy/codegen/control.py @@ -41,7 +41,7 @@ def get_admissible_conditional_inames_for(codegen_state, sched_index): kernel = codegen_state.kernel from loopy.kernel.data import (LocalIndexTag, HardwareConcurrentTag, - filter_iname_by_type) + filter_iname_tags_by_type) from loopy.schedule import find_active_inames_at, has_barrier_within result = find_active_inames_at(kernel, sched_index) @@ -49,9 +49,9 @@ def get_admissible_conditional_inames_for(codegen_state, sched_index): has_barrier = has_barrier_within(kernel, sched_index) for iname, tags in six.iteritems(kernel.iname_to_tags): - if (filter_iname_by_type(tags, HardwareConcurrentTag) + if (filter_iname_tags_by_type(tags, HardwareConcurrentTag) and codegen_state.is_generating_device_code): - if not has_barrier or not filter_iname_by_type(tags, LocalIndexTag): + if not has_barrier or not filter_iname_tags_by_type(tags, LocalIndexTag): result.add(iname) return frozenset(result) @@ -138,12 +138,12 @@ def generate_code_for_sched_index(codegen_state, sched_index): from loopy.kernel.data import (UnrolledIlpTag, UnrollTag, ForceSequentialTag, LoopedIlpTag, VectorizeTag, - InOrderSequentialSequentialTag, filter_iname_by_type) - if filter_iname_by_type(tags, (UnrollTag, UnrolledIlpTag)): + InOrderSequentialSequentialTag, filter_iname_tags_by_type) + if filter_iname_tags_by_type(tags, (UnrollTag, UnrolledIlpTag)): func = generate_unroll_loop - elif filter_iname_by_type(tags, VectorizeTag): + elif filter_iname_tags_by_type(tags, VectorizeTag): func = generate_vectorize_loop - elif len(tags) == 0 or filter_iname_by_type(tags, (LoopedIlpTag, + elif len(tags) == 0 or filter_iname_tags_by_type(tags, (LoopedIlpTag, ForceSequentialTag, InOrderSequentialSequentialTag)): func = generate_sequential_loop_dim_code else: diff --git a/loopy/codegen/loop.py b/loopy/codegen/loop.py index 015dc8d76..7b44fd7b2 100644 --- a/loopy/codegen/loop.py +++ b/loopy/codegen/loop.py @@ -231,7 +231,7 @@ def set_up_hw_parallel_loops(codegen_state, schedule_index, next_func, kernel = codegen_state.kernel from loopy.kernel.data import (UniqueTag, HardwareConcurrentTag, - LocalIndexTag, GroupIndexTag, filter_iname_by_type) + LocalIndexTag, GroupIndexTag, filter_iname_tags_by_type) from loopy.schedule import get_insn_ids_for_block_at insn_ids_for_block = get_insn_ids_for_block_at(kernel.schedule, schedule_index) @@ -242,7 +242,7 @@ def set_up_hw_parallel_loops(codegen_state, schedule_index, next_func, all_inames_by_insns |= kernel.insn_inames(insn_id) hw_inames_left = [iname for iname in all_inames_by_insns - if filter_iname_by_type(kernel.iname_to_tags[iname], + if filter_iname_tags_by_type(kernel.iname_to_tags[iname], HardwareConcurrentTag)] if not hw_inames_left: @@ -258,7 +258,7 @@ def set_up_hw_parallel_loops(codegen_state, schedule_index, next_func, from loopy.symbolic import GroupHardwareAxisIndex, LocalHardwareAxisIndex - tag, = filter_iname_by_type(tags, UniqueTag, max_num=1, min_num=1) + tag, = filter_iname_tags_by_type(tags, UniqueTag, max_num=1, min_num=1) if isinstance(tag, GroupIndexTag): hw_axis_expr = GroupHardwareAxisIndex(tag.axis) @@ -269,7 +269,7 @@ def set_up_hw_parallel_loops(codegen_state, schedule_index, next_func, other_inames_with_same_tag = [ other_iname for other_iname in kernel.all_inames() - if (filter_iname_by_type(kernel.iname_to_tags[other_iname], UniqueTag) + if (filter_iname_tags_by_type(kernel.iname_to_tags[other_iname], UniqueTag) and other_iname != iname and any(_tag.key == tag.key for _tag in kernel.iname_to_tags[other_iname] if _tag))] diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py index 906d3a3a3..26e928692 100644 --- a/loopy/kernel/__init__.py +++ b/loopy/kernel/__init__.py @@ -44,7 +44,7 @@ from loopy.library.function import ( from loopy.diagnostic import CannotBranchDomainTree, LoopyError from loopy.tools import natsorted from loopy.diagnostic import StaticValueFindingError -from loopy.kernel.data import filter_iname_by_type +from loopy.kernel.data import filter_iname_tags_by_type # {{{ unique var names @@ -725,7 +725,7 @@ class LoopKernel(ImmutableRecordWithoutPickling): from loopy.kernel.data import HardwareConcurrentTag for iname in cond_inames: - tags = filter_iname_by_type(self.iname_to_tags[iname], + tags = filter_iname_tags_by_type(self.iname_to_tags[iname], HardwareConcurrentTag, 1) if tags: tag, = tags @@ -737,10 +737,10 @@ class LoopKernel(ImmutableRecordWithoutPickling): multi_use_inames = set() for iname in cond_inames: - tags = filter_iname_by_type(self.iname_to_tags[iname], + tags = filter_iname_tags_by_type(self.iname_to_tags[iname], HardwareConcurrentTag) if tags: - tag, = filter_iname_by_type(tags, HardwareConcurrentTag, 1) + tag, = filter_iname_tags_by_type(tags, HardwareConcurrentTag, 1) if tag.key in multi_use_keys: multi_use_inames.add(iname) @@ -974,18 +974,18 @@ class LoopKernel(ImmutableRecordWithoutPickling): for iname in all_inames_by_insns: tags = self.iname_to_tags[iname] - if filter_iname_by_type(tags, GroupIndexTag): + if filter_iname_tags_by_type(tags, GroupIndexTag): tgt_dict = global_sizes - elif filter_iname_by_type(tags, LocalIndexTag): + elif filter_iname_tags_by_type(tags, LocalIndexTag): tgt_dict = local_sizes - elif (filter_iname_by_type(tags, AutoLocalIndexTagBase) + elif (filter_iname_tags_by_type(tags, AutoLocalIndexTagBase) and not ignore_auto): raise RuntimeError("cannot find grid sizes if automatic " "local index tags are present") else: continue - tag, = filter_iname_by_type(tags, (GroupIndexTag, LocalIndexTag), 1) + tag, = filter_iname_tags_by_type(tags, (GroupIndexTag, LocalIndexTag), 1) size = self.get_iname_bounds(iname).size diff --git a/loopy/kernel/data.py b/loopy/kernel/data.py index 1abb3cc90..35a8e3b1d 100644 --- a/loopy/kernel/data.py +++ b/loopy/kernel/data.py @@ -55,7 +55,7 @@ class auto(object): # noqa # {{{ iname tags -def filter_iname_by_type(tags, tag_type, max_num=None, min_num=None): +def filter_iname_tags_by_type(tags, tag_type, max_num=None, min_num=None): """Return a subset of *tags* that matches type *tag_type*. Raises exception if the number of tags found were greater than *max_num* or less than *min_num*W. diff --git a/loopy/kernel/tools.py b/loopy/kernel/tools.py index 65bd8070d..3f8d118c4 100644 --- a/loopy/kernel/tools.py +++ b/loopy/kernel/tools.py @@ -36,7 +36,7 @@ from islpy import dim_type from loopy.diagnostic import LoopyError, warn_with_kernel from pytools import memoize_on_first_arg from loopy.tools import natsorted -from loopy.kernel.data import filter_iname_by_type +from loopy.kernel.data import filter_iname_tags_by_type import logging logger = logging.getLogger(__name__) @@ -632,7 +632,7 @@ def is_domain_dependent_on_inames(kernel, domain_index, inames): # {{{ rank inames by stride def get_auto_axis_iname_ranking_by_stride(kernel, insn): - from loopy.kernel.data import ImageArg, ValueArg, filter_iname_by_type + from loopy.kernel.data import ImageArg, ValueArg, filter_iname_tags_by_type approximate_arg_values = {} for arg in kernel.args: @@ -678,7 +678,8 @@ def get_auto_axis_iname_ranking_by_stride(kernel, insn): from loopy.kernel.data import AutoLocalIndexTagBase auto_axis_inames = set( iname for iname in kernel.insn_inames(insn) - if filter_iname_by_type(kernel.iname_to_tags[iname], AutoLocalIndexTagBase)) + if filter_iname_tags_by_type( + kernel.iname_to_tags[iname], AutoLocalIndexTagBase)) # }}} @@ -754,7 +755,7 @@ def assign_automatic_axes(kernel, axis=0, local_size=None): # to set() from tuple() from loopy.kernel.data import (AutoLocalIndexTagBase, LocalIndexTag, - filter_iname_by_type) + filter_iname_tags_by_type) # Realize that at this point in time, axis lengths are already # fixed. So we compute them once and pass them to our recursive @@ -834,7 +835,7 @@ def assign_automatic_axes(kernel, axis=0, local_size=None): do_tagged_check=False), axis=recursion_axis, local_size=local_size) - if not filter_iname_by_type(kernel.iname_to_tags[iname], + if not filter_iname_tags_by_type(kernel.iname_to_tags[iname], AutoLocalIndexTagBase): raise LoopyError("trying to reassign '%s'" % iname) @@ -862,7 +863,7 @@ def assign_automatic_axes(kernel, axis=0, local_size=None): auto_axis_inames = [ iname for iname in kernel.insn_inames(insn) - if filter_iname_by_type(kernel.iname_to_tags[iname], + if filter_iname_tags_by_type(kernel.iname_to_tags[iname], AutoLocalIndexTagBase)] if not auto_axis_inames: @@ -871,7 +872,8 @@ def assign_automatic_axes(kernel, axis=0, local_size=None): assigned_local_axes = set() for iname in kernel.insn_inames(insn): - tags = filter_iname_by_type(kernel.iname_to_tags[iname], LocalIndexTag) + tags = filter_iname_tags_by_type( + kernel.iname_to_tags[iname], LocalIndexTag) if tags: if len(tags) > 1: raise LoopyError("cannot have more than one LocalIndexTags") @@ -886,7 +888,8 @@ def assign_automatic_axes(kernel, axis=0, local_size=None): if iname_ranking is not None: for iname in iname_ranking: prev_tags = kernel.iname_to_tags[iname] - if filter_iname_by_type(prev_tags, AutoLocalIndexTagBase): + if filter_iname_tags_by_type( + prev_tags, AutoLocalIndexTagBase): return assign_axis(axis, iname, axis) else: @@ -1142,7 +1145,7 @@ def get_visual_iname_order_embedding(kernel): # nest. ilp_inames = frozenset(iname for iname in kernel.iname_to_tags - if filter_iname_by_type(kernel.iname_to_tags[iname], IlpBaseTag)) + if filter_iname_tags_by_type(kernel.iname_to_tags[iname], IlpBaseTag)) iname_trie = SetTrie() diff --git a/loopy/preprocess.py b/loopy/preprocess.py index ffa43bdb4..0b19ff416 100644 --- a/loopy/preprocess.py +++ b/loopy/preprocess.py @@ -34,7 +34,7 @@ from pytools.persistent_dict import WriteOncePersistentDict from loopy.tools import LoopyKeyBuilder from loopy.version import DATA_MODEL_VERSION -from loopy.kernel.data import make_assignment, filter_iname_by_type +from loopy.kernel.data import make_assignment, filter_iname_tags_by_type # for the benefit of loopy.statistics, for now from loopy.type_inference import infer_unknown_types @@ -136,7 +136,7 @@ def check_reduction_iname_uniqueness(kernel): def _get_compute_inames_tagged(kernel, insn, tag_base): return set(iname for iname in kernel.insn_inames(insn.id) - if filter_iname_by_type(kernel.iname_to_tags[iname], tag_base)) + if filter_iname_tags_by_type(kernel.iname_to_tags[iname], tag_base)) def _get_assignee_inames_tagged(kernel, insn, tag_base, tv_names): @@ -146,7 +146,7 @@ def _get_assignee_inames_tagged(kernel, insn, tag_base, tv_names): insn.assignee_subscript_deps()) for iname in adeps & kernel.all_inames() if aname in tv_names - if filter_iname_by_type(kernel.iname_to_tags[iname], tag_base)) + if filter_iname_tags_by_type(kernel.iname_to_tags[iname], tag_base)) def find_temporary_scope(kernel): @@ -291,20 +291,20 @@ def _classify_reduction_inames(kernel, inames): from loopy.kernel.data import ( LocalIndexTagBase, UnrolledIlpTag, UnrollTag, VectorizeTag, - ConcurrentTag, filter_iname_by_type) + ConcurrentTag, filter_iname_tags_by_type) for iname in inames: iname_tags = kernel.iname_to_tags[iname] - if filter_iname_by_type(iname_tags, (UnrollTag, UnrolledIlpTag)): + if filter_iname_tags_by_type(iname_tags, (UnrollTag, UnrolledIlpTag)): # These are nominally parallel, but we can live with # them as sequential. sequential.append(iname) - elif filter_iname_by_type(iname_tags, LocalIndexTagBase): + elif filter_iname_tags_by_type(iname_tags, LocalIndexTagBase): local_par.append(iname) - elif filter_iname_by_type(iname_tags, (ConcurrentTag, VectorizeTag)): + elif filter_iname_tags_by_type(iname_tags, (ConcurrentTag, VectorizeTag)): nonlocal_par.append(iname) else: @@ -912,8 +912,6 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True, outer (sweep) iname. """ - # TODO: reassigning tags needs some thinking here - logger.debug("%s: realize reduction" % kernel.name) new_insns = [] @@ -1136,9 +1134,9 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True, outer_insn_inames = temp_kernel.insn_inames(insn) - from loopy.kernel.data import LocalIndexTagBase, filter_iname_by_type + from loopy.kernel.data import LocalIndexTagBase, filter_iname_tags_by_type outer_local_inames = tuple(oiname for oiname in outer_insn_inames - if filter_iname_by_type( + if filter_iname_tags_by_type( kernel.iname_to_tags[oiname], LocalIndexTagBase)) from pymbolic import var @@ -1472,9 +1470,9 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True, outer_insn_inames = temp_kernel.insn_inames(insn) - from loopy.kernel.data import LocalIndexTagBase, filter_iname_by_type + from loopy.kernel.data import LocalIndexTagBase, filter_iname_tags_by_type outer_local_inames = tuple(oiname for oiname in outer_insn_inames - if filter_iname_by_type(kernel.iname_to_tags[oiname], + if filter_iname_tags_by_type(kernel.iname_to_tags[oiname], LocalIndexTagBase) and oiname != sweep_iname) @@ -2152,7 +2150,7 @@ def preprocess_kernel(kernel, device=None): from loopy.kernel.data import AutoLocalIndexTagBase for iname, tags in six.iteritems(kernel.iname_to_tags): - if (filter_iname_by_type(tags, AutoLocalIndexTagBase) + if (filter_iname_tags_by_type(tags, AutoLocalIndexTagBase) and iname in kernel.all_inames()): raise LoopyError("kernel with automatically-assigned " "local axes passed to preprocessing") diff --git a/loopy/schedule/__init__.py b/loopy/schedule/__init__.py index fd3cbbe92..616c8e62a 100644 --- a/loopy/schedule/__init__.py +++ b/loopy/schedule/__init__.py @@ -213,11 +213,11 @@ def find_loop_nest_with_map(kernel): result = {} from loopy.kernel.data import (ConcurrentTag, IlpBaseTag, VectorizeTag, - filter_iname_by_type) + filter_iname_tags_by_type) all_nonpar_inames = set( iname for iname in kernel.all_inames() - if not filter_iname_by_type(kernel.iname_to_tags[iname], + if not filter_iname_tags_by_type(kernel.iname_to_tags[iname], (ConcurrentTag, IlpBaseTag, VectorizeTag))) iname_to_insns = kernel.iname_to_insns() @@ -241,7 +241,7 @@ def find_loop_nest_around_map(kernel): iname_to_insns = kernel.iname_to_insns() # examine pairs of all inames--O(n**2), I know. - from loopy.kernel.data import IlpBaseTag, filter_iname_by_type + from loopy.kernel.data import IlpBaseTag, filter_iname_tags_by_type for inner_iname in all_inames: result[inner_iname] = set() for outer_iname in all_inames: @@ -249,7 +249,7 @@ def find_loop_nest_around_map(kernel): continue tags = kernel.iname_to_tags[outer_iname] - if filter_iname_by_type(tags, IlpBaseTag): + if filter_iname_tags_by_type(tags, IlpBaseTag): # ILP tags are special because they are parallel tags # and therefore 'in principle' nest around everything. # But they're realized by the scheduler as a loop @@ -279,10 +279,10 @@ def find_loop_insn_dep_map(kernel, loop_nest_with_map, loop_nest_around_map): result = {} from loopy.kernel.data import (ConcurrentTag, IlpBaseTag, VectorizeTag, - filter_iname_by_type) + filter_iname_tags_by_type) for insn in kernel.instructions: for iname in kernel.insn_inames(insn): - if filter_iname_by_type(kernel.iname_to_tags[iname], ConcurrentTag): + if filter_iname_tags_by_type(kernel.iname_to_tags[iname], ConcurrentTag): continue iname_dep = result.setdefault(iname, set()) @@ -313,7 +313,7 @@ def find_loop_insn_dep_map(kernel, loop_nest_with_map, loop_nest_around_map): continue tags = kernel.iname_to_tags[dep_insn_iname] - if filter_iname_by_type(tags, + if filter_iname_tags_by_type(tags, (ConcurrentTag, IlpBaseTag, VectorizeTag)): # Parallel tags don't really nest, so we'll disregard # them here. @@ -1879,19 +1879,19 @@ def generate_loop_schedules_inner(kernel, debug_args={}): for insn_id in sched_item_to_insn_id(item)) from loopy.kernel.data import (IlpBaseTag, ConcurrentTag, VectorizeTag, - filter_iname_by_type) + filter_iname_tags_by_type) ilp_inames = set( iname for iname, tags in six.iteritems(kernel.iname_to_tags) - if filter_iname_by_type(tags, IlpBaseTag)) + if filter_iname_tags_by_type(tags, IlpBaseTag)) vec_inames = set( iname for iname, tags in six.iteritems(kernel.iname_to_tags) - if filter_iname_by_type(tags, VectorizeTag)) + if filter_iname_tags_by_type(tags, VectorizeTag)) parallel_inames = set( iname for iname, tags in six.iteritems(kernel.iname_to_tags) - if filter_iname_by_type(tags, ConcurrentTag)) + if filter_iname_tags_by_type(tags, ConcurrentTag)) loop_nest_with_map = find_loop_nest_with_map(kernel) loop_nest_around_map = find_loop_nest_around_map(kernel) diff --git a/loopy/statistics.py b/loopy/statistics.py index d8805069b..97eded2e3 100755 --- a/loopy/statistics.py +++ b/loopy/statistics.py @@ -918,7 +918,7 @@ class GlobalMemAccessCounter(MemAccessCounter): from loopy.symbolic import get_dependencies from loopy.kernel.data import (LocalIndexTag, GroupIndexTag, - filter_iname_by_type) + filter_iname_tags_by_type) my_inames = get_dependencies(index) & self.knl.all_inames() @@ -926,10 +926,11 @@ class GlobalMemAccessCounter(MemAccessCounter): lid_to_iname = {} gid_to_iname = {} for iname in my_inames: - tags = filter_iname_by_type(self.knl.iname_to_tags[iname], + tags = filter_iname_tags_by_type(self.knl.iname_to_tags[iname], (GroupIndexTag, LocalIndexTag)) if tags: - tag, = filter_iname_by_type(tags, (GroupIndexTag, LocalIndexTag), 1) + tag, = filter_iname_tags_by_type( + tags, (GroupIndexTag, LocalIndexTag), 1) if isinstance(tag, LocalIndexTag): lid_to_iname[tag.axis] = iname else: @@ -1183,9 +1184,9 @@ def get_unused_hw_axes_factor(knl, insn, disregard_local_axes, space=None): l_used = set() from loopy.kernel.data import (LocalIndexTag, GroupIndexTag, - filter_iname_by_type) + filter_iname_tags_by_type) for iname in knl.insn_inames(insn): - tags = filter_iname_by_type(knl.iname_to_tags[iname], + tags = filter_iname_tags_by_type(knl.iname_to_tags[iname], (LocalIndexTag, GroupIndexTag), 1) if tags: tag, = tags @@ -1221,9 +1222,9 @@ def count_insn_runs(knl, insn, count_redundant_work, disregard_local_axes=False) insn_inames = knl.insn_inames(insn) if disregard_local_axes: - from loopy.kernel.data import LocalIndexTag, filter_iname_by_type + from loopy.kernel.data import LocalIndexTag, filter_iname_tags_by_type insn_inames = [iname for iname in insn_inames if not - filter_iname_by_type(knl.iname_to_tags[iname], LocalIndexTag)] + filter_iname_tags_by_type(knl.iname_to_tags[iname], LocalIndexTag)] inames_domain = knl.get_inames_domain(insn_inames) domain = (inames_domain.project_out_except( diff --git a/loopy/target/ispc.py b/loopy/target/ispc.py index 70befdfb4..8e07eb692 100644 --- a/loopy/target/ispc.py +++ b/loopy/target/ispc.py @@ -418,16 +418,16 @@ class ISPCASTBuilder(CASTBuilder): new_terms = [] - from loopy.kernel.data import LocalIndexTag, filter_iname_by_type + from loopy.kernel.data import LocalIndexTag, filter_iname_tags_by_type from loopy.symbolic import get_dependencies saw_l0 = False for term in terms: if (isinstance(term, Variable) - and filter_iname_by_type( + and filter_iname_tags_by_type( kernel.iname_to_tags[term.name], LocalIndexTag)): - tag, = filter_iname_by_type(kernel.iname_to_tags[term.name], - LocalIndexTag, 1) + tag, = filter_iname_tags_by_type( + kernel.iname_to_tags[term.name], LocalIndexTag, 1) if tag.axis == 0: if saw_l0: raise LoopyError( @@ -437,10 +437,10 @@ class ISPCASTBuilder(CASTBuilder): continue else: for dep in get_dependencies(term): - if filter_iname_by_type( + if filter_iname_tags_by_type( kernel.iname_to_tags[dep], LocalIndexTag): - tag, = filter_iname_by_type(kernel.iname_to_tags[dep], - LocalIndexTag, 1) + tag, = filter_iname_tags_by_type( + kernel.iname_to_tags[dep], LocalIndexTag, 1) if tag.axis == 0: raise LoopyError( "streaming store must have stride 1 in " diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index e41ff1f44..423ccfb55 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -178,9 +178,9 @@ def _split_iname_backend(kernel, split_iname, """ existing_tags = kernel.iname_to_tags[split_iname] - from loopy.kernel.data import ForceSequentialTag, filter_iname_by_type + from loopy.kernel.data import ForceSequentialTag, filter_iname_tags_by_type if (do_tagged_check and existing_tags - and not filter_iname_by_type(existing_tags, ForceSequentialTag)): + and not filter_iname_tags_by_type(existing_tags, ForceSequentialTag)): raise LoopyError("cannot split already tagged iname '%s'" % split_iname) if split_iname not in kernel.all_inames(): @@ -625,26 +625,21 @@ def tag_inames(kernel, iname_to_tag, force=False, ignore_nonexistent=False): """Tag an iname :arg iname_to_tag: a list of tuples ``(iname, new_tag)``. *new_tag* is given - as an instance of a subclass of :class:`loopy.kernel.data.IndexTag` or - as a string as shown in :ref:`iname-tags`. May also be a dictionary - for backwards compatibility. *iname* may also be a wildcard using ``*`` - and ``?``. + as an instance of a subclass of :class:`loopy.kernel.data.IndexTag` or an + iterable of which, or as a string as shown in :ref:`iname-tags`. May also + be a dictionary for backwards compatibility. *iname* may also be a wildcard + using ``*`` and ``?``. .. versionchanged:: 2016.3 Added wildcards. + + .. versionchanged:: 2018.1 + + Added iterable of tags """ - if isinstance(iname_to_tag, dict): - unpack_iname_to_tag = [] - for iname, tags in six.iteritems(iname_to_tag): - if isinstance(tags, set): - for tag in tags: - unpack_iname_to_tag.append((iname, tag)) - else: - unpack_iname_to_tag.append((iname, tags)) - iname_to_tag = unpack_iname_to_tag - elif isinstance(iname_to_tag, str): + if isinstance(iname_to_tag, str): def parse_kv(s): colon_index = s.find(":") if colon_index == -1: @@ -656,6 +651,21 @@ def tag_inames(kernel, iname_to_tag, force=False, ignore_nonexistent=False): parse_kv(s) for s in iname_to_tag.split(",") if s.strip()] + # convert dict to list of tuples + if isinstance(iname_to_tag, dict): + iname_to_tag = list(six.iteritems(iname_to_tag)) + + # flatten iterables of tags for each iname + from collections import Iterable + unpack_iname_to_tag = [] + for iname, tags in iname_to_tag: + if isinstance(tags, Iterable) and not isinstance(tags, str): + for tag in tags: + unpack_iname_to_tag.append((iname, tag)) + else: + unpack_iname_to_tag.append((iname, tags)) + iname_to_tag = unpack_iname_to_tag + from loopy.kernel.data import parse_tag as inner_parse_tag def parse_tag(tag): @@ -678,7 +688,7 @@ def tag_inames(kernel, iname_to_tag, force=False, ignore_nonexistent=False): iname_to_tag = [(iname, parse_tag(tag)) for iname, tag in iname_to_tag] from loopy.kernel.data import (ConcurrentTag, ForceSequentialTag, - filter_iname_by_type) + filter_iname_tags_by_type) # {{{ globbing @@ -718,12 +728,12 @@ def tag_inames(kernel, iname_to_tag, force=False, ignore_nonexistent=False): raise ValueError("cannot tag '%s'--not known" % iname) if (isinstance(new_tag, ConcurrentTag) - and filter_iname_by_type(old_tags, ForceSequentialTag)): + and filter_iname_tags_by_type(old_tags, ForceSequentialTag)): raise ValueError("cannot tag '%s' as parallel--" "iname requires sequential execution" % iname) if (isinstance(new_tag, ForceSequentialTag) - and filter_iname_by_type(old_tags, ConcurrentTag)): + and filter_iname_tags_by_type(old_tags, ConcurrentTag)): raise ValueError("'%s' is already tagged as parallel, " "but is now prohibited from being parallel " "(likely because of participation in a precompute or " @@ -982,11 +992,11 @@ def get_iname_duplication_options(knl, use_boostable_into=False): Use :func:`has_schedulable_iname_nesting` to decide whether an iname needs to be duplicated in a given kernel. """ - from loopy.kernel.data import ConcurrentTag, filter_iname_by_type + from loopy.kernel.data import ConcurrentTag, filter_iname_tags_by_type concurrent_inames = set( iname - for iname in knl.all_inames() if filter_iname_by_type( + for iname in knl.all_inames() if filter_iname_tags_by_type( knl.iname_to_tags[iname], ConcurrentTag)) # First we extract the minimal necessary information from the kernel @@ -1011,8 +1021,8 @@ def get_iname_duplication_options(knl, use_boostable_into=False): # Get the duplication options as a tuple of iname and a set for iname, insns in _get_iname_duplication_options(insn_iname_sets): # Check whether this iname has a parallel tag and discard it if so - if (iname in knl.iname_to_tags - and filter_iname_by_type(knl.iname_to_tags[iname], ConcurrentTag)): + if (iname in knl.iname_to_tags and filter_iname_tags_by_type( + knl.iname_to_tags[iname], ConcurrentTag)): continue # If we find a duplication option and to not use boostable_into @@ -1529,7 +1539,8 @@ def find_unused_axis_tag(kernel, kind, insn_match=None): """ used_axes = set() - from loopy.kernel.data import GroupIndexTag, LocalIndexTag, filter_iname_by_type + from loopy.kernel.data import (GroupIndexTag, LocalIndexTag, + filter_iname_tags_by_type) if isinstance(kind, str): found = False @@ -1549,7 +1560,7 @@ def find_unused_axis_tag(kernel, kind, insn_match=None): for insn in insns: for iname in kernel.insn_inames(insn): dim_tags = kernel.iname_to_tags[iname] - if filter_iname_by_type(dim_tags, kind): + if filter_iname_tags_by_type(dim_tags, kind): used_axes.add(kind.axis) i = 0 diff --git a/loopy/transform/privatize.py b/loopy/transform/privatize.py index bef421006..c953c1cee 100644 --- a/loopy/transform/privatize.py +++ b/loopy/transform/privatize.py @@ -174,7 +174,7 @@ def privatize_temporaries_with_inames( # {{{ change temporary variables - from loopy.kernel.data import VectorizeTag, filter_iname_by_type + from loopy.kernel.data import VectorizeTag, filter_iname_tags_by_type new_temp_vars = kernel.temporary_variables.copy() for tv_name, inames in six.iteritems(var_to_new_priv_axis_iname): @@ -187,7 +187,7 @@ def privatize_temporaries_with_inames( dim_tags = ["c"] * (len(shape) + len(extra_shape)) for i, iname in enumerate(inames): - if filter_iname_by_type(kernel.iname_to_tags[iname], VectorizeTag): + if filter_iname_tags_by_type(kernel.iname_to_tags[iname], VectorizeTag): dim_tags[len(shape) + i] = "vec" new_temp_vars[tv.name] = tv.copy(shape=shape + extra_shape, diff --git a/loopy/transform/save.py b/loopy/transform/save.py index 75ac16ae9..dfdd7a154 100644 --- a/loopy/transform/save.py +++ b/loopy/transform/save.py @@ -403,15 +403,15 @@ class TemporarySaver(object): continue from loopy.kernel.data import (GroupIndexTag, LocalIndexTag, - ConcurrentTag, filter_iname_by_type) + ConcurrentTag, filter_iname_tags_by_type) - if filter_iname_by_type(tags, GroupIndexTag): - tag, = filter_iname_by_type(tags, GroupIndexTag, 1) + if filter_iname_tags_by_type(tags, GroupIndexTag): + tag, = filter_iname_tags_by_type(tags, GroupIndexTag, 1) my_group_tags.append(tag) - elif filter_iname_by_type(tags, LocalIndexTag): - tag, = filter_iname_by_type(tags, LocalIndexTag, 1) + elif filter_iname_tags_by_type(tags, LocalIndexTag): + tag, = filter_iname_tags_by_type(tags, LocalIndexTag, 1) my_local_tags.append(tag) - elif filter_iname_by_type(tags, ConcurrentTag): + elif filter_iname_tags_by_type(tags, ConcurrentTag): raise LoopyError( "iname '%s' is tagged with '%s' - only " "group and local tags are supported for " -- GitLab From 511cc9f81a8d7a9afe9d1ab4e78d6ca07f8ce440 Mon Sep 17 00:00:00 2001 From: tj-sun Date: Wed, 2 May 2018 18:03:57 +0100 Subject: [PATCH 20/20] report error in iname_to_tag if multiply tagged --- loopy/kernel/__init__.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py index 26e928692..429961a71 100644 --- a/loopy/kernel/__init__.py +++ b/loopy/kernel/__init__.py @@ -310,6 +310,11 @@ class LoopKernel(ImmutableRecordWithoutPickling): warn("Since version 2018.1, inames can hold multiple tags. Use " "iname_to_tags['iname'] instead. iname_to_tag.get('iname') will be " "deprecated at version 2019.0.", DeprecationWarning) + for iname, tags in six.iteritems(self.iname_to_tags): + if len(tags) > 1: + raise LoopyError( + "iname {0} has multiple tags: {1}. " + "Use iname_to_tags['iname'] instead.".format(iname, tags)) return dict((k, next(iter(v))) for k, v in six.iteritems(self.iname_to_tags) if v) -- GitLab