diff --git a/loopy/check.py b/loopy/check.py index 9a9ff1fd575883ad4ee212f9e17f7c0197781201..bebd86fffe00d9374da4352206952f1e439ef883 100644 --- a/loopy/check.py +++ b/loopy/check.py @@ -168,7 +168,7 @@ def _is_racing_iname_tag(tv, tag): def check_for_write_races(kernel): - from loopy.kernel.data import ConcurrentTag, check_iname_tags + from loopy.kernel.data import ConcurrentTag, get_iname_tags for insn in kernel.instructions: for assignee_name, assignee_indices in zip( @@ -186,8 +186,9 @@ def check_for_write_races(kernel): # will cause write races. raceable_parallel_insn_inames = set( - iname for iname in kernel.insn_inames(insn) - if check_iname_tags(kernel.iname_to_tags[iname], ConcurrentTag)) + iname for iname in kernel.insn_inames(insn) + if get_iname_tags(kernel.iname_to_tags[iname], + ConcurrentTag)) elif assignee_name in kernel.temporary_variables: temp_var = kernel.temporary_variables[assignee_name] @@ -213,7 +214,7 @@ def check_for_write_races(kernel): def check_for_orphaned_user_hardware_axes(kernel): - from loopy.kernel.data import LocalIndexTag, check_iname_tags + from loopy.kernel.data import LocalIndexTag for axis in kernel.local_sizes: found = False for tags in six.itervalues(kernel.iname_to_tags): @@ -230,12 +231,12 @@ def check_for_orphaned_user_hardware_axes(kernel): def check_for_data_dependent_parallel_bounds(kernel): - from loopy.kernel.data import ConcurrentTag, check_iname_tags + from loopy.kernel.data import ConcurrentTag, get_iname_tags for i, dom in enumerate(kernel.domains): dom_inames = set(dom.get_var_names(dim_type.set)) par_inames = set(iname for iname in dom_inames - if check_iname_tags(kernel.iname_to_tags[iname], ConcurrentTag)) + if get_iname_tags(kernel.iname_to_tags[iname], ConcurrentTag)) if not par_inames: continue @@ -651,7 +652,7 @@ def _check_for_unused_hw_axes_in_kernel_chunk(kernel, sched_index=None): # alternative: just disregard length-1 dimensions? from loopy.kernel.data import (LocalIndexTag, AutoLocalIndexTagBase, - GroupIndexTag, check_iname_tags, get_iname_tags) + GroupIndexTag, get_iname_tags) while i < loop_end_i: sched_item = kernel.schedule[i] @@ -671,19 +672,13 @@ def _check_for_unused_hw_axes_in_kernel_chunk(kernel, sched_index=None): for iname in kernel.insn_inames(insn): tags = kernel.iname_to_tags[iname] - if check_iname_tags(tags, LocalIndexTag): - tags = get_iname_tags(tags, LocalIndexTag) - if len(tags) > 1: - raise LoopyError("Can only have one LocalIndexTag") - tag, = tags + if get_iname_tags(tags, LocalIndexTag): + tag, = get_iname_tags(tags, LocalIndexTag, 1) local_axes_used.add(tag.axis) - elif check_iname_tags(tags, GroupIndexTag): - tags = get_iname_tags(tags, GroupIndexTag) - if len(tags) > 1: - raise LoopyError("Can only have one GroupIndexTag") - tag, = tags + elif get_iname_tags(tags, GroupIndexTag): + tag, = get_iname_tags(tags, GroupIndexTag, 1) group_axes_used.add(tag.axis) - elif check_iname_tags(tags, AutoLocalIndexTagBase): + elif get_iname_tags(tags, AutoLocalIndexTagBase): raise LoopyError("auto local tag encountered") if group_axes != group_axes_used: @@ -896,8 +891,6 @@ def check_implemented_domains(kernel, implemented_domains, code=None): from islpy import align_two - from loopy.kernel.data import check_iname_tags - last_idomains = None last_insn_inames = None @@ -930,11 +923,11 @@ def check_implemented_domains(kernel, implemented_domains, code=None): .project_out_except(insn_inames, [dim_type.set])) from loopy.kernel.instruction import BarrierInstruction - from loopy.kernel.data import LocalIndexTag + from loopy.kernel.data import LocalIndexTag, get_iname_tags if isinstance(insn, BarrierInstruction): # project out local-id-mapped inames, solves #94 on gitlab non_lid_inames = frozenset(iname for iname in insn_inames - if not check_iname_tags(kernel.iname_to_tags[iname], LocalIndexTag)) + if not get_iname_tags(kernel.iname_to_tags[iname], LocalIndexTag)) insn_impl_domain = insn_impl_domain.project_out_except( non_lid_inames, [dim_type.set]) diff --git a/loopy/codegen/bounds.py b/loopy/codegen/bounds.py index 886f305a455f66b943660ff653c40ae632360243..240df24e5d2a6dc7ea620a6f2f290937b9119384 100644 --- a/loopy/codegen/bounds.py +++ b/loopy/codegen/bounds.py @@ -59,7 +59,7 @@ def get_usable_inames_for_conditional(kernel, sched_index): from loopy.schedule import ( find_active_inames_at, get_insn_ids_for_block_at, has_barrier_within) from loopy.kernel.data import (ConcurrentTag, LocalIndexTagBase, - IlpBaseTag, check_iname_tags) + IlpBaseTag, get_iname_tags) result = find_active_inames_at(kernel, sched_index) crosses_barrier = has_barrier_within(kernel, sched_index) @@ -98,9 +98,9 @@ def get_usable_inames_for_conditional(kernel, sched_index): # at the innermost level of nesting. if ( - check_iname_tags(tags, ConcurrentTag) - and not (check_iname_tags(tags, LocalIndexTagBase) - and crosses_barrier) and not check_iname_tags(tags, IlpBaseTag) + get_iname_tags(tags, ConcurrentTag) + and not (get_iname_tags(tags, LocalIndexTagBase) + and crosses_barrier) and not get_iname_tags(tags, IlpBaseTag) ): result.add(iname) diff --git a/loopy/codegen/control.py b/loopy/codegen/control.py index 22f18760882ae984de56ffd050ee21573b46fbad..41b04e172a2afeef612d31bf2fff323d096945ec 100644 --- a/loopy/codegen/control.py +++ b/loopy/codegen/control.py @@ -41,7 +41,7 @@ def get_admissible_conditional_inames_for(codegen_state, sched_index): kernel = codegen_state.kernel from loopy.kernel.data import (LocalIndexTag, HardwareConcurrentTag, - check_iname_tags) + get_iname_tags) from loopy.schedule import find_active_inames_at, has_barrier_within result = find_active_inames_at(kernel, sched_index) @@ -49,9 +49,9 @@ def get_admissible_conditional_inames_for(codegen_state, sched_index): has_barrier = has_barrier_within(kernel, sched_index) for iname, tags in six.iteritems(kernel.iname_to_tags): - if (check_iname_tags(tags, HardwareConcurrentTag) + if (get_iname_tags(tags, HardwareConcurrentTag) and codegen_state.is_generating_device_code): - if not has_barrier or not check_iname_tags(tags, LocalIndexTag): + if not has_barrier or not get_iname_tags(tags, LocalIndexTag): result.add(iname) return frozenset(result) @@ -136,13 +136,14 @@ def generate_code_for_sched_index(codegen_state, sched_index): generate_vectorize_loop, generate_sequential_loop_dim_code) - from loopy.kernel.data import (UnrolledIlpTag, UnrollTag, ForceSequentialTag, - LoopedIlpTag, VectorizeTag, InOrderSequentialSequentialTag, check_iname_tags) - if check_iname_tags(tags, (UnrollTag, UnrolledIlpTag)): + from loopy.kernel.data import (UnrolledIlpTag, UnrollTag, + ForceSequentialTag, LoopedIlpTag, VectorizeTag, + InOrderSequentialSequentialTag, get_iname_tags) + if get_iname_tags(tags, (UnrollTag, UnrolledIlpTag)): func = generate_unroll_loop - elif check_iname_tags(tags, VectorizeTag): + elif get_iname_tags(tags, VectorizeTag): func = generate_vectorize_loop - elif len(tags) == 0 or check_iname_tags(tags, (LoopedIlpTag, + elif len(tags) == 0 or get_iname_tags(tags, (LoopedIlpTag, ForceSequentialTag, InOrderSequentialSequentialTag)): func = generate_sequential_loop_dim_code else: diff --git a/loopy/codegen/loop.py b/loopy/codegen/loop.py index 01f8a82554595d4eda9dfa899fa26dc262294259..0efa96f967bc597d65ed3fd57c18301d67d6990b 100644 --- a/loopy/codegen/loop.py +++ b/loopy/codegen/loop.py @@ -231,7 +231,7 @@ def set_up_hw_parallel_loops(codegen_state, schedule_index, next_func, kernel = codegen_state.kernel from loopy.kernel.data import (UniqueTag, HardwareConcurrentTag, - LocalIndexTag, GroupIndexTag, check_iname_tags) + LocalIndexTag, GroupIndexTag, get_iname_tags) from loopy.schedule import get_insn_ids_for_block_at insn_ids_for_block = get_insn_ids_for_block_at(kernel.schedule, schedule_index) @@ -242,7 +242,7 @@ def set_up_hw_parallel_loops(codegen_state, schedule_index, next_func, all_inames_by_insns |= kernel.insn_inames(insn_id) hw_inames_left = [iname for iname in all_inames_by_insns - if check_iname_tags(kernel.iname_to_tags[iname], + if get_iname_tags(kernel.iname_to_tags[iname], HardwareConcurrentTag)] if not hw_inames_left: @@ -258,12 +258,8 @@ def set_up_hw_parallel_loops(codegen_state, schedule_index, next_func, from loopy.symbolic import GroupHardwareAxisIndex, LocalHardwareAxisIndex - assert check_iname_tags(tags, UniqueTag) + tag, = get_iname_tags(tags, UniqueTag, max_num=1, min_num=1) - if len(tags) > 1: - raise LoopyError("cannot have more than one UniqueTag") - - tag, = tags if isinstance(tag, GroupIndexTag): hw_axis_expr = GroupHardwareAxisIndex(tag.axis) elif isinstance(tag, LocalIndexTag): @@ -271,11 +267,14 @@ def set_up_hw_parallel_loops(codegen_state, schedule_index, next_func, else: raise RuntimeError("unexpected hw tag type") + # TODO: get rid of None + other_inames_with_same_tag = [ other_iname for other_iname in kernel.all_inames() - if check_iname_tags(kernel.iname_to_tags[other_iname], UniqueTag) - and any(_tag.key == tag.key for _tag in kernel.iname_to_tags[other_iname]) - and other_iname != iname] + if (get_iname_tags(kernel.iname_to_tags[other_iname], UniqueTag) + and other_iname != iname + and any(_tag.key == tag.key + for _tag in kernel.iname_to_tags[other_iname]))] # {{{ 'implement' hardware axis boundaries diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py index a69f0727db132aa5b34372cd4fd9f245a2270fc5..872365fca2c87a0c4bab3ca5569acc471364d63a 100644 --- a/loopy/kernel/__init__.py +++ b/loopy/kernel/__init__.py @@ -44,7 +44,7 @@ from loopy.library.function import ( from loopy.diagnostic import CannotBranchDomainTree, LoopyError from loopy.tools import natsorted from loopy.diagnostic import StaticValueFindingError -from loopy.kernel.data import check_iname_tags, get_iname_tags +from loopy.kernel.data import get_iname_tags # {{{ unique var names @@ -197,7 +197,7 @@ class LoopKernel(ImmutableRecordWithoutPickling): assumptions=None, local_sizes={}, temporary_variables={}, - iname_to_tags=defaultdict(tuple), + iname_to_tags=defaultdict(set), substitutions={}, function_manglers=[ default_function_mangler, @@ -711,11 +711,9 @@ class LoopKernel(ImmutableRecordWithoutPickling): from loopy.kernel.data import HardwareConcurrentTag for iname in cond_inames: - tags = self.iname_to_tags[iname] - if check_iname_tags(tags, HardwareConcurrentTag): - tags = get_iname_tags(tags, HardwareConcurrentTag) - if len(tags) > 1: - raise LoopyError("cannot have more than one HardwareConcurentTags") + tags = get_iname_tags(self.iname_to_tags[iname], + HardwareConcurrentTag, 1) + if tags: tag, = tags tag_key_uses[tag.key].append(iname) @@ -725,8 +723,10 @@ class LoopKernel(ImmutableRecordWithoutPickling): multi_use_inames = set() for iname in cond_inames: - for tag in self.iname_to_tags[iname]: - if isinstance(tag, HardwareConcurrentTag) and tag.key in multi_use_keys: + tags = get_iname_tags(self.iname_to_tags[iname], HardwareConcurrentTag) + if tags: + tag, = get_iname_tags(tags, HardwareConcurrentTag, 1) + if tag.key in multi_use_keys: multi_use_inames.add(iname) break @@ -960,22 +960,17 @@ class LoopKernel(ImmutableRecordWithoutPickling): for iname in all_inames_by_insns: tags = self.iname_to_tags[iname] - if check_iname_tags(tags, GroupIndexTag): + if get_iname_tags(tags, GroupIndexTag): tgt_dict = global_sizes - elif check_iname_tags(tags, LocalIndexTag): + elif get_iname_tags(tags, LocalIndexTag): tgt_dict = local_sizes - elif check_iname_tags(tags, AutoLocalIndexTagBase) and not ignore_auto: + elif get_iname_tags(tags, AutoLocalIndexTagBase) and not ignore_auto: raise RuntimeError("cannot find grid sizes if automatic " "local index tags are present") else: continue - tags = get_iname_tags(tags, (GroupIndexTag, LocalIndexTag)) - - if len(tags) != 1: - raise LoopyError("Multiple axis tag not allowed") - - tag, = tags + tag, = get_iname_tags(tags, (GroupIndexTag, LocalIndexTag), 1) size = self.get_iname_bounds(iname).size diff --git a/loopy/kernel/data.py b/loopy/kernel/data.py index 9b66088e5b75ca6bad3d06d931112de29980b634..9250c5acf7304e19572a96ec214452a62d891254 100644 --- a/loopy/kernel/data.py +++ b/loopy/kernel/data.py @@ -55,19 +55,24 @@ class auto(object): # noqa # {{{ iname tags -def check_iname_tags(tags, tag_type): - return any([isinstance(tag, tag_type) for tag in tags]) - - -def get_iname_tags(tags, tag_type): - return tuple(tag for tag in tags if isinstance(tag, tag_type)) +def get_iname_tags(tags, tag_type, max_num=None, min_num=None): + result = set(tag for tag in tags if isinstance(tag, tag_type)) + if max_num: + if len(result) > max_num: + raise LoopyError("cannot have more than {0} tags" + "of type(s): {1}".format(max_num, tag_type)) + if min_num: + if len(result) < min_num: + raise LoopyError("must have more than {0} tags" + "of type(s): {1}".format(max_num, tag_type)) + return result class IndexTag(ImmutableRecord): __slots__ = [] def __hash__(self): - raise RuntimeError("use .key to hash index tags") + return hash(self.key) def update_persistent_hash(self, key_hash, key_builder): """Custom hash computation function for use with diff --git a/loopy/kernel/tools.py b/loopy/kernel/tools.py index 505482dea66cf2a1849b262eb3d28b121f990b88..5be3375373ca322bb3126ebf1a4407235f0ca2eb 100644 --- a/loopy/kernel/tools.py +++ b/loopy/kernel/tools.py @@ -36,7 +36,7 @@ from islpy import dim_type from loopy.diagnostic import LoopyError, warn_with_kernel from pytools import memoize_on_first_arg from loopy.tools import natsorted -from loopy.kernel.data import check_iname_tags +from loopy.kernel.data import get_iname_tags import logging logger = logging.getLogger(__name__) @@ -632,7 +632,7 @@ def is_domain_dependent_on_inames(kernel, domain_index, inames): # {{{ rank inames by stride def get_auto_axis_iname_ranking_by_stride(kernel, insn): - from loopy.kernel.data import ImageArg, ValueArg, check_iname_tags + from loopy.kernel.data import ImageArg, ValueArg, get_iname_tags approximate_arg_values = {} for arg in kernel.args: @@ -677,8 +677,8 @@ def get_auto_axis_iname_ranking_by_stride(kernel, insn): from loopy.kernel.data import AutoLocalIndexTagBase auto_axis_inames = set( - iname for iname in kernel.insn_inames(insn) - if check_iname_tags(kernel.iname_to_tags[iname], AutoLocalIndexTagBase)) + iname for iname in kernel.insn_inames(insn) + if get_iname_tags(kernel.iname_to_tags[iname], AutoLocalIndexTagBase)) # }}} @@ -754,7 +754,7 @@ def assign_automatic_axes(kernel, axis=0, local_size=None): # to set() from tuple() from loopy.kernel.data import (AutoLocalIndexTagBase, LocalIndexTag, - check_iname_tags, get_iname_tags) + get_iname_tags) # Realize that at this point in time, axis lengths are already # fixed. So we compute them once and pass them to our recursive @@ -832,7 +832,7 @@ def assign_automatic_axes(kernel, axis=0, local_size=None): do_tagged_check=False), axis=recursion_axis, local_size=local_size) - if not check_iname_tags(kernel.iname_to_tags[iname], AutoLocalIndexTagBase): + if not get_iname_tags(kernel.iname_to_tags[iname], AutoLocalIndexTagBase): raise LoopyError("trying to reassign '%s'" % iname) new_iname_to_tags = kernel.iname_to_tags.copy() @@ -855,7 +855,7 @@ def assign_automatic_axes(kernel, axis=0, local_size=None): auto_axis_inames = [ iname for iname in kernel.insn_inames(insn) - if check_iname_tags(kernel.iname_to_tags[iname], AutoLocalIndexTagBase)] + if get_iname_tags(kernel.iname_to_tags[iname], AutoLocalIndexTagBase)] if not auto_axis_inames: continue @@ -878,7 +878,7 @@ def assign_automatic_axes(kernel, axis=0, local_size=None): if iname_ranking is not None: for iname in iname_ranking: prev_tags = kernel.iname_to_tags[iname] - if check_iname_tags(prev_tags, AutoLocalIndexTagBase): + if get_iname_tags(prev_tags, AutoLocalIndexTagBase): return assign_axis(axis, iname, axis) else: @@ -1134,7 +1134,7 @@ def get_visual_iname_order_embedding(kernel): # nest. ilp_inames = frozenset(iname for iname in kernel.iname_to_tags - if check_iname_tags(kernel.iname_to_tags[iname], IlpBaseTag)) + if get_iname_tags(kernel.iname_to_tags[iname], IlpBaseTag)) iname_trie = SetTrie() diff --git a/loopy/preprocess.py b/loopy/preprocess.py index b20fbef91168ab4a5580b031d1cab4403bfec7c1..38b213c05632670ebf98fba0534fbffba5738fa5 100644 --- a/loopy/preprocess.py +++ b/loopy/preprocess.py @@ -34,7 +34,7 @@ from pytools.persistent_dict import WriteOncePersistentDict from loopy.tools import LoopyKeyBuilder from loopy.version import DATA_MODEL_VERSION -from loopy.kernel.data import make_assignment, check_iname_tags, get_iname_tags +from loopy.kernel.data import make_assignment, get_iname_tags # for the benefit of loopy.statistics, for now from loopy.type_inference import infer_unknown_types @@ -136,7 +136,7 @@ def check_reduction_iname_uniqueness(kernel): def _get_compute_inames_tagged(kernel, insn, tag_base): return set(iname for iname in kernel.insn_inames(insn.id) - if check_iname_tags(kernel.iname_to_tags[iname], tag_base)) + if get_iname_tags(kernel.iname_to_tags[iname], tag_base)) def _get_assignee_inames_tagged(kernel, insn, tag_base, tv_names): @@ -146,7 +146,7 @@ def _get_assignee_inames_tagged(kernel, insn, tag_base, tv_names): insn.assignee_subscript_deps()) for iname in adeps & kernel.all_inames() if aname in tv_names - if check_iname_tags(kernel.iname_to_tags[iname], tag_base)) + if get_iname_tags(kernel.iname_to_tags[iname], tag_base)) def find_temporary_scope(kernel): @@ -291,20 +291,20 @@ def _classify_reduction_inames(kernel, inames): from loopy.kernel.data import ( LocalIndexTagBase, UnrolledIlpTag, UnrollTag, VectorizeTag, - ConcurrentTag, check_iname_tags) + ConcurrentTag, get_iname_tags) for iname in inames: iname_tags = kernel.iname_to_tags[iname] - if check_iname_tags(iname_tags, (UnrollTag, UnrolledIlpTag)): + if get_iname_tags(iname_tags, (UnrollTag, UnrolledIlpTag)): # These are nominally parallel, but we can live with # them as sequential. sequential.append(iname) - elif check_iname_tags(iname_tags, LocalIndexTagBase): + elif get_iname_tags(iname_tags, LocalIndexTagBase): local_par.append(iname) - elif check_iname_tags(iname_tags, (ConcurrentTag, VectorizeTag)): + elif get_iname_tags(iname_tags, (ConcurrentTag, VectorizeTag)): nonlocal_par.append(iname) else: @@ -1136,9 +1136,9 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True, outer_insn_inames = temp_kernel.insn_inames(insn) - from loopy.kernel.data import LocalIndexTagBase, check_iname_tags + from loopy.kernel.data import LocalIndexTagBase, get_iname_tags outer_local_inames = tuple(oiname for oiname in outer_insn_inames - if check_iname_tags(kernel.iname_to_tags[oiname], LocalIndexTagBase)) + if get_iname_tags(kernel.iname_to_tags[oiname], LocalIndexTagBase)) from pymbolic import var outer_local_iname_vars = tuple( @@ -1471,9 +1471,9 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True, outer_insn_inames = temp_kernel.insn_inames(insn) - from loopy.kernel.data import LocalIndexTagBase, check_iname_tags + from loopy.kernel.data import LocalIndexTagBase, get_iname_tags outer_local_inames = tuple(oiname for oiname in outer_insn_inames - if check_iname_tags(kernel.iname_to_tags[oiname], LocalIndexTagBase) + if get_iname_tags(kernel.iname_to_tags[oiname], LocalIndexTagBase) and oiname != sweep_iname) from pymbolic import var @@ -1740,7 +1740,7 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True, "by reductions is 'local'--found iname(s) '%s' " "respectively tagged '%s'" % (", ".join(bad_inames), - ", ".join(tag.key for tag in kernel.iname_to_tags[iname] + ", ".join(str(kernel.iname_to_tags[iname]) for iname in bad_inames))) if n_local_par == 0 and n_sequential == 0: @@ -2150,7 +2150,7 @@ def preprocess_kernel(kernel, device=None): from loopy.kernel.data import AutoLocalIndexTagBase for iname, tags in six.iteritems(kernel.iname_to_tags): - if (check_iname_tags(tags, AutoLocalIndexTagBase) + if (get_iname_tags(tags, AutoLocalIndexTagBase) and iname in kernel.all_inames()): raise LoopyError("kernel with automatically-assigned " "local axes passed to preprocessing") diff --git a/loopy/schedule/__init__.py b/loopy/schedule/__init__.py index 68b3fb0cbad892745a775f6e1ef3f6b5e34a1c5a..bda316bc170b9099e41355a4a3670902f44461e6 100644 --- a/loopy/schedule/__init__.py +++ b/loopy/schedule/__init__.py @@ -213,12 +213,12 @@ def find_loop_nest_with_map(kernel): result = {} from loopy.kernel.data import (ConcurrentTag, IlpBaseTag, VectorizeTag, - check_iname_tags) + get_iname_tags) all_nonpar_inames = set( - iname for iname, tags in six.iteritems(kernel.iname_to_tags) - if tags and not check_iname_tags(tags, - (ConcurrentTag, IlpBaseTag, VectorizeTag))) + iname for iname in kernel.all_inames() + if not get_iname_tags(kernel.iname_to_tags[iname], + (ConcurrentTag, IlpBaseTag, VectorizeTag))) iname_to_insns = kernel.iname_to_insns() @@ -241,7 +241,7 @@ def find_loop_nest_around_map(kernel): iname_to_insns = kernel.iname_to_insns() # examine pairs of all inames--O(n**2), I know. - from loopy.kernel.data import IlpBaseTag, check_iname_tags + from loopy.kernel.data import IlpBaseTag, get_iname_tags for inner_iname in all_inames: result[inner_iname] = set() for outer_iname in all_inames: @@ -249,7 +249,7 @@ def find_loop_nest_around_map(kernel): continue tags = kernel.iname_to_tags[outer_iname] - if check_iname_tags(tags, IlpBaseTag): + if get_iname_tags(tags, IlpBaseTag): # ILP tags are special because they are parallel tags # and therefore 'in principle' nest around everything. # But they're realized by the scheduler as a loop @@ -279,10 +279,10 @@ def find_loop_insn_dep_map(kernel, loop_nest_with_map, loop_nest_around_map): result = {} from loopy.kernel.data import (ConcurrentTag, IlpBaseTag, VectorizeTag, - check_iname_tags) + get_iname_tags) for insn in kernel.instructions: for iname in kernel.insn_inames(insn): - if check_iname_tags(kernel.iname_to_tags[iname], ConcurrentTag): + if get_iname_tags(kernel.iname_to_tags[iname], ConcurrentTag): continue iname_dep = result.setdefault(iname, set()) @@ -313,7 +313,8 @@ def find_loop_insn_dep_map(kernel, loop_nest_with_map, loop_nest_around_map): continue tags = kernel.iname_to_tags[dep_insn_iname] - if check_iname_tags(tags, (ConcurrentTag, IlpBaseTag, VectorizeTag)): + if get_iname_tags(tags, + (ConcurrentTag, IlpBaseTag, VectorizeTag)): # Parallel tags don't really nest, so we'll disregard # them here. continue @@ -1878,19 +1879,19 @@ def generate_loop_schedules_inner(kernel, debug_args={}): for insn_id in sched_item_to_insn_id(item)) from loopy.kernel.data import (IlpBaseTag, ConcurrentTag, VectorizeTag, - check_iname_tags) + get_iname_tags) ilp_inames = set( iname for iname, tags in six.iteritems(kernel.iname_to_tags) - if check_iname_tags(tags, IlpBaseTag)) + if get_iname_tags(tags, IlpBaseTag)) vec_inames = set( iname for iname, tags in six.iteritems(kernel.iname_to_tags) - if check_iname_tags(tags, VectorizeTag)) + if get_iname_tags(tags, VectorizeTag)) parallel_inames = set( iname for iname, tags in six.iteritems(kernel.iname_to_tags) - if check_iname_tags(tags, ConcurrentTag)) + if get_iname_tags(tags, ConcurrentTag)) loop_nest_with_map = find_loop_nest_with_map(kernel) loop_nest_around_map = find_loop_nest_around_map(kernel) diff --git a/loopy/statistics.py b/loopy/statistics.py index 77c638128fe3dab63ee79f72bd14d70e0ca866bf..4b0643873d100af365cbf2782f0c084f83b55760 100755 --- a/loopy/statistics.py +++ b/loopy/statistics.py @@ -1182,12 +1182,11 @@ def get_unused_hw_axes_factor(knl, insn, disregard_local_axes, space=None): l_used = set() from loopy.kernel.data import (LocalIndexTag, GroupIndexTag, - get_iname_tags, check_iname_tags) + get_iname_tags) for iname in knl.insn_inames(insn): - tags = get_iname_tags(knl.iname_to_tags[iname], (LocalIndexTag, GroupIndexTag)) + tags = get_iname_tags(knl.iname_to_tags[iname], + (LocalIndexTag, GroupIndexTag), 1) if tags: - if len(tags) > 1: - raise LoopyError("cannot have more than one UniqueTags") tag, = tags if isinstance(tag, LocalIndexTag): l_used.add(tag.axis) @@ -1221,9 +1220,9 @@ def count_insn_runs(knl, insn, count_redundant_work, disregard_local_axes=False) insn_inames = knl.insn_inames(insn) if disregard_local_axes: - from loopy.kernel.data import LocalIndexTag, check_iname_tags + from loopy.kernel.data import LocalIndexTag, get_iname_tags insn_inames = [iname for iname in insn_inames if not - check_iname_tags(kernel.iname_to_tags[iname], LocalIndexTag)] + get_iname_tags(knl.iname_to_tags[iname], LocalIndexTag)] inames_domain = knl.get_inames_domain(insn_inames) domain = (inames_domain.project_out_except( diff --git a/loopy/target/ispc.py b/loopy/target/ispc.py index 3fd2bc1ae6a439aeab1ff290e990143207f948d2..b7edc517bee38c3f7b6a6cdce5419863db5ffed8 100644 --- a/loopy/target/ispc.py +++ b/loopy/target/ispc.py @@ -418,33 +418,33 @@ class ISPCASTBuilder(CASTBuilder): new_terms = [] - from loopy.kernel.data import LocalIndexTag, check_iname_tags, get_iname_tags + from loopy.kernel.data import LocalIndexTag, get_iname_tags from loopy.symbolic import get_dependencies saw_l0 = False for term in terms: if (isinstance(term, Variable) - and check_iname_tags(kernel.iname_to_tags[term.name], LocalIndexTag)): - tags = get_iname_tags(kernel.iname_to_tags[term.name], LocalIndexTag) - if len(tags) > 1: - raise LoopyError("cannot have more than one LocalIndexTags") - tag, = tags + and get_iname_tags( + kernel.iname_to_tags[term.name], LocalIndexTag)): + tag, = get_iname_tags(kernel.iname_to_tags[term.name], + LocalIndexTag, 1) if tag.axis == 0: if saw_l0: - raise LoopyError("streaming store must have stride 1 " - "in local index, got: %s" % subscript) + raise LoopyError( + "streaming store must have stride 1 in " + "local index, got: %s" % subscript) saw_l0 = True continue else: for dep in get_dependencies(term): - if check_iname_tags(kernel.iname_to_tags[dep], LocalIndexTag): - tags = get_iname_tags(kernel.iname_to_tags[dep], LocalIndexTag) - if len(tags) > 1: - raise LoopyError("cannot have more than one LocalIndexTags") - tag, = tags + if get_iname_tags( + kernel.iname_to_tags[dep], LocalIndexTag): + tag, = get_iname_tags(kernel.iname_to_tags[dep], + LocalIndexTag, 1) if tag.axis == 0: - raise LoopyError("streaming store must have stride 1 " - "in local index, got: %s" % subscript) + raise LoopyError( + "streaming store must have stride 1 in " + "local index, got: %s" % subscript) new_terms.append(term) diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index 21c2f7eea5054db0ed393dd49c003ae8bcfa7ee6..365f2db773e63b69c75771cb45532c29416e03c4 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -33,7 +33,6 @@ from loopy.symbolic import ( RuleAwareIdentityMapper, RuleAwareSubstitutionMapper, SubstitutionRuleMappingContext) from loopy.diagnostic import LoopyError -from loopy.kernel.data import check_iname_tags, get_iname_tags __doc__ = """ @@ -177,9 +176,9 @@ def _split_iname_backend(kernel, split_iname, """ existing_tags = kernel.iname_to_tags[split_iname] - from loopy.kernel.data import ForceSequentialTag, check_iname_tags + from loopy.kernel.data import ForceSequentialTag, get_iname_tags if (do_tagged_check and existing_tags - and not check_iname_tags(existing_tags, ForceSequentialTag)): + and not get_iname_tags(existing_tags, ForceSequentialTag)): raise LoopyError("cannot split already tagged iname '%s'" % split_iname) if split_iname not in kernel.all_inames(): @@ -648,8 +647,8 @@ def tag_inames(kernel, iname_to_tag, force=False, ignore_nonexistent=False): iname_to_tag = [(iname, parse_tag(tag)) for iname, tag in iname_to_tag] - from loopy.kernel.data import (ConcurrentTag, AutoLocalIndexTagBase, - ForceSequentialTag) + from loopy.kernel.data import (ConcurrentTag, ForceSequentialTag, + get_iname_tags) # {{{ globbing @@ -680,27 +679,27 @@ def tag_inames(kernel, iname_to_tag, force=False, ignore_nonexistent=False): knl_iname_to_tags = kernel.iname_to_tags.copy() for iname, new_tag in six.iteritems(iname_to_tag): + if not new_tag: + continue + old_tags = kernel.iname_to_tags[iname] if iname not in kernel.all_inames(): raise ValueError("cannot tag '%s'--not known" % iname) - if isinstance(new_tag, ConcurrentTag) \ - and check_iname_tags(old_tags, ForceSequentialTag): + if (isinstance(new_tag, ConcurrentTag) + and get_iname_tags(old_tags, ForceSequentialTag)): raise ValueError("cannot tag '%s' as parallel--" "iname requires sequential execution" % iname) - if isinstance(new_tag, ForceSequentialTag) \ - and check_iname_tags(old_tags, ConcurrentTag): + if (isinstance(new_tag, ForceSequentialTag) + and get_iname_tags(old_tags, ConcurrentTag)): raise ValueError("'%s' is already tagged as parallel, " "but is now prohibited from being parallel " "(likely because of participation in a precompute or " "a reduction)" % iname) - if new_tag and all(tag.key != new_tag.key for tag in old_tags): - old_tags = old_tags + (new_tag,) - - knl_iname_to_tags[iname] = old_tags + knl_iname_to_tags[iname] = old_tags.union([new_tag]) return kernel.copy(iname_to_tags=knl_iname_to_tags) @@ -982,9 +981,9 @@ def get_iname_duplication_options(knl, use_boostable_into=False): # Get the duplication options as a tuple of iname and a set for iname, insns in _get_iname_duplication_options(insn_iname_sets): # Check whether this iname has a parallel tag and discard it if so - from loopy.kernel.data import ConcurrentTag, check_iname_tags + from loopy.kernel.data import ConcurrentTag, get_iname_tags if (iname in knl.iname_to_tags - and check_iname_tags(knl.iname_to_tags[iname], ConcurrentTag)): + and get_iname_tags(knl.iname_to_tags[iname], ConcurrentTag)): continue # If we find a duplication option and to not use boostable_into @@ -1501,7 +1500,7 @@ def find_unused_axis_tag(kernel, kind, insn_match=None): """ used_axes = set() - from loopy.kernel.data import GroupIndexTag, LocalIndexTag, check_iname_tags + from loopy.kernel.data import GroupIndexTag, LocalIndexTag, get_iname_tags if isinstance(kind, str): found = False @@ -1521,7 +1520,7 @@ def find_unused_axis_tag(kernel, kind, insn_match=None): for insn in insns: for iname in kernel.insn_inames(insn): dim_tags = kernel.iname_to_tags[iname] - if check_iname_tags(dim_tags, kind): + if get_iname_tags(dim_tags, kind): used_axes.add(kind.axis) i = 0 diff --git a/loopy/transform/privatize.py b/loopy/transform/privatize.py index d1c112eca6a44cdd0bbd2b2826c291cd52ba8a0a..4810784039899a471bbd23c4922438e75275db25 100644 --- a/loopy/transform/privatize.py +++ b/loopy/transform/privatize.py @@ -41,7 +41,6 @@ __doc__ = """ # {{{ privatize temporaries with iname from loopy.symbolic import IdentityMapper -from loopy.kernel.data import check_iname_tags, get_iname_tags class ExtraInameIndexInserter(IdentityMapper): @@ -85,6 +84,7 @@ def privatize_temporaries_with_inames( Example:: +<<<<<<< HEAD:loopy/transform/privatize.py <<<<<<< HEAD:loopy/transform/privatize.py for imatrix, i acc = 0 @@ -95,6 +95,9 @@ def privatize_temporaries_with_inames( ======= from loopy.kernel.data import IlpBaseTag, VectorizeTag, check_iname_tags >>>>>>> d4c1d2e... change tags from set to tuple:loopy/transform/ilp.py +======= + from loopy.kernel.data import IlpBaseTag, VectorizeTag, get_iname_tags +>>>>>>> 38a4424... change tags from tuple to set:loopy/transform/ilp.py might become:: @@ -192,7 +195,7 @@ def privatize_temporaries_with_inames( dim_tags = ["c"] * (len(shape) + len(extra_shape)) for i, iname in enumerate(inames): - if check_iname_tags(kernel.iname_to_tags[iname], VectorizeTag): + if get_iname_tags(kernel.iname_to_tags[iname], VectorizeTag): dim_tags[len(shape) + i] = "vec" new_temp_vars[tv.name] = tv.copy(shape=shape + extra_shape, diff --git a/loopy/transform/save.py b/loopy/transform/save.py index 962a83cd1098b48c7bcf21190609209dc79904b2..cd4c10272651128c4fe311c9e4af66798fa448df 100644 --- a/loopy/transform/save.py +++ b/loopy/transform/save.py @@ -403,26 +403,20 @@ class TemporarySaver(object): continue from loopy.kernel.data import (GroupIndexTag, LocalIndexTag, - ConcurrentTag, get_iname_tags, check_iname_tags) + ConcurrentTag, get_iname_tags) - if check_iname_tags(tags, GroupIndexTag): - tags = get_iname_tags(tags, GroupIndexTag) - if len(tags) > 1: - raise LoopyError("cannot have more than one GroupIndexTags") - tag, = tags + if get_iname_tags(tags, GroupIndexTag): + tag, = get_iname_tags(tags, GroupIndexTag, 1) my_group_tags.append(tag) - elif check_iname_tags(tags, LocalIndexTag): - tags = get_iname_tags(tags, LocalIndexTag) - if len(tags) > 1: - raise LoopyError("cannot have more than one LocalIndexTags") - tag, = tags + elif get_iname_tags(tags, LocalIndexTag): + tag, = get_iname_tags(tags, LocalIndexTag, 1) my_local_tags.append(tag) - elif check_iname_tags(tags, ConcurrentTag): + elif get_iname_tags(tags, ConcurrentTag): raise LoopyError( "iname '%s' is tagged with '%s' - only " "group and local tags are supported for " "auto save/reload of temporaries" % - (iname, ", ".join(str(tag) for tag in tags))) + (iname, tags)) if group_tags is None: group_tags = _sortedtags(my_group_tags)