diff --git a/loopy/check.py b/loopy/check.py index 4b2af1b13a00a1bf216528cbe98ea01dafbeb2b8..11e045d867c378de8aefe26d8c677fd3f158f583 100644 --- a/loopy/check.py +++ b/loopy/check.py @@ -144,20 +144,20 @@ def check_for_inactive_iname_access(kernel): def _is_racing_iname_tag(tv, tag): from loopy.kernel.data import (temp_var_scope, - LocalIndexTagBase, GroupIndexTag, ParallelTag, auto) + LocalIndexTagBase, GroupIndexTag, ConcurrentTag, auto) if tv.scope == temp_var_scope.PRIVATE: return ( - isinstance(tag, ParallelTag) + isinstance(tag, ConcurrentTag) and not isinstance(tag, (LocalIndexTagBase, GroupIndexTag))) elif tv.scope == temp_var_scope.LOCAL: return ( - isinstance(tag, ParallelTag) + isinstance(tag, ConcurrentTag) and not isinstance(tag, GroupIndexTag)) elif tv.scope == temp_var_scope.GLOBAL: - return isinstance(tag, ParallelTag) + return isinstance(tag, ConcurrentTag) elif tv.scope == auto: raise LoopyError("scope of temp var '%s' has not yet been" @@ -169,7 +169,7 @@ def _is_racing_iname_tag(tv, tag): def check_for_write_races(kernel): - from loopy.kernel.data import ParallelTag + from loopy.kernel.data import ConcurrentTag iname_to_tag = kernel.iname_to_tag.get for insn in kernel.instructions: @@ -190,7 +190,7 @@ def check_for_write_races(kernel): raceable_parallel_insn_inames = set( iname for iname in kernel.insn_inames(insn) - if isinstance(iname_to_tag(iname), ParallelTag)) + if isinstance(iname_to_tag(iname), ConcurrentTag)) elif assignee_name in kernel.temporary_variables: temp_var = kernel.temporary_variables[assignee_name] @@ -230,13 +230,13 @@ def check_for_orphaned_user_hardware_axes(kernel): def check_for_data_dependent_parallel_bounds(kernel): - from loopy.kernel.data import ParallelTag + from loopy.kernel.data import ConcurrentTag for i, dom in enumerate(kernel.domains): dom_inames = set(dom.get_var_names(dim_type.set)) par_inames = set(iname for iname in dom_inames - if isinstance(kernel.iname_to_tag.get(iname), ParallelTag)) + if isinstance(kernel.iname_to_tag.get(iname), ConcurrentTag)) if not par_inames: continue diff --git a/loopy/codegen/bounds.py b/loopy/codegen/bounds.py index 61f4b3a9b8c38dfc25ebc81243812aa963423f8a..f398a063dc41f3f82267f6d4850158e4c45f4733 100644 --- a/loopy/codegen/bounds.py +++ b/loopy/codegen/bounds.py @@ -58,7 +58,7 @@ def get_approximate_convex_bounds_checks(domain, check_inames, implemented_domai def get_usable_inames_for_conditional(kernel, sched_index): from loopy.schedule import ( find_active_inames_at, get_insn_ids_for_block_at, has_barrier_within) - from loopy.kernel.data import ParallelTag, LocalIndexTagBase, IlpBaseTag + from loopy.kernel.data import ConcurrentTag, LocalIndexTagBase, IlpBaseTag result = find_active_inames_at(kernel, sched_index) crosses_barrier = has_barrier_within(kernel, sched_index) @@ -97,7 +97,7 @@ def get_usable_inames_for_conditional(kernel, sched_index): # at the innermost level of nesting. if ( - isinstance(tag, ParallelTag) + isinstance(tag, ConcurrentTag) and not (isinstance(tag, LocalIndexTagBase) and crosses_barrier) and not isinstance(tag, IlpBaseTag) ): diff --git a/loopy/codegen/control.py b/loopy/codegen/control.py index e18a51c4d499570c034f3ef8682d48647485b511..5240042337163f0aefcbc7fdb8f3151ac280053f 100644 --- a/loopy/codegen/control.py +++ b/loopy/codegen/control.py @@ -40,7 +40,7 @@ def get_admissible_conditional_inames_for(codegen_state, sched_index): kernel = codegen_state.kernel - from loopy.kernel.data import LocalIndexTag, HardwareParallelTag + from loopy.kernel.data import LocalIndexTag, HardwareConcurrentTag from loopy.schedule import find_active_inames_at, has_barrier_within result = find_active_inames_at(kernel, sched_index) @@ -48,7 +48,7 @@ def get_admissible_conditional_inames_for(codegen_state, sched_index): has_barrier = has_barrier_within(kernel, sched_index) for iname, tag in six.iteritems(kernel.iname_to_tag): - if (isinstance(tag, HardwareParallelTag) + if (isinstance(tag, HardwareConcurrentTag) and codegen_state.is_generating_device_code): if not has_barrier or not isinstance(tag, LocalIndexTag): result.add(iname) @@ -135,12 +135,13 @@ def generate_code_for_sched_index(codegen_state, sched_index): generate_sequential_loop_dim_code) from loopy.kernel.data import (UnrolledIlpTag, UnrollTag, ForceSequentialTag, - LoopedIlpTag, VectorizeTag) + LoopedIlpTag, VectorizeTag, InOrderSequentialSequentialTag) if isinstance(tag, (UnrollTag, UnrolledIlpTag)): func = generate_unroll_loop elif isinstance(tag, VectorizeTag): func = generate_vectorize_loop - elif tag is None or isinstance(tag, (LoopedIlpTag, ForceSequentialTag)): + elif tag is None or isinstance(tag, ( + LoopedIlpTag, ForceSequentialTag, InOrderSequentialSequentialTag)): func = generate_sequential_loop_dim_code else: raise RuntimeError("encountered (invalid) EnterLoop " diff --git a/loopy/codegen/loop.py b/loopy/codegen/loop.py index 1a132049731cd094ba5665857f1afa4f9b04684a..1db7b0445efd2a2e27e761164fa919647df37a07 100644 --- a/loopy/codegen/loop.py +++ b/loopy/codegen/loop.py @@ -231,7 +231,7 @@ def set_up_hw_parallel_loops(codegen_state, schedule_index, next_func, kernel = codegen_state.kernel from loopy.kernel.data import ( - UniqueTag, HardwareParallelTag, LocalIndexTag, GroupIndexTag) + UniqueTag, HardwareConcurrentTag, LocalIndexTag, GroupIndexTag) from loopy.schedule import get_insn_ids_for_block_at insn_ids_for_block = get_insn_ids_for_block_at(kernel.schedule, schedule_index) @@ -243,7 +243,7 @@ def set_up_hw_parallel_loops(codegen_state, schedule_index, next_func, hw_inames_left = [iname for iname in all_inames_by_insns - if isinstance(kernel.iname_to_tag.get(iname), HardwareParallelTag)] + if isinstance(kernel.iname_to_tag.get(iname), HardwareConcurrentTag)] if not hw_inames_left: return next_func(codegen_state) diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py index 4e2819a82162b26a8c53dc25b434990473ed2d2c..642c82c4b02c5b07fb6275a18ed49a8b8ee39104 100644 --- a/loopy/kernel/__init__.py +++ b/loopy/kernel/__init__.py @@ -701,12 +701,12 @@ class LoopKernel(ImmutableRecordWithoutPickling): tag_key_uses = {} - from loopy.kernel.data import HardwareParallelTag + from loopy.kernel.data import HardwareConcurrentTag for iname in cond_inames: tag = self.iname_to_tag.get(iname) - if isinstance(tag, HardwareParallelTag): + if isinstance(tag, HardwareConcurrentTag): tag_key_uses.setdefault(tag.key, []).append(iname) multi_use_keys = set( @@ -716,7 +716,7 @@ class LoopKernel(ImmutableRecordWithoutPickling): multi_use_inames = set() for iname in cond_inames: tag = self.iname_to_tag.get(iname) - if isinstance(tag, HardwareParallelTag) and tag.key in multi_use_keys: + if isinstance(tag, HardwareConcurrentTag) and tag.key in multi_use_keys: multi_use_inames.add(iname) return frozenset(cond_inames - multi_use_inames) diff --git a/loopy/kernel/data.py b/loopy/kernel/data.py index e1941122d12577ff6cdeae5185ac40241a080d96..44cbdea49456904bb61cd93bbe5febbb35bee074 100644 --- a/loopy/kernel/data.py +++ b/loopy/kernel/data.py @@ -77,14 +77,19 @@ class IndexTag(ImmutableRecord): return type(self).__name__ -class ParallelTag(IndexTag): +class ConcurrentTag(IndexTag): pass -class HardwareParallelTag(ParallelTag): +class HardwareConcurrentTag(ConcurrentTag): pass +# deprecated aliases +ParallelTag = ConcurrentTag +HardwareParallelTag = HardwareConcurrentTag + + class UniqueTag(IndexTag): pass @@ -105,11 +110,11 @@ class AxisTag(UniqueTag): self.print_name, self.axis) -class GroupIndexTag(HardwareParallelTag, AxisTag): +class GroupIndexTag(HardwareConcurrentTag, AxisTag): print_name = "g" -class LocalIndexTagBase(HardwareParallelTag): +class LocalIndexTagBase(HardwareConcurrentTag): pass @@ -130,7 +135,7 @@ class AutoFitLocalIndexTag(AutoLocalIndexTagBase): # {{{ ilp-like -class IlpBaseTag(ParallelTag): +class IlpBaseTag(ConcurrentTag): pass @@ -161,6 +166,11 @@ class ForceSequentialTag(IndexTag): return "forceseq" +class InOrderSequentialSequentialTag(IndexTag): + def __str__(self): + return "ord" + + def parse_tag(tag): if tag is None: return tag @@ -173,6 +183,8 @@ def parse_tag(tag): if tag == "for": return None + elif tag == "ord": + return InOrderSequentialSequentialTag() elif tag in ["unr"]: return UnrollTag() elif tag in ["vec"]: diff --git a/loopy/preprocess.py b/loopy/preprocess.py index 02209067f137e0bef8dcd18382ac96b0e743620b..ae70a0d6c07e6b922871c6293162321ea335f80a 100644 --- a/loopy/preprocess.py +++ b/loopy/preprocess.py @@ -292,7 +292,7 @@ def _classify_reduction_inames(kernel, inames): from loopy.kernel.data import ( LocalIndexTagBase, UnrolledIlpTag, UnrollTag, VectorizeTag, - ParallelTag) + ConcurrentTag) for iname in inames: iname_tag = kernel.iname_to_tag.get(iname) @@ -305,7 +305,7 @@ def _classify_reduction_inames(kernel, inames): elif isinstance(iname_tag, LocalIndexTagBase): local_par.append(iname) - elif isinstance(iname_tag, (ParallelTag, VectorizeTag)): + elif isinstance(iname_tag, (ConcurrentTag, VectorizeTag)): nonlocal_par.append(iname) else: diff --git a/loopy/schedule/__init__.py b/loopy/schedule/__init__.py index 3d47a5ec5ff33f7d8184acd8fde92aeaf1855378..7cd07cd84cc32f77e7dfe33cc15d97babedd0714 100644 --- a/loopy/schedule/__init__.py +++ b/loopy/schedule/__init__.py @@ -206,13 +206,13 @@ def find_loop_nest_with_map(kernel): """ result = {} - from loopy.kernel.data import ParallelTag, IlpBaseTag, VectorizeTag + from loopy.kernel.data import ConcurrentTag, IlpBaseTag, VectorizeTag all_nonpar_inames = set([ iname for iname in kernel.all_inames() if not isinstance(kernel.iname_to_tag.get(iname), - (ParallelTag, IlpBaseTag, VectorizeTag))]) + (ConcurrentTag, IlpBaseTag, VectorizeTag))]) iname_to_insns = kernel.iname_to_insns() @@ -274,10 +274,10 @@ def find_loop_insn_dep_map(kernel, loop_nest_with_map, loop_nest_around_map): result = {} - from loopy.kernel.data import ParallelTag, IlpBaseTag, VectorizeTag + from loopy.kernel.data import ConcurrentTag, IlpBaseTag, VectorizeTag for insn in kernel.instructions: for iname in kernel.insn_inames(insn): - if isinstance(kernel.iname_to_tag.get(iname), ParallelTag): + if isinstance(kernel.iname_to_tag.get(iname), ConcurrentTag): continue iname_dep = result.setdefault(iname, set()) @@ -308,7 +308,7 @@ def find_loop_insn_dep_map(kernel, loop_nest_with_map, loop_nest_around_map): continue tag = kernel.iname_to_tag.get(dep_insn_iname) - if isinstance(tag, (ParallelTag, IlpBaseTag, VectorizeTag)): + if isinstance(tag, (ConcurrentTag, IlpBaseTag, VectorizeTag)): # Parallel tags don't really nest, so we'll disregard # them here. continue @@ -1787,7 +1787,7 @@ def generate_loop_schedules_inner(kernel, debug_args={}): for item in preschedule for insn_id in sched_item_to_insn_id(item)) - from loopy.kernel.data import IlpBaseTag, ParallelTag, VectorizeTag + from loopy.kernel.data import IlpBaseTag, ConcurrentTag, VectorizeTag ilp_inames = set( iname for iname in kernel.all_inames() @@ -1798,7 +1798,7 @@ def generate_loop_schedules_inner(kernel, debug_args={}): if isinstance(kernel.iname_to_tag.get(iname), VectorizeTag)) parallel_inames = set( iname for iname in kernel.all_inames() - if isinstance(kernel.iname_to_tag.get(iname), ParallelTag)) + if isinstance(kernel.iname_to_tag.get(iname), ConcurrentTag)) loop_nest_with_map = find_loop_nest_with_map(kernel) loop_nest_around_map = find_loop_nest_around_map(kernel) diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index ea90abfe27c8de69daf39021b3d0ea5463a2e4c8..22fd7b3bb2c643bc3c1309f4e3fdb89438ae7d2b 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -641,7 +641,7 @@ def tag_inames(kernel, iname_to_tag, force=False, ignore_nonexistent=False): iname_to_tag = [(iname, parse_tag(tag)) for iname, tag in iname_to_tag] - from loopy.kernel.data import (ParallelTag, AutoLocalIndexTagBase, + from loopy.kernel.data import (ConcurrentTag, AutoLocalIndexTagBase, ForceSequentialTag) # {{{ globbing @@ -686,13 +686,13 @@ def tag_inames(kernel, iname_to_tag, force=False, ignore_nonexistent=False): if iname not in kernel.all_inames(): raise ValueError("cannot tag '%s'--not known" % iname) - if isinstance(new_tag, ParallelTag) \ + if isinstance(new_tag, ConcurrentTag) \ and isinstance(old_tag, ForceSequentialTag): raise ValueError("cannot tag '%s' as parallel--" "iname requires sequential execution" % iname) if isinstance(new_tag, ForceSequentialTag) \ - and isinstance(old_tag, ParallelTag): + and isinstance(old_tag, ConcurrentTag): raise ValueError("'%s' is already tagged as parallel, " "but is now prohibited from being parallel " "(likely because of participation in a precompute or " @@ -972,9 +972,9 @@ def get_iname_duplication_options(knl, use_boostable_into=False): # Get the duplication options as a tuple of iname and a set for iname, insns in _get_iname_duplication_options(insn_deps): # Check whether this iname has a parallel tag and discard it if so - from loopy.kernel.data import ParallelTag + from loopy.kernel.data import ConcurrentTag if (iname in knl.iname_to_tag - and isinstance(knl.iname_to_tag[iname], ParallelTag)): + and isinstance(knl.iname_to_tag[iname], ConcurrentTag)): continue # If we find a duplication option and fo not use boostable_into diff --git a/loopy/transform/save.py b/loopy/transform/save.py index 3d4f5c2d4765aa7cbf1e56c76d127bf8f4d61a06..2ba2338b0af541274cc0362c9f71cec9c2887ffc 100644 --- a/loopy/transform/save.py +++ b/loopy/transform/save.py @@ -402,13 +402,13 @@ class TemporarySaver(object): continue from loopy.kernel.data import ( - GroupIndexTag, LocalIndexTag, ParallelTag) + GroupIndexTag, LocalIndexTag, ConcurrentTag) if isinstance(tag, GroupIndexTag): my_group_tags.append(tag) elif isinstance(tag, LocalIndexTag): my_local_tags.append(tag) - elif isinstance(tag, ParallelTag): + elif isinstance(tag, ConcurrentTag): raise LoopyError( "iname '%s' is tagged with '%s' - only " "group and local tags are supported for "