From 26a71d802711eaeb3c19c25e71f08a7b18fc4ea6 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Tue, 17 Oct 2017 09:41:37 -0500 Subject: [PATCH] Rename Parallel->ConcurrentTag, introduce InOrderSequentialSequentialTag --- loopy/check.py | 16 ++++++++-------- loopy/codegen/bounds.py | 4 ++-- loopy/codegen/control.py | 9 +++++---- loopy/codegen/loop.py | 4 ++-- loopy/kernel/__init__.py | 6 +++--- loopy/kernel/data.py | 22 +++++++++++++++++----- loopy/preprocess.py | 4 ++-- loopy/schedule/__init__.py | 14 +++++++------- loopy/transform/iname.py | 10 +++++----- loopy/transform/save.py | 4 ++-- 10 files changed, 53 insertions(+), 40 deletions(-) diff --git a/loopy/check.py b/loopy/check.py index 4b2af1b13..11e045d86 100644 --- a/loopy/check.py +++ b/loopy/check.py @@ -144,20 +144,20 @@ def check_for_inactive_iname_access(kernel): def _is_racing_iname_tag(tv, tag): from loopy.kernel.data import (temp_var_scope, - LocalIndexTagBase, GroupIndexTag, ParallelTag, auto) + LocalIndexTagBase, GroupIndexTag, ConcurrentTag, auto) if tv.scope == temp_var_scope.PRIVATE: return ( - isinstance(tag, ParallelTag) + isinstance(tag, ConcurrentTag) and not isinstance(tag, (LocalIndexTagBase, GroupIndexTag))) elif tv.scope == temp_var_scope.LOCAL: return ( - isinstance(tag, ParallelTag) + isinstance(tag, ConcurrentTag) and not isinstance(tag, GroupIndexTag)) elif tv.scope == temp_var_scope.GLOBAL: - return isinstance(tag, ParallelTag) + return isinstance(tag, ConcurrentTag) elif tv.scope == auto: raise LoopyError("scope of temp var '%s' has not yet been" @@ -169,7 +169,7 @@ def _is_racing_iname_tag(tv, tag): def check_for_write_races(kernel): - from loopy.kernel.data import ParallelTag + from loopy.kernel.data import ConcurrentTag iname_to_tag = kernel.iname_to_tag.get for insn in kernel.instructions: @@ -190,7 +190,7 @@ def check_for_write_races(kernel): raceable_parallel_insn_inames = set( iname for iname in kernel.insn_inames(insn) - if isinstance(iname_to_tag(iname), ParallelTag)) + if 
isinstance(iname_to_tag(iname), ConcurrentTag)) elif assignee_name in kernel.temporary_variables: temp_var = kernel.temporary_variables[assignee_name] @@ -230,13 +230,13 @@ def check_for_orphaned_user_hardware_axes(kernel): def check_for_data_dependent_parallel_bounds(kernel): - from loopy.kernel.data import ParallelTag + from loopy.kernel.data import ConcurrentTag for i, dom in enumerate(kernel.domains): dom_inames = set(dom.get_var_names(dim_type.set)) par_inames = set(iname for iname in dom_inames - if isinstance(kernel.iname_to_tag.get(iname), ParallelTag)) + if isinstance(kernel.iname_to_tag.get(iname), ConcurrentTag)) if not par_inames: continue diff --git a/loopy/codegen/bounds.py b/loopy/codegen/bounds.py index 61f4b3a9b..f398a063d 100644 --- a/loopy/codegen/bounds.py +++ b/loopy/codegen/bounds.py @@ -58,7 +58,7 @@ def get_approximate_convex_bounds_checks(domain, check_inames, implemented_domai def get_usable_inames_for_conditional(kernel, sched_index): from loopy.schedule import ( find_active_inames_at, get_insn_ids_for_block_at, has_barrier_within) - from loopy.kernel.data import ParallelTag, LocalIndexTagBase, IlpBaseTag + from loopy.kernel.data import ConcurrentTag, LocalIndexTagBase, IlpBaseTag result = find_active_inames_at(kernel, sched_index) crosses_barrier = has_barrier_within(kernel, sched_index) @@ -97,7 +97,7 @@ def get_usable_inames_for_conditional(kernel, sched_index): # at the innermost level of nesting. 
if ( - isinstance(tag, ParallelTag) + isinstance(tag, ConcurrentTag) and not (isinstance(tag, LocalIndexTagBase) and crosses_barrier) and not isinstance(tag, IlpBaseTag) ): diff --git a/loopy/codegen/control.py b/loopy/codegen/control.py index e18a51c4d..524004233 100644 --- a/loopy/codegen/control.py +++ b/loopy/codegen/control.py @@ -40,7 +40,7 @@ def get_admissible_conditional_inames_for(codegen_state, sched_index): kernel = codegen_state.kernel - from loopy.kernel.data import LocalIndexTag, HardwareParallelTag + from loopy.kernel.data import LocalIndexTag, HardwareConcurrentTag from loopy.schedule import find_active_inames_at, has_barrier_within result = find_active_inames_at(kernel, sched_index) @@ -48,7 +48,7 @@ def get_admissible_conditional_inames_for(codegen_state, sched_index): has_barrier = has_barrier_within(kernel, sched_index) for iname, tag in six.iteritems(kernel.iname_to_tag): - if (isinstance(tag, HardwareParallelTag) + if (isinstance(tag, HardwareConcurrentTag) and codegen_state.is_generating_device_code): if not has_barrier or not isinstance(tag, LocalIndexTag): result.add(iname) @@ -135,12 +135,13 @@ def generate_code_for_sched_index(codegen_state, sched_index): generate_sequential_loop_dim_code) from loopy.kernel.data import (UnrolledIlpTag, UnrollTag, ForceSequentialTag, - LoopedIlpTag, VectorizeTag) + LoopedIlpTag, VectorizeTag, InOrderSequentialSequentialTag) if isinstance(tag, (UnrollTag, UnrolledIlpTag)): func = generate_unroll_loop elif isinstance(tag, VectorizeTag): func = generate_vectorize_loop - elif tag is None or isinstance(tag, (LoopedIlpTag, ForceSequentialTag)): + elif tag is None or isinstance(tag, ( + LoopedIlpTag, ForceSequentialTag, InOrderSequentialSequentialTag)): func = generate_sequential_loop_dim_code else: raise RuntimeError("encountered (invalid) EnterLoop " diff --git a/loopy/codegen/loop.py b/loopy/codegen/loop.py index 1a1320497..1db7b0445 100644 --- a/loopy/codegen/loop.py +++ b/loopy/codegen/loop.py @@ -231,7 
+231,7 @@ def set_up_hw_parallel_loops(codegen_state, schedule_index, next_func, kernel = codegen_state.kernel from loopy.kernel.data import ( - UniqueTag, HardwareParallelTag, LocalIndexTag, GroupIndexTag) + UniqueTag, HardwareConcurrentTag, LocalIndexTag, GroupIndexTag) from loopy.schedule import get_insn_ids_for_block_at insn_ids_for_block = get_insn_ids_for_block_at(kernel.schedule, schedule_index) @@ -243,7 +243,7 @@ def set_up_hw_parallel_loops(codegen_state, schedule_index, next_func, hw_inames_left = [iname for iname in all_inames_by_insns - if isinstance(kernel.iname_to_tag.get(iname), HardwareParallelTag)] + if isinstance(kernel.iname_to_tag.get(iname), HardwareConcurrentTag)] if not hw_inames_left: return next_func(codegen_state) diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py index 4e2819a82..642c82c4b 100644 --- a/loopy/kernel/__init__.py +++ b/loopy/kernel/__init__.py @@ -701,12 +701,12 @@ class LoopKernel(ImmutableRecordWithoutPickling): tag_key_uses = {} - from loopy.kernel.data import HardwareParallelTag + from loopy.kernel.data import HardwareConcurrentTag for iname in cond_inames: tag = self.iname_to_tag.get(iname) - if isinstance(tag, HardwareParallelTag): + if isinstance(tag, HardwareConcurrentTag): tag_key_uses.setdefault(tag.key, []).append(iname) multi_use_keys = set( @@ -716,7 +716,7 @@ class LoopKernel(ImmutableRecordWithoutPickling): multi_use_inames = set() for iname in cond_inames: tag = self.iname_to_tag.get(iname) - if isinstance(tag, HardwareParallelTag) and tag.key in multi_use_keys: + if isinstance(tag, HardwareConcurrentTag) and tag.key in multi_use_keys: multi_use_inames.add(iname) return frozenset(cond_inames - multi_use_inames) diff --git a/loopy/kernel/data.py b/loopy/kernel/data.py index e1941122d..44cbdea49 100644 --- a/loopy/kernel/data.py +++ b/loopy/kernel/data.py @@ -77,14 +77,19 @@ class IndexTag(ImmutableRecord): return type(self).__name__ -class ParallelTag(IndexTag): +class ConcurrentTag(IndexTag): 
pass -class HardwareParallelTag(ParallelTag): +class HardwareConcurrentTag(ConcurrentTag): pass +# deprecated aliases +ParallelTag = ConcurrentTag +HardwareParallelTag = HardwareConcurrentTag + + class UniqueTag(IndexTag): pass @@ -105,11 +110,11 @@ class AxisTag(UniqueTag): self.print_name, self.axis) -class GroupIndexTag(HardwareParallelTag, AxisTag): +class GroupIndexTag(HardwareConcurrentTag, AxisTag): print_name = "g" -class LocalIndexTagBase(HardwareParallelTag): +class LocalIndexTagBase(HardwareConcurrentTag): pass @@ -130,7 +135,7 @@ class AutoFitLocalIndexTag(AutoLocalIndexTagBase): # {{{ ilp-like -class IlpBaseTag(ParallelTag): +class IlpBaseTag(ConcurrentTag): pass @@ -161,6 +166,11 @@ class ForceSequentialTag(IndexTag): return "forceseq" +class InOrderSequentialSequentialTag(IndexTag): + def __str__(self): + return "ord" + + def parse_tag(tag): if tag is None: return tag @@ -173,6 +183,8 @@ def parse_tag(tag): if tag == "for": return None + elif tag == "ord": + return InOrderSequentialSequentialTag() elif tag in ["unr"]: return UnrollTag() elif tag in ["vec"]: diff --git a/loopy/preprocess.py b/loopy/preprocess.py index 02209067f..ae70a0d6c 100644 --- a/loopy/preprocess.py +++ b/loopy/preprocess.py @@ -292,7 +292,7 @@ def _classify_reduction_inames(kernel, inames): from loopy.kernel.data import ( LocalIndexTagBase, UnrolledIlpTag, UnrollTag, VectorizeTag, - ParallelTag) + ConcurrentTag) for iname in inames: iname_tag = kernel.iname_to_tag.get(iname) @@ -305,7 +305,7 @@ def _classify_reduction_inames(kernel, inames): elif isinstance(iname_tag, LocalIndexTagBase): local_par.append(iname) - elif isinstance(iname_tag, (ParallelTag, VectorizeTag)): + elif isinstance(iname_tag, (ConcurrentTag, VectorizeTag)): nonlocal_par.append(iname) else: diff --git a/loopy/schedule/__init__.py b/loopy/schedule/__init__.py index 3d47a5ec5..7cd07cd84 100644 --- a/loopy/schedule/__init__.py +++ b/loopy/schedule/__init__.py @@ -206,13 +206,13 @@ def 
find_loop_nest_with_map(kernel): """ result = {} - from loopy.kernel.data import ParallelTag, IlpBaseTag, VectorizeTag + from loopy.kernel.data import ConcurrentTag, IlpBaseTag, VectorizeTag all_nonpar_inames = set([ iname for iname in kernel.all_inames() if not isinstance(kernel.iname_to_tag.get(iname), - (ParallelTag, IlpBaseTag, VectorizeTag))]) + (ConcurrentTag, IlpBaseTag, VectorizeTag))]) iname_to_insns = kernel.iname_to_insns() @@ -274,10 +274,10 @@ def find_loop_insn_dep_map(kernel, loop_nest_with_map, loop_nest_around_map): result = {} - from loopy.kernel.data import ParallelTag, IlpBaseTag, VectorizeTag + from loopy.kernel.data import ConcurrentTag, IlpBaseTag, VectorizeTag for insn in kernel.instructions: for iname in kernel.insn_inames(insn): - if isinstance(kernel.iname_to_tag.get(iname), ParallelTag): + if isinstance(kernel.iname_to_tag.get(iname), ConcurrentTag): continue iname_dep = result.setdefault(iname, set()) @@ -308,7 +308,7 @@ def find_loop_insn_dep_map(kernel, loop_nest_with_map, loop_nest_around_map): continue tag = kernel.iname_to_tag.get(dep_insn_iname) - if isinstance(tag, (ParallelTag, IlpBaseTag, VectorizeTag)): + if isinstance(tag, (ConcurrentTag, IlpBaseTag, VectorizeTag)): # Parallel tags don't really nest, so we'll disregard # them here. 
continue @@ -1787,7 +1787,7 @@ def generate_loop_schedules_inner(kernel, debug_args={}): for item in preschedule for insn_id in sched_item_to_insn_id(item)) - from loopy.kernel.data import IlpBaseTag, ParallelTag, VectorizeTag + from loopy.kernel.data import IlpBaseTag, ConcurrentTag, VectorizeTag ilp_inames = set( iname for iname in kernel.all_inames() @@ -1798,7 +1798,7 @@ def generate_loop_schedules_inner(kernel, debug_args={}): if isinstance(kernel.iname_to_tag.get(iname), VectorizeTag)) parallel_inames = set( iname for iname in kernel.all_inames() - if isinstance(kernel.iname_to_tag.get(iname), ParallelTag)) + if isinstance(kernel.iname_to_tag.get(iname), ConcurrentTag)) loop_nest_with_map = find_loop_nest_with_map(kernel) loop_nest_around_map = find_loop_nest_around_map(kernel) diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index ea90abfe2..22fd7b3bb 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -641,7 +641,7 @@ def tag_inames(kernel, iname_to_tag, force=False, ignore_nonexistent=False): iname_to_tag = [(iname, parse_tag(tag)) for iname, tag in iname_to_tag] - from loopy.kernel.data import (ParallelTag, AutoLocalIndexTagBase, + from loopy.kernel.data import (ConcurrentTag, AutoLocalIndexTagBase, ForceSequentialTag) # {{{ globbing @@ -686,13 +686,13 @@ def tag_inames(kernel, iname_to_tag, force=False, ignore_nonexistent=False): if iname not in kernel.all_inames(): raise ValueError("cannot tag '%s'--not known" % iname) - if isinstance(new_tag, ParallelTag) \ + if isinstance(new_tag, ConcurrentTag) \ and isinstance(old_tag, ForceSequentialTag): raise ValueError("cannot tag '%s' as parallel--" "iname requires sequential execution" % iname) if isinstance(new_tag, ForceSequentialTag) \ - and isinstance(old_tag, ParallelTag): + and isinstance(old_tag, ConcurrentTag): raise ValueError("'%s' is already tagged as parallel, " "but is now prohibited from being parallel " "(likely because of participation in a precompute or " @@ 
-972,9 +972,9 @@ def get_iname_duplication_options(knl, use_boostable_into=False): # Get the duplication options as a tuple of iname and a set for iname, insns in _get_iname_duplication_options(insn_deps): # Check whether this iname has a parallel tag and discard it if so - from loopy.kernel.data import ParallelTag + from loopy.kernel.data import ConcurrentTag if (iname in knl.iname_to_tag - and isinstance(knl.iname_to_tag[iname], ParallelTag)): + and isinstance(knl.iname_to_tag[iname], ConcurrentTag)): continue # If we find a duplication option and fo not use boostable_into diff --git a/loopy/transform/save.py b/loopy/transform/save.py index 3d4f5c2d4..2ba2338b0 100644 --- a/loopy/transform/save.py +++ b/loopy/transform/save.py @@ -402,13 +402,13 @@ class TemporarySaver(object): continue from loopy.kernel.data import ( - GroupIndexTag, LocalIndexTag, ParallelTag) + GroupIndexTag, LocalIndexTag, ConcurrentTag) if isinstance(tag, GroupIndexTag): my_group_tags.append(tag) elif isinstance(tag, LocalIndexTag): my_local_tags.append(tag) - elif isinstance(tag, ParallelTag): + elif isinstance(tag, ConcurrentTag): raise LoopyError( "iname '%s' is tagged with '%s' - only " "group and local tags are supported for " -- GitLab