From 28ff86fe8f87c0a8d7e458f0a4141ef62a0eaca1 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Fri, 24 Jan 2020 14:33:43 -0600 Subject: [PATCH 01/13] make HardwareConcurrentTag base class of VectorizeTag --- loopy/kernel/data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/loopy/kernel/data.py b/loopy/kernel/data.py index 975d7b3ef..9e6e8db66 100644 --- a/loopy/kernel/data.py +++ b/loopy/kernel/data.py @@ -186,7 +186,7 @@ class LoopedIlpTag(IlpBaseTag): # }}} -class VectorizeTag(UniqueTag): +class VectorizeTag(UniqueTag, HardwareConcurrentTag): def __str__(self): return "vec" -- GitLab From 6402d65f8369b70411d8889373c6291fc8c1e4db Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Fri, 24 Jan 2020 14:51:22 -0600 Subject: [PATCH 02/13] now that VectorizeTag is subclass of ConcurrentTag, remove redundant checks for VectorizeTag when already checking for ConcurrentTag --- loopy/preprocess.py | 4 ++-- loopy/schedule/__init__.py | 9 +++++---- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/loopy/preprocess.py b/loopy/preprocess.py index c0eb91ea6..23c4b7fbd 100644 --- a/loopy/preprocess.py +++ b/loopy/preprocess.py @@ -289,7 +289,7 @@ def _classify_reduction_inames(kernel, inames): nonlocal_par = [] from loopy.kernel.data import ( - LocalIndexTagBase, UnrolledIlpTag, UnrollTag, VectorizeTag, + LocalIndexTagBase, UnrolledIlpTag, UnrollTag, ConcurrentTag, filter_iname_tags_by_type) for iname in inames: @@ -303,7 +303,7 @@ def _classify_reduction_inames(kernel, inames): elif filter_iname_tags_by_type(iname_tags, LocalIndexTagBase): local_par.append(iname) - elif filter_iname_tags_by_type(iname_tags, (ConcurrentTag, VectorizeTag)): + elif filter_iname_tags_by_type(iname_tags, ConcurrentTag): nonlocal_par.append(iname) else: diff --git a/loopy/schedule/__init__.py b/loopy/schedule/__init__.py index fb0d0e2c1..e6f8d1ff3 100644 --- a/loopy/schedule/__init__.py +++ b/loopy/schedule/__init__.py @@ -212,12 +212,12 @@ def find_loop_nest_with_map(kernel): 
""" result = {} - from loopy.kernel.data import ConcurrentTag, IlpBaseTag, VectorizeTag + from loopy.kernel.data import ConcurrentTag, IlpBaseTag all_nonpar_inames = set( iname for iname in kernel.all_inames() if not kernel.iname_tags_of_type(iname, - (ConcurrentTag, IlpBaseTag, VectorizeTag))) + (ConcurrentTag, IlpBaseTag))) iname_to_insns = kernel.iname_to_insns() @@ -276,7 +276,7 @@ def find_loop_insn_dep_map(kernel, loop_nest_with_map, loop_nest_around_map): result = {} - from loopy.kernel.data import ConcurrentTag, IlpBaseTag, VectorizeTag + from loopy.kernel.data import ConcurrentTag, IlpBaseTag for insn in kernel.instructions: for iname in kernel.insn_inames(insn): if kernel.iname_tags_of_type(iname, ConcurrentTag): @@ -310,7 +310,7 @@ def find_loop_insn_dep_map(kernel, loop_nest_with_map, loop_nest_around_map): continue if kernel.iname_tags_of_type(dep_insn_iname, - (ConcurrentTag, IlpBaseTag, VectorizeTag)): + (ConcurrentTag, IlpBaseTag)): # Parallel tags don't really nest, so we'll disregard # them here. 
continue @@ -1878,6 +1878,7 @@ def generate_loop_schedules_inner(kernel, debug_args={}): iname for iname, tags in six.iteritems(kernel.iname_to_tags) if filter_iname_tags_by_type(tags, ConcurrentTag)) + # (ConcurrentTag includes VectorizeTag) loop_nest_with_map = find_loop_nest_with_map(kernel) loop_nest_around_map = find_loop_nest_around_map(kernel) -- GitLab From 82a6a4afb170dba9f645576224f449574ee347ac Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Fri, 24 Jan 2020 15:08:58 -0600 Subject: [PATCH 03/13] add some comments noting that Concurrent tags now include Vectorize tags --- loopy/check.py | 5 +++++ loopy/codegen/bounds.py | 1 + loopy/codegen/control.py | 1 + loopy/codegen/loop.py | 1 + loopy/kernel/__init__.py | 1 + loopy/transform/iname.py | 2 ++ 6 files changed, 11 insertions(+) diff --git a/loopy/check.py b/loopy/check.py index cc87ad987..b9eec8a9a 100644 --- a/loopy/check.py +++ b/loopy/check.py @@ -188,6 +188,8 @@ def _is_racing_iname_tag(tv, tag): from loopy.kernel.data import (AddressSpace, LocalIndexTagBase, GroupIndexTag, ConcurrentTag, auto) + # TODO Question: now that Concurrent tags include Vectorize tags, + # should something change here? if tv.address_space == AddressSpace.PRIVATE: return ( isinstance(tag, ConcurrentTag) @@ -213,6 +215,8 @@ def _is_racing_iname_tag(tv, tag): def check_for_write_races(kernel): from loopy.kernel.data import ConcurrentTag + # TODO Question: now that Concurrent tags include Vectorize tags, + # should something change here? 
for insn in kernel.instructions: for assignee_name, assignee_indices in zip( insn.assignee_var_names(), @@ -275,6 +279,7 @@ def check_for_orphaned_user_hardware_axes(kernel): def check_for_data_dependent_parallel_bounds(kernel): from loopy.kernel.data import ConcurrentTag + # Note: Concurrent tags include Vectorize tags for i, dom in enumerate(kernel.domains): dom_inames = set(dom.get_var_names(dim_type.set)) par_inames = set( diff --git a/loopy/codegen/bounds.py b/loopy/codegen/bounds.py index c946e09a0..4a546b5e0 100644 --- a/loopy/codegen/bounds.py +++ b/loopy/codegen/bounds.py @@ -95,6 +95,7 @@ def get_usable_inames_for_conditional(kernel, sched_index): # - ILP indices are not available in loop bounds, they only get defined # at the innermost level of nesting. + # Note: Concurrent tags include Vectorize tags, but IlpBase tags do not if ( kernel.iname_tags_of_type(iname, ConcurrentTag) and not (kernel.iname_tags_of_type(iname, LocalIndexTagBase) diff --git a/loopy/codegen/control.py b/loopy/codegen/control.py index e1520a82e..12dd9f021 100644 --- a/loopy/codegen/control.py +++ b/loopy/codegen/control.py @@ -42,6 +42,7 @@ def get_admissible_conditional_inames_for(codegen_state, sched_index): from loopy.kernel.data import (LocalIndexTag, HardwareConcurrentTag, filter_iname_tags_by_type) + # Note: HardwareConcurrent tags include Vectorize tags from loopy.schedule import find_active_inames_at, has_barrier_within result = find_active_inames_at(kernel, sched_index) diff --git a/loopy/codegen/loop.py b/loopy/codegen/loop.py index 128e4fbc8..2b4726682 100644 --- a/loopy/codegen/loop.py +++ b/loopy/codegen/loop.py @@ -243,6 +243,7 @@ def set_up_hw_parallel_loops(codegen_state, schedule_index, next_func, hw_inames_left = [iname for iname in all_inames_by_insns if kernel.iname_tags_of_type(iname, HardwareConcurrentTag)] + # Note: HardwareConcurrent tags include Vectorize tags if not hw_inames_left: return next_func(codegen_state) diff --git a/loopy/kernel/__init__.py 
b/loopy/kernel/__init__.py index 9096edcc0..ed668e11f 100644 --- a/loopy/kernel/__init__.py +++ b/loopy/kernel/__init__.py @@ -807,6 +807,7 @@ class LoopKernel(ImmutableRecordWithoutPickling): tag_key_uses = defaultdict(list) from loopy.kernel.data import HardwareConcurrentTag + # Note: HardwareConcurrent tags include Vectorize tags for iname in cond_inames: tags = self.iname_tags_of_type(iname, HardwareConcurrentTag, max_num=1) diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index 96c8252ef..cfa4e7896 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -1022,6 +1022,7 @@ def get_iname_duplication_options(knl, use_boostable_into=False): iname for iname in knl.all_inames() if knl.iname_tags_of_type(iname, ConcurrentTag)) + # Note: Concurrent tags include Vectorize tags # First we extract the minimal necessary information from the kernel if use_boostable_into: @@ -1045,6 +1046,7 @@ def get_iname_duplication_options(knl, use_boostable_into=False): # Get the duplication options as a tuple of iname and a set for iname, insns in _get_iname_duplication_options(insn_iname_sets): # Check whether this iname has a parallel tag and discard it if so + # Note: Concurrent tags include Vectorize tags if (iname in knl.iname_to_tags and knl.iname_tags_of_type(iname, ConcurrentTag)): continue -- GitLab From 1b153e4bce6f8455441e4847406d2a6fa0e65cdb Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Fri, 24 Jan 2020 15:24:03 -0600 Subject: [PATCH 04/13] keep VectorizeTags around in set_up_hw_parallel_loops() --- loopy/codegen/loop.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/loopy/codegen/loop.py b/loopy/codegen/loop.py index 2b4726682..b805276f6 100644 --- a/loopy/codegen/loop.py +++ b/loopy/codegen/loop.py @@ -231,7 +231,7 @@ def set_up_hw_parallel_loops(codegen_state, schedule_index, next_func, kernel = codegen_state.kernel from loopy.kernel.data import (UniqueTag, HardwareConcurrentTag, - LocalIndexTag, GroupIndexTag) + 
LocalIndexTag, GroupIndexTag, VectorizeTag) from loopy.schedule import get_insn_ids_for_block_at insn_ids_for_block = get_insn_ids_for_block_at(kernel.schedule, schedule_index) @@ -242,7 +242,8 @@ def set_up_hw_parallel_loops(codegen_state, schedule_index, next_func, all_inames_by_insns |= kernel.insn_inames(insn_id) hw_inames_left = [iname for iname in all_inames_by_insns - if kernel.iname_tags_of_type(iname, HardwareConcurrentTag)] + if kernel.iname_tags_of_type(iname, HardwareConcurrentTag) + and not kernel.iname_tags_of_type(iname, VectorizeTag)] # Note: HardwareConcurrent tags include Vectorize tags if not hw_inames_left: -- GitLab From e91138404265d42a30a534eae1bff9b1bea37c51 Mon Sep 17 00:00:00 2001 From: James Stevens Date: Tue, 28 Jan 2020 17:24:06 +0100 Subject: [PATCH 05/13] Apply suggestion to loopy/check.py --- loopy/check.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/loopy/check.py b/loopy/check.py index b9eec8a9a..bc3d063e2 100644 --- a/loopy/check.py +++ b/loopy/check.py @@ -229,7 +229,7 @@ def check_for_write_races(kernel): % insn.id) if assignee_name in kernel.arg_dict: - # Any parallel tags that are not depended upon by the assignee + # Any concurrent tags that are not depended upon by the assignee # will cause write races. 
raceable_parallel_insn_inames = set( -- GitLab From c380ecfae8928c03c539c69144b1c1a32f691978 Mon Sep 17 00:00:00 2001 From: James Stevens Date: Tue, 28 Jan 2020 17:24:19 +0100 Subject: [PATCH 06/13] Apply suggestion to loopy/check.py --- loopy/check.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/loopy/check.py b/loopy/check.py index bc3d063e2..caf06abf6 100644 --- a/loopy/check.py +++ b/loopy/check.py @@ -224,7 +224,7 @@ def check_for_write_races(kernel): assignee_inames = assignee_indices & kernel.all_inames() if not assignee_inames <= kernel.insn_inames(insn): raise LoopyError( - "assignee of instructiosn '%s' references " + "assignee of instructions '%s' references " "iname that the instruction does not depend on" % insn.id) -- GitLab From 2e0a5dfadd8417cb5e1ca079944622e9ca8484a4 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sat, 1 Feb 2020 06:18:43 -0600 Subject: [PATCH 07/13] remove comments about VectorizeTag extending HardwareConcurrentTag --- loopy/check.py | 5 ----- loopy/codegen/bounds.py | 1 - loopy/codegen/control.py | 1 - loopy/codegen/loop.py | 1 - loopy/kernel/__init__.py | 1 - loopy/transform/iname.py | 2 -- 6 files changed, 11 deletions(-) diff --git a/loopy/check.py b/loopy/check.py index b9eec8a9a..cc87ad987 100644 --- a/loopy/check.py +++ b/loopy/check.py @@ -188,8 +188,6 @@ def _is_racing_iname_tag(tv, tag): from loopy.kernel.data import (AddressSpace, LocalIndexTagBase, GroupIndexTag, ConcurrentTag, auto) - # TODO Question: now that Concurrent tags include Vectorize tags, - # should something change here? if tv.address_space == AddressSpace.PRIVATE: return ( isinstance(tag, ConcurrentTag) @@ -215,8 +213,6 @@ def _is_racing_iname_tag(tv, tag): def check_for_write_races(kernel): from loopy.kernel.data import ConcurrentTag - # TODO Question: now that Concurrent tags include Vectorize tags, - # should something change here? 
for insn in kernel.instructions: for assignee_name, assignee_indices in zip( insn.assignee_var_names(), @@ -279,7 +275,6 @@ def check_for_orphaned_user_hardware_axes(kernel): def check_for_data_dependent_parallel_bounds(kernel): from loopy.kernel.data import ConcurrentTag - # Note: Concurrent tags include Vectorize tags for i, dom in enumerate(kernel.domains): dom_inames = set(dom.get_var_names(dim_type.set)) par_inames = set( diff --git a/loopy/codegen/bounds.py b/loopy/codegen/bounds.py index 4a546b5e0..c946e09a0 100644 --- a/loopy/codegen/bounds.py +++ b/loopy/codegen/bounds.py @@ -95,7 +95,6 @@ def get_usable_inames_for_conditional(kernel, sched_index): # - ILP indices are not available in loop bounds, they only get defined # at the innermost level of nesting. - # Note: Concurrent tags include Vectorize tags, but IlpBase tags do not if ( kernel.iname_tags_of_type(iname, ConcurrentTag) and not (kernel.iname_tags_of_type(iname, LocalIndexTagBase) diff --git a/loopy/codegen/control.py b/loopy/codegen/control.py index 12dd9f021..e1520a82e 100644 --- a/loopy/codegen/control.py +++ b/loopy/codegen/control.py @@ -42,7 +42,6 @@ def get_admissible_conditional_inames_for(codegen_state, sched_index): from loopy.kernel.data import (LocalIndexTag, HardwareConcurrentTag, filter_iname_tags_by_type) - # Note: HardwareConcurrent tags include Vectorize tags from loopy.schedule import find_active_inames_at, has_barrier_within result = find_active_inames_at(kernel, sched_index) diff --git a/loopy/codegen/loop.py b/loopy/codegen/loop.py index b805276f6..b3a877988 100644 --- a/loopy/codegen/loop.py +++ b/loopy/codegen/loop.py @@ -244,7 +244,6 @@ def set_up_hw_parallel_loops(codegen_state, schedule_index, next_func, hw_inames_left = [iname for iname in all_inames_by_insns if kernel.iname_tags_of_type(iname, HardwareConcurrentTag) and not kernel.iname_tags_of_type(iname, VectorizeTag)] - # Note: HardwareConcurrent tags include Vectorize tags if not hw_inames_left: return 
next_func(codegen_state) diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py index ed668e11f..9096edcc0 100644 --- a/loopy/kernel/__init__.py +++ b/loopy/kernel/__init__.py @@ -807,7 +807,6 @@ class LoopKernel(ImmutableRecordWithoutPickling): tag_key_uses = defaultdict(list) from loopy.kernel.data import HardwareConcurrentTag - # Note: HardwareConcurrent tags include Vectorize tags for iname in cond_inames: tags = self.iname_tags_of_type(iname, HardwareConcurrentTag, max_num=1) diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index cfa4e7896..96c8252ef 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -1022,7 +1022,6 @@ def get_iname_duplication_options(knl, use_boostable_into=False): iname for iname in knl.all_inames() if knl.iname_tags_of_type(iname, ConcurrentTag)) - # Note: Concurrent tags include Vectorize tags # First we extract the minimal necessary information from the kernel if use_boostable_into: @@ -1046,7 +1045,6 @@ def get_iname_duplication_options(knl, use_boostable_into=False): # Get the duplication options as a tuple of iname and a set for iname, insns in _get_iname_duplication_options(insn_iname_sets): # Check whether this iname has a parallel tag and discard it if so - # Note: Concurrent tags include Vectorize tags if (iname in knl.iname_to_tags and knl.iname_tags_of_type(iname, ConcurrentTag)): continue -- GitLab From abc503ed96a441419f5b92f0eba078115a6d3397 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sat, 1 Feb 2020 06:22:26 -0600 Subject: [PATCH 08/13] removed one more note about vec tags extending concurrent --- loopy/schedule/__init__.py | 1 - 1 file changed, 1 deletion(-) diff --git a/loopy/schedule/__init__.py b/loopy/schedule/__init__.py index e6f8d1ff3..f145c7122 100644 --- a/loopy/schedule/__init__.py +++ b/loopy/schedule/__init__.py @@ -1878,7 +1878,6 @@ def generate_loop_schedules_inner(kernel, debug_args={}): iname for iname, tags in six.iteritems(kernel.iname_to_tags) if 
filter_iname_tags_by_type(tags, ConcurrentTag)) - # (ConcurrentTag includes VectorizeTag) loop_nest_with_map = find_loop_nest_with_map(kernel) loop_nest_around_map = find_loop_nest_around_map(kernel) -- GitLab From 416d7d1f31e5a7976a124802827036e18340e83e Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 3 Feb 2020 01:55:42 -0600 Subject: [PATCH 09/13] codegen changes: in get_usable_inames_for_conditional(), exclude vec inames in conditional check for ConcurrentTag inames; in get_admissible_conditional_inames_for(), exclude vec inames in conditional check for HardwareConcurrentTag inames --- loopy/codegen/bounds.py | 4 +++- loopy/codegen/control.py | 4 +++- loopy/codegen/loop.py | 4 ++-- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/loopy/codegen/bounds.py b/loopy/codegen/bounds.py index c946e09a0..7e4b50d5a 100644 --- a/loopy/codegen/bounds.py +++ b/loopy/codegen/bounds.py @@ -59,6 +59,7 @@ def get_usable_inames_for_conditional(kernel, sched_index): from loopy.schedule import ( find_active_inames_at, get_insn_ids_for_block_at, has_barrier_within) from loopy.kernel.data import (ConcurrentTag, LocalIndexTagBase, + VectorizeTag, IlpBaseTag) result = find_active_inames_at(kernel, sched_index) @@ -96,7 +97,8 @@ def get_usable_inames_for_conditional(kernel, sched_index): # at the innermost level of nesting. 
if ( - kernel.iname_tags_of_type(iname, ConcurrentTag) + kernel.iname_tags_of_type(iname, ConcurrentTag) - + kernel.iname_tags_of_type(iname, VectorizeTag) and not (kernel.iname_tags_of_type(iname, LocalIndexTagBase) and crosses_barrier) and not kernel.iname_tags_of_type(iname, IlpBaseTag) diff --git a/loopy/codegen/control.py b/loopy/codegen/control.py index e1520a82e..f9addc25f 100644 --- a/loopy/codegen/control.py +++ b/loopy/codegen/control.py @@ -41,6 +41,7 @@ def get_admissible_conditional_inames_for(codegen_state, sched_index): kernel = codegen_state.kernel from loopy.kernel.data import (LocalIndexTag, HardwareConcurrentTag, + VectorizeTag, filter_iname_tags_by_type) from loopy.schedule import find_active_inames_at, has_barrier_within @@ -49,7 +50,8 @@ def get_admissible_conditional_inames_for(codegen_state, sched_index): has_barrier = has_barrier_within(kernel, sched_index) for iname, tags in six.iteritems(kernel.iname_to_tags): - if (filter_iname_tags_by_type(tags, HardwareConcurrentTag) + if (filter_iname_tags_by_type(tags, HardwareConcurrentTag) - + filter_iname_tags_by_type(tags, VectorizeTag) and codegen_state.is_generating_device_code): if not has_barrier or not filter_iname_tags_by_type(tags, LocalIndexTag): result.add(iname) diff --git a/loopy/codegen/loop.py b/loopy/codegen/loop.py index b3a877988..dffe5900f 100644 --- a/loopy/codegen/loop.py +++ b/loopy/codegen/loop.py @@ -242,8 +242,8 @@ def set_up_hw_parallel_loops(codegen_state, schedule_index, next_func, all_inames_by_insns |= kernel.insn_inames(insn_id) hw_inames_left = [iname for iname in all_inames_by_insns - if kernel.iname_tags_of_type(iname, HardwareConcurrentTag) - and not kernel.iname_tags_of_type(iname, VectorizeTag)] + if kernel.iname_tags_of_type(iname, HardwareConcurrentTag) - + kernel.iname_tags_of_type(iname, VectorizeTag)] if not hw_inames_left: return next_func(codegen_state) -- GitLab From 40d236b925ffa9b2f9f59d7ac6a94b555a01f6a9 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: 
Tue, 4 Feb 2020 12:35:39 -0600 Subject: [PATCH 10/13] in set_up_hw_parallel_loops(), instead of set subtraction (HardwareConcurrent-Vectorize) use 'if HardwareConcurrent and not Vectorize' --- loopy/codegen/loop.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/loopy/codegen/loop.py b/loopy/codegen/loop.py index dffe5900f..b3a877988 100644 --- a/loopy/codegen/loop.py +++ b/loopy/codegen/loop.py @@ -242,8 +242,8 @@ def set_up_hw_parallel_loops(codegen_state, schedule_index, next_func, all_inames_by_insns |= kernel.insn_inames(insn_id) hw_inames_left = [iname for iname in all_inames_by_insns - if kernel.iname_tags_of_type(iname, HardwareConcurrentTag) - - kernel.iname_tags_of_type(iname, VectorizeTag)] + if kernel.iname_tags_of_type(iname, HardwareConcurrentTag) + and not kernel.iname_tags_of_type(iname, VectorizeTag)] if not hw_inames_left: return next_func(codegen_state) -- GitLab From ee7b121295a5974945f139119528f13c297a4eff Mon Sep 17 00:00:00 2001 From: "[6~" Date: Sat, 15 Feb 2020 16:30:16 -0500 Subject: [PATCH 11/13] Fix get_usable_inames_for_conditional: CallKernel should not count as inside subkernel --- loopy/codegen/bounds.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/loopy/codegen/bounds.py b/loopy/codegen/bounds.py index 7e4b50d5a..c789a61d2 100644 --- a/loopy/codegen/bounds.py +++ b/loopy/codegen/bounds.py @@ -68,7 +68,7 @@ def get_usable_inames_for_conditional(kernel, sched_index): # Find our containing subkernel. Grab inames for all insns from there. 
within_subkernel = False - for sched_item_index, sched_item in enumerate(kernel.schedule[:sched_index+1]): + for sched_item_index, sched_item in enumerate(kernel.schedule[:sched_index]): from loopy.schedule import CallKernel, ReturnFromKernel if isinstance(sched_item, CallKernel): within_subkernel = True -- GitLab From 5f217ae99bbdfc5432329d04fa747171a5364f92 Mon Sep 17 00:00:00 2001 From: "[6~" Date: Sat, 15 Feb 2020 16:31:45 -0500 Subject: [PATCH 12/13] Make iname eligibility logic in get_usable_inames_for_conditional more readable, improve comment --- loopy/codegen/bounds.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/loopy/codegen/bounds.py b/loopy/codegen/bounds.py index c789a61d2..b736191ec 100644 --- a/loopy/codegen/bounds.py +++ b/loopy/codegen/bounds.py @@ -93,12 +93,12 @@ def get_usable_inames_for_conditional(kernel, sched_index): # # - local indices may not be used in conditionals that cross barriers. # - # - ILP indices are not available in loop bounds, they only get defined - # at the innermost level of nesting. + # - ILP indices and vector lane indices are not available in loop + # bounds, they only get defined at the innermost level of nesting. 
if ( - kernel.iname_tags_of_type(iname, ConcurrentTag) - - kernel.iname_tags_of_type(iname, VectorizeTag) + kernel.iname_tags_of_type(iname, ConcurrentTag) + and not kernel.iname_tags_of_type(iname, VectorizeTag) and not (kernel.iname_tags_of_type(iname, LocalIndexTagBase) and crosses_barrier) and not kernel.iname_tags_of_type(iname, IlpBaseTag) -- GitLab From 3918a6d161b313fdaf522a448709729da26bbc6e Mon Sep 17 00:00:00 2001 From: "[6~" Date: Sat, 15 Feb 2020 16:33:56 -0500 Subject: [PATCH 13/13] Remove duplication of iname-for-conditional logic (Closes #184 on Gitlab) --- loopy/codegen/control.py | 31 +++---------------------------- 1 file changed, 3 insertions(+), 28 deletions(-) diff --git a/loopy/codegen/control.py b/loopy/codegen/control.py index f9addc25f..e9de52eb6 100644 --- a/loopy/codegen/control.py +++ b/loopy/codegen/control.py @@ -24,7 +24,6 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. """ -import six from loopy.codegen.result import merge_codegen_results, wrap_in_if import islpy as isl from loopy.schedule import ( @@ -33,32 +32,6 @@ from loopy.schedule import ( from loopy.diagnostic import LoopyError -def get_admissible_conditional_inames_for(codegen_state, sched_index): - """This function disallows conditionals on local-idx tagged - inames if there is a barrier nested somewhere within. 
- """ - - kernel = codegen_state.kernel - - from loopy.kernel.data import (LocalIndexTag, HardwareConcurrentTag, - VectorizeTag, - filter_iname_tags_by_type) - - from loopy.schedule import find_active_inames_at, has_barrier_within - result = find_active_inames_at(kernel, sched_index) - - has_barrier = has_barrier_within(kernel, sched_index) - - for iname, tags in six.iteritems(kernel.iname_to_tags): - if (filter_iname_tags_by_type(tags, HardwareConcurrentTag) - - filter_iname_tags_by_type(tags, VectorizeTag) - and codegen_state.is_generating_device_code): - if not has_barrier or not filter_iname_tags_by_type(tags, LocalIndexTag): - result.add(iname) - - return frozenset(result) - - def synthesize_idis_for_extra_args(kernel, schedule_index): """ :returns: A list of :class:`loopy.codegen.ImplementedDataInfo` @@ -304,11 +277,13 @@ def build_loop_nest(codegen_state, schedule_index): """ from loopy.schedule import find_used_inames_within + from loopy.codegen.bounds import get_usable_inames_for_conditional + sched_index_info_entries = [ ScheduleIndexInfo( schedule_indices=[i], admissible_cond_inames=( - get_admissible_conditional_inames_for(codegen_state, i)), + get_usable_inames_for_conditional(kernel, i)), required_predicates=get_required_predicates(kernel, i), used_inames_within=find_used_inames_within(kernel, i) ) -- GitLab