From f07ebbf8f2c23a3472f232abd78edba29000e83b Mon Sep 17 00:00:00 2001 From: arghdos Date: Wed, 4 Oct 2017 14:05:23 -0400 Subject: [PATCH 1/3] project out local-id-mapped inames before domain comparison for barriers; solves #94 on gitlab --- loopy/check.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/loopy/check.py b/loopy/check.py index a8ec1ad35..4b2af1b13 100644 --- a/loopy/check.py +++ b/loopy/check.py @@ -708,6 +708,16 @@ def check_implemented_domains(kernel, implemented_domains, code=None): (insn_impl_domain & assumptions) .project_out_except(insn_inames, [dim_type.set])) + from loopy.kernel.instruction import BarrierInstruction + from loopy.kernel.data import LocalIndexTag + if isinstance(insn, BarrierInstruction): + # project out local-id-mapped inames, solves #94 on gitlab + non_lid_inames = frozenset( + [iname for iname in insn_inames if not isinstance( + kernel.iname_to_tag.get(iname), LocalIndexTag)]) + insn_impl_domain = insn_impl_domain.project_out_except( + non_lid_inames, [dim_type.set]) + insn_domain = kernel.get_inames_domain(insn_inames) insn_parameters = frozenset(insn_domain.get_var_names(dim_type.param)) assumptions, insn_domain = align_two(assumption_non_param, insn_domain) @@ -715,6 +725,11 @@ def check_implemented_domains(kernel, implemented_domains, code=None): .project_out_except(insn_inames, [dim_type.set]) .project_out_except(insn_parameters, [dim_type.param])) + if isinstance(insn, BarrierInstruction): + # project out local-id-mapped inames, solves #94 on gitlab + desired_domain = desired_domain.project_out_except( + non_lid_inames, [dim_type.set]) + insn_impl_domain = (insn_impl_domain .project_out_except(insn_parameters, [dim_type.param])) insn_impl_domain, desired_domain = align_two( -- GitLab From 0dd58286356f8d7c9b5f21f102865c0d69946388 Mon Sep 17 00:00:00 2001 From: arghdos Date: Wed, 4 Oct 2017 14:13:45 -0400 Subject: [PATCH 2/3] add test --- test/test_loopy.py | 36 ++++++++++++++++++++++++++++++++++++ 1 
file changed, 36 insertions(+) diff --git a/test/test_loopy.py b/test/test_loopy.py index 563964cf0..3888de0f3 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -2265,6 +2265,42 @@ def test_barrier_insertion_near_bottom_of_loop(): assert_barrier_between(knl, "ainit", "aupdate", ignore_barriers_in_levels=[1]) +def test_barrier_in_overridden_get_grid_size_expanded_kernel(): + from loopy.kernel.data import temp_var_scope as scopes + + # make simple barrier'd kernel + knl = lp.make_kernel('{[i]: 0 <= i < 10}', + """ + for i + a[i] = i {id=a} + ... lbarrier {id=barrier} + b[i + 1] = a[i] {nosync=a} + end + """, + [lp.TemporaryVariable("a", np.float32, shape=(10,), order='C', + scope=scopes.LOCAL), + lp.GlobalArg("b", np.float32, shape=(11,), order='C')], + seq_dependencies=True) + + # split into kernel w/ vecsize larger than iname domain + vecsize = 16 + knl = lp.split_iname(knl, 'i', vecsize, inner_tag='l.0') + + # artifically expand via overridden_get_grid_sizes_for_insn_ids + class ggs(object): + def __init__(self, clean, vecsize=vecsize): + self.clean = clean + self.vecsize = vecsize + + def __call__(self, insn_ids, ignore_auto=True): + gsize, _ = self.clean.get_grid_sizes_for_insn_ids(insn_ids, ignore_auto) + return gsize, (self.vecsize,) + + knl = knl.copy(overridden_get_grid_sizes_for_insn_ids=ggs(knl.copy(), vecsize)) + # make sure we can generate the code + lp.generate_code_v2(knl) + + def test_multi_argument_reduction_type_inference(): from loopy.type_inference import TypeInferenceMapper from loopy.library.reduction import SegmentedSumReductionOperation -- GitLab From b8cdf5f2246b48653b01c1144c7e09d7d26cf0cf Mon Sep 17 00:00:00 2001 From: arghdos Date: Wed, 4 Oct 2017 14:22:16 -0400 Subject: [PATCH 3/3] flake8 naming --- test/test_loopy.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/test/test_loopy.py b/test/test_loopy.py index 3888de0f3..97e3a0806 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -2287,7 +2287,7 
@@ def test_barrier_in_overridden_get_grid_size_expanded_kernel(): knl = lp.split_iname(knl, 'i', vecsize, inner_tag='l.0') # artifically expand via overridden_get_grid_sizes_for_insn_ids - class ggs(object): + class GridOverride(object): def __init__(self, clean, vecsize=vecsize): self.clean = clean self.vecsize = vecsize @@ -2296,7 +2296,8 @@ def test_barrier_in_overridden_get_grid_size_expanded_kernel(): gsize, _ = self.clean.get_grid_sizes_for_insn_ids(insn_ids, ignore_auto) return gsize, (self.vecsize,) - knl = knl.copy(overridden_get_grid_sizes_for_insn_ids=ggs(knl.copy(), vecsize)) + knl = knl.copy(overridden_get_grid_sizes_for_insn_ids=GridOverride( + knl.copy(), vecsize)) # make sure we can generate the code lp.generate_code_v2(knl) -- GitLab