From 46377e2050f74c49cf3015e4e0ea2d3e2e7cf895 Mon Sep 17 00:00:00 2001 From: arghdos Date: Wed, 15 Mar 2017 10:59:45 -0400 Subject: [PATCH 1/3] enable simul reduce on split out --- loopy/transform/iname.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index c35b50643..241b1d975 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -1198,7 +1198,8 @@ class _ReductionSplitter(RuleAwareIdentityMapper): return Reduction(expr.operation, tuple(self.inames), Reduction(expr.operation, tuple(leftover_inames), self.rec(expr.expr, expn_state), - expr.allow_simultaneous)) + expr.allow_simultaneous), + expr.allow_simultaneous) else: assert False else: -- GitLab From 2b7224989a60f2abb4cfc547953d50c40778a38f Mon Sep 17 00:00:00 2001 From: arghdos Date: Wed, 15 Mar 2017 10:58:38 -0400 Subject: [PATCH 2/3] fix for issue #44 on gitlab --- loopy/preprocess.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/loopy/preprocess.py b/loopy/preprocess.py index 2b6d97c38..0d8e77195 100644 --- a/loopy/preprocess.py +++ b/loopy/preprocess.py @@ -539,7 +539,7 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True): new_insn_add_depends_on.add(prev_id) new_insn_add_no_sync_with.add((prev_id, "any")) - new_insn_add_within_inames.add(stage_exec_iname or base_exec_iname) + new_insn_add_within_inames.add(base_exec_iname or stage_exec_iname) if nresults == 1: assert len(acc_vars) == 1 -- GitLab From 202b0f5f29d62bdfddd5b044ae98cc527b9027b9 Mon Sep 17 00:00:00 2001 From: arghdos Date: Wed, 15 Mar 2017 11:35:37 -0400 Subject: [PATCH 3/3] add test for size 1 --- test/test_reduction.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_reduction.py b/test/test_reduction.py index 5887df7a6..86e72c0c6 100644 --- a/test/test_reduction.py +++ b/test/test_reduction.py @@ -181,7 +181,7 @@ def test_recursive_nested_dependent_reduction(ctx_factory): # FIXME: Actually test functionality. -@pytest.mark.parametrize("size", [128, 5, 113, 67]) +@pytest.mark.parametrize("size", [128, 5, 113, 67, 1]) def test_local_parallel_reduction(ctx_factory, size): ctx = ctx_factory() -- GitLab