diff --git a/loopy/preprocess.py b/loopy/preprocess.py index 2b6d97c38a12b47e5b4653297c18b24c40ed938b..0d8e771954cf26cc11747e745946389420fa5e1b 100644 --- a/loopy/preprocess.py +++ b/loopy/preprocess.py @@ -539,7 +539,7 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True): new_insn_add_depends_on.add(prev_id) new_insn_add_no_sync_with.add((prev_id, "any")) - new_insn_add_within_inames.add(stage_exec_iname or base_exec_iname) + new_insn_add_within_inames.add(base_exec_iname or stage_exec_iname) if nresults == 1: assert len(acc_vars) == 1 diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index c35b5064365293ac78cdd01af537c9d28bd67193..241b1d975de21b92677709278a872b0a20befe16 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -1198,7 +1198,8 @@ class _ReductionSplitter(RuleAwareIdentityMapper): return Reduction(expr.operation, tuple(self.inames), Reduction(expr.operation, tuple(leftover_inames), self.rec(expr.expr, expn_state), - expr.allow_simultaneous)) + expr.allow_simultaneous), + expr.allow_simultaneous) else: assert False else: diff --git a/test/test_reduction.py b/test/test_reduction.py index 5887df7a628c46fbf09539fdd48c08aaacd8e409..86e72c0c6644b7b9837a6d74da756c58344b1d6f 100644 --- a/test/test_reduction.py +++ b/test/test_reduction.py @@ -181,7 +181,7 @@ def test_recursive_nested_dependent_reduction(ctx_factory): # FIXME: Actually test functionality. -@pytest.mark.parametrize("size", [128, 5, 113, 67]) +@pytest.mark.parametrize("size", [128, 5, 113, 67, 1]) def test_local_parallel_reduction(ctx_factory, size): ctx = ctx_factory()