diff --git a/loopy/preprocess.py b/loopy/preprocess.py
index 2b6d97c38a12b47e5b4653297c18b24c40ed938b..0d8e771954cf26cc11747e745946389420fa5e1b 100644
--- a/loopy/preprocess.py
+++ b/loopy/preprocess.py
@@ -539,7 +539,7 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True):
 
         new_insn_add_depends_on.add(prev_id)
         new_insn_add_no_sync_with.add((prev_id, "any"))
-        new_insn_add_within_inames.add(stage_exec_iname or base_exec_iname)
+        new_insn_add_within_inames.add(base_exec_iname or stage_exec_iname)
 
         if nresults == 1:
             assert len(acc_vars) == 1
diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py
index c35b5064365293ac78cdd01af537c9d28bd67193..241b1d975de21b92677709278a872b0a20befe16 100644
--- a/loopy/transform/iname.py
+++ b/loopy/transform/iname.py
@@ -1198,7 +1198,8 @@ class _ReductionSplitter(RuleAwareIdentityMapper):
                 return Reduction(expr.operation, tuple(self.inames),
                         Reduction(expr.operation, tuple(leftover_inames),
                             self.rec(expr.expr, expn_state),
-                            expr.allow_simultaneous))
+                            expr.allow_simultaneous),
+                        expr.allow_simultaneous)
             else:
                 assert False
         else:
diff --git a/test/test_reduction.py b/test/test_reduction.py
index 5887df7a628c46fbf09539fdd48c08aaacd8e409..86e72c0c6644b7b9837a6d74da756c58344b1d6f 100644
--- a/test/test_reduction.py
+++ b/test/test_reduction.py
@@ -181,7 +181,7 @@ def test_recursive_nested_dependent_reduction(ctx_factory):
     # FIXME: Actually test functionality.
 
 
-@pytest.mark.parametrize("size", [128, 5, 113, 67])
+@pytest.mark.parametrize("size", [128, 5, 113, 67, 1])
 def test_local_parallel_reduction(ctx_factory, size):
     ctx = ctx_factory()