diff --git a/loopy/codegen/instruction.py b/loopy/codegen/instruction.py
index 92f8dc1b8c3f08a0680540501b9eca883e68ce33..e209f8ad07a3e0132cdfe76702eac6c22719d550 100644
--- a/loopy/codegen/instruction.py
+++ b/loopy/codegen/instruction.py
@@ -56,12 +56,27 @@ def generate_instruction_code(kernel, insn, codegen_state):
     from loopy.kernel.data import ExpressionInstruction, CInstruction
 
     if isinstance(insn, ExpressionInstruction):
-        return generate_expr_instruction_code(kernel, insn, codegen_state)
+        result = generate_expr_instruction_code(kernel, insn, codegen_state)
     elif isinstance(insn, CInstruction):
-        return generate_c_instruction_code(kernel, insn, codegen_state)
+        result = generate_c_instruction_code(kernel, insn, codegen_state)
     else:
         raise RuntimeError("unexpected instruction type")
 
+    insn_inames = kernel.insn_inames(insn)
+    insn_code, impl_domain = wrap_in_bounds_checks(
+            codegen_state.c_code_mapper,
+            kernel.get_inames_domain(insn_inames), insn_inames,
+            codegen_state.implemented_domain,
+            result)
+
+    if insn_code is None:
+        return None
+
+    return GeneratedInstruction(
+        insn_id=insn.id,
+        implemented_domain=impl_domain,
+        ast=insn_code)
+
 
 def generate_expr_instruction_code(kernel, insn, codegen_state):
     ccm = codegen_state.c_code_mapper
@@ -74,25 +89,11 @@ def generate_expr_instruction_code(kernel, insn, codegen_state):
     from cgen import Assign
     from loopy.codegen.expression import dtype_to_type_context
     lhs_code = ccm(insn.assignee, prec=None, type_context=None)
-    insn_code = Assign(
+    result = Assign(
             lhs_code,
             ccm(expr, prec=None, type_context=dtype_to_type_context(target_dtype),
                 needed_dtype=target_dtype))
 
-    insn_inames = kernel.insn_inames(insn)
-    insn_code, impl_domain = wrap_in_bounds_checks(
-            ccm, kernel.get_inames_domain(insn_inames), insn_inames,
-            codegen_state.implemented_domain,
-            insn_code)
-
-    if insn_code is None:
-        return None
-
-    result = GeneratedInstruction(
-        insn_id=insn.id,
-        implemented_domain=impl_domain,
-        ast=insn_code)
-
     if kernel.flags.trace_assignments or kernel.flags.trace_assignment_values:
         from loopy.codegen import gen_code_block
         from cgen import Statement as S
@@ -141,13 +142,12 @@ def generate_expr_instruction_code(kernel, insn, codegen_state):
                     printf_format, printf_args_str)),
                 implemented_domain=None)
 
+        from cgen import Block
         if kernel.flags.trace_assignment_values:
-            code_block = [result, printf_insn]
+            result = Block([result, printf_insn])
         else:
             # print first, execute later -> helps find segfaults
-            code_block = [printf_insn, result]
-
-        result = gen_code_block(code_block)
+            result = Block([printf_insn, result])
 
     return result
 
@@ -178,19 +178,8 @@ def generate_c_instruction_code(kernel, insn, codegen_state):
 
     body.extend(Line(l) for l in insn.code.split("\n"))
 
-    insn_inames = kernel.insn_inames(insn)
-    insn_code, impl_domain = wrap_in_bounds_checks(
-            ccm, kernel.get_inames_domain(insn_inames), insn_inames,
-            codegen_state.implemented_domain,
-            Block(body))
-
-    if insn_code is None:
-        return None
+    return Block(body)
 
-    return GeneratedInstruction(
-        insn_id=insn.id,
-        implemented_domain=impl_domain,
-        ast=insn_code)
 
 
 # vim: foldmethod=marker