diff --git a/MEMO b/MEMO
index 675b8f05793c5b57f6200f753042430b08072434..f8746ea6c72fb0b7d6cf200150731d1da2d236ad 100644
--- a/MEMO
+++ b/MEMO
@@ -62,9 +62,6 @@ Things to consider
 TODO
 ^^^^
 
-- implemented_domain may end up being smaller than requested in cse
-  evaluations--check that!
-
 - FIXME: Deal with insns losing a seq iname dep in a CSE realization
 
   a <- cse(reduce(stuff))
@@ -88,6 +85,9 @@ TODO
 Dealt with
 ^^^^^^^^^^
 
+- implemented_domain may end up being smaller than requested in cse
+  evaluations--check that!
+
 - Allow prioritization of loops in scheduling.
 
 - Make axpy better.
diff --git a/loopy/codegen/__init__.py b/loopy/codegen/__init__.py
index eb0de5869c3dc80efd69ca1e4674b821eae7ecbe..47f0b16a7d8e0ef57a6978d056daad9efafdfd2e 100644
--- a/loopy/codegen/__init__.py
+++ b/loopy/codegen/__init__.py
@@ -9,68 +9,82 @@ import islpy as isl
 
 # {{{ support code for AST wrapper objects
 
+class GeneratedInstruction(Record):
+    """Objects of this type are wrapped around the AST generated for
+    a single instruction to record which domain that AST implements.
+
+    :ivar insn_id: The id of the instruction that *ast* was generated for.
+    :ivar implemented_domain: The domain implemented by *ast*, used to check
+        that each instruction's exact iteration space has been covered.
+    """
+    __slots__ = ["insn_id", "implemented_domain", "ast"]
+
 class GeneratedCode(Record):
     """Objects of this type are wrapped around ASTs upon
     return from generation calls to collect information about them.
+
+    :ivar implemented_domains: A map from an insn id to a list of
+        implemented domains, with the purpose of checking that
+        each instruction's exact iteration space has been covered.
     """
-    __slots__ = ["ast", "num_conditionals"]
+    __slots__ = ["ast", "implemented_domains"]
 
-def gen_code_block(elements, is_alternatives=False, denest=False):
+def gen_code_block(elements):
     """
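-    :param is_alternatives: a :class:`bool` indicating that
-        only one of the *elements* will effectively be executed.
+    :param elements: an iterable of :class:`GeneratedCode` and
+        :class:`GeneratedInstruction` objects, cgen comments and empty lines.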
     """
 
-    from cgen import Generable, Block
+    from cgen import Block, Comment, Line
 
-    conditional_counts = []
     block_els = []
+    implemented_domains = {}
+
     for el in elements:
         if isinstance(el, GeneratedCode):
-            conditional_counts.append(el.num_conditionals)
-            if isinstance(el.ast, Block) and denest:
+            for insn_id, idoms in el.implemented_domains.iteritems():
+                implemented_domains.setdefault(insn_id, []).extend(idoms)
+
+            if isinstance(el.ast, Block):
                 block_els.extend(el.ast.contents)
             else:
                 block_els.append(el.ast)
-        elif isinstance(el, Generable):
+
+        elif isinstance(el, Comment):
             block_els.append(el)
-        else:
-            raise ValueError("unidentifiable object in block")
 
-    if is_alternatives:
-        num_conditionals = min(conditional_counts)
-    else:
-        num_conditionals = sum(conditional_counts)
+        elif isinstance(el, Line):
+            assert not el.text
+            block_els.append(el)
+
+        elif isinstance(el, GeneratedInstruction):
+            block_els.append(el.ast)
+            implemented_domains.setdefault(el.insn_id, []).append(
+                    el.implemented_domain)
+
+        else:
+            raise ValueError("unrecognized object of type '%s' in block"
+                    % type(el))
 
     if len(block_els) == 1:
         ast, = block_els
     else:
         ast = Block(block_els)
 
-    return GeneratedCode(ast=ast, num_conditionals=num_conditionals)
+    return GeneratedCode(ast=ast, implemented_domains=implemented_domains)
 
 def wrap_in(cls, *args):
     inner = args[-1]
     args = args[:-1]
 
-    from cgen import If, Generable
-
-    if isinstance(inner, GeneratedCode):
-        num_conditionals = inner.num_conditionals
-        ast = inner.ast
-    elif isinstance(inner, Generable):
-        num_conditionals = 0
-        ast = inner
+    if not isinstance(inner, GeneratedCode):
+        raise ValueError("unrecognized object of type '%s' in block"
+                % type(inner))
 
-    args = args + (ast,)
-    ast = cls(*args)
+    args = args + (inner.ast,)
 
-    if isinstance(ast, If):
-        import re
-        cond_joiner_re = re.compile(r"\|\||\&\&")
-        num_conditionals += len(cond_joiner_re.split(ast.condition))
-
-    return GeneratedCode(ast=ast, num_conditionals=num_conditionals)
+    return GeneratedCode(ast=cls(*args),
+            implemented_domains=inner.implemented_domains)
 
 def wrap_in_if(condition_codelets, inner):
     from cgen import If
@@ -86,10 +100,12 @@ def add_comment(cmt, code):
     if cmt is None:
         return code
 
-    from cgen import add_comment, Block
-    block_with_comment = add_comment(cmt, code.ast)
-    assert isinstance(block_with_comment, Block)
-    return gen_code_block(block_with_comment.contents)
+    from cgen import add_comment
+    assert isinstance(code, GeneratedCode)
+
+    return GeneratedCode(
+            ast=add_comment(cmt, code.ast),
+            implemented_domains=code.implemented_domains)
 
 # }}}
 
@@ -109,7 +125,7 @@ class CodeGenerationState(object):
 
     def intersect(self, set):
         return CodeGenerationState(
-                self.implemented_domain.intersect(set),
+                self.implemented_domain & set,
                 self.c_code_mapper)
 
     def fix(self, iname, aff, space):
@@ -160,6 +176,32 @@ def make_initial_assignments(kernel):
 
 # }}}
 
+# {{{ sanity-check for implemented domains of each instruction
+
+def check_implemented_domains(kernel, implemented_domains):
+    for insn_id, idomains in implemented_domains.iteritems():
+        assert idomains
+
+        insn_impl_domain = idomains[0]
+        for idomain in idomains[1:]:
+            insn_impl_domain = insn_impl_domain | idomain
+        insn_impl_domain = insn_impl_domain.coalesce()
+
+        insn = kernel.id_to_insn[insn_id]
+        desired_domain = (kernel.domain
+            .eliminate_except(insn.all_inames(), [isl.dim_type.set]))
+
+        if insn_impl_domain != desired_domain:
+            raise RuntimeError("sanity check failed--implemented and desired "
+                    "domain for insn '%s' do not match\n  implemented: %s\n"
+                    "  desired:%s"
+                    % (insn_id, insn_impl_domain, desired_domain))
+
+    # placate the assert at the call site
+    return True
+
+# }}}
+
 # {{{ main code generation entrypoint
 
 def generate_code(kernel):
@@ -301,6 +343,8 @@ def generate_code(kernel):
 
     # }}}
 
+    assert check_implemented_domains(kernel, gen_code.implemented_domains)
+
     return str(mod)
 
 # }}}
diff --git a/loopy/codegen/instruction.py b/loopy/codegen/instruction.py
index 3fd65c4351996678e4f60a0cda0c237392b93c79..60be2e1dcb607d04481b5253fc7407b9329d7461 100644
--- a/loopy/codegen/instruction.py
+++ b/loopy/codegen/instruction.py
@@ -67,6 +67,7 @@ def generate_ilp_instances(kernel, insn, codegen_state):
 
 def generate_instruction_code(kernel, insn, codegen_state):
     result = []
+    from loopy.codegen import GeneratedInstruction
 
     for ilpi in generate_ilp_instances(kernel, insn, codegen_state):
         ccm = codegen_state.c_code_mapper.copy_and_assign_many(ilpi.assignments)
@@ -80,7 +81,10 @@ def generate_instruction_code(kernel, insn, codegen_state):
                 ccm, kernel.domain, insn.all_inames(), ilpi.implemented_domain,
                 insn_code)
 
-        result.append(insn_code)
+        result.append(GeneratedInstruction(
+            insn_id=insn.id,
+            implemented_domain=ilpi.implemented_domain,
+            ast=insn_code))
 
     from loopy.codegen import gen_code_block
     return gen_code_block(result)
diff --git a/loopy/codegen/loop.py b/loopy/codegen/loop.py
index 3e18e997c517749a61eb6fc616806e07d682137c..3e5d484aeb987b24b5feaf7a51438db0682d8925 100644
--- a/loopy/codegen/loop.py
+++ b/loopy/codegen/loop.py
@@ -221,13 +221,13 @@ def set_up_hw_parallel_loops(kernel, sched_index, codegen_state, hw_inames_left=
         if len(slabs) == 1:
             cmt = None
 
-        new_kernel = kernel.copy(domain=kernel.domain.intersect(slab))
+        new_kernel = kernel.copy(domain=kernel.domain & slab)
         inner = set_up_hw_parallel_loops(
                 new_kernel, sched_index, codegen_state, hw_inames_left)
         result.append(add_comment(cmt, inner))
 
     from loopy.codegen import gen_code_block
-    return gen_code_block(result, is_alternatives=True)
+    return gen_code_block(result)
 
 # }}}
 
diff --git a/loopy/kernel.py b/loopy/kernel.py
index 1f9b3ac23825658e80e2fb55e4db72806953a4af..0ee99b860b1cf6829d44865dc8a4cf09f4c89dec 100644
--- a/loopy/kernel.py
+++ b/loopy/kernel.py
@@ -590,12 +590,12 @@ class LoopKernel(Record):
 
     @memoize_method
     def get_iname_bounds(self, iname):
-        lower_bound_pw_aff = (self.domain
-                .intersect(self.assumptions)
+        lower_bound_pw_aff = (
+                (self.domain & self.assumptions)
                 .dim_min(self.iname_to_dim[iname][1])
                 .coalesce())
-        upper_bound_pw_aff = (self.domain
-                .intersect(self.assumptions)
+        upper_bound_pw_aff = (
+                (self.domain & self.assumptions)
                 .dim_max(self.iname_to_dim[iname][1])
                 .coalesce())