diff --git a/doc/tutorial.rst b/doc/tutorial.rst
index 29fab76e7af1d0a0dfa548a056a36273cf553b38..8e7f46722007d47e05e1d59f460a7af8d047398d 100644
--- a/doc/tutorial.rst
+++ b/doc/tutorial.rst
@@ -53,6 +53,13 @@ And some data on the host:
 
 .. }}}
 
+We'll also disable console syntax highlighting because it confuses
+doctest::
+
+    >>> # not a documented interface
+    >>> import loopy.options
+    >>> loopy.options.ALLOW_TERMINAL_COLORS = False
+
 Getting started
 ---------------
 
@@ -797,17 +804,19 @@ enabling some cost savings:
         a[4 * i_outer + 3] = 0.0f;
       }
       /* final slab for 'i_outer' */
-      int const i_outer = -1 + n + -1 * (3 * n / 4);
-    <BLANKLINE>
-      if (-1 + n >= 0)
       {
-        a[4 * i_outer] = 0.0f;
-        if (-2 + -4 * i_outer + n >= 0)
-          a[4 * i_outer + 1] = 0.0f;
-        if (-3 + -4 * i_outer + n >= 0)
-          a[4 * i_outer + 2] = 0.0f;
-        if (4 + 4 * i_outer + -1 * n == 0)
-          a[4 * i_outer + 3] = 0.0f;
+        int const i_outer = -1 + n + -1 * (3 * n / 4);
+    <BLANKLINE>
+        if (-1 + n >= 0)
+        {
+          a[4 * i_outer] = 0.0f;
+          if (-2 + -4 * i_outer + n >= 0)
+            a[4 * i_outer + 1] = 0.0f;
+          if (-3 + -4 * i_outer + n >= 0)
+            a[4 * i_outer + 2] = 0.0f;
+          if (4 + 4 * i_outer + -1 * n == 0)
+            a[4 * i_outer + 3] = 0.0f;
+        }
       }
     ...
 
@@ -1525,16 +1534,18 @@ Now to make things more interesting, we'll create a kernel with barriers:
     {
       __local int c[50 * 10 * 99];
     <BLANKLINE>
-      int const k_outer = 0;
+      {
+        int const k_outer = 0;
     <BLANKLINE>
-      for (int j = 0; j <= 9; ++j)
-        for (int i = 0; i <= 49; ++i)
-        {
-          barrier(CLK_LOCAL_MEM_FENCE) /* for c (insn rev-depends on insn_0) */;
-          c[990 * i + 99 * j + lid(0) + 1] = 2 * a[980 * i + 98 * j + lid(0) + 1];
-          barrier(CLK_LOCAL_MEM_FENCE) /* for c (insn_0 depends on insn) */;
-          e[980 * i + 98 * j + lid(0) + 1] = c[990 * i + 99 * j + 1 + lid(0) + 1] + c[990 * i + 99 * j + -1 + lid(0) + 1];
-        }
+        for (int j = 0; j <= 9; ++j)
+          for (int i = 0; i <= 49; ++i)
+          {
+            barrier(CLK_LOCAL_MEM_FENCE) /* for c (insn rev-depends on insn_0) */;
+            c[990 * i + 99 * j + lid(0) + 1] = 2 * a[980 * i + 98 * j + lid(0) + 1];
+            barrier(CLK_LOCAL_MEM_FENCE) /* for c (insn_0 depends on insn) */;
+            e[980 * i + 98 * j + lid(0) + 1] = c[990 * i + 99 * j + 1 + lid(0) + 1] + c[990 * i + 99 * j + -1 + lid(0) + 1];
+          }
+      }
     }
 
 
diff --git a/loopy/codegen/loop.py b/loopy/codegen/loop.py
index 3d3095d535e67161ec833549cb4b1aa0dedd1eef..8ac963835ec12702f2010806d1d49062422318a2 100644
--- a/loopy/codegen/loop.py
+++ b/loopy/codegen/loop.py
@@ -439,7 +439,7 @@ def generate_sequential_loop_dim_code(codegen_state, sched_index):
 
         if (ubound - lbound).plain_is_equal(zero):
             # single-trip, generate just a variable assignment, not a loop
-            result.append(merge_codegen_results(codegen_state, [
+            inner = merge_codegen_results(codegen_state, [
                 astb.emit_initializer(
                     codegen_state,
                     kernel.index_dtype, loop_iname,
@@ -447,7 +447,12 @@ def generate_sequential_loop_dim_code(codegen_state, sched_index):
                     is_const=True),
                 astb.emit_blank_line(),
                 inner,
-                ]))
+                ])
+            result.append(
+                    inner.with_new_ast(
+                        codegen_state,
+                        astb.ast_block_scope_class(
+                            inner.current_ast(codegen_state))))
 
         else:
             inner_ast = inner.current_ast(codegen_state)
diff --git a/loopy/codegen/result.py b/loopy/codegen/result.py
index 04fab05afdc38a8843a566e0e6e6b10098d6415c..c683d120dbf6a1205618f8835e8f9c72dd13adf7 100644
--- a/loopy/codegen/result.py
+++ b/loopy/codegen/result.py
@@ -207,6 +207,7 @@ def merge_codegen_results(codegen_state, elements, collapse=True):
     codegen_result = None
 
     block_cls = codegen_state.ast_builder.ast_block_class
+    block_scope_cls = codegen_state.ast_builder.ast_block_scope_class
 
     for el in elements:
         if isinstance(el, CodeGenerationResult):
@@ -227,7 +228,8 @@ def merge_codegen_results(codegen_state, elements, collapse=True):
                         dev_program_names.add(dp.name)
 
             cur_ast = el.current_ast(codegen_state)
-            if isinstance(cur_ast, block_cls):
+            if (isinstance(cur_ast, block_cls)
+                    and not isinstance(cur_ast, block_scope_cls)):
                 ast_els.extend(cur_ast.contents)
             else:
                 ast_els.append(cur_ast)
diff --git a/loopy/options.py b/loopy/options.py
index a19afdaca8a4d14bcdf21397d1ea2d7fdd5a1a82..c88c512cb332ceec4587fd3c5011b9f729cad7d5 100644
--- a/loopy/options.py
+++ b/loopy/options.py
@@ -28,6 +28,9 @@ from pytools import Record
 import re
 
 
+ALLOW_TERMINAL_COLORS = True  # set to False to make Options default to no colors
+
+
 class _ColoramaStub(object):
     def __getattribute__(self, name):
         return ""
@@ -38,10 +41,18 @@ def _apply_legacy_map(lmap, kwargs):
 
     for name, val in six.iteritems(kwargs):
         try:
-            new_name, translator = lmap[name]
+            lmap_value = lmap[name]
         except KeyError:
             new_name = name
         else:
+            if lmap_value is None:
+                # ignore this
+                from warnings import warn
+                warn("option '%s' is deprecated and was ignored" % name,
+                        DeprecationWarning)
+                continue
+
+            new_name, translator = lmap_value
             if name in result:
                 raise TypeError("may not pass a value for both '%s' and '%s'"
                         % (name, new_name))
@@ -113,19 +124,11 @@ class Options(Record):
         Accepts a file name as a value. Writes to
         ``sys.stdout`` if none is given.
 
-    .. attribute:: disable_wrapper_highlight
-
-        Use syntax highlighting in :attr:`write_wrapper`.
-
     .. attribute:: write_code
 
         Print the generated code.  Accepts a file name or a boolean as a value.
         Writes to ``sys.stdout`` if set to *True*.
 
-    .. attribute:: disable_code_highlight
-
-        Use syntax highlighting in :attr:`write_code`.
-
     .. attribute:: edit_code
 
         Invoke an editor (given by the environment variable
@@ -150,8 +153,10 @@ class Options(Record):
     _legacy_options_map = {
             "cl_build_options": ("build_options", None),
             "write_cl": ("write_code", None),
-            "highlight_cl": ("disable_code_highlight", lambda val: not val),
-            "highlight_wrapper": ("disable_wrapper_highlight", lambda val: not val),
+            "highlight_cl": None,
+            "highlight_wrapper": None,
+            "disable_wrapper_highlight": None,
+            "disable_code_highlight": None,
             "edit_cl": ("edit_code", None),
             }
 
@@ -173,6 +178,9 @@ class Options(Record):
         else:
             allow_terminal_colors_def = True
 
+        allow_terminal_colors_def = (
+                ALLOW_TERMINAL_COLORS and allow_terminal_colors_def)
+
         Record.__init__(
                 self,
 
@@ -185,9 +193,7 @@ class Options(Record):
                 no_numpy=kwargs.get("no_numpy", False),
                 return_dict=kwargs.get("return_dict", False),
                 write_wrapper=kwargs.get("write_wrapper", False),
-                highlight_wrapper=kwargs.get("highlight_wrapper", False),
                 write_code=kwargs.get("write_code", False),
-                disable_code_highlight=kwargs.get("disable_code_highlight", False),
                 edit_code=kwargs.get("edit_code", False),
                 build_options=kwargs.get("build_options", []),
                 allow_terminal_colors=kwargs.get("allow_terminal_colors",
@@ -208,7 +214,11 @@ class Options(Record):
 
     @property
     def highlight_cl(self):
-        return not self.disable_code_highlight
+        return self.allow_terminal_colors
+
+    @property
+    def highlight_wrapper(self):
+        return self.allow_terminal_colors
 
     @property
     def write_cl(self):
diff --git a/loopy/target/__init__.py b/loopy/target/__init__.py
index 409b9badb639c500e70404e781036b2e39bf333f..5d5743bae322fc59c989cafd85122c8ca619c422 100644
--- a/loopy/target/__init__.py
+++ b/loopy/target/__init__.py
@@ -216,6 +216,9 @@ class ASTBuilderBase(object):
     def emit_initializer(self, codegen_state, dtype, name, val_str, is_const):
         raise NotImplementedError()
 
+    def emit_declaration_scope(self, codegen_state, inner):
+        raise NotImplementedError()
+
     def emit_blank_line(self):
         raise NotImplementedError()
 
@@ -267,6 +270,10 @@ class DummyHostASTBuilder(ASTBuilderBase):
     def ast_block_class(self):
         return _DummyASTBlock
 
+    @property
+    def ast_block_scope_class(self):
+        return _DummyASTBlock
+
     def emit_assignment(self, codegen_state, insn):
         return None
 
diff --git a/loopy/target/c/__init__.py b/loopy/target/c/__init__.py
index 8b81efb312d996a2cd972a416ddf7d190e580d13..be83ec90c4720f10876e1a5e47a43c429fc40aeb 100644
--- a/loopy/target/c/__init__.py
+++ b/loopy/target/c/__init__.py
@@ -29,7 +29,7 @@ import six
 import numpy as np  # noqa
 from loopy.target import TargetBase, ASTBuilderBase, DummyHostASTBuilder
 from loopy.diagnostic import LoopyError
-from cgen import Pointer, NestedDeclarator
+from cgen import Pointer, NestedDeclarator, Block
 from cgen.mapper import IdentityMapper as CASTIdentityMapperBase
 from pymbolic.mapper.stringifier import PREC_NONE
 from loopy.symbolic import IdentityMapper
@@ -133,6 +133,12 @@ class POD(Declarator):
     mapper_method = "map_loopy_pod"
 
 
+class ScopingBlock(Block):
+    """A block that is mandatory for scoping and may not be simplified away
+    by :func:`loopy.codegen.result.merge_codegen_results`.
+    """
+
+
 class FunctionDeclarationWrapper(NestedDeclarator):
     mapper_method = "map_function_decl_wrapper"
 
@@ -513,6 +519,10 @@ class CASTBuilder(ASTBuilderBase):
         from cgen import Block
         return Block
 
+    @property
+    def ast_block_scope_class(self):
+        return ScopingBlock
+
     # }}}
 
     # {{{ code generation guts
diff --git a/loopy/target/python.py b/loopy/target/python.py
index a348cba83008794ee3d02a61ff90f6d81d1a9322..09a86665b7d949d7bf35b910cd2a6fd66109c1ec 100644
--- a/loopy/target/python.py
+++ b/loopy/target/python.py
@@ -33,6 +33,7 @@ from loopy.type_inference import TypeInferenceMapper
 from loopy.kernel.data import ValueArg
 from loopy.diagnostic import LoopyError  # noqa
 from loopy.target import ASTBuilderBase
+from genpy import Suite
 
 
 # {{{ expression to code
@@ -145,6 +146,17 @@ class ExpressionToPythonMapper(StringifyMapper):
 # }}}
 
 
+# {{{ genpy extensions
+
+class Collection(Suite):
+    def generate(self):
+        for item in self.contents:
+            for item_line in item.generate():
+                yield item_line
+
+# }}}
+
+
 # {{{ ast builder
 
 def _numpy_single_arg_function_mangler(kernel, name, arg_dtypes):
@@ -232,9 +244,15 @@ class PythonASTBuilderBase(ASTBuilderBase):
 
     @property
     def ast_block_class(self):
-        from genpy import Suite
         return Suite
 
+    @property
+    def ast_block_scope_class(self):
+        # Once a new version of genpy is released, switch to this:
+        # from genpy import Collection
+        # and delete the implementation above.
+        return Collection
+
     def emit_sequential_loop(self, codegen_state, iname, iname_dtype,
             lbound, ubound, inner):
         ecm = codegen_state.expression_to_code_mapper