diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index a6f81e5b6e19af6362b51aec26f277a8d83bb829..c11e507ee79cdc6f1567acbf6c12bbd7ed22f1cc 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -17,38 +17,36 @@ Python 2.7 POCL:
       junit: test/pytest.xml
 
 
-Python 2.7 with legacy PyOpenCL:
+Python 3 POCL:
   script:
-  - export PY_EXE=python2.7
+  - export PY_EXE=python3
   - export PYOPENCL_TEST=portable
   - export EXTRA_INSTALL="pybind11 numpy mako"
-  - export REQUIREMENTS_TXT="requirements-old-pyopencl.txt"
   - export LOOPY_NO_CACHE=1
-  - export NO_DOCTESTS=1
   - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project.sh
   - ". ./build-and-test-py-project.sh"
   tags:
-  - python2.7
+  - python3
   - pocl
   except:
   - tags
-  retry: 2
   artifacts:
     reports:
       junit: test/pytest.xml
 
-
-Python 3 POCL:
+Python 3 Intel:
   script:
   - export PY_EXE=python3
-  - export PYOPENCL_TEST=portable
+  - export PYOPENCL_TEST=intel
   - export EXTRA_INSTALL="pybind11 numpy mako"
   - export LOOPY_NO_CACHE=1
+  - export LOOPY_INTEL_CL_OK_FOR_TEST_REF=1
+  - source /opt/enable-intel-cl.sh
   - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project.sh
   - ". ./build-and-test-py-project.sh"
   tags:
   - python3
-  - pocl
+  - intel-cl-cpu
   except:
   - tags
   artifacts:
diff --git a/azure-pipelines.yml b/azure-pipelines.yml
index 6e0979a776b49556b69407b9d0d3717ca0d7761c..0dfb2455568b275b40e699683071da3a1cd2f483 100644
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@@ -97,9 +97,18 @@ jobs:
     -
         script: |
             set -e
-            CONDA_ENVIRONMENT=.test-conda-env-py3.yml
+            sed 's/python=3/python=3.7/' .test-conda-env-py3.yml > .test-conda-env.yml
+            CONDA_ENVIRONMENT=.test-conda-env.yml
             USE_CONDA_BUILD=1
             curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/prepare-and-run-pylint.sh
             . ./prepare-and-run-pylint.sh loopy test/test_*.py
 
         displayName: 'Pylint'
+
+schedules:
+-
+    cron: "0 0 * * 0"
+    displayName: Weekly build
+    branches:
+        include:
+        - master
diff --git a/loopy/__init__.py b/loopy/__init__.py
index d69a57bf1a5435adfb067df5cfb2080633cac765..b60de6e2dcd35c1c167bf5e303401f2c6242ebec 100644
--- a/loopy/__init__.py
+++ b/loopy/__init__.py
@@ -142,7 +142,7 @@ from loopy.frontend.fortran import (c_preprocess, parse_transformed_fortran,
         parse_fortran)
 
 from loopy.target import TargetBase, ASTBuilderBase
-from loopy.target.c import CTarget, ExecutableCTarget, generate_header
+from loopy.target.c import CFamilyTarget, CTarget, ExecutableCTarget, generate_header
 from loopy.target.cuda import CudaTarget
 from loopy.target.opencl import OpenCLTarget
 from loopy.target.pyopencl import PyOpenCLTarget
@@ -271,7 +271,7 @@ __all__ = [
         "LoopyError", "LoopyWarning",
 
         "TargetBase",
-        "CTarget", "ExecutableCTarget", "generate_header",
+        "CFamilyTarget", "CTarget", "ExecutableCTarget", "generate_header",
         "CudaTarget", "OpenCLTarget",
         "PyOpenCLTarget", "ISPCTarget",
         "NumbaTarget", "NumbaCudaTarget",
@@ -473,7 +473,8 @@ def make_copy_kernel(new_dim_tags, old_dim_tags=None):
                 )
     result = make_kernel(set_str,
             "output[%s] = input[%s]"
-            % (commad_indices, commad_indices))
+            % (commad_indices, commad_indices),
+            lang_version=MOST_RECENT_LANGUAGE_VERSION)
 
     result = tag_array_axes(result, "input", old_dim_tags)
     result = tag_array_axes(result, "output", new_dim_tags)
diff --git a/loopy/auto_test.py b/loopy/auto_test.py
index 66b3cb0bde00dee3ae46fcf63052219cc3664360..6837b99a026debf32b12aceef00ed3863c620639 100644
--- a/loopy/auto_test.py
+++ b/loopy/auto_test.py
@@ -23,9 +23,11 @@ THE SOFTWARE.
 """
 
 from six.moves import range, zip
-from pytools import Record
+import os
 from warnings import warn
 
+from pytools import Record
+
 import numpy as np
 
 import loopy as lp
@@ -321,7 +323,7 @@ def _default_check_result(result, ref_result):
 
 # {{{ find device for reference test
 
-def _enumerate_cl_devices_for_ref_test(blacklist_ref_vendors):
+def _enumerate_cl_devices_for_ref_test(blacklist_ref_vendors, need_image_support):
     import pyopencl as cl
 
     noncpu_devs = []
@@ -336,8 +338,17 @@ def _enumerate_cl_devices_for_ref_test(blacklist_ref_vendors):
                     for bl in blacklist_ref_vendors):
                 continue
 
+            if need_image_support:
+                if not dev.image_support:
+                    continue
+                if pf.vendor == "The pocl project":
+                    # pocl reports image support, but is skipped here regardless.
+                    continue
+
             if dev.type & cl.device_type.CPU:
-                if "Intel" in dev.platform.vendor:
+                if ("Intel" in dev.platform.vendor
+                        and os.environ.get("LOOPY_INTEL_CL_OK_FOR_TEST_REF")
+                        is None):
                     # Sorry, Intel, your CPU CL has gotten too crashy of late.
                     # (Feb 2016)
                     continue
@@ -427,7 +438,12 @@ def auto_test_vs_ref(
 
     ref_errors = []
 
-    for dev in _enumerate_cl_devices_for_ref_test(blacklist_ref_vendors):
+    from loopy.kernel.data import ImageArg
+    need_ref_image_support = any(isinstance(arg, ImageArg) for arg in ref_knl.args)
+
+    for dev in _enumerate_cl_devices_for_ref_test(
+            blacklist_ref_vendors, need_ref_image_support):
+
         ref_ctx = cl.Context([dev])
         ref_queue = cl.CommandQueue(ref_ctx,
                 properties=cl.command_queue_properties.PROFILING_ENABLE)
diff --git a/loopy/check.py b/loopy/check.py
index 81b384a38a18f92c76fda9695b8d340c6f2dcc17..cc87ad9872668bf5323aefd79944e3bbd71b1153 100644
--- a/loopy/check.py
+++ b/loopy/check.py
@@ -362,8 +362,9 @@ class _AccessCheckMapper(WalkMapper):
 
             if not access_range.is_subset(shape_domain):
                 raise LoopyError("'%s' in instruction '%s' "
-                        "accesses out-of-bounds array element"
-                        % (expr, self.insn_id))
+                        "accesses out-of-bounds array element (could not"
+                        " establish '%s' is a subset of '%s')."
+                        % (expr, self.insn_id, access_range, shape_domain))
 
 
 def check_bounds(kernel):
diff --git a/loopy/codegen/loop.py b/loopy/codegen/loop.py
index 58f055b7b5042ff28f7bf9674b0e7dc5ff1b6269..128e4fbc85a2a03e25da3f88b200e67eb41756d3 100644
--- a/loopy/codegen/loop.py
+++ b/loopy/codegen/loop.py
@@ -364,8 +364,7 @@ def generate_sequential_loop_dim_code(codegen_state, sched_index):
 
         # {{{ find bounds
 
-        aligned_domain = isl.align_spaces(domain, slab, across_dim_types=True,
-                obj_bigger_ok=True)
+        aligned_domain = isl.align_spaces(domain, slab, obj_bigger_ok=True)
 
         dom_and_slab = aligned_domain & slab
 
@@ -389,8 +388,7 @@ def generate_sequential_loop_dim_code(codegen_state, sched_index):
         impl_domain = isl.align_spaces(
             codegen_state.implemented_domain,
             dom_and_slab,
-            obj_bigger_ok=True,
-            across_dim_types=True
+            obj_bigger_ok=True
             ).params()
 
         lbound = (
diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py
index 42d7c0f1e2062f84ee171c8c6274a0aa74601a4a..80a7ad03101bc67f39c89c6089aa6533d1886185 100644
--- a/loopy/kernel/__init__.py
+++ b/loopy/kernel/__init__.py
@@ -633,7 +633,7 @@ class LoopKernel(ImmutableRecordWithoutPickling):
                 result = dom
             else:
                 aligned_dom, aligned_result = isl.align_two(
-                        dom, result, across_dim_types=True)
+                        dom, result)
                 result = aligned_result & aligned_dom
 
         return result
diff --git a/loopy/kernel/array.py b/loopy/kernel/array.py
index 9cead8db8c9d3093ed3f179e1790d765fc4c07d2..3735b2d510aef1ada1643bce9ff07e797bfb210c 100644
--- a/loopy/kernel/array.py
+++ b/loopy/kernel/array.py
@@ -619,7 +619,7 @@ class ArrayBase(ImmutableRecord):
 
         If an integer N is given, the array would be declared
         with ``__attribute__((aligned(N)))`` in code generation for
-        :class:`loopy.CTarget`.
+        :class:`loopy.CFamilyTarget`.
 
         .. versionadded:: 2018.1
 
diff --git a/loopy/kernel/tools.py b/loopy/kernel/tools.py
index 2f32d94f662fe59d23ffb06dd13ade4a057304aa..157099df5a2133baa109f24e8216d63577b5dcb4 100644
--- a/loopy/kernel/tools.py
+++ b/loopy/kernel/tools.py
@@ -1868,7 +1868,7 @@ def infer_arg_is_output_only(kernel):
     from loopy.kernel.data import ArrayArg, ValueArg, ConstantArg, ImageArg
     new_args = []
     for arg in kernel.args:
-        if isinstance(arg, (ArrayArg, ImageArg, ValueArg)):
+        if isinstance(arg, ArrayArg):
             if arg.is_output_only is not None:
                 assert isinstance(arg.is_output_only, bool)
                 new_args.append(arg)
@@ -1877,7 +1877,7 @@ def infer_arg_is_output_only(kernel):
                     new_args.append(arg.copy(is_output_only=True))
                 else:
                     new_args.append(arg.copy(is_output_only=False))
-        elif isinstance(arg, ConstantArg):
+        elif isinstance(arg, (ConstantArg, ImageArg, ValueArg)):
             new_args.append(arg)
         else:
             raise NotImplementedError("Unkonwn argument type %s." % type(arg))
diff --git a/loopy/library/reduction.py b/loopy/library/reduction.py
index 2658b8cd743d335323dab7dd9aebd82ef5830652..53d05a28e7245e381be769af12d6066ffb486541 100644
--- a/loopy/library/reduction.py
+++ b/loopy/library/reduction.py
@@ -448,8 +448,8 @@ def parse_reduction_op(name):
 
 def reduction_function_mangler(kernel, func_id, arg_dtypes):
     if isinstance(func_id, ArgExtOp):
-        from loopy.target.opencl import CTarget
-        if not isinstance(kernel.target, CTarget):
+        from loopy.target.opencl import CFamilyTarget
+        if not isinstance(kernel.target, CFamilyTarget):
             raise LoopyError("%s: only C-like targets supported for now" % func_id)
 
         op = func_id.reduction_op
@@ -470,8 +470,8 @@ def reduction_function_mangler(kernel, func_id, arg_dtypes):
                 )
 
     elif isinstance(func_id, SegmentedOp):
-        from loopy.target.opencl import CTarget
-        if not isinstance(kernel.target, CTarget):
+        from loopy.target.opencl import CFamilyTarget
+        if not isinstance(kernel.target, CFamilyTarget):
             raise LoopyError("%s: only C-like targets supported for now" % func_id)
 
         op = func_id.reduction_op
diff --git a/loopy/preprocess.py b/loopy/preprocess.py
index 2afcd3db4331d57e1e61c48ba521ebaa296ddbb2..c0eb91ea60317ef8cad1c594571d46bba2d1a671 100644
--- a/loopy/preprocess.py
+++ b/loopy/preprocess.py
@@ -376,18 +376,15 @@ def _check_reduction_is_triangular(kernel, expr, scan_param):
 
     sweep_lower_bound = isl.align_spaces(
             scan_param.sweep_lower_bound,
-            affs[0],
-            across_dim_types=True)
+            affs[0])
 
     sweep_upper_bound = isl.align_spaces(
             scan_param.sweep_upper_bound,
-            affs[0],
-            across_dim_types=True)
+            affs[0])
 
     scan_lower_bound = isl.align_spaces(
             scan_param.scan_lower_bound,
-            affs[0],
-            across_dim_types=True)
+            affs[0])
 
     from itertools import product
 
@@ -799,7 +796,7 @@ def _hackily_ensure_multi_assignment_return_values_are_scoped_private(kernel):
                     TemporaryVariable(
                         name=new_assignee_name,
                         dtype=None,
-                        scope=AddressSpace.PRIVATE))
+                        address_space=AddressSpace.PRIVATE))
 
             from pymbolic import var
             new_assignee = var(new_assignee_name)
@@ -985,7 +982,7 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True,
                     name=name,
                     shape=(),
                     dtype=None,
-                    scope=AddressSpace.PRIVATE)
+                    address_space=AddressSpace.PRIVATE)
 
         from pymbolic import var
         temp_vars = tuple(var(n) for n in temp_var_names)
diff --git a/loopy/symbolic.py b/loopy/symbolic.py
index 8f5337533fd9e96c77b56154c1848f5ac419b425..ccac5e199d2b53e202dd735ffd8dfe20a7dc29a2 100644
--- a/loopy/symbolic.py
+++ b/loopy/symbolic.py
@@ -247,7 +247,7 @@ class EqualityPreservingStringifyMapper(StringifyMapperBase):
     """
 
     def __init__(self):
-        super(EqualityPreservingStringifyMapper, self).__init__(constant_mapper=repr)
+        super(EqualityPreservingStringifyMapper, self).__init__()
 
     def map_constant(self, expr, enclosing_prec):
         if isinstance(expr, np.generic):
@@ -257,8 +257,15 @@ class EqualityPreservingStringifyMapper(StringifyMapperBase):
 
             return "%s(%s)" % (type(expr).__name__, repr(expr))
         else:
-            return super(EqualityPreservingStringifyMapper, self).map_constant(
-                    expr, enclosing_prec)
+            result = repr(expr)
+
+            from pymbolic.mapper.stringifier import PREC_SUM
+            if not (result.startswith("(") and result.endswith(")")) \
+                    and ("-" in result or "+" in result) \
+                    and (enclosing_prec > PREC_SUM):
+                return self.parenthesize(result)
+            else:
+                return result
 
 
 class UnidirectionalUnifier(UnidirectionalUnifierBase):
diff --git a/loopy/target/__init__.py b/loopy/target/__init__.py
index a81354e2fd7b52ba514af936441c7a2d980c77b5..73d2a6328af87cb51fb90d43efcde34d39aa8299 100644
--- a/loopy/target/__init__.py
+++ b/loopy/target/__init__.py
@@ -31,6 +31,7 @@ __doc__ = """
 .. autoclass:: TargetBase
 .. autoclass:: ASTBuilderBase
 
+.. autoclass:: CFamilyTarget
 .. autoclass:: CTarget
 .. autoclass:: ExecutableCTarget
 .. autoclass:: CudaTarget
diff --git a/loopy/target/c/__init__.py b/loopy/target/c/__init__.py
index 25b190809fdc38341c811ede15a8baae693a3116..6e3602eda11d5f65e8a6af2977966e946c72a718 100644
--- a/loopy/target/c/__init__.py
+++ b/loopy/target/c/__init__.py
@@ -77,6 +77,11 @@ class DTypeRegistryWrapper(object):
 
 # {{{ preamble generator
 
+def c99_preamble_generator(preamble_info):
+    if any(dtype.is_integral() for dtype in preamble_info.seen_dtypes):
+        yield("10_stdint", "#include <stdint.h>")  # C99 fixed-width int types
+
+
 def _preamble_generator(preamble_info):
     integer_type_names = ["int8", "int16", "int32", "int64"]
 
@@ -314,8 +319,10 @@ class CExpression(object):
 # }}}
 
 
-class CTarget(TargetBase):
-    """A target for plain "C", without any parallel extensions.
+class CFamilyTarget(TargetBase):
+    """A target for "least-common denominator C", without any parallel
+    extensions, and without use of any C99 specifics. Intended to be
+    usable as a common base for C99, C++, OpenCL, CUDA, and the like.
     """
 
     hash_fields = TargetBase.hash_fields + ("fortran_abi",)
@@ -323,7 +330,7 @@ class CTarget(TargetBase):
 
     def __init__(self, fortran_abi=False):
         self.fortran_abi = fortran_abi
-        super(CTarget, self).__init__()
+        super(CFamilyTarget, self).__init__()
 
     def split_kernel_at_global_barriers(self):
         return False
@@ -332,7 +339,7 @@ class CTarget(TargetBase):
         return DummyHostASTBuilder(self)
 
     def get_device_ast_builder(self):
-        return CASTBuilder(self)
+        return CFamilyASTBuilder(self)
 
     # {{{ types
 
@@ -368,29 +375,6 @@ class CTarget(TargetBase):
     # }}}
 
 
-# {{{ executable c target
-
-class ExecutableCTarget(CTarget):
-    """
-    An executable CTarget that uses (by default) JIT compilation of C-code
-    """
-
-    def __init__(self, compiler=None, fortran_abi=False):
-        super(ExecutableCTarget, self).__init__(fortran_abi=fortran_abi)
-        from loopy.target.c.c_execution import CCompiler
-        self.compiler = compiler or CCompiler()
-
-    def get_kernel_executor(self, knl, *args, **kwargs):
-        from loopy.target.c.c_execution import CKernelExecutor
-        return CKernelExecutor(knl, compiler=self.compiler)
-
-    def get_host_ast_builder(self):
-        # enable host code generation
-        return CASTBuilder(self)
-
-# }}}
-
-
 class _ConstRestrictPointer(Pointer):
     def get_decl_pair(self):
         sub_tp, sub_decl = self.subdecl.get_decl_pair()
@@ -484,24 +468,24 @@ def c_math_mangler(target, name, arg_dtypes, modify_name=True):
 # }}}
 
 
-class CASTBuilder(ASTBuilderBase):
+class CFamilyASTBuilder(ASTBuilderBase):
     # {{{ library
 
     def function_manglers(self):
         return (
-                super(CASTBuilder, self).function_manglers() + [
+                super(CFamilyASTBuilder, self).function_manglers() + [
                     c_math_mangler
                     ])
 
     def symbol_manglers(self):
         return (
-                super(CASTBuilder, self).symbol_manglers() + [
+                super(CFamilyASTBuilder, self).symbol_manglers() + [
                     c_symbol_mangler
                     ])
 
     def preamble_generators(self):
         return (
-                super(CASTBuilder, self).preamble_generators() + [
+                super(CFamilyASTBuilder, self).preamble_generators() + [
                     _preamble_generator,
                     ])
 
@@ -1064,7 +1048,7 @@ def generate_header(kernel, codegen_result=None):
         functions.
     """
 
-    if not isinstance(kernel.target, CTarget):
+    if not isinstance(kernel.target, CFamilyTarget):
         raise LoopyError(
                 'Header generation for non C-based languages are not implemented')
 
@@ -1080,4 +1064,57 @@ def generate_header(kernel, codegen_result=None):
 
 # }}}
 
+
+# {{{ C99 target
+
+class CTarget(CFamilyTarget):
+    """A target for C99: generated code may use any feature of C99.
+    Its dtype registry is populated via ``fill_registry_with_c99_stdint_types``.
+    For a "least-common-denominator" C base, see :class:`CFamilyTarget`.
+    """
+
+    def get_device_ast_builder(self):
+        return CASTBuilder(self)
+
+    @memoize_method
+    def get_dtype_registry(self):
+        from loopy.target.c.compyte.dtypes import (
+                DTypeRegistry, fill_registry_with_c99_stdint_types)
+        result = DTypeRegistry()
+        fill_registry_with_c99_stdint_types(result)
+        return DTypeRegistryWrapper(result)
+
+
+class CASTBuilder(CFamilyASTBuilder):
+    """AST builder for :class:`CTarget`; adds the C99 preamble generator."""
+
+    def preamble_generators(self):
+        inherited = super(CASTBuilder, self).preamble_generators()
+        return inherited + [c99_preamble_generator]
+
+# }}}
+
+
+# {{{ executable c target
+
+class ExecutableCTarget(CTarget):
+    """
+    An executable CTarget that uses (by default) JIT compilation of C-code
+    """
+
+    def __init__(self, compiler=None, fortran_abi=False):
+        super(ExecutableCTarget, self).__init__(fortran_abi=fortran_abi)
+        from loopy.target.c.c_execution import CCompiler
+        self.compiler = compiler or CCompiler()  # default if none given
+
+    def get_kernel_executor(self, knl, *args, **kwargs):  # extras are ignored
+        from loopy.target.c.c_execution import CKernelExecutor
+        return CKernelExecutor(knl, compiler=self.compiler)
+
+    def get_host_ast_builder(self):
+        # enable host code generation
+        return CFamilyASTBuilder(self)
+
+# }}}
+
 # vim: foldmethod=marker
diff --git a/loopy/target/c/c_execution.py b/loopy/target/c/c_execution.py
index 60947c7f77d09582868304ded121386bbb3aab68..698507978f7c20d6d594fd3e03626e7b12012a94 100644
--- a/loopy/target/c/c_execution.py
+++ b/loopy/target/c/c_execution.py
@@ -306,7 +306,8 @@ class IDIToCDLL(object):
     """
     def __init__(self, target):
         self.target = target
-        self.registry = target.get_dtype_registry().wrapped_registry
+        from loopy.target.c import CFamilyTarget
+        self.registry = CFamilyTarget().get_dtype_registry().wrapped_registry
 
     def __call__(self, knl, idi):
         # next loop through the implemented data info to get the arg data
diff --git a/loopy/target/c/compyte b/loopy/target/c/compyte
index 11dc00352423cddd71f09e809d0a22ab1c3ea7a5..25ee8b48fd0c7d9f0bd987c6862cdb1884fb1372 160000
--- a/loopy/target/c/compyte
+++ b/loopy/target/c/compyte
@@ -1 +1 @@
-Subproject commit 11dc00352423cddd71f09e809d0a22ab1c3ea7a5
+Subproject commit 25ee8b48fd0c7d9f0bd987c6862cdb1884fb1372
diff --git a/loopy/target/cuda.py b/loopy/target/cuda.py
index 8f14738c307ce7f2d98a47ef0dc086b4c69f7910..50fd1026f7bd15ce72915d0d5d5e60f6da4e264c 100644
--- a/loopy/target/cuda.py
+++ b/loopy/target/cuda.py
@@ -28,7 +28,7 @@ import numpy as np
 
 from pytools import memoize_method
 
-from loopy.target.c import CTarget, CASTBuilder
+from loopy.target.c import CFamilyTarget, CFamilyASTBuilder
 from loopy.target.c.codegen.expression import ExpressionToCExpressionMapper
 from loopy.diagnostic import LoopyError
 from loopy.types import NumpyType
@@ -169,7 +169,7 @@ class ExpressionToCudaCExpressionMapper(ExpressionToCExpressionMapper):
 
 # {{{ target
 
-class CudaTarget(CTarget):
+class CudaTarget(CFamilyTarget):
     """A target for Nvidia's CUDA GPU programming language."""
 
     def __init__(self, extern_c=True):
@@ -216,7 +216,7 @@ class CudaTarget(CTarget):
 
 # {{{ ast builder
 
-class CUDACASTBuilder(CASTBuilder):
+class CUDACASTBuilder(CFamilyASTBuilder):
     # {{{ library
 
     def function_manglers(self):
diff --git a/loopy/target/ispc.py b/loopy/target/ispc.py
index cccee2301e44b16e2454bda5e98af7db7893c003..eb0157bf86d478901fb5a07bbac28aa7a11bcec9 100644
--- a/loopy/target/ispc.py
+++ b/loopy/target/ispc.py
@@ -26,7 +26,7 @@ THE SOFTWARE.
 
 
 import numpy as np  # noqa
-from loopy.target.c import CTarget, CASTBuilder
+from loopy.target.c import CFamilyTarget, CFamilyASTBuilder
 from loopy.target.c.codegen.expression import ExpressionToCExpressionMapper
 from loopy.diagnostic import LoopyError
 from loopy.symbolic import Literal
@@ -154,7 +154,7 @@ def fill_registry_with_ispc_types(reg, respect_windows, include_bool=True):
 # }}}
 
 
-class ISPCTarget(CTarget):
+class ISPCTarget(CFamilyTarget):
     """A code generation target for Intel's `ISPC <https://ispc.github.io/>`_
     SPMD programming language, to target Intel's Knight's hardware and modern
     Intel CPUs with wide vector units.
@@ -200,7 +200,7 @@ class ISPCTarget(CTarget):
     # }}}
 
 
-class ISPCASTBuilder(CASTBuilder):
+class ISPCASTBuilder(CFamilyASTBuilder):
     def _arg_names_and_decls(self, codegen_state):
         implemented_data_info = codegen_state.implemented_data_info
         arg_names = [iai.name for iai in implemented_data_info]
diff --git a/loopy/target/opencl.py b/loopy/target/opencl.py
index 9bdfa8ded76b182ea1552680d65392d4bff219ec..4569be50367b3063999656bcd1de9d76f98e8c0a 100644
--- a/loopy/target/opencl.py
+++ b/loopy/target/opencl.py
@@ -26,7 +26,7 @@ THE SOFTWARE.
 
 import numpy as np
 
-from loopy.target.c import CTarget, CASTBuilder
+from loopy.target.c import CFamilyTarget, CFamilyASTBuilder
 from loopy.target.c.codegen.expression import ExpressionToCExpressionMapper
 from pytools import memoize_method
 from loopy.diagnostic import LoopyError
@@ -304,7 +304,7 @@ class ExpressionToOpenCLCExpressionMapper(ExpressionToCExpressionMapper):
 
 # {{{ target
 
-class OpenCLTarget(CTarget):
+class OpenCLTarget(CFamilyTarget):
     """A target for the OpenCL C heterogeneous compute programming language.
     """
 
@@ -362,7 +362,7 @@ class OpenCLTarget(CTarget):
 
 # {{{ ast builder
 
-class OpenCLCASTBuilder(CASTBuilder):
+class OpenCLCASTBuilder(CFamilyASTBuilder):
     # {{{ library
 
     def function_manglers(self):
diff --git a/loopy/transform/buffer.py b/loopy/transform/buffer.py
index 63d3a40fb6c6967cac5e6149d5cf51bb7c2efbb9..7f4779cc7c0af3fa228ca51a3f8d45944ec21bff 100644
--- a/loopy/transform/buffer.py
+++ b/loopy/transform/buffer.py
@@ -378,7 +378,7 @@ def buffer_array(kernel, var_name, buffer_inames, init_expression=None,
             dtype=var_descr.dtype,
             base_indices=(0,)*len(abm.non1_storage_shape),
             shape=tuple(abm.non1_storage_shape),
-            scope=temporary_scope)
+            address_space=temporary_scope)
 
     new_temporary_variables[buf_var_name] = temp_var
 
diff --git a/loopy/transform/data.py b/loopy/transform/data.py
index 7fbc595f27ea34f9056b36e3d6f0e168b182c24e..a6a2d7b4fe4ba94caa8cbe112a5cf90719ceb643 100644
--- a/loopy/transform/data.py
+++ b/loopy/transform/data.py
@@ -696,7 +696,7 @@ def set_temporary_scope(kernel, temp_var_names, scope):
         except KeyError:
             raise LoopyError("temporary '%s' not found" % tv_name)
 
-        new_temp_vars[tv_name] = tv.copy(scope=scope)
+        new_temp_vars[tv_name] = tv.copy(address_space=scope)
 
     return kernel.copy(temporary_variables=new_temp_vars)
 
diff --git a/loopy/transform/diff.py b/loopy/transform/diff.py
index d4dcb3701f4f23a5b1c66b1559bf6c4879425902..21e61075596bc2b795434716ba8a4347f5cfb173 100644
--- a/loopy/transform/diff.py
+++ b/loopy/transform/diff.py
@@ -37,7 +37,7 @@ from loopy.diagnostic import LoopyError
 
 # {{{ diff mapper
 
-def func_map(i, func, args):
+def func_map(i, func, args, allowed_nonsmoothness):
     if func.name == "exp":
         return var("exp")(*args)
     elif func.name == "log":
@@ -62,8 +62,17 @@ def func_map(i, func, args):
 
 
 class LoopyDiffMapper(DifferentiationMapper, RuleAwareIdentityMapper):
-    def __init__(self, rule_mapping_context, diff_context, diff_inames):
+    def __init__(self, rule_mapping_context, diff_context, diff_inames,
+            allowed_nonsmoothness=None):
         RuleAwareIdentityMapper.__init__(self, rule_mapping_context)
+        DifferentiationMapper.__init__(
+                self,
+
+                # This is actually ignored because we
+                # override map_variable below.
+                variable=None,
+                # Forward the caller's setting rather than discarding it.
+                allowed_nonsmoothness=allowed_nonsmoothness)
         self.diff_context = diff_context
         self.diff_inames = diff_inames
         self.diff_iname_exprs = tuple(var(diname) for diname in diff_inames)
diff --git a/loopy/transform/fusion.py b/loopy/transform/fusion.py
index 49e30a7516cbbf00a07aace34831eb857a877432..70ad2406aabdd63ee21c448aac1091999247925e 100644
--- a/loopy/transform/fusion.py
+++ b/loopy/transform/fusion.py
@@ -209,9 +209,9 @@ def _fuse_two_kernels(knla, knlb):
     knlb = _apply_renames_in_exprs(knlb, b_var_renames)
 
     from pymbolic.imperative.transform import \
-            fuse_instruction_streams_with_unique_ids
+            fuse_statement_streams_with_unique_ids
     new_instructions, old_b_id_to_new_b_id = \
-            fuse_instruction_streams_with_unique_ids(
+            fuse_statement_streams_with_unique_ids(
                     knla.instructions, knlb.instructions)
 
     # {{{ fuse assumptions
diff --git a/loopy/transform/precompute.py b/loopy/transform/precompute.py
index f2b184a4119485e53d7dee14b1a322be45a0bfe3..9f426f76bc6902fd09bd7685c73f187df935be1e 100644
--- a/loopy/transform/precompute.py
+++ b/loopy/transform/precompute.py
@@ -956,13 +956,9 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None,
     # {{{ set up temp variable
 
     import loopy as lp
-    if dtype is None:
-        dtype = lp.auto
-    else:
+    if dtype is not None:
         dtype = np.dtype(dtype)
 
-    import loopy as lp
-
     if temporary_address_space is None:
         temporary_address_space = lp.auto
 
diff --git a/loopy/transform/save.py b/loopy/transform/save.py
index cca62bc522bb110ec4aeb190b538e5b6e8583abf..baa558a72861f31c5ce707329ea84786b96eb6d2 100644
--- a/loopy/transform/save.py
+++ b/loopy/transform/save.py
@@ -228,7 +228,7 @@ class TemporarySaver(object):
             return TemporaryVariable(
                 name=self.name,
                 dtype=temporary.dtype,
-                scope=AddressSpace.GLOBAL,
+                address_space=AddressSpace.GLOBAL,
                 shape=self.new_shape)
 
         @property
diff --git a/loopy/version.py b/loopy/version.py
index 66c7a0cf46f3b40a8cc0af345d55c40e4e7d7f4f..29abbc2de889b884de93e5fe39a1d996811c93c9 100644
--- a/loopy/version.py
+++ b/loopy/version.py
@@ -29,7 +29,7 @@ if os.environ.get("AKPYTHON_EXEC_IMPORT_UNAVAILABLE") is not None:
     _git_rev = None
 
 else:
-    import loopy._git_rev as _git_rev_mod
+    import loopy._git_rev as _git_rev_mod  # pylint: disable=no-name-in-module,import-error  # noqa: E501
     _git_rev = _git_rev_mod.GIT_REVISION
 
     # If we're running from a dev tree, the last install (and hence the most
diff --git a/requirements-old-pyopencl.txt b/requirements-old-pyopencl.txt
deleted file mode 100644
index 1f1f16075cd9178209a0e601af4177eb3480a517..0000000000000000000000000000000000000000
--- a/requirements-old-pyopencl.txt
+++ /dev/null
@@ -1,11 +0,0 @@
-git+https://github.com/inducer/pytools.git
-git+https://github.com/inducer/islpy.git
-git+https://github.com/inducer/cgen.git
-git+https://github.com/inducer/pyopencl.git@deprecated-boost-python
-git+https://github.com/inducer/pymbolic.git
-git+https://github.com/inducer/genpy.git
-
-hg+https://bitbucket.org/inducer/f2py
-
-# Optional, needed for using the C preprocessor on Fortran
-ply>=3.6
diff --git a/setup.py b/setup.py
index c579bccf1aef27680dc23fa8291390b68923fcb6..75d8b340e8ad98794a244f7e5da89e079870bd2b 100644
--- a/setup.py
+++ b/setup.py
@@ -90,7 +90,7 @@ setup(name="loo.py",
 
       install_requires=[
           "pytools>=2018.4",
-          "pymbolic>=2019.1",
+          "pymbolic>=2019.2",
           "genpy>=2016.1.2",
           "cgen>=2016.1",
           "islpy>=2019.1",
diff --git a/test/test_apps.py b/test/test_apps.py
index e7f4004fa0f2285920bdf9a0848c0d400e2c31b7..e07262dbdda8ad3c24522f7d0eb4dba8422bf0ce 100644
--- a/test/test_apps.py
+++ b/test/test_apps.py
@@ -619,7 +619,8 @@ def test_poisson_fem(ctx_factory):
 
 def test_domain_tree_nesting():
     # From https://github.com/inducer/loopy/issues/78
-    from loopy.kernel.data import temp_var_scope as scopes
+
+    AS = lp.AddressSpace        # noqa
 
     out_map = np.array([1, 2], dtype=np.int32)
     if_val = np.array([-1, 0], dtype=np.int32)
@@ -651,12 +652,13 @@ def test_domain_tree_nesting():
     end
     """,
     [
-        TV('out_map', initializer=out_map, read_only=True, scope=scopes.PRIVATE),
-        TV('if_val', initializer=if_val, read_only=True, scope=scopes.PRIVATE),
-        TV('vals', initializer=vals, read_only=True, scope=scopes.PRIVATE),
-        TV('num_vals', initializer=num_vals, read_only=True, scope=scopes.PRIVATE),
+        TV('out_map', initializer=out_map, read_only=True, address_space=AS.PRIVATE),
+        TV('if_val', initializer=if_val, read_only=True, address_space=AS.PRIVATE),
+        TV('vals', initializer=vals, read_only=True, address_space=AS.PRIVATE),
+        TV('num_vals', initializer=num_vals, read_only=True,
+           address_space=AS.PRIVATE),
         TV('num_vals_offset', initializer=num_vals_offset, read_only=True,
-            scope=scopes.PRIVATE),
+           address_space=AS.PRIVATE),
         lp.GlobalArg('B', shape=(100, 31), dtype=np.float64),
         lp.GlobalArg('out', shape=(100, 12), dtype=np.float64)])
 
diff --git a/test/test_c_execution.py b/test/test_c_execution.py
index bf168c11d838248947a2806123053e63c13ccbeb..b0ca7ade25d3077c7f868f366cb9ff6bb011af33 100644
--- a/test/test_c_execution.py
+++ b/test/test_c_execution.py
@@ -275,7 +275,7 @@ def test_c_execution_with_global_temporaries():
     # global constant temporaries is None
 
     from loopy.target.c import ExecutableCTarget
-    from loopy.kernel.data import temp_var_scope as scopes
+    AS = lp.AddressSpace        # noqa
     n = 10
 
     knl = lp.make_kernel('{[i]: 0 <= i < n}',
@@ -287,7 +287,7 @@ def test_c_execution_with_global_temporaries():
                               initializer=np.arange(n, dtype=np.int32),
                               dtype=np.int32,
                               read_only=True,
-                              scope=scopes.GLOBAL)],
+                              address_space=AS.GLOBAL)],
         target=ExecutableCTarget())
 
     knl = lp.fix_parameters(knl, n=n)
diff --git a/test/test_expression.py b/test/test_expression.py
index 752d9ab2da3e2865891da8eedb4d5295b8b35826..41a8de656efcfc44fe404fa4722572d36c974409 100644
--- a/test/test_expression.py
+++ b/test/test_expression.py
@@ -411,9 +411,13 @@ def test_indexof_vec(ctx_factory):
     ctx = ctx_factory()
     queue = cl.CommandQueue(ctx)
 
-    if ctx.devices[0].platform.name.startswith("Portable"):
-        # Accurate as of 2015-10-08
-        pytest.skip("POCL miscompiles vector code")
+    if (
+            # Accurate as of 2015-10-08
+            ctx.devices[0].platform.name.startswith("Portable")
+            or
+            # Accurate as of 2019-11-04
+            ctx.devices[0].platform.name.startswith("Intel")):
+        pytest.skip("target ICD miscompiles vector code")
 
     knl = lp.make_kernel(
          ''' { [i,j,k]: 0<=i,j,k<4 } ''',
diff --git a/test/test_linalg.py b/test/test_linalg.py
index fec6cd5e7f3ffbb823f36d2de2b17ff9190273d6..f075d3493195ec3364c4de0d26f92c4a987e7187 100644
--- a/test/test_linalg.py
+++ b/test/test_linalg.py
@@ -27,7 +27,8 @@ import pytest
 import sys
 import numpy as np
 import pyopencl as cl
-import pyopencl.array as cl_array
+import pyopencl.array as cl_array  # noqa: F401
+import pyopencl.cltypes as cltypes
 import loopy as lp
 
 import logging
@@ -71,15 +72,14 @@ def test_axpy(ctx_factory):
 
     n = 3145182
 
-    vec = cl_array.vec
-
     if ctx.devices[0].platform.vendor.startswith("Advanced Micro"):
         pytest.skip("crashes on AMD 15.12")
 
     for dtype, check, a, b in [
             (np.complex64, None, 5, 7),
-            (vec.float4, check_float4,
-                vec.make_float4(1, 2, 3, 4), vec.make_float4(6, 7, 8, 9)),
+            (cltypes.float4, check_float4,  # pylint:disable=no-member
+                cltypes.make_float4(1, 2, 3, 4),  # pylint:disable=no-member
+                cltypes.make_float4(6, 7, 8, 9)),  # pylint:disable=no-member
             (np.float32, None, 5, 7),
             ]:
         knl = lp.make_kernel(
@@ -163,7 +163,7 @@ def test_plain_matrix_mul(ctx_factory):
     n = get_suitable_size(ctx)
 
     for dtype, check, vec_size in [
-            (cl_array.vec.float4, check_float4, 4),
+            (cltypes.float4, check_float4, 4),  # pylint:disable=no-member
             (np.float32, None, 1),
             ]:
         knl = lp.make_kernel(
diff --git a/test/test_loopy.py b/test/test_loopy.py
index 36dc35688472fea444a46e03cf076ddde5cf18a4..203ebb3922d3cc7f41b56abc31202b8974b88117 100644
--- a/test/test_loopy.py
+++ b/test/test_loopy.py
@@ -107,9 +107,7 @@ def test_complicated_subst(ctx_factory):
         assert substs_with_letter == how_many
 
 
-def test_type_inference_no_artificial_doubles(ctx_factory):
-    ctx = ctx_factory()
-
+def test_type_inference_no_artificial_doubles():
     knl = lp.make_kernel(
             "{[i]: 0<=i<n}",
             """
@@ -124,7 +122,7 @@ def test_type_inference_no_artificial_doubles(ctx_factory):
                 ],
             assumptions="n>=1")
 
-    knl = lp.preprocess_kernel(knl, ctx.devices[0])
+    knl = lp.preprocess_kernel(knl)
     for k in lp.generate_loop_schedules(knl):
         code = lp.generate_code(k)
         assert "double" not in code
@@ -186,7 +184,7 @@ def test_simple_side_effect(ctx_factory):
             [lp.GlobalArg("a", np.float32, shape=(100,))]
             )
 
-    knl = lp.preprocess_kernel(knl, ctx.devices[0])
+    knl = lp.preprocess_kernel(knl)
     kernel_gen = lp.generate_loop_schedules(knl)
 
     for gen_knl in kernel_gen:
@@ -208,7 +206,7 @@ def test_owed_barriers(ctx_factory):
 
     knl = lp.tag_inames(knl, dict(i="l.0"))
 
-    knl = lp.preprocess_kernel(knl, ctx.devices[0])
+    knl = lp.preprocess_kernel(knl)
     kernel_gen = lp.generate_loop_schedules(knl)
 
     for gen_knl in kernel_gen:
@@ -229,7 +227,7 @@ def test_wg_too_small(ctx_factory):
 
     knl = lp.tag_inames(knl, dict(i="l.0"))
 
-    knl = lp.preprocess_kernel(knl, ctx.devices[0])
+    knl = lp.preprocess_kernel(knl)
     kernel_gen = lp.generate_loop_schedules(knl)
 
     import pytest
@@ -252,7 +250,7 @@ def test_multi_cse(ctx_factory):
     knl = lp.split_iname(knl, "i", 16, inner_tag="l.0")
     knl = lp.add_prefetch(knl, "a", [])
 
-    knl = lp.preprocess_kernel(knl, ctx.devices[0])
+    knl = lp.preprocess_kernel(knl)
     kernel_gen = lp.generate_loop_schedules(knl)
 
     for gen_knl in kernel_gen:
@@ -278,9 +276,8 @@ def test_bare_data_dependency(ctx_factory):
                 lp.ValueArg("n", np.int32),
                 ])
 
-    cknl = lp.CompiledKernel(ctx, knl)
     n = 20000
-    evt, (a,) = cknl(queue, n=n, out_host=True)
+    evt, (a,) = knl(queue, n=n, out_host=True)
 
     assert a.shape == (n,)
     assert (a == 1).all()
@@ -288,10 +285,7 @@ def test_bare_data_dependency(ctx_factory):
 
 # {{{ test race detection
 
-@pytest.mark.skipif("sys.version_info < (2,6)")
-def test_ilp_write_race_detection_global(ctx_factory):
-    ctx = ctx_factory()
-
+def test_ilp_write_race_detection_global():
     knl = lp.make_kernel(
             "[n] -> {[i,j]: 0<=i,j<n }",
             [
@@ -305,7 +299,7 @@ def test_ilp_write_race_detection_global(ctx_factory):
 
     knl = lp.tag_inames(knl, dict(j="ilp"))
 
-    knl = lp.preprocess_kernel(knl, ctx.devices[0])
+    knl = lp.preprocess_kernel(knl)
 
     with lp.CacheMode(False):
         from loopy.diagnostic import WriteRaceConditionWarning
@@ -317,9 +311,7 @@ def test_ilp_write_race_detection_global(ctx_factory):
                     for w in warn_list)
 
 
-def test_ilp_write_race_avoidance_local(ctx_factory):
-    ctx = ctx_factory()
-
+def test_ilp_write_race_avoidance_local():
     knl = lp.make_kernel(
             "{[i,j]: 0<=i<16 and 0<=j<17 }",
             [
@@ -329,14 +321,12 @@ def test_ilp_write_race_avoidance_local(ctx_factory):
 
     knl = lp.tag_inames(knl, dict(i="l.0", j="ilp"))
 
-    knl = lp.preprocess_kernel(knl, ctx.devices[0])
+    knl = lp.preprocess_kernel(knl)
     for k in lp.generate_loop_schedules(knl):
         assert k.temporary_variables["a"].shape == (16, 17)
 
 
-def test_ilp_write_race_avoidance_private(ctx_factory):
-    ctx = ctx_factory()
-
+def test_ilp_write_race_avoidance_private():
     knl = lp.make_kernel(
             "{[j]: 0<=j<16 }",
             [
@@ -346,7 +336,7 @@ def test_ilp_write_race_avoidance_private(ctx_factory):
 
     knl = lp.tag_inames(knl, dict(j="ilp"))
 
-    knl = lp.preprocess_kernel(knl, ctx.devices[0])
+    knl = lp.preprocess_kernel(knl)
     for k in lp.generate_loop_schedules(knl):
         assert k.temporary_variables["a"].shape == (16,)
 
@@ -494,9 +484,7 @@ def test_offsets_and_slicing(ctx_factory):
             assumptions="n>=1 and m>=1",
             default_offset=lp.auto)
 
-    knl = lp.tag_data_axes(knl, "a,b", "stride:auto,stride:1")
-
-    cknl = lp.CompiledKernel(ctx, knl)
+    knl = lp.tag_array_axes(knl, "a,b", "stride:auto,stride:1")
 
     a_full = cl.clrandom.rand(queue, (n, n), np.float64)
     a_full_h = a_full.get()
@@ -511,8 +499,10 @@ def test_offsets_and_slicing(ctx_factory):
 
     b_full_h[b_sub] = 2*a_full_h[a_sub]
 
-    print(cknl.get_highlighted_code({"a": a.dtype}))
-    cknl(queue, a=a, b=b)
+    # Have loopy emit the generated OpenCL code when the kernel is run.
+    knl = lp.set_options(knl, write_cl=True)
+
+    knl(queue, a=a, b=b)
 
     import numpy.linalg as la
     assert la.norm(b_full.get() - b_full_h) < 1e-13
@@ -657,7 +647,7 @@ def test_vector_types(ctx_factory, vec_len):
 
     ref_knl = knl
 
-    knl = lp.tag_data_axes(knl, "out", "c,vec")
+    knl = lp.tag_array_axes(knl, "out", "c,vec")
     knl = lp.tag_inames(knl, dict(j="unr"))
 
     knl = lp.split_iname(knl, "i", 128, outer_tag="g.0", inner_tag="l.0")
@@ -947,7 +937,7 @@ def test_atomic_load(ctx_factory, dtype):
                 lp.GlobalArg("a", dtype, shape=lp.auto),
                 lp.GlobalArg("b", dtype, shape=lp.auto),
                 lp.TemporaryVariable('temp', dtype, for_atomic=True,
-                                     scope=AddressSpace.LOCAL),
+                                     address_space=AddressSpace.LOCAL),
                 "..."
                 ],
             silenced_warnings=["write_race(init)", "write_race(temp_sum)"])
@@ -1030,7 +1020,7 @@ def test_literal_local_barrier(ctx_factory):
 
 
 def test_local_barrier_mem_kind():
-    def __test_type(mtype, expected):
+    def _test_type(mtype, expected):
         insn = '... lbarrier'
         if mtype:
             insn += '{mem_kind=%s}' % mtype
@@ -1046,9 +1036,9 @@ def test_local_barrier_mem_kind():
         cgr = lp.generate_code_v2(knl)
         assert 'barrier(%s)' % expected in cgr.device_code()
 
-    __test_type('', 'CLK_LOCAL_MEM_FENCE')
-    __test_type('global', 'CLK_GLOBAL_MEM_FENCE')
-    __test_type('local', 'CLK_LOCAL_MEM_FENCE')
+    _test_type('', 'CLK_LOCAL_MEM_FENCE')
+    _test_type('global', 'CLK_GLOBAL_MEM_FENCE')
+    _test_type('local', 'CLK_LOCAL_MEM_FENCE')
 
 
 def test_kernel_splitting(ctx_factory):
@@ -1862,13 +1852,13 @@ def test_temp_initializer(ctx_factory, src_order, tmp_order):
                 lp.TemporaryVariable("tmp",
                     initializer=a,
                     shape=lp.auto,
-                    scope=lp.AddressSpace.PRIVATE,
+                    address_space=lp.AddressSpace.PRIVATE,
                     read_only=True,
                     order=tmp_order),
                 "..."
                 ])
 
-    knl = lp.set_options(knl, write_cl=True, highlight_cl=True)
+    knl = lp.set_options(knl, write_cl=True)
     knl = lp.fix_parameters(knl, n=a.shape[0])
 
     evt, (a2,) = knl(queue, out_host=True)
@@ -1887,7 +1877,7 @@ def test_const_temp_with_initializer_not_saved():
             lp.TemporaryVariable("tmp",
                 initializer=np.arange(10),
                 shape=lp.auto,
-                scope=lp.AddressSpace.PRIVATE,
+                address_space=lp.AddressSpace.PRIVATE,
                 read_only=True),
             "..."
             ],
@@ -2036,6 +2026,13 @@ def test_tight_loop_bounds(ctx_factory):
     ctx = ctx_factory()
     queue = cl.CommandQueue(ctx)
 
+    if (queue.device.platform.vendor == "Intel(R) Corporation"
+            and queue.device.driver_version in [
+                "2019.8.7.0",
+                "2019.8.8.0",
+                ]):
+        pytest.skip("Intel CL miscompiles this kernel")
+
     knl = lp.make_kernel(
         ["{ [i] : 0 <= i <= 5 }",
          "[i] -> { [j] : 2 * i - 2 < j <= 2 * i and 0 <= j <= 9 }"],
@@ -2050,6 +2047,8 @@ def test_tight_loop_bounds(ctx_factory):
 
     knl = lp.split_iname(knl, "i", 5, inner_tag="l.0", outer_tag="g.0")
 
+    knl = lp.set_options(knl, write_cl=True)
+
     evt, (out,) = knl(queue, out_host=True)
 
     assert (out == np.arange(10)).all()
@@ -2110,7 +2109,7 @@ def test_integer_reduction(ctx_factory):
         var_int = np.random.randint(1000, size=n).astype(vtype)
         var_lp = lp.TemporaryVariable('var', initializer=var_int,
                                    read_only=True,
-                                   scope=lp.AddressSpace.PRIVATE,
+                                   address_space=lp.AddressSpace.PRIVATE,
                                    dtype=to_loopy_type(vtype),
                                    shape=lp.auto)
 
@@ -2169,11 +2168,11 @@ def test_complicated_argmin_reduction(ctx_factory):
                                     and qbx_forced_limit * center_side[ictr] > 0)
                             )
 
-                    <> post_dist_sq = if(matches, dist_sq, HUGE)
+                    <> post_dist_sq = dist_sq if matches else HUGE
                 end
                 <> min_dist_sq, <> min_ictr = argmin(ictr, ictr, post_dist_sq)
 
-                tgt_to_qbx_center[itgt] = if(min_dist_sq < HUGE, min_ictr, -1)
+                tgt_to_qbx_center[itgt] = min_ictr if min_dist_sq < HUGE else -1
             end
             """)
 
@@ -2301,7 +2300,7 @@ def test_barrier_in_overridden_get_grid_size_expanded_kernel():
               end
                    """,
                    [lp.TemporaryVariable("a", np.float32, shape=(10,), order='C',
-                                         scope=lp.AddressSpace.LOCAL),
+                                         address_space=lp.AddressSpace.LOCAL),
                     lp.GlobalArg("b", np.float32, shape=(11,), order='C')],
                seq_dependencies=True)
 
@@ -2553,7 +2552,7 @@ def test_preamble_with_separate_temporaries(ctx_factory):
     [lp.GlobalArg('out', shape=('n',)),
      lp.TemporaryVariable(
         'offsets', shape=(offsets.size,), initializer=offsets,
-        scope=lp.AddressSpace.GLOBAL,
+        address_space=lp.AddressSpace.GLOBAL,
         read_only=True),
      lp.GlobalArg('data', shape=(data.size,), dtype=np.float64)],
     )
@@ -2686,8 +2685,8 @@ def test_no_barriers_for_nonoverlapping_access(second_index, expect_barrier):
             a[%s] = 13  {id=second,dep=first}
             """ % second_index,
             [
-                lp.TemporaryVariable("a", lp.auto, shape=(256,),
-                    scope=lp.AddressSpace.LOCAL),
+                lp.TemporaryVariable("a", dtype=None, shape=(256,),
+                    address_space=lp.AddressSpace.LOCAL),
                 ])
 
     knl = lp.tag_inames(knl, "i:l.0")
@@ -2705,7 +2704,7 @@ def test_half_complex_conditional(ctx_factory):
     knl = lp.make_kernel(
             "{[i]: 0 <= i < 10}",
             """
-           tmp[i] = if(i < 5, 0, 0j)
+           tmp[i] = 0 if i < 5 else 0j
            """)
 
     knl(queue)
@@ -2769,6 +2768,15 @@ def test_backwards_dep_printing_and_error():
 def test_dump_binary(ctx_factory):
     ctx = ctx_factory()
 
+    device = ctx.devices[0]
+
+    if (device.platform.vendor == "Intel(R) Corporation"
+            and device.driver_version in [
+                "2019.8.7.0",
+                "2019.8.8.0",
+                ]):
+        pytest.skip("Intel CL doesn't implement Kernel.program")
+
     knl = lp.make_kernel(
             "{ [i]: 0<=i<n }",
             """
diff --git a/test/test_reduction.py b/test/test_reduction.py
index ef229d5cd08554d6656d23d83bc0c6b66ee77b9f..4ce06345499480e521f7dc12a6620271d7b99522 100644
--- a/test/test_reduction.py
+++ b/test/test_reduction.py
@@ -240,7 +240,7 @@ def test_global_parallel_reduction(ctx_factory, size):
     knl = reduction_arg_to_subst_rule(knl, "i_outer")
 
     knl = lp.precompute(knl, "red_i_outer_arg", "i_outer",
-            temporary_scope=lp.temp_var_scope.GLOBAL,
+            temporary_address_space=lp.AddressSpace.GLOBAL,
             default_tag="l.auto")
     knl = lp.realize_reduction(knl)
     knl = lp.tag_inames(knl, "i_outer_0:g.0")
@@ -285,7 +285,7 @@ def test_global_mc_parallel_reduction(ctx_factory, size):
     from loopy.transform.data import reduction_arg_to_subst_rule
     knl = reduction_arg_to_subst_rule(knl, "i_outer")
     knl = lp.precompute(knl, "red_i_outer_arg", "i_outer",
-            temporary_scope=lp.temp_var_scope.GLOBAL,
+            temporary_address_space=lp.AddressSpace.GLOBAL,
             default_tag="l.auto")
     knl = lp.realize_reduction(knl)
     knl = lp.add_dependency(
diff --git a/test/test_transform.py b/test/test_transform.py
index 3ee67b703964d1f7773b10a9199687d78b883a60..cdc0c14b8bacc4fe5279d000461c0ea2244af021 100644
--- a/test/test_transform.py
+++ b/test/test_transform.py
@@ -135,7 +135,7 @@ def test_to_batched_temp(ctx_factory):
              "cnst",
              dtype=np.float32,
              shape=(),
-             scope=lp.temp_var_scope.PRIVATE), '...'])
+             address_space=lp.AddressSpace.PRIVATE), '...'])
     knl = lp.add_and_infer_dtypes(knl, dict(out=np.float32,
                                             x=np.float32,
                                             a=np.float32))
diff --git a/test/testlib.py b/test/testlib.py
index ad290ee7c60297aadd4a6baa0814b8976403cb53..67c5ba04fefde9a7516f22bce679744ce61a4f20 100644
--- a/test/testlib.py
+++ b/test/testlib.py
@@ -80,7 +80,6 @@ class SeparateTemporariesPreambleTestMangler(
 class SeparateTemporariesPreambleTestPreambleGenerator(
         SeparateTemporariesPreambleTestDataHolder):
     def __call__(self, preamble_info):
-        from loopy.kernel.data import temp_var_scope as scopes
 
         # find a function matching our name
         func_match = next(
@@ -96,7 +95,7 @@ class SeparateTemporariesPreambleTestPreambleGenerator(
                 var = lp.TemporaryVariable(
                     'lookup', initializer=self.arr, dtype=self.arr.dtype,
                     shape=self.arr.shape,
-                    scope=scopes.GLOBAL, read_only=True)
+                    address_space=lp.AddressSpace.GLOBAL, read_only=True)
                 # and code
                 code = """
         int {name}(int start, int end, int match)