diff --git a/examples/python/hello-loopy.py b/examples/python/hello-loopy.py
index 82ff2e60dee345fe16771d09cb39d2d56e9f493d..7c5de5a1b1d7042498a12204959a59021ac5e0d8 100644
--- a/examples/python/hello-loopy.py
+++ b/examples/python/hello-loopy.py
@@ -26,5 +26,5 @@ knl = lp.split_iname(knl, "i", 128, outer_tag="g.0", inner_tag="l.0")
 evt, (out,) = knl(queue, a=a)
 # ENDEXAMPLE
 
-cknl = lp.CompiledKernel(ctx, knl)
-print(cknl.get_highlighted_code({"a": np.float32}))
+knl = lp.add_and_infer_dtypes(knl, {"a": np.dtype(np.float32)})
+print(lp.generate_code_v2(knl).device_code())
diff --git a/loopy/preprocess.py b/loopy/preprocess.py
index 17226b63addb9e2e30d556730aa326d2ed59128c..c331ccc8259645029866cad4a518cb8198428836 100644
--- a/loopy/preprocess.py
+++ b/loopy/preprocess.py
@@ -48,20 +48,22 @@ def prepare_for_caching(kernel):
     import loopy as lp
     new_args = []
 
+    tgt = kernel.target
+
     for arg in kernel.args:
         dtype = arg.dtype
-        if dtype is not None and dtype is not lp.auto:
-            dtype = dtype.with_target(kernel.target)
+        if dtype is not None and dtype is not lp.auto and dtype.target is not tgt:
+            arg = arg.copy(dtype=dtype.with_target(kernel.target))
 
-        new_args.append(arg.copy(dtype=dtype))
+        new_args.append(arg)
 
     new_temporary_variables = {}
     for name, temp in six.iteritems(kernel.temporary_variables):
         dtype = temp.dtype
-        if dtype is not None and dtype is not lp.auto:
-            dtype = dtype.with_target(kernel.target)
+        if dtype is not None and dtype is not lp.auto and dtype.target is not tgt:
+            temp = temp.copy(dtype=dtype.with_target(tgt))
 
-        new_temporary_variables[name] = temp.copy(dtype=dtype)
+        new_temporary_variables[name] = temp
 
     kernel = kernel.copy(
             args=new_args,
diff --git a/loopy/target/pyopencl_execution.py b/loopy/target/pyopencl_execution.py
index a8f47adb991e331f8a473c4eb14b1ea634c7a3b1..2da25ba39ceef38a4af105913973226bd3773729 100644
--- a/loopy/target/pyopencl_execution.py
+++ b/loopy/target/pyopencl_execution.py
@@ -25,6 +25,7 @@ THE SOFTWARE.
 import six
 from six.moves import range, zip
 
+import numpy as np
 from pytools import ImmutableRecord, memoize_method
 from loopy.diagnostic import ParameterFinderWarning
 from pytools.py_codegen import (
@@ -686,8 +687,17 @@ class PyOpenCLKernelExecutor(KernelExecutorBase):
     # {{{ debugging aids
 
     def get_code(self, arg_to_dtype=None):
+        def process_dtype(dtype):
+            # Wrap numpy scalar classes / np.dtype instances; pass others through.
+            if isinstance(dtype, type) and issubclass(dtype, np.generic):
+                dtype = np.dtype(dtype)
+            if not isinstance(dtype, np.dtype):
+                return dtype
+            return NumpyType(dtype, self.kernel.target)
+
         if arg_to_dtype is not None:
-            arg_to_dtype = frozenset(six.iteritems(arg_to_dtype))
+            arg_to_dtype = frozenset(
+                    (k, process_dtype(v)) for k, v in six.iteritems(arg_to_dtype))
 
         kernel = self.get_typed_and_scheduled_kernel(arg_to_dtype)