diff --git a/examples/python/hello-loopy.py b/examples/python/hello-loopy.py index 82ff2e60dee345fe16771d09cb39d2d56e9f493d..7c5de5a1b1d7042498a12204959a59021ac5e0d8 100644 --- a/examples/python/hello-loopy.py +++ b/examples/python/hello-loopy.py @@ -26,5 +26,5 @@ knl = lp.split_iname(knl, "i", 128, outer_tag="g.0", inner_tag="l.0") evt, (out,) = knl(queue, a=a) # ENDEXAMPLE -cknl = lp.CompiledKernel(ctx, knl) -print(cknl.get_highlighted_code({"a": np.float32})) +knl = lp.add_and_infer_dtypes(knl, {"a": np.dtype(np.float32)}) +print(lp.generate_code_v2(knl).device_code()) diff --git a/loopy/preprocess.py b/loopy/preprocess.py index 17226b63addb9e2e30d556730aa326d2ed59128c..c331ccc8259645029866cad4a518cb8198428836 100644 --- a/loopy/preprocess.py +++ b/loopy/preprocess.py @@ -48,20 +48,22 @@ def prepare_for_caching(kernel): import loopy as lp new_args = [] + tgt = kernel.target + for arg in kernel.args: dtype = arg.dtype - if dtype is not None and dtype is not lp.auto: - dtype = dtype.with_target(kernel.target) + if dtype is not None and dtype is not lp.auto and dtype.target is not tgt: + arg = arg.copy(dtype=dtype.with_target(kernel.target)) - new_args.append(arg.copy(dtype=dtype)) + new_args.append(arg) new_temporary_variables = {} for name, temp in six.iteritems(kernel.temporary_variables): dtype = temp.dtype - if dtype is not None and dtype is not lp.auto: - dtype = dtype.with_target(kernel.target) + if dtype is not None and dtype is not lp.auto and dtype.target is not tgt: + temp = temp.copy(dtype=dtype.with_target(tgt)) - new_temporary_variables[name] = temp.copy(dtype=dtype) + new_temporary_variables[name] = temp kernel = kernel.copy( args=new_args,