diff --git a/examples/python/hello-loopy.py b/examples/python/hello-loopy.py index 82ff2e60dee345fe16771d09cb39d2d56e9f493d..7c5de5a1b1d7042498a12204959a59021ac5e0d8 100644 --- a/examples/python/hello-loopy.py +++ b/examples/python/hello-loopy.py @@ -26,5 +26,5 @@ knl = lp.split_iname(knl, "i", 128, outer_tag="g.0", inner_tag="l.0") evt, (out,) = knl(queue, a=a) # ENDEXAMPLE -cknl = lp.CompiledKernel(ctx, knl) -print(cknl.get_highlighted_code({"a": np.float32})) +knl = lp.add_and_infer_dtypes(knl, {"a": np.dtype(np.float32)}) +print(lp.generate_code_v2(knl).device_code()) diff --git a/loopy/preprocess.py b/loopy/preprocess.py index 17226b63addb9e2e30d556730aa326d2ed59128c..c331ccc8259645029866cad4a518cb8198428836 100644 --- a/loopy/preprocess.py +++ b/loopy/preprocess.py @@ -48,20 +48,22 @@ def prepare_for_caching(kernel): import loopy as lp new_args = [] + tgt = kernel.target + for arg in kernel.args: dtype = arg.dtype - if dtype is not None and dtype is not lp.auto: - dtype = dtype.with_target(kernel.target) + if dtype is not None and dtype is not lp.auto and dtype.target is not tgt: + arg = arg.copy(dtype=dtype.with_target(kernel.target)) - new_args.append(arg.copy(dtype=dtype)) + new_args.append(arg) new_temporary_variables = {} for name, temp in six.iteritems(kernel.temporary_variables): dtype = temp.dtype - if dtype is not None and dtype is not lp.auto: - dtype = dtype.with_target(kernel.target) + if dtype is not None and dtype is not lp.auto and dtype.target is not tgt: + temp = temp.copy(dtype=dtype.with_target(tgt)) - new_temporary_variables[name] = temp.copy(dtype=dtype) + new_temporary_variables[name] = temp kernel = kernel.copy( args=new_args, diff --git a/loopy/target/c/codegen/expression.py b/loopy/target/c/codegen/expression.py index 68cc32e56be077c7e45d11b9e2aade86b04494cc..8f924d3aee3b9f2982006fdb7b558cccac6785e3 100644 --- a/loopy/target/c/codegen/expression.py +++ b/loopy/target/c/codegen/expression.py @@ -167,6 +167,13 @@ class ExpressionToCExpressionMapper(IdentityMapper): def base_impl(expr, type_context): return self.rec(expr.aggregate, type_context)[self.rec(expr.index, 'i')] + def make_var(name): + from loopy import TaggedVariable + if isinstance(expr.aggregate, TaggedVariable): + return TaggedVariable(name, expr.aggregate.tag) + else: + return var(name) + from pymbolic.primitives import Variable if not isinstance(expr.aggregate, Variable): return base_impl(expr, type_context) @@ -224,16 +231,16 @@ class ExpressionToCExpressionMapper(IdentityMapper): (isinstance(ary, (ConstantArg, GlobalArg)) or (isinstance(ary, TemporaryVariable) and ary.base_storage))): # unsubscripted global args are pointers - result = var(access_info.array_name)[0] + result = make_var(access_info.array_name)[0] else: # unsubscripted temp vars are scalars # (unless they use base_storage) - result = var(access_info.array_name) + result = make_var(access_info.array_name) else: subscript, = access_info.subscripts - result = var(access_info.array_name)[self.rec(subscript, 'i')] + result = make_var(access_info.array_name)[self.rec(subscript, 'i')] if access_info.vector_index is not None: return self.codegen_state.ast_builder.add_vector_access( @@ -716,6 +723,8 @@ class CExpressionToCodeMapper(RecursiveMapper): def map_variable(self, expr, enclosing_prec): return expr.name + map_tagged_variable = map_variable + def map_lookup(self, expr, enclosing_prec): return self.parenthesize_if_needed( "%s.%s" % (