diff --git a/loopy/preprocess.py b/loopy/preprocess.py index de7f2b593db6d376338aea40171a99fd18778b1e..c1492789f511366dc5f94c859966db67525d123a 100644 --- a/loopy/preprocess.py +++ b/loopy/preprocess.py @@ -773,6 +773,21 @@ def _hackily_ensure_multi_assignment_return_values_are_scoped_private(kernel): FIRST_POINTER_ASSIGNEE_IDX = 1 # noqa + param_dtypes = tuple(type_inf_mapper(param) + for param in insn.expression.parameters) + + func_id = insn.expression.function + + from pymbolic.primitives import Variable + if isinstance(func_id, Variable): + func_id = func_id.name + + mangle_result = kernel.mangle_function(func_id, param_dtypes) + + if mangle_result.target_name == "loopy_make_tuple": + # Skip loopy_make_tuple. This is lowered without a function call. + continue + assignee_dtypes, = type_inf_mapper( insn.expression, return_tuple=True, return_dtype_set=True) @@ -806,7 +821,8 @@ def _hackily_ensure_multi_assignment_return_values_are_scoped_private(kernel): new_temporaries[new_assignee_name] = ( TemporaryVariable( name=new_assignee_name, - dtype=assignee_dtypes[assignee_nr], + dtype=assignee_dtypes[assignee_nr].with_target( + kernel.target), scope=temp_var_scope.PRIVATE)) from pymbolic import var @@ -985,12 +1001,18 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True, var_name_gen(id + "_arg" + str(i)) for i in range(nresults)] - for name in temp_var_names: + from loopy.type_inference import infer_arg_and_reduction_dtypes_for_reduction_expression + + _, reduction_dtypes = ( + infer_arg_and_reduction_dtypes_for_reduction_expression( + temp_kernel, expr, unknown_types_ok=False)) + + for name, dtype in zip(temp_var_names, reduction_dtypes): from loopy.kernel.data import TemporaryVariable, temp_var_scope new_temporary_variables[name] = TemporaryVariable( name=name, shape=(), - dtype=lp.auto, + dtype=dtype, scope=temp_var_scope.PRIVATE) from pymbolic import var