diff --git a/loopy/kernel/function_interface.py b/loopy/kernel/function_interface.py
index fb80c5876a37110442d56234a2c20e9f78ab40b6..5066cff5c753089d78f3eb688f124a49ad2160c2 100644
--- a/loopy/kernel/function_interface.py
+++ b/loopy/kernel/function_interface.py
@@ -203,7 +203,7 @@ class InKernelCallable(ImmutableRecord):
 
         raise NotImplementedError()
 
-    def is_ready_for_code_gen(self):
+    def is_ready_for_codegen(self):
 
         return (self.arg_id_to_dtype is not None and
                 self.arg_id_to_descr is not None)
@@ -289,7 +289,7 @@ class CallableOnScalar(InKernelCallable):
 
         raise NotImplementedError()
 
-    def is_ready_for_code_gen(self):
+    def is_ready_for_codegen(self):
 
         return (self.arg_id_to_dtype is not None and
                 self.arg_id_to_descr is not None and
@@ -304,7 +304,7 @@ class CallableOnScalar(InKernelCallable):
 
     def emit_call(self, expression_to_code_mapper, expression, target):
 
-        assert self.is_ready_for_code_gen()
+        assert self.is_ready_for_codegen()
 
         # must have single assignee
         assert len(expression.parameters) == len(self.arg_id_to_dtype) - 1
@@ -339,7 +339,7 @@ class CallableOnScalar(InKernelCallable):
 
         # Currently doing pass by value for all the assignees.
 
-        assert self.is_ready_for_code_gen()
+        assert self.is_ready_for_codegen()
 
         from loopy.kernel.instruction import CallInstruction
 
@@ -492,7 +492,7 @@ class CallableKernel(InKernelCallable):
 
         raise NotImplementedError()
 
-    def is_ready_for_code_gen(self):
+    def is_ready_for_codegen(self):
 
         return (self.arg_id_to_dtype is not None and
                 self.arg_id_to_descr is not None and
@@ -506,7 +506,7 @@ class CallableKernel(InKernelCallable):
 
     def emit_call_insn(self, insn, target, expression_to_code_mapper):
 
-        assert self.is_ready_for_code_gen()
+        assert self.is_ready_for_codegen()
 
         from loopy.kernel.instruction import CallInstruction
         from pymbolic.primitives import CallWithKwargs
@@ -653,4 +653,6 @@ def register_pymbolic_calls_to_knl_callables(kernel,
 
 # }}}
 
+
+
 # vim: foldmethod=marker
diff --git a/loopy/preprocess.py b/loopy/preprocess.py
index 51389f4f56669e1cebaf7e6b04e5f3e8d9cde0ae..3f3c1c472a8936a70dcfdda651478f47d53bcb4b 100644
--- a/loopy/preprocess.py
+++ b/loopy/preprocess.py
@@ -2319,6 +2319,93 @@ def infer_arg_descr(kernel):
 # }}}
 
 
+# {{{ final sweep over the callables to make them ready for codegen
+
+class ReadyForCodegen(CombineMapper):
+    """
+    Returns *True* only if every call in the expression refers to a scoped
+    function of *kernel* that reports ``is_ready_for_codegen()``.
+    """
+
+    def __init__(self, kernel):
+        self.kernel = kernel
+
+    def combine(self, values):
+        return all(values)
+
+    def map_call(self, expr, *args, **kwargs):
+        is_ready_for_codegen = self.kernel.scoped_functions[
+                expr.function.name].is_ready_for_codegen()
+        return self.combine(
+                (is_ready_for_codegen,) +
+                tuple(
+                    self.rec(child, *args, **kwargs) for child in expr.parameters)
+                )
+
+    def map_call_with_kwargs(self, expr, *args, **kwargs):
+        is_ready_for_codegen = self.kernel.scoped_functions[
+                expr.function.name].is_ready_for_codegen()
+        return self.combine(
+                (is_ready_for_codegen,) +
+                tuple(
+                    self.rec(child, *args, **kwargs)
+                    for child in expr.parameters) +
+                tuple(
+                    self.rec(child, *args, **kwargs)
+                    for child in expr.kw_parameters.values())
+                )
+
+    def map_constant(self, expr, *args, **kwargs):
+        # constants, variables and function symbols contain no calls
+        return True
+
+    map_variable = map_constant
+    map_function_symbol = map_constant
+
+
+def try_making_callable_ready_for_codegen(kernel):
+    """
+    Runs type inference on the instruction expressions of *kernel* that
+    contain callables which are not ready for codegen, and hands the
+    functions specialized this way to the callable registration routine.
+
+    :returns: the result of :func:`register_pymbolic_calls_to_knl_callables`
+        called with *kernel* and the collected specialized functions.
+    :raises NotImplementedError: if an instruction has an unknown type.
+    """
+    from loopy.type_inference import TypeInferenceMapper
+    from loopy.symbolic import SubstitutionRuleExpander
+    from loopy.kernel.function_interface import (
+            register_pymbolic_calls_to_knl_callables)
+
+    ready_for_codegen = ReadyForCodegen(kernel)
+    subst_expander = SubstitutionRuleExpander(kernel.substitutions)
+    type_inf_mapper = TypeInferenceMapper(kernel)
+
+    inferred_functions = {}
+    for insn in kernel.instructions:
+        if isinstance(insn, (MultiAssignmentBase, CallInstruction)):
+            expr = subst_expander(insn.expression)
+            if not ready_for_codegen(expr):
+                # only trying to specialize the functions which are not ready
+                # for codegen
+                type_inf_mapper(expr)
+                inferred_functions.update(
+                        type_inf_mapper.specialized_functions)
+
+        elif isinstance(insn, _DataObliviousInstruction):
+            pass
+        else:
+            # NOTE(review): confirm that no other instruction type is expected
+            # here; such instructions are rejected instead of being skipped.
+            raise NotImplementedError("Unknown Instruction")
+
+    return register_pymbolic_calls_to_knl_callables(kernel,
+            inferred_functions)
+
+# }}}
+
+
 preprocess_cache = WriteOncePersistentDict(
         "loopy-preprocess-cache-v2-"+DATA_MODEL_VERSION,
         key_builder=LoopyKeyBuilder())
@@ -2399,6 +2486,9 @@ def preprocess_kernel(kernel, device=None):
     # call.
     kernel = infer_arg_descr(kernel)
 
+    # try specializing callables one last time.
+    kernel = try_making_callable_ready_for_codegen(kernel)
+
     # Ordering restriction:
     # add_axes_to_temporaries_for_ilp because reduction accumulators
     # need to be duplicated by this.