diff --git a/loopy/__init__.py b/loopy/__init__.py index 89683e0b466714700f18b090ec365d5861ea4d05..4fa8c5fc5a4dbef134eae0d237961fe495ca681d 100644 --- a/loopy/__init__.py +++ b/loopy/__init__.py @@ -116,6 +116,8 @@ from loopy.transform.batch import to_batched from loopy.transform.parameter import assume, fix_parameters from loopy.transform.save import save_and_reload_temporaries from loopy.transform.add_barrier import add_barrier +from loopy.transform.register_knl import register_callable_kernel + # }}} from loopy.type_inference import infer_unknown_types @@ -222,6 +224,8 @@ __all__ = [ "add_barrier", + "register_callable_kernel", + # }}} "get_dot_dependency_graph", diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py index d716f0b785f83a84d78475f71ecc76ec23c4c683..25737786cdf2fb4fdda115a22c5e644bfabbebe6 100644 --- a/loopy/kernel/__init__.py +++ b/loopy/kernel/__init__.py @@ -1339,7 +1339,6 @@ class LoopKernel(ImmutableRecordWithoutPickling): "temporary_variables", "iname_to_tag", "substitutions", - "scoped_functions", "iname_slab_increments", "loop_priority", "silenced_warnings", @@ -1362,6 +1361,7 @@ class LoopKernel(ImmutableRecordWithoutPickling): "preamble_generators", "function_manglers", "symbol_manglers", + "scoped_functions", ) def update_persistent_hash(self, key_hash, key_builder): diff --git a/loopy/kernel/data.py b/loopy/kernel/data.py index c90e8a64b6f47a87e87c5e64d2ef930232d34894..59297e4752f944f751111e8c4ece2f2141afbc03 100644 --- a/loopy/kernel/data.py +++ b/loopy/kernel/data.py @@ -607,6 +607,13 @@ class SubstitutionRule(ImmutableRecord): # {{{ function call mangling class CallMangleInfo(ImmutableRecord): + def __init__(self): + raise NotImplementedError("New Mangler interface expected") + + +# FIXME: Uncomment it once everything is done. +# KK: Removed it for the duration the new mangler interface starts working. +''' """ .. 
attribute:: target_name @@ -631,6 +638,7 @@ class CallMangleInfo(ImmutableRecord): target_name=target_name, result_dtypes=result_dtypes, arg_dtypes=arg_dtypes) +''' # }}} diff --git a/loopy/kernel/function_interface.py b/loopy/kernel/function_interface.py index 7127d142b9443062553d92b8f1c5eba1182e7b22..bb88cc0916de1264ede05360554dfc1be1e7dbf0 100644 --- a/loopy/kernel/function_interface.py +++ b/loopy/kernel/function_interface.py @@ -54,6 +54,13 @@ class ArrayArgDescriptor(ArgDescriptor): shape=None, mem_scope=None, dim_tags=None): + + # {{{ sanity checks + + assert isinstance(shape, tuple) + + # }}} + super(ArgDescriptor, self).__init__(shape=None, mem_scope=mem_scope, dim_tags=dim_tags) @@ -299,11 +306,11 @@ class InKernelCallable(ImmutableRecord): raise NotImplementedError("InKernelCallable.with_types() for" " %s target" % target) - # }}} + if new_arg_id_to_dtype is not None: + # got our specialized function + return self.copy(arg_id_to_dtype=new_arg_id_to_dtype) - if new_arg_id_to_dtype is not None: - # got our speciliazed function - return self.copy(arg_id_to_dtype=new_arg_id_to_dtype) + # }}} if self.subkernel is None: # did not find a scalar function and function prototype does not @@ -326,7 +333,7 @@ class InKernelCallable(ImmutableRecord): new_args.append(arg.copy( dtype=arg_id_to_dtype[kw_to_pos[kw]])) else: - if kw in self.subkernel.read_variables(): + if kw in self.subkernel.get_read_variables(): # need to know the type of the input arguments for type # inference raise LoopyError("Type of %s variable not supplied to the" @@ -395,7 +402,7 @@ class InKernelCallable(ImmutableRecord): # in the array call.
# Collecting the parameters - new_args = self.args.copy() + new_args = self.subkernel.args.copy() kw_to_pos, pos_to_kw = get_kw_pos_association(self.subkernel) for id, descr in arg_id_to_descr.items(): @@ -441,20 +448,59 @@ class InKernelCallable(ImmutableRecord): def get_target_specific_name(self, target): + if self.subkernel is None: + raise NotImplementedError() + else: + return self.subkernel.name + raise NotImplementedError() - def emit_call(self, target): - # two varieties of this call, when obtained in between a function and - # when obtained as a separate instruction statement. + def emit_call(self, insn, target, expression_to_code_mapper): - raise NotImplementedError() + from loopy.kernel.instruction import CallInstruction + from pymbolic.primitives import CallWithKwargs + + assert isinstance(insn, CallInstruction) + + parameters = insn.expression.parameters + kw_parameters = {} + if isinstance(insn.expression, CallWithKwargs): + kw_parameters = insn.expression.kw_parameters + + assignees = insn.assignees + + parameters = list(parameters) + par_dtypes = [self.arg_id_to_dtype[i] for i, _ in enumerate(parameters)] + kw_to_pos, pos_to_kw = get_kw_pos_association(self.subkernel) + for i in range(len(parameters), len(parameters)+len(kw_parameters)): + parameters.append(kw_parameters[pos_to_kw[i]]) + par_dtypes.append(self.arg_id_to_dtype[pos_to_kw[i]]) + + # TODO: currently no support for insn keywords. + parameters = parameters + list(assignees) + par_dtypes = par_dtypes + [self.arg_id_to_dtype[-i-1] for i, _ in + enumerate(assignees)] + + # Note that we are not going to do any type casting in array calls.
+ from loopy.expression import dtype_to_type_context + from pymbolic.mapper.stringifier import PREC_NONE + c_parameters = [ + expression_to_code_mapper(par, PREC_NONE, + dtype_to_type_context(target, par_dtype), + par_dtype).expr + for par, par_dtype in zip( + parameters, par_dtypes)] + + from pymbolic import var + return var(self.get_target_specific_name(target))(*c_parameters) # }}} def __eq__(self, other): return (self.name == other.name and self.arg_id_to_descr == other.arg_id_to_descr - and self.arg_id_to_dtype == other.arg_id_to_dtype) + and self.arg_id_to_dtype == other.arg_id_to_dtype + and self.subkernel == other.subkernel) def __hash__(self): return hash((self.name, self.subkernel)) @@ -640,6 +686,13 @@ def register_pymbolic_calls_to_knl_callables(kernel, unique_name = next_indexed_name(unique_name) # book-keeping of the functions and names mappings for later use + if in_knl_callable.subkernel is not None: + # changing the name of the subkernel so that it emits a function + # with the same name as the one being used in the + # scoped_function. + new_subkernel = in_knl_callable.subkernel.copy( + name=unique_name) + in_knl_callable = in_knl_callable.copy(subkernel=new_subkernel) scoped_names_to_functions[unique_name] = in_knl_callable scoped_functions_to_names[in_knl_callable] = unique_name diff --git a/loopy/preprocess.py b/loopy/preprocess.py index 01eeb513046be661646d440d7f3a5e7d691ae1b6..068953a52709f9cf869a88dad425168fa6c67cb2 100644 --- a/loopy/preprocess.py +++ b/loopy/preprocess.py @@ -2135,7 +2135,7 @@ def get_arg_description_from_sub_array_ref(sub_array, kernel): """ from loopy.kernel.function_interface import ArrayArgDescriptor - name = sub_array.subscript.attribute.name + name = sub_array.subscript.aggregate.name if name in kernel.temporary_variables: mem_scope = "LOCAL" @@ -2161,8 +2161,8 @@ class ArgDescriptionInferer(CombineMapper): arguments.
""" - def __init__(self, scoped_functions): - self.scoped_functions = scoped_functions + def __init__(self, kernel): + self.kernel = kernel def combine(self, values): import operator @@ -2173,7 +2173,8 @@ class ArgDescriptionInferer(CombineMapper): from loopy.symbolic import SubArrayRef # descriptors for the args - arg_id_to_descr = dict((i, get_arg_description_from_sub_array_ref(par)) + arg_id_to_descr = dict((i, + get_arg_description_from_sub_array_ref(par, self.kernel)) if isinstance(par, SubArrayRef) else (i, ValueArgDescriptor()) for i, par in enumerate(expr.parameters)) @@ -2187,7 +2188,8 @@ class ArgDescriptionInferer(CombineMapper): for i, par in enumerate(assignees): if isinstance(par, SubArrayRef): assignee_id_to_descr[-i-1] = ( - get_arg_description_from_sub_array_ref(par)) + get_arg_description_from_sub_array_ref(par, + self.kernel)) else: assignee_id_to_descr[-i-1] = ValueArgDescriptor() @@ -2196,20 +2198,21 @@ class ArgDescriptionInferer(CombineMapper): # specializing the function according to the parameter description new_scoped_function = ( - self.scoped_functions[expr.function.name].with_descrs( + self.kernel.scoped_functions[expr.function.name].with_descrs( combined_arg_id_to_dtype)) # collecting the descriptors for args, kwargs, assignees - return ( - frozenset(((expr, new_scoped_function), )) | - self.combine((self.rec(child) for child in expr.parameters))) + a = frozenset(((expr, new_scoped_function), )) + b = self.combine((self.rec(child) for child in expr.parameters)) + return (a | b) def map_call_with_kwargs(self, expr, **kwargs): from loopy.kernel.function_intergace import ValueArgDescriptor from loopy.symbolic import SubArrayRef # descriptors for the args and kwargs: - arg_id_to_descr = dict((i, get_arg_description_from_sub_array_ref(par)) + arg_id_to_descr = dict((i, get_arg_description_from_sub_array_ref(par, + self.kernel)) if isinstance(par, SubArrayRef) else ValueArgDescriptor() for i, par in enumerate(expr.parameters) + 
expr.kw_parameters.items()) @@ -2223,7 +2226,8 @@ class ArgDescriptionInferer(CombineMapper): for i, par in enumerate(assignees): if isinstance(par, SubArrayRef): assignee_id_to_descr[-i-1] = ( - get_arg_description_from_sub_array_ref(par)) + get_arg_description_from_sub_array_ref(par, + self.kernel)) else: assignee_id_to_descr[-i-1] = ValueArgDescriptor() @@ -2232,7 +2236,7 @@ class ArgDescriptionInferer(CombineMapper): # specializing the function according to the parameter description new_scoped_function = ( - self.scoped_functions[expr.function.name].with_descr( + self.kernel.scoped_functions[expr.function.name].with_descr( combined_arg_id_to_descr)) # collecting the descriptors for args, kwargs, assignees @@ -2252,7 +2256,7 @@ def infer_arg_descr(kernel): shape and dimensions of the arguments too. """ - arg_description_modifier = ArgDescriptionInferer(kernel.scoped_functions) + arg_description_modifier = ArgDescriptionInferer(kernel) pymbolic_calls_to_functions = set() for insn in kernel.instructions: @@ -2264,8 +2268,7 @@ def infer_arg_descr(kernel): arg_description_modifier(insn.expression, assignees=insn.assignees)) if isinstance(insn, (MultiAssignmentBase, CInstruction)): - a = arg_description_modifier(insn.expression) - pymbolic_calls_to_functions.update(a) + pymbolic_calls_to_functions.update(arg_description_modifier(insn.expression)) elif isinstance(insn, _DataObliviousInstruction): pass else: @@ -2392,9 +2395,10 @@ def preprocess_kernel(kernel, device=None): print(75*'-') print('Linked Functions:') for name, func in kernel.scoped_functions.items(): - print(name, "=>", func) + print(name, "=>", (func.name, func.arg_id_to_dtype, + func.arg_id_to_descr, func.subkernel.args)) + print() print(75*'-') - 1/0 kernel = kernel.target.preprocess(kernel) diff --git a/loopy/symbolic.py b/loopy/symbolic.py index 8abda0f2a641a4ab53d4cce05ba4d3ff4e2da6ef..bdfe57982ac3a457c87ce69886f48ec144841c73 100644 --- a/loopy/symbolic.py +++ b/loopy/symbolic.py @@ -189,6 +189,9 @@ 
class CombineMapper(CombineMapperBase): def map_reduction(self, expr): return self.rec(expr.expr) + def map_sub_array_ref(self, expr): + return self.rec(expr.get_begin_subscript()) + map_linear_subscript = CombineMapperBase.map_subscript map_scoped_function = CombineMapperBase.map_variable @@ -738,7 +741,7 @@ class SubArrayRef(p.Expression): sub_dim_tags.append(DimTag(dim_tag.stride)) sub_shape.append(axis_length) - return sub_dim_tags, sub_shape + return sub_dim_tags, tuple(sub_shape) def __getinitargs__(self): return (self.swept_inames, self.subscript) diff --git a/loopy/target/c/__init__.py b/loopy/target/c/__init__.py index 2b5e394bbcc566510c27b069506b67f60d5cd911..28c346dcc7e0ef718bc729214587853c835dd0e6 100644 --- a/loopy/target/c/__init__.py +++ b/loopy/target/c/__init__.py @@ -822,6 +822,10 @@ class CASTBuilder(ASTBuilderBase): lhs_expr, rhs_expr, lhs_dtype): raise NotImplementedError("atomic updates in %s" % type(self).__name__) + # FIXME: With the new mangler interface this should not be present, + # Commenting this part so that this does not get used anywhere in the + # meantime + ''' def emit_tuple_assignment(self, codegen_state, insn): ecm = codegen_state.expression_to_code_mapper @@ -844,84 +848,23 @@ class CASTBuilder(ASTBuilderBase): assignments.append(Assign(lhs_code, rhs_code)) return block_if_necessary(assignments) + ''' def emit_multiple_assignment(self, codegen_state, insn): ecm = codegen_state.expression_to_code_mapper - from pymbolic.primitives import Variable - from pymbolic.mapper.stringifier import PREC_NONE - - func_id = insn.expression.function - parameters = insn.expression.parameters - - if isinstance(func_id, Variable): - func_id = func_id.name - - assignee_var_descriptors = [ - codegen_state.kernel.get_var_descriptor(a) - for a in insn.assignee_var_names()] - - par_dtypes = tuple(ecm.infer_type(par) for par in parameters) - - mangle_result = codegen_state.kernel.mangle_function(func_id, par_dtypes) - if mangle_result is None: - raise 
RuntimeError("function '%s' unknown--" - "maybe you need to register a function mangler?" - % func_id) - - assert mangle_result.arg_dtypes is not None - - if mangle_result.target_name == "loopy_make_tuple": - # This shorcut avoids actually having to emit a 'make_tuple' function. - return self.emit_tuple_assignment(codegen_state, insn) - - from loopy.expression import dtype_to_type_context - c_parameters = [ - ecm(par, PREC_NONE, - dtype_to_type_context(self.target, tgt_dtype), - tgt_dtype).expr - for par, par_dtype, tgt_dtype in zip( - parameters, par_dtypes, mangle_result.arg_dtypes)] - - from loopy.codegen import SeenFunction - codegen_state.seen_functions.add( - SeenFunction(func_id, - mangle_result.target_name, - mangle_result.arg_dtypes)) - - from pymbolic import var - for i, (a, tgt_dtype) in enumerate( - zip(insn.assignees[1:], mangle_result.result_dtypes[1:])): - if tgt_dtype != ecm.infer_type(a): - raise LoopyError("type mismatch in %d'th (1-based) left-hand " - "side of instruction '%s'" % (i+1, insn.id)) - c_parameters.append( - # TODO Yuck: The "where-at function": &(...) 
- var("&")( - ecm(a, PREC_NONE, - dtype_to_type_context(self.target, tgt_dtype), - tgt_dtype).expr)) - - from pymbolic import var - result = var(mangle_result.target_name)(*c_parameters) - - # In case of no assignees, we are done - if len(mangle_result.result_dtypes) == 0: - from cgen import ExpressionStatement - return ExpressionStatement( - CExpression(self.get_c_expression_to_code_mapper(), result)) - - result = ecm.wrap_in_typecast( - mangle_result.result_dtypes[0], - assignee_var_descriptors[0].dtype, - result) + func_id = insn.expression.function.name - lhs_code = ecm(insn.assignees[0], prec=PREC_NONE, type_context=None) + in_knl_callable = codegen_state.kernel.scoped_functions[func_id] + in_knl_callable_as_call = in_knl_callable.emit_call( + insn=insn, + target=self.target, + expression_to_code_mapper=ecm) - from cgen import Assign - return Assign( - lhs_code, - CExpression(self.get_c_expression_to_code_mapper(), result)) + from cgen import ExpressionStatement + return ExpressionStatement( + CExpression(self.get_c_expression_to_code_mapper(), + in_knl_callable_as_call)) def emit_sequential_loop(self, codegen_state, iname, iname_dtype, lbound, ubound, inner): diff --git a/loopy/target/c/codegen/expression.py b/loopy/target/c/codegen/expression.py index 59ed77f9c17fa04d67e251c22bec88fc8b15936c..17e48555512ef7a004f0ac9488b6cd7034657b7f 100644 --- a/loopy/target/c/codegen/expression.py +++ b/loopy/target/c/codegen/expression.py @@ -165,6 +165,10 @@ class ExpressionToCExpressionMapper(IdentityMapper): def map_tagged_variable(self, expr, type_context): return var(expr.name) + def map_sub_array_ref(self, expr, type_context): + return var("&")(self.rec(expr.get_begin_subscript(), + type_context)) + def map_subscript(self, expr, type_context): def base_impl(expr, type_context): return self.rec(expr.aggregate, type_context)[self.rec(expr.index, 'i')] diff --git a/loopy/transform/register_knl.py b/loopy/transform/register_knl.py index 
691c0c51aacc5607f38ee7cf3ee94fe62304bbfb..f43550b5b59e888e7a8cfb4379723d82f361e5c0 100644 --- a/loopy/transform/register_knl.py +++ b/loopy/transform/register_knl.py @@ -25,9 +25,9 @@ THE SOFTWARE. from loopy.kernel import LoopKernel from loopy.kernel.creation import FunctionScoper from loopy.diagnostic import LoopyError -from loopy.function_interface import InKernelCallable +from loopy.kernel.function_interface import InKernelCallable -from loopy.kenrel.instruction import (MultiAssignmentBase, CallInstruction, +from loopy.kernel.instruction import (MultiAssignmentBase, CallInstruction, CInstruction, _DataObliviousInstruction) __doc__ = """ @@ -65,15 +65,11 @@ def register_callable_kernel(parent, function_name, child): tests so that both of them can be confirmed to be made for each other. """ - # {{{ Sanity Checks + # {{{ sanity checks assert isinstance(parent, LoopKernel) assert isinstance(child, LoopKernel) assert isinstance(function_name, str) - assert function_name not in parent.auxiliary_kernels, ( - "%s has already been used with some other kernel. 
One" - "function can only be associated with a single kernel" % ( - function_name)) # }}} @@ -105,7 +101,8 @@ def register_callable_kernel(parent, function_name, child): subkernel=child) # returning the parent kernel with the new scoped function dictionary - return parent.copy(scope_functions=scoped_functions) + return parent.copy(scoped_functions=scoped_functions, + instructions=new_insns) # }}} diff --git a/loopy/type_inference.py b/loopy/type_inference.py index bc8669528d1388d0e0e4afbbb1deb2e3bf9424f7..13460387226d79dcbc055f0eb245d11090145748 100644 --- a/loopy/type_inference.py +++ b/loopy/type_inference.py @@ -253,9 +253,10 @@ class TypeInferenceMapper(CombineMapper): def map_call(self, expr, return_tuple=False): from pymbolic.primitives import Variable + from loopy.symbolic import ScopedFunction identifier = expr.function - if isinstance(identifier, Variable): + if isinstance(identifier, (Variable, ScopedFunction)): identifier = identifier.name if identifier in ["indexof", "indexof_vec"]: @@ -297,7 +298,7 @@ class TypeInferenceMapper(CombineMapper): """ # Letting this stay over here, as it maybe needed later for maintaining - # backward compatibility + # backward compatibility: ~KK mangle_result = self.kernel.mangle_function(identifier, arg_dtypes) if return_tuple: if mangle_result is not None: @@ -428,6 +429,10 @@ class TypeInferenceMapper(CombineMapper): return [expr.operation.result_dtypes(self.kernel, rec_result)[0] for rec_result in rec_results] + def map_sub_array_ref(self, expr): + return self.rec(expr.get_begin_subscript()) + + # }}} @@ -457,9 +462,16 @@ def _infer_var_type(kernel, var_name, type_inf_mapper, subst_expander): if isinstance(writer_insn, lp.Assignment): result = type_inf_mapper(expr, return_dtype_set=True) elif isinstance(writer_insn, lp.CallInstruction): - return_dtype_set = type_inf_mapper(expr, return_tuple=True, + result = type_inf_mapper(expr, return_dtype_set=True) + """ + # Maybe we need to alter this so that the type_inf_mapper 
returns a + # :class:`dict`? + # ask about this to Andreas Sir. + return_dtype_set = type_inf_mapper(expr, return_tuple=False, return_dtype_set=True) + print(return_dtype_set) + print(writer_insn.assignee_var_names()) result = [] for return_dtype_set in return_dtype_set: result_i = None @@ -474,6 +486,7 @@ def _infer_var_type(kernel, var_name, type_inf_mapper, subst_expander): assert found if result_i is not None: result.append(result_i) + """ debug(" result: %s", result) @@ -678,6 +691,18 @@ def infer_unknown_types(kernel, expect_completion=False): args=[new_arg_dict[arg.name] for arg in kernel.args], ) + #------------------------------------------------------------------------ + # KK: + # FIXME: yet more scoped-function type specialization, but it is needed for the + # specialization of the in-kernel callables. + # For example, if an instruction is: + # `[i]:z[i] = a_kernel_function([j]:x[j], [k]: y[k])` + # and the user has already provided the types of the args x, y, z, + # then the instruction would not go through the TypeInferenceMapper and hence + # the function `a_kernel_function` would not undergo type specialization, + # which would create problems in the future. + #------------------------------------------------------------------------ + from loopy.kernel.function_interface import ( register_pymbolic_calls_to_knl_callables) return register_pymbolic_calls_to_knl_callables(