diff --git a/loopy/kernel/function_interface.py b/loopy/kernel/function_interface.py index 13955f92897a1e6dd0b678f27cd658ff16e7b2e7..e0c086eb8b502b9147bbf17927cb628340731790 100644 --- a/loopy/kernel/function_interface.py +++ b/loopy/kernel/function_interface.py @@ -107,6 +107,10 @@ def get_kw_pos_association(kernel): # }}} + +# {{{ template class + + class InKernelCallable(ImmutableRecord): """ @@ -137,13 +141,10 @@ class InKernelCallable(ImmutableRecord): # {{{ sanity checks if not isinstance(name, str): - raise LoopyError("name of a InKernelCallable should be a string") + raise LoopyError("name of a CallableOnScalar should be a string") # }}} - if name_in_target is not None and subkernel is not None: - subkernel = subkernel.copy(name=name_in_target) - super(InKernelCallable, self).__init__(name=name, subkernel=subkernel, arg_id_to_dtype=arg_id_to_dtype, @@ -168,6 +169,93 @@ class InKernelCallable(ImmutableRecord): its keyword identifier. """ + raise NotImplementedError() + + def with_descrs(self, arg_id_to_descr): + """ + :arg arg_id_to_descr: a mapping from argument identifiers + (integers for positional arguments, names for keyword + arguments) to :class:`loopy.ArrayArgDescriptor` instances. + Unspecified/unknown types are not represented in *arg_id_to_descr*. + + Return values are denoted by negative integers, with the + first returned value identified as *-1*. + + :returns: a tuple ``(new_self, arg_id_to_type)``, where *new_self* is a + new :class:`InKernelCallable` specialized for the given types, + and *arg_id_to_descr* is a mapping of the same form as the + argument above, however it may have more information present. + Any argument information exists both by its positional and + its keyword identifier. + """ + + raise NotImplementedError() + + def with_iname_tag_usage(self, unusable, concurrent_shape): + """ + :arg unusable: a set of iname tags that may not be used in the callee. + :arg concurrent_shape: an list of tuples ``(iname_tag, bound)`` for + concurrent inames that are used in the calller but also available + for mapping by the callee. *bound* is given as a + :class:`islpy.PwAff`. + + :returns: a list of the same type as *concurrent*, potentially modified + by increasing bounds or adding further iname tag entries. + + All iname tags not explicitly listed in *concurrent* or *unusable* are + available for mapping by the callee. + """ + + raise NotImplementedError() + + def is_ready_for_code_gen(self): + + return (self.arg_id_to_dtype is not None and + self.arg_id_to_descr is not None and + self.name_in_target is not None) + + # {{{ code generation + + def generate_preambles(self, target): + """ This would generate the target specific preamble. + """ + raise NotImplementedError() + + def emit_call(self, expression_to_code_mapper, expression, target): + + raise NotImplementedError() + + def emit_call_insn(self, insn, target, expression_to_code_mapper): + + raise NotImplementedError() + + # }}} + + def __eq__(self, other): + return (self.name == other.name + and self.arg_id_to_descr == other.arg_id_to_descr + and self.arg_id_to_dtype == other.arg_id_to_dtype + and self.subkernel == other.subkernel) + + def __hash__(self): + return hash((self.name, self.subkernel, self.name_in_target)) + + +# }}} + + +class CallableOnScalar(InKernelCallable): + + def __init__(self, name, arg_id_to_dtype=None, + arg_id_to_descr=None, name_in_target=None): + + super(CallableOnScalar, self).__init__(name=name, + subkernel=None, + arg_id_to_dtype=arg_id_to_dtype, + arg_id_to_descr=arg_id_to_descr, + name_in_target=name_in_target) + + def with_types(self, arg_id_to_dtype, target): if self.arg_id_to_dtype is not None: # specializing an already specialized function. @@ -177,9 +265,9 @@ class InKernelCallable(ImmutableRecord): if self.arg_id_to_dtype[id] != arg_id_to_dtype[id]: raise LoopyError("Overwriting a specialized" " function is illegal--maybe start with new instance of" - " InKernelCallable?") + " CallableScalar?") - # {{{ attempt to specialize using scalar functions + # {{{ attempt to specialize using scalar functions present in target if self.name in target.get_device_ast_builder().function_identifiers(): new_in_knl_callable = target.get_device_ast_builder().with_types( @@ -190,13 +278,93 @@ class InKernelCallable(ImmutableRecord): # }}} - if self.subkernel is None: - # did not find a scalar function and function prototype does not - # even have subkernel registered => no match found - raise LoopyError("Function %s not present within" - " the %s namespace" % (self.name, target)) + # did not find a scalar function and function prototype does not + # even have subkernel registered => no match found + raise LoopyError("Function %s not present within" + " the %s namespace" % (self.name, target)) + + def with_descrs(self, arg_id_to_descr): + + # This is a scalar call + # need to assert that the name is in funtion indentifiers + arg_id_to_descr[-1] = ValueArgDescriptor() + return self.copy(arg_id_to_descr=arg_id_to_descr) + + def with_iname_tag_usage(self, unusable, concurrent_shape): + + raise NotImplementedError() + + def is_ready_for_code_gen(self): + + return (self.arg_id_to_dtype is not None and + self.arg_id_to_descr is not None and + self.name_in_target is not None) + + # {{{ code generation + + def generate_preambles(self, target): + """ This would generate the target specific preamble. + """ + raise NotImplementedError() + + def emit_call(self, expression_to_code_mapper, expression, target): + + assert self.is_ready_for_code_gen() + + # must have single assignee + assert len(expression.parameters) == len(self.arg_id_to_dtype) - 1 + arg_dtypes = tuple(self.arg_id_to_dtype[id] for id in + range(len(self.arg_id_to_dtype)-1)) + + par_dtypes = tuple(expression_to_code_mapper.infer_type(par) for par in + expression.parameters) + + from loopy.expression import dtype_to_type_context + # processing the parameters with the required dtypes + processed_parameters = tuple( + expression_to_code_mapper.rec(par, + dtype_to_type_context(target, tgt_dtype), + tgt_dtype) + for par, par_dtype, tgt_dtype in zip( + expression.parameters, par_dtypes, arg_dtypes)) + + from pymbolic import var + return var(self.name_in_target)(*processed_parameters) + + def emit_call_insn(self, insn, target, expression_to_code_mapper): + # TODO: Need to add support for functions like sincos(x) + # which would give multiple outputs but takes in scalar arguments - # {{{ attempt to specialization with array functions + raise NotImplementedError("emit_call_insn only applies for" + " CallableKernels") + + # }}} + + def __eq__(self, other): + return (self.name == other.name + and self.arg_id_to_descr == other.arg_id_to_descr + and self.arg_id_to_dtype == other.arg_id_to_dtype + and self.subkernel == other.subkernel) + + def __hash__(self): + return hash((self.name, self.subkernel, self.name_in_target)) + + +class CallableKernel(InKernelCallable): + + def __init__(self, name, subkernel, arg_id_to_dtype=None, + arg_id_to_descr=None, name_in_target=None): + + if name_in_target is not None and subkernel is not None: + subkernel = subkernel.copy(name=name_in_target) + + super(CallableKernel, self).__init__(name=name, + subkernel=subkernel, + arg_id_to_dtype=arg_id_to_dtype, + arg_id_to_descr=arg_id_to_descr, + name_in_target=name_in_target) + + def with_types(self, arg_id_to_dtype, target): kw_to_pos, pos_to_kw = get_kw_pos_association(self.subkernel) @@ -239,76 +407,37 @@ class InKernelCallable(ImmutableRecord): new_arg_id_to_dtype[read_count] = arg.dtype read_count += 1 - # }}} - # Returning the kernel call with specialized subkernel and the corresponding # new arg_id_to_dtype return self.copy(subkernel=specialized_kernel, arg_id_to_dtype=new_arg_id_to_dtype) def with_descrs(self, arg_id_to_descr): - """ - :arg arg_id_to_descr: a mapping from argument identifiers - (integers for positional arguments, names for keyword - arguments) to :class:`loopy.ArrayArgDescriptor` instances. - Unspecified/unknown types are not represented in *arg_id_to_descr*. - Return values are denoted by negative integers, with the - first returned value identified as *-1*. + # tuning the subkernel so that we have the the matching shapes and + # dim_tags. + # FIXME: Although We receive input if the argument is + # `local/global`. We do not use it to set the subkernel function + # signature. Need to do it, so that we can handle teporary inputs + # in the array call. - :returns: a tuple ``(new_self, arg_id_to_type)``, where *new_self* is a - new :class:`InKernelCallable` specialized for the given types, - and *arg_id_to_descr* is a mapping of the same form as the - argument above, however it may have more information present. - Any argument information exists both by its positional and - its keyword identifier. - """ + # Collecting the parameters + new_args = self.subkernel.args.copy() + kw_to_pos, pos_to_kw = get_kw_pos_association(self.subkernel) - if self.subkernel is None: - # This is a scalar call - # need to assert that the name is in funtion indentifiers - arg_id_to_descr[-1] = ValueArgDescriptor() - return self.copy(arg_id_to_descr=arg_id_to_descr) + for id, descr in arg_id_to_descr.items(): + if isinstance(id, str): + id = kw_to_pos[id] + assert isinstance(id, int) + new_args[id] = new_args[id].copy(shape=descr.shape, + dim_tags=descr.dim_tags) - else: - # this ia a kernel call - # tuning the subkernel so that we have the the matching shapes and - # dim_tags. - # FIXME: Although We receive input if the argument is - # `local/global`. We do not use it to set the subkernel function - # signature. Need to do it, so that we can handle teporary inputs - # in the array call. - - # Collecting the parameters - new_args = self.subkernel.args.copy() - kw_to_pos, pos_to_kw = get_kw_pos_association(self.subkernel) - - for id, descr in arg_id_to_descr.items(): - if isinstance(id, str): - id = kw_to_pos[id] - assert isinstance(id, int) - new_args[id] = new_args[id].copy(shape=descr.shape, - dim_tags=descr.dim_tags) - - descriptor_specialized_knl = self.subkernel.copy(args=new_args) - - return self.copy(subkernel=descriptor_specialized_knl, - arg_id_to_descr=arg_id_to_descr) + descriptor_specialized_knl = self.subkernel.copy(args=new_args) - def with_iname_tag_usage(self, unusable, concurrent_shape): - """ - :arg unusable: a set of iname tags that may not be used in the callee. - :arg concurrent_shape: an list of tuples ``(iname_tag, bound)`` for - concurrent inames that are used in the calller but also available - for mapping by the callee. *bound* is given as a - :class:`islpy.PwAff`. + return self.copy(subkernel=descriptor_specialized_knl, + arg_id_to_descr=arg_id_to_descr) - :returns: a list of the same type as *concurrent*, potentially modified - by increasing bounds or adding further iname tag entries. - - All iname tags not explicitly listed in *concurrent* or *unusable* are - available for mapping by the callee. - """ + def with_iname_tag_usage(self, unusable, concurrent_shape): raise NotImplementedError() @@ -327,30 +456,7 @@ class InKernelCallable(ImmutableRecord): def emit_call(self, expression_to_code_mapper, expression, target): - assert self.is_ready_for_code_gen() - - if self.subkernel: - raise NotImplementedError() - - # must have single assignee - assert len(expression.parameters) == len(self.arg_id_to_dtype) - 1 - arg_dtypes = tuple(self.arg_id_to_dtype[id] for id in - range(len(self.arg_id_to_dtype)-1)) - - par_dtypes = tuple(expression_to_code_mapper.infer_type(par) for par in - expression.parameters) - - from loopy.expression import dtype_to_type_context - # processing the parameters with the required dtypes - processed_parameters = tuple( - expression_to_code_mapper.rec(par, - dtype_to_type_context(target, tgt_dtype), - tgt_dtype) - for par, par_dtype, tgt_dtype in zip( - expression.parameters, par_dtypes, arg_dtypes)) - - from pymbolic import var - return var(self.name_in_target)(*processed_parameters) + raise NotImplementedError("emit_call only works on scalar operations") def emit_call_insn(self, insn, target, expression_to_code_mapper): @@ -402,111 +508,7 @@ class InKernelCallable(ImmutableRecord): and self.subkernel == other.subkernel) def __hash__(self): - return hash((self.name, self.subkernel)) - -# {{{ callable kernel - - -class CallableKernel(InKernelCallable): - """ - - ..attribute:: name - - This would be the name by which the function would be called in the loopy - kernel. - - .. attribute:: subkernel - - The subkernel associated with the call. - - """ - - # {{{ constructor - - def __init__(self, name=None, subkernel=None): - - super(CallableKernel, self).__init__(name=name) - - if not name == subkernel.name: - subkernel = subkernel.copy(name=name) - - self.subkernel = subkernel - - # }}} - - # {{{ copy - - def copy(self, name=None, subkernel=None): - if name is None: - name = self.name - - if subkernel is None: - subkernel = self.subkernel - - return self.__class__(name=name, - subkernel=subkernel) - - # }}} - - # {{{ with_types - - def with_types(self, arg_id_to_dtype): - - # {{{ sanity checks for arg_id_to_dtype - - for id in arg_id_to_dtype: - if not isinstance(id, str): - raise LoopyError("For Callable kernels the input should be all given" - "as KWargs") - - # }}} - - # }}} - - # {{{ with_descriptors - - def with_descriptors(self, arg_id_to_descr): - for id, arg_descr in arg_id_to_descr.items(): - # The dimensions don't match => reject it - if len(arg_descr.dim_tags) != len(self.subkernel.arg_dict[id].shape): - raise LoopyError("The number of dimensions do not match between the" - "caller kernel and callee kernel for the variable name %s in" - "the callee kernel" % id) - - new_args = [] - for arg in self.subkernel.args: - if arg.name in arg_id_to_descr: - new_args.copy(arg.copy(dim_tags=arg_id_to_descr[arg.name])) - pass - else: - new_args.append(arg.copy()) - - specialized_kernel = self.subkernel.copy(args=new_args) - - new_arg_id_to_descr = {} - - for id, arg in specialized_kernel.arg_dict.items(): - new_arg_id_to_descr[id] = ArrayArgDescriptor(arg.dim_tags, "GLOBAL") - - return self.copy(subkernel=specialized_kernel), new_arg_id_to_descr - - # }}} - - # {{{ get_target_specific_name - - def get_target_specific_name(self, target): - return self.subkernel.name - - # }}} - - # {{{ get preamble - - def get_preamble(self, target): - return "" - - # }}} - -# }}} + return hash((self.name, self.subkernel, self.name_in_target)) # {{{ new pymbolic calls to scoped functions