diff --git a/loopy/kernel/function_interface.py b/loopy/kernel/function_interface.py index ee44d5ea412318f2fe49be3bc5f5556546b04aa4..17bd60ff2a335ebbf8232ae4223ca8b635806736 100644 --- a/loopy/kernel/function_interface.py +++ b/loopy/kernel/function_interface.py @@ -4,6 +4,8 @@ import re import six import numpy as np +from six.moves import zip + from pytools import ImmutableRecord from loopy.diagnostic import LoopyError from loopy.types import NumpyType @@ -274,13 +276,16 @@ class InKernelCallable(ImmutableRecord): """ if self.arg_id_to_dtype: - # trying to specialize an already specialized function. + # specializing an already specialized function. - if self.arg_id_to_dtype == arg_id_to_dtype: - return self.copy() - else: - raise LoopyError("Overwriting a specialized function--maybe" - " start with new instance of InKernelCallable?") + for id, dtype in arg_id_to_dtype.items(): + # only checking for the ones which have been provided + if self.arg_id_to_dtype[id] != arg_id_to_dtype[id]: + raise LoopyError("Overwriting a specialized" + " function is illegal--maybe start with new instance of" + " InKernelCallable?") + # TODO: Check if the arguments match. If yes then just + # return self.copy() # {{{ attempt to specialize using scalar functions @@ -290,6 +295,7 @@ class InKernelCallable(ImmutableRecord): from loopy.target.pyopencl import PyOpenCLTarget from loopy.target.cuda import CudaTarget + # FIXME: Push this into the target if isinstance(target, CTarget): new_arg_id_to_dtype = c_with_types(self.name, arg_id_to_dtype) @@ -393,11 +399,11 @@ class InKernelCallable(ImmutableRecord): return self.copy(arg_id_to_descr=arg_id_to_descr) else: - # Now this ia a kernel call + # this is a kernel call # tuning the subkernel so that we have the the matching shapes and # dim_tags. # FIXME: Although We receive input if the argument is - # local/global. We do not use it to set the subkernel function + # `local/global`. We do not use it to set the subkernel function # signature. 
Need to do it, so that we can handle teporary inputs # in the array call. @@ -412,7 +418,6 @@ class InKernelCallable(ImmutableRecord): new_args[id] = new_args[id].copy(shape=descr.shape, dim_tags=descr.dim_tags) - descriptor_specialized_knl = self.subkernel.copy(args=new_args) return self.copy(subkernel=descriptor_specialized_knl, @@ -450,13 +455,37 @@ class InKernelCallable(ImmutableRecord): def get_target_specific_name(self, target): if self.subkernel is None: - raise NotImplementedError() + return self.name else: return self.subkernel.name raise NotImplementedError() - def emit_call(self, insn, target, expression_to_code_mapper): + def emit_call(self, expression_to_code_mapper, expression, target): + if self.subkernel: + raise NotImplementedError() + + # must have single assignee + assert len(expression.parameters) == len(self.arg_id_to_dtype) - 1 + arg_dtypes = tuple(self.arg_id_to_dtype[id] for id in + range(len(self.arg_id_to_dtype)-1)) + + par_dtypes = tuple(expression_to_code_mapper.infer_type(par) for par in + expression.parameters) + + from loopy.expression import dtype_to_type_context + # processing the parameters with the required dtypes + processed_parameters = tuple( + expression_to_code_mapper.rec(par, + dtype_to_type_context(target, tgt_dtype), + tgt_dtype) + for par, par_dtype, tgt_dtype in zip( + expression.parameters, par_dtypes, arg_dtypes)) + + from pymbolic import var + return var(self.get_target_specific_name(target))(*processed_parameters) + + def emit_call_insn(self, insn, target, expression_to_code_mapper): from loopy.kernel.instruction import CallInstruction from pymbolic.primitives import CallWithKwargs diff --git a/loopy/target/c/__init__.py b/loopy/target/c/__init__.py index 28c346dcc7e0ef718bc729214587853c835dd0e6..b79e6ca48a65af583e40c32cb9054d4eebc28895 100644 --- a/loopy/target/c/__init__.py +++ b/loopy/target/c/__init__.py @@ -856,7 +856,7 @@ class CASTBuilder(ASTBuilderBase): func_id = insn.expression.function.name 
in_knl_callable = codegen_state.kernel.scoped_functions[func_id] - in_knl_callable_as_call = in_knl_callable.emit_call( + in_knl_callable_as_call = in_knl_callable.emit_call_insn( insn=insn, target=self.target, expression_to_code_mapper=ecm) diff --git a/loopy/target/c/codegen/expression.py b/loopy/target/c/codegen/expression.py index 17e48555512ef7a004f0ac9488b6cd7034657b7f..7d05f228ff38170b42c55a90b911d4bd7f10181b 100644 --- a/loopy/target/c/codegen/expression.py +++ b/loopy/target/c/codegen/expression.py @@ -23,7 +23,7 @@ THE SOFTWARE. """ -from six.moves import range, zip +from six.moves import range import numpy as np @@ -41,7 +41,7 @@ from pymbolic import var from loopy.expression import dtype_to_type_context from loopy.type_inference import TypeInferenceMapper -from loopy.diagnostic import LoopyError, LoopyWarning +from loopy.diagnostic import LoopyError from loopy.tools import is_integer from loopy.types import LoopyType @@ -386,12 +386,11 @@ class ExpressionToCExpressionMapper(IdentityMapper): "for constant '%s'" % expr) def map_call(self, expr, type_context): - from pymbolic.primitives import Variable, Subscript - - identifier = expr.function + from pymbolic.primitives import Subscript # {{{ implement indexof, indexof_vec + identifier = expr.function if identifier.name in ["indexof", "indexof_vec"]: if len(expr.parameters) != 1: raise LoopyError("%s takes exactly one argument" % identifier.name) @@ -433,56 +432,10 @@ class ExpressionToCExpressionMapper(IdentityMapper): # }}} - if isinstance(identifier, Variable): - identifier = identifier.name - - par_dtypes = tuple(self.infer_type(par) for par in expr.parameters) - - processed_parameters = None - - mangle_result = self.kernel.mangle_function( - identifier, par_dtypes, - ast_builder=self.codegen_state.ast_builder) - - if mangle_result is None: - raise RuntimeError("function '%s' unknown--" - "maybe you need to register a function mangler?" 
- % identifier) - - if len(mangle_result.result_dtypes) != 1: - raise LoopyError("functions with more or fewer than one return value " - "may not be used in an expression") - - if mangle_result.arg_dtypes is not None: - processed_parameters = tuple( - self.rec(par, - dtype_to_type_context(self.kernel.target, tgt_dtype), - tgt_dtype) - for par, par_dtype, tgt_dtype in zip( - expr.parameters, par_dtypes, mangle_result.arg_dtypes)) - - else: - # /!\ FIXME For some functions (e.g. 'sin'), it makes sense to - # propagate the type context here. But for many others, it does - # not. Using the inferred type as a stopgap for now. - processed_parameters = tuple( - self.rec(par, - type_context=dtype_to_type_context( - self.kernel.target, par_dtype)) - for par, par_dtype in zip(expr.parameters, par_dtypes)) - - from warnings import warn - warn("Calling function '%s' with unknown C signature--" - "return CallMangleInfo.arg_dtypes" - % identifier, LoopyWarning) - - from loopy.codegen import SeenFunction - self.codegen_state.seen_functions.add( - SeenFunction(identifier, - mangle_result.target_name, - mangle_result.arg_dtypes or par_dtypes)) - - return var(mangle_result.target_name)(*processed_parameters) + return self.kernel.scoped_functions[expr.function.name].emit_call( + expression_to_code_mapper=self, + expression=expr, + target=self.kernel.target) # {{{ deal with complex-valued variables