diff --git a/loopy/kernel/function_interface.py b/loopy/kernel/function_interface.py
index ee44d5ea412318f2fe49be3bc5f5556546b04aa4..17bd60ff2a335ebbf8232ae4223ca8b635806736 100644
--- a/loopy/kernel/function_interface.py
+++ b/loopy/kernel/function_interface.py
@@ -4,6 +4,8 @@ import re
 import six
 import numpy as np
 
+from six.moves import zip
+
 from pytools import ImmutableRecord
 from loopy.diagnostic import LoopyError
 from loopy.types import NumpyType
@@ -274,13 +276,16 @@ class InKernelCallable(ImmutableRecord):
         """
 
         if self.arg_id_to_dtype:
-            # trying to specialize an already specialized function.
+            # specializing an already specialized function.
 
-            if self.arg_id_to_dtype == arg_id_to_dtype:
-                return self.copy()
-            else:
-                raise LoopyError("Overwriting a specialized function--maybe"
-                        " start with new instance of InKernelCallable?")
+            for id, dtype in arg_id_to_dtype.items():
+                # only checking for the ones which have been provided
+                if self.arg_id_to_dtype[id] != arg_id_to_dtype[id]:
+                    raise LoopyError("Overwriting a specialized"
+                            " function is illegal--maybe start with new instance of"
+                            " InKernelCallable?")
+            # TODO: Check if the arguments match. If yes then just
+            # return self.copy()
 
         # {{{ attempt to specialize using scalar functions
 
@@ -290,6 +295,7 @@ class InKernelCallable(ImmutableRecord):
             from loopy.target.pyopencl import PyOpenCLTarget
             from loopy.target.cuda import CudaTarget
 
+            # FIXME: Push this into the target
             if isinstance(target, CTarget):
                 new_arg_id_to_dtype = c_with_types(self.name, arg_id_to_dtype)
 
@@ -393,11 +399,11 @@ class InKernelCallable(ImmutableRecord):
             return self.copy(arg_id_to_descr=arg_id_to_descr)
 
         else:
-            # Now this ia a kernel call
+            # this is a kernel call
             # tuning the subkernel so that we have the the matching shapes and
             # dim_tags.
             # FIXME: Although We receive input if the argument is
-            # local/global. We do not use it to set the subkernel function
+            # `local/global`. We do not use it to set the subkernel function
             # signature. Need to do it, so that we can handle teporary inputs
             # in the array call.
 
@@ -412,7 +418,6 @@ class InKernelCallable(ImmutableRecord):
                 new_args[id] = new_args[id].copy(shape=descr.shape,
                         dim_tags=descr.dim_tags)
 
-
             descriptor_specialized_knl = self.subkernel.copy(args=new_args)
 
             return self.copy(subkernel=descriptor_specialized_knl,
@@ -450,13 +455,39 @@ class InKernelCallable(ImmutableRecord):
     def get_target_specific_name(self, target):
-
+        """Return the name by which this callable is called in generated code.
+
+        This is :attr:`name` for a function without a sub-kernel and the
+        sub-kernel's name otherwise; *target* is not consulted.
+        """
         if self.subkernel is None:
-            raise NotImplementedError()
-        else:
-            return self.subkernel.name
-
-        raise NotImplementedError()
+            return self.name
+
+        return self.subkernel.name
 
-    def emit_call(self, insn, target, expression_to_code_mapper):
+    def emit_call(self, expression_to_code_mapper, expression, target):
+        """Return the call expression to be generated for *expression*.
+
+        Each parameter is mapped by *expression_to_code_mapper* with the dtype
+        recorded for its position in :attr:`arg_id_to_dtype`.
+        """
+        if self.subkernel is not None:
+            raise NotImplementedError()
+
+        # one id is taken by the single assignee, the rest are the parameters
+        if len(expression.parameters) != len(self.arg_id_to_dtype) - 1:
+            raise LoopyError("call to '%s' within an expression must have"
+                    " a single assignee and matching parameters" % self.name)
+
+        from loopy.expression import dtype_to_type_context
+        from pymbolic import var
+        # process each parameter with the dtype required at its position
+        processed_parameters = tuple(
+                expression_to_code_mapper.rec(par,
+                    dtype_to_type_context(target, self.arg_id_to_dtype[i]),
+                    self.arg_id_to_dtype[i])
+                for i, par in enumerate(expression.parameters))
+        return var(self.get_target_specific_name(target))(*processed_parameters)
+
+    def emit_call_insn(self, insn, target, expression_to_code_mapper):
 
         from loopy.kernel.instruction import CallInstruction
         from pymbolic.primitives import CallWithKwargs
diff --git a/loopy/target/c/__init__.py b/loopy/target/c/__init__.py
index 28c346dcc7e0ef718bc729214587853c835dd0e6..b79e6ca48a65af583e40c32cb9054d4eebc28895 100644
--- a/loopy/target/c/__init__.py
+++ b/loopy/target/c/__init__.py
@@ -856,7 +856,7 @@ class CASTBuilder(ASTBuilderBase):
         func_id = insn.expression.function.name
 
         in_knl_callable = codegen_state.kernel.scoped_functions[func_id]
-        in_knl_callable_as_call = in_knl_callable.emit_call(
+        in_knl_callable_as_call = in_knl_callable.emit_call_insn(
                 insn=insn,
                 target=self.target,
                 expression_to_code_mapper=ecm)
diff --git a/loopy/target/c/codegen/expression.py b/loopy/target/c/codegen/expression.py
index 17e48555512ef7a004f0ac9488b6cd7034657b7f..7d05f228ff38170b42c55a90b911d4bd7f10181b 100644
--- a/loopy/target/c/codegen/expression.py
+++ b/loopy/target/c/codegen/expression.py
@@ -23,7 +23,7 @@ THE SOFTWARE.
 """
 
 
-from six.moves import range, zip
+from six.moves import range
 
 import numpy as np
 
@@ -41,7 +41,7 @@ from pymbolic import var
 from loopy.expression import dtype_to_type_context
 from loopy.type_inference import TypeInferenceMapper
 
-from loopy.diagnostic import LoopyError, LoopyWarning
+from loopy.diagnostic import LoopyError
 from loopy.tools import is_integer
 from loopy.types import LoopyType
 
@@ -386,12 +386,11 @@ class ExpressionToCExpressionMapper(IdentityMapper):
                         "for constant '%s'" % expr)
 
     def map_call(self, expr, type_context):
-        from pymbolic.primitives import Variable, Subscript
-
-        identifier = expr.function
+        from pymbolic.primitives import Subscript
 
         # {{{ implement indexof, indexof_vec
 
+        identifier = expr.function
         if identifier.name in ["indexof", "indexof_vec"]:
             if len(expr.parameters) != 1:
                 raise LoopyError("%s takes exactly one argument" % identifier.name)
@@ -433,56 +432,10 @@ class ExpressionToCExpressionMapper(IdentityMapper):
 
         # }}}
 
-        if isinstance(identifier, Variable):
-            identifier = identifier.name
-
-        par_dtypes = tuple(self.infer_type(par) for par in expr.parameters)
-
-        processed_parameters = None
-
-        mangle_result = self.kernel.mangle_function(
-                identifier, par_dtypes,
-                ast_builder=self.codegen_state.ast_builder)
-
-        if mangle_result is None:
-            raise RuntimeError("function '%s' unknown--"
-                    "maybe you need to register a function mangler?"
-                    % identifier)
-
-        if len(mangle_result.result_dtypes) != 1:
-            raise LoopyError("functions with more or fewer than one return value "
-                    "may not be used in an expression")
-
-        if mangle_result.arg_dtypes is not None:
-            processed_parameters = tuple(
-                    self.rec(par,
-                        dtype_to_type_context(self.kernel.target, tgt_dtype),
-                        tgt_dtype)
-                    for par, par_dtype, tgt_dtype in zip(
-                        expr.parameters, par_dtypes, mangle_result.arg_dtypes))
-
-        else:
-            # /!\ FIXME For some functions (e.g. 'sin'), it makes sense to
-            # propagate the type context here. But for many others, it does
-            # not. Using the inferred type as a stopgap for now.
-            processed_parameters = tuple(
-                    self.rec(par,
-                        type_context=dtype_to_type_context(
-                            self.kernel.target, par_dtype))
-                    for par, par_dtype in zip(expr.parameters, par_dtypes))
-
-            from warnings import warn
-            warn("Calling function '%s' with unknown C signature--"
-                    "return CallMangleInfo.arg_dtypes"
-                    % identifier, LoopyWarning)
-
-        from loopy.codegen import SeenFunction
-        self.codegen_state.seen_functions.add(
-                SeenFunction(identifier,
-                    mangle_result.target_name,
-                    mangle_result.arg_dtypes or par_dtypes))
-
-        return var(mangle_result.target_name)(*processed_parameters)
+        return self.kernel.scoped_functions[expr.function.name].emit_call(
+                expression_to_code_mapper=self,
+                expression=expr,
+                target=self.kernel.target)
 
     # {{{ deal with complex-valued variables