diff --git a/loopy/diagnostic.py b/loopy/diagnostic.py
index 16e859e0be38808e5af9e26af5cf540547b704f3..4868f70af81ae54972e7d81282b62798da233407 100644
--- a/loopy/diagnostic.py
+++ b/loopy/diagnostic.py
@@ -108,6 +108,10 @@ class ReductionIsNotTriangularError(LoopyError):
     pass
 
 
+class LoopyTypeError(LoopyError):
+    pass  # signals that a function does not support the given argument dtypes
+
+
 class ExpressionNotAffineError(LoopyError):
     pass
 
diff --git a/loopy/target/c/__init__.py b/loopy/target/c/__init__.py
index 4f4ac4f31d5ee264ea9bccc6466f98ddbb1dfaab..e870f46e60ebf9c817cc29db529562031b693bb5 100644
--- a/loopy/target/c/__init__.py
+++ b/loopy/target/c/__init__.py
@@ -29,7 +29,7 @@ import six
 import numpy as np  # noqa
 from loopy.kernel.data import CallMangleInfo
 from loopy.target import TargetBase, ASTBuilderBase, DummyHostASTBuilder
-from loopy.diagnostic import LoopyError
+from loopy.diagnostic import LoopyError, LoopyTypeError
 from cgen import Pointer, NestedDeclarator, Block
 from cgen.mapper import IdentityMapper as CASTIdentityMapperBase
 from pymbolic.mapper.stringifier import PREC_NONE
@@ -328,35 +328,68 @@ def c_symbol_mangler(kernel, name):
 
 # {{{ function mangler
 
-def c_function_mangler(target, name, arg_dtypes):
-    # convert abs(), min(), max() to fabs(), fmin(), fmax() to comply with
-    # C99 standard
+def c_math_mangler(target, name, arg_dtypes, modify_name=True):
+    # Mangler for C standard math functions; returns a CallMangleInfo, or
+    # None if the call is not handled. abs/min/max become fabs/fmin/fmax.
+    # With modify_name=True the C type suffix is appended (cosf for float32,
+    # cosl for float128, which numpy lacks on some platforms): use True for
+    # C and CUDA, False for OpenCL. Unsupported dtypes raise LoopyTypeError.
     if not isinstance(name, str):
         return None
 
-    if (name == "abs"
+    if name in ["abs", "min", "max"]:
+        name = "f" + name
+
+    # unary functions
+    if (name in ["fabs", "acos", "asin", "atan", "cos", "cosh", "sin", "sinh",
+                 "tanh", "exp", "log", "log10", "sqrt", "ceil", "floor"]
             and len(arg_dtypes) == 1
             and arg_dtypes[0].numpy_dtype.kind == "f"):
+
+        dtype = arg_dtypes[0].numpy_dtype
+
+        if modify_name:
+            if dtype == np.float64:
+                pass  # fabs
+            elif dtype == np.float32:
+                name = name + "f"  # fabsf
+            elif hasattr(np, "float128") and dtype == np.float128:
+                name = name + "l"  # fabsl
+            else:
+                raise LoopyTypeError("%s does not support type %s" % (name, dtype))
+
         return CallMangleInfo(
-                target_name="fabs",
+                target_name=name,
                 result_dtypes=arg_dtypes,
                 arg_dtypes=arg_dtypes)
 
-    if name in ["max", "min"] and len(arg_dtypes) == 2:
+    # binary functions
+    if (name in ["fmax", "fmin"]
+            and len(arg_dtypes) == 2):
+
         dtype = np.find_common_type(
-                [], [dtype.numpy_dtype for dtype in arg_dtypes])
+            [], [dtype.numpy_dtype for dtype in arg_dtypes])
 
         if dtype.kind == "c":
-            raise RuntimeError("min/max do not support complex numbers")
-
-        if dtype.kind == "f":
-            name = "f" + name
-
-        result_dtype = NumpyType(dtype)
-        return CallMangleInfo(
-                target_name=name,
-                result_dtypes=(result_dtype,),
-                arg_dtypes=2*(result_dtype,))
+            raise LoopyTypeError("%s does not support complex numbers" % name)
+
+        elif dtype.kind == "f":
+            if modify_name:
+                if dtype == np.float64:
+                    pass  # fmin
+                elif dtype == np.float32:
+                    name = name + "f"  # fminf
+                elif hasattr(np, "float128") and dtype == np.float128:
+                    name = name + "l"  # fminl
+                else:
+                    raise LoopyTypeError("%s does not support type %s"
+                                         % (name, dtype))
+
+            result_dtype = NumpyType(dtype)
+            return CallMangleInfo(
+                    target_name=name,
+                    result_dtypes=(result_dtype,),
+                    arg_dtypes=2*(result_dtype,))
 
     return None
 
@@ -369,7 +402,7 @@ class CASTBuilder(ASTBuilderBase):
     def function_manglers(self):
         return (
                 super(CASTBuilder, self).function_manglers() + [
-                    c_function_mangler
+                    c_math_mangler
                     ])
 
     def symbol_manglers(self):
diff --git a/loopy/target/opencl.py b/loopy/target/opencl.py
index 2763caace891570a1b7f8b13f225001a03d3aa65..d2fe4157fc1ff6f9eb7817bea7da8da7e31bbdc1 100644
--- a/loopy/target/opencl.py
+++ b/loopy/target/opencl.py
@@ -31,13 +31,15 @@ from loopy.target.c.codegen.expression import ExpressionToCExpressionMapper
 from pytools import memoize_method
 from loopy.diagnostic import LoopyError
 from loopy.types import NumpyType
-from loopy.target.c import DTypeRegistryWrapper
+from loopy.target.c import DTypeRegistryWrapper, c_math_mangler
 from loopy.kernel.data import temp_var_scope, CallMangleInfo
 from pymbolic import var
 
+from functools import partial
 
 # {{{ dtype registry wrappers
 
+
 class DTypeRegistryWrapperWithAtomics(DTypeRegistryWrapper):
     def get_or_register_dtype(self, names, dtype=None):
         if dtype is not None:
@@ -167,6 +169,18 @@ def opencl_function_mangler(kernel, name, arg_dtypes):
     if not isinstance(name, str):
         return None
 
+    # OpenCL has min(), max() for integer types
+    if name in ["max", "min"] and len(arg_dtypes) == 2:
+        dtype = np.find_common_type(
+                [], [dtype.numpy_dtype for dtype in arg_dtypes])
+
+        if dtype.kind == "i":
+            result_dtype = NumpyType(dtype)
+            return CallMangleInfo(
+                    target_name=name,
+                    result_dtypes=(result_dtype,),
+                    arg_dtypes=2*(result_dtype,))
+
     if name == "dot":
         scalar_dtype, offset, field_name = arg_dtypes[0].numpy_dtype.fields["s0"]
         return CallMangleInfo(
@@ -354,9 +368,11 @@ class OpenCLCASTBuilder(CASTBuilder):
 
     def function_manglers(self):
         return (
-                super(OpenCLCASTBuilder, self).function_manglers() + [
-                    opencl_function_mangler
-                    ])
+                [
+                    opencl_function_mangler,
+                    partial(c_math_mangler, modify_name=False)
+                ] +
+                super(OpenCLCASTBuilder, self).function_manglers())
 
     def symbol_manglers(self):
         return (
diff --git a/loopy/version.py b/loopy/version.py
index e142162729d5a374082fa853dcc763665f7dfe33..744eb90a46265d0085ac6ff56455398729aad33f 100644
--- a/loopy/version.py
+++ b/loopy/version.py
@@ -32,4 +32,4 @@ except ImportError:
 else:
     _islpy_version = islpy.version.VERSION_TEXT
 
-DATA_MODEL_VERSION = "v69-islpy%s" % _islpy_version
+DATA_MODEL_VERSION = "v70-islpy%s" % _islpy_version
diff --git a/test/test_target.py b/test/test_target.py
index aa6f004634f207a7b9733da4a3d7e06d13d7db7c..d3cf2670cb0db0eb5d0046ce1d816b679d4a1ed8 100644
--- a/test/test_target.py
+++ b/test/test_target.py
@@ -30,6 +30,9 @@ import pyopencl.clmath  # noqa
 import pyopencl.clrandom  # noqa
 import pytest
 
+from loopy.target.c import CTarget
+from loopy.target.opencl import OpenCLTarget
+
 import logging
 logger = logging.getLogger(__name__)
 
@@ -96,8 +99,6 @@ def test_cuda_target():
 
 
 def test_generate_c_snippet():
-    from loopy.target.c import CTarget
-
     from pymbolic import var
     I = var("I")  # noqa
     f = var("f")
@@ -140,10 +141,17 @@ def test_generate_c_snippet():
     print(lp.generate_body(knl))
 
 
-def test_c_min_max():
-    # Test fmin() fmax() is generated for C backend instead of max() and min()
-    from loopy.target.c import CTarget
+@pytest.mark.parametrize("target", [CTarget, OpenCLTarget])
+@pytest.mark.parametrize("tp", ["f32", "f64"])
+def test_math_function(target, tp):
+    # Test that the correct math functions are generated by the C and OpenCL
+    # backends for each floating point data type
+
+    data_type = {"f32": np.float32,
+                 "f64": np.float64}[tp]
+
     import pymbolic.primitives as p
+
     i = p.Variable("i")
     xi = p.Subscript(p.Variable("x"), i)
     yi = p.Subscript(p.Variable("y"), i)
@@ -151,20 +159,32 @@ def test_c_min_max():
 
     n = 100
     domain = "{[i]: 0<=i<%d}" % n
-    data = [lp.GlobalArg("x", np.float64, shape=(n,)),
-            lp.GlobalArg("y", np.float64, shape=(n,)),
-            lp.GlobalArg("z", np.float64, shape=(n,))]
+    data = [lp.GlobalArg("x", data_type, shape=(n,)),
+            lp.GlobalArg("y", data_type, shape=(n,)),
+            lp.GlobalArg("z", data_type, shape=(n,))]
 
     inst = [lp.Assignment(xi, p.Variable("min")(yi, zi))]
-    knl = lp.make_kernel(domain, inst, data, target=CTarget())
+    knl = lp.make_kernel(domain, inst, data, target=target())
     code = lp.generate_code_v2(knl).device_code()
+
     assert "fmin" in code
 
+    if tp == "f32" and target == CTarget:
+        assert "fminf" in code
+    else:
+        assert "fminf" not in code
+
     inst = [lp.Assignment(xi, p.Variable("max")(yi, zi))]
-    knl = lp.make_kernel(domain, inst, data, target=CTarget())
+    knl = lp.make_kernel(domain, inst, data, target=target())
     code = lp.generate_code_v2(knl).device_code()
+
     assert "fmax" in code
 
+    if tp == "f32" and target == CTarget:
+        assert "fmaxf" in code
+    else:
+        assert "fmaxf" not in code
+
 
 @pytest.mark.parametrize("tp", ["f32", "f64"])
 def test_random123(ctx_factory, tp):