From 1e93c755037ccdab3eba646ce041fc32e5858aa4 Mon Sep 17 00:00:00 2001 From: tj-sun Date: Tue, 28 Nov 2017 18:22:53 +0000 Subject: [PATCH 01/11] add back function mangler to opencl target --- loopy/target/c/__init__.py | 46 +++++++++++++++++++++++++++++--------- loopy/target/opencl.py | 32 ++++++++++++++++++++++---- 2 files changed, 63 insertions(+), 15 deletions(-) diff --git a/loopy/target/c/__init__.py b/loopy/target/c/__init__.py index 4f4ac4f31..6ff6a1d63 100644 --- a/loopy/target/c/__init__.py +++ b/loopy/target/c/__init__.py @@ -329,29 +329,53 @@ def c_symbol_mangler(kernel, name): # {{{ function mangler def c_function_mangler(target, name, arg_dtypes): - # convert abs(), min(), max() to fabs(), fmin(), fmax() to comply with - # C99 standard + # select maths functions based on argument type + # convert abs(), min(), max() to fabs(), fmin(), fmax() if not isinstance(name, str): return None - if (name == "abs" + if (name in ["abs", "fabs", "acos", "asin", "atan", "cos", "cosh", "sin", + "sinh", "tanh", "exp", "log", "log10", "sqrt", "ceil", "floor"] and len(arg_dtypes) == 1 and arg_dtypes[0].numpy_dtype.kind == "f"): + + dtype = arg_dtypes[0].numpy_dtype + + if name in ["abs"]: + name = "f" + name + + if dtype == np.float64: + pass # fabs + elif dtype == np.float32: + name = name + "f" # fabsf + elif dtype == np.float128: + name = name + "l" # fabsl + else: + raise RuntimeError("%s does not support type %s" % name, dtype) + return CallMangleInfo( - target_name="fabs", + target_name=name, result_dtypes=arg_dtypes, arg_dtypes=arg_dtypes) - if name in ["max", "min"] and len(arg_dtypes) == 2: - dtype = np.find_common_type( - [], [dtype.numpy_dtype for dtype in arg_dtypes]) - - if dtype.kind == "c": - raise RuntimeError("min/max do not support complex numbers") + if (name in ["max", "min", "fmin", "fmax", "exp"] + and len(arg_dtypes) == 2 + and arg_dtypes[0].numpy_dtype.kind == "f"): - if dtype.kind == "f": + dtype = np.find_common_type( + [], [dtype.numpy_dtype for dtype in arg_dtypes]) + if name in ["max", "min"]: name = "f" + name + if dtype == np.float64: + pass # fmin + elif dtype == np.float32: + name = name + "f" # fminf + elif dtype == np.float128: + name = name + "l" # fminl + else: + raise RuntimeError("%s does not support type %s" % name, dtype) + result_dtype = NumpyType(dtype) return CallMangleInfo( target_name=name, diff --git a/loopy/target/opencl.py b/loopy/target/opencl.py index 2763caace..e7943f8dd 100644 --- a/loopy/target/opencl.py +++ b/loopy/target/opencl.py @@ -167,6 +167,30 @@ def opencl_function_mangler(kernel, name, arg_dtypes): if not isinstance(name, str): return None + if (name == "abs" + and len(arg_dtypes) == 1 + and arg_dtypes[0].numpy_dtype.kind == "f"): + return CallMangleInfo( + target_name="fabs", + result_dtypes=arg_dtypes, + arg_dtypes=arg_dtypes) + + if name in ["max", "min"] and len(arg_dtypes) == 2: + dtype = np.find_common_type( + [], [dtype.numpy_dtype for dtype in arg_dtypes]) + + if dtype.kind == "c": + raise RuntimeError("min/max do not support complex numbers") + + if dtype.kind == "f": + name = "f" + name + + result_dtype = NumpyType(dtype) + return CallMangleInfo( + target_name=name, + result_dtypes=(result_dtype,), + arg_dtypes=2*(result_dtype,)) + if name == "dot": scalar_dtype, offset, field_name = arg_dtypes[0].numpy_dtype.fields["s0"] return CallMangleInfo( @@ -354,15 +378,15 @@ class OpenCLCASTBuilder(CASTBuilder): def function_manglers(self): return ( - super(OpenCLCASTBuilder, self).function_manglers() + [ + [ opencl_function_mangler - ]) + ] + super(OpenCLCASTBuilder, self).function_manglers()) def symbol_manglers(self): return ( - super(OpenCLCASTBuilder, self).symbol_manglers() + [ + [ opencl_symbol_mangler - ]) + ] + super(OpenCLCASTBuilder, self).symbol_manglers()) def preamble_generators(self): from loopy.library.reduction import reduction_preamble_generator -- GitLab From f3e70541bf5a688af9e29e23643f6ce43a073b87 Mon Sep 17 00:00:00 2001 From: tj-sun Date: Tue, 28 Nov 2017 18:43:46 +0000 Subject: [PATCH 02/11] add test --- test/test_target.py | 38 +++++++++++++++++++++++++++++++------- 1 file changed, 31 insertions(+), 7 deletions(-) diff --git a/test/test_target.py b/test/test_target.py index aa6f00463..eca5508c9 100644 --- a/test/test_target.py +++ b/test/test_target.py @@ -140,10 +140,22 @@ def test_generate_c_snippet(): print(lp.generate_body(knl)) -def test_c_min_max(): - # Test fmin() fmax() is generated for C backend instead of max() and min() +@pytest.mark.parametrize("backend", ["c", "opencl"]) +@pytest.mark.parametrize("tp", ["f32", "f64"]) +def test_math_function(backend, tp): + # Test correct maths functions are generated for C and OpenCL + # backend instead for different data type + from loopy.target.c import CTarget + from loopy.target.opencl import OpenCLTarget + + target = {"c": CTarget, + "opencl": OpenCLTarget}[backend] + data_type = {"f32": np.float32, + "f64": np.float64}[tp] + import pymbolic.primitives as p + i = p.Variable("i") xi = p.Subscript(p.Variable("x"), i) yi = p.Subscript(p.Variable("y"), i) @@ -151,20 +163,32 @@ def test_c_min_max(): n = 100 domain = "{[i]: 0<=i<%d}" % n - data = [lp.GlobalArg("x", np.float64, shape=(n,)), - lp.GlobalArg("y", np.float64, shape=(n,)), - lp.GlobalArg("z", np.float64, shape=(n,))] + data = [lp.GlobalArg("x", data_type, shape=(n,)), + lp.GlobalArg("y", data_type, shape=(n,)), + lp.GlobalArg("z", data_type, shape=(n,))] inst = [lp.Assignment(xi, p.Variable("min")(yi, zi))] - knl = lp.make_kernel(domain, inst, data, target=CTarget()) + knl = lp.make_kernel(domain, inst, data, target=target()) code = lp.generate_code_v2(knl).device_code() + assert "fmin" in code + if tp == "f32" and backend == "c": + assert "fminf" in code + else: + assert "fminf" not in code + inst = [lp.Assignment(xi, p.Variable("max")(yi, zi))] - knl = lp.make_kernel(domain, inst, data, target=CTarget()) + knl = lp.make_kernel(domain, inst, data, target=target()) code = lp.generate_code_v2(knl).device_code() + assert "fmax" in code + if tp == "f32" and backend == "c": + assert "fmaxf" in code + else: + assert "fmaxf" not in code + @pytest.mark.parametrize("tp", ["f32", "f64"]) def test_random123(ctx_factory, tp): -- GitLab From 2dacb3350c8e743c8fe059a01bdfe6b4f1ce66f3 Mon Sep 17 00:00:00 2001 From: tj-sun Date: Tue, 28 Nov 2017 19:07:13 +0000 Subject: [PATCH 03/11] swap cuda mangler --- loopy/target/cuda.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/loopy/target/cuda.py b/loopy/target/cuda.py index 027f27838..74e140675 100644 --- a/loopy/target/cuda.py +++ b/loopy/target/cuda.py @@ -220,9 +220,9 @@ class CUDACASTBuilder(CASTBuilder): def function_manglers(self): return ( - super(CUDACASTBuilder, self).function_manglers() + [ + [ cuda_function_mangler - ]) + ] + super(CUDACASTBuilder, self).function_manglers()) # }}} -- GitLab From 9f4383b462793ba548a57d13c5705d6f3a882fe5 Mon Sep 17 00:00:00 2001 From: tj-sun Date: Tue, 28 Nov 2017 19:24:54 +0000 Subject: [PATCH 04/11] add all maths functions to opencl backend to shadow c mangler --- loopy/target/opencl.py | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/loopy/target/opencl.py b/loopy/target/opencl.py index e7943f8dd..5c4695584 100644 --- a/loopy/target/opencl.py +++ b/loopy/target/opencl.py @@ -167,24 +167,32 @@ def opencl_function_mangler(kernel, name, arg_dtypes): if not isinstance(name, str): return None - if (name == "abs" + if (name in ["abs", "fabs", "acos", "asin", "atan", "cos", "cosh", "sin", + "sinh", "tanh", "exp", "log", "log10", "sqrt", "ceil", "floor"] and len(arg_dtypes) == 1 and arg_dtypes[0].numpy_dtype.kind == "f"): + + if name in ["abs"]: + name = "f" + name + return CallMangleInfo( - target_name="fabs", + target_name=name, result_dtypes=arg_dtypes, arg_dtypes=arg_dtypes) - if name in ["max", "min"] and len(arg_dtypes) == 2: + if (name in ["max", "min", "fmin", "fmax", "exp"] + and len(arg_dtypes) == 2 + and arg_dtypes[0].numpy_dtype.kind == "f"): + + if name in ["max", "min"]: + name = "f" + name + dtype = np.find_common_type( [], [dtype.numpy_dtype for dtype in arg_dtypes]) if dtype.kind == "c": raise RuntimeError("min/max do not support complex numbers") - if dtype.kind == "f": - name = "f" + name - result_dtype = NumpyType(dtype) return CallMangleInfo( target_name=name, -- GitLab From c217428ae68ff240686d0a387bd27a069d89b67e Mon Sep 17 00:00:00 2001 From: tj-sun Date: Tue, 28 Nov 2017 19:56:23 +0000 Subject: [PATCH 05/11] math function name mangling condition on target being CTarget --- loopy/target/c/__init__.py | 34 +++++++++++++++++--------------- loopy/target/cuda.py | 4 ++-- loopy/target/opencl.py | 40 ++++---------------------------------- 3 files changed, 24 insertions(+), 54 deletions(-) diff --git a/loopy/target/c/__init__.py b/loopy/target/c/__init__.py index 6ff6a1d63..0706145b7 100644 --- a/loopy/target/c/__init__.py +++ b/loopy/target/c/__init__.py @@ -344,14 +344,15 @@ def c_function_mangler(target, name, arg_dtypes): if name in ["abs"]: name = "f" + name - if dtype == np.float64: - pass # fabs - elif dtype == np.float32: - name = name + "f" # fabsf - elif dtype == np.float128: - name = name + "l" # fabsl - else: - raise RuntimeError("%s does not support type %s" % name, dtype) + if isinstance(target.target, CTarget): + if dtype == np.float64: + pass # fabs + elif dtype == np.float32: + name = name + "f" # fabsf + elif dtype == np.float128: + name = name + "l" # fabsl + else: + raise RuntimeError("%s does not support type %s" % name, dtype) return CallMangleInfo( target_name=name, @@ -367,14 +368,15 @@ def c_function_mangler(target, name, arg_dtypes): if name in ["max", "min"]: name = "f" + name - if dtype == np.float64: - pass # fmin - elif dtype == np.float32: - name = name + "f" # fminf - elif dtype == np.float128: - name = name + "l" # fminl - else: - raise RuntimeError("%s does not support type %s" % name, dtype) + if isinstance(target.target, CTarget): + if dtype == np.float64: + pass # fmin + elif dtype == np.float32: + name = name + "f" # fminf + elif dtype == np.float128: + name = name + "l" # fminl + else: + raise RuntimeError("%s does not support type %s" % name, dtype) result_dtype = NumpyType(dtype) return CallMangleInfo( diff --git a/loopy/target/cuda.py b/loopy/target/cuda.py index 74e140675..027f27838 100644 --- a/loopy/target/cuda.py +++ b/loopy/target/cuda.py @@ -220,9 +220,9 @@ class CUDACASTBuilder(CASTBuilder): def function_manglers(self): return ( - [ + super(CUDACASTBuilder, self).function_manglers() + [ cuda_function_mangler - ] + super(CUDACASTBuilder, self).function_manglers()) + ]) # }}} diff --git a/loopy/target/opencl.py b/loopy/target/opencl.py index 5c4695584..2763caace 100644 --- a/loopy/target/opencl.py +++ b/loopy/target/opencl.py @@ -167,38 +167,6 @@ def opencl_function_mangler(kernel, name, arg_dtypes): if not isinstance(name, str): return None - if (name in ["abs", "fabs", "acos", "asin", "atan", "cos", "cosh", "sin", - "sinh", "tanh", "exp", "log", "log10", "sqrt", "ceil", "floor"] - and len(arg_dtypes) == 1 - and arg_dtypes[0].numpy_dtype.kind == "f"): - - if name in ["abs"]: - name = "f" + name - - return CallMangleInfo( - target_name=name, - result_dtypes=arg_dtypes, - arg_dtypes=arg_dtypes) - - if (name in ["max", "min", "fmin", "fmax", "exp"] - and len(arg_dtypes) == 2 - and arg_dtypes[0].numpy_dtype.kind == "f"): - - if name in ["max", "min"]: - name = "f" + name - - dtype = np.find_common_type( - [], [dtype.numpy_dtype for dtype in arg_dtypes]) - - if dtype.kind == "c": - raise RuntimeError("min/max do not support complex numbers") - - result_dtype = NumpyType(dtype) - return CallMangleInfo( - target_name=name, - result_dtypes=(result_dtype,), - arg_dtypes=2*(result_dtype,)) - if name == "dot": scalar_dtype, offset, field_name = arg_dtypes[0].numpy_dtype.fields["s0"] return CallMangleInfo( @@ -386,15 +354,15 @@ class OpenCLCASTBuilder(CASTBuilder): def function_manglers(self): return ( - [ + super(OpenCLCASTBuilder, self).function_manglers() + [ opencl_function_mangler - ] + super(OpenCLCASTBuilder, self).function_manglers()) + ]) def symbol_manglers(self): return ( - [ + super(OpenCLCASTBuilder, self).symbol_manglers() + [ opencl_symbol_mangler - ] + super(OpenCLCASTBuilder, self).symbol_manglers()) + ]) def preamble_generators(self): from loopy.library.reduction import reduction_preamble_generator -- GitLab From 686bff5334cf32e10ae2e30731c4a28ed1e09509 Mon Sep 17 00:00:00 2001 From: tj-sun Date: Wed, 29 Nov 2017 07:23:49 +0000 Subject: [PATCH 06/11] correctly detecting target types --- loopy/target/c/__init__.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/loopy/target/c/__init__.py b/loopy/target/c/__init__.py index 0706145b7..1ad5ec78d 100644 --- a/loopy/target/c/__init__.py +++ b/loopy/target/c/__init__.py @@ -344,7 +344,7 @@ def c_function_mangler(target, name, arg_dtypes): if name in ["abs"]: name = "f" + name - if isinstance(target.target, CTarget): + if type(target.target) == CTarget: if dtype == np.float64: pass # fabs elif dtype == np.float32: @@ -363,12 +363,13 @@ def c_function_mangler(target, name, arg_dtypes): and len(arg_dtypes) == 2 and arg_dtypes[0].numpy_dtype.kind == "f"): + dtype = np.find_common_type( [], [dtype.numpy_dtype for dtype in arg_dtypes]) if name in ["max", "min"]: name = "f" + name - if isinstance(target.target, CTarget): + if type(target.target) == CTarget: if dtype == np.float64: pass # fmin elif dtype == np.float32: -- GitLab From 648b85262777a76b1b5abb0abf41dd5c2f984954 Mon Sep 17 00:00:00 2001 From: tj-sun Date: Wed, 29 Nov 2017 09:03:24 +0000 Subject: [PATCH 07/11] add min, max mangler for int types in CL backend --- loopy/target/c/__init__.py | 3 +-- loopy/target/opencl.py | 12 ++++++++++++ 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/loopy/target/c/__init__.py b/loopy/target/c/__init__.py index 1ad5ec78d..22ecccb83 100644 --- a/loopy/target/c/__init__.py +++ b/loopy/target/c/__init__.py @@ -329,7 +329,7 @@ def c_symbol_mangler(kernel, name): # {{{ function mangler def c_function_mangler(target, name, arg_dtypes): - # select maths functions based on argument type + # select maths functions based on different floating point types # convert abs(), min(), max() to fabs(), fmin(), fmax() if not isinstance(name, str): return None @@ -363,7 +363,6 @@ def c_function_mangler(target, name, arg_dtypes): and len(arg_dtypes) == 2 and arg_dtypes[0].numpy_dtype.kind == "f"): - dtype = np.find_common_type( [], [dtype.numpy_dtype for dtype in arg_dtypes]) if name in ["max", "min"]: diff --git a/loopy/target/opencl.py b/loopy/target/opencl.py index 2763caace..898400f81 100644 --- a/loopy/target/opencl.py +++ b/loopy/target/opencl.py @@ -167,6 +167,18 @@ def opencl_function_mangler(kernel, name, arg_dtypes): if not isinstance(name, str): return None + # OpenCL has min(), max() for integer types + if name in ["max", "min"] and len(arg_dtypes) == 2: + dtype = np.find_common_type( + [], [dtype.numpy_dtype for dtype in arg_dtypes]) + + if dtype.kind == "i": + result_dtype = NumpyType(dtype) + return CallMangleInfo( + target_name=name, + result_dtypes=(result_dtype,), + arg_dtypes=2*(result_dtype,)) + if name == "dot": scalar_dtype, offset, field_name = arg_dtypes[0].numpy_dtype.fields["s0"] return CallMangleInfo( -- GitLab From 347ae0e1b960be2643354742a88c345d4b642195 Mon Sep 17 00:00:00 2001 From: tj-sun Date: Wed, 29 Nov 2017 18:10:30 +0000 Subject: [PATCH 08/11] data version bump --- loopy/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/loopy/version.py b/loopy/version.py index e14216272..744eb90a4 100644 --- a/loopy/version.py +++ b/loopy/version.py @@ -32,4 +32,4 @@ except ImportError: else: _islpy_version = islpy.version.VERSION_TEXT -DATA_MODEL_VERSION = "v69-islpy%s" % _islpy_version +DATA_MODEL_VERSION = "v70-islpy%s" % _islpy_version -- GitLab From 220826447560df7a5b5e1bf79d1c6306b7667972 Mon Sep 17 00:00:00 2001 From: tj-sun Date: Thu, 30 Nov 2017 17:32:33 +0000 Subject: [PATCH 09/11] updates based on feedback on PR --- loopy/diagnostic.py | 4 ++++ loopy/target/c/__init__.py | 14 +++++++------- test/test_target.py | 18 +++++++----------- 3 files changed, 18 insertions(+), 18 deletions(-) diff --git a/loopy/diagnostic.py b/loopy/diagnostic.py index 512e4ac86..8f859a748 100644 --- a/loopy/diagnostic.py +++ b/loopy/diagnostic.py @@ -107,6 +107,10 @@ class UnscheduledInstructionError(LoopyError): class ReductionIsNotTriangularError(LoopyError): pass + +class LoopyTypeError(LoopyError): + pass + # }}} diff --git a/loopy/target/c/__init__.py b/loopy/target/c/__init__.py index 22ecccb83..c30c35073 100644 --- a/loopy/target/c/__init__.py +++ b/loopy/target/c/__init__.py @@ -29,7 +29,7 @@ import six import numpy as np # noqa from loopy.kernel.data import CallMangleInfo from loopy.target import TargetBase, ASTBuilderBase, DummyHostASTBuilder -from loopy.diagnostic import LoopyError +from loopy.diagnostic import LoopyError, LoopyTypeError from cgen import Pointer, NestedDeclarator, Block from cgen.mapper import IdentityMapper as CASTIdentityMapperBase from pymbolic.mapper.stringifier import PREC_NONE @@ -334,14 +334,14 @@ def c_function_mangler(target, name, arg_dtypes): if not isinstance(name, str): return None - if (name in ["abs", "fabs", "acos", "asin", "atan", "cos", "cosh", "sin", - "sinh", "tanh", "exp", "log", "log10", "sqrt", "ceil", "floor"] + if (name in ["abs", "acos", "asin", "atan", "cos", "cosh", "sin", "sinh", + "tanh", "exp", "log", "log10", "sqrt", "ceil", "floor"] and len(arg_dtypes) == 1 and arg_dtypes[0].numpy_dtype.kind == "f"): dtype = arg_dtypes[0].numpy_dtype - if name in ["abs"]: + if name == "abs": name = "f" + name if type(target.target) == CTarget: @@ -352,14 +352,14 @@ def c_function_mangler(target, name, arg_dtypes): elif dtype == np.float128: name = name + "l" # fabsl else: - raise RuntimeError("%s does not support type %s" % name, dtype) + raise LoopyTypeError("%s does not support type %s" % (name, dtype)) return CallMangleInfo( target_name=name, result_dtypes=arg_dtypes, arg_dtypes=arg_dtypes) - if (name in ["max", "min", "fmin", "fmax", "exp"] + if (name in ["max", "min", "exp"] and len(arg_dtypes) == 2 and arg_dtypes[0].numpy_dtype.kind == "f"): @@ -376,7 +376,7 @@ def c_function_mangler(target, name, arg_dtypes): elif dtype == np.float128: name = name + "l" # fminl else: - raise RuntimeError("%s does not support type %s" % name, dtype) + raise LoopyTypeError("%s does not support type %s" % name, dtype) result_dtype = NumpyType(dtype) return CallMangleInfo( diff --git a/test/test_target.py b/test/test_target.py index eca5508c9..d3cf2670c 100644 --- a/test/test_target.py +++ b/test/test_target.py @@ -30,6 +30,9 @@ import pyopencl.clmath # noqa import pyopencl.clrandom # noqa import pytest +from loopy.target.c import CTarget +from loopy.target.opencl import OpenCLTarget + import logging logger = logging.getLogger(__name__) @@ -96,8 +99,6 @@ def test_cuda_target(): def test_generate_c_snippet(): - from loopy.target.c import CTarget - from pymbolic import var I = var("I") # noqa f = var("f") @@ -140,17 +141,12 @@ def test_generate_c_snippet(): print(lp.generate_body(knl)) -@pytest.mark.parametrize("backend", ["c", "opencl"]) +@pytest.mark.parametrize("target", [CTarget, OpenCLTarget]) @pytest.mark.parametrize("tp", ["f32", "f64"]) -def test_math_function(backend, tp): +def test_math_function(target, tp): # Test correct maths functions are generated for C and OpenCL # backend instead for different data type - from loopy.target.c import CTarget - from loopy.target.opencl import OpenCLTarget - - target = {"c": CTarget, - "opencl": OpenCLTarget}[backend] data_type = {"f32": np.float32, "f64": np.float64}[tp] @@ -173,7 +169,7 @@ def test_math_function(backend, tp): assert "fmin" in code - if tp == "f32" and backend == "c": + if tp == "f32" and target == CTarget: assert "fminf" in code else: assert "fminf" not in code @@ -184,7 +180,7 @@ def test_math_function(backend, tp): assert "fmax" in code - if tp == "f32" and backend == "c": + if tp == "f32" and target == CTarget: assert "fmaxf" in code else: assert "fmaxf" not in code -- GitLab From 7f33edef959ab2e0ea228f9779d68cd1b50ea1f5 Mon Sep 17 00:00:00 2001 From: tj-sun Date: Fri, 1 Dec 2017 11:36:23 +0000 Subject: [PATCH 10/11] exception for complex, partial to mangler --- loopy/target/c/__init__.py | 64 +++++++++++++++++++++----------------- loopy/target/opencl.py | 11 ++++--- 2 files changed, 42 insertions(+), 33 deletions(-) diff --git a/loopy/target/c/__init__.py b/loopy/target/c/__init__.py index c30c35073..c39011517 100644 --- a/loopy/target/c/__init__.py +++ b/loopy/target/c/__init__.py @@ -328,23 +328,27 @@ def c_symbol_mangler(kernel, name): # {{{ function mangler -def c_function_mangler(target, name, arg_dtypes): - # select maths functions based on different floating point types - # convert abs(), min(), max() to fabs(), fmin(), fmax() +def c_math_mangler(target, name, arg_dtypes, modify_name=True): + # Function mangler for math functions defined in C standard + # Convert abs, min, max to fabs, fmin, fmax. + # If modify_name is set to True, function names are modified according to + # floating point types of the arguments (e.g. cos(double), cosf(float)) + # This should be set to True for C and Cuda, False for OpenCL if not isinstance(name, str): return None - if (name in ["abs", "acos", "asin", "atan", "cos", "cosh", "sin", "sinh", + if name in ["abs", "min", "max"]: + name = "f" + name + + # unitary functions + if (name in ["fabs", "acos", "asin", "atan", "cos", "cosh", "sin", "sinh", "tanh", "exp", "log", "log10", "sqrt", "ceil", "floor"] and len(arg_dtypes) == 1 and arg_dtypes[0].numpy_dtype.kind == "f"): dtype = arg_dtypes[0].numpy_dtype - if name == "abs": - name = "f" + name - - if type(target.target) == CTarget: + if modify_name: if dtype == np.float64: pass # fabs elif dtype == np.float32: @@ -359,30 +363,32 @@ def c_function_mangler(target, name, arg_dtypes): result_dtypes=arg_dtypes, arg_dtypes=arg_dtypes) - if (name in ["max", "min", "exp"] - and len(arg_dtypes) == 2 - and arg_dtypes[0].numpy_dtype.kind == "f"): + # binary functions + if (name in ["fmax", "fmin"] + and len(arg_dtypes) == 2): dtype = np.find_common_type( [], [dtype.numpy_dtype for dtype in arg_dtypes]) - if name in ["max", "min"]: - name = "f" + name - - if type(target.target) == CTarget: - if dtype == np.float64: - pass # fmin - elif dtype == np.float32: - name = name + "f" # fminf - elif dtype == np.float128: - name = name + "l" # fminl - else: - raise LoopyTypeError("%s does not support type %s" % name, dtype) - result_dtype = NumpyType(dtype) - return CallMangleInfo( - target_name=name, - result_dtypes=(result_dtype,), - arg_dtypes=2*(result_dtype,)) + if dtype.kind == "c": + raise LoopyTypeError("%s does not support complex numbers") + + elif dtype.kind == "f": + if modify_name: + if dtype == np.float64: + pass # fmin + elif dtype == np.float32: + name = name + "f" # fminf + elif dtype == np.float128: + name = name + "l" # fminl + else: + raise LoopyTypeError("%s does not support type %s" % (name, dtype)) + + result_dtype = NumpyType(dtype) + return CallMangleInfo( + target_name=name, + result_dtypes=(result_dtype,), + arg_dtypes=2*(result_dtype,)) return None @@ -395,7 +401,7 @@ class CASTBuilder(ASTBuilderBase): def function_manglers(self): return ( super(CASTBuilder, self).function_manglers() + [ - c_function_mangler + c_math_mangler ]) def symbol_manglers(self): diff --git a/loopy/target/opencl.py b/loopy/target/opencl.py index 898400f81..868ddc469 100644 --- a/loopy/target/opencl.py +++ b/loopy/target/opencl.py @@ -31,13 +31,15 @@ from loopy.target.c.codegen.expression import ExpressionToCExpressionMapper from pytools import memoize_method from loopy.diagnostic import LoopyError from loopy.types import NumpyType -from loopy.target.c import DTypeRegistryWrapper +from loopy.target.c import DTypeRegistryWrapper, c_math_mangler from loopy.kernel.data import temp_var_scope, CallMangleInfo from pymbolic import var +from functools import partial # {{{ dtype registry wrappers + class DTypeRegistryWrapperWithAtomics(DTypeRegistryWrapper): def get_or_register_dtype(self, names, dtype=None): if dtype is not None: @@ -366,9 +368,10 @@ class OpenCLCASTBuilder(CASTBuilder): def function_manglers(self): return ( - super(OpenCLCASTBuilder, self).function_manglers() + [ - opencl_function_mangler - ]) + [ + opencl_function_mangler, partial(c_math_mangler, modify_name=False) + ] + + super(OpenCLCASTBuilder, self).function_manglers()) def symbol_manglers(self): return ( -- GitLab From b3fc3c4df768a42de813751b0aae46aa5a749fe5 Mon Sep 17 00:00:00 2001 From: tj-sun Date: Fri, 1 Dec 2017 11:51:59 +0000 Subject: [PATCH 11/11] flake8 --- loopy/target/c/__init__.py | 3 ++- loopy/target/opencl.py | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/loopy/target/c/__init__.py b/loopy/target/c/__init__.py index c39011517..e870f46e6 100644 --- a/loopy/target/c/__init__.py +++ b/loopy/target/c/__init__.py @@ -382,7 +382,8 @@ def c_math_mangler(target, name, arg_dtypes, modify_name=True): elif dtype == np.float128: name = name + "l" # fminl else: - raise LoopyTypeError("%s does not support type %s" % (name, dtype)) + raise LoopyTypeError("%s does not support type %s" + % (name, dtype)) result_dtype = NumpyType(dtype) return CallMangleInfo( diff --git a/loopy/target/opencl.py b/loopy/target/opencl.py index 868ddc469..d2fe4157f 100644 --- a/loopy/target/opencl.py +++ b/loopy/target/opencl.py @@ -369,7 +369,8 @@ class OpenCLCASTBuilder(CASTBuilder): def function_manglers(self): return ( [ - opencl_function_mangler, partial(c_math_mangler, modify_name=False) + opencl_function_mangler, + partial(c_math_mangler, modify_name=False) ] + super(OpenCLCASTBuilder, self).function_manglers()) -- GitLab