diff --git a/loopy/target/c/__init__.py b/loopy/target/c/__init__.py index 83efecf0eed38b2abc225ae0a892edd4c905214f..f58509c51e5b8a16b6d7a8af7a58b57fa4f94488 100644 --- a/loopy/target/c/__init__.py +++ b/loopy/target/c/__init__.py @@ -75,29 +75,6 @@ class DTypeRegistryWrapper(object): # }}} -# {{{ preamble generator - -def _preamble_generator(preamble_info): - c_funcs = set(func.c_name for func in preamble_info.seen_functions) - if "int_floor_div" in c_funcs: - yield ("05_int_floor_div", """ - #define int_floor_div(a,b) \ - (( (a) - \ - ( ( (a)<0 ) != ( (b)<0 )) \ - *( (b) + ( (b)<0 ) - ( (b)>=0 ) )) \ - / (b) ) - """) - - if "int_floor_div_pos_b" in c_funcs: - yield ("05_int_floor_div_pos_b", """ - #define int_floor_div_pos_b(a,b) ( \ - ( (a) - ( ((a)<0) ? ((b)-1) : 0 ) ) / (b) \ - ) - """) - -# }}} - - # {{{ cgen overrides from cgen import Declarator @@ -305,6 +282,10 @@ class CTarget(TargetBase): def get_kernel_executor(self, knl, *args, **kwargs): raise NotImplementedError() + @property + def has_math_header(self): + return True + # }}} @@ -356,6 +337,13 @@ def c_symbol_mangler(kernel, name): # {{{ function mangler +c_math_unitary_functions = ["fabs", "acos", "asin", "atan", "cos", "cosh", "sin", + "sinh", "tanh", "exp", "log", "log10", "sqrt", "ceil", + "floor"] +c_math_binary_functions = ["fmax", "fmin"] +c_math_functions = set(c_math_unitary_functions + c_math_binary_functions) + + def c_math_mangler(target, name, arg_dtypes, modify_name=True): # Function mangler for math functions defined in C standard # Convert abs, min, max to fabs, fmin, fmax. 
@@ -369,8 +357,7 @@ def c_math_mangler(target, name, arg_dtypes, modify_name=True): name = "f" + name # unitary functions - if (name in ["fabs", "acos", "asin", "atan", "cos", "cosh", "sin", "sinh", - "tanh", "exp", "log", "log10", "sqrt", "ceil", "floor"] + if (name in c_math_unitary_functions and len(arg_dtypes) == 1 and arg_dtypes[0].numpy_dtype.kind == "f"): @@ -392,7 +379,7 @@ def c_math_mangler(target, name, arg_dtypes, modify_name=True): arg_dtypes=arg_dtypes) # binary functions - if (name in ["fmax", "fmin"] + if (name in c_math_binary_functions and len(arg_dtypes) == 2): dtype = np.find_common_type( @@ -424,6 +411,32 @@ def c_math_mangler(target, name, arg_dtypes, modify_name=True): # }}} +# {{{ preamble generator + +def _preamble_generator(preamble_info): + c_funcs = set(func.c_name for func in preamble_info.seen_functions) + if "int_floor_div" in c_funcs: + yield ("05_int_floor_div", """ + #define int_floor_div(a,b) \ + (( (a) - \ + ( ( (a)<0 ) != ( (b)<0 )) \ + *( (b) + ( (b)<0 ) - ( (b)>=0 ) )) \ + / (b) ) + """) + + if "int_floor_div_pos_b" in c_funcs: + yield ("05_int_floor_div_pos_b", """ + #define int_floor_div_pos_b(a,b) ( \ + ( (a) - ( ((a)<0) ? 
((b)-1) : 0 ) ) / (b) \ + ) + """) + if len(c_funcs & c_math_functions) and ( + preamble_info.kernel.target.has_math_header): + yield ('00_cmath', "#include <math.h>") + +# }}} + + class CASTBuilder(ASTBuilderBase): # {{{ library diff --git a/loopy/target/ispc.py b/loopy/target/ispc.py index 771f2cdf638bcd4b54a088adeabb01ca636e064d..a0dace97dcea6d25bf2dc724617187a253c699b1 100644 --- a/loopy/target/ispc.py +++ b/loopy/target/ispc.py @@ -197,6 +197,10 @@ class ISPCTarget(CTarget): include_bool=True) return result + @property + def has_math_header(self): + return False + # }}} diff --git a/loopy/target/opencl.py b/loopy/target/opencl.py index 432c95ef34cc3d34548effba340386e3e44c9147..b9dd11c9e7795f4f70822333def686e74b61eb84 100644 --- a/loopy/target/opencl.py +++ b/loopy/target/opencl.py @@ -357,6 +357,12 @@ class OpenCLTarget(CTarget): vec.types[base.numpy_dtype, count], target=self) + @property + def has_math_header(self): + return False + + # }}} + # }}} diff --git a/test/test_c_execution.py b/test/test_c_execution.py index c355893e4c08f405c6a09cca43849489c145bc4d..5ba91ec259dd6bc43c15ca864ede7009cfba65da 100644 --- a/test/test_c_execution.py +++ b/test/test_c_execution.py @@ -343,6 +343,24 @@ def test_missing_compilers(): __test(eval_tester, ExecutableCTarget, compiler=ccomp) +def test_include_c_math_header(): + from loopy.target.c import ExecutableCTarget + n = 10 + + knl = lp.make_kernel('{[i]: 0 <= i < n}', + """ + a[i] = fabs(b[i]) + """, + [lp.GlobalArg('a', shape=(n,), dtype=np.int32), + lp.GlobalArg('b', shape=(n,), dtype=np.int32)], + target=ExecutableCTarget()) + + knl = lp.fix_parameters(knl, n=n) + assert ('#include <math.h>') in lp.generate_code_v2(knl).device_code() + assert np.allclose(knl(a=np.zeros(10, dtype=np.int32), + b=-np.arange(10, dtype=np.int32))[1], np.arange(10)) + + if __name__ == "__main__": if len(sys.argv) > 1: exec(sys.argv[1])