diff --git a/pyopencl/__init__.py b/pyopencl/__init__.py index 45e14ed5f3451dc03a1f10c78b3415ebc49336d2..4f49df0525948329621365c5c0759bdaa065c938 100644 --- a/pyopencl/__init__.py +++ b/pyopencl/__init__.py @@ -648,11 +648,7 @@ def _add_functionality(): err_gen = PythonCodeGenerator() - err_gen("try:") - with Indentation(err_gen): - err_gen.extend(body) - err_gen("except TypeError as e:") - with Indentation(err_gen): + def gen_error_handler(): err_gen(""" if current_arg is not None: args = [{args}] @@ -671,6 +667,16 @@ def _add_functionality(): .format(args=", ".join(arg_names))) err_gen("") + err_gen("try:") + with Indentation(err_gen): + err_gen.extend(body) + err_gen("except TypeError as e:") + with Indentation(err_gen): + gen_error_handler() + err_gen("except _cl.LogicError as e:") + with Indentation(err_gen): + gen_error_handler() + # }}} def add_preamble(gen): @@ -715,10 +721,10 @@ def _add_functionality(): # }}} - def kernel__generate_buffer_arg_setter(self, gen, arg_idx, buf_var): + def kernel__generate_buffer_arg_setter(self, gen, arg_idx, buf_var, could_be_numpy_scalar): from pytools.py_codegen import Indentation - if _CPY2: + if _CPY2 and could_be_numpy_scalar: # https://github.com/numpy/numpy/issues/5381 gen("if isinstance({buf_var}, np.generic):".format(buf_var=buf_var)) with Indentation(gen): @@ -747,7 +753,8 @@ def _add_functionality(): gen("else:") with Indentation(gen): - self._generate_buffer_arg_setter(gen, arg_idx, arg_var) + self._generate_buffer_arg_setter(gen, arg_idx, arg_var, + could_be_numpy_scalar=True) def kernel__generate_naive_call(self): num_args = self.num_args @@ -783,9 +790,10 @@ def _add_functionality(): has_struct_arg_count_bug(dev) for dev in self.context.devices] + from pytools import single_valued if any(count_bug_per_dev): if all(count_bug_per_dev): - work_around_arg_count_bug = True + work_around_arg_count_bug = single_valued(count_bug_per_dev) else: warn_about_arg_count_bug = True @@ -834,25 +842,38 @@ def _add_functionality(): else: raise TypeError("unexpected complex type: %s" % arg_dtype) - if (work_around_arg_count_bug + if (work_around_arg_count_bug == "pocl" and arg_dtype == np.complex128 and fp_arg_count + 2 <= 8): gen( "buf = pack('{arg_char}', {arg_var}.real)" .format(arg_char=arg_char, arg_var=arg_var)) - self._generate_buffer_arg_setter(gen, cl_arg_idx, "buf") + self._generate_buffer_arg_setter(gen, cl_arg_idx, "buf", + could_be_numpy_scalar=False) cl_arg_idx += 1 + gen("current_arg = current_arg + 1000") gen( "buf = pack('{arg_char}', {arg_var}.imag)" .format(arg_char=arg_char, arg_var=arg_var)) - self._generate_buffer_arg_setter(gen, cl_arg_idx, "buf") + self._generate_buffer_arg_setter(gen, cl_arg_idx, "buf", + could_be_numpy_scalar=False) cl_arg_idx += 1 + + elif (work_around_arg_count_bug == "apple" + and arg_dtype == np.complex128 + and fp_arg_count + 2 <= 8): + raise NotImplementedError("No work-around to " + "Apple's broken structs-as-kernel arg " + "handling has been found. " + "Cannot pass complex numbers to kernels.") + else: gen( "buf = pack('{arg_char}{arg_char}', " "{arg_var}.real, {arg_var}.imag)" .format(arg_char=arg_char, arg_var=arg_var)) - self._generate_buffer_arg_setter(gen, cl_arg_idx, "buf") + self._generate_buffer_arg_setter(gen, cl_arg_idx, "buf", + could_be_numpy_scalar=False) cl_arg_idx += 1 fp_arg_count += 2 @@ -864,7 +885,8 @@ def _add_functionality(): gen( "buf = pack('{arg_char}', long({arg_var}))" .format(arg_char=arg_dtype.char, arg_var=arg_var)) - self._generate_buffer_arg_setter(gen, cl_arg_idx, "buf") + self._generate_buffer_arg_setter(gen, cl_arg_idx, "buf", + could_be_numpy_scalar=False) cl_arg_idx += 1 else: @@ -878,7 +900,8 @@ def _add_functionality(): .format( arg_char=arg_char, arg_var=arg_var)) - self._generate_buffer_arg_setter(gen, cl_arg_idx, "buf") + self._generate_buffer_arg_setter(gen, cl_arg_idx, "buf", + could_be_numpy_scalar=False) cl_arg_idx += 1 gen("") diff --git a/pyopencl/characterize/__init__.py b/pyopencl/characterize/__init__.py index e4676a68d4aacc06ba4073664ab0925cbe30c366..175a951d42f2e3e531ead13bafe0128e3fc6a092 100644 --- a/pyopencl/characterize/__init__.py +++ b/pyopencl/characterize/__init__.py @@ -328,7 +328,7 @@ def has_struct_arg_count_bug(dev): """ if dev.platform.name == "Apple" and dev.type & cl.device_type.CPU: - return True + return "apple" if dev.platform.name == "Portable Computing Language": - return True + return "pocl" return False diff --git a/test/test_algorithm.py b/test/test_algorithm.py index 8f44cc594bab73126af98849811b35d99eebd059..43e88ca8c25c2b5594bddbab8df8f12cd96d2aef 100644 --- a/test/test_algorithm.py +++ b/test/test_algorithm.py @@ -37,7 +37,7 @@ import pyopencl as cl import pyopencl.array as cl_array # noqa from pyopencl.tools import ( # noqa pytest_generate_tests_for_pyopencl as pytest_generate_tests) -from pyopencl.characterize import has_double_support +from pyopencl.characterize import has_double_support, has_struct_arg_count_bug from pyopencl.scan import InclusiveScanKernel, ExclusiveScanKernel @@ -326,9 +326,14 @@ def test_dot(ctx_factory): dev = context.devices[0] + import faulthandler + faulthandler.enable() dtypes = [np.float32, np.complex64] if has_double_support(dev): - dtypes.extend([np.float64, np.complex128]) + if has_struct_arg_count_bug(dev) == "apple": + dtypes.extend([np.float64]) + else: + dtypes.extend([np.float64, np.complex128]) for a_dtype in dtypes: for b_dtype in dtypes: @@ -853,7 +858,11 @@ def test_bitonic_sort(ctx_factory, size, dtype): ctx = cl.create_some_context() queue = cl.CommandQueue(ctx) - if (ctx.devices[0].platform.name == "Portable Computing Language" + dev = ctx.devices[0] + if (dev.platform.name == "Apple" and dev.type & cl.device_type.CPU): + pytest.xfail("Bitonic sort won't work on Apple CPU: no workgroup " + "parallelism") + if (dev.platform.name == "Portable Computing Language" and dtype == np.float64): pytest.xfail("Double precision bitonic sort doesn't work on POCL") @@ -882,7 +891,11 @@ def test_bitonic_argsort(ctx_factory, size, dtype): ctx = cl.create_some_context() queue = cl.CommandQueue(ctx) - if (ctx.devices[0].platform.name == "Portable Computing Language" + dev = ctx.devices[0] + if (dev.platform.name == "Apple" and dev.type & cl.device_type.CPU): + pytest.xfail("Bitonic sort won't work on Apple CPU: no workgroup " + "parallelism") + if (dev.platform.name == "Portable Computing Language" and dtype == np.float64): pytest.xfail("Double precision bitonic sort doesn't work on POCL") diff --git a/test/test_array.py b/test/test_array.py index e183a4de0d16aed400bdd7b146f8c85031380921..f3b6a6687379ea84b4962d2eba38f7f832a13007 100644 --- a/test/test_array.py +++ b/test/test_array.py @@ -35,7 +35,7 @@ import pyopencl.array as cl_array import pyopencl.tools as cl_tools from pyopencl.tools import ( # noqa pytest_generate_tests_for_pyopencl as pytest_generate_tests) -from pyopencl.characterize import has_double_support +from pyopencl.characterize import has_double_support, has_struct_arg_count_bug from pyopencl.cffi_cl import _PYPY @@ -102,7 +102,12 @@ def test_mix_complex(ctx_factory): #(np.int32, np.complex64), ] - if has_double_support(context.devices[0]): + dev = context.devices[0] + if has_double_support(dev) and has_struct_arg_count_bug(dev) == "apple": + dtypes.extend([ + (np.float32, np.float64), + ]) + elif has_double_support(dev): dtypes.extend([ (np.float32, np.float64), (np.float32, np.complex128), @@ -169,6 +174,9 @@ def test_pow_neg1_vs_inv(ctx_factory): if not has_double_support(device): from pytest import skip skip("double precision not supported on %s" % device) + if has_struct_arg_count_bug(device) == "apple": + from pytest import xfail + xfail("apple struct arg counting broken") a_dev = make_random_array(queue, np.complex128, 20000) diff --git a/test/test_clmath.py b/test/test_clmath.py index 4dea8f91963e59f5a2ca2e3018cd73bf9287296f..aaa89daf18740ca09d6d999652a7bad641edf29a 100644 --- a/test/test_clmath.py +++ b/test/test_clmath.py @@ -29,22 +29,13 @@ import numpy as np import pytest - -def have_cl(): - try: - import pyopencl # noqa - return True - except: - return False - -if have_cl(): - import pyopencl.array as cl_array - import pyopencl as cl - import pyopencl.clmath as clmath - from pyopencl.tools import ( # noqa - pytest_generate_tests_for_pyopencl - as pytest_generate_tests) - from pyopencl.characterize import has_double_support +import pyopencl.array as cl_array +import pyopencl as cl +import pyopencl.clmath as clmath +from pyopencl.tools import ( # noqa + pytest_generate_tests_for_pyopencl + as pytest_generate_tests) +from pyopencl.characterize import has_double_support, has_struct_arg_count_bug try: import faulthandler @@ -76,8 +67,12 @@ def make_unary_function_test(name, limits=(0, 1), threshold=0, use_complex=False gpu_func = getattr(clmath, name) cpu_func = getattr(np, numpy_func_names.get(name, name)) - if has_double_support(context.devices[0]): - if use_complex: + dev = context.devices[0] + + if has_double_support(dev): + if use_complex and has_struct_arg_count_bug(dev) == "apple": + dtypes = [np.float32, np.float64, np.complex64] + elif use_complex: dtypes = [np.float32, np.float64, np.complex64, np.complex128] else: dtypes = [np.float32, np.float64] @@ -110,26 +105,25 @@ def make_unary_function_test(name, limits=(0, 1), threshold=0, use_complex=False return test -if have_cl(): - test_ceil = make_unary_function_test("ceil", (-10, 10)) - test_floor = make_unary_function_test("ceil", (-10, 10)) - test_fabs = make_unary_function_test("fabs", (-10, 10)) - test_exp = make_unary_function_test("exp", (-3, 3), 1e-5, use_complex=True) - test_log = make_unary_function_test("log", (1e-5, 1), 1e-6, use_complex=True) - test_log10 = make_unary_function_test("log10", (1e-5, 1), 5e-7) - test_sqrt = make_unary_function_test("sqrt", (1e-5, 1), 3e-7, use_complex=True) - - test_sin = make_unary_function_test("sin", (-10, 10), 2e-7, use_complex=2e-2) - test_cos = make_unary_function_test("cos", (-10, 10), 2e-7, use_complex=2e-2) - test_asin = make_unary_function_test("asin", (-0.9, 0.9), 5e-7) - test_acos = make_unary_function_test("acos", (-0.9, 0.9), 5e-7) - test_tan = make_unary_function_test("tan", - (-math.pi/2 + 0.1, math.pi/2 - 0.1), 4e-5, use_complex=True) - test_atan = make_unary_function_test("atan", (-10, 10), 2e-7) - - test_sinh = make_unary_function_test("sinh", (-3, 3), 3e-6, use_complex=2e-3) - test_cosh = make_unary_function_test("cosh", (-3, 3), 3e-6, use_complex=2e-3) - test_tanh = make_unary_function_test("tanh", (-3, 3), 2e-6, use_complex=True) +test_ceil = make_unary_function_test("ceil", (-10, 10)) +test_floor = make_unary_function_test("ceil", (-10, 10)) +test_fabs = make_unary_function_test("fabs", (-10, 10)) +test_exp = make_unary_function_test("exp", (-3, 3), 1e-5, use_complex=True) +test_log = make_unary_function_test("log", (1e-5, 1), 1e-6, use_complex=True) +test_log10 = make_unary_function_test("log10", (1e-5, 1), 5e-7) +test_sqrt = make_unary_function_test("sqrt", (1e-5, 1), 3e-7, use_complex=True) + +test_sin = make_unary_function_test("sin", (-10, 10), 2e-7, use_complex=2e-2) +test_cos = make_unary_function_test("cos", (-10, 10), 2e-7, use_complex=2e-2) +test_asin = make_unary_function_test("asin", (-0.9, 0.9), 5e-7) +test_acos = make_unary_function_test("acos", (-0.9, 0.9), 5e-7) +test_tan = make_unary_function_test("tan", + (-math.pi/2 + 0.1, math.pi/2 - 0.1), 4e-5, use_complex=True) +test_atan = make_unary_function_test("atan", (-10, 10), 2e-7) + +test_sinh = make_unary_function_test("sinh", (-3, 3), 3e-6, use_complex=2e-3) +test_cosh = make_unary_function_test("cosh", (-3, 3), 3e-6, use_complex=2e-3) +test_tanh = make_unary_function_test("tanh", (-3, 3), 2e-6, use_complex=True) def test_atan2(ctx_factory):