diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index e37c40b503fa24ba47d8a4f1db3bbf64fe3747cb..0047755fcbc3ec26d86b3c24075354534c00ef13 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -166,6 +166,7 @@ Python 2.7 Apple: Python 3 Conda Apple: script: - CONDA_ENVIRONMENT=.test-conda-env-py3.yml + - export CC=gcc - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project-within-miniconda.sh - ". ./build-and-test-py-project-within-miniconda.sh" tags: diff --git a/pyopencl/scan.py b/pyopencl/scan.py index d4226cf60315e237aac3b90001234def4505d92b..ab8aee30d188938c591ce1ac7116d4fbd92b7a18 100644 --- a/pyopencl/scan.py +++ b/pyopencl/scan.py @@ -1227,8 +1227,17 @@ class GenericScanKernel(_GenericScanKernelBase): max_scan_wg_size = min(dev.max_work_group_size for dev in self.devices) wg_size_multiples = 64 + # Intel beignet fails "Out of shared local memory" in test_scan int64 + # and asserts in test_sort with this enabled: + # https://github.com/inducer/pyopencl/pull/238 + # A beignet bug report (outside of pyopencl) suggests packed structs + # (which this is) can even give wrong results: + # https://bugs.freedesktop.org/show_bug.cgi?id=98717 + # TODO: does this also affect Intel Compute Runtime? use_bank_conflict_avoidance = ( - self.dtype.itemsize > 4 and self.dtype.itemsize % 8 == 0 and is_gpu) + self.dtype.itemsize > 4 and self.dtype.itemsize % 8 == 0 + and is_gpu + and "beignet" not in self.devices[0].platform.version.lower()) # k_group_size should be a power of two because of in-kernel # division by that number. diff --git a/test/test_clrandom.py b/test/test_clrandom.py index 2846e24c97e1c4c0356420dfa35a1278e615a77f..0cd572d0208cf092eef57648c273f40a88b29c39 100644 --- a/test/test_clrandom.py +++ b/test/test_clrandom.py @@ -31,6 +31,7 @@ import pyopencl.clrandom as clrandom from pyopencl.tools import ( # noqa pytest_generate_tests_for_pyopencl as pytest_generate_tests) +from pyopencl.characterize import has_double_support try: import faulthandler @@ -59,6 +60,8 @@ def make_ranlux_generator(cl_ctx): cltypes.float4]) def test_clrandom_dtypes(ctx_factory, rng_class, dtype): cl_ctx = ctx_factory() + if dtype == np.float64 and not has_double_support(cl_ctx.devices[0]): + pytest.skip("double precision not supported on this device") rng = rng_class(cl_ctx) size = 10 diff --git a/test/test_wrapper.py b/test/test_wrapper.py index 118eee740c26640c36acd5834842d67c465c54f1..a17866fa77110f4d1b232898ca46c76f54ec4a83 100644 --- a/test/test_wrapper.py +++ b/test/test_wrapper.py @@ -953,6 +953,7 @@ def test_coarse_grain_svm(ctx_factory): dev = ctx.devices[0] has_svm = (ctx._get_cl_version() >= (2, 0) and + ctx.devices[0]._get_cl_version() >= (2, 0) and cl.get_cl_header_version() >= (2, 0)) if dev.platform.name == "Portable Computing Language":