diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index e37c40b503fa24ba47d8a4f1db3bbf64fe3747cb..0047755fcbc3ec26d86b3c24075354534c00ef13 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -166,6 +166,7 @@ Python 2.7 Apple:
 Python 3 Conda Apple:
   script:
   - CONDA_ENVIRONMENT=.test-conda-env-py3.yml
+  - export CC=gcc
   - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project-within-miniconda.sh
   - ". ./build-and-test-py-project-within-miniconda.sh"
   tags:
diff --git a/pyopencl/scan.py b/pyopencl/scan.py
index d4226cf60315e237aac3b90001234def4505d92b..ab8aee30d188938c591ce1ac7116d4fbd92b7a18 100644
--- a/pyopencl/scan.py
+++ b/pyopencl/scan.py
@@ -1227,8 +1227,17 @@ class GenericScanKernel(_GenericScanKernelBase):
             max_scan_wg_size = min(dev.max_work_group_size for dev in self.devices)
             wg_size_multiples = 64
 
+        # Intel beignet fails with "Out of shared local memory" in test_scan
+        # (int64) and trips an assertion in test_sort with this enabled:
+        # https://github.com/inducer/pyopencl/pull/238
+        # A beignet bug report (outside of pyopencl) suggests packed structs
+        # (which this is) can even give wrong results:
+        # https://bugs.freedesktop.org/show_bug.cgi?id=98717
+        # TODO: does this also affect Intel Compute Runtime?
         use_bank_conflict_avoidance = (
-                self.dtype.itemsize > 4 and self.dtype.itemsize % 8 == 0 and is_gpu)
+                self.dtype.itemsize > 4 and self.dtype.itemsize % 8 == 0
+                and is_gpu
+                and "beignet" not in self.devices[0].platform.version.lower())
 
         # k_group_size should be a power of two because of in-kernel
         # division by that number.
diff --git a/test/test_clrandom.py b/test/test_clrandom.py
index 2846e24c97e1c4c0356420dfa35a1278e615a77f..0cd572d0208cf092eef57648c273f40a88b29c39 100644
--- a/test/test_clrandom.py
+++ b/test/test_clrandom.py
@@ -31,6 +31,7 @@ import pyopencl.clrandom as clrandom
 from pyopencl.tools import (  # noqa
         pytest_generate_tests_for_pyopencl
         as pytest_generate_tests)
+from pyopencl.characterize import has_double_support
 
 try:
     import faulthandler
@@ -59,6 +60,8 @@ def make_ranlux_generator(cl_ctx):
     cltypes.float4])
 def test_clrandom_dtypes(ctx_factory, rng_class, dtype):
     cl_ctx = ctx_factory()
+    if dtype == np.float64 and not has_double_support(cl_ctx.devices[0]):
+        pytest.skip("double precision not supported on this device")
     rng = rng_class(cl_ctx)
 
     size = 10
diff --git a/test/test_wrapper.py b/test/test_wrapper.py
index 118eee740c26640c36acd5834842d67c465c54f1..a17866fa77110f4d1b232898ca46c76f54ec4a83 100644
--- a/test/test_wrapper.py
+++ b/test/test_wrapper.py
@@ -953,6 +953,7 @@ def test_coarse_grain_svm(ctx_factory):
     dev = ctx.devices[0]
 
     has_svm = (ctx._get_cl_version() >= (2, 0) and
+                ctx.devices[0]._get_cl_version() >= (2, 0) and
                 cl.get_cl_header_version() >= (2, 0))
 
     if dev.platform.name == "Portable Computing Language":