diff --git a/pyopencl/__init__.py b/pyopencl/__init__.py
index 45e14ed5f3451dc03a1f10c78b3415ebc49336d2..4f49df0525948329621365c5c0759bdaa065c938 100644
--- a/pyopencl/__init__.py
+++ b/pyopencl/__init__.py
@@ -648,11 +648,7 @@ def _add_functionality():
 
         err_gen = PythonCodeGenerator()
 
-        err_gen("try:")
-        with Indentation(err_gen):
-            err_gen.extend(body)
-        err_gen("except TypeError as e:")
-        with Indentation(err_gen):
+        def gen_error_handler():
             err_gen("""
                 if current_arg is not None:
                     args = [{args}]
@@ -671,6 +667,16 @@ def _add_functionality():
                 .format(args=", ".join(arg_names)))
             err_gen("")
 
+        err_gen("try:")
+        with Indentation(err_gen):
+            err_gen.extend(body)
+        err_gen("except TypeError as e:")
+        with Indentation(err_gen):
+            gen_error_handler()
+        err_gen("except _cl.LogicError as e:")
+        with Indentation(err_gen):
+            gen_error_handler()
+
         # }}}
 
         def add_preamble(gen):
@@ -715,10 +721,10 @@ def _add_functionality():
 
         # }}}
 
-    def kernel__generate_buffer_arg_setter(self, gen, arg_idx, buf_var):
+    def kernel__generate_buffer_arg_setter(self, gen, arg_idx, buf_var, could_be_numpy_scalar):
         from pytools.py_codegen import Indentation
 
-        if _CPY2:
+        if _CPY2 and could_be_numpy_scalar:
             # https://github.com/numpy/numpy/issues/5381
             gen("if isinstance({buf_var}, np.generic):".format(buf_var=buf_var))
             with Indentation(gen):
@@ -747,7 +753,8 @@ def _add_functionality():
 
         gen("else:")
         with Indentation(gen):
-            self._generate_buffer_arg_setter(gen, arg_idx, arg_var)
+            self._generate_buffer_arg_setter(gen, arg_idx, arg_var,
+                    could_be_numpy_scalar=True)
 
     def kernel__generate_naive_call(self):
         num_args = self.num_args
@@ -783,9 +790,10 @@ def _add_functionality():
                 has_struct_arg_count_bug(dev)
                 for dev in self.context.devices]
 
+        from pytools import single_valued
         if any(count_bug_per_dev):
             if all(count_bug_per_dev):
-                work_around_arg_count_bug = True
+                work_around_arg_count_bug = single_valued(count_bug_per_dev)
             else:
                 warn_about_arg_count_bug = True
 
@@ -834,25 +842,38 @@ def _add_functionality():
                 else:
                     raise TypeError("unexpected complex type: %s" % arg_dtype)
 
-                if (work_around_arg_count_bug
+                if (work_around_arg_count_bug == "pocl"
                         and arg_dtype == np.complex128
                         and fp_arg_count + 2 <= 8):
                     gen(
                             "buf = pack('{arg_char}', {arg_var}.real)"
                             .format(arg_char=arg_char, arg_var=arg_var))
-                    self._generate_buffer_arg_setter(gen, cl_arg_idx, "buf")
+                    self._generate_buffer_arg_setter(gen, cl_arg_idx, "buf",
+                            could_be_numpy_scalar=False)
                     cl_arg_idx += 1
+                    gen("current_arg = current_arg + 1000")
                     gen(
                             "buf = pack('{arg_char}', {arg_var}.imag)"
                             .format(arg_char=arg_char, arg_var=arg_var))
-                    self._generate_buffer_arg_setter(gen, cl_arg_idx, "buf")
+                    self._generate_buffer_arg_setter(gen, cl_arg_idx, "buf",
+                            could_be_numpy_scalar=False)
                     cl_arg_idx += 1
+
+                elif (work_around_arg_count_bug == "apple"
+                        and arg_dtype == np.complex128
+                        and fp_arg_count + 2 <= 8):
+                    raise NotImplementedError("No work-around to "
+                            "Apple's broken structs-as-kernel arg "
+                            "handling has been found. "
+                            "Cannot pass complex numbers to kernels.")
+
                 else:
                     gen(
                             "buf = pack('{arg_char}{arg_char}', "
                             "{arg_var}.real, {arg_var}.imag)"
                             .format(arg_char=arg_char, arg_var=arg_var))
-                    self._generate_buffer_arg_setter(gen, cl_arg_idx, "buf")
+                    self._generate_buffer_arg_setter(gen, cl_arg_idx, "buf",
+                            could_be_numpy_scalar=False)
                     cl_arg_idx += 1
 
                 fp_arg_count += 2
@@ -864,7 +885,8 @@ def _add_functionality():
                 gen(
                         "buf = pack('{arg_char}', long({arg_var}))"
                         .format(arg_char=arg_dtype.char, arg_var=arg_var))
-                self._generate_buffer_arg_setter(gen, cl_arg_idx, "buf")
+                self._generate_buffer_arg_setter(gen, cl_arg_idx, "buf",
+                        could_be_numpy_scalar=False)
                 cl_arg_idx += 1
 
             else:
@@ -878,7 +900,8 @@ def _add_functionality():
                         .format(
                             arg_char=arg_char,
                             arg_var=arg_var))
-                self._generate_buffer_arg_setter(gen, cl_arg_idx, "buf")
+                self._generate_buffer_arg_setter(gen, cl_arg_idx, "buf",
+                        could_be_numpy_scalar=False)
                 cl_arg_idx += 1
 
             gen("")
diff --git a/pyopencl/characterize/__init__.py b/pyopencl/characterize/__init__.py
index e4676a68d4aacc06ba4073664ab0925cbe30c366..175a951d42f2e3e531ead13bafe0128e3fc6a092 100644
--- a/pyopencl/characterize/__init__.py
+++ b/pyopencl/characterize/__init__.py
@@ -328,7 +328,7 @@ def has_struct_arg_count_bug(dev):
     """
 
     if dev.platform.name == "Apple" and dev.type & cl.device_type.CPU:
-        return True
+        return "apple"
     if dev.platform.name == "Portable Computing Language":
-        return True
+        return "pocl"
     return False
diff --git a/test/test_algorithm.py b/test/test_algorithm.py
index 8f44cc594bab73126af98849811b35d99eebd059..43e88ca8c25c2b5594bddbab8df8f12cd96d2aef 100644
--- a/test/test_algorithm.py
+++ b/test/test_algorithm.py
@@ -37,7 +37,7 @@ import pyopencl as cl
 import pyopencl.array as cl_array  # noqa
 from pyopencl.tools import (  # noqa
         pytest_generate_tests_for_pyopencl as pytest_generate_tests)
-from pyopencl.characterize import has_double_support
+from pyopencl.characterize import has_double_support, has_struct_arg_count_bug
 from pyopencl.scan import InclusiveScanKernel, ExclusiveScanKernel
 
 
@@ -326,9 +326,19 @@ def test_dot(ctx_factory):
 
     dev = context.devices[0]
 
+    # Best effort: faulthandler is not in the Python 2 standard library.
+    try:
+        import faulthandler
+    except ImportError:
+        pass
+    else:
+        faulthandler.enable()
     dtypes = [np.float32, np.complex64]
     if has_double_support(dev):
-        dtypes.extend([np.float64, np.complex128])
+        if has_struct_arg_count_bug(dev) == "apple":
+            dtypes.extend([np.float64])
+        else:
+            dtypes.extend([np.float64, np.complex128])
 
     for a_dtype in dtypes:
         for b_dtype in dtypes:
@@ -853,7 +863,11 @@ def test_bitonic_sort(ctx_factory, size, dtype):
     ctx = cl.create_some_context()
     queue = cl.CommandQueue(ctx)
 
-    if (ctx.devices[0].platform.name == "Portable Computing Language"
+    dev = ctx.devices[0]
+    if (dev.platform.name == "Apple" and dev.type & cl.device_type.CPU):
+        pytest.xfail("Bitonic sort won't work on Apple CPU: no workgroup "
+            "parallelism")
+    if (dev.platform.name == "Portable Computing Language"
             and dtype == np.float64):
         pytest.xfail("Double precision bitonic sort doesn't work on POCL")
 
@@ -882,7 +896,11 @@ def test_bitonic_argsort(ctx_factory, size, dtype):
     ctx = cl.create_some_context()
     queue = cl.CommandQueue(ctx)
 
-    if (ctx.devices[0].platform.name == "Portable Computing Language"
+    dev = ctx.devices[0]
+    if (dev.platform.name == "Apple" and dev.type & cl.device_type.CPU):
+        pytest.xfail("Bitonic sort won't work on Apple CPU: no workgroup "
+            "parallelism")
+    if (dev.platform.name == "Portable Computing Language"
             and dtype == np.float64):
         pytest.xfail("Double precision bitonic sort doesn't work on POCL")
 
diff --git a/test/test_array.py b/test/test_array.py
index e183a4de0d16aed400bdd7b146f8c85031380921..f3b6a6687379ea84b4962d2eba38f7f832a13007 100644
--- a/test/test_array.py
+++ b/test/test_array.py
@@ -35,7 +35,7 @@ import pyopencl.array as cl_array
 import pyopencl.tools as cl_tools
 from pyopencl.tools import (  # noqa
         pytest_generate_tests_for_pyopencl as pytest_generate_tests)
-from pyopencl.characterize import has_double_support
+from pyopencl.characterize import has_double_support, has_struct_arg_count_bug
 from pyopencl.cffi_cl import _PYPY
 
 
@@ -102,7 +102,12 @@ def test_mix_complex(ctx_factory):
             #(np.int32, np.complex64),
             ]
 
-    if has_double_support(context.devices[0]):
+    dev = context.devices[0]
+    if has_double_support(dev) and has_struct_arg_count_bug(dev) == "apple":
+        dtypes.extend([
+            (np.float32, np.float64),
+            ])
+    elif has_double_support(dev):
         dtypes.extend([
             (np.float32, np.float64),
             (np.float32, np.complex128),
@@ -169,6 +174,9 @@ def test_pow_neg1_vs_inv(ctx_factory):
     if not has_double_support(device):
         from pytest import skip
         skip("double precision not supported on %s" % device)
+    if has_struct_arg_count_bug(device) == "apple":
+        from pytest import xfail
+        xfail("apple struct arg counting broken")
 
     a_dev = make_random_array(queue, np.complex128, 20000)
 
diff --git a/test/test_clmath.py b/test/test_clmath.py
index 4dea8f91963e59f5a2ca2e3018cd73bf9287296f..aaa89daf18740ca09d6d999652a7bad641edf29a 100644
--- a/test/test_clmath.py
+++ b/test/test_clmath.py
@@ -29,22 +29,13 @@ import numpy as np
 
 import pytest
 
-
-def have_cl():
-    try:
-        import pyopencl  # noqa
-        return True
-    except:
-        return False
-
-if have_cl():
-    import pyopencl.array as cl_array
-    import pyopencl as cl
-    import pyopencl.clmath as clmath
-    from pyopencl.tools import (  # noqa
-            pytest_generate_tests_for_pyopencl
-            as pytest_generate_tests)
-    from pyopencl.characterize import has_double_support
+import pyopencl.array as cl_array
+import pyopencl as cl
+import pyopencl.clmath as clmath
+from pyopencl.tools import (  # noqa
+        pytest_generate_tests_for_pyopencl
+        as pytest_generate_tests)
+from pyopencl.characterize import has_double_support, has_struct_arg_count_bug
 
 try:
     import faulthandler
@@ -76,8 +67,12 @@ def make_unary_function_test(name, limits=(0, 1), threshold=0, use_complex=False
         gpu_func = getattr(clmath, name)
         cpu_func = getattr(np, numpy_func_names.get(name, name))
 
-        if has_double_support(context.devices[0]):
-            if use_complex:
+        dev = context.devices[0]
+
+        if has_double_support(dev):
+            if use_complex and has_struct_arg_count_bug(dev) == "apple":
+                dtypes = [np.float32, np.float64, np.complex64]
+            elif use_complex:
                 dtypes = [np.float32, np.float64, np.complex64, np.complex128]
             else:
                 dtypes = [np.float32, np.float64]
@@ -110,26 +105,25 @@ def make_unary_function_test(name, limits=(0, 1), threshold=0, use_complex=False
     return test
 
 
-if have_cl():
-    test_ceil = make_unary_function_test("ceil", (-10, 10))
-    test_floor = make_unary_function_test("ceil", (-10, 10))
-    test_fabs = make_unary_function_test("fabs", (-10, 10))
-    test_exp = make_unary_function_test("exp", (-3, 3), 1e-5, use_complex=True)
-    test_log = make_unary_function_test("log", (1e-5, 1), 1e-6, use_complex=True)
-    test_log10 = make_unary_function_test("log10", (1e-5, 1), 5e-7)
-    test_sqrt = make_unary_function_test("sqrt", (1e-5, 1), 3e-7, use_complex=True)
-
-    test_sin = make_unary_function_test("sin", (-10, 10), 2e-7, use_complex=2e-2)
-    test_cos = make_unary_function_test("cos", (-10, 10), 2e-7, use_complex=2e-2)
-    test_asin = make_unary_function_test("asin", (-0.9, 0.9), 5e-7)
-    test_acos = make_unary_function_test("acos", (-0.9, 0.9), 5e-7)
-    test_tan = make_unary_function_test("tan",
-            (-math.pi/2 + 0.1, math.pi/2 - 0.1), 4e-5, use_complex=True)
-    test_atan = make_unary_function_test("atan", (-10, 10), 2e-7)
-
-    test_sinh = make_unary_function_test("sinh", (-3, 3), 3e-6, use_complex=2e-3)
-    test_cosh = make_unary_function_test("cosh", (-3, 3), 3e-6, use_complex=2e-3)
-    test_tanh = make_unary_function_test("tanh", (-3, 3), 2e-6, use_complex=True)
+test_ceil = make_unary_function_test("ceil", (-10, 10))
+test_floor = make_unary_function_test("floor", (-10, 10))
+test_fabs = make_unary_function_test("fabs", (-10, 10))
+test_exp = make_unary_function_test("exp", (-3, 3), 1e-5, use_complex=True)
+test_log = make_unary_function_test("log", (1e-5, 1), 1e-6, use_complex=True)
+test_log10 = make_unary_function_test("log10", (1e-5, 1), 5e-7)
+test_sqrt = make_unary_function_test("sqrt", (1e-5, 1), 3e-7, use_complex=True)
+
+test_sin = make_unary_function_test("sin", (-10, 10), 2e-7, use_complex=2e-2)
+test_cos = make_unary_function_test("cos", (-10, 10), 2e-7, use_complex=2e-2)
+test_asin = make_unary_function_test("asin", (-0.9, 0.9), 5e-7)
+test_acos = make_unary_function_test("acos", (-0.9, 0.9), 5e-7)
+test_tan = make_unary_function_test("tan",
+        (-math.pi/2 + 0.1, math.pi/2 - 0.1), 4e-5, use_complex=True)
+test_atan = make_unary_function_test("atan", (-10, 10), 2e-7)
+
+test_sinh = make_unary_function_test("sinh", (-3, 3), 3e-6, use_complex=2e-3)
+test_cosh = make_unary_function_test("cosh", (-3, 3), 3e-6, use_complex=2e-3)
+test_tanh = make_unary_function_test("tanh", (-3, 3), 2e-6, use_complex=True)
 
 
 def test_atan2(ctx_factory):