diff --git a/aksetup_helper.py b/aksetup_helper.py
index f17dd39f61cc26c121111a87b635ff50ad4e8bf4..e1d1650e913367f910bc34aabbcccd6f4cab007e 100644
--- a/aksetup_helper.py
+++ b/aksetup_helper.py
@@ -547,7 +547,7 @@ def set_up_shipped_boost_if_requested(project_name, conf):
             source_files += glob(
                     "bpl-subset/bpl_subset/libs/thread/src/win32/*.cpp")
             source_files += glob(
-                    "bpl-subset/bpl_subset/libs/thread/src/*.cpp")
+                    "bpl-subset/bpl_subset/libs/thread/src/tss_null.cpp")
         else:
             source_files += glob(
                     "bpl-subset/bpl_subset/libs/thread/src/pthread/*.cpp")
diff --git a/pyopencl/array.py b/pyopencl/array.py
index 2877ac56ea37bbb2a8fe583e46749bf58c34feef..8cefeb2780adf2eeb435df5f2af7dacbc8832504 100644
--- a/pyopencl/array.py
+++ b/pyopencl/array.py
@@ -654,8 +654,9 @@ class Array(object):
     @elwise_kernel_runner
     def _axpbyz(out, afac, a, bfac, b, queue=None):
         """Compute ``out = selffac * self + otherfac*other``,
-        where `other` is a vector.."""
+        where *other* is an array."""
         assert out.shape == a.shape
+        assert out.shape == b.shape  # both operands must match *out* exactly
 
         return elementwise.get_axpbyz_kernel(
                 out.context, a.dtype, b.dtype, out.dtype)
@@ -663,15 +664,18 @@ class Array(object):
     @staticmethod
     @elwise_kernel_runner
     def _axpbz(out, a, x, b, queue=None):
-        """Compute ``z = a * x + b``, where `b` is a scalar."""
+        """Compute ``out = a * x + b``, where *b* is a scalar."""
         a = np.array(a)
         b = np.array(b)
+        assert out.shape == x.shape  # *x* must match *out* exactly
         return elementwise.get_axpbz_kernel(out.context,
                 a.dtype, x.dtype, b.dtype, out.dtype)
 
     @staticmethod
     @elwise_kernel_runner
     def _elwise_multiply(out, a, b, queue=None):
+        assert out.shape == a.shape  # debug-only: asserts vanish under -O
+        assert out.shape == b.shape  # both factors must match *out* exactly
         return elementwise.get_multiply_kernel(
                 a.context, a.dtype, b.dtype, out.dtype)
 
@@ -679,6 +683,7 @@ class Array(object):
     @elwise_kernel_runner
     def _rdiv_scalar(out, ary, other, queue=None):
         other = np.array(other)
+        assert out.shape == ary.shape  # *ary* must match *out* exactly
         return elementwise.get_rdivide_elwise_kernel(
                 out.context, ary.dtype, other.dtype, out.dtype)