diff --git a/pyopencl/array.py b/pyopencl/array.py
index 90e6aa57b0502d4e25ab552a70825e3e730a8a42..a1cbb4249644f331bbc2414077f709853749896a 100644
--- a/pyopencl/array.py
+++ b/pyopencl/array.py
@@ -624,7 +624,7 @@ class Array(object):
             event1 = cl.enqueue_copy(queue or self.queue, self.base_data, ary,
                     device_offset=self.offset,
                     is_blocking=not async_)
-            if not async_: # not already waited for
+            if not async_:  # not already waited for
                 self.add_event(event1)
 
     def get(self, queue=None, ary=None, async_=None, **kwargs):
@@ -1293,7 +1293,8 @@ class Array(object):
         krnl = get_any_kernel(self.context, self.dtype)
         if wait_for is None:
             wait_for = []
-        result, event1 = krnl(self, queue=queue, wait_for=wait_for + self.events, return_event=True)
+        result, event1 = krnl(self, queue=queue,
+               wait_for=wait_for + self.events, return_event=True)
         result.add_event(event1)
         return result
 
@@ -1302,7 +1303,8 @@ class Array(object):
         krnl = get_all_kernel(self.context, self.dtype)
         if wait_for is None:
             wait_for = []
-        result, event1 = krnl(self, queue=queue, wait_for=wait_for + self.events, return_event=True)
+        result, event1 = krnl(self, queue=queue,
+               wait_for=wait_for + self.events, return_event=True)
         result.add_event(event1)
         return result
 
@@ -1686,7 +1688,7 @@ class Array(object):
         if flags is None:
             flags = cl.map_flags.READ | cl.map_flags.WRITE
         if wait_for is None:
-            wait_for=[]
+            wait_for = []
 
         ary, evt = cl.enqueue_map_buffer(
                 queue or self.queue, self.base_data, flags, self.offset,
@@ -2274,7 +2276,6 @@ def multi_put(arrays, dest_indices, dest_shape=None, out=None, queue=None,
     if wait_for is None:
         wait_for = []
     wait_for = wait_for + dest_indices.events
-    
 
     vec_count = len(arrays)
 
@@ -2535,7 +2536,8 @@ def sum(a, dtype=None, queue=None, slice=None):
     """
     from pyopencl.reduction import get_sum_kernel
     krnl = get_sum_kernel(a.context, dtype, a.dtype)
-    result, event1 = krnl(a, queue=queue, slice=slice, wait_for=a.events, return_event=True)
+    result, event1 = krnl(a, queue=queue, slice=slice, wait_for=a.events,
+            return_event=True)
     result.add_event(event1)
     return result
 
@@ -2546,7 +2548,8 @@ def dot(a, b, dtype=None, queue=None, slice=None):
     """
     from pyopencl.reduction import get_dot_kernel
     krnl = get_dot_kernel(a.context, dtype, a.dtype, b.dtype)
-    result, event1 = krnl(a, b, queue=queue, slice=slice, wait_for=a.events + b.events, return_event=True)
+    result, event1 = krnl(a, b, queue=queue, slice=slice,
+            wait_for=a.events + b.events, return_event=True)
     result.add_event(event1)
     return result
 
@@ -2559,7 +2562,8 @@ def vdot(a, b, dtype=None, queue=None, slice=None):
     from pyopencl.reduction import get_dot_kernel
     krnl = get_dot_kernel(a.context, dtype, a.dtype, b.dtype,
             conjugate_first=True)
-    result, event1 = krnl(a, b, queue=queue, slice=slice, wait_for=a.events + b.events, return_event=True)
+    result, event1 = krnl(a, b, queue=queue, slice=slice,
+            wait_for=a.events + b.events, return_event=True)
     result.add_event(event1)
     return result
 
@@ -2572,7 +2576,7 @@ def subset_dot(subset, a, b, dtype=None, queue=None, slice=None):
     krnl = get_subset_dot_kernel(
             a.context, dtype, subset.dtype, a.dtype, b.dtype)
     result, event1 = krnl(subset, a, b, queue=queue, slice=slice,
-        wait_for=subset.events + a.events + b.events, return_event=True)
+            wait_for=subset.events + a.events + b.events, return_event=True)
     result.add_event(event1)
     return result
 
@@ -2581,7 +2585,8 @@ def _make_minmax_kernel(what):
     def f(a, queue=None):
         from pyopencl.reduction import get_minmax_kernel
         krnl = get_minmax_kernel(a.context, what, a.dtype)
-        result, event1 = krnl(a,  queue=queue, wait_for=a.events, return_event=True)
+        result, event1 = krnl(a, queue=queue, wait_for=a.events,
+                return_event=True)
         result.add_event(event1)
         return result
 
@@ -2604,7 +2609,7 @@ def _make_subset_minmax_kernel(what):
         from pyopencl.reduction import get_subset_minmax_kernel
         krnl = get_subset_minmax_kernel(a.context, what, a.dtype, subset.dtype)
         result, event1 = krnl(subset, a,  queue=queue, slice=slice,
-            wait_for=a.events + subset.events, return_event=True)
+                wait_for=a.events + subset.events, return_event=True)
         result.add_event(event1)
         return result
     return f
diff --git a/test/test_array.py b/test/test_array.py
index c9771ac0b5bd15153080bd06a5dbdb658009b4ed..05008c169ae782a49b5b985c7a79780e337c5770 100644
--- a/test/test_array.py
+++ b/test/test_array.py
@@ -1212,39 +1212,45 @@ def test_multi_put(ctx_factory):
 
     assert np.all(np.all(out_compare[i] == out_arrays[i].get()) for i in range(9))
 
+
 def test_outoforderqueue_get(ctx_factory):
     context = ctx_factory()
     try:
-        queue = cl.CommandQueue(context, properties=cl.command_queue_properties.OUT_OF_ORDER_EXEC_MODE_ENABLE)
+        queue = cl.CommandQueue(context,
+               properties=cl.command_queue_properties.OUT_OF_ORDER_EXEC_MODE_ENABLE)
     except Exception:
         pytest.skip("out-of-order queue not available")
     a = np.random.rand(10**6).astype(np.dtype('float32'))
     a_gpu = cl_array.to_device(queue, a)
     b_gpu = a_gpu + a_gpu**5 + 1
-    b1 = b_gpu.get() # testing that this waits for events
+    b1 = b_gpu.get()  # testing that this waits for events
     b = a + a**5 + 1
     assert np.abs(b1 - b).mean() < 1e-5
 
+
 def test_outoforderqueue_copy(ctx_factory):
     context = ctx_factory()
     try:
-        queue = cl.CommandQueue(context, properties=cl.command_queue_properties.OUT_OF_ORDER_EXEC_MODE_ENABLE)
+        queue = cl.CommandQueue(context,
+               properties=cl.command_queue_properties.OUT_OF_ORDER_EXEC_MODE_ENABLE)
     except Exception:
         pytest.skip("out-of-order queue not available")
     a = np.random.rand(10**6).astype(np.dtype('float32'))
     a_gpu = cl_array.to_device(queue, a)
     c_gpu = a_gpu**2 - 7
-    b_gpu = c_gpu.copy() # testing that this waits for and creates events
+    b_gpu = c_gpu.copy()  # testing that this waits for and creates events
     b_gpu *= 10
     queue.finish()
     b1 = b_gpu.get()
     b = 10 * (a**2 - 7)
     assert np.abs(b1 - b).mean() < 1e-5
 
+
 def test_outoforderqueue_indexing(ctx_factory):
     context = ctx_factory()
     try:
-        queue = cl.CommandQueue(context, properties=cl.command_queue_properties.OUT_OF_ORDER_EXEC_MODE_ENABLE)
+        queue = cl.CommandQueue(context,
+               properties=cl.command_queue_properties.OUT_OF_ORDER_EXEC_MODE_ENABLE)
     except Exception:
         pytest.skip("out-of-order queue not available")
     a = np.random.rand(10**6).astype(np.dtype('float32'))
@@ -1262,20 +1268,24 @@ def test_outoforderqueue_indexing(ctx_factory):
     b[i + 10000] = c - 10
     assert np.abs(b1 - b).mean() < 1e-5
 
+
 def test_outoforderqueue_reductions(ctx_factory):
     context = ctx_factory()
     try:
-        queue = cl.CommandQueue(context, properties=cl.command_queue_properties.OUT_OF_ORDER_EXEC_MODE_ENABLE)
+        queue = cl.CommandQueue(context,
+               properties=cl.command_queue_properties.OUT_OF_ORDER_EXEC_MODE_ENABLE)
     except Exception:
         pytest.skip("out-of-order queue not available")
-    a = (np.random.rand(10**6) > 0.5).astype(np.dtype('float32')) # 0/1 values to avoid accumulated rounding error
-    a[800000] = 10 # all<5 looks true until near the end
+    # 0/1 values so the exact == checks below see no accumulated rounding error
+    a = (np.random.rand(10**6) > 0.5).astype(np.dtype('float32'))
+    a[800000] = 10  # late outlier: (a < 5).all() only turns false near the end
     a_gpu = cl_array.to_device(queue, a)
     b1 = cl_array.sum(a_gpu).get()
     b2 = cl_array.dot(a_gpu, 3 - a_gpu).get()
     b3 = (a_gpu < 5).all().get()
     assert b1 == a.sum() and b2 == a.dot(3 - a) and b3 == 0
 
+
 if __name__ == "__main__":
     # make sure that import failures get reported, instead of skipping the
     # tests.
diff --git a/test/test_clmath.py b/test/test_clmath.py
index f4a559367539f7df23e8124c6722c63a34b20d27..beebc2a8c0ad717e7139c340bff14a07fb77b60c 100644
--- a/test/test_clmath.py
+++ b/test/test_clmath.py
@@ -446,15 +446,18 @@ def test_hankel_01_complex(ctx_factory, ref_src):
         pt.loglog(np.abs(z), rel_err_h1)
         pt.show()
 
+
 def test_outoforderqueue_clmath(ctx_factory):
     context = ctx_factory()
     try:
-        queue = cl.CommandQueue(context, properties=cl.command_queue_properties.OUT_OF_ORDER_EXEC_MODE_ENABLE)
+        queue = cl.CommandQueue(context,
+               properties=cl.command_queue_properties.OUT_OF_ORDER_EXEC_MODE_ENABLE)
     except Exception:
         pytest.skip("out-of-order queue not available")
     a = np.random.rand(10**6).astype(np.dtype('float32'))
     a_gpu = cl_array.to_device(queue, a)
-    b_gpu = clmath.fabs(clmath.sin(a_gpu * 5)) # testing that clmath functions wait for and create events
+    # testing that clmath functions wait for and create events
+    b_gpu = clmath.fabs(clmath.sin(a_gpu * 5))
     queue.finish()
     b1 = b_gpu.get()
     b = np.abs(np.sin(a * 5))