diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 079c6d2ad68f6dd39cdfdd3c77158ae0e3de1ec5..48db98503e76d4144e08395df1bfd90f2a8490f2 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -12,6 +12,7 @@
   - opengl
   except:
   - tags
+
 Python 3.5 Intel CPU:
   script:
   - export PY_EXE=python3.5
@@ -48,6 +49,7 @@ Python 2.6 AMD CPU:
   - amd-cl-cpu
   except:
   - tags
+
 Python 3.5 Titan X:
   script:
   - export PY_EXE=python3.5
@@ -60,6 +62,7 @@ Python 3.5 Titan X:
   - nvidia-titan-x
   except:
   - tags
+
 Python 3.5 K40:
   script:
   - export PY_EXE=python3.5
@@ -72,6 +75,7 @@ Python 3.5 K40:
   - nvidia-k40
   except:
   - tags
+
 Python 3.5 AMD GPU:
   script:
   - export PY_EXE=python3.5
@@ -84,6 +88,7 @@ Python 3.5 AMD GPU:
   - amd-fiji
   except:
   - tags
+
 Python 3.5 POCL CL 1.1:
   script:
   - export PY_EXE=python3.5
@@ -97,6 +102,7 @@ Python 3.5 POCL CL 1.1:
   - pocl
   except:
   - tags
+
 Python 2.7 POCL:
   script:
   - export PY_EXE=python2.7
@@ -109,6 +115,7 @@ Python 2.7 POCL:
   - pocl
   except:
   - tags
+
 Python 2.7 Apple:
   script:
   - export PY_EXE=python2.7
@@ -121,6 +128,7 @@ Python 2.7 Apple:
   - apple
   except:
   - tags
+
 PyPy POCL:
   script:
   - export PY_EXE=pypy
@@ -133,6 +141,7 @@ PyPy POCL:
   - pocl
   except:
   - tags
+
 Documentation:
   script:
   - EXTRA_INSTALL="numpy mako"
@@ -142,3 +151,12 @@ Documentation:
   - python3.5
   only:
   - master
+
+Flake8:  # lint job; presumably checks the "pyopencl" and "test" paths passed below
+  script:  # NOTE(review): curl -k skips TLS certificate checks on a script that is then sourced
+  - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/prepare-and-run-flake8.sh
+  - ". ./prepare-and-run-flake8.sh pyopencl test"
+  tags:  # runner selection: only runners tagged python3.5 pick this job up
+  - python3.5
+  except:  # same exclusion as the other jobs in this file: not run for tag refs
+  - tags
diff --git a/pyopencl/__init__.py b/pyopencl/__init__.py
index a51e5ddce1fdfbc69546b9c70304e2097a1ba6fb..fbe01041cca71e91ed0174f1d8b141c9f04edff6 100644
--- a/pyopencl/__init__.py
+++ b/pyopencl/__init__.py
@@ -488,6 +488,7 @@ class Program(object):
     def __hash__(self):
         return hash(self._get_prg())
 
+
 _add_get_info_attrs(Program, Program.get_info, program_info)
 
 
@@ -636,6 +637,7 @@ def create_some_context(interactive=None, answers=None, cache_dir=None):
 
     return Context(devices, cache_dir=cache_dir)
 
+
 _csc = create_some_context
 
 # }}}
diff --git a/pyopencl/_buffers.py b/pyopencl/_buffers.py
index cd73cdb772c52bbe71fbbcbf76a30ab04b1484ab..27cf4f4cdf1766bf67a8c594dbde63f81d3ee7ef 100644
--- a/pyopencl/_buffers.py
+++ b/pyopencl/_buffers.py
@@ -108,7 +108,8 @@ try:
     CheckBuffer.restype = ctypes.c_int
 except AttributeError as err:
     # Python 2.6 doesn't appear to have CheckBuffer support...
-    CheckBuffer = lambda x: True
+    def CheckBuffer(x):  # noqa
+        return True  # fallback stub: reports True for every x
 
 IncRef = ctypes.pythonapi.Py_IncRef
 IncRef.argtypes = [ctypes.py_object]
diff --git a/pyopencl/_mymako.py b/pyopencl/_mymako.py
index ce6943fb634ddbcd06fea2f7c3e1016648d23c3c..78061f31e6baf7e300e0caa95ce6a175f31e9823 100644
--- a/pyopencl/_mymako.py
+++ b/pyopencl/_mymako.py
@@ -1,6 +1,6 @@
 from __future__ import absolute_import
 try:
-    import mako.template
+    import mako.template  # noqa
 except ImportError:
     raise ImportError(
             "Some of PyOpenCL's facilities require the Mako templating engine.\n"
@@ -12,4 +12,4 @@ except ImportError:
             "- aptitude install python-mako\n"
             "\nor whatever else is appropriate for your system.")
 
-from mako import *
+from mako import *  # noqa
diff --git a/pyopencl/algorithm.py b/pyopencl/algorithm.py
index af89eac2eebb42f9d9bbb35a32a40c623b615096..328a1f774189fcd8d2847ba29c9259b3252a3962 100644
--- a/pyopencl/algorithm.py
+++ b/pyopencl/algorithm.py
@@ -300,6 +300,7 @@ def _make_sort_scan_type(device, bits, index_dtype):
     dtype = get_or_register_dtype(name, dtype)
     return name, dtype, c_decl
 
+
 # {{{ types, helpers preamble
 
 RADIX_SORT_PREAMBLE_TPL = Template(r"""//CL//
diff --git a/pyopencl/array.py b/pyopencl/array.py
index 7fe53fb4173d95235c3be04721b6188c20120c4f..a7a2a04c7f7a4f2492628bbac3a2cf6a0150705a 100644
--- a/pyopencl/array.py
+++ b/pyopencl/array.py
@@ -48,6 +48,7 @@ def _get_common_dtype(obj1, obj2, queue):
     return _get_common_dtype_base(obj1, obj2,
             has_double_support(queue.device))
 
+
 # Work around PyPy not currently supporting the object dtype.
 # (Yes, it doesn't even support checking!)
 # (as of May 27, 2014 on PyPy 2.3)
@@ -150,6 +151,7 @@ def _create_vector_types():
             vec.types[np.dtype(base_type), count] = dtype
             vec.type_to_scalar_and_count[dtype] = np.dtype(base_type), count
 
+
 _create_vector_types()
 
 # }}}
@@ -1871,7 +1873,7 @@ def as_strided(ary, shape=None, strides=None):
 
 # {{{ creation helpers
 
-class _same_as_transfer(object):
+class _same_as_transfer(object):  # noqa
     pass
 
 
@@ -2493,6 +2495,7 @@ def _make_minmax_kernel(what):
 
     return f
 
+
 min = _make_minmax_kernel("min")
 min.__doc__ = """
     .. versionadded:: 2011.1
@@ -2512,6 +2515,7 @@ def _make_subset_minmax_kernel(what):
 
     return f
 
+
 subset_min = _make_subset_minmax_kernel("min")
 subset_min.__doc__ = """.. versionadded:: 2011.1"""
 subset_max = _make_subset_minmax_kernel("max")
diff --git a/pyopencl/bitonic_sort_templates.py b/pyopencl/bitonic_sort_templates.py
index 279b814f32b2fa327bd26805046f19ff2361bd73..ec9e8d349c008bcec062339645748b1ba0431fbf 100644
--- a/pyopencl/bitonic_sort_templates.py
+++ b/pyopencl/bitonic_sort_templates.py
@@ -36,7 +36,7 @@ LOCAL_MEM_FACTOR = 1
 
 # {{{ defines
 
-defines = """//CL//
+defines = """//CL//  # noqa
 
 % if dtype == "double":
     #if __OPENCL_C_VERSION__ < 120
@@ -78,7 +78,7 @@ typedef ${idxtype} idx_t;
                         x[a] = (swap)?auxb:auxa; x[b] = (swap)?auxa:auxb;${NS}
                         y[a] = (swap)?auyb:auya; y[b] = (swap)?auya:auyb;}
 #define B2V(x,y,a)  { ORDERV(x,y,a,a+1) }
-#define B4V(x,y,a)  { for (int i4=0;i4<2;i4++) { ORDERV(x,y,a+i4,a+i4+2) } B2V(x,y,a) B2V(x,y,a+2) }
+#define B4V(x,y,a)  { for (int i4=0;i4<2;i4++) { ORDERV(x,y,a+i4,a+i4+2) } B2V(x,y,a) B2V(x,y,a+2) }
 #define B8V(x,y,a)  { for (int i8=0;i8<4;i8++) { ORDERV(x,y,a+i8,a+i8+4) } B4V(x,y,a) B4V(x,y,a+4) }
 #define B16V(x,y,a) { for (int i16=0;i16<8;i16++) { ORDERV(x,y,a+i16,a+i16+8) } B8V(x,y,a) B8V(x,y,a+8) }
 % else:
@@ -328,7 +328,7 @@ __kernel void run(__global data_t * data\\
 
 # IF YOU REENABLE THIS, YOU NEED TO ADJUST LOCAL_MEM_FACTOR TO 4
 
-ParallelBitonic_C4 = """//CL//
+ParallelBitonic_C4 = """//CL//  # noqa
 //ParallelBitonic_C4
 __kernel void run\\
 % if argsort:
@@ -402,7 +402,7 @@ __kernel void run\\
 
 # {{{ local merge
 
-ParallelMerge_Local = """//CL//
+ParallelMerge_Local = """//CL//  # noqa
 // N threads, WG is workgroup size. Sort WG input blocks in each workgroup.
 __kernel void run(__global const data_t * in,__global data_t * out,__local data_t * aux)
 {
@@ -450,7 +450,7 @@ __kernel void run(__global const data_t * in,__global data_t * out,__local data_
 
 # {{{
 
-ParallelBitonic_Local = """//CL//
+ParallelBitonic_Local = """//CL//  # noqa
 // N threads, WG is workgroup size. Sort WG input blocks in each workgroup.
 __kernel void run(__global const data_t * in,__global data_t * out,__local data_t * aux)
 {
@@ -521,7 +521,7 @@ __kernel void ParallelBitonic_A(__global const data_t * in)
 
 # {{{ local optim
 
-ParallelBitonic_Local_Optim = """//CL//
+ParallelBitonic_Local_Optim = """//CL//  # noqa
 __kernel void run\\
 % if argsort:
 (__global data_t * data, __global idx_t * index, __local data_t * aux, __local idx_t * auy)
diff --git a/pyopencl/capture_call.py b/pyopencl/capture_call.py
index d7d2070c8e4f0891c2ddecb4c78572512c672ffb..c1950ecd56c721aef5747752a1154441f11b878a 100644
--- a/pyopencl/capture_call.py
+++ b/pyopencl/capture_call.py
@@ -93,7 +93,7 @@ def capture_kernel_call(kernel, filename, queue, g_size, l_size, *args, **kwargs
                     arg.dtype.type.__name__, repr(complex(arg))))
             else:
                 try:
-                    arg_buf = buffer(arg)
+                    arg_buf = memoryview(arg)
                 except:
                     raise RuntimeError("cannot capture: "
                             "unsupported arg nr %d (0-based)" % i)
@@ -150,7 +150,7 @@ def capture_kernel_call(kernel, filename, queue, g_size, l_size, *args, **kwargs
     for name, val in arg_data:
         cg("%s = (" % name)
         with Indentation(cg):
-            val = str(b64encode(compress(buffer(val))))
+            val = str(b64encode(compress(memoryview(val).tobytes())))  # py2 compat
             i = 0
             while i < len(val):
                 cg(repr(val[i:i+line_len]))
diff --git a/pyopencl/cffi_cl.py b/pyopencl/cffi_cl.py
index c1848a1e1d1b9b77ba20ef519b3acbe59932451d..16f18f1f4e8507ce56e12b344b9e19f5d68ebd13 100644
--- a/pyopencl/cffi_cl.py
+++ b/pyopencl/cffi_cl.py
@@ -498,6 +498,7 @@ class migrate_mem_object_flags_ext(_ConstantsNamespace):  # noqa
 
 # }}}
 
+
 _locals = locals()
 
 
@@ -2071,6 +2072,7 @@ class ProfilingInfoGetter:
         else:
             return self.event.get_profiling_info(inf_attr)
 
+
 Event.profile = property(ProfilingInfoGetter)
 
 
@@ -2609,6 +2611,7 @@ def _create_gl_enqueue(what):
         return Event._create(ptr_event[0])
     return enqueue_gl_objects
 
+
 if _lib.have_gl():
     enqueue_acquire_gl_objects = _create_gl_enqueue(
         _lib.enqueue_acquire_gl_objects)
@@ -3003,6 +3006,7 @@ class _ImageInfoGetter:
         else:
             return self.event.get_image_info(inf_attr)
 
+
 Image.image = property(_ImageInfoGetter)
 
 # }}}
@@ -3124,6 +3128,7 @@ def add_get_info_attrs(cls, info_method, info_class, cacheable_attrs=None):
             setattr(cls, info_lower, make_getinfo(
                     info_method, info_name, info_constant))
 
+
 add_get_info_attrs(Platform, Platform.get_info, platform_info),
 add_get_info_attrs(Device, Device.get_info, device_info,
                 ["PLATFORM", "MAX_WORK_GROUP_SIZE", "MAX_COMPUTE_UNITS"])
diff --git a/pyopencl/clmath.py b/pyopencl/clmath.py
index bd9290d6650d5b86ae2f37198d161f53ad5909c3..73d390cf850e9a054541209069c322e67adf402a 100644
--- a/pyopencl/clmath.py
+++ b/pyopencl/clmath.py
@@ -46,6 +46,7 @@ def _make_unary_array_func(name):
 
     return f
 
+
 # See table 6.8 in the CL 1.1 spec
 acos = _make_unary_array_func("acos")
 acosh = _make_unary_array_func("acosh")
@@ -171,6 +172,7 @@ def ldexp(significand, exponent, queue=None):
     _ldexp(result, significand, exponent)
     return result
 
+
 lgamma = _make_unary_array_func("lgamma")
 # TODO: lgamma_r
 
@@ -200,6 +202,7 @@ def modf(arg, queue=None):
     _modf(intpart, fracpart, arg, queue=queue)
     return fracpart, intpart
 
+
 nan = _make_unary_array_func("nan")
 
 # TODO: nextafter
diff --git a/pyopencl/scan.py b/pyopencl/scan.py
index e94af0d2de4325263dada0c8ce5375c7e74deb27..0ea9e01e28fdbeca4058290ca0352639f4597880 100644
--- a/pyopencl/scan.py
+++ b/pyopencl/scan.py
@@ -742,6 +742,7 @@ def _round_down_to_power_of_2(val):
     assert result <= val
     return result
 
+
 _PREFIX_WORDS = set("""
         ldata partial_scan_buffer global scan_offset
         segment_start_in_k_group carry
@@ -850,6 +851,7 @@ def _make_template(s):
 
     return mako.template.Template(s, strict_undefined=True)
 
+
 from pytools import Record
 
 
@@ -1412,6 +1414,7 @@ class GenericScanKernel(_GenericScanKernelBase):
 
 # }}}
 
+
 # {{{ debug kernel
 
 DEBUG_SCAN_TEMPLATE = SHARED_PREAMBLE + r"""//CL//
diff --git a/pyopencl/tools.py b/pyopencl/tools.py
index 4e59b388f9d68274d0823d3850af3f1cafbb432d..36fafbc381441139b0bc62bf185005678ba2856c 100644
--- a/pyopencl/tools.py
+++ b/pyopencl/tools.py
@@ -53,6 +53,7 @@ def _register_types():
     get_or_register_dtype("cfloat_t", np.complex64)
     get_or_register_dtype("cdouble_t", np.complex128)
 
+
 _register_types()
 
 
@@ -97,6 +98,7 @@ def first_arg_dependent_memoize(func, cl_object, *args):
         arg_dict[args] = result
         return result
 
+
 context_dependent_memoize = first_arg_dependent_memoize
 
 
@@ -159,6 +161,7 @@ def clear_first_arg_caches():
     for cache in _first_arg_dependent_caches:
         cache.clear()
 
+
 import atexit
 atexit.register(clear_first_arg_caches)
 
diff --git a/setup.cfg b/setup.cfg
index 512924240ea5f744029e9dd0395769ca66267ca1..2bc760d67cfc68d91478948399e51cf470abfe07 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,3 +1,4 @@
 [flake8]
 ignore = E126,E127,E128,E123,E226,E241,E242,E265,W503,E402
 max-line-length=85
+exclude=pyopencl/compyte/ndarray,pyopencl/compyte/array.py
diff --git a/test/test_algorithm.py b/test/test_algorithm.py
index b1a37fd1fc9c697eab3fb224f5322f69ff3af8aa..374381ede72ad66951d5146613f4f7a00cc9311a 100644
--- a/test/test_algorithm.py
+++ b/test/test_algorithm.py
@@ -511,6 +511,7 @@ def summarize_error(obtained, desired, orig, thresh=1e-5):
 
     return " ".join(entries)
 
+
 scan_test_counts = [
     10,
     2 ** 8 - 1,