diff --git a/contrib/fortran-to-opencl/translate.py b/contrib/fortran-to-opencl/translate.py
index 66f6e1dbfa80c2647313177730153ee75e80f4d0..371b012034300f7db6016468eeea0c9c3d448585 100644
--- a/contrib/fortran-to-opencl/translate.py
+++ b/contrib/fortran-to-opencl/translate.py
@@ -1,8 +1,4 @@
-from __future__ import division, with_statement
-from __future__ import absolute_import
-from __future__ import print_function
 import six
-from six.moves import range
 
 __copyright__ = "Copyright (C) 2009 Andreas Kloeckner"
 
@@ -366,12 +362,12 @@ class ComplexCCodeMapper(CCodeMapperBase):
 
             complex_sum = self.rec(complexes[0], myprec)
             for child in complexes[1:]:
-                complex_sum = "%s_add(%s, %s)" % (
+                complex_sum = "{}_add({}, {})".format(
                         tgt_name, complex_sum,
                         self.rec(child, PREC_NONE))
 
             if real_sum:
-                result = "%s_add(%s_fromreal(%s), %s)" % (
+                result = "{}_add({}_fromreal({}), {})".format(
                         tgt_name, tgt_name, real_sum, complex_sum)
             else:
                 result = complex_sum
@@ -402,12 +398,12 @@ class ComplexCCodeMapper(CCodeMapperBase):
 
             complex_prd = self.rec(complexes[0], myprec)
             for child in complexes[1:]:
-                complex_prd = "%s_mul(%s, %s)" % (
+                complex_prd = "{}_mul({}, {})".format(
                         tgt_name, complex_prd,
                         self.rec(child, PREC_NONE))
 
             if real_prd:
-                result = "%s_rmul(%s, %s)" % (tgt_name, real_prd, complex_prd)
+                result = f"{tgt_name}_rmul({real_prd}, {complex_prd})"
             else:
                 result = complex_prd
 
@@ -423,17 +419,17 @@ class ComplexCCodeMapper(CCodeMapperBase):
         if not (n_complex or d_complex):
             return CCodeMapperBase.map_quotient(self, expr, enclosing_prec)
         elif n_complex and not d_complex:
-            return "%s_divider(%s, %s)" % (
+            return "{}_divider({}, {})".format(
                     complex_type_name(tgt_dtype),
                     self.rec(expr.numerator, PREC_NONE),
                     self.rec(expr.denominator, PREC_NONE))
         elif not n_complex and d_complex:
-            return "%s_rdivide(%s, %s)" % (
+            return "{}_rdivide({}, {})".format(
                     complex_type_name(tgt_dtype),
                     self.rec(expr.numerator, PREC_NONE),
                     self.rec(expr.denominator, PREC_NONE))
         else:
-            return "%s_divide(%s, %s)" % (
+            return "{}_divide({}, {})".format(
                     complex_type_name(tgt_dtype),
                     self.rec(expr.numerator, PREC_NONE),
                     self.rec(expr.denominator, PREC_NONE))
@@ -460,12 +456,12 @@ class ComplexCCodeMapper(CCodeMapperBase):
                 e_complex = 'c' == self.infer_type(expr.exponent).kind
 
                 if b_complex and not e_complex:
-                    return "%s_powr(%s, %s)" % (
+                    return "{}_powr({}, {})".format(
                             complex_type_name(tgt_dtype),
                             self.rec(expr.base, PREC_NONE),
                             self.rec(expr.exponent, PREC_NONE))
                 else:
-                    return "%s_pow(%s, %s)" % (
+                    return "{}_pow({}, {})".format(
                             complex_type_name(tgt_dtype),
                             self.rec(expr.base, PREC_NONE),
                             self.rec(expr.exponent, PREC_NONE))
@@ -522,7 +518,7 @@ class CCodeMapper(ComplexCCodeMapper):
             if name == "dble":
                 name = "real"
 
-            name = "%s_%s" % (
+            name = "{}_{}".format(
                     complex_type_name(tgt_dtype),
                     name)
 
@@ -532,7 +528,7 @@ class CCodeMapper(ComplexCCodeMapper):
             if name == "aimag":
                 name = "imag"
 
-            name = "%s_%s" % (
+            name = "{}_{}".format(
                     complex_type_name(arg_dtype),
                     name)
 
@@ -568,7 +564,7 @@ class CCodeMapper(ComplexCCodeMapper):
         from pymbolic.mapper.stringifier import PREC_NONE
         if expr.dtype.kind == "c":
             r, i = expr.value
-            return "%s_new(%s, %s)" % (
+            return "{}_new({}, {})".format(
                     complex_type_name(expr.dtype),
                     self.rec(r, PREC_NONE),
                     self.rec(i, PREC_NONE))
@@ -581,7 +577,7 @@ class CCodeMapper(ComplexCCodeMapper):
 
 # }}}
 
-class Scope(object):
+class Scope:
     def __init__(self, subprogram_name, arg_names=set()):
         self.subprogram_name = subprogram_name
 
@@ -608,8 +604,8 @@ class Scope(object):
 
     def known_names(self):
         return (self.used_names
-                | set(six.iterkeys(self.dim_map))
-                | set(six.iterkeys(self.type_map)))
+                | set(self.dim_map)
+                | set(self.type_map))
 
     def is_known(self, name):
         return (name in self.used_names
@@ -643,12 +639,12 @@ class Scope(object):
     def translate_var_name(self, name):
         shape = self.dim_map.get(name)
         if name in self.data and shape is not None:
-            return "%s_%s" % (self.subprogram_name, name)
+            return f"{self.subprogram_name}_{name}"
         else:
             return name
 
 
-class FTreeWalkerBase(object):
+class FTreeWalkerBase:
     def __init__(self):
         self.scope_stack = []
 
@@ -675,7 +671,7 @@ class FTreeWalkerBase(object):
 
     ENTITY_RE = re.compile(
             r"^(?P<name>[_0-9a-zA-Z]+)"
-            "(\((?P<shape>[-+*0-9:a-zA-Z,]+)\))?$")
+            r"(\((?P<shape>[-+*0-9:a-zA-Z,]+)\))?$")
 
     def parse_dimension_specs(self, dim_decls):
         def parse_bounds(bounds_str):
@@ -949,7 +945,7 @@ class F2CLTranslator(FTreeWalkerBase):
 
             if shape is not None:
                 dim_stmt = cgen.Statement(
-                    "dimension \"fortran\" %s[%s]" % (
+                    "dimension \"fortran\" {}[{}]".format(
                         scope.translate_var_name(name),
                         ", ".join(gen_shape(s) for s in shape)
                         ))
@@ -975,7 +971,7 @@ class F2CLTranslator(FTreeWalkerBase):
                             cgen.Initializer(
                                 CLConstant(
                                     cgen.ArrayOf(self.get_declarator(
-                                        "%s_%s" % (scope.subprogram_name, name)))),
+                                        f"{scope.subprogram_name}_{name}"))),
                                 "{ %s }" % ",\n".join(self.gen_expr(x) for x in data)
                                 ))
             else:
@@ -1231,11 +1227,11 @@ class F2CLTranslator(FTreeWalkerBase):
             cast = self.force_casts.get(
                     (node.designator, i))
             if cast is not None:
-                result = "(%s) (%s)" % (cast, result)
+                result = f"({cast}) ({result})"
 
             return result
 
-        return cgen.Statement("%s(%s)" % (
+        return cgen.Statement("{}({})".format(
             node.designator,
             ", ".join(transform_arg(i, arg_str)
                 for i, arg_str in enumerate(node.items))))
@@ -1328,9 +1324,9 @@ class F2CLTranslator(FTreeWalkerBase):
                 comp_op = "<="
 
             return cgen.For(
-                    "%s = %s" % (loop_var, self.gen_expr(start)),
-                    "%s %s %s" % (loop_var, comp_op, self.gen_expr(stop)),
-                    "%s += %s" % (loop_var, self.gen_expr(step)),
+                    f"{loop_var} = {self.gen_expr(start)}",
+                    f"{loop_var} {comp_op} {self.gen_expr(stop)}",
+                    f"{loop_var} += {self.gen_expr(step)}",
                     cgen.block_if_necessary(body))
 
         else:
diff --git a/doc/conf.py b/doc/conf.py
index 07353a2ad70c857bbe9fbe597a2180cd43d62b22..2b47e9e4bf04b05d848814a78d0ceccf0d78aa9d 100644
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -1,7 +1,3 @@
-# -*- coding: utf-8 -*-
-
-from __future__ import absolute_import
-
 # PyOpenCL documentation build configuration file, created by
 # sphinx-quickstart on Fri Jun 13 00:51:19 2008.
 #
diff --git a/doc/make_constants.py b/doc/make_constants.py
index 9ab78ad070ec6d0cc419458335a75ed44f9c9a16..2e20383bab957c2d26e84f066804cd4ea925be4c 100644
--- a/doc/make_constants.py
+++ b/doc/make_constants.py
@@ -1,5 +1,3 @@
-from __future__ import absolute_import, print_function
-
 __copyright__ = "Copyright (C) 2009 Andreas Kloeckner"
 
 __license__ = """
diff --git a/examples/demo-struct-reduce.py b/examples/demo-struct-reduce.py
index 2b0d9803f1fdd32e85a2da7fe245297a8ac5cf95..c0c26e34743687a281c534c05d9c8cb74c6587ec 100644
--- a/examples/demo-struct-reduce.py
+++ b/examples/demo-struct-reduce.py
@@ -1,4 +1,3 @@
-from __future__ import absolute_import
 import numpy as np
 import pyopencl as cl
 
diff --git a/examples/demo.py b/examples/demo.py
index 59f7b3d45399777c36c49b0b1690953dbe3387cc..623660fee1b20b9ba140504ca594cc648e28bc45 100644
--- a/examples/demo.py
+++ b/examples/demo.py
@@ -1,7 +1,5 @@
 #!/usr/bin/env python
-# -*- coding: utf-8 -*-
 
-from __future__ import absolute_import, print_function
 import numpy as np
 import pyopencl as cl
 
diff --git a/examples/demo_array.py b/examples/demo_array.py
index c645b372632b8792d302658bbfa6c263b051491e..41b0f79ef2ccb74a807a8da5aff5eedf6a3bb15f 100644
--- a/examples/demo_array.py
+++ b/examples/demo_array.py
@@ -1,5 +1,3 @@
-from __future__ import absolute_import
-from __future__ import print_function
 import pyopencl as cl
 import pyopencl.array as cl_array
 import numpy
diff --git a/examples/demo_elementwise.py b/examples/demo_elementwise.py
index a8a3a007c094cf9b1ca7d3fc66142b7817a8b83d..21646c4f42a8cce495c02aef7beae5d4a2ceaffe 100644
--- a/examples/demo_elementwise.py
+++ b/examples/demo_elementwise.py
@@ -1,5 +1,3 @@
-from __future__ import absolute_import
-from __future__ import print_function
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 
diff --git a/examples/demo_elementwise_complex.py b/examples/demo_elementwise_complex.py
index 9e04e2dd5a4f09c4235e860de1aa32dfc41a714f..4fe98ec9d0f0d514c84180e2775d84c7f808b152 100644
--- a/examples/demo_elementwise_complex.py
+++ b/examples/demo_elementwise_complex.py
@@ -1,5 +1,3 @@
-from __future__ import absolute_import
-from __future__ import print_function
 import pyopencl as cl
 import pyopencl.array as cl_array
 import numpy
diff --git a/examples/demo_mandelbrot.py b/examples/demo_mandelbrot.py
index 802dfb215802c70e86bdc7534d401b4efe2f173b..9753b3ad5d9287f1968bf1d182bf22c50fe9bb79 100644
--- a/examples/demo_mandelbrot.py
+++ b/examples/demo_mandelbrot.py
@@ -1,5 +1,3 @@
-from __future__ import absolute_import
-from __future__ import print_function
 # I found this example for PyCuda here:
 # http://wiki.tiker.net/PyCuda/Examples/Mandelbrot
 #
@@ -24,7 +22,6 @@ import time
 import numpy as np
 
 import pyopencl as cl
-from six.moves import range
 
 # You can choose a calculation routine below (calc_fractal), uncomment
 # one of the three lines to test the three variations
@@ -117,7 +114,7 @@ if __name__ == '__main__':
         import tkinter as tk
     from PIL import Image, ImageTk
 
-    class Mandelbrot(object):
+    class Mandelbrot:
         def __init__(self):
             # create window
             self.root = tk.Tk()
diff --git a/examples/demo_meta_codepy.py b/examples/demo_meta_codepy.py
index 7ab9958f490bb17b5a55b18c2e9649909ac8c703..c080109b9dcfe45c16525db2eaa7709f9250b3a9 100644
--- a/examples/demo_meta_codepy.py
+++ b/examples/demo_meta_codepy.py
@@ -1,8 +1,6 @@
-from __future__ import absolute_import
 import pyopencl as cl
 import numpy
 import numpy.linalg as la
-from six.moves import range
 
 local_size = 256
 thread_strides = 32
diff --git a/examples/demo_meta_template.py b/examples/demo_meta_template.py
index 76b5f65bf88ba938273b640831a998f93cd94812..fc64934385b58c7ac6a2d5b72a5b4fb1327de688 100644
--- a/examples/demo_meta_template.py
+++ b/examples/demo_meta_template.py
@@ -1,4 +1,3 @@
-from __future__ import absolute_import
 import pyopencl as cl
 import numpy
 import numpy.linalg as la
diff --git a/examples/download-examples-from-wiki.py b/examples/download-examples-from-wiki.py
index 0f8ea87527baeb492f2a264c476ac45f4ccff02d..13fd8fb7a09ecbd4d18f8055377047bc1a00ac4c 100755
--- a/examples/download-examples-from-wiki.py
+++ b/examples/download-examples-from-wiki.py
@@ -1,6 +1,5 @@
 #! /usr/bin/env python
 
-from __future__ import absolute_import, print_function
 
 import six.moves.xmlrpc_client
 destwiki = six.moves.xmlrpc_client.ServerProxy("http://wiki.tiker.net?action=xmlrpc2")
@@ -53,6 +52,6 @@ for page in all_pages:
                 outf.close()
 
     except Exception as e:
-        print("Error when processing %s: %s" % (page, e))
+        print(f"Error when processing {page}: {e}")
         from traceback import print_exc
         print_exc()
diff --git a/examples/dump-performance.py b/examples/dump-performance.py
index 00df1d1bad6e62fc284eb7fa7ce18731255fabc4..f582cd99fcae98df7325717b4e1541dbf873bbcb 100644
--- a/examples/dump-performance.py
+++ b/examples/dump-performance.py
@@ -1,7 +1,5 @@
-from __future__ import division, absolute_import, print_function
 import pyopencl as cl
 import pyopencl.characterize.performance as perf
-from six.moves import range
 
 
 def main():
@@ -9,7 +7,7 @@ def main():
 
     prof_overhead, latency = perf.get_profiling_overhead(ctx)
     print("command latency: %g s" % latency)
-    print("profiling overhead: %g s -> %.1f %%" % (
+    print("profiling overhead: {:g} s -> {:.1f} %".format(
             prof_overhead, 100*prof_overhead/latency))
     queue = cl.CommandQueue(
             ctx, properties=cl.command_queue_properties.PROFILING_ENABLE)
diff --git a/examples/dump-properties.py b/examples/dump-properties.py
index e64f66fa25c9d0e47af70f9409b9ddd2b5aa424d..07d9159827c315605286d46a4f7de494b7d7489e 100644
--- a/examples/dump-properties.py
+++ b/examples/dump-properties.py
@@ -1,5 +1,3 @@
-from __future__ import absolute_import
-from __future__ import print_function
 import pyopencl as cl
 from optparse import OptionParser
 
@@ -21,13 +19,13 @@ def print_info(obj, info_cls):
 
             if (info_cls == cl.device_info and info_name == "PARTITION_TYPES_EXT"
                     and isinstance(info_value, list)):
-                print("%s: %s" % (info_name, [
+                print("{}: {}".format(info_name, [
                     cl.device_partition_property_ext.to_string(v,
                         "<unknown device partition property %d>")
                     for v in info_value]))
             else:
                 try:
-                    print("%s: %s" % (info_name, info_value))
+                    print(f"{info_name}: {info_value}")
                 except:
                     print("%s: <error>" % info_name)
 
@@ -72,13 +70,13 @@ for platform in cl.get_platforms():
                             return result
 
                         formats = ", ".join(
-                                "%s-%s" % (
+                                "{}-{}".format(
                                     cl.channel_order.to_string(iform.channel_order,
                                         "<unknown channel order 0x%x>"),
                                     str_chd_type(iform.channel_data_type))
                                 for iform in formats)
 
-                    print("%s %s FORMATS: %s\n" % (
+                    print("{} {} FORMATS: {}\n".format(
                             cl.mem_object_type.to_string(itype),
                             cl.mem_flags.to_string(mf),
                             formats))
diff --git a/examples/gl_interop_demo.py b/examples/gl_interop_demo.py
index da5ba3b0d6c84216a4a2273134c7cacef3a26b1f..99524cb30b3662b09aa4599d14d6df259ff6f340 100644
--- a/examples/gl_interop_demo.py
+++ b/examples/gl_interop_demo.py
@@ -1,4 +1,3 @@
-from __future__ import absolute_import
 from OpenGL.GL import *
 from OpenGL.GLUT import *
 from OpenGL.raw.GL.VERSION.GL_1_5 import glBufferData as rawGlBufferData
diff --git a/examples/gl_particle_animation.py b/examples/gl_particle_animation.py
index dd2f05c24686cc9cd777923b45de8963ed1f58b3..1d838a2a4a0884dc53f7d24e8319336c5b7ca3ee 100644
--- a/examples/gl_particle_animation.py
+++ b/examples/gl_particle_animation.py
@@ -1,4 +1,3 @@
-from __future__ import absolute_import
 # Visualization of particles with gravity
 # Source: http://enja.org/2010/08/27/adventures-in-opencl-part-2-particles-with-opengl/
 
diff --git a/examples/narray.py b/examples/narray.py
index 78b9bb9205b326b207730d411524fad93fd2c142..40ba945042b8d6337d7d4139deb1991d20532d81 100644
--- a/examples/narray.py
+++ b/examples/narray.py
@@ -1,5 +1,3 @@
-from __future__ import absolute_import
-from __future__ import print_function
 # example by Roger Pau Monn'e
 import pyopencl as cl
 import numpy as np
diff --git a/examples/print-binary.py b/examples/print-binary.py
index c7ea523947f522f9165399a52f483842d21d8744..d45c1d0fe67989eda42342d8b0dee4c90bfcc616 100755
--- a/examples/print-binary.py
+++ b/examples/print-binary.py
@@ -1,12 +1,11 @@
 #! /usr/bin/env python
 
-from __future__ import division
 
 import pyopencl as cl
 import sys
 
 ctx = cl.create_some_context()
-with open(sys.argv[1], "r") as inf:
+with open(sys.argv[1]) as inf:
     src = inf.read()
 
 prg = cl.Program(ctx, src).build()
diff --git a/examples/transpose.py b/examples/transpose.py
index 99f68a28e4bc97e889248e01e7d145172587cf3f..9b07e2b0566be8f0c02677a9c8cfb53448654a0e 100644
--- a/examples/transpose.py
+++ b/examples/transpose.py
@@ -1,13 +1,9 @@
 # Transposition of a matrix
 # originally for PyCUDA by Hendrik Riedmann <riedmann@dam.brown.edu>
 
-from __future__ import division
-from __future__ import absolute_import
-from __future__ import print_function
 import pyopencl as cl
 import numpy
 import numpy.linalg as la
-from six.moves import range
 
 
 
diff --git a/pyopencl/__init__.py b/pyopencl/__init__.py
index 013cc6031d8922ded3230647e22881b4be0d1102..4e2ef296eea79b54d3ececefcfda9c7a8e40ad66 100644
--- a/pyopencl/__init__.py
+++ b/pyopencl/__init__.py
@@ -1,7 +1,3 @@
-# -*- coding: utf-8 -*-
-
-from __future__ import division, absolute_import, print_function
-
 __copyright__ = "Copyright (C) 2009-15 Andreas Kloeckner"
 
 __license__ = """
@@ -25,7 +21,7 @@ THE SOFTWARE.
 """
 
 import six
-from six.moves import input, intern
+from six.moves import intern
 
 from pyopencl.version import VERSION, VERSION_STATUS, VERSION_TEXT  # noqa
 
@@ -53,8 +49,7 @@ import numpy as np
 
 import sys
 
-_PYPY = '__pypy__' in sys.builtin_module_names
-_CPY2 = not _PYPY and sys.version_info < (3,)
+_PYPY = "__pypy__" in sys.builtin_module_names
 
 from pyopencl._cl import (  # noqa
         get_cl_header_version,
@@ -264,7 +259,7 @@ def _find_pyopencl_include_path():
         # Try to find the include path in the same directory as this file
         include_path = join(abspath(dirname(__file__)), "cl")
         if not exists(include_path):
-            raise IOError("unable to find pyopencl include path")
+            raise OSError("unable to find pyopencl include path")
     except Exception:
         # Try to find the resource with pkg_resources (the recommended
         # setuptools approach). This is very slow.
@@ -272,11 +267,11 @@ def _find_pyopencl_include_path():
         include_path = resource_filename(
                 Requirement.parse("pyopencl"), "pyopencl/cl")
         if not exists(include_path):
-            raise IOError("unable to find pyopencl include path")
+            raise OSError("unable to find pyopencl include path")
 
     # Quote the path if it contains a space and is not quoted already.
     # See https://github.com/inducer/pyopencl/issues/250 for discussion.
-    if ' ' in include_path and not include_path.startswith('"'):
+    if " " in include_path and not include_path.startswith('"'):
         return '"' + include_path + '"'
     else:
         return include_path
@@ -287,15 +282,15 @@ def _find_pyopencl_include_path():
 # {{{ build option munging
 
 def _split_options_if_necessary(options):
-    if isinstance(options, six.string_types):
+    if isinstance(options, str):
         import shlex
         if six.PY2:
             # shlex.split takes bytes (py2 str) on py2
-            if isinstance(options, six.text_type):
+            if isinstance(options, str):
                 options = options.encode("utf-8")
         else:
             # shlex.split takes unicode (py3 str) on py3
-            if isinstance(options, six.binary_type):
+            if isinstance(options, bytes):
                 options = options.decode("utf-8")
 
         options = shlex.split(options)
@@ -333,7 +328,7 @@ def _find_include_path(options):
 
 def _options_to_bytestring(options):
     def encode_if_necessary(s):
-        if isinstance(s, six.text_type):
+        if isinstance(s, str):
             return s.encode("utf-8")
         else:
             return s
@@ -377,7 +372,7 @@ def enable_debugging(platform_or_context):
                 % platform.name)
 
 
-class Program(object):
+class Program:
     def __init__(self, arg1, arg2=None, arg3=None):
         if arg2 is None:
             # 1-argument form: program
@@ -396,7 +391,7 @@ class Program(object):
                 return
 
             import sys
-            if isinstance(source, six.text_type) and sys.version_info < (3,):
+            if isinstance(source, str) and sys.version_info < (3,):
                 from warnings import warn
                 warn("Received OpenCL source code in Unicode, "
                      "should be ASCII string. Attempting conversion.",
@@ -492,7 +487,7 @@ class Program(object):
                 self._context, options)
 
         if cache_dir is None:
-            cache_dir = getattr(self._context, 'cache_dir', None)
+            cache_dir = getattr(self._context, "cache_dir", None)
 
         import os
         build_descr = None
@@ -624,7 +619,7 @@ def _add_functionality():
     # {{{ Platform
 
     def platform_repr(self):
-        return "<pyopencl.Platform '%s' at 0x%x>" % (self.name, self.int_ptr)
+        return f"<pyopencl.Platform '{self.name}' at 0x{self.int_ptr:x}>"
 
     Platform.__repr__ = platform_repr
     Platform._get_cl_version = generic_get_cl_version
@@ -634,7 +629,7 @@ def _add_functionality():
     # {{{ Device
 
     def device_repr(self):
-        return "<pyopencl.Device '%s' on '%s' at 0x%x>" % (
+        return "<pyopencl.Device '{}' on '{}' at 0x{:x}>".format(
                 self.name.strip(), self.platform.name.strip(), self.int_ptr)
 
     def device_hashable_model_and_version_identifier(self):
@@ -673,7 +668,7 @@ def _add_functionality():
         context_old_init(self, devices, properties, dev_type)
 
     def context_repr(self):
-        return "<pyopencl.Context at 0x%x on %s>" % (self.int_ptr,
+        return "<pyopencl.Context at 0x{:x} on {}>".format(self.int_ptr,
                 ", ".join(repr(dev) for dev in self.devices))
 
     def context_get_cl_version(self):
@@ -722,7 +717,7 @@ def _add_functionality():
             self._build(options=options_bytes, devices=devices)
         except Error as e:
             msg = str(e) + "\n\n" + (75*"="+"\n").join(
-                    "Build on %s:\n\n%s" % (dev, log)
+                    f"Build on {dev}:\n\n{log}"
                     for dev, log in self._get_build_logs())
             code = e.code
             routine = e.routine
@@ -739,7 +734,7 @@ def _add_functionality():
             raise err
 
         message = (75*"="+"\n").join(
-                "Build on %s succeeded, but said:\n\n%s" % (dev, log)
+                f"Build on {dev} succeeded, but said:\n\n{log}"
                 for dev, log in self._get_build_logs()
                 if log is not None and log.strip())
 
@@ -892,7 +887,7 @@ def _add_functionality():
     # {{{ ImageFormat
 
     def image_format_repr(self):
-        return "ImageFormat(%s, %s)" % (
+        return "ImageFormat({}, {})".format(
                 channel_order.to_string(self.channel_order,
                     "<unknown channel order 0x%x>"),
                 channel_type.to_string(self.channel_data_type,
@@ -1047,7 +1042,7 @@ def _add_functionality():
                         val.code(), "<unknown error %d>")
             routine = val.routine()
             if routine:
-                result = "%s failed: %s" % (routine, result)
+                result = f"{routine} failed: {result}"
             what = val.what()
             if what:
                 if result:
@@ -1343,8 +1338,8 @@ def _add_functionality():
         return property(result)
 
     for cls, (info_method, info_class, cacheable_attrs) \
-            in six.iteritems(cls_to_info_cls):
-        for info_name, info_value in six.iteritems(info_class.__dict__):
+            in cls_to_info_cls.items():
+        for info_name, info_value in info_class.__dict__.items():
             if info_name == "to_string" or info_name.startswith("_"):
                 continue
 
@@ -1413,7 +1408,7 @@ def create_some_context(interactive=None, answers=None):
         if answers:
             return str(answers.pop(0))
         elif not interactive:
-            return ''
+            return ""
         else:
             user_input = input(prompt)
             user_inputs.append(user_input)
@@ -1511,7 +1506,7 @@ _csc = create_some_context
 
 # {{{ SVMMap
 
-class SVMMap(object):
+class SVMMap:
     """
     .. attribute:: event
 
@@ -1971,11 +1966,7 @@ def svm_empty(ctx, flags, shape, dtype, order="C", alignment=None):
         for dim in shape:
             s *= dim
     except TypeError:
-        import sys
-        if sys.version_info >= (3,):
-            admissible_types = (int, np.integer)
-        else:
-            admissible_types = (np.integer,) + six.integer_types
+        admissible_types = (int, np.integer)
 
         if not isinstance(shape, admissible_types):
             raise TypeError("shape must either be iterable or "
diff --git a/pyopencl/_buffers.py b/pyopencl/_buffers.py
deleted file mode 100644
index bbf81a2fe3bb631dd9d28f13b86caa56a4fb84bc..0000000000000000000000000000000000000000
--- a/pyopencl/_buffers.py
+++ /dev/null
@@ -1,123 +0,0 @@
-#! /usr/bin/env python
-# Shamelessly stolen from pyopengl-ctypes on 2015-06-21.
-#
-# Original file here:
-# http://bazaar.launchpad.net/~mcfletch/pyopengl/trunk/view/head:/OpenGL/arrays/_buffers.py
-
-"""Python 3.x buffer-handling (currently just for bytes/bytearray types)
-"""
-
-import ctypes
-import sys
-
-if sys.version_info[:2] < (2, 6):
-    raise ImportError('Buffer interface only usable on Python 2.6+')
-
-PyBUF_SIMPLE = 0
-PyBUF_WRITABLE = PyBUF_WRITEABLE = 0x0001
-PyBUF_ND = 0x0008
-PyBUF_STRIDES = (0x0010 | PyBUF_ND)
-PyBUF_CONTIG = (PyBUF_ND | PyBUF_WRITABLE)
-PyBUF_CONTIG_RO = (PyBUF_ND)
-PyBUF_C_CONTIGUOUS = (0x0020 | PyBUF_STRIDES)
-PyBUF_F_CONTIGUOUS = (0x0040 | PyBUF_STRIDES)
-PyBUF_ANY_CONTIGUOUS = (0x0080 | PyBUF_STRIDES)
-PyBUF_FORMAT = 0x0004
-
-# Python 2.6 doesn't define this...
-c_ssize_t = getattr(ctypes, 'c_ssize_t', ctypes.c_ulong)
-
-_fields_ = [
-    ('buf', ctypes.c_void_p),
-    ('obj', ctypes.c_void_p),
-    ('len', c_ssize_t),
-    ('itemsize', c_ssize_t),
-
-    ('readonly', ctypes.c_int),
-    ('ndim', ctypes.c_int),
-    ('format', ctypes.c_char_p),
-    ('shape', ctypes.POINTER(c_ssize_t)),
-    ('strides', ctypes.POINTER(c_ssize_t)),
-    ('suboffsets', ctypes.POINTER(c_ssize_t)),
-]
-
-
-if sys.version_info[:2] <= (2, 6) or sys.version_info[:2] >= (3, 3):
-    # Original structure was eventually restored in 3.3, so just
-    # 2.7 through 3.2 uses the "enhanced" structure below
-    _fields_.extend([
-        ('internal', ctypes.c_void_p),
-    ])
-
-else:
-    # Sigh, this structure seems to have changed with Python 3.x...
-    _fields_.extend([
-        ('smalltable', ctypes.c_size_t*2),
-        ('internal', ctypes.c_void_p),
-    ])
-
-
-class Py_buffer(ctypes.Structure):  # noqa
-    @classmethod
-    def from_object(cls, obj, flags):
-        """Create a new Py_buffer referencing ram of object"""
-        if not CheckBuffer(obj):
-            raise TypeError(
-                    "%s type does not support Buffer Protocol" % (obj.__class__,))
-        buf = cls()
-
-        # deallocation of the buf causes glibc abort :(
-        result = GetBuffer(obj, buf, flags)
-
-        if result != 0:
-            raise ValueError("Unable to retrieve Buffer from %s" % (obj,))
-        if not buf.buf:
-            raise ValueError("Null pointer result from %s" % (obj,))
-        return buf
-
-    _fields_ = _fields_
-
-    @property
-    def dims(self):
-        return self.shape[:self.ndim]
-
-    def __len__(self):
-        return self.shape[0]
-
-    @property
-    def dim_strides(self):
-        if self.strides:
-            return self.strides[:self.ndim]
-        return None
-
-    def __enter__(self):
-        return self
-
-    def __exit__(self, exc_type=None, exc_value=None, traceback=None):
-        if self.obj:
-            ReleaseBuffer(self)
-
-    def __del__(self):
-        if self.obj:
-            ReleaseBuffer(self)
-
-
-try:
-    CheckBuffer = ctypes.pythonapi.PyObject_CheckBuffer
-    CheckBuffer.argtypes = [ctypes.py_object]
-    CheckBuffer.restype = ctypes.c_int
-except AttributeError:
-    # Python 2.6 doesn't appear to have CheckBuffer support...
-    def CheckBuffer(x):  # noqa
-        return True
-
-IncRef = ctypes.pythonapi.Py_IncRef
-IncRef.argtypes = [ctypes.py_object]
-
-GetBuffer = ctypes.pythonapi.PyObject_GetBuffer
-GetBuffer.argtypes = [ctypes.py_object, ctypes.POINTER(Py_buffer), ctypes.c_int]
-GetBuffer.restype = ctypes.c_int
-
-ReleaseBuffer = ctypes.pythonapi.PyBuffer_Release
-ReleaseBuffer.argtypes = [ctypes.POINTER(Py_buffer)]
-ReleaseBuffer.restype = None
diff --git a/pyopencl/_mymako.py b/pyopencl/_mymako.py
index 78061f31e6baf7e300e0caa95ce6a175f31e9823..5d5e92f81b2307d6104c2213af8a8bf8da6fd0ad 100644
--- a/pyopencl/_mymako.py
+++ b/pyopencl/_mymako.py
@@ -1,4 +1,3 @@
-from __future__ import absolute_import
 try:
     import mako.template  # noqa
 except ImportError:
diff --git a/pyopencl/algorithm.py b/pyopencl/algorithm.py
index 8390b3dfb5bfa932dae7af93755f6a9c383ed58a..197ad94839745eb62934db9b4993a45e56038244 100644
--- a/pyopencl/algorithm.py
+++ b/pyopencl/algorithm.py
@@ -1,6 +1,5 @@
 """Algorithms built on scans."""
 
-from __future__ import division, absolute_import
 
 __copyright__ = """
 Copyright 2011-2012 Andreas Kloeckner
@@ -30,7 +29,6 @@ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 OTHER DEALINGS IN THE SOFTWARE.
 """
 
-from six.moves import range, zip
 
 import numpy as np
 import pyopencl as cl
@@ -293,13 +291,13 @@ def to_bin(n):
         digs.append(str(n % 2))
         n >>= 1
 
-    return ''.join(digs[::-1])
+    return "".join(digs[::-1])
 
 
 def _padded_bin(i, nbits):
     s = to_bin(i)
     while len(s) < nbits:
-        s = '0' + s
+        s = "0" + s
     return s
 
 
@@ -310,7 +308,7 @@ def _make_sort_scan_type(device, bits, index_dtype):
 
     fields = []
     for mnr in range(2**bits):
-        fields.append(('c%s' % _padded_bin(mnr, bits), index_dtype))
+        fields.append(("c%s" % _padded_bin(mnr, bits), index_dtype))
 
     dtype = np.dtype(fields)
 
@@ -421,7 +419,7 @@ RADIX_SORT_OUTPUT_STMT_TPL = Template(r"""//CL//
 from pyopencl.scan import GenericScanKernel
 
 
-class RadixSort(object):
+class RadixSort:
     """Provides a general `radix sort <https://en.wikipedia.org/wiki/Radix_sort>`_
     on the compute device.
 
@@ -478,7 +476,7 @@ class RadixSort(object):
 
             boundary_mnr = known_bits + "1" + (self.bits-len(known_bits)-1)*"0"
 
-            return ("((mnr < %s) ? %s : %s)" % (
+            return ("((mnr < {}) ? {} : {})".format(
                 int(boundary_mnr, 2),
                 get_count_branch(known_bits+"0"),
                 get_count_branch(known_bits+"1")))
@@ -894,11 +892,7 @@ class ListOfListsBuilder:
             __global ${index_t} *compressed_indices,
             __global ${index_t} *num_non_empty_list
         """
-        from sys import version_info
-        if version_info > (3, 0):
-            arguments = Template(arguments)
-        else:
-            arguments = Template(arguments, disable_unicode=True)
+        arguments = Template(arguments)
 
         from pyopencl.scan import GenericScanKernel
         return GenericScanKernel(
@@ -937,7 +931,7 @@ class ListOfListsBuilder:
                 continue
 
             name = "plb_loc_%s_count" % name
-            user_list_args.append(OtherArg("%s *%s" % (
+            user_list_args.append(OtherArg("{} *{}".format(
                 index_ctype, name), name))
 
         kernel_name = self.name_prefix+"_count"
@@ -1008,10 +1002,10 @@ class ListOfListsBuilder:
                     VectorArg(index_dtype, "%s_compressed_indices" % name))
 
             index_name = "plb_%s_index" % name
-            user_list_args.append(OtherArg("%s *%s" % (
+            user_list_args.append(OtherArg("{} *{}".format(
                 index_ctype, index_name), index_name))
 
-            kernel_list_arg_values += "%s, &%s, " % (list_name, index_name)
+            kernel_list_arg_values += f"{list_name}, &{index_name}, "
 
         kernel_name = self.name_prefix+"_write"
 
@@ -1332,7 +1326,7 @@ def _make_cl_int_literal(value, dtype):
     return result
 
 
-class KeyValueSorter(object):
+class KeyValueSorter:
     """Given arrays *values* and *keys* of equal length
     and a number *nkeys* of keys, returns a tuple `(starts,
     lists)`, as follows: *values* and *keys* are sorted
diff --git a/pyopencl/array.py b/pyopencl/array.py
index 9bb90d611ed643477b0c6259f1410defde81412e..d3d353d1bca6b70bc6e42127f8ab0da770ded2f4 100644
--- a/pyopencl/array.py
+++ b/pyopencl/array.py
@@ -2,7 +2,6 @@
 
 # pylint:disable=unexpected-keyword-arg  # for @elwise_kernel_runner
 
-from __future__ import division, absolute_import
 
 __copyright__ = "Copyright (C) 2009 Andreas Kloeckner"
 
@@ -29,8 +28,7 @@ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 OTHER DEALINGS IN THE SOFTWARE.
 """
 
-import six
-from six.moves import range, reduce
+from functools import reduce
 
 import numpy as np
 import pyopencl.elementwise as elementwise
@@ -65,7 +63,7 @@ except Exception:
         return False
 
 
-class VecLookupWarner(object):
+class VecLookupWarner:
     def __getattr__(self, name):
         from warnings import warn
         warn("pyopencl.array.vec is deprecated. "
@@ -206,7 +204,7 @@ class _copy_queue:  # noqa
     pass
 
 
-class Array(object):
+class Array:
     """A :class:`numpy.ndarray` work-alike that stores its data and performs
     its computations on the compute device.  *shape* and *dtype* work exactly
     as in :mod:`numpy`.  Arithmetic methods in :class:`Array` support the
@@ -437,11 +435,7 @@ class Array(object):
             for dim in shape:
                 size *= dim
         except TypeError:
-            import sys
-            if sys.version_info >= (3,):
-                admissible_types = (int, np.integer)
-            else:
-                admissible_types = (np.integer,) + six.integer_types
+            admissible_types = (int, np.integer)
 
             if not isinstance(shape, admissible_types):
                 raise TypeError("shape must either be iterable or "
@@ -773,7 +767,7 @@ class Array(object):
         return repr(self.get())
 
     def safely_stringify_for_pudb(self):
-        return "cl.Array %s %s" % (self.dtype, self.shape)
+        return f"cl.Array {self.dtype} {self.shape}"
 
     def __hash__(self):
         raise TypeError("pyopencl arrays are not hashable.")
@@ -1949,7 +1943,7 @@ def as_strided(ary, shape=None, strides=None):
             data=ary.data, strides=strides)
 
 
-class _same_as_transfer(object):  # noqa
+class _same_as_transfer:  # noqa
     pass
 
 
@@ -2107,7 +2101,7 @@ def arange(queue, *args, **kwargs):
         raise ValueError("too many arguments")
 
     admissible_names = ["start", "stop", "step", "dtype", "allocator"]
-    for k, v in six.iteritems(kwargs):
+    for k, v in kwargs.items():
         if k in admissible_names:
             if getattr(inf, k) is None:
                 setattr(inf, k, v)
diff --git a/pyopencl/bitonic_sort.py b/pyopencl/bitonic_sort.py
index 4c13cbaa871bd88556c87618f4a42b6d619abd68..29fff563a05ebd4393d28068ecedef68f917945a 100644
--- a/pyopencl/bitonic_sort.py
+++ b/pyopencl/bitonic_sort.py
@@ -1,5 +1,3 @@
-from __future__ import division, with_statement, absolute_import, print_function
-
 __copyright__ = """
 Copyright (c) 2011, Eric Bainville
 Copyright (c) 2015, Ilya Efimoff
@@ -50,7 +48,7 @@ def _is_power_of_2(n):
     return n == 0 or 2**bitlog2(n) == n
 
 
-class BitonicSort(object):
+class BitonicSort:
     """Sort an array (or one axis of one) using a sorting network.
 
     Will only work if the axis of the array to be sorted has a length
@@ -64,14 +62,14 @@ class BitonicSort(object):
     """
 
     kernels_srcs = {
-            'B2': _tmpl.ParallelBitonic_B2,
-            'B4': _tmpl.ParallelBitonic_B4,
-            'B8': _tmpl.ParallelBitonic_B8,
-            'B16': _tmpl.ParallelBitonic_B16,
-            'C4': _tmpl.ParallelBitonic_C4,
-            'BL': _tmpl.ParallelBitonic_Local,
-            'BLO': _tmpl.ParallelBitonic_Local_Optim,
-            'PML': _tmpl.ParallelMerge_Local
+            "B2": _tmpl.ParallelBitonic_B2,
+            "B4": _tmpl.ParallelBitonic_B4,
+            "B8": _tmpl.ParallelBitonic_B8,
+            "B16": _tmpl.ParallelBitonic_B16,
+            "C4": _tmpl.ParallelBitonic_C4,
+            "BL": _tmpl.ParallelBitonic_Local,
+            "BLO": _tmpl.ParallelBitonic_Local_Optim,
+            "PML": _tmpl.ParallelMerge_Local
             }
 
     def __init__(self, context):
@@ -162,7 +160,7 @@ class BitonicSort(object):
         key_ctype = dtype_to_ctype(key_dtype)
 
         if idx_dtype is None:
-            idx_ctype = 'uint'  # Dummy
+            idx_ctype = "uint"  # Dummy
 
         else:
             idx_ctype = dtype_to_ctype(idx_dtype)
@@ -206,7 +204,7 @@ class BitonicSort(object):
 
         length = wg >> 1
         prg = self.get_program(
-                'BLO', argsort, (1, 1, key_ctype, idx_ctype, ds, ns))
+                "BLO", argsort, (1, 1, key_ctype, idx_ctype, ds, ns))
         run_queue.append((prg.run, size, (wg,), True))
 
         while length < ds:
@@ -215,16 +213,16 @@ class BitonicSort(object):
                 ninc = 0
                 direction = length << 1
                 if allowb16 and inc >= 8 and ninc == 0:
-                    letter = 'B16'
+                    letter = "B16"
                     ninc = 4
                 elif allowb8 and inc >= 4 and ninc == 0:
-                    letter = 'B8'
+                    letter = "B8"
                     ninc = 3
                 elif allowb4 and inc >= 2 and ninc == 0:
-                    letter = 'B4'
+                    letter = "B4"
                     ninc = 2
                 elif inc >= 0:
-                    letter = 'B2'
+                    letter = "B2"
                     ninc = 1
 
                 nthreads = size >> ninc
diff --git a/pyopencl/cache.py b/pyopencl/cache.py
index c4603a7d75983c4a55e109d6bdd89c7a47750fb3..adae470b2a58f96db11f22d38a8e374712ee259f 100644
--- a/pyopencl/cache.py
+++ b/pyopencl/cache.py
@@ -1,6 +1,5 @@
 """PyOpenCL compiler cache."""
 
-from __future__ import division, absolute_import
 
 __copyright__ = "Copyright (C) 2011 Andreas Kloeckner"
 
@@ -24,8 +23,6 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 THE SOFTWARE.
 """
 
-import six
-from six.moves import zip
 import pyopencl._cl as _cl
 import re
 import sys
@@ -49,7 +46,7 @@ def _erase_dir(dir):
 
 
 def update_checksum(checksum, obj):
-    if isinstance(obj, six.text_type):
+    if isinstance(obj, str):
         checksum.update(obj.encode("utf8"))
     else:
         checksum.update(obj)
@@ -57,7 +54,7 @@ def update_checksum(checksum, obj):
 
 # {{{ cleanup
 
-class CleanupBase(object):
+class CleanupBase:
     pass
 
 
@@ -168,8 +165,8 @@ def get_dependencies(src, include_path):
 
                 if included_file_name not in result:
                     try:
-                        src_file = open(included_file_name, "rt")
-                    except IOError:
+                        src_file = open(included_file_name)
+                    except OSError:
                         continue
 
                     try:
@@ -198,7 +195,7 @@ def get_dependencies(src, include_path):
 
     _inner(src)
 
-    result = list((name,) + vals for name, vals in six.iteritems(result))
+    result = list((name,) + vals for name, vals in result.items())
     result.sort()
 
     return result
@@ -275,7 +272,7 @@ def retrieve_from_cache(cache_dir, cache_key):
 
                 try:
                     info_file = open(info_path, "rb")
-                except IOError:
+                except OSError:
                     raise _InvalidInfoFile()
 
                 try:
@@ -330,8 +327,8 @@ def _create_built_program_from_source_cached(ctx, src, options_bytes,
     if cache_dir is None:
         import appdirs
         cache_dir = join(appdirs.user_cache_dir("pyopencl", "pyopencl"),
-                "pyopencl-compiler-cache-v2-py%s" % (
-                    ".".join(str(i) for i in sys.version_info),))
+                "pyopencl-compiler-cache-v2-py{}".format(
+                    ".".join(str(i) for i in sys.version_info)))
 
     # {{{ ensure cache directory exists
 
@@ -369,7 +366,7 @@ def _create_built_program_from_source_cached(ctx, src, options_bytes,
             logs.append(log)
 
     message = (75*"="+"\n").join(
-            "Build on %s succeeded, but said:\n\n%s" % (dev, log)
+            f"Build on {dev} succeeded, but said:\n\n{log}"
             for dev, log in zip(devices, logs)
             if log is not None and log.strip())
 
diff --git a/pyopencl/capture_call.py b/pyopencl/capture_call.py
index 527d8caed96f70264762e75b211f2247b67c2495..867365319f39f4e4a629aa6446b8a607c2d16b93 100644
--- a/pyopencl/capture_call.py
+++ b/pyopencl/capture_call.py
@@ -1,7 +1,3 @@
-from __future__ import with_statement, division
-from __future__ import absolute_import
-from six.moves import zip
-
 __copyright__ = "Copyright (C) 2013 Andreas Kloeckner"
 
 __license__ = """
@@ -83,13 +79,13 @@ def capture_kernel_call(kernel, filename, queue, g_size, l_size, *args, **kwargs
             elif isinstance(arg, (int, float)):
                 kernel_args.append(repr(arg))
             elif isinstance(arg, np.integer):
-                kernel_args.append("np.%s(%s)" % (
+                kernel_args.append("np.{}({})".format(
                     arg.dtype.type.__name__, repr(int(arg))))
             elif isinstance(arg, np.floating):
-                kernel_args.append("np.%s(%s)" % (
+                kernel_args.append("np.{}({})".format(
                     arg.dtype.type.__name__, repr(float(arg))))
             elif isinstance(arg, np.complexfloating):
-                kernel_args.append("np.%s(%s)" % (
+                kernel_args.append("np.{}({})".format(
                     arg.dtype.type.__name__, repr(complex(arg))))
             else:
                 try:
@@ -133,7 +129,7 @@ def capture_kernel_call(kernel, filename, queue, g_size, l_size, *args, **kwargs
                     % ", ".join(
                         strify_dtype(dt) for dt in kernel._scalar_arg_dtypes))
 
-        cg("knl(queue, %s, %s," % (repr(g_size), repr(l_size)))
+        cg("knl(queue, {}, {},".format(repr(g_size), repr(l_size)))
         cg("    %s)" % ", ".join(kernel_args))
         cg("")
         cg("queue.finish()")
@@ -163,7 +159,7 @@ def capture_kernel_call(kernel, filename, queue, g_size, l_size, *args, **kwargs
     # {{{ file trailer
 
     cg("")
-    cg("if __name__ == \"__main__\":")
+    cg('if __name__ == "__main__":')
     with Indentation(cg):
         cg("main()")
     cg("")
diff --git a/pyopencl/characterize/__init__.py b/pyopencl/characterize/__init__.py
index eae523be2f045bcadafb28166001cc6beeaf445f..dfb8d0195a9bd7fc946ec50f9b79b2791db5e4e2 100644
--- a/pyopencl/characterize/__init__.py
+++ b/pyopencl/characterize/__init__.py
@@ -1,5 +1,3 @@
-from __future__ import division, absolute_import
-
 __copyright__ = "Copyright (C) 2009 Andreas Kloeckner"
 
 __license__ = """
@@ -24,8 +22,6 @@ THE SOFTWARE.
 
 import pyopencl as cl
 from pytools import memoize
-import six
-from six.moves import range, zip
 
 
 class CLCharacterizationWarning(UserWarning):
@@ -237,13 +233,13 @@ def why_not_local_access_conflict_free(dev, itemsize,
 
             bank = (addr // gran) % bank_count
             bank_accesses.setdefault(bank, []).append(
-                    "w.item %s -> %s" % (work_item_id, idx[::-1]))
+                    "w.item {} -> {}".format(work_item_id, idx[::-1]))
 
         conflict_multiplicity = max(
-                len(acc) for acc in six.itervalues(bank_accesses))
+                len(acc) for acc in bank_accesses.values())
 
         if conflict_multiplicity > 1:
-            for bank, acc in six.iteritems(bank_accesses):
+            for bank, acc in bank_accesses.items():
                 if len(acc) == conflict_multiplicity:
                     conflicts.append(
                             (conflict_multiplicity,
diff --git a/pyopencl/characterize/performance.py b/pyopencl/characterize/performance.py
index f0c769077fe4a2d2959b5f39e2f46588c0eca3cc..f629240438ddd92404a1a8f29fa100761347c95b 100644
--- a/pyopencl/characterize/performance.py
+++ b/pyopencl/characterize/performance.py
@@ -1,5 +1,3 @@
-from __future__ import division, absolute_import, print_function
-
 __copyright__ = "Copyright (C) 2009 Andreas Kloeckner"
 
 __license__ = """
@@ -22,7 +20,6 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 THE SOFTWARE.
 """
 
-from six.moves import range
 import pyopencl as cl
 import numpy as np
 
@@ -100,7 +97,7 @@ def _get_time(queue, f, timer_factory=None, desired_duration=0.1,
 
 # {{{ transfer measurements
 
-class HostDeviceTransferBase(object):
+class HostDeviceTransferBase:
     def __init__(self, queue, block_size):
         self.queue = queue
         self.host_buf = np.empty(block_size, dtype=np.uint8)
@@ -117,7 +114,7 @@ class DeviceToHostTransfer(HostDeviceTransferBase):
         return cl.enqueue_copy(self. queue, self.host_buf, self.dev_buf)
 
 
-class DeviceToDeviceTransfer(object):
+class DeviceToDeviceTransfer:
     def __init__(self, queue, block_size):
         self.queue = queue
         mf = cl.mem_flags
diff --git a/pyopencl/clmath.py b/pyopencl/clmath.py
index 2ae8bfbfa22fd1842134c8db96be03a2e7fb9a44..58c20ce5f48fa25379dd5d84bce95709537c6d61 100644
--- a/pyopencl/clmath.py
+++ b/pyopencl/clmath.py
@@ -1,5 +1,3 @@
-from __future__ import absolute_import
-
 # pylint:disable=unexpected-keyword-arg  # for @elwise_kernel_runner
 
 __copyright__ = "Copyright (C) 2009 Andreas Kloeckner"
@@ -35,7 +33,7 @@ def _make_unary_array_func(name):
     def knl_runner(result, arg):
         if arg.dtype.kind == "c":
             from pyopencl.elementwise import complex_dtype_to_name
-            fname = "%s_%s" % (complex_dtype_to_name(arg.dtype), name)
+            fname = "{}_{}".format(complex_dtype_to_name(arg.dtype), name)
         else:
             fname = name
 
diff --git a/pyopencl/clrandom.py b/pyopencl/clrandom.py
index 96acce1f40c15cd5d87cc71a5761e328d950146c..aada2bd850d305735a62b67517cb4418d6fe7687 100644
--- a/pyopencl/clrandom.py
+++ b/pyopencl/clrandom.py
@@ -1,6 +1,3 @@
-# encoding: utf8
-from __future__ import division, absolute_import
-
 __copyright__ = "Copyright (C) 2009-16 Andreas Kloeckner"
 
 __license__ = """
@@ -26,7 +23,7 @@ THE SOFTWARE.
 
 # {{{ documentation
 
-__doc__ = u"""
+__doc__ = """
 PyOpenCL now includes and uses some of the `Random123 random number generators
 <https://www.deshawresearch.com/resources_random123.html>`_ by D.E. Shaw
 Research.  In addition to being usable through the convenience functions above,
@@ -72,7 +69,7 @@ import numpy as np
 
 # {{{ RanluxGenerator (deprecated)
 
-class RanluxGenerator(object):
+class RanluxGenerator:
     """
     .. warning::
 
@@ -382,21 +379,21 @@ class RanluxGenerator(object):
     @memoize_method
     def get_sync_kernel(self):
         src = """//CL//
-            %(defines)s
+            {defines}
 
             #include <pyopencl-ranluxcl.cl>
 
             kernel void sync(
                 global ranluxcl_state_t *ranluxcltab)
-            {
+            {{
               ranluxcl_state_t ranluxclstate;
               ranluxcl_download_seed(&ranluxclstate, ranluxcltab);
               ranluxcl_synchronize(&ranluxclstate);
               ranluxcl_upload_seed(&ranluxclstate, ranluxcltab);
-            }
-            """ % {
-                "defines": self.generate_settings_defines(),
-                }
+            }}
+            """.format(
+                defines=self.generate_settings_defines(),
+                )
         prg = cl.Program(self.context, src).build()
         return prg.sync
 
@@ -414,7 +411,7 @@ class RanluxGenerator(object):
 
 # {{{ Random123 generators
 
-class Random123GeneratorBase(object):
+class Random123GeneratorBase:
     """
     .. versionadded:: 2016.2
 
@@ -535,9 +532,9 @@ class Random123GeneratorBase(object):
                     "unsupported RNG distribution/data type combination '%s/%s'"
                     % rng_key)
 
-        kernel_name = "rng_gen_%s_%s" % (self.generator_name, distribution)
+        kernel_name = f"rng_gen_{self.generator_name}_{distribution}"
         src = """//CL//
-            #include <%(header_name)s>
+            #include <{header_name}>
 
             #ifndef M_PI
             #ifdef M_PI_F
@@ -547,29 +544,29 @@ class Random123GeneratorBase(object):
             #endif
             #endif
 
-            typedef %(output_t)s output_t;
-            typedef %(output_t)s4 output_vec_t;
-            typedef %(gen_name)s_ctr_t ctr_t;
-            typedef %(gen_name)s_key_t key_t;
+            typedef {output_t} output_t;
+            typedef {output_t}4 output_vec_t;
+            typedef {gen_name}_ctr_t ctr_t;
+            typedef {gen_name}_key_t key_t;
 
             uint4 gen_bits(key_t *key, ctr_t *ctr)
-            {
-                union {
+            {{
+                union {{
                     ctr_t ctr_el;
                     uint4 vec_el;
-                } u;
+                }} u;
 
-                u.ctr_el = %(gen_name)s(*ctr, *key);
+                u.ctr_el = {gen_name}(*ctr, *key);
                 if (++ctr->v[0] == 0)
                     if (++ctr->v[1] == 0)
                         ++ctr->v[2];
 
                 return u.vec_el;
-            }
+            }}
 
-            #if %(include_box_muller)s
+            #if {include_box_muller}
             output_vec_t box_muller(output_vec_t x)
-            {
+            {{
                 #define BOX_MULLER(I, COMPA, COMPB) \
                     output_t r##I = sqrt(-2*log(x.COMPA)); \
                     output_t c##I; \
@@ -578,14 +575,14 @@ class Random123GeneratorBase(object):
                 BOX_MULLER(0, x, y);
                 BOX_MULLER(1, z, w);
                 return (output_vec_t) (r0*c0, r0*s0, r1*c1, r1*s1);
-            }
+            }}
             #endif
 
-            #define GET_RANDOM_NUM(gen) %(rng_expr)s
+            #define GET_RANDOM_NUM(gen) {rng_expr}
 
-            kernel void %(kernel_name)s(
+            kernel void {kernel_name}(
                 int k1,
-                #if %(key_length)s > 2
+                #if {key_length} > 2
                 int k2, int k3,
                 #endif
                 int c0, int c1, int c2, int c3,
@@ -593,23 +590,23 @@ class Random123GeneratorBase(object):
                 long out_size,
                 output_t scale,
                 output_t shift)
-            {
-                #if %(key_length)s == 2
-                key_t k = {{get_global_id(0), k1}};
+            {{
+                #if {key_length} == 2
+                key_t k = {{{{get_global_id(0), k1}}}};
                 #else
-                key_t k = {{get_global_id(0), k1, k2, k3}};
+                key_t k = {{{{get_global_id(0), k1, k2, k3}}}};
                 #endif
 
-                ctr_t c = {{c0, c1, c2, c3}};
+                ctr_t c = {{{{c0, c1, c2, c3}}}};
 
                 // output bulk
                 unsigned long idx = get_global_id(0)*4;
                 while (idx + 4 < out_size)
-                {
+                {{
                     output_vec_t ran = GET_RANDOM_NUM(gen_bits(&k, &c));
                     vstore4(ran, 0, &output[idx]);
                     idx += 4*get_global_size(0);
-                }
+                }}
 
                 // output tail
                 output_vec_t tail_ran = GET_RANDOM_NUM(gen_bits(&k, &c));
@@ -621,16 +618,16 @@ class Random123GeneratorBase(object):
                   output[idx+2] = tail_ran.z;
                 if (idx+3 < out_size)
                   output[idx+3] = tail_ran.w;
-            }
-            """ % {
-                "kernel_name": kernel_name,
-                "gen_name": self.generator_name,
-                "header_name": self.header_name,
-                "output_t": c_type,
-                "key_length": self.key_length,
-                "include_box_muller": int(distribution == "normal"),
-                "rng_expr": rng_expr
-                }
+            }}
+            """.format(
+                kernel_name=kernel_name,
+                gen_name=self.generator_name,
+                header_name=self.header_name,
+                output_t=c_type,
+                key_length=self.key_length,
+                include_box_muller=int(distribution == "normal"),
+                rng_expr=rng_expr
+                )
 
         prg = cl.Program(self.context, src).build()
         knl = getattr(prg, kernel_name)
diff --git a/pyopencl/cltypes.py b/pyopencl/cltypes.py
index d1ba79f3f8e3905bdee8f119dca3e57a8dda6509..fed1834ca087fed17791171536e70e0446c580c6 100644
--- a/pyopencl/cltypes.py
+++ b/pyopencl/cltypes.py
@@ -1,5 +1,3 @@
-# encoding: utf8
-
 __copyright__ = "Copyright (C) 2016 Jonathan Mackenzie"
 
 __license__ = """
@@ -24,7 +22,7 @@ import numpy as np
 from pyopencl.tools import get_or_register_dtype
 import warnings
 
-if __file__.endswith('array.py'):
+if __file__.endswith("array.py"):
     warnings.warn("pyopencl.array.vec is deprecated. Please use pyopencl.cltypes")
 
 """
@@ -48,8 +46,8 @@ double = np.float64
 
 def _create_vector_types():
     _mapping = [(k, globals()[k]) for k in
-                ['char', 'uchar', 'short', 'ushort', 'int',
-                 'uint', 'long', 'ulong', 'float', 'double']]
+                ["char", "uchar", "short", "ushort", "int",
+                 "uint", "long", "ulong", "float", "double"]]
 
     def set_global(key, val):
         globals()[key] = val
diff --git a/pyopencl/compyte b/pyopencl/compyte
index 49e670e0ab7bbc822032196b3478522c04168d6f..3367a19729cfe42d51133453b7bdfa1756a853d9 160000
--- a/pyopencl/compyte
+++ b/pyopencl/compyte
@@ -1 +1 @@
-Subproject commit 49e670e0ab7bbc822032196b3478522c04168d6f
+Subproject commit 3367a19729cfe42d51133453b7bdfa1756a853d9
diff --git a/pyopencl/elementwise.py b/pyopencl/elementwise.py
index c9822cb257e777ef852ffbbed3f47331d17a46bc..357aa2bbf17477713905d040376ec199a518f877 100644
--- a/pyopencl/elementwise.py
+++ b/pyopencl/elementwise.py
@@ -1,7 +1,5 @@
 """Elementwise functionality."""
 
-from __future__ import division, absolute_import
-from six.moves import range, zip
 
 __copyright__ = "Copyright (C) 2009 Andreas Kloeckner"
 
@@ -81,29 +79,29 @@ def get_elwise_program(context, arguments, operation,
                 stacklevel=3)
 
     source = ("""//CL//
-        %(preamble)s
+        {preamble}
 
         #define PYOPENCL_ELWISE_CONTINUE continue
 
-        __kernel void %(name)s(%(arguments)s)
-        {
+        __kernel void {name}({arguments})
+        {{
           int lid = get_local_id(0);
           int gsize = get_global_size(0);
           int work_group_start = get_local_size(0)*get_group_id(0);
           long i;
 
-          %(loop_prep)s;
-          %(body)s
-          %(after_loop)s;
-        }
-        """ % {
-            "arguments": ", ".join(arg.declarator() for arg in arguments),
-            "name": name,
-            "preamble": preamble,
-            "loop_prep": loop_prep,
-            "after_loop": after_loop,
-            "body": body % dict(operation=operation),
-            })
+          {loop_prep};
+          {body}
+          {after_loop};
+        }}
+        """.format(
+            arguments=", ".join(arg.declarator() for arg in arguments),
+            name=name,
+            preamble=preamble,
+            loop_prep=loop_prep,
+            after_loop=after_loop,
+            body=body % dict(operation=operation),
+            ))
 
     from pyopencl import Program
     return Program(context, source).build(options)
@@ -134,7 +132,7 @@ def get_elwise_kernel_and_types(context, arguments, operation,
                         #define PYOPENCL_DEFINE_CDOUBLE
                         """)
                     have_double_pragma = True
-            if arg.dtype.kind == 'c':
+            if arg.dtype.kind == "c":
                 if not have_complex_include:
                     includes.append("#include <pyopencl-complex.h>\n")
                     have_complex_include = True
@@ -471,20 +469,20 @@ def get_put_kernel(context, dtype, idx_dtype, vec_count=1):
 def get_copy_kernel(context, dtype_dest, dtype_src):
     src = "src[i]"
     if dtype_dest.kind == "c" != dtype_src.kind:
-        src = "%s_fromreal(%s)" % (complex_dtype_to_name(dtype_dest), src)
+        src = "{}_fromreal({})".format(complex_dtype_to_name(dtype_dest), src)
 
     if dtype_dest.kind == "c" and dtype_src != dtype_dest:
-        src = "%s_cast(%s)" % (complex_dtype_to_name(dtype_dest), src),
+        src = "{}_cast({})".format(complex_dtype_to_name(dtype_dest), src),
 
     if dtype_dest != dtype_src and (
             dtype_dest.kind == "V" or dtype_src.kind == "V"):
         raise TypeError("copying between non-identical struct types")
 
     return get_elwise_kernel(context,
-            "%(tp_dest)s *dest, %(tp_src)s *src" % {
-                "tp_dest": dtype_to_ctype(dtype_dest),
-                "tp_src": dtype_to_ctype(dtype_src),
-                },
+            "{tp_dest} *dest, {tp_src} *src".format(
+                tp_dest=dtype_to_ctype(dtype_dest),
+                tp_src=dtype_to_ctype(dtype_src),
+                ),
             "dest[i] = %s" % src,
             preamble=dtype_to_c_struct(context.devices[0], dtype_dest),
             name="copy")
@@ -518,10 +516,10 @@ def get_axpbyz_kernel(context, dtype_x, dtype_y, dtype_z):
         by = "%s_mul(b, y[i])" % complex_dtype_to_name(dtype_y)
 
     if x_is_complex and not y_is_complex:
-        by = "%s_fromreal(%s)" % (complex_dtype_to_name(dtype_x), by)
+        by = "{}_fromreal({})".format(complex_dtype_to_name(dtype_x), by)
 
     if not x_is_complex and y_is_complex:
-        ax = "%s_fromreal(%s)" % (complex_dtype_to_name(dtype_y), ax)
+        ax = "{}_fromreal({})".format(complex_dtype_to_name(dtype_y), ax)
 
     if x_is_complex or y_is_complex:
         result = (
@@ -531,14 +529,14 @@ def get_axpbyz_kernel(context, dtype_x, dtype_y, dtype_z):
                     by=by,
                     root=complex_dtype_to_name(dtype_z)))
     else:
-        result = "%s + %s" % (ax, by)
+        result = f"{ax} + {by}"
 
     return get_elwise_kernel(context,
-            "%(tp_z)s *z, %(tp_x)s a, %(tp_x)s *x, %(tp_y)s b, %(tp_y)s *y" % {
-                "tp_x": dtype_to_ctype(dtype_x),
-                "tp_y": dtype_to_ctype(dtype_y),
-                "tp_z": dtype_to_ctype(dtype_z),
-                },
+            "{tp_z} *z, {tp_x} a, {tp_x} *x, {tp_y} b, {tp_y} *y".format(
+                tp_x=dtype_to_ctype(dtype_x),
+                tp_y=dtype_to_ctype(dtype_y),
+                tp_z=dtype_to_ctype(dtype_z),
+                ),
             "z[i] = %s" % result,
             name="axpbyz")
 
@@ -557,33 +555,33 @@ def get_axpbz_kernel(context, dtype_a, dtype_x, dtype_b, dtype_z):
         x = "x[i]"
 
         if dtype_x != dtype_z:
-            x = "%s_cast(%s)" % (complex_dtype_to_name(dtype_z), x)
+            x = "{}_cast({})".format(complex_dtype_to_name(dtype_z), x)
 
         if a_is_complex:
             if dtype_a != dtype_z:
-                a = "%s_cast(%s)" % (complex_dtype_to_name(dtype_z), a)
+                a = "{}_cast({})".format(complex_dtype_to_name(dtype_z), a)
 
-            ax = "%s_mul(%s, %s)" % (complex_dtype_to_name(dtype_z), a, x)
+            ax = "{}_mul({}, {})".format(complex_dtype_to_name(dtype_z), a, x)
         else:
-            ax = "%s_rmul(%s, %s)" % (complex_dtype_to_name(dtype_z), a, x)
+            ax = "{}_rmul({}, {})".format(complex_dtype_to_name(dtype_z), a, x)
     elif a_is_complex:
         a = "a"
         x = "x[i]"
 
         if dtype_a != dtype_z:
-            a = "%s_cast(%s)" % (complex_dtype_to_name(dtype_z), a)
-        ax = "%s_mulr(%s, %s)" % (complex_dtype_to_name(dtype_z), a, x)
+            a = "{}_cast({})".format(complex_dtype_to_name(dtype_z), a)
+        ax = "{}_mulr({}, {})".format(complex_dtype_to_name(dtype_z), a, x)
 
     b = "b"
     if z_is_complex and not b_is_complex:
-        b = "%s_fromreal(%s)" % (complex_dtype_to_name(dtype_z), b)
+        b = "{}_fromreal({})".format(complex_dtype_to_name(dtype_z), b)
 
     if z_is_complex and not (a_is_complex or x_is_complex):
-        ax = "%s_fromreal(%s)" % (complex_dtype_to_name(dtype_z), ax)
+        ax = "{}_fromreal({})".format(complex_dtype_to_name(dtype_z), ax)
 
     if z_is_complex:
-        ax = "%s_cast(%s)" % (complex_dtype_to_name(dtype_z), ax)
-        b = "%s_cast(%s)" % (complex_dtype_to_name(dtype_z), b)
+        ax = "{}_cast({})".format(complex_dtype_to_name(dtype_z), ax)
+        b = "{}_cast({})".format(complex_dtype_to_name(dtype_z), b)
 
     if a_is_complex or x_is_complex or b_is_complex:
         expr = "{root}_add({ax}, {b})".format(
@@ -591,15 +589,15 @@ def get_axpbz_kernel(context, dtype_a, dtype_x, dtype_b, dtype_z):
                 b=b,
                 root=complex_dtype_to_name(dtype_z))
     else:
-        expr = "%s + %s" % (ax, b)
+        expr = f"{ax} + {b}"
 
     return get_elwise_kernel(context,
-            "%(tp_z)s *z, %(tp_a)s a, %(tp_x)s *x,%(tp_b)s b" % {
-                "tp_a": dtype_to_ctype(dtype_a),
-                "tp_x": dtype_to_ctype(dtype_x),
-                "tp_b": dtype_to_ctype(dtype_b),
-                "tp_z": dtype_to_ctype(dtype_z),
-                },
+            "{tp_z} *z, {tp_a} a, {tp_x} *x,{tp_b} b".format(
+                tp_a=dtype_to_ctype(dtype_a),
+                tp_x=dtype_to_ctype(dtype_x),
+                tp_b=dtype_to_ctype(dtype_b),
+                tp_z=dtype_to_ctype(dtype_z),
+                ),
             "z[i] = " + expr,
             name="axpb")
 
@@ -613,25 +611,25 @@ def get_multiply_kernel(context, dtype_x, dtype_y, dtype_z):
     y = "y[i]"
 
     if x_is_complex and dtype_x != dtype_z:
-        x = "%s_cast(%s)" % (complex_dtype_to_name(dtype_z), x)
+        x = "{}_cast({})".format(complex_dtype_to_name(dtype_z), x)
     if y_is_complex and dtype_y != dtype_z:
-        y = "%s_cast(%s)" % (complex_dtype_to_name(dtype_z), y)
+        y = "{}_cast({})".format(complex_dtype_to_name(dtype_z), y)
 
     if x_is_complex and y_is_complex:
-        xy = "%s_mul(%s, %s)" % (complex_dtype_to_name(dtype_z), x, y)
+        xy = "{}_mul({}, {})".format(complex_dtype_to_name(dtype_z), x, y)
     elif x_is_complex and not y_is_complex:
-        xy = "%s_mulr(%s, %s)" % (complex_dtype_to_name(dtype_z), x, y)
+        xy = "{}_mulr({}, {})".format(complex_dtype_to_name(dtype_z), x, y)
     elif not x_is_complex and y_is_complex:
-        xy = "%s_rmul(%s, %s)" % (complex_dtype_to_name(dtype_z), x, y)
+        xy = "{}_rmul({}, {})".format(complex_dtype_to_name(dtype_z), x, y)
     else:
-        xy = "%s * %s" % (x, y)
+        xy = f"{x} * {y}"
 
     return get_elwise_kernel(context,
-            "%(tp_z)s *z, %(tp_x)s *x, %(tp_y)s *y" % {
-                "tp_x": dtype_to_ctype(dtype_x),
-                "tp_y": dtype_to_ctype(dtype_y),
-                "tp_z": dtype_to_ctype(dtype_z),
-                },
+            "{tp_z} *z, {tp_x} *x, {tp_y} *y".format(
+                tp_x=dtype_to_ctype(dtype_x),
+                tp_y=dtype_to_ctype(dtype_y),
+                tp_z=dtype_to_ctype(dtype_z),
+                ),
             "z[i] = %s" % xy,
             name="multiply")
 
@@ -647,28 +645,28 @@ def get_divide_kernel(context, dtype_x, dtype_y, dtype_z):
 
     if z_is_complex and dtype_x != dtype_y:
         if x_is_complex and dtype_x != dtype_z:
-            x = "%s_cast(%s)" % (complex_dtype_to_name(dtype_z), x)
+            x = "{}_cast({})".format(complex_dtype_to_name(dtype_z), x)
         if y_is_complex and dtype_y != dtype_z:
-            y = "%s_cast(%s)" % (complex_dtype_to_name(dtype_z), y)
+            y = "{}_cast({})".format(complex_dtype_to_name(dtype_z), y)
 
     if x_is_complex and y_is_complex:
-        xoy = "%s_divide(%s, %s)" % (complex_dtype_to_name(dtype_z), x, y)
+        xoy = "{}_divide({}, {})".format(complex_dtype_to_name(dtype_z), x, y)
     elif not x_is_complex and y_is_complex:
-        xoy = "%s_rdivide(%s, %s)" % (complex_dtype_to_name(dtype_z), x, y)
+        xoy = "{}_rdivide({}, {})".format(complex_dtype_to_name(dtype_z), x, y)
     elif x_is_complex and not y_is_complex:
-        xoy = "%s_divider(%s, %s)" % (complex_dtype_to_name(dtype_z), x, y)
+        xoy = "{}_divider({}, {})".format(complex_dtype_to_name(dtype_z), x, y)
     else:
-        xoy = "%s / %s" % (x, y)
+        xoy = f"{x} / {y}"
 
     if z_is_complex:
-        xoy = "%s_cast(%s)" % (complex_dtype_to_name(dtype_z), xoy)
+        xoy = "{}_cast({})".format(complex_dtype_to_name(dtype_z), xoy)
 
     return get_elwise_kernel(context,
-            "%(tp_z)s *z, %(tp_x)s *x, %(tp_y)s *y" % {
-                "tp_x": dtype_to_ctype(dtype_x),
-                "tp_y": dtype_to_ctype(dtype_y),
-                "tp_z": dtype_to_ctype(dtype_z),
-                },
+            "{tp_z} *z, {tp_x} *x, {tp_y} *y".format(
+                tp_x=dtype_to_ctype(dtype_x),
+                tp_y=dtype_to_ctype(dtype_y),
+                tp_z=dtype_to_ctype(dtype_z),
+                ),
             "z[i] = %s" % xoy,
             name="divide")
 
@@ -685,25 +683,25 @@ def get_rdivide_elwise_kernel(context, dtype_x, dtype_y, dtype_z):
 
     if z_is_complex and dtype_x != dtype_y:
         if x_is_complex and dtype_x != dtype_z:
-            x = "%s_cast(%s)" % (complex_dtype_to_name(dtype_z), x)
+            x = "{}_cast({})".format(complex_dtype_to_name(dtype_z), x)
         if y_is_complex and dtype_y != dtype_z:
-            y = "%s_cast(%s)" % (complex_dtype_to_name(dtype_z), y)
+            y = "{}_cast({})".format(complex_dtype_to_name(dtype_z), y)
 
     if x_is_complex and y_is_complex:
-        yox = "%s_divide(%s, %s)" % (complex_dtype_to_name(dtype_z), y, x)
+        yox = "{}_divide({}, {})".format(complex_dtype_to_name(dtype_z), y, x)
     elif not y_is_complex and x_is_complex:
-        yox = "%s_rdivide(%s, %s)" % (complex_dtype_to_name(dtype_z), y, x)
+        yox = "{}_rdivide({}, {})".format(complex_dtype_to_name(dtype_z), y, x)
     elif y_is_complex and not x_is_complex:
-        yox = "%s_divider(%s, %s)" % (complex_dtype_to_name(dtype_z), y, x)
+        yox = "{}_divider({}, {})".format(complex_dtype_to_name(dtype_z), y, x)
     else:
-        yox = "%s / %s" % (y, x)
+        yox = f"{y} / {x}"
 
     return get_elwise_kernel(context,
-            "%(tp_z)s *z, %(tp_x)s *x, %(tp_y)s y" % {
-                "tp_x": dtype_to_ctype(dtype_x),
-                "tp_y": dtype_to_ctype(dtype_y),
-                "tp_z": dtype_to_ctype(dtype_z),
-                },
+            "{tp_z} *z, {tp_x} *x, {tp_y} y".format(
+                tp_x=dtype_to_ctype(dtype_x),
+                tp_y=dtype_to_ctype(dtype_y),
+                tp_z=dtype_to_ctype(dtype_z),
+                ),
             "z[i] = %s" % yox,
             name="divide_r")
 
@@ -711,9 +709,9 @@ def get_rdivide_elwise_kernel(context, dtype_x, dtype_y, dtype_z):
 @context_dependent_memoize
 def get_fill_kernel(context, dtype):
     return get_elwise_kernel(context,
-            "%(tp)s *z, %(tp)s a" % {
-                "tp": dtype_to_ctype(dtype),
-                },
+            "{tp} *z, {tp} a".format(
+                tp=dtype_to_ctype(dtype),
+                ),
             "z[i] = a",
             preamble=dtype_to_c_struct(context.devices[0], dtype),
             name="fill")
@@ -722,9 +720,9 @@ def get_fill_kernel(context, dtype):
 @context_dependent_memoize
 def get_reverse_kernel(context, dtype):
     return get_elwise_kernel(context,
-            "%(tp)s *z, %(tp)s *y" % {
-                "tp": dtype_to_ctype(dtype),
-                },
+            "{tp} *z, {tp} *y".format(
+                tp=dtype_to_ctype(dtype),
+                ),
             "z[i] = y[n-1-i]",
             name="reverse")
 
@@ -770,23 +768,23 @@ def get_pow_kernel(context, dtype_x, dtype_y, dtype_z,
 
     if z_is_complex and dtype_x != dtype_y:
         if x_is_complex and dtype_x != dtype_z:
-            x = "%s_cast(%s)" % (complex_dtype_to_name(dtype_z), x)
+            x = "{}_cast({})".format(complex_dtype_to_name(dtype_z), x)
         if y_is_complex and dtype_y != dtype_z:
-            y = "%s_cast(%s)" % (complex_dtype_to_name(dtype_z), y)
+            y = "{}_cast({})".format(complex_dtype_to_name(dtype_z), y)
     elif dtype_x != dtype_y:
         if dtype_x != dtype_z:
-            x = "(%s) (%s)" % (dtype_to_ctype(dtype_z), x)
+            x = "({}) ({})".format(dtype_to_ctype(dtype_z), x)
         if dtype_y != dtype_z:
-            y = "(%s) (%s)" % (dtype_to_ctype(dtype_z), y)
+            y = "({}) ({})".format(dtype_to_ctype(dtype_z), y)
 
     if x_is_complex and y_is_complex:
-        result = "%s_pow(%s, %s)" % (complex_dtype_to_name(dtype_z), x, y)
+        result = "{}_pow({}, {})".format(complex_dtype_to_name(dtype_z), x, y)
     elif x_is_complex and not y_is_complex:
-        result = "%s_powr(%s, %s)" % (complex_dtype_to_name(dtype_z), x, y)
+        result = "{}_powr({}, {})".format(complex_dtype_to_name(dtype_z), x, y)
     elif not x_is_complex and y_is_complex:
-        result = "%s_rpow(%s, %s)" % (complex_dtype_to_name(dtype_z), x, y)
+        result = "{}_rpow({}, {})".format(complex_dtype_to_name(dtype_z), x, y)
     else:
-        result = "pow(%s, %s)" % (x, y)
+        result = f"pow({x}, {y})"
 
     return get_elwise_kernel(context,
             ("%(tp_z)s *z, " + x_ctype + ", "+y_ctype) % {
@@ -882,7 +880,7 @@ def get_binary_func_kernel(context, func_name, x_dtype, y_dtype, out_dtype,
 def get_float_binary_func_kernel(context, func_name, x_dtype, y_dtype,
                                  out_dtype, preamble="", name=None):
     if (np.array(0, x_dtype) * np.array(0, y_dtype)).itemsize > 4:
-        arg_type = 'double'
+        arg_type = "double"
         preamble = """
         #if __OPENCL_C_VERSION__ < 120
         #pragma OPENCL EXTENSION cl_khr_fp64: enable
@@ -890,13 +888,13 @@ def get_float_binary_func_kernel(context, func_name, x_dtype, y_dtype,
         #define PYOPENCL_DEFINE_CDOUBLE
         """ + preamble
     else:
-        arg_type = 'float'
+        arg_type = "float"
     return get_elwise_kernel(context, [
         VectorArg(out_dtype, "z", with_offset=True),
         VectorArg(x_dtype, "x", with_offset=True),
         VectorArg(y_dtype, "y", with_offset=True),
         ],
-        "z[i] = %s((%s)x[i], (%s)y[i])" % (func_name, arg_type, arg_type),
+        f"z[i] = {func_name}(({arg_type})x[i], ({arg_type})y[i])",
         name="%s_kernel" % func_name if name is None else name,
         preamble=preamble)
 
@@ -904,7 +902,7 @@ def get_float_binary_func_kernel(context, func_name, x_dtype, y_dtype,
 @context_dependent_memoize
 def get_fmod_kernel(context, out_dtype=np.float32, arg_dtype=np.float32,
                     mod_dtype=np.float32):
-    return get_float_binary_func_kernel(context, 'fmod', arg_dtype,
+    return get_float_binary_func_kernel(context, "fmod", arg_dtype,
                                         mod_dtype, out_dtype)
 
 
@@ -942,7 +940,7 @@ def get_frexp_kernel(context, sign_dtype=np.float32, exp_dtype=np.float32,
 def get_ldexp_kernel(context, out_dtype=np.float32, sig_dtype=np.float32,
                      expt_dtype=np.float32):
     return get_binary_func_kernel(
-        context, '_PYOCL_LDEXP', sig_dtype, expt_dtype, out_dtype,
+        context, "_PYOCL_LDEXP", sig_dtype, expt_dtype, out_dtype,
         preamble="#define _PYOCL_LDEXP(x, y) ldexp(x, (int)(y))",
         name="ldexp_kernel")
 
diff --git a/pyopencl/invoker.py b/pyopencl/invoker.py
index 57ab4b1f4ce29ebe9ddcd145e6efee8a3056038c..c996768d97d8f9a3e58a99e4839db5f37143128e 100644
--- a/pyopencl/invoker.py
+++ b/pyopencl/invoker.py
@@ -1,5 +1,3 @@
-from __future__ import division, absolute_import
-
 __copyright__ = """
 Copyright (C) 2017 Andreas Kloeckner
 """
@@ -32,22 +30,20 @@ import pyopencl._cl as _cl
 from pytools.persistent_dict import WriteOncePersistentDict
 from pyopencl.tools import _NumpyTypesKeyBuilder
 
-_PYPY = '__pypy__' in sys.builtin_module_names
-_CPY2 = not _PYPY and sys.version_info < (3,)
-_CPY26 = _CPY2 and sys.version_info < (2, 7)
+_PYPY = "__pypy__" in sys.builtin_module_names
 
 
 # {{{ arg packing helpers
 
 _size_t_char = ({
-    8: 'Q',
-    4: 'L',
-    2: 'H',
-    1: 'B',
+    8: "Q",
+    4: "L",
+    2: "H",
+    1: "B",
 })[_cl._sizeof_size_t()]
 _type_char_map = {
-    'n': _size_t_char.lower(),
-    'N': _size_t_char
+    "n": _size_t_char.lower(),
+    "N": _size_t_char
 }
 del _size_t_char
 
@@ -59,9 +55,9 @@ del _size_t_char
 def generate_buffer_arg_setter(gen, arg_idx, buf_var):
     from pytools.py_codegen import Indentation
 
-    if _CPY2 or _PYPY:
+    if _PYPY:
         # https://github.com/numpy/numpy/issues/5381
-        gen("if isinstance({buf_var}, np.generic):".format(buf_var=buf_var))
+        gen(f"if isinstance({buf_var}, np.generic):")
         with Indentation(gen):
             if _PYPY:
                 gen("{buf_var} = np.asarray({buf_var})".format(buf_var=buf_var))
@@ -109,9 +105,9 @@ def generate_generic_arg_handling_body(num_args):
         gen("pass")
 
     for i in range(num_args):
-        gen("# process argument {arg_idx}".format(arg_idx=i))
+        gen(f"# process argument {i}")
         gen("")
-        gen("current_arg = {arg_idx}".format(arg_idx=i))
+        gen(f"current_arg = {i}")
         generate_generic_arg_handler(gen, i, "arg%d" % i)
         gen("")
 
@@ -139,9 +135,9 @@ def generate_specific_arg_handling_body(function_name,
         gen("pass")
 
     for arg_idx, arg_dtype in enumerate(scalar_arg_dtypes):
-        gen("# process argument {arg_idx}".format(arg_idx=arg_idx))
+        gen(f"# process argument {arg_idx}")
         gen("")
-        gen("current_arg = {arg_idx}".format(arg_idx=arg_idx))
+        gen(f"current_arg = {arg_idx}")
         arg_var = "arg%d" % arg_idx
 
         if arg_dtype is None:
@@ -204,16 +200,6 @@ def generate_specific_arg_handling_body(function_name,
 
             fp_arg_count += 2
 
-        elif arg_dtype.char in "IL" and _CPY26:
-            # Prevent SystemError: ../Objects/longobject.c:336: bad
-            # argument to internal function
-
-            gen(
-                    "buf = pack('{arg_char}', long({arg_var}))"
-                    .format(arg_char=arg_dtype.char, arg_var=arg_var))
-            generate_bytes_arg_setter(gen, cl_arg_idx, "buf")
-            cl_arg_idx += 1
-
         else:
             if arg_dtype.kind == "f":
                 fp_arg_count += 1
diff --git a/pyopencl/ipython_ext.py b/pyopencl/ipython_ext.py
index ce80fc07a8b774996f0154f34fec24d2d2b98e1f..619ac5908b2bc1925ad302146b6b116e638b532e 100644
--- a/pyopencl/ipython_ext.py
+++ b/pyopencl/ipython_ext.py
@@ -1,15 +1,11 @@
-from __future__ import division
-from __future__ import absolute_import
-
 from IPython.core.magic import (magics_class, Magics, cell_magic, line_magic)
 
 import pyopencl as cl
 import sys
-import six
 
 
 def _try_to_utf8(text):
-    if isinstance(text, six.text_type):
+    if isinstance(text, str):
         return text.encode("utf8")
     return text
 
@@ -48,16 +44,16 @@ class PyOpenCLMagics(Magics):
     def cl_kernel(self, line, cell):
         kernel = cell
 
-        opts, args = self.parse_options(line, 'o:')
-        build_options = opts.get('o', '')
+        opts, args = self.parse_options(line, "o:")
+        build_options = opts.get("o", "")
 
         self._run_kernel(kernel, build_options)
 
     def _load_kernel_and_options(self, line):
-        opts, args = self.parse_options(line, 'o:f:')
+        opts, args = self.parse_options(line, "o:f:")
 
-        build_options = opts.get('o')
-        kernel = self.shell.find_user_code(opts.get('f') or args)
+        build_options = opts.get("o")
+        kernel = self.shell.find_user_code(opts.get("f") or args)
 
         return kernel, build_options
 
@@ -72,9 +68,9 @@ class PyOpenCLMagics(Magics):
         header = "%%cl_kernel"
 
         if build_options:
-            header = '%s -o "%s"' % (header, build_options)
+            header = f'{header} -o "{build_options}"'
 
-        content = "%s\n\n%s" % (header, kernel)
+        content = f"{header}\n\n{kernel}"
 
         self.shell.set_next_input(content)
 
diff --git a/pyopencl/reduction.py b/pyopencl/reduction.py
index 7c25f05b50e840b249caece15637610bca79f957..00995450831b101cefca040dfc756a4261e52fd7 100644
--- a/pyopencl/reduction.py
+++ b/pyopencl/reduction.py
@@ -1,8 +1,5 @@
 """Computation of reductions on vectors."""
 
-from __future__ import division
-from __future__ import absolute_import
-from six.moves import zip
 
 __copyright__ = "Copyright (C) 2010 Andreas Kloeckner"
 
@@ -537,22 +534,22 @@ def _get_dot_expr(dtype_out, dtype_a, dtype_b, conjugate_first,
     b = "b[%s]" % index_expr
 
     if a_is_complex and (dtype_a != dtype_out):
-        a = "%s_cast(%s)" % (complex_dtype_to_name(dtype_out), a)
+        a = "{}_cast({})".format(complex_dtype_to_name(dtype_out), a)
     if b_is_complex and (dtype_b != dtype_out):
-        b = "%s_cast(%s)" % (complex_dtype_to_name(dtype_out), b)
+        b = "{}_cast({})".format(complex_dtype_to_name(dtype_out), b)
 
     if a_is_complex and conjugate_first and a_is_complex:
-        a = "%s_conj(%s)" % (
+        a = "{}_conj({})".format(
                 complex_dtype_to_name(dtype_out), a)
 
     if a_is_complex and not b_is_complex:
-        map_expr = "%s_mulr(%s, %s)" % (complex_dtype_to_name(dtype_out), a, b)
+        map_expr = "{}_mulr({}, {})".format(complex_dtype_to_name(dtype_out), a, b)
     elif not a_is_complex and b_is_complex:
-        map_expr = "%s_rmul(%s, %s)" % (complex_dtype_to_name(dtype_out), a, b)
+        map_expr = "{}_rmul({}, {})".format(complex_dtype_to_name(dtype_out), a, b)
     elif a_is_complex and b_is_complex:
-        map_expr = "%s_mul(%s, %s)" % (complex_dtype_to_name(dtype_out), a, b)
+        map_expr = "{}_mul({}, {})".format(complex_dtype_to_name(dtype_out), a, b)
     else:
-        map_expr = "%s*%s" % (a, b)
+        map_expr = f"{a}*{b}"
 
     return map_expr, dtype_out, dtype_b
 
@@ -634,10 +631,10 @@ def get_minmax_kernel(ctx, what, dtype):
 
     return ReductionKernel(ctx, dtype,
             neutral=get_minmax_neutral(what, dtype),
-            reduce_expr="%(reduce_expr)s" % {"reduce_expr": reduce_expr},
-            arguments="const %(tp)s *in" % {
-                "tp": dtype_to_ctype(dtype),
-                }, preamble="#define MY_INFINITY (1./0)")
+            reduce_expr=f"{reduce_expr}",
+            arguments="const {tp} *in".format(
+                tp=dtype_to_ctype(dtype),
+                ), preamble="#define MY_INFINITY (1./0)")
 
 
 @context_dependent_memoize
@@ -651,7 +648,7 @@ def get_subset_minmax_kernel(ctx, what, dtype, dtype_subset):
 
     return ReductionKernel(ctx, dtype,
             neutral=get_minmax_neutral(what, dtype),
-            reduce_expr="%(reduce_expr)s" % {"reduce_expr": reduce_expr},
+            reduce_expr=f"{reduce_expr}",
             map_expr="in[lookup_tbl[i]]",
             arguments=(
                 "const %(tp_lut)s *lookup_tbl, "
diff --git a/pyopencl/scan.py b/pyopencl/scan.py
index 6e40c06c883619758b738069290710112b8ed055..24b759069d54303c2607a36d8c2013f71a192dee 100644
--- a/pyopencl/scan.py
+++ b/pyopencl/scan.py
@@ -1,6 +1,5 @@
 """Scan primitive."""
 
-from __future__ import division, absolute_import
 
 __copyright__ = """
 Copyright 2011-2012 Andreas Kloeckner
@@ -23,9 +22,6 @@ limitations under the License.
 Derived from code within the Thrust project, https://github.com/thrust/thrust/
 """
 
-import six
-from six.moves import range, zip
-
 import numpy as np
 
 import pyopencl as cl
@@ -939,7 +935,7 @@ class ScanPerformanceWarning(UserWarning):
     pass
 
 
-class _GenericScanKernelBase(object):
+class _GenericScanKernelBase:
     # {{{ constructor, argument processing
 
     def __init__(self, ctx, dtype,
@@ -1733,7 +1729,7 @@ class _LegacyScanKernelBase(GenericScanKernel):
         scan_ctype = dtype_to_ctype(dtype)
         GenericScanKernel.__init__(self,
                 ctx, dtype,
-                arguments="__global %s *input_ary, __global %s *output_ary" % (
+                arguments="__global {} *input_ary, __global {} *output_ary".format(
                     scan_ctype, scan_ctype),
                 input_expr="input_ary[i]",
                 scan_expr=scan_expr,
@@ -1752,7 +1748,7 @@ class _LegacyScanKernelBase(GenericScanKernel):
         if output_ary is None:
             output_ary = input_ary
 
-        if isinstance(output_ary, (str, six.text_type)) and output_ary == "new":
+        if isinstance(output_ary, str) and output_ary == "new":
             output_ary = cl.array.empty_like(input_ary, allocator=allocator)
 
         if input_ary.shape != output_ary.shape:
diff --git a/pyopencl/tools.py b/pyopencl/tools.py
index ae1609a5fd9ee1ecde74f826aac6f9c087884b56..5f5e7f675d48124a43a43fa82dd6519eed625d5b 100644
--- a/pyopencl/tools.py
+++ b/pyopencl/tools.py
@@ -1,6 +1,5 @@
 """Various helpful bits and pieces without much of a common theme."""
 
-from __future__ import division, absolute_import
 
 __copyright__ = "Copyright (C) 2010 Andreas Kloeckner"
 
@@ -28,8 +27,7 @@ OTHER DEALINGS IN THE SOFTWARE.
 """
 
 
-import six
-from six.moves import zip, intern
+from sys import intern
 
 # Do not add a pyopencl import here: This will add an import cycle.
 
@@ -219,7 +217,7 @@ def get_test_platforms_and_devices(plat_dev_string=None):
 
         found = False
         for obj in objs:
-            if identifier.lower() in (obj.name + ' ' + obj.vendor).lower():
+            if identifier.lower() in (obj.name + " " + obj.vendor).lower():
                 return obj
         if not found:
             raise RuntimeError("object '%s' not found" % identifier)
@@ -321,7 +319,7 @@ def pytest_generate_tests_for_pyopencl(metafunc):
 
 # {{{ C argument lists
 
-class Argument(object):
+class Argument:
     pass
 
 
@@ -331,7 +329,7 @@ class DtypedArgument(Argument):
         self.name = name
 
     def __repr__(self):
-        return "%s(%r, %s)" % (
+        return "{}({!r}, {})".format(
                 self.__class__.__name__,
                 self.name,
                 self.dtype)
@@ -345,17 +343,17 @@ class VectorArg(DtypedArgument):
     def declarator(self):
         if self.with_offset:
             # Two underscores -> less likelihood of a name clash.
-            return "__global %s *%s__base, long %s__offset" % (
+            return "__global {} *{}__base, long {}__offset".format(
                     dtype_to_ctype(self.dtype), self.name, self.name)
         else:
-            result = "__global %s *%s" % (dtype_to_ctype(self.dtype), self.name)
+            result = "__global {} *{}".format(dtype_to_ctype(self.dtype), self.name)
 
         return result
 
 
 class ScalarArg(DtypedArgument):
     def declarator(self):
-        return "%s %s" % (dtype_to_ctype(self.dtype), self.name)
+        return "{} {}".format(dtype_to_ctype(self.dtype), self.name)
 
 
 class OtherArg(Argument):
@@ -503,7 +501,7 @@ class _CDeclList:
             self.add_dtype(dtype.subdtype[0])
             return
 
-        for name, field_data in sorted(six.iteritems(dtype.fields)):
+        for name, field_data in sorted(dtype.fields.items()):
             field_dtype, offset = field_data[:2]
             self.add_dtype(field_dtype)
 
@@ -583,7 +581,7 @@ def match_dtype_to_c_struct(device, name, dtype, context=None):
 
     import pyopencl as cl
 
-    fields = sorted(six.iteritems(dtype.fields),
+    fields = sorted(dtype.fields.items(),
             key=lambda name_dtype_offset: name_dtype_offset[1][1])
 
     c_fields = []
@@ -600,13 +598,14 @@ def match_dtype_to_c_struct(device, name, dtype, context=None):
                     dims_str += "[%d]" % dim
             except TypeError:
                 dims_str = "[%d]" % array_dims
-            c_fields.append("  %s %s%s;" % (
+            c_fields.append("  {} {}{};".format(
                 dtype_to_ctype(array_dtype), field_name, dims_str)
             )
         else:
-            c_fields.append("  %s %s;" % (dtype_to_ctype(field_dtype), field_name))
+            c_fields.append(
+                    "  {} {};".format(dtype_to_ctype(field_dtype), field_name))
 
-    c_decl = "typedef struct {\n%s\n} %s;\n\n" % (
+    c_decl = "typedef struct {{\n{}\n}} {};\n\n".format(
             "\n".join(c_fields),
             name)
 
@@ -683,12 +682,12 @@ def match_dtype_to_c_struct(device, name, dtype, context=None):
 
     try:
         dtype_arg_dict = {
-            'names': [field_name
+            "names": [field_name
                       for field_name, (field_dtype, offset) in fields],
-            'formats': [field_dtype
+            "formats": [field_dtype
                         for field_name, (field_dtype, offset) in fields],
-            'offsets': [int(x) for x in offsets],
-            'itemsize': int(size_and_offsets[0]),
+            "offsets": [int(x) for x in offsets],
+            "itemsize": int(size_and_offsets[0]),
             }
         dtype = np.dtype(dtype_arg_dict)
         if dtype.itemsize != size_and_offsets[0]:
@@ -704,8 +703,8 @@ def match_dtype_to_c_struct(device, name, dtype, context=None):
             for offset, (field_name, (field_dtype, _)) in zip(offsets, fields):
                 if offset > total_size:
                     padding_count += 1
-                    yield ('__pycl_padding%d' % padding_count,
-                           'V%d' % offset - total_size)
+                    yield ("__pycl_padding%d" % padding_count,
+                           "V%d" % (offset - total_size))
                 yield field_name, field_dtype
                 total_size = field_dtype.itemsize + offset
         dtype = np.dtype(list(calc_field_type()))
@@ -731,7 +730,7 @@ def dtype_to_c_struct(device, dtype):
     def dtypes_match():
         result = len(dtype.fields) == len(matched_dtype.fields)
 
-        for name, val in six.iteritems(dtype.fields):
+        for name, val in dtype.fields.items():
             result = result and matched_dtype.fields[name] == val
 
         return result
@@ -802,7 +801,7 @@ class _ScalarArgPlaceholder(_ArgumentPlaceholder):
     target_class = ScalarArg
 
 
-class _TemplateRenderer(object):
+class _TemplateRenderer:
     def __init__(self, template, type_aliases, var_values, context=None,
             options=[]):
         self.template = template
@@ -909,18 +908,18 @@ class _TemplateRenderer(object):
         if arguments is not None:
             cdl.visit_arguments(arguments)
 
-        for _, tv in sorted(six.iteritems(self.type_aliases)):
+        for _, tv in sorted(self.type_aliases.items()):
             cdl.add_dtype(tv)
 
         type_alias_decls = [
-                "typedef %s %s;" % (dtype_to_ctype(val), name)
-                for name, val in sorted(six.iteritems(self.type_aliases))
+                "typedef {} {};".format(dtype_to_ctype(val), name)
+                for name, val in sorted(self.type_aliases.items())
                 ]
 
         return cdl.get_declarations() + "\n" + "\n".join(type_alias_decls)
 
 
-class KernelTemplateBase(object):
+class KernelTemplateBase:
     def __init__(self, template_processor=None):
         self.template_processor = template_processor
 
@@ -963,7 +962,7 @@ class KernelTemplateBase(object):
     def build(self, context, *args, **kwargs):
         """Provide caching for an :meth:`build_inner`."""
 
-        cache_key = (context, args, tuple(sorted(six.iteritems(kwargs))))
+        cache_key = (context, args, tuple(sorted(kwargs.items())))
         try:
             return self.build_cache[cache_key]
         except KeyError:
@@ -1018,7 +1017,7 @@ def array_module(a):
 def is_spirv(s):
     spirv_magic = b"\x07\x23\x02\x03"
     return (
-            isinstance(s, six.binary_type)
+            isinstance(s, bytes)
             and (
                 s[:4] == spirv_magic
                 or s[:4] == spirv_magic[::-1]))
diff --git a/setup.cfg b/setup.cfg
index 2bc760d67cfc68d91478948399e51cf470abfe07..ad68ea236494ef28401610d829b2463349b45b36 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -2,3 +2,9 @@
 ignore = E126,E127,E128,E123,E226,E241,E242,E265,W503,E402
 max-line-length=85
 exclude=pyopencl/compyte/ndarray,pyopencl/compyte/array.py
+
+
+inline-quotes = "
+docstring-quotes = """
+multiline-quotes = """
+
diff --git a/setup.py b/setup.py
index 4beca19a8f0cd20fe685a89e2e085388d43e8018..2bb4252aaa42a3161bfd65c56ea0ad5dd930d4da 100644
--- a/setup.py
+++ b/setup.py
@@ -1,7 +1,5 @@
 #!/usr/bin/env python
-# -*- coding: utf-8 -*-
 
-from __future__ import absolute_import, print_function
 
 __copyright__ = """
 Copyright (C) 2009-15 Andreas Kloeckner
@@ -44,10 +42,10 @@ def get_config_schema():
             "-fvisibility=hidden"
             ]
 
-    if 'darwin' in sys.platform:
+    if "darwin" in sys.platform:
         import platform
         osx_ver, _, _ = platform.mac_ver()
-        osx_ver = '.'.join(osx_ver.split('.')[:2])
+        osx_ver = ".".join(osx_ver.split(".")[:2])
 
         sysroot_paths = [
                 "/Applications/Xcode.app/Contents/Developer/Platforms/"
@@ -57,14 +55,14 @@ def get_config_schema():
 
         default_libs = []
         default_cxxflags = default_cxxflags + [
-                '-stdlib=libc++', '-mmacosx-version-min=10.7',
-                '-arch', 'i386', '-arch', 'x86_64'
+                "-stdlib=libc++", "-mmacosx-version-min=10.7",
+                "-arch", "i386", "-arch", "x86_64"
                 ]
 
         from os.path import isdir
         for srp in sysroot_paths:
             if isdir(srp):
-                default_cxxflags.extend(['-isysroot', srp])
+                default_cxxflags.extend(["-isysroot", srp])
                 break
 
         default_ldflags = default_cxxflags[:] + ["-Wl,-framework,OpenCL"]
@@ -151,7 +149,7 @@ def main():
     finally:
         version_file.close()
 
-    exec(compile(version_file_contents, "pyopencl/version.py", 'exec'), ver_dic)
+    exec(compile(version_file_contents, "pyopencl/version.py", "exec"), ver_dic)
 
     try:
         import mako  # noqa
@@ -194,27 +192,25 @@ def main():
             # metadata
             version=ver_dic["VERSION_TEXT"],
             description="Python wrapper for OpenCL",
-            long_description=open("README.rst", "rt").read(),
+            long_description=open("README.rst").read(),
             author="Andreas Kloeckner",
             author_email="inform@tiker.net",
             license="MIT",
             url="http://mathema.tician.de/software/pyopencl",
             classifiers=[
-                'Environment :: Console',
-                'Development Status :: 5 - Production/Stable',
-                'Intended Audience :: Developers',
-                'Intended Audience :: Other Audience',
-                'Intended Audience :: Science/Research',
-                'License :: OSI Approved :: MIT License',
-                'Natural Language :: English',
-                'Programming Language :: C++',
-                'Programming Language :: Python',
-                'Programming Language :: Python :: 3',
-                'Programming Language :: Python :: 3.2',
-                'Programming Language :: Python :: 3.3',
-                'Topic :: Scientific/Engineering',
-                'Topic :: Scientific/Engineering :: Mathematics',
-                'Topic :: Scientific/Engineering :: Physics',
+                "Environment :: Console",
+                "Development Status :: 5 - Production/Stable",
+                "Intended Audience :: Developers",
+                "Intended Audience :: Other Audience",
+                "Intended Audience :: Science/Research",
+                "License :: OSI Approved :: MIT License",
+                "Natural Language :: English",
+                "Programming Language :: C++",
+                "Programming Language :: Python",
+                "Programming Language :: Python :: 3",
+                "Topic :: Scientific/Engineering",
+                "Topic :: Scientific/Engineering :: Mathematics",
+                "Topic :: Scientific/Engineering :: Physics",
                 ],
 
             # build info
@@ -238,7 +234,7 @@ def main():
                     define_macros=list(conf["EXTRA_DEFINES"].items()),
                     extra_compile_args=conf["CXXFLAGS"],
                     extra_link_args=conf["LDFLAGS"],
-                    language='c++',
+                    language="c++",
                     ),
                 ],
 
@@ -253,12 +249,11 @@ def main():
                 "pytools>=2017.6",
                 "decorator>=3.2.0",
                 "appdirs>=1.4.0",
-                "six>=1.9.0",
                 # "Mako>=0.3.6",
                 ],
             extras_require={
-                'pocl':  ["pocl_binary_distribution>=1.2"],
-                'oclgrind':  ["oclgrind_binary_distribution>=18.3"],
+                "pocl":  ["pocl_binary_distribution>=1.2"],
+                "oclgrind":  ["oclgrind_binary_distribution>=18.3"],
             },
             include_package_data=True,
             package_data={
@@ -270,11 +265,11 @@ def main():
                         ]
                     },
 
-            cmdclass={'build_ext': PybindBuildExtCommand},
+            cmdclass={"build_ext": PybindBuildExtCommand},
             zip_safe=False)
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     main()
 
 # vim: foldmethod=marker
diff --git a/test/test_algorithm.py b/test/test_algorithm.py
index 38a91cef7ee217d05f40ab699806296cd4056662..660c7dfc868f9ae8cf356b4199da68f385e5d8dd 100644
--- a/test/test_algorithm.py
+++ b/test/test_algorithm.py
@@ -1,6 +1,5 @@
 #! /usr/bin/env python
 
-from __future__ import division, with_statement, absolute_import, print_function
 
 __copyright__ = "Copyright (C) 2013 Andreas Kloeckner"
 
@@ -27,7 +26,6 @@ THE SOFTWARE.
 # avoid spurious: pytest.mark.parametrize is not callable
 # pylint: disable=not-callable
 
-from six.moves import range, zip
 import numpy as np
 import numpy.linalg as la
 import sys
@@ -78,7 +76,7 @@ def test_elwise_kernel_with_options(ctx_factory):
 
     in_gpu = clrand(queue, (50,), np.float32)
 
-    options = ['-D', 'ADD_ONE']
+    options = ["-D", "ADD_ONE"]
     add_one = ElementwiseKernel(
         context,
         "float* out, const float *in",
@@ -381,7 +379,7 @@ def test_dot(ctx_factory):
                 vdot_ab = np.vdot(a, b)
             except NotImplementedError:
                 import sys
-                is_pypy = '__pypy__' in sys.builtin_module_names
+                is_pypy = "__pypy__" in sys.builtin_module_names
                 if is_pypy:
                     print("PYPY: VDOT UNIMPLEMENTED")
                     continue
@@ -503,7 +501,7 @@ def summarize_error(obtained, desired, orig, thresh=1e-5):
             bad_count += 1
 
             if bad_count < bad_limit:
-                entries.append("%r (want: %r, got: %r, orig: %r)" % (
+                entries.append("{!r} (want: {!r}, got: {!r}, orig: {!r})".format(
                     obtained[i], desired[i], obtained[i], orig[i]))
         else:
             if bad_count:
@@ -852,7 +850,7 @@ def test_sort(ctx_factory, scan_kernel):
 
         numpy_elapsed = numpy_end-dev_end
         dev_elapsed = dev_end-dev_start
-        print("  dev: %.2f MKeys/s numpy: %.2f MKeys/s ratio: %.2fx" % (
+        print("  dev: {:.2f} MKeys/s numpy: {:.2f} MKeys/s ratio: {:.2f}x".format(
                 1e-6*n/dev_elapsed, 1e-6*n/numpy_elapsed, numpy_elapsed/dev_elapsed))
         assert (a_dev_sorted.get() == a_sorted).all()
 
@@ -1073,7 +1071,7 @@ def test_bitonic_sort(ctx_factory, size, dtype):
 @pytest.mark.bitonic
 def test_bitonic_argsort(ctx_factory, size, dtype):
     import sys
-    is_pypy = '__pypy__' in sys.builtin_module_names
+    is_pypy = "__pypy__" in sys.builtin_module_names
 
     if not size and is_pypy:
         # https://bitbucket.org/pypy/numpy/issues/53/specifying-strides-on-zero-sized-array
diff --git a/test/test_array.py b/test/test_array.py
index a9b0b6124d71e62833552fce1d9d0ea4b0fc5b4f..2cbef16c0d8bbd168ccb9a371f1904cb8cfc022a 100644
--- a/test/test_array.py
+++ b/test/test_array.py
@@ -1,5 +1,4 @@
 #! /usr/bin/env python
-from __future__ import division, with_statement, absolute_import, print_function
 
 __copyright__ = "Copyright (C) 2009 Andreas Kloeckner"
 
@@ -30,7 +29,6 @@ import numpy as np
 import numpy.linalg as la
 import sys
 
-from six.moves import range
 import pytest
 
 import pyopencl as cl
@@ -157,7 +155,7 @@ def test_mix_complex(ctx_factory):
                         # served a Python complex that is really a
                         # smaller numpy complex.
 
-                        print("HOST_DTYPE: %s DEV_DTYPE: %s" % (
+                        print("HOST_DTYPE: {} DEV_DTYPE: {}".format(
                                 host_result.dtype, dev_result.dtype))
 
                         dev_result = dev_result.astype(host_result.dtype)
@@ -533,8 +531,8 @@ def test_bitwise(ctx_factory):
 
         a = a_dev.get()
         b = b_dev.get()
-        s = int((clrand(queue, (), a=int32_min, b=1+int32_max, dtype=np.int64)
-                 .astype(b_dtype).get()))
+        s = int(clrand(queue, (), a=int32_min, b=1+int32_max, dtype=np.int64)
+                 .astype(b_dtype).get())
 
         import operator as o
 
@@ -715,7 +713,7 @@ def test_nan_arithmetic(ctx_factory):
         a = np.random.randn(*shape).astype(np.float32)
         from random import randrange
         for i in range(size // 10):
-            a[randrange(0, size)] = float('nan')
+            a[randrange(0, size)] = float("nan")
         return a
 
     size = 1 << 20
@@ -774,7 +772,7 @@ def test_diff(ctx_factory):
     a = a_dev.get()
 
     err = la.norm(
-            (cl.array.diff(a_dev).get() - np.diff(a)))
+            cl.array.diff(a_dev).get() - np.diff(a))
     assert err < 1e-4
 
 
@@ -1058,7 +1056,7 @@ def test_reshape(ctx_factory):
     # using -1 as unknown dimension
     assert a_dev.reshape(-1, 32).shape == (4, 32)
     assert a_dev.reshape((32, -1)).shape == (32, 4)
-    assert a_dev.reshape(((8, -1, 4))).shape == (8, 4, 4)
+    assert a_dev.reshape((8, -1, 4)).shape == (8, 4, 4)
 
     import pytest
     with pytest.raises(ValueError):
@@ -1224,7 +1222,7 @@ def test_get_async(ctx_factory):
     context = ctx_factory()
     queue = cl.CommandQueue(context)
 
-    a = np.random.rand(10**6).astype(np.dtype('float32'))
+    a = np.random.rand(10**6).astype(np.dtype("float32"))
     a_gpu = cl_array.to_device(queue, a)
     b = a + a**5 + 1
     b_gpu = a_gpu + a_gpu**5 + 1
@@ -1253,7 +1251,7 @@ def test_outoforderqueue_get(ctx_factory):
                properties=cl.command_queue_properties.OUT_OF_ORDER_EXEC_MODE_ENABLE)
     except Exception:
         pytest.skip("out-of-order queue not available")
-    a = np.random.rand(10**6).astype(np.dtype('float32'))
+    a = np.random.rand(10**6).astype(np.dtype("float32"))
     a_gpu = cl_array.to_device(queue, a)
     b_gpu = a_gpu + a_gpu**5 + 1
     b1 = b_gpu.get()  # testing that this waits for events
@@ -1268,7 +1266,7 @@ def test_outoforderqueue_copy(ctx_factory):
                properties=cl.command_queue_properties.OUT_OF_ORDER_EXEC_MODE_ENABLE)
     except Exception:
         pytest.skip("out-of-order queue not available")
-    a = np.random.rand(10**6).astype(np.dtype('float32'))
+    a = np.random.rand(10**6).astype(np.dtype("float32"))
     a_gpu = cl_array.to_device(queue, a)
     c_gpu = a_gpu**2 - 7
     b_gpu = c_gpu.copy()  # testing that this waits for and creates events
@@ -1286,8 +1284,8 @@ def test_outoforderqueue_indexing(ctx_factory):
                properties=cl.command_queue_properties.OUT_OF_ORDER_EXEC_MODE_ENABLE)
     except Exception:
         pytest.skip("out-of-order queue not available")
-    a = np.random.rand(10**6).astype(np.dtype('float32'))
-    i = (8e5 + 1e5 * np.random.rand(10**5)).astype(np.dtype('int32'))
+    a = np.random.rand(10**6).astype(np.dtype("float32"))
+    i = (8e5 + 1e5 * np.random.rand(10**5)).astype(np.dtype("int32"))
     a_gpu = cl_array.to_device(queue, a)
     i_gpu = cl_array.to_device(queue, i)
     c_gpu = (a_gpu**2)[i_gpu - 10000]
@@ -1310,7 +1308,7 @@ def test_outoforderqueue_reductions(ctx_factory):
     except Exception:
         pytest.skip("out-of-order queue not available")
     # 0/1 values to avoid accumulated rounding error
-    a = (np.random.rand(10**6) > 0.5).astype(np.dtype('float32'))
+    a = (np.random.rand(10**6) > 0.5).astype(np.dtype("float32"))
     a[800000] = 10  # all<5 looks true until near the end
     a_gpu = cl_array.to_device(queue, a)
     b1 = cl_array.sum(a_gpu).get()
diff --git a/test/test_arrays_in_structs.py b/test/test_arrays_in_structs.py
index 221742eb8de1930c60fbdc8608aa2b4f1585989a..625b6105448080bf361aa02d66950dba28207fe9 100644
--- a/test/test_arrays_in_structs.py
+++ b/test/test_arrays_in_structs.py
@@ -1,5 +1,3 @@
-from __future__ import division, with_statement, absolute_import, print_function
-
 __copyright__ = "Copyright (C) 2020 Sotiris Niarchos"
 
 __license__ = """
diff --git a/test/test_clmath.py b/test/test_clmath.py
index 74b40208ebdb81f2d97c8990e00944de0510d2ce..409875f8a1c1ff842982dbf4247637f99a7b6cd5 100644
--- a/test/test_clmath.py
+++ b/test/test_clmath.py
@@ -1,5 +1,3 @@
-from __future__ import division, print_function, absolute_import
-
 __copyright__ = "Copyright (C) 2009 Andreas Kloeckner"
 
 __license__ = """
@@ -26,7 +24,6 @@ THE SOFTWARE.
 # avoid spurious: Module 'scipy.special' has no 'jn' member; maybe 'jv'
 # pylint: disable=not-callable,no-member
 
-from six.moves import range
 
 import math
 import numpy as np
@@ -345,7 +342,7 @@ def test_complex_bessel(ctx_factory, ref_src):
     if ref_src == "pyfmmlib":
         pyfmmlib = pytest.importorskip("pyfmmlib")
 
-        jv_ref = np.zeros(len(z), 'complex')
+        jv_ref = np.zeros(len(z), "complex")
 
         vin = v+1
 
@@ -457,7 +454,7 @@ def test_outoforderqueue_clmath(ctx_factory):
                properties=cl.command_queue_properties.OUT_OF_ORDER_EXEC_MODE_ENABLE)
     except Exception:
         pytest.skip("out-of-order queue not available")
-    a = np.random.rand(10**6).astype(np.dtype('float32'))
+    a = np.random.rand(10**6).astype(np.dtype("float32"))
     a_gpu = cl_array.to_device(queue, a)
     # testing that clmath functions wait for and create events
     b_gpu = clmath.fabs(clmath.sin(a_gpu * 5))
diff --git a/test/test_clrandom.py b/test/test_clrandom.py
index 53de4821c6ee401341dd7e72328ec07d93d10b61..1ce479b2b1f18b099c9457d43a03c4b2327ea77f 100644
--- a/test/test_clrandom.py
+++ b/test/test_clrandom.py
@@ -1,5 +1,3 @@
-from __future__ import division, print_function, absolute_import
-
 __copyright__ = "Copyright (C) 2018 Matt Wala"
 
 __license__ = """
diff --git a/test/test_enqueue_copy.py b/test/test_enqueue_copy.py
index bfbf4f16edd757c3bf8e8bc59fb2d2ed311c0d29..162e5292af57f4586269a0ce2a72b2a5ecf2faf5 100644
--- a/test/test_enqueue_copy.py
+++ b/test/test_enqueue_copy.py
@@ -1,5 +1,4 @@
 #! /usr/bin/env python
-from __future__ import division, with_statement, absolute_import, print_function
 
 __copyright__ = "Copyright (C) 2016 Shane J. Latham"
 
diff --git a/test/test_wrapper.py b/test/test_wrapper.py
index afc239d9796e6cebbd8b3352013ea8bc2f80cb3b..45e2d7476bf2939bcdee66c487c412667183e831 100644
--- a/test/test_wrapper.py
+++ b/test/test_wrapper.py
@@ -1,5 +1,3 @@
-from __future__ import division, absolute_import, print_function
-
 __copyright__ = "Copyright (C) 2009 Andreas Kloeckner"
 
 __license__ = """
@@ -25,7 +23,6 @@ THE SOFTWARE.
 # avoid spurious: pytest.mark.parametrize is not callable
 # pylint: disable=not-callable
 
-from six.moves import range
 
 import numpy as np
 import numpy.linalg as la
@@ -49,7 +46,7 @@ else:
     faulthandler.enable()
 
 
-def _skip_if_pocl(plat, up_to_version, msg='unsupported by pocl'):
+def _skip_if_pocl(plat, up_to_version, msg="unsupported by pocl"):
     if plat.vendor == "The pocl project":
         if up_to_version is None or get_pocl_version(plat) <= up_to_version:
             pytest.skip(msg)
@@ -385,7 +382,7 @@ def test_image_2d(ctx_factory):
     if "Intel" in device.vendor and "31360.31426" in device.version:
         from pytest import skip
         skip("images crashy on %s" % device)
-    _skip_if_pocl(device.platform, None, 'pocl does not support CL_ADDRESS_CLAMP')
+    _skip_if_pocl(device.platform, None, "pocl does not support CL_ADDRESS_CLAMP")
 
     prg = cl.Program(context, """
         __kernel void copy_image(
@@ -457,7 +454,7 @@ def test_image_3d(ctx_factory):
     if device.platform.vendor == "Intel(R) Corporation":
         from pytest import skip
         skip("images crashy on %s" % device)
-    _skip_if_pocl(device.platform, None, 'pocl does not support CL_ADDRESS_CLAMP')
+    _skip_if_pocl(device.platform, None, "pocl does not support CL_ADDRESS_CLAMP")
 
     prg = cl.Program(context, """
         __kernel void copy_image_plane(
@@ -680,7 +677,7 @@ def test_enqueue_barrier_marker(ctx_factory):
     ctx = ctx_factory()
     # Still relevant on pocl 1.0RC1.
     _skip_if_pocl(
-            ctx.devices[0].platform, (1, 0), 'pocl crashes on enqueue_barrier')
+            ctx.devices[0].platform, (1, 0), "pocl crashes on enqueue_barrier")
 
     queue = cl.CommandQueue(ctx)
 
@@ -707,7 +704,7 @@ def test_unload_compiler(platform):
             or cl.get_cl_header_version() < (1, 2)):
         from pytest import skip
         skip("clUnloadPlatformCompiler is only available in OpenCL 1.2")
-    _skip_if_pocl(platform, (0, 13), 'pocl does not support unloading compiler')
+    _skip_if_pocl(platform, (0, 13), "pocl does not support unloading compiler")
     if platform.vendor == "Intel(R) Corporation":
         from pytest import skip
         skip("Intel proprietary driver does not support unloading compiler")
@@ -734,7 +731,7 @@ def test_platform_get_devices(ctx_factory):
         devs = platform.get_devices(dev_type)
         if dev_type in (cl.device_type.DEFAULT,
                         cl.device_type.ALL,
-                        getattr(cl.device_type, 'CUSTOM', None)):
+                        getattr(cl.device_type, "CUSTOM", None)):
             continue
         for dev in devs:
             assert dev.type & dev_type == dev_type
@@ -767,22 +764,22 @@ def test_user_event(ctx_factory):
     Thread(target=event_waiter1, args=(evt, 1)).start()
     sleep(.05)
     if status.get(1, False):
-        raise RuntimeError('UserEvent triggered before set_status')
+        raise RuntimeError("UserEvent triggered before set_status")
     evt.set_status(cl.command_execution_status.COMPLETE)
     sleep(.05)
     if not status.get(1, False):
-        raise RuntimeError('UserEvent.wait timeout')
+        raise RuntimeError("UserEvent.wait timeout")
     assert evt.command_execution_status == cl.command_execution_status.COMPLETE
 
     evt = cl.UserEvent(ctx)
     Thread(target=event_waiter2, args=(evt, 2)).start()
     sleep(.05)
     if status.get(2, False):
-        raise RuntimeError('UserEvent triggered before set_status')
+        raise RuntimeError("UserEvent triggered before set_status")
     evt.set_status(cl.command_execution_status.COMPLETE)
     sleep(.05)
     if not status.get(2, False):
-        raise RuntimeError('cl.wait_for_events timeout on UserEvent')
+        raise RuntimeError("cl.wait_for_events timeout on UserEvent")
     assert evt.command_execution_status == cl.command_execution_status.COMPLETE
 
 
@@ -798,8 +795,8 @@ def test_buffer_get_host_array(ctx_factory):
     buf = cl.Buffer(ctx, mf.READ_WRITE | mf.USE_HOST_PTR, hostbuf=host_buf)
     host_buf2 = buf.get_host_array(25, np.float32)
     assert (host_buf == host_buf2).all()
-    assert (host_buf.__array_interface__['data'][0]
-            == host_buf.__array_interface__['data'][0])
+    assert (host_buf.__array_interface__["data"][0]
+            == host_buf2.__array_interface__["data"][0])
     assert host_buf2.base is buf
 
     buf = cl.Buffer(ctx, mf.READ_WRITE | mf.ALLOC_HOST_PTR, size=100)
@@ -979,7 +976,7 @@ def test_spirv(ctx_factory):
 
 def test_coarse_grain_svm(ctx_factory):
     import sys
-    is_pypy = '__pypy__' in sys.builtin_module_names
+    is_pypy = "__pypy__" in sys.builtin_module_names
 
     ctx = ctx_factory()
     queue = cl.CommandQueue(ctx)
@@ -1036,7 +1033,7 @@ def test_coarse_grain_svm(ctx_factory):
 
 def test_fine_grain_svm(ctx_factory):
     import sys
-    is_pypy = '__pypy__' in sys.builtin_module_names
+    is_pypy = "__pypy__" in sys.builtin_module_names
 
     ctx = ctx_factory()
     queue = cl.CommandQueue(ctx)