diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index e5b382577c00b0879dabc9a09b5fe74db391a188..96fb4a49bf500e4e70494a6c5839385222c4c835 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -90,6 +90,21 @@ jobs:
                 build_py_project_in_conda_env
                 build_docs
 
+    examples:  # CI job for the example scripts; run_examples is provided by ci-support.sh
+        name: Examples
+        runs-on: ubuntu-latest
+        steps:
+        -   uses: actions/checkout@v2
+        -   name: "Main Script"  # EXTRA_INSTALL lists packages imported by the examples (PIL, cgen, mako, imageio)
+            run: |  # gl_* (OpenGL) examples are deleted before the run - presumably no GL context in CI; confirm
+                CONDA_ENVIRONMENT=.test-conda-env-py3.yml
+                curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/ci-support.sh
+                . ci-support.sh
+                EXTRA_INSTALL="pillow cgen mako imageio"
+                build_py_project_in_conda_env
+                (cd examples; rm -f gl_*)
+                run_examples --no-require-main
+
     wheels:
         name: Build and upload wheels
         runs-on: ubuntu-latest
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 431495595c2d0ca4ba65380e5f38b8b6e579dd77..fa83e362b87c93eb1692d1db82cccc48462e62e7 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -222,6 +222,20 @@ Pylint:
   except:
   - tags
 
+Examples:  # CI job for the example scripts; run_examples is provided by ci-support.sh
+  script: |  # gl_* (OpenGL) examples are deleted before the run - presumably no GL context on the runners; confirm
+    curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/ci-support.sh
+    . ci-support.sh
+    EXTRA_INSTALL="pillow cgen mako imageio"
+    build_py_project_in_venv
+    (cd examples; rm -f gl_*)
+    run_examples --no-require-main
+  except:  # not run for tag pipelines
+  - tags
+  tags:  # labels a runner must carry to pick up this job
+  - python3
+  - pocl
+
 Documentation:
   script:
   - EXTRA_INSTALL="pybind11 numpy mako"
diff --git a/examples/demo_mandelbrot.py b/examples/demo_mandelbrot.py
index 6ca51347d53b239cebeda3063179c3d4db82d814..1c04da6124d3cbb276e990369c381879b6df307b 100644
--- a/examples/demo_mandelbrot.py
+++ b/examples/demo_mandelbrot.py
@@ -23,6 +23,8 @@ import numpy as np
 
 import pyopencl as cl
 
+from PIL import Image
+
 # You can choose a calculation routine below (calc_fractal), uncomment
 # one of the three lines to test the three variations
 # Speed notes are listed in the same place
@@ -42,7 +44,9 @@ def calc_fractal_opencl(q, maxiter):
     q_opencl = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=q)
     output_opencl = cl.Buffer(ctx, mf.WRITE_ONLY, output.nbytes)
 
-    prg = cl.Program(ctx, """
+    prg = cl.Program(
+        ctx,
+        """
     #pragma OPENCL EXTENSION cl_khr_byte_addressable_store : enable
     __kernel void mandelbrot(__global float2 *q,
                      __global ushort *output, ushort const maxiter)
@@ -62,10 +66,12 @@ def calc_fractal_opencl(q, maxiter):
                  output[gid] = curiter;
         }
     }
-    """).build()
+    """,
+    ).build()
 
-    prg.mandelbrot(queue, output.shape, None, q_opencl,
-                   output_opencl, np.uint16(maxiter))
+    prg.mandelbrot(
+        queue, output.shape, None, q_opencl, output_opencl, np.uint16(maxiter)
+    )
 
     cl.enqueue_copy(queue, output, output_opencl).wait()
 
@@ -77,10 +83,15 @@ def calc_fractal_serial(q, maxiter):
     # note that, unlike the other two implementations,
     # the number of iterations per point is NOT constant
     z = np.zeros(q.shape, complex)
-    output = np.resize(np.array(0,), q.shape)
+    output = np.resize(
+        np.array(
+            0,
+        ),
+        q.shape,
+    )
     for i in range(len(q)):
         for iter in range(maxiter):
-            z[i] = z[i]*z[i] + q[i]
+            z[i] = z[i] * z[i] + q[i]
             if abs(z[i]) > 2.0:
                 output[i] = iter
                 break
@@ -90,67 +101,81 @@ def calc_fractal_serial(q, maxiter):
 def calc_fractal_numpy(q, maxiter):
+    """Return the escape iteration of each point in q, computed with NumPy.
+
+    The result has the shape of q and holds, per point, the iteration (below
+    maxiter) at which abs(z) first exceeded 2.0, or 0 if that never happened.
+    """
     # calculate z using numpy, this is the original
     # routine from vegaseat's URL
-    output = np.resize(np.array(0,), q.shape)
+    output = np.zeros(q.shape, dtype=int)
     z = np.zeros(q.shape, np.complex64)
 
     for it in range(maxiter):
-        z = z*z + q
+        z = z * z + q
         done = np.greater(abs(z), 2.0)
-        q = np.where(done, 0+0j, q)
-        z = np.where(done, 0+0j, z)
+        q = np.where(done, 0 + 0j, q)
+        z = np.where(done, 0 + 0j, z)
         output = np.where(done, it, output)
     return output
 
+
 # choose your calculation routine here by uncommenting one of the options
 calc_fractal = calc_fractal_opencl
 # calc_fractal = calc_fractal_serial
 # calc_fractal = calc_fractal_numpy
 
-if __name__ == '__main__':
-    import tkinter as tk
-    from PIL import Image, ImageTk
-
-    class Mandelbrot:
-        def __init__(self):
-            # create window
-            self.root = tk.Tk()
-            self.root.title("Mandelbrot Set")
-            self.create_image()
-            self.create_label()
-            # start event loop
-            self.root.mainloop()
-
-        def draw(self, x1, x2, y1, y2, maxiter=30):
-            # draw the Mandelbrot set, from numpy example
-            xx = np.arange(x1, x2, (x2-x1)/w)
-            yy = np.arange(y2, y1, (y1-y2)/h) * 1j
-            q = np.ravel(xx+yy[:, np.newaxis]).astype(np.complex64)
-
-            start_main = time.time()
-            output = calc_fractal(q, maxiter)
-            end_main = time.time()
-
-            secs = end_main - start_main
-            print("Main took", secs)
-
-            self.mandel = (output.reshape((h, w)) /
-                           float(output.max()) * 255.).astype(np.uint8)
-
-        def create_image(self):
-            """"
-            create the image from the draw() string
-            """
-            # you can experiment with these x and y ranges
-            self.draw(-2.13, 0.77, -1.3, 1.3)
-            self.im = Image.fromarray(self.mandel)
-            self.im.putpalette([i for rgb in ((j, 0, 0) for j in range(255))
-                                for i in rgb])
-
-        def create_label(self):
-            # put the image on a label widget
-            self.image = ImageTk.PhotoImage(self.im)
-            self.label = tk.Label(self.root, image=self.image)
-            self.label.pack()
-
-    # test the class
+
+class Mandelbrot:
+    """Render the Mandelbrot set into a PIL image, optionally shown in a Tk window."""
+
+    def draw(self, x1, x2, y1, y2, maxiter=30):
+        """Fill self.mandel (uint8) with the set over [x1, x2) x (y1, y2]."""
+        # w and h are not defined in this class; presumably module-level image size
+        xx = np.arange(x1, x2, (x2 - x1) / w)
+        yy = np.arange(y2, y1, (y1 - y2) / h) * 1j
+        q = np.ravel(xx + yy[:, np.newaxis]).astype(np.complex64)
+
+        start_main = time.time()
+        output = calc_fractal(q, maxiter)
+        secs = time.time() - start_main
+        print("Main took", secs)
+
+        # scale the iteration counts so that the largest one becomes 255
+        scaled = output.reshape((h, w)) / float(output.max()) * 255.0
+        self.mandel = scaled.astype(np.uint8)
+
+    def create_image(self):
+        """Compute the set and store it in self.im with a black-to-red palette."""
+        # you can experiment with these x and y ranges
+        self.draw(-2.13, 0.77, -1.3, 1.3)
+        self.im = Image.fromarray(self.mandel)
+        # 256 entries, because draw() produces pixel values up to and including 255
+        self.im.putpalette([c for level in range(256) for c in (level, 0, 0)])
+
+    def create_label(self):
+        """Show self.im in a label widget packed into self.root."""
+        # tk and ImageTk are only imported by the __main__ block of this script
+        self.image = ImageTk.PhotoImage(self.im)
+        self.label = tk.Label(self.root, image=self.image)
+        self.label.pack()
+
+    def run_tk(self):
+        """Create the Tk window, draw the set into it and run the event loop."""
+        self.root = tk.Tk()
+        self.root.title("Mandelbrot Set")
+        self.create_image()
+        self.create_label()
+        self.root.mainloop()
+
+
+if __name__ == "__main__":
     test = Mandelbrot()
+    try:
+        import tkinter as tk
+        from PIL import ImageTk
+    except ImportError:  # tkinter or Pillow's Tk support missing: compute only
+        test.create_image()
+    else:
+        try:
+            test.run_tk()
+        except tk.TclError:  # Tk could not open a window, e.g. no display
+            test.create_image()
diff --git a/examples/demo_meta_codepy.py b/examples/demo_meta_codepy.py
index c080109b9dcfe45c16525db2eaa7709f9250b3a9..2ba293c5dfc3783f449b8bc6e0b060a90a4d0c3e 100644
--- a/examples/demo_meta_codepy.py
+++ b/examples/demo_meta_codepy.py
@@ -19,10 +19,10 @@ a_buf = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=a)
 b_buf = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=b)
 c_buf = cl.Buffer(ctx, mf.WRITE_ONLY, b.nbytes)
 
-from codepy.cgen import FunctionBody, \
+from cgen import FunctionBody, \
         FunctionDeclaration, Typedef, POD, Value, \
         Pointer, Module, Block, Initializer, Assign, Const
-from codepy.cgen.opencl import CLKernel, CLGlobal, \
+from cgen.opencl import CLKernel, CLGlobal, \
         CLRequiredWorkGroupSize
 
 mod = Module([
@@ -33,14 +33,14 @@ mod = Module([
             arg_decls=[CLGlobal(Pointer(Const(POD(dtype, name))))
                 for name in ["tgt", "op1", "op2"]]))),
         Block([
-            Initializer(POD(numpy.int32, "idx"), 
+            Initializer(POD(numpy.int32, "idx"),
                 "get_local_id(0) + %d * get_group_id(0)"
                 % (local_size*thread_strides))
             ]+[
             Assign(
                 "tgt[idx+%d]" % (o*local_size),
                 "op1[idx+%d] + op2[idx+%d]" % (
-                    o*local_size, 
+                    o*local_size,
                     o*local_size))
             for o in range(thread_strides)]))])
 
@@ -50,7 +50,7 @@ knl(queue, (local_size*macroblock_count,), (local_size,),
         c_buf, a_buf, b_buf)
 
 c = numpy.empty_like(a)
-cl.enqueue_read_buffer(queue, c_buf, c).wait()
+cl.enqueue_copy(queue, c, c_buf).wait()
 
 assert la.norm(c-(a+b)) == 0
 
diff --git a/examples/demo_meta_template.py b/examples/demo_meta_template.py
index fc64934385b58c7ac6a2d5b72a5b4fb1327de688..a39e954221b94cd44876e3af42a3d0feca129849 100644
--- a/examples/demo_meta_template.py
+++ b/examples/demo_meta_template.py
@@ -23,8 +23,8 @@ from mako.template import Template
 
 tpl = Template("""
     __kernel void add(
-            __global ${ type_name } *tgt, 
-            __global const ${ type_name } *op1, 
+            __global ${ type_name } *tgt,
+            __global const ${ type_name } *op1,
             __global const ${ type_name } *op2)
     {
       int idx = get_local_id(0)
@@ -33,13 +33,13 @@ tpl = Template("""
 
       % for i in range(thread_strides):
           <% offset = i*local_size %>
-          tgt[idx + ${ offset }] = 
-            op1[idx + ${ offset }] 
+          tgt[idx + ${ offset }] =
+            op1[idx + ${ offset }]
             + op2[idx + ${ offset } ];
       % endfor
     }""")
 
-rendered_tpl = tpl.render(type_name="float", 
+rendered_tpl = tpl.render(type_name="float",
     local_size=local_size, thread_strides=thread_strides)
 
 knl = cl.Program(ctx, str(rendered_tpl)).build().add
@@ -48,6 +48,6 @@ knl(queue, (local_size*macroblock_count,), (local_size,),
         c_buf, a_buf, b_buf)
 
 c = numpy.empty_like(a)
-cl.enqueue_read_buffer(queue, c_buf, c).wait()
+cl.enqueue_copy(queue, c, c_buf).wait()
 
 assert la.norm(c-(a+b)) == 0
diff --git a/examples/gl_particle_animation.py b/examples/gl_particle_animation.py
index 1d838a2a4a0884dc53f7d24e8319336c5b7ca3ee..c8ac9c20a461c4e307497a4430f4a832dead4f84 100644
--- a/examples/gl_particle_animation.py
+++ b/examples/gl_particle_animation.py
@@ -1,25 +1,27 @@
 # Visualization of particles with gravity
 # Source: http://enja.org/2010/08/27/adventures-in-opencl-part-2-particles-with-opengl/
 
-import pyopencl as cl # OpenCL - GPU computing interface
+import pyopencl as cl  # OpenCL - GPU computing interface
+
 mf = cl.mem_flags
 from pyopencl.tools import get_gl_sharing_context_properties
-from OpenGL.GL import * # OpenGL - GPU rendering interface
-from OpenGL.GLU import * # OpenGL tools (mipmaps, NURBS, perspective projection, shapes)
-from OpenGL.GLUT import * # OpenGL tool to make a visualization window
-from OpenGL.arrays import vbo 
-import numpy # Number tools
-import sys # System tools (path, modules, maxint)
+from OpenGL.GL import *  # OpenGL - GPU rendering interface
+from OpenGL.GLU import *  # OpenGL tools (mipmaps, NURBS, perspective projection, shapes)
+from OpenGL.GLUT import *  # OpenGL tool to make a visualization window
+from OpenGL.arrays import vbo
+import numpy  # Number tools
+import sys  # System tools (path, modules, maxint)
 
 width = 800
 height = 600
 num_particles = 100000
-time_step = .005
+time_step = 0.005
 mouse_down = False
-mouse_old = {'x': 0., 'y': 0.}
-rotate = {'x': 0., 'y': 0., 'z': 0.}
-translate = {'x': 0., 'y': 0., 'z': 0.}
-initial_translate = {'x': 0., 'y': 0., 'z': -2.5}
+mouse_old = {"x": 0.0, "y": 0.0}
+rotate = {"x": 0.0, "y": 0.0, "z": 0.0}
+translate = {"x": 0.0, "y": 0.0, "z": 0.0}
+initial_translate = {"x": 0.0, "y": 0.0, "z": -2.5}
+
 
 def glut_window():
     glutInit(sys.argv)
@@ -37,60 +39,79 @@ def glut_window():
     glViewport(0, 0, width, height)
     glMatrixMode(GL_PROJECTION)
     glLoadIdentity()
-    gluPerspective(60., width / float(height), .1, 1000.)
+    gluPerspective(60.0, width / float(height), 0.1, 1000.0)
+
+    return window
 
-    return(window)
 
 def initial_buffers(num_particles):
+    """Return (np_position, np_velocity, gl_position, gl_color) for num_particles."""
     np_position = numpy.ndarray((num_particles, 4), dtype=numpy.float32)
     np_color = numpy.ndarray((num_particles, 4), dtype=numpy.float32)
     np_velocity = numpy.ndarray((num_particles, 4), dtype=numpy.float32)
 
-    np_position[:,0] = numpy.sin(numpy.arange(0., num_particles) * 2.001 * numpy.pi / num_particles) 
-    np_position[:,0] *= numpy.random.random_sample((num_particles,)) / 3. + .2
-    np_position[:,1] = numpy.cos(numpy.arange(0., num_particles) * 2.001 * numpy.pi / num_particles) 
-    np_position[:,1] *= numpy.random.random_sample((num_particles,)) / 3. + .2
-    np_position[:,2] = 0.
-    np_position[:,3] = 1.
-
-    np_color[:,:] = [1.,1.,1.,1.] # White particles
-
-    np_velocity[:,0] = np_position[:,0] * 2.
-    np_velocity[:,1] = np_position[:,1] * 2.
-    np_velocity[:,2] = 3.
-    np_velocity[:,3] = numpy.random.random_sample((num_particles, ))
-    
-    gl_position = vbo.VBO(data=np_position, usage=GL_DYNAMIC_DRAW, target=GL_ARRAY_BUFFER)
+    # one angle per particle, covering slightly more than a full turn (factor 2.001)
+    angles = numpy.arange(0.0, num_particles) * 2.001 * numpy.pi / num_particles
+    # each coordinate gets its own random scale factor in [0.2, 0.2 + 1/3)
+    np_position[:, 0] = numpy.sin(angles)
+    np_position[:, 0] *= numpy.random.random_sample((num_particles,)) / 3.0 + 0.2
+    np_position[:, 1] = numpy.cos(angles)
+    np_position[:, 1] *= numpy.random.random_sample((num_particles,)) / 3.0 + 0.2
+    np_position[:, 2] = 0.0
+    np_position[:, 3] = 1.0
+
+    np_color[:, :] = [1.0, 1.0, 1.0, 1.0]  # White particles
+
+    np_velocity[:, 0] = np_position[:, 0] * 2.0
+    np_velocity[:, 1] = np_position[:, 1] * 2.0
+    np_velocity[:, 2] = 3.0
+    np_velocity[:, 3] = numpy.random.random_sample((num_particles,))
+
+    gl_position = vbo.VBO(
+        data=np_position, usage=GL_DYNAMIC_DRAW, target=GL_ARRAY_BUFFER
+    )
     gl_position.bind()
     gl_color = vbo.VBO(data=np_color, usage=GL_DYNAMIC_DRAW, target=GL_ARRAY_BUFFER)
     gl_color.bind()
 
     return (np_position, np_velocity, gl_position, gl_color)
 
+
 def on_timer(t):
     glutTimerFunc(t, on_timer, t)
     glutPostRedisplay()
 
+
 def on_key(*args):
-    if args[0] == '\033' or args[0] == 'q':
+    if args[0] in ("\033", "q", b"\033", b"q"):  # PyOpenGL passes bytes on Python 3
         sys.exit()
 
+
 def on_click(button, state, x, y):
-    mouse_old['x'] = x
-    mouse_old['y'] = y
+    """Store the given pointer position in mouse_old (button/state are unused)."""
+    mouse_old.update(x=x, y=y)
+
 
 def on_mouse_move(x, y):
-    rotate['x'] += (y - mouse_old['y']) * .2
-    rotate['y'] += (x - mouse_old['x']) * .2
+    """Add 0.2 x the pointer offset from mouse_old to rotate, then update mouse_old."""
+    rotate["x"] += (y - mouse_old["y"]) * 0.2
+    rotate["y"] += (x - mouse_old["x"]) * 0.2
+
+    mouse_old.update(x=x, y=y)
 
-    mouse_old['x'] = x
-    mouse_old['y'] = y
 
 def on_display():
-    """Render the particles"""        
+    """Render the particles"""
     # Update or particle positions by calling the OpenCL kernel
     cl.enqueue_acquire_gl_objects(queue, [cl_gl_position, cl_gl_color])
-    kernelargs = (cl_gl_position, cl_gl_color, cl_velocity, cl_start_position, cl_start_velocity, numpy.float32(time_step))
+    kernelargs = (
+        cl_gl_position,
+        cl_gl_color,
+        cl_velocity,
+        cl_start_position,
+        cl_start_velocity,
+        numpy.float32(time_step),
+    )
     program.particle_fountain(queue, (num_particles,), None, *(kernelargs))
     cl.enqueue_release_gl_objects(queue, [cl_gl_position, cl_gl_color])
     queue.finish()
@@ -101,11 +122,11 @@ def on_display():
     glLoadIdentity()
 
     # Handle mouse transformations
-    glTranslatef(initial_translate['x'], initial_translate['y'], initial_translate['z'])
-    glRotatef(rotate['x'], 1, 0, 0)
-    glRotatef(rotate['y'], 0, 1, 0) #we switched around the axis so make this rotate_z
-    glTranslatef(translate['x'], translate['y'], translate['z'])
-    
+    glTranslatef(initial_translate["x"], initial_translate["y"], initial_translate["z"])
+    glRotatef(rotate["x"], 1, 0, 0)
+    glRotatef(rotate["y"], 0, 1, 0)  # we switched around the axis so make this rotate_z
+    glTranslatef(translate["x"], translate["y"], translate["z"])
+
     # Render the particles
     glEnable(GL_POINT_SMOOTH)
     glPointSize(2)
@@ -130,17 +151,25 @@ def on_display():
 
     glutSwapBuffers()
 
+
 window = glut_window()
 
 (np_position, np_velocity, gl_position, gl_color) = initial_buffers(num_particles)
 
 platform = cl.get_platforms()[0]
-context = cl.Context(properties=[(cl.context_properties.PLATFORM, platform)] + get_gl_sharing_context_properties())  
+context = cl.Context(
+    properties=[(cl.context_properties.PLATFORM, platform)]
+    + get_gl_sharing_context_properties()
+)
 queue = cl.CommandQueue(context)
 
 cl_velocity = cl.Buffer(context, mf.COPY_HOST_PTR, hostbuf=np_velocity)
-cl_start_position = cl.Buffer(context, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=np_position)
-cl_start_velocity = cl.Buffer(context, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=np_velocity)
+cl_start_position = cl.Buffer(
+    context, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=np_position
+)
+cl_start_velocity = cl.Buffer(
+    context, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=np_velocity
+)
 
 cl_gl_position = cl.GLBuffer(context, mf.READ_WRITE, int(gl_position))
 cl_gl_color = cl.GLBuffer(context, mf.READ_WRITE, int(gl_color))
diff --git a/examples/median-filter.py b/examples/median-filter.py
index 010e2851d7ee9567732ac45b4a46d08d2d8fb212..7f787500ccf82a77b5961413f86e16dbf3cfe8a9 100644
--- a/examples/median-filter.py
+++ b/examples/median-filter.py
@@ -1,25 +1,14 @@
 import pyopencl as cl
 import numpy as np
-from scipy.misc import imread, imsave
+from imageio import imread, imsave
 
 #Read in image
-img = imread('noisyImage.jpg', flatten=True).astype(np.float32)
+img = imread('noisyImage.jpg').astype(np.float32)
+if img.ndim == 3:  # color image: average the channels into one gray value
+    img = np.mean(img, axis=2)
+print(img.shape)
 
-# Get platforms, both CPU and GPU
-plat = cl.get_platforms()
-CPU = plat[0].get_devices()
-try:
-    GPU = plat[1].get_devices()
-except IndexError:
-    GPU = "none"
-
-#Create context for GPU/CPU
-if GPU!= "none":
-    ctx = cl.Context(GPU)
-else:
-    ctx = cl.Context(CPU)
-
-# Create queue for each kernel execution
+ctx = cl.create_some_context()
 queue = cl.CommandQueue(ctx)
 
 mf = cl.mem_flags
@@ -97,4 +86,4 @@ result = np.empty_like(img)
 cl.enqueue_copy(queue, result, result_g)
 
 # Show the blurred image
-imsave('medianFilter-OpenCL.jpg',result)
\ No newline at end of file
+imsave('medianFilter-OpenCL.jpg', result)
diff --git a/examples/narray.py b/examples/narray.py
index 40ba945042b8d6337d7d4139deb1991d20532d81..924c0d69cd89754574b68939c403c92822c5aa07 100644
--- a/examples/narray.py
+++ b/examples/narray.py
@@ -29,7 +29,7 @@ except:
     raise
 
 prg.demo(queue, (500,), None, demo_buf)
-cl.enqueue_read_buffer(queue, demo_buf, demo_r).wait()
+cl.enqueue_copy(queue, demo_r, demo_buf).wait()
 
 for res in demo_r:
     print(res)
diff --git a/examples/noisyImage.jpg b/examples/noisyImage.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..64db427319e4f2e4ce20d76f44cec3cca51a9697
Binary files /dev/null and b/examples/noisyImage.jpg differ
diff --git a/examples/transpose.py b/examples/transpose.py
index 9b07e2b0566be8f0c02677a9c8cfb53448654a0e..6b06a98802eda2e26f4ad3ffb86cd7c761abd87c 100644
--- a/examples/transpose.py
+++ b/examples/transpose.py
@@ -102,7 +102,7 @@ def transpose_using_cl(ctx, queue, cpu_src, cls):
 
     w, h = cpu_src.shape
     result = numpy.empty((h, w), dtype=cpu_src.dtype)
-    cl.enqueue_read_buffer(queue, a_t_buf, result).wait()
+    cl.enqueue_copy(queue, result, a_t_buf).wait()
 
     a_buf.release()
     a_t_buf.release()
@@ -144,7 +144,7 @@ def benchmark_transpose():
     for dev in ctx.devices:
         assert dev.local_mem_size > 0
 
-    queue = cl.CommandQueue(ctx, 
+    queue = cl.CommandQueue(ctx,
             properties=cl.command_queue_properties.PROFILING_ENABLE)
 
     sizes = [int(((2**i) // 32) * 32)
@@ -186,27 +186,27 @@ def benchmark_transpose():
             a_buf.release()
             a_t_buf.release()
 
-    from matplotlib.pyplot import clf, plot, title, xlabel, ylabel, \
-            savefig, legend, grid
-    for i in range(len(methods)):
-        clf()
-        for j in range(i+1):
-            method = methods[j]
-            name = method.__name__.replace("Transpose", "")
-            plot(sizes, numpy.array(mem_bandwidths[method])/1e9, "o-", label=name)
+    try:
+        from matplotlib.pyplot import clf, plot, title, xlabel, ylabel, \
+                savefig, legend, grid
+    except ModuleNotFoundError:
+        pass
+    else:
+        for i in range(len(methods)):
+            clf()
+            for j in range(i+1):
+                method = methods[j]
+                name = method.__name__.replace("Transpose", "")
+                plot(sizes, numpy.array(mem_bandwidths[method])/1e9, "o-", label=name)
 
-        xlabel("Matrix width/height $N$")
-        ylabel("Memory Bandwidth [GB/s]")
-        legend(loc="best")
-        grid()
+            xlabel("Matrix width/height $N$")
+            ylabel("Memory Bandwidth [GB/s]")
+            legend(loc="best")
+            grid()
 
-        savefig("transpose-benchmark-%d.pdf" % i)
+            savefig("transpose-benchmark-%d.pdf" % i)
 
 
-
-
-
-
-#check_transpose()
+check_transpose()
 benchmark_transpose()
 
diff --git a/examples/print-binary.py b/experiments/print-binary.py
similarity index 100%
rename from examples/print-binary.py
rename to experiments/print-binary.py