diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index e5b382577c00b0879dabc9a09b5fe74db391a188..96fb4a49bf500e4e70494a6c5839385222c4c835 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -90,6 +90,21 @@ jobs: build_py_project_in_conda_env build_docs + examples: + name: Examples + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: "Main Script" + run: | + CONDA_ENVIRONMENT=.test-conda-env-py3.yml + curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/ci-support.sh + . ci-support.sh + EXTRA_INSTALL="pillow cgen mako imageio" + build_py_project_in_conda_env + (cd examples; rm -f gl_*) + run_examples --no-require-main + wheels: name: Build and upload wheels runs-on: ubuntu-latest diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 431495595c2d0ca4ba65380e5f38b8b6e579dd77..fa83e362b87c93eb1692d1db82cccc48462e62e7 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -222,6 +222,20 @@ Pylint: except: - tags +Examples: + script: | + curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/ci-support.sh + . ci-support.sh + EXTRA_INSTALL="pillow cgen mako imageio" + build_py_project_in_venv + (cd examples; rm -f gl_*) + run_examples --no-require-main + except: + - tags + tags: + - python3 + - pocl + Documentation: script: - EXTRA_INSTALL="pybind11 numpy mako" diff --git a/examples/demo_mandelbrot.py b/examples/demo_mandelbrot.py index 6ca51347d53b239cebeda3063179c3d4db82d814..1c04da6124d3cbb276e990369c381879b6df307b 100644 --- a/examples/demo_mandelbrot.py +++ b/examples/demo_mandelbrot.py @@ -23,6 +23,8 @@ import numpy as np import pyopencl as cl +from PIL import Image + # You can choose a calculation routine below (calc_fractal), uncomment # one of the three lines to test the three variations # Speed notes are listed in the same place @@ -42,7 +44,9 @@ def calc_fractal_opencl(q, maxiter): q_opencl = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=q) output_opencl = cl.Buffer(ctx, mf.WRITE_ONLY, output.nbytes) - prg = cl.Program(ctx, """ + prg = cl.Program( + ctx, + """ #pragma OPENCL EXTENSION cl_khr_byte_addressable_store : enable __kernel void mandelbrot(__global float2 *q, __global ushort *output, ushort const maxiter) @@ -62,10 +66,12 @@ def calc_fractal_opencl(q, maxiter): output[gid] = curiter; } } - """).build() + """, + ).build() - prg.mandelbrot(queue, output.shape, None, q_opencl, - output_opencl, np.uint16(maxiter)) + prg.mandelbrot( + queue, output.shape, None, q_opencl, output_opencl, np.uint16(maxiter) + ) cl.enqueue_copy(queue, output, output_opencl).wait() @@ -77,10 +83,15 @@ def calc_fractal_serial(q, maxiter): # note that, unlike the other two implementations, # the number of iterations per point is NOT constant z = np.zeros(q.shape, complex) - output = np.resize(np.array(0,), q.shape) + output = np.resize( + np.array( + 0, + ), + q.shape, + ) for i in range(len(q)): for iter in range(maxiter): - z[i] = z[i]*z[i] + q[i] + z[i] = z[i] * z[i] + q[i] if abs(z[i]) > 2.0: output[i] = iter break @@ -90,67 +101,81 @@ def calc_fractal_serial(q, maxiter): def calc_fractal_numpy(q, maxiter): # calculate z using numpy, this is the original # routine from vegaseat's URL - output = np.resize(np.array(0,), q.shape) + output = np.resize( + np.array( + 0, + ), + q.shape, + ) z = np.zeros(q.shape, np.complex64) for it in range(maxiter): - z = z*z + q + z = z * z + q done = np.greater(abs(z), 2.0) - q = np.where(done, 0+0j, q) - z = np.where(done, 0+0j, z) + q = np.where(done, 0 + 0j, q) + z = np.where(done, 0 + 0j, z) output = np.where(done, it, output) return output + # choose your calculation routine here by uncommenting one of the options calc_fractal = calc_fractal_opencl # calc_fractal = calc_fractal_serial # calc_fractal = calc_fractal_numpy -if __name__ == '__main__': - import tkinter as tk - from PIL import Image, ImageTk - - class Mandelbrot: - def __init__(self): - # create window - self.root = tk.Tk() - self.root.title("Mandelbrot Set") - self.create_image() - self.create_label() - # start event loop - self.root.mainloop() - - def draw(self, x1, x2, y1, y2, maxiter=30): - # draw the Mandelbrot set, from numpy example - xx = np.arange(x1, x2, (x2-x1)/w) - yy = np.arange(y2, y1, (y1-y2)/h) * 1j - q = np.ravel(xx+yy[:, np.newaxis]).astype(np.complex64) - - start_main = time.time() - output = calc_fractal(q, maxiter) - end_main = time.time() - - secs = end_main - start_main - print("Main took", secs) - - self.mandel = (output.reshape((h, w)) / - float(output.max()) * 255.).astype(np.uint8) - - def create_image(self): - """" - create the image from the draw() string - """ - # you can experiment with these x and y ranges - self.draw(-2.13, 0.77, -1.3, 1.3) - self.im = Image.fromarray(self.mandel) - self.im.putpalette([i for rgb in ((j, 0, 0) for j in range(255)) - for i in rgb]) - - def create_label(self): - # put the image on a label widget - self.image = ImageTk.PhotoImage(self.im) - self.label = tk.Label(self.root, image=self.image) - self.label.pack() - - # test the class + +class Mandelbrot: + def draw(self, x1, x2, y1, y2, maxiter=30): + # draw the Mandelbrot set, from numpy example + xx = np.arange(x1, x2, (x2 - x1) / w) + yy = np.arange(y2, y1, (y1 - y2) / h) * 1j + q = np.ravel(xx + yy[:, np.newaxis]).astype(np.complex64) + + start_main = time.time() + output = calc_fractal(q, maxiter) + end_main = time.time() + + secs = end_main - start_main + print("Main took", secs) + + self.mandel = (output.reshape((h, w)) / float(output.max()) * 255.0).astype( + np.uint8 + ) + + def create_image(self): + """ " + create the image from the draw() string + """ + # you can experiment with these x and y ranges + self.draw(-2.13, 0.77, -1.3, 1.3) + self.im = Image.fromarray(self.mandel) + self.im.putpalette([i for rgb in ((j, 0, 0) for j in range(255)) + for i in rgb]) + + def create_label(self): + # put the image on a label widget + self.image = ImageTk.PhotoImage(self.im) + self.label = tk.Label(self.root, image=self.image) + self.label.pack() + + def run_tk(self): + self.root = tk.Tk() + self.root.title("Mandelbrot Set") + self.create_image() + self.create_label() + # start event loop + self.root.mainloop() + + +if __name__ == "__main__": test = Mandelbrot() + try: + import tkinter as tk + except ModuleNotFoundError: + test.create_image() + else: + from PIL import ImageTk + try: + test.run_tk() + except tk.TclError: + test.create_image() diff --git a/examples/demo_meta_codepy.py b/examples/demo_meta_codepy.py index c080109b9dcfe45c16525db2eaa7709f9250b3a9..2ba293c5dfc3783f449b8bc6e0b060a90a4d0c3e 100644 --- a/examples/demo_meta_codepy.py +++ b/examples/demo_meta_codepy.py @@ -19,10 +19,10 @@ a_buf = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=a) b_buf = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=b) c_buf = cl.Buffer(ctx, mf.WRITE_ONLY, b.nbytes) -from codepy.cgen import FunctionBody, \ +from cgen import FunctionBody, \ FunctionDeclaration, Typedef, POD, Value, \ Pointer, Module, Block, Initializer, Assign, Const -from codepy.cgen.opencl import CLKernel, CLGlobal, \ +from cgen.opencl import CLKernel, CLGlobal, \ CLRequiredWorkGroupSize mod = Module([ @@ -33,14 +33,14 @@ mod = Module([ arg_decls=[CLGlobal(Pointer(Const(POD(dtype, name)))) for name in ["tgt", "op1", "op2"]]))), Block([ - Initializer(POD(numpy.int32, "idx"), + Initializer(POD(numpy.int32, "idx"), "get_local_id(0) + %d * get_group_id(0)" % (local_size*thread_strides)) ]+[ Assign( "tgt[idx+%d]" % (o*local_size), "op1[idx+%d] + op2[idx+%d]" % ( - o*local_size, + o*local_size, o*local_size)) for o in range(thread_strides)]))]) @@ -50,7 +50,7 @@ knl(queue, (local_size*macroblock_count,), (local_size,), c_buf, a_buf, b_buf) c = numpy.empty_like(a) -cl.enqueue_read_buffer(queue, c_buf, c).wait() +cl.enqueue_copy(queue, c, c_buf).wait() assert la.norm(c-(a+b)) == 0 diff --git a/examples/demo_meta_template.py b/examples/demo_meta_template.py index fc64934385b58c7ac6a2d5b72a5b4fb1327de688..a39e954221b94cd44876e3af42a3d0feca129849 100644 --- a/examples/demo_meta_template.py +++ b/examples/demo_meta_template.py @@ -23,8 +23,8 @@ from mako.template import Template tpl = Template(""" __kernel void add( - __global ${ type_name } *tgt, - __global const ${ type_name } *op1, + __global ${ type_name } *tgt, + __global const ${ type_name } *op1, __global const ${ type_name } *op2) { int idx = get_local_id(0) @@ -33,13 +33,13 @@ tpl = Template(""" % for i in range(thread_strides): <% offset = i*local_size %> - tgt[idx + ${ offset }] = - op1[idx + ${ offset }] + tgt[idx + ${ offset }] = + op1[idx + ${ offset }] + op2[idx + ${ offset } ]; % endfor }""") -rendered_tpl = tpl.render(type_name="float", +rendered_tpl = tpl.render(type_name="float", local_size=local_size, thread_strides=thread_strides) knl = cl.Program(ctx, str(rendered_tpl)).build().add @@ -48,6 +48,6 @@ knl(queue, (local_size*macroblock_count,), (local_size,), c_buf, a_buf, b_buf) c = numpy.empty_like(a) -cl.enqueue_read_buffer(queue, c_buf, c).wait() +cl.enqueue_copy(queue, c, c_buf).wait() assert la.norm(c-(a+b)) == 0 diff --git a/examples/gl_particle_animation.py b/examples/gl_particle_animation.py index 1d838a2a4a0884dc53f7d24e8319336c5b7ca3ee..c8ac9c20a461c4e307497a4430f4a832dead4f84 100644 --- a/examples/gl_particle_animation.py +++ b/examples/gl_particle_animation.py @@ -1,25 +1,27 @@ # Visualization of particles with gravity # Source: http://enja.org/2010/08/27/adventures-in-opencl-part-2-particles-with-opengl/ -import pyopencl as cl # OpenCL - GPU computing interface +import pyopencl as cl # OpenCL - GPU computing interface + mf = cl.mem_flags from pyopencl.tools import get_gl_sharing_context_properties -from OpenGL.GL import * # OpenGL - GPU rendering interface -from OpenGL.GLU import * # OpenGL tools (mipmaps, NURBS, perspective projection, shapes) -from OpenGL.GLUT import * # OpenGL tool to make a visualization window -from OpenGL.arrays import vbo -import numpy # Number tools -import sys # System tools (path, modules, maxint) +from OpenGL.GL import * # OpenGL - GPU rendering interface +from OpenGL.GLU import * # OpenGL tools (mipmaps, NURBS, perspective projection, shapes) +from OpenGL.GLUT import * # OpenGL tool to make a visualization window +from OpenGL.arrays import vbo +import numpy # Number tools +import sys # System tools (path, modules, maxint) width = 800 height = 600 num_particles = 100000 -time_step = .005 +time_step = 0.005 mouse_down = False -mouse_old = {'x': 0., 'y': 0.} -rotate = {'x': 0., 'y': 0., 'z': 0.} -translate = {'x': 0., 'y': 0., 'z': 0.} -initial_translate = {'x': 0., 'y': 0., 'z': -2.5} +mouse_old = {"x": 0.0, "y": 0.0} +rotate = {"x": 0.0, "y": 0.0, "z": 0.0} +translate = {"x": 0.0, "y": 0.0, "z": 0.0} +initial_translate = {"x": 0.0, "y": 0.0, "z": -2.5} + def glut_window(): glutInit(sys.argv) @@ -37,60 +39,79 @@ def glut_window(): glViewport(0, 0, width, height) glMatrixMode(GL_PROJECTION) glLoadIdentity() - gluPerspective(60., width / float(height), .1, 1000.) + gluPerspective(60.0, width / float(height), 0.1, 1000.0) + + return window - return(window) def initial_buffers(num_particles): np_position = numpy.ndarray((num_particles, 4), dtype=numpy.float32) np_color = numpy.ndarray((num_particles, 4), dtype=numpy.float32) np_velocity = numpy.ndarray((num_particles, 4), dtype=numpy.float32) - np_position[:,0] = numpy.sin(numpy.arange(0., num_particles) * 2.001 * numpy.pi / num_particles) - np_position[:,0] *= numpy.random.random_sample((num_particles,)) / 3. + .2 - np_position[:,1] = numpy.cos(numpy.arange(0., num_particles) * 2.001 * numpy.pi / num_particles) - np_position[:,1] *= numpy.random.random_sample((num_particles,)) / 3. + .2 - np_position[:,2] = 0. - np_position[:,3] = 1. - - np_color[:,:] = [1.,1.,1.,1.] # White particles - - np_velocity[:,0] = np_position[:,0] * 2. - np_velocity[:,1] = np_position[:,1] * 2. - np_velocity[:,2] = 3. - np_velocity[:,3] = numpy.random.random_sample((num_particles, )) - - gl_position = vbo.VBO(data=np_position, usage=GL_DYNAMIC_DRAW, target=GL_ARRAY_BUFFER) + np_position[:, 0] = numpy.sin( + numpy.arange(0.0, num_particles) * 2.001 * numpy.pi / num_particles + ) + np_position[:, 0] *= numpy.random.random_sample((num_particles,)) / 3.0 + 0.2 + np_position[:, 1] = numpy.cos( + numpy.arange(0.0, num_particles) * 2.001 * numpy.pi / num_particles + ) + np_position[:, 1] *= numpy.random.random_sample((num_particles,)) / 3.0 + 0.2 + np_position[:, 2] = 0.0 + np_position[:, 3] = 1.0 + + np_color[:, :] = [1.0, 1.0, 1.0, 1.0] # White particles + + np_velocity[:, 0] = np_position[:, 0] * 2.0 + np_velocity[:, 1] = np_position[:, 1] * 2.0 + np_velocity[:, 2] = 3.0 + np_velocity[:, 3] = numpy.random.random_sample((num_particles,)) + + gl_position = vbo.VBO( + data=np_position, usage=GL_DYNAMIC_DRAW, target=GL_ARRAY_BUFFER + ) gl_position.bind() gl_color = vbo.VBO(data=np_color, usage=GL_DYNAMIC_DRAW, target=GL_ARRAY_BUFFER) gl_color.bind() return (np_position, np_velocity, gl_position, gl_color) + def on_timer(t): glutTimerFunc(t, on_timer, t) glutPostRedisplay() + def on_key(*args): - if args[0] == '\033' or args[0] == 'q': + if args[0] == "\033" or args[0] == "q": sys.exit() + def on_click(button, state, x, y): - mouse_old['x'] = x - mouse_old['y'] = y + mouse_old["x"] = x + mouse_old["y"] = y + def on_mouse_move(x, y): - rotate['x'] += (y - mouse_old['y']) * .2 - rotate['y'] += (x - mouse_old['x']) * .2 + rotate["x"] += (y - mouse_old["y"]) * 0.2 + rotate["y"] += (x - mouse_old["x"]) * 0.2 + + mouse_old["x"] = x + mouse_old["y"] = y - mouse_old['x'] = x - mouse_old['y'] = y def on_display(): - """Render the particles""" + """Render the particles""" # Update or particle positions by calling the OpenCL kernel cl.enqueue_acquire_gl_objects(queue, [cl_gl_position, cl_gl_color]) - kernelargs = (cl_gl_position, cl_gl_color, cl_velocity, cl_start_position, cl_start_velocity, numpy.float32(time_step)) + kernelargs = ( + cl_gl_position, + cl_gl_color, + cl_velocity, + cl_start_position, + cl_start_velocity, + numpy.float32(time_step), + ) program.particle_fountain(queue, (num_particles,), None, *(kernelargs)) cl.enqueue_release_gl_objects(queue, [cl_gl_position, cl_gl_color]) queue.finish() @@ -101,11 +122,11 @@ def on_display(): glLoadIdentity() # Handle mouse transformations - glTranslatef(initial_translate['x'], initial_translate['y'], initial_translate['z']) - glRotatef(rotate['x'], 1, 0, 0) - glRotatef(rotate['y'], 0, 1, 0) #we switched around the axis so make this rotate_z - glTranslatef(translate['x'], translate['y'], translate['z']) - + glTranslatef(initial_translate["x"], initial_translate["y"], initial_translate["z"]) + glRotatef(rotate["x"], 1, 0, 0) + glRotatef(rotate["y"], 0, 1, 0) # we switched around the axis so make this rotate_z + glTranslatef(translate["x"], translate["y"], translate["z"]) + # Render the particles glEnable(GL_POINT_SMOOTH) glPointSize(2) @@ -130,17 +151,25 @@ def on_display(): glutSwapBuffers() + window = glut_window() (np_position, np_velocity, gl_position, gl_color) = initial_buffers(num_particles) platform = cl.get_platforms()[0] -context = cl.Context(properties=[(cl.context_properties.PLATFORM, platform)] + get_gl_sharing_context_properties()) +context = cl.Context( + properties=[(cl.context_properties.PLATFORM, platform)] + + get_gl_sharing_context_properties() +) queue = cl.CommandQueue(context) cl_velocity = cl.Buffer(context, mf.COPY_HOST_PTR, hostbuf=np_velocity) -cl_start_position = cl.Buffer(context, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=np_position) -cl_start_velocity = cl.Buffer(context, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=np_velocity) +cl_start_position = cl.Buffer( + context, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=np_position +) +cl_start_velocity = cl.Buffer( + context, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=np_velocity +) cl_gl_position = cl.GLBuffer(context, mf.READ_WRITE, int(gl_position)) cl_gl_color = cl.GLBuffer(context, mf.READ_WRITE, int(gl_color)) diff --git a/examples/median-filter.py b/examples/median-filter.py index 010e2851d7ee9567732ac45b4a46d08d2d8fb212..7f787500ccf82a77b5961413f86e16dbf3cfe8a9 100644 --- a/examples/median-filter.py +++ b/examples/median-filter.py @@ -1,25 +1,14 @@ import pyopencl as cl import numpy as np -from scipy.misc import imread, imsave +from imageio import imread, imsave #Read in image -img = imread('noisyImage.jpg', flatten=True).astype(np.float32) +img = imread('noisyImage.jpg').astype(np.float32) +print(img.shape) +img = np.mean(img, axis=2) +print(img.shape) -# Get platforms, both CPU and GPU -plat = cl.get_platforms() -CPU = plat[0].get_devices() -try: - GPU = plat[1].get_devices() -except IndexError: - GPU = "none" - -#Create context for GPU/CPU -if GPU!= "none": - ctx = cl.Context(GPU) -else: - ctx = cl.Context(CPU) - -# Create queue for each kernel execution +ctx = cl.create_some_context() queue = cl.CommandQueue(ctx) mf = cl.mem_flags @@ -97,4 +86,4 @@ result = np.empty_like(img) cl.enqueue_copy(queue, result, result_g) # Show the blurred image -imsave('medianFilter-OpenCL.jpg',result) \ No newline at end of file +imsave('medianFilter-OpenCL.jpg', result) diff --git a/examples/narray.py b/examples/narray.py index 40ba945042b8d6337d7d4139deb1991d20532d81..924c0d69cd89754574b68939c403c92822c5aa07 100644 --- a/examples/narray.py +++ b/examples/narray.py @@ -29,7 +29,7 @@ except: raise prg.demo(queue, (500,), None, demo_buf) -cl.enqueue_read_buffer(queue, demo_buf, demo_r).wait() +cl.enqueue_copy(queue, demo_r, demo_buf).wait() for res in demo_r: print(res) diff --git a/examples/noisyImage.jpg b/examples/noisyImage.jpg new file mode 100644 index 0000000000000000000000000000000000000000..64db427319e4f2e4ce20d76f44cec3cca51a9697 Binary files /dev/null and b/examples/noisyImage.jpg differ diff --git a/examples/transpose.py b/examples/transpose.py index 9b07e2b0566be8f0c02677a9c8cfb53448654a0e..6b06a98802eda2e26f4ad3ffb86cd7c761abd87c 100644 --- a/examples/transpose.py +++ b/examples/transpose.py @@ -102,7 +102,7 @@ def transpose_using_cl(ctx, queue, cpu_src, cls): w, h = cpu_src.shape result = numpy.empty((h, w), dtype=cpu_src.dtype) - cl.enqueue_read_buffer(queue, a_t_buf, result).wait() + cl.enqueue_copy(queue, result, a_t_buf).wait() a_buf.release() a_t_buf.release() @@ -144,7 +144,7 @@ def benchmark_transpose(): for dev in ctx.devices: assert dev.local_mem_size > 0 - queue = cl.CommandQueue(ctx, + queue = cl.CommandQueue(ctx, properties=cl.command_queue_properties.PROFILING_ENABLE) sizes = [int(((2**i) // 32) * 32) @@ -186,27 +186,27 @@ def benchmark_transpose(): a_buf.release() a_t_buf.release() - from matplotlib.pyplot import clf, plot, title, xlabel, ylabel, \ - savefig, legend, grid - for i in range(len(methods)): - clf() - for j in range(i+1): - method = methods[j] - name = method.__name__.replace("Transpose", "") - plot(sizes, numpy.array(mem_bandwidths[method])/1e9, "o-", label=name) + try: + from matplotlib.pyplot import clf, plot, title, xlabel, ylabel, \ + savefig, legend, grid + except ModuleNotFoundError: + pass + else: + for i in range(len(methods)): + clf() + for j in range(i+1): + method = methods[j] + name = method.__name__.replace("Transpose", "") + plot(sizes, numpy.array(mem_bandwidths[method])/1e9, "o-", label=name) - xlabel("Matrix width/height $N$") - ylabel("Memory Bandwidth [GB/s]") - legend(loc="best") - grid() + xlabel("Matrix width/height $N$") + ylabel("Memory Bandwidth [GB/s]") + legend(loc="best") + grid() - savefig("transpose-benchmark-%d.pdf" % i) + savefig("transpose-benchmark-%d.pdf" % i) - - - - -#check_transpose() +check_transpose() benchmark_transpose() diff --git a/examples/print-binary.py b/experiments/print-binary.py similarity index 100% rename from examples/print-binary.py rename to experiments/print-binary.py