diff --git a/README.rst b/README.rst index 33e52774ebf6512f7d179bc3475857addb5e93aa..2f457c62a17cbac443fb4f15452529a31f4fde57 100644 --- a/README.rst +++ b/README.rst @@ -40,5 +40,5 @@ Places on the web related to PyOpenCL: :target: http://pypi.python.org/pypi/pyopencl * `C. Gohlke's Windows binaries <http://www.lfd.uci.edu/~gohlke/pythonlibs/#pyopencl>`_ (download Windows binaries) * `Github <http://github.com/pyopencl/pyopencl>`_ (get latest source code, file bugs) -* `Documentation <http://documen.tician.de>`_ (read how things work) +* `Documentation <http://documen.tician.de/pyopencl>`_ (read how things work) * `Wiki <http://wiki.tiker.net/PyOpenCL>`_ (read installation tips, get examples, read FAQ) diff --git a/doc/algorithm.rst b/doc/algorithm.rst index 6eccc2f86683d39bd0bf719b414e3befdef987da..3a2cc1ef85997a36df45815575e7cadb292d51ac 100644 --- a/doc/algorithm.rst +++ b/doc/algorithm.rst @@ -25,35 +25,11 @@ evaluate multi-stage expressions on one or several operands in a single pass. Here's a usage example:: - import pyopencl as cl - import pyopencl.array as cl_array - import numpy - - ctx = cl.create_some_context() - queue = cl.CommandQueue(ctx) - - n = 10 - a_gpu = cl_array.to_device( - ctx, queue, numpy.random.randn(n).astype(numpy.float32)) - b_gpu = cl_array.to_device( - ctx, queue, numpy.random.randn(n).astype(numpy.float32)) - - from pyopencl.elementwise import ElementwiseKernel - lin_comb = ElementwiseKernel(ctx, - "float a, float *x, " - "float b, float *y, " - "float *z", - "z[i] = a*x[i] + b*y[i]", - "linear_combination") - - c_gpu = cl_array.empty_like(a_gpu) - lin_comb(5, a_gpu, 6, b_gpu, c_gpu) - - import numpy.linalg as la - assert la.norm((c_gpu - (5*a_gpu+6*b_gpu)).get()) < 1e-5 - -(You can find this example as :file:`examples/demo_elementwise.py` in the PyOpenCL -distribution.) +.. literalinclude:: ../examples/demo_elementwise.py + +(You can find this example as +:download:`examples/demo_elementwise.py <../examples/demo_elementwise.py>` +in the PyOpenCL distribution.) .. _custom-reductions: diff --git a/doc/misc.rst b/doc/misc.rst index f1fc8cc83cd29a508271066187526b59ca5aa7cb..3fda67b1e9afa7811be889e072b3ec17665e6e4f 100644 --- a/doc/misc.rst +++ b/doc/misc.rst @@ -31,7 +31,7 @@ PyOpenCL comes with IPython integration, which lets you seamlessly integrate PyOpenCL kernels into your IPython notebooks. Simply load the PyOpenCL IPython extension using:: - %load_ext pyopencl.ipython + %load_ext pyopencl.ipython_ext and then use the ``%%cl_kernel`` 'cell-magic' command. See `this notebook <http://nbviewer.ipython.org/urls/raw.githubusercontent.com/pyopencl/pyopencl/master/examples/ipython-demo.ipynb>`_ diff --git a/examples/demo.py b/examples/demo.py index ba948d6716b84c338f3a28b64d0b3e6c9425a1bc..1b694a88062aa101ca80c72df5676ede9c474f1c 100644 --- a/examples/demo.py +++ b/examples/demo.py @@ -1,30 +1,32 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import numpy as np import pyopencl as cl -import numpy -import numpy.linalg as la -a = numpy.random.rand(50000).astype(numpy.float32) -b = numpy.random.rand(50000).astype(numpy.float32) +a_np = np.random.rand(50000).astype(np.float32) +b_np = np.random.rand(50000).astype(np.float32) ctx = cl.create_some_context() queue = cl.CommandQueue(ctx) mf = cl.mem_flags -a_buf = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=a) -b_buf = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=b) -dest_buf = cl.Buffer(ctx, mf.WRITE_ONLY, b.nbytes) +a_g = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=a_np) +b_g = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=b_np) prg = cl.Program(ctx, """ - __kernel void sum(__global const float *a, - __global const float *b, __global float *c) - { - int gid = get_global_id(0); - c[gid] = a[gid] + b[gid]; - } - """).build() +__kernel void sum(__global const float *a_g, __global const float *b_g, __global float *res_g) { + int gid = get_global_id(0); + res_g[gid] = a_g[gid] + b_g[gid]; +} +""").build() -prg.sum(queue, a.shape, None, a_buf, b_buf, dest_buf) +res_g = cl.Buffer(ctx, mf.WRITE_ONLY, a_np.nbytes) +prg.sum(queue, a_np.shape, None, a_g, b_g, res_g) -a_plus_b = numpy.empty_like(a) -cl.enqueue_copy(queue, a_plus_b, dest_buf) +res_np = np.empty_like(a_np) +cl.enqueue_copy(queue, res_np, res_g) -print(la.norm(a_plus_b - (a+b)), la.norm(a_plus_b)) +# Check on CPU with Numpy: +print(res_np - (a_np + b_np)) +print(np.linalg.norm(res_np - (a_np + b_np))) diff --git a/examples/demo_elementwise.py b/examples/demo_elementwise.py index a64616baba08f21550c88263e1a813ec2a23b6c0..21646c4f42a8cce495c02aef7beae5d4a2ceaffe 100644 --- a/examples/demo_elementwise.py +++ b/examples/demo_elementwise.py @@ -1,26 +1,34 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import numpy as np import pyopencl as cl -import pyopencl.array as cl_array -import numpy +import pyopencl.array +from pyopencl.elementwise import ElementwiseKernel + +n = 10 +a_np = np.random.randn(n).astype(np.float32) +b_np = np.random.randn(n).astype(np.float32) ctx = cl.create_some_context() queue = cl.CommandQueue(ctx) -n = 10 -a_gpu = cl_array.to_device( - queue, numpy.random.randn(n).astype(numpy.float32)) -b_gpu = cl_array.to_device( - queue, numpy.random.randn(n).astype(numpy.float32)) +a_g = cl.array.to_device(queue, a_np) +b_g = cl.array.to_device(queue, b_np) -from pyopencl.elementwise import ElementwiseKernel lin_comb = ElementwiseKernel(ctx, - "float a, float *x, " - "float b, float *y, " - "float *z", - "z[i] = a*x[i] + b*y[i]", - "linear_combination") + "float k1, float *a_g, float k2, float *b_g, float *res_g", + "res_g[i] = k1 * a_g[i] + k2 * b_g[i]", + "lin_comb" +) + +res_g = cl.array.empty_like(a_g) +lin_comb(2, a_g, 3, b_g, res_g) -c_gpu = cl_array.empty_like(a_gpu) -lin_comb(5, a_gpu, 6, b_gpu, c_gpu) +# Check on GPU with PyOpenCL Array: +print((res_g - (2 * a_g + 3 * b_g)).get()) -import numpy.linalg as la -assert la.norm((c_gpu - (5*a_gpu+6*b_gpu)).get()) < 1e-5 +# Check on CPU with Numpy: +res_np = res_g.get() +print(res_np - (2 * a_np + 3 * b_np)) +print(np.linalg.norm(res_np - (2 * a_np + 3 * b_np))) diff --git a/examples/ipython-demo.ipynb b/examples/ipython-demo.ipynb index 1635dcc31c9b3a229328f4fede2f913f6d9a1c1e..b0e8159c4d1f3579e79842dd96f0918350063cf6 100644 --- a/examples/ipython-demo.ipynb +++ b/examples/ipython-demo.ipynb @@ -1,7 +1,7 @@ { "metadata": { "name": "", - "signature": "sha256:85c637b863a4bbbd3fb91eca8682d36d9874a53a6db35b18f1c53bb53b3c6bdc" + "signature": "sha256:81f3deed7cdc26b0fc756b3ee1eb6e8f9b1be96304ddfc6ff484d223c2b8a942" }, "nbformat": 3, "nbformat_minor": 0, @@ -19,8 +19,17 @@ ], "language": "python", "metadata": {}, - "outputs": [], - "prompt_number": 2 + "outputs": [ + { + "output_type": "stream", + "stream": "stderr", + "text": [ + "/usr/lib/python2.7/pkgutil.py:186: ImportWarning: Not importing directory '/usr/lib/python2.7/dist-packages/enthought': missing __init__.py\n", + " file, filename, etc = imp.find_module(subname, path)\n" + ] + } + ], + "prompt_number": 1 }, { "cell_type": "markdown", @@ -33,7 +42,7 @@ "cell_type": "code", "collapsed": false, "input": [ - "%load_ext pyopencl.ipython" + "%load_ext pyopencl.ipython_ext" ], "language": "python", "metadata": {}, @@ -62,8 +71,8 @@ "stream": "stdout", "text": [ "Choose platform:\n", - "[0] <pyopencl.Platform 'AMD Accelerated Parallel Processing' at 0x7f244be8e500>\n", - "[1] <pyopencl.Platform 'Intel(R) OpenCL' at 0x3adcef0>\n" + "[0] <pyopencl.Platform 'AMD Accelerated Parallel Processing' at 0x7fc14f1b0080>\n", + "[1] <pyopencl.Platform 'Intel(R) OpenCL' at 0x32aed00>\n" ] }, { @@ -162,7 +171,7 @@ "output_type": "pyout", "prompt_number": 8, "text": [ - "<pyopencl._cl.Event at 0x39dac20>" + "<pyopencl._cl.Event at 0x7fc14f3fdf30>" ] } ], diff --git a/pyopencl/ipython.py b/pyopencl/ipython_ext.py similarity index 100% rename from pyopencl/ipython.py rename to pyopencl/ipython_ext.py diff --git a/pyopencl/tools.py b/pyopencl/tools.py index b85708d8c15809f0a0a86a74d0be82b3997ab5f2..cd784df84da786b7d8a4982c41df299061ea7e47 100644 --- a/pyopencl/tools.py +++ b/pyopencl/tools.py @@ -404,18 +404,20 @@ def get_arg_offset_adjuster_code(arg_types): def get_gl_sharing_context_properties(): ctx_props = cl.context_properties - from OpenGL import platform as gl_platform, GLX, WGL + from OpenGL import platform as gl_platform props = [] import sys if sys.platform in ["linux", "linux2"]: + from OpenGL import GLX props.append( (ctx_props.GL_CONTEXT_KHR, gl_platform.GetCurrentContext())) props.append( (ctx_props.GLX_DISPLAY_KHR, GLX.glXGetCurrentDisplay())) elif sys.platform == "win32": + from OpenGL import WGL props.append( (ctx_props.GL_CONTEXT_KHR, gl_platform.GetCurrentContext())) props.append( diff --git a/pyopencl/version.py b/pyopencl/version.py index cce4332fa693dc66686f9a61f4d85bb1a2692d2e..2d917d29fc041929fe4c94362fd212f13cafc685 100644 --- a/pyopencl/version.py +++ b/pyopencl/version.py @@ -1,4 +1,3 @@ -VERSION = (2013, 3) +VERSION = (2014, 1) VERSION_STATUS = "" VERSION_TEXT = ".".join(str(x) for x in VERSION) + VERSION_STATUS - diff --git a/test/test_clmath.py b/test/test_clmath.py index 6ebbe46bcb1f49ed70e1797c003013fe43d79147..586e9e075275aadb11c3067b3127f8c955afb625 100644 --- a/test/test_clmath.py +++ b/test/test_clmath.py @@ -132,7 +132,7 @@ def test_atan2(ctx_factory): queue = cl.CommandQueue(context) for s in sizes: - a = (cl_array.arange(queue, s, dtype=np.float32) - s / 2) / 100 + a = (cl_array.arange(queue, s, dtype=np.float32) - np.float32(s / 2)) / 100 a2 = (s / 2 - 1 - cl_array.arange(queue, s, dtype=np.float32)) / 100 b = clmath.atan2(a, a2) @@ -149,7 +149,7 @@ def test_atan2pi(ctx_factory): queue = cl.CommandQueue(context) for s in sizes: - a = (cl_array.arange(queue, s, dtype=np.float32) - s / 2) / 100 + a = (cl_array.arange(queue, s, dtype=np.float32) - np.float32(s / 2)) / 100 a2 = (s / 2 - 1 - cl_array.arange(queue, s, dtype=np.float32)) / 100 b = clmath.atan2pi(a, a2)