Merge branch 'master' into cffi

8e8c8bbc · Andreas Klöckner · ad3d61c7 · 15db3849 · 8e8c8bbc · 8e8c8bbc
Commit 8e8c8bbc authored 10 years ago by Andreas Klöckner
--- a/README.rst
+++ b/README.rst
@@ -40,5 +40,5 @@ Places on the web related to PyOpenCL:
      :target: http://pypi.python.org/pypi/pyopencl
 * `C. Gohlke's Windows binaries <http://www.lfd.uci.edu/~gohlke/pythonlibs/#pyopencl>`_ (download Windows binaries)
 * `Github <http://github.com/pyopencl/pyopencl>`_ (get latest source code, file bugs)
-* `Documentation <http://documen.tician.de>`_ (read how things work)
+* `Documentation <http://documen.tician.de/pyopencl>`_ (read how things work)
 * `Wiki <http://wiki.tiker.net/PyOpenCL>`_ (read installation tips, get examples, read FAQ)
--- a/doc/algorithm.rst
+++ b/doc/algorithm.rst
@@ -25,35 +25,11 @@ evaluate multi-stage expressions on one or several operands in a single pass.

 Here's a usage example::

-    import pyopencl as cl
-    import pyopencl.array as cl_array
-    import numpy
-
-    ctx = cl.create_some_context()
-    queue = cl.CommandQueue(ctx)
-
-    n = 10
-    a_gpu = cl_array.to_device(
-            ctx, queue, numpy.random.randn(n).astype(numpy.float32))
-    b_gpu = cl_array.to_device(
-            ctx, queue, numpy.random.randn(n).astype(numpy.float32))
-
-    from pyopencl.elementwise import ElementwiseKernel
-    lin_comb = ElementwiseKernel(ctx,
-            "float a, float *x, "
-            "float b, float *y, "
-            "float *z",
-            "z[i] = a*x[i] + b*y[i]",
-            "linear_combination")
-
-    c_gpu = cl_array.empty_like(a_gpu)
-    lin_comb(5, a_gpu, 6, b_gpu, c_gpu)
-
-    import numpy.linalg as la
-    assert la.norm((c_gpu - (5*a_gpu+6*b_gpu)).get()) < 1e-5
-
-(You can find this example as :file:`examples/demo_elementwise.py` in the PyOpenCL
-distribution.)
+.. literalinclude:: ../examples/demo_elementwise.py
+
+(You can find this example as
+:download:`examples/demo_elementwise.py <../examples/demo_elementwise.py>`
+in the PyOpenCL distribution.)

 .. _custom-reductions:


--- a/doc/misc.rst
+++ b/doc/misc.rst
@@ -31,7 +31,7 @@ PyOpenCL comes with IPython integration, which lets you seamlessly integrate
 PyOpenCL kernels into your IPython notebooks. Simply load the PyOpenCL 
 IPython extension using::

-    %load_ext pyopencl.ipython
+    %load_ext pyopencl.ipython_ext

 and then use the ``%%cl_kernel`` 'cell-magic' command. See `this notebook
 <http://nbviewer.ipython.org/urls/raw.githubusercontent.com/pyopencl/pyopencl/master/examples/ipython-demo.ipynb>`_

--- a/examples/demo.py
+++ b/examples/demo.py
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import numpy as np
 import pyopencl as cl
-import numpy
-import numpy.linalg as la

-a = numpy.random.rand(50000).astype(numpy.float32)
-b = numpy.random.rand(50000).astype(numpy.float32)
+a_np = np.random.rand(50000).astype(np.float32)
+b_np = np.random.rand(50000).astype(np.float32)

 ctx = cl.create_some_context()
 queue = cl.CommandQueue(ctx)

 mf = cl.mem_flags
-a_buf = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=a)
-b_buf = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=b)
-dest_buf = cl.Buffer(ctx, mf.WRITE_ONLY, b.nbytes)
+a_g = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=a_np)
+b_g = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=b_np)

 prg = cl.Program(ctx, """
-    __kernel void sum(__global const float *a,
-    __global const float *b, __global float *c)
-    {
-      int gid = get_global_id(0);
-      c[gid] = a[gid] + b[gid];
-    }
-    """).build()
+__kernel void sum(__global const float *a_g, __global const float *b_g, __global float *res_g) {
+  int gid = get_global_id(0);
+  res_g[gid] = a_g[gid] + b_g[gid];
+}
+""").build()

-prg.sum(queue, a.shape, None, a_buf, b_buf, dest_buf)
+res_g = cl.Buffer(ctx, mf.WRITE_ONLY, a_np.nbytes)
+prg.sum(queue, a_np.shape, None, a_g, b_g, res_g)

-a_plus_b = numpy.empty_like(a)
-cl.enqueue_copy(queue, a_plus_b, dest_buf)
+res_np = np.empty_like(a_np)
+cl.enqueue_copy(queue, res_np, res_g)

-print(la.norm(a_plus_b - (a+b)), la.norm(a_plus_b))
+# Check on CPU with NumPy:
+print(res_np - (a_np + b_np))
+print(np.linalg.norm(res_np - (a_np + b_np)))
--- a/examples/demo_elementwise.py
+++ b/examples/demo_elementwise.py
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import numpy as np
 import pyopencl as cl
-import pyopencl.array as cl_array
-import numpy
+import pyopencl.array
+from pyopencl.elementwise import ElementwiseKernel
+
+n = 10
+a_np = np.random.randn(n).astype(np.float32)
+b_np = np.random.randn(n).astype(np.float32)

 ctx = cl.create_some_context()
 queue = cl.CommandQueue(ctx)

-n = 10
-a_gpu = cl_array.to_device(
-        queue, numpy.random.randn(n).astype(numpy.float32))
-b_gpu = cl_array.to_device(
-        queue, numpy.random.randn(n).astype(numpy.float32))
+a_g = cl.array.to_device(queue, a_np)
+b_g = cl.array.to_device(queue, b_np)

-from pyopencl.elementwise import ElementwiseKernel
 lin_comb = ElementwiseKernel(ctx,
-        "float a, float *x, "
-        "float b, float *y, "
-        "float *z",
-        "z[i] = a*x[i] + b*y[i]",
-        "linear_combination")
+    "float k1, float *a_g, float k2, float *b_g, float *res_g",
+    "res_g[i] = k1 * a_g[i] + k2 * b_g[i]",
+    "lin_comb"
+)
+
+res_g = cl.array.empty_like(a_g)
+lin_comb(2, a_g, 3, b_g, res_g)

-c_gpu = cl_array.empty_like(a_gpu)
-lin_comb(5, a_gpu, 6, b_gpu, c_gpu)
+# Check on GPU with PyOpenCL Array:
+print((res_g - (2 * a_g + 3 * b_g)).get())

-import numpy.linalg as la
-assert la.norm((c_gpu - (5*a_gpu+6*b_gpu)).get()) < 1e-5
+# Check on CPU with NumPy:
+res_np = res_g.get()
+print(res_np - (2 * a_np + 3 * b_np))
+print(np.linalg.norm(res_np - (2 * a_np + 3 * b_np)))
--- a/examples/ipython-demo.ipynb
+++ b/examples/ipython-demo.ipynb
 {
 "metadata": {
  "name": "",
-  "signature": "sha256:85c637b863a4bbbd3fb91eca8682d36d9874a53a6db35b18f1c53bb53b3c6bdc"
+  "signature": "sha256:81f3deed7cdc26b0fc756b3ee1eb6e8f9b1be96304ddfc6ff484d223c2b8a942"
 },
 "nbformat": 3,
 "nbformat_minor": 0,
@@ -19,8 +19,17 @@
     ],
     "language": "python",
     "metadata": {},
-     "outputs": [],
-     "prompt_number": 2
+     "outputs": [
+      {
+       "output_type": "stream",
+       "stream": "stderr",
+       "text": [
+        "/usr/lib/python2.7/pkgutil.py:186: ImportWarning: Not importing directory '/usr/lib/python2.7/dist-packages/enthought': missing __init__.py\n",
+        "  file, filename, etc = imp.find_module(subname, path)\n"
+       ]
+      }
+     ],
+     "prompt_number": 1
    },
    {
     "cell_type": "markdown",
@@ -33,7 +42,7 @@
     "cell_type": "code",
     "collapsed": false,
     "input": [
-      "%load_ext pyopencl.ipython"
+      "%load_ext pyopencl.ipython_ext"
     ],
     "language": "python",
     "metadata": {},
@@ -62,8 +71,8 @@
       "stream": "stdout",
       "text": [
        "Choose platform:\n",
-        "[0] <pyopencl.Platform 'AMD Accelerated Parallel Processing' at 0x7f244be8e500>\n",
-        "[1] <pyopencl.Platform 'Intel(R) OpenCL' at 0x3adcef0>\n"
+        "[0] <pyopencl.Platform 'AMD Accelerated Parallel Processing' at 0x7fc14f1b0080>\n",
+        "[1] <pyopencl.Platform 'Intel(R) OpenCL' at 0x32aed00>\n"
       ]
      },
      {
@@ -162,7 +171,7 @@
       "output_type": "pyout",
       "prompt_number": 8,
       "text": [
-        "<pyopencl._cl.Event at 0x39dac20>"
+        "<pyopencl._cl.Event at 0x7fc14f3fdf30>"
       ]
      }
     ],

--- a/pyopencl/ipython.py
+++ b/pyopencl/ipython.py
--- a/pyopencl/tools.py
+++ b/pyopencl/tools.py
@@ -404,18 +404,20 @@ def get_arg_offset_adjuster_code(arg_types):
 def get_gl_sharing_context_properties():
    ctx_props = cl.context_properties

-    from OpenGL import platform as gl_platform, GLX, WGL
+    from OpenGL import platform as gl_platform

    props = []

    import sys
    if sys.platform in ["linux", "linux2"]:
+        from OpenGL import GLX
        props.append(
            (ctx_props.GL_CONTEXT_KHR, gl_platform.GetCurrentContext()))
        props.append(
                (ctx_props.GLX_DISPLAY_KHR,
                    GLX.glXGetCurrentDisplay()))
    elif sys.platform == "win32":
+        from OpenGL import WGL
        props.append(
            (ctx_props.GL_CONTEXT_KHR, gl_platform.GetCurrentContext()))
        props.append(

--- a/pyopencl/version.py
+++ b/pyopencl/version.py
-VERSION = (2013, 3)
+VERSION = (2014, 1)
 VERSION_STATUS = ""
 VERSION_TEXT = ".".join(str(x) for x in VERSION) + VERSION_STATUS
-
--- a/test/test_clmath.py
+++ b/test/test_clmath.py
@@ -132,7 +132,7 @@ def test_atan2(ctx_factory):
    queue = cl.CommandQueue(context)

    for s in sizes:
-        a = (cl_array.arange(queue, s, dtype=np.float32) - s / 2) / 100
+        a = (cl_array.arange(queue, s, dtype=np.float32) - np.float32(s / 2)) / 100
        a2 = (s / 2 - 1 - cl_array.arange(queue, s, dtype=np.float32)) / 100
        b = clmath.atan2(a, a2)

@@ -149,7 +149,7 @@ def test_atan2pi(ctx_factory):
    queue = cl.CommandQueue(context)

    for s in sizes:
-        a = (cl_array.arange(queue, s, dtype=np.float32) - s / 2) / 100
+        a = (cl_array.arange(queue, s, dtype=np.float32) - np.float32(s / 2)) / 100
        a2 = (s / 2 - 1 - cl_array.arange(queue, s, dtype=np.float32)) / 100
        b = clmath.atan2pi(a, a2)