diff --git a/doc/algorithm.rst b/doc/algorithm.rst index 6eccc2f86683d39bd0bf719b414e3befdef987da..3a2cc1ef85997a36df45815575e7cadb292d51ac 100644 --- a/doc/algorithm.rst +++ b/doc/algorithm.rst @@ -25,35 +25,11 @@ evaluate multi-stage expressions on one or several operands in a single pass. Here's a usage example:: - import pyopencl as cl - import pyopencl.array as cl_array - import numpy - - ctx = cl.create_some_context() - queue = cl.CommandQueue(ctx) - - n = 10 - a_gpu = cl_array.to_device( - ctx, queue, numpy.random.randn(n).astype(numpy.float32)) - b_gpu = cl_array.to_device( - ctx, queue, numpy.random.randn(n).astype(numpy.float32)) - - from pyopencl.elementwise import ElementwiseKernel - lin_comb = ElementwiseKernel(ctx, - "float a, float *x, " - "float b, float *y, " - "float *z", - "z[i] = a*x[i] + b*y[i]", - "linear_combination") - - c_gpu = cl_array.empty_like(a_gpu) - lin_comb(5, a_gpu, 6, b_gpu, c_gpu) - - import numpy.linalg as la - assert la.norm((c_gpu - (5*a_gpu+6*b_gpu)).get()) < 1e-5 - -(You can find this example as :file:`examples/demo_elementwise.py` in the PyOpenCL -distribution.) +.. literalinclude:: ../examples/demo_elementwise.py + +(You can find this example as +:download:`examples/demo_elementwise.py <../examples/demo_elementwise.py>` +in the PyOpenCL distribution.) .. _custom-reductions: diff --git a/examples/demo.py b/examples/demo.py index ba948d6716b84c338f3a28b64d0b3e6c9425a1bc..1b694a88062aa101ca80c72df5676ede9c474f1c 100644 --- a/examples/demo.py +++ b/examples/demo.py @@ -1,30 +1,32 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import numpy as np import pyopencl as cl -import numpy -import numpy.linalg as la -a = numpy.random.rand(50000).astype(numpy.float32) -b = numpy.random.rand(50000).astype(numpy.float32) +a_np = np.random.rand(50000).astype(np.float32) +b_np = np.random.rand(50000).astype(np.float32) ctx = cl.create_some_context() queue = cl.CommandQueue(ctx) mf = cl.mem_flags -a_buf = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=a) -b_buf = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=b) -dest_buf = cl.Buffer(ctx, mf.WRITE_ONLY, b.nbytes) +a_g = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=a_np) +b_g = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=b_np) prg = cl.Program(ctx, """ - __kernel void sum(__global const float *a, - __global const float *b, __global float *c) - { - int gid = get_global_id(0); - c[gid] = a[gid] + b[gid]; - } - """).build() +__kernel void sum(__global const float *a_g, __global const float *b_g, __global float *res_g) { + int gid = get_global_id(0); + res_g[gid] = a_g[gid] + b_g[gid]; +} +""").build() -prg.sum(queue, a.shape, None, a_buf, b_buf, dest_buf) +res_g = cl.Buffer(ctx, mf.WRITE_ONLY, a_np.nbytes) +prg.sum(queue, a_np.shape, None, a_g, b_g, res_g) -a_plus_b = numpy.empty_like(a) -cl.enqueue_copy(queue, a_plus_b, dest_buf) +res_np = np.empty_like(a_np) +cl.enqueue_copy(queue, res_np, res_g) -print(la.norm(a_plus_b - (a+b)), la.norm(a_plus_b)) +# Check on CPU with Numpy: +print(res_np - (a_np + b_np)) +print(np.linalg.norm(res_np - (a_np + b_np))) diff --git a/examples/demo_elementwise.py b/examples/demo_elementwise.py index a64616baba08f21550c88263e1a813ec2a23b6c0..21646c4f42a8cce495c02aef7beae5d4a2ceaffe 100644 --- a/examples/demo_elementwise.py +++ b/examples/demo_elementwise.py @@ -1,26 +1,34 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import numpy as np import pyopencl as cl -import pyopencl.array as cl_array -import numpy +import pyopencl.array +from pyopencl.elementwise import ElementwiseKernel + +n = 10 +a_np = np.random.randn(n).astype(np.float32) +b_np = np.random.randn(n).astype(np.float32) ctx = cl.create_some_context() queue = cl.CommandQueue(ctx) -n = 10 -a_gpu = cl_array.to_device( - queue, numpy.random.randn(n).astype(numpy.float32)) -b_gpu = cl_array.to_device( - queue, numpy.random.randn(n).astype(numpy.float32)) +a_g = cl.array.to_device(queue, a_np) +b_g = cl.array.to_device(queue, b_np) -from pyopencl.elementwise import ElementwiseKernel lin_comb = ElementwiseKernel(ctx, - "float a, float *x, " - "float b, float *y, " - "float *z", - "z[i] = a*x[i] + b*y[i]", - "linear_combination") + "float k1, float *a_g, float k2, float *b_g, float *res_g", + "res_g[i] = k1 * a_g[i] + k2 * b_g[i]", + "lin_comb" +) + +res_g = cl.array.empty_like(a_g) +lin_comb(2, a_g, 3, b_g, res_g) -c_gpu = cl_array.empty_like(a_gpu) -lin_comb(5, a_gpu, 6, b_gpu, c_gpu) +# Check on GPU with PyOpenCL Array: +print((res_g - (2 * a_g + 3 * b_g)).get()) -import numpy.linalg as la -assert la.norm((c_gpu - (5*a_gpu+6*b_gpu)).get()) < 1e-5 +# Check on CPU with Numpy: +res_np = res_g.get() +print(res_np - (2 * a_np + 3 * b_np)) +print(np.linalg.norm(res_np - (2 * a_np + 3 * b_np)))