diff --git a/doc/algorithm.rst b/doc/algorithm.rst index 6eccc2f86683d39bd0bf719b414e3befdef987da..3a2cc1ef85997a36df45815575e7cadb292d51ac 100644 --- a/doc/algorithm.rst +++ b/doc/algorithm.rst @@ -25,35 +25,11 @@ evaluate multi-stage expressions on one or several operands in a single pass. Here's a usage example:: - import pyopencl as cl - import pyopencl.array as cl_array - import numpy - - ctx = cl.create_some_context() - queue = cl.CommandQueue(ctx) - - n = 10 - a_gpu = cl_array.to_device( - ctx, queue, numpy.random.randn(n).astype(numpy.float32)) - b_gpu = cl_array.to_device( - ctx, queue, numpy.random.randn(n).astype(numpy.float32)) - - from pyopencl.elementwise import ElementwiseKernel - lin_comb = ElementwiseKernel(ctx, - "float a, float *x, " - "float b, float *y, " - "float *z", - "z[i] = a*x[i] + b*y[i]", - "linear_combination") - - c_gpu = cl_array.empty_like(a_gpu) - lin_comb(5, a_gpu, 6, b_gpu, c_gpu) - - import numpy.linalg as la - assert la.norm((c_gpu - (5*a_gpu+6*b_gpu)).get()) < 1e-5 - -(You can find this example as :file:`examples/demo_elementwise.py` in the PyOpenCL -distribution.) +.. literalinclude:: ../examples/demo_elementwise.py + +(You can find this example as +:download:`examples/demo_elementwise.py <../examples/demo_elementwise.py>` +in the PyOpenCL distribution.) .. _custom-reductions: diff --git a/examples/demo_elementwise.py b/examples/demo_elementwise.py index a64616baba08f21550c88263e1a813ec2a23b6c0..21646c4f42a8cce495c02aef7beae5d4a2ceaffe 100644 --- a/examples/demo_elementwise.py +++ b/examples/demo_elementwise.py @@ -1,26 +1,34 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import numpy as np import pyopencl as cl -import pyopencl.array as cl_array -import numpy +import pyopencl.array +from pyopencl.elementwise import ElementwiseKernel + +n = 10 +a_np = np.random.randn(n).astype(np.float32) +b_np = np.random.randn(n).astype(np.float32) ctx = cl.create_some_context() queue = cl.CommandQueue(ctx) -n = 10 -a_gpu = cl_array.to_device( - queue, numpy.random.randn(n).astype(numpy.float32)) -b_gpu = cl_array.to_device( - queue, numpy.random.randn(n).astype(numpy.float32)) +a_g = cl.array.to_device(queue, a_np) +b_g = cl.array.to_device(queue, b_np) -from pyopencl.elementwise import ElementwiseKernel lin_comb = ElementwiseKernel(ctx, - "float a, float *x, " - "float b, float *y, " - "float *z", - "z[i] = a*x[i] + b*y[i]", - "linear_combination") + "float k1, float *a_g, float k2, float *b_g, float *res_g", + "res_g[i] = k1 * a_g[i] + k2 * b_g[i]", + "lin_comb" +) + +res_g = cl.array.empty_like(a_g) +lin_comb(2, a_g, 3, b_g, res_g) -c_gpu = cl_array.empty_like(a_gpu) -lin_comb(5, a_gpu, 6, b_gpu, c_gpu) +# Check on GPU with PyOpenCL Array: +print((res_g - (2 * a_g + 3 * b_g)).get()) -import numpy.linalg as la -assert la.norm((c_gpu - (5*a_gpu+6*b_gpu)).get()) < 1e-5 +# Check on CPU with Numpy: +res_np = res_g.get() +print(res_np - (2 * a_np + 3 * b_np)) +print(np.linalg.norm(res_np - (2 * a_np + 3 * b_np)))