#!/usr/bin/env python # -*- coding: utf-8 -*- from __future__ import absolute_import, print_function import numpy as np import pyopencl as cl a_np = np.random.rand(50000).astype(np.float32) b_np = np.random.rand(50000).astype(np.float32) ctx = cl.create_some_context() queue = cl.CommandQueue(ctx) mf = cl.mem_flags a_g = cl.Buffer(ctx, mf.READ_WRITE, a_np.nbytes) b_g = cl.Buffer(ctx, mf.READ_WRITE, b_np.nbytes) cl.enqueue_copy(queue, a_g, a_np) cl.enqueue_copy(queue, b_g, b_np) prg = cl.Program(ctx, """ __kernel void sum( __global const float *a_g, __global const float *b_g, __global float *res_g) { int gid = get_global_id(0); res_g[gid] = a_g[gid] + b_g[gid]; } """).build() res_g = cl.Buffer(ctx, mf.WRITE_ONLY, a_np.nbytes) prg.sum(queue, (500, 10), (16, 16), a_g, b_g, res_g, g_times_l=True) res_np = np.empty_like(a_np) cl.enqueue_copy(queue, res_np, res_g) # Check on CPU with Numpy: print(res_np - (a_np + b_np)) print(np.linalg.norm(res_np - (a_np + b_np)))