diff --git a/doc/source/array.rst b/doc/source/array.rst
index 0e435a7c69b8954c53356d1d0d4dd54a4522611e..c6586eb0cc2c8f4e007df83381591fdeb7ef4242 100644
--- a/doc/source/array.rst
+++ b/doc/source/array.rst
@@ -1,8 +1,24 @@
-The :class:`Array` Class
-========================
+Multi-dimensional arrays on the Compute Device
+==============================================
 
 .. module:: pyopencl.array
 
+Vector Types
+------------
+
+.. class:: vec
+
+    All of OpenCL's supported vector types, such as `float3` and `long4`,
+    are available as :mod:`numpy` data types within this class. These
+    :class:`numpy.dtype` instances have field names of `x`, `y`, `z`, and
+    `w`, just like their OpenCL counterparts. They work both for passing
+    parameters to kernels and for passing data back and forth between
+    kernels and Python code. For each type, a `make_type` function is also
+    provided (e.g. `make_float3(x,y,z)`).
+
+The :class:`Array` Class
+------------------------
+
 .. class:: DefaultAllocator(context, flags=pyopencl.mem_flags.READ_WRITE)
 
     An alias for :class:`pyopencl.tools.CLAllocator`.
diff --git a/doc/source/misc.rst b/doc/source/misc.rst
index c96af3d1c1edde33aac64cc308ef3ae73ae14bd9..e18d293e9860ddbc19e61f33a834807d7428b256 100644
--- a/doc/source/misc.rst
+++ b/doc/source/misc.rst
@@ -93,6 +93,7 @@ Version 2011.1
   :func:`pyopencl.array.arange`.
 * Make construction of :class:`pyopencl.array.Array` more flexible (*cqa* argument.)
 * Add :ref:`memory-pools`.
+* Add vector types, see :class:`pyopencl.array.vec`.
 
 Version 0.92
 ------------
diff --git a/pyopencl/__init__.py b/pyopencl/__init__.py
index c05cf74bc4fcc51448b6aee27648870e9cccb505..83f9d3a79fcdd25d3ac6d901ae5ab9b5729c3a4e 100644
--- a/pyopencl/__init__.py
+++ b/pyopencl/__init__.py
@@ -13,6 +13,7 @@ except ImportError:
 
+import numpy as np
 from pyopencl._cl import *
 
 import inspect as _inspect
 
@@ -225,8 +226,7 @@ def _add_functionality():
             if arg_dtype is None:
                 arg_type_chars.append(None)
             else:
-                import numpy
-                arg_type_chars.append(numpy.dtype(arg_dtype).char)
+                arg_type_chars.append(np.dtype(arg_dtype).char)
 
         self._arg_type_chars = arg_type_chars
diff --git a/pyopencl/array.py b/pyopencl/array.py
index d068dfd5f457c7f6aa765016bef0162f600a015b..71a2206f1b1cf46ec9c0b28bb2f0d64256d0aeb6 100644
--- a/pyopencl/array.py
+++ b/pyopencl/array.py
@@ -30,12 +30,70 @@ OTHER DEALINGS IN THE SOFTWARE.
-import numpy
+import numpy as np
 import pyopencl.elementwise as elementwise
 import pyopencl as cl
 
 #from pytools import memoize_method
 
 
+# {{{ vector types
+
+class vec:
+    pass
+
+def _create_vector_types():
+    field_names = ["x", "y", "z", "w"]
+
+    name_to_dtype = {}
+    dtype_to_name = {}
+
+    counts = [2, 3, 4, 8, 16]
+    for base_name, base_type in [
+            ('char', np.int8),
+            ('uchar', np.uint8),
+            ('short', np.int16),
+            ('ushort', np.uint16),
+            ('int', np.int32),
+            ('uint', np.uint32),
+            ('long', np.int64),
+            ('ulong', np.uint64),
+            ('float', np.float32),
+            ('double', np.float64),
+            ]:
+        for count in counts:
+            name = "%s%d" % (base_name, count)
+
+            titles = field_names[:count]
+            if len(titles) < count:
+                titles.extend((count-len(titles))*[None])
+
+            dtype = np.dtype(dict(
+                names=["s%d" % i for i in range(count)],
+                formats=[base_type]*count,
+                titles=titles))
+
+            name_to_dtype[name] = dtype
+            dtype_to_name[dtype] = name
+
+            setattr(vec, name, dtype)
+
+            my_field_names = ",".join(field_names[:count])
+            my_field_names_defaulted = ",".join(
+                    "%s=0" % fn for fn in field_names[:count])
+            setattr(vec, "make_"+name,
+                    staticmethod(eval(
+                        "lambda %s: array((%s), dtype=my_dtype)"
+                        % (my_field_names_defaulted, my_field_names),
+                        dict(array=np.array, my_dtype=dtype))))
+
+    vec._dtype_to_c_name = dtype_to_name
+    vec._c_name_to_dtype = name_to_dtype
+
+_create_vector_types()
+
+# }}}
+
+# {{{ helper functionality
 
 def splay(queue, n):
     dev = queue.device
@@ -126,6 +184,10 @@ def _should_be_cqa(what):
             "versions 2011.x of PyOpenCL." % (what, what),
             DeprecationWarning, 3)
 
+# }}}
+
+# {{{ array class
+
 class Array(object):
     """A :mod:`pyopencl` Array is used to do array-based calculation
     on a compute device.
@@ -183,7 +245,7 @@ class Array(object):
         self.queue = queue
         self.shape = shape
-        self.dtype = numpy.dtype(dtype)
+        self.dtype = np.dtype(dtype)
         if order not in ["C", "F"]:
             raise ValueError("order must be either 'C' or 'F'")
         self.order = order
@@ -222,7 +284,7 @@ class Array(object):
     def get(self, queue=None, ary=None, async=False):
         if ary is None:
-            ary = numpy.empty(self.shape, self.dtype, order=self.order)
+            ary = np.empty(self.shape, self.dtype, order=self.order)
         else:
             if ary.size != self.size:
                 raise TypeError("'ary' has non-matching type")
@@ -543,7 +605,9 @@ class Array(object):
     def __gt__(self, other):
         raise NotImplementedError
 
+# }}}
+
+# {{{ creation helpers
 
 def _to_device(queue, ary, allocator=None, async=False):
     if ary.flags.f_contiguous:
@@ -630,7 +694,7 @@ def _arange(queue, *args, **kwargs):
     inf.step = None
     inf.dtype = None
 
-    if isinstance(args[-1], numpy.dtype):
+    if isinstance(args[-1], np.dtype):
         dtype = args[-1]
         args = args[:-1]
         explicit_dtype = True
@@ -667,10 +731,10 @@ def _arange(queue, *args, **kwargs):
     if inf.step is None:
         inf.step = 1
     if inf.dtype is None:
-        inf.dtype = numpy.array([inf.start, inf.stop, inf.step]).dtype
+        inf.dtype = np.array([inf.start, inf.stop, inf.step]).dtype
 
     # actual functionality ----------------------------------------------------
-    dtype = numpy.dtype(inf.dtype)
+    dtype = np.dtype(inf.dtype)
     start = dtype.type(inf.start)
     step = dtype.type(inf.step)
     stop = dtype.type(inf.stop)
@@ -707,9 +771,9 @@ def arange(*args, **kwargs):
     return _arange(*args, **kwargs)
 
+# }}}
 
-
-
+# {{{ take/put
 
 @elwise_kernel_runner
 def _take(result, ary, indices):
@@ -900,8 +964,9 @@ def multi_put(arrays, dest_indices, dest_shape=None, out=None, queue=None):
     return out
 
+# }}}
 
-
+# {{{ conditionals
 
 @elwise_kernel_runner
 def _if_positive(result, criterion, then_, else_):
@@ -943,10
+1008,9 @@ def minimum(a, b, out=None, queue=None): return if_positive(a.mul_add(1, b, -1, queue=queue), b, a, queue=queue, out=out) +# }}} - - -# reductions ------------------------------------------------------------------ +# {{{ reductions _builtin_min = min _builtin_max = max @@ -988,6 +1052,8 @@ def _make_subset_minmax_kernel(what): subset_min = _make_subset_minmax_kernel("min") subset_max = _make_subset_minmax_kernel("max") +# }}} + diff --git a/pyopencl/clrandom.py b/pyopencl/clrandom.py index cdd53163e88745f8c8d858f357e78a3bef7c1310..c2cea99cfe41dff89009d538a17fdafbba9954ce 100644 --- a/pyopencl/clrandom.py +++ b/pyopencl/clrandom.py @@ -178,7 +178,7 @@ md5_code = """ d += 0x10325476; """ -import numpy +import numpy as np @@ -186,7 +186,7 @@ import numpy @context_dependent_memoize def get_rand_kernel(context, dtype): from pyopencl.elementwise import get_elwise_kernel - if dtype == numpy.float32: + if dtype == np.float32: return get_elwise_kernel(context, "float *dest, unsigned int seed", md5_code + """ @@ -200,7 +200,7 @@ def get_rand_kernel(context, dtype): dest[i] = d*POW_2_M32; """, "md5_rng_float") - elif dtype == numpy.float64: + elif dtype == np.float64: return get_elwise_kernel(context, "double *dest, unsigned int seed", md5_code + """ @@ -215,7 +215,7 @@ def get_rand_kernel(context, dtype): } """, "md5_rng_float") - elif dtype in [numpy.int32, numpy.uint32]: + elif dtype in [np.int32, np.uint32]: return get_elwise_kernel(context, "unsigned int *dest, unsigned int seed", md5_code + """ @@ -239,13 +239,13 @@ def _rand(output, seed): return get_rand_kernel(output.context, output.dtype) def fill_rand(result): - _rand(result, numpy.random.randint(2**31-1)) + _rand(result, np.random.randint(2**31-1)) def rand(context, queue, shape, dtype): from pyopencl.array import Array result = Array(queue, shape, dtype) - _rand(result, numpy.random.randint(2**31-1)) + _rand(result, np.random.randint(2**31-1)) return result @@ -257,7 +257,7 @@ if __name__ == "__main__": if "generate" in sys.argv[1:]: N = 256 print N, "MB" - r = rand(ctx, queue, (N*2**18,), numpy.uint32) + r = rand(ctx, queue, (N*2**18,), np.uint32) print "generated" r.get().tofile("random.dat") print "written" @@ -265,9 +265,9 @@ if __name__ == "__main__": else: from pylab import plot, show, subplot N = 250 - r1 = rand(ctx, queue, (N,), numpy.uint32) - r2 = rand(ctx, queue, (N,), numpy.int32) - r3 = rand(ctx, queue, (N,), numpy.float32) + r1 = rand(ctx, queue, (N,), np.uint32) + r2 = rand(ctx, queue, (N,), np.int32) + r3 = rand(ctx, queue, (N,), np.float32) subplot(131); plot( r1.get(),"x-") subplot(132); plot( r2.get(),"x-") diff --git a/pyopencl/elementwise.py b/pyopencl/elementwise.py index 70ae34cd885d3974605d0e5dceae09ad56160ce7..f3994ed621d2fe282269a1ce16c026943f110097 100644 --- a/pyopencl/elementwise.py +++ b/pyopencl/elementwise.py @@ -31,7 +31,7 @@ OTHER DEALINGS IN THE SOFTWARE. 
from pyopencl.tools import context_dependent_memoize -import numpy +import numpy as np import pyopencl as cl from pyopencl.tools import dtype_to_ctype, VectorArg, ScalarArg @@ -84,13 +84,13 @@ def get_elwise_kernel_and_types(context, arguments, operation, parsed_args = arguments for arg in parsed_args: - if numpy.float64 == arg.dtype: + if np.float64 == arg.dtype: preamble = ( "#pragma OPENCL EXTENSION cl_khr_fp64: enable\n\n\n" + preamble) break - parsed_args.append(ScalarArg(numpy.uintp, "n")) + parsed_args.append(ScalarArg(np.uintp, "n")) prg = get_elwise_program(context, parsed_args, operation, name, keep, options, preamble, **kwargs) @@ -297,7 +297,7 @@ def get_linear_combination_kernel(summand_descriptors, summands.append("a%d*x%d[i]" % (i, i)) args.append(VectorArg(dtype_z, "z")) - args.append(ScalarArg(numpy.uintp, "n")) + args.append(ScalarArg(np.uintp, "n")) mod = get_elwise_module(args, "z[i] = " + " + ".join(summands), diff --git a/pyopencl/reduction.py b/pyopencl/reduction.py index db894764f2a89372b8d5825326ffabe67ed06b68..9348253d7fefe1b657bf69c8ef9872ce075c555d 100644 --- a/pyopencl/reduction.py +++ b/pyopencl/reduction.py @@ -194,7 +194,7 @@ def get_reduction_source( from mako.template import Template from pytools import all from pyopencl.tools import has_double_support - src = Template(KERNEL).render( + src = str(Template(KERNEL).render( out_type=out_type, arguments=arguments, group_size=group_size, @@ -206,7 +206,7 @@ def get_reduction_source( preamble=preamble, double_support=all( has_double_support(dev) for dev in devices) - ) + )) from pytools import Record class ReductionInfo(Record): diff --git a/pyopencl/tools.py b/pyopencl/tools.py index 0bdfa292600a6f7fb6f824a293316d585ba8f309..76470a6190ec20b254d530467e5c97d099b251ef 100644 --- a/pyopencl/tools.py +++ b/pyopencl/tools.py @@ -29,9 +29,10 @@ OTHER DEALINGS IN THE SOFTWARE. 
-import numpy
+import numpy as np
 from decorator import decorator
 import pyopencl as cl
+import pyopencl.array as cl_array
 
 
 
@@ -109,35 +110,38 @@ def dtype_to_ctype(dtype):
     if dtype is None:
         raise ValueError("dtype may not be None")
 
-    dtype = numpy.dtype(dtype)
-    if dtype == numpy.int64:
+    dtype = np.dtype(dtype)
+    if dtype == np.int64:
         return "long"
-    elif dtype == numpy.uint64:
+    elif dtype == np.uint64:
         return "unsigned long"
-    elif dtype == numpy.int32:
+    elif dtype == np.int32:
         return "int"
-    elif dtype == numpy.uint32:
+    elif dtype == np.uint32:
         return "unsigned int"
-    elif dtype == numpy.int16:
+    elif dtype == np.int16:
         return "short int"
-    elif dtype == numpy.uint16:
+    elif dtype == np.uint16:
         return "short unsigned int"
-    elif dtype == numpy.int8:
+    elif dtype == np.int8:
         return "signed char"
-    elif dtype == numpy.uint8:
+    elif dtype == np.uint8:
         return "unsigned char"
-    elif dtype == numpy.bool:
+    elif dtype == np.bool:
         return "bool"
-    elif dtype == numpy.float32:
+    elif dtype == np.float32:
         return "float"
-    elif dtype == numpy.float64:
+    elif dtype == np.float64:
         return "double"
-    elif dtype == numpy.complex64:
+    elif dtype == np.complex64:
         return "complex float"
-    elif dtype == numpy.complex128:
+    elif dtype == np.complex128:
         return "complex double"
     else:
-        raise ValueError, "unable to map dtype '%s'" % dtype
+        try:
+            return cl_array.vec._dtype_to_c_name[dtype]
+        except KeyError:
+            raise ValueError("unable to map dtype '%s'" % dtype)
 
 # }}}
 
@@ -147,7 +151,7 @@ def dtype_to_ctype(dtype):
 # {{{ C argument lists --------------------------------------------------------
 
 class Argument:
     def __init__(self, dtype, name, vector_len=1):
-        self.dtype = numpy.dtype(dtype)
+        self.dtype = np.dtype(dtype)
         self.name = name
         self.vector_len = vector_len
@@ -219,19 +223,23 @@ def parse_c_arg(c_arg):
     else:
         vector_len = 1
 
-    if tp == "float": dtype = numpy.float32
-    elif tp == "double": dtype = numpy.float64
-    elif tp in ["int", "signed int"]: dtype = numpy.int32
-    elif tp in ["unsigned", "unsigned int"]: dtype = numpy.uint32
-    elif tp in ["long", "long int"]: dtype = numpy.int64
+    if tp == "float": dtype = np.float32
+    elif tp == "double": dtype = np.float64
+    elif tp in ["int", "signed int"]: dtype = np.int32
+    elif tp in ["unsigned", "unsigned int"]: dtype = np.uint32
+    elif tp in ["long", "long int"]: dtype = np.int64
     elif tp in ["unsigned long", "unsigned long int"]:
-        dtype = numpy.uint64
-    elif tp in ["short", "short int"]: dtype = numpy.int16
-    elif tp in ["unsigned short", "unsigned short int"]: dtype = numpy.uint16
-    elif tp in ["char"]: dtype = numpy.int8
-    elif tp in ["unsigned char"]: dtype = numpy.uint8
-    elif tp in ["bool"]: dtype = numpy.bool
-    else: raise ValueError, "unknown type '%s'" % tp
+        dtype = np.uint64
+    elif tp in ["short", "short int"]: dtype = np.int16
+    elif tp in ["unsigned short", "unsigned short int"]: dtype = np.uint16
+    elif tp in ["char"]: dtype = np.int8
+    elif tp in ["unsigned char"]: dtype = np.uint8
+    elif tp in ["bool"]: dtype = np.bool
+    else:
+        try:
+            dtype = cl_array.vec._c_name_to_dtype[tp]
+        except KeyError:
+            raise ValueError("unknown type '%s'" % tp)
 
     return arg_class(dtype, name, vector_len)
diff --git a/test/test_array.py b/test/test_array.py
index 863530c2bf213e9c585e084b2b295a3bd9abe87a..adabdcfc8780e655b83faafa14b233b409e673c4 100644
--- a/test/test_array.py
+++ b/test/test_array.py
@@ -1,5 +1,5 @@
 #!
/usr/bin/env python -import numpy +import numpy as np import numpy.linalg as la import sys import pytools.test @@ -29,14 +29,14 @@ def test_pow_array(ctx_getter): context = ctx_getter() queue = cl.CommandQueue(context) - a = numpy.array([1,2,3,4,5]).astype(numpy.float32) + a = np.array([1,2,3,4,5]).astype(np.float32) a_gpu = cl_array.to_device(queue, a) result = pow(a_gpu,a_gpu).get() - assert (numpy.abs(a**a - result) < 1e-3).all() + assert (np.abs(a**a - result) < 1e-3).all() result = (a_gpu**a_gpu).get() - assert (numpy.abs(pow(a, a) - result) < 1e-3).all() + assert (np.abs(pow(a, a) - result) < 1e-3).all() @@ -46,11 +46,11 @@ def test_pow_number(ctx_getter): context = ctx_getter() queue = cl.CommandQueue(context) - a = numpy.array([1,2,3,4,5,6,7,8,9,10]).astype(numpy.float32) + a = np.array([1,2,3,4,5,6,7,8,9,10]).astype(np.float32) a_gpu = cl_array.to_device(queue, a) result = pow(a_gpu, 2).get() - assert (numpy.abs(a**2 - result) < 1e-3).all() + assert (np.abs(a**2 - result) < 1e-3).all() @@ -59,7 +59,7 @@ def test_abs(ctx_getter): context = ctx_getter() queue = cl.CommandQueue(context) - a = -cl_array.arange(queue, 111, dtype=numpy.float32) + a = -cl_array.arange(queue, 111, dtype=np.float32) res = a.get() for i in range(111): @@ -79,7 +79,7 @@ def test_len(ctx_getter): context = ctx_getter() queue = cl.CommandQueue(context) - a = numpy.array([1,2,3,4,5,6,7,8,9,10]).astype(numpy.float32) + a = np.array([1,2,3,4,5,6,7,8,9,10]).astype(np.float32) a_cpu = cl_array.to_device(queue, a) assert len(a_cpu) == 10 @@ -96,11 +96,11 @@ def test_multiply(ctx_getter): for sz in [10, 50000]: for dtype, scalars in [ - (numpy.float32, [2]), - #(numpy.complex64, [2, 2j]) + (np.float32, [2]), + #(np.complex64, [2, 2j]) ]: for scalar in scalars: - a = numpy.arange(sz).astype(dtype) + a = np.arange(sz).astype(dtype) a_gpu = cl_array.to_device(queue, a) a_doubled = (scalar * a_gpu).get() @@ -113,7 +113,7 @@ def test_multiply_array(ctx_getter): context = ctx_getter() queue = cl.CommandQueue(context) - a = numpy.array([1,2,3,4,5,6,7,8,9,10]).astype(numpy.float32) + a = np.array([1,2,3,4,5,6,7,8,9,10]).astype(np.float32) a_gpu = cl_array.to_device(queue, a) b_gpu = cl_array.to_device(queue, a) @@ -132,7 +132,7 @@ def test_addition_array(ctx_getter): context = ctx_getter() queue = cl.CommandQueue(context) - a = numpy.array([1,2,3,4,5,6,7,8,9,10]).astype(numpy.float32) + a = np.array([1,2,3,4,5,6,7,8,9,10]).astype(np.float32) a_gpu = cl_array.to_device(queue, a) a_added = (a_gpu+a_gpu).get() @@ -148,7 +148,7 @@ def test_addition_scalar(ctx_getter): context = ctx_getter() queue = cl.CommandQueue(context) - a = numpy.array([1,2,3,4,5,6,7,8,9,10]).astype(numpy.float32) + a = np.array([1,2,3,4,5,6,7,8,9,10]).astype(np.float32) a_gpu = cl_array.to_device(queue, a) a_added = (7+a_gpu).get() @@ -161,8 +161,8 @@ def test_addition_scalar(ctx_getter): def test_substract_array(ctx_getter): """Test the substraction of two arrays.""" #test data - a = numpy.array([1,2,3,4,5,6,7,8,9,10]).astype(numpy.float32) - b = numpy.array([10,20,30,40,50,60,70,80,90,100]).astype(numpy.float32) + a = np.array([1,2,3,4,5,6,7,8,9,10]).astype(np.float32) + b = np.array([10,20,30,40,50,60,70,80,90,100]).astype(np.float32) context = ctx_getter() queue = cl.CommandQueue(context) @@ -187,7 +187,7 @@ def test_substract_scalar(ctx_getter): queue = cl.CommandQueue(context) #test data - a = numpy.array([1,2,3,4,5,6,7,8,9,10]).astype(numpy.float32) + a = np.array([1,2,3,4,5,6,7,8,9,10]).astype(np.float32) #convert a to a gpu object a_gpu = 
cl_array.to_device(queue, a) @@ -208,7 +208,7 @@ def test_divide_scalar(ctx_getter): context = ctx_getter() queue = cl.CommandQueue(context) - a = numpy.array([1,2,3,4,5,6,7,8,9,10]).astype(numpy.float32) + a = np.array([1,2,3,4,5,6,7,8,9,10]).astype(np.float32) a_gpu = cl_array.to_device(queue, a) result = (a_gpu/2).get() @@ -228,17 +228,17 @@ def test_divide_array(ctx_getter): queue = cl.CommandQueue(context) #test data - a = numpy.array([10,20,30,40,50,60,70,80,90,100]).astype(numpy.float32) - b = numpy.array([10,10,10,10,10,10,10,10,10,10]).astype(numpy.float32) + a = np.array([10,20,30,40,50,60,70,80,90,100]).astype(np.float32) + b = np.array([10,10,10,10,10,10,10,10,10,10]).astype(np.float32) a_gpu = cl_array.to_device(queue, a) b_gpu = cl_array.to_device(queue, b) a_divide = (a_gpu/b_gpu).get() - assert (numpy.abs(a/b - a_divide) < 1e-3).all() + assert (np.abs(a/b - a_divide) < 1e-3).all() a_divide = (b_gpu/a_gpu).get() - assert (numpy.abs(b/a - a_divide) < 1e-3).all() + assert (np.abs(b/a - a_divide) < 1e-3).all() @@ -251,9 +251,9 @@ def test_random(ctx_getter): from pyopencl.clrandom import rand as clrand if has_double_support(context.devices[0]): - dtypes = [numpy.float32, numpy.float64] + dtypes = [np.float32, np.float64] else: - dtypes = [numpy.float32] + dtypes = [np.float32] for dtype in dtypes: a = clrand(context, queue, (10, 100), dtype=dtype).get() @@ -271,7 +271,7 @@ def test_nan_arithmetic(ctx_getter): def make_nan_contaminated_vector(size): shape = (size,) - a = numpy.random.randn(*shape).astype(numpy.float32) + a = np.random.randn(*shape).astype(np.float32) #for i in range(0, shape[0], 3): #a[i] = float('nan') from random import randrange @@ -290,7 +290,7 @@ def test_nan_arithmetic(ctx_getter): ab_gpu = (a_gpu*b_gpu).get() for i in range(size): - assert numpy.isnan(ab[i]) == numpy.isnan(ab_gpu[i]) + assert np.isnan(ab[i]) == np.isnan(ab_gpu[i]) @@ -302,8 +302,8 @@ def test_elwise_kernel(ctx_getter): from pyopencl.clrandom import rand as clrand - a_gpu = clrand(context, queue, (50,), numpy.float32) - b_gpu = clrand(context, queue, (50,), numpy.float32) + a_gpu = clrand(context, queue, (50,), np.float32) + b_gpu = clrand(context, queue, (50,), np.float32) from pyopencl.elementwise import ElementwiseKernel lin_comb = ElementwiseKernel(context, @@ -324,8 +324,8 @@ def test_take(ctx_getter): context = ctx_getter() queue = cl.CommandQueue(context) - idx = cl_array.arange(queue, 0, 200000, 2, dtype=numpy.uint32) - a = cl_array.arange(queue, 0, 600000, 3, dtype=numpy.float32) + idx = cl_array.arange(queue, 0, 200000, 2, dtype=np.uint32) + a = cl_array.arange(queue, 0, 600000, 3, dtype=np.float32) result = cl_array.take(a, idx) assert ((3*idx).get() == result.get()).all() @@ -338,8 +338,8 @@ def test_arange(ctx_getter): queue = cl.CommandQueue(context) n = 5000 - a = cl_array.arange(queue, n, dtype=numpy.float32) - assert (numpy.arange(n, dtype=numpy.float32) == a.get()).all() + a = cl_array.arange(queue, n, dtype=np.float32) + assert (np.arange(n, dtype=np.float32) == a.get()).all() @@ -350,7 +350,7 @@ def test_reverse(ctx_getter): queue = cl.CommandQueue(context) n = 5000 - a = numpy.arange(n).astype(numpy.float32) + a = np.arange(n).astype(np.float32) a_gpu = cl_array.to_device(queue, a) a_gpu = a_gpu.reverse() @@ -367,10 +367,10 @@ def test_sum(ctx_getter): from pyopencl.clrandom import rand as clrand - a_gpu = clrand(context, queue, (200000,), numpy.float32) + a_gpu = clrand(context, queue, (200000,), np.float32) a = a_gpu.get() - sum_a = numpy.sum(a) + sum_a = np.sum(a) 
sum_a_gpu = cl_array.sum(a_gpu).get() assert abs(sum_a_gpu-sum_a)/abs(sum_a) < 1e-4 @@ -386,16 +386,16 @@ def test_minmax(ctx_getter): from pyopencl.clrandom import rand as clrand if has_double_support(context.devices[0]): - dtypes = [numpy.float64, numpy.float32, numpy.int32] + dtypes = [np.float64, np.float32, np.int32] else: - dtypes = [numpy.float32, numpy.int32] + dtypes = [np.float32, np.int32] for what in ["min", "max"]: for dtype in dtypes: a_gpu = clrand(context, queue, (200000,), dtype) a = a_gpu.get() - op_a = getattr(numpy, what)(a) + op_a = getattr(np, what)(a) op_a_gpu = getattr(cl_array, what)(a_gpu).get() assert op_a_gpu == op_a, (op_a_gpu, op_a, dtype, what) @@ -415,16 +415,16 @@ def test_subset_minmax(ctx_getter): l_m = l_a - l_a // gran + 1 if has_double_support(context.devices[0]): - dtypes = [numpy.float64, numpy.float32, numpy.int32] + dtypes = [np.float64, np.float32, np.int32] else: - dtypes = [numpy.float32, numpy.int32] + dtypes = [np.float32, np.int32] for dtype in dtypes: a_gpu = clrand(context, queue, (l_a,), dtype) a = a_gpu.get() meaningful_indices_gpu = cl_array.zeros( - queue, l_m, dtype=numpy.int32) + queue, l_m, dtype=np.int32) meaningful_indices = meaningful_indices_gpu.get() j = 0 for i in range(len(meaningful_indices)): @@ -437,7 +437,7 @@ def test_subset_minmax(ctx_getter): queue, meaningful_indices) b = a[meaningful_indices] - min_a = numpy.min(b) + min_a = np.min(b) min_a_gpu = cl_array.subset_min(meaningful_indices_gpu, a_gpu).get() assert min_a_gpu == min_a @@ -451,12 +451,12 @@ def test_dot(ctx_getter): queue = cl.CommandQueue(context) from pyopencl.clrandom import rand as clrand - a_gpu = clrand(context, queue, (200000,), numpy.float32) + a_gpu = clrand(context, queue, (200000,), np.float32) a = a_gpu.get() - b_gpu = clrand(context, queue, (200000,), numpy.float32) + b_gpu = clrand(context, queue, (200000,), np.float32) b = b_gpu.get() - dot_ab = numpy.dot(a, b) + dot_ab = np.dot(a, b) dot_ab_gpu = cl_array.dot(a_gpu, b_gpu).get() @@ -492,8 +492,8 @@ def test_if_positive(ctx_getter): from pyopencl.clrandom import rand as clrand l = 20000 - a_gpu = clrand(context, queue, (l,), numpy.float32) - b_gpu = clrand(context, queue, (l,), numpy.float32) + a_gpu = clrand(context, queue, (l,), np.float32) + b_gpu = clrand(context, queue, (l,), np.float32) a = a_gpu.get() b = b_gpu.get() @@ -501,10 +501,10 @@ def test_if_positive(ctx_getter): min_a_b_gpu = cl_array.minimum(a_gpu, b_gpu) print(max_a_b_gpu) - print(numpy.maximum(a, b)) + print(np.maximum(a, b)) - assert la.norm(max_a_b_gpu.get()- numpy.maximum(a, b)) == 0 - assert la.norm(min_a_b_gpu.get()- numpy.minimum(a, b)) == 0 + assert la.norm(max_a_b_gpu.get()- np.maximum(a, b)) == 0 + assert la.norm(min_a_b_gpu.get()- np.minimum(a, b)) == 0 @pytools.test.mark_test.opencl def test_take_put(ctx_getter): @@ -514,11 +514,11 @@ def test_take_put(ctx_getter): for n in [5, 17, 333]: one_field_size = 8 buf_gpu = cl_array.zeros(queue, - n*one_field_size, dtype=numpy.float32) + n*one_field_size, dtype=np.float32) dest_indices = cl_array.to_device(queue, - numpy.array([ 0, 1, 2, 3, 32, 33, 34, 35], dtype=numpy.uint32)) + np.array([ 0, 1, 2, 3, 32, 33, 34, 35], dtype=np.uint32)) read_map = cl_array.to_device(queue, - numpy.array([7, 6, 5, 4, 3, 2, 1, 0], dtype=numpy.uint32)) + np.array([7, 6, 5, 4, 3, 2, 1, 0], dtype=np.uint32)) cl_array.multi_take_put( arrays=[buf_gpu for i in range(n)], @@ -537,20 +537,20 @@ def test_astype(ctx_getter): if not has_double_support(context.devices[0]): return - a_gpu = 
clrand(context, queue, (2000,), dtype=numpy.float32) + a_gpu = clrand(context, queue, (2000,), dtype=np.float32) - a = a_gpu.get().astype(numpy.float64) - a2 = a_gpu.astype(numpy.float64).get() + a = a_gpu.get().astype(np.float64) + a2 = a_gpu.astype(np.float64).get() - assert a2.dtype == numpy.float64 + assert a2.dtype == np.float64 assert la.norm(a - a2) == 0, (a, a2) - a_gpu = clrand(context, queue, (2000,), dtype=numpy.float64) + a_gpu = clrand(context, queue, (2000,), dtype=np.float64) - a = a_gpu.get().astype(numpy.float32) - a2 = a_gpu.astype(numpy.float32).get() + a = a_gpu.get().astype(np.float32) + a2 = a_gpu.astype(np.float32).get() - assert a2.dtype == numpy.float32 + assert a2.dtype == np.float32 assert la.norm(a - a2)/la.norm(a) < 1e-7 diff --git a/test/test_clmath.py b/test/test_clmath.py index 77ae77fb6372d239a752283245357d160b40dc92..b8f648de08e83a5711ed372f7ba6250a92176ec9 100644 --- a/test/test_clmath.py +++ b/test/test_clmath.py @@ -1,6 +1,6 @@ from __future__ import division import math -import numpy +import numpy as np import pytools.test def have_cl(): @@ -54,21 +54,21 @@ def make_unary_function_test(name, limits=(0, 1), threshold=0): queue = cl.CommandQueue(context) gpu_func = getattr(clmath, name) - cpu_func = getattr(numpy, numpy_func_names.get(name, name)) + cpu_func = getattr(np, numpy_func_names.get(name, name)) if has_double_support(context.devices[0]): - dtypes = [numpy.float32, numpy.float64] + dtypes = [np.float32, np.float64] else: - dtypes = [numpy.float32] + dtypes = [np.float32] for s in sizes: for dtype in dtypes: args = cl_array.arange(queue, a, b, (b-a)/s, - dtype=numpy.float32) + dtype=np.float32) gpu_results = gpu_func(args).get() cpu_results = cpu_func(args.get()) - max_err = numpy.max(numpy.abs(cpu_results - gpu_results)) + max_err = np.max(np.abs(cpu_results - gpu_results)) assert (max_err <= threshold).all(), \ (max_err, name, dtype) @@ -107,8 +107,8 @@ def test_fmod(ctx_getter): queue = cl.CommandQueue(context) for s in sizes: - a = cl_array.arange(queue, s, dtype=numpy.float32)/10 - a2 = cl_array.arange(queue, s, dtype=numpy.float32)/45.2 + 0.1 + a = cl_array.arange(queue, s, dtype=np.float32)/10 + a2 = cl_array.arange(queue, s, dtype=np.float32)/45.2 + 0.1 b = clmath.fmod(a, a2) a = a.get() @@ -124,8 +124,8 @@ def test_ldexp(ctx_getter): queue = cl.CommandQueue(context) for s in sizes: - a = cl_array.arange(queue, s, dtype=numpy.float32) - a2 = cl_array.arange(queue, s, dtype=numpy.float32)*1e-3 + a = cl_array.arange(queue, s, dtype=np.float32) + a2 = cl_array.arange(queue, s, dtype=np.float32)*1e-3 b = clmath.ldexp(a,a2) a = a.get() @@ -141,7 +141,7 @@ def test_modf(ctx_getter): queue = cl.CommandQueue(context) for s in sizes: - a = cl_array.arange(queue, s, dtype=numpy.float32)/10 + a = cl_array.arange(queue, s, dtype=np.float32)/10 fracpart, intpart = clmath.modf(a) a = a.get() @@ -160,7 +160,7 @@ def test_frexp(ctx_getter): queue = cl.CommandQueue(context) for s in sizes: - a = cl_array.arange(queue, s, dtype=numpy.float32)/10 + a = cl_array.arange(queue, s, dtype=np.float32)/10 significands, exponents = clmath.frexp(a) a = a.get() diff --git a/test/test_wrapper.py b/test/test_wrapper.py index d312260a52049f5c2ca3e1f111d14807878a6ef9..3b334cb9f0ddfe927acfd695bb6309823c68649d 100644 --- a/test/test_wrapper.py +++ b/test/test_wrapper.py @@ -1,5 +1,5 @@ from __future__ import division -import numpy +import numpy as np import numpy.linalg as la import pytools.test @@ -17,6 +17,7 @@ def have_cl(): if have_cl(): import pyopencl as cl + import 
pyopencl.array as cl_array from pyopencl.tools import pytest_generate_tests_for_pyopencl \ as pytest_generate_tests @@ -203,7 +204,7 @@ class TestCL: { a[get_global_id(0)] *= (b+c); } """).build() - a = numpy.random.rand(50000) + a = np.random.rand(50000) queue = cl.CommandQueue(context) mf = cl.mem_flags a_buf = cl.Buffer(context, mf.READ_WRITE | mf.COPY_HOST_PTR, hostbuf=a) @@ -220,9 +221,9 @@ class TestCL: except cl.LogicError: pass - prg.mult(queue, a.shape, None, a_buf, numpy.float32(2), numpy.int32(3)) + prg.mult(queue, a.shape, None, a_buf, np.float32(2), np.int32(3)) - a_result = numpy.empty_like(a) + a_result = np.empty_like(a) cl.enqueue_read_buffer(queue, a_buf, a_result).wait() @pytools.test.mark_test.opencl @@ -253,7 +254,7 @@ class TestCL: } """).build() - a = numpy.random.rand(1024, 1024, 4).astype(numpy.float32) + a = np.random.rand(1024, 1024, 4).astype(np.float32) queue = cl.CommandQueue(context) mf = cl.mem_flags a_img = cl.Image(context, mf.READ_ONLY | mf.COPY_HOST_PTR, @@ -264,9 +265,9 @@ class TestCL: samp = cl.Sampler(context, False, cl.addressing_mode.CLAMP, cl.filter_mode.NEAREST) - prg.copy_image(queue, a.shape, None, a_dest, a_img, samp, numpy.int32(a.shape[0])) + prg.copy_image(queue, a.shape, None, a_dest, a_img, samp, np.int32(a.shape[0])) - a_result = numpy.empty_like(a) + a_result = np.empty_like(a) cl.enqueue_read_buffer(queue, a_dest, a_result, is_blocking=True) print(a_result.dtype) @@ -279,8 +280,8 @@ class TestCL: queue = cl.CommandQueue(context) mf = cl.mem_flags - a = numpy.random.rand(50000).astype(numpy.float32) - b = numpy.empty_like(a) + a = np.random.rand(50000).astype(np.float32) + b = np.empty_like(a) buf1 = cl.Buffer(context, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=a) buf2 = cl.Buffer(context, mf.WRITE_ONLY, b.nbytes) @@ -324,9 +325,26 @@ class TestCL: assert MemoryPool.bin_number(asize) == bin_nr, s assert asize < asize*(1+1/8) + @pytools.test.mark_test.opencl + def test_vector_args(self, ctx_getter): + context = ctx_getter() + queue = cl.CommandQueue(context) + + prg = cl.Program(context, """ + __kernel void set_vec(float4 x, __global float4 *dest) + { dest[get_global_id(0)] = x; } + """).build() + + x = cl_array.vec.make_float4(1,2,3,4) + dest = np.empty(50000, cl_array.vec.float4) + mf = cl.mem_flags + dest_buf = cl.Buffer(context, mf.READ_WRITE | mf.COPY_HOST_PTR, hostbuf=dest) + prg.set_vec(queue, dest.shape, None, x, dest_buf) + cl.enqueue_read_buffer(queue, dest_buf, dest).wait() + assert (dest == x).all()
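
For reference, the following is a minimal host-side sketch of how the vector types added by this patch are intended to be used. It mirrors the new `pyopencl.array.vec` documentation and the `test_vector_args` test above, but it is not part of the patch itself: the kernel name `set_vec`, the 1024-element array size, and the use of `pyopencl.create_some_context()` for setup are illustrative assumptions.

    # Illustrative only (not part of the patch): broadcast one float4 value
    # into a buffer of float4 elements and read the result back.
    import numpy as np
    import pyopencl as cl
    import pyopencl.array as cl_array

    ctx = cl.create_some_context()   # assumed convenience helper for setup
    queue = cl.CommandQueue(ctx)

    # vec.float4 is a numpy structured dtype; make_float4 builds one value of it.
    x = cl_array.vec.make_float4(1, 2, 3, 4)
    dest = np.empty(1024, dtype=cl_array.vec.float4)

    prg = cl.Program(ctx, """
        __kernel void set_vec(float4 x, __global float4 *dest)
        { dest[get_global_id(0)] = x; }
        """).build()

    mf = cl.mem_flags
    dest_buf = cl.Buffer(ctx, mf.WRITE_ONLY, dest.nbytes)

    # The float4 argument is passed by value; no manual packing is required.
    prg.set_vec(queue, dest.shape, None, x, dest_buf)
    cl.enqueue_read_buffer(queue, dest_buf, dest).wait()

    assert (dest == x).all()

Since `vec.float4` is an ordinary 16-byte :mod:`numpy` structured dtype, the resulting host array can be sliced, compared, and filled with the usual numpy operations before or after the transfer.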