diff --git a/doc/source/array.rst b/doc/source/array.rst index 6b5957afbc5b8d12e67f81d1153cced99adc0af1..c24304a7ba56001608f556ecfa7cbe8d6a6845b7 100644 --- a/doc/source/array.rst +++ b/doc/source/array.rst @@ -9,18 +9,28 @@ The :class:`Array` Class .. method:: __call__(self, size) -.. class:: Array(context, shape, dtype, order="C", allocator=None, base=None, data=None, queue=None) +.. class:: Array(cqa, shape, dtype, order="C", allocator=None, base=None, data=None, queue=None) A :class:`numpy.ndarray` work-alike that stores its data and performs its computations on the compute device. *shape* and *dtype* work exactly as in :mod:`numpy`. Arithmetic methods in :class:`Array` support the - broadcasting of scalars. (e.g. `array+5`) If the + broadcasting of scalars. (e.g. `array+5`) + + *cqa* can be a :class:`pyopencl.Context`, :class:`pyopencl.CommandQueue` + or an allocator, as described below. If it is a queue, the *queue* argument may not + also be passed; if it is an allocator, the *allocator* argument may not also be passed. + + *queue* (or *cqa*, as the case may be) specifies the queue in which the array + carries out its computations by default. *allocator* is a callable that, upon being called with an argument of the number of bytes to be allocated, returns an object that can be cast to an :class:`int` representing the address of the newly allocated memory. (See :class:`DefaultAllocator`.) + .. versionchanged:: 2011.1 + Renamed *context* to *cqa*, made it general-purpose. + .. attribute :: data The :class:`pyopencl.MemoryObject` instance created for the memory that backs @@ -102,22 +112,28 @@ The :class:`Array` Class Constructing :class:`Array` Instances ---------------------------------------- -.. function:: to_device(context, queue, ary, allocator=None, async=False) +.. function:: to_device(queue, ary, allocator=None, async=False) Return a :class:`Array` that is an exact copy of the :class:`numpy.ndarray` instance *ary*. See :class:`Array` for the meaning of *allocator*. + .. 
versionchanged:: 2011.1 + *context* argument was deprecated. + .. function:: empty(context, shape, dtype, order="C", allocator=None, base=None, data=None, queue=None) A synonym for the :class:`Array` constructor. -.. function:: zeros(context, queue, shape, dtype, order="C", allocator=None) +.. function:: zeros(queue, shape, dtype, order="C", allocator=None) Same as :func:`empty`, but the :class:`Array` is zero-initialized before being returned. + .. versionchanged:: 2011.1 + *context* argument was deprecated. + .. function:: empty_like(other_ary) Make a new, uninitialized :class:`Array` having the same properties @@ -128,7 +144,7 @@ Constructing :class:`Array` Instances Make a new, zero-initialized :class:`Array` having the same properties as *other_ary*. -.. function:: arange(context, queue, start, stop, step, dtype=None) +.. function:: arange(queue, start, stop, step, dtype=None) Create a :class:`Array` filled with numbers spaced `step` apart, starting from `start` and ending at `stop`. @@ -140,6 +156,9 @@ Constructing :class:`Array` Instances *dtype*, if not specified, is taken as the largest common type of *start*, *stop* and *step*. + .. versionchanged:: 2011.1 + *context* argument was deprecated. + .. function:: take(a, indices, out=None, queue=None) Return the :class:`Array` ``[a[indices[0]], ..., a[indices[n]]]``. diff --git a/doc/source/misc.rst b/doc/source/misc.rst index ab349c4c8bddae5f9b430340f7d6857883fb0b58..b2ff45a80facb4227d5272e724157f4cbe47c412 100644 --- a/doc/source/misc.rst +++ b/doc/source/misc.rst @@ -87,6 +87,11 @@ Version 2011.1 * Add :mod:`pyopencl.reduction`. * Add :ref:`reductions`. * Add :meth:`MemoryObject.get_host_array`. +* Deprecate context arguments of + :func:`pyopencl.array.to_device`, + :func:`pyopencl.array.zeros`, + :func:`pyopencl.array.arange`. +* Make construction of :class:`pyopencl.array.Array` more flexible (*cqa* argument.) 
Version 0.92 ------------ diff --git a/examples/demo_elementwise.py b/examples/demo_elementwise.py index cf89121a7e08d3276a9e8d044aa2b6232503194e..a64616baba08f21550c88263e1a813ec2a23b6c0 100644 --- a/examples/demo_elementwise.py +++ b/examples/demo_elementwise.py @@ -7,9 +7,9 @@ queue = cl.CommandQueue(ctx) n = 10 a_gpu = cl_array.to_device( - ctx, queue, numpy.random.randn(n).astype(numpy.float32)) + queue, numpy.random.randn(n).astype(numpy.float32)) b_gpu = cl_array.to_device( - ctx, queue, numpy.random.randn(n).astype(numpy.float32)) + queue, numpy.random.randn(n).astype(numpy.float32)) from pyopencl.elementwise import ElementwiseKernel lin_comb = ElementwiseKernel(ctx, diff --git a/pyopencl/array.py b/pyopencl/array.py index efe3700a7352d6cfef2ec95725be34354005304e..f3ae7c3f3a3ab38a90a34f9ba4211b3f4bd5f6c2 100644 --- a/pyopencl/array.py +++ b/pyopencl/array.py @@ -123,6 +123,15 @@ class DefaultAllocator: +def _should_be_cqa(what): + from warnings import warn + warn("'%s' should be specified as the first " + "('cqa') argument, " + "not in the '%s' keyword argument. " + "This will continue to be accepted throughout " + "versions 2011.x of PyOpenCL." % (what, what), + DeprecationWarning, 3) + class Array(object): """A :mod:`pyopencl` Array is used to do array-based calculation on a compute device. @@ -131,10 +140,40 @@ class Array(object): work on an element-by-element basis, just like :class:`numpy.ndarray`. 
""" - def __init__(self, context, shape, dtype, order="C", allocator=None, + def __init__(self, cqa, shape, dtype, order="C", allocator=None, base=None, data=None, queue=None): - if allocator is None: - allocator = DefaultAllocator(context) + # {{{ backward compatibility for pre-cqa days + + if isinstance(cqa, cl.CommandQueue): + if queue is not None: + raise TypeError("can't specify queue in 'cqa' and " + "'queue' arguments") + queue = cqa + + if allocator is None: + context = queue.context + allocator = DefaultAllocator(context) + + elif isinstance(cqa, cl.Context): + if queue is not None: + _should_be_cqa("queue") + + if allocator is not None: + _should_be_cqa("allocator") + else: + allocator = DefaultAllocator(cqa) + + else: + # cqa is assumed to be an allocator + if allocator is not None: + raise TypeError("can't specify allocator in 'cqa' and " + "'allocator' arguments") + + allocator = cqa + + # }}} + + # invariant here: allocator, queue set try: s = 1 @@ -147,7 +186,6 @@ class Array(object): s = shape shape = (shape,) - self.context = context self.queue = queue self.shape = shape @@ -173,6 +211,8 @@ class Array(object): self.base = base + self.context = self.data.context + #@memoize_method FIXME: reenable def get_sizes(self, queue): return splay(queue, self.mem_size) @@ -294,9 +334,13 @@ class Array(object): def _new_like_me(self, dtype=None, queue=None): if dtype is None: dtype = self.dtype - return self.__class__(self.context, - self.shape, dtype, allocator=self.allocator, - queue=queue or self.queue) + queue = queue or self.queue + if queue is not None: + return self.__class__(queue, self.shape, dtype, allocator=self.allocator) + elif self.allocator is not None: + return self.__class__(self.allocator, self.shape, dtype) + else: + return self.__class__(self.context, self.shape, dtype) # operators --------------------------------------------------------------- def mul_add(self, selffac, other, otherfac, queue=None): @@ -507,8 +551,7 @@ class Array(object): 
-def to_device(context, queue, ary, allocator=None, async=False): - """Converts a numpy array to a :class:`Array`.""" +def _to_device(queue, ary, allocator=None, async=False): if ary.flags.f_contiguous: order = "F" elif ary.flags.c_contiguous: @@ -517,48 +560,67 @@ def to_device(context, queue, ary, allocator=None, async=False): raise ValueError("to_device only works on C- or Fortran-" "contiguous arrays") - result = Array(context, ary.shape, ary.dtype, order, allocator, - queue=queue) + result = Array(queue, ary.shape, ary.dtype, order, allocator) result.set(ary, async=async) return result +def to_device(*args, **kwargs): + """Converts a numpy array to a :class:`Array`.""" + + if isinstance(args[0], cl.Context): + from warnings import warn + warn("Passing a context as first argument is deprecated. " + "This will continue to be accepted throughout " + "versions 2011.x of PyOpenCL.", + DeprecationWarning, 2) + args = args[1:] + + return _to_device(*args, **kwargs) -empty = Array -def zeros(context, queue, shape, dtype, order="C", allocator=None): - """Returns an array of the given shape and dtype filled with 0's.""" - result = Array(context, shape, dtype, - order=order, allocator=allocator, queue=queue) + + +empty = Array + +def _zeros(queue, shape, dtype, order="C", allocator=None): + result = Array(queue, shape, dtype, + order=order, allocator=allocator) result.fill(0) return result +def zeros(*args, **kwargs): + """Returns an array of the given shape and dtype filled with 0's.""" + + if isinstance(args[0], cl.Context): + from warnings import warn + warn("Passing a context as first argument is deprecated. 
" + "This will continue to be accepted throughout " + "versions 2011.x of PyOpenCL.", + DeprecationWarning, 2) + args = args[1:] + + return _zeros(*args, **kwargs) + def empty_like(ary): - result = Array(ary.context, - ary.shape, ary.dtype, allocator=ary.allocator, queue=ary.queue) - return result + if ary.queue is not None: + return Array(ary.queue, ary.shape, ary.dtype, allocator=ary.allocator) + elif ary.allocator is not None: + return Array(ary.allocator, ary.shape, ary.dtype, queue=ary.queue) + else: + return Array(ary.context, ary.shape, ary.dtype) def zeros_like(ary): - result = Array(ary.context, - ary.shape, ary.dtype, allocator=ary.allocator, queue=ary.queue) + result = empty_like(ary) result.fill(0) return result @elwise_kernel_runner -def _arange(result, start, step): +def _arange_knl(result, start, step): return elementwise.get_arange_kernel( result.context, result.dtype) -def arange(context, queue, *args, **kwargs): - """Create an array filled with numbers spaced `step` apart, - starting from `start` and ending at `stop`. - - For floating point arguments, the length of the result is - `ceil((stop - start)/step)`. This rule may result in the last - element of the result being greater than stop. - """ - +def _arange(queue, *args, **kwargs): # argument processing ----------------------------------------------------- # Yuck. Thanks, numpy developers. ;) @@ -625,13 +687,36 @@ def arange(context, queue, *args, **kwargs): from math import ceil size = int(ceil((stop-start)/step)) - result = Array(context, (size,), dtype, queue=queue) - _arange(result, start, step, queue=queue) + result = Array(queue, (size,), dtype) + _arange_knl(result, start, step, queue=queue) return result +def arange(*args, **kwargs): + """Create an array filled with numbers spaced `step` apart, + starting from `start` and ending at `stop`. + + For floating point arguments, the length of the result is + `ceil((stop - start)/step)`. 
This rule may result in the last + element of the result being greater than stop. + """ + + if isinstance(args[0], cl.Context): + from warnings import warn + warn("Passing a context as first argument is deprecated. " + "This will continue to be accepted throughout " + "versions 2011.x of PyOpenCL.", + DeprecationWarning, 2) + args = args[1:] + + return _arange(*args, **kwargs) + + + + + @elwise_kernel_runner def _take(result, ary, indices): return elementwise.get_take_kernel( @@ -641,10 +726,9 @@ def _take(result, ary, indices): def take(a, indices, out=None, queue=None): + queue = queue or a.queue if out is None: - out = Array(a.context, indices.shape, a.dtype, - allocator=a.allocator, - queue=queue or a.queue) + out = Array(queue, indices.shape, a.dtype, allocator=a.allocator) assert len(indices.shape) == 1 _take(out, a, indices, queue=queue) @@ -718,8 +802,7 @@ def multi_take_put(arrays, dest_indices, src_indices, dest_shape=None, vec_count = len(arrays) if out is None: - out = [Array(context, dest_shape, a_dtype, - allocator=a_allocator, queue=queue) + out = [Array(queue, dest_shape, a_dtype, allocator=a_allocator) for i in range(vec_count)] else: if a_dtype != single_valued(o.dtype for o in out): diff --git a/pyopencl/clrandom.py b/pyopencl/clrandom.py index b76b15a146597198316bb3892f5e4aa20bfc8bd7..cdd53163e88745f8c8d858f357e78a3bef7c1310 100644 --- a/pyopencl/clrandom.py +++ b/pyopencl/clrandom.py @@ -244,7 +244,7 @@ def fill_rand(result): def rand(context, queue, shape, dtype): from pyopencl.array import Array - result = Array(context, shape, dtype, queue=queue) + result = Array(queue, shape, dtype) _rand(result, numpy.random.randint(2**31-1)) return result diff --git a/pyopencl/reduction.py b/pyopencl/reduction.py index 7611f9e740a12e2f99a9794787c0d1f85fabb324..db894764f2a89372b8d5825326ffabe67ed06b68 100644 --- a/pyopencl/reduction.py +++ b/pyopencl/reduction.py @@ -327,15 +327,13 @@ class ReductionKernel: seq_count = (sz + macrogroup_size - 1) // 
macrogroup_size if group_count == 1: - result = empty(stage_inf.context, + result = empty(use_queue, (), self.dtype_out, - allocator=repr_vec.allocator, - queue=use_queue) + allocator=repr_vec.allocator) else: - result = empty(stage_inf.context, + result = empty(use_queue, (group_count,), self.dtype_out, - allocator=repr_vec.allocator, - queue=use_queue) + allocator=repr_vec.allocator) #print group_count, seq_count, stage_inf.group_size stage_inf.kernel( diff --git a/test/test_array.py b/test/test_array.py index f0bb10ae6ca7e536010b53162274947a593b9afc..863530c2bf213e9c585e084b2b295a3bd9abe87a 100644 --- a/test/test_array.py +++ b/test/test_array.py @@ -30,7 +30,7 @@ def test_pow_array(ctx_getter): queue = cl.CommandQueue(context) a = numpy.array([1,2,3,4,5]).astype(numpy.float32) - a_gpu = cl_array.to_device(context, queue, a) + a_gpu = cl_array.to_device(queue, a) result = pow(a_gpu,a_gpu).get() assert (numpy.abs(a**a - result) < 1e-3).all() @@ -47,7 +47,7 @@ def test_pow_number(ctx_getter): queue = cl.CommandQueue(context) a = numpy.array([1,2,3,4,5,6,7,8,9,10]).astype(numpy.float32) - a_gpu = cl_array.to_device(context, queue, a) + a_gpu = cl_array.to_device(queue, a) result = pow(a_gpu, 2).get() assert (numpy.abs(a**2 - result) < 1e-3).all() @@ -59,7 +59,7 @@ def test_abs(ctx_getter): context = ctx_getter() queue = cl.CommandQueue(context) - a = -cl_array.arange(context, queue, 111, dtype=numpy.float32) + a = -cl_array.arange(queue, 111, dtype=numpy.float32) res = a.get() for i in range(111): @@ -80,7 +80,7 @@ def test_len(ctx_getter): queue = cl.CommandQueue(context) a = numpy.array([1,2,3,4,5,6,7,8,9,10]).astype(numpy.float32) - a_cpu = cl_array.to_device(context, queue, a) + a_cpu = cl_array.to_device(queue, a) assert len(a_cpu) == 10 @@ -101,7 +101,7 @@ def test_multiply(ctx_getter): ]: for scalar in scalars: a = numpy.arange(sz).astype(dtype) - a_gpu = cl_array.to_device(context, queue, a) + a_gpu = cl_array.to_device(queue, a) a_doubled = (scalar * 
a_gpu).get() assert (a * scalar == a_doubled).all() @@ -115,8 +115,8 @@ def test_multiply_array(ctx_getter): a = numpy.array([1,2,3,4,5,6,7,8,9,10]).astype(numpy.float32) - a_gpu = cl_array.to_device(context, queue, a) - b_gpu = cl_array.to_device(context, queue, a) + a_gpu = cl_array.to_device(queue, a) + b_gpu = cl_array.to_device(queue, a) a_squared = (b_gpu*a_gpu).get() @@ -133,7 +133,7 @@ def test_addition_array(ctx_getter): queue = cl.CommandQueue(context) a = numpy.array([1,2,3,4,5,6,7,8,9,10]).astype(numpy.float32) - a_gpu = cl_array.to_device(context, queue, a) + a_gpu = cl_array.to_device(queue, a) a_added = (a_gpu+a_gpu).get() assert (a+a == a_added).all() @@ -149,7 +149,7 @@ def test_addition_scalar(ctx_getter): queue = cl.CommandQueue(context) a = numpy.array([1,2,3,4,5,6,7,8,9,10]).astype(numpy.float32) - a_gpu = cl_array.to_device(context, queue, a) + a_gpu = cl_array.to_device(queue, a) a_added = (7+a_gpu).get() assert (7+a == a_added).all() @@ -167,8 +167,8 @@ def test_substract_array(ctx_getter): context = ctx_getter() queue = cl.CommandQueue(context) - a_gpu = cl_array.to_device(context, queue, a) - b_gpu = cl_array.to_device(context, queue, b) + a_gpu = cl_array.to_device(queue, a) + b_gpu = cl_array.to_device(queue, b) result = (a_gpu-b_gpu).get() assert (a-b == result).all() @@ -190,7 +190,7 @@ def test_substract_scalar(ctx_getter): a = numpy.array([1,2,3,4,5,6,7,8,9,10]).astype(numpy.float32) #convert a to a gpu object - a_gpu = cl_array.to_device(context, queue, a) + a_gpu = cl_array.to_device(queue, a) result = (a_gpu-7).get() assert (a-7 == result).all() @@ -209,7 +209,7 @@ def test_divide_scalar(ctx_getter): queue = cl.CommandQueue(context) a = numpy.array([1,2,3,4,5,6,7,8,9,10]).astype(numpy.float32) - a_gpu = cl_array.to_device(context, queue, a) + a_gpu = cl_array.to_device(queue, a) result = (a_gpu/2).get() assert (a/2 == result).all() @@ -231,8 +231,8 @@ def test_divide_array(ctx_getter): a = 
numpy.array([10,20,30,40,50,60,70,80,90,100]).astype(numpy.float32) b = numpy.array([10,10,10,10,10,10,10,10,10,10]).astype(numpy.float32) - a_gpu = cl_array.to_device(context, queue, a) - b_gpu = cl_array.to_device(context, queue, b) + a_gpu = cl_array.to_device(queue, a) + b_gpu = cl_array.to_device(queue, b) a_divide = (a_gpu/b_gpu).get() assert (numpy.abs(a/b - a_divide) < 1e-3).all() @@ -282,9 +282,9 @@ def test_nan_arithmetic(ctx_getter): size = 1 << 20 a = make_nan_contaminated_vector(size) - a_gpu = cl_array.to_device(context, queue, a) + a_gpu = cl_array.to_device(queue, a) b = make_nan_contaminated_vector(size) - b_gpu = cl_array.to_device(context, queue, b) + b_gpu = cl_array.to_device(queue, b) ab = a*b ab_gpu = (a_gpu*b_gpu).get() @@ -324,8 +324,8 @@ def test_take(ctx_getter): context = ctx_getter() queue = cl.CommandQueue(context) - idx = cl_array.arange(context, queue, 0, 200000, 2, dtype=numpy.uint32) - a = cl_array.arange(context, queue, 0, 600000, 3, dtype=numpy.float32) + idx = cl_array.arange(queue, 0, 200000, 2, dtype=numpy.uint32) + a = cl_array.arange(queue, 0, 600000, 3, dtype=numpy.float32) result = cl_array.take(a, idx) assert ((3*idx).get() == result.get()).all() @@ -338,7 +338,7 @@ def test_arange(ctx_getter): queue = cl.CommandQueue(context) n = 5000 - a = cl_array.arange(context, queue, n, dtype=numpy.float32) + a = cl_array.arange(queue, n, dtype=numpy.float32) assert (numpy.arange(n, dtype=numpy.float32) == a.get()).all() @@ -351,7 +351,7 @@ def test_reverse(ctx_getter): n = 5000 a = numpy.arange(n).astype(numpy.float32) - a_gpu = cl_array.to_device(context, queue, a) + a_gpu = cl_array.to_device(queue, a) a_gpu = a_gpu.reverse() @@ -424,7 +424,7 @@ def test_subset_minmax(ctx_getter): a = a_gpu.get() meaningful_indices_gpu = cl_array.zeros( - context, queue, l_m, dtype=numpy.int32) + queue, l_m, dtype=numpy.int32) meaningful_indices = meaningful_indices_gpu.get() j = 0 for i in range(len(meaningful_indices)): @@ -434,7 +434,7 @@ def 
test_subset_minmax(ctx_getter): j = j + 1 meaningful_indices_gpu = cl_array.to_device( - context, queue, meaningful_indices) + queue, meaningful_indices) b = a[meaningful_indices] min_a = numpy.min(b) @@ -513,11 +513,11 @@ def test_take_put(ctx_getter): for n in [5, 17, 333]: one_field_size = 8 - buf_gpu = cl_array.zeros(context, queue, + buf_gpu = cl_array.zeros(queue, n*one_field_size, dtype=numpy.float32) - dest_indices = cl_array.to_device(context, queue, + dest_indices = cl_array.to_device(queue, numpy.array([ 0, 1, 2, 3, 32, 33, 34, 35], dtype=numpy.uint32)) - read_map = cl_array.to_device(context, queue, + read_map = cl_array.to_device(queue, numpy.array([7, 6, 5, 4, 3, 2, 1, 0], dtype=numpy.uint32)) cl_array.multi_take_put( diff --git a/test/test_clmath.py b/test/test_clmath.py index 9228b57b5dda590f145090a36947f119295400d1..77ae77fb6372d239a752283245357d160b40dc92 100644 --- a/test/test_clmath.py +++ b/test/test_clmath.py @@ -63,7 +63,7 @@ def make_unary_function_test(name, limits=(0, 1), threshold=0): for s in sizes: for dtype in dtypes: - args = cl_array.arange(context, queue, a, b, (b-a)/s, + args = cl_array.arange(queue, a, b, (b-a)/s, dtype=numpy.float32) gpu_results = gpu_func(args).get() cpu_results = cpu_func(args.get()) @@ -107,8 +107,8 @@ def test_fmod(ctx_getter): queue = cl.CommandQueue(context) for s in sizes: - a = cl_array.arange(context, queue, s, dtype=numpy.float32)/10 - a2 = cl_array.arange(context, queue, s, dtype=numpy.float32)/45.2 + 0.1 + a = cl_array.arange(queue, s, dtype=numpy.float32)/10 + a2 = cl_array.arange(queue, s, dtype=numpy.float32)/45.2 + 0.1 b = clmath.fmod(a, a2) a = a.get() @@ -124,8 +124,8 @@ def test_ldexp(ctx_getter): queue = cl.CommandQueue(context) for s in sizes: - a = cl_array.arange(context, queue, s, dtype=numpy.float32) - a2 = cl_array.arange(context, queue, s, dtype=numpy.float32)*1e-3 + a = cl_array.arange(queue, s, dtype=numpy.float32) + a2 = cl_array.arange(queue, s, dtype=numpy.float32)*1e-3 b = 
clmath.ldexp(a,a2) a = a.get() @@ -141,7 +141,7 @@ def test_modf(ctx_getter): queue = cl.CommandQueue(context) for s in sizes: - a = cl_array.arange(context, queue, s, dtype=numpy.float32)/10 + a = cl_array.arange(queue, s, dtype=numpy.float32)/10 fracpart, intpart = clmath.modf(a) a = a.get() @@ -160,7 +160,7 @@ def test_frexp(ctx_getter): queue = cl.CommandQueue(context) for s in sizes: - a = cl_array.arange(context, queue, s, dtype=numpy.float32)/10 + a = cl_array.arange(queue, s, dtype=numpy.float32)/10 significands, exponents = clmath.frexp(a) a = a.get()