From fc05c60624d0a50c95ae0dc9afa7c1663124904c Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner <inform@tiker.net> Date: Sun, 16 May 2021 13:32:43 -0500 Subject: [PATCH] Add a fast path to Array constructor --- pyopencl/array.py | 175 +++++++++++++++++++++++++--------------------- 1 file changed, 97 insertions(+), 78 deletions(-) diff --git a/pyopencl/array.py b/pyopencl/array.py index 2fafb659..15979e15 100644 --- a/pyopencl/array.py +++ b/pyopencl/array.py @@ -416,115 +416,134 @@ class Array: __array_priority__ = 100 def __init__(self, cq, shape, dtype, order="C", allocator=None, - data=None, offset=0, strides=None, events=None, _flags=None): - # {{{ backward compatibility + data=None, offset=0, strides=None, events=None, _flags=None, + _fast=False, _size=None, _context=None, _queue=None): + if _fast: + # Assumptions, should be disabled if not testing + if 0: + assert cq is None + assert isinstance(_context, cl.Context) + assert _queue is None or isinstance(_queue, cl.CommandQueue) + assert isinstance(shape, tuple) + assert isinstance(strides, tuple) + assert isinstance(dtype, np.dtype) + assert _size is not None + + size = _size + context = _context + queue = _queue + alloc_nbytes = dtype.itemsize * size - if isinstance(cq, cl.CommandQueue): - queue = cq - context = queue.context + else: + # {{{ backward compatibility - elif isinstance(cq, cl.Context): - context = cq - queue = None + if cq is None: + context = _context + queue = _queue - else: - raise TypeError("cq may be a queue or a context, not '%s'" - % type(cq)) + elif isinstance(cq, cl.CommandQueue): + queue = cq + context = queue.context - if allocator is not None: - # "is" would be wrong because two Python objects are allowed - # to hold handles to the same context. + elif isinstance(cq, cl.Context): + context = cq + queue = None - # FIXME It would be nice to check this. But it would require - # changing the allocator interface. Trust the user for now. + else: + raise TypeError("cq may be a queue or a context, not '%s'" + % type(cq)) - #assert allocator.context == context - pass + if allocator is not None: + # "is" would be wrong because two Python objects are allowed + # to hold handles to the same context. - # Queue-less arrays do have a purpose in life. - # They don't do very much, but at least they don't run kernels - # in random queues. - # - # See also :meth:`with_queue`. + # FIXME It would be nice to check this. But it would require + # changing the allocator interface. Trust the user for now. - del cq + #assert allocator.context == context + pass - # }}} + # Queue-less arrays do have a purpose in life. + # They don't do very much, but at least they don't run kernels + # in random queues. + # + # See also :meth:`with_queue`. - # invariant here: allocator, queue set + del cq - # {{{ determine shape, size, and strides - dtype = np.dtype(dtype) + # }}} - try: - size = 1 - for dim in shape: - size *= dim - if dim < 0: - raise ValueError("negative dimensions are not allowed") + # invariant here: allocator, queue set - except TypeError: - admissible_types = (int, np.integer) + # {{{ determine shape, size, and strides + dtype = np.dtype(dtype) - if not isinstance(shape, admissible_types): - raise TypeError("shape must either be iterable or " - "castable to an integer") - size = shape - if shape < 0: - raise ValueError("negative dimensions are not allowed") - shape = (shape,) + try: + size = 1 + for dim in shape: + size *= dim + if dim < 0: + raise ValueError("negative dimensions are not allowed") - if isinstance(size, np.integer): - size = size.item() + except TypeError: + admissible_types = (int, np.integer) - if strides is None: - if order in "cC": - # inlined from compyte.array.c_contiguous_strides - if shape: - strides = [dtype.itemsize] - for s in shape[:0:-1]: - strides.append(strides[-1]*s) - strides = tuple(strides[::-1]) + if not isinstance(shape, admissible_types): + raise TypeError("shape must either be iterable or " + "castable to an integer") + size = shape + if shape < 0: + raise ValueError("negative dimensions are not allowed") + shape = (shape,) + + if isinstance(size, np.integer): + size = size.item() + + if strides is None: + if order in "cC": + # inlined from compyte.array.c_contiguous_strides + if shape: + strides = [dtype.itemsize] + for s in shape[:0:-1]: + strides.append(strides[-1]*s) + strides = tuple(strides[::-1]) + else: + strides = () + elif order in "fF": + strides = _f_contiguous_strides(dtype.itemsize, shape) else: - strides = () - elif order in "fF": - strides = _f_contiguous_strides(dtype.itemsize, shape) + raise ValueError("invalid order: %s" % order) + else: - raise ValueError("invalid order: %s" % order) + # FIXME: We should possibly perform some plausibility + # checking on 'strides' here. - else: - # FIXME: We should possibly perform some plausibility - # checking on 'strides' here. + strides = tuple(strides) - strides = tuple(strides) + # }}} - # }}} + assert dtype != object, \ + "object arrays on the compute device are not allowed" + assert isinstance(shape, tuple) + assert isinstance(strides, tuple) - assert dtype != object, \ - "object arrays on the compute device are not allowed" - assert isinstance(shape, tuple) - assert isinstance(strides, tuple) + alloc_nbytes = dtype.itemsize * size + + if alloc_nbytes < 0: + raise ValueError("cannot allocate CL buffer with " + "negative size") self.queue = queue self.shape = shape self.dtype = dtype self.strides = strides - if events is None: - self.events = [] - else: - self.events = events - + self.events = [] if events is None else events + self.nbytes = alloc_nbytes self.size = size - alloc_nbytes = self.nbytes = self.dtype.itemsize * self.size - self.allocator = allocator if data is None: - if alloc_nbytes < 0: - raise ValueError("cannot allocate CL buffer with " - "negative size") - - elif alloc_nbytes == 0: + if alloc_nbytes == 0: self.base_data = None else: -- GitLab