From fc05c60624d0a50c95ae0dc9afa7c1663124904c Mon Sep 17 00:00:00 2001
From: Andreas Kloeckner <inform@tiker.net>
Date: Sun, 16 May 2021 13:32:43 -0500
Subject: [PATCH] Add a fast path to Array constructor

---
 pyopencl/array.py | 175 +++++++++++++++++++++++++---------------------
 1 file changed, 97 insertions(+), 78 deletions(-)

diff --git a/pyopencl/array.py b/pyopencl/array.py
index 2fafb659..15979e15 100644
--- a/pyopencl/array.py
+++ b/pyopencl/array.py
@@ -416,115 +416,134 @@ class Array:
     __array_priority__ = 100
 
     def __init__(self, cq, shape, dtype, order="C", allocator=None,
-            data=None, offset=0, strides=None, events=None, _flags=None):
-        # {{{ backward compatibility
+            data=None, offset=0, strides=None, events=None, _flags=None,
+            _fast=False, _size=None, _context=None, _queue=None):
+        if _fast:
+            # Fast-path preconditions; checks are off ('if 0'), enable only when testing
+            if 0:
+                assert cq is None
+                assert isinstance(_context, cl.Context)
+                assert _queue is None or isinstance(_queue, cl.CommandQueue)
+                assert isinstance(shape, tuple)
+                assert isinstance(strides, tuple)
+                assert isinstance(dtype, np.dtype)
+                assert _size is not None
+
+            size = _size
+            context = _context
+            queue = _queue
+            alloc_nbytes = dtype.itemsize * size
 
-        if isinstance(cq, cl.CommandQueue):
-            queue = cq
-            context = queue.context
+        else:
+            # {{{ backward compatibility
 
-        elif isinstance(cq, cl.Context):
-            context = cq
-            queue = None
+            if cq is None:
+                context = _context
+                queue = _queue
 
-        else:
-            raise TypeError("cq may be a queue or a context, not '%s'"
-                    % type(cq))
+            elif isinstance(cq, cl.CommandQueue):
+                queue = cq
+                context = queue.context
 
-        if allocator is not None:
-            # "is" would be wrong because two Python objects are allowed
-            # to hold handles to the same context.
+            elif isinstance(cq, cl.Context):
+                context = cq
+                queue = None
 
-            # FIXME It would be nice to check this. But it would require
-            # changing the allocator interface. Trust the user for now.
+            else:
+                raise TypeError("cq may be a queue or a context, not '%s'"
+                        % type(cq))
 
-            #assert allocator.context == context
-            pass
+            if allocator is not None:
+                # "is" would be wrong because two Python objects are allowed
+                # to hold handles to the same context.
 
-        # Queue-less arrays do have a purpose in life.
-        # They don't do very much, but at least they don't run kernels
-        # in random queues.
-        #
-        # See also :meth:`with_queue`.
+                # FIXME It would be nice to check this. But it would require
+                # changing the allocator interface. Trust the user for now.
 
-        del cq
+                #assert allocator.context == context
+                pass
 
-        # }}}
+            # Queue-less arrays do have a purpose in life.
+            # They don't do very much, but at least they don't run kernels
+            # in random queues.
+            #
+            # See also :meth:`with_queue`.
 
-        # invariant here: allocator, queue set
+            del cq
 
-        # {{{ determine shape, size, and strides
-        dtype = np.dtype(dtype)
+            # }}}
 
-        try:
-            size = 1
-            for dim in shape:
-                size *= dim
-                if dim < 0:
-                    raise ValueError("negative dimensions are not allowed")
+            # invariant here: allocator, queue set
 
-        except TypeError:
-            admissible_types = (int, np.integer)
+            # {{{ determine shape, size, and strides
+            dtype = np.dtype(dtype)
 
-            if not isinstance(shape, admissible_types):
-                raise TypeError("shape must either be iterable or "
-                        "castable to an integer")
-            size = shape
-            if shape < 0:
-                raise ValueError("negative dimensions are not allowed")
-            shape = (shape,)
+            try:
+                size = 1
+                for dim in shape:
+                    size *= dim
+                    if dim < 0:
+                        raise ValueError("negative dimensions are not allowed")
 
-        if isinstance(size, np.integer):
-            size = size.item()
+            except TypeError:
+                admissible_types = (int, np.integer)
 
-        if strides is None:
-            if order in "cC":
-                # inlined from compyte.array.c_contiguous_strides
-                if shape:
-                    strides = [dtype.itemsize]
-                    for s in shape[:0:-1]:
-                        strides.append(strides[-1]*s)
-                    strides = tuple(strides[::-1])
+                if not isinstance(shape, admissible_types):
+                    raise TypeError("shape must either be iterable or "
+                            "castable to an integer")
+                size = shape
+                if shape < 0:
+                    raise ValueError("negative dimensions are not allowed")
+                shape = (shape,)
+
+            if isinstance(size, np.integer):
+                size = size.item()
+
+            if strides is None:
+                if order in "cC":
+                    # inlined from compyte.array.c_contiguous_strides
+                    if shape:
+                        strides = [dtype.itemsize]
+                        for s in shape[:0:-1]:
+                            strides.append(strides[-1]*s)
+                        strides = tuple(strides[::-1])
+                    else:
+                        strides = ()
+                elif order in "fF":
+                    strides = _f_contiguous_strides(dtype.itemsize, shape)
                 else:
-                    strides = ()
-            elif order in "fF":
-                strides = _f_contiguous_strides(dtype.itemsize, shape)
+                    raise ValueError("invalid order: %s" % order)
+
             else:
-                raise ValueError("invalid order: %s" % order)
+                # FIXME: We should possibly perform some plausibility
+                # checking on 'strides' here.
 
-        else:
-            # FIXME: We should possibly perform some plausibility
-            # checking on 'strides' here.
+                strides = tuple(strides)
 
-            strides = tuple(strides)
+            # }}}
 
-        # }}}
+            assert dtype != object, \
+                    "object arrays on the compute device are not allowed"
+            assert isinstance(shape, tuple)
+            assert isinstance(strides, tuple)
 
-        assert dtype != object, \
-                "object arrays on the compute device are not allowed"
-        assert isinstance(shape, tuple)
-        assert isinstance(strides, tuple)
+            alloc_nbytes = dtype.itemsize * size
+
+            if alloc_nbytes < 0:
+                raise ValueError("cannot allocate CL buffer with "
+                        "negative size")
 
         self.queue = queue
         self.shape = shape
         self.dtype = dtype
         self.strides = strides
-        if events is None:
-            self.events = []
-        else:
-            self.events = events
-
+        self.events = [] if events is None else events
+        self.nbytes = alloc_nbytes
         self.size = size
-        alloc_nbytes = self.nbytes = self.dtype.itemsize * self.size
-
         self.allocator = allocator
 
         if data is None:
-            if alloc_nbytes < 0:
-                raise ValueError("cannot allocate CL buffer with "
-                        "negative size")
-
-            elif alloc_nbytes == 0:
+            if alloc_nbytes == 0:
                 self.base_data = None
 
             else:
-- 
GitLab