From 74b585fa4e7dd55a3b21886ffbb24300bf3e1835 Mon Sep 17 00:00:00 2001
From: Alex Fikl <alexfikl@gmail.com>
Date: Tue, 26 Jun 2018 20:31:24 -0500
Subject: [PATCH 1/9] cleaned up MatrixBlockIndex a bit.

* created a new class that holds 1D block index information.
* add a few convenience functions to hide the implementation details.
* make everything work with cl arrays and add some functions to do a
mass get() on all the arrays.
---
 sumpy/p2p.py           |  13 +--
 sumpy/qbx.py           |  11 ++-
 sumpy/tools.py         | 209 +++++++++++++++++++++++++++++------------
 test/test_matrixgen.py | 178 +++++++++++++++++++++--------------
 4 files changed, 269 insertions(+), 142 deletions(-)

diff --git a/sumpy/p2p.py b/sumpy/p2p.py
index c408360d..7e012497 100644
--- a/sumpy/p2p.py
+++ b/sumpy/p2p.py
@@ -375,10 +375,9 @@ class P2PMatrixBlockGenerator(P2PBase):
         .. code-block:: python
 
             blkranges = index_set.linear_ranges()
-            m = index_set.tgtranges[i + 1] - index_set.tgtranges[i]
-            n = index_set.srcranges[i + 1] - index_set.srcranges[i]
+            blkshape = index_set.block_shape(i)
 
-            block2d = result[blkranges[i]:blkranges[i + 1]].reshape(m, n)
+            block2d = result[blkranges[i]:blkranges[i + 1]].reshape(*blkshape)
 
         :arg targets: target point coordinates.
         :arg sources: source point coordinates.
@@ -394,9 +393,11 @@ class P2PMatrixBlockGenerator(P2PBase):
                 sources_is_obj_array=(
                     is_obj_array(sources) or isinstance(sources, (tuple, list))))
 
-        tgtindices, srcindices = index_set.linear_indices()
-        return knl(queue, targets=targets, sources=sources,
-                tgtindices=tgtindices, srcindices=srcindices, **kwargs)
+        return knl(queue,
+                   targets=targets,
+                   sources=sources,
+                   tgtindices=index_set.linear_row_indices,
+                   srcindices=index_set.linear_col_indices, **kwargs)
 
 # }}}
 
diff --git a/sumpy/qbx.py b/sumpy/qbx.py
index b67c1042..dea09e74 100644
--- a/sumpy/qbx.py
+++ b/sumpy/qbx.py
@@ -408,10 +408,13 @@ class LayerPotentialMatrixBlockGenerator(LayerPotentialBase):
 
         knl = self.get_cached_optimized_kernel()
 
-        tgtindices, srcindices = index_set.linear_indices()
-        return knl(queue, src=sources, tgt=targets, center=centers,
-                expansion_radii=expansion_radii,
-                tgtindices=tgtindices, srcindices=srcindices, **kwargs)
+        return knl(queue,
+                   src=sources,
+                   tgt=targets,
+                   center=centers,
+                   expansion_radii=expansion_radii,
+                   tgtindices=index_set.linear_row_indices,
+                   srcindices=index_set.linear_col_indices, **kwargs)
 
 # }}}
 
diff --git a/sumpy/tools.py b/sumpy/tools.py
index b80deddd..ce27a6ec 100644
--- a/sumpy/tools.py
+++ b/sumpy/tools.py
@@ -31,6 +31,9 @@ from pytools import memoize_method, memoize_in
 import numpy as np
 import sumpy.symbolic as sym
 
+import pyopencl as cl
+import pyopencl.array  # noqa
+
 import loopy as lp
 from loopy.version import MOST_RECENT_LANGUAGE_VERSION
 
@@ -300,88 +303,170 @@ class KernelComputation(object):
 
 # {{{
 
-class MatrixBlockIndex(object):
-    def __init__(self, queue, tgtindices, srcindices, tgtranges, srcranges):
-        """Keep track of different ways to index into matrix blocks.
 
-        In this description, a block is given by a subset of indices of
-        the global matrix operator. For example, the :math:`i`-th block of
-        a matrix `mat` using the input arguments is obtained by:
+def _to_host(x, queue=None):
+    if isinstance(x, cl.array.Array):
+        queue = queue or x.queue
+        return x.get(queue)
+    return x
 
-        .. code-block:: python
 
-            mat[np.ix_(tgtindices[tgtranges[i]:tgtranges[i + 1]],
-                       srcindices[srcranges[i]:srcranges[i + 1]])]
+class BlockIndex(object):
+    """Convenience class for working with block indices.
 
-        :arg tgtindices: a subset of the full matrix row index set.
-        :arg srcindices: a subset of the full matrix column index set.
-        :arg tgtranges: an array used to index into `tgtindices`.
-        :arg srcranges: an array used to index into `srcindices`.
-        """
+    .. automethod:: block_shape
+    .. automethod:: get
+    .. automethod:: view
+    """
 
-        self.queue = queue
-        self.tgtindices = tgtindices
-        self.tgtranges = tgtranges
-        self.srcindices = srcindices
-        self.srcranges = srcranges
+    def __init__(self, indices, ranges):
+        self.indices = indices
+        self.ranges = ranges
 
+    @property
+    def nblocks(self):
+        return self.ranges.shape[0] - 1
+
+    def block_shape(self, i):
+        return (self._ranges[i + 1] - self._ranges[i],)
+
+    def get(self, queue=None):
+        return BlockIndex(_to_host(self.indices, queue=queue),
+                          _to_host(self.ranges, queue=queue))
+
+    @property
     @memoize_method
+    def _ranges(self):
+        return _to_host(self.ranges)
+
+    def view(self, x, i):
+        return x[self.indices[self._ranges[i]:self._ranges[i + 1]]]
+
+
+class MatrixBlockIndex(object):
+    """Keep track of different ways to index into matrix blocks.
+
+    .. attribute:: row
+
+        A :class:`BlockIndex`.
+
+    .. attribute:: col
+
+        A :class:`BlockIndex`.
+
+    .. automethod:: block_shape
+    .. automethod:: get
+    .. automethod:: view
+    """
+
+    def __init__(self, queue, rowindices, colindices):
+        self.queue = queue
+        self.row = rowindices
+        self.col = colindices
+        assert self.row.nblocks == self.col.nblocks
+
+        self.blkranges = np.cumsum([0] + [
+            self.row.block_shape(i)[0] * self.col.block_shape(i)[0]
+            for i in range(self.row.nblocks)])
+        if self.queue:
+            self.blkranges = cl.array.to_device(self.queue, self.blkranges)
+
+    @property
+    def nblocks(self):
+        return self.row.nblocks
+
+    def block_shape(self, i):
+        return self.row.block_shape(i) + self.col.block_shape(i)
+
+    @property
+    def linear_row_indices(self):
+        r, _ = self._linear_indices()
+        return r
+
+    @property
+    def linear_col_indices(self):
+        _, c = self._linear_indices()
+        return c
+
+    @property
     def linear_ranges(self):
+        return self.blkranges
+
+    def get(self, queue=None):
+        """Transfer data to the host. Only the initial given data is
+        transfered, not the arrays returned by :meth:`linear_row_indices` and
+        friends.
+
+        :return: a copy of `self` in which all data lives on the host, i.e.
+        all :class:`pyopencl.array.Array` instances are replaces by
+        :class:`numpy.ndarray` instances.
         """
-        :return: an array containing the cummulative sum of all block sizes.
-            It can be used to index into a linear representation of the
-            matrix blocks.
+        queue = queue or self.queue
+
+        return MatrixBlockIndex(None,
+                rowindices=self.row.get(queue),
+                colindices=self.col.get(queue))
+
+    def view(self, x, i):
+        """Retrieve a block from a global matrix.
+
+        :arg x: a 2D :class:`numpy.ndarray`, i.e. a matrix.
+        :arg i: the block index.
+        :return: the requested block from the matrix.
         """
 
-        @memoize_in(self, "block_cumsum_knl")
-        def cumsum():
-            loopy_knl = lp.make_kernel(
-                "{[i, j]: 0 <= i < nranges and 0 <= j <= i}",
-                """
-                blkranges[0] = 0
-                blkranges[i + 1] = reduce(sum, j, \
-                    (srcranges[j + 1] - srcranges[j]) * \
-                    (tgtranges[j + 1] - tgtranges[j])) \
-                """,
-                [
-                    lp.GlobalArg("tgtranges", None, shape="nranges + 1"),
-                    lp.GlobalArg("srcranges", None, shape="nranges + 1"),
-                    lp.GlobalArg("blkranges", np.int32, shape="nranges + 1"),
-                    lp.ValueArg("nranges", None)
-                ],
-                name="block_cumsum_knl",
-                default_offset=lp.auto,
-                assumptions="nranges>=1",
-                lang_version=MOST_RECENT_LANGUAGE_VERSION)
+        if isinstance(x, cl.array.Array) or \
+                isinstance(self.row.indices, cl.array.Array) or \
+                isinstance(self.col.indices, cl.array.Array):
+            raise ValueError("CL `Array`s are not supported."
+                             "Use MatrixBlockIndex.get() and then view into matrices.")
 
-            loopy_knl = lp.realize_reduction(loopy_knl, force_scan=True,
-                force_outer_iname_for_scan="i")
-            return loopy_knl
+        irow = self.row.indices[self.row.ranges[i]:self.row.ranges[i + 1]]
+        icol = self.col.indices[self.col.ranges[i]:self.col.ranges[i + 1]]
+
+        return x[np.ix_(irow, icol)]
 
-        _, (blkranges,) = cumsum()(self.queue,
-            tgtranges=self.tgtranges, srcranges=self.srcranges)
+    def view_block(self, x, i):
+        """Retrieve a block from a linear representation of the matrix blocks.
+
+        :arg x: a 1D :class:`numpy.ndarray`.
+        :arg i: the block index.
+        :return: the requested block, reshaped into a 2D array. The result
+            matches, in shape and value, the block returned by
+            :meth:`view` with the global matrix.
+        """
+        if isinstance(x, cl.array.Array) or \
+                isinstance(self.blkranges, cl.array.Array):
+            raise ValueError("CL `Array`s are not supported."
+                             "Use MatrixBlockIndex.get() and then view into matrices.")
 
-        return blkranges
+        iblk = np.s_[self.blkranges[i]:self.blkranges[i + 1]]
+        return x[iblk].reshape(*self.block_shape(i))
 
     @memoize_method
-    def linear_indices(self):
+    def _linear_indices(self):
         """
-        :return: a tuple of `(rowindices, colindices)` that can be used in
-            conjunction with :func:`linear_ranges` to provide linear indexing
-            into a set of matrix blocks. These index arrays are just the
-            concatenated Cartesian products of all the block arrays described
-            by :attr:`tgtindices` and :arg:`srcindices`.
+        :return: a tuple of `(rowindices, colindices)` that can be
+            used to provide linear indexing into a set of matrix blocks. These
+            index arrays are just the concatenated Cartesian products of all
+            the block arrays described by :attr:`row` and :attr:`col`.
 
-            They can be used to index directly into the matrix as follows:
+            They can be used to index directly into a matrix as follows:
 
             .. code-block:: python
 
                 mat[rowindices[blkranges[i]:blkranges[i + 1]],
                     colindices[blkranges[i]:blkranges[i + 1]]]
+
+            The same block can be obtained more easily using
+
+            .. code-block:: python
+
+                index.view(mat, i).reshape(-1)
         """
 
         @memoize_in(self, "block_index_knl")
-        def linear_index():
+        def _build_index():
             loopy_knl = lp.make_kernel([
                 "{[irange]: 0 <= irange < nranges}",
                 "{[itgt, isrc]: 0 <= itgt < ntgtblock and 0 <= isrc < nsrcblock}"
@@ -417,11 +502,13 @@ class MatrixBlockIndex(object):
 
             return loopy_knl
 
-        blkranges = self.linear_ranges()
-        _, (rowindices, colindices) = linear_index()(self.queue,
-            tgtindices=self.tgtindices, srcindices=self.srcindices,
-            tgtranges=self.tgtranges, srcranges=self.srcranges,
-            blkranges=blkranges, nresults=blkranges[-1])
+        _, (rowindices, colindices) = _build_index()(self.queue,
+            tgtindices=self.row.indices,
+            srcindices=self.col.indices,
+            tgtranges=self.row.ranges,
+            srcranges=self.col.ranges,
+            blkranges=self.blkranges,
+            nresults=self.blkranges[-1].get(self.queue))
 
         return rowindices, colindices
 
diff --git a/test/test_matrixgen.py b/test/test_matrixgen.py
index 5fde01ed..6c9b1c36 100644
--- a/test/test_matrixgen.py
+++ b/test/test_matrixgen.py
@@ -27,9 +27,11 @@ import numpy as np
 import numpy.linalg as la
 
 import pyopencl as cl
+import pyopencl.array
 
 from pytools.obj_array import make_obj_array
 
+from sumpy.tools import vector_to_device
 from sumpy.tools import MatrixBlockIndex
 
 import pytest
@@ -48,7 +50,7 @@ else:
     faulthandler.enable()
 
 
-def _build_geometry(n, mode, target_radius=1.0):
+def _build_geometry(queue, n, mode, target_radius=1.0):
     # parametrize circle
     t = np.linspace(0.0, 2.0 * np.pi, n, endpoint=False)
     unit_circle = np.exp(1j * t)
@@ -66,10 +68,14 @@ def _build_geometry(n, mode, target_radius=1.0):
     centers = unit_circle * (1.0 - radius)
     expansion_radii = radius * np.ones(n)
 
-    return targets, sources, centers, sigma, expansion_radii
+    return (cl.array.to_device(queue, targets),
+            cl.array.to_device(queue, sources),
+            vector_to_device(queue, centers),
+            cl.array.to_device(queue, expansion_radii),
+            cl.array.to_device(queue, sigma))
 
 
-def _build_block_index(nnodes, nblks, factor):
+def _build_block_index(queue, nnodes, nblks, factor):
     indices = np.arange(0, nnodes)
     ranges = np.arange(0, nnodes + 1, nnodes // nblks)
 
@@ -86,128 +92,158 @@ def _build_block_index(nnodes, nblks, factor):
         ranges_ = np.cumsum([0] + [r.shape[0] for r in indices_])
         indices_ = np.hstack(indices_)
 
-    return indices_, ranges_
+    from sumpy.tools import BlockIndex
+    return BlockIndex(cl.array.to_device(queue, indices_),
+                      cl.array.to_device(queue, ranges_))
 
 
 @pytest.mark.parametrize('factor', [1.0, 0.6])
 @pytest.mark.parametrize('lpot_id', [1, 2])
 def test_qbx_direct(ctx_getter, factor, lpot_id):
-    # This evaluates a single layer potential on a circle.
     logging.basicConfig(level=logging.INFO)
 
     ctx = ctx_getter()
     queue = cl.CommandQueue(ctx)
 
+    ndim = 2
+    nblks = 10
+    order = 12
+    mode_nr = 25
+
     from sumpy.kernel import LaplaceKernel, DirectionalSourceDerivative
     if lpot_id == 1:
-        lknl = LaplaceKernel(2)
+        knl = LaplaceKernel(ndim)
     elif lpot_id == 2:
-        lknl = LaplaceKernel(2)
-        lknl = DirectionalSourceDerivative(lknl, dir_vec_name="dsource_vec")
+        knl = LaplaceKernel(ndim)
+        knl = DirectionalSourceDerivative(knl, dir_vec_name="dsource_vec")
     else:
         raise ValueError("unknow lpot_id")
 
-    nblks = 10
-    order = 12
-    mode_nr = 25
+    from sumpy.expansion.local import LineTaylorLocalExpansion
+    lknl = LineTaylorLocalExpansion(knl, order)
 
     from sumpy.qbx import LayerPotential
+    lpot = LayerPotential(ctx, [lknl])
+
     from sumpy.qbx import LayerPotentialMatrixGenerator
+    mat_gen = LayerPotentialMatrixGenerator(ctx, [lknl])
+
     from sumpy.qbx import LayerPotentialMatrixBlockGenerator
-    from sumpy.expansion.local import LineTaylorLocalExpansion
-    lpot = LayerPotential(ctx, [LineTaylorLocalExpansion(lknl, order)])
-    mat_gen = LayerPotentialMatrixGenerator(ctx,
-            [LineTaylorLocalExpansion(lknl, order)])
-    blk_gen = LayerPotentialMatrixBlockGenerator(ctx,
-            [LineTaylorLocalExpansion(lknl, order)])
+    blk_gen = LayerPotentialMatrixBlockGenerator(ctx, [lknl])
 
     for n in [200, 300, 400]:
-        targets, sources, centers, sigma, expansion_radii = \
-                _build_geometry(n, mode_nr)
+        targets, sources, centers, expansion_radii, sigma = \
+                _build_geometry(queue, n, mode_nr)
 
         h = 2 * np.pi / n
         strengths = (sigma * h,)
 
-        tgtindices, tgtranges = _build_block_index(n, nblks, factor)
-        srcindices, srcranges = _build_block_index(n, nblks, factor)
-        assert tgtranges.shape == srcranges.shape
+        tgtindices = _build_block_index(queue, n, nblks, factor)
+        srcindices = _build_block_index(queue, n, nblks, factor)
+        index_set = MatrixBlockIndex(queue, tgtindices, srcindices)
 
-        knl_kwargs = {}
+        extra_kwargs = {}
         if lpot_id == 2:
-            knl_kwargs["dsource_vec"] = make_obj_array([
-                np.ones(n) for _ in range(2)])
-
-        _, (mat,) = mat_gen(queue, targets, sources, centers,
-                expansion_radii, **knl_kwargs)
-        result_mat = mat.dot(strengths[0])
-
-        _, (result_lpot,) = lpot(queue, targets, sources, centers, strengths,
-                expansion_radii, **knl_kwargs)
-
-        eps = 1.0e-10 * la.norm(result_lpot)
+            extra_kwargs["dsource_vec"] = \
+                    vector_to_device(queue, np.ones((ndim, n)))
+
+        _, (result_lpot,) = lpot(queue,
+                targets=targets,
+                sources=sources,
+                centers=centers,
+                expansion_radii=expansion_radii,
+                strengths=strengths, **extra_kwargs)
+        result_lpot = result_lpot.get()
+
+        _, (mat,) = mat_gen(queue,
+                targets=targets,
+                sources=sources,
+                centers=centers,
+                expansion_radii=expansion_radii, **extra_kwargs)
+        mat = mat.get()
+        result_mat = mat.dot(strengths[0].get())
+
+        _, (blk,) = blk_gen(queue,
+                targets=targets,
+                sources=sources,
+                centers=centers,
+                expansion_radii=expansion_radii,
+                index_set=index_set, **extra_kwargs)
+        blk = blk.get()
+
+        rowindices = index_set.linear_row_indices.get(queue)
+        colindices = index_set.linear_col_indices.get(queue)
+
+        eps = 1.0e-13 * la.norm(result_lpot)
         assert la.norm(result_mat - result_lpot) < eps
+        assert la.norm(blk - mat[rowindices, colindices]) < eps
 
-        index_set = MatrixBlockIndex(queue,
-            tgtindices, srcindices, tgtranges, srcranges)
-        _, (blk,) = blk_gen(queue, targets, sources, centers, expansion_radii,
-                            index_set, **knl_kwargs)
-
-        rowindices, colindices = index_set.linear_indices()
-        eps = 1.0e-10 * la.norm(mat)
-        assert la.norm(blk - mat[rowindices, colindices].reshape(-1)) < eps
 
-
-@pytest.mark.parametrize(("exclude_self", "factor"),
-    [(True, 1.0), (True, 0.6), (False, 1.0), (False, 0.6)])
+@pytest.mark.parametrize("exclude_self", [True, False])
+@pytest.mark.parametrize("factor", [1.0, 0.6])
 def test_p2p_direct(ctx_getter, exclude_self, factor):
-    # This does a point-to-point kernel evaluation on a circle.
     logging.basicConfig(level=logging.INFO)
+
     ctx = ctx_getter()
     queue = cl.CommandQueue(ctx)
 
-    from sumpy.kernel import LaplaceKernel
-    lknl = LaplaceKernel(2)
-
+    ndim = 2
     nblks = 10
     mode_nr = 25
 
+    from sumpy.kernel import LaplaceKernel
+    lknl = LaplaceKernel(ndim)
+
     from sumpy.p2p import P2P
-    from sumpy.p2p import P2PMatrixGenerator, P2PMatrixBlockGenerator
     lpot = P2P(ctx, [lknl], exclude_self=exclude_self)
+
+    from sumpy.p2p import P2PMatrixGenerator
     mat_gen = P2PMatrixGenerator(ctx, [lknl], exclude_self=exclude_self)
+
+    from sumpy.p2p import P2PMatrixBlockGenerator
     blk_gen = P2PMatrixBlockGenerator(ctx, [lknl], exclude_self=exclude_self)
 
     for n in [200, 300, 400]:
-        targets, sources, _, sigma, _ = \
-            _build_geometry(n, mode_nr, target_radius=1.2)
+        targets, sources, _, _, sigma = \
+            _build_geometry(queue, n, mode_nr, target_radius=1.2)
 
         h = 2 * np.pi / n
         strengths = (sigma * h,)
 
-        tgtindices, tgtranges = _build_block_index(n, nblks, factor)
-        srcindices, srcranges = _build_block_index(n, nblks, factor)
-        assert tgtranges.shape == srcranges.shape
+        tgtindices = _build_block_index(queue, n, nblks, factor)
+        srcindices = _build_block_index(queue, n, nblks, factor)
+        index_set = MatrixBlockIndex(queue, tgtindices, srcindices)
 
         extra_kwargs = {}
         if exclude_self:
-            extra_kwargs["target_to_source"] = np.arange(n, dtype=np.int32)
-
-        _, (mat,) = mat_gen(queue, targets, sources, **extra_kwargs)
-        result_mat = mat.dot(strengths[0])
-
-        _, (result_lpot,) = lpot(queue, targets, sources, strengths,
-                                 **extra_kwargs)
-
-        eps = 1.0e-10 * la.norm(result_lpot)
+            extra_kwargs["target_to_source"] = \
+                cl.array.arange(queue, 0, n, dtype=np.int)
+
+        _, (result_lpot,) = lpot(queue,
+                targets=targets,
+                sources=sources,
+                strength=strengths, **extra_kwargs)
+        result_lpot = result_lpot.get()
+
+        _, (mat,) = mat_gen(queue,
+                targets=targets,
+                sources=sources, **extra_kwargs)
+        mat = mat.get()
+        result_mat = mat.dot(strengths[0].get())
+
+        _, (blk,) = blk_gen(queue,
+                targets=targets,
+                sources=sources,
+                index_set=index_set, **extra_kwargs)
+        blk = blk.get()
+
+        eps = 1.0e-13 * la.norm(result_lpot)
         assert la.norm(result_mat - result_lpot) < eps
 
-        index_set = MatrixBlockIndex(queue,
-                tgtindices, srcindices, tgtranges, srcranges)
-        _, (blk,) = blk_gen(queue, targets, sources, index_set, **extra_kwargs)
-
-        rowindices, colindices = index_set.linear_indices()
-        eps = 1.0e-10 * la.norm(mat)
-        assert la.norm(blk - mat[rowindices, colindices].reshape(-1)) < eps
+        index_set = index_set.get(queue)
+        for i in range(index_set.nblocks):
+            assert la.norm(index_set.view_block(blk, i) -
+                           index_set.view(mat, i)) < eps
 
 
 # You can test individual routines by typing
-- 
GitLab


From 7eea69df9194f1f2c28130090335fbcd4d2ebfab Mon Sep 17 00:00:00 2001
From: Alex Fikl <alexfikl@gmail.com>
Date: Wed, 27 Jun 2018 15:56:29 -0500
Subject: [PATCH 2/9] tools: fix issues with block index clases.

* renamed BlockIndex to BlockIndexRanges.
* renamed `view` to `take`, since the methods allocate and would cause
confusion.
* reduce the number of queues being passed around.
---
 sumpy/tools.py         | 118 +++++++++++++++++++++++++----------------
 test/test_matrixgen.py |  15 +++---
 2 files changed, 81 insertions(+), 52 deletions(-)

diff --git a/sumpy/tools.py b/sumpy/tools.py
index ce27a6ec..211f3130 100644
--- a/sumpy/tools.py
+++ b/sumpy/tools.py
@@ -311,15 +311,28 @@ def _to_host(x, queue=None):
     return x
 
 
-class BlockIndex(object):
+class BlockIndexRanges(object):
     """Convenience class for working with block indices.
 
+    .. attribute:: indices
+
+        A list of not necessarily continuous integers representing all the
+        indices making up a block (or range) of a global array. The individual
+        blocks are delimited using :attr:`ranges`.
+
+    .. attribute:: ranges
+
+        A list of nondecreasing integers used to index into :attr:`indices`.
+        A block (or range) :math:`i` can be retrieved using
+        `indices[ranges[i]:ranges[i + 1]]`.
+
     .. automethod:: block_shape
     .. automethod:: get
-    .. automethod:: view
+    .. automethod:: take
     """
 
-    def __init__(self, indices, ranges):
+    def __init__(self, cl_context, indices, ranges):
+        self.cl_context = cl_context
         self.indices = indices
         self.ranges = ranges
 
@@ -331,15 +344,21 @@ class BlockIndex(object):
         return (self._ranges[i + 1] - self._ranges[i],)
 
     def get(self, queue=None):
-        return BlockIndex(_to_host(self.indices, queue=queue),
-                          _to_host(self.ranges, queue=queue))
+        return BlockIndexRanges(self.cl_context,
+                                _to_host(self.indices, queue=queue),
+                                _to_host(self.ranges, queue=queue))
 
     @property
     @memoize_method
     def _ranges(self):
-        return _to_host(self.ranges)
+        with cl.CommandQueue(self.cl_context) as queue:
+            return _to_host(self.ranges, queue=queue)
+
+    def take(self, x, i):
+        """Return the subset of a global array `x` that is defined by
+        the :attr:`indices` in block :math:`i`.
+        """
 
-    def view(self, x, i):
         return x[self.indices[self._ranges[i]:self._ranges[i + 1]]]
 
 
@@ -348,28 +367,33 @@ class MatrixBlockIndex(object):
 
     .. attribute:: row
 
-        A :class:`BlockIndex`.
+        A :class:`BlockIndexRanges` encapsulating row block indices.
 
     .. attribute:: col
 
-        A :class:`BlockIndex`.
+        A :class:`BlockIndexRanges` encapsulating column block indices.
 
     .. automethod:: block_shape
+    .. automethod:: block_take
     .. automethod:: get
-    .. automethod:: view
+    .. automethod:: take
+
     """
 
-    def __init__(self, queue, rowindices, colindices):
-        self.queue = queue
-        self.row = rowindices
-        self.col = colindices
+    def __init__(self, cl_context, row, col):
+        self.cl_context = cl_context
+        self.row = row
+        self.col = col
         assert self.row.nblocks == self.col.nblocks
 
         self.blkranges = np.cumsum([0] + [
             self.row.block_shape(i)[0] * self.col.block_shape(i)[0]
             for i in range(self.row.nblocks)])
-        if self.queue:
-            self.blkranges = cl.array.to_device(self.queue, self.blkranges)
+
+        if isinstance(self.row.indices, cl.array.Array):
+            with cl.CommandQueue(self.cl_context) as queue:
+                self.blkranges = \
+                    cl.array.to_device(queue, self.blkranges).with_queue(None)
 
     @property
     def nblocks(self):
@@ -401,22 +425,19 @@ class MatrixBlockIndex(object):
         all :class:`pyopencl.array.Array` instances are replaces by
         :class:`numpy.ndarray` instances.
         """
-        queue = queue or self.queue
+        return MatrixBlockIndex(self.cl_context,
+                row=self.row.get(queue=queue),
+                col=self.col.get(queue=queue))
 
-        return MatrixBlockIndex(None,
-                rowindices=self.row.get(queue),
-                colindices=self.col.get(queue))
-
-    def view(self, x, i):
+    def take(self, x, i):
         """Retrieve a block from a global matrix.
 
-        :arg x: a 2D :class:`numpy.ndarray`, i.e. a matrix.
-        :arg i: the block index.
-        :return: the requested block from the matrix.
+        :arg x: a 2D :class:`numpy.ndarray`.
+        :arg i: block index.
+        :return: requested block from the matrix.
         """
 
-        if isinstance(x, cl.array.Array) or \
-                isinstance(self.row.indices, cl.array.Array) or \
+        if isinstance(self.row.indices, cl.array.Array) or \
                 isinstance(self.col.indices, cl.array.Array):
             raise ValueError("CL `Array`s are not supported."
                              "Use MatrixBlockIndex.get() and then view into matrices.")
@@ -426,19 +447,25 @@ class MatrixBlockIndex(object):
 
         return x[np.ix_(irow, icol)]
 
-    def view_block(self, x, i):
+    def block_take(self, x, i):
         """Retrieve a block from a linear representation of the matrix blocks.
+        A linear representation of the matrix blocks can be obtained, or
+        should be consistent with
+
+        .. code-block:: python
+
+            i = index.linear_row_indices()
+            j = index.linear_col_indices()
+            linear_blks = global_mat[i, j]
+
+            for k in range(index.nblocks):
+                assert np.allclose(index.block_take(linear_blks, k),
+                                   index.take(global_mat, k))
 
         :arg x: a 1D :class:`numpy.ndarray`.
-        :arg i: the block index.
-        :return: the requested block, reshaped into a 2D array. The result
-            matches, in shape and value, the block returned by
-            :meth:`view` with the global matrix.
+        :arg i: block index.
+        :return: requested block, reshaped into a 2D array.
         """
-        if isinstance(x, cl.array.Array) or \
-                isinstance(self.blkranges, cl.array.Array):
-            raise ValueError("CL `Array`s are not supported."
-                             "Use MatrixBlockIndex.get() and then view into matrices.")
 
         iblk = np.s_[self.blkranges[i]:self.blkranges[i + 1]]
         return x[iblk].reshape(*self.block_shape(i))
@@ -502,15 +529,16 @@ class MatrixBlockIndex(object):
 
             return loopy_knl
 
-        _, (rowindices, colindices) = _build_index()(self.queue,
-            tgtindices=self.row.indices,
-            srcindices=self.col.indices,
-            tgtranges=self.row.ranges,
-            srcranges=self.col.ranges,
-            blkranges=self.blkranges,
-            nresults=self.blkranges[-1].get(self.queue))
-
-        return rowindices, colindices
+        with cl.CommandQueue(self.cl_context) as queue:
+            _, (rowindices, colindices) = _build_index()(queue,
+                tgtindices=self.row.indices,
+                srcindices=self.col.indices,
+                tgtranges=self.row.ranges,
+                srcranges=self.col.ranges,
+                blkranges=self.blkranges,
+                nresults=_to_host(self.blkranges[-1], queue=queue))
+            return (rowindices.with_queue(None),
+                    colindices.with_queue(None))
 
 # }}}
 
diff --git a/test/test_matrixgen.py b/test/test_matrixgen.py
index 6c9b1c36..33f7e352 100644
--- a/test/test_matrixgen.py
+++ b/test/test_matrixgen.py
@@ -92,9 +92,10 @@ def _build_block_index(queue, nnodes, nblks, factor):
         ranges_ = np.cumsum([0] + [r.shape[0] for r in indices_])
         indices_ = np.hstack(indices_)
 
-    from sumpy.tools import BlockIndex
-    return BlockIndex(cl.array.to_device(queue, indices_),
-                      cl.array.to_device(queue, ranges_))
+    from sumpy.tools import BlockIndexRanges
+    return BlockIndexRanges(queue.context,
+                            cl.array.to_device(queue, indices_).with_queue(None),
+                            cl.array.to_device(queue, ranges_).with_queue(None))
 
 
 @pytest.mark.parametrize('factor', [1.0, 0.6])
@@ -140,7 +141,7 @@ def test_qbx_direct(ctx_getter, factor, lpot_id):
 
         tgtindices = _build_block_index(queue, n, nblks, factor)
         srcindices = _build_block_index(queue, n, nblks, factor)
-        index_set = MatrixBlockIndex(queue, tgtindices, srcindices)
+        index_set = MatrixBlockIndex(ctx, tgtindices, srcindices)
 
         extra_kwargs = {}
         if lpot_id == 2:
@@ -212,7 +213,7 @@ def test_p2p_direct(ctx_getter, exclude_self, factor):
 
         tgtindices = _build_block_index(queue, n, nblks, factor)
         srcindices = _build_block_index(queue, n, nblks, factor)
-        index_set = MatrixBlockIndex(queue, tgtindices, srcindices)
+        index_set = MatrixBlockIndex(ctx, tgtindices, srcindices)
 
         extra_kwargs = {}
         if exclude_self:
@@ -242,8 +243,8 @@ def test_p2p_direct(ctx_getter, exclude_self, factor):
 
         index_set = index_set.get(queue)
         for i in range(index_set.nblocks):
-            assert la.norm(index_set.view_block(blk, i) -
-                           index_set.view(mat, i)) < eps
+            assert la.norm(index_set.block_take(blk, i) -
+                           index_set.take(mat, i)) < eps
 
 
 # You can test individual routines by typing
-- 
GitLab


From 7b7d453c7bc5480e1460f6f801fc105913514fc3 Mon Sep 17 00:00:00 2001
From: Alex Fikl <alexfikl@gmail.com>
Date: Wed, 27 Jun 2018 15:59:23 -0500
Subject: [PATCH 3/9] flake8 fixes

---
 sumpy/tools.py         | 2 +-
 test/test_matrixgen.py | 4 +---
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/sumpy/tools.py b/sumpy/tools.py
index 211f3130..5b4c8c87 100644
--- a/sumpy/tools.py
+++ b/sumpy/tools.py
@@ -440,7 +440,7 @@ class MatrixBlockIndex(object):
         if isinstance(self.row.indices, cl.array.Array) or \
                 isinstance(self.col.indices, cl.array.Array):
             raise ValueError("CL `Array`s are not supported."
-                             "Use MatrixBlockIndex.get() and then view into matrices.")
+                    "Use MatrixBlockIndex.get() and then view into matrices.")
 
         irow = self.row.indices[self.row.ranges[i]:self.row.ranges[i + 1]]
         icol = self.col.indices[self.col.ranges[i]:self.col.ranges[i + 1]]
diff --git a/test/test_matrixgen.py b/test/test_matrixgen.py
index 33f7e352..e90a2c7a 100644
--- a/test/test_matrixgen.py
+++ b/test/test_matrixgen.py
@@ -27,9 +27,7 @@ import numpy as np
 import numpy.linalg as la
 
 import pyopencl as cl
-import pyopencl.array
-
-from pytools.obj_array import make_obj_array
+import pyopencl.array  # noqa
 
 from sumpy.tools import vector_to_device
 from sumpy.tools import MatrixBlockIndex
-- 
GitLab


From 0063d93268c65d371cfa4affb5090073c16fffc2 Mon Sep 17 00:00:00 2001
From: Alex Fikl <alexfikl@gmail.com>
Date: Wed, 27 Jun 2018 16:09:58 -0500
Subject: [PATCH 4/9] slightly improve docs

---
 sumpy/tools.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/sumpy/tools.py b/sumpy/tools.py
index 5b4c8c87..ee34abf2 100644
--- a/sumpy/tools.py
+++ b/sumpy/tools.py
@@ -312,18 +312,18 @@ def _to_host(x, queue=None):
 
 
 class BlockIndexRanges(object):
-    """Convenience class for working with block indices.
+    """Convenience class for working with blocks of a global array.
 
     .. attribute:: indices
 
-        A list of not necessarily continuous integers representing all the
-        indices making up a block (or range) of a global array. The individual
-        blocks are delimited using :attr:`ranges`.
+        A list of not necessarily continuous or increasing integers
+        representing the indices of a global array. The individual blocks are
+        delimited using :attr:`ranges`.
 
     .. attribute:: ranges
 
         A list of nondecreasing integers used to index into :attr:`indices`.
-        A block (or range) :math:`i` can be retrieved using
+        A block :math:`i` can be retrieved using
         `indices[ranges[i]:ranges[i + 1]]`.
 
     .. automethod:: block_shape
-- 
GitLab


From 8d06f92489fa08fdc481e3df3464f842caab72cc Mon Sep 17 00:00:00 2001
From: Alex Fikl <alexfikl@gmail.com>
Date: Wed, 27 Jun 2018 17:08:32 -0500
Subject: [PATCH 5/9] tests: lower some tolerance that seemed to be failing on
 titan

---
 test/test_matrixgen.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/test/test_matrixgen.py b/test/test_matrixgen.py
index e90a2c7a..e11b6693 100644
--- a/test/test_matrixgen.py
+++ b/test/test_matrixgen.py
@@ -173,7 +173,7 @@ def test_qbx_direct(ctx_getter, factor, lpot_id):
         rowindices = index_set.linear_row_indices.get(queue)
         colindices = index_set.linear_col_indices.get(queue)
 
-        eps = 1.0e-13 * la.norm(result_lpot)
+        eps = 1.0e-10 * la.norm(result_lpot)
         assert la.norm(result_mat - result_lpot) < eps
         assert la.norm(blk - mat[rowindices, colindices]) < eps
 
@@ -236,7 +236,7 @@ def test_p2p_direct(ctx_getter, exclude_self, factor):
                 index_set=index_set, **extra_kwargs)
         blk = blk.get()
 
-        eps = 1.0e-13 * la.norm(result_lpot)
+        eps = 1.0e-10 * la.norm(result_lpot)
         assert la.norm(result_mat - result_lpot) < eps
 
         index_set = index_set.get(queue)
-- 
GitLab


From 261acfb00f33a24ba0b40a61334ebbee2a59351c Mon Sep 17 00:00:00 2001
From: Alex Fikl <alexfikl@gmail.com>
Date: Wed, 27 Jun 2018 20:45:48 -0500
Subject: [PATCH 6/9] blockindex: add function to return the index for a single
 block

---
 sumpy/tools.py | 25 ++++++++++++++-----------
 1 file changed, 14 insertions(+), 11 deletions(-)

diff --git a/sumpy/tools.py b/sumpy/tools.py
index ee34abf2..64236683 100644
--- a/sumpy/tools.py
+++ b/sumpy/tools.py
@@ -336,6 +336,11 @@ class BlockIndexRanges(object):
         self.indices = indices
         self.ranges = ranges
 
+    @memoize_method
+    def _ranges(self):
+        with cl.CommandQueue(self.cl_context) as queue:
+            return _to_host(self.ranges, queue=queue)
+
     @property
     def nblocks(self):
         return self.ranges.shape[0] - 1
@@ -343,23 +348,20 @@ class BlockIndexRanges(object):
     def block_shape(self, i):
         return (self._ranges[i + 1] - self._ranges[i],)
 
+    def block_indices(self, i):
+        return self.indices[self._ranges[i]:self._ranges[i + 1]]
+
     def get(self, queue=None):
         return BlockIndexRanges(self.cl_context,
                                 _to_host(self.indices, queue=queue),
                                 _to_host(self.ranges, queue=queue))
 
-    @property
-    @memoize_method
-    def _ranges(self):
-        with cl.CommandQueue(self.cl_context) as queue:
-            return _to_host(self.ranges, queue=queue)
-
     def take(self, x, i):
         """Return the subset of a global array `x` that is defined by
         the :attr:`indices` in block :math:`i`.
         """
 
-        return x[self.indices[self._ranges[i]:self._ranges[i + 1]]]
+        return x[self.block_indices(i)]
 
 
 class MatrixBlockIndex(object):
@@ -402,6 +404,10 @@ class MatrixBlockIndex(object):
     def block_shape(self, i):
         return self.row.block_shape(i) + self.col.block_shape(i)
 
+    def block_indices(self, i):
+        return (self.row.block_indices(i),
+                self.col.block_indices(i))
+
     @property
     def linear_row_indices(self):
         r, _ = self._linear_indices()
@@ -442,10 +448,7 @@ class MatrixBlockIndex(object):
             raise ValueError("CL `Array`s are not supported."
                     "Use MatrixBlockIndex.get() and then view into matrices.")
 
-        irow = self.row.indices[self.row.ranges[i]:self.row.ranges[i + 1]]
-        icol = self.col.indices[self.col.ranges[i]:self.col.ranges[i + 1]]
-
-        return x[np.ix_(irow, icol)]
+        return x[np.ix_(*self.block_indices)]
 
     def block_take(self, x, i):
         """Retrieve a block from a linear representation of the matrix blocks.
-- 
GitLab


From 258bf9f3e99387386b2070eb65c7726b86274995 Mon Sep 17 00:00:00 2001
From: Alex Fikl <alexfikl@gmail.com>
Date: Wed, 27 Jun 2018 21:00:30 -0500
Subject: [PATCH 7/9] add back property (oops)

---
 sumpy/tools.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sumpy/tools.py b/sumpy/tools.py
index 64236683..bf57b0fb 100644
--- a/sumpy/tools.py
+++ b/sumpy/tools.py
@@ -336,6 +336,7 @@ class BlockIndexRanges(object):
         self.indices = indices
         self.ranges = ranges
 
+    @property
     @memoize_method
     def _ranges(self):
         with cl.CommandQueue(self.cl_context) as queue:
-- 
GitLab


From 4507c0a25679643c1f56f2249f53fe7f97ae2e19 Mon Sep 17 00:00:00 2001
From: Alex Fikl <alexfikl@gmail.com>
Date: Wed, 27 Jun 2018 21:18:16 -0500
Subject: [PATCH 8/9] fix method call

---
 sumpy/tools.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/sumpy/tools.py b/sumpy/tools.py
index bf57b0fb..4a5ab86a 100644
--- a/sumpy/tools.py
+++ b/sumpy/tools.py
@@ -449,7 +449,8 @@ class MatrixBlockIndex(object):
             raise ValueError("CL `Array`s are not supported."
                     "Use MatrixBlockIndex.get() and then view into matrices.")
 
-        return x[np.ix_(*self.block_indices)]
+        irow, icol = self.block_indices(i)
+        return x[np.ix_(irow, icol)]
 
     def block_take(self, x, i):
         """Retrieve a block from a linear representation of the matrix blocks.
-- 
GitLab


From 09f27b1ac32dea6424d329c2c253f5c1fdefe2d6 Mon Sep 17 00:00:00 2001
From: Alex Fikl <alexfikl@gmail.com>
Date: Fri, 29 Jun 2018 14:09:03 -0500
Subject: [PATCH 9/9] renamed MatrixBlockIndex to MatrixBlockIndexRanges

---
 sumpy/p2p.py           | 4 ++--
 sumpy/qbx.py           | 6 +++---
 sumpy/tools.py         | 6 +++---
 test/test_matrixgen.py | 6 +++---
 4 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/sumpy/p2p.py b/sumpy/p2p.py
index 7e012497..a0e89f24 100644
--- a/sumpy/p2p.py
+++ b/sumpy/p2p.py
@@ -381,8 +381,8 @@ class P2PMatrixBlockGenerator(P2PBase):
 
         :arg targets: target point coordinates.
         :arg sources: source point coordinates.
-        :arg index_set: a :class:`sumpy.tools.MatrixBlockIndex` object used
-            to define the various blocks.
+        :arg index_set: a :class:`sumpy.tools.MatrixBlockIndexRanges` used
+            to define the blocks.
         :return: a tuple of one-dimensional arrays of kernel evaluations at
             target-source pairs described by `index_set`.
         """
diff --git a/sumpy/qbx.py b/sumpy/qbx.py
index dea09e74..a0814e01 100644
--- a/sumpy/qbx.py
+++ b/sumpy/qbx.py
@@ -400,10 +400,10 @@ class LayerPotentialMatrixBlockGenerator(LayerPotentialBase):
         :arg sources: source point coordinates.
         :arg centers: QBX target expansion centers.
         :arg expansion_radii: radii for each expansion center.
-        :arg index_set: a :class:`sumpy.tools.MatrixBlockIndex` object used
-        to define the various blocks.
+        :arg index_set: a :class:`sumpy.tools.MatrixBlockIndexRanges` used
+            to define the blocks.
         :return: a tuple of one-dimensional arrays of kernel evaluations at
-        target-source pairs described by `index_set`.
+            target-source pairs described by `index_set`.
         """
 
         knl = self.get_cached_optimized_kernel()
diff --git a/sumpy/tools.py b/sumpy/tools.py
index 4a5ab86a..469cb5e1 100644
--- a/sumpy/tools.py
+++ b/sumpy/tools.py
@@ -365,7 +365,7 @@ class BlockIndexRanges(object):
         return x[self.block_indices(i)]
 
 
-class MatrixBlockIndex(object):
+class MatrixBlockIndexRanges(object):
     """Keep track of different ways to index into matrix blocks.
 
     .. attribute:: row
@@ -432,7 +432,7 @@ class MatrixBlockIndex(object):
         all :class:`pyopencl.array.Array` instances are replaces by
         :class:`numpy.ndarray` instances.
         """
-        return MatrixBlockIndex(self.cl_context,
+        return MatrixBlockIndexRanges(self.cl_context,
                 row=self.row.get(queue=queue),
                 col=self.col.get(queue=queue))
 
@@ -447,7 +447,7 @@ class MatrixBlockIndex(object):
         if isinstance(self.row.indices, cl.array.Array) or \
                 isinstance(self.col.indices, cl.array.Array):
             raise ValueError("CL `Array`s are not supported."
-                    "Use MatrixBlockIndex.get() and then view into matrices.")
+                    "Use MatrixBlockIndexRanges.get() and then view into matrices.")
 
         irow, icol = self.block_indices(i)
         return x[np.ix_(irow, icol)]
diff --git a/test/test_matrixgen.py b/test/test_matrixgen.py
index e11b6693..2f3aabc3 100644
--- a/test/test_matrixgen.py
+++ b/test/test_matrixgen.py
@@ -30,7 +30,7 @@ import pyopencl as cl
 import pyopencl.array  # noqa
 
 from sumpy.tools import vector_to_device
-from sumpy.tools import MatrixBlockIndex
+from sumpy.tools import MatrixBlockIndexRanges
 
 import pytest
 from pyopencl.tools import (  # noqa
@@ -139,7 +139,7 @@ def test_qbx_direct(ctx_getter, factor, lpot_id):
 
         tgtindices = _build_block_index(queue, n, nblks, factor)
         srcindices = _build_block_index(queue, n, nblks, factor)
-        index_set = MatrixBlockIndex(ctx, tgtindices, srcindices)
+        index_set = MatrixBlockIndexRanges(ctx, tgtindices, srcindices)
 
         extra_kwargs = {}
         if lpot_id == 2:
@@ -211,7 +211,7 @@ def test_p2p_direct(ctx_getter, exclude_self, factor):
 
         tgtindices = _build_block_index(queue, n, nblks, factor)
         srcindices = _build_block_index(queue, n, nblks, factor)
-        index_set = MatrixBlockIndex(ctx, tgtindices, srcindices)
+        index_set = MatrixBlockIndexRanges(ctx, tgtindices, srcindices)
 
         extra_kwargs = {}
         if exclude_self:
-- 
GitLab