From 00e3309ba0e3752d1ddfd30bb5db4dc23ae96a09 Mon Sep 17 00:00:00 2001
From: Alex Fikl <alexfikl@gmail.com>
Date: Sat, 12 Jun 2021 16:32:48 -0500
Subject: [PATCH] Remove BlockIndexRanges and friends from sumpy (#65)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* remove BlockIndexRanges and friends from sumpy

Moving these to pytential, since sumpy does not need to know about
how those indices are sectioned into blocks and other things like that.

* Adjust branches for pytential downstream CI

Co-authored-by: Andreas Klöckner <inform@tiker.net>
---
 .github/workflows/ci.yml |   4 +-
 sumpy/p2p.py             |  45 ++++---
 sumpy/qbx.py             |  41 ++++---
 sumpy/tools.py           | 256 +--------------------------------------
 test/test_matrixgen.py   |  76 +++++-------
 5 files changed, 83 insertions(+), 339 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index e0f76370..a1da39da 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -90,8 +90,8 @@ jobs:
             run: |
                 curl -L -O https://tiker.net/ci-support-v0
                 . ./ci-support-v0
-                if [[ "$DOWNSTREAM_PROJECT" = "pytential" ]] && [[ "$GITHUB_HEAD_REF" = "derivtaker" ]]; then
-                  git clone "https://github.com/isuruf/$DOWNSTREAM_PROJECT.git" -b "$GITHUB_HEAD_REF"
+                if [[ "$DOWNSTREAM_PROJECT" = "pytential" ]] && [[ "$GITHUB_HEAD_REF" = "remove-block-index-ranges" ]]; then
+                  git clone "https://github.com/alexfikl/$DOWNSTREAM_PROJECT.git" -b "block-index-ranges"
                 else
                   git clone "https://github.com/inducer/$DOWNSTREAM_PROJECT.git"
                 fi
diff --git a/sumpy/p2p.py b/sumpy/p2p.py
index c8ec65bf..b1d07f8f 100644
--- a/sumpy/p2p.py
+++ b/sumpy/p2p.py
@@ -39,7 +39,7 @@ Particle-to-particle
 .. autoclass:: P2PBase
 .. autoclass:: P2P
 .. autoclass:: P2PMatrixGenerator
-.. autoclass:: P2PMatrixBlockGenerator
+.. autoclass:: P2PMatrixSubsetGenerator
 .. autoclass:: P2PFromCSR
 
 """
@@ -318,15 +318,18 @@ class P2PMatrixGenerator(P2PBase):
 # }}}
 
 
-# {{{ P2P matrix block writer
+# {{{ P2P matrix subset generator
 
-class P2PMatrixBlockGenerator(P2PBase):
+class P2PMatrixSubsetGenerator(P2PBase):
     """Generator for a subset of P2P interaction matrix entries.
 
+    This generator evaluates a generic set of entries in the matrix. See
+    :class:`P2PFromCSR` for when a compressed row storage format is available.
+
     .. automethod:: __call__
     """
 
-    default_name = "p2p_block"
+    default_name = "p2p_subset"
 
     def get_strength_or_not(self, isrc, kernel_idx):
         return 1
@@ -395,27 +398,21 @@ class P2PMatrixBlockGenerator(P2PBase):
         knl = self._allow_redundant_execution_of_knl_scaling(knl)
         return knl
 
-    def __call__(self, queue, targets, sources, index_set, **kwargs):
-        """Construct a set of blocks of the full P2P interaction matrix.
-
-        The blocks are returned as one-dimensional arrays, for performance
-        and storage reasons. If the two-dimensional form is desired, it can
-        be obtained using the information in the `index_set` for a block
-        :math:`i` in the following way:
-
-        .. code-block:: python
+    def __call__(self, queue, targets, sources, tgtindices, srcindices, **kwargs):
+        """Evaluate a subset of the P2P matrix interactions.
 
-            blkranges = index_set.linear_ranges()
-            blkshape = index_set.block_shape(i)
+        :arg targets: target point coordinates, which can be an object
+            :class:`~numpy.ndarray`, :class:`list` or :class:`tuple` of
+            coordinates or a single stacked array.
+        :arg sources: source point coordinates, which can also be in any of the
+            formats of the *targets*,
 
-            block2d = result[blkranges[i]:blkranges[i + 1]].reshape(*blkshape)
+        :arg srcindices: an array of indices into *sources*.
+        :arg tgtindices: an array of indices into *targets*, of the same size
+            as *srcindices*.
 
-        :arg targets: target point coordinates.
-        :arg sources: source point coordinates.
-        :arg index_set: a :class:`sumpy.tools.MatrixBlockIndexRanges` used
-            to define the blocks.
-        :return: a tuple of one-dimensional arrays of kernel evaluations at
-            target-source pairs described by `index_set`.
+        :returns: a one-dimensional array of interactions, for each index pair
+            in (*srcindices*, *tgtindices*)
         """
         knl = self.get_cached_optimized_kernel(
                 targets_is_obj_array=is_obj_array_like(targets),
@@ -424,8 +421,8 @@ class P2PMatrixBlockGenerator(P2PBase):
         return knl(queue,
                    targets=targets,
                    sources=sources,
-                   tgtindices=index_set.linear_row_indices,
-                   srcindices=index_set.linear_col_indices, **kwargs)
+                   tgtindices=tgtindices,
+                   srcindices=srcindices, **kwargs)
 
 # }}}
 
diff --git a/sumpy/qbx.py b/sumpy/qbx.py
index c6fc4383..30d3254f 100644
--- a/sumpy/qbx.py
+++ b/sumpy/qbx.py
@@ -46,7 +46,7 @@ QBX for Layer Potentials
 .. autoclass:: LayerPotentialBase
 .. autoclass:: LayerPotential
 .. autoclass:: LayerPotentialMatrixGenerator
-.. autoclass:: LayerPotentialMatrixBlockGenerator
+.. autoclass:: LayerPotentialMatrixSubsetGenerator
 
 """
 
@@ -360,15 +360,15 @@ class LayerPotentialMatrixGenerator(LayerPotentialBase):
 # }}}
 
 
-# {{{ matrix block generator
+# {{{ matrix subset generator
 
-class LayerPotentialMatrixBlockGenerator(LayerPotentialBase):
+class LayerPotentialMatrixSubsetGenerator(LayerPotentialBase):
     """Generator for a subset of the layer potential matrix entries.
 
     .. automethod:: __call__
     """
 
-    default_name = "qbx_block"
+    default_name = "qbx_subset"
 
     def get_strength_or_not(self, isrc, kernel_idx):
         return 1
@@ -443,17 +443,28 @@ class LayerPotentialMatrixBlockGenerator(LayerPotentialBase):
         return loopy_knl
 
     def __call__(self, queue, targets, sources, centers, expansion_radii,
-                 index_set, **kwargs):
-        """
-        :arg targets: target point coordinates.
-        :arg sources: source point coordinates.
-        :arg centers: QBX target expansion centers.
+                 tgtindices, srcindices, **kwargs):
+        """Evaluate a subset of the QBX matrix interactions.
+
+        :arg targets: target point coordinates, which can be an object
+            :class:`~numpy.ndarray`, :class:`list` or :class:`tuple` of
+            coordinates or a single stacked array.
+        :arg sources: source point coordinates, which can also be in any of the
+            formats of the *targets*,
+
+        :arg centers: QBX target expansion center coordinates, which can also
+            be in any of the formats of the *targets*. The number of centers
+            must match the number of targets.
         :arg expansion_radii: radii for each expansion center.
-        :arg index_set: a :class:`sumpy.tools.MatrixBlockIndexRanges` used
-            to define the blocks.
-        :return: a tuple of one-dimensional arrays of kernel evaluations at
-            target-source pairs described by `index_set`.
+
+        :arg srcindices: an array of indices into *sources*.
+        :arg tgtindices: an array of indices into *targets*, of the same size
+            as *srcindices*.
+
+        :returns: a one-dimensional array of interactions, for each index pair
+            in (*srcindices*, *tgtindices*)
         """
+
         knl = self.get_cached_optimized_kernel(
                 targets_is_obj_array=is_obj_array_like(targets),
                 sources_is_obj_array=is_obj_array_like(sources),
@@ -464,8 +475,8 @@ class LayerPotentialMatrixBlockGenerator(LayerPotentialBase):
                    targets=targets,
                    center=centers,
                    expansion_radii=expansion_radii,
-                   tgtindices=index_set.linear_row_indices,
-                   srcindices=index_set.linear_col_indices, **kwargs)
+                   tgtindices=tgtindices,
+                   srcindices=srcindices, **kwargs)
 
 # }}}
 
diff --git a/sumpy/tools.py b/sumpy/tools.py
index 22a1f073..34d2f9a5 100644
--- a/sumpy/tools.py
+++ b/sumpy/tools.py
@@ -28,8 +28,6 @@ __doc__ = """
  Misc tools
  ==========
 
- .. autoclass:: BlockIndexRanges
- .. autoclass:: MatrixBlockIndexRanges
  .. autoclass:: ExprDerivativeTaker
  .. autoclass:: LaplaceDerivativeTaker
  .. autoclass:: RadialDerivativeTaker
@@ -37,18 +35,14 @@ __doc__ = """
  .. autoclass:: DifferentiatedExprDerivativeTaker
 """
 
-from pytools import memoize_method, memoize_in
+from pytools import memoize_method
 from pytools.tag import Tag, tag_dataclass
 from pymbolic.mapper import WalkMapper
 
 import numpy as np
 import sumpy.symbolic as sym
 
-import pyopencl as cl
-import pyopencl.array  # noqa
-
 import loopy as lp
-from loopy.version import MOST_RECENT_LANGUAGE_VERSION
 
 import logging
 logger = logging.getLogger(__name__)
@@ -616,254 +610,6 @@ class KernelComputation:
 # }}}
 
 
-# {{{
-
-
-def _to_host(x, queue=None):
-    if isinstance(x, cl.array.Array):
-        queue = queue or x.queue
-        return x.get(queue)
-    return x
-
-
-class BlockIndexRanges:
-    """Convenience class for working with blocks of a global array.
-
-    .. attribute:: indices
-
-        A list of not necessarily continuous or increasing integers
-        representing the indices of a global array. The individual blocks are
-        delimited using :attr:`ranges`.
-
-    .. attribute:: ranges
-
-        A list of nondecreasing integers used to index into :attr:`indices`.
-        A block :math:`i` can be retrieved using
-        `indices[ranges[i]:ranges[i + 1]]`.
-
-    .. automethod:: block_shape
-    .. automethod:: get
-    .. automethod:: take
-    """
-
-    def __init__(self, cl_context, indices, ranges):
-        self.cl_context = cl_context
-        self.indices = indices
-        self.ranges = ranges
-
-    @property
-    @memoize_method
-    def _ranges(self):
-        with cl.CommandQueue(self.cl_context) as queue:
-            return _to_host(self.ranges, queue=queue)
-
-    @property
-    def nblocks(self):
-        return self.ranges.shape[0] - 1
-
-    def block_shape(self, i):
-        return (self._ranges[i + 1] - self._ranges[i],)
-
-    def block_indices(self, i):
-        return self.indices[self._ranges[i]:self._ranges[i + 1]]
-
-    def get(self, queue=None):
-        return BlockIndexRanges(self.cl_context,
-                                _to_host(self.indices, queue=queue),
-                                _to_host(self.ranges, queue=queue))
-
-    def take(self, x, i):
-        """Return the subset of a global array `x` that is defined by
-        the :attr:`indices` in block :math:`i`.
-        """
-
-        return x[self.block_indices(i)]
-
-
-class MatrixBlockIndexRanges:
-    """Keep track of different ways to index into matrix blocks.
-
-    .. attribute:: row
-
-        A :class:`BlockIndexRanges` encapsulating row block indices.
-
-    .. attribute:: col
-
-        A :class:`BlockIndexRanges` encapsulating column block indices.
-
-    .. automethod:: block_shape
-    .. automethod:: block_take
-    .. automethod:: get
-    .. autoattribute:: linear_row_indices
-    .. automethod:: take
-
-    """
-
-    def __init__(self, cl_context, row, col):
-        self.cl_context = cl_context
-        self.row = row
-        self.col = col
-        assert self.row.nblocks == self.col.nblocks
-
-        self.blkranges = np.cumsum([0] + [
-            self.row.block_shape(i)[0] * self.col.block_shape(i)[0]
-            for i in range(self.row.nblocks)])
-
-        if isinstance(self.row.indices, cl.array.Array):
-            with cl.CommandQueue(self.cl_context) as queue:
-                self.blkranges = \
-                    cl.array.to_device(queue, self.blkranges).with_queue(None)
-
-    @property
-    def nblocks(self):
-        return self.row.nblocks
-
-    def block_shape(self, i):
-        return self.row.block_shape(i) + self.col.block_shape(i)
-
-    def block_indices(self, i):
-        return (self.row.block_indices(i),
-                self.col.block_indices(i))
-
-    @property
-    def linear_row_indices(self):
-        r, _ = self._linear_indices()
-        return r
-
-    @property
-    def linear_col_indices(self):
-        _, c = self._linear_indices()
-        return c
-
-    @property
-    def linear_ranges(self):
-        return self.blkranges
-
-    def get(self, queue=None):
-        """Transfer data to the host. Only the initial given data is
-        transfered, not the arrays returned by :meth:`linear_row_indices` and
-        friends.
-
-        :return: a copy of `self` in which all data lives on the host, i.e.
-                 all :class:`pyopencl.array.Array` instances are replaces by
-                 :class:`numpy.ndarray` instances.
-        """
-        return MatrixBlockIndexRanges(self.cl_context,
-                row=self.row.get(queue=queue),
-                col=self.col.get(queue=queue))
-
-    def take(self, x, i):
-        """Retrieve a block from a global matrix.
-
-        :arg x: a 2D :class:`numpy.ndarray`.
-        :arg i: block index.
-        :return: requested block from the matrix.
-        """
-
-        if isinstance(self.row.indices, cl.array.Array) or \
-                isinstance(self.col.indices, cl.array.Array):
-            raise ValueError("CL `Array`s are not supported."
-                    "Use MatrixBlockIndexRanges.get() and then view into matrices.")
-
-        irow, icol = self.block_indices(i)
-        return x[np.ix_(irow, icol)]
-
-    def block_take(self, x, i):
-        """Retrieve a block from a linear representation of the matrix blocks.
-        A linear representation of the matrix blocks can be obtained, or
-        should be consistent with
-
-        .. code-block:: python
-
-            i = index.linear_row_indices()
-            j = index.linear_col_indices()
-            linear_blks = global_mat[i, j]
-
-            for k in range(index.nblocks):
-                assert np.allclose(index.block_take(linear_blks, k),
-                                   index.take(global_mat, k))
-
-        :arg x: a 1D :class:`numpy.ndarray`.
-        :arg i: block index.
-        :return: requested block, reshaped into a 2D array.
-        """
-
-        iblk = np.s_[self.blkranges[i]:self.blkranges[i + 1]]
-        return x[iblk].reshape(*self.block_shape(i))
-
-    @memoize_method
-    def _linear_indices(self):
-        """
-        :return: a tuple of `(rowindices, colindices)` that can be
-            used to provide linear indexing into a set of matrix blocks. These
-            index arrays are just the concatenated Cartesian products of all
-            the block arrays described by :attr:`row` and :attr:`col`.
-
-            They can be used to index directly into a matrix as follows:
-
-            .. code-block:: python
-
-                mat[rowindices[blkranges[i]:blkranges[i + 1]],
-                    colindices[blkranges[i]:blkranges[i + 1]]]
-
-            The same block can be obtained more easily using
-
-            .. code-block:: python
-
-                index.view(mat, i).reshape(-1)
-        """
-
-        @memoize_in(self, "block_index_knl")
-        def _build_index():
-            loopy_knl = lp.make_kernel([
-                "{[irange]: 0 <= irange < nranges}",
-                "{[itgt, isrc]: 0 <= itgt < ntgtblock and 0 <= isrc < nsrcblock}"
-                ],
-                """
-                for irange
-                    <> ntgtblock = tgtranges[irange + 1] - tgtranges[irange]
-                    <> nsrcblock = srcranges[irange + 1] - srcranges[irange]
-
-                    for itgt, isrc
-                        <> imat = blkranges[irange] + (nsrcblock * itgt + isrc)
-
-                        rowindices[imat] = tgtindices[tgtranges[irange] + itgt] \
-                            {id_prefix=write_index}
-                        colindices[imat] = srcindices[srcranges[irange] + isrc] \
-                            {id_prefix=write_index}
-                    end
-                end
-                """,
-                [
-                    lp.GlobalArg("blkranges", None, shape="nranges + 1"),
-                    lp.GlobalArg("rowindices", None, shape="nresults"),
-                    lp.GlobalArg("colindices", None, shape="nresults"),
-                    lp.ValueArg("nresults", None),
-                    "..."
-                ],
-                name="block_index_knl",
-                default_offset=lp.auto,
-                assumptions="nranges>=1",
-                silenced_warnings="write_race(write_index*)",
-                lang_version=MOST_RECENT_LANGUAGE_VERSION)
-            loopy_knl = lp.split_iname(loopy_knl, "irange", 128, outer_tag="g.0")
-
-            return loopy_knl
-
-        with cl.CommandQueue(self.cl_context) as queue:
-            _, (rowindices, colindices) = _build_index()(queue,
-                tgtindices=self.row.indices,
-                srcindices=self.col.indices,
-                tgtranges=self.row.ranges,
-                srcranges=self.col.ranges,
-                blkranges=self.blkranges,
-                nresults=_to_host(self.blkranges[-1], queue=queue))
-            return (rowindices.with_queue(None),
-                    colindices.with_queue(None))
-
-# }}}
-
-
 # {{{ OrderedSet
 
 # Source: https://code.activestate.com/recipes/576694-orderedset/
diff --git a/test/test_matrixgen.py b/test/test_matrixgen.py
index c51fc3ce..6fd929e5 100644
--- a/test/test_matrixgen.py
+++ b/test/test_matrixgen.py
@@ -28,7 +28,6 @@ import pyopencl as cl
 import pyopencl.array  # noqa
 
 from sumpy.tools import vector_to_device
-from sumpy.tools import MatrixBlockIndexRanges
 
 import pytest
 from pyopencl.tools import (  # noqa
@@ -71,27 +70,23 @@ def _build_geometry(queue, n, mode, target_radius=1.0):
             cl.array.to_device(queue, sigma))
 
 
-def _build_block_index(queue, nnodes, nblks, factor):
-    indices = np.arange(0, nnodes)
-    ranges = np.arange(0, nnodes + 1, nnodes // nblks)
+def _build_subset_indices(queue, ntargets, nsources, factor):
+    tgtindices = np.arange(0, ntargets)
+    srcindices = np.arange(0, nsources)
 
-    if abs(factor - 1.0) < 1.0e-14:
-        ranges_ = ranges
-        indices_ = indices
+    rng = np.random.default_rng()
+    if abs(factor - 1.0) > 1.0e-14:
+        tgtindices = rng.choice(tgtindices,
+                size=int(factor * ntargets), replace=False)
+        srcindices = rng.choice(srcindices,
+                size=int(factor * nsources), replace=False)
     else:
-        indices_ = np.empty(ranges.shape[0] - 1, dtype=np.object)
-        for i in range(ranges.shape[0] - 1):
-            iidx = indices[np.s_[ranges[i]:ranges[i + 1]]]
-            indices_[i] = np.sort(np.random.choice(iidx,
-                size=int(factor * len(iidx)), replace=False))
+        rng.shuffle(tgtindices)
+        rng.shuffle(srcindices)
 
-        ranges_ = np.cumsum([0] + [r.shape[0] for r in indices_])
-        indices_ = np.hstack(indices_)
-
-    from sumpy.tools import BlockIndexRanges
-    return BlockIndexRanges(queue.context,
-                            cl.array.to_device(queue, indices_).with_queue(None),
-                            cl.array.to_device(queue, ranges_).with_queue(None))
+    return (
+            cl.array.to_device(queue, tgtindices).with_queue(None),
+            cl.array.to_device(queue, srcindices).with_queue(None))
 
 
 @pytest.mark.parametrize("factor", [1.0, 0.6])
@@ -103,7 +98,6 @@ def test_qbx_direct(ctx_factory, factor, lpot_id):
     queue = cl.CommandQueue(ctx)
 
     ndim = 2
-    nblks = 10
     order = 12
     mode_nr = 25
 
@@ -128,8 +122,8 @@ def test_qbx_direct(ctx_factory, factor, lpot_id):
     mat_gen = LayerPotentialMatrixGenerator(ctx, expansion=expn,
             source_kernels=(knl,), target_kernels=(base_knl,))
 
-    from sumpy.qbx import LayerPotentialMatrixBlockGenerator
-    blk_gen = LayerPotentialMatrixBlockGenerator(ctx, expansion=expn,
+    from sumpy.qbx import LayerPotentialMatrixSubsetGenerator
+    blk_gen = LayerPotentialMatrixSubsetGenerator(ctx, expansion=expn,
             source_kernels=(knl,), target_kernels=(base_knl,))
 
     for n in [200, 300, 400]:
@@ -138,10 +132,8 @@ def test_qbx_direct(ctx_factory, factor, lpot_id):
 
         h = 2 * np.pi / n
         strengths = (sigma * h,)
-
-        tgtindices = _build_block_index(queue, n, nblks, factor)
-        srcindices = _build_block_index(queue, n, nblks, factor)
-        index_set = MatrixBlockIndexRanges(ctx, tgtindices, srcindices)
+        tgtindices, srcindices = _build_subset_indices(queue,
+                ntargets=n, nsources=n, factor=factor)
 
         extra_kwargs = {}
         if lpot_id == 2:
@@ -170,15 +162,16 @@ def test_qbx_direct(ctx_factory, factor, lpot_id):
                 sources=sources,
                 centers=centers,
                 expansion_radii=expansion_radii,
-                index_set=index_set, **extra_kwargs)
+                tgtindices=tgtindices,
+                srcindices=srcindices, **extra_kwargs)
         blk = blk.get()
 
-        rowindices = index_set.linear_row_indices.get(queue)
-        colindices = index_set.linear_col_indices.get(queue)
+        tgtindices = tgtindices.get(queue)
+        srcindices = srcindices.get(queue)
 
         eps = 1.0e-10 * la.norm(result_lpot)
         assert la.norm(result_mat - result_lpot) < eps
-        assert la.norm(blk - mat[rowindices, colindices]) < eps
+        assert la.norm(blk - mat[tgtindices, srcindices]) < eps
 
 
 @pytest.mark.parametrize("exclude_self", [True, False])
@@ -191,7 +184,6 @@ def test_p2p_direct(ctx_factory, exclude_self, factor, lpot_id):
     queue = cl.CommandQueue(ctx)
 
     ndim = 2
-    nblks = 10
     mode_nr = 25
 
     from sumpy.kernel import LaplaceKernel, DirectionalSourceDerivative
@@ -209,8 +201,8 @@ def test_p2p_direct(ctx_factory, exclude_self, factor, lpot_id):
     from sumpy.p2p import P2PMatrixGenerator
     mat_gen = P2PMatrixGenerator(ctx, [lknl], exclude_self=exclude_self)
 
-    from sumpy.p2p import P2PMatrixBlockGenerator
-    blk_gen = P2PMatrixBlockGenerator(ctx, [lknl], exclude_self=exclude_self)
+    from sumpy.p2p import P2PMatrixSubsetGenerator
+    blk_gen = P2PMatrixSubsetGenerator(ctx, [lknl], exclude_self=exclude_self)
 
     for n in [200, 300, 400]:
         targets, sources, _, _, sigma = \
@@ -218,10 +210,8 @@ def test_p2p_direct(ctx_factory, exclude_self, factor, lpot_id):
 
         h = 2 * np.pi / n
         strengths = (sigma * h,)
-
-        tgtindices = _build_block_index(queue, n, nblks, factor)
-        srcindices = _build_block_index(queue, n, nblks, factor)
-        index_set = MatrixBlockIndexRanges(ctx, tgtindices, srcindices)
+        tgtindices, srcindices = _build_subset_indices(queue,
+                ntargets=n, nsources=n, factor=factor)
 
         extra_kwargs = {}
         if exclude_self:
@@ -247,16 +237,16 @@ def test_p2p_direct(ctx_factory, exclude_self, factor, lpot_id):
         _, (blk,) = blk_gen(queue,
                 targets=targets,
                 sources=sources,
-                index_set=index_set, **extra_kwargs)
+                tgtindices=tgtindices,
+                srcindices=srcindices, **extra_kwargs)
         blk = blk.get()
 
+        tgtindices = tgtindices.get(queue)
+        srcindices = srcindices.get(queue)
+
         eps = 1.0e-10 * la.norm(result_lpot)
         assert la.norm(result_mat - result_lpot) < eps
-
-        index_set = index_set.get(queue)
-        for i in range(index_set.nblocks):
-            assert la.norm(index_set.block_take(blk, i)
-                           - index_set.take(mat, i)) < eps
+        assert la.norm(blk - mat[tgtindices, srcindices]) < eps
 
 
 # You can test individual routines by typing
-- 
GitLab