From 00e3309ba0e3752d1ddfd30bb5db4dc23ae96a09 Mon Sep 17 00:00:00 2001 From: Alex Fikl <alexfikl@gmail.com> Date: Sat, 12 Jun 2021 16:32:48 -0500 Subject: [PATCH] Remove BlockIndexRanges and friends from sumpy (#65) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * remove BlockIndexRanges and friends from sumpy Moving these to pytential, since sumpy does not need to know about how those indices are sectioned into blocks and other things like that. * Adjust branches for pytential downstream CI Co-authored-by: Andreas Klöckner <inform@tiker.net> --- .github/workflows/ci.yml | 4 +- sumpy/p2p.py | 45 ++++--- sumpy/qbx.py | 41 ++++--- sumpy/tools.py | 256 +-------------------------------------- test/test_matrixgen.py | 76 +++++------- 5 files changed, 83 insertions(+), 339 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index e0f76370..a1da39da 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -90,8 +90,8 @@ jobs: run: | curl -L -O https://tiker.net/ci-support-v0 . ./ci-support-v0 - if [[ "$DOWNSTREAM_PROJECT" = "pytential" ]] && [[ "$GITHUB_HEAD_REF" = "derivtaker" ]]; then - git clone "https://github.com/isuruf/$DOWNSTREAM_PROJECT.git" -b "$GITHUB_HEAD_REF" + if [[ "$DOWNSTREAM_PROJECT" = "pytential" ]] && [[ "$GITHUB_HEAD_REF" = "remove-block-index-ranges" ]]; then + git clone "https://github.com/alexfikl/$DOWNSTREAM_PROJECT.git" -b "block-index-ranges" else git clone "https://github.com/inducer/$DOWNSTREAM_PROJECT.git" fi diff --git a/sumpy/p2p.py b/sumpy/p2p.py index c8ec65bf..b1d07f8f 100644 --- a/sumpy/p2p.py +++ b/sumpy/p2p.py @@ -39,7 +39,7 @@ Particle-to-particle .. autoclass:: P2PBase .. autoclass:: P2P .. autoclass:: P2PMatrixGenerator -.. autoclass:: P2PMatrixBlockGenerator +.. autoclass:: P2PMatrixSubsetGenerator .. autoclass:: P2PFromCSR """ @@ -318,15 +318,18 @@ class P2PMatrixGenerator(P2PBase): # }}} -# {{{ P2P matrix block writer +# {{{ P2P matrix subset generator -class P2PMatrixBlockGenerator(P2PBase): +class P2PMatrixSubsetGenerator(P2PBase): """Generator for a subset of P2P interaction matrix entries. + This generator evaluates a generic set of entries in the matrix. See + :class:`P2PFromCSR` for when a compressed row storage format is available. + .. automethod:: __call__ """ - default_name = "p2p_block" + default_name = "p2p_subset" def get_strength_or_not(self, isrc, kernel_idx): return 1 @@ -395,27 +398,21 @@ class P2PMatrixBlockGenerator(P2PBase): knl = self._allow_redundant_execution_of_knl_scaling(knl) return knl - def __call__(self, queue, targets, sources, index_set, **kwargs): - """Construct a set of blocks of the full P2P interaction matrix. - - The blocks are returned as one-dimensional arrays, for performance - and storage reasons. If the two-dimensional form is desired, it can - be obtained using the information in the `index_set` for a block - :math:`i` in the following way: - - .. code-block:: python + def __call__(self, queue, targets, sources, tgtindices, srcindices, **kwargs): + """Evaluate a subset of the P2P matrix interactions. - blkranges = index_set.linear_ranges() - blkshape = index_set.block_shape(i) + :arg targets: target point coordinates, which can be an object + :class:`~numpy.ndarray`, :class:`list` or :class:`tuple` of + coordinates or a single stacked array. + :arg sources: source point coordinates, which can also be in any of the + formats of the *targets*, - block2d = result[blkranges[i]:blkranges[i + 1]].reshape(*blkshape) + :arg srcindices: an array of indices into *sources*. + :arg tgtindices: an array of indices into *targets*, of the same size + as *srcindices*. - :arg targets: target point coordinates. - :arg sources: source point coordinates. - :arg index_set: a :class:`sumpy.tools.MatrixBlockIndexRanges` used - to define the blocks. - :return: a tuple of one-dimensional arrays of kernel evaluations at - target-source pairs described by `index_set`. + :returns: a one-dimensional array of interactions, for each index pair + in (*srcindices*, *tgtindices*) """ knl = self.get_cached_optimized_kernel( targets_is_obj_array=is_obj_array_like(targets), @@ -424,8 +421,8 @@ class P2PMatrixBlockGenerator(P2PBase): return knl(queue, targets=targets, sources=sources, - tgtindices=index_set.linear_row_indices, - srcindices=index_set.linear_col_indices, **kwargs) + tgtindices=tgtindices, + srcindices=srcindices, **kwargs) # }}} diff --git a/sumpy/qbx.py b/sumpy/qbx.py index c6fc4383..30d3254f 100644 --- a/sumpy/qbx.py +++ b/sumpy/qbx.py @@ -46,7 +46,7 @@ QBX for Layer Potentials .. autoclass:: LayerPotentialBase .. autoclass:: LayerPotential .. autoclass:: LayerPotentialMatrixGenerator -.. autoclass:: LayerPotentialMatrixBlockGenerator +.. autoclass:: LayerPotentialMatrixSubsetGenerator """ @@ -360,15 +360,15 @@ class LayerPotentialMatrixGenerator(LayerPotentialBase): # }}} -# {{{ matrix block generator +# {{{ matrix subset generator -class LayerPotentialMatrixBlockGenerator(LayerPotentialBase): +class LayerPotentialMatrixSubsetGenerator(LayerPotentialBase): """Generator for a subset of the layer potential matrix entries. .. automethod:: __call__ """ - default_name = "qbx_block" + default_name = "qbx_subset" def get_strength_or_not(self, isrc, kernel_idx): return 1 @@ -443,17 +443,28 @@ class LayerPotentialMatrixBlockGenerator(LayerPotentialBase): return loopy_knl def __call__(self, queue, targets, sources, centers, expansion_radii, - index_set, **kwargs): - """ - :arg targets: target point coordinates. - :arg sources: source point coordinates. - :arg centers: QBX target expansion centers. + tgtindices, srcindices, **kwargs): + """Evaluate a subset of the QBX matrix interactions. + + :arg targets: target point coordinates, which can be an object + :class:`~numpy.ndarray`, :class:`list` or :class:`tuple` of + coordinates or a single stacked array. + :arg sources: source point coordinates, which can also be in any of the + formats of the *targets*, + + :arg centers: QBX target expansion center coordinates, which can also + be in any of the formats of the *targets*. The number of centers + must match the number of targets. :arg expansion_radii: radii for each expansion center. - :arg index_set: a :class:`sumpy.tools.MatrixBlockIndexRanges` used - to define the blocks. - :return: a tuple of one-dimensional arrays of kernel evaluations at - target-source pairs described by `index_set`. + + :arg srcindices: an array of indices into *sources*. + :arg tgtindices: an array of indices into *targets*, of the same size + as *srcindices*. + + :returns: a one-dimensional array of interactions, for each index pair + in (*srcindices*, *tgtindices*) """ + knl = self.get_cached_optimized_kernel( targets_is_obj_array=is_obj_array_like(targets), sources_is_obj_array=is_obj_array_like(sources), @@ -464,8 +475,8 @@ class LayerPotentialMatrixBlockGenerator(LayerPotentialBase): targets=targets, center=centers, expansion_radii=expansion_radii, - tgtindices=index_set.linear_row_indices, - srcindices=index_set.linear_col_indices, **kwargs) + tgtindices=tgtindices, + srcindices=srcindices, **kwargs) # }}} diff --git a/sumpy/tools.py b/sumpy/tools.py index 22a1f073..34d2f9a5 100644 --- a/sumpy/tools.py +++ b/sumpy/tools.py @@ -28,8 +28,6 @@ __doc__ = """ Misc tools ========== - .. autoclass:: BlockIndexRanges - .. autoclass:: MatrixBlockIndexRanges .. autoclass:: ExprDerivativeTaker .. autoclass:: LaplaceDerivativeTaker .. autoclass:: RadialDerivativeTaker @@ -37,18 +35,14 @@ __doc__ = """ .. autoclass:: DifferentiatedExprDerivativeTaker """ -from pytools import memoize_method, memoize_in +from pytools import memoize_method from pytools.tag import Tag, tag_dataclass from pymbolic.mapper import WalkMapper import numpy as np import sumpy.symbolic as sym -import pyopencl as cl -import pyopencl.array # noqa - import loopy as lp -from loopy.version import MOST_RECENT_LANGUAGE_VERSION import logging logger = logging.getLogger(__name__) @@ -616,254 +610,6 @@ class KernelComputation: # }}} -# {{{ - - -def _to_host(x, queue=None): - if isinstance(x, cl.array.Array): - queue = queue or x.queue - return x.get(queue) - return x - - -class BlockIndexRanges: - """Convenience class for working with blocks of a global array. - - .. attribute:: indices - - A list of not necessarily continuous or increasing integers - representing the indices of a global array. The individual blocks are - delimited using :attr:`ranges`. - - .. attribute:: ranges - - A list of nondecreasing integers used to index into :attr:`indices`. - A block :math:`i` can be retrieved using - `indices[ranges[i]:ranges[i + 1]]`. - - .. automethod:: block_shape - .. automethod:: get - .. automethod:: take - """ - - def __init__(self, cl_context, indices, ranges): - self.cl_context = cl_context - self.indices = indices - self.ranges = ranges - - @property - @memoize_method - def _ranges(self): - with cl.CommandQueue(self.cl_context) as queue: - return _to_host(self.ranges, queue=queue) - - @property - def nblocks(self): - return self.ranges.shape[0] - 1 - - def block_shape(self, i): - return (self._ranges[i + 1] - self._ranges[i],) - - def block_indices(self, i): - return self.indices[self._ranges[i]:self._ranges[i + 1]] - - def get(self, queue=None): - return BlockIndexRanges(self.cl_context, - _to_host(self.indices, queue=queue), - _to_host(self.ranges, queue=queue)) - - def take(self, x, i): - """Return the subset of a global array `x` that is defined by - the :attr:`indices` in block :math:`i`. - """ - - return x[self.block_indices(i)] - - -class MatrixBlockIndexRanges: - """Keep track of different ways to index into matrix blocks. - - .. attribute:: row - - A :class:`BlockIndexRanges` encapsulating row block indices. - - .. attribute:: col - - A :class:`BlockIndexRanges` encapsulating column block indices. - - .. automethod:: block_shape - .. automethod:: block_take - .. automethod:: get - .. autoattribute:: linear_row_indices - .. automethod:: take - - """ - - def __init__(self, cl_context, row, col): - self.cl_context = cl_context - self.row = row - self.col = col - assert self.row.nblocks == self.col.nblocks - - self.blkranges = np.cumsum([0] + [ - self.row.block_shape(i)[0] * self.col.block_shape(i)[0] - for i in range(self.row.nblocks)]) - - if isinstance(self.row.indices, cl.array.Array): - with cl.CommandQueue(self.cl_context) as queue: - self.blkranges = \ - cl.array.to_device(queue, self.blkranges).with_queue(None) - - @property - def nblocks(self): - return self.row.nblocks - - def block_shape(self, i): - return self.row.block_shape(i) + self.col.block_shape(i) - - def block_indices(self, i): - return (self.row.block_indices(i), - self.col.block_indices(i)) - - @property - def linear_row_indices(self): - r, _ = self._linear_indices() - return r - - @property - def linear_col_indices(self): - _, c = self._linear_indices() - return c - - @property - def linear_ranges(self): - return self.blkranges - - def get(self, queue=None): - """Transfer data to the host. Only the initial given data is - transfered, not the arrays returned by :meth:`linear_row_indices` and - friends. - - :return: a copy of `self` in which all data lives on the host, i.e. - all :class:`pyopencl.array.Array` instances are replaces by - :class:`numpy.ndarray` instances. - """ - return MatrixBlockIndexRanges(self.cl_context, - row=self.row.get(queue=queue), - col=self.col.get(queue=queue)) - - def take(self, x, i): - """Retrieve a block from a global matrix. - - :arg x: a 2D :class:`numpy.ndarray`. - :arg i: block index. - :return: requested block from the matrix. - """ - - if isinstance(self.row.indices, cl.array.Array) or \ - isinstance(self.col.indices, cl.array.Array): - raise ValueError("CL `Array`s are not supported." - "Use MatrixBlockIndexRanges.get() and then view into matrices.") - - irow, icol = self.block_indices(i) - return x[np.ix_(irow, icol)] - - def block_take(self, x, i): - """Retrieve a block from a linear representation of the matrix blocks. - A linear representation of the matrix blocks can be obtained, or - should be consistent with - - .. code-block:: python - - i = index.linear_row_indices() - j = index.linear_col_indices() - linear_blks = global_mat[i, j] - - for k in range(index.nblocks): - assert np.allclose(index.block_take(linear_blks, k), - index.take(global_mat, k)) - - :arg x: a 1D :class:`numpy.ndarray`. - :arg i: block index. - :return: requested block, reshaped into a 2D array. - """ - - iblk = np.s_[self.blkranges[i]:self.blkranges[i + 1]] - return x[iblk].reshape(*self.block_shape(i)) - - @memoize_method - def _linear_indices(self): - """ - :return: a tuple of `(rowindices, colindices)` that can be - used to provide linear indexing into a set of matrix blocks. These - index arrays are just the concatenated Cartesian products of all - the block arrays described by :attr:`row` and :attr:`col`. - - They can be used to index directly into a matrix as follows: - - .. code-block:: python - - mat[rowindices[blkranges[i]:blkranges[i + 1]], - colindices[blkranges[i]:blkranges[i + 1]]] - - The same block can be obtained more easily using - - .. code-block:: python - - index.view(mat, i).reshape(-1) - """ - - @memoize_in(self, "block_index_knl") - def _build_index(): - loopy_knl = lp.make_kernel([ - "{[irange]: 0 <= irange < nranges}", - "{[itgt, isrc]: 0 <= itgt < ntgtblock and 0 <= isrc < nsrcblock}" - ], - """ - for irange - <> ntgtblock = tgtranges[irange + 1] - tgtranges[irange] - <> nsrcblock = srcranges[irange + 1] - srcranges[irange] - - for itgt, isrc - <> imat = blkranges[irange] + (nsrcblock * itgt + isrc) - - rowindices[imat] = tgtindices[tgtranges[irange] + itgt] \ - {id_prefix=write_index} - colindices[imat] = srcindices[srcranges[irange] + isrc] \ - {id_prefix=write_index} - end - end - """, - [ - lp.GlobalArg("blkranges", None, shape="nranges + 1"), - lp.GlobalArg("rowindices", None, shape="nresults"), - lp.GlobalArg("colindices", None, shape="nresults"), - lp.ValueArg("nresults", None), - "..." - ], - name="block_index_knl", - default_offset=lp.auto, - assumptions="nranges>=1", - silenced_warnings="write_race(write_index*)", - lang_version=MOST_RECENT_LANGUAGE_VERSION) - loopy_knl = lp.split_iname(loopy_knl, "irange", 128, outer_tag="g.0") - - return loopy_knl - - with cl.CommandQueue(self.cl_context) as queue: - _, (rowindices, colindices) = _build_index()(queue, - tgtindices=self.row.indices, - srcindices=self.col.indices, - tgtranges=self.row.ranges, - srcranges=self.col.ranges, - blkranges=self.blkranges, - nresults=_to_host(self.blkranges[-1], queue=queue)) - return (rowindices.with_queue(None), - colindices.with_queue(None)) - -# }}} - - # {{{ OrderedSet # Source: https://code.activestate.com/recipes/576694-orderedset/ diff --git a/test/test_matrixgen.py b/test/test_matrixgen.py index c51fc3ce..6fd929e5 100644 --- a/test/test_matrixgen.py +++ b/test/test_matrixgen.py @@ -28,7 +28,6 @@ import pyopencl as cl import pyopencl.array # noqa from sumpy.tools import vector_to_device -from sumpy.tools import MatrixBlockIndexRanges import pytest from pyopencl.tools import ( # noqa @@ -71,27 +70,23 @@ def _build_geometry(queue, n, mode, target_radius=1.0): cl.array.to_device(queue, sigma)) -def _build_block_index(queue, nnodes, nblks, factor): - indices = np.arange(0, nnodes) - ranges = np.arange(0, nnodes + 1, nnodes // nblks) +def _build_subset_indices(queue, ntargets, nsources, factor): + tgtindices = np.arange(0, ntargets) + srcindices = np.arange(0, nsources) - if abs(factor - 1.0) < 1.0e-14: - ranges_ = ranges - indices_ = indices + rng = np.random.default_rng() + if abs(factor - 1.0) > 1.0e-14: + tgtindices = rng.choice(tgtindices, + size=int(factor * ntargets), replace=False) + srcindices = rng.choice(srcindices, + size=int(factor * nsources), replace=False) else: - indices_ = np.empty(ranges.shape[0] - 1, dtype=np.object) - for i in range(ranges.shape[0] - 1): - iidx = indices[np.s_[ranges[i]:ranges[i + 1]]] - indices_[i] = np.sort(np.random.choice(iidx, - size=int(factor * len(iidx)), replace=False)) + rng.shuffle(tgtindices) + rng.shuffle(srcindices) - ranges_ = np.cumsum([0] + [r.shape[0] for r in indices_]) - indices_ = np.hstack(indices_) - - from sumpy.tools import BlockIndexRanges - return BlockIndexRanges(queue.context, - cl.array.to_device(queue, indices_).with_queue(None), - cl.array.to_device(queue, ranges_).with_queue(None)) + return ( + cl.array.to_device(queue, tgtindices).with_queue(None), + cl.array.to_device(queue, srcindices).with_queue(None)) @pytest.mark.parametrize("factor", [1.0, 0.6]) @@ -103,7 +98,6 @@ def test_qbx_direct(ctx_factory, factor, lpot_id): queue = cl.CommandQueue(ctx) ndim = 2 - nblks = 10 order = 12 mode_nr = 25 @@ -128,8 +122,8 @@ def test_qbx_direct(ctx_factory, factor, lpot_id): mat_gen = LayerPotentialMatrixGenerator(ctx, expansion=expn, source_kernels=(knl,), target_kernels=(base_knl,)) - from sumpy.qbx import LayerPotentialMatrixBlockGenerator - blk_gen = LayerPotentialMatrixBlockGenerator(ctx, expansion=expn, + from sumpy.qbx import LayerPotentialMatrixSubsetGenerator + blk_gen = LayerPotentialMatrixSubsetGenerator(ctx, expansion=expn, source_kernels=(knl,), target_kernels=(base_knl,)) for n in [200, 300, 400]: @@ -138,10 +132,8 @@ def test_qbx_direct(ctx_factory, factor, lpot_id): h = 2 * np.pi / n strengths = (sigma * h,) - - tgtindices = _build_block_index(queue, n, nblks, factor) - srcindices = _build_block_index(queue, n, nblks, factor) - index_set = MatrixBlockIndexRanges(ctx, tgtindices, srcindices) + tgtindices, srcindices = _build_subset_indices(queue, + ntargets=n, nsources=n, factor=factor) extra_kwargs = {} if lpot_id == 2: @@ -170,15 +162,16 @@ def test_qbx_direct(ctx_factory, factor, lpot_id): sources=sources, centers=centers, expansion_radii=expansion_radii, - index_set=index_set, **extra_kwargs) + tgtindices=tgtindices, + srcindices=srcindices, **extra_kwargs) blk = blk.get() - rowindices = index_set.linear_row_indices.get(queue) - colindices = index_set.linear_col_indices.get(queue) + tgtindices = tgtindices.get(queue) + srcindices = srcindices.get(queue) eps = 1.0e-10 * la.norm(result_lpot) assert la.norm(result_mat - result_lpot) < eps - assert la.norm(blk - mat[rowindices, colindices]) < eps + assert la.norm(blk - mat[tgtindices, srcindices]) < eps @pytest.mark.parametrize("exclude_self", [True, False]) @@ -191,7 +184,6 @@ def test_p2p_direct(ctx_factory, exclude_self, factor, lpot_id): queue = cl.CommandQueue(ctx) ndim = 2 - nblks = 10 mode_nr = 25 from sumpy.kernel import LaplaceKernel, DirectionalSourceDerivative @@ -209,8 +201,8 @@ def test_p2p_direct(ctx_factory, exclude_self, factor, lpot_id): from sumpy.p2p import P2PMatrixGenerator mat_gen = P2PMatrixGenerator(ctx, [lknl], exclude_self=exclude_self) - from sumpy.p2p import P2PMatrixBlockGenerator - blk_gen = P2PMatrixBlockGenerator(ctx, [lknl], exclude_self=exclude_self) + from sumpy.p2p import P2PMatrixSubsetGenerator + blk_gen = P2PMatrixSubsetGenerator(ctx, [lknl], exclude_self=exclude_self) for n in [200, 300, 400]: targets, sources, _, _, sigma = \ @@ -218,10 +210,8 @@ def test_p2p_direct(ctx_factory, exclude_self, factor, lpot_id): h = 2 * np.pi / n strengths = (sigma * h,) - - tgtindices = _build_block_index(queue, n, nblks, factor) - srcindices = _build_block_index(queue, n, nblks, factor) - index_set = MatrixBlockIndexRanges(ctx, tgtindices, srcindices) + tgtindices, srcindices = _build_subset_indices(queue, + ntargets=n, nsources=n, factor=factor) extra_kwargs = {} if exclude_self: @@ -247,16 +237,16 @@ def test_p2p_direct(ctx_factory, exclude_self, factor, lpot_id): _, (blk,) = blk_gen(queue, targets=targets, sources=sources, - index_set=index_set, **extra_kwargs) + tgtindices=tgtindices, + srcindices=srcindices, **extra_kwargs) blk = blk.get() + tgtindices = tgtindices.get(queue) + srcindices = srcindices.get(queue) + eps = 1.0e-10 * la.norm(result_lpot) assert la.norm(result_mat - result_lpot) < eps - - index_set = index_set.get(queue) - for i in range(index_set.nblocks): - assert la.norm(index_set.block_take(blk, i) - - index_set.take(mat, i)) < eps + assert la.norm(blk - mat[tgtindices, srcindices]) < eps # You can test individual routines by typing -- GitLab