diff --git a/sumpy/p2p.py b/sumpy/p2p.py index c408360d7aac0985f49b5feaf7dc770d37790956..a0e89f242f958bbaea411090582e885d942b2a1d 100644 --- a/sumpy/p2p.py +++ b/sumpy/p2p.py @@ -375,15 +375,14 @@ class P2PMatrixBlockGenerator(P2PBase): .. code-block:: python blkranges = index_set.linear_ranges() - m = index_set.tgtranges[i + 1] - index_set.tgtranges[i] - n = index_set.srcranges[i + 1] - index_set.srcranges[i] + blkshape = index_set.block_shape(i) - block2d = result[blkranges[i]:blkranges[i + 1]].reshape(m, n) + block2d = result[blkranges[i]:blkranges[i + 1]].reshape(*blkshape) :arg targets: target point coordinates. :arg sources: source point coordinates. - :arg index_set: a :class:`sumpy.tools.MatrixBlockIndex` object used - to define the various blocks. + :arg index_set: a :class:`sumpy.tools.MatrixBlockIndexRanges` used + to define the blocks. :return: a tuple of one-dimensional arrays of kernel evaluations at target-source pairs described by `index_set`. """ @@ -394,9 +393,11 @@ class P2PMatrixBlockGenerator(P2PBase): sources_is_obj_array=( is_obj_array(sources) or isinstance(sources, (tuple, list)))) - tgtindices, srcindices = index_set.linear_indices() - return knl(queue, targets=targets, sources=sources, - tgtindices=tgtindices, srcindices=srcindices, **kwargs) + return knl(queue, + targets=targets, + sources=sources, + tgtindices=index_set.linear_row_indices, + srcindices=index_set.linear_col_indices, **kwargs) # }}} diff --git a/sumpy/qbx.py b/sumpy/qbx.py index b67c104225f3cccacde2abb5d8fbc8cbff0668b0..a0814e01c412727a908ec72758e62014b3278708 100644 --- a/sumpy/qbx.py +++ b/sumpy/qbx.py @@ -400,18 +400,21 @@ class LayerPotentialMatrixBlockGenerator(LayerPotentialBase): :arg sources: source point coordinates. :arg centers: QBX target expansion centers. :arg expansion_radii: radii for each expansion center. - :arg index_set: a :class:`sumpy.tools.MatrixBlockIndex` object used - to define the various blocks. + :arg index_set: a :class:`sumpy.tools.MatrixBlockIndexRanges` used + to define the blocks. :return: a tuple of one-dimensional arrays of kernel evaluations at - target-source pairs described by `index_set`. + target-source pairs described by `index_set`. """ knl = self.get_cached_optimized_kernel() - tgtindices, srcindices = index_set.linear_indices() - return knl(queue, src=sources, tgt=targets, center=centers, - expansion_radii=expansion_radii, - tgtindices=tgtindices, srcindices=srcindices, **kwargs) + return knl(queue, + src=sources, + tgt=targets, + center=centers, + expansion_radii=expansion_radii, + tgtindices=index_set.linear_row_indices, + srcindices=index_set.linear_col_indices, **kwargs) # }}} diff --git a/sumpy/tools.py b/sumpy/tools.py index b80deddd00ce330bffcaadaf112929c89821c745..469cb5e15238299092d55841f2bf43486bd11e0c 100644 --- a/sumpy/tools.py +++ b/sumpy/tools.py @@ -31,6 +31,9 @@ from pytools import memoize_method, memoize_in import numpy as np import sumpy.symbolic as sym +import pyopencl as cl +import pyopencl.array # noqa + import loopy as lp from loopy.version import MOST_RECENT_LANGUAGE_VERSION @@ -300,88 +303,202 @@ class KernelComputation(object): # {{{ -class MatrixBlockIndex(object): - def __init__(self, queue, tgtindices, srcindices, tgtranges, srcranges): - """Keep track of different ways to index into matrix blocks. - In this description, a block is given by a subset of indices of - the global matrix operator. For example, the :math:`i`-th block of - a matrix `mat` using the input arguments is obtained by: +def _to_host(x, queue=None): + if isinstance(x, cl.array.Array): + queue = queue or x.queue + return x.get(queue) + return x - .. code-block:: python - mat[np.ix_(tgtindices[tgtranges[i]:tgtranges[i + 1]], - srcindices[srcranges[i]:srcranges[i + 1]])] +class BlockIndexRanges(object): + """Convenience class for working with blocks of a global array. - :arg tgtindices: a subset of the full matrix row index set. - :arg srcindices: a subset of the full matrix column index set. - :arg tgtranges: an array used to index into `tgtindices`. - :arg srcranges: an array used to index into `srcindices`. - """ + .. attribute:: indices + + A list of not necessarily continuous or increasing integers + representing the indices of a global array. The individual blocks are + delimited using :attr:`ranges`. + + .. attribute:: ranges + + A list of nondecreasing integers used to index into :attr:`indices`. + A block :math:`i` can be retrieved using + `indices[ranges[i]:ranges[i + 1]]`. + + .. automethod:: block_shape + .. automethod:: get + .. automethod:: take + """ - self.queue = queue - self.tgtindices = tgtindices - self.tgtranges = tgtranges - self.srcindices = srcindices - self.srcranges = srcranges + def __init__(self, cl_context, indices, ranges): + self.cl_context = cl_context + self.indices = indices + self.ranges = ranges + @property @memoize_method + def _ranges(self): + with cl.CommandQueue(self.cl_context) as queue: + return _to_host(self.ranges, queue=queue) + + @property + def nblocks(self): + return self.ranges.shape[0] - 1 + + def block_shape(self, i): + return (self._ranges[i + 1] - self._ranges[i],) + + def block_indices(self, i): + return self.indices[self._ranges[i]:self._ranges[i + 1]] + + def get(self, queue=None): + return BlockIndexRanges(self.cl_context, + _to_host(self.indices, queue=queue), + _to_host(self.ranges, queue=queue)) + + def take(self, x, i): + """Return the subset of a global array `x` that is defined by + the :attr:`indices` in block :math:`i`. + """ + + return x[self.block_indices(i)] + + +class MatrixBlockIndexRanges(object): + """Keep track of different ways to index into matrix blocks. + + .. attribute:: row + + A :class:`BlockIndexRanges` encapsulating row block indices. + + .. attribute:: col + + A :class:`BlockIndexRanges` encapsulating column block indices. + + .. automethod:: block_shape + .. automethod:: block_take + .. automethod:: get + .. automethod:: take + + """ + + def __init__(self, cl_context, row, col): + self.cl_context = cl_context + self.row = row + self.col = col + assert self.row.nblocks == self.col.nblocks + + self.blkranges = np.cumsum([0] + [ + self.row.block_shape(i)[0] * self.col.block_shape(i)[0] + for i in range(self.row.nblocks)]) + + if isinstance(self.row.indices, cl.array.Array): + with cl.CommandQueue(self.cl_context) as queue: + self.blkranges = \ + cl.array.to_device(queue, self.blkranges).with_queue(None) + + @property + def nblocks(self): + return self.row.nblocks + + def block_shape(self, i): + return self.row.block_shape(i) + self.col.block_shape(i) + + def block_indices(self, i): + return (self.row.block_indices(i), + self.col.block_indices(i)) + + @property + def linear_row_indices(self): + r, _ = self._linear_indices() + return r + + @property + def linear_col_indices(self): + _, c = self._linear_indices() + return c + + @property def linear_ranges(self): + return self.blkranges + + def get(self, queue=None): + """Transfer data to the host. Only the initial given data is + transfered, not the arrays returned by :meth:`linear_row_indices` and + friends. + + :return: a copy of `self` in which all data lives on the host, i.e. + all :class:`pyopencl.array.Array` instances are replaces by + :class:`numpy.ndarray` instances. """ - :return: an array containing the cummulative sum of all block sizes. - It can be used to index into a linear representation of the - matrix blocks. + return MatrixBlockIndexRanges(self.cl_context, + row=self.row.get(queue=queue), + col=self.col.get(queue=queue)) + + def take(self, x, i): + """Retrieve a block from a global matrix. + + :arg x: a 2D :class:`numpy.ndarray`. + :arg i: block index. + :return: requested block from the matrix. """ - @memoize_in(self, "block_cumsum_knl") - def cumsum(): - loopy_knl = lp.make_kernel( - "{[i, j]: 0 <= i < nranges and 0 <= j <= i}", - """ - blkranges[0] = 0 - blkranges[i + 1] = reduce(sum, j, \ - (srcranges[j + 1] - srcranges[j]) * \ - (tgtranges[j + 1] - tgtranges[j])) \ - """, - [ - lp.GlobalArg("tgtranges", None, shape="nranges + 1"), - lp.GlobalArg("srcranges", None, shape="nranges + 1"), - lp.GlobalArg("blkranges", np.int32, shape="nranges + 1"), - lp.ValueArg("nranges", None) - ], - name="block_cumsum_knl", - default_offset=lp.auto, - assumptions="nranges>=1", - lang_version=MOST_RECENT_LANGUAGE_VERSION) + if isinstance(self.row.indices, cl.array.Array) or \ + isinstance(self.col.indices, cl.array.Array): + raise ValueError("CL `Array`s are not supported." + "Use MatrixBlockIndexRanges.get() and then view into matrices.") - loopy_knl = lp.realize_reduction(loopy_knl, force_scan=True, - force_outer_iname_for_scan="i") - return loopy_knl + irow, icol = self.block_indices(i) + return x[np.ix_(irow, icol)] - _, (blkranges,) = cumsum()(self.queue, - tgtranges=self.tgtranges, srcranges=self.srcranges) + def block_take(self, x, i): + """Retrieve a block from a linear representation of the matrix blocks. + A linear representation of the matrix blocks can be obtained, or + should be consistent with + + .. code-block:: python - return blkranges + i = index.linear_row_indices() + j = index.linear_col_indices() + linear_blks = global_mat[i, j] + + for k in range(index.nblocks): + assert np.allclose(index.block_take(linear_blks, k), + index.take(global_mat, k)) + + :arg x: a 1D :class:`numpy.ndarray`. + :arg i: block index. + :return: requested block, reshaped into a 2D array. + """ + + iblk = np.s_[self.blkranges[i]:self.blkranges[i + 1]] + return x[iblk].reshape(*self.block_shape(i)) @memoize_method - def linear_indices(self): + def _linear_indices(self): """ - :return: a tuple of `(rowindices, colindices)` that can be used in - conjunction with :func:`linear_ranges` to provide linear indexing - into a set of matrix blocks. These index arrays are just the - concatenated Cartesian products of all the block arrays described - by :attr:`tgtindices` and :arg:`srcindices`. + :return: a tuple of `(rowindices, colindices)` that can be + used to provide linear indexing into a set of matrix blocks. These + index arrays are just the concatenated Cartesian products of all + the block arrays described by :attr:`row` and :attr:`col`. - They can be used to index directly into the matrix as follows: + They can be used to index directly into a matrix as follows: .. code-block:: python mat[rowindices[blkranges[i]:blkranges[i + 1]], colindices[blkranges[i]:blkranges[i + 1]]] + + The same block can be obtained more easily using + + .. code-block:: python + + index.view(mat, i).reshape(-1) """ @memoize_in(self, "block_index_knl") - def linear_index(): + def _build_index(): loopy_knl = lp.make_kernel([ "{[irange]: 0 <= irange < nranges}", "{[itgt, isrc]: 0 <= itgt < ntgtblock and 0 <= isrc < nsrcblock}" @@ -417,13 +534,16 @@ class MatrixBlockIndex(object): return loopy_knl - blkranges = self.linear_ranges() - _, (rowindices, colindices) = linear_index()(self.queue, - tgtindices=self.tgtindices, srcindices=self.srcindices, - tgtranges=self.tgtranges, srcranges=self.srcranges, - blkranges=blkranges, nresults=blkranges[-1]) - - return rowindices, colindices + with cl.CommandQueue(self.cl_context) as queue: + _, (rowindices, colindices) = _build_index()(queue, + tgtindices=self.row.indices, + srcindices=self.col.indices, + tgtranges=self.row.ranges, + srcranges=self.col.ranges, + blkranges=self.blkranges, + nresults=_to_host(self.blkranges[-1], queue=queue)) + return (rowindices.with_queue(None), + colindices.with_queue(None)) # }}} diff --git a/test/test_matrixgen.py b/test/test_matrixgen.py index 5fde01edf40510c81bc67dd0d3ab93759a3fc857..2f3aabc3082b99707ab86b5461390e866350fdc3 100644 --- a/test/test_matrixgen.py +++ b/test/test_matrixgen.py @@ -27,10 +27,10 @@ import numpy as np import numpy.linalg as la import pyopencl as cl +import pyopencl.array # noqa -from pytools.obj_array import make_obj_array - -from sumpy.tools import MatrixBlockIndex +from sumpy.tools import vector_to_device +from sumpy.tools import MatrixBlockIndexRanges import pytest from pyopencl.tools import ( # noqa @@ -48,7 +48,7 @@ else: faulthandler.enable() -def _build_geometry(n, mode, target_radius=1.0): +def _build_geometry(queue, n, mode, target_radius=1.0): # parametrize circle t = np.linspace(0.0, 2.0 * np.pi, n, endpoint=False) unit_circle = np.exp(1j * t) @@ -66,10 +66,14 @@ def _build_geometry(n, mode, target_radius=1.0): centers = unit_circle * (1.0 - radius) expansion_radii = radius * np.ones(n) - return targets, sources, centers, sigma, expansion_radii + return (cl.array.to_device(queue, targets), + cl.array.to_device(queue, sources), + vector_to_device(queue, centers), + cl.array.to_device(queue, expansion_radii), + cl.array.to_device(queue, sigma)) -def _build_block_index(nnodes, nblks, factor): +def _build_block_index(queue, nnodes, nblks, factor): indices = np.arange(0, nnodes) ranges = np.arange(0, nnodes + 1, nnodes // nblks) @@ -86,128 +90,159 @@ def _build_block_index(nnodes, nblks, factor): ranges_ = np.cumsum([0] + [r.shape[0] for r in indices_]) indices_ = np.hstack(indices_) - return indices_, ranges_ + from sumpy.tools import BlockIndexRanges + return BlockIndexRanges(queue.context, + cl.array.to_device(queue, indices_).with_queue(None), + cl.array.to_device(queue, ranges_).with_queue(None)) @pytest.mark.parametrize('factor', [1.0, 0.6]) @pytest.mark.parametrize('lpot_id', [1, 2]) def test_qbx_direct(ctx_getter, factor, lpot_id): - # This evaluates a single layer potential on a circle. logging.basicConfig(level=logging.INFO) ctx = ctx_getter() queue = cl.CommandQueue(ctx) + ndim = 2 + nblks = 10 + order = 12 + mode_nr = 25 + from sumpy.kernel import LaplaceKernel, DirectionalSourceDerivative if lpot_id == 1: - lknl = LaplaceKernel(2) + knl = LaplaceKernel(ndim) elif lpot_id == 2: - lknl = LaplaceKernel(2) - lknl = DirectionalSourceDerivative(lknl, dir_vec_name="dsource_vec") + knl = LaplaceKernel(ndim) + knl = DirectionalSourceDerivative(knl, dir_vec_name="dsource_vec") else: raise ValueError("unknow lpot_id") - nblks = 10 - order = 12 - mode_nr = 25 + from sumpy.expansion.local import LineTaylorLocalExpansion + lknl = LineTaylorLocalExpansion(knl, order) from sumpy.qbx import LayerPotential + lpot = LayerPotential(ctx, [lknl]) + from sumpy.qbx import LayerPotentialMatrixGenerator + mat_gen = LayerPotentialMatrixGenerator(ctx, [lknl]) + from sumpy.qbx import LayerPotentialMatrixBlockGenerator - from sumpy.expansion.local import LineTaylorLocalExpansion - lpot = LayerPotential(ctx, [LineTaylorLocalExpansion(lknl, order)]) - mat_gen = LayerPotentialMatrixGenerator(ctx, - [LineTaylorLocalExpansion(lknl, order)]) - blk_gen = LayerPotentialMatrixBlockGenerator(ctx, - [LineTaylorLocalExpansion(lknl, order)]) + blk_gen = LayerPotentialMatrixBlockGenerator(ctx, [lknl]) for n in [200, 300, 400]: - targets, sources, centers, sigma, expansion_radii = \ - _build_geometry(n, mode_nr) + targets, sources, centers, expansion_radii, sigma = \ + _build_geometry(queue, n, mode_nr) h = 2 * np.pi / n strengths = (sigma * h,) - tgtindices, tgtranges = _build_block_index(n, nblks, factor) - srcindices, srcranges = _build_block_index(n, nblks, factor) - assert tgtranges.shape == srcranges.shape + tgtindices = _build_block_index(queue, n, nblks, factor) + srcindices = _build_block_index(queue, n, nblks, factor) + index_set = MatrixBlockIndexRanges(ctx, tgtindices, srcindices) - knl_kwargs = {} + extra_kwargs = {} if lpot_id == 2: - knl_kwargs["dsource_vec"] = make_obj_array([ - np.ones(n) for _ in range(2)]) - - _, (mat,) = mat_gen(queue, targets, sources, centers, - expansion_radii, **knl_kwargs) - result_mat = mat.dot(strengths[0]) - - _, (result_lpot,) = lpot(queue, targets, sources, centers, strengths, - expansion_radii, **knl_kwargs) + extra_kwargs["dsource_vec"] = \ + vector_to_device(queue, np.ones((ndim, n))) + + _, (result_lpot,) = lpot(queue, + targets=targets, + sources=sources, + centers=centers, + expansion_radii=expansion_radii, + strengths=strengths, **extra_kwargs) + result_lpot = result_lpot.get() + + _, (mat,) = mat_gen(queue, + targets=targets, + sources=sources, + centers=centers, + expansion_radii=expansion_radii, **extra_kwargs) + mat = mat.get() + result_mat = mat.dot(strengths[0].get()) + + _, (blk,) = blk_gen(queue, + targets=targets, + sources=sources, + centers=centers, + expansion_radii=expansion_radii, + index_set=index_set, **extra_kwargs) + blk = blk.get() + + rowindices = index_set.linear_row_indices.get(queue) + colindices = index_set.linear_col_indices.get(queue) eps = 1.0e-10 * la.norm(result_lpot) assert la.norm(result_mat - result_lpot) < eps + assert la.norm(blk - mat[rowindices, colindices]) < eps - index_set = MatrixBlockIndex(queue, - tgtindices, srcindices, tgtranges, srcranges) - _, (blk,) = blk_gen(queue, targets, sources, centers, expansion_radii, - index_set, **knl_kwargs) - - rowindices, colindices = index_set.linear_indices() - eps = 1.0e-10 * la.norm(mat) - assert la.norm(blk - mat[rowindices, colindices].reshape(-1)) < eps - -@pytest.mark.parametrize(("exclude_self", "factor"), - [(True, 1.0), (True, 0.6), (False, 1.0), (False, 0.6)]) +@pytest.mark.parametrize("exclude_self", [True, False]) +@pytest.mark.parametrize("factor", [1.0, 0.6]) def test_p2p_direct(ctx_getter, exclude_self, factor): - # This does a point-to-point kernel evaluation on a circle. logging.basicConfig(level=logging.INFO) + ctx = ctx_getter() queue = cl.CommandQueue(ctx) - from sumpy.kernel import LaplaceKernel - lknl = LaplaceKernel(2) - + ndim = 2 nblks = 10 mode_nr = 25 + from sumpy.kernel import LaplaceKernel + lknl = LaplaceKernel(ndim) + from sumpy.p2p import P2P - from sumpy.p2p import P2PMatrixGenerator, P2PMatrixBlockGenerator lpot = P2P(ctx, [lknl], exclude_self=exclude_self) + + from sumpy.p2p import P2PMatrixGenerator mat_gen = P2PMatrixGenerator(ctx, [lknl], exclude_self=exclude_self) + + from sumpy.p2p import P2PMatrixBlockGenerator blk_gen = P2PMatrixBlockGenerator(ctx, [lknl], exclude_self=exclude_self) for n in [200, 300, 400]: - targets, sources, _, sigma, _ = \ - _build_geometry(n, mode_nr, target_radius=1.2) + targets, sources, _, _, sigma = \ + _build_geometry(queue, n, mode_nr, target_radius=1.2) h = 2 * np.pi / n strengths = (sigma * h,) - tgtindices, tgtranges = _build_block_index(n, nblks, factor) - srcindices, srcranges = _build_block_index(n, nblks, factor) - assert tgtranges.shape == srcranges.shape + tgtindices = _build_block_index(queue, n, nblks, factor) + srcindices = _build_block_index(queue, n, nblks, factor) + index_set = MatrixBlockIndexRanges(ctx, tgtindices, srcindices) extra_kwargs = {} if exclude_self: - extra_kwargs["target_to_source"] = np.arange(n, dtype=np.int32) - - _, (mat,) = mat_gen(queue, targets, sources, **extra_kwargs) - result_mat = mat.dot(strengths[0]) - - _, (result_lpot,) = lpot(queue, targets, sources, strengths, - **extra_kwargs) + extra_kwargs["target_to_source"] = \ + cl.array.arange(queue, 0, n, dtype=np.int) + + _, (result_lpot,) = lpot(queue, + targets=targets, + sources=sources, + strength=strengths, **extra_kwargs) + result_lpot = result_lpot.get() + + _, (mat,) = mat_gen(queue, + targets=targets, + sources=sources, **extra_kwargs) + mat = mat.get() + result_mat = mat.dot(strengths[0].get()) + + _, (blk,) = blk_gen(queue, + targets=targets, + sources=sources, + index_set=index_set, **extra_kwargs) + blk = blk.get() eps = 1.0e-10 * la.norm(result_lpot) assert la.norm(result_mat - result_lpot) < eps - index_set = MatrixBlockIndex(queue, - tgtindices, srcindices, tgtranges, srcranges) - _, (blk,) = blk_gen(queue, targets, sources, index_set, **extra_kwargs) - - rowindices, colindices = index_set.linear_indices() - eps = 1.0e-10 * la.norm(mat) - assert la.norm(blk - mat[rowindices, colindices].reshape(-1)) < eps + index_set = index_set.get(queue) + for i in range(index_set.nblocks): + assert la.norm(index_set.block_take(blk, i) - + index_set.take(mat, i)) < eps # You can test individual routines by typing