From 773f439b2e5f45b7fe5d3c3a698b955313550e47 Mon Sep 17 00:00:00 2001 From: Alex Fikl Date: Sun, 17 Jun 2018 14:11:47 -0500 Subject: [PATCH 01/25] direct-solver: add proxy point generator class --- pytential/direct_solver.py | 515 +++++++++++++++++++++++++++++++++++++ 1 file changed, 515 insertions(+) create mode 100644 pytential/direct_solver.py diff --git a/pytential/direct_solver.py b/pytential/direct_solver.py new file mode 100644 index 00000000..72a7729e --- /dev/null +++ b/pytential/direct_solver.py @@ -0,0 +1,515 @@ +from __future__ import division, absolute_import + +__copyright__ = "Copyright (C) 2018 Alexandru Fikl" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + +import numpy as np + +import pyopencl as cl +import pyopencl.array # noqa + +from pytools import memoize_method, memoize_in + +import loopy as lp +from loopy.version import MOST_RECENT_LANGUAGE_VERSION + + +# {{{ point index partitioning + +def _element_node_range(groups, igroup, ielement): + istart = groups[igroup].node_nr_base + \ + groups[igroup].nunit_nodes * ielement + iend = groups[igroup].node_nr_base + \ + groups[igroup].nunit_nodes * (ielement + 1) + return np.arange(istart, iend) + + +def partition_by_nodes(discr, use_tree=True, max_particles_in_box=30): + if use_tree: + from boxtree import box_flags_enum + from boxtree import TreeBuilder + + with cl.CommandQueue(discr.cl_context) as queue: + builder = TreeBuilder(discr.cl_context) + + tree, _ = builder(queue, discr.nodes(), + max_particles_in_box=max_particles_in_box) + + tree = tree.get(queue) + leaf_boxes, = (tree.box_flags & + box_flags_enum.HAS_CHILDREN == 0).nonzero() + + indices = np.empty(len(leaf_boxes), dtype=np.object) + for i, ibox in enumerate(leaf_boxes): + box_start = tree.box_source_starts[ibox] + box_end = box_start + tree.box_source_counts_cumul[ibox] + indices[i] = tree.user_source_ids[box_start:box_end] + + ranges = np.cumsum([0] + [box.shape[0] for box in indices]) + indices = np.hstack(indices) + else: + indices = np.arange(0, discr.nnodes) + ranges = np.arange(0, discr.nnodes + 1, + discr.nnodes // max_particles_in_box) + + assert ranges[-1] == discr.nnodes + return indices, ranges + + +def partition_by_elements(discr, use_tree=True, max_particles_in_box=10): + if use_tree: + from boxtree import box_flags_enum + from boxtree import TreeBuilder + + with cl.CommandQueue(discr.cl_context) as queue: + builder = TreeBuilder(discr.cl_context) + + from pytential.qbx.utils import element_centers_of_mass + elranges = np.cumsum([0] + + 
[group.nelements for group in discr.mesh.groups]) + elcenters = element_centers_of_mass(discr) + + tree, _ = builder(queue, elcenters, + max_particles_in_box=max_particles_in_box) + + groups = discr.groups + tree = tree.get(queue) + leaf_boxes, = (tree.box_flags & + box_flags_enum.HAS_CHILDREN == 0).nonzero() + + indices = np.empty(len(leaf_boxes), dtype=np.object) + elements = np.empty(len(leaf_boxes), dtype=np.object) + for i, ibox in enumerate(leaf_boxes): + box_start = tree.box_source_starts[ibox] + box_end = box_start + tree.box_source_counts_cumul[ibox] + + ielement = tree.user_source_ids[box_start:box_end] + igroup = np.digitize(ielement, elranges) - 1 + + indices[i] = np.hstack([_element_node_range(groups, j, k) + for j, k in zip(igroup, ielement)]) + else: + groups = discr.groups + elranges = np.cumsum([0] + + [group.nelements for group in discr.mesh.groups]) + + nelements = discr.mesh.nelements + indices = np.array_split(np.arange(0, nelements), + nelements // max_particles_in_box) + for i in range(len(indices)): + ielement = indices[i] + igroup = np.digitize(ielement, elranges) - 1 + + indices[i] = np.hstack([_element_node_range(groups, j, k) + for j, k in zip(igroup, ielement)]) + + ranges = np.cumsum([0] + [box.shape[0] for box in indices]) + indices = np.hstack(indices) + + return indices, ranges + + +def partition_from_coarse(queue, resampler, from_indices, from_ranges): + if not hasattr(resampler, "groups"): + raise ValueError("resampler must be a DirectDiscretizationConnection.") + + # construct ranges + from_discr = resampler.from_discr + from_grp_ranges = np.cumsum([0] + + [grp.nelements for grp in from_discr.mesh.groups]) + from_el_ranges = np.hstack([ + np.arange(grp.node_nr_base, grp.nnodes + 1, grp.nunit_nodes) + for grp in from_discr.groups]) + + # construct coarse element arrays in each from_range + el_indices = np.empty(from_ranges.shape[0] - 1, dtype=np.object) + el_ranges = np.full(from_grp_ranges[-1], -1, dtype=np.int) + for i in range(from_ranges.shape[0] - 1): + irange = np.s_[from_ranges[i]:from_ranges[i + 1]] + el_indices[i] = \ + np.unique(np.digitize(from_indices[irange], from_el_ranges)) - 1 + el_ranges[el_indices[i]] = i + el_indices = np.hstack(el_indices) + + # construct lookup table + to_el_table = [np.full(g.nelements, -1, dtype=np.int) + for g in resampler.to_discr.groups] + + for igrp, grp in enumerate(resampler.groups): + for batch in grp.batches: + to_el_table[igrp][batch.to_element_indices.get(queue)] = \ + from_grp_ranges[igrp] + batch.from_element_indices.get(queue) + + # construct fine node index list + indices = [np.empty(0, dtype=np.int) + for _ in range(from_ranges.shape[0] - 1)] + for igrp in range(len(resampler.groups)): + to_element_indices = \ + np.where(np.isin(to_el_table[igrp], el_indices))[0] + + for i, j in zip(el_ranges[to_el_table[igrp][to_element_indices]], + to_element_indices): + indices[i] = np.hstack([indices[i], + _element_node_range(resampler.to_discr.groups, igrp, j)]) + + ranges = np.cumsum([0] + [box.shape[0] for box in indices]) + indices = np.hstack(indices) + + return indices, ranges + +# }}} + + +# {{{ proxy point generator + +class ProxyGenerator(object): + def __init__(self, queue, places, + ratio=1.5, nproxy=31, target_order=0): + r""" + :arg queue: a :class:`pyopencl.CommandQueue` used to synchronize the + calculations. + :arg places: commonly a :class:`pytential.qbx.LayerPotentialSourceBase`. + :arg ratio: a ratio used to compute the proxy point radius. 
For proxy + points, we have two radii of interest for a set of points: the + radius :math:`r_{block}` of the smallest ball containing all the + points in a block and the radius :math:`r_{qbx}` of the smallest + ball containing all the QBX expansion balls in the block. If the + ratio :math:`\theta \in [0, 1]`, then the radius of the proxy + ball is computed as: + + .. math:: + + r = (1 - \theta) r_{block} + \theta r_{qbx}. + + If the ratio :math:`\theta > 1`, the the radius is simply: + + .. math:: + + r = \theta r_{qbx}. + + :arg nproxy: number of proxy points for each block. + :arg target_order: order of the discretization of proxy points. Can + be quite small, since proxy points are evaluated point-to-point + at the moment. + """ + + self.queue = queue + self.places = places + self.context = self.queue.context + self.ratio = abs(ratio) + self.nproxy = int(abs(nproxy)) + self.target_order = target_order + self.dim = self.places.ambient_dim + + if self.dim == 2: + from meshmode.mesh.generation import ellipse, make_curve_mesh + + self.mesh = make_curve_mesh(lambda t: ellipse(1.0, t), + np.linspace(0.0, 1.0, self.nproxy + 1), + self.nproxy) + elif self.dim == 3: + from meshmode.mesh.generation import generate_icosphere + + self.mesh = generate_icosphere(1, self.nproxy) + else: + raise ValueError("unsupported ambient dimension") + + @memoize_method + def get_kernel(self): + if self.ratio < 1.0: + radius_expr = "(1.0 - ratio) * rblk + ratio * rqbx" + else: + radius_expr = "ratio * rqbx" + + knl = lp.make_kernel([ + "{[irange]: 0 <= irange < nranges}", + "{[iblk]: 0 <= iblk < nblock}", + "{[idim]: 0 <= idim < dim}" + ], + [""" + for irange + <> blk_start = ranges[irange] + <> blk_end = ranges[irange + 1] + <> nblock = blk_end - blk_start + + proxy_center[idim, irange] = 1.0 / blk_length * \ + reduce(sum, iblk, nodes[idim, indices[blk_start + iblk]]) \ + {{dup=idim:iblk}} + + <> rblk = simul_reduce(max, iblk, sqrt(simul_reduce(sum, idim, \ + (proxy_center[idim, irange] - + nodes[idim, indices[blk_start + iblk]]) ** 2))) + <> rqbx_int = simul_reduce(max, iblk, sqrt(simul_reduce(sum, idim, \ + (proxy_center[idim, irange] - + center_int[idim, indices[blk_start + iblk]]) ** 2)) + \ + expansion_radii[indices[blk_start + iblk]]) + <> rqbx_ext = simul_reduce(max, iblk, sqrt(simul_reduce(sum, idim, \ + (proxy_center[idim, irange] - + center_ext[idim, indices[blk_start + iblk]]) ** 2)) + \ + expansion_radii[indices[blk_start + iblk]]) + + <> rqbx = if(rqbx_ext < rqbx_int, rqbx_int, rqbx_ext) + proxy_radius[irange] = {radius_expr} + end + """.format(radius_expr=radius_expr)], + [ + lp.GlobalArg("nodes", None, + shape=(self.dim, "nnodes")), + lp.GlobalArg("center_int", None, + shape=(self.dim, "nnodes"), dim_tags="sep,C"), + lp.GlobalArg("center_ext", None, + shape=(self.dim, "nnodes"), dim_tags="sep,C"), + lp.GlobalArg("expansion_radii", None, + shape="nnodes"), + lp.GlobalArg("ranges", None, + shape="nranges + 1"), + lp.GlobalArg("indices", None, + shape="nindices"), + lp.GlobalArg("proxy_center", None, + shape=(self.dim, "nranges")), + lp.GlobalArg("proxy_radius", None, + shape="nranges"), + lp.ValueArg("nnodes", np.int64), + lp.ValueArg("nranges", None), + lp.ValueArg("nindices", np.int64) + ], + name="proxy_kernel", + assumptions="dim>=1 and nranges>=1", + fixed_parameters=dict( + dim=self.dim, + ratio=self.ratio), + lang_version=MOST_RECENT_LANGUAGE_VERSION) + + knl = lp.tag_inames(knl, "idim*:unr") + + return knl + + @memoize_method + def get_optimized_kernel(self): + knl = self.get_kernel() + knl = 
lp.split_iname(knl, "irange", 128, outer_tag="g.0") + + return knl + + def get_proxies(self, indices, ranges, **kwargs): + """ + :arg indices: a set of indices around which to construct proxy balls. + :arg ranges: an array of size `nranges + 1` used to index into the + set of indices. Each one of the `nranges` ranges will get a proxy + ball of a predefined size. + + :returns: `centers` is a list of centers for each proxy ball. + :returns: `radii` is a list of radii for each proxy ball. + :returns: `proxies` is a set of proxy points. + :returns: `pxyranges` is an array used to index into the list of + proxy points. + """ + + from pytential.qbx.utils import get_centers_on_side + + knl = self.get_kernel() + _, (centers_dev, radii_dev,) = knl(self.queue, + nodes=self.places.density_discr.nodes(), + center_int=get_centers_on_side(self.places, -1), + center_ext=get_centers_on_side(self.places, +1), + expansion_radii=self.places._expansion_radii("nsources"), + indices=indices, ranges=ranges, **kwargs) + centers = centers_dev.get() + radii = radii_dev.get() + + from meshmode.mesh.processing import affine_map + proxies = np.empty(ranges.shape[0] - 1, dtype=np.object) + + for i in range(ranges.shape[0] - 1): + mesh = affine_map(self.mesh, + A=(radii[i] * np.eye(self.dim)), + b=centers[:, i].reshape(-1)) + proxies[i] = mesh.vertices + + pxyranges = np.cumsum([0] + [p.shape[-1] for p in proxies]) + proxies = make_obj_array([ + cl.array.to_device(self.queue, np.hstack([p[idim] for p in proxies])) + for idim in range(self.dim)]) + centers_dev = make_obj_array([ + centers_dev[idim].with_queue(self.queue).copy() + for idim in range(self.dim)]) + + return centers_dev, radii_dev, proxies, pxyranges + + def get_neighbors(self, indices, ranges, centers, radii): + """ + :arg indices: indices for a subset of discretization nodes. + :arg ranges: array used to index into the :attr:`indices` array. + :arg centers: centers for each proxy ball. + :arg radii: radii for each proxy ball. + + :returns: `neighbors` is a set of indices into the full set of + discretization nodes (already mapped through the given set `indices`). + It contains all nodes that are inside a given proxy ball :math:`i`, + but not contained in the range :math:`i`, given by + `indices[ranges[i]:ranges[i + 1]]`. + :returns: `nbrranges` is an array used to index into the set of + neighbors and return the subset for a given proxy ball. 
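As a concrete reading of the neighbor definition above, the selection amounts to a single boolean mask. The following is a self-contained numpy sketch with made-up data; `nodes`, `indices`, `ranges`, `center` and `radius` are synthetic illustrations, not values taken from this patch:

    import numpy as np

    nodes = np.random.rand(2, 100)            # synthetic 2D node coordinates
    indices = np.arange(100)                   # node indices, linearly partitioned
    ranges = np.array([0, 50, 100])            # two ranges of 50 nodes each
    center = np.array([[0.5], [0.5]])          # proxy ball center for range 0
    radius = 0.25                              # proxy ball radius for range 0

    # inside the proxy ball of range 0 ...
    inside_ball = np.linalg.norm(nodes - center, axis=0) < radius
    # ... but outside the index range 0 itself
    outside_range = (indices < ranges[0]) | (ranges[1] <= indices)
    neighbors_0 = indices[inside_ball & outside_range]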
+ """ + + nranges = radii.shape[0] + 1 + sources = self.places.density_discr.nodes().get(self.queue) + sources = make_obj_array([ + cl.array.to_device(self.queue, sources[idim, indices]) + for idim in range(self.dim)]) + + # construct tree + from boxtree import TreeBuilder + builder = TreeBuilder(self.context) + tree, _ = builder(self.queue, sources, max_particles_in_box=30) + + from boxtree.area_query import AreaQueryBuilder + builder = AreaQueryBuilder(self.context) + query, _ = builder(self.queue, tree, centers, radii) + + # find nodes inside each proxy ball + tree = tree.get(self.queue) + query = query.get(self.queue) + centers_ = np.vstack([centers[idim].get(self.queue) + for idim in range(self.dim)]) + radii_ = radii.get(self.queue) + + neighbors = np.empty(nranges - 1, dtype=np.object) + for iproxy in range(nranges - 1): + # get list of boxes intersecting the current ball + istart = query.leaves_near_ball_starts[iproxy] + iend = query.leaves_near_ball_starts[iproxy + 1] + iboxes = query.leaves_near_ball_lists[istart:iend] + + # get nodes inside the boxes + istart = tree.box_source_starts[iboxes] + iend = istart + tree.box_source_counts_cumul[iboxes] + isources = np.hstack([np.arange(s, e) + for s, e in zip(istart, iend)]) + nodes = np.vstack([tree.sources[idim][isources] + for idim in range(self.dim)]) + isources = tree.user_source_ids[isources] + + # get nodes inside the ball but outside the current range + center = centers_[:, iproxy].reshape(-1, 1) + radius = radii_[iproxy] + mask = (la.norm(nodes - center, axis=0) < radius) & \ + ((isources < ranges[iproxy]) | (ranges[iproxy + 1] <= isources)) + + neighbors[iproxy] = indices[isources[mask]] + + nbrranges = np.cumsum([0] + [n.shape[0] for n in neighbors]) + neighbors = np.hstack(neighbors) + + return (cl.array.to_device(self.queue, neighbors), + cl.array.to_device(self.queue, nbrranges)) + + def __call__(self, indices, ranges, **kwargs): + """ + :arg indices: Set of indices for points in :attr:`places`. + :arg ranges: Set of ranges around which to build a set of proxy + points. For each range, this builds a ball of proxy points centered + at the center of mass of the points in the range with a radius + defined by :attr:`ratio`. + + :returns: `skeletons` A set of skeleton points for each range, + which are supposed to contain all the necessary information about + the farfield interactions. This set of points contains a set of + proxy points constructed around each range and the closest neighbors + that are inside the proxy ball. + :returns: `sklranges` An array used to index into the skeleton + points. 
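Taken together, the class is meant to be driven roughly as follows. This is only a usage sketch: `queue` (a pyopencl.CommandQueue) and `places` (a QBXLayerPotentialSource) are assumed to have been built elsewhere, as in the tests added in the next commit:

    from pytential.direct_solver import partition_by_nodes, ProxyGenerator

    # `queue` and `places` are assumed to exist (see test_direct_solver.py below)
    indices, ranges = partition_by_nodes(places.density_discr,
            use_tree=True, max_particles_in_box=30)

    gen = ProxyGenerator(queue, places, ratio=1.5, nproxy=31)
    skeletons, sklranges = gen(indices, ranges)

    # skeleton points (proxies plus in-ball neighbors) of the first range
    skl = skeletons.get(queue)
    skl_0 = skl[:, sklranges[0]:sklranges[1]]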
+ """ + + @memoize_in(self, "concat_skl_knl") + def knl(): + loopy_knl = lp.make_kernel([ + "{[irange, idim]: 0 <= irange < nranges and \ + 0 <= idim < dim}", + "{[ipxy, ingb]: 0 <= ipxy < npxyblock and \ + 0 <= ingb < nngbblock}" + ], + """ + for irange + <> pxystart = pxyranges[irange] + <> pxyend = pxyranges[irange + 1] + <> npxyblock = pxyend - pxystart + + <> ngbstart = nbrranges[irange] + <> ngbend = nbrranges[irange + 1] + <> nngbblock = ngbend - ngbstart + + <> sklstart = pxyranges[irange] + nbrranges[irange] + skeletons[idim, sklstart + ipxy] = \ + proxies[idim, pxystart + ipxy] \ + {id_prefix=write_pxy,nosync=write_ngb} + skeletons[idim, sklstart + npxyblock + ingb] = \ + sources[idim, neighbors[ngbstart + ingb]] \ + {id_prefix=write_ngb,nosync=write_pxy} + end + """, + [ + lp.GlobalArg("sources", None, + shape=(self.dim, "nsources")), + lp.GlobalArg("proxies", None, + shape=(self.dim, "nproxies"), dim_tags="sep,C"), + lp.GlobalArg("neighbors", None, + shape="nneighbors"), + lp.GlobalArg("pxyranges", None, + shape="nranges + 1"), + lp.GlobalArg("nbrranges", None, + shape="nranges + 1"), + lp.GlobalArg("skeletons", None, + shape=(self.dim, "nproxies + nneighbors")), + lp.ValueArg("nsources", np.int32), + lp.ValueArg("nproxies", np.int32), + lp.ValueArg("nneighbors", np.int32), + "..." + ], + name="concat_skl", + default_offset=lp.auto, + fixed_parameters=dict(dim=self.dim), + lang_version=MOST_RECENT_LANGUAGE_VERSION) + + loopy_knl = lp.tag_inames(loopy_knl, "idim*:unr") + # loopy_knl = lp.split_iname(loopy_knl, "irange", 16, inner_tag="l.0") + return loopy_knl + + # construct point arrays + sources = self.places.density_discr.nodes() + centers, radii, proxies, pxyranges = \ + self.get_proxies(indices, ranges, **kwargs) + neighbors, nbrranges = \ + self.get_neighbors(indices, ranges, centers, radii) + + # construct joint array + _, (skeletons,) = knl()(self.queue, + sources=sources, proxies=proxies, neighbors=neighbors, + pxyranges=pxyranges, nbrranges=nbrranges) + sklranges = np.array([p + n for p, n in zip(pxyranges, nbrranges)]) + + return skeletons, sklranges + + +# }}} + +# vim: foldmethod=marker -- GitLab From f5e10470baa49371469c36d8e95412a35e1210ed Mon Sep 17 00:00:00 2001 From: Alex Fikl Date: Sun, 17 Jun 2018 19:15:32 -0500 Subject: [PATCH 02/25] direct-solver: add tests for the proxy point generator --- pytential/direct_solver.py | 162 ++++++++------- test/test_direct_solver.py | 415 +++++++++++++++++++++++++++++++++++++ 2 files changed, 500 insertions(+), 77 deletions(-) create mode 100644 test/test_direct_solver.py diff --git a/pytential/direct_solver.py b/pytential/direct_solver.py index 72a7729e..0a864a10 100644 --- a/pytential/direct_solver.py +++ b/pytential/direct_solver.py @@ -23,10 +23,12 @@ THE SOFTWARE. """ import numpy as np +import numpy.linalg as la import pyopencl as cl import pyopencl.array # noqa +from pytools.obj_array import make_obj_array from pytools import memoize_method, memoize_in import loopy as lp @@ -182,43 +184,38 @@ def partition_from_coarse(queue, resampler, from_indices, from_ranges): # {{{ proxy point generator class ProxyGenerator(object): - def __init__(self, queue, places, - ratio=1.5, nproxy=31, target_order=0): + def __init__(self, queue, source, ratio=1.5, nproxy=31): r""" - :arg queue: a :class:`pyopencl.CommandQueue` used to synchronize the - calculations. - :arg places: commonly a :class:`pytential.qbx.LayerPotentialSourceBase`. - :arg ratio: a ratio used to compute the proxy point radius. 
For proxy - points, we have two radii of interest for a set of points: the - radius :math:`r_{block}` of the smallest ball containing all the - points in a block and the radius :math:`r_{qbx}` of the smallest - ball containing all the QBX expansion balls in the block. If the - ratio :math:`\theta \in [0, 1]`, then the radius of the proxy - ball is computed as: + :arg queue: a :class:`pyopencl.CommandQueue`. + :arg source: a :class:`pytential.qbx.LayerPotentialSourceBase`. + :arg ratio: a ratio used to compute the proxy point radius. The radius + is computed in the :math:`L_2` norm, resulting in a circle or + sphere of proxy points. For QBX, we have two radii of interest + for a set of points: the radius :math:`r_{block}` of the + smallest ball containing all the points and the radius + :math:`r_{qbx}` of the smallest ball containing all the QBX + expansion balls in the block. If the ratio :math:`\theta \in + [0, 1]`, then the radius of the proxy ball is .. math:: r = (1 - \theta) r_{block} + \theta r_{qbx}. - If the ratio :math:`\theta > 1`, the the radius is simply: + If the ratio :math:`\theta > 1`, the the radius is simply .. math:: r = \theta r_{qbx}. - :arg nproxy: number of proxy points for each block. - :arg target_order: order of the discretization of proxy points. Can - be quite small, since proxy points are evaluated point-to-point - at the moment. + :arg nproxy: number of proxy points. """ self.queue = queue - self.places = places + self.source = source self.context = self.queue.context self.ratio = abs(ratio) self.nproxy = int(abs(nproxy)) - self.target_order = target_order - self.dim = self.places.ambient_dim + self.dim = self.source.ambient_dim if self.dim == 2: from meshmode.mesh.generation import ellipse, make_curve_mesh @@ -240,34 +237,34 @@ class ProxyGenerator(object): else: radius_expr = "ratio * rqbx" + # NOTE: centers of mass are computed using a second-order approximation + # that currently matches what's in `element_centers_of_mass`. 
knl = lp.make_kernel([ "{[irange]: 0 <= irange < nranges}", - "{[iblk]: 0 <= iblk < nblock}", + "{[i]: 0 <= i < npoints}", "{[idim]: 0 <= idim < dim}" ], [""" for irange - <> blk_start = ranges[irange] - <> blk_end = ranges[irange + 1] - <> nblock = blk_end - blk_start + <> npoints = ranges[irange + 1] - ranges[irange] + proxy_center[idim, irange] = 1.0 / npoints * \ + reduce(sum, i, nodes[idim, indices[i + ranges[irange]]]) \ + {{dup=idim:i}} - proxy_center[idim, irange] = 1.0 / blk_length * \ - reduce(sum, iblk, nodes[idim, indices[blk_start + iblk]]) \ - {{dup=idim:iblk}} - - <> rblk = simul_reduce(max, iblk, sqrt(simul_reduce(sum, idim, \ + <> rblk = simul_reduce(max, i, sqrt(simul_reduce(sum, idim, \ (proxy_center[idim, irange] - - nodes[idim, indices[blk_start + iblk]]) ** 2))) - <> rqbx_int = simul_reduce(max, iblk, sqrt(simul_reduce(sum, idim, \ + nodes[idim, indices[i + ranges[irange]]]) ** 2))) + + <> rqbx_int = simul_reduce(max, i, sqrt(simul_reduce(sum, idim, \ (proxy_center[idim, irange] - - center_int[idim, indices[blk_start + iblk]]) ** 2)) + \ - expansion_radii[indices[blk_start + iblk]]) - <> rqbx_ext = simul_reduce(max, iblk, sqrt(simul_reduce(sum, idim, \ + center_int[idim, indices[i + ranges[irange]]]) ** 2)) + \ + expansion_radii[indices[i + ranges[irange]]]) + <> rqbx_ext = simul_reduce(max, i, sqrt(simul_reduce(sum, idim, \ (proxy_center[idim, irange] - - center_ext[idim, indices[blk_start + iblk]]) ** 2)) + \ - expansion_radii[indices[blk_start + iblk]]) - + center_ext[idim, indices[i + ranges[irange]]]) ** 2)) + \ + expansion_radii[indices[i + ranges[irange]]]) <> rqbx = if(rqbx_ext < rqbx_int, rqbx_int, rqbx_ext) + proxy_radius[irange] = {radius_expr} end """.format(radius_expr=radius_expr)], @@ -292,7 +289,7 @@ class ProxyGenerator(object): lp.ValueArg("nranges", None), lp.ValueArg("nindices", np.int64) ], - name="proxy_kernel", + name="proxy_generator_knl", assumptions="dim>=1 and nranges>=1", fixed_parameters=dict( dim=self.dim, @@ -311,27 +308,33 @@ class ProxyGenerator(object): return knl def get_proxies(self, indices, ranges, **kwargs): - """ + """Generate proxy points for each given range of source points in + the discretization :attr:`source`. + :arg indices: a set of indices around which to construct proxy balls. - :arg ranges: an array of size `nranges + 1` used to index into the - set of indices. Each one of the `nranges` ranges will get a proxy - ball of a predefined size. - - :returns: `centers` is a list of centers for each proxy ball. - :returns: `radii` is a list of radii for each proxy ball. - :returns: `proxies` is a set of proxy points. - :returns: `pxyranges` is an array used to index into the list of - proxy points. + :arg ranges: an array of size `(nranges + 1,)` used to index into the + set of indices. Each one of the `nranges` ranges will get a proxy + ball. + + :return: a tuple of `(centers, radii, proxies, pryranges)`, where + each value is a :class:`pyopencl.array.Array` of integers. The + sizes of the arrays are as follows: `centers` is of size + `(2, nranges)`, `radii` is of size `(nranges,)`, `pryranges` is + of size `(nranges + 1,)` and `proxies` is of size + `(2, nranges * nproxies)`. The proxy points in a range :math:`i` + can be obtained by a slice `proxies[pryranges[i]:pryranges[i + 1]]` + and are all at a distance `radii[i]` from the range center + `centers[i]`. 
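As a usage sketch of the return values just described, the proxy points of one range can be pulled out as shown below. Here `queue`, `qbx`, `indices` and `ranges` are assumed to exist, e.g. as set up in the tests added by this commit; note that `proxies` is an object array with one device array per dimension, and `pxyranges` is still a host array at this point in the series (a later commit moves it to the device):

    import numpy as np
    from pytential.direct_solver import ProxyGenerator

    # `queue`, `qbx`, `indices`, `ranges` assumed to exist (see the tests below)
    gen = ProxyGenerator(queue, qbx, ratio=1.1)
    centers, radii, proxies, pxyranges = gen.get_proxies(indices, ranges)

    # stack the per-dimension device arrays into a (dim, nproxies) host array
    pxy = np.vstack([p.get(queue) for p in proxies])

    # proxy points belonging to range i
    i = 0
    pxy_i = pxy[:, pxyranges[i]:pxyranges[i + 1]]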
""" from pytential.qbx.utils import get_centers_on_side knl = self.get_kernel() _, (centers_dev, radii_dev,) = knl(self.queue, - nodes=self.places.density_discr.nodes(), - center_int=get_centers_on_side(self.places, -1), - center_ext=get_centers_on_side(self.places, +1), - expansion_radii=self.places._expansion_radii("nsources"), + nodes=self.source.density_discr.nodes(), + center_int=get_centers_on_side(self.source, -1), + center_ext=get_centers_on_side(self.source, +1), + expansion_radii=self.source._expansion_radii("nsources"), indices=indices, ranges=ranges, **kwargs) centers = centers_dev.get() radii = radii_dev.get() @@ -356,23 +359,26 @@ class ProxyGenerator(object): return centers_dev, radii_dev, proxies, pxyranges def get_neighbors(self, indices, ranges, centers, radii): - """ - :arg indices: indices for a subset of discretization nodes. + """Generate a set of neighboring points for each range of source + points in :attr:`source`. Neighboring points are defined as all + the points inside a proxy ball :math:`i` that do not also belong to + the set of source points in the same range :math:`i`. + + :arg indices: indices for a subset of the source points. :arg ranges: array used to index into the :attr:`indices` array. :arg centers: centers for each proxy ball. :arg radii: radii for each proxy ball. - :returns: `neighbors` is a set of indices into the full set of - discretization nodes (already mapped through the given set `indices`). - It contains all nodes that are inside a given proxy ball :math:`i`, - but not contained in the range :math:`i`, given by - `indices[ranges[i]:ranges[i + 1]]`. - :returns: `nbrranges` is an array used to index into the set of - neighbors and return the subset for a given proxy ball. + :return: a tuple `(neighbours, nbrranges)` where is value is a + :class:`pyopencl.array.Array` of integers. For a range :math:`i` + in `nbrranges`, the corresponding slice of the `neighbours` array + is a subset of :attr:`indices` such that all points are inside + the proxy ball centered at `centers[i]` of radius `radii[i]` + that is not included in `indices[ranges[i]:ranges[i + 1]]`. """ nranges = radii.shape[0] + 1 - sources = self.places.density_discr.nodes().get(self.queue) + sources = self.source.density_discr.nodes().get(self.queue) sources = make_obj_array([ cl.array.to_device(self.queue, sources[idim, indices]) for idim in range(self.dim)]) @@ -425,19 +431,19 @@ class ProxyGenerator(object): def __call__(self, indices, ranges, **kwargs): """ - :arg indices: Set of indices for points in :attr:`places`. - :arg ranges: Set of ranges around which to build a set of proxy - points. For each range, this builds a ball of proxy points centered - at the center of mass of the points in the range with a radius - defined by :attr:`ratio`. - - :returns: `skeletons` A set of skeleton points for each range, - which are supposed to contain all the necessary information about - the farfield interactions. This set of points contains a set of - proxy points constructed around each range and the closest neighbors - that are inside the proxy ball. - :returns: `sklranges` An array used to index into the skeleton - points. + :arg indices: set of indices for points in :attr:`source`. + :arg ranges: set of ranges around which to build a set of proxy + points. For each range, this builds a ball of proxy points centered + at the center of mass of the points in the range with a radius + defined by :attr:`ratio`. 
+ + :returns: a tuple `(skeletons, sklranges)` where each value is a + :class:`pyopencl.array.Array`. For a range :math:`i`, we can + get the slice using `skeletons[sklranges[i]:sklranges[i + 1]]`. + The skeleton points in a range represents the union of a set + of generated proxy points and all the source points inside the + proxy ball that do not also belong to the current range in + :attr:`indices`. """ @memoize_in(self, "concat_skl_knl") @@ -487,15 +493,17 @@ class ProxyGenerator(object): ], name="concat_skl", default_offset=lp.auto, + silenced_warnings="write_race(write_*)", fixed_parameters=dict(dim=self.dim), lang_version=MOST_RECENT_LANGUAGE_VERSION) loopy_knl = lp.tag_inames(loopy_knl, "idim*:unr") - # loopy_knl = lp.split_iname(loopy_knl, "irange", 16, inner_tag="l.0") + loopy_knl = lp.split_iname(loopy_knl, "irange", 128, outer_tag="g.0") + return loopy_knl # construct point arrays - sources = self.places.density_discr.nodes() + sources = self.source.density_discr.nodes() centers, radii, proxies, pxyranges = \ self.get_proxies(indices, ranges, **kwargs) neighbors, nbrranges = \ diff --git a/test/test_direct_solver.py b/test/test_direct_solver.py new file mode 100644 index 00000000..3c86ac8e --- /dev/null +++ b/test/test_direct_solver.py @@ -0,0 +1,415 @@ +from __future__ import division, absolute_import, print_function + +__copyright__ = "Copyright (C) 2018 Alexandru Fikl" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+""" + +import time + +import numpy as np +import numpy.linalg as la + +import pyopencl as cl +import pytest + +from pytential import sym +from meshmode.mesh.generation import ( # noqa + ellipse, NArmedStarfish, generate_torus, make_curve_mesh) + +from pyopencl.tools import ( # noqa + pytest_generate_tests_for_pyopencl + as pytest_generate_tests) + + +def create_data(queue, + target_order=7, + qbx_order=4, + nelements=30, + curve_f=None, + ndim=2, k=0, + lpot_idx=2): + + if curve_f is None: + curve_f = NArmedStarfish(5, 0.25) + # curve_f = partial(ellipse, 3) + + from sumpy.kernel import LaplaceKernel, HelmholtzKernel + if k: + knl = HelmholtzKernel(ndim) + knl_kwargs = {"k": k} + else: + knl = LaplaceKernel(ndim) + knl_kwargs = {} + + if lpot_idx == 1: + u_sym = sym.var("u") + op = sym.D(knl, u_sym, **knl_kwargs) + else: + u_sym = sym.var("u") + op = sym.S(knl, u_sym, **knl_kwargs) + + if ndim == 2: + mesh = make_curve_mesh(curve_f, + np.linspace(0, 1, nelements + 1), + target_order) + else: + mesh = generate_torus(10.0, 2.0, order=target_order) + + from meshmode.discretization import Discretization + from meshmode.discretization.poly_element import \ + InterpolatoryQuadratureSimplexGroupFactory + from pytential.qbx import QBXLayerPotentialSource + pre_density_discr = Discretization( + queue.context, mesh, + InterpolatoryQuadratureSimplexGroupFactory(target_order)) + + qbx, _ = QBXLayerPotentialSource(pre_density_discr, 4 * target_order, + qbx_order, + # Don't use FMM for now + fmm_order=False).with_refinement() + + return qbx, op, u_sym + + +def create_indices(qbx, nblks, + factor=0.6, + random=True, + method='elements', + use_tree=True, + use_stage2=False): + from pytential.direct_solver import ( + partition_by_nodes, partition_by_elements) + + if use_stage2: + density_discr = qbx.quad_stage2_density_discr + else: + density_discr = qbx.density_discr + + if method == 'nodes': + nnodes = density_discr.nnodes + else: + nnodes = density_discr.mesh.nelements + max_particles_in_box = nnodes // nblks + + if method == "nodes": + indices, ranges = partition_by_nodes(density_discr, + use_tree=use_tree, max_particles_in_box=max_particles_in_box) + elif method == "elements": + indices, ranges = partition_by_elements(density_discr, + use_tree=use_tree, max_particles_in_box=max_particles_in_box) + else: + raise ValueError("unknown method: {}".format(method)) + + # take a subset of the points + if abs(factor - 1.0) > 1.0e-14: + indices_ = np.empty(ranges.shape[0] - 1, dtype=np.object) + for i in range(ranges.shape[0] - 1): + iidx = indices[np.s_[ranges[i]:ranges[i + 1]]] + isize = int(factor * len(iidx)) + + if random: + indices_[i] = np.sort(np.random.choice(iidx, + size=isize, replace=False)) + else: + indices_[i] = iidx[:isize] + + ranges = np.cumsum([0] + [r.shape[0] for r in indices_]) + indices = np.hstack(indices_) + + return indices, ranges + + +@pytest.mark.parametrize("method", ["nodes", "elements"]) +@pytest.mark.parametrize("use_tree", [True, False]) +@pytest.mark.parametrize("ndim", [2, 3]) +def test_partition_points(ctx_factory, method, use_tree, ndim, verbose=False): + def plot_indices(pid, discr, indices, ranges): + import matplotlib.pyplot as pt + + rangefile = os.path.join(os.path.dirname(__file__), + "test_partition_{}_{}_ranges_{}.png".format(method, + "tree" if use_tree else "linear", pid)) + pointfile = os.path.join(os.path.dirname(__file__), + "test_partition_{}_{}_{}d_{}.png".format(method, + "tree" if use_tree else "linear", ndim, pid)) + + pt.figure(figsize=(10, 8)) + 
pt.plot(np.diff(ranges)) + pt.savefig(rangefile, dpi=300) + pt.clf() + + if ndim == 2: + sources = discr.nodes().get(queue) + + pt.figure(figsize=(10, 8)) + for i in range(nblks): + isrc = indices[np.s_[ranges[i]:ranges[i + 1]]] + pt.plot(sources[0][isrc], sources[1][isrc], 'o') + + pt.xlim([-1.5, 1.5]) + pt.ylim([-1.5, 1.5]) + pt.savefig(pointfile, dpi=300) + pt.clf() + elif ndim == 3: + from meshmode.discretization.visualization import make_visualizer + marker = -42.0 * np.ones(discr.nnodes) + + for i in range(nblks): + isrc = indices[np.s_[ranges[i]:ranges[i + 1]]] + marker[isrc] = 10.0 * (i + 1.0) + + vis = make_visualizer(queue, discr, 10) + vis.write_vtk_file(pointfile, [ + ("marker", cl.array.to_device(queue, marker)) + ]) + + + ctx = ctx_factory() + queue = cl.CommandQueue(ctx) + + from sympy.core.cache import clear_cache + clear_cache() + qbx = create_data(queue, ndim=ndim)[0] + + nblks = 10 + t_start = time.time() + srcindices, srcranges = create_indices(qbx, nblks, + method=method, use_tree=use_tree, factor=0.6) + nblks = srcranges.shape[0] - 1 + t_end = time.time() + if verbose: + print('Time: {:.5f}s'.format(t_end - t_start)) + + if verbose: + discr = qbx.resampler.from_discr + plot_indices(0, discr, srcindices, srcranges) + + if method == "elements": + from meshmode.discretization.connection import flatten_chained_connection + resampler = flatten_chained_connection(queue, qbx.resampler) + + from pytential.direct_solver import partition_from_coarse + t_start = time.time() + srcindices_, srcranges_ = \ + partition_from_coarse(queue, resampler, srcindices, srcranges) + t_end = time.time() + if verbose: + print('Time: {:.5f}s'.format(t_end - t_start)) + + sources = resampler.from_discr.nodes().get(queue) + sources_ = resampler.to_discr.nodes().get(queue) + for i in range(srcranges.shape[0] - 1): + isrc = srcindices[np.s_[srcranges[i]:srcranges[i + 1]]] + isrc_ = srcindices_[np.s_[srcranges_[i]:srcranges_[i + 1]]] + + for j in range(ndim): + assert np.min(sources_[j][isrc_]) <= np.min(sources[j][isrc]) + assert np.max(sources_[j][isrc_]) >= np.max(sources[j][isrc]) + + if verbose: + discr = qbx.resampler.to_discr + plot_indices(1, discr, srcindices, srcranges) + + +@pytest.mark.parametrize("ndim", [2, 3]) +def test_proxy_generator(ctx_factory, ndim, verbose=False): + ctx = ctx_factory() + queue = cl.CommandQueue(ctx) + + # prevent cache explosion + from sympy.core.cache import clear_cache + clear_cache() + qbx = create_data(queue, ndim=ndim)[0] + + nblks = 10 + srcindices, srcranges = create_indices(qbx, nblks) + nblks = srcranges.shape[0] - 1 + + from pytential.direct_solver import ProxyGenerator + gen = ProxyGenerator(queue, qbx, ratio=1.1) + centers, radii, proxies, pxyranges = \ + gen.get_proxies(srcindices, srcranges) + + if verbose: + knl = gen.get_kernel() + knl = lp.add_dtypes(knl, { + "nodes": np.float64, + "center_int": np.float64, + "center_ext": np.float64, + "expansion_radii": np.float64, + "ranges": np.int64, + "indices": np.int64, + "nnodes": np.int64, + "nranges": np.int64, + "nindices": np.int64}) + print(knl) + print(lp.generate_code_v2(knl).device_code()) + + proxies = np.vstack([p.get(queue) for p in proxies]) + if verbose: + if qbx.ambient_dim == 2: + import matplotlib.pyplot as pt + from pytential.qbx.utils import get_centers_on_side + + density_nodes = qbx.density_discr.nodes().get(queue) + ci = get_centers_on_side(qbx, -1) + ci = np.vstack([c.get(queue) for c in ci]) + ce = get_centers_on_side(qbx, +1) + ce = np.vstack([c.get(queue) for c in ce]) + r = 
qbx._expansion_radii("nsources").get(queue) + + for i in range(nblks): + isrc = srcindices[np.s_[srcranges[i]:srcranges[i + 1]]] + ipxy = np.s_[pxyranges[i]:pxyranges[i + 1]] + + pt.figure(figsize=(10, 8)) + axis = pt.gca() + for j in isrc: + c = pt.Circle(ci[:, j], r[j], color='k', alpha=0.1) + axis.add_artist(c) + c = pt.Circle(ce[:, j], r[j], color='k', alpha=0.1) + axis.add_artist(c) + + pt.plot(density_nodes[0], density_nodes[1], + 'ko', ms=2.0, alpha=0.5) + pt.plot(density_nodes[0, srcindices], density_nodes[1, srcindices], + 'o', ms=2.0) + pt.plot(density_nodes[0, isrc], density_nodes[1, isrc], + 'o', ms=2.0) + pt.plot(proxies[0, ipxy], proxies[1, ipxy], + 'o', ms=2.0) + pt.xlim([-1.5, 1.5]) + pt.ylim([-1.5, 1.5]) + filename = os.path.join(os.path.dirname(__file__), + "test_proxy_generator_{}d_{:04}.png".format(ndim, i)) + pt.savefig(filename, dpi=300) + pt.clf() + else: + from meshmode.discretization.visualization import make_visualizer + from meshmode.mesh.generation import make_group_from_vertices + from meshmode.mesh.processing import ( # noqa + affine_map, merge_disjoint_meshes) + from meshmode.discretization import Discretization + from meshmode.mesh import TensorProductElementGroup, Mesh + from meshmode.discretization.poly_element import \ + InterpolatoryQuadratureSimplexGroupFactory + + centers = np.vstack([c.get(queue) for c in centers]) + radii = radii.get(queue) + + for i in range(nblks): + mesh = affine_map(gen.mesh, + A=(radii[i] * np.eye(ndim)), + b=centers[:, i].reshape(-1)) + + mesh = merge_disjoint_meshes([mesh, qbx.density_discr.mesh]) + discr = Discretization(ctx, mesh, + InterpolatoryQuadratureSimplexGroupFactory(10)) + + vis = make_visualizer(queue, discr, 10) + filename = os.path.join(os.path.dirname(__file__), + "test_proxy_generator_{}d_{:04}.vtu".format(ndim, i)) + vis.write_vtk_file(filename, []) + + +@pytest.mark.parametrize("ndim", [2, 3]) +def test_area_query(ctx_factory, ndim, verbose=False): + ctx = ctx_factory() + queue = cl.CommandQueue(ctx) + + from sympy.core.cache import clear_cache + clear_cache() + qbx = create_data(queue, ndim=ndim)[0] + + nblks = 10 + srcindices, srcranges = create_indices(qbx, nblks) + nblks = srcranges.shape[0] - 1 + + # generate proxy points + from pytential.direct_solver import ProxyGenerator + gen = ProxyGenerator(queue, qbx, ratio=1.1) + centers, radii, _, _ = gen.get_proxies(srcindices, srcranges) + neighbors, ngbranges = gen.get_neighbors(srcindices, srcranges, centers, radii) + skeleton_nodes, sklranges = gen(srcindices, srcranges) + + neighbors = neighbors.get(queue) + if verbose: + if ndim == 2: + import matplotlib.pyplot as pt + density_nodes = qbx.density_discr.nodes().get(queue) + skeleton_nodes = skeleton_nodes.get(queue) + + for i in range(nblks): + isrc = srcindices[np.s_[srcranges[i]:srcranges[i + 1]]] + ingb = neighbors[ngbranges[i]:ngbranges[i + 1]] + iskl = np.s_[sklranges[i]:sklranges[i + 1]] + + pt.figure(figsize=(10, 8)) + pt.plot(density_nodes[0], density_nodes[1], + 'ko', ms=2.0, alpha=0.5) + pt.plot(density_nodes[0, srcindices], density_nodes[1, srcindices], + 'o', ms=2.0) + pt.plot(density_nodes[0, isrc], density_nodes[1, isrc], + 'o', ms=2.0) + pt.plot(density_nodes[0, ingb], density_nodes[1, ingb], + 'o', ms=2.0) + pt.plot(skeleton_nodes[0, iskl], skeleton_nodes[1, iskl], + 'x', ms=2.0) + pt.xlim([-1.5, 1.5]) + pt.ylim([-1.5, 1.5]) + + filename = os.path.join(os.path.dirname(__file__), + "test_area_query_{}d_{:04}.png".format(ndim, i)) + pt.savefig(filename, dpi=300) + pt.clf() + elif ndim == 3: + 
from meshmode.discretization.visualization import make_visualizer + marker = np.empty(qbx.density_discr.nnodes) + + for i in range(nblks): + isrc = srcindices[np.s_[srcranges[i]:srcranges[i + 1]]] + ingb = neighbors[ngbranges[i]:ngbranges[i + 1]] + + # TODO: some way to turn off some of the interpolations + # would help visualize this better. + marker.fill(0.0) + marker[srcindices] = 0.0 + marker[isrc] = -42.0 + marker[ingb] = +42.0 + marker_dev = cl.array.to_device(queue, marker) + + vis = make_visualizer(queue, qbx.density_discr, 10) + filename = os.path.join(os.path.dirname(__file__), + "test_area_query_{}d_{:04}.vtu".format(ndim, i)) + vis.write_vtk_file(filename, [ + ("marker", marker_dev), + ]) + + +if __name__ == "__main__": + import sys + if len(sys.argv) > 1: + exec(sys.argv[1]) + else: + from pytest import main + main([__file__]) + +# vim: fdm=marker -- GitLab From bb22b807d37429905929ca30a341bb54a4ed5696 Mon Sep 17 00:00:00 2001 From: Alex Fikl Date: Sun, 17 Jun 2018 19:39:42 -0500 Subject: [PATCH 03/25] direct-solver: add some more docs --- pytential/direct_solver.py | 47 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/pytential/direct_solver.py b/pytential/direct_solver.py index 0a864a10..45bb0c4d 100644 --- a/pytential/direct_solver.py +++ b/pytential/direct_solver.py @@ -46,6 +46,21 @@ def _element_node_range(groups, igroup, ielement): def partition_by_nodes(discr, use_tree=True, max_particles_in_box=30): + """Generate clusters / ranges of nodes. The partition is created at the + lowest level of granularity, i.e. nodes. This results in balanced ranges + of points, but will split elements across different ranges. + + :arg discr: a :class:`~meshmode.discretization.Discretization`. + :arg use_tree: if True, node partitions are generated using a + :class:`boxtree.TreeBuilder`, which leads to geometrically close + points to belong to the same partition. If False, a simple linear + partition is constructed. + :arg max_particles_in_box: passed to :class:`boxtree.TreeBuilder`. + + :return: a tuple `(indices, ranges)` of integer arrays. The indices + in a range can be retrieved using `indices[ranges[i]:ranges[i + 1]]`. + """ + if use_tree: from boxtree import box_flags_enum from boxtree import TreeBuilder @@ -78,6 +93,22 @@ def partition_by_nodes(discr, use_tree=True, max_particles_in_box=30): def partition_by_elements(discr, use_tree=True, max_particles_in_box=10): + """Generate clusters / ranges of points. The partiton is created at the + element level, so that all the nodes belonging to an element belong to + the same range. This can result in slightly larger difference in size + between the ranges, but can be very useful when the individual partitions + need to be resampled, integrated, etc. + + :arg discr: a :class:`~meshmode.discretization.Discretization`. + :arg use_tree: if True, node partitions are generated using a + :class:`boxtree.TreeBuilder`, which leads to geometrically close + points to belong to the same partition. If False, a simple linear + partition is constructed. + :arg max_particles_in_box: passed to :class:`boxtree.TreeBuilder`. + + :return: a tuple `(indices, ranges)` of integer arrays. The indices + in a range can be retrieved using `indices[ranges[i]:ranges[i + 1]]`. 
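A brief usage sketch of the two partitioners documented above, assuming `qbx` (a QBXLayerPotentialSource) has been created as in test_direct_solver.py; at this point in the series both functions return host numpy arrays:

    from pytential.direct_solver import (
            partition_by_nodes, partition_by_elements)

    # `qbx` is assumed to exist (see test_direct_solver.py in this series)
    # balanced ranges of nodes; may split an element across ranges
    indices, ranges = partition_by_nodes(qbx.density_discr,
            use_tree=True, max_particles_in_box=30)

    # element-aligned ranges; keeps all nodes of an element together
    el_indices, el_ranges = partition_by_elements(qbx.density_discr,
            use_tree=True, max_particles_in_box=10)

    # nodes belonging to range i
    i = 0
    nodes_in_range = indices[ranges[i]:ranges[i + 1]]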
+ """ if use_tree: from boxtree import box_flags_enum from boxtree import TreeBuilder @@ -131,6 +162,22 @@ def partition_by_elements(discr, use_tree=True, max_particles_in_box=10): def partition_from_coarse(queue, resampler, from_indices, from_ranges): + """Generate a partition of nodes from an existing partition on a + coarser discretization. The new partition is generated based on element + refinement relationships in :attr:`resampler`, so the existing partition + needs to be created using :func:`partition_by_element`. + + :arg queue: a :class:`pyopencl.CommandQueue`. + :arg resampler: a + :class:`~meshmode.discretization.connection.DirectDiscretizationConnection`. + :arg from_indices: a set of indices into the nodes in + :attr:`resampler.from_discr`. + :arg from_ranges: array used to index into :attr:`from_indices`. + + :return: a tuple `(indices, ranges)` of integer arrays. The indices + in a range can be retrieved using `indices[ranges[i]:ranges[i + 1]]`. + """ + if not hasattr(resampler, "groups"): raise ValueError("resampler must be a DirectDiscretizationConnection.") -- GitLab From b812f9fc6379f2f89d71cc2ecc84e3159d3c57a1 Mon Sep 17 00:00:00 2001 From: Alex Fikl Date: Sun, 17 Jun 2018 19:44:17 -0500 Subject: [PATCH 04/25] direct-solver: flake8 fixes --- pytential/direct_solver.py | 26 ++++++++++++++++---------- test/test_direct_solver.py | 31 +++++++++++-------------------- 2 files changed, 27 insertions(+), 30 deletions(-) diff --git a/pytential/direct_solver.py b/pytential/direct_solver.py index 45bb0c4d..565c17be 100644 --- a/pytential/direct_solver.py +++ b/pytential/direct_solver.py @@ -72,7 +72,7 @@ def partition_by_nodes(discr, use_tree=True, max_particles_in_box=30): max_particles_in_box=max_particles_in_box) tree = tree.get(queue) - leaf_boxes, = (tree.box_flags & + leaf_boxes, = (tree.box_flags & box_flags_enum.HAS_CHILDREN == 0).nonzero() indices = np.empty(len(leaf_boxes), dtype=np.object) @@ -126,11 +126,10 @@ def partition_by_elements(discr, use_tree=True, max_particles_in_box=10): groups = discr.groups tree = tree.get(queue) - leaf_boxes, = (tree.box_flags & + leaf_boxes, = (tree.box_flags & box_flags_enum.HAS_CHILDREN == 0).nonzero() indices = np.empty(len(leaf_boxes), dtype=np.object) - elements = np.empty(len(leaf_boxes), dtype=np.object) for i, ibox in enumerate(leaf_boxes): box_start = tree.box_source_starts[ibox] box_end = box_start + tree.box_source_counts_cumul[ibox] @@ -165,12 +164,19 @@ def partition_from_coarse(queue, resampler, from_indices, from_ranges): """Generate a partition of nodes from an existing partition on a coarser discretization. The new partition is generated based on element refinement relationships in :attr:`resampler`, so the existing partition - needs to be created using :func:`partition_by_element`. + needs to be created using :func:`partition_by_element`, since we assume + that each range contains all the nodes in an element. + + The new partition will have the same number of ranges as the old partition. + The nodes inside each range in the new partition are all the nodes in + :attr:`resampler.to_discr` that belong to the same region as the nodes + in the same range from :attr:`resampler.from_discr`. These nodes are + obtained using :attr:`mesmode.discretization.connection.InterpolationBatch`. :arg queue: a :class:`pyopencl.CommandQueue`. :arg resampler: a :class:`~meshmode.discretization.connection.DirectDiscretizationConnection`. 
- :arg from_indices: a set of indices into the nodes in + :arg from_indices: a set of indices into the nodes in :attr:`resampler.from_discr`. :arg from_ranges: array used to index into :attr:`from_indices`. @@ -366,11 +372,11 @@ class ProxyGenerator(object): :return: a tuple of `(centers, radii, proxies, pryranges)`, where each value is a :class:`pyopencl.array.Array` of integers. The sizes of the arrays are as follows: `centers` is of size - `(2, nranges)`, `radii` is of size `(nranges,)`, `pryranges` is - of size `(nranges + 1,)` and `proxies` is of size - `(2, nranges * nproxies)`. The proxy points in a range :math:`i` - can be obtained by a slice `proxies[pryranges[i]:pryranges[i + 1]]` - and are all at a distance `radii[i]` from the range center + `(2, nranges)`, `radii` is of size `(nranges,)`, `pryranges` is + of size `(nranges + 1,)` and `proxies` is of size + `(2, nranges * nproxies)`. The proxy points in a range :math:`i` + can be obtained by a slice `proxies[pryranges[i]:pryranges[i + 1]]` + and are all at a distance `radii[i]` from the range center `centers[i]`. """ diff --git a/test/test_direct_solver.py b/test/test_direct_solver.py index 3c86ac8e..57d58466 100644 --- a/test/test_direct_solver.py +++ b/test/test_direct_solver.py @@ -25,15 +25,14 @@ THE SOFTWARE. import time import numpy as np -import numpy.linalg as la - import pyopencl as cl -import pytest +import loopy as lp from pytential import sym from meshmode.mesh.generation import ( # noqa ellipse, NArmedStarfish, generate_torus, make_curve_mesh) +import pytest from pyopencl.tools import ( # noqa pytest_generate_tests_for_pyopencl as pytest_generate_tests) @@ -144,12 +143,10 @@ def test_partition_points(ctx_factory, method, use_tree, ndim, verbose=False): def plot_indices(pid, discr, indices, ranges): import matplotlib.pyplot as pt - rangefile = os.path.join(os.path.dirname(__file__), - "test_partition_{}_{}_ranges_{}.png".format(method, - "tree" if use_tree else "linear", pid)) - pointfile = os.path.join(os.path.dirname(__file__), - "test_partition_{}_{}_{}d_{}.png".format(method, - "tree" if use_tree else "linear", ndim, pid)) + rangefile = "test_partition_{}_{}_ranges_{}.png".format(method, + "tree" if use_tree else "linear", pid) + pointfile = "test_partition_{}_{}_{}d_{}.png".format(method, + "tree" if use_tree else "linear", ndim, pid) pt.figure(figsize=(10, 8)) pt.plot(np.diff(ranges)) @@ -181,7 +178,6 @@ def test_partition_points(ctx_factory, method, use_tree, ndim, verbose=False): ("marker", cl.array.to_device(queue, marker)) ]) - ctx = ctx_factory() queue = cl.CommandQueue(ctx) @@ -298,17 +294,15 @@ def test_proxy_generator(ctx_factory, ndim, verbose=False): 'o', ms=2.0) pt.xlim([-1.5, 1.5]) pt.ylim([-1.5, 1.5]) - filename = os.path.join(os.path.dirname(__file__), - "test_proxy_generator_{}d_{:04}.png".format(ndim, i)) + + filename = "test_proxy_generator_{}d_{:04}.png".format(ndim, i) pt.savefig(filename, dpi=300) pt.clf() else: from meshmode.discretization.visualization import make_visualizer - from meshmode.mesh.generation import make_group_from_vertices from meshmode.mesh.processing import ( # noqa affine_map, merge_disjoint_meshes) from meshmode.discretization import Discretization - from meshmode.mesh import TensorProductElementGroup, Mesh from meshmode.discretization.poly_element import \ InterpolatoryQuadratureSimplexGroupFactory @@ -325,8 +319,7 @@ def test_proxy_generator(ctx_factory, ndim, verbose=False): InterpolatoryQuadratureSimplexGroupFactory(10)) vis = make_visualizer(queue, discr, 10) - 
filename = os.path.join(os.path.dirname(__file__), - "test_proxy_generator_{}d_{:04}.vtu".format(ndim, i)) + filename = "test_proxy_generator_{}d_{:04}.vtu".format(ndim, i) vis.write_vtk_file(filename, []) @@ -376,8 +369,7 @@ def test_area_query(ctx_factory, ndim, verbose=False): pt.xlim([-1.5, 1.5]) pt.ylim([-1.5, 1.5]) - filename = os.path.join(os.path.dirname(__file__), - "test_area_query_{}d_{:04}.png".format(ndim, i)) + filename = "test_area_query_{}d_{:04}.png".format(ndim, i) pt.savefig(filename, dpi=300) pt.clf() elif ndim == 3: @@ -397,8 +389,7 @@ def test_area_query(ctx_factory, ndim, verbose=False): marker_dev = cl.array.to_device(queue, marker) vis = make_visualizer(queue, qbx.density_discr, 10) - filename = os.path.join(os.path.dirname(__file__), - "test_area_query_{}d_{:04}.vtu".format(ndim, i)) + filename = "test_area_query_{}d_{:04}.vtu".format(ndim, i) vis.write_vtk_file(filename, [ ("marker", marker_dev), ]) -- GitLab From c190d028892d1f0e1d90efb31205b7126f74870a Mon Sep 17 00:00:00 2001 From: Alex Fikl Date: Sun, 17 Jun 2018 20:47:05 -0500 Subject: [PATCH 05/25] direct-solver: fix tests with visualize=True --- pytential/direct_solver.py | 5 +-- test/test_direct_solver.py | 67 +++++++++++++++++++++++++------------- 2 files changed, 48 insertions(+), 24 deletions(-) diff --git a/pytential/direct_solver.py b/pytential/direct_solver.py index 565c17be..80b1f473 100644 --- a/pytential/direct_solver.py +++ b/pytential/direct_solver.py @@ -402,6 +402,7 @@ class ProxyGenerator(object): proxies[i] = mesh.vertices pxyranges = np.cumsum([0] + [p.shape[-1] for p in proxies]) + pxyranges = cl.array.to_device(self.queue, pxyranges) proxies = make_obj_array([ cl.array.to_device(self.queue, np.hstack([p[idim] for p in proxies])) for idim in range(self.dim)]) @@ -566,11 +567,11 @@ class ProxyGenerator(object): _, (skeletons,) = knl()(self.queue, sources=sources, proxies=proxies, neighbors=neighbors, pxyranges=pxyranges, nbrranges=nbrranges) - sklranges = np.array([p + n for p, n in zip(pxyranges, nbrranges)]) + sklranges = np.array([p + n for p, n in zip(pxyranges.get(self.queue), + nbrranges.get(self.queue))]) return skeletons, sklranges - # }}} # vim: foldmethod=marker diff --git a/test/test_direct_solver.py b/test/test_direct_solver.py index 57d58466..858c7434 100644 --- a/test/test_direct_solver.py +++ b/test/test_direct_solver.py @@ -22,6 +22,7 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
""" +import os import time import numpy as np @@ -89,7 +90,7 @@ def create_data(queue, def create_indices(qbx, nblks, - factor=0.6, + factor=1.0, random=True, method='elements', use_tree=True, @@ -139,17 +140,15 @@ def create_indices(qbx, nblks, @pytest.mark.parametrize("method", ["nodes", "elements"]) @pytest.mark.parametrize("use_tree", [True, False]) @pytest.mark.parametrize("ndim", [2, 3]) -def test_partition_points(ctx_factory, method, use_tree, ndim, verbose=False): +def test_partition_points(ctx_factory, method, use_tree, ndim, visualize=False): def plot_indices(pid, discr, indices, ranges): import matplotlib.pyplot as pt - rangefile = "test_partition_{}_{}_ranges_{}.png".format(method, - "tree" if use_tree else "linear", pid) - pointfile = "test_partition_{}_{}_{}d_{}.png".format(method, - "tree" if use_tree else "linear", ndim, pid) - pt.figure(figsize=(10, 8)) pt.plot(np.diff(ranges)) + + rangefile = "test_partition_{}_{}_ranges_{}.png".format(method, + "tree" if use_tree else "linear", pid) pt.savefig(rangefile, dpi=300) pt.clf() @@ -157,15 +156,24 @@ def test_partition_points(ctx_factory, method, use_tree, ndim, verbose=False): sources = discr.nodes().get(queue) pt.figure(figsize=(10, 8)) + pt.plot(sources[0], sources[1], 'ko', alpha=0.5) for i in range(nblks): isrc = indices[np.s_[ranges[i]:ranges[i + 1]]] pt.plot(sources[0][isrc], sources[1][isrc], 'o') - pt.xlim([-1.5, 1.5]) pt.ylim([-1.5, 1.5]) + + pointfile = "test_partition_{}_{}_{}d_{}.png".format(method, + "tree" if use_tree else "linear", ndim, pid) pt.savefig(pointfile, dpi=300) pt.clf() elif ndim == 3: + from meshmode.discretization import NoninterpolatoryElementGroupError + try: + discr.groups[0].basis() + except NoninterpolatoryElementGroupError: + return + from meshmode.discretization.visualization import make_visualizer marker = -42.0 * np.ones(discr.nnodes) @@ -174,6 +182,12 @@ def test_partition_points(ctx_factory, method, use_tree, ndim, verbose=False): marker[isrc] = 10.0 * (i + 1.0) vis = make_visualizer(queue, discr, 10) + + pointfile = "test_partition_{}_{}_{}d_{}.vtu".format(method, + "tree" if use_tree else "linear", ndim, pid) + if os.path.isfile(pointfile): + os.remove(pointfile) + vis.write_vtk_file(pointfile, [ ("marker", cl.array.to_device(queue, marker)) ]) @@ -188,13 +202,13 @@ def test_partition_points(ctx_factory, method, use_tree, ndim, verbose=False): nblks = 10 t_start = time.time() srcindices, srcranges = create_indices(qbx, nblks, - method=method, use_tree=use_tree, factor=0.6) + method=method, use_tree=use_tree, factor=1.0) nblks = srcranges.shape[0] - 1 t_end = time.time() - if verbose: + if visualize: print('Time: {:.5f}s'.format(t_end - t_start)) - if verbose: + if visualize: discr = qbx.resampler.from_discr plot_indices(0, discr, srcindices, srcranges) @@ -207,7 +221,7 @@ def test_partition_points(ctx_factory, method, use_tree, ndim, verbose=False): srcindices_, srcranges_ = \ partition_from_coarse(queue, resampler, srcindices, srcranges) t_end = time.time() - if verbose: + if visualize: print('Time: {:.5f}s'.format(t_end - t_start)) sources = resampler.from_discr.nodes().get(queue) @@ -220,13 +234,13 @@ def test_partition_points(ctx_factory, method, use_tree, ndim, verbose=False): assert np.min(sources_[j][isrc_]) <= np.min(sources[j][isrc]) assert np.max(sources_[j][isrc_]) >= np.max(sources[j][isrc]) - if verbose: - discr = qbx.resampler.to_discr - plot_indices(1, discr, srcindices, srcranges) + if visualize: + discr = resampler.to_discr + plot_indices(1, discr, srcindices_, 
srcranges_) @pytest.mark.parametrize("ndim", [2, 3]) -def test_proxy_generator(ctx_factory, ndim, verbose=False): +def test_proxy_generator(ctx_factory, ndim, visualize=False): ctx = ctx_factory() queue = cl.CommandQueue(ctx) @@ -236,7 +250,8 @@ def test_proxy_generator(ctx_factory, ndim, verbose=False): qbx = create_data(queue, ndim=ndim)[0] nblks = 10 - srcindices, srcranges = create_indices(qbx, nblks) + srcindices, srcranges = create_indices(qbx, nblks, + method='nodes', factor=0.6) nblks = srcranges.shape[0] - 1 from pytential.direct_solver import ProxyGenerator @@ -244,7 +259,7 @@ def test_proxy_generator(ctx_factory, ndim, verbose=False): centers, radii, proxies, pxyranges = \ gen.get_proxies(srcindices, srcranges) - if verbose: + if visualize: knl = gen.get_kernel() knl = lp.add_dtypes(knl, { "nodes": np.float64, @@ -260,7 +275,7 @@ def test_proxy_generator(ctx_factory, ndim, verbose=False): print(lp.generate_code_v2(knl).device_code()) proxies = np.vstack([p.get(queue) for p in proxies]) - if verbose: + if visualize: if qbx.ambient_dim == 2: import matplotlib.pyplot as pt from pytential.qbx.utils import get_centers_on_side @@ -271,6 +286,7 @@ def test_proxy_generator(ctx_factory, ndim, verbose=False): ce = get_centers_on_side(qbx, +1) ce = np.vstack([c.get(queue) for c in ce]) r = qbx._expansion_radii("nsources").get(queue) + pxyranges = pxyranges.get(queue) for i in range(nblks): isrc = srcindices[np.s_[srcranges[i]:srcranges[i + 1]]] @@ -320,11 +336,13 @@ def test_proxy_generator(ctx_factory, ndim, verbose=False): vis = make_visualizer(queue, discr, 10) filename = "test_proxy_generator_{}d_{:04}.vtu".format(ndim, i) + if os.path.isfile(filename): + os.remove(filename) vis.write_vtk_file(filename, []) @pytest.mark.parametrize("ndim", [2, 3]) -def test_area_query(ctx_factory, ndim, verbose=False): +def test_area_query(ctx_factory, ndim, visualize=False): ctx = ctx_factory() queue = cl.CommandQueue(ctx) @@ -333,7 +351,8 @@ def test_area_query(ctx_factory, ndim, verbose=False): qbx = create_data(queue, ndim=ndim)[0] nblks = 10 - srcindices, srcranges = create_indices(qbx, nblks) + srcindices, srcranges = create_indices(qbx, nblks, + method='nodes', factor=0.6) nblks = srcranges.shape[0] - 1 # generate proxy points @@ -344,7 +363,8 @@ def test_area_query(ctx_factory, ndim, verbose=False): skeleton_nodes, sklranges = gen(srcindices, srcranges) neighbors = neighbors.get(queue) - if verbose: + ngbranges = ngbranges.get(queue) + if visualize: if ndim == 2: import matplotlib.pyplot as pt density_nodes = qbx.density_discr.nodes().get(queue) @@ -390,6 +410,9 @@ def test_area_query(ctx_factory, ndim, verbose=False): vis = make_visualizer(queue, qbx.density_discr, 10) filename = "test_area_query_{}d_{:04}.vtu".format(ndim, i) + if os.path.isfile(filename): + os.remove(filename) + vis.write_vtk_file(filename, [ ("marker", marker_dev), ]) -- GitLab From 105f1632b0f04438c90d5b29a7eebf58a656b8e1 Mon Sep 17 00:00:00 2001 From: Alex Fikl Date: Wed, 20 Jun 2018 19:05:06 -0500 Subject: [PATCH 06/25] move proxy code to a new linalg folder --- pytential/{direct_solver.py => linalg/proxy.py} | 0 test/{test_direct_solver.py => test_linalg_proxy.py} | 8 ++++---- 2 files changed, 4 insertions(+), 4 deletions(-) rename pytential/{direct_solver.py => linalg/proxy.py} (100%) rename test/{test_direct_solver.py => test_linalg_proxy.py} (98%) diff --git a/pytential/direct_solver.py b/pytential/linalg/proxy.py similarity index 100% rename from pytential/direct_solver.py rename to pytential/linalg/proxy.py diff 
--git a/test/test_direct_solver.py b/test/test_linalg_proxy.py similarity index 98% rename from test/test_direct_solver.py rename to test/test_linalg_proxy.py index 858c7434..bf3223b3 100644 --- a/test/test_direct_solver.py +++ b/test/test_linalg_proxy.py @@ -95,7 +95,7 @@ def create_indices(qbx, nblks, method='elements', use_tree=True, use_stage2=False): - from pytential.direct_solver import ( + from pytential.linalg.proxy import ( partition_by_nodes, partition_by_elements) if use_stage2: @@ -216,7 +216,7 @@ def test_partition_points(ctx_factory, method, use_tree, ndim, visualize=False): from meshmode.discretization.connection import flatten_chained_connection resampler = flatten_chained_connection(queue, qbx.resampler) - from pytential.direct_solver import partition_from_coarse + from pytential.linalg.proxy import partition_from_coarse t_start = time.time() srcindices_, srcranges_ = \ partition_from_coarse(queue, resampler, srcindices, srcranges) @@ -254,7 +254,7 @@ def test_proxy_generator(ctx_factory, ndim, visualize=False): method='nodes', factor=0.6) nblks = srcranges.shape[0] - 1 - from pytential.direct_solver import ProxyGenerator + from pytential.linalg.proxy import ProxyGenerator gen = ProxyGenerator(queue, qbx, ratio=1.1) centers, radii, proxies, pxyranges = \ gen.get_proxies(srcindices, srcranges) @@ -356,7 +356,7 @@ def test_area_query(ctx_factory, ndim, visualize=False): nblks = srcranges.shape[0] - 1 # generate proxy points - from pytential.direct_solver import ProxyGenerator + from pytential.linalg.proxy import ProxyGenerator gen = ProxyGenerator(queue, qbx, ratio=1.1) centers, radii, _, _ = gen.get_proxies(srcindices, srcranges) neighbors, ngbranges = gen.get_neighbors(srcindices, srcranges, centers, radii) -- GitLab From 5cd88ed18c07561fdf3fa84a7de11581f998fe1f Mon Sep 17 00:00:00 2001 From: Alex Fikl Date: Wed, 20 Jun 2018 20:48:10 -0500 Subject: [PATCH 07/25] direct-solver: make all the functions return cl arrays --- doc/index.rst | 1 + doc/linalg.rst | 9 ++ pytential/linalg/proxy.py | 211 +++++++++++++++++++++++--------------- test/test_linalg_proxy.py | 46 ++++++--- 4 files changed, 168 insertions(+), 99 deletions(-) create mode 100644 doc/linalg.rst diff --git a/doc/index.rst b/doc/index.rst index d56c8bb5..772a936a 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -17,6 +17,7 @@ Contents discretization symbolic + linalg tools misc diff --git a/doc/linalg.rst b/doc/linalg.rst new file mode 100644 index 00000000..d5e78a6e --- /dev/null +++ b/doc/linalg.rst @@ -0,0 +1,9 @@ +Linear Algebra Routines +======================= + +Hierarchical Direct Solver +-------------------------- + +.. automodule:: pytential.linalg.proxy + +.. vim: sw=4:tw=75 diff --git a/pytential/linalg/proxy.py b/pytential/linalg/proxy.py index 80b1f473..d00dd410 100644 --- a/pytential/linalg/proxy.py +++ b/pytential/linalg/proxy.py @@ -22,11 +22,13 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. """ + import numpy as np import numpy.linalg as la import pyopencl as cl import pyopencl.array # noqa +from pyopencl.array import to_device from pytools.obj_array import make_obj_array from pytools import memoize_method, memoize_in @@ -35,70 +37,92 @@ import loopy as lp from loopy.version import MOST_RECENT_LANGUAGE_VERSION +__doc__ = """ +Proxy Point Generation +~~~~~~~~~~~~~~~~~~~~~~ + +.. autofunction:: partition_by_nodes + +.. autofunction:: partition_by_elements + +.. autofunction:: partition_from_coarse + +.. 
autoclass:: ProxyGenerator +""" + + # {{{ point index partitioning -def _element_node_range(groups, igroup, ielement): - istart = groups[igroup].node_nr_base + \ - groups[igroup].nunit_nodes * ielement - iend = groups[igroup].node_nr_base + \ - groups[igroup].nunit_nodes * (ielement + 1) +def _element_node_range(group, ielement): + istart = group.node_nr_base + group.nunit_nodes * ielement + iend = group.node_nr_base + group.nunit_nodes * (ielement + 1) + return np.arange(istart, iend) -def partition_by_nodes(discr, use_tree=True, max_particles_in_box=30): +def partition_by_nodes(queue, discr, + use_tree=True, + max_particles_in_box=30): """Generate clusters / ranges of nodes. The partition is created at the lowest level of granularity, i.e. nodes. This results in balanced ranges of points, but will split elements across different ranges. + :arg queue: a :class:`pyopencl.CommandQueue`. :arg discr: a :class:`~meshmode.discretization.Discretization`. - :arg use_tree: if True, node partitions are generated using a + :arg use_tree: if `True`, node partitions are generated using a :class:`boxtree.TreeBuilder`, which leads to geometrically close - points to belong to the same partition. If False, a simple linear + points to belong to the same partition. If `False`, a simple linear partition is constructed. :arg max_particles_in_box: passed to :class:`boxtree.TreeBuilder`. - :return: a tuple `(indices, ranges)` of integer arrays. The indices - in a range can be retrieved using `indices[ranges[i]:ranges[i + 1]]`. + :return: a tuple `(indices, ranges)` of :class:`pyopencl.array.Array` + integer arrays. The indices in a range can be retrieved using + `indices[ranges[i]:ranges[i + 1]]`. """ if use_tree: from boxtree import box_flags_enum from boxtree import TreeBuilder - with cl.CommandQueue(discr.cl_context) as queue: - builder = TreeBuilder(discr.cl_context) + builder = TreeBuilder(discr.cl_context) - tree, _ = builder(queue, discr.nodes(), - max_particles_in_box=max_particles_in_box) + tree, _ = builder(queue, discr.nodes(), + max_particles_in_box=max_particles_in_box) - tree = tree.get(queue) - leaf_boxes, = (tree.box_flags & - box_flags_enum.HAS_CHILDREN == 0).nonzero() + tree = tree.get(queue) + leaf_boxes, = (tree.box_flags & + box_flags_enum.HAS_CHILDREN == 0).nonzero() - indices = np.empty(len(leaf_boxes), dtype=np.object) - for i, ibox in enumerate(leaf_boxes): - box_start = tree.box_source_starts[ibox] - box_end = box_start + tree.box_source_counts_cumul[ibox] - indices[i] = tree.user_source_ids[box_start:box_end] + indices = np.empty(len(leaf_boxes), dtype=np.object) + for i, ibox in enumerate(leaf_boxes): + box_start = tree.box_source_starts[ibox] + box_end = box_start + tree.box_source_counts_cumul[ibox] + indices[i] = tree.user_source_ids[box_start:box_end] - ranges = np.cumsum([0] + [box.shape[0] for box in indices]) - indices = np.hstack(indices) + ranges = to_device(queue, + np.cumsum([0] + [box.shape[0] for box in indices])) + indices = to_device(queue, np.hstack(indices)) else: - indices = np.arange(0, discr.nnodes) - ranges = np.arange(0, discr.nnodes + 1, - discr.nnodes // max_particles_in_box) + indices = cl.array.arange(queue, 0, discr.nnodes, + dtype=np.int) + ranges = cl.array.arange(queue, 0, discr.nnodes + 1, + discr.nnodes // max_particles_in_box, + dtype=np.int) assert ranges[-1] == discr.nnodes return indices, ranges -def partition_by_elements(discr, use_tree=True, max_particles_in_box=10): - """Generate clusters / ranges of points. 
The partiton is created at the +def partition_by_elements(queue, discr, + use_tree=True, + max_particles_in_box=10): + """Generate clusters / ranges of points. The partition is created at the element level, so that all the nodes belonging to an element belong to - the same range. This can result in slightly larger difference in size + the same range. This can result in slightly larger differences in size between the ranges, but can be very useful when the individual partitions need to be resampled, integrated, etc. + :arg queue: a :class:`pyopencl.CommandQueue`. :arg discr: a :class:`~meshmode.discretization.Discretization`. :arg use_tree: if True, node partitions are generated using a :class:`boxtree.TreeBuilder`, which leads to geometrically close @@ -106,57 +130,57 @@ def partition_by_elements(discr, use_tree=True, max_particles_in_box=10): partition is constructed. :arg max_particles_in_box: passed to :class:`boxtree.TreeBuilder`. - :return: a tuple `(indices, ranges)` of integer arrays. The indices - in a range can be retrieved using `indices[ranges[i]:ranges[i + 1]]`. + :return: a tuple `(indices, ranges)` of :class:`pyopencl.array.Array` + integer arrays. The indices in a range can be retrieved using + `indices[ranges[i]:ranges[i + 1]]`. """ if use_tree: from boxtree import box_flags_enum from boxtree import TreeBuilder - with cl.CommandQueue(discr.cl_context) as queue: - builder = TreeBuilder(discr.cl_context) + builder = TreeBuilder(discr.cl_context) - from pytential.qbx.utils import element_centers_of_mass - elranges = np.cumsum([0] + - [group.nelements for group in discr.mesh.groups]) - elcenters = element_centers_of_mass(discr) + from pytential.qbx.utils import element_centers_of_mass + elranges = np.cumsum([group.nelements for group in discr.mesh.groups]) + elcenters = element_centers_of_mass(discr) - tree, _ = builder(queue, elcenters, - max_particles_in_box=max_particles_in_box) + tree, _ = builder(queue, elcenters, + max_particles_in_box=max_particles_in_box) - groups = discr.groups - tree = tree.get(queue) - leaf_boxes, = (tree.box_flags & - box_flags_enum.HAS_CHILDREN == 0).nonzero() + groups = discr.groups + tree = tree.get(queue) + leaf_boxes, = (tree.box_flags & + box_flags_enum.HAS_CHILDREN == 0).nonzero() - indices = np.empty(len(leaf_boxes), dtype=np.object) - for i, ibox in enumerate(leaf_boxes): - box_start = tree.box_source_starts[ibox] - box_end = box_start + tree.box_source_counts_cumul[ibox] + indices = np.empty(len(leaf_boxes), dtype=np.object) + for i, ibox in enumerate(leaf_boxes): + box_start = tree.box_source_starts[ibox] + box_end = box_start + tree.box_source_counts_cumul[ibox] - ielement = tree.user_source_ids[box_start:box_end] - igroup = np.digitize(ielement, elranges) - 1 + ielement = tree.user_source_ids[box_start:box_end] + igroup = np.digitize(ielement, elranges) - indices[i] = np.hstack([_element_node_range(groups, j, k) - for j, k in zip(igroup, ielement)]) + indices[i] = np.hstack([_element_node_range(groups[j], k) + for j, k in zip(igroup, ielement)]) else: - groups = discr.groups - elranges = np.cumsum([0] + - [group.nelements for group in discr.mesh.groups]) - nelements = discr.mesh.nelements - indices = np.array_split(np.arange(0, nelements), - nelements // max_particles_in_box) - for i in range(len(indices)): - ielement = indices[i] - igroup = np.digitize(ielement, elranges) - 1 + elements = np.array_split(np.arange(0, nelements), + nelements // max_particles_in_box) - indices[i] = np.hstack([_element_node_range(groups, j, k) - for j, k 
in zip(igroup, ielement)]) + elranges = np.cumsum([g.nelements for g in discr.groups]) + elgroups = [np.digitize(elements[i], elranges) + for i in range(len(elements))] + + indices = np.empty(len(elements), dtype=np.object) + for i in range(indices.shape[0]): + indices[i] = np.hstack([_element_node_range(discr.groups[j], k) + for j, k in zip(elgroups[i], elements[i])]) - ranges = np.cumsum([0] + [box.shape[0] for box in indices]) - indices = np.hstack(indices) + ranges = to_device(queue, + np.cumsum([0] + [box.shape[0] for box in indices])) + indices = to_device(queue, np.hstack(indices)) + assert ranges[-1] == discr.nnodes return indices, ranges @@ -180,13 +204,18 @@ def partition_from_coarse(queue, resampler, from_indices, from_ranges): :attr:`resampler.from_discr`. :arg from_ranges: array used to index into :attr:`from_indices`. - :return: a tuple `(indices, ranges)` of integer arrays. The indices - in a range can be retrieved using `indices[ranges[i]:ranges[i + 1]]`. + :return: a tuple `(indices, ranges)` of :class:`pyopencl.array.Array` + integer arrays. The indices in a range can be retrieved using + `indices[ranges[i]:ranges[i + 1]]`. """ if not hasattr(resampler, "groups"): raise ValueError("resampler must be a DirectDiscretizationConnection.") + if isinstance(from_ranges, cl.array.Array): + from_indices = from_indices.get(queue) + from_ranges = from_ranges.get(queue) + # construct ranges from_discr = resampler.from_discr from_grp_ranges = np.cumsum([0] + @@ -224,10 +253,11 @@ def partition_from_coarse(queue, resampler, from_indices, from_ranges): for i, j in zip(el_ranges[to_el_table[igrp][to_element_indices]], to_element_indices): indices[i] = np.hstack([indices[i], - _element_node_range(resampler.to_discr.groups, igrp, j)]) + _element_node_range(resampler.to_discr.groups[igrp], j)]) - ranges = np.cumsum([0] + [box.shape[0] for box in indices]) - indices = np.hstack(indices) + ranges = to_device(queue, + np.cumsum([0] + [box.shape[0] for box in indices])) + indices = to_device(queue, np.hstack(indices)) return indices, ranges @@ -364,13 +394,14 @@ class ProxyGenerator(object): """Generate proxy points for each given range of source points in the discretization :attr:`source`. - :arg indices: a set of indices around which to construct proxy balls. - :arg ranges: an array of size `(nranges + 1,)` used to index into the - set of indices. Each one of the `nranges` ranges will get a proxy - ball. + :arg indices: a :class:`pyopencl.array.Array` of indices around + which to construct proxy balls. + :arg ranges: an :class:`pyopencl.array.Array` of size `(nranges + 1,)` + used to index into :attr:`indices`. Each one of the `nranges` + ranges will get a proxy ball. :return: a tuple of `(centers, radii, proxies, pryranges)`, where - each value is a :class:`pyopencl.array.Array` of integers. The + each value is a :class:`pyopencl.array.Array`. The sizes of the arrays are as follows: `centers` is of size `(2, nranges)`, `radii` is of size `(nranges,)`, `pryranges` is of size `(nranges + 1,)` and `proxies` is of size @@ -418,12 +449,16 @@ class ProxyGenerator(object): the points inside a proxy ball :math:`i` that do not also belong to the set of source points in the same range :math:`i`. - :arg indices: indices for a subset of the source points. - :arg ranges: array used to index into the :attr:`indices` array. - :arg centers: centers for each proxy ball. - :arg radii: radii for each proxy ball. + :arg indices: a :class:`pyopencl.array.Array` of indices for a subset + of the source points. 
+ :arg ranges: a :class:`pyopencl.array.Array` used to index into + the :attr:`indices` array. + :arg centers: a :class:`pyopencl.array.Array` containing the center + of each proxy ball. + :arg radii: a :class:`pyopencl.array.Array` containing the radius + of each proxy ball. - :return: a tuple `(neighbours, nbrranges)` where is value is a + :return: a tuple `(neighbours, nbrranges)` where each value is a :class:`pyopencl.array.Array` of integers. For a range :math:`i` in `nbrranges`, the corresponding slice of the `neighbours` array is a subset of :attr:`indices` such that all points are inside @@ -431,6 +466,10 @@ class ProxyGenerator(object): that is not included in `indices[ranges[i]:ranges[i + 1]]`. """ + if isinstance(indices, cl.array.Array): + indices = indices.get(self.queue) + ranges = ranges.get(self.queue) + nranges = radii.shape[0] + 1 sources = self.source.density_discr.nodes().get(self.queue) sources = make_obj_array([ @@ -480,14 +519,16 @@ class ProxyGenerator(object): nbrranges = np.cumsum([0] + [n.shape[0] for n in neighbors]) neighbors = np.hstack(neighbors) - return (cl.array.to_device(self.queue, neighbors), - cl.array.to_device(self.queue, nbrranges)) + return (to_device(self.queue, neighbors), + to_device(self.queue, nbrranges)) def __call__(self, indices, ranges, **kwargs): """ - :arg indices: set of indices for points in :attr:`source`. - :arg ranges: set of ranges around which to build a set of proxy - points. For each range, this builds a ball of proxy points centered + :arg indices: a :class:`pyopencl.array.Array` of indices for points + in :attr:`source`. + :arg ranges: a :class:`pyopencl.array.Array` describing the ranges + from :attr:`indices` around which to build proxy points. For each + range, this builds a ball of proxy points centered at the center of mass of the points in the range with a radius defined by :attr:`ratio`. 
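
For reference, a minimal usage sketch of the interface introduced above, assuming
an existing command queue `queue` and a refined QBX source `qbx` (the names mirror
the updated tests that follow; this is an illustration, not part of the patch)::

    from pytential.linalg.proxy import partition_by_nodes, ProxyGenerator

    # partition the density discretization into ranges of nodes; in this
    # version both returned arrays are pyopencl device arrays
    nnodes = qbx.density_discr.nnodes
    srcindices, srcranges = partition_by_nodes(queue, qbx.density_discr,
            use_tree=True, max_particles_in_box=nnodes // 10)

    # build a ball of proxy points around each range
    gen = ProxyGenerator(queue, qbx, ratio=1.1)
    centers, radii, proxies, pxyranges = gen.get_proxies(srcindices, srcranges)

    # neighboring sources: points that fall inside a proxy ball but do not
    # belong to the range the ball was built for
    neighbors, nbrranges = gen.get_neighbors(srcindices, srcranges,
            centers, radii)

    # or build the concatenated proxy + neighbor ("skeleton") set directly
    skeletons, sklranges = gen(srcindices, srcranges)
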
diff --git a/test/test_linalg_proxy.py b/test/test_linalg_proxy.py index bf3223b3..915cc4b8 100644 --- a/test/test_linalg_proxy.py +++ b/test/test_linalg_proxy.py @@ -27,6 +27,7 @@ import time import numpy as np import pyopencl as cl +from pyopencl.array import to_device import loopy as lp from pytential import sym @@ -89,7 +90,7 @@ def create_data(queue, return qbx, op, u_sym -def create_indices(qbx, nblks, +def create_indices(queue, qbx, nblks, factor=1.0, random=True, method='elements', @@ -98,6 +99,9 @@ def create_indices(qbx, nblks, from pytential.linalg.proxy import ( partition_by_nodes, partition_by_elements) + if method == 'elements': + factor = 1.0 + if use_stage2: density_discr = qbx.quad_stage2_density_discr else: @@ -110,16 +114,19 @@ def create_indices(qbx, nblks, max_particles_in_box = nnodes // nblks if method == "nodes": - indices, ranges = partition_by_nodes(density_discr, + indices, ranges = partition_by_nodes(queue, density_discr, use_tree=use_tree, max_particles_in_box=max_particles_in_box) elif method == "elements": - indices, ranges = partition_by_elements(density_discr, + indices, ranges = partition_by_elements(queue, density_discr, use_tree=use_tree, max_particles_in_box=max_particles_in_box) else: raise ValueError("unknown method: {}".format(method)) # take a subset of the points if abs(factor - 1.0) > 1.0e-14: + indices = indices.get(queue) + ranges = ranges.get(queue) + indices_ = np.empty(ranges.shape[0] - 1, dtype=np.object) for i in range(ranges.shape[0] - 1): iidx = indices[np.s_[ranges[i]:ranges[i + 1]]] @@ -131,8 +138,9 @@ def create_indices(qbx, nblks, else: indices_[i] = iidx[:isize] - ranges = np.cumsum([0] + [r.shape[0] for r in indices_]) - indices = np.hstack(indices_) + ranges = to_device(queue, + np.cumsum([0] + [r.shape[0] for r in indices_])) + indices = to_device(queue, np.hstack(indices_)) return indices, ranges @@ -141,7 +149,7 @@ def create_indices(qbx, nblks, @pytest.mark.parametrize("use_tree", [True, False]) @pytest.mark.parametrize("ndim", [2, 3]) def test_partition_points(ctx_factory, method, use_tree, ndim, visualize=False): - def plot_indices(pid, discr, indices, ranges): + def _plot_indices(pid, discr, indices, ranges): import matplotlib.pyplot as pt pt.figure(figsize=(10, 8)) @@ -201,7 +209,7 @@ def test_partition_points(ctx_factory, method, use_tree, ndim, visualize=False): nblks = 10 t_start = time.time() - srcindices, srcranges = create_indices(qbx, nblks, + srcindices, srcranges = create_indices(queue, qbx, nblks, method=method, use_tree=use_tree, factor=1.0) nblks = srcranges.shape[0] - 1 t_end = time.time() @@ -210,13 +218,12 @@ def test_partition_points(ctx_factory, method, use_tree, ndim, visualize=False): if visualize: discr = qbx.resampler.from_discr - plot_indices(0, discr, srcindices, srcranges) + _plot_indices(0, discr, srcindices.get(queue), srcranges.get(queue)) if method == "elements": - from meshmode.discretization.connection import flatten_chained_connection - resampler = flatten_chained_connection(queue, qbx.resampler) - from pytential.linalg.proxy import partition_from_coarse + resampler = qbx.direct_resampler + t_start = time.time() srcindices_, srcranges_ = \ partition_from_coarse(queue, resampler, srcindices, srcranges) @@ -224,6 +231,11 @@ def test_partition_points(ctx_factory, method, use_tree, ndim, visualize=False): if visualize: print('Time: {:.5f}s'.format(t_end - t_start)) + srcindices = srcindices.get(queue) + srcranges = srcranges.get(queue) + srcindices_ = srcindices_.get(queue) + srcranges_ = 
srcranges_.get(queue) + sources = resampler.from_discr.nodes().get(queue) sources_ = resampler.to_discr.nodes().get(queue) for i in range(srcranges.shape[0] - 1): @@ -236,7 +248,7 @@ def test_partition_points(ctx_factory, method, use_tree, ndim, visualize=False): if visualize: discr = resampler.to_discr - plot_indices(1, discr, srcindices_, srcranges_) + _plot_indices(1, discr, srcindices_, srcranges_) @pytest.mark.parametrize("ndim", [2, 3]) @@ -250,7 +262,7 @@ def test_proxy_generator(ctx_factory, ndim, visualize=False): qbx = create_data(queue, ndim=ndim)[0] nblks = 10 - srcindices, srcranges = create_indices(qbx, nblks, + srcindices, srcranges = create_indices(queue, qbx, nblks, method='nodes', factor=0.6) nblks = srcranges.shape[0] - 1 @@ -275,6 +287,9 @@ def test_proxy_generator(ctx_factory, ndim, visualize=False): print(lp.generate_code_v2(knl).device_code()) proxies = np.vstack([p.get(queue) for p in proxies]) + srcindices = srcindices.get(queue) + srcranges = srcranges.get(queue) + if visualize: if qbx.ambient_dim == 2: import matplotlib.pyplot as pt @@ -351,7 +366,7 @@ def test_area_query(ctx_factory, ndim, visualize=False): qbx = create_data(queue, ndim=ndim)[0] nblks = 10 - srcindices, srcranges = create_indices(qbx, nblks, + srcindices, srcranges = create_indices(queue, qbx, nblks, method='nodes', factor=0.6) nblks = srcranges.shape[0] - 1 @@ -364,6 +379,9 @@ def test_area_query(ctx_factory, ndim, visualize=False): neighbors = neighbors.get(queue) ngbranges = ngbranges.get(queue) + srcindices = srcindices.get(queue) + srcranges = srcranges.get(queue) + if visualize: if ndim == 2: import matplotlib.pyplot as pt -- GitLab From cb7bea8f7e0cc9fdbd58f98d0fe35cf272fa13ce Mon Sep 17 00:00:00 2001 From: Alex Fikl Date: Thu, 21 Jun 2018 10:01:49 -0500 Subject: [PATCH 08/25] add missing __init__ --- pytential/linalg/__init__.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 pytential/linalg/__init__.py diff --git a/pytential/linalg/__init__.py b/pytential/linalg/__init__.py new file mode 100644 index 00000000..b4a471df --- /dev/null +++ b/pytential/linalg/__init__.py @@ -0,0 +1,23 @@ +from __future__ import division + +__copyright__ = "Copyright (C) 2018 Andreas Kloeckner" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+""" -- GitLab From cd019e2643c36df89818738bd9853c73d237487d Mon Sep 17 00:00:00 2001 From: Alex Fikl Date: Thu, 21 Jun 2018 11:44:30 -0500 Subject: [PATCH 09/25] direct-solver: clean up proxy tests a bit --- pytential/linalg/proxy.py | 12 +- test/test_linalg_proxy.py | 324 +++++++++++++++++--------------------- 2 files changed, 153 insertions(+), 183 deletions(-) diff --git a/pytential/linalg/proxy.py b/pytential/linalg/proxy.py index d00dd410..fd29315c 100644 --- a/pytential/linalg/proxy.py +++ b/pytential/linalg/proxy.py @@ -566,6 +566,8 @@ class ProxyGenerator(object): skeletons[idim, sklstart + npxyblock + ingb] = \ sources[idim, neighbors[ngbstart + ingb]] \ {id_prefix=write_ngb,nosync=write_pxy} + sklranges[irange + 1] = sklranges[irange] + \ + npxyblock + nngbblock end """, [ @@ -581,6 +583,8 @@ class ProxyGenerator(object): shape="nranges + 1"), lp.GlobalArg("skeletons", None, shape=(self.dim, "nproxies + nneighbors")), + lp.GlobalArg("sklranges", None, + shape="nranges + 1"), lp.ValueArg("nsources", np.int32), lp.ValueArg("nproxies", np.int32), lp.ValueArg("nneighbors", np.int32), @@ -605,11 +609,11 @@ class ProxyGenerator(object): self.get_neighbors(indices, ranges, centers, radii) # construct joint array - _, (skeletons,) = knl()(self.queue, + sklranges = cl.array.zeros(self.queue, ranges.shape, dtype=np.int) + _, (skeletons, sklranges) = knl()(self.queue, sources=sources, proxies=proxies, neighbors=neighbors, - pxyranges=pxyranges, nbrranges=nbrranges) - sklranges = np.array([p + n for p, n in zip(pxyranges.get(self.queue), - nbrranges.get(self.queue))]) + pxyranges=pxyranges, nbrranges=nbrranges, + sklranges=sklranges) return skeletons, sklranges diff --git a/test/test_linalg_proxy.py b/test/test_linalg_proxy.py index 915cc4b8..e3c6fdab 100644 --- a/test/test_linalg_proxy.py +++ b/test/test_linalg_proxy.py @@ -40,89 +40,70 @@ from pyopencl.tools import ( # noqa as pytest_generate_tests) -def create_data(queue, +def _build_qbx_discr(queue, + ndim=2, + nelements=30, target_order=7, qbx_order=4, - nelements=30, - curve_f=None, - ndim=2, k=0, - lpot_idx=2): + curve_f=None): if curve_f is None: curve_f = NArmedStarfish(5, 0.25) - # curve_f = partial(ellipse, 3) - - from sumpy.kernel import LaplaceKernel, HelmholtzKernel - if k: - knl = HelmholtzKernel(ndim) - knl_kwargs = {"k": k} - else: - knl = LaplaceKernel(ndim) - knl_kwargs = {} - - if lpot_idx == 1: - u_sym = sym.var("u") - op = sym.D(knl, u_sym, **knl_kwargs) - else: - u_sym = sym.var("u") - op = sym.S(knl, u_sym, **knl_kwargs) if ndim == 2: mesh = make_curve_mesh(curve_f, np.linspace(0, 1, nelements + 1), target_order) - else: + elif ndim == 3: mesh = generate_torus(10.0, 2.0, order=target_order) + else: + raise ValueError("unsupported ambient dimension") from meshmode.discretization import Discretization from meshmode.discretization.poly_element import \ InterpolatoryQuadratureSimplexGroupFactory from pytential.qbx import QBXLayerPotentialSource - pre_density_discr = Discretization( + density_discr = Discretization( queue.context, mesh, InterpolatoryQuadratureSimplexGroupFactory(target_order)) - qbx, _ = QBXLayerPotentialSource(pre_density_discr, 4 * target_order, - qbx_order, - # Don't use FMM for now + qbx, _ = QBXLayerPotentialSource(density_discr, + fine_order=4 * target_order, + qbx_order=qbx_order, fmm_order=False).with_refinement() - return qbx, op, u_sym + return qbx + +def _build_block_index(queue, discr, + nblks=10, + factor=1.0, + method='elements', + use_tree=True): -def create_indices(queue, qbx, nblks, - 
factor=1.0, - random=True, - method='elements', - use_tree=True, - use_stage2=False): from pytential.linalg.proxy import ( partition_by_nodes, partition_by_elements) if method == 'elements': factor = 1.0 - if use_stage2: - density_discr = qbx.quad_stage2_density_discr - else: - density_discr = qbx.density_discr - if method == 'nodes': - nnodes = density_discr.nnodes + nnodes = discr.nnodes else: - nnodes = density_discr.mesh.nelements + nnodes = discr.mesh.nelements max_particles_in_box = nnodes // nblks + # create index ranges if method == "nodes": - indices, ranges = partition_by_nodes(queue, density_discr, + indices, ranges = partition_by_nodes(queue, discr, use_tree=use_tree, max_particles_in_box=max_particles_in_box) elif method == "elements": - indices, ranges = partition_by_elements(queue, density_discr, + indices, ranges = partition_by_elements(queue, discr, use_tree=use_tree, max_particles_in_box=max_particles_in_box) else: raise ValueError("unknown method: {}".format(method)) - # take a subset of the points + # randomly pick a subset of points if abs(factor - 1.0) > 1.0e-14: indices = indices.get(queue) ranges = ranges.get(queue) @@ -132,11 +113,8 @@ def create_indices(queue, qbx, nblks, iidx = indices[np.s_[ranges[i]:ranges[i + 1]]] isize = int(factor * len(iidx)) - if random: - indices_[i] = np.sort(np.random.choice(iidx, - size=isize, replace=False)) - else: - indices_[i] = iidx[:isize] + indices_[i] = np.sort( + np.random.choice(iidx, size=isize, replace=False)) ranges = to_device(queue, np.cumsum([0] + [r.shape[0] for r in indices_])) @@ -145,110 +123,121 @@ def create_indices(queue, qbx, nblks, return indices, ranges -@pytest.mark.parametrize("method", ["nodes", "elements"]) -@pytest.mark.parametrize("use_tree", [True, False]) -@pytest.mark.parametrize("ndim", [2, 3]) -def test_partition_points(ctx_factory, method, use_tree, ndim, visualize=False): - def _plot_indices(pid, discr, indices, ranges): - import matplotlib.pyplot as pt +def _plot_partition_indices(queue, discr, indices, ranges, **kwargs): + import matplotlib.pyplot as pt + args = [ + kwargs.get("method", "unknown"), + "tree" if kwargs.get("use_tree", False) else "linear", + kwargs.get("pid", "stage1"), + discr.ambient_dim + ] - pt.figure(figsize=(10, 8)) - pt.plot(np.diff(ranges)) + if isinstance(indices, cl.array.Array): + indices = indices.get() + ranges = ranges.get() - rangefile = "test_partition_{}_{}_ranges_{}.png".format(method, - "tree" if use_tree else "linear", pid) - pt.savefig(rangefile, dpi=300) - pt.clf() + pt.figure(figsize=(10, 8), dpi=300) + pt.plot(np.diff(ranges)) + pt.savefig("test_partition_{0}_{1}_{3}d_ranges_{2}.png".format(*args)) + pt.clf() - if ndim == 2: - sources = discr.nodes().get(queue) + if discr.ambient_dim == 2: + sources = discr.nodes().get(queue) - pt.figure(figsize=(10, 8)) + pt.figure(figsize=(10, 8), dpi=300) + + if indices.shape[0] != discr.nnodes: pt.plot(sources[0], sources[1], 'ko', alpha=0.5) - for i in range(nblks): - isrc = indices[np.s_[ranges[i]:ranges[i + 1]]] - pt.plot(sources[0][isrc], sources[1][isrc], 'o') - pt.xlim([-1.5, 1.5]) - pt.ylim([-1.5, 1.5]) - - pointfile = "test_partition_{}_{}_{}d_{}.png".format(method, - "tree" if use_tree else "linear", ndim, pid) - pt.savefig(pointfile, dpi=300) - pt.clf() - elif ndim == 3: - from meshmode.discretization import NoninterpolatoryElementGroupError - try: - discr.groups[0].basis() - except NoninterpolatoryElementGroupError: - return + for i in range(ranges.shape[0] - 1): + isrc = indices[np.s_[ranges[i]:ranges[i + 
1]]] + pt.plot(sources[0][isrc], sources[1][isrc], 'o') - from meshmode.discretization.visualization import make_visualizer - marker = -42.0 * np.ones(discr.nnodes) + pt.xlim([-1.5, 1.5]) + pt.ylim([-1.5, 1.5]) + pt.savefig("test_partition_{0}_{1}_{3}d_{2}.png".format(*args)) + pt.clf() + elif discr.ambient_dim == 3: + from meshmode.discretization import NoninterpolatoryElementGroupError + try: + discr.groups[0].basis() + except NoninterpolatoryElementGroupError: + return + + from meshmode.discretization.visualization import make_visualizer + marker = -42.0 * np.ones(discr.nnodes) - for i in range(nblks): - isrc = indices[np.s_[ranges[i]:ranges[i + 1]]] - marker[isrc] = 10.0 * (i + 1.0) + for i in range(ranges.shape[0] - 1): + isrc = indices[np.s_[ranges[i]:ranges[i + 1]]] + marker[isrc] = 10.0 * (i + 1.0) + + vis = make_visualizer(queue, discr, 10) - vis = make_visualizer(queue, discr, 10) + filename = "test_partition_{0}_{1}_{3}d_{2}.png".format(*args) + if os.path.isfile(filename): + os.remove(filename) - pointfile = "test_partition_{}_{}_{}d_{}.vtu".format(method, - "tree" if use_tree else "linear", ndim, pid) - if os.path.isfile(pointfile): - os.remove(pointfile) + vis.write_vtk_file(filename, [ + ("marker", cl.array.to_device(queue, marker)) + ]) - vis.write_vtk_file(pointfile, [ - ("marker", cl.array.to_device(queue, marker)) - ]) +@pytest.mark.parametrize("method", ["nodes", "elements"]) +@pytest.mark.parametrize("use_tree", [True, False]) +@pytest.mark.parametrize("ndim", [2, 3]) +def test_partition_points(ctx_factory, method, use_tree, ndim, visualize=False): ctx = ctx_factory() queue = cl.CommandQueue(ctx) - from sympy.core.cache import clear_cache - clear_cache() - qbx = create_data(queue, ndim=ndim)[0] + qbx = _build_qbx_discr(queue, ndim=ndim) + srcindices, srcranges = _build_block_index(queue, qbx.density_discr, + method=method, use_tree=use_tree, factor=0.6) - nblks = 10 - t_start = time.time() - srcindices, srcranges = create_indices(queue, qbx, nblks, - method=method, use_tree=use_tree, factor=1.0) - nblks = srcranges.shape[0] - 1 - t_end = time.time() - if visualize: - print('Time: {:.5f}s'.format(t_end - t_start)) + +@pytest.mark.parametrize("use_tree", [True, False]) +@pytest.mark.parametrize("ndim", [2, 3]) +def test_partition_coarse(ctx_factory, use_tree, ndim, visualize=False): + ctx = ctx_factory() + queue = cl.CommandQueue(ctx) + + qbx = _build_qbx_discr(queue, ndim=ndim) + srcindices, srcranges = _build_block_index(queue, qbx.density_discr, + method="elements", use_tree=use_tree) if visualize: discr = qbx.resampler.from_discr - _plot_indices(0, discr, srcindices.get(queue), srcranges.get(queue)) + _plot_partition_indices(queue, discr, srcindices, srcranges, + method="elements", use_tree=use_tree, pid="stage1") - if method == "elements": - from pytential.linalg.proxy import partition_from_coarse - resampler = qbx.direct_resampler + from pytential.linalg.proxy import partition_from_coarse + resampler = qbx.direct_resampler - t_start = time.time() - srcindices_, srcranges_ = \ - partition_from_coarse(queue, resampler, srcindices, srcranges) - t_end = time.time() - if visualize: - print('Time: {:.5f}s'.format(t_end - t_start)) + t_start = time.time() + srcindices_, srcranges_ = \ + partition_from_coarse(queue, resampler, srcindices, srcranges) + t_end = time.time() + if visualize: + print('Time: {:.5f}s'.format(t_end - t_start)) + + srcindices = srcindices.get() + srcranges = srcranges.get() + srcindices_ = srcindices_.get() + srcranges_ = srcranges_.get() - 
srcindices = srcindices.get(queue) - srcranges = srcranges.get(queue) - srcindices_ = srcindices_.get(queue) - srcranges_ = srcranges_.get(queue) + sources = resampler.from_discr.nodes().get(queue) + sources_ = resampler.to_discr.nodes().get(queue) - sources = resampler.from_discr.nodes().get(queue) - sources_ = resampler.to_discr.nodes().get(queue) - for i in range(srcranges.shape[0] - 1): - isrc = srcindices[np.s_[srcranges[i]:srcranges[i + 1]]] - isrc_ = srcindices_[np.s_[srcranges_[i]:srcranges_[i + 1]]] + for i in range(srcranges.shape[0] - 1): + isrc = srcindices[np.s_[srcranges[i]:srcranges[i + 1]]] + isrc_ = srcindices_[np.s_[srcranges_[i]:srcranges_[i + 1]]] - for j in range(ndim): - assert np.min(sources_[j][isrc_]) <= np.min(sources[j][isrc]) - assert np.max(sources_[j][isrc_]) >= np.max(sources[j][isrc]) + for j in range(ndim): + assert np.min(sources_[j][isrc_]) <= np.min(sources[j][isrc]) + assert np.max(sources_[j][isrc_]) >= np.max(sources[j][isrc]) - if visualize: - discr = resampler.to_discr - _plot_indices(1, discr, srcindices_, srcranges_) + if visualize: + discr = resampler.to_discr + _plot_partition_indices(queue, discr, srcindices_, srcranges_, + method="elements", use_tree=use_tree, pid="stage2") @pytest.mark.parametrize("ndim", [2, 3]) @@ -256,39 +245,22 @@ def test_proxy_generator(ctx_factory, ndim, visualize=False): ctx = ctx_factory() queue = cl.CommandQueue(ctx) - # prevent cache explosion - from sympy.core.cache import clear_cache - clear_cache() - qbx = create_data(queue, ndim=ndim)[0] - - nblks = 10 - srcindices, srcranges = create_indices(queue, qbx, nblks, - method='nodes', factor=0.6) - nblks = srcranges.shape[0] - 1 + qbx = _build_qbx_discr(queue, ndim=ndim) + srcindices, srcranges = _build_block_index(queue, qbx.density_discr, + method='nodes', factor=0.6) from pytential.linalg.proxy import ProxyGenerator gen = ProxyGenerator(queue, qbx, ratio=1.1) - centers, radii, proxies, pxyranges = \ - gen.get_proxies(srcindices, srcranges) + centers, radii, proxies, pxyranges = gen.get_proxies(srcindices, srcranges) if visualize: - knl = gen.get_kernel() - knl = lp.add_dtypes(knl, { - "nodes": np.float64, - "center_int": np.float64, - "center_ext": np.float64, - "expansion_radii": np.float64, - "ranges": np.int64, - "indices": np.int64, - "nnodes": np.int64, - "nranges": np.int64, - "nindices": np.int64}) - print(knl) - print(lp.generate_code_v2(knl).device_code()) - - proxies = np.vstack([p.get(queue) for p in proxies]) - srcindices = srcindices.get(queue) - srcranges = srcranges.get(queue) + srcindices = srcindices.get() + srcranges = srcranges.get() + + centers = np.vstack([c.get() for c in centers]) + radii = radii.get() + proxies = np.vstack([p.get() for p in proxies]) + pxyranges = pxyranges.get() if visualize: if qbx.ambient_dim == 2: @@ -301,9 +273,8 @@ def test_proxy_generator(ctx_factory, ndim, visualize=False): ce = get_centers_on_side(qbx, +1) ce = np.vstack([c.get(queue) for c in ce]) r = qbx._expansion_radii("nsources").get(queue) - pxyranges = pxyranges.get(queue) - for i in range(nblks): + for i in range(srcranges.shape[0] - 1): isrc = srcindices[np.s_[srcranges[i]:srcranges[i + 1]]] ipxy = np.s_[pxyranges[i]:pxyranges[i + 1]] @@ -337,10 +308,7 @@ def test_proxy_generator(ctx_factory, ndim, visualize=False): from meshmode.discretization.poly_element import \ InterpolatoryQuadratureSimplexGroupFactory - centers = np.vstack([c.get(queue) for c in centers]) - radii = radii.get(queue) - - for i in range(nblks): + for i in range(srcranges.shape[0] - 1): 
mesh = affine_map(gen.mesh, A=(radii[i] * np.eye(ndim)), b=centers[:, i].reshape(-1)) @@ -361,34 +329,32 @@ def test_area_query(ctx_factory, ndim, visualize=False): ctx = ctx_factory() queue = cl.CommandQueue(ctx) - from sympy.core.cache import clear_cache - clear_cache() - qbx = create_data(queue, ndim=ndim)[0] - - nblks = 10 - srcindices, srcranges = create_indices(queue, qbx, nblks, - method='nodes', factor=0.6) - nblks = srcranges.shape[0] - 1 + qbx = _build_qbx_discr(queue, ndim=ndim) + srcindices, srcranges = _build_block_index(queue, qbx.density_discr, + method='nodes', factor=0.6) # generate proxy points from pytential.linalg.proxy import ProxyGenerator gen = ProxyGenerator(queue, qbx, ratio=1.1) centers, radii, _, _ = gen.get_proxies(srcindices, srcranges) neighbors, ngbranges = gen.get_neighbors(srcindices, srcranges, centers, radii) - skeleton_nodes, sklranges = gen(srcindices, srcranges) + skeletons, sklranges = gen(srcindices, srcranges) + + if visualize: + srcindices = srcindices.get() + srcranges = srcranges.get() - neighbors = neighbors.get(queue) - ngbranges = ngbranges.get(queue) - srcindices = srcindices.get(queue) - srcranges = srcranges.get(queue) + neighbors = neighbors.get() + ngbranges = ngbranges.get() if visualize: if ndim == 2: import matplotlib.pyplot as pt density_nodes = qbx.density_discr.nodes().get(queue) - skeleton_nodes = skeleton_nodes.get(queue) + skeletons = skeletons.get(queue) + sklranges = sklranges.get(queue) - for i in range(nblks): + for i in range(srcranges.shape[0] - 1): isrc = srcindices[np.s_[srcranges[i]:srcranges[i + 1]]] ingb = neighbors[ngbranges[i]:ngbranges[i + 1]] iskl = np.s_[sklranges[i]:sklranges[i + 1]] @@ -402,7 +368,7 @@ def test_area_query(ctx_factory, ndim, visualize=False): 'o', ms=2.0) pt.plot(density_nodes[0, ingb], density_nodes[1, ingb], 'o', ms=2.0) - pt.plot(skeleton_nodes[0, iskl], skeleton_nodes[1, iskl], + pt.plot(skeletons[0, iskl], skeletons[1, iskl], 'x', ms=2.0) pt.xlim([-1.5, 1.5]) pt.ylim([-1.5, 1.5]) @@ -414,7 +380,7 @@ def test_area_query(ctx_factory, ndim, visualize=False): from meshmode.discretization.visualization import make_visualizer marker = np.empty(qbx.density_discr.nnodes) - for i in range(nblks): + for i in range(srcranges.shape[0] - 1): isrc = srcindices[np.s_[srcranges[i]:srcranges[i + 1]]] ingb = neighbors[ngbranges[i]:ngbranges[i + 1]] -- GitLab From 5b113f48ec4a0d56f575b7566eaa0b6e7eddcc0f Mon Sep 17 00:00:00 2001 From: Alex Fikl Date: Thu, 21 Jun 2018 15:27:40 -0500 Subject: [PATCH 10/25] direct-solver-proxy: split up proxy generator into smaller functions --- pytential/linalg/proxy.py | 412 ++++++++++++++++++++------------------ test/test_linalg_proxy.py | 74 ++++--- 2 files changed, 264 insertions(+), 222 deletions(-) diff --git a/pytential/linalg/proxy.py b/pytential/linalg/proxy.py index fd29315c..2bd1aa3d 100644 --- a/pytential/linalg/proxy.py +++ b/pytential/linalg/proxy.py @@ -30,8 +30,10 @@ import pyopencl as cl import pyopencl.array # noqa from pyopencl.array import to_device +from boxtree.tools import DeviceDataRecord + from pytools.obj_array import make_obj_array -from pytools import memoize_method, memoize_in +from pytools import memoize_method, memoize import loopy as lp from loopy.version import MOST_RECENT_LANGUAGE_VERSION @@ -41,13 +43,14 @@ __doc__ = """ Proxy Point Generation ~~~~~~~~~~~~~~~~~~~~~~ +.. autoclass:: ProxyGenerator + .. autofunction:: partition_by_nodes .. autofunction:: partition_by_elements .. autofunction:: partition_from_coarse -.. 
autoclass:: ProxyGenerator """ @@ -267,10 +270,9 @@ def partition_from_coarse(queue, resampler, from_indices, from_ranges): # {{{ proxy point generator class ProxyGenerator(object): - def __init__(self, queue, source, ratio=1.5, nproxy=31): + def __init__(self, source, ratio=1.5, nproxy=30, **kwargs): r""" - :arg queue: a :class:`pyopencl.CommandQueue`. - :arg source: a :class:`pytential.qbx.LayerPotentialSourceBase`. + :arg discr: a :class:`pytential.qbx.QBXLayerPotentialBase`. :arg ratio: a ratio used to compute the proxy point radius. The radius is computed in the :math:`L_2` norm, resulting in a circle or sphere of proxy points. For QBX, we have two radii of interest @@ -293,23 +295,21 @@ class ProxyGenerator(object): :arg nproxy: number of proxy points. """ - self.queue = queue self.source = source - self.context = self.queue.context self.ratio = abs(ratio) self.nproxy = int(abs(nproxy)) - self.dim = self.source.ambient_dim + self.ambient_dim = source.density_discr.ambient_dim - if self.dim == 2: + if self.ambient_dim == 2: from meshmode.mesh.generation import ellipse, make_curve_mesh - self.mesh = make_curve_mesh(lambda t: ellipse(1.0, t), - np.linspace(0.0, 1.0, self.nproxy + 1), - self.nproxy) - elif self.dim == 3: + self.ref_mesh = make_curve_mesh(lambda t: ellipse(1.0, t), + np.linspace(0.0, 1.0, self.nproxy + 1), + self.nproxy) + elif self.ambient_dim == 3: from meshmode.mesh.generation import generate_icosphere - self.mesh = generate_icosphere(1, self.nproxy) + self.ref_mesh = generate_icosphere(1, self.nproxy) else: raise ValueError("unsupported ambient dimension") @@ -321,7 +321,7 @@ class ProxyGenerator(object): radius_expr = "ratio * rqbx" # NOTE: centers of mass are computed using a second-order approximation - # that currently matches what's in `element_centers_of_mass`. + # that currently matches what is in `element_centers_of_mass`. 
knl = lp.make_kernel([ "{[irange]: 0 <= irange < nranges}", "{[i]: 0 <= i < npoints}", @@ -329,23 +329,23 @@ class ProxyGenerator(object): ], [""" for irange - <> npoints = ranges[irange + 1] - ranges[irange] + <> npoints = srcranges[irange + 1] - srcranges[irange] proxy_center[idim, irange] = 1.0 / npoints * \ - reduce(sum, i, nodes[idim, indices[i + ranges[irange]]]) \ + reduce(sum, i, nodes[idim, srcindices[i + srcranges[irange]]]) \ {{dup=idim:i}} <> rblk = simul_reduce(max, i, sqrt(simul_reduce(sum, idim, \ (proxy_center[idim, irange] - - nodes[idim, indices[i + ranges[irange]]]) ** 2))) + nodes[idim, srcindices[i + srcranges[irange]]]) ** 2))) <> rqbx_int = simul_reduce(max, i, sqrt(simul_reduce(sum, idim, \ (proxy_center[idim, irange] - - center_int[idim, indices[i + ranges[irange]]]) ** 2)) + \ - expansion_radii[indices[i + ranges[irange]]]) + center_int[idim, srcindices[i + srcranges[irange]]]) ** 2)) + \ + expansion_radii[srcindices[i + srcranges[irange]]]) <> rqbx_ext = simul_reduce(max, i, sqrt(simul_reduce(sum, idim, \ (proxy_center[idim, irange] - - center_ext[idim, indices[i + ranges[irange]]]) ** 2)) + \ - expansion_radii[indices[i + ranges[irange]]]) + center_ext[idim, srcindices[i + srcranges[irange]]]) ** 2)) + \ + expansion_radii[srcindices[i + srcranges[irange]]]) <> rqbx = if(rqbx_ext < rqbx_int, rqbx_int, rqbx_ext) proxy_radius[irange] = {radius_expr} @@ -353,19 +353,19 @@ class ProxyGenerator(object): """.format(radius_expr=radius_expr)], [ lp.GlobalArg("nodes", None, - shape=(self.dim, "nnodes")), + shape=(self.ambient_dim, "nnodes")), lp.GlobalArg("center_int", None, - shape=(self.dim, "nnodes"), dim_tags="sep,C"), + shape=(self.ambient_dim, "nnodes"), dim_tags="sep,C"), lp.GlobalArg("center_ext", None, - shape=(self.dim, "nnodes"), dim_tags="sep,C"), + shape=(self.ambient_dim, "nnodes"), dim_tags="sep,C"), lp.GlobalArg("expansion_radii", None, shape="nnodes"), - lp.GlobalArg("ranges", None, + lp.GlobalArg("srcranges", None, shape="nranges + 1"), - lp.GlobalArg("indices", None, + lp.GlobalArg("srcindices", None, shape="nindices"), lp.GlobalArg("proxy_center", None, - shape=(self.dim, "nranges")), + shape=(self.ambient_dim, "nranges")), lp.GlobalArg("proxy_radius", None, shape="nranges"), lp.ValueArg("nnodes", np.int64), @@ -375,7 +375,7 @@ class ProxyGenerator(object): name="proxy_generator_knl", assumptions="dim>=1 and nranges>=1", fixed_parameters=dict( - dim=self.dim, + dim=self.ambient_dim, ratio=self.ratio), lang_version=MOST_RECENT_LANGUAGE_VERSION) @@ -390,23 +390,24 @@ class ProxyGenerator(object): return knl - def get_proxies(self, indices, ranges, **kwargs): + def __call__(self, queue, srcindices, srcranges, **kwargs): """Generate proxy points for each given range of source points in - the discretization :attr:`source`. + the discretization in :attr:`source`. - :arg indices: a :class:`pyopencl.array.Array` of indices around + :arg queue: a :class:`pyopencl.CommandQueue`. + :arg srcindices: a :class:`pyopencl.array.Array` of srcindices around which to construct proxy balls. - :arg ranges: an :class:`pyopencl.array.Array` of size `(nranges + 1,)` - used to index into :attr:`indices`. Each one of the `nranges` + :arg srcranges: an :class:`pyopencl.array.Array` of size `(nranges + 1,)` + used to index into :attr:`srcindices`. Each one of the `nranges` ranges will get a proxy ball. 
- :return: a tuple of `(centers, radii, proxies, pryranges)`, where + :return: a tuple of `(centers, radii, proxies, pxyranges)`, where each value is a :class:`pyopencl.array.Array`. The sizes of the arrays are as follows: `centers` is of size - `(2, nranges)`, `radii` is of size `(nranges,)`, `pryranges` is + `(2, nranges)`, `radii` is of size `(nranges,)`, `pxyranges` is of size `(nranges + 1,)` and `proxies` is of size `(2, nranges * nproxies)`. The proxy points in a range :math:`i` - can be obtained by a slice `proxies[pryranges[i]:pryranges[i + 1]]` + can be obtained by a slice `proxies[pxyranges[i]:pxyranges[i + 1]]` and are all at a distance `radii[i]` from the range center `centers[i]`. """ @@ -414,86 +415,100 @@ class ProxyGenerator(object): from pytential.qbx.utils import get_centers_on_side knl = self.get_kernel() - _, (centers_dev, radii_dev,) = knl(self.queue, + _, (centers_dev, radii_dev,) = knl(queue, nodes=self.source.density_discr.nodes(), center_int=get_centers_on_side(self.source, -1), center_ext=get_centers_on_side(self.source, +1), expansion_radii=self.source._expansion_radii("nsources"), - indices=indices, ranges=ranges, **kwargs) + srcindices=srcindices, srcranges=srcranges, **kwargs) centers = centers_dev.get() radii = radii_dev.get() from meshmode.mesh.processing import affine_map - proxies = np.empty(ranges.shape[0] - 1, dtype=np.object) + proxies = np.empty(srcranges.shape[0] - 1, dtype=np.object) - for i in range(ranges.shape[0] - 1): - mesh = affine_map(self.mesh, - A=(radii[i] * np.eye(self.dim)), + for i in range(srcranges.shape[0] - 1): + mesh = affine_map(self.ref_mesh, + A=(radii[i] * np.eye(self.ambient_dim)), b=centers[:, i].reshape(-1)) proxies[i] = mesh.vertices - pxyranges = np.cumsum([0] + [p.shape[-1] for p in proxies]) - pxyranges = cl.array.to_device(self.queue, pxyranges) + pxyranges = cl.array.arange(queue, 0, + proxies.shape[0] * proxies[0].shape[-1] + 1, proxies[0].shape[-1], + dtype=srcranges.dtype) proxies = make_obj_array([ - cl.array.to_device(self.queue, np.hstack([p[idim] for p in proxies])) - for idim in range(self.dim)]) - centers_dev = make_obj_array([ - centers_dev[idim].with_queue(self.queue).copy() - for idim in range(self.dim)]) - - return centers_dev, radii_dev, proxies, pxyranges - - def get_neighbors(self, indices, ranges, centers, radii): - """Generate a set of neighboring points for each range of source - points in :attr:`source`. Neighboring points are defined as all - the points inside a proxy ball :math:`i` that do not also belong to - the set of source points in the same range :math:`i`. - - :arg indices: a :class:`pyopencl.array.Array` of indices for a subset - of the source points. - :arg ranges: a :class:`pyopencl.array.Array` used to index into - the :attr:`indices` array. - :arg centers: a :class:`pyopencl.array.Array` containing the center - of each proxy ball. - :arg radii: a :class:`pyopencl.array.Array` containing the radius - of each proxy ball. - - :return: a tuple `(neighbours, nbrranges)` where each value is a - :class:`pyopencl.array.Array` of integers. For a range :math:`i` - in `nbrranges`, the corresponding slice of the `neighbours` array - is a subset of :attr:`indices` such that all points are inside - the proxy ball centered at `centers[i]` of radius `radii[i]` - that is not included in `indices[ranges[i]:ranges[i + 1]]`. 
- """ - - if isinstance(indices, cl.array.Array): - indices = indices.get(self.queue) - ranges = ranges.get(self.queue) + cl.array.to_device(queue, np.hstack([p[idim] for p in proxies])) + for idim in range(self.ambient_dim)]) + centers = make_obj_array([ + centers_dev[idim].with_queue(queue).copy() + for idim in range(self.ambient_dim)]) + + assert pxyranges[-1] == proxies[0].shape[0] + return proxies, pxyranges, centers, radii_dev + + +def build_neighbor_list(discr, srcindices, srcranges, pxycenters, pxyradii, + max_nodes_in_box=30, **kwargs): + """Generate a set of neighboring points for each range of points in + :attr:`discr`. Neighboring points of a range :math:`i` are defined + as all the points inside the proxy ball :math:`i` that do not also + belong to the range itself. + + :arg discr: a :class:`meshmode.discretization.Discretization`. + :arg srcindices: an array of indices for a subset of the nodes in + :attr:`discr`. + :arg srcranges: an array used to index into the :attr:`srcindices` array. + :arg pxycenters: an array containing the center of each proxy ball. + :arg pxyradii: an array containing the radius of each proxy ball. + + :return: a tuple `(neighbours, nbrranges)` where each value is a + :class:`pyopencl.array.Array` of integers. For a range :math:`i` + in `nbrranges`, the corresponding slice of the `neighbours` array + is a subset of :attr:`srcindices` such that all points are inside + the proxy ball centered at `centers[i]` of radius `radii[i]` + that is not included in `srcindices[srcranges[i]:srcranges[i + 1]]`. + """ - nranges = radii.shape[0] + 1 - sources = self.source.density_discr.nodes().get(self.queue) + with cl.CommandQueue(discr.cl_context) as queue: + if isinstance(srcindices, cl.array.Array): + srcindices = srcindices.get(queue) + if isinstance(srcranges, cl.array.Array): + srcranges = srcranges.get(queue) + + # NOTE: this is used for multiple reasons: + # * TreeBuilder takes object arrays + # * `srcndices` can be a small subset of nodes, so this will save + # some work + # * `srcindices` may reorder the array returned by nodes(), so this + # makes sure that we have the same order in tree.user_source_ids + # and friends + sources = discr.nodes().get(queue) sources = make_obj_array([ - cl.array.to_device(self.queue, sources[idim, indices]) - for idim in range(self.dim)]) + cl.array.to_device(queue, sources[idim, srcindices]) + for idim in range(discr.ambient_dim)]) # construct tree from boxtree import TreeBuilder - builder = TreeBuilder(self.context) - tree, _ = builder(self.queue, sources, max_particles_in_box=30) + builder = TreeBuilder(discr.cl_context) + tree, _ = builder(queue, sources, + max_particles_in_box=max_nodes_in_box) from boxtree.area_query import AreaQueryBuilder - builder = AreaQueryBuilder(self.context) - query, _ = builder(self.queue, tree, centers, radii) + builder = AreaQueryBuilder(discr.cl_context) + query, _ = builder(queue, tree, pxycenters, pxyradii) # find nodes inside each proxy ball - tree = tree.get(self.queue) - query = query.get(self.queue) - centers_ = np.vstack([centers[idim].get(self.queue) - for idim in range(self.dim)]) - radii_ = radii.get(self.queue) - - neighbors = np.empty(nranges - 1, dtype=np.object) - for iproxy in range(nranges - 1): + tree = tree.get(queue) + query = query.get(queue) + + if isinstance(pxycenters[0], cl.array.Array): + pxycenters = np.vstack([pxycenters[idim].get(queue) + for idim in range(discr.ambient_dim)]) + if isinstance(pxyradii, cl.array.Array): + pxyradii = pxyradii.get(queue) + + 
neighbors = np.empty(srcranges.shape[0] - 1, dtype=np.object) + for iproxy in range(srcranges.shape[0] - 1): # get list of boxes intersecting the current ball istart = query.leaves_near_ball_starts[iproxy] iend = query.leaves_near_ball_starts[iproxy + 1] @@ -505,117 +520,124 @@ class ProxyGenerator(object): isources = np.hstack([np.arange(s, e) for s, e in zip(istart, iend)]) nodes = np.vstack([tree.sources[idim][isources] - for idim in range(self.dim)]) + for idim in range(discr.ambient_dim)]) isources = tree.user_source_ids[isources] # get nodes inside the ball but outside the current range - center = centers_[:, iproxy].reshape(-1, 1) - radius = radii_[iproxy] + center = pxycenters[:, iproxy].reshape(-1, 1) + radius = pxyradii[iproxy] mask = (la.norm(nodes - center, axis=0) < radius) & \ - ((isources < ranges[iproxy]) | (ranges[iproxy + 1] <= isources)) + ((isources < srcranges[iproxy]) | (srcranges[iproxy + 1] <= isources)) - neighbors[iproxy] = indices[isources[mask]] + neighbors[iproxy] = srcindices[isources[mask]] - nbrranges = np.cumsum([0] + [n.shape[0] for n in neighbors]) - neighbors = np.hstack(neighbors) + nbrranges = to_device(queue, + np.cumsum([0] + [n.shape[0] for n in neighbors])) + neighbors = to_device(queue, np.hstack(neighbors)) - return (to_device(self.queue, neighbors), - to_device(self.queue, nbrranges)) + return neighbors, nbrranges - def __call__(self, indices, ranges, **kwargs): - """ - :arg indices: a :class:`pyopencl.array.Array` of indices for points - in :attr:`source`. - :arg ranges: a :class:`pyopencl.array.Array` describing the ranges - from :attr:`indices` around which to build proxy points. For each - range, this builds a ball of proxy points centered - at the center of mass of the points in the range with a radius - defined by :attr:`ratio`. - - :returns: a tuple `(skeletons, sklranges)` where each value is a - :class:`pyopencl.array.Array`. For a range :math:`i`, we can - get the slice using `skeletons[sklranges[i]:sklranges[i + 1]]`. - The skeleton points in a range represents the union of a set - of generated proxy points and all the source points inside the - proxy ball that do not also belong to the current range in - :attr:`indices`. 
- """ - @memoize_in(self, "concat_skl_knl") - def knl(): - loopy_knl = lp.make_kernel([ - "{[irange, idim]: 0 <= irange < nranges and \ - 0 <= idim < dim}", - "{[ipxy, ingb]: 0 <= ipxy < npxyblock and \ - 0 <= ingb < nngbblock}" - ], - """ - for irange - <> pxystart = pxyranges[irange] - <> pxyend = pxyranges[irange + 1] - <> npxyblock = pxyend - pxystart - - <> ngbstart = nbrranges[irange] - <> ngbend = nbrranges[irange + 1] - <> nngbblock = ngbend - ngbstart - - <> sklstart = pxyranges[irange] + nbrranges[irange] - skeletons[idim, sklstart + ipxy] = \ - proxies[idim, pxystart + ipxy] \ - {id_prefix=write_pxy,nosync=write_ngb} - skeletons[idim, sklstart + npxyblock + ingb] = \ - sources[idim, neighbors[ngbstart + ingb]] \ - {id_prefix=write_ngb,nosync=write_pxy} - sklranges[irange + 1] = sklranges[irange] + \ - npxyblock + nngbblock - end - """, - [ - lp.GlobalArg("sources", None, - shape=(self.dim, "nsources")), - lp.GlobalArg("proxies", None, - shape=(self.dim, "nproxies"), dim_tags="sep,C"), - lp.GlobalArg("neighbors", None, - shape="nneighbors"), - lp.GlobalArg("pxyranges", None, - shape="nranges + 1"), - lp.GlobalArg("nbrranges", None, - shape="nranges + 1"), - lp.GlobalArg("skeletons", None, - shape=(self.dim, "nproxies + nneighbors")), - lp.GlobalArg("sklranges", None, - shape="nranges + 1"), - lp.ValueArg("nsources", np.int32), - lp.ValueArg("nproxies", np.int32), - lp.ValueArg("nneighbors", np.int32), - "..." - ], - name="concat_skl", - default_offset=lp.auto, - silenced_warnings="write_race(write_*)", - fixed_parameters=dict(dim=self.dim), - lang_version=MOST_RECENT_LANGUAGE_VERSION) - - loopy_knl = lp.tag_inames(loopy_knl, "idim*:unr") - loopy_knl = lp.split_iname(loopy_knl, "irange", 128, outer_tag="g.0") - - return loopy_knl - - # construct point arrays - sources = self.source.density_discr.nodes() - centers, radii, proxies, pxyranges = \ - self.get_proxies(indices, ranges, **kwargs) - neighbors, nbrranges = \ - self.get_neighbors(indices, ranges, centers, radii) - - # construct joint array - sklranges = cl.array.zeros(self.queue, ranges.shape, dtype=np.int) - _, (skeletons, sklranges) = knl()(self.queue, - sources=sources, proxies=proxies, neighbors=neighbors, - pxyranges=pxyranges, nbrranges=nbrranges, +def build_skeleton_list(source, srcindices, srcranges, **kwargs): + """ + :arg source: a :class:`pytential.qbx.QBXLayerPotentialBase`. + :arg srcindices: a :class:`pyopencl.array.Array` of indices for points + in :attr:`source`. + :arg srcranges: a :class:`pyopencl.array.Array` describing the ranges + from :attr:`srcindices` around which to build proxy points. For each + range, this builds a ball of proxy points centered + at the center of mass of the points in the range with a radius + defined by :attr:`ratio`. + :arg kwargs: additional arguments passed to :class:`ProxyGenerator` + or :func:`build_neighbor_list`. + + :returns: a tuple `(skeletons, sklranges)` where each value is a + :class:`pyopencl.array.Array`. For a range :math:`i`, we can + get the slice using `skeletons[sklranges[i]:sklranges[i + 1]]`. + The skeleton points in a range represents the union of a set + of generated proxy points and all the source points inside the + proxy ball that do not also belong to the current range in + :attr:`indices`. 
+ """ + + @memoize + def knl(): + loopy_knl = lp.make_kernel([ + "{[irange, idim]: 0 <= irange < nranges and \ + 0 <= idim < dim}", + "{[ipxy, ingb]: 0 <= ipxy < npxyblock and \ + 0 <= ingb < nngbblock}" + ], + """ + for irange + <> pxystart = pxyranges[irange] + <> pxyend = pxyranges[irange + 1] + <> npxyblock = pxyend - pxystart + + <> ngbstart = nbrranges[irange] + <> ngbend = nbrranges[irange + 1] + <> nngbblock = ngbend - ngbstart + + <> sklstart = pxyranges[irange] + nbrranges[irange] + skeletons[idim, sklstart + ipxy] = \ + proxies[idim, pxystart + ipxy] \ + {id_prefix=write_pxy,nosync=write_ngb} + skeletons[idim, sklstart + npxyblock + ingb] = \ + sources[idim, neighbors[ngbstart + ingb]] \ + {id_prefix=write_ngb,nosync=write_pxy} + sklranges[irange + 1] = sklranges[irange] + \ + npxyblock + nngbblock + end + """, + [ + lp.GlobalArg("sources", None, + shape=(source.ambient_dim, "nsources")), + lp.GlobalArg("proxies", None, + shape=(source.ambient_dim, "nproxies"), dim_tags="sep,C"), + lp.GlobalArg("neighbors", None, + shape="nneighbors"), + lp.GlobalArg("pxyranges", None, + shape="nranges + 1"), + lp.GlobalArg("nbrranges", None, + shape="nranges + 1"), + lp.GlobalArg("skeletons", None, + shape=(source.ambient_dim, "nproxies + nneighbors")), + lp.GlobalArg("sklranges", None, + shape="nranges + 1"), + lp.ValueArg("nsources", np.int32), + lp.ValueArg("nproxies", np.int32), + lp.ValueArg("nneighbors", np.int32), + "..." + ], + name="concat_skl", + default_offset=lp.auto, + silenced_warnings="write_race(write_*)", + fixed_parameters=dict(dim=source.ambient_dim), + lang_version=MOST_RECENT_LANGUAGE_VERSION) + + loopy_knl = lp.tag_inames(loopy_knl, "idim*:unr") + loopy_knl = lp.split_iname(loopy_knl, "irange", 128, outer_tag="g.0") + + return loopy_knl + + with cl.CommandQueue(source.cl_context) as queue: + proxy = ProxyGenerator(source, **kwargs) + proxies, pxyranges, pxycenters, pxyradii = \ + proxy(queue, srcindices, srcranges) + + neighbors, nbrranges = build_neighbor_list(source.density_discr, + srcindices, srcranges, pxycenters, pxyradii, **kwargs) + + sklranges = cl.array.zeros(queue, srcranges.shape, dtype=np.int) + _, (skeletons, sklranges) = knl()(queue, + sources=source.density_discr.nodes(), + proxies=proxies, + neighbors=neighbors, + pxyranges=pxyranges, + nbrranges=nbrranges, sklranges=sklranges) - return skeletons, sklranges + return skeletons, sklranges # }}} diff --git a/test/test_linalg_proxy.py b/test/test_linalg_proxy.py index e3c6fdab..f4055b27 100644 --- a/test/test_linalg_proxy.py +++ b/test/test_linalg_proxy.py @@ -26,6 +26,8 @@ import os import time import numpy as np +import numpy.linalg as la + import pyopencl as cl from pyopencl.array import to_device @@ -112,6 +114,7 @@ def _build_block_index(queue, discr, for i in range(ranges.shape[0] - 1): iidx = indices[np.s_[ranges[i]:ranges[i + 1]]] isize = int(factor * len(iidx)) + isize = max(1, min(isize, len(iidx))) indices_[i] = np.sort( np.random.choice(iidx, size=isize, replace=False)) @@ -184,7 +187,7 @@ def _plot_partition_indices(queue, discr, indices, ranges, **kwargs): @pytest.mark.parametrize("method", ["nodes", "elements"]) @pytest.mark.parametrize("use_tree", [True, False]) @pytest.mark.parametrize("ndim", [2, 3]) -def test_partition_points(ctx_factory, method, use_tree, ndim, visualize=False): +def test_partition_points(ctx_factory, method, use_tree, ndim, visualize=True): ctx = ctx_factory() queue = cl.CommandQueue(ctx) @@ -195,7 +198,7 @@ def test_partition_points(ctx_factory, method, use_tree, ndim, 
visualize=False): @pytest.mark.parametrize("use_tree", [True, False]) @pytest.mark.parametrize("ndim", [2, 3]) -def test_partition_coarse(ctx_factory, use_tree, ndim, visualize=False): +def test_partition_coarse(ctx_factory, use_tree, ndim, visualize=True): ctx = ctx_factory() queue = cl.CommandQueue(ctx) @@ -241,27 +244,35 @@ def test_partition_coarse(ctx_factory, use_tree, ndim, visualize=False): @pytest.mark.parametrize("ndim", [2, 3]) -def test_proxy_generator(ctx_factory, ndim, visualize=False): +@pytest.mark.parametrize("factor", [1.0, 0.6]) +def test_proxy_generator(ctx_factory, ndim, factor, visualize=True): ctx = ctx_factory() queue = cl.CommandQueue(ctx) qbx = _build_qbx_discr(queue, ndim=ndim) srcindices, srcranges = _build_block_index(queue, qbx.density_discr, - method='nodes', factor=0.6) + method='nodes', factor=factor) from pytential.linalg.proxy import ProxyGenerator - gen = ProxyGenerator(queue, qbx, ratio=1.1) - centers, radii, proxies, pxyranges = gen.get_proxies(srcindices, srcranges) + generator = ProxyGenerator(qbx, ratio=1.1) + proxies, pxyranges, pxycenters, pxyradii = \ + generator(queue, srcindices, srcranges) + + proxies = np.vstack([p.get() for p in proxies]) + pxyranges = pxyranges.get() + pxycenters = np.vstack([c.get() for c in pxycenters]) + pxyradii = pxyradii.get() + + for i in range(srcranges.shape[0] - 1): + ipxy = np.s_[pxyranges[i]:pxyranges[i + 1]] + + r = la.norm(proxies[:, ipxy] - pxycenters[:, i].reshape(-1, 1), axis=0) + assert np.allclose(r - pxyradii[i], 0.0, atol=1.0e-14) if visualize: srcindices = srcindices.get() srcranges = srcranges.get() - centers = np.vstack([c.get() for c in centers]) - radii = radii.get() - proxies = np.vstack([p.get() for p in proxies]) - pxyranges = pxyranges.get() - if visualize: if qbx.ambient_dim == 2: import matplotlib.pyplot as pt @@ -309,9 +320,9 @@ def test_proxy_generator(ctx_factory, ndim, visualize=False): InterpolatoryQuadratureSimplexGroupFactory for i in range(srcranges.shape[0] - 1): - mesh = affine_map(gen.mesh, - A=(radii[i] * np.eye(ndim)), - b=centers[:, i].reshape(-1)) + mesh = affine_map(generator.ref_mesh, + A=(pxyradii[i] * np.eye(ndim)), + b=pxycenters[:, i].reshape(-1)) mesh = merge_disjoint_meshes([mesh, qbx.density_discr.mesh]) discr = Discretization(ctx, mesh, @@ -325,27 +336,36 @@ def test_proxy_generator(ctx_factory, ndim, visualize=False): @pytest.mark.parametrize("ndim", [2, 3]) -def test_area_query(ctx_factory, ndim, visualize=False): +@pytest.mark.parametrize("factor", [1.0, 0.6]) +def test_area_query(ctx_factory, ndim, factor, visualize=True): ctx = ctx_factory() queue = cl.CommandQueue(ctx) qbx = _build_qbx_discr(queue, ndim=ndim) srcindices, srcranges = _build_block_index(queue, qbx.density_discr, - method='nodes', factor=0.6) + method='nodes', factor=factor) # generate proxy points from pytential.linalg.proxy import ProxyGenerator - gen = ProxyGenerator(queue, qbx, ratio=1.1) - centers, radii, _, _ = gen.get_proxies(srcindices, srcranges) - neighbors, ngbranges = gen.get_neighbors(srcindices, srcranges, centers, radii) - skeletons, sklranges = gen(srcindices, srcranges) + generator = ProxyGenerator(qbx, ratio=1.1) + _, _, pxycenters, pxyradii = generator(queue, srcindices, srcranges) - if visualize: - srcindices = srcindices.get() - srcranges = srcranges.get() + from pytential.linalg.proxy import build_neighbor_list, build_skeleton_list + neighbors, nbrranges = build_neighbor_list(qbx.density_discr, + srcindices, srcranges, pxycenters, pxyradii) + skeletons, sklranges = 
build_skeleton_list(qbx, srcindices, srcranges, + ratio=1.1) + + srcindices = srcindices.get() + srcranges = srcranges.get() + neighbors = neighbors.get() + nbrranges = nbrranges.get() + + for i in range(srcranges.shape[0] - 1): + isrc = np.s_[srcranges[i]:srcranges[i + 1]] + inbr = np.s_[nbrranges[i]:nbrranges[i + 1]] - neighbors = neighbors.get() - ngbranges = ngbranges.get() + assert not np.any(np.isin(neighbors[inbr], srcindices[isrc])) if visualize: if ndim == 2: @@ -356,7 +376,7 @@ def test_area_query(ctx_factory, ndim, visualize=False): for i in range(srcranges.shape[0] - 1): isrc = srcindices[np.s_[srcranges[i]:srcranges[i + 1]]] - ingb = neighbors[ngbranges[i]:ngbranges[i + 1]] + ingb = neighbors[nbrranges[i]:nbrranges[i + 1]] iskl = np.s_[sklranges[i]:sklranges[i + 1]] pt.figure(figsize=(10, 8)) @@ -382,7 +402,7 @@ def test_area_query(ctx_factory, ndim, visualize=False): for i in range(srcranges.shape[0] - 1): isrc = srcindices[np.s_[srcranges[i]:srcranges[i + 1]]] - ingb = neighbors[ngbranges[i]:ngbranges[i + 1]] + ingb = neighbors[nbrranges[i]:nbrranges[i + 1]] # TODO: some way to turn off some of the interpolations # would help visualize this better. -- GitLab From 437e655618cf2b9cd693e10b9127d0f65db7386c Mon Sep 17 00:00:00 2001 From: Alex Fikl Date: Thu, 21 Jun 2018 15:58:25 -0500 Subject: [PATCH 11/25] direct-solver-proxy: fix some docs --- doc/conf.py | 1 + pytential/linalg/proxy.py | 162 ++++++++++++++++++++------------------ test/test_linalg_proxy.py | 27 +++---- 3 files changed, 98 insertions(+), 92 deletions(-) diff --git a/doc/conf.py b/doc/conf.py index 81b15205..6df4af25 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -271,6 +271,7 @@ intersphinx_mapping = { 'http://docs.python.org/': None, 'http://documen.tician.de/boxtree/': None, 'http://docs.scipy.org/doc/numpy/': None, + 'http://documen.tician.de/meshmode/': None, 'http://documen.tician.de/modepy/': None, 'http://documen.tician.de/pyopencl/': None, 'http://documen.tician.de/pytools/': None, diff --git a/pytential/linalg/proxy.py b/pytential/linalg/proxy.py index 2bd1aa3d..f8fb9845 100644 --- a/pytential/linalg/proxy.py +++ b/pytential/linalg/proxy.py @@ -30,8 +30,6 @@ import pyopencl as cl import pyopencl.array # noqa from pyopencl.array import to_device -from boxtree.tools import DeviceDataRecord - from pytools.obj_array import make_obj_array from pytools import memoize_method, memoize @@ -51,6 +49,9 @@ Proxy Point Generation .. autofunction:: partition_from_coarse +.. autofunction:: build_neighbor_list + +.. autofunction:: build_skeleton_list """ @@ -65,18 +66,18 @@ def _element_node_range(group, ielement): def partition_by_nodes(queue, discr, use_tree=True, - max_particles_in_box=30): + max_nodes_in_box=30): """Generate clusters / ranges of nodes. The partition is created at the lowest level of granularity, i.e. nodes. This results in balanced ranges of points, but will split elements across different ranges. :arg queue: a :class:`pyopencl.CommandQueue`. - :arg discr: a :class:`~meshmode.discretization.Discretization`. + :arg discr: a :class:`meshmode.discretization.Discretization`. :arg use_tree: if `True`, node partitions are generated using a :class:`boxtree.TreeBuilder`, which leads to geometrically close points to belong to the same partition. If `False`, a simple linear partition is constructed. - :arg max_particles_in_box: passed to :class:`boxtree.TreeBuilder`. + :arg max_nodes_in_box: passed to :class:`boxtree.TreeBuilder`. 
:return: a tuple `(indices, ranges)` of :class:`pyopencl.array.Array` integer arrays. The indices in a range can be retrieved using @@ -90,7 +91,7 @@ def partition_by_nodes(queue, discr, builder = TreeBuilder(discr.cl_context) tree, _ = builder(queue, discr.nodes(), - max_particles_in_box=max_particles_in_box) + max_particles_in_box=max_nodes_in_box) tree = tree.get(queue) leaf_boxes, = (tree.box_flags & @@ -109,7 +110,7 @@ def partition_by_nodes(queue, discr, indices = cl.array.arange(queue, 0, discr.nnodes, dtype=np.int) ranges = cl.array.arange(queue, 0, discr.nnodes + 1, - discr.nnodes // max_particles_in_box, + discr.nnodes // max_nodes_in_box, dtype=np.int) assert ranges[-1] == discr.nnodes @@ -118,7 +119,7 @@ def partition_by_nodes(queue, discr, def partition_by_elements(queue, discr, use_tree=True, - max_particles_in_box=10): + max_elements_in_box=10): """Generate clusters / ranges of points. The partition is created at the element level, so that all the nodes belonging to an element belong to the same range. This can result in slightly larger differences in size @@ -126,12 +127,12 @@ def partition_by_elements(queue, discr, need to be resampled, integrated, etc. :arg queue: a :class:`pyopencl.CommandQueue`. - :arg discr: a :class:`~meshmode.discretization.Discretization`. + :arg discr: a :class:`meshmode.discretization.Discretization`. :arg use_tree: if True, node partitions are generated using a :class:`boxtree.TreeBuilder`, which leads to geometrically close points to belong to the same partition. If False, a simple linear partition is constructed. - :arg max_particles_in_box: passed to :class:`boxtree.TreeBuilder`. + :arg max_elements_in_box: passed to :class:`boxtree.TreeBuilder`. :return: a tuple `(indices, ranges)` of :class:`pyopencl.array.Array` integer arrays. The indices in a range can be retrieved using @@ -148,7 +149,7 @@ def partition_by_elements(queue, discr, elcenters = element_centers_of_mass(discr) tree, _ = builder(queue, elcenters, - max_particles_in_box=max_particles_in_box) + max_particles_in_box=max_elements_in_box) groups = discr.groups tree = tree.get(queue) @@ -168,7 +169,7 @@ def partition_by_elements(queue, discr, else: nelements = discr.mesh.nelements elements = np.array_split(np.arange(0, nelements), - nelements // max_particles_in_box) + nelements // max_elements_in_box) elranges = np.cumsum([g.nelements for g in discr.groups]) elgroups = [np.digitize(elements[i], elranges) @@ -197,12 +198,11 @@ def partition_from_coarse(queue, resampler, from_indices, from_ranges): The new partition will have the same number of ranges as the old partition. The nodes inside each range in the new partition are all the nodes in :attr:`resampler.to_discr` that belong to the same region as the nodes - in the same range from :attr:`resampler.from_discr`. These nodes are - obtained using :attr:`mesmode.discretization.connection.InterpolationBatch`. + in the same range from :attr:`resampler.from_discr`. :arg queue: a :class:`pyopencl.CommandQueue`. :arg resampler: a - :class:`~meshmode.discretization.connection.DirectDiscretizationConnection`. + :class:`meshmode.discretization.connection.DirectDiscretizationConnection`. :arg from_indices: a set of indices into the nodes in :attr:`resampler.from_discr`. :arg from_ranges: array used to index into :attr:`from_indices`. 
@@ -270,31 +270,32 @@ def partition_from_coarse(queue, resampler, from_indices, from_ranges):
 # {{{ proxy point generator
 
 class ProxyGenerator(object):
-    def __init__(self, source, ratio=1.5, nproxy=30, **kwargs):
-        r"""
-        :arg discr: a :class:`pytential.qbx.QBXLayerPotentialBase`.
-        :arg ratio: a ratio used to compute the proxy point radius. The radius
-            is computed in the :math:`L_2` norm, resulting in a circle or
-            sphere of proxy points. For QBX, we have two radii of interest
-            for a set of points: the radius :math:`r_{block}` of the
-            smallest ball containing all the points and the radius
-            :math:`r_{qbx}` of the smallest ball containing all the QBX
-            expansion balls in the block. If the ratio :math:`\theta \in
-            [0, 1]`, then the radius of the proxy ball is
+    r"""
+    :arg discr: a :class:`pytential.qbx.QBXLayerPotentialSource`.
+    :arg nproxy: number of proxy points.
+    :arg ratio: a ratio used to compute the proxy point radius. The radius
+        is computed in the :math:`L_2` norm, resulting in a circle or
+        sphere of proxy points. For QBX, we have two radii of interest
+        for a set of points: the radius :math:`r_{block}` of the
+        smallest ball containing all the points and the radius
+        :math:`r_{qbx}` of the smallest ball containing all the QBX
+        expansion balls in the block. If the ratio :math:`\theta \in
+        [0, 1]`, then the radius of the proxy ball is
 
-            .. math::
+        .. math::
 
-                r = (1 - \theta) r_{block} + \theta r_{qbx}.
+            r = (1 - \theta) r_{block} + \theta r_{qbx}.
 
-            If the ratio :math:`\theta > 1`, the the radius is simply
+        If the ratio :math:`\theta > 1`, then the radius is simply
 
-            .. math::
+        .. math::
 
-                r = \theta r_{qbx}.
+            r = \theta r_{qbx}.
 
-        :arg nproxy: number of proxy points.
-        """
+    .. automethod:: __call__
+    """
 
+    def __init__(self, source, nproxy=30, ratio=1.1, **kwargs):
         self.source = source
         self.ratio = abs(ratio)
         self.nproxy = int(abs(nproxy))
@@ -316,9 +317,10 @@ class ProxyGenerator(object):
     @memoize_method
     def get_kernel(self):
         if self.ratio < 1.0:
-            radius_expr = "(1.0 - ratio) * rblk + ratio * rqbx"
+            radius_expr = "(1.0 - {ratio}) * rblk + {ratio} * rqbx"
         else:
-            radius_expr = "ratio * rqbx"
+            radius_expr = "{ratio} * rqbx"
+        radius_expr = radius_expr.format(ratio=self.ratio)
 
         # NOTE: centers of mass are computed using a second-order approximation
         # that currently matches what is in `element_centers_of_mass`. 
@@ -329,23 +331,25 @@ class ProxyGenerator(object): ], [""" for irange + <> ioffset = srcranges[irange] <> npoints = srcranges[irange + 1] - srcranges[irange] + proxy_center[idim, irange] = 1.0 / npoints * \ - reduce(sum, i, nodes[idim, srcindices[i + srcranges[irange]]]) \ + reduce(sum, i, nodes[idim, srcindices[i + ioffset]]) \ {{dup=idim:i}} <> rblk = simul_reduce(max, i, sqrt(simul_reduce(sum, idim, \ (proxy_center[idim, irange] - - nodes[idim, srcindices[i + srcranges[irange]]]) ** 2))) + nodes[idim, srcindices[i + ioffset]]) ** 2))) <> rqbx_int = simul_reduce(max, i, sqrt(simul_reduce(sum, idim, \ (proxy_center[idim, irange] - - center_int[idim, srcindices[i + srcranges[irange]]]) ** 2)) + \ - expansion_radii[srcindices[i + srcranges[irange]]]) + center_int[idim, srcindices[i + ioffset]]) ** 2)) + \ + expansion_radii[srcindices[i + ioffset]]) <> rqbx_ext = simul_reduce(max, i, sqrt(simul_reduce(sum, idim, \ (proxy_center[idim, irange] - - center_ext[idim, srcindices[i + srcranges[irange]]]) ** 2)) + \ - expansion_radii[srcindices[i + srcranges[irange]]]) + center_ext[idim, srcindices[i + ioffset]]) ** 2)) + \ + expansion_radii[srcindices[i + ioffset]]) <> rqbx = if(rqbx_ext < rqbx_int, rqbx_int, rqbx_ext) proxy_radius[irange] = {radius_expr} @@ -374,9 +378,7 @@ class ProxyGenerator(object): ], name="proxy_generator_knl", assumptions="dim>=1 and nranges>=1", - fixed_parameters=dict( - dim=self.ambient_dim, - ratio=self.ratio), + fixed_parameters=dict(dim=self.ambient_dim), lang_version=MOST_RECENT_LANGUAGE_VERSION) knl = lp.tag_inames(knl, "idim*:unr") @@ -395,21 +397,21 @@ class ProxyGenerator(object): the discretization in :attr:`source`. :arg queue: a :class:`pyopencl.CommandQueue`. - :arg srcindices: a :class:`pyopencl.array.Array` of srcindices around + :arg srcindices: a :class:`pyopencl.array.Array` of indices around which to construct proxy balls. :arg srcranges: an :class:`pyopencl.array.Array` of size `(nranges + 1,)` used to index into :attr:`srcindices`. Each one of the `nranges` ranges will get a proxy ball. - :return: a tuple of `(centers, radii, proxies, pxyranges)`, where - each value is a :class:`pyopencl.array.Array`. The - sizes of the arrays are as follows: `centers` is of size - `(2, nranges)`, `radii` is of size `(nranges,)`, `pxyranges` is + :return: a tuple of `(proxies, pxyranges, pxycenters, pxyranges)`, where + each element is a :class:`pyopencl.array.Array`. The + sizes of the arrays are as follows: `pxycenters` is of size + `(2, nranges)`, `pxyradii` is of size `(nranges,)`, `pxyranges` is of size `(nranges + 1,)` and `proxies` is of size - `(2, nranges * nproxies)`. The proxy points in a range :math:`i` + `(2, nranges * nproxy)`. The proxy points in a range :math:`i` can be obtained by a slice `proxies[pxyranges[i]:pxyranges[i + 1]]` - and are all at a distance `radii[i]` from the range center - `centers[i]`. + and are all at a distance `pxyradii[i]` from the range center + `pxycenters[i]`. """ from pytential.qbx.utils import get_centers_on_side @@ -461,12 +463,9 @@ def build_neighbor_list(discr, srcindices, srcranges, pxycenters, pxyradii, :arg pxycenters: an array containing the center of each proxy ball. :arg pxyradii: an array containing the radius of each proxy ball. - :return: a tuple `(neighbours, nbrranges)` where each value is a - :class:`pyopencl.array.Array` of integers. 
For a range :math:`i` - in `nbrranges`, the corresponding slice of the `neighbours` array - is a subset of :attr:`srcindices` such that all points are inside - the proxy ball centered at `centers[i]` of radius `radii[i]` - that is not included in `srcindices[srcranges[i]:srcranges[i + 1]]`. + :return: a tuple `(nbrindices, nbrranges)`, where each value is a + :class:`pyopencl.array.Array`. For a range :math:`i`, we can + get the slice using `nbrindices[nbrranges[i]:nbrranges[i + 1]]`. """ with cl.CommandQueue(discr.cl_context) as queue: @@ -507,7 +506,7 @@ def build_neighbor_list(discr, srcindices, srcranges, pxycenters, pxyradii, if isinstance(pxyradii, cl.array.Array): pxyradii = pxyradii.get(queue) - neighbors = np.empty(srcranges.shape[0] - 1, dtype=np.object) + nbrindices = np.empty(srcranges.shape[0] - 1, dtype=np.object) for iproxy in range(srcranges.shape[0] - 1): # get list of boxes intersecting the current ball istart = query.leaves_near_ball_starts[iproxy] @@ -527,20 +526,31 @@ def build_neighbor_list(discr, srcindices, srcranges, pxycenters, pxyradii, center = pxycenters[:, iproxy].reshape(-1, 1) radius = pxyradii[iproxy] mask = (la.norm(nodes - center, axis=0) < radius) & \ - ((isources < srcranges[iproxy]) | (srcranges[iproxy + 1] <= isources)) + ((isources < srcranges[iproxy]) | + (srcranges[iproxy + 1] <= isources)) - neighbors[iproxy] = srcindices[isources[mask]] + nbrindices[iproxy] = srcindices[isources[mask]] nbrranges = to_device(queue, - np.cumsum([0] + [n.shape[0] for n in neighbors])) - neighbors = to_device(queue, np.hstack(neighbors)) + np.cumsum([0] + [n.shape[0] for n in nbrindices])) + nbrindices = to_device(queue, np.hstack(nbrindices)) - return neighbors, nbrranges + return nbrindices, nbrranges def build_skeleton_list(source, srcindices, srcranges, **kwargs): - """ - :arg source: a :class:`pytential.qbx.QBXLayerPotentialBase`. + """Generate sets of skeleton points for each given range of indices + in the :attr:`source` discretization. Skeleton points are meant to + model the interactions of a set of points. They are composed of two + parts: + + - a set of proxy points (or balls) around a given range, which + models farfield interactions. + + - a set of neighboring points that are inside the proxy balls, but + do not belong to the given range, which model nearby interactions. + + :arg source: a :class:`pytential.qbx.QBXLayerPotentialSource`. :arg srcindices: a :class:`pyopencl.array.Array` of indices for points in :attr:`source`. :arg srcranges: a :class:`pyopencl.array.Array` describing the ranges @@ -551,13 +561,9 @@ def build_skeleton_list(source, srcindices, srcranges, **kwargs): :arg kwargs: additional arguments passed to :class:`ProxyGenerator` or :func:`build_neighbor_list`. - :returns: a tuple `(skeletons, sklranges)` where each value is a + :returns: a tuple `(skeletons, sklranges)`, where each value is a :class:`pyopencl.array.Array`. For a range :math:`i`, we can get the slice using `skeletons[sklranges[i]:sklranges[i + 1]]`. - The skeleton points in a range represents the union of a set - of generated proxy points and all the source points inside the - proxy ball that do not also belong to the current range in - :attr:`indices`. 
""" @memoize @@ -583,7 +589,7 @@ def build_skeleton_list(source, srcindices, srcranges, **kwargs): proxies[idim, pxystart + ipxy] \ {id_prefix=write_pxy,nosync=write_ngb} skeletons[idim, sklstart + npxyblock + ingb] = \ - sources[idim, neighbors[ngbstart + ingb]] \ + sources[idim, nbrindices[ngbstart + ingb]] \ {id_prefix=write_ngb,nosync=write_pxy} sklranges[irange + 1] = sklranges[irange] + \ npxyblock + nngbblock @@ -594,19 +600,19 @@ def build_skeleton_list(source, srcindices, srcranges, **kwargs): shape=(source.ambient_dim, "nsources")), lp.GlobalArg("proxies", None, shape=(source.ambient_dim, "nproxies"), dim_tags="sep,C"), - lp.GlobalArg("neighbors", None, - shape="nneighbors"), + lp.GlobalArg("nbrindices", None, + shape="nnbrindices"), lp.GlobalArg("pxyranges", None, shape="nranges + 1"), lp.GlobalArg("nbrranges", None, shape="nranges + 1"), lp.GlobalArg("skeletons", None, - shape=(source.ambient_dim, "nproxies + nneighbors")), + shape=(source.ambient_dim, "nproxies + nnbrindices")), lp.GlobalArg("sklranges", None, shape="nranges + 1"), lp.ValueArg("nsources", np.int32), lp.ValueArg("nproxies", np.int32), - lp.ValueArg("nneighbors", np.int32), + lp.ValueArg("nnbrindices", np.int32), "..." ], name="concat_skl", @@ -625,15 +631,15 @@ def build_skeleton_list(source, srcindices, srcranges, **kwargs): proxies, pxyranges, pxycenters, pxyradii = \ proxy(queue, srcindices, srcranges) - neighbors, nbrranges = build_neighbor_list(source.density_discr, + nbrindices, nbrranges = build_neighbor_list(source.density_discr, srcindices, srcranges, pxycenters, pxyradii, **kwargs) sklranges = cl.array.zeros(queue, srcranges.shape, dtype=np.int) _, (skeletons, sklranges) = knl()(queue, sources=source.density_discr.nodes(), proxies=proxies, - neighbors=neighbors, pxyranges=pxyranges, + nbrindices=nbrindices, nbrranges=nbrranges, sklranges=sklranges) diff --git a/test/test_linalg_proxy.py b/test/test_linalg_proxy.py index f4055b27..0274bfed 100644 --- a/test/test_linalg_proxy.py +++ b/test/test_linalg_proxy.py @@ -98,10 +98,10 @@ def _build_block_index(queue, discr, # create index ranges if method == "nodes": indices, ranges = partition_by_nodes(queue, discr, - use_tree=use_tree, max_particles_in_box=max_particles_in_box) + use_tree=use_tree, max_nodes_in_box=max_particles_in_box) elif method == "elements": indices, ranges = partition_by_elements(queue, discr, - use_tree=use_tree, max_particles_in_box=max_particles_in_box) + use_tree=use_tree, max_elements_in_box=max_particles_in_box) else: raise ValueError("unknown method: {}".format(method)) @@ -187,7 +187,7 @@ def _plot_partition_indices(queue, discr, indices, ranges, **kwargs): @pytest.mark.parametrize("method", ["nodes", "elements"]) @pytest.mark.parametrize("use_tree", [True, False]) @pytest.mark.parametrize("ndim", [2, 3]) -def test_partition_points(ctx_factory, method, use_tree, ndim, visualize=True): +def test_partition_points(ctx_factory, method, use_tree, ndim, visualize=False): ctx = ctx_factory() queue = cl.CommandQueue(ctx) @@ -198,7 +198,7 @@ def test_partition_points(ctx_factory, method, use_tree, ndim, visualize=True): @pytest.mark.parametrize("use_tree", [True, False]) @pytest.mark.parametrize("ndim", [2, 3]) -def test_partition_coarse(ctx_factory, use_tree, ndim, visualize=True): +def test_partition_coarse(ctx_factory, use_tree, ndim, visualize=False): ctx = ctx_factory() queue = cl.CommandQueue(ctx) @@ -245,7 +245,7 @@ def test_partition_coarse(ctx_factory, use_tree, ndim, visualize=True): @pytest.mark.parametrize("ndim", [2, 3]) 
@pytest.mark.parametrize("factor", [1.0, 0.6]) -def test_proxy_generator(ctx_factory, ndim, factor, visualize=True): +def test_proxy_generator(ctx_factory, ndim, factor, visualize=False): ctx = ctx_factory() queue = cl.CommandQueue(ctx) @@ -337,7 +337,7 @@ def test_proxy_generator(ctx_factory, ndim, factor, visualize=True): @pytest.mark.parametrize("ndim", [2, 3]) @pytest.mark.parametrize("factor", [1.0, 0.6]) -def test_area_query(ctx_factory, ndim, factor, visualize=True): +def test_area_query(ctx_factory, ndim, factor, visualize=False): ctx = ctx_factory() queue = cl.CommandQueue(ctx) @@ -347,25 +347,24 @@ def test_area_query(ctx_factory, ndim, factor, visualize=True): # generate proxy points from pytential.linalg.proxy import ProxyGenerator - generator = ProxyGenerator(qbx, ratio=1.1) + generator = ProxyGenerator(qbx) _, _, pxycenters, pxyradii = generator(queue, srcindices, srcranges) from pytential.linalg.proxy import build_neighbor_list, build_skeleton_list - neighbors, nbrranges = build_neighbor_list(qbx.density_discr, + nbrindices, nbrranges = build_neighbor_list(qbx.density_discr, srcindices, srcranges, pxycenters, pxyradii) - skeletons, sklranges = build_skeleton_list(qbx, srcindices, srcranges, - ratio=1.1) + skeletons, sklranges = build_skeleton_list(qbx, srcindices, srcranges) srcindices = srcindices.get() srcranges = srcranges.get() - neighbors = neighbors.get() + nbrindices = nbrindices.get() nbrranges = nbrranges.get() for i in range(srcranges.shape[0] - 1): isrc = np.s_[srcranges[i]:srcranges[i + 1]] inbr = np.s_[nbrranges[i]:nbrranges[i + 1]] - assert not np.any(np.isin(neighbors[inbr], srcindices[isrc])) + assert not np.any(np.isin(nbrindices[inbr], srcindices[isrc])) if visualize: if ndim == 2: @@ -376,7 +375,7 @@ def test_area_query(ctx_factory, ndim, factor, visualize=True): for i in range(srcranges.shape[0] - 1): isrc = srcindices[np.s_[srcranges[i]:srcranges[i + 1]]] - ingb = neighbors[nbrranges[i]:nbrranges[i + 1]] + ingb = nbrindices[nbrranges[i]:nbrranges[i + 1]] iskl = np.s_[sklranges[i]:sklranges[i + 1]] pt.figure(figsize=(10, 8)) @@ -402,7 +401,7 @@ def test_area_query(ctx_factory, ndim, factor, visualize=True): for i in range(srcranges.shape[0] - 1): isrc = srcindices[np.s_[srcranges[i]:srcranges[i + 1]]] - ingb = neighbors[nbrranges[i]:nbrranges[i + 1]] + ingb = nbrindices[nbrranges[i]:nbrranges[i + 1]] # TODO: some way to turn off some of the interpolations # would help visualize this better. 
-- GitLab From 514d14d430d850bd85562a3aa5e06df32922ff0e Mon Sep 17 00:00:00 2001 From: Alex Fikl Date: Thu, 21 Jun 2018 16:06:57 -0500 Subject: [PATCH 12/25] flake8 fixes --- test/test_linalg_proxy.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/test/test_linalg_proxy.py b/test/test_linalg_proxy.py index 0274bfed..6345c775 100644 --- a/test/test_linalg_proxy.py +++ b/test/test_linalg_proxy.py @@ -31,8 +31,6 @@ import numpy.linalg as la import pyopencl as cl from pyopencl.array import to_device -import loopy as lp -from pytential import sym from meshmode.mesh.generation import ( # noqa ellipse, NArmedStarfish, generate_torus, make_curve_mesh) @@ -203,7 +201,7 @@ def test_partition_coarse(ctx_factory, use_tree, ndim, visualize=False): queue = cl.CommandQueue(ctx) qbx = _build_qbx_discr(queue, ndim=ndim) - srcindices, srcranges = _build_block_index(queue, qbx.density_discr, + srcindices, srcranges = _build_block_index(queue, qbx.density_discr, method="elements", use_tree=use_tree) if visualize: -- GitLab From 7ff59983afb16fa3c074e0eff9dede64f09d86db Mon Sep 17 00:00:00 2001 From: Alex Fikl Date: Sun, 24 Jun 2018 18:48:18 -0500 Subject: [PATCH 13/25] direct-solver: change default max_particles_in_box to None --- pytential/linalg/proxy.py | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/pytential/linalg/proxy.py b/pytential/linalg/proxy.py index f8fb9845..ea04b4e9 100644 --- a/pytential/linalg/proxy.py +++ b/pytential/linalg/proxy.py @@ -66,7 +66,7 @@ def _element_node_range(group, ielement): def partition_by_nodes(queue, discr, use_tree=True, - max_nodes_in_box=30): + max_nodes_in_box=None): """Generate clusters / ranges of nodes. The partition is created at the lowest level of granularity, i.e. nodes. This results in balanced ranges of points, but will split elements across different ranges. @@ -84,6 +84,10 @@ def partition_by_nodes(queue, discr, `indices[ranges[i]:ranges[i + 1]]`. """ + if max_nodes_in_box is None: + # FIXME: this is just an arbitrary value + max_nodes_in_box = 32 + if use_tree: from boxtree import box_flags_enum from boxtree import TreeBuilder @@ -119,7 +123,7 @@ def partition_by_nodes(queue, discr, def partition_by_elements(queue, discr, use_tree=True, - max_elements_in_box=10): + max_elements_in_box=None): """Generate clusters / ranges of points. The partition is created at the element level, so that all the nodes belonging to an element belong to the same range. This can result in slightly larger differences in size @@ -138,6 +142,14 @@ def partition_by_elements(queue, discr, integer arrays. The indices in a range can be retrieved using `indices[ranges[i]:ranges[i + 1]]`. """ + + if max_elements_in_box is None: + # NOTE: keep in sync with partition_by_nodes + max_nodes_in_box = 32 + + nunit_nodes = int(np.mean([g.nunit_nodes for g in discr.groups])) + max_elements_in_box = max_nodes_in_box // nunit_nodes + if use_tree: from boxtree import box_flags_enum from boxtree import TreeBuilder @@ -450,7 +462,7 @@ class ProxyGenerator(object): def build_neighbor_list(discr, srcindices, srcranges, pxycenters, pxyradii, - max_nodes_in_box=30, **kwargs): + max_nodes_in_box=None, **kwargs): """Generate a set of neighboring points for each range of points in :attr:`discr`. 
Neighboring points of a range :math:`i` are defined as all the points inside the proxy ball :math:`i` that do not also @@ -468,6 +480,10 @@ def build_neighbor_list(discr, srcindices, srcranges, pxycenters, pxyradii, get the slice using `nbrindices[nbrranges[i]:nbrranges[i + 1]]`. """ + if max_nodes_in_box is None: + # FIXME: this is a fairly arbitrary value + max_nodes_in_box = 32 + with cl.CommandQueue(discr.cl_context) as queue: if isinstance(srcindices, cl.array.Array): srcindices = srcindices.get(queue) -- GitLab From 276b01a2ae5f6aba158c7c43da4c5c233e9d6b40 Mon Sep 17 00:00:00 2001 From: Alex Fikl Date: Sun, 24 Jun 2018 19:03:09 -0500 Subject: [PATCH 14/25] direct-solver: let loopy guess more of the kernel arguments --- pytential/linalg/proxy.py | 23 +++++------------------ 1 file changed, 5 insertions(+), 18 deletions(-) diff --git a/pytential/linalg/proxy.py b/pytential/linalg/proxy.py index ea04b4e9..6a46f145 100644 --- a/pytential/linalg/proxy.py +++ b/pytential/linalg/proxy.py @@ -374,19 +374,12 @@ class ProxyGenerator(object): shape=(self.ambient_dim, "nnodes"), dim_tags="sep,C"), lp.GlobalArg("center_ext", None, shape=(self.ambient_dim, "nnodes"), dim_tags="sep,C"), - lp.GlobalArg("expansion_radii", None, - shape="nnodes"), - lp.GlobalArg("srcranges", None, - shape="nranges + 1"), - lp.GlobalArg("srcindices", None, - shape="nindices"), lp.GlobalArg("proxy_center", None, shape=(self.ambient_dim, "nranges")), lp.GlobalArg("proxy_radius", None, shape="nranges"), - lp.ValueArg("nnodes", np.int64), - lp.ValueArg("nranges", None), - lp.ValueArg("nindices", np.int64) + lp.ValueArg("nnodes", np.int), + "..." ], name="proxy_generator_knl", assumptions="dim>=1 and nranges>=1", @@ -618,17 +611,11 @@ def build_skeleton_list(source, srcindices, srcranges, **kwargs): shape=(source.ambient_dim, "nproxies"), dim_tags="sep,C"), lp.GlobalArg("nbrindices", None, shape="nnbrindices"), - lp.GlobalArg("pxyranges", None, - shape="nranges + 1"), - lp.GlobalArg("nbrranges", None, - shape="nranges + 1"), lp.GlobalArg("skeletons", None, shape=(source.ambient_dim, "nproxies + nnbrindices")), - lp.GlobalArg("sklranges", None, - shape="nranges + 1"), - lp.ValueArg("nsources", np.int32), - lp.ValueArg("nproxies", np.int32), - lp.ValueArg("nnbrindices", np.int32), + lp.ValueArg("nsources", np.int), + lp.ValueArg("nproxies", np.int), + lp.ValueArg("nnbrindices", np.int), "..." 
], name="concat_skl", -- GitLab From a5dc1ff649e0186356686e24f67f5d1900c32488 Mon Sep 17 00:00:00 2001 From: Alex Fikl Date: Wed, 27 Jun 2018 20:31:19 -0500 Subject: [PATCH 15/25] direct-solver: port to new sumpy BlockIndexRanges and other fixes --- .test-conda-env-py3-requirements.txt | 2 +- pytential/linalg/proxy.py | 420 +++++++++++++-------------- requirements.txt | 2 +- test/test_linalg_proxy.py | 178 ++++++------ 4 files changed, 299 insertions(+), 303 deletions(-) diff --git a/.test-conda-env-py3-requirements.txt b/.test-conda-env-py3-requirements.txt index fa6c0426..c5ef1e9b 100644 --- a/.test-conda-env-py3-requirements.txt +++ b/.test-conda-env-py3-requirements.txt @@ -1,5 +1,5 @@ git+https://gitlab.tiker.net/inducer/boxtree git+https://github.com/inducer/pymbolic git+https://github.com/inducer/loopy -git+https://gitlab.tiker.net/inducer/sumpy +git+https://gitlab.tiker.net/fikl2/sumpy@block-index-additions git+https://github.com/inducer/meshmode diff --git a/pytential/linalg/proxy.py b/pytential/linalg/proxy.py index 6a46f145..6a1d4cfe 100644 --- a/pytential/linalg/proxy.py +++ b/pytential/linalg/proxy.py @@ -32,6 +32,7 @@ from pyopencl.array import to_device from pytools.obj_array import make_obj_array from pytools import memoize_method, memoize +from sumpy.tools import BlockIndexRanges import loopy as lp from loopy.version import MOST_RECENT_LANGUAGE_VERSION @@ -49,9 +50,9 @@ Proxy Point Generation .. autofunction:: partition_from_coarse -.. autofunction:: build_neighbor_list +.. autofunction:: gather_block_neighbor_points -.. autofunction:: build_skeleton_list +.. autofunction:: gather_block_interaction_points """ @@ -64,14 +65,13 @@ def _element_node_range(group, ielement): return np.arange(istart, iend) -def partition_by_nodes(queue, discr, +def partition_by_nodes(discr, use_tree=True, max_nodes_in_box=None): """Generate clusters / ranges of nodes. The partition is created at the lowest level of granularity, i.e. nodes. This results in balanced ranges of points, but will split elements across different ranges. - :arg queue: a :class:`pyopencl.CommandQueue`. :arg discr: a :class:`meshmode.discretization.Discretization`. :arg use_tree: if `True`, node partitions are generated using a :class:`boxtree.TreeBuilder`, which leads to geometrically close @@ -79,49 +79,50 @@ def partition_by_nodes(queue, discr, partition is constructed. :arg max_nodes_in_box: passed to :class:`boxtree.TreeBuilder`. - :return: a tuple `(indices, ranges)` of :class:`pyopencl.array.Array` - integer arrays. The indices in a range can be retrieved using - `indices[ranges[i]:ranges[i + 1]]`. + :return: a :class:`sumpy.tools.BlockIndexRanges`. 
""" if max_nodes_in_box is None: # FIXME: this is just an arbitrary value max_nodes_in_box = 32 - if use_tree: - from boxtree import box_flags_enum - from boxtree import TreeBuilder + with cl.CommandQueue(discr.cl_context) as queue: + if use_tree: + from boxtree import box_flags_enum + from boxtree import TreeBuilder - builder = TreeBuilder(discr.cl_context) + builder = TreeBuilder(discr.cl_context) - tree, _ = builder(queue, discr.nodes(), - max_particles_in_box=max_nodes_in_box) + tree, _ = builder(queue, discr.nodes(), + max_particles_in_box=max_nodes_in_box) - tree = tree.get(queue) - leaf_boxes, = (tree.box_flags & - box_flags_enum.HAS_CHILDREN == 0).nonzero() + tree = tree.get(queue) + leaf_boxes, = (tree.box_flags & + box_flags_enum.HAS_CHILDREN == 0).nonzero() - indices = np.empty(len(leaf_boxes), dtype=np.object) - for i, ibox in enumerate(leaf_boxes): - box_start = tree.box_source_starts[ibox] - box_end = box_start + tree.box_source_counts_cumul[ibox] - indices[i] = tree.user_source_ids[box_start:box_end] + indices = np.empty(len(leaf_boxes), dtype=np.object) + for i, ibox in enumerate(leaf_boxes): + box_start = tree.box_source_starts[ibox] + box_end = box_start + tree.box_source_counts_cumul[ibox] + indices[i] = tree.user_source_ids[box_start:box_end] - ranges = to_device(queue, - np.cumsum([0] + [box.shape[0] for box in indices])) - indices = to_device(queue, np.hstack(indices)) - else: - indices = cl.array.arange(queue, 0, discr.nnodes, - dtype=np.int) - ranges = cl.array.arange(queue, 0, discr.nnodes + 1, - discr.nnodes // max_nodes_in_box, - dtype=np.int) + ranges = to_device(queue, + np.cumsum([0] + [box.shape[0] for box in indices])) + indices = to_device(queue, np.hstack(indices)) + else: + indices = cl.array.arange(queue, 0, discr.nnodes, + dtype=np.int) + ranges = cl.array.arange(queue, 0, discr.nnodes + 1, + discr.nnodes // max_nodes_in_box, + dtype=np.int) + assert ranges[-1] == discr.nnodes - assert ranges[-1] == discr.nnodes - return indices, ranges + return BlockIndexRanges(discr.cl_context, + indices.with_queue(None), + ranges.with_queue(None)) -def partition_by_elements(queue, discr, +def partition_by_elements(discr, use_tree=True, max_elements_in_box=None): """Generate clusters / ranges of points. The partition is created at the @@ -130,7 +131,6 @@ def partition_by_elements(queue, discr, between the ranges, but can be very useful when the individual partitions need to be resampled, integrated, etc. - :arg queue: a :class:`pyopencl.CommandQueue`. :arg discr: a :class:`meshmode.discretization.Discretization`. :arg use_tree: if True, node partitions are generated using a :class:`boxtree.TreeBuilder`, which leads to geometrically close @@ -138,9 +138,7 @@ def partition_by_elements(queue, discr, partition is constructed. :arg max_elements_in_box: passed to :class:`boxtree.TreeBuilder`. - :return: a tuple `(indices, ranges)` of :class:`pyopencl.array.Array` - integer arrays. The indices in a range can be retrieved using - `indices[ranges[i]:ranges[i + 1]]`. + :return: a :class:`sumpy.tools.BlockIndexRanges`. 
""" if max_elements_in_box is None: @@ -150,57 +148,60 @@ def partition_by_elements(queue, discr, nunit_nodes = int(np.mean([g.nunit_nodes for g in discr.groups])) max_elements_in_box = max_nodes_in_box // nunit_nodes - if use_tree: - from boxtree import box_flags_enum - from boxtree import TreeBuilder + with cl.CommandQueue(discr.cl_context) as queue: + if use_tree: + from boxtree import box_flags_enum + from boxtree import TreeBuilder - builder = TreeBuilder(discr.cl_context) + builder = TreeBuilder(discr.cl_context) - from pytential.qbx.utils import element_centers_of_mass - elranges = np.cumsum([group.nelements for group in discr.mesh.groups]) - elcenters = element_centers_of_mass(discr) + from pytential.qbx.utils import element_centers_of_mass + elranges = np.cumsum([group.nelements for group in discr.mesh.groups]) + elcenters = element_centers_of_mass(discr) - tree, _ = builder(queue, elcenters, - max_particles_in_box=max_elements_in_box) + tree, _ = builder(queue, elcenters, + max_particles_in_box=max_elements_in_box) - groups = discr.groups - tree = tree.get(queue) - leaf_boxes, = (tree.box_flags & - box_flags_enum.HAS_CHILDREN == 0).nonzero() + groups = discr.groups + tree = tree.get(queue) + leaf_boxes, = (tree.box_flags & + box_flags_enum.HAS_CHILDREN == 0).nonzero() - indices = np.empty(len(leaf_boxes), dtype=np.object) - for i, ibox in enumerate(leaf_boxes): - box_start = tree.box_source_starts[ibox] - box_end = box_start + tree.box_source_counts_cumul[ibox] + indices = np.empty(len(leaf_boxes), dtype=np.object) + for i, ibox in enumerate(leaf_boxes): + box_start = tree.box_source_starts[ibox] + box_end = box_start + tree.box_source_counts_cumul[ibox] - ielement = tree.user_source_ids[box_start:box_end] - igroup = np.digitize(ielement, elranges) + ielement = tree.user_source_ids[box_start:box_end] + igroup = np.digitize(ielement, elranges) - indices[i] = np.hstack([_element_node_range(groups[j], k) - for j, k in zip(igroup, ielement)]) - else: - nelements = discr.mesh.nelements - elements = np.array_split(np.arange(0, nelements), - nelements // max_elements_in_box) + indices[i] = np.hstack([_element_node_range(groups[j], k) + for j, k in zip(igroup, ielement)]) + else: + nelements = discr.mesh.nelements + elements = np.array_split(np.arange(0, nelements), + nelements // max_elements_in_box) - elranges = np.cumsum([g.nelements for g in discr.groups]) - elgroups = [np.digitize(elements[i], elranges) - for i in range(len(elements))] + elranges = np.cumsum([g.nelements for g in discr.groups]) + elgroups = [np.digitize(elements[i], elranges) + for i in range(len(elements))] - indices = np.empty(len(elements), dtype=np.object) - for i in range(indices.shape[0]): - indices[i] = np.hstack([_element_node_range(discr.groups[j], k) - for j, k in zip(elgroups[i], elements[i])]) + indices = np.empty(len(elements), dtype=np.object) + for i in range(indices.shape[0]): + indices[i] = np.hstack([_element_node_range(discr.groups[j], k) + for j, k in zip(elgroups[i], elements[i])]) - ranges = to_device(queue, - np.cumsum([0] + [box.shape[0] for box in indices])) - indices = to_device(queue, np.hstack(indices)) + ranges = to_device(queue, + np.cumsum([0] + [b.shape[0] for b in indices])) + indices = to_device(queue, np.hstack(indices)) + assert ranges[-1] == discr.nnodes - assert ranges[-1] == discr.nnodes - return indices, ranges + return BlockIndexRanges(discr.cl_context, + indices.with_queue(None), + ranges.with_queue(None)) -def partition_from_coarse(queue, resampler, from_indices, 
from_ranges): +def partition_from_coarse(resampler, from_indices): """Generate a partition of nodes from an existing partition on a coarser discretization. The new partition is generated based on element refinement relationships in :attr:`resampler`, so the existing partition @@ -212,69 +213,64 @@ def partition_from_coarse(queue, resampler, from_indices, from_ranges): :attr:`resampler.to_discr` that belong to the same region as the nodes in the same range from :attr:`resampler.from_discr`. - :arg queue: a :class:`pyopencl.CommandQueue`. :arg resampler: a :class:`meshmode.discretization.connection.DirectDiscretizationConnection`. - :arg from_indices: a set of indices into the nodes in - :attr:`resampler.from_discr`. - :arg from_ranges: array used to index into :attr:`from_indices`. + :arg from_indices: a :class:`sumpy.tools.BlockIndexRanges`. - :return: a tuple `(indices, ranges)` of :class:`pyopencl.array.Array` - integer arrays. The indices in a range can be retrieved using - `indices[ranges[i]:ranges[i + 1]]`. + :return: a tuple :class:`sumpy.tools.BlockIndexRanges`. """ if not hasattr(resampler, "groups"): raise ValueError("resampler must be a DirectDiscretizationConnection.") - if isinstance(from_ranges, cl.array.Array): + with cl.CommandQueue(resampler.cl_context) as queue: from_indices = from_indices.get(queue) - from_ranges = from_ranges.get(queue) - - # construct ranges - from_discr = resampler.from_discr - from_grp_ranges = np.cumsum([0] + - [grp.nelements for grp in from_discr.mesh.groups]) - from_el_ranges = np.hstack([ - np.arange(grp.node_nr_base, grp.nnodes + 1, grp.nunit_nodes) - for grp in from_discr.groups]) - - # construct coarse element arrays in each from_range - el_indices = np.empty(from_ranges.shape[0] - 1, dtype=np.object) - el_ranges = np.full(from_grp_ranges[-1], -1, dtype=np.int) - for i in range(from_ranges.shape[0] - 1): - irange = np.s_[from_ranges[i]:from_ranges[i + 1]] - el_indices[i] = \ - np.unique(np.digitize(from_indices[irange], from_el_ranges)) - 1 - el_ranges[el_indices[i]] = i - el_indices = np.hstack(el_indices) - - # construct lookup table - to_el_table = [np.full(g.nelements, -1, dtype=np.int) - for g in resampler.to_discr.groups] - - for igrp, grp in enumerate(resampler.groups): - for batch in grp.batches: - to_el_table[igrp][batch.to_element_indices.get(queue)] = \ - from_grp_ranges[igrp] + batch.from_element_indices.get(queue) - - # construct fine node index list - indices = [np.empty(0, dtype=np.int) - for _ in range(from_ranges.shape[0] - 1)] - for igrp in range(len(resampler.groups)): - to_element_indices = \ - np.where(np.isin(to_el_table[igrp], el_indices))[0] - - for i, j in zip(el_ranges[to_el_table[igrp][to_element_indices]], - to_element_indices): - indices[i] = np.hstack([indices[i], - _element_node_range(resampler.to_discr.groups[igrp], j)]) - - ranges = to_device(queue, - np.cumsum([0] + [box.shape[0] for box in indices])) - indices = to_device(queue, np.hstack(indices)) - - return indices, ranges + + # construct ranges + from_discr = resampler.from_discr + from_grp_ranges = np.cumsum([0] + + [grp.nelements for grp in from_discr.mesh.groups]) + from_el_ranges = np.hstack([ + np.arange(grp.node_nr_base, grp.nnodes + 1, grp.nunit_nodes) + for grp in from_discr.groups]) + + # construct coarse element arrays in each from_range + el_indices = np.empty(from_indices.nblocks, dtype=np.object) + el_ranges = np.full(from_grp_ranges[-1], -1, dtype=np.int) + for i in range(from_indices.nblocks): + ifrom = from_indices.block_indices(i) + 
el_indices[i] = np.unique(np.digitize(ifrom, from_el_ranges)) - 1 + el_ranges[el_indices[i]] = i + el_indices = np.hstack(el_indices) + + # construct lookup table + to_el_table = [np.full(g.nelements, -1, dtype=np.int) + for g in resampler.to_discr.groups] + + for igrp, grp in enumerate(resampler.groups): + for batch in grp.batches: + to_el_table[igrp][batch.to_element_indices.get(queue)] = \ + from_grp_ranges[igrp] + batch.from_element_indices.get(queue) + + # construct fine node index list + indices = [np.empty(0, dtype=np.int) + for _ in range(from_indices.nblocks)] + for igrp in range(len(resampler.groups)): + to_element_indices = \ + np.where(np.isin(to_el_table[igrp], el_indices))[0] + + for i, j in zip(el_ranges[to_el_table[igrp][to_element_indices]], + to_element_indices): + indices[i] = np.hstack([indices[i], + _element_node_range(resampler.to_discr.groups[igrp], j)]) + + ranges = to_device(queue, + np.cumsum([0] + [b.shape[0] for b in indices])) + indices = to_device(queue, np.hstack(indices)) + + return BlockIndexRanges(resampler.cl_context, + indices.with_queue(None), + ranges.with_queue(None)) # }}} @@ -283,10 +279,16 @@ def partition_from_coarse(queue, resampler, from_indices, from_ranges): class ProxyGenerator(object): r""" - :arg discr: a :class:`pytential.qbx.QBXLayerPotentialSource`. - :arg nproxy: number of proxy points. - :arg ratio: a ratio used to compute the proxy point radius. The radius - is computed in the :math:`L_2` norm, resulting in a circle or + .. attribute:: nproxy + .. attribute:: ambient_dim + .. attribute:: source + + A :class:`pytential.qbx.QBXLayerPotentialSource`. + + .. attribute:: ratio + + A ratio used to compute the proxy point radius. The radius + is computed in the :math:`\ell^2` norm, resulting in a circle or sphere of proxy points. For QBX, we have two radii of interest for a set of points: the radius :math:`r_{block}` of the smallest ball containing all the points and the radius @@ -304,14 +306,21 @@ class ProxyGenerator(object): r = \theta r_{qbx}. + .. attribute:: ref_mesh + + Reference mesh. Can be used to construct a mesh for a proxy + ball :math:`i` by translating it to `center[i]` and scaling by + `radii[i]`, as obtained by :meth:`__call__` (see + :meth:`meshmode.mesh.processing.affine_map`). + .. 
automethod:: __call__ """ - def __init__(self, source, nproxy=30, ratio=1.1, **kwargs): + def __init__(self, source, nproxy=None, ratio=None, **kwargs): self.source = source - self.ratio = abs(ratio) - self.nproxy = int(abs(nproxy)) self.ambient_dim = source.density_discr.ambient_dim + self.ratio = 1.1 if ratio is None else ratio + self.nproxy = 32 if nproxy is None else nproxy if self.ambient_dim == 2: from meshmode.mesh.generation import ellipse, make_curve_mesh @@ -347,12 +356,12 @@ class ProxyGenerator(object): <> npoints = srcranges[irange + 1] - srcranges[irange] proxy_center[idim, irange] = 1.0 / npoints * \ - reduce(sum, i, nodes[idim, srcindices[i + ioffset]]) \ + reduce(sum, i, sources[idim, srcindices[i + ioffset]]) \ {{dup=idim:i}} <> rblk = simul_reduce(max, i, sqrt(simul_reduce(sum, idim, \ (proxy_center[idim, irange] - - nodes[idim, srcindices[i + ioffset]]) ** 2))) + sources[idim, srcindices[i + ioffset]]) ** 2))) <> rqbx_int = simul_reduce(max, i, sqrt(simul_reduce(sum, idim, \ (proxy_center[idim, irange] - @@ -368,17 +377,17 @@ class ProxyGenerator(object): end """.format(radius_expr=radius_expr)], [ - lp.GlobalArg("nodes", None, - shape=(self.ambient_dim, "nnodes")), + lp.GlobalArg("sources", None, + shape=(self.ambient_dim, "nsources")), lp.GlobalArg("center_int", None, - shape=(self.ambient_dim, "nnodes"), dim_tags="sep,C"), + shape=(self.ambient_dim, "nsources"), dim_tags="sep,C"), lp.GlobalArg("center_ext", None, - shape=(self.ambient_dim, "nnodes"), dim_tags="sep,C"), + shape=(self.ambient_dim, "nsources"), dim_tags="sep,C"), lp.GlobalArg("proxy_center", None, shape=(self.ambient_dim, "nranges")), lp.GlobalArg("proxy_radius", None, shape="nranges"), - lp.ValueArg("nnodes", np.int), + lp.ValueArg("nsources", np.int), "..." ], name="proxy_generator_knl", @@ -397,16 +406,12 @@ class ProxyGenerator(object): return knl - def __call__(self, queue, srcindices, srcranges, **kwargs): + def __call__(self, queue, indices, **kwargs): """Generate proxy points for each given range of source points in the discretization in :attr:`source`. :arg queue: a :class:`pyopencl.CommandQueue`. - :arg srcindices: a :class:`pyopencl.array.Array` of indices around - which to construct proxy balls. - :arg srcranges: an :class:`pyopencl.array.Array` of size `(nranges + 1,)` - used to index into :attr:`srcindices`. Each one of the `nranges` - ranges will get a proxy ball. + :arg indices: a :class:`sumpy.tools.BlockIndexRanges`. :return: a tuple of `(proxies, pxyranges, pxycenters, pxyranges)`, where each element is a :class:`pyopencl.array.Array`. 
The @@ -423,18 +428,19 @@ class ProxyGenerator(object): knl = self.get_kernel() _, (centers_dev, radii_dev,) = knl(queue, - nodes=self.source.density_discr.nodes(), + sources=self.source.density_discr.nodes(), center_int=get_centers_on_side(self.source, -1), center_ext=get_centers_on_side(self.source, +1), expansion_radii=self.source._expansion_radii("nsources"), - srcindices=srcindices, srcranges=srcranges, **kwargs) + srcindices=indices.indices, + srcranges=indices.ranges, **kwargs) centers = centers_dev.get() radii = radii_dev.get() from meshmode.mesh.processing import affine_map - proxies = np.empty(srcranges.shape[0] - 1, dtype=np.object) + proxies = np.empty(indices.nblocks, dtype=np.object) - for i in range(srcranges.shape[0] - 1): + for i in range(indices.nblocks): mesh = affine_map(self.ref_mesh, A=(radii[i] * np.eye(self.ambient_dim)), b=centers[:, i].reshape(-1)) @@ -442,7 +448,7 @@ class ProxyGenerator(object): pxyranges = cl.array.arange(queue, 0, proxies.shape[0] * proxies[0].shape[-1] + 1, proxies[0].shape[-1], - dtype=srcranges.dtype) + dtype=indices.ranges.dtype) proxies = make_obj_array([ cl.array.to_device(queue, np.hstack([p[idim] for p in proxies])) for idim in range(self.ambient_dim)]) @@ -454,23 +460,19 @@ class ProxyGenerator(object): return proxies, pxyranges, centers, radii_dev -def build_neighbor_list(discr, srcindices, srcranges, pxycenters, pxyradii, - max_nodes_in_box=None, **kwargs): +def gather_block_neighbor_points(discr, indices, pxycenters, pxyradii, + max_nodes_in_box=None, **kwargs): """Generate a set of neighboring points for each range of points in :attr:`discr`. Neighboring points of a range :math:`i` are defined as all the points inside the proxy ball :math:`i` that do not also belong to the range itself. :arg discr: a :class:`meshmode.discretization.Discretization`. - :arg srcindices: an array of indices for a subset of the nodes in - :attr:`discr`. - :arg srcranges: an array used to index into the :attr:`srcindices` array. + :arg indices: a :class:`sumpy.tools.BlockIndexRanges`. :arg pxycenters: an array containing the center of each proxy ball. :arg pxyradii: an array containing the radius of each proxy ball. - :return: a tuple `(nbrindices, nbrranges)`, where each value is a - :class:`pyopencl.array.Array`. For a range :math:`i`, we can - get the slice using `nbrindices[nbrranges[i]:nbrranges[i + 1]]`. + :return: a tuple :class:`sumpy.tools.BlockIndexRanges`. 
""" if max_nodes_in_box is None: @@ -478,21 +480,18 @@ def build_neighbor_list(discr, srcindices, srcranges, pxycenters, pxyradii, max_nodes_in_box = 32 with cl.CommandQueue(discr.cl_context) as queue: - if isinstance(srcindices, cl.array.Array): - srcindices = srcindices.get(queue) - if isinstance(srcranges, cl.array.Array): - srcranges = srcranges.get(queue) + indices = indices.get(queue) - # NOTE: this is used for multiple reasons: + # NOTE: this is constructed for multiple reasons: # * TreeBuilder takes object arrays - # * `srcndices` can be a small subset of nodes, so this will save + # * `srcindices` can be a small subset of nodes, so this will save # some work # * `srcindices` may reorder the array returned by nodes(), so this # makes sure that we have the same order in tree.user_source_ids # and friends sources = discr.nodes().get(queue) sources = make_obj_array([ - cl.array.to_device(queue, sources[idim, srcindices]) + cl.array.to_device(queue, sources[idim, indices.indices]) for idim in range(discr.ambient_dim)]) # construct tree @@ -515,8 +514,8 @@ def build_neighbor_list(discr, srcindices, srcranges, pxycenters, pxyradii, if isinstance(pxyradii, cl.array.Array): pxyradii = pxyradii.get(queue) - nbrindices = np.empty(srcranges.shape[0] - 1, dtype=np.object) - for iproxy in range(srcranges.shape[0] - 1): + nbrindices = np.empty(indices.nblocks, dtype=np.object) + for iproxy in range(indices.nblocks): # get list of boxes intersecting the current ball istart = query.leaves_near_ball_starts[iproxy] iend = query.leaves_near_ball_starts[iproxy + 1] @@ -535,44 +534,42 @@ def build_neighbor_list(discr, srcindices, srcranges, pxycenters, pxyradii, center = pxycenters[:, iproxy].reshape(-1, 1) radius = pxyradii[iproxy] mask = (la.norm(nodes - center, axis=0) < radius) & \ - ((isources < srcranges[iproxy]) | - (srcranges[iproxy + 1] <= isources)) + ((isources < indices.ranges[iproxy]) | + (indices.ranges[iproxy + 1] <= isources)) - nbrindices[iproxy] = srcindices[isources[mask]] + nbrindices[iproxy] = indices.indices[isources[mask]] nbrranges = to_device(queue, np.cumsum([0] + [n.shape[0] for n in nbrindices])) nbrindices = to_device(queue, np.hstack(nbrindices)) - return nbrindices, nbrranges + return BlockIndexRanges(discr.cl_context, + nbrindices.with_queue(None), + nbrranges.with_queue(None)) -def build_skeleton_list(source, srcindices, srcranges, **kwargs): - """Generate sets of skeleton points for each given range of indices - in the :attr:`source` discretization. Skeleton points are meant to - model the interactions of a set of points. They are composed of two - parts: +def gather_block_interaction_points(source, indices, + ratio=None, + nproxy=None, + max_nodes_in_box=None): + """Generate sets of interaction points for each given range of indices + in the :attr:`source` discretization. For each input range of indices, + the corresponding output range of points is consists of: - - a set of proxy points (or balls) around a given range, which - models farfield interactions. + - a set of proxy points (or balls) around the range, which + model farfield interactions. These are constructed using + :class:`ProxyGenerator`. - a set of neighboring points that are inside the proxy balls, but do not belong to the given range, which model nearby interactions. + These are constructed with :meth:`gather_block_neighbor_points`. :arg source: a :class:`pytential.qbx.QBXLayerPotentialSource`. - :arg srcindices: a :class:`pyopencl.array.Array` of indices for points - in :attr:`source`. 
- :arg srcranges: a :class:`pyopencl.array.Array` describing the ranges - from :attr:`srcindices` around which to build proxy points. For each - range, this builds a ball of proxy points centered - at the center of mass of the points in the range with a radius - defined by :attr:`ratio`. - :arg kwargs: additional arguments passed to :class:`ProxyGenerator` - or :func:`build_neighbor_list`. - - :returns: a tuple `(skeletons, sklranges)`, where each value is a + :arg indices: a :class:`sumpy.tools.BlockIndexRanges`. + + :return: a tuple `(nodes, ranges)`, where each value is a :class:`pyopencl.array.Array`. For a range :math:`i`, we can - get the slice using `skeletons[sklranges[i]:sklranges[i + 1]]`. + get the slice using `nodes[ranges[i]:ranges[i + 1]]`. """ @memoize @@ -593,15 +590,14 @@ def build_skeleton_list(source, srcindices, srcranges, **kwargs): <> ngbend = nbrranges[irange + 1] <> nngbblock = ngbend - ngbstart - <> sklstart = pxyranges[irange] + nbrranges[irange] - skeletons[idim, sklstart + ipxy] = \ + <> istart = pxyranges[irange] + nbrranges[irange] + nodes[idim, istart + ipxy] = \ proxies[idim, pxystart + ipxy] \ {id_prefix=write_pxy,nosync=write_ngb} - skeletons[idim, sklstart + npxyblock + ingb] = \ + nodes[idim, istart + npxyblock + ingb] = \ sources[idim, nbrindices[ngbstart + ingb]] \ {id_prefix=write_ngb,nosync=write_pxy} - sklranges[irange + 1] = sklranges[irange] + \ - npxyblock + nngbblock + ranges[irange + 1] = ranges[irange] + npxyblock + nngbblock end """, [ @@ -611,14 +607,14 @@ def build_skeleton_list(source, srcindices, srcranges, **kwargs): shape=(source.ambient_dim, "nproxies"), dim_tags="sep,C"), lp.GlobalArg("nbrindices", None, shape="nnbrindices"), - lp.GlobalArg("skeletons", None, + lp.GlobalArg("nodes", None, shape=(source.ambient_dim, "nproxies + nnbrindices")), lp.ValueArg("nsources", np.int), lp.ValueArg("nproxies", np.int), lp.ValueArg("nnbrindices", np.int), "..." 
], - name="concat_skl", + name="concat_proxy_and_neighbors", default_offset=lp.auto, silenced_warnings="write_race(write_*)", fixed_parameters=dict(dim=source.ambient_dim), @@ -630,23 +626,23 @@ def build_skeleton_list(source, srcindices, srcranges, **kwargs): return loopy_knl with cl.CommandQueue(source.cl_context) as queue: - proxy = ProxyGenerator(source, **kwargs) - proxies, pxyranges, pxycenters, pxyradii = \ - proxy(queue, srcindices, srcranges) + generator = ProxyGenerator(source, ratio=ratio, nproxy=nproxy) + proxies, pxyranges, pxycenters, pxyradii = generator(queue, indices) - nbrindices, nbrranges = build_neighbor_list(source.density_discr, - srcindices, srcranges, pxycenters, pxyradii, **kwargs) + neighbors = gather_block_neighbor_points(source.density_discr, + indices, pxycenters, pxyradii, + max_nodes_in_box=max_nodes_in_box) - sklranges = cl.array.zeros(queue, srcranges.shape, dtype=np.int) - _, (skeletons, sklranges) = knl()(queue, + ranges = cl.array.zeros(queue, indices.nblocks + 1, dtype=np.int) + _, (nodes, ranges) = knl()(queue, sources=source.density_discr.nodes(), proxies=proxies, pxyranges=pxyranges, - nbrindices=nbrindices, - nbrranges=nbrranges, - sklranges=sklranges) + nbrindices=neighbors.indices, + nbrranges=neighbors.ranges, + ranges=ranges) - return skeletons, sklranges + return nodes.with_queue(None), ranges.with_queue(None) # }}} diff --git a/requirements.txt b/requirements.txt index 8925c34e..9cd4ee99 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,5 +7,5 @@ git+https://github.com/inducer/islpy git+https://github.com/inducer/loopy git+https://gitlab.tiker.net/inducer/boxtree git+https://github.com/inducer/meshmode -git+https://gitlab.tiker.net/inducer/sumpy +git+https://gitlab.tiker.net/fik2/sumpy@block-index-additions git+https://github.com/inducer/pyfmmlib diff --git a/test/test_linalg_proxy.py b/test/test_linalg_proxy.py index 6345c775..a9d628f4 100644 --- a/test/test_linalg_proxy.py +++ b/test/test_linalg_proxy.py @@ -31,6 +31,7 @@ import numpy.linalg as la import pyopencl as cl from pyopencl.array import to_device +from sumpy.tools import BlockIndexRanges from meshmode.mesh.generation import ( # noqa ellipse, NArmedStarfish, generate_torus, make_curve_mesh) @@ -75,11 +76,11 @@ def _build_qbx_discr(queue, return qbx -def _build_block_index(queue, discr, - nblks=10, - factor=1.0, - method='elements', - use_tree=True): +def _build_block_index(discr, + nblks=10, + factor=1.0, + method='elements', + use_tree=True): from pytential.linalg.proxy import ( partition_by_nodes, partition_by_elements) @@ -94,38 +95,46 @@ def _build_block_index(queue, discr, max_particles_in_box = nnodes // nblks # create index ranges - if method == "nodes": - indices, ranges = partition_by_nodes(queue, discr, - use_tree=use_tree, max_nodes_in_box=max_particles_in_box) - elif method == "elements": - indices, ranges = partition_by_elements(queue, discr, - use_tree=use_tree, max_elements_in_box=max_particles_in_box) + if method == 'nodes': + indices = partition_by_nodes(discr, + use_tree=use_tree, + max_nodes_in_box=max_particles_in_box) + elif method == 'elements': + indices = partition_by_elements(discr, + use_tree=use_tree, + max_elements_in_box=max_particles_in_box) else: - raise ValueError("unknown method: {}".format(method)) + raise ValueError('unknown method: {}'.format(method)) # randomly pick a subset of points if abs(factor - 1.0) > 1.0e-14: - indices = indices.get(queue) - ranges = ranges.get(queue) + with cl.CommandQueue(discr.cl_context) as queue: + indices = 
indices.get(queue) + + indices_ = np.empty(indices.nblocks, dtype=np.object) + for i in range(indices.nblocks): + iidx = indices.block_indices(i) + isize = int(factor * len(iidx)) + isize = max(1, min(isize, len(iidx))) - indices_ = np.empty(ranges.shape[0] - 1, dtype=np.object) - for i in range(ranges.shape[0] - 1): - iidx = indices[np.s_[ranges[i]:ranges[i + 1]]] - isize = int(factor * len(iidx)) - isize = max(1, min(isize, len(iidx))) + indices_[i] = np.sort( + np.random.choice(iidx, size=isize, replace=False)) - indices_[i] = np.sort( - np.random.choice(iidx, size=isize, replace=False)) + ranges_ = to_device(queue, + np.cumsum([0] + [r.shape[0] for r in indices_])) + indices_ = to_device(queue, np.hstack(indices_)) - ranges = to_device(queue, - np.cumsum([0] + [r.shape[0] for r in indices_])) - indices = to_device(queue, np.hstack(indices_)) + indices = BlockIndexRanges(discr.cl_context, + indices_.with_queue(None), + ranges_.with_queue(None)) - return indices, ranges + return indices -def _plot_partition_indices(queue, discr, indices, ranges, **kwargs): +def _plot_partition_indices(queue, discr, indices, **kwargs): import matplotlib.pyplot as pt + indices = indices.get(queue) + args = [ kwargs.get("method", "unknown"), "tree" if kwargs.get("use_tree", False) else "linear", @@ -133,12 +142,8 @@ def _plot_partition_indices(queue, discr, indices, ranges, **kwargs): discr.ambient_dim ] - if isinstance(indices, cl.array.Array): - indices = indices.get() - ranges = ranges.get() - pt.figure(figsize=(10, 8), dpi=300) - pt.plot(np.diff(ranges)) + pt.plot(np.diff(indices.ranges)) pt.savefig("test_partition_{0}_{1}_{3}d_ranges_{2}.png".format(*args)) pt.clf() @@ -147,10 +152,10 @@ def _plot_partition_indices(queue, discr, indices, ranges, **kwargs): pt.figure(figsize=(10, 8), dpi=300) - if indices.shape[0] != discr.nnodes: + if indices.indices.shape[0] != discr.nnodes: pt.plot(sources[0], sources[1], 'ko', alpha=0.5) - for i in range(ranges.shape[0] - 1): - isrc = indices[np.s_[ranges[i]:ranges[i + 1]]] + for i in range(indices.nblocks): + isrc = indices.block_indices(i) pt.plot(sources[0][isrc], sources[1][isrc], 'o') pt.xlim([-1.5, 1.5]) @@ -167,8 +172,8 @@ def _plot_partition_indices(queue, discr, indices, ranges, **kwargs): from meshmode.discretization.visualization import make_visualizer marker = -42.0 * np.ones(discr.nnodes) - for i in range(ranges.shape[0] - 1): - isrc = indices[np.s_[ranges[i]:ranges[i + 1]]] + for i in range(indices.nblocks): + isrc = indices.block_indices(i) marker[isrc] = 10.0 * (i + 1.0) vis = make_visualizer(queue, discr, 10) @@ -190,7 +195,7 @@ def test_partition_points(ctx_factory, method, use_tree, ndim, visualize=False): queue = cl.CommandQueue(ctx) qbx = _build_qbx_discr(queue, ndim=ndim) - srcindices, srcranges = _build_block_index(queue, qbx.density_discr, + srcindices = _build_block_index(qbx.density_discr, method=method, use_tree=use_tree, factor=0.6) @@ -201,35 +206,32 @@ def test_partition_coarse(ctx_factory, use_tree, ndim, visualize=False): queue = cl.CommandQueue(ctx) qbx = _build_qbx_discr(queue, ndim=ndim) - srcindices, srcranges = _build_block_index(queue, qbx.density_discr, + srcindices = _build_block_index(qbx.density_discr, method="elements", use_tree=use_tree) if visualize: discr = qbx.resampler.from_discr - _plot_partition_indices(queue, discr, srcindices, srcranges, + _plot_partition_indices(queue, discr, srcindices, method="elements", use_tree=use_tree, pid="stage1") from pytential.linalg.proxy import partition_from_coarse resampler = 
qbx.direct_resampler t_start = time.time() - srcindices_, srcranges_ = \ - partition_from_coarse(queue, resampler, srcindices, srcranges) + srcindices_ = partition_from_coarse(resampler, srcindices) t_end = time.time() if visualize: print('Time: {:.5f}s'.format(t_end - t_start)) - srcindices = srcindices.get() - srcranges = srcranges.get() - srcindices_ = srcindices_.get() - srcranges_ = srcranges_.get() + srcindices = srcindices.get(queue) + srcindices_ = srcindices_.get(queue) sources = resampler.from_discr.nodes().get(queue) sources_ = resampler.to_discr.nodes().get(queue) - for i in range(srcranges.shape[0] - 1): - isrc = srcindices[np.s_[srcranges[i]:srcranges[i + 1]]] - isrc_ = srcindices_[np.s_[srcranges_[i]:srcranges_[i + 1]]] + for i in range(srcindices.nblocks): + isrc = srcindices.block_indices(i) + isrc_ = srcindices_.block_indices(i) for j in range(ndim): assert np.min(sources_[j][isrc_]) <= np.min(sources[j][isrc]) @@ -237,7 +239,7 @@ def test_partition_coarse(ctx_factory, use_tree, ndim, visualize=False): if visualize: discr = resampler.to_discr - _plot_partition_indices(queue, discr, srcindices_, srcranges_, + _plot_partition_indices(queue, discr, srcindices_, method="elements", use_tree=use_tree, pid="stage2") @@ -248,29 +250,25 @@ def test_proxy_generator(ctx_factory, ndim, factor, visualize=False): queue = cl.CommandQueue(ctx) qbx = _build_qbx_discr(queue, ndim=ndim) - srcindices, srcranges = _build_block_index(queue, qbx.density_discr, + srcindices = _build_block_index(qbx.density_discr, method='nodes', factor=factor) from pytential.linalg.proxy import ProxyGenerator generator = ProxyGenerator(qbx, ratio=1.1) - proxies, pxyranges, pxycenters, pxyradii = \ - generator(queue, srcindices, srcranges) + proxies, pxyranges, pxycenters, pxyradii = generator(queue, srcindices) proxies = np.vstack([p.get() for p in proxies]) pxyranges = pxyranges.get() pxycenters = np.vstack([c.get() for c in pxycenters]) pxyradii = pxyradii.get() - for i in range(srcranges.shape[0] - 1): + for i in range(srcindices.nblocks): ipxy = np.s_[pxyranges[i]:pxyranges[i + 1]] r = la.norm(proxies[:, ipxy] - pxycenters[:, i].reshape(-1, 1), axis=0) assert np.allclose(r - pxyradii[i], 0.0, atol=1.0e-14) - if visualize: - srcindices = srcindices.get() - srcranges = srcranges.get() - + srcindices = srcindices.get(queue) if visualize: if qbx.ambient_dim == 2: import matplotlib.pyplot as pt @@ -283,8 +281,8 @@ def test_proxy_generator(ctx_factory, ndim, factor, visualize=False): ce = np.vstack([c.get(queue) for c in ce]) r = qbx._expansion_radii("nsources").get(queue) - for i in range(srcranges.shape[0] - 1): - isrc = srcindices[np.s_[srcranges[i]:srcranges[i + 1]]] + for i in range(srcindices.nblocks): + isrc = srcindices.block_indices(i) ipxy = np.s_[pxyranges[i]:pxyranges[i + 1]] pt.figure(figsize=(10, 8)) @@ -297,7 +295,8 @@ def test_proxy_generator(ctx_factory, ndim, factor, visualize=False): pt.plot(density_nodes[0], density_nodes[1], 'ko', ms=2.0, alpha=0.5) - pt.plot(density_nodes[0, srcindices], density_nodes[1, srcindices], + pt.plot(density_nodes[0, srcindices.indices], + density_nodes[1, srcindices.indices], 'o', ms=2.0) pt.plot(density_nodes[0, isrc], density_nodes[1, isrc], 'o', ms=2.0) @@ -317,7 +316,7 @@ def test_proxy_generator(ctx_factory, ndim, factor, visualize=False): from meshmode.discretization.poly_element import \ InterpolatoryQuadratureSimplexGroupFactory - for i in range(srcranges.shape[0] - 1): + for i in range(srcindices.nblocks): mesh = affine_map(generator.ref_mesh, 
A=(pxyradii[i] * np.eye(ndim)), b=pxycenters[:, i].reshape(-1)) @@ -335,57 +334,58 @@ def test_proxy_generator(ctx_factory, ndim, factor, visualize=False): @pytest.mark.parametrize("ndim", [2, 3]) @pytest.mark.parametrize("factor", [1.0, 0.6]) -def test_area_query(ctx_factory, ndim, factor, visualize=False): +def test_interaction_points(ctx_factory, ndim, factor, visualize=False): ctx = ctx_factory() queue = cl.CommandQueue(ctx) qbx = _build_qbx_discr(queue, ndim=ndim) - srcindices, srcranges = _build_block_index(queue, qbx.density_discr, + srcindices = _build_block_index(qbx.density_discr, method='nodes', factor=factor) # generate proxy points from pytential.linalg.proxy import ProxyGenerator generator = ProxyGenerator(qbx) - _, _, pxycenters, pxyradii = generator(queue, srcindices, srcranges) + _, _, pxycenters, pxyradii = generator(queue, srcindices) - from pytential.linalg.proxy import build_neighbor_list, build_skeleton_list - nbrindices, nbrranges = build_neighbor_list(qbx.density_discr, - srcindices, srcranges, pxycenters, pxyradii) - skeletons, sklranges = build_skeleton_list(qbx, srcindices, srcranges) + from pytential.linalg.proxy import ( # noqa + gather_block_neighbor_points, + gather_block_interaction_points) + nbrindices = gather_block_neighbor_points(qbx.density_discr, + srcindices, pxycenters, pxyradii) + nodes, ranges = gather_block_interaction_points(qbx, srcindices) - srcindices = srcindices.get() - srcranges = srcranges.get() - nbrindices = nbrindices.get() - nbrranges = nbrranges.get() + srcindices = srcindices.get(queue) + nbrindices = nbrindices.get(queue) - for i in range(srcranges.shape[0] - 1): - isrc = np.s_[srcranges[i]:srcranges[i + 1]] - inbr = np.s_[nbrranges[i]:nbrranges[i + 1]] + for i in range(srcindices.nblocks): + isrc = srcindices.block_indices(i) + inbr = nbrindices.block_indices(i) - assert not np.any(np.isin(nbrindices[inbr], srcindices[isrc])) + assert not np.any(np.isin(inbr, isrc)) if visualize: if ndim == 2: import matplotlib.pyplot as pt density_nodes = qbx.density_discr.nodes().get(queue) - skeletons = skeletons.get(queue) - sklranges = sklranges.get(queue) + nodes = nodes.get(queue) + ranges = ranges.get(queue) - for i in range(srcranges.shape[0] - 1): - isrc = srcindices[np.s_[srcranges[i]:srcranges[i + 1]]] - ingb = nbrindices[nbrranges[i]:nbrranges[i + 1]] - iskl = np.s_[sklranges[i]:sklranges[i + 1]] + for i in range(srcindices.nblocks): + isrc = srcindices.block_indices(i) + inbr = nbrindices.block_indices(i) + iall = np.s_[ranges[i]:ranges[i + 1]] pt.figure(figsize=(10, 8)) pt.plot(density_nodes[0], density_nodes[1], 'ko', ms=2.0, alpha=0.5) - pt.plot(density_nodes[0, srcindices], density_nodes[1, srcindices], + pt.plot(density_nodes[0, srcindices.indices], + density_nodes[1, srcindices.indices], 'o', ms=2.0) pt.plot(density_nodes[0, isrc], density_nodes[1, isrc], 'o', ms=2.0) - pt.plot(density_nodes[0, ingb], density_nodes[1, ingb], + pt.plot(density_nodes[0, inbr], density_nodes[1, inbr], 'o', ms=2.0) - pt.plot(skeletons[0, iskl], skeletons[1, iskl], + pt.plot(nodes[0, iall], nodes[1, iall], 'x', ms=2.0) pt.xlim([-1.5, 1.5]) pt.ylim([-1.5, 1.5]) @@ -397,16 +397,16 @@ def test_area_query(ctx_factory, ndim, factor, visualize=False): from meshmode.discretization.visualization import make_visualizer marker = np.empty(qbx.density_discr.nnodes) - for i in range(srcranges.shape[0] - 1): - isrc = srcindices[np.s_[srcranges[i]:srcranges[i + 1]]] - ingb = nbrindices[nbrranges[i]:nbrranges[i + 1]] + for i in range(srcindices.nblocks): + isrc = 
srcindices.block_indices(i) + inbr = nbrindices.block_indices(i) # TODO: some way to turn off some of the interpolations # would help visualize this better. marker.fill(0.0) marker[srcindices] = 0.0 marker[isrc] = -42.0 - marker[ingb] = +42.0 + marker[inbr] = +42.0 marker_dev = cl.array.to_device(queue, marker) vis = make_visualizer(queue, qbx.density_discr, 10) -- GitLab From 64836e16fddcf0153651b9c97908288fd83d0469 Mon Sep 17 00:00:00 2001 From: Alex Fikl Date: Wed, 27 Jun 2018 21:19:47 -0500 Subject: [PATCH 16/25] tests: fix visualization errors --- test/test_linalg_proxy.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/test/test_linalg_proxy.py b/test/test_linalg_proxy.py index a9d628f4..fc2b0653 100644 --- a/test/test_linalg_proxy.py +++ b/test/test_linalg_proxy.py @@ -190,7 +190,7 @@ def _plot_partition_indices(queue, discr, indices, **kwargs): @pytest.mark.parametrize("method", ["nodes", "elements"]) @pytest.mark.parametrize("use_tree", [True, False]) @pytest.mark.parametrize("ndim", [2, 3]) -def test_partition_points(ctx_factory, method, use_tree, ndim, visualize=False): +def test_partition_points(ctx_factory, method, use_tree, ndim, visualize=True): ctx = ctx_factory() queue = cl.CommandQueue(ctx) @@ -201,7 +201,7 @@ def test_partition_points(ctx_factory, method, use_tree, ndim, visualize=False): @pytest.mark.parametrize("use_tree", [True, False]) @pytest.mark.parametrize("ndim", [2, 3]) -def test_partition_coarse(ctx_factory, use_tree, ndim, visualize=False): +def test_partition_coarse(ctx_factory, use_tree, ndim, visualize=True): ctx = ctx_factory() queue = cl.CommandQueue(ctx) @@ -245,7 +245,7 @@ def test_partition_coarse(ctx_factory, use_tree, ndim, visualize=False): @pytest.mark.parametrize("ndim", [2, 3]) @pytest.mark.parametrize("factor", [1.0, 0.6]) -def test_proxy_generator(ctx_factory, ndim, factor, visualize=False): +def test_proxy_generator(ctx_factory, ndim, factor, visualize=True): ctx = ctx_factory() queue = cl.CommandQueue(ctx) @@ -334,7 +334,7 @@ def test_proxy_generator(ctx_factory, ndim, factor, visualize=False): @pytest.mark.parametrize("ndim", [2, 3]) @pytest.mark.parametrize("factor", [1.0, 0.6]) -def test_interaction_points(ctx_factory, ndim, factor, visualize=False): +def test_interaction_points(ctx_factory, ndim, factor, visualize=True): ctx = ctx_factory() queue = cl.CommandQueue(ctx) @@ -404,7 +404,7 @@ def test_interaction_points(ctx_factory, ndim, factor, visualize=False): # TODO: some way to turn off some of the interpolations # would help visualize this better. marker.fill(0.0) - marker[srcindices] = 0.0 + marker[srcindices.indices] = 0.0 marker[isrc] = -42.0 marker[inbr] = +42.0 marker_dev = cl.array.to_device(queue, marker) -- GitLab From ea10f0756803a470ba53b6ce01b2530fbb99b88d Mon Sep 17 00:00:00 2001 From: Alex Fikl Date: Wed, 27 Jun 2018 21:25:06 -0500 Subject: [PATCH 17/25] flake8 fixes --- pytential/linalg/proxy.py | 4 ++-- test/test_linalg_proxy.py | 6 ++++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/pytential/linalg/proxy.py b/pytential/linalg/proxy.py index 6a1d4cfe..bf2fbca5 100644 --- a/pytential/linalg/proxy.py +++ b/pytential/linalg/proxy.py @@ -316,7 +316,7 @@ class ProxyGenerator(object): .. 
automethod:: __call__ """ - def __init__(self, source, nproxy=None, ratio=None, **kwargs): + def __init__(self, source, nproxy=None, ratio=None): self.source = source self.ambient_dim = source.density_discr.ambient_dim self.ratio = 1.1 if ratio is None else ratio @@ -461,7 +461,7 @@ class ProxyGenerator(object): def gather_block_neighbor_points(discr, indices, pxycenters, pxyradii, - max_nodes_in_box=None, **kwargs): + max_nodes_in_box=None): """Generate a set of neighboring points for each range of points in :attr:`discr`. Neighboring points of a range :math:`i` are defined as all the points inside the proxy ball :math:`i` that do not also diff --git a/test/test_linalg_proxy.py b/test/test_linalg_proxy.py index fc2b0653..05fb8949 100644 --- a/test/test_linalg_proxy.py +++ b/test/test_linalg_proxy.py @@ -195,8 +195,10 @@ def test_partition_points(ctx_factory, method, use_tree, ndim, visualize=True): queue = cl.CommandQueue(ctx) qbx = _build_qbx_discr(queue, ndim=ndim) - srcindices = _build_block_index(qbx.density_discr, - method=method, use_tree=use_tree, factor=0.6) + _build_block_index(qbx.density_discr, + method=method, + use_tree=use_tree, + factor=0.6) @pytest.mark.parametrize("use_tree", [True, False]) -- GitLab From 7a268fe3b5622faf9a2bcc43a342294f4805635a Mon Sep 17 00:00:00 2001 From: Alex Fikl Date: Wed, 27 Jun 2018 21:26:48 -0500 Subject: [PATCH 18/25] fix typo in requirements.txt --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 9cd4ee99..a1a00691 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,5 +7,5 @@ git+https://github.com/inducer/islpy git+https://github.com/inducer/loopy git+https://gitlab.tiker.net/inducer/boxtree git+https://github.com/inducer/meshmode -git+https://gitlab.tiker.net/fik2/sumpy@block-index-additions +git+https://gitlab.tiker.net/fikl2/sumpy@block-index-additions git+https://github.com/inducer/pyfmmlib -- GitLab From 07f11df14845e104345487a31a8d7ca2a26575bd Mon Sep 17 00:00:00 2001 From: Alex Fikl Date: Thu, 28 Jun 2018 08:14:01 -0500 Subject: [PATCH 19/25] test: turn off visualization by default --- pytential/linalg/proxy.py | 8 ++++---- test/test_linalg_proxy.py | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/pytential/linalg/proxy.py b/pytential/linalg/proxy.py index bf2fbca5..32f1633c 100644 --- a/pytential/linalg/proxy.py +++ b/pytential/linalg/proxy.py @@ -68,7 +68,7 @@ def _element_node_range(group, ielement): def partition_by_nodes(discr, use_tree=True, max_nodes_in_box=None): - """Generate clusters / ranges of nodes. The partition is created at the + """Generate equally sized ranges of nodes. The partition is created at the lowest level of granularity, i.e. nodes. This results in balanced ranges of points, but will split elements across different ranges. @@ -125,7 +125,7 @@ def partition_by_nodes(discr, def partition_by_elements(discr, use_tree=True, max_elements_in_box=None): - """Generate clusters / ranges of points. The partition is created at the + """Generate equally sized ranges of points. The partition is created at the element level, so that all the nodes belonging to an element belong to the same range. This can result in slightly larger differences in size between the ranges, but can be very useful when the individual partitions @@ -210,8 +210,8 @@ def partition_from_coarse(resampler, from_indices): The new partition will have the same number of ranges as the old partition. 
The nodes inside each range in the new partition are all the nodes in - :attr:`resampler.to_discr` that belong to the same region as the nodes - in the same range from :attr:`resampler.from_discr`. + :attr:`resampler.to_discr` that were refined from elements in the same + range from :attr:`resampler.from_discr`. :arg resampler: a :class:`meshmode.discretization.connection.DirectDiscretizationConnection`. diff --git a/test/test_linalg_proxy.py b/test/test_linalg_proxy.py index 05fb8949..ccf0c3e1 100644 --- a/test/test_linalg_proxy.py +++ b/test/test_linalg_proxy.py @@ -190,7 +190,7 @@ def _plot_partition_indices(queue, discr, indices, **kwargs): @pytest.mark.parametrize("method", ["nodes", "elements"]) @pytest.mark.parametrize("use_tree", [True, False]) @pytest.mark.parametrize("ndim", [2, 3]) -def test_partition_points(ctx_factory, method, use_tree, ndim, visualize=True): +def test_partition_points(ctx_factory, method, use_tree, ndim, visualize=False): ctx = ctx_factory() queue = cl.CommandQueue(ctx) @@ -203,7 +203,7 @@ def test_partition_points(ctx_factory, method, use_tree, ndim, visualize=True): @pytest.mark.parametrize("use_tree", [True, False]) @pytest.mark.parametrize("ndim", [2, 3]) -def test_partition_coarse(ctx_factory, use_tree, ndim, visualize=True): +def test_partition_coarse(ctx_factory, use_tree, ndim, visualize=False): ctx = ctx_factory() queue = cl.CommandQueue(ctx) @@ -247,7 +247,7 @@ def test_partition_coarse(ctx_factory, use_tree, ndim, visualize=True): @pytest.mark.parametrize("ndim", [2, 3]) @pytest.mark.parametrize("factor", [1.0, 0.6]) -def test_proxy_generator(ctx_factory, ndim, factor, visualize=True): +def test_proxy_generator(ctx_factory, ndim, factor, visualize=False): ctx = ctx_factory() queue = cl.CommandQueue(ctx) @@ -336,7 +336,7 @@ def test_proxy_generator(ctx_factory, ndim, factor, visualize=True): @pytest.mark.parametrize("ndim", [2, 3]) @pytest.mark.parametrize("factor", [1.0, 0.6]) -def test_interaction_points(ctx_factory, ndim, factor, visualize=True): +def test_interaction_points(ctx_factory, ndim, factor, visualize=False): ctx = ctx_factory() queue = cl.CommandQueue(ctx) -- GitLab From 96e3bfb6f717fd92981502545b2026269d10d6c0 Mon Sep 17 00:00:00 2001 From: Alex Fikl Date: Fri, 29 Jun 2018 15:06:59 -0500 Subject: [PATCH 20/25] update requirements --- .test-conda-env-py3-requirements.txt | 2 +- requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.test-conda-env-py3-requirements.txt b/.test-conda-env-py3-requirements.txt index c5ef1e9b..fa6c0426 100644 --- a/.test-conda-env-py3-requirements.txt +++ b/.test-conda-env-py3-requirements.txt @@ -1,5 +1,5 @@ git+https://gitlab.tiker.net/inducer/boxtree git+https://github.com/inducer/pymbolic git+https://github.com/inducer/loopy -git+https://gitlab.tiker.net/fikl2/sumpy@block-index-additions +git+https://gitlab.tiker.net/inducer/sumpy git+https://github.com/inducer/meshmode diff --git a/requirements.txt b/requirements.txt index a1a00691..8925c34e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,5 +7,5 @@ git+https://github.com/inducer/islpy git+https://github.com/inducer/loopy git+https://gitlab.tiker.net/inducer/boxtree git+https://github.com/inducer/meshmode -git+https://gitlab.tiker.net/fikl2/sumpy@block-index-additions +git+https://gitlab.tiker.net/inducer/sumpy git+https://github.com/inducer/pyfmmlib -- GitLab From b802994c02ddc64f3c71d85d5fefae6744656c6f Mon Sep 17 00:00:00 2001 From: Alex Fikl Date: Fri, 29 Jun 2018 19:07:31 -0500 Subject: [PATCH 21/25] 
direct-solver: change reference proxy ball generation --- pytential/linalg/proxy.py | 92 +++++++++++++++++++++++++++------------ test/test_linalg_proxy.py | 6 ++- 2 files changed, 70 insertions(+), 28 deletions(-) diff --git a/pytential/linalg/proxy.py b/pytential/linalg/proxy.py index 32f1633c..36d3a9cd 100644 --- a/pytential/linalg/proxy.py +++ b/pytential/linalg/proxy.py @@ -277,10 +277,57 @@ def partition_from_coarse(resampler, from_indices): # {{{ proxy point generator +def _generate_unit_sphere(ambient_dim, approx_npoints): + """Generate uniform points on a unit sphere. + + :arg ambient_dim: dimension of the ambient space. + :arg approx_npoints: approximate number of points to generate. If the + ambient space is 3D, this will not generate the exact number of points. + :return: array of shape ``(ambient_dim, npoints)``, where ``npoints`` + will not generally be the same as `approx_npoints`. + """ + + if ambient_dim == 2: + t = np.linspace(0.0, 2.0 * np.pi, approx_npoints) + points = np.vstack([np.cos(t), np.sin(t)]) + elif ambient_dim == 3: + # https://www.cmu.edu/biolphys/deserno/pdf/sphere_equi.pdf + a = 4.0 * np.pi / approx_npoints + m_theta = int(np.round(np.pi / np.sqrt(a))) + d_theta = np.pi / m_theta + d_phi = a / d_theta + + points = [] + for m in range(m_theta): + theta = np.pi * (m + 0.5) / m_theta + m_phi = int(np.round(2.0 * np.pi * np.sin(theta) / d_phi)) + + for n in range(m_phi): + phi = 2.0 * np.pi * n / m_phi + points.append(np.array([np.sin(theta) * np.cos(phi), + np.sin(theta) * np.sin(phi), + np.cos(theta)])) + + for i in range(ambient_dim): + for sign in [-1, 1]: + pole = np.zeros(ambient_dim) + pole[i] = sign + points.append(pole) + + points = np.array(points).T + else: + raise ValueError("ambient_dim > 3 not supported.") + + return points + + class ProxyGenerator(object): r""" - .. attribute:: nproxy .. attribute:: ambient_dim + .. attribute:: nproxy + + Approximate number of proxy points. In 2D, this is the exact + number of proxy points, but in 3D .. attribute:: source A :class:`pytential.qbx.QBXLayerPotentialSource`. @@ -306,12 +353,11 @@ class ProxyGenerator(object): r = \theta r_{qbx}. - .. attribute:: ref_mesh + .. attribute:: ref_points - Reference mesh. Can be used to construct a mesh for a proxy - ball :math:`i` by translating it to `center[i]` and scaling by - `radii[i]`, as obtained by :meth:`__call__` (see - :meth:`meshmode.mesh.processing.affine_map`). + Reference points on a unit ball. Can be used to construct the points + around a proxy ball :math:`i` by translating them to `center[i]` and + scaling by `radii[i]`, as obtained by :meth:`__call__`. .. automethod:: __call__ """ @@ -322,18 +368,8 @@ class ProxyGenerator(object): self.ratio = 1.1 if ratio is None else ratio self.nproxy = 32 if nproxy is None else nproxy - if self.ambient_dim == 2: - from meshmode.mesh.generation import ellipse, make_curve_mesh - - self.ref_mesh = make_curve_mesh(lambda t: ellipse(1.0, t), - np.linspace(0.0, 1.0, self.nproxy + 1), - self.nproxy) - elif self.ambient_dim == 3: - from meshmode.mesh.generation import generate_icosphere - - self.ref_mesh = generate_icosphere(1, self.nproxy) - else: - raise ValueError("unsupported ambient dimension") + self.ref_points = \ + _generate_unit_sphere(self.ambient_dim, self.nproxy) @memoize_method def get_kernel(self): @@ -424,6 +460,9 @@ class ProxyGenerator(object): `pxycenters[i]`. 
""" + def _affine_map(v, A, b): + return np.dot(A, v) + b + from pytential.qbx.utils import get_centers_on_side knl = self.get_kernel() @@ -437,17 +476,16 @@ class ProxyGenerator(object): centers = centers_dev.get() radii = radii_dev.get() - from meshmode.mesh.processing import affine_map proxies = np.empty(indices.nblocks, dtype=np.object) - for i in range(indices.nblocks): - mesh = affine_map(self.ref_mesh, - A=(radii[i] * np.eye(self.ambient_dim)), - b=centers[:, i].reshape(-1)) - proxies[i] = mesh.vertices - - pxyranges = cl.array.arange(queue, 0, - proxies.shape[0] * proxies[0].shape[-1] + 1, proxies[0].shape[-1], + proxies[i] = _affine_map(self.ref_points, + A=(radii[i] * np.eye(self.ambient_dim)), + b=centers[:, i].reshape(-1, 1)) + + pxyranges = cl.array.arange(queue, + 0, + proxies.shape[0] * proxies[0].shape[1] + 1, + proxies[0].shape[1], dtype=indices.ranges.dtype) proxies = make_obj_array([ cl.array.to_device(queue, np.hstack([p[idim] for p in proxies])) diff --git a/test/test_linalg_proxy.py b/test/test_linalg_proxy.py index ccf0c3e1..e8a063ca 100644 --- a/test/test_linalg_proxy.py +++ b/test/test_linalg_proxy.py @@ -318,8 +318,12 @@ def test_proxy_generator(ctx_factory, ndim, factor, visualize=False): from meshmode.discretization.poly_element import \ InterpolatoryQuadratureSimplexGroupFactory + from meshmode.mesh.generation import generate_icosphere + ref_mesh = generate_icosphere(1, generator.nproxy) + + # NOTE: this does not plot the actual proxy points for i in range(srcindices.nblocks): - mesh = affine_map(generator.ref_mesh, + mesh = affine_map(ref_mesh, A=(pxyradii[i] * np.eye(ndim)), b=pxycenters[:, i].reshape(-1)) -- GitLab From 45744817acbe1b7ae4e82c23ec182877b384cb58 Mon Sep 17 00:00:00 2001 From: Alex Fikl Date: Fri, 29 Jun 2018 19:28:02 -0500 Subject: [PATCH 22/25] fix some doc issues --- pytential/linalg/proxy.py | 78 +++++++++++++++++++-------------------- 1 file changed, 39 insertions(+), 39 deletions(-) diff --git a/pytential/linalg/proxy.py b/pytential/linalg/proxy.py index 36d3a9cd..f7dc6e6d 100644 --- a/pytential/linalg/proxy.py +++ b/pytential/linalg/proxy.py @@ -45,13 +45,10 @@ Proxy Point Generation .. autoclass:: ProxyGenerator .. autofunction:: partition_by_nodes - .. autofunction:: partition_by_elements - .. autofunction:: partition_from_coarse .. autofunction:: gather_block_neighbor_points - .. autofunction:: gather_block_interaction_points """ @@ -73,9 +70,9 @@ def partition_by_nodes(discr, of points, but will split elements across different ranges. :arg discr: a :class:`meshmode.discretization.Discretization`. - :arg use_tree: if `True`, node partitions are generated using a + :arg use_tree: if ``True``, node partitions are generated using a :class:`boxtree.TreeBuilder`, which leads to geometrically close - points to belong to the same partition. If `False`, a simple linear + points to belong to the same partition. If ``False``, a simple linear partition is constructed. :arg max_nodes_in_box: passed to :class:`boxtree.TreeBuilder`. @@ -132,9 +129,9 @@ def partition_by_elements(discr, need to be resampled, integrated, etc. :arg discr: a :class:`meshmode.discretization.Discretization`. - :arg use_tree: if True, node partitions are generated using a + :arg use_tree: if ``True``, node partitions are generated using a :class:`boxtree.TreeBuilder`, which leads to geometrically close - points to belong to the same partition. If False, a simple linear + points to belong to the same partition. If ``False``, a simple linear partition is constructed. 
:arg max_elements_in_box: passed to :class:`boxtree.TreeBuilder`. @@ -204,20 +201,20 @@ def partition_by_elements(discr, def partition_from_coarse(resampler, from_indices): """Generate a partition of nodes from an existing partition on a coarser discretization. The new partition is generated based on element - refinement relationships in :attr:`resampler`, so the existing partition - needs to be created using :func:`partition_by_element`, since we assume - that each range contains all the nodes in an element. + refinement relationships in *resampler*, so the existing partition + needs to be created using :func:`partition_by_elements`, + since we assume that each range contains all the nodes in an element. The new partition will have the same number of ranges as the old partition. The nodes inside each range in the new partition are all the nodes in - :attr:`resampler.to_discr` that were refined from elements in the same - range from :attr:`resampler.from_discr`. + *resampler.to_discr* that were refined from elements in the same + range from *resampler.from_discr*. :arg resampler: a :class:`meshmode.discretization.connection.DirectDiscretizationConnection`. :arg from_indices: a :class:`sumpy.tools.BlockIndexRanges`. - :return: a tuple :class:`sumpy.tools.BlockIndexRanges`. + :return: a :class:`sumpy.tools.BlockIndexRanges`. """ if not hasattr(resampler, "groups"): @@ -284,7 +281,7 @@ def _generate_unit_sphere(ambient_dim, approx_npoints): :arg approx_npoints: approximate number of points to generate. If the ambient space is 3D, this will not generate the exact number of points. :return: array of shape ``(ambient_dim, npoints)``, where ``npoints`` - will not generally be the same as `approx_npoints`. + will not generally be the same as ``approx_npoints``. """ if ambient_dim == 2: @@ -326,8 +323,8 @@ class ProxyGenerator(object): .. attribute:: ambient_dim .. attribute:: nproxy - Approximate number of proxy points. In 2D, this is the exact - number of proxy points, but in 3D + Number of proxy points. + .. attribute:: source A :class:`pytential.qbx.QBXLayerPotentialSource`. @@ -356,20 +353,21 @@ class ProxyGenerator(object): .. attribute:: ref_points Reference points on a unit ball. Can be used to construct the points - around a proxy ball :math:`i` by translating them to `center[i]` and - scaling by `radii[i]`, as obtained by :meth:`__call__`. + around a proxy ball :math:`i` by translating them to ``center[i]`` and + scaling by ``radii[i]``, as obtained by :meth:`__call__`. .. automethod:: __call__ """ - def __init__(self, source, nproxy=None, ratio=None): + def __init__(self, source, approx_nproxy=None, ratio=None): self.source = source self.ambient_dim = source.density_discr.ambient_dim self.ratio = 1.1 if ratio is None else ratio - self.nproxy = 32 if nproxy is None else nproxy + approx_nproxy = 32 if approx_nproxy is None else approx_nproxy self.ref_points = \ - _generate_unit_sphere(self.ambient_dim, self.nproxy) + _generate_unit_sphere(self.ambient_dim, approx_nproxy) + self.nproxy = self.ref_points.shape[1] @memoize_method def get_kernel(self): @@ -426,7 +424,7 @@ class ProxyGenerator(object): lp.ValueArg("nsources", np.int), "..." ], - name="proxy_generator_knl", + name="find_proxy_radii_knl", assumptions="dim>=1 and nranges>=1", fixed_parameters=dict(dim=self.ambient_dim), lang_version=MOST_RECENT_LANGUAGE_VERSION) @@ -449,15 +447,15 @@ class ProxyGenerator(object): :arg queue: a :class:`pyopencl.CommandQueue`. :arg indices: a :class:`sumpy.tools.BlockIndexRanges`. 
- :return: a tuple of `(proxies, pxyranges, pxycenters, pxyranges)`, where - each element is a :class:`pyopencl.array.Array`. The - sizes of the arrays are as follows: `pxycenters` is of size - `(2, nranges)`, `pxyradii` is of size `(nranges,)`, `pxyranges` is - of size `(nranges + 1,)` and `proxies` is of size - `(2, nranges * nproxy)`. The proxy points in a range :math:`i` - can be obtained by a slice `proxies[pxyranges[i]:pxyranges[i + 1]]` - and are all at a distance `pxyradii[i]` from the range center - `pxycenters[i]`. + :return: a tuple of ``(proxies, pxyranges, pxycenters, pxyranges)``, + where each element is a :class:`pyopencl.array.Array`. The + sizes of the arrays are as follows: ``pxycenters`` is of size + ``(2, nranges)``, ``pxyradii`` is of size ``(nranges,)``, + ``pxyranges`` is of size ``(nranges + 1,)`` and ``proxies`` is + of size ``(2, nranges * nproxy)``. The proxy points in a range + :math:`i` can be obtained by a slice + ``proxies[pxyranges[i]:pxyranges[i + 1]]`` and are all at a + distance ``pxyradii[i]`` from the range center ``pxycenters[i]``. """ def _affine_map(v, A, b): @@ -501,7 +499,7 @@ class ProxyGenerator(object): def gather_block_neighbor_points(discr, indices, pxycenters, pxyradii, max_nodes_in_box=None): """Generate a set of neighboring points for each range of points in - :attr:`discr`. Neighboring points of a range :math:`i` are defined + *discr*. Neighboring points of a range :math:`i` are defined as all the points inside the proxy ball :math:`i` that do not also belong to the range itself. @@ -510,7 +508,7 @@ def gather_block_neighbor_points(discr, indices, pxycenters, pxyradii, :arg pxycenters: an array containing the center of each proxy ball. :arg pxyradii: an array containing the radius of each proxy ball. - :return: a tuple :class:`sumpy.tools.BlockIndexRanges`. + :return: a :class:`sumpy.tools.BlockIndexRanges`. """ if max_nodes_in_box is None: @@ -588,10 +586,10 @@ def gather_block_neighbor_points(discr, indices, pxycenters, pxyradii, def gather_block_interaction_points(source, indices, ratio=None, - nproxy=None, + approx_nproxy=None, max_nodes_in_box=None): """Generate sets of interaction points for each given range of indices - in the :attr:`source` discretization. For each input range of indices, + in the *source* discretization. For each input range of indices, the corresponding output range of points is consists of: - a set of proxy points (or balls) around the range, which @@ -600,14 +598,14 @@ def gather_block_interaction_points(source, indices, - a set of neighboring points that are inside the proxy balls, but do not belong to the given range, which model nearby interactions. - These are constructed with :meth:`gather_block_neighbor_points`. + These are constructed with :func:`gather_block_neighbor_points`. :arg source: a :class:`pytential.qbx.QBXLayerPotentialSource`. :arg indices: a :class:`sumpy.tools.BlockIndexRanges`. - :return: a tuple `(nodes, ranges)`, where each value is a + :return: a tuple ``(nodes, ranges)``, where each value is a :class:`pyopencl.array.Array`. For a range :math:`i`, we can - get the slice using `nodes[ranges[i]:ranges[i + 1]]`. + get the slice using ``nodes[ranges[i]:ranges[i + 1]]``. 
""" @memoize @@ -664,7 +662,9 @@ def gather_block_interaction_points(source, indices, return loopy_knl with cl.CommandQueue(source.cl_context) as queue: - generator = ProxyGenerator(source, ratio=ratio, nproxy=nproxy) + generator = ProxyGenerator(source, + ratio=ratio, + approx_nproxy=approx_nproxy) proxies, pxyranges, pxycenters, pxyradii = generator(queue, indices) neighbors = gather_block_neighbor_points(source.density_discr, -- GitLab From c2b38a854fd3d1090d23de24f60e668f1e711a3d Mon Sep 17 00:00:00 2001 From: Alex Fikl Date: Fri, 29 Jun 2018 19:31:46 -0500 Subject: [PATCH 23/25] make nproxy a property instead --- pytential/linalg/proxy.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pytential/linalg/proxy.py b/pytential/linalg/proxy.py index f7dc6e6d..d5b869aa 100644 --- a/pytential/linalg/proxy.py +++ b/pytential/linalg/proxy.py @@ -367,7 +367,10 @@ class ProxyGenerator(object): approx_nproxy = 32 if approx_nproxy is None else approx_nproxy self.ref_points = \ _generate_unit_sphere(self.ambient_dim, approx_nproxy) - self.nproxy = self.ref_points.shape[1] + + @property + def nproxy(self): + return self.ref_points.shape[1] @memoize_method def get_kernel(self): -- GitLab From 31502b931bb2981e3e316cd6ac04b1d147e3396b Mon Sep 17 00:00:00 2001 From: Alex Fikl Date: Fri, 29 Jun 2018 19:34:06 -0500 Subject: [PATCH 24/25] clearer docs --- pytential/linalg/proxy.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pytential/linalg/proxy.py b/pytential/linalg/proxy.py index d5b869aa..55ddcf61 100644 --- a/pytential/linalg/proxy.py +++ b/pytential/linalg/proxy.py @@ -323,7 +323,7 @@ class ProxyGenerator(object): .. attribute:: ambient_dim .. attribute:: nproxy - Number of proxy points. + Number of proxy points in a single proxy ball. .. attribute:: source @@ -331,7 +331,7 @@ class ProxyGenerator(object): .. attribute:: ratio - A ratio used to compute the proxy point radius. The radius + A ratio used to compute the proxy ball radius. The radius is computed in the :math:`\ell^2` norm, resulting in a circle or sphere of proxy points. For QBX, we have two radii of interest for a set of points: the radius :math:`r_{block}` of the @@ -353,7 +353,7 @@ class ProxyGenerator(object): .. attribute:: ref_points Reference points on a unit ball. Can be used to construct the points - around a proxy ball :math:`i` by translating them to ``center[i]`` and + of a proxy ball :math:`i` by translating them to ``center[i]`` and scaling by ``radii[i]``, as obtained by :meth:`__call__`. .. automethod:: __call__ -- GitLab From 288480615365e49906164fd70e59c2fe87728d39 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andreas=20Kl=C3=B6ckner?= Date: Sat, 30 Jun 2018 01:00:21 -0400 Subject: [PATCH 25/25] Add ack for Matt's sphere sampler --- pytential/linalg/proxy.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pytential/linalg/proxy.py b/pytential/linalg/proxy.py index 55ddcf61..abb8e8de 100644 --- a/pytential/linalg/proxy.py +++ b/pytential/linalg/proxy.py @@ -289,6 +289,8 @@ def _generate_unit_sphere(ambient_dim, approx_npoints): points = np.vstack([np.cos(t), np.sin(t)]) elif ambient_dim == 3: # https://www.cmu.edu/biolphys/deserno/pdf/sphere_equi.pdf + # code by Matt Wala from + # https://github.com/mattwala/gigaqbx-accuracy-experiments/blob/d56ed063ffd7843186f4fe05d2a5b5bfe6ef420c/translation_accuracy.py#L23 a = 4.0 * np.pi / approx_npoints m_theta = int(np.round(np.pi / np.sqrt(a))) d_theta = np.pi / m_theta -- GitLab