diff --git a/examples/helmholtz-dirichlet.py b/examples/helmholtz-dirichlet.py index 4c0cd80d7ea8a8b1f37e558bbb57a85eb9c06cfb..f385215b59795187d5b435bc57b8c8088ab50ea8 100644 --- a/examples/helmholtz-dirichlet.py +++ b/examples/helmholtz-dirichlet.py @@ -1,4 +1,5 @@ import numpy as np +import numpy.linalg as la import pyopencl as cl import pyopencl.clmath # noqa @@ -7,16 +8,17 @@ from meshmode.discretization.poly_element import \ InterpolatoryQuadratureSimplexGroupFactory from pytential import bind, sym, norm # noqa +from pytential.target import PointsTarget # {{{ set some constants for use below -nelements = 50 -mesh_order = 3 -bdry_quad_order = 10 +nelements = 20 +bdry_quad_order = 4 +mesh_order = bdry_quad_order +qbx_order = bdry_quad_order bdry_ovsmp_quad_order = 4*bdry_quad_order -qbx_order = 4 -fmm_order = 8 -k = 15 +fmm_order = 25 +k = 25 # }}} @@ -31,30 +33,58 @@ def main(): from meshmode.mesh.generation import ellipse, make_curve_mesh from functools import partial - mesh = make_curve_mesh( - partial(ellipse, 3), - np.linspace(0, 1, nelements+1), - mesh_order) - - density_discr = Discretization( + if 0: + mesh = make_curve_mesh( + partial(ellipse, 1), + np.linspace(0, 1, nelements+1), + mesh_order) + else: + base_mesh = make_curve_mesh( + partial(ellipse, 1), + np.linspace(0, 1, nelements+1), + mesh_order) + + from meshmode.mesh.processing import affine_map, merge_disjoint_meshes + nx = 5 + ny = 5 + dx = 2 / nx + meshes = [ + affine_map( + base_mesh, + A=np.diag([dx*0.25, dx*0.25]), + b=np.array([dx*(ix-nx/2), dx*(iy-ny/2)])) + for ix in range(nx) + for iy in range(ny)] + + mesh = merge_disjoint_meshes(meshes, single_group=True) + + if 0: + from meshmode.mesh.visualization import draw_curve + draw_curve(mesh) + import matplotlib.pyplot as plt + plt.show() + + pre_density_discr = Discretization( cl_ctx, mesh, InterpolatoryQuadratureSimplexGroupFactory(bdry_quad_order)) - from pytential.qbx import QBXLayerPotentialSource - qbx = QBXLayerPotentialSource( - density_discr, fine_order=bdry_ovsmp_quad_order, qbx_order=qbx_order, + from pytential.qbx import ( + QBXLayerPotentialSource, QBXTargetAssociationFailedException) + qbx, _ = QBXLayerPotentialSource( + pre_density_discr, fine_order=bdry_ovsmp_quad_order, qbx_order=qbx_order, fmm_order=fmm_order - ) + ).with_refinement() + density_discr = qbx.density_discr # {{{ describe bvp - from sumpy.kernel import HelmholtzKernel + from sumpy.kernel import LaplaceKernel, HelmholtzKernel kernel = HelmholtzKernel(2) cse = sym.cse sigma_sym = sym.var("sigma") - sqrt_w = sym.sqrt_jac_q_weight() + sqrt_w = sym.sqrt_jac_q_weight(2) inv_sqrt_w_sigma = cse(sigma_sym/sqrt_w) # Brakhage-Werner parameter @@ -62,7 +92,7 @@ def main(): # -1 for interior Dirichlet # +1 for exterior Dirichlet - loc_sign = -1 + loc_sign = +1 bdry_op_sym = (-loc_sign*0.5*sigma_sym + sqrt_w*( @@ -76,18 +106,22 @@ def main(): # {{{ fix rhs and solve - mode_nr = 3 nodes = density_discr.nodes().with_queue(queue) - angle = cl.clmath.atan2(nodes[1], nodes[0]) + k_vec = np.array([2, 1]) + k_vec = k * k_vec / la.norm(k_vec, 2) + + def u_incoming_func(x): + return cl.clmath.exp( + 1j * (x[0] * k_vec[0] + x[1] * k_vec[1])) - bc = cl.clmath.cos(mode_nr*angle) + bc = -u_incoming_func(nodes) bvp_rhs = bind(qbx, sqrt_w*sym.var("bc"))(queue, bc=bc) from pytential.solve import gmres gmres_result = gmres( bound_op.scipy_op(queue, "sigma", dtype=np.complex128, k=k), - bvp_rhs, tol=1e-14, progress=True, + bvp_rhs, tol=1e-8, progress=True, stall_iterations=0, hard_failure=True) @@ -97,22 +131,50 @@ 
def main(): sigma = gmres_result.solution + repr_kwargs = dict(k=sym.var("k"), qbx_forced_limit=None) representation_sym = ( - alpha*sym.S(kernel, inv_sqrt_w_sigma, k=sym.var("k")) - - sym.D(kernel, inv_sqrt_w_sigma, k=sym.var("k"))) + alpha*sym.S(kernel, inv_sqrt_w_sigma, **repr_kwargs) + - sym.D(kernel, inv_sqrt_w_sigma, **repr_kwargs)) from sumpy.visualization import FieldPlotter fplot = FieldPlotter(np.zeros(2), extent=5, npoints=1500) - from pytential.target import PointsTarget - fld_in_vol = bind( - (qbx, PointsTarget(fplot.points)), - representation_sym)(queue, sigma=sigma, k=k).get() + + targets = cl.array.to_device(queue, fplot.points) + + u_incoming = u_incoming_func(targets) + + qbx_stick_out = qbx.copy(target_stick_out_factor=0.05) + + indicator_qbx = qbx_stick_out.copy( + fmm_level_to_order=lambda lev: 7, qbx_order=2) + + ones_density = density_discr.zeros(queue) + ones_density.fill(1) + indicator = bind( + (indicator_qbx, PointsTarget(targets)), + sym.D(LaplaceKernel(2), sym.var("sigma")))( + queue, sigma=ones_density).get() + + try: + fld_in_vol = bind( + (qbx_stick_out, PointsTarget(targets)), + representation_sym)(queue, sigma=sigma, k=k).get() + except QBXTargetAssociationFailedException as e: + fplot.write_vtk_file( + "failed-targets.vts", + [ + ("failed", e.failed_target_flags.get(queue)) + ] + ) + raise #fplot.show_scalar_in_mayavi(fld_in_vol.real, max_val=5) fplot.write_vtk_file( "potential.vts", [ - ("potential", fld_in_vol) + ("potential", fld_in_vol), + ("indicator", indicator), + ("u_incoming", u_incoming.get()), ] ) diff --git a/examples/layerpot.py b/examples/layerpot.py index 11c7a23e647406433e2468d48091937ec168cce0..d08659a50135d5a71b719e1a207a055472fe5cbf 100644 --- a/examples/layerpot.py +++ b/examples/layerpot.py @@ -32,7 +32,7 @@ if k: kernel = HelmholtzKernel(2) kernel_kwargs = {"k": sym.var("k")} else: - kernel = LaplaceKernel() + kernel = LaplaceKernel(2) kernel_kwargs = {} #kernel = OneKernel() @@ -49,11 +49,14 @@ from meshmode.discretization import Discretization from meshmode.discretization.poly_element import \ InterpolatoryQuadratureSimplexGroupFactory -density_discr = Discretization( +pre_density_discr = Discretization( cl_ctx, mesh, InterpolatoryQuadratureSimplexGroupFactory(target_order)) -qbx = QBXLayerPotentialSource(density_discr, 4*target_order, qbx_order, - fmm_order=False) +qbx, _ = QBXLayerPotentialSource(pre_density_discr, 4*target_order, qbx_order, + fmm_order=qbx_order+3, + target_stick_out_factor=0.005).with_refinement() + +density_discr = qbx.density_discr nodes = density_discr.nodes().with_queue(queue) @@ -77,10 +80,12 @@ if isinstance(kernel, HelmholtzKernel): bound_bdry_op = bind(qbx, op) #mlab.figure(bgcolor=(1, 1, 1)) if 1: - fplot = FieldPlotter(np.zeros(2), extent=5, npoints=400) + fplot = FieldPlotter(np.zeros(2), extent=5, npoints=1000) from pytential.target import PointsTarget + + targets_dev = cl.array.to_device(queue, fplot.points) fld_in_vol = bind( - (qbx, PointsTarget(fplot.points)), + (qbx, PointsTarget(targets_dev)), op)(queue, sigma=sigma, k=k).get() if enable_mayavi: diff --git a/examples/scaling-study.py b/examples/scaling-study.py new file mode 100644 index 0000000000000000000000000000000000000000..cf8cf166fa96c6457a59cc0c93c4bb21b6620d2f --- /dev/null +++ b/examples/scaling-study.py @@ -0,0 +1,205 @@ +import numpy as np +import pyopencl as cl +import pyopencl.clmath # noqa + +from meshmode.discretization import Discretization +from meshmode.discretization.poly_element import \ + 
InterpolatoryQuadratureSimplexGroupFactory + +from pytential import bind, sym, norm # noqa +from pytential.target import PointsTarget + +# {{{ set some constants for use below + +nelements = 20 +bdry_quad_order = 4 +mesh_order = bdry_quad_order +qbx_order = bdry_quad_order +bdry_ovsmp_quad_order = 4*bdry_quad_order +fmm_order = 25 +k = 25 + +# }}} + + +def make_mesh(nx, ny): + from meshmode.mesh.generation import ellipse, make_curve_mesh + from functools import partial + + base_mesh = make_curve_mesh( + partial(ellipse, 1), + np.linspace(0, 1, nelements+1), + mesh_order) + + from meshmode.mesh.processing import affine_map, merge_disjoint_meshes + dx = 2 / nx + meshes = [ + affine_map( + base_mesh, + A=np.diag([dx*0.25, dx*0.25]), + b=np.array([dx*(ix-nx/2), dx*(iy-ny/2)])) + for ix in range(nx) + for iy in range(ny)] + + mesh = merge_disjoint_meshes(meshes, single_group=True) + + if 0: + from meshmode.mesh.visualization import draw_curve + draw_curve(mesh) + import matplotlib.pyplot as plt + plt.show() + + return mesh + + +def timing_run(nx, ny): + import logging + logging.basicConfig(level=logging.INFO) + + cl_ctx = cl.create_some_context() + queue = cl.CommandQueue(cl_ctx) + + mesh = make_mesh(nx=nx, ny=ny) + + density_discr = Discretization( + cl_ctx, mesh, + InterpolatoryQuadratureSimplexGroupFactory(bdry_quad_order)) + + from pytential.qbx import ( + QBXLayerPotentialSource, QBXTargetAssociationFailedException) + qbx = QBXLayerPotentialSource( + density_discr, fine_order=bdry_ovsmp_quad_order, qbx_order=qbx_order, + fmm_order=fmm_order + ) + + # {{{ describe bvp + + from sumpy.kernel import HelmholtzKernel + kernel = HelmholtzKernel(2) + + cse = sym.cse + + sigma_sym = sym.var("sigma") + sqrt_w = sym.sqrt_jac_q_weight(2) + inv_sqrt_w_sigma = cse(sigma_sym/sqrt_w) + + # Brakhage-Werner parameter + alpha = 1j + + # -1 for interior Dirichlet + # +1 for exterior Dirichlet + loc_sign = +1 + + bdry_op_sym = (-loc_sign*0.5*sigma_sym + + sqrt_w*( + alpha*sym.S(kernel, inv_sqrt_w_sigma, k=sym.var("k")) + - sym.D(kernel, inv_sqrt_w_sigma, k=sym.var("k")) + )) + + # }}} + + bound_op = bind(qbx, bdry_op_sym) + + # {{{ fix rhs and solve + + mode_nr = 3 + nodes = density_discr.nodes().with_queue(queue) + angle = cl.clmath.atan2(nodes[1], nodes[0]) + + sigma = cl.clmath.cos(mode_nr*angle) + + # }}} + + # {{{ postprocess/visualize + + repr_kwargs = dict(k=sym.var("k"), qbx_forced_limit=+1) + + sym_op = sym.S(kernel, sym.var("sigma"), **repr_kwargs) + bound_op = bind(qbx, sym_op) + + print("FMM WARM-UP RUN 1: %d elements" % mesh.nelements) + bound_op(queue, sigma=sigma, k=k) + print("FMM WARM-UP RUN 2: %d elements" % mesh.nelements) + bound_op(queue, sigma=sigma, k=k) + queue.finish() + print("FMM TIMING RUN: %d elements" % mesh.nelements) + + from time import time + t_start = time() + + bound_op(queue, sigma=sigma, k=k) + queue.finish() + elapsed = time()-t_start + + print("FMM TIMING RUN DONE: %d elements -> %g s" + % (mesh.nelements, elapsed)) + + return (mesh.nelements, elapsed) + + if 0: + from sumpy.visualization import FieldPlotter + fplot = FieldPlotter(np.zeros(2), extent=5, npoints=1500) + + targets = cl.array.to_device(queue, fplot.points) + + qbx_stick_out = qbx.copy(target_stick_out_factor=0.05) + + indicator_qbx = qbx_stick_out.copy( + fmm_level_to_order=lambda lev: 7, qbx_order=2) + + ones_density = density_discr.zeros(queue) + ones_density.fill(1) + indicator = bind( + (indicator_qbx, PointsTarget(targets)), + sym_op)( + queue, sigma=ones_density).get() + + try: + fld_in_vol = bind( + 
(qbx_stick_out, PointsTarget(targets)), + sym_op)(queue, sigma=sigma, k=k).get() + except QBXTargetAssociationFailedException as e: + fplot.write_vtk_file( + "failed-targets.vts", + [ + ("failed", e.failed_target_flags.get(queue)) + ] + ) + raise + + #fplot.show_scalar_in_mayavi(fld_in_vol.real, max_val=5) + fplot.write_vtk_file( + "potential.vts", + [ + ("potential", fld_in_vol), + ("indicator", indicator) + ] + ) + + # }}} + + +if __name__ == "__main__": + results = [] + for nx, ny in [ + (3, 3), + (3, 4), + (4, 4), + (4, 5), + (5, 5), + (5, 6), + (6, 6), + (6, 7), + (7, 7), + (7, 8), + (8, 8), + (8, 9), + (9, 9), + (9, 10), + (10, 10), + ]: + + results.append(timing_run(nx, ny)) + + for r in results: + print(r) diff --git a/pytential/qbx/__init__.py b/pytential/qbx/__init__.py index a800a2beb739b1177dabbc61080eb55da58aceb4..baa5a6ba15f365dca22b715781b38d733524faec 100644 --- a/pytential/qbx/__init__.py +++ b/pytential/qbx/__init__.py @@ -1,3 +1,4 @@ +# -*- coding: utf-8 -*- from __future__ import division, absolute_import __copyright__ = "Copyright (C) 2013 Andreas Kloeckner" @@ -24,16 +25,23 @@ THE SOFTWARE. import six +import loopy as lp import numpy as np -#import numpy.linalg as la from pytools import memoize_method from meshmode.discretization import Discretization from meshmode.discretization.poly_element import QuadratureSimplexGroupFactory +from pytential.qbx.target_assoc import QBXTargetAssociationFailedException import pyopencl as cl +import logging +logger = logging.getLogger(__name__) + + __doc__ = """ .. autoclass:: QBXLayerPotentialSource + +.. autoclass:: QBXTargetAssociationFailedException """ @@ -92,11 +100,14 @@ class LayerPotentialSource(object): def get_local_expansion_class(base_kernel): # FIXME: Don't hard-code expansion types - from sumpy.kernel import HelmholtzKernel + from sumpy.kernel import HelmholtzKernel, LaplaceKernel if (isinstance(base_kernel.get_base_kernel(), HelmholtzKernel) and base_kernel.dim == 2): from sumpy.expansion.local import H2DLocalExpansion return H2DLocalExpansion + elif isinstance(base_kernel.get_base_kernel(), LaplaceKernel): + from sumpy.expansion.local import LaplaceConformingVolumeTaylorLocalExpansion + return LaplaceConformingVolumeTaylorLocalExpansion else: from sumpy.expansion.local import VolumeTaylorLocalExpansion return VolumeTaylorLocalExpansion @@ -104,11 +115,15 @@ def get_local_expansion_class(base_kernel): def get_multipole_expansion_class(base_kernel): # FIXME: Don't hard-code expansion types - from sumpy.kernel import HelmholtzKernel + from sumpy.kernel import HelmholtzKernel, LaplaceKernel if (isinstance(base_kernel.get_base_kernel(), HelmholtzKernel) and base_kernel.dim == 2): from sumpy.expansion.multipole import H2DMultipoleExpansion return H2DMultipoleExpansion + elif isinstance(base_kernel.get_base_kernel(), LaplaceKernel): + from sumpy.expansion.multipole import ( + LaplaceConformingVolumeTaylorMultipoleExpansion) + return LaplaceConformingVolumeTaylorMultipoleExpansion else: from sumpy.expansion.multipole import VolumeTaylorMultipoleExpansion return VolumeTaylorMultipoleExpansion @@ -124,15 +139,22 @@ class QBXLayerPotentialSource(LayerPotentialSource): .. attribute :: fmm_order .. attribute :: cl_context .. automethod :: centers + .. automethod :: panel_sizes .. automethod :: weights_and_area_elements + .. automethod :: with_refinement See :ref:`qbxguts` for some information on the inner workings of this. 
""" def __init__(self, density_discr, fine_order, - qbx_order=None, fmm_order=None, + qbx_order=None, + fmm_order=None, fmm_level_to_order=None, - # FIXME set debug=False once everything works - real_dtype=np.float64, debug=True, + target_stick_out_factor=1e-10, + + # begin undocumented arguments + # FIXME default debug=False once everything works + debug=True, + refined_for_global_qbx=False, performance_data_file=None): """ :arg fine_order: The total degree to which the (upsampled) @@ -141,20 +163,12 @@ class QBXLayerPotentialSource(LayerPotentialSource): a reasonable(-ish?) default. """ - self.fine_density_discr = Discretization( - density_discr.cl_context, density_discr.mesh, - QuadratureSimplexGroupFactory(fine_order), real_dtype) - - from meshmode.discretization.connection import make_same_mesh_connection - self.resampler = make_same_mesh_connection( - self.fine_density_discr, density_discr) - if fmm_level_to_order is None: if fmm_order is None and qbx_order is not None: fmm_order = qbx_order + 1 if fmm_order is not None and fmm_level_to_order is not None: - raise TypeError("may not specify both fmm_order an fmm_level_to_order") + raise TypeError("may not specify both fmm_order and fmm_level_to_order") if fmm_level_to_order is None: if fmm_order is False: @@ -163,12 +177,101 @@ class QBXLayerPotentialSource(LayerPotentialSource): def fmm_level_to_order(level): return fmm_order + self.fine_order = fine_order self.qbx_order = qbx_order self.density_discr = density_discr self.fmm_level_to_order = fmm_level_to_order + self.target_stick_out_factor = target_stick_out_factor + self.debug = debug + self.refined_for_global_qbx = refined_for_global_qbx self.performance_data_file = performance_data_file + def copy( + self, + density_discr=None, + fine_order=None, + qbx_order=None, + fmm_level_to_order=None, + target_stick_out_factor=None, + + debug=None, + refined_for_global_qbx=None, + ): + # FIXME Could/should share wrangler and geometry kernels + # if no relevant changes have been made. + return QBXLayerPotentialSource( + density_discr=density_discr or self.density_discr, + fine_order=( + fine_order if fine_order is not None else self.fine_order), + qbx_order=qbx_order if qbx_order is not None else self.qbx_order, + fmm_level_to_order=( + fmm_level_to_order or self.fmm_level_to_order), + target_stick_out_factor=( + target_stick_out_factor or self.target_stick_out_factor), + + debug=( + debug if debug is not None else self.debug), + refined_for_global_qbx=( + refined_for_global_qbx if refined_for_global_qbx is not None + else self.refined_for_global_qbx), + performance_data_file=self.performance_data_file) + + @property + @memoize_method + def fine_density_discr(self): + return Discretization( + self.density_discr.cl_context, self.density_discr.mesh, + QuadratureSimplexGroupFactory(self.fine_order), self.real_dtype) + + @property + @memoize_method + def resampler(self): + from meshmode.discretization.connection import make_same_mesh_connection + return make_same_mesh_connection( + self.fine_density_discr, self.density_discr) + + def el_view(self, discr, group_nr, global_array): + """Return a view of *global_array* of shape + ``(..., discr.groups[group_nr].nelements)`` + where *global_array* is of shape ``(..., nelements)``, + where *nelements* is the global (per-discretization) node count. 
+ """ + + group = discr.groups[group_nr] + el_nr_base = sum(group.nelements for group in discr.groups[:group_nr]) + + return global_array[ + ..., el_nr_base:el_nr_base + group.nelements] \ + .reshape( + global_array.shape[:-1] + + (group.nelements,)) + + @memoize_method + def with_refinement(self, target_order=None): + """ + :returns: a tuple ``(lpot_src, cnx)``, where ``lpot_src`` is a + :class:`QBXLayerPotentialSource` and ``cnx`` is a + :class:`meshmode.discretization.connection.DiscretizationConnection` + from the originally given to the refined geometry. + """ + from pytential.qbx.refinement import QBXLayerPotentialSourceRefiner + refiner = QBXLayerPotentialSourceRefiner(self.cl_context) + from meshmode.discretization.poly_element import ( + InterpolatoryQuadratureSimplexGroupFactory) + if target_order is None: + target_order = self.density_discr.groups[0].order + lpot, connection = refiner(self, + InterpolatoryQuadratureSimplexGroupFactory(target_order)) + return lpot, connection + + @property + @memoize_method + def h_max(self): + with cl.CommandQueue(self.cl_context) as queue: + panel_sizes = self.panel_sizes("npanels").with_queue(queue) + return np.asscalar(cl.array.max(panel_sizes).get()) + @property def ambient_dim(self): return self.density_discr.ambient_dim @@ -190,16 +293,103 @@ class QBXLayerPotentialSource(LayerPotentialSource): return self.density_discr.complex_dtype @memoize_method - def centers(self, target_discr, sign): - from pytential import sym, bind + def panel_centers_of_mass(self): + knl = lp.make_kernel( + """{[dim,k,i]: + 0<=dim src_ibox = target_boxes[isrc_box] \ {id=read_src_ibox} - <> tgt_center[idim] = qbx_centers[idim, icenter] - <> src_center[idim] = centers[idim, src_ibox] {dup=idim} - <> d[idim] = tgt_center[idim] - src_center[idim] {dup=idim} + <> in_range = (target_base_ibox <= src_ibox + and src_ibox < target_base_ibox + nboxes) - """] + [""" - <> src_coeff{i} = expansions[src_ibox, {i}] {{dep=read_src_ibox}} - """.format(i=i) for i in range(ncoeff_src)] + [ - ] + self.get_translation_loopy_insns() + [""" - qbx_expansions[icenter, {i}] = \ - qbx_expansions[icenter, {i}] + coeff{i} \ - {{id_prefix=write_expn}} - """.format(i=i) for i in range(ncoeff_tgt)] + [""" + if in_range + <> tgt_center[idim] = qbx_centers[idim, icenter] + <> src_center[idim] = centers[idim, src_ibox] {dup=idim} + <> d[idim] = tgt_center[idim] - src_center[idim] {dup=idim} + + """] + [""" + <> src_coeff{i} = \ + expansions[src_ibox - target_base_ibox, {i}] \ + {{dep=read_src_ibox}} + """.format(i=i) for i in range(ncoeff_src)] + [ + ] + self.get_translation_loopy_insns() + [""" + qbx_expansions[icenter, {i}] = \ + qbx_expansions[icenter, {i}] + coeff{i} \ + {{id_prefix=write_expn}} + """.format(i=i) for i in range(ncoeff_tgt)] + [""" + end end """], [ @@ -262,9 +269,9 @@ class L2QBXL(E2EBase): lp.GlobalArg("centers", None, shape="dim, naligned_boxes"), lp.GlobalArg("qbx_centers", None, shape="dim, ncenters", dim_tags="sep,c"), - lp.ValueArg("naligned_boxes,nboxes", np.int32), + lp.ValueArg("naligned_boxes,target_base_ibox,nboxes", np.int32), lp.GlobalArg("expansions", None, - shape=("nboxes", ncoeff_src)), + shape=("nboxes", ncoeff_src), offset=lp.auto), "..." 
] + gather_loopy_arguments([self.src_expansion, self.tgt_expansion]), name=self.name, @@ -312,7 +319,8 @@ class QBXL2P(E2PBase): icenter_tgt_start<=icenter_tgt src_icenter = global_qbx_centers[iglobal_center] diff --git a/pytential/qbx/refinement.py b/pytential/qbx/refinement.py new file mode 100644 index 0000000000000000000000000000000000000000..c942b45b64b9a7eb275eea8f965de189b7c3e491 --- /dev/null +++ b/pytential/qbx/refinement.py @@ -0,0 +1,775 @@ +# -*- coding: utf-8 -*- +from __future__ import division, absolute_import, print_function + +__copyright__ = """ +Copyright (C) 2013 Andreas Kloeckner +Copyright (C) 2016 Matt Wala +""" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + + +import loopy as lp +import numpy as np +import pyopencl as cl + +from pytools import memoize_method +from pytential.qbx.utils import DiscrPlotterMixin +from boxtree.area_query import AreaQueryElementwiseTemplate +from pyopencl.elementwise import ElementwiseTemplate +from boxtree.tools import InlineBinarySearch +from pytential.qbx.utils import QBX_TREE_C_PREAMBLE, QBX_TREE_MAKO_DEFS + +unwrap_args = AreaQueryElementwiseTemplate.unwrap_args + +import logging +logger = logging.getLogger(__name__) + +__doc__ = """ +Refinement +^^^^^^^^^^ + +.. autoclass:: QBXLayerPotentialSourceRefiner +""" + +# {{{ kernels + +TUNNEL_QUERY_DISTANCE_FINDER_TEMPLATE = ElementwiseTemplate( + arguments=r"""//CL:mako// + /* input */ + particle_id_t source_offset, + particle_id_t panel_offset, + int npanels, + particle_id_t *panel_to_source_starts, + particle_id_t *sorted_target_ids, + coord_t *panel_sizes, + + /* output */ + float *tunnel_query_dists, + + /* input, dim-dependent size */ + %for ax in AXIS_NAMES[:dimensions]: + coord_t *particles_${ax}, + %endfor + """, + operation=QBX_TREE_C_PREAMBLE + QBX_TREE_MAKO_DEFS + r"""//CL:mako// + /* Find my panel. 
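+       Sources are stored contiguously panel by panel, so a binary search
+       over panel_to_source_starts recovers the panel owning source i.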
*/ + particle_id_t panel = bsearch(panel_to_source_starts, npanels + 1, i); + + /* Compute dist(tunnel region, panel center) */ + + coord_vec_t center_of_mass; + ${load_particle("INDEX_FOR_PANEL_PARTICLE(panel)", "center_of_mass")} + + coord_vec_t center; + ${load_particle("INDEX_FOR_SOURCE_PARTICLE(i)", "center")} + + coord_t panel_size = panel_sizes[panel]; + + coord_t max_dist = 0; + + %for ax in AXIS_NAMES[:dimensions]: + { + max_dist = fmax(max_dist, + distance(center_of_mass.${ax}, center.${ax} + panel_size / 2)); + max_dist = fmax(max_dist, + distance(center_of_mass.${ax}, center.${ax} - panel_size / 2)); + } + %endfor + + // The atomic max operation supports only integer types. + // However, max_dist is of a floating point type. + // For comparison purposes we reinterpret the bits of max_dist + // as an integer. The comparison result is the same as for positive + // IEEE floating point numbers, so long as the float/int endianness + // matches (fingers crossed). + atomic_max( + (volatile __global int *) + &tunnel_query_dists[panel], + as_int((float) max_dist)); + """, + name="find_tunnel_query_distance", + preamble=str(InlineBinarySearch("particle_id_t"))) + + +# Implements "Algorithm for triggering refinement based on Condition 1" +# +# Does not use r_max as we do not use Newton for checking center-panel closeness. +CENTER_IS_CLOSEST_TO_ORIG_PANEL_REFINER = AreaQueryElementwiseTemplate( + extra_args=r""" + /* input */ + particle_id_t *box_to_panel_starts, + particle_id_t *box_to_panel_lists, + particle_id_t *panel_to_source_starts, + particle_id_t *panel_to_center_starts, + particle_id_t source_offset, + particle_id_t center_offset, + particle_id_t *sorted_target_ids, + coord_t *panel_sizes, + int npanels, + + /* output */ + int *panel_refine_flags, + int *found_panel_to_refine, + + /* input, dim-dependent length */ + %for ax in AXIS_NAMES[:dimensions]: + coord_t *particles_${ax}, + %endfor + """, + ball_center_and_radius_expr=QBX_TREE_C_PREAMBLE + QBX_TREE_MAKO_DEFS + r""" + particle_id_t my_panel = bsearch(panel_to_center_starts, npanels + 1, i); + + ${load_particle("INDEX_FOR_CENTER_PARTICLE(i)", ball_center)} + ${ball_radius} = panel_sizes[my_panel] / 2; + """, + leaf_found_op=QBX_TREE_MAKO_DEFS + r""" + for (particle_id_t panel_idx = box_to_panel_starts[${leaf_box_id}]; + panel_idx < box_to_panel_starts[${leaf_box_id} + 1]; + ++panel_idx) + { + particle_id_t panel = box_to_panel_lists[panel_idx]; + + // Skip self. 
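+            // (the panel that owns this center is never itself a reason
+            //  to refine; only other panels are checked below)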
+ if (my_panel == panel) + { + continue; + } + + bool is_close = false; + + for (particle_id_t source = panel_to_source_starts[panel]; + source < panel_to_source_starts[panel + 1]; + ++source) + { + coord_vec_t source_coords; + + ${load_particle( + "INDEX_FOR_SOURCE_PARTICLE(source)", "source_coords")} + + is_close |= ( + distance(${ball_center}, source_coords) + <= panel_sizes[my_panel] / 2); + } + + if (is_close) + { + panel_refine_flags[my_panel] = 1; + *found_panel_to_refine = 1; + break; + } + } + """, + name="refine_center_closest_to_orig_panel", + preamble=str(InlineBinarySearch("particle_id_t"))) + + +# Implements "Algorithm for triggering refinement based on Condition 2" +CENTER_IS_FAR_FROM_NONNEIGHBOR_PANEL_REFINER = AreaQueryElementwiseTemplate( + extra_args=r""" + /* input */ + particle_id_t *box_to_panel_starts, + particle_id_t *box_to_panel_lists, + particle_id_t *panel_to_source_starts, + particle_id_t *panel_to_center_starts, + particle_id_t source_offset, + particle_id_t center_offset, + particle_id_t panel_offset, + particle_id_t *sorted_target_ids, + coord_t *panel_sizes, + particle_id_t *panel_adjacency_starts, + particle_id_t *panel_adjacency_lists, + int npanels, + coord_t *tunnel_query_dists, + + /* output */ + int *panel_refine_flags, + int *found_panel_to_refine, + + /* input, dim-dependent length */ + %for ax in AXIS_NAMES[:dimensions]: + coord_t *particles_${ax}, + %endfor + """, + ball_center_and_radius_expr=QBX_TREE_C_PREAMBLE + QBX_TREE_MAKO_DEFS + r""" + particle_id_t my_panel = bsearch(panel_to_center_starts, npanels + 1, i); + coord_vec_t my_center_coords; + + ${load_particle("INDEX_FOR_CENTER_PARTICLE(i)", "my_center_coords")} + ${load_particle("INDEX_FOR_PANEL_PARTICLE(my_panel)", ball_center)} + ${ball_radius} = tunnel_query_dists[my_panel]; + """, + leaf_found_op=QBX_TREE_MAKO_DEFS + r""" + for (particle_id_t panel_idx = box_to_panel_starts[${leaf_box_id}]; + panel_idx < box_to_panel_starts[${leaf_box_id} + 1]; + ++panel_idx) + { + particle_id_t panel = box_to_panel_lists[panel_idx]; + + bool is_self_or_adjacent = (my_panel == panel); + + for (particle_id_t adj_panel_idx = panel_adjacency_starts[my_panel]; + adj_panel_idx < panel_adjacency_starts[my_panel + 1]; + ++adj_panel_idx) + { + is_self_or_adjacent |= ( + panel_adjacency_lists[adj_panel_idx] == panel); + } + + // Skip self and adjacent panels. + if (is_self_or_adjacent) + { + continue; + } + + bool is_close = false; + + for (particle_id_t source = panel_to_source_starts[panel]; + source < panel_to_source_starts[panel + 1]; + ++source) + { + coord_vec_t source_coords; + + ${load_particle( + "INDEX_FOR_SOURCE_PARTICLE(source)", "source_coords")} + + is_close |= ( + distance(my_center_coords, source_coords) + <= panel_sizes[panel] / 2); + } + + if (is_close) + { + panel_refine_flags[my_panel] = 1; + *found_panel_to_refine = 1; + break; + } + } + """, + name="refine_center_far_from_nonneighbor_panels", + preamble=str(InlineBinarySearch("particle_id_t"))) + +# }}} + + +# {{{ lpot source refiner + +class QBXLayerPotentialSourceRefiner(DiscrPlotterMixin): + """ + Driver for refining the QBX source grid. Follows [1]_. + + .. [1] Rachh, Manas, Andreas Klöckner, and Michael O'Neil. "Fast + algorithms for Quadrature by Expansion I: Globally valid expansions." + + .. automethod:: get_refine_flags + .. 
automethod:: __call__ + """ + + def __init__(self, context): + self.cl_context = context + from pytential.qbx.utils import TreeWithQBXMetadataBuilder + self.tree_builder = TreeWithQBXMetadataBuilder(self.cl_context) + from boxtree.area_query import PeerListFinder + self.peer_list_finder = PeerListFinder(self.cl_context) + + # {{{ kernels + + @memoize_method + def get_tunnel_query_distance_finder(self, dimensions, coord_dtype, + particle_id_dtype): + from pyopencl.tools import dtype_to_ctype + from boxtree.tools import AXIS_NAMES + logger.info("refiner: building tunnel query distance finder kernel") + + knl = TUNNEL_QUERY_DISTANCE_FINDER_TEMPLATE.build( + self.cl_context, + type_aliases=( + ("particle_id_t", particle_id_dtype), + ("coord_t", coord_dtype), + ), + var_values=( + ("dimensions", dimensions), + ("AXIS_NAMES", AXIS_NAMES), + ("coord_dtype", coord_dtype), + ("dtype_to_ctype", dtype_to_ctype), + ("vec_types", tuple(cl.array.vec.types.items())), + )) + + logger.info("refiner: done building tunnel query distance finder kernel") + return knl + + @memoize_method + def get_center_is_closest_to_orig_panel_refiner(self, dimensions, + coord_dtype, box_id_dtype, + peer_list_idx_dtype, + particle_id_dtype, + max_levels): + return CENTER_IS_CLOSEST_TO_ORIG_PANEL_REFINER.generate(self.cl_context, + dimensions, coord_dtype, box_id_dtype, peer_list_idx_dtype, + max_levels, + extra_type_aliases=(("particle_id_t", particle_id_dtype),)) + + @memoize_method + def get_center_is_far_from_nonneighbor_panel_refiner(self, dimensions, + coord_dtype, + box_id_dtype, + peer_list_idx_dtype, + particle_id_dtype, + max_levels): + return CENTER_IS_FAR_FROM_NONNEIGHBOR_PANEL_REFINER.generate(self.cl_context, + dimensions, coord_dtype, box_id_dtype, peer_list_idx_dtype, + max_levels, + extra_type_aliases=(("particle_id_t", particle_id_dtype),)) + + @memoize_method + def get_2_to_1_panel_ratio_refiner(self): + knl = lp.make_kernel([ + "{[panel]: 0<=panel neighbor_start = panel_adjacency_starts[panel] + <> neighbor_stop = panel_adjacency_starts[panel + 1] + for ineighbor + <> neighbor = panel_adjacency_lists[ineighbor] + + <> oversize = (refine_flags_prev[panel] == 0 + and ( + (panel_sizes[panel] > 2 * panel_sizes[neighbor]) or + (panel_sizes[panel] > panel_sizes[neighbor] and + refine_flags_prev[neighbor] == 1))) + + if oversize + refine_flags[panel] = 1 + refine_flags_updated = 1 {id=write_refine_flags_updated} + end + end + end + """, [ + lp.GlobalArg("panel_adjacency_lists", shape=None), + "..." + ], + options="return_dict", + silenced_warnings="write_race(write_refine_flags_updated)", + name="refine_2_to_1_adj_panel_size_ratio") + knl = lp.split_iname(knl, "panel", 128, inner_tag="l.0", outer_tag="g.0") + return knl + + @memoize_method + def get_helmholtz_k_to_panel_ratio_refiner(self): + knl = lp.make_kernel( + "{[panel]: 0<=panel oversize = panel_sizes[panel] * helmholtz_k > 5 + if oversize + refine_flags[panel] = 1 + refine_flags_updated = 1 {id=write_refine_flags_updated} + end + end + """, + options="return_dict", + silenced_warnings="write_race(write_refine_flags_updated)", + name="refine_helmholtz_k_to_panel_size_ratio") + knl = lp.split_iname(knl, "panel", 128, inner_tag="l.0", outer_tag="g.0") + return knl + + # }}} + + # {{{ refinement triggering + + def refinement_check_center_is_closest_to_orig_panel(self, queue, tree, + lpot_source, peer_lists, refine_flags, debug, wait_for=None): + # Avoid generating too many kernels. 
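+        # Rounding the level count up to a multiple of 10 lets the memoized
+        # kernel be reused across trees of similar depth.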
+ from pytools import div_ceil + max_levels = 10 * div_ceil(tree.nlevels, 10) + + knl = self.get_center_is_closest_to_orig_panel_refiner( + tree.dimensions, + tree.coord_dtype, tree.box_id_dtype, + peer_lists.peer_list_starts.dtype, + tree.particle_id_dtype, + max_levels) + + logger.info("refiner: checking center is closest to orig panel") + + if debug: + npanels_to_refine_prev = cl.array.sum(refine_flags).get() + + found_panel_to_refine = cl.array.zeros(queue, 1, np.int32) + found_panel_to_refine.finish() + + evt = knl( + *unwrap_args( + tree, peer_lists, + tree.box_to_qbx_panel_starts, + tree.box_to_qbx_panel_lists, + tree.qbx_panel_to_source_starts, + tree.qbx_panel_to_center_starts, + tree.qbx_user_source_slice.start, + tree.qbx_user_center_slice.start, + tree.sorted_target_ids, + lpot_source.panel_sizes("npanels"), + tree.nqbxpanels, + refine_flags, + found_panel_to_refine, + *tree.sources), + range=slice(tree.nqbxcenters), + queue=queue) + + cl.wait_for_events([evt]) + + if debug: + npanels_to_refine = cl.array.sum(refine_flags).get() + if npanels_to_refine > npanels_to_refine_prev: + logger.debug("refiner: found {} panel(s) to refine".format( + npanels_to_refine - npanels_to_refine_prev)) + + logger.info("refiner: done checking center is closest to orig panel") + + return found_panel_to_refine.get()[0] == 1 + + def refinement_check_center_is_far_from_nonneighbor_panels(self, queue, + tree, lpot_source, peer_lists, tq_dists, refine_flags, debug, + wait_for=None): + # Avoid generating too many kernels. + from pytools import div_ceil + max_levels = 10 * div_ceil(tree.nlevels, 10) + + knl = self.get_center_is_far_from_nonneighbor_panel_refiner( + tree.dimensions, + tree.coord_dtype, tree.box_id_dtype, + peer_lists.peer_list_starts.dtype, + tree.particle_id_dtype, + max_levels) + + logger.info("refiner: checking center is far from nonneighbor panels") + + if debug: + npanels_to_refine_prev = cl.array.sum(refine_flags).get() + + found_panel_to_refine = cl.array.zeros(queue, 1, np.int32) + found_panel_to_refine.finish() + + adjacency = self.get_adjacency_on_device(queue, lpot_source) + + evt = knl( + *unwrap_args( + tree, peer_lists, + tree.box_to_qbx_panel_starts, + tree.box_to_qbx_panel_lists, + tree.qbx_panel_to_source_starts, + tree.qbx_panel_to_center_starts, + tree.qbx_user_source_slice.start, + tree.qbx_user_center_slice.start, + tree.qbx_user_panel_slice.start, + tree.sorted_target_ids, + lpot_source.panel_sizes("npanels"), + adjacency.adjacency_starts, + adjacency.adjacency_lists, + tree.nqbxpanels, + tq_dists, + refine_flags, + found_panel_to_refine, + *tree.sources), + range=slice(tree.nqbxcenters), + queue=queue) + + cl.wait_for_events([evt]) + + if debug: + npanels_to_refine = cl.array.sum(refine_flags).get() + if npanels_to_refine > npanels_to_refine_prev: + logger.debug("refiner: found {} panel(s) to refine".format( + npanels_to_refine - npanels_to_refine_prev)) + + logger.info("refiner: done checking center is far from nonneighbor panels") + + return found_panel_to_refine.get()[0] == 1 + + def refinement_check_helmholtz_k_to_panel_size_ratio(self, queue, lpot_source, + helmholtz_k, refine_flags, debug, wait_for=None): + knl = self.get_helmholtz_k_to_panel_ratio_refiner() + + logger.info("refiner: checking helmholtz k to panel size ratio") + + if debug: + npanels_to_refine_prev = cl.array.sum(refine_flags).get() + + evt, out = knl(queue, + panel_sizes=lpot_source.panel_sizes("npanels"), + refine_flags=refine_flags, + refine_flags_updated=np.array(0), + 
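+                    # (the kernel writes 1 into refine_flags_updated as soon
+                    #  as it flags any panel)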
helmholtz_k=np.array(helmholtz_k), + wait_for=wait_for) + + cl.wait_for_events([evt]) + + if debug: + npanels_to_refine = cl.array.sum(refine_flags).get() + if npanels_to_refine > npanels_to_refine_prev: + logger.debug("refiner: found {} panel(s) to refine".format( + npanels_to_refine - npanels_to_refine_prev)) + + logger.info("refiner: done checking helmholtz k to panel size ratio") + + return (out["refine_flags_updated"].get() == 1).all() + + def refinement_check_2_to_1_panel_size_ratio(self, queue, lpot_source, + refine_flags, debug, wait_for=None): + knl = self.get_2_to_1_panel_ratio_refiner() + adjacency = self.get_adjacency_on_device(queue, lpot_source) + + refine_flags_updated = False + + logger.info("refiner: checking 2-to-1 panel size ratio") + + if debug: + npanels_to_refine_prev = cl.array.sum(refine_flags).get() + + # Iterative refinement until no more panels can be marked + while True: + evt, out = knl(queue, + npanels=lpot_source.density_discr.mesh.nelements, + panel_sizes=lpot_source.panel_sizes("npanels"), + refine_flags=refine_flags, + # It's safe to pass this here, as the resulting data + # race won't affect the final result of the + # computation. + refine_flags_prev=refine_flags, + refine_flags_updated=np.array(0), + panel_adjacency_starts=adjacency.adjacency_starts, + panel_adjacency_lists=adjacency.adjacency_lists, + wait_for=wait_for) + + cl.wait_for_events([evt]) + + if (out["refine_flags_updated"].get() == 1).all(): + refine_flags_updated = True + else: + break + + if debug: + npanels_to_refine = cl.array.sum(refine_flags).get() + if npanels_to_refine > npanels_to_refine_prev: + logger.debug("refiner: found {} panel(s) to refine".format( + npanels_to_refine - npanels_to_refine_prev)) + + logger.info("refiner: done checking 2-to-1 panel size ratio") + + return refine_flags_updated + + # }}} + + # {{{ other utilities + + def get_tunnel_query_dists(self, queue, tree, lpot_source): + """ + Compute radii for the tubular neighborhood around each panel center of mass. + """ + nqbxpanels = lpot_source.density_discr.mesh.nelements + # atomic_max only works on float32 + tq_dists = cl.array.zeros(queue, nqbxpanels, np.float32) + tq_dists.finish() + + knl = self.get_tunnel_query_distance_finder(tree.dimensions, + tree.coord_dtype, tree.particle_id_dtype) + + evt = knl(tree.qbx_user_source_slice.start, + tree.qbx_user_panel_slice.start, + nqbxpanels, + tree.qbx_panel_to_source_starts, + tree.sorted_target_ids, + lpot_source.panel_sizes("npanels"), + tq_dists, + *tree.sources, + queue=queue, + range=slice(tree.nqbxsources)) + + cl.wait_for_events([evt]) + + if tree.coord_dtype != tq_dists.dtype: + tq_dists = tq_dists.astype(tree.coord_dtype) + + return tq_dists, evt + + def get_adjacency_on_device(self, queue, lpot_source): + """ + Take adjacency information from the mesh and place it onto the device. + """ + from boxtree.tools import DeviceDataRecord + adjacency = lpot_source.density_discr.mesh.nodal_adjacency + adjacency_starts = cl.array.to_device(queue, adjacency.neighbors_starts) + adjacency_lists = cl.array.to_device(queue, adjacency.neighbors) + + return DeviceDataRecord( + adjacency_starts=adjacency_starts, + adjacency_lists=adjacency_lists) + + def get_refine_flags(self, queue, lpot_source): + """ + Return an array on the device suitable for use as element refine flags. + + :arg queue: An instance of :class:`pyopencl.CommandQueue`. + :arg lpot_source: An instance of :class:`QBXLayerPotentialSource`. 
+ + :returns: An instance of :class:`pyopencl.array.Array` suitable for + use as refine flags, initialized to zero. + """ + result = cl.array.zeros( + queue, lpot_source.density_discr.mesh.nelements, np.int32) + return result, result.events[0] + + # }}} + + def refine(self, queue, lpot_source, refine_flags, refiner, factory, debug): + """ + Refine the underlying mesh and discretization. + """ + if isinstance(refine_flags, cl.array.Array): + refine_flags = refine_flags.get(queue) + refine_flags = refine_flags.astype(np.bool) + + logger.info("refiner: calling meshmode") + + refiner.refine(refine_flags) + from meshmode.discretization.connection import make_refinement_connection + + conn = make_refinement_connection( + refiner, lpot_source.density_discr, + factory) + + logger.info("refiner: done calling meshmode") + + new_density_discr = conn.to_discr + + new_lpot_source = lpot_source.copy( + density_discr=new_density_discr, + refined_for_global_qbx=True) + + return new_lpot_source, conn + + def __call__(self, lpot_source, discr_factory, helmholtz_k=None, + # FIXME: Set debug=False once everything works. + refine_flags=None, debug=True, maxiter=50): + """ + Entry point for calling the refiner. + + :arg lpot_source: An instance of :class:`QBXLayerPotentialSource`. + + :arg group_factory: An instance of + :class:`meshmode.mesh.discretization.ElementGroupFactory`. Used for + discretizing the refined mesh. + + :arg helmholtz_k: The Helmholtz parameter, or `None` if not applicable. + + :arg refine_flags: A :class:`pyopencl.array.Array` indicating which + panels should get refined initially, or `None` if no initial + refinement should be done. Should have size equal to the number of + panels. See also :meth:`get_refine_flags()`. + + :returns: A tuple ``(lpot_source, conns)`` where ``lpot_source`` is the + refined layer potential source, and ``conns`` is a list of + :class:`meshmode.discretization.connection.DiscretizationConnection` + objects going from the original mesh to the refined mesh. + """ + from meshmode.mesh.refinement import Refiner + refiner = Refiner(lpot_source.density_discr.mesh) + connections = [] + + lpot_source = lpot_source.copy( + debug=debug, + refined_for_global_qbx=True) + + with cl.CommandQueue(self.cl_context) as queue: + if refine_flags: + lpot_source, conn = self.refine( + queue, lpot_source, refine_flags, refiner, discr_factory, + debug) + connections.append(conn) + + done_refining = False + niter = 0 + + while not done_refining: + niter += 1 + + if niter > maxiter: + logger.warning( + "Max iteration count reached in QBX layer potential source" + " refiner.") + break + + # Build tree and auxiliary data. + # FIXME: The tree should not have to be rebuilt at each iteration. + tree = self.tree_builder(queue, lpot_source) + wait_for = [] + + peer_lists, evt = self.peer_list_finder(queue, tree, wait_for) + wait_for = [evt] + + refine_flags, evt = self.get_refine_flags(queue, lpot_source) + wait_for.append(evt) + + tq_dists, evt = self.get_tunnel_query_dists(queue, tree, lpot_source) + wait_for.append(evt) + + # Run refinement checkers. 
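+                # Each checker below marks panels in refine_flags and reports
+                # whether it flagged anything new; the loop stops once no
+                # checker requests further refinement.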
+ must_refine = False + + must_refine |= \ + self.refinement_check_center_is_closest_to_orig_panel( + queue, tree, lpot_source, peer_lists, refine_flags, + debug, wait_for) + + must_refine |= \ + self.refinement_check_center_is_far_from_nonneighbor_panels( + queue, tree, lpot_source, peer_lists, tq_dists, + refine_flags, debug, wait_for) + + must_refine |= \ + self.refinement_check_2_to_1_panel_size_ratio( + queue, lpot_source, refine_flags, debug, wait_for) + + if helmholtz_k: + must_refine |= \ + self.refinement_check_helmholtz_k_to_panel_size_ratio( + queue, lpot_source, helmholtz_k, refine_flags, debug, + wait_for) + + if must_refine: + lpot_source, conn = self.refine( + queue, lpot_source, refine_flags, refiner, discr_factory, + debug) + connections.append(conn) + + del tree + del peer_lists + del tq_dists + del refine_flags + done_refining = not must_refine + + return lpot_source, connections + +# }}} + +# vim: foldmethod=marker:filetype=pyopencl diff --git a/pytential/qbx/target_assoc.py b/pytential/qbx/target_assoc.py new file mode 100644 index 0000000000000000000000000000000000000000..8890d992106e09509ceee424d73f8fa13ec9fa4f --- /dev/null +++ b/pytential/qbx/target_assoc.py @@ -0,0 +1,709 @@ +# -*- coding: utf-8 -*- +from __future__ import division, absolute_import, print_function + +__copyright__ = """ +Copyright (C) 2016 Matt Wala +""" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + + +import numpy as np +from pytools import memoize_method + +import pyopencl as cl +import pyopencl.array # noqa + +from boxtree.tools import DeviceDataRecord +from boxtree.area_query import AreaQueryElementwiseTemplate +from boxtree.tools import InlineBinarySearch +from pytential.qbx.utils import ( + QBX_TREE_C_PREAMBLE, QBX_TREE_MAKO_DEFS, DiscrPlotterMixin) + + +unwrap_args = AreaQueryElementwiseTemplate.unwrap_args + + +import logging +logger = logging.getLogger(__name__) + +# +# TODO: +# - Documentation +# +#================== +# HOW DOES TARGET ASSOCIATION WORK? +# +# The goal of the target association is to: +# a) decide which targets require QBX, and +# b) decide which centers to use for targets that require QBX, and +# c) if no good centers are available for a target that requires QBX, +# flag the appropriate panels for refinement. +# +# The flow chart of what happens to target t is shown below. Pass names are in +# parentheses: +# +# START HERE +# | +# v +# +-----------------------+ No +-----------------------------------+ +# |(QBXTargetMarker) |--->|Mark t as not requiring QBX. 
| +# |Is t close to a source?| +-----------------------------------+ +# +-----------------------+ +# | Yes +# v +# +-----------------------+ No +-----------------------------------+ +# |(QBXCenterFinder) |--->|(QBXFailedTargetAssociationRefiner)| +# |Is there a valid center| |Mark panels close to t for | +# |close to t? | |refinement. | +# +-----------------------+ +-----------------------------------+ +# | Yes +# v +# +-----------------------+ +# |Associate t with the | +# |best available center. | +# +-----------------------+ + + +# {{{ kernels + +TARGET_ASSOC_DEFINES = r""" +enum TargetStatus +{ + UNMARKED, + MARKED_QBX_CENTER_PENDING, + MARKED_QBX_CENTER_FOUND +}; + +enum TargetFlag +{ + INTERIOR_OR_EXTERIOR_VOLUME_TARGET = 0, + INTERIOR_SURFACE_TARGET = -1, + EXTERIOR_SURFACE_TARGET = +1, + INTERIOR_VOLUME_TARGET = -2, + EXTERIOR_VOLUME_TARGET = +2 +}; +""" + + +class target_status_enum(object): # noqa + # NOTE: Must match "enum TargetStatus" above + UNMARKED = 0 + MARKED_QBX_CENTER_PENDING = 1 + MARKED_QBX_CENTER_FOUND = 2 + + +class target_flag_enum(object): # noqa + # NOTE: Must match "enum TargetFlag" above + INTERIOR_OR_EXTERIOR_VOLUME_TARGET = 0 + INTERIOR_SURFACE_TARGET = -1 + EXTERIOR_SURFACE_TARGET = +1 + INTERIOR_VOLUME_TARGET = -2 + EXTERIOR_VOLUME_TARGET = +2 + + +QBX_TARGET_MARKER = AreaQueryElementwiseTemplate( + extra_args=r""" + /* input */ + particle_id_t *box_to_source_starts, + particle_id_t *box_to_source_lists, + particle_id_t source_offset, + particle_id_t target_offset, + particle_id_t *sorted_target_ids, + coord_t *panel_sizes, + coord_t *box_to_search_dist, + + /* output */ + int *target_status, + int *found_target_close_to_panel, + + /* input, dim-dependent size */ + %for ax in AXIS_NAMES[:dimensions]: + coord_t *particles_${ax}, + %endfor + """, + ball_center_and_radius_expr=QBX_TREE_C_PREAMBLE + QBX_TREE_MAKO_DEFS + r""" + coord_vec_t tgt_coords; + ${load_particle("INDEX_FOR_TARGET_PARTICLE(i)", "tgt_coords")} + { + ${find_guiding_box("tgt_coords", 0, "my_box")} + ${load_center("ball_center", "my_box", declare=False)} + ${ball_radius} = box_to_search_dist[my_box]; + } + """, + leaf_found_op=QBX_TREE_MAKO_DEFS + r""" + for (particle_id_t source_idx = box_to_source_starts[${leaf_box_id}]; + source_idx < box_to_source_starts[${leaf_box_id} + 1]; + ++source_idx) + { + particle_id_t source = box_to_source_lists[source_idx]; + coord_vec_t source_coords; + ${load_particle("INDEX_FOR_SOURCE_PARTICLE(source)", "source_coords")} + + if (distance(source_coords, tgt_coords) <= panel_sizes[source] / 2) + { + target_status[i] = MARKED_QBX_CENTER_PENDING; + *found_target_close_to_panel = 1; + } + } + """, + name="mark_targets", + preamble=TARGET_ASSOC_DEFINES) + + +QBX_CENTER_FINDER = AreaQueryElementwiseTemplate( + extra_args=r""" + /* input */ + particle_id_t *box_to_center_starts, + particle_id_t *box_to_center_lists, + particle_id_t center_offset, + particle_id_t target_offset, + particle_id_t *sorted_target_ids, + coord_t *panel_sizes, + coord_t *box_to_search_dist, + coord_t stick_out_factor, + int *target_flags, + + /* input/output */ + int *target_status, + + /* output */ + int *target_to_center, + coord_t *min_dist_to_center, + + /* input, dim-dependent size */ + %for ax in AXIS_NAMES[:dimensions]: + coord_t *particles_${ax}, + %endfor + """, + ball_center_and_radius_expr=QBX_TREE_C_PREAMBLE + QBX_TREE_MAKO_DEFS + r""" + coord_vec_t tgt_coords; + ${load_particle("INDEX_FOR_TARGET_PARTICLE(i)", "tgt_coords")} + { + ${find_guiding_box("tgt_coords", 0, "my_box")} + 
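+        // Search outward from the guiding box's center, up to the per-box
+        // search radius computed by the space invader query over the centers.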
${load_center("ball_center", "my_box", declare=False)} + ${ball_radius} = box_to_search_dist[my_box]; + } + """, + leaf_found_op=QBX_TREE_MAKO_DEFS + r""" + for (particle_id_t center_idx = box_to_center_starts[${leaf_box_id}]; + center_idx < box_to_center_starts[${leaf_box_id} + 1]; + ++center_idx) + { + particle_id_t center = box_to_center_lists[center_idx]; + int center_side = SIDE_FOR_CENTER_PARTICLE(center); + + // Sign of side should match requested target sign. + if (center_side * target_flags[i] < 0) + { + continue; + } + + coord_vec_t center_coords; + ${load_particle("INDEX_FOR_CENTER_PARTICLE(center)", "center_coords")} + coord_t my_dist_to_center = distance(tgt_coords, center_coords); + + if (my_dist_to_center + <= (panel_sizes[center] / 2) * (1 + stick_out_factor) + && my_dist_to_center < min_dist_to_center[i]) + { + target_status[i] = MARKED_QBX_CENTER_FOUND; + min_dist_to_center[i] = my_dist_to_center; + target_to_center[i] = center; + } + } + """, + name="find_centers", + preamble=TARGET_ASSOC_DEFINES) + + +QBX_FAILED_TARGET_ASSOCIATION_REFINER = AreaQueryElementwiseTemplate( + extra_args=r""" + /* input */ + particle_id_t *box_to_source_starts, + particle_id_t *box_to_source_lists, + particle_id_t *panel_to_source_starts, + particle_id_t source_offset, + particle_id_t target_offset, + int npanels, + particle_id_t *sorted_target_ids, + coord_t *panel_sizes, + int *target_status, + coord_t *box_to_search_dist, + + /* output */ + int *refine_flags, + int *found_panel_to_refine, + + /* input, dim-dependent size */ + %for ax in AXIS_NAMES[:dimensions]: + coord_t *particles_${ax}, + %endfor + """, + ball_center_and_radius_expr=QBX_TREE_C_PREAMBLE + QBX_TREE_MAKO_DEFS + r""" + coord_vec_t target_coords; + ${load_particle("INDEX_FOR_TARGET_PARTICLE(i)", "target_coords")} + { + ${find_guiding_box("target_coords", 0, "my_box")} + ${load_center("ball_center", "my_box", declare=False)} + ${ball_radius} = box_to_search_dist[my_box]; + } + """, + leaf_found_op=QBX_TREE_MAKO_DEFS + r""" + for (particle_id_t source_idx = box_to_source_starts[${leaf_box_id}]; + source_idx < box_to_source_starts[${leaf_box_id} + 1]; + ++source_idx) + { + particle_id_t source = box_to_source_lists[source_idx]; + + coord_vec_t source_coords; + ${load_particle("INDEX_FOR_SOURCE_PARTICLE(source)", "source_coords")} + + bool is_close = + distance(target_coords, source_coords) + <= panel_sizes[source] / 2; + + if (is_close && target_status[i] == MARKED_QBX_CENTER_PENDING) + { + particle_id_t panel = bsearch( + panel_to_source_starts, npanels + 1, source); + refine_flags[panel] = 1; + *found_panel_to_refine = 1; + } + } + """, + name="refine_panels", + preamble=TARGET_ASSOC_DEFINES + str(InlineBinarySearch("particle_id_t"))) + +# }}} + + +# {{{ target associator + +class QBXTargetAssociationFailedException(Exception): + """ + .. attribute:: refine_flags + .. attribute:: failed_target_flags + """ + def __init__(self, refine_flags, failed_target_flags): + self.refine_flags = refine_flags + self.failed_target_flags = failed_target_flags + + def __repr__(self): + return "<%s>" % type(self).__name__ + + +class QBXTargetAssociation(DeviceDataRecord): + """ + .. 
attribute:: target_to_center + """ + pass + + +class QBXTargetAssociator(DiscrPlotterMixin): + + def __init__(self, cl_context): + from pytential.qbx.utils import TreeWithQBXMetadataBuilder + self.tree_builder = TreeWithQBXMetadataBuilder(cl_context) + self.cl_context = cl_context + from boxtree.area_query import PeerListFinder, SpaceInvaderQueryBuilder + self.peer_list_finder = PeerListFinder(cl_context) + self.space_invader_query = SpaceInvaderQueryBuilder(cl_context) + + # {{{ kernel generation + + @memoize_method + def get_qbx_target_marker(self, + dimensions, + coord_dtype, + box_id_dtype, + peer_list_idx_dtype, + particle_id_dtype, + max_levels): + return QBX_TARGET_MARKER.generate( + self.cl_context, + dimensions, + coord_dtype, + box_id_dtype, + peer_list_idx_dtype, + max_levels, + extra_type_aliases=(("particle_id_t", particle_id_dtype),)) + + @memoize_method + def get_qbx_center_finder(self, + dimensions, + coord_dtype, + box_id_dtype, + peer_list_idx_dtype, + particle_id_dtype, + max_levels): + return QBX_CENTER_FINDER.generate( + self.cl_context, + dimensions, + coord_dtype, + box_id_dtype, + peer_list_idx_dtype, + max_levels, + extra_type_aliases=(("particle_id_t", particle_id_dtype),)) + + @memoize_method + def get_qbx_failed_target_association_refiner(self, dimensions, coord_dtype, + box_id_dtype, peer_list_idx_dtype, + particle_id_dtype, max_levels): + return QBX_FAILED_TARGET_ASSOCIATION_REFINER.generate( + self.cl_context, + dimensions, + coord_dtype, + box_id_dtype, + peer_list_idx_dtype, + max_levels, + extra_type_aliases=(("particle_id_t", particle_id_dtype),)) + + # }}} + + def mark_targets(self, queue, tree, peer_lists, lpot_source, target_status, + debug, wait_for=None): + # Avoid generating too many kernels. + from pytools import div_ceil + max_levels = 10 * div_ceil(tree.nlevels, 10) + + knl = self.get_qbx_target_marker( + tree.dimensions, + tree.coord_dtype, tree.box_id_dtype, + peer_lists.peer_list_starts.dtype, + tree.particle_id_dtype, + max_levels) + + found_target_close_to_panel = cl.array.zeros(queue, 1, np.int32) + found_target_close_to_panel.finish() + + # Perform a space invader query over the sources. 
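+        # This gives, per tree box, an upper bound on how far the area query
+        # below has to look for source balls of radius panel_size / 2 that
+        # overlap the box.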
+ source_slice = tree.user_source_ids[tree.qbx_user_source_slice] + sources = [axis.with_queue(queue)[source_slice] for axis in tree.sources] + panel_sizes = lpot_source.panel_sizes("nsources").with_queue(queue) + + box_to_search_dist, evt = self.space_invader_query( + queue, + tree, + sources, + panel_sizes / 2, + peer_lists, + wait_for=wait_for) + wait_for = [evt] + + logger.info("target association: marking targets close to panels") + + evt = knl( + *unwrap_args( + tree, peer_lists, + tree.box_to_qbx_source_starts, + tree.box_to_qbx_source_lists, + tree.qbx_user_source_slice.start, + tree.qbx_user_target_slice.start, + tree.sorted_target_ids, + panel_sizes, + box_to_search_dist, + target_status, + found_target_close_to_panel, + *tree.sources), + range=slice(tree.nqbxtargets), + queue=queue, + wait_for=wait_for) + + if debug: + target_status.finish() + # Marked target = 1, 0 otherwise + marked_target_count = cl.array.sum(target_status).get() + logger.debug("target association: {}/{} targets marked close to panels" + .format(marked_target_count, tree.nqbxtargets)) + + cl.wait_for_events([evt]) + + logger.info("target association: done marking targets close to panels") + + return (found_target_close_to_panel == 1).all().get() + + def try_find_centers(self, queue, tree, peer_lists, lpot_source, + target_status, target_flags, target_assoc, + stick_out_factor, debug, wait_for=None): + # Avoid generating too many kernels. + from pytools import div_ceil + max_levels = 10 * div_ceil(tree.nlevels, 10) + + knl = self.get_qbx_center_finder( + tree.dimensions, + tree.coord_dtype, tree.box_id_dtype, + peer_lists.peer_list_starts.dtype, + tree.particle_id_dtype, + max_levels) + + if debug: + target_status.finish() + marked_target_count = int(cl.array.sum(target_status).get()) + + # Perform a space invader query over the centers. + center_slice = \ + tree.sorted_target_ids[tree.qbx_user_center_slice].with_queue(queue) + centers = [axis.with_queue(queue)[center_slice] for axis in tree.sources] + panel_sizes = lpot_source.panel_sizes("ncenters").with_queue(queue) + + box_to_search_dist, evt = self.space_invader_query( + queue, + tree, + centers, + panel_sizes * ((1 + stick_out_factor) / 2), + peer_lists, + wait_for=wait_for) + wait_for = [evt] + + min_dist_to_center = cl.array.empty( + queue, tree.nqbxtargets, tree.coord_dtype) + min_dist_to_center.fill(np.inf) + + wait_for.extend(min_dist_to_center.events) + + logger.info("target association: finding centers for targets") + + evt = knl( + *unwrap_args( + tree, peer_lists, + tree.box_to_qbx_center_starts, + tree.box_to_qbx_center_lists, + tree.qbx_user_center_slice.start, + tree.qbx_user_target_slice.start, + tree.sorted_target_ids, + panel_sizes, + box_to_search_dist, + stick_out_factor, + target_flags, + target_status, + target_assoc.target_to_center, + min_dist_to_center, + *tree.sources), + range=slice(tree.nqbxtargets), + queue=queue, + wait_for=wait_for) + + if debug: + target_status.finish() + # Associated target = 2, marked target = 1 + ntargets_associated = ( + int(cl.array.sum(target_status).get()) - marked_target_count) + assert ntargets_associated >= 0 + logger.debug("target association: {} targets were assigned centers" + .format(ntargets_associated)) + + cl.wait_for_events([evt]) + logger.info("target association: done finding centers for targets") + return + + def mark_panels_for_refinement(self, queue, tree, peer_lists, lpot_source, + target_status, refine_flags, debug, + wait_for=None): + # Avoid generating too many kernels. 
+ from pytools import div_ceil + max_levels = 10 * div_ceil(tree.nlevels, 10) + + knl = self.get_qbx_failed_target_association_refiner( + tree.dimensions, + tree.coord_dtype, tree.box_id_dtype, + peer_lists.peer_list_starts.dtype, + tree.particle_id_dtype, + max_levels) + + found_panel_to_refine = cl.array.zeros(queue, 1, np.int32) + found_panel_to_refine.finish() + + # Perform a space invader query over the sources. + source_slice = tree.user_source_ids[tree.qbx_user_source_slice] + sources = [axis.with_queue(queue)[source_slice] for axis in tree.sources] + panel_sizes = lpot_source.panel_sizes("nsources").with_queue(queue) + + box_to_search_dist, evt = self.space_invader_query( + queue, + tree, + sources, + panel_sizes / 2, + peer_lists, + wait_for=wait_for) + wait_for = [evt] + + logger.info("target association: marking panels for refinement") + + evt = knl( + *unwrap_args( + tree, peer_lists, + tree.box_to_qbx_source_starts, + tree.box_to_qbx_source_lists, + tree.qbx_panel_to_source_starts, + tree.qbx_user_source_slice.start, + tree.qbx_user_target_slice.start, + tree.nqbxpanels, + tree.sorted_target_ids, + lpot_source.panel_sizes("nsources"), + target_status, + box_to_search_dist, + refine_flags, + found_panel_to_refine, + *tree.sources), + range=slice(tree.nqbxtargets), + queue=queue, + wait_for=wait_for) + + if debug: + refine_flags.finish() + # Marked panel = 1, 0 otherwise + marked_panel_count = cl.array.sum(refine_flags).get() + logger.debug("target association: {} panels flagged for refinement" + .format(marked_panel_count)) + + cl.wait_for_events([evt]) + + logger.info("target association: done marking panels for refinement") + + return (found_panel_to_refine == 1).all().get() + + def make_target_flags(self, queue, target_discrs_and_qbx_sides): + ntargets = sum(discr.nnodes for discr, _ in target_discrs_and_qbx_sides) + target_flags = cl.array.empty(queue, ntargets, dtype=np.int32) + offset = 0 + + for discr, flags in target_discrs_and_qbx_sides: + if np.isscalar(flags): + target_flags[offset:offset + discr.nnodes].fill(flags) + else: + assert len(flags) == discr.nnodes + target_flags[offset:offset + discr.nnodes] = flags + offset += discr.nnodes + + target_flags.finish() + return target_flags + + def make_default_target_association(self, queue, ntargets): + target_to_center = cl.array.empty(queue, ntargets, dtype=np.int32) + target_to_center.fill(-1) + target_to_center.finish() + + return QBXTargetAssociation(target_to_center=target_to_center) + + def __call__(self, lpot_source, target_discrs_and_qbx_sides, + stick_out_factor=1e-10, debug=True, wait_for=None): + """ + Entry point for calling the target associator. + + :arg lpot_source: An instance of :class:`NewQBXLayerPotentialSource` + + :arg target_discrs_and_qbx_sides: + + a list of tuples ``(discr, sides)``, where + *discr* is a + :class:`pytential.discretization.Discretization` + or a + :class:`pytential.discretization.target.TargetBase` instance, and + *sides* is either a :class:`int` or + an array of (:class:`numpy.int8`) side requests for each + target. + + The side request can take the following values for each target: + + ===== ============================================== + Value Meaning + ===== ============================================== + 0 Volume target. If near a QBX center, + the value from the QBX expansion is returned, + otherwise the volume potential is returned. + + -1 Surface target. Return interior limit from + interior-side QBX expansion. + + +1 Surface target. 
Return exterior limit from
+                   exterior-side QBX expansion.
+
+            -2     Volume target. If within an *interior* QBX disk,
+                   the value from the QBX expansion is returned,
+                   otherwise the volume potential is returned.
+
+            +2     Volume target. If within an *exterior* QBX disk,
+                   the value from the QBX expansion is returned,
+                   otherwise the volume potential is returned.
+            =====  ==============================================
+
+        :raises QBXTargetAssociationFailedException:
+            when target association fails to find a center for a target.
+            The returned exception object contains suggested refine flags.
+
+        :returns: A :class:`QBXTargetAssociation`.
+        """
+
+        with cl.CommandQueue(self.cl_context) as queue:
+            tree = self.tree_builder(queue, lpot_source,
+                    [discr for discr, _ in target_discrs_and_qbx_sides])
+            peer_lists, evt = self.peer_list_finder(queue, tree, wait_for)
+            wait_for = [evt]
+
+            target_status = cl.array.zeros(queue, tree.nqbxtargets, dtype=np.int32)
+            target_status.finish()
+
+            have_close_targets = self.mark_targets(queue, tree, peer_lists,
+                                                   lpot_source, target_status,
+                                                   debug)
+
+            target_assoc = self.make_default_target_association(
+                queue, tree.nqbxtargets)
+
+            if not have_close_targets:
+                return target_assoc.with_queue(None)
+
+            target_flags = self.make_target_flags(queue, target_discrs_and_qbx_sides)
+
+            self.try_find_centers(queue, tree, peer_lists, lpot_source,
+                                  target_status, target_flags, target_assoc,
+                                  stick_out_factor, debug)
+
+            center_not_found = (
+                target_status == target_status_enum.MARKED_QBX_CENTER_PENDING)
+
+            if center_not_found.any().get():
+                surface_target = (
+                    (target_flags == target_flag_enum.INTERIOR_SURFACE_TARGET)
+                    | (target_flags == target_flag_enum.EXTERIOR_SURFACE_TARGET))
+
+                if (center_not_found & surface_target).any().get():
+                    logger.warning("An on-surface target was not "
+                            "assigned a center. As a remedy you can try increasing "
+                            "the \"stick_out_factor\" parameter, but this could "
+                            "also cause an invalid center assignment.")
+
+                refine_flags = cl.array.zeros(queue, tree.nqbxpanels, dtype=np.int32)
+                have_panel_to_refine = self.mark_panels_for_refinement(queue,
+                                                tree, peer_lists,
+                                                lpot_source, target_status,
+                                                refine_flags, debug)
+                assert have_panel_to_refine
+                raise QBXTargetAssociationFailedException(
+                        refine_flags=refine_flags.with_queue(None),
+                        failed_target_flags=center_not_found.with_queue(None))
+
+            return target_assoc.with_queue(None)
+
+# }}}
+
+# vim: foldmethod=marker:filetype=pyopencl
diff --git a/pytential/qbx/utils.py b/pytential/qbx/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..60da4b5b2a4e914696ba0ea1083c0cc1a6f825d0
--- /dev/null
+++ b/pytential/qbx/utils.py
@@ -0,0 +1,321 @@
+# -*- coding: utf-8 -*-
+from __future__ import division, absolute_import, print_function
+
+__copyright__ = """
+Copyright (C) 2016 Matt Wala
+"""
+
+__license__ = """
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + + +import loopy as lp +import numpy as np +from boxtree.tree import Tree +import pyopencl as cl +import pyopencl.array # noqa +from pytools import memoize + +import logging +logger = logging.getLogger(__name__) + + +# {{{ c and mako snippets + +QBX_TREE_C_PREAMBLE = r"""//CL:mako// +// A note on node numberings: sources, centers, and panels each +// have their own numbering starting at 0. These macros convert +// the per-class numbering into the internal tree particle number. +#define INDEX_FOR_CENTER_PARTICLE(i) (sorted_target_ids[center_offset + i]) +#define INDEX_FOR_PANEL_PARTICLE(i) (sorted_target_ids[panel_offset + i]) +#define INDEX_FOR_SOURCE_PARTICLE(i) (sorted_target_ids[source_offset + i]) +#define INDEX_FOR_TARGET_PARTICLE(i) (sorted_target_ids[target_offset + i]) + +#define SOURCE_FOR_CENTER_PARTICLE(i) (i / 2) +#define SIDE_FOR_CENTER_PARTICLE(i) (2 * (i % 2) - 1) + +## Convert to dict first, as this may be passed as a tuple-of-tuples. +<% vec_types_dict = dict(vec_types) %> +typedef ${dtype_to_ctype(vec_types_dict[coord_dtype, dimensions])} coord_vec_t; +""" + +QBX_TREE_MAKO_DEFS = r"""//CL:mako// +<%def name="load_particle(particle, coords)"> + %for ax in AXIS_NAMES[:dimensions]: + ${coords}.${ax} = particles_${ax}[${particle}]; + %endfor + +""" + +# }}} + + +# {{{ interleaver kernel + +@memoize +def get_interleaver_kernel(dtype): + # NOTE: Returned kernel needs dstlen or dst parameter + from pymbolic import var + knl = lp.make_kernel( + "[srclen,dstlen] -> {[i]: 0<=i particle class relations + flags = refine_weights + del refine_weights + particle_classes = {} + + for class_name, particle_slice, fixup in ( + ("box_to_qbx_source", qbx_user_source_slice, 0), + ("box_to_qbx_target", qbx_user_target_slice, -target_slice_start), + ("box_to_qbx_center", qbx_user_center_slice, -nsources), + ("box_to_qbx_panel", qbx_user_panel_slice, -panel_slice_start)): + flags.fill(0) + flags[particle_slice].fill(1) + flags.finish() + + from boxtree.tree import filter_target_lists_in_user_order + box_to_class = ( + filter_target_lists_in_user_order(queue, tree, flags) + .with_queue(queue)) + + if fixup: + box_to_class.target_lists += fixup + particle_classes[class_name + "_starts"] = box_to_class.target_starts + particle_classes[class_name + "_lists"] = box_to_class.target_lists + + del flags + del box_to_class + + # Compute panel => source relation + qbx_panel_to_source_starts = cl.array.empty( + queue, npanels + 1, dtype=tree.particle_id_dtype) + el_offset = 0 + for group in lpot_source.density_discr.groups: + qbx_panel_to_source_starts[el_offset:el_offset + group.nelements] = \ + cl.array.arange(queue, group.node_nr_base, + group.node_nr_base + group.nnodes, + group.nunit_nodes, + dtype=tree.particle_id_dtype) + el_offset += group.nelements + qbx_panel_to_source_starts[-1] = nsources + + # Compute panel => center relation + qbx_panel_to_center_starts = 2 * qbx_panel_to_source_starts + + # Transfer all tree attributes. 
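+        # Copy every field of the underlying boxtree Tree into the record, so
+        # that the returned TreeWithQBXMetadata can be used wherever a plain
+        # tree is expected, with the QBX-specific box-to-{source, center,
+        # panel, target} lists and slices computed above layered on top.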
+ tree_attrs = {} + for attr_name in tree.__class__.fields: + try: + tree_attrs[attr_name] = getattr(tree, attr_name) + except AttributeError: + pass + + tree_attrs.update(particle_classes) + + logger.info("done building tree with qbx metadata") + + return self.TreeWithQBXMetadata( + qbx_panel_to_source_starts=qbx_panel_to_source_starts, + qbx_panel_to_center_starts=qbx_panel_to_center_starts, + qbx_user_source_slice=qbx_user_source_slice, + qbx_user_panel_slice=qbx_user_panel_slice, + qbx_user_center_slice=qbx_user_center_slice, + qbx_user_target_slice=qbx_user_target_slice, + nqbxpanels=npanels, + nqbxsources=nsources, + nqbxcenters=ncenters, + nqbxtargets=ntargets, + **tree_attrs).with_queue(None) + +# }}} + +# vim: foldmethod=marker:filetype=pyopencl diff --git a/pytential/symbolic/matrix.py b/pytential/symbolic/matrix.py index 6d204372126800cba3cf5db8a4bae29a8a87fb3c..9b4622fde5f4bf8cc0527b05b09c38433fdf4267 100644 --- a/pytential/symbolic/matrix.py +++ b/pytential/symbolic/matrix.py @@ -187,7 +187,7 @@ class MatrixBuilder(EvaluationMapperBase): _, (mat,) = mat_gen(self.queue, target_discr.nodes(), source.fine_density_discr.nodes(), - source.centers(target_discr, expr.qbx_forced_limit), + source.centers(expr.qbx_forced_limit), **kernel_args) mat = mat.get() diff --git a/pytential/target.py b/pytential/target.py index a4f199f06f3206dc48fae5c10f2426d0f3fb1d80..e677bdb7a2f5a1494d87faef74800292bce7fdad 100644 --- a/pytential/target.py +++ b/pytential/target.py @@ -48,7 +48,7 @@ class TargetBase(object): class PointsTarget(TargetBase): """The point of this class is to act as a container for some target points - while presenting enough of the :class:`pytential.discretization.Discretization` + while presenting enough of the :class:`meshmode.discretization.Discretization` interface to not necessitate a lot of special cases in that code path. """ diff --git a/test/extra_curve_data.py b/test/extra_curve_data.py new file mode 100644 index 0000000000000000000000000000000000000000..e6d667e41ade1e444b97647e58f03f4b6152922b --- /dev/null +++ b/test/extra_curve_data.py @@ -0,0 +1,160 @@ +from __future__ import division, absolute_import, print_function + +__copyright__ = """Copyright (C) 2016 Matt Wala""" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+""" + +import numpy as np +import numpy.linalg as la + + +class Curve(object): + + def plot(self, npoints=50): + import matplotlib.pyplot as plt + x, y = self(np.linspace(0, 1, npoints)) + plt.plot(x, y, marker=".", lw=0) + plt.axis("equal") + plt.show() + + def __add__(self, other): + return CompositeCurve(self, other) + + +class CompositeCurve(Curve): + """ + Parametrization of two or more curves combined. + """ + + def __init__(self, *objs): + curves = [] + for obj in objs: + if isinstance(obj, CompositeCurve): + curves.extend(obj.curves) + else: + curves.append(obj) + self.curves = curves + + def __call__(self, ts): + ranges = np.linspace(0, 1, len(self.curves) + 1) + ts_argsort = np.argsort(ts) + ts_sorted = ts[ts_argsort] + ts_split_points = np.searchsorted(ts_sorted, ranges) + # Make sure the last entry = len(ts), otherwise if ts finishes with a + # trail of 1s, then they won't be forwarded to the last curve. + ts_split_points[-1] = len(ts) + result = [] + subranges = [ + slice(*pair) for pair in zip(ts_split_points, ts_split_points[1:])] + for curve, subrange, (start, end) in zip( + self.curves, subranges, zip(ranges, ranges[1:])): + ts_mapped = (ts_sorted[subrange] - start) / (end - start) + result.append(curve(ts_mapped)) + final = np.concatenate(result, axis=-1) + assert len(final[0]) == len(ts) + return final + + +class Segment(Curve): + """ + Represents a line segment. + """ + + def __init__(self, start, end): + self.start = np.array(start) + self.end = np.array(end) + + def __call__(self, ts): + return ( + self.start[:, np.newaxis] + + ts * (self.end - self.start)[:, np.newaxis]) + + +class Arc(Curve): + """ + Represents an arc of a circle. + """ + + def __init__(self, start, mid, end): + """ + :arg start: starting point of the arc + :arg mid: any point along the arc + :arg end: ending point of the arc + """ + xs, ys = np.stack((start, mid, end), axis=1) + + # Get center and radius of circle containing the arc. + # http://math.stackexchange.com/a/1460096 + c = np.array([xs**2 + ys**2, xs, ys, [1, 1, 1]]) + x0 = la.det(np.delete(c, 1, 0)) / (2 * la.det(np.delete(c, 0, 0))) + y0 = -la.det(np.delete(c, 2, 0)) / (2 * la.det(np.delete(c, 0, 0))) + + self.r = la.norm([start[0] - x0, start[1] - y0]) + self.center = x0 + 1j * y0 + + theta_start = np.arctan2(start[1] - y0, start[0] - x0) + theta_mid = np.arctan2(mid[1] - y0, mid[0] - x0) + theta_end = np.arctan2(end[1] - y0, end[0] - x0) + + if theta_start <= theta_end: + crosses_branch = not (theta_start <= theta_mid <= theta_end) + else: + crosses_branch = not (theta_start >= theta_mid >= theta_end) + + if crosses_branch: + # Shift the angles so that branch crossing is not involved. + if theta_start < 0: + theta_start += 2 * np.pi + if theta_mid < 0: + theta_mid += 2 * np.pi + if theta_end < 0: + theta_end += 2 * np.pi + + self.theta_range = np.array(sorted([theta_start, theta_end])) + self.theta_increasing = theta_start <= theta_end + + def __call__(self, t): + if self.theta_increasing: + thetas = ( + self.theta_range[0] + + t * (self.theta_range[1] - self.theta_range[0])) + else: + thetas = ( + self.theta_range[1] - + t * (self.theta_range[1] - self.theta_range[0])) + val = (self.r * np.exp(1j * thetas)) + self.center + return np.array([val.real, val.imag]) + + +# horseshoe curve +# +# To avoid issues with crossing non-smooth regions, make sure the number of +# panels given to this function (for make_curve_mesh) is a multiple of 8. 
+horseshoe = ( + Segment((0, 0), (-5, 0)) + + Arc((-5, 0), (-5.5, -0.5), (-5, -1)) + + Segment((-5, -1), (0, -1)) + + Arc((0, -1), (1.5, 0.5), (0, 2)) + + Segment((0, 2), (-5, 2)) + + Arc((-5, 2), (-5.5, 1.5), (-5, 1)) + + Segment((-5, 1), (0, 1)) + + Arc((0, 1), (0.5, 0.5), (0, 0)) + ) diff --git a/test/test_global_qbx.py b/test/test_global_qbx.py new file mode 100644 index 0000000000000000000000000000000000000000..d7e514510d320602496a8b13a562a9da99733eba --- /dev/null +++ b/test/test_global_qbx.py @@ -0,0 +1,385 @@ +from __future__ import division, absolute_import, print_function + +__copyright__ = """ +Copyright (C) 2013 Andreas Kloeckner +Copyright (C) 2016 Matt Wala +""" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + + +import numpy as np +import numpy.linalg as la +import pyopencl as cl +import pyopencl.clmath # noqa +import pytest +from pytools import RecordWithoutPickling +from pyopencl.tools import pytest_generate_tests_for_pyopencl \ + as pytest_generate_tests +from pytential.qbx import QBXLayerPotentialSource + +from functools import partial +from meshmode.mesh.generation import ( # noqa + ellipse, cloverleaf, starfish, drop, n_gon, qbx_peanut, + make_curve_mesh) +from extra_curve_data import horseshoe + + +import logging +logger = logging.getLogger(__name__) + +__all__ = ["pytest_generate_tests"] + + +RNG_SEED = 10 +FAR_TARGET_DIST_FROM_SOURCE = 10 + + +# {{{ utilities for iterating over panels + +class ElementInfo(RecordWithoutPickling): + """ + .. attribute:: element_nr + .. attribute:: neighbors + .. attribute:: discr_slice + .. attribute:: mesh_slice + .. attribute:: element_group + .. 
attribute:: mesh_element_group + """ + __slots__ = ["element_nr", + "neighbors", + "discr_slice"] + + +def iter_elements(discr): + discr_nodes_idx = 0 + element_nr = 0 + adjacency = discr.mesh.nodal_adjacency + + for discr_group in discr.groups: + start = element_nr + for element_nr in range(start, start + discr_group.nelements): + yield ElementInfo( + element_nr=element_nr, + neighbors=list(adjacency.neighbors[ + slice(*adjacency.neighbors_starts[ + element_nr:element_nr+2])]), + discr_slice=slice(discr_nodes_idx, + discr_nodes_idx + discr_group.nunit_nodes)) + + discr_nodes_idx += discr_group.nunit_nodes + +# }}} + + +@pytest.mark.parametrize(("curve_name", "curve_f", "nelements"), [ + ("20-to-1 ellipse", partial(ellipse, 20), 100), + ("horseshoe", horseshoe, 64), + ]) +def test_source_refinement(ctx_getter, curve_name, curve_f, nelements): + cl_ctx = ctx_getter() + queue = cl.CommandQueue(cl_ctx) + + # {{{ generate lpot source, run refiner + + order = 16 + helmholtz_k = 10 + + mesh = make_curve_mesh(curve_f, np.linspace(0, 1, nelements+1), order) + + from meshmode.discretization import Discretization + from meshmode.discretization.poly_element import \ + InterpolatoryQuadratureSimplexGroupFactory + factory = InterpolatoryQuadratureSimplexGroupFactory(order) + + discr = Discretization(cl_ctx, mesh, factory) + + from pytential.qbx.refinement import QBXLayerPotentialSourceRefiner + + lpot_source = QBXLayerPotentialSource(discr, order) + del discr + refiner = QBXLayerPotentialSourceRefiner(cl_ctx) + + lpot_source, conn = refiner(lpot_source, factory, helmholtz_k) + + discr_nodes = lpot_source.density_discr.nodes().get(queue) + int_centers = lpot_source.centers(-1) + int_centers = np.array([axis.get(queue) for axis in int_centers]) + ext_centers = lpot_source.centers(+1) + ext_centers = np.array([axis.get(queue) for axis in ext_centers]) + panel_sizes = lpot_source.panel_sizes("npanels").get(queue) + + # }}} + + # {{{ check if satisfying criteria + + def check_panel(panel): + # Check 2-to-1 panel to neighbor size ratio. + for neighbor in panel.neighbors: + assert panel_sizes[panel.element_nr] / panel_sizes[neighbor] <= 2, \ + (panel_sizes[panel.element_nr], panel_sizes[neighbor]) + + # Check wavenumber to panel size ratio. + assert panel_sizes[panel.element_nr] * helmholtz_k <= 5 + + def check_panel_pair(panel_1, panel_2): + h_1 = panel_sizes[panel_1.element_nr] + h_2 = panel_sizes[panel_2.element_nr] + + if panel_1.element_nr == panel_2.element_nr: + # Same panel + return + + panel_1_centers = int_centers[:, panel_1.discr_slice] + panel_2_nodes = discr_nodes[:, panel_2.discr_slice] + + # =distance(centers of panel 1, panel 2) + dist = ( + la.norm(( + panel_1_centers[..., np.newaxis] - + panel_2_nodes[:, np.newaxis, ...]).T, + axis=-1) + .min()) + + # Criterion 1: + # A center cannot be closer to another panel than to its originating + # panel. + + assert dist >= h_1 / 2, (dist, h_1, panel_1.element_nr, panel_2.element_nr) + + # Criterion 2: + # A center cannot be closer to another panel than that panel's + # centers - unless the panels are adjacent, to allow for refinement. 
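+        # The two centers attached to each node of panel 2 sit at a distance
+        # of h_2 / 2 from the panel, so "not closer than that panel's centers"
+        # translates into the dist >= h_2 / 2 check below; directly adjacent
+        # panels are exempted first.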
+ + if panel_2.element_nr in panel_1.neighbors: + return + + assert dist >= h_2 / 2, (dist, h_2, panel_1.element_nr, panel_2.element_nr) + + for panel_1 in iter_elements(lpot_source.density_discr): + check_panel(panel_1) + for panel_2 in iter_elements(lpot_source.density_discr): + check_panel_pair(panel_1, panel_2) + + # }}} + + +@pytest.mark.parametrize(("curve_name", "curve_f", "nelements"), [ + ("20-to-1 ellipse", partial(ellipse, 20), 100), + ("horseshoe", horseshoe, 64), + ]) +def test_target_association(ctx_getter, curve_name, curve_f, nelements): + cl_ctx = ctx_getter() + queue = cl.CommandQueue(cl_ctx) + + # {{{ generate lpot source + + order = 16 + + # Make the curve mesh. + mesh = make_curve_mesh(curve_f, np.linspace(0, 1, nelements+1), order) + + from meshmode.discretization import Discretization + from meshmode.discretization.poly_element import \ + InterpolatoryQuadratureSimplexGroupFactory + factory = InterpolatoryQuadratureSimplexGroupFactory(order) + + discr = Discretization(cl_ctx, mesh, factory) + + from pytential.qbx.refinement import QBXLayerPotentialSourceRefiner + + lpot_source = QBXLayerPotentialSource(discr, order) + del discr + refiner = QBXLayerPotentialSourceRefiner(cl_ctx) + + lpot_source, conn = refiner(lpot_source, factory) + + int_centers = lpot_source.centers(-1) + ext_centers = lpot_source.centers(+1) + + # }}} + + # {{{ generate targets + + from pyopencl.clrandom import PhiloxGenerator + rng = PhiloxGenerator(cl_ctx, seed=RNG_SEED) + nsources = lpot_source.density_discr.nnodes + noise = rng.uniform(queue, nsources, dtype=np.float, a=0.01, b=1.0) + panel_sizes = lpot_source.panel_sizes("nsources").with_queue(queue) + + def targets_from_sources(sign, dist): + from pytential import sym, bind + dim = 2 + nodes = bind(lpot_source.density_discr, sym.nodes(dim))(queue) + normals = bind(lpot_source.density_discr, sym.normal(dim))(queue) + return (nodes + normals * sign * dist).as_vector(np.object) + + from pytential.target import PointsTarget + + int_targets = PointsTarget(targets_from_sources(-1, noise * panel_sizes / 2)) + ext_targets = PointsTarget(targets_from_sources(+1, noise * panel_sizes / 2)) + far_targets = PointsTarget(targets_from_sources(+1, FAR_TARGET_DIST_FROM_SOURCE)) + + # Create target discretizations. + target_discrs = ( + # On-surface targets, interior + (lpot_source.density_discr, -1), + # On-surface targets, exterior + (lpot_source.density_discr, +1), + # Interior close targets + (int_targets, -2), + # Exterior close targets + (ext_targets, +2), + # Far targets, should not need centers + (far_targets, 0), + ) + + sizes = np.cumsum([discr.nnodes for discr, _ in target_discrs]) + + (surf_int_slice, + surf_ext_slice, + vol_int_slice, + vol_ext_slice, + far_slice, + ) = [slice(start, end) for start, end in zip(np.r_[0, sizes], sizes)] + + # }}} + + # {{{ run target associator and check + + from pytential.qbx.target_assoc import QBXTargetAssociator + target_assoc = ( + QBXTargetAssociator(cl_ctx)(lpot_source, target_discrs) + .get(queue=queue)) + + panel_sizes = lpot_source.panel_sizes("nsources").get(queue) + + int_centers = np.array([axis.get(queue) for axis in int_centers]) + ext_centers = np.array([axis.get(queue) for axis in ext_centers]) + int_targets = np.array([axis.get(queue) for axis in int_targets.nodes()]) + ext_targets = np.array([axis.get(queue) for axis in ext_targets.nodes()]) + + # Checks that the sources match with their own centers. 
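+    # That is, when the surface nodes themselves are the targets, node i must
+    # be matched with one of the two centers spawned from node i (so that
+    # target_to_center // 2 == i), on the side that was requested.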
+ def check_on_surface_targets(nsources, true_side, target_to_source_result, + target_to_side_result): + sources = np.arange(0, nsources) + assert (target_to_source_result == sources).all() + assert (target_to_side_result == true_side).all() + + # Checks that the targets match with centers on the appropriate side and + # within the allowable distance. + def check_close_targets(centers, targets, true_side, + target_to_source_result, target_to_side_result): + assert (target_to_side_result == true_side).all() + dists = la.norm((targets.T - centers.T[target_to_source_result]), axis=1) + assert (dists <= panel_sizes[target_to_source_result] / 2).all() + + # Checks that far targets are not assigned a center. + def check_far_targets(target_to_source_result): + assert (target_to_source_result == -1).all() + + # Centers for source i are located at indices 2 * i, 2 * i + 1 + target_to_source = target_assoc.target_to_center // 2 + # Center side order = -1, 1, -1, 1, ... + target_to_center_side = 2 * (target_assoc.target_to_center % 2) - 1 + + check_on_surface_targets( + nsources, -1, + target_to_source[surf_int_slice], + target_to_center_side[surf_int_slice]) + + check_on_surface_targets( + nsources, +1, + target_to_source[surf_ext_slice], + target_to_center_side[surf_ext_slice]) + + check_close_targets( + int_centers, int_targets, -1, + target_to_source[vol_int_slice], + target_to_center_side[vol_int_slice]) + + check_close_targets( + ext_centers, ext_targets, +1, + target_to_source[vol_ext_slice], + target_to_center_side[vol_ext_slice]) + + check_far_targets( + target_to_source[far_slice]) + + # }}} + + +def test_target_association_failure(ctx_getter): + cl_ctx = ctx_getter() + queue = cl.CommandQueue(cl_ctx) + + # {{{ generate circle + + order = 5 + nelements = 40 + + # Make the curve mesh. 
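+    # (This source is deliberately left unrefined, and the targets further
+    # down sit just inside the circle at radius 0.999, so close to the
+    # boundary that target association is expected to fail and raise
+    # QBXTargetAssociationFailedException.)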
+ curve_f = partial(ellipse, 1) + mesh = make_curve_mesh(curve_f, np.linspace(0, 1, nelements+1), order) + + from meshmode.discretization import Discretization + from meshmode.discretization.poly_element import \ + InterpolatoryQuadratureSimplexGroupFactory + factory = InterpolatoryQuadratureSimplexGroupFactory(order) + discr = Discretization(cl_ctx, mesh, factory) + lpot_source = QBXLayerPotentialSource(discr, order) + + # }}} + + # {{{ generate targets and check + + close_circle = 0.999 * np.exp( + 2j * np.pi * np.linspace(0, 1, 500, endpoint=False)) + from pytential.target import PointsTarget + close_circle_target = ( + PointsTarget(cl.array.to_device( + queue, np.array([close_circle.real, close_circle.imag])))) + + targets = ( + (close_circle_target, 0), + ) + + from pytential.qbx.target_assoc import ( + QBXTargetAssociator, QBXTargetAssociationFailedException) + + with pytest.raises(QBXTargetAssociationFailedException): + QBXTargetAssociator(cl_ctx)(lpot_source, targets) + + # }}} + + +# You can test individual routines by typing +# $ python test_layer_pot.py 'test_routine()' + +if __name__ == "__main__": + import sys + if len(sys.argv) > 1: + exec(sys.argv[1]) + else: + from py.test.cmdline import main + main([__file__]) + +# vim: fdm=marker diff --git a/test/test_layer_pot.py b/test/test_layer_pot.py index 64714160c149ecdedb593f1434e04f477acacfb7..2a2dce38a272d2015647f51896a01e525d352fa5 100644 --- a/test/test_layer_pot.py +++ b/test/test_layer_pot.py @@ -122,7 +122,7 @@ def test_ellipse_eigenvalues(ctx_getter, ellipse_aspect, mode_nr, qbx_order): cl_ctx = ctx_getter() queue = cl.CommandQueue(cl_ctx) - target_order = 7 + target_order = 8 from meshmode.discretization import Discretization from meshmode.discretization.poly_element import \ @@ -151,17 +151,18 @@ def test_ellipse_eigenvalues(ctx_getter, ellipse_aspect, mode_nr, qbx_order): np.linspace(0, 1, nelements+1), target_order) - fmm_order = qbx_order - if fmm_order > 3: + fmm_order = qbx_order + 3 + if fmm_order > 6: # FIXME: for now fmm_order = False - density_discr = Discretization( + pre_density_discr = Discretization( cl_ctx, mesh, InterpolatoryQuadratureSimplexGroupFactory(target_order)) - qbx = QBXLayerPotentialSource(density_discr, 4*target_order, - qbx_order, fmm_order=fmm_order) + qbx, _ = QBXLayerPotentialSource(pre_density_discr, 4*target_order, + qbx_order, fmm_order=fmm_order).with_refinement() + density_discr = qbx.density_discr nodes = density_discr.nodes().with_queue(queue) if 0: @@ -213,7 +214,7 @@ def test_ellipse_eigenvalues(ctx_getter, ellipse_aspect, mode_nr, qbx_order): norm(density_discr, queue, s_sigma - s_sigma_ref) / norm(density_discr, queue, s_sigma_ref)) - s_eoc_rec.add_data_point(1/nelements, s_err) + s_eoc_rec.add_data_point(qbx.h_max, s_err) # }}} @@ -245,7 +246,7 @@ def test_ellipse_eigenvalues(ctx_getter, ellipse_aspect, mode_nr, qbx_order): norm(density_discr, queue, d_sigma - d_sigma_ref) / d_ref_norm) - d_eoc_rec.add_data_point(1/nelements, d_err) + d_eoc_rec.add_data_point(qbx.h_max, d_err) # }}} @@ -265,7 +266,7 @@ def test_ellipse_eigenvalues(ctx_getter, ellipse_aspect, mode_nr, qbx_order): norm(density_discr, queue, sp_sigma - sp_sigma_ref) / norm(density_discr, queue, sigma)) - sp_eoc_rec.add_data_point(1/nelements, sp_err) + sp_eoc_rec.add_data_point(qbx.h_max, sp_err) # }}} @@ -309,16 +310,18 @@ def run_int_eq_test( from meshmode.discretization import Discretization from meshmode.discretization.poly_element import \ InterpolatoryQuadratureSimplexGroupFactory - density_discr = 
Discretization( + pre_density_discr = Discretization( cl_ctx, mesh, InterpolatoryQuadratureSimplexGroupFactory(target_order)) if source_order is None: source_order = 4*target_order - qbx = QBXLayerPotentialSource( - density_discr, fine_order=source_order, qbx_order=qbx_order, + qbx, _ = QBXLayerPotentialSource( + pre_density_discr, fine_order=source_order, qbx_order=qbx_order, # Don't use FMM for now - fmm_order=False) + fmm_order=False).with_refinement() + + density_discr = qbx.density_discr # {{{ set up operator @@ -618,6 +621,7 @@ def run_int_eq_test( pass return Result( + h_max=qbx.h_max, rel_err_2=rel_err_2, rel_err_inf=rel_err_inf, rel_td_err_inf=rel_td_err_inf, @@ -669,8 +673,8 @@ def test_integral_equation( bc_type, loc_sign, k, target_order=target_order, source_order=source_order) - eoc_rec_target.add_data_point(1/nelements, result.rel_err_2) - eoc_rec_td.add_data_point(1/nelements, result.rel_td_err_inf) + eoc_rec_target.add_data_point(result.h_max, result.rel_err_2) + eoc_rec_td.add_data_point(result.h_max, result.rel_td_err_inf) if bc_type == "dirichlet": tgt_order = qbx_order @@ -698,8 +702,8 @@ d2 = sym.Derivative() @pytest.mark.parametrize(("curve_name", "curve_f"), [ #("circle", partial(ellipse, 1)), - ("3-to-1 ellipse", partial(ellipse, 3)), - #("starfish", starfish), + #("3-to-1 ellipse", partial(ellipse, 3)), + ("starfish", starfish), ]) @pytest.mark.parametrize("qbx_order", [5]) @pytest.mark.parametrize(("zero_op_name", "k"), [ @@ -712,6 +716,8 @@ d2 = sym.Derivative() # sample invocation to copy and paste: # 'test_identities(cl._csc, "green", "circ", partial(ellipse, 1), 4, 0)' def test_identities(ctx_getter, zero_op_name, curve_name, curve_f, qbx_order, k): + logging.basicConfig(level=logging.INFO) + cl_ctx = ctx_getter() queue = cl.CommandQueue(cl_ctx) @@ -719,7 +725,7 @@ def test_identities(ctx_getter, zero_op_name, curve_name, curve_f, qbx_order, k) from sympy.core.cache import clear_cache clear_cache() - target_order = 7 + target_order = 8 u_sym = sym.var("u") grad_u_sym = sym.make_sym_mv("grad_u", 2) @@ -772,14 +778,14 @@ def test_identities(ctx_getter, zero_op_name, curve_name, curve_f, qbx_order, k) from meshmode.discretization.poly_element import \ InterpolatoryQuadratureSimplexGroupFactory from pytential.qbx import QBXLayerPotentialSource - density_discr = Discretization( + pre_density_discr = Discretization( cl_ctx, mesh, InterpolatoryQuadratureSimplexGroupFactory(target_order)) - qbx = QBXLayerPotentialSource(density_discr, 4*target_order, - qbx_order, - # Don't use FMM for now - fmm_order=False) + qbx, _ = QBXLayerPotentialSource( + pre_density_discr, 4*target_order, + qbx_order, fmm_order=qbx_order + 15).with_refinement() + density_discr = qbx.density_discr # {{{ compute values of a solution to the PDE @@ -820,7 +826,7 @@ def test_identities(ctx_getter, zero_op_name, curve_name, curve_f, qbx_order, k) l2_error_norm = norm(density_discr, queue, error) print(key, l2_error_norm) - eoc_rec.add_data_point(1/nelements, l2_error_norm) + eoc_rec.add_data_point(qbx.h_max, l2_error_norm) print(eoc_rec) tgt_order = order_table[zero_op_name] @@ -859,10 +865,12 @@ def test_off_surface_eval(ctx_getter, use_fmm, do_plot=False): from meshmode.discretization.poly_element import \ InterpolatoryQuadratureSimplexGroupFactory - density_discr = Discretization( + pre_density_discr = Discretization( cl_ctx, mesh, InterpolatoryQuadratureSimplexGroupFactory(target_order)) - qbx = QBXLayerPotentialSource(density_discr, 4*target_order, qbx_order, - fmm_order=fmm_order) + qbx, _ = 
QBXLayerPotentialSource(pre_density_discr, 4*target_order, qbx_order, + fmm_order=fmm_order).with_refinement() + + density_discr = qbx.density_discr from sumpy.kernel import LaplaceKernel op = sym.D(LaplaceKernel(2), sym.var("sigma"), qbx_forced_limit=-2) diff --git a/test/test_matrix.py b/test/test_matrix.py index 24c10d6b88d996a0fbd62c269e2e918709cc5425..cd70c0e3eeae3a205db2cba8778c8e91ff2c6634 100644 --- a/test/test_matrix.py +++ b/test/test_matrix.py @@ -102,14 +102,16 @@ def test_matrix_build(ctx_factory): from meshmode.discretization.poly_element import \ InterpolatoryQuadratureSimplexGroupFactory from pytential.qbx import QBXLayerPotentialSource - density_discr = Discretization( + pre_density_discr = Discretization( cl_ctx, mesh, InterpolatoryQuadratureSimplexGroupFactory(target_order)) - qbx = QBXLayerPotentialSource(density_discr, 4*target_order, + qbx, _ = QBXLayerPotentialSource(pre_density_discr, 4*target_order, qbx_order, # Don't use FMM for now - fmm_order=False) + fmm_order=False).with_refinement() + + density_discr = qbx.density_discr bound_op = bind(qbx, op) diff --git a/test/too_slow_test_helmholtz.py b/test/too_slow_test_helmholtz.py index 287a678007057a166daa0fe57b93a0b6a251da21..9add5d0522b35a05852b3fb1fe9c983286f1a932 100644 --- a/test/too_slow_test_helmholtz.py +++ b/test/too_slow_test_helmholtz.py @@ -108,7 +108,7 @@ def run_dielectric_test(cl_ctx, queue, nelements, qbx_order, qbx = QBXLayerPotentialSource( density_discr, fine_order=bdry_ovsmp_quad_order, qbx_order=qbx_order, fmm_order=fmm_order - ) + ).with_refinement() #print(sym.pretty(pde_op.operator(op_unknown_sym))) #1/0 @@ -339,7 +339,7 @@ def run_dielectric_test(cl_ctx, queue, nelements, qbx_order, low_order_qbx = QBXLayerPotentialSource( density_discr, fine_order=bdry_ovsmp_quad_order, qbx_order=2, - fmm_order=3) + fmm_order=3).with_refinement() from sumpy.kernel import LaplaceKernel from pytential.target import PointsTarget ones = (cl.array.empty(queue, (density_discr.nnodes,), dtype=np.float64)