diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 1b302bbb07dcc6199bfc7738fd4f4472238bfa6d..ee8f171cab2973d8bf8c318a8b808d151d982bf1 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -1,8 +1,15 @@ +# Environment variables +# +# * PYTEST_ADDOPTS is used to filter test runs. The default value is "-k-slowtest", +# which skips the slow running tests. +# * SKIP_EXAMPLES, if non-empty, can be used to skip the examples job. + Python 2.7 POCL: script: - export PY_EXE=python2.7 - export PYOPENCL_TEST=portable - - export EXTRA_INSTALL="pybind11 scipy numpy mako" + - export PYTEST_ADDOPTS=${PYTEST_ADDOPTS:--k-slowtest} + - export EXTRA_INSTALL="Cython pybind11 numpy scipy mako" - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project.sh - ". ./build-and-test-py-project.sh" tags: @@ -19,7 +26,8 @@ Python 3 POCL: script: - export PY_EXE=python3 - export PYOPENCL_TEST=portable - - export EXTRA_INSTALL="pybind11 numpy scipy mako" + - export PYTEST_ADDOPTS=${PYTEST_ADDOPTS:--k-slowtest} + - export EXTRA_INSTALL="Cython pybind11 numpy scipy mako" - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project.sh - ". ./build-and-test-py-project.sh" tags: @@ -37,7 +45,7 @@ Python 3 POCL Examples: - test -n "$SKIP_EXAMPLES" && exit - export PY_EXE=python3 - export PYOPENCL_TEST=portable - - export EXTRA_INSTALL="pybind11 numpy mako pyvisfile matplotlib" + - export EXTRA_INSTALL="Cython pybind11 numpy mako pyvisfile matplotlib" - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-py-project-and-run-examples.sh - ". ./build-py-project-and-run-examples.sh" tags: @@ -53,8 +61,9 @@ Python 3 POCL Examples: Python 3 Conda: script: - export SUMPY_FORCE_SYMBOLIC_BACKEND=symengine - - CONDA_ENVIRONMENT=.test-conda-env-py3.yml - - REQUIREMENTS_TXT=.test-conda-env-py3-requirements.txt + - export CONDA_ENVIRONMENT=.test-conda-env-py3.yml + - export PYTEST_ADDOPTS=${PYTEST_ADDOPTS:--k-slowtest} + - export REQUIREMENTS_TXT=.test-conda-env-py3-requirements.txt - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project-within-miniconda.sh - ". ./build-and-test-py-project-within-miniconda.sh" tags: @@ -71,9 +80,11 @@ Python 3 Conda Apple: script: - export LC_ALL=en_US.UTF-8 - export LANG=en_US.UTF-8 - - export PYTEST_ADDOPTS=-k-slowtest - - CONDA_ENVIRONMENT=.test-conda-env-py3-macos.yml - - REQUIREMENTS_TXT=.test-conda-env-py3-requirements.txt + - export CONDA_ENVIRONMENT=.test-conda-env-py3-macos.yml + - export PYTEST_ADDOPTS=${PYTEST_ADDOPTS:--k-slowtest} + - export REQUIREMENTS_TXT=.test-conda-env-py3-requirements.txt + - export CC=gcc + - set -o xtrace - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project-within-miniconda.sh - ". ./build-and-test-py-project-within-miniconda.sh" @@ -95,7 +106,7 @@ Python 3 Conda Apple: Documentation: script: - - EXTRA_INSTALL="pybind11 numpy mako" + - EXTRA_INSTALL="Cython pybind11 numpy mako" - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-docs.sh - ". 
./build-docs.sh" tags: diff --git a/.test-conda-env-py3-macos.yml b/.test-conda-env-py3-macos.yml index b71555f6d6e474a6b0cef0c9d9ed03c168d88a79..37281180b48f654f6c9a2f3d75d0d4298825edb9 100644 --- a/.test-conda-env-py3-macos.yml +++ b/.test-conda-env-py3-macos.yml @@ -13,7 +13,8 @@ dependencies: - python=3.6 - symengine=0.3.0 - python-symengine=0.3.0 +- osx-pocl-opencl - pyfmmlib -# for OpenMP support in pyfmmlib -- libgfortran>=3.0.1 +- gcc +- cython # things not in here: loopy boxtree pymbolic meshmode sumpy diff --git a/.test-conda-env-py3.yml b/.test-conda-env-py3.yml index a0803d620f2a0ff994aca677746813dafaaf27fc..2ac856756e0df70b068c71d725c8999f0eaf3b03 100644 --- a/.test-conda-env-py3.yml +++ b/.test-conda-env-py3.yml @@ -14,4 +14,5 @@ dependencies: - symengine=0.3.0 - python-symengine=0.3.0 - pyfmmlib +- cython # things not in here: loopy boxtree pymbolic meshmode sumpy diff --git a/README.rst b/README.rst index dfc5fe49b46f4a9ea13debdc136e9fbf3848c79d..ad3f79057516a8347148e9e9451aada8a73f553b 100644 --- a/README.rst +++ b/README.rst @@ -20,7 +20,7 @@ It relies on * `boxtree `_ for FMM tree building * `sumpy `_ for expansions and analytical routines * `modepy `_ for modes and nodes on simplices -* `meshmode `_ for modes and nodes on simplices +* `meshmode `_ for high order discretizations * `loopy `_ for fast array operations * `pytest `_ for automated testing diff --git a/doc/misc.rst b/doc/misc.rst index f551887fe53b96ed924be0019188770d820e4d33..9fac9ab446ed3ea3ae1ef7e3b6790537eb90930b 100644 --- a/doc/misc.rst +++ b/doc/misc.rst @@ -11,7 +11,9 @@ This set of instructions is intended for 64-bit Linux and macOS computers. On Debian derivatives (Ubuntu and many more), installing ``build-essential`` should do the trick. - On macOS, run ``xcode-select --install`` to install build tools. + On macOS, run ``xcode-select --install`` to install build tools. On Mojave (10.14), + you may need to follow `these steps `_ + as well. Everywhere else, just making sure you have the ``g++`` package should be enough. @@ -31,11 +33,14 @@ This set of instructions is intended for 64-bit Linux and macOS computers. #. ``conda config --add channels conda-forge`` -#. ``conda install git pip pocl islpy pyopencl sympy pyfmmlib pytest`` +#. (*macOS only*) ``conda install osx-pocl-opencl pocl pyopencl`` -#. Type the following command:: +#. ``conda install gcc cython git pip pocl islpy pyopencl sympy pyfmmlib pytest`` - hash -r; for i in pymbolic cgen genpy gmsh_interop modepy pyvisfile loopy boxtree sumpy meshmode pytential; do python -m pip install git+https://github.com/inducer/$i; done +#. 
Type the following commands:: + + hash -r; for i in pymbolic cgen genpy gmsh_interop modepy pyvisfile loopy boxtree sumpy meshmode; do python -m pip install git+https://github.com/inducer/$i; done + CC=gcc python -m pip install git+https://gitlab.tiker.net/inducer/pytential Next time you want to use :mod:`pytential`, just run the following command:: diff --git a/pytential/qbx/__init__.py b/pytential/qbx/__init__.py index 832ddcbcf2d2f5caff9f57c4d2fdf525047240f0..29237a198aaa9b20ffd8a6829dd5b55374812dca 100644 --- a/pytential/qbx/__init__.py +++ b/pytential/qbx/__init__.py @@ -83,6 +83,7 @@ class QBXLayerPotentialSource(LayerPotentialSourceBase): _from_sep_smaller_crit=None, _from_sep_smaller_min_nsources_cumul=None, _tree_kind="adaptive", + _use_target_specific_qbx=False, geometry_data_inspector=None, fmm_backend="sumpy", target_stick_out_factor=_not_provided): @@ -202,6 +203,7 @@ class QBXLayerPotentialSource(LayerPotentialSourceBase): self._from_sep_smaller_min_nsources_cumul = \ _from_sep_smaller_min_nsources_cumul self._tree_kind = _tree_kind + self._use_target_specific_qbx = _use_target_specific_qbx self.geometry_data_inspector = geometry_data_inspector # /!\ *All* parameters set here must also be set by copy() below, @@ -224,6 +226,7 @@ class QBXLayerPotentialSource(LayerPotentialSourceBase): _box_extent_norm=None, _from_sep_smaller_crit=None, _tree_kind=None, + _use_target_specific_qbx=_not_provided, geometry_data_inspector=None, fmm_backend=None, @@ -306,6 +309,9 @@ class QBXLayerPotentialSource(LayerPotentialSourceBase): _from_sep_smaller_min_nsources_cumul=( self._from_sep_smaller_min_nsources_cumul), _tree_kind=_tree_kind or self._tree_kind, + _use_target_specific_qbx=(_use_target_specific_qbx + if _use_target_specific_qbx is not _not_provided + else self._use_target_specific_qbx), geometry_data_inspector=( geometry_data_inspector or self.geometry_data_inspector), fmm_backend=fmm_backend or self.fmm_backend, @@ -745,14 +751,15 @@ class QBXLayerPotentialSource(LayerPotentialSourceBase): self.qbx_order, self.fmm_level_to_order, source_extra_kwargs=source_extra_kwargs, - kernel_extra_kwargs=kernel_extra_kwargs) + kernel_extra_kwargs=kernel_extra_kwargs, + _use_target_specific_qbx=self._use_target_specific_qbx) from pytential.qbx.geometry import target_state if (geo_data.user_target_to_center().with_queue(queue) == target_state.FAILED).any().get(): raise RuntimeError("geometry has failed targets") - # {{{ performance data hook + # {{{ geometry data inspection hook if self.geometry_data_inspector is not None: perform_fmm = self.geometry_data_inspector(insn, bound_expr, geo_data) diff --git a/pytential/qbx/fmm.py b/pytential/qbx/fmm.py index badf630046b239303344e1b1a3664e706a12a0f4..7ec5be2375c2f1e6bfef0b5da9828b1761beb2e0 100644 --- a/pytential/qbx/fmm.py +++ b/pytential/qbx/fmm.py @@ -91,12 +91,14 @@ class QBXSumpyExpansionWranglerCodeContainer(SumpyExpansionWranglerCodeContainer def get_wrangler(self, queue, geo_data, dtype, qbx_order, fmm_level_to_order, source_extra_kwargs={}, - kernel_extra_kwargs=None): + kernel_extra_kwargs=None, + _use_target_specific_qbx=False): return QBXExpansionWrangler(self, queue, geo_data, dtype, qbx_order, fmm_level_to_order, source_extra_kwargs, - kernel_extra_kwargs) + kernel_extra_kwargs, + _use_target_specific_qbx) class QBXExpansionWrangler(SumpyExpansionWrangler): @@ -120,7 +122,11 @@ QBXFMMGeometryData.non_qbx_box_target_lists`), def __init__(self, code_container, queue, geo_data, dtype, qbx_order, fmm_level_to_order, - source_extra_kwargs, 
kernel_extra_kwargs): + source_extra_kwargs, kernel_extra_kwargs, + _use_target_specific_qbx=False): + if _use_target_specific_qbx: + raise ValueError("TSQBX is not implemented in sumpy") + SumpyExpansionWrangler.__init__(self, code_container, queue, geo_data.tree(), dtype, fmm_level_to_order, source_extra_kwargs, kernel_extra_kwargs) @@ -369,6 +375,10 @@ QBXFMMGeometryData.non_qbx_box_target_lists`), return (pot, SumpyTimingFuture(self.queue, events)) + @log_process(logger) + def eval_target_specific_qbx_locals(self, src_weights): + raise NotImplementedError() + # }}} # }}} @@ -376,7 +386,8 @@ QBXFMMGeometryData.non_qbx_box_target_lists`), # {{{ FMM top-level -def drive_fmm(expansion_wrangler, src_weights, timing_data=None): +def drive_fmm(expansion_wrangler, src_weights, timing_data=None, + traversal=None): """Top-level driver routine for the QBX fast multipole calculation. :arg geo_data: A :class:`QBXFMMGeometryData` instance. @@ -394,8 +405,14 @@ def drive_fmm(expansion_wrangler, src_weights, timing_data=None): wrangler = expansion_wrangler geo_data = wrangler.geo_data - traversal = geo_data.traversal() + + use_tsqbx = geo_data.lpot_source._use_target_specific_qbx + + if traversal is None: + traversal = geo_data.traversal() + tree = traversal.tree + recorder = TimingRecorder() # Interface guidelines: Attributes of the tree are assumed to be known @@ -515,9 +532,12 @@ def drive_fmm(expansion_wrangler, src_weights, timing_data=None): # {{{ wrangle qbx expansions - qbx_expansions, timing_future = wrangler.form_global_qbx_locals(src_weights) + if not use_tsqbx: + qbx_expansions, timing_future = wrangler.form_global_qbx_locals(src_weights) - recorder.add("form_global_qbx_locals", timing_future) + recorder.add("form_global_qbx_locals", timing_future) + else: + qbx_expansions = wrangler.qbx_local_expansion_zeros() local_result, timing_future = ( wrangler.translate_box_multipoles_to_qbx_local(mpole_exps)) @@ -537,6 +557,14 @@ def drive_fmm(expansion_wrangler, src_weights, timing_data=None): recorder.add("eval_qbx_expansions", timing_future) + if use_tsqbx: + tsqbx_result, timing_future = ( + wrangler.eval_target_specific_qbx_locals(src_weights)) + + recorder.add("eval_target_specific_qbx_locals", timing_future) + + qbx_potentials = qbx_potentials + tsqbx_result + # }}} # {{{ reorder potentials diff --git a/pytential/qbx/fmmlib.py b/pytential/qbx/fmmlib.py index 2d21a3d2ac6865d0d8d3c3d3f41c388b0fe160b0..beff36d0dfb50331c4345e59852a54af2fec35d0 100644 --- a/pytential/qbx/fmmlib.py +++ b/pytential/qbx/fmmlib.py @@ -23,11 +23,14 @@ THE SOFTWARE. 
""" import numpy as np -from pytools import memoize_method, Record +from pytools import Record, memoize_method import pyopencl as cl # noqa import pyopencl.array # noqa: F401 from boxtree.pyfmmlib_integration import FMMLibExpansionWrangler -from sumpy.kernel import LaplaceKernel, HelmholtzKernel +from sumpy.kernel import ( + LaplaceKernel, HelmholtzKernel, AxisTargetDerivative, + DirectionalSourceDerivative) +import pytential.qbx.target_specific as ts from boxtree.tools import return_timing_data @@ -55,70 +58,14 @@ class QBXFMMLibExpansionWranglerCodeContainer(object): def get_wrangler(self, queue, geo_data, dtype, qbx_order, fmm_level_to_order, source_extra_kwargs={}, - kernel_extra_kwargs=None): + kernel_extra_kwargs=None, + _use_target_specific_qbx=False): return QBXFMMLibExpansionWrangler(self, queue, geo_data, dtype, qbx_order, fmm_level_to_order, source_extra_kwargs, - kernel_extra_kwargs) - -# }}} - - -# {{{ host geo data wrapper - -class ToHostTransferredGeoDataWrapper(object): - def __init__(self, queue, geo_data): - self.queue = queue - self.geo_data = geo_data - - @memoize_method - def tree(self): - return self.traversal().tree - - @memoize_method - def traversal(self): - return self.geo_data.traversal().get(queue=self.queue) - - @property - def ncenters(self): - return self.geo_data.ncenters - - @memoize_method - def centers(self): - return np.array([ - ci.get(queue=self.queue) - for ci in self.geo_data.centers()]) - - @memoize_method - def expansion_radii(self): - return self.geo_data.expansion_radii().get(queue=self.queue) - - @memoize_method - def global_qbx_centers(self): - return self.geo_data.global_qbx_centers().get(queue=self.queue) - - @memoize_method - def qbx_center_to_target_box(self): - return self.geo_data.qbx_center_to_target_box().get(queue=self.queue) - - @memoize_method - def qbx_center_to_target_box_source_level(self, source_level): - return self.geo_data.qbx_center_to_target_box_source_level( - source_level).get(queue=self.queue) - - @memoize_method - def non_qbx_box_target_lists(self): - return self.geo_data.non_qbx_box_target_lists().get(queue=self.queue) - - @memoize_method - def center_to_tree_targets(self): - return self.geo_data.center_to_tree_targets().get(queue=self.queue) - - @memoize_method - def all_targets(self): - """All (not just non-QBX) targets packaged into a single array.""" - return np.array(list(self.tree().targets)) + kernel_extra_kwargs, + _use_target_specific_qbx) # }}} @@ -129,73 +76,80 @@ class QBXFMMLibExpansionWrangler(FMMLibExpansionWrangler): def __init__(self, code, queue, geo_data, dtype, qbx_order, fmm_level_to_order, source_extra_kwargs, - kernel_extra_kwargs): + kernel_extra_kwargs, + _use_target_specific_qbx=False): self.code = code self.queue = queue + self._use_target_specific_qbx = _use_target_specific_qbx # FMMLib is CPU-only. This wrapper gets the geometry out of # OpenCL-land. 
+ from pytential.qbx.utils import ToHostTransferredGeoDataWrapper self.geo_data = ToHostTransferredGeoDataWrapper(queue, geo_data) self.qbx_order = qbx_order # {{{ digest out_kernels - from sumpy.kernel import AxisTargetDerivative, DirectionalSourceDerivative - - k_names = [] - source_deriv_names = [] - - def is_supported_helmknl(knl): - if isinstance(knl, DirectionalSourceDerivative): - source_deriv_name = knl.dir_vec_name - knl = knl.inner_kernel - else: - source_deriv_name = None - - if isinstance(knl, HelmholtzKernel) and knl.dim in [2, 3]: - k_names.append(knl.helmholtz_k_name) - source_deriv_names.append(source_deriv_name) - return True - elif isinstance(knl, LaplaceKernel) and knl.dim in [2, 3]: - k_names.append(None) - source_deriv_names.append(source_deriv_name) - return True - - return False - ifgrad = False outputs = [] + source_deriv_names = [] + k_names = [] + for out_knl in self.code.out_kernels: - if is_supported_helmknl(out_knl): + if ( + _use_target_specific_qbx + and not self.is_supported_helmknl_for_tsqbx(out_knl)): + raise ValueError("not all kernels passed support TSQBX") + + if self.is_supported_helmknl(out_knl): outputs.append(()) + no_target_deriv_knl = out_knl + elif (isinstance(out_knl, AxisTargetDerivative) - and is_supported_helmknl(out_knl.inner_kernel)): + and self.is_supported_helmknl(out_knl.inner_kernel)): outputs.append((out_knl.axis,)) ifgrad = True + no_target_deriv_knl = out_knl.inner_kernel + else: - raise NotImplementedError( + raise ValueError( "only the 2/3D Laplace and Helmholtz kernel " "and their derivatives are supported") + source_deriv_names.append(no_target_deriv_knl.dir_vec_name + if isinstance(no_target_deriv_knl, DirectionalSourceDerivative) + else None) + + base_knl = out_knl.get_base_kernel() + k_names.append(base_knl.helmholtz_k_name + if isinstance(base_knl, HelmholtzKernel) + else None) + + self.outputs = outputs + from pytools import is_single_valued + if not is_single_valued(source_deriv_names): raise ValueError("not all kernels passed are the same in " "whether they represent a source derivative") source_deriv_name = source_deriv_names[0] - self.outputs = outputs - # }}} + if not is_single_valued(k_names): + raise ValueError("not all kernels passed have the same " + "Helmholtz parameter") + + k_name = k_names[0] - from pytools import single_valued - k_name = single_valued(k_names) if k_name is None: helmholtz_k = 0 else: helmholtz_k = kernel_extra_kwargs[k_name] + # }}} + dipole_vec = None if source_deriv_name is not None: dipole_vec = np.array([ @@ -204,7 +158,6 @@ class QBXFMMLibExpansionWrangler(FMMLibExpansionWrangler): order="F") def inner_fmm_level_to_nterms(tree, level): - from sumpy.kernel import LaplaceKernel, HelmholtzKernel if helmholtz_k == 0: return fmm_level_to_order( LaplaceKernel(tree.dimensions), @@ -225,6 +178,23 @@ class QBXFMMLibExpansionWrangler(FMMLibExpansionWrangler): ifgrad=ifgrad) + @staticmethod + def is_supported_helmknl_for_tsqbx(knl): + # Supports at most one derivative. 
+ if isinstance(knl, (DirectionalSourceDerivative, AxisTargetDerivative)): + knl = knl.inner_kernel + + return (isinstance(knl, (LaplaceKernel, HelmholtzKernel)) + and knl.dim == 3) + + @staticmethod + def is_supported_helmknl(knl): + if isinstance(knl, DirectionalSourceDerivative): + knl = knl.inner_kernel + + return (isinstance(knl, (LaplaceKernel, HelmholtzKernel)) + and knl.dim in (2, 3)) + # {{{ data vector helpers def output_zeros(self): @@ -276,6 +246,13 @@ class QBXFMMLibExpansionWrangler(FMMLibExpansionWrangler): raise ValueError("element '%s' of outputs array not " "understood" % out) + @memoize_method + def _get_single_centers_array(self): + return np.array([ + self.geo_data.centers()[idim] + for idim in range(self.dim) + ], order="F") + # }}} # {{{ override target lists to only hit non-QBX targets @@ -304,6 +281,9 @@ class QBXFMMLibExpansionWrangler(FMMLibExpansionWrangler): @log_process(logger) @return_timing_data def form_global_qbx_locals(self, src_weights): + if self._use_target_specific_qbx: + return self.qbx_local_expansion_zeros() + geo_data = self.geo_data trav = geo_data.traversal() @@ -572,7 +552,7 @@ class QBXFMMLibExpansionWrangler(FMMLibExpansionWrangler): taeval = self.get_expn_eval_routine("ta") - for isrc_center, src_icenter in enumerate(global_qbx_centers): + for src_icenter in global_qbx_centers: for icenter_tgt in range( ctt.starts[src_icenter], ctt.starts[src_icenter+1]): @@ -592,6 +572,60 @@ class QBXFMMLibExpansionWrangler(FMMLibExpansionWrangler): return output + @log_process(logger) + @return_timing_data + def eval_target_specific_qbx_locals(self, src_weights): + if not self._use_target_specific_qbx: + return self.full_output_zeros() + + geo_data = self.geo_data + trav = geo_data.traversal() + + ctt = geo_data.center_to_tree_targets() + + src_weights = src_weights.astype(np.complex128) + + ifcharge = self.dipole_vec is None + ifdipole = self.dipole_vec is not None + + ifpot = any(not output for output in self.outputs) + ifgrad = self.ifgrad + + # Create temporary output arrays for potential / gradient. 
+ pot = np.zeros(self.tree.ntargets, np.complex) if ifpot else None + grad = ( + np.zeros((self.dim, self.tree.ntargets), np.complex) + if ifgrad else None) + + ts.eval_target_specific_qbx_locals( + ifpot=ifpot, + ifgrad=ifgrad, + ifcharge=ifcharge, + ifdipole=ifdipole, + order=self.qbx_order, + sources=self._get_single_sources_array(), + targets=geo_data.all_targets(), + centers=self._get_single_centers_array(), + qbx_centers=geo_data.global_qbx_centers(), + qbx_center_to_target_box=geo_data.qbx_center_to_target_box(), + center_to_target_starts=ctt.starts, + center_to_target_lists=ctt.lists, + source_box_starts=trav.neighbor_source_boxes_starts, + source_box_lists=trav.neighbor_source_boxes_lists, + box_source_starts=self.tree.box_source_starts, + box_source_counts_nonchild=self.tree.box_source_counts_nonchild, + helmholtz_k=self.kernel_kwargs.get("zk", 0), + charge=src_weights, + dipstr=src_weights, + dipvec=self.dipole_vec, + pot=pot, + grad=grad) + + output = self.full_output_zeros() + self.add_potgrad_onto_output(output, slice(None), pot, grad) + + return output + def finalize_potentials(self, potential): potential = super(QBXFMMLibExpansionWrangler, self).finalize_potentials( potential) diff --git a/pytential/qbx/target_assoc.py b/pytential/qbx/target_assoc.py index 11b70c4b2f1cee8db85fd2c00a159a28682d7fa1..5273b72b6fad7927245ede5195ae79ee7d4ea187 100644 --- a/pytential/qbx/target_assoc.py +++ b/pytential/qbx/target_assoc.py @@ -528,9 +528,9 @@ class TargetAssociationWrangler(TreeWranglerBase): return (found_target_close_to_panel == 1).all().get() @log_process(logger) - def try_find_centers(self, tree, peer_lists, lpot_source, - target_status, target_flags, target_assoc, - target_association_tolerance, debug, wait_for=None): + def find_centers(self, tree, peer_lists, lpot_source, + target_status, target_flags, target_assoc, + target_association_tolerance, debug, wait_for=None): # Round up level count--this gets included in the kernel as # a stack bound. Rounding avoids too many kernel versions. from pytools import div_ceil @@ -750,7 +750,7 @@ def associate_targets_to_qbx_centers(lpot_source, wrangler, target_flags = wrangler.make_target_flags(target_discrs_and_qbx_sides) - wrangler.try_find_centers(tree, peer_lists, lpot_source, target_status, + wrangler.find_centers(tree, peer_lists, lpot_source, target_status, target_flags, target_assoc, target_association_tolerance, debug) center_not_found = ( diff --git a/pytential/qbx/target_specific/__init__.py b/pytential/qbx/target_specific/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e6f10bba8c64d193c69110a13b91cac15d4de6ab --- /dev/null +++ b/pytential/qbx/target_specific/__init__.py @@ -0,0 +1,24 @@ +__copyright__ = "Copyright (C) 2018 Matt Wala" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + + +from .impl import * # noqa diff --git a/pytential/qbx/target_specific/helmholtz_utils.c b/pytential/qbx/target_specific/helmholtz_utils.c new file mode 100644 index 0000000000000000000000000000000000000000..2c06b7fc22863a022edbe296a0830a4a8d7e2500 --- /dev/null +++ b/pytential/qbx/target_specific/helmholtz_utils.c @@ -0,0 +1,515 @@ +/* This file contains routines for evaluating spherical Bessel and Hankel + functions. + + This is based on cdjseval3d.f and helmrouts3d.f from fmmlib3d, translated + with a hacked version of f2c and manually postprocessed. */ + +/* Original copyright notice: */ + +/* ********************************************************************** + +Copyright (c) 2009-2012, Leslie Greengard, Zydrunas Gimbutas +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +********************************************************************** */ + +#include + +/* declarations for f2c generated things */ + +typedef int integer; +typedef double doublereal; +typedef double complex doublecomplex; + +static inline double z_abs(doublecomplex *z) { + return cabs(*z); +} + +static inline void z_exp(doublecomplex *out, doublecomplex *z) { + *out = cexp(*z); +} + +static inline void z_sin(doublecomplex *out, doublecomplex *z) { + *out = csin(*z); +} + +static inline void z_cos(doublecomplex *out, doublecomplex *z) { + *out = ccos(*z); +} + +/* Start of functions borrowed from cdjseval3d.f */ + +/* Computation of spherical Bessel functions via recurrence */ + +/* ********************************************************************** */ +/* Subroutine */ int jfuns3d_(integer *ier, integer *nterms, doublecomplex * + z__, doublereal *scale, doublecomplex *fjs, integer *ifder, + doublecomplex *fjder, integer *lwfjs, integer *iscale, integer *ntop) +{ + /* Initialized data */ + + static doublereal upbound2 = 1e40; + static doublereal upbound2inv = 1e-40; + static doublereal tiny = 1e-200; + static doublereal done = 1.; + static doublereal zero = 0.; + + /* System generated locals */ + integer i__1; + doublereal d__1, d__2; + doublecomplex z__1; + + /* Local variables */ + integer i__; + doublereal d0, d1, dd, dc1, dc2; + doublecomplex fj0, fj1, zinv, ztmp; + doublereal dcoef; + doublereal sctot; + doublecomplex zscale; + doublereal scalinv; + +/* ********************************************************************** */ + +/* PURPOSE: */ + +/* This subroutine evaluates the first NTERMS spherical Bessel */ +/* functions and if required, their derivatives. */ +/* It incorporates a scaling parameter SCALE so that */ + +/* fjs_n(z)=j_n(z)/SCALE^n */ +/* fjder_n(z)=\frac{\partial fjs_n(z)}{\partial z} */ + +/* NOTE: The scaling parameter SCALE is meant to be used when */ +/* abs(z) < 1, in which case we recommend setting */ +/* SCALE = abs(z). This prevents the fjs_n from */ +/* underflowing too rapidly. */ +/* Otherwise, set SCALE=1. */ +/* Do not set SCALE = abs(z) if z could take on the */ +/* value zero. */ +/* In an FMM, when forming an expansion from a collection of */ +/* sources, set SCALE = min( abs(k*r), 1) */ +/* where k is the Helmholtz parameter and r is the box dimension */ +/* at the relevant level. */ + +/* INPUT: */ + +/* nterms (integer): order of expansion of output array fjs */ +/* z (complex *16): argument of the spherical Bessel functions */ +/* scale (real *8) : scaling factor (discussed above) */ +/* ifder (integer): flag indicating whether to calculate "fjder" */ +/* 0 NO */ +/* 1 YES */ +/* lwfjs (integer): upper limit of input arrays */ +/* fjs(0:lwfjs) and iscale(0:lwfjs) */ +/* iscale (integer): integer workspace used to keep track of */ +/* internal scaling */ + +/* OUTPUT: */ + +/* ier (integer): error return code */ +/* ier=0 normal return; */ +/* ier=8 insufficient array dimension lwfjs */ +/* fjs (complex *16): array of scaled Bessel functions. */ +/* fjder (complex *16): array of derivs of scaled Bessel functions. */ +/* ntop (integer) : highest index in arrays fjs that is nonzero */ + +/* NOTE, that fjs and fjder arrays must be at least (nterms+2) */ +/* complex *16 elements long. */ + + + + +/* ... Initializing ... 
*/ + + *ier = 0; + +/* set to asymptotic values if argument is sufficiently small */ + + if (z_abs(z__) < tiny) { + fjs[0] = done; + i__1 = *nterms; + for (i__ = 1; i__ <= i__1; ++i__) { + fjs[i__] = zero; + } + + if (*ifder == 1) { + i__1 = *nterms; + for (i__ = 0; i__ <= i__1; ++i__) { + fjder[i__] = zero; + } + fjder[1] = done / (*scale * 3); + } + + return 0; + } + +/* ... Step 1: recursion up to find ntop, starting from nterms */ + + *ntop = 0; + zinv = done / *z__; + fjs[*nterms] = done; + fjs[*nterms - 1] = zero; + + i__1 = *lwfjs; + for (i__ = *nterms; i__ <= i__1; ++i__) { + dcoef = (i__ << 1) + done; + ztmp = dcoef * zinv * fjs[i__] - fjs[i__ - 1]; + fjs[i__ + 1] = ztmp; + +/* Computing 2nd power */ + d__1 = creal(ztmp); +/* Computing 2nd power */ + d__2 = cimag(ztmp); + dd = d__1 * d__1 + d__2 * d__2; + if (dd > upbound2) { + *ntop = i__ + 1; + break; + } + } + if (*ntop == 0) { + *ier = 8; + return 0; + } + +/* ... Step 2: Recursion back down to generate the unscaled jfuns: */ +/* if magnitude exceeds UPBOUND2, rescale and continue the */ +/* recursion (saving the order at which rescaling occurred */ +/* in array iscale. */ + + i__1 = *ntop; + for (i__ = 0; i__ <= i__1; ++i__) { + iscale[i__] = 0; + } + + fjs[*ntop] = zero; + fjs[*ntop - 1] = done; + for (i__ = *ntop - 1; i__ >= 1; --i__) { + dcoef = (i__ << 1) + done; + ztmp = dcoef * zinv * fjs[i__] - fjs[i__ + 1]; + fjs[i__ - 1] = ztmp; + +/* Computing 2nd power */ + d__1 = creal(ztmp); +/* Computing 2nd power */ + d__2 = cimag(ztmp); + dd = d__1 * d__1 + d__2 * d__2; + if (dd > upbound2) { + fjs[i__] *= upbound2inv; + fjs[i__ - 1] *= upbound2inv; + iscale[i__] = 1; + } +/* L2200: */ + } + +/* ... Step 3: go back up to the top and make sure that all */ +/* Bessel functions are scaled by the same factor */ +/* (i.e. the net total of times rescaling was invoked */ +/* on the way down in the previous loop). */ +/* At the same time, add scaling to fjs array. */ + + scalinv = done / *scale; + sctot = 1.; + i__1 = *ntop; + for (i__ = 1; i__ <= i__1; ++i__) { + sctot *= scalinv; + if (iscale[i__ - 1] == 1) { + sctot *= upbound2inv; + } + fjs[i__] *= sctot; + } + +/* ... Determine the normalization parameter: */ + + z_sin(&z__1, z__); + fj0 = z__1 * zinv; + z_cos(&z__1, z__); + fj1 = fj0 * zinv - z__1 * zinv; + + d0 = z_abs(&fj0); + d1 = z_abs(&fj1); + if (d1 > d0) { + zscale = fj1 / (fjs[1] * *scale); + } else { + zscale = fj0 / fjs[0]; + } + +/* ... Scale the jfuns by zscale: */ + + ztmp = zscale; + i__1 = *nterms; + for (i__ = 0; i__ <= i__1; ++i__) { + fjs[i__] *= ztmp; + } + +/* ... Finally, calculate the derivatives if desired: */ + + if (*ifder == 1) { + fjs[*nterms + 1] *= ztmp; + + fjder[0] = -fjs[1] * *scale; + i__1 = *nterms; + for (i__ = 1; i__ <= i__1; ++i__) { + dc1 = i__ / ((i__ << 1) + done); + dc2 = done - dc1; + dc1 *= scalinv; + dc2 *= *scale; + fjder[i__] = dc1 * fjs[i__ - 1] - dc2 * fjs[i__ + 1]; + } + } + return 0; +} /* jfuns3d_ */ + +/* Start of functions borrowed from helmrouts3d.f */ + +/* This file contains the basic subroutines for */ +/* forming and evaluating multipole (partial wave) expansions. */ + +/* Documentation is incomplete and ongoing... */ + + +/* Remarks on scaling conventions. */ + +/* 1) Hankel and Bessel functions are consistently scaled as */ +/* hvec(n)= h_n(z)*scale^(n) */ +/* jvec(n)= j_n(z)/scale^(n) */ + +/* In some earlier FMM implementations, the convention */ +/* hvec(n)= h_n(z)*scale^(n+1) */ +/* was sometimes used, leading to various obscure rescaling */ +/* steps. 
*/ + +/* scale should be of the order of |z| if |z| < 1. Otherwise, */ +/* scale should be set to 1. */ + + +/* 2) There are many definitions of the spherical harmonics, */ +/* which differ in terms of normalization constants. We */ +/* adopt the following convention: */ + +/* For m>0, we define Y_n^m according to */ + +/* Y_n^m = \sqrt{2n+1} \sqrt{\frac{ (n-m)!}{(n+m)!}} \cdot */ +/* P_n^m(\cos \theta) e^{i m phi} */ +/* and */ + +/* Y_n^-m = dconjg( Y_n^m ) */ + +/* We omit the Condon-Shortley phase factor (-1)^m in the */ +/* definition of Y_n^m for m<0. (This is standard in several */ +/* communities.) */ + +/* We also omit the factor \sqrt{\frac{1}{4 \pi}}, so that */ +/* the Y_n^m are orthogonal on the unit sphere but not */ +/* orthonormal. (This is also standard in several communities.) */ +/* More precisely, */ + +/* \int_S Y_n^m Y_n^m d\Omega = 4 \pi. */ + +/* Using our standard definition, the addition theorem takes */ +/* the simple form */ + +/* e^( i k r}/(ikr) = */ +/* \sum_n \sum_m j_n(k|S|) Ylm*(S) h_n(k|T|) Ylm(T) */ + + +/* ----------------------------------------------------------------------- */ +/* h3d01: computes h0, h1 (first two spherical Hankel fns.) */ +/* h3dall: computes Hankel functions of all orders and scales them */ +/* ********************************************************************** */ +/* Subroutine */ static int h3d01_(doublecomplex *z__, doublecomplex *h0, + doublecomplex *h1) +{ + /* Initialized data */ + + static doublecomplex eye = I; + static doublereal thresh = 1e-15; + static doublereal done = 1.; + + /* System generated locals */ + doublecomplex z__1; + + /* Local variables */ + doublecomplex cd; + +/* ********************************************************************** */ + +/* Compute spherical Hankel functions of order 0 and 1 */ + +/* h0(z) = exp(i*z)/(i*z), */ +/* h1(z) = - h0' = -h0*(i-1/z) = h0*(1/z-i) */ + +/* ----------------------------------------------------------------------- */ +/* INPUT: */ + +/* z : argument of Hankel functions */ +/* if abs(z)<1.0d-15, returns zero. */ + +/* ----------------------------------------------------------------------- */ +/* OUTPUT: */ + +/* h0 : h0(z) (spherical Hankel function of order 0). */ +/* h1 : -h0'(z) (spherical Hankel function of order 1). */ + +/* ----------------------------------------------------------------------- */ + + if (z_abs(z__) < thresh) { + *h0 = 0.; + *h1 = 0.; + return 0; + } + +/* Otherwise, use formula */ + + cd = eye * *z__; + z_exp(&z__1, &cd); + *h0 = z__1 / cd; + *h1 = *h0 * (done / *z__ - eye); + + return 0; +} /* h3d01_ */ + + + + +/* ********************************************************************** */ +/* Subroutine */ int h3dall_(integer *nterms, doublecomplex *z__, doublereal * + scale, doublecomplex *hvec, integer *ifder, doublecomplex *hder) +{ + /* Initialized data */ + static doublereal thresh = 1e-15; + static doublereal done = 1.; + + /* Builtin functions */ + double z_abs(doublecomplex *); + + /* Local variables */ + integer i__; + integer i__1; + doublereal dtmp; + doublecomplex zinv, ztmp; + doublereal scal2; + +/* ********************************************************************** */ + +/* This subroutine computes scaled versions of the spherical Hankel */ +/* functions h_n of orders 0 to nterms. */ + +/* hvec(n)= h_n(z)*scale^(n) */ + +/* The parameter SCALE is useful when |z| < 1, in which case */ +/* it damps out the rapid growth of h_n as n increases. 
In such */ +/* cases, we recommend setting */ + +/* scale = |z| */ + +/* or something close. If |z| > 1, set scale = 1. */ + +/* If the flag IFDER is set to one, it also computes the */ +/* derivatives of h_n. */ + +/* hder(n)= h_n'(z)*scale^(n) */ + +/* NOTE: If |z| < 1.0d-15, the subroutine returns zero. */ + +/* ----------------------------------------------------------------------- */ +/* INPUT: */ + +/* nterms : highest order of the Hankel functions to be computed. */ +/* z : argument of the Hankel functions. */ +/* scale : scaling parameter discussed above */ +/* ifder : flag indcating whether derivatives should be computed. */ +/* ifder = 1 ==> compute */ +/* ifder = 0 ==> do not compute */ + +/* ----------------------------------------------------------------------- */ +/* OUTPUT: */ + +/* hvec : the vector of spherical Hankel functions */ +/* hder : the derivatives of the spherical Hankel functions */ + +/* ----------------------------------------------------------------------- */ + + +/* If |z| < thresh, return zeros. */ + + if (z_abs(z__) < thresh) { + i__1 = *nterms; + for (i__ = 0; i__ <= i__1; ++i__) { + hvec[i__] = 0; + hder[i__] = 0; + } + return 0; + } + +/* Otherwise, get h_0 and h_1 analytically and the rest via */ +/* recursion. */ + + h3d01_(z__, hvec, &hvec[1]); + hvec[0] = hvec[0]; + hvec[1] *= *scale; + +/* From Abramowitz and Stegun (10.1.19) */ + +/* h_{n+1}(z)=(2n+1)/z * h_n(z) - h_{n-1}(z) */ + +/* With scaling: */ + +/* hvec(n+1)=scale*(2n+1)/z * hvec(n) -(scale**2) hvec(n-1) */ + + scal2 = *scale * *scale; + zinv = *scale / *z__; + i__1 = *nterms - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + dtmp = (i__ << 1) + done; + ztmp = zinv * dtmp; + hvec[i__ + 1] = ztmp * hvec[i__] - scal2 * hvec[i__ - 1]; + } + +/* From Abramowitz and Stegun (10.1.21) */ + +/* h_{n}'(z)= h_{n-1}(z) - (n+1)/z * h_n(z) */ + +/* With scaling: */ + +/* hder(n)=scale* hvec(n-1) - (n+1)/z * hvec(n) */ + + + if (*ifder == 1) { + + hder[0] = -hvec[1] / *scale; + zinv = 1. / *z__; + i__1 = *nterms; + for (i__ = 1; i__ <= i__1; ++i__) { + dtmp = i__ + done; + ztmp = zinv * dtmp; + hder[i__] = *scale * hvec[i__ - 1] - ztmp * hvec[i__]; + } + } + + return 0; +} /* h3dall_ */ diff --git a/pytential/qbx/target_specific/helmholtz_utils.h b/pytential/qbx/target_specific/helmholtz_utils.h new file mode 100644 index 0000000000000000000000000000000000000000..d36b3b1206830f43d31c67551aab9b00dbd80252 --- /dev/null +++ b/pytential/qbx/target_specific/helmholtz_utils.h @@ -0,0 +1,36 @@ +/* + * Copyright (C) 2018 Matt Wala + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef HELMHOLTZ_UTILS_H +#define HELMHOLTZ_UTILS_H + +#include + +extern int jfuns3d_(int *ier, int *nterms, double complex *z, + double *scale, double complex *fjs, int *ifder, + double complex *fjder, int *lwfjs, int *iscale, + int *ntop); + +extern int h3dall_(int *nterms, double complex *z, double *scale, + double complex *hvec, int *ifder, double complex *hder); + +#endif /* HELMHOLTZ_UTILS_H */ diff --git a/pytential/qbx/target_specific/impl.h b/pytential/qbx/target_specific/impl.h new file mode 100644 index 0000000000000000000000000000000000000000..4d7281b4d3e45cafdc74c2243bf2ca5dbca90abd --- /dev/null +++ b/pytential/qbx/target_specific/impl.h @@ -0,0 +1,32 @@ +/* + * Copyright (C) 2018 Matt Wala + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef IMPL_H +#define IMPL_H + +/* Temporary buffer size for holding e.g. Legendre polynomial values */ +#define BUFSIZE 64 + +/* Padding for false sharing prevention */ +#define PADDING 65 + +#endif /* IMPL_H */ diff --git a/pytential/qbx/target_specific/impl.pyx b/pytential/qbx/target_specific/impl.pyx new file mode 100644 index 0000000000000000000000000000000000000000..37884f3f522af70946d82b5ea07974aa617ace28 --- /dev/null +++ b/pytential/qbx/target_specific/impl.pyx @@ -0,0 +1,786 @@ +# cython: warn.unused=True, warn.unused_arg=True, warn.unreachable=True +# cython: boundscheck=False, wraparound=False, initializedcheck=False, cdivision=True +# cython: embedsignature=True, language_level=3 + +"""Copyright (C) 2018 Matt Wala + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + +import numpy as np +import cython +import cython.parallel + +from libc.math cimport sqrt +from libc.stdio cimport printf, fprintf, stderr +from libc.stdlib cimport abort + +cimport openmp + + +cdef extern from "complex.h" nogil: + double cabs(double complex) + + +cdef extern from "helmholtz_utils.h" nogil: + int jfuns3d_(int *ier, int *nterms, double complex * z, double *scale, + double complex *fjs, int *ifder, double complex *fjder, + int *lwfjs, int *iscale, int *ntop); + int h3dall_(int *nterms, double complex *z, double *scale, + double complex *hvec, int *ifder, double complex *hder); + + +cdef extern from "impl.h" nogil: + const int BUFSIZE + const int PADDING + + +# {{{ (externally visible) wrappers for bessel / hankel functions + +def jfuns3d_wrapper(nterms, z, scale, fjs, fjder): + """Evaluate spherical Bessel functions. + + Arguments: + nterms: Highest order to be computed + z: Argument + scale: Output scaling factor (recommended: min(abs(z), 1)) + fjs: Output array of complex doubles + fjder: *None*, or output array of complex double derivatives + """ + cdef: + double complex[BUFSIZE] fjstemp + double complex[BUFSIZE] fjdertmp + int[BUFSIZE] iscale + int ier, ifder, lwfjs, ntop, i, nterms_ + double scale_ + double complex z_ + + if nterms <= 0: + raise ValueError("nterms should be positive") + + nterms_ = nterms + z_ = z + scale_ = scale + ifder = fjder is not None + lwfjs = BUFSIZE + + jfuns3d_(&ier, &nterms_, &z_, &scale_, fjstemp, &ifder, fjdertmp, &lwfjs, + iscale, &ntop) + + if ier: + raise ValueError("jfuns3d_ returned error code %d" % ier) + + for i in range(1 + nterms): + fjs[i] = fjstemp[i] + if ifder: + fjder[i] = fjdertmp[i] + + +def h3dall_wrapper(nterms, z, scale, hs, hders): + """Evaluate spherical Hankel functions. + + Arguments: + nterms: Highest order to be computed + z: Argument + scale: Output scaling factor (recommended: min(abs(z), 1)) + hs: Output array of complex doubles + hders: *None*, or output array of complex double derivatives + """ + cdef: + int nterms_, ifder + double scale_ + double complex z_ + double complex[:] hvec = np.empty(1 + nterms, np.complex) + double complex[:] hdervec = np.empty(1 + nterms, np.complex) + + ifder = hders is not None + + if nterms <= 0: + raise ValueError("nterms should be positive") + + z_ = z + scale_ = scale + nterms_ = nterms + + h3dall_(&nterms_, &z_, &scale_, &hvec[0], &ifder, &hdervec[0]) + + hs[:1 + nterms] = hvec[:] + if ifder: + hders[:1 + nterms] = hdervec[:] + +# }}} + + +# {{{ helpers + +cdef void legvals(double x, int n, double[] vals, double[] derivs) nogil: + """Compute the values of the Legendre polynomial up to order n at x. + Optionally, if derivs is non-NULL, compute the values of the derivative too. + + Borrowed from fmmlib. 
+ """ + cdef: + double pj, derj, pjm2, pjm1, derjm2, derjm1 + int j + + pjm2 = 1 + pjm1 = x + + vals[0] = 1 + if derivs != NULL: + derivs[0] = 0 + derjm2 = 0 + derjm1 = 1 + + if n == 0: + return + + vals[1] = x + if derivs != NULL: + derivs[1] = 1 + + if n == 1: + return + + for j in range(2, n + 1): + pj = ( (2*j-1)*x*pjm1-(j-1)*pjm2 ) / j + vals[j] = pj + + if derivs != NULL: + derj = (2*j-1)*(pjm1+x*derjm1)-(j-1)*derjm2 + derj = derj / j + derivs[j] = derj + derjm2 = derjm1 + derjm1 = derj + + pjm2 = pjm1 + pjm1 = pj + + +cdef double dist(double[3] a, double[3] b) nogil: + """Calculate the Euclidean distance between a and b.""" + return sqrt( + (a[0] - b[0]) * (a[0] - b[0]) + + (a[1] - b[1]) * (a[1] - b[1]) + + (a[2] - b[2]) * (a[2] - b[2])) + + +cdef void ts_helmholtz_precompute( + double[3] center, + double[3] target, + int order, + int ifder, + double complex k, + double complex[] jvals, + double complex[] jderivs, + double *jscale) nogil: + """Evaluate the source-invariant Bessel terms of the Helmholtz target-specific + expansion.""" + + cdef: + double complex z + double tc_d + int ier, ntop, lwfjs + int[BUFSIZE] iscale + + tc_d = dist(target, center) + jscale[0] = cabs(k * tc_d) if (cabs(k * tc_d) < 1) else 1 + + # Evaluate the spherical Bessel terms. + z = k * tc_d + lwfjs = BUFSIZE + # jfuns3d_ only supports order > 0 (goes out of bounds if order = 0) + order = max(1, order) + jfuns3d_(&ier, &order, &z, jscale, jvals, &ifder, jderivs, &lwfjs, iscale, + &ntop) + if ier: + # This could in theory fail. + fprintf(stderr, "array passed to jfuns3d_ was too small\n") + abort() + +# }}} + + +# {{{ Laplace S + +cdef double complex ts_laplace_s( + double[3] source, + double[3] center, + double[3] target, + double complex charge, + int order) nogil: + """Evaluate the target-specific expansion of the Laplace single-layer kernel.""" + + cdef: + double j + double result, r, sc_d, tc_d, cos_angle + # Legendre recurrence values + double pj, pjm1, pjm2 + + tc_d = dist(target, center) + sc_d = dist(source, center) + + cos_angle = (( + (target[0] - center[0]) * (source[0] - center[0]) + + (target[1] - center[1]) * (source[1] - center[1]) + + (target[2] - center[2]) * (source[2] - center[2])) + / (tc_d * sc_d)) + + if order == 0: + return charge / sc_d + + pjm2 = 1 + pjm1 = cos_angle + + result = 1 / sc_d + (cos_angle * tc_d) / (sc_d * sc_d) + + r = (tc_d * tc_d) / (sc_d * sc_d * sc_d) + + # Invariant: j matches loop counter. Using a double-precision version of the + # loop counter avoids an int-to-double conversion inside the loop. + j = 2. 
+ + for _ in range(2, order + 1): + pj = ( (2.*j-1.)*cos_angle*pjm1-(j-1.)*pjm2 ) / j + result += pj * r + + r *= (tc_d / sc_d) + j += 1 + pjm2 = pjm1 + pjm1 = pj + + return charge * result + +# }}} + + +# {{{ Laplace grad(S) + +cdef void ts_laplace_sp( + double complex[3] grad, + double[3] source, + double[3] center, + double[3] target, + double complex charge, + int order) nogil: + """Evaluate the target-specific expansion of the gradient of the Laplace + single-layer kernel.""" + + cdef: + double[3] grad_tmp + double sc_d, tc_d, cos_angle, Rn + double[BUFSIZE] lvals, lderivs + double[3] smc, tmc + int n + + for m in range(3): + smc[m] = source[m] - center[m] + tmc[m] = target[m] - center[m] + grad_tmp[m] = 0 + + tc_d = dist(target, center) + sc_d = dist(source, center) + + cos_angle = (tmc[0] * smc[0] + tmc[1] * smc[1] + tmc[2] * smc[2]) / (tc_d * sc_d) + legvals(cos_angle, order, lvals, lderivs) + + # Invariant: Rn = tc_d ** (n - 1) / sc_d ** (n + 1) + Rn = 1 / (sc_d * sc_d) + + for n in range(1, 1 + order): + for m in range(3): + grad_tmp[m] += Rn * ( + n * (tmc[m] / tc_d) * lvals[n] + + (smc[m] / sc_d - cos_angle * tmc[m] / tc_d) * lderivs[n]) + Rn *= tc_d / sc_d + + for m in range(3): + grad[m] += charge * grad_tmp[m] + +# }}} + + +# {{{ Laplace D + +cdef double complex ts_laplace_d( + double[3] source, + double[3] center, + double[3] target, + double[3] dipole, + double complex dipstr, + int order) nogil: + """Evaluate the target-specific expansion of the Laplace double-layer kernel.""" + + cdef: + int n, m + double sc_d, tc_d, cos_angle, Rn + double[BUFSIZE] lvals, lderivs + double[3] smc, tmc, grad + + for m in range(3): + smc[m] = source[m] - center[m] + tmc[m] = target[m] - center[m] + grad[m] = 0 + + tc_d = dist(target, center) + sc_d = dist(source, center) + + cos_angle = (tmc[0] * smc[0] + tmc[1] * smc[1] + tmc[2] * smc[2]) / (tc_d * sc_d) + legvals(cos_angle, order, lvals, lderivs) + + # Invariant: Rn = (tc_d ** n / sc_d ** (n + 2)) + Rn = 1 / (sc_d * sc_d) + + for n in range(0, order + 1): + for m in range(3): + grad[m] += Rn * ( + -(n + 1) * (smc[m] / sc_d) * lvals[n] + + (tmc[m] / tc_d - cos_angle * smc[m] / sc_d) * lderivs[n]) + Rn *= (tc_d / sc_d) + + return dipstr * ( + dipole[0] * grad[0] + dipole[1] * grad[1] + dipole[2] * grad[2]) + +# }}} + + +# {{{ Helmholtz S + +cdef double complex ts_helmholtz_s( + double[3] source, + double[3] center, + double[3] target, + double complex charge, + int order, + double complex k, + double complex[] jvals, + double jscale) nogil: + """Evaluate the target-specific expansion of the Helmholtz single-layer + kernel.""" + + cdef: + int n, ifder + double sc_d, tc_d, cos_angle + double[BUFSIZE] lvals + double complex[BUFSIZE] hvals + double hscale, unscale + double complex z, result + + tc_d = dist(target, center) + sc_d = dist(source, center) + + cos_angle = (( + (target[0] - center[0]) * (source[0] - center[0]) + + (target[1] - center[1]) * (source[1] - center[1]) + + (target[2] - center[2]) * (source[2] - center[2])) + / (tc_d * sc_d)) + + # Evaluate the Legendre terms. + legvals(cos_angle, order, lvals, NULL) + + # Scaling magic for Hankel terms. + # These values are taken from the fmmlib documentation. + hscale = cabs(k * sc_d) if (cabs(k * sc_d) < 1) else 1 + # unscale = (jscale / hscale) ** n + # Multiply against unscale to remove the scaling. + unscale = 1 + + # Evaluate the spherical Hankel terms. 
+ z = k * sc_d + ifder = 0 + h3dall_(&order, &z, &hscale, hvals, &ifder, NULL) + + result = 0 + + for n in range(1 + order): + result += (2 * n + 1) * unscale * (jvals[n] * hvals[n] * lvals[n]) + unscale *= jscale / hscale + + return 1j * k * charge * result + +# }}} + + +# {{{ Helmholtz grad(S) + +cdef void ts_helmholtz_sp( + double complex[3] grad, + double[3] source, + double[3] center, + double[3] target, + double complex charge, + int order, + double complex k, + double complex[] jvals, + double complex[] jderivs, + double jscale) nogil: + """Evaluate the target-specific expansion of the gradient of the Helmholtz + single-layer kernel.""" + + cdef: + int n, m + int ifder + double sc_d, tc_d, cos_angle + double[3] smc, tmc + double complex[3] grad_tmp + double[BUFSIZE] lvals, lderivs + double complex z + double complex [BUFSIZE] hvals + double hscale, unscale + + for m in range(3): + smc[m] = source[m] - center[m] + tmc[m] = target[m] - center[m] + grad_tmp[m] = 0 + + tc_d = dist(target, center) + sc_d = dist(source, center) + + # Evaluate the Legendre terms. + cos_angle = (tmc[0] * smc[0] + tmc[1] * smc[1] + tmc[2] * smc[2]) / (tc_d * sc_d) + legvals(cos_angle, order, lvals, lderivs) + + # Scaling magic for Hankel terms. + # These values are taken from the fmmlib documentation. + hscale = cabs(k * sc_d) if (cabs(k * sc_d) < 1) else 1 + # unscale = (jscale / hscale) ** n + # Multiply against unscale to remove the scaling. + unscale = 1 + + # Evaluate the spherical Hankel terms. + z = k * sc_d + ifder = 0 + h3dall_(&order, &z, &hscale, hvals, &ifder, NULL) + + # + # This is a mess, but amounts to the t-gradient of: + # + # __ order + # ik \ (2n + 1) j (k |t - c|) h (k |s - c|) P (cos θ) + # /__ n = 0 n n n + # + # + for n in range(0, order + 1): + for m in range(3): + grad_tmp[m] += (2 * n + 1) * unscale * hvals[n] / tc_d * ( + k * jderivs[n] * lvals[n] * tmc[m] + + (smc[m] / sc_d - cos_angle * tmc[m] / tc_d) + * jvals[n] * lderivs[n]) + unscale *= jscale / hscale + + for m in range(3): + grad[m] += 1j * k * charge * grad_tmp[m] + +# }}} + + +# {{{ Helmholtz D + +cdef double complex ts_helmholtz_d( + double[3] source, + double[3] center, + double[3] target, + double[3] dipole, + double complex dipstr, + int order, + double complex k, + double complex[] jvals, + double jscale) nogil: + """Evaluate the target-specific expansion of the Helmholtz double-layer + kernel.""" + + cdef: + int n, m + int ifder + double sc_d, tc_d, cos_angle + double[3] smc, tmc + double complex[3] grad + double[BUFSIZE] lvals, lderivs + double complex z + double complex [BUFSIZE] hvals, hderivs + double hscale, unscale + + for m in range(3): + smc[m] = source[m] - center[m] + tmc[m] = target[m] - center[m] + grad[m] = 0 + + tc_d = dist(target, center) + sc_d = dist(source, center) + + cos_angle = (tmc[0] * smc[0] + tmc[1] * smc[1] + tmc[2] * smc[2]) / (tc_d * sc_d) + + # Evaluate the Legendre terms. + legvals(cos_angle, order, lvals, lderivs) + + # Scaling magic for Hankel terms. + # These values are taken from the fmmlib documentation. + hscale = cabs(k * sc_d) if (cabs(k * sc_d) < 1) else 1 + # unscale = (jscale / hscale) ** n + # Multiply against unscale to remove the scaling. + unscale = 1 + + # Evaluate the spherical Hankel terms. 
+ z = k * sc_d + ifder = 1 + h3dall_(&order, &z, &hscale, hvals, &ifder, hderivs) + + # + # This is a mess, but amounts to the s-gradient of: + # + # __ order + # ik \ (2n + 1) j (k |t - c|) h (k |s - c|) P (cos θ) + # /__ n = 0 n n n + # + # + for n in range(0, order + 1): + for m in range(3): + grad[m] += (2 * n + 1) * unscale * jvals[n] / sc_d * ( + k * smc[m] * hderivs[n] * lvals[n] + + (tmc[m] / tc_d - cos_angle * smc[m] / sc_d) + * hvals[n] * lderivs[n]) + unscale *= jscale / hscale + + return 1j * k * dipstr * ( + grad[0] * dipole[0] + grad[1] * dipole[1] + grad[2] * dipole[2]) + +# }}} + + +def eval_target_specific_qbx_locals( + int ifpot, + int ifgrad, + int ifcharge, + int ifdipole, + int order, + double[:,:] sources, + double[:,:] targets, + double[:,:] centers, + int[:] qbx_centers, + int[:] qbx_center_to_target_box, + int[:] center_to_target_starts, int[:] center_to_target_lists, + int[:] source_box_starts, int[:] source_box_lists, + int[:] box_source_starts, int[:] box_source_counts_nonchild, + double complex helmholtz_k, + double complex[:] charge, + double complex[:] dipstr, + double[:,:] dipvec, + double complex[:] pot, + double complex[:,:] grad): + """TSQBX entry point. + + Arguments: + ifpot: Flag indicating whether to evaluate the potential + ifgrad: Flag indicating whether to evaluate the gradient of the potential + ifcharge: Flag indicating whether to include monopole sources + ifdipole: Flag indicating whether to include dipole sources + order: Expansion order + sources: Array of sources of shape (3, *nsrcs*) + targets: Array of targets of shape (3, *ntgts*) + centers: Array of centers of shape (3, *nctrs*) + qbx_centers: Array of subset of indices into *centers* which are QBX centers + qbx_center_to_target_box: Array mapping centers to target box numbers + center_to_target_starts: "Start" indices for center-to-target CSR list + center_to_target_lists: Center-to-target CSR list + source_box_starts: "Start" indices for target-box-to-source-box CSR list + source_box_lists: Target-box-to-source-box CSR list + box_source_starts: "Start" indices for sources for each box + box_source_counts_nonchild: Number of sources per box + helmholtz_k: Helmholtz parameter (Pass 0 for Laplace) + charge: (Complex) Source strengths, shape (*nsrcs*,), or *None* + dipstr: (Complex) Dipole source strengths, shape (*nsrcs*,) or *None* + dipvec: (Real) Dipole source orientations, shape (3, *nsrcs*), or *None* + pot: (Complex) Output potential, shape (*ngts*,), or *None* + grad: (Complex) Output gradient, shape (3, *ntgts*), or *None* + """ + + cdef: + int tgt, ictr, ctr + int itgt, itgt_start, itgt_end + int tgt_box, src_ibox + int isrc_box, isrc_box_start, isrc_box_end + int isrc, isrc_start, isrc_end + int tid, m + double jscale + double complex result + double[:,:] source, center, target, dipole + double complex[:,:] result_grad, jvals, jderivs + int laplace_s, helmholtz_s, laplace_sp, helmholtz_sp, laplace_d, helmholtz_d + + # {{{ process arguments + + if ifcharge: + if charge is None: + raise ValueError("Missing charge") + + if ifdipole: + if dipstr is None: + raise ValueError("Missing dipstr") + if dipvec is None: + raise ValueError("Missing dipvec") + + if ifdipole and ifgrad: + raise ValueError("Does not support computing gradient of dipole sources") + + if helmholtz_k == 0: + helmholtz_s = helmholtz_sp = helmholtz_d = 0 + + if ifpot: + laplace_s = ifcharge + laplace_d = ifdipole + + if ifgrad: + laplace_sp = ifcharge + + else: + laplace_s = laplace_sp = laplace_d = 0 + + if ifpot: + 
+            helmholtz_s = ifcharge
+            helmholtz_d = ifdipole
+
+            if ifgrad:
+                helmholtz_sp = ifcharge
+
+    # }}}
+
+    if not any([
+            laplace_s, laplace_sp, laplace_d, helmholtz_s, helmholtz_sp,
+            helmholtz_d]):
+        return
+
+    if qbx_centers.shape[0] == 0:
+        return
+
+    # {{{ set up thread-local storage
+
+    # Hack to obtain thread-local storage
+    maxthreads = openmp.omp_get_max_threads()
+
+    # Prevent false sharing by padding the thread-local buffers
+    source = np.zeros((maxthreads, PADDING))
+    target = np.zeros((maxthreads, PADDING))
+    center = np.zeros((maxthreads, PADDING))
+    dipole = np.zeros((maxthreads, PADDING))
+    result_grad = np.zeros((maxthreads, PADDING), dtype=np.complex)
+    jvals = np.zeros((maxthreads, BUFSIZE + PADDING), dtype=np.complex)
+    jderivs = np.zeros((maxthreads, BUFSIZE + PADDING), dtype=np.complex)
+
+    # TODO: Check that the order is not too high, since temporary
+    # arrays in this module that are limited by BUFSIZE may overflow
+    # if that is the case
+
+    # }}}
+
+    for ictr in cython.parallel.prange(0, qbx_centers.shape[0],
+                                       nogil=True, schedule="static",
+                                       chunksize=128):
+        # Assign to jscale so Cython marks it as private
+        jscale = 0
+        ctr = qbx_centers[ictr]
+        itgt_start = center_to_target_starts[ctr]
+        itgt_end = center_to_target_starts[ctr + 1]
+        tgt_box = qbx_center_to_target_box[ctr]
+        tid = cython.parallel.threadid()
+
+        for m in range(3):
+            center[tid, m] = centers[m, ctr]
+
+        for itgt in range(itgt_start, itgt_end):
+            result = 0
+            tgt = center_to_target_lists[itgt]
+
+            for m in range(3):
+                target[tid, m] = targets[m, tgt]
+                if ifgrad:
+                    result_grad[tid, m] = 0
+
+            if helmholtz_s or helmholtz_sp or helmholtz_d:
+                # Precompute source-invariant Helmholtz terms.
+                ts_helmholtz_precompute(
+                        &center[tid, 0], &target[tid, 0],
+                        order, ifgrad, helmholtz_k, &jvals[tid, 0],
+                        &jderivs[tid, 0], &jscale)
+
+            isrc_box_start = source_box_starts[tgt_box]
+            isrc_box_end = source_box_starts[tgt_box + 1]
+
+            for isrc_box in range(isrc_box_start, isrc_box_end):
+                src_ibox = source_box_lists[isrc_box]
+                isrc_start = box_source_starts[src_ibox]
+                isrc_end = isrc_start + box_source_counts_nonchild[src_ibox]
+
+                for isrc in range(isrc_start, isrc_end):
+
+                    for m in range(3):
+                        source[tid, m] = sources[m, isrc]
+                        if ifdipole:
+                            dipole[tid, m] = dipvec[m, isrc]
+
+                    # NOTE: Don't use +=, since that makes Cython think we are
+                    # doing an OpenMP reduction.
+
+                    # {{{ evaluate potentials
+
+                    if laplace_s:
+                        result = result + (
+                            ts_laplace_s(
+                                &source[tid, 0], &center[tid, 0], &target[tid, 0],
+                                charge[isrc], order))
+
+                    if laplace_sp:
+                        ts_laplace_sp(
+                            &result_grad[tid, 0],
+                            &source[tid, 0], &center[tid, 0], &target[tid, 0],
+                            charge[isrc], order)
+
+                    if laplace_d:
+                        result = result + (
+                            ts_laplace_d(
+                                &source[tid, 0], &center[tid, 0], &target[tid, 0],
+                                &dipole[tid, 0], dipstr[isrc], order))
+
+                    if helmholtz_s:
+                        result = result + (
+                            ts_helmholtz_s(&source[tid, 0], &center[tid, 0],
+                                &target[tid, 0], charge[isrc], order, helmholtz_k,
+                                &jvals[tid, 0], jscale))
+
+                    if helmholtz_sp:
+                        ts_helmholtz_sp(
+                            &result_grad[tid, 0],
+                            &source[tid, 0], &center[tid, 0], &target[tid, 0],
+                            charge[isrc], order, helmholtz_k,
+                            &jvals[tid, 0], &jderivs[tid, 0], jscale)
+
+                    if helmholtz_d:
+                        result = result + (
+                            ts_helmholtz_d(
+                                &source[tid, 0], &center[tid, 0], &target[tid, 0],
+                                &dipole[tid, 0], dipstr[isrc], order, helmholtz_k,
+                                &jvals[tid, 0], jscale))
+
+                    # }}}
+
+            if ifpot:
+                pot[tgt] = result
+
+            if ifgrad:
+                for m in range(3):
+                    grad[m, tgt] = result_grad[tid, m]
diff --git a/pytential/qbx/utils.py b/pytential/qbx/utils.py
index b0ffc066035c0c8f1aea690ecc5dcb2618367275..ca8b08df88a3fd10364a4c317bf779b1b134c7b5 100644
--- a/pytential/qbx/utils.py
+++ b/pytential/qbx/utils.py
@@ -601,4 +601,70 @@ def build_tree_with_qbx_metadata(
 
 # }}}
 
+
+# {{{ host geo data wrapper
+
+class ToHostTransferredGeoDataWrapper(object):
+    """Wraps an instance of :class:`pytential.qbx.geometry.QBXFMMGeometryData`,
+    automatically converting returned OpenCL arrays to host data.
+    """
+
+    def __init__(self, queue, geo_data):
+        self.queue = queue
+        self.geo_data = geo_data
+
+    @memoize_method
+    def tree(self):
+        return self.geo_data.tree().get(queue=self.queue)
+
+    @memoize_method
+    def traversal(self):
+        return self.geo_data.traversal().get(queue=self.queue)
+
+    @property
+    def lpot_source(self):
+        return self.geo_data.lpot_source
+
+    @property
+    def ncenters(self):
+        return self.geo_data.ncenters
+
+    @memoize_method
+    def centers(self):
+        return np.array([
+            ci.get(queue=self.queue)
+            for ci in self.geo_data.centers()])
+
+    @memoize_method
+    def expansion_radii(self):
+        return self.geo_data.expansion_radii().get(queue=self.queue)
+
+    @memoize_method
+    def global_qbx_centers(self):
+        return self.geo_data.global_qbx_centers().get(queue=self.queue)
+
+    @memoize_method
+    def qbx_center_to_target_box(self):
+        return self.geo_data.qbx_center_to_target_box().get(queue=self.queue)
+
+    @memoize_method
+    def qbx_center_to_target_box_source_level(self, source_level):
+        return self.geo_data.qbx_center_to_target_box_source_level(
+            source_level).get(queue=self.queue)
+
+    @memoize_method
+    def non_qbx_box_target_lists(self):
+        return self.geo_data.non_qbx_box_target_lists().get(queue=self.queue)
+
+    @memoize_method
+    def center_to_tree_targets(self):
+        return self.geo_data.center_to_tree_targets().get(queue=self.queue)
+
+    @memoize_method
+    def all_targets(self):
+        """All (not just non-QBX) targets packaged into a single array."""
+        return np.array(list(self.tree().targets))
+
+# }}}
+
 # vim: foldmethod=marker:filetype=pyopencl
diff --git a/setup.py b/setup.py
index e2a1ae90b75867829508b789fbe340e15b29c426..01a241b138f70c77e2b40f9451db72870094189e 100644
--- a/setup.py
+++ b/setup.py
@@ -3,6 +3,8 @@
 import os
 
 from setuptools import setup, find_packages
+from setuptools.extension import Extension
+from Cython.Build import cythonize
 
 # {{{ capture git revision at install time
 
@@ -54,6 +56,21 @@
write_git_revision("pytential") # }}} +ext_modules = [ + Extension( + "pytential.qbx.target_specific.impl", + sources=[ + "pytential/qbx/target_specific/impl.pyx", + "pytential/qbx/target_specific/helmholtz_utils.c"], + depends=[ + "pytential/qbx/target_specific/impl.h", + "pytential/qbx/target_specific/helmholtz_utils.h"], + extra_compile_args=["-Wall", "-fopenmp", "-Ofast"], + extra_link_args=["-fopenmp"] + ), +] + + version_dict = {} init_filename = "pytential/version.py" os.environ["AKPYTHON_EXEC_FROM_WITHIN_WITHIN_SETUP_PY"] = "1" @@ -91,6 +108,8 @@ setup(name="pytential", packages=find_packages(), + ext_modules=cythonize(ext_modules), + install_requires=[ "pytest>=2.3", # FIXME leave out for now diff --git a/test/test_layer_pot.py b/test/test_layer_pot.py index 4b5d70b675871031023d3f5cf40c049c008d0f5b..d6660784042c982162df6cb6f68a5b3670fd21a0 100644 --- a/test/test_layer_pot.py +++ b/test/test_layer_pot.py @@ -183,7 +183,7 @@ def test_off_surface_eval_vs_direct(ctx_getter, do_plot=False): target_association_tolerance=0.05, ).with_refinement() - fplot = FieldPlotter(np.zeros(2), extent=5, npoints=1000) + fplot = FieldPlotter(np.zeros(2), extent=5, npoints=500) from pytential.target import PointsTarget ptarget = PointsTarget(fplot.points) from sumpy.kernel import LaplaceKernel diff --git a/test/test_layer_pot_identity.py b/test/test_layer_pot_identity.py index c376fdf3e5072aba8604147c70a4f5d918c15f33..a43d2caf78b066218d622f39e250e1226d285891 100644 --- a/test/test_layer_pot_identity.py +++ b/test/test_layer_pot_identity.py @@ -233,32 +233,38 @@ class DynamicTestCase(object): if (self.geometry.mesh_name == "sphere" and self.k != 0 and self.fmm_backend == "sumpy"): - pytest.skip("both direct eval and generating the FMM kernels " + raise ValueError("both direct eval and generating the FMM kernels " "are too slow") if (self.geometry.mesh_name == "sphere" and self.expr.zero_op_name == "green_grad"): - pytest.skip("does not achieve sufficient precision") + raise ValueError("does not achieve sufficient precision") # {{{ integral identity tester + +@pytest.mark.slowtest +@pytest.mark.parametrize("case", [ + DynamicTestCase(SphereGeometry(), GreenExpr(), 0), +]) +def test_identity_convergence_slow(ctx_getter, case): + test_identity_convergence(ctx_getter, case) + + @pytest.mark.parametrize("case", [ - tc - for geom in [ - StarfishGeometry(), - SphereGeometry(), - ] - for tc in [ - DynamicTestCase(geom, GreenExpr(), 0), - DynamicTestCase(geom, GreenExpr(), 1.2), - DynamicTestCase(geom, GradGreenExpr(), 0), - DynamicTestCase(geom, GradGreenExpr(), 1.2), - DynamicTestCase(geom, ZeroCalderonExpr(), 0), - - DynamicTestCase(geom, GreenExpr(), 0, fmm_backend="fmmlib"), - DynamicTestCase(geom, GreenExpr(), 1.2, fmm_backend="fmmlib"), - ]]) + # 2d + DynamicTestCase(StarfishGeometry(), GreenExpr(), 0), + DynamicTestCase(StarfishGeometry(), GreenExpr(), 1.2), + DynamicTestCase(StarfishGeometry(), GradGreenExpr(), 0), + DynamicTestCase(StarfishGeometry(), GradGreenExpr(), 1.2), + DynamicTestCase(StarfishGeometry(), ZeroCalderonExpr(), 0), + DynamicTestCase(StarfishGeometry(), GreenExpr(), 0, fmm_backend="fmmlib"), + DynamicTestCase(StarfishGeometry(), GreenExpr(), 1.2, fmm_backend="fmmlib"), + # 3d + DynamicTestCase(SphereGeometry(), GreenExpr(), 0, fmm_backend="fmmlib"), + DynamicTestCase(SphereGeometry(), GreenExpr(), 1.2, fmm_backend="fmmlib") +]) def test_identity_convergence(ctx_getter, case, visualize=False): logging.basicConfig(level=logging.INFO) diff --git a/test/test_target_specific_qbx.py 
b/test/test_target_specific_qbx.py new file mode 100644 index 0000000000000000000000000000000000000000..34091af9a91e26f526ea3f2b371515b0c8992029 --- /dev/null +++ b/test/test_target_specific_qbx.py @@ -0,0 +1,219 @@ +from __future__ import division, absolute_import, print_function + +__copyright__ = "Copyright (C) 2018 Matt Wala" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + + +import numpy as np +import numpy.linalg as la # noqa +import pyopencl as cl +import pyopencl.clmath as clmath +import pytest +from pyopencl.tools import ( # noqa + pytest_generate_tests_for_pyopencl as pytest_generate_tests) + +from functools import partial # noqa +from meshmode.mesh.generation import ( # noqa + ellipse, cloverleaf, starfish, drop, n_gon, qbx_peanut, WobblyCircle, + NArmedStarfish, + make_curve_mesh) + +from pytential import bind, sym, norm # noqa +from sumpy.kernel import LaplaceKernel, HelmholtzKernel + +import logging +logger = logging.getLogger(__name__) + + +def test_spherical_bessel_functions(): + import pytential.qbx.target_specific as ts + + nterms = 9 + z = 3j + scale = 1 + j = np.zeros(1 + nterms, dtype=np.complex) + jder = np.zeros(1 + nterms, dtype=np.complex) + ts.jfuns3d_wrapper(nterms, z, scale, j, jder) + + # Reference solution computed using scipy.special.spherical_jn + + j_expected = np.array([ + +3.33929164246994992e+00 + +0.00000000000000000e+00j, + +0.00000000000000000e+00 + +2.24279011776926884e+00j, + -1.09650152470070195e+00 + +0.00000000000000000e+00j, + +2.77555756156289135e-17 + -4.15287576601431119e-01j, + +1.27497179297362317e-01 + +0.00000000000000000e+00j, + -3.46944695195361419e-18 + +3.27960387093445271e-02j, + -7.24503736309898075e-03 + -4.33680868994201774e-19j, + +0.00000000000000000e+00 + -1.40087680258226812e-03j, + +2.40653350187633002e-04 + +0.00000000000000000e+00j, + +0.00000000000000000e+00 + +3.71744848523478122e-05j, + ]) + + assert np.allclose(j, j_expected) + + jder_expected = np.array([ + -0.00000000000000000e+00 + -2.24279011776926884e+00j, + +1.84409823062377076e+00 + +0.00000000000000000e+00j, + +0.00000000000000000e+00 + +1.14628859306856690e+00j, + -5.42784755898793825e-01 + +3.70074341541718826e-17j, + +2.77555756156289135e-17 + -2.02792277772493951e-01j, + +6.19051018786732632e-02 + -6.93889390390722838e-18j, + -2.45752492430047685e-18 + +1.58909515287802387e-02j, + -3.50936588954626604e-03 + -4.33680868994201774e-19j, + +0.00000000000000000e+00 + -6.78916752019369197e-04j, + +1.16738400679806980e-04 + +0.00000000000000000e+00j, + ]) + + 
assert np.allclose(jder, jder_expected) + + +def test_spherical_hankel_functions(): + import pytential.qbx.target_specific as ts + + nterms = 9 + z = 2 + 3j + scale = 1 + h = np.zeros(1 + nterms, dtype=np.complex) + hder = np.zeros(1 + nterms, dtype=np.complex) + ts.h3dall_wrapper(nterms, z, scale, h, hder) + + # Reference solution computed using + # scipy.special.spherical_jn + 1j * scipy.special.spherical_yn + h_expected = np.array([ + +1.17460537937623677e-02 + -7.25971518952217565e-03j, + -7.12794888037171503e-03 + -1.55735608522498126e-02j, + -2.58175723285687941e-02 + +5.00665171335734627e-03j, + -6.95481631849959037e-03 + +4.92143379339500253e-02j, + +9.78278544942576822e-02 + +5.92281078069348405e-02j, + +2.65420992601874961e-01 + -1.70387117227806167e-01j, + -8.11750107462848453e-02 + -1.02133651818182791e+00j, + -3.49178056863992792e+00 + -1.62876088689699405e+00j, + -1.36147986022969878e+01 + +9.34959028601928743e+00j, + +4.56300765393887087e+00 + +7.94934376901125432e+01j, + ]) + + assert np.allclose(h, h_expected) + + hder_expected = np.array([ + +7.12794888037171503e-03 + +1.55735608522498126e-02j, + +2.11270661502996893e-02 + -5.75767287207851197e-03j, + +1.32171023895111261e-03 + -3.57580271012700734e-02j, + -6.69663049946767064e-02 + -3.16989251553807527e-02j, + -1.50547136475930293e-01 + +1.16532548652759055e-01j, + +8.87444851771816839e-02 + +5.84014513465967444e-01j, + +2.00269153354544205e+00 + +7.98384884993240895e-01j, + +7.22334424954346144e+00 + -5.46307186102847187e+00j, + -4.05890079026877615e+00 + -4.28512368415405192e+01j, + -2.04081205047078043e+02 + -1.02417988497371837e+02j, + ]) + + assert np.allclose(hder, hder_expected) + + +@pytest.mark.parametrize("op", ["S", "D", "Sp"]) +@pytest.mark.parametrize("helmholtz_k", [0, 1.2, 12 + 1.2j]) +@pytest.mark.parametrize("qbx_order", [0, 1, 5]) +def test_target_specific_qbx(ctx_getter, op, helmholtz_k, qbx_order): + logging.basicConfig(level=logging.INFO) + + cl_ctx = ctx_getter() + queue = cl.CommandQueue(cl_ctx) + + target_order = 4 + fmm_tol = 1e-3 + + from meshmode.mesh.generation import generate_icosphere + mesh = generate_icosphere(1, target_order) + + from meshmode.discretization import Discretization + from meshmode.discretization.poly_element import \ + InterpolatoryQuadratureSimplexGroupFactory + from pytential.qbx import QBXLayerPotentialSource + pre_density_discr = Discretization( + cl_ctx, mesh, + InterpolatoryQuadratureSimplexGroupFactory(target_order)) + + from sumpy.expansion.level_to_order import SimpleExpansionOrderFinder + + refiner_extra_kwargs = {} + + if helmholtz_k != 0: + refiner_extra_kwargs["kernel_length_scale"] = 5 / abs(helmholtz_k) + + qbx, _ = QBXLayerPotentialSource( + pre_density_discr, 4*target_order, + qbx_order=qbx_order, + fmm_level_to_order=SimpleExpansionOrderFinder(fmm_tol), + fmm_backend="fmmlib", + _expansions_in_tree_have_extent=True, + _expansion_stick_out_factor=0.9, + _use_target_specific_qbx=False, + ).with_refinement(**refiner_extra_kwargs) + + density_discr = qbx.density_discr + + nodes = density_discr.nodes().with_queue(queue) + u_dev = clmath.sin(nodes[0]) + + if helmholtz_k == 0: + kernel = LaplaceKernel(3) + kernel_kwargs = {} + else: + kernel = HelmholtzKernel(3, allow_evanescent=True) + kernel_kwargs = {"k": sym.var("k")} + + u_sym = sym.var("u") + + if op == "S": + op = sym.S + elif op == "D": + op = sym.D + elif op == "Sp": + op = sym.Sp + else: + raise ValueError("unknown operator: '%s'" % op) + + expr = op(kernel, u_sym, qbx_forced_limit=-1, **kernel_kwargs) 
+ + bound_op = bind(qbx, expr) + pot_ref = bound_op(queue, u=u_dev, k=helmholtz_k).get() + + qbx = qbx.copy(_use_target_specific_qbx=True) + bound_op = bind(qbx, expr) + pot_tsqbx = bound_op(queue, u=u_dev, k=helmholtz_k).get() + + assert np.allclose(pot_tsqbx, pot_ref, atol=1e-13, rtol=1e-13) + + +# You can test individual routines by typing +# $ python test_target_specific_qbx.py 'test_routine()' + +if __name__ == "__main__": + import sys + if len(sys.argv) > 1: + exec(sys.argv[1]) + else: + from pytest import main + main([__file__]) + +# vim: fdm=marker
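
For orientation, the evaluation path added by this patch is switched on through the
`_use_target_specific_qbx` flag of `QBXLayerPotentialSource`; the operator expressions
themselves do not change. The following driver sketch mirrors
`test/test_target_specific_qbx.py` above and pairs the flag with `fmm_backend="fmmlib"`
as that test does; the mesh resolution, orders, and FMM tolerance are illustrative
choices, not values mandated by the patch:

    import pyopencl as cl
    import pyopencl.clmath as clmath

    from meshmode.mesh.generation import generate_icosphere
    from meshmode.discretization import Discretization
    from meshmode.discretization.poly_element import \
            InterpolatoryQuadratureSimplexGroupFactory

    from pytential import bind, sym
    from pytential.qbx import QBXLayerPotentialSource
    from sumpy.expansion.level_to_order import SimpleExpansionOrderFinder
    from sumpy.kernel import LaplaceKernel

    cl_ctx = cl.create_some_context()
    queue = cl.CommandQueue(cl_ctx)

    # Unit sphere discretization; target_order = 4 is an illustrative choice.
    target_order = 4
    pre_density_discr = Discretization(
            cl_ctx, generate_icosphere(1, target_order),
            InterpolatoryQuadratureSimplexGroupFactory(target_order))

    # _use_target_specific_qbx=True selects the target-specific evaluation
    # path; fmmlib is the FMM backend used in the test above.
    qbx, _ = QBXLayerPotentialSource(
            pre_density_discr, 4*target_order, qbx_order=5,
            fmm_level_to_order=SimpleExpansionOrderFinder(1e-3),
            fmm_backend="fmmlib",
            _use_target_specific_qbx=True,
            ).with_refinement()

    density_discr = qbx.density_discr
    u_dev = clmath.sin(density_discr.nodes().with_queue(queue)[0])

    # Interior one-sided limit of the Laplace single-layer potential.
    pot = bind(qbx, sym.S(LaplaceKernel(3), sym.var("u"),
                          qbx_forced_limit=-1))(queue, u=u_dev).get()

With the flag set to `False`, the same bound operator goes through the existing QBX
evaluation path; the test uses that to produce the reference potential it compares
against.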