diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 5dc64f57c38e1040cd40e1055de90a47ccb5d8e9..a8284242f2007befa55f371368ec759d8bc2ffa7 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -18,7 +18,7 @@ jobs:
         -
             uses: actions/setup-python@v1
             with:
-                python-version: '3.x' 
+                python-version: '3.x'
         -   name: "Main Script"
             run: |
                 curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/prepare-and-run-flake8.sh
@@ -29,15 +29,9 @@ jobs:
         runs-on: ubuntu-latest
         steps:
         -   uses: actions/checkout@v2
-        -
-            uses: actions/setup-python@v1
-            with:
-                python-version: '3.x' 
         -   name: "Main Script"
             run: |
-                set -e
-                CONDA_ENVIRONMENT=.test-conda-env-py3-pylint.yml
-                cp .test-conda-env-py3.yml $CONDA_ENVIRONMENT
+                CONDA_ENVIRONMENT=.test-conda-env-py3.yml
                 echo "- matplotlib" >> $CONDA_ENVIRONMENT
                 echo "-------------------------------------------"
                 cat $CONDA_ENVIRONMENT
@@ -51,18 +45,35 @@ jobs:
                 . ./prepare-and-run-pylint.sh pytential test/test_*.py
 
     pytest3:
-        name: Pytest on Py3
+        name: Pytest Linux
         runs-on: ubuntu-latest
         steps:
         -   uses: actions/checkout@v2
         -   name: "Main Script"
             run: |
-                set -e
                 grep -v symengine .test-conda-env-py3.yml > .test-conda-env.yml
                 CONDA_ENVIRONMENT=.test-conda-env.yml
                 curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project-within-miniconda.sh
                 . ./build-and-test-py-project-within-miniconda.sh
 
+    pytest3mac:  # macOS test job; deselects "slowtest" unless PYTEST_ADDOPTS is preset
+        name: Pytest Mac
+        runs-on: macos-latest
+        steps:
+        -   uses: actions/checkout@v2
+        -   name: "Main Script"
+            run: |
+                export LC_ALL=en_US.UTF-8
+                export LANG=en_US.UTF-8
+                grep -v symengine .test-conda-env-py3.yml > .test-conda-env.yml
+                echo "- compilers" >> .test-conda-env.yml
+                echo "- llvm-openmp" >> .test-conda-env.yml
+                CONDA_ENVIRONMENT=.test-conda-env.yml
+                export PYTEST_ADDOPTS="${PYTEST_ADDOPTS:--k 'not slowtest'}"
+                set -o xtrace  # echo each command from here on
+                curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project-within-miniconda.sh
+                . ./build-and-test-py-project-within-miniconda.sh
+
     pytest3symengine:
         name: Pytest on Py3 with SymEngine
         runs-on: ubuntu-latest
@@ -76,4 +87,3 @@ jobs:
                 . ./build-and-test-py-project-within-miniconda.sh
 
 # vim: sw=4
-
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index f9c12b45fb03a45dd816e821efdb92d850325166..dae0a27cd0cd0b5a7687f5ee59d1bd33c24302ee 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -73,29 +73,6 @@ Python 3 Conda:
     reports:
       junit: test/pytest.xml
 
-Python 3 Conda Apple:
-  script:
-  - export LC_ALL=en_US.UTF-8
-  - export LANG=en_US.UTF-8
-  - export CONDA_ENVIRONMENT=.test-conda-env-py3-macos.yml
-  - export PYTEST_ADDOPTS=${PYTEST_ADDOPTS:--k-slowtest}
-  - export CC=clang
-  # https://stackoverflow.com/q/60934005; https://reviews.llvm.org/D71579
-  - export LDFLAGS="-mlinker-version=519"
-  - set -o xtrace
-  - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project-within-miniconda.sh
-  - ". ./build-and-test-py-project-within-miniconda.sh"
-
-  tags:
-  - apple
-  except:
-  - tags
-  retry: 2
-
-  artifacts:
-    reports:
-      junit: test/pytest.xml
-
 Documentation:
   script:
   - EXTRA_INSTALL="Cython pybind11 numpy mako"
diff --git a/.test-conda-env-py3-macos.yml b/.test-conda-env-py3-macos.yml
deleted file mode 100644
index 901576dcf85d5fae32db5acb200702e2ad73c352..0000000000000000000000000000000000000000
--- a/.test-conda-env-py3-macos.yml
+++ /dev/null
@@ -1,30 +0,0 @@
-name: test-conda-env-py3-macos
-channels:
-- conda-forge
-- nodefaults
-dependencies:
-- git
-- conda-forge::numpy
-- conda-forge::sympy
-- scipy
-- pocl
-- islpy
-- pyopencl
-- python>=3.6
-- python-symengine=0.6.0
-- pyfmmlib
-# for OpenMP support in pyfmmlib
-- libgfortran>=3.0.1
-- clangdev
-- openmp
-- cython
-- gmsh
-
-- pip
-- pip:
-    - git+https://github.com/inducer/pytools
-    - git+https://gitlab.tiker.net/inducer/boxtree
-    - git+https://github.com/inducer/pymbolic
-    - git+https://github.com/inducer/loopy
-    - git+https://gitlab.tiker.net/inducer/sumpy
-    - git+https://github.com/inducer/meshmode
diff --git a/.test-conda-env-py3.yml b/.test-conda-env-py3.yml
index 748855b09faf2accacd8b0970597d4732740a2a4..cb1f9027bcaee157cdde0f9983f94895cc2ae2ab 100644
--- a/.test-conda-env-py3.yml
+++ b/.test-conda-env-py3.yml
@@ -22,5 +22,5 @@ dependencies:
     - git+https://gitlab.tiker.net/inducer/boxtree
     - git+https://github.com/inducer/pymbolic
     - git+https://github.com/inducer/loopy
-    - git+https://gitlab.tiker.net/inducer/sumpy
+    - git+https://github.com/inducer/sumpy
     - git+https://github.com/inducer/meshmode
diff --git a/examples/cost.py b/examples/cost.py
index 71c11680484b4c7321273c23d43946633a62cbf8..0070aaa6402f0233acd254cccaf2f0a8f8fa0fb3 100644
--- a/examples/cost.py
+++ b/examples/cost.py
@@ -2,6 +2,8 @@
 
 import pyopencl as cl
 import numpy as np
+from meshmode.array_context import PyOpenCLArrayContext
+from meshmode.dof_array import thaw
 
 from pytential import sym, bind
 from pytools import one
@@ -26,7 +28,7 @@ TRAINING_ARMS = (10, 15, 25)
 TESTING_ARMS = (20,)
 
 
-def starfish_lpot_source(queue, n_arms):
+def starfish_lpot_source(actx, n_arms):
     from meshmode.discretization import Discretization
     from meshmode.discretization.poly_element import (
             InterpolatoryQuadratureSimplexGroupFactory)
@@ -39,7 +41,7 @@ def starfish_lpot_source(queue, n_arms):
             TARGET_ORDER)
 
     pre_density_discr = Discretization(
-            queue.context, mesh,
+            actx, mesh,
             InterpolatoryQuadratureSimplexGroupFactory(TARGET_ORDER))
 
     lpot_kwargs = DEFAULT_LPOT_KWARGS.copy()
@@ -60,14 +62,14 @@ def starfish_lpot_source(queue, n_arms):
 # }}}
 
 
-def training_geometries(queue):
+def training_geometries(actx):
     for n_arms in TRAINING_ARMS:
-        yield starfish_lpot_source(queue, n_arms)
+        yield starfish_lpot_source(actx, n_arms)
 
 
-def test_geometries(queue):
+def test_geometries(actx):
     for n_arms in TESTING_ARMS:
-        yield starfish_lpot_source(queue, n_arms)
+        yield starfish_lpot_source(actx, n_arms)
 
 
 def get_bound_op(places):
@@ -79,15 +81,15 @@ def get_bound_op(places):
     return bind(places, op)
 
 
-def get_test_density(queue, density_discr):
-    nodes = density_discr.nodes().with_queue(queue)
-    sigma = cl.clmath.sin(10 * nodes[0])
-
+def get_test_density(actx, density_discr):
+    x_coords = thaw(actx, density_discr.nodes())[0]  # component 0 of the nodes
+    sigma = actx.np.sin(10 * x_coords)
     return sigma
 
 
 def calibrate_cost_model(ctx):
     queue = cl.CommandQueue(ctx)
+    actx = PyOpenCLArrayContext(queue)
 
     from pytential.qbx.cost import CostModel, estimate_calibration_params
     cost_model = CostModel()
@@ -95,7 +97,7 @@ def calibrate_cost_model(ctx):
     model_results = []
     timing_results = []
 
-    for lpot_source in training_geometries(queue):
+    for lpot_source in training_geometries(actx):
         lpot_source = lpot_source.copy(cost_model=cost_model)
 
         from pytential import GeometryCollection
@@ -103,16 +105,17 @@ def calibrate_cost_model(ctx):
         density_discr = places.get_discretization(places.auto_source.geometry)
 
         bound_op = get_bound_op(places)
-        sigma = get_test_density(queue, density_discr)
+        sigma = get_test_density(actx, density_discr)
 
-        cost_S = bound_op.get_modeled_cost(queue, sigma=sigma)
+        cost_S = bound_op.get_modeled_cost(actx, sigma=sigma)
 
         # Warm-up run.
-        bound_op.eval(queue, {"sigma": sigma})
+        bound_op.eval({"sigma": sigma}, array_context=actx)
 
         for _ in range(RUNS):
             timing_data = {}
-            bound_op.eval(queue, {"sigma": sigma}, timing_data=timing_data)
+            bound_op.eval({"sigma": sigma}, array_context=actx,
+                    timing_data=timing_data)
 
             model_results.append(one(cost_S.values()))
             timing_results.append(one(timing_data.values()))
@@ -125,8 +128,9 @@ def calibrate_cost_model(ctx):
 
 def test_cost_model(ctx, cost_model):
     queue = cl.CommandQueue(ctx)
+    actx = PyOpenCLArrayContext(queue)
 
-    for lpot_source in test_geometries(queue):
+    for lpot_source in test_geometries(actx):
         lpot_source = lpot_source.copy(cost_model=cost_model)
 
         from pytential import GeometryCollection
@@ -134,20 +138,21 @@ def test_cost_model(ctx, cost_model):
         density_discr = places.get_discretization(places.auto_source.geometry)
 
         bound_op = get_bound_op(places)
-        sigma = get_test_density(queue, density_discr)
+        sigma = get_test_density(actx, density_discr)
 
-        cost_S = bound_op.get_modeled_cost(queue, sigma=sigma)
+        cost_S = bound_op.get_modeled_cost(actx, sigma=sigma)
         model_result = (
                 one(cost_S.values())
                 .get_predicted_times(merge_close_lists=True))
 
         # Warm-up run.
-        bound_op.eval(queue, {"sigma": sigma})
+        bound_op.eval({"sigma": sigma}, array_context=actx)
 
         temp_timing_results = []
         for _ in range(RUNS):
             timing_data = {}
-            bound_op.eval(queue, {"sigma": sigma}, timing_data=timing_data)
+            bound_op.eval({"sigma": sigma},
+                    array_context=actx, timing_data=timing_data)
             temp_timing_results.append(one(timing_data.values()))
 
         timing_result = {}
diff --git a/examples/fmm-error.py b/examples/fmm-error.py
index a6d19bb150499c121c38994fcaffdd3e1bca9f50..ea50d70f66b4c842c0404ab326f3e02c2cf3eccd 100644
--- a/examples/fmm-error.py
+++ b/examples/fmm-error.py
@@ -1,6 +1,8 @@
 from __future__ import division
 import numpy as np
 import pyopencl as cl
+from meshmode.array_context import PyOpenCLArrayContext
+from meshmode.dof_array import thaw
 from meshmode.mesh.generation import (  # noqa
         make_curve_mesh, starfish, ellipse, drop)
 from sumpy.visualization import FieldPlotter
@@ -13,6 +15,7 @@ def main():
 
     cl_ctx = cl.create_some_context()
     queue = cl.CommandQueue(cl_ctx)
+    actx = PyOpenCLArrayContext(queue)
 
     target_order = 16
     qbx_order = 3
@@ -37,13 +40,13 @@ def main():
             InterpolatoryQuadratureSimplexGroupFactory
 
     pre_density_discr = Discretization(
-            cl_ctx, mesh,
+            actx, mesh,
             InterpolatoryQuadratureSimplexGroupFactory(target_order))
 
     unaccel_qbx = QBXLayerPotentialSource(
             pre_density_discr, fine_order=2*target_order,
             qbx_order=qbx_order, fmm_order=False,
-            target_association_tolerance=.05
+            target_association_tolerance=.05,
             )
 
     from pytential.target import PointsTarget
@@ -57,24 +60,35 @@ def main():
         })
     density_discr = places.get_discretization("unaccel_qbx")
 
-    nodes = density_discr.nodes().with_queue(queue)
-    angle = cl.clmath.atan2(nodes[1], nodes[0])
+    nodes = thaw(actx, density_discr.nodes())
+    angle = actx.np.atan2(nodes[1], nodes[0])
 
     from pytential import bind, sym
-    #op = sym.d_dx(sym.S(kernel, sym.var("sigma")), qbx_forced_limit=None)
-    #op = sym.D(kernel, sym.var("sigma"), qbx_forced_limit=None)
-    op = sym.S(kernel, sym.var("sigma"), qbx_forced_limit=None)
+    if k:
+        kernel_kwargs = {"k": sym.var("k")}
+    else:
+        kernel_kwargs = {}
+
+    def get_op():
+        # No forced QBX limit; kernel_kwargs adds "k" only when k is truthy.
+        op_kwargs = {"qbx_forced_limit": None, **kernel_kwargs}
+        # return sym.d_dx(2, sym.S(kernel, sym.var("sigma"), **op_kwargs))
+        # return sym.D(kernel, sym.var("sigma"), **op_kwargs)
+        return sym.S(kernel, sym.var("sigma"), **op_kwargs)
+
+    op = get_op()
 
-    sigma = cl.clmath.cos(mode_nr*angle)
+    sigma = actx.np.cos(mode_nr*angle)
 
     if isinstance(kernel, HelmholtzKernel):
-        sigma = sigma.astype(np.complex128)
+        for i, elem in np.ndenumerate(sigma):
+            sigma[i] = elem.astype(np.complex128)
 
     fld_in_vol = bind(places, op, auto_where=("unaccel_qbx", "targets"))(
-            queue, sigma=sigma, k=k).get()
+            actx, sigma=sigma, k=k).get()
 
     fmm_fld_in_vol = bind(places, op, auto_where=("qbx", "targets"))(
-            queue, sigma=sigma, k=k).get()
+            actx, sigma=sigma, k=k).get()
 
     err = fmm_fld_in_vol-fld_in_vol
 
diff --git a/examples/helmholtz-dirichlet.py b/examples/helmholtz-dirichlet.py
index 75115da4a896638c1a18fcc50ae3345704b801ce..aee8c39086fd203e11703f8f762aaeb6c3455d5d 100644
--- a/examples/helmholtz-dirichlet.py
+++ b/examples/helmholtz-dirichlet.py
@@ -3,6 +3,7 @@ import numpy.linalg as la
 import pyopencl as cl
 import pyopencl.clmath  # noqa
 
+from meshmode.array_context import PyOpenCLArrayContext
 from meshmode.discretization import Discretization
 from meshmode.discretization.poly_element import \
         InterpolatoryQuadratureSimplexGroupFactory
@@ -29,6 +30,7 @@ def main(mesh_name="ellipse", visualize=False):
 
     cl_ctx = cl.create_some_context()
     queue = cl.CommandQueue(cl_ctx)
+    actx = PyOpenCLArrayContext(queue)
 
     from meshmode.mesh.generation import ellipse, make_curve_mesh
     from functools import partial
@@ -67,7 +69,7 @@ def main(mesh_name="ellipse", visualize=False):
         raise ValueError("unknown mesh name: {}".format(mesh_name))
 
     pre_density_discr = Discretization(
-            cl_ctx, mesh,
+            actx, mesh,
             InterpolatoryQuadratureSimplexGroupFactory(bdry_quad_order))
 
     from pytential.qbx import (
@@ -79,7 +81,7 @@ def main(mesh_name="ellipse", visualize=False):
 
     from sumpy.visualization import FieldPlotter
     fplot = FieldPlotter(np.zeros(2), extent=5, npoints=500)
-    targets = cl.array.to_device(queue, fplot.points)
+    targets = actx.from_numpy(fplot.points)
 
     from pytential import GeometryCollection
     places = GeometryCollection({
@@ -120,21 +122,22 @@ def main(mesh_name="ellipse", visualize=False):
 
     # {{{ fix rhs and solve
 
-    nodes = density_discr.nodes().with_queue(queue)
+    from meshmode.dof_array import thaw
+    nodes = thaw(actx, density_discr.nodes())
     k_vec = np.array([2, 1])
     k_vec = k * k_vec / la.norm(k_vec, 2)
 
     def u_incoming_func(x):
-        return cl.clmath.exp(
+        return actx.np.exp(
                 1j * (x[0] * k_vec[0] + x[1] * k_vec[1]))
 
     bc = -u_incoming_func(nodes)
 
-    bvp_rhs = bind(places, sqrt_w*sym.var("bc"))(queue, bc=bc)
+    bvp_rhs = bind(places, sqrt_w*sym.var("bc"))(actx, bc=bc)
 
     from pytential.solve import gmres
     gmres_result = gmres(
-            bound_op.scipy_op(queue, sigma_sym.name, dtype=np.complex128, k=k),
+            bound_op.scipy_op(actx, sigma_sym.name, dtype=np.complex128, k=k),
             bvp_rhs, tol=1e-8, progress=True,
             stall_iterations=0,
             hard_failure=True)
@@ -152,15 +155,18 @@ def main(mesh_name="ellipse", visualize=False):
             - sym.D(kernel, inv_sqrt_w_sigma, k=k_sym, **repr_kwargs))
 
     u_incoming = u_incoming_func(targets)
-    ones_density = density_discr.zeros(queue)
-    ones_density.fill(1)
+    ones_density = density_discr.zeros(actx)
+    for elem in ones_density:
+        elem.fill(1)
 
-    indicator = bind(places, sym.D(LaplaceKernel(2), sigma_sym, **repr_kwargs))(
-            queue, sigma=ones_density).get()
+    indicator = actx.to_numpy(
+            bind(places, sym.D(LaplaceKernel(2), sigma_sym, **repr_kwargs))(
+                actx, sigma=ones_density))
 
     try:
-        fld_in_vol = bind(places, representation_sym)(
-                queue, sigma=gmres_result.solution, k=k).get()
+        fld_in_vol = actx.to_numpy(
+                bind(places, representation_sym)(
+                    actx, sigma=gmres_result.solution, k=k))
     except QBXTargetAssociationFailedException as e:
         fplot.write_vtk_file("helmholtz-dirichlet-failed-targets.vts", [
             ("failed", e.failed_target_flags.get(queue))
@@ -171,7 +177,7 @@ def main(mesh_name="ellipse", visualize=False):
     fplot.write_vtk_file("helmholtz-dirichlet-potential.vts", [
         ("potential", fld_in_vol),
         ("indicator", indicator),
-        ("u_incoming", u_incoming.get()),
+        ("u_incoming", actx.to_numpy(u_incoming)),
         ])
 
     # }}}
diff --git a/examples/laplace-dirichlet-3d.py b/examples/laplace-dirichlet-3d.py
index 984f1de10694a493e1b708f35118897c21a25cc5..7f6bba45890308b5ac27b80c836c1916ecd08be3 100644
--- a/examples/laplace-dirichlet-3d.py
+++ b/examples/laplace-dirichlet-3d.py
@@ -3,6 +3,7 @@ import numpy.linalg as la
 import pyopencl as cl
 import pyopencl.clmath  # noqa
 
+from meshmode.array_context import PyOpenCLArrayContext
 from meshmode.discretization import Discretization
 from meshmode.discretization.poly_element import \
         InterpolatoryQuadratureSimplexGroupFactory
@@ -28,6 +29,7 @@ def main(mesh_name="torus", visualize=False):
 
     cl_ctx = cl.create_some_context()
     queue = cl.CommandQueue(cl_ctx)
+    actx = PyOpenCLArrayContext(queue)
 
     if mesh_name == "torus":
         rout = 10
@@ -61,7 +63,7 @@ def main(mesh_name="torus", visualize=False):
         raise ValueError("unknown mesh name: {}".format(mesh_name))
 
     pre_density_discr = Discretization(
-            cl_ctx, mesh,
+            actx, mesh,
             InterpolatoryQuadratureSimplexGroupFactory(bdry_quad_order))
 
     from pytential.qbx import (
@@ -73,7 +75,7 @@ def main(mesh_name="torus", visualize=False):
 
     from sumpy.visualization import FieldPlotter
     fplot = FieldPlotter(np.zeros(3), extent=20, npoints=50)
-    targets = cl.array.to_device(queue, fplot.points)
+    targets = actx.from_numpy(fplot.points)
 
     from pytential import GeometryCollection
     places = GeometryCollection({
@@ -109,33 +111,39 @@ def main(mesh_name="torus", visualize=False):
 
     # {{{ fix rhs and solve
 
-    nodes = density_discr.nodes().with_queue(queue)
+    from meshmode.dof_array import thaw, flatten, unflatten
+    nodes = thaw(actx, density_discr.nodes())
     source = np.array([rout, 0, 0])
 
     def u_incoming_func(x):
+        from pytools.obj_array import obj_array_vectorize
+        x = obj_array_vectorize(actx.to_numpy, flatten(x))
+        x = np.array(list(x))
         #        return 1/cl.clmath.sqrt( (x[0] - source[0])**2
         #                                +(x[1] - source[1])**2
         #                                +(x[2] - source[2])**2 )
-        return 1.0/la.norm(x.get()-source[:, None], axis=0)
+        return 1.0/la.norm(x - source[:, None], axis=0)
 
-    bc = cl.array.to_device(queue, u_incoming_func(nodes))
+    bc = unflatten(actx,
+            density_discr,
+            actx.from_numpy(u_incoming_func(nodes)))
 
-    bvp_rhs = bind(places, sqrt_w*sym.var("bc"))(queue, bc=bc)
+    bvp_rhs = bind(places, sqrt_w*sym.var("bc"))(actx, bc=bc)
 
     from pytential.solve import gmres
     gmres_result = gmres(
-            bound_op.scipy_op(queue, "sigma", dtype=np.float64),
+            bound_op.scipy_op(actx, "sigma", dtype=np.float64),
             bvp_rhs, tol=1e-14, progress=True,
             stall_iterations=0,
             hard_failure=True)
 
     sigma = bind(places, sym.var("sigma")/sqrt_w)(
-            queue, sigma=gmres_result.solution)
+            actx, sigma=gmres_result.solution)
 
     # }}}
 
     from meshmode.discretization.visualization import make_visualizer
-    bdry_vis = make_visualizer(queue, density_discr, 20)
+    bdry_vis = make_visualizer(actx, density_discr, 20)
     bdry_vis.write_vtk_file("laplace.vtu", [
         ("sigma", sigma),
         ])
@@ -151,8 +159,8 @@ def main(mesh_name="torus", visualize=False):
             + sym.D(kernel, inv_sqrt_w_sigma, **repr_kwargs))
 
     try:
-        fld_in_vol = bind(places, representation_sym)(
-                queue, sigma=sigma).get()
+        fld_in_vol = actx.to_numpy(
+                bind(places, representation_sym)(actx, sigma=sigma))
     except QBXTargetAssociationFailedException as e:
         fplot.write_vtk_file("laplace-dirichlet-3d-failed-targets.vts", [
             ("failed", e.failed_target_flags.get(queue)),
diff --git a/examples/layerpot-3d.py b/examples/layerpot-3d.py
index ecace75de9c67dfdb4899f688737aeda14b05fc8..78112858c16890ef13720d5305839d62e7f6cd8b 100644
--- a/examples/layerpot-3d.py
+++ b/examples/layerpot-3d.py
@@ -1,5 +1,5 @@
-from __future__ import division
-
+from meshmode.array_context import PyOpenCLArrayContext
+from meshmode.dof_array import thaw
 import numpy as np
 import pyopencl as cl
 
@@ -22,6 +22,7 @@ def main(mesh_name="ellipsoid"):
 
     cl_ctx = cl.create_some_context()
     queue = cl.CommandQueue(cl_ctx)
+    actx = PyOpenCLArrayContext(queue)
 
     if mesh_name == "ellipsoid":
         cad_file_name = "geometries/ellipsoid.step"
@@ -55,7 +56,7 @@ def main(mesh_name="ellipsoid"):
             InterpolatoryQuadratureSimplexGroupFactory
 
     density_discr = Discretization(
-            cl_ctx, mesh, InterpolatoryQuadratureSimplexGroupFactory(target_order))
+            actx, mesh, InterpolatoryQuadratureSimplexGroupFactory(target_order))
 
     qbx = QBXLayerPotentialSource(density_discr, 4*target_order, qbx_order,
             fmm_order=qbx_order + 3,
@@ -71,8 +72,8 @@ def main(mesh_name="ellipsoid"):
         }, auto_where="qbx")
     density_discr = places.get_discretization("qbx")
 
-    nodes = density_discr.nodes().with_queue(queue)
-    angle = cl.clmath.atan2(nodes[1], nodes[0])
+    nodes = thaw(actx, density_discr.nodes())
+    angle = actx.np.atan2(nodes[1], nodes[0])
 
     if k:
         kernel = HelmholtzKernel(3)
@@ -83,18 +84,22 @@ def main(mesh_name="ellipsoid"):
     op = sym.D(kernel, sym.var("sigma"), qbx_forced_limit=None)
     #op = sym.S(kernel, sym.var("sigma"), qbx_forced_limit=None)
 
-    sigma = cl.clmath.cos(mode_nr*angle)
+    sigma = actx.np.cos(mode_nr*angle)
     if 0:
-        sigma = 0*angle
+        from meshmode.dof_array import flatten, unflatten
+        sigma = flatten(0 * angle)
         from random import randrange
         for i in range(5):
             sigma[randrange(len(sigma))] = 1
+        sigma = unflatten(actx, density_discr, sigma)
 
     if isinstance(kernel, HelmholtzKernel):
-        sigma = sigma.astype(np.complex128)
+        for i, elem in np.ndenumerate(sigma):
+            sigma[i] = elem.astype(np.complex128)
 
-    fld_in_vol = bind(places, op, auto_where=("qbx", "targets"))(
-            queue, sigma=sigma, k=k).get()
+    fld_in_vol = actx.to_numpy(
+            bind(places, op, auto_where=("qbx", "targets"))(
+                actx, sigma=sigma, k=k))
 
     #fplot.show_scalar_in_mayavi(fld_in_vol.real, max_val=5)
     fplot.write_vtk_file("layerpot-3d-potential.vts", [
@@ -102,11 +107,10 @@ def main(mesh_name="ellipsoid"):
         ])
 
     bdry_normals = bind(places,
-            sym.normal(density_discr.ambient_dim))(queue).as_vector(dtype=object)
+            sym.normal(density_discr.ambient_dim))(actx).as_vector(dtype=object)
 
     from meshmode.discretization.visualization import make_visualizer
-    bdry_vis = make_visualizer(queue, density_discr, target_order)
-
+    bdry_vis = make_visualizer(actx, density_discr, target_order)
     bdry_vis.write_vtk_file("layerpot-3d-density.vtu", [
         ("sigma", sigma),
         ("bdry_normals", bdry_normals),
diff --git a/examples/layerpot.py b/examples/layerpot.py
index e01a24eb8ffbc15e232839e7015a2f5feeae35a5..40a060fb4a2e52aeab7bf20cfce3ccf73cf20bc6 100644
--- a/examples/layerpot.py
+++ b/examples/layerpot.py
@@ -36,12 +36,15 @@ def main(curve_fn=starfish, visualize=True):
             target_order)
 
     from pytential.qbx import QBXLayerPotentialSource
+    from meshmode.array_context import PyOpenCLArrayContext
     from meshmode.discretization import Discretization
     from meshmode.discretization.poly_element import \
             InterpolatoryQuadratureSimplexGroupFactory
 
+    actx = PyOpenCLArrayContext(queue)
+
     pre_density_discr = Discretization(
-            cl_ctx, mesh, InterpolatoryQuadratureSimplexGroupFactory(target_order))
+            actx, mesh, InterpolatoryQuadratureSimplexGroupFactory(target_order))
 
     qbx = QBXLayerPotentialSource(pre_density_discr, 4*target_order, qbx_order,
             fmm_order=qbx_order+3,
@@ -56,10 +59,12 @@ def main(curve_fn=starfish, visualize=True):
         "qbx": qbx,
         "targets": PointsTarget(targets_dev),
         }, auto_where="qbx")
+
     density_discr = places.get_discretization("qbx")
 
-    nodes = density_discr.nodes().with_queue(queue)
-    angle = cl.clmath.atan2(nodes[1], nodes[0])
+    from meshmode.dof_array import thaw
+    nodes = thaw(actx, density_discr.nodes())
+    angle = actx.np.atan2(nodes[1], nodes[0])
 
     if k:
         kernel = HelmholtzKernel(2)
@@ -75,22 +80,26 @@ def main(curve_fn=starfish, visualize=True):
         return sym.D(kernel, sym.var("sigma"), **kwargs)
         #op = sym.S(kernel, sym.var("sigma"), qbx_forced_limit=None, **kwargs)
 
-    sigma = cl.clmath.cos(mode_nr*angle)
+    sigma = actx.np.cos(mode_nr*angle)
     if 0:
-        sigma = 0*angle
+        from meshmode.dof_array import flatten, unflatten
+        sigma = flatten(0 * angle)
         from random import randrange
         for i in range(5):
             sigma[randrange(len(sigma))] = 1
+        sigma = unflatten(actx, density_discr, sigma)
 
     if isinstance(kernel, HelmholtzKernel):
-        sigma = sigma.astype(np.complex128)
+        for i, elem in np.ndenumerate(sigma):
+            sigma[i] = elem.astype(np.complex128)
 
     bound_bdry_op = bind(places, op())
     if visualize:
-        fld_in_vol = bind(places, op(
-            source="qbx",
-            target="targets",
-            qbx_forced_limit=None))(queue, sigma=sigma, k=k).get()
+        fld_in_vol = actx.to_numpy(
+                bind(places, op(
+                    source="qbx",
+                    target="targets",
+                    qbx_forced_limit=None))(actx, sigma=sigma, k=k))
 
         if enable_mayavi:
             fplot.show_scalar_in_mayavi(fld_in_vol.real, max_val=5)
@@ -100,13 +109,9 @@ def main(curve_fn=starfish, visualize=True):
                 ])
 
     if 0:
-        def apply_op(density):
-            return bound_bdry_op(
-                    queue, sigma=cl.array.to_device(queue, density), k=k).get()
-
+        apply_op = bound_bdry_op.scipy_op(actx, "sigma", np.float64, k=k)
         from sumpy.tools import build_matrix
-        n = len(sigma)
-        mat = build_matrix(apply_op, dtype=np.float64, shape=(n, n))
+        mat = build_matrix(apply_op)
 
         import matplotlib.pyplot as pt
         pt.imshow(mat)
@@ -116,18 +121,20 @@ def main(curve_fn=starfish, visualize=True):
     if enable_mayavi:
         # {{{ plot boundary field
 
-        fld_on_bdry = bound_bdry_op(queue, sigma=sigma, k=k).get()
+        from pytential.utils import flatten_to_numpy
+
+        fld_on_bdry = flatten_to_numpy(
+                actx, bound_bdry_op(actx, sigma=sigma, k=k))
+        nodes_host = flatten_to_numpy(actx, density_discr.nodes())
 
-        nodes_host = density_discr.nodes().get(queue=queue)
         mlab.points3d(nodes_host[0], nodes_host[1],
                 fld_on_bdry.real, scale_factor=0.03)
 
-        # }}}
-
-    if enable_mayavi:
         mlab.colorbar()
         mlab.show()
 
+        # }}}
+
 
 if __name__ == "__main__":
     main()
diff --git a/examples/scaling-study.py b/examples/scaling-study.py
index 21a85019ff03214f314265999598ed000350e3d5..a14a5aed0b41085379c79f8cd90684ba4ec65daa 100644
--- a/examples/scaling-study.py
+++ b/examples/scaling-study.py
@@ -2,6 +2,7 @@ import numpy as np
 import pyopencl as cl
 import pyopencl.clmath  # noqa
 
+from meshmode.array_context import PyOpenCLArrayContext
 from meshmode.discretization import Discretization
 from meshmode.discretization.poly_element import \
         InterpolatoryQuadratureSimplexGroupFactory
@@ -58,11 +59,12 @@ def timing_run(nx, ny, visualize=False):
 
     cl_ctx = cl.create_some_context()
     queue = cl.CommandQueue(cl_ctx)
+    actx = PyOpenCLArrayContext(queue)
 
     mesh = make_mesh(nx=nx, ny=ny, visualize=visualize)
 
     density_discr = Discretization(
-            cl_ctx, mesh,
+            actx, mesh,
             InterpolatoryQuadratureSimplexGroupFactory(bdry_quad_order))
 
     from pytential.qbx import (
@@ -76,7 +78,7 @@ def timing_run(nx, ny, visualize=False):
     if visualize:
         from sumpy.visualization import FieldPlotter
         fplot = FieldPlotter(np.zeros(2), extent=5, npoints=1500)
-        targets = PointsTarget(cl.array.to_device(queue, fplot.points))
+        targets = PointsTarget(actx.from_numpy(fplot.points))
 
         places.update({
             "plot-targets": targets,
@@ -119,10 +121,12 @@ def timing_run(nx, ny, visualize=False):
     # {{{ fix rhs and solve
 
     mode_nr = 3
-    nodes = density_discr.nodes().with_queue(queue)
-    angle = cl.clmath.atan2(nodes[1], nodes[0])
 
-    sigma = cl.clmath.cos(mode_nr*angle)
+    from meshmode.dof_array import thaw
+    nodes = thaw(actx, density_discr.nodes())
+    angle = actx.np.atan2(nodes[1], nodes[0])
+
+    sigma = actx.np.cos(mode_nr*angle)
 
     # }}}
 
@@ -134,17 +138,17 @@ def timing_run(nx, ny, visualize=False):
     bound_op = bind(places, sym_op)
 
     print("FMM WARM-UP RUN 1: %5d elements" % mesh.nelements)
-    bound_op(queue, sigma=sigma, k=k)
+    bound_op(actx, sigma=sigma, k=k)
     queue.finish()
 
     print("FMM WARM-UP RUN 2: %5d elements" % mesh.nelements)
-    bound_op(queue, sigma=sigma, k=k)
+    bound_op(actx, sigma=sigma, k=k)
     queue.finish()
 
     from time import time
     t_start = time()
-    bound_op(queue, sigma=sigma, k=k)
-    queue.finish()
+    bound_op(actx, sigma=sigma, k=k)
+    actx.queue.finish()
     elapsed = time() - t_start
 
     print("FMM TIMING RUN:    %5d elements -> %g s"
diff --git a/pytential/__init__.py b/pytential/__init__.py
index 728ce196fd33a55566ea9f21e920e2b59332453b..1f548debac95ea850c6c7798b632c076cf2a3d56 100644
--- a/pytential/__init__.py
+++ b/pytential/__init__.py
@@ -66,8 +66,8 @@ def _integral_op(discr):
                 discr.ambient_dim, discr.dim, sym.var("integrand")))
 
 
-def integral(discr, queue, x):
-    return _integral_op(discr)(queue, integrand=x)
+def integral(discr, x):
+    return _integral_op(discr)(integrand=x)
 
 
 @memoize_on_first_arg
@@ -97,23 +97,26 @@ def _norm_inf_op(discr, num_components):
     return bind(discr, sym.NodeMax(max_arg))
 
 
-def norm(discr, queue, x, p=2):
+def norm(discr, x, p=2):
     from pymbolic.geometric_algebra import MultiVector
     if isinstance(x, MultiVector):
         x = x.as_vector(np.object)
 
+    from meshmode.dof_array import DOFArray
     num_components = None
-    if isinstance(x, np.ndarray):
+    if (isinstance(x, np.ndarray)
+            and x.dtype.char == "O"
+            and not isinstance(x, DOFArray)):
         num_components, = x.shape
 
     if p == 2:
         norm_op = _norm_2_op(discr, num_components)
         from math import sqrt
-        return sqrt(norm_op(queue, integrand=x))
+        return sqrt(norm_op(integrand=x))
 
     elif p == np.inf or p == "inf":
         norm_op = _norm_inf_op(discr, num_components)
-        norm_res = norm_op(queue, arg=x)
+        norm_res = norm_op(arg=x)
         if isinstance(norm_res, np.ndarray):
             return max(norm_res)
         else:
diff --git a/pytential/linalg/proxy.py b/pytential/linalg/proxy.py
index ba2e2ea96990c6b57457e9a01d3e6cdba5920e33..0f79148e8961433de65fc960487c5e4cd6202e94 100644
--- a/pytential/linalg/proxy.py
+++ b/pytential/linalg/proxy.py
@@ -26,10 +26,6 @@ THE SOFTWARE.
 import numpy as np
 import numpy.linalg as la
 
-import pyopencl as cl
-import pyopencl.array # noqa
-from pyopencl.array import to_device
-
 from pytools.obj_array import make_obj_array
 from pytools import memoize_method, memoize_in
 from sumpy.tools import BlockIndexRanges
@@ -54,16 +50,7 @@ Proxy Point Generation
 
 # {{{ point index partitioning
 
-def _element_node_range(group, ielement):
-    istart = group.node_nr_base + group.nunit_nodes * ielement
-    iend = group.node_nr_base + group.nunit_nodes * (ielement + 1)
-
-    return np.arange(istart, iend)
-
-
-def partition_by_nodes(discr,
-                       use_tree=True,
-                       max_nodes_in_box=None):
+def partition_by_nodes(actx, discr, use_tree=True, max_nodes_in_box=None):
     """Generate equally sized ranges of nodes. The partition is created at the
     lowest level of granularity, i.e. nodes. This results in balanced ranges
     of points, but will split elements across different ranges.
@@ -82,112 +69,42 @@ def partition_by_nodes(discr,
         # FIXME: this is just an arbitrary value
         max_nodes_in_box = 32
 
-    with cl.CommandQueue(discr.cl_context) as queue:
-        if use_tree:
-            from boxtree import box_flags_enum
-            from boxtree import TreeBuilder
+    if use_tree:
+        from boxtree import box_flags_enum
+        from boxtree import TreeBuilder
 
-            builder = TreeBuilder(discr.cl_context)
+        builder = TreeBuilder(actx.context)
 
-            tree, _ = builder(queue, discr.nodes(),
+        from meshmode.dof_array import flatten, thaw
+        tree, _ = builder(actx.queue,
+                flatten(thaw(actx, discr.nodes())),
                 max_particles_in_box=max_nodes_in_box)
 
-            tree = tree.get(queue)
-            leaf_boxes, = (tree.box_flags
-                           & box_flags_enum.HAS_CHILDREN == 0).nonzero()
+        tree = tree.get(actx.queue)
+        leaf_boxes, = (tree.box_flags
+                       & box_flags_enum.HAS_CHILDREN == 0).nonzero()
 
-            indices = np.empty(len(leaf_boxes), dtype=np.object)
-            for i, ibox in enumerate(leaf_boxes):
-                box_start = tree.box_source_starts[ibox]
-                box_end = box_start + tree.box_source_counts_cumul[ibox]
-                indices[i] = tree.user_source_ids[box_start:box_end]
+        indices = np.empty(len(leaf_boxes), dtype=object)
+        for i, ibox in enumerate(leaf_boxes):
+            box_start = tree.box_source_starts[ibox]
+            box_end = box_start + tree.box_source_counts_cumul[ibox]
+            indices[i] = tree.user_source_ids[box_start:box_end]
 
-            ranges = to_device(queue,
-                np.cumsum([0] + [box.shape[0] for box in indices]))
-            indices = to_device(queue, np.hstack(indices))
-        else:
-            indices = cl.array.arange(queue, 0, discr.nnodes,
-                                      dtype=np.int)
-            ranges = cl.array.arange(queue, 0, discr.nnodes + 1,
-                                     discr.nnodes // max_nodes_in_box,
-                                     dtype=np.int)
-        assert ranges[-1] == discr.nnodes
-
-        return BlockIndexRanges(discr.cl_context,
-                                indices.with_queue(None),
-                                ranges.with_queue(None))
-
-
-def partition_from_coarse(resampler, from_indices):
-    """Generate a partition of nodes from an existing partition on a
-    coarser discretization. The new partition is generated based on element
-    refinement relationships in *resampler*, so the existing partition
-    needs to be created using :func:`partition_by_elements`,
-    since we assume that each range contains all the nodes in an element.
-
-    The new partition will have the same number of ranges as the old partition.
-    The nodes inside each range in the new partition are all the nodes in
-    *resampler.to_discr* that were refined from elements in the same
-    range from *resampler.from_discr*.
-
-    :arg resampler: a
-        :class:`meshmode.discretization.connection.DirectDiscretizationConnection`.
-    :arg from_indices: a :class:`sumpy.tools.BlockIndexRanges`.
+        ranges = actx.from_numpy(
+                np.cumsum([0] + [box.shape[0] for box in indices])
+                )
+        indices = actx.from_numpy(np.hstack(indices))
+    else:
+        indices = actx.from_numpy(np.arange(0, discr.ndofs, dtype=int))
+        ranges = actx.from_numpy(np.arange(
+            0,
+            discr.ndofs + 1,
+            discr.ndofs // max_nodes_in_box, dtype=int))
 
-    :return: a :class:`sumpy.tools.BlockIndexRanges`.
-    """
+    assert ranges[-1] == discr.ndofs
 
-    if not hasattr(resampler, "groups"):
-        raise ValueError("resampler must be a DirectDiscretizationConnection.")
-
-    with cl.CommandQueue(resampler.cl_context) as queue:
-        from_indices = from_indices.get(queue)
-
-        # construct ranges
-        from_discr = resampler.from_discr
-        from_grp_ranges = np.cumsum(
-            [0] + [grp.nelements for grp in from_discr.mesh.groups])
-        from_el_ranges = np.hstack([
-            np.arange(grp.node_nr_base, grp.nnodes + 1, grp.nunit_nodes)
-            for grp in from_discr.groups])
-
-        # construct coarse element arrays in each from_range
-        el_indices = np.empty(from_indices.nblocks, dtype=np.object)
-        el_ranges = np.full(from_grp_ranges[-1], -1, dtype=np.int)
-        for i in range(from_indices.nblocks):
-            ifrom = from_indices.block_indices(i)
-            el_indices[i] = np.unique(np.digitize(ifrom, from_el_ranges)) - 1
-            el_ranges[el_indices[i]] = i
-        el_indices = np.hstack(el_indices)
-
-        # construct lookup table
-        to_el_table = [np.full(g.nelements, -1, dtype=np.int)
-                       for g in resampler.to_discr.groups]
-
-        for igrp, grp in enumerate(resampler.groups):
-            for batch in grp.batches:
-                to_el_table[igrp][batch.to_element_indices.get(queue)] = \
-                    from_grp_ranges[igrp] + batch.from_element_indices.get(queue)
-
-        # construct fine node index list
-        indices = [np.empty(0, dtype=np.int)
-                   for _ in range(from_indices.nblocks)]
-        for igrp in range(len(resampler.groups)):
-            to_element_indices = \
-                    np.where(np.isin(to_el_table[igrp], el_indices))[0]
-
-            for i, j in zip(el_ranges[to_el_table[igrp][to_element_indices]],
-                            to_element_indices):
-                indices[i] = np.hstack([indices[i],
-                    _element_node_range(resampler.to_discr.groups[igrp], j)])
-
-        ranges = to_device(queue,
-                np.cumsum([0] + [b.shape[0] for b in indices]))
-        indices = to_device(queue, np.hstack(indices))
-
-        return BlockIndexRanges(resampler.cl_context,
-                                indices.with_queue(None),
-                                ranges.with_queue(None))
+    return BlockIndexRanges(actx.context,
+        actx.freeze(indices), actx.freeze(ranges))
 
 # }}}
 
@@ -340,7 +257,7 @@ class ProxyGenerator(object):
             """.format(radius_expr=radius_expr)],
             [
                 lp.GlobalArg("sources", None,
-                    shape=(self.ambient_dim, "nsources")),
+                    shape=(self.ambient_dim, "nsources"), dim_tags="sep,C"),
                 lp.GlobalArg("center_int", None,
                     shape=(self.ambient_dim, "nsources"), dim_tags="sep,C"),
                 lp.GlobalArg("center_ext", None,
@@ -367,11 +284,11 @@ class ProxyGenerator(object):
 
         return knl
 
-    def __call__(self, queue, source_dd, indices, **kwargs):
+    def __call__(self, actx, source_dd, indices, **kwargs):
         """Generate proxy points for each given range of source points in
         the discretization in *source_dd*.
 
-        :arg queue: a :class:`pyopencl.CommandQueue`.
+        :arg actx: a :class:`~meshmode.array_context.ArrayContext`.
         :arg source_dd: a :class:`~pytential.symbolic.primitives.DOFDescriptor`
             for the discretization on which the proxy points are to be
             generated.
@@ -397,47 +314,51 @@ class ProxyGenerator(object):
                 source_dd.geometry, source_dd.discr_stage)
 
         radii = bind(self.places, sym.expansion_radii(
-            self.ambient_dim, dofdesc=source_dd))(queue)
+            self.ambient_dim, dofdesc=source_dd))(actx)
         center_int = bind(self.places, sym.expansion_centers(
-            self.ambient_dim, -1, dofdesc=source_dd))(queue)
+            self.ambient_dim, -1, dofdesc=source_dd))(actx)
         center_ext = bind(self.places, sym.expansion_centers(
-            self.ambient_dim, +1, dofdesc=source_dd))(queue)
+            self.ambient_dim, +1, dofdesc=source_dd))(actx)
 
+        from meshmode.dof_array import flatten, thaw
         knl = self.get_kernel()
-        _, (centers_dev, radii_dev,) = knl(queue,
-            sources=discr.nodes(),
-            center_int=center_int,
-            center_ext=center_ext,
-            expansion_radii=radii,
+        _, (centers_dev, radii_dev,) = knl(actx.queue,
+            sources=flatten(thaw(actx, discr.nodes())),
+            center_int=flatten(center_int),
+            center_ext=flatten(center_ext),
+            expansion_radii=flatten(radii),
             srcindices=indices.indices,
             srcranges=indices.ranges, **kwargs)
-        centers = centers_dev.get()
-        radii = radii_dev.get()
 
+        from pytential.utils import flatten_to_numpy
+        centers = flatten_to_numpy(actx, centers_dev)
+        radii = flatten_to_numpy(actx, radii_dev)
         proxies = np.empty(indices.nblocks, dtype=np.object)
         for i in range(indices.nblocks):
             proxies[i] = _affine_map(self.ref_points,
                     A=(radii[i] * np.eye(self.ambient_dim)),
                     b=centers[:, i].reshape(-1, 1))
 
-        pxyranges = cl.array.arange(queue,
-                0,
-                proxies.shape[0] * proxies[0].shape[1] + 1,
-                proxies[0].shape[1],
-                dtype=indices.ranges.dtype)
+        pxyranges = actx.from_numpy(np.arange(
+            0,
+            proxies.shape[0] * proxies[0].shape[1] + 1,
+            proxies[0].shape[1],
+            dtype=indices.ranges.dtype))
         proxies = make_obj_array([
-            cl.array.to_device(queue, np.hstack([p[idim] for p in proxies]))
-            for idim in range(self.ambient_dim)])
+            actx.freeze(actx.from_numpy(np.hstack([p[idim] for p in proxies])))
+            for idim in range(self.ambient_dim)
+            ])
         centers = make_obj_array([
-            centers_dev[idim].with_queue(queue).copy()
-            for idim in range(self.ambient_dim)])
+            actx.freeze(centers_dev[idim])
+            for idim in range(self.ambient_dim)
+            ])
 
         assert pxyranges[-1] == proxies[0].shape[0]
-        return proxies, pxyranges, centers, radii_dev
+        return proxies, actx.freeze(pxyranges), centers, actx.freeze(radii_dev)
 
 
-def gather_block_neighbor_points(discr, indices, pxycenters, pxyradii,
-                                 max_nodes_in_box=None):
+def gather_block_neighbor_points(actx, discr, indices, pxycenters, pxyradii,
+        max_nodes_in_box=None):
     """Generate a set of neighboring points for each range of points in
     *discr*. Neighboring points of a range :math:`i` are defined
     as all the points inside the proxy ball :math:`i` that do not also
@@ -455,79 +376,77 @@ def gather_block_neighbor_points(discr, indices, pxycenters, pxyradii,
         # FIXME: this is a fairly arbitrary value
         max_nodes_in_box = 32
 
-    with cl.CommandQueue(discr.cl_context) as queue:
-        indices = indices.get(queue)
-
-        # NOTE: this is constructed for multiple reasons:
-        #   * TreeBuilder takes object arrays
-        #   * `srcindices` can be a small subset of nodes, so this will save
-        #   some work
-        #   * `srcindices` may reorder the array returned by nodes(), so this
-        #   makes sure that we have the same order in tree.user_source_ids
-        #   and friends
-        sources = discr.nodes().get(queue)
-        sources = make_obj_array([
-            cl.array.to_device(queue, sources[idim, indices.indices])
-            for idim in range(discr.ambient_dim)])
-
-        # construct tree
-        from boxtree import TreeBuilder
-        builder = TreeBuilder(discr.cl_context)
-        tree, _ = builder(queue, sources,
-                          max_particles_in_box=max_nodes_in_box)
-
-        from boxtree.area_query import AreaQueryBuilder
-        builder = AreaQueryBuilder(discr.cl_context)
-        query, _ = builder(queue, tree, pxycenters, pxyradii)
-
-        # find nodes inside each proxy ball
-        tree = tree.get(queue)
-        query = query.get(queue)
-
-        if isinstance(pxycenters[0], cl.array.Array):
-            pxycenters = np.vstack([pxycenters[idim].get(queue)
-                                    for idim in range(discr.ambient_dim)])
-        if isinstance(pxyradii, cl.array.Array):
-            pxyradii = pxyradii.get(queue)
-
-        nbrindices = np.empty(indices.nblocks, dtype=np.object)
-        for iproxy in range(indices.nblocks):
-            # get list of boxes intersecting the current ball
-            istart = query.leaves_near_ball_starts[iproxy]
-            iend = query.leaves_near_ball_starts[iproxy + 1]
-            iboxes = query.leaves_near_ball_lists[istart:iend]
-
-            # get nodes inside the boxes
-            istart = tree.box_source_starts[iboxes]
-            iend = istart + tree.box_source_counts_cumul[iboxes]
-            isources = np.hstack([np.arange(s, e)
-                                  for s, e in zip(istart, iend)])
-            nodes = np.vstack([tree.sources[idim][isources]
-                               for idim in range(discr.ambient_dim)])
-            isources = tree.user_source_ids[isources]
-
-            # get nodes inside the ball but outside the current range
-            center = pxycenters[:, iproxy].reshape(-1, 1)
-            radius = pxyradii[iproxy]
-            mask = ((la.norm(nodes - center, axis=0) < radius)
-                    & ((isources < indices.ranges[iproxy])
-                        | (indices.ranges[iproxy + 1] <= isources)))
-
-            nbrindices[iproxy] = indices.indices[isources[mask]]
-
-        nbrranges = to_device(queue,
-                np.cumsum([0] + [n.shape[0] for n in nbrindices]))
-        nbrindices = to_device(queue, np.hstack(nbrindices))
-
-        return BlockIndexRanges(discr.cl_context,
-                                nbrindices.with_queue(None),
-                                nbrranges.with_queue(None))
-
-
-def gather_block_interaction_points(places, source_dd, indices,
-                                    radius_factor=None,
-                                    approx_nproxy=None,
-                                    max_nodes_in_box=None):
+    indices = indices.get(actx.queue)
+
+    # NOTE: this is constructed for multiple reasons:
+    #   * TreeBuilder takes object arrays
+    #   * `srcindices` can be a small subset of nodes, so this will save
+    #   some work
+    #   * `srcindices` may reorder the array returned by nodes(), so this
+    #   makes sure that we have the same order in tree.user_source_ids
+    #   and friends
+    from pytential.utils import flatten_to_numpy
+    sources = flatten_to_numpy(actx, discr.nodes())
+    sources = make_obj_array([
+        actx.from_numpy(sources[idim][indices.indices])
+        for idim in range(discr.ambient_dim)])
+
+    # construct tree
+    from boxtree import TreeBuilder
+    builder = TreeBuilder(actx.context)
+    tree, _ = builder(actx.queue, sources,
+            max_particles_in_box=max_nodes_in_box)
+
+    from boxtree.area_query import AreaQueryBuilder
+    builder = AreaQueryBuilder(actx.context)
+    query, _ = builder(actx.queue, tree, pxycenters, pxyradii)
+
+    # find nodes inside each proxy ball
+    tree = tree.get(actx.queue)
+    query = query.get(actx.queue)
+
+    pxycenters = np.vstack([
+        actx.to_numpy(pxycenters[idim])
+        for idim in range(discr.ambient_dim)
+        ])
+    pxyradii = actx.to_numpy(pxyradii)
+
+    nbrindices = np.empty(indices.nblocks, dtype=object)
+    for iproxy in range(indices.nblocks):
+        # get list of boxes intersecting the current ball
+        istart = query.leaves_near_ball_starts[iproxy]
+        iend = query.leaves_near_ball_starts[iproxy + 1]
+        iboxes = query.leaves_near_ball_lists[istart:iend]
+
+        # get nodes inside the boxes
+        istart = tree.box_source_starts[iboxes]
+        iend = istart + tree.box_source_counts_cumul[iboxes]
+        isources = np.hstack([np.arange(s, e)
+                              for s, e in zip(istart, iend)])
+        nodes = np.vstack([tree.sources[idim][isources]
+                           for idim in range(discr.ambient_dim)])
+        isources = tree.user_source_ids[isources]
+
+        # get nodes inside the ball but outside the current range
+        center = pxycenters[:, iproxy].reshape(-1, 1)
+        radius = pxyradii[iproxy]
+        mask = ((la.norm(nodes - center, axis=0) < radius)
+                & ((isources < indices.ranges[iproxy])
+                    | (indices.ranges[iproxy + 1] <= isources)))
+
+        nbrindices[iproxy] = indices.indices[isources[mask]]
+
+    nbrranges = actx.from_numpy(np.cumsum([0] + [n.shape[0] for n in nbrindices]))
+    nbrindices = actx.from_numpy(np.hstack(nbrindices))
+
+    return BlockIndexRanges(actx.context,
+            actx.freeze(nbrindices), actx.freeze(nbrranges))
+
+
+def gather_block_interaction_points(actx, places, source_dd, indices,
+        radius_factor=None,
+        approx_nproxy=None,
+        max_nodes_in_box=None):
     """Generate sets of interaction points for each given range of indices
     in the *source* discretization. For each input range of indices,
     the corresponding output range of points is consists of:
@@ -583,7 +502,7 @@ def gather_block_interaction_points(places, source_dd, indices,
             """,
             [
                 lp.GlobalArg("sources", None,
-                    shape=(lpot_source.ambient_dim, "nsources")),
+                    shape=(lpot_source.ambient_dim, "nsources"), dim_tags="sep,C"),
                 lp.GlobalArg("proxies", None,
                     shape=(lpot_source.ambient_dim, "nproxies"), dim_tags="sep,C"),
                 lp.GlobalArg("nbrindices", None,
@@ -607,28 +526,28 @@ def gather_block_interaction_points(places, source_dd, indices,
         return loopy_knl
 
     lpot_source = places.get_geometry(source_dd.geometry)
-    with cl.CommandQueue(lpot_source.cl_context) as queue:
-        generator = ProxyGenerator(places,
-                radius_factor=radius_factor,
-                approx_nproxy=approx_nproxy)
-        proxies, pxyranges, pxycenters, pxyradii = \
-                generator(queue, source_dd, indices)
-
-        discr = places.get_discretization(source_dd.geometry, source_dd.discr_stage)
-        neighbors = gather_block_neighbor_points(discr,
-                indices, pxycenters, pxyradii,
-                max_nodes_in_box=max_nodes_in_box)
-
-        ranges = cl.array.zeros(queue, indices.nblocks + 1, dtype=np.int)
-        _, (nodes, ranges) = knl()(queue,
-                sources=discr.nodes(),
-                proxies=proxies,
-                pxyranges=pxyranges,
-                nbrindices=neighbors.indices,
-                nbrranges=neighbors.ranges,
-                ranges=ranges)
-
-        return nodes.with_queue(None), ranges.with_queue(None)
+    generator = ProxyGenerator(places,
+            radius_factor=radius_factor,
+            approx_nproxy=approx_nproxy)
+    proxies, pxyranges, pxycenters, pxyradii = \
+            generator(actx, source_dd, indices)
+
+    discr = places.get_discretization(source_dd.geometry, source_dd.discr_stage)
+    neighbors = gather_block_neighbor_points(actx, discr,
+            indices, pxycenters, pxyradii,
+            max_nodes_in_box=max_nodes_in_box)
+
+    from meshmode.dof_array import flatten, thaw
+    ranges = actx.zeros(indices.nblocks + 1, dtype=int)
+    _, (nodes, ranges) = knl()(actx.queue,
+            sources=flatten(thaw(actx, discr.nodes())),
+            proxies=proxies,
+            pxyranges=pxyranges,
+            nbrindices=neighbors.indices,
+            nbrranges=neighbors.ranges,
+            ranges=ranges)
+
+    return actx.freeze(nodes), actx.freeze(ranges)
 
 # }}}
 
diff --git a/pytential/qbx/__init__.py b/pytential/qbx/__init__.py
index a23dd3d711258cdf25387196c1c89145c062994d..db87e79acae37e6423784b6fc0a7d57dd1ce8e91 100644
--- a/pytential/qbx/__init__.py
+++ b/pytential/qbx/__init__.py
@@ -1,5 +1,4 @@
-# -*- coding: utf-8 -*-
-from __future__ import division, absolute_import
+from __future__ import annotations
 
 __copyright__ = "Copyright (C) 2013 Andreas Kloeckner"
 
@@ -25,8 +24,11 @@ THE SOFTWARE.
 
 import six
 
+from meshmode.array_context import PyOpenCLArrayContext
+from meshmode.dof_array import flatten, unflatten, thaw
 import numpy as np
-from pytools import memoize_method
+from pytools import memoize_method, memoize_in
+
 from pytential.qbx.target_assoc import QBXTargetAssociationFailedException
 from pytential.source import LayerPotentialSourceBase
 
@@ -335,24 +337,57 @@ class QBXLayerPotentialSource(LayerPotentialSourceBase):
 
     # }}}
 
+    # {{{ code containers
+
     @property
-    @memoize_method
     def tree_code_container(self):
-        from pytential.qbx.utils import TreeCodeContainer
-        return TreeCodeContainer(self.cl_context)
+        @memoize_in(self._setup_actx, (
+                QBXLayerPotentialSource, "tree_code_container"))
+        def make_container():
+            from pytential.qbx.utils import TreeCodeContainer
+            return TreeCodeContainer(self._setup_actx)
+        return make_container()
 
     @property
-    @memoize_method
     def refiner_code_container(self):
-        from pytential.qbx.refinement import RefinerCodeContainer
-        return RefinerCodeContainer(self.cl_context, self.tree_code_container)
+        @memoize_in(self._setup_actx, (
+                QBXLayerPotentialSource, "refiner_code_container"))
+        def make_container():
+            from pytential.qbx.refinement import RefinerCodeContainer
+            return RefinerCodeContainer(
+                    self._setup_actx, self.tree_code_container)
+        return make_container()
 
     @property
-    @memoize_method
     def target_association_code_container(self):
-        from pytential.qbx.target_assoc import TargetAssociationCodeContainer
-        return TargetAssociationCodeContainer(
-                self.cl_context, self.tree_code_container)
+        @memoize_in(self._setup_actx, (
+                QBXLayerPotentialSource, "target_association_code_container"))
+        def make_container():
+            from pytential.qbx.target_assoc import TargetAssociationCodeContainer
+            return TargetAssociationCodeContainer(
+                    self._setup_actx, self.tree_code_container)
+        return make_container()
+
+    @property
+    def qbx_fmm_geometry_data_code_container(self):
+        @memoize_in(self._setup_actx, (
+                QBXLayerPotentialSource,
+                "qbx_fmm_geometry_data_code_container"))
+        def make_container(
+                debug, ambient_dim, well_sep_is_n_away,
+                from_sep_smaller_crit):
+            from pytential.qbx.geometry import QBXFMMGeometryDataCodeContainer
+            return QBXFMMGeometryDataCodeContainer(
+                    self._setup_actx,
+                    ambient_dim, self.tree_code_container, debug,
+                    _well_sep_is_n_away=well_sep_is_n_away,
+                    _from_sep_smaller_crit=from_sep_smaller_crit)
+
+        return make_container(
+                self.debug, self.ambient_dim,
+                self._well_sep_is_n_away, self._from_sep_smaller_crit)
+
+    # }}}
 
     # {{{ internal API
 
@@ -371,7 +406,7 @@ class QBXLayerPotentialSource(LayerPotentialSourceBase):
         from pytential.qbx.geometry import QBXFMMGeometryData
 
         return QBXFMMGeometryData(places, name,
-                self.qbx_fmm_code_getter,
+                self.qbx_fmm_geometry_data_code_container,
                 target_discrs_and_qbx_sides,
                 target_association_tolerance=self.target_association_tolerance,
                 tree_kind=self._tree_kind,
@@ -445,8 +480,8 @@ class QBXLayerPotentialSource(LayerPotentialSourceBase):
             cost_model_result = (
                     self.cost_model(wrangler, geo_data, kernel, kernel_arguments))
 
-            from pytools.obj_array import with_object_array_or_scalar
-            output_placeholder = with_object_array_or_scalar(
+            from pytools.obj_array import obj_array_vectorize
+            output_placeholder = obj_array_vectorize(
                 wrangler.finalize_potentials,
                 wrangler.full_output_zeros()
             )
@@ -471,15 +506,6 @@ class QBXLayerPotentialSource(LayerPotentialSourceBase):
 
         return func(queue, insn, bound_expr, evaluate, **extra_args)
 
-    @property
-    @memoize_method
-    def qbx_fmm_code_getter(self):
-        from pytential.qbx.geometry import QBXFMMGeometryCodeGetter
-        return QBXFMMGeometryCodeGetter(self.cl_context, self.ambient_dim,
-                self.tree_code_container, debug=self.debug,
-                _well_sep_is_n_away=self._well_sep_is_n_away,
-                _from_sep_smaller_crit=self._from_sep_smaller_crit)
-
     # {{{ fmm-based execution
 
     @memoize_method
@@ -542,8 +568,8 @@ class QBXLayerPotentialSource(LayerPotentialSourceBase):
 
         return target_name_and_side_to_number, tuple(target_discrs_and_qbx_sides)
 
-    def exec_compute_potential_insn_fmm(self, queue, insn, bound_expr, evaluate,
-            fmm_driver):
+    def exec_compute_potential_insn_fmm(self, actx: PyOpenCLArrayContext,
+            insn, bound_expr, evaluate, fmm_driver):
         """
         :arg fmm_driver: A function that accepts four arguments:
             *wrangler*, *strength*, *geo_data*, *kernel*, *kernel_arguments*
@@ -572,21 +598,23 @@ class QBXLayerPotentialSource(LayerPotentialSourceBase):
 
         from pytential import bind, sym
         waa = bind(bound_expr.places, sym.weights_and_area_elements(
-            self.ambient_dim, dofdesc=insn.source))(queue)
-        strengths = waa * evaluate(insn.density).with_queue(queue)
+            self.ambient_dim, dofdesc=insn.source))(actx)
+        density = evaluate(insn.density)
+        strengths = waa * density
+        flat_strengths = flatten(strengths)
 
         out_kernels = tuple(knl for knl in insn.kernels)
         fmm_kernel = self.get_fmm_kernel(out_kernels)
         output_and_expansion_dtype = (
-                self.get_fmm_output_and_expansion_dtype(fmm_kernel, strengths))
+                self.get_fmm_output_and_expansion_dtype(fmm_kernel, flat_strengths))
         kernel_extra_kwargs, source_extra_kwargs = (
                 self.get_fmm_expansion_wrangler_extra_kwargs(
-                    queue, out_kernels, geo_data.tree().user_source_ids,
+                    actx, out_kernels, geo_data.tree().user_source_ids,
                     insn.kernel_arguments, evaluate))
 
         wrangler = self.expansion_wrangler_code_container(
                 fmm_kernel, out_kernels).get_wrangler(
-                        queue, geo_data, output_and_expansion_dtype,
+                        actx.queue, geo_data, output_and_expansion_dtype,
                         self.qbx_order,
                         self.fmm_level_to_order,
                         source_extra_kwargs=source_extra_kwargs,
@@ -594,7 +622,7 @@ class QBXLayerPotentialSource(LayerPotentialSourceBase):
                         _use_target_specific_qbx=self._use_target_specific_qbx)
 
         from pytential.qbx.geometry import target_state
-        if (geo_data.user_target_to_center().with_queue(queue)
+        if (actx.thaw(geo_data.user_target_to_center())
                 == target_state.FAILED).any().get():
             raise RuntimeError("geometry has failed targets")
 
@@ -610,20 +638,28 @@ class QBXLayerPotentialSource(LayerPotentialSourceBase):
         # Execute global QBX.
         all_potentials_on_every_target, extra_outputs = (
                 fmm_driver(
-                    wrangler, strengths, geo_data, fmm_kernel, kernel_extra_kwargs))
+                    wrangler, flat_strengths, geo_data,
+                    fmm_kernel, kernel_extra_kwargs))
 
-        result = []
+        results = []
 
         for o in insn.outputs:
             target_side_number = target_name_and_side_to_number[
                     o.target_name, o.qbx_forced_limit]
+            target_discr, _ = target_discrs_and_qbx_sides[target_side_number]
             target_slice = slice(*geo_data.target_info().target_discr_starts[
                     target_side_number:target_side_number+2])
 
-            result.append((o.name,
-                    all_potentials_on_every_target[o.kernel_index][target_slice]))
+            result = all_potentials_on_every_target[o.kernel_index][target_slice]
+
+            from meshmode.discretization import Discretization
+            if isinstance(target_discr, Discretization):
+                from meshmode.dof_array import unflatten
+                result = unflatten(actx, target_discr, result)
 
-        return result, extra_outputs
+            results.append((o.name, result))
+
+        return results, extra_outputs
 
     # }}}
 
@@ -681,7 +717,7 @@ class QBXLayerPotentialSource(LayerPotentialSourceBase):
                         *count = item;
                     """)
 
-    def exec_compute_potential_insn_direct(self, queue, insn, bound_expr, evaluate,
+    def exec_compute_potential_insn_direct(self, actx, insn, bound_expr, evaluate,
             return_timing_data):
         from pytential import bind, sym
         if return_timing_data:
@@ -695,19 +731,23 @@ class QBXLayerPotentialSource(LayerPotentialSourceBase):
         p2p = None
         lpot_applier_on_tgt_subset = None
 
+        from pytential.utils import flatten_if_needed
         kernel_args = {}
         for arg_name, arg_expr in six.iteritems(insn.kernel_arguments):
-            kernel_args[arg_name] = evaluate(arg_expr)
+            kernel_args[arg_name] = flatten_if_needed(actx, evaluate(arg_expr))
 
         waa = bind(bound_expr.places, sym.weights_and_area_elements(
-            self.ambient_dim, dofdesc=insn.source))(queue)
-        strengths = waa * evaluate(insn.density).with_queue(queue)
+            self.ambient_dim, dofdesc=insn.source))(actx)
+        strengths = waa * evaluate(insn.density)
+
+        from meshmode.discretization import Discretization
+        flat_strengths = flatten(strengths)
 
         source_discr = bound_expr.places.get_discretization(
                 insn.source.geometry, insn.source.discr_stage)
 
         # FIXME: Do this all at once
-        result = []
+        results = []
         for o in insn.outputs:
             source_dd = insn.source.copy(discr_stage=o.target_name.discr_stage)
             target_discr = bound_expr.places.get_discretization(
@@ -722,31 +762,41 @@ class QBXLayerPotentialSource(LayerPotentialSourceBase):
                 assert abs(o.qbx_forced_limit) > 0
 
                 expansion_radii = bind(bound_expr.places, sym.expansion_radii(
-                    self.ambient_dim, dofdesc=o.target_name))(queue)
+                    self.ambient_dim, dofdesc=o.target_name))(actx)
                 centers = bind(bound_expr.places, sym.expansion_centers(
                     self.ambient_dim, o.qbx_forced_limit,
-                    dofdesc=o.target_name))(queue)
+                    dofdesc=o.target_name))(actx)
 
                 evt, output_for_each_kernel = lpot_applier(
-                        queue, target_discr.nodes(),
-                        source_discr.nodes(),
-                        centers,
-                        [strengths],
-                        expansion_radii=expansion_radii,
+                        actx.queue,
+                        flatten(thaw(actx, target_discr.nodes())),
+                        flatten(thaw(actx, source_discr.nodes())),
+                        flatten(centers),
+                        [flat_strengths],
+                        expansion_radii=flatten(expansion_radii),
                         **kernel_args)
-                result.append((o.name, output_for_each_kernel[o.kernel_index]))
+
+                result = output_for_each_kernel[o.kernel_index]
+                if isinstance(target_discr, Discretization):
+                    result = unflatten(actx, target_discr, result)
+
+                results.append((o.name, result))
             else:
                 # no on-disk kernel caching
                 if p2p is None:
-                    p2p = self.get_p2p(insn.kernels)
+                    p2p = self.get_p2p(actx, insn.kernels)
                 if lpot_applier_on_tgt_subset is None:
                     lpot_applier_on_tgt_subset = self.get_lpot_applier_on_tgt_subset(
                             insn.kernels)
 
+                queue = actx.queue
+
+                flat_targets = flatten_if_needed(actx, target_discr.nodes())
+                flat_sources = flatten(thaw(actx, source_discr.nodes()))
+
                 evt, output_for_each_kernel = p2p(queue,
-                        target_discr.nodes(),
-                        source_discr.nodes(),
-                        [strengths], **kernel_args)
+                        flat_targets, flat_sources,
+                        [flat_strengths], **kernel_args)
 
                 qbx_forced_limit = o.qbx_forced_limit
                 if qbx_forced_limit is None:
@@ -779,7 +829,7 @@ class QBXLayerPotentialSource(LayerPotentialSourceBase):
 
                 if (o.qbx_forced_limit is not None
                         and abs(o.qbx_forced_limit) == 1
-                        and qbx_tgt_count < target_discr.nnodes):
+                        and qbx_tgt_count < target_discr.ndofs):
                     raise RuntimeError("Did not find a matching QBX center "
                             "for some targets")
 
@@ -794,19 +844,23 @@ class QBXLayerPotentialSource(LayerPotentialSourceBase):
                 if qbx_tgt_count:
                     lpot_applier_on_tgt_subset(
                             queue,
-                            targets=target_discr.nodes(),
-                            sources=source_discr.nodes(),
-                            centers=geo_data.centers(),
-                            expansion_radii=geo_data.expansion_radii(),
-                            strengths=[strengths],
+                            targets=flat_targets,
+                            sources=flat_sources,
+                            centers=geo_data.flat_centers(),
+                            expansion_radii=geo_data.flat_expansion_radii(),
+                            strengths=[flat_strengths],
                             qbx_tgt_numbers=qbx_tgt_numbers,
                             qbx_center_numbers=qbx_center_numbers,
                             **tgt_subset_kwargs)
 
-                result.append((o.name, output_for_each_kernel[o.kernel_index]))
+                result = output_for_each_kernel[o.kernel_index]
+                if isinstance(target_discr, Discretization):
+                    result = unflatten(actx, target_discr, result)
+
+                results.append((o.name, result))
 
         timing_data = {}
-        return result, timing_data
+        return results, timing_data
 
     # }}}
 
diff --git a/pytential/qbx/fmm.py b/pytential/qbx/fmm.py
index 8b9ea4292868fc7c811f46d8fe8b966555b5784e..5185376e475f2f98a2a6e67bc1f33039ef14f42b 100644
--- a/pytential/qbx/fmm.py
+++ b/pytential/qbx/fmm.py
@@ -225,8 +225,8 @@ QBXFMMGeometryData.non_qbx_box_target_lists`),
                 self.queue,
                 global_qbx_centers=geo_data.global_qbx_centers(),
                 qbx_center_to_target_box=geo_data.qbx_center_to_target_box(),
-                qbx_centers=geo_data.centers(),
-                qbx_expansion_radii=geo_data.expansion_radii(),
+                qbx_centers=geo_data.flat_centers(),
+                qbx_expansion_radii=geo_data.flat_expansion_radii(),
 
                 source_box_starts=starts,
                 source_box_lists=lists,
@@ -268,8 +268,8 @@ QBXFMMGeometryData.non_qbx_box_target_lists`),
                     ),
 
                     centers=self.tree.box_centers,
-                    qbx_centers=geo_data.centers(),
-                    qbx_expansion_radii=geo_data.expansion_radii(),
+                    qbx_centers=geo_data.flat_centers(),
+                    qbx_expansion_radii=geo_data.flat_expansion_radii(),
 
                     src_expansions=source_mpoles_view,
                     src_base_ibox=source_level_start_ibox,
@@ -321,8 +321,8 @@ QBXFMMGeometryData.non_qbx_box_target_lists`),
                     target_base_ibox=target_level_start_ibox,
 
                     centers=self.tree.box_centers,
-                    qbx_centers=geo_data.centers(),
-                    qbx_expansion_radii=geo_data.expansion_radii(),
+                    qbx_centers=geo_data.flat_centers(),
+                    qbx_expansion_radii=geo_data.flat_expansion_radii(),
 
                     expansions=target_locals_view,
                     qbx_expansions=qbx_expansions,
@@ -356,8 +356,8 @@ QBXFMMGeometryData.non_qbx_box_target_lists`),
         qbxl2p = self.code.qbxl2p(self.qbx_order)
 
         evt, pot_res = qbxl2p(self.queue,
-                qbx_centers=geo_data.centers(),
-                qbx_expansion_radii=geo_data.expansion_radii(),
+                qbx_centers=geo_data.flat_centers(),
+                qbx_expansion_radii=geo_data.flat_expansion_radii(),
 
                 global_qbx_centers=geo_data.global_qbx_centers(),
 
@@ -582,8 +582,8 @@ def drive_fmm(expansion_wrangler, src_weights, timing_data=None,
         # potential back into a CL array.
         return wrangler.finalize_potentials(x[tree.sorted_target_ids])
 
-    from pytools.obj_array import with_object_array_or_scalar
-    result = with_object_array_or_scalar(
+    from pytools.obj_array import obj_array_vectorize
+    result = obj_array_vectorize(
             reorder_and_finalize_potentials, all_potentials_in_tree_order)
 
     # }}}
diff --git a/pytential/qbx/geometry.py b/pytential/qbx/geometry.py
index 7a991ddfa2b17c6f5b6fa3dc81d1c8d9fdecab06..ace246b7a4b324652062eda1c8c619f85519c782 100644
--- a/pytential/qbx/geometry.py
+++ b/pytential/qbx/geometry.py
@@ -28,6 +28,9 @@ import numpy as np
 import pyopencl as cl
 import pyopencl.array  # noqa
 from pytools import memoize_method
+from pytools.obj_array import obj_array_vectorize
+from meshmode.array_context import PyOpenCLArrayContext
+from meshmode.dof_array import flatten, thaw
 from boxtree.tools import DeviceDataRecord
 from boxtree.pyfmmlib_integration import FMMLibRotationDataInterface
 import loopy as lp
@@ -76,7 +79,7 @@ Enums of special values
 Geometry description code container
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
-.. autoclass:: QBXFMMGeometryCodeGetter
+.. autoclass:: QBXFMMGeometryDataCodeContainer
     :members:
     :undoc-members:
 
@@ -109,16 +112,21 @@ class target_state(Enum):  # noqa
     FAILED = -2
 
 
-class QBXFMMGeometryCodeGetter(TreeCodeContainerMixin):
-    def __init__(self, cl_context, ambient_dim, tree_code_container, debug,
+class QBXFMMGeometryDataCodeContainer(TreeCodeContainerMixin):
+    def __init__(self, actx: PyOpenCLArrayContext, ambient_dim,
+            tree_code_container, debug,
             _well_sep_is_n_away, _from_sep_smaller_crit):
-        self.cl_context = cl_context
+        self.array_context = actx
         self.ambient_dim = ambient_dim
         self.tree_code_container = tree_code_container
         self.debug = debug
         self._well_sep_is_n_away = _well_sep_is_n_away
         self._from_sep_smaller_crit = _from_sep_smaller_crit
 
+    @property
+    def cl_context(self):
+        return self.array_context.context
+
     @memoize_method
     def copy_targets_kernel(self):
         knl = lp.make_kernel(
@@ -324,7 +332,7 @@ class QBXFMMGeometryData(FMMLibRotationDataInterface):
 
     .. attribute:: code_getter
 
-        The :class:`QBXFMMGeometryCodeGetter` for this object.
+        The :class:`QBXFMMGeometryDataCodeContainer` for this object.
 
     .. attribute:: target_discrs_and_qbx_sides
 
@@ -348,7 +356,7 @@ class QBXFMMGeometryData(FMMLibRotationDataInterface):
     .. rubric :: Expansion centers
 
     .. attribute:: ncenters
-    .. automethod:: centers()
+    .. automethod:: flat_centers()
 
     .. rubric :: Methods
 
@@ -401,49 +409,54 @@ class QBXFMMGeometryData(FMMLibRotationDataInterface):
     def ambient_dim(self):
         return self.lpot_source.ambient_dim
 
-    @property
-    def cl_context(self):
-        return self.lpot_source.cl_context
-
     @property
     def coord_dtype(self):
         return self.lpot_source.density_discr.real_dtype
 
+    @property
+    def array_context(self):
+        return self.code_getter.array_context
+
+    @property
+    def cl_context(self):
+        return self.code_getter.cl_context
+
     # {{{ centers/radii
 
     @property
     def ncenters(self):
-        return len(self.centers()[0])
+        return len(self.flat_centers()[0])
 
     @memoize_method
-    def centers(self):
-        """ Return an object array of (interleaved) center coordinates.
+    def flat_centers(self):
+        """Return an object array of (interleaved) center coordinates.
 
         ``coord_t [ambient_dim][ncenters]``
         """
         from pytential import bind, sym
-        from pytools.obj_array import make_obj_array
 
-        with cl.CommandQueue(self.cl_context) as queue:
-            centers = bind(self.places, sym.interleaved_expansion_centers(
+        centers = bind(self.places, sym.interleaved_expansion_centers(
                 self.ambient_dim,
-                dofdesc=self.source_dd.to_stage1()))(queue)
-            return make_obj_array([ax.with_queue(None) for ax in centers])
+                dofdesc=self.source_dd.to_stage1()))(self.array_context)
+        return obj_array_vectorize(self.array_context.freeze, flatten(centers))
 
     @memoize_method
-    def expansion_radii(self):
-        """Return an  array of radii associated with the (interleaved)
+    def flat_expansion_radii(self):
+        """Return an array of radii associated with the (interleaved)
         expansion centers.
 
         ``coord_t [ncenters]``
         """
         from pytential import bind, sym
 
-        with cl.CommandQueue(self.cl_context) as queue:
-            return bind(self.places, sym.expansion_radii(
-                self.ambient_dim,
-                granularity=sym.GRANULARITY_CENTER,
-                dofdesc=self.source_dd.to_stage1()))(queue)
+        radii = bind(self.places,
+                    sym.expansion_radii(
+                        self.ambient_dim,
+                        granularity=sym.GRANULARITY_CENTER,
+                        dofdesc=self.source_dd.to_stage1()))(
+                    self.array_context)
+
+        return self.array_context.freeze(flatten(radii))
 
     # }}}
 
@@ -453,36 +466,41 @@ class QBXFMMGeometryData(FMMLibRotationDataInterface):
     def target_info(self):
         """Return a :class:`TargetInfo`. |cached|"""
 
-        code_getter = self.code_getter
-        with cl.CommandQueue(self.cl_context) as queue:
-            ntargets = self.ncenters
-            target_discr_starts = []
+        from pytential.utils import flatten_if_needed
 
-            for target_discr, qbx_side in self.target_discrs_and_qbx_sides:
-                target_discr_starts.append(ntargets)
-                ntargets += target_discr.nnodes
+        code_getter = self.code_getter
+        queue = self.array_context.queue
+        ntargets = self.ncenters
+        target_discr_starts = []
 
+        for target_discr, qbx_side in self.target_discrs_and_qbx_sides:
             target_discr_starts.append(ntargets)
+            ntargets += target_discr.ndofs
 
-            targets = cl.array.empty(
-                    self.cl_context, (self.ambient_dim, ntargets),
-                    self.coord_dtype)
+        target_discr_starts.append(ntargets)
+
+        targets = cl.array.empty(
+                self.cl_context, (self.ambient_dim, ntargets),
+                self.coord_dtype)
+        code_getter.copy_targets_kernel()(
+                queue,
+                targets=targets[:, :self.ncenters],
+                points=self.flat_centers())
+
+        for start, (target_discr, _) in zip(
+                target_discr_starts, self.target_discrs_and_qbx_sides):
             code_getter.copy_targets_kernel()(
                     queue,
-                    targets=targets[:, :self.ncenters],
-                    points=self.centers())
+                    targets=targets[:,
+                        start:start+target_discr.ndofs],
+                    points=flatten_if_needed(
+                        self.array_context,
+                        target_discr.nodes()))
 
-            for start, (target_discr, _) in zip(
-                    target_discr_starts, self.target_discrs_and_qbx_sides):
-                code_getter.copy_targets_kernel()(
-                        queue,
-                        targets=targets[:, start:start+target_discr.nnodes],
-                        points=target_discr.nodes())
-
-            return TargetInfo(
-                    targets=targets,
-                    target_discr_starts=target_discr_starts,
-                    ntargets=ntargets).with_queue(None)
+        return TargetInfo(
+                targets=targets,
+                target_discr_starts=target_discr_starts,
+                ntargets=ntargets).with_queue(None)
 
     def target_side_preferences(self):
         """Return one big array combining all the data from
@@ -492,7 +510,7 @@ class QBXFMMGeometryData(FMMLibRotationDataInterface):
 
         tgt_info = self.target_info()
 
-        with cl.CommandQueue(self.cl_context) as queue:
+        with cl.CommandQueue(self.array_context.context) as queue:
             target_side_preferences = cl.array.empty(
                     queue, tgt_info.ntargets, np.int8)
             target_side_preferences[:self.ncenters] = 0
@@ -500,7 +518,7 @@ class QBXFMMGeometryData(FMMLibRotationDataInterface):
             for tdstart, (target_discr, qbx_side) in \
                     zip(tgt_info.target_discr_starts,
                             self.target_discrs_and_qbx_sides):
-                target_side_preferences[tdstart:tdstart+target_discr.nnodes] \
+                target_side_preferences[tdstart:tdstart+target_discr.ndofs] \
                     = qbx_side
 
             return target_side_preferences.with_queue(None)
@@ -521,52 +539,54 @@ class QBXFMMGeometryData(FMMLibRotationDataInterface):
         lpot_source = self.lpot_source
         target_info = self.target_info()
 
-        with cl.CommandQueue(self.cl_context) as queue:
-            from pytential import sym
-            quad_stage2_discr = self.places.get_discretization(
-                    self.source_dd.geometry, sym.QBX_SOURCE_QUAD_STAGE2)
-
-            nsources = quad_stage2_discr.nnodes
-            nparticles = nsources + target_info.ntargets
-
-            target_radii = None
-            if lpot_source._expansions_in_tree_have_extent:
-                target_radii = cl.array.zeros(queue, target_info.ntargets,
-                        self.coord_dtype)
-                target_radii[:self.ncenters] = self.expansion_radii()
-
-            refine_weights = cl.array.empty(queue, nparticles, dtype=np.int32)
-
-            # Assign a weight of 1 to all sources, QBX centers, and conventional
-            # (non-QBX) targets. Assign a weight of 0 to all targets that need
-            # QBX centers. The potential at the latter targets is mediated
-            # entirely by the QBX center, so as a matter of evaluation cost,
-            # their location in the tree is irrelevant.
-            refine_weights[:-target_info.ntargets] = 1
-            user_ttc = self.user_target_to_center().with_queue(queue)
-            refine_weights[-target_info.ntargets:] = (
-                    user_ttc == target_state.NO_QBX_NEEDED).astype(np.int32)
-
-            refine_weights.finish()
-
-            tree, _ = code_getter.build_tree()(queue,
-                    particles=quad_stage2_discr.nodes(),
-                    targets=target_info.targets,
-                    target_radii=target_radii,
-                    max_leaf_refine_weight=lpot_source._max_leaf_refine_weight,
-                    refine_weights=refine_weights,
-                    debug=self.debug,
-                    stick_out_factor=lpot_source._expansion_stick_out_factor,
-                    extent_norm=lpot_source._box_extent_norm,
-                    kind=self.tree_kind)
+        queue = self.array_context.queue
 
-            if self.debug:
-                tgt_count_2 = cl.array.sum(
-                        tree.box_target_counts_nonchild, queue=queue).get()
+        from pytential import sym
+        quad_stage2_discr = self.places.get_discretization(
+                self.source_dd.geometry, sym.QBX_SOURCE_QUAD_STAGE2)
 
-                assert (tree.ntargets == tgt_count_2), (tree.ntargets, tgt_count_2)
+        nsources = quad_stage2_discr.ndofs
+        nparticles = nsources + target_info.ntargets
 
-            return tree
+        target_radii = None
+        if lpot_source._expansions_in_tree_have_extent:
+            target_radii = cl.array.zeros(queue, target_info.ntargets,
+                    self.coord_dtype)
+            target_radii[:self.ncenters] = self.flat_expansion_radii()
+
+        refine_weights = cl.array.empty(queue, nparticles, dtype=np.int32)
+
+        # Assign a weight of 1 to all sources, QBX centers, and conventional
+        # (non-QBX) targets. Assign a weight of 0 to all targets that need
+        # QBX centers. The potential at the latter targets is mediated
+        # entirely by the QBX center, so as a matter of evaluation cost,
+        # their location in the tree is irrelevant.
+        refine_weights[:-target_info.ntargets] = 1
+        user_ttc = self.user_target_to_center().with_queue(queue)
+        refine_weights[-target_info.ntargets:] = (
+                user_ttc == target_state.NO_QBX_NEEDED).astype(np.int32)
+
+        refine_weights.finish()
+
+        tree, _ = code_getter.build_tree()(queue,
+                particles=flatten(thaw(
+                    self.array_context, quad_stage2_discr.nodes())),
+                targets=target_info.targets,
+                target_radii=target_radii,
+                max_leaf_refine_weight=lpot_source._max_leaf_refine_weight,
+                refine_weights=refine_weights,
+                debug=self.debug,
+                stick_out_factor=lpot_source._expansion_stick_out_factor,
+                extent_norm=lpot_source._box_extent_norm,
+                kind=self.tree_kind)
+
+        if self.debug:
+            tgt_count_2 = cl.array.sum(
+                    tree.box_target_counts_nonchild, queue=queue).get()
+
+            assert (tree.ntargets == tgt_count_2), (tree.ntargets, tgt_count_2)
+
+        return tree
 
     # }}}
 
@@ -761,31 +781,32 @@ class QBXFMMGeometryData(FMMLibRotationDataInterface):
 
         from pytential.target import PointsTarget
 
-        with cl.CommandQueue(self.cl_context) as queue:
-            target_side_prefs = (self
-                    .target_side_preferences()[self.ncenters:].get(queue=queue))
-
-            target_discrs_and_qbx_sides = [(
-                    PointsTarget(target_info.targets[:, self.ncenters:]),
-                    target_side_prefs.astype(np.int32))]
-
-            target_association_wrangler = (
-                    self.lpot_source.target_association_code_container
-                    .get_wrangler(queue))
-
-            tgt_assoc_result = associate_targets_to_qbx_centers(
-                    self.places,
-                    self.source_dd,
-                    target_association_wrangler,
-                    target_discrs_and_qbx_sides,
-                    target_association_tolerance=(
-                        self.target_association_tolerance),
-                    debug=self.debug)
-
-            result = cl.array.empty(queue, target_info.ntargets,
-                    tgt_assoc_result.target_to_center.dtype)
-            result[:self.ncenters].fill(target_state.NO_QBX_NEEDED)
-            result[self.ncenters:] = tgt_assoc_result.target_to_center
+        queue = self.array_context.queue
+
+        target_side_prefs = (self
+                .target_side_preferences()[self.ncenters:].get(queue=queue))
+
+        target_discrs_and_qbx_sides = [(
+                PointsTarget(target_info.targets[:, self.ncenters:]),
+                target_side_prefs.astype(np.int32))]
+
+        target_association_wrangler = (
+                self.lpot_source.target_association_code_container
+                .get_wrangler(self.array_context))
+
+        tgt_assoc_result = associate_targets_to_qbx_centers(
+                self.places,
+                self.source_dd,
+                target_association_wrangler,
+                target_discrs_and_qbx_sides,
+                target_association_tolerance=(
+                    self.target_association_tolerance),
+                debug=self.debug)
+
+        result = cl.array.empty(queue, target_info.ntargets,
+                tgt_assoc_result.target_to_center.dtype)
+        result[:self.ncenters].fill(target_state.NO_QBX_NEEDED)
+        result[self.ncenters:] = tgt_assoc_result.target_to_center
 
         return result.with_queue(None)
 
@@ -918,7 +939,7 @@ class QBXFMMGeometryData(FMMLibRotationDataInterface):
 
             # {{{ draw centers and circles
 
-            centers = self.centers()
+            centers = self.flat_centers()
             centers = [
                     centers[0].get(queue),
                     centers[1].get(queue)]
@@ -937,7 +958,7 @@ class QBXFMMGeometryData(FMMLibRotationDataInterface):
             if draw_circles:
                 for icenter, (cx, cy, r) in enumerate(zip(
                         centers[0], centers[1],
-                        self.expansion_radii().get(queue))):
+                        self.flat_expansion_radii().get(queue))):
                     ax.add_artist(
                             pt.Circle((cx, cy), r, fill=False, ls="dotted", lw=1))
 
diff --git a/pytential/qbx/refinement.py b/pytential/qbx/refinement.py
index b3c28ee6e7c843ced0a39bc27da6e2a1b503d3f2..dbcfe657b650d46e9d407e30097a1339afc11bed 100644
--- a/pytential/qbx/refinement.py
+++ b/pytential/qbx/refinement.py
@@ -1,5 +1,5 @@
 # -*- coding: utf-8 -*-
-from __future__ import division, absolute_import, print_function
+from __future__ import annotations
 
 __copyright__ = """
 Copyright (C) 2013 Andreas Kloeckner
@@ -29,6 +29,8 @@ THE SOFTWARE.
 
 import loopy as lp
 from loopy.version import MOST_RECENT_LANGUAGE_VERSION
+from meshmode.array_context import PyOpenCLArrayContext
+from meshmode.dof_array import flatten
 import numpy as np
 import pyopencl as cl
 
@@ -219,8 +221,8 @@ SUFFICIENT_SOURCE_QUADRATURE_RESOLUTION_CHECKER = AreaQueryElementwiseTemplate(
 
 class RefinerCodeContainer(TreeCodeContainerMixin):
 
-    def __init__(self, cl_context, tree_code_container):
-        self.cl_context = cl_context
+    def __init__(self, actx: PyOpenCLArrayContext, tree_code_container):
+        self.array_context = actx
         self.tree_code_container = tree_code_container
 
     @memoize_method
@@ -228,7 +230,7 @@ class RefinerCodeContainer(TreeCodeContainerMixin):
             self, dimensions, coord_dtype, box_id_dtype, peer_list_idx_dtype,
             particle_id_dtype, max_levels):
         return EXPANSION_DISK_UNDISTURBED_BY_SOURCES_CHECKER.generate(
-                self.cl_context,
+                self.array_context.context,
                 dimensions, coord_dtype, box_id_dtype, peer_list_idx_dtype,
                 max_levels,
                 extra_type_aliases=(("particle_id_t", particle_id_dtype),))
@@ -238,7 +240,7 @@ class RefinerCodeContainer(TreeCodeContainerMixin):
             self, dimensions, coord_dtype, box_id_dtype, peer_list_idx_dtype,
             particle_id_dtype, max_levels):
         return SUFFICIENT_SOURCE_QUADRATURE_RESOLUTION_CHECKER.generate(
-                self.cl_context,
+                self.array_context.context,
                 dimensions, coord_dtype, box_id_dtype, peer_list_idx_dtype,
                 max_levels,
                 extra_type_aliases=(("particle_id_t", particle_id_dtype),))
@@ -268,11 +270,11 @@ class RefinerCodeContainer(TreeCodeContainerMixin):
         knl = lp.split_iname(knl, "ielement", 128, inner_tag="l.0", outer_tag="g.0")
         return knl
 
-    def get_wrangler(self, queue):
+    def get_wrangler(self):
         """
-        :arg queue:
+        :returns: a :class:`RefinerWrangler` bound to :attr:`array_context`.
         """
-        return RefinerWrangler(self, queue)
+        return RefinerWrangler(self.array_context, self)
 
 # }}}
 
@@ -309,9 +311,10 @@ class RefinerWrangler(TreeWranglerBase):
         unwrap_args = AreaQueryElementwiseTemplate.unwrap_args
 
         from pytential import bind, sym
-        center_danger_zone_radii = bind(stage1_density_discr,
+        center_danger_zone_radii = flatten(
+            bind(stage1_density_discr,
                 sym.expansion_radii(stage1_density_discr.ambient_dim,
-                    granularity=sym.GRANULARITY_CENTER))(self.queue)
+                    granularity=sym.GRANULARITY_CENTER))(self.array_context))
 
         evt = knl(
             *unwrap_args(
@@ -367,9 +370,11 @@ class RefinerWrangler(TreeWranglerBase):
 
         from pytential import bind, sym
         dd = sym.as_dofdesc(sym.GRANULARITY_ELEMENT).to_stage2()
-        source_danger_zone_radii_by_panel = bind(stage2_density_discr,
-                sym._source_danger_zone_radii(
-                    stage2_density_discr.ambient_dim, dofdesc=dd))(self.queue)
+        source_danger_zone_radii_by_panel = flatten(
+                bind(stage2_density_discr,
+                    sym._source_danger_zone_radii(
+                        stage2_density_discr.ambient_dim, dofdesc=dd))
+                (self.array_context))
         unwrap_args = AreaQueryElementwiseTemplate.unwrap_args
 
         evt = knl(
@@ -407,6 +412,10 @@ class RefinerWrangler(TreeWranglerBase):
         if debug:
             npanels_to_refine_prev = cl.array.sum(refine_flags).get()
 
+        from pytential.utils import flatten_if_needed
+        element_property = flatten_if_needed(
+                self.array_context, element_property)
+
         evt, out = knl(self.queue,
                        element_property=element_property,
                        refine_flags=refine_flags,
@@ -436,8 +445,10 @@ class RefinerWrangler(TreeWranglerBase):
 
         with ProcessLogger(logger, "refine mesh"):
             refiner.refine(refine_flags)
-            from meshmode.discretization.connection import make_refinement_connection
-            conn = make_refinement_connection(refiner, density_discr, factory)
+            from meshmode.discretization.connection import (
+                    make_refinement_connection)
+            conn = make_refinement_connection(
+                    self.array_context, refiner, density_discr, factory)
 
         return conn
 
@@ -485,7 +496,7 @@ def _warn_max_iterations(violated_criteria, expansion_disturbance_tolerance):
             RefinerNotConvergedWarning)
 
 
-def _visualize_refinement(queue, discr,
+def _visualize_refinement(actx: PyOpenCLArrayContext, discr,
         niter, stage_nr, stage_name, flags, visualize=False):
     if not visualize:
         return
@@ -498,18 +509,18 @@ def _visualize_refinement(queue, discr,
             stage_name, np.sum(flags), discr.mesh.nelements, stage_nr)
 
     from meshmode.discretization.visualization import make_visualizer
-    vis = make_visualizer(queue, discr, 3)
+    vis = make_visualizer(actx, discr, 3)
 
     assert len(flags) == discr.mesh.nelements
 
     flags = flags.astype(np.bool)
-    nodes_flags = np.zeros(discr.nnodes)
+    nodes_flags = np.zeros(discr.ndofs)
     for grp in discr.groups:
         meg = grp.mesh_el_group
         grp.view(nodes_flags)[
                 flags[meg.element_nr_base:meg.nelements+meg.element_nr_base]] = 1
 
-    nodes_flags = cl.array.to_device(queue, nodes_flags)
+    nodes_flags = actx.from_numpy(nodes_flags)
     vis_data = [
         ("refine_flags", nodes_flags),
         ]
@@ -517,7 +528,7 @@ def _visualize_refinement(queue, discr,
     if 0:
         from pytential import sym, bind
         bdry_normals = bind(discr, sym.normal(discr.ambient_dim))(
-                queue).as_vector(dtype=object)
+                actx).as_vector(dtype=object)
         vis_data.append(("bdry_normals", bdry_normals),)
 
     vis.write_vtk_file("refinement-%s-%03d.vtu" % (stage_name, niter), vis_data)
@@ -529,7 +540,7 @@ def _make_quad_stage2_discr(lpot_source, stage2_density_discr):
             QuadratureSimplexGroupFactory
 
     return Discretization(
-            lpot_source.cl_context,
+            lpot_source._setup_actx,
             stage2_density_discr.mesh,
             QuadratureSimplexGroupFactory(lpot_source.fine_order),
             lpot_source.real_dtype)
@@ -583,6 +594,8 @@ def _refine_qbx_stage1(lpot_source, density_discr,
     iter_violated_criteria = ["start"]
     niter = 0
 
+    actx = wrangler.array_context
+
     stage1_density_discr = density_discr
     while iter_violated_criteria:
         iter_violated_criteria = []
@@ -602,7 +615,7 @@ def _refine_qbx_stage1(lpot_source, density_discr,
 
                 quad_resolution = bind(stage1_density_discr,
                         sym._quad_resolution(stage1_density_discr.ambient_dim,
-                            dofdesc=sym.GRANULARITY_ELEMENT))(wrangler.queue)
+                            dofdesc=sym.GRANULARITY_ELEMENT))(actx)
 
                 violates_kernel_length_scale = \
                         wrangler.check_element_prop_threshold(
@@ -612,7 +625,7 @@ def _refine_qbx_stage1(lpot_source, density_discr,
 
                 if violates_kernel_length_scale:
                     iter_violated_criteria.append("kernel length scale")
-                    _visualize_refinement(wrangler.queue, stage1_density_discr,
+                    _visualize_refinement(actx, stage1_density_discr,
                             niter, 1, "kernel-length-scale", refine_flags,
                             visualize=visualize)
 
@@ -622,7 +635,7 @@ def _refine_qbx_stage1(lpot_source, density_discr,
                 scaled_max_curv = bind(stage1_density_discr,
                     sym.ElementwiseMax(sym._scaled_max_curvature(
                         stage1_density_discr.ambient_dim),
-                        dofdesc=sym.GRANULARITY_ELEMENT))(wrangler.queue)
+                        dofdesc=sym.GRANULARITY_ELEMENT))(actx)
 
                 violates_scaled_max_curv = \
                         wrangler.check_element_prop_threshold(
@@ -764,7 +777,8 @@ def _refine_qbx_stage2(lpot_source, stage1_density_discr,
 def _refine_qbx_quad_stage2(lpot_source, stage2_density_discr):
     from meshmode.discretization.connection import make_same_mesh_connection
     discr = _make_quad_stage2_discr(lpot_source, stage2_density_discr)
-    conn = make_same_mesh_connection(discr, stage2_density_discr)
+    conn = make_same_mesh_connection(
+            lpot_source._setup_actx, discr, stage2_density_discr)
 
     return discr, conn
 
@@ -898,7 +912,7 @@ def _refine_for_global_qbx(places, dofdesc, wrangler,
 
 # {{{ refine_geometry_collection
 
-def refine_geometry_collection(queue, places,
+def refine_geometry_collection(places,
         group_factory=None,
         refine_discr_stage=None,
         kernel_length_scale=None,
@@ -945,7 +959,7 @@ def refine_geometry_collection(queue, places,
             continue
 
         _refine_for_global_qbx(places, dofdesc,
-                lpot_source.refiner_code_container.get_wrangler(queue),
+                lpot_source.refiner_code_container.get_wrangler(),
                 group_factory=group_factory,
                 kernel_length_scale=kernel_length_scale,
                 scaled_max_curvature_threshold=scaled_max_curvature_threshold,
diff --git a/pytential/qbx/target_assoc.py b/pytential/qbx/target_assoc.py
index 39b226ab491e72adfe1dba8ec714d73f090779f6..64aecae6de399c5bf8acf0d9fc7e78ee3f537938 100644
--- a/pytential/qbx/target_assoc.py
+++ b/pytential/qbx/target_assoc.py
@@ -36,6 +36,8 @@ from boxtree.tools import DeviceDataRecord
 from boxtree.area_query import AreaQueryElementwiseTemplate
 from boxtree.tools import InlineBinarySearch
 from cgen import Enum
+from meshmode.array_context import PyOpenCLArrayContext
+from meshmode.dof_array import flatten
 from pytential.qbx.utils import (
     QBX_TREE_C_PREAMBLE, QBX_TREE_MAKO_DEFS, TreeWranglerBase,
     TreeCodeContainerMixin)
@@ -444,10 +446,14 @@ class QBXTargetAssociation(DeviceDataRecord):
 
 class TargetAssociationCodeContainer(TreeCodeContainerMixin):
 
-    def __init__(self, cl_context, tree_code_container):
-        self.cl_context = cl_context
+    def __init__(self, actx: PyOpenCLArrayContext, tree_code_container):
+        self.array_context = actx
         self.tree_code_container = tree_code_container
 
+    @property
+    def cl_context(self):
+        return self.array_context.context
+
     @memoize_method
     def target_marker(self, dimensions, coord_dtype, box_id_dtype,
             peer_list_idx_dtype, particle_id_dtype, max_levels):
@@ -489,8 +495,8 @@ class TargetAssociationCodeContainer(TreeCodeContainerMixin):
         from boxtree.area_query import SpaceInvaderQueryBuilder
         return SpaceInvaderQueryBuilder(self.cl_context)
 
-    def get_wrangler(self, queue):
-        return TargetAssociationWrangler(self, queue)
+    def get_wrangler(self, actx: PyOpenCLArrayContext):
+        return TargetAssociationWrangler(actx, code_container=self)
 
 
 class TargetAssociationWrangler(TreeWranglerBase):
@@ -521,9 +527,11 @@ class TargetAssociationWrangler(TreeWranglerBase):
         source_slice = tree.sorted_target_ids[tree.qbx_user_source_slice]
         sources = [
                 axis.with_queue(self.queue)[source_slice] for axis in tree.sources]
-        tunnel_radius_by_source = bind(places,
-                sym._close_target_tunnel_radii(ambient_dim, dofdesc=dofdesc))(
-                        self.queue)
+
+        tunnel_radius_by_source = flatten(
+                bind(places,
+                    sym._close_target_tunnel_radii(ambient_dim, dofdesc=dofdesc))
+                (self.array_context))
 
         # Target-marking algorithm (TGTMARK):
         #
@@ -620,9 +628,9 @@ class TargetAssociationWrangler(TreeWranglerBase):
         expansion_radii_by_center = bind(places, sym.expansion_radii(
             ambient_dim,
             granularity=sym.GRANULARITY_CENTER,
-            dofdesc=dofdesc))(self.queue)
-        expansion_radii_by_center_with_tolerance = \
-                expansion_radii_by_center * (1 + target_association_tolerance)
+            dofdesc=dofdesc))(self.array_context)
+        expansion_radii_by_center_with_tolerance = flatten(
+                expansion_radii_by_center * (1 + target_association_tolerance))
 
         # Idea:
         #
@@ -716,11 +724,13 @@ class TargetAssociationWrangler(TreeWranglerBase):
         source_slice = tree.user_source_ids[tree.qbx_user_source_slice]
         sources = [
                 axis.with_queue(self.queue)[source_slice] for axis in tree.sources]
-        tunnel_radius_by_source = bind(places,
-                sym._close_target_tunnel_radii(ambient_dim, dofdesc=dofdesc))(
-                        self.queue)
 
-        # See (TGTMARK) above for algorithm.
+        tunnel_radius_by_source = flatten(
+                bind(places,
+                    sym._close_target_tunnel_radii(ambient_dim, dofdesc=dofdesc))
+                (self.array_context))
+
+        # See (TGTMARK) above for algorithm.
 
         box_to_search_dist, evt = self.code_container.space_invader_query()(
                 self.queue,
@@ -731,10 +741,6 @@ class TargetAssociationWrangler(TreeWranglerBase):
                 wait_for=wait_for)
         wait_for = [evt]
 
-        tunnel_radius_by_source = bind(places,
-                sym._close_target_tunnel_radii(ambient_dim, dofdesc=dofdesc))(
-                        self.queue)
-
         evt = knl(
             *unwrap_args(
                 tree, peer_lists,
@@ -767,17 +773,17 @@ class TargetAssociationWrangler(TreeWranglerBase):
         return (found_panel_to_refine == 1).all().get()
 
     def make_target_flags(self, target_discrs_and_qbx_sides):
-        ntargets = sum(discr.nnodes for discr, _ in target_discrs_and_qbx_sides)
+        ntargets = sum(discr.ndofs for discr, _ in target_discrs_and_qbx_sides)
         target_flags = cl.array.empty(self.queue, ntargets, dtype=np.int32)
         offset = 0
 
         for discr, flags in target_discrs_and_qbx_sides:
             if np.isscalar(flags):
-                target_flags[offset:offset + discr.nnodes].fill(flags)
+                target_flags[offset:offset + discr.ndofs].fill(flags)
             else:
-                assert len(flags) == discr.nnodes
-                target_flags[offset:offset + discr.nnodes] = flags
-            offset += discr.nnodes
+                assert len(flags) == discr.ndofs
+                target_flags[offset:offset + discr.ndofs] = flags
+            offset += discr.ndofs
 
         target_flags.finish()
         return target_flags
diff --git a/pytential/qbx/utils.py b/pytential/qbx/utils.py
index b872152a2002ddec5678073e15255c25842db4ed..da804df5cf39219d8b4162fdbaaae1c115042d99 100644
--- a/pytential/qbx/utils.py
+++ b/pytential/qbx/utils.py
@@ -1,5 +1,5 @@
 # -*- coding: utf-8 -*-
-from __future__ import division, absolute_import, print_function
+from __future__ import annotations
 
 __copyright__ = """
 Copyright (C) 2016 Matt Wala
@@ -28,6 +28,7 @@ THE SOFTWARE.
 
 import numpy as np
 from boxtree.tree import Tree
+from meshmode.array_context import PyOpenCLArrayContext
 import pyopencl as cl
 import pyopencl.array # noqa
 from pytools import memoize_method
@@ -72,23 +73,23 @@ QBX_TREE_MAKO_DEFS = r"""//CL:mako//
 
 class TreeCodeContainer(object):
 
-    def __init__(self, cl_context):
-        self.cl_context = cl_context
+    def __init__(self, actx: PyOpenCLArrayContext):
+        self.array_context = actx
 
     @memoize_method
     def build_tree(self):
         from boxtree.tree_build import TreeBuilder
-        return TreeBuilder(self.cl_context)
+        return TreeBuilder(self.array_context.context)
 
     @memoize_method
     def peer_list_finder(self):
         from boxtree.area_query import PeerListFinder
-        return PeerListFinder(self.cl_context)
+        return PeerListFinder(self.array_context.context)
 
     @memoize_method
     def particle_list_filter(self):
         from boxtree.tree import ParticleListFilter
-        return ParticleListFilter(self.cl_context)
+        return ParticleListFilter(self.array_context.context)
 
 # }}}
 
@@ -116,9 +117,13 @@ class TreeCodeContainerMixin(object):
 
 class TreeWranglerBase(object):
 
-    def __init__(self, code_container, queue):
+    def __init__(self, array_context: PyOpenCLArrayContext, code_container):
         self.code_container = code_container
-        self.queue = queue
+        self.array_context = array_context
+
+    @property
+    def queue(self):
+        return self.array_context.queue
 
     def build_tree(self, places, targets_list=(), sources_list=(),
                    use_stage2_discr=False):
@@ -126,7 +131,7 @@ class TreeWranglerBase(object):
         plfilt = self.code_container.particle_list_filter()
 
         return build_tree_with_qbx_metadata(
-                self.queue, places, tb, plfilt,
+                self.array_context, places, tb, plfilt,
                 sources_list=sources_list,
                 targets_list=targets_list,
                 use_stage2_discr=use_stage2_discr)
@@ -226,8 +231,8 @@ MAX_REFINE_WEIGHT = 64
 
 
 @log_process(logger)
-def build_tree_with_qbx_metadata(queue, places,
-        tree_builder, particle_list_filter,
+def build_tree_with_qbx_metadata(actx: PyOpenCLArrayContext,
+        places, tree_builder, particle_list_filter,
         sources_list=(), targets_list=(),
         use_stage2_discr=False):
     """Return a :class:`TreeWithQBXMetadata` built from the given layer
@@ -240,7 +245,7 @@ def build_tree_with_qbx_metadata(queue, places,
          :class:`~pytential.symbolic.primitives.QBX_SOURCE_STAGE1`.
        * targets from ``targets_list``.
 
-    :arg queue: An instance of :class:`pyopencl.CommandQueue`
+    :arg actx: A :class:`~meshmode.array_context.PyOpenCLArrayContext`.
     :arg places: An instance of
         :class:`~pytential.symbolic.execution.GeometryCollection`.
     :arg targets_list: A list of :class:`pytential.target.TargetBase`
@@ -274,15 +279,20 @@ def build_tree_with_qbx_metadata(queue, places,
 
     def _make_centers(discr):
         return bind(discr, sym.interleaved_expansion_centers(
-            discr.ambient_dim))(queue)
+            discr.ambient_dim))(actx)
 
     stage1_density_discr = stage1_density_discrs[0]
     density_discr = density_discrs[0]
 
-    sources = density_discr.nodes()
-    centers = _make_centers(stage1_density_discr)
-    targets = (tgt.nodes() for tgt in targets_list)
+    from meshmode.dof_array import flatten, thaw
+    from pytential.utils import flatten_if_needed
+    sources = flatten(thaw(actx, density_discr.nodes()))
+    centers = flatten(_make_centers(stage1_density_discr))
+    targets = [
+            flatten_if_needed(actx, tgt.nodes())
+            for tgt in targets_list]
 
+    queue = actx.queue
     particles = tuple(
             cl.array.concatenate(dim_coords, queue=queue)
             for dim_coords in zip(sources, centers, *targets))
@@ -294,7 +304,7 @@ def build_tree_with_qbx_metadata(queue, places,
     ncenters = len(centers[0])
     # Each source gets an interior / exterior center.
     assert 2 * nsources == ncenters or use_stage2_discr
-    ntargets = sum(tgt.nnodes for tgt in targets_list)
+    ntargets = sum(tgt.ndofs for tgt in targets_list)
 
     # Slices
     qbx_user_source_slice = slice(0, nsources)
@@ -333,7 +343,7 @@ def build_tree_with_qbx_metadata(queue, places,
         box_to_class = (
             particle_list_filter
             .filter_target_lists_in_user_order(queue, tree, flags)
-            .with_queue(queue))
+            .with_queue(actx.queue))
 
         if fixup:
             box_to_class.target_lists += fixup
@@ -347,12 +357,14 @@ def build_tree_with_qbx_metadata(queue, places,
     qbx_panel_to_source_starts = cl.array.empty(
             queue, npanels + 1, dtype=tree.particle_id_dtype)
     el_offset = 0
+    node_nr_base = 0
     for group in density_discr.groups:
         qbx_panel_to_source_starts[el_offset:el_offset + group.nelements] = \
-                cl.array.arange(queue, group.node_nr_base,
-                                group.node_nr_base + group.nnodes,
-                                group.nunit_nodes,
+                cl.array.arange(queue, node_nr_base,
+                                node_nr_base + group.ndofs,
+                                group.nunit_dofs,
                                 dtype=tree.particle_id_dtype)
+        node_nr_base += group.ndofs
         el_offset += group.nelements
     qbx_panel_to_source_starts[-1] = nsources
 
@@ -418,11 +430,11 @@ class ToHostTransferredGeoDataWrapper(FMMLibRotationDataInterface):
     def centers(self):
         return np.array([
             ci.get(queue=self.queue)
-            for ci in self.geo_data.centers()])
+            for ci in self.geo_data.flat_centers()])
 
     @memoize_method
     def expansion_radii(self):
-        return self.geo_data.expansion_radii().get(queue=self.queue)
+        return self.geo_data.flat_expansion_radii().get(queue=self.queue)
 
     @memoize_method
     def global_qbx_centers(self):
diff --git a/pytential/solve.py b/pytential/solve.py
index c2e932432b8db59a2734d0c8225f1edd500f84bc..9a12cff47b4a60c30b765e6ced3f6c05b6fd04b0 100644
--- a/pytential/solve.py
+++ b/pytential/solve.py
@@ -22,9 +22,6 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 THE SOFTWARE.
 """
 
-
-from six.moves import range
-
 __doc__ = """
 
 .. autofunction:: gmres
@@ -37,52 +34,24 @@ __doc__ = """
 """
 
 
-def get_array_module(vec):
-    try:
-        from pyopencl.tools import array_module
-        from pytools.obj_array import is_obj_array
-        if is_obj_array(vec):
-            return array_module(vec[0])
-        else:
-            return array_module(vec)
-    except ImportError:
-        return np
-
-
-# {{{ block system support
-
-class VectorChopper(object):
-    def __init__(self, structured_vec):
-        from pytools.obj_array import is_obj_array
-        self.is_structured = is_obj_array(structured_vec)
-        self.array_module = get_array_module(structured_vec)
-
-        if self.is_structured:
-            self.slices = []
-            num_dofs = 0
-            for entry in structured_vec:
-                if isinstance(entry, self.array_module.ndarray):
-                    length = len(entry)
-                else:
-                    length = 1
-
-                self.slices.append(slice(num_dofs, num_dofs+length))
-                num_dofs += length
-
-    def stack(self, vec):
-        if not self.is_structured:
-            return vec
-
-        return self.array_module.hstack(vec)
-
-    def chop(self, vec):
-        if not self.is_structured:
-            return vec
-
-        from pytools.obj_array import make_obj_array
-        return make_obj_array([vec[slc] for slc in self.slices])
-
-# }}}
+import numpy as np
+from numbers import Number
+import pyopencl as cl
+import pyopencl.array  # noqa
+from pytools.obj_array import obj_array_vectorize_n_args
+
+
+def structured_vdot(x, y):
+    """:func:`numpy.vdot` for scalars, host arrays, :mod:`pyopencl` arrays,
+    and (recursively) object arrays of any of these."""
+    if isinstance(x, cl.array.Array):
+        return cl.array.vdot(x, y).get()
+    if isinstance(x, np.ndarray) and x.dtype.char == "O":
+        return sum(obj_array_vectorize_n_args(structured_vdot, x, y))
+    if isinstance(x, (Number, np.ndarray)):
+        return np.vdot(x, y)
+    raise TypeError(
+            "structured_vdot: unsupported type '%s'" % type(x).__name__)
 
 
 # {{{ gmres
@@ -92,7 +61,6 @@ class VectorChopper(object):
 # Necessary because SciPy gmres is not reentrant and thus does
 # not allow recursive solves.
 
-import numpy as np
 from pytools import Record
 
 
@@ -274,11 +242,8 @@ def _gmres(A, b, restart=None, tol=None, x0=None, dot=None,  # noqa
 # {{{ progress reporting
 
 class ResidualPrinter:
-    def __init__(self, inner_product=None):
+    def __init__(self, inner_product=structured_vdot):
         self.count = 0
-        if inner_product is None:
-            inner_product = np.vdot
-
         self.inner_product = inner_product
 
     def __call__(self, resid):
@@ -298,7 +263,7 @@ class ResidualPrinter:
 # {{{ entrypoint
 
 def gmres(op, rhs, restart=None, tol=None, x0=None,
-        inner_product=None,
+        inner_product=structured_vdot,
         maxiter=None, hard_failure=None,
         no_progress_factor=None, stall_iterations=None,
         callback=None, progress=False, require_monotonicity=True):
@@ -320,32 +285,20 @@ def gmres(op, rhs, restart=None, tol=None, x0=None,
 
     :return: a :class:`GMRESResult`
     """
-    amod = get_array_module(rhs)
-
-    chopper = VectorChopper(rhs)
-    stacked_rhs = chopper.stack(rhs)
-
-    stacked_x0 = x0
-    if stacked_x0 is not None:
-        stacked_x0 = chopper.stack(stacked_x0)
-
-    if inner_product is None:
-        inner_product = amod.vdot
-
     if callback is None:
         if progress:
             callback = ResidualPrinter(inner_product)
         else:
             callback = None
 
-    result = _gmres(op, stacked_rhs, restart=restart, tol=tol, x0=stacked_x0,
+    result = _gmres(op, rhs, restart=restart, tol=tol, x0=x0,
             dot=inner_product,
             maxiter=maxiter, hard_failure=hard_failure,
             no_progress_factor=no_progress_factor,
             stall_iterations=stall_iterations, callback=callback,
             require_monotonicity=require_monotonicity)
 
-    return result.copy(solution=chopper.chop(result.solution))
+    return result
 
 # }}}
 
@@ -367,10 +320,7 @@ def lu(op, rhs, show_spectrum=False):
         pt.plot(ev.real, ev.imag, "o")
         pt.show()
 
-    chopper = VectorChopper(rhs)
-    return chopper.chop(
-            la.solve(mat,
-                chopper.stack(rhs)))
+    return la.solve(mat, rhs)
 
 # }}}
 
diff --git a/pytential/source.py b/pytential/source.py
index 7ed794abfb2d1fa1ff32d21117e13ccfa3069a8c..19d90ae7a5daeafed5ee2d7ca71ac5345af12324 100644
--- a/pytential/source.py
+++ b/pytential/source.py
@@ -26,7 +26,7 @@ THE SOFTWARE.
 import numpy as np  # noqa: F401
 import pyopencl as cl  # noqa: F401
 import six
-from pytools import memoize_method
+from pytools import memoize_in
 from sumpy.fmm import UnableToCollectTimingData
 
 
@@ -53,23 +53,51 @@ class PotentialSource(object):
     def preprocess_optemplate(self, name, discretizations, expr):
         return expr
 
+    @property
+    def real_dtype(self):
+        raise NotImplementedError
+
+    @property
+    def complex_dtype(self):
+        raise NotImplementedError
+
+    def get_p2p(self, actx, kernels):
+        raise NotImplementedError
+
+
+class _SumpyP2PMixin(object):
+
+    def get_p2p(self, actx, kernels):
+        """Return a :class:`sumpy.p2p.P2P` for *kernels*, cached on *actx*."""
+        # The cache on *actx* is shared by all sources, so the value dtype
+        # has to be part of the key rather than captured from *self*.
+        @memoize_in(actx, (_SumpyP2PMixin, "p2p"))
+        def p2p(kernels, value_dtype):
+            from sumpy.p2p import P2P
+            return P2P(actx.context,
+                    kernels, exclude_self=False, value_dtypes=value_dtype)
+
+        if any(knl.is_complex_valued for knl in kernels):
+            return p2p(kernels, self.complex_dtype)
+        else:
+            return p2p(kernels, self.real_dtype)
+
 
 # {{{ point potential source
 
-class PointPotentialSource(PotentialSource):
+class PointPotentialSource(_SumpyP2PMixin, PotentialSource):
     """
     .. attribute:: nodes
 
-        An :class:`pyopencl.array.Array` of shape ``[ambient_dim, nnodes]``.
+        An :class:`pyopencl.array.Array` of shape ``[ambient_dim, ndofs]``.
 
-    .. attribute:: nnodes
+    .. attribute:: ndofs
 
     .. automethod:: cost_model_compute_potential_insn
     .. automethod:: exec_compute_potential_insn
     """
 
-    def __init__(self, cl_context, nodes):
-        self.cl_context = cl_context
+    def __init__(self, nodes):
         self._nodes = nodes
 
     @property
@@ -88,8 +116,9 @@ class PointPotentialSource(PotentialSource):
         return self._nodes.dtype
 
     @property
-    def nnodes(self):
-        return self._nodes.shape[-1]
+    def ndofs(self):
+        # Every per-axis coordinate array has the same length; use the first.
+        return self._nodes[0].shape[0]
 
     @property
     def complex_dtype(self):
@@ -111,27 +140,11 @@ class PointPotentialSource(PotentialSource):
 
         return result
 
-    @memoize_method
-    def get_p2p(self, kernels):
-        # needs to be separate method for caching
-
-        from pytools import any
-        if any(knl.is_complex_valued for knl in kernels):
-            value_dtype = self.complex_dtype
-        else:
-            value_dtype = self.real_dtype
-
-        from sumpy.p2p import P2P
-        p2p = P2P(self.cl_context,
-                    kernels, exclude_self=False, value_dtypes=value_dtype)
-
-        return p2p
-
-    def cost_model_compute_potential_insn(self, queue, insn, bound_expr,
+    def cost_model_compute_potential_insn(self, actx, insn, bound_expr,
                                           evaluate, costs):
         raise NotImplementedError
 
-    def exec_compute_potential_insn(self, queue, insn, bound_expr, evaluate,
+    def exec_compute_potential_insn(self, actx, insn, bound_expr, evaluate,
             return_timing_data):
         if return_timing_data:
             from warnings import warn
@@ -145,33 +158,41 @@ class PointPotentialSource(PotentialSource):
         for arg_name, arg_expr in six.iteritems(insn.kernel_arguments):
             kernel_args[arg_name] = evaluate(arg_expr)
 
-        strengths = evaluate(insn.density).with_queue(queue).copy()
+        strengths = evaluate(insn.density)
 
         # FIXME: Do this all at once
-        result = []
+        results = []
         for o in insn.outputs:
             target_discr = bound_expr.places.get_discretization(
                     o.target_name.geometry, o.target_name.discr_stage)
 
             # no on-disk kernel caching
             if p2p is None:
-                p2p = self.get_p2p(insn.kernels)
+                p2p = self.get_p2p(actx, insn.kernels)
 
-            evt, output_for_each_kernel = p2p(queue,
-                    target_discr.nodes(), self._nodes,
+            from pytential.utils import flatten_if_needed
+            evt, output_for_each_kernel = p2p(actx.queue,
+                    flatten_if_needed(actx, target_discr.nodes()),
+                    self._nodes,
                     [strengths], **kernel_args)
 
-            result.append((o.name, output_for_each_kernel[o.kernel_index]))
+            from meshmode.discretization import Discretization
+            result = output_for_each_kernel[o.kernel_index]
+            if isinstance(target_discr, Discretization):
+                from meshmode.dof_array import unflatten
+                result = unflatten(actx, target_discr, result)
+
+            results.append((o.name, result))
 
         timing_data = {}
-        return result, timing_data
+        return results, timing_data
 
 # }}}
 
 
 # {{{ layer potential source
 
-class LayerPotentialSourceBase(PotentialSource):
+class LayerPotentialSourceBase(_SumpyP2PMixin, PotentialSource):
     """A discretization of a layer potential using panel-based geometry, with
     support for refinement and upsampling.
 
@@ -197,13 +218,17 @@ class LayerPotentialSourceBase(PotentialSource):
     def ambient_dim(self):
         return self.density_discr.ambient_dim
 
+    @property
+    def _setup_actx(self):
+        return self.density_discr._setup_actx
+
     @property
     def dim(self):
         return self.density_discr.dim
 
     @property
     def cl_context(self):
-        return self.density_discr.cl_context
+        return self.density_discr._setup_actx.context
 
     @property
     def real_dtype(self):
@@ -213,22 +238,6 @@ class LayerPotentialSourceBase(PotentialSource):
     def complex_dtype(self):
         return self.density_discr.complex_dtype
 
-    @memoize_method
-    def get_p2p(self, kernels):
-        # needs to be separate method for caching
-
-        from pytools import any
-        if any(knl.is_complex_valued for knl in kernels):
-            value_dtype = self.density_discr.complex_dtype
-        else:
-            value_dtype = self.density_discr.real_dtype
-
-        from sumpy.p2p import P2P
-        p2p = P2P(self.cl_context,
-                  kernels, exclude_self=False, value_dtypes=value_dtype)
-
-        return p2p
-
     # {{{ fmm setup helpers
 
     def get_fmm_kernel(self, kernels):
@@ -252,10 +261,12 @@ class LayerPotentialSourceBase(PotentialSource):
             return self.real_dtype
 
     def get_fmm_expansion_wrangler_extra_kwargs(
-            self, queue, out_kernels, tree_user_source_ids, arguments, evaluator):
+            self, actx, out_kernels, tree_user_source_ids, arguments, evaluator):
         # This contains things like the Helmholtz parameter k or
         # the normal directions for double layers.
 
+        queue = actx.queue
+
         def reorder_sources(source_array):
             if isinstance(source_array, cl.array.Array):
                 return (source_array
@@ -269,15 +280,17 @@ class LayerPotentialSourceBase(PotentialSource):
         source_extra_kwargs = {}
 
         from sumpy.tools import gather_arguments, gather_source_arguments
-        from pytools.obj_array import with_object_array_or_scalar
+        from pytools.obj_array import obj_array_vectorize
+        from pytential.utils import flatten_if_needed
+
         for func, var_dict in [
                 (gather_arguments, kernel_extra_kwargs),
                 (gather_source_arguments, source_extra_kwargs),
                 ]:
             for arg in func(out_kernels):
-                var_dict[arg.name] = with_object_array_or_scalar(
+                var_dict[arg.name] = obj_array_vectorize(
                         reorder_sources,
-                        evaluator(arguments[arg.name]))
+                        flatten_if_needed(actx, evaluator(arguments[arg.name])))
 
         return kernel_extra_kwargs, source_extra_kwargs
 
diff --git a/pytential/symbolic/compiler.py b/pytential/symbolic/compiler.py
index 3fcfbf2ec6eb4c42cb2a6d1af24bdb0bb1eca297..eb01532d96f71dbc7322fc6db0b23b32fc431ada 100644
--- a/pytential/symbolic/compiler.py
+++ b/pytential/symbolic/compiler.py
@@ -270,13 +270,13 @@ def dot_dataflow_graph(code, max_node_label_length=30,
         for dep in insn.get_dependencies():
             gen_expr_arrow(dep, node_names[insn])
 
-    from pytools.obj_array import is_obj_array
+    code_res = code.result
 
-    if is_obj_array(code.result):
-        for subexp in code.result:
+    if isinstance(code_res, np.ndarray) and code_res.dtype.char == "O":
+        for subexp in code_res:
             gen_expr_arrow(subexp, "result")
     else:
-        gen_expr_arrow(code.result, "result")
+        gen_expr_arrow(code_res, "result")
 
     return "digraph dataflow {\n%s\n}\n" % "\n".join(result)
 
@@ -331,7 +331,7 @@ class Code(object):
         discardable_vars = set(available_names) - needed_vars
 
         # {{{ make sure results do not get discarded
-        from pytools.obj_array import with_object_array_or_scalar
+        from pytools.obj_array import obj_array_vectorize
 
         from pytential.symbolic.mappers import DependencyMapper
         dm = DependencyMapper(composite_leaves=False)
@@ -347,7 +347,7 @@ class Code(object):
                 assert isinstance(var, Variable)
                 discardable_vars.discard(var.name)
 
-        with_object_array_or_scalar(remove_result_variable, self.result)
+        obj_array_vectorize(remove_result_variable, self.result)
         # }}}
 
         return argmax2(available_insns), discardable_vars
@@ -387,9 +387,9 @@ class Code(object):
 
                 done_insns.add(insn)
                 assignments = (
-                        self.get_exec_function(insn, exec_mapper)
-                        (exec_mapper.queue, insn, exec_mapper.bound_expr,
-                            exec_mapper))
+                        self.get_exec_function(insn, exec_mapper)(
+                            exec_mapper.array_context,
+                            insn, exec_mapper.bound_expr, exec_mapper))
 
                 assignees = insn.get_assignees()
                 for target, value in assignments:
@@ -412,8 +412,8 @@ class Code(object):
             raise RuntimeError("not all instructions are reachable"
                     "--did you forget to pass a value for a placeholder?")
 
-        from pytools.obj_array import with_object_array_or_scalar
-        return with_object_array_or_scalar(exec_mapper, self.result)
+        from pytools.obj_array import obj_array_vectorize
+        return obj_array_vectorize(exec_mapper, self.result)
 
     # }}}
 
@@ -480,8 +480,8 @@ class OperatorCompiler(IdentityMapper):
 
         # Put the toplevel expressions into variables as well.
 
-        from pytools.obj_array import with_object_array_or_scalar
-        result = with_object_array_or_scalar(self.assign_to_new_var, result)
+        from pytools.obj_array import obj_array_vectorize
+        result = obj_array_vectorize(self.assign_to_new_var, result)
 
         return Code(self.code, result)
 
diff --git a/pytential/symbolic/dof_connection.py b/pytential/symbolic/dof_connection.py
index 9d23fb73d980e08b7bee5bece36c738a688f232d..4863652c6819de52b8d8d14f6758197a471797f1 100644
--- a/pytential/symbolic/dof_connection.py
+++ b/pytential/symbolic/dof_connection.py
@@ -27,12 +27,12 @@ THE SOFTWARE.
 """
 
 import six
-import pyopencl as cl
-import pyopencl.array # noqa
-from pytools import memoize
+from meshmode.array_context import PyOpenCLArrayContext  # noqa
+from meshmode.dof_array import DOFArray
+import numpy as np
+from pytools import memoize_in
 
 import loopy as lp
-from loopy.version import MOST_RECENT_LANGUAGE_VERSION
 
 
 __doc__ = """
@@ -69,7 +69,11 @@ class GranularityConnection(object):
     def to_discr(self):
         return self.discr
 
-    def __call__(self, queue, vec):
+    @property
+    def array_context(self):
+        return self.discr._setup_actx
+
+    def __call__(self, ary):
         raise NotImplementedError()
 
 
@@ -85,61 +89,66 @@ class CenterGranularityConnection(GranularityConnection):
     def __init__(self, discr):
         super(CenterGranularityConnection, self).__init__(discr)
 
-    @memoize
-    def kernel(self):
-        knl = lp.make_kernel(
-            "[srclen, dstlen] -> {[i]: 0 <= i < srclen}",
-            """
-            dst[2*i] = src1[i]
-            dst[2*i + 1] = src2[i]
-            """,
-            [
-                lp.GlobalArg("src1", shape="srclen"),
-                lp.GlobalArg("src2", shape="srclen"),
-                lp.GlobalArg("dst", shape="dstlen"),
-                "..."
-            ],
-            name="node_interleaver_knl",
-            assumptions="2*srclen = dstlen",
-            lang_version=MOST_RECENT_LANGUAGE_VERSION,
-            )
-
-        knl = lp.split_iname(knl, "i", 128,
-                inner_tag="l.0", outer_tag="g.0")
-        return knl
-
-    def __call__(self, queue, vecs):
+    def _interleave_dof_arrays(self, ary1, ary2):
+        if not isinstance(ary1, DOFArray) or not isinstance(ary2, DOFArray):
+            raise TypeError("non-array passed to connection")
+
+        @memoize_in(self.array_context,
+                 (CenterGranularityConnection, "interleave"))
+        def prg():
+            from meshmode.array_context import make_loopy_program
+            return make_loopy_program(
+                    """{[iel, idof]: 0<=iel<nelements and 0<=idof<nunit_dofs}""",
+                    """
+                    dst[iel, 2*idof] = src1[iel, idof]
+                    dst[iel, 2*idof + 1] = src2[iel, idof]
+                    """,
+                    [
+                        lp.GlobalArg("src1", shape="(nelements, nunit_dofs)"),
+                        lp.GlobalArg("src2", shape="(nelements, nunit_dofs)"),
+                        lp.GlobalArg("dst", shape="(nelements, 2*nunit_dofs)"),
+                        "...",
+                        ],
+                    name="interleave")
+
+        results = []
+        for grp, src1, src2 in zip(self.discr.groups, ary1, ary2):
+            if src1.dtype != src2.dtype:
+                raise ValueError("dtype mismatch in inputs")
+            result = self.array_context.empty(
+                    (grp.nelements, 2 * grp.nunit_dofs), dtype=src1.dtype)
+            self.array_context.call_loopy(
+                    prg(), src1=src1, src2=src2, dst=result,
+                    nelements=grp.nelements, nunit_dofs=grp.nunit_dofs)
+            results.append(result)
+        return DOFArray.from_list(self.array_context, results)
+
+    def __call__(self, arys):
         r"""
-        :arg vecs: a single :class:`pyopencl.array.Array` or a pair of arrays.
+        :arg arys: either a single :class:`~meshmode.dof_array.DOFArray`
+            or a list/tuple with exactly 2 entries that are both
+            :class:`~meshmode.dof_array.DOFArray`\ s.
+            Additionally, this function vectorizes over object arrays of
+            :class:`~meshmode.dof_array.DOFArray`\ s.
+
         :return: an interleaved array or list of :class:`pyopencl.array.Array`s.
             If *vecs* was a pair of arrays :math:`(x, y)`, they are
             interleaved as :math:`[x_1, y_1, x_2, y_2, \ddots, x_n, y_n]`.
             A single array is simply interleaved with itself.
-        """
 
-        if isinstance(vecs, cl.array.Array):
-            vecs = [[vecs], [vecs]]
-        elif isinstance(vecs, (list, tuple)):
-            assert len(vecs) == 2
+        """
+        if isinstance(arys, np.ndarray):
+            arys = (arys, arys)
+        if isinstance(arys, (list, tuple)):
+            assert len(arys) == 2
         else:
-            raise ValueError('cannot interleave arrays')
+            raise ValueError("cannot interleave arrays")
 
-        result = []
-        for src1, src2 in zip(vecs[0], vecs[1]):
-            if not isinstance(src1, cl.array.Array) \
-                    or not isinstance(src2, cl.array.Array):
-                raise TypeError('non-array passed to connection')
-
-            if src1.shape != (self.discr.nnodes,) \
-                    or src2.shape != (self.discr.nnodes,):
-                raise ValueError('invalid shape of incoming array')
-
-            axis = cl.array.empty(queue, 2 * len(src1), src1.dtype)
-            self.kernel()(queue,
-                    src1=src1, src2=src2, dst=axis)
-            result.append(axis)
-
-        return result[0] if len(result) == 1 else result
+        if isinstance(arys[0], DOFArray):
+            return self._interleave_dof_arrays(*arys)
+        else:
+            from pytools.obj_array import obj_array_vectorize_n_args
+            return obj_array_vectorize_n_args(self._interleave_dof_arrays, *arys)
 
 # }}}
 
@@ -190,11 +199,11 @@ class DOFConnection(object):
             self.from_discr = self.connections[0].from_discr
             self.to_discr = self.connections[-1].to_discr
 
-    def __call__(self, queue, vec):
+    def __call__(self, ary):
         for conn in self.connections:
-            vec = conn(queue, vec)
+            ary = conn(ary)
 
-        return vec
+        return ary
 
 
 def connection_from_dds(places, from_dd, to_dd):
diff --git a/pytential/symbolic/execution.py b/pytential/symbolic/execution.py
index 9e3833f07eb8cd407bea197af1af2eb9bf474aca..3beabc2c75457ef05eec31c21bad5bfdcee11536 100644
--- a/pytential/symbolic/execution.py
+++ b/pytential/symbolic/execution.py
@@ -25,6 +25,8 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 THE SOFTWARE.
 """
 
+from typing import Optional
+
 import six
 from six.moves import zip
 
@@ -36,9 +38,11 @@ import pyopencl as cl
 import pyopencl.array  # noqa
 import pyopencl.clmath  # noqa
 
-from loopy.version import MOST_RECENT_LANGUAGE_VERSION
+from meshmode.array_context import PyOpenCLArrayContext
+from meshmode.dof_array import DOFArray, thaw
 
 from pytools import memoize_in
+
 from pytential import sym
 
 import logging
@@ -71,7 +75,7 @@ def mesh_el_view(mesh, group_nr, global_array):
 
 
 class EvaluationMapperBase(PymbolicEvaluationMapper):
-    def __init__(self, bound_expr, queue, context=None,
+    def __init__(self, bound_expr, actx: PyOpenCLArrayContext, context=None,
             target_geometry=None,
             target_points=None, target_normals=None, target_tangents=None):
         if context is None:
@@ -80,93 +84,95 @@ class EvaluationMapperBase(PymbolicEvaluationMapper):
 
         self.bound_expr = bound_expr
         self.places = bound_expr.places
-        self.queue = queue
+        self.array_context = actx
+
+        if not isinstance(actx, PyOpenCLArrayContext):
+            raise NotImplementedError("evaluation with non-PyOpenCL array context")
+
+        self.queue = actx.queue
 
     # {{{ map_XXX
 
-    def _map_minmax(self, func, inherited, expr):
+    def _map_minmax(self, func, inherited_func, expr):
         ev_children = [self.rec(ch) for ch in expr.children]
-        from functools import reduce, partial
-        if any(isinstance(ch, cl.array.Array) for ch in ev_children):
-            return reduce(partial(func, queue=self.queue), ev_children)
+        from functools import reduce
+        from meshmode.dof_array import DOFArray
+        if any(isinstance(ch, (cl.array.Array, DOFArray)) for ch in ev_children):
+            return reduce(func, ev_children)
         else:
-            return inherited(expr)
+            return inherited_func(expr)
 
     def map_max(self, expr):
         return self._map_minmax(
-                cl.array.maximum,
+                self.array_context.np.maximum,
                 super(EvaluationMapperBase, self).map_max,
                 expr)
 
     def map_min(self, expr):
         return self._map_minmax(
-                cl.array.minimum,
+                self.array_context.np.minimum,
                 super(EvaluationMapperBase, self).map_min,
                 expr)
 
     def map_node_sum(self, expr):
-        return cl.array.sum(self.rec(expr.operand)).get()[()]
+        return sum(
+                cl.array.sum(grp_ary).get()[()]
+                for grp_ary in self.rec(expr.operand))
 
     def map_node_max(self, expr):
-        return cl.array.max(self.rec(expr.operand)).get()[()]
+        return max(
+                cl.array.max(grp_ary).get()[()]
+                for grp_ary in self.rec(expr.operand))
 
     def _map_elementwise_reduction(self, reduction_name, expr):
         @memoize_in(self.places, "elementwise_node_"+reduction_name)
         def node_knl():
-            import loopy as lp
-            knl = lp.make_kernel(
-                    """{[el, idof, jdof]:
-                        0<=el<nelements and
-                        0<=idof, jdof<ndofs}
-                    """,
+            from meshmode.array_context import make_loopy_program
+            return make_loopy_program(
+                    """{[iel, idof, jdof]:
+                        0<=iel<nelements and
+                        0<=idof, jdof<ndofs}""",
                     """
-                    result[el, idof] = %s(jdof, operand[el, jdof])
+                    result[iel, idof] = %s(jdof, operand[iel, jdof])
                     """ % reduction_name,
-                    default_offset=lp.auto,
-                    lang_version=MOST_RECENT_LANGUAGE_VERSION)
-
-            knl = lp.tag_inames(knl, "el:g.0,idof:l.0")
-            return knl
+                    name="nodewise_reduce")
 
         @memoize_in(self.places, "elementwise_"+reduction_name)
         def element_knl():
-            import loopy as lp
-            knl = lp.make_kernel(
-                    """{[el, jdof]:
-                        0<=el<nelements and
+            from meshmode.array_context import make_loopy_program
+            return make_loopy_program(
+                    """{[iel, jdof]:
+                        0<=iel<nelements and
                         0<=jdof<ndofs}
                     """,
                     """
-                    result[el] = %s(jdof, operand[el, jdof])
+                    result[iel, 0] = %s(jdof, operand[iel, jdof])
                     """ % reduction_name,
-                    default_offset=lp.auto,
-                    lang_version=MOST_RECENT_LANGUAGE_VERSION)
-
-            return knl
-
-        def _reduce(nresult, knl, view):
-            result = cl.array.empty(self.queue, nresult, operand.dtype)
-            for igrp, group in enumerate(discr.groups):
-                knl(self.queue,
-                        operand=group.view(operand),
-                        result=view(igrp, result))
-
-            return result
+                    name="elementwise_reduce")
 
         discr = self.places.get_discretization(
                 expr.dofdesc.geometry, expr.dofdesc.discr_stage)
         operand = self.rec(expr.operand)
-        assert operand.shape == (discr.nnodes,)
+        assert operand.shape == (len(discr.groups),)
+
+        def _reduce(knl, result):
+            for grp in discr.groups:
+                self.array_context.call_loopy(knl,
+                        operand=operand[grp.index],
+                        result=result[grp.index])
 
+            return result
+
+        dtype = operand.entry_dtype
         granularity = expr.dofdesc.granularity
         if granularity is sym.GRANULARITY_NODE:
-            return _reduce(discr.nnodes,
-                    node_knl(),
-                    lambda g, x: discr.groups[g].view(x))
+            return _reduce(node_knl(),
+                    discr.empty(self.array_context, dtype=dtype))
         elif granularity is sym.GRANULARITY_ELEMENT:
-            return _reduce(discr.mesh.nelements,
-                    element_knl(),
-                    lambda g, x: mesh_el_view(discr.mesh, g, x))
+            result = DOFArray.from_list(self.array_context, [
+                    self.array_context.empty((grp.nelements, 1), dtype=dtype)
+                    for grp in discr.groups])
+            return _reduce(element_knl(), result)
         else:
             raise ValueError('unsupported granularity: %s' % granularity)
 
@@ -182,18 +188,16 @@ class EvaluationMapperBase(PymbolicEvaluationMapper):
     def map_ones(self, expr):
         discr = self.places.get_discretization(
                 expr.dofdesc.geometry, expr.dofdesc.discr_stage)
-        result = (discr
-                .empty(queue=self.queue, dtype=discr.real_dtype)
-                .with_queue(self.queue))
+        result = discr.empty(actx=self.array_context, dtype=discr.real_dtype)
 
-        result.fill(1)
+        for grp_ary in result:
+            grp_ary.fill(1)
         return result
 
     def map_node_coordinate_component(self, expr):
         discr = self.places.get_discretization(
                 expr.dofdesc.geometry, expr.dofdesc.discr_stage)
-        return discr.nodes()[expr.ambient_axis] \
-                .with_queue(self.queue)
+        return thaw(self.array_context, discr.nodes()[expr.ambient_axis])
 
     def map_num_reference_derivative(self, expr):
         discr = self.places.get_discretization(
@@ -201,16 +205,12 @@ class EvaluationMapperBase(PymbolicEvaluationMapper):
 
         from pytools import flatten
         ref_axes = flatten([axis] * mult for axis, mult in expr.ref_axes)
-        return discr.num_reference_derivative(
-                self.queue,
-                ref_axes, self.rec(expr.operand)) \
-                        .with_queue(self.queue)
+        return discr.num_reference_derivative(ref_axes, self.rec(expr.operand))
 
     def map_q_weight(self, expr):
         discr = self.places.get_discretization(
                 expr.dofdesc.geometry, expr.dofdesc.discr_stage)
-        return discr.quad_weights(self.queue) \
-                .with_queue(self.queue)
+        return thaw(self.array_context, discr.quad_weights())
 
     def map_inverse(self, expr):
         bound_op_cache = self.bound_expr.places._get_cache("bound_op")
@@ -236,9 +236,9 @@ class EvaluationMapperBase(PymbolicEvaluationMapper):
     def map_interpolation(self, expr):
         operand = self.rec(expr.operand)
 
-        if isinstance(operand, (cl.array.Array, list)):
+        if isinstance(operand, (cl.array.Array, list, np.ndarray)):
             conn = self.places.get_connection(expr.from_dd, expr.to_dd)
-            return conn(self.queue, operand)
+            return conn(operand)
         elif isinstance(operand, (int, float, complex, np.number)):
             return operand
         else:
@@ -262,28 +262,25 @@ class EvaluationMapperBase(PymbolicEvaluationMapper):
 
     # }}}
 
-    def exec_assign(self, queue, insn, bound_expr, evaluate):
+    def exec_assign(self, actx: PyOpenCLArrayContext, insn, bound_expr, evaluate):
         return [(name, evaluate(expr))
                 for name, expr in zip(insn.names, insn.exprs)]
 
-    def exec_compute_potential_insn(self, queue, insn, bound_expr, evaluate):
+    def exec_compute_potential_insn(
+            self, actx: PyOpenCLArrayContext, insn, bound_expr, evaluate):
         raise NotImplementedError
 
     # {{{ functions
 
     def apply_real(self, args):
-        from pytools.obj_array import is_obj_array
         arg, = args
-        result = self.rec(arg)
-        assert not is_obj_array(result)  # numpy bug with obj_array.imag
-        return result.real
+        from pytools.obj_array import obj_array_real
+        return obj_array_real(self.rec(arg))
 
     def apply_imag(self, args):
-        from pytools.obj_array import is_obj_array
         arg, = args
-        result = self.rec(arg)
-        assert not is_obj_array(result)  # numpy bug with obj_array.imag
-        return result.imag
+        from pytools.obj_array import obj_array_imag
+        return obj_array_imag(self.rec(arg))
 
     def apply_conj(self, args):
         arg, = args
@@ -306,8 +303,7 @@ class EvaluationMapperBase(PymbolicEvaluationMapper):
             if all(isinstance(arg, Number) for arg in args):
                 return getattr(np, expr.function.name)(*args)
             else:
-                return getattr(cl.clmath, expr.function.name)(
-                        *args, queue=self.queue)
+                return getattr(self.array_context.np, expr.function.name)(*args)
 
         else:
             return super(EvaluationMapperBase, self).map_call(expr)
@@ -319,19 +315,20 @@ class EvaluationMapperBase(PymbolicEvaluationMapper):
 
 class EvaluationMapper(EvaluationMapperBase):
 
-    def __init__(self, bound_expr, queue, context=None,
+    def __init__(self, bound_expr, actx, context=None,
             timing_data=None):
-        EvaluationMapperBase.__init__(self, bound_expr, queue, context)
+        EvaluationMapperBase.__init__(self, bound_expr, actx, context)
         self.timing_data = timing_data
 
-    def exec_compute_potential_insn(self, queue, insn, bound_expr, evaluate):
+    def exec_compute_potential_insn(
+            self, actx: PyOpenCLArrayContext, insn, bound_expr, evaluate):
         source = bound_expr.places.get_geometry(insn.source.geometry)
 
         return_timing_data = self.timing_data is not None
 
         result, timing_data = (
                 source.exec_compute_potential_insn(
-                    queue, insn, bound_expr, evaluate, return_timing_data))
+                    actx, insn, bound_expr, evaluate, return_timing_data))
 
         if return_timing_data:
             # The compiler ensures this.
@@ -354,25 +351,26 @@ class CostModelMapper(EvaluationMapperBase):
     data is collected.
     """
 
-    def __init__(self, bound_expr, queue, context=None,
+    def __init__(self, bound_expr, actx, context=None,
             target_geometry=None,
             target_points=None, target_normals=None, target_tangents=None):
         if context is None:
             context = {}
         EvaluationMapperBase.__init__(
-                self, bound_expr, queue, context,
+                self, bound_expr, actx, context,
                 target_geometry,
                 target_points,
                 target_normals,
                 target_tangents)
         self.modeled_cost = {}
 
-    def exec_compute_potential_insn(self, queue, insn, bound_expr, evaluate):
+    def exec_compute_potential_insn(
+            self, actx: PyOpenCLArrayContext, insn, bound_expr, evaluate):
         source = bound_expr.places.get_geometry(insn.source.geometry)
 
         result, cost_model_result = (
                 source.cost_model_compute_potential_insn(
-                    queue, insn, bound_expr, evaluate))
+                    actx, insn, bound_expr, evaluate))
 
         # The compiler ensures this.
         assert insn not in self.modeled_cost
@@ -388,20 +386,25 @@ class CostModelMapper(EvaluationMapperBase):
 
 # {{{ scipy-like mat-vec op
 
-class MatVecOp:
+class MatVecOp(object):
     """A :class:`scipy.sparse.linalg.LinearOperator` work-alike.
     Exposes a :mod:`pytential` operator as a generic matrix operation,
-    i.e. given :math:`x`, compute :math:`Ax`.
+    i.e., given :math:`x`, compute :math:`Ax`.
+
+    .. attribute:: shape
+    .. attribute:: dtype
+    .. automethod:: matvec
     """
 
     def __init__(self,
-            bound_expr, queue, arg_name, dtype, total_dofs,
-            starts_and_ends, extra_args):
+            bound_expr, actx: PyOpenCLArrayContext,
+            arg_name, dtype, total_dofs, discrs, starts_and_ends, extra_args):
         self.bound_expr = bound_expr
-        self.queue = queue
+        self.array_context = actx
         self.arg_name = arg_name
         self.dtype = dtype
         self.total_dofs = total_dofs
+        self.discrs = discrs
         self.starts_and_ends = starts_and_ends
         self.extra_args = extra_args
 
@@ -409,34 +412,70 @@ class MatVecOp:
     def shape(self):
         return (self.total_dofs, self.total_dofs)
 
-    def matvec(self, x):
-        if isinstance(x, np.ndarray):
-            x = cl.array.to_device(self.queue, x)
-            out_host = True
-        else:
-            out_host = False
+    @property
+    def _operator_uses_obj_array(self):
+        return len(self.discrs) > 1
+
+    def flatten(self, ary):
+        # Return a flat version of *ary*. The returned value is suitable for
+        # use with solvers whose API expects a one-dimensional array.
+        if not self._operator_uses_obj_array:
+            ary = [ary]
+
+        result = self.array_context.empty(self.total_dofs, self.dtype)
+        from pytential.utils import flatten_if_needed
+        for res_i, (start, end) in zip(ary, self.starts_and_ends):
+            result[start:end] = flatten_if_needed(self.array_context, res_i)
+        return result
 
-        do_split = len(self.starts_and_ends) > 1
-        from pytools.obj_array import make_obj_array
+    def unflatten(self, ary):
+        # Convert a flat version of *ary* into a structured version.
+        components = []
+        for discr, (start, end) in zip(self.discrs, self.starts_and_ends):
+            component = ary[start:end]
+            from meshmode.discretization import Discretization
+            if isinstance(discr, Discretization):
+                from meshmode.dof_array import unflatten
+                component = unflatten(self.array_context, discr, component)
+            components.append(component)
+
+        if self._operator_uses_obj_array:
+            from pytools.obj_array import make_obj_array
+            return make_obj_array(components)
+        else:
+            return components[0]
 
-        if do_split:
-            x = make_obj_array(
-                    [x[start:end] for start, end in self.starts_and_ends])
+    def matvec(self, x):
+        # Three types of inputs are supported:
+        # * flat NumPy arrays
+        #    => output is a flat NumPy array
+        # * flat PyOpenCL arrays
+        #    => output is a flat PyOpenCL array
+        # * structured arrays (object arrays/DOFArrays)
+        #    => output has same structure as input
+        if isinstance(x, np.ndarray) and x.dtype.char != "O":
+            x = self.array_context.from_numpy(x)
+            flat = True
+            host = True
+            assert x.shape == (self.total_dofs,)
+        elif isinstance(x, cl.array.Array):
+            flat = True
+            host = False
+            assert x.shape == (self.total_dofs,)
+        elif isinstance(x, np.ndarray) and x.dtype.char == "O":
+            flat = False
+            host = False
+        else:
+            raise ValueError("unsupported input type")
 
         args = self.extra_args.copy()
-        args[self.arg_name] = x
-        result = self.bound_expr(self.queue, **args)
-
-        if do_split:
-            # re-join what was split
-            joined_result = cl.array.empty(self.queue, self.total_dofs,
-                    self.dtype)
-            for res_i, (start, end) in zip(result, self.starts_and_ends):
-                joined_result[start:end] = res_i
-            result = joined_result
+        args[self.arg_name] = self.unflatten(x) if flat else x
+        result = self.bound_expr(self.array_context, **args)
 
-        if out_host:
-            result = result.get()
+        if flat:
+            result = self.flatten(result)
+        if host:
+            result = self.array_context.to_numpy(result)
 
         return result
 
@@ -632,25 +671,8 @@ class GeometryCollection(object):
                 raise TypeError("Values in 'places' must be discretization, targets "
                         "or layer potential sources.")
 
-        # check cl_context
-        from pytools import is_single_valued
-        cl_contexts = []
-        for p in six.itervalues(self.places):
-            if isinstance(p, (PotentialSource, Discretization)):
-                cl_contexts.append(p.cl_context)
-            elif isinstance(p, TargetBase):
-                nodes = p.nodes()[0]
-                if isinstance(nodes, cl.array.Array) and nodes.queue is not None:
-                    cl_contexts.append(nodes.queue.context)
-            else:
-                raise ValueError("unexpected value type in 'places'")
-
-        if not is_single_valued(cl_contexts):
-            raise RuntimeError("All 'places' must have the same CL context.")
-
-        self.cl_context = cl_contexts[0]
-
         # check ambient_dim
+        from pytools import is_single_valued
         ambient_dims = [p.ambient_dim for p in six.itervalues(self.places)]
         if not is_single_valued(ambient_dims):
             raise RuntimeError("All 'places' must have the same ambient dimension.")
@@ -719,12 +741,11 @@ class GeometryCollection(object):
             from pytential import sym
             from pytential.qbx.refinement import _refine_for_global_qbx
 
-            with cl.CommandQueue(lpot_source.cl_context) as queue:
-                # NOTE: this adds the required discretizations to the cache
-                dofdesc = sym.DOFDescriptor(geometry, discr_stage)
-                _refine_for_global_qbx(self, dofdesc,
-                        lpot_source.refiner_code_container.get_wrangler(queue),
-                        _copy_collection=False)
+            # NOTE: this adds the required discretizations to the cache
+            dofdesc = sym.DOFDescriptor(geometry, discr_stage)
+            _refine_for_global_qbx(self, dofdesc,
+                    lpot_source.refiner_code_container.get_wrangler(),
+                    _copy_collection=False)
 
             discr = self._get_discr_from_cache(geometry, discr_stage)
 
@@ -829,7 +850,9 @@ class BoundExpression(object):
         self.code.execute(cost_model_mapper)
         return cost_model_mapper.get_modeled_cost()
 
-    def scipy_op(self, queue, arg_name, dtype, domains=None, **extra_args):
+    def scipy_op(
+            self, actx: PyOpenCLArrayContext, arg_name, dtype,
+            domains=None, **extra_args):
         """
         :arg domains: a list of discretization identifiers or
             *None* values indicating the domains on which each component of the
@@ -842,8 +865,7 @@ class BoundExpression(object):
             and returning :class:`pyopencl.array.Array` arrays.
         """
 
-        from pytools.obj_array import is_obj_array
-        if is_obj_array(self.code.result):
+        if isinstance(self.code.result, np.ndarray):
             nresults = len(self.code.result)
         else:
             nresults = 1
@@ -852,15 +874,18 @@ class BoundExpression(object):
                 self.places, domains, self.places.auto_target)
 
         total_dofs = 0
+        discrs = []
         starts_and_ends = []
         for dom_name in domains:
             if dom_name is None:
+                discr = None
                 size = 1
             else:
                 discr = self.places.get_discretization(
                         dom_name.geometry, dom_name.discr_stage)
-                size = discr.nnodes
+                size = discr.ndofs
 
+            discrs.append(discr)
             starts_and_ends.append((total_dofs, total_dofs+size))
             total_dofs += size
 
@@ -869,10 +894,11 @@ class BoundExpression(object):
         # fair, since these operators are usually only used
         # for linear system solving, in which case the assumption
         # has to be true.
-        return MatVecOp(self, queue,
-                arg_name, dtype, total_dofs, starts_and_ends, extra_args)
+        return MatVecOp(self, actx,
+                arg_name, dtype, total_dofs, discrs, starts_and_ends, extra_args)
 
-    def eval(self, queue, context=None, timing_data=None):
+    def eval(self, context=None, timing_data=None,
+            array_context: Optional[PyOpenCLArrayContext] = None):
         """Evaluate the expression in *self*, using the
         :class:`pyopencl.CommandQueue` *queue* and the
         input variables given in the dictionary *context*.
@@ -880,25 +906,81 @@ class BoundExpression(object):
         :arg timing_data: A dictionary into which timing
             data will be inserted during evaluation.
             (experimental)
+        :arg array_context: only needs to be supplied if no instances of
+            :class:`~meshmode.dof_array.DOFArray` with a
+            :class:`~meshmode.array_context.PyOpenCLArrayContext`
+            are supplied as part of *context*.
         :returns: the value of the expression, as a scalar,
             :class:`pyopencl.array.Array`, or an object array of these.
         """
 
         if context is None:
             context = {}
+
+        # {{{ figure array context
+
+        array_contexts = []
+        if array_context is not None:
+            if not isinstance(array_context, PyOpenCLArrayContext):
+                raise TypeError(
+                        "first argument (if supplied) must be a "
+                        "PyOpenCLArrayContext")
+
+            array_contexts.append(array_context)
+        del array_context
+
+        def look_for_array_contexts(ary):
+            if isinstance(ary, DOFArray):
+                if ary.array_context is not None:
+                    array_contexts.append(ary.array_context)
+            elif isinstance(ary, np.ndarray) and ary.dtype.char == "O":
+                for idx in np.ndindex(ary.shape):
+                    look_for_array_contexts(ary[idx])
+            else:
+                pass
+
+        for key, val in context.items():
+            look_for_array_contexts(val)
+
+        if array_contexts:
+            from pytools import is_single_valued
+            if not is_single_valued(array_contexts):
+                raise ValueError("arguments do not agree on an array context")
+
+            array_context = array_contexts[0]
+        else:
+            array_context = None
+
+        # }}}
+
         exec_mapper = EvaluationMapper(
-                self, queue, context, timing_data=timing_data)
+                self, array_context, context, timing_data=timing_data)
         return self.code.execute(exec_mapper)
 
-    def __call__(self, queue, **args):
+    def __call__(self, *args, **kwargs):
         """Evaluate the expression in *self*, using the
         :class:`pyopencl.CommandQueue` *queue* and the
         input variables given in the dictionary *context*.
 
         :returns: the value of the expression, as a scalar,
-            :class:`pyopencl.array.Array`, or an object array of these.
+            :class:`meshmode.dof_array.DOFArray`, or an object array of
+            these.
         """
-        return self.eval(queue, args)
+        array_context = None
+        if len(args) == 1:
+            array_context, = args
+            if not isinstance(array_context, PyOpenCLArrayContext):
+                raise TypeError("first positional argument (if given) "
+                        "must be of type PyOpenCLArrayContext")
+
+        elif not args:
+            pass
+
+        else:
+            raise TypeError("More than one positional argument supplied. "
+                    "None or an ArrayContext expected.")
+
+        return self.eval(kwargs, array_context=array_context)
 
 
 def bind(places, expr, auto_where=None):
@@ -956,10 +1038,10 @@ def _bmat(blocks, dtypes):
     return result
 
 
-def build_matrix(queue, places, exprs, input_exprs, domains=None,
+def build_matrix(actx, places, exprs, input_exprs, domains=None,
         auto_where=None, context=None):
     """
-    :arg queue: a :class:`pyopencl.CommandQueue`.
+    :arg actx: a :class:`~meshmode.array_context.ArrayContext`.
     :arg places: a :class:`~pytential.symbolic.execution.GeometryCollection`.
         Alternatively, any list or mapping that is a valid argument for its
         constructor can also be used.
@@ -981,13 +1063,14 @@ def build_matrix(queue, places, exprs, input_exprs, domains=None,
         context = {}
 
     from pytential import GeometryCollection
-    from pytools.obj_array import is_obj_array, make_obj_array
     if not isinstance(places, GeometryCollection):
         places = GeometryCollection(places, auto_where=auto_where)
     exprs = _prepare_expr(places, exprs, auto_where=auto_where)
 
-    if not is_obj_array(exprs):
+    if not (isinstance(exprs, np.ndarray) and exprs.dtype.char == "O"):
+        from pytools.obj_array import make_obj_array
         exprs = make_obj_array([exprs])
+
     try:
         input_exprs = list(input_exprs)
     except TypeError:
@@ -1009,7 +1092,7 @@ def build_matrix(queue, places, exprs, input_exprs, domains=None,
                 domains[ibcol].geometry, domains[ibcol].discr_stage)
 
         mbuilder = MatrixBuilder(
-                queue,
+                actx,
                 dep_expr=input_exprs[ibcol],
                 other_dep_exprs=(input_exprs[:ibcol]
                                  + input_exprs[ibcol + 1:]),
@@ -1026,7 +1109,7 @@ def build_matrix(queue, places, exprs, input_exprs, domains=None,
             if isinstance(block, np.ndarray):
                 dtypes.append(block.dtype)
 
-    return cl.array.to_device(queue, _bmat(blocks, dtypes))
+    return actx.from_numpy(_bmat(blocks, dtypes))
 
 # }}}
 
diff --git a/pytential/symbolic/matrix.py b/pytential/symbolic/matrix.py
index d63f30549fdada50a0e7a64e55dd5f0e111f8a15..dc0eb3734d112a39d41de654e4258b52d05fa813 100644
--- a/pytential/symbolic/matrix.py
+++ b/pytential/symbolic/matrix.py
@@ -26,14 +26,14 @@ THE SOFTWARE.
 """
 
 import numpy as np
-import pyopencl as cl  # noqa
-import pyopencl.array  # noqa
 
 import six
 from six.moves import intern
 
 from pytools import memoize_method
 from pytential.symbolic.mappers import EvaluationMapperBase
+from pytential.utils import (
+        flatten_if_needed, flatten_to_numpy, unflatten_from_numpy)
 
 
 # {{{ helpers
@@ -56,7 +56,9 @@ def _get_layer_potential_args(mapper, expr, include_args=None):
                 and arg_name not in include_args):
             continue
 
-        kernel_args[arg_name] = mapper.rec(arg_expr)
+        kernel_args[arg_name] = flatten_if_needed(mapper.array_context,
+                mapper.rec(arg_expr)
+                )
 
     return kernel_args
 
@@ -66,7 +68,7 @@ def _get_layer_potential_args(mapper, expr, include_args=None):
 # {{{ base classes for matrix builders
 
 class MatrixBuilderBase(EvaluationMapperBase):
-    def __init__(self, queue, dep_expr, other_dep_exprs,
+    def __init__(self, actx, dep_expr, other_dep_exprs,
             dep_source, dep_discr, places, context):
         """
-        :arg queue: a :class:`pyopencl.CommandQueue`.
+        :arg actx: a :class:`~meshmode.array_context.ArrayContext`.
@@ -84,7 +86,7 @@ class MatrixBuilderBase(EvaluationMapperBase):
         """
         super(MatrixBuilderBase, self).__init__(context=context)
 
-        self.queue = queue
+        self.array_context = actx
         self.dep_expr = dep_expr
         self.other_dep_exprs = other_dep_exprs
         self.dep_source = dep_source
@@ -94,7 +96,7 @@ class MatrixBuilderBase(EvaluationMapperBase):
     # {{{
 
     def get_dep_variable(self):
-        return np.eye(self.dep_discr.nnodes, dtype=np.float64)
+        return np.eye(self.dep_discr.ndofs, dtype=np.float64)
 
     def is_kind_vector(self, x):
         return len(x.shape) == 1
@@ -196,17 +198,25 @@ class MatrixBuilderBase(EvaluationMapperBase):
         if self.is_kind_matrix(rec_operand):
             raise NotImplementedError("derivatives")
 
-        rec_operand = cl.array.to_device(self.queue, rec_operand)
+        dofdesc = expr.dofdesc
         op = sym.NumReferenceDerivative(
                 ref_axes=expr.ref_axes,
                 operand=sym.var("u"),
-                dofdesc=expr.dofdesc)
-        return bind(self.places, op)(self.queue, u=rec_operand).get()
+                dofdesc=dofdesc)
+
+        discr = self.places.get_discretization(dofdesc.geometry, dofdesc.discr_stage)
+        rec_operand = unflatten_from_numpy(self.array_context, discr, rec_operand)
+
+        return flatten_to_numpy(self.array_context,
+                bind(self.places, op)(self.array_context, u=rec_operand)
+                )
 
     def map_node_coordinate_component(self, expr):
         from pytential import bind, sym
         op = sym.NodeCoordinateComponent(expr.ambient_axis, dofdesc=expr.dofdesc)
-        return bind(self.places, op)(self.queue).get()
+        return flatten_to_numpy(self.array_context,
+                bind(self.places, op)(self.array_context)
+                )
 
     def map_call(self, expr):
         arg, = expr.parameters
@@ -215,17 +225,13 @@ class MatrixBuilderBase(EvaluationMapperBase):
         if isinstance(rec_arg, np.ndarray) and self.is_kind_matrix(rec_arg):
             raise RuntimeError("expression is nonlinear in variable")
 
-        if isinstance(rec_arg, np.ndarray):
-            rec_arg = cl.array.to_device(self.queue, rec_arg)
-
-        from pytential import bind, sym
-        op = expr.function(sym.var("u"))
-        result = bind(self.places, op)(self.queue, u=rec_arg)
-
-        if isinstance(result, cl.array.Array):
-            result = result.get()
-
-        return result
+        from numbers import Number
+        if isinstance(rec_arg, Number):
+            return getattr(np, expr.function.name)(rec_arg)
+        else:
+            rec_arg = unflatten_from_numpy(self.array_context, None, rec_arg)
+            result = getattr(self.array_context.np, expr.function.name)(rec_arg)
+            return flatten_to_numpy(self.array_context, result)
 
     # }}}
 
@@ -240,14 +246,14 @@ class MatrixBlockBuilderBase(MatrixBuilderBase):
     assume that each operator acts directly on the density.
     """
 
-    def __init__(self, queue, dep_expr, other_dep_exprs,
+    def __init__(self, actx, dep_expr, other_dep_exprs,
             dep_source, dep_discr, places, index_set, context):
         """
         :arg index_set: a :class:`sumpy.tools.MatrixBlockIndexRanges` class
             describing which blocks are going to be evaluated.
         """
 
-        super(MatrixBlockBuilderBase, self).__init__(queue,
+        super(MatrixBlockBuilderBase, self).__init__(actx,
                 dep_expr, other_dep_exprs, dep_source, dep_discr,
                 places, context)
         self.index_set = index_set
@@ -259,7 +265,7 @@ class MatrixBlockBuilderBase(MatrixBuilderBase):
         # be computed on the full discretization, ignoring our index_set,
         # e.g the normal in a double layer potential
 
-        return MatrixBuilderBase(self.queue,
+        return MatrixBuilderBase(self.array_context,
                 self.dep_expr,
                 self.other_dep_exprs,
                 self.dep_source,
@@ -272,7 +278,7 @@ class MatrixBlockBuilderBase(MatrixBuilderBase):
         # blk_mapper is used to recursively compute the density to
         # a layer potential operator to ensure there is no composition
 
-        return MatrixBlockBuilderBase(self.queue,
+        return MatrixBlockBuilderBase(self.array_context,
                 self.dep_expr,
                 self.other_dep_exprs,
                 self.dep_source,
@@ -302,9 +308,10 @@ class MatrixBlockBuilderBase(MatrixBuilderBase):
 # We'll cheat and build the matrix on the host.
 
 class MatrixBuilder(MatrixBuilderBase):
-    def __init__(self, queue, dep_expr, other_dep_exprs,
+    def __init__(self, actx, dep_expr, other_dep_exprs,
             dep_source, dep_discr, places, context):
-        super(MatrixBuilder, self).__init__(queue, dep_expr, other_dep_exprs,
+        super(MatrixBuilder, self).__init__(
+                actx, dep_expr, other_dep_exprs,
                 dep_source, dep_discr, places, context)
 
     def map_interpolation(self, expr):
@@ -313,13 +320,17 @@ class MatrixBuilder(MatrixBuilderBase):
         if expr.to_dd.discr_stage != sym.QBX_SOURCE_QUAD_STAGE2:
             raise RuntimeError("can only interpolate to QBX_SOURCE_QUAD_STAGE2")
         operand = self.rec(expr.operand)
+        actx = self.array_context
 
         if isinstance(operand, (int, float, complex, np.number)):
             return operand
         elif isinstance(operand, np.ndarray) and operand.ndim == 1:
             conn = self.places.get_connection(expr.from_dd, expr.to_dd)
-            return conn(self.queue,
-                    cl.array.to_device(self.queue, operand)).get(self.queue)
+            discr = self.places.get_discretization(
+                    expr.from_dd.geometry, expr.from_dd.discr_stage)
+
+            operand = unflatten_from_numpy(actx, discr, operand)
+            return flatten_to_numpy(actx, conn(operand))
         elif isinstance(operand, np.ndarray) and operand.ndim == 2:
             cache = self.places._get_cache("direct_resampler")
             key = (expr.from_dd.geometry,
@@ -333,8 +344,8 @@ class MatrixBuilder(MatrixBuilderBase):
                     flatten_chained_connection
 
                 conn = self.places.get_connection(expr.from_dd, expr.to_dd)
-                conn = flatten_chained_connection(self.queue, conn)
-                mat = conn.full_resample_matrix(self.queue).get(self.queue)
+                conn = flatten_chained_connection(actx, conn)
+                mat = actx.to_numpy(conn.full_resample_matrix(actx))
 
                 # FIXME: the resample matrix is slow to compute and very big
                 # to store, so caching it may not be the best idea
@@ -359,6 +370,7 @@ class MatrixBuilder(MatrixBuilderBase):
         if not self.is_kind_matrix(rec_density):
             raise NotImplementedError("layer potentials on non-variables")
 
+        actx = self.array_context
         kernel = expr.kernel
         kernel_args = _get_layer_potential_args(self, expr)
 
@@ -366,31 +378,31 @@ class MatrixBuilder(MatrixBuilderBase):
         local_expn = LineTaylorLocalExpansion(kernel, lpot_source.qbx_order)
 
         from sumpy.qbx import LayerPotentialMatrixGenerator
-        mat_gen = LayerPotentialMatrixGenerator(
-                self.queue.context, (local_expn,))
+        mat_gen = LayerPotentialMatrixGenerator(actx.context, (local_expn,))
 
         assert abs(expr.qbx_forced_limit) > 0
         from pytential import bind, sym
         radii = bind(self.places, sym.expansion_radii(
             source_discr.ambient_dim,
-            dofdesc=expr.target))(self.queue)
+            dofdesc=expr.target))(actx)
         centers = bind(self.places, sym.expansion_centers(
             source_discr.ambient_dim,
             expr.qbx_forced_limit,
-            dofdesc=expr.target))(self.queue)
-
-        _, (mat,) = mat_gen(self.queue,
-                targets=target_discr.nodes(),
-                sources=source_discr.nodes(),
-                centers=centers,
-                expansion_radii=radii,
+            dofdesc=expr.target))(actx)
+
+        from meshmode.dof_array import flatten, thaw
+        _, (mat,) = mat_gen(actx.queue,
+                targets=flatten(thaw(actx, target_discr.nodes())),
+                sources=flatten(thaw(actx, source_discr.nodes())),
+                centers=flatten(centers),
+                expansion_radii=flatten(radii),
                 **kernel_args)
-        mat = mat.get()
+        mat = actx.to_numpy(mat)
 
         waa = bind(self.places, sym.weights_and_area_elements(
             source_discr.ambient_dim,
-            dofdesc=expr.source))(self.queue)
-        mat[:, :] *= waa.get(self.queue)
+            dofdesc=expr.source))(actx)
+        mat[:, :] *= actx.to_numpy(flatten(waa))
         mat = mat.dot(rec_density)
 
         return mat
@@ -401,9 +413,9 @@ class MatrixBuilder(MatrixBuilderBase):
 # {{{ p2p matrix builder
 
 class P2PMatrixBuilder(MatrixBuilderBase):
-    def __init__(self, queue, dep_expr, other_dep_exprs,
+    def __init__(self, actx, dep_expr, other_dep_exprs,
             dep_source, dep_discr, places, context, exclude_self=True):
-        super(P2PMatrixBuilder, self).__init__(queue,
+        super(P2PMatrixBuilder, self).__init__(actx,
                 dep_expr, other_dep_exprs, dep_source, dep_discr,
                 places, context)
 
@@ -430,25 +442,26 @@ class P2PMatrixBuilder(MatrixBuilderBase):
         kernel_args = kernel.get_args() + kernel.get_source_args()
         kernel_args = set(arg.loopy_arg.name for arg in kernel_args)
 
+        actx = self.array_context
         kernel_args = _get_layer_potential_args(self,
                 expr, include_args=kernel_args)
         if self.exclude_self:
-            kernel_args["target_to_source"] = \
-                cl.array.arange(self.queue, 0, target_discr.nnodes, dtype=np.int)
+            # explicit width: the np.int alias was removed in NumPy 1.24
+            kernel_args["target_to_source"] = actx.from_numpy(
+                    np.arange(0, target_discr.ndofs, dtype=np.int64))
 
         from sumpy.p2p import P2PMatrixGenerator
-        mat_gen = P2PMatrixGenerator(
-                self.queue.context, (kernel,), exclude_self=self.exclude_self)
+        mat_gen = P2PMatrixGenerator(actx.context, (kernel,),
+                exclude_self=self.exclude_self)
 
-        _, (mat,) = mat_gen(self.queue,
-                targets=target_discr.nodes(),
-                sources=source_discr.nodes(),
+        from meshmode.dof_array import flatten, thaw
+        _, (mat,) = mat_gen(actx.queue,
+                targets=flatten(thaw(actx, target_discr.nodes())),
+                sources=flatten(thaw(actx, source_discr.nodes())),
                 **kernel_args)
 
-        mat = mat.get()
-        mat = mat.dot(rec_density)
+        return actx.to_numpy(mat).dot(rec_density)
 
-        return mat
 # }}}
 
 
@@ -462,8 +475,9 @@ class NearFieldBlockBuilder(MatrixBlockBuilderBase):
                 places, index_set, context)
 
     def get_dep_variable(self):
-        tgtindices = self.index_set.linear_row_indices.get(self.queue)
-        srcindices = self.index_set.linear_col_indices.get(self.queue)
+        queue = self.array_context.queue
+        tgtindices = self.index_set.linear_row_indices.get(queue)
+        srcindices = self.index_set.linear_col_indices.get(queue)
 
         return np.equal(tgtindices, srcindices).astype(np.float64)
 
@@ -484,6 +498,7 @@ class NearFieldBlockBuilder(MatrixBlockBuilderBase):
         if not np.isscalar(rec_density):
             raise NotImplementedError
 
+        actx = self.array_context
         kernel = expr.kernel
         kernel_args = _get_layer_potential_args(self._mat_mapper, expr)
 
@@ -491,34 +506,34 @@ class NearFieldBlockBuilder(MatrixBlockBuilderBase):
         local_expn = LineTaylorLocalExpansion(kernel, lpot_source.qbx_order)
 
         from sumpy.qbx import LayerPotentialMatrixBlockGenerator
-        mat_gen = LayerPotentialMatrixBlockGenerator(
-                self.queue.context, (local_expn,))
+        mat_gen = LayerPotentialMatrixBlockGenerator(actx.context, (local_expn,))
 
         assert abs(expr.qbx_forced_limit) > 0
         from pytential import bind, sym
         radii = bind(self.places, sym.expansion_radii(
             source_discr.ambient_dim,
-            dofdesc=expr.target))(self.queue)
+            dofdesc=expr.target))(actx)
         centers = bind(self.places, sym.expansion_centers(
             source_discr.ambient_dim,
             expr.qbx_forced_limit,
-            dofdesc=expr.target))(self.queue)
-
-        _, (mat,) = mat_gen(self.queue,
-                targets=target_discr.nodes(),
-                sources=source_discr.nodes(),
-                centers=centers,
-                expansion_radii=radii,
+            dofdesc=expr.target))(actx)
+
+        from meshmode.dof_array import flatten, thaw
+        _, (mat,) = mat_gen(actx.queue,
+                targets=flatten(thaw(actx, target_discr.nodes())),
+                sources=flatten(thaw(actx, source_discr.nodes())),
+                centers=flatten(centers),
+                expansion_radii=flatten(radii),
                 index_set=self.index_set,
                 **kernel_args)
 
         waa = bind(self.places, sym.weights_and_area_elements(
             source_discr.ambient_dim,
-            dofdesc=expr.source))(self.queue)
-        mat *= waa[self.index_set.linear_col_indices]
-        mat = rec_density * mat.get(self.queue)
+            dofdesc=expr.source))(actx)
+        waa = flatten(waa)
 
-        return mat
+        mat *= waa[self.index_set.linear_col_indices]
+        return rec_density * actx.to_numpy(mat)
 
 
 class FarFieldBlockBuilder(MatrixBlockBuilderBase):
@@ -530,8 +545,9 @@ class FarFieldBlockBuilder(MatrixBlockBuilderBase):
         self.exclude_self = exclude_self
 
     def get_dep_variable(self):
-        tgtindices = self.index_set.linear_row_indices.get(self.queue)
-        srcindices = self.index_set.linear_col_indices.get(self.queue)
+        queue = self.array_context.queue
+        tgtindices = self.index_set.linear_row_indices.get(queue)
+        srcindices = self.index_set.linear_col_indices.get(queue)
 
         return np.equal(tgtindices, srcindices).astype(np.float64)
 
@@ -558,24 +574,26 @@ class FarFieldBlockBuilder(MatrixBlockBuilderBase):
         kernel_args = kernel.get_args() + kernel.get_source_args()
         kernel_args = set(arg.loopy_arg.name for arg in kernel_args)
 
+        actx = self.array_context
         kernel_args = _get_layer_potential_args(self._mat_mapper,
                 expr, include_args=kernel_args)
         if self.exclude_self:
-            kernel_args["target_to_source"] = \
-                cl.array.arange(self.queue, 0, target_discr.nnodes, dtype=np.int)
+            # explicit width: the np.int alias was removed in NumPy 1.24
+            kernel_args["target_to_source"] = actx.from_numpy(
+                    np.arange(0, target_discr.ndofs, dtype=np.int64))
 
         from sumpy.p2p import P2PMatrixBlockGenerator
-        mat_gen = P2PMatrixBlockGenerator(
-                self.queue.context, (kernel,), exclude_self=self.exclude_self)
+        mat_gen = P2PMatrixBlockGenerator(actx.context, (kernel,),
+                exclude_self=self.exclude_self)
 
-        _, (mat,) = mat_gen(self.queue,
-                targets=target_discr.nodes(),
-                sources=source_discr.nodes(),
+        from meshmode.dof_array import flatten, thaw
+        _, (mat,) = mat_gen(actx.queue,
+                targets=flatten(thaw(actx, target_discr.nodes())),
+                sources=flatten(thaw(actx, source_discr.nodes())),
                 index_set=self.index_set,
                 **kernel_args)
-        mat = rec_density * mat.get(self.queue)
 
-        return mat
+        return rec_density * actx.to_numpy(mat)
 
 # }}}
 
diff --git a/pytential/symbolic/pde/maxwell/__init__.py b/pytential/symbolic/pde/maxwell/__init__.py
index d89d393c4948a2944d4efa4da2b89db65e55d4b1..f7148cfd0156872ee4dbb36029e2bc1f00a1a032 100644
--- a/pytential/symbolic/pde/maxwell/__init__.py
+++ b/pytential/symbolic/pde/maxwell/__init__.py
@@ -61,7 +61,7 @@ def get_sym_maxwell_point_source(kernel, jxyz, k):
     # https://en.wikipedia.org/w/index.php?title=Maxwell%27s_equations&oldid=798940325#Alternative_formulations
     # (Vector calculus/Potentials/Any Gauge)
     # assumed time dependence exp(-1j*omega*t)
-    return sym.join_fields(
+    return sym.flat_obj_array(
         1j*k*A,
         sym.curl(A))
 
@@ -108,7 +108,7 @@ def get_sym_maxwell_plane_wave(amplitude_vec, v, omega,
 
     e = amplitude_vec * sym.exp(1j*np.dot(n*omega, x))
 
-    return sym.join_fields(e, c_inv * sym.cross(n, e))
+    return sym.flat_obj_array(e, c_inv * sym.cross(n, e))
 
 # }}}
 
@@ -180,7 +180,7 @@ class PECChargeCurrentMFIEOperator:
         E_scat = 1j*self.k*A - sym.grad(3, phi)
         H_scat = sym.curl(A)
 
-        return sym.join_fields(E_scat, H_scat)
+        return sym.flat_obj_array(E_scat, H_scat)
 
 # }}}
 
@@ -248,13 +248,13 @@ class MuellerAugmentedMFIEOperator(object):
         # sign flip included
         F4 = -sym.n_dot(mu1*H1-mu0*H0) + 0.5*(mu1+mu0)*u.rho_m  # noqa pylint:disable=invalid-unary-operand-type
 
-        return sym.join_fields(F1, F2, F3, F4)
+        return sym.flat_obj_array(F1, F2, F3, F4)
 
     def rhs(self, Einc_xyz, Hinc_xyz):
         mu1 = self.mus[1]
         eps1 = self.epss[1]
 
-        return sym.join_fields(
+        return sym.flat_obj_array(
             xyz_to_tangential(sym.n_cross(Hinc_xyz)),
             sym.n_dot(eps1*Einc_xyz),
             xyz_to_tangential(sym.n_cross(Einc_xyz)),
@@ -280,7 +280,7 @@ class MuellerAugmentedMFIEOperator(object):
         E0 = 1j*k*eps*S(Jxyz) + mu*curl_S(Mxyz) - grad(S(u.rho_e))
         H0 = -1j*k*mu*S(Mxyz) + eps*curl_S(Jxyz) + grad(S(u.rho_m))
 
-        return sym.join_fields(E0, H0)
+        return sym.flat_obj_array(E0, H0)
 
 # }}}
 
diff --git a/pytential/symbolic/pde/maxwell/generalized_debye.py b/pytential/symbolic/pde/maxwell/generalized_debye.py
index 1b6ea8ee9dab508237973f1861ecb77405c495ab..df7fd9f50ea23167fb415ae7c41ab428214dd28f 100644
--- a/pytential/symbolic/pde/maxwell/generalized_debye.py
+++ b/pytential/symbolic/pde/maxwell/generalized_debye.py
@@ -153,8 +153,8 @@ class DebyeOperatorBase(object):
         E = 1j*k*A - grad_phi - curl_S_volume(k, m)
         H = curl_S_volume(k, j) + 1j*k*Q - grad_psi
 
-        from pytools.obj_array import join_fields
-        return join_fields(E, H)
+        from pytools.obj_array import flat_obj_array
+        return flat_obj_array(E, H)
 
     def integral_equation(self, *args, **kwargs):
         nxnxE, ndotH = self.boundary_field(*args)
@@ -178,8 +178,8 @@ class DebyeOperatorBase(object):
         E_minus_grad_phi = 1j*k*A - curl_S_volume(k, m)
 
         from hellskitchen.fmm import DifferenceKernel
-        from pytools.obj_array import join_fields
-        return join_fields(
+        from pytools.obj_array import flat_obj_array
+        return flat_obj_array(
                 eh_op,
                 # FIXME: These are inefficient. They compute a full volume field,
                 # but only actually use the line part of it.
@@ -262,10 +262,10 @@ class InvertingDebyeOperatorBase(DebyeOperatorBase):
             r_coeff = inv_rank_one_coeff(r_tilde)
             q_coeff = inv_rank_one_coeff(q_tilde)
 
-            from pytools.obj_array import join_fields
+            from pytools.obj_array import flat_obj_array
             factors = self.cluster_points()
 
-            fix = join_fields(
+            fix = flat_obj_array(
                     factors[0]*s_ones*r_coeff,
                     factors[1]*Ones()*q_coeff,
                     )
@@ -376,10 +376,10 @@ class NonInvertingDebyeOperator(DebyeOperatorBase):
             r_coeff = inv_rank_one_coeff(r_tilde)
             q_coeff = inv_rank_one_coeff(q_tilde)
 
-            from pytools.obj_array import join_fields
+            from pytools.obj_array import flat_obj_array
             factors = self.cluster_points()
 
-            fix = join_fields(
+            fix = flat_obj_array(
                     factors[0]*s_ones*(r_coeff),
                     factors[1]*Ones()*(q_coeff),
                     )
diff --git a/pytential/symbolic/pde/maxwell/waveguide.py b/pytential/symbolic/pde/maxwell/waveguide.py
index 3a1d3a63dfa932969c2363ac2c09da0a98e3b89f..aab6e94268e957b31b48083bbaef7f25cce3e741 100644
--- a/pytential/symbolic/pde/maxwell/waveguide.py
+++ b/pytential/symbolic/pde/maxwell/waveguide.py
@@ -28,16 +28,9 @@ Second-Kind Waveguide
 ^^^^^^^^^^^^^^^^^^^^^
 
 .. autoclass:: SecondKindInfZMuellerOperator
-
-2D Dielectric (old-style)
-^^^^^^^^^^^^^^^^^^^^^^^^^
-
-.. autoclass:: DielectricSRep2DBoundaryOperator
-.. autoclass:: DielectricSDRep2DBoundaryOperator
 """
 
 import numpy as np
-from collections import namedtuple
 from six.moves import range
 
 from pytential import sym
@@ -353,632 +346,4 @@ class SecondKindInfZMuellerOperator(L2WeightedPDEOperator):
 
 # }}}
 
-
-# {{{ old-style waveguide
-
-class Dielectric2DBoundaryOperatorBase(L2WeightedPDEOperator):
-    r"""
-    Solves the following system of BVPs on :math:`\mathbb{R}^2`, in which
-    a disjoint family of domains :math:`\Omega_i` is embedded:
-
-    .. math::
-
-        \triangle E + (k_i^2-\beta^2) E = 0\quad \text{on $\Omega_i$}\\
-        \triangle H + (k_i^2-\beta^2) H = 0\quad \text{on $\Omega_i$}\\
-        [H] = 0 \text{ on $\partial \Omega_i$},\quad
-        [E] = 0 \text{ on $\partial \Omega_i$}\\
-        \left[ \frac{k_0}{k^2-\beta^2} \partial_{\hat n}H\right] = 0
-        \quad\text{on $\partial \Omega_i$},\quad\\
-        \left[ \frac{k_0}{k^2-\beta^2} \partial_{\hat n}E\right] = 0
-        \quad\text{on $\partial \Omega_i$}
-
-    :math:`E` and :math:`H` are assumed to be of the form
-
-    .. math::
-
-        E(x,y,z,t)=E(x,y)e^{i(\beta z-\omega t)
-        H(x,y,z,t)=H(x,y)e^{i(\beta z-\omega t)
-
-    where :math:`[\cdot]` denotes the jump across an interface, and :math:`k`
-    (without an index) denotes the value of :math:`k` on either side of the
-    interface, for the purpose of computing the jump. :math:`\hat n` denotes
-    the unit normal of the interface.
-
-    .. automethod:: make_unknown
-    .. automethod:: representation_outer
-    .. automethod:: representation_inner
-    .. automethod:: operator
-    """
-
-    field_kind_e = 0
-    field_kind_h = 1
-    field_kinds = [field_kind_e, field_kind_h]
-
-    side_in = 0
-    side_out = 1
-    sides = [side_in, side_out]
-    side_to_sign = {
-            side_in: -1,
-            side_out: 1,
-            }
-
-    dir_none = 0
-    dir_normal = 1
-    dir_tangential = 2
-
-    BCTermDescriptor = namedtuple("BCDescriptor",
-            "i_interface direction field_kind coeff_inner coeff_outer".split())
-
-    # {{{ constructor
-
-    def __init__(self, mode, k_vacuum, domain_k_exprs, beta,
-            interfaces, use_l2_weighting=None):
-        """
-        :attr mode: one of 'te', 'tm', 'tem'
-        :attr k_vacuum: A symbolic expression for the wave number in vacuum.
-            May be a string, which will be interpreted as a variable name.
-        :attr interfaces: a tuple of tuples
-            ``(outer_domain, inner_domain, interface_id)``,
-            where *outer_domain* and *inner_domain* are indices into
-            *domain_k_names*,
-            and *interface_id* is a symbolic name for the discretization of the
-            interface. 'outer' designates the side of the interface to which
-            the normal points.
-        :attr domain_k_exprs: a tuple of variable names of the Helmholtz
-            parameter *k*, to be used inside each part of the source geometry.
-            May also be a tuple of strings, which will be transformed into
-            variable references of the corresponding names.
-        :attr beta: A symbolic expression for the wave number in the :math:`z`
-            direction. May be a string, which will be interpreted as a variable
-            name.
-        """
-
-        if use_l2_weighting is None:
-            use_l2_weighting = False
-
-        from sumpy.kernel import HelmholtzKernel
-        self.kernel = HelmholtzKernel(2, allow_evanescent=True)
-
-        super(Dielectric2DBoundaryOperatorBase, self).__init__(
-                self.kernel,
-                use_l2_weighting=use_l2_weighting)
-
-        if mode == "te":
-            self.ez_enabled = False
-            self.hz_enabled = True
-        elif mode == "tm":
-            self.ez_enabled = True
-            self.hz_enabled = False
-        elif mode == "tem":
-            self.ez_enabled = True
-            self.hz_enabled = True
-        else:
-            raise ValueError("invalid mode '%s'" % mode)
-
-        self.interfaces = interfaces
-
-        fk_e = self.field_kind_e
-        fk_h = self.field_kind_h
-
-        dir_none = self.dir_none
-        dir_normal = self.dir_normal
-        dir_tangential = self.dir_tangential
-
-        if isinstance(beta, str):
-            beta = sym.var(beta)
-        beta = sym.cse(beta, "beta")
-
-        if isinstance(k_vacuum, str):
-            k_vacuum = sym.var(k_vacuum)
-        k_vacuum = sym.cse(k_vacuum, "k_vac")
-
-        self.domain_k_exprs = [
-                sym.var(k_expr)
-                if isinstance(k_expr, str)
-                else sym.cse(k_expr, "k%d" % idom)
-                for idom, k_expr in enumerate(domain_k_exprs)]
-        del domain_k_exprs
-
-        # Note the case of k/K!
-        # "K" is the 2D Helmholtz parameter.
-        # "k" is the 3D Helmholtz parameter.
-
-        self.domain_K_exprs = [
-                sym.cse((k_expr**2-beta**2)**0.5, "K%d" % i)
-                for i, k_expr in enumerate(self.domain_k_exprs)]
-
-        # {{{ build bc list
-
-        # list of tuples, where each tuple consists of BCTermDescriptor instances
-
-        all_bcs = []
-        for i_interface, (outer_domain, inner_domain, _) in (
-                enumerate(self.interfaces)):
-            k_outer = self.domain_k_exprs[outer_domain]
-            k_inner = self.domain_k_exprs[inner_domain]
-
-            all_bcs += [
-                    (  # [E] = 0
-                        self.BCTermDescriptor(
-                            i_interface=i_interface,
-                            direction=dir_none,
-                            field_kind=fk_e,
-                            coeff_outer=1,
-                            coeff_inner=-1),
-                        ),
-                    (  # [H] = 0
-                        self.BCTermDescriptor(
-                            i_interface=i_interface,
-                            direction=dir_none,
-                            field_kind=fk_h,
-                            coeff_outer=1,
-                            coeff_inner=-1),
-                        ),
-                    (
-                        self.BCTermDescriptor(
-                            i_interface=i_interface,
-                            direction=dir_tangential,
-                            field_kind=fk_e,
-                            coeff_outer=beta/(k_outer**2-beta**2),
-                            coeff_inner=-beta/(k_inner**2-beta**2)),
-                        self.BCTermDescriptor(
-                            i_interface=i_interface,
-                            direction=dir_normal,
-                            field_kind=fk_h,
-                            coeff_outer=sym.cse(-k_vacuum/(k_outer**2-beta**2)),
-                            coeff_inner=sym.cse(k_vacuum/(k_inner**2-beta**2))),
-                        ),
-                    (
-                        self.BCTermDescriptor(
-                            i_interface=i_interface,
-                            direction=dir_tangential,
-                            field_kind=fk_h,
-                            coeff_outer=beta/(k_outer**2-beta**2),
-                            coeff_inner=-beta/(k_inner**2-beta**2)),
-                        self.BCTermDescriptor(
-                            i_interface=i_interface,
-                            direction=dir_normal,
-                            field_kind=fk_e,
-                            coeff_outer=sym.cse(
-                                (k_outer**2/k_vacuum)/(k_outer**2-beta**2)),
-                            coeff_inner=sym.cse(
-                                -(k_inner**2/k_vacuum)
-                                / (k_inner**2-beta**2)))
-                        ),
-                    ]
-
-            del k_outer
-            del k_inner
-
-        self.bcs = []
-        for bc in all_bcs:
-            any_significant_e = any(
-                    term.field_kind == fk_e
-                    and term.direction in [dir_normal, dir_none]
-                    for term in bc)
-            any_significant_h = any(
-                    term.field_kind == fk_h
-                    and term.direction in [dir_normal, dir_none]
-                    for term in bc)
-            is_necessary = (
-                    (self.ez_enabled and any_significant_e)
-                    or (self.hz_enabled and any_significant_h))
-
-            # Only keep tangential modes for TEM. Otherwise,
-            # no jump in H already implies jump condition on
-            # tangential derivative.
-            is_tem = self.ez_enabled and self.hz_enabled
-            terms = tuple(
-                    term
-                    for term in bc
-                    if term.direction != dir_tangential
-                    or is_tem)
-
-            if is_necessary:
-                self.bcs.append(terms)
-
-        assert (len(all_bcs)
-                * (int(self.ez_enabled) + int(self.hz_enabled)) // 2
-                == len(self.bcs))
-
-        # }}}
-
-    # }}}
-
-    def is_field_present(self, field_kind):
-        return (
-                (field_kind == self.field_kind_e and self.ez_enabled)
-                or (field_kind == self.field_kind_h and self.hz_enabled))
-
-    def make_unknown(self, name):
-        num_densities = (
-                2
-                * (int(self.ez_enabled) + int(self.hz_enabled))
-                * len(self.interfaces))
-
-        assert num_densities == len(self.bcs)
-
-        return sym.make_sym_vector(name, num_densities)
-
-    def bc_term_to_operator_contrib(self, term, side, raw_potential_op,
-            density, discrete):
-        potential_op = raw_potential_op
-
-        side_sign = self.side_to_sign[side]
-
-        domain_outer, domain_inner, interface_id = \
-                self.interfaces[term.i_interface]
-        if side == self.side_in:
-            K_expr = self.domain_K_exprs[domain_inner]  # noqa
-            bc_coeff = term.coeff_inner
-        elif side == self.side_out:
-            K_expr = self.domain_K_exprs[domain_outer]  # noqa
-            bc_coeff = term.coeff_outer
-        else:
-            raise ValueError("invalid value of 'side'")
-
-        potential_op = potential_op(
-                self.kernel, density, source=interface_id,
-                k=K_expr)
-
-        if term.direction == self.dir_none:
-            if raw_potential_op is sym.S:
-                jump_term = 0
-            elif raw_potential_op is sym.D:
-                jump_term = (side_sign*0.5) * discrete
-            else:
-                assert False, raw_potential_op
-        elif term.direction == self.dir_normal:
-            potential_op = sym.normal_derivative(
-                    2, potential_op, dofdesc=interface_id)
-
-            if raw_potential_op is sym.S:
-                # S'
-                jump_term = (-side_sign*0.5) * discrete
-            elif raw_potential_op is sym.D:
-                jump_term = 0
-            else:
-                assert False, raw_potential_op
-
-        elif term.direction == self.dir_tangential:
-            potential_op = sym.tangential_derivative(
-                    raw_potential_op(
-                        self.kernel, density, source=interface_id,
-                        k=K_expr, qbx_forced_limit=side_sign),
-                    interface_id).a.as_scalar()
-
-            # Some of these may have jumps, but QBX does the dirty
-            # work here by directly computing the limit.
-            jump_term = 0
-
-        else:
-            raise ValueError("invalid direction")
-
-        potential_op = (
-                jump_term
-                + self.get_sqrt_weight(interface_id)*potential_op)
-
-        del jump_term
-
-        contrib = bc_coeff * potential_op
-
-        if (raw_potential_op is sym.D
-                and term.direction == self.dir_normal):
-            # FIXME The hypersingular part should perhaps be
-            # treated specially to avoid cancellation.
-            pass
-
-        return contrib
-
-
-# {{{ single-layer representation
-
-class DielectricSRep2DBoundaryOperator(Dielectric2DBoundaryOperatorBase):
-    def _structured_unknown(self, unknown, with_l2_weights):
-        """
-        :arg with_l2_weights: If True, return the 'bare' unknowns
-            that do not have the :math:`L^2` weights divided out.
-            Note: Those unknowns should *not* be interpreted as
-            point values of a density.
-        :returns: an array of unknowns, with the following index axes:
-            ``[side, field_kind, i_interface]``, where
-            ``side`` is o for the outside part and i for the interior part,
-            ``field_kind`` is 0 for the E-field and 1 for the H-field part,
-            ``i_interface`` is the number of the enclosed domain, starting from 0.
-        """
-        result = np.zeros((2, 2, len(self.interfaces)), dtype=np.object)
-        sides = {
-                self.side_out: "o",
-                self.side_in: "i"
-                }
-        fields = {
-                self.field_kind_e: "E",
-                self.field_kind_h: "H"
-                }
-
-        i_unknown = 0
-        for side in self.sides:
-            for field_kind in self.field_kinds:
-                for i_interface in range(len(self.interfaces)):
-
-                    if self.is_field_present(field_kind):
-                        dens = unknown[i_unknown]
-                        i_unknown += 1
-                    else:
-                        dens = 0
-
-                    _, _, interface_id = self.interfaces[i_interface]
-
-                    if not with_l2_weights:
-                        dens = sym.cse(
-                                dens/self.get_sqrt_weight(interface_id),
-                                "dens_{side}_{field}_{dom}".format(
-                                    side=sides[side],
-                                    field=fields[field_kind],
-                                    dom=i_interface))
-
-                    result[side, field_kind, i_interface] = dens
-
-        assert i_unknown == len(unknown)
-        return result
-
-    def representation(self, unknown, i_domain, qbx_forced_limit=None):
-        """
-        :return: a symbolic expression for the representation of the PDE solution
-            in domain number *i_domain*.
-        """
-        unk = self._structured_unknown(unknown, with_l2_weights=False)
-
-        result = []
-
-        for field_kind in self.field_kinds:
-            if not self.is_field_present(field_kind):
-                continue
-
-            field_result = 0
-            for i_interface, (i_domain_outer, i_domain_inner, interface_id) in (
-                    enumerate(self.interfaces)):
-                if i_domain_outer == i_domain:
-                    side = self.side_out
-                elif i_domain_inner == i_domain:
-                    side = self.side_in
-                else:
-                    continue
-
-                my_unk = unk[side, field_kind, i_interface]
-                if my_unk:
-                    field_result += sym.S(
-                            self.kernel,
-                            my_unk,
-                            source=interface_id,
-                            k=self.domain_K_exprs[i_domain],
-                            qbx_forced_limit=qbx_forced_limit)
-
-            result.append(field_result)
-
-        from pytools.obj_array import make_obj_array
-        return make_obj_array(result)
-
-    def operator(self, unknown):
-        density_unk = self._structured_unknown(unknown, with_l2_weights=False)
-        discrete_unk = self._structured_unknown(unknown, with_l2_weights=True)
-
-        result = []
-        for bc in self.bcs:
-            op = 0
-
-            for side in self.sides:
-                for term in bc:
-                    unk_index = (side, term.field_kind, term.i_interface)
-                    density = density_unk[unk_index]
-                    discrete = discrete_unk[unk_index]
-
-                    op += self.bc_term_to_operator_contrib(
-                            term, side, sym.S, density, discrete)
-
-            result.append(op)
-
-        return np.array(result, dtype=np.object)
-
-# }}}
-
-
-# {{{ single + double layer representation
-
-class DielectricSDRep2DBoundaryOperator(Dielectric2DBoundaryOperatorBase):
-    pot_kind_S = 0  # noqa: N815
-    pot_kind_D = 1  # noqa: N815
-    pot_kinds = [pot_kind_S, pot_kind_D]
-    potential_ops = {
-            pot_kind_S: sym.S,
-            pot_kind_D: sym.D,
-            }
-
-    def __init__(self, mode, k_vacuum, domain_k_exprs, beta,
-            interfaces, use_l2_weighting=None):
-
-        super(DielectricSDRep2DBoundaryOperator, self).__init__(
-                mode, k_vacuum, domain_k_exprs, beta,
-                interfaces, use_l2_weighting=use_l2_weighting)
-
-        side_in = self.side_in
-        side_out = self.side_out
-
-        def find_normal_derivative_bc_coeff(field_kind, i_interface, side):
-            result = 0
-            for bc in self.bcs:
-                for term in bc:
-                    if term.field_kind != field_kind:
-                        continue
-                    if term.i_interface != i_interface:
-                        continue
-                    if term.direction != self.dir_normal:
-                        continue
-
-                    if side == side_in:
-                        result += term.coeff_inner
-                    elif side == side_out:
-                        result += term.coeff_outer
-                    else:
-                        raise ValueError("invalid side")
-
-            return result
-
-        self.density_coeffs = np.zeros(
-                (len(self.pot_kinds), len(self.field_kinds),
-                    len(self.interfaces), len(self.sides)),
-                dtype=np.object)
-        for field_kind in self.field_kinds:
-            for i_interface in range(len(self.interfaces)):
-                self.density_coeffs[
-                        self.pot_kind_S, field_kind, i_interface, side_in] = 1
-                self.density_coeffs[
-                        self.pot_kind_S, field_kind, i_interface, side_out] = 1
-
-                # These need to satisfy
-                #
-                # [dens_coeff_D * bc_coeff * dn D]
-                # = dens_coeff_D_out * bc_coeff_out * (dn D)
-                #   + dens_coeff_D_in * bc_coeff_in * dn D
-                # = 0
-                #
-                # (because dn D is hypersingular, which we'd like to cancel out)
-                #
-                # NB: bc_coeff_{in,out} already contain the signs to realize
-                # the subtraction for the jump. (So the "+" above is as it
-                # should be.)
-
-                dens_coeff_D_in = find_normal_derivative_bc_coeff(  # noqa
-                        field_kind, i_interface, side_out)
-                dens_coeff_D_out = - find_normal_derivative_bc_coeff(  # noqa
-                        field_kind, i_interface, side_in)
-
-                self.density_coeffs[
-                        self.pot_kind_D, field_kind, i_interface, side_in] \
-                                = dens_coeff_D_in
-                self.density_coeffs[
-                        self.pot_kind_D, field_kind, i_interface, side_out] \
-                                = dens_coeff_D_out
-
-    def _structured_unknown(self, unknown, with_l2_weights):
-        """
-        :arg with_l2_weights: If True, return the 'bare' unknowns
-            that do not have the :math:`L^2` weights divided out.
-            Note: Those unknowns should *not* be interpreted as
-            point values of a density.
-        :returns: an array of unknowns, with the following index axes:
-            ``[pot_kind, field_kind, i_interface]``, where
-            ``pot_kind`` is 0 for the single-layer part and 1 for the double-layer
-            part,
-            ``field_kind`` is 0 for the E-field and 1 for the H-field part,
-            ``i_interface`` is the number of the enclosed domain, starting from 0.
-        """
-        result = np.zeros((2, 2, len(self.interfaces)), dtype=np.object)
-
-        i_unknown = 0
-        for pot_kind in self.pot_kinds:
-            for field_kind in self.field_kinds:
-                for i_interface in range(len(self.interfaces)):
-
-                    if self.is_field_present(field_kind):
-                        dens = unknown[i_unknown]
-                        i_unknown += 1
-                    else:
-                        dens = 0
-
-                    _, _, interface_id = self.interfaces[i_interface]
-
-                    if not with_l2_weights:
-                        dens = sym.cse(
-                                dens/self.get_sqrt_weight(interface_id),
-                                "dens_{pot}_{field}_{intf}".format(
-                                    pot={0: "S", 1: "D"}[pot_kind],
-                                    field={
-                                        self.field_kind_e: "E",
-                                        self.field_kind_h: "H"
-                                        }
-                                    [field_kind],
-                                    intf=i_interface))
-
-                    result[pot_kind, field_kind, i_interface] = dens
-
-        assert i_unknown == len(unknown)
-        return result
-
-    def representation(self, unknown, i_domain):
-        """
-        :return: a symbolic expression for the representation of the PDE solution
-            in domain number *i_domain*.
-        """
-        unk = self._structured_unknown(unknown, with_l2_weights=False)
-
-        result = []
-
-        for field_kind in self.field_kinds:
-            if not self.is_field_present(field_kind):
-                continue
-
-            field_result = 0
-            for pot_kind in self.pot_kinds:
-                for i_interface, (i_domain_outer, i_domain_inner, interface_id) in (
-                        enumerate(self.interfaces)):
-                    if i_domain_outer == i_domain:
-                        side = self.side_out
-                    elif i_domain_inner == i_domain:
-                        side = self.side_in
-                    else:
-                        continue
-
-                    my_unk = unk[pot_kind, field_kind, i_interface]
-                    if my_unk:
-                        field_result += (
-                                self.density_coeffs[
-                                    pot_kind, field_kind, i_interface, side]
-                                * self.potential_ops[pot_kind](
-                                    self.kernel,
-                                    my_unk,
-                                    source=interface_id,
-                                    k=self.domain_K_exprs[i_domain]
-                                    ))
-
-            result.append(field_result)
-
-        from pytools.obj_array import make_obj_array
-        return make_obj_array(result)
-
-    def operator(self, unknown):
-        density_unk = self._structured_unknown(unknown, with_l2_weights=False)
-        discrete_unk = self._structured_unknown(unknown, with_l2_weights=True)
-
-        result = []
-        for bc in self.bcs:
-            op = 0
-
-            for pot_kind in self.pot_kinds:
-                for term in bc:
-
-                    for side in self.sides:
-                        raw_potential_op = \
-                                self.potential_ops[pot_kind]
-
-                        unk_index = (pot_kind, term.field_kind, term.i_interface)
-                        density = density_unk[unk_index]
-                        discrete = discrete_unk[unk_index]
-
-                        op += (
-                                self.density_coeffs[
-                                    pot_kind, term.field_kind, term.i_interface,
-                                    side]
-                                * self.bc_term_to_operator_contrib(
-                                    term, side, raw_potential_op, density, discrete)
-                                )
-
-            result.append(op)
-
-        return np.array(result, dtype=np.object)
-
-# }}}
-
-# }}}
-
 # vim: foldmethod=marker
diff --git a/pytential/symbolic/primitives.py b/pytential/symbolic/primitives.py
index bb5dfcbfa157aaab4517d75be8e553bffad937f1..65c18bbe896d90433c439a9677a496f07e09dcf2 100644
--- a/pytential/symbolic/primitives.py
+++ b/pytential/symbolic/primitives.py
@@ -36,7 +36,7 @@ from pymbolic.geometric_algebra import MultiVector, componentwise
 from pymbolic.geometric_algebra.primitives import (  # noqa: F401
         NablaComponent, DerivativeSource, Derivative as DerivativeBase)
 from pymbolic.primitives import make_sym_vector  # noqa: F401
-from pytools.obj_array import make_obj_array, join_fields  # noqa: F401
+from pytools.obj_array import make_obj_array, flat_obj_array  # noqa: F401
 
 from functools import partial
 
@@ -53,28 +53,33 @@ Object types
 Based on the mathematical quantity being represented, the following types of
 objects occur as part of a symbolic operator representation:
 
-*   If a quantity is a scalar, it is just a symbolic expression--that is, a nested
-    combination of placeholders (see below), arithmetic on them (see
-    :mod:`pymbolic.primitives`. These objects are created simply by doing
-    arithmetic on placeholders.
+*   If a quantity is a scalar, it is just a symbolic expression--that is, an
+    element of the set of formal expressions recursively generated by the
+    placeholders (see :ref:`placeholders`), constants, and arithmetic operations
+    on them (see :mod:`pymbolic.primitives`).  Objects of this type are created
+    simply by doing arithmetic on placeholders and scalar constants.
 
 *   If the quantity is "just a bunch of scalars" (like, say, rows in a system
-    of integral equations), the symbolic representation an object array. Each
+    of integral equations), the symbolic representation is an object array. Each
     element of the object array contains a symbolic expression.
 
     :func:`pytools.obj_array.make_obj_array` and
-    :func:`pytools.obj_array.join_fields`
+    :func:`pytools.obj_array.flat_obj_array`
     can help create those.
 
 *   If it is a geometric quantity (that makes sense without explicit reference to
     coordinates), it is a :class:`pymbolic.geometric_algebra.MultiVector`.
-    This can be converted to an object array by calling :
+    This can be converted to an object array by calling:
     :meth:`pymbolic.geometric_algebra.MultiVector.as_vector`.
 
-:mod:`pyopencl.array.Array` instances do not occur on the symbolic of
-:mod:`pymbolic` at all.  Those hold per-node degrees of freedom (and only
-those), which is not visible as an array axis in symbolic code. (They're
-visible only once evaluated.)
+:class:`pyopencl.array.Array` and :class:`meshmode.dof_array.DOFArray` instances
+hold per-node degrees of freedom (and only those). Such instances do *not* occur
+on the symbolic side of :mod:`pytential` at all. They are visible only as bound
+inputs (see :func:`pytential.bind`) or as outputs of evaluation. Which of the
+two types is used depends on the meaning of the data being represented: data
+associated with a :class:`~meshmode.discretization.Discretization` is held in
+a :class:`~meshmode.dof_array.DOFArray`; all other data is held in a
+:class:`~pyopencl.array.Array`.
 
 DOF Description
 ^^^^^^^^^^^^^^^
@@ -93,6 +98,8 @@ DOF Description
 .. autoclass:: DOFDescriptor
 .. autofunction:: as_dofdesc
 
+.. _placeholders:
+
 Placeholders
 ^^^^^^^^^^^^
 
@@ -472,12 +479,12 @@ class Function(var):
         # return an object array of the operator applied to each of the
         # operands.
 
-        from pytools.obj_array import is_obj_array, with_object_array_or_scalar
-        if is_obj_array(operand):
+        from pytools.obj_array import obj_array_vectorize
+        if isinstance(operand, np.ndarray) and operand.dtype.char == "O":
             def make_op(operand_i):
                 return self(operand_i, *args, **kwargs)
 
-            return with_object_array_or_scalar(make_op, operand)
+            return obj_array_vectorize(make_op, operand)
         else:
             return var.__call__(self, operand, *args, **kwargs)
 
@@ -1333,12 +1340,12 @@ def dd_axis(axis, ambient_dim, operand):
     """Return the derivative along (XYZ) axis *axis*
     (in *ambient_dim*-dimensional space) of *operand*.
     """
-    from pytools.obj_array import is_obj_array, with_object_array_or_scalar
-    if is_obj_array(operand):
+    from pytools.obj_array import obj_array_vectorize
+    if isinstance(operand, np.ndarray) and operand.dtype.char == "O":
         def dd_axis_comp(operand_i):
             return dd_axis(axis, ambient_dim, operand_i)
 
-        return with_object_array_or_scalar(dd_axis_comp, operand)
+        return obj_array_vectorize(dd_axis_comp, operand)
 
     d = Derivative()
 
diff --git a/pytential/target.py b/pytential/target.py
index e677bdb7a2f5a1494d87faef74800292bce7fdad..98f876b51d505ecb55d4cc2ec1800b65505d03be 100644
--- a/pytential/target.py
+++ b/pytential/target.py
@@ -41,8 +41,8 @@ class TargetBase(object):
     .. attribute:: ambient_dim
     .. method:: nodes
 
-        Shape: ``[ambient_dim, nnodes]``
-    .. attribute:: nnodes
+        Shape: ``[ambient_dim, ndofs]``
+    .. attribute:: ndofs
     """
 
 
@@ -63,13 +63,13 @@ class PointsTarget(TargetBase):
         return expr
 
     def nodes(self):
-        """Shape: ``[ambient_dim, nnodes]``
+        """Shape: ``[ambient_dim, ndofs]``
         """
 
         return self._nodes
 
     @property
-    def nnodes(self):
+    def ndofs(self):
         for coord_ary in self._nodes:
             return coord_ary.shape[0]
 
diff --git a/pytential/unregularized.py b/pytential/unregularized.py
index 6f0125cd56d6af95bf9576fea3f24e08d114c65f..b7cae228ab10ad979dbe46838aa47e24c6b3ba38 100644
--- a/pytential/unregularized.py
+++ b/pytential/unregularized.py
@@ -27,6 +27,7 @@ THE SOFTWARE.
 
 import six
 
+from meshmode.array_context import PyOpenCLArrayContext
 import numpy as np
 import loopy as lp
 
@@ -99,8 +100,8 @@ class UnregularizedLayerPotentialSource(LayerPotentialSourceBase):
                 density_discr=density_discr or self.density_discr,
                 debug=debug if debug is not None else self.debug)
 
-    def exec_compute_potential_insn(self, queue, insn, bound_expr, evaluate,
-            return_timing_data):
+    def exec_compute_potential_insn(self, actx: PyOpenCLArrayContext,
+            insn, bound_expr, evaluate, return_timing_data):
         if return_timing_data:
             from warnings import warn
             from pytential.source import UnableToCollectTimingData
@@ -108,18 +109,18 @@ class UnregularizedLayerPotentialSource(LayerPotentialSourceBase):
                    "Timing data collection not supported.",
                    category=UnableToCollectTimingData)
 
-        from pytools.obj_array import with_object_array_or_scalar
+        from pytools.obj_array import obj_array_vectorize
 
         def evaluate_wrapper(expr):
             value = evaluate(expr)
-            return with_object_array_or_scalar(lambda x: x, value)
+            return obj_array_vectorize(lambda x: x, value)
 
         if self.fmm_level_to_order is False:
             func = self.exec_compute_potential_insn_direct
         else:
             func = self.exec_compute_potential_insn_fmm
 
-        return func(queue, insn, bound_expr, evaluate_wrapper)
+        return func(actx, insn, bound_expr, evaluate_wrapper)
 
     def op_group_features(self, expr):
         from sumpy.kernel import AxisTargetDerivativeRemover
@@ -138,18 +139,23 @@ class UnregularizedLayerPotentialSource(LayerPotentialSourceBase):
         from pytential.symbolic.mappers import UnregularizedPreprocessor
         return UnregularizedPreprocessor(name, discretizations)(expr)
 
-    def exec_compute_potential_insn_direct(self, queue, insn, bound_expr, evaluate):
+    def exec_compute_potential_insn_direct(self, actx: PyOpenCLArrayContext,
+            insn, bound_expr, evaluate):
         kernel_args = {}
 
+        from pytential.utils import flatten_if_needed
+        from meshmode.dof_array import flatten, thaw, unflatten
+
         for arg_name, arg_expr in six.iteritems(insn.kernel_arguments):
-            kernel_args[arg_name] = evaluate(arg_expr)
+            kernel_args[arg_name] = flatten_if_needed(actx, evaluate(arg_expr))
 
         from pytential import bind, sym
         waa = bind(bound_expr.places, sym.weights_and_area_elements(
-            self.ambient_dim, dofdesc=insn.source))(queue)
-        strengths = waa * evaluate(insn.density).with_queue(queue)
+            self.ambient_dim, dofdesc=insn.source))(actx)
+        strengths = waa * evaluate(insn.density)
+        flat_strengths = flatten(strengths)
 
-        result = []
+        results = []
         p2p = None
 
         for o in insn.outputs:
@@ -157,17 +163,22 @@ class UnregularizedLayerPotentialSource(LayerPotentialSourceBase):
                     o.target_name.geometry, o.target_name.discr_stage)
 
             if p2p is None:
-                p2p = self.get_p2p(insn.kernels)
+                p2p = self.get_p2p(actx, insn.kernels)
+
+            evt, output_for_each_kernel = p2p(actx.queue,
+                    flatten_if_needed(actx, target_discr.nodes()),
+                    flatten(thaw(actx, self.density_discr.nodes())),
+                    [flat_strengths], **kernel_args)
 
-            evt, output_for_each_kernel = p2p(queue,
-                    target_discr.nodes(),
-                    self.density_discr.nodes(),
-                    [strengths], **kernel_args)
+            from meshmode.discretization import Discretization
+            result = output_for_each_kernel[o.kernel_index]
+            if isinstance(target_discr, Discretization):
+                result = unflatten(actx, target_discr, result)
 
-            result.append((o.name, output_for_each_kernel[o.kernel_index]))
+            results.append((o.name, result))
 
         timing_data = {}
-        return result, timing_data
+        return results, timing_data
 
     # {{{ fmm-based execution
 
@@ -190,10 +201,9 @@ class UnregularizedLayerPotentialSource(LayerPotentialSourceBase):
                 out_kernels)
 
     @property
-    @memoize_method
     def fmm_geometry_code_container(self):
-        return _FMMGeometryCodeContainer(
-                self.cl_context, self.ambient_dim, self.debug)
+        return _FMMGeometryDataCodeContainer(
+                self._setup_actx, self.ambient_dim, self.debug)
 
     def fmm_geometry_data(self, targets):
         return _FMMGeometryData(
@@ -202,7 +212,8 @@ class UnregularizedLayerPotentialSource(LayerPotentialSourceBase):
                 targets,
                 self.debug)
 
-    def exec_compute_potential_insn_fmm(self, queue, insn, bound_expr, evaluate):
+    def exec_compute_potential_insn_fmm(self, actx: PyOpenCLArrayContext,
+            insn, bound_expr, evaluate):
         # {{{ gather unique target discretizations used
 
         target_name_to_index = {}
@@ -227,8 +238,11 @@ class UnregularizedLayerPotentialSource(LayerPotentialSourceBase):
 
         from pytential import bind, sym
         waa = bind(bound_expr.places, sym.weights_and_area_elements(
-            self.ambient_dim, dofdesc=insn.source))(queue)
-        strengths = waa * evaluate(insn.density).with_queue(queue)
+            self.ambient_dim, dofdesc=insn.source))(actx)
+        strengths = waa * evaluate(insn.density)
+
+        from meshmode.dof_array import flatten
+        flat_strengths = flatten(strengths)
 
         out_kernels = tuple(knl for knl in insn.kernels)
         fmm_kernel = self.get_fmm_kernel(out_kernels)
@@ -236,12 +250,12 @@ class UnregularizedLayerPotentialSource(LayerPotentialSourceBase):
                 self.get_fmm_output_and_expansion_dtype(fmm_kernel, strengths))
         kernel_extra_kwargs, source_extra_kwargs = (
                 self.get_fmm_expansion_wrangler_extra_kwargs(
-                    queue, out_kernels, geo_data.tree().user_source_ids,
+                    actx, out_kernels, geo_data.tree().user_source_ids,
                     insn.kernel_arguments, evaluate))
 
         wrangler = self.expansion_wrangler_code_container(
                 fmm_kernel, out_kernels).get_wrangler(
-                    queue,
+                    actx.queue,
                     geo_data.tree(),
                     output_and_expansion_dtype,
                     self.fmm_level_to_order,
@@ -252,25 +266,32 @@ class UnregularizedLayerPotentialSource(LayerPotentialSourceBase):
 
         from boxtree.fmm import drive_fmm
         all_potentials_on_every_tgt = drive_fmm(
-                geo_data.traversal(), wrangler, strengths, timing_data=None)
+                geo_data.traversal(), wrangler, flat_strengths,
+                timing_data=None)
 
         # {{{ postprocess fmm
 
-        result = []
+        results = []
 
         for o in insn.outputs:
             target_index = target_name_to_index[o.target_name]
             target_slice = slice(*geo_data.target_info().target_discr_starts[
                     target_index:target_index+2])
+            target_discr = targets[target_index]
+
+            result = all_potentials_on_every_tgt[o.kernel_index][target_slice]
 
-            result.append(
-                    (o.name,
-                        all_potentials_on_every_tgt[o.kernel_index][target_slice]))
+            from meshmode.discretization import Discretization
+            if isinstance(target_discr, Discretization):
+                from meshmode.dof_array import unflatten
+                result = unflatten(actx, target_discr, result)
+
+            results.append((o.name, result))
 
         # }}}
 
         timing_data = {}
-        return result, timing_data
+        return results, timing_data
 
     # }}}
 
@@ -279,13 +300,17 @@ class UnregularizedLayerPotentialSource(LayerPotentialSourceBase):
 
 # {{{ fmm tools
 
-class _FMMGeometryCodeContainer(object):
+class _FMMGeometryDataCodeContainer(object):
 
-    def __init__(self, cl_context, ambient_dim, debug):
-        self.cl_context = cl_context
+    def __init__(self, actx, ambient_dim, debug):
+        self.array_context = actx
         self.ambient_dim = ambient_dim
         self.debug = debug
 
+    @property
+    def cl_context(self):
+        return self.array_context.context
+
     @memoize_method
     def copy_targets_kernel(self):
         knl = lp.make_kernel(
@@ -343,11 +368,15 @@ class _FMMGeometryData(object):
 
     @property
     def cl_context(self):
-        return self.lpot_source.cl_context
+        return self.code_getter.cl_context
+
+    @property
+    def array_context(self):
+        return self.code_getter.array_context
 
     @property
     def coord_dtype(self):
-        return self.lpot_source.density_discr.nodes().dtype
+        return self.lpot_source.density_discr.real_dtype
 
     @property
     def ambient_dim(self):
@@ -373,25 +402,29 @@ class _FMMGeometryData(object):
         lpot_src = self.lpot_source
         target_info = self.target_info()
 
-        with cl.CommandQueue(self.cl_context) as queue:
-            nsources = lpot_src.density_discr.nnodes
-            nparticles = nsources + target_info.ntargets
+        queue = self.array_context.queue
+
+        nsources = lpot_src.density_discr.ndofs
+        nparticles = nsources + target_info.ntargets
 
-            refine_weights = cl.array.zeros(queue, nparticles, dtype=np.int32)
-            refine_weights[:nsources] = 1
-            refine_weights.finish()
+        refine_weights = cl.array.zeros(queue, nparticles, dtype=np.int32)
+        refine_weights[:nsources] = 1
+        refine_weights.finish()
 
-            MAX_LEAF_REFINE_WEIGHT = 32  # noqa
+        MAX_LEAF_REFINE_WEIGHT = 32  # noqa
 
-            tree, _ = code_getter.build_tree(queue,
-                    particles=lpot_src.density_discr.nodes(),
-                    targets=target_info.targets,
-                    max_leaf_refine_weight=MAX_LEAF_REFINE_WEIGHT,
-                    refine_weights=refine_weights,
-                    debug=self.debug,
-                    kind="adaptive")
+        from meshmode.dof_array import thaw, flatten
 
-            return tree
+        tree, _ = code_getter.build_tree(queue,
+                particles=flatten(
+                    thaw(self.array_context, lpot_src.density_discr.nodes())),
+                targets=target_info.targets,
+                max_leaf_refine_weight=MAX_LEAF_REFINE_WEIGHT,
+                refine_weights=refine_weights,
+                debug=self.debug,
+                kind="adaptive")
+
+        return tree
 
     @memoize_method
     def target_info(self):
@@ -399,31 +432,31 @@ class _FMMGeometryData(object):
         lpot_src = self.lpot_source
         target_discrs = self.target_discrs
 
-        with cl.CommandQueue(self.cl_context) as queue:
-            ntargets = 0
-            target_discr_starts = []
-
-            for target_discr in target_discrs:
-                target_discr_starts.append(ntargets)
-                ntargets += target_discr.nnodes
+        ntargets = 0
+        target_discr_starts = []
 
+        for target_discr in target_discrs:
             target_discr_starts.append(ntargets)
-
-            targets = cl.array.empty(
-                    self.cl_context,
-                    (lpot_src.ambient_dim, ntargets),
-                    self.coord_dtype)
-
-            for start, target_discr in zip(target_discr_starts, target_discrs):
-                code_getter.copy_targets_kernel()(
-                        queue,
-                        targets=targets[:, start:start+target_discr.nnodes],
-                        points=target_discr.nodes())
-
-            return _TargetInfo(
-                    targets=targets,
-                    target_discr_starts=target_discr_starts,
-                    ntargets=ntargets).with_queue(None)
+            ntargets += target_discr.ndofs
+
+        target_discr_starts.append(ntargets)
+
+        targets = self.array_context.empty(
+                (lpot_src.ambient_dim, ntargets),
+                self.coord_dtype)
+
+        from pytential.utils import flatten_if_needed
+        for start, target_discr in zip(target_discr_starts, target_discrs):
+            code_getter.copy_targets_kernel()(
+                    self.array_context.queue,
+                    targets=targets[:, start:start+target_discr.ndofs],
+                    points=flatten_if_needed(
+                        self.array_context, target_discr.nodes()))
+
+        return _TargetInfo(
+                targets=targets,
+                target_discr_starts=target_discr_starts,
+                ntargets=ntargets).with_queue(None)
 
 # }}}
 
diff --git a/pytential/utils.py b/pytential/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..f1e9f0d109069d54b34090b1b0103b012693e454
--- /dev/null
+++ b/pytential/utils.py
@@ -0,0 +1,90 @@
+__copyright__ = """
+Copyright (C) 2020 Matt Wala
+"""
+
+__license__ = """
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+"""
+
+import numpy as np
+from meshmode.array_context import PyOpenCLArrayContext
+
+
+def flatten_if_needed(actx: PyOpenCLArrayContext, ary):
+    """Flatten any :class:`~meshmode.dof_array.DOFArray` found in *ary*.
+
+    :arg actx: array context handed to :func:`meshmode.dof_array.thaw`
+        for any :class:`~meshmode.dof_array.DOFArray` whose
+        ``array_context`` is *None*.
+    :arg ary: a :class:`~meshmode.dof_array.DOFArray`, an object array
+        (processed entry by entry), or any other object.
+    :returns: the result of :func:`meshmode.dof_array.flatten` for a
+        :class:`~meshmode.dof_array.DOFArray`; anything that is neither
+        a :class:`~meshmode.dof_array.DOFArray` nor an object array is
+        returned unchanged.
+    """
+    from pytools.obj_array import obj_array_vectorize_n_args
+    from meshmode.dof_array import DOFArray, thaw, flatten
+
+    # Object arrays that are not themselves DOFArrays: recurse per entry.
+    if (isinstance(ary, np.ndarray)
+            and ary.dtype.char == "O"
+            and not isinstance(ary, DOFArray)):
+        return obj_array_vectorize_n_args(flatten_if_needed, actx, ary)
+
+    # Everything else that is not a DOFArray passes through untouched.
+    if not isinstance(ary, DOFArray):
+        return ary
+
+    # No array context attached yet: thaw with *actx* before flattening.
+    if ary.array_context is None:
+        ary = thaw(actx, ary)
+
+    return flatten(ary)
+
+
+def unflatten_from_numpy(actx: PyOpenCLArrayContext, discr, ary):
+    """Convert *ary* with ``actx.from_numpy`` and, if *discr* is given,
+    unflatten the result onto it.
+
+    :arg discr: passed to :func:`meshmode.dof_array.unflatten`; if *None*,
+        the converted array is returned without unflattening.
+    :arg ary: an array, or an object array whose entries are each
+        converted with ``actx.from_numpy``.
+    """
+    from pytools.obj_array import obj_array_vectorize
+    from meshmode.dof_array import unflatten
+
+    ary = obj_array_vectorize(actx.from_numpy, ary)
+    if discr is None:
+        return ary
+    else:
+        return unflatten(actx, discr, ary)
+
+
+def flatten_to_numpy(actx: PyOpenCLArrayContext, ary):
+    """Apply :func:`flatten_if_needed` to *ary* and convert the result
+    with ``actx.to_numpy`` (via :func:`pytools.obj_array.obj_array_vectorize`).
+    """
+    result = flatten_if_needed(actx, ary)
+
+    from pytools.obj_array import obj_array_vectorize
+    return obj_array_vectorize(actx.to_numpy, result)
+
+# vim: foldmethod=marker
diff --git a/pytential/version.py b/pytential/version.py
index 0849c083e068879d34fe1a7d9cd82895ce3287a2..aafdf685fbe56dae62b2af7d2179f547ccc577e3 100644
--- a/pytential/version.py
+++ b/pytential/version.py
@@ -43,7 +43,7 @@ else:
 # }}}
 
 
-VERSION = (2020, 1)
+VERSION = (2020, 2)
 VERSION_TEXT = ".".join(str(i) for i in VERSION)
 
 PYTENTIAL_KERNEL_VERSION = (VERSION, _git_rev, 0)
diff --git a/requirements.txt b/requirements.txt
index 625deb28d7e5a04253bc3ffb5c12b2ba263362b5..09d1ec0d7ac443491cbb98c0862231f9d73ff41d 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -6,7 +6,7 @@ git+https://github.com/inducer/modepy
 git+https://github.com/inducer/pyopencl
 git+https://github.com/inducer/islpy
 git+https://github.com/inducer/loopy
-git+https://gitlab.tiker.net/inducer/boxtree
+git+https://github.com/inducer/boxtree
 git+https://github.com/inducer/meshmode
-git+https://gitlab.tiker.net/inducer/sumpy
-git+https://gitlab.tiker.net/inducer/pyfmmlib
+git+https://github.com/inducer/sumpy
+git+https://github.com/inducer/pyfmmlib
diff --git a/setup.cfg b/setup.cfg
index a353f3f7242d42f689d5721b8f4a104aaf3e4e6b..444d02d0d654bdb034caed50f35f429ee4dd58cf 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,4 +1,3 @@
-
 [flake8]
 ignore = E126,E127,E128,E123,E226,E241,E242,E265,E402,W503,N803,N806,N802,D102,D103
 max-line-length=85
diff --git a/test/test_cost_model.py b/test/test_cost_model.py
index 2ebac61c3113b962172ce2e27825c64059402ea1..42e03b2c4d1a1f4c284ee4b7f83ea094007a25da 100644
--- a/test/test_cost_model.py
+++ b/test/test_cost_model.py
@@ -26,6 +26,7 @@ import numpy as np
 import numpy.linalg as la  # noqa
 
 from boxtree.tools import ConstantOneExpansionWrangler
+from meshmode.array_context import PyOpenCLArrayContext
 import pyopencl as cl
 import pyopencl.clmath  # noqa
 import pytest
@@ -55,7 +56,7 @@ DEFAULT_LPOT_KWARGS = {
         }
 
 
-def get_lpot_source(queue, dim):
+def get_lpot_source(actx: PyOpenCLArrayContext, dim):
     from meshmode.discretization import Discretization
     from meshmode.discretization.poly_element import (
             InterpolatoryQuadratureSimplexGroupFactory)
@@ -72,7 +73,7 @@ def get_lpot_source(queue, dim):
         raise ValueError("unsupported dimension: %d" % dim)
 
     pre_density_discr = Discretization(
-            queue.context, mesh,
+            actx, mesh,
             InterpolatoryQuadratureSimplexGroupFactory(target_order))
 
     lpot_kwargs = DEFAULT_LPOT_KWARGS.copy()
@@ -91,9 +92,13 @@ def get_lpot_source(queue, dim):
     return lpot_source
 
 
-def get_density(queue, discr):
-    nodes = discr.nodes().with_queue(queue)
-    return cl.clmath.sin(10 * nodes[0])
+def get_density(actx, discr):
+    """Return ``sin(10 * x)``, where *x* is the first entry of the nodes of
+    *discr* after thawing them with *actx*.
+    """
+    from meshmode.dof_array import thaw
+    first_axis = thaw(actx, discr.nodes())[0]
+    return actx.np.sin(10 * first_axis)
 
 # }}}
 
@@ -108,13 +110,14 @@ def test_timing_data_gathering(ctx_factory):
     cl_ctx = ctx_factory()
     queue = cl.CommandQueue(cl_ctx,
             properties=cl.command_queue_properties.PROFILING_ENABLE)
+    actx = PyOpenCLArrayContext(queue)
 
-    lpot_source = get_lpot_source(queue, 2)
+    lpot_source = get_lpot_source(actx, 2)
     places = GeometryCollection(lpot_source)
 
     dofdesc = places.auto_source.to_stage1()
     density_discr = places.get_discretization(dofdesc.geometry)
-    sigma = get_density(queue, density_discr)
+    sigma = get_density(actx, density_discr)
 
     sigma_sym = sym.var("sigma")
     k_sym = LaplaceKernel(lpot_source.ambient_dim)
@@ -123,7 +126,7 @@ def test_timing_data_gathering(ctx_factory):
     op_S = bind(places, sym_op_S)
 
     timing_data = {}
-    op_S.eval(queue, dict(sigma=sigma), timing_data=timing_data)
+    op_S.eval(dict(sigma=sigma), timing_data=timing_data, array_context=actx)
     assert timing_data
     print(timing_data)
 
@@ -140,28 +143,29 @@ def test_cost_model(ctx_factory, dim, use_target_specific_qbx):
     """Test that cost model gathering can execute successfully."""
     cl_ctx = ctx_factory()
     queue = cl.CommandQueue(cl_ctx)
+    actx = PyOpenCLArrayContext(queue)
 
-    lpot_source = get_lpot_source(queue, dim).copy(
+    lpot_source = get_lpot_source(actx, dim).copy(
             _use_target_specific_qbx=use_target_specific_qbx,
             cost_model=CostModel())
     places = GeometryCollection(lpot_source)
 
     density_discr = places.get_discretization(places.auto_source.geometry)
-    sigma = get_density(queue, density_discr)
+    sigma = get_density(actx, density_discr)
 
     sigma_sym = sym.var("sigma")
     k_sym = LaplaceKernel(lpot_source.ambient_dim)
 
     sym_op_S = sym.S(k_sym, sigma_sym, qbx_forced_limit=+1)
     op_S = bind(places, sym_op_S)
-    cost_S = op_S.get_modeled_cost(queue, sigma=sigma)
+    cost_S = op_S.get_modeled_cost(actx, sigma=sigma)
     assert len(cost_S) == 1
 
     sym_op_S_plus_D = (
             sym.S(k_sym, sigma_sym, qbx_forced_limit=+1)
             + sym.D(k_sym, sigma_sym, qbx_forced_limit="avg"))
     op_S_plus_D = bind(places, sym_op_S_plus_D)
-    cost_S_plus_D = op_S_plus_D.get_modeled_cost(queue, sigma=sigma)
+    cost_S_plus_D = op_S_plus_D.get_modeled_cost(actx, sigma=sigma)
     assert len(cost_S_plus_D) == 2
 
 # }}}
@@ -173,17 +177,18 @@ def test_cost_model_metadata_gathering(ctx_factory):
     """Test that the cost model correctly gathers metadata."""
     cl_ctx = ctx_factory()
     queue = cl.CommandQueue(cl_ctx)
+    actx = PyOpenCLArrayContext(queue)
 
     from sumpy.expansion.level_to_order import SimpleExpansionOrderFinder
 
     fmm_level_to_order = SimpleExpansionOrderFinder(tol=1e-5)
 
-    lpot_source = get_lpot_source(queue, 2).copy(
+    lpot_source = get_lpot_source(actx, 2).copy(
             fmm_level_to_order=fmm_level_to_order)
     places = GeometryCollection(lpot_source)
 
     density_discr = places.get_discretization(places.auto_source.geometry)
-    sigma = get_density(queue, density_discr)
+    sigma = get_density(actx, density_discr)
 
     sigma_sym = sym.var("sigma")
     k_sym = HelmholtzKernel(2, "k")
@@ -192,7 +197,7 @@ def test_cost_model_metadata_gathering(ctx_factory):
     sym_op_S = sym.S(k_sym, sigma_sym, qbx_forced_limit=+1, k=sym.var("k"))
     op_S = bind(places, sym_op_S)
 
-    cost_S = one(op_S.get_modeled_cost(queue, sigma=sigma, k=k).values())
+    cost_S = one(op_S.get_modeled_cost(actx, sigma=sigma, k=k).values())
 
     geo_data = lpot_source.qbx_fmm_geometry_data(
             places,
@@ -437,12 +442,13 @@ def test_cost_model_correctness(ctx_factory, dim, off_surface,
     """Check that computed cost matches that of a constant-one FMM."""
     cl_ctx = ctx_factory()
     queue = cl.CommandQueue(cl_ctx)
+    actx = PyOpenCLArrayContext(queue)
 
     cost_model = (
             CostModel(
                 translation_cost_model_factory=OpCountingTranslationCostModel))
 
-    lpot_source = get_lpot_source(queue, dim).copy(
+    lpot_source = get_lpot_source(actx, dim).copy(
             cost_model=cost_model,
             _use_target_specific_qbx=use_target_specific_qbx)
 
@@ -470,10 +476,10 @@ def test_cost_model_correctness(ctx_factory, dim, off_surface,
     sym_op_S = sym.S(k_sym, sigma_sym, qbx_forced_limit=qbx_forced_limit)
 
     op_S = bind(places, sym_op_S)
-    sigma = get_density(queue, density_discr)
+    sigma = get_density(actx, density_discr)
 
     from pytools import one
-    cost_S = one(op_S.get_modeled_cost(queue, sigma=sigma).values())
+    cost_S = one(op_S.get_modeled_cost(actx, sigma=sigma).values())
 
     # Run FMM with ConstantOneWrangler. This can't be done with pytential's
     # high-level interface, so call the FMM driver directly.
@@ -487,15 +493,15 @@ def test_cost_model_correctness(ctx_factory, dim, off_surface,
 
     quad_stage2_density_discr = places.get_discretization(
             source_dd.geometry, sym.QBX_SOURCE_QUAD_STAGE2)
-    nnodes = quad_stage2_density_discr.nnodes
-    src_weights = np.ones(nnodes)
+    ndofs = quad_stage2_density_discr.ndofs
+    src_weights = np.ones(ndofs)
 
     timing_data = {}
     potential = drive_fmm(wrangler, src_weights, timing_data,
             traversal=wrangler.trav)[0][geo_data.ncenters:]
 
     # Check constant one wrangler for correctness.
-    assert (potential == nnodes).all()
+    assert (potential == ndofs).all()
 
     modeled_time = cost_S.get_predicted_times(merge_close_lists=True)
 
@@ -538,13 +544,14 @@ def test_cost_model_order_varying_by_level(ctx_factory):
 
     cl_ctx = ctx_factory()
     queue = cl.CommandQueue(cl_ctx)
+    actx = PyOpenCLArrayContext(queue)
 
     # {{{ constant level to order
 
     def level_to_order_constant(kernel, kernel_args, tree, level):
         return 1
 
-    lpot_source = get_lpot_source(queue, 2).copy(
+    lpot_source = get_lpot_source(actx, 2).copy(
             cost_model=CostModel(
                 calibration_params=CONSTANT_ONE_PARAMS),
             fmm_level_to_order=level_to_order_constant)
@@ -556,11 +563,11 @@ def test_cost_model_order_varying_by_level(ctx_factory):
     k_sym = LaplaceKernel(2)
     sym_op = sym.S(k_sym, sigma_sym, qbx_forced_limit=+1)
 
-    sigma = get_density(queue, density_discr)
+    sigma = get_density(actx, density_discr)
 
     cost_constant = one(
             bind(places, sym_op)
-            .get_modeled_cost(queue, sigma=sigma).values())
+            .get_modeled_cost(actx, sigma=sigma).values())
 
     # }}}
 
diff --git a/test/test_global_qbx.py b/test/test_global_qbx.py
index 9ab5b6926e2f88767bd0b01594f1a05c686174e9..6b8047b36b4197f9d5b840ee2009f2a16587d75d 100644
--- a/test/test_global_qbx.py
+++ b/test/test_global_qbx.py
@@ -37,6 +37,7 @@ from pyopencl.tools import pytest_generate_tests_for_pyopencl \
 from pytential.qbx import QBXLayerPotentialSource
 
 from functools import partial
+from meshmode.array_context import PyOpenCLArrayContext
 from meshmode.mesh.generation import (  # noqa
         ellipse, cloverleaf, starfish, drop, n_gon, qbx_peanut,
         make_curve_mesh, generate_icosphere, generate_torus)
@@ -55,6 +56,24 @@ RNG_SEED = 10
 FAR_TARGET_DIST_FROM_SOURCE = 10
 
 
+# {{{ utils
+
+def dof_array_to_numpy(actx, ary):
+    """Flatten *ary* (a DOFArray or an object array of DOFArrays) and
+    convert it with ``actx.to_numpy``. If the converted result is still an
+    object array, its entries are stacked into one multidimensional array.
+    """
+    from meshmode.dof_array import flatten
+    from pytools.obj_array import obj_array_vectorize
+
+    host_ary = obj_array_vectorize(actx.to_numpy, flatten(ary))
+    if host_ary.dtype.char != "O":
+        return host_ary
+    return np.array(list(host_ary))
+
+# }}}
+
+
 # {{{ source refinement checker
 
 class ElementInfo(RecordWithoutPickling):
@@ -76,22 +93,23 @@ def iter_elements(discr):
             yield ElementInfo(
                 element_nr=element_nr,
                 discr_slice=slice(discr_nodes_idx,
-                   discr_nodes_idx + discr_group.nunit_nodes))
+                    discr_nodes_idx + discr_group.nunit_dofs))
 
-            discr_nodes_idx += discr_group.nunit_nodes
+            discr_nodes_idx += discr_group.nunit_dofs
 
 
 def run_source_refinement_test(ctx_factory, mesh, order,
         helmholtz_k=None, visualize=False):
     cl_ctx = ctx_factory()
     queue = cl.CommandQueue(cl_ctx)
+    actx = PyOpenCLArrayContext(queue)
 
     # {{{ initial geometry
 
     from meshmode.discretization import Discretization
     from meshmode.discretization.poly_element import (
             InterpolatoryQuadratureSimplexGroupFactory)
-    discr = Discretization(cl_ctx, mesh,
+    discr = Discretization(actx, mesh,
             InterpolatoryQuadratureSimplexGroupFactory(order))
 
     lpot_source = QBXLayerPotentialSource(discr,
@@ -107,7 +125,7 @@ def run_source_refinement_test(ctx_factory, mesh, order,
     expansion_disturbance_tolerance = 0.025
 
     from pytential.qbx.refinement import refine_geometry_collection
-    places = refine_geometry_collection(queue, places,
+    places = refine_geometry_collection(places,
             kernel_length_scale=kernel_length_scale,
             expansion_disturbance_tolerance=expansion_disturbance_tolerance,
             visualize=visualize)
@@ -116,27 +134,34 @@ def run_source_refinement_test(ctx_factory, mesh, order,
 
     dd = places.auto_source
     stage1_density_discr = places.get_discretization(dd.geometry)
-    stage1_density_nodes = stage1_density_discr.nodes().get(queue)
+    from meshmode.dof_array import thaw
+
+    stage1_density_nodes = dof_array_to_numpy(actx,
+            thaw(actx, stage1_density_discr.nodes()))
 
     quad_stage2_density_discr = places.get_discretization(
             dd.geometry, sym.QBX_SOURCE_QUAD_STAGE2)
-    quad_stage2_density_nodes = quad_stage2_density_discr.nodes().get(queue)
-
-    int_centers = bind(places,
-        sym.expansion_centers(lpot_source.ambient_dim, -1))(queue)
-    int_centers = np.array([axis.get(queue) for axis in int_centers])
-    ext_centers = bind(places,
-        sym.expansion_centers(lpot_source.ambient_dim, +1))(queue)
-    ext_centers = np.array([axis.get(queue) for axis in ext_centers])
-
-    expansion_radii = bind(places,
-        sym.expansion_radii(lpot_source.ambient_dim))(queue).get()
+    quad_stage2_density_nodes = dof_array_to_numpy(actx,
+            thaw(actx, quad_stage2_density_discr.nodes()))
+
+    int_centers = dof_array_to_numpy(actx,
+            bind(places,
+                sym.expansion_centers(lpot_source.ambient_dim, -1))(actx))
+    ext_centers = dof_array_to_numpy(actx,
+            bind(places,
+                sym.expansion_centers(lpot_source.ambient_dim, +1))(actx))
+    expansion_radii = dof_array_to_numpy(actx,
+            bind(places, sym.expansion_radii(lpot_source.ambient_dim))(actx))
 
     dd = dd.copy(granularity=sym.GRANULARITY_ELEMENT)
-    source_danger_zone_radii = bind(places, sym._source_danger_zone_radii(
-        lpot_source.ambient_dim, dofdesc=dd.to_stage2()))(queue).get()
-    quad_res = bind(places, sym._quad_resolution(
-        lpot_source.ambient_dim, dofdesc=dd))(queue)
+    source_danger_zone_radii = dof_array_to_numpy(actx,
+            bind(places,
+                sym._source_danger_zone_radii(
+                    lpot_source.ambient_dim, dofdesc=dd.to_stage2()))(actx))
+    quad_res = dof_array_to_numpy(actx,
+            bind(places,
+                sym._quad_resolution(
+                    lpot_source.ambient_dim, dofdesc=dd))(actx))
 
     # {{{ check if satisfying criteria
 
@@ -236,6 +261,7 @@ def test_target_association(ctx_factory, curve_name, curve_f, nelements,
         visualize=False):
     cl_ctx = ctx_factory()
     queue = cl.CommandQueue(cl_ctx)
+    actx = PyOpenCLArrayContext(queue)
 
     # {{{ generate lpot source
 
@@ -248,7 +274,7 @@ def test_target_association(ctx_factory, curve_name, curve_f, nelements,
     from meshmode.discretization.poly_element import \
             InterpolatoryQuadratureSimplexGroupFactory
     factory = InterpolatoryQuadratureSimplexGroupFactory(order)
-    discr = Discretization(cl_ctx, mesh, factory)
+    discr = Discretization(actx, mesh, factory)
 
     lpot_source = QBXLayerPotentialSource(discr,
             qbx_order=order,  # not used in target association
@@ -263,20 +289,29 @@ def test_target_association(ctx_factory, curve_name, curve_f, nelements,
     rng = PhiloxGenerator(cl_ctx, seed=RNG_SEED)
 
     dd = places.auto_source.to_stage1()
-    centers = bind(places, sym.interleaved_expansion_centers(
-        lpot_source.ambient_dim, dofdesc=dd))(queue)
-    centers = np.array([ax.get(queue) for ax in centers])
-
-    tunnel_radius = bind(places, sym._close_target_tunnel_radii(
-        lpot_source.ambient_dim, dofdesc=dd))(queue)
+    centers = dof_array_to_numpy(actx,
+            bind(places, sym.interleaved_expansion_centers(
+                lpot_source.ambient_dim, dofdesc=dd))(actx))
 
     density_discr = places.get_discretization(dd.geometry)
-    noise = rng.uniform(queue, density_discr.nnodes, dtype=np.float, a=0.01, b=1.0)
+
+    # One random factor per DOF, drawn with a=0.01, b=1.0; it scales the
+    # tunnel radius passed to targets_from_sources below.
+    noise = actx.to_numpy(
+            rng.uniform(queue, density_discr.ndofs, dtype=float, a=0.01, b=1.0))
+
+    tunnel_radius = dof_array_to_numpy(actx,
+            bind(places, sym._close_target_tunnel_radii(
+                lpot_source.ambient_dim, dofdesc=dd))(actx))
 
     def targets_from_sources(sign, dist, dim=2):
-        nodes = bind(places, sym.nodes(dim, dofdesc=dd))(queue)
-        normals = bind(places, sym.normal(dim, dofdesc=dd))(queue)
-        return (nodes + normals * sign * dist).as_vector(np.object)
+        # Offset the source nodes by sign * dist along the normals on the
+        # host, then convert the result with actx.from_numpy.
+        nodes = dof_array_to_numpy(actx,
+                bind(places, sym.nodes(dim, dofdesc=dd))(actx).as_vector(object))
+        normals = dof_array_to_numpy(actx,
+                bind(places, sym.normal(dim, dofdesc=dd))(actx).as_vector(object))
+        return actx.from_numpy(nodes + normals * sign * dist)
 
     from pytential.target import PointsTarget
     int_targets = PointsTarget(targets_from_sources(-1, noise * tunnel_radius))
@@ -297,7 +328,7 @@ def test_target_association(ctx_factory, curve_name, curve_f, nelements,
         (far_targets, 0),
     )
 
-    sizes = np.cumsum([discr.nnodes for discr, _ in target_discrs])
+    sizes = np.cumsum([discr.ndofs for discr, _ in target_discrs])
 
     (surf_int_slice,
      surf_ext_slice,
@@ -315,23 +346,24 @@ def test_target_association(ctx_factory, curve_name, curve_f, nelements,
 
     from pytential.qbx.utils import TreeCodeContainer
     code_container = TargetAssociationCodeContainer(
-            cl_ctx, TreeCodeContainer(cl_ctx))
+            actx, TreeCodeContainer(actx))
 
     target_assoc = (associate_targets_to_qbx_centers(
             places,
             places.auto_source,
-            code_container.get_wrangler(queue),
+            code_container.get_wrangler(actx),
             target_discrs,
             target_association_tolerance=1e-10)
         .get(queue=queue))
 
-    expansion_radii = bind(places, sym.expansion_radii(
-        lpot_source.ambient_dim,
-        granularity=sym.GRANULARITY_CENTER))(queue).get()
-    surf_targets = np.array(
-            [axis.get(queue) for axis in density_discr.nodes()])
-    int_targets = np.array([axis.get(queue) for axis in int_targets.nodes()])
-    ext_targets = np.array([axis.get(queue) for axis in ext_targets.nodes()])
+    expansion_radii = dof_array_to_numpy(actx,
+            bind(places, sym.expansion_radii(
+                lpot_source.ambient_dim,
+                granularity=sym.GRANULARITY_CENTER))(actx))
+    from meshmode.dof_array import thaw
+    surf_targets = dof_array_to_numpy(actx, thaw(actx, density_discr.nodes()))
+    int_targets = actx.to_numpy(int_targets.nodes())
+    ext_targets = actx.to_numpy(ext_targets.nodes())
 
     def visualize_curve_and_assoc():
         import matplotlib.pyplot as plt
@@ -416,6 +448,7 @@ def test_target_association(ctx_factory, curve_name, curve_f, nelements,
 def test_target_association_failure(ctx_factory):
     cl_ctx = ctx_factory()
     queue = cl.CommandQueue(cl_ctx)
+    actx = PyOpenCLArrayContext(queue)
 
     # {{{ generate circle
 
@@ -430,7 +463,7 @@ def test_target_association_failure(ctx_factory):
     from meshmode.discretization.poly_element import \
             InterpolatoryQuadratureSimplexGroupFactory
     factory = InterpolatoryQuadratureSimplexGroupFactory(order)
-    discr = Discretization(cl_ctx, mesh, factory)
+    discr = Discretization(actx, mesh, factory)
     lpot_source = QBXLayerPotentialSource(discr,
             qbx_order=order,  # not used in target association
             fine_order=order)
@@ -444,8 +477,9 @@ def test_target_association_failure(ctx_factory):
         2j * np.pi * np.linspace(0, 1, 500, endpoint=False))
     from pytential.target import PointsTarget
     close_circle_target = (
-        PointsTarget(cl.array.to_device(
-            queue, np.array([close_circle.real, close_circle.imag]))))
+            PointsTarget(
+                actx.from_numpy(
+                    np.array([close_circle.real, close_circle.imag]))))
 
     targets = (
         (close_circle_target, 0),
@@ -458,13 +492,13 @@ def test_target_association_failure(ctx_factory):
     from pytential.qbx.utils import TreeCodeContainer
 
     code_container = TargetAssociationCodeContainer(
-            cl_ctx, TreeCodeContainer(cl_ctx))
+            actx, TreeCodeContainer(actx))
 
     with pytest.raises(QBXTargetAssociationFailedException):
         associate_targets_to_qbx_centers(
             places,
             places.auto_source,
-            code_container.get_wrangler(queue),
+            code_container.get_wrangler(actx),
             targets,
             target_association_tolerance=1e-10)
 
diff --git a/test/test_layer_pot.py b/test/test_layer_pot.py
index c7eeb200c97195a51881a28fcb4092d94d257599..9fa844355b2699baa26511b3efbf1e4a1d1de45b 100644
--- a/test/test_layer_pot.py
+++ b/test/test_layer_pot.py
@@ -26,12 +26,12 @@ THE SOFTWARE.
 import numpy as np
 import numpy.linalg as la  # noqa
 import pyopencl as cl
-import pyopencl.clmath  # noqa
 import pytest
 from pyopencl.tools import (  # noqa
         pytest_generate_tests_for_pyopencl as pytest_generate_tests)
 
 from functools import partial
+from meshmode.array_context import PyOpenCLArrayContext
 from meshmode.mesh.generation import (  # noqa
         ellipse, cloverleaf, starfish, drop, n_gon, qbx_peanut, WobblyCircle,
         make_curve_mesh, NArmedStarfish)
@@ -51,6 +51,7 @@ circle = partial(ellipse, 1)
 def test_geometry(ctx_factory):
     cl_ctx = ctx_factory()
     queue = cl.CommandQueue(cl_ctx)
+    actx = PyOpenCLArrayContext(queue)
 
     nelements = 30
     order = 5
@@ -63,13 +64,13 @@ def test_geometry(ctx_factory):
     from meshmode.discretization.poly_element import \
             InterpolatoryQuadratureSimplexGroupFactory
 
-    discr = Discretization(cl_ctx, mesh,
+    discr = Discretization(actx, mesh,
             InterpolatoryQuadratureSimplexGroupFactory(order))
 
     import pytential.symbolic.primitives as prim
     area_sym = prim.integral(2, 1, 1)
 
-    area = bind(discr, area_sym)(queue)
+    area = bind(discr, area_sym)(actx)
 
     err = abs(area-2*np.pi)
     print(err)
@@ -86,6 +87,7 @@ def test_off_surface_eval(ctx_factory, use_fmm, visualize=False):
 
     cl_ctx = ctx_factory()
     queue = cl.CommandQueue(cl_ctx)
+    actx = PyOpenCLArrayContext(queue)
 
     # prevent cache 'splosion
     from sympy.core.cache import clear_cache
@@ -109,7 +111,7 @@ def test_off_surface_eval(ctx_factory, use_fmm, visualize=False):
             InterpolatoryQuadratureSimplexGroupFactory
 
     pre_density_discr = Discretization(
-            cl_ctx, mesh, InterpolatoryQuadratureSimplexGroupFactory(target_order))
+            actx, mesh, InterpolatoryQuadratureSimplexGroupFactory(target_order))
     qbx = QBXLayerPotentialSource(
             pre_density_discr,
             4*target_order,
@@ -127,16 +129,16 @@ def test_off_surface_eval(ctx_factory, use_fmm, visualize=False):
     from sumpy.kernel import LaplaceKernel
     op = sym.D(LaplaceKernel(2), sym.var("sigma"), qbx_forced_limit=-2)
 
-    sigma = density_discr.zeros(queue) + 1
-    fld_in_vol = bind(places, op)(queue, sigma=sigma)
+    sigma = density_discr.zeros(actx) + 1
+    fld_in_vol = bind(places, op)(actx, sigma=sigma)
     fld_in_vol_exact = -1
 
-    err = cl.clmath.fabs(fld_in_vol - fld_in_vol_exact)
-    linf_err = cl.array.max(err).get()
+    err = actx.np.fabs(fld_in_vol - fld_in_vol_exact)
+    linf_err = actx.to_numpy(err).max()
     print("l_inf error:", linf_err)
 
     if visualize:
-        fplot.show_scalar_in_matplotlib(fld_in_vol.get())
+        fplot.show_scalar_in_matplotlib(actx.to_numpy(fld_in_vol))
         import matplotlib.pyplot as pt
         pt.colorbar()
         pt.show()
@@ -153,6 +155,7 @@ def test_off_surface_eval_vs_direct(ctx_factory,  do_plot=False):
 
     cl_ctx = ctx_factory()
     queue = cl.CommandQueue(cl_ctx)
+    actx = PyOpenCLArrayContext(queue)
 
     # prevent cache 'splosion
     from sympy.core.cache import clear_cache
@@ -172,7 +175,7 @@ def test_off_surface_eval_vs_direct(ctx_factory,  do_plot=False):
             InterpolatoryQuadratureSimplexGroupFactory
 
     pre_density_discr = Discretization(
-            cl_ctx, mesh, InterpolatoryQuadratureSimplexGroupFactory(target_order))
+            actx, mesh, InterpolatoryQuadratureSimplexGroupFactory(target_order))
     direct_qbx = QBXLayerPotentialSource(
             pre_density_discr, 4*target_order, qbx_order,
             fmm_order=False,
@@ -201,31 +204,31 @@ def test_off_surface_eval_vs_direct(ctx_factory,  do_plot=False):
     from pytential.qbx import QBXTargetAssociationFailedException
     op = sym.D(LaplaceKernel(2), sym.var("sigma"), qbx_forced_limit=None)
     try:
-        direct_sigma = direct_density_discr.zeros(queue) + 1
+        direct_sigma = direct_density_discr.zeros(actx) + 1
         direct_fld_in_vol = bind(places, op,
                 auto_where=("direct_qbx", "target"))(
-                        queue, sigma=direct_sigma)
+                        actx, sigma=direct_sigma)
     except QBXTargetAssociationFailedException as e:
-        fplot.show_scalar_in_matplotlib(e.failed_target_flags.get(queue))
+        fplot.show_scalar_in_matplotlib(
+            actx.to_numpy(actx.thaw(e.failed_target_flags)))
         import matplotlib.pyplot as pt
         pt.show()
         raise
 
-    fmm_sigma = fmm_density_discr.zeros(queue) + 1
+    fmm_sigma = fmm_density_discr.zeros(actx) + 1
     fmm_fld_in_vol = bind(places, op,
             auto_where=("fmm_qbx", "target"))(
-                    queue, sigma=fmm_sigma)
+                    actx, sigma=fmm_sigma)
 
-    err = cl.clmath.fabs(fmm_fld_in_vol - direct_fld_in_vol)
-
-    linf_err = cl.array.max(err).get()
+    err = actx.np.fabs(fmm_fld_in_vol - direct_fld_in_vol)
+    linf_err = actx.to_numpy(err).max()
     print("l_inf error:", linf_err)
 
     if do_plot:
         #fplot.show_scalar_in_mayavi(0.1*.get(queue))
         fplot.write_vtk_file("potential.vts", [
-            ("fmm_fld_in_vol", fmm_fld_in_vol.get(queue)),
-            ("direct_fld_in_vol", direct_fld_in_vol.get(queue))
+            ("fmm_fld_in_vol", actx.to_numpy(fmm_fld_in_vol)),
+            ("direct_fld_in_vol", actx.to_numpy(direct_fld_in_vol))
             ])
 
     assert linf_err < 1e-3
@@ -235,10 +238,10 @@ def test_off_surface_eval_vs_direct(ctx_factory,  do_plot=False):
 
 # {{{ unregularized tests
 
-
 def test_unregularized_with_ones_kernel(ctx_factory):
     cl_ctx = ctx_factory()
     queue = cl.CommandQueue(cl_ctx)
+    actx = PyOpenCLArrayContext(queue)
 
     nelements = 10
     order = 8
@@ -251,7 +254,7 @@ def test_unregularized_with_ones_kernel(ctx_factory):
     from meshmode.discretization.poly_element import \
             InterpolatoryQuadratureSimplexGroupFactory
 
-    discr = Discretization(cl_ctx, mesh,
+    discr = Discretization(actx, mesh,
             InterpolatoryQuadratureSimplexGroupFactory(order))
 
     from pytential.unregularized import UnregularizedLayerPotentialSource
@@ -268,24 +271,24 @@ def test_unregularized_with_ones_kernel(ctx_factory):
     sigma_sym = sym.var("sigma")
     op = sym.IntG(one_kernel_2d, sigma_sym, qbx_forced_limit=None)
 
-    sigma = cl.array.zeros(queue, discr.nnodes, dtype=float)
-    sigma.fill(1)
-    sigma.finish()
+    sigma = discr.zeros(actx) + 1
 
     result_self = bind(places, op,
             auto_where=places.auto_where)(
-                    queue, sigma=sigma)
+                    actx, sigma=sigma)
     result_nonself = bind(places, op,
             auto_where=(places.auto_source, "target_non_self"))(
-                    queue, sigma=sigma)
+                    actx, sigma=sigma)
 
-    assert np.allclose(result_self.get(), 2 * np.pi)
-    assert np.allclose(result_nonself.get(), 2 * np.pi)
+    from meshmode.dof_array import flatten
+    assert np.allclose(actx.to_numpy(flatten(result_self)), 2 * np.pi)
+    assert np.allclose(actx.to_numpy(result_nonself), 2 * np.pi)
 
 
 def test_unregularized_off_surface_fmm_vs_direct(ctx_factory):
     cl_ctx = ctx_factory()
     queue = cl.CommandQueue(cl_ctx)
+    actx = PyOpenCLArrayContext(queue)
 
     nelements = 300
     target_order = 8
@@ -303,7 +306,7 @@ def test_unregularized_off_surface_fmm_vs_direct(ctx_factory):
             InterpolatoryQuadratureSimplexGroupFactory
 
     density_discr = Discretization(
-            cl_ctx, mesh, InterpolatoryQuadratureSimplexGroupFactory(target_order))
+            actx, mesh, InterpolatoryQuadratureSimplexGroupFactory(target_order))
     direct = UnregularizedLayerPotentialSource(
             density_discr,
             fmm_order=False,
@@ -311,7 +314,7 @@ def test_unregularized_off_surface_fmm_vs_direct(ctx_factory):
     fmm = direct.copy(
             fmm_level_to_order=lambda kernel, kernel_args, tree, level: fmm_order)
 
-    sigma = density_discr.zeros(queue) + 1
+    sigma = density_discr.zeros(actx) + 1
 
     fplot = FieldPlotter(np.zeros(2), extent=5, npoints=100)
     from pytential.target import PointsTarget
@@ -332,13 +335,12 @@ def test_unregularized_off_surface_fmm_vs_direct(ctx_factory):
 
     direct_fld_in_vol = bind(places, op,
             auto_where=("unregularized_direct", "targets"))(
-                    queue, sigma=sigma)
+                    actx, sigma=sigma)
     fmm_fld_in_vol = bind(places, op,
-            auto_where=("unregularized_fmm", "targets"))(queue, sigma=sigma)
-
-    err = cl.clmath.fabs(fmm_fld_in_vol - direct_fld_in_vol)
+            auto_where=("unregularized_fmm", "targets"))(actx, sigma=sigma)
 
-    linf_err = cl.array.max(err).get()
+    err = actx.np.fabs(fmm_fld_in_vol - direct_fld_in_vol)
+    linf_err = actx.to_numpy(err).max()
     print("l_inf error:", linf_err)
 
     assert linf_err < 5e-3
@@ -356,6 +358,7 @@ def test_3d_jump_relations(ctx_factory, relation, visualize=False):
 
     cl_ctx = ctx_factory()
     queue = cl.CommandQueue(cl_ctx)
+    actx = PyOpenCLArrayContext(queue)
 
     if relation == "div_s":
         target_order = 3
@@ -377,7 +380,7 @@ def test_3d_jump_relations(ctx_factory, relation, visualize=False):
         from meshmode.discretization.poly_element import \
             InterpolatoryQuadratureSimplexGroupFactory
         pre_discr = Discretization(
-                cl_ctx, mesh,
+                actx, mesh,
                 InterpolatoryQuadratureSimplexGroupFactory(3))
 
         from pytential.qbx import QBXLayerPotentialSource
@@ -401,8 +404,9 @@ def test_3d_jump_relations(ctx_factory, relation, visualize=False):
                 sym.cse(sym.tangential_to_xyz(density_sym), "jxyz"),
                 qbx_forced_limit=qbx_forced_limit)))
 
-        x, y, z = density_discr.nodes().with_queue(queue)
-        m = cl.clmath
+        from meshmode.dof_array import thaw
+        x, y, z = thaw(actx, density_discr.nodes())
+        m = actx.np
 
         if relation == "nxcurls":
             density_sym = sym.make_sym_vector("density", 2)
@@ -417,7 +421,7 @@ def test_3d_jump_relations(ctx_factory, relation, visualize=False):
             # an XYZ function and project it.
             density = bind(places,
                     sym.xyz_to_tangential(sym.make_sym_vector("jxyz", 3)))(
-                            queue,
+                            actx,
                             jxyz=sym.make_obj_array([
                                 m.cos(0.5*x) * m.cos(0.5*y) * m.cos(0.5*z),
                                 m.sin(0.5*x) * m.cos(0.5*y) * m.sin(0.5*z),
@@ -448,12 +452,12 @@ def test_3d_jump_relations(ctx_factory, relation, visualize=False):
             raise ValueError("unexpected value of 'relation': %s" % relation)
 
         bound_jump_identity = bind(places, jump_identity_sym)
-        jump_identity = bound_jump_identity(queue, density=density)
+        jump_identity = bound_jump_identity(actx, density=density)
 
-        h_max = bind(places, sym.h_max(qbx.ambient_dim))(queue)
+        h_max = bind(places, sym.h_max(qbx.ambient_dim))(actx)
         err = (
-                norm(density_discr, queue, jump_identity, np.inf)
-                / norm(density_discr, queue, density, np.inf))
+                norm(density_discr, jump_identity, np.inf)
+                / norm(density_discr, density, np.inf))
         print("ERROR", h_max, err)
 
         eoc_rec.add_data_point(h_max, err)
@@ -461,15 +465,15 @@ def test_3d_jump_relations(ctx_factory, relation, visualize=False):
         # {{{ visualization
 
         if visualize and relation == "nxcurls":
-            nxcurlS_ext = bind(places, nxcurlS(+1))(queue, density=density)
-            nxcurlS_avg = bind(places, nxcurlS("avg"))(queue, density=density)
+            nxcurlS_ext = bind(places, nxcurlS(+1))(actx, density=density)
+            nxcurlS_avg = bind(places, nxcurlS("avg"))(actx, density=density)
             jtxyz = bind(places, sym.tangential_to_xyz(density_sym))(
-                    queue, density=density)
+                    actx, density=density)
 
             from meshmode.discretization.visualization import make_visualizer
-            bdry_vis = make_visualizer(queue, qbx.density_discr, target_order+3)
+            bdry_vis = make_visualizer(actx, qbx.density_discr, target_order+3)
 
-            bdry_normals = bind(places, sym.normal(3))(queue)\
+            bdry_normals = bind(places, sym.normal(3))(actx)\
                     .as_vector(dtype=object)
 
             bdry_vis.write_vtk_file("source-%s.vtu" % nel_factor, [
@@ -481,15 +485,15 @@ def test_3d_jump_relations(ctx_factory, relation, visualize=False):
 
         if visualize and relation == "sp":
             op = sym.Sp(knl, density_sym, qbx_forced_limit=+1)
-            sp_ext = bind(places, op)(queue, density=density)
+            sp_ext = bind(places, op)(actx, density=density)
             op = sym.Sp(knl, density_sym, qbx_forced_limit="avg")
-            sp_avg = bind(places, op)(queue, density=density)
+            sp_avg = bind(places, op)(actx, density=density)
 
             from meshmode.discretization.visualization import make_visualizer
-            bdry_vis = make_visualizer(queue, qbx.density_discr, target_order+3)
+            bdry_vis = make_visualizer(actx, qbx.density_discr, target_order+3)
 
             bdry_normals = bind(places,
-                    sym.normal(3))(queue).as_vector(dtype=object)
+                    sym.normal(3))(actx).as_vector(dtype=object)
 
             bdry_vis.write_vtk_file("source-%s.vtu" % nel_factor, [
                 ("density", density),
diff --git a/test/test_layer_pot_eigenvalues.py b/test/test_layer_pot_eigenvalues.py
index af2278ebeb402190cd921255404bd06ed35a0d26..b579791307cdbc55298618942bfe6977f276c1b5 100644
--- a/test/test_layer_pot_eigenvalues.py
+++ b/test/test_layer_pot_eigenvalues.py
@@ -26,12 +26,12 @@ THE SOFTWARE.
 import numpy as np
 import numpy.linalg as la  # noqa
 import pyopencl as cl
-import pyopencl.clmath  # noqa
 import pytest
 from pyopencl.tools import (  # noqa
         pytest_generate_tests_for_pyopencl as pytest_generate_tests)
 
 from functools import partial
+from meshmode.array_context import PyOpenCLArrayContext
 from meshmode.mesh.generation import (  # noqa
         ellipse, cloverleaf, starfish, drop, n_gon, qbx_peanut, WobblyCircle,
         make_curve_mesh, NArmedStarfish)
@@ -72,6 +72,7 @@ def test_ellipse_eigenvalues(ctx_factory, ellipse_aspect, mode_nr, qbx_order,
 
     cl_ctx = ctx_factory()
     queue = cl.CommandQueue(cl_ctx)
+    actx = PyOpenCLArrayContext(queue)
 
     target_order = 8
 
@@ -107,7 +108,7 @@ def test_ellipse_eigenvalues(ctx_factory, ellipse_aspect, mode_nr, qbx_order,
             fmm_order = False
 
         pre_density_discr = Discretization(
-                cl_ctx, mesh,
+                actx, mesh,
                 InterpolatoryQuadratureSimplexGroupFactory(target_order))
         qbx = QBXLayerPotentialSource(
                 pre_density_discr, 4*target_order,
@@ -117,18 +118,19 @@ def test_ellipse_eigenvalues(ctx_factory, ellipse_aspect, mode_nr, qbx_order,
         places = GeometryCollection(qbx)
 
         density_discr = places.get_discretization(places.auto_source.geometry)
-        nodes = density_discr.nodes().with_queue(queue)
+        from meshmode.dof_array import thaw, flatten
+        nodes = thaw(actx, density_discr.nodes())
 
         if visualize:
             # plot geometry, centers, normals
             centers = bind(places,
-                    sym.expansion_centers(qbx.ambient_dim, +1))(queue)
-            normal = bind(places,
-                    sym.normal(qbx.ambient_dim))(queue).as_vector(np.object)
+                    sym.expansion_centers(qbx.ambient_dim, +1))(actx)
+            normals = bind(places,
+                    sym.normal(qbx.ambient_dim))(actx).as_vector(np.object)
 
-            nodes_h = nodes.get()
-            centers_h = [centers[0].get(), centers[1].get()]
-            normals_h = [normal[0].get(), normal[1].get()]
+            nodes_h = np.array([actx.to_numpy(axis) for axis in flatten(nodes)])
+            centers_h = np.array([actx.to_numpy(axis) for axis in flatten(centers)])
+            normals_h = np.array([actx.to_numpy(axis) for axis in flatten(normals)])
 
             pt.plot(nodes_h[0], nodes_h[1], "x-")
             pt.plot(centers_h[0], centers_h[1], "o")
@@ -136,14 +138,14 @@ def test_ellipse_eigenvalues(ctx_factory, ellipse_aspect, mode_nr, qbx_order,
             pt.gca().set_aspect("equal")
             pt.show()
 
-        angle = cl.clmath.atan2(nodes[1]*ellipse_aspect, nodes[0])
+        angle = actx.np.atan2(nodes[1]*ellipse_aspect, nodes[0])
 
         ellipse_fraction = ((1-ellipse_aspect)/(1+ellipse_aspect))**mode_nr
 
         # (2.6) in [1]
-        J = cl.clmath.sqrt(  # noqa
-                cl.clmath.sin(angle)**2
-                + (1/ellipse_aspect)**2 * cl.clmath.cos(angle)**2)
+        J = actx.np.sqrt(  # noqa
+                actx.np.sin(angle)**2
+                + (1/ellipse_aspect)**2 * actx.np.cos(angle)**2)
 
         from sumpy.kernel import LaplaceKernel
         lap_knl = LaplaceKernel(2)
@@ -153,8 +155,8 @@ def test_ellipse_eigenvalues(ctx_factory, ellipse_aspect, mode_nr, qbx_order,
         sigma_sym = sym.var("sigma")
         s_sigma_op = sym.S(lap_knl, sigma_sym, qbx_forced_limit=+1)
 
-        sigma = cl.clmath.cos(mode_nr*angle)/J
-        s_sigma = bind(places, s_sigma_op)(queue=queue, sigma=sigma)
+        sigma = actx.np.cos(mode_nr*angle)/J
+        s_sigma = bind(places, s_sigma_op)(actx, sigma=sigma)
 
         # SIGN BINGO! :)
         s_eigval = 1/(2*mode_nr) * (1 + (-1)**mode_nr * ellipse_fraction)
@@ -165,14 +167,14 @@ def test_ellipse_eigenvalues(ctx_factory, ellipse_aspect, mode_nr, qbx_order,
         if 0:
             #pt.plot(s_sigma.get(), label="result")
             #pt.plot(s_sigma_ref.get(), label="ref")
-            pt.plot((s_sigma_ref - s_sigma).get(), label="err")
+            pt.plot(actx.to_numpy(flatten(s_sigma_ref - s_sigma)), label="err")
             pt.legend()
             pt.show()
 
-        h_max = bind(places, sym.h_max(qbx.ambient_dim))(queue)
+        h_max = bind(places, sym.h_max(qbx.ambient_dim))(actx)
         s_err = (
-                norm(density_discr, queue, s_sigma - s_sigma_ref)
-                / norm(density_discr, queue, s_sigma_ref))
+                norm(density_discr, s_sigma - s_sigma_ref)
+                / norm(density_discr, s_sigma_ref))
         s_eoc_rec.add_data_point(h_max, s_err)
 
         # }}}
@@ -181,8 +183,8 @@ def test_ellipse_eigenvalues(ctx_factory, ellipse_aspect, mode_nr, qbx_order,
 
         d_sigma_op = sym.D(lap_knl, sigma_sym, qbx_forced_limit="avg")
 
-        sigma = cl.clmath.cos(mode_nr*angle)
-        d_sigma = bind(places, d_sigma_op)(queue=queue, sigma=sigma)
+        sigma = actx.np.cos(mode_nr*angle)
+        d_sigma = bind(places, d_sigma_op)(actx, sigma=sigma)
 
         # SIGN BINGO! :)
         d_eigval = -(-1)**mode_nr * 1/2*ellipse_fraction
@@ -190,18 +192,18 @@ def test_ellipse_eigenvalues(ctx_factory, ellipse_aspect, mode_nr, qbx_order,
         d_sigma_ref = d_eigval*sigma
 
         if 0:
-            pt.plot(d_sigma.get(), label="result")
-            pt.plot(d_sigma_ref.get(), label="ref")
+            pt.plot(actx.to_numpy(flatten(d_sigma)), label="result")
+            pt.plot(actx.to_numpy(flatten(d_sigma_ref)), label="ref")
             pt.legend()
             pt.show()
 
         if ellipse_aspect == 1:
-            d_ref_norm = norm(density_discr, queue, sigma)
+            d_ref_norm = norm(density_discr, sigma)
         else:
-            d_ref_norm = norm(density_discr, queue, d_sigma_ref)
+            d_ref_norm = norm(density_discr, d_sigma_ref)
 
         d_err = (
-                norm(density_discr, queue, d_sigma - d_sigma_ref)
+                norm(density_discr, d_sigma - d_sigma_ref)
                 / d_ref_norm)
         d_eoc_rec.add_data_point(h_max, d_err)
 
@@ -212,15 +214,15 @@ def test_ellipse_eigenvalues(ctx_factory, ellipse_aspect, mode_nr, qbx_order,
 
             sp_sigma_op = sym.Sp(lap_knl, sym.var("sigma"), qbx_forced_limit="avg")
 
-            sigma = cl.clmath.cos(mode_nr*angle)
-            sp_sigma = bind(places, sp_sigma_op)(queue=queue, sigma=sigma)
+            sigma = actx.np.cos(mode_nr*angle)
+            sp_sigma = bind(places, sp_sigma_op)(actx, sigma=sigma)
             sp_eigval = 0
 
             sp_sigma_ref = sp_eigval*sigma
 
             sp_err = (
-                    norm(density_discr, queue, sp_sigma - sp_sigma_ref)
-                    / norm(density_discr, queue, sigma))
+                    norm(density_discr, sp_sigma - sp_sigma_ref)
+                    / norm(density_discr, sigma))
             sp_eoc_rec.add_data_point(h_max, sp_err)
 
             # }}}
@@ -261,6 +263,7 @@ def test_sphere_eigenvalues(ctx_factory, mode_m, mode_n, qbx_order,
 
     cl_ctx = ctx_factory()
     queue = cl.CommandQueue(cl_ctx)
+    actx = PyOpenCLArrayContext(queue)
 
     target_order = 8
 
@@ -277,8 +280,8 @@ def test_sphere_eigenvalues(ctx_factory, mode_m, mode_n, qbx_order,
 
     def rel_err(comp, ref):
         return (
-                norm(density_discr, queue, comp - ref)
-                / norm(density_discr, queue, ref))
+                norm(density_discr, comp - ref)
+                / norm(density_discr, ref))
 
     for nrefinements in [0, 1]:
         from meshmode.mesh.generation import generate_icosphere
@@ -292,7 +295,7 @@ def test_sphere_eigenvalues(ctx_factory, mode_m, mode_n, qbx_order,
             mesh = refiner.get_current_mesh()
 
         pre_density_discr = Discretization(
-                cl_ctx, mesh,
+                actx, mesh,
                 InterpolatoryQuadratureSimplexGroupFactory(target_order))
         qbx = QBXLayerPotentialSource(
                 pre_density_discr, 4*target_order,
@@ -301,14 +304,20 @@ def test_sphere_eigenvalues(ctx_factory, mode_m, mode_n, qbx_order,
                 )
         places = GeometryCollection(qbx)
 
-        density_discr = places.get_discretization(places.auto_source.geometry)
-        nodes = density_discr.nodes().with_queue(queue)
-        r = cl.clmath.sqrt(nodes[0]**2 + nodes[1]**2 + nodes[2]**2)
-        phi = cl.clmath.acos(nodes[2]/r)
-        theta = cl.clmath.atan2(nodes[0], nodes[1])
+        from meshmode.dof_array import flatten, unflatten, thaw
 
-        ymn = cl.array.to_device(queue,
-                special.sph_harm(mode_m, mode_n, theta.get(), phi.get()))
+        density_discr = places.get_discretization(places.auto_source.geometry)
+        nodes = thaw(actx, density_discr.nodes())
+        r = actx.np.sqrt(nodes[0]*nodes[0] + nodes[1]*nodes[1] + nodes[2]*nodes[2])
+        phi = actx.np.acos(nodes[2]/r)
+        theta = actx.np.atan2(nodes[0], nodes[1])
+
+        ymn = unflatten(actx, density_discr,
+                actx.from_numpy(
+                    special.sph_harm(
+                        mode_m, mode_n,
+                        actx.to_numpy(flatten(theta)),
+                        actx.to_numpy(flatten(phi)))))
 
         from sumpy.kernel import LaplaceKernel
         lap_knl = LaplaceKernel(3)
@@ -317,10 +326,10 @@ def test_sphere_eigenvalues(ctx_factory, mode_m, mode_n, qbx_order,
 
         s_sigma_op = bind(places,
                 sym.S(lap_knl, sym.var("sigma"), qbx_forced_limit=+1))
-        s_sigma = s_sigma_op(queue=queue, sigma=ymn)
+        s_sigma = s_sigma_op(actx, sigma=ymn)
         s_eigval = 1/(2*mode_n + 1)
 
-        h_max = bind(places, sym.h_max(qbx.ambient_dim))(queue)
+        h_max = bind(places, sym.h_max(qbx.ambient_dim))(actx)
         s_eoc_rec.add_data_point(h_max, rel_err(s_sigma, s_eigval*ymn))
 
         # }}}
@@ -329,7 +338,7 @@ def test_sphere_eigenvalues(ctx_factory, mode_m, mode_n, qbx_order,
 
         d_sigma_op = bind(places,
                 sym.D(lap_knl, sym.var("sigma"), qbx_forced_limit="avg"))
-        d_sigma = d_sigma_op(queue=queue, sigma=ymn)
+        d_sigma = d_sigma_op(actx, sigma=ymn)
         d_eigval = -1/(2*(2*mode_n + 1))
         d_eoc_rec.add_data_point(h_max, rel_err(d_sigma, d_eigval*ymn))
 
@@ -339,7 +348,7 @@ def test_sphere_eigenvalues(ctx_factory, mode_m, mode_n, qbx_order,
 
         sp_sigma_op = bind(places,
                  sym.Sp(lap_knl, sym.var("sigma"), qbx_forced_limit="avg"))
-        sp_sigma = sp_sigma_op(queue=queue, sigma=ymn)
+        sp_sigma = sp_sigma_op(actx, sigma=ymn)
         sp_eigval = -1/(2*(2*mode_n + 1))
 
         sp_eoc_rec.add_data_point(h_max, rel_err(sp_sigma, sp_eigval*ymn))
@@ -350,7 +359,7 @@ def test_sphere_eigenvalues(ctx_factory, mode_m, mode_n, qbx_order,
 
         dp_sigma_op = bind(places,
                 sym.Dp(lap_knl, sym.var("sigma"), qbx_forced_limit="avg"))
-        dp_sigma = dp_sigma_op(queue=queue, sigma=ymn)
+        dp_sigma = dp_sigma_op(actx, sigma=ymn)
         dp_eigval = -(mode_n*(mode_n+1))/(2*mode_n + 1)
 
         dp_eoc_rec.add_data_point(h_max, rel_err(dp_sigma, dp_eigval*ymn))
diff --git a/test/test_layer_pot_identity.py b/test/test_layer_pot_identity.py
index 7b0cbc65cd87a2061d06894cb405b4341967ba7c..b206e78de063c1f56e3f1f1a2f277e337e2d2414 100644
--- a/test/test_layer_pot_identity.py
+++ b/test/test_layer_pot_identity.py
@@ -32,6 +32,7 @@ from pyopencl.tools import (  # noqa
         pytest_generate_tests_for_pyopencl as pytest_generate_tests)
 
 from functools import partial
+from meshmode.array_context import PyOpenCLArrayContext
 from meshmode.mesh.generation import (  # noqa
         ellipse, cloverleaf, starfish, drop, n_gon, qbx_peanut, WobblyCircle,
         NArmedStarfish,
@@ -280,6 +281,7 @@ def test_identity_convergence(ctx_factory,  case, visualize=False):
 
     cl_ctx = ctx_factory()
     queue = cl.CommandQueue(cl_ctx)
+    actx = PyOpenCLArrayContext(queue)
 
     # prevent cache 'splosion
     from sympy.core.cache import clear_cache
@@ -314,7 +316,7 @@ def test_identity_convergence(ctx_factory,  case, visualize=False):
                 InterpolatoryQuadratureSimplexGroupFactory
         from pytential.qbx import QBXLayerPotentialSource
         pre_density_discr = Discretization(
-                cl_ctx, mesh,
+                actx, mesh,
                 InterpolatoryQuadratureSimplexGroupFactory(target_order))
 
         qbx = QBXLayerPotentialSource(
@@ -331,16 +333,18 @@ def test_identity_convergence(ctx_factory,  case, visualize=False):
 
         from pytential.qbx.refinement import refine_geometry_collection
         kernel_length_scale = 5 / case.k if case.k else None
-        places = refine_geometry_collection(queue, places,
+        places = refine_geometry_collection(places,
                 kernel_length_scale=kernel_length_scale)
 
         # {{{ compute values of a solution to the PDE
 
         density_discr = places.get_discretization(places.auto_source.geometry)
 
-        nodes_host = density_discr.nodes().get(queue)
-        normal = bind(places, sym.normal(d))(queue).as_vector(np.object)
-        normal_host = [normal[j].get() for j in range(d)]
+        from meshmode.dof_array import thaw, flatten, unflatten
+        nodes_host = [actx.to_numpy(axis)
+                for axis in flatten(thaw(actx, density_discr.nodes()))]
+        normal = bind(places, sym.normal(d))(actx).as_vector(np.object)
+        normal_host = [actx.to_numpy(axis)for axis in flatten(normal)]
 
         if k != 0:
             if d == 2:
@@ -376,31 +380,33 @@ def test_identity_convergence(ctx_factory,  case, visualize=False):
 
         # }}}
 
-        u_dev = cl.array.to_device(queue, u)
-        dn_u_dev = cl.array.to_device(queue, dn_u)
-        grad_u_dev = cl.array.to_device(queue, grad_u)
+        u_dev = unflatten(actx, density_discr, actx.from_numpy(u))
+        dn_u_dev = unflatten(actx, density_discr, actx.from_numpy(dn_u))
+        from pytools.obj_array import make_obj_array, obj_array_vectorize
+        grad_u_dev = unflatten(actx, density_discr,
+                obj_array_vectorize(actx.from_numpy, make_obj_array(grad_u)))
 
         key = (case.qbx_order, case.geometry.mesh_name, resolution,
                 case.expr.zero_op_name)
 
         bound_op = bind(places, case.expr.get_zero_op(k_sym, **knl_kwargs))
         error = bound_op(
-                queue, u=u_dev, dn_u=dn_u_dev, grad_u=grad_u_dev, k=case.k)
+                actx, u=u_dev, dn_u=dn_u_dev, grad_u=grad_u_dev, k=case.k)
         if 0:
             pt.plot(error)
             pt.show()
 
-        linf_error_norm = norm(density_discr, queue, error, p=np.inf)
+        linf_error_norm = norm(density_discr, error, p=np.inf)
         print("--->", key, linf_error_norm)
 
-        h_max = bind(places, sym.h_max(qbx.ambient_dim))(queue)
+        h_max = bind(places, sym.h_max(qbx.ambient_dim))(actx)
         eoc_rec.add_data_point(h_max, linf_error_norm)
 
         if visualize:
             from meshmode.discretization.visualization import make_visualizer
-            bdry_vis = make_visualizer(queue, density_discr, target_order)
+            bdry_vis = make_visualizer(actx, density_discr, target_order)
 
-            bdry_normals = bind(places, sym.normal(mesh.ambient_dim))(queue)\
+            bdry_normals = bind(places, sym.normal(mesh.ambient_dim))(actx)\
                     .as_vector(dtype=np.object)
 
             bdry_vis.write_vtk_file("source-%s.vtu" % resolution, [
diff --git a/test/test_linalg_proxy.py b/test/test_linalg_proxy.py
index a44872906c08f68e9c387b0006da34933de859ad..8e485d6e9fb3e3cb8fc6f541971796bc8fcf4f63 100644
--- a/test/test_linalg_proxy.py
+++ b/test/test_linalg_proxy.py
@@ -29,6 +29,8 @@ import pyopencl as cl
 import pyopencl.array   # noqa
 
 from pytential import bind, sym
+
+from meshmode.array_context import PyOpenCLArrayContext
 from meshmode.mesh.generation import ( # noqa
         ellipse, NArmedStarfish, generate_torus, make_curve_mesh)
 
@@ -41,9 +43,9 @@ from pyopencl.tools import (  # noqa
 from test_matrix import _build_geometry, _build_block_index
 
 
-def _plot_partition_indices(queue, discr, indices, **kwargs):
+def _plot_partition_indices(actx, discr, indices, **kwargs):
     import matplotlib.pyplot as pt
-    indices = indices.get(queue)
+    indices = indices.get(actx.queue)
 
     args = [
         kwargs.get("method", "unknown"),
@@ -57,12 +59,13 @@ def _plot_partition_indices(queue, discr, indices, **kwargs):
     pt.savefig("test_partition_{0}_{1}_{3}d_ranges_{2}.png".format(*args))
     pt.clf()
 
+    from pytential.utils import flatten_to_numpy
     if discr.ambient_dim == 2:
-        sources = discr.nodes().get(queue)
+        sources = flatten_to_numpy(actx, discr.nodes())
 
         pt.figure(figsize=(10, 8), dpi=300)
 
-        if indices.indices.shape[0] != discr.nnodes:
+        if indices.indices.shape[0] != discr.ndofs:
             pt.plot(sources[0], sources[1], 'ko', alpha=0.5)
         for i in range(indices.nblocks):
             isrc = indices.block_indices(i)
@@ -80,17 +83,20 @@ def _plot_partition_indices(queue, discr, indices, **kwargs):
             return
 
         from meshmode.discretization.visualization import make_visualizer
-        marker = -42.0 * np.ones(discr.nnodes)
+        marker = -42.0 * np.ones(discr.ndofs)
 
         for i in range(indices.nblocks):
             isrc = indices.block_indices(i)
             marker[isrc] = 10.0 * (i + 1.0)
 
-        vis = make_visualizer(queue, discr, 10)
+        from meshmode.dof_array import unflatten
+        marker = unflatten(actx, discr, actx.from_numpy(marker))
+
+        vis = make_visualizer(actx, discr, 10)
 
-        filename = "test_partition_{0}_{1}_{3}d_{2}.png".format(*args)
+        filename = "test_partition_{0}_{1}_{3}d_{2}.vtu".format(*args)
         vis.write_vtk_file(filename, [
-            ("marker", cl.array.to_device(queue, marker))
+            ("marker", marker)
             ])
 
 
@@ -99,12 +105,14 @@ def _plot_partition_indices(queue, discr, indices, **kwargs):
 def test_partition_points(ctx_factory, use_tree, ambient_dim, visualize=False):
     ctx = ctx_factory()
     queue = cl.CommandQueue(ctx)
+    actx = PyOpenCLArrayContext(queue)
 
-    places, dofdesc = _build_geometry(queue, ambient_dim=ambient_dim)
-    _build_block_index(queue,
-            places.get_discretization(dofdesc.geometry, dofdesc.discr_stage),
-            use_tree=use_tree,
-            factor=0.6)
+    places, dofdesc = _build_geometry(actx, ambient_dim=ambient_dim)
+    discr = places.get_discretization(dofdesc.geometry, dofdesc.discr_stage)
+    indices = _build_block_index(actx, discr, use_tree=use_tree, factor=0.6)
+
+    if visualize:
+        _plot_partition_indices(actx, discr, indices, use_tree=use_tree)
 
 
 @pytest.mark.parametrize("ambient_dim", [2, 3])
@@ -112,24 +120,23 @@ def test_partition_points(ctx_factory, use_tree, ambient_dim, visualize=False):
 def test_proxy_generator(ctx_factory, ambient_dim, factor, visualize=False):
     ctx = ctx_factory()
     queue = cl.CommandQueue(ctx)
+    actx = PyOpenCLArrayContext(queue)
 
-    places, dofdesc = _build_geometry(queue, ambient_dim=ambient_dim)
+    places, dofdesc = _build_geometry(actx, ambient_dim=ambient_dim)
     dofdesc = dofdesc.to_stage1()
 
     density_discr = places.get_discretization(dofdesc.geometry, dofdesc.discr_stage)
-    srcindices = _build_block_index(queue,
-            density_discr,
-            factor=factor)
+    srcindices = _build_block_index(actx, density_discr, factor=factor)
 
     from pytential.linalg.proxy import ProxyGenerator
     generator = ProxyGenerator(places)
     proxies, pxyranges, pxycenters, pxyradii = \
-            generator(queue, dofdesc, srcindices)
+            generator(actx, dofdesc, srcindices)
 
-    proxies = np.vstack([p.get() for p in proxies])
-    pxyranges = pxyranges.get()
-    pxycenters = np.vstack([c.get() for c in pxycenters])
-    pxyradii = pxyradii.get()
+    proxies = np.vstack([actx.to_numpy(p) for p in proxies])
+    pxyranges = actx.to_numpy(pxyranges)
+    pxycenters = np.vstack([actx.to_numpy(c) for c in pxycenters])
+    pxyradii = actx.to_numpy(pxyradii)
 
     for i in range(srcindices.nblocks):
         ipxy = np.s_[pxyranges[i]:pxyranges[i + 1]]
@@ -142,12 +149,14 @@ def test_proxy_generator(ctx_factory, ambient_dim, factor, visualize=False):
         if ambient_dim == 2:
             import matplotlib.pyplot as pt
 
-            density_nodes = density_discr.nodes().get(queue)
-            ci = bind(places, sym.expansion_centers(ambient_dim, -1))(queue)
-            ci = np.vstack([c.get(queue) for c in ci])
-            ce = bind(places, sym.expansion_centers(ambient_dim, +1))(queue)
-            ce = np.vstack([c.get(queue) for c in ce])
-            r = bind(places, sym.expansion_radii(ambient_dim))(queue).get()
+            from pytential.utils import flatten_to_numpy
+            density_nodes = np.vstack(flatten_to_numpy(actx, density_discr.nodes()))
+            ci = bind(places, sym.expansion_centers(ambient_dim, -1))(actx)
+            ci = np.vstack(flatten_to_numpy(actx, ci))
+            ce = bind(places, sym.expansion_centers(ambient_dim, +1))(actx)
+            ce = np.vstack(flatten_to_numpy(actx, ce))
+            r = bind(places, sym.expansion_radii(ambient_dim))(actx)
+            r = flatten_to_numpy(actx, r)
 
             for i in range(srcindices.nblocks):
                 isrc = srcindices.block_indices(i)
@@ -195,10 +204,10 @@ def test_proxy_generator(ctx_factory, ambient_dim, factor, visualize=False):
                     b=pxycenters[:, i].reshape(-1))
 
                 mesh = merge_disjoint_meshes([mesh, density_discr.mesh])
-                discr = Discretization(ctx, mesh,
+                discr = Discretization(actx, mesh,
                     InterpolatoryQuadratureSimplexGroupFactory(10))
 
-                vis = make_visualizer(queue, discr, 10)
+                vis = make_visualizer(actx, discr, 10)
                 filename = "test_proxy_generator_{}d_{:04}.vtu".format(
                         ambient_dim, i)
                 vis.write_vtk_file(filename, [])
@@ -209,26 +218,25 @@ def test_proxy_generator(ctx_factory, ambient_dim, factor, visualize=False):
 def test_interaction_points(ctx_factory, ambient_dim, factor, visualize=False):
     ctx = ctx_factory()
     queue = cl.CommandQueue(ctx)
+    actx = PyOpenCLArrayContext(queue)
 
-    places, dofdesc = _build_geometry(queue, ambient_dim=ambient_dim)
+    places, dofdesc = _build_geometry(actx, ambient_dim=ambient_dim)
     dofdesc = dofdesc.to_stage1()
 
     density_discr = places.get_discretization(dofdesc.geometry, dofdesc.discr_stage)
-    srcindices = _build_block_index(queue,
-            density_discr,
-            factor=factor)
+    srcindices = _build_block_index(actx, density_discr, factor=factor)
 
     # generate proxy points
     from pytential.linalg.proxy import ProxyGenerator
     generator = ProxyGenerator(places)
-    _, _, pxycenters, pxyradii = generator(queue, dofdesc, srcindices)
+    _, _, pxycenters, pxyradii = generator(actx, dofdesc, srcindices)
 
     from pytential.linalg.proxy import (  # noqa
             gather_block_neighbor_points,
             gather_block_interaction_points)
-    nbrindices = gather_block_neighbor_points(density_discr,
+    nbrindices = gather_block_neighbor_points(actx, density_discr,
             srcindices, pxycenters, pxyradii)
-    nodes, ranges = gather_block_interaction_points(
+    nodes, ranges = gather_block_interaction_points(actx,
             places, dofdesc, srcindices)
 
     srcindices = srcindices.get(queue)
@@ -240,12 +248,13 @@ def test_interaction_points(ctx_factory, ambient_dim, factor, visualize=False):
 
         assert not np.any(np.isin(inbr, isrc))
 
+    from pytential.utils import flatten_to_numpy
     if visualize:
         if ambient_dim == 2:
             import matplotlib.pyplot as pt
-            density_nodes = density_discr.nodes().get(queue)
-            nodes = nodes.get(queue)
-            ranges = ranges.get(queue)
+            density_nodes = flatten_to_numpy(actx, density_discr.nodes())
+            nodes = flatten_to_numpy(actx, nodes)
+            ranges = actx.to_numpy(ranges)
 
             for i in range(srcindices.nblocks):
                 isrc = srcindices.block_indices(i)
@@ -255,14 +264,14 @@ def test_interaction_points(ctx_factory, ambient_dim, factor, visualize=False):
                 pt.figure(figsize=(10, 8))
                 pt.plot(density_nodes[0], density_nodes[1],
                         'ko', ms=2.0, alpha=0.5)
-                pt.plot(density_nodes[0, srcindices.indices],
-                        density_nodes[1, srcindices.indices],
+                pt.plot(density_nodes[0][srcindices.indices],
+                        density_nodes[1][srcindices.indices],
                         'o', ms=2.0)
-                pt.plot(density_nodes[0, isrc], density_nodes[1, isrc],
+                pt.plot(density_nodes[0][isrc], density_nodes[1][isrc],
                         'o', ms=2.0)
-                pt.plot(density_nodes[0, inbr], density_nodes[1, inbr],
+                pt.plot(density_nodes[0][inbr], density_nodes[1][inbr],
                         'o', ms=2.0)
-                pt.plot(nodes[0, iall], nodes[1, iall],
+                pt.plot(nodes[0][iall], nodes[1][iall],
                         'x', ms=2.0)
                 pt.xlim([-1.5, 1.5])
                 pt.ylim([-1.5, 1.5])
@@ -272,7 +281,7 @@ def test_interaction_points(ctx_factory, ambient_dim, factor, visualize=False):
                 pt.clf()
         elif ambient_dim == 3:
             from meshmode.discretization.visualization import make_visualizer
-            marker = np.empty(density_discr.nnodes)
+            marker = np.empty(density_discr.ndofs)
 
             for i in range(srcindices.nblocks):
                 isrc = srcindices.block_indices(i)
@@ -282,9 +291,11 @@ def test_interaction_points(ctx_factory, ambient_dim, factor, visualize=False):
                 marker[srcindices.indices] = 0.0
                 marker[isrc] = -42.0
                 marker[inbr] = +42.0
-                marker_dev = cl.array.to_device(queue, marker)
 
-                vis = make_visualizer(queue, density_discr, 10)
+                from meshmode.dof_array import unflatten
+                marker_dev = unflatten(actx, density_discr, actx.from_numpy(marker))
+
+                vis = make_visualizer(actx, density_discr, 10)
                 filename = "test_area_query_{}d_{:04}.vtu".format(ambient_dim, i)
                 vis.write_vtk_file(filename, [
                     ("marker", marker_dev),
diff --git a/test/test_matrix.py b/test/test_matrix.py
index 12be496c5b1d50af7fb72d39c458dbbe3fce4d27..ce429642df74870fa2553fffac0e548ee5bb4061 100644
--- a/test/test_matrix.py
+++ b/test/test_matrix.py
@@ -31,16 +31,17 @@ import numpy as np
 import numpy.linalg as la
 
 import pyopencl as cl
-import pyopencl.array   # noqa
+import pyopencl.array
 
 from pytools.obj_array import make_obj_array, is_obj_array
 
 from sumpy.tools import BlockIndexRanges, MatrixBlockIndexRanges
 from sumpy.symbolic import USE_SYMENGINE
 
-from pytential import sym
+from pytential import bind, sym
 from pytential import GeometryCollection
 
+from meshmode.array_context import PyOpenCLArrayContext
 from meshmode.mesh.generation import (  # noqa
         ellipse, NArmedStarfish, make_curve_mesh, generate_torus)
 
@@ -55,7 +56,7 @@ except ImportError:
     pass
 
 
-def _build_geometry(queue,
+def _build_geometry(actx,
         ambient_dim=2,
         nelements=30,
         target_order=7,
@@ -79,8 +80,7 @@ def _build_geometry(queue,
     from meshmode.discretization.poly_element import \
             InterpolatoryQuadratureSimplexGroupFactory
     from pytential.qbx import QBXLayerPotentialSource
-    density_discr = Discretization(
-            queue.context, mesh,
+    density_discr = Discretization(actx, mesh,
             InterpolatoryQuadratureSimplexGroupFactory(target_order))
 
     qbx = QBXLayerPotentialSource(density_discr,
@@ -92,24 +92,24 @@ def _build_geometry(queue,
     return places, places.auto_source
 
 
-def _build_block_index(queue,
-                       discr,
+def _build_block_index(actx, discr,
                        nblks=10,
                        factor=1.0,
                        use_tree=True):
-    nnodes = discr.nnodes
-    max_particles_in_box = nnodes // nblks
+    max_particles_in_box = discr.ndofs // nblks
 
     # create index ranges
     from pytential.linalg.proxy import partition_by_nodes
-    indices = partition_by_nodes(discr,
-            use_tree=use_tree, max_nodes_in_box=max_particles_in_box)
+    indices = partition_by_nodes(actx, discr,
+            use_tree=use_tree,
+            max_nodes_in_box=max_particles_in_box)
 
     if abs(factor - 1.0) < 1.0e-14:
         return indices
 
     # randomly pick a subset of points
-    indices = indices.get(queue)
+    # FIXME: this needs porting in sumpy.tools.BlockIndexRanges
+    indices = indices.get(actx.queue)
 
     indices_ = np.empty(indices.nblocks, dtype=np.object)
     for i in range(indices.nblocks):
@@ -120,13 +120,11 @@ def _build_block_index(queue,
         indices_[i] = np.sort(
                 np.random.choice(iidx, size=isize, replace=False))
 
-    ranges_ = cl.array.to_device(queue,
-            np.cumsum([0] + [r.shape[0] for r in indices_]))
-    indices_ = cl.array.to_device(queue, np.hstack(indices_))
+    ranges_ = actx.from_numpy(np.cumsum([0] + [r.shape[0] for r in indices_]))
+    indices_ = actx.from_numpy(np.hstack(indices_))
 
-    indices = BlockIndexRanges(discr.cl_context,
-                               indices_.with_queue(None),
-                               ranges_.with_queue(None))
+    indices = BlockIndexRanges(actx.context,
+            actx.freeze(indices_), actx.freeze(ranges_))
 
     return indices
 
@@ -137,8 +135,8 @@ def _build_op(lpot_id,
               source=sym.DEFAULT_SOURCE,
               target=sym.DEFAULT_TARGET,
               qbx_forced_limit="avg"):
-
     from sumpy.kernel import LaplaceKernel, HelmholtzKernel
+
     if k:
         knl = HelmholtzKernel(ambient_dim)
         knl_kwargs = {"k": k}
@@ -200,6 +198,7 @@ def _max_block_error(mat, blk, index_set):
 def test_matrix_build(ctx_factory, k, curve_f, lpot_id, visualize=False):
     cl_ctx = ctx_factory()
     queue = cl.CommandQueue(cl_ctx)
+    actx = PyOpenCLArrayContext(queue)
 
     # prevent cache 'splosion
     from sympy.core.cache import clear_cache
@@ -215,8 +214,7 @@ def test_matrix_build(ctx_factory, k, curve_f, lpot_id, visualize=False):
     from meshmode.discretization import Discretization
     from meshmode.discretization.poly_element import \
             InterpolatoryQuadratureSimplexGroupFactory
-    pre_density_discr = Discretization(
-            cl_ctx, mesh,
+    pre_density_discr = Discretization(actx, mesh,
             InterpolatoryQuadratureSimplexGroupFactory(target_order))
 
     from pytential.qbx import QBXLayerPotentialSource
@@ -228,7 +226,7 @@ def test_matrix_build(ctx_factory, k, curve_f, lpot_id, visualize=False):
 
     from pytential.qbx.refinement import refine_geometry_collection
     places = GeometryCollection(qbx)
-    places = refine_geometry_collection(queue, places,
+    places = refine_geometry_collection(places,
             kernel_length_scale=(5 / k if k else None))
 
     source = places.auto_source.to_stage1()
@@ -237,15 +235,14 @@ def test_matrix_build(ctx_factory, k, curve_f, lpot_id, visualize=False):
     op, u_sym, knl_kwargs = _build_op(lpot_id, k=k,
             source=places.auto_source,
             target=places.auto_target)
-    from pytential import bind
     bound_op = bind(places, op)
 
     from pytential.symbolic.execution import build_matrix
-    mat = build_matrix(queue, places, op, u_sym).get()
+    mat = build_matrix(actx, places, op, u_sym).get()
 
     if visualize:
         from sumpy.tools import build_matrix as build_matrix_via_matvec
-        mat2 = bound_op.scipy_op(queue, "u", dtype=mat.dtype, **knl_kwargs)
+        mat2 = bound_op.scipy_op(actx, "u", dtype=mat.dtype, **knl_kwargs)
         mat2 = build_matrix_via_matvec(mat2)
         print(la.norm((mat - mat2).real, "fro") / la.norm(mat2.real, "fro"),
               la.norm((mat - mat2).imag, "fro") / la.norm(mat2.imag, "fro"))
@@ -267,23 +264,22 @@ def test_matrix_build(ctx_factory, k, curve_f, lpot_id, visualize=False):
         pt.colorbar()
         pt.show()
 
-    from sumpy.tools import vector_to_device, vector_from_device
+    from pytential.utils import unflatten_from_numpy, flatten_to_numpy
     np.random.seed(12)
     for i in range(5):
         if is_obj_array(u_sym):
             u = make_obj_array([
-                np.random.randn(density_discr.nnodes)
+                np.random.randn(density_discr.ndofs)
                 for _ in range(len(u_sym))
                 ])
         else:
-            u = np.random.randn(density_discr.nnodes)
+            u = np.random.randn(density_discr.ndofs)
+        u_dev = unflatten_from_numpy(actx, density_discr, u)
 
-        u_dev = vector_to_device(queue, u)
         res_matvec = np.hstack(
-                list(vector_from_device(
-                    queue, bound_op(queue, u=u_dev))))
-
-        res_mat = mat.dot(np.hstack(list(u)))
+                flatten_to_numpy(actx, bound_op(actx, u=u_dev))
+                )
+        res_mat = mat.dot(np.hstack(u))
 
         abs_err = la.norm(res_mat - res_matvec, np.inf)
         rel_err = abs_err / la.norm(res_matvec, np.inf)
@@ -299,6 +295,7 @@ def test_p2p_block_builder(ctx_factory, factor, ambient_dim, lpot_id,
                            visualize=False):
     ctx = ctx_factory()
     queue = cl.CommandQueue(ctx)
+    actx = PyOpenCLArrayContext(queue)
 
     # prevent cache explosion
     from sympy.core.cache import clear_cache
@@ -312,7 +309,7 @@ def test_p2p_block_builder(ctx_factory, factor, ambient_dim, lpot_id,
             )
     target_order = 2 if ambient_dim == 3 else 7
 
-    places, dofdesc = _build_geometry(queue,
+    places, dofdesc = _build_geometry(actx,
             target_order=target_order,
             ambient_dim=ambient_dim,
             auto_where=place_ids)
@@ -323,14 +320,14 @@ def test_p2p_block_builder(ctx_factory, factor, ambient_dim, lpot_id,
 
     dd = places.auto_source
     density_discr = places.get_discretization(dd.geometry, dd.discr_stage)
-    index_set = _build_block_index(queue, density_discr, factor=factor)
+    index_set = _build_block_index(actx, density_discr, factor=factor)
     index_set = MatrixBlockIndexRanges(ctx, index_set, index_set)
 
     from pytential.symbolic.execution import _prepare_expr
     expr = _prepare_expr(places, op)
 
     from pytential.symbolic.matrix import P2PMatrixBuilder
-    mbuilder = P2PMatrixBuilder(queue,
+    mbuilder = P2PMatrixBuilder(actx,
             dep_expr=u_sym,
             other_dep_exprs=[],
             dep_source=places.get_geometry(dd.geometry),
@@ -341,7 +338,7 @@ def test_p2p_block_builder(ctx_factory, factor, ambient_dim, lpot_id,
     mat = mbuilder(expr)
 
     from pytential.symbolic.matrix import FarFieldBlockBuilder
-    mbuilder = FarFieldBlockBuilder(queue,
+    mbuilder = FarFieldBlockBuilder(actx,
             dep_expr=u_sym,
             other_dep_exprs=[],
             dep_source=places.get_geometry(dd.geometry),
@@ -352,7 +349,7 @@ def test_p2p_block_builder(ctx_factory, factor, ambient_dim, lpot_id,
             exclude_self=True)
     blk = mbuilder(expr)
 
-    index_set = index_set.get(queue)
+    index_set = index_set.get(actx.queue)
     if visualize and ambient_dim == 2:
         blk_full = np.zeros_like(mat)
         mat_full = np.zeros_like(mat)
@@ -381,6 +378,7 @@ def test_qbx_block_builder(ctx_factory, factor, ambient_dim, lpot_id,
                            visualize=False):
     ctx = ctx_factory()
     queue = cl.CommandQueue(ctx)
+    actx = PyOpenCLArrayContext(queue)
 
     # prevent cache explosion
     from sympy.core.cache import clear_cache
@@ -394,7 +392,7 @@ def test_qbx_block_builder(ctx_factory, factor, ambient_dim, lpot_id,
             )
     target_order = 2 if ambient_dim == 3 else 7
 
-    places, _ = _build_geometry(queue,
+    places, _ = _build_geometry(actx,
             target_order=target_order,
             ambient_dim=ambient_dim,
             auto_where=place_ids)
@@ -409,11 +407,11 @@ def test_qbx_block_builder(ctx_factory, factor, ambient_dim, lpot_id,
 
     dd = places.auto_source
     density_discr = places.get_discretization(dd.geometry, dd.discr_stage)
-    index_set = _build_block_index(queue, density_discr, factor=factor)
+    index_set = _build_block_index(actx, density_discr, factor=factor)
     index_set = MatrixBlockIndexRanges(ctx, index_set, index_set)
 
     from pytential.symbolic.matrix import NearFieldBlockBuilder
-    mbuilder = NearFieldBlockBuilder(queue,
+    mbuilder = NearFieldBlockBuilder(actx,
             dep_expr=u_sym,
             other_dep_exprs=[],
             dep_source=places.get_geometry(dd.geometry),
@@ -424,7 +422,7 @@ def test_qbx_block_builder(ctx_factory, factor, ambient_dim, lpot_id,
     blk = mbuilder(expr)
 
     from pytential.symbolic.matrix import MatrixBuilder
-    mbuilder = MatrixBuilder(queue,
+    mbuilder = MatrixBuilder(actx,
             dep_expr=u_sym,
             other_dep_exprs=[],
             dep_source=places.get_geometry(dd.geometry),
@@ -462,6 +460,7 @@ def test_build_matrix_places(ctx_factory,
         source_discr_stage, target_discr_stage, visualize=False):
     ctx = ctx_factory()
     queue = cl.CommandQueue(ctx)
+    actx = PyOpenCLArrayContext(queue)
 
     # prevent cache explosion
     from sympy.core.cache import clear_cache
@@ -476,7 +475,7 @@ def test_build_matrix_places(ctx_factory,
             )
 
     # build test operators
-    places, _ = _build_geometry(queue,
+    places, _ = _build_geometry(actx,
             nelements=8,
             target_order=2,
             ambient_dim=2,
@@ -493,7 +492,7 @@ def test_build_matrix_places(ctx_factory,
     dd = places.auto_source
     source_discr = places.get_discretization(dd.geometry, dd.discr_stage)
 
-    index_set = _build_block_index(queue, source_discr, factor=0.6)
+    index_set = _build_block_index(actx, source_discr, factor=0.6)
     index_set = MatrixBlockIndexRanges(ctx, index_set, index_set)
 
     from pytential.symbolic.execution import _prepare_expr
@@ -501,7 +500,7 @@ def test_build_matrix_places(ctx_factory,
 
     # build full QBX matrix
     from pytential.symbolic.matrix import MatrixBuilder
-    mbuilder = MatrixBuilder(queue,
+    mbuilder = MatrixBuilder(actx,
             dep_expr=u_sym,
             other_dep_exprs=[],
             dep_source=places.get_geometry(dd.geometry),
@@ -512,7 +511,7 @@ def test_build_matrix_places(ctx_factory,
 
     # build full p2p matrix
     from pytential.symbolic.matrix import P2PMatrixBuilder
-    mbuilder = P2PMatrixBuilder(queue,
+    mbuilder = P2PMatrixBuilder(actx,
             dep_expr=u_sym,
             other_dep_exprs=[],
             dep_source=places.get_geometry(dd.geometry),
@@ -521,11 +520,11 @@ def test_build_matrix_places(ctx_factory,
             context={})
     p2p_mat = mbuilder(op)
 
-    assert p2p_mat.shape == (target_discr.nnodes, source_discr.nnodes)
+    assert p2p_mat.shape == (target_discr.ndofs, source_discr.ndofs)
 
     # build block qbx and p2p matrices
     from pytential.symbolic.matrix import NearFieldBlockBuilder
-    mbuilder = NearFieldBlockBuilder(queue,
+    mbuilder = NearFieldBlockBuilder(actx,
             dep_expr=u_sym,
             other_dep_exprs=[],
             dep_source=places.get_geometry(dd.geometry),
@@ -538,7 +537,7 @@ def test_build_matrix_places(ctx_factory,
         assert _max_block_error(qbx_mat, mat, index_set.get(queue)) < 1.0e-14
 
     from pytential.symbolic.matrix import FarFieldBlockBuilder
-    mbuilder = FarFieldBlockBuilder(queue,
+    mbuilder = FarFieldBlockBuilder(actx,
             dep_expr=u_sym,
             other_dep_exprs=[],
             dep_source=places.get_geometry(dd.geometry),
diff --git a/test/test_maxwell.py b/test/test_maxwell.py
index 2e950193e17daf410cf8ef9c4d4fa74cae915670..0fc67b0bf9fa321d8a94d2e254e60f4ad0983f33 100644
--- a/test/test_maxwell.py
+++ b/test/test_maxwell.py
@@ -31,6 +31,7 @@ import pytest
 
 from pytential import bind, sym, norm
 
+from meshmode.array_context import PyOpenCLArrayContext
 from sumpy.visualization import make_field_plotter_from_bbox  # noqa
 from sumpy.point_calculus import CalculusPatch, frequency_domain_maxwell
 from sumpy.tools import vector_from_device
@@ -77,7 +78,7 @@ class SphereTestCase(MaxwellTestCase):
         else:
             return generate_icosphere(0.5, target_order)
 
-    def get_source(self, queue):
+    def get_source(self, actx):
         if self.is_interior:
             source_ctr = np.array([[0.35, 0.1, 0.15]]).T
         else:
@@ -87,9 +88,7 @@ class SphereTestCase(MaxwellTestCase):
 
         sources = source_ctr + source_rad*2*(np.random.rand(3, 10)-0.5)
         from pytential.source import PointPotentialSource
-        return PointPotentialSource(
-                queue.context,
-                cl.array.to_device(queue, sources))
+        return PointPotentialSource(actx.from_numpy(sources))
 
 
 class RoundedCubeTestCase(MaxwellTestCase):
@@ -121,7 +120,7 @@ class RoundedCubeTestCase(MaxwellTestCase):
         else:
             return generate_icosphere(0.5, target_order)
 
-    def get_source(self, queue):
+    def get_source(self, actx):
         if self.is_interior:
             source_ctr = np.array([[0.35, 0.1, 0.15]]).T
         else:
@@ -131,9 +130,7 @@ class RoundedCubeTestCase(MaxwellTestCase):
 
         sources = source_ctr + source_rad*2*(np.random.rand(3, 10)-0.5)
         from pytential.source import PointPotentialSource
-        return PointPotentialSource(
-                queue.context,
-                cl.array.to_device(queue, sources))
+        return PointPotentialSource(actx.from_numpy(sources))
 
 
 class ElliptiPlaneTestCase(MaxwellTestCase):
@@ -168,7 +165,7 @@ class ElliptiPlaneTestCase(MaxwellTestCase):
         else:
             return generate_icosphere(0.5, target_order)
 
-    def get_source(self, queue):
+    def get_source(self, actx):
         if self.is_interior:
             source_ctr = np.array([[0.35, 0.1, 0.15]]).T
         else:
@@ -178,9 +175,7 @@ class ElliptiPlaneTestCase(MaxwellTestCase):
 
         sources = source_ctr + source_rad*2*(np.random.rand(3, 10)-0.5)
         from pytential.source import PointPotentialSource
-        return PointPotentialSource(
-                queue.context,
-                cl.array.to_device(queue, sources))
+        return PointPotentialSource(actx.from_numpy(sources))
 
 # }}}
 
@@ -228,6 +223,7 @@ def test_pec_mfie_extinction(ctx_factory, case,
 
     cl_ctx = ctx_factory()
     queue = cl.CommandQueue(cl_ctx)
+    actx = PyOpenCLArrayContext(queue)
 
     np.random.seed(12)
 
@@ -245,13 +241,16 @@ def test_pec_mfie_extinction(ctx_factory, case,
             get_sym_maxwell_plane_wave)
     mfie = PECChargeCurrentMFIEOperator()
 
-    test_source = case.get_source(queue)
+    test_source = case.get_source(actx)
 
     calc_patch = CalculusPatch(np.array([-3, 0, 0]), h=0.01)
-    calc_patch_tgt = PointsTarget(cl.array.to_device(queue, calc_patch.points))
+    calc_patch_tgt = PointsTarget(actx.from_numpy(calc_patch.points))
 
     rng = cl.clrandom.PhiloxGenerator(cl_ctx, seed=12)
-    src_j = rng.normal(queue, (3, test_source.nnodes), dtype=np.float64)
+    from pytools.obj_array import make_obj_array
+    src_j = make_obj_array([
+            rng.normal(actx.queue, (test_source.ndofs,), dtype=np.float64)
+            for _ in range(3)])
 
     def eval_inc_field_at(places, source=None, target=None):
         if source is None:
@@ -264,12 +263,12 @@ def test_pec_mfie_extinction(ctx_factory, case,
                         amplitude_vec=np.array([1, 1, 1]),
                         v=np.array([1, 0, 0]),
                         omega=case.k),
-                    auto_where=target)(queue)
+                    auto_where=target)(actx)
         else:
             # point source
             return bind(places,
                     get_sym_maxwell_point_source(mfie.kernel, j_sym, mfie.k),
-                    auto_where=(source, target))(queue, j=src_j, k=case.k)
+                    auto_where=(source, target))(actx, j=src_j, k=case.k)
 
     # }}}
 
@@ -294,7 +293,7 @@ def test_pec_mfie_extinction(ctx_factory, case,
         observation_mesh = case.get_observation_mesh(case.target_order)
 
         pre_scat_discr = Discretization(
-                cl_ctx, scat_mesh,
+                actx, scat_mesh,
                 InterpolatoryQuadratureSimplexGroupFactory(case.target_order))
         qbx = QBXLayerPotentialSource(
                 pre_scat_discr, fine_order=4*case.target_order,
@@ -306,7 +305,7 @@ def test_pec_mfie_extinction(ctx_factory, case,
 
         scat_discr = qbx.density_discr
         obs_discr = Discretization(
-                cl_ctx, observation_mesh,
+                actx, observation_mesh,
                 InterpolatoryQuadratureSimplexGroupFactory(case.target_order))
 
         places.update({
@@ -324,7 +323,7 @@ def test_pec_mfie_extinction(ctx_factory, case,
             fplot = make_field_plotter_from_bbox(
                     find_bounding_box(scat_discr.mesh), h=(0.05, 0.05, 0.3),
                     extend_factor=0.3)
-            fplot_tgt = PointsTarget(cl.array.to_device(queue, fplot.points))
+            fplot_tgt = PointsTarget(actx.from_numpy(fplot.points))
 
             places.update({
                 "qbx_target_tol": qbx_tgt_tol,
@@ -337,9 +336,9 @@ def test_pec_mfie_extinction(ctx_factory, case,
 
         # {{{ system solve
 
-        h_max = bind(places, sym.h_max(qbx.ambient_dim))(queue)
+        h_max = bind(places, sym.h_max(qbx.ambient_dim))(actx)
 
-        pde_test_inc = EHField(vector_from_device(queue,
+        pde_test_inc = EHField(vector_from_device(actx.queue,
             eval_inc_field_at(places, target="patch_target")))
 
         source_maxwell_resids = [
@@ -356,7 +355,7 @@ def test_pec_mfie_extinction(ctx_factory, case,
 
         bound_j_op = bind(places, mfie.j_operator(loc_sign, jt_sym))
         j_rhs = bind(places, mfie.j_rhs(inc_xyz_sym.h))(
-                queue, inc_fld=inc_field_scat.field, **knl_kwargs)
+                actx, inc_fld=inc_field_scat.field, **knl_kwargs)
 
         gmres_settings = dict(
                 tol=case.gmres_tol,
@@ -365,24 +364,24 @@ def test_pec_mfie_extinction(ctx_factory, case,
                 stall_iterations=50, no_progress_factor=1.05)
         from pytential.solve import gmres
         gmres_result = gmres(
-                bound_j_op.scipy_op(queue, "jt", np.complex128, **knl_kwargs),
+                bound_j_op.scipy_op(actx, "jt", np.complex128, **knl_kwargs),
                 j_rhs, **gmres_settings)
 
         jt = gmres_result.solution
 
         bound_rho_op = bind(places, mfie.rho_operator(loc_sign, rho_sym))
         rho_rhs = bind(places, mfie.rho_rhs(jt_sym, inc_xyz_sym.e))(
-                queue, jt=jt, inc_fld=inc_field_scat.field, **knl_kwargs)
+                actx, jt=jt, inc_fld=inc_field_scat.field, **knl_kwargs)
 
         gmres_result = gmres(
-                bound_rho_op.scipy_op(queue, "rho", np.complex128, **knl_kwargs),
+                bound_rho_op.scipy_op(actx, "rho", np.complex128, **knl_kwargs),
                 rho_rhs, **gmres_settings)
 
         rho = gmres_result.solution
 
         # }}}
 
-        jxyz = bind(places, sym.tangential_to_xyz(jt_sym))(queue, jt=jt)
+        jxyz = bind(places, sym.tangential_to_xyz(jt_sym))(actx, jt=jt)
 
         # {{{ volume eval
 
@@ -393,9 +392,9 @@ def test_pec_mfie_extinction(ctx_factory, case,
                 source = sym.DEFAULT_SOURCE
 
             return bind(places, sym_repr, auto_where=(source, target))(
-                    queue, jt=jt, rho=rho, **knl_kwargs)
+                    actx, jt=jt, rho=rho, **knl_kwargs)
 
-        pde_test_repr = EHField(vector_from_device(queue,
+        pde_test_repr = EHField(vector_from_device(actx.queue,
             eval_repr_at(places, target="patch_target")))
 
         maxwell_residuals = [
@@ -415,12 +414,12 @@ def test_pec_mfie_extinction(ctx_factory, case,
         pec_bc_e = sym.n_cross(bc_repr.e + inc_xyz_sym.e)
         pec_bc_h = sym.normal(3).as_vector().dot(bc_repr.h + inc_xyz_sym.h)
 
-        eh_bc_values = bind(places, sym.join_fields(pec_bc_e, pec_bc_h))(
-                    queue, jt=jt, rho=rho, inc_fld=inc_field_scat.field,
+        eh_bc_values = bind(places, sym.flat_obj_array(pec_bc_e, pec_bc_h))(
+                    actx, jt=jt, rho=rho, inc_fld=inc_field_scat.field,
                     **knl_kwargs)
 
         def scat_norm(f):
-            return norm(density_discr, queue, f, p=np.inf)
+            return norm(density_discr, f, p=np.inf)
 
         e_bc_residual = scat_norm(eh_bc_values[:3]) / scat_norm(inc_field_scat.e)
         h_bc_residual = scat_norm(eh_bc_values[3]) / scat_norm(inc_field_scat.h)
@@ -435,11 +434,11 @@ def test_pec_mfie_extinction(ctx_factory, case,
 
         if visualize:
             from meshmode.discretization.visualization import make_visualizer
-            bdry_vis = make_visualizer(queue, scat_discr, case.target_order+3)
+            bdry_vis = make_visualizer(actx, scat_discr, case.target_order+3)
 
             bdry_normals = bind(places,
                     sym.normal(3, dofdesc="scat_discr")
-                    )(queue).as_vector(dtype=object)
+                    )(actx).as_vector(dtype=object)
 
             bdry_vis.write_vtk_file("source-%s.vtu" % resolution, [
                 ("j", jxyz),
@@ -459,12 +458,13 @@ def test_pec_mfie_extinction(ctx_factory, case,
                 fplot.write_vtk_file(
                         "failed-targets.vts",
                         [
-                            ("failed_targets", e.failed_target_flags.get(queue))
+                            ("failed_targets", actx.to_numpy(
+                                actx.thaw(e.failed_target_flags))),
                             ])
                 raise
 
-            fplot_repr = EHField(vector_from_device(queue, fplot_repr))
-            fplot_inc = EHField(vector_from_device(queue,
+            fplot_repr = EHField(vector_from_device(actx.queue, fplot_repr))
+            fplot_inc = EHField(vector_from_device(actx.queue,
                 eval_inc_field_at(places, target="plot_targets")))
 
             fplot.write_vtk_file(
@@ -484,7 +484,7 @@ def test_pec_mfie_extinction(ctx_factory, case,
         obs_repr = EHField(eval_repr_at(places, target="obs_discr"))
 
         def obs_norm(f):
-            return norm(obs_discr, queue, f, p=np.inf)
+            return norm(obs_discr, f, p=np.inf)
 
         rel_err_e = (obs_norm(inc_field_obs.e + obs_repr.e)
                 / obs_norm(inc_field_obs.e))
diff --git a/test/test_scalar_int_eq.py b/test/test_scalar_int_eq.py
index bc197395771f2ed3e82b709bbe9ef00fb4ca7839..82fca9ae12adcce48b2e5a562f96be709ad2f2d8 100644
--- a/test/test_scalar_int_eq.py
+++ b/test/test_scalar_int_eq.py
@@ -32,6 +32,7 @@ import pytest
 from pyopencl.tools import (  # noqa
         pytest_generate_tests_for_pyopencl as pytest_generate_tests)
 
+from meshmode.array_context import PyOpenCLArrayContext
 from meshmode.mesh.generation import (  # noqa
         ellipse, cloverleaf, starfish, drop, n_gon, qbx_peanut, WobblyCircle,
         make_curve_mesh)
@@ -436,7 +437,8 @@ class BetterplaneIntEqTestCase(IntEqTestCase):
 
 # {{{ test backend
 
-def run_int_eq_test(cl_ctx, queue, case, resolution, visualize=False):
+def run_int_eq_test(actx: PyOpenCLArrayContext,
+        case, resolution, visualize=False):
     mesh = case.get_mesh(resolution, case.target_order)
     print("%d elements" % mesh.nelements)
 
@@ -445,7 +447,7 @@ def run_int_eq_test(cl_ctx, queue, case, resolution, visualize=False):
     from meshmode.discretization.poly_element import \
             InterpolatoryQuadratureSimplexGroupFactory
     pre_density_discr = Discretization(
-            cl_ctx, mesh,
+            actx, mesh,
             InterpolatoryQuadratureSimplexGroupFactory(case.target_order))
 
     source_order = 4*case.target_order
@@ -495,7 +497,7 @@ def run_int_eq_test(cl_ctx, queue, case, resolution, visualize=False):
     point_sources = make_circular_point_group(
             mesh.ambient_dim, 10, test_src_geo_radius,
             func=lambda x: x**1.5)
-    point_source = PointPotentialSource(cl_ctx, point_sources)
+    point_source = PointPotentialSource(point_sources)
 
     from pytential.target import PointsTarget
     test_targets = make_circular_point_group(
@@ -554,7 +556,7 @@ def run_int_eq_test(cl_ctx, queue, case, resolution, visualize=False):
     places = GeometryCollection(places)
     if case.use_refinement:
         from pytential.qbx.refinement import refine_geometry_collection
-        places = refine_geometry_collection(queue, places,
+        places = refine_geometry_collection(places,
                 **refiner_extra_kwargs)
 
     dd = sym.as_dofdesc(sym.DEFAULT_SOURCE).to_stage1()
@@ -573,15 +575,15 @@ def run_int_eq_test(cl_ctx, queue, case, resolution, visualize=False):
 
         discr = places.get_discretization(dd.geometry, sym.QBX_SOURCE_QUAD_STAGE2)
         print("quad stage-2 elements have %d nodes"
-                % discr.groups[0].nunit_nodes)
+                % discr.groups[0].nunit_dofs)
 
     # }}}
 
     if hasattr(case, "visualize_geometry") and case.visualize_geometry:
         bdry_normals = bind(places, sym.normal(mesh.ambient_dim))(
-                queue).as_vector(dtype=np.object)
+                actx).as_vector(dtype=object)
 
-        bdry_vis = make_visualizer(queue, density_discr, case.target_order)
+        bdry_vis = make_visualizer(actx, density_discr, case.target_order)
         bdry_vis.write_vtk_file("geometry.vtu", [
             ("normals", bdry_normals)
             ])
@@ -591,19 +593,23 @@ def run_int_eq_test(cl_ctx, queue, case, resolution, visualize=False):
     if visualize:
         if mesh.ambient_dim == 2:
             # show geometry, centers, normals
-            nodes_h = density_discr.nodes().get(queue=queue)
-            normal = bind(places, sym.normal(2))(queue).as_vector(np.object)
+            from meshmode.dof_array import thaw, flatten
+            nodes_h = [actx.to_numpy(axis) for axis in
+                    flatten(thaw(actx, density_discr.nodes()))]
+            normal_h = [actx.to_numpy(axis) for axis in
+                    flatten(
+                        bind(places, sym.normal(2))(actx)
+                        .as_vector(object))]
 
             pt.plot(nodes_h[0], nodes_h[1], "x-")
-            pt.quiver(nodes_h[0], nodes_h[1],
-                    normal[0].get(queue), normal[1].get(queue))
+            pt.quiver(nodes_h[0], nodes_h[1], normal_h[0], normal_h[1])
             pt.gca().set_aspect("equal")
             pt.show()
         elif mesh.ambient_dim == 3:
             bdry_normals = bind(places, sym.normal(3))(
-                    queue).as_vector(dtype=object)
+                    actx).as_vector(dtype=object)
 
-            bdry_vis = make_visualizer(queue, density_discr, case.target_order+3)
+            bdry_vis = make_visualizer(actx, density_discr, case.target_order+3)
             bdry_vis.write_vtk_file("pre-solve-source-%s.vtu" % resolution, [
                 ("bdry_normals", bdry_normals),
                 ])
@@ -654,7 +660,7 @@ def run_int_eq_test(cl_ctx, queue, case, resolution, visualize=False):
     source_charges = source_charges.astype(dtype)
     assert np.sum(source_charges) < 1e-15
 
-    source_charges_dev = cl.array.to_device(queue, source_charges)
+    source_charges_dev = actx.from_numpy(source_charges)
 
     # }}}
 
@@ -666,27 +672,27 @@ def run_int_eq_test(cl_ctx, queue, case, resolution, visualize=False):
 
     test_direct = bind(places, pot_src,
             auto_where=("point_source", "point_target"))(
-            queue, charges=source_charges_dev, **concrete_knl_kwargs)
+            actx, charges=source_charges_dev, **concrete_knl_kwargs)
 
     if case.bc_type == "dirichlet":
         bc = bind(places, pot_src,
                 auto_where=("point_source", sym.DEFAULT_TARGET))(
-                        queue, charges=source_charges_dev, **concrete_knl_kwargs)
+                        actx, charges=source_charges_dev, **concrete_knl_kwargs)
 
     elif case.bc_type == "neumann":
         bc = bind(places, sym.normal_derivative(
             qbx.ambient_dim, pot_src, dofdesc=sym.DEFAULT_TARGET),
             auto_where=("point_source", sym.DEFAULT_TARGET))(
-                    queue, charges=source_charges_dev, **concrete_knl_kwargs)
+                    actx, charges=source_charges_dev, **concrete_knl_kwargs)
 
     elif case.bc_type == "clamped_plate":
         bc_u = bind((point_source, density_discr), pot_src)(
-                queue, charges=source_charges_dev, **concrete_knl_kwargs)
+                actx, charges=source_charges_dev, **concrete_knl_kwargs)
         bc_du = bind(
                 (point_source, density_discr),
                 sym.normal_derivative(
                     qbx.ambient_dim, pot_src, dofdesc=sym.DEFAULT_TARGET)
-                )(queue, charges=source_charges_dev, **concrete_knl_kwargs)
+                )(actx, charges=source_charges_dev, **concrete_knl_kwargs)
         bc = [bc_u, bc_du]
 
     # }}}
@@ -694,22 +700,22 @@ def run_int_eq_test(cl_ctx, queue, case, resolution, visualize=False):
     # {{{ solve
 
     bound_op = bind(places, op_u)
-    rhs = bind(places, op.prepare_rhs(op.get_density_var("bc")))(queue, bc=bc)
+    rhs = bind(places, op.prepare_rhs(op.get_density_var("bc")))(actx, bc=bc)
 
     try:
         from pytential.solve import gmres
         gmres_result = gmres(
-                bound_op.scipy_op(queue, "u", dtype, **concrete_knl_kwargs),
+                bound_op.scipy_op(actx, "u", dtype, **concrete_knl_kwargs),
                 rhs,
                 tol=case.gmres_tol,
                 progress=True,
                 hard_failure=True,
                 stall_iterations=50, no_progress_factor=1.05)
     except QBXTargetAssociationFailedException as e:
-        bdry_vis = make_visualizer(queue, density_discr, case.target_order+3)
+        bdry_vis = make_visualizer(actx, density_discr, case.target_order+3)
 
         bdry_vis.write_vtk_file("failed-targets-%s.vtu" % resolution, [
-            ("failed_targets", e.failed_target_flags),
+            ("failed_targets", actx.thaw(e.failed_target_flags)),
             ])
         raise
 
@@ -724,7 +730,7 @@ def run_int_eq_test(cl_ctx, queue, case, resolution, visualize=False):
         from sumpy.tools import build_matrix
         mat = build_matrix(
                 bound_op.scipy_op(
-                    queue, arg_name="u", dtype=dtype, **concrete_knl_kwargs))
+                    actx, arg_name="u", dtype=dtype, **concrete_knl_kwargs))
         w, v = la.eig(mat)
         if visualize:
             pt.imshow(np.log10(1e-20+np.abs(mat)))
@@ -740,7 +746,7 @@ def run_int_eq_test(cl_ctx, queue, case, resolution, visualize=False):
                 op.representation(op.get_density_var("u")),
                 auto_where=(sym.DEFAULT_SOURCE, "point_target"))
 
-        test_via_bdry = bound_tgt_op(queue, u=weighted_u, **concrete_knl_kwargs)
+        test_via_bdry = bound_tgt_op(actx, u=weighted_u, **concrete_knl_kwargs)
 
         err = test_via_bdry - test_direct
 
@@ -784,11 +790,11 @@ def run_int_eq_test(cl_ctx, queue, case, resolution, visualize=False):
         #print(bound_t_deriv_op.code)
 
         grad_from_src = bound_grad_op(
-                queue, u=weighted_u, **concrete_knl_kwargs)
+                actx, u=weighted_u, **concrete_knl_kwargs)
 
         grad_ref = bind(places,
                 sym.grad(mesh.ambient_dim, pot_src),
-                auto_where=("point_source", "point_target"))(queue,
+                auto_where=("point_source", "point_target"))(actx,
                         charges=source_charges_dev,
                         **concrete_knl_kwargs)
 
@@ -812,14 +818,16 @@ def run_int_eq_test(cl_ctx, queue, case, resolution, visualize=False):
                     sym.tangential_derivative(qbx.ambient_dim, pot),
                     qbx_forced_limit=loc_sign))
 
-        tang_deriv_from_src = bound_t_deriv_op(
-                queue, u=weighted_u, **concrete_knl_kwargs).as_scalar().get()
+        from meshmode.dof_array import flatten
+        tang_deriv_from_src = actx.to_numpy(
+                flatten(bound_t_deriv_op(
+                    actx, u=weighted_u, **concrete_knl_kwargs).as_scalar()))
 
-        tang_deriv_ref = bind(places,
+        tang_deriv_ref = actx.to_numpy(flatten(bind(places,
                 sym.tangential_derivative(qbx.ambient_dim, pot_src),
-                auto_where=("point_source", sym.DEFAULT_TARGET))(queue,
+                auto_where=("point_source", sym.DEFAULT_TARGET))(actx,
                         charges=source_charges_dev,
-                        **concrete_knl_kwargs).as_scalar().get()
+                        **concrete_knl_kwargs).as_scalar()))
 
         if visualize:
             pt.plot(tang_deriv_ref.real)
@@ -841,12 +849,12 @@ def run_int_eq_test(cl_ctx, queue, case, resolution, visualize=False):
 
     if visualize:
         bdry_normals = bind(places, sym.normal(qbx.ambient_dim))(
-                queue).as_vector(dtype=np.object)
+                actx).as_vector(dtype=object)  # np.object removed in NumPy 1.24
 
         sym_sqrt_j = sym.sqrt_jac_q_weight(density_discr.ambient_dim)
-        u = bind(places, op.get_density_var("u") / sym_sqrt_j)(queue, u=weighted_u)
+        u = bind(places, op.get_density_var("u") / sym_sqrt_j)(actx, u=weighted_u)
 
-        bdry_vis = make_visualizer(queue, density_discr, case.target_order+3)
+        bdry_vis = make_visualizer(actx, density_discr, case.target_order+3)
         bdry_vis.write_vtk_file("source-%s.vtu" % resolution, [
             ("u", u),
             ("bc", bc),
@@ -857,32 +865,34 @@ def run_int_eq_test(cl_ctx, queue, case, resolution, visualize=False):
             solved_pot = bind(places,
                     op.representation(op.get_density_var("u")),
                     auto_where=("qbx_target_tol", "plot_targets"))(
-                            queue, u=weighted_u, k=case.k)
+                            actx, u=weighted_u, k=getattr(case, "k", None))
         except QBXTargetAssociationFailedException as e:
             fplot.write_vtk_file(
                     "failed-targets.vts",
                     [
-                        ("failed_targets", e.failed_target_flags.get(queue))
+                        ("failed_targets", actx.to_numpy(
+                            actx.thaw(e.failed_target_flags)))
                         ])
             raise
 
-        ones_density = density_discr.zeros(queue)
-        ones_density.fill(1)
+        ones_density = density_discr.zeros(actx) + 1
 
         indicator = -sym.D(LaplaceKernel(qbx.ambient_dim),
                 op.get_density_var("sigma"),
                 qbx_forced_limit=None)
         indicator = bind(places, indicator,
                 auto_where=("qbx_target_tol", "plot_targets"))(
-                        queue, sigma=ones_density).get()
-
-        solved_pot = solved_pot.get()
+                        actx, sigma=ones_density)
 
         true_pot = bind(places, pot_src,
                 auto_where=("point_source", "plot_targets"))(
-                        queue,
+                        actx,
                         charges=source_charges_dev,
-                        **concrete_knl_kwargs).get()
+                        **concrete_knl_kwargs)
+
+        solved_pot = actx.to_numpy(solved_pot)
+        true_pot = actx.to_numpy(true_pot)
+        indicator = actx.to_numpy(indicator)
 
         #fplot.show_scalar_in_mayavi(solved_pot.real, max_val=5)
         if case.prob_side == "scat":
@@ -906,7 +916,7 @@ def run_int_eq_test(cl_ctx, queue, case, resolution, visualize=False):
 
     # }}}
 
-    h_max = bind(places, sym.h_max(qbx.ambient_dim))(queue)
+    h_max = bind(places, sym.h_max(qbx.ambient_dim))(actx)
     return dict(
             h_max=h_max,
             rel_err_2=rel_err_2,
@@ -943,6 +953,7 @@ def test_integral_equation(ctx_factory, case, visualize=False):
 
     cl_ctx = ctx_factory()
     queue = cl.CommandQueue(cl_ctx)
+    actx = PyOpenCLArrayContext(queue)
 
     if USE_SYMENGINE and case.fmm_backend is None:
         pytest.skip("https://gitlab.tiker.net/inducer/sumpy/issues/25")
@@ -959,8 +970,7 @@ def test_integral_equation(ctx_factory, case, visualize=False):
 
     have_error_data = False
     for resolution in case.resolutions:
-        result = run_int_eq_test(cl_ctx, queue, case, resolution,
-                visualize=visualize)
+        result = run_int_eq_test(actx, case, resolution, visualize=visualize)
 
         if result["rel_err_2"] is not None:
             have_error_data = True
diff --git a/test/test_stokes.py b/test/test_stokes.py
index 19167efe102673c5a5350b89002b9b6f28b01509..5ac2ab47a8ced6dfbd91b612ca37c1ea88fcb8ca 100644
--- a/test/test_stokes.py
+++ b/test/test_stokes.py
@@ -25,9 +25,9 @@ THE SOFTWARE.
 
 import numpy as np
 import pyopencl as cl
-import pyopencl.clmath  # noqa
 import pytest
 
+from meshmode.array_context import PyOpenCLArrayContext
 from meshmode.discretization import Discretization
 from meshmode.discretization.poly_element import \
         InterpolatoryQuadratureSimplexGroupFactory
@@ -46,7 +46,8 @@ import logging
 
 def run_exterior_stokes_2d(ctx_factory, nelements,
         mesh_order=4, target_order=4, qbx_order=4,
-        fmm_order=10, mu=1, circle_rad=1.5, visualize=False):
+        fmm_order=False,  # FIXME: FMM is slower than direct eval
+        mu=1, circle_rad=1.5, visualize=False):
 
     # This program tests an exterior Stokes flow in 2D using the
     # compound representation given in Hsiao & Kress,
@@ -57,6 +58,7 @@ def run_exterior_stokes_2d(ctx_factory, nelements,
 
     cl_ctx = cl.create_some_context()
     queue = cl.CommandQueue(cl_ctx)
+    actx = PyOpenCLArrayContext(queue)
 
     ovsmp_target_order = 4*target_order
 
@@ -68,8 +70,7 @@ def run_exterior_stokes_2d(ctx_factory, nelements,
             lambda t: circle_rad * ellipse(1, t),
             np.linspace(0, 1, nelements+1),
             target_order)
-    coarse_density_discr = Discretization(
-            cl_ctx, mesh,
+    coarse_density_discr = Discretization(actx, mesh,
             InterpolatoryQuadratureSimplexGroupFactory(target_order))
 
     from pytential.qbx import QBXLayerPotentialSource
@@ -111,8 +112,8 @@ def run_exterior_stokes_2d(ctx_factory, nelements,
 
     density_discr = places.get_discretization(sym.DEFAULT_SOURCE)
 
-    normal = bind(places, sym.normal(2).as_vector())(queue)
-    path_length = bind(places, sym.integral(2, 1, 1))(queue)
+    normal = bind(places, sym.normal(2).as_vector())(actx)
+    path_length = bind(places, sym.integral(2, 1, 1))(actx)
 
     # }}}
 
@@ -150,47 +151,52 @@ def run_exterior_stokes_2d(ctx_factory, nelements,
 
     def fund_soln(x, y, loc, strength):
         #with direction (1,0) for point source
-        r = cl.clmath.sqrt((x - loc[0])**2 + (y - loc[1])**2)
+        r = actx.np.sqrt((x - loc[0])**2 + (y - loc[1])**2)
         scaling = strength/(4*np.pi*mu)
-        xcomp = (-cl.clmath.log(r) + (x - loc[0])**2/r**2) * scaling
+        xcomp = (-actx.np.log(r) + (x - loc[0])**2/r**2) * scaling
         ycomp = ((x - loc[0])*(y - loc[1])/r**2) * scaling
         return [xcomp, ycomp]
 
     def rotlet_soln(x, y, loc):
-        r = cl.clmath.sqrt((x - loc[0])**2 + (y - loc[1])**2)
+        r = actx.np.sqrt((x - loc[0])**2 + (y - loc[1])**2)
         xcomp = -(y - loc[1])/r**2
         ycomp = (x - loc[0])/r**2
         return [xcomp, ycomp]
 
     def fund_and_rot_soln(x, y, loc, strength):
         #with direction (1,0) for point source
-        r = cl.clmath.sqrt((x - loc[0])**2 + (y - loc[1])**2)
+        r = actx.np.sqrt((x - loc[0])**2 + (y - loc[1])**2)
         scaling = strength/(4*np.pi*mu)
         xcomp = (
-                (-cl.clmath.log(r) + (x - loc[0])**2/r**2) * scaling
+                (-actx.np.log(r) + (x - loc[0])**2/r**2) * scaling
                 - (y - loc[1])*strength*0.125/r**2 + 3.3)
         ycomp = (
                 ((x - loc[0])*(y - loc[1])/r**2) * scaling
                 + (x - loc[0])*strength*0.125/r**2 + 1.5)
-        return [xcomp, ycomp]
+        return make_obj_array([xcomp, ycomp])
 
-    nodes = density_discr.nodes().with_queue(queue)
+    from meshmode.dof_array import unflatten, flatten, thaw
+    nodes = flatten(thaw(actx, density_discr.nodes()))
     fund_soln_loc = np.array([0.5, -0.2])
     strength = 100.
-    bc = fund_and_rot_soln(nodes[0], nodes[1], fund_soln_loc, strength)
+    bc = unflatten(actx, density_discr,
+            fund_and_rot_soln(nodes[0], nodes[1], fund_soln_loc, strength))
 
     omega_sym = sym.make_sym_vector("omega", dim)
     u_A_sym_bdry = stokeslet_obj.apply(  # noqa: N806
             omega_sym, mu_sym, qbx_forced_limit=1)
 
-    omega = [
-            cl.array.to_device(queue, (strength/path_length)*np.ones(len(nodes[0]))),
-            cl.array.to_device(queue, np.zeros(len(nodes[0])))]
+    from pytential.utils import unflatten_from_numpy
+    omega = unflatten_from_numpy(actx, density_discr, make_obj_array([
+            (strength/path_length)*np.ones(len(nodes[0])),
+            np.zeros(len(nodes[0]))
+            ]))
+
     bvp_rhs = bind(places,
-            sym.make_sym_vector("bc", dim) + u_A_sym_bdry)(queue,
+            sym.make_sym_vector("bc", dim) + u_A_sym_bdry)(actx,
                     bc=bc, mu=mu, omega=omega)
     gmres_result = gmres(
-            bound_op.scipy_op(queue, "sigma", np.float64, mu=mu, normal=normal),
+            bound_op.scipy_op(actx, "sigma", np.float64, mu=mu, normal=normal),
             bvp_rhs,
             x0=bvp_rhs,
             tol=1e-9, progress=True,
@@ -203,7 +209,7 @@ def run_exterior_stokes_2d(ctx_factory, nelements,
 
     sigma = gmres_result.solution
     sigma_int_val_sym = sym.make_sym_vector("sigma_int_val", 2)
-    int_val = bind(places, sym.integral(2, 1, sigma_sym))(queue, sigma=sigma)
+    int_val = bind(places, sym.integral(2, 1, sigma_sym))(actx, sigma=sigma)
     int_val = -int_val/(2 * np.pi)
     print("int_val = ", int_val)
 
@@ -217,7 +223,7 @@ def run_exterior_stokes_2d(ctx_factory, nelements,
             - u_A_sym_vol + sigma_int_val_sym)
 
     where = (sym.DEFAULT_SOURCE, "point_target")
-    vel = bind(places, representation_sym, auto_where=where)(queue,
+    vel = bind(places, representation_sym, auto_where=where)(actx,
             sigma=sigma,
             mu=mu,
             normal=normal,
@@ -226,7 +232,7 @@ def run_exterior_stokes_2d(ctx_factory, nelements,
     print("@@@@@@@@")
 
     plot_vel = bind(places, representation_sym,
-            auto_where=(sym.DEFAULT_SOURCE, "plot_target"))(queue,
+            auto_where=(sym.DEFAULT_SOURCE, "plot_target"))(actx,
                     sigma=sigma,
                     mu=mu,
                     normal=normal,
@@ -240,8 +246,10 @@ def run_exterior_stokes_2d(ctx_factory, nelements,
                 ])
 
     exact_soln = fund_and_rot_soln(
-            cl.array.to_device(queue, eval_points[0]), cl.array.to_device(
-                queue, eval_points[1]), fund_soln_loc, strength)
+            actx.from_numpy(eval_points[0]),
+            actx.from_numpy(eval_points[1]),
+            fund_soln_loc,
+            strength)
 
     vel = get_obj_array(vel)
     err = vel-get_obj_array(exact_soln)
@@ -289,7 +297,7 @@ def run_exterior_stokes_2d(ctx_factory, nelements,
 
     # }}}
 
-    h_max = bind(places, sym.h_max(qbx.ambient_dim))(queue)
+    h_max = bind(places, sym.h_max(qbx.ambient_dim))(actx)
     return h_max, l2_err
 
 
diff --git a/test/test_symbolic.py b/test/test_symbolic.py
index 37b86c12a485b419b05f8b6878923870f1169a41..a510596cdc9f9d56c2f974a7a64666fe36e5cea8 100644
--- a/test/test_symbolic.py
+++ b/test/test_symbolic.py
@@ -39,6 +39,7 @@ logger = logging.getLogger(__name__)
 from pyopencl.tools import (  # noqa
         pytest_generate_tests_for_pyopencl as pytest_generate_tests)
 
+from meshmode.array_context import PyOpenCLArrayContext
 from meshmode.mesh.generation import (  # noqa
         ellipse, cloverleaf, starfish, drop, n_gon, qbx_peanut, WobblyCircle,
         make_curve_mesh)
@@ -51,27 +52,27 @@ from meshmode.discretization.poly_element import \
 
 # {{{ discretization getters
 
-def get_ellipse_with_ref_mean_curvature(cl_ctx, nelements, aspect=1):
+def get_ellipse_with_ref_mean_curvature(actx, nelements, aspect=1):
     order = 4
     mesh = make_curve_mesh(
             partial(ellipse, aspect),
             np.linspace(0, 1, nelements+1),
             order)
 
-    discr = Discretization(cl_ctx, mesh,
+    discr = Discretization(actx, mesh,
         InterpolatoryQuadratureSimplexGroupFactory(order))
 
-    with cl.CommandQueue(cl_ctx) as queue:
-        nodes = discr.nodes().get(queue=queue)
+    from meshmode.dof_array import thaw
+    nodes = thaw(actx, discr.nodes())
 
     a = 1
     b = 1/aspect
-    t = np.arctan2(nodes[1] * aspect, nodes[0])
+    t = actx.np.atan2(nodes[1] * aspect, nodes[0])
 
-    return discr, a*b / ((a*np.sin(t))**2 + (b*np.cos(t))**2)**(3/2)
+    return discr, a*b / ((a*actx.np.sin(t))**2 + (b*actx.np.cos(t))**2)**(3/2)
 
 
-def get_torus_with_ref_mean_curvature(cl_ctx, h):
+def get_torus_with_ref_mean_curvature(actx, h):
     order = 4
     r_minor = 1.0
     r_major = 3.0
@@ -79,20 +80,21 @@ def get_torus_with_ref_mean_curvature(cl_ctx, h):
     from meshmode.mesh.generation import generate_torus
     mesh = generate_torus(r_major, r_minor,
             n_major=h, n_minor=h, order=order)
-    discr = Discretization(cl_ctx, mesh,
+    discr = Discretization(actx, mesh,
         InterpolatoryQuadratureSimplexGroupFactory(order))
 
-    with cl.CommandQueue(cl_ctx) as queue:
-        nodes = discr.nodes().get(queue=queue)
+    from meshmode.dof_array import thaw
+    nodes = thaw(actx, discr.nodes())
 
     # copied from meshmode.mesh.generation.generate_torus
     a = r_major
     b = r_minor
 
-    u = np.arctan2(nodes[1], nodes[0])
-    rvec = np.array([np.cos(u), np.sin(u), np.zeros_like(u)])
-    rvec = np.sum(nodes * rvec, axis=0) - a
-    cosv = np.cos(np.arctan2(nodes[2], rvec))
+    u = actx.np.atan2(nodes[1], nodes[0])
+    from pytools.obj_array import flat_obj_array
+    rvec = flat_obj_array(actx.np.cos(u), actx.np.sin(u), 0*u)
+    rvec = sum(nodes * rvec) - a
+    cosv = actx.np.cos(actx.np.atan2(nodes[2], rvec))
 
     return discr, (a + 2.0 * b * cosv) / (2 * b * (a + b * cosv))
 
@@ -115,19 +117,20 @@ def test_mean_curvature(ctx_factory, discr_name, resolutions,
         discr_and_ref_mean_curvature_getter, visualize=False):
     ctx = ctx_factory()
     queue = cl.CommandQueue(ctx)
+    actx = PyOpenCLArrayContext(queue)
 
     from pytools.convergence import EOCRecorder
     eoc = EOCRecorder()
 
     for r in resolutions:
         discr, ref_mean_curvature = \
-                discr_and_ref_mean_curvature_getter(ctx, r)
+                discr_and_ref_mean_curvature_getter(actx, r)
         mean_curvature = bind(
-            discr,
-            sym.mean_curvature(discr.ambient_dim))(queue).get(queue)
+            discr, sym.mean_curvature(discr.ambient_dim))(actx)
 
         h = 1.0 / r
-        h_error = la.norm(mean_curvature - ref_mean_curvature, np.inf)
+        from meshmode.dof_array import flat_norm
+        h_error = flat_norm(mean_curvature - ref_mean_curvature, np.inf)
         eoc.add_data_point(h, h_error)
     print(eoc)
 
@@ -142,13 +145,13 @@ def test_mean_curvature(ctx_factory, discr_name, resolutions,
 def test_tangential_onb(ctx_factory):
     cl_ctx = ctx_factory()
     queue = cl.CommandQueue(cl_ctx)
+    actx = PyOpenCLArrayContext(queue)
 
     from meshmode.mesh.generation import generate_torus
     mesh = generate_torus(5, 2, order=3)
 
     discr = Discretization(
-            cl_ctx, mesh,
-            InterpolatoryQuadratureSimplexGroupFactory(3))
+            actx, mesh, InterpolatoryQuadratureSimplexGroupFactory(3))
 
     tob = sym.tangential_onb(mesh.ambient_dim)
     nvecs = tob.shape[1]
@@ -157,8 +160,10 @@ def test_tangential_onb(ctx_factory):
     orth_check = bind(discr, sym.make_obj_array([
         np.dot(tob[:, i], tob[:, j]) - (1 if i == j else 0)
         for i in range(nvecs) for j in range(nvecs)])
-        )(queue)
+        )(actx)
 
+    from meshmode.dof_array import flatten
+    orth_check = flatten(orth_check)
     for i, orth_i in enumerate(orth_check):
         assert (cl.clmath.fabs(orth_i) < 1e-13).get().all()
 
@@ -166,8 +171,9 @@ def test_tangential_onb(ctx_factory):
     orth_check = bind(discr, sym.make_obj_array([
         np.dot(tob[:, i], sym.normal(mesh.ambient_dim).as_vector())
         for i in range(nvecs)])
-        )(queue)
+        )(actx)
 
+    orth_check = flatten(orth_check)
     for i, orth_i in enumerate(orth_check):
         assert (cl.clmath.fabs(orth_i) < 1e-13).get().all()
 
@@ -227,6 +233,7 @@ def test_layer_potential_construction(lpot_class, ambient_dim=2):
 def test_interpolation(ctx_factory, name, source_discr_stage, target_granularity):
     ctx = ctx_factory()
     queue = cl.CommandQueue(ctx)
+    actx = PyOpenCLArrayContext(queue)
 
     nelements = 32
     target_order = 7
@@ -245,7 +252,7 @@ def test_interpolation(ctx_factory, name, source_discr_stage, target_granularity
     mesh = make_curve_mesh(starfish,
             np.linspace(0.0, 1.0, nelements + 1),
             target_order)
-    discr = Discretization(ctx, mesh,
+    discr = Discretization(actx, mesh,
             InterpolatoryQuadratureSimplexGroupFactory(target_order))
 
     from pytential.qbx import QBXLayerPotentialSource
@@ -261,16 +268,22 @@ def test_interpolation(ctx_factory, name, source_discr_stage, target_granularity
     op_sym = sym.sin(sym.interp(from_dd, to_dd, sigma_sym))
     bound_op = bind(places, op_sym, auto_where=where)
 
-    def nodes(stage):
+    from meshmode.dof_array import thaw, flatten, unflatten
+
+    def discr_and_nodes(stage):
         density_discr = places.get_discretization(where.geometry, stage)
-        return density_discr.nodes().get(queue)
+        return density_discr, np.array([
+                actx.to_numpy(flatten(axis))
+                for axis in thaw(actx, density_discr.nodes())])
 
-    target_nodes = nodes(sym.QBX_SOURCE_QUAD_STAGE2)
-    source_nodes = nodes(source_discr_stage)
+    _, target_nodes = discr_and_nodes(sym.QBX_SOURCE_QUAD_STAGE2)
+    source_discr, source_nodes = discr_and_nodes(source_discr_stage)
 
-    sigma_dev = cl.array.to_device(queue, la.norm(source_nodes, axis=0))
     sigma_target = np.sin(la.norm(target_nodes, axis=0))
-    sigma_target_interp = bound_op(queue, sigma=sigma_dev).get(queue)
+    sigma_dev = unflatten(
+            actx, source_discr,
+            actx.from_numpy(la.norm(source_nodes, axis=0)))
+    sigma_target_interp = actx.to_numpy(flatten(bound_op(actx, sigma=sigma_dev)))
 
     if name in ("default", "default_explicit", "stage2", "quad"):
         error = la.norm(sigma_target_interp - sigma_target) / la.norm(sigma_target)
diff --git a/test/test_target_specific_qbx.py b/test/test_target_specific_qbx.py
index 551bb38c5a5e73a30f4ed72124c866b89349330e..440e74862c7e452029ad7df93ac73aaff9ae9a43 100644
--- a/test/test_target_specific_qbx.py
+++ b/test/test_target_specific_qbx.py
@@ -23,10 +23,10 @@ THE SOFTWARE.
 """
 
 
+from meshmode.array_context import PyOpenCLArrayContext
 import numpy as np
 import numpy.linalg as la  # noqa
 import pyopencl as cl
-import pyopencl.clmath as clmath
 import pytest
 from pyopencl.tools import (  # noqa
         pytest_generate_tests_for_pyopencl as pytest_generate_tests)
@@ -140,6 +140,7 @@ def test_target_specific_qbx(ctx_factory, op, helmholtz_k, qbx_order):
 
     cl_ctx = ctx_factory()
     queue = cl.CommandQueue(cl_ctx)
+    actx = PyOpenCLArrayContext(queue)
 
     target_order = 4
     fmm_tol = 1e-3
@@ -152,7 +153,7 @@ def test_target_specific_qbx(ctx_factory, op, helmholtz_k, qbx_order):
         InterpolatoryQuadratureSimplexGroupFactory
     from pytential.qbx import QBXLayerPotentialSource
     pre_density_discr = Discretization(
-            cl_ctx, mesh,
+            actx, mesh,
             InterpolatoryQuadratureSimplexGroupFactory(target_order))
 
     from sumpy.expansion.level_to_order import SimpleExpansionOrderFinder
@@ -174,12 +175,13 @@ def test_target_specific_qbx(ctx_factory, op, helmholtz_k, qbx_order):
 
     from pytential.qbx.refinement import refine_geometry_collection
     places = GeometryCollection(places, auto_where="qbx")
-    places = refine_geometry_collection(queue, places,
+    places = refine_geometry_collection(places,
             kernel_length_scale=kernel_length_scale)
 
     density_discr = places.get_discretization("qbx")
-    nodes = density_discr.nodes().with_queue(queue)
-    u_dev = clmath.sin(nodes[0])
+    from meshmode.dof_array import thaw
+    nodes = thaw(actx, density_discr.nodes())
+    u_dev = actx.np.sin(nodes[0])
 
     if helmholtz_k == 0:
         kernel = LaplaceKernel(3)
@@ -201,11 +203,12 @@ def test_target_specific_qbx(ctx_factory, op, helmholtz_k, qbx_order):
 
     expr = op(kernel, u_sym, qbx_forced_limit=-1, **kernel_kwargs)
 
+    from meshmode.dof_array import flatten
     bound_op = bind(places, expr)
-    pot_ref = bound_op(queue, u=u_dev, k=helmholtz_k).get()
+    pot_ref = actx.to_numpy(flatten(bound_op(actx, u=u_dev, k=helmholtz_k)))
 
     bound_op = bind(places, expr, auto_where="qbx_target_specific")
-    pot_tsqbx = bound_op(queue, u=u_dev, k=helmholtz_k).get()
+    pot_tsqbx = actx.to_numpy(flatten(bound_op(actx, u=u_dev, k=helmholtz_k)))
 
     assert np.allclose(pot_tsqbx, pot_ref, atol=1e-13, rtol=1e-13)
 
diff --git a/test/test_tools.py b/test/test_tools.py
index d2f107848cb9785a74528e64e1bd1ea0cbee6582..89bfcd7857cc803255548c05b9553022295e0f1e 100644
--- a/test/test_tools.py
+++ b/test/test_tools.py
@@ -24,6 +24,7 @@ THE SOFTWARE.
 
 from functools import partial
 
+from meshmode.array_context import PyOpenCLArrayContext
 import pytest
 
 import numpy as np
@@ -63,6 +64,7 @@ def test_interpolatory_error_reporting(ctx_factory):
 
     ctx = ctx_factory()
     queue = cl.CommandQueue(ctx)
+    actx = PyOpenCLArrayContext(queue)
 
     h = 0.2
     from meshmode.mesh.io import generate_gmsh, FileSource
@@ -80,20 +82,19 @@ def test_interpolatory_error_reporting(ctx_factory):
     from meshmode.discretization.poly_element import \
             QuadratureSimplexGroupFactory
 
-    vol_discr = Discretization(ctx, mesh,
+    vol_discr = Discretization(actx, mesh,
             QuadratureSimplexGroupFactory(5))
 
-    vol_x = vol_discr.nodes().with_queue(queue)
+    from meshmode.dof_array import thaw
+    vol_x = thaw(actx, vol_discr.nodes())
 
     # }}}
 
     from pytential import integral
-    rhs = 1 + 0*vol_x[0]
-
-    one = rhs.copy()
-    one.fill(1)
-    with pytest.raises(TypeError):
-        print("AREA", integral(vol_discr, queue, one), 0.25**2*np.pi)
+    one = 1 + 0*vol_x[0]
+    from meshmode.discretization import NoninterpolatoryElementGroupError
+    with pytest.raises(NoninterpolatoryElementGroupError):
+        print("AREA", integral(vol_discr, one), 0.25**2*np.pi)
 
 
 def test_geometry_collection_caching(ctx_factory):
@@ -103,6 +104,7 @@ def test_geometry_collection_caching(ctx_factory):
     # the `nodes` on each `discr_stage`.
     ctx = ctx_factory()
     queue = cl.CommandQueue(ctx)
+    actx = PyOpenCLArrayContext(queue)
 
     ndim = 2
     nelements = 1024
@@ -128,7 +130,7 @@ def test_geometry_collection_caching(ctx_factory):
             mesh = affine_map(discrs[0].mesh,
                     b=np.array([3 * k * radius, 0]))
 
-        discr = Discretization(ctx, mesh,
+        discr = Discretization(actx, mesh,
             InterpolatoryQuadratureSimplexGroupFactory(target_order))
         discrs.append(discr)
 
@@ -161,7 +163,7 @@ def test_geometry_collection_caching(ctx_factory):
                 discr = places._get_discr_from_cache(sources[k], discr_stage)
 
             dofdesc = sym.DOFDescriptor(sources[k], discr_stage=discr_stage)
-            bind(places, sym.nodes(ndim, dofdesc=dofdesc))(queue)
+            bind(places, sym.nodes(ndim, dofdesc=dofdesc))(actx)
 
             discr = places._get_discr_from_cache(sources[k], discr_stage)
             assert discr is not None
diff --git a/test/too_slow_test_helmholtz.py b/test/too_slow_test_helmholtz.py
deleted file mode 100644
index 9e9df21aa78d137d0fdb1c6a7521f37c486aad68..0000000000000000000000000000000000000000
--- a/test/too_slow_test_helmholtz.py
+++ /dev/null
@@ -1,455 +0,0 @@
-from __future__ import division, absolute_import, print_function
-
-__copyright__ = "Copyright (C) 2014 Shidong Jiang, Andreas Kloeckner"
-
-__license__ = """
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-"""
-
-
-import numpy as np
-import numpy.linalg as la
-import pyopencl as cl
-import pyopencl.array  # noqa
-import pyopencl.clmath  # noqa
-
-import pytest
-
-from pytools.obj_array import make_obj_array
-
-from meshmode.discretization import Discretization
-from meshmode.discretization.poly_element import \
-        InterpolatoryQuadratureSimplexGroupFactory
-
-from six.moves import range
-
-from pytential import bind, sym, norm  # noqa
-from pytential.symbolic.pde.maxwell.waveguide import (  # noqa
-        DielectricSRep2DBoundaryOperator as SRep,
-        DielectricSDRep2DBoundaryOperator as SDRep)
-
-
-from pyopencl.tools import (  # noqa
-        pytest_generate_tests_for_pyopencl as pytest_generate_tests)
-
-import logging
-logger = logging.getLogger(__name__)
-
-
-def run_dielectric_test(cl_ctx, queue, nelements, qbx_order,
-        op_class, mode,
-        k0=3, k1=2.9, mesh_order=10,
-        bdry_quad_order=None, bdry_ovsmp_quad_order=None,
-        use_l2_weighting=False,
-        fmm_order=None, visualize=False):
-
-    if fmm_order is None:
-        fmm_order = qbx_order * 2
-    if bdry_quad_order is None:
-        bdry_quad_order = mesh_order
-    if bdry_ovsmp_quad_order is None:
-        bdry_ovsmp_quad_order = 4*bdry_quad_order
-
-    # {{{ geometries
-
-    from meshmode.mesh.generation import ellipse, make_curve_mesh
-    from functools import partial
-    mesh = make_curve_mesh(
-            partial(ellipse, 3),
-            np.linspace(0, 1, nelements+1),
-            mesh_order)
-
-    density_discr = Discretization(
-            cl_ctx, mesh,
-            InterpolatoryQuadratureSimplexGroupFactory(bdry_quad_order))
-
-    logger.info("%d elements" % mesh.nelements)
-
-    from pytential.qbx import QBXLayerPotentialSource
-    qbx = QBXLayerPotentialSource(
-            density_discr, fine_order=bdry_ovsmp_quad_order, qbx_order=qbx_order,
-            fmm_order=fmm_order
-            )
-
-    from pytential.target import PointsTarget
-    targets_0 = PointsTarget(make_obj_array(list(np.array([
-        [3.2 + t, -4]
-        for t in [0, 0.5, 1]
-        ]).T.copy())))
-    targets_1 = PointsTarget(make_obj_array(list(np.array([
-        [-0.3 * t, -0.2 * t]
-        for t in [0, 0.5, 1]
-        ]).T.copy())))
-
-    if visualize:
-        low_order_qbx, _ = QBXLayerPotentialSource(
-                density_discr,
-                fine_order=bdry_ovsmp_quad_order, qbx_order=2,
-                fmm_order=3,
-                )
-
-        from sumpy.visualization import FieldPlotter
-        fplot = FieldPlotter(np.zeros(2), extent=5, npoints=300)
-        targets_plot = PointsTarget(fplot.points)
-
-    places = {
-        sym.DEFAULT_SOURCE: qbx,
-        sym.DEFAULT_TARGET: qbx.density_discr,
-        "targets0": targets_0,
-        "targets1": targets_1
-        }
-    if visualize:
-        places.update({
-            "qbx-low-order": low_order_qbx,
-            "targets-plot": targets_plot
-            })
-
-    from pytential import GeometryCollection
-    places = GeometryCollection(places)
-
-    # }}}
-
-    # from meshmode.discretization.visualization import make_visualizer
-    # bdry_vis = make_visualizer(queue, density_discr, 20)
-
-    # {{{ solve bvp
-
-    from sumpy.kernel import HelmholtzKernel, AxisTargetDerivative
-    kernel = HelmholtzKernel(2)
-
-    beta = 2.5
-    K0 = np.sqrt(k0**2-beta**2)  # noqa
-    K1 = np.sqrt(k1**2-beta**2)  # noqa
-
-    pde_op = op_class(
-            mode,
-            k_vacuum=1,
-            domain_k_exprs=(k0, k1),
-            beta=beta,
-            interfaces=((0, 1, sym.DEFAULT_SOURCE),),
-            use_l2_weighting=use_l2_weighting)
-
-    op_unknown_sym = pde_op.make_unknown("unknown")
-
-    representation0_sym = pde_op.representation(op_unknown_sym, 0)
-    representation1_sym = pde_op.representation(op_unknown_sym, 1)
-    bound_pde_op = bind(places, pde_op.operator(op_unknown_sym))
-
-    e_factor = float(pde_op.ez_enabled)
-    h_factor = float(pde_op.hz_enabled)
-
-    e_sources_0 = make_obj_array(list(np.array([
-        [0.1, 0.2]
-        ]).T.copy()))
-    e_strengths_0 = np.array([1*e_factor])
-    e_sources_1 = make_obj_array(list(np.array([
-        [4, 4]
-        ]).T.copy()))
-    e_strengths_1 = np.array([1*e_factor])
-
-    h_sources_0 = make_obj_array(list(np.array([
-        [0.2, 0.1]
-        ]).T.copy()))
-    h_strengths_0 = np.array([1*h_factor])
-    h_sources_1 = make_obj_array(list(np.array([
-        [4, 5]
-        ]).T.copy()))
-    h_strengths_1 = np.array([1*h_factor])
-
-    kernel_grad = [
-        AxisTargetDerivative(i, kernel) for i in range(density_discr.ambient_dim)]
-
-    from sumpy.p2p import P2P
-    pot_p2p = P2P(cl_ctx, [kernel], exclude_self=False)
-    pot_p2p_grad = P2P(cl_ctx, kernel_grad, exclude_self=False)
-
-    normal = bind(places, sym.normal(qbx.ambient_dim))(
-            queue).as_vector(np.object)
-    tangent = bind(places,
-            sym.pseudoscalar(qbx.ambient_dim)/sym.area_element(qbx.ambient_dim))(
-                    queue).as_vector(np.object)
-
-    _, (E0,) = pot_p2p(queue, density_discr.nodes(), e_sources_0, [e_strengths_0],
-                    out_host=False, k=K0)
-    _, (E1,) = pot_p2p(queue, density_discr.nodes(), e_sources_1, [e_strengths_1],
-                    out_host=False, k=K1)
-    _, (grad0_E0, grad1_E0) = pot_p2p_grad(
-        queue, density_discr.nodes(), e_sources_0, [e_strengths_0],
-        out_host=False, k=K0)
-    _, (grad0_E1, grad1_E1) = pot_p2p_grad(
-        queue, density_discr.nodes(), e_sources_1, [e_strengths_1],
-        out_host=False, k=K1)
-
-    _, (H0,) = pot_p2p(queue, density_discr.nodes(), h_sources_0, [h_strengths_0],
-                    out_host=False, k=K0)
-    _, (H1,) = pot_p2p(queue, density_discr.nodes(), h_sources_1, [h_strengths_1],
-                    out_host=False, k=K1)
-    _, (grad0_H0, grad1_H0) = pot_p2p_grad(
-        queue, density_discr.nodes(), h_sources_0, [h_strengths_0],
-        out_host=False, k=K0)
-    _, (grad0_H1, grad1_H1) = pot_p2p_grad(
-        queue, density_discr.nodes(), h_sources_1, [h_strengths_1],
-        out_host=False, k=K1)
-
-    E0_dntarget = (grad0_E0*normal[0] + grad1_E0*normal[1])  # noqa
-    E1_dntarget = (grad0_E1*normal[0] + grad1_E1*normal[1])  # noqa
-
-    H0_dntarget = (grad0_H0*normal[0] + grad1_H0*normal[1])  # noqa
-    H1_dntarget = (grad0_H1*normal[0] + grad1_H1*normal[1])  # noqa
-
-    E0_dttarget = (grad0_E0*tangent[0] + grad1_E0*tangent[1])  # noqa
-    E1_dttarget = (grad0_E1*tangent[0] + grad1_E1*tangent[1])  # noqa
-
-    H0_dttarget = (grad0_H0*tangent[0] + grad1_H0*tangent[1])  # noqa
-    H1_dttarget = (grad0_H1*tangent[0] + grad1_H1*tangent[1])  # noqa
-
-    sqrt_w = bind(places, sym.sqrt_jac_q_weight(qbx.ambient_dim))(queue)
-
-    bvp_rhs = np.zeros(len(pde_op.bcs), dtype=np.object)
-    for i_bc, terms in enumerate(pde_op.bcs):
-        for term in terms:
-            assert term.i_interface == 0
-            if term.field_kind == pde_op.field_kind_e:
-
-                if term.direction == pde_op.dir_none:
-                    bvp_rhs[i_bc] += (
-                        term.coeff_outer * E0
-                        + term.coeff_inner * E1)
-                elif term.direction == pde_op.dir_normal:
-                    bvp_rhs[i_bc] += (
-                        term.coeff_outer * E0_dntarget
-                        + term.coeff_inner * E1_dntarget)
-                elif term.direction == pde_op.dir_tangential:
-                    bvp_rhs[i_bc] += (
-                        term.coeff_outer * E0_dttarget
-                        + term.coeff_inner * E1_dttarget)
-                else:
-                    raise NotImplementedError("direction spec in RHS")
-
-            elif term.field_kind == pde_op.field_kind_h:
-                if term.direction == pde_op.dir_none:
-                    bvp_rhs[i_bc] += (
-                        term.coeff_outer * H0
-                        + term.coeff_inner * H1)
-                elif term.direction == pde_op.dir_normal:
-                    bvp_rhs[i_bc] += (
-                        term.coeff_outer * H0_dntarget
-                        + term.coeff_inner * H1_dntarget)
-                elif term.direction == pde_op.dir_tangential:
-                    bvp_rhs[i_bc] += (
-                        term.coeff_outer * H0_dttarget
-                        + term.coeff_inner * H1_dttarget)
-                else:
-                    raise NotImplementedError("direction spec in RHS")
-
-            if use_l2_weighting:
-                bvp_rhs[i_bc] *= sqrt_w
-
-    scipy_op = bound_pde_op.scipy_op(queue, "unknown",
-            domains=[sym.DEFAULT_TARGET]*len(pde_op.bcs), K0=K0, K1=K1,
-            dtype=np.complex128)
-
-    if mode == "tem" or op_class is SRep:
-        from sumpy.tools import vector_from_device, vector_to_device
-        from pytential.solve import lu
-        unknown = lu(scipy_op, vector_from_device(queue, bvp_rhs))
-        unknown = vector_to_device(queue, unknown)
-
-    else:
-        from pytential.solve import gmres
-        gmres_result = gmres(scipy_op,
-                bvp_rhs, tol=1e-14, progress=True,
-                hard_failure=True, stall_iterations=0)
-
-        unknown = gmres_result.solution
-
-    # }}}
-
-    from sumpy.tools import vector_from_device
-    F0_tgt = bind(places, representation0_sym,
-            auto_where=(sym.DEFAULT_SOURCE, "targets0"))(
-                    queue, unknown=unknown, K0=K0, K1=K1)
-    F0_tgt = vector_from_device(queue, F0_tgt)
-
-    F1_tgt = bind(places, representation1_sym,
-            auto_where=(sym.DEFAULT_SOURCE, "targets1"))(
-                    queue, unknown=unknown, K0=K0, K1=K1)
-    F1_tgt = vector_from_device(queue, F1_tgt)
-
-    _, (E0_tgt_true,) = pot_p2p(queue,
-            targets_0.nodes(), e_sources_0, [e_strengths_0],
-            out_host=True, k=K0)
-    _, (E1_tgt_true,) = pot_p2p(queue,
-            targets_1.nodes(), e_sources_1, [e_strengths_1],
-            out_host=True, k=K1)
-
-    _, (H0_tgt_true,) = pot_p2p(queue,
-            targets_0.nodes(), h_sources_0, [h_strengths_0],
-            out_host=True, k=K0)
-    _, (H1_tgt_true,) = pot_p2p(queue,
-            targets_1.nodes(), h_sources_1, [h_strengths_1],
-            out_host=True, k=K1)
-
-    err_F0_total = 0  # noqa
-    err_F1_total = 0  # noqa
-
-    i_field = 0
-
-    def vec_norm(ary):
-        return la.norm(ary.reshape(-1))
-
-    def field_kind_to_string(field_kind):
-        return {pde_op.field_kind_e: "E", pde_op.field_kind_h: "H"}[field_kind]
-
-    for field_kind in pde_op.field_kinds:
-        if not pde_op.is_field_present(field_kind):
-            continue
-
-        if field_kind == pde_op.field_kind_e:
-            F0_tgt_true = E0_tgt_true  # noqa
-            F1_tgt_true = E1_tgt_true  # noqa
-        elif field_kind == pde_op.field_kind_h:
-            F0_tgt_true = H0_tgt_true  # noqa
-            F1_tgt_true = H1_tgt_true  # noqa
-        else:
-            assert False
-
-        abs_err_F0 = vec_norm(F0_tgt[i_field] - F0_tgt_true)  # noqa
-        abs_err_F1 = vec_norm(F1_tgt[i_field] - F1_tgt_true)  # noqa
-
-        rel_err_F0 = abs_err_F0/vec_norm(F0_tgt_true)  # noqa
-        rel_err_F1 = abs_err_F1/vec_norm(F1_tgt_true)  # noqa
-
-        err_F0_total = max(rel_err_F0, err_F0_total)  # noqa
-        err_F1_total = max(rel_err_F1, err_F1_total)  # noqa
-
-        print("Abs Err %s0" % field_kind_to_string(field_kind), abs_err_F0)
-        print("Abs Err %s1" % field_kind_to_string(field_kind), abs_err_F1)
-
-        print("Rel Err %s0" % field_kind_to_string(field_kind), rel_err_F0)
-        print("Rel Err %s1" % field_kind_to_string(field_kind), rel_err_F1)
-
-        i_field += 1
-
-    if visualize:
-        fld0 = bind(places, representation0_sym,
-                auto_where=(sym.DEFAULT_SOURCE, "targets-plot"))(
-                        queue, unknown=unknown, K0=K0)
-        fld1 = bind(places, representation1_sym,
-                auto_where=(sym.DEFAULT_SOURCE, "targets-plot"))(
-                        queue, unknown=unknown, K1=K1)
-
-        comp_fields = []
-        i_field = 0
-        for field_kind in pde_op.field_kinds:
-            if not pde_op.is_field_present(field_kind):
-                continue
-
-            fld_str = field_kind_to_string(field_kind)
-            comp_fields.extend([
-                ("%s_fld0" % fld_str, fld0[i_field].get()),
-                ("%s_fld1" % fld_str, fld1[i_field].get()),
-                ])
-
-            i_field += 0
-
-        from sumpy.kernel import LaplaceKernel
-        ones = (cl.array.empty(queue, (density_discr.nnodes,), dtype=np.float64)
-                .fill(1))
-        ind_func = - bind(places, sym.D(LaplaceKernel(2), sym.var("u")),
-                auto_where=("qbx-low-order", "targets-plot"))(
-                        queue, u=ones).get()
-
-        _, (e_fld0_true,) = pot_p2p(
-                queue, fplot.points, e_sources_0, [e_strengths_0],
-                out_host=True, k=K0)
-        _, (e_fld1_true,) = pot_p2p(
-                queue, fplot.points, e_sources_1, [e_strengths_1],
-                out_host=True, k=K1)
-        _, (h_fld0_true,) = pot_p2p(
-                queue, fplot.points, h_sources_0, [h_strengths_0],
-                out_host=True, k=K0)
-        _, (h_fld1_true,) = pot_p2p(
-                queue, fplot.points, h_sources_1, [h_strengths_1],
-                out_host=True, k=K1)
-
-        #fplot.show_scalar_in_mayavi(fld_in_vol.real, max_val=5)
-        fplot.write_vtk_file(
-                "potential-n%d.vts" % nelements,
-                [
-                    ("e_fld0_true", e_fld0_true),
-                    ("e_fld1_true", e_fld1_true),
-                    ("h_fld0_true", h_fld0_true),
-                    ("h_fld1_true", h_fld1_true),
-                    ("ind", ind_func),
-                    ] + comp_fields
-                )
-
-    return err_F0_total, err_F1_total
-
-
-@pytest.mark.parametrize("qbx_order", [4])
-@pytest.mark.parametrize("op_class", [
-    SRep,
-    SDRep,
-    ])
-@pytest.mark.parametrize("mode", [
-    "te",
-    "tm",
-    "tem",
-    ])
-def test_dielectric(ctx_factory, qbx_order, op_class, mode, visualize=False):
-    cl_ctx = ctx_factory()
-    queue = cl.CommandQueue(cl_ctx)
-
-    import logging
-    logging.basicConfig(level=logging.INFO)
-
-    from pytools.convergence import EOCRecorder
-    eoc_rec = EOCRecorder()
-
-    for nelements in [30, 50, 70]:
-        # prevent sympy cache 'splosion
-        from sympy.core.cache import clear_cache
-        clear_cache()
-
-        errs = run_dielectric_test(
-                cl_ctx, queue,
-                nelements=nelements, qbx_order=qbx_order,
-                op_class=op_class, mode=mode,
-                visualize=visualize)
-
-        eoc_rec.add_data_point(1/nelements, la.norm(list(errs), np.inf))
-
-    print(eoc_rec)
-    assert eoc_rec.order_estimate() > qbx_order - 0.5
-
-
-# You can test individual routines by typing
-# $ python test_layer_pot.py 'test_routine()'
-
-if __name__ == "__main__":
-    import sys
-    if len(sys.argv) > 1:
-        exec(sys.argv[1])
-    else:
-        from pytest import main
-        main([__file__])
-
-# vim: fdm=marker