diff --git a/pytential/linalg/proxy.py b/pytential/linalg/proxy.py
index 631a74fa99a57490779da319c7bd68979b0322b2..0f79148e8961433de65fc960487c5e4cd6202e94 100644
--- a/pytential/linalg/proxy.py
+++ b/pytential/linalg/proxy.py
@@ -26,10 +26,6 @@ THE SOFTWARE.
 import numpy as np
 import numpy.linalg as la
 
-import pyopencl as cl
-import pyopencl.array # noqa
-from pyopencl.array import to_device
-
 from pytools.obj_array import make_obj_array
 from pytools import memoize_method, memoize_in
 from sumpy.tools import BlockIndexRanges
@@ -54,16 +50,7 @@ Proxy Point Generation
 
 # {{{ point index partitioning
 
-def _element_node_range(group, ielement):
-    istart = group.node_nr_base + group.nunit_nodes * ielement
-    iend = group.node_nr_base + group.nunit_nodes * (ielement + 1)
-
-    return np.arange(istart, iend)
-
-
-def partition_by_nodes(discr,
-                       use_tree=True,
-                       max_nodes_in_box=None):
+def partition_by_nodes(actx, discr, use_tree=True, max_nodes_in_box=None):
     """Generate equally sized ranges of nodes. The partition is created at the
     lowest level of granularity, i.e. nodes. This results in balanced ranges
     of points, but will split elements across different ranges.
@@ -82,112 +69,42 @@ def partition_by_nodes(discr,
         # FIXME: this is just an arbitrary value
         max_nodes_in_box = 32
 
-    with cl.CommandQueue(discr.cl_context) as queue:
-        if use_tree:
-            from boxtree import box_flags_enum
-            from boxtree import TreeBuilder
+    if use_tree:
+        from boxtree import box_flags_enum
+        from boxtree import TreeBuilder
 
-            builder = TreeBuilder(discr.cl_context)
+        builder = TreeBuilder(actx.context)
 
-            tree, _ = builder(queue, discr.nodes(),
+        from meshmode.dof_array import flatten, thaw
+        tree, _ = builder(actx.queue,
+                flatten(thaw(actx, discr.nodes())),
                 max_particles_in_box=max_nodes_in_box)
 
-            tree = tree.get(queue)
-            leaf_boxes, = (tree.box_flags
-                           & box_flags_enum.HAS_CHILDREN == 0).nonzero()
+        tree = tree.get(actx.queue)
+        leaf_boxes, = (tree.box_flags
+                       & box_flags_enum.HAS_CHILDREN == 0).nonzero()
 
-            indices = np.empty(len(leaf_boxes), dtype=np.object)
-            for i, ibox in enumerate(leaf_boxes):
-                box_start = tree.box_source_starts[ibox]
-                box_end = box_start + tree.box_source_counts_cumul[ibox]
-                indices[i] = tree.user_source_ids[box_start:box_end]
+        indices = np.empty(len(leaf_boxes), dtype=np.object)
+        for i, ibox in enumerate(leaf_boxes):
+            box_start = tree.box_source_starts[ibox]
+            box_end = box_start + tree.box_source_counts_cumul[ibox]
+            indices[i] = tree.user_source_ids[box_start:box_end]
 
-            ranges = to_device(queue,
-                np.cumsum([0] + [box.shape[0] for box in indices]))
-            indices = to_device(queue, np.hstack(indices))
-        else:
-            indices = cl.array.arange(queue, 0, discr.ndofs,
-                                      dtype=np.int)
-            ranges = cl.array.arange(queue, 0, discr.ndofs + 1,
-                                     discr.ndofs // max_nodes_in_box,
-                                     dtype=np.int)
-        assert ranges[-1] == discr.ndofs
-
-        return BlockIndexRanges(discr.cl_context,
-                                indices.with_queue(None),
-                                ranges.with_queue(None))
-
-
-def partition_from_coarse(resampler, from_indices):
-    """Generate a partition of nodes from an existing partition on a
-    coarser discretization. The new partition is generated based on element
-    refinement relationships in *resampler*, so the existing partition
-    needs to be created using :func:`partition_by_elements`,
-    since we assume that each range contains all the nodes in an element.
-
-    The new partition will have the same number of ranges as the old partition.
-    The nodes inside each range in the new partition are all the nodes in
-    *resampler.to_discr* that were refined from elements in the same
-    range from *resampler.from_discr*.
-
-    :arg resampler: a
-        :class:`meshmode.discretization.connection.DirectDiscretizationConnection`.
-    :arg from_indices: a :class:`sumpy.tools.BlockIndexRanges`.
+        ranges = actx.from_numpy(
+                np.cumsum([0] + [box.shape[0] for box in indices])
+                )
+        indices = actx.from_numpy(np.hstack(indices))
+    else:
+        indices = actx.from_numpy(np.arange(0, discr.ndofs, dtype=np.int))
+        ranges = actx.from_numpy(np.arange(
+            0,
+            discr.ndofs + 1,
+            discr.ndofs // max_nodes_in_box, dtype=np.int))
 
-    :return: a :class:`sumpy.tools.BlockIndexRanges`.
-    """
+    assert ranges[-1] == discr.ndofs
 
-    if not hasattr(resampler, "groups"):
-        raise ValueError("resampler must be a DirectDiscretizationConnection.")
-
-    with cl.CommandQueue(resampler.cl_context) as queue:
-        from_indices = from_indices.get(queue)
-
-        # construct ranges
-        from_discr = resampler.from_discr
-        from_grp_ranges = np.cumsum(
-            [0] + [grp.nelements for grp in from_discr.mesh.groups])
-        from_el_ranges = np.hstack([
-            np.arange(grp.node_nr_base, grp.ndofs + 1, grp.nunit_nodes)
-            for grp in from_discr.groups])
-
-        # construct coarse element arrays in each from_range
-        el_indices = np.empty(from_indices.nblocks, dtype=np.object)
-        el_ranges = np.full(from_grp_ranges[-1], -1, dtype=np.int)
-        for i in range(from_indices.nblocks):
-            ifrom = from_indices.block_indices(i)
-            el_indices[i] = np.unique(np.digitize(ifrom, from_el_ranges)) - 1
-            el_ranges[el_indices[i]] = i
-        el_indices = np.hstack(el_indices)
-
-        # construct lookup table
-        to_el_table = [np.full(g.nelements, -1, dtype=np.int)
-                       for g in resampler.to_discr.groups]
-
-        for igrp, grp in enumerate(resampler.groups):
-            for batch in grp.batches:
-                to_el_table[igrp][batch.to_element_indices.get(queue)] = \
-                    from_grp_ranges[igrp] + batch.from_element_indices.get(queue)
-
-        # construct fine node index list
-        indices = [np.empty(0, dtype=np.int)
-                   for _ in range(from_indices.nblocks)]
-        for igrp in range(len(resampler.groups)):
-            to_element_indices = \
-                    np.where(np.isin(to_el_table[igrp], el_indices))[0]
-
-            for i, j in zip(el_ranges[to_el_table[igrp][to_element_indices]],
-                            to_element_indices):
-                indices[i] = np.hstack([indices[i],
-                    _element_node_range(resampler.to_discr.groups[igrp], j)])
-
-        ranges = to_device(queue,
-                np.cumsum([0] + [b.shape[0] for b in indices]))
-        indices = to_device(queue, np.hstack(indices))
-
-        return BlockIndexRanges(resampler.cl_context,
-                                indices.with_queue(None),
-                                ranges.with_queue(None))
+    return BlockIndexRanges(actx.context,
+        actx.freeze(indices), actx.freeze(ranges))
 
 # }}}
 
@@ -340,7 +257,7 @@ class ProxyGenerator(object):
             """.format(radius_expr=radius_expr)],
             [
                 lp.GlobalArg("sources", None,
-                    shape=(self.ambient_dim, "nsources")),
+                    shape=(self.ambient_dim, "nsources"), dim_tags="sep,C"),
                 lp.GlobalArg("center_int", None,
                     shape=(self.ambient_dim, "nsources"), dim_tags="sep,C"),
                 lp.GlobalArg("center_ext", None,
@@ -367,11 +284,11 @@ class ProxyGenerator(object):
 
         return knl
 
-    def __call__(self, queue, source_dd, indices, **kwargs):
+    def __call__(self, actx, source_dd, indices, **kwargs):
         """Generate proxy points for each given range of source points in
         the discretization in *source_dd*.
 
-        :arg queue: a :class:`pyopencl.CommandQueue`.
+        :arg actx: a :class:`~meshmode.array_context.ArrayContext`.
         :arg source_dd: a :class:`~pytential.symbolic.primitives.DOFDescriptor`
             for the discretization on which the proxy points are to be
             generated.
@@ -397,47 +314,51 @@ class ProxyGenerator(object):
                 source_dd.geometry, source_dd.discr_stage)
 
         radii = bind(self.places, sym.expansion_radii(
-            self.ambient_dim, dofdesc=source_dd))(queue)
+            self.ambient_dim, dofdesc=source_dd))(actx)
         center_int = bind(self.places, sym.expansion_centers(
-            self.ambient_dim, -1, dofdesc=source_dd))(queue)
+            self.ambient_dim, -1, dofdesc=source_dd))(actx)
         center_ext = bind(self.places, sym.expansion_centers(
-            self.ambient_dim, +1, dofdesc=source_dd))(queue)
+            self.ambient_dim, +1, dofdesc=source_dd))(actx)
 
+        from meshmode.dof_array import flatten, thaw
         knl = self.get_kernel()
-        _, (centers_dev, radii_dev,) = knl(queue,
-            sources=discr.nodes(),
-            center_int=center_int,
-            center_ext=center_ext,
-            expansion_radii=radii,
+        _, (centers_dev, radii_dev,) = knl(actx.queue,
+            sources=flatten(thaw(actx, discr.nodes())),
+            center_int=flatten(center_int),
+            center_ext=flatten(center_ext),
+            expansion_radii=flatten(radii),
             srcindices=indices.indices,
             srcranges=indices.ranges, **kwargs)
-        centers = centers_dev.get()
-        radii = radii_dev.get()
 
+        from pytential.utils import flatten_to_numpy
+        centers = flatten_to_numpy(actx, centers_dev)
+        radii = flatten_to_numpy(actx, radii_dev)
         proxies = np.empty(indices.nblocks, dtype=np.object)
         for i in range(indices.nblocks):
             proxies[i] = _affine_map(self.ref_points,
                     A=(radii[i] * np.eye(self.ambient_dim)),
                     b=centers[:, i].reshape(-1, 1))
 
-        pxyranges = cl.array.arange(queue,
-                0,
-                proxies.shape[0] * proxies[0].shape[1] + 1,
-                proxies[0].shape[1],
-                dtype=indices.ranges.dtype)
+        pxyranges = actx.from_numpy(np.arange(
+            0,
+            proxies.shape[0] * proxies[0].shape[1] + 1,
+            proxies[0].shape[1],
+            dtype=indices.ranges.dtype))
         proxies = make_obj_array([
-            cl.array.to_device(queue, np.hstack([p[idim] for p in proxies]))
-            for idim in range(self.ambient_dim)])
+            actx.freeze(actx.from_numpy(np.hstack([p[idim] for p in proxies])))
+            for idim in range(self.ambient_dim)
+            ])
         centers = make_obj_array([
-            centers_dev[idim].with_queue(queue).copy()
-            for idim in range(self.ambient_dim)])
+            actx.freeze(centers_dev[idim])
+            for idim in range(self.ambient_dim)
+            ])
 
         assert pxyranges[-1] == proxies[0].shape[0]
-        return proxies, pxyranges, centers, radii_dev
+        return proxies, actx.freeze(pxyranges), centers, actx.freeze(radii_dev)
 
 
-def gather_block_neighbor_points(discr, indices, pxycenters, pxyradii,
-                                 max_nodes_in_box=None):
+def gather_block_neighbor_points(actx, discr, indices, pxycenters, pxyradii,
+        max_nodes_in_box=None):
     """Generate a set of neighboring points for each range of points in
     *discr*. Neighboring points of a range :math:`i` are defined
     as all the points inside the proxy ball :math:`i` that do not also
@@ -455,79 +376,77 @@ def gather_block_neighbor_points(discr, indices, pxycenters, pxyradii,
         # FIXME: this is a fairly arbitrary value
         max_nodes_in_box = 32
 
-    with cl.CommandQueue(discr.cl_context) as queue:
-        indices = indices.get(queue)
-
-        # NOTE: this is constructed for multiple reasons:
-        #   * TreeBuilder takes object arrays
-        #   * `srcindices` can be a small subset of nodes, so this will save
-        #   some work
-        #   * `srcindices` may reorder the array returned by nodes(), so this
-        #   makes sure that we have the same order in tree.user_source_ids
-        #   and friends
-        sources = discr.nodes().get(queue)
-        sources = make_obj_array([
-            cl.array.to_device(queue, sources[idim, indices.indices])
-            for idim in range(discr.ambient_dim)])
-
-        # construct tree
-        from boxtree import TreeBuilder
-        builder = TreeBuilder(discr.cl_context)
-        tree, _ = builder(queue, sources,
-                          max_particles_in_box=max_nodes_in_box)
-
-        from boxtree.area_query import AreaQueryBuilder
-        builder = AreaQueryBuilder(discr.cl_context)
-        query, _ = builder(queue, tree, pxycenters, pxyradii)
-
-        # find nodes inside each proxy ball
-        tree = tree.get(queue)
-        query = query.get(queue)
-
-        if isinstance(pxycenters[0], cl.array.Array):
-            pxycenters = np.vstack([pxycenters[idim].get(queue)
-                                    for idim in range(discr.ambient_dim)])
-        if isinstance(pxyradii, cl.array.Array):
-            pxyradii = pxyradii.get(queue)
-
-        nbrindices = np.empty(indices.nblocks, dtype=np.object)
-        for iproxy in range(indices.nblocks):
-            # get list of boxes intersecting the current ball
-            istart = query.leaves_near_ball_starts[iproxy]
-            iend = query.leaves_near_ball_starts[iproxy + 1]
-            iboxes = query.leaves_near_ball_lists[istart:iend]
-
-            # get nodes inside the boxes
-            istart = tree.box_source_starts[iboxes]
-            iend = istart + tree.box_source_counts_cumul[iboxes]
-            isources = np.hstack([np.arange(s, e)
-                                  for s, e in zip(istart, iend)])
-            nodes = np.vstack([tree.sources[idim][isources]
-                               for idim in range(discr.ambient_dim)])
-            isources = tree.user_source_ids[isources]
-
-            # get nodes inside the ball but outside the current range
-            center = pxycenters[:, iproxy].reshape(-1, 1)
-            radius = pxyradii[iproxy]
-            mask = ((la.norm(nodes - center, axis=0) < radius)
-                    & ((isources < indices.ranges[iproxy])
-                        | (indices.ranges[iproxy + 1] <= isources)))
-
-            nbrindices[iproxy] = indices.indices[isources[mask]]
-
-        nbrranges = to_device(queue,
-                np.cumsum([0] + [n.shape[0] for n in nbrindices]))
-        nbrindices = to_device(queue, np.hstack(nbrindices))
-
-        return BlockIndexRanges(discr.cl_context,
-                                nbrindices.with_queue(None),
-                                nbrranges.with_queue(None))
-
-
-def gather_block_interaction_points(places, source_dd, indices,
-                                    radius_factor=None,
-                                    approx_nproxy=None,
-                                    max_nodes_in_box=None):
+    indices = indices.get(actx.queue)
+
+    # NOTE: this is constructed for multiple reasons:
+    #   * TreeBuilder takes object arrays
+    #   * `srcindices` can be a small subset of nodes, so this will save
+    #   some work
+    #   * `srcindices` may reorder the array returned by nodes(), so this
+    #   makes sure that we have the same order in tree.user_source_ids
+    #   and friends
+    from pytential.utils import flatten_to_numpy
+    sources = flatten_to_numpy(actx, discr.nodes())
+    sources = make_obj_array([
+        actx.from_numpy(sources[idim][indices.indices])
+        for idim in range(discr.ambient_dim)])
+
+    # construct tree
+    from boxtree import TreeBuilder
+    builder = TreeBuilder(actx.context)
+    tree, _ = builder(actx.queue, sources,
+            max_particles_in_box=max_nodes_in_box)
+
+    from boxtree.area_query import AreaQueryBuilder
+    builder = AreaQueryBuilder(actx.context)
+    query, _ = builder(actx.queue, tree, pxycenters, pxyradii)
+
+    # find nodes inside each proxy ball
+    tree = tree.get(actx.queue)
+    query = query.get(actx.queue)
+
+    pxycenters = np.vstack([
+        actx.to_numpy(pxycenters[idim])
+        for idim in range(discr.ambient_dim)
+        ])
+    pxyradii = actx.to_numpy(pxyradii)
+
+    nbrindices = np.empty(indices.nblocks, dtype=np.object)
+    for iproxy in range(indices.nblocks):
+        # get list of boxes intersecting the current ball
+        istart = query.leaves_near_ball_starts[iproxy]
+        iend = query.leaves_near_ball_starts[iproxy + 1]
+        iboxes = query.leaves_near_ball_lists[istart:iend]
+
+        # get nodes inside the boxes
+        istart = tree.box_source_starts[iboxes]
+        iend = istart + tree.box_source_counts_cumul[iboxes]
+        isources = np.hstack([np.arange(s, e)
+                              for s, e in zip(istart, iend)])
+        nodes = np.vstack([tree.sources[idim][isources]
+                           for idim in range(discr.ambient_dim)])
+        isources = tree.user_source_ids[isources]
+
+        # get nodes inside the ball but outside the current range
+        center = pxycenters[:, iproxy].reshape(-1, 1)
+        radius = pxyradii[iproxy]
+        mask = ((la.norm(nodes - center, axis=0) < radius)
+                & ((isources < indices.ranges[iproxy])
+                    | (indices.ranges[iproxy + 1] <= isources)))
+
+        nbrindices[iproxy] = indices.indices[isources[mask]]
+
+    nbrranges = actx.from_numpy(np.cumsum([0] + [n.shape[0] for n in nbrindices]))
+    nbrindices = actx.from_numpy(np.hstack(nbrindices))
+
+    return BlockIndexRanges(actx.context,
+            actx.freeze(nbrindices), actx.freeze(nbrranges))
+
+
+def gather_block_interaction_points(actx, places, source_dd, indices,
+        radius_factor=None,
+        approx_nproxy=None,
+        max_nodes_in_box=None):
     """Generate sets of interaction points for each given range of indices
     in the *source* discretization. For each input range of indices,
     the corresponding output range of points is consists of:
@@ -583,7 +502,7 @@ def gather_block_interaction_points(places, source_dd, indices,
             """,
             [
                 lp.GlobalArg("sources", None,
-                    shape=(lpot_source.ambient_dim, "nsources")),
+                    shape=(lpot_source.ambient_dim, "nsources"), dim_tags="sep,C"),
                 lp.GlobalArg("proxies", None,
                     shape=(lpot_source.ambient_dim, "nproxies"), dim_tags="sep,C"),
                 lp.GlobalArg("nbrindices", None,
@@ -607,28 +526,28 @@ def gather_block_interaction_points(places, source_dd, indices,
         return loopy_knl
 
     lpot_source = places.get_geometry(source_dd.geometry)
-    with cl.CommandQueue(lpot_source.cl_context) as queue:
-        generator = ProxyGenerator(places,
-                radius_factor=radius_factor,
-                approx_nproxy=approx_nproxy)
-        proxies, pxyranges, pxycenters, pxyradii = \
-                generator(queue, source_dd, indices)
-
-        discr = places.get_discretization(source_dd.geometry, source_dd.discr_stage)
-        neighbors = gather_block_neighbor_points(discr,
-                indices, pxycenters, pxyradii,
-                max_nodes_in_box=max_nodes_in_box)
-
-        ranges = cl.array.zeros(queue, indices.nblocks + 1, dtype=np.int)
-        _, (nodes, ranges) = knl()(queue,
-                sources=discr.nodes(),
-                proxies=proxies,
-                pxyranges=pxyranges,
-                nbrindices=neighbors.indices,
-                nbrranges=neighbors.ranges,
-                ranges=ranges)
-
-        return nodes.with_queue(None), ranges.with_queue(None)
+    generator = ProxyGenerator(places,
+            radius_factor=radius_factor,
+            approx_nproxy=approx_nproxy)
+    proxies, pxyranges, pxycenters, pxyradii = \
+            generator(actx, source_dd, indices)
+
+    discr = places.get_discretization(source_dd.geometry, source_dd.discr_stage)
+    neighbors = gather_block_neighbor_points(actx, discr,
+            indices, pxycenters, pxyradii,
+            max_nodes_in_box=max_nodes_in_box)
+
+    from meshmode.dof_array import flatten, thaw
+    ranges = actx.zeros(indices.nblocks + 1, dtype=np.int)
+    _, (nodes, ranges) = knl()(actx.queue,
+            sources=flatten(thaw(actx, discr.nodes())),
+            proxies=proxies,
+            pxyranges=pxyranges,
+            nbrindices=neighbors.indices,
+            nbrranges=neighbors.ranges,
+            ranges=ranges)
+
+    return actx.freeze(nodes), actx.freeze(ranges)
 
 # }}}
 
diff --git a/pytential/symbolic/execution.py b/pytential/symbolic/execution.py
index 11b96566e4d2b6872bcdee907b02f69d064ac7b5..ce8910e53d5d558e817f69f36cabd772619358a0 100644
--- a/pytential/symbolic/execution.py
+++ b/pytential/symbolic/execution.py
@@ -1038,10 +1038,10 @@ def _bmat(blocks, dtypes):
     return result
 
 
-def build_matrix(queue, places, exprs, input_exprs, domains=None,
+def build_matrix(actx, places, exprs, input_exprs, domains=None,
         auto_where=None, context=None):
     """
-    :arg queue: a :class:`pyopencl.CommandQueue`.
+    :arg actx: a :class:`~meshmode.array_context.ArrayContext`.
     :arg places: a :class:`~pytential.symbolic.execution.GeometryCollection`.
         Alternatively, any list or mapping that is a valid argument for its
         constructor can also be used.
@@ -1092,7 +1092,7 @@ def build_matrix(queue, places, exprs, input_exprs, domains=None,
                 domains[ibcol].geometry, domains[ibcol].discr_stage)
 
         mbuilder = MatrixBuilder(
-                queue,
+                actx,
                 dep_expr=input_exprs[ibcol],
                 other_dep_exprs=(input_exprs[:ibcol]
                                  + input_exprs[ibcol + 1:]),
@@ -1109,7 +1109,7 @@ def build_matrix(queue, places, exprs, input_exprs, domains=None,
             if isinstance(block, np.ndarray):
                 dtypes.append(block.dtype)
 
-    return cl.array.to_device(queue, _bmat(blocks, dtypes))
+    return actx.from_numpy(_bmat(blocks, dtypes))
 
 # }}}
 
diff --git a/pytential/symbolic/matrix.py b/pytential/symbolic/matrix.py
index 4fa07091caf3855e36ef5788a572150e2e1c63a2..dc0eb3734d112a39d41de654e4258b52d05fa813 100644
--- a/pytential/symbolic/matrix.py
+++ b/pytential/symbolic/matrix.py
@@ -26,14 +26,14 @@ THE SOFTWARE.
 """
 
 import numpy as np
-import pyopencl as cl  # noqa
-import pyopencl.array  # noqa
 
 import six
 from six.moves import intern
 
 from pytools import memoize_method
 from pytential.symbolic.mappers import EvaluationMapperBase
+from pytential.utils import (
+        flatten_if_needed, flatten_to_numpy, unflatten_from_numpy)
 
 
 # {{{ helpers
@@ -56,7 +56,9 @@ def _get_layer_potential_args(mapper, expr, include_args=None):
                 and arg_name not in include_args):
             continue
 
-        kernel_args[arg_name] = mapper.rec(arg_expr)
+        kernel_args[arg_name] = flatten_if_needed(mapper.array_context,
+                mapper.rec(arg_expr)
+                )
 
     return kernel_args
 
@@ -66,7 +68,7 @@ def _get_layer_potential_args(mapper, expr, include_args=None):
 # {{{ base classes for matrix builders
 
 class MatrixBuilderBase(EvaluationMapperBase):
-    def __init__(self, queue, dep_expr, other_dep_exprs,
+    def __init__(self, actx, dep_expr, other_dep_exprs,
             dep_source, dep_discr, places, context):
         """
         :arg queue: a :class:`pyopencl.CommandQueue`.
@@ -84,7 +86,7 @@ class MatrixBuilderBase(EvaluationMapperBase):
         """
         super(MatrixBuilderBase, self).__init__(context=context)
 
-        self.queue = queue
+        self.array_context = actx
         self.dep_expr = dep_expr
         self.other_dep_exprs = other_dep_exprs
         self.dep_source = dep_source
@@ -196,17 +198,25 @@ class MatrixBuilderBase(EvaluationMapperBase):
         if self.is_kind_matrix(rec_operand):
             raise NotImplementedError("derivatives")
 
-        rec_operand = cl.array.to_device(self.queue, rec_operand)
+        dofdesc = expr.dofdesc
         op = sym.NumReferenceDerivative(
                 ref_axes=expr.ref_axes,
                 operand=sym.var("u"),
-                dofdesc=expr.dofdesc)
-        return bind(self.places, op)(self.queue, u=rec_operand).get()
+                dofdesc=dofdesc)
+
+        discr = self.places.get_discretization(dofdesc.geometry, dofdesc.discr_stage)
+        rec_operand = unflatten_from_numpy(self.array_context, discr, rec_operand)
+
+        return flatten_to_numpy(self.array_context,
+                bind(self.places, op)(self.array_context, u=rec_operand)
+                )
 
     def map_node_coordinate_component(self, expr):
         from pytential import bind, sym
         op = sym.NodeCoordinateComponent(expr.ambient_axis, dofdesc=expr.dofdesc)
-        return bind(self.places, op)(self.queue).get()
+        return flatten_to_numpy(self.array_context,
+                bind(self.places, op)(self.array_context)
+                )
 
     def map_call(self, expr):
         arg, = expr.parameters
@@ -215,17 +225,13 @@ class MatrixBuilderBase(EvaluationMapperBase):
         if isinstance(rec_arg, np.ndarray) and self.is_kind_matrix(rec_arg):
             raise RuntimeError("expression is nonlinear in variable")
 
-        if isinstance(rec_arg, np.ndarray):
-            rec_arg = cl.array.to_device(self.queue, rec_arg)
-
-        from pytential import bind, sym
-        op = expr.function(sym.var("u"))
-        result = bind(self.places, op)(self.queue, u=rec_arg)
-
-        if isinstance(result, cl.array.Array):
-            result = result.get()
-
-        return result
+        from numbers import Number
+        if isinstance(rec_arg, Number):
+            return getattr(np, expr.function.name)(rec_arg)
+        else:
+            rec_arg = unflatten_from_numpy(self.array_context, None, rec_arg)
+            result = getattr(self.array_context.np, expr.function.name)(rec_arg)
+            return flatten_to_numpy(self.array_context, result)
 
     # }}}
 
@@ -240,14 +246,14 @@ class MatrixBlockBuilderBase(MatrixBuilderBase):
     assume that each operator acts directly on the density.
     """
 
-    def __init__(self, queue, dep_expr, other_dep_exprs,
+    def __init__(self, actx, dep_expr, other_dep_exprs,
             dep_source, dep_discr, places, index_set, context):
         """
         :arg index_set: a :class:`sumpy.tools.MatrixBlockIndexRanges` class
             describing which blocks are going to be evaluated.
         """
 
-        super(MatrixBlockBuilderBase, self).__init__(queue,
+        super(MatrixBlockBuilderBase, self).__init__(actx,
                 dep_expr, other_dep_exprs, dep_source, dep_discr,
                 places, context)
         self.index_set = index_set
@@ -259,7 +265,7 @@ class MatrixBlockBuilderBase(MatrixBuilderBase):
         # be computed on the full discretization, ignoring our index_set,
         # e.g the normal in a double layer potential
 
-        return MatrixBuilderBase(self.queue,
+        return MatrixBuilderBase(self.array_context,
                 self.dep_expr,
                 self.other_dep_exprs,
                 self.dep_source,
@@ -272,7 +278,7 @@ class MatrixBlockBuilderBase(MatrixBuilderBase):
         # blk_mapper is used to recursively compute the density to
         # a layer potential operator to ensure there is no composition
 
-        return MatrixBlockBuilderBase(self.queue,
+        return MatrixBlockBuilderBase(self.array_context,
                 self.dep_expr,
                 self.other_dep_exprs,
                 self.dep_source,
@@ -302,9 +308,10 @@ class MatrixBlockBuilderBase(MatrixBuilderBase):
 # We'll cheat and build the matrix on the host.
 
 class MatrixBuilder(MatrixBuilderBase):
-    def __init__(self, queue, dep_expr, other_dep_exprs,
+    def __init__(self, actx, dep_expr, other_dep_exprs,
             dep_source, dep_discr, places, context):
-        super(MatrixBuilder, self).__init__(queue, dep_expr, other_dep_exprs,
+        super(MatrixBuilder, self).__init__(
+                actx, dep_expr, other_dep_exprs,
                 dep_source, dep_discr, places, context)
 
     def map_interpolation(self, expr):
@@ -313,13 +320,17 @@ class MatrixBuilder(MatrixBuilderBase):
         if expr.to_dd.discr_stage != sym.QBX_SOURCE_QUAD_STAGE2:
             raise RuntimeError("can only interpolate to QBX_SOURCE_QUAD_STAGE2")
         operand = self.rec(expr.operand)
+        actx = self.array_context
 
         if isinstance(operand, (int, float, complex, np.number)):
             return operand
         elif isinstance(operand, np.ndarray) and operand.ndim == 1:
             conn = self.places.get_connection(expr.from_dd, expr.to_dd)
-            return conn(self.queue,
-                    cl.array.to_device(self.queue, operand)).get(self.queue)
+            discr = self.places.get_discretization(
+                    expr.from_dd.geometry, expr.from_dd.discr_stage)
+
+            operand = unflatten_from_numpy(actx, discr, operand)
+            return flatten_to_numpy(actx, conn(operand))
         elif isinstance(operand, np.ndarray) and operand.ndim == 2:
             cache = self.places._get_cache("direct_resampler")
             key = (expr.from_dd.geometry,
@@ -333,8 +344,8 @@ class MatrixBuilder(MatrixBuilderBase):
                     flatten_chained_connection
 
                 conn = self.places.get_connection(expr.from_dd, expr.to_dd)
-                conn = flatten_chained_connection(self.queue, conn)
-                mat = conn.full_resample_matrix(self.queue).get(self.queue)
+                conn = flatten_chained_connection(actx, conn)
+                mat = actx.to_numpy(conn.full_resample_matrix(actx))
 
                 # FIXME: the resample matrix is slow to compute and very big
                 # to store, so caching it may not be the best idea
@@ -359,6 +370,7 @@ class MatrixBuilder(MatrixBuilderBase):
         if not self.is_kind_matrix(rec_density):
             raise NotImplementedError("layer potentials on non-variables")
 
+        actx = self.array_context
         kernel = expr.kernel
         kernel_args = _get_layer_potential_args(self, expr)
 
@@ -366,31 +378,31 @@ class MatrixBuilder(MatrixBuilderBase):
         local_expn = LineTaylorLocalExpansion(kernel, lpot_source.qbx_order)
 
         from sumpy.qbx import LayerPotentialMatrixGenerator
-        mat_gen = LayerPotentialMatrixGenerator(
-                self.queue.context, (local_expn,))
+        mat_gen = LayerPotentialMatrixGenerator(actx.context, (local_expn,))
 
         assert abs(expr.qbx_forced_limit) > 0
         from pytential import bind, sym
         radii = bind(self.places, sym.expansion_radii(
             source_discr.ambient_dim,
-            dofdesc=expr.target))(self.queue)
+            dofdesc=expr.target))(actx)
         centers = bind(self.places, sym.expansion_centers(
             source_discr.ambient_dim,
             expr.qbx_forced_limit,
-            dofdesc=expr.target))(self.queue)
-
-        _, (mat,) = mat_gen(self.queue,
-                targets=target_discr.nodes(),
-                sources=source_discr.nodes(),
-                centers=centers,
-                expansion_radii=radii,
+            dofdesc=expr.target))(actx)
+
+        from meshmode.dof_array import flatten, thaw
+        _, (mat,) = mat_gen(actx.queue,
+                targets=flatten(thaw(actx, target_discr.nodes())),
+                sources=flatten(thaw(actx, source_discr.nodes())),
+                centers=flatten(centers),
+                expansion_radii=flatten(radii),
                 **kernel_args)
-        mat = mat.get()
+        mat = actx.to_numpy(mat)
 
         waa = bind(self.places, sym.weights_and_area_elements(
             source_discr.ambient_dim,
-            dofdesc=expr.source))(self.queue)
-        mat[:, :] *= waa.get(self.queue)
+            dofdesc=expr.source))(actx)
+        mat[:, :] *= actx.to_numpy(flatten(waa))
         mat = mat.dot(rec_density)
 
         return mat
@@ -401,9 +413,9 @@ class MatrixBuilder(MatrixBuilderBase):
 # {{{ p2p matrix builder
 
 class P2PMatrixBuilder(MatrixBuilderBase):
-    def __init__(self, queue, dep_expr, other_dep_exprs,
+    def __init__(self, actx, dep_expr, other_dep_exprs,
             dep_source, dep_discr, places, context, exclude_self=True):
-        super(P2PMatrixBuilder, self).__init__(queue,
+        super(P2PMatrixBuilder, self).__init__(actx,
                 dep_expr, other_dep_exprs, dep_source, dep_discr,
                 places, context)
 
@@ -430,25 +442,26 @@ class P2PMatrixBuilder(MatrixBuilderBase):
         kernel_args = kernel.get_args() + kernel.get_source_args()
         kernel_args = set(arg.loopy_arg.name for arg in kernel_args)
 
+        actx = self.array_context
         kernel_args = _get_layer_potential_args(self,
                 expr, include_args=kernel_args)
         if self.exclude_self:
-            kernel_args["target_to_source"] = \
-                cl.array.arange(self.queue, 0, target_discr.ndofs, dtype=np.int)
+            kernel_args["target_to_source"] = actx.from_numpy(
+                    np.arange(0, target_discr.ndofs, dtype=np.int)
+                    )
 
         from sumpy.p2p import P2PMatrixGenerator
-        mat_gen = P2PMatrixGenerator(
-                self.queue.context, (kernel,), exclude_self=self.exclude_self)
+        mat_gen = P2PMatrixGenerator(actx.context, (kernel,),
+                exclude_self=self.exclude_self)
 
-        _, (mat,) = mat_gen(self.queue,
-                targets=target_discr.nodes(),
-                sources=source_discr.nodes(),
+        from meshmode.dof_array import flatten, thaw
+        _, (mat,) = mat_gen(actx.queue,
+                targets=flatten(thaw(actx, target_discr.nodes())),
+                sources=flatten(thaw(actx, source_discr.nodes())),
                 **kernel_args)
 
-        mat = mat.get()
-        mat = mat.dot(rec_density)
+        return actx.to_numpy(mat).dot(rec_density)
 
-        return mat
 # }}}
 
 
@@ -462,8 +475,9 @@ class NearFieldBlockBuilder(MatrixBlockBuilderBase):
                 places, index_set, context)
 
     def get_dep_variable(self):
-        tgtindices = self.index_set.linear_row_indices.get(self.queue)
-        srcindices = self.index_set.linear_col_indices.get(self.queue)
+        queue = self.array_context.queue
+        tgtindices = self.index_set.linear_row_indices.get(queue)
+        srcindices = self.index_set.linear_col_indices.get(queue)
 
         return np.equal(tgtindices, srcindices).astype(np.float64)
 
@@ -484,6 +498,7 @@ class NearFieldBlockBuilder(MatrixBlockBuilderBase):
         if not np.isscalar(rec_density):
             raise NotImplementedError
 
+        actx = self.array_context
         kernel = expr.kernel
         kernel_args = _get_layer_potential_args(self._mat_mapper, expr)
 
@@ -491,34 +506,34 @@ class NearFieldBlockBuilder(MatrixBlockBuilderBase):
         local_expn = LineTaylorLocalExpansion(kernel, lpot_source.qbx_order)
 
         from sumpy.qbx import LayerPotentialMatrixBlockGenerator
-        mat_gen = LayerPotentialMatrixBlockGenerator(
-                self.queue.context, (local_expn,))
+        mat_gen = LayerPotentialMatrixBlockGenerator(actx.context, (local_expn,))
 
         assert abs(expr.qbx_forced_limit) > 0
         from pytential import bind, sym
         radii = bind(self.places, sym.expansion_radii(
             source_discr.ambient_dim,
-            dofdesc=expr.target))(self.queue)
+            dofdesc=expr.target))(actx)
         centers = bind(self.places, sym.expansion_centers(
             source_discr.ambient_dim,
             expr.qbx_forced_limit,
-            dofdesc=expr.target))(self.queue)
-
-        _, (mat,) = mat_gen(self.queue,
-                targets=target_discr.nodes(),
-                sources=source_discr.nodes(),
-                centers=centers,
-                expansion_radii=radii,
+            dofdesc=expr.target))(actx)
+
+        from meshmode.dof_array import flatten, thaw
+        _, (mat,) = mat_gen(actx.queue,
+                targets=flatten(thaw(actx, target_discr.nodes())),
+                sources=flatten(thaw(actx, source_discr.nodes())),
+                centers=flatten(centers),
+                expansion_radii=flatten(radii),
                 index_set=self.index_set,
                 **kernel_args)
 
         waa = bind(self.places, sym.weights_and_area_elements(
             source_discr.ambient_dim,
-            dofdesc=expr.source))(self.queue)
-        mat *= waa[self.index_set.linear_col_indices]
-        mat = rec_density * mat.get(self.queue)
+            dofdesc=expr.source))(actx)
+        waa = flatten(waa)
 
-        return mat
+        mat *= waa[self.index_set.linear_col_indices]
+        return rec_density * actx.to_numpy(mat)
 
 
 class FarFieldBlockBuilder(MatrixBlockBuilderBase):
@@ -530,8 +545,9 @@ class FarFieldBlockBuilder(MatrixBlockBuilderBase):
         self.exclude_self = exclude_self
 
     def get_dep_variable(self):
-        tgtindices = self.index_set.linear_row_indices.get(self.queue)
-        srcindices = self.index_set.linear_col_indices.get(self.queue)
+        queue = self.array_context.queue
+        tgtindices = self.index_set.linear_row_indices.get(queue)
+        srcindices = self.index_set.linear_col_indices.get(queue)
 
         return np.equal(tgtindices, srcindices).astype(np.float64)
 
@@ -558,24 +574,26 @@ class FarFieldBlockBuilder(MatrixBlockBuilderBase):
         kernel_args = kernel.get_args() + kernel.get_source_args()
         kernel_args = set(arg.loopy_arg.name for arg in kernel_args)
 
+        actx = self.array_context
         kernel_args = _get_layer_potential_args(self._mat_mapper,
                 expr, include_args=kernel_args)
         if self.exclude_self:
-            kernel_args["target_to_source"] = \
-                cl.array.arange(self.queue, 0, target_discr.ndofs, dtype=np.int)
+            kernel_args["target_to_source"] = actx.from_numpy(
+                    np.arange(0, target_discr.ndofs, dtype=np.int)
+                    )
 
         from sumpy.p2p import P2PMatrixBlockGenerator
-        mat_gen = P2PMatrixBlockGenerator(
-                self.queue.context, (kernel,), exclude_self=self.exclude_self)
+        mat_gen = P2PMatrixBlockGenerator(actx.context, (kernel,),
+                exclude_self=self.exclude_self)
 
-        _, (mat,) = mat_gen(self.queue,
-                targets=target_discr.nodes(),
-                sources=source_discr.nodes(),
+        from meshmode.dof_array import flatten, thaw
+        _, (mat,) = mat_gen(actx.queue,
+                targets=flatten(thaw(actx, target_discr.nodes())),
+                sources=flatten(thaw(actx, source_discr.nodes())),
                 index_set=self.index_set,
                 **kernel_args)
-        mat = rec_density * mat.get(self.queue)
 
-        return mat
+        return rec_density * actx.to_numpy(mat)
 
 # }}}
 
diff --git a/pytential/utils.py b/pytential/utils.py
index fb772c0fa1febf73e54a415895f181e73b8a589b..f1e9f0d109069d54b34090b1b0103b012693e454 100644
--- a/pytential/utils.py
+++ b/pytential/utils.py
@@ -43,4 +43,22 @@ def flatten_if_needed(actx: PyOpenCLArrayContext, ary: np.ndarray):
 
     return flatten(ary)
 
+
+def unflatten_from_numpy(actx, discr, ary):
+    from pytools.obj_array import obj_array_vectorize
+    from meshmode.dof_array import unflatten
+
+    ary = obj_array_vectorize(actx.from_numpy, ary)
+    if discr is None:
+        return ary
+    else:
+        return unflatten(actx, discr, ary)
+
+
+def flatten_to_numpy(actx, ary):
+    result = flatten_if_needed(actx, ary)
+
+    from pytools.obj_array import obj_array_vectorize
+    return obj_array_vectorize(actx.to_numpy, result)
+
 # vim: foldmethod=marker
diff --git a/setup.cfg b/setup.cfg
index bfd27f3e0e44952969ce0996be3636210eb80b72..52f59fb78b3009f8aa1fb24fa26641a5207b9b9e 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -9,4 +9,4 @@ exclude=
 [tool:pytest]
 markers=
     slowtest: mark a test as slow
-python_files = test_cost_model.py test_tools.py test_target_specific_qbx.py test_symbolic.py test_muller.py test_global_qbx.py test_layer_pot_eigenvalues.py test_layer_pot_identity.py test_layer_pot.py test_scalar_int_eq.py test_maxwell.py
+python_files = test_cost_model.py test_tools.py test_target_specific_qbx.py test_symbolic.py test_muller.py test_global_qbx.py test_layer_pot_eigenvalues.py test_layer_pot_identity.py test_layer_pot.py test_scalar_int_eq.py test_maxwell.py test_linalg_proxy.py test_matrix.py test_stokes.py
diff --git a/test/test_linalg_proxy.py b/test/test_linalg_proxy.py
index d4a428dc53d5ad068990f54727dba255adfc010f..8e485d6e9fb3e3cb8fc6f541971796bc8fcf4f63 100644
--- a/test/test_linalg_proxy.py
+++ b/test/test_linalg_proxy.py
@@ -29,6 +29,8 @@ import pyopencl as cl
 import pyopencl.array   # noqa
 
 from pytential import bind, sym
+
+from meshmode.array_context import PyOpenCLArrayContext
 from meshmode.mesh.generation import ( # noqa
         ellipse, NArmedStarfish, generate_torus, make_curve_mesh)
 
@@ -41,9 +43,9 @@ from pyopencl.tools import (  # noqa
 from test_matrix import _build_geometry, _build_block_index
 
 
-def _plot_partition_indices(queue, discr, indices, **kwargs):
+def _plot_partition_indices(actx, discr, indices, **kwargs):
     import matplotlib.pyplot as pt
-    indices = indices.get(queue)
+    indices = indices.get(actx.queue)
 
     args = [
         kwargs.get("method", "unknown"),
@@ -57,8 +59,9 @@ def _plot_partition_indices(queue, discr, indices, **kwargs):
     pt.savefig("test_partition_{0}_{1}_{3}d_ranges_{2}.png".format(*args))
     pt.clf()
 
+    from pytential.utils import flatten_to_numpy
     if discr.ambient_dim == 2:
-        sources = discr.nodes().get(queue)
+        sources = flatten_to_numpy(actx, discr.nodes())
 
         pt.figure(figsize=(10, 8), dpi=300)
 
@@ -86,11 +89,14 @@ def _plot_partition_indices(queue, discr, indices, **kwargs):
             isrc = indices.block_indices(i)
             marker[isrc] = 10.0 * (i + 1.0)
 
-        vis = make_visualizer(queue, discr, 10)
+        from meshmode.dof_array import unflatten
+        marker = unflatten(actx, discr, actx.from_numpy(marker))
+
+        vis = make_visualizer(actx, discr, 10)
 
-        filename = "test_partition_{0}_{1}_{3}d_{2}.png".format(*args)
+        filename = "test_partition_{0}_{1}_{3}d_{2}.vtu".format(*args)
         vis.write_vtk_file(filename, [
-            ("marker", cl.array.to_device(queue, marker))
+            ("marker", marker)
             ])
 
 
@@ -99,12 +105,14 @@ def _plot_partition_indices(queue, discr, indices, **kwargs):
 def test_partition_points(ctx_factory, use_tree, ambient_dim, visualize=False):
     ctx = ctx_factory()
     queue = cl.CommandQueue(ctx)
+    actx = PyOpenCLArrayContext(queue)
 
-    places, dofdesc = _build_geometry(queue, ambient_dim=ambient_dim)
-    _build_block_index(queue,
-            places.get_discretization(dofdesc.geometry, dofdesc.discr_stage),
-            use_tree=use_tree,
-            factor=0.6)
+    places, dofdesc = _build_geometry(actx, ambient_dim=ambient_dim)
+    discr = places.get_discretization(dofdesc.geometry, dofdesc.discr_stage)
+    indices = _build_block_index(actx, discr, use_tree=use_tree, factor=0.6)
+
+    if visualize:
+        _plot_partition_indices(actx, discr, indices, use_tree=use_tree)
 
 
 @pytest.mark.parametrize("ambient_dim", [2, 3])
@@ -112,24 +120,23 @@ def test_partition_points(ctx_factory, use_tree, ambient_dim, visualize=False):
 def test_proxy_generator(ctx_factory, ambient_dim, factor, visualize=False):
     ctx = ctx_factory()
     queue = cl.CommandQueue(ctx)
+    actx = PyOpenCLArrayContext(queue)
 
-    places, dofdesc = _build_geometry(queue, ambient_dim=ambient_dim)
+    places, dofdesc = _build_geometry(actx, ambient_dim=ambient_dim)
     dofdesc = dofdesc.to_stage1()
 
     density_discr = places.get_discretization(dofdesc.geometry, dofdesc.discr_stage)
-    srcindices = _build_block_index(queue,
-            density_discr,
-            factor=factor)
+    srcindices = _build_block_index(actx, density_discr, factor=factor)
 
     from pytential.linalg.proxy import ProxyGenerator
     generator = ProxyGenerator(places)
     proxies, pxyranges, pxycenters, pxyradii = \
-            generator(queue, dofdesc, srcindices)
+            generator(actx, dofdesc, srcindices)
 
-    proxies = np.vstack([p.get() for p in proxies])
-    pxyranges = pxyranges.get()
-    pxycenters = np.vstack([c.get() for c in pxycenters])
-    pxyradii = pxyradii.get()
+    proxies = np.vstack([actx.to_numpy(p) for p in proxies])
+    pxyranges = actx.to_numpy(pxyranges)
+    pxycenters = np.vstack([actx.to_numpy(c) for c in pxycenters])
+    pxyradii = actx.to_numpy(pxyradii)
 
     for i in range(srcindices.nblocks):
         ipxy = np.s_[pxyranges[i]:pxyranges[i + 1]]
@@ -142,12 +149,14 @@ def test_proxy_generator(ctx_factory, ambient_dim, factor, visualize=False):
         if ambient_dim == 2:
             import matplotlib.pyplot as pt
 
-            density_nodes = density_discr.nodes().get(queue)
-            ci = bind(places, sym.expansion_centers(ambient_dim, -1))(queue)
-            ci = np.vstack([c.get(queue) for c in ci])
-            ce = bind(places, sym.expansion_centers(ambient_dim, +1))(queue)
-            ce = np.vstack([c.get(queue) for c in ce])
-            r = bind(places, sym.expansion_radii(ambient_dim))(queue).get()
+            from pytential.utils import flatten_to_numpy
+            density_nodes = np.vstack(flatten_to_numpy(actx, density_discr.nodes()))
+            ci = bind(places, sym.expansion_centers(ambient_dim, -1))(actx)
+            ci = np.vstack(flatten_to_numpy(actx, ci))
+            ce = bind(places, sym.expansion_centers(ambient_dim, +1))(actx)
+            ce = np.vstack(flatten_to_numpy(actx, ce))
+            r = bind(places, sym.expansion_radii(ambient_dim))(actx)
+            r = flatten_to_numpy(actx, r)
 
             for i in range(srcindices.nblocks):
                 isrc = srcindices.block_indices(i)
@@ -195,10 +204,10 @@ def test_proxy_generator(ctx_factory, ambient_dim, factor, visualize=False):
                     b=pxycenters[:, i].reshape(-1))
 
                 mesh = merge_disjoint_meshes([mesh, density_discr.mesh])
-                discr = Discretization(ctx, mesh,
+                discr = Discretization(actx, mesh,
                     InterpolatoryQuadratureSimplexGroupFactory(10))
 
-                vis = make_visualizer(queue, discr, 10)
+                vis = make_visualizer(actx, discr, 10)
                 filename = "test_proxy_generator_{}d_{:04}.vtu".format(
                         ambient_dim, i)
                 vis.write_vtk_file(filename, [])
@@ -209,26 +218,25 @@ def test_proxy_generator(ctx_factory, ambient_dim, factor, visualize=False):
 def test_interaction_points(ctx_factory, ambient_dim, factor, visualize=False):
     ctx = ctx_factory()
     queue = cl.CommandQueue(ctx)
+    actx = PyOpenCLArrayContext(queue)
 
-    places, dofdesc = _build_geometry(queue, ambient_dim=ambient_dim)
+    places, dofdesc = _build_geometry(actx, ambient_dim=ambient_dim)
     dofdesc = dofdesc.to_stage1()
 
     density_discr = places.get_discretization(dofdesc.geometry, dofdesc.discr_stage)
-    srcindices = _build_block_index(queue,
-            density_discr,
-            factor=factor)
+    srcindices = _build_block_index(actx, density_discr, factor=factor)
 
     # generate proxy points
     from pytential.linalg.proxy import ProxyGenerator
     generator = ProxyGenerator(places)
-    _, _, pxycenters, pxyradii = generator(queue, dofdesc, srcindices)
+    _, _, pxycenters, pxyradii = generator(actx, dofdesc, srcindices)
 
     from pytential.linalg.proxy import (  # noqa
             gather_block_neighbor_points,
             gather_block_interaction_points)
-    nbrindices = gather_block_neighbor_points(density_discr,
+    nbrindices = gather_block_neighbor_points(actx, density_discr,
             srcindices, pxycenters, pxyradii)
-    nodes, ranges = gather_block_interaction_points(
+    nodes, ranges = gather_block_interaction_points(actx,
             places, dofdesc, srcindices)
 
     srcindices = srcindices.get(queue)
@@ -240,12 +248,13 @@ def test_interaction_points(ctx_factory, ambient_dim, factor, visualize=False):
 
         assert not np.any(np.isin(inbr, isrc))
 
+    from pytential.utils import flatten_to_numpy
     if visualize:
         if ambient_dim == 2:
             import matplotlib.pyplot as pt
-            density_nodes = density_discr.nodes().get(queue)
-            nodes = nodes.get(queue)
-            ranges = ranges.get(queue)
+            density_nodes = flatten_to_numpy(actx, density_discr.nodes())
+            nodes = flatten_to_numpy(actx, nodes)
+            ranges = actx.to_numpy(ranges)
 
             for i in range(srcindices.nblocks):
                 isrc = srcindices.block_indices(i)
@@ -255,14 +264,14 @@ def test_interaction_points(ctx_factory, ambient_dim, factor, visualize=False):
                 pt.figure(figsize=(10, 8))
                 pt.plot(density_nodes[0], density_nodes[1],
                         'ko', ms=2.0, alpha=0.5)
-                pt.plot(density_nodes[0, srcindices.indices],
-                        density_nodes[1, srcindices.indices],
+                pt.plot(density_nodes[0][srcindices.indices],
+                        density_nodes[1][srcindices.indices],
                         'o', ms=2.0)
-                pt.plot(density_nodes[0, isrc], density_nodes[1, isrc],
+                pt.plot(density_nodes[0][isrc], density_nodes[1][isrc],
                         'o', ms=2.0)
-                pt.plot(density_nodes[0, inbr], density_nodes[1, inbr],
+                pt.plot(density_nodes[0][inbr], density_nodes[1][inbr],
                         'o', ms=2.0)
-                pt.plot(nodes[0, iall], nodes[1, iall],
+                pt.plot(nodes[0][iall], nodes[1][iall],
                         'x', ms=2.0)
                 pt.xlim([-1.5, 1.5])
                 pt.ylim([-1.5, 1.5])
@@ -282,9 +291,11 @@ def test_interaction_points(ctx_factory, ambient_dim, factor, visualize=False):
                 marker[srcindices.indices] = 0.0
                 marker[isrc] = -42.0
                 marker[inbr] = +42.0
-                marker_dev = cl.array.to_device(queue, marker)
 
-                vis = make_visualizer(queue, density_discr, 10)
+                from meshmode.dof_array import unflatten
+                marker_dev = unflatten(actx, density_discr, actx.from_numpy(marker))
+
+                vis = make_visualizer(actx, density_discr, 10)
                 filename = "test_area_query_{}d_{:04}.vtu".format(ambient_dim, i)
                 vis.write_vtk_file(filename, [
                     ("marker", marker_dev),
diff --git a/test/test_matrix.py b/test/test_matrix.py
index 3048486597e0386e981fa7bdbe96dfa5e4045e45..ce429642df74870fa2553fffac0e548ee5bb4061 100644
--- a/test/test_matrix.py
+++ b/test/test_matrix.py
@@ -31,16 +31,17 @@ import numpy as np
 import numpy.linalg as la
 
 import pyopencl as cl
-import pyopencl.array   # noqa
+import pyopencl.array
 
 from pytools.obj_array import make_obj_array, is_obj_array
 
 from sumpy.tools import BlockIndexRanges, MatrixBlockIndexRanges
 from sumpy.symbolic import USE_SYMENGINE
 
-from pytential import sym
+from pytential import bind, sym
 from pytential import GeometryCollection
 
+from meshmode.array_context import PyOpenCLArrayContext
 from meshmode.mesh.generation import (  # noqa
         ellipse, NArmedStarfish, make_curve_mesh, generate_torus)
 
@@ -55,7 +56,7 @@ except ImportError:
     pass
 
 
-def _build_geometry(queue,
+def _build_geometry(actx,
         ambient_dim=2,
         nelements=30,
         target_order=7,
@@ -79,8 +80,7 @@ def _build_geometry(queue,
     from meshmode.discretization.poly_element import \
             InterpolatoryQuadratureSimplexGroupFactory
     from pytential.qbx import QBXLayerPotentialSource
-    density_discr = Discretization(
-            queue.context, mesh,
+    density_discr = Discretization(actx, mesh,
             InterpolatoryQuadratureSimplexGroupFactory(target_order))
 
     qbx = QBXLayerPotentialSource(density_discr,
@@ -92,24 +92,24 @@ def _build_geometry(queue,
     return places, places.auto_source
 
 
-def _build_block_index(queue,
-                       discr,
+def _build_block_index(actx, discr,
                        nblks=10,
                        factor=1.0,
                        use_tree=True):
-    ndofs = discr.ndofs
-    max_particles_in_box = ndofs // nblks
+    max_particles_in_box = discr.ndofs // nblks
 
     # create index ranges
     from pytential.linalg.proxy import partition_by_nodes
-    indices = partition_by_nodes(discr,
-            use_tree=use_tree, max_nodes_in_box=max_particles_in_box)
+    indices = partition_by_nodes(actx, discr,
+            use_tree=use_tree,
+            max_nodes_in_box=max_particles_in_box)
 
     if abs(factor - 1.0) < 1.0e-14:
         return indices
 
     # randomly pick a subset of points
-    indices = indices.get(queue)
+    # FIXME: this needs porting in sumpy.tools.BlockIndexRanges
+    indices = indices.get(actx.queue)
 
     indices_ = np.empty(indices.nblocks, dtype=np.object)
     for i in range(indices.nblocks):
@@ -120,13 +120,11 @@ def _build_block_index(queue,
         indices_[i] = np.sort(
                 np.random.choice(iidx, size=isize, replace=False))
 
-    ranges_ = cl.array.to_device(queue,
-            np.cumsum([0] + [r.shape[0] for r in indices_]))
-    indices_ = cl.array.to_device(queue, np.hstack(indices_))
+    ranges_ = actx.from_numpy(np.cumsum([0] + [r.shape[0] for r in indices_]))
+    indices_ = actx.from_numpy(np.hstack(indices_))
 
-    indices = BlockIndexRanges(discr.cl_context,
-                               indices_.with_queue(None),
-                               ranges_.with_queue(None))
+    indices = BlockIndexRanges(actx.context,
+            actx.freeze(indices_), actx.freeze(ranges_))
 
     return indices
 
@@ -137,8 +135,8 @@ def _build_op(lpot_id,
               source=sym.DEFAULT_SOURCE,
               target=sym.DEFAULT_TARGET,
               qbx_forced_limit="avg"):
-
     from sumpy.kernel import LaplaceKernel, HelmholtzKernel
+
     if k:
         knl = HelmholtzKernel(ambient_dim)
         knl_kwargs = {"k": k}
@@ -200,6 +198,7 @@ def _max_block_error(mat, blk, index_set):
 def test_matrix_build(ctx_factory, k, curve_f, lpot_id, visualize=False):
     cl_ctx = ctx_factory()
     queue = cl.CommandQueue(cl_ctx)
+    actx = PyOpenCLArrayContext(queue)
 
     # prevent cache 'splosion
     from sympy.core.cache import clear_cache
@@ -215,8 +214,7 @@ def test_matrix_build(ctx_factory, k, curve_f, lpot_id, visualize=False):
     from meshmode.discretization import Discretization
     from meshmode.discretization.poly_element import \
             InterpolatoryQuadratureSimplexGroupFactory
-    pre_density_discr = Discretization(
-            cl_ctx, mesh,
+    pre_density_discr = Discretization(actx, mesh,
             InterpolatoryQuadratureSimplexGroupFactory(target_order))
 
     from pytential.qbx import QBXLayerPotentialSource
@@ -228,7 +226,7 @@ def test_matrix_build(ctx_factory, k, curve_f, lpot_id, visualize=False):
 
     from pytential.qbx.refinement import refine_geometry_collection
     places = GeometryCollection(qbx)
-    places = refine_geometry_collection(queue, places,
+    places = refine_geometry_collection(places,
             kernel_length_scale=(5 / k if k else None))
 
     source = places.auto_source.to_stage1()
@@ -237,15 +235,14 @@ def test_matrix_build(ctx_factory, k, curve_f, lpot_id, visualize=False):
     op, u_sym, knl_kwargs = _build_op(lpot_id, k=k,
             source=places.auto_source,
             target=places.auto_target)
-    from pytential import bind
     bound_op = bind(places, op)
 
     from pytential.symbolic.execution import build_matrix
-    mat = build_matrix(queue, places, op, u_sym).get()
+    mat = build_matrix(actx, places, op, u_sym).get()
 
     if visualize:
         from sumpy.tools import build_matrix as build_matrix_via_matvec
-        mat2 = bound_op.scipy_op(queue, "u", dtype=mat.dtype, **knl_kwargs)
+        mat2 = bound_op.scipy_op(actx, "u", dtype=mat.dtype, **knl_kwargs)
         mat2 = build_matrix_via_matvec(mat2)
         print(la.norm((mat - mat2).real, "fro") / la.norm(mat2.real, "fro"),
               la.norm((mat - mat2).imag, "fro") / la.norm(mat2.imag, "fro"))
@@ -267,7 +264,7 @@ def test_matrix_build(ctx_factory, k, curve_f, lpot_id, visualize=False):
         pt.colorbar()
         pt.show()
 
-    from sumpy.tools import vector_to_device, vector_from_device
+    from pytential.utils import unflatten_from_numpy, flatten_to_numpy
     np.random.seed(12)
     for i in range(5):
         if is_obj_array(u_sym):
@@ -277,13 +274,12 @@ def test_matrix_build(ctx_factory, k, curve_f, lpot_id, visualize=False):
                 ])
         else:
             u = np.random.randn(density_discr.ndofs)
+        u_dev = unflatten_from_numpy(actx, density_discr, u)
 
-        u_dev = vector_to_device(queue, u)
         res_matvec = np.hstack(
-                list(vector_from_device(
-                    queue, bound_op(queue, u=u_dev))))
-
-        res_mat = mat.dot(np.hstack(list(u)))
+                flatten_to_numpy(actx, bound_op(actx, u=u_dev))
+                )
+        res_mat = mat.dot(np.hstack(u))
 
         abs_err = la.norm(res_mat - res_matvec, np.inf)
         rel_err = abs_err / la.norm(res_matvec, np.inf)
@@ -299,6 +295,7 @@ def test_p2p_block_builder(ctx_factory, factor, ambient_dim, lpot_id,
                            visualize=False):
     ctx = ctx_factory()
     queue = cl.CommandQueue(ctx)
+    actx = PyOpenCLArrayContext(queue)
 
     # prevent cache explosion
     from sympy.core.cache import clear_cache
@@ -312,7 +309,7 @@ def test_p2p_block_builder(ctx_factory, factor, ambient_dim, lpot_id,
             )
     target_order = 2 if ambient_dim == 3 else 7
 
-    places, dofdesc = _build_geometry(queue,
+    places, dofdesc = _build_geometry(actx,
             target_order=target_order,
             ambient_dim=ambient_dim,
             auto_where=place_ids)
@@ -323,14 +320,14 @@ def test_p2p_block_builder(ctx_factory, factor, ambient_dim, lpot_id,
 
     dd = places.auto_source
     density_discr = places.get_discretization(dd.geometry, dd.discr_stage)
-    index_set = _build_block_index(queue, density_discr, factor=factor)
+    index_set = _build_block_index(actx, density_discr, factor=factor)
     index_set = MatrixBlockIndexRanges(ctx, index_set, index_set)
 
     from pytential.symbolic.execution import _prepare_expr
     expr = _prepare_expr(places, op)
 
     from pytential.symbolic.matrix import P2PMatrixBuilder
-    mbuilder = P2PMatrixBuilder(queue,
+    mbuilder = P2PMatrixBuilder(actx,
             dep_expr=u_sym,
             other_dep_exprs=[],
             dep_source=places.get_geometry(dd.geometry),
@@ -341,7 +338,7 @@ def test_p2p_block_builder(ctx_factory, factor, ambient_dim, lpot_id,
     mat = mbuilder(expr)
 
     from pytential.symbolic.matrix import FarFieldBlockBuilder
-    mbuilder = FarFieldBlockBuilder(queue,
+    mbuilder = FarFieldBlockBuilder(actx,
             dep_expr=u_sym,
             other_dep_exprs=[],
             dep_source=places.get_geometry(dd.geometry),
@@ -352,7 +349,7 @@ def test_p2p_block_builder(ctx_factory, factor, ambient_dim, lpot_id,
             exclude_self=True)
     blk = mbuilder(expr)
 
-    index_set = index_set.get(queue)
+    index_set = index_set.get(actx.queue)
     if visualize and ambient_dim == 2:
         blk_full = np.zeros_like(mat)
         mat_full = np.zeros_like(mat)
@@ -381,6 +378,7 @@ def test_qbx_block_builder(ctx_factory, factor, ambient_dim, lpot_id,
                            visualize=False):
     ctx = ctx_factory()
     queue = cl.CommandQueue(ctx)
+    actx = PyOpenCLArrayContext(queue)
 
     # prevent cache explosion
     from sympy.core.cache import clear_cache
@@ -394,7 +392,7 @@ def test_qbx_block_builder(ctx_factory, factor, ambient_dim, lpot_id,
             )
     target_order = 2 if ambient_dim == 3 else 7
 
-    places, _ = _build_geometry(queue,
+    places, _ = _build_geometry(actx,
             target_order=target_order,
             ambient_dim=ambient_dim,
             auto_where=place_ids)
@@ -409,11 +407,11 @@ def test_qbx_block_builder(ctx_factory, factor, ambient_dim, lpot_id,
 
     dd = places.auto_source
     density_discr = places.get_discretization(dd.geometry, dd.discr_stage)
-    index_set = _build_block_index(queue, density_discr, factor=factor)
+    index_set = _build_block_index(actx, density_discr, factor=factor)
     index_set = MatrixBlockIndexRanges(ctx, index_set, index_set)
 
     from pytential.symbolic.matrix import NearFieldBlockBuilder
-    mbuilder = NearFieldBlockBuilder(queue,
+    mbuilder = NearFieldBlockBuilder(actx,
             dep_expr=u_sym,
             other_dep_exprs=[],
             dep_source=places.get_geometry(dd.geometry),
@@ -424,7 +422,7 @@ def test_qbx_block_builder(ctx_factory, factor, ambient_dim, lpot_id,
     blk = mbuilder(expr)
 
     from pytential.symbolic.matrix import MatrixBuilder
-    mbuilder = MatrixBuilder(queue,
+    mbuilder = MatrixBuilder(actx,
             dep_expr=u_sym,
             other_dep_exprs=[],
             dep_source=places.get_geometry(dd.geometry),
@@ -462,6 +460,7 @@ def test_build_matrix_places(ctx_factory,
         source_discr_stage, target_discr_stage, visualize=False):
     ctx = ctx_factory()
     queue = cl.CommandQueue(ctx)
+    actx = PyOpenCLArrayContext(queue)
 
     # prevent cache explosion
     from sympy.core.cache import clear_cache
@@ -476,7 +475,7 @@ def test_build_matrix_places(ctx_factory,
             )
 
     # build test operators
-    places, _ = _build_geometry(queue,
+    places, _ = _build_geometry(actx,
             nelements=8,
             target_order=2,
             ambient_dim=2,
@@ -493,7 +492,7 @@ def test_build_matrix_places(ctx_factory,
     dd = places.auto_source
     source_discr = places.get_discretization(dd.geometry, dd.discr_stage)
 
-    index_set = _build_block_index(queue, source_discr, factor=0.6)
+    index_set = _build_block_index(actx, source_discr, factor=0.6)
     index_set = MatrixBlockIndexRanges(ctx, index_set, index_set)
 
     from pytential.symbolic.execution import _prepare_expr
@@ -501,7 +500,7 @@ def test_build_matrix_places(ctx_factory,
 
     # build full QBX matrix
     from pytential.symbolic.matrix import MatrixBuilder
-    mbuilder = MatrixBuilder(queue,
+    mbuilder = MatrixBuilder(actx,
             dep_expr=u_sym,
             other_dep_exprs=[],
             dep_source=places.get_geometry(dd.geometry),
@@ -512,7 +511,7 @@ def test_build_matrix_places(ctx_factory,
 
     # build full p2p matrix
     from pytential.symbolic.matrix import P2PMatrixBuilder
-    mbuilder = P2PMatrixBuilder(queue,
+    mbuilder = P2PMatrixBuilder(actx,
             dep_expr=u_sym,
             other_dep_exprs=[],
             dep_source=places.get_geometry(dd.geometry),
@@ -525,7 +524,7 @@ def test_build_matrix_places(ctx_factory,
 
     # build block qbx and p2p matrices
     from pytential.symbolic.matrix import NearFieldBlockBuilder
-    mbuilder = NearFieldBlockBuilder(queue,
+    mbuilder = NearFieldBlockBuilder(actx,
             dep_expr=u_sym,
             other_dep_exprs=[],
             dep_source=places.get_geometry(dd.geometry),
@@ -538,7 +537,7 @@ def test_build_matrix_places(ctx_factory,
         assert _max_block_error(qbx_mat, mat, index_set.get(queue)) < 1.0e-14
 
     from pytential.symbolic.matrix import FarFieldBlockBuilder
-    mbuilder = FarFieldBlockBuilder(queue,
+    mbuilder = FarFieldBlockBuilder(actx,
             dep_expr=u_sym,
             other_dep_exprs=[],
             dep_source=places.get_geometry(dd.geometry),
diff --git a/test/test_stokes.py b/test/test_stokes.py
index 19167efe102673c5a5350b89002b9b6f28b01509..5ac2ab47a8ced6dfbd91b612ca37c1ea88fcb8ca 100644
--- a/test/test_stokes.py
+++ b/test/test_stokes.py
@@ -25,9 +25,9 @@ THE SOFTWARE.
 
 import numpy as np
 import pyopencl as cl
-import pyopencl.clmath  # noqa
 import pytest
 
+from meshmode.array_context import PyOpenCLArrayContext
 from meshmode.discretization import Discretization
 from meshmode.discretization.poly_element import \
         InterpolatoryQuadratureSimplexGroupFactory
@@ -46,7 +46,8 @@ import logging
 
 def run_exterior_stokes_2d(ctx_factory, nelements,
         mesh_order=4, target_order=4, qbx_order=4,
-        fmm_order=10, mu=1, circle_rad=1.5, visualize=False):
+        fmm_order=False,  # FIXME: FMM is slower than direct eval
+        mu=1, circle_rad=1.5, visualize=False):
 
     # This program tests an exterior Stokes flow in 2D using the
     # compound representation given in Hsiao & Kress,
@@ -57,6 +58,7 @@ def run_exterior_stokes_2d(ctx_factory, nelements,
 
     cl_ctx = cl.create_some_context()
     queue = cl.CommandQueue(cl_ctx)
+    actx = PyOpenCLArrayContext(queue)
 
     ovsmp_target_order = 4*target_order
 
@@ -68,8 +70,7 @@ def run_exterior_stokes_2d(ctx_factory, nelements,
             lambda t: circle_rad * ellipse(1, t),
             np.linspace(0, 1, nelements+1),
             target_order)
-    coarse_density_discr = Discretization(
-            cl_ctx, mesh,
+    coarse_density_discr = Discretization(actx, mesh,
             InterpolatoryQuadratureSimplexGroupFactory(target_order))
 
     from pytential.qbx import QBXLayerPotentialSource
@@ -111,8 +112,8 @@ def run_exterior_stokes_2d(ctx_factory, nelements,
 
     density_discr = places.get_discretization(sym.DEFAULT_SOURCE)
 
-    normal = bind(places, sym.normal(2).as_vector())(queue)
-    path_length = bind(places, sym.integral(2, 1, 1))(queue)
+    normal = bind(places, sym.normal(2).as_vector())(actx)
+    path_length = bind(places, sym.integral(2, 1, 1))(actx)
 
     # }}}
 
@@ -150,47 +151,52 @@ def run_exterior_stokes_2d(ctx_factory, nelements,
 
     def fund_soln(x, y, loc, strength):
         #with direction (1,0) for point source
-        r = cl.clmath.sqrt((x - loc[0])**2 + (y - loc[1])**2)
+        r = actx.np.sqrt((x - loc[0])**2 + (y - loc[1])**2)
         scaling = strength/(4*np.pi*mu)
-        xcomp = (-cl.clmath.log(r) + (x - loc[0])**2/r**2) * scaling
+        xcomp = (-actx.np.log(r) + (x - loc[0])**2/r**2) * scaling
         ycomp = ((x - loc[0])*(y - loc[1])/r**2) * scaling
         return [xcomp, ycomp]
 
     def rotlet_soln(x, y, loc):
-        r = cl.clmath.sqrt((x - loc[0])**2 + (y - loc[1])**2)
+        r = actx.np.sqrt((x - loc[0])**2 + (y - loc[1])**2)
         xcomp = -(y - loc[1])/r**2
         ycomp = (x - loc[0])/r**2
         return [xcomp, ycomp]
 
     def fund_and_rot_soln(x, y, loc, strength):
         #with direction (1,0) for point source
-        r = cl.clmath.sqrt((x - loc[0])**2 + (y - loc[1])**2)
+        r = actx.np.sqrt((x - loc[0])**2 + (y - loc[1])**2)
         scaling = strength/(4*np.pi*mu)
         xcomp = (
-                (-cl.clmath.log(r) + (x - loc[0])**2/r**2) * scaling
+                (-actx.np.log(r) + (x - loc[0])**2/r**2) * scaling
                 - (y - loc[1])*strength*0.125/r**2 + 3.3)
         ycomp = (
                 ((x - loc[0])*(y - loc[1])/r**2) * scaling
                 + (x - loc[0])*strength*0.125/r**2 + 1.5)
-        return [xcomp, ycomp]
+        return make_obj_array([xcomp, ycomp])
 
-    nodes = density_discr.nodes().with_queue(queue)
+    from meshmode.dof_array import unflatten, flatten, thaw
+    nodes = flatten(thaw(actx, density_discr.nodes()))
     fund_soln_loc = np.array([0.5, -0.2])
     strength = 100.
-    bc = fund_and_rot_soln(nodes[0], nodes[1], fund_soln_loc, strength)
+    bc = unflatten(actx, density_discr,
+            fund_and_rot_soln(nodes[0], nodes[1], fund_soln_loc, strength))
 
     omega_sym = sym.make_sym_vector("omega", dim)
     u_A_sym_bdry = stokeslet_obj.apply(  # noqa: N806
             omega_sym, mu_sym, qbx_forced_limit=1)
 
-    omega = [
-            cl.array.to_device(queue, (strength/path_length)*np.ones(len(nodes[0]))),
-            cl.array.to_device(queue, np.zeros(len(nodes[0])))]
+    from pytential.utils import unflatten_from_numpy
+    omega = unflatten_from_numpy(actx, density_discr, make_obj_array([
+            (strength/path_length)*np.ones(len(nodes[0])),
+            np.zeros(len(nodes[0]))
+            ]))
+
     bvp_rhs = bind(places,
-            sym.make_sym_vector("bc", dim) + u_A_sym_bdry)(queue,
+            sym.make_sym_vector("bc", dim) + u_A_sym_bdry)(actx,
                     bc=bc, mu=mu, omega=omega)
     gmres_result = gmres(
-            bound_op.scipy_op(queue, "sigma", np.float64, mu=mu, normal=normal),
+            bound_op.scipy_op(actx, "sigma", np.float64, mu=mu, normal=normal),
             bvp_rhs,
             x0=bvp_rhs,
             tol=1e-9, progress=True,
@@ -203,7 +209,7 @@ def run_exterior_stokes_2d(ctx_factory, nelements,
 
     sigma = gmres_result.solution
     sigma_int_val_sym = sym.make_sym_vector("sigma_int_val", 2)
-    int_val = bind(places, sym.integral(2, 1, sigma_sym))(queue, sigma=sigma)
+    int_val = bind(places, sym.integral(2, 1, sigma_sym))(actx, sigma=sigma)
     int_val = -int_val/(2 * np.pi)
     print("int_val = ", int_val)
 
@@ -217,7 +223,7 @@ def run_exterior_stokes_2d(ctx_factory, nelements,
             - u_A_sym_vol + sigma_int_val_sym)
 
     where = (sym.DEFAULT_SOURCE, "point_target")
-    vel = bind(places, representation_sym, auto_where=where)(queue,
+    vel = bind(places, representation_sym, auto_where=where)(actx,
             sigma=sigma,
             mu=mu,
             normal=normal,
@@ -226,7 +232,7 @@ def run_exterior_stokes_2d(ctx_factory, nelements,
     print("@@@@@@@@")
 
     plot_vel = bind(places, representation_sym,
-            auto_where=(sym.DEFAULT_SOURCE, "plot_target"))(queue,
+            auto_where=(sym.DEFAULT_SOURCE, "plot_target"))(actx,
                     sigma=sigma,
                     mu=mu,
                     normal=normal,
@@ -240,8 +246,10 @@ def run_exterior_stokes_2d(ctx_factory, nelements,
                 ])
 
     exact_soln = fund_and_rot_soln(
-            cl.array.to_device(queue, eval_points[0]), cl.array.to_device(
-                queue, eval_points[1]), fund_soln_loc, strength)
+            actx.from_numpy(eval_points[0]),
+            actx.from_numpy(eval_points[1]),
+            fund_soln_loc,
+            strength)
 
     vel = get_obj_array(vel)
     err = vel-get_obj_array(exact_soln)
@@ -289,7 +297,7 @@ def run_exterior_stokes_2d(ctx_factory, nelements,
 
     # }}}
 
-    h_max = bind(places, sym.h_max(qbx.ambient_dim))(queue)
+    h_max = bind(places, sym.h_max(qbx.ambient_dim))(actx)
     return h_max, l2_err