diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index f82e332a6853b8438cb14b9189cea55f32ef86f2..9431f2a9eb1beec0f70c06aadb5bebbac559fe04 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -93,7 +93,11 @@ jobs:
                 . ./ci-support-v0
 
                 if test "$DOWNSTREAM_PROJECT" = "mirgecom"; then
-                    git clone "https://github.com/illinois-ceesd/$DOWNSTREAM_PROJECT.git"
+                    if [[ "$GITHUB_HEAD_REF" = "nodal-reduction-device-scalar" ]]; then
+                        git clone "https://github.com/majosm/$DOWNSTREAM_PROJECT.git" -b "nodal-reduction-device-scalar"
+                    else
+                        git clone "https://github.com/illinois-ceesd/$DOWNSTREAM_PROJECT.git"
+                    fi
                 else
                     git clone "https://github.com/inducer/$DOWNSTREAM_PROJECT.git"
                 fi
diff --git a/doc/conf.py b/doc/conf.py
index 0cd5ba6d60692dcd95cd1b6dcae1fa9fdfdbfa07..31bc96d9b0d409cafb0db5686acbf6dce590589c 100644
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -26,6 +26,11 @@ version = get_version()
 # The full version, including alpha/beta/rc tags.
 release = version
 
+autodoc_type_aliases = {
+    "DeviceScalar": "arraycontext.DeviceScalar",
+    "DeviceArray": "arraycontext.DeviceArray",
+    }
+
 intersphinx_mapping = {
     "https://docs.python.org/3/": None,
     "https://numpy.org/doc/stable/": None,
diff --git a/examples/advection/surface.py b/examples/advection/surface.py
index 973db0ff986ac424bf3324eeb9b56de8930f8730..bc06efc5d5f4324c32d78a2e81cc08847d216180 100644
--- a/examples/advection/surface.py
+++ b/examples/advection/surface.py
@@ -206,7 +206,8 @@ def main(ctx_factory, dim=2, order=4, use_quad=False, visualize=False):
     # {{{ time stepping
 
     # FIXME: dt estimate is not necessarily valid for surfaces
-    dt = 0.45 * adv_operator.estimate_rk4_timestep(actx, dcoll, fields=u0)
+    dt = actx.to_numpy(
+        0.45 * adv_operator.estimate_rk4_timestep(actx, dcoll, fields=u0))
     nsteps = int(final_time // dt) + 1
 
     logger.info("dt:        %.5e", dt)
diff --git a/examples/advection/var-velocity.py b/examples/advection/var-velocity.py
index 7f81f3d387b5eb456f5f5b80f4b50526929c7bf1..422259f92b5f15707d7d25c1996fe32706800cae 100644
--- a/examples/advection/var-velocity.py
+++ b/examples/advection/var-velocity.py
@@ -196,7 +196,7 @@ def main(ctx_factory, dim=2, order=4, use_quad=False, visualize=False,
     def rhs(t, u):
         return adv_operator.operator(t, u)
 
-    dt = adv_operator.estimate_rk4_timestep(actx, dcoll, fields=u)
+    dt = actx.to_numpy(adv_operator.estimate_rk4_timestep(actx, dcoll, fields=u))
 
     logger.info("Timestep size: %g", dt)
 
diff --git a/examples/advection/weak.py b/examples/advection/weak.py
index ae72ac498d9196a6be8c3a34db46d5cbef44de73..e27f2aa84e0f4080a9c01c5f448bad633a91872e 100644
--- a/examples/advection/weak.py
+++ b/examples/advection/weak.py
@@ -166,7 +166,7 @@ def main(ctx_factory, dim=2, order=4, visualize=False):
     def rhs(t, u):
         return adv_operator.operator(t, u)
 
-    dt = adv_operator.estimate_rk4_timestep(actx, dcoll, fields=u)
+    dt = actx.to_numpy(adv_operator.estimate_rk4_timestep(actx, dcoll, fields=u))
 
     logger.info("Timestep size: %g", dt)
 
diff --git a/examples/maxwell/cavities.py b/examples/maxwell/cavities.py
index 3380b37879899dc0adc4e8d60758ecd96b25ed36..6966f786779889c2348aacf49452305e3c0c5ce0 100644
--- a/examples/maxwell/cavities.py
+++ b/examples/maxwell/cavities.py
@@ -93,7 +93,8 @@ def main(ctx_factory, dim=3, order=4, visualize=False):
     def rhs(t, w):
         return maxwell_operator.operator(t, w)
 
-    dt = maxwell_operator.estimate_rk4_timestep(actx, dcoll, fields=fields)
+    dt = actx.to_numpy(
+        maxwell_operator.estimate_rk4_timestep(actx, dcoll, fields=fields))
 
     dt_stepper = set_up_rk4("w", dt, fields, rhs)
 
diff --git a/examples/wave/var-propagation-speed.py b/examples/wave/var-propagation-speed.py
index bbd763a0f581dccd3fa3d108d55a6ce9c0c94050..0d1fd9ea9defe03633fc31f980beadebe2bc167b 100644
--- a/examples/wave/var-propagation-speed.py
+++ b/examples/wave/var-propagation-speed.py
@@ -104,7 +104,8 @@ def main(ctx_factory, dim=2, order=4, visualize=False):
     def rhs(t, w):
         return wave_op.operator(t, w)
 
-    dt = 2/3 * wave_op.estimate_rk4_timestep(actx, dcoll, fields=fields)
+    dt = actx.to_numpy(
+        2/3 * wave_op.estimate_rk4_timestep(actx, dcoll, fields=fields))
     dt_stepper = set_up_rk4("w", dt, fields, rhs)
 
     final_t = 1
diff --git a/examples/wave/wave-min-mpi.py b/examples/wave/wave-min-mpi.py
index 3cdc3fa62838eb7b61114b47f75c6613ebf9a9dc..4c4813a9e896d4cd6801cf3909f0b038e6026fda 100644
--- a/examples/wave/wave-min-mpi.py
+++ b/examples/wave/wave-min-mpi.py
@@ -116,7 +116,8 @@ def main(ctx_factory, dim=2, order=4, visualize=False):
         [dcoll.zeros(actx) for i in range(dcoll.dim)]
     )
 
-    dt = 2/3 * wave_op.estimate_rk4_timestep(actx, dcoll, fields=fields)
+    dt = actx.to_numpy(
+        2/3 * wave_op.estimate_rk4_timestep(actx, dcoll, fields=fields))
 
     wave_op.check_bc_coverage(local_mesh)
 
diff --git a/examples/wave/wave-op-mpi.py b/examples/wave/wave-op-mpi.py
index ede428747c2da73c1317d08256a3d136b2bf33f0..73e6330087d81e3eae60d90c0b61e95172b56d5b 100644
--- a/examples/wave/wave-op-mpi.py
+++ b/examples/wave/wave-op-mpi.py
@@ -185,7 +185,7 @@ def main(ctx_factory, dim=2, order=3, visualize=False):
             )
 
     c = 1
-    dt = 0.45 * estimate_rk4_timestep(actx, dcoll, c)
+    dt = actx.to_numpy(0.45 * estimate_rk4_timestep(actx, dcoll, c))
 
     vis = make_visualizer(dcoll)
 
@@ -201,12 +201,12 @@ def main(ctx_factory, dim=2, order=3, visualize=False):
     while t < t_final:
         fields = rk4_step(fields, t, dt, rhs)
 
-        l2norm = op.norm(dcoll, fields[0], 2)
+        l2norm = actx.to_numpy(op.norm(dcoll, fields[0], 2))
 
         if istep % 10 == 0:
-            linfnorm = op.norm(dcoll, fields[0], np.inf)
-            nodalmax = op.nodal_max(dcoll, "vol", fields[0])
-            nodalmin = op.nodal_min(dcoll, "vol", fields[0])
+            linfnorm = actx.to_numpy(op.norm(dcoll, fields[0], np.inf))
+            nodalmax = actx.to_numpy(op.nodal_max(dcoll, "vol", fields[0]))
+            nodalmin = actx.to_numpy(op.nodal_min(dcoll, "vol", fields[0]))
             if comm.rank == 0:
                 logger.info(f"step: {istep} t: {t} "
                             f"L2: {l2norm} "
diff --git a/examples/wave/wave-op-var-velocity.py b/examples/wave/wave-op-var-velocity.py
index 25df0fc4be01134dd41ff8cf6afcf7eebd3d83c8..2aabfb3757980f4fbc2cb517d851b08b4a584507 100644
--- a/examples/wave/wave-op-var-velocity.py
+++ b/examples/wave/wave-op-var-velocity.py
@@ -189,7 +189,7 @@ def main(ctx_factory, dim=2, order=3, visualize=False):
 
     # bounded above by 1
     c = 0.2 + 0.8*bump(actx, dcoll, center=np.zeros(3), width=0.5)
-    dt = 0.5 * estimate_rk4_timestep(actx, dcoll, c=1)
+    dt = actx.to_numpy(0.5 * estimate_rk4_timestep(actx, dcoll, c=1))
 
     fields = flat_obj_array(
             bump(actx, dcoll, ),
@@ -209,12 +209,17 @@ def main(ctx_factory, dim=2, order=3, visualize=False):
     while t < t_final:
         fields = rk4_step(fields, t, dt, rhs)
 
+        l2norm = actx.to_numpy(op.norm(dcoll, fields[0], 2))
+
         if istep % 10 == 0:
+            linfnorm = actx.to_numpy(op.norm(dcoll, fields[0], np.inf))
+            nodalmax = actx.to_numpy(op.nodal_max(dcoll, "vol", fields[0]))
+            nodalmin = actx.to_numpy(op.nodal_min(dcoll, "vol", fields[0]))
             logger.info(f"step: {istep} t: {t} "
-                        f"L2: {op.norm(dcoll, fields[0], 2)} "
-                        f"Linf: {op.norm(dcoll, fields[0], np.inf)} "
-                        f"sol max: {op.nodal_max(dcoll, 'vol', fields[0])} "
-                        f"sol min: {op.nodal_min(dcoll, 'vol', fields[0])}")
+                        f"L2: {l2norm} "
+                        f"Linf: {linfnorm} "
+                        f"sol max: {nodalmax} "
+                        f"sol min: {nodalmin}")
             if visualize:
                 vis.write_vtk_file(
                     f"fld-wave-eager-var-velocity-{istep:04d}.vtu",
@@ -230,7 +235,7 @@ def main(ctx_factory, dim=2, order=3, visualize=False):
 
         # NOTE: These are here to ensure the solution is bounded for the
         # time interval specified
-        assert op.norm(dcoll, fields[0], 2) < 1
+        assert l2norm < 1
 
 
 if __name__ == "__main__":
diff --git a/grudge/dt_utils.py b/grudge/dt_utils.py
index 41031e39104d670114fd9afc794fd861b58cdf91..1cc1728118d815fce3aea9d7c2c05081b1c11976 100644
--- a/grudge/dt_utils.py
+++ b/grudge/dt_utils.py
@@ -45,7 +45,7 @@ THE SOFTWARE.
 
 import numpy as np
 
-from arraycontext import ArrayContext, thaw, freeze
+from arraycontext import ArrayContext, thaw, freeze, DeviceScalar
 from meshmode.transform_metadata import FirstAxisIsElementsTag
 
 from grudge.dof_desc import DD_VOLUME, DOFDesc, as_dofdesc
@@ -158,7 +158,7 @@ def dt_non_geometric_factors(
 
 @memoize_on_first_arg
 def h_max_from_volume(
-        dcoll: DiscretizationCollection, dim=None, dd=None) -> float:
+        dcoll: DiscretizationCollection, dim=None, dd=None) -> "DeviceScalar":
     """Returns a (maximum) characteristic length based on the volume of the
     elements. This length may not be representative if the elements have very
     high aspect ratios.
@@ -189,7 +189,7 @@ def h_max_from_volume(
 
 @memoize_on_first_arg
 def h_min_from_volume(
-        dcoll: DiscretizationCollection, dim=None, dd=None) -> float:
+        dcoll: DiscretizationCollection, dim=None, dd=None) -> "DeviceScalar":
     """Returns a (minimum) characteristic length based on the volume of the
     elements. This length may not be representative if the elements have very
     high aspect ratios.
diff --git a/grudge/reductions.py b/grudge/reductions.py
index 150ce01e2407a94990451fe954d8621dc3d87a0d..ec3015765f451d20d37bfc3150a405b61f190d07 100644
--- a/grudge/reductions.py
+++ b/grudge/reductions.py
@@ -60,7 +60,7 @@ THE SOFTWARE.
 from numbers import Number
 from functools import reduce
 
-from arraycontext import make_loopy_program
+from arraycontext import make_loopy_program, DeviceScalar
 
 from grudge.discretization import DiscretizationCollection
 
@@ -75,24 +75,26 @@ import grudge.dof_desc as dof_desc
 
 # {{{ Nodal reductions
 
-def _norm(dcoll: DiscretizationCollection, vec, p, dd):
+def _norm(dcoll: DiscretizationCollection, vec, p, dd) -> "DeviceScalar":
     if isinstance(vec, Number):
         return np.fabs(vec)
     if p == 2:
         from grudge.op import _apply_mass_operator
-        return abs(
-            nodal_sum(
-                dcoll,
-                dd,
-                vec.conj() * _apply_mass_operator(dcoll, dd, dd, vec))
-            )**(1/2)
+        return vec.array_context.np.sqrt(
+            # Quantities being summed are real up to rounding error, so abs() can
+            # go on the outside
+            abs(
+                nodal_sum(
+                    dcoll,
+                    dd,
+                    vec.conj() * _apply_mass_operator(dcoll, dd, dd, vec))))
     elif p == np.inf:
         return nodal_max(dcoll, dd, abs(vec))
     else:
         raise NotImplementedError("Unsupported value of p")
 
 
-def norm(dcoll: DiscretizationCollection, vec, p, dd=None) -> float:
+def norm(dcoll: DiscretizationCollection, vec, p, dd=None) -> "DeviceScalar":
     r"""Return the vector p-norm of a function represented
     by its vector of degrees of freedom *vec*.
 
@@ -128,7 +130,7 @@ def norm(dcoll: DiscretizationCollection, vec, p, dd=None) -> float:
     return _norm(dcoll, vec, p, dd)
 
 
-def nodal_sum(dcoll: DiscretizationCollection, dd, vec) -> float:
+def nodal_sum(dcoll: DiscretizationCollection, dd, vec) -> "DeviceScalar":
     r"""Return the nodal sum of a vector of degrees of freedom *vec*.
 
     :arg dd: a :class:`~grudge.dof_desc.DOFDesc`, or a value
@@ -144,10 +146,11 @@ def nodal_sum(dcoll: DiscretizationCollection, dd, vec) -> float:
     from mpi4py import MPI
     actx = vec.array_context
 
-    return comm.allreduce(actx.to_numpy(nodal_sum_loc(dcoll, dd, vec)), op=MPI.SUM)
+    return actx.from_numpy(
+        comm.allreduce(actx.to_numpy(nodal_sum_loc(dcoll, dd, vec)), op=MPI.SUM))
 
 
-def nodal_sum_loc(dcoll: DiscretizationCollection, dd, vec) -> float:
+def nodal_sum_loc(dcoll: DiscretizationCollection, dd, vec) -> "DeviceScalar":
     r"""Return the rank-local nodal sum of a vector of degrees of freedom *vec*.
 
     :arg dd: a :class:`~grudge.dof_desc.DOFDesc`, or a value
@@ -159,12 +162,12 @@ def nodal_sum_loc(dcoll: DiscretizationCollection, dd, vec) -> float:
         return sum(nodal_sum_loc(dcoll, dd, vec[idx])
                    for idx in np.ndindex(vec.shape))
 
-    # FIXME: do not force result onto host
     actx = vec.array_context
+
     return sum([actx.np.sum(grp_ary) for grp_ary in vec])
 
 
-def nodal_min(dcoll: DiscretizationCollection, dd, vec) -> float:
+def nodal_min(dcoll: DiscretizationCollection, dd, vec) -> "DeviceScalar":
     r"""Return the nodal minimum of a vector of degrees of freedom *vec*.
 
     :arg dd: a :class:`~grudge.dof_desc.DOFDesc`, or a value
@@ -178,11 +181,13 @@ def nodal_min(dcoll: DiscretizationCollection, dd, vec) -> float:
 
     # NOTE: Don't move this
     from mpi4py import MPI
+    actx = vec.array_context
 
-    return comm.allreduce(nodal_min_loc(dcoll, dd, vec), op=MPI.MIN)
+    return actx.from_numpy(
+        comm.allreduce(actx.to_numpy(nodal_min_loc(dcoll, dd, vec)), op=MPI.MIN))
 
 
-def nodal_min_loc(dcoll: DiscretizationCollection, dd, vec) -> float:
+def nodal_min_loc(dcoll: DiscretizationCollection, dd, vec) -> "DeviceScalar":
     r"""Return the rank-local nodal minimum of a vector of degrees
     of freedom *vec*.
 
@@ -197,18 +202,12 @@ def nodal_min_loc(dcoll: DiscretizationCollection, dd, vec) -> float:
 
     actx = vec.array_context
 
-    # FIXME: do not force result onto host
-    # Host transfer is needed for now because actx.np.minimum does not succeed
-    # on array scalars.
-    # https://github.com/inducer/arraycontext/issues/49#issuecomment-869266944
     return reduce(
-            lambda acc, grp_ary: actx.np.minimum(
-                acc,
-                actx.to_numpy(actx.np.min(grp_ary))[()]),
-            vec, np.inf)
+            lambda acc, grp_ary: actx.np.minimum(acc, actx.np.min(grp_ary)),
+            vec, actx.from_numpy(np.array(np.inf)))
 
 
-def nodal_max(dcoll: DiscretizationCollection, dd, vec) -> float:
+def nodal_max(dcoll: DiscretizationCollection, dd, vec) -> "DeviceScalar":
     r"""Return the nodal maximum of a vector of degrees of freedom *vec*.
 
     :arg dd: a :class:`~grudge.dof_desc.DOFDesc`, or a value
@@ -222,11 +221,13 @@ def nodal_max(dcoll: DiscretizationCollection, dd, vec) -> float:
 
     # NOTE: Don't move this
     from mpi4py import MPI
+    actx = vec.array_context
 
-    return comm.allreduce(nodal_max_loc(dcoll, dd, vec), op=MPI.MAX)
+    return actx.from_numpy(
+        comm.allreduce(actx.to_numpy(nodal_max_loc(dcoll, dd, vec)), op=MPI.MAX))
 
 
-def nodal_max_loc(dcoll: DiscretizationCollection, dd, vec) -> float:
+def nodal_max_loc(dcoll: DiscretizationCollection, dd, vec) -> "DeviceScalar":
     r"""Return the rank-local nodal maximum of a vector of degrees
     of freedom *vec*.
 
@@ -241,18 +242,12 @@ def nodal_max_loc(dcoll: DiscretizationCollection, dd, vec) -> float:
 
     actx = vec.array_context
 
-    # FIXME: do not force result onto host
-    # Host transfer is needed for now because actx.np.minimum does not succeed
-    # on array scalars.
-    # https://github.com/inducer/arraycontext/issues/49#issuecomment-869266944
     return reduce(
-            lambda acc, grp_ary: actx.np.maximum(
-                acc,
-                actx.to_numpy(actx.np.max(grp_ary))[()]),
-            vec, -np.inf)
+            lambda acc, grp_ary: actx.np.maximum(acc, actx.np.max(grp_ary)),
+            vec, actx.from_numpy(np.array(-np.inf)))
 
 
-def integral(dcoll: DiscretizationCollection, dd, vec) -> float:
+def integral(dcoll: DiscretizationCollection, dd, vec) -> "DeviceScalar":
     """Numerically integrates a function represented by a
     :class:`~meshmode.dof_array.DOFArray` of degrees of freedom.
 
diff --git a/test/test_dt_utils.py b/test/test_dt_utils.py
index f3f3f9a54ba44c4291bd70d835bc64574cd9a7d6..9d4c345d2e0718026881707feeddd035191e598c 100644
--- a/test/test_dt_utils.py
+++ b/test/test_dt_utils.py
@@ -70,7 +70,8 @@ def test_geometric_factors_regular_refinement(actx_factory, name):
         mesh = builder.get_mesh(resolution, builder.mesh_order)
         dcoll = DiscretizationCollection(actx, mesh, order=builder.order)
         min_factors.append(
-            op.nodal_min(dcoll, "vol", thaw(dt_geometric_factors(dcoll), actx))
+            actx.to_numpy(
+                op.nodal_min(dcoll, "vol", thaw(dt_geometric_factors(dcoll), actx)))
         )
 
     # Resolution is doubled each refinement, so the ratio of consecutive
diff --git a/test/test_grudge.py b/test/test_grudge.py
index f6e4ad9b81ae7b52b91f7ad5de13a883be39ae08..7a351f40a77e3763f47767b3f602a37c4a2ddcee 100644
--- a/test/test_grudge.py
+++ b/test/test_grudge.py
@@ -254,7 +254,7 @@ def test_mass_surface_area(actx_factory, name):
 
         h_max = h_max_from_volume(dcoll)
 
-        eoc.add_data_point(h_max, area_error)
+        eoc.add_data_point(actx.to_numpy(h_max), area_error)
 
     # }}}
 
@@ -328,7 +328,7 @@ def test_mass_operator_inverse(actx_factory, name):
 
         h_max = h_max_from_volume(dcoll)
 
-        eoc.add_data_point(h_max, inv_error)
+        eoc.add_data_point(actx.to_numpy(h_max), inv_error)
 
     logger.info("inverse mass error\n%s", str(eoc))
 
@@ -636,8 +636,8 @@ def test_surface_divergence_theorem(actx_factory, mesh_name, visualize=False):
 
         h_max = h_max_from_volume(dcoll)
 
-        eoc_global.add_data_point(h_max, actx.to_numpy(err_global))
-        eoc_local.add_data_point(h_max, err_local)
+        eoc_global.add_data_point(actx.to_numpy(h_max), actx.to_numpy(err_global))
+        eoc_local.add_data_point(actx.to_numpy(h_max), actx.to_numpy(err_local))
 
         if visualize:
             from grudge.shortcuts import make_visualizer
@@ -773,7 +773,7 @@ def test_convergence_advec(actx_factory, mesh_name, mesh_pars, op_type, flux_typ
         from grudge.dt_utils import h_max_from_volume
 
         h_max = h_max_from_volume(dcoll, dim=dcoll.ambient_dim)
-        dt = dt_factor * h_max/order**2
+        dt = actx.to_numpy(dt_factor * h_max/order**2)
         nsteps = (final_time // dt) + 1
         dt = final_time/nsteps + 1e-15
 
@@ -806,8 +806,8 @@ def test_convergence_advec(actx_factory, mesh_name, mesh_pars, op_type, flux_typ
             last_u - u_analytic(nodes, t=last_t),
             2
         )
-        logger.info("h_max %.5e error %.5e", h_max, error_l2)
-        eoc_rec.add_data_point(h_max, actx.to_numpy(error_l2))
+        logger.info("h_max %.5e error %.5e", actx.to_numpy(h_max), error_l2)
+        eoc_rec.add_data_point(actx.to_numpy(h_max), actx.to_numpy(error_l2))
 
     logger.info("\n%s", eoc_rec.pretty_print(
         abscissa_label="h",
@@ -868,7 +868,7 @@ def test_convergence_maxwell(actx_factory,  order):
         def rhs(t, w):
             return maxwell_operator.operator(t, w)
 
-        dt = maxwell_operator.estimate_rk4_timestep(actx, dcoll)
+        dt = actx.to_numpy(maxwell_operator.estimate_rk4_timestep(actx, dcoll))
         final_t = dt * 5
         nsteps = int(final_t/dt)
 
diff --git a/test/test_mpi_communication.py b/test/test_mpi_communication.py
index fbb152b8a118d67c72288accccaacbc570f959d9..4f62767030985eb1dae543718f46d6c069950a20 100644
--- a/test/test_mpi_communication.py
+++ b/test/test_mpi_communication.py
@@ -176,7 +176,8 @@ def mpi_communication_entrypoint():
         [dcoll.zeros(actx) for i in range(dcoll.dim)]
     )
 
-    dt = 2/3 * wave_op.estimate_rk4_timestep(actx, dcoll, fields=fields)
+    dt = actx.to_numpy(
+        2/3 * wave_op.estimate_rk4_timestep(actx, dcoll, fields=fields))
 
     wave_op.check_bc_coverage(local_mesh)
 
diff --git a/test/test_op.py b/test/test_op.py
index 92c088894024a27a84c6d0a2687e53733d9b4038..364a988f5e5107b188616917f5c233028267abb0 100644
--- a/test/test_op.py
+++ b/test/test_op.py
@@ -151,7 +151,7 @@ def test_gradient(actx_factory, form, dim, order, vectorize, nested,
                 ("expected_grad_u", expected_grad_u),
                 ], overwrite=True)
 
-        rel_linf_err = (
+        rel_linf_err = actx.to_numpy(
             op.norm(dcoll, grad_u - expected_grad_u, np.inf)
             / op.norm(dcoll, expected_grad_u, np.inf))
         eoc_rec.add_data_point(1./n, rel_linf_err)
@@ -267,7 +267,7 @@ def test_divergence(actx_factory, form, dim, order, vectorize, nested,
                 ("expected_div_u", expected_div_u),
                 ], overwrite=True)
 
-        rel_linf_err = (
+        rel_linf_err = actx.to_numpy(
             op.norm(dcoll, div_u - expected_div_u, np.inf)
             / op.norm(dcoll, expected_div_u, np.inf))
         eoc_rec.add_data_point(1./n, rel_linf_err)