diff --git a/doc/conf.py b/doc/conf.py
index 560efec5951aa5de9a4b732e304a121c0c0c8baa..6bb9487380141270dcbfcf88c7e5036935be7e91 100644
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -319,12 +319,12 @@ texinfo_documents = [
 
 # Example configuration for intersphinx: refer to the Python standard library.
 intersphinx_mapping = {
-    'http://docs.python.org/': None,
-    'http://docs.scipy.org/doc/numpy/': None,
-    'http://documen.tician.de/pyopencl/': None,
-    'http://documen.tician.de/modepy/': None,
-    'http://documen.tician.de/pymbolic/': None,
-    'http://documen.tician.de/meshmode/': None,
+    'https://docs.python.org/': None,
+    'https://docs.scipy.org/doc/numpy/': None,
+    'https://documen.tician.de/pyopencl/': None,
+    'https://documen.tician.de/modepy/': None,
+    'https://documen.tician.de/pymbolic/': None,
+    'https://documen.tician.de/meshmode/': None,
     #'http://documen.tician.de/loopy/': None,
     }
 autoclass_content = "both"
diff --git a/grudge/execution.py b/grudge/execution.py
index 761b14729c330e5d1ebcdaf203dea1c437259a4f..a95b15e75e43917688f2c3766651e3018ff9d526 100644
--- a/grudge/execution.py
+++ b/grudge/execution.py
@@ -54,7 +54,7 @@ class ExecutionMapper(mappers.Evaluator,
         self.function_registry = bound_op.function_registry
         self.queue = queue
 
-    # {{{ expression mappings -------------------------------------------------
+    # {{{ expression mappings
 
     def map_ones(self, expr):
         if expr.dd.is_scalar():
@@ -100,6 +100,49 @@ class ExecutionMapper(mappers.Evaluator,
         args = [self.rec(p) for p in expr.parameters]
         return self.function_registry[expr.function.name](self.queue, *args)
 
+    # }}}
+
+    # {{{ elementwise reductions
+
+    def _map_elementwise_reduction(self, op_name, field_expr, dd):
+        @memoize_in(self, "elementwise_%s_knl" % op_name)
+        def knl():
+            knl = lp.make_kernel(
+                "{[el, idof, jdof]: 0<=el<nelements and 0<=idof, jdof<ndofs}",
+                """
+                result[el, idof] = %s(jdof, operand[el, jdof])
+                """ % op_name,
+                default_offset=lp.auto,
+                name="elementwise_%s_knl" % op_name)
+
+            return lp.tag_inames(knl, "el:g.0,idof:l.0")
+
+        field = self.rec(field_expr)
+        discr = self.discrwb.discr_from_dd(dd)
+        assert field.shape == (discr.nnodes,)
+
+        result = discr.empty(queue=self.queue, dtype=field.dtype,
+                allocator=self.bound_op.allocator)
+        for grp in discr.groups:
+            knl()(self.queue,
+                    operand=grp.view(field),
+                    result=grp.view(result))
+
+        return result
+
+    def map_elementwise_sum(self, op, field_expr):
+        return self._map_elementwise_reduction("sum", field_expr, op.dd_in)
+
+    def map_elementwise_min(self, op, field_expr):
+        return self._map_elementwise_reduction("min", field_expr, op.dd_in)
+
+    def map_elementwise_max(self, op, field_expr):
+        return self._map_elementwise_reduction("max", field_expr, op.dd_in)
+
+    # }}}
+
+    # {{{ nodal reductions
+
     def map_nodal_sum(self, op, field_expr):
         # FIXME: Could allow array scalars
         return cl.array.sum(self.rec(field_expr)).get()[()]
@@ -112,6 +155,8 @@ class ExecutionMapper(mappers.Evaluator,
         # FIXME: Could allow array scalars
         return cl.array.min(self.rec(field_expr)).get()[()]
 
+    # }}}
+
     def map_if(self, expr):
         bool_crit = self.rec(expr.condition)
 
@@ -152,6 +197,8 @@ class ExecutionMapper(mappers.Evaluator,
 
         return out
 
+    # {{{ elementwise linear operators
+
     def map_ref_diff_base(self, op, field_expr):
         raise NotImplementedError(
                 "differentiation should be happening in batched form")
@@ -203,16 +250,6 @@ class ExecutionMapper(mappers.Evaluator,
 
         return result
 
-    def map_elementwise_max(self, op, field_expr):
-        from grudge._internal import perform_elwise_max
-        field = self.rec(field_expr)
-
-        out = self.discr.volume_zeros(dtype=field.dtype)
-        for eg in self.discr.element_groups:
-            perform_elwise_max(eg.ranges, field, out)
-
-        return out
-
     def map_interpolation(self, op, field_expr):
         conn = self.discrwb.connection_from_dds(op.dd_in, op.dd_out)
         return conn(self.queue, self.rec(field_expr)).with_queue(self.queue)
@@ -227,6 +264,8 @@ class ExecutionMapper(mappers.Evaluator,
         return self.discrwb.opposite_face_connection()(
                 self.queue, self.rec(field_expr)).with_queue(self.queue)
 
+    # }}}
+
     # {{{ face mass operator
 
     def map_ref_face_mass_operator(self, op, field_expr):
@@ -282,8 +321,6 @@ class ExecutionMapper(mappers.Evaluator,
 
         return result
 
-    # }}}
-
     def map_signed_face_ones(self, expr):
         assert expr.dd.is_trace()
         face_discr = self.discrwb.discr_from_dd(expr.dd)
diff --git a/grudge/symbolic/mappers/__init__.py b/grudge/symbolic/mappers/__init__.py
index 8652e392f917e8beecae30d01f84fd52e9f7e2aa..36b57d50aaf333fbd3303fd72f1425f5de085086 100644
--- a/grudge/symbolic/mappers/__init__.py
+++ b/grudge/symbolic/mappers/__init__.py
@@ -133,6 +133,10 @@ class OperatorReducerMixin(LocalOpReducerMixin, FluxOpReducerMixin):
 
     map_interpolation = _map_op_base
 
+    map_elementwise_sum = _map_op_base
+    map_elementwise_min = _map_op_base
+    map_elementwise_max = _map_op_base
+
     map_nodal_sum = _map_op_base
     map_nodal_min = _map_op_base
     map_nodal_max = _map_op_base
@@ -182,6 +186,10 @@ class IdentityMapperMixin(LocalOpReducerMixin, FluxOpReducerMixin):
 
     map_interpolation = map_elementwise_linear
 
+    map_elementwise_sum = map_elementwise_linear
+    map_elementwise_min = map_elementwise_linear
+    map_elementwise_max = map_elementwise_linear
+
     map_nodal_sum = map_elementwise_linear
     map_nodal_min = map_elementwise_linear
     map_nodal_max = map_elementwise_linear
@@ -701,6 +709,19 @@ class StringifyMapper(pymbolic.mapper.stringifier.StringifyMapper):
     def _format_op_dd(self, op):
         return ":%s->%s" % (self._format_dd(op.dd_in), self._format_dd(op.dd_out))
 
+    # {{{ elementwise ops
+
+    def map_elementwise_sum(self, expr, enclosing_prec):
+        return "ElementwiseSum" + self._format_op_dd(expr)
+
+    def map_elementwise_max(self, expr, enclosing_prec):
+        return "ElementwiseMax" + self._format_op_dd(expr)
+
+    def map_elementwise_min(self, expr, enclosing_prec):
+        return "ElementwiseMin" + self._format_op_dd(expr)
+
+    # }}}
+
     # {{{ nodal ops
 
     def map_nodal_sum(self, expr, enclosing_prec):
diff --git a/grudge/symbolic/operators.py b/grudge/symbolic/operators.py
index b5d8407349a33b47ee115e918bef110f1932e73c..e6484fb71c0a9af8678f22094764cd6d0314b6e3 100644
--- a/grudge/symbolic/operators.py
+++ b/grudge/symbolic/operators.py
@@ -43,6 +43,8 @@ Basic Operators
 Reductions
 ^^^^^^^^^^
 
+.. autoclass:: ElementwiseSumOperator
+.. autoclass:: ElementwiseMinOperator
 .. autoclass:: ElementwiseMaxOperator
 
 .. autoclass:: NodalReductionOperator
@@ -170,9 +172,38 @@ class InterpolationOperator(Operator):
 interp = InterpolationOperator
 
 
-class ElementwiseMaxOperator(Operator):
+# {{{ element reduction: sum, min, max
+
+class ElementwiseReductionOperator(Operator):
+    def __init__(self, dd):
+        super(ElementwiseReductionOperator, self).__init__(dd_in=dd, dd_out=dd)
+
+
+class ElementwiseSumOperator(ElementwiseReductionOperator):
+    """Returns a vector of DOFs with all entries on each element set
+    to the sum of DOFs on that element.
+    """
+
+    mapper_method = intern("map_elementwise_sum")
+
+
+class ElementwiseMinOperator(ElementwiseReductionOperator):
+    """Returns a vector of DOFs with all entries on each element set
+    to the minimum of DOFs on that element.
+    """
+
+    mapper_method = intern("map_elementwise_min")
+
+
+class ElementwiseMaxOperator(ElementwiseReductionOperator):
+    """Returns a vector of DOFs with all entries on each element set
+    to the maximum of DOFs on that element.
+    """
+
     mapper_method = intern("map_elementwise_max")
 
+# }}}
+
 
 # {{{ nodal reduction: sum, integral, max
 
@@ -687,6 +718,27 @@ def norm(p, arg, dd=None):
     else:
         raise ValueError("unsupported value of p")
 
+
+def h_max_from_volume(ambient_dim, dim=None, dd=None):
+    """Defines a characteristic length based on the volume of the elements.
+    This length may not be representative if the elements have very high
+    aspect ratios.
+    """
+
+    import grudge.symbolic.primitives as prim
+    if dd is None:
+        dd = prim.DD_VOLUME
+    dd = prim.as_dofdesc(dd)
+
+    if dim is None:
+        dim = ambient_dim
+
+    return NodalMax(dd_in=dd)(
+            ElementwiseSumOperator(dd)(
+                MassOperator(dd_in=dd)(prim.Ones(dd))
+                )
+            )**(1.0/dim)
+
 # }}}
 
 # vim: foldmethod=marker
diff --git a/test/test_grudge.py b/test/test_grudge.py
index a55f92baffb7d7f5bfa293bb105579dfeeba08a5..53bd4975176e7f4a263fecfa2985997f26be90f4 100644
--- a/test/test_grudge.py
+++ b/test/test_grudge.py
@@ -263,7 +263,6 @@ def test_convergence_advec(ctx_factory, mesh_name, mesh_pars, op_type, flux_type
                 [np.linspace(-1.0, 1.0, mesh_par)],
                 order=order)
 
-            h = 2.0 / mesh_par
             dim = 1
             dt_factor = 1.0
         elif mesh_name == "disk":
@@ -281,7 +280,6 @@ def test_convergence_advec(ctx_factory, mesh_name, mesh_pars, op_type, flux_type
 
             from meshmode.mesh.io import from_meshpy
             mesh = from_meshpy(mesh_info, order=1)
-            h = np.sqrt(mesh_par)
             dim = 2
             dt_factor = 4
         elif mesh_name.startswith("rect"):
@@ -290,7 +288,6 @@ def test_convergence_advec(ctx_factory, mesh_name, mesh_pars, op_type, flux_type
             mesh = generate_regular_rect_mesh(a=(-0.5,)*dim, b=(0.5,)*dim,
                     n=(mesh_par,)*dim, order=4)
 
-            h = 1/mesh_par
             if dim == 2:
                 dt_factor = 4
             elif dim == 3:
@@ -337,7 +334,8 @@ def test_convergence_advec(ctx_factory, mesh_name, mesh_pars, op_type, flux_type
         else:
             final_time = 0.2
 
-        dt = dt_factor * h/order**2
+        h_max = bind(discr, sym.h_max_from_volume(discr.ambient_dim))(queue)
+        dt = dt_factor * h_max/order**2
         nsteps = (final_time // dt) + 1
         dt = final_time/nsteps + 1e-15
 
@@ -366,11 +364,12 @@ def test_convergence_advec(ctx_factory, mesh_name, mesh_pars, op_type, flux_type
         error_l2 = bind(discr,
             sym.norm(2, sym.var("u")-u_analytic(sym.nodes(dim))))(
                 queue, t=last_t, u=last_u)
-        print(h, error_l2)
-        eoc_rec.add_data_point(h, error_l2)
+        print(h_max, error_l2)
+        eoc_rec.add_data_point(h_max, error_l2)
 
-    print(eoc_rec.pretty_print(abscissa_label="h",
-            error_label="L2 Error"))
+    print(eoc_rec.pretty_print(
+        abscissa_label="h",
+        error_label="L2 Error"))
 
     assert eoc_rec.order_estimate() > order