diff --git a/grudge/symbolic/mappers/__init__.py b/grudge/symbolic/mappers/__init__.py
index eef6cce7346dfaa2b621669b9a9d986e48ed2a87..c8def89882ea06b1002135983b9a1cdf570ea5db 100644
--- a/grudge/symbolic/mappers/__init__.py
+++ b/grudge/symbolic/mappers/__init__.py
@@ -621,17 +621,24 @@ class GlobalToReferenceMapper(CSECachingMapperMixin, IdentityMapper):
                 dd=dd_in.with_qtag(sym.QTAG_NONE))
 
         def rewrite_derivative(ref_class, field,  dd_in, with_jacobian=True):
-            jac_tag = sym.area_element(self.ambient_dim, self.dim, dd=dd_in)
+            def imd(rst):
+                return sym.inverse_surface_metric_derivative(
+                        rst, expr.op.xyz_axis,
+                        ambient_dim=self.ambient_dim, dim=self.dim,
+                        dd=dd_in)
 
             rec_field = self.rec(field)
             if with_jacobian:
+                jac_tag = sym.area_element(self.ambient_dim, self.dim, dd=dd_in)
                 rec_field = jac_tag * rec_field
-            return sum(
-                    sym.inverse_metric_derivative(
-                        rst_axis, expr.op.xyz_axis,
-                        ambient_dim=self.ambient_dim, dim=self.dim)
-                    * ref_class(rst_axis, dd_in=dd_in)(rec_field)
-                    for rst_axis in range(self.dim))
+
+                return sum(
+                        ref_class(rst_axis, dd_in=dd_in)(rec_field * imd(rst_axis))
+                        for rst_axis in range(self.dim))
+            else:
+                return sum(
+                        ref_class(rst_axis, dd_in=dd_in)(rec_field) * imd(rst_axis)
+                        for rst_axis in range(self.dim))
 
         if isinstance(expr.op, op.MassOperator):
             return op.RefMassOperator(dd_in, dd_out)(
diff --git a/grudge/symbolic/primitives.py b/grudge/symbolic/primitives.py
index 0671046217b942c81752fb89a95a75046fed3c0c..fd963eba539436c881f70b6e6b491c048217c523 100644
--- a/grudge/symbolic/primitives.py
+++ b/grudge/symbolic/primitives.py
@@ -494,7 +494,7 @@ def mv_nodes(ambient_dim, dd=None):
     return MultiVector(nodes(ambient_dim, dd))
 
 
-def forward_metric_derivative(xyz_axis, rst_axis, dd=None):
+def forward_metric_nth_derivative(xyz_axis, ref_axes, dd=None):
     r"""
     Pointwise metric derivatives representing
 
@@ -502,24 +502,44 @@ def forward_metric_derivative(xyz_axis, rst_axis, dd=None):
 
         \frac{\partial x_{\mathrm{xyz\_axis}} }{\partial r_{\mathrm{rst\_axis}} }
     """
+
+    if isinstance(ref_axes, int):
+        ref_axes = ((ref_axes, 1),)
+
+    if not isinstance(ref_axes, tuple):
+        raise ValueError("ref_axes must be a tuple")
+
+    if tuple(sorted(ref_axes)) != ref_axes:
+        raise ValueError("ref_axes must be sorted")
+
+    if len(dict(ref_axes)) != len(ref_axes):
+        raise ValueError("ref_axes must not contain an axis more than once")
+
     if dd is None:
         dd = DD_VOLUME
-
     inner_dd = dd.with_qtag(QTAG_NONE)
 
-    from grudge.symbolic.operators import RefDiffOperator
-    diff_op = RefDiffOperator(rst_axis, inner_dd)
+    from pytools import flatten
+    flat_ref_axes = flatten([rst_axis] * n for rst_axis, n in ref_axes)
 
-    result = diff_op(NodeCoordinateComponent(xyz_axis, inner_dd))
+    from grudge.symbolic.operators import RefDiffOperator
+    result = NodeCoordinateComponent(xyz_axis, inner_dd)
+    for rst_axis in flat_ref_axes:
+        result = RefDiffOperator(rst_axis, inner_dd)(result)
 
     if dd.uses_quadrature():
         from grudge.symbolic.operators import interp
         result = interp(inner_dd, dd)(result)
 
-    return cse(
-            result,
-            prefix="dx%d_dr%d" % (xyz_axis, rst_axis),
-            scope=cse_scope.DISCRETIZATION)
+    prefix = "dx%d_%s" % (
+            xyz_axis,
+            "_".join("%sr%d" % ("d" * n, rst_axis) for rst_axis, n in ref_axes))
+
+    return cse(result, prefix, cse_scope.DISCRETIZATION)
+
+
+def forward_metric_derivative(xyz_axis, rst_axis, dd=None):
+    return forward_metric_nth_derivative(xyz_axis, rst_axis, dd=dd)
 
 
 def forward_metric_derivative_vector(ambient_dim, rst_axis, dd=None):
@@ -594,6 +614,7 @@ def forward_metric_derivative_mat(ambient_dim, dim=None, dd=None):
     result = np.zeros((ambient_dim, dim), dtype=np.object)
     for j in range(dim):
         result[:, j] = forward_metric_derivative_vector(ambient_dim, j, dd=dd)
+
     return result
 
 
@@ -610,6 +631,91 @@ def inverse_metric_derivative_mat(ambient_dim, dim=None, dd=None):
     return result
 
 
+def first_fundamental_form(ambient_dim, dim=None, dd=None):
+    if dim is None:
+        dim = ambient_dim - 1
+
+    mder = forward_metric_derivative_mat(ambient_dim, dim=dim, dd=dd)
+    return cse(mder.T.dot(mder), "form1_mat", cse_scope.DISCRETIZATION)
+
+
+def inverse_first_fundamental_form(ambient_dim, dim=None, dd=None):
+    if dim is None:
+        dim = ambient_dim - 1
+
+    if ambient_dim == dim:
+        inv_mder = inverse_metric_derivative_mat(ambient_dim, dim=dim, dd=dd)
+        inv_form1 = inv_mder.dot(inv_mder.T)
+    else:
+        form1 = first_fundamental_form(ambient_dim, dim, dd)
+
+        if dim == 1:
+            inv_form1 = np.array([[1.0/form1[0, 0]]], dtype=np.object)
+        elif dim == 2:
+            (E, F), (_, G) = form1      # noqa: N806
+            inv_form1 = 1.0 / (E * G - F * F) * np.array([
+                [G, -F], [-F, E]
+                ], dtype=np.object)
+        else:
+            raise ValueError("%dD surfaces not supported" % dim)
+
+    return cse(inv_form1, "inv_form1_mat", cse_scope.DISCRETIZATION)
+
+
+def inverse_surface_metric_derivative(rst_axis, xyz_axis,
+        ambient_dim, dim=None, dd=None):
+    if dim is None:
+        dim = ambient_dim - 1
+
+    if ambient_dim == dim:
+        return inverse_metric_derivative(rst_axis, xyz_axis,
+                ambient_dim, dim=dim, dd=dd)
+    else:
+        inv_form1 = inverse_first_fundamental_form(ambient_dim, dim=dim, dd=dd)
+        imd = sum(
+                inv_form1[rst_axis, d] * forward_metric_derivative(xyz_axis, d, dd=dd)
+                for d in range(dim))
+
+        return cse(imd,
+                prefix="ds%d_dx%d" % (rst_axis, xyz_axis),
+                scope=cse_scope.DISCRETIZATION)
+
+
+def second_fundamental_form(ambient_dim, dim=None, dd=None):
+    if dim is None:
+        dim = ambient_dim - 1
+
+    normal = surface_normal(ambient_dim, dim=dim, dd=dd).as_vector()
+    if dim == 1:
+        second_ref_axes = [((0, 2),)]
+    elif dim == 2:
+        second_ref_axes = [((0, 2),), ((0, 1), (1, 1)), ((1, 2),)]
+    else:
+        raise ValueError("%dD surfaces not supported" % dim)
+
+    from pytools import flatten
+    form2 = np.empty((dim, dim), dtype=np.object)
+    for ref_axes in second_ref_axes:
+        i, j = flatten([rst_axis] * n for rst_axis, n in ref_axes)
+
+        ruv = np.array([
+            forward_metric_nth_derivative(xyz_axis, ref_axes, dd=dd)
+            for xyz_axis in range(ambient_dim)])
+        form2[i, j] = form2[j, i] = normal.dot(ruv)
+
+    return cse(form2, "form2_mat", cse_scope.DISCRETIZATION)
+
+
+def shape_operator(ambient_dim, dim=None, dd=None):
+    if dim is None:
+        dim = ambient_dim - 1
+
+    inv_form1 = inverse_first_fundamental_form(ambient_dim, dim=dim, dd=dd)
+    form2 = second_fundamental_form(ambient_dim, dim=dim, dd=dd)
+
+    return cse(-form2.dot(inv_form1), "shape_operator", cse_scope.DISCRETIZATION)
+
+
 def pseudoscalar(ambient_dim, dim=None, dd=None):
     if dim is None:
         dim = ambient_dim
@@ -685,6 +791,24 @@ def mv_normal(dd, ambient_dim, dim=None):
 def normal(dd, ambient_dim, dim=None, quadrature_tag=None):
     return mv_normal(dd, ambient_dim, dim).as_vector()
 
+
+def summed_curvature(ambient_dim, dim=None, dd=None):
+    if dim is None:
+        dim = ambient_dim - 1
+
+    if ambient_dim == 1:
+        return 0.0
+
+    if ambient_dim == dim:
+        return 0.0
+
+    op = shape_operator(ambient_dim, dim=dim, dd=dd)
+    return cse(np.trace(op), "summed_curvature", cse_scope.DISCRETIZATION)
+
+
+def mean_curvature(ambient_dim, dim=None, dd=None):
+    return 1.0 / (ambient_dim-1.0) * summed_curvature(ambient_dim, dim=dim, dd=dd)
+
 # }}}
 
 
diff --git a/test/mesh_data.py b/test/mesh_data.py
index c6ba21eecb712d74e7e6d3f197adfe72ebec3df6..d1d04dacf9794387e4289a3dab1cca6762c7ac65 100644
--- a/test/mesh_data.py
+++ b/test/mesh_data.py
@@ -74,7 +74,7 @@ class SpheroidMeshBuilder(MeshBuilder):
 
     mesh_order = 3
     resolutions = [1.0, 0.1, 0.05]
-    # resolutions = [1.0, 0.1, 0.05, 0.025, 0.015]
+    # resolutions = [1.0, 0.1, 0.05, 0.03, 0.015]
 
     @property
     def radius(self):
diff --git a/test/test_grudge.py b/test/test_grudge.py
index 3bf5ddf4f7fcccb083098e75cd24bf7e346afbd2..e0d08dfc12253561a5d5bd5ff9e2e944f389891c 100644
--- a/test/test_grudge.py
+++ b/test/test_grudge.py
@@ -527,6 +527,163 @@ def test_2d_gauss_theorem(ctx_factory):
 
     assert abs(gauss_err) < 1e-13
 
+
+@pytest.mark.parametrize("mesh_name", ["2-1-ellipse", "spheroid"])
+def test_surface_divergence_theorem(ctx_factory, mesh_name, visualize=False):
+    r"""Check the surface divergence theorem.
+
+        .. math::
+
+            \int_Sigma \phi \nabla_i f_i =
+            \int_\Sigma \nabla_i \phi f_i +
+            \int_\Sigma \kappa \phi f_i n_i +
+            \int_{\partial \Sigma} \phi f_i m_i
+
+        where :math:`n_i` is the surface normal and :class:`m_i` is the
+        face normal (which should be orthogonal to both the surface normal
+        and the face tangent).
+    """
+
+    cl_ctx = ctx_factory()
+    queue = cl.CommandQueue(cl_ctx)
+
+    # {{{ cases
+
+    if mesh_name == "2-1-ellipse":
+        from mesh_data import EllipseMeshBuilder
+        builder = EllipseMeshBuilder(radius=3.1, aspect_ratio=2.0)
+    elif mesh_name == "spheroid":
+        from mesh_data import SpheroidMeshBuilder
+        builder = SpheroidMeshBuilder()
+    elif mesh_name == "circle":
+        from mesh_data import EllipseMeshBuilder
+        builder = EllipseMeshBuilder(radius=1.0, aspect_ratio=1.0)
+    elif mesh_name == "starfish":
+        from mesh_data import StarfishMeshBuilder
+        builder = StarfishMeshBuilder()
+    elif mesh_name == "sphere":
+        from mesh_data import SphereMeshBuilder
+        builder = SphereMeshBuilder(radius=1.0, mesh_order=16)
+    else:
+        raise ValueError("unknown mesh name: %s" % mesh_name)
+
+    # }}}
+
+    # {{{ convergene
+
+    def f(x):
+        return join_fields(
+                sym.sin(3*x[1]) + sym.cos(3*x[0]) + 1.0,
+                sym.sin(2*x[0]) + sym.cos(x[1]),
+                3.0 * sym.cos(x[0] / 2) + sym.cos(x[1]),
+                )[:ambient_dim]
+
+    from pytools.convergence import EOCRecorder
+    eoc_global = EOCRecorder()
+    eoc_local = EOCRecorder()
+
+    theta = np.pi / 3.33
+    ambient_dim = builder.ambient_dim
+    if ambient_dim == 2:
+        mesh_rotation = np.array([
+            [np.cos(theta), -np.sin(theta)],
+            [np.sin(theta), np.cos(theta)],
+            ])
+    else:
+        mesh_rotation = np.array([
+            [1.0, 0.0, 0.0],
+            [0.0, np.cos(theta), -np.sin(theta)],
+            [0.0, np.sin(theta), np.cos(theta)],
+            ])
+
+    mesh_offset = np.array([0.33, -0.21, 0.0])[:ambient_dim]
+
+    for i, resolution in enumerate(builder.resolutions):
+        from meshmode.mesh.processing import affine_map
+        mesh = builder.get_mesh(resolution, builder.mesh_order)
+        # mesh = affine_map(mesh, A=mesh_rotation, b=mesh_offset)
+
+        from meshmode.discretization.poly_element import \
+                QuadratureSimplexGroupFactory
+        discr = DGDiscretizationWithBoundaries(cl_ctx, mesh, order=builder.order,
+                quad_tag_to_group_factory={
+                    "product": QuadratureSimplexGroupFactory(2 * builder.order)
+                    })
+
+        volume = discr.discr_from_dd(sym.DD_VOLUME)
+        assert len(volume.groups) == 1
+        logger.info("nnodes:    %d", volume.nnodes)
+        logger.info("nelements: %d", volume.mesh.nelements)
+
+        dd = sym.DD_VOLUME
+        dq = dd.with_qtag("product")
+        df = sym.as_dofdesc(sym.FACE_RESTR_ALL)
+        ambient_dim = discr.ambient_dim
+        dim = discr.dim
+
+        # variables
+        sym_f = f(sym.nodes(ambient_dim, dd=dd))
+        sym_f_quad = f(sym.nodes(ambient_dim, dd=dq))
+        sym_kappa = sym.summed_curvature(ambient_dim, dim=dim, dd=dq)
+        sym_normal = sym.surface_normal(ambient_dim, dim=dim, dd=dq).as_vector()
+
+        sym_face_normal = sym.normal(df, ambient_dim, dim=dim - 1)
+        sym_face_f = sym.interp(dd, df)(sym_f)
+
+        # operators
+        sym_stiff = sum(
+                sym.StiffnessOperator(d)(f) for d, f in enumerate(sym_f)
+                )
+        sym_stiff_t = sum(
+                sym.StiffnessTOperator(d)(f) for d, f in enumerate(sym_f)
+                )
+        sym_k = sym.MassOperator(dq, dd)(sym_kappa * sym_f_quad.dot(sym_normal))
+        sym_flux = sym.FaceMassOperator()(sym_face_f.dot(sym_face_normal))
+
+        # sum everything up and check the result
+        sym_op_global = sym.NodalSum(dd)(
+                sym_stiff - (sym_stiff_t + sym_k))
+        sym_op_local = sym.ElementwiseSumOperator(dd)(
+                sym_stiff - (sym_stiff_t + sym_k + sym_flux))
+
+        op_global = bind(discr, sym_op_global)(queue)
+        op_local = bind(discr, sym_op_local)(queue).get(queue)
+
+        err_global = la.norm(op_global)
+        err_local = la.norm(
+                la.norm(volume.groups[0].view(op_local), np.inf, axis=1),
+                np.inf)
+        logger.info("errors: global %.5e local %.5e", err_global, err_local)
+
+        # compute max element size
+        h_max = bind(discr, sym.h_max_from_volume(discr.ambient_dim, dd=dd))(queue)
+        eoc_global.add_data_point(h_max, err_global)
+        eoc_local.add_data_point(h_max, err_local)
+
+        if visualize:
+            r = cl.array.to_device(queue, op_local)
+            r = cl.clmath.log10(cl.clmath.fabs(r) + 1.0e-16)
+
+            from meshmode.discretization.visualization import make_visualizer
+            vis = make_visualizer(queue, discr, vis_order=builder.order)
+
+            filename = "test_surface_divergence_theorem_error_{:04d}".format(i)
+            vis.write_vtk_file(filename, [
+                ("r", r)
+                ], overwrite=True, legend=False)
+
+    # }}}
+
+    order = min(builder.order, builder.mesh_order) - 0.5
+    logger.info("\n%s", str(eoc_global))
+    logger.info("\n%s", str(eoc_local))
+
+    assert eoc_global.max_error() < 1.0e-12 \
+            or eoc_global.order_estimate() >= order
+
+    assert eoc_local.max_error() < 1.0e-12 \
+            or eoc_local.order_estimate() >= order
+
 # }}}
 
 
@@ -537,6 +694,7 @@ def test_2d_gauss_theorem(ctx_factory):
     ("disk", [0.1, 0.05]),
     ("rect2", [4, 8]),
     ("rect3", [4, 6]),
+    ("warped", [4, 6, 8])
     ])
 @pytest.mark.parametrize("op_type", ["strong", "weak"])
 @pytest.mark.parametrize("flux_type", ["central"])
@@ -549,6 +707,9 @@ def test_convergence_advec(ctx_factory, mesh_name, mesh_pars, op_type, flux_type
     cl_ctx = ctx_factory()
     queue = cl.CommandQueue(cl_ctx)
 
+    if mesh_name == "warped" and op_type == "strong":
+        pytest.skip("expected failure for strong form on curvilinear grids")
+
     from pytools.convergence import EOCRecorder
     eoc_rec = EOCRecorder()
 
@@ -590,7 +751,12 @@ def test_convergence_advec(ctx_factory, mesh_name, mesh_pars, op_type, flux_type
                 dt_factor = 2
             else:
                 raise ValueError("dt_factor not known for %dd" % dim)
+        elif mesh_name == "warped":
+            dim = 2
+            from meshmode.mesh.generation import generate_warped_rect_mesh
+            mesh = generate_warped_rect_mesh(dim, order, mesh_par)
 
+            dt_factor = 1 if dim == 2 else 2
         else:
             raise ValueError("invalid mesh name: " + mesh_name)
 
@@ -665,7 +831,11 @@ def test_convergence_advec(ctx_factory, mesh_name, mesh_pars, op_type, flux_type
         abscissa_label="h",
         error_label="L2 Error"))
 
-    assert eoc_rec.order_estimate() > order
+    if mesh_name == "warped":
+        # NOTE: warped grids are hard.. be more forgiving
+        assert eoc_rec.order_estimate() > order - 0.25
+    else:
+        assert eoc_rec.order_estimate() > order
 
 # }}}