diff --git a/examples/dagrt-fusion.py b/examples/dagrt-fusion.py
index d4b977b129de7ef7019d4b237023c73095d91406..a96923dbb792ed9874595a3d84177a4f4d429635 100755
--- a/examples/dagrt-fusion.py
+++ b/examples/dagrt-fusion.py
@@ -651,6 +651,7 @@ class ExecutionMapperWithMemOpCounting(ExecutionMapperWrapper):
                         # TODO: Not comprehensive.
                         sym_op.InterpolationOperator,
                         sym_op.RefFaceMassOperator,
+                        sym_op.RefMassOperator,
                         sym_op.RefInverseMassOperator,
                         sym_op.OppositeInteriorFaceSwap)):
                 val = self.map_profiled_essentially_elementwise_linear(
diff --git a/grudge/execution.py b/grudge/execution.py
index a95b15e75e43917688f2c3766651e3018ff9d526..b3faae20d4cc6862f81f3d2c2071ad1e53fa8a8b 100644
--- a/grudge/execution.py
+++ b/grudge/execution.py
@@ -63,7 +63,7 @@ class ExecutionMapper(mappers.Evaluator,
         discr = self.discrwb.discr_from_dd(expr.dd)
 
         result = discr.empty(self.queue, allocator=self.bound_op.allocator)
-        result.fill(1)
+        result.fill(1.0)
         return result
 
     def map_node_coordinate_component(self, expr):
@@ -626,7 +626,9 @@ def process_sym_operator(discrwb, sym_operator, post_bind_mapper=None,
             )(sym_operator)
 
     dumper("before-global-to-reference", sym_operator)
-    sym_operator = mappers.GlobalToReferenceMapper(discrwb.ambient_dim)(sym_operator)
+    sym_operator = mappers.GlobalToReferenceMapper(
+            discrwb.ambient_dim,
+            dim=discrwb.dim)(sym_operator)
 
     dumper("before-distributed", sym_operator)
 
diff --git a/grudge/symbolic/compiler.py b/grudge/symbolic/compiler.py
index 51eb5202acaa1d1ffa6c4cbc8b293eef4a25e54e..7b36ee9f73a7fdf0143619a06617789f7a8a7862 100644
--- a/grudge/symbolic/compiler.py
+++ b/grudge/symbolic/compiler.py
@@ -923,7 +923,7 @@ class ToLoopyExpressionMapper(mappers.IdentityMapper):
                     % expr.function)
 
     def map_ones(self, expr):
-        return 1
+        return 1.0
 
     def map_node_coordinate_component(self, expr):
         return self.map_variable_ref_expr(
diff --git a/grudge/symbolic/mappers/__init__.py b/grudge/symbolic/mappers/__init__.py
index 36b57d50aaf333fbd3303fd72f1425f5de085086..eef6cce7346dfaa2b621669b9a9d986e48ed2a87 100644
--- a/grudge/symbolic/mappers/__init__.py
+++ b/grudge/symbolic/mappers/__init__.py
@@ -597,6 +597,9 @@ class GlobalToReferenceMapper(CSECachingMapperMixin, IdentityMapper):
         self.ambient_dim = ambient_dim
         self.dim = dim
 
+        # NOTE: only use WADG on surfaces at the moment
+        self.use_wadg = self.ambient_dim == (self.dim + 1)
+
     map_common_subexpression_uncached = \
             IdentityMapper.map_common_subexpression
 
@@ -605,14 +608,17 @@ class GlobalToReferenceMapper(CSECachingMapperMixin, IdentityMapper):
         # if we encounter non-quadrature operators here, we know they
         # must be nodal.
 
-        if expr.op.dd_in.is_volume():
+        dd_in = expr.op.dd_in
+        dd_out = expr.op.dd_out
+
+        if dd_in.is_volume():
             dim = self.dim
         else:
             dim = self.dim - 1
 
-        jac_in = sym.area_element(self.ambient_dim, dim, dd=expr.op.dd_in)
+        jac_in = sym.area_element(self.ambient_dim, dim, dd=dd_in)
         jac_noquad = sym.area_element(self.ambient_dim, dim,
-                dd=expr.op.dd_in.with_qtag(sym.QTAG_NONE))
+                dd=dd_in.with_qtag(sym.QTAG_NONE))
 
         def rewrite_derivative(ref_class, field,  dd_in, with_jacobian=True):
             jac_tag = sym.area_element(self.ambient_dim, self.dim, dd=dd_in)
@@ -628,21 +634,31 @@ class GlobalToReferenceMapper(CSECachingMapperMixin, IdentityMapper):
                     for rst_axis in range(self.dim))
 
         if isinstance(expr.op, op.MassOperator):
-            return op.RefMassOperator(expr.op.dd_in, expr.op.dd_out)(
+            return op.RefMassOperator(dd_in, dd_out)(
                     jac_in * self.rec(expr.field))
 
         elif isinstance(expr.op, op.InverseMassOperator):
-            return op.RefInverseMassOperator(expr.op.dd_in, expr.op.dd_out)(
-                1/jac_in * self.rec(expr.field))
+            if self.use_wadg:
+                # TODO: this is also required for flat curvilinear elements
+                # based on https://arxiv.org/pdf/1608.03836.pdf
+                return op.RefInverseMassOperator(dd_in, dd_out)(
+                    op.RefMassOperator(dd_in, dd_out)(
+                        1.0/jac_in * op.RefInverseMassOperator(dd_in, dd_out)(
+                            self.rec(expr.field))
+                            )
+                    )
+            else:
+                return op.RefInverseMassOperator(dd_in, dd_out)(
+                        1/jac_in * self.rec(expr.field))
 
         elif isinstance(expr.op, op.FaceMassOperator):
-            jac_in_surf = sym.area_element(self.ambient_dim, self.dim - 1,
-                    dd=expr.op.dd_in)
-            return op.RefFaceMassOperator(expr.op.dd_in, expr.op.dd_out)(
+            jac_in_surf = sym.area_element(
+                    self.ambient_dim, self.dim - 1, dd=dd_in)
+            return op.RefFaceMassOperator(dd_in, dd_out)(
                     jac_in_surf * self.rec(expr.field))
 
         elif isinstance(expr.op, op.StiffnessOperator):
-            return op.RefMassOperator()(
+            return op.RefMassOperator(dd_in=dd_in, dd_out=dd_out)(
                     jac_noquad
                     * self.rec(
                         op.DiffOperator(expr.op.xyz_axis)(expr.field)))
@@ -650,12 +666,12 @@ class GlobalToReferenceMapper(CSECachingMapperMixin, IdentityMapper):
         elif isinstance(expr.op, op.DiffOperator):
             return rewrite_derivative(
                     op.RefDiffOperator,
-                    expr.field, dd_in=expr.op.dd_in, with_jacobian=False)
+                    expr.field, dd_in=dd_in, with_jacobian=False)
 
         elif isinstance(expr.op, op.StiffnessTOperator):
             return rewrite_derivative(
                     op.RefStiffnessTOperator,
-                    expr.field, dd_in=expr.op.dd_in)
+                    expr.field, dd_in=dd_in)
 
         elif isinstance(expr.op, op.MInvSTOperator):
             return self.rec(
@@ -805,6 +821,9 @@ class StringifyMapper(pymbolic.mapper.stringifier.StringifyMapper):
     def map_ones(self, expr, enclosing_prec):
         return "Ones:" + self._format_dd(expr.dd)
 
+    def map_signed_face_ones(self, expr, enclosing_prec):
+        return "SignedOnes:" + self._format_dd(expr.dd)
+
     # {{{ geometry data
 
     def map_node_coordinate_component(self, expr, enclosing_prec):
@@ -1174,7 +1193,7 @@ class ErrorChecker(CSECachingMapperMixin, IdentityMapper):
     def map_operator_binding(self, expr):
         if isinstance(expr.op, op.DiffOperatorBase):
             if (self.mesh is not None
-                    and expr.op.xyz_axis >= self.mesh.dim):
+                    and expr.op.xyz_axis >= self.mesh.ambient_dim):
                 raise ValueError("optemplate tries to differentiate along a "
                         "non-existent axis (e.g. Z in 2D)")
 
diff --git a/test/mesh_data.py b/test/mesh_data.py
new file mode 100644
index 0000000000000000000000000000000000000000..c6ba21eecb712d74e7e6d3f197adfe72ebec3df6
--- /dev/null
+++ b/test/mesh_data.py
@@ -0,0 +1,130 @@
+import six
+import numpy as np
+
+
+class MeshBuilder:
+    order = 4
+    mesh_order = None
+
+    def __init__(self, **kwargs):
+        for k, v in six.iteritems(kwargs):
+            setattr(self, k, v)
+
+        if self.mesh_order is None:
+            self.mesh_order = self.order
+
+    @property
+    def ambient_dim(self):
+        raise NotImplementedError
+
+    @property
+    def resolutions(self):
+        raise NotImplementedError
+
+    def get_mesh(self, resolution, mesh_order):
+        raise NotImplementedError
+
+
+class Curve2DMeshBuilder(MeshBuilder):
+    ambient_dim = 2
+    resolutions = [16, 32, 64, 128]
+
+    def get_mesh(self, resolution, mesh_order):
+        from meshmode.mesh.generation import make_curve_mesh
+        return make_curve_mesh(
+                self.curve_fn,
+                np.linspace(0.0, 1.0, resolution + 1),
+                mesh_order)
+
+
+class EllipseMeshBuilder(Curve2DMeshBuilder):
+    radius = 3.1
+    aspect_ratio = 2.0
+
+    @property
+    def curve_fn(self):
+        from meshmode.mesh.generation import ellipse
+        return lambda t: self.radius * ellipse(self.aspect_ratio, t)
+
+
+class StarfishMeshBuilder(Curve2DMeshBuilder):
+    narms = 5
+    amplitude = 0.25
+
+    @property
+    def curve_fn(self):
+        from meshmode.mesh.generation import NArmedStarfish
+        return NArmedStarfish(self.narms, self.amplitude)
+
+
+class SphereMeshBuilder(MeshBuilder):
+    ambient_dim = 3
+
+    resolutions = [0, 1, 2, 3]
+    radius = 1.0
+
+    def get_mesh(self, resolution, mesh_order):
+        from meshmode.mesh.generation import generate_icosphere
+        return generate_icosphere(self.radius, order=mesh_order,
+                uniform_refinement_rounds=resolution)
+
+
+class SpheroidMeshBuilder(MeshBuilder):
+    ambient_dim = 3
+
+    mesh_order = 3
+    resolutions = [1.0, 0.1, 0.05]
+    # resolutions = [1.0, 0.1, 0.05, 0.025, 0.015]
+
+    @property
+    def radius(self):
+        return 0.25
+
+    @property
+    def diameter(self):
+        return 2.0 * self.radius
+
+    @property
+    def aspect_ratio(self):
+        return 2.0
+
+    def get_mesh(self, resolution, mesh_order):
+        from meshmode.mesh.io import ScriptSource
+        source = ScriptSource("""
+        SetFactory("OpenCASCADE");
+        Sphere(1) = {{0, 0, 0, {r}}};
+        Dilate {{ {{0, 0, 0}}, {{ {r}, {r}, {rr} }} }} {{ Volume{{1}}; }}
+        """.format(r=self.diameter, rr=self.aspect_ratio * self.diameter),
+        "geo"
+        )
+
+        from meshmode.mesh.io import generate_gmsh
+        mesh = generate_gmsh(source, 2, order=mesh_order,
+                other_options=[
+                    "-string",
+                    "Mesh.CharacteristicLengthMax = %g;" % resolution])
+
+        from meshmode.mesh.processing import perform_flips
+        return perform_flips(mesh, np.ones(mesh.nelements))
+
+
+class BoxMeshBuilder(MeshBuilder):
+    ambient_dim = 2
+
+    mesh_order = 1
+    resolutions = [8, 16, 32]
+
+    a = (-0.5, -0.5, -0.5)
+    b = (+0.5, +0.5, +0.5)
+
+    def get_mesh(self, resolution, mesh_order):
+        if not isinstance(resolution, (list, tuple)):
+            resolution = (resolution,) * self.ambient_dim
+
+        from meshmode.mesh.generation import generate_regular_rect_mesh
+        mesh = generate_regular_rect_mesh(
+                a=self.a, b=self.b,
+                n=resolution,
+                order=mesh_order)
+
+        return mesh
diff --git a/test/test_grudge.py b/test/test_grudge.py
index fc20811f9e126dea877fa61d4111b3b336db606f..b41e003cd1e7908001de8fb8cfdb6a4b912f395f 100644
--- a/test/test_grudge.py
+++ b/test/test_grudge.py
@@ -31,19 +31,17 @@ import pyopencl.array
 import pyopencl.clmath
 
 from pytools.obj_array import join_fields, make_obj_array
+from grudge import sym, bind, DGDiscretizationWithBoundaries
 
-import pytest  # noqa
-
+import pytest
 from pyopencl.tools import (  # noqa
-        pytest_generate_tests_for_pyopencl as pytest_generate_tests)
+        pytest_generate_tests_for_pyopencl
+        as pytest_generate_tests)
 
 import logging
 
 logger = logging.getLogger(__name__)
-logging.basicConfig()
-logger.setLevel(logging.INFO)
-
-from grudge import sym, bind, DGDiscretizationWithBoundaries
+logging.basicConfig(level=logging.INFO)
 
 
 @pytest.mark.parametrize("dim", [2, 3])
@@ -157,6 +155,166 @@ def test_mass_mat_trig(ctx_factory, ambient_dim, quad_tag):
         assert err_3 < 5.0e-10, err_3
 
 
+def _ellipse_surface_area(radius, aspect_ratio):
+    # https://docs.scipy.org/doc/scipy/reference/generated/scipy.special.ellipe.html
+    eccentricity = 1.0 - (1/aspect_ratio)**2
+
+    if abs(aspect_ratio - 2.0) < 1.0e-14:
+        # NOTE: hardcoded value so we don't need scipy for the test
+        ellip_e = 1.2110560275684594
+    else:
+        from scipy.special import ellipe
+        ellip_e = ellipe(eccentricity)
+
+    return 4.0 * radius * ellip_e
+
+
+def _spheroid_surface_area(radius, aspect_ratio):
+    # https://en.wikipedia.org/wiki/Ellipsoid#Surface_area
+    a = 1.0
+    c = aspect_ratio
+
+    if a < c:
+        e = np.sqrt(1.0 - (a/c)**2)
+        return 2.0 * np.pi * radius**2 * (1.0 + (c/a) / e * np.arcsin(e))
+    else:
+        e = np.sqrt(1.0 - (c/a)**2)
+        return 2.0 * np.pi * radius**2 * (1 + (c/a)**2 / e * np.arctanh(e))
+
+
+@pytest.mark.parametrize("name", [
+    "2-1-ellipse", "spheroid", "box2d", "box3d"
+    ])
+def test_mass_surface_area(ctx_factory, name):
+    cl_ctx = cl.create_some_context()
+    queue = cl.CommandQueue(cl_ctx)
+
+    # {{{ cases
+
+    if name == "2-1-ellipse":
+        from mesh_data import EllipseMeshBuilder
+        builder = EllipseMeshBuilder(radius=3.1, aspect_ratio=2.0)
+        surface_area = _ellipse_surface_area(builder.radius, builder.aspect_ratio)
+    elif name == "spheroid":
+        from mesh_data import SpheroidMeshBuilder
+        builder = SpheroidMeshBuilder()
+        surface_area = _spheroid_surface_area(builder.radius, builder.aspect_ratio)
+    elif name == "box2d":
+        from mesh_data import BoxMeshBuilder
+        builder = BoxMeshBuilder(ambient_dim=2)
+        surface_area = 1.0
+    elif name == "box3d":
+        from mesh_data import BoxMeshBuilder
+        builder = BoxMeshBuilder(ambient_dim=3)
+        surface_area = 1.0
+    else:
+        raise ValueError("unknown geometry name: %s" % name)
+
+    # }}}
+
+    # {{{ convergence
+
+    from pytools.convergence import EOCRecorder
+    eoc = EOCRecorder()
+
+    for resolution in builder.resolutions:
+        mesh = builder.get_mesh(resolution, builder.mesh_order)
+        discr = DGDiscretizationWithBoundaries(cl_ctx, mesh, order=builder.order)
+        volume_discr = discr.discr_from_dd(sym.DD_VOLUME)
+
+        x = discr.discr_from_dd("vol").nodes().get(queue)
+        logger.info("nnodes:    %d", volume_discr.nnodes)
+        logger.info("nelements: %d", volume_discr.mesh.nelements)
+        logger.info("bbox:      %s",
+                [(np.min(x[n]), np.max(x[n])) for n in range(x.shape[0])])
+
+        # {{{ compute surface area
+
+        dd = sym.DD_VOLUME
+        sym_op = sym.NodalSum(dd)(sym.MassOperator(dd, dd)(sym.Ones(dd)))
+        approx_surface_area = bind(discr, sym_op)(queue)
+
+        logger.info("surface: got {:.5e} / expected {:.5e}".format(
+            approx_surface_area, surface_area))
+        area_error = abs(approx_surface_area - surface_area) / abs(surface_area)
+
+        # }}}
+
+        h_max = bind(discr, sym.h_max_from_volume(discr.ambient_dim, dd=dd))(queue)
+        eoc.add_data_point(h_max, area_error)
+
+    # }}}
+
+    logger.info("surface area error\n%s", str(eoc))
+
+    assert eoc.max_error() < 1.0e-14 \
+            or eoc.order_estimate() >= (builder.order - 1.0)
+
+
+@pytest.mark.parametrize("name", ["2-1-ellipse", "spheroid"])
+def test_surface_mass_operator_inverse(ctx_factory, name):
+    cl_ctx = cl.create_some_context()
+    queue = cl.CommandQueue(cl_ctx)
+
+    # {{{ cases
+
+    if name == "2-1-ellipse":
+        from mesh_data import EllipseMeshBuilder
+        builder = EllipseMeshBuilder(radius=3.1, aspect_ratio=2.0)
+        surface_area = _ellipse_surface_area(builder.radius, builder.aspect_ratio)
+    elif name == "spheroid":
+        from mesh_data import SpheroidMeshBuilder
+        builder = SpheroidMeshBuilder()
+        surface_area = _spheroid_surface_area(builder.radius, builder.aspect_ratio)
+    else:
+        raise ValueError("unknown geometry name: %s" % name)
+
+    # }}}
+
+    # {{{ convergence
+
+    from pytools.convergence import EOCRecorder
+    eoc = EOCRecorder()
+
+    for resolution in builder.resolutions:
+        mesh = builder.get_mesh(resolution, builder.mesh_order)
+        discr = DGDiscretizationWithBoundaries(cl_ctx, mesh, order=builder.order)
+        volume_discr = discr.discr_from_dd(sym.DD_VOLUME)
+
+        x = discr.discr_from_dd("vol").nodes().get(queue)
+        logger.info("nnodes:    %d", volume_discr.nnodes)
+        logger.info("nelements: %d", volume_discr.mesh.nelements)
+        logger.info("bbox:      %s",
+                [(np.min(x[n]), np.max(x[n])) for n in range(x.shape[0])])
+
+        # {{{ compute inverse mass
+
+        dd = sym.DD_VOLUME
+        sym_f = sym.cos(4.0 * sym.nodes(mesh.ambient_dim, dd)[0])
+        sym_op = sym.InverseMassOperator(dd, dd)(
+                sym.MassOperator(dd, dd)(sym.var("f")))
+
+        f = bind(discr, sym_f)(queue)
+        f_inv = bind(discr, sym_op)(queue, f=f)
+
+        logger.info("inverse: got {:.5e} / expected {:.5e}".format(
+            cl.array.max(f - f_inv).get(queue), 1.0))
+        inv_error = la.norm(f.get(queue) - f_inv.get(queue)) \
+                / la.norm(f.get(queue))
+
+        # }}}
+
+        h_max = bind(discr, sym.h_max_from_volume(discr.ambient_dim, dd=dd))(queue)
+        eoc.add_data_point(h_max, inv_error)
+
+    # }}}
+
+    logger.info("inverse mass error\n%s", str(eoc))
+
+    assert eoc.max_error() < 5.0e-09 \
+            or eoc.order_estimate() >= (builder.order - 1.0)
+
+
 @pytest.mark.parametrize("dim", [1, 2, 3])
 def test_tri_diff_mat(ctx_factory, dim, order=4):
     """Check differentiation matrix along the coordinate axes on a disk
@@ -656,7 +814,6 @@ if __name__ == "__main__":
     if len(sys.argv) > 1:
         exec(sys.argv[1])
     else:
-        from pytest import main
-        main([__file__])
+        pytest.main([__file__])
 
 # vim: fdm=marker