diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index eb3a2e4aff06ef799b5a2018685b07e5d149646f..1d877ddbb97c6feec650d7b6648474017eb2e837 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -9,6 +9,8 @@ Python 2.7 POCL:
   - python2.7
   - pocl
   - mpi
+  # https://github.com/pocl/pocl/issues/757
+  allow_failure: true
   except:
   - tags
   artifacts:
@@ -26,6 +28,26 @@ Python 3 POCL:
   - python3
   - pocl
   - mpi
+  # https://github.com/pocl/pocl/issues/757
+  allow_failure: true
+  except:
+  - tags
+  artifacts:
+    reports:
+      junit: test/pytest.xml
+
+Python 3 Intel:
+  script:
+  - export PY_EXE=python3
+  - export EXTRA_INSTALL="pybind11 numpy mako"
+  - source /opt/enable-intel-cl.sh
+  - export PYOPENCL_TEST="intel(r):pu"
+  - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project.sh
+  - ". ./build-and-test-py-project.sh"
+  tags:
+  - python3
+  - pocl
+  - mpi
   except:
   - tags
   artifacts:
@@ -44,6 +66,8 @@ Python 2.7 POCL MPI:
   - python2.7
   - pocl
   - mpi
+  # https://github.com/pocl/pocl/issues/757
+  allow_failure: true
   except:
   - tags
   artifacts:
@@ -62,6 +86,27 @@ Python 3 POCL MPI:
   - python3
   - pocl
   - mpi
+  # https://github.com/pocl/pocl/issues/757
+  allow_failure: true
+  except:
+  - tags
+  artifacts:
+    reports:
+      junit: test/pytest.xml
+
+Python 3 Intel MPI:
+  script:
+  - export PY_EXE=python3
+  - source /opt/enable-intel-cl.sh
+  - export PYOPENCL_TEST="intel(r):pu"
+  - export EXTRA_INSTALL="pybind11 numpy mako mpi4py pymetis"
+  - export PYTEST_ADDOPTS="-k mpi"
+  - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project.sh
+  - ". ./build-and-test-py-project.sh"
+  tags:
+  - python3
+  - pocl
+  - mpi
   except:
   - tags
   artifacts:
@@ -79,6 +124,23 @@ Python 3 POCL Examples:
   - python3
   - pocl
   - large-node
+  # https://github.com/pocl/pocl/issues/757
+  allow_failure: true
+  except:
+  - tags
+
+Python 3 Intel Examples:
+  script:
+  - export PY_EXE=python3
+  - source /opt/enable-intel-cl.sh
+  - export PYOPENCL_TEST="intel(r):pu"
+  - export EXTRA_INSTALL="pybind11 numpy mako mpi4py pyvisfile pymetis"
+  - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-py-project-and-run-examples.sh
+  - ". ./build-py-project-and-run-examples.sh"
+  tags:
+  - python3
+  - pocl
+  - large-node
   except:
   - tags
 
diff --git a/doc/conf.py b/doc/conf.py
index 3becabe61a7206322542e48a615be62742af0dae..560efec5951aa5de9a4b732e304a121c0c0c8baa 100644
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -320,11 +320,11 @@ texinfo_documents = [
 # Example configuration for intersphinx: refer to the Python standard library.
 intersphinx_mapping = {
     'http://docs.python.org/': None,
-    'http://documen.tician.de/boxtree/': None,
     'http://docs.scipy.org/doc/numpy/': None,
-    'http://documen.tician.de/modepy/': None,
     'http://documen.tician.de/pyopencl/': None,
+    'http://documen.tician.de/modepy/': None,
     'http://documen.tician.de/pymbolic/': None,
+    'http://documen.tician.de/meshmode/': None,
     #'http://documen.tician.de/loopy/': None,
     }
 autoclass_content = "both"
diff --git a/doc/index.rst b/doc/index.rst
index a20ec5093527240204a0da00b19b4188f08317c2..b996c470ffc998618b118e815b05b1c42ff7e9be 100644
--- a/doc/index.rst
+++ b/doc/index.rst
@@ -1,9 +1,4 @@
-.. grudge documentation master file, created by
-   sphinx-quickstart on Sun Sep 27 13:08:30 2015.
-   You can adapt this file completely to your liking, but it should at least
-   contain the root `toctree` directive.
-
-Welcome to grudge's documentation!
+Welcome to grudge's Documentation!
 ==================================
 
 Contents:
@@ -12,10 +7,10 @@ Contents:
     :maxdepth: 2
 
     misc
+    symbolic
 
 
-
-Indices and tables
+Indices and Tables
 ==================
 
 * :ref:`genindex`
diff --git a/doc/symbolic.rst b/doc/symbolic.rst
new file mode 100644
index 0000000000000000000000000000000000000000..632b1f861beaa80c3f3c26e93b08272ec4a31259
--- /dev/null
+++ b/doc/symbolic.rst
@@ -0,0 +1,14 @@
+Symbolic Operator Representation
+================================
+
+Based on :mod:`pymbolic`.
+
+Basic Objects
+-------------
+
+.. automodule:: grudge.symbolic.primitives
+
+Operators
+---------
+
+.. automodule:: grudge.symbolic.operators
diff --git a/examples/advection/weak.py b/examples/advection/weak.py
index 9b880ba8952761514c91743e5078794b346b7c4b..6e30094d080033f225a9dbc45227e0d0c7665ad0 100644
--- a/examples/advection/weak.py
+++ b/examples/advection/weak.py
@@ -13,88 +13,135 @@
 # You should have received a copy of the GNU General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
-
 import numpy as np
-import pyopencl as cl  # noqa
-import pyopencl.array  # noqa
-import pyopencl.clmath  # noqa
+import numpy.linalg as la
 
-import pytest  # noqa
+import pyopencl as cl
+import pyopencl.array
+import pyopencl.clmath
 
-from pyopencl.tools import (  # noqa
-                pytest_generate_tests_for_pyopencl as pytest_generate_tests)
+from grudge import bind, sym
 
 import logging
+
 logger = logging.getLogger(__name__)
+logging.basicConfig(level=logging.INFO)
 
-from grudge import sym, bind, DGDiscretizationWithBoundaries
 
-import numpy.linalg as la
+def main(ctx_factory, dim=2, order=4, visualize=True):
+    cl_ctx = ctx_factory()
+    queue = cl.CommandQueue(cl_ctx)
 
+    # {{{ parameters
+
+    # domain side [-d/2, d/2]^dim
+    d = 1.0
+    # number of points in each dimension
+    npoints = 20
+    # grid spacing
+    h = d / npoints
+    # cfl?
+    dt_factor = 2.0
+    # final time
+    final_time = 1.0
+
+    # velocity field
+    c = np.array([0.5] * dim)
+    norm_c = la.norm(c)
+    # flux
+    flux_type = "central"
 
-def main(write_output=True, order=4):
-    cl_ctx = cl.create_some_context()
-    queue = cl.CommandQueue(cl_ctx)
+    # compute number of steps
+    dt = dt_factor * h / order**2
+    nsteps = int(final_time // dt) + 1
+    dt = final_time/nsteps + 1.0e-15
 
-    dim = 2
+    # }}}
 
-    from meshmode.mesh.generation import generate_regular_rect_mesh
-    mesh = generate_regular_rect_mesh(a=(-0.5, -0.5), b=(0.5, 0.5),
-            n=(20, 20), order=order)
+    # {{{ discretization
 
-    dt_factor = 4
-    h = 1/20
+    from meshmode.mesh.generation import generate_box_mesh
+    mesh = generate_box_mesh(
+            [np.linspace(-d/2, d/2, npoints) for _ in range(dim)],
+            order=order)
 
+    from grudge import DGDiscretizationWithBoundaries
     discr = DGDiscretizationWithBoundaries(cl_ctx, mesh, order=order)
+    volume_discr = discr.discr_from_dd(sym.DD_VOLUME)
 
-    c = np.array([0.1, 0.1])
-    norm_c = la.norm(c)
+    # }}}
 
-    flux_type = "central"
+    # {{{ solve advection
 
     def f(x):
-        return sym.sin(3*x)
+        return sym.sin(3 * x)
 
-    def u_analytic(x):
-        return f(-np.dot(c, x)/norm_c+sym.var("t", sym.DD_SCALAR)*norm_c)
+    def u_analytic(x, t=None):
+        if t is None:
+            t = sym.var("t", sym.DD_SCALAR)
+        return f(-np.dot(c, x) / norm_c + t * norm_c)
 
     from grudge.models.advection import WeakAdvectionOperator
-
-    discr = DGDiscretizationWithBoundaries(cl_ctx, mesh, order=order)
     op = WeakAdvectionOperator(c,
         inflow_u=u_analytic(sym.nodes(dim, sym.BTAG_ALL)),
         flux_type=flux_type)
 
     bound_op = bind(discr, op.sym_operator())
-
     u = bind(discr, u_analytic(sym.nodes(dim)))(queue, t=0)
 
     def rhs(t, u):
         return bound_op(queue, t=t, u=u)
 
-    final_time = 0.3
-
-    dt = dt_factor * h/order**2
-    nsteps = (final_time // dt) + 1
-    dt = final_time/nsteps + 1e-15
-
     from grudge.shortcuts import set_up_rk4
     dt_stepper = set_up_rk4("u", dt, u, rhs)
 
-    from grudge.shortcuts import make_visualizer
-    vis = make_visualizer(discr, vis_order=order)
+    if dim == 1:
+        import matplotlib.pyplot as pt
+        pt.figure(figsize=(8, 8), dpi=300)
+
+        volume_x = volume_discr.nodes().get(queue)
+    else:
+        from grudge.shortcuts import make_visualizer
+        vis = make_visualizer(discr, vis_order=order)
+
+    def plot_solution(evt):
+        if not visualize:
+            return
+
+        if dim == 1:
+            u = event.state_component.get(queue)
+            u_ = bind(discr, u_analytic(sym.nodes(dim)))(queue, t=evt.t).get(queue)
+
+            pt.plot(volume_x[0], u, 'o')
+            pt.plot(volume_x[0], u_, "k.")
+            pt.xlabel("$x$")
+            pt.ylabel("$u$")
+            pt.title("t = {:.2f}".format(evt.t))
+            pt.savefig("fld-weak-%04d.png" % step)
+            pt.clf()
+        else:
+            vis.write_vtk_file("fld-weak-%04d.vtu" % step, [
+                ("u", evt.state_component)
+                ], overwrite=True)
 
     step = 0
-
+    norm = bind(discr, sym.norm(2, sym.var("u")))
     for event in dt_stepper.run(t_end=final_time):
-        if isinstance(event, dt_stepper.StateComputed):
-
-            step += 1
+        if not isinstance(event, dt_stepper.StateComputed):
+            continue
 
-            #print(step, event.t, norm(queue, u=event.state_component[0]))
-            vis.write_vtk_file("fld-weak-%04d.vtu" % step,
-                    [("u", event.state_component)])
+        step += 1
+        norm_u = norm(queue, u=event.state_component)
+        logger.info("[%04d] t = %.5f |u| = %.5e", step, event.t, norm_u)
+        plot_solution(event)
 
 
 if __name__ == "__main__":
-    main()
+    import argparse
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--dim", default=2, type=int)
+    args = parser.parse_args()
+
+    main(cl.create_some_context,
+            dim=args.dim)
diff --git a/examples/dagrt-fusion.py b/examples/dagrt-fusion.py
index 27e35abd82161e528481344eb28a6f0f81c22839..d4b977b129de7ef7019d4b237023c73095d91406 100755
--- a/examples/dagrt-fusion.py
+++ b/examples/dagrt-fusion.py
@@ -50,6 +50,7 @@ from pymbolic.mapper import Mapper
 from pymbolic.mapper.evaluator import EvaluationMapper \
         as PymbolicEvaluationMapper
 from pytools import memoize
+from pytools.obj_array import join_fields
 
 from grudge import sym, bind, DGDiscretizationWithBoundaries
 from leap.rk import LSRK4MethodBuilder
@@ -159,7 +160,7 @@ def transcribe_phase(dag, field_var_name, field_components, phase_name,
             "<dt>": sym.var("input_dt", sym.DD_SCALAR),
             f"<state>{field_var_name}": sym.make_sym_array(
                 f"input_{field_var_name}", field_components),
-            f"<p>residual": sym.make_sym_array(
+            "<p>residual": sym.make_sym_array(
                 "input_residual", field_components),
     }
 
@@ -245,7 +246,6 @@ class RK4TimeStepperBase(object):
         self.component_getter = component_getter
 
     def get_initial_context(self, fields, t_start, dt):
-        from pytools.obj_array import join_fields
 
         # Flatten fields.
         flattened_fields = []
@@ -286,7 +286,6 @@ class RK4TimeStepperBase(object):
         assert len(output_states) == num_fields
         assert len(output_states) == len(output_residuals)
 
-        from pytools.obj_array import join_fields
         flattened_results = join_fields(output_t, output_dt, *output_states)
 
         self.bound_op = bind(
@@ -354,8 +353,6 @@ class RK4TimeStepper(RK4TimeStepperBase):
                     + tuple(
                         Variable(field_var_name, dd=sym.DD_VOLUME)[i]
                         for i in range(num_fields)))))
-
-        from pytools.obj_array import join_fields
         sym_rhs = join_fields(*(call[i] for i in range(num_fields)))
 
         self.queue = queue
@@ -377,7 +374,6 @@ class RK4TimeStepper(RK4TimeStepperBase):
         self.component_getter = component_getter
 
     def _bound_op(self, queue, t, *args, profile_data=None):
-        from pytools.obj_array import join_fields
         context = {
                 "t": t,
                 self.field_var_name: join_fields(*args)}
@@ -452,16 +448,15 @@ def dg_flux(c, tpair):
     dims = len(v)
 
     normal = sym.normal(tpair.dd, dims)
-
-    flux_weak = sym.join_fields(
+    flux_weak = join_fields(
             np.dot(v.avg, normal),
             u.avg * normal)
 
-    flux_weak -= (1 if c > 0 else -1)*sym.join_fields(
+    flux_weak -= (1 if c > 0 else -1)*join_fields(
             0.5*(u.int-u.ext),
             0.5*(normal * np.dot(normal, v.int-v.ext)))
 
-    flux_strong = sym.join_fields(
+    flux_strong = join_fields(
             np.dot(v.int, normal),
             u.int * normal) - flux_weak
 
@@ -507,13 +502,13 @@ def get_strong_wave_op_with_discr_direct(cl_ctx, dims=2, order=4):
     rad_u = sym.cse(sym.interp("vol", BTAG_ALL)(u))
     rad_v = sym.cse(sym.interp("vol", BTAG_ALL)(v))
 
-    rad_bc = sym.cse(sym.join_fields(
+    rad_bc = sym.cse(join_fields(
             0.5*(rad_u - sign*np.dot(rad_normal, rad_v)),
             0.5*rad_normal*(np.dot(rad_normal, rad_v) - sign*rad_u)
             ), "rad_bc")
 
     sym_operator = (
-            - sym.join_fields(
+            - join_fields(
                 -c*np.dot(sym.nabla(dims), v) - source_f,
                 -c*(sym.nabla(dims)*u)
                 )
@@ -550,7 +545,6 @@ def test_stepper_equivalence(ctx_factory, order=4):
     elif dims == 3:
         dt = 0.02
 
-    from pytools.obj_array import join_fields
     ic = join_fields(discr.zeros(queue),
             [discr.zeros(queue) for i in range(discr.dim)])
 
@@ -804,7 +798,6 @@ def test_stepper_mem_ops(ctx_factory, use_fusion):
     dt = 0.04
     t_end = 0.2
 
-    from pytools.obj_array import join_fields
     ic = join_fields(discr.zeros(queue),
             [discr.zeros(queue) for i in range(discr.dim)])
 
@@ -975,7 +968,6 @@ def test_stepper_timing(ctx_factory, use_fusion):
     dt = 0.04
     t_end = 0.1
 
-    from pytools.obj_array import join_fields
     ic = join_fields(discr.zeros(queue),
             [discr.zeros(queue) for i in range(discr.dim)])
 
@@ -1040,7 +1032,6 @@ def get_example_stepper(queue, dims=2, order=3, use_fusion=True,
                 exec_mapper_factory=exec_mapper_factory)
 
     if return_ic:
-        from pytools.obj_array import join_fields
         ic = join_fields(discr.zeros(queue),
                 [discr.zeros(queue) for i in range(discr.dim)])
         return stepper, ic
diff --git a/grudge/__init__.py b/grudge/__init__.py
index b854007a48c227d58541a13a972a3a7e25c4e062..17358dd474304bec731bedb58a98cd692ec69935 100644
--- a/grudge/__init__.py
+++ b/grudge/__init__.py
@@ -22,6 +22,11 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 THE SOFTWARE.
 """
 
-import grudge.sym  # noqa
-from grudge.execution import bind  # noqa
-from grudge.discretization import DGDiscretizationWithBoundaries  # noqa
+import grudge.symbolic as sym
+from grudge.execution import bind
+
+from grudge.discretization import DGDiscretizationWithBoundaries
+
+__all__ = [
+    "sym", "bind", "DGDiscretizationWithBoundaries"
+]
diff --git a/grudge/execution.py b/grudge/execution.py
index 948be131205034a2d0ddb597a11d2c8ad8f42327..761b14729c330e5d1ebcdaf203dea1c437259a4f 100644
--- a/grudge/execution.py
+++ b/grudge/execution.py
@@ -132,7 +132,7 @@ class ExecutionMapper(mappers.Evaluator,
 
         if isinstance(else_,  pyopencl.array.Array):
             sym_else = var("b")[i]
-        elif isinstance(then,  np.number):
+        elif isinstance(else_,  np.number):
             sym_else = var("b")
         else:
             raise TypeError(
@@ -174,6 +174,7 @@ class ExecutionMapper(mappers.Evaluator,
                 default_offset=lp.auto, name="diff")
 
             knl = lp.split_iname(knl, "i", 16, inner_tag="l.0")
+            knl = lp.tag_array_axes(knl, "mat", "stride:auto,stride:auto")
             return lp.tag_inames(knl, dict(k="g.0"))
 
         in_discr = self.discrwb.discr_from_dd(op.dd_in)
@@ -283,6 +284,29 @@ class ExecutionMapper(mappers.Evaluator,
 
     # }}}
 
+    def map_signed_face_ones(self, expr):
+        assert expr.dd.is_trace()
+        face_discr = self.discrwb.discr_from_dd(expr.dd)
+        assert face_discr.dim == 0
+
+        # NOTE: ignore quadrature_tags on expr.dd, since we only care about
+        # the face_id here
+        all_faces_conn = self.discrwb.connection_from_dds(
+                sym.DD_VOLUME,
+                sym.DOFDesc(expr.dd.domain_tag))
+
+        field = face_discr.empty(self.queue,
+                dtype=self.discrwb.real_dtype,
+                allocator=self.bound_op.allocator)
+        field.fill(1)
+
+        for grp in all_faces_conn.groups:
+            for batch in grp.batches:
+                i = batch.to_element_indices.with_queue(self.queue)
+                field[i] = (2.0 * (batch.to_element_face % 2) - 1.0) * field[i]
+
+        return field
+
     # }}}
 
     # {{{ instruction execution functions
diff --git a/grudge/function_registry.py b/grudge/function_registry.py
index 111443c41a7c12f155222bb06bb0ac636c0bde87..4d521560d8e6c83a8e9fa238b3c46955dc62e7a7 100644
--- a/grudge/function_registry.py
+++ b/grudge/function_registry.py
@@ -33,6 +33,24 @@ from loopy.version import LOOPY_USE_LANGUAGE_VERSION_2018_1  # noqa
 from pytools import RecordWithoutPickling, memoize_in
 
 
+# {{{ helpers
+
+def should_use_numpy(arg):
+    from numbers import Number
+    if isinstance(arg, Number) or \
+            isinstance(arg, np.ndarray) and arg.shape == ():
+        return True
+    return False
+
+
+def cl_to_numpy_function_name(name):
+    return {
+        "atan2": "arctan2",
+        }.get(name, name)
+
+# }}}
+
+
 # {{{ function
 
 class FunctionNotFound(KeyError):
@@ -74,12 +92,7 @@ class CElementwiseUnaryFunction(Function):
 
     def __call__(self, queue, arg):
         func_name = self.identifier
-
-        from numbers import Number
-        if (
-                isinstance(arg, Number)
-                or (isinstance(arg, np.ndarray)
-                    and arg.shape == ())):
+        if should_use_numpy(arg):
             func = getattr(np, func_name)
             return func(arg)
 
@@ -108,6 +121,39 @@ class CElementwiseUnaryFunction(Function):
         return out
 
 
+class CElementwiseBinaryFunction(Function):
+    supports_codegen = True
+
+    def get_result_dofdesc(self, arg_dds):
+        assert len(arg_dds) == 2
+        from pytools import single_valued
+        return single_valued(arg_dds)
+
+    def __call__(self, queue, arg0, arg1):
+        func_name = self.identifier
+        if should_use_numpy(arg0) and should_use_numpy(arg1):
+            func = getattr(np, cl_to_numpy_function_name(func_name))
+            return func(arg0, arg1)
+
+        from pymbolic.primitives import Variable
+
+        @memoize_in(self, "map_call_knl_%s" % func_name)
+        def knl():
+            i = Variable("i")
+            knl = lp.make_kernel(
+                "{[i]: 0<=i<n}",
+                [
+                    lp.Assignment(
+                        Variable("out")[i],
+                        Variable(func_name)(Variable("a")[i], Variable("b")[i]))
+                ], default_offset=lp.auto)
+
+            return lp.split_iname(knl, "i", 128, outer_tag="g.0", inner_tag="l.0")
+
+        evt, (out,) = knl()(queue, a=arg0, b=arg1)
+        return out
+
+
 class CBesselFunction(Function):
 
     supports_codegen = True
@@ -179,6 +225,7 @@ def _make_bfr():
     bfr = bfr.register(CElementwiseUnaryFunction("fabs"))
     bfr = bfr.register(CElementwiseUnaryFunction("sin"))
     bfr = bfr.register(CElementwiseUnaryFunction("cos"))
+    bfr = bfr.register(CElementwiseBinaryFunction("atan2"))
     bfr = bfr.register(CBesselFunction("bessel_j"))
     bfr = bfr.register(CBesselFunction("bessel_y"))
 
diff --git a/grudge/models/advection.py b/grudge/models/advection.py
index 091b9870c4b82a3fde2fc21b84bf92e5ec62ab17..c3b142e411cd5fd7b34b17bce69e529ff565d316 100644
--- a/grudge/models/advection.py
+++ b/grudge/models/advection.py
@@ -74,7 +74,7 @@ class AdvectionOperatorBase(HyperbolicOperator):
 
 class StrongAdvectionOperator(AdvectionOperatorBase):
     def flux(self, u):
-        normal = sym.normal(u. dd, self.ambient_dim)
+        normal = sym.normal(u.dd, self.ambient_dim)
         v_dot_normal = sym.cse(self.v.dot(normal), "v_dot_normal")
 
         return u.int * v_dot_normal - self.weak_flux(u)
@@ -142,7 +142,7 @@ class VariableCoefficientAdvectionOperator(HyperbolicOperator):
 
     def flux(self, u):
 
-        normal = sym.normal(u. dd, self.ambient_dim)
+        normal = sym.normal(u.dd, self.ambient_dim)
 
         surf_v = sym.interp("vol", u.dd)(self.v)
 
diff --git a/grudge/models/em.py b/grudge/models/em.py
index 3422c3be14f8ec10d872d46adc29caaa155131dd..225b3b4a411beae96359df0548ff5366f00ea675 100644
--- a/grudge/models/em.py
+++ b/grudge/models/em.py
@@ -437,7 +437,7 @@ def get_rectangular_cavity_mode(E_0, mode_indices):  # noqa: N803
     if dims == 2:
         tfac = sym.ScalarVariable("t") * omega
 
-        result = sym.join_fields(
+        result = join_fields(
             0,
             0,
             sym.sin(kx * x) * sym.sin(ky * y) * sym.cos(tfac),  # ez
@@ -449,7 +449,7 @@ def get_rectangular_cavity_mode(E_0, mode_indices):  # noqa: N803
         tdep = sym.exp(-1j * omega * sym.ScalarVariable("t"))
 
         gamma_squared = ky**2 + kx**2
-        result = sym.join_fields(
+        result = join_fields(
             -kx * kz * E_0*cx*sy*sz*tdep / gamma_squared,  # ex
             -ky * kz * E_0*sx*cy*sz*tdep / gamma_squared,  # ey
             E_0 * sx*sy*cz*tdep,  # ez
diff --git a/grudge/sym.py b/grudge/sym.py
deleted file mode 100644
index bf9c7de615c82c11a5a6d91fa9ed872341d0d32c..0000000000000000000000000000000000000000
--- a/grudge/sym.py
+++ /dev/null
@@ -1,28 +0,0 @@
-from __future__ import division, absolute_import
-
-__copyright__ = "Copyright (C) 2015 Andreas Kloeckner"
-
-__license__ = """
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-"""
-
-from grudge.symbolic.primitives import *  # noqa
-from grudge.symbolic.operators import *  # noqa
-
-from grudge.symbolic.tools import pretty  # noqa
diff --git a/grudge/symbolic/__init__.py b/grudge/symbolic/__init__.py
index acd34e3532cba7516fab14dc1a9d2af46ddd649d..59fa5b26b5943a24982db5616632cef7dc8c042f 100644
--- a/grudge/symbolic/__init__.py
+++ b/grudge/symbolic/__init__.py
@@ -23,3 +23,9 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 THE SOFTWARE.
 """
+
+from grudge.symbolic.primitives import *        # noqa: F401,F403
+from grudge.symbolic.operators import *         # noqa: F401,F403
+from grudge.symbolic.tools import pretty        # noqa: F401
+
+from pymbolic.primitives import If, Comparison  # noqa: F401
diff --git a/grudge/symbolic/dofdesc_inference.py b/grudge/symbolic/dofdesc_inference.py
index c44a7940b9ec142171a0b0a67885c041ec9d43cb..38c775127488b79624264439778c334fac29fcd3 100644
--- a/grudge/symbolic/dofdesc_inference.py
+++ b/grudge/symbolic/dofdesc_inference.py
@@ -183,6 +183,7 @@ class DOFDescInferenceMapper(RecursiveMapper, CSECachingMapperMixin):
         return expr.dd
 
     map_node_coordinate_component = map_ones
+    map_signed_face_ones = map_ones
 
     def map_call(self, expr):
         arg_dds = [
diff --git a/grudge/symbolic/mappers/__init__.py b/grudge/symbolic/mappers/__init__.py
index 0e9b8e02d67b6f2b86d217d8c230263281d49a19..b277c4c99e97280e4f82be63cac8bbcbb00bee8e 100644
--- a/grudge/symbolic/mappers/__init__.py
+++ b/grudge/symbolic/mappers/__init__.py
@@ -213,6 +213,7 @@ class IdentityMapperMixin(LocalOpReducerMixin, FluxOpReducerMixin):
 
     map_function_symbol = map_grudge_variable
     map_ones = map_grudge_variable
+    map_signed_face_ones = map_grudge_variable
     map_node_coordinate_component = map_grudge_variable
 
     # }}}
@@ -266,6 +267,7 @@ class DependencyMapper(
         return set()
 
     map_ones = _map_leaf
+    map_signed_face_ones = _map_leaf
     map_node_coordinate_component = _map_leaf
 
 
@@ -1235,6 +1237,7 @@ class CollectorMixin(OperatorReducerMixin, LocalOpReducerMixin, FluxOpReducerMix
     map_function_symbol = map_constant
 
     map_ones = map_grudge_variable
+    map_signed_face_ones = map_grudge_variable
     map_node_coordinate_component = map_grudge_variable
 
     map_operator = map_grudge_variable
diff --git a/grudge/symbolic/operators.py b/grudge/symbolic/operators.py
index a1899ed21ef6431f7b57a81fcf143680433b7b53..56a26feef896bc9599f8358a8a5dc0d792456825 100644
--- a/grudge/symbolic/operators.py
+++ b/grudge/symbolic/operators.py
@@ -1,5 +1,3 @@
-"""Building blocks and mappers for operator expression trees."""
-
 from __future__ import division, absolute_import
 
 __copyright__ = "Copyright (C) 2008-2017 Andreas Kloeckner, Bogdan Enache"
@@ -27,16 +25,63 @@ THE SOFTWARE.
 from six.moves import intern
 
 import numpy as np
-import numpy.linalg as la  # noqa
 import pymbolic.primitives
 
+__doc__ = """
+
+Building blocks and mappers for operator expression trees.
+
+Basic Operators
+^^^^^^^^^^^^^^^
+
+.. autoclass:: Operator
+.. autoclass:: ElementwiseLinearOperator
+.. autoclass:: InterpolationOperator
+
+.. data:: interp
+
+Reductions
+^^^^^^^^^^
+
+.. autoclass:: ElementwiseMaxOperator
+
+.. autoclass:: NodalReductionOperator
+.. autoclass:: NodalSum
+.. autoclass:: NodalMax
+.. autoclass:: NodalMin
+
+Differentiation
+^^^^^^^^^^^^^^^
+
+.. autoclass:: StrongFormDiffOperatorBase
+.. autoclass:: WeakFormDiffOperatorBase
+.. autoclass:: StiffnessOperator
+.. autoclass:: DiffOperator
+.. autoclass:: StiffnessTOperator
+.. autoclass:: MInvSTOperator
 
-def _sym():
-    # A hack to make referring to grudge.sym possible without
-    # circular imports and tons of typing.
+.. autoclass:: RefDiffOperator
+.. autoclass:: RefStiffnessTOperator
 
-    from grudge import sym
-    return sym
+.. autofunction:: nabla
+.. autofunction:: minv_stiffness_t
+.. autofunction:: stiffness
+.. autofunction:: stiffness_t
+
+Mass Operators
+^^^^^^^^^^^^^^
+
+.. autoclass:: MassOperatorBase
+
+.. autoclass:: MassOperator
+.. autoclass:: InverseMassOperator
+.. autoclass:: FaceMassOperator
+
+.. autoclass:: RefMassOperator
+.. autoclass:: RefInverseMassOperator
+.. autoclass:: RefFaceMassOperator
+
+"""
 
 
 # {{{ base classes
@@ -45,18 +90,19 @@ class Operator(pymbolic.primitives.Expression):
     """
     .. attribute:: dd_in
 
-        an instance of :class:`grudge.sym.DOFDesc` describing the
+        an instance of :class:`~grudge.symbolic.primitives.DOFDesc` describing the
         input discretization.
 
     .. attribute:: dd_out
 
-        an instance of :class:`grudge.sym.DOFDesc` describing the
+        an instance of :class:`~grudge.symbolic.primitives.DOFDesc` describing the
         output discretization.
     """
 
     def __init__(self, dd_in, dd_out):
-        self.dd_in = _sym().as_dofdesc(dd_in)
-        self.dd_out = _sym().as_dofdesc(dd_out)
+        import grudge.symbolic.primitives as prim
+        self.dd_in = prim.as_dofdesc(dd_in)
+        self.dd_out = prim.as_dofdesc(dd_out)
 
     def stringifier(self):
         from grudge.symbolic.mappers import StringifyMapper
@@ -100,14 +146,24 @@ class ElementwiseLinearOperator(Operator):
 
 class InterpolationOperator(Operator):
     def __init__(self, dd_in, dd_out):
-        official_dd_in = _sym().as_dofdesc(dd_in)
-        official_dd_out = _sym().as_dofdesc(dd_out)
-        if official_dd_in == official_dd_out:
-            raise ValueError("Interpolating from {} to {}"
-            " does not do anything.".format(official_dd_in, official_dd_out))
-
         super(InterpolationOperator, self).__init__(dd_in, dd_out)
 
+    def __call__(self, expr):
+        from pytools.obj_array import with_object_array_or_scalar
+
+        def interp_one(subexpr):
+            from pymbolic.primitives import is_constant
+            if self.dd_in == self.dd_out:
+                # no-op interpolation, go away
+                return subexpr
+            elif is_constant(subexpr):
+                return subexpr
+            else:
+                from grudge.symbolic.primitives import OperatorBinding
+                return OperatorBinding(self, subexpr)
+
+        return with_object_array_or_scalar(interp_one, expr)
+
     mapper_method = intern("map_interpolation")
 
 
@@ -123,7 +179,8 @@ class ElementwiseMaxOperator(Operator):
 class NodalReductionOperator(Operator):
     def __init__(self, dd_in, dd_out=None):
         if dd_out is None:
-            dd_out = _sym().DD_SCALAR
+            import grudge.symbolic.primitives as prim
+            dd_out = prim.DD_SCALAR
 
         assert dd_out.is_scalar()
 
@@ -150,13 +207,14 @@ class NodalMin(NodalReductionOperator):
 
 class DiffOperatorBase(Operator):
     def __init__(self, xyz_axis, dd_in=None, dd_out=None):
+        import grudge.symbolic.primitives as prim
         if dd_in is None:
-            dd_in = _sym().DD_VOLUME
+            dd_in = prim.DD_VOLUME
 
         if dd_out is None:
-            dd_out = dd_in.with_qtag(_sym().QTAG_NONE)
+            dd_out = dd_in.with_qtag(prim.QTAG_NONE)
         else:
-            dd_out = _sym().as_dofdesc(dd_out)
+            dd_out = prim.as_dofdesc(dd_out)
 
         if dd_out.uses_quadrature():
             raise ValueError("differentiation outputs are not on "
@@ -207,10 +265,13 @@ class MInvSTOperator(WeakFormDiffOperatorBase):
 
 class RefDiffOperatorBase(ElementwiseLinearOperator):
     def __init__(self, rst_axis, dd_in=None, dd_out=None):
+        import grudge.symbolic.primitives as prim
         if dd_in is None:
-            dd_in = _sym().DD_VOLUME
+            dd_in = prim.DD_VOLUME
+
         if dd_out is None:
-            dd_out = dd_in.with_qtag(_sym().QTAG_NONE)
+            dd_out = dd_in.with_qtag(prim.QTAG_NONE)
+
         if dd_out.uses_quadrature():
             raise ValueError("differentiation outputs are not on "
                     "quadrature grids")
@@ -275,8 +336,10 @@ class FilterOperator(ElementwiseLinearOperator):
           (For example an instance of
           :class:`ExponentialFilterResponseFunction`.
         """
+        import grudge.symbolic.primitives as prim
         if dd_in is None:
-            dd_in = _sym().DD_VOLUME
+            dd_in = prim.DD_VOLUME
+
         if dd_out is None:
             dd_out = dd_in
 
@@ -314,8 +377,10 @@ class FilterOperator(ElementwiseLinearOperator):
 
 class AveragingOperator(ElementwiseLinearOperator):
     def __init__(self, dd_in=None, dd_out=None):
+        import grudge.symbolic.primitives as prim
         if dd_in is None:
-            dd_in = _sym().DD_VOLUME
+            dd_in = prim.DD_VOLUME
+
         if dd_out is None:
             dd_out = dd_in
 
@@ -362,8 +427,9 @@ class MassOperatorBase(Operator):
     """
 
     def __init__(self, dd_in=None, dd_out=None):
+        import grudge.symbolic.primitives as prim
         if dd_in is None:
-            dd_in = _sym().DD_VOLUME
+            dd_in = prim.DD_VOLUME
         if dd_out is None:
             dd_out = dd_in
 
@@ -394,8 +460,7 @@ class RefMassOperator(RefMassOperatorBase):
         vand_inv_t = np.linalg.inv(vand).T
 
         weights = in_element_group.weights
-
-        return np.einsum('c,bz,acz->abc', weights, vand_inv_t, o_vand)
+        return np.einsum('j,ik,jk->ij', weights, vand_inv_t, o_vand)
 
     mapper_method = intern("map_ref_mass")
 
@@ -429,15 +494,14 @@ class OppositeInteriorFaceSwap(Operator):
     """
 
     def __init__(self, dd_in=None, dd_out=None, unique_id=None):
-        sym = _sym()
-
+        import grudge.symbolic.primitives as prim
         if dd_in is None:
-            dd_in = sym.DOFDesc(sym.FACE_RESTR_INTERIOR, None)
+            dd_in = prim.DOFDesc(prim.FACE_RESTR_INTERIOR, None)
         if dd_out is None:
             dd_out = dd_in
 
         super(OppositeInteriorFaceSwap, self).__init__(dd_in, dd_out)
-        if self.dd_in.domain_tag is not sym.FACE_RESTR_INTERIOR:
+        if self.dd_in.domain_tag is not prim.FACE_RESTR_INTERIOR:
             raise ValueError("dd_in must be an interior faces domain")
         if self.dd_out != self.dd_in:
             raise ValueError("dd_out and dd_in must be identical")
@@ -462,7 +526,7 @@ class OppositePartitionFaceSwap(Operator):
         MPI tag offset to keep different subexpressions apart in MPI traffic.
     """
     def __init__(self, dd_in=None, dd_out=None, unique_id=None):
-        sym = _sym()
+        import grudge.symbolic.primitives as prim
 
         if dd_in is None and dd_out is None:
             raise ValueError("dd_in or dd_out must be specified")
@@ -472,7 +536,7 @@ class OppositePartitionFaceSwap(Operator):
             dd_out = dd_in
 
         super(OppositePartitionFaceSwap, self).__init__(dd_in, dd_out)
-        if not isinstance(self.dd_in.domain_tag, sym.BTAG_PARTITION):
+        if not isinstance(self.dd_in.domain_tag, prim.BTAG_PARTITION):
             raise ValueError("dd_in must be a partition boundary faces domain")
         if self.dd_out != self.dd_in:
             raise ValueError("dd_out and dd_in must be identical")
@@ -492,20 +556,20 @@ class OppositePartitionFaceSwap(Operator):
 
 class FaceMassOperatorBase(ElementwiseLinearOperator):
     def __init__(self, dd_in=None, dd_out=None):
-        sym = _sym()
-
+        import grudge.symbolic.primitives as prim
         if dd_in is None:
-            dd_in = sym.DOFDesc(sym.FACE_RESTR_ALL, None)
+            dd_in = prim.DOFDesc(prim.FACE_RESTR_ALL, None)
 
         if dd_out is None or dd_out == "vol":
-            dd_out = sym.DOFDesc("vol", sym.QTAG_NONE)
+            dd_out = prim.DOFDesc("vol", prim.QTAG_NONE)
+
         if dd_out.uses_quadrature():
             raise ValueError("face mass operator outputs are not on "
                     "quadrature grids")
 
         if not dd_out.is_volume():
             raise ValueError("dd_out must be a volume domain")
-        if dd_in.domain_tag is not sym.FACE_RESTR_ALL:
+        if dd_in.domain_tag is not prim.FACE_RESTR_ALL:
             raise ValueError("dd_in must be an interior faces domain")
 
         super(FaceMassOperatorBase, self).__init__(dd_in, dd_out)
@@ -575,43 +639,40 @@ def stiffness_t(dim, dd_in=None, dd_out=None):
 
 
 def integral(arg, dd=None):
-    sym = _sym()
+    import grudge.symbolic.primitives as prim
 
     if dd is None:
-        dd = sym.DD_VOLUME
+        dd = prim.DD_VOLUME
+    dd = prim.as_dofdesc(dd)
 
-    dd = sym.as_dofdesc(dd)
-
-    return sym.NodalSum(dd)(
-            arg * sym.cse(
-                sym.MassOperator(dd_in=dd)(sym.Ones(dd)),
+    return NodalSum(dd)(
+            arg * prim.cse(
+                MassOperator(dd_in=dd)(prim.Ones(dd)),
                 "mass_quad_weights",
-                sym.cse_scope.DISCRETIZATION))
+                prim.cse_scope.DISCRETIZATION))
 
 
 def norm(p, arg, dd=None):
     """
     :arg arg: is assumed to be a vector, i.e. have shape ``(n,)``.
     """
-    sym = _sym()
+    import grudge.symbolic.primitives as prim
 
     if dd is None:
-        dd = sym.DD_VOLUME
-
-    dd = sym.as_dofdesc(dd)
+        dd = prim.DD_VOLUME
+    dd = prim.as_dofdesc(dd)
 
     if p == 2:
-        norm_squared = sym.NodalSum(dd_in=dd)(
-                sym.FunctionSymbol("fabs")(
-                    arg * sym.MassOperator()(arg)))
+        norm_squared = NodalSum(dd_in=dd)(
+                prim.fabs(arg * MassOperator()(arg)))
 
         if isinstance(norm_squared, np.ndarray):
             norm_squared = norm_squared.sum()
 
-        return sym.FunctionSymbol("sqrt")(norm_squared)
+        return prim.sqrt(norm_squared)
 
     elif p == np.Inf:
-        result = sym.NodalMax(dd_in=dd)(sym.FunctionSymbol("fabs")(arg))
+        result = NodalMax(dd_in=dd)(prim.fabs(arg))
         from pymbolic.primitives import Max
 
         if isinstance(result, np.ndarray):
diff --git a/grudge/symbolic/primitives.py b/grudge/symbolic/primitives.py
index 0b9fac20332e505b038ee2ed9bda94485048d8d2..aa00e8e678be045fa5ae3938a295af5da3f9ff41 100644
--- a/grudge/symbolic/primitives.py
+++ b/grudge/symbolic/primitives.py
@@ -27,45 +27,45 @@ THE SOFTWARE.
 from six.moves import range, intern
 
 import numpy as np
-import pymbolic.primitives
-from meshmode.mesh import BTAG_ALL, BTAG_REALLY_ALL, BTAG_NONE, BTAG_PARTITION  # noqa
-from meshmode.discretization.connection import (  # noqa
-        FACE_RESTR_ALL, FACE_RESTR_INTERIOR)
-
-from pymbolic.primitives import (  # noqa
+from pytools.obj_array import make_obj_array
+
+from meshmode.mesh import (
+        BTAG_ALL,
+        BTAG_REALLY_ALL,
+        BTAG_NONE,
+        BTAG_PARTITION)
+from meshmode.discretization.connection import (
+        FACE_RESTR_ALL,
+        FACE_RESTR_INTERIOR)
+
+import pymbolic.primitives as prim
+from pymbolic.primitives import (
+        Variable as VariableBase,
+        CommonSubexpression as CommonSubexpressionBase,
         cse_scope as cse_scope_base,
-        make_common_subexpression as cse, If, Comparison)
+        make_common_subexpression as cse)
 from pymbolic.geometric_algebra import MultiVector
-from pytools.obj_array import join_fields, make_obj_array  # noqa
 
 
-class ExpressionBase(pymbolic.primitives.Expression):
+class ExpressionBase(prim.Expression):
     def make_stringifier(self, originating_stringifier=None):
         from grudge.symbolic.mappers import StringifyMapper
         return StringifyMapper()
 
 
-def _sym():
-    # A hack to make referring to grudge.sym possible without
-    # circular imports and tons of typing.
-
-    from grudge import sym
-    return sym
-
-
 __doc__ = """
 
-.. currentmodule:: grudge.sym
-
-.. autoclass:: If
-
 DOF description
 ^^^^^^^^^^^^^^^
 
 .. autoclass:: DTAG_SCALAR
 .. autoclass:: DTAG_VOLUME_ALL
+.. autoclass:: DTAG_BOUNDARY
 .. autoclass:: QTAG_NONE
+
 .. autoclass:: DOFDesc
+.. autofunction:: as_dofdesc
+
 .. data:: DD_SCALAR
 .. data:: DD_VOLUME
 
@@ -74,10 +74,12 @@ Symbols
 
 .. autoclass:: Variable
 .. autoclass:: ScalarVariable
-.. autoclass:: make_sym_array
-.. autoclass:: make_sym_mv
 .. autoclass:: FunctionSymbol
 
+.. autofunction:: make_sym_array
+.. autofunction:: make_sym_mv
+
+.. function :: fabs(arg)
 .. function :: sqrt(arg)
 .. function :: exp(arg)
 .. function :: sin(arg)
@@ -90,11 +92,11 @@ Helpers
 
 .. autoclass:: OperatorBinding
 .. autoclass:: PrioritizedSubexpression
-.. autoclass:: BoundaryPair
 .. autoclass:: Ones
 
 Geometry data
 ^^^^^^^^^^^^^
+
 .. autoclass:: NodeCoordinateComponent
 .. autofunction:: nodes
 .. autofunction:: mv_nodes
@@ -111,20 +113,20 @@ Geometry data
 
 # {{{ DOF description
 
-class DTAG_SCALAR:  # noqa
+class DTAG_SCALAR:          # noqa: N801
     pass
 
 
-class DTAG_VOLUME_ALL:  # noqa
+class DTAG_VOLUME_ALL:      # noqa: N801
     pass
 
 
-class DTAG_BOUNDARY:  # noqa
+class DTAG_BOUNDARY:        # noqa: N801
     def __init__(self, tag):
         self.tag = tag
 
 
-class QTAG_NONE:  # noqa
+class QTAG_NONE:            # noqa: N801
     pass
 
 
@@ -133,14 +135,18 @@ class DOFDesc(object):
 
     .. attribute:: domain_tag
     .. attribute:: quadrature_tag
+
     .. automethod:: is_scalar
     .. automethod:: is_discretized
     .. automethod:: is_volume
     .. automethod:: is_boundary
     .. automethod:: is_trace
+
     .. automethod:: uses_quadrature
+
     .. automethod:: with_qtag
     .. automethod:: with_dtag
+
     .. automethod:: __eq__
     .. automethod:: __ne__
     .. automethod:: __hash__
@@ -149,46 +155,36 @@ class DOFDesc(object):
     def __init__(self, domain_tag, quadrature_tag=None):
         """
         :arg domain_tag: One of the following:
-            :class:`grudge.sym.DTAG_SCALAR` (or the string ``"scalar"``),
-            :class:`grudge.sym.DTAG_VOLUME_ALL` (or the string ``"vol"``)
+            :class:`DTAG_SCALAR` (or the string ``"scalar"``),
+            :class:`DTAG_VOLUME_ALL` (or the string ``"vol"``)
             for the default volume discretization,
-            :class:`meshmode.discretization.connection.FACE_RESTR_ALL`
-            (or the string ``"all_faces"``),
-            or
-            :class:`meshmode.discretization.connection.FACE_RESTR_INTERIOR`
-            (or the string ``"int_faces"``),
-            or one of
-            :class:`meshmode.discretization.BTAG_ALL`,
-            :class:`meshmode.discretization.BTAG_NONE`,
-            :class:`meshmode.discretization.BTAG_REALLY_ALL`,
-            :class:`meshmode.discretization.PARTITION`,
-            or :class
+            :data:`~meshmode.discretization.connection.FACE_RESTR_ALL`
+            (or the string ``"all_faces"``), or
+            :data:`~meshmode.discretization.connection.FACE_RESTR_INTERIOR`
+            (or the string ``"int_faces"``), or one of
+            :class:`~meshmode.mesh.BTAG_ALL`,
+            :class:`~meshmode.mesh.BTAG_NONE`,
+            :class:`~meshmode.mesh.BTAG_REALLY_ALL`,
+            :class:`~meshmode.mesh.BTAG_PARTITION`,
             or *None* to indicate that the geometry is not yet known.
 
         :arg quadrature_tag:
-            *None* to indicate that the quadrature grid is not known,or
+            *None* to indicate that the quadrature grid is not known, or
             :class:`QTAG_NONE` to indicate the use of the basic discretization
             grid, or a string to indicate the use of the thus-tagged quadratue
             grid.
         """
-        if domain_tag == "scalar":
-            domain_tag = DTAG_SCALAR
-        elif domain_tag is DTAG_SCALAR:
+
+        if domain_tag is None:
+            pass
+        elif domain_tag in [DTAG_SCALAR, "scalar"]:
             domain_tag = DTAG_SCALAR
-        elif domain_tag == "vol":
+        elif domain_tag in [DTAG_VOLUME_ALL, "vol"]:
             domain_tag = DTAG_VOLUME_ALL
-        elif domain_tag is DTAG_VOLUME_ALL:
-            pass
-        elif domain_tag == "all_faces":
+        elif domain_tag in [FACE_RESTR_ALL, "all_faces"]:
             domain_tag = FACE_RESTR_ALL
-        elif domain_tag is FACE_RESTR_ALL:
-            pass
-        elif domain_tag == "int_faces":
+        elif domain_tag in [FACE_RESTR_INTERIOR, "int_faces"]:
             domain_tag = FACE_RESTR_INTERIOR
-        elif domain_tag is FACE_RESTR_INTERIOR:
-            pass
-        elif domain_tag is None:
-            pass
         elif isinstance(domain_tag, BTAG_PARTITION):
             pass
         elif domain_tag in [BTAG_ALL, BTAG_REALLY_ALL, BTAG_NONE]:
@@ -273,8 +269,7 @@ DD_VOLUME = DOFDesc(DTAG_VOLUME_ALL, None)
 def as_dofdesc(dd):
     if isinstance(dd, DOFDesc):
         return dd
-    else:
-        return DOFDesc(dd, None)
+    return DOFDesc(dd, quadrature_tag=None)
 
 # }}}
 
@@ -314,11 +309,11 @@ class HasDOFDesc(object):
 
 # {{{ variables
 
-class cse_scope(cse_scope_base):  # noqa
+class cse_scope(cse_scope_base):        # noqa: N801
     DISCRETIZATION = "grudge_discretization"
 
 
-class Variable(HasDOFDesc, ExpressionBase, pymbolic.primitives.Variable):
+class Variable(HasDOFDesc, ExpressionBase, VariableBase):
     """A user-supplied input variable with a known :class:`DOFDesc`.
     """
     init_arg_names = ("name", "dd")
@@ -347,7 +342,7 @@ def make_sym_array(name, shape, dd=None):
     def var_factory(name):
         return Variable(name, dd)
 
-    return pymbolic.primitives.make_sym_array(name, shape, var_factory)
+    return prim.make_sym_array(name, shape, var_factory)
 
 
 def make_sym_mv(name, dim, var_factory=None):
@@ -355,19 +350,20 @@ def make_sym_mv(name, dim, var_factory=None):
             make_sym_array(name, dim, var_factory))
 
 
-class FunctionSymbol(ExpressionBase, pymbolic.primitives.Variable):
+class FunctionSymbol(ExpressionBase, VariableBase):
     """A symbol to be used as the function argument of
-    :class:`pymbolic.primitives.Call`.
-
+    :class:`~pymbolic.primitives.Call`.
     """
 
     mapper_method = "map_function_symbol"
 
 
+fabs = FunctionSymbol("fabs")
 sqrt = FunctionSymbol("sqrt")
 exp = FunctionSymbol("exp")
 sin = FunctionSymbol("sin")
 cos = FunctionSymbol("cos")
+atan2 = FunctionSymbol("atan2")
 bessel_j = FunctionSymbol("bessel_j")
 bessel_y = FunctionSymbol("bessel_y")
 
@@ -399,7 +395,7 @@ class OperatorBinding(ExpressionBase):
         return hash((self.__class__, self.op, obj_array_to_hashable(self.field)))
 
 
-class PrioritizedSubexpression(pymbolic.primitives.CommonSubexpression):
+class PrioritizedSubexpression(CommonSubexpressionBase):
     """When the optemplate-to-code transformation is performed,
     prioritized subexpressions  work like common subexpression in
     that they are assigned their own separate identifier/register
@@ -409,7 +405,7 @@ class PrioritizedSubexpression(pymbolic.primitives.CommonSubexpression):
     """
 
     def __init__(self, child, priority=0):
-        pymbolic.primitives.CommonSubexpression.__init__(self, child)
+        super(PrioritizedSubexpression, self).__init__(child)
         self.priority = priority
 
     def __getinitargs__(self):
@@ -425,6 +421,28 @@ class Ones(HasDOFDesc, ExpressionBase):
     mapper_method = intern("map_ones")
 
 
+class _SignedFaceOnes(HasDOFDesc, ExpressionBase):
+    """Produces DoFs on a face that are :math:`-1` if their corresponding
+    face number is odd and :math:`+1` if it is even.
+    *dd* must refer to a 0D (point-shaped) trace domain.
+    This is based on the face order of
+    :meth:`meshmode.mesh.MeshElementGroup.face_vertex_indices`.
+
+   .. note::
+
+       This is used as a hack to generate normals with the correct orientation
+       in 1D problems, and so far only intended for this particular use cases.
+       (If you can think of a better way, please speak up!)
+    """
+
+    def __init__(self, dd):
+        dd = as_dofdesc(dd)
+        assert dd.is_trace()
+        super(_SignedFaceOnes, self).__init__(dd)
+
+    mapper_method = intern("map_signed_face_ones")
+
+
 # {{{ geometry data
 
 class DiscretizationProperty(HasDOFDesc, ExpressionBase):
@@ -470,19 +488,21 @@ def forward_metric_derivative(xyz_axis, rst_axis, dd=None):
 
     .. math::
 
-        \frac{d x_{\mathtt{xyz\_axis}} }{d r_{\mathtt{rst\_axis}} }
+        \frac{\partial x_{\mathrm{xyz\_axis}} }{\partial r_{\mathrm{rst\_axis}} }
     """
     if dd is None:
         dd = DD_VOLUME
 
     inner_dd = dd.with_qtag(QTAG_NONE)
 
-    diff_op = _sym().RefDiffOperator(rst_axis, inner_dd)
+    from grudge.symbolic.operators import RefDiffOperator
+    diff_op = RefDiffOperator(rst_axis, inner_dd)
 
     result = diff_op(NodeCoordinateComponent(xyz_axis, inner_dd))
 
     if dd.uses_quadrature():
-        result = _sym().interp(inner_dd, dd)(result)
+        from grudge.symbolic.operators import interp
+        result = interp(inner_dd, dd)(result)
 
     return cse(
             result,
@@ -506,7 +526,7 @@ def parametrization_derivative(ambient_dim, dim=None, dd=None):
         dim = ambient_dim
 
     if dim == 0:
-        return MultiVector(np.array([1.]))
+        return MultiVector(np.array([_SignedFaceOnes(dd)]))
 
     from pytools import product
     return product(
@@ -595,7 +615,7 @@ def area_element(ambient_dim, dim=None, dd=None):
 
 
 def mv_normal(dd, ambient_dim, dim=None):
-    """Exterior unit normal as a MultiVector."""
+    """Exterior unit normal as a :class:`~pymbolic.geometric_algebra.MultiVector`."""
 
     dd = as_dofdesc(dd)
 
@@ -605,17 +625,30 @@ def mv_normal(dd, ambient_dim, dim=None):
     if dim is None:
         dim = ambient_dim - 1
 
-    # Don't be tempted to add a sign here. As it is, it produces
+    # NOTE: Don't be tempted to add a sign here. As it is, it produces
     # exterior normals for positively oriented curves.
 
-    pder = (
-            pseudoscalar(ambient_dim, dim, dd=dd)
-            /  # noqa: W504
-            area_element(ambient_dim, dim, dd=dd))
-    return cse(
-            # Dorst Section 3.7.2
-            pder << pder.I.inv(),
-            "normal", cse_scope.DISCRETIZATION)
+    pder = pseudoscalar(ambient_dim, dim, dd=dd) \
+            / area_element(ambient_dim, dim, dd=dd)
+
+    # Dorst Section 3.7.2
+    mv = pder << pder.I.inv()
+
+    if dim == 0:
+        # NOTE: when the mesh is 0D, we do not have a clear notion of
+        # `exterior normal`, so we just take the tangent of the 1D element,
+        # interpolate it to the element faces and make it signed
+        tangent = parametrization_derivative(
+                ambient_dim, dim=1, dd=DD_VOLUME)
+        tangent = tangent / sqrt(tangent.norm_squared())
+
+        from grudge.symbolic.operators import interp
+        interp = interp(DD_VOLUME, dd)
+        mv = MultiVector(np.array([
+            mv.as_scalar() * interp(t) for t in tangent.as_vector()
+            ]))
+
+    return cse(mv, "normal", cse_scope.DISCRETIZATION)
 
 
 def normal(dd, ambient_dim, dim=None, quadrature_tag=None):
@@ -676,13 +709,15 @@ class TracePair:
 
 
 def int_tpair(expression, qtag=None):
-    i = _sym().interp("vol", "int_faces")(expression)
-    e = cse(_sym().OppositeInteriorFaceSwap()(i))
+    from grudge.symbolic.operators import interp, OppositeInteriorFaceSwap
+
+    i = interp("vol", "int_faces")(expression)
+    e = cse(OppositeInteriorFaceSwap()(i))
 
-    if qtag is not None and qtag != _sym().QTAG_NONE:
-        q_dd = _sym().DOFDesc("int_faces", qtag)
-        i = cse(_sym().interp("int_faces", q_dd)(i))
-        e = cse(_sym().interp("int_faces", q_dd)(e))
+    if qtag is not None and qtag != QTAG_NONE:
+        q_dd = DOFDesc("int_faces", qtag)
+        i = cse(interp("int_faces", q_dd)(i))
+        e = cse(interp("int_faces", q_dd)(e))
     else:
         q_dd = "int_faces"
 
@@ -710,7 +745,8 @@ def bv_tpair(dd, interior, exterior):
         representing the exterior value to be used
         for the flux.
     """
-    interior = _sym().cse(_sym().interp("vol", dd)(interior))
+    from grudge.symbolic.operators import interp
+    interior = cse(interp("vol", dd)(interior))
     return TracePair(dd, interior, exterior)
 
 # }}}
diff --git a/test/test_grudge.py b/test/test_grudge.py
index e373e1335a86a9f384b19a54bb2a8842130decc8..a55f92baffb7d7f5bfa293bb105579dfeeba08a5 100644
--- a/test/test_grudge.py
+++ b/test/test_grudge.py
@@ -23,11 +23,14 @@ THE SOFTWARE.
 """
 
 
-import numpy as np  # noqa
-import numpy.linalg as la  # noqa
-import pyopencl as cl  # noqa
-import pyopencl.array  # noqa
-import pyopencl.clmath  # noqa
+import numpy as np
+import numpy.linalg as la
+
+import pyopencl as cl
+import pyopencl.array
+import pyopencl.clmath
+
+from pytools.obj_array import join_fields
 
 import pytest  # noqa
 
@@ -85,39 +88,70 @@ def test_inverse_metric(ctx_factory, dim):
             assert err < 1e-12, (i, j, err)
 
 
-def test_1d_mass_mat_trig(ctx_factory):
+@pytest.mark.parametrize("ambient_dim", [1, 2, 3])
+@pytest.mark.parametrize("quad_tag", [sym.QTAG_NONE, "OVSMP"])
+def test_mass_mat_trig(ctx_factory, ambient_dim, quad_tag):
     """Check the integral of some trig functions on an interval using the mass
-    matrix
+    matrix.
     """
-
     cl_ctx = cl.create_some_context()
     queue = cl.CommandQueue(cl_ctx)
 
+    nelements = 17
+    order = 4
+
+    a = -4.0 * np.pi
+    b = +9.0 * np.pi
+    true_integral = 13*np.pi/2 * (b - a)**(ambient_dim - 1)
+
+    from meshmode.discretization.poly_element import QuadratureSimplexGroupFactory
+    dd_quad = sym.DOFDesc(sym.DTAG_VOLUME_ALL, quad_tag)
+    if quad_tag is sym.QTAG_NONE:
+        quad_tag_to_group_factory = {}
+    else:
+        quad_tag_to_group_factory = {
+                quad_tag: QuadratureSimplexGroupFactory(order=2*order)
+                }
+
     from meshmode.mesh.generation import generate_regular_rect_mesh
+    mesh = generate_regular_rect_mesh(
+            a=(a,)*ambient_dim, b=(b,)*ambient_dim,
+            n=(nelements,)*ambient_dim, order=1)
+    discr = DGDiscretizationWithBoundaries(cl_ctx, mesh, order=order,
+            quad_tag_to_group_factory=quad_tag_to_group_factory)
 
-    mesh = generate_regular_rect_mesh(a=(-4*np.pi,), b=(9*np.pi,),
-            n=(17,), order=1)
+    def _get_variables_on(dd):
+        sym_f = sym.var("f", dd=dd)
+        sym_x = sym.nodes(ambient_dim, dd=dd)
+        sym_ones = sym.Ones(dd)
 
-    discr = DGDiscretizationWithBoundaries(cl_ctx, mesh, order=8)
+        return sym_f, sym_x, sym_ones
 
-    x = sym.nodes(1)
-    f = bind(discr, sym.cos(x[0])**2)(queue)
-    ones = bind(discr, sym.Ones(sym.DD_VOLUME))(queue)
+    sym_f, sym_x, sym_ones = _get_variables_on(sym.DD_VOLUME)
+    f_volm = bind(discr, sym.cos(sym_x[0])**2)(queue).get()
+    ones_volm = bind(discr, sym_ones)(queue).get()
 
-    mass_op = bind(discr, sym.MassOperator()(sym.var("f")))
+    sym_f, sym_x, sym_ones = _get_variables_on(dd_quad)
+    f_quad = bind(discr, sym.cos(sym_x[0])**2)(queue)
+    ones_quad = bind(discr, sym_ones)(queue)
 
-    num_integral_1 = np.dot(ones.get(), mass_op(queue, f=f))
-    num_integral_2 = np.dot(f.get(), mass_op(queue, f=ones))
-    num_integral_3 = bind(discr, sym.integral(sym.var("f")))(queue, f=f)
+    mass_op = bind(discr, sym.MassOperator(dd_quad, sym.DD_VOLUME)(sym_f))
 
-    true_integral = 13*np.pi/2
-    err_1 = abs(num_integral_1-true_integral)
-    err_2 = abs(num_integral_2-true_integral)
-    err_3 = abs(num_integral_3-true_integral)
+    num_integral_1 = np.dot(ones_volm, mass_op(queue, f=f_quad).get())
+    err_1 = abs(num_integral_1 - true_integral)
+    assert err_1 < 5.0e-10, err_1
 
-    assert err_1 < 1e-10
-    assert err_2 < 1e-10
-    assert err_3 < 1e-10
+    num_integral_2 = np.dot(f_volm, mass_op(queue, f=ones_quad).get())
+    err_2 = abs(num_integral_2 - true_integral)
+    assert err_2 < 5.0e-10, err_2
+
+    if quad_tag is sym.QTAG_NONE:
+        # NOTE: `integral` always makes a square mass matrix and
+        # `QuadratureSimplexGroupFactory` does not have a `mass_matrix` method.
+        num_integral_3 = bind(discr,
+                sym.integral(sym_f, dd=dd_quad))(queue, f=f_quad)
+        err_3 = abs(num_integral_3 - true_integral)
+        assert err_3 < 5.0e-10, err_3
 
 
 @pytest.mark.parametrize("dim", [1, 2, 3])
@@ -185,7 +219,7 @@ def test_2d_gauss_theorem(ctx_factory):
     discr = DGDiscretizationWithBoundaries(cl_ctx, mesh, order=2)
 
     def f(x):
-        return sym.join_fields(
+        return join_fields(
                 sym.sin(3*x[0])+sym.cos(3*x[1]),
                 sym.sin(2*x[0])+sym.cos(x[1]))
 
@@ -204,18 +238,18 @@ def test_2d_gauss_theorem(ctx_factory):
 
 
 @pytest.mark.parametrize(("mesh_name", "mesh_pars"), [
+    ("segment", [8, 16, 32]),
     ("disk", [0.1, 0.05]),
     ("rect2", [4, 8]),
     ("rect3", [4, 6]),
     ])
 @pytest.mark.parametrize("op_type", ["strong", "weak"])
-@pytest.mark.parametrize("flux_type", ["upwind"])
+@pytest.mark.parametrize("flux_type", ["central"])
 @pytest.mark.parametrize("order", [3, 4, 5])
 # test: 'test_convergence_advec(cl._csc, "disk", [0.1, 0.05], "strong", "upwind", 3)'
 def test_convergence_advec(ctx_factory, mesh_name, mesh_pars, op_type, flux_type,
         order, visualize=False):
     """Test whether 2D advection actually converges"""
-
     cl_ctx = cl.create_some_context()
     queue = cl.CommandQueue(cl_ctx)
 
@@ -223,7 +257,16 @@ def test_convergence_advec(ctx_factory, mesh_name, mesh_pars, op_type, flux_type
     eoc_rec = EOCRecorder()
 
     for mesh_par in mesh_pars:
-        if mesh_name == "disk":
+        if mesh_name == "segment":
+            from meshmode.mesh.generation import generate_box_mesh
+            mesh = generate_box_mesh(
+                [np.linspace(-1.0, 1.0, mesh_par)],
+                order=order)
+
+            h = 2.0 / mesh_par
+            dim = 1
+            dt_factor = 1.0
+        elif mesh_name == "disk":
             pytest.importorskip("meshpy")
 
             from meshpy.geometry import make_circle, GeometryBuilder
@@ -241,7 +284,6 @@ def test_convergence_advec(ctx_factory, mesh_name, mesh_pars, op_type, flux_type
             h = np.sqrt(mesh_par)
             dim = 2
             dt_factor = 4
-
         elif mesh_name.startswith("rect"):
             dim = int(mesh_name[4:])
             from meshmode.mesh.generation import generate_regular_rect_mesh
@@ -408,7 +450,6 @@ def test_improvement_quadrature(ctx_factory, order):
     from meshmode.mesh.generation import generate_regular_rect_mesh
     from grudge.models.advection import VariableCoefficientAdvectionOperator
     from pytools.convergence import EOCRecorder
-    from pytools.obj_array import join_fields
     from meshmode.discretization.poly_element import QuadratureSimplexGroupFactory
 
     cl_ctx = cl.create_some_context()