From 8ef5181b48f06a68ee629cecad79719f82eb2b7b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andreas=20Kl=C3=B6ckner?= <inform@tiker.net>
Date: Thu, 20 May 2021 18:43:04 -0500
Subject: [PATCH] Move array context functionality over from meshmode (#4)

* Move array context functionality over from meshmode

Co-authored-by: Alex Fikl <alexfikl@gmail.com>

* Drop sphinx version requirement

* Fix replica DOFArray in tests for pylint

Co-authored-by: Alex Fikl <alexfikl@gmail.com>
---
 .github/workflows/ci.yml             |   2 +
 .gitlab-ci.yml                       |   9 +-
 README.rst                           |   6 +-
 arraycontext/__init__.py             |  54 +++
 arraycontext/container/__init__.py   | 221 ++++++++++++
 arraycontext/container/arithmetic.py | 327 +++++++++++++++++
 arraycontext/container/dataclass.py  | 114 ++++++
 arraycontext/container/traversal.py  | 268 ++++++++++++++
 arraycontext/context.py              | 264 ++++++++++++++
 arraycontext/fake_numpy.py           | 180 ++++++++++
 arraycontext/impl/pyopencl.py        | 381 ++++++++++++++++++++
 arraycontext/loopy.py                |  73 ++++
 arraycontext/metadata.py             |  52 +++
 arraycontext/pytest.py               |  98 ++++++
 doc/array_context.rst                |   7 +
 doc/conf.py                          |   5 +-
 doc/container.rst                    |  19 +
 doc/index.rst                        |  27 +-
 doc/misc.rst                         |   4 +-
 doc/other.rst                        |  12 +
 setup.cfg                            |   1 -
 test/test_arraycontext.py            | 501 ++++++++++++++++++++++++++-
 22 files changed, 2603 insertions(+), 22 deletions(-)
 create mode 100644 arraycontext/container/__init__.py
 create mode 100644 arraycontext/container/arithmetic.py
 create mode 100644 arraycontext/container/dataclass.py
 create mode 100644 arraycontext/container/traversal.py
 create mode 100644 arraycontext/context.py
 create mode 100644 arraycontext/fake_numpy.py
 create mode 100644 arraycontext/impl/pyopencl.py
 create mode 100644 arraycontext/loopy.py
 create mode 100644 arraycontext/metadata.py
 create mode 100644 arraycontext/pytest.py
 create mode 100644 doc/container.rst
 create mode 100644 doc/other.rst

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index d1ca41f..28a382a 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -91,6 +91,8 @@ jobs:
                 . ci-support.sh
                 build_py_project_in_conda_env
                 conda install graphviz
+
+                CI_SUPPORT_SPHINX_VERSION_SPECIFIER=">=4.0"
                 build_docs
 
     downstream_tests:
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index fc6bfca..88293d2 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -64,10 +64,11 @@ Python 3 Conda:
   - tags
 
 Documentation:
-  script:
-  - EXTRA_INSTALL="pybind11 cython numpy"
-  - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/main/build-docs.sh
-  - ". ./build-docs.sh"
+  script: |
+    EXTRA_INSTALL="pybind11 cython numpy"
+    curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/main/build-docs.sh
+    CI_SUPPORT_SPHINX_VERSION_SPECIFIER=">=4.0"
+    . ./build-docs.sh
   tags:
   - python3
 
diff --git a/README.rst b/README.rst
index 1fa2c21..ab5e747 100644
--- a/README.rst
+++ b/README.rst
@@ -1,10 +1,6 @@
 arraycontext: Choose your favorite ``numpy``-workalike
 ======================================================
 
-(Caution: vaporware for now! Much of this functionality exists in
-`meshmode <https://documen.tician.de/meshmode/>`__ at the moment
-and is in the process of being moved here)
-
 .. image:: https://gitlab.tiker.net/inducer/arraycontext/badges/main/pipeline.svg
     :alt: Gitlab Build Status
     :target: https://gitlab.tiker.net/inducer/arraycontext/commits/main
@@ -25,7 +21,7 @@ implementations for:
 - Debugging
 - Profiling
 
-``arraycontext`` started life as an array abstraction for use with the 
+``arraycontext`` started life as an array abstraction for use with the
 `meshmode <https://documen.tician.de/meshmode/>`__ unstructured discretization
 package.
 
diff --git a/arraycontext/__init__.py b/arraycontext/__init__.py
index 37d8fc8..16dc273 100644
--- a/arraycontext/__init__.py
+++ b/arraycontext/__init__.py
@@ -28,5 +28,59 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 THE SOFTWARE.
 """
 
+from .context import ArrayContext
+
+from .metadata import CommonSubexpressionTag, FirstAxisIsElementsTag
+
+from .container import (
+        ArrayContainer,
+        is_array_container, is_array_container_type,
+        get_container_context, get_container_context_recursively,
+        serialize_container, deserialize_container)
+from .container.arithmetic import with_container_arithmetic
+from .container.dataclass import dataclass_array_container
+
+from .container.traversal import (
+        map_array_container,
+        multimap_array_container,
+        rec_map_array_container,
+        rec_multimap_array_container,
+        mapped_over_array_containers,
+        multimapped_over_array_containers,
+        thaw, freeze)
+
+from .impl.pyopencl import PyOpenCLArrayContext
+
+from .pytest import pytest_generate_tests_for_pyopencl_array_context
+
+from .loopy import make_loopy_program
+
+
+__all__ = (
+        "ArrayContext",
+
+        "CommonSubexpressionTag",
+        "FirstAxisIsElementsTag",
+
+        "ArrayContainer",
+        "is_array_container", "is_array_container_type",
+        "get_container_context", "get_container_context_recursively",
+        "serialize_container", "deserialize_container",
+        "with_container_arithmetic",
+        "dataclass_array_container",
+
+        "map_array_container", "multimap_array_container",
+        "rec_map_array_container", "rec_multimap_array_container",
+        "mapped_over_array_containers",
+        "multimapped_over_array_containers",
+        "thaw", "freeze",
+
+        "PyOpenCLArrayContext",
+
+        "make_loopy_program",
+
+        "pytest_generate_tests_for_pyopencl_array_context"
+        )
+
 
 # vim: foldmethod=marker
diff --git a/arraycontext/container/__init__.py b/arraycontext/container/__init__.py
new file mode 100644
index 0000000..a3398f3
--- /dev/null
+++ b/arraycontext/container/__init__.py
@@ -0,0 +1,221 @@
+"""
+.. currentmodule:: arraycontext
+
+
+.. autoclass:: ArrayContainer
+
+Serialization/deserialization
+-----------------------------
+.. autofunction:: is_array_container
+.. autofunction:: serialize_container
+.. autofunction:: deserialize_container
+
+Context retrieval
+-----------------
+.. autofunction:: get_container_context
+.. autofunction:: get_container_context_recursively
+"""
+
+
+__copyright__ = """
+Copyright (C) 2020-1 University of Illinois Board of Trustees
+"""
+
+__license__ = """
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+"""
+
+from functools import singledispatch
+from arraycontext.context import ArrayContext
+from typing import Any, Iterable, Tuple, Optional
+import numpy as np
+
+
+# {{{ ArrayContainer
+
+class ArrayContainer:
+    r"""
+    A generic container for the array type supported by the
+    :class:`ArrayContext`.
+
+    The functionality required for the container to operate is supplied via
+    :func:`functools.singledispatch`. Implementations of the following functions need
+    to be registered for a type serving as an :class:`ArrayContainer`:
+
+    * :func:`serialize_container` for serialization, which gives the components
+      of the array.
+    * :func:`deserialize_container` for deserialization, which constructs a
+      container from a set of components.
+    * :func:`get_container_context` retrieves the :class:`ArrayContext` from
+      a container, if it has one.
+
+    This allows enumeration of the component arrays in a container and the
+    construction of modified containers from an iterable of those component arrays.
+    :func:`is_array_container` will return *True* for types that have
+    a container serialization function registered.
+
+    Packages may register their own types as array containers. They must not
+    register other types (e.g. :class:`list`) as array containers.
+    The type :class:`numpy.ndarray` is considered an array container, but
+    only arrays with dtype *object* may be used as such. (This is so
+    because object arrays cannot be distinguished from non-object arrays
+    via their type.)
+
+    The container and its serialization interface have goals and use
+    approaches similar to JAX's
+    `PyTrees <https://jax.readthedocs.io/en/latest/pytrees.html>`__,
+    however its implementation differs a bit.
+
+    .. note::
+
+        This class is used in type annotations. Inheriting from it confers no
+        special meaning or behavior.
+    """
+
+
+@singledispatch
+def serialize_container(ary: ArrayContainer) -> Iterable[Tuple[Any, Any]]:
+    r"""Serialize the array container into an iterable over its components.
+
+    The order of the components and their identifiers are entirely under
+    the control of the container class.
+
+    If *ary* is mutable, the serialization function is not required to ensure
+    that the serialization result reflects the array state at the time of the
+    call to :func:`serialize_container`.
+
+    :returns: an :class:`Iterable` of 2-tuples where the first
+        entry is an identifier for the component and the second entry
+        is an array-like component of the :class:`ArrayContainer`.
+        Components can themselves be :class:`ArrayContainer`\ s, allowing
+        for arbitrarily nested structures. The identifiers need to be hashable
+        but are otherwise treated as opaque.
+    """
+    raise NotImplementedError(type(ary).__name__)
+
+
+@singledispatch
+def deserialize_container(template, iterable: Iterable[Tuple[Any, Any]]):
+    """Deserialize an iterable into an array container.
+
+    :param template: an instance of an existing object that
+        can be used to aid in the deserialization. For a similar choice
+        see :attr:`~numpy.class.__array_finalize__`.
+    :param iterable: an iterable that mirrors the output of
+        :meth:`serialize_container`.
+    """
+    raise NotImplementedError(type(template).__name__)
+
+
+def is_array_container_type(cls: type) -> bool:
+    """
+    :returns: *True* if the type *cls* has a registered implementation of
+        :func:`serialize_container`, or if it is an :class:`ArrayContainer`.
+    """
+    return (
+            cls is ArrayContainer
+            or (serialize_container.dispatch(cls)
+                is not serialize_container.__wrapped__))
+
+
+def is_array_container(ary: Any) -> bool:
+    """
+    :returns: *True* if the instance *ary* has a registered implementation of
+        :func:`serialize_container`.
+    """
+    return (serialize_container.dispatch(ary.__class__)
+            is not serialize_container.__wrapped__)
+
+
+@singledispatch
+def get_container_context(ary: ArrayContainer) -> Optional["ArrayContext"]:
+    """Retrieves the :class:`ArrayContext` from the container, if any.
+
+    This function is not recursive, so it will only search at the root level
+    of the container. For the recursive version, see
+    :func:`get_container_context_recursively`.
+    """
+    return getattr(ary, "array_context", None)
+
+# }}}
+
+
+# {{{ object arrays as array containers
+
+@serialize_container.register(np.ndarray)
+def _serialize_ndarray_container(ary: np.ndarray) -> Iterable[Tuple[Any, Any]]:
+    if ary.dtype.char != "O":
+        raise ValueError(
+                f"only object arrays are supported, given dtype '{ary.dtype}'")
+
+    return np.ndenumerate(ary)
+
+
+@deserialize_container.register(np.ndarray)
+def _deserialize_ndarray_container(
+        template: Any, iterable: Iterable[Tuple[Any, Any]]):
+    # disallow subclasses
+    assert type(template) is np.ndarray
+    assert template.dtype.char == "O"
+
+    result = type(template)(template.shape, dtype=object)
+    for i, subary in iterable:
+        result[i] = subary
+
+    return result
+
+# }}}
+
+
+# {{{ get_container_context_recursively
+
+def get_container_context_recursively(ary) -> Optional["ArrayContext"]:
+    """Walks the :class:`ArrayContainer` hierarchy to find an
+    :class:`ArrayContext` associated with it.
+
+    If components with different array contexts are found at any level,
+    an assertion error is raised.
+    """
+    actx = None
+    if not is_array_container(ary):
+        return actx
+
+    # try getting the array context directly
+    actx = get_container_context(ary)
+    if actx is not None:
+        return actx
+
+    for _, subary in serialize_container(ary):
+        context = get_container_context_recursively(subary)
+        if context is None:
+            continue
+
+        if not __debug__:
+            return context
+        elif actx is None:
+            actx = context
+        else:
+            assert actx is context
+
+    return actx
+
+# }}}
+
+
+# vim: foldmethod=marker
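For illustration, a minimal sketch of how a user-defined container type could hook
into the serialization interface above; the ``Pair`` class and its fields are
hypothetical, and ``dataclass_array_container`` (added later in this patch)
automates exactly this kind of registration::

    from arraycontext import serialize_container, deserialize_container

    class Pair:
        """A hypothetical two-component container."""
        def __init__(self, first, second):
            self.first = first
            self.second = second

    @serialize_container.register(Pair)
    def _serialize_pair(ary: Pair):
        # identifiers are opaque to arraycontext; they only need to be hashable
        return [("first", ary.first), ("second", ary.second)]

    @deserialize_container.register(Pair)
    def _deserialize_pair(template: Pair, iterable):
        # 'template' can carry over non-array state; unused in this sketch
        return Pair(**dict(iterable))

With these registrations in place, ``is_array_container`` returns *True* for
``Pair`` instances and the traversal helpers below can recurse into them.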
diff --git a/arraycontext/container/arithmetic.py b/arraycontext/container/arithmetic.py
new file mode 100644
index 0000000..b19aa00
--- /dev/null
+++ b/arraycontext/container/arithmetic.py
@@ -0,0 +1,327 @@
+"""
+.. currentmodule:: arraycontext
+.. autofunction:: with_container_arithmetic
+"""
+
+import enum
+
+__copyright__ = """
+Copyright (C) 2020-1 University of Illinois Board of Trustees
+"""
+
+__license__ = """
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+"""
+
+
+# {{{ with_container_arithmetic
+
+class _OpClass(enum.Enum):
+    ARITHMETIC = enum.auto()
+    BITWISE = enum.auto()
+    SHIFT = enum.auto()
+    EQ_COMPARISON = enum.auto()
+    REL_COMPARISON = enum.auto()
+
+
+_UNARY_OP_AND_DUNDER = [
+        ("pos", "+{}", _OpClass.ARITHMETIC),
+        ("neg", "-{}", _OpClass.ARITHMETIC),
+        ("abs", "abs({})", _OpClass.ARITHMETIC),
+        ("inv", "~{}", _OpClass.BITWISE),
+        ]
+_BINARY_OP_AND_DUNDER = [
+        ("add", "{} + {}", True, _OpClass.ARITHMETIC),
+        ("sub", "{} - {}", True, _OpClass.ARITHMETIC),
+        ("mul", "{} * {}", True, _OpClass.ARITHMETIC),
+        ("truediv", "{} / {}", True, _OpClass.ARITHMETIC),
+        ("floordiv", "{} // {}", True, _OpClass.ARITHMETIC),
+        ("pow", "{} ** {}", True, _OpClass.ARITHMETIC),
+        ("mod", "{} % {}", True, _OpClass.ARITHMETIC),
+        ("divmod", "divmod({}, {})", True, _OpClass.ARITHMETIC),
+
+        ("and", "{} & {}", True, _OpClass.BITWISE),
+        ("or", "{} | {}", True, _OpClass.BITWISE),
+        ("xor", "{} ^ {}", True, _OpClass.BITWISE),
+
+        ("lshift", "{} << {}", False, _OpClass.SHIFT),
+        ("rshift", "{} >> {}", False, _OpClass.SHIFT),
+
+        ("eq", "{} == {}", False, _OpClass.EQ_COMPARISON),
+        ("ne", "{} != {}", False, _OpClass.EQ_COMPARISON),
+
+        ("lt", "{} < {}", False, _OpClass.REL_COMPARISON),
+        ("gt", "{} > {}", False, _OpClass.REL_COMPARISON),
+        ("le", "{} <= {}", False, _OpClass.REL_COMPARISON),
+        ("ge", "{} >= {}", False, _OpClass.REL_COMPARISON),
+        ]
+
+
+def _format_unary_op_str(op_str, arg1):
+    if isinstance(arg1, tuple):
+        arg1_entry, arg1_container = arg1
+        return (f"{op_str.format(arg1_entry)} "
+                f"for {arg1_entry} in {arg1_container}")
+    else:
+        return op_str.format(arg1)
+
+
+def _format_binary_op_str(op_str, arg1, arg2):
+    if isinstance(arg1, tuple) and isinstance(arg2, tuple):
+        import sys
+        if sys.version_info >= (3, 10):
+            strict_arg = ", strict=__debug__"
+        else:
+            strict_arg = ""
+
+        arg1_entry, arg1_container = arg1
+        arg2_entry, arg2_container = arg2
+        return (f"{op_str.format(arg1_entry, arg2_entry)} "
+                f"for {arg1_entry}, {arg2_entry} "
+                f"in zip({arg1_container}, {arg2_container}{strict_arg})")
+
+    elif isinstance(arg1, tuple):
+        arg1_entry, arg1_container = arg1
+        return (f"{op_str.format(arg1_entry, arg2)} "
+                f"for {arg1_entry} in {arg1_container}")
+
+    elif isinstance(arg2, tuple):
+        arg2_entry, arg2_container = arg2
+        return (f"{op_str.format(arg1, arg2_entry)} "
+                f"for {arg2_entry} in {arg2_container}")
+    else:
+        return op_str.format(arg1, arg2)
+
+
+def with_container_arithmetic(
+        bcast_number=True, bcast_obj_array=None, bcast_numpy_array=False,
+        arithmetic=True, bitwise=False, shift=False,
+        eq_comparison=None, rel_comparison=None):
+    """A class decorator that implements built-in operators for array containers
+    by propagating the operations to the elements of the container.
+
+    :arg bcast_number: If *True*, numbers broadcast over the container
+        (with the container as the 'outer' structure).
+    :arg bcast_obj_array: If *True*, :mod:`numpy` object arrays broadcast over
+        the container (with the container as the 'inner' structure).
+    :arg bcast_numpy_array: If *True*, any :class:`numpy.ndarray` will broadcast
+        over the container (with the container as the 'inner' structure).
+    :arg arithmetic: Implement the conventional arithmetic operators, including
+        ``**``, :func:`divmod`, and ``//``. Also includes ``+`` and ``-`` as well as
+        :func:`abs`.
+    :arg bitwise: If *True*, implement bitwise and, or, xor, and inversion.
+    :arg shift: If *True*, implement bit shifts.
+    :arg eq_comparison: If *True*, implement ``==`` and ``!=``.
+    :arg rel_comparison: If *True*, implement ``<``, ``<=``, ``>``, ``>=``.
+        In that case, if *eq_comparison* is unspecified, it is also set to
+        *True*.
+
+    Each operator class also includes the "reverse" operators if applicable.
+
+    .. note::
+
+        To generate the code implementing the operators, this function relies on
+        class methods ``_deserialize_init_arrays_code`` and
+        ``_serialize_init_arrays_code``. This interface should be considered
+        undocumented and subject to change, however if you are curious, you may look
+        at its implementation in :class:`meshmode.dof_array.DOFArray`. For a simple
+        structure type, the implementation might look like this::
+
+            @classmethod
+            def _serialize_init_arrays_code(cls, instance_name):
+                return {"u": f"{instance_name}.u", "v": f"{instance_name}.v"}
+
+            @classmethod
+            def _deserialize_init_arrays_code(cls, tmpl_instance_name, args):
+                return f"u={args['u']}, v={args['v']}"
+
+    :func:`dataclass_array_container` automatically generates an appropriate
+    implementation of these methods, so :func:`with_container_arithmetic`
+    should nest "outside" :func:`dataclass_array_container`.
+    """
+
+    # {{{ handle inputs
+
+    if bcast_obj_array is None:
+        raise TypeError("bcast_obj_array must be specified")
+
+    if rel_comparison is None:
+        raise TypeError("rel_comparison must be specified")
+
+    if rel_comparison and eq_comparison is None:
+        eq_comparison = True
+
+    if eq_comparison is None:
+        raise TypeError("eq_comparison must be specified")
+
+    if not bcast_obj_array and bcast_numpy_array:
+        raise TypeError("bcast_obj_array must be set if bcast_numpy_array is")
+
+    if bcast_numpy_array:
+        def numpy_pred(name):
+            return f"isinstance({name}, np.ndarray)"
+    elif bcast_obj_array:
+        def numpy_pred(name):
+            return f"isinstance({name}, np.ndarray) and {name}.dtype.char == 'O'"
+    else:
+        def numpy_pred(name):
+            return "False"  # optimized away
+
+    desired_op_classes = set()
+    if arithmetic:
+        desired_op_classes.add(_OpClass.ARITHMETIC)
+    if bitwise:
+        desired_op_classes.add(_OpClass.BITWISE)
+    if shift:
+        desired_op_classes.add(_OpClass.SHIFT)
+    if eq_comparison:
+        desired_op_classes.add(_OpClass.EQ_COMPARISON)
+    if rel_comparison:
+        desired_op_classes.add(_OpClass.REL_COMPARISON)
+
+    # }}}
+
+    def wrap(cls):
+        if (not hasattr(cls, "_serialize_init_arrays_code")
+                or not hasattr(cls, "_deserialize_init_arrays_code")):
+            raise TypeError(f"class '{cls.__name__}' must provide serialization "
+                    "code to generate arithmetic operations by implementing "
+                    "'_serialize_init_arrays_code' and "
+                    "'_deserialize_init_arrays_code'. If this is a dataclass, "
+                    "use the 'dataclass_array_container' decorator first.")
+
+        from pytools.codegen import CodeGenerator
+        gen = CodeGenerator()
+        gen("""
+            from numbers import Number
+            import numpy as np
+            from arraycontext import ArrayContainer
+            """)
+        gen("")
+
+        def same_key(k1, k2):
+            assert k1 == k2
+            return k1
+
+        # {{{ unary operators
+
+        for dunder_name, op_str, op_cls in _UNARY_OP_AND_DUNDER:
+            if op_cls not in desired_op_classes:
+                continue
+
+            fname = f"_{cls.__name__.lower()}_{dunder_name}"
+            init_args = cls._deserialize_init_arrays_code("arg1", {
+                    key_arg1: _format_unary_op_str(op_str, expr_arg1)
+                    for key_arg1, expr_arg1 in
+                    cls._serialize_init_arrays_code("arg1").items()
+                    })
+
+            gen(f"""
+                def {fname}(arg1):
+                    return cls({init_args})
+                cls.__{dunder_name}__ = {fname}""")
+            gen("")
+
+        # }}}
+
+        # {{{ binary operators
+
+        for dunder_name, op_str, reversible, op_cls in _BINARY_OP_AND_DUNDER:
+            if op_cls not in desired_op_classes:
+                continue
+
+            # {{{ "forward" binary operators
+
+            fname = f"_{cls.__name__.lower()}_{dunder_name}"
+            zip_init_args = cls._deserialize_init_arrays_code("arg1", {
+                    same_key(key_arg1, key_arg2):
+                    _format_binary_op_str(op_str, expr_arg1, expr_arg2)
+                    for (key_arg1, expr_arg1), (key_arg2, expr_arg2) in zip(
+                        cls._serialize_init_arrays_code("arg1").items(),
+                        cls._serialize_init_arrays_code("arg2").items())
+                    })
+            bcast_init_args = cls._deserialize_init_arrays_code("arg1", {
+                    key_arg1: _format_binary_op_str(op_str, expr_arg1, "arg2")
+                    for key_arg1, expr_arg1 in
+                    cls._serialize_init_arrays_code("arg1").items()
+                    })
+
+            gen(f"""
+                def {fname}(arg1, arg2):
+                    if arg2.__class__ is cls:
+                        return cls({zip_init_args})
+                    if {bcast_number}:  # optimized away
+                        if isinstance(arg2, Number):
+                            return cls({bcast_init_args})
+                    if {numpy_pred("arg2")}:
+                        result = np.empty_like(arg2, dtype=object)
+                        for i in np.ndindex(arg2.shape):
+                            result[i] = {op_str.format("arg1", "arg2[i]")}
+                        return result
+                    return NotImplemented
+                cls.__{dunder_name}__ = {fname}""")
+            gen("")
+
+            # }}}
+
+            # {{{ "reverse" binary operators
+
+            if reversible:
+                fname = f"_{cls.__name__.lower()}_r{dunder_name}"
+                bcast_init_args = cls._deserialize_init_arrays_code("arg2", {
+                        key_arg2: _format_binary_op_str(
+                            op_str, "arg1", expr_arg2)
+                        for key_arg2, expr_arg2 in
+                        cls._serialize_init_arrays_code("arg2").items()
+                        })
+                gen(f"""
+                    def {fname}(arg2, arg1):
+                        # assert other.__cls__ is not cls
+
+                        if {bcast_number}:  # optimized away
+                            if isinstance(arg1, Number):
+                                return cls({bcast_init_args})
+                        if {numpy_pred("arg1")}:
+                            result = np.empty_like(arg1, dtype=object)
+                            for i in np.ndindex(arg1.shape):
+                                result[i] = {op_str.format("arg1[i]", "arg2")}
+                            return result
+                        return NotImplemented
+
+                    cls.__r{dunder_name}__ = {fname}""")
+                gen("")
+
+            # }}}
+
+        # }}}
+
+        # This will evaluate the module, which is all we need.
+        code = gen.get().rstrip()+"\n"
+        result_dict = {"_MODULE_SOURCE_CODE": code, "cls": cls}
+        exec(compile(code, "<generated code>", "exec"), result_dict)
+
+        return cls
+
+    # we're being called as @with_container_arithmetic(...), with parens
+    return wrap
+
+# }}}
+
+
+# vim: foldmethod=marker
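As a usage sketch of the decorator above, assuming a hypothetical two-field class
that supplies the (undocumented) serialization hooks shown in the docstring::

    import numpy as np
    from arraycontext import with_container_arithmetic

    @with_container_arithmetic(bcast_obj_array=False, rel_comparison=False,
                               eq_comparison=False)
    class Velocity2D:
        def __init__(self, u, v):
            self.u = u
            self.v = v

        @classmethod
        def _serialize_init_arrays_code(cls, instance_name):
            return {"u": f"{instance_name}.u", "v": f"{instance_name}.v"}

        @classmethod
        def _deserialize_init_arrays_code(cls, tmpl_instance_name, args):
            return f"u={args['u']}, v={args['v']}"

    vel = Velocity2D(np.array([1., 2.]), np.array([3., 4.]))
    doubled = 2 * vel + vel   # generated __rmul__/__add__ act componentwise

In practice the two classmethods are rarely written by hand;
:func:`dataclass_array_container` (next file) generates them.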
diff --git a/arraycontext/container/dataclass.py b/arraycontext/container/dataclass.py
new file mode 100644
index 0000000..f30f3e4
--- /dev/null
+++ b/arraycontext/container/dataclass.py
@@ -0,0 +1,114 @@
+"""
+.. currentmodule:: arraycontext
+.. autofunction:: dataclass_array_container
+"""
+
+
+__copyright__ = """
+Copyright (C) 2020-1 University of Illinois Board of Trustees
+"""
+
+__license__ = """
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+"""
+
+
+from dataclasses import fields
+from arraycontext.container import is_array_container_type
+
+
+# {{{ dataclass containers
+
+def dataclass_array_container(cls):
+    """A class decorator that makes the class to which it is applied a
+    :class:`ArrayContainer` by registering appropriate implementations of
+    :func:`serialize_container` and :func:`deserialize_container`.
+    *cls* must be a :func:`~dataclasses.dataclass`.
+
+    Attributes that are not array containers are allowed. In order to decide
+    whether an attribute is an array container, the declared attribute type
+    is checked by the criteria from :func:`is_array_container_type`.
+    """
+    from dataclasses import is_dataclass
+    assert is_dataclass(cls)
+
+    array_fields = [
+            f for f in fields(cls) if is_array_container_type(f.type)]
+    non_array_fields = [
+            f for f in fields(cls) if not is_array_container_type(f.type)]
+
+    if not array_fields:
+        raise ValueError(f"'{cls}' must have fields with array container type "
+                "in order to use the 'dataclass_array_container' decorator")
+
+    serialize_expr = ", ".join(
+            f"({f.name!r}, ary.{f.name})" for f in array_fields)
+    template_kwargs = ", ".join(
+            f"{f.name}=template.{f.name}" for f in non_array_fields)
+
+    lower_cls_name = cls.__name__.lower()
+
+    serialize_init_code = ", ".join(f"{f.name!r}: f'{{instance_name}}.{f.name}'"
+            for f in array_fields)
+    deserialize_init_code = ", ".join([
+            f"{f.name}={{args[{f.name!r}]}}" for f in array_fields
+            ] + [
+            f"{f.name}={{template_instance_name}}.{f.name}"
+            for f in non_array_fields
+            ])
+
+    from pytools.codegen import remove_common_indentation
+    serialize_code = remove_common_indentation(f"""
+        from typing import Any, Iterable, Tuple
+        from arraycontext import serialize_container, deserialize_container
+
+        @serialize_container.register(cls)
+        def _serialize_{lower_cls_name}(ary: cls):
+            return ({serialize_expr},)
+
+        @deserialize_container.register(cls)
+        def _deserialize_{lower_cls_name}(
+                template: cls, iterable: Iterable[Tuple[Any, Any]]) -> cls:
+            return cls(**dict(iterable), {template_kwargs})
+
+        # support for with_container_arithmetic
+
+        def _serialize_init_arrays_code_{lower_cls_name}(cls, instance_name):
+            return {{
+                {serialize_init_code}
+                }}
+        cls._serialize_init_arrays_code = classmethod(
+            _serialize_init_arrays_code_{lower_cls_name})
+
+        def _deserialize_init_arrays_code_{lower_cls_name}(
+                cls, template_instance_name, args):
+            return f"{deserialize_init_code}"
+
+        cls._deserialize_init_arrays_code = classmethod(
+            _deserialize_init_arrays_code_{lower_cls_name})
+        """)
+
+    exec_dict = {"cls": cls, "_MODULE_SOURCE_CODE": serialize_code}
+    exec(compile(serialize_code, "<generated code>", "exec"), exec_dict)
+
+    return cls
+
+# }}}
+
+# vim: foldmethod=marker
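A sketch of the intended combination with :func:`with_container_arithmetic`,
with the arithmetic decorator nesting outside as described in its docstring;
the ``ConservedState`` class and its fields are purely illustrative::

    from dataclasses import dataclass
    import numpy as np

    from arraycontext import (
            dataclass_array_container, with_container_arithmetic,
            serialize_container)

    @with_container_arithmetic(bcast_obj_array=False, rel_comparison=False,
                               eq_comparison=False)
    @dataclass_array_container
    @dataclass(frozen=True)
    class ConservedState:
        mass: np.ndarray        # would typically hold actx arrays or containers
        momentum: np.ndarray

    state = ConservedState(np.zeros(5), np.ones(5))
    twice = state + state                            # via generated __add__
    components = list(serialize_container(state))    # [("mass", ...), ("momentum", ...)]

Note the ordering: ``@dataclass`` innermost, then ``@dataclass_array_container``,
then ``@with_container_arithmetic``.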
diff --git a/arraycontext/container/traversal.py b/arraycontext/container/traversal.py
new file mode 100644
index 0000000..ab8a4ff
--- /dev/null
+++ b/arraycontext/container/traversal.py
@@ -0,0 +1,268 @@
+"""
+.. currentmodule:: arraycontext
+
+.. autofunction:: map_array_container
+.. autofunction:: multimap_array_container
+.. autofunction:: rec_map_array_container
+.. autofunction:: rec_multimap_array_container
+
+Traversing decorators
+~~~~~~~~~~~~~~~~~~~~~
+.. autofunction:: mapped_over_array_containers
+.. autofunction:: multimapped_over_array_containers
+
+Freezing and thawing
+~~~~~~~~~~~~~~~~~~~~
+.. autofunction:: freeze
+.. autofunction:: thaw
+"""
+
+__copyright__ = """
+Copyright (C) 2020-1 University of Illinois Board of Trustees
+"""
+
+__license__ = """
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+"""
+
+from typing import Any, Callable
+from functools import update_wrapper, partial, singledispatch
+from arraycontext.container import (is_array_container,
+        serialize_container, deserialize_container)
+
+
+# {{{ array container traversal
+
+def _map_array_container_impl(f, ary, *, leaf_cls=None, recursive=False):
+    """Helper for :func:`rec_map_array_container`.
+
+    :param leaf_cls: class on which we call *f* directly. This is mostly
+        useful in the recursive setting, where it can stop the recursion on
+        specific container classes. By default, the recursion is stopped when
+        a non-:class:`ArrayContainer` class is encountered.
+    """
+    def rec(_ary):
+        if type(_ary) is leaf_cls:  # type(ary) is never None
+            return f(_ary)
+        elif is_array_container(_ary):
+            return deserialize_container(_ary, [
+                    (key, frec(subary)) for key, subary in serialize_container(_ary)
+                    ])
+        else:
+            return f(_ary)
+
+    frec = rec if recursive else f
+    return rec(ary)
+
+
+def _multimap_array_container_impl(f, *args, leaf_cls=None, recursive=False):
+    """Helper for :func:`rec_multimap_array_container`.
+
+    :param leaf_cls: class on which we call *f* directly. This is mostly
+        useful in the recursive setting, where it can stop the recursion on
+        specific container classes. By default, the recursion is stopped when
+        a non-:class:`ArrayContainer` class is encountered.
+    """
+    def rec(*_args):
+        template_ary = _args[container_indices[0]]
+        assert all(
+                type(_args[i]) is type(template_ary) for i in container_indices[1:]
+                ), "expected type '{type(template_ary).__name__}'"
+
+        if (type(template_ary) is leaf_cls
+                or not is_array_container(template_ary)):
+            return f(*_args)
+
+        result = []
+        new_args = list(_args)
+
+        for subarys in zip(*[
+                serialize_container(_args[i]) for i in container_indices
+                ]):
+            key = None
+            for i, (subkey, subary) in zip(container_indices, subarys):
+                if key is None:
+                    key = subkey
+                else:
+                    assert key == subkey
+
+                new_args[i] = subary
+
+            result.append((key, frec(*new_args)))
+
+        return deserialize_container(template_ary, result)
+
+    container_indices = [
+            i for i, arg in enumerate(args)
+            if is_array_container(arg) and type(arg) is not leaf_cls]
+
+    if not container_indices:
+        return f(*args)
+
+    if len(container_indices) == 1:
+        # NOTE: if we just have one ArrayContainer in args, passing it through
+        # _map_array_container_impl should be faster
+        def wrapper(ary):
+            new_args = list(args)
+            new_args[container_indices[0]] = ary
+            return f(*new_args)
+
+        update_wrapper(wrapper, f)
+        return _map_array_container_impl(
+                wrapper, args[container_indices[0]],
+                leaf_cls=leaf_cls, recursive=recursive)
+
+    frec = rec if recursive else f
+    return rec(*args)
+
+
+def map_array_container(f: Callable[[Any], Any], ary):
+    r"""Applies *f* to all components of an :class:`ArrayContainer`.
+
+    Works similarly to :func:`~pytools.obj_array.obj_array_vectorize`, but
+    on arbitrary containers.
+
+    For a recursive version, see :func:`rec_map_array_container`.
+
+    :param ary: a (potentially nested) structure of :class:`ArrayContainer`\ s,
+        or an instance of a base array type.
+    """
+    if is_array_container(ary):
+        return deserialize_container(ary, [
+                (key, f(subary)) for key, subary in serialize_container(ary)
+                ])
+    else:
+        return f(ary)
+
+
+def multimap_array_container(f: Callable[[Any], Any], *args):
+    r"""Applies *f* to the components of multiple :class:`ArrayContainer`\ s.
+
+    Works similarly to :func:`~pytools.obj_array.obj_array_vectorize_n_args`,
+    but on arbitrary containers. The containers must all have the same type,
+    which will also be the return type.
+
+    For a recursive version, see :func:`rec_multimap_array_container`.
+
+    :param args: all :class:`ArrayContainer` arguments must be of the same
+        type and with the same structure (same number of components, etc.).
+    """
+    return _multimap_array_container_impl(f, *args, recursive=False)
+
+
+def rec_map_array_container(f: Callable[[Any], Any], ary):
+    r"""Applies *f* recursively to an :class:`ArrayContainer`.
+
+    For a non-recursive version see :func:`map_array_container`.
+
+    :param ary: a (potentially nested) structure of :class:`ArrayContainer`\ s,
+        or an instance of a base array type.
+    """
+    return _map_array_container_impl(f, ary, recursive=True)
+
+
+def mapped_over_array_containers(f: Callable[[Any], Any]):
+    """Decorator around :func:`rec_map_array_container`."""
+    wrapper = partial(rec_map_array_container, f)
+    update_wrapper(wrapper, f)
+    return wrapper
+
+
+def rec_multimap_array_container(f: Callable[[Any], Any], *args):
+    r"""Applies *f* recursively to multiple :class:`ArrayContainer`\ s.
+
+    For a non-recursive version see :func:`multimap_array_container`.
+
+    :param args: all :class:`ArrayContainer` arguments must be of the same
+        type and with the same structure (same number of components, etc.).
+    """
+    return _multimap_array_container_impl(f, *args, recursive=True)
+
+
+def multimapped_over_array_containers(f: Callable[[Any], Any]):
+    """Decorator around :func:`rec_multimap_array_container`."""
+    # can't use functools.partial, because its result is insufficiently
+    # function-y to be used as a method definition.
+    def wrapper(*args):
+        return rec_multimap_array_container(f, *args)
+
+    update_wrapper(wrapper, f)
+    return wrapper
+
+# }}}
+
+
+# {{{ freeze/thaw
+
+@singledispatch
+def freeze(ary, actx=None):
+    r"""Freezes recursively by going through all components of the
+    :class:`ArrayContainer` *ary*.
+
+    :param ary: a :meth:`~ArrayContext.thaw`\ ed :class:`ArrayContainer`.
+
+    Array container types may use :func:`functools.singledispatch` ``.register`` to
+    register additional implementations.
+
+    See :meth:`ArrayContext.freeze`.
+    """
+    if is_array_container(ary):
+        return map_array_container(partial(freeze, actx=actx), ary)
+    else:
+        if actx is None:
+            raise TypeError(
+                    f"cannot freeze arrays of type {type(ary).__name__} "
+                    "when actx is not supplied. Try calling actx.freeze "
+                    "directly or supplying an array context")
+        else:
+            return actx.freeze(ary)
+
+
+@singledispatch
+def thaw(ary, actx):
+    r"""Thaws recursively by going through all components of the
+    :class:`ArrayContainer` *ary*.
+
+    :param ary: a :meth:`~ArrayContext.freeze`\ ed :class:`ArrayContainer`.
+
+    Array container types may use :func:`functools.singledispatch` ``.register``
+    to register additional implementations.
+
+    See :meth:`ArrayContext.thaw`.
+
+
+
+        This function has the reverse argument order from the original function
+        in :mod:`meshmode`. This was necessary because
+        :func:`~functools.singledispatch` only dispatches on the first argument.
+    """
+    if is_array_container(ary):
+        return deserialize_container(ary, [
+            (key, thaw(subary, actx))
+            for key, subary in serialize_container(ary)
+            ])
+    else:
+        return actx.thaw(ary)
+
+# }}}
+
+# vim: foldmethod=marker
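A short usage sketch for the traversal and freeze/thaw helpers above; ``actx``
is assumed to be a concrete :class:`ArrayContext` and ``frozen_state`` a
(possibly nested) container of frozen arrays::

    from arraycontext import thaw, freeze, rec_map_array_container

    state = thaw(frozen_state, actx)           # recursively re-attach to actx
    scaled = rec_map_array_container(lambda subary: 2 * subary, state)
    stored = freeze(scaled, actx)              # safe for long-term storage/reuse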
diff --git a/arraycontext/context.py b/arraycontext/context.py
new file mode 100644
index 0000000..f9a3bd1
--- /dev/null
+++ b/arraycontext/context.py
@@ -0,0 +1,264 @@
+"""
+An array context is an abstraction that helps you dispatch between multiple
+implementations of :mod:`numpy`-like :math:`n`-dimensional arrays.
+
+.. currentmodule:: arraycontext
+.. autoclass:: ArrayContext
+"""
+
+
+__copyright__ = """
+Copyright (C) 2020-1 University of Illinois Board of Trustees
+"""
+
+__license__ = """
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+"""
+
+from typing import Sequence, Union
+from abc import ABC, abstractmethod
+
+import numpy as np
+from pytools import memoize_method
+from pytools.tag import Tag
+
+
+# {{{ ArrayContext
+
+class ArrayContext(ABC):
+    r"""
+    :canonical: arraycontext.ArrayContext
+
+    An interface that allows software implementing a numerical algorithm
+    (such as :class:`~meshmode.discretization.Discretization`) to create and interact
+    with arrays without knowing their types.
+
+    .. versionadded:: 2020.2
+
+    .. automethod:: empty
+    .. automethod:: zeros
+    .. automethod:: empty_like
+    .. automethod:: zeros_like
+    .. automethod:: from_numpy
+    .. automethod:: to_numpy
+    .. automethod:: call_loopy
+    .. automethod:: einsum
+    .. attribute:: np
+
+         Provides access to a namespace that serves as a work-alike to
+         :mod:`numpy`.  The actual level of functionality provided is up to the
+         individual array context implementation, however the functions and
+         objects available under this namespace must not behave differently
+         from :mod:`numpy`.
+
+         As a baseline, special functions available through :mod:`loopy`
+         (e.g. ``sin``, ``exp``) are accessible through this interface.
+
+         Callables accessible through this namespace vectorize over object
+         arrays, including :class:`arraycontext.ArrayContainer`\ s.
+
+    .. automethod:: freeze
+    .. automethod:: thaw
+    .. automethod:: tag
+    .. automethod:: tag_axis
+    """
+
+    def __init__(self):
+        self.np = self._get_fake_numpy_namespace()
+
+    def _get_fake_numpy_namespace(self):
+        from .fake_numpy import BaseFakeNumpyNamespace
+        return BaseFakeNumpyNamespace(self)
+
+    @abstractmethod
+    def empty(self, shape, dtype):
+        pass
+
+    @abstractmethod
+    def zeros(self, shape, dtype):
+        pass
+
+    def empty_like(self, ary):
+        return self.empty(shape=ary.shape, dtype=ary.dtype)
+
+    def zeros_like(self, ary):
+        return self.zeros(shape=ary.shape, dtype=ary.dtype)
+
+    @abstractmethod
+    def from_numpy(self, array: np.ndarray):
+        r"""
+        :returns: the :class:`numpy.ndarray` *array* converted to the
+            array context's array type. The returned array will be
+            :meth:`thaw`\ ed.
+        """
+        pass
+
+    @abstractmethod
+    def to_numpy(self, array):
+        r"""
+        :returns: *array*, an array recognized by the context, converted
+            to a :class:`numpy.ndarray`. *array* must be
+            :meth:`thaw`\ ed.
+        """
+        pass
+
+    def call_loopy(self, program, **kwargs):
+        """Execute the :mod:`loopy` program *program* on the arguments
+        *kwargs*.
+
+        *program* is a :class:`loopy.LoopKernel` or :class:`loopy.Program`.
+        It is expected to not yet be transformed for execution speed.
+        It must have :attr:`loopy.Options.return_dict` set.
+
+        :return: a :class:`dict` of outputs from the program, each an
+            array understood by the context.
+        """
+
+    @memoize_method
+    def _get_scalar_func_loopy_program(self, c_name, nargs, naxes):
+        from pymbolic import var
+
+        var_names = ["i%d" % i for i in range(naxes)]
+        size_names = ["n%d" % i for i in range(naxes)]
+        subscript = tuple(var(vname) for vname in var_names)
+        from islpy import make_zero_and_vars
+        v = make_zero_and_vars(var_names, params=size_names)
+        domain = v[0].domain()
+        for vname, sname in zip(var_names, size_names):
+            domain = domain & v[0].le_set(v[vname]) & v[vname].lt_set(v[sname])
+
+        domain_bset, = domain.get_basic_sets()
+
+        import loopy as lp
+        from .loopy import make_loopy_program
+        return make_loopy_program(
+                [domain_bset],
+                [
+                    lp.Assignment(
+                        var("out")[subscript],
+                        var(c_name)(*[
+                            var("inp%d" % i)[subscript] for i in range(nargs)]))
+                    ],
+                name="actx_special_%s" % c_name)
+
+    @abstractmethod
+    def freeze(self, array):
+        """Return a version of the context-defined array *array* that is
+        'frozen', i.e. suitable for long-term storage and reuse. Frozen arrays
+        do not support arithmetic. For example, in the context of
+        :class:`~pyopencl.array.Array`, this might mean stripping the array
+        of an associated command queue, whereas in a lazily-evaluated context,
+        it might mean that the array is evaluated and stored.
+
+        Freezing makes the array independent of this :class:`ArrayContext`;
+        it is permitted to :meth:`thaw` it in a different one, as long as that
+        context understands the array format.
+
+        See also :func:`arraycontext.freeze`.
+        """
+
+    @abstractmethod
+    def thaw(self, array):
+        """Take a 'frozen' array and return a new array representing the data in
+        *array* that is able to perform arithmetic and other operations, using
+        the execution resources of this context. In the context of
+        :class:`~pyopencl.array.Array`, this might mean that the array is
+        equipped with a command queue, whereas in a lazily-evaluated context,
+        it might mean that the returned array is a symbol bound to
+        the data in *array*.
+
+        The returned array may not be used with other contexts while thawed.
+
+        See also :func:`arraycontext.thaw`.
+        """
+
+    @abstractmethod
+    def tag(self, tags: Union[Sequence[Tag], Tag], array):
+        """If the array type used by the array context is capable of capturing
+        metadata, return a version of *array* with the *tags* applied. *array*
+        itself is not modified.
+
+        .. versionadded:: 2021.2
+        """
+
+    @abstractmethod
+    def tag_axis(self, iaxis, tags: Union[Sequence[Tag], Tag], array):
+        """If the array type used by the array context is capable of capturing
+        metadata, return a version of *array* in which axis number *iaxis* has
+        the *tags* applied. *array* itself is not modified.
+
+        .. versionadded:: 2021.2
+        """
+
+    @memoize_method
+    def _get_einsum_prg(self, spec, arg_names, tagged):
+        import loopy as lp
+        from .loopy import _DEFAULT_LOOPY_OPTIONS
+        from loopy.version import MOST_RECENT_LANGUAGE_VERSION
+        return lp.make_einsum(
+            spec,
+            arg_names,
+            options=_DEFAULT_LOOPY_OPTIONS,
+            lang_version=MOST_RECENT_LANGUAGE_VERSION,
+            tags=tagged,
+        )
+
+    # This lives here rather than in .np because the interface does not
+    # agree with numpy's all that well. Why can't it, you ask?
+    # Well, optimizing generic einsum for OpenCL/GPU execution
+    # is actually difficult, even in eager mode, and so without added
+    # metadata describing what's happening, transform_loopy_program
+    # has a very difficult (hopeless?) job to do.
+    #
+    # Unfortunately, the existing metadata support (cf. .tag()) cannot
+    # help with eager mode execution [1], because, by definition, when the
+    # result is passed to .tag(), it is already computed.
+    # That's why einsum's interface here needs to be cluttered with
+    # metadata, and that's why it can't live under .np.
+    # [1] https://github.com/inducer/meshmode/issues/177
+    def einsum(self, spec, *args, arg_names=None, tagged=()):
+        """Computes the result of Einstein summation following the
+        convention in :func:`numpy.einsum`.
+
+        :arg spec: a string denoting the subscripts for
+            summation as a comma-separated list of subscript labels.
+            This follows the usual :func:`numpy.einsum` convention.
+            Note that the explicit indicator ``->`` for the precise output
+            form is required.
+        :arg args: a sequence of array-like operands, whose order matches
+            the subscript labels provided by *spec*.
+        :arg arg_names: an optional iterable of strings denoting
+            the names of the *args*. If *None*, default names will be
+            generated.
+        :arg tagged: an optional sequence of :class:`pytools.tag.Tag`
+            objects specifying the tags to be applied to the operation.
+
+        :return: the output of the einsum :mod:`loopy` program
+        """
+        if arg_names is None:
+            arg_names = tuple("arg%d" % i for i in range(len(args)))
+
+        prg = self._get_einsum_prg(spec, arg_names, tagged)
+        return self.call_loopy(
+            prg, **{arg_names[i]: arg for i, arg in enumerate(args)}
+        )["out"]
+
+# }}}
+
+# vim: foldmethod=marker
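As a brief illustration of :meth:`ArrayContext.einsum` (``actx`` is assumed to be
a concrete array context such as :class:`PyOpenCLArrayContext`, and ``mat``,
``vec`` thawed device arrays)::

    # matrix-vector product; note the mandatory explicit "->" output subscript
    result = actx.einsum("ij,j->i", mat, vec, arg_names=("mat", "vec"))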
diff --git a/arraycontext/fake_numpy.py b/arraycontext/fake_numpy.py
new file mode 100644
index 0000000..6b0163d
--- /dev/null
+++ b/arraycontext/fake_numpy.py
@@ -0,0 +1,180 @@
+__copyright__ = """
+Copyright (C) 2020-1 University of Illinois Board of Trustees
+"""
+
+__license__ = """
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+"""
+
+
+import numpy as np
+from arraycontext.container import is_array_container
+from arraycontext.container.traversal import (
+        rec_map_array_container, multimapped_over_array_containers)
+
+
+# {{{ BaseFakeNumpyNamespace
+
+class BaseFakeNumpyNamespace:
+    def __init__(self, array_context):
+        self._array_context = array_context
+        self.linalg = self._get_fake_numpy_linalg_namespace()
+
+    def _get_fake_numpy_linalg_namespace(self):
+        return BaseFakeNumpyLinalgNamespace(self._array_context)
+
+    _numpy_math_functions = frozenset({
+        # https://numpy.org/doc/stable/reference/routines.math.html
+
+        # FIXME: Heads up: not all of these are supported yet.
+        # But I felt it was important to only dispatch actually existing
+        # numpy functions to loopy.
+
+        # Trigonometric functions
+        "sin", "cos", "tan", "arcsin", "arccos", "arctan", "hypot", "arctan2",
+        "degrees", "radians", "unwrap", "deg2rad", "rad2deg",
+
+        # Hyperbolic functions
+        "sinh", "cosh", "tanh", "arcsinh", "arccosh", "arctanh",
+
+        # Rounding
+        "around", "round_", "rint", "fix", "floor", "ceil", "trunc",
+
+        # Sums, products, differences
+
+        # FIXME: Many of these are reductions or scans.
+        # "prod", "sum", "nanprod", "nansum", "cumprod", "cumsum", "nancumprod",
+        # "nancumsum", "diff", "ediff1d", "gradient", "cross", "trapz",
+
+        # Exponents and logarithms
+        "exp", "expm1", "exp2", "log", "log10", "log2", "log1p", "logaddexp",
+        "logaddexp2",
+
+        # Other special functions
+        "i0", "sinc",
+
+        # Floating point routines
+        "signbit", "copysign", "frexp", "ldexp", "nextafter", "spacing",
+        # Rational routines
+        "lcm", "gcd",
+
+        # Arithmetic operations
+        "add", "reciprocal", "positive", "negative", "multiply", "divide", "power",
+        "subtract", "true_divide", "floor_divide", "float_power", "fmod", "mod",
+        "modf", "remainder", "divmod",
+
+        # Handling complex numbers
+        "angle", "real", "imag",
+        # Implemented below:
+        # "conj", "conjugate",
+
+        # Miscellaneous
+        "convolve", "clip", "sqrt", "cbrt", "square", "absolute", "abs", "fabs",
+        "sign", "heaviside", "maximum", "fmax", "nan_to_num",
+
+        # FIXME:
+        # "interp",
+
+        })
+
+    _numpy_to_c_arc_functions = {
+            "arcsin": "asin",
+            "arccos": "acos",
+            "arctan": "atan",
+            "arctan2": "atan2",
+
+            "arcsinh": "asinh",
+            "arccosh": "acosh",
+            "arctanh": "atanh",
+            }
+
+    _c_to_numpy_arc_functions = {c_name: numpy_name
+            for numpy_name, c_name in _numpy_to_c_arc_functions.items()}
+
+    def __getattr__(self, name):
+        def loopy_implemented_elwise_func(*args):
+            actx = self._array_context
+            # FIXME: Maybe involve loopy type inference?
+            result = actx.empty(args[0].shape, args[0].dtype)
+            prg = actx._get_scalar_func_loopy_program(
+                    c_name, nargs=len(args), naxes=len(args[0].shape))
+            actx.call_loopy(prg, out=result,
+                    **{"inp%d" % i: arg for i, arg in enumerate(args)})
+            return result
+
+        if name in self._c_to_numpy_arc_functions:
+            from warnings import warn
+            warn(f"'{name}' in ArrayContext.np is deprecated. "
+                    "Use '{c_to_numpy_arc_functions[name]}' as in numpy. "
+                    "The old name will stop working in 2021.",
+                    DeprecationWarning, stacklevel=3)
+
+        # normalize to C names anyway
+        c_name = self._numpy_to_c_arc_functions.get(name, name)
+
+        # limit which functions we try to hand off to loopy
+        if name in self._numpy_math_functions:
+            return multimapped_over_array_containers(loopy_implemented_elwise_func)
+        else:
+            raise AttributeError(name)
+
+    def _new_like(self, ary, alloc_like):
+        from numbers import Number
+
+        if isinstance(ary, np.ndarray) and ary.dtype.char == "O":
+            # NOTE: we don't want to match numpy semantics on object arrays,
+            # e.g. `np.zeros_like(x)` returns `array([0, 0, ...], dtype=object)`
+            # FIXME: what about object arrays nested in an ArrayContainer?
+            raise NotImplementedError("operation not implemented for object arrays")
+        elif is_array_container(ary):
+            return rec_map_array_container(alloc_like, ary)
+        elif isinstance(ary, Number):
+            # NOTE: `np.zeros_like(x)` returns `array(x, shape=())`, which
+            # is best implemented by concrete array contexts, if at all
+            raise NotImplementedError("operation not implemented for scalars")
+        else:
+            return alloc_like(ary)
+
+    def empty_like(self, ary):
+        return self._new_like(ary, self._array_context.empty_like)
+
+    def zeros_like(self, ary):
+        return self._new_like(ary, self._array_context.zeros_like)
+
+    def conjugate(self, x):
+        # NOTE: conjugate distributes over object arrays, but it looks for a
+        # `conjugate` ufunc, while some implementations only have the shorter
+        # `conj` (e.g. cl.array.Array), so this should work for everybody.
+        return rec_map_array_container(lambda obj: obj.conj(), x)
+
+    conj = conjugate
+
+# }}}
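+
+# A rough usage sketch for the __getattr__ dispatch above (assuming `actx` is
+# a concrete ArrayContext and `x` is an array/container it understands; this
+# is illustrative only, not part of the module):
+#
+#     y = actx.np.sin(x)          # handed off to a loopy elementwise kernel
+#     z = actx.np.arctan2(y, x)   # normalized internally to the C name "atan2"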
+
+
+# {{{ BaseFakeNumpyLinalgNamespace
+
+class BaseFakeNumpyLinalgNamespace:
+    def __init__(self, array_context):
+        self._array_context = array_context
+
+# }}}
+
+
+# vim: foldmethod=marker
diff --git a/arraycontext/impl/pyopencl.py b/arraycontext/impl/pyopencl.py
new file mode 100644
index 0000000..80ea558
--- /dev/null
+++ b/arraycontext/impl/pyopencl.py
@@ -0,0 +1,381 @@
+"""
+.. currentmodule:: arraycontext
+.. autoclass:: PyOpenCLArrayContext
+"""
+__copyright__ = """
+Copyright (C) 2020-1 University of Illinois Board of Trustees
+"""
+
+__license__ = """
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+"""
+
+from typing import Sequence, Union
+from functools import partial
+import operator
+
+import numpy as np
+
+from pytools import memoize_method
+from pytools.tag import Tag
+
+from arraycontext.metadata import FirstAxisIsElementsTag
+from arraycontext.fake_numpy import \
+        BaseFakeNumpyNamespace, BaseFakeNumpyLinalgNamespace
+from arraycontext.container.traversal import rec_multimap_array_container
+from arraycontext.container import serialize_container, is_array_container
+from arraycontext.context import ArrayContext
+
+
+# {{{ fake numpy
+
+class PyOpenCLFakeNumpyNamespace(BaseFakeNumpyNamespace):
+    def _get_fake_numpy_linalg_namespace(self):
+        return _PyOpenCLFakeNumpyLinalgNamespace(self._array_context)
+
+    # {{{ comparisons
+
+    # FIXME: This should be documentation, not a comment.
+    # These are here mainly because some arrays may choose to interpret
+    # equality comparison as a binary predicate of structural identity,
+    # i.e. a single "are these two the same" answer rather than numpy's
+    # elementwise semantics.
+    # These operations provide access to numpy-style comparisons in that
+    # case.
+
+    def equal(self, x, y):
+        return rec_multimap_array_container(operator.eq, x, y)
+
+    def not_equal(self, x, y):
+        return rec_multimap_array_container(operator.ne, x, y)
+
+    def greater(self, x, y):
+        return rec_multimap_array_container(operator.gt, x, y)
+
+    def greater_equal(self, x, y):
+        return rec_multimap_array_container(operator.ge, x, y)
+
+    def less(self, x, y):
+        return rec_multimap_array_container(operator.lt, x, y)
+
+    def less_equal(self, x, y):
+        return rec_multimap_array_container(operator.le, x, y)
+
+    # }}}
+
+    def ones_like(self, ary):
+        def _ones_like(subary):
+            ones = self._array_context.empty_like(subary)
+            ones.fill(1)
+            return ones
+
+        return self._new_like(ary, _ones_like)
+
+    def maximum(self, x, y):
+        import pyopencl.array as cl_array
+        return rec_multimap_array_container(
+                partial(cl_array.maximum, queue=self._array_context.queue),
+                x, y)
+
+    def minimum(self, x, y):
+        import pyopencl.array as cl_array
+        return rec_multimap_array_container(
+                partial(cl_array.minimum, queue=self._array_context.queue),
+                x, y)
+
+    def where(self, criterion, then, else_):
+        import pyopencl.array as cl_array
+
+        def where_inner(inner_crit, inner_then, inner_else):
+            if isinstance(inner_crit, bool):
+                return inner_then if inner_crit else inner_else
+            return cl_array.if_positive(inner_crit != 0, inner_then, inner_else,
+                    queue=self._array_context.queue)
+
+        return rec_multimap_array_container(where_inner, criterion, then, else_)
+
+    def sum(self, a, dtype=None):
+        import pyopencl.array as cl_array
+        return cl_array.sum(
+                a, dtype=dtype, queue=self._array_context.queue).get()[()]
+
+    def min(self, a):
+        import pyopencl.array as cl_array
+        return cl_array.min(a, queue=self._array_context.queue).get()[()]
+
+    def max(self, a):
+        import pyopencl.array as cl_array
+        return cl_array.max(a, queue=self._array_context.queue).get()[()]
+
+    def stack(self, arrays, axis=0):
+        import pyopencl.array as cla
+        return rec_multimap_array_container(
+                lambda *args: cla.stack(arrays=args, axis=axis,
+                    queue=self._array_context.queue),
+                *arrays)
+
+# }}}
+
+
+# {{{ fake np.linalg
+
+def _flatten_array(ary):
+    import pyopencl.array as cl
+    assert isinstance(ary, cl.Array)
+
+    if ary.size == 0:
+        # Work around https://github.com/inducer/pyopencl/pull/402
+        return ary._new_with_changes(
+                data=None, offset=0, shape=(0,), strides=(ary.dtype.itemsize,))
+    if ary.flags.f_contiguous:
+        return ary.reshape(-1, order="F")
+    elif ary.flags.c_contiguous:
+        return ary.reshape(-1, order="C")
+    else:
+        raise ValueError("cannot flatten array "
+                f"with strides {ary.strides} of {ary.dtype}")
+
+
+class _PyOpenCLFakeNumpyLinalgNamespace(BaseFakeNumpyLinalgNamespace):
+    def norm(self, ary, ord=None):
+        from numbers import Number
+        if isinstance(ary, Number):
+            return abs(ary)
+
+        if ord is None:
+            ord = 2
+
+        try:
+            from meshmode.dof_array import DOFArray
+        except ImportError:
+            pass
+        else:
+            if isinstance(ary, DOFArray):
+                from warnings import warn
+                warn("Taking an actx.np.linalg.norm of a DOFArray is deprecated. "
+                        "(DOFArrays use 2D arrays internally, and "
+                        "actx.np.linalg.norm should compute matrix norms of those.) "
+                        "This will stop working in 2022. "
+                        "Use meshmode.dof_array.flat_norm instead.",
+                        DeprecationWarning, stacklevel=2)
+
+                import numpy.linalg as la
+                return la.norm(
+                        [self.norm(_flatten_array(subary), ord=ord)
+                            for _, subary in serialize_container(ary)],
+                        ord=ord)
+
+        if is_array_container(ary):
+            import numpy.linalg as la
+            return la.norm(
+                    [self.norm(subary, ord=ord)
+                        for _, subary in serialize_container(ary)],
+                    ord=ord)
+
+        if len(ary.shape) != 1:
+            raise NotImplementedError("only vector norms are implemented")
+
+        if ary.size == 0:
+            return 0
+
+        if ord == np.inf:
+            return self._array_context.np.max(abs(ary))
+        elif isinstance(ord, Number) and ord > 0:
+            return self._array_context.np.sum(abs(ary)**ord)**(1/ord)
+        else:
+            raise NotImplementedError(f"unsupported value of 'ord': {ord}")
+
+# }}}
+
+
+# {{{ PyOpenCLArrayContext
+
+class PyOpenCLArrayContext(ArrayContext):
+    """
+    A :class:`ArrayContext` that uses :class:`pyopencl.array.Array` instances
+    for its base array class.
+
+    .. attribute:: context
+
+        A :class:`pyopencl.Context`.
+
+    .. attribute:: queue
+
+        A :class:`pyopencl.CommandQueue`.
+
+    .. attribute:: allocator
+
+        A PyOpenCL memory allocator. Can also be `None` (default) or `False` to
+        use the default allocator. Please note that running with the default
+        allocator allocates and deallocates OpenCL buffers directly. If lots
+        of arrays are created (e.g. as results of computation), the associated cost
+        may become significant. Using e.g. :class:`pyopencl.tools.MemoryPool`
+        as the allocator can help avoid this cost.
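+
+    A minimal construction sketch (assuming a working OpenCL environment; the
+    memory pool is optional, but recommended when many arrays are created)::
+
+        import pyopencl as cl
+        import pyopencl.tools as cl_tools
+        from arraycontext import PyOpenCLArrayContext
+
+        ctx = cl.create_some_context()
+        queue = cl.CommandQueue(ctx)
+        actx = PyOpenCLArrayContext(
+            queue,
+            allocator=cl_tools.MemoryPool(cl_tools.ImmediateAllocator(queue)))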
+    """
+
+    def __init__(self, queue, allocator=None, wait_event_queue_length=None):
+        r"""
+        :arg wait_event_queue_length: The length of a queue of
+            :class:`~pyopencl.Event` objects that are maintained by the
+            array context, on a per-kernel-name basis. The events returned
+            from kernel execution are appended to the queue. Once the
+            length of the queue exceeds *wait_event_queue_length*, the
+            first event in the queue is :meth:`pyopencl.Event.wait`\ ed on.
+
+            *wait_event_queue_length* may be set to *False* to disable this feature.
+
+            The use of *wait_event_queue_length* helps avoid enqueuing
+            large amounts of work (and, potentially, allocating large amounts
+            of memory) far ahead of the actual OpenCL execution front,
+            by limiting the number of each type (name, really) of kernel
+            that may reside unexecuted in the queue at one time.
+
+        .. note::
+
+            For now, *wait_event_queue_length* should be regarded as an
+            experimental feature that may change or disappear at any time.
+        """
+        super().__init__()
+        self.context = queue.context
+        self.queue = queue
+        self.allocator = allocator if allocator else None
+
+        if wait_event_queue_length is None:
+            wait_event_queue_length = 10
+
+        self._wait_event_queue_length = wait_event_queue_length
+        self._kernel_name_to_wait_event_queue = {}
+
+        import pyopencl as cl
+        if allocator is None and queue.device.type & cl.device_type.GPU:
+            from warnings import warn
+            warn("PyOpenCLArrayContext created without an allocator on a GPU. "
+                 "This can lead to high numbers of memory allocations. "
+                 "Please consider using a pyopencl.tools.MemoryPool. "
+                 "Run with allocator=False to disable this warning.")
+
+    def _get_fake_numpy_namespace(self):
+        return PyOpenCLFakeNumpyNamespace(self)
+
+    # {{{ ArrayContext interface
+
+    def empty(self, shape, dtype):
+        import pyopencl.array as cla
+        return cla.empty(self.queue, shape=shape, dtype=dtype,
+                allocator=self.allocator)
+
+    def zeros(self, shape, dtype):
+        import pyopencl.array as cla
+        return cla.zeros(self.queue, shape=shape, dtype=dtype,
+                allocator=self.allocator)
+
+    def from_numpy(self, array: np.ndarray):
+        import pyopencl.array as cla
+        return cla.to_device(self.queue, array, allocator=self.allocator)
+
+    def to_numpy(self, array):
+        return array.get(queue=self.queue)
+
+    def call_loopy(self, t_unit, **kwargs):
+        t_unit = self.transform_loopy_program(t_unit)
+        from arraycontext.loopy import get_default_entrypoint
+        default_entrypoint = get_default_entrypoint(t_unit)
+        prg_name = default_entrypoint.name
+
+        evt, result = t_unit(self.queue, **kwargs, allocator=self.allocator)
+
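+        # Bound how far the host may run ahead of the device: keep a short
+        # per-kernel-name queue of events and wait on the oldest one once the
+        # queue grows past _wait_event_queue_length.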
+        if self._wait_event_queue_length is not False:
+            wait_event_queue = self._kernel_name_to_wait_event_queue.setdefault(
+                    prg_name, [])
+
+            wait_event_queue.append(evt)
+            if len(wait_event_queue) > self._wait_event_queue_length:
+                wait_event_queue.pop(0).wait()
+
+        return result
+
+    def freeze(self, array):
+        array.finish()
+        return array.with_queue(None)
+
+    def thaw(self, array):
+        return array.with_queue(self.queue)
+
+    # }}}
+
+    @memoize_method
+    def transform_loopy_program(self, t_unit):
+        # accommodate loopy with and without kernel callables
+
+        import loopy as lp
+        from arraycontext.loopy import get_default_entrypoint
+        default_entrypoint = get_default_entrypoint(t_unit)
+        options = default_entrypoint.options
+        if not (options.return_dict and options.no_numpy):
+            raise ValueError("Loopy kernel passed to call_loopy must "
+                    "have return_dict and no_numpy options set. "
+                    "Did you use arraycontext.make_loopy_program "
+                    "to create this kernel?")
+
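+        # Rough parallelization strategy: pick an "element-like" iname to map
+        # to OpenCL work groups ("g.0") and, if one exists, a "DOF-like" inner
+        # iname to split and map to work items ("l.0").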
+        all_inames = default_entrypoint.all_inames()
+        # FIXME: This could be much smarter.
+        inner_iname = None
+        if (len(default_entrypoint.instructions) == 1
+                and isinstance(default_entrypoint.instructions[0], lp.Assignment)
+                and any(isinstance(tag, FirstAxisIsElementsTag)
+                    # FIXME: Firedrake branch lacks kernel tags
+                    for tag in getattr(default_entrypoint, "tags", ()))):
+            stmt, = default_entrypoint.instructions
+
+            out_inames = [v.name for v in stmt.assignee.index_tuple]
+            assert out_inames
+            outer_iname = out_inames[0]
+            if len(out_inames) >= 2:
+                inner_iname = out_inames[1]
+
+        elif "iel" in all_inames:
+            outer_iname = "iel"
+
+            if "idof" in all_inames:
+                inner_iname = "idof"
+        elif "i0" in all_inames:
+            outer_iname = "i0"
+
+            if "i1" in all_inames:
+                inner_iname = "i1"
+        else:
+            raise RuntimeError(
+                "Unable to reason what outer_iname and inner_iname "
+                f"needs to be; all_inames is given as: {all_inames}"
+            )
+
+        if inner_iname is not None:
+            t_unit = lp.split_iname(t_unit, inner_iname, 16, inner_tag="l.0")
+        return lp.tag_inames(t_unit, {outer_iname: "g.0"})
+
+    def tag(self, tags: Union[Sequence[Tag], Tag], array):
+        # Sorry, not capable.
+        return array
+
+    def tag_axis(self, iaxis, tags: Union[Sequence[Tag], Tag], array):
+        # Sorry, not capable.
+        return array
+
+# }}}
+
+# vim: foldmethod=marker
diff --git a/arraycontext/loopy.py b/arraycontext/loopy.py
new file mode 100644
index 0000000..8f2816d
--- /dev/null
+++ b/arraycontext/loopy.py
@@ -0,0 +1,73 @@
+"""
+.. currentmodule:: arraycontext
+.. autofunction:: make_loopy_program
+"""
+
+__copyright__ = """
+Copyright (C) 2020-1 University of Illinois Board of Trustees
+"""
+
+__license__ = """
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+"""
+
+import loopy as lp
+from loopy.version import MOST_RECENT_LANGUAGE_VERSION
+
+
+# {{{ loopy
+
+_DEFAULT_LOOPY_OPTIONS = lp.Options(
+        no_numpy=True,
+        return_dict=True)
+
+
+def make_loopy_program(domains, statements, kernel_data=None,
+        name="mm_actx_kernel"):
+    """Return a :class:`loopy.LoopKernel` suitable for use with
+    :meth:`ArrayContext.call_loopy`.
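+
+    A short usage sketch (names are illustrative; ``actx`` is an
+    :class:`ArrayContext`)::
+
+        prg = make_loopy_program(
+                "{[i]: 0 <= i < n}",
+                "out[i] = 2*inp[i]",
+                name="double")
+
+        out = actx.call_loopy(prg, inp=x)["out"]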
+    """
+    if kernel_data is None:
+        kernel_data = ["..."]
+
+    return lp.make_kernel(
+            domains,
+            statements,
+            kernel_data=kernel_data,
+            options=_DEFAULT_LOOPY_OPTIONS,
+            default_offset=lp.auto,
+            name=name,
+            lang_version=MOST_RECENT_LANGUAGE_VERSION)
+
+
+def get_default_entrypoint(t_unit):
+    try:
+        # main and "kernel callables" branch
+        return t_unit.default_entrypoint
+    except AttributeError:
+        try:
+            return t_unit.root_kernel
+        except AttributeError:
+            raise TypeError("unable to find default entry point for loopy "
+                    "translation unit")
+
+# }}}
+
+
+# vim: foldmethod=marker
diff --git a/arraycontext/metadata.py b/arraycontext/metadata.py
new file mode 100644
index 0000000..1713cd1
--- /dev/null
+++ b/arraycontext/metadata.py
@@ -0,0 +1,52 @@
+"""
+.. autoclass:: CommonSubexpressionTag
+.. autoclass:: FirstAxisIsElementsTag
+"""
+
+__copyright__ = """
+Copyright (C) 2020-1 University of Illinois Board of Trustees
+"""
+
+__license__ = """
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+"""
+
+from pytools.tag import Tag
+
+
+# {{{ program metadata
+
+class CommonSubexpressionTag(Tag):
+    """A tag that is applicable to arrays indicating that this same array
+    may be evaluated multiple times, and that the implementation should
+    eliminate those redundant evaluations if possible.
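+
+    A hypothetical sketch (``compute_flux`` is illustrative; ``actx.tag``
+    returns the tagged array)::
+
+        flux = actx.tag(CommonSubexpressionTag(), compute_flux(state))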
+    """
+
+
+class FirstAxisIsElementsTag(Tag):
+    """A tag that is applicable to array outputs indicating that the
+    first index corresponds to element indices. This suggests that
+    the implementation should set element indices as the outermost
+    loop extent.
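+
+    For instance, a per-element matrix application might be tagged like this
+    (a hypothetical sketch, where ``dofs`` has shape ``(nelements, ndofs)``)::
+
+        result = actx.einsum("ij,ej->ei", mat, dofs,
+                tagged=(FirstAxisIsElementsTag(),))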
+    """
+
+# }}}
+
+
+# vim: foldmethod=marker
diff --git a/arraycontext/pytest.py b/arraycontext/pytest.py
new file mode 100644
index 0000000..3327aa4
--- /dev/null
+++ b/arraycontext/pytest.py
@@ -0,0 +1,98 @@
+"""
+.. currentmodule:: arraycontext
+.. autofunction:: pytest_generate_tests_for_pyopencl_array_context
+"""
+
+
+__copyright__ = """
+Copyright (C) 2020-1 University of Illinois Board of Trustees
+"""
+
+__license__ = """
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+"""
+
+
+# {{{ pytest integration
+
+def pytest_generate_tests_for_pyopencl_array_context(metafunc):
+    """Parametrize tests for pytest to use a
+    :class:`~arraycontext.PyOpenCLArrayContext`.
+
+    Performs device enumeration analogously to
+    :func:`pyopencl.tools.pytest_generate_tests_for_pyopencl`.
+
+    Using the line:
+
+    .. code-block:: python
+
+       from arraycontext import (
+               pytest_generate_tests_for_pyopencl_array_context
+               as pytest_generate_tests)
+
+    in your pytest test scripts allows you to use the argument ``actx_factory``
+    in your test functions, and they will automatically be
+    run once for each OpenCL device/platform in the system, as appropriate,
+    with an argument-less function that returns an
+    :class:`~arraycontext.ArrayContext` when called.
+
+    It also allows you to specify the ``PYOPENCL_TEST`` environment variable
+    for device selection.
+    """
+
+    import pyopencl as cl
+    from pyopencl.tools import _ContextFactory
+
+    class ArrayContextFactory(_ContextFactory):
+        def __call__(self):
+            ctx = super().__call__()
+            from arraycontext.impl.pyopencl import PyOpenCLArrayContext
+            return PyOpenCLArrayContext(cl.CommandQueue(ctx))
+
+        def __str__(self):
+            return ("<array context factory for <pyopencl.Device '%s' on '%s'>" %
+                    (self.device.name.strip(),
+                     self.device.platform.name.strip()))
+
+    import pyopencl.tools as cl_tools
+    arg_names = cl_tools.get_pyopencl_fixture_arg_names(
+            metafunc, extra_arg_names=["actx_factory"])
+
+    if not arg_names:
+        return
+
+    arg_values, ids = cl_tools.get_pyopencl_fixture_arg_values()
+    if "actx_factory" in arg_names:
+        if "ctx_factory" in arg_names or "ctx_getter" in arg_names:
+            raise RuntimeError("Cannot use both an 'actx_factory' and a "
+                    "'ctx_factory' / 'ctx_getter' as arguments.")
+
+        for arg_dict in arg_values:
+            arg_dict["actx_factory"] = ArrayContextFactory(arg_dict["device"])
+
+    arg_values = [
+            tuple(arg_dict[name] for name in arg_names)
+            for arg_dict in arg_values
+            ]
+
+    metafunc.parametrize(arg_names, arg_values, ids=ids)
+
+# }}}
+
+
+# vim: foldmethod=marker
diff --git a/doc/array_context.rst b/doc/array_context.rst
index 95c7a5e..4b3b004 100644
--- a/doc/array_context.rst
+++ b/doc/array_context.rst
@@ -3,5 +3,12 @@ The Array Context Abstraction
 
 .. automodule:: arraycontext
 
+.. automodule:: arraycontext.context
+
 Implementations of the Array Context Abstraction
 ================================================
+
+Array context based on :mod:`pyopencl.array`
+--------------------------------------------
+
+.. automodule:: arraycontext.impl.pyopencl
diff --git a/doc/conf.py b/doc/conf.py
index d1b7661..eda91af 100644
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -17,7 +17,8 @@ copyright = "2021, University of Illinois Board of Trustees"
 ver_dic = {}
 exec(
         compile(
-            open("../arraycontext/version.py").read(), "../arraycontext/version.py", "exec"),
+            open("../arraycontext/version.py").read(),
+            "../arraycontext/version.py", "exec"),
         ver_dic)
 version = ".".join(str(x) for x in ver_dic["VERSION"])
 # The full version, including alpha/beta/rc tags.
@@ -65,6 +66,8 @@ intersphinx_mapping = {
     "https://documen.tician.de/pyopencl": None,
     "https://documen.tician.de/pytato": None,
     "https://documen.tician.de/loopy": None,
+    "https://documen.tician.de/meshmode": None,
+    "https://docs.pytest.org/en/latest/": None,
 }
 
 autoclass_content = "class"
diff --git a/doc/container.rst b/doc/container.rst
new file mode 100644
index 0000000..967d4e1
--- /dev/null
+++ b/doc/container.rst
@@ -0,0 +1,19 @@
+Array Containers
+================
+
+.. automodule:: arraycontext.container
+
+Containers with arithmetic
+--------------------------
+
+.. automodule:: arraycontext.container.arithmetic
+
+Containers based on :mod:`dataclasses`
+--------------------------------------
+
+.. automodule:: arraycontext.container.dataclass
+
+Traversing containers
+---------------------
+
+.. automodule:: arraycontext.container.traversal
diff --git a/doc/index.rst b/doc/index.rst
index 9d0e0b1..4d2b436 100644
--- a/doc/index.rst
+++ b/doc/index.rst
@@ -1,17 +1,32 @@
-Welcome to arraycontext's documentation!
-========================================
+Welcome to :mod:`arraycontext`'s documentation!
+===============================================
 
-.. toctree::
-    :maxdepth: 2
-    :caption: Contents:
+GPU arrays? Deferred-evaluation arrays? Just plain ``numpy`` arrays? You'd like your
+code to work with all of them? No problem! Comes with pre-made array context
+implementations for:
+
+- :mod:`numpy`
+- :mod:`pyopencl`
+- :mod:`pytato`
+- Debugging
+- Profiling
+
+:mod:`arraycontext` started life as an array abstraction for use with the
+:mod:`meshmode` unstructured discretization package.
 
+Contents
+--------
+
+.. toctree::
     array_context
+    container
+    other
     misc
     🚀 Github <https://github.com/inducer/arraycontext>
     💾 Download Releases <https://pypi.org/project/arraycontext>
 
 Indices and tables
-==================
+------------------
 
 * :ref:`genindex`
 * :ref:`modindex`
diff --git a/doc/misc.rst b/doc/misc.rst
index bd387e9..d9547e7 100644
--- a/doc/misc.rst
+++ b/doc/misc.rst
@@ -50,7 +50,7 @@ or `examples <https://github.com/inducer/arraycontext/tree/main/examples>`_.
 User-visible Changes
 ====================
 
-Version 2016.1
+Version 2021.1
 --------------
 .. note::
 
@@ -98,7 +98,5 @@ Work on :mod:`arraycontext` was supported in part by
 * the US National Science Foundation under grant numbers DMS-1418961, CCF-1524433,
   DMS-1654756, SHF-1911019, and OAC-1931577.
 
-AK also gratefully acknowledges a hardware gift from Nvidia Corporation.
-
 The views and opinions expressed herein do not necessarily reflect those of the
 funding agencies.
diff --git a/doc/other.rst b/doc/other.rst
new file mode 100644
index 0000000..bc1998d
--- /dev/null
+++ b/doc/other.rst
@@ -0,0 +1,12 @@
+Other functionality
+===================
+
+:class:`~arraycontext.ArrayContext`-generating fixture for :mod:`pytest`
+------------------------------------------------------------------------
+
+.. automodule:: arraycontext.pytest
+
+Program creation for :mod:`loopy`
+---------------------------------
+
+.. automodule:: arraycontext.loopy
diff --git a/setup.cfg b/setup.cfg
index a3c1114..f1124f0 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,7 +1,6 @@
 [flake8]
 ignore = E126,E127,E128,E123,E226,E241,E242,E265,W503,E402
 max-line-length=85
-exclude=meshmode/mesh/refinement/__init__.py
 
 inline-quotes = "
 docstring-quotes = """
diff --git a/test/test_arraycontext.py b/test/test_arraycontext.py
index 181142b..a4fb4bf 100644
--- a/test/test_arraycontext.py
+++ b/test/test_arraycontext.py
@@ -20,9 +20,506 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 THE SOFTWARE.
 """
 
+from dataclasses import dataclass
+import numpy as np
+import pytest
 
-def test_stub():
-    assert 1 + 1 == 2
+from pytools.obj_array import make_obj_array
+
+from arraycontext import (
+        ArrayContext,
+        dataclass_array_container, with_container_arithmetic,
+        serialize_container, deserialize_container,
+        freeze, thaw,
+        FirstAxisIsElementsTag)
+from arraycontext import (  # noqa: F401
+        pytest_generate_tests_for_pyopencl_array_context
+        as pytest_generate_tests)
+
+import logging
+logger = logging.getLogger(__name__)
+
+
+# {{{ stand-in DOFArray implementation
+
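+# This stand-in mirrors meshmode's DOFArray just enough to exercise the
+# container protocol: @with_container_arithmetic supplies the arithmetic,
+# while serialize_container, deserialize_container, freeze and thaw are
+# registered for it below via their singledispatch hooks.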
+@with_container_arithmetic(
+        bcast_obj_array=True,
+        bcast_numpy_array=True,
+        rel_comparison=True)
+class DOFArray:
+    def __init__(self, actx, data):
+        if not (actx is None or isinstance(actx, ArrayContext)):
+            raise TypeError("actx must be of type ArrayContext")
+
+        if not isinstance(data, tuple):
+            raise TypeError("'data' argument must be a tuple")
+
+        self.array_context = actx
+        self.data = data
+
+    __array_priority__ = 10
+
+    def __len__(self):
+        return len(self.data)
+
+    def __getitem__(self, i):
+        return self.data[i]
+
+    @classmethod
+    def _serialize_init_arrays_code(cls, instance_name):
+        return {"_":
+                (f"{instance_name}_i", f"{instance_name}")}
+
+    @classmethod
+    def _deserialize_init_arrays_code(cls, template_instance_name, args):
+        (_, arg), = args.items()
+        # Why tuple([...])? https://stackoverflow.com/a/48592299
+        return (f"{template_instance_name}.array_context, tuple([{arg}])")
+
+    @property
+    def real(self):
+        return DOFArray(self.array_context, tuple([subary.real for subary in self]))
+
+    @property
+    def imag(self):
+        return DOFArray(self.array_context, tuple([subary.imag for subary in self]))
+
+
+@serialize_container.register(DOFArray)
+def _serialize_dof_container(ary: DOFArray):
+    return enumerate(ary.data)
+
+
+@deserialize_container.register(DOFArray)
+def _deserialize_dof_container(
+        template, iterable):
+    def _raise_index_inconsistency(i, stream_i):
+        raise ValueError(
+                "out-of-sequence indices supplied in DOFArray deserialization "
+                f"(expected {i}, received {stream_i})")
+
+    return type(template)(
+            template.array_context,
+            data=tuple(
+                v if i == stream_i else _raise_index_inconsistency(i, stream_i)
+                for i, (stream_i, v) in enumerate(iterable)))
+
+
+@freeze.register(DOFArray)
+def _freeze_dofarray(ary, actx=None):
+    assert actx is None
+    return type(ary)(
+        None,
+        tuple(ary.array_context.freeze(subary) for subary in ary.data))
+
+
+@thaw.register(DOFArray)
+def _thaw_dofarray(ary, actx):
+    if ary.array_context is not None:
+        raise ValueError("cannot thaw DOFArray that already has an array context")
+
+    return type(ary)(
+        actx,
+        tuple(actx.thaw(subary) for subary in ary.data))
+
+# }}}
+
+
+def test_array_context_np_workalike(actx_factory):
+    actx = actx_factory()
+
+    ndofs = 5000
+
+    for sym_name, n_args in [
+            ("sin", 1),
+            ("exp", 1),
+            ("arctan2", 2),
+            ("minimum", 2),
+            ("maximum", 2),
+            ("where", 3),
+            ("conj", 1),
+            # ("empty_like", 1),    # NOTE: fails np.allclose, obviously
+            ("zeros_like", 1),
+            ("ones_like", 1),
+            ]:
+        args = [np.random.randn(ndofs) for i in range(n_args)]
+        ref_result = getattr(np, sym_name)(*args)
+
+        # {{{ test cl.Arrays
+
+        actx_args = [actx.from_numpy(arg) for arg in args]
+        actx_result = actx.to_numpy(getattr(actx.np, sym_name)(*actx_args))
+
+        assert np.allclose(actx_result, ref_result)
+
+        # }}}
+
+        # {{{ test DOFArrays
+
+        actx_args = [DOFArray(actx, (arg,)) for arg in actx_args]
+        actx_result = actx.to_numpy(
+                getattr(actx.np, sym_name)(*actx_args)[0])
+
+        assert np.allclose(actx_result, ref_result)
+
+        # }}}
+
+        # {{{ test object arrays
+
+        if sym_name in ("empty_like", "zeros_like", "ones_like"):
+            continue
+
+        obj_array_args = [make_obj_array([arg]) for arg in actx_args]
+        obj_array_result = actx.to_numpy(
+                getattr(actx.np, sym_name)(*obj_array_args)[0][0])
+
+        assert np.allclose(obj_array_result, ref_result)
+
+        # }}}
+
+
+def test_dof_array_arithmetic_same_as_numpy(actx_factory):
+    actx = actx_factory()
+
+    ndofs = 50_000
+
+    def get_real(ary):
+        return ary.real
+
+    def get_imag(ary):
+        return ary.imag
+
+    import operator
+    from pytools import generate_nonnegative_integer_tuples_below as gnitb
+    from random import uniform, randrange
+    for op_func, n_args, use_integers in [
+            (operator.add, 2, False),
+            (operator.sub, 2, False),
+            (operator.mul, 2, False),
+            (operator.truediv, 2, False),
+            (operator.pow, 2, False),
+            # FIXME pyopencl.Array doesn't do mod.
+            #(operator.mod, 2, True),
+            #(operator.mod, 2, False),
+            #(operator.imod, 2, True),
+            #(operator.imod, 2, False),
+            # FIXME: Two outputs
+            #(divmod, 2, False),
+
+            (operator.iadd, 2, False),
+            (operator.isub, 2, False),
+            (operator.imul, 2, False),
+            (operator.itruediv, 2, False),
+
+            (operator.and_, 2, True),
+            (operator.xor, 2, True),
+            (operator.or_, 2, True),
+
+            (operator.iand, 2, True),
+            (operator.ixor, 2, True),
+            (operator.ior, 2, True),
+
+            (operator.ge, 2, False),
+            (operator.lt, 2, False),
+            (operator.gt, 2, False),
+            (operator.eq, 2, True),
+            (operator.ne, 2, True),
+
+            (operator.pos, 1, False),
+            (operator.neg, 1, False),
+            (operator.abs, 1, False),
+
+            (get_real, 1, False),
+            (get_imag, 1, False),
+            ]:
+        for is_array_flags in gnitb(2, n_args):
+            if sum(is_array_flags) == 0:
+                # all scalars, no need to test
+                continue
+
+            if is_array_flags[0] == 0 and op_func in [
+                    operator.iadd, operator.isub,
+                    operator.imul, operator.itruediv,
+                    operator.iand, operator.ixor, operator.ior,
+                    ]:
+                # can't do in place operations with a scalar lhs
+                continue
+
+            args = [
+                    (0.5+np.random.rand(ndofs)
+                        if not use_integers else
+                        np.random.randint(3, 200, ndofs))
+
+                    if is_array_flag else
+                    (uniform(0.5, 2)
+                        if not use_integers
+                        else randrange(3, 200))
+                    for is_array_flag in is_array_flags]
+
+            # {{{ get reference numpy result
+
+            # make a copy for the in place operators
+            ref_args = [
+                    arg.copy() if isinstance(arg, np.ndarray) else arg
+                    for arg in args]
+            ref_result = op_func(*ref_args)
+
+            # }}}
+
+            # {{{ test DOFArrays
+
+            actx_args = [
+                    DOFArray(actx, (actx.from_numpy(arg),))
+                    if isinstance(arg, np.ndarray) else arg
+                    for arg in args]
+
+            actx_result = actx.to_numpy(op_func(*actx_args)[0])
+
+            assert np.allclose(actx_result, ref_result)
+
+            # }}}
+
+            # {{{ test object arrays of DOFArrays
+
+            # It would be very nice if comparisons on object arrays behaved
+            # consistently with everything else. Alas, they do not. Instead:
+            #
+            # 0.5 < obj_array(DOFArray) -> obj_array([True])
+            #
+            # because hey, 0.5 < DOFArray returned something truthy.
+
+            if op_func not in [
+                    operator.eq, operator.ne,
+                    operator.le, operator.lt,
+                    operator.ge, operator.gt,
+
+                    operator.iadd, operator.isub,
+                    operator.imul, operator.itruediv,
+                    operator.iand, operator.ixor, operator.ior,
+
+                    # All Python objects are real-valued, right?
+                    get_imag,
+                    ]:
+                obj_array_args = [
+                        make_obj_array([arg]) if isinstance(arg, DOFArray) else arg
+                        for arg in actx_args]
+
+                obj_array_result = actx.to_numpy(
+                        op_func(*obj_array_args)[0][0])
+
+                assert np.allclose(obj_array_result, ref_result)
+
+            # }}}
+
+
+def test_dof_array_reductions_same_as_numpy(actx_factory):
+    actx = actx_factory()
+
+    from numbers import Number
+    for name in ["sum", "min", "max"]:
+        ary = np.random.randn(3000)
+        np_red = getattr(np, name)(ary)
+        actx_red = getattr(actx.np, name)(actx.from_numpy(ary))
+
+        assert isinstance(actx_red, Number)
+        assert np.allclose(np_red, actx_red)
+
+
+# {{{ test array context einsum
+
+@pytest.mark.parametrize("spec", [
+    "ij->ij",
+    "ij->ji",
+    "ii->i",
+])
+def test_array_context_einsum_array_manipulation(actx_factory, spec):
+    actx = actx_factory()
+
+    mat = actx.from_numpy(np.random.randn(10, 10))
+    res = actx.to_numpy(actx.einsum(spec, mat,
+                                    tagged=(FirstAxisIsElementsTag())))
+    ans = np.einsum(spec, actx.to_numpy(mat))
+    assert np.allclose(res, ans)
+
+
+@pytest.mark.parametrize("spec", [
+    "ij,ij->ij",
+    "ij,ji->ij",
+    "ij,kj->ik",
+])
+def test_array_context_einsum_array_matmatprods(actx_factory, spec):
+    actx = actx_factory()
+
+    mat_a = actx.from_numpy(np.random.randn(5, 5))
+    mat_b = actx.from_numpy(np.random.randn(5, 5))
+    res = actx.to_numpy(actx.einsum(spec, mat_a, mat_b,
+                                    tagged=(FirstAxisIsElementsTag())))
+    ans = np.einsum(spec, actx.to_numpy(mat_a), actx.to_numpy(mat_b))
+    assert np.allclose(res, ans)
+
+
+@pytest.mark.parametrize("spec", [
+    "im,mj,k->ijk"
+])
+def test_array_context_einsum_array_tripleprod(actx_factory, spec):
+    actx = actx_factory()
+
+    mat_a = actx.from_numpy(np.random.randn(7, 5))
+    mat_b = actx.from_numpy(np.random.randn(5, 7))
+    vec = actx.from_numpy(np.random.randn(7))
+    res = actx.to_numpy(actx.einsum(spec, mat_a, mat_b, vec,
+                                    tagged=(FirstAxisIsElementsTag())))
+    ans = np.einsum(spec,
+                    actx.to_numpy(mat_a),
+                    actx.to_numpy(mat_b),
+                    actx.to_numpy(vec))
+    assert np.allclose(res, ans)
+
+# }}}
+
+
+# {{{ test array container
+
+@with_container_arithmetic(bcast_obj_array=False, rel_comparison=True)
+@dataclass_array_container
+@dataclass(frozen=True)
+class MyContainer:
+    name: str
+    mass: DOFArray
+    momentum: np.ndarray
+    enthalpy: DOFArray
+
+    @property
+    def array_context(self):
+        return self.mass.array_context
+
+
+def _get_test_containers(actx, ambient_dim=2):
+    x = DOFArray(actx, (actx.from_numpy(np.random.randn(50_000)),))
+
+    # pylint: disable=unexpected-keyword-arg, no-value-for-parameter
+    dataclass_of_dofs = MyContainer(
+            name="container",
+            mass=x,
+            momentum=make_obj_array([x, x]),
+            enthalpy=x)
+
+    ary_dof = x
+    ary_of_dofs = make_obj_array([x, x, x])
+    mat_of_dofs = np.empty((2, 2), dtype=object)
+    for i in np.ndindex(mat_of_dofs.shape):
+        mat_of_dofs[i] = x
+
+    return ary_dof, ary_of_dofs, mat_of_dofs, dataclass_of_dofs
+
+
+def test_container_multimap(actx_factory):
+    actx = actx_factory()
+    ary_dof, ary_of_dofs, mat_of_dofs, dc_of_dofs = _get_test_containers(actx)
+
+    # {{{ check
+
+    def _check_allclose(f, arg1, arg2, atol=1.0e-14):
+        assert np.linalg.norm((f(arg1) - arg2).get()) < atol
+
+    def func_all_scalar(x, y):
+        return x + y
+
+    def func_first_scalar(x, subary):
+        return x + subary
+
+    def func_multiple_scalar(a, subary1, b, subary2):
+        return a * subary1 + b * subary2
+
+    from arraycontext import rec_multimap_array_container
+    result = rec_multimap_array_container(func_all_scalar, 1, 2)
+    assert result == 3
+
+    from functools import partial
+    for ary in [ary_dof, ary_of_dofs, mat_of_dofs, dc_of_dofs]:
+        result = rec_multimap_array_container(func_first_scalar, 1, ary)
+        rec_multimap_array_container(
+                partial(_check_allclose, lambda x: 1 + x),
+                ary, result)
+
+        result = rec_multimap_array_container(func_multiple_scalar, 2, ary, 2, ary)
+        rec_multimap_array_container(
+                partial(_check_allclose, lambda x: 4 * x),
+                ary, result)
+
+    with pytest.raises(AssertionError):
+        rec_multimap_array_container(func_multiple_scalar, 2, ary_dof, 2, dc_of_dofs)
+
+    # }}}
+
+
+def test_container_arithmetic(actx_factory):
+    actx = actx_factory()
+    ary_dof, ary_of_dofs, mat_of_dofs, dc_of_dofs = _get_test_containers(actx)
+
+    # {{{ check
+
+    def _check_allclose(f, arg1, arg2, atol=1.0e-14):
+        assert np.linalg.norm((f(arg1) - arg2).get()) < atol
+
+    from functools import partial
+    from arraycontext import rec_multimap_array_container
+    for ary in [ary_dof, ary_of_dofs, mat_of_dofs, dc_of_dofs]:
+        rec_multimap_array_container(
+                partial(_check_allclose, lambda x: 3 * x),
+                ary, 2 * ary + ary)
+        rec_multimap_array_container(
+                partial(_check_allclose, lambda x: actx.np.sin(x)),
+                ary, actx.np.sin(ary))
+
+    with pytest.raises(TypeError):
+        ary_of_dofs + dc_of_dofs
+
+    # }}}
+
+
+def test_container_freeze_thaw(actx_factory):
+    actx = actx_factory()
+    ary_dof, ary_of_dofs, mat_of_dofs, dc_of_dofs = _get_test_containers(actx)
+
+    # {{{ check
+
+    from arraycontext import get_container_context
+    from arraycontext import get_container_context_recursively
+
+    assert get_container_context(ary_of_dofs) is None
+    assert get_container_context(mat_of_dofs) is None
+    assert get_container_context(ary_dof) is actx
+    assert get_container_context(dc_of_dofs) is actx
+
+    assert get_container_context_recursively(ary_of_dofs) is actx
+    assert get_container_context_recursively(mat_of_dofs) is actx
+
+    for ary in [ary_dof, ary_of_dofs, mat_of_dofs, dc_of_dofs]:
+        frozen_ary = freeze(ary)
+        thawed_ary = thaw(frozen_ary, actx)
+        frozen_ary = freeze(thawed_ary)
+
+        assert get_container_context_recursively(frozen_ary) is None
+        assert get_container_context_recursively(thawed_ary) is actx
+
+    # }}}
+
+
+@pytest.mark.parametrize("ord", [2, np.inf])
+def test_container_norm(actx_factory, ord):
+    actx = actx_factory()
+
+    ary_dof, ary_of_dofs, mat_of_dofs, dc_of_dofs = _get_test_containers(actx)
+
+    from pytools.obj_array import make_obj_array
+    c = MyContainer(name="hey", mass=1, momentum=make_obj_array([2, 3]), enthalpy=5)
+    n1 = actx.np.linalg.norm(make_obj_array([c, c]), ord)
+    n2 = np.linalg.norm([1, 2, 3, 5]*2, ord)
+
+    assert abs(n1 - n2) < 1e-12
+
+# }}}
 
 
 if __name__ == "__main__":
-- 
GitLab