diff --git a/meshmode/array_context.py b/meshmode/array_context.py
index 4375eed48f4053f09d411168e26a0a8482d77e58..2e3ae4af97c3f49374c2e661987e60bfb792f401 100644
--- a/meshmode/array_context.py
+++ b/meshmode/array_context.py
@@ -27,6 +27,7 @@ import numpy as np
 import loopy as lp
 from loopy.version import MOST_RECENT_LANGUAGE_VERSION
 from pytools import memoize_method
+from pytools.obj_array import obj_array_vectorized_n_args
 
 __doc__ = """
 .. autofunction:: make_loopy_program
@@ -47,13 +48,14 @@ def make_loopy_program(domains, statements, kernel_data=["..."],
             options=lp.Options(
                 no_numpy=True,
                 return_dict=True),
+            default_offset=lp.auto,
             name=name,
             lang_version=MOST_RECENT_LANGUAGE_VERSION)
 
 
 # {{{ ArrayContext
 
-class _FakeNumpyNamespace:
+class _BaseFakeNumpyNamespace:
     def __init__(self, array_context):
         self._array_context = array_context
 
@@ -62,12 +64,12 @@ class _FakeNumpyNamespace:
             actx = self._array_context
             # FIXME: Maybe involve loopy type inference?
             result = actx.empty(args[0].shape, args[0].dtype)
-            prg = actx._get_scalar_func_loopy_program(name, len(args))
+            prg = actx._get_scalar_func_loopy_program(
+                    name, nargs=len(args), naxes=len(args[0].shape))
             actx.call_loopy(prg, out=result,
                     **{"inp%d" % i: arg for i, arg in enumerate(args)})
             return result
 
-        from pytools.obj_array import obj_array_vectorized_n_args
         return obj_array_vectorized_n_args(f)
 
 
@@ -103,7 +105,10 @@ class ArrayContext:
     """
 
     def __init__(self):
-        self.np = _FakeNumpyNamespace(self)
+        self.np = self._get_fake_numpy_namespace()
+
+    def _get_fake_numpy_namespace(self):
+        return _BaseFakeNumpyNamespace(self)
 
     def empty(self, shape, dtype):
         raise NotImplementedError
@@ -147,17 +152,27 @@ class ArrayContext:
         raise NotImplementedError
 
     @memoize_method
-    def _get_scalar_func_loopy_program(self, name, nargs):
+    def _get_scalar_func_loopy_program(self, name, nargs, naxes):
         from pymbolic import var
-        iel = var("iel")
-        idof = var("idof")
+
+        var_names = ["i%d" % i for i in range(naxes)]
+        size_names = ["n%d" % i for i in range(naxes)]
+        subscript = tuple(var(vname) for vname in var_names)
+        from islpy import make_zero_and_vars
+        v = make_zero_and_vars(var_names, params=size_names)
+        domain = v[0].domain()
+        for vname, sname in zip(var_names, size_names):
+            domain = domain & v[0].le_set(v[vname]) & v[vname].lt_set(v[sname])
+
+        domain_bset, = domain.get_basic_sets()
+
         return make_loopy_program(
-                "{[iel, idof]: 0<=iel<nelements and 0<=idof<ndofs}",
[...]
diff --git a/meshmode/dof_array.py b/meshmode/dof_array.py
--- a/meshmode/dof_array.py
+++ b/meshmode/dof_array.py
[...]
             ])
 
 
-def flatten(ary: np.ndarray) -> np.ndarray:
+def flatten(ary: np.ndarray) -> Any:
     r"""Convert a :class:`DOFArray` into a "flat" array of degrees of freedom,
     where the resulting type of the array is given by the
     :attr:`DOFArray.array_context`.
@@ -244,17 +244,16 @@ def unflatten(actx: ArrayContext, discr: "_Discretization", ary,
     group_starts = np.cumsum([0] + group_sizes)
 
     return DOFArray.from_list(actx, [
-            actx.freeze(
-                actx.call_loopy(
-                    prg(),
-                    grp_start=grp_start, ary=ary,
-                    nelements=grp.nelements,
-                    ndofs_per_element=ndofs_per_element,
-                    )["result"])
-            for grp_start, grp, ndofs_per_element in zip(
-                group_starts,
-                discr.groups,
-                ndofs_per_element_per_group)])
+            actx.call_loopy(
+                prg(),
+                grp_start=grp_start, ary=ary,
+                nelements=grp.nelements,
+                ndofs_per_element=ndofs_per_element,
+                )["result"]
+            for grp_start, grp, ndofs_per_element in zip(
+                group_starts,
+                discr.groups,
+                ndofs_per_element_per_group)])
 
 
 def flat_norm(ary: DOFArray, ord=2):
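
Reviewer note, not part of the patch: the rewritten _get_scalar_func_loopy_program derives its loop domain from the array's number of axes with islpy instead of the old hard-coded "{[iel, idof]: ...}" string, so the same elementwise-kernel generator now serves arrays of any rank rather than only (nelements, ndofs)-shaped ones. The snippet below is a minimal standalone sketch of just that domain construction, specialized to naxes=2; the names i0, i1 and n0, n1 mirror the ones generated in the hunk above, and only islpy calls already used there appear.

    # Sketch only: reproduce the patch's domain construction for naxes=2.
    # Printing the result should give a set equivalent to
    #   [n0, n1] -> { [i0, i1] : 0 <= i0 < n0 and 0 <= i1 < n1 }
    from islpy import make_zero_and_vars

    var_names = ["i0", "i1"]       # loop indices, one per array axis
    size_names = ["n0", "n1"]      # axis lengths, as domain parameters

    v = make_zero_and_vars(var_names, params=size_names)
    domain = v[0].domain()
    for vname, sname in zip(var_names, size_names):
        # constrain each axis to 0 <= i < n
        domain = domain & v[0].le_set(v[vname]) & v[vname].lt_set(v[sname])

    print(domain)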
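
Also not part of the patch: the rename to _BaseFakeNumpyNamespace together with the new _get_fake_numpy_namespace hook suggests that ArrayContext subclasses are meant to supply their own numpy-workalike namespace. A hypothetical sketch of that usage follows; MyFakeNumpyNamespace and MyArrayContext are invented names for illustration only.

    # Hypothetical subclass sketch; these classes are not part of meshmode.
    from meshmode.array_context import ArrayContext, _BaseFakeNumpyNamespace

    class MyFakeNumpyNamespace(_BaseFakeNumpyNamespace):
        """Add or override numpy-like entry points here; anything not
        overridden falls back to the loopy-generated elementwise kernels
        provided by the base class."""

    class MyArrayContext(ArrayContext):
        def _get_fake_numpy_namespace(self):
            # ArrayContext.__init__ calls this hook to populate self.np.
            return MyFakeNumpyNamespace(self)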