From e468f16df9e4940993aca61229e572f45f92200e Mon Sep 17 00:00:00 2001
From: Matt Wala <wala1@illinois.edu>
Date: Mon, 6 Jul 2020 14:14:26 -0500
Subject: [PATCH 01/52] Add advection.py and dg_tools.py from arrayzy

---
 examples/advection.py | 187 +++++++++++++++++++
 examples/dg_tools.py  | 420 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 607 insertions(+)
 create mode 100644 examples/advection.py
 create mode 100644 examples/dg_tools.py

diff --git a/examples/advection.py b/examples/advection.py
new file mode 100644
index 0000000..de6c7ca
--- /dev/null
+++ b/examples/advection.py
@@ -0,0 +1,187 @@
+import numpy as np
+import pytest
+import functools
+from dg_tools import DGDiscr1D, integrate, DGOps1D, DGOps1DRef
+
+memoized = functools.lru_cache(maxsize=None)
+
+
+class AdvectionOperator(object):
+    """A class representing a DG advection operator."""
+
+    def __init__(self, discr, c, flux_type, dg_ops):
+        """
+        Inputs:
+            discr: an instance of DGDiscr1D
+            c: advection speed parameter
+            flux_type: "upwind" or "central"
+            dg_ops: An instance of AbstractDGOps1D
+        """
+        self.discr = discr
+        self.c = c
+        assert flux_type in ("upwind", "central")
+        self.flux_type = flux_type
+        self.dg = dg_ops
+
+    def weak_flux(self, vec):
+        """Apply the flux, weak form.
+
+        Inputs:
+            dg: a DGOps1D instance
+            vec: vector of nodal values at the faces
+
+        Signature: (m, 2) -> (m, 2)
+        """
+        if self.flux_type == "central":
+            flux = (vec + self.dg.face_swap(vec)) / 2
+
+        elif self.flux_type == "upwind":
+            swp = self.dg.face_swap(vec)
+            if self.c >= 0:
+                flux = self.dg.array_ops.stack((vec[:, 0], swp[:, 1]), axis=1)
+            else:
+                flux = self.dg.array_ops.stack((swp[:, 0], vec[:, 1]), axis=1)
+
+        flux = flux * self.c * self.dg.normals
+
+        return flux
+
+    def strong_flux(self, vec):
+        """Apply the flux, strong form.
+
+        Inputs:
+            dg: a DGOps1D instance
+            vec: vector of nodal values at the faces
+
+        Signature: (m, 2) -> (m, 2)
+        """
+        return self.c * self.dg.normals * vec - self.weak_flux(vec)
+
+    def apply(self, vec):
+        """Main operator implementation.
+
+        Signature: (m, n) -> (m, n)
+        """
+        dg = self.dg
+        return -dg.inv_mass(
+                dg.face_mass(self.strong_flux(dg.interp(vec)))
+                - self.c * dg.stiffness(vec))
+
+    def __call__(self, vec):
+        """Apply the DG advection operator to the vector of degrees of freedom.
+
+        Signature: (m*n,) -> (m*n,)
+        """
+        vec = vec.reshape((self.discr.nelements, self.discr.nnodes))
+        return self.apply(vec).reshape((-1,))
+
+
+def rk4(rhs, initial, t_initial, t_final, dt):
+    """RK4 integrator.
+
+    Inputs:
+        - rhs: a callable that takes arguments (t, y)
+        - initial: initial value
+        - t_initial: initial time
+        - t_final: final time
+        - dt: step size
+
+    Returns:
+        The solution computed at the final time.
+    """
+    t = t_initial
+    sol = initial
+
+    while t < t_final:
+        dt = min(dt, t_final - t)
+        s0 = rhs(t, sol)
+        s1 = rhs(t + dt/2, sol + dt/2 * s0)
+        s2 = rhs(t + dt/2, sol + dt/2 * s1)
+        s3 = rhs(t + dt, sol + dt * s2)
+        sol = sol + dt / 6 * (s0 + 2 * s1 + 2 * s2 + s3)
+        t += dt
+
+    return sol
+
+
+def test_rk4():
+    assert np.isclose(rk4(lambda t, y: -y, 1, 0, 5, 0.01), np.exp(-5))
+
+
+@pytest.mark.parametrize("order", (3, 4, 5))
+@pytest.mark.parametrize("flux_type", ("central", "upwind"))
+def test_ref_advection_convergence(order, flux_type):
+    errors = []
+    hs = []
+
+    for nelements in (8, 12, 16, 20):
+        discr = DGDiscr1D(0, 2*np.pi, nelements=nelements, nnodes=order)
+        u_initial = np.sin(discr.nodes())
+        op = AdvectionOperator(
+                discr, c=1, flux_type=flux_type, dg_ops=DGOps1DRef(discr))
+        u = rk4(lambda t, y: op(y), u_initial, t_initial=0, t_final=np.pi,
+                dt=0.01)
+        u_ref = -u_initial
+        hs.append(discr.h)
+        errors.append(integrate(discr, (u - u_ref)**2)**0.5)
+
+    eoc, _ = np.polyfit(np.log(hs), np.log(errors), 1)
+    assert eoc >= order - 0.1, eoc
+
+
+@pytest.mark.parametrize("order", (3, 4, 5))
+@pytest.mark.parametrize("flux_type", ("central", "upwind"))
+def test_advection_convergence(order, flux_type):
+    errors = []
+    hs = []
+
+    import arrayzy as az
+    import pyopencl as cl
+    cl_ctx = cl.create_some_context()
+    queue = cl.CommandQueue(cl_ctx)
+
+    for nelements in (8, 12, 16, 20):
+        discr = DGDiscr1D(0, 2*np.pi, nelements=nelements, nnodes=order)
+        u_initial = np.sin(discr.nodes())
+        with az.Context(queue) as cb:
+            u = cb.argument("u", shape="(nelements, nnodes)", dtype=np.float64)
+            op = AdvectionOperator(
+                    discr, c=1, flux_type=flux_type, dg_ops=DGOps1D(discr, cb))
+            cb.output(op.apply(u))
+        prog = cb.build()
+        u = rk4(lambda t, y: prog(
+            u=y.reshape(nelements, order))[1][0].reshape(-1),
+            u_initial, t_initial=0, t_final=np.pi, dt=0.01)
+        u_ref = -u_initial
+        hs.append(discr.h)
+        errors.append(integrate(discr, (u - u_ref)**2)**0.5)
+
+    eoc, _ = np.polyfit(np.log(hs), np.log(errors), 1)
+    assert eoc >= order - 0.1, eoc
+
+
+def main():
+    import arrayzy as az
+    import pyopencl as cl
+    cl_ctx = cl.create_some_context()
+    queue = cl.CommandQueue(cl_ctx)
+
+    nelements = 20
+    nnodes = 3
+
+    discr = DGDiscr1D(0, 2*np.pi, nelements=nelements, nnodes=nnodes)
+
+    with az.Context(queue) as cb:
+        u = cb.argument("u", shape="(nelements, nnodes)", dtype=np.float64)
+        op = AdvectionOperator(
+                discr, c=1, flux_type="central", dg_ops=DGOps1D(discr, cb))
+        cb.output(op.apply(u))
+
+    prog = cb.build()
+    u = np.sin(discr.nodes())
+    print(prog.program)
+    print(prog(u=u.reshape(nelements, nnodes))[1][0])
+
+
+if __name__ == "__main__":
+    main()
diff --git a/examples/dg_tools.py b/examples/dg_tools.py
new file mode 100644
index 0000000..3a3fca3
--- /dev/null
+++ b/examples/dg_tools.py
@@ -0,0 +1,420 @@
+import numpy as np
+import numpy.polynomial.legendre as leg
+import numpy.linalg as la
+import contextlib
+
+__doc__ = """
+Notation convention for operator shapes
+=======================================
+
+* m - number of elements in the discretization
+* n - number of volume degrees of freedom per element
+"""
+
+import functools
+
+
+memoized = functools.lru_cache(maxsize=None)
+
+
+def ortholegvander(x, deg):
+    """See numpy.polynomial.legendre.legvander(). Uses an orthonormal basis."""
+    result = leg.legvander(x, deg)
+    factors = np.array([np.sqrt((2*n+1)/2) for n in range(0, 1 + deg)])
+    return result * factors
+
+
+def ortholegder(c):
+    """See numpy.polynomial.legendre.legder(). Uses an orthonormal basis."""
+    fw_factors = np.array([np.sqrt((2*n+1)/2) for n in range(len(c))])
+    derivs = leg.legder(c * fw_factors)
+    return derivs / fw_factors[:len(derivs)]
+
+
+def ortholegval(x, c):
+    """See numpy.polynomial.legendre.legval(). Uses an orthonormal basis."""
+    factors = np.array([np.sqrt((2*n+1)/2) for n in range(len(c))])
+    return leg.legval(x, c * factors)
+
+
+class DGDiscr1D(object):
+    """A one-dimensional Discontinuous Galerkin discretization."""
+
+    def __init__(self, left, right, nelements, nnodes):
+        """
+        Inputs:
+            left - left endpoint
+            right - right endpoint
+            nelements - number of discretization panels
+            nnodes - number of degrees of freedom per panel
+        """
+        self.left = left
+        self.right = right
+        self.nelements = nelements
+        self.nnodes = nnodes
+
+    @property
+    @memoized
+    def ref_nodes(self):
+        """Return reference nodes for a single element.
+
+        Signature: ->(n,)
+        """
+        nodes, _ = leg.leggauss(self.nnodes)
+        return nodes
+
+    @property
+    @memoized
+    def ref_weights(self):
+        """Return reference quadrature weights for a single element.
+
+        Signature: ->(n,)
+        """
+        _, weights = leg.leggauss(self.nnodes)
+        return weights
+
+    def zeros(self):
+        """Return a zero solution.
+
+        Signature: ->(n*m,)
+        """
+        return np.zeros(self.nnodes * self.nelements)
+
+    @property
+    def h(self):
+        """Return the element size.
+
+        Signature: ->()
+        """
+        return self.elements[0,1] - self.elements[0,0]
+
+    def nodes(self):
+        """Return the vector of node coordinates.
+
+        Signature: ->(n*m,)
+        """
+        centers = (self.elements[:,0] + self.elements[:,1]) / 2
+        radii = (self.elements[:,1] - self.elements[:,0]) / 2
+        return ((self.ref_nodes[:,np.newaxis] * radii) + centers).T.ravel()
+
+    @property
+    @memoized
+    def vdm(self):
+        """Return the elementwise Vandermonde (modal-to-nodal) matrix.
+
+        Signature: ->(n, n)
+        """
+        return ortholegvander(self.ref_nodes, self.nnodes - 1)
+
+    @property
+    @memoized
+    def _ref_mass(self):
+        """Return the (volume) mass matrix for the reference element.
+
+        Signature: ->(n, n)
+        """
+        return la.inv(self.vdm @ self.vdm.T)
+
+    @property
+    @memoized
+    def mass(self):
+        """Return the elementwise volume mass matrix.
+
+        Signature: ->(n, n)
+        """
+        h = (self.right - self.left) / self.nelements
+        return (h/2) * self._ref_mass
+
+    @property
+    @memoized
+    def inv_mass(self):
+        """Return the inverse of the elementwise volume mass matrix.
+
+        Signature: ->(n, n)
+        """
+        return la.inv(self.mass)
+
+    @property
+    @memoized
+    def face_mass(self):
+        """Return the face mass matrix.
+
+        The face mass matrix combines the effects of applying the face mass
+        operator on each face and interpolating the output to the volume nodes.
+
+        Signature: ->(n, 2)
+        """
+        return self.interp.T.copy()
+
+    @property
+    @memoized
+    def diff(self):
+        """Return the elementwise differentiation matrix.
+
+        Signature: ->(n, n)
+        """
+        VrT = []
+        for row in np.eye(self.nnodes):
+            deriv = ortholegder(row)
+            VrT.append(ortholegval(self.ref_nodes, deriv))
+        Vr = np.vstack(VrT).T
+        return Vr @ la.inv(self.vdm)
+
+    @property
+    @memoized
+    def stiffness(self):
+        """Return the stiffness matrix.
+
+        Signature: ->(n, n)
+        """
+        return (self._ref_mass @ self.diff)
+
+    @property
+    @memoized
+    def interp(self):
+        """Return the volume-to-face interpolation matrix.
+
+        Signature: ->(2, n)
+        """
+        return ortholegvander([-1, 1], self.nnodes - 1) @ la.inv(self.vdm)
+
+    @property
+    @memoized
+    def elements(self):
+        """Return the list of elements, each given by their left/right boundaries.
+
+        Signature: ->(m, 2)
+        """
+        h = (self.right - self.left) / self.nelements
+        return np.array(list(zip(
+            np.linspace(self.left, self.right, self.nelements, endpoint=False),
+            np.linspace(h + self.left, self.right, self.nelements))))
+
+    @property
+    def dg_ops(self):
+        """Return a context manager yielding a DGOps1D instance.
+        """
+        return contextlib.contextmanager(lambda: (yield DGOps1DRef(self)))
+
+    @property
+    def normals(self):
+        """Return the face unit normals.
+
+        Signature: ->(m, 2)
+        """
+        result = np.zeros((self.nelements, 2))
+        result[:,0] = -1
+        result[:,1] = 1
+        return result
+
+
+def interpolate(discr, vec, nodes):
+    """Return an interpolated solution at *nodes*.
+
+    Input:
+        discr - a DGDiscr1D instance
+        vec - vector of nodal values at degrees of freedom
+        nodes - vector of nodes to interpolate to
+
+    Signature:  (m*n,) -> (len(nodes),)
+    """
+    elements = discr.elements
+    nelements = discr.nelements
+    nnodes = discr.nnodes
+    inv_vdm = la.inv(discr.vdm)
+
+    sorter = np.argsort(nodes)
+    sorted_nodes = nodes[sorter]
+    result = []
+
+    indices = np.searchsorted(sorted_nodes, elements)
+    for i, (start, end) in enumerate(indices):
+        if i == 0:
+            start = 0
+        elif i == nelements - 1:
+            end = len(nodes)
+
+        center = (elements[i][0] + elements[i][1]) / 2
+        radius = (elements[i][1] - elements[i][0]) / 2
+        element_nodes = sorted_nodes[start:end]
+        mapped_nodes = (element_nodes - center) / radius
+
+        modal_vals = inv_vdm @ vec[i * nnodes:(i + 1) * nnodes]
+        nodal_vals = ortholegvander(mapped_nodes, nnodes - 1) @ modal_vals
+        result.append(nodal_vals)
+
+    result = np.hstack(result)
+    unsorter = np.arange(len(nodes))[sorter]
+    return result[unsorter]
+
+
+def integrate(discr, soln):
+    """Return the integral of the solution.
+
+    Signature: (n*m,) -> ()
+    """
+    soln = soln.reshape((discr.nelements, discr.nnodes))
+    h = discr.elements[0][1] - discr.elements[0][0]
+    weights = discr.ref_weights * h / 2
+    return np.sum(soln * weights)
+
+
+def elementwise(mat, vec):
+    """Apply a matrix to rows of the input representing per-element
+    degrees of freedom.
+
+    Inputs:
+        mat: Shape (a, b)
+        vec: Shape (c, b)
+
+    Signature: (a, b), (c, b) -> (c, a)
+    """
+    return np.einsum("ij,kj->ki", mat, vec)
+
+
+class AbstractDGOps1D(object):
+    def __init__(self, discr):
+        self.discr = discr
+
+    @property
+    def array_ops(self):
+        raise NotImplementedError
+
+    @property
+    def normals(self):
+        """Return the vector of normals at the faces.
+
+        Signature: ->(m, 2)
+        """
+        raise NotImplementedError
+
+    def interp(self, vec):
+        """Apply elementwise volume-to-face interpolation.
+
+        Signature: (m, n) -> (m, 2)
+        """
+        raise NotImplementedError
+
+    def inv_mass(self, vec):
+        """Apply the elementwise inverse mass matrix.
+
+        Signature: (m, n) -> (m, n)
+        """
+        raise NotImplementedError
+
+    def stiffness(self, vec):
+        """Apply the elementwise stiffness matrix.
+
+        Signature: (m, n) -> (m, n)
+        """
+        raise NotImplementedError
+
+    def face_mass(self, vec):
+        """Apply the elementwise face mass matrix.
+
+        Signature: (m, 2) -> (m, n)
+        """
+        raise NotImplementedError
+
+    def face_swap(self, vec):
+        """Swap values at opposite faces.
+
+        Signature: (m, 2) -> (m, 2)
+        """
+        raise NotImplementedError
+
+
+def elementwise(mat, vec):
+    """Apply a matrix to rows of the input representing per-element
+    degrees of freedom.
+
+    Inputs:
+        mat: Shape (a, b)
+        vec: Shape (c, b)
+
+    Signature: (a, b), (c, b) -> (c, a)
+    """
+    return np.einsum("ij,kj->ki", mat, vec)
+
+
+class DGOps1DRef(AbstractDGOps1D):
+    """A reference NumPy implementation of the AbstractDGOps1D interface."""
+
+    @AbstractDGOps1D.array_ops.getter
+    def array_ops(self):
+        return np
+
+    @AbstractDGOps1D.normals.getter
+    def normals(self):
+        return self.discr.normals
+
+    def interp(self, vec):
+        return elementwise(self.discr.interp, vec)
+
+    def inv_mass(self, vec):
+        return elementwise(self.discr.inv_mass, vec)
+
+    def stiffness(self, vec):
+        return elementwise(self.discr.stiffness, vec)
+
+    def face_mass(self, vec):
+        return elementwise(self.discr.face_mass, vec)
+
+    def face_swap(self, vec):
+        result = np.zeros_like(vec)
+        result[:, 0] = np.roll(vec[:, 1], +1)
+        result[:, 1] = np.roll(vec[:, 0], -1)
+        return result
+
+
+def matrix_getter(name, shape):
+
+    def getter(self):
+        mat = self.ctx.argument(name, shape, np.float64)
+        self.ctx.bind(mat, getattr(self.discr, name))
+        return mat
+
+    return property(memoized(getter))
+
+
+class DGOps1D(AbstractDGOps1D):
+
+    @AbstractDGOps1D.array_ops.getter
+    def array_ops(self):
+        return self.ctx
+
+    def __init__(self, discr, ctx):
+        self.discr = discr
+        self.ctx = ctx
+
+    _normals = matrix_getter("normals", "(nelements, 2)")
+    _interp_mat = matrix_getter("interp", "(2, nnodes)")
+    _inv_mass_mat = matrix_getter("inv_mass", "(nnodes, nnodes)")
+    _stiffness_mat = matrix_getter("stiffness", "(nnodes, nnodes)")
+    _face_mass_mat = matrix_getter("face_mass", "(nnodes, 2)")
+
+    @AbstractDGOps1D.normals.getter
+    def normals(self):
+        return self._normals
+
+    def interp(self, vec):
+        return self.ctx.matmul(self._interp_mat, vec.T, name="_interp").T
+
+    def inv_mass(self, vec):
+        return self.ctx.matmul(self._inv_mass_mat, vec.T, name="_inv_mass").T
+
+    def stiffness(self, vec):
+        return self.ctx.matmul(self._stiffness_mat, vec.T, name="_stiffness").T
+
+    def face_mass(self, vec):
+        return self.ctx.matmul(self._face_mass_mat, vec.T, name="_face_mass").T
+
+    def face_swap(self, vec):
+        return self.ctx.stack(
+                (
+                    self.ctx.roll(vec[:, 1], +1),
+                    self.ctx.roll(vec[:, 0], -1)),
+                axis=1,
+                name="_face_swap")
+
+
-- 
GitLab


From e331068ab4952a5f43cf078858151f58fcacd2e7 Mon Sep 17 00:00:00 2001
From: Matt Wala <wala1@illinois.edu>
Date: Wed, 8 Jul 2020 16:28:37 -0500
Subject: [PATCH 02/52] Add support for scalar outputs

---
 pytato/codegen.py    | 11 +++++++----
 test/test_codegen.py | 12 ++++++++++++
 2 files changed, 19 insertions(+), 4 deletions(-)

diff --git a/pytato/codegen.py b/pytato/codegen.py
index 5a7a7cb..4fef59f 100644
--- a/pytato/codegen.py
+++ b/pytato/codegen.py
@@ -366,9 +366,6 @@ def add_output(name: str, expr: Array, state: CodeGenState,
         mapper: CodeGenMapper) -> None:
     """Add an output argument to the kernel.
     """
-    # FIXE: Scalar outputs are not supported yet.
-    assert expr.shape != ()
-
     result = mapper(expr, state)
 
     inames = tuple(
@@ -391,7 +388,13 @@ def add_output(name: str, expr: Array, state: CodeGenState,
     assert not expr_context.depends_on
 
     from loopy.kernel.instruction import make_assignment
-    insn = make_assignment((prim.Variable(name)[indices],),
+
+    if indices:
+        assignee = prim.Variable(name)[indices]
+    else:
+        assignee = prim.Variable(name)
+
+    insn = make_assignment((assignee,),
             copy_expr,
             id=state.insn_id_gen(f"{name}_copy"),
             within_inames=frozenset(inames),
diff --git a/test/test_codegen.py b/test/test_codegen.py
index 0f26bc0..fdf20fd 100755
--- a/test/test_codegen.py
+++ b/test/test_codegen.py
@@ -49,6 +49,18 @@ def test_basic_codegen(ctx_factory):
     assert (out == x_in * x_in).all()
 
 
+def test_scalar_output(ctx_factory):
+    ctx = ctx_factory()
+    queue = cl.CommandQueue(ctx)
+
+    namespace = pt.Namespace()
+    x = pt.Placeholder(namespace, "x", (), np.int)
+    prog = pt.generate_loopy(x, target=pt.PyOpenCLTarget(queue))
+    x_in = np.array(1)
+    _, (x_out,) = prog(x=x_in)
+    assert x_out == x_in
+
+
 def test_codegen_with_DictOfNamedArrays(ctx_factory):  # noqa
     ctx = ctx_factory()
     queue = cl.CommandQueue(ctx)
-- 
GitLab


From 5f558dea4ebb52d055a7a15ca9b02785a3f28a17 Mon Sep 17 00:00:00 2001
From: Matt Wala <wala1@illinois.edu>
Date: Wed, 15 Jul 2020 21:07:23 -0500
Subject: [PATCH 03/52] Revise input argument handling

---
 pytato/__init__.py   |   8 +-
 pytato/array.py      | 195 +++++++++++++++++++++++++++++++++----------
 pytato/codegen.py    |  43 ++++++++--
 test/sym.pt          |  11 +++
 test/test_codegen.py |  37 +++++++-
 5 files changed, 242 insertions(+), 52 deletions(-)
 create mode 100644 test/sym.pt

diff --git a/pytato/__init__.py b/pytato/__init__.py
index 123041a..ebd4d64 100644
--- a/pytato/__init__.py
+++ b/pytato/__init__.py
@@ -26,7 +26,9 @@ THE SOFTWARE.
 
 from pytato.array import (
         Namespace, Array, DictOfNamedArrays, Tag, UniqueTag,
-        DottedName, Placeholder, make_placeholder,
+        DottedName, Placeholder,
+
+        make_placeholder, make_size_param, make_data_wrapper,
         )
 
 from pytato.codegen import generate_loopy
@@ -34,7 +36,9 @@ from pytato.target import Target, PyOpenCLTarget
 
 __all__ = (
         "DottedName", "Namespace", "Array", "DictOfNamedArrays",
-        "Tag", "UniqueTag", "Placeholder", "make_placeholder",
+        "Tag", "UniqueTag", "Placeholder",
+
+        "make_placeholder", "make_size_param", "make_data_wrapper",
 
         "generate_loopy",
         "Target", "PyOpenCLTarget",
diff --git a/pytato/array.py b/pytato/array.py
index e9bea17..ad2fc16 100644
--- a/pytato/array.py
+++ b/pytato/array.py
@@ -72,9 +72,15 @@ Built-in Expression Nodes
 .. autoclass:: IndexLambda
 .. autoclass:: Einsum
 .. autoclass:: Reshape
+.. autoclass:: LoopyFunction
+
+Input Arguments
+^^^^^^^^^^^^^^^
+
+.. autoclass:: InputArgumentBase
 .. autoclass:: DataWrapper
 .. autoclass:: Placeholder
-.. autoclass:: LoopyFunction
+.. autoclass:: SizeParam
 
 User-Facing Node Creation
 -------------------------
@@ -88,6 +94,8 @@ Node constructors such as :class:`Placeholder.__init__` and
 
 .. autofunction:: make_dict_of_named_arrays
 .. autofunction:: make_placeholder
+.. autofunction:: make_size_param
+.. autofunction:: make_data_wrapper
 """
 
 # }}}
@@ -102,7 +110,7 @@ from typing import (
 
 import numpy as np
 import pymbolic.primitives as prim
-from pytools import is_single_valued, memoize_method
+from pytools import is_single_valued, memoize_method, UniqueNameGenerator
 
 import pytato.scalar_expr as scalar_expr
 from pytato.scalar_expr import ScalarExpression
@@ -131,7 +139,7 @@ class DottedName:
             raise ValueError("empty name parts")
 
         for p in name_parts:
-            if not str.isidentifier(p):
+            if not p.isidentifier():
                 raise ValueError(f"{p} is not a Python identifier")
 
         self.name_parts = name_parts
@@ -159,6 +167,7 @@ class Namespace(Mapping[str, "Array"]):
     may not be used.  (:class:`Placeholder` instances register their names in
     this way to avoid ambiguity.)
 
+    .. attribute:: name_gen
     .. automethod:: __contains__
     .. automethod:: __getitem__
     .. automethod:: __iter__
@@ -167,17 +176,17 @@ class Namespace(Mapping[str, "Array"]):
     .. automethod:: copy
     .. automethod:: ref
     """
+    name_gen: UniqueNameGenerator
 
     def __init__(self) -> None:
         self._symbol_table: Dict[str, Array] = {}
+        self.name_gen = UniqueNameGenerator()
 
     def __contains__(self, name: object) -> bool:
         return name in self._symbol_table
 
     def __getitem__(self, name: str) -> Array:
         item = self._symbol_table[name]
-        if item is None:
-            raise ValueError("cannot access a reserved name")
         return item
 
     def __iter__(self) -> Iterator[str]:
@@ -199,6 +208,8 @@ class Namespace(Mapping[str, "Array"]):
         """
         if name in self._symbol_table:
             raise ValueError(f"'{name}' is already assigned")
+        if not self.name_gen.is_name_conflicting(name):
+            self.name_gen.add_name(name)
         self._symbol_table[name] = value
 
         return name
@@ -277,7 +288,7 @@ ConvertibleToShape = Union[
 
 
 def _check_identifier(s: str, ns: Optional[Namespace] = None) -> bool:
-    if not str.isidentifier(s):
+    if not s.isidentifier():
         raise ValueError(f"'{s}' is not a valid identifier")
 
     if ns is not None:
@@ -504,6 +515,10 @@ class Array:
     __mul__ = partialmethod(_binary_op, operator.mul)
     __rmul__ = partialmethod(_binary_op, operator.mul, reverse=True)
 
+# }}}
+
+
+# {{{ mixins
 
 class _SuppliedShapeAndDtypeMixin(object):
     """A mixin class for when an array must store its own *shape* and *dtype*,
@@ -644,6 +659,7 @@ class IndexLambda(_SuppliedShapeAndDtypeMixin, Array):
 
     .. automethod:: is_reference
     """
+
     fields = Array.fields + ("expr", "bindings")
     mapper_method = "map_index_lambda"
 
@@ -715,6 +731,49 @@ class Reshape(Array):
 # }}}
 
 
+# {{{ base class for arguments
+
+class InputArgumentBase(Array):
+    r"""Base class for input arguments.
+
+    .. attribute:: name
+
+        The name by which a value is supplied for the argument once computation
+        begins.
+
+        The name is also implicitly :meth:`~Namespace.assign`\ ed in the
+        :class:`Namespace`.
+
+    .. note::
+
+        and within the same :class:`Namespace` is not allowed.
+    """
+
+    # The name uniquely identifies this object in the namespace.
+    fields = ("name",)
+
+    def __init__(self,
+            namespace: Namespace,
+            name: str,
+            tags: Optional[TagsType] = None):
+        if name is None:
+            raise ValueError("Must have explicit name")
+
+        # Publish our name to the namespace
+        namespace.assign(name, self)
+
+        super().__init__(namespace=namespace, tags=tags)
+        self.name = name
+
+    def tagged(self, tag: Tag) -> Array:
+        raise ValueError("Cannot modify tags")
+
+    def without_tag(self, tag: Tag, verify_existence: bool = True) -> Array:
+        raise ValueError("Cannot modify tags")
+
+# }}}
+
+
 # {{{ data wrapper
 
 class DataInterface(Protocol):
@@ -729,15 +788,14 @@ class DataInterface(Protocol):
     .. attribute:: shape
     .. attribute:: dtype
     """
+
     shape: ShapeType
     dtype: np.dtype
 
 
-class DataWrapper(Array):
-    # TODO: Name?
-    """
-    Takes concrete array data and packages it to be compatible
-    with the :class:`Array` interface.
+class DataWrapper(InputArgumentBase):
+    """Takes concrete array data and packages it to be compatible with the
+    :class:`Array` interface.
 
     .. attribute:: data
 
@@ -751,16 +809,22 @@ class DataWrapper(Array):
         this array may not be updated in-place.
     """
 
+    mapper_method = "map_data_wrapper"
+
     def __init__(self,
             namespace: Namespace,
+            name: str,
             data: DataInterface,
+            shape: ShapeType,
             tags: Optional[TagsType] = None):
-        super().__init__(namespace, tags=tags)
+        super().__init__(namespace, name, tags=tags)
+
         self.data = data
+        self._shape = shape
 
     @property
     def shape(self) -> ShapeType:
-        return self.data.shape
+        return self._shape
 
     @property
     def dtype(self) -> np.dtype:
@@ -771,25 +835,12 @@ class DataWrapper(Array):
 
 # {{{ placeholder
 
-class Placeholder(_SuppliedShapeAndDtypeMixin, Array):
-    r"""
-    A named placeholder for an array whose concrete value
-    is supplied by the user during evaluation.
-
-    .. attribute:: name
-
-        The name by which a value is supplied
-        for the placeholder once computation begins.
-        The name is also implicitly :meth:`~Namespace.assign`\ ed
-        in the :class:`Namespace`.
-
-    .. note::
-
-        Creating multiple instances of a :class:`Placeholder` with the same name
-        and within the same :class:`Namespace` is not allowed.
+class Placeholder(_SuppliedShapeAndDtypeMixin, InputArgumentBase):
+    r"""A named placeholder for an array whose concrete value is supplied by the
+    user during evaluation.
     """
+
     mapper_method = "map_placeholder"
-    fields = Array.fields + ("name",)
 
     def __init__(self,
             namespace: Namespace,
@@ -797,24 +848,32 @@ class Placeholder(_SuppliedShapeAndDtypeMixin, Array):
             shape: ShapeType,
             dtype: np.dtype,
             tags: Optional[TagsType] = None):
-        if name is None:
-            raise ValueError("Must have explicit name")
-
-        # Publish our name to the namespace
-        namespace.assign(name, self)
-
-        super().__init__(namespace=namespace,
+        super().__init__(
+                namespace,
                 shape=shape,
                 dtype=dtype,
+                name=name,
                 tags=tags)
 
-        self.name = name
+# }}}
 
-    def tagged(self, tag: Tag) -> Array:
-        raise ValueError("Cannot modify tags")
 
-    def without_tag(self, tag: Tag, verify_existence: bool = True) -> Array:
-        raise ValueError("Cannot modify tags")
+# {{{ size parameter
+
+class SizeParam(InputArgumentBase):
+    r"""A named placeholder for a scalar that may be used as a variable in symbolic
+    expressions for array sizes.
+    """
+
+    mapper_method = "map_size_param"
+
+    @property
+    def shape(self) -> ShapeType:
+        return ()
+
+    @property
+    def dtype(self) -> np.dtype:
+        return np.intp
 
 # }}}
 
@@ -830,7 +889,6 @@ class LoopyFunction(DictOfNamedArrays):
         name.
     """
 
-
 # }}}
 
 
@@ -864,13 +922,62 @@ def make_placeholder(namespace: Namespace,
     if name is None:
         raise ValueError("Placeholder instances must have a name")
 
-    if not str.isidentifier(name):
+    if not name.isidentifier():
         raise ValueError(f"'{name}' is not a Python identifier")
 
     shape = normalize_shape(shape, namespace)
 
     return Placeholder(namespace, name, shape, dtype, tags)
 
+
+def make_size_param(namespace: Namespace,
+        name: str,
+        tags: Optional[TagsType] = None) -> SizeParam:
+    """Make a :class:`SizeParam`.
+
+    Size parameters may be used as variables in symbolic expressions for array
+    sizes.
+
+    :param namespace:  namespace
+    :param name:       name
+    :param tags:       implementation tags
+    """
+    if name is None:
+        raise ValueError("SizeParam instances must have a name")
+
+    if not name.isidentifier():
+        raise ValueError(f"'{name}' is not a Python identifier")
+
+    return SizeParam(namespace, name, tags=tags)
+
+
+def make_data_wrapper(namespace: Namespace,
+        data: DataInterface,
+        name: Optional[str] = None,
+        shape: Optional[ConvertibleToShape] = None,
+        tags: Optional[TagsType] = None) -> DataWrapper:
+    """Make a :class:`DataWrapper`.
+
+    :param namespace:  namespace
+    :param data:       an instance obeying the :class:`DataInterface`
+    :param name:       an optional name, generated automatically if not given
+    :param shape:      optional shape of the array, inferred from *data* if not given
+    :param tags:       implementation tags
+    """
+    if name is None:
+        name = namespace.name_gen("_pt_data")
+
+    if not name.isidentifier():
+        raise ValueError(f"'{name}' is not a Python identifier")
+
+    if shape is None:
+        shape = data.shape
+
+    shape = normalize_shape(shape, namespace)
+
+    return DataWrapper(namespace, name, data, shape, tags)
+
 # }}}
 
+
 # vim: foldmethod=marker
diff --git a/pytato/codegen.py b/pytato/codegen.py
index 4fef59f..4d883c8 100644
--- a/pytato/codegen.py
+++ b/pytato/codegen.py
@@ -31,7 +31,8 @@ import pymbolic.primitives as prim
 import pytools
 
 from pytato.array import (
-        Array, DictOfNamedArrays, Placeholder, ShapeType, IndexLambda)
+        Array, DictOfNamedArrays, Placeholder, ShapeType, IndexLambda,
+        SizeParam, DataWrapper, InputArgumentBase)
 from pytato.program import BoundProgram
 from pytato.target import Target, PyOpenCLTarget
 import pytato.scalar_expr as scalar_expr
@@ -243,13 +244,34 @@ class CodeGenMapper(pytato.transform.Mapper):
     def __init__(self) -> None:
         self.exprgen_mapper = InlinedExpressionGenMapper(self)
 
-    def map_placeholder(self, expr: Placeholder,
+    def map_size_param(self, expr: SizeParam,
             state: CodeGenState) -> ImplementedResult:
         if expr in state.results:
             return state.results[expr]
 
+        arg = lp.ValueArg(expr.name, dtype=expr.dtype)
+        kernel = state.kernel.copy(args=state.kernel.args + [arg])
+        state.update_kernel(kernel)
+
+        result = StoredResult(expr.name, expr)
+        state.results[expr] = result
+        return result
+
+    def handle_array_input_argument(self, expr: Union[Placeholder, DataWrapper],
+            state: CodeGenState) -> ImplementedResult:
+        if expr in state.results:
+            return state.results[expr]
+
+        shape_context = LoopyExpressionContext(state)
+        shape = []
+        for component in expr.shape:
+            shape.append(self.exprgen_mapper(component, shape_context))
+            # Not supported yet.
+            assert not shape_context.depends_on
+            assert not shape_context.reduction_bounds
+
         arg = lp.GlobalArg(expr.name,
-                shape=expr.shape,
+                shape=tuple(shape),
                 dtype=expr.dtype,
                 order="C")
         kernel = state.kernel.copy(args=state.kernel.args + [arg])
@@ -259,6 +281,9 @@ class CodeGenMapper(pytato.transform.Mapper):
         state.results[expr] = result
         return result
 
+    map_placeholder = handle_array_input_argument
+    map_data_wrapper = handle_array_input_argument
+
     def map_index_lambda(self, expr: IndexLambda,
             state: CodeGenState) -> ImplementedResult:
         if expr in state.results:
@@ -450,7 +475,7 @@ def generate_loopy(result: Union[Array, DictOfNamedArrays],
 
     # Reserve names of input and output arguments.
     for val in namespace.values():
-        if isinstance(val, Placeholder):
+        if isinstance(val, InputArgumentBase):
             state.var_name_gen.add_name(val.name)
     state.var_name_gen.add_names(outputs)
 
@@ -463,4 +488,12 @@ def generate_loopy(result: Union[Array, DictOfNamedArrays],
     for name, expr in outputs.items():
         add_output(name, expr, state, mapper)
 
-    return target.bind_program(program=state.kernel, bound_arguments=dict())
+    # Collect bound arguments.
+    bound_arguments = {}
+    for name, val in namespace.items():
+        if isinstance(val, DataWrapper):
+            bound_arguments[name] = val.data
+
+    return target.bind_program(
+            program=state.kernel,
+            bound_arguments=bound_arguments)
diff --git a/test/sym.pt b/test/sym.pt
new file mode 100644
index 0000000..5dbf9fc
--- /dev/null
+++ b/test/sym.pt
@@ -0,0 +1,11 @@
+def test_symbolic_shape(ctx_factory):
+    ctx = ctx_factory()
+    queue = cl.CommandQueue(ctx)
+
+    namespace = pt.Namespace()
+    n = pt.make_size_param(namespace, "n")
+    x = pt.Placeholder(namespace, "x", "(n,)", np.int)
+    prog = pt.generate_loopy(n, target=pt.PyOpenCLTarget(queue))
+    x_in = np.array([1, 2, 3, 4, 5])
+    _, (n_out,) = prog(x=x_in)
+    assert n_out == 5
diff --git a/test/test_codegen.py b/test/test_codegen.py
index fdf20fd..2361ce0 100755
--- a/test/test_codegen.py
+++ b/test/test_codegen.py
@@ -49,7 +49,7 @@ def test_basic_codegen(ctx_factory):
     assert (out == x_in * x_in).all()
 
 
-def test_scalar_output(ctx_factory):
+def test_scalar_placeholder(ctx_factory):
     ctx = ctx_factory()
     queue = cl.CommandQueue(ctx)
 
@@ -61,6 +61,41 @@ def test_scalar_output(ctx_factory):
     assert x_out == x_in
 
 
+def test_size_param(ctx_factory):
+    ctx = ctx_factory()
+    queue = cl.CommandQueue(ctx)
+
+    namespace = pt.Namespace()
+    n = pt.make_size_param(namespace, "n")
+    pt.make_placeholder(namespace, "x", "(n,)", np.int)
+    prog = pt.generate_loopy(n, target=pt.PyOpenCLTarget(queue))
+    x_in = np.array([1, 2, 3, 4, 5])
+    _, (n_out,) = prog(x=x_in)
+    assert n_out == 5
+
+
+def test_data_wrapper(ctx_factory):
+    ctx = ctx_factory()
+    queue = cl.CommandQueue(ctx)
+
+    # Without name/shape
+    namespace = pt.Namespace()
+    x_in = np.array([1, 2, 3, 4, 5])
+    x = pt.make_data_wrapper(namespace, x_in)
+    prog = pt.generate_loopy(x, target=pt.PyOpenCLTarget(queue))
+    _, (x_out,) = prog()
+    assert (x_out == x_in).all()
+
+    # With name/shape
+    namespace = pt.Namespace()
+    x_in = np.array([[1, 2], [3, 4], [5, 6]])
+    pt.make_size_param(namespace, "n")
+    x = pt.make_data_wrapper(namespace, x_in, name="x", shape="(n, 2)")
+    prog = pt.generate_loopy(x, target=pt.PyOpenCLTarget(queue))
+    _, (x_out,) = prog()
+    assert (x_out == x_in).all()
+
+
 def test_codegen_with_DictOfNamedArrays(ctx_factory):  # noqa
     ctx = ctx_factory()
     queue = cl.CommandQueue(ctx)
-- 
GitLab


From 6cacf655396df9d7ed678c5ef45c527739ab84e2 Mon Sep 17 00:00:00 2001
From: Matt Wala <wala1@illinois.edu>
Date: Wed, 15 Jul 2020 21:14:05 -0500
Subject: [PATCH 04/52] Delete accidentally included file

---
 test/sym.pt | 11 -----------
 1 file changed, 11 deletions(-)
 delete mode 100644 test/sym.pt

diff --git a/test/sym.pt b/test/sym.pt
deleted file mode 100644
index 5dbf9fc..0000000
--- a/test/sym.pt
+++ /dev/null
@@ -1,11 +0,0 @@
-def test_symbolic_shape(ctx_factory):
-    ctx = ctx_factory()
-    queue = cl.CommandQueue(ctx)
-
-    namespace = pt.Namespace()
-    n = pt.make_size_param(namespace, "n")
-    x = pt.Placeholder(namespace, "x", "(n,)", np.int)
-    prog = pt.generate_loopy(n, target=pt.PyOpenCLTarget(queue))
-    x_in = np.array([1, 2, 3, 4, 5])
-    _, (n_out,) = prog(x=x_in)
-    assert n_out == 5
-- 
GitLab


From 20ad973627b94071ef1bf2a309c139790fe9200a Mon Sep 17 00:00:00 2001
From: Matt Wala <wala1@illinois.edu>
Date: Sat, 18 Jul 2020 21:47:01 -0500
Subject: [PATCH 05/52] Implement matmul

---
 doc/design.rst        |   4 ++
 pytato/__init__.py    |   9 ++-
 pytato/array.py       |  92 +++++++++++++++++++++++--
 pytato/codegen.py     | 157 ++++++++++++++++++++++++++++++++++--------
 pytato/scalar_expr.py |  52 ++++++++++++--
 test/test_codegen.py  |  31 +++++++--
 test/test_pytato.py   |  27 ++++++--
 7 files changed, 324 insertions(+), 48 deletions(-)

diff --git a/doc/design.rst b/doc/design.rst
index fd50da9..5114de6 100644
--- a/doc/design.rst
+++ b/doc/design.rst
@@ -121,6 +121,10 @@ Reserved Identifiers
 
     -   ``_pt_out``: The default name of an unnamed output argument
 
+    -   ``_pt_data``: Used to automatically generate identifiers for
+        names of :class:`~pytato.array.DataWrapper` arguments that are
+        not supplied by the user.
+
 -   Identifiers used in index lambdas are also reserved. These include:
 
     -   Identifiers matching the regular expression ``_[0-9]+``. They are used
diff --git a/pytato/__init__.py b/pytato/__init__.py
index ebd4d64..08bd3fc 100644
--- a/pytato/__init__.py
+++ b/pytato/__init__.py
@@ -26,9 +26,11 @@ THE SOFTWARE.
 
 from pytato.array import (
         Namespace, Array, DictOfNamedArrays, Tag, UniqueTag,
-        DottedName, Placeholder,
+        DottedName,
 
         make_placeholder, make_size_param, make_data_wrapper,
+
+        matmul
         )
 
 from pytato.codegen import generate_loopy
@@ -36,10 +38,13 @@ from pytato.target import Target, PyOpenCLTarget
 
 __all__ = (
         "DottedName", "Namespace", "Array", "DictOfNamedArrays",
-        "Tag", "UniqueTag", "Placeholder",
+        "Tag", "UniqueTag",
 
         "make_placeholder", "make_size_param", "make_data_wrapper",
 
+        "matmul",
+
         "generate_loopy",
+
         "Target", "PyOpenCLTarget",
 )
diff --git a/pytato/array.py b/pytato/array.py
index ad2fc16..9f9c2a4 100644
--- a/pytato/array.py
+++ b/pytato/array.py
@@ -46,6 +46,11 @@ Array Interface
 .. autoclass:: UniqueTag
 .. autoclass:: DictOfNamedArrays
 
+NumPy-Like Interface
+--------------------
+
+.. autofunction:: matmul
+
 Supporting Functionality
 ------------------------
 
@@ -55,6 +60,7 @@ Supporting Functionality
 
 Concrete Array Data
 -------------------
+
 .. autoclass:: DataInterface
 
 Pre-Defined Tags
@@ -71,6 +77,7 @@ Built-in Expression Nodes
 
 .. autoclass:: IndexLambda
 .. autoclass:: Einsum
+.. autoclass:: MatrixProduct
 .. autoclass:: Reshape
 .. autoclass:: LoopyFunction
 
@@ -363,7 +370,7 @@ class Array:
 
     .. attribute:: namespace
 
-        A (mutable) instance of :class:`Namespace` containing the
+        A (mutable) instance of :class:`~pytato.Namespace` containing the
         names used in the computation. All arrays in a
         computation share the same namespace.
 
@@ -476,6 +483,9 @@ class Array:
     def __ne__(self, other: Any) -> bool:
         return not self.__eq__(other)
 
+    def __matmul__(self, other: Array) -> Array:
+        return matmul(self, other)
+
     def _binary_op(self,
             op: Any,
             other: Union[Array, Number],
@@ -722,6 +732,56 @@ class Einsum(Array):
 # }}}
 
 
+# {{{ matrix product
+
+class MatrixProduct(Array):
+    """A product of two matrices, or a matrix and a vector.
+
+    The semantics of this operation follow PEP 459 [pep459]_.
+
+    .. attribute:: x1
+    .. attribute:: x2
+
+    .. [pep459] https://www.python.org/dev/peps/pep-0459/
+
+    """
+    fields = Array.fields + ("x1", "x2")
+
+    mapper_method = "map_matrix_product"
+
+    def __init__(self,
+            namespace: Namespace,
+            x1: Array,
+            x2: Array,
+            tags: Optional[TagsType] = None):
+        super().__init__(namespace, tags)
+        self.x1 = x1
+        self.x2 = x2
+
+    @property
+    def shape(self) -> ShapeType:
+        # FIXME: Broadcasting currently unsupported.
+        assert 0 < self.x1.ndim <= 2
+        assert 0 < self.x2.ndim <= 2
+
+        if self.x1.ndim == 1 and self.x2.ndim == 1:
+            return ()
+        elif self.x1.ndim == 1 and self.x2.ndim == 2:
+            return (self.x2.shape[1],)
+        elif self.x1.ndim == 2 and self.x2.ndim == 1:
+            return (self.x1.shape[0],)
+        elif self.x1.ndim == 2 and self.x2.ndim == 2:
+            return (self.x1.shape[0], self.x2.shape[1])
+
+        assert False
+
+    @property
+    def dtype(self) -> np.dtype:
+        return np.result_type(self.x1.dtype, self.x2.dtype)
+
+# }}}
+
+
 # {{{ reshape
 
 class Reshape(Array):
@@ -741,12 +801,13 @@ class InputArgumentBase(Array):
         The name by which a value is supplied for the argument once computation
         begins.
 
-        The name is also implicitly :meth:`~Namespace.assign`\ ed in the
-        :class:`Namespace`.
+        The name is also implicitly :meth:`~pytato.Namespace.assign`\ ed in the
+        :class:`~pytato.Namespace`.
 
     .. note::
 
-        and within the same :class:`Namespace` is not allowed.
+        Creating multiple instances of any input argument with the same name
+        and within the same :class:`~pytato.Namespace` is not allowed.
     """
 
     # The name uniquely identifies this object in the namespace.
@@ -892,6 +953,29 @@ class LoopyFunction(DictOfNamedArrays):
 # }}}
 
 
+# {{{ numpy interface
+
+def matmul(x1: Array, x2: Array) -> Array:
+    """Matrix multiplication."""
+    if x1.namespace is not x2.namespace:
+        raise ValueError("namespace mismatch")
+
+    if (
+            isinstance(x1, Number) or x1.shape == ()
+            or isinstance(x2, Number) or x2.shape == ()):
+        raise ValueError("scalars not allowed as arguments to matmul")
+
+    if len(x1.shape) > 2 or len(x2.shape) > 2:
+        raise NotImplementedError("broadcasting not supported")
+
+    if x1.shape[-1] != x2.shape[0]:
+        raise ValueError("dimension mismatch")
+
+    return MatrixProduct(x1.namespace, x1, x2)
+
+# }}}
+
+
 # {{{ end-user-facing
 
 def make_dict_of_named_arrays(data: Dict[str, Array]) -> DictOfNamedArrays:
diff --git a/pytato/codegen.py b/pytato/codegen.py
index 4d883c8..5d38fd0 100644
--- a/pytato/codegen.py
+++ b/pytato/codegen.py
@@ -28,11 +28,12 @@ from typing import (Union, Optional, Mapping, Dict, Tuple, FrozenSet, Set)
 import islpy as isl
 import loopy as lp
 import pymbolic.primitives as prim
+from pymbolic import var
 import pytools
 
 from pytato.array import (
         Array, DictOfNamedArrays, Placeholder, ShapeType, IndexLambda,
-        SizeParam, DataWrapper, InputArgumentBase)
+        SizeParam, DataWrapper, InputArgumentBase, MatrixProduct)
 from pytato.program import BoundProgram
 from pytato.target import Target, PyOpenCLTarget
 import pytato.scalar_expr as scalar_expr
@@ -75,7 +76,7 @@ Code Generation Internals
 # SymbolicIndex and ShapeType are semantically distinct but identical at the
 # type level.
 SymbolicIndex = ShapeType
-ReductionBounds = Dict[str, Tuple[ScalarExpression, ScalarExpression]]
+ReductionBounds = Dict[int, Tuple[ScalarExpression, ScalarExpression]]
 
 
 @dataclasses.dataclass(init=True, repr=False, eq=False)
@@ -103,7 +104,8 @@ class LoopyExpressionContext(object):
 
     .. attribute:: reduction_bounds
 
-        A mapping from inames to reduction bounds in the expression.
+        A mapping from reduction iname number (for reduction inames ``_r0``,
+        ``_r1``, ...) to reduction bounds in the expression.
 
     .. automethod:: update_depends_on
     .. automethod:: lookup
@@ -146,7 +148,20 @@ class ImplementedResult(object):
 
     def to_loopy_expression(self, indices: SymbolicIndex,
             expr_context: LoopyExpressionContext) -> ScalarExpression:
-        """Return a :mod:`loopy` expression for this result."""
+        """Return a :mod:`loopy` expression for this result.
+
+        :param indices: symbolic expressions for the indices of the array
+        :param expr_context: the associated expression context. The fields are
+            treated as follows:
+
+               - *depends_on* is populated with any dependencies needed for the
+                 generated expression.
+
+               - *reduction_bounds* is populated with reduction bounds for the
+                 reduction inames in the returned expression. If
+                 *reduction_bounds* is nonempty, then the returned inames are
+                 ensured to be disjoint from those present.
+        """
         raise NotImplementedError
 
 
@@ -174,16 +189,30 @@ class InlinedResult(ImplementedResult):
 
     See also: :class:`pytato.array.ImplInlined`.
     """
-    def __init__(self, expr: ScalarExpression, array: Array):
+    def __init__(self, expr: ScalarExpression, array: Array,
+            reduction_bounds: ReductionBounds, depends_on: FrozenSet[str]):
         super().__init__(array)
         self.expr = expr
+        self.reduction_bounds = dict(reduction_bounds)
+        self.depends_on = depends_on
 
-    # TODO: Handle dependencies and reduction domains.
     def to_loopy_expression(self, indices: SymbolicIndex,
             expr_context: LoopyExpressionContext) -> ScalarExpression:
-        return scalar_expr.substitute(
-                self.expr,
-                {f"_{d}": i for d, i in zip(range(self.array.ndim), indices)})
+        assert len(indices) == self.array.ndim
+        substitutions = {f"_{d}": i for d, i in enumerate(indices)}
+
+        reduction_start = 0
+        if expr_context.reduction_bounds:
+            reduction_start = 1 + max(expr_context.reduction_bounds)
+
+        for i, bounds in self.reduction_bounds.items():
+            j = i + reduction_start
+            substitutions[f"_r{i}"] = var(f"_r{j}")
+            expr_context.reduction_bounds[j] = bounds
+
+        expr_context.update_depends_on(self.depends_on)
+
+        return scalar_expr.substitute(self.expr, substitutions)
 
 
 class SubstitutionRuleResult(ImplementedResult):
@@ -284,6 +313,53 @@ class CodeGenMapper(pytato.transform.Mapper):
     map_placeholder = handle_array_input_argument
     map_data_wrapper = handle_array_input_argument
 
+    def map_matrix_product(self, expr: MatrixProduct,
+            state: CodeGenState) -> ImplementedResult:
+        if expr in state.results:
+            return state.results[expr]
+
+        x1_result = self.rec(expr.x1, state)
+        x2_result = self.rec(expr.x2, state)
+
+        expr_context = LoopyExpressionContext(state)
+        expr_context.reduction_bounds[0] = (0, expr.x2.shape[0])
+
+        # Figure out inames.
+        x1_inames = []
+        for i in range(expr.x1.ndim):
+            if i == expr.x1.ndim - 1:
+                x1_inames.append(var("_r0"))
+            else:
+                x1_inames.append(var(f"_{i}"))
+        x2_inames = []
+        for i in range(expr.x2.ndim):
+            if i == 0:
+                x2_inames.append(var("_r0"))
+            else:
+                offset = i + len(x1_inames) - 2
+                x2_inames.append(var(f"_{offset}"))
+
+        inner_expr = x1_result.to_loopy_expression(
+                tuple(x1_inames), expr_context)
+        inner_expr *= x2_result.to_loopy_expression(
+                tuple(x2_inames), expr_context)
+
+        import loopy.library.reduction as red
+        result_expr = lp.Reduction(
+                operation=red.parse_reduction_op("sum"),
+                inames=("_r0",),
+                expr=inner_expr,
+                allow_simultaneous=False)
+
+        result = InlinedResult(result_expr,
+                expr,
+                expr_context.reduction_bounds,
+                expr_context.depends_on)
+
+        state.results[expr] = result
+        return result
+
+
     def map_index_lambda(self, expr: IndexLambda,
             state: CodeGenState) -> ImplementedResult:
         if expr in state.results:
@@ -295,7 +371,7 @@ class CodeGenMapper(pytato.transform.Mapper):
                 local_namespace=expr.bindings)
         loopy_expr = self.exprgen_mapper(expr.expr, expr_context)
 
-        result = InlinedResult(loopy_expr, expr)
+        result = InlinedResult(loopy_expr, expr, {}, frozenset())
         state.results[expr] = result
         return result
 
@@ -345,10 +421,13 @@ class InlinedExpressionGenMapper(scalar_expr.IdentityMapper):
 
 # {{{ utils
 
-def domain_for_shape(
-        dim_names: Tuple[str, ...], shape: ShapeType) -> isl.BasicSet:
+def domain_for_shape(dim_names: Tuple[str, ...],
+         shape: ShapeType,
+         reductions: Dict[str, Tuple[ScalarExpression, ScalarExpression]],
+         ) -> isl.BasicSet:
     """Create an :class:`islpy.BasicSet` that expresses an appropriate index domain
-    for an array of (potentially symbolic) shape *shape*.
+    for an array of (potentially symbolic) shape *shape* having reduction
+    dimensions *reductions*.
 
     :param dim_names: A tuple of strings, the names of the axes. These become set
         dimensions in the returned domain.
@@ -357,6 +436,10 @@ def domain_for_shape(
         expressions. The variables in these expressions become parameter
         dimensions in the returned set.  Must have the same length as
         *dim_names*.
+
+    :arg reductions: A map from reduction inames to (lower, upper) bounds
+        (as half-open integer ranges). The variables in the bounds become
+        parameter dimensions in the returned set.
     """
     assert len(dim_names) == len(shape)
 
@@ -365,7 +448,12 @@ def domain_for_shape(
     for sdep in map(scalar_expr.get_dependencies, shape):
         param_names_set |= sdep
 
-    set_names = sorted(dim_names)
+    for bounds in reductions.values():
+        for sdep in map(scalar_expr.get_dependencies, bounds):
+            # FIXME: Assumes that reduction bounds are not data-dependent.
+            param_names_set |= sdep
+
+    set_names = sorted(tuple(dim_names) + tuple(reductions))
     param_names = sorted(param_names_set)
 
     # Build domain.
@@ -382,6 +470,10 @@ def domain_for_shape(
         dom &= affs[0].le_set(affs[iname])
         dom &= affs[iname].lt_set(aff_from_expr(dom.space, dim))
 
+    for iname, (left, right) in reductions.items():
+        dom &= aff_from_expr(dom.space, left).le_set(affs[iname])
+        dom &= affs[iname].lt_set(aff_from_expr(dom.space, right))
+
     dom, = dom.get_basic_sets()
 
     return dom
@@ -393,38 +485,49 @@ def add_output(name: str, expr: Array, state: CodeGenState,
     """
     result = mapper(expr, state)
 
+    # Get expression.
     inames = tuple(
             state.var_name_gen(f"{name}_dim{d}")
             for d in range(expr.ndim))
-    domain = domain_for_shape(inames, expr.shape)
-
-    arg = lp.GlobalArg(name,
-            shape=expr.shape,
-            dtype=expr.dtype,
-            order="C",
-            is_output_only=True)
-
     indices = tuple(prim.Variable(iname) for iname in inames)
     expr_context = LoopyExpressionContext(state)
     copy_expr = result.to_loopy_expression(indices, expr_context)
 
-    # TODO: Contextual data not supported yet.
-    assert not expr_context.reduction_bounds
-    assert not expr_context.depends_on
+    # Make the reduction inames/substitute them into expression.
+    reduction_inames = tuple(
+            state.var_name_gen(f"{name}_red{i}")
+            for i in range(len(expr_context.reduction_bounds)))
+    substitutions = {
+            f"_r{i}": var(reduction_inames[i])
+            for i in expr_context.reduction_bounds}
+    copy_expr = scalar_expr.substitute(copy_expr, substitutions)
 
+    # Make the instruction
     from loopy.kernel.instruction import make_assignment
-
     if indices:
         assignee = prim.Variable(name)[indices]
     else:
         assignee = prim.Variable(name)
-
     insn = make_assignment((assignee,),
             copy_expr,
             id=state.insn_id_gen(f"{name}_copy"),
             within_inames=frozenset(inames),
             depends_on=expr_context.depends_on)
 
+    # Get the domain.
+    reduction_bounds = {
+            reduction_inames[i]: bounds
+            for i, bounds in expr_context.reduction_bounds.items()}
+    domain = domain_for_shape(inames, expr.shape, reduction_bounds)
+
+    # Get the argument.
+    arg = lp.GlobalArg(name,
+            shape=expr.shape,
+            dtype=expr.dtype,
+            order="C",
+            is_output_only=True)
+
+    # Update the kernel.
     kernel = state.kernel
     kernel = kernel.copy(args=kernel.args + [arg],
             instructions=kernel.instructions + [insn],
diff --git a/pytato/scalar_expr.py b/pytato/scalar_expr.py
index a09e9db..0cbc339 100644
--- a/pytato/scalar_expr.py
+++ b/pytato/scalar_expr.py
@@ -25,8 +25,9 @@ THE SOFTWARE.
 """
 
 from numbers import Number
-from typing import Any, Union, Mapping, FrozenSet
+from typing import Any, Union, Mapping, FrozenSet, Set
 
+from loopy.symbolic import Reduction
 from pymbolic.mapper import (WalkMapper as WalkMapperBase, IdentityMapper as
         IdentityMapperBase)
 from pymbolic.mapper.substitutor import (SubstitutionMapper as
@@ -70,19 +71,60 @@ def parse(s: str) -> ScalarExpression:
 # {{{ mapper classes
 
 class WalkMapper(WalkMapperBase):
-    pass
+
+    def map_reduction(self, expr: Reduction, *args: Any, **kwargs: Any) -> None:
+        if not self.visit(expr, *args, **kwargs):
+            return
+
+        self.rec(expr.expr, *args, **kwargs)
 
 
 class IdentityMapper(IdentityMapperBase):
-    pass
+
+    def map_reduction(self, expr: Reduction,
+            *args: Any, **kwargs: Any) -> Reduction:
+        new_inames = []
+        for iname in expr.inames:
+            new_iname = self.rec(prim.Variable(iname), *args, **kwargs)
+            if not isinstance(new_iname, prim.Variable):
+                raise ValueError(
+                        f"reduction iname {iname} can only be renamed"
+                        " to another iname")
+            new_inames.append(new_iname.name)
+
+        return Reduction(expr.operation,
+                tuple(new_inames),
+                self.rec(expr.expr, *args, **kwargs),
+                allow_simultaneous=expr.allow_simultaneous)
 
 
 class SubstitutionMapper(SubstitutionMapperBase):
-    pass
+
+    def map_reduction(self, expr: Reduction) -> Reduction:
+        new_inames = []
+        for iname in expr.inames:
+            new_iname = self.subst_func(iname)
+            if new_iname is None:
+                new_iname = prim.Variable(iname)
+            else:
+                if not isinstance(new_iname, prim.Variable):
+                    raise ValueError(
+                            f"reduction iname {iname} can only be renamed"
+                            " to another iname")
+            new_inames.append(new_iname.name)
+
+        return Reduction(expr.operation,
+                tuple(new_inames),
+                self.rec(expr.expr),
+                allow_simultaneous=expr.allow_simultaneous)
 
 
 class DependencyMapper(DependencyMapperBase):
-    pass
+
+    def map_reduction(self, expr: Reduction,
+            *args: Any, **kwargs: Any) -> Set[prim.Variable]:
+        deps: Set[prim.Variable] = self.rec(expr.expr, *args, **kwargs)
+        return deps - set(prim.Variable(iname) for iname in expr.inames)
 
 # }}}
 
diff --git a/test/test_codegen.py b/test/test_codegen.py
index 2361ce0..2863bdb 100755
--- a/test/test_codegen.py
+++ b/test/test_codegen.py
@@ -35,6 +35,7 @@ from pyopencl.tools import (  # noqa
 import pytest  # noqa
 
 import pytato as pt
+from pytato.array import Placeholder
 
 
 def test_basic_codegen(ctx_factory):
@@ -42,7 +43,7 @@ def test_basic_codegen(ctx_factory):
     queue = cl.CommandQueue(ctx)
 
     namespace = pt.Namespace()
-    x = pt.Placeholder(namespace, "x", (5,), np.int)
+    x = Placeholder(namespace, "x", (5,), np.int)
     prog = pt.generate_loopy(x * x, target=pt.PyOpenCLTarget(queue))
     x_in = np.array([1, 2, 3, 4, 5])
     _, (out,) = prog(x=x_in)
@@ -54,7 +55,7 @@ def test_scalar_placeholder(ctx_factory):
     queue = cl.CommandQueue(ctx)
 
     namespace = pt.Namespace()
-    x = pt.Placeholder(namespace, "x", (), np.int)
+    x = Placeholder(namespace, "x", (), np.int)
     prog = pt.generate_loopy(x, target=pt.PyOpenCLTarget(queue))
     x_in = np.array(1)
     _, (x_out,) = prog(x=x_in)
@@ -74,6 +75,28 @@ def test_size_param(ctx_factory):
     assert n_out == 5
 
 
+@pytest.mark.parametrize("x1_ndim", (1, 2))
+@pytest.mark.parametrize("x2_ndim", (1, 2))
+def test_matmul(ctx_factory, x1_ndim, x2_ndim):
+    ctx = ctx_factory()
+    queue = cl.CommandQueue(ctx)
+
+    def get_array(ndim):
+        arr = np.array([[1, 2], [3, 4]])
+        return arr[(0,) * (arr.ndim - ndim)]
+
+    x1_in = get_array(x1_ndim)
+    x2_in = get_array(x2_ndim)
+
+    namespace = pt.Namespace()
+    x1 = pt.make_data_wrapper(namespace, x1_in)
+    x2 = pt.make_data_wrapper(namespace, x2_in)
+    prog = pt.generate_loopy(x1 @ x2, target=pt.PyOpenCLTarget(queue))
+    _, (out,) = prog()
+
+    assert (out == x1_in @ x2_in).all()
+
+
 def test_data_wrapper(ctx_factory):
     ctx = ctx_factory()
     queue = cl.CommandQueue(ctx)
@@ -101,8 +124,8 @@ def test_codegen_with_DictOfNamedArrays(ctx_factory):  # noqa
     queue = cl.CommandQueue(ctx)
 
     namespace = pt.Namespace()
-    x = pt.Placeholder(namespace, "x", (5,), np.int)
-    y = pt.Placeholder(namespace, "y", (5,), np.int)
+    x = Placeholder(namespace, "x", (5,), np.int)
+    y = Placeholder(namespace, "y", (5,), np.int)
     x_in = np.array([1, 2, 3, 4, 5])
     y_in = np.array([6, 7, 8, 9, 10])
 
diff --git a/test/test_pytato.py b/test/test_pytato.py
index 8b8e68d..fef1d37 100755
--- a/test/test_pytato.py
+++ b/test/test_pytato.py
@@ -23,17 +23,32 @@ THE SOFTWARE.
 """
 
 import sys
-import pytest  # noqa
 
+import numpy as np
+import pytest
 
-def test_being_very_lazy():
-    # Placeholder to make CI pass. Remove after tests are added.
-    pass
+import pytato as pt
+
+
+def test_matmul_validation():
+    namespace = pt.Namespace()
+
+    a = pt.make_placeholder(namespace, "a", (10, 10), np.float)
+    b = pt.make_placeholder(namespace, "b", (20, 10), np.float)
+
+    with pytest.raises(ValueError):
+        a @ b
+
+    c = pt.make_placeholder(namespace, "c", (), np.float)
+    with pytest.raises(ValueError):
+        c @ c
+
+    n = pt.make_size_param(namespace, "n")
+    d = pt.make_placeholder(namespace, "d", "(n, n)", np.float)
+    d @ d
 
 
 if __name__ == "__main__":
-    # make sure that import failures get reported, instead of skipping the
-    # tests.
     if len(sys.argv) > 1:
         exec(sys.argv[1])
     else:
-- 
GitLab


From ef922fcc66f15913cb865bbe161bbbf8749cf902 Mon Sep 17 00:00:00 2001
From: Matt Wala <wala1@illinois.edu>
Date: Mon, 20 Jul 2020 11:57:48 -0500
Subject: [PATCH 06/52] Formatting

---
 pytato/array.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/pytato/array.py b/pytato/array.py
index 9f9c2a4..1df11bd 100644
--- a/pytato/array.py
+++ b/pytato/array.py
@@ -961,8 +961,10 @@ def matmul(x1: Array, x2: Array) -> Array:
         raise ValueError("namespace mismatch")
 
     if (
-            isinstance(x1, Number) or x1.shape == ()
-            or isinstance(x2, Number) or x2.shape == ()):
+            isinstance(x1, Number)
+            or x1.shape == ()
+            or isinstance(x2, Number)
+            or x2.shape == ()):
         raise ValueError("scalars not allowed as arguments to matmul")
 
     if len(x1.shape) > 2 or len(x2.shape) > 2:
-- 
GitLab


From 532f0a049762e5c7c497c5c078f7a4a029c2ce67 Mon Sep 17 00:00:00 2001
From: Matt Wala <wala1@illinois.edu>
Date: Mon, 20 Jul 2020 12:11:01 -0500
Subject: [PATCH 07/52] Implement roll

---
 pytato/__init__.py   |  4 +--
 pytato/array.py      | 71 ++++++++++++++++++++++++++++++++++++++++++++
 pytato/codegen.py    | 25 ++++++++++++++--
 test/test_codegen.py | 21 +++++++++++++
 test/test_pytato.py  | 13 ++++++++
 5 files changed, 130 insertions(+), 4 deletions(-)

diff --git a/pytato/__init__.py b/pytato/__init__.py
index 08bd3fc..c196f05 100644
--- a/pytato/__init__.py
+++ b/pytato/__init__.py
@@ -30,7 +30,7 @@ from pytato.array import (
 
         make_placeholder, make_size_param, make_data_wrapper,
 
-        matmul
+        matmul, roll,
         )
 
 from pytato.codegen import generate_loopy
@@ -42,7 +42,7 @@ __all__ = (
 
         "make_placeholder", "make_size_param", "make_data_wrapper",
 
-        "matmul",
+        "matmul", "roll",
 
         "generate_loopy",
 
diff --git a/pytato/array.py b/pytato/array.py
index 1df11bd..5ed930c 100644
--- a/pytato/array.py
+++ b/pytato/array.py
@@ -50,6 +50,7 @@ NumPy-Like Interface
 --------------------
 
 .. autofunction:: matmul
+.. autofunction:: roll
 
 Supporting Functionality
 ------------------------
@@ -78,6 +79,7 @@ Built-in Expression Nodes
 .. autoclass:: IndexLambda
 .. autoclass:: Einsum
 .. autoclass:: MatrixProduct
+.. autoclass:: Roll
 .. autoclass:: Reshape
 .. autoclass:: LoopyFunction
 
@@ -782,6 +784,49 @@ class MatrixProduct(Array):
 # }}}
 
 
+# {{{ roll
+
+class Roll(Array):
+    """Roll an array along an axis.
+
+    .. attribute:: array
+
+        The input :class:`~pytato.Array`.
+
+    .. attribute:: shift
+
+        Shift amount.
+
+    .. attribute:: axis
+
+        Shift axis.
+    """
+
+    fields = Array.fields + ("array", "shift", "axis")
+    mapper_method = "map_roll"
+
+    def __init__(self,
+            namespace: Namespace,
+            array: Array,
+            shift: int,
+            axis: int,
+            tags: Optional[TagsType] = None):
+        super().__init__(namespace, tags)
+        self.array = array
+        self.shift = shift
+        self.axis = axis
+
+    @property
+    def shape(self) -> ShapeType:
+        return self.array.shape
+
+    @property
+    def dtype(self) -> np.dtype:
+        return self.array.dtype
+
+# }}}
+
+
 # {{{ reshape
 
 class Reshape(Array):
@@ -975,6 +1020,32 @@ def matmul(x1: Array, x2: Array) -> Array:
 
     return MatrixProduct(x1.namespace, x1, x2)
 
+
+def roll(a: Array, shift: int, axis: Optional[int] = None) -> Array:
+    """Roll array elements along a given axis.
+
+    :param a: Input array
+    :param shift: The number of places by which elements are shifted
+    :param axis: Axis along which the array is shifted
+    """
+    if a.ndim == 0:
+        return a
+
+    if axis is None:
+        if a.ndim > 1:
+            raise NotImplementedError(
+                    "shifing along more than one dimension is unsupported")
+        else:
+            axis = 0
+
+    if not (0 <= axis < a.ndim):
+        raise ValueError("invalid axis")
+
+    if shift == 0:
+        return a
+
+    return Roll(a.namespace, a, shift, axis)
+
 # }}}
 
 
diff --git a/pytato/codegen.py b/pytato/codegen.py
index 5d38fd0..ec1f471 100644
--- a/pytato/codegen.py
+++ b/pytato/codegen.py
@@ -33,7 +33,7 @@ import pytools
 
 from pytato.array import (
         Array, DictOfNamedArrays, Placeholder, ShapeType, IndexLambda,
-        SizeParam, DataWrapper, InputArgumentBase, MatrixProduct)
+        SizeParam, DataWrapper, InputArgumentBase, MatrixProduct, Roll)
 from pytato.program import BoundProgram
 from pytato.target import Target, PyOpenCLTarget
 import pytato.scalar_expr as scalar_expr
@@ -359,6 +359,27 @@ class CodeGenMapper(pytato.transform.Mapper):
         state.results[expr] = result
         return result
 
+    def map_roll(self, expr: Roll,
+            state: CodeGenState) -> ImplementedResult:
+        if expr in state.results:
+            return state.results[expr]
+
+        indices = [var(f"_{d}") for d in range(expr.ndim)]
+        indices[expr.axis] = (
+                indices[expr.axis] + expr.shift) % expr.shape[expr.axis]
+
+        expr_context = LoopyExpressionContext(state)
+        loopy_expr = (
+                self.rec(expr.array, state)
+                .to_loopy_expression(tuple(indices), expr_context))
+
+        result = InlinedResult(loopy_expr,
+                expr,
+                expr_context.reduction_bounds,
+                expr_context.depends_on)
+
+        state.results[expr] = result
+        return result
 
     def map_index_lambda(self, expr: IndexLambda,
             state: CodeGenState) -> ImplementedResult:
@@ -424,7 +445,7 @@ class InlinedExpressionGenMapper(scalar_expr.IdentityMapper):
 def domain_for_shape(dim_names: Tuple[str, ...],
          shape: ShapeType,
          reductions: Dict[str, Tuple[ScalarExpression, ScalarExpression]],
-         ) -> isl.BasicSet:
+         ) -> isl.BasicSet:  # noqa
     """Create an :class:`islpy.BasicSet` that expresses an appropriate index domain
     for an array of (potentially symbolic) shape *shape* having reduction
     dimensions *reductions*.
diff --git a/test/test_codegen.py b/test/test_codegen.py
index 2863bdb..4914031 100755
--- a/test/test_codegen.py
+++ b/test/test_codegen.py
@@ -147,6 +147,27 @@ def test_codegen_with_DictOfNamedArrays(ctx_factory):  # noqa
     assert (outputs["y_out"] == y_in).all()
 
 
+@pytest.mark.parametrize("shift", (-1, 1, -20, 20))
+@pytest.mark.parametrize("axis", (0, 1))
+def test_roll(ctx_factory, shift, axis):
+    cl_ctx = ctx_factory()
+    queue = cl.CommandQueue(cl_ctx)
+
+    namespace = pt.Namespace()
+    pt.make_size_param(namespace, "n")
+    x = pt.make_placeholder(namespace, "x", shape=("n", "n"), dtype=np.float)
+
+    prog = pt.generate_loopy(
+            pt.roll(x, shift=shift, axis=axis),
+            target=pt.PyOpenCLTarget(queue))
+
+    x_in = np.array([[1., 2.], [3., 4.]])
+
+    _, (x_out,) = prog(x=x_in)
+
+    assert (x_out == np.roll(x_in, shift=shift, axis=axis)).all()
+
+
 if __name__ == "__main__":
     if len(sys.argv) > 1:
         exec(sys.argv[1])
diff --git a/test/test_pytato.py b/test/test_pytato.py
index fef1d37..bf74bab 100755
--- a/test/test_pytato.py
+++ b/test/test_pytato.py
@@ -48,6 +48,19 @@ def test_matmul_validation():
     d @ d
 
 
+def test_roll_validation():
+    namespace = pt.Namespace()
+
+    a = pt.make_placeholder(namespace, "a", (10, 10), np.float)
+    pt.roll(a, 1, axis=0)
+
+    with pytest.raises(ValueError):
+        pt.roll(a, 1, axis=2)
+
+    with pytest.raises(ValueError):
+        pt.roll(a, 1, axis=-1)
+
+
 if __name__ == "__main__":
     if len(sys.argv) > 1:
         exec(sys.argv[1])
-- 
GitLab


From 0b07e9fa74337b30178a9ed30d637e39552f8915 Mon Sep 17 00:00:00 2001
From: Matt Wala <wala1@illinois.edu>
Date: Mon, 20 Jul 2020 13:49:42 -0500
Subject: [PATCH 08/52] Make namespace a property in the superclass

---
 pytato/array.py | 48 ++++++++++++++++++++++++++++++++----------------
 1 file changed, 32 insertions(+), 16 deletions(-)

diff --git a/pytato/array.py b/pytato/array.py
index 5ed930c..689dd4d 100644
--- a/pytato/array.py
+++ b/pytato/array.py
@@ -419,17 +419,19 @@ class Array:
     fields: ClassVar[Tuple[str, ...]] = ("shape", "dtype", "tags")
 
     def __init__(self,
-            namespace: Namespace,
             tags: Optional[TagsType] = None):
         if tags is None:
             tags = frozenset()
 
-        self.namespace = namespace
         self.tags = tags
 
     def copy(self, **kwargs: Any) -> Array:
         raise NotImplementedError
 
+    @property
+    def namespace(self) -> Namespace:
+        raise NotImplementedError
+
     @property
     def shape(self) -> ShapeType:
         raise NotImplementedError
@@ -538,12 +540,11 @@ class _SuppliedShapeAndDtypeMixin(object):
     """
 
     def __init__(self,
-            namespace: Namespace,
             shape: ShapeType,
             dtype: np.dtype,
             **kwargs: Any):
         # https://github.com/python/mypy/issues/5887
-        super().__init__(namespace, **kwargs)  # type: ignore
+        super().__init__(**kwargs)  # type: ignore
         self._shape = shape
         self._dtype = dtype
 
@@ -686,11 +687,16 @@ class IndexLambda(_SuppliedShapeAndDtypeMixin, Array):
         if bindings is None:
             bindings = {}
 
-        super().__init__(namespace, shape=shape, dtype=dtype, tags=tags)
+        super().__init__(shape=shape, dtype=dtype, tags=tags)
 
+        self._namespace = namespace
         self.expr = expr
         self.bindings = bindings
 
+    @property
+    def namespace(self) -> Namespace:
+        return self._namespace
+
     @memoize_method
     def is_reference(self) -> bool:
         # FIXME: Do we want a specific 'reference' node to make all this
@@ -752,14 +758,17 @@ class MatrixProduct(Array):
     mapper_method = "map_matrix_product"
 
     def __init__(self,
-            namespace: Namespace,
             x1: Array,
             x2: Array,
             tags: Optional[TagsType] = None):
-        super().__init__(namespace, tags)
+        super().__init__(tags)
         self.x1 = x1
         self.x2 = x2
 
+    @property
+    def namespace(self) -> Namespace:
+        return self.x1.namespace
+
     @property
     def shape(self) -> ShapeType:
         # FIXME: Broadcasting currently unsupported.
@@ -806,16 +815,19 @@ class Roll(Array):
     mapper_method = "map_roll"
 
     def __init__(self,
-            namespace: Namespace,
             array: Array,
             shift: int,
             axis: int,
             tags: Optional[TagsType] = None):
-        super().__init__(namespace, tags)
+        super().__init__(tags)
         self.array = array
         self.shift = shift
         self.axis = axis
 
+    @property
+    def namespace(self) -> Namespace:
+        return self.array.namespace
+
     @property
     def shape(self) -> ShapeType:
         return self.array.shape
@@ -865,12 +877,17 @@ class InputArgumentBase(Array):
         if name is None:
             raise ValueError("Must have explicit name")
 
-        # Publish our name to the namespace
+        # Publish our name to the namespace.
         namespace.assign(name, self)
 
-        super().__init__(namespace=namespace, tags=tags)
+        self._namespace = namespace
+        super().__init__(tags=tags)
         self.name = name
 
+    @property
+    def namespace(self) -> Namespace:
+        return self._namespace
+
     def tagged(self, tag: Tag) -> Array:
         raise ValueError("Cannot modify tags")
 
@@ -954,10 +971,9 @@ class Placeholder(_SuppliedShapeAndDtypeMixin, InputArgumentBase):
             shape: ShapeType,
             dtype: np.dtype,
             tags: Optional[TagsType] = None):
-        super().__init__(
-                namespace,
-                shape=shape,
+        super().__init__(shape=shape,
                 dtype=dtype,
+                namespace=namespace,
                 name=name,
                 tags=tags)
 
@@ -1018,7 +1034,7 @@ def matmul(x1: Array, x2: Array) -> Array:
     if x1.shape[-1] != x2.shape[0]:
         raise ValueError("dimension mismatch")
 
-    return MatrixProduct(x1.namespace, x1, x2)
+    return MatrixProduct(x1, x2)
 
 
 def roll(a: Array, shift: int, axis: Optional[int] = None) -> Array:
@@ -1044,7 +1060,7 @@ def roll(a: Array, shift: int, axis: Optional[int] = None) -> Array:
     if shift == 0:
         return a
 
-    return Roll(a.namespace, a, shift, axis)
+    return Roll(a, shift, axis)
 
 # }}}
 
-- 
GitLab


From dd30d13b8749eb3150bde767710824070b0e9bb1 Mon Sep 17 00:00:00 2001
From: Matt Wala <wala1@illinois.edu>
Date: Mon, 20 Jul 2020 14:15:21 -0500
Subject: [PATCH 09/52] Implement transpose

---
 pytato/__init__.py   |  4 +--
 pytato/array.py      | 68 ++++++++++++++++++++++++++++++++++++++++++--
 pytato/codegen.py    | 25 +++++++++++++++-
 test/test_codegen.py | 43 ++++++++++++++++++++++++++++
 test/test_pytato.py  | 16 +++++++++++
 5 files changed, 151 insertions(+), 5 deletions(-)

diff --git a/pytato/__init__.py b/pytato/__init__.py
index c196f05..0eb430d 100644
--- a/pytato/__init__.py
+++ b/pytato/__init__.py
@@ -30,7 +30,7 @@ from pytato.array import (
 
         make_placeholder, make_size_param, make_data_wrapper,
 
-        matmul, roll,
+        matmul, roll, transpose,
         )
 
 from pytato.codegen import generate_loopy
@@ -42,7 +42,7 @@ __all__ = (
 
         "make_placeholder", "make_size_param", "make_data_wrapper",
 
-        "matmul", "roll",
+        "matmul", "roll", "transpose",
 
         "generate_loopy",
 
diff --git a/pytato/array.py b/pytato/array.py
index 689dd4d..f67c8f8 100644
--- a/pytato/array.py
+++ b/pytato/array.py
@@ -51,6 +51,7 @@ NumPy-Like Interface
 
 .. autofunction:: matmul
 .. autofunction:: roll
+.. autofunction:: transpose
 
 Supporting Functionality
 ------------------------
@@ -80,6 +81,7 @@ Built-in Expression Nodes
 .. autoclass:: Einsum
 .. autoclass:: MatrixProduct
 .. autoclass:: Roll
+.. autoclass:: AxisPermutation
 .. autoclass:: Reshape
 .. autoclass:: LoopyFunction
 
@@ -115,7 +117,7 @@ import operator
 from dataclasses import dataclass
 from typing import (
         Optional, ClassVar, Dict, Any, Mapping, Iterator, Tuple, Union,
-        FrozenSet, Protocol)
+        FrozenSet, Protocol, Sequence)
 
 import numpy as np
 import pymbolic.primitives as prim
@@ -447,6 +449,10 @@ class Array:
     def ndim(self) -> int:
         return len(self.shape)
 
+    @property
+    def T(self) -> Array:
+        return AxisPermutation(self, tuple(range(self.ndim)[::-1]))
+
     def tagged(self, tag: Tag) -> Array:
         """
         Returns a copy of *self* tagged with *tag*.
@@ -839,6 +845,41 @@ class Roll(Array):
 # }}}
 
 
+# {{{ axis permutation
+
+class AxisPermutation(Array):
+    r"""Permute the axes of an array."""
+
+    fields = Array.fields + ("array", "axes")
+    mapper_method = "map_axis_permutation"
+
+    def __init__(self,
+            array: Array,
+            axes: Tuple[int, ...],
+            tags: Optional[TagsType] = None):
+        super().__init__(tags)
+        self.array = array
+        self.axes = axes
+
+    @property
+    def namespace(self) -> Namespace:
+        return self.array.namespace
+
+    @property
+    def shape(self) -> ShapeType:
+        result = []
+        base_shape = self.array.shape
+        for index in self.axes:
+            result.append(base_shape[index])
+        return tuple(result)
+
+    @property
+    def dtype(self) -> np.dtype:
+        return self.array.dtype
+
+# }}}
+
+
 # {{{ reshape
 
 class Reshape(Array):
@@ -1017,7 +1058,11 @@ class LoopyFunction(DictOfNamedArrays):
 # {{{ numpy interface
 
 def matmul(x1: Array, x2: Array) -> Array:
-    """Matrix multiplication."""
+    """Matrix multiplication.
+
+    :param x1: First argument
+    :param x2: Second argument
+    """
     if x1.namespace is not x2.namespace:
         raise ValueError("namespace mismatch")
 
@@ -1062,6 +1107,25 @@ def roll(a: Array, shift: int, axis: Optional[int] = None) -> Array:
 
     return Roll(a, shift, axis)
 
+
+def transpose(a: Array, axes: Optional[Sequence[int]] = None) -> Array:
+    """Reverse or permute the axes of an array.
+
+    :param a: Input array
+    :param axes: If specified, a permutation of ``[0, 1, ..., a.ndim-1]``. Defaults
+        to ``range(a.ndim)[::-1]``.
+    """
+    if axes is None:
+        axes = range(a.ndim)[::-1]
+
+    if len(axes) != a.ndim:
+        raise ValueError("axes have incorrect length")
+
+    if set(axes) != set(range(a.ndim)):
+        raise ValueError("invalid axes were given")
+
+    return AxisPermutation(a, tuple(axes))
+
 # }}}
 
 
diff --git a/pytato/codegen.py b/pytato/codegen.py
index ec1f471..0149bf1 100644
--- a/pytato/codegen.py
+++ b/pytato/codegen.py
@@ -33,7 +33,8 @@ import pytools
 
 from pytato.array import (
         Array, DictOfNamedArrays, Placeholder, ShapeType, IndexLambda,
-        SizeParam, DataWrapper, InputArgumentBase, MatrixProduct, Roll)
+        SizeParam, DataWrapper, InputArgumentBase, MatrixProduct, Roll,
+        AxisPermutation)
 from pytato.program import BoundProgram
 from pytato.target import Target, PyOpenCLTarget
 import pytato.scalar_expr as scalar_expr
@@ -381,6 +382,28 @@ class CodeGenMapper(pytato.transform.Mapper):
         state.results[expr] = result
         return result
 
+    def map_axis_permutation(self, expr: AxisPermutation,
+            state: CodeGenState) -> ImplementedResult:
+        if expr in state.results:
+            return state.results[expr]
+
+        indices = [None] * expr.ndim
+        for from_index, to_index in enumerate(expr.axes):
+            indices[to_index] = var(f"_{from_index}")
+
+        expr_context = LoopyExpressionContext(state)
+        loopy_expr = (
+                self.rec(expr.array, state)
+                .to_loopy_expression(tuple(indices), expr_context))
+
+        result = InlinedResult(loopy_expr,
+                expr,
+                expr_context.reduction_bounds,
+                expr_context.depends_on)
+
+        state.results[expr] = result
+        return result
+
     def map_index_lambda(self, expr: IndexLambda,
             state: CodeGenState) -> ImplementedResult:
         if expr in state.results:
diff --git a/test/test_codegen.py b/test/test_codegen.py
index 4914031..a86e89f 100755
--- a/test/test_codegen.py
+++ b/test/test_codegen.py
@@ -168,6 +168,49 @@ def test_roll(ctx_factory, shift, axis):
     assert (x_out == np.roll(x_in, shift=shift, axis=axis)).all()
 
 
+@pytest.mark.parametrize("axes", (
+    (), (0, 1), (1, 0),
+    (0, 1, 2), (0, 2, 1), (1, 0, 2), (1, 2, 0), (2, 0, 1), (2, 1, 0)))
+def test_axis_permutation(ctx_factory, axes):
+    cl_ctx = ctx_factory()
+    queue = cl.CommandQueue(cl_ctx)
+
+    ndim = len(axes)
+    shape = (3, 4, 5)[:ndim]
+
+    from numpy.random import default_rng
+    rng = default_rng()
+
+    x_in = rng.random(size=shape)
+
+    namespace = pt.Namespace()
+    x = pt.make_data_wrapper(namespace, x_in)
+    prog = pt.generate_loopy(
+            pt.transpose(x, axes),
+            target=pt.PyOpenCLTarget(queue))
+
+    _, (x_out,) = prog()
+    assert (x_out == np.transpose(x_in, axes)).all()
+
+
+def test_transpose(ctx_factory):
+    cl_ctx = ctx_factory()
+    queue = cl.CommandQueue(cl_ctx)
+
+    shape = (2, 8)
+
+    from numpy.random import default_rng
+    rng = default_rng()
+    x_in = rng.random(size=shape)
+
+    namespace = pt.Namespace()
+    x = pt.make_data_wrapper(namespace, x_in)
+    prog = pt.generate_loopy(x.T, target=pt.PyOpenCLTarget(queue))
+
+    _, (x_out,) = prog()
+    assert (x_out == x_in.T).all()
+
+
 if __name__ == "__main__":
     if len(sys.argv) > 1:
         exec(sys.argv[1])
diff --git a/test/test_pytato.py b/test/test_pytato.py
index bf74bab..e840053 100755
--- a/test/test_pytato.py
+++ b/test/test_pytato.py
@@ -61,6 +61,22 @@ def test_roll_validation():
         pt.roll(a, 1, axis=-1)
 
 
+def test_transpose_validation():
+    namespace = pt.Namespace()
+
+    a = pt.make_placeholder(namespace, "a", (10, 10), np.float)
+    pt.transpose(a)
+
+    with pytest.raises(ValueError):
+        pt.transpose(a, (2, 0, 1))
+
+    with pytest.raises(ValueError):
+        pt.transpose(a, (1, 1))
+
+    with pytest.raises(ValueError):
+        pt.transpose(a, (0,))
+
+
 if __name__ == "__main__":
     if len(sys.argv) > 1:
         exec(sys.argv[1])
-- 
GitLab


From 1ee23a1bd620d8f22ade7561d9fe369ad5cf8908 Mon Sep 17 00:00:00 2001
From: Matt Wala <wala1@illinois.edu>
Date: Mon, 20 Jul 2020 15:26:47 -0500
Subject: [PATCH 10/52] Add arithmetic

---
 pytato/array.py      | 82 +++++++++++++++++++++++++++++++-------------
 test/test_codegen.py | 65 +++++++++++++++++++++++++++++++++++
 2 files changed, 123 insertions(+), 24 deletions(-)

diff --git a/pytato/array.py b/pytato/array.py
index f67c8f8..ccbfd37 100644
--- a/pytato/array.py
+++ b/pytato/array.py
@@ -121,6 +121,7 @@ from typing import (
 
 import numpy as np
 import pymbolic.primitives as prim
+from pymbolic import var
 from pytools import is_single_valued, memoize_method, UniqueNameGenerator
 
 import pytato.scalar_expr as scalar_expr
@@ -500,41 +501,74 @@ class Array:
             op: Any,
             other: Union[Array, Number],
             reverse: bool = False) -> Array:
+
+        def add_indices(val):
+            if self.ndim == 0:
+                return val
+            else:
+                indices = tuple(var(f"_{i}") for i in range(self.ndim))
+                return val[indices]
+
         if isinstance(other, Number):
-            # TODO
-            raise NotImplementedError
-        else:
+            first = add_indices(var("_in0"))
+            second = other
+            bindings = dict(_in0=self)
+            other = np.array(other)
+        elif isinstance(other, Array):
             if self.shape != other.shape:
-                raise NotImplementedError("broadcasting")
+                raise NotImplementedError("broadcasting not supported")
+            first = add_indices(var("_in0"))
+            second = add_indices(var("_in1"))
+            bindings = dict(_in0=self, _in1=other)
+        else:
+            raise ValueError("unknown argument")
 
-            dtype = np.result_type(self.dtype, other.dtype)
+        if reverse:
+            first, second = second, first
+            if len(bindings) == 2:
+                bindings["_in1"], bindings["_in0"] = \
+                        bindings["_in0"], bindings["_in1"]
 
-            # FIXME: If either *self* or *other* is an IndexLambda, its expression
-            # could be folded into the output, producing a fused result.
-            if self.shape == ():
-                expr = op(prim.Variable("_in0"), prim.Variable("_in1"))
-            else:
-                indices = tuple(
-                        prim.Variable(f"_{i}") for i in range(self.ndim))
-                expr = op(
-                        prim.Variable("_in0")[indices],
-                        prim.Variable("_in1")[indices])
+        dtype = np.result_type(self.dtype, other.dtype)
+        expr = op(first, second)
 
-            first, second = self, other
-            if reverse:
-                first, second = second, first
+        return IndexLambda(self.namespace,
+                expr,
+                shape=self.shape,
+                dtype=dtype,
+                bindings=bindings)
 
-            bindings = dict(_in0=first, _in1=second)
+    def _unary_op(self, op: Any) -> Array:
+        if self.ndim == 0:
+            expr = op(var("_in0"))
+        else:
+            indices = tuple(var(f"_{i}") for i in range(self.ndim))
+            expr = op(var("_in0")[indices])
 
-            return IndexLambda(self.namespace,
-                    expr,
-                    shape=self.shape,
-                    dtype=dtype,
-                    bindings=bindings)
+        bindings = dict(_in0=self)
+        return IndexLambda(self.namespace,
+                expr,
+                shape=self.shape,
+                dtype=self.dtype,
+                bindings=bindings)
 
     __mul__ = partialmethod(_binary_op, operator.mul)
     __rmul__ = partialmethod(_binary_op, operator.mul, reverse=True)
 
+    __add__ = partialmethod(_binary_op, operator.add)
+    __radd__ = partialmethod(_binary_op, operator.add, reverse=True)
+
+    __sub__ = partialmethod(_binary_op, operator.sub)
+    __rsub__ = partialmethod(_binary_op, operator.sub, reverse=True)
+
+    __truediv__ = partialmethod(_binary_op, operator.truediv)
+    __rtruediv__ = partialmethod(_binary_op, operator.truediv, reverse=True)
+
+    __neg__ = partialmethod(_unary_op, operator.neg)
+
+    def __pos__(self) -> Array:
+        return self
+
 # }}}
 
 
diff --git a/test/test_codegen.py b/test/test_codegen.py
index a86e89f..0f6f7c3 100755
--- a/test/test_codegen.py
+++ b/test/test_codegen.py
@@ -22,6 +22,7 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 THE SOFTWARE.
 """
 
+import operator
 import sys
 
 import loopy as lp
@@ -211,6 +212,70 @@ def test_transpose(ctx_factory):
     assert (x_out == x_in.T).all()
 
 
+def reverse_args(f):
+    def wrapper(*args):
+        return f(*reversed(args))
+    return wrapper
+
+
+@pytest.mark.parametrize("which", ("add", "sub", "mul", "truediv"))
+@pytest.mark.parametrize("reverse", (False, True))
+def test_scalar_array_binary_arith(ctx_factory, which, reverse):
+    cl_ctx = ctx_factory()
+    queue = cl.CommandQueue(cl_ctx)
+
+    op = getattr(operator, which)
+    if reverse:
+        op = reverse_args(op)
+
+    arg = 2
+
+    x_in = np.array([1., 2., 3., 4., 5.])
+    namespace = pt.Namespace()
+    x = pt.make_data_wrapper(namespace, x_in)
+    prog = pt.generate_loopy(op(arg, x), target=pt.PyOpenCLTarget(queue))
+
+    _, (x_out,) = prog()
+    assert (x_out == op(arg, x_in)).all()
+
+
+@pytest.mark.parametrize("which", ("neg", "pos"))
+def test_unary_arith(ctx_factory, which):
+    cl_ctx = ctx_factory()
+    queue = cl.CommandQueue(cl_ctx)
+
+    op = getattr(operator, which)
+
+    x_in = np.array([1., 2., 3., 4., 5.])
+    namespace = pt.Namespace()
+    x = pt.make_data_wrapper(namespace, x_in)
+    prog = pt.generate_loopy(op(x), target=pt.PyOpenCLTarget(queue))
+
+    _, (x_out,) = prog()
+    assert (x_out == op(x_in)).all()
+
+
+@pytest.mark.parametrize("which", ("add", "sub", "mul", "truediv"))
+@pytest.mark.parametrize("reverse", (False, True))
+def test_array_array_binary_arith(ctx_factory, which, reverse):
+    cl_ctx = ctx_factory()
+    queue = cl.CommandQueue(cl_ctx)
+
+    op = getattr(operator, which)
+    if reverse:
+        op = reverse_args(op)
+
+    x_in = np.array([1., 2., 3., 4., 5.])
+    y_in = np.array([6., 7., 8., 9., 10.])
+    namespace = pt.Namespace()
+    x = pt.make_data_wrapper(namespace, x_in)
+    y = pt.make_data_wrapper(namespace, y_in)
+    prog = pt.generate_loopy(op(x, y), target=pt.PyOpenCLTarget(queue))
+
+    _, (out,) = prog()
+    assert (out == op(x_in, y_in)).all()
+
+
 if __name__ == "__main__":
     if len(sys.argv) > 1:
         exec(sys.argv[1])
-- 
GitLab


From 3878685cff15ea0701d311428e9202fe86d25733 Mon Sep 17 00:00:00 2001
From: Matt Wala <wala1@illinois.edu>
Date: Mon, 20 Jul 2020 23:31:26 -0500
Subject: [PATCH 11/52] Implement simple slicing

---
 pytato/__init__.py   |   3 +-
 pytato/array.py      | 235 ++++++++++++++++++++++++++++++++++++-------
 pytato/codegen.py    |  81 ++++++++-------
 test/test_codegen.py |  49 +++++++++
 test/test_pytato.py  |  24 ++++-
 5 files changed, 314 insertions(+), 78 deletions(-)

diff --git a/pytato/__init__.py b/pytato/__init__.py
index 0eb430d..864eb4b 100644
--- a/pytato/__init__.py
+++ b/pytato/__init__.py
@@ -28,6 +28,7 @@ from pytato.array import (
         Namespace, Array, DictOfNamedArrays, Tag, UniqueTag,
         DottedName,
 
+        make_dict_of_named_arrays,
         make_placeholder, make_size_param, make_data_wrapper,
 
         matmul, roll, transpose,
@@ -40,7 +41,7 @@ __all__ = (
         "DottedName", "Namespace", "Array", "DictOfNamedArrays",
         "Tag", "UniqueTag",
 
-        "make_placeholder", "make_size_param", "make_data_wrapper",
+        "make_dict_of_named_arrays", "make_placeholder", "make_size_param", "make_data_wrapper",
 
         "matmul", "roll", "transpose",
 
diff --git a/pytato/array.py b/pytato/array.py
index ccbfd37..53f56ea 100644
--- a/pytato/array.py
+++ b/pytato/array.py
@@ -83,6 +83,7 @@ Built-in Expression Nodes
 .. autoclass:: Roll
 .. autoclass:: AxisPermutation
 .. autoclass:: Reshape
+.. autoclass:: Slice
 .. autoclass:: LoopyFunction
 
 Input Arguments
@@ -107,6 +108,7 @@ Node constructors such as :class:`Placeholder.__init__` and
 .. autofunction:: make_placeholder
 .. autofunction:: make_size_param
 .. autofunction:: make_data_wrapper
+.. autofunction:: make_slice
 """
 
 # }}}
@@ -355,6 +357,9 @@ def normalize_shape(
 # }}}
 
 
+SliceItem = Union[int, slice, type(Ellipsis), None]
+
+
 # {{{ array inteface
 
 class Array:
@@ -421,8 +426,7 @@ class Array:
     # hashable. Dicts of hashable keys and values are also permitted.
     fields: ClassVar[Tuple[str, ...]] = ("shape", "dtype", "tags")
 
-    def __init__(self,
-            tags: Optional[TagsType] = None):
+    def __init__(self, tags: Optional[TagsType] = None):
         if tags is None:
             tags = frozenset()
 
@@ -446,6 +450,87 @@ class Array:
     def named(self, name: str) -> Array:
         return self.namespace.ref(self.namespace.assign(name, self))
 
+    def __getitem__(self,
+            slice_spec: Union[SliceItem, Tuple[SliceItem, ...]]) -> Array:
+        if not isinstance(slice_spec, tuple):
+            slice_spec = (slice_spec,)
+
+        # FIXME: This doesn't support all NumPy basic indexing constructs,
+        # including:
+        #
+        # - newaxis
+        # - Ellipsis
+        # - slices with nontrivial strides
+        # - slices with bounds that exceed array dimensions
+        # - slices with negative indices
+        # - slices that yield an array with a zero dimension (lacks codegen support)
+        #
+        # Symbolic expression support is also limited.
+
+        starts = []
+        sizes = []
+        kept_dims = []
+
+        slice_spec_expanded = []
+
+        for elem in slice_spec:
+            if elem is Ellipsis:
+                raise NotImplementedError("'...' is unsupported")
+            elif elem is None:
+                raise NotImplementedError("newaxis is unsupported")
+            else:
+                slice_spec_expanded.append(elem)
+
+        slice_spec_expanded.extend(
+                [slice(None, None, None)] * (self.ndim - len(slice_spec)))
+
+        if len(slice_spec_expanded) > self.ndim:
+            raise ValueError("too many dimensions in index")
+
+        for i, elem in enumerate(slice_spec_expanded):
+            if isinstance(elem, slice):
+                start = elem.start
+                if start is None:
+                    start = 0
+                stop = elem.stop
+                if stop is None:
+                    stop = self.shape[i]
+                stride = elem.step
+                if stride is not None and stride != 1:
+                    raise ValueError("non-trivial strides unsupported")
+                starts.append(start)
+                sizes.append(stop - start)
+                kept_dims.append(i)
+
+            elif isinstance(elem, int):
+                starts.append(elem)
+                sizes.append(1)
+
+            else:
+                raise ValueError("unknown index along dimension")
+
+        slice_ = make_slice(self, starts, sizes)
+
+        if len(kept_dims) != self.ndim:
+            # Return an IndexLambda that elides the indexed-into dimensions
+            # (as opposed to the ones that were sliced).
+            indices = [0] * self.ndim
+            shape = []
+            for i, dim in enumerate(kept_dims):
+                indices[dim] = var(f"_{i}")
+                shape.append(slice_.shape[dim])
+            expr = var("_in0")
+            if indices:
+                expr = expr[tuple(indices)]
+
+            return IndexLambda(self.namespace,
+                    expr,
+                    shape=tuple(shape),
+                    dtype=self.dtype,
+                    bindings=dict(_in0=slice_))
+        else:
+            return slice_
+
     @property
     def ndim(self) -> int:
         return len(self.shape)
@@ -833,14 +918,42 @@ class MatrixProduct(Array):
 # }}}
 
 
-# {{{ roll
+# {{{ index remapping
 
-class Roll(Array):
-    """Roll an array along an axis.
+class IndexRemappingBase(Array):
+    """Base class for operations that remap the indices of an array.
+
+    Note that index remappings can also be expressed via
+    :class:`~pytato.IndexLambda`.
 
     .. attribute:: array
 
-        The input :class:`~pytato.Array`.
+        The input :class:`~pytato.Array`
+
+    """
+    fields = Array.fields + ("array",)
+
+    def __init__(self,
+            array: Array,
+            tags: Optional[TagsType] = None):
+        super().__init__(tags)
+        self.array = array
+
+    @property
+    def dtype(self) -> np.dtype:
+        return self.array.dtype
+
+    @property
+    def namespace(self) -> Namespace:
+        return self.array.namespace
+
+# }}}
+
+
+# {{{ roll
+
+class Roll(IndexRemappingBase):
+    """Roll an array along an axis.
 
     .. attribute:: shift
 
@@ -851,7 +964,7 @@ class Roll(Array):
         Shift axis.
     """
 
-    fields = Array.fields + ("array", "shift", "axis")
+    fields = IndexRemappingBase.fields + ("shift", "axis")
     mapper_method = "map_roll"
 
     def __init__(self,
@@ -859,46 +972,33 @@ class Roll(Array):
             shift: int,
             axis: int,
             tags: Optional[TagsType] = None):
-        super().__init__(tags)
-        self.array = array
+        super().__init__(array, tags)
         self.shift = shift
         self.axis = axis
 
-    @property
-    def namespace(self) -> Namespace:
-        return self.array.namespace
-
     @property
     def shape(self) -> ShapeType:
         return self.array.shape
 
-    @property
-    def dtype(self) -> np.dtype:
-        return self.array.dtype
-
 # }}}
 
 
 # {{{ axis permutation
 
-class AxisPermutation(Array):
+class AxisPermutation(IndexRemappingBase):
     r"""Permute the axes of an array."""
 
-    fields = Array.fields + ("array", "axes")
+    fields = IndexRemappingBase.fields + ("axes",)
     mapper_method = "map_axis_permutation"
 
     def __init__(self,
             array: Array,
             axes: Tuple[int, ...],
             tags: Optional[TagsType] = None):
-        super().__init__(tags)
+        super().__init__(array, tags)
         self.array = array
         self.axes = axes
 
-    @property
-    def namespace(self) -> Namespace:
-        return self.array.namespace
-
     @property
     def shape(self) -> ShapeType:
         result = []
@@ -907,22 +1007,47 @@ class AxisPermutation(Array):
             result.append(base_shape[index])
         return tuple(result)
 
-    @property
-    def dtype(self) -> np.dtype:
-        return self.array.dtype
-
 # }}}
 
 
 # {{{ reshape
 
-class Reshape(Array):
+class Reshape(IndexRemappingBase):
     """
     """
 
 # }}}
 
 
+# {{{ slice
+
+class Slice(IndexRemappingBase):
+    """Extracts a slice of constant size from an array.
+
+    .. attribute:: begin
+    .. attribute:: size
+    """
+
+    fields = Array.fields + ("begin", "size")
+    mapper_method = "map_slice"
+
+    def __init__(self,
+            array: Array,
+            begin: Tuple[int, ...],
+            size: Tuple[int, ...],
+            tags: Optional[TagsType] = None):
+        super().__init__(array, tags)
+
+        self.begin = begin
+        self.size = size
+
+    @property
+    def shape(self) -> ShapeType:
+        return self.size
+
+# }}}
+
+
 # {{{ base class for arguments
 
 class InputArgumentBase(Array):
@@ -942,7 +1067,8 @@ class InputArgumentBase(Array):
         and within the same :class:`~pytato.Namespace` is not allowed.
     """
 
-    # The name uniquely identifies this object in the namespace.
+    # The name uniquely identifies this object in the namespace. Therefore,
+    # subclasses don't have to update *fields*.
     fields = ("name",)
 
     def __init__(self,
@@ -1089,7 +1215,7 @@ class LoopyFunction(DictOfNamedArrays):
 # }}}
 
 
-# {{{ numpy interface
+# {{{ end-user facing
 
 def matmul(x1: Array, x2: Array) -> Array:
     """Matrix multiplication.
@@ -1147,7 +1273,8 @@ def transpose(a: Array, axes: Optional[Sequence[int]] = None) -> Array:
 
     :param a: Input array
     :param axes: If specified, a permutation of ``[0, 1, ..., a.ndim-1]``. Defaults
-        to ``range(a.ndim)[::-1]``.
+        to ``range(a.ndim)[::-1]``. The returned axis at index *i* corresponds to
+        the input axis *axes[i]*.
     """
     if axes is None:
         axes = range(a.ndim)[::-1]
@@ -1156,14 +1283,52 @@ def transpose(a: Array, axes: Optional[Sequence[int]] = None) -> Array:
         raise ValueError("axes have incorrect length")
 
     if set(axes) != set(range(a.ndim)):
-        raise ValueError("invalid axes were given")
+        raise ValueError("repeated or out-of-bounds axes detected")
 
     return AxisPermutation(a, tuple(axes))
 
-# }}}
 
+def stack(arrays: Sequence[Array], axis: int = 0) -> Array:
+    """Join a sequence of arrays along a new axis.
+
+    :param arrays: a finite sequence, each of whose elements is an
+        :class:`Array` of the same shape
+    :param axis: the position of the new axis, which will have length
+        *len(arrays)*
+    """
+
+    if not arrays:
+        raise ValueError("need at least one array to stack")
+
+
+def make_slice(array: Array, begin: Sequence[int], size: Sequence[int]) -> Array:
+    """Extract a constant-sized slice from an array with constant offsets.
+    """
+    if array.ndim != len(begin):
+        raise ValueError("'begin' and 'array' do not match in dimensions")
+
+    if len(begin) != len(size):
+        raise ValueError("'begin' and 'size' do not match in dimensions")
+
+    for i, (bval, sval) in enumerate(zip(begin, size)):
+        symbolic_index = not (
+                isinstance(array.shape[i], int)
+                and isinstance(bval, int)
+                and isinstance(sval, int))
+
+        if symbolic_index:
+            if not (0 == bval and sval == array.shape[i]):
+                raise ValueError("symbolic indexing is unsupported")
+            continue
+
+        if not (0 <= bval < array.shape[i]):
+            raise ValueError("index '%d' of 'begin' out of bounds" % i)
+
+        if sval < 0 or not (0 <= bval + sval <= array.shape[i]):
+            raise ValueError("index '%d' of 'size' out of bounds" % i)
+
+    return Slice(array, tuple(begin), tuple(size))
 
-# {{{ end-user-facing
 
 def make_dict_of_named_arrays(data: Dict[str, Array]) -> DictOfNamedArrays:
     """Make a :class:`DictOfNamedArrays` object and ensure that all arrays
diff --git a/pytato/codegen.py b/pytato/codegen.py
index 0149bf1..33c774d 100644
--- a/pytato/codegen.py
+++ b/pytato/codegen.py
@@ -23,7 +23,8 @@ THE SOFTWARE.
 """
 
 import dataclasses
-from typing import (Union, Optional, Mapping, Dict, Tuple, FrozenSet, Set)
+from functools import partialmethod
+from typing import (Any, Union, Optional, Mapping, Dict, Tuple, FrozenSet, Set)
 
 import islpy as isl
 import loopy as lp
@@ -32,9 +33,9 @@ from pymbolic import var
 import pytools
 
 from pytato.array import (
-        Array, DictOfNamedArrays, Placeholder, ShapeType, IndexLambda,
+        Array, DictOfNamedArrays, ShapeType, IndexLambda,
         SizeParam, DataWrapper, InputArgumentBase, MatrixProduct, Roll,
-        AxisPermutation)
+        AxisPermutation, Slice, IndexRemappingBase)
 from pytato.program import BoundProgram
 from pytato.target import Target, PyOpenCLTarget
 import pytato.scalar_expr as scalar_expr
@@ -197,6 +198,16 @@ class InlinedResult(ImplementedResult):
         self.reduction_bounds = dict(reduction_bounds)
         self.depends_on = depends_on
 
+    @classmethod
+    def from_loopy_expression(cls,
+            loopy_expr: ScalarExpression,
+            loopy_expr_context: LoopyExpressionContext,
+            array: Array):
+        return cls(loopy_expr,
+                array,
+                loopy_expr_context.reduction_bounds,
+                loopy_expr_context.depends_on)
+
     def to_loopy_expression(self, indices: SymbolicIndex,
             expr_context: LoopyExpressionContext) -> ScalarExpression:
         assert len(indices) == self.array.ndim
@@ -287,7 +298,7 @@ class CodeGenMapper(pytato.transform.Mapper):
         state.results[expr] = result
         return result
 
-    def handle_array_input_argument(self, expr: Union[Placeholder, DataWrapper],
+    def handle_array_input_argument(self, expr: InputArgumentBase,
             state: CodeGenState) -> ImplementedResult:
         if expr in state.results:
             return state.results[expr]
@@ -322,8 +333,8 @@ class CodeGenMapper(pytato.transform.Mapper):
         x1_result = self.rec(expr.x1, state)
         x2_result = self.rec(expr.x2, state)
 
-        expr_context = LoopyExpressionContext(state)
-        expr_context.reduction_bounds[0] = (0, expr.x2.shape[0])
+        loopy_expr_context = LoopyExpressionContext(state)
+        loopy_expr_context.reduction_bounds[0] = (0, expr.x2.shape[0])
 
         # Figure out inames.
         x1_inames = []
@@ -341,68 +352,62 @@ class CodeGenMapper(pytato.transform.Mapper):
                 x2_inames.append(var(f"_{offset}"))
 
         inner_expr = x1_result.to_loopy_expression(
-                tuple(x1_inames), expr_context)
+                tuple(x1_inames), loopy_expr_context)
         inner_expr *= x2_result.to_loopy_expression(
-                tuple(x2_inames), expr_context)
+                tuple(x2_inames), loopy_expr_context)
 
         import loopy.library.reduction as red
-        result_expr = lp.Reduction(
+        loopy_expr = lp.Reduction(
                 operation=red.parse_reduction_op("sum"),
                 inames=("_r0",),
                 expr=inner_expr,
                 allow_simultaneous=False)
 
-        result = InlinedResult(result_expr,
-                expr,
-                expr_context.reduction_bounds,
-                expr_context.depends_on)
+        result = InlinedResult.from_loopy_expression(
+                loopy_expr, loopy_expr_context, expr)
 
         state.results[expr] = result
         return result
 
-    def map_roll(self, expr: Roll,
+    def handle_index_remapping(self,
+            indices_getter: Any,
+            expr: IndexRemappingBase,
             state: CodeGenState) -> ImplementedResult:
         if expr in state.results:
             return state.results[expr]
 
-        indices = [var(f"_{d}") for d in range(expr.ndim)]
-        indices[expr.axis] = (
-                indices[expr.axis] + expr.shift) % expr.shape[expr.axis]
+        indices = indices_getter(self, expr)
 
-        expr_context = LoopyExpressionContext(state)
+        loopy_expr_context = LoopyExpressionContext(state)
         loopy_expr = (
                 self.rec(expr.array, state)
-                .to_loopy_expression(tuple(indices), expr_context))
+                .to_loopy_expression(indices, loopy_expr_context))
 
-        result = InlinedResult(loopy_expr,
-                expr,
-                expr_context.reduction_bounds,
-                expr_context.depends_on)
+        result = InlinedResult.from_loopy_expression(
+                loopy_expr, loopy_expr_context, expr)
 
         state.results[expr] = result
         return result
 
-    def map_axis_permutation(self, expr: AxisPermutation,
-            state: CodeGenState) -> ImplementedResult:
-        if expr in state.results:
-            return state.results[expr]
+    def _indices_for_roll(self, expr: Roll) -> SymbolicIndex:
+        indices = [var(f"_{d}") for d in range(expr.ndim)]
+        axis = expr.axis
+        indices[axis] = (indices[axis] + expr.shift) % expr.shape[axis]
+        return tuple(indices)
 
+    def _indices_for_axis_permutation(self, expr: AxisPermutation) -> SymbolicIndex:
         indices = [None] * expr.ndim
         for from_index, to_index in enumerate(expr.axes):
             indices[to_index] = var(f"_{from_index}")
+        return tuple(indices)
 
-        expr_context = LoopyExpressionContext(state)
-        loopy_expr = (
-                self.rec(expr.array, state)
-                .to_loopy_expression(tuple(indices), expr_context))
-
-        result = InlinedResult(loopy_expr,
-                expr,
-                expr_context.reduction_bounds,
-                expr_context.depends_on)
+    def _indices_for_slice(self, expr: Slice) -> SymbolicIndex:
+        return tuple(var(f"_{d}") + expr.begin[d] for d in range(expr.ndim))
 
-        state.results[expr] = result
-        return result
+    map_roll = partialmethod(handle_index_remapping, _indices_for_roll)
+    map_axis_permutation = (
+            partialmethod(handle_index_remapping, _indices_for_axis_permutation))
+    map_slice = partialmethod(handle_index_remapping, _indices_for_slice)
 
     def map_index_lambda(self, expr: IndexLambda,
             state: CodeGenState) -> ImplementedResult:
diff --git a/test/test_codegen.py b/test/test_codegen.py
index 0f6f7c3..e28d32d 100755
--- a/test/test_codegen.py
+++ b/test/test_codegen.py
@@ -22,6 +22,7 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 THE SOFTWARE.
 """
 
+import itertools
 import operator
 import sys
 
@@ -276,6 +277,54 @@ def test_array_array_binary_arith(ctx_factory, which, reverse):
     assert (out == op(x_in, y_in)).all()
 
 
+def generate_test_slices_for_dim(dim_bound):
+    # Include scalars to test indexing.
+    for i in range(dim_bound):
+        yield i
+
+    for i in range(0, dim_bound):
+        for j in range(i + 1, 1 + dim_bound):
+            yield slice(i, j, None)
+
+
+def generate_test_slices(shape):
+    yield from itertools.product(*map(generate_test_slices_for_dim, shape))
+
+
+@pytest.mark.parametrize("shape", [(3,), (2, 2), (1, 2, 1)])
+def test_slice(ctx_factory, shape):
+    cl_ctx = ctx_factory()
+    queue = cl.CommandQueue(cl_ctx)
+
+    from numpy.random import default_rng
+    rng = default_rng()
+
+    x_in = rng.random(size=shape)
+    namespace = pt.Namespace()
+    x = pt.make_data_wrapper(namespace, x_in)
+
+    outputs = {}
+    ref_outputs = {}
+
+    i = 0
+    for slice_ in generate_test_slices(shape):
+        outputs[f"out_{i}"] = x[slice_]
+        ref_outputs[f"out_{i}"] = x_in[slice_]
+        i += 1
+
+    prog = pt.generate_loopy(
+            pt.make_dict_of_named_arrays(outputs),
+            target=pt.PyOpenCLTarget(queue),
+            options=lp.Options(return_dict=True))
+
+    _, outputs = prog()
+
+    for output in outputs:
+        x_out = outputs[output]
+        x_ref = ref_outputs[output]
+        assert (x_out == x_ref).all()
+
+
 if __name__ == "__main__":
     if len(sys.argv) > 1:
         exec(sys.argv[1])
diff --git a/test/test_pytato.py b/test/test_pytato.py
index e840053..45c1bad 100755
--- a/test/test_pytato.py
+++ b/test/test_pytato.py
@@ -30,7 +30,7 @@ import pytest
 import pytato as pt
 
 
-def test_matmul_validation():
+def test_matmul_input_validation():
     namespace = pt.Namespace()
 
     a = pt.make_placeholder(namespace, "a", (10, 10), np.float)
@@ -43,12 +43,12 @@ def test_matmul_validation():
     with pytest.raises(ValueError):
         c @ c
 
-    n = pt.make_size_param(namespace, "n")
+    pt.make_size_param(namespace, "n")
     d = pt.make_placeholder(namespace, "d", "(n, n)", np.float)
     d @ d
 
 
-def test_roll_validation():
+def test_roll_input_validation():
     namespace = pt.Namespace()
 
     a = pt.make_placeholder(namespace, "a", (10, 10), np.float)
@@ -61,7 +61,7 @@ def test_roll_validation():
         pt.roll(a, 1, axis=-1)
 
 
-def test_transpose_validation():
+def test_transpose_input_validation():
     namespace = pt.Namespace()
 
     a = pt.make_placeholder(namespace, "a", (10, 10), np.float)
@@ -77,6 +77,22 @@ def test_transpose_validation():
         pt.transpose(a, (0,))
 
 
+def test_slice_input_validation():
+    namespace = pt.Namespace()
+
+    a = pt.make_placeholder(namespace, "a", (10, 10, 10), np.float)
+
+    a[0]
+    a[0, 0]
+    a[0, 0, 0]
+
+    with pytest.raises(ValueError):
+        a[0, 0, 0, 0]
+
+    with pytest.raises(ValueError):
+        a[10]
+
+
 if __name__ == "__main__":
     if len(sys.argv) > 1:
         exec(sys.argv[1])
-- 
GitLab


From 213bf2eb6c9d1b0f2d7fc950e821ddbe2b91cf82 Mon Sep 17 00:00:00 2001
From: Matt Wala <wala1@illinois.edu>
Date: Tue, 21 Jul 2020 22:46:38 -0500
Subject: [PATCH 12/52] Implement stack and a few other changes

---
 pytato/__init__.py   |   4 +-
 pytato/array.py      | 172 ++++++++++++++++++++++++-----
 pytato/codegen.py    | 252 ++++++++++++++++++++++++++++++-------------
 test/test_codegen.py |  28 ++++-
 test/test_pytato.py  |  21 ++++
 5 files changed, 375 insertions(+), 102 deletions(-)

diff --git a/pytato/__init__.py b/pytato/__init__.py
index 864eb4b..06c9bed 100644
--- a/pytato/__init__.py
+++ b/pytato/__init__.py
@@ -31,7 +31,7 @@ from pytato.array import (
         make_dict_of_named_arrays,
         make_placeholder, make_size_param, make_data_wrapper,
 
-        matmul, roll, transpose,
+        matmul, roll, transpose, stack,
         )
 
 from pytato.codegen import generate_loopy
@@ -43,7 +43,7 @@ __all__ = (
 
         "make_dict_of_named_arrays", "make_placeholder", "make_size_param", "make_data_wrapper",
 
-        "matmul", "roll", "transpose",
+        "matmul", "roll", "transpose", "stack",
 
         "generate_loopy",
 
diff --git a/pytato/array.py b/pytato/array.py
index 53f56ea..495670f 100644
--- a/pytato/array.py
+++ b/pytato/array.py
@@ -49,9 +49,14 @@ Array Interface
 NumPy-Like Interface
 --------------------
 
+These functions generally follow the interface of the corresponding functions in
+:mod:`numpy`, with the notable exception of the addition of an optional *tags*
+argument for implementation tags. Not all NumPy features may be supported.
+
 .. autofunction:: matmul
 .. autofunction:: roll
 .. autofunction:: transpose
+.. autofunction:: stack
 
 Supporting Functionality
 ------------------------
@@ -80,11 +85,17 @@ Built-in Expression Nodes
 .. autoclass:: IndexLambda
 .. autoclass:: Einsum
 .. autoclass:: MatrixProduct
+.. autoclass:: LoopyFunction
+.. autoclass:: Stack
+
+Index Remapping
+^^^^^^^^^^^^^^^
+
+.. autoclass:: IndexRemappingBase
 .. autoclass:: Roll
 .. autoclass:: AxisPermutation
 .. autoclass:: Reshape
 .. autoclass:: Slice
-.. autoclass:: LoopyFunction
 
 Input Arguments
 ^^^^^^^^^^^^^^^
@@ -119,7 +130,7 @@ import operator
 from dataclasses import dataclass
 from typing import (
         Optional, ClassVar, Dict, Any, Mapping, Iterator, Tuple, Union,
-        FrozenSet, Protocol, Sequence)
+        FrozenSet, Protocol, Sequence, cast, TYPE_CHECKING)
 
 import numpy as np
 import pymbolic.primitives as prim
@@ -130,6 +141,17 @@ import pytato.scalar_expr as scalar_expr
 from pytato.scalar_expr import ScalarExpression
 
 
+# Support ellipsis as a type.
+# https://github.com/python/typing/issues/684#issuecomment-548203158
+if TYPE_CHECKING:
+    from enum import Enum
+    class ellipsis(Enum):
+        Ellipsis = "..."
+    Ellipsis = ellipsis.Ellipsis
+else:
+    ellipsis = type(Ellipsis)
+
+
 # {{{ dotted name
 
 class DottedName:
@@ -357,7 +379,7 @@ def normalize_shape(
 # }}}
 
 
-SliceItem = Union[int, slice, type(Ellipsis), None]
+SliceItem = Union[int, slice, None, ellipsis]
 
 
 # {{{ array inteface
@@ -416,6 +438,21 @@ class Array:
     .. automethod:: tagged
     .. automethod:: without_tag
 
+    Array interface:
+
+    .. automethod:: __getitem__
+    .. automethod:: T
+    .. method:: __mul__
+    .. method:: __rmul__
+    .. method:: __add__
+    .. method:: __radd__
+    .. method:: __sub__
+    .. method:: __rsub__
+    .. method:: __truediv__
+    .. method:: __rtruediv__
+    .. method:: __neg__
+    .. method:: __pos__
+
     Derived attributes:
 
     .. attribute:: ndim
@@ -587,7 +624,7 @@ class Array:
             other: Union[Array, Number],
             reverse: bool = False) -> Array:
 
-        def add_indices(val):
+        def add_indices(val: prim.Expression) -> prim.Expression:
             if self.ndim == 0:
                 return val
             else:
@@ -598,13 +635,14 @@ class Array:
             first = add_indices(var("_in0"))
             second = other
             bindings = dict(_in0=self)
-            other = np.array(other)
+            other_dtype = np.array(other).dtype
         elif isinstance(other, Array):
             if self.shape != other.shape:
                 raise NotImplementedError("broadcasting not supported")
             first = add_indices(var("_in0"))
             second = add_indices(var("_in1"))
             bindings = dict(_in0=self, _in1=other)
+            other_dtype = other.dtype
         else:
             raise ValueError("unknown argument")
 
@@ -614,7 +652,7 @@ class Array:
                 bindings["_in1"], bindings["_in0"] = \
                         bindings["_in0"], bindings["_in1"]
 
-        dtype = np.result_type(self.dtype, other.dtype)
+        dtype = np.result_type(self.dtype, other_dtype)
         expr = op(first, second)
 
         return IndexLambda(self.namespace,
@@ -837,7 +875,7 @@ class IndexLambda(_SuppliedShapeAndDtypeMixin, Array):
         else:
             return False
 
-        if index != tuple("_%d" % i for i in range(len(self.shape))):
+        if index != tuple(var("_%d") % i for i in range(len(self.shape))):
             return False
 
         try:
@@ -918,6 +956,49 @@ class MatrixProduct(Array):
 # }}}
 
 
+# {{{ stack
+
+class Stack(Array):
+    """Join a sequence of arrays along an axis.
+
+    .. attribute:: arrays
+
+        The sequence of arrays to join
+
+    .. attribute:: axis
+
+        The output axis
+
+    """
+
+    fields = Array.fields + ("arrays", "axis")
+    mapper_method = "map_stack"
+
+    def __init__(self,
+            arrays: Tuple[Array, ...],
+            axis: int,
+            tags: Optional[TagsType] = None):
+        super().__init__(tags)
+        self.arrays = arrays
+        self.axis = axis
+
+    @property
+    def namespace(self) -> Namespace:
+        return self.arrays[0].namespace
+
+    @property
+    def dtype(self) -> np.dtype:
+        return np.result_type(*(arr.dtype for arr in self.arrays))
+
+    @property
+    def shape(self) -> ShapeType:
+        result = list(self.arrays[0].shape)
+        result.insert(self.axis, len(self.arrays))
+        return tuple(result)
+
+# }}}
+
+
 # {{{ index remapping
 
 class IndexRemappingBase(Array):
@@ -963,7 +1044,6 @@ class Roll(IndexRemappingBase):
 
         Shift axis.
     """
-
     fields = IndexRemappingBase.fields + ("shift", "axis")
     mapper_method = "map_roll"
 
@@ -986,8 +1066,12 @@ class Roll(IndexRemappingBase):
 # {{{ axis permutation
 
 class AxisPermutation(IndexRemappingBase):
-    r"""Permute the axes of an array."""
+    r"""Permute the axes of an array.
+
+    .. attribute:: axes
 
+        A permutation of the input axes.
+    """
     fields = IndexRemappingBase.fields + ("axes",)
     mapper_method = "map_axis_permutation"
 
@@ -1027,7 +1111,6 @@ class Slice(IndexRemappingBase):
     .. attribute:: begin
     .. attribute:: size
     """
-
     fields = Array.fields + ("begin", "size")
     mapper_method = "map_slice"
 
@@ -1217,11 +1300,12 @@ class LoopyFunction(DictOfNamedArrays):
 
 # {{{ end-user facing
 
-def matmul(x1: Array, x2: Array) -> Array:
+def matmul(x1: Array, x2: Array, tags: Optional[TagsType] = None) -> Array:
     """Matrix multiplication.
 
-    :param x1: First argument
-    :param x2: Second argument
+    :param x1: first argument
+    :param x2: second argument
+    :param tags: implementation tags
     """
     if x1.namespace is not x2.namespace:
         raise ValueError("namespace mismatch")
@@ -1239,15 +1323,17 @@ def matmul(x1: Array, x2: Array) -> Array:
     if x1.shape[-1] != x2.shape[0]:
         raise ValueError("dimension mismatch")
 
-    return MatrixProduct(x1, x2)
+    return MatrixProduct(x1, x2, tags)
 
 
-def roll(a: Array, shift: int, axis: Optional[int] = None) -> Array:
+def roll(a: Array, shift: int, axis: Optional[int] = None,
+        tags: Optional[TagsType] = None) -> Array:
     """Roll array elements along a given axis.
 
-    :param a: Input array
-    :param shift: The number of places by which elements are shifted
-    :param axis: Axis along which the array is shifted
+    :param a: input array
+    :param shift: the number of places by which elements are shifted
+    :param axis: axis along which the array is shifted
+    :param tags: implementation tags
     """
     if a.ndim == 0:
         return a
@@ -1268,13 +1354,15 @@ def roll(a: Array, shift: int, axis: Optional[int] = None) -> Array:
     return Roll(a, shift, axis)
 
 
-def transpose(a: Array, axes: Optional[Sequence[int]] = None) -> Array:
+def transpose(a: Array, axes: Optional[Sequence[int]] = None,
+        tags: Optional[TagsType] = None) -> Array:
     """Reverse or permute the axes of an array.
 
-    :param a: Input array
-    :param axes: If specified, a permutation of ``[0, 1, ..., a.ndim-1]``. Defaults
+    :param a: input array
+    :param axes: if specified, a permutation of ``[0, 1, ..., a.ndim-1]``. Defaults
         to ``range(a.ndim)[::-1]``. The returned axis at index *i* corresponds to
         the input axis *axes[i]*.
+    :param tags: implementation tags
     """
     if axes is None:
         axes = range(a.ndim)[::-1]
@@ -1288,21 +1376,53 @@ def transpose(a: Array, axes: Optional[Sequence[int]] = None) -> Array:
     return AxisPermutation(a, tuple(axes))
 
 
-def stack(arrays: Sequence[Array], axis: int = 0) -> Array:
+def stack(arrays: Sequence[Array], axis: int = 0,
+        tags: Optional[TagsType] = None) -> Array:
     """Join a sequence of arrays along a new axis.
 
+    The *axis* parameter specifies the position of the new axis in the result.
+
+    Example::
+
+       >>> arrays = [pt.zeros(3)] * 4
+       >>> pt.stack(arrays, axis=0).shape
+       (4, 3)
+
     :param arrays: a finite sequence, each of whose elements is an
         :class:`Array` of the same shape
     :param axis: the position of the new axis, which will have length
         *len(arrays)*
+    :param tags: implementation tags
+
     """
 
     if not arrays:
         raise ValueError("need at least one array to stack")
 
+    for array in arrays[1:]:
+        if array.shape != arrays[0].shape:
+            raise ValueError("arrays must have the same shape")
+
+    if not (0 <= axis <= arrays[0].ndim):
+        raise ValueError("invalid axis")
+
+    return Stack(tuple(arrays), axis)
+
 
 def make_slice(array: Array, begin: Sequence[int], size: Sequence[int]) -> Array:
     """Extract a constant-sized slice from an array with constant offsets.
+
+    :param array: input array
+    :param begin: a sequence of length *array.ndim* containing slice
+        offsets. Must be in bounds.
+    :param size: a sequence of length *array.ndim* containing the sizes of
+        the slice along each dimension. Must be in bounds.
+
+    .. note::
+
+        Support for slices is currently limited. Among other things, non-trivial
+        slices (i.e., not the length of the whole dimension) involving symbolic
+        expressions are unsupported.
     """
     if array.ndim != len(begin):
         raise ValueError("'begin' and 'array' do not match in dimensions")
@@ -1318,13 +1438,15 @@ def make_slice(array: Array, begin: Sequence[int], size: Sequence[int]) -> Array
 
         if symbolic_index:
             if not (0 == bval and sval == array.shape[i]):
-                raise ValueError("symbolic indexing is unsupported")
+                raise ValueError(
+                        "slicing with symbolic dimensions is unsupported")
             continue
 
-        if not (0 <= bval < array.shape[i]):
+        ubound: int = cast(int, array.shape[i])
+        if not (0 <= bval < ubound):
             raise ValueError("index '%d' of 'begin' out of bounds" % i)
 
-        if sval < 0 or not (0 <= bval + sval <= array.shape[i]):
+        if sval < 0 or not (0 <= bval + sval <= ubound):
             raise ValueError("index '%d' of 'size' out of bounds" % i)
 
     return Slice(array, tuple(begin), tuple(size))
diff --git a/pytato/codegen.py b/pytato/codegen.py
index 33c774d..7f7aa45 100644
--- a/pytato/codegen.py
+++ b/pytato/codegen.py
@@ -24,7 +24,8 @@ THE SOFTWARE.
 
 import dataclasses
 from functools import partialmethod
-from typing import (Any, Union, Optional, Mapping, Dict, Tuple, FrozenSet, Set)
+from typing import (
+        Any, Union, Optional, Mapping, Dict, Tuple, FrozenSet, Set, Callable)
 
 import islpy as isl
 import loopy as lp
@@ -35,7 +36,7 @@ import pytools
 from pytato.array import (
         Array, DictOfNamedArrays, ShapeType, IndexLambda,
         SizeParam, DataWrapper, InputArgumentBase, MatrixProduct, Roll,
-        AxisPermutation, Slice, IndexRemappingBase)
+        AxisPermutation, Slice, IndexRemappingBase, Stack)
 from pytato.program import BoundProgram
 from pytato.target import Target, PyOpenCLTarget
 import pytato.scalar_expr as scalar_expr
@@ -68,7 +69,8 @@ Code Generation Internals
 .. autoclass:: InlinedExpressionGenMapper
 
 .. autofunction:: domain_for_shape
-.. autofunction:: add_output
+.. autofunction:: add_store
+.. autofunction:: rename_reductions
 
 """
 
@@ -78,7 +80,7 @@ Code Generation Internals
 # SymbolicIndex and ShapeType are semantically distinct but identical at the
 # type level.
 SymbolicIndex = ShapeType
-ReductionBounds = Dict[int, Tuple[ScalarExpression, ScalarExpression]]
+ReductionBounds = Dict[str, Tuple[ScalarExpression, ScalarExpression]]
 
 
 @dataclasses.dataclass(init=True, repr=False, eq=False)
@@ -139,14 +141,8 @@ class ImplementedResult(object):
     """Generated code for a node in the computation graph (i.e., an array
     expression).
 
-    .. attribute:: array
-
-        The :class:`pytato.Array` associated with this code.
-
     .. automethod:: to_loopy_expression
     """
-    def __init__(self, array: Array):
-        self.array = array
 
     def to_loopy_expression(self, indices: SymbolicIndex,
             expr_context: LoopyExpressionContext) -> ScalarExpression:
@@ -172,13 +168,13 @@ class StoredResult(ImplementedResult):
 
     See also: :class:`pytato.array.ImplStored`.
     """
-    def __init__(self, name: str, array: Array):
-        super().__init__(array)
+    def __init__(self, name: str, depends_on: FrozenSet[str]):
         self.name = name
+        self.depends_on = depends_on
 
-    # TODO: Handle dependencies.
     def to_loopy_expression(self, indices: SymbolicIndex,
             expr_context: LoopyExpressionContext) -> ScalarExpression:
+        expr_context.update_depends_on(self.depends_on)
         if indices == ():
             return prim.Variable(self.name)
         else:
@@ -191,36 +187,34 @@ class InlinedResult(ImplementedResult):
 
     See also: :class:`pytato.array.ImplInlined`.
     """
-    def __init__(self, expr: ScalarExpression, array: Array,
-            reduction_bounds: ReductionBounds, depends_on: FrozenSet[str]):
-        super().__init__(array)
+    def __init__(self, expr: ScalarExpression,
+            reduction_bounds: ReductionBounds,
+            depends_on: FrozenSet[str]):
         self.expr = expr
         self.reduction_bounds = dict(reduction_bounds)
         self.depends_on = depends_on
 
-    @classmethod
-    def from_loopy_expression(cls,
+    @staticmethod
+    def from_loopy_expression(
             loopy_expr: ScalarExpression,
-            loopy_expr_context: LoopyExpressionContext,
-            array: Array):
-        return cls(loopy_expr,
-                array,
+            loopy_expr_context: LoopyExpressionContext) -> InlinedResult:
+        return InlinedResult(loopy_expr,
                 loopy_expr_context.reduction_bounds,
                 loopy_expr_context.depends_on)
 
     def to_loopy_expression(self, indices: SymbolicIndex,
             expr_context: LoopyExpressionContext) -> ScalarExpression:
-        assert len(indices) == self.array.ndim
         substitutions = {f"_{d}": i for d, i in enumerate(indices)}
 
-        reduction_start = 0
-        if expr_context.reduction_bounds:
-            reduction_start = 1 + max(expr_context.reduction_bounds)
+        reduction_start = len(expr_context.reduction_bounds)
 
-        for i, bounds in self.reduction_bounds.items():
+        # Rename reductions in expression not to conflict with those in expr_context.
+        for i, (prev_name, bounds) in enumerate(self.reduction_bounds.items()):
             j = i + reduction_start
-            substitutions[f"_r{i}"] = var(f"_r{j}")
-            expr_context.reduction_bounds[j] = bounds
+            new_name = f"_r{j}"
+            assert new_name not in expr_context.reduction_bounds
+            substitutions[prev_name] = var(new_name)
+            expr_context.reduction_bounds[new_name] = bounds
 
         expr_context.update_depends_on(self.depends_on)
 
@@ -294,7 +288,7 @@ class CodeGenMapper(pytato.transform.Mapper):
         kernel = state.kernel.copy(args=state.kernel.args + [arg])
         state.update_kernel(kernel)
 
-        result = StoredResult(expr.name, expr)
+        result = StoredResult(expr.name, frozenset())
         state.results[expr] = result
         return result
 
@@ -318,7 +312,7 @@ class CodeGenMapper(pytato.transform.Mapper):
         kernel = state.kernel.copy(args=state.kernel.args + [arg])
         state.update_kernel(kernel)
 
-        result = StoredResult(expr.name, expr)
+        result = StoredResult(expr.name, frozenset())
         state.results[expr] = result
         return result
 
@@ -334,7 +328,7 @@ class CodeGenMapper(pytato.transform.Mapper):
         x2_result = self.rec(expr.x2, state)
 
         loopy_expr_context = LoopyExpressionContext(state)
-        loopy_expr_context.reduction_bounds[0] = (0, expr.x2.shape[0])
+        loopy_expr_context.reduction_bounds["_r0"] = (0, expr.x2.shape[0])
 
         # Figure out inames.
         x1_inames = []
@@ -363,9 +357,80 @@ class CodeGenMapper(pytato.transform.Mapper):
                 expr=inner_expr,
                 allow_simultaneous=False)
 
-        result = InlinedResult.from_loopy_expression(
-                loopy_expr, loopy_expr_context, expr)
+        inlined_result = InlinedResult.from_loopy_expression(loopy_expr,
+                loopy_expr_context)
+
+        output_name = state.var_name_gen("matmul")
+
+        insn_id = add_store(output_name, expr, inlined_result, state,
+                output_to_temporary=True)
+
+        result = StoredResult(output_name, frozenset([insn_id]))
+
+        state.results[expr] = result
+        return result
+
+    def map_stack(self, expr: Stack, state: CodeGenState) -> ImplementedResult:
+        if expr in state.results:
+            return state.results[expr]
+
+        out_name = state.var_name_gen("stack")
+
+        out = lp.TemporaryVariable(out_name,
+                dtype=expr.dtype,
+                shape=expr.shape,
+                address_space=lp.AddressSpace.GLOBAL)
+
+        inames = []
+        for j in range(expr.ndim - 1):
+            if j >= expr.axis:
+                j += 1
+            inames.append(state.var_name_gen(f"{out_name}_dim{j}"))
+        indices = tuple(var(iname) for iname in inames)
+
+        reduction_bounds = {}
+        depends_on: FrozenSet[str] = frozenset()
+        new_insns = []
+
+        for i, array in enumerate(expr.arrays):
+            loopy_expr_context = LoopyExpressionContext(state)
+            loopy_expr = (
+                    self.rec(array, state)
+                    .to_loopy_expression(indices, loopy_expr_context))
+            loopy_expr = rename_reductions(
+                    loopy_expr, loopy_expr_context,
+                    lambda old_iname: state.var_name_gen(f"{out_name}{old_iname}"))
+
+            reduction_bounds.update(loopy_expr_context.reduction_bounds)
+
+            assignee_indices = list(indices)
+            assignee_indices.insert(expr.axis, i)
+            assignee = var(out_name)[tuple(assignee_indices)]
+
+            insn_id = state.insn_id_gen(f"{out_name}_{i}")
+
+            from loopy.kernel.instruction import make_assignment
+            insn = make_assignment((assignee,),
+                    loopy_expr,
+                    id=insn_id,
+                    within_inames=frozenset(inames),
+                    depends_on=loopy_expr_context.depends_on | depends_on)
+
+            depends_on = frozenset([insn_id])
+            new_insns.append(insn)
+
+        # Update kernel.
+        kernel = state.kernel
+        domain = domain_for_shape(tuple(inames),
+                expr.arrays[0].shape, reduction_bounds)
+        temporary_variables = kernel.temporary_variables.copy()
+        temporary_variables[out_name] = out
+        kernel = kernel.copy(domains=kernel.domains + [domain],
+                instructions=kernel.instructions + new_insns,
+                temporary_variables=temporary_variables)
+        state.update_kernel(kernel)
 
+        result = StoredResult(out_name, depends_on)
         state.results[expr] = result
         return result
 
@@ -383,8 +448,7 @@ class CodeGenMapper(pytato.transform.Mapper):
                 self.rec(expr.array, state)
                 .to_loopy_expression(indices, loopy_expr_context))
 
-        result = InlinedResult.from_loopy_expression(
-                loopy_expr, loopy_expr_context, expr)
+        result = InlinedResult.from_loopy_expression(loopy_expr, loopy_expr_context)
 
         state.results[expr] = result
         return result
@@ -392,7 +456,7 @@ class CodeGenMapper(pytato.transform.Mapper):
     def _indices_for_roll(self, expr: Roll) -> SymbolicIndex:
         indices = [var(f"_{d}") for d in range(expr.ndim)]
         axis = expr.axis
-        indices[axis] = (indices[axis] + expr.shift) % expr.shape[axis]
+        indices[axis] = (indices[axis] - expr.shift) % expr.shape[axis]
         return tuple(indices)
 
     def _indices_for_axis_permutation(self, expr: AxisPermutation) -> SymbolicIndex:
@@ -416,11 +480,12 @@ class CodeGenMapper(pytato.transform.Mapper):
 
         # TODO: Respect tags.
 
-        expr_context = LoopyExpressionContext(state,
+        loopy_expr_context = LoopyExpressionContext(state,
                 local_namespace=expr.bindings)
-        loopy_expr = self.exprgen_mapper(expr.expr, expr_context)
+        loopy_expr = self.exprgen_mapper(expr.expr, loopy_expr_context)
 
-        result = InlinedResult(loopy_expr, expr, {}, frozenset())
+        result = InlinedResult.from_loopy_expression(loopy_expr,
+                loopy_expr_context)
         state.results[expr] = result
         return result
 
@@ -528,28 +593,31 @@ def domain_for_shape(dim_names: Tuple[str, ...],
     return dom
 
 
-def add_output(name: str, expr: Array, state: CodeGenState,
-        mapper: CodeGenMapper) -> None:
-    """Add an output argument to the kernel.
-    """
-    result = mapper(expr, state)
+def add_store(name: str, expr: Array, result: ImplementedResult,
+       state: CodeGenState, output_to_temporary: bool = False) -> str:
+    """Add an instruction that stores to a variable in the kernel.
+
+    :param name: name of the output array, which is created
+    :param expr: the :class:`~pytato.Array` to store
+    :param result: the corresponding :class:`ImplementedResult`
+    :param state: code generation state
+    :param output_to_temporary: whether to generate an output argument (default)
+        or a temporary variable
 
+    :returns: the id of the generated instruction
+    """
     # Get expression.
     inames = tuple(
             state.var_name_gen(f"{name}_dim{d}")
             for d in range(expr.ndim))
     indices = tuple(prim.Variable(iname) for iname in inames)
-    expr_context = LoopyExpressionContext(state)
-    copy_expr = result.to_loopy_expression(indices, expr_context)
-
-    # Make the reduction inames/substitute them into expression.
-    reduction_inames = tuple(
-            state.var_name_gen(f"{name}_red{i}")
-            for i in range(len(expr_context.reduction_bounds)))
-    substitutions = {
-            f"_r{i}": var(reduction_inames[i])
-            for i in expr_context.reduction_bounds}
-    copy_expr = scalar_expr.substitute(copy_expr, substitutions)
+    loopy_expr_context = LoopyExpressionContext(state)
+    loopy_expr = result.to_loopy_expression(indices, loopy_expr_context)
+
+    # Rename reductions to names suitable as inames.
+    loopy_expr = rename_reductions(
+            loopy_expr, loopy_expr_context,
+            lambda old_name: state.var_name_gen(f"{name}{old_name}"))
 
     # Make the instruction
     from loopy.kernel.instruction import make_assignment
@@ -557,31 +625,67 @@ def add_output(name: str, expr: Array, state: CodeGenState,
         assignee = prim.Variable(name)[indices]
     else:
         assignee = prim.Variable(name)
+    insn_id = state.insn_id_gen(f"{name}_store")
     insn = make_assignment((assignee,),
-            copy_expr,
-            id=state.insn_id_gen(f"{name}_copy"),
+            loopy_expr,
+            id=insn_id,
             within_inames=frozenset(inames),
-            depends_on=expr_context.depends_on)
+            depends_on=loopy_expr_context.depends_on)
 
     # Get the domain.
-    reduction_bounds = {
-            reduction_inames[i]: bounds
-            for i, bounds in expr_context.reduction_bounds.items()}
-    domain = domain_for_shape(inames, expr.shape, reduction_bounds)
-
-    # Get the argument.
-    arg = lp.GlobalArg(name,
-            shape=expr.shape,
-            dtype=expr.dtype,
-            order="C",
-            is_output_only=True)
+    domain = domain_for_shape(inames, expr.shape,
+            loopy_expr_context.reduction_bounds)
 
     # Update the kernel.
     kernel = state.kernel
-    kernel = kernel.copy(args=kernel.args + [arg],
-            instructions=kernel.instructions + [insn],
-            domains=kernel.domains + [domain])
+
+    if output_to_temporary:
+        tvar = lp.TemporaryVariable(name,
+                dtype=expr.dtype,
+                shape=expr.shape,
+                address_space=lp.AddressSpace.GLOBAL)
+        temporary_variables = kernel.temporary_variables.copy()
+        temporary_variables[name] = tvar
+        kernel = kernel.copy(temporary_variables=temporary_variables,
+                domains=kernel.domains + [domain],
+                instructions=kernel.instructions + [insn])
+    else:
+        arg = lp.GlobalArg(name,
+                shape=expr.shape,
+                dtype=expr.dtype,
+                order="C",
+                is_output_only=True)
+        kernel = kernel.copy(args=kernel.args + [arg],
+                domains=kernel.domains + [domain],
+                instructions=kernel.instructions + [insn])
+
     state.update_kernel(kernel)
+    return insn_id
+
+
+def rename_reductions(
+        loopy_expr: ScalarExpression,
+        loopy_expr_context: LoopyExpressionContext,
+        var_name_gen: Callable[[str], str]) -> ScalarExpression:
+    """Rename the reductions in *loopy_expr* and *loopy_expr_context* using the
+    callable *var_name_gen.*
+    """
+    new_reduction_inames = tuple(
+            var_name_gen(old_iname)
+            for old_iname in loopy_expr_context.reduction_bounds)
+
+    substitutions = dict(zip(
+            loopy_expr_context.reduction_bounds,
+            map(var, new_reduction_inames)))
+
+    result = scalar_expr.substitute(loopy_expr, substitutions)
+
+    new_reduction_bounds = {
+            substitutions[old_iname].name: bounds
+            for old_iname, bounds in loopy_expr_context.reduction_bounds.items()}
+
+    loopy_expr_context.reduction_bounds = new_reduction_bounds
+    return result
 
 # }}}
 
@@ -638,7 +742,7 @@ def generate_loopy(result: Union[Array, DictOfNamedArrays],
 
     # Generate code for outputs.
     for name, expr in outputs.items():
-        add_output(name, expr, state, mapper)
+        add_store(name, expr, mapper(expr, state), state)
 
     # Collect bound arguments.
     bound_arguments = {}
diff --git a/test/test_codegen.py b/test/test_codegen.py
index e28d32d..f96f8a1 100755
--- a/test/test_codegen.py
+++ b/test/test_codegen.py
@@ -163,7 +163,7 @@ def test_roll(ctx_factory, shift, axis):
             pt.roll(x, shift=shift, axis=axis),
             target=pt.PyOpenCLTarget(queue))
 
-    x_in = np.array([[1., 2.], [3., 4.]])
+    x_in = np.arange(1., 10.).reshape(3, 3)
 
     _, (x_out,) = prog(x=x_in)
 
@@ -325,6 +325,32 @@ def test_slice(ctx_factory, shape):
         assert (x_out == x_ref).all()
 
 
+@pytest.mark.parametrize("input_dims", (1, 2, 3))
+def test_stack(ctx_factory, input_dims):
+    cl_ctx = ctx_factory()
+    queue = cl.CommandQueue(cl_ctx)
+
+    shape = (2, 2, 2)[:input_dims]
+
+    from numpy.random import default_rng
+    rng = default_rng()
+    x_in = rng.random(size=shape)
+    y_in = rng.random(size=shape)
+
+    namespace = pt.Namespace()
+    x = pt.make_data_wrapper(namespace, x_in)
+    y = pt.make_data_wrapper(namespace, y_in)
+
+    for axis in range(0, 1 + input_dims):
+        prog = pt.generate_loopy(
+                pt.stack((x, y), axis=axis),
+                target=pt.PyOpenCLTarget(queue))
+        print(prog.program)
+
+        _, (out,) = prog()
+        assert (out == np.stack((x_in, y_in), axis=axis)).all()
+
+
 if __name__ == "__main__":
     if len(sys.argv) > 1:
         exec(sys.argv[1])
diff --git a/test/test_pytato.py b/test/test_pytato.py
index 45c1bad..49444fa 100755
--- a/test/test_pytato.py
+++ b/test/test_pytato.py
@@ -93,6 +93,27 @@ def test_slice_input_validation():
         a[10]
 
 
+def test_stack_input_validation():
+    namespace = pt.Namespace()
+
+    x = pt.make_placeholder(namespace, "x", (10, 10), np.float)
+    y = pt.make_placeholder(namespace, "y", (1, 10), np.float)
+
+    assert pt.stack((x, x, x), axis=0).shape == (3, 10, 10)
+
+    pt.stack((x,), axis=0)
+    pt.stack((x,), axis=1)
+
+    with pytest.raises(ValueError):
+        pt.stack(())
+
+    with pytest.raises(ValueError):
+        pt.stack((x, y))
+
+    with pytest.raises(ValueError):
+        pt.stack((x, x), axis=3)
+
+
 if __name__ == "__main__":
     if len(sys.argv) > 1:
         exec(sys.argv[1])
-- 
GitLab


From 4ea15e688c9dcb0a03d3a0c0dc3ec0a28fb83fbc Mon Sep 17 00:00:00 2001
From: Matt Wala <wala1@illinois.edu>
Date: Tue, 21 Jul 2020 22:46:54 -0500
Subject: [PATCH 13/52] Add advection example

---
 examples/advection.py | 46 +++++++++++++++++++++++++++----------------
 examples/dg_tools.py  | 32 ++++++++++++++----------------
 2 files changed, 44 insertions(+), 34 deletions(-)
 mode change 100644 => 100755 examples/advection.py

diff --git a/examples/advection.py b/examples/advection.py
old mode 100644
new mode 100755
index de6c7ca..a599b05
--- a/examples/advection.py
+++ b/examples/advection.py
@@ -1,3 +1,4 @@
+#!/usr/bin/env python
 import numpy as np
 import pytest
 import functools
@@ -135,7 +136,7 @@ def test_advection_convergence(order, flux_type):
     errors = []
     hs = []
 
-    import arrayzy as az
+    import pytato as pt
     import pyopencl as cl
     cl_ctx = cl.create_some_context()
     queue = cl.CommandQueue(cl_ctx)
@@ -143,15 +144,21 @@ def test_advection_convergence(order, flux_type):
     for nelements in (8, 12, 16, 20):
         discr = DGDiscr1D(0, 2*np.pi, nelements=nelements, nnodes=order)
         u_initial = np.sin(discr.nodes())
-        with az.Context(queue) as cb:
-            u = cb.argument("u", shape="(nelements, nnodes)", dtype=np.float64)
-            op = AdvectionOperator(
-                    discr, c=1, flux_type=flux_type, dg_ops=DGOps1D(discr, cb))
-            cb.output(op.apply(u))
-        prog = cb.build()
+
+        ns = pt.Namespace()
+        pt.make_size_param(ns, "nelements")
+        pt.make_size_param(ns, "nnodes")
+        u = pt.make_placeholder(ns,
+                "u", shape="(nelements, nnodes)", dtype=np.float64)
+        op = AdvectionOperator(discr, c=1, flux_type=flux_type,
+                dg_ops=DGOps1D(discr, ns))
+        result = op.apply(u)
+
+        prog = pt.generate_loopy(result, target=pt.PyOpenCLTarget(queue))
+
         u = rk4(lambda t, y: prog(
-            u=y.reshape(nelements, order))[1][0].reshape(-1),
-            u_initial, t_initial=0, t_final=np.pi, dt=0.01)
+                u=y.reshape(nelements, order))[1][0].reshape(-1),
+                u_initial, t_initial=0, t_final=np.pi, dt=0.01)
         u_ref = -u_initial
         hs.append(discr.h)
         errors.append(integrate(discr, (u - u_ref)**2)**0.5)
@@ -161,7 +168,7 @@ def test_advection_convergence(order, flux_type):
 
 
 def main():
-    import arrayzy as az
+    import pytato as pt
     import pyopencl as cl
     cl_ctx = cl.create_some_context()
     queue = cl.CommandQueue(cl_ctx)
@@ -171,17 +178,22 @@ def main():
 
     discr = DGDiscr1D(0, 2*np.pi, nelements=nelements, nnodes=nnodes)
 
-    with az.Context(queue) as cb:
-        u = cb.argument("u", shape="(nelements, nnodes)", dtype=np.float64)
-        op = AdvectionOperator(
-                discr, c=1, flux_type="central", dg_ops=DGOps1D(discr, cb))
-        cb.output(op.apply(u))
+    ns = pt.Namespace()
+    pt.make_size_param(ns, "nelements")
+    pt.make_size_param(ns, "nnodes")
+    u = pt.make_placeholder(ns,
+            "u", shape="(nelements, nnodes)", dtype=np.float64)
+    op = AdvectionOperator(discr, c=1, flux_type="central",
+            dg_ops=DGOps1D(discr, ns))
+    result = op.apply(u)
 
-    prog = cb.build()
-    u = np.sin(discr.nodes())
+    prog = pt.generate_loopy(result, target=pt.PyOpenCLTarget(queue))
     print(prog.program)
+
+    u = np.sin(discr.nodes())
     print(prog(u=u.reshape(nelements, nnodes))[1][0])
 
 
 if __name__ == "__main__":
+    pytest.main([__file__])
     main()
diff --git a/examples/dg_tools.py b/examples/dg_tools.py
index 3a3fca3..47ee6fb 100644
--- a/examples/dg_tools.py
+++ b/examples/dg_tools.py
@@ -13,6 +13,8 @@ Notation convention for operator shapes
 
 import functools
 
+import pytato as pt
+
 
 memoized = functools.lru_cache(maxsize=None)
 
@@ -370,9 +372,8 @@ class DGOps1DRef(AbstractDGOps1D):
 def matrix_getter(name, shape):
 
     def getter(self):
-        mat = self.ctx.argument(name, shape, np.float64)
-        self.ctx.bind(mat, getattr(self.discr, name))
-        return mat
+        data = getattr(self.discr, name)
+        return pt.make_data_wrapper(self.namespace, data, name=name, shape=shape)
 
     return property(memoized(getter))
 
@@ -381,11 +382,11 @@ class DGOps1D(AbstractDGOps1D):
 
     @AbstractDGOps1D.array_ops.getter
     def array_ops(self):
-        return self.ctx
+        return pt
 
-    def __init__(self, discr, ctx):
+    def __init__(self, discr, namespace):
         self.discr = discr
-        self.ctx = ctx
+        self.namespace = namespace
 
     _normals = matrix_getter("normals", "(nelements, 2)")
     _interp_mat = matrix_getter("interp", "(2, nnodes)")
@@ -398,23 +399,20 @@ class DGOps1D(AbstractDGOps1D):
         return self._normals
 
     def interp(self, vec):
-        return self.ctx.matmul(self._interp_mat, vec.T, name="_interp").T
+        return pt.matmul(self._interp_mat, vec.T).T
 
     def inv_mass(self, vec):
-        return self.ctx.matmul(self._inv_mass_mat, vec.T, name="_inv_mass").T
+        return pt.matmul(self._inv_mass_mat, vec.T).T
 
     def stiffness(self, vec):
-        return self.ctx.matmul(self._stiffness_mat, vec.T, name="_stiffness").T
+        return pt.matmul(self._stiffness_mat, vec.T).T
 
     def face_mass(self, vec):
-        return self.ctx.matmul(self._face_mass_mat, vec.T, name="_face_mass").T
+        return pt.matmul(self._face_mass_mat, vec.T).T
 
     def face_swap(self, vec):
-        return self.ctx.stack(
+        return pt.stack(
                 (
-                    self.ctx.roll(vec[:, 1], +1),
-                    self.ctx.roll(vec[:, 0], -1)),
-                axis=1,
-                name="_face_swap")
-
-
+                    pt.roll(vec[:, 1], +1),
+                    pt.roll(vec[:, 0], -1)),
+                axis=1)
-- 
GitLab


From bb2e94c690561107fe2eadb4eed6fc13fdaa7f10 Mon Sep 17 00:00:00 2001
From: Matt Wala <wala1@illinois.edu>
Date: Tue, 21 Jul 2020 22:48:40 -0500
Subject: [PATCH 14/52] Add examples CI job

---
 .gitlab-ci.yml | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 56cf388..ea72e3a 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -15,6 +15,21 @@ Python 3 POCL:
       junit: test/pytest.xml
 
 
+Python 3 POCL Examples:
+  script:
+  - export PY_EXE=python3
+  - export PYOPENCL_TEST=portable:pthread
+  - export EXTRA_INSTALL="pyopencl"
+  - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-py-project-and-run-examples.sh
+  - ". ./build-py-project-and-run-examples.sh"
+  tags:
+  - python3
+  - pocl
+  - large-node
+  except:
+  - tags
+
+
 Pylint:
   script:
   - export PY_EXE=python3
-- 
GitLab


From 675b96d3d84bcb5219c7045718b9e8d76fdf3c48 Mon Sep 17 00:00:00 2001
From: Matt Wala <wala1@illinois.edu>
Date: Tue, 21 Jul 2020 22:52:23 -0500
Subject: [PATCH 15/52] flake8 fix

---
 pytato/array.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/pytato/array.py b/pytato/array.py
index 495670f..9e14363 100644
--- a/pytato/array.py
+++ b/pytato/array.py
@@ -141,15 +141,15 @@ import pytato.scalar_expr as scalar_expr
 from pytato.scalar_expr import ScalarExpression
 
 
-# Support ellipsis as a type.
+# Get a type variable that represents the type of '...'
 # https://github.com/python/typing/issues/684#issuecomment-548203158
 if TYPE_CHECKING:
     from enum import Enum
-    class ellipsis(Enum):
+    class EllipsisType(Enum):
         Ellipsis = "..."
-    Ellipsis = ellipsis.Ellipsis
+    Ellipsis = EllipsisType.Ellipsis
 else:
-    ellipsis = type(Ellipsis)
+    EllipsisType = type(Ellipsis)
 
 
 # {{{ dotted name
@@ -379,7 +379,7 @@ def normalize_shape(
 # }}}
 
 
-SliceItem = Union[int, slice, None, ellipsis]
+SliceItem = Union[int, slice, None, EllipsisType]
 
 
 # {{{ array inteface
-- 
GitLab


From ea4a5a56082cd99f19bfd0cacbf4b9ac2f9f37c0 Mon Sep 17 00:00:00 2001
From: Matt Wala <wala1@illinois.edu>
Date: Tue, 21 Jul 2020 22:56:59 -0500
Subject: [PATCH 16/52] More flake8 fixes

---
 pytato/__init__.py | 3 ++-
 pytato/array.py    | 2 ++
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/pytato/__init__.py b/pytato/__init__.py
index 06c9bed..5bc498c 100644
--- a/pytato/__init__.py
+++ b/pytato/__init__.py
@@ -41,7 +41,8 @@ __all__ = (
         "DottedName", "Namespace", "Array", "DictOfNamedArrays",
         "Tag", "UniqueTag",
 
-        "make_dict_of_named_arrays", "make_placeholder", "make_size_param", "make_data_wrapper",
+        "make_dict_of_named_arrays", "make_placeholder", "make_size_param",
+        "make_data_wrapper",
 
         "matmul", "roll", "transpose", "stack",
 
diff --git a/pytato/array.py b/pytato/array.py
index 9e14363..e25b648 100644
--- a/pytato/array.py
+++ b/pytato/array.py
@@ -145,8 +145,10 @@ from pytato.scalar_expr import ScalarExpression
 # https://github.com/python/typing/issues/684#issuecomment-548203158
 if TYPE_CHECKING:
     from enum import Enum
+
     class EllipsisType(Enum):
         Ellipsis = "..."
+
     Ellipsis = EllipsisType.Ellipsis
 else:
     EllipsisType = type(Ellipsis)
-- 
GitLab


From fabb31beabe19b6d7b47a1c8a0ce622ed84dd080 Mon Sep 17 00:00:00 2001
From: Matt Wala <wala1@illinois.edu>
Date: Tue, 21 Jul 2020 23:17:36 -0500
Subject: [PATCH 17/52] Doc/var naming fixes

---
 pytato/codegen.py | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/pytato/codegen.py b/pytato/codegen.py
index 7f7aa45..86a84f2 100644
--- a/pytato/codegen.py
+++ b/pytato/codegen.py
@@ -108,8 +108,7 @@ class LoopyExpressionContext(object):
 
     .. attribute:: reduction_bounds
 
-        A mapping from reduction iname number (for reduction inames ``_r0``,
-        ``_r1``, ...) to reduction bounds in the expression.
+        A mapping from inames to reduction bounds in the expression.
 
     .. automethod:: update_depends_on
     .. automethod:: lookup
@@ -209,11 +208,10 @@ class InlinedResult(ImplementedResult):
         reduction_start = len(expr_context.reduction_bounds)
 
         # Rename reductions in expression not to conflict with those in expr_context.
-        for i, (prev_name, bounds) in enumerate(self.reduction_bounds.items()):
-            j = i + reduction_start
-            new_name = f"_r{j}"
+        for i, (old_name, bounds) in enumerate(self.reduction_bounds.items()):
+            new_name = f"_r{i + reduction_start}"
             assert new_name not in expr_context.reduction_bounds
-            substitutions[prev_name] = var(new_name)
+            substitutions[old_name] = var(new_name)
             expr_context.reduction_bounds[new_name] = bounds
 
         expr_context.update_depends_on(self.depends_on)
-- 
GitLab


From 9b90713967f0a4512930ceeeaff6f3e74e15837d Mon Sep 17 00:00:00 2001
From: Matt Wala <wala1@illinois.edu>
Date: Wed, 22 Jul 2020 11:29:58 -0500
Subject: [PATCH 18/52] Remove print statement

---
 test/test_codegen.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/test/test_codegen.py b/test/test_codegen.py
index f96f8a1..950aac8 100755
--- a/test/test_codegen.py
+++ b/test/test_codegen.py
@@ -345,7 +345,6 @@ def test_stack(ctx_factory, input_dims):
         prog = pt.generate_loopy(
                 pt.stack((x, y), axis=axis),
                 target=pt.PyOpenCLTarget(queue))
-        print(prog.program)
 
         _, (out,) = prog()
         assert (out == np.stack((x_in, y_in), axis=axis)).all()
-- 
GitLab


From 23735e4d3390737462e7b09e5ecf90d697e598d9 Mon Sep 17 00:00:00 2001
From: Matt Wala <wala1@illinois.edu>
Date: Wed, 22 Jul 2020 13:16:09 -0500
Subject: [PATCH 19/52] NumPy interface: remove tags argument

The tags argument is unnecessary and it constrains the implementation
to return a newly created node when it doesn't have to
---
 pytato/array.py | 21 ++++++---------------
 1 file changed, 6 insertions(+), 15 deletions(-)

diff --git a/pytato/array.py b/pytato/array.py
index e25b648..c4d0146 100644
--- a/pytato/array.py
+++ b/pytato/array.py
@@ -50,8 +50,7 @@ NumPy-Like Interface
 --------------------
 
 These functions generally follow the interface of the corresponding functions in
-:mod:`numpy`, with the notable exception of the addition of an optional *tags*
-argument for implementation tags. Not all NumPy features may be supported.
+:mod:`numpy`, but not all NumPy features may be supported.
 
 .. autofunction:: matmul
 .. autofunction:: roll
@@ -1302,12 +1301,11 @@ class LoopyFunction(DictOfNamedArrays):
 
 # {{{ end-user facing
 
-def matmul(x1: Array, x2: Array, tags: Optional[TagsType] = None) -> Array:
+def matmul(x1: Array, x2: Array) -> Array:
     """Matrix multiplication.
 
     :param x1: first argument
     :param x2: second argument
-    :param tags: implementation tags
     """
     if x1.namespace is not x2.namespace:
         raise ValueError("namespace mismatch")
@@ -1325,17 +1323,15 @@ def matmul(x1: Array, x2: Array, tags: Optional[TagsType] = None) -> Array:
     if x1.shape[-1] != x2.shape[0]:
         raise ValueError("dimension mismatch")
 
-    return MatrixProduct(x1, x2, tags)
+    return MatrixProduct(x1, x2)
 
 
-def roll(a: Array, shift: int, axis: Optional[int] = None,
-        tags: Optional[TagsType] = None) -> Array:
+def roll(a: Array, shift: int, axis: Optional[int] = None) -> Array:
     """Roll array elements along a given axis.
 
     :param a: input array
     :param shift: the number of places by which elements are shifted
     :param axis: axis along which the array is shifted
-    :param tags: implementation tags
     """
     if a.ndim == 0:
         return a
@@ -1356,15 +1352,13 @@ def roll(a: Array, shift: int, axis: Optional[int] = None,
     return Roll(a, shift, axis)
 
 
-def transpose(a: Array, axes: Optional[Sequence[int]] = None,
-        tags: Optional[TagsType] = None) -> Array:
+def transpose(a: Array, axes: Optional[Sequence[int]] = None) -> Array:
     """Reverse or permute the axes of an array.
 
     :param a: input array
     :param axes: if specified, a permutation of ``[0, 1, ..., a.ndim-1]``. Defaults
         to ``range(a.ndim)[::-1]``. The returned axis at index *i* corresponds to
         the input axis *axes[i]*.
-    :param tags: implementation tags
     """
     if axes is None:
         axes = range(a.ndim)[::-1]
@@ -1378,8 +1372,7 @@ def transpose(a: Array, axes: Optional[Sequence[int]] = None,
     return AxisPermutation(a, tuple(axes))
 
 
-def stack(arrays: Sequence[Array], axis: int = 0,
-        tags: Optional[TagsType] = None) -> Array:
+def stack(arrays: Sequence[Array], axis: int = 0) -> Array:
     """Join a sequence of arrays along a new axis.
 
     The *axis* parameter specifies the position of the new axis in the result.
@@ -1394,8 +1387,6 @@ def stack(arrays: Sequence[Array], axis: int = 0,
         :class:`Array` of the same shape
     :param axis: the position of the new axis, which will have length
         *len(arrays)*
-    :param tags: implementation tags
-
     """
 
     if not arrays:
-- 
GitLab


From 01805a14891e53196c4f9d877176e2d6f79ce51e Mon Sep 17 00:00:00 2001
From: Matt Wala <wala1@illinois.edu>
Date: Wed, 22 Jul 2020 13:25:05 -0500
Subject: [PATCH 20/52] Change temporary variable address space to auto

---
 pytato/codegen.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pytato/codegen.py b/pytato/codegen.py
index 86a84f2..54c1503 100644
--- a/pytato/codegen.py
+++ b/pytato/codegen.py
@@ -377,7 +377,7 @@ class CodeGenMapper(pytato.transform.Mapper):
         out = lp.TemporaryVariable(out_name,
                 dtype=expr.dtype,
                 shape=expr.shape,
-                address_space=lp.AddressSpace.GLOBAL)
+                address_space=lp.auto)
 
         inames = []
         for j in range(expr.ndim - 1):
@@ -641,7 +641,7 @@ def add_store(name: str, expr: Array, result: ImplementedResult,
         tvar = lp.TemporaryVariable(name,
                 dtype=expr.dtype,
                 shape=expr.shape,
-                address_space=lp.AddressSpace.GLOBAL)
+                address_space=lp.auto)
         temporary_variables = kernel.temporary_variables.copy()
         temporary_variables[name] = tvar
         kernel = kernel.copy(temporary_variables=temporary_variables,
-- 
GitLab


From 60af58bd0b8a11204d5eef8f663ab8d81cfb7b08 Mon Sep 17 00:00:00 2001
From: Matt Wala <wala1@illinois.edu>
Date: Wed, 22 Jul 2020 15:25:47 -0500
Subject: [PATCH 21/52] setup.py: Change python_requires.to ~=3.8 (uses
 typing.Protocol)

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 6bc7b74..094aa6e 100644
--- a/setup.py
+++ b/setup.py
@@ -32,7 +32,7 @@ setup(name="pytato",
           'Topic :: Software Development :: Libraries',
           ],
 
-      python_requires="~=3.6",
+      python_requires="~=3.8",
       install_requires=[
           "loo.py",
           ],
-- 
GitLab


From 4e7c310920ae1e6faea8ac8495ef70534241a117 Mon Sep 17 00:00:00 2001
From: Matt Wala <wala1@illinois.edu>
Date: Wed, 22 Jul 2020 15:49:14 -0500
Subject: [PATCH 22/52] Temporaries with symbolic shape must be global

---
 pytato/codegen.py | 22 +++++++++++++---------
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/pytato/codegen.py b/pytato/codegen.py
index 54c1503..bea65db 100644
--- a/pytato/codegen.py
+++ b/pytato/codegen.py
@@ -69,6 +69,7 @@ Code Generation Internals
 .. autoclass:: InlinedExpressionGenMapper
 
 .. autofunction:: domain_for_shape
+.. autofunction:: get_loopy_temporary
 .. autofunction:: add_store
 .. autofunction:: rename_reductions
 
@@ -373,11 +374,7 @@ class CodeGenMapper(pytato.transform.Mapper):
             return state.results[expr]
 
         out_name = state.var_name_gen("stack")
-
-        out = lp.TemporaryVariable(out_name,
-                dtype=expr.dtype,
-                shape=expr.shape,
-                address_space=lp.auto)
+        out = get_loopy_temporary(out_name, expr)
 
         inames = []
         for j in range(expr.ndim - 1):
@@ -638,10 +635,7 @@ def add_store(name: str, expr: Array, result: ImplementedResult,
     kernel = state.kernel
 
     if output_to_temporary:
-        tvar = lp.TemporaryVariable(name,
-                dtype=expr.dtype,
-                shape=expr.shape,
-                address_space=lp.auto)
+        tvar = get_loopy_temporary(name, expr)
         temporary_variables = kernel.temporary_variables.copy()
         temporary_variables[name] = tvar
         kernel = kernel.copy(temporary_variables=temporary_variables,
@@ -661,6 +655,16 @@ def add_store(name: str, expr: Array, result: ImplementedResult,
     return insn_id
 
 
+def get_loopy_temporary(name: str, expr: Array) -> lp.TemporaryVariable:
+    is_shape_symbolic = not all(isinstance(dim, int) for dim in expr.shape)
+    # Only global variables can have symbolic shape.
+    address_space = lp.AddressSpace.GLOBAL if is_shape_symbolic else lp.auto
+    return lp.TemporaryVariable(name,
+            dtype=expr.dtype,
+            shape=expr.shape,
+            address_space=address_space)
+
+
 def rename_reductions(
         loopy_expr: ScalarExpression,
         loopy_expr_context: LoopyExpressionContext,
-- 
GitLab


From ee5dd97c9655bb187e484cee9dac0d7eb903f543 Mon Sep 17 00:00:00 2001
From: Matt Wala <wala1@illinois.edu>
Date: Wed, 22 Jul 2020 16:29:32 -0500
Subject: [PATCH 23/52] Indentation fix

---
 pytato/codegen.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/pytato/codegen.py b/pytato/codegen.py
index bea65db..f74051c 100644
--- a/pytato/codegen.py
+++ b/pytato/codegen.py
@@ -152,13 +152,13 @@ class ImplementedResult(object):
         :param expr_context: the associated expression context. The fields are
             treated as follows:
 
-               - *depends_on* is populated with any dependencies needed for the
-                 generated expression.
+            - *depends_on* is populated with any dependencies needed for the
+              generated expression.
 
-               - *reduction_bounds* is populated with reduction bounds for the
-                 reduction inames in the returned expression. If
-                 *reduction_bounds* is nonempty, then the returned inames are
-                 ensured to be disjoint from those present.
+            - *reduction_bounds* is populated with reduction bounds for the
+              reduction inames in the returned expression. If
+              *reduction_bounds* is nonempty, then the returned inames are
+              ensured to be disjoint from those present.
         """
         raise NotImplementedError
 
-- 
GitLab


From 850980cf009a3f0d50a2c53a3ec0b432f0cd5cec Mon Sep 17 00:00:00 2001
From: Matt Wala <wala1@illinois.edu>
Date: Thu, 23 Jul 2020 16:16:31 -0500
Subject: [PATCH 24/52] Use array_equal when comparing a single element array
 against a potential empty array

---
 test/test_codegen.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/test_codegen.py b/test/test_codegen.py
index 950aac8..7d66a49 100755
--- a/test/test_codegen.py
+++ b/test/test_codegen.py
@@ -61,7 +61,7 @@ def test_scalar_placeholder(ctx_factory):
     prog = pt.generate_loopy(x, target=pt.PyOpenCLTarget(queue))
     x_in = np.array(1)
     _, (x_out,) = prog(x=x_in)
-    assert x_out == x_in
+    assert np.array_equal(x_out, x_in)
 
 
 def test_size_param(ctx_factory):
-- 
GitLab


From 6e3c1ae1c36043ddbf7139d75dd55cc7317702f9 Mon Sep 17 00:00:00 2001
From: Matt Wala <wala1@illinois.edu>
Date: Thu, 23 Jul 2020 16:19:16 -0500
Subject: [PATCH 25/52] Better handling/testing of dtypes in arithmetic

---
 pytato/array.py      |  24 +++++++---
 test/test_codegen.py | 109 ++++++++++++++++++++++++++++++++-----------
 2 files changed, 100 insertions(+), 33 deletions(-)

diff --git a/pytato/array.py b/pytato/array.py
index c4d0146..5d57ad2 100644
--- a/pytato/array.py
+++ b/pytato/array.py
@@ -128,7 +128,7 @@ from numbers import Number
 import operator
 from dataclasses import dataclass
 from typing import (
-        Optional, ClassVar, Dict, Any, Mapping, Iterator, Tuple, Union,
+        Optional, Callable, ClassVar, Dict, Any, Mapping, Iterator, Tuple, Union,
         FrozenSet, Protocol, Sequence, cast, TYPE_CHECKING)
 
 import numpy as np
@@ -380,10 +380,18 @@ def normalize_shape(
 # }}}
 
 
+# {{{ array inteface
+
 SliceItem = Union[int, slice, None, EllipsisType]
 
 
-# {{{ array inteface
+def _truediv_result_type(arg1: Any, arg2: Any) -> np.dtype:
+    dtype = np.result_type(arg1, arg2)
+    if dtype.char in "bhilqBHILQ":
+        return np.float64
+    else:
+        return dtype
+
 
 class Array:
     """
@@ -623,6 +631,7 @@ class Array:
     def _binary_op(self,
             op: Any,
             other: Union[Array, Number],
+            result_type: Callable[[Any, Any], np.dtype] = np.result_type,
             reverse: bool = False) -> Array:
 
         def add_indices(val: prim.Expression) -> prim.Expression:
@@ -636,14 +645,14 @@ class Array:
             first = add_indices(var("_in0"))
             second = other
             bindings = dict(_in0=self)
-            other_dtype = np.array(other).dtype
+            dtype = result_type(other, self.dtype)
         elif isinstance(other, Array):
             if self.shape != other.shape:
                 raise NotImplementedError("broadcasting not supported")
             first = add_indices(var("_in0"))
             second = add_indices(var("_in1"))
             bindings = dict(_in0=self, _in1=other)
-            other_dtype = other.dtype
+            dtype = result_type(other.dtype, self.dtype)
         else:
             raise ValueError("unknown argument")
 
@@ -653,7 +662,6 @@ class Array:
                 bindings["_in1"], bindings["_in0"] = \
                         bindings["_in0"], bindings["_in1"]
 
-        dtype = np.result_type(self.dtype, other_dtype)
         expr = op(first, second)
 
         return IndexLambda(self.namespace,
@@ -685,8 +693,10 @@ class Array:
     __sub__ = partialmethod(_binary_op, operator.sub)
     __rsub__ = partialmethod(_binary_op, operator.sub, reverse=True)
 
-    __truediv__ = partialmethod(_binary_op, operator.truediv)
-    __rtruediv__ = partialmethod(_binary_op, operator.truediv, reverse=True)
+    __truediv__ = partialmethod(_binary_op, operator.truediv,
+            result_type=_truediv_result_type)
+    __rtruediv__ = partialmethod(_binary_op, operator.truediv,
+            result_type=_truediv_result_type, reverse=True)
 
     __neg__ = partialmethod(_unary_op, operator.neg)
 
diff --git a/test/test_codegen.py b/test/test_codegen.py
index 7d66a49..c6887d4 100755
--- a/test/test_codegen.py
+++ b/test/test_codegen.py
@@ -213,6 +213,10 @@ def test_transpose(ctx_factory):
     assert (x_out == x_in.T).all()
 
 
+# Doesn't include: ? (boolean), g (float128), G (complex256)
+ARITH_DTYPES = "bhilqpBHILQPfdFD"
+
+
 def reverse_args(f):
     def wrapper(*args):
         return f(*reversed(args))
@@ -229,36 +233,41 @@ def test_scalar_array_binary_arith(ctx_factory, which, reverse):
     if reverse:
         op = reverse_args(op)
 
-    arg = 2
-
-    x_in = np.array([1., 2., 3., 4., 5.])
-    namespace = pt.Namespace()
-    x = pt.make_data_wrapper(namespace, x_in)
-    prog = pt.generate_loopy(op(arg, x), target=pt.PyOpenCLTarget(queue))
+    x_orig = 7
+    y_orig = np.array([1, 2, 3, 4, 5])
 
-    _, (x_out,) = prog()
-    assert (x_out == op(arg, x_in)).all()
+    for first_dtype in (int, float, complex):
+        namespace = pt.Namespace()
+        x_in = first_dtype(x_orig)
 
+        exprs = {}
+        for dtype in ARITH_DTYPES:
+            y = pt.make_data_wrapper(namespace,
+                    y_orig.astype(dtype), name=f"y{dtype}")
+            exprs[dtype] = op(x_in, y)
 
-@pytest.mark.parametrize("which", ("neg", "pos"))
-def test_unary_arith(ctx_factory, which):
-    cl_ctx = ctx_factory()
-    queue = cl.CommandQueue(cl_ctx)
+        prog = pt.generate_loopy(pt.make_dict_of_named_arrays(exprs),
+                target=pt.PyOpenCLTarget(queue),
+                options=lp.Options(return_dict=True))
 
-    op = getattr(operator, which)
+        _, outputs = prog()
 
-    x_in = np.array([1., 2., 3., 4., 5.])
-    namespace = pt.Namespace()
-    x = pt.make_data_wrapper(namespace, x_in)
-    prog = pt.generate_loopy(op(x), target=pt.PyOpenCLTarget(queue))
+        for dtype in exprs:
+            out = outputs[dtype]
+            out_ref = op(x_in, y_orig.astype(dtype))
 
-    _, (x_out,) = prog()
-    assert (x_out == op(x_in)).all()
+            assert out.dtype == out_ref.dtype, (out.dtype, out_ref.dtype)
+            # truediv in some cases is done in float32 on loopy but float64 in
+            # numpy, hence the reason for allclose.
+            assert np.allclose(out, out_ref), (out, out_ref)
 
 
 @pytest.mark.parametrize("which", ("add", "sub", "mul", "truediv"))
 @pytest.mark.parametrize("reverse", (False, True))
 def test_array_array_binary_arith(ctx_factory, which, reverse):
+    if which == "sub":
+        pytest.skip("https://github.com/inducer/loopy/issues/131")
+
     cl_ctx = ctx_factory()
     queue = cl.CommandQueue(cl_ctx)
 
@@ -266,15 +275,63 @@ def test_array_array_binary_arith(ctx_factory, which, reverse):
     if reverse:
         op = reverse_args(op)
 
-    x_in = np.array([1., 2., 3., 4., 5.])
-    y_in = np.array([6., 7., 8., 9., 10.])
+    x_orig = np.array([1, 2, 3, 4, 5])
+    y_orig = np.array([5, 4, 3, 2, 1])
+
+    for first_dtype in ARITH_DTYPES:
+        namespace = pt.Namespace()
+        x_in = x_orig.astype(first_dtype)
+        x = pt.make_data_wrapper(namespace, x_in, name="x")
+
+        exprs = {}
+        for dtype in ARITH_DTYPES:
+            y = pt.make_data_wrapper(namespace,
+                    y_orig.astype(dtype), name=f"y{dtype}")
+            exprs[dtype] = op(x, y)
+
+        prog = pt.generate_loopy(pt.make_dict_of_named_arrays(exprs),
+                target=pt.PyOpenCLTarget(queue),
+                options=lp.Options(return_dict=True))
+
+        _, outputs = prog()
+
+        for dtype in ARITH_DTYPES:
+            out = outputs[dtype]
+            out_ref = op(x_in, y_orig.astype(dtype))
+
+            assert out.dtype == out_ref.dtype, (out.dtype, out_ref.dtype)
+            # truediv in some cases is done in float32 on loopy but float64 in
+            # numpy, hence the reason for allclose.
+            assert np.allclose(out, out_ref), (out, out_ref)
+
+
+@pytest.mark.parametrize("which", ("neg", "pos"))
+def test_unary_arith(ctx_factory, which):
+    cl_ctx = ctx_factory()
+    queue = cl.CommandQueue(cl_ctx)
+
+    op = getattr(operator, which)
+
+    x_orig = np.array([1, 2, 3, 4, 5])
     namespace = pt.Namespace()
-    x = pt.make_data_wrapper(namespace, x_in)
-    y = pt.make_data_wrapper(namespace, y_in)
-    prog = pt.generate_loopy(op(x, y), target=pt.PyOpenCLTarget(queue))
 
-    _, (out,) = prog()
-    assert (out == op(x_in, y_in)).all()
+    exprs = {}
+    for dtype in ARITH_DTYPES:
+        exprs[dtype] = op(
+                pt.make_data_wrapper(namespace, x_orig.astype(dtype)))
+
+    prog = pt.generate_loopy(pt.make_dict_of_named_arrays(exprs),
+            target=pt.PyOpenCLTarget(queue),
+            options=lp.Options(return_dict=True))
+
+    _, outputs = prog()
+
+    for dtype in ARITH_DTYPES:
+        out = outputs[dtype]
+        out_ref = op(x_orig.astype(dtype))
+
+        assert out.dtype == out_ref.dtype
+        assert np.array_equal(out, out_ref)
 
 
 def generate_test_slices_for_dim(dim_bound):
-- 
GitLab


From 870842e65af7de9ae23c870f860fa0aa0a117629 Mon Sep 17 00:00:00 2001
From: Matt Wala <wala1@illinois.edu>
Date: Fri, 24 Jul 2020 12:32:37 -0500
Subject: [PATCH 26/52] Improve comments

---
 test/test_codegen.py | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/test/test_codegen.py b/test/test_codegen.py
index c6887d4..dea7256 100755
--- a/test/test_codegen.py
+++ b/test/test_codegen.py
@@ -257,8 +257,7 @@ def test_scalar_array_binary_arith(ctx_factory, which, reverse):
             out_ref = op(x_in, y_orig.astype(dtype))
 
             assert out.dtype == out_ref.dtype, (out.dtype, out_ref.dtype)
-            # truediv in some cases is done in float32 on loopy but float64 in
-            # numpy, hence the reason for allclose.
+            # In some cases ops are done in float32 in loopy but float64 in numpy.
             assert np.allclose(out, out_ref), (out, out_ref)
 
 
@@ -276,7 +275,7 @@ def test_array_array_binary_arith(ctx_factory, which, reverse):
         op = reverse_args(op)
 
     x_orig = np.array([1, 2, 3, 4, 5])
-    y_orig = np.array([5, 4, 3, 2, 1])
+    y_orig = np.array([10, 9, 8, 7, 6])
 
     for first_dtype in ARITH_DTYPES:
         namespace = pt.Namespace()
@@ -300,8 +299,7 @@ def test_array_array_binary_arith(ctx_factory, which, reverse):
             out_ref = op(x_in, y_orig.astype(dtype))
 
             assert out.dtype == out_ref.dtype, (out.dtype, out_ref.dtype)
-            # truediv in some cases is done in float32 on loopy but float64 in
-            # numpy, hence the reason for allclose.
+            # In some cases ops are done in float32 in loopy but float64 in numpy.
             assert np.allclose(out, out_ref), (out, out_ref)
 
 
-- 
GitLab


From 8f5594c80eebadc26257282ea95b758394bf3d81 Mon Sep 17 00:00:00 2001
From: Matt Wala <wala1@illinois.edu>
Date: Fri, 24 Jul 2020 12:36:10 -0500
Subject: [PATCH 27/52] Add another case to test_roll

---
 test/test_codegen.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/test_codegen.py b/test/test_codegen.py
index dea7256..4d901b2 100755
--- a/test/test_codegen.py
+++ b/test/test_codegen.py
@@ -149,7 +149,7 @@ def test_codegen_with_DictOfNamedArrays(ctx_factory):  # noqa
     assert (outputs["y_out"] == y_in).all()
 
 
-@pytest.mark.parametrize("shift", (-1, 1, -20, 20))
+@pytest.mark.parametrize("shift", (-1, 1, 0, -20, 20))
 @pytest.mark.parametrize("axis", (0, 1))
 def test_roll(ctx_factory, shift, axis):
     cl_ctx = ctx_factory()
-- 
GitLab


From 1286637de2b736a081cebd7dcecdd8f6ce89e217 Mon Sep 17 00:00:00 2001
From: Matt Wala <wala1@illinois.edu>
Date: Fri, 24 Jul 2020 18:19:33 -0500
Subject: [PATCH 28/52] Better matmul error reporting: - implement __rmatmul__
 - check for scalar before checking namespace attribute

---
 pytato/array.py | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/pytato/array.py b/pytato/array.py
index 5d57ad2..e3d8a83 100644
--- a/pytato/array.py
+++ b/pytato/array.py
@@ -625,8 +625,14 @@ class Array:
     def __ne__(self, other: Any) -> bool:
         return not self.__eq__(other)
 
-    def __matmul__(self, other: Array) -> Array:
-        return matmul(self, other)
+    def __matmul__(self, other: Array, reverse: bool = False) -> Array:
+        first = self
+        second = other
+        if reverse:
+            first, second = second, first
+        return matmul(first, second)
+
+    __rmatmul__ = partialmethod(__matmul__, reverse=True)
 
     def _binary_op(self,
             op: Any,
@@ -1317,9 +1323,6 @@ def matmul(x1: Array, x2: Array) -> Array:
     :param x1: first argument
     :param x2: second argument
     """
-    if x1.namespace is not x2.namespace:
-        raise ValueError("namespace mismatch")
-
     if (
             isinstance(x1, Number)
             or x1.shape == ()
@@ -1327,6 +1330,9 @@ def matmul(x1: Array, x2: Array) -> Array:
             or x2.shape == ()):
         raise ValueError("scalars not allowed as arguments to matmul")
 
+    if x1.namespace is not x2.namespace:
+        raise ValueError("namespace mismatch")
+
     if len(x1.shape) > 2 or len(x2.shape) > 2:
         raise NotImplementedError("broadcasting not supported")
 
-- 
GitLab


From bd4c68c3311f561b0b0715c08f491778d7bc7e99 Mon Sep 17 00:00:00 2001
From: Matt Wala <wala1@illinois.edu>
Date: Fri, 24 Jul 2020 23:47:44 -0500
Subject: [PATCH 29/52] Ensure that dtypes are actually instances of np.dtype

---
 pytato/array.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/pytato/array.py b/pytato/array.py
index e3d8a83..755a9e3 100644
--- a/pytato/array.py
+++ b/pytato/array.py
@@ -1296,7 +1296,7 @@ class SizeParam(InputArgumentBase):
 
     @property
     def dtype(self) -> np.dtype:
-        return np.intp
+        return np.dtype(np.intp)
 
 # }}}
 
@@ -1476,7 +1476,7 @@ def make_dict_of_named_arrays(data: Dict[str, Array]) -> DictOfNamedArrays:
 def make_placeholder(namespace: Namespace,
         name: str,
         shape: ConvertibleToShape,
-        dtype: np.dtype,
+        dtype: Any,
         tags: Optional[TagsType] = None) -> Placeholder:
     """Make a :class:`Placeholder` object.
 
@@ -1484,6 +1484,7 @@ def make_placeholder(namespace: Namespace,
     :param name:       name of the placeholder array
     :param shape:      shape of the placeholder array
     :param dtype:      dtype of the placeholder array
+                       (must be convertible to :class:`numpy.dtype`)
     :param tags:       implementation tags
     """
     if name is None:
@@ -1493,6 +1494,7 @@ def make_placeholder(namespace: Namespace,
         raise ValueError(f"'{name}' is not a Python identifier")
 
     shape = normalize_shape(shape, namespace)
+    dtype = np.dtype(dtype)
 
     return Placeholder(namespace, name, shape, dtype, tags)
 
-- 
GitLab


From 8a13707ee10e25f2dbe8faa04e5312ce54fdb938 Mon Sep 17 00:00:00 2001
From: Matt Wala <wala1@illinois.edu>
Date: Mon, 27 Jul 2020 12:00:42 -0500
Subject: [PATCH 30/52] Make InputArgumentBase fields inherit from superclass

The fields contain useful information for stringification.
---
 pytato/array.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/pytato/array.py b/pytato/array.py
index 755a9e3..5da878b 100644
--- a/pytato/array.py
+++ b/pytato/array.py
@@ -1166,10 +1166,7 @@ class InputArgumentBase(Array):
         Creating multiple instances of any input argument with the same name
         and within the same :class:`~pytato.Namespace` is not allowed.
     """
-
-    # The name uniquely identifies this object in the namespace. Therefore,
-    # subclasses don't have to update *fields*.
-    fields = ("name",)
+    fields = Array.fields + ("name",)
 
     def __init__(self,
             namespace: Namespace,
@@ -1232,6 +1229,8 @@ class DataWrapper(InputArgumentBase):
         Starting with the construction of the :class:`DataWrapper`,
         this array may not be updated in-place.
     """
+    # Not in fields as it may not be hashable.
+    data: DataInterface
 
     mapper_method = "map_data_wrapper"
 
-- 
GitLab


From 607c386c2d7ccbdda493ad061c8b270e1c03718e Mon Sep 17 00:00:00 2001
From: Matt Wala <wala1@illinois.edu>
Date: Mon, 27 Jul 2020 14:31:55 -0500
Subject: [PATCH 31/52] Revert "Make InputArgumentBase fields inherit from
 superclass"

This reverts commit 8a13707ee10e25f2dbe8faa04e5312ce54fdb938.
---
 pytato/array.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/pytato/array.py b/pytato/array.py
index 5da878b..755a9e3 100644
--- a/pytato/array.py
+++ b/pytato/array.py
@@ -1166,7 +1166,10 @@ class InputArgumentBase(Array):
         Creating multiple instances of any input argument with the same name
         and within the same :class:`~pytato.Namespace` is not allowed.
     """
-    fields = Array.fields + ("name",)
+
+    # The name uniquely identifies this object in the namespace. Therefore,
+    # subclasses don't have to update *fields*.
+    fields = ("name",)
 
     def __init__(self,
             namespace: Namespace,
@@ -1229,8 +1232,6 @@ class DataWrapper(InputArgumentBase):
         Starting with the construction of the :class:`DataWrapper`,
         this array may not be updated in-place.
     """
-    # Not in fields as it may not be hashable.
-    data: DataInterface
 
     mapper_method = "map_data_wrapper"
 
-- 
GitLab


From 377d53b6769ac73bbcd38a32c995ac5f77d9ed9f Mon Sep 17 00:00:00 2001
From: Matt Wala <wala1@illinois.edu>
Date: Mon, 27 Jul 2020 14:35:08 -0500
Subject: [PATCH 32/52] Fix Slice fields

---
 pytato/array.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pytato/array.py b/pytato/array.py
index 755a9e3..4382ad7 100644
--- a/pytato/array.py
+++ b/pytato/array.py
@@ -1128,7 +1128,7 @@ class Slice(IndexRemappingBase):
     .. attribute:: begin
     .. attribute:: size
     """
-    fields = Array.fields + ("begin", "size")
+    fields = IndexRemappingBase.fields + ("begin", "size")
     mapper_method = "map_slice"
 
     def __init__(self,
-- 
GitLab


From 5581cdddd145eb46991503dc5b861a1ee0d655d2 Mon Sep 17 00:00:00 2001
From: Matt Wala <wala1@illinois.edu>
Date: Tue, 28 Jul 2020 07:39:14 +0200
Subject: [PATCH 33/52] Apply 1 suggestion(s) to 1 file(s)

---
 pytato/array.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pytato/array.py b/pytato/array.py
index 4382ad7..b5b6038 100644
--- a/pytato/array.py
+++ b/pytato/array.py
@@ -1437,7 +1437,7 @@ def make_slice(array: Array, begin: Sequence[int], size: Sequence[int]) -> Array
         raise ValueError("'begin' and 'array' do not match in dimensions")
 
     if len(begin) != len(size):
-        raise ValueError("'begin' and 'size' do not match in dimensions")
+        raise ValueError("'begin' and 'size' do not match in number of dimensions")
 
     for i, (bval, sval) in enumerate(zip(begin, size)):
         symbolic_index = not (
-- 
GitLab


From 9e5d067a5b3a6ebaab098f0fa1572291fe6d75d2 Mon Sep 17 00:00:00 2001
From: Matt Wala <wala1@illinois.edu>
Date: Tue, 28 Jul 2020 07:39:17 +0200
Subject: [PATCH 34/52] Apply 1 suggestion(s) to 1 file(s)

---
 pytato/array.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pytato/array.py b/pytato/array.py
index b5b6038..7fd7d05 100644
--- a/pytato/array.py
+++ b/pytato/array.py
@@ -1434,7 +1434,7 @@ def make_slice(array: Array, begin: Sequence[int], size: Sequence[int]) -> Array
         expressions are unsupported.
     """
     if array.ndim != len(begin):
-        raise ValueError("'begin' and 'array' do not match in dimensions")
+        raise ValueError("'begin' and 'array' do not match in number of dimensions")
 
     if len(begin) != len(size):
         raise ValueError("'begin' and 'size' do not match in number of dimensions")
-- 
GitLab


From 88a30af5822dd5f727cc04ef9b243926ad959244 Mon Sep 17 00:00:00 2001
From: Matt Wala <wala1@illinois.edu>
Date: Tue, 28 Jul 2020 01:01:38 -0500
Subject: [PATCH 35/52] Add a comment concerning where a magic string came from

---
 pytato/array.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pytato/array.py b/pytato/array.py
index 4382ad7..b4a3881 100644
--- a/pytato/array.py
+++ b/pytato/array.py
@@ -387,6 +387,7 @@ SliceItem = Union[int, slice, None, EllipsisType]
 
 def _truediv_result_type(arg1: Any, arg2: Any) -> np.dtype:
     dtype = np.result_type(arg1, arg2)
+    # See: test_true_divide in numpy/core/tests/test_ufunc.py
     if dtype.char in "bhilqBHILQ":
         return np.float64
     else:
-- 
GitLab


From fcaf04aa82887632502136509dd3b4e438ca93f9 Mon Sep 17 00:00:00 2001
From: Matt Wala <wala1@illinois.edu>
Date: Tue, 28 Jul 2020 01:06:35 -0500
Subject: [PATCH 36/52] Use dtype.kind to simplify conditional

---
 pytato/array.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pytato/array.py b/pytato/array.py
index 3b9252b..07aced0 100644
--- a/pytato/array.py
+++ b/pytato/array.py
@@ -388,7 +388,7 @@ SliceItem = Union[int, slice, None, EllipsisType]
 def _truediv_result_type(arg1: Any, arg2: Any) -> np.dtype:
     dtype = np.result_type(arg1, arg2)
     # See: test_true_divide in numpy/core/tests/test_ufunc.py
-    if dtype.char in "bhilqBHILQ":
+    if dtype.kind in "iu":
         return np.float64
     else:
         return dtype
-- 
GitLab


From d86ba2bdf5d2289bf1e3a4fd9eacc9740b3368f1 Mon Sep 17 00:00:00 2001
From: Matt Wala <wala1@illinois.edu>
Date: Tue, 28 Jul 2020 01:07:52 -0500
Subject: [PATCH 37/52] Docs: Change method to attribute

---
 pytato/array.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pytato/array.py b/pytato/array.py
index 07aced0..a149d4b 100644
--- a/pytato/array.py
+++ b/pytato/array.py
@@ -451,7 +451,8 @@ class Array:
     Array interface:
 
     .. automethod:: __getitem__
-    .. automethod:: T
+    .. attribute:: T
+
     .. method:: __mul__
     .. method:: __rmul__
     .. method:: __add__
-- 
GitLab


From 20e4ca163303f46363aa827e9f24e113f632093e Mon Sep 17 00:00:00 2001
From: Matt Wala <wala1@illinois.edu>
Date: Tue, 28 Jul 2020 08:08:53 +0200
Subject: [PATCH 38/52] Apply 1 suggestion(s) to 1 file(s)

---
 pytato/array.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pytato/array.py b/pytato/array.py
index a149d4b..83df130 100644
--- a/pytato/array.py
+++ b/pytato/array.py
@@ -639,7 +639,7 @@ class Array:
     def _binary_op(self,
             op: Any,
             other: Union[Array, Number],
-            result_type: Callable[[Any, Any], np.dtype] = np.result_type,
+            get_result_type: Callable[[Any, Any], np.dtype] = np.result_type,
             reverse: bool = False) -> Array:
 
         def add_indices(val: prim.Expression) -> prim.Expression:
-- 
GitLab


From 1b556530fc85754151700f706f2d625fbd34d62f Mon Sep 17 00:00:00 2001
From: Matt Wala <wala1@illinois.edu>
Date: Tue, 28 Jul 2020 01:09:34 -0500
Subject: [PATCH 39/52] Fix name of get_result_type

---
 pytato/array.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pytato/array.py b/pytato/array.py
index 83df130..6e40568 100644
--- a/pytato/array.py
+++ b/pytato/array.py
@@ -653,14 +653,14 @@ class Array:
             first = add_indices(var("_in0"))
             second = other
             bindings = dict(_in0=self)
-            dtype = result_type(other, self.dtype)
+            dtype = get_result_type(other, self.dtype)
         elif isinstance(other, Array):
             if self.shape != other.shape:
                 raise NotImplementedError("broadcasting not supported")
             first = add_indices(var("_in0"))
             second = add_indices(var("_in1"))
             bindings = dict(_in0=self, _in1=other)
-            dtype = result_type(other.dtype, self.dtype)
+            dtype = get_result_type(other.dtype, self.dtype)
         else:
             raise ValueError("unknown argument")
 
-- 
GitLab


From c5e85f6f046f9706b134f4571620b98af92534bc Mon Sep 17 00:00:00 2001
From: Matt Wala <wala1@illinois.edu>
Date: Tue, 28 Jul 2020 01:17:47 -0500
Subject: [PATCH 40/52] Clean up reversing logic in Array._binary_op

---
 pytato/array.py | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/pytato/array.py b/pytato/array.py
index 6e40568..9b39650 100644
--- a/pytato/array.py
+++ b/pytato/array.py
@@ -650,27 +650,29 @@ class Array:
                 return val[indices]
 
         if isinstance(other, Number):
-            first = add_indices(var("_in0"))
+            first = self
             second = other
+            first_expr = add_indices(var("_in0"))
+            second_expr = other
             bindings = dict(_in0=self)
-            dtype = get_result_type(other, self.dtype)
+
         elif isinstance(other, Array):
             if self.shape != other.shape:
                 raise NotImplementedError("broadcasting not supported")
-            first = add_indices(var("_in0"))
-            second = add_indices(var("_in1"))
+            first = self
+            second = other
+            first_expr = add_indices(var("_in0"))
+            second_expr = add_indices(var("_in1"))
             bindings = dict(_in0=self, _in1=other)
-            dtype = get_result_type(other.dtype, self.dtype)
         else:
             raise ValueError("unknown argument")
 
         if reverse:
             first, second = second, first
-            if len(bindings) == 2:
-                bindings["_in1"], bindings["_in0"] = \
-                        bindings["_in0"], bindings["_in1"]
+            first_expr, second_expr = second_expr, first_expr
 
-        expr = op(first, second)
+        dtype = get_result_type(first, second)
+        expr = op(first_expr, second_expr)
 
         return IndexLambda(self.namespace,
                 expr,
-- 
GitLab


From b9a2d5f0cdd71498c48f2a16ed9c6afc1be2248b Mon Sep 17 00:00:00 2001
From: Matt Wala <wala1@illinois.edu>
Date: Tue, 28 Jul 2020 01:21:42 -0500
Subject: [PATCH 41/52] Actually clean up reversing logic

---
 pytato/array.py | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/pytato/array.py b/pytato/array.py
index 9b39650..7870ef3 100644
--- a/pytato/array.py
+++ b/pytato/array.py
@@ -650,28 +650,25 @@ class Array:
                 return val[indices]
 
         if isinstance(other, Number):
-            first = self
-            second = other
             first_expr = add_indices(var("_in0"))
             second_expr = other
             bindings = dict(_in0=self)
+            dtype = get_result_type(self.dtype, other)
 
         elif isinstance(other, Array):
             if self.shape != other.shape:
                 raise NotImplementedError("broadcasting not supported")
-            first = self
-            second = other
             first_expr = add_indices(var("_in0"))
             second_expr = add_indices(var("_in1"))
             bindings = dict(_in0=self, _in1=other)
+            dtype = get_result_type(self.dtype, other.dtype)
+
         else:
             raise ValueError("unknown argument")
 
         if reverse:
-            first, second = second, first
             first_expr, second_expr = second_expr, first_expr
 
-        dtype = get_result_type(first, second)
         expr = op(first_expr, second_expr)
 
         return IndexLambda(self.namespace,
-- 
GitLab


From 7f05f2834ac181b1484beb694bfae0162855bb10 Mon Sep 17 00:00:00 2001
From: Matt Wala <wala1@illinois.edu>
Date: Tue, 28 Jul 2020 01:25:15 -0500
Subject: [PATCH 42/52] Fix parameter name

---
 pytato/array.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pytato/array.py b/pytato/array.py
index 7870ef3..52c91f0 100644
--- a/pytato/array.py
+++ b/pytato/array.py
@@ -701,9 +701,9 @@ class Array:
     __rsub__ = partialmethod(_binary_op, operator.sub, reverse=True)
 
     __truediv__ = partialmethod(_binary_op, operator.truediv,
-            result_type=_truediv_result_type)
+            get_result_type=_truediv_result_type)
     __rtruediv__ = partialmethod(_binary_op, operator.truediv,
-            result_type=_truediv_result_type, reverse=True)
+            get_result_type=_truediv_result_type, reverse=True)
 
     __neg__ = partialmethod(_unary_op, operator.neg)
 
-- 
GitLab


From 8019a059865bf4ce262c74a70ecd5a923f7b3a01 Mon Sep 17 00:00:00 2001
From: Matt Wala <wala1@illinois.edu>
Date: Tue, 28 Jul 2020 01:50:47 -0500
Subject: [PATCH 43/52] Improve the precision of the signature of
 get_result_type

---
 pytato/array.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/pytato/array.py b/pytato/array.py
index 52c91f0..d2d0fd6 100644
--- a/pytato/array.py
+++ b/pytato/array.py
@@ -383,9 +383,10 @@ def normalize_shape(
 # {{{ array inteface
 
 SliceItem = Union[int, slice, None, EllipsisType]
+DtypeOrScalar = Union[np.dtype, Number]
 
 
-def _truediv_result_type(arg1: Any, arg2: Any) -> np.dtype:
+def _truediv_result_type(arg1: DtypeOrScalar, arg2: DtypeOrScalar) -> np.dtype:
     dtype = np.result_type(arg1, arg2)
     # See: test_true_divide in numpy/core/tests/test_ufunc.py
     if dtype.kind in "iu":
@@ -639,7 +640,7 @@ class Array:
     def _binary_op(self,
             op: Any,
             other: Union[Array, Number],
-            get_result_type: Callable[[Any, Any], np.dtype] = np.result_type,
+            get_result_type: Callable[[DtypeOrScalar, DtypeOrScalar], np.dtype] = np.result_type,  # noqa
             reverse: bool = False) -> Array:
 
         def add_indices(val: prim.Expression) -> prim.Expression:
-- 
GitLab


From 5ee1a3253572e8b951fc7b7233f91b6cf85e60dc Mon Sep 17 00:00:00 2001
From: Matt Wala <wala1@illinois.edu>
Date: Tue, 28 Jul 2020 08:56:06 +0200
Subject: [PATCH 44/52] Fix PEP number for MatrixProduct (woops)

---
 pytato/array.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pytato/array.py b/pytato/array.py
index d2d0fd6..d156069 100644
--- a/pytato/array.py
+++ b/pytato/array.py
@@ -927,12 +927,12 @@ class Einsum(Array):
 class MatrixProduct(Array):
     """A product of two matrices, or a matrix and a vector.
 
-    The semantics of this operation follow PEP 459 [pep459]_.
+    The semantics of this operation follow PEP 465 [pep465]_.
 
     .. attribute:: x1
     .. attribute:: x2
 
-    .. [pep459] https://www.python.org/dev/peps/pep-0459/
+    .. [pep465] https://www.python.org/dev/peps/pep-0465/
 
     """
     fields = Array.fields + ("x1", "x2")
-- 
GitLab


From 78403670197e110f74f1fe1a8de383762b93f3b5 Mon Sep 17 00:00:00 2001
From: Matt Wala <wala1@illinois.edu>
Date: Tue, 28 Jul 2020 02:00:01 -0500
Subject: [PATCH 45/52] Improve doc in MatrixProduct

---
 pytato/array.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pytato/array.py b/pytato/array.py
index d156069..1485d58 100644
--- a/pytato/array.py
+++ b/pytato/array.py
@@ -927,7 +927,8 @@ class Einsum(Array):
 class MatrixProduct(Array):
     """A product of two matrices, or a matrix and a vector.
 
-    The semantics of this operation follow PEP 465 [pep465]_.
+    The semantics of this operation follow PEP 465 [pep465]_, i.e., the Python
+    matmul (@) operator.
 
     .. attribute:: x1
     .. attribute:: x2
-- 
GitLab


From 271edb3e9617262bd5e1365f5008367eb3ee07cb Mon Sep 17 00:00:00 2001
From: Matt Wala <wala1@illinois.edu>
Date: Sun, 2 Aug 2020 19:13:21 -0500
Subject: [PATCH 46/52] Preprocess the nodes before code generation to reduce
 the number of node types in code generation

---
 pytato/__init__.py     |   4 +-
 pytato/array.py        |   3 +-
 pytato/codegen.py      | 344 ++++++++++++++++++++++++-----------------
 pytato/transform.py    | 121 ++++++++++++++-
 test/test_transform.py |  57 +++++++
 5 files changed, 379 insertions(+), 150 deletions(-)
 create mode 100755 test/test_transform.py

diff --git a/pytato/__init__.py b/pytato/__init__.py
index 5bc498c..8eab250 100644
--- a/pytato/__init__.py
+++ b/pytato/__init__.py
@@ -26,7 +26,7 @@ THE SOFTWARE.
 
 from pytato.array import (
         Namespace, Array, DictOfNamedArrays, Tag, UniqueTag,
-        DottedName,
+        DottedName, Placeholder, IndexLambda,
 
         make_dict_of_named_arrays,
         make_placeholder, make_size_param, make_data_wrapper,
@@ -39,7 +39,7 @@ from pytato.target import Target, PyOpenCLTarget
 
 __all__ = (
         "DottedName", "Namespace", "Array", "DictOfNamedArrays",
-        "Tag", "UniqueTag",
+        "Tag", "UniqueTag", "Placeholder", "IndexLambda",
 
         "make_dict_of_named_arrays", "make_placeholder", "make_size_param",
         "make_data_wrapper",
diff --git a/pytato/array.py b/pytato/array.py
index 1485d58..87f4e3d 100644
--- a/pytato/array.py
+++ b/pytato/array.py
@@ -233,7 +233,8 @@ class Namespace(Mapping[str, "Array"]):
         return len(self._symbol_table)
 
     def copy(self) -> Namespace:
-        raise NotImplementedError
+        from pytato.transform import CopyMapper, copy_namespace
+        return copy_namespace(self, CopyMapper(Namespace()))
 
     def assign(self, name: str, value: Array) -> str:
         """Declare a new array.
diff --git a/pytato/codegen.py b/pytato/codegen.py
index f74051c..47d34ce 100644
--- a/pytato/codegen.py
+++ b/pytato/codegen.py
@@ -24,8 +24,9 @@ THE SOFTWARE.
 
 import dataclasses
 from functools import partialmethod
+import re
 from typing import (
-        Any, Union, Optional, Mapping, Dict, Tuple, FrozenSet, Set, Callable)
+        Union, Optional, Mapping, Dict, Tuple, FrozenSet, Set, Callable)
 
 import islpy as isl
 import loopy as lp
@@ -36,12 +37,13 @@ import pytools
 from pytato.array import (
         Array, DictOfNamedArrays, ShapeType, IndexLambda,
         SizeParam, DataWrapper, InputArgumentBase, MatrixProduct, Roll,
-        AxisPermutation, Slice, IndexRemappingBase, Stack)
+        AxisPermutation, Slice, IndexRemappingBase, Stack, Placeholder,
+        Namespace, DataInterface)
 from pytato.program import BoundProgram
 from pytato.target import Target, PyOpenCLTarget
 import pytato.scalar_expr as scalar_expr
 from pytato.scalar_expr import ScalarExpression
-import pytato.transform
+from pytato.transform import Mapper, CopyMapper
 
 
 __doc__ = """
@@ -57,6 +59,8 @@ Code Generation Internals
 
 .. currentmodule:: pytato.codegen
 
+.. autoclass:: CodeGenPreprocessor
+
 .. autoclass:: LoopyExpressionContext
 .. autoclass:: ImplementedResult
 .. autoclass:: StoredResult
@@ -72,10 +76,126 @@ Code Generation Internals
 .. autofunction:: get_loopy_temporary
 .. autofunction:: add_store
 .. autofunction:: rename_reductions
+.. autofunction:: get_initial_codegen_state
+.. autofunction:: preprocess
 
 """
 
 
+# {{{ preprocessing for codegen
+
+class CodeGenPreprocessor(CopyMapper):
+    """A mapper that preprocesses graphs to simplify code generation.
+
+    The following node simplifications are performed:
+
+    ======================================  =====================================
+    Source Node Type                        Target Node Type
+    ======================================  =====================================
+    :class:`~pytato.array.DataWrapper`      :class:`~pytato.array.Placeholder`
+    :class:`~pytato.array.Roll`             :class:`~pytato.array.IndexLambda`
+    :class:`~pytato.array.AxisPermutation`  :class:`~pytato.array.IndexLambda`
+    :class:`~pytato.array.Slice`            :class:`~pytato.array.IndexLambda`
+    ======================================  =====================================
+    """
+
+    # TODO:
+    # Stack -> IndexLambda
+    # MatrixProduct -> Einsum
+
+    def __init__(self, namespace: Namespace):
+        super().__init__(namespace)
+        self.bound_arguments: Dict[str, DataInterface] = {}
+
+    def map_data_wrapper(self, expr: DataWrapper) -> Array:
+        self.bound_arguments[expr.name] = expr.data
+        return Placeholder(namespace=self.namespace,
+                name=expr.name,
+                shape=expr.shape,
+                dtype=expr.dtype,
+                tags=expr.tags)
+
+    def map_stack(self, expr: Stack) -> Array:
+
+        def get_subscript(array_index: int) -> SymbolicIndex:
+            result = []
+            for i in range(expr.ndim):
+                if i != expr.axis:
+                    result.append(var(f"_{i}"))
+            return tuple(result)
+
+        # I = axis index
+        #
+        # => If(_I == 0,
+        #        _in0[_0, _1, ...],
+        #        If(_I == 1,
+        #            _in1[_0, _1, ...],
+        #            ...
+        #                _inNm1[_0, _1, ...] ...))
+        for i in range(len(expr.arrays) - 1, -1, -1):
+            subarray_expr = var(f"_in{i}")[get_subscript(i)]
+            if i == len(expr.arrays) - 1:
+                stack_expr = subarray_expr
+            else:
+                from pymbolic.primitives import If, Comparison
+                stack_expr = If(Comparison(var(f"_{expr.axis}"), "==", i),
+                        subarray_expr,
+                        stack_expr)
+
+        bindings = {f"_in{i}": self.rec(array)
+                for i, array in enumerate(expr.arrays)}
+
+        return IndexLambda(namespace=self.namespace,
+                expr=stack_expr,
+                shape=expr.shape,
+                dtype=expr.dtype,
+                bindings=bindings)
+
+    # {{{ index remapping (roll, axis permutation, slice)
+
+    def handle_index_remapping(self,
+            indices_getter: Callable[[CodeGenPreprocessor, Array], SymbolicIndex],
+            expr: IndexRemappingBase) -> Array:
+        indices = indices_getter(self, expr)
+
+        index_expr = var("_in0")
+        if indices:
+            index_expr = index_expr[indices]
+
+        array = self.rec(expr.array)
+
+        return IndexLambda(namespace=self.namespace,
+                expr=index_expr,
+                shape=expr.shape,
+                dtype=expr.dtype,
+                bindings=dict(_in0=array))
+
+    def _indices_for_roll(self, expr: Roll) -> SymbolicIndex:
+        indices = [var(f"_{d}") for d in range(expr.ndim)]
+        axis = expr.axis
+        indices[axis] = (indices[axis] - expr.shift) % expr.shape[axis]
+        return tuple(indices)
+
+    def _indices_for_axis_permutation(self, expr: AxisPermutation) -> SymbolicIndex:
+        indices = [None] * expr.ndim
+        for from_index, to_index in enumerate(expr.axes):
+            indices[to_index] = var(f"_{from_index}")
+        return tuple(indices)
+
+    def _indices_for_slice(self, expr: Slice) -> SymbolicIndex:
+        return tuple(var(f"_{d}") + expr.begin[d] for d in range(expr.ndim))
+
+    # https://github.com/python/mypy/issues/8619
+    map_roll = partialmethod(handle_index_remapping, _indices_for_roll)  # noqa type: ignore
+    map_axis_permutation = (
+            partialmethod(handle_index_remapping, _indices_for_axis_permutation))  # noqa type: ignore
+    map_slice = partialmethod(handle_index_remapping, _indices_for_slice)  # noqa type: ignore
+
+    # }}}
+
+# }}}
+
+
 # {{{ generated array expressions
 
 # SymbolicIndex and ShapeType are semantically distinct but identical at the
@@ -102,6 +222,11 @@ class LoopyExpressionContext(object):
         A (read-only) local name mapping used for name lookup when generating
         code.
 
+    .. attribute:: num_indices
+
+        The number of indices of the form ``_0``, ``_1``, allowed in the
+        expression.
+
     .. attribute:: depends_on
 
         The set of statement IDs that need to be included in
@@ -116,6 +241,7 @@ class LoopyExpressionContext(object):
 
     """
     state: CodeGenState
+    num_indices: int
     _depends_on: FrozenSet[str] = \
             dataclasses.field(default_factory=frozenset)
     local_namespace: Mapping[str, Array] = \
@@ -168,12 +294,14 @@ class StoredResult(ImplementedResult):
 
     See also: :class:`pytato.array.ImplStored`.
     """
-    def __init__(self, name: str, depends_on: FrozenSet[str]):
+    def __init__(self, name: str, num_indices: int, depends_on: FrozenSet[str]):
         self.name = name
+        self.num_indices = num_indices
         self.depends_on = depends_on
 
     def to_loopy_expression(self, indices: SymbolicIndex,
             expr_context: LoopyExpressionContext) -> ScalarExpression:
+        assert len(indices) == self.num_indices
         expr_context.update_depends_on(self.depends_on)
         if indices == ():
             return prim.Variable(self.name)
@@ -188,9 +316,11 @@ class InlinedResult(ImplementedResult):
     See also: :class:`pytato.array.ImplInlined`.
     """
     def __init__(self, expr: ScalarExpression,
+            num_indices: int,
             reduction_bounds: ReductionBounds,
             depends_on: FrozenSet[str]):
         self.expr = expr
+        self.num_indices = num_indices
         self.reduction_bounds = dict(reduction_bounds)
         self.depends_on = depends_on
 
@@ -199,11 +329,13 @@ class InlinedResult(ImplementedResult):
             loopy_expr: ScalarExpression,
             loopy_expr_context: LoopyExpressionContext) -> InlinedResult:
         return InlinedResult(loopy_expr,
+                loopy_expr_context.num_indices,
                 loopy_expr_context.reduction_bounds,
                 loopy_expr_context.depends_on)
 
     def to_loopy_expression(self, indices: SymbolicIndex,
             expr_context: LoopyExpressionContext) -> ScalarExpression:
+        assert len(indices) == self.num_indices
         substitutions = {f"_{d}": i for d, i in enumerate(indices)}
 
         reduction_start = len(expr_context.reduction_bounds)
@@ -270,7 +402,7 @@ class CodeGenState:
         self._kernel = kernel
 
 
-class CodeGenMapper(pytato.transform.Mapper):
+class CodeGenMapper(Mapper):
     """A mapper for generating code for nodes in the computation graph.
     """
     exprgen_mapper: InlinedExpressionGenMapper
@@ -287,16 +419,16 @@ class CodeGenMapper(pytato.transform.Mapper):
         kernel = state.kernel.copy(args=state.kernel.args + [arg])
         state.update_kernel(kernel)
 
-        result = StoredResult(expr.name, frozenset())
+        result = StoredResult(expr.name, expr.ndim, frozenset())
         state.results[expr] = result
         return result
 
-    def handle_array_input_argument(self, expr: InputArgumentBase,
+    def map_placeholder(self, expr: Placeholder,
             state: CodeGenState) -> ImplementedResult:
         if expr in state.results:
             return state.results[expr]
 
-        shape_context = LoopyExpressionContext(state)
+        shape_context = LoopyExpressionContext(state, num_indices=0)
         shape = []
         for component in expr.shape:
             shape.append(self.exprgen_mapper(component, shape_context))
@@ -311,13 +443,10 @@ class CodeGenMapper(pytato.transform.Mapper):
         kernel = state.kernel.copy(args=state.kernel.args + [arg])
         state.update_kernel(kernel)
 
-        result = StoredResult(expr.name, frozenset())
+        result = StoredResult(expr.name, expr.ndim, frozenset())
         state.results[expr] = result
         return result
 
-    map_placeholder = handle_array_input_argument
-    map_data_wrapper = handle_array_input_argument
-
     def map_matrix_product(self, expr: MatrixProduct,
             state: CodeGenState) -> ImplementedResult:
         if expr in state.results:
@@ -326,7 +455,8 @@ class CodeGenMapper(pytato.transform.Mapper):
         x1_result = self.rec(expr.x1, state)
         x2_result = self.rec(expr.x2, state)
 
-        loopy_expr_context = LoopyExpressionContext(state)
+        loopy_expr_context = LoopyExpressionContext(state,
+                num_indices=expr.ndim)
         loopy_expr_context.reduction_bounds["_r0"] = (0, expr.x2.shape[0])
 
         # Figure out inames.
@@ -364,110 +494,11 @@ class CodeGenMapper(pytato.transform.Mapper):
         insn_id = add_store(output_name, expr, inlined_result, state,
                 output_to_temporary=True)
 
-        result = StoredResult(output_name, frozenset([insn_id]))
+        result = StoredResult(output_name, expr.ndim, frozenset([insn_id]))
 
         state.results[expr] = result
         return result
 
-    def map_stack(self, expr: Stack, state: CodeGenState) -> ImplementedResult:
-        if expr in state.results:
-            return state.results[expr]
-
-        out_name = state.var_name_gen("stack")
-        out = get_loopy_temporary(out_name, expr)
-
-        inames = []
-        for j in range(expr.ndim - 1):
-            if j >= expr.axis:
-                j += 1
-            inames.append(state.var_name_gen(f"{out_name}_dim{j}"))
-        indices = tuple(var(iname) for iname in inames)
-
-        reduction_bounds = {}
-        depends_on: FrozenSet[str] = frozenset()
-        new_insns = []
-
-        for i, array in enumerate(expr.arrays):
-            loopy_expr_context = LoopyExpressionContext(state)
-            loopy_expr = (
-                    self.rec(array, state)
-                    .to_loopy_expression(indices, loopy_expr_context))
-            loopy_expr = rename_reductions(
-                    loopy_expr, loopy_expr_context,
-                    lambda old_iname: state.var_name_gen(f"{out_name}{old_iname}"))
-
-            reduction_bounds.update(loopy_expr_context.reduction_bounds)
-
-            assignee_indices = list(indices)
-            assignee_indices.insert(expr.axis, i)
-            assignee = var(out_name)[tuple(assignee_indices)]
-
-            insn_id = state.insn_id_gen(f"{out_name}_{i}")
-
-            from loopy.kernel.instruction import make_assignment
-            insn = make_assignment((assignee,),
-                    loopy_expr,
-                    id=insn_id,
-                    within_inames=frozenset(inames),
-                    depends_on=loopy_expr_context.depends_on | depends_on)
-
-            depends_on = frozenset([insn_id])
-            new_insns.append(insn)
-
-        # Update kernel.
-        kernel = state.kernel
-        domain = domain_for_shape(tuple(inames),
-                expr.arrays[0].shape, reduction_bounds)
-        temporary_variables = kernel.temporary_variables.copy()
-        temporary_variables[out_name] = out
-        kernel = kernel.copy(domains=kernel.domains + [domain],
-                instructions=kernel.instructions + new_insns,
-                temporary_variables=temporary_variables)
-        state.update_kernel(kernel)
-
-        result = StoredResult(out_name, depends_on)
-        state.results[expr] = result
-        return result
-
-    def handle_index_remapping(self,
-            indices_getter: Any,
-            expr: IndexRemappingBase,
-            state: CodeGenState) -> ImplementedResult:
-        if expr in state.results:
-            return state.results[expr]
-
-        indices = indices_getter(self, expr)
-
-        loopy_expr_context = LoopyExpressionContext(state)
-        loopy_expr = (
-                self.rec(expr.array, state)
-                .to_loopy_expression(indices, loopy_expr_context))
-
-        result = InlinedResult.from_loopy_expression(loopy_expr, loopy_expr_context)
-
-        state.results[expr] = result
-        return result
-
-    def _indices_for_roll(self, expr: Roll) -> SymbolicIndex:
-        indices = [var(f"_{d}") for d in range(expr.ndim)]
-        axis = expr.axis
-        indices[axis] = (indices[axis] - expr.shift) % expr.shape[axis]
-        return tuple(indices)
-
-    def _indices_for_axis_permutation(self, expr: AxisPermutation) -> SymbolicIndex:
-        indices = [None] * expr.ndim
-        for from_index, to_index in enumerate(expr.axes):
-            indices[to_index] = var(f"_{from_index}")
-        return tuple(indices)
-
-    def _indices_for_slice(self, expr: Slice) -> SymbolicIndex:
-        return tuple(var(f"_{d}") + expr.begin[d] for d in range(expr.ndim))
-
-    map_roll = partialmethod(handle_index_remapping, _indices_for_roll)
-    map_axis_permutation = (
-            partialmethod(handle_index_remapping, _indices_for_axis_permutation))
-    map_slice = partialmethod(handle_index_remapping, _indices_for_slice)
-
     def map_index_lambda(self, expr: IndexLambda,
             state: CodeGenState) -> ImplementedResult:
         if expr in state.results:
@@ -476,7 +507,8 @@ class CodeGenMapper(pytato.transform.Mapper):
         # TODO: Respect tags.
 
         loopy_expr_context = LoopyExpressionContext(state,
-                local_namespace=expr.bindings)
+                local_namespace=expr.bindings,
+                num_indices=expr.ndim)
         loopy_expr = self.exprgen_mapper(expr.expr, loopy_expr_context)
 
         result = InlinedResult.from_loopy_expression(loopy_expr,
@@ -489,6 +521,9 @@ class CodeGenMapper(pytato.transform.Mapper):
 
 # {{{ inlined expression gen mapper
 
+INDEX_RE = re.compile("_(0|([1-9][0-9]*))")
+
+
 class InlinedExpressionGenMapper(scalar_expr.IdentityMapper):
     """A mapper for generating :mod:`loopy` expressions with inlined
     sub-expressions.
@@ -520,10 +555,18 @@ class InlinedExpressionGenMapper(scalar_expr.IdentityMapper):
 
     def map_variable(self, expr: prim.Variable,
             expr_context: LoopyExpressionContext) -> ScalarExpression:
-        result: ImplementedResult = self.codegen_mapper(
-                expr_context.lookup(expr.name),
-                expr_context.state)
-        return result.to_loopy_expression((), expr_context)
+        match = INDEX_RE.fullmatch(expr.name)
+        if match:
+            # Found an index of the form _0, _1, ...
+            index = int(match.group(1))
+            if not (0 <= index < expr_context.num_indices):
+                raise ValueError(f"invalid index encountered: _{index}")
+            return expr
+        else:
+            array = expr_context.lookup(expr.name)
+            impl_result: ImplementedResult = self.codegen_mapper(array,
+                    expr_context.state)
+            return impl_result.to_loopy_expression((), expr_context)
 
 # }}}
 
@@ -606,7 +649,7 @@ def add_store(name: str, expr: Array, result: ImplementedResult,
             state.var_name_gen(f"{name}_dim{d}")
             for d in range(expr.ndim))
     indices = tuple(prim.Variable(iname) for iname in inames)
-    loopy_expr_context = LoopyExpressionContext(state)
+    loopy_expr_context = LoopyExpressionContext(state, num_indices=0)
     loopy_expr = result.to_loopy_expression(indices, loopy_expr_context)
 
     # Rename reductions to names suitable as inames.
@@ -689,6 +732,35 @@ def rename_reductions(
     loopy_expr_context.reduction_bounds = new_reduction_bounds
     return result
 
+
+def get_initial_codegen_state(namespace: Namespace, target: Target,
+        options: Optional[lp.Options]) -> CodeGenState:
+    kernel = lp.make_kernel("{:}", [],
+            target=target.get_loopy_target(),
+            options=options,
+            lang_version=lp.MOST_RECENT_LANGUAGE_VERSION)
+
+    return CodeGenState(namespace=namespace,
+            _kernel=kernel,
+            results=dict())
+
+
+@dataclasses.dataclass(init=True, repr=False, eq=False)
+class PreprocessResult:
+    outputs: DictOfNamedArrays
+    bound_arguments: Dict[str, DataInterface]
+
+
+def preprocess(outputs: DictOfNamedArrays) -> PreprocessResult:
+    """Preprocess a computation for code generation."""
+    mapper = CodeGenPreprocessor(Namespace())
+
+    from pytato.transform import copy_dict_of_named_arrays
+    new_outputs = copy_dict_of_named_arrays(outputs, mapper)
+
+    return PreprocessResult(outputs=new_outputs,
+            bound_arguments=mapper.bound_arguments)
+
 # }}}
 
 
@@ -704,16 +776,14 @@ def generate_loopy(result: Union[Array, DictOfNamedArrays],
     """
     # {{{ get namespace and outputs
 
-    outputs: DictOfNamedArrays
+    orig_outputs: DictOfNamedArrays
 
     if isinstance(result, Array):
-        outputs = DictOfNamedArrays({"_pt_out": result})
-        namespace = outputs.namespace
+        orig_outputs = DictOfNamedArrays({"_pt_out": result})
     else:
         assert isinstance(result, DictOfNamedArrays)
-        outputs = result
+        orig_outputs = result
 
-    namespace = outputs.namespace
     del result
 
     # }}}
@@ -721,15 +791,11 @@ def generate_loopy(result: Union[Array, DictOfNamedArrays],
     if target is None:
         target = PyOpenCLTarget()
 
-    # Set up codegen state.
-    kernel = lp.make_kernel("{:}", [],
-            target=target.get_loopy_target(),
-            options=options,
-            lang_version=lp.MOST_RECENT_LANGUAGE_VERSION)
+    preproc_result = preprocess(orig_outputs)
+    outputs = preproc_result.outputs
+    namespace = outputs.namespace
 
-    state = CodeGenState(namespace=namespace,
-            _kernel=kernel,
-            results=dict())
+    state = get_initial_codegen_state(namespace, target, options)
 
     # Reserve names of input and output arguments.
     for val in namespace.values():
@@ -738,20 +804,14 @@ def generate_loopy(result: Union[Array, DictOfNamedArrays],
     state.var_name_gen.add_names(outputs)
 
     # Generate code for graph nodes.
-    mapper = CodeGenMapper()
+    cg_mapper = CodeGenMapper()
     for name, val in namespace.items():
-        _ = mapper(val, state)
+        _ = cg_mapper(val, state)
 
     # Generate code for outputs.
     for name, expr in outputs.items():
-        add_store(name, expr, mapper(expr, state), state)
-
-    # Collect bound arguments.
-    bound_arguments = {}
-    for name, val in namespace.items():
-        if isinstance(val, DataWrapper):
-            bound_arguments[name] = val.data
+        add_store(name, expr, cg_mapper(expr, state), state)
 
     return target.bind_program(
             program=state.kernel,
-            bound_arguments=bound_arguments)
+            bound_arguments=preproc_result.bound_arguments)
diff --git a/pytato/transform.py b/pytato/transform.py
index 1354665..8121358 100644
--- a/pytato/transform.py
+++ b/pytato/transform.py
@@ -24,9 +24,11 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 THE SOFTWARE.
 """
 
-from typing import Any, Callable
+from typing import Any, Callable, Dict
 
-from pytato.array import Array
+from pytato.array import (
+        Array, IndexLambda, Namespace, Placeholder, MatrixProduct, Stack,
+        Roll, AxisPermutation, Slice, DataWrapper, SizeParam, DictOfNamedArrays)
 
 __doc__ = """
 .. currentmodule:: pytato.transform
@@ -34,7 +36,9 @@ __doc__ = """
 Transforming Computations
 -------------------------
 
-.. autoclass:: Mapper
+.. autoclass:: CopyMapper
+.. autofunction:: copy_namespace
+.. autofunction:: copy_dict_of_named_arrays
 
 """
 
@@ -59,7 +63,7 @@ class Mapper:
         raise ValueError("%s encountered invalid foreign object: %s"
                 % (type(self).__name__, repr(expr)))
 
-    def __call__(self, expr: Array, *args: Any, **kwargs: Any) -> Any:
+    def rec(self, expr: Array, *args: Any, **kwargs: Any) -> Any:
         method: Callable[..., Array]
 
         try:
@@ -72,9 +76,116 @@ class Mapper:
 
         return method(expr, *args, **kwargs)
 
-    rec = __call__
+    def __call__(self, expr: Array, *args: Any, **kwargs: Any) -> Any:
+        return self.rec(expr, *args, **kwargs)
+
+
+class CopyMapper(Mapper):
+
+    def __init__(self, namespace: Namespace):
+        self.namespace = namespace
+        self.cache: Dict[Array, Array] = {}
+
+    def rec(self, expr: Array) -> Array:  # type: ignore
+        if expr in self.cache:
+            return self.cache[expr]
+        result: Array = super().rec(expr)
+        self.cache[expr] = result
+        return result
+
+    def map_index_lambda(self, expr: IndexLambda) -> Array:
+        bindings = {
+                name: self.rec(subexpr)
+                for name, subexpr in expr.bindings.items()}
+        return IndexLambda(namespace=self.namespace,
+                expr=expr.expr,
+                shape=expr.shape,
+                dtype=expr.dtype,
+                bindings=bindings,
+                tags=expr.tags)
+
+    def map_placeholder(self, expr: Placeholder) -> Array:
+        return Placeholder(namespace=self.namespace,
+                name=expr.name,
+                shape=expr.shape,
+                dtype=expr.dtype,
+                tags=expr.tags)
+
+    def map_matrix_product(self, expr: MatrixProduct) -> Array:
+        return MatrixProduct(x1=self.rec(expr.x1),
+                x2=self.rec(expr.x2),
+                tags=expr.tags)
+
+    def map_stack(self, expr: Stack) -> Array:
+        arrays = tuple(self.rec(arr) for arr in expr.arrays)
+        return Stack(arrays=arrays, axis=expr.axis, tags=expr.tags)
+
+    def map_roll(self, expr: Roll) -> Array:
+        return Roll(array=self.rec(expr.array),
+                shift=expr.shift,
+                axis=expr.axis,
+                tags=expr.tags)
+
+    def map_axis_permutation(self, expr: AxisPermutation) -> Array:
+        return AxisPermutation(array=self.rec(expr.array),
+                axes=expr.axes,
+                tags=expr.tags)
+
+    def map_slice(self, expr: Slice) -> Array:
+        return Slice(array=self.rec(expr.array),
+                begin=expr.begin,
+                size=expr.size,
+                tags=expr.tags)
+
+    def map_data_wrapper(self, expr: DataWrapper) -> Array:
+        return DataWrapper(namespace=self.namespace,
+                name=expr.name,
+                data=expr.data,
+                shape=expr.shape,
+                tags=expr.tags)
+
+    def map_size_param(self, expr: SizeParam) -> Array:
+        return SizeParam(self.namespace, name=expr.name, tags=expr.tags)
 
 # }}}
 
 
+# {{{ mapper frontends
+
+def copy_namespace(source_namespace: Namespace,
+        copy_mapper: CopyMapper) -> Namespace:
+    """Copy the elements of *namespace* into a new namespace.
+
+    :param source_namespace: The namespace to copy
+    :param copy_mapper: A mapper that performs copies into a new namespace
+    :returns: A new namespace containing copies of the items in *source_namespace*
+    """
+    for name, val in source_namespace.items():
+        mapped_val = copy_mapper(val)
+        if name not in copy_mapper.namespace:
+            copy_mapper.namespace.assign(name, mapped_val)
+    return copy_mapper.namespace
+
+
+def copy_dict_of_named_arrays(source_dict: DictOfNamedArrays,
+        copy_mapper: CopyMapper) -> DictOfNamedArrays:
+    """Copy the elements of a :class:`~pytato.DictOfNamedArrays` into a
+    :class:`~pytato.DictOfNamedArrays` with a new namespace.
+
+    :param source_dict: The :class:`~pytato.DictOfNamedArrays` to copy
+    :param copy_mapper: A mapper that performs copies into a new namespace
+    :returns: A new :class:`~pytato.DictOfNamedArrays` containing copies of the
+        items in *source_dict*
+    """
+    if not source_dict:
+        return DictOfNamedArrays({})
+
+    data = {}
+    copy_namespace(source_dict.namespace, copy_mapper)
+    for name, val in source_dict.items():
+        data[name] = copy_mapper(val)
+    return DictOfNamedArrays(data)
+
+# }}}
+
 # vim: foldmethod=marker
diff --git a/test/test_transform.py b/test/test_transform.py
new file mode 100755
index 0000000..cf69e29
--- /dev/null
+++ b/test/test_transform.py
@@ -0,0 +1,57 @@
+#!/usr/bin/env python
+
+__copyright__ = "Copyright (C) 2020 Andreas Kloeckner"
+
+__license__ = """
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+"""
+
+import sys
+
+import numpy as np
+import pyopencl as cl  # noqa
+import pyopencl.array as cl_array  # noqa
+import pyopencl.cltypes as cltypes  # noqa
+import pyopencl.tools as cl_tools  # noqa
+from pyopencl.tools import (  # noqa
+        pytest_generate_tests_for_pyopencl as pytest_generate_tests)
+import pytest  # noqa
+
+import pytato as pt
+
+
+def test_namespace_copy():
+    namespace = pt.Namespace()
+    x = pt.Placeholder(namespace, "x", (5,), np.int)
+    namespace.assign("xsquared", x * x)
+
+    namespace_copy = namespace.copy()
+    assert len(namespace_copy) == 2
+    assert isinstance(namespace_copy["x"], pt.Placeholder)
+    assert isinstance(namespace_copy["xsquared"], pt.IndexLambda)
+
+
+if __name__ == "__main__":
+    if len(sys.argv) > 1:
+        exec(sys.argv[1])
+    else:
+        from pytest import main
+        main([__file__])
+
+# vim: filetype=pyopencl:fdm=marker
-- 
GitLab


From a79f4ca9a4f86421f3f77db5c88ae9033c384490 Mon Sep 17 00:00:00 2001
From: Matt Wala <wala1@illinois.edu>
Date: Sun, 2 Aug 2020 19:18:03 -0500
Subject: [PATCH 47/52] Fix comments

---
 pytato/codegen.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/pytato/codegen.py b/pytato/codegen.py
index 47d34ce..5a62bb2 100644
--- a/pytato/codegen.py
+++ b/pytato/codegen.py
@@ -186,10 +186,10 @@ class CodeGenPreprocessor(CopyMapper):
         return tuple(var(f"_{d}") + expr.begin[d] for d in range(expr.ndim))
 
     # https://github.com/python/mypy/issues/8619
-    map_roll = partialmethod(handle_index_remapping, _indices_for_roll)  # noqa type: ignore
+    map_roll = partialmethod(handle_index_remapping, _indices_for_roll)  # type: ignore  # noqa
     map_axis_permutation = (
-            partialmethod(handle_index_remapping, _indices_for_axis_permutation))  # noqa type: ignore
-    map_slice = partialmethod(handle_index_remapping, _indices_for_slice)  # noqa type: ignore
+            partialmethod(handle_index_remapping, _indices_for_axis_permutation))  # type: ignore  # noqa
+    map_slice = partialmethod(handle_index_remapping, _indices_for_slice)  # type: ignore  # noqa
 
     # }}}
 
-- 
GitLab


From 031615c977a614c863fb6eb279693236bad45856 Mon Sep 17 00:00:00 2001
From: Andreas Kloeckner <inform@tiker.net>
Date: Thu, 20 Aug 2020 17:05:32 -0500
Subject: [PATCH 48/52] Privatize _make_slice

---
 pytato/array.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/pytato/array.py b/pytato/array.py
index 87f4e3d..f6f6471 100644
--- a/pytato/array.py
+++ b/pytato/array.py
@@ -118,7 +118,6 @@ Node constructors such as :class:`Placeholder.__init__` and
 .. autofunction:: make_placeholder
 .. autofunction:: make_size_param
 .. autofunction:: make_data_wrapper
-.. autofunction:: make_slice
 """
 
 # }}}
@@ -559,7 +558,7 @@ class Array:
             else:
                 raise ValueError("unknown index along dimension")
 
-        slice_ = make_slice(self, starts, sizes)
+        slice_ = _make_slice(self, starts, sizes)
 
         if len(kept_dims) != self.ndim:
             # Return an IndexLambda that elides the indexed-into dimensions
@@ -1422,7 +1421,7 @@ def stack(arrays: Sequence[Array], axis: int = 0) -> Array:
     return Stack(tuple(arrays), axis)
 
 
-def make_slice(array: Array, begin: Sequence[int], size: Sequence[int]) -> Array:
+def _make_slice(array: Array, begin: Sequence[int], size: Sequence[int]) -> Array:
     """Extract a constant-sized slice from an array with constant offsets.
 
     :param array: input array
-- 
GitLab


From a8be07145a8ad20398056a2b21b8ee2d0591ec0b Mon Sep 17 00:00:00 2001
From: Andreas Kloeckner <inform@tiker.net>
Date: Thu, 20 Aug 2020 17:07:03 -0500
Subject: [PATCH 49/52] Add underscore to Array._mapper_method

---
 pytato/array.py     | 20 ++++++++++----------
 pytato/transform.py |  2 +-
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/pytato/array.py b/pytato/array.py
index f6f6471..02e48e3 100644
--- a/pytato/array.py
+++ b/pytato/array.py
@@ -470,7 +470,7 @@ class Array:
     .. attribute:: ndim
 
     """
-    mapper_method: ClassVar[str]
+    _mapper_method: ClassVar[str]
     # A tuple of field names. Fields must be equality comparable and
     # hashable. Dicts of hashable keys and values are also permitted.
     fields: ClassVar[Tuple[str, ...]] = ("shape", "dtype", "tags")
@@ -856,7 +856,7 @@ class IndexLambda(_SuppliedShapeAndDtypeMixin, Array):
     """
 
     fields = Array.fields + ("expr", "bindings")
-    mapper_method = "map_index_lambda"
+    _mapper_method = "map_index_lambda"
 
     def __init__(self,
             namespace: Namespace,
@@ -938,7 +938,7 @@ class MatrixProduct(Array):
     """
     fields = Array.fields + ("x1", "x2")
 
-    mapper_method = "map_matrix_product"
+    _mapper_method = "map_matrix_product"
 
     def __init__(self,
             x1: Array,
@@ -992,7 +992,7 @@ class Stack(Array):
     """
 
     fields = Array.fields + ("arrays", "axis")
-    mapper_method = "map_stack"
+    _mapper_method = "map_stack"
 
     def __init__(self,
             arrays: Tuple[Array, ...],
@@ -1065,7 +1065,7 @@ class Roll(IndexRemappingBase):
         Shift axis.
     """
     fields = IndexRemappingBase.fields + ("shift", "axis")
-    mapper_method = "map_roll"
+    _mapper_method = "map_roll"
 
     def __init__(self,
             array: Array,
@@ -1093,7 +1093,7 @@ class AxisPermutation(IndexRemappingBase):
         A permutation of the input axes.
     """
     fields = IndexRemappingBase.fields + ("axes",)
-    mapper_method = "map_axis_permutation"
+    _mapper_method = "map_axis_permutation"
 
     def __init__(self,
             array: Array,
@@ -1132,7 +1132,7 @@ class Slice(IndexRemappingBase):
     .. attribute:: size
     """
     fields = IndexRemappingBase.fields + ("begin", "size")
-    mapper_method = "map_slice"
+    _mapper_method = "map_slice"
 
     def __init__(self,
             array: Array,
@@ -1236,7 +1236,7 @@ class DataWrapper(InputArgumentBase):
         this array may not be updated in-place.
     """
 
-    mapper_method = "map_data_wrapper"
+    _mapper_method = "map_data_wrapper"
 
     def __init__(self,
             namespace: Namespace,
@@ -1267,7 +1267,7 @@ class Placeholder(_SuppliedShapeAndDtypeMixin, InputArgumentBase):
     user during evaluation.
     """
 
-    mapper_method = "map_placeholder"
+    _mapper_method = "map_placeholder"
 
     def __init__(self,
             namespace: Namespace,
@@ -1291,7 +1291,7 @@ class SizeParam(InputArgumentBase):
     expressions for array sizes.
     """
 
-    mapper_method = "map_size_param"
+    _mapper_method = "map_size_param"
 
     @property
     def shape(self) -> ShapeType:
diff --git a/pytato/transform.py b/pytato/transform.py
index 8121358..1973974 100644
--- a/pytato/transform.py
+++ b/pytato/transform.py
@@ -67,7 +67,7 @@ class Mapper:
         method: Callable[..., Array]
 
         try:
-            method = getattr(self, expr.mapper_method)
+            method = getattr(self, expr._mapper_method)
         except AttributeError:
             if isinstance(expr, Array):
                 return self.handle_unsupported_array(expr, *args, **kwargs)
-- 
GitLab


From e3a024c2c62ec8347e76067a1686e8b6333fff03 Mon Sep 17 00:00:00 2001
From: Andreas Kloeckner <inform@tiker.net>
Date: Thu, 20 Aug 2020 17:08:00 -0500
Subject: [PATCH 50/52] Add underscore to Array._fields

---
 pytato/array.py | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/pytato/array.py b/pytato/array.py
index 02e48e3..17355f2 100644
--- a/pytato/array.py
+++ b/pytato/array.py
@@ -473,7 +473,7 @@ class Array:
     _mapper_method: ClassVar[str]
     # A tuple of field names. Fields must be equality comparable and
     # hashable. Dicts of hashable keys and values are also permitted.
-    fields: ClassVar[Tuple[str, ...]] = ("shape", "dtype", "tags")
+    _fields: ClassVar[Tuple[str, ...]] = ("shape", "dtype", "tags")
 
     def __init__(self, tags: Optional[TagsType] = None):
         if tags is None:
@@ -608,7 +608,7 @@ class Array:
     @memoize_method
     def __hash__(self) -> int:
         attrs = []
-        for field in self.fields:
+        for field in self._fields:
             attr = getattr(self, field)
             if isinstance(attr, dict):
                 attr = frozenset(attr.items())
@@ -623,7 +623,7 @@ class Array:
                 and self.namespace is other.namespace
                 and all(
                     getattr(self, field) == getattr(other, field)
-                    for field in self.fields))
+                    for field in self._fields))
 
     def __ne__(self, other: Any) -> bool:
         return not self.__eq__(other)
@@ -855,7 +855,7 @@ class IndexLambda(_SuppliedShapeAndDtypeMixin, Array):
     .. automethod:: is_reference
     """
 
-    fields = Array.fields + ("expr", "bindings")
+    _fields = Array._fields + ("expr", "bindings")
     _mapper_method = "map_index_lambda"
 
     def __init__(self,
@@ -936,7 +936,7 @@ class MatrixProduct(Array):
     .. [pep465] https://www.python.org/dev/peps/pep-0465/
 
     """
-    fields = Array.fields + ("x1", "x2")
+    _fields = Array._fields + ("x1", "x2")
 
     _mapper_method = "map_matrix_product"
 
@@ -991,7 +991,7 @@ class Stack(Array):
 
     """
 
-    fields = Array.fields + ("arrays", "axis")
+    _fields = Array._fields + ("arrays", "axis")
     _mapper_method = "map_stack"
 
     def __init__(self,
@@ -1032,7 +1032,7 @@ class IndexRemappingBase(Array):
         The input :class:`~pytato.Array`
 
     """
-    fields = Array.fields + ("array",)
+    _fields = Array._fields + ("array",)
 
     def __init__(self,
             array: Array,
@@ -1064,7 +1064,7 @@ class Roll(IndexRemappingBase):
 
         Shift axis.
     """
-    fields = IndexRemappingBase.fields + ("shift", "axis")
+    _fields = IndexRemappingBase._fields + ("shift", "axis")
     _mapper_method = "map_roll"
 
     def __init__(self,
@@ -1092,7 +1092,7 @@ class AxisPermutation(IndexRemappingBase):
 
         A permutation of the input axes.
     """
-    fields = IndexRemappingBase.fields + ("axes",)
+    _fields = IndexRemappingBase._fields + ("axes",)
     _mapper_method = "map_axis_permutation"
 
     def __init__(self,
@@ -1131,7 +1131,7 @@ class Slice(IndexRemappingBase):
     .. attribute:: begin
     .. attribute:: size
     """
-    fields = IndexRemappingBase.fields + ("begin", "size")
+    _fields = IndexRemappingBase._fields + ("begin", "size")
     _mapper_method = "map_slice"
 
     def __init__(self,
@@ -1171,8 +1171,8 @@ class InputArgumentBase(Array):
     """
 
     # The name uniquely identifies this object in the namespace. Therefore,
-    # subclasses don't have to update *fields*.
-    fields = ("name",)
+    # subclasses don't have to update *_fields*.
+    _fields = ("name",)
 
     def __init__(self,
             namespace: Namespace,
-- 
GitLab


From 400fe784b5ef333ccc0c8bf6e93e63c6085e08e1 Mon Sep 17 00:00:00 2001
From: Andreas Kloeckner <inform@tiker.net>
Date: Thu, 20 Aug 2020 17:08:18 -0500
Subject: [PATCH 51/52] Add an assert and some FIXMEs

---
 pytato/array.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/pytato/array.py b/pytato/array.py
index 17355f2..42070f5 100644
--- a/pytato/array.py
+++ b/pytato/array.py
@@ -528,6 +528,7 @@ class Array:
             elif elem is None:
                 raise NotImplementedError("newaxis is unsupported")
             else:
+                assert isinstance(elem, (int, slice))
                 slice_spec_expanded.append(elem)
 
         slice_spec_expanded.extend(
@@ -572,6 +573,7 @@ class Array:
             if indices:
                 expr = expr[tuple(indices)]
 
+            # FIXME: Flatten into a single IndexLambda
             return IndexLambda(self.namespace,
                     expr,
                     shape=tuple(shape),
@@ -1461,6 +1463,7 @@ def _make_slice(array: Array, begin: Sequence[int], size: Sequence[int]) -> Arra
         if sval < 0 or not (0 <= bval + sval <= ubound):
             raise ValueError("index '%d' of 'size' out of bounds" % i)
 
+    # FIXME: Generate IndexLambda when possible
     return Slice(array, tuple(begin), tuple(size))
 
 
-- 
GitLab


From 88ccedc9b1398a8c39f5736c8df9b85ea4ad6e61 Mon Sep 17 00:00:00 2001
From: Andreas Kloeckner <inform@tiker.net>
Date: Thu, 20 Aug 2020 17:12:09 -0500
Subject: [PATCH 52/52] Codegen comments: reductions -> reduction variables

---
 pytato/codegen.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/pytato/codegen.py b/pytato/codegen.py
index 5a62bb2..ad4fbc5 100644
--- a/pytato/codegen.py
+++ b/pytato/codegen.py
@@ -432,7 +432,7 @@ class CodeGenMapper(Mapper):
         shape = []
         for component in expr.shape:
             shape.append(self.exprgen_mapper(component, shape_context))
-            # Not supported yet.
+            # Data-dependent shape: Not supported yet.
             assert not shape_context.depends_on
             assert not shape_context.reduction_bounds
 
@@ -652,7 +652,7 @@ def add_store(name: str, expr: Array, result: ImplementedResult,
     loopy_expr_context = LoopyExpressionContext(state, num_indices=0)
     loopy_expr = result.to_loopy_expression(indices, loopy_expr_context)
 
-    # Rename reductions to names suitable as inames.
+    # Rename reduction variables to names suitable as inames.
     loopy_expr = rename_reductions(
             loopy_expr, loopy_expr_context,
             lambda old_name: state.var_name_gen(f"{name}{old_name}"))
@@ -712,8 +712,8 @@ def rename_reductions(
         loopy_expr: ScalarExpression,
         loopy_expr_context: LoopyExpressionContext,
         var_name_gen: Callable[[str], str]) -> ScalarExpression:
-    """Rename the reductions in *loopy_expr* and *loopy_expr_context* using the
-    callable *var_name_gen.*
+    """Rename the reduction variables in *loopy_expr* and *loopy_expr_context*
+    using the callable *var_name_gen.*
     """
     new_reduction_inames = tuple(
             var_name_gen(old_iname)
-- 
GitLab