From 7b1fa24de1b42a37a2ca80e9bd7cf5fe7fa70a65 Mon Sep 17 00:00:00 2001
From: Matt Wala <wala1@illinois.edu>
Date: Thu, 9 Apr 2020 12:10:45 -0500
Subject: [PATCH 01/46] Add stub of test_array

---
 test/test_array.py | 48 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 48 insertions(+)
 create mode 100644 test/test_array.py

diff --git a/test/test_array.py b/test/test_array.py
new file mode 100644
index 0000000..98de63e
--- /dev/null
+++ b/test/test_array.py
@@ -0,0 +1,48 @@
+#!/usr/bin/env python
+
+__copyright__ = "Copyright (C) 2020 Matt Wala"
+
+__license__ = """
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+"""
+
+import numpy as np
+import numpy.linalg as la
+import sys
+
+import pytest
+
+import pyopencl as cl
+import pyopencl.array as cl_array
+import pyopencl.cltypes as cltypes
+import pyopencl.tools as cl_tools
+from pyopencl.tools import (  # noqa
+        pytest_generate_tests_for_pyopencl as pytest_generate_tests)
+
+
+if __name__ == "__main__":
+    # make sure that import failures get reported, instead of skipping the
+    # tests.
+    if len(sys.argv) > 1:
+        exec(sys.argv[1])
+    else:
+        from pytest import main
+        main([__file__])
+
+# vim: filetype=pyopencl:fdm=marker
-- 
GitLab


From 75bc91bc972d894a016d9bdfa5c236ebd947f308 Mon Sep 17 00:00:00 2001
From: Matt Wala <wala1@illinois.edu>
Date: Thu, 9 Apr 2020 17:36:32 -0500
Subject: [PATCH 02/46] WIP: Basic symbolic array support

---
 arrayzy/__init__.py |   2 +-
 arrayzy/array.py    | 164 ++++++++++++++++++++++++++++++++++++--------
 test/test_array.py  |  32 +++++++++
 3 files changed, 169 insertions(+), 29 deletions(-)

diff --git a/arrayzy/__init__.py b/arrayzy/__init__.py
index a2831ea..b7028a0 100644
--- a/arrayzy/__init__.py
+++ b/arrayzy/__init__.py
@@ -1 +1 @@
-from arrayzy.array import Array, Context, make_context
+from arrayzy.array import Array, Context, make_context, make_sym
diff --git a/arrayzy/array.py b/arrayzy/array.py
index a365f31..c52c82e 100644
--- a/arrayzy/array.py
+++ b/arrayzy/array.py
@@ -20,8 +20,14 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 THE SOFTWARE.
 """
 
+import islpy as isl
 import loopy as lp
+import numpy as np
+
+import pymbolic
 import pymbolic.primitives as prim
+from pytools import memoize_method
+from pyopencl.compyte.array import get_common_dtype
 
 
 class Context:
@@ -44,7 +50,8 @@ class Context:
         *program* may not define any names starting with underscores.
     """
 
-    def __init__(self, program, bindings, target):
+    def __init__(self, queue, program, bindings, target):
+        self.queue = queue
         self._program = program
         self.bindings = bindings
         self.target = target
@@ -53,28 +60,33 @@ class Context:
     # update_program.
     @property
     def program(self):
-        self.program
+        return self._program
+
+    @property
+    def parameters(self):
+        pass
 
     def update_program(self, program):
-        self.program = program
+        self._program = program
 
     def get_parameter(self, name):
-        if name in self.program.all_variable_names():
-            if name not in self.program.all_params():
-                # FIXME: ... data dependent control flow?
-                raise ValueError(
-                        f"'{name}' is not a domain parameter "
-                        "in this context")
-
-            else:
-                return prim.Variable(name)
-        else:
+        if name not in self.program.all_variable_names():
             self.update_program(
                     self.program.copy(
-                        arguments=self.program.args + [
+                        args=self.program.args + [
                             lp.ValueArg(name, dtype=self.program.index_dtype)
                             ]))
 
+        """
+        if name not in self.program.all_params():
+            # FIXME: ... data dependent control flow?
+            raise ValueError(
+                    f"'{name}' is not a domain parameter "
+                    "in this context")
+        """
+
+        return prim.Variable(name)
+
 
 class Target:
     pass
@@ -104,9 +116,9 @@ def make_context(arg):
         raise ValueError(f"invalid argument type: {type(arg).__name__}")
 
     import loopy as lp
-    program = lp.make_kernel("{[]:}", [], target=target.get_loopy_target())
+    program = lp.make_kernel("{:}", [], target=target.get_loopy_target())
 
-    return Context(program, {}, target)
+    return Context(arg, program, {}, target)
 
 
 class Array:
@@ -121,8 +133,11 @@ class Array:
         An instance of :class:`loopy.types.LoopyType` or *None* to indicate
         that the type of the array is not yet known.
 
-    .. attribute:: expression
+    .. attribute:: is_materialized
 
+        Whether this array is backed by actual storage.
+
+    .. attribute:: expression
     """
 
     def __init__(self, context, shape, dtype, expression):
@@ -135,34 +150,127 @@ class Array:
         pass
 
     def eval(self, **kwargs):
+        _, (out,) = self._knl()(self.context.queue, **kwargs)
+        return out
+
+    @property
+    def _dim_names(self):
+        return tuple(f"_{i}" for i in range(len(self.shape)))
+
+    def __getitem__(self, indices):
+        # TODO
         pass
 
+    @property
+    def T(self):
+        def swap_last_two_dims(arr):
+            if len(arr) < 2:
+                return arr
+            arr_copy = list(arr)
+            arr_copy[-2], arr_copy[-1] = arr_copy[-1], arr_copy[-2]
+            return type(arr)(arr_copy)
+
+        index_map = dict(
+                zip(self._dim_names,
+                    map(prim.Variable, swap_last_two_dims(self._dim_names))))
+
+        expression = pymbolic.substitute(self.expression, index_map)
+        shape = swap_last_two_dims(self.shape)
+
+        return Array(self.context, shape, self.dtype, expression)
+
+    def __mul__(self, other):
+        if not np.isscalar(other):
+            raise TypeError("only scalar multiplication supported")
+        return Array(
+                self.context,
+                self.shape,
+                get_common_dtype(self, other, allow_double=True),
+                other * self.expression)
+
+    __rmul__ = __mul__
+
+    @property
+    def ndim(self):
+        return len(self.shape)
+
+    @memoize_method
+    def _knl(self):
+        knl = self.context.program
+
+        out = lp.GlobalArg("_out", shape=self.shape, dtype=self.dtype,
+                           order="C")
+
+        # FIXME: Won't work for scalars.
+        out_inames = [f"_out_{i}" for i in range(self.ndim)]
+
+        out_expr = pymbolic.substitute(
+                self.expression,
+                dict(zip(self._dim_names, map(prim.Variable, out_inames))))
+
+        # Build output domain.
+        params = []
+        from loopy.symbolic import get_dependencies
+        for sdep in map(get_dependencies, self.shape):
+            for dep in sdep:
+                params.append(self.context.get_parameter(dep).name)
+
+        dom = isl.BasicSet.universe(
+                isl.Space.create_from_names(
+                    isl.DEFAULT_CONTEXT,
+                    set=out_inames,
+                    params=params))
+
+        from loopy.symbolic import aff_from_expr
+        affs = isl.affs_from_space(dom.space)
+        for iname, expr in zip(out_inames, self.shape):
+            dom &= affs[0].le_set(affs[iname])
+            dom &= affs[iname].lt_set(aff_from_expr(dom.space, expr))
+        dom, = dom.get_basic_sets()
+
+        # Build output instruction.
+        from loopy.kernel.instruction import make_assignment
+        out_insn = make_assignment(
+                (prim.Variable("_out")[tuple(map(prim.Variable, out_inames))],),
+                out_expr,
+                id="_out",
+                within_inames=frozenset(out_inames))
+
+        return knl.copy(
+                domains=knl.domains + [dom],
+                instructions=knl.instructions + [out_insn],
+                args=knl.args + [out])
+
     def store(self, prefix="tmp"):
+        """Stores the array in a temporary."""
         pass
 
 
-def make_sym(context, name, shape, dtype=None):
+def make_sym(context, name, shape, dtype=None, order="C"):
     if name in context.program.all_variable_names():
         raise ValueError(f"name '{name}' already in use in context")
 
-    arg = lp.ArrayArg(name, shape=shape, dtype=dtype)
-    from loopy.symbolic import get_dependencies
-    for sdep in get_dependencies(si for si in arg.shape):
-        context.get_parameter(sdep)
+    arg = lp.GlobalArg(name, shape=shape, dtype=dtype, order=order)
 
-    context.update_program(context.program.copy(
-        arguments=context.program.arguments + [arg]))
+    shape = arg.shape
+    dtype = arg.dtype
 
-    # TODO make sure "name" is not taken
+    from loopy.symbolic import get_dependencies
+    for sdep in map(get_dependencies, arg.shape):
+        for dep in sdep:
+            context.get_parameter(dep)
+
+    context.update_program(
+            context.program.copy(
+                args=context.program.args + [arg]))
 
     v_name = prim.Variable(name)
-    subscripts = tuple(prim.Variable(???) for i in range(len(shape)))
+    subscripts = tuple(prim.Variable(f"_{i}") for i in range(len(shape)))
 
-    return Array(context, expression=v_name[subscripts])
+    return Array(context, shape, dtype, expression=v_name[subscripts])
 
 
 def zeros(context, shape, dtype):
     pass
 
-
 # vim: foldmethod=marker
diff --git a/test/test_array.py b/test/test_array.py
index 98de63e..bb39276 100644
--- a/test/test_array.py
+++ b/test/test_array.py
@@ -22,6 +22,8 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 THE SOFTWARE.
 """
 
+import arrayzy as az
+
 import numpy as np
 import numpy.linalg as la
 import sys
@@ -36,6 +38,36 @@ from pyopencl.tools import (  # noqa
         pytest_generate_tests_for_pyopencl as pytest_generate_tests)
 
 
+def test_symbolic_array(ctx_getter):
+    cl_ctx = ctx_getter()
+    queue = cl.CommandQueue(cl_ctx)
+    ctx = az.make_context(queue)
+    x = az.make_sym(ctx, "x", shape="n", dtype=np.float64)
+    x_in = cl.array.to_device(queue, np.array([1., 2., 3., 4., 5.]))
+    assert (x.eval(x=x_in).get() == x_in.get()).all()
+
+
+def test_transpose(ctx_getter):
+    cl_ctx = ctx_getter()
+    queue = cl.CommandQueue(cl_ctx)
+    ctx = az.make_context(queue)
+    x = az.make_sym(ctx, "x", shape="n,m", dtype=np.float64)
+    x_in = cl.array.to_device(
+            queue,
+            np.array([[1., 2., 3., 4., 5.], [6., 7., 8., 9., 10.]]))
+    assert (x.T.eval(x=x_in).get() == x_in.get().T).all()
+
+
+def test_scalar_multiply(ctx_getter):
+    cl_ctx = ctx_getter()
+    queue = cl.CommandQueue(cl_ctx)
+    ctx = az.make_context(queue)
+    x = az.make_sym(ctx, "x", shape="n", dtype=np.float64)
+    x_in = np.array([1., 2., 3., 4., 5.])
+    x_in = cl.array.to_device(queue, np.array([1., 2., 3., 4., 5.]))
+    assert ((2*x).eval(x=x_in).get() == (2*x_in).get()).all()
+
+
 if __name__ == "__main__":
     # make sure that import failures get reported, instead of skipping the
     # tests.
-- 
GitLab


From 53ad7902f64811f552f74320ea23a02c36286b4a Mon Sep 17 00:00:00 2001
From: Matt Wala <wala1@illinois.edu>
Date: Fri, 10 Apr 2020 16:24:24 -0500
Subject: [PATCH 03/46] Add a basic CI configuration

---
 .gitlab-ci.yml | 36 ++++++++++++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)
 create mode 100644 .gitlab-ci.yml

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
new file mode 100644
index 0000000..b8bbe44
--- /dev/null
+++ b/.gitlab-ci.yml
@@ -0,0 +1,36 @@
+Python 3 POCL:
+  script:
+  - export PY_EXE=python3
+  - export PYOPENCL_TEST=portable
+  - export EXTRA_INSTALL="pybind11 numpy mako"
+  - export LOOPY_NO_CACHE=1
+  - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project.sh
+  - ". ./build-and-test-py-project.sh"
+  tags:
+  - python3
+  - pocl
+  except:
+  - tags
+  artifacts:
+    reports:
+      junit: test/pytest.xml
+
+Pylint:
+  script:
+  - export PY_EXE=python3
+  - EXTRA_INSTALL="pybind11 numpy mako"
+  - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/prepare-and-run-pylint.sh
+  - ". ./prepare-and-run-pylint.sh arrayzy test/test_*.py"
+  tags:
+  - python3
+  except:
+  - tags
+
+Flake8:
+  script:
+  - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/prepare-and-run-flake8.sh
+  - ". ./prepare-and-run-flake8.sh arrayzy test"
+  tags:
+  - python3
+  except:
+  - tags
-- 
GitLab


From 13043b741ae0fcc4be232c2e89be39ecc7415e09 Mon Sep 17 00:00:00 2001
From: Matt Wala <wala1@illinois.edu>
Date: Fri, 10 Apr 2020 16:30:05 -0500
Subject: [PATCH 04/46] Add requirements.txt with pyopencl

---
 requirements.txt | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 requirements.txt

diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..7819c26
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1 @@
+git+https://github.com/inducer/pyopencl.git
-- 
GitLab


From 76b0a756988abf6b910b4ec83d5599e01e99d948 Mon Sep 17 00:00:00 2001
From: Matt Wala <wala1@illinois.edu>
Date: Fri, 10 Apr 2020 16:45:03 -0500
Subject: [PATCH 05/46] Use loopy's type inference to get the type of a new
 array

---
 arrayzy/array.py | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/arrayzy/array.py b/arrayzy/array.py
index c52c82e..f3a6d0a 100644
--- a/arrayzy/array.py
+++ b/arrayzy/array.py
@@ -27,7 +27,6 @@ import numpy as np
 import pymbolic
 import pymbolic.primitives as prim
 from pytools import memoize_method
-from pyopencl.compyte.array import get_common_dtype
 
 
 class Context:
@@ -62,6 +61,12 @@ class Context:
     def program(self):
         return self._program
 
+    def _infer_type(self, expr):
+        """Infer the type of an expression in the kernel being built."""
+        from loopy.type_inference import TypeInferenceMapper
+        mapper = TypeInferenceMapper(self._program)
+        return mapper(expr)
+
     @property
     def parameters(self):
         pass
@@ -182,11 +187,12 @@ class Array:
     def __mul__(self, other):
         if not np.isscalar(other):
             raise TypeError("only scalar multiplication supported")
+        new_expr = other * self.expression
         return Array(
                 self.context,
                 self.shape,
-                get_common_dtype(self, other, allow_double=True),
-                other * self.expression)
+                self.context._infer_type(new_expr),
+                new_expr)
 
     __rmul__ = __mul__
 
-- 
GitLab


From 82b44deeee40c5c117276a21137e319aca59d6c4 Mon Sep 17 00:00:00 2001
From: Matt Wala <wala1@illinois.edu>
Date: Fri, 10 Apr 2020 16:54:04 -0500
Subject: [PATCH 06/46] Fix, test transpose for arbitrary dims

---
 arrayzy/array.py   | 11 ++---------
 test/test_array.py |  8 +++++---
 2 files changed, 7 insertions(+), 12 deletions(-)

diff --git a/arrayzy/array.py b/arrayzy/array.py
index f3a6d0a..11274a8 100644
--- a/arrayzy/array.py
+++ b/arrayzy/array.py
@@ -168,19 +168,12 @@ class Array:
 
     @property
     def T(self):
-        def swap_last_two_dims(arr):
-            if len(arr) < 2:
-                return arr
-            arr_copy = list(arr)
-            arr_copy[-2], arr_copy[-1] = arr_copy[-1], arr_copy[-2]
-            return type(arr)(arr_copy)
-
         index_map = dict(
                 zip(self._dim_names,
-                    map(prim.Variable, swap_last_two_dims(self._dim_names))))
+                    map(prim.Variable, reversed(self._dim_names))))
 
         expression = pymbolic.substitute(self.expression, index_map)
-        shape = swap_last_two_dims(self.shape)
+        shape = tuple(reversed(self.shape))
 
         return Array(self.context, shape, self.dtype, expression)
 
diff --git a/test/test_array.py b/test/test_array.py
index bb39276..6a483bb 100644
--- a/test/test_array.py
+++ b/test/test_array.py
@@ -47,14 +47,16 @@ def test_symbolic_array(ctx_getter):
     assert (x.eval(x=x_in).get() == x_in.get()).all()
 
 
-def test_transpose(ctx_getter):
+@pytest.mark.parametrize("dim", (1, 2, 3))
+def test_transpose(ctx_getter, dim):
     cl_ctx = ctx_getter()
     queue = cl.CommandQueue(cl_ctx)
     ctx = az.make_context(queue)
-    x = az.make_sym(ctx, "x", shape="n,m", dtype=np.float64)
+    shape = ("l", "m", "n")[-dim:]
+    x = az.make_sym(ctx, "x", shape=shape, dtype=np.float64)
     x_in = cl.array.to_device(
             queue,
-            np.array([[1., 2., 3., 4., 5.], [6., 7., 8., 9., 10.]]))
+            np.arange(24, dtype=float).reshape((2, 3, -1)[-dim:]))
     assert (x.T.eval(x=x_in).get() == x_in.get().T).all()
 
 
-- 
GitLab


From 7ffe02ffaaea273c26bec49158053f200344807e Mon Sep 17 00:00:00 2001
From: Matt Wala <wala1@illinois.edu>
Date: Fri, 10 Apr 2020 16:56:03 -0500
Subject: [PATCH 07/46] flake8 fixes

---
 arrayzy/__init__.py | 2 +-
 arrayzy/array.py    | 6 ++++--
 test/test_array.py  | 9 ++++-----
 3 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/arrayzy/__init__.py b/arrayzy/__init__.py
index b7028a0..9d6dae8 100644
--- a/arrayzy/__init__.py
+++ b/arrayzy/__init__.py
@@ -1 +1 @@
-from arrayzy.array import Array, Context, make_context, make_sym
+from arrayzy.array import Array, Context, make_context, make_sym  # noqa
diff --git a/arrayzy/array.py b/arrayzy/array.py
index 11274a8..4dd2985 100644
--- a/arrayzy/array.py
+++ b/arrayzy/array.py
@@ -167,7 +167,7 @@ class Array:
         pass
 
     @property
-    def T(self):
+    def T(self):  # noqa
         index_map = dict(
                 zip(self._dim_names,
                     map(prim.Variable, reversed(self._dim_names))))
@@ -230,7 +230,9 @@ class Array:
         # Build output instruction.
         from loopy.kernel.instruction import make_assignment
         out_insn = make_assignment(
-                (prim.Variable("_out")[tuple(map(prim.Variable, out_inames))],),
+                (
+                    prim.Variable("_out")[
+                        tuple(map(prim.Variable, out_inames))],),
                 out_expr,
                 id="_out",
                 within_inames=frozenset(out_inames))
diff --git a/test/test_array.py b/test/test_array.py
index 6a483bb..14090f4 100644
--- a/test/test_array.py
+++ b/test/test_array.py
@@ -25,15 +25,14 @@ THE SOFTWARE.
 import arrayzy as az
 
 import numpy as np
-import numpy.linalg as la
 import sys
 
-import pytest
+import pytest  # noqa
 
 import pyopencl as cl
-import pyopencl.array as cl_array
-import pyopencl.cltypes as cltypes
-import pyopencl.tools as cl_tools
+import pyopencl.array as cl_array  # noqa
+import pyopencl.cltypes as cltypes  # noqa
+import pyopencl.tools as cl_tools  # noqa
 from pyopencl.tools import (  # noqa
         pytest_generate_tests_for_pyopencl as pytest_generate_tests)
 
-- 
GitLab


From 9ab172a95496ae77d3a57f39def303d5fd37170f Mon Sep 17 00:00:00 2001
From: Matt Wala <wala1@illinois.edu>
Date: Fri, 24 Apr 2020 12:11:33 -0500
Subject: [PATCH 08/46] Add initial advection demo

---
 experiments/advection.py | 474 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 474 insertions(+)
 create mode 100644 experiments/advection.py

diff --git a/experiments/advection.py b/experiments/advection.py
new file mode 100644
index 0000000..f0dc376
--- /dev/null
+++ b/experiments/advection.py
@@ -0,0 +1,474 @@
+import contextlib
+import numpy as np
+import numpy.linalg as la
+import numpy.polynomial.legendre as leg
+import pytest
+
+
+__doc__ = """
+Notation convention for operator shapes
+=======================================
+
+* m - number of elements in the discretization
+* n - number of volume degrees of freedom per element
+"""
+
+import functools
+
+
+memoized = functools.lru_cache(maxsize=None)
+
+
+def ortholegvander(x, deg):
+    """See numpy.polynomial.legendre.legvander(). Uses an orthonormal basis."""
+    result = leg.legvander(x, deg)
+    factors = np.array([np.sqrt((2*n+1)/2) for n in range(0, 1 + deg)])
+    return result * factors
+
+
+def ortholegder(c):
+    """See numpy.polynomial.legendre.legder(). Uses an orthonormal basis."""
+    fw_factors = np.array([np.sqrt((2*n+1)/2) for n in range(len(c))])
+    derivs = leg.legder(c * fw_factors)
+    return derivs / fw_factors[:len(derivs)]
+
+
+def ortholegval(x, c):
+    """See numpy.polynomial.legendre.legval(). Uses an orthonormal basis."""
+    factors = np.array([np.sqrt((2*n+1)/2) for n in range(len(c))])
+    return leg.legval(x, c * factors)
+
+
+class DGDiscr1D(object):
+    """A one-dimensional Discontinuous Galerkin discretization."""
+
+    def __init__(self, left, right, nelements, nnodes):
+        """
+        Inputs:
+            left - left endpoint
+            right - right endpoint
+            nelements - number of discretization panels
+            nnodes - number of degrees of freedom per panel
+        """
+        self.left = left
+        self.right = right
+        self.nelements = nelements
+        self.nnodes = nnodes
+
+    @property
+    @memoized
+    def ref_nodes(self):
+        """Return reference nodes for a single element.
+
+        Signature: ->(n,)
+        """
+        nodes, _ = leg.leggauss(self.nnodes)
+        return nodes
+
+    @property
+    @memoized
+    def ref_weights(self):
+        """Return reference quadrature weights for a single element.
+
+        Signature: ->(n,)
+        """
+        _, weights = leg.leggauss(self.nnodes)
+        return weights
+
+    def zeros(self):
+        """Return a zero solution.
+
+        Signature: ->(n*m,)
+        """
+        return np.zeros(self.nnodes * self.nelements)
+
+    @property
+    def h(self):
+        """Return the element size.
+
+        Signature: ->()
+        """
+        return self.elements[0,1] - self.elements[0,0]
+
+    def nodes(self):
+        """Return the vector of node coordinates.
+
+        Signature: ->(n*m,)
+        """
+        centers = (self.elements[:,0] + self.elements[:,1]) / 2
+        radii = (self.elements[:,1] - self.elements[:,0]) / 2
+        return ((self.ref_nodes[:,np.newaxis] * radii) + centers).T.ravel()
+
+    @property
+    @memoized
+    def vdm(self):
+        """Return the elementwise Vandermonde (modal-to-nodal) matrix.
+
+        Signature: ->(n, n)
+        """
+        return ortholegvander(self.ref_nodes, self.nnodes - 1)
+
+    @property
+    @memoized
+    def _ref_mass(self):
+        """Return the (volume) mass matrix for the reference element.
+
+        Signature: ->(n, n)
+        """
+        return la.inv(self.vdm @ self.vdm.T)
+
+    @property
+    @memoized
+    def mass(self):
+        """Return the elementwise volume mass matrix.
+
+        Signature: ->(n, n)
+        """
+        h = (self.right - self.left) / self.nelements
+        return (h/2) * self._ref_mass
+
+    @property
+    @memoized
+    def inv_mass(self):
+        """Return the inverse of the elementwise volume mass matrix.
+
+        Signature: ->(n, n)
+        """
+        return la.inv(self.mass)
+
+    @property
+    @memoized
+    def face_mass(self):
+        """Return the face mass matrix.
+
+        The face mass matrix combines the effects of applying the face mass
+        operator on each face and interpolating the output to the volume nodes.
+
+        Signature: ->(n, 2)
+        """
+        return self.interp.T
+
+    @property
+    @memoized
+    def diff(self):
+        """Return the elementwise differentiation matrix.
+
+        Signature: ->(n, n)
+        """
+        VrT = []
+        for row in np.eye(self.nnodes):
+            deriv = ortholegder(row)
+            VrT.append(ortholegval(self.ref_nodes, deriv))
+        Vr = np.vstack(VrT).T
+        return Vr @ la.inv(self.vdm)
+
+    @property
+    @memoized
+    def stiffness(self):
+        """Return the stiffness matrix.
+
+        Signature: ->(n, n)
+        """
+        return (self._ref_mass @ self.diff)
+
+    @property
+    @memoized
+    def interp(self):
+        """Return the volume-to-face interpolation matrix.
+
+        Signature: ->(2, n)
+        """
+        return ortholegvander([-1, 1], self.nnodes - 1) @ la.inv(self.vdm)
+
+    @property
+    @memoized
+    def elements(self):
+        """Return the list of elements, each given by their left/right boundaries.
+
+        Signature: ->(m, 2)
+        """
+        h = (self.right - self.left) / self.nelements
+        return np.array(list(zip(
+            np.linspace(self.left, self.right, self.nelements, endpoint=False),
+            np.linspace(h + self.left, self.right, self.nelements))))
+
+    @property
+    def dg_ops(self):
+        """Return a context manager yielding a DGOps1D instance.
+        """
+        return contextlib.contextmanager(lambda: (yield DGOps1DRef(self)))
+
+    @property
+    def normals(self):
+        """Return the face unit normals.
+
+        Signature: ->(m, 2)
+        """
+        result = np.zeros((self.nelements, 2))
+        result[:,0] = -1
+        result[:,1] = 1
+        return result
+
+
+def interpolate(discr, vec, nodes):
+    """Return an interpolated solution at *nodes*.
+
+    Input:
+        discr - a DGDiscr1D instance
+        vec - vector of nodal values at degrees of freedom
+        nodes - vector of nodes to interpolate to
+
+    Signature:  (m*n,) -> (len(nodes),)
+    """
+    elements = discr.elements
+    nelements = discr.nelements
+    nnodes = discr.nnodes
+    inv_vdm = la.inv(discr.vdm)
+
+    sorter = np.argsort(nodes)
+    sorted_nodes = nodes[sorter]
+    result = []
+
+    indices = np.searchsorted(sorted_nodes, elements)
+    for i, (start, end) in enumerate(indices):
+        if i == 0:
+            start = 0
+        elif i == nelements - 1:
+            end = len(nodes)
+
+        center = (elements[i][0] + elements[i][1]) / 2
+        radius = (elements[i][1] - elements[i][0]) / 2
+        element_nodes = sorted_nodes[start:end]
+        mapped_nodes = (element_nodes - center) / radius
+
+        modal_vals = inv_vdm @ vec[i * nnodes:(i + 1) * nnodes]
+        nodal_vals = ortholegvander(mapped_nodes, nnodes - 1) @ modal_vals
+        result.append(nodal_vals)
+
+    result = np.hstack(result)
+    unsorter = np.arange(len(nodes))[sorter]
+    return result[unsorter]
+
+
+def integrate(discr, soln):
+    """Return the integral of the solution.
+
+    Signature: (n*m,) -> ()
+    """
+    soln = soln.reshape((discr.nelements, discr.nnodes))
+    h = discr.elements[0][1] - discr.elements[0][0]
+    weights = discr.ref_weights * h / 2
+    return np.sum(soln * weights)
+
+
+def elementwise(mat, vec):
+    """Apply a matrix to rows of the input representing per-element
+    degrees of freedom.
+
+    Inputs:
+        mat: Shape (a, b)
+        vec: Shape (c, b)
+
+    Signature: (a, b), (c, b) -> (c, a)
+    """
+    return np.einsum("ij,kj->ki", mat, vec)
+
+
+class AbstractDGOps1D(object):
+
+    def __init__(self, discr):
+        self.discr = discr
+
+    def interp(self, vec):
+        """Apply elementwise volume-to-face interpolation.
+
+        Signature: (m, n) -> (m, 2)
+        """
+        raise NotImplementedError
+
+    def inv_mass(self, vec):
+        """Apply the elementwise inverse mass matrix.
+
+        Signature: (m, n) -> (m, n)
+        """
+        raise NotImplementedError
+
+    def stiffness(self, vec):
+        """Apply the elementwise stiffness matrix.
+
+        Signature: (m, n) -> (m, n)
+        """
+        raise NotImplementedError
+
+    def face_mass(self, vec):
+        """Apply the elementwise face mass matrix.
+
+        Signature: (m, 2) -> (m, n)
+        """
+        raise NotImplementedError
+
+    def face_swap(self, vec):
+        """Swap values at opposite faces.
+
+        Signature: (m, 2) -> (m, 2)
+        """
+        raise NotImplementedError
+
+
+def elementwise(mat, vec):
+    """Apply a matrix to rows of the input representing per-element
+    degrees of freedom.
+
+    Inputs:
+        mat: Shape (a, b)
+        vec: Shape (c, b)
+
+    Signature: (a, b), (c, b) -> (c, a)
+    """
+    return np.einsum("ij,kj->ki", mat, vec)
+
+
+class DGOps1DRef(AbstractDGOps1D):
+    """A reference NumPy implementation of the AbstractDGOps1D interface."""
+
+    def interp(self, vec):
+        return elementwise(self.discr.interp, vec)
+
+    def inv_mass(self, vec):
+        return elementwise(self.discr.inv_mass, vec)
+
+    def stiffness(self, vec):
+        return elementwise(self.discr.stiffness, vec)
+
+    def face_mass(self, vec):
+        return elementwise(self.discr.face_mass, vec)
+
+    def face_swap(self, vec):
+        result = np.zeros_like(vec)
+        result[:,0] = np.roll(vec[:,1], +1)
+        result[:,1] = np.roll(vec[:,0], -1)
+        return result
+
+
+class DGOps1D(AbstractDGOps1D):
+    pass
+
+
+class AdvectionOperator(object):
+    """A class representing a DG advection operator."""
+
+    def __init__(self, discr, c, flux_type):
+        """
+        Inputs:
+            discr: an instance of DGDiscr1D
+            c: advection speed parameter
+            flux_type: "upwind" or "central"
+        """
+        self.discr = discr
+        self.c = c
+        assert flux_type in ("upwind", "central")
+        self.flux_type = flux_type
+
+    def weak_flux(self, dg, vec):
+        """Apply the flux, weak form.
+
+        Inputs:
+            dg: a DGOps1D instance
+            vec: vector of nodal values at the faces
+
+        Signature: (m, 2) -> (m, 2)
+        """
+        if self.flux_type == "central":
+            flux = (vec + dg.face_swap(vec)) / 2
+
+        elif self.flux_type == "upwind":
+            swp = dg.face_swap(vec)
+            if self.c >= 0:
+                flux = np.stack((vec[:,0], swp[:,1]), axis=1)
+            else:
+                flux = np.stack((swp[:,0], vec[:,1]), axis=1)
+
+        flux *= self.c
+        flux *= self.discr.normals
+
+        return flux
+
+    def strong_flux(self, dg, vec):
+        """Apply the flux, strong form.
+
+        Inputs:
+            dg: a DGOps1D instance
+            vec: vector of nodal values at the faces
+
+        Signature: (m, 2) -> (m, 2)
+        """
+        return self.c * self.discr.normals * vec - self.weak_flux(dg, vec)
+
+    def apply(self, vec):
+        """Main operator implementation.
+
+        Signature: (m, n) -> (m, n)
+        """
+        with self.discr.dg_ops() as dg:
+            pt1 = dg.face_mass(self.strong_flux(dg, dg.interp(vec)))
+            pt2 = self.c * dg.stiffness(vec)
+            return -dg.inv_mass(pt1 - pt2)
+
+    def __call__(self, vec):
+        """Apply the DG advection operator to the vector of degrees of freedom.
+
+        Signature: (m*n,) -> (m*n,)
+        """
+        vec = vec.reshape((self.discr.nelements, self.discr.nnodes))
+        return self.apply(vec).reshape((-1,))
+
+
+def rk4(rhs, initial, t_initial, t_final, dt):
+    """RK4 integrator.
+
+    Inputs:
+        - rhs: a callable that takes arguments (t, y)
+        - initial: initial value
+        - t_initial: initial time
+        - t_final: final time
+        - dt: step size
+
+    Returns:
+        The solution computed at the final time.
+    """
+    t = t_initial
+    sol = initial
+
+    while t < t_final:
+        dt = min(dt, t_final - t)
+        s0 = rhs(t, sol)
+        s1 = rhs(t + dt/2, sol + dt/2 * s0)
+        s2 = rhs(t + dt/2, sol + dt/2 * s1)
+        s3 = rhs(t + dt, sol + dt * s2)
+        sol = sol + dt / 6 * (s0 + 2 * s1 + 2 * s2 + s3)
+        t += dt
+
+    return sol
+
+
+def test_rk4():
+    assert np.isclose(rk4(lambda t, y: -y, 1, 0, 5, 0.01), np.exp(-5))
+
+
+@pytest.mark.parametrize("order", (3, 4, 5))
+@pytest.mark.parametrize("flux_type", ("central", "upwind"))
+def test_advection_convergence(order, flux_type):
+    errors = []
+    hs = []
+
+    for nelements in (8, 12, 16, 20):
+        discr = DGDiscr1D(0, 2*np.pi, nelements=nelements, nnodes=order)
+        u_initial = np.sin(discr.nodes())
+        op = AdvectionOperator(discr, c=1, flux_type=flux_type)
+        u = rk4(lambda t, y: op(y), u_initial, t_initial=0, t_final=np.pi, dt=0.01)
+        u_ref = -u_initial
+        hs.append(discr.h)
+        errors.append(integrate(discr, (u - u_ref)**2)**0.5)
+
+    eoc, _ = np.polyfit(np.log(hs), np.log(errors), 1)
+    assert eoc >= order - 0.1, eoc
-- 
GitLab


From 3a543c9a24dc7caf1a80043fb28c5cae37dfd0c7 Mon Sep 17 00:00:00 2001
From: Matt Wala <wala1@illinois.edu>
Date: Sat, 25 Apr 2020 20:36:13 -0500
Subject: [PATCH 09/46] Work on refactoring

---
 arrayzy/__init__.py |   2 +-
 arrayzy/array.py    | 267 ++++++++++----------------------------------
 arrayzy/code.py     | 203 +++++++++++++++++++++++++++++++++
 test/test_array.py  |  39 ++++---
 4 files changed, 292 insertions(+), 219 deletions(-)
 create mode 100644 arrayzy/code.py

diff --git a/arrayzy/__init__.py b/arrayzy/__init__.py
index 9d6dae8..af107be 100644
--- a/arrayzy/__init__.py
+++ b/arrayzy/__init__.py
@@ -1 +1 @@
-from arrayzy.array import Array, Context, make_context, make_sym  # noqa
+from arrayzy.code import Code, CodeBuilder
diff --git a/arrayzy/array.py b/arrayzy/array.py
index 4dd2985..adea782 100644
--- a/arrayzy/array.py
+++ b/arrayzy/array.py
@@ -29,108 +29,12 @@ import pymbolic.primitives as prim
 from pytools import memoize_method
 
 
-class Context:
-    """
-    .. attribute:: program
-
-        A :mod:`loopy.LoopKernel` that defines names used in expressions
-        in :class:`Array` instances attached to this context. Names
-        defined once will never change their meaning, for the lifetime
-        of this object.
-
-    .. attribute:: bindings
-
-    .. attribute:: target
-
-        A :class:`Target` for code generation and execution.
-
-    .. note::
-
-        *program* may not define any names starting with underscores.
-    """
-
-    def __init__(self, queue, program, bindings, target):
-        self.queue = queue
-        self._program = program
-        self.bindings = bindings
-        self.target = target
-
-    # The 'program' attribute is only supposed to be modified via
-    # update_program.
-    @property
-    def program(self):
-        return self._program
-
-    def _infer_type(self, expr):
-        """Infer the type of an expression in the kernel being built."""
-        from loopy.type_inference import TypeInferenceMapper
-        mapper = TypeInferenceMapper(self._program)
-        return mapper(expr)
-
-    @property
-    def parameters(self):
-        pass
-
-    def update_program(self, program):
-        self._program = program
-
-    def get_parameter(self, name):
-        if name not in self.program.all_variable_names():
-            self.update_program(
-                    self.program.copy(
-                        args=self.program.args + [
-                            lp.ValueArg(name, dtype=self.program.index_dtype)
-                            ]))
-
-        """
-        if name not in self.program.all_params():
-            # FIXME: ... data dependent control flow?
-            raise ValueError(
-                    f"'{name}' is not a domain parameter "
-                    "in this context")
-        """
-
-        return prim.Variable(name)
-
-
-class Target:
-    pass
-
-
-class PyOpenCLTarget:
-    def __init__(self, queue):
-        self.queue = queue
-
-    def get_loopy_target(self):
-        import loopy as lp
-        return lp.PyOpenCLTarget(self.queue.device)
-
-
-def make_context(arg):
-    import sys
-
-    target = None
-
-    # avoid expensive/failing import
-    if "pyopencl" in sys.modules:
-        import pyopencl as cl
-        if isinstance(arg, cl.CommandQueue):
-            target = PyOpenCLTarget(arg)
-
-    if target is None:
-        raise ValueError(f"invalid argument type: {type(arg).__name__}")
-
-    import loopy as lp
-    program = lp.make_kernel("{:}", [], target=target.get_loopy_target())
-
-    return Context(arg, program, {}, target)
-
-
 class Array:
-    """
-    .. attribute:: context
+    """A multidimensional array.
+
+    .. attribute:: code
 
-        The :class:`Context` with which this array is associated, to
+        The :class:`Code` with which this array is associated, to
         define the meanings of names used in expressions in this object.
 
     .. attribute:: dtype
@@ -138,140 +42,93 @@ class Array:
         An instance of :class:`loopy.types.LoopyType` or *None* to indicate
         that the type of the array is not yet known.
 
-    .. attribute:: is_materialized
+    .. attribute:: shape
 
-        Whether this array is backed by actual storage.
+    .. attribute:: ndim
 
-    .. attribute:: expression
-    """
-
-    def __init__(self, context, shape, dtype, expression):
-        self.context = context
-        self.shape = shape
-        self.dtype = dtype
-        self.expression = expression
+    .. attribute:: T
 
-    def with_(self, **kwargs):
-        pass
+    .. automethod:: to_loopy_expression
 
-    def eval(self, **kwargs):
-        _, (out,) = self._knl()(self.context.queue, **kwargs)
-        return out
-
-    @property
-    def _dim_names(self):
-        return tuple(f"_{i}" for i in range(len(self.shape)))
+    .. automethod:: get_dependencies
+    """
 
-    def __getitem__(self, indices):
-        # TODO
-        pass
+    def __init__(self, code, shape, dtype):
+        self.code = code
+        self._dtype = dtype
+        self.shape = shape
 
     @property
-    def T(self):  # noqa
-        index_map = dict(
-                zip(self._dim_names,
-                    map(prim.Variable, reversed(self._dim_names))))
-
-        expression = pymbolic.substitute(self.expression, index_map)
-        shape = tuple(reversed(self.shape))
+    def ndim(self):
+        return len(self.shape)
 
-        return Array(self.context, shape, self.dtype, expression)
+    def _infer_type(self, expr):
+        from loopy.type_inference import TypeInferenceMapper
+        mapper = TypeInferenceMapper(self.code.program)
+        return mapper(expr)
 
     def __mul__(self, other):
         if not np.isscalar(other):
             raise TypeError("only scalar multiplication supported")
-        new_expr = other * self.expression
-        return Array(
-                self.context,
+
+        new_expr = other * self.to_loopy_expression(self.dims)
+        return ArrayExpression(
+                self.code,
                 self.shape,
-                self.context._infer_type(new_expr),
+                self._infer_type(new_expr),
                 new_expr)
 
     __rmul__ = __mul__
-
+    
     @property
-    def ndim(self):
-        return len(self.shape)
-
-    @memoize_method
-    def _knl(self):
-        knl = self.context.program
-
-        out = lp.GlobalArg("_out", shape=self.shape, dtype=self.dtype,
-                           order="C")
-
-        # FIXME: Won't work for scalars.
-        out_inames = [f"_out_{i}" for i in range(self.ndim)]
-
-        out_expr = pymbolic.substitute(
-                self.expression,
-                dict(zip(self._dim_names, map(prim.Variable, out_inames))))
+    def dtype(self):
+        # TODO: The dtype could get out of sync with the kernel if the kernel is
+        # updated. Should the dtype be updated as well?
+        return self._dtype
 
-        # Build output domain.
-        params = []
-        from loopy.symbolic import get_dependencies
-        for sdep in map(get_dependencies, self.shape):
-            for dep in sdep:
-                params.append(self.context.get_parameter(dep).name)
-
-        dom = isl.BasicSet.universe(
-                isl.Space.create_from_names(
-                    isl.DEFAULT_CONTEXT,
-                    set=out_inames,
-                    params=params))
-
-        from loopy.symbolic import aff_from_expr
-        affs = isl.affs_from_space(dom.space)
-        for iname, expr in zip(out_inames, self.shape):
-            dom &= affs[0].le_set(affs[iname])
-            dom &= affs[iname].lt_set(aff_from_expr(dom.space, expr))
-        dom, = dom.get_basic_sets()
-
-        # Build output instruction.
-        from loopy.kernel.instruction import make_assignment
-        out_insn = make_assignment(
-                (
-                    prim.Variable("_out")[
-                        tuple(map(prim.Variable, out_inames))],),
-                out_expr,
-                id="_out",
-                within_inames=frozenset(out_inames))
+    @property
+    def dims(self):
+        return tuple(map(prim.Variable, self.dim_names))
 
-        return knl.copy(
-                domains=knl.domains + [dom],
-                instructions=knl.instructions + [out_insn],
-                args=knl.args + [out])
+    @property
+    def dim_names(self):
+        return tuple(f"_{i}" for i in range(len(self.shape)))
 
-    def store(self, prefix="tmp"):
-        """Stores the array in a temporary."""
-        pass
+    @property
+    def T(self):
+        new_shape = tuple(reversed(self.shape))
+        new_expr = self.to_loopy_expression(list(reversed(self.dims)))
+        return ArrayExpression(self.code, new_shape, self.dtype, new_expr)
 
+    def to_loopy_expression(self, dims):
+        raise NotImplementedError
 
-def make_sym(context, name, shape, dtype=None, order="C"):
-    if name in context.program.all_variable_names():
-        raise ValueError(f"name '{name}' already in use in context")
 
-    arg = lp.GlobalArg(name, shape=shape, dtype=dtype, order=order)
+class ArrayExpression(Array):
+    """
+    .. attribute:: expression
+    """
 
-    shape = arg.shape
-    dtype = arg.dtype
+    def __init__(self, code, shape, dtype, expression):
+        super().__init__(code, shape, dtype)
+        self.expression = expression
 
-    from loopy.symbolic import get_dependencies
-    for sdep in map(get_dependencies, arg.shape):
-        for dep in sdep:
-            context.get_parameter(dep)
+    def to_loopy_expression(self, dims):
+        assignments = dict(zip(self.dim_names, tuple(dims)))
+        return pymbolic.substitute(self.expression, assignments)
 
-    context.update_program(
-            context.program.copy(
-                args=context.program.args + [arg]))
 
-    v_name = prim.Variable(name)
-    subscripts = tuple(prim.Variable(f"_{i}") for i in range(len(shape)))
+class ArrayVariable(Array):
+    """
+    .. attribute:: name
+    """
 
-    return Array(context, shape, dtype, expression=v_name[subscripts])
+    def __init__(self, code, shape, dtype, name):
+        super().__init__(code, shape, dtype)
+        self.name = name
 
+    def to_loopy_expression(self, dims):
+        return prim.Variable(self.name)[tuple(dims)]
 
-def zeros(context, shape, dtype):
-    pass
 
 # vim: foldmethod=marker
diff --git a/arrayzy/code.py b/arrayzy/code.py
new file mode 100644
index 0000000..2e22011
--- /dev/null
+++ b/arrayzy/code.py
@@ -0,0 +1,203 @@
+import loopy as lp
+import islpy as isl
+import pymbolic.primitives as prim
+
+from arrayzy.array import ArrayVariable, ArrayExpression
+
+
+dim_type = isl.dim_type
+
+
+
+class Target:
+    pass
+
+
+class PyOpenCLTarget:
+
+    def __init__(self, queue):
+        self.queue = queue
+
+    def get_loopy_target(self):
+        import loopy as lp
+        return lp.PyOpenCLTarget(self.queue.device)
+
+
+class Code:
+    """A representation of a program that carries out an array-valued
+    computation.
+
+    .. attribute:: program
+
+        A :mod:`loopy.LoopKernel` that defines names used in expressions
+        in :class:`Array` instances attached to this context. Names
+        defined once will never change their meaning, for the lifetime
+        of this object.
+
+    .. attribute:: bindings
+
+        A mapping from argument names to :class:`Array` values.
+
+    .. attribute:: target
+
+        A :class:`Target` for code generation and execution.
+
+    .. note::
+
+        *program* may not define any names starting with underscores.
+
+    """
+
+    def __init__(self, queue, program, bindings, target):
+        self.queue = queue
+        self._program = program
+        self.bindings = bindings
+        self.target = target
+
+    # The 'program' attribute is only supposed to be modified via
+    # update_program.
+    @property
+    def program(self):
+        return self._program
+
+    def update_program(self, program):
+        self._program = program
+
+    def __call__(self, *args, **kwargs):
+        return self.program(self.queue, *args, **kwargs)
+
+    def eval_one(self, *args, **kwargs):
+        _, (result,) = self(*args, **kwargs)
+        return result
+
+
+class CodeBuilder:
+
+    def __init__(self, arg):
+        import sys
+
+        target = None
+
+        # avoid expensive/failing import
+        if "pyopencl" in sys.modules:
+            import pyopencl as cl
+            if isinstance(arg, cl.CommandQueue):
+                target = PyOpenCLTarget(arg)
+
+        if target is None:
+            raise ValueError(f"invalid argument type: {type(arg).__name__}")
+
+        import loopy as lp
+        program = lp.make_kernel("{:}", [], target=target.get_loopy_target())
+        self.code = Code(arg, program, {}, target)
+        
+        self.var_name_gen = self.program.get_var_name_generator()
+        self.insn_id_gen = self.program.get_instruction_id_generator()
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_value, traceback):
+        pass
+
+    def build(self):
+        return self.code
+
+    @property
+    def program(self):
+        return self.code.program
+
+    @property
+    def update_program(self):
+        return self.code.update_program
+    
+    def _get_or_create_parameter(self, name):
+        if name in self.program.all_variable_names():
+            if name not in self.program.all_params():
+                # FIXME: May have been added, but not added to a domain.
+                #
+                # FIXME: ... data dependent control flow?
+                # raise ValueError(
+                #        f"'{name}' is not a domain parameter "
+                #        "in this context")
+                pass
+        else:
+            self.update_program(
+                    self.program.copy(
+                        args=self.program.args + [
+                            lp.ValueArg(name, dtype=self.program.index_dtype)
+                            ]))
+            
+        return prim.Variable(name)
+
+    def _get_dependencies(self, expr):
+        pass
+
+    def argument(self, name, shape, dtype, order="C"):
+        if name in self.program.all_variable_names():
+            raise ValueError(f"name '{name}' already in use in code")
+
+        arg = lp.GlobalArg(name, shape=shape, dtype=dtype, order=order)
+
+        shape = arg.shape
+        dtype = arg.dtype
+
+        # Insert parameters from shape description.
+        from loopy.symbolic import get_dependencies
+        for sdep in map(get_dependencies, arg.shape):
+            for dep in sdep:
+                self._get_or_create_parameter(dep)
+
+        # Add argument to program.
+        self.code.update_program(
+                self.program.copy(args=self.program.args + [arg]))
+
+        return ArrayVariable(self.code, shape, dtype, name)
+
+    def matmul(self, lhs, rhs):
+        pass
+
+    def output(self, expr):
+        knl = self.program
+        name = self.var_name_gen("_out")
+        out = lp.GlobalArg(name, shape=expr.shape, dtype=expr.dtype, order="C")
+        out_inames = [self.var_name_gen(f"{name}_dim{d}")
+                                        for d in range(expr.ndim)]
+        out_expr = expr.to_loopy_expression(map(prim.Variable, out_inames))
+
+        # Build output domain.
+        param_names = []
+        from loopy.symbolic import get_dependencies
+        for sdep in map(get_dependencies, expr.shape):
+            for dep in sdep:
+                param_names.append(self._get_or_create_parameter(dep).name)
+
+        dom = isl.BasicSet.universe(
+                isl.Space.create_from_names(
+                    isl.DEFAULT_CONTEXT,
+                    set=out_inames,
+                    params=param_names))
+
+        from loopy.symbolic import aff_from_expr
+        affs = isl.affs_from_space(dom.space)
+        for iname, dim in zip(out_inames, expr.shape):
+            dom &= affs[0].le_set(affs[iname])
+            dom &= affs[iname].lt_set(aff_from_expr(dom.space, dim))
+        dom, = dom.get_basic_sets()
+
+        # Build output instruction.
+        from loopy.kernel.instruction import make_assignment
+        out_insn = make_assignment(
+                (
+                    prim.Variable(name)[
+                        tuple(map(prim.Variable, out_inames))],),
+                out_expr,
+                id=self.insn_id_gen(),
+                within_inames=frozenset(out_inames))
+
+        # TODO: dependencies
+        
+        self.update_program(knl.copy(
+                domains=knl.domains + [dom],
+                instructions=knl.instructions + [out_insn],
+                args=knl.args + [out]))
diff --git a/test/test_array.py b/test/test_array.py
index 14090f4..f7c263a 100644
--- a/test/test_array.py
+++ b/test/test_array.py
@@ -40,33 +40,46 @@ from pyopencl.tools import (  # noqa
 def test_symbolic_array(ctx_getter):
     cl_ctx = ctx_getter()
     queue = cl.CommandQueue(cl_ctx)
-    ctx = az.make_context(queue)
-    x = az.make_sym(ctx, "x", shape="n", dtype=np.float64)
+    
+    with az.CodeBuilder(queue) as cb:
+        x = cb.argument("x", shape="n", dtype=np.float64)
+        cb.output(x)
+
     x_in = cl.array.to_device(queue, np.array([1., 2., 3., 4., 5.]))
-    assert (x.eval(x=x_in).get() == x_in.get()).all()
+    code = cb.build()
+    assert (code.eval_one(x=x_in).get() == x_in.get()).all()
 
 
 @pytest.mark.parametrize("dim", (1, 2, 3))
 def test_transpose(ctx_getter, dim):
     cl_ctx = ctx_getter()
     queue = cl.CommandQueue(cl_ctx)
-    ctx = az.make_context(queue)
-    shape = ("l", "m", "n")[-dim:]
-    x = az.make_sym(ctx, "x", shape=shape, dtype=np.float64)
+
+    with az.CodeBuilder(queue) as cb:
+        shape = ("l", "m", "n")[-dim:]
+        x = cb.argument("x", shape=shape, dtype=np.float64)
+        cb.output(x.T)
+
+
     x_in = cl.array.to_device(
-            queue,
-            np.arange(24, dtype=float).reshape((2, 3, -1)[-dim:]))
-    assert (x.T.eval(x=x_in).get() == x_in.get().T).all()
+        queue,
+        np.arange(24, dtype=float).reshape((2, 3, -1)[-dim:]))
+    code = cb.build()
+    assert (code.eval_one(x=x_in).get() == x_in.get().T).all()
 
 
 def test_scalar_multiply(ctx_getter):
     cl_ctx = ctx_getter()
     queue = cl.CommandQueue(cl_ctx)
-    ctx = az.make_context(queue)
-    x = az.make_sym(ctx, "x", shape="n", dtype=np.float64)
-    x_in = np.array([1., 2., 3., 4., 5.])
+    
+    with az.CodeBuilder(queue) as cb:
+        x = cb.argument("x", shape="n", dtype=np.float64)
+        cb.output(2 * x)
+
     x_in = cl.array.to_device(queue, np.array([1., 2., 3., 4., 5.]))
-    assert ((2*x).eval(x=x_in).get() == (2*x_in).get()).all()
+    code = cb.build()
+    print(code.program)
+    assert (code.eval_one(x=x_in).get() == (2*x_in).get()).all()
 
 
 if __name__ == "__main__":
-- 
GitLab


From 61b4dfc4cf9c430c0daa5a2f347758cfbf72a355 Mon Sep 17 00:00:00 2001
From: Matt Wala <wala1@illinois.edu>
Date: Sat, 25 Apr 2020 21:56:52 -0500
Subject: [PATCH 10/46] Run pytest

---
 experiments/advection.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/experiments/advection.py b/experiments/advection.py
index f0dc376..52d6eff 100644
--- a/experiments/advection.py
+++ b/experiments/advection.py
@@ -472,3 +472,8 @@ def test_advection_convergence(order, flux_type):
 
     eoc, _ = np.polyfit(np.log(hs), np.log(errors), 1)
     assert eoc >= order - 0.1, eoc
+
+
+if __name__ == "__main__":
+    from pytest import main
+    main([__file__])
-- 
GitLab


From 224d47553baf93477364a148a2caeb25d11c6c62 Mon Sep 17 00:00:00 2001
From: Matt Wala <wala1@illinois.edu>
Date: Mon, 27 Apr 2020 21:32:32 -0500
Subject: [PATCH 11/46] WIP

---
 arrayzy/array.py         | 148 ++++++++++++++++----
 arrayzy/code.py          | 296 ++++++++++++++++++++++++++++++---------
 arrayzy/utils.py         |  66 +++++++++
 experiments/advection.py | 136 +++++++++++++++---
 test/test_array.py       |  94 -------------
 test/test_linalg.py      | 218 ++++++++++++++++++++++++++++
 6 files changed, 755 insertions(+), 203 deletions(-)
 create mode 100644 arrayzy/utils.py
 delete mode 100644 test/test_array.py
 create mode 100644 test/test_linalg.py

diff --git a/arrayzy/array.py b/arrayzy/array.py
index adea782..3158061 100644
--- a/arrayzy/array.py
+++ b/arrayzy/array.py
@@ -20,6 +20,8 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 THE SOFTWARE.
 """
 
+import operator
+
 import islpy as isl
 import loopy as lp
 import numpy as np
@@ -28,13 +30,16 @@ import pymbolic
 import pymbolic.primitives as prim
 from pytools import memoize_method
 
+from functools import partialmethod
+from arrayzy.utils import domain_for_shape
+
 
 class Array:
-    """A multidimensional array.
+    """A representation of an array value in a computation.
 
-    .. attribute:: code
+    .. attribute:: ctx
 
-        The :class:`Code` with which this array is associated, to
+        The :class:`Context` with which this array is associated, to
         define the meanings of names used in expressions in this object.
 
     .. attribute:: dtype
@@ -44,17 +49,35 @@ class Array:
 
     .. attribute:: shape
 
+    .. attribute:: name
+
+        A name for this computation, or *None*.
+
     .. attribute:: ndim
 
-    .. attribute:: T
+        The number of dimensions in :attr:`shape`.
+
+    .. automethod:: T
 
     .. automethod:: to_loopy_expression
 
-    .. automethod:: get_dependencies
+    .. automethod:: get_domain
+
+    .. automethod:: __mul__
+    .. automethod:: __rmul__
+    .. automethod:: __add__
+    .. automethod:: __radd__
+    .. automethod:: __sub__
+    .. automethod:: __rsub__
+    .. automethod:: __truediv__
+    .. automethod:: __rtruediv__
+    .. automethod:: __neg__
+
+    .. automethod:: __getitem__
     """
 
-    def __init__(self, code, shape, dtype):
-        self.code = code
+    def __init__(self, ctx, shape, dtype):
+        self.ctx = ctx
         self._dtype = dtype
         self.shape = shape
 
@@ -64,22 +87,90 @@ class Array:
 
     def _infer_type(self, expr):
         from loopy.type_inference import TypeInferenceMapper
-        mapper = TypeInferenceMapper(self.code.program)
+        mapper = TypeInferenceMapper(self.ctx.program)
         return mapper(expr)
 
-    def __mul__(self, other):
-        if not np.isscalar(other):
-            raise TypeError("only scalar multiplication supported")
-
-        new_expr = other * self.to_loopy_expression(self.dims)
+    def _unary_op(self, op):
+        new_expr = op(self.to_loopy_expression(self.dims))
         return ArrayExpression(
-                self.code,
+                self.ctx,
                 self.shape,
-                self._infer_type(new_expr),
+                self.dtype,
                 new_expr)
 
-    __rmul__ = __mul__
+    def _binary_op(self, op, other, reverse=False):
+        if np.isscalar(other):
+            args = (self.to_loopy_expression(self.dims), other)
+            if reverse:
+                args = tuple(reversed(args))
+                
+            new_expr = op(*args)
+            return ArrayExpression(
+                    self.ctx,
+                    self.shape,
+                    self._infer_type(new_expr),
+                    new_expr)
+
+        elif isinstance(other, Array):
+            args = (
+                    self.to_loopy_expression(self.dims),
+                    other.to_loopy_expression(self.dims))
+            if reverse:
+                args = tuple(reversed(args))
+
+            new_expr = op(*args)
+            return ArrayExpression(
+                    self.ctx,
+                    self.shape,
+                    self._infer_type(new_expr),
+                    new_expr)
+            
+        else:
+            raise ValueError
+
+    __mul__ = partialmethod(_binary_op, operator.mul)
+    __rmul__ = partialmethod(__mul__, reverse=True)
+
+    __add__ = partialmethod(_binary_op, operator.add)
+    __radd__ = partialmethod(__add__, reverse=True)
     
+    __sub__ = partialmethod(_binary_op, operator.sub)
+    __rsub__ = partialmethod(__sub__, reverse=True)
+
+    __truediv__ = partialmethod(_binary_op, operator.truediv)
+    __rtruediv__ = partialmethod(__truediv__, reverse=True)
+
+    __neg__ = partialmethod(_unary_op, operator.neg)
+
+    def __getitem__(self, slice_spec):
+        from numbers import Integral
+        
+        if len(slice_spec) != self.ndim:
+            raise ValueError("incorrect slice shape")
+
+        dims = []
+        shape = []
+        
+        for i, elem in enumerate(slice_spec):
+            if elem == slice(None, None, None):
+                d = len(shape)
+                dims.append(prim.Variable(f"_{d}"))
+                shape.append(self.shape[i])
+            elif isinstance(elem, Integral):
+                # TODO: should assume that this is in-bounds
+                dims.append(elem)
+            else:
+                raise ValueError("not implemented")
+            # TODO: Not clear how to implement slices with upper bounds that may
+            # exceed the symbolic maximum along the dimension.  What is the
+            # shape of the resulting array?
+
+        return ArrayExpression(
+                self.ctx,
+                shape=tuple(shape),
+                dtype=self.dtype,
+                expression=self.to_loopy_expression(dims))
+
     @property
     def dtype(self):
         # TODO: The dtype could get out of sync with the kernel if the kernel is
@@ -94,37 +185,46 @@ class Array:
     def dim_names(self):
         return tuple(f"_{i}" for i in range(len(self.shape)))
 
+    def get_domain(self, dim_names=None):
+        """Return the domain that defines the space of indices for this array.
+        """
+        if dim_names is None:
+            dim_names = self.dim_names
+        return domain_for_shape(dim_names, self.shape)
+
     @property
     def T(self):
         new_shape = tuple(reversed(self.shape))
         new_expr = self.to_loopy_expression(list(reversed(self.dims)))
-        return ArrayExpression(self.code, new_shape, self.dtype, new_expr)
+        return ArrayExpression(self.ctx, new_shape, self.dtype, new_expr)
 
     def to_loopy_expression(self, dims):
         raise NotImplementedError
 
 
 class ArrayExpression(Array):
-    """
+    """An array-valued expression.
+
     .. attribute:: expression
     """
 
-    def __init__(self, code, shape, dtype, expression):
-        super().__init__(code, shape, dtype)
+    def __init__(self, ctx, shape, dtype, expression):
+        super().__init__(ctx, shape, dtype)
         self.expression = expression
-
+        
     def to_loopy_expression(self, dims):
         assignments = dict(zip(self.dim_names, tuple(dims)))
         return pymbolic.substitute(self.expression, assignments)
 
 
 class ArrayVariable(Array):
-    """
+    """An array-valued variable.
+
     .. attribute:: name
     """
 
-    def __init__(self, code, shape, dtype, name):
-        super().__init__(code, shape, dtype)
+    def __init__(self, ctx, shape, dtype, name):
+        super().__init__(ctx, shape, dtype)
         self.name = name
 
     def to_loopy_expression(self, dims):
diff --git a/arrayzy/code.py b/arrayzy/code.py
index 2e22011..b7cb9d8 100644
--- a/arrayzy/code.py
+++ b/arrayzy/code.py
@@ -1,12 +1,11 @@
 import loopy as lp
+import loopy.symbolic as sym
+
 import islpy as isl
 import pymbolic.primitives as prim
 
 from arrayzy.array import ArrayVariable, ArrayExpression
-
-
-dim_type = isl.dim_type
-
+from arrayzy.utils import domain_for_shape
 
 
 class Target:
@@ -24,19 +23,19 @@ class PyOpenCLTarget:
 
 
 class Code:
-    """A representation of a program that carries out an array-valued
-    computation.
+    """A representation of a program carrying out an array-valued computation.
 
     .. attribute:: program
 
-        A :mod:`loopy.LoopKernel` that defines names used in expressions
-        in :class:`Array` instances attached to this context. Names
-        defined once will never change their meaning, for the lifetime
-        of this object.
+        A :mod:`loopy.LoopKernel` that defines names used in expressions in
+        :class:`Array` instances attached to this code. Names defined once will
+        never change their meaning, for the lifetime of this object.
 
     .. attribute:: bindings
 
-        A mapping from argument names to :class:`Array` values.
+        A mapping from argument names to bound values.
+
+    .. attribute:: bound_arguments
 
     .. attribute:: target
 
@@ -45,7 +44,6 @@ class Code:
     .. note::
 
         *program* may not define any names starting with underscores.
-
     """
 
     def __init__(self, queue, program, bindings, target):
@@ -53,6 +51,7 @@ class Code:
         self._program = program
         self.bindings = bindings
         self.target = target
+        self.bound_arguments = {}
 
     # The 'program' attribute is only supposed to be modified via
     # update_program.
@@ -63,15 +62,21 @@ class Code:
     def update_program(self, program):
         self._program = program
 
-    def __call__(self, *args, **kwargs):
-        return self.program(self.queue, *args, **kwargs)
-
     def eval_one(self, *args, **kwargs):
         _, (result,) = self(*args, **kwargs)
         return result
 
+    def __call__(self, *args, **kwargs):
+        kwargs.update(self.bound_arguments)
+        return self.program(self.queue, *args, **kwargs)
+
 
 class CodeBuilder:
+    """An interface for building up array computations.
+
+    .. automethod:: __enter__
+    .. automethod:: __exit__
+    """
 
     def __init__(self, arg):
         import sys
@@ -88,11 +93,14 @@ class CodeBuilder:
             raise ValueError(f"invalid argument type: {type(arg).__name__}")
 
         import loopy as lp
-        program = lp.make_kernel("{:}", [], target=target.get_loopy_target())
+        program = lp.make_kernel(
+                "{:}", [], target=target.get_loopy_target(),
+                lang_version=lp.MOST_RECENT_LANGUAGE_VERSION)
         self.code = Code(arg, program, {}, target)
-        
+
         self.var_name_gen = self.program.get_var_name_generator()
         self.insn_id_gen = self.program.get_instruction_id_generator()
+        self._last_insn_id = None
 
     def __enter__(self):
         return self
@@ -101,6 +109,8 @@ class CodeBuilder:
         pass
 
     def build(self):
+        """Return the generated :class:`Code`.
+        """
         return self.code
 
     @property
@@ -110,30 +120,85 @@ class CodeBuilder:
     @property
     def update_program(self):
         return self.code.update_program
-    
+
+    # {{{
+
+    def _unify_types(self, *args):
+        if not all(args[0] == arg for arg in args[1:]):
+            raise NotImplementedError
+        return args[0]
+
     def _get_or_create_parameter(self, name):
+        # Create a parameter variable for *name* if not present.
+        # Return the parameter as a pymbolic variable.
+
         if name in self.program.all_variable_names():
             if name not in self.program.all_params():
                 # FIXME: May have been added, but not added to a domain.
-                #
-                # FIXME: ... data dependent control flow?
-                # raise ValueError(
-                #        f"'{name}' is not a domain parameter "
-                #        "in this context")
                 pass
         else:
             self.update_program(
                     self.program.copy(
                         args=self.program.args + [
-                            lp.ValueArg(name, dtype=self.program.index_dtype)
+                                lp.ValueArg(name, dtype=self.program.index_dtype)
                             ]))
-            
+
         return prim.Variable(name)
 
     def _get_dependencies(self, expr):
-        pass
+        # FIXME: this needs to be implemented properly.
+        if not self._last_insn_id:
+            return frozenset([])
+        return frozenset([self._last_insn_id])
+
+    def _assume_equal(self, lhs, rhs):
+        """Adds the assumption "lhs = rhs"."""
+        param_names = set()
+        for sdep in map(sym.get_dependencies, (lhs, rhs)):
+            param_names |= sdep
+
+        # Build domain.
+        dom = isl.BasicSet.universe(
+                isl.Space.create_from_names(
+                    isl.DEFAULT_CONTEXT,
+                    set=(),
+                    params=param_names))
+
+        aff_lhs = sym.aff_from_expr(dom.space, lhs)
+        aff_rhs = sym.aff_from_expr(dom.space, rhs)
+
+        dom &= aff_lhs.eq_set(aff_rhs)
+
+        # TODO: Throw an error if the assumption leads to a contradiction.
+        dom, = dom.get_basic_sets()
+
+        self.update_program(lp.assume(self.program, dom))
+
+    def _make_array_assignment(self, name, dim_names, expr, within_inames):
+        from loopy.kernel.instruction import make_assignment
+        insn_id = self.insn_id_gen(f"_store{name}")
+        out_insn = make_assignment(
+                (
+                    prim.Variable(name)[
+                        tuple(map(prim.Variable, dim_names))],),
+                expr,
+                id=insn_id,
+                within_inames=frozenset(dim_names),
+                depends_on=self._get_dependencies(expr))
+
+        self.update_program(self.program.copy(
+                instructions=self.program.instructions + [out_insn]))
+        self._last_insn_id = insn_id
+
+    # }}}
+
+    # {{{ user interface
 
     def argument(self, name, shape, dtype, order="C"):
+        """Append an argument to the program.
+
+        :returns: a :class:`ArrayVariable`
+        """
         if name in self.program.all_variable_names():
             raise ValueError(f"name '{name}' already in use in code")
 
@@ -143,8 +208,7 @@ class CodeBuilder:
         dtype = arg.dtype
 
         # Insert parameters from shape description.
-        from loopy.symbolic import get_dependencies
-        for sdep in map(get_dependencies, arg.shape):
+        for sdep in map(sym.get_dependencies, arg.shape):
             for dep in sdep:
                 self._get_or_create_parameter(dep)
 
@@ -154,50 +218,152 @@ class CodeBuilder:
 
         return ArrayVariable(self.code, shape, dtype, name)
 
-    def matmul(self, lhs, rhs):
-        pass
+    def bind_argument(self, arg, val):
+        self.code.bound_arguments[arg.name] = val
+
+    def roll(self, a, shift, name=None):
+        """Roll elements along the given axis.
+
+        :returns: a :class:`Array`
+        """
+        assert a.ndim == 1
+        idx = (prim.Variable("_0") - shift) % a.shape[0]
+        # It's not immediately clear what loopy-generated code guarantees about
+        # the sign of the result of the % operator, so if the result is negative
+        # we shift it to be positive.
+        idx = idx + prim.If(prim.Comparison(idx, "<", 0), a.shape[0], 0)
+        expr = a.to_loopy_expression((idx,))
+        return ArrayExpression(self.code, a.shape, a.dtype, expr)
+
+    def stack(self, arrays, axis=0, name=None):
+        """Join a sequence of arrays along a new axis.
+
+        :arg axis: the index of the axis
+
+        :returns: a :class:`Array`
+        """
+        for array in arrays[1:]:
+            if array.ndim != arrays[0].ndim:
+                raise ValueError("arrays must have same dimension")
+            for array_dim, array0_dim in zip(array.shape, arrays[0].shape):
+                self._assume_equal(array_dim, array0_dim)
+
+        if name is None:
+            name = "_stack"
+
+        name = self.var_name_gen(name)
+
+        out_inames = []
+        for i in range(arrays[0].ndim):
+            if i >= axis:
+                i += 1
+            out_inames.append(f"{name}_dim{i}")
+
+        domain = domain_for_shape(out_inames, arrays[0].shape)
+
+        out_shape = list(arrays[0].shape)
+        out_shape.insert(axis, len(arrays))
+        out_shape = tuple(out_shape)
+
+        out_dtype = self._unify_types(*(a.dtype for a in arrays))
+
+        out = lp.TemporaryVariable(
+                name, dtype=out_dtype,
+                shape=out_shape,
+                address_space=lp.AddressSpace.GLOBAL)
+
+        # Create an output instruction for each input array.
+        from loopy.kernel.instruction import make_assignment
+        out_insns = []
+        for i in range(len(arrays)):
+            indices = list(map(prim.Variable, out_inames))
+            expr = arrays[i].to_loopy_expression(indices)
+            indices.insert(axis, i)
+            indices = tuple(indices)
+            insn_id = self.insn_id_gen(f"{name}_copy{i}")
+            out_insn = make_assignment(
+                    (prim.Variable(name)[indices],),
+                    expr,
+                    id=insn_id,
+                    within_inames=frozenset(out_inames),
+                    depends_on=self._get_dependencies(expr))
+            self._last_insn_id = insn_id
+            out_insns.append(out_insn)
+
+        new_tv = self.program.temporary_variables.copy()
+        new_tv[name] = out
+
+        self.update_program(self.program.copy(
+                temporary_variables=new_tv,
+                domains=self.program.domains + [domain],
+                instructions=self.program.instructions + out_insns))
+
+        return ArrayVariable(self.code, out_shape, out_dtype, name)
+
+    def matmul(self, a, b, name=None):
+        # Generate a temporay for the matrix-matrix multiplication.
+        self._assume_equal(a.shape[1], b.shape[0])
+        if name is None:
+            name = "_matmul"
+        name = self.var_name_gen(name)
+
+        out_shape = (a.shape[0], b.shape[1])
+        out_inames = (f"{name}_dim0", f"{name}_dim1")
+        out_red_iname = self.var_name_gen(f"{name}_reduce")
+        out_dtype = self._unify_types(a.dtype, b.dtype)
+
+        out = lp.TemporaryVariable(
+                name, dtype=out_dtype,
+                shape=(a.shape[0], b.shape[1]),
+                address_space=lp.AddressSpace.GLOBAL)
+
+        import loopy.library.reduction as red
+
+        a_inames = tuple(map(prim.Variable, (out_inames[0], out_red_iname)))
+        b_inames = tuple(map(prim.Variable, (out_red_iname, out_inames[1])))
+
+        expr = a.to_loopy_expression(a_inames) * b.to_loopy_expression(b_inames)
+
+        out_expr = sym.Reduction(
+                operation=red.parse_reduction_op("sum"),
+                inames=(out_red_iname,),
+                expr=expr,
+                allow_simultaneous=False)
+
+        domain = domain_for_shape(
+                out_inames + (out_red_iname,),
+                out_shape + (a.shape[1],))
+
+        new_tv = self.program.temporary_variables.copy()
+        new_tv[name] = out
+
+        self.update_program(self.program.copy(
+                temporary_variables=new_tv,
+                domains=self.program.domains + [domain]))
+
+        self._make_array_assignment(
+                name, dim_names=out_inames, expr=out_expr,
+                within_inames=frozenset(out_inames))
+
+        return ArrayVariable(self.code, out_shape, out_dtype, name)
 
     def output(self, expr):
-        knl = self.program
         name = self.var_name_gen("_out")
         out = lp.GlobalArg(name, shape=expr.shape, dtype=expr.dtype, order="C")
-        out_inames = [self.var_name_gen(f"{name}_dim{d}")
-                                        for d in range(expr.ndim)]
+        out_inames = [
+                self.var_name_gen(f"{name}_dim{d}") for d in range(expr.ndim)]
         out_expr = expr.to_loopy_expression(map(prim.Variable, out_inames))
 
-        # Build output domain.
-        param_names = []
-        from loopy.symbolic import get_dependencies
-        for sdep in map(get_dependencies, expr.shape):
-            for dep in sdep:
-                param_names.append(self._get_or_create_parameter(dep).name)
+        domain = expr.get_domain(out_inames)
 
-        dom = isl.BasicSet.universe(
-                isl.Space.create_from_names(
-                    isl.DEFAULT_CONTEXT,
-                    set=out_inames,
-                    params=param_names))
-
-        from loopy.symbolic import aff_from_expr
-        affs = isl.affs_from_space(dom.space)
-        for iname, dim in zip(out_inames, expr.shape):
-            dom &= affs[0].le_set(affs[iname])
-            dom &= affs[iname].lt_set(aff_from_expr(dom.space, dim))
-        dom, = dom.get_basic_sets()
+        self.update_program(self.program.copy(
+                args=self.program.args + [out],
+                domains=self.program.domains + [domain]))
 
-        # Build output instruction.
-        from loopy.kernel.instruction import make_assignment
-        out_insn = make_assignment(
-                (
-                    prim.Variable(name)[
-                        tuple(map(prim.Variable, out_inames))],),
-                out_expr,
-                id=self.insn_id_gen(),
+        self._make_array_assignment(
+                name, dim_names=out_inames, expr=out_expr,
                 within_inames=frozenset(out_inames))
 
-        # TODO: dependencies
-        
-        self.update_program(knl.copy(
-                domains=knl.domains + [dom],
-                instructions=knl.instructions + [out_insn],
-                args=knl.args + [out]))
+    # }}}
+
+Context = CodeBuilder
diff --git a/arrayzy/utils.py b/arrayzy/utils.py
new file mode 100644
index 0000000..54e3436
--- /dev/null
+++ b/arrayzy/utils.py
@@ -0,0 +1,66 @@
+__copyright__ = "Copyright (C) 2020 Matt Wala"
+
+__license__ = """
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+"""
+
+
+import islpy as isl
+
+
+def domain_for_shape(dim_names, shape):
+    """Return a :class:`isl.BasicSet` that expresses an appropriate index domain
+    for an array of (potentially symbolic) shape *shape*.
+
+    :arg dim_names: A tuple of strings, the names of the axes. These become set
+        dimensions in the returned domain.
+
+    :arg shape: A tuple of constant or quasi-affine :mod:`pymbolic`
+        expressions. The variables in these expressions become parameter
+        dimensions in the returned set.  Must have the same length as
+        *dim_names*.
+
+    :returns: a :class:`isl.BasicSet`
+
+    """
+
+    # Collect parameters.
+    param_names = set()
+    from loopy.symbolic import get_dependencies
+    for sdep in map(get_dependencies, shape):
+        param_names |= sdep
+
+    param_names = sorted(param_names)
+
+    # Build domain.
+    dom = isl.BasicSet.universe(
+            isl.Space.create_from_names(
+                isl.DEFAULT_CONTEXT,
+                set=dim_names,
+                params=param_names))
+
+    # Add constraints.
+    from loopy.symbolic import aff_from_expr
+    affs = isl.affs_from_space(dom.space)
+    for iname, dim in zip(dim_names, shape):
+        dom &= affs[0].le_set(affs[iname])
+        dom &= affs[iname].lt_set(aff_from_expr(dom.space, dim))
+    dom, = dom.get_basic_sets()
+
+    return dom
diff --git a/experiments/advection.py b/experiments/advection.py
index 52d6eff..56fe1c6 100644
--- a/experiments/advection.py
+++ b/experiments/advection.py
@@ -146,7 +146,7 @@ class DGDiscr1D(object):
 
         Signature: ->(n, 2)
         """
-        return self.interp.T
+        return self.interp.T.copy()
 
     @property
     @memoized
@@ -279,6 +279,18 @@ class AbstractDGOps1D(object):
     def __init__(self, discr):
         self.discr = discr
 
+    @property
+    def array_ops(self):
+        raise NotImplementedError
+
+    @property
+    def normals(self):
+        """Return the vector of normals at the faces.
+
+        Signature: ->(m, 2)
+        """
+        raise NotImplementedError
+
     def interp(self, vec):
         """Apply elementwise volume-to-face interpolation.
 
@@ -331,6 +343,14 @@ def elementwise(mat, vec):
 class DGOps1DRef(AbstractDGOps1D):
     """A reference NumPy implementation of the AbstractDGOps1D interface."""
 
+    @AbstractDGOps1D.array_ops.getter
+    def array_ops(self):
+        return np
+
+    @AbstractDGOps1D.normals.getter
+    def normals(self):
+        return self.discr.normals
+
     def interp(self, vec):
         return elementwise(self.discr.interp, vec)
 
@@ -350,26 +370,75 @@ class DGOps1DRef(AbstractDGOps1D):
         return result
 
 
+def matrix_getter(name, shape):
+    
+    def getter(self):
+        mat = self.cb.argument(name, shape, np.float64)
+        self.cb.bind_argument(mat, getattr(self.discr, name))
+        return mat
+
+    return property(memoized(getter))
+
+
 class DGOps1D(AbstractDGOps1D):
-    pass
+
+    @AbstractDGOps1D.array_ops.setter
+    def array_ops(self):
+        return self.cb
+
+    def __init__(self, discr, cb):
+        self.discr = discr
+        self.cb = cb
+
+    _normals = matrix_getter("normals", "(nelements, 2)")
+    _interp_mat = matrix_getter("interp", "(2, nnodes)")
+    _inv_mass_mat = matrix_getter("inv_mass", "(nnodes, nnodes)")
+    _stiffness_mat = matrix_getter("stiffness", "(nnodes, nnodes)")
+    _face_mass_mat = matrix_getter("face_mass", "(nnodes, 2)")
+
+    @AbstractDGOps1D.normals.getter
+    def normals(self):
+        return self._normals
+
+    def interp(self, vec):
+        return self.cb.matmul(self._interp_mat, vec.T, name="_interp").T
+
+    def inv_mass(self, vec):
+        return self.cb.matmul(self._inv_mass_mat, vec.T, name="_inv_mass").T
+
+    def stiffness(self, vec):
+        return self.cb.matmul(self._stiffness_mat, vec.T, name="_stiffness").T
+
+    def face_mass(self, vec):
+        return self.cb.matmul(self._face_mass_mat, vec.T, name="_face_mass").T
+
+    def face_swap(self, vec):
+        return self.cb.stack(
+                (
+                    self.cb.roll(vec[:,1], +1),
+                    self.cb.roll(vec[:,0], -1)),
+                axis=1,
+                name="face_swap")
 
 
 class AdvectionOperator(object):
     """A class representing a DG advection operator."""
 
-    def __init__(self, discr, c, flux_type):
+    def __init__(self, discr, c, flux_type, dg_ops):
         """
         Inputs:
             discr: an instance of DGDiscr1D
             c: advection speed parameter
             flux_type: "upwind" or "central"
+            dg_ops: An instance of AbstractDGOps1D
         """
         self.discr = discr
         self.c = c
         assert flux_type in ("upwind", "central")
         self.flux_type = flux_type
+        self.dg = dg_ops
 
-    def weak_flux(self, dg, vec):
+    def weak_flux(self, vec):
         """Apply the flux, weak form.
 
         Inputs:
@@ -379,21 +448,20 @@ class AdvectionOperator(object):
         Signature: (m, 2) -> (m, 2)
         """
         if self.flux_type == "central":
-            flux = (vec + dg.face_swap(vec)) / 2
+            flux = (vec + self.dg.face_swap(vec)) / 2
 
         elif self.flux_type == "upwind":
-            swp = dg.face_swap(vec)
+            swp = self.dg.face_swap(vec)
             if self.c >= 0:
-                flux = np.stack((vec[:,0], swp[:,1]), axis=1)
+                flux = self.dg.array_ops.stack((vec[:,0], swp[:,1]), axis=1)
             else:
-                flux = np.stack((swp[:,0], vec[:,1]), axis=1)
+                flux = self.dg.array_ops.stack((swp[:,0], vec[:,1]), axis=1)
 
-        flux *= self.c
-        flux *= self.discr.normals
+        flux = flux * self.c * self.dg.normals
 
         return flux
 
-    def strong_flux(self, dg, vec):
+    def strong_flux(self, vec):
         """Apply the flux, strong form.
 
         Inputs:
@@ -402,17 +470,18 @@ class AdvectionOperator(object):
 
         Signature: (m, 2) -> (m, 2)
         """
-        return self.c * self.discr.normals * vec - self.weak_flux(dg, vec)
+        return self.c * self.dg.normals * vec - self.weak_flux(vec)
 
     def apply(self, vec):
         """Main operator implementation.
 
         Signature: (m, n) -> (m, n)
         """
-        with self.discr.dg_ops() as dg:
-            pt1 = dg.face_mass(self.strong_flux(dg, dg.interp(vec)))
-            pt2 = self.c * dg.stiffness(vec)
-            return -dg.inv_mass(pt1 - pt2)
+        dg = self.dg
+        pt1 = dg.face_mass(self.strong_flux(dg.interp(vec)))
+        print("pt1 shape", pt1.shape)
+        pt2 = self.c * dg.stiffness(vec)
+        return -dg.inv_mass(pt1 - pt2)
 
     def __call__(self, vec):
         """Apply the DG advection operator to the vector of degrees of freedom.
@@ -457,14 +526,15 @@ def test_rk4():
 
 @pytest.mark.parametrize("order", (3, 4, 5))
 @pytest.mark.parametrize("flux_type", ("central", "upwind"))
-def test_advection_convergence(order, flux_type):
+def test_ref_advection_convergence(order, flux_type):
     errors = []
     hs = []
 
     for nelements in (8, 12, 16, 20):
         discr = DGDiscr1D(0, 2*np.pi, nelements=nelements, nnodes=order)
         u_initial = np.sin(discr.nodes())
-        op = AdvectionOperator(discr, c=1, flux_type=flux_type)
+        op = AdvectionOperator(
+                discr, c=1, flux_type=flux_type, dg_ops=DGOps1DRef(discr))
         u = rk4(lambda t, y: op(y), u_initial, t_initial=0, t_final=np.pi, dt=0.01)
         u_ref = -u_initial
         hs.append(discr.h)
@@ -474,6 +544,32 @@ def test_advection_convergence(order, flux_type):
     assert eoc >= order - 0.1, eoc
 
 
+def main():
+    import arrayzy as az
+    import pyopencl as cl
+    cl_ctx = cl.create_some_context()
+    queue = cl.CommandQueue(cl_ctx)
+
+    nelements = 20
+    nnodes = 3
+
+    discr = DGDiscr1D(0, 2*np.pi, nelements=nelements, nnodes=nnodes)
+    
+    with az.CodeBuilder(queue) as cb:
+        u = cb.argument("u", shape="(nelements, nnodes)", dtype=np.float64)
+        op = AdvectionOperator(
+                discr, c=1, flux_type="central", dg_ops=DGOps1D(discr, cb))
+        cb.output(op.apply(u))
+
+    code = cb.build()
+    print(code.program)
+    #import loopy as lp
+    #code.update_program(lp.fix_parameters(code.program, nnodes=nnodes, nelements=nelements))
+    u = np.sin(discr.nodes())
+    print(code.eval_one(u=u.reshape(nelements, nnodes)))
+
+
 if __name__ == "__main__":
-    from pytest import main
-    main([__file__])
+    #from pytest import main
+    #main([__file__])
+    main()
diff --git a/test/test_array.py b/test/test_array.py
deleted file mode 100644
index f7c263a..0000000
--- a/test/test_array.py
+++ /dev/null
@@ -1,94 +0,0 @@
-#!/usr/bin/env python
-
-__copyright__ = "Copyright (C) 2020 Matt Wala"
-
-__license__ = """
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-"""
-
-import arrayzy as az
-
-import numpy as np
-import sys
-
-import pytest  # noqa
-
-import pyopencl as cl
-import pyopencl.array as cl_array  # noqa
-import pyopencl.cltypes as cltypes  # noqa
-import pyopencl.tools as cl_tools  # noqa
-from pyopencl.tools import (  # noqa
-        pytest_generate_tests_for_pyopencl as pytest_generate_tests)
-
-
-def test_symbolic_array(ctx_getter):
-    cl_ctx = ctx_getter()
-    queue = cl.CommandQueue(cl_ctx)
-    
-    with az.CodeBuilder(queue) as cb:
-        x = cb.argument("x", shape="n", dtype=np.float64)
-        cb.output(x)
-
-    x_in = cl.array.to_device(queue, np.array([1., 2., 3., 4., 5.]))
-    code = cb.build()
-    assert (code.eval_one(x=x_in).get() == x_in.get()).all()
-
-
-@pytest.mark.parametrize("dim", (1, 2, 3))
-def test_transpose(ctx_getter, dim):
-    cl_ctx = ctx_getter()
-    queue = cl.CommandQueue(cl_ctx)
-
-    with az.CodeBuilder(queue) as cb:
-        shape = ("l", "m", "n")[-dim:]
-        x = cb.argument("x", shape=shape, dtype=np.float64)
-        cb.output(x.T)
-
-
-    x_in = cl.array.to_device(
-        queue,
-        np.arange(24, dtype=float).reshape((2, 3, -1)[-dim:]))
-    code = cb.build()
-    assert (code.eval_one(x=x_in).get() == x_in.get().T).all()
-
-
-def test_scalar_multiply(ctx_getter):
-    cl_ctx = ctx_getter()
-    queue = cl.CommandQueue(cl_ctx)
-    
-    with az.CodeBuilder(queue) as cb:
-        x = cb.argument("x", shape="n", dtype=np.float64)
-        cb.output(2 * x)
-
-    x_in = cl.array.to_device(queue, np.array([1., 2., 3., 4., 5.]))
-    code = cb.build()
-    print(code.program)
-    assert (code.eval_one(x=x_in).get() == (2*x_in).get()).all()
-
-
-if __name__ == "__main__":
-    # make sure that import failures get reported, instead of skipping the
-    # tests.
-    if len(sys.argv) > 1:
-        exec(sys.argv[1])
-    else:
-        from pytest import main
-        main([__file__])
-
-# vim: filetype=pyopencl:fdm=marker
diff --git a/test/test_linalg.py b/test/test_linalg.py
new file mode 100644
index 0000000..7448476
--- /dev/null
+++ b/test/test_linalg.py
@@ -0,0 +1,218 @@
+#!/usr/bin/env python
+
+__copyright__ = "Copyright (C) 2020 Matt Wala"
+
+__license__ = """
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+"""
+
+import arrayzy as az
+
+import operator
+import numpy as np
+import sys
+
+import pytest  # noqa
+
+import pyopencl as cl
+import pyopencl.array as cl_array  # noqa
+import pyopencl.cltypes as cltypes  # noqa
+import pyopencl.tools as cl_tools  # noqa
+from pyopencl.tools import (  # noqa
+        pytest_generate_tests_for_pyopencl as pytest_generate_tests)
+
+
+def test_symbolic_array(ctx_factory):
+    cl_ctx = ctx_factory()
+    queue = cl.CommandQueue(cl_ctx)
+    
+    with az.CodeBuilder(queue) as cb:
+        x = cb.argument("x", shape="n", dtype=np.float64)
+        cb.output(x)
+
+    x_in = cl.array.to_device(queue, np.array([1., 2., 3., 4., 5.]))
+    code = cb.build()
+    assert (code.eval_one(x=x_in).get() == x_in.get()).all()
+
+
+@pytest.mark.parametrize("dim", (1, 2, 3))
+def test_transpose(ctx_factory, dim):
+    cl_ctx = ctx_factory()
+    queue = cl.CommandQueue(cl_ctx)
+
+    with az.CodeBuilder(queue) as cb:
+        shape = ("l", "m", "n")[-dim:]
+        x = cb.argument("x", shape=shape, dtype=np.float64)
+        cb.output(x.T)
+
+    x_in = cl.array.to_device(queue,
+            np.arange(24, dtype=float).reshape((2, 3, -1)[-dim:]))
+
+    code = cb.build()
+    assert (code.eval_one(x=x_in).get() == x_in.get().T).all()
+
+
+@pytest.mark.parametrize("which", ("add", "sub", "mul", "truediv"))
+@pytest.mark.parametrize("reverse", (False, True))
+def test_scalar_array_binary_arith(ctx_factory, which, reverse):
+    cl_ctx = ctx_factory()
+    queue = cl.CommandQueue(cl_ctx)
+
+    op = getattr(operator, which)
+    if reverse:
+        op_orig = op
+        op = lambda x, y: op_orig(y, x)
+
+    with az.CodeBuilder(queue) as cb:
+        x = cb.argument("x", shape="n", dtype=np.float64)
+        cb.output(op(2, x))
+
+    x_in = np.array([1., 2., 3., 4., 5.])
+    code = cb.build()
+    assert (code.eval_one(x=x_in) == op(2, x_in)).all()
+
+
+@pytest.mark.parametrize("which", ("add", "sub", "mul", "truediv"))
+def test_array_array_binary_arith(ctx_factory, which):
+    cl_ctx = ctx_factory()
+    queue = cl.CommandQueue(cl_ctx)
+
+    op = getattr(operator, which)
+
+    with az.CodeBuilder(queue) as cb:
+        x = cb.argument("x", shape="n", dtype=np.float64)
+        y = cb.argument("y", shape="n", dtype=np.float64)
+        cb.output(op(x, y))
+
+    x_in = np.array([1., 2., 3., 4., 5.])
+    y_in = np.array([6., 7., 8., 9., 19.])
+    code = cb.build()
+    assert (code.eval_one(x=x_in, y=y_in) == op(x_in, y_in)).all()
+
+
+@pytest.mark.parametrize("which", ("neg",))
+def test_unary_arith(ctx_factory, which):
+    cl_ctx = ctx_factory()
+    queue = cl.CommandQueue(cl_ctx)
+
+    op = getattr(operator, which)
+
+    with az.CodeBuilder(queue) as cb:
+        x = cb.argument("x", shape="n", dtype=np.float64)
+        cb.output(op(x))
+
+    x_in = np.array([1., 2., 3., 4., 5.])
+    code = cb.build()
+    assert (code.eval_one(x=x_in) == op(x_in)).all()
+
+
+def test_matmul(ctx_factory):
+    cl_ctx = ctx_factory()
+    queue = cl.CommandQueue(cl_ctx)
+
+    with az.CodeBuilder(queue) as cb:
+        x = cb.argument("x", shape=("m, n"), dtype=np.float64)
+        y = cb.argument("y", shape=("n, k"), dtype=np.float64)
+        z = cb.matmul(x, y)
+        cb.output(z)
+
+    x_in = cl.array.to_device(queue,
+            np.array([[1, 2, 3], [4, 5, 6]], dtype=float))
+    y_in = cl.array.to_device(queue,
+            np.array([[2, 5], [5, 1], [1, 2]], dtype=float))
+
+    code = cb.build()
+    print(code.program)
+    assert (
+            code.eval_one(x=x_in, y=y_in).get()
+            == x_in.get() @ y_in.get()).all()
+
+
+@pytest.mark.parametrize("shift", (-1, 1, -20, 20))
+def test_roll(ctx_factory, shift):
+    cl_ctx = ctx_factory()
+    queue = cl.CommandQueue(cl_ctx)
+
+    with az.CodeBuilder(queue) as cb:
+        x = cb.argument("x", shape=("n",), dtype=np.float64)
+        cb.output(cb.roll(x, shift))
+
+    x_in = cl.array.to_device(queue, np.array([0, 1, 2], dtype=float))
+
+    code = cb.build()
+    assert (code.eval_one(x=x_in).get() == np.roll(x_in.get(), shift)).all()
+
+
+@pytest.mark.parametrize("input_dims", (1, 2, 3))
+def test_stack(ctx_factory, input_dims):
+    cl_ctx = ctx_factory()
+    queue = cl.CommandQueue(cl_ctx)
+
+    for axis in range(input_dims):
+        shape = (2,) * input_dims
+
+        with az.CodeBuilder(queue) as cb:
+            x = cb.argument("x", shape=shape, dtype=np.float64)
+            y = cb.argument("y", shape=shape, dtype=np.float64)
+            cb.output(cb.stack((x, y), axis=axis))
+
+        x_in = np.arange(2 ** input_dims, dtype=float).reshape(shape)
+        y_in = np.arange(1, 1 + 2 ** input_dims, dtype=float).reshape(shape)
+
+        code = cb.build()
+
+        ref = np.stack((x_in, y_in), axis)
+
+        x_in = cl.array.to_device(queue, x_in)
+        y_in = cl.array.to_device(queue, y_in)
+
+        assert (code.eval_one(x=x_in, y=y_in).get() == ref).all()
+
+
+@pytest.mark.parametrize("input_dims", (2, 3))
+def test_slice_along_dim(ctx_factory, input_dims):
+    cl_ctx = ctx_factory()
+    queue = cl.CommandQueue(cl_ctx)
+
+    for axis in range(input_dims):
+        shape = (2,) * input_dims
+        
+        slice_spec = [slice(None, None, None)] * input_dims
+        slice_spec[axis] = 1
+        slice_spec = tuple(slice_spec)
+
+        with az.CodeBuilder(queue) as cb:
+            x = cb.argument("x", shape=shape, dtype=np.float64)
+            cb.output(x[slice_spec])
+
+        x_in = np.arange(2 ** input_dims, dtype=float).reshape(shape)
+        code = cb.build()
+        assert (code.eval_one(x=x_in) == x_in[slice_spec]).all()
+
+
+if __name__ == "__main__":
+    # make sure that import failures get reported, instead of skipping the
+    # tests.
+    if len(sys.argv) > 1:
+        exec(sys.argv[1])
+    else:
+        from pytest import main
+        main([__file__])
+
+# vim: filetype=pyopencl:fdm=marker
-- 
GitLab


From 1a4c0428bcea0f64d7f84b33acb8a7a5d3a1908a Mon Sep 17 00:00:00 2001
From: Matt Wala <wala1@illinois.edu>
Date: Mon, 27 Apr 2020 23:44:09 -0500
Subject: [PATCH 12/46] Make things work, sort of

---
 arrayzy/__init__.py |   2 +-
 arrayzy/array.py    | 411 ++++++++++++++++++++++++++++++++++++++++----
 arrayzy/code.py     | 369 ---------------------------------------
 arrayzy/program.py  |  78 +++++++++
 doc/array.rst       |   4 +
 doc/index.rst       |   3 +-
 doc/program.rst     |   4 +
 test/test_linalg.py | 140 ++++++++-------
 8 files changed, 533 insertions(+), 478 deletions(-)
 delete mode 100644 arrayzy/code.py
 create mode 100644 arrayzy/program.py
 create mode 100644 doc/array.rst
 create mode 100644 doc/program.rst

diff --git a/arrayzy/__init__.py b/arrayzy/__init__.py
index af107be..989c2fe 100644
--- a/arrayzy/__init__.py
+++ b/arrayzy/__init__.py
@@ -1 +1 @@
-from arrayzy.code import Code, CodeBuilder
+from arrayzy.array import Context
diff --git a/arrayzy/array.py b/arrayzy/array.py
index 3158061..bd44e26 100644
--- a/arrayzy/array.py
+++ b/arrayzy/array.py
@@ -21,48 +21,65 @@ THE SOFTWARE.
 """
 
 import operator
+from functools import partialmethod
 
-import islpy as isl
 import loopy as lp
+import islpy as isl
 import numpy as np
-
+import loopy.symbolic as sym
 import pymbolic
 import pymbolic.primitives as prim
 from pytools import memoize_method
 
-from functools import partialmethod
+from arrayzy.program import BoundProgram, PyOpenCLTarget
 from arrayzy.utils import domain_for_shape
 
 
+__doc__ = """
+.. autoclass:: Array
+.. autoclass:: ArrayExpression
+.. autoclass:: ArrayVariable
+
+.. autoclass:: Context
+"""
+
+
 class Array:
-    """A representation of an array value in a computation.
+    """A representation of an array computation in a :mod:`loopy` kernel.
 
     .. attribute:: ctx
 
-        The :class:`Context` with which this array is associated, to
-        define the meanings of names used in expressions in this object.
+        The :class:`Context` with which this array is associated, to define the
+        meanings of names used in expressions in this object.
 
-    .. attribute:: dtype
+    .. attribute:: shape
 
-        An instance of :class:`loopy.types.LoopyType` or *None* to indicate
-        that the type of the array is not yet known.
+        A tuple of :mod:`pymbolic` expressions or constants representing the
+        shape of the array. Only quasi-affine expressions are supported.
 
-    .. attribute:: shape
+    .. attribute:: dtype
+
+        An instance of :class:`loopy.types.LoopyType` or *None* to indicate that
+        the type of the array is not yet known.
 
     .. attribute:: name
 
-        A name for this computation, or *None*.
+        A name for this computation, or *None*. This is not necessarily a
+        variable name, as there may be no defined storage for the array.
 
     .. attribute:: ndim
 
         The number of dimensions in :attr:`shape`.
 
-    .. automethod:: T
+    .. attribute:: T
 
-    .. automethod:: to_loopy_expression
+    .. attribute:: dims
+    .. attribute:: dim_names
 
     .. automethod:: get_domain
+    .. automethod:: to_loopy_expression
 
+    .. automethod:: __getitem__
     .. automethod:: __mul__
     .. automethod:: __rmul__
     .. automethod:: __add__
@@ -73,23 +90,18 @@ class Array:
     .. automethod:: __rtruediv__
     .. automethod:: __neg__
 
-    .. automethod:: __getitem__
     """
 
-    def __init__(self, ctx, shape, dtype):
+    def __init__(self, ctx, shape, dtype, name):
         self.ctx = ctx
         self._dtype = dtype
         self.shape = shape
+        self.name = name
 
     @property
     def ndim(self):
         return len(self.shape)
 
-    def _infer_type(self, expr):
-        from loopy.type_inference import TypeInferenceMapper
-        mapper = TypeInferenceMapper(self.ctx.program)
-        return mapper(expr)
-
     def _unary_op(self, op):
         new_expr = op(self.to_loopy_expression(self.dims))
         return ArrayExpression(
@@ -103,12 +115,12 @@ class Array:
             args = (self.to_loopy_expression(self.dims), other)
             if reverse:
                 args = tuple(reversed(args))
-                
+
             new_expr = op(*args)
             return ArrayExpression(
                     self.ctx,
                     self.shape,
-                    self._infer_type(new_expr),
+                    self.ctx.unify_types(self.dtype, type(other)),
                     new_expr)
 
         elif isinstance(other, Array):
@@ -122,9 +134,9 @@ class Array:
             return ArrayExpression(
                     self.ctx,
                     self.shape,
-                    self._infer_type(new_expr),
+                    self.ctx.unify_types(self.dtype, other.dtype),
                     new_expr)
-            
+
         else:
             raise ValueError
 
@@ -133,7 +145,7 @@ class Array:
 
     __add__ = partialmethod(_binary_op, operator.add)
     __radd__ = partialmethod(__add__, reverse=True)
-    
+
     __sub__ = partialmethod(_binary_op, operator.sub)
     __rsub__ = partialmethod(__sub__, reverse=True)
 
@@ -144,13 +156,13 @@ class Array:
 
     def __getitem__(self, slice_spec):
         from numbers import Integral
-        
+
         if len(slice_spec) != self.ndim:
             raise ValueError("incorrect slice shape")
 
         dims = []
         shape = []
-        
+
         for i, elem in enumerate(slice_spec):
             if elem == slice(None, None, None):
                 d = len(shape)
@@ -187,6 +199,8 @@ class Array:
 
     def get_domain(self, dim_names=None):
         """Return the domain that defines the space of indices for this array.
+
+        :arg dim_names: if not *None*, the names of the dimensions of the array
         """
         if dim_names is None:
             dim_names = self.dim_names
@@ -194,6 +208,7 @@ class Array:
 
     @property
     def T(self):
+        """Return the transpose of the array of the array."""
         new_shape = tuple(reversed(self.shape))
         new_expr = self.to_loopy_expression(list(reversed(self.dims)))
         return ArrayExpression(self.ctx, new_shape, self.dtype, new_expr)
@@ -206,12 +221,14 @@ class ArrayExpression(Array):
     """An array-valued expression.
 
     .. attribute:: expression
+
+        A :mod:`pymbolic` expression for the array.
     """
 
-    def __init__(self, ctx, shape, dtype, expression):
-        super().__init__(ctx, shape, dtype)
+    def __init__(self, ctx, shape, dtype, expression, name=None):
+        super().__init__(ctx, shape, dtype, name)
         self.expression = expression
-        
+
     def to_loopy_expression(self, dims):
         assignments = dict(zip(self.dim_names, tuple(dims)))
         return pymbolic.substitute(self.expression, assignments)
@@ -219,16 +236,342 @@ class ArrayExpression(Array):
 
 class ArrayVariable(Array):
     """An array-valued variable.
-
-    .. attribute:: name
     """
 
     def __init__(self, ctx, shape, dtype, name):
-        super().__init__(ctx, shape, dtype)
-        self.name = name
+        super().__init__(ctx, shape, dtype, name)
 
     def to_loopy_expression(self, dims):
         return prim.Variable(self.name)[tuple(dims)]
 
 
+class Context:
+    """An interface for building up array computations.
+
+    .. automethod:: __enter__
+    .. automethod:: __exit__
+
+    .. attribute:: program
+
+        A :class:`lp.LoopKernel` that defines names used in expressions in
+        :class:`Array` instances attached to this code. Names defined once will
+        never change their meaning, for the lifetime of this object.
+
+    .. automethod:: build
+    .. automethod:: argument
+    .. automethod:: bind
+    .. automethod:: roll
+    .. automethod:: stack
+    .. automethod:: matmul
+    .. automethod:: output
+    """
+
+    def __init__(self, arg):
+        import sys
+
+        self.target = None
+        self.queue = None
+
+        # avoid expensive/failing import
+        if "pyopencl" in sys.modules:
+            import pyopencl as cl
+            if isinstance(arg, cl.CommandQueue):
+                target = PyOpenCLTarget(arg)
+                self.queue = arg
+
+        if target is None:
+            raise ValueError(f"invalid argument type: {type(arg).__name__}")
+
+        import loopy as lp
+        program = lp.make_kernel("{:}", [], target=target.get_loopy_target(),
+                                 lang_version=lp.MOST_RECENT_LANGUAGE_VERSION)
+
+        self.bound_arguments = {}
+        self.program = program
+        self.var_name_gen = self.program.get_var_name_generator()
+        self.insn_id_gen = self.program.get_instruction_id_generator()
+        self._last_insn_id = None
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_value, traceback):
+        pass
+
+    def build(self):
+        """Return a generated :class:`BoundProgram`.
+        """
+        return BoundProgram(self.program, self.queue,
+                            self.bound_arguments.copy(), self.target)
+
+    @property
+    def program(self):
+        return self._program
+
+    @program.setter
+    def program(self, program):
+        self._program = program
+
+    # {{{ internal api
+
+    def unify_types(self, *args):
+        # Unifies the list of types.
+        # TODO: does not handle None
+        result = args[0]
+        for arg in args[1:]:
+            result = (
+                    np.empty(0, dtype=result)
+                    + np.empty(0, dtype=arg
+                    )).dtype
+        return result
+
+    def _get_or_create_parameter(self, name):
+        # Create a parameter variable for *name* if not present.
+        # Return the parameter as a pymbolic variable.
+
+        if name in self.program.all_variable_names():
+            if name not in self.program.all_params():
+                # FIXME: May have been added, but not added to a domain.
+                pass
+        else:
+            self.program = self.program.copy(
+                    args=self.program.args + [
+                        lp.ValueArg(name, dtype=self.program.index_dtype)
+                    ])
+
+        return prim.Variable(name)
+
+    def _get_dependencies(self, expr):
+        # Get the dependency set of an expression.
+        # FIXME: this needs to be implemented properly.
+        if not self._last_insn_id:
+            return frozenset([])
+        return frozenset([self._last_insn_id])
+
+    def _assume_equal(self, lhs, rhs):
+        # Add the assumption *lhs = rhs* to the kernel.
+        param_names = set()
+        for sdep in map(sym.get_dependencies, (lhs, rhs)):
+            param_names |= sdep
+
+        # Build domain.
+        dom = isl.BasicSet.universe(
+                isl.Space.create_from_names(
+                    isl.DEFAULT_CONTEXT,
+                    set=(),
+                    params=param_names))
+
+        aff_lhs = sym.aff_from_expr(dom.space, lhs)
+        aff_rhs = sym.aff_from_expr(dom.space, rhs)
+
+        dom &= aff_lhs.eq_set(aff_rhs)
+
+        # TODO: Throw an error if the assumption leads to a contradiction.
+        dom, = dom.get_basic_sets()
+
+        self.program = lp.assume(self.program, dom)
+
+    def _make_array_assignment(self, name, dim_names, expr, within_inames):
+        # Add an instruction that assigns to an array.  *dim_names* should be a
+        # list of strings which are the inames for the dimensions.
+        from loopy.kernel.instruction import make_assignment
+        insn_id = self.insn_id_gen(f"_store{name}")
+        out_insn = make_assignment(
+                (
+                    prim.Variable(name)[
+                        tuple(map(prim.Variable, dim_names))],),
+                expr,
+                id=insn_id,
+                within_inames=frozenset(within_inames),
+                depends_on=self._get_dependencies(expr))
+
+        self.program = self.program.copy(
+                instructions=self.program.instructions + [out_insn])
+        self._last_insn_id = insn_id
+
+    # }}}
+
+    # {{{ user interface
+
+    def argument(self, name, shape, dtype, order="C"):
+        """Append an argument to the program.
+
+        :returns: a :class:`ArrayVariable`
+        """
+        if name in self.program.all_variable_names():
+            raise ValueError(f"name '{name}' already in use in code")
+
+        arg = lp.GlobalArg(name, shape=shape, dtype=dtype, order=order)
+
+        shape = arg.shape
+        dtype = arg.dtype
+
+        # Insert parameters from shape description.
+        for sdep in map(sym.get_dependencies, arg.shape):
+            for dep in sdep:
+                self._get_or_create_parameter(dep)
+
+        # Add argument to program.
+        self.program = self.program.copy(args=self.program.args + [arg])
+
+        return ArrayVariable(self, shape, dtype, name)
+
+    def bind(self, arg, val):
+        """Bind an argument to a value.
+
+        :arg arg: a :class:`ArrayVariable` representing an argument
+        """
+        self.bound_arguments[arg.name] = val
+
+    def roll(self, a, shift, name=None):
+        """Roll elements along the given axis.
+
+        :returns: a :class:`Array`
+        """
+        assert a.ndim == 1
+        idx = (prim.Variable("_0") - shift) % a.shape[0]
+        # It's not immediately clear what loopy-generated code guarantees about
+        # the sign of the result of the % operator, so if the result is negative
+        # we shift it to be positive.
+        idx = idx + prim.If(prim.Comparison(idx, "<", 0), a.shape[0], 0)
+        expr = a.to_loopy_expression((idx,))
+        return ArrayExpression(self, a.shape, a.dtype, expr)
+
+    def stack(self, arrays, axis=0, name=None):
+        """Join a sequence of arrays along a new axis.
+
+        :arg axis: the index of the axis
+
+        :returns: a :class:`Array`
+        """
+        for array in arrays[1:]:
+            if array.ndim != arrays[0].ndim:
+                raise ValueError("arrays must have same dimension")
+            for array_dim, array0_dim in zip(array.shape, arrays[0].shape):
+                self._assume_equal(array_dim, array0_dim)
+
+        if name is None:
+            name = "_stack"
+
+        name = self.var_name_gen(name)
+
+        out_inames = []
+        for i in range(arrays[0].ndim):
+            if i >= axis:
+                i += 1
+            out_inames.append(f"{name}_dim{i}")
+
+        domain = domain_for_shape(out_inames, arrays[0].shape)
+
+        out_shape = list(arrays[0].shape)
+        out_shape.insert(axis, len(arrays))
+        out_shape = tuple(out_shape)
+
+        out_dtype = self.unify_types(*(a.dtype for a in arrays))
+
+        out = lp.TemporaryVariable(
+                name, dtype=out_dtype,
+                shape=out_shape,
+                address_space=lp.AddressSpace.GLOBAL)
+
+        # Create an output instruction for each input array.
+        from loopy.kernel.instruction import make_assignment
+        out_insns = []
+        for i in range(len(arrays)):
+            indices = list(map(prim.Variable, out_inames))
+            expr = arrays[i].to_loopy_expression(indices)
+            indices.insert(axis, i)
+            indices = tuple(indices)
+            insn_id = self.insn_id_gen(f"{name}_copy{i}")
+            out_insn = make_assignment(
+                    (prim.Variable(name)[indices],),
+                    expr,
+                    id=insn_id,
+                    within_inames=frozenset(out_inames),
+                    depends_on=self._get_dependencies(expr))
+            self._last_insn_id = insn_id
+            out_insns.append(out_insn)
+
+        new_tv = self.program.temporary_variables.copy()
+        new_tv[name] = out
+
+        self.program = self.program.copy(
+                temporary_variables=new_tv,
+                domains=self.program.domains + [domain],
+                instructions=self.program.instructions + out_insns)
+
+        return ArrayVariable(self, out_shape, out_dtype, name)
+
+    def matmul(self, a, b, name=None):
+        """Multiply matrix *a* by *b*.
+        """
+        # Generate a temporay for the matrix-matrix multiplication.
+        self._assume_equal(a.shape[1], b.shape[0])
+        if name is None:
+            name = "_matmul"
+        name = self.var_name_gen(name)
+
+        out_shape = (a.shape[0], b.shape[1])
+        out_inames = (f"{name}_dim0", f"{name}_dim1")
+        out_red_iname = self.var_name_gen(f"{name}_reduce")
+        out_dtype = self.unify_types(a.dtype, b.dtype)
+
+        out = lp.TemporaryVariable(
+                name, dtype=out_dtype,
+                shape=(a.shape[0], b.shape[1]),
+                address_space=lp.AddressSpace.GLOBAL)
+
+        import loopy.library.reduction as red
+
+        a_inames = tuple(map(prim.Variable, (out_inames[0], out_red_iname)))
+        b_inames = tuple(map(prim.Variable, (out_red_iname, out_inames[1])))
+
+        expr = a.to_loopy_expression(a_inames) * b.to_loopy_expression(b_inames)
+
+        out_expr = sym.Reduction(
+                operation=red.parse_reduction_op("sum"),
+                inames=(out_red_iname,),
+                expr=expr,
+                allow_simultaneous=False)
+
+        domain = domain_for_shape(
+                out_inames + (out_red_iname,),
+                out_shape + (a.shape[1],))
+
+        new_tv = self.program.temporary_variables.copy()
+        new_tv[name] = out
+
+        self.program = self.program.copy(
+                temporary_variables=new_tv,
+                domains=self.program.domains + [domain])
+
+        self._make_array_assignment(
+                name, dim_names=out_inames, expr=out_expr,
+                within_inames=frozenset(out_inames))
+
+        return ArrayVariable(self, out_shape, out_dtype, name)
+
+    def output(self, expr):
+        """Copy *expr* to an output variable.
+
+        :returns: a :class:`Array`
+        """
+        name = self.var_name_gen("_out")
+        out = lp.GlobalArg(name, shape=expr.shape, dtype=expr.dtype, order="C")
+        out_inames = [
+                self.var_name_gen(f"{name}_dim{d}") for d in range(expr.ndim)]
+        out_expr = expr.to_loopy_expression(map(prim.Variable, out_inames))
+
+        domain = expr.get_domain(out_inames)
+
+        self.program = self.program.copy(
+                args=self.program.args + [out],
+                domains=self.program.domains + [domain])
+
+        self._make_array_assignment(
+                name, dim_names=out_inames, expr=out_expr,
+                within_inames=frozenset(out_inames))
+
+    # }}}
+
 # vim: foldmethod=marker
diff --git a/arrayzy/code.py b/arrayzy/code.py
deleted file mode 100644
index b7cb9d8..0000000
--- a/arrayzy/code.py
+++ /dev/null
@@ -1,369 +0,0 @@
-import loopy as lp
-import loopy.symbolic as sym
-
-import islpy as isl
-import pymbolic.primitives as prim
-
-from arrayzy.array import ArrayVariable, ArrayExpression
-from arrayzy.utils import domain_for_shape
-
-
-class Target:
-    pass
-
-
-class PyOpenCLTarget:
-
-    def __init__(self, queue):
-        self.queue = queue
-
-    def get_loopy_target(self):
-        import loopy as lp
-        return lp.PyOpenCLTarget(self.queue.device)
-
-
-class Code:
-    """A representation of a program carrying out an array-valued computation.
-
-    .. attribute:: program
-
-        A :mod:`loopy.LoopKernel` that defines names used in expressions in
-        :class:`Array` instances attached to this code. Names defined once will
-        never change their meaning, for the lifetime of this object.
-
-    .. attribute:: bindings
-
-        A mapping from argument names to bound values.
-
-    .. attribute:: bound_arguments
-
-    .. attribute:: target
-
-        A :class:`Target` for code generation and execution.
-
-    .. note::
-
-        *program* may not define any names starting with underscores.
-    """
-
-    def __init__(self, queue, program, bindings, target):
-        self.queue = queue
-        self._program = program
-        self.bindings = bindings
-        self.target = target
-        self.bound_arguments = {}
-
-    # The 'program' attribute is only supposed to be modified via
-    # update_program.
-    @property
-    def program(self):
-        return self._program
-
-    def update_program(self, program):
-        self._program = program
-
-    def eval_one(self, *args, **kwargs):
-        _, (result,) = self(*args, **kwargs)
-        return result
-
-    def __call__(self, *args, **kwargs):
-        kwargs.update(self.bound_arguments)
-        return self.program(self.queue, *args, **kwargs)
-
-
-class CodeBuilder:
-    """An interface for building up array computations.
-
-    .. automethod:: __enter__
-    .. automethod:: __exit__
-    """
-
-    def __init__(self, arg):
-        import sys
-
-        target = None
-
-        # avoid expensive/failing import
-        if "pyopencl" in sys.modules:
-            import pyopencl as cl
-            if isinstance(arg, cl.CommandQueue):
-                target = PyOpenCLTarget(arg)
-
-        if target is None:
-            raise ValueError(f"invalid argument type: {type(arg).__name__}")
-
-        import loopy as lp
-        program = lp.make_kernel(
-                "{:}", [], target=target.get_loopy_target(),
-                lang_version=lp.MOST_RECENT_LANGUAGE_VERSION)
-        self.code = Code(arg, program, {}, target)
-
-        self.var_name_gen = self.program.get_var_name_generator()
-        self.insn_id_gen = self.program.get_instruction_id_generator()
-        self._last_insn_id = None
-
-    def __enter__(self):
-        return self
-
-    def __exit__(self, exc_type, exc_value, traceback):
-        pass
-
-    def build(self):
-        """Return the generated :class:`Code`.
-        """
-        return self.code
-
-    @property
-    def program(self):
-        return self.code.program
-
-    @property
-    def update_program(self):
-        return self.code.update_program
-
-    # {{{
-
-    def _unify_types(self, *args):
-        if not all(args[0] == arg for arg in args[1:]):
-            raise NotImplementedError
-        return args[0]
-
-    def _get_or_create_parameter(self, name):
-        # Create a parameter variable for *name* if not present.
-        # Return the parameter as a pymbolic variable.
-
-        if name in self.program.all_variable_names():
-            if name not in self.program.all_params():
-                # FIXME: May have been added, but not added to a domain.
-                pass
-        else:
-            self.update_program(
-                    self.program.copy(
-                        args=self.program.args + [
-                                lp.ValueArg(name, dtype=self.program.index_dtype)
-                            ]))
-
-        return prim.Variable(name)
-
-    def _get_dependencies(self, expr):
-        # FIXME: this needs to be implemented properly.
-        if not self._last_insn_id:
-            return frozenset([])
-        return frozenset([self._last_insn_id])
-
-    def _assume_equal(self, lhs, rhs):
-        """Adds the assumption "lhs = rhs"."""
-        param_names = set()
-        for sdep in map(sym.get_dependencies, (lhs, rhs)):
-            param_names |= sdep
-
-        # Build domain.
-        dom = isl.BasicSet.universe(
-                isl.Space.create_from_names(
-                    isl.DEFAULT_CONTEXT,
-                    set=(),
-                    params=param_names))
-
-        aff_lhs = sym.aff_from_expr(dom.space, lhs)
-        aff_rhs = sym.aff_from_expr(dom.space, rhs)
-
-        dom &= aff_lhs.eq_set(aff_rhs)
-
-        # TODO: Throw an error if the assumption leads to a contradiction.
-        dom, = dom.get_basic_sets()
-
-        self.update_program(lp.assume(self.program, dom))
-
-    def _make_array_assignment(self, name, dim_names, expr, within_inames):
-        from loopy.kernel.instruction import make_assignment
-        insn_id = self.insn_id_gen(f"_store{name}")
-        out_insn = make_assignment(
-                (
-                    prim.Variable(name)[
-                        tuple(map(prim.Variable, dim_names))],),
-                expr,
-                id=insn_id,
-                within_inames=frozenset(dim_names),
-                depends_on=self._get_dependencies(expr))
-
-        self.update_program(self.program.copy(
-                instructions=self.program.instructions + [out_insn]))
-        self._last_insn_id = insn_id
-
-    # }}}
-
-    # {{{ user interface
-
-    def argument(self, name, shape, dtype, order="C"):
-        """Append an argument to the program.
-
-        :returns: a :class:`ArrayVariable`
-        """
-        if name in self.program.all_variable_names():
-            raise ValueError(f"name '{name}' already in use in code")
-
-        arg = lp.GlobalArg(name, shape=shape, dtype=dtype, order=order)
-
-        shape = arg.shape
-        dtype = arg.dtype
-
-        # Insert parameters from shape description.
-        for sdep in map(sym.get_dependencies, arg.shape):
-            for dep in sdep:
-                self._get_or_create_parameter(dep)
-
-        # Add argument to program.
-        self.code.update_program(
-                self.program.copy(args=self.program.args + [arg]))
-
-        return ArrayVariable(self.code, shape, dtype, name)
-
-    def bind_argument(self, arg, val):
-        self.code.bound_arguments[arg.name] = val
-
-    def roll(self, a, shift, name=None):
-        """Roll elements along the given axis.
-
-        :returns: a :class:`Array`
-        """
-        assert a.ndim == 1
-        idx = (prim.Variable("_0") - shift) % a.shape[0]
-        # It's not immediately clear what loopy-generated code guarantees about
-        # the sign of the result of the % operator, so if the result is negative
-        # we shift it to be positive.
-        idx = idx + prim.If(prim.Comparison(idx, "<", 0), a.shape[0], 0)
-        expr = a.to_loopy_expression((idx,))
-        return ArrayExpression(self.code, a.shape, a.dtype, expr)
-
-    def stack(self, arrays, axis=0, name=None):
-        """Join a sequence of arrays along a new axis.
-
-        :arg axis: the index of the axis
-
-        :returns: a :class:`Array`
-        """
-        for array in arrays[1:]:
-            if array.ndim != arrays[0].ndim:
-                raise ValueError("arrays must have same dimension")
-            for array_dim, array0_dim in zip(array.shape, arrays[0].shape):
-                self._assume_equal(array_dim, array0_dim)
-
-        if name is None:
-            name = "_stack"
-
-        name = self.var_name_gen(name)
-
-        out_inames = []
-        for i in range(arrays[0].ndim):
-            if i >= axis:
-                i += 1
-            out_inames.append(f"{name}_dim{i}")
-
-        domain = domain_for_shape(out_inames, arrays[0].shape)
-
-        out_shape = list(arrays[0].shape)
-        out_shape.insert(axis, len(arrays))
-        out_shape = tuple(out_shape)
-
-        out_dtype = self._unify_types(*(a.dtype for a in arrays))
-
-        out = lp.TemporaryVariable(
-                name, dtype=out_dtype,
-                shape=out_shape,
-                address_space=lp.AddressSpace.GLOBAL)
-
-        # Create an output instruction for each input array.
-        from loopy.kernel.instruction import make_assignment
-        out_insns = []
-        for i in range(len(arrays)):
-            indices = list(map(prim.Variable, out_inames))
-            expr = arrays[i].to_loopy_expression(indices)
-            indices.insert(axis, i)
-            indices = tuple(indices)
-            insn_id = self.insn_id_gen(f"{name}_copy{i}")
-            out_insn = make_assignment(
-                    (prim.Variable(name)[indices],),
-                    expr,
-                    id=insn_id,
-                    within_inames=frozenset(out_inames),
-                    depends_on=self._get_dependencies(expr))
-            self._last_insn_id = insn_id
-            out_insns.append(out_insn)
-
-        new_tv = self.program.temporary_variables.copy()
-        new_tv[name] = out
-
-        self.update_program(self.program.copy(
-                temporary_variables=new_tv,
-                domains=self.program.domains + [domain],
-                instructions=self.program.instructions + out_insns))
-
-        return ArrayVariable(self.code, out_shape, out_dtype, name)
-
-    def matmul(self, a, b, name=None):
-        # Generate a temporay for the matrix-matrix multiplication.
-        self._assume_equal(a.shape[1], b.shape[0])
-        if name is None:
-            name = "_matmul"
-        name = self.var_name_gen(name)
-
-        out_shape = (a.shape[0], b.shape[1])
-        out_inames = (f"{name}_dim0", f"{name}_dim1")
-        out_red_iname = self.var_name_gen(f"{name}_reduce")
-        out_dtype = self._unify_types(a.dtype, b.dtype)
-
-        out = lp.TemporaryVariable(
-                name, dtype=out_dtype,
-                shape=(a.shape[0], b.shape[1]),
-                address_space=lp.AddressSpace.GLOBAL)
-
-        import loopy.library.reduction as red
-
-        a_inames = tuple(map(prim.Variable, (out_inames[0], out_red_iname)))
-        b_inames = tuple(map(prim.Variable, (out_red_iname, out_inames[1])))
-
-        expr = a.to_loopy_expression(a_inames) * b.to_loopy_expression(b_inames)
-
-        out_expr = sym.Reduction(
-                operation=red.parse_reduction_op("sum"),
-                inames=(out_red_iname,),
-                expr=expr,
-                allow_simultaneous=False)
-
-        domain = domain_for_shape(
-                out_inames + (out_red_iname,),
-                out_shape + (a.shape[1],))
-
-        new_tv = self.program.temporary_variables.copy()
-        new_tv[name] = out
-
-        self.update_program(self.program.copy(
-                temporary_variables=new_tv,
-                domains=self.program.domains + [domain]))
-
-        self._make_array_assignment(
-                name, dim_names=out_inames, expr=out_expr,
-                within_inames=frozenset(out_inames))
-
-        return ArrayVariable(self.code, out_shape, out_dtype, name)
-
-    def output(self, expr):
-        name = self.var_name_gen("_out")
-        out = lp.GlobalArg(name, shape=expr.shape, dtype=expr.dtype, order="C")
-        out_inames = [
-                self.var_name_gen(f"{name}_dim{d}") for d in range(expr.ndim)]
-        out_expr = expr.to_loopy_expression(map(prim.Variable, out_inames))
-
-        domain = expr.get_domain(out_inames)
-
-        self.update_program(self.program.copy(
-                args=self.program.args + [out],
-                domains=self.program.domains + [domain]))
-
-        self._make_array_assignment(
-                name, dim_names=out_inames, expr=out_expr,
-                within_inames=frozenset(out_inames))
-
-    # }}}
-
-Context = CodeBuilder
diff --git a/arrayzy/program.py b/arrayzy/program.py
new file mode 100644
index 0000000..b77b92d
--- /dev/null
+++ b/arrayzy/program.py
@@ -0,0 +1,78 @@
+__copyright__ = """Copyright (C) 2020 Andreas Kloeckner"""
+
+__license__ = """
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+"""
+
+
+import loopy as lp
+from pytools import RecordWithoutPickling
+
+
+class Target:
+    pass
+
+
+class PyOpenCLTarget:
+
+    def __init__(self, queue):
+        self.queue = queue
+
+    def get_loopy_target(self):
+        import loopy as lp
+        return lp.PyOpenCLTarget(self.queue.device)
+
+
+class BoundProgram(RecordWithoutPickling):
+    """A wrapper around a :mod:`loopy` kernel with additional context.
+
+    .. attribute:: program
+
+        The underlying :class:`lp.LoopKernel`.
+
+    .. attribute:: queue
+
+        If not *None*, a :mod:`pyopencl` command queue for the program.
+
+    .. attribute:: bound_arguments
+
+        A map from names to pre-bound kernel arguments.
+
+    .. attribute:: target
+
+        A :class:`Target` for code generation and execution.
+
+    """
+
+    def __init__(self, program, queue, bound_arguments, target):
+        super().__init__(program=program, queue=queue,
+                 bound_arguments=bound_arguments, target=target)
+        
+    def __call__(self, *args, **kwargs):
+        """Convenience function for launching a :mod:`pyopencl` computation."""
+        if not self.queue:
+            raise ValueError("queue must be specified")
+        
+        updated_kwargs = self.bound_arguments.copy()
+        updated_kwargs.update(kwargs)
+        return self.program(self.queue, *args, **updated_kwargs)
+
+
+# vim: foldmethod=marker
+    
diff --git a/doc/array.rst b/doc/array.rst
new file mode 100644
index 0000000..23fc13d
--- /dev/null
+++ b/doc/array.rst
@@ -0,0 +1,4 @@
+Building array computations
+===========================
+
+.. automodule:: arrayzy.array
diff --git a/doc/index.rst b/doc/index.rst
index 298bef1..306b5cf 100644
--- a/doc/index.rst
+++ b/doc/index.rst
@@ -10,7 +10,8 @@ Welcome to Arrayzy's documentation!
    :maxdepth: 2
    :caption: Contents:
 
-
+   array
+   program
 
 Indices and tables
 ==================
diff --git a/doc/program.rst b/doc/program.rst
new file mode 100644
index 0000000..7c96f9f
--- /dev/null
+++ b/doc/program.rst
@@ -0,0 +1,4 @@
+Generated programs
+==================
+
+.. autoclass:: arrayzy.program.BoundProgram
diff --git a/test/test_linalg.py b/test/test_linalg.py
index 7448476..4172d4d 100644
--- a/test/test_linalg.py
+++ b/test/test_linalg.py
@@ -38,17 +38,23 @@ from pyopencl.tools import (  # noqa
         pytest_generate_tests_for_pyopencl as pytest_generate_tests)
 
 
+def eval_one(prog, **kwargs):
+    # Evaluate and return the unique output.
+    _, (out,) = prog(**kwargs)
+    return out
+
+
 def test_symbolic_array(ctx_factory):
     cl_ctx = ctx_factory()
     queue = cl.CommandQueue(cl_ctx)
     
-    with az.CodeBuilder(queue) as cb:
-        x = cb.argument("x", shape="n", dtype=np.float64)
-        cb.output(x)
+    with az.Context(queue) as ctx:
+        x = ctx.argument("x", shape="n", dtype=np.float64)
+        ctx.output(x)
 
-    x_in = cl.array.to_device(queue, np.array([1., 2., 3., 4., 5.]))
-    code = cb.build()
-    assert (code.eval_one(x=x_in).get() == x_in.get()).all()
+    x = np.array([1., 2., 3., 4., 5.])
+    prog = ctx.build()
+    assert (eval_one(prog, x=x) == x).all()
 
 
 @pytest.mark.parametrize("dim", (1, 2, 3))
@@ -56,16 +62,15 @@ def test_transpose(ctx_factory, dim):
     cl_ctx = ctx_factory()
     queue = cl.CommandQueue(cl_ctx)
 
-    with az.CodeBuilder(queue) as cb:
+    with az.Context(queue) as ctx:
         shape = ("l", "m", "n")[-dim:]
-        x = cb.argument("x", shape=shape, dtype=np.float64)
-        cb.output(x.T)
+        x = ctx.argument("x", shape=shape, dtype=np.float64)
+        ctx.output(x.T)
 
-    x_in = cl.array.to_device(queue,
-            np.arange(24, dtype=float).reshape((2, 3, -1)[-dim:]))
+    x = np.arange(24, dtype=np.float64).reshape((2, 3, -1)[-dim:])
 
-    code = cb.build()
-    assert (code.eval_one(x=x_in).get() == x_in.get().T).all()
+    prog = ctx.build()
+    assert (eval_one(prog, x=x) == x.T).all()
 
 
 @pytest.mark.parametrize("which", ("add", "sub", "mul", "truediv"))
@@ -79,13 +84,13 @@ def test_scalar_array_binary_arith(ctx_factory, which, reverse):
         op_orig = op
         op = lambda x, y: op_orig(y, x)
 
-    with az.CodeBuilder(queue) as cb:
-        x = cb.argument("x", shape="n", dtype=np.float64)
-        cb.output(op(2, x))
+    with az.Context(queue) as ctx:
+        x = ctx.argument("x", shape="n", dtype=np.float64)
+        ctx.output(op(2, x))
 
-    x_in = np.array([1., 2., 3., 4., 5.])
-    code = cb.build()
-    assert (code.eval_one(x=x_in) == op(2, x_in)).all()
+    x = np.array([1., 2., 3., 4., 5.])
+    prog = ctx.build()
+    assert (eval_one(prog, x=x) == op(2, x)).all()
 
 
 @pytest.mark.parametrize("which", ("add", "sub", "mul", "truediv"))
@@ -95,15 +100,15 @@ def test_array_array_binary_arith(ctx_factory, which):
 
     op = getattr(operator, which)
 
-    with az.CodeBuilder(queue) as cb:
-        x = cb.argument("x", shape="n", dtype=np.float64)
-        y = cb.argument("y", shape="n", dtype=np.float64)
-        cb.output(op(x, y))
+    with az.Context(queue) as ctx:
+        x = ctx.argument("x", shape="n", dtype=np.float64)
+        y = ctx.argument("y", shape="n", dtype=np.float64)
+        ctx.output(op(x, y))
 
-    x_in = np.array([1., 2., 3., 4., 5.])
-    y_in = np.array([6., 7., 8., 9., 19.])
-    code = cb.build()
-    assert (code.eval_one(x=x_in, y=y_in) == op(x_in, y_in)).all()
+    x = np.array([1., 2., 3., 4., 5.])
+    y = np.array([6., 7., 8., 9., 19.])
+    prog = ctx.build()
+    assert (eval_one(prog, x=x, y=y) == op(x, y)).all()
 
 
 @pytest.mark.parametrize("which", ("neg",))
@@ -113,35 +118,30 @@ def test_unary_arith(ctx_factory, which):
 
     op = getattr(operator, which)
 
-    with az.CodeBuilder(queue) as cb:
-        x = cb.argument("x", shape="n", dtype=np.float64)
-        cb.output(op(x))
+    with az.Context(queue) as ctx:
+        x = ctx.argument("x", shape="n", dtype=np.float64)
+        ctx.output(op(x))
 
-    x_in = np.array([1., 2., 3., 4., 5.])
-    code = cb.build()
-    assert (code.eval_one(x=x_in) == op(x_in)).all()
+    x = np.array([1., 2., 3., 4., 5.])
+    prog = ctx.build()
+    assert (eval_one(prog, x=x) == op(x)).all()
 
 
 def test_matmul(ctx_factory):
     cl_ctx = ctx_factory()
     queue = cl.CommandQueue(cl_ctx)
 
-    with az.CodeBuilder(queue) as cb:
-        x = cb.argument("x", shape=("m, n"), dtype=np.float64)
-        y = cb.argument("y", shape=("n, k"), dtype=np.float64)
-        z = cb.matmul(x, y)
-        cb.output(z)
+    with az.Context(queue) as ctx:
+        x = ctx.argument("x", shape=("m, n"), dtype=np.float64)
+        y = ctx.argument("y", shape=("n, k"), dtype=np.float64)
+        z = ctx.matmul(x, y)
+        ctx.output(z)
 
-    x_in = cl.array.to_device(queue,
-            np.array([[1, 2, 3], [4, 5, 6]], dtype=float))
-    y_in = cl.array.to_device(queue,
-            np.array([[2, 5], [5, 1], [1, 2]], dtype=float))
+    x = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float64)
+    y = np.array([[2, 5], [5, 1], [1, 2]], dtype=np.float64)
 
-    code = cb.build()
-    print(code.program)
-    assert (
-            code.eval_one(x=x_in, y=y_in).get()
-            == x_in.get() @ y_in.get()).all()
+    prog = ctx.build()
+    assert (eval_one(prog, x=x, y=y) == x @ y).all()
 
 
 @pytest.mark.parametrize("shift", (-1, 1, -20, 20))
@@ -149,14 +149,14 @@ def test_roll(ctx_factory, shift):
     cl_ctx = ctx_factory()
     queue = cl.CommandQueue(cl_ctx)
 
-    with az.CodeBuilder(queue) as cb:
-        x = cb.argument("x", shape=("n",), dtype=np.float64)
-        cb.output(cb.roll(x, shift))
+    with az.Context(queue) as ctx:
+        x = ctx.argument("x", shape=("n",), dtype=np.float64)
+        ctx.output(ctx.roll(x, shift))
 
-    x_in = cl.array.to_device(queue, np.array([0, 1, 2], dtype=float))
+    x = np.array([0, 1, 2], dtype=np.float64)
 
-    code = cb.build()
-    assert (code.eval_one(x=x_in).get() == np.roll(x_in.get(), shift)).all()
+    prog = ctx.build()
+    assert (eval_one(prog, x=x) == np.roll(x, shift)).all()
 
 
 @pytest.mark.parametrize("input_dims", (1, 2, 3))
@@ -167,22 +167,16 @@ def test_stack(ctx_factory, input_dims):
     for axis in range(input_dims):
         shape = (2,) * input_dims
 
-        with az.CodeBuilder(queue) as cb:
-            x = cb.argument("x", shape=shape, dtype=np.float64)
-            y = cb.argument("y", shape=shape, dtype=np.float64)
-            cb.output(cb.stack((x, y), axis=axis))
-
-        x_in = np.arange(2 ** input_dims, dtype=float).reshape(shape)
-        y_in = np.arange(1, 1 + 2 ** input_dims, dtype=float).reshape(shape)
-
-        code = cb.build()
-
-        ref = np.stack((x_in, y_in), axis)
+        with az.Context(queue) as ctx:
+            x = ctx.argument("x", shape=shape, dtype=np.float64)
+            y = ctx.argument("y", shape=shape, dtype=np.float64)
+            ctx.output(ctx.stack((x, y), axis=axis))
 
-        x_in = cl.array.to_device(queue, x_in)
-        y_in = cl.array.to_device(queue, y_in)
+        x = np.arange(2 ** input_dims, dtype=np.float64).reshape(shape)
+        y = np.arange(1, 1 + 2 ** input_dims, dtype=np.float64).reshape(shape)
 
-        assert (code.eval_one(x=x_in, y=y_in).get() == ref).all()
+        prog = ctx.build()
+        assert (eval_one(prog, x=x, y=y) == np.stack((x, y), axis)).all()
 
 
 @pytest.mark.parametrize("input_dims", (2, 3))
@@ -197,13 +191,13 @@ def test_slice_along_dim(ctx_factory, input_dims):
         slice_spec[axis] = 1
         slice_spec = tuple(slice_spec)
 
-        with az.CodeBuilder(queue) as cb:
-            x = cb.argument("x", shape=shape, dtype=np.float64)
-            cb.output(x[slice_spec])
+        with az.Context(queue) as ctx:
+            x = ctx.argument("x", shape=shape, dtype=np.float64)
+            ctx.output(x[slice_spec])
 
-        x_in = np.arange(2 ** input_dims, dtype=float).reshape(shape)
-        code = cb.build()
-        assert (code.eval_one(x=x_in) == x_in[slice_spec]).all()
+        x = np.arange(2 ** input_dims, dtype=np.float64).reshape(shape)
+        prog = ctx.build()
+        assert (eval_one(prog, x=x) == x[slice_spec]).all()
 
 
 if __name__ == "__main__":
-- 
GitLab


From 756eaef59eb0516a24114a102d0a5ef08b3128be Mon Sep 17 00:00:00 2001
From: Matt Wala <wala1@illinois.edu>
Date: Mon, 27 Apr 2020 23:46:14 -0500
Subject: [PATCH 13/46] Modify advection demo

---
 experiments/advection.py | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/experiments/advection.py b/experiments/advection.py
index 56fe1c6..e5e435f 100644
--- a/experiments/advection.py
+++ b/experiments/advection.py
@@ -374,7 +374,7 @@ def matrix_getter(name, shape):
     
     def getter(self):
         mat = self.cb.argument(name, shape, np.float64)
-        self.cb.bind_argument(mat, getattr(self.discr, name))
+        self.cb.bind(mat, getattr(self.discr, name))
         return mat
 
     return property(memoized(getter))
@@ -555,21 +555,18 @@ def main():
 
     discr = DGDiscr1D(0, 2*np.pi, nelements=nelements, nnodes=nnodes)
     
-    with az.CodeBuilder(queue) as cb:
+    with az.Context(queue) as cb:
         u = cb.argument("u", shape="(nelements, nnodes)", dtype=np.float64)
         op = AdvectionOperator(
                 discr, c=1, flux_type="central", dg_ops=DGOps1D(discr, cb))
         cb.output(op.apply(u))
 
-    code = cb.build()
-    print(code.program)
-    #import loopy as lp
-    #code.update_program(lp.fix_parameters(code.program, nnodes=nnodes, nelements=nelements))
+    prog = cb.build()
     u = np.sin(discr.nodes())
-    print(code.eval_one(u=u.reshape(nelements, nnodes)))
+    print(prog.program)
+    print(prog(u=u.reshape(nelements, nnodes))[1][0])
 
 
 if __name__ == "__main__":
-    #from pytest import main
-    #main([__file__])
     main()
+
-- 
GitLab


From 0b128689c024998d053423f57196897ba9c99496 Mon Sep 17 00:00:00 2001
From: Matt Wala <wala1@illinois.edu>
Date: Mon, 27 Apr 2020 23:51:12 -0500
Subject: [PATCH 14/46] Add loopy to requirements.txt

---
 requirements.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/requirements.txt b/requirements.txt
index 7819c26..a9288c2 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1 +1,2 @@
+git+https://github.com/inducer/loopy.git
 git+https://github.com/inducer/pyopencl.git
-- 
GitLab


From d5afc8042cee397ea86ad108d9b984c770777fc4 Mon Sep 17 00:00:00 2001
From: Matt Wala <wala1@illinois.edu>
Date: Mon, 27 Apr 2020 23:54:31 -0500
Subject: [PATCH 15/46] Flake8 fixes

---
 arrayzy/array.py | 26 ++++++++++++--------------
 1 file changed, 12 insertions(+), 14 deletions(-)

diff --git a/arrayzy/array.py b/arrayzy/array.py
index bd44e26..a2042aa 100644
--- a/arrayzy/array.py
+++ b/arrayzy/array.py
@@ -29,7 +29,6 @@ import numpy as np
 import loopy.symbolic as sym
 import pymbolic
 import pymbolic.primitives as prim
-from pytools import memoize_method
 
 from arrayzy.program import BoundProgram, PyOpenCLTarget
 from arrayzy.utils import domain_for_shape
@@ -59,8 +58,8 @@ class Array:
 
     .. attribute:: dtype
 
-        An instance of :class:`loopy.types.LoopyType` or *None* to indicate that
-        the type of the array is not yet known.
+        An instance of :class:`loopy.types.LoopyType` or *None* to indicate
+        that the type of the array is not yet known.
 
     .. attribute:: name
 
@@ -173,8 +172,8 @@ class Array:
                 dims.append(elem)
             else:
                 raise ValueError("not implemented")
-            # TODO: Not clear how to implement slices with upper bounds that may
-            # exceed the symbolic maximum along the dimension.  What is the
+            # TODO: Not clear how to implement slices with upper bounds that
+            # may exceed the symbolic maximum along the dimension.  What is the
             # shape of the resulting array?
 
         return ArrayExpression(
@@ -185,8 +184,8 @@ class Array:
 
     @property
     def dtype(self):
-        # TODO: The dtype could get out of sync with the kernel if the kernel is
-        # updated. Should the dtype be updated as well?
+        # TODO: The dtype could get out of sync with the kernel if the kernel
+        # is updated. Should the dtype be updated as well?
         return self._dtype
 
     @property
@@ -319,10 +318,7 @@ class Context:
         # TODO: does not handle None
         result = args[0]
         for arg in args[1:]:
-            result = (
-                    np.empty(0, dtype=result)
-                    + np.empty(0, dtype=arg
-                    )).dtype
+            result = (np.empty(0, dtype=result) + np.empty(0, dtype=arg)).dtype
         return result
 
     def _get_or_create_parameter(self, name):
@@ -431,8 +427,8 @@ class Context:
         assert a.ndim == 1
         idx = (prim.Variable("_0") - shift) % a.shape[0]
         # It's not immediately clear what loopy-generated code guarantees about
-        # the sign of the result of the % operator, so if the result is negative
-        # we shift it to be positive.
+        # the sign of the result of the % operator, so if the result is
+        # negative we shift it to be positive.
         idx = idx + prim.If(prim.Comparison(idx, "<", 0), a.shape[0], 0)
         expr = a.to_loopy_expression((idx,))
         return ArrayExpression(self, a.shape, a.dtype, expr)
@@ -526,7 +522,9 @@ class Context:
         a_inames = tuple(map(prim.Variable, (out_inames[0], out_red_iname)))
         b_inames = tuple(map(prim.Variable, (out_red_iname, out_inames[1])))
 
-        expr = a.to_loopy_expression(a_inames) * b.to_loopy_expression(b_inames)
+        expr = (
+                a.to_loopy_expression(a_inames)
+                * b.to_loopy_expression(b_inames))
 
         out_expr = sym.Reduction(
                 operation=red.parse_reduction_op("sum"),
-- 
GitLab


From e084e44bd98a25feb64b67d08823d5fe93f9fd2b Mon Sep 17 00:00:00 2001
From: Matt Wala <wala1@illinois.edu>
Date: Mon, 27 Apr 2020 23:56:10 -0500
Subject: [PATCH 16/46] more flake8 fixes

---
 arrayzy/__init__.py | 2 +-
 arrayzy/program.py  | 8 +++-----
 test/test_linalg.py | 7 ++++---
 3 files changed, 8 insertions(+), 9 deletions(-)

diff --git a/arrayzy/__init__.py b/arrayzy/__init__.py
index 989c2fe..088dc97 100644
--- a/arrayzy/__init__.py
+++ b/arrayzy/__init__.py
@@ -1 +1 @@
-from arrayzy.array import Context
+from arrayzy.array import Context  # noqa
diff --git a/arrayzy/program.py b/arrayzy/program.py
index b77b92d..0378ca4 100644
--- a/arrayzy/program.py
+++ b/arrayzy/program.py
@@ -21,7 +21,6 @@ THE SOFTWARE.
 """
 
 
-import loopy as lp
 from pytools import RecordWithoutPickling
 
 
@@ -62,17 +61,16 @@ class BoundProgram(RecordWithoutPickling):
 
     def __init__(self, program, queue, bound_arguments, target):
         super().__init__(program=program, queue=queue,
-                 bound_arguments=bound_arguments, target=target)
-        
+                         bound_arguments=bound_arguments, target=target)
+
     def __call__(self, *args, **kwargs):
         """Convenience function for launching a :mod:`pyopencl` computation."""
         if not self.queue:
             raise ValueError("queue must be specified")
-        
+
         updated_kwargs = self.bound_arguments.copy()
         updated_kwargs.update(kwargs)
         return self.program(self.queue, *args, **updated_kwargs)
 
 
 # vim: foldmethod=marker
-    
diff --git a/test/test_linalg.py b/test/test_linalg.py
index 4172d4d..1d2cd02 100644
--- a/test/test_linalg.py
+++ b/test/test_linalg.py
@@ -47,7 +47,7 @@ def eval_one(prog, **kwargs):
 def test_symbolic_array(ctx_factory):
     cl_ctx = ctx_factory()
     queue = cl.CommandQueue(cl_ctx)
-    
+
     with az.Context(queue) as ctx:
         x = ctx.argument("x", shape="n", dtype=np.float64)
         ctx.output(x)
@@ -82,7 +82,8 @@ def test_scalar_array_binary_arith(ctx_factory, which, reverse):
     op = getattr(operator, which)
     if reverse:
         op_orig = op
-        op = lambda x, y: op_orig(y, x)
+        def op(x, y):
+            return op_orig(y, x)
 
     with az.Context(queue) as ctx:
         x = ctx.argument("x", shape="n", dtype=np.float64)
@@ -186,7 +187,7 @@ def test_slice_along_dim(ctx_factory, input_dims):
 
     for axis in range(input_dims):
         shape = (2,) * input_dims
-        
+
         slice_spec = [slice(None, None, None)] * input_dims
         slice_spec[axis] = 1
         slice_spec = tuple(slice_spec)
-- 
GitLab


From 56fa8da48f70afbea0868e8b9c66035872b6300a Mon Sep 17 00:00:00 2001
From: Matt Wala <wala1@illinois.edu>
Date: Mon, 27 Apr 2020 23:57:22 -0500
Subject: [PATCH 17/46] Even more flake8 fixes

---
 arrayzy/array.py    | 2 +-
 test/test_linalg.py | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/arrayzy/array.py b/arrayzy/array.py
index a2042aa..fe1a5ab 100644
--- a/arrayzy/array.py
+++ b/arrayzy/array.py
@@ -206,7 +206,7 @@ class Array:
         return domain_for_shape(dim_names, self.shape)
 
     @property
-    def T(self):
+    def T(self):  # noqa
         """Return the transpose of the array of the array."""
         new_shape = tuple(reversed(self.shape))
         new_expr = self.to_loopy_expression(list(reversed(self.dims)))
diff --git a/test/test_linalg.py b/test/test_linalg.py
index 1d2cd02..5393f59 100644
--- a/test/test_linalg.py
+++ b/test/test_linalg.py
@@ -82,6 +82,7 @@ def test_scalar_array_binary_arith(ctx_factory, which, reverse):
     op = getattr(operator, which)
     if reverse:
         op_orig = op
+        
         def op(x, y):
             return op_orig(y, x)
 
-- 
GitLab


From 47e7c5df6628ec2f657de22db6559bb03a5fa228 Mon Sep 17 00:00:00 2001
From: Matt Wala <wala1@illinois.edu>
Date: Mon, 27 Apr 2020 23:59:21 -0500
Subject: [PATCH 18/46] Try removing pyopencl from requirements.txt

---
 .gitlab-ci.yml      | 5 ++---
 requirements.txt    | 1 -
 test/test_linalg.py | 2 +-
 3 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index d2ad9be..5f733be 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -2,8 +2,7 @@ Python 3 POCL:
   script:
   - export PY_EXE=python3
   - export PYOPENCL_TEST=portable
-  - export EXTRA_INSTALL="pybind11 numpy mako"
-  - export LOOPY_NO_CACHE=1
+  - export EXTRA_INSTALL="pyopencl"
   - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project.sh
   - ". ./build-and-test-py-project.sh"
   tags:
@@ -18,7 +17,7 @@ Python 3 POCL:
 Pylint:
   script:
   - export PY_EXE=python3
-  - EXTRA_INSTALL="pybind11 numpy mako"
+  - EXTRA_INSTALL="pyopencl"
   - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/prepare-and-run-pylint.sh
   - ". ./prepare-and-run-pylint.sh arrayzy test/test_*.py"
   tags:
diff --git a/requirements.txt b/requirements.txt
index a9288c2..22617c7 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,2 +1 @@
 git+https://github.com/inducer/loopy.git
-git+https://github.com/inducer/pyopencl.git
diff --git a/test/test_linalg.py b/test/test_linalg.py
index 5393f59..e3949ce 100644
--- a/test/test_linalg.py
+++ b/test/test_linalg.py
@@ -82,7 +82,7 @@ def test_scalar_array_binary_arith(ctx_factory, which, reverse):
     op = getattr(operator, which)
     if reverse:
         op_orig = op
-        
+
         def op(x, y):
             return op_orig(y, x)
 
-- 
GitLab


From dc222823727cb4717e5db353bc5239f8cea5033f Mon Sep 17 00:00:00 2001
From: Matt Wala <wala1@illinois.edu>
Date: Tue, 28 Apr 2020 00:02:20 -0500
Subject: [PATCH 19/46] Placate pylint and flake8, possibly

---
 test/test_linalg.py | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/test/test_linalg.py b/test/test_linalg.py
index e3949ce..b1df677 100644
--- a/test/test_linalg.py
+++ b/test/test_linalg.py
@@ -73,6 +73,12 @@ def test_transpose(ctx_factory, dim):
     assert (eval_one(prog, x=x) == x.T).all()
 
 
+def reverse_args(f):
+    def wrapper(*args):
+        return f(*reversed(args))
+    return wrapper
+
+
 @pytest.mark.parametrize("which", ("add", "sub", "mul", "truediv"))
 @pytest.mark.parametrize("reverse", (False, True))
 def test_scalar_array_binary_arith(ctx_factory, which, reverse):
@@ -81,10 +87,7 @@ def test_scalar_array_binary_arith(ctx_factory, which, reverse):
 
     op = getattr(operator, which)
     if reverse:
-        op_orig = op
-
-        def op(x, y):
-            return op_orig(y, x)
+        op = reverse_args(op)
 
     with az.Context(queue) as ctx:
         x = ctx.argument("x", shape="n", dtype=np.float64)
-- 
GitLab


From 1fa8d487a830e6386dd0503d08730754e323c8bc Mon Sep 17 00:00:00 2001
From: Matt Wala <wala1@illinois.edu>
Date: Tue, 28 Apr 2020 00:11:43 -0500
Subject: [PATCH 20/46] Tweak advection demo

---
 experiments/advection.py | 33 ++++++++++++++++-----------------
 1 file changed, 16 insertions(+), 17 deletions(-)

diff --git a/experiments/advection.py b/experiments/advection.py
index e5e435f..0b74490 100644
--- a/experiments/advection.py
+++ b/experiments/advection.py
@@ -373,8 +373,8 @@ class DGOps1DRef(AbstractDGOps1D):
 def matrix_getter(name, shape):
     
     def getter(self):
-        mat = self.cb.argument(name, shape, np.float64)
-        self.cb.bind(mat, getattr(self.discr, name))
+        mat = self.ctx.argument(name, shape, np.float64)
+        self.ctx.bind(mat, getattr(self.discr, name))
         return mat
 
     return property(memoized(getter))
@@ -384,11 +384,11 @@ class DGOps1D(AbstractDGOps1D):
 
     @AbstractDGOps1D.array_ops.setter
     def array_ops(self):
-        return self.cb
+        return self.ctx
 
-    def __init__(self, discr, cb):
+    def __init__(self, discr, ctx):
         self.discr = discr
-        self.cb = cb
+        self.ctx = ctx
 
     _normals = matrix_getter("normals", "(nelements, 2)")
     _interp_mat = matrix_getter("interp", "(2, nnodes)")
@@ -401,24 +401,24 @@ class DGOps1D(AbstractDGOps1D):
         return self._normals
 
     def interp(self, vec):
-        return self.cb.matmul(self._interp_mat, vec.T, name="_interp").T
+        return self.ctx.matmul(self._interp_mat, vec.T, name="_interp").T
 
     def inv_mass(self, vec):
-        return self.cb.matmul(self._inv_mass_mat, vec.T, name="_inv_mass").T
+        return self.ctx.matmul(self._inv_mass_mat, vec.T, name="_inv_mass").T
 
     def stiffness(self, vec):
-        return self.cb.matmul(self._stiffness_mat, vec.T, name="_stiffness").T
+        return self.ctx.matmul(self._stiffness_mat, vec.T, name="_stiffness").T
 
     def face_mass(self, vec):
-        return self.cb.matmul(self._face_mass_mat, vec.T, name="_face_mass").T
+        return self.ctx.matmul(self._face_mass_mat, vec.T, name="_face_mass").T
 
     def face_swap(self, vec):
-        return self.cb.stack(
+        return self.ctx.stack(
                 (
-                    self.cb.roll(vec[:,1], +1),
-                    self.cb.roll(vec[:,0], -1)),
+                    self.ctx.roll(vec[:,1], +1),
+                    self.ctx.roll(vec[:,0], -1)),
                 axis=1,
-                name="face_swap")
+                name="_face_swap")
 
 
 class AdvectionOperator(object):
@@ -478,10 +478,9 @@ class AdvectionOperator(object):
         Signature: (m, n) -> (m, n)
         """
         dg = self.dg
-        pt1 = dg.face_mass(self.strong_flux(dg.interp(vec)))
-        print("pt1 shape", pt1.shape)
-        pt2 = self.c * dg.stiffness(vec)
-        return -dg.inv_mass(pt1 - pt2)
+        return -dg.inv_mass(
+                dg.face_mass(self.strong_flux(dg.interp(vec)))
+                - self.c * dg.stiffness(vec))
 
     def __call__(self, vec):
         """Apply the DG advection operator to the vector of degrees of freedom.
-- 
GitLab


From cd3a0a9830affe1c7783a7c8a3bc6da210169f1b Mon Sep 17 00:00:00 2001
From: Matt Wala <wala1@illinois.edu>
Date: Thu, 30 Apr 2020 13:40:41 -0500
Subject: [PATCH 21/46] Improve, document _get_or_create_parameter()

---
 arrayzy/array.py | 38 +++++++++++++++++++++++++-------------
 1 file changed, 25 insertions(+), 13 deletions(-)

diff --git a/arrayzy/array.py b/arrayzy/array.py
index fe1a5ab..bf5112f 100644
--- a/arrayzy/array.py
+++ b/arrayzy/array.py
@@ -315,25 +315,37 @@ class Context:
 
     def unify_types(self, *args):
         # Unifies the list of types.
-        # TODO: does not handle None
+        if None in args:
+            # FIXME: This isn't completely precise. Should None + complex return
+            # complex?
+            return None
         result = args[0]
         for arg in args[1:]:
             result = (np.empty(0, dtype=result) + np.empty(0, dtype=arg)).dtype
         return result
 
     def _get_or_create_parameter(self, name):
-        # Create a parameter variable for *name* if not present.
-        # Return the parameter as a pymbolic variable.
-
-        if name in self.program.all_variable_names():
-            if name not in self.program.all_params():
-                # FIXME: May have been added, but not added to a domain.
-                pass
-        else:
-            self.program = self.program.copy(
-                    args=self.program.args + [
-                        lp.ValueArg(name, dtype=self.program.index_dtype)
-                    ])
+        # Create a parameter variable for *name* if not present (suitable for
+        # use as a domain parameter).  Return the parameter as a pymbolic
+        # variable.
+        #
+        # We regard a variable as a parameter if it's an argument of type
+        # *self.program.index_dtype* or if it's an iname (the latter is for
+        # data-dependent control flow).
+        if not name in self._parameters:
+            # Resist the temptation to look through domain parameters, because a
+            # variable may be a parameter without a domain in the kernel yet.
+            if name not in self.program.all_inames():
+                # Add an argument.
+                if name in self.program.all_variable_names():
+                    raise ValueError("could not create parameter: "
+                           "name '%s' already in use" % name)
+                self.program = self.program.copy(
+                        args=self.program.args + [
+                            lp.ValueArg(name, dtype=self.program.index_dtype)
+                        ])
+                self.var_name_gen.add_name(name)
+            self._parameters.add(name)
 
         return prim.Variable(name)
 
-- 
GitLab


From 9023708bc5c1c936eec14b0e77a2965a3782d513 Mon Sep 17 00:00:00 2001
From: Matt Wala <wala1@illinois.edu>
Date: Thu, 30 Apr 2020 22:08:01 -0500
Subject: [PATCH 22/46] Add _parameters attribute

---
 arrayzy/array.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arrayzy/array.py b/arrayzy/array.py
index bf5112f..bfe4642 100644
--- a/arrayzy/array.py
+++ b/arrayzy/array.py
@@ -290,6 +290,9 @@ class Context:
         self.var_name_gen = self.program.get_var_name_generator()
         self.insn_id_gen = self.program.get_instruction_id_generator()
         self._last_insn_id = None
+        # The set of names that can be used for domain in parameters. It should
+        # be updated through self._get_or_create_parameter().
+        self._parameters = set()
 
     def __enter__(self):
         return self
-- 
GitLab


From 2506ec1e30b7afce9eeeca2af3f0ed1aadab7f74 Mon Sep 17 00:00:00 2001
From: Matt Wala <wala1@illinois.edu>
Date: Thu, 30 Apr 2020 22:52:53 -0500
Subject: [PATCH 23/46] Various doc improvements

---
 arrayzy/array.py   | 90 ++++++++++++++++++++++++++++++++++++++--------
 arrayzy/program.py | 11 +++++-
 doc/program.rst    |  2 +-
 3 files changed, 86 insertions(+), 17 deletions(-)

diff --git a/arrayzy/array.py b/arrayzy/array.py
index bfe4642..bff1b12 100644
--- a/arrayzy/array.py
+++ b/arrayzy/array.py
@@ -35,17 +35,32 @@ from arrayzy.utils import domain_for_shape
 
 
 __doc__ = """
+
+Array objects
+-------------
+
+:class:`Array` is the basic abstraction that represents array
+computations. Instances of this class are created and managed within a
+:class:`Context`.
+
 .. autoclass:: Array
 .. autoclass:: ArrayExpression
 .. autoclass:: ArrayVariable
 
+Code building context
+---------------------
+
 .. autoclass:: Context
+
 """
 
 
 class Array:
     """A representation of an array computation in a :mod:`loopy` kernel.
 
+    This class is abstract. It has at least two concrete subclasses,
+    :class:`ArrayExpression` and :class:`ArrayVariable`.
+
     .. attribute:: ctx
 
         The :class:`Context` with which this array is associated, to define the
@@ -70,14 +85,23 @@ class Array:
 
         The number of dimensions in :attr:`shape`.
 
-    .. attribute:: T
+    Code generation support:
 
     .. attribute:: dims
+
+        A tuple of :mod:`pymbolic` variables representing the indices of the
+        array. These are named like *_0*, *_1*, ....
+
     .. attribute:: dim_names
 
+        Same as :data:`dims` but a tuple of strings.
+
     .. automethod:: get_domain
     .. automethod:: to_loopy_expression
 
+    Supported array operations:
+
+    .. autoattribute:: T
     .. automethod:: __getitem__
     .. automethod:: __mul__
     .. automethod:: __rmul__
@@ -154,6 +178,12 @@ class Array:
     __neg__ = partialmethod(_unary_op, operator.neg)
 
     def __getitem__(self, slice_spec):
+        """Extract a region from an array.
+
+        :arg slice_spec: The slice argument.
+
+        :returns: an :class:`Array`
+        """
         from numbers import Integral
 
         if len(slice_spec) != self.ndim:
@@ -199,7 +229,10 @@ class Array:
     def get_domain(self, dim_names=None):
         """Return the domain that defines the space of indices for this array.
 
-        :arg dim_names: if not *None*, the names of the dimensions of the array
+        :arg dim_names: if not *None*, a tuple of strings, the names of the
+            dimensions of the array.  Defaults to :data:`dim_names`.
+
+        :returns: a :class:`islpy.BasicSet`
         """
         if dim_names is None:
             dim_names = self.dim_names
@@ -207,12 +240,23 @@ class Array:
 
     @property
     def T(self):  # noqa
-        """Return the transpose of the array of the array."""
+        """Return the transpose of the array.
+
+        :returns: an :class:`Array`
+        """
         new_shape = tuple(reversed(self.shape))
         new_expr = self.to_loopy_expression(list(reversed(self.dims)))
         return ArrayExpression(self.ctx, new_shape, self.dtype, new_expr)
 
     def to_loopy_expression(self, dims):
+        """Create a :mod:`loopy` expression for the value at index *dims*.
+
+        :arg dims: A tuple of :mod:`pymbolic` expressions representing the
+           desired index/indices. For instance if *dims = (Variable('a'), 1)*,
+           this will return the expression for the element at index *a, 1*.
+
+        :returns: a :mod:`loopy` expression
+        """
         raise NotImplementedError
 
 
@@ -252,7 +296,7 @@ class Context:
 
     .. attribute:: program
 
-        A :class:`lp.LoopKernel` that defines names used in expressions in
+        A :class:`loopy.LoopKernel` that defines names used in expressions in
         :class:`Array` instances attached to this code. Names defined once will
         never change their meaning, for the lifetime of this object.
 
@@ -301,7 +345,7 @@ class Context:
         pass
 
     def build(self):
-        """Return a generated :class:`BoundProgram`.
+        """Return a generated :class:`arrayzy.program.BoundProgram`.
         """
         return BoundProgram(self.program, self.queue,
                             self.bound_arguments.copy(), self.target)
@@ -342,7 +386,7 @@ class Context:
                 # Add an argument.
                 if name in self.program.all_variable_names():
                     raise ValueError("could not create parameter: "
-                           "name '%s' already in use" % name)
+                           f"name '{name}' already in use")
                 self.program = self.program.copy(
                         args=self.program.args + [
                             lp.ValueArg(name, dtype=self.program.index_dtype)
@@ -407,10 +451,10 @@ class Context:
     def argument(self, name, shape, dtype, order="C"):
         """Append an argument to the program.
 
-        :returns: a :class:`ArrayVariable`
+        :returns: an :class:`ArrayVariable`
         """
         if name in self.program.all_variable_names():
-            raise ValueError(f"name '{name}' already in use in code")
+            raise ValueError(f"name '{name}' already in use")
 
         arg = lp.GlobalArg(name, shape=shape, dtype=dtype, order=order)
 
@@ -431,13 +475,17 @@ class Context:
         """Bind an argument to a value.
 
         :arg arg: a :class:`ArrayVariable` representing an argument
+        :arg val: a concrete array value appropriate for the code generation target
         """
         self.bound_arguments[arg.name] = val
 
     def roll(self, a, shift, name=None):
         """Roll elements along the given axis.
 
-        :returns: a :class:`Array`
+        :arg a: a one-dimensional :class:`Array`
+        :arg shift: a scalar shift amount
+
+        :returns: an :class:`Array`
         """
         assert a.ndim == 1
         idx = (prim.Variable("_0") - shift) % a.shape[0]
@@ -451,9 +499,12 @@ class Context:
     def stack(self, arrays, axis=0, name=None):
         """Join a sequence of arrays along a new axis.
 
-        :arg axis: the index of the axis
+        :arg array: a finite sequence, each of whose elements is an
+            :class:`Array`
+        :arg axis: the position of the new axis, which will have length
+            *len(arrays)*
 
-        :returns: a :class:`Array`
+        :returns: an :class:`Array`
         """
         for array in arrays[1:]:
             if array.ndim != arrays[0].ndim:
@@ -515,6 +566,11 @@ class Context:
 
     def matmul(self, a, b, name=None):
         """Multiply matrix *a* by *b*.
+
+        :arg a: an :class:`Array` of shape *(m, k)*
+        :arg b: an :class:`Array` of shape *(k, n)*
+
+        :returns: an :class:`Array` of shape *(m, n)*
         """
         # Generate a temporay for the matrix-matrix multiplication.
         self._assume_equal(a.shape[1], b.shape[0])
@@ -564,12 +620,16 @@ class Context:
 
         return ArrayVariable(self, out_shape, out_dtype, name)
 
-    def output(self, expr):
-        """Copy *expr* to an output variable.
+    def output(self, expr, name=None):
+        """Copy *expr* to a (new) output variable.
 
-        :returns: a :class:`Array`
+        :arg expr: an :class:`Array`
+
+        :returns: an :class:`ArrayVariable`
         """
-        name = self.var_name_gen("_out")
+        if name is None:
+            name = "_out"
+        name = self.var_name_gen(name)
         out = lp.GlobalArg(name, shape=expr.shape, dtype=expr.dtype, order="C")
         out_inames = [
                 self.var_name_gen(f"{name}_dim{d}") for d in range(expr.ndim)]
diff --git a/arrayzy/program.py b/arrayzy/program.py
index 0378ca4..3d46e35 100644
--- a/arrayzy/program.py
+++ b/arrayzy/program.py
@@ -24,11 +24,20 @@ THE SOFTWARE.
 from pytools import RecordWithoutPickling
 
 
+__doc__ = """
+.. autoclass:: Target
+.. autoclass:: PyOpenCLTarget
+.. autoclass:: BoundProgram
+"""
+
+
 class Target:
+    """An abstract code generation target."""
     pass
 
 
 class PyOpenCLTarget:
+    """A :mod:`pyopencl` code generation target."""
 
     def __init__(self, queue):
         self.queue = queue
@@ -43,7 +52,7 @@ class BoundProgram(RecordWithoutPickling):
 
     .. attribute:: program
 
-        The underlying :class:`lp.LoopKernel`.
+        The underlying :class:`loopy.LoopKernel`.
 
     .. attribute:: queue
 
diff --git a/doc/program.rst b/doc/program.rst
index 7c96f9f..b0f4ff2 100644
--- a/doc/program.rst
+++ b/doc/program.rst
@@ -1,4 +1,4 @@
 Generated programs
 ==================
 
-.. autoclass:: arrayzy.program.BoundProgram
+.. automodule:: arrayzy.program
-- 
GitLab


From afbf4059f57b8ff825888a55b63e423adcd39683 Mon Sep 17 00:00:00 2001
From: Matt Wala <wala1@illinois.edu>
Date: Thu, 30 Apr 2020 22:56:12 -0500
Subject: [PATCH 24/46] flake8 fixes

---
 arrayzy/array.py | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/arrayzy/array.py b/arrayzy/array.py
index bff1b12..6c7de16 100644
--- a/arrayzy/array.py
+++ b/arrayzy/array.py
@@ -363,8 +363,8 @@ class Context:
     def unify_types(self, *args):
         # Unifies the list of types.
         if None in args:
-            # FIXME: This isn't completely precise. Should None + complex return
-            # complex?
+            # FIXME: This isn't completely precise. Should None + complex
+            # return complex?
             return None
         result = args[0]
         for arg in args[1:]:
@@ -379,14 +379,15 @@ class Context:
         # We regard a variable as a parameter if it's an argument of type
         # *self.program.index_dtype* or if it's an iname (the latter is for
         # data-dependent control flow).
-        if not name in self._parameters:
-            # Resist the temptation to look through domain parameters, because a
-            # variable may be a parameter without a domain in the kernel yet.
+        if name not in self._parameters:
+            # Resist the temptation to look through domain parameters, because
+            # a variable may be a parameter without a domain in the kernel yet.
             if name not in self.program.all_inames():
                 # Add an argument.
                 if name in self.program.all_variable_names():
-                    raise ValueError("could not create parameter: "
-                           f"name '{name}' already in use")
+                    raise ValueError(
+                            "could not create parameter: "
+                            f"name '{name}' already in use")
                 self.program = self.program.copy(
                         args=self.program.args + [
                             lp.ValueArg(name, dtype=self.program.index_dtype)
@@ -475,7 +476,9 @@ class Context:
         """Bind an argument to a value.
 
         :arg arg: a :class:`ArrayVariable` representing an argument
-        :arg val: a concrete array value appropriate for the code generation target
+
+        :arg val: a concrete array value appropriate for the code generation
+            target
         """
         self.bound_arguments[arg.name] = val
 
-- 
GitLab


From 75b805006cc8249590559f710c29df7279f89c43 Mon Sep 17 00:00:00 2001
From: Matt Wala <wala1@illinois.edu>
Date: Thu, 30 Apr 2020 23:00:11 -0500
Subject: [PATCH 25/46] Try running without cache

---
 .gitlab-ci.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 5f733be..e34f3bc 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -3,6 +3,7 @@ Python 3 POCL:
   - export PY_EXE=python3
   - export PYOPENCL_TEST=portable
   - export EXTRA_INSTALL="pyopencl"
+  - export LOOPY_NO_CACHE=1
   - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project.sh
   - ". ./build-and-test-py-project.sh"
   tags:
-- 
GitLab


From 36b45848f994d4e694eff30335a29a7941e41bd6 Mon Sep 17 00:00:00 2001
From: Matt Wala <wala1@illinois.edu>
Date: Thu, 30 Apr 2020 23:01:31 -0500
Subject: [PATCH 26/46] Revert "Try running without cache"

This reverts commit 75b805006cc8249590559f710c29df7279f89c43.
---
 .gitlab-ci.yml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index e34f3bc..5f733be 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -3,7 +3,6 @@ Python 3 POCL:
   - export PY_EXE=python3
   - export PYOPENCL_TEST=portable
   - export EXTRA_INSTALL="pyopencl"
-  - export LOOPY_NO_CACHE=1
   - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project.sh
   - ". ./build-and-test-py-project.sh"
   tags:
-- 
GitLab


From 439553be3e2324a9fc10da1859af2b4f8a0196b0 Mon Sep 17 00:00:00 2001
From: Matt Wala <wala1@illinois.edu>
Date: Thu, 30 Apr 2020 23:09:38 -0500
Subject: [PATCH 27/46] Experiment: comment out everything except the simplest
 test

---
 test/test_linalg.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/test/test_linalg.py b/test/test_linalg.py
index b1df677..ac9f7a8 100644
--- a/test/test_linalg.py
+++ b/test/test_linalg.py
@@ -57,6 +57,7 @@ def test_symbolic_array(ctx_factory):
     assert (eval_one(prog, x=x) == x).all()
 
 
+"""
 @pytest.mark.parametrize("dim", (1, 2, 3))
 def test_transpose(ctx_factory, dim):
     cl_ctx = ctx_factory()
@@ -203,6 +204,7 @@ def test_slice_along_dim(ctx_factory, input_dims):
         x = np.arange(2 ** input_dims, dtype=np.float64).reshape(shape)
         prog = ctx.build()
         assert (eval_one(prog, x=x) == x[slice_spec]).all()
+"""
 
 
 if __name__ == "__main__":
-- 
GitLab


From 1cf84331354ca63e9971a3e8a81ea6316b1914f5 Mon Sep 17 00:00:00 2001
From: Matt Wala <wala1@illinois.edu>
Date: Thu, 30 Apr 2020 23:10:32 -0500
Subject: [PATCH 28/46] Revert "Experiment: comment out everything except the
 simplest test"

This reverts commit 439553be3e2324a9fc10da1859af2b4f8a0196b0.
---
 test/test_linalg.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/test/test_linalg.py b/test/test_linalg.py
index ac9f7a8..b1df677 100644
--- a/test/test_linalg.py
+++ b/test/test_linalg.py
@@ -57,7 +57,6 @@ def test_symbolic_array(ctx_factory):
     assert (eval_one(prog, x=x) == x).all()
 
 
-"""
 @pytest.mark.parametrize("dim", (1, 2, 3))
 def test_transpose(ctx_factory, dim):
     cl_ctx = ctx_factory()
@@ -204,7 +203,6 @@ def test_slice_along_dim(ctx_factory, input_dims):
         x = np.arange(2 ** input_dims, dtype=np.float64).reshape(shape)
         prog = ctx.build()
         assert (eval_one(prog, x=x) == x[slice_spec]).all()
-"""
 
 
 if __name__ == "__main__":
-- 
GitLab


From 9d4727099b99b60b8359c31df522f853a7c063e4 Mon Sep 17 00:00:00 2001
From: Matt Wala <wala1@illinois.edu>
Date: Fri, 1 May 2020 00:17:50 -0500
Subject: [PATCH 29/46] Add convergence test for arrayzy generated code

---
 experiments/advection.py | 32 +++++++++++++++++++++++++++++++-
 1 file changed, 31 insertions(+), 1 deletion(-)

diff --git a/experiments/advection.py b/experiments/advection.py
index 0b74490..8f96229 100644
--- a/experiments/advection.py
+++ b/experiments/advection.py
@@ -382,7 +382,7 @@ def matrix_getter(name, shape):
 
 class DGOps1D(AbstractDGOps1D):
 
-    @AbstractDGOps1D.array_ops.setter
+    @AbstractDGOps1D.array_ops.getter
     def array_ops(self):
         return self.ctx
 
@@ -543,6 +543,36 @@ def test_ref_advection_convergence(order, flux_type):
     assert eoc >= order - 0.1, eoc
 
 
+@pytest.mark.parametrize("order", (3, 4, 5))
+@pytest.mark.parametrize("flux_type", ("central", "upwind"))
+def test_advection_convergence(order, flux_type):
+    errors = []
+    hs = []
+
+    import arrayzy as az
+    import pyopencl as cl
+    cl_ctx = cl.create_some_context()
+    queue = cl.CommandQueue(cl_ctx)
+
+    for nelements in (8, 12, 16, 20):
+        discr = DGDiscr1D(0, 2*np.pi, nelements=nelements, nnodes=order)
+        u_initial = np.sin(discr.nodes())
+        with az.Context(queue) as cb:
+            u = cb.argument("u", shape="(nelements, nnodes)", dtype=np.float64)
+            op = AdvectionOperator(
+                    discr, c=1, flux_type=flux_type, dg_ops=DGOps1D(discr, cb))
+            cb.output(op.apply(u))
+        prog = cb.build()
+        u = rk4(lambda t, y: prog(u=y.reshape(nelements, order))[1][0].reshape(-1),
+                u_initial, t_initial=0, t_final=np.pi, dt=0.01)
+        u_ref = -u_initial
+        hs.append(discr.h)
+        errors.append(integrate(discr, (u - u_ref)**2)**0.5)
+
+    eoc, _ = np.polyfit(np.log(hs), np.log(errors), 1)
+    assert eoc >= order - 0.1, eoc
+
+
 def main():
     import arrayzy as az
     import pyopencl as cl
-- 
GitLab


From 54bc4e859e099886042553d619b5b05d23d99c93 Mon Sep 17 00:00:00 2001
From: Andreas Kloeckner <inform@tiker.net>
Date: Fri, 1 May 2020 00:57:09 -0500
Subject: [PATCH 30/46] _Split DG demo bits off into a separate file to make
 them importable from a demo notebook

---
 .gitignore                                    |   1 +
 .../Computing and DG with Lazy Arrays.ipynb   | 204 +++++++++
 experiments/advection.py                      | 432 +-----------------
 experiments/dg_tools.py                       | 420 +++++++++++++++++
 4 files changed, 634 insertions(+), 423 deletions(-)
 create mode 100644 experiments/Computing and DG with Lazy Arrays.ipynb
 create mode 100644 experiments/dg_tools.py

diff --git a/.gitignore b/.gitignore
index c11edd6..7a7df09 100644
--- a/.gitignore
+++ b/.gitignore
@@ -13,3 +13,4 @@ distribute*egg
 distribute*tar.gz
 
 .cache
+.ipynb_checkpoints
diff --git a/experiments/Computing and DG with Lazy Arrays.ipynb b/experiments/Computing and DG with Lazy Arrays.ipynb
new file mode 100644
index 0000000..9f8fcef
--- /dev/null
+++ b/experiments/Computing and DG with Lazy Arrays.ipynb	
@@ -0,0 +1,204 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Computing and DG with Lazy Arrays"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "import arrayzy as az\n",
+    "import pyopencl as cl\n",
+    "import loopy as lp"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Choose platform:\n",
+      "[0] <pyopencl.Platform 'Intel(R) OpenCL' at 0x2df3398>\n",
+      "[1] <pyopencl.Platform 'Portable Computing Language' at 0x7fca01de0740>\n"
+     ]
+    },
+    {
+     "name": "stdin",
+     "output_type": "stream",
+     "text": [
+      "Choice [0]: 1\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Set the environment variable PYOPENCL_CTX='1' to avoid being asked again.\n"
+     ]
+    }
+   ],
+   "source": [
+    "cl_ctx = cl.create_some_context(interactive=True)\n",
+    "queue = cl.CommandQueue(cl_ctx)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## The Basic Mechanics"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "with az.Context(queue) as ctx:\n",
+    "    A = ctx.argument(\"A\", shape=\"n, n\", dtype=np.float64)\n",
+    "    ctx.output(2*A + 5)\n",
+    "\n",
+    "prog = ctx.build()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "---------------------------------------------------------------------------\n",
+      "KERNEL: loopy_kernel\n",
+      "---------------------------------------------------------------------------\n",
+      "ARGUMENTS:\n",
+      "A: type: np:dtype('float64'), shape: (n, n), dim_tags: (N1:stride:n, N0:stride:1) aspace: global\n",
+      "_out: type: <auto/runtime>, shape: (n, n), dim_tags: (N1:stride:n, N0:stride:1) aspace: global\n",
+      "n: ValueArg, type: np:dtype('int32')\n",
+      "---------------------------------------------------------------------------\n",
+      "DOMAINS:\n",
+      "{  :  }\n",
+      "[n] -> { [_out_dim0, _out_dim1] : 0 <= _out_dim0 < n and 0 <= _out_dim1 < n }\n",
+      "---------------------------------------------------------------------------\n",
+      "INAME IMPLEMENTATION TAGS:\n",
+      "_out_dim0: None\n",
+      "_out_dim1: None\n",
+      "---------------------------------------------------------------------------\n",
+      "INSTRUCTIONS:\n",
+      "for _out_dim1, _out_dim0\n",
+      "    \u001b[36m_out[_out_dim0, _out_dim1]\u001b[0m = \u001b[35m2*A[_out_dim0, _out_dim1] + 5\u001b[0m  {id=\u001b[32m_store_out\u001b[0m}\n",
+      "end _out_dim1, _out_dim0\n",
+      "---------------------------------------------------------------------------\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(prog.program)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 24,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "#define lid(N) ((int) get_local_id(N))\n",
+      "#define gid(N) ((int) get_group_id(N))\n",
+      "#if __OPENCL_C_VERSION__ < 120\n",
+      "#pragma OPENCL EXTENSION cl_khr_fp64: enable\n",
+      "#endif\n",
+      "\n",
+      "__kernel void __attribute__ ((reqd_work_group_size(1, 1, 1))) loopy_kernel(int const n, __global double const *__restrict__ A, __global double *__restrict__ _out)\n",
+      "{\n",
+      "  for (int _out_dim1 = 0; _out_dim1 <= -1 + n; ++_out_dim1)\n",
+      "    for (int _out_dim0 = 0; _out_dim0 <= -1 + n; ++_out_dim0)\n",
+      "      _out[n * _out_dim0 + _out_dim1] = 2.0 * A[n * _out_dim0 + _out_dim1] + 5.0;\n",
+      "}\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(lp.generate_code_v2(prog.program).device_code())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 25,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "A = np.random.randn(5, 5)\n",
+    "evt, (result,) = prog(A=A)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 26,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([[0., 0., 0., 0., 0.],\n",
+       "       [0., 0., 0., 0., 0.],\n",
+       "       [0., 0., 0., 0., 0.],\n",
+       "       [0., 0., 0., 0., 0.],\n",
+       "       [0., 0., 0., 0., 0.]])"
+      ]
+     },
+     "execution_count": 26,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "result - (2*A+5)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.7"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/experiments/advection.py b/experiments/advection.py
index 8f96229..de6c7ca 100644
--- a/experiments/advection.py
+++ b/experiments/advection.py
@@ -1,426 +1,11 @@
-import contextlib
 import numpy as np
-import numpy.linalg as la
-import numpy.polynomial.legendre as leg
 import pytest
-
-
-__doc__ = """
-Notation convention for operator shapes
-=======================================
-
-* m - number of elements in the discretization
-* n - number of volume degrees of freedom per element
-"""
-
 import functools
-
+from dg_tools import DGDiscr1D, integrate, DGOps1D, DGOps1DRef
 
 memoized = functools.lru_cache(maxsize=None)
 
 
-def ortholegvander(x, deg):
-    """See numpy.polynomial.legendre.legvander(). Uses an orthonormal basis."""
-    result = leg.legvander(x, deg)
-    factors = np.array([np.sqrt((2*n+1)/2) for n in range(0, 1 + deg)])
-    return result * factors
-
-
-def ortholegder(c):
-    """See numpy.polynomial.legendre.legder(). Uses an orthonormal basis."""
-    fw_factors = np.array([np.sqrt((2*n+1)/2) for n in range(len(c))])
-    derivs = leg.legder(c * fw_factors)
-    return derivs / fw_factors[:len(derivs)]
-
-
-def ortholegval(x, c):
-    """See numpy.polynomial.legendre.legval(). Uses an orthonormal basis."""
-    factors = np.array([np.sqrt((2*n+1)/2) for n in range(len(c))])
-    return leg.legval(x, c * factors)
-
-
-class DGDiscr1D(object):
-    """A one-dimensional Discontinuous Galerkin discretization."""
-
-    def __init__(self, left, right, nelements, nnodes):
-        """
-        Inputs:
-            left - left endpoint
-            right - right endpoint
-            nelements - number of discretization panels
-            nnodes - number of degrees of freedom per panel
-        """
-        self.left = left
-        self.right = right
-        self.nelements = nelements
-        self.nnodes = nnodes
-
-    @property
-    @memoized
-    def ref_nodes(self):
-        """Return reference nodes for a single element.
-
-        Signature: ->(n,)
-        """
-        nodes, _ = leg.leggauss(self.nnodes)
-        return nodes
-
-    @property
-    @memoized
-    def ref_weights(self):
-        """Return reference quadrature weights for a single element.
-
-        Signature: ->(n,)
-        """
-        _, weights = leg.leggauss(self.nnodes)
-        return weights
-
-    def zeros(self):
-        """Return a zero solution.
-
-        Signature: ->(n*m,)
-        """
-        return np.zeros(self.nnodes * self.nelements)
-
-    @property
-    def h(self):
-        """Return the element size.
-
-        Signature: ->()
-        """
-        return self.elements[0,1] - self.elements[0,0]
-
-    def nodes(self):
-        """Return the vector of node coordinates.
-
-        Signature: ->(n*m,)
-        """
-        centers = (self.elements[:,0] + self.elements[:,1]) / 2
-        radii = (self.elements[:,1] - self.elements[:,0]) / 2
-        return ((self.ref_nodes[:,np.newaxis] * radii) + centers).T.ravel()
-
-    @property
-    @memoized
-    def vdm(self):
-        """Return the elementwise Vandermonde (modal-to-nodal) matrix.
-
-        Signature: ->(n, n)
-        """
-        return ortholegvander(self.ref_nodes, self.nnodes - 1)
-
-    @property
-    @memoized
-    def _ref_mass(self):
-        """Return the (volume) mass matrix for the reference element.
-
-        Signature: ->(n, n)
-        """
-        return la.inv(self.vdm @ self.vdm.T)
-
-    @property
-    @memoized
-    def mass(self):
-        """Return the elementwise volume mass matrix.
-
-        Signature: ->(n, n)
-        """
-        h = (self.right - self.left) / self.nelements
-        return (h/2) * self._ref_mass
-
-    @property
-    @memoized
-    def inv_mass(self):
-        """Return the inverse of the elementwise volume mass matrix.
-
-        Signature: ->(n, n)
-        """
-        return la.inv(self.mass)
-
-    @property
-    @memoized
-    def face_mass(self):
-        """Return the face mass matrix.
-
-        The face mass matrix combines the effects of applying the face mass
-        operator on each face and interpolating the output to the volume nodes.
-
-        Signature: ->(n, 2)
-        """
-        return self.interp.T.copy()
-
-    @property
-    @memoized
-    def diff(self):
-        """Return the elementwise differentiation matrix.
-
-        Signature: ->(n, n)
-        """
-        VrT = []
-        for row in np.eye(self.nnodes):
-            deriv = ortholegder(row)
-            VrT.append(ortholegval(self.ref_nodes, deriv))
-        Vr = np.vstack(VrT).T
-        return Vr @ la.inv(self.vdm)
-
-    @property
-    @memoized
-    def stiffness(self):
-        """Return the stiffness matrix.
-
-        Signature: ->(n, n)
-        """
-        return (self._ref_mass @ self.diff)
-
-    @property
-    @memoized
-    def interp(self):
-        """Return the volume-to-face interpolation matrix.
-
-        Signature: ->(2, n)
-        """
-        return ortholegvander([-1, 1], self.nnodes - 1) @ la.inv(self.vdm)
-
-    @property
-    @memoized
-    def elements(self):
-        """Return the list of elements, each given by their left/right boundaries.
-
-        Signature: ->(m, 2)
-        """
-        h = (self.right - self.left) / self.nelements
-        return np.array(list(zip(
-            np.linspace(self.left, self.right, self.nelements, endpoint=False),
-            np.linspace(h + self.left, self.right, self.nelements))))
-
-    @property
-    def dg_ops(self):
-        """Return a context manager yielding a DGOps1D instance.
-        """
-        return contextlib.contextmanager(lambda: (yield DGOps1DRef(self)))
-
-    @property
-    def normals(self):
-        """Return the face unit normals.
-
-        Signature: ->(m, 2)
-        """
-        result = np.zeros((self.nelements, 2))
-        result[:,0] = -1
-        result[:,1] = 1
-        return result
-
-
-def interpolate(discr, vec, nodes):
-    """Return an interpolated solution at *nodes*.
-
-    Input:
-        discr - a DGDiscr1D instance
-        vec - vector of nodal values at degrees of freedom
-        nodes - vector of nodes to interpolate to
-
-    Signature:  (m*n,) -> (len(nodes),)
-    """
-    elements = discr.elements
-    nelements = discr.nelements
-    nnodes = discr.nnodes
-    inv_vdm = la.inv(discr.vdm)
-
-    sorter = np.argsort(nodes)
-    sorted_nodes = nodes[sorter]
-    result = []
-
-    indices = np.searchsorted(sorted_nodes, elements)
-    for i, (start, end) in enumerate(indices):
-        if i == 0:
-            start = 0
-        elif i == nelements - 1:
-            end = len(nodes)
-
-        center = (elements[i][0] + elements[i][1]) / 2
-        radius = (elements[i][1] - elements[i][0]) / 2
-        element_nodes = sorted_nodes[start:end]
-        mapped_nodes = (element_nodes - center) / radius
-
-        modal_vals = inv_vdm @ vec[i * nnodes:(i + 1) * nnodes]
-        nodal_vals = ortholegvander(mapped_nodes, nnodes - 1) @ modal_vals
-        result.append(nodal_vals)
-
-    result = np.hstack(result)
-    unsorter = np.arange(len(nodes))[sorter]
-    return result[unsorter]
-
-
-def integrate(discr, soln):
-    """Return the integral of the solution.
-
-    Signature: (n*m,) -> ()
-    """
-    soln = soln.reshape((discr.nelements, discr.nnodes))
-    h = discr.elements[0][1] - discr.elements[0][0]
-    weights = discr.ref_weights * h / 2
-    return np.sum(soln * weights)
-
-
-def elementwise(mat, vec):
-    """Apply a matrix to rows of the input representing per-element
-    degrees of freedom.
-
-    Inputs:
-        mat: Shape (a, b)
-        vec: Shape (c, b)
-
-    Signature: (a, b), (c, b) -> (c, a)
-    """
-    return np.einsum("ij,kj->ki", mat, vec)
-
-
-class AbstractDGOps1D(object):
-
-    def __init__(self, discr):
-        self.discr = discr
-
-    @property
-    def array_ops(self):
-        raise NotImplementedError
-
-    @property
-    def normals(self):
-        """Return the vector of normals at the faces.
-
-        Signature: ->(m, 2)
-        """
-        raise NotImplementedError
-
-    def interp(self, vec):
-        """Apply elementwise volume-to-face interpolation.
-
-        Signature: (m, n) -> (m, 2)
-        """
-        raise NotImplementedError
-
-    def inv_mass(self, vec):
-        """Apply the elementwise inverse mass matrix.
-
-        Signature: (m, n) -> (m, n)
-        """
-        raise NotImplementedError
-
-    def stiffness(self, vec):
-        """Apply the elementwise stiffness matrix.
-
-        Signature: (m, n) -> (m, n)
-        """
-        raise NotImplementedError
-
-    def face_mass(self, vec):
-        """Apply the elementwise face mass matrix.
-
-        Signature: (m, 2) -> (m, n)
-        """
-        raise NotImplementedError
-
-    def face_swap(self, vec):
-        """Swap values at opposite faces.
-
-        Signature: (m, 2) -> (m, 2)
-        """
-        raise NotImplementedError
-
-
-def elementwise(mat, vec):
-    """Apply a matrix to rows of the input representing per-element
-    degrees of freedom.
-
-    Inputs:
-        mat: Shape (a, b)
-        vec: Shape (c, b)
-
-    Signature: (a, b), (c, b) -> (c, a)
-    """
-    return np.einsum("ij,kj->ki", mat, vec)
-
-
-class DGOps1DRef(AbstractDGOps1D):
-    """A reference NumPy implementation of the AbstractDGOps1D interface."""
-
-    @AbstractDGOps1D.array_ops.getter
-    def array_ops(self):
-        return np
-
-    @AbstractDGOps1D.normals.getter
-    def normals(self):
-        return self.discr.normals
-
-    def interp(self, vec):
-        return elementwise(self.discr.interp, vec)
-
-    def inv_mass(self, vec):
-        return elementwise(self.discr.inv_mass, vec)
-
-    def stiffness(self, vec):
-        return elementwise(self.discr.stiffness, vec)
-
-    def face_mass(self, vec):
-        return elementwise(self.discr.face_mass, vec)
-
-    def face_swap(self, vec):
-        result = np.zeros_like(vec)
-        result[:,0] = np.roll(vec[:,1], +1)
-        result[:,1] = np.roll(vec[:,0], -1)
-        return result
-
-
-def matrix_getter(name, shape):
-    
-    def getter(self):
-        mat = self.ctx.argument(name, shape, np.float64)
-        self.ctx.bind(mat, getattr(self.discr, name))
-        return mat
-
-    return property(memoized(getter))
-
-
-class DGOps1D(AbstractDGOps1D):
-
-    @AbstractDGOps1D.array_ops.getter
-    def array_ops(self):
-        return self.ctx
-
-    def __init__(self, discr, ctx):
-        self.discr = discr
-        self.ctx = ctx
-
-    _normals = matrix_getter("normals", "(nelements, 2)")
-    _interp_mat = matrix_getter("interp", "(2, nnodes)")
-    _inv_mass_mat = matrix_getter("inv_mass", "(nnodes, nnodes)")
-    _stiffness_mat = matrix_getter("stiffness", "(nnodes, nnodes)")
-    _face_mass_mat = matrix_getter("face_mass", "(nnodes, 2)")
-
-    @AbstractDGOps1D.normals.getter
-    def normals(self):
-        return self._normals
-
-    def interp(self, vec):
-        return self.ctx.matmul(self._interp_mat, vec.T, name="_interp").T
-
-    def inv_mass(self, vec):
-        return self.ctx.matmul(self._inv_mass_mat, vec.T, name="_inv_mass").T
-
-    def stiffness(self, vec):
-        return self.ctx.matmul(self._stiffness_mat, vec.T, name="_stiffness").T
-
-    def face_mass(self, vec):
-        return self.ctx.matmul(self._face_mass_mat, vec.T, name="_face_mass").T
-
-    def face_swap(self, vec):
-        return self.ctx.stack(
-                (
-                    self.ctx.roll(vec[:,1], +1),
-                    self.ctx.roll(vec[:,0], -1)),
-                axis=1,
-                name="_face_swap")
-
-
 class AdvectionOperator(object):
     """A class representing a DG advection operator."""
 
@@ -453,9 +38,9 @@ class AdvectionOperator(object):
         elif self.flux_type == "upwind":
             swp = self.dg.face_swap(vec)
             if self.c >= 0:
-                flux = self.dg.array_ops.stack((vec[:,0], swp[:,1]), axis=1)
+                flux = self.dg.array_ops.stack((vec[:, 0], swp[:, 1]), axis=1)
             else:
-                flux = self.dg.array_ops.stack((swp[:,0], vec[:,1]), axis=1)
+                flux = self.dg.array_ops.stack((swp[:, 0], vec[:, 1]), axis=1)
 
         flux = flux * self.c * self.dg.normals
 
@@ -534,7 +119,8 @@ def test_ref_advection_convergence(order, flux_type):
         u_initial = np.sin(discr.nodes())
         op = AdvectionOperator(
                 discr, c=1, flux_type=flux_type, dg_ops=DGOps1DRef(discr))
-        u = rk4(lambda t, y: op(y), u_initial, t_initial=0, t_final=np.pi, dt=0.01)
+        u = rk4(lambda t, y: op(y), u_initial, t_initial=0, t_final=np.pi,
+                dt=0.01)
         u_ref = -u_initial
         hs.append(discr.h)
         errors.append(integrate(discr, (u - u_ref)**2)**0.5)
@@ -563,8 +149,9 @@ def test_advection_convergence(order, flux_type):
                     discr, c=1, flux_type=flux_type, dg_ops=DGOps1D(discr, cb))
             cb.output(op.apply(u))
         prog = cb.build()
-        u = rk4(lambda t, y: prog(u=y.reshape(nelements, order))[1][0].reshape(-1),
-                u_initial, t_initial=0, t_final=np.pi, dt=0.01)
+        u = rk4(lambda t, y: prog(
+            u=y.reshape(nelements, order))[1][0].reshape(-1),
+            u_initial, t_initial=0, t_final=np.pi, dt=0.01)
         u_ref = -u_initial
         hs.append(discr.h)
         errors.append(integrate(discr, (u - u_ref)**2)**0.5)
@@ -583,7 +170,7 @@ def main():
     nnodes = 3
 
     discr = DGDiscr1D(0, 2*np.pi, nelements=nelements, nnodes=nnodes)
-    
+
     with az.Context(queue) as cb:
         u = cb.argument("u", shape="(nelements, nnodes)", dtype=np.float64)
         op = AdvectionOperator(
@@ -598,4 +185,3 @@ def main():
 
 if __name__ == "__main__":
     main()
-
diff --git a/experiments/dg_tools.py b/experiments/dg_tools.py
new file mode 100644
index 0000000..3a3fca3
--- /dev/null
+++ b/experiments/dg_tools.py
@@ -0,0 +1,420 @@
+import numpy as np
+import numpy.polynomial.legendre as leg
+import numpy.linalg as la
+import contextlib
+
+__doc__ = """
+Notation convention for operator shapes
+=======================================
+
+* m - number of elements in the discretization
+* n - number of volume degrees of freedom per element
+"""
+
+import functools
+
+
+memoized = functools.lru_cache(maxsize=None)
+
+
+def ortholegvander(x, deg):
+    """See numpy.polynomial.legendre.legvander(). Uses an orthonormal basis."""
+    result = leg.legvander(x, deg)
+    factors = np.array([np.sqrt((2*n+1)/2) for n in range(0, 1 + deg)])
+    return result * factors
+
+
+def ortholegder(c):
+    """See numpy.polynomial.legendre.legder(). Uses an orthonormal basis."""
+    fw_factors = np.array([np.sqrt((2*n+1)/2) for n in range(len(c))])
+    derivs = leg.legder(c * fw_factors)
+    return derivs / fw_factors[:len(derivs)]
+
+
+def ortholegval(x, c):
+    """See numpy.polynomial.legendre.legval(). Uses an orthonormal basis."""
+    factors = np.array([np.sqrt((2*n+1)/2) for n in range(len(c))])
+    return leg.legval(x, c * factors)
+
+
+class DGDiscr1D(object):
+    """A one-dimensional Discontinuous Galerkin discretization."""
+
+    def __init__(self, left, right, nelements, nnodes):
+        """
+        Inputs:
+            left - left endpoint
+            right - right endpoint
+            nelements - number of discretization panels
+            nnodes - number of degrees of freedom per panel
+        """
+        self.left = left
+        self.right = right
+        self.nelements = nelements
+        self.nnodes = nnodes
+
+    @property
+    @memoized
+    def ref_nodes(self):
+        """Return reference nodes for a single element.
+
+        Signature: ->(n,)
+        """
+        nodes, _ = leg.leggauss(self.nnodes)
+        return nodes
+
+    @property
+    @memoized
+    def ref_weights(self):
+        """Return reference quadrature weights for a single element.
+
+        Signature: ->(n,)
+        """
+        _, weights = leg.leggauss(self.nnodes)
+        return weights
+
+    def zeros(self):
+        """Return a zero solution.
+
+        Signature: ->(n*m,)
+        """
+        return np.zeros(self.nnodes * self.nelements)
+
+    @property
+    def h(self):
+        """Return the element size.
+
+        Signature: ->()
+        """
+        return self.elements[0,1] - self.elements[0,0]
+
+    def nodes(self):
+        """Return the vector of node coordinates.
+
+        Signature: ->(n*m,)
+        """
+        centers = (self.elements[:,0] + self.elements[:,1]) / 2
+        radii = (self.elements[:,1] - self.elements[:,0]) / 2
+        return ((self.ref_nodes[:,np.newaxis] * radii) + centers).T.ravel()
+
+    @property
+    @memoized
+    def vdm(self):
+        """Return the elementwise Vandermonde (modal-to-nodal) matrix.
+
+        Signature: ->(n, n)
+        """
+        return ortholegvander(self.ref_nodes, self.nnodes - 1)
+
+    @property
+    @memoized
+    def _ref_mass(self):
+        """Return the (volume) mass matrix for the reference element.
+
+        Signature: ->(n, n)
+        """
+        return la.inv(self.vdm @ self.vdm.T)
+
+    @property
+    @memoized
+    def mass(self):
+        """Return the elementwise volume mass matrix.
+
+        Signature: ->(n, n)
+        """
+        h = (self.right - self.left) / self.nelements
+        return (h/2) * self._ref_mass
+
+    @property
+    @memoized
+    def inv_mass(self):
+        """Return the inverse of the elementwise volume mass matrix.
+
+        Signature: ->(n, n)
+        """
+        return la.inv(self.mass)
+
+    @property
+    @memoized
+    def face_mass(self):
+        """Return the face mass matrix.
+
+        The face mass matrix combines the effects of applying the face mass
+        operator on each face and interpolating the output to the volume nodes.
+
+        Signature: ->(n, 2)
+        """
+        return self.interp.T.copy()
+
+    @property
+    @memoized
+    def diff(self):
+        """Return the elementwise differentiation matrix.
+
+        Signature: ->(n, n)
+        """
+        VrT = []
+        for row in np.eye(self.nnodes):
+            deriv = ortholegder(row)
+            VrT.append(ortholegval(self.ref_nodes, deriv))
+        Vr = np.vstack(VrT).T
+        return Vr @ la.inv(self.vdm)
+
+    @property
+    @memoized
+    def stiffness(self):
+        """Return the stiffness matrix.
+
+        Signature: ->(n, n)
+        """
+        return (self._ref_mass @ self.diff)
+
+    @property
+    @memoized
+    def interp(self):
+        """Return the volume-to-face interpolation matrix.
+
+        Signature: ->(2, n)
+        """
+        return ortholegvander([-1, 1], self.nnodes - 1) @ la.inv(self.vdm)
+
+    @property
+    @memoized
+    def elements(self):
+        """Return the list of elements, each given by their left/right boundaries.
+
+        Signature: ->(m, 2)
+        """
+        h = (self.right - self.left) / self.nelements
+        return np.array(list(zip(
+            np.linspace(self.left, self.right, self.nelements, endpoint=False),
+            np.linspace(h + self.left, self.right, self.nelements))))
+
+    @property
+    def dg_ops(self):
+        """Return a context manager yielding a DGOps1D instance.
+        """
+        return contextlib.contextmanager(lambda: (yield DGOps1DRef(self)))
+
+    @property
+    def normals(self):
+        """Return the face unit normals.
+
+        Signature: ->(m, 2)
+        """
+        result = np.zeros((self.nelements, 2))
+        result[:,0] = -1
+        result[:,1] = 1
+        return result
+
+
+def interpolate(discr, vec, nodes):
+    """Return an interpolated solution at *nodes*.
+
+    Input:
+        discr - a DGDiscr1D instance
+        vec - vector of nodal values at degrees of freedom
+        nodes - vector of nodes to interpolate to
+
+    Signature:  (m*n,) -> (len(nodes),)
+    """
+    elements = discr.elements
+    nelements = discr.nelements
+    nnodes = discr.nnodes
+    inv_vdm = la.inv(discr.vdm)
+
+    sorter = np.argsort(nodes)
+    sorted_nodes = nodes[sorter]
+    result = []
+
+    indices = np.searchsorted(sorted_nodes, elements)
+    for i, (start, end) in enumerate(indices):
+        if i == 0:
+            start = 0
+        elif i == nelements - 1:
+            end = len(nodes)
+
+        center = (elements[i][0] + elements[i][1]) / 2
+        radius = (elements[i][1] - elements[i][0]) / 2
+        element_nodes = sorted_nodes[start:end]
+        mapped_nodes = (element_nodes - center) / radius
+
+        modal_vals = inv_vdm @ vec[i * nnodes:(i + 1) * nnodes]
+        nodal_vals = ortholegvander(mapped_nodes, nnodes - 1) @ modal_vals
+        result.append(nodal_vals)
+
+    result = np.hstack(result)
+    unsorter = np.arange(len(nodes))[sorter]
+    return result[unsorter]
+
+
+def integrate(discr, soln):
+    """Return the integral of the solution.
+
+    Signature: (n*m,) -> ()
+    """
+    soln = soln.reshape((discr.nelements, discr.nnodes))
+    h = discr.elements[0][1] - discr.elements[0][0]
+    weights = discr.ref_weights * h / 2
+    return np.sum(soln * weights)
+
+
+def elementwise(mat, vec):
+    """Apply a matrix to rows of the input representing per-element
+    degrees of freedom.
+
+    Inputs:
+        mat: Shape (a, b)
+        vec: Shape (c, b)
+
+    Signature: (a, b), (c, b) -> (c, a)
+    """
+    return np.einsum("ij,kj->ki", mat, vec)
+
+
+class AbstractDGOps1D(object):
+    def __init__(self, discr):
+        self.discr = discr
+
+    @property
+    def array_ops(self):
+        raise NotImplementedError
+
+    @property
+    def normals(self):
+        """Return the vector of normals at the faces.
+
+        Signature: ->(m, 2)
+        """
+        raise NotImplementedError
+
+    def interp(self, vec):
+        """Apply elementwise volume-to-face interpolation.
+
+        Signature: (m, n) -> (m, 2)
+        """
+        raise NotImplementedError
+
+    def inv_mass(self, vec):
+        """Apply the elementwise inverse mass matrix.
+
+        Signature: (m, n) -> (m, n)
+        """
+        raise NotImplementedError
+
+    def stiffness(self, vec):
+        """Apply the elementwise stiffness matrix.
+
+        Signature: (m, n) -> (m, n)
+        """
+        raise NotImplementedError
+
+    def face_mass(self, vec):
+        """Apply the elementwise face mass matrix.
+
+        Signature: (m, 2) -> (m, n)
+        """
+        raise NotImplementedError
+
+    def face_swap(self, vec):
+        """Swap values at opposite faces.
+
+        Signature: (m, 2) -> (m, 2)
+        """
+        raise NotImplementedError
+
+
+def elementwise(mat, vec):
+    """Apply a matrix to rows of the input representing per-element
+    degrees of freedom.
+
+    Inputs:
+        mat: Shape (a, b)
+        vec: Shape (c, b)
+
+    Signature: (a, b), (c, b) -> (c, a)
+    """
+    return np.einsum("ij,kj->ki", mat, vec)
+
+
+class DGOps1DRef(AbstractDGOps1D):
+    """A reference NumPy implementation of the AbstractDGOps1D interface."""
+
+    @AbstractDGOps1D.array_ops.getter
+    def array_ops(self):
+        return np
+
+    @AbstractDGOps1D.normals.getter
+    def normals(self):
+        return self.discr.normals
+
+    def interp(self, vec):
+        return elementwise(self.discr.interp, vec)
+
+    def inv_mass(self, vec):
+        return elementwise(self.discr.inv_mass, vec)
+
+    def stiffness(self, vec):
+        return elementwise(self.discr.stiffness, vec)
+
+    def face_mass(self, vec):
+        return elementwise(self.discr.face_mass, vec)
+
+    def face_swap(self, vec):
+        result = np.zeros_like(vec)
+        result[:, 0] = np.roll(vec[:, 1], +1)
+        result[:, 1] = np.roll(vec[:, 0], -1)
+        return result
+
+
+def matrix_getter(name, shape):
+
+    def getter(self):
+        mat = self.ctx.argument(name, shape, np.float64)
+        self.ctx.bind(mat, getattr(self.discr, name))
+        return mat
+
+    return property(memoized(getter))
+
+
+class DGOps1D(AbstractDGOps1D):
+
+    @AbstractDGOps1D.array_ops.getter
+    def array_ops(self):
+        return self.ctx
+
+    def __init__(self, discr, ctx):
+        self.discr = discr
+        self.ctx = ctx
+
+    _normals = matrix_getter("normals", "(nelements, 2)")
+    _interp_mat = matrix_getter("interp", "(2, nnodes)")
+    _inv_mass_mat = matrix_getter("inv_mass", "(nnodes, nnodes)")
+    _stiffness_mat = matrix_getter("stiffness", "(nnodes, nnodes)")
+    _face_mass_mat = matrix_getter("face_mass", "(nnodes, 2)")
+
+    @AbstractDGOps1D.normals.getter
+    def normals(self):
+        return self._normals
+
+    def interp(self, vec):
+        return self.ctx.matmul(self._interp_mat, vec.T, name="_interp").T
+
+    def inv_mass(self, vec):
+        return self.ctx.matmul(self._inv_mass_mat, vec.T, name="_inv_mass").T
+
+    def stiffness(self, vec):
+        return self.ctx.matmul(self._stiffness_mat, vec.T, name="_stiffness").T
+
+    def face_mass(self, vec):
+        return self.ctx.matmul(self._face_mass_mat, vec.T, name="_face_mass").T
+
+    def face_swap(self, vec):
+        return self.ctx.stack(
+                (
+                    self.ctx.roll(vec[:, 1], +1),
+                    self.ctx.roll(vec[:, 0], -1)),
+                axis=1,
+                name="_face_swap")
+
+
-- 
GitLab


From 406fb101140c37c98c8cab5924f57732d65af6eb Mon Sep 17 00:00:00 2001
From: Matt Wala <wala1@illinois.edu>
Date: Fri, 1 May 2020 01:16:15 -0500
Subject: [PATCH 31/46] Implement Array.store()

---
 arrayzy/array.py                              | 53 ++++++++++++++-----
 .../Computing and DG with Lazy Arrays.ipynb   |  2 +-
 test/test_linalg.py                           | 14 +++++
 3 files changed, 55 insertions(+), 14 deletions(-)

diff --git a/arrayzy/array.py b/arrayzy/array.py
index 6c7de16..9370369 100644
--- a/arrayzy/array.py
+++ b/arrayzy/array.py
@@ -112,6 +112,7 @@ class Array:
     .. automethod:: __truediv__
     .. automethod:: __rtruediv__
     .. automethod:: __neg__
+    .. automethod:: store
 
     """
 
@@ -259,6 +260,10 @@ class Array:
         """
         raise NotImplementedError
 
+    def store(self, name=None):
+        """Store this array into a variable, if not already stored."""
+        raise NotImplementedError
+
 
 class ArrayExpression(Array):
     """An array-valued expression.
@@ -276,6 +281,19 @@ class ArrayExpression(Array):
         assignments = dict(zip(self.dim_names, tuple(dims)))
         return pymbolic.substitute(self.expression, assignments)
 
+    def store(self, name=None):
+        if name is None:
+            name = "_temp"
+        name = self.ctx.var_name_gen(name)
+        out = lp.TemporaryVariable(
+                name, dtype=self.dtype,
+                shape=self.shape,
+                address_space=lp.AddressSpace.GLOBAL)
+        new_tv = self.ctx.program.temporary_variables.copy()
+        new_tv[name] = out
+        self.ctx.program = self.ctx.program.copy(temporary_variables=new_tv)
+        return self.ctx.copy_expr(self, name)
+
 
 class ArrayVariable(Array):
     """An array-valued variable.
@@ -287,6 +305,9 @@ class ArrayVariable(Array):
     def to_loopy_expression(self, dims):
         return prim.Variable(self.name)[tuple(dims)]
 
+    def store(self, name=None):
+        return self
+
 
 class Context:
     """An interface for building up array computations.
@@ -445,6 +466,23 @@ class Context:
                 instructions=self.program.instructions + [out_insn])
         self._last_insn_id = insn_id
 
+    def copy_expr(self, expr, name):
+        # Copy *expr* to a variable named *name*.
+        out_inames = [
+                self.var_name_gen(f"{name}_dim{d}") for d in range(expr.ndim)]
+        out_expr = expr.to_loopy_expression(map(prim.Variable, out_inames))
+
+        domain = expr.get_domain(out_inames)
+
+        self.program = self.program.copy(
+                domains=self.program.domains + [domain])
+
+        self._make_array_assignment(
+                name, dim_names=out_inames, expr=out_expr,
+                within_inames=frozenset(out_inames))
+
+        return ArrayVariable(self, expr.shape, expr.dtype, name)
+
     # }}}
 
     # {{{ user interface
@@ -634,19 +672,8 @@ class Context:
             name = "_out"
         name = self.var_name_gen(name)
         out = lp.GlobalArg(name, shape=expr.shape, dtype=expr.dtype, order="C")
-        out_inames = [
-                self.var_name_gen(f"{name}_dim{d}") for d in range(expr.ndim)]
-        out_expr = expr.to_loopy_expression(map(prim.Variable, out_inames))
-
-        domain = expr.get_domain(out_inames)
-
-        self.program = self.program.copy(
-                args=self.program.args + [out],
-                domains=self.program.domains + [domain])
-
-        self._make_array_assignment(
-                name, dim_names=out_inames, expr=out_expr,
-                within_inames=frozenset(out_inames))
+        self.program = self.program.copy(args=self.program.args + [out])
+        return self.copy_expr(expr, name)
 
     # }}}
 
diff --git a/experiments/Computing and DG with Lazy Arrays.ipynb b/experiments/Computing and DG with Lazy Arrays.ipynb
index 9f8fcef..733083e 100644
--- a/experiments/Computing and DG with Lazy Arrays.ipynb	
+++ b/experiments/Computing and DG with Lazy Arrays.ipynb	
@@ -196,7 +196,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.7.7"
+   "version": "3.8.2"
   }
  },
  "nbformat": 4,
diff --git a/test/test_linalg.py b/test/test_linalg.py
index b1df677..68c4ca6 100644
--- a/test/test_linalg.py
+++ b/test/test_linalg.py
@@ -57,6 +57,20 @@ def test_symbolic_array(ctx_factory):
     assert (eval_one(prog, x=x) == x).all()
 
 
+def test_store(ctx_factory):
+    cl_ctx = ctx_factory()
+    queue = cl.CommandQueue(cl_ctx)
+
+    with az.Context(queue) as ctx:
+        x = ctx.argument("x", shape="n", dtype=np.float64)
+        y = (x + 1).store()
+        ctx.output(y * y)
+
+    x = np.array([1., 2., 3., 4., 5.])
+    prog = ctx.build()
+    assert (eval_one(prog, x=x) == (x + 1) * (x + 1)).all()
+
+
 @pytest.mark.parametrize("dim", (1, 2, 3))
 def test_transpose(ctx_factory, dim):
     cl_ctx = ctx_factory()
-- 
GitLab


From bb789e2435317f21c550fd2825d5538a79af2b6c Mon Sep 17 00:00:00 2001
From: Andreas Kloeckner <inform@tiker.net>
Date: Fri, 1 May 2020 01:49:10 -0500
Subject: [PATCH 32/46] Migrate to kernel_callables_v3-edit0.2 loopy branch

---
 arrayzy/array.py | 71 ++++++++++++++++++++++++++++++------------------
 requirements.txt |  2 +-
 2 files changed, 46 insertions(+), 27 deletions(-)

diff --git a/arrayzy/array.py b/arrayzy/array.py
index 9370369..c7886d3 100644
--- a/arrayzy/array.py
+++ b/arrayzy/array.py
@@ -289,9 +289,13 @@ class ArrayExpression(Array):
                 name, dtype=self.dtype,
                 shape=self.shape,
                 address_space=lp.AddressSpace.GLOBAL)
-        new_tv = self.ctx.program.temporary_variables.copy()
+
+        root_knl = self.ctx.program.root_kernel
+        new_tv = root_knl.temporary_variables.copy()
         new_tv[name] = out
-        self.ctx.program = self.ctx.program.copy(temporary_variables=new_tv)
+        self.ctx.program = self.ctx.program.with_kernel(
+                root_knl.copy(temporary_variables=new_tv))
+
         return self.ctx.copy_expr(self, name)
 
 
@@ -352,8 +356,9 @@ class Context:
 
         self.bound_arguments = {}
         self.program = program
-        self.var_name_gen = self.program.get_var_name_generator()
-        self.insn_id_gen = self.program.get_instruction_id_generator()
+        self.var_name_gen = self.program.root_kernel.get_var_name_generator()
+        self.insn_id_gen = (
+                self.program.root_kernel.get_instruction_id_generator())
         self._last_insn_id = None
         # The set of names that can be used for domain in parameters. It should
         # be updated through self._get_or_create_parameter().
@@ -401,18 +406,21 @@ class Context:
         # *self.program.index_dtype* or if it's an iname (the latter is for
         # data-dependent control flow).
         if name not in self._parameters:
+            root_knl = self.program.root_kernel
             # Resist the temptation to look through domain parameters, because
             # a variable may be a parameter without a domain in the kernel yet.
-            if name not in self.program.all_inames():
+            if name not in root_knl.all_inames():
                 # Add an argument.
-                if name in self.program.all_variable_names():
+                if name in root_knl.all_variable_names():
                     raise ValueError(
                             "could not create parameter: "
                             f"name '{name}' already in use")
-                self.program = self.program.copy(
-                        args=self.program.args + [
-                            lp.ValueArg(name, dtype=self.program.index_dtype)
-                        ])
+                self.program = self.program.with_kernel(
+                        root_knl.copy(
+                            args=self.program.args + [
+                                lp.ValueArg(
+                                    name, dtype=root_knl.index_dtype)
+                                ]))
                 self.var_name_gen.add_name(name)
             self._parameters.add(name)
 
@@ -462,8 +470,9 @@ class Context:
                 within_inames=frozenset(within_inames),
                 depends_on=self._get_dependencies(expr))
 
-        self.program = self.program.copy(
-                instructions=self.program.instructions + [out_insn])
+        self.program = self.program.with_kernel(
+                self.program.root_kernel.copy(
+                    instructions=self.program.root_kernel.instructions + [out_insn]))
         self._last_insn_id = insn_id
 
     def copy_expr(self, expr, name):
@@ -474,8 +483,9 @@ class Context:
 
         domain = expr.get_domain(out_inames)
 
-        self.program = self.program.copy(
-                domains=self.program.domains + [domain])
+        self.program = self.program.with_kernel(
+                self.program.root_kernel.copy(
+                    domains=self.program.root_kernel.domains + [domain]))
 
         self._make_array_assignment(
                 name, dim_names=out_inames, expr=out_expr,
@@ -492,7 +502,7 @@ class Context:
 
         :returns: an :class:`ArrayVariable`
         """
-        if name in self.program.all_variable_names():
+        if name in self.program.root_kernel.all_variable_names():
             raise ValueError(f"name '{name}' already in use")
 
         arg = lp.GlobalArg(name, shape=shape, dtype=dtype, order=order)
@@ -506,7 +516,8 @@ class Context:
                 self._get_or_create_parameter(dep)
 
         # Add argument to program.
-        self.program = self.program.copy(args=self.program.args + [arg])
+        self.program = self.program.with_kernel(
+                self.program.root_kernel.copy(args=self.program.args + [arg]))
 
         return ArrayVariable(self, shape, dtype, name)
 
@@ -595,13 +606,15 @@ class Context:
             self._last_insn_id = insn_id
             out_insns.append(out_insn)
 
-        new_tv = self.program.temporary_variables.copy()
+        root_knl = self.program.root_kernel
+        new_tv = root_knl.temporary_variables.copy()
         new_tv[name] = out
 
-        self.program = self.program.copy(
-                temporary_variables=new_tv,
-                domains=self.program.domains + [domain],
-                instructions=self.program.instructions + out_insns)
+        self.program = self.program.with_kernel(
+                root_knl.copy(
+                    temporary_variables=new_tv,
+                    domains=root_knl.domains + [domain],
+                    instructions=root_knl.instructions + out_insns))
 
         return ArrayVariable(self, out_shape, out_dtype, name)
 
@@ -648,12 +661,14 @@ class Context:
                 out_inames + (out_red_iname,),
                 out_shape + (a.shape[1],))
 
-        new_tv = self.program.temporary_variables.copy()
+        root_knl = self.program.root_kernel
+        new_tv = root_knl.temporary_variables.copy()
         new_tv[name] = out
 
-        self.program = self.program.copy(
-                temporary_variables=new_tv,
-                domains=self.program.domains + [domain])
+        self.program = self.program.with_kernel(
+                root_knl.copy(
+                    temporary_variables=new_tv,
+                    domains=root_knl.domains + [domain]))
 
         self._make_array_assignment(
                 name, dim_names=out_inames, expr=out_expr,
@@ -661,6 +676,9 @@ class Context:
 
         return ArrayVariable(self, out_shape, out_dtype, name)
 
+    def call_kernel(self, knl, **kwargs):
+        pass
+
     def output(self, expr, name=None):
         """Copy *expr* to a (new) output variable.
 
@@ -672,7 +690,8 @@ class Context:
             name = "_out"
         name = self.var_name_gen(name)
         out = lp.GlobalArg(name, shape=expr.shape, dtype=expr.dtype, order="C")
-        self.program = self.program.copy(args=self.program.args + [out])
+        self.program = self.program.with_kernel(
+                self.program.root_kernel.copy(args=self.program.args + [out]))
         return self.copy_expr(expr, name)
 
     # }}}
diff --git a/requirements.txt b/requirements.txt
index 22617c7..7ca0048 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1 +1 @@
-git+https://github.com/inducer/loopy.git
+git+https://github.com/inducer/loopy.git@kernel_callables_v3-edit0.2
-- 
GitLab


From aaec9806501945bd901545545d47f2fd5970b471 Mon Sep 17 00:00:00 2001
From: Matt Wala <wala1@illinois.edu>
Date: Fri, 1 May 2020 03:35:18 -0500
Subject: [PATCH 33/46] Support reductions in expressions. Rewrite matmul to be
 an expression.

---
 arrayzy/array.py    | 197 +++++++++++++++++++++++++++-----------------
 arrayzy/utils.py    |  57 ++++++++++++-
 test/test_linalg.py |  36 ++++++++
 3 files changed, 213 insertions(+), 77 deletions(-)

diff --git a/arrayzy/array.py b/arrayzy/array.py
index c7886d3..042b5a5 100644
--- a/arrayzy/array.py
+++ b/arrayzy/array.py
@@ -27,11 +27,10 @@ import loopy as lp
 import islpy as isl
 import numpy as np
 import loopy.symbolic as sym
-import pymbolic
 import pymbolic.primitives as prim
 
 from arrayzy.program import BoundProgram, PyOpenCLTarget
-from arrayzy.utils import domain_for_shape
+from arrayzy.utils import domain_for_shape, substitute
 
 
 __doc__ = """
@@ -96,6 +95,11 @@ class Array:
 
         Same as :data:`dims` but a tuple of strings.
 
+    .. attribute:: reduction_dim_names
+
+        A tuple of strings representing the names of reduction inames present
+        in the array.
+
     .. automethod:: get_domain
     .. automethod:: to_loopy_expression
 
@@ -121,6 +125,8 @@ class Array:
         self._dtype = dtype
         self.shape = shape
         self.name = name
+        # Maps redution inames to (left, right) bounds.
+        self.reductions = {}
 
     @property
     def ndim(self):
@@ -132,7 +138,8 @@ class Array:
                 self.ctx,
                 self.shape,
                 self.dtype,
-                new_expr)
+                new_expr,
+                self.reductions.copy())
 
     def _binary_op(self, op, other, reverse=False):
         if np.isscalar(other):
@@ -145,7 +152,8 @@ class Array:
                     self.ctx,
                     self.shape,
                     self.ctx.unify_types(self.dtype, type(other)),
-                    new_expr)
+                    new_expr,
+                    self.reductions.copy())
 
         elif isinstance(other, Array):
             args = (
@@ -154,12 +162,16 @@ class Array:
             if reverse:
                 args = tuple(reversed(args))
 
+            new_reductions = self.reductions.copy()
+            new_reductions.update(other.reductions)
+
             new_expr = op(*args)
             return ArrayExpression(
                     self.ctx,
                     self.shape,
                     self.ctx.unify_types(self.dtype, other.dtype),
-                    new_expr)
+                    new_expr,
+                    new_reductions)
 
         else:
             raise ValueError
@@ -211,7 +223,8 @@ class Array:
                 self.ctx,
                 shape=tuple(shape),
                 dtype=self.dtype,
-                expression=self.to_loopy_expression(dims))
+                expression=self.to_loopy_expression(dims),
+                reductions=self.reductions.copy())
 
     @property
     def dtype(self):
@@ -227,17 +240,29 @@ class Array:
     def dim_names(self):
         return tuple(f"_{i}" for i in range(len(self.shape)))
 
-    def get_domain(self, dim_names=None):
+    def get_domain(self, dim_names=None, reduction_dim_names=None):
         """Return the domain that defines the space of indices for this array.
 
         :arg dim_names: if not *None*, a tuple of strings, the names of the
             dimensions of the array.  Defaults to :data:`dim_names`.
+        :arg reduction_dim_names: if not *None*, a tuple of strings, the names
+            of the reduction dimensions of the array.  Defaults to
+            :data:`reduction_dim_names`.
 
         :returns: a :class:`islpy.BasicSet`
+
         """
         if dim_names is None:
             dim_names = self.dim_names
-        return domain_for_shape(dim_names, self.shape)
+        if reduction_dim_names is not None:
+            # Rename reductions.
+            reductions = dict(
+                    (new_rname, self.reductions[rname])
+                    for new_rname, rname
+                    in zip(reduction_dim_names, self.reduction_dim_names))
+        else:
+            reductions = self.reductions
+        return domain_for_shape(dim_names, self.shape, reductions)
 
     @property
     def T(self):  # noqa
@@ -247,15 +272,19 @@ class Array:
         """
         new_shape = tuple(reversed(self.shape))
         new_expr = self.to_loopy_expression(list(reversed(self.dims)))
-        return ArrayExpression(self.ctx, new_shape, self.dtype, new_expr)
+        return ArrayExpression(self.ctx, new_shape, self.dtype, new_expr,
+                               self.reductions.copy())
 
-    def to_loopy_expression(self, dims):
+    def to_loopy_expression(self, dims, reduction_names=None):
         """Create a :mod:`loopy` expression for the value at index *dims*.
 
         :arg dims: A tuple of :mod:`pymbolic` expressions representing the
            desired index/indices. For instance if *dims = (Variable('a'), 1)*,
            this will return the expression for the element at index *a, 1*.
 
+        :arg reduction_names: If not *None*, a tuple that renames reduction
+           inames in *self* in the order of :attr:`reduction_dim_names`.
+
         :returns: a :mod:`loopy` expression
         """
         raise NotImplementedError
@@ -264,6 +293,10 @@ class Array:
         """Store this array into a variable, if not already stored."""
         raise NotImplementedError
 
+    @property
+    def reduction_dim_names(self):
+        return tuple(self.reductions)
+
 
 class ArrayExpression(Array):
     """An array-valued expression.
@@ -271,15 +304,24 @@ class ArrayExpression(Array):
     .. attribute:: expression
 
         A :mod:`pymbolic` expression for the array.
+
+    .. attribute:: reductions
+
+        A mapping from reduction inames in :attr:`expression` to bounds.
     """
 
-    def __init__(self, ctx, shape, dtype, expression, name=None):
+    def __init__(self, ctx, shape, dtype, expression, reductions, name=None):
         super().__init__(ctx, shape, dtype, name)
         self.expression = expression
+        self.reductions = reductions
 
-    def to_loopy_expression(self, dims):
+    def to_loopy_expression(self, dims, reduction_names=None):
         assignments = dict(zip(self.dim_names, tuple(dims)))
-        return pymbolic.substitute(self.expression, assignments)
+        if reduction_names is not None:
+            assignments.update(dict(
+                    zip(self.reduction_dim_names,
+                        map(prim.Variable, reduction_names))))
+        return substitute(self.expression, assignments)
 
     def store(self, name=None):
         if name is None:
@@ -306,7 +348,7 @@ class ArrayVariable(Array):
     def __init__(self, ctx, shape, dtype, name):
         super().__init__(ctx, shape, dtype, name)
 
-    def to_loopy_expression(self, dims):
+    def to_loopy_expression(self, dims, reduction_names=None):
         return prim.Variable(self.name)[tuple(dims)]
 
     def store(self, name=None):
@@ -470,18 +512,24 @@ class Context:
                 within_inames=frozenset(within_inames),
                 depends_on=self._get_dependencies(expr))
 
+        new_insns = self.program.root_kernel.instructions + [out_insn]
         self.program = self.program.with_kernel(
-                self.program.root_kernel.copy(
-                    instructions=self.program.root_kernel.instructions + [out_insn]))
+                self.program.root_kernel.copy(instructions=new_insns))
+
         self._last_insn_id = insn_id
 
     def copy_expr(self, expr, name):
         # Copy *expr* to a variable named *name*.
         out_inames = [
                 self.var_name_gen(f"{name}_dim{d}") for d in range(expr.ndim)]
-        out_expr = expr.to_loopy_expression(map(prim.Variable, out_inames))
+        out_red_inames = [
+                self.var_name_gen(f"{name}{rname}") for rname
+                in expr.reduction_dim_names]
 
-        domain = expr.get_domain(out_inames)
+        out_expr = expr.to_loopy_expression(
+                map(prim.Variable, out_inames),
+                out_red_inames)
+        domain = expr.get_domain(out_inames, out_red_inames)
 
         self.program = self.program.with_kernel(
                 self.program.root_kernel.copy(
@@ -546,7 +594,7 @@ class Context:
         # negative we shift it to be positive.
         idx = idx + prim.If(prim.Comparison(idx, "<", 0), a.shape[0], 0)
         expr = a.to_loopy_expression((idx,))
-        return ArrayExpression(self, a.shape, a.dtype, expr)
+        return ArrayExpression(self, a.shape, a.dtype, expr, a.reductions)
 
     def stack(self, arrays, axis=0, name=None):
         """Join a sequence of arrays along a new axis.
@@ -564,18 +612,10 @@ class Context:
             for array_dim, array0_dim in zip(array.shape, arrays[0].shape):
                 self._assume_equal(array_dim, array0_dim)
 
-        if name is None:
-            name = "_stack"
-
-        name = self.var_name_gen(name)
-
-        out_inames = []
-        for i in range(arrays[0].ndim):
-            if i >= axis:
-                i += 1
-            out_inames.append(f"{name}_dim{i}")
+        base_name = name
 
-        domain = domain_for_shape(out_inames, arrays[0].shape)
+        if base_name is None:
+            base_name = "_stack"
 
         out_shape = list(arrays[0].shape)
         out_shape.insert(axis, len(arrays))
@@ -583,40 +623,62 @@ class Context:
 
         out_dtype = self.unify_types(*(a.dtype for a in arrays))
 
+        out_name = self.var_name_gen(base_name)
+
         out = lp.TemporaryVariable(
-                name, dtype=out_dtype,
+                out_name,
+                dtype=out_dtype,
                 shape=out_shape,
                 address_space=lp.AddressSpace.GLOBAL)
 
         # Create an output instruction for each input array.
         from loopy.kernel.instruction import make_assignment
-        out_insns = []
-        for i in range(len(arrays)):
+        new_insns = []
+        new_domains = []
+
+        for i, arr in enumerate(arrays):
+            name = self.var_name_gen(f"{base_name}_array{i}")
+            out_inames = []
+            for j in range(arr.ndim):
+                if j >= axis:
+                    j += 1
+                out_inames.append(self.var_name_gen(f"{name}_dim{j}"))
+
+            out_red_inames = [
+                    self.var_name_gen(f"{name}{rname}")
+                    for rname in arr.reduction_dim_names]
+            reductions = dict(
+                    (new_rname, arr.reductions[rname])
+                    for new_rname, rname
+                    in zip(out_red_inames, arr.reduction_dim_names))
+            domain = domain_for_shape(out_inames, arr.shape, reductions)
+            new_domains.append(domain)
+
             indices = list(map(prim.Variable, out_inames))
-            expr = arrays[i].to_loopy_expression(indices)
+            expr = arr.to_loopy_expression(indices, out_red_inames)
             indices.insert(axis, i)
             indices = tuple(indices)
-            insn_id = self.insn_id_gen(f"{name}_copy{i}")
+            insn_id = self.insn_id_gen(f"{name}_array{i}")
             out_insn = make_assignment(
-                    (prim.Variable(name)[indices],),
+                    (prim.Variable(out_name)[indices],),
                     expr,
                     id=insn_id,
                     within_inames=frozenset(out_inames),
                     depends_on=self._get_dependencies(expr))
             self._last_insn_id = insn_id
-            out_insns.append(out_insn)
+            new_insns.append(out_insn)
 
         root_knl = self.program.root_kernel
         new_tv = root_knl.temporary_variables.copy()
-        new_tv[name] = out
+        new_tv[out_name] = out
 
         self.program = self.program.with_kernel(
                 root_knl.copy(
                     temporary_variables=new_tv,
-                    domains=root_knl.domains + [domain],
-                    instructions=root_knl.instructions + out_insns))
+                    domains=root_knl.domains + new_domains,
+                    instructions=root_knl.instructions + new_insns))
 
-        return ArrayVariable(self, out_shape, out_dtype, name)
+        return ArrayVariable(self, out_shape, out_dtype, out_name)
 
     def matmul(self, a, b, name=None):
         """Multiply matrix *a* by *b*.
@@ -632,49 +694,34 @@ class Context:
             name = "_matmul"
         name = self.var_name_gen(name)
 
-        out_shape = (a.shape[0], b.shape[1])
-        out_inames = (f"{name}_dim0", f"{name}_dim1")
-        out_red_iname = self.var_name_gen(f"{name}_reduce")
-        out_dtype = self.unify_types(a.dtype, b.dtype)
-
-        out = lp.TemporaryVariable(
-                name, dtype=out_dtype,
-                shape=(a.shape[0], b.shape[1]),
-                address_space=lp.AddressSpace.GLOBAL)
-
-        import loopy.library.reduction as red
+        red_iname = self.var_name_gen(f"{name}_reduce")
+        red_domain = (0, a.shape[1])
+        dtype = self.unify_types(a.dtype, b.dtype)
 
-        a_inames = tuple(map(prim.Variable, (out_inames[0], out_red_iname)))
-        b_inames = tuple(map(prim.Variable, (out_red_iname, out_inames[1])))
+        a_inames = tuple(map(prim.Variable, ("_0", red_iname)))
+        b_inames = tuple(map(prim.Variable, (red_iname, "_1")))
 
-        expr = (
+        inner_expr = (
                 a.to_loopy_expression(a_inames)
                 * b.to_loopy_expression(b_inames))
 
-        out_expr = sym.Reduction(
+        import loopy.library.reduction as red
+        expr = sym.Reduction(
                 operation=red.parse_reduction_op("sum"),
-                inames=(out_red_iname,),
-                expr=expr,
+                inames=(red_iname,),
+                expr=inner_expr,
                 allow_simultaneous=False)
 
-        domain = domain_for_shape(
-                out_inames + (out_red_iname,),
-                out_shape + (a.shape[1],))
-
-        root_knl = self.program.root_kernel
-        new_tv = root_knl.temporary_variables.copy()
-        new_tv[name] = out
-
-        self.program = self.program.with_kernel(
-                root_knl.copy(
-                    temporary_variables=new_tv,
-                    domains=root_knl.domains + [domain]))
-
-        self._make_array_assignment(
-                name, dim_names=out_inames, expr=out_expr,
-                within_inames=frozenset(out_inames))
+        reductions = a.reductions.copy()
+        reductions.update(b.reductions)
+        reductions[red_iname] = red_domain
 
-        return ArrayVariable(self, out_shape, out_dtype, name)
+        return ArrayExpression(
+                self,
+                shape=(a.shape[0], b.shape[1]),
+                dtype=dtype,
+                expression=expr,
+                reductions=reductions)
 
     def call_kernel(self, knl, **kwargs):
         pass
diff --git a/arrayzy/utils.py b/arrayzy/utils.py
index 54e3436..c2bf016 100644
--- a/arrayzy/utils.py
+++ b/arrayzy/utils.py
@@ -21,10 +21,47 @@ THE SOFTWARE.
 """
 
 
+import loopy as lp
 import islpy as isl
 
+import pymbolic.primitives as prim
+import loopy.symbolic as sym
 
-def domain_for_shape(dim_names, shape):
+
+from pymbolic.mapper.substitutor import SubstitutionMapper
+
+
+class LoopyAwareSubstitutionMapper(SubstitutionMapper):
+
+    def map_reduction(self, expr):
+        new_inames = []
+        for iname in expr.inames:
+            new_iname = self.subst_func(iname)
+            if new_iname is None:
+                new_iname = prim.Variable(iname)
+            else:
+                if not isinstance(new_iname, prim.Variable):
+                    raise ValueError(
+                            f"reduction iname {iname} can only be renamed"
+                            " to another iname")
+            new_inames.append(new_iname.name)
+        new_inames = tuple(new_inames)
+        new_expr = self.rec(expr.expr)
+
+        return sym.Reduction(expr.operation, new_inames, new_expr,
+                expr.allow_simultaneous)
+
+
+def substitute(expression, variable_assignments={}, **kwargs):
+    variable_assignments = variable_assignments.copy()
+    variable_assignments.update(kwargs)
+    from pymbolic.mapper.substitutor import make_subst_func
+    print("ASSIGNMENTS", variable_assignments)
+    return LoopyAwareSubstitutionMapper(
+            make_subst_func(variable_assignments))(expression)
+
+
+def domain_for_shape(dim_names, shape, reductions=None):
     """Return a :class:`isl.BasicSet` that expresses an appropriate index domain
     for an array of (potentially symbolic) shape *shape*.
 
@@ -36,9 +73,13 @@ def domain_for_shape(dim_names, shape):
         dimensions in the returned set.  Must have the same length as
         *dim_names*.
 
+    :arg reductions: A map from reduction inames to (lower, upper) bounds.
+
     :returns: a :class:`isl.BasicSet`
 
     """
+    if reductions is None:
+        reductions = {}
 
     # Collect parameters.
     param_names = set()
@@ -46,21 +87,33 @@ def domain_for_shape(dim_names, shape):
     for sdep in map(get_dependencies, shape):
         param_names |= sdep
 
+    for bounds in reductions.values():
+        for sdep in map(get_dependencies, bounds):
+            # FIXME: Assumes that reduction bounds are not data-dependent.
+            param_names |= sdep
+
+    set_names = sorted(tuple(dim_names) + tuple(reductions))
     param_names = sorted(param_names)
 
     # Build domain.
     dom = isl.BasicSet.universe(
             isl.Space.create_from_names(
                 isl.DEFAULT_CONTEXT,
-                set=dim_names,
+                set=set_names,
                 params=param_names))
 
     # Add constraints.
     from loopy.symbolic import aff_from_expr
     affs = isl.affs_from_space(dom.space)
+
     for iname, dim in zip(dim_names, shape):
         dom &= affs[0].le_set(affs[iname])
         dom &= affs[iname].lt_set(aff_from_expr(dom.space, dim))
+
+    for iname, (left, right) in reductions.items():
+        dom &= aff_from_expr(dom.space, left).le_set(affs[iname])
+        dom &= affs[iname].lt_set(aff_from_expr(dom.space, right))
+
     dom, = dom.get_basic_sets()
 
     return dom
diff --git a/test/test_linalg.py b/test/test_linalg.py
index 68c4ca6..821c10a 100644
--- a/test/test_linalg.py
+++ b/test/test_linalg.py
@@ -163,6 +163,42 @@ def test_matmul(ctx_factory):
     assert (eval_one(prog, x=x, y=y) == x @ y).all()
 
 
+def test_matmul_addition(ctx_factory):
+    cl_ctx = ctx_factory()
+    queue = cl.CommandQueue(cl_ctx)
+
+    with az.Context(queue) as ctx:
+        x = ctx.argument("x", shape=("n, n"), dtype=np.float64)
+        y = ctx.argument("y", shape=("n, n"), dtype=np.float64)
+        z = ctx.argument("z", shape=("n, n"), dtype=np.float64)
+        ctx.output(ctx.matmul(x, y) + ctx.matmul(x, z))
+
+    x = np.array([[1, 2], [3, 4]], dtype=np.float64)
+    y = np.array([[5, 6], [7, 8]], dtype=np.float64)
+    z = np.array([[9, 10], [11, 12]], dtype=np.float64)
+
+    prog = ctx.build()
+    assert (eval_one(prog, x=x, y=y, z=z) == x @ y + x @ z).all()
+
+
+def test_matmul_stack(ctx_factory):
+    cl_ctx = ctx_factory()
+    queue = cl.CommandQueue(cl_ctx)
+
+    with az.Context(queue) as ctx:
+        x = ctx.argument("x", shape=("m, n"), dtype=np.float64)
+        y = ctx.argument("y", shape=("n, k"), dtype=np.float64)
+        z = ctx.matmul(x, y)
+        w = ctx.matmul(x, 2 * y)
+        ctx.output(ctx.stack((z, w)))
+
+    x = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float64)
+    y = np.array([[2, 5], [5, 1], [1, 2]], dtype=np.float64)
+
+    prog = ctx.build()
+    assert (eval_one(prog, x=x, y=y) == np.array([x @ y, x @ (2 * y)])).all()
+
+
 @pytest.mark.parametrize("shift", (-1, 1, -20, 20))
 def test_roll(ctx_factory, shift):
     cl_ctx = ctx_factory()
-- 
GitLab


From 67d0637faa5e9308d45ef85083ca7ff56a39bf61 Mon Sep 17 00:00:00 2001
From: Matt Wala <wala1@illinois.edu>
Date: Fri, 1 May 2020 03:41:24 -0500
Subject: [PATCH 34/46] Revert "Support reductions in expressions. Rewrite
 matmul to be an expression."

This reverts commit aaec9806501945bd901545545d47f2fd5970b471.
---
 arrayzy/array.py    | 197 +++++++++++++++++---------------------------
 arrayzy/utils.py    |  57 +------------
 test/test_linalg.py |  36 --------
 3 files changed, 77 insertions(+), 213 deletions(-)

diff --git a/arrayzy/array.py b/arrayzy/array.py
index 042b5a5..c7886d3 100644
--- a/arrayzy/array.py
+++ b/arrayzy/array.py
@@ -27,10 +27,11 @@ import loopy as lp
 import islpy as isl
 import numpy as np
 import loopy.symbolic as sym
+import pymbolic
 import pymbolic.primitives as prim
 
 from arrayzy.program import BoundProgram, PyOpenCLTarget
-from arrayzy.utils import domain_for_shape, substitute
+from arrayzy.utils import domain_for_shape
 
 
 __doc__ = """
@@ -95,11 +96,6 @@ class Array:
 
         Same as :data:`dims` but a tuple of strings.
 
-    .. attribute:: reduction_dim_names
-
-        A tuple of strings representing the names of reduction inames present
-        in the array.
-
     .. automethod:: get_domain
     .. automethod:: to_loopy_expression
 
@@ -125,8 +121,6 @@ class Array:
         self._dtype = dtype
         self.shape = shape
         self.name = name
-        # Maps redution inames to (left, right) bounds.
-        self.reductions = {}
 
     @property
     def ndim(self):
@@ -138,8 +132,7 @@ class Array:
                 self.ctx,
                 self.shape,
                 self.dtype,
-                new_expr,
-                self.reductions.copy())
+                new_expr)
 
     def _binary_op(self, op, other, reverse=False):
         if np.isscalar(other):
@@ -152,8 +145,7 @@ class Array:
                     self.ctx,
                     self.shape,
                     self.ctx.unify_types(self.dtype, type(other)),
-                    new_expr,
-                    self.reductions.copy())
+                    new_expr)
 
         elif isinstance(other, Array):
             args = (
@@ -162,16 +154,12 @@ class Array:
             if reverse:
                 args = tuple(reversed(args))
 
-            new_reductions = self.reductions.copy()
-            new_reductions.update(other.reductions)
-
             new_expr = op(*args)
             return ArrayExpression(
                     self.ctx,
                     self.shape,
                     self.ctx.unify_types(self.dtype, other.dtype),
-                    new_expr,
-                    new_reductions)
+                    new_expr)
 
         else:
             raise ValueError
@@ -223,8 +211,7 @@ class Array:
                 self.ctx,
                 shape=tuple(shape),
                 dtype=self.dtype,
-                expression=self.to_loopy_expression(dims),
-                reductions=self.reductions.copy())
+                expression=self.to_loopy_expression(dims))
 
     @property
     def dtype(self):
@@ -240,29 +227,17 @@ class Array:
     def dim_names(self):
         return tuple(f"_{i}" for i in range(len(self.shape)))
 
-    def get_domain(self, dim_names=None, reduction_dim_names=None):
+    def get_domain(self, dim_names=None):
         """Return the domain that defines the space of indices for this array.
 
         :arg dim_names: if not *None*, a tuple of strings, the names of the
             dimensions of the array.  Defaults to :data:`dim_names`.
-        :arg reduction_dim_names: if not *None*, a tuple of strings, the names
-            of the reduction dimensions of the array.  Defaults to
-            :data:`reduction_dim_names`.
 
         :returns: a :class:`islpy.BasicSet`
-
         """
         if dim_names is None:
             dim_names = self.dim_names
-        if reduction_dim_names is not None:
-            # Rename reductions.
-            reductions = dict(
-                    (new_rname, self.reductions[rname])
-                    for new_rname, rname
-                    in zip(reduction_dim_names, self.reduction_dim_names))
-        else:
-            reductions = self.reductions
-        return domain_for_shape(dim_names, self.shape, reductions)
+        return domain_for_shape(dim_names, self.shape)
 
     @property
     def T(self):  # noqa
@@ -272,19 +247,15 @@ class Array:
         """
         new_shape = tuple(reversed(self.shape))
         new_expr = self.to_loopy_expression(list(reversed(self.dims)))
-        return ArrayExpression(self.ctx, new_shape, self.dtype, new_expr,
-                               self.reductions.copy())
+        return ArrayExpression(self.ctx, new_shape, self.dtype, new_expr)
 
-    def to_loopy_expression(self, dims, reduction_names=None):
+    def to_loopy_expression(self, dims):
         """Create a :mod:`loopy` expression for the value at index *dims*.
 
         :arg dims: A tuple of :mod:`pymbolic` expressions representing the
            desired index/indices. For instance if *dims = (Variable('a'), 1)*,
            this will return the expression for the element at index *a, 1*.
 
-        :arg reduction_names: If not *None*, a tuple that renames reduction
-           inames in *self* in the order of :attr:`reduction_dim_names`.
-
         :returns: a :mod:`loopy` expression
         """
         raise NotImplementedError
@@ -293,10 +264,6 @@ class Array:
         """Store this array into a variable, if not already stored."""
         raise NotImplementedError
 
-    @property
-    def reduction_dim_names(self):
-        return tuple(self.reductions)
-
 
 class ArrayExpression(Array):
     """An array-valued expression.
@@ -304,24 +271,15 @@ class ArrayExpression(Array):
     .. attribute:: expression
 
         A :mod:`pymbolic` expression for the array.
-
-    .. attribute:: reductions
-
-        A mapping from reduction inames in :attr:`expression` to bounds.
     """
 
-    def __init__(self, ctx, shape, dtype, expression, reductions, name=None):
+    def __init__(self, ctx, shape, dtype, expression, name=None):
         super().__init__(ctx, shape, dtype, name)
         self.expression = expression
-        self.reductions = reductions
 
-    def to_loopy_expression(self, dims, reduction_names=None):
+    def to_loopy_expression(self, dims):
         assignments = dict(zip(self.dim_names, tuple(dims)))
-        if reduction_names is not None:
-            assignments.update(dict(
-                    zip(self.reduction_dim_names,
-                        map(prim.Variable, reduction_names))))
-        return substitute(self.expression, assignments)
+        return pymbolic.substitute(self.expression, assignments)
 
     def store(self, name=None):
         if name is None:
@@ -348,7 +306,7 @@ class ArrayVariable(Array):
     def __init__(self, ctx, shape, dtype, name):
         super().__init__(ctx, shape, dtype, name)
 
-    def to_loopy_expression(self, dims, reduction_names=None):
+    def to_loopy_expression(self, dims):
         return prim.Variable(self.name)[tuple(dims)]
 
     def store(self, name=None):
@@ -512,24 +470,18 @@ class Context:
                 within_inames=frozenset(within_inames),
                 depends_on=self._get_dependencies(expr))
 
-        new_insns = self.program.root_kernel.instructions + [out_insn]
         self.program = self.program.with_kernel(
-                self.program.root_kernel.copy(instructions=new_insns))
-
+                self.program.root_kernel.copy(
+                    instructions=self.program.root_kernel.instructions + [out_insn]))
         self._last_insn_id = insn_id
 
     def copy_expr(self, expr, name):
         # Copy *expr* to a variable named *name*.
         out_inames = [
                 self.var_name_gen(f"{name}_dim{d}") for d in range(expr.ndim)]
-        out_red_inames = [
-                self.var_name_gen(f"{name}{rname}") for rname
-                in expr.reduction_dim_names]
+        out_expr = expr.to_loopy_expression(map(prim.Variable, out_inames))
 
-        out_expr = expr.to_loopy_expression(
-                map(prim.Variable, out_inames),
-                out_red_inames)
-        domain = expr.get_domain(out_inames, out_red_inames)
+        domain = expr.get_domain(out_inames)
 
         self.program = self.program.with_kernel(
                 self.program.root_kernel.copy(
@@ -594,7 +546,7 @@ class Context:
         # negative we shift it to be positive.
         idx = idx + prim.If(prim.Comparison(idx, "<", 0), a.shape[0], 0)
         expr = a.to_loopy_expression((idx,))
-        return ArrayExpression(self, a.shape, a.dtype, expr, a.reductions)
+        return ArrayExpression(self, a.shape, a.dtype, expr)
 
     def stack(self, arrays, axis=0, name=None):
         """Join a sequence of arrays along a new axis.
@@ -612,10 +564,18 @@ class Context:
             for array_dim, array0_dim in zip(array.shape, arrays[0].shape):
                 self._assume_equal(array_dim, array0_dim)
 
-        base_name = name
+        if name is None:
+            name = "_stack"
+
+        name = self.var_name_gen(name)
+
+        out_inames = []
+        for i in range(arrays[0].ndim):
+            if i >= axis:
+                i += 1
+            out_inames.append(f"{name}_dim{i}")
 
-        if base_name is None:
-            base_name = "_stack"
+        domain = domain_for_shape(out_inames, arrays[0].shape)
 
         out_shape = list(arrays[0].shape)
         out_shape.insert(axis, len(arrays))
@@ -623,62 +583,40 @@ class Context:
 
         out_dtype = self.unify_types(*(a.dtype for a in arrays))
 
-        out_name = self.var_name_gen(base_name)
-
         out = lp.TemporaryVariable(
-                out_name,
-                dtype=out_dtype,
+                name, dtype=out_dtype,
                 shape=out_shape,
                 address_space=lp.AddressSpace.GLOBAL)
 
         # Create an output instruction for each input array.
         from loopy.kernel.instruction import make_assignment
-        new_insns = []
-        new_domains = []
-
-        for i, arr in enumerate(arrays):
-            name = self.var_name_gen(f"{base_name}_array{i}")
-            out_inames = []
-            for j in range(arr.ndim):
-                if j >= axis:
-                    j += 1
-                out_inames.append(self.var_name_gen(f"{name}_dim{j}"))
-
-            out_red_inames = [
-                    self.var_name_gen(f"{name}{rname}")
-                    for rname in arr.reduction_dim_names]
-            reductions = dict(
-                    (new_rname, arr.reductions[rname])
-                    for new_rname, rname
-                    in zip(out_red_inames, arr.reduction_dim_names))
-            domain = domain_for_shape(out_inames, arr.shape, reductions)
-            new_domains.append(domain)
-
+        out_insns = []
+        for i in range(len(arrays)):
             indices = list(map(prim.Variable, out_inames))
-            expr = arr.to_loopy_expression(indices, out_red_inames)
+            expr = arrays[i].to_loopy_expression(indices)
             indices.insert(axis, i)
             indices = tuple(indices)
-            insn_id = self.insn_id_gen(f"{name}_array{i}")
+            insn_id = self.insn_id_gen(f"{name}_copy{i}")
             out_insn = make_assignment(
-                    (prim.Variable(out_name)[indices],),
+                    (prim.Variable(name)[indices],),
                     expr,
                     id=insn_id,
                     within_inames=frozenset(out_inames),
                     depends_on=self._get_dependencies(expr))
             self._last_insn_id = insn_id
-            new_insns.append(out_insn)
+            out_insns.append(out_insn)
 
         root_knl = self.program.root_kernel
         new_tv = root_knl.temporary_variables.copy()
-        new_tv[out_name] = out
+        new_tv[name] = out
 
         self.program = self.program.with_kernel(
                 root_knl.copy(
                     temporary_variables=new_tv,
-                    domains=root_knl.domains + new_domains,
-                    instructions=root_knl.instructions + new_insns))
+                    domains=root_knl.domains + [domain],
+                    instructions=root_knl.instructions + out_insns))
 
-        return ArrayVariable(self, out_shape, out_dtype, out_name)
+        return ArrayVariable(self, out_shape, out_dtype, name)
 
     def matmul(self, a, b, name=None):
         """Multiply matrix *a* by *b*.
@@ -694,34 +632,49 @@ class Context:
             name = "_matmul"
         name = self.var_name_gen(name)
 
-        red_iname = self.var_name_gen(f"{name}_reduce")
-        red_domain = (0, a.shape[1])
-        dtype = self.unify_types(a.dtype, b.dtype)
+        out_shape = (a.shape[0], b.shape[1])
+        out_inames = (f"{name}_dim0", f"{name}_dim1")
+        out_red_iname = self.var_name_gen(f"{name}_reduce")
+        out_dtype = self.unify_types(a.dtype, b.dtype)
+
+        out = lp.TemporaryVariable(
+                name, dtype=out_dtype,
+                shape=(a.shape[0], b.shape[1]),
+                address_space=lp.AddressSpace.GLOBAL)
+
+        import loopy.library.reduction as red
 
-        a_inames = tuple(map(prim.Variable, ("_0", red_iname)))
-        b_inames = tuple(map(prim.Variable, (red_iname, "_1")))
+        a_inames = tuple(map(prim.Variable, (out_inames[0], out_red_iname)))
+        b_inames = tuple(map(prim.Variable, (out_red_iname, out_inames[1])))
 
-        inner_expr = (
+        expr = (
                 a.to_loopy_expression(a_inames)
                 * b.to_loopy_expression(b_inames))
 
-        import loopy.library.reduction as red
-        expr = sym.Reduction(
+        out_expr = sym.Reduction(
                 operation=red.parse_reduction_op("sum"),
-                inames=(red_iname,),
-                expr=inner_expr,
+                inames=(out_red_iname,),
+                expr=expr,
                 allow_simultaneous=False)
 
-        reductions = a.reductions.copy()
-        reductions.update(b.reductions)
-        reductions[red_iname] = red_domain
+        domain = domain_for_shape(
+                out_inames + (out_red_iname,),
+                out_shape + (a.shape[1],))
 
-        return ArrayExpression(
-                self,
-                shape=(a.shape[0], b.shape[1]),
-                dtype=dtype,
-                expression=expr,
-                reductions=reductions)
+        root_knl = self.program.root_kernel
+        new_tv = root_knl.temporary_variables.copy()
+        new_tv[name] = out
+
+        self.program = self.program.with_kernel(
+                root_knl.copy(
+                    temporary_variables=new_tv,
+                    domains=root_knl.domains + [domain]))
+
+        self._make_array_assignment(
+                name, dim_names=out_inames, expr=out_expr,
+                within_inames=frozenset(out_inames))
+
+        return ArrayVariable(self, out_shape, out_dtype, name)
 
     def call_kernel(self, knl, **kwargs):
         pass
diff --git a/arrayzy/utils.py b/arrayzy/utils.py
index c2bf016..54e3436 100644
--- a/arrayzy/utils.py
+++ b/arrayzy/utils.py
@@ -21,47 +21,10 @@ THE SOFTWARE.
 """
 
 
-import loopy as lp
 import islpy as isl
 
-import pymbolic.primitives as prim
-import loopy.symbolic as sym
 
-
-from pymbolic.mapper.substitutor import SubstitutionMapper
-
-
-class LoopyAwareSubstitutionMapper(SubstitutionMapper):
-
-    def map_reduction(self, expr):
-        new_inames = []
-        for iname in expr.inames:
-            new_iname = self.subst_func(iname)
-            if new_iname is None:
-                new_iname = prim.Variable(iname)
-            else:
-                if not isinstance(new_iname, prim.Variable):
-                    raise ValueError(
-                            f"reduction iname {iname} can only be renamed"
-                            " to another iname")
-            new_inames.append(new_iname.name)
-        new_inames = tuple(new_inames)
-        new_expr = self.rec(expr.expr)
-
-        return sym.Reduction(expr.operation, new_inames, new_expr,
-                expr.allow_simultaneous)
-
-
-def substitute(expression, variable_assignments={}, **kwargs):
-    variable_assignments = variable_assignments.copy()
-    variable_assignments.update(kwargs)
-    from pymbolic.mapper.substitutor import make_subst_func
-    print("ASSIGNMENTS", variable_assignments)
-    return LoopyAwareSubstitutionMapper(
-            make_subst_func(variable_assignments))(expression)
-
-
-def domain_for_shape(dim_names, shape, reductions=None):
+def domain_for_shape(dim_names, shape):
     """Return a :class:`isl.BasicSet` that expresses an appropriate index domain
     for an array of (potentially symbolic) shape *shape*.
 
@@ -73,13 +36,9 @@ def domain_for_shape(dim_names, shape, reductions=None):
         dimensions in the returned set.  Must have the same length as
         *dim_names*.
 
-    :arg reductions: A map from reduction inames to (lower, upper) bounds.
-
     :returns: a :class:`isl.BasicSet`
 
     """
-    if reductions is None:
-        reductions = {}
 
     # Collect parameters.
     param_names = set()
@@ -87,33 +46,21 @@ def domain_for_shape(dim_names, shape, reductions=None):
     for sdep in map(get_dependencies, shape):
         param_names |= sdep
 
-    for bounds in reductions.values():
-        for sdep in map(get_dependencies, bounds):
-            # FIXME: Assumes that reduction bounds are not data-dependent.
-            param_names |= sdep
-
-    set_names = sorted(tuple(dim_names) + tuple(reductions))
     param_names = sorted(param_names)
 
     # Build domain.
     dom = isl.BasicSet.universe(
             isl.Space.create_from_names(
                 isl.DEFAULT_CONTEXT,
-                set=set_names,
+                set=dim_names,
                 params=param_names))
 
     # Add constraints.
     from loopy.symbolic import aff_from_expr
     affs = isl.affs_from_space(dom.space)
-
     for iname, dim in zip(dim_names, shape):
         dom &= affs[0].le_set(affs[iname])
         dom &= affs[iname].lt_set(aff_from_expr(dom.space, dim))
-
-    for iname, (left, right) in reductions.items():
-        dom &= aff_from_expr(dom.space, left).le_set(affs[iname])
-        dom &= affs[iname].lt_set(aff_from_expr(dom.space, right))
-
     dom, = dom.get_basic_sets()
 
     return dom
diff --git a/test/test_linalg.py b/test/test_linalg.py
index 821c10a..68c4ca6 100644
--- a/test/test_linalg.py
+++ b/test/test_linalg.py
@@ -163,42 +163,6 @@ def test_matmul(ctx_factory):
     assert (eval_one(prog, x=x, y=y) == x @ y).all()
 
 
-def test_matmul_addition(ctx_factory):
-    cl_ctx = ctx_factory()
-    queue = cl.CommandQueue(cl_ctx)
-
-    with az.Context(queue) as ctx:
-        x = ctx.argument("x", shape=("n, n"), dtype=np.float64)
-        y = ctx.argument("y", shape=("n, n"), dtype=np.float64)
-        z = ctx.argument("z", shape=("n, n"), dtype=np.float64)
-        ctx.output(ctx.matmul(x, y) + ctx.matmul(x, z))
-
-    x = np.array([[1, 2], [3, 4]], dtype=np.float64)
-    y = np.array([[5, 6], [7, 8]], dtype=np.float64)
-    z = np.array([[9, 10], [11, 12]], dtype=np.float64)
-
-    prog = ctx.build()
-    assert (eval_one(prog, x=x, y=y, z=z) == x @ y + x @ z).all()
-
-
-def test_matmul_stack(ctx_factory):
-    cl_ctx = ctx_factory()
-    queue = cl.CommandQueue(cl_ctx)
-
-    with az.Context(queue) as ctx:
-        x = ctx.argument("x", shape=("m, n"), dtype=np.float64)
-        y = ctx.argument("y", shape=("n, k"), dtype=np.float64)
-        z = ctx.matmul(x, y)
-        w = ctx.matmul(x, 2 * y)
-        ctx.output(ctx.stack((z, w)))
-
-    x = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float64)
-    y = np.array([[2, 5], [5, 1], [1, 2]], dtype=np.float64)
-
-    prog = ctx.build()
-    assert (eval_one(prog, x=x, y=y) == np.array([x @ y, x @ (2 * y)])).all()
-
-
 @pytest.mark.parametrize("shift", (-1, 1, -20, 20))
 def test_roll(ctx_factory, shift):
     cl_ctx = ctx_factory()
-- 
GitLab


From 91f4874468e08bde012edc8aef971dc2dd80507b Mon Sep 17 00:00:00 2001
From: Matt Wala <wala1@illinois.edu>
Date: Fri, 1 May 2020 03:46:55 -0500
Subject: [PATCH 35/46] Revert "Revert "Support reductions in expressions.
 Rewrite matmul to be an expression.""

This reverts commit 67d0637faa5e9308d45ef85083ca7ff56a39bf61.
---
 arrayzy/array.py    | 197 +++++++++++++++++++++++++++-----------------
 arrayzy/utils.py    |  57 ++++++++++++-
 test/test_linalg.py |  36 ++++++++
 3 files changed, 213 insertions(+), 77 deletions(-)

diff --git a/arrayzy/array.py b/arrayzy/array.py
index c7886d3..042b5a5 100644
--- a/arrayzy/array.py
+++ b/arrayzy/array.py
@@ -27,11 +27,10 @@ import loopy as lp
 import islpy as isl
 import numpy as np
 import loopy.symbolic as sym
-import pymbolic
 import pymbolic.primitives as prim
 
 from arrayzy.program import BoundProgram, PyOpenCLTarget
-from arrayzy.utils import domain_for_shape
+from arrayzy.utils import domain_for_shape, substitute
 
 
 __doc__ = """
@@ -96,6 +95,11 @@ class Array:
 
         Same as :data:`dims` but a tuple of strings.
 
+    .. attribute:: reduction_dim_names
+
+        A tuple of strings representing the names of reduction inames present
+        in the array.
+
     .. automethod:: get_domain
     .. automethod:: to_loopy_expression
 
@@ -121,6 +125,8 @@ class Array:
         self._dtype = dtype
         self.shape = shape
         self.name = name
+        # Maps redution inames to (left, right) bounds.
+        self.reductions = {}
 
     @property
     def ndim(self):
@@ -132,7 +138,8 @@ class Array:
                 self.ctx,
                 self.shape,
                 self.dtype,
-                new_expr)
+                new_expr,
+                self.reductions.copy())
 
     def _binary_op(self, op, other, reverse=False):
         if np.isscalar(other):
@@ -145,7 +152,8 @@ class Array:
                     self.ctx,
                     self.shape,
                     self.ctx.unify_types(self.dtype, type(other)),
-                    new_expr)
+                    new_expr,
+                    self.reductions.copy())
 
         elif isinstance(other, Array):
             args = (
@@ -154,12 +162,16 @@ class Array:
             if reverse:
                 args = tuple(reversed(args))
 
+            new_reductions = self.reductions.copy()
+            new_reductions.update(other.reductions)
+
             new_expr = op(*args)
             return ArrayExpression(
                     self.ctx,
                     self.shape,
                     self.ctx.unify_types(self.dtype, other.dtype),
-                    new_expr)
+                    new_expr,
+                    new_reductions)
 
         else:
             raise ValueError
@@ -211,7 +223,8 @@ class Array:
                 self.ctx,
                 shape=tuple(shape),
                 dtype=self.dtype,
-                expression=self.to_loopy_expression(dims))
+                expression=self.to_loopy_expression(dims),
+                reductions=self.reductions.copy())
 
     @property
     def dtype(self):
@@ -227,17 +240,29 @@ class Array:
     def dim_names(self):
         return tuple(f"_{i}" for i in range(len(self.shape)))
 
-    def get_domain(self, dim_names=None):
+    def get_domain(self, dim_names=None, reduction_dim_names=None):
         """Return the domain that defines the space of indices for this array.
 
         :arg dim_names: if not *None*, a tuple of strings, the names of the
             dimensions of the array.  Defaults to :data:`dim_names`.
+        :arg reduction_dim_names: if not *None*, a tuple of strings, the names
+            of the reduction dimensions of the array.  Defaults to
+            :data:`reduction_dim_names`.
 
         :returns: a :class:`islpy.BasicSet`
+
         """
         if dim_names is None:
             dim_names = self.dim_names
-        return domain_for_shape(dim_names, self.shape)
+        if reduction_dim_names is not None:
+            # Rename reductions.
+            reductions = dict(
+                    (new_rname, self.reductions[rname])
+                    for new_rname, rname
+                    in zip(reduction_dim_names, self.reduction_dim_names))
+        else:
+            reductions = self.reductions
+        return domain_for_shape(dim_names, self.shape, reductions)
 
     @property
     def T(self):  # noqa
@@ -247,15 +272,19 @@ class Array:
         """
         new_shape = tuple(reversed(self.shape))
         new_expr = self.to_loopy_expression(list(reversed(self.dims)))
-        return ArrayExpression(self.ctx, new_shape, self.dtype, new_expr)
+        return ArrayExpression(self.ctx, new_shape, self.dtype, new_expr,
+                               self.reductions.copy())
 
-    def to_loopy_expression(self, dims):
+    def to_loopy_expression(self, dims, reduction_names=None):
         """Create a :mod:`loopy` expression for the value at index *dims*.
 
         :arg dims: A tuple of :mod:`pymbolic` expressions representing the
            desired index/indices. For instance if *dims = (Variable('a'), 1)*,
            this will return the expression for the element at index *a, 1*.
 
+        :arg reduction_names: If not *None*, a tuple that renames reduction
+           inames in *self* in the order of :attr:`reduction_dim_names`.
+
         :returns: a :mod:`loopy` expression
         """
         raise NotImplementedError
@@ -264,6 +293,10 @@ class Array:
         """Store this array into a variable, if not already stored."""
         raise NotImplementedError
 
+    @property
+    def reduction_dim_names(self):
+        return tuple(self.reductions)
+
 
 class ArrayExpression(Array):
     """An array-valued expression.
@@ -271,15 +304,24 @@ class ArrayExpression(Array):
     .. attribute:: expression
 
         A :mod:`pymbolic` expression for the array.
+
+    .. attribute:: reductions
+
+        A mapping from reduction inames in :attr:`expression` to bounds.
     """
 
-    def __init__(self, ctx, shape, dtype, expression, name=None):
+    def __init__(self, ctx, shape, dtype, expression, reductions, name=None):
         super().__init__(ctx, shape, dtype, name)
         self.expression = expression
+        self.reductions = reductions
 
-    def to_loopy_expression(self, dims):
+    def to_loopy_expression(self, dims, reduction_names=None):
         assignments = dict(zip(self.dim_names, tuple(dims)))
-        return pymbolic.substitute(self.expression, assignments)
+        if reduction_names is not None:
+            assignments.update(dict(
+                    zip(self.reduction_dim_names,
+                        map(prim.Variable, reduction_names))))
+        return substitute(self.expression, assignments)
 
     def store(self, name=None):
         if name is None:
@@ -306,7 +348,7 @@ class ArrayVariable(Array):
     def __init__(self, ctx, shape, dtype, name):
         super().__init__(ctx, shape, dtype, name)
 
-    def to_loopy_expression(self, dims):
+    def to_loopy_expression(self, dims, reduction_names=None):
         return prim.Variable(self.name)[tuple(dims)]
 
     def store(self, name=None):
@@ -470,18 +512,24 @@ class Context:
                 within_inames=frozenset(within_inames),
                 depends_on=self._get_dependencies(expr))
 
+        new_insns = self.program.root_kernel.instructions + [out_insn]
         self.program = self.program.with_kernel(
-                self.program.root_kernel.copy(
-                    instructions=self.program.root_kernel.instructions + [out_insn]))
+                self.program.root_kernel.copy(instructions=new_insns))
+
         self._last_insn_id = insn_id
 
     def copy_expr(self, expr, name):
         # Copy *expr* to a variable named *name*.
         out_inames = [
                 self.var_name_gen(f"{name}_dim{d}") for d in range(expr.ndim)]
-        out_expr = expr.to_loopy_expression(map(prim.Variable, out_inames))
+        out_red_inames = [
+                self.var_name_gen(f"{name}{rname}") for rname
+                in expr.reduction_dim_names]
 
-        domain = expr.get_domain(out_inames)
+        out_expr = expr.to_loopy_expression(
+                map(prim.Variable, out_inames),
+                out_red_inames)
+        domain = expr.get_domain(out_inames, out_red_inames)
 
         self.program = self.program.with_kernel(
                 self.program.root_kernel.copy(
@@ -546,7 +594,7 @@ class Context:
         # negative we shift it to be positive.
         idx = idx + prim.If(prim.Comparison(idx, "<", 0), a.shape[0], 0)
         expr = a.to_loopy_expression((idx,))
-        return ArrayExpression(self, a.shape, a.dtype, expr)
+        return ArrayExpression(self, a.shape, a.dtype, expr, a.reductions)
 
     def stack(self, arrays, axis=0, name=None):
         """Join a sequence of arrays along a new axis.
@@ -564,18 +612,10 @@ class Context:
             for array_dim, array0_dim in zip(array.shape, arrays[0].shape):
                 self._assume_equal(array_dim, array0_dim)
 
-        if name is None:
-            name = "_stack"
-
-        name = self.var_name_gen(name)
-
-        out_inames = []
-        for i in range(arrays[0].ndim):
-            if i >= axis:
-                i += 1
-            out_inames.append(f"{name}_dim{i}")
+        base_name = name
 
-        domain = domain_for_shape(out_inames, arrays[0].shape)
+        if base_name is None:
+            base_name = "_stack"
 
         out_shape = list(arrays[0].shape)
         out_shape.insert(axis, len(arrays))
@@ -583,40 +623,62 @@ class Context:
 
         out_dtype = self.unify_types(*(a.dtype for a in arrays))
 
+        out_name = self.var_name_gen(base_name)
+
         out = lp.TemporaryVariable(
-                name, dtype=out_dtype,
+                out_name,
+                dtype=out_dtype,
                 shape=out_shape,
                 address_space=lp.AddressSpace.GLOBAL)
 
         # Create an output instruction for each input array.
         from loopy.kernel.instruction import make_assignment
-        out_insns = []
-        for i in range(len(arrays)):
+        new_insns = []
+        new_domains = []
+
+        for i, arr in enumerate(arrays):
+            name = self.var_name_gen(f"{base_name}_array{i}")
+            out_inames = []
+            for j in range(arr.ndim):
+                if j >= axis:
+                    j += 1
+                out_inames.append(self.var_name_gen(f"{name}_dim{j}"))
+
+            out_red_inames = [
+                    self.var_name_gen(f"{name}{rname}")
+                    for rname in arr.reduction_dim_names]
+            reductions = dict(
+                    (new_rname, arr.reductions[rname])
+                    for new_rname, rname
+                    in zip(out_red_inames, arr.reduction_dim_names))
+            domain = domain_for_shape(out_inames, arr.shape, reductions)
+            new_domains.append(domain)
+
             indices = list(map(prim.Variable, out_inames))
-            expr = arrays[i].to_loopy_expression(indices)
+            expr = arr.to_loopy_expression(indices, out_red_inames)
             indices.insert(axis, i)
             indices = tuple(indices)
-            insn_id = self.insn_id_gen(f"{name}_copy{i}")
+            insn_id = self.insn_id_gen(f"{name}_array{i}")
             out_insn = make_assignment(
-                    (prim.Variable(name)[indices],),
+                    (prim.Variable(out_name)[indices],),
                     expr,
                     id=insn_id,
                     within_inames=frozenset(out_inames),
                     depends_on=self._get_dependencies(expr))
             self._last_insn_id = insn_id
-            out_insns.append(out_insn)
+            new_insns.append(out_insn)
 
         root_knl = self.program.root_kernel
         new_tv = root_knl.temporary_variables.copy()
-        new_tv[name] = out
+        new_tv[out_name] = out
 
         self.program = self.program.with_kernel(
                 root_knl.copy(
                     temporary_variables=new_tv,
-                    domains=root_knl.domains + [domain],
-                    instructions=root_knl.instructions + out_insns))
+                    domains=root_knl.domains + new_domains,
+                    instructions=root_knl.instructions + new_insns))
 
-        return ArrayVariable(self, out_shape, out_dtype, name)
+        return ArrayVariable(self, out_shape, out_dtype, out_name)
 
     def matmul(self, a, b, name=None):
         """Multiply matrix *a* by *b*.
@@ -632,49 +694,34 @@ class Context:
             name = "_matmul"
         name = self.var_name_gen(name)
 
-        out_shape = (a.shape[0], b.shape[1])
-        out_inames = (f"{name}_dim0", f"{name}_dim1")
-        out_red_iname = self.var_name_gen(f"{name}_reduce")
-        out_dtype = self.unify_types(a.dtype, b.dtype)
-
-        out = lp.TemporaryVariable(
-                name, dtype=out_dtype,
-                shape=(a.shape[0], b.shape[1]),
-                address_space=lp.AddressSpace.GLOBAL)
-
-        import loopy.library.reduction as red
+        red_iname = self.var_name_gen(f"{name}_reduce")
+        red_domain = (0, a.shape[1])
+        dtype = self.unify_types(a.dtype, b.dtype)
 
-        a_inames = tuple(map(prim.Variable, (out_inames[0], out_red_iname)))
-        b_inames = tuple(map(prim.Variable, (out_red_iname, out_inames[1])))
+        a_inames = tuple(map(prim.Variable, ("_0", red_iname)))
+        b_inames = tuple(map(prim.Variable, (red_iname, "_1")))
 
-        expr = (
+        inner_expr = (
                 a.to_loopy_expression(a_inames)
                 * b.to_loopy_expression(b_inames))
 
-        out_expr = sym.Reduction(
+        import loopy.library.reduction as red
+        expr = sym.Reduction(
                 operation=red.parse_reduction_op("sum"),
-                inames=(out_red_iname,),
-                expr=expr,
+                inames=(red_iname,),
+                expr=inner_expr,
                 allow_simultaneous=False)
 
-        domain = domain_for_shape(
-                out_inames + (out_red_iname,),
-                out_shape + (a.shape[1],))
-
-        root_knl = self.program.root_kernel
-        new_tv = root_knl.temporary_variables.copy()
-        new_tv[name] = out
-
-        self.program = self.program.with_kernel(
-                root_knl.copy(
-                    temporary_variables=new_tv,
-                    domains=root_knl.domains + [domain]))
-
-        self._make_array_assignment(
-                name, dim_names=out_inames, expr=out_expr,
-                within_inames=frozenset(out_inames))
+        reductions = a.reductions.copy()
+        reductions.update(b.reductions)
+        reductions[red_iname] = red_domain
 
-        return ArrayVariable(self, out_shape, out_dtype, name)
+        return ArrayExpression(
+                self,
+                shape=(a.shape[0], b.shape[1]),
+                dtype=dtype,
+                expression=expr,
+                reductions=reductions)
 
     def call_kernel(self, knl, **kwargs):
         pass
diff --git a/arrayzy/utils.py b/arrayzy/utils.py
index 54e3436..c2bf016 100644
--- a/arrayzy/utils.py
+++ b/arrayzy/utils.py
@@ -21,10 +21,47 @@ THE SOFTWARE.
 """
 
 
+import loopy as lp
 import islpy as isl
 
+import pymbolic.primitives as prim
+import loopy.symbolic as sym
 
-def domain_for_shape(dim_names, shape):
+
+from pymbolic.mapper.substitutor import SubstitutionMapper
+
+
+class LoopyAwareSubstitutionMapper(SubstitutionMapper):
+
+    def map_reduction(self, expr):
+        new_inames = []
+        for iname in expr.inames:
+            new_iname = self.subst_func(iname)
+            if new_iname is None:
+                new_iname = prim.Variable(iname)
+            else:
+                if not isinstance(new_iname, prim.Variable):
+                    raise ValueError(
+                            f"reduction iname {iname} can only be renamed"
+                            " to another iname")
+            new_inames.append(new_iname.name)
+        new_inames = tuple(new_inames)
+        new_expr = self.rec(expr.expr)
+
+        return sym.Reduction(expr.operation, new_inames, new_expr,
+                expr.allow_simultaneous)
+
+
+def substitute(expression, variable_assignments={}, **kwargs):
+    variable_assignments = variable_assignments.copy()
+    variable_assignments.update(kwargs)
+    from pymbolic.mapper.substitutor import make_subst_func
+    print("ASSIGNMENTS", variable_assignments)
+    return LoopyAwareSubstitutionMapper(
+            make_subst_func(variable_assignments))(expression)
+
+
+def domain_for_shape(dim_names, shape, reductions=None):
     """Return a :class:`isl.BasicSet` that expresses an appropriate index domain
     for an array of (potentially symbolic) shape *shape*.
 
@@ -36,9 +73,13 @@ def domain_for_shape(dim_names, shape):
         dimensions in the returned set.  Must have the same length as
         *dim_names*.
 
+    :arg reductions: A map from reduction inames to (lower, upper) bounds.
+
     :returns: a :class:`isl.BasicSet`
 
     """
+    if reductions is None:
+        reductions = {}
 
     # Collect parameters.
     param_names = set()
@@ -46,21 +87,33 @@ def domain_for_shape(dim_names, shape):
     for sdep in map(get_dependencies, shape):
         param_names |= sdep
 
+    for bounds in reductions.values():
+        for sdep in map(get_dependencies, bounds):
+            # FIXME: Assumes that reduction bounds are not data-dependent.
+            param_names |= sdep
+
+    set_names = sorted(tuple(dim_names) + tuple(reductions))
     param_names = sorted(param_names)
 
     # Build domain.
     dom = isl.BasicSet.universe(
             isl.Space.create_from_names(
                 isl.DEFAULT_CONTEXT,
-                set=dim_names,
+                set=set_names,
                 params=param_names))
 
     # Add constraints.
     from loopy.symbolic import aff_from_expr
     affs = isl.affs_from_space(dom.space)
+
     for iname, dim in zip(dim_names, shape):
         dom &= affs[0].le_set(affs[iname])
         dom &= affs[iname].lt_set(aff_from_expr(dom.space, dim))
+
+    for iname, (left, right) in reductions.items():
+        dom &= aff_from_expr(dom.space, left).le_set(affs[iname])
+        dom &= affs[iname].lt_set(aff_from_expr(dom.space, right))
+
     dom, = dom.get_basic_sets()
 
     return dom
diff --git a/test/test_linalg.py b/test/test_linalg.py
index 68c4ca6..821c10a 100644
--- a/test/test_linalg.py
+++ b/test/test_linalg.py
@@ -163,6 +163,42 @@ def test_matmul(ctx_factory):
     assert (eval_one(prog, x=x, y=y) == x @ y).all()
 
 
+def test_matmul_addition(ctx_factory):
+    cl_ctx = ctx_factory()
+    queue = cl.CommandQueue(cl_ctx)
+
+    with az.Context(queue) as ctx:
+        x = ctx.argument("x", shape=("n, n"), dtype=np.float64)
+        y = ctx.argument("y", shape=("n, n"), dtype=np.float64)
+        z = ctx.argument("z", shape=("n, n"), dtype=np.float64)
+        ctx.output(ctx.matmul(x, y) + ctx.matmul(x, z))
+
+    x = np.array([[1, 2], [3, 4]], dtype=np.float64)
+    y = np.array([[5, 6], [7, 8]], dtype=np.float64)
+    z = np.array([[9, 10], [11, 12]], dtype=np.float64)
+
+    prog = ctx.build()
+    assert (eval_one(prog, x=x, y=y, z=z) == x @ y + x @ z).all()
+
+
+def test_matmul_stack(ctx_factory):
+    cl_ctx = ctx_factory()
+    queue = cl.CommandQueue(cl_ctx)
+
+    with az.Context(queue) as ctx:
+        x = ctx.argument("x", shape=("m, n"), dtype=np.float64)
+        y = ctx.argument("y", shape=("n, k"), dtype=np.float64)
+        z = ctx.matmul(x, y)
+        w = ctx.matmul(x, 2 * y)
+        ctx.output(ctx.stack((z, w)))
+
+    x = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float64)
+    y = np.array([[2, 5], [5, 1], [1, 2]], dtype=np.float64)
+
+    prog = ctx.build()
+    assert (eval_one(prog, x=x, y=y) == np.array([x @ y, x @ (2 * y)])).all()
+
+
 @pytest.mark.parametrize("shift", (-1, 1, -20, 20))
 def test_roll(ctx_factory, shift):
     cl_ctx = ctx_factory()
-- 
GitLab


From 5072c5f5ca367ac52d70039a6466fa519c7741f1 Mon Sep 17 00:00:00 2001
From: Matt Wala <wala1@illinois.edu>
Date: Fri, 1 May 2020 04:00:50 -0500
Subject: [PATCH 36/46] Ensure reductions are disjoint when creating binary
 operations

---
 arrayzy/array.py | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/arrayzy/array.py b/arrayzy/array.py
index 042b5a5..1395356 100644
--- a/arrayzy/array.py
+++ b/arrayzy/array.py
@@ -156,6 +156,8 @@ class Array:
                     self.reductions.copy())
 
         elif isinstance(other, Array):
+            other = self.ctx.ensure_disjoint_reductions(
+                    other, self.reduction_dim_names)
             args = (
                     self.to_loopy_expression(self.dims),
                     other.to_loopy_expression(self.dims))
@@ -428,6 +430,29 @@ class Context:
 
     # {{{ internal api
 
+    def ensure_disjoint_reductions(self, expr, disjoint_from):
+        # Renames the reductions in *expr* not to conflict with the ones in the
+        # list *disjoint_from*.
+        common_reductions = set(expr.reductions) & set(disjoint_from)
+        if not common_reductions:
+            return expr
+
+        new_red_names = []
+        for iname in expr.reduction_dim_names:
+            if iname not in common_reductions:
+                new_red_names.append(iname)
+            else:
+                new_red_names.append(self.var_name_gen(iname))
+
+        new_expr = expr.to_loopy_expression(expr.dims, new_red_names)
+        new_reductions = dict(
+                (new_rname, expr.reductions[rname])
+                for new_rname, rname
+                in zip(new_red_names, expr.reduction_dim_names))
+
+        return ArrayExpression(self, expr.shape, expr.dtype, new_expr,
+                               new_reductions, expr.name)
+
     def unify_types(self, *args):
         # Unifies the list of types.
         if None in args:
@@ -694,6 +719,7 @@ class Context:
             name = "_matmul"
         name = self.var_name_gen(name)
 
+        b = self.ensure_disjoint_reductions(b, a.reduction_dim_names)
         red_iname = self.var_name_gen(f"{name}_reduce")
         red_domain = (0, a.shape[1])
         dtype = self.unify_types(a.dtype, b.dtype)
-- 
GitLab


From 3bff3657bed936d969b2b04dd2c394f8fc32590c Mon Sep 17 00:00:00 2001
From: Matt Wala <wala1@illinois.edu>
Date: Fri, 1 May 2020 04:05:36 -0500
Subject: [PATCH 37/46] Fix insn id naming

---
 arrayzy/array.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arrayzy/array.py b/arrayzy/array.py
index 1395356..ff928d2 100644
--- a/arrayzy/array.py
+++ b/arrayzy/array.py
@@ -683,7 +683,7 @@ class Context:
             expr = arr.to_loopy_expression(indices, out_red_inames)
             indices.insert(axis, i)
             indices = tuple(indices)
-            insn_id = self.insn_id_gen(f"{name}_array{i}")
+            insn_id = self.insn_id_gen(f"{name}")
             out_insn = make_assignment(
                     (prim.Variable(out_name)[indices],),
                     expr,
-- 
GitLab


From 991355e2a4e814632912fa7918f36faeae69e96e Mon Sep 17 00:00:00 2001
From: Matt Wala <wala1@illinois.edu>
Date: Fri, 1 May 2020 04:23:18 -0500
Subject: [PATCH 38/46] flake8 fixes

---
 arrayzy/utils.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/arrayzy/utils.py b/arrayzy/utils.py
index c2bf016..cb30cd5 100644
--- a/arrayzy/utils.py
+++ b/arrayzy/utils.py
@@ -21,7 +21,6 @@ THE SOFTWARE.
 """
 
 
-import loopy as lp
 import islpy as isl
 
 import pymbolic.primitives as prim
@@ -49,7 +48,7 @@ class LoopyAwareSubstitutionMapper(SubstitutionMapper):
         new_expr = self.rec(expr.expr)
 
         return sym.Reduction(expr.operation, new_inames, new_expr,
-                expr.allow_simultaneous)
+                             expr.allow_simultaneous)
 
 
 def substitute(expression, variable_assignments={}, **kwargs):
-- 
GitLab


From 67883e5b4c22b7399227818330aa769995dd15b8 Mon Sep 17 00:00:00 2001
From: Andreas Kloeckner <inform@tiker.net>
Date: Fri, 1 May 2020 04:24:33 -0500
Subject: [PATCH 39/46] Add flake8 config

---
 setup.cfg | 6 ++++++
 1 file changed, 6 insertions(+)
 create mode 100644 setup.cfg

diff --git a/setup.cfg b/setup.cfg
new file mode 100644
index 0000000..a0d9574
--- /dev/null
+++ b/setup.cfg
@@ -0,0 +1,6 @@
+[flake8]
+ignore = E126,E127,E128,E123,E226,E241,E242,E265,N802,W503,E402,N814,N817,W504
+max-line-length=85
+exclude=
+    loopy/target/c/compyte/ndarray,
+    loopy/target/c/compyte/array.py
-- 
GitLab


From ff03278ad2034586179778b5ac4400135043c29a Mon Sep 17 00:00:00 2001
From: Andreas Kloeckner <inform@tiker.net>
Date: Fri, 1 May 2020 04:25:49 -0500
Subject: [PATCH 40/46] Placate flake8

---
 arrayzy/array.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arrayzy/array.py b/arrayzy/array.py
index c7886d3..16c2d10 100644
--- a/arrayzy/array.py
+++ b/arrayzy/array.py
@@ -472,7 +472,8 @@ class Context:
 
         self.program = self.program.with_kernel(
                 self.program.root_kernel.copy(
-                    instructions=self.program.root_kernel.instructions + [out_insn]))
+                    instructions=(
+                        self.program.root_kernel.instructions + [out_insn])))
         self._last_insn_id = insn_id
 
     def copy_expr(self, expr, name):
-- 
GitLab


From 0b38369d9d1a74b948c3f498f511a17f8fe20d9b Mon Sep 17 00:00:00 2001
From: Andreas Kloeckner <inform@tiker.net>
Date: Fri, 1 May 2020 04:27:02 -0500
Subject: [PATCH 41/46] ctx.output(): If given a temp var, make an output

---
 arrayzy/array.py    | 37 ++++++++++++++++++++++++++++++-------
 test/test_linalg.py |  1 +
 2 files changed, 31 insertions(+), 7 deletions(-)

diff --git a/arrayzy/array.py b/arrayzy/array.py
index 16c2d10..750a1b7 100644
--- a/arrayzy/array.py
+++ b/arrayzy/array.py
@@ -687,13 +687,36 @@ class Context:
 
         :returns: an :class:`ArrayVariable`
         """
-        if name is None:
-            name = "_out"
-        name = self.var_name_gen(name)
-        out = lp.GlobalArg(name, shape=expr.shape, dtype=expr.dtype, order="C")
-        self.program = self.program.with_kernel(
-                self.program.root_kernel.copy(args=self.program.args + [out]))
-        return self.copy_expr(expr, name)
+        if isinstance(expr, ArrayVariable):
+            # must already exist, make sure it's usable
+            root_knl = self.program.root_kernel
+
+            if expr.name in root_knl.temporary_variables:
+                tv = root_knl.temporary_variables[expr.name]
+                new_tvs = root_knl.temporary_variables.copy()
+                new_args  = root_knl.args + [
+                        lp.GlobalArg(
+                            expr.name, shape=tv.shape, dtype=tv.dtype,
+                            dim_tags=tv.dim_tags,
+                            is_output=True)]
+
+                del new_tvs[expr.name]
+
+                self.program = self.program.with_kernel(
+                        root_knl.copy(
+                            temporary_variables=new_tvs,
+                            args=new_args))
+
+        else:
+            if name is None:
+                name = "_out"
+            name = self.var_name_gen(name)
+            out = lp.GlobalArg(
+                    name, shape=expr.shape, dtype=expr.dtype, order="C",
+                    is_output=True)
+            self.program = self.program.with_kernel(
+                    self.program.root_kernel.copy(args=self.program.args + [out]))
+            return self.copy_expr(expr, name)
 
     # }}}
 
diff --git a/test/test_linalg.py b/test/test_linalg.py
index 68c4ca6..948beb7 100644
--- a/test/test_linalg.py
+++ b/test/test_linalg.py
@@ -45,6 +45,7 @@ def eval_one(prog, **kwargs):
 
 
 def test_symbolic_array(ctx_factory):
+    pytest.xfail("input is also an output--what should we do?")
     cl_ctx = ctx_factory()
     queue = cl.CommandQueue(cl_ctx)
 
-- 
GitLab


From 992d012535d531672f61cc329bd5b908be9aa9a7 Mon Sep 17 00:00:00 2001
From: Andreas Kloeckner <inform@tiker.net>
Date: Fri, 1 May 2020 04:28:08 -0500
Subject: [PATCH 42/46] Implement Context.call_kernel

---
 arrayzy/array.py    | 97 ++++++++++++++++++++++++++++++++++++++++++++-
 test/test_kernel.py | 96 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 192 insertions(+), 1 deletion(-)
 create mode 100644 test/test_kernel.py

diff --git a/arrayzy/array.py b/arrayzy/array.py
index 750a1b7..97fd30f 100644
--- a/arrayzy/array.py
+++ b/arrayzy/array.py
@@ -678,7 +678,102 @@ class Context:
         return ArrayVariable(self, out_shape, out_dtype, name)
 
     def call_kernel(self, knl, **kwargs):
-        pass
+        # FIXME: Avoid name clash between existing and new kernel
+
+        root_knl = self.program.root_kernel
+
+        new_tvs = root_knl.temporary_variables.copy()
+        new_domains = root_knl.domains[:]
+
+        from pymbolic.primitives import Variable, Call
+        from loopy.symbolic import SubArrayRef
+
+        retvals = []
+        lhs_exprs = []
+        rhs_args = []
+        for arg in knl.args:
+            if isinstance(arg, lp.ValueArg):
+                passed_arg = kwargs[arg.name]
+                if isinstance(passed_arg, str):
+                    from loopy.symbolic import parse
+                    passed_arg = parse(passed_arg)
+                rhs_args.append(passed_arg)
+            else:
+                if arg.is_output:
+                    arg_name_in_caller = self.var_name_gen(arg.name)
+                    # FIXME: We're not nearly copying all attributes
+                    new_tvs[arg_name_in_caller] = lp.TemporaryVariable(
+                            name=arg_name_in_caller,
+                            # FIXME: Callee and caller parameter names in
+                            # shapes here match purely by accident.
+                            shape=arg.shape,
+                            dtype=arg.dtype)
+                    # FIXME: Callee and caller parameter names in shapes here
+                    # match purely by accident.
+                    arg_shape = arg.shape
+
+                    result_inames = tuple(
+                            self.var_name_gen(f"{arg_name_in_caller}_i{i}")
+                            for i in range(len(arg_shape)))
+                    new_domains.append(
+                            domain_for_shape(result_inames, arg.shape))
+                    retvals.append(
+                            ArrayVariable(
+                                self, arg.shape, arg.dtype, arg_name_in_caller))
+
+                else:
+                    # FIXME: Allow passing slices
+                    passed_arg = kwargs[arg.name]
+                    if not isinstance(passed_arg, ArrayVariable):
+                        raise ValueError(
+                                f"argument '{arg.name}': passed value "
+                                f"'{passed_arg}' is not a variable. "
+                                "Call .store() to make it one.")
+
+                    arg_name_in_caller = passed_arg.name
+                    arg_shape = passed_arg.shape
+
+                array_ref_inames = tuple(
+                        self.var_name_gen("ai") for i in arg_shape)
+                new_domains.append(
+                        domain_for_shape(array_ref_inames, arg.shape))
+                array_ref_inames_vars = tuple(
+                        Variable(iname) for iname in array_ref_inames)
+
+                sar = SubArrayRef(
+                        array_ref_inames_vars,
+                        Variable(arg_name_in_caller)[array_ref_inames_vars])
+
+                if arg.is_output:
+                    lhs_exprs.append(sar)
+                else:
+                    rhs_args.append(sar)
+
+        rhs_args = tuple(rhs_args)
+        from loopy.kernel.instruction import make_assignment
+        new_insns = root_knl.instructions + [
+                make_assignment(
+                    tuple(lhs_exprs),
+                    Call(
+                        Variable(knl.name),
+                        rhs_args),
+                    id=self.insn_id_gen(f"_call_{knl.name}"),
+                    depends_on=self._get_dependencies(rhs_args)
+                    )
+                ]
+
+        program = self.program.with_kernel(root_knl.copy(
+            instructions=new_insns,
+            temporary_variables=new_tvs,
+            domains=new_domains,
+            ))
+
+        self.program = lp.register_callable_kernel(program, knl)
+
+        if len(retvals) == 1:
+            return retvals[0]
+        else:
+            return retvals
 
     def output(self, expr, name=None):
         """Copy *expr* to a (new) output variable.
diff --git a/test/test_kernel.py b/test/test_kernel.py
new file mode 100644
index 0000000..5ad8cd4
--- /dev/null
+++ b/test/test_kernel.py
@@ -0,0 +1,96 @@
+#!/usr/bin/env python
+
+__copyright__ = "Copyright (C) 2020 Matt Wala"
+
+__license__ = """
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+"""
+
+import sys
+import arrayzy as az
+import loopy as lp
+import numpy as np
+import numpy.linalg as la
+
+import pyopencl as cl
+from pyopencl.tools import (  # noqa
+        pytest_generate_tests_for_pyopencl as pytest_generate_tests)
+
+
+def test_call_kernel(ctx_factory):
+    cl_ctx = ctx_factory()
+    queue = cl.CommandQueue(cl_ctx)
+
+    with az.Context(queue) as ctx:
+        a = ctx.argument("a", shape="m,k", dtype=np.float64)
+        b = ctx.argument("b", shape="k,n", dtype=np.float64)
+
+        two_a = (2*a).store()
+
+        fortran_src = """
+            subroutine dgemm(m,n,k,a,b,c)
+              implicit none
+              real*8 a(m,k),b(k,n),c(m,n),mysum
+              integer m,n,k,i,j,ell
+
+              do j = 1,n
+                do i = 1,m
+                  mysum = 0
+                  do ell = 1,k
+                     mysum = mysum + b(ell,j)*a(i,ell)
+                  end do
+                  c(i,j) = mysum
+                end do
+              end do
+            end subroutine
+            """
+
+        dgemm_knl = lp.parse_fortran(fortran_src)["dgemm"]
+
+        c = ctx.call_kernel(
+                dgemm_knl, a=two_a, b=b,
+
+                # FIXME: Avoid having to pass these
+                m="m", n="n", k="k")
+
+        # FIXME: Loopy global temporaries would allow this:
+        # ctx.output(5*c)
+
+        ctx.output(c)
+
+        prog = ctx.build().program
+
+    print(prog)
+    a_c = np.random.randn(10, 5)
+    b_c = np.random.randn(5, 20)
+
+    prog = lp.set_options(prog, write_code=True)
+    evt, (c_c,) = prog(queue, a=a_c, b=b_c)
+
+    assert la.norm(2*a_c@b_c - c_c) < 1e-13
+
+
+if __name__ == "__main__":
+    if len(sys.argv) > 1:
+        exec(sys.argv[1])
+    else:
+        from pytest import main
+        main([__file__])
+
+# vim: foldmethod=marker
-- 
GitLab


From 5fd12a818bfb1e5f44f161a321a19f9580db3232 Mon Sep 17 00:00:00 2001
From: Andreas Kloeckner <inform@tiker.net>
Date: Fri, 1 May 2020 04:34:39 -0500
Subject: [PATCH 43/46] Add fparser to requirements

---
 requirements.txt | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/requirements.txt b/requirements.txt
index 7ca0048..ee195dd 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1 +1,3 @@
+git+https://github.com/inducer/f2py
+
 git+https://github.com/inducer/loopy.git@kernel_callables_v3-edit0.2
-- 
GitLab


From 885c20a9090d5ea88bf90b0e3b25d17e6a1eed43 Mon Sep 17 00:00:00 2001
From: Matt Wala <wala1@illinois.edu>
Date: Fri, 1 May 2020 04:37:00 -0500
Subject: [PATCH 44/46] Remove a debugging print statement

---
 arrayzy/utils.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arrayzy/utils.py b/arrayzy/utils.py
index cb30cd5..1af1e8e 100644
--- a/arrayzy/utils.py
+++ b/arrayzy/utils.py
@@ -55,7 +55,6 @@ def substitute(expression, variable_assignments={}, **kwargs):
     variable_assignments = variable_assignments.copy()
     variable_assignments.update(kwargs)
     from pymbolic.mapper.substitutor import make_subst_func
-    print("ASSIGNMENTS", variable_assignments)
     return LoopyAwareSubstitutionMapper(
             make_subst_func(variable_assignments))(expression)
 
-- 
GitLab


From 8f003a58378d39a5be362eae2d6bf73454abcfaf Mon Sep 17 00:00:00 2001
From: Andreas Kloeckner <inform@tiker.net>
Date: Fri, 1 May 2020 05:06:16 -0500
Subject: [PATCH 45/46] Add demo notebook

---
 .../Computing and DG with Lazy Arrays.ipynb   | 520 +++++++++++++++++-
 1 file changed, 507 insertions(+), 13 deletions(-)

diff --git a/experiments/Computing and DG with Lazy Arrays.ipynb b/experiments/Computing and DG with Lazy Arrays.ipynb
index 733083e..56ce522 100644
--- a/experiments/Computing and DG with Lazy Arrays.ipynb	
+++ b/experiments/Computing and DG with Lazy Arrays.ipynb	
@@ -9,19 +9,21 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 22,
+   "execution_count": 55,
    "metadata": {},
    "outputs": [],
    "source": [
     "import numpy as np\n",
+    "import numpy.linalg as la\n",
     "import arrayzy as az\n",
     "import pyopencl as cl\n",
-    "import loopy as lp"
+    "import loopy as lp\n",
+    "import matplotlib.pyplot as plt"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 56,
    "metadata": {},
    "outputs": [
     {
@@ -29,8 +31,8 @@
      "output_type": "stream",
      "text": [
       "Choose platform:\n",
-      "[0] <pyopencl.Platform 'Intel(R) OpenCL' at 0x2df3398>\n",
-      "[1] <pyopencl.Platform 'Portable Computing Language' at 0x7fca01de0740>\n"
+      "[0] <pyopencl.Platform 'Intel(R) OpenCL' at 0x3477388>\n",
+      "[1] <pyopencl.Platform 'Portable Computing Language' at 0x7fe1a8ea4740>\n"
      ]
     },
     {
@@ -62,9 +64,18 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 57,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "ASSIGNMENTS {'_0': Variable('_0'), '_1': Variable('_1')}\n",
+      "ASSIGNMENTS {'_0': Variable('_out_dim0'), '_1': Variable('_out_dim1')}\n"
+     ]
+    }
+   ],
    "source": [
     "with az.Context(queue) as ctx:\n",
     "    A = ctx.argument(\"A\", shape=\"n, n\", dtype=np.float64)\n",
@@ -75,7 +86,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": 58,
    "metadata": {},
    "outputs": [
     {
@@ -112,7 +123,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 24,
+   "execution_count": 59,
    "metadata": {},
    "outputs": [
     {
@@ -140,7 +151,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 25,
+   "execution_count": 60,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -150,7 +161,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 26,
+   "execution_count": 61,
    "metadata": {},
    "outputs": [
     {
@@ -163,7 +174,7 @@
        "       [0., 0., 0., 0., 0.]])"
       ]
      },
-     "execution_count": 26,
+     "execution_count": 61,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -172,6 +183,489 @@
     "result - (2*A+5)"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Incorporating a Fortran Kernel"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 62,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/home/andreas/src/loopy/loopy/kernel/creation.py:2402: LoopyWarning: 'lang_version' was not passed to make_kernel(). To avoid this warning, pass lang_version=(2018, 2) in this invocation. (Or say 'from loopy.version import LOOPY_USE_LANGUAGE_VERSION_2018_2' in the global scope of the calling frame.)\n",
+      "  return make_kernel(*args, **kwargs)\n"
+     ]
+    }
+   ],
+   "source": [
+    "fortran_src = \"\"\"\n",
+    "    subroutine dgemm(m,n,k,a,b,c)\n",
+    "      implicit none\n",
+    "      real*8 a(m,k),b(k,n),c(m,n),mysum\n",
+    "      integer m,n,k,i,j,ell\n",
+    "\n",
+    "      do j = 1,n\n",
+    "        do i = 1,m\n",
+    "          mysum = 0\n",
+    "          do ell = 1,k\n",
+    "             mysum = mysum + b(ell,j)*a(i,ell)\n",
+    "          end do\n",
+    "          c(i,j) = mysum\n",
+    "        end do\n",
+    "      end do\n",
+    "    end subroutine\n",
+    "    \"\"\"\n",
+    "\n",
+    "dgemm_knl = lp.parse_fortran(fortran_src)[\"dgemm\"]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 63,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "ASSIGNMENTS {'_0': Variable('_temp_dim0'), '_1': Variable('_temp_dim1')}\n",
+      "---------------------------------------------------------------------------\n",
+      "KERNEL: loopy_kernel\n",
+      "---------------------------------------------------------------------------\n",
+      "ARGUMENTS:\n",
+      "a: type: np:dtype('float64'), shape: (m, k), dim_tags: (N1:stride:k, N0:stride:1) aspace: global\n",
+      "b: type: np:dtype('float64'), shape: (k, n), dim_tags: (N1:stride:n, N0:stride:1) aspace: global\n",
+      "c: type: np:dtype('float64'), shape: (m, n), dim_tags: (N1:stride:n, N0:stride:1) aspace: global\n",
+      "k: ValueArg, type: np:dtype('int32')\n",
+      "m: ValueArg, type: np:dtype('int32')\n",
+      "n: ValueArg, type: np:dtype('int32')\n",
+      "---------------------------------------------------------------------------\n",
+      "DOMAINS:\n",
+      "{  :  }\n",
+      "[k, m] -> { [_temp_dim0, _temp_dim1] : 0 <= _temp_dim0 < m and 0 <= _temp_dim1 < k }\n",
+      "[k, m] -> { [ai, ai_0] : 0 <= ai < m and 0 <= ai_0 < k }\n",
+      "[k, n] -> { [ai_1, ai_2] : 0 <= ai_1 < k and 0 <= ai_2 < n }\n",
+      "[m, n] -> { [c_i0, c_i1] : 0 <= c_i0 < m and 0 <= c_i1 < n }\n",
+      "[m, n] -> { [ai_3, ai_4] : 0 <= ai_3 < m and 0 <= ai_4 < n }\n",
+      "---------------------------------------------------------------------------\n",
+      "INAME IMPLEMENTATION TAGS:\n",
+      "_temp_dim0: None\n",
+      "_temp_dim1: None\n",
+      "ai: None\n",
+      "ai_0: None\n",
+      "ai_1: None\n",
+      "ai_2: None\n",
+      "ai_3: None\n",
+      "ai_4: None\n",
+      "c_i0: None\n",
+      "c_i1: None\n",
+      "---------------------------------------------------------------------------\n",
+      "TEMPORARIES:\n",
+      "_temp: type: np:dtype('float64'), shape: (m, k), dim_tags: (N1:stride:k, N0:stride:1) scope:global\n",
+      "---------------------------------------------------------------------------\n",
+      "INSTRUCTIONS:\n",
+      "  for _temp_dim1, _temp_dim0\n",
+      "↱     \u001b[36m_temp[_temp_dim0, _temp_dim1]\u001b[0m = \u001b[35m2*a[_temp_dim0, _temp_dim1]\u001b[0m  {id=\u001b[32m_store_temp\u001b[0m}\n",
+      "│ end _temp_dim1, _temp_dim0\n",
+      "└ \u001b[36m[ai_3,ai_4]: c[ai_3, ai_4]\u001b[0m = \u001b[35mdgemm(m, n, k, [ai,ai_0]: _temp[ai, ai_0], [ai_1,ai_2]: b[ai_1, ai_2])\u001b[0m  {id=\u001b[32m_call_dgemm\u001b[0m}\n",
+      "---------------------------------------------------------------------------\n",
+      "---------------------------------------------------------------------------\n",
+      "KERNEL: dgemm\n",
+      "---------------------------------------------------------------------------\n",
+      "ARGUMENTS:\n",
+      "a: type: np:dtype('float64'), shape: (m, k), dim_tags: (N0:stride:1, N1:stride:m) aspace: global\n",
+      "b: type: np:dtype('float64'), shape: (k, n), dim_tags: (N0:stride:1, N1:stride:k) aspace: global\n",
+      "c: type: np:dtype('float64'), shape: (m, n), dim_tags: (N0:stride:1, N1:stride:m) aspace: global\n",
+      "k: ValueArg, type: np:dtype('int32')\n",
+      "m: ValueArg, type: np:dtype('int32')\n",
+      "n: ValueArg, type: np:dtype('int32')\n",
+      "---------------------------------------------------------------------------\n",
+      "DOMAINS:\n",
+      "[n, m] -> { [j, i] : 0 <= j < n and 0 <= i < m }\n",
+      "[k] -> { [ell] : 0 <= ell < k }\n",
+      "---------------------------------------------------------------------------\n",
+      "INAME IMPLEMENTATION TAGS:\n",
+      "ell: None\n",
+      "i: None\n",
+      "j: None\n",
+      "---------------------------------------------------------------------------\n",
+      "TEMPORARIES:\n",
+      "mysum: type: np:dtype('float64'), shape: () scope:auto\n",
+      "---------------------------------------------------------------------------\n",
+      "INSTRUCTIONS:\n",
+      "   for j, i\n",
+      "↱      \u001b[36mmysum\u001b[0m = \u001b[35m0\u001b[0m  {id=\u001b[32minsn0\u001b[0m}\n",
+      "│      for ell\n",
+      "└↱       \u001b[36mmysum\u001b[0m = \u001b[35mmysum + b[ell, j]*a[i, ell]\u001b[0m  {id=\u001b[32minsn1\u001b[0m}\n",
+      " │     end ell\n",
+      " └     \u001b[36mc[i, j]\u001b[0m = \u001b[35mmysum\u001b[0m  {id=\u001b[32minsn2\u001b[0m}\n",
+      "   end j, i\n",
+      "---------------------------------------------------------------------------\n"
+     ]
+    }
+   ],
+   "source": [
+    "with az.Context(queue) as ctx:\n",
+    "    a = ctx.argument(\"a\", shape=\"m,k\", dtype=np.float64)\n",
+    "    b = ctx.argument(\"b\", shape=\"k,n\", dtype=np.float64)\n",
+    "\n",
+    "    two_a = (2*a).store()\n",
+    "\n",
+    "    c = ctx.call_kernel(\n",
+    "            dgemm_knl, a=two_a, b=b,\n",
+    "\n",
+    "            # FIXME: Avoid having to pass these\n",
+    "            m=\"m\", n=\"n\", k=\"k\")\n",
+    "\n",
+    "    ctx.output(c)\n",
+    "\n",
+    "    prog = ctx.build().program\n",
+    "\n",
+    "print(prog)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 64,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\u001b[36m#\u001b[39;49;00m\u001b[36mdefine lid(N) ((int) get_local_id(N))\u001b[39;49;00m\u001b[36m\u001b[39;49;00m\n",
+      "\u001b[36m#\u001b[39;49;00m\u001b[36mdefine gid(N) ((int) get_group_id(N))\u001b[39;49;00m\u001b[36m\u001b[39;49;00m\n",
+      "\u001b[36m#\u001b[39;49;00m\u001b[36mif __OPENCL_C_VERSION__ < 120\u001b[39;49;00m\u001b[36m\u001b[39;49;00m\n",
+      "\u001b[36m#\u001b[39;49;00m\u001b[36mpragma OPENCL EXTENSION cl_khr_fp64: enable\u001b[39;49;00m\u001b[36m\u001b[39;49;00m\n",
+      "\u001b[36m#\u001b[39;49;00m\u001b[36mendif\u001b[39;49;00m\u001b[36m\u001b[39;49;00m\n",
+      "\n",
+      "\u001b[34mstatic\u001b[39;49;00m \u001b[36mvoid\u001b[39;49;00m \u001b[32mdgemm\u001b[39;49;00m(\u001b[36mint\u001b[39;49;00m \u001b[34mconst\u001b[39;49;00m m, \u001b[36mint\u001b[39;49;00m \u001b[34mconst\u001b[39;49;00m n, \u001b[36mint\u001b[39;49;00m \u001b[34mconst\u001b[39;49;00m k, __global \u001b[36mdouble\u001b[39;49;00m \u001b[34mconst\u001b[39;49;00m *__restrict__ a, __global \u001b[36mdouble\u001b[39;49;00m \u001b[34mconst\u001b[39;49;00m *__restrict__ b, __global \u001b[36mdouble\u001b[39;49;00m *__restrict__ c);\n",
+      "\u001b[34mstatic\u001b[39;49;00m \u001b[36mvoid\u001b[39;49;00m \u001b[32mdgemm\u001b[39;49;00m(\u001b[36mint\u001b[39;49;00m \u001b[34mconst\u001b[39;49;00m m, \u001b[36mint\u001b[39;49;00m \u001b[34mconst\u001b[39;49;00m n, \u001b[36mint\u001b[39;49;00m \u001b[34mconst\u001b[39;49;00m k, __global \u001b[36mdouble\u001b[39;49;00m \u001b[34mconst\u001b[39;49;00m *__restrict__ a, __global \u001b[36mdouble\u001b[39;49;00m \u001b[34mconst\u001b[39;49;00m *__restrict__ b, __global \u001b[36mdouble\u001b[39;49;00m *__restrict__ c)\n",
+      "{\n",
+      "  \u001b[36mdouble\u001b[39;49;00m mysum;\n",
+      "\n",
+      "  \u001b[34mfor\u001b[39;49;00m (\u001b[36mint\u001b[39;49;00m j = \u001b[34m0\u001b[39;49;00m; j <= -\u001b[34m1\u001b[39;49;00m + n; ++j)\n",
+      "    \u001b[34mif\u001b[39;49;00m (-\u001b[34m1\u001b[39;49;00m + m >= \u001b[34m0\u001b[39;49;00m)\n",
+      "      \u001b[34mfor\u001b[39;49;00m (\u001b[36mint\u001b[39;49;00m i = \u001b[34m0\u001b[39;49;00m; i <= -\u001b[34m1\u001b[39;49;00m + m; ++i)\n",
+      "      {\n",
+      "        mysum = \u001b[34m0.0\u001b[39;49;00m;\n",
+      "        \u001b[34mfor\u001b[39;49;00m (\u001b[36mint\u001b[39;49;00m ell = \u001b[34m0\u001b[39;49;00m; ell <= -\u001b[34m1\u001b[39;49;00m + k; ++ell)\n",
+      "          mysum = mysum + b[n * ell + j] * a[k * i + ell];\n",
+      "        c[n * i + j] = mysum;\n",
+      "      }\n",
+      "}\n",
+      "__kernel \u001b[36mvoid\u001b[39;49;00m \u001b[32m__attribute__\u001b[39;49;00m ((reqd_work_group_size(\u001b[34m1\u001b[39;49;00m, \u001b[34m1\u001b[39;49;00m, \u001b[34m1\u001b[39;49;00m))) loopy_kernel(\u001b[36mint\u001b[39;49;00m \u001b[34mconst\u001b[39;49;00m m, \u001b[36mint\u001b[39;49;00m \u001b[34mconst\u001b[39;49;00m k, __global \u001b[36mdouble\u001b[39;49;00m \u001b[34mconst\u001b[39;49;00m *__restrict__ a, \u001b[36mint\u001b[39;49;00m \u001b[34mconst\u001b[39;49;00m n, __global \u001b[36mdouble\u001b[39;49;00m \u001b[34mconst\u001b[39;49;00m *__restrict__ b, __global \u001b[36mdouble\u001b[39;49;00m *__restrict__ c, __global \u001b[36mdouble\u001b[39;49;00m *__restrict__ _temp)\n",
+      "{\n",
+      "  \u001b[34mfor\u001b[39;49;00m (\u001b[36mint\u001b[39;49;00m _temp_dim1 = \u001b[34m0\u001b[39;49;00m; _temp_dim1 <= -\u001b[34m1\u001b[39;49;00m + k; ++_temp_dim1)\n",
+      "    \u001b[34mif\u001b[39;49;00m (-\u001b[34m1\u001b[39;49;00m + m >= \u001b[34m0\u001b[39;49;00m)\n",
+      "      \u001b[34mfor\u001b[39;49;00m (\u001b[36mint\u001b[39;49;00m _temp_dim0 = \u001b[34m0\u001b[39;49;00m; _temp_dim0 <= -\u001b[34m1\u001b[39;49;00m + m; ++_temp_dim0)\n",
+      "        _temp[k * _temp_dim0 + _temp_dim1] = \u001b[34m2.0\u001b[39;49;00m * a[k * _temp_dim0 + _temp_dim1];\n",
+      "  dgemm(m, n, k, &(_temp[\u001b[34m0\u001b[39;49;00m]), &(b[\u001b[34m0\u001b[39;49;00m]), &(c[\u001b[34m0\u001b[39;49;00m]));\n",
+      "}\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "a_c = np.random.randn(10, 5)\n",
+    "b_c = np.random.randn(5, 20)\n",
+    "\n",
+    "prog = lp.set_options(prog, write_code=True)\n",
+    "evt, (c_c,) = prog(queue, a=a_c, b=b_c)\n",
+    "\n",
+    "assert la.norm(2*a_c@b_c - c_c) < 1e-13"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Running DG"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 65,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "c = 1\n",
+    "\n",
+    "from dg_tools import DGDiscr1D, DGOps1D\n",
+    "discr = DGDiscr1D(0, 2*np.pi, nelements=20, nnodes=4)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 66,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def weak_flux( vec):\n",
+    "    return (vec + dg.face_swap(vec)) / 2 * c * dg.normals\n",
+    "\n",
+    "def strong_flux(vec):\n",
+    "    return c * dg.normals * vec - weak_flux(vec)\n",
+    "\n",
+    "def advec_op(vec):\n",
+    "    return -dg.inv_mass(\n",
+    "            dg.face_mass(strong_flux(dg.interp(vec)))\n",
+    "            - c * dg.stiffness(vec))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 68,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "ASSIGNMENTS {'_0': Variable('_interp_reduce'), '_1': Variable('_1')}\n",
+      "ASSIGNMENTS {'_0': Variable('_1'), '_1': Variable('_0')}\n",
+      "ASSIGNMENTS {'_0': Variable('_0'), '_1': Variable('_1')}\n",
+      "ASSIGNMENTS {'_0': Variable('_0'), '_1': Variable('_1')}\n",
+      "ASSIGNMENTS {'_0': Variable('_0'), '_1': 1}\n",
+      "ASSIGNMENTS {'_0': Sum((Remainder(Sum((Variable('_0'), -1)), Variable('nelements')), If(Comparison(Remainder(Sum((..., -1)), Variable('nelements')), '<', 0), Variable('nelements'), 0)))}\n",
+      "ASSIGNMENTS {'_0': Variable('_0'), '_1': 0}\n",
+      "ASSIGNMENTS {'_0': Sum((Remainder(Sum((Variable('_0'), 1)), Variable('nelements')), If(Comparison(Remainder(Sum((..., 1)), Variable('nelements')), '<', 0), Variable('nelements'), 0)))}\n",
+      "ASSIGNMENTS {'_0': Variable('_face_swap_array0_dim0'), '_interp_reduce': Variable('_face_swap_array0_interp_reduce')}\n",
+      "ASSIGNMENTS {'_0': Variable('_face_swap_array1_dim0'), '_interp_reduce': Variable('_face_swap_array1_interp_reduce')}\n",
+      "ASSIGNMENTS {'_0': Variable('_0'), '_1': Variable('_1')}\n",
+      "ASSIGNMENTS {'_0': Variable('_0'), '_1': Variable('_1')}\n",
+      "ASSIGNMENTS {'_0': Variable('_0'), '_1': Variable('_1')}\n",
+      "ASSIGNMENTS {'_0': Variable('_0'), '_1': Variable('_1')}\n",
+      "ASSIGNMENTS {'_0': Variable('_0'), '_1': Variable('_1'), '_interp_reduce': Variable('_interp_reduce_0')}\n",
+      "ASSIGNMENTS {'_0': Variable('_0'), '_1': Variable('_1')}\n",
+      "ASSIGNMENTS {'_0': Variable('_0'), '_1': Variable('_1')}\n",
+      "ASSIGNMENTS {'_0': Variable('_1'), '_1': Variable('_0')}\n",
+      "ASSIGNMENTS {'_0': Variable('_face_mass_reduce'), '_1': Variable('_1')}\n",
+      "ASSIGNMENTS {'_0': Variable('_1'), '_1': Variable('_0')}\n",
+      "ASSIGNMENTS {'_0': Variable('_stiffness_reduce'), '_1': Variable('_1')}\n",
+      "ASSIGNMENTS {'_0': Variable('_1'), '_1': Variable('_0')}\n",
+      "ASSIGNMENTS {'_0': Variable('_0'), '_1': Variable('_1')}\n",
+      "ASSIGNMENTS {'_0': Variable('_0'), '_1': Variable('_1')}\n",
+      "ASSIGNMENTS {'_0': Variable('_0'), '_1': Variable('_1')}\n",
+      "ASSIGNMENTS {'_0': Variable('_1'), '_1': Variable('_0')}\n",
+      "ASSIGNMENTS {'_0': Variable('_inv_mass_reduce'), '_1': Variable('_1')}\n",
+      "ASSIGNMENTS {'_0': Variable('_1'), '_1': Variable('_0')}\n",
+      "ASSIGNMENTS {'_0': Variable('_0'), '_1': Variable('_1')}\n",
+      "ASSIGNMENTS {'_0': Variable('_out_dim0'), '_1': Variable('_out_dim1'), '_interp_reduce': Variable('_out_interp_reduce'), '_interp_reduce_0': Variable('_out_interp_reduce_0'), '_face_mass_reduce': Variable('_out_face_mass_reduce'), '_stiffness_reduce': Variable('_out_stiffness_reduce'), '_inv_mass_reduce': Variable('_out_inv_mass_reduce')}\n",
+      "---------------------------------------------------------------------------\n",
+      "KERNEL: loopy_kernel\n",
+      "---------------------------------------------------------------------------\n",
+      "ARGUMENTS:\n",
+      "_out: type: <auto/runtime>, shape: (nelements, nnodes), dim_tags: (N1:stride:nnodes, N0:stride:1) aspace: global\n",
+      "face_mass: type: np:dtype('float64'), shape: (nnodes, 2), dim_tags: (N1:stride:2, N0:stride:1) aspace: global\n",
+      "interp: type: np:dtype('float64'), shape: (2, nnodes), dim_tags: (N1:stride:nnodes, N0:stride:1) aspace: global\n",
+      "inv_mass: type: np:dtype('float64'), shape: (nnodes, nnodes), dim_tags: (N1:stride:nnodes, N0:stride:1) aspace: global\n",
+      "nelements: ValueArg, type: np:dtype('int32')\n",
+      "nnodes: ValueArg, type: np:dtype('int32')\n",
+      "normals: type: np:dtype('float64'), shape: (nelements, 2), dim_tags: (N1:stride:2, N0:stride:1) aspace: global\n",
+      "stiffness: type: np:dtype('float64'), shape: (nnodes, nnodes), dim_tags: (N1:stride:nnodes, N0:stride:1) aspace: global\n",
+      "u: type: np:dtype('float64'), shape: (nelements, nnodes), dim_tags: (N1:stride:nnodes, N0:stride:1) aspace: global\n",
+      "---------------------------------------------------------------------------\n",
+      "DOMAINS:\n",
+      "{  :  }\n",
+      "[nelements, nnodes] -> { [_face_swap_array0_dim0, _face_swap_array0_interp_reduce] : 0 <= _face_swap_array0_dim0 < nelements and 0 <= _face_swap_array0_interp_reduce < nnodes }\n",
+      "[nelements, nnodes] -> { [_face_swap_array1_dim0, _face_swap_array1_interp_reduce] : 0 <= _face_swap_array1_dim0 < nelements and 0 <= _face_swap_array1_interp_reduce < nnodes }\n",
+      "[nelements, nnodes] -> { [_out_dim0, _out_dim1, _out_face_mass_reduce, _out_interp_reduce, _out_interp_reduce_0, _out_inv_mass_reduce, _out_stiffness_reduce] : 0 <= _out_dim0 < nelements and 0 <= _out_dim1 < nnodes and 0 <= _out_face_mass_reduce <= 1 and 0 <= _out_interp_reduce < nnodes and 0 <= _out_interp_reduce_0 < nnodes and 0 <= _out_inv_mass_reduce < nnodes and 0 <= _out_stiffness_reduce < nnodes }\n",
+      "---------------------------------------------------------------------------\n",
+      "INAME IMPLEMENTATION TAGS:\n",
+      "_face_swap_array0_dim0: None\n",
+      "_face_swap_array0_interp_reduce: None\n",
+      "_face_swap_array1_dim0: None\n",
+      "_face_swap_array1_interp_reduce: None\n",
+      "_out_dim0: None\n",
+      "_out_dim1: None\n",
+      "_out_face_mass_reduce: None\n",
+      "_out_interp_reduce: None\n",
+      "_out_interp_reduce_0: None\n",
+      "_out_inv_mass_reduce: None\n",
+      "_out_stiffness_reduce: None\n",
+      "---------------------------------------------------------------------------\n",
+      "TEMPORARIES:\n",
+      "_face_swap: type: <auto/runtime>, shape: (nelements, 2), dim_tags: (N1:stride:2, N0:stride:1) scope:global\n",
+      "---------------------------------------------------------------------------\n",
+      "INSTRUCTIONS:\n",
+      "   for _face_swap_array0_dim0\n",
+      "↱    \u001b[36m_face_swap[_face_swap_array0_dim0, 0]\u001b[0m = \u001b[35mreduce(sum, [_face_swap_array0_interp_reduce], interp[1, _face_swap_array0_interp_reduce]*u[(_face_swap_array0_dim0 + -1) % nelements + (nelements if (_face_swap_array0_dim0 + -1) % nelements < 0 else 0), _face_swap_array0_interp_reduce])\u001b[0m  {id=\u001b[32m_face_swap_array0\u001b[0m}\n",
+      "│  end _face_swap_array0_dim0\n",
+      "│  for _face_swap_array1_dim0\n",
+      "└↱   \u001b[36m_face_swap[_face_swap_array1_dim0, 1]\u001b[0m = \u001b[35mreduce(sum, [_face_swap_array1_interp_reduce], interp[0, _face_swap_array1_interp_reduce]*u[(_face_swap_array1_dim0 + 1) % nelements + (nelements if (_face_swap_array1_dim0 + 1) % nelements < 0 else 0), _face_swap_array1_interp_reduce])\u001b[0m  {id=\u001b[32m_face_swap_array1\u001b[0m}\n",
+      " │ end _face_swap_array1_dim0\n",
+      " │ for _out_dim1, _out_dim0\n",
+      " └     \u001b[36m_out[_out_dim0, _out_dim1]\u001b[0m = \u001b[35m(-1)*reduce(sum, [_out_inv_mass_reduce], inv_mass[_out_dim1, _out_inv_mass_reduce]*(reduce(sum, [_out_face_mass_reduce], face_mass[_out_inv_mass_reduce, _out_face_mass_reduce]*(normals[_out_dim0, _out_face_mass_reduce]*reduce(sum, [_out_interp_reduce], interp[_out_face_mass_reduce, _out_interp_reduce]*u[_out_dim0, _out_interp_reduce]) + (-1)*((reduce(sum, [_out_interp_reduce_0], interp[_out_face_mass_reduce, _out_interp_reduce_0]*u[_out_dim0, _out_interp_reduce_0]) + _face_swap[_out_dim0, _out_face_mass_reduce]) / 2)*normals[_out_dim0, _out_face_mass_reduce])) + (-1)*reduce(sum, [_out_stiffness_reduce], stiffness[_out_inv_mass_reduce, _out_stiffness_reduce]*u[_out_dim0, _out_stiffness_reduce])))\u001b[0m  {id=\u001b[32m_store_out\u001b[0m}\n",
+      "   end _out_dim1, _out_dim0\n",
+      "---------------------------------------------------------------------------\n"
+     ]
+    }
+   ],
+   "source": [
+    "with az.Context(queue) as ctx:\n",
+    "    dg = DGOps1D(discr, ctx)\n",
+    "    u = ctx.argument(\"u\", shape=\"(nelements, nnodes)\", dtype=np.float64)\n",
+    "    ctx.output(advec_op(u))\n",
+    "    \n",
+    "prog = ctx.build()\n",
+    "print(prog.program)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 69,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\u001b[36m#\u001b[39;49;00m\u001b[36mdefine lid(N) ((int) get_local_id(N))\u001b[39;49;00m\u001b[36m\u001b[39;49;00m\n",
+      "\u001b[36m#\u001b[39;49;00m\u001b[36mdefine gid(N) ((int) get_group_id(N))\u001b[39;49;00m\u001b[36m\u001b[39;49;00m\n",
+      "\u001b[36m#\u001b[39;49;00m\u001b[36mif __OPENCL_C_VERSION__ < 120\u001b[39;49;00m\u001b[36m\u001b[39;49;00m\n",
+      "\u001b[36m#\u001b[39;49;00m\u001b[36mpragma OPENCL EXTENSION cl_khr_fp64: enable\u001b[39;49;00m\u001b[36m\u001b[39;49;00m\n",
+      "\u001b[36m#\u001b[39;49;00m\u001b[36mendif\u001b[39;49;00m\u001b[36m\u001b[39;49;00m\n",
+      "\u001b[36m#\u001b[39;49;00m\u001b[36mdefine LOOPY_CALL_WITH_INTEGER_TYPES(MACRO_NAME) \\\u001b[39;49;00m\u001b[36m\u001b[39;49;00m\n",
+      "\u001b[36m    MACRO_NAME(int8, char) \\\u001b[39;49;00m\u001b[36m\u001b[39;49;00m\n",
+      "\u001b[36m    MACRO_NAME(int16, short) \\\u001b[39;49;00m\u001b[36m\u001b[39;49;00m\n",
+      "\u001b[36m    MACRO_NAME(int32, int) \\\u001b[39;49;00m\u001b[36m\u001b[39;49;00m\n",
+      "\u001b[36m    MACRO_NAME(int64, long)\u001b[39;49;00m\u001b[36m\u001b[39;49;00m\n",
+      "\u001b[36m#\u001b[39;49;00m\u001b[36mdefine LOOPY_DEFINE_MOD_POS_B(SUFFIX, TYPE) \\\u001b[39;49;00m\u001b[36m\u001b[39;49;00m\n",
+      "\u001b[36m    inline TYPE loopy_mod_pos_b_##SUFFIX(TYPE a, TYPE b) \\\u001b[39;49;00m\u001b[36m\u001b[39;49;00m\n",
+      "\u001b[36m    { \\\u001b[39;49;00m\u001b[36m\u001b[39;49;00m\n",
+      "\u001b[36m        TYPE result = a%b; \\\u001b[39;49;00m\u001b[36m\u001b[39;49;00m\n",
+      "\u001b[36m        if (result < 0) \\\u001b[39;49;00m\u001b[36m\u001b[39;49;00m\n",
+      "\u001b[36m            result += b; \\\u001b[39;49;00m\u001b[36m\u001b[39;49;00m\n",
+      "\u001b[36m        return result; \\\u001b[39;49;00m\u001b[36m\u001b[39;49;00m\n",
+      "\u001b[36m    }\u001b[39;49;00m\u001b[36m\u001b[39;49;00m\n",
+      "LOOPY_CALL_WITH_INTEGER_TYPES(LOOPY_DEFINE_MOD_POS_B)\n",
+      "\u001b[36m#\u001b[39;49;00m\u001b[36mundef LOOPY_DEFINE_MOD_POS_B\u001b[39;49;00m\u001b[36m\u001b[39;49;00m\n",
+      "\u001b[36m#\u001b[39;49;00m\u001b[36mundef LOOPY_CALL_WITH_INTEGER_TYPES\u001b[39;49;00m\u001b[36m\u001b[39;49;00m\n",
+      "\n",
+      "__kernel \u001b[36mvoid\u001b[39;49;00m __attribute__ ((reqd_work_group_size(\u001b[34m1\u001b[39;49;00m, \u001b[34m1\u001b[39;49;00m, \u001b[34m1\u001b[39;49;00m))) loopy_kernel(\u001b[36mint\u001b[39;49;00m \u001b[34mconst\u001b[39;49;00m nelements, \u001b[36mint\u001b[39;49;00m \u001b[34mconst\u001b[39;49;00m nnodes, __global \u001b[36mdouble\u001b[39;49;00m \u001b[34mconst\u001b[39;49;00m *__restrict__ u, __global \u001b[36mdouble\u001b[39;49;00m \u001b[34mconst\u001b[39;49;00m *__restrict__ interp, __global \u001b[36mdouble\u001b[39;49;00m \u001b[34mconst\u001b[39;49;00m *__restrict__ normals, __global \u001b[36mdouble\u001b[39;49;00m \u001b[34mconst\u001b[39;49;00m *__restrict__ face_mass, __global \u001b[36mdouble\u001b[39;49;00m \u001b[34mconst\u001b[39;49;00m *__restrict__ stiffness, __global \u001b[36mdouble\u001b[39;49;00m \u001b[34mconst\u001b[39;49;00m *__restrict__ inv_mass, __global \u001b[36mdouble\u001b[39;49;00m *__restrict__ _out, __global \u001b[36mdouble\u001b[39;49;00m *__restrict__ _face_swap)\n",
+      "{\n",
+      "  \u001b[36mdouble\u001b[39;49;00m acc__face_swap_array0_interp_reduce;\n",
+      "  \u001b[36mdouble\u001b[39;49;00m acc__face_swap_array1_interp_reduce;\n",
+      "  \u001b[36mdouble\u001b[39;49;00m acc__out_face_mass_reduce;\n",
+      "  \u001b[36mdouble\u001b[39;49;00m acc__out_interp_reduce;\n",
+      "  \u001b[36mdouble\u001b[39;49;00m acc__out_interp_reduce_0;\n",
+      "  \u001b[36mdouble\u001b[39;49;00m acc__out_inv_mass_reduce;\n",
+      "  \u001b[36mdouble\u001b[39;49;00m acc__out_stiffness_reduce;\n",
+      "\n",
+      "  \u001b[34mfor\u001b[39;49;00m (\u001b[36mint\u001b[39;49;00m _face_swap_array0_dim0 = \u001b[34m0\u001b[39;49;00m; _face_swap_array0_dim0 <= -\u001b[34m1\u001b[39;49;00m + nelements; ++_face_swap_array0_dim0)\n",
+      "    \u001b[34mif\u001b[39;49;00m (-\u001b[34m1\u001b[39;49;00m + nnodes >= \u001b[34m0\u001b[39;49;00m)\n",
+      "    {\n",
+      "      acc__face_swap_array0_interp_reduce = \u001b[34m0.0\u001b[39;49;00m;\n",
+      "      \u001b[34mfor\u001b[39;49;00m (\u001b[36mint\u001b[39;49;00m _face_swap_array0_interp_reduce = \u001b[34m0\u001b[39;49;00m; _face_swap_array0_interp_reduce <= -\u001b[34m1\u001b[39;49;00m + nnodes; ++_face_swap_array0_interp_reduce)\n",
+      "        acc__face_swap_array0_interp_reduce = acc__face_swap_array0_interp_reduce + interp[_face_swap_array0_interp_reduce + nnodes] * u[nnodes * (loopy_mod_pos_b_int32(_face_swap_array0_dim0 + -\u001b[34m1\u001b[39;49;00m, nelements) + (loopy_mod_pos_b_int32(_face_swap_array0_dim0 + -\u001b[34m1\u001b[39;49;00m, nelements) < \u001b[34m0\u001b[39;49;00m ? nelements : \u001b[34m0\u001b[39;49;00m)) + _face_swap_array0_interp_reduce];\n",
+      "      _face_swap[\u001b[34m2\u001b[39;49;00m * _face_swap_array0_dim0] = acc__face_swap_array0_interp_reduce;\n",
+      "    }\n",
+      "  \u001b[34mfor\u001b[39;49;00m (\u001b[36mint\u001b[39;49;00m _face_swap_array1_dim0 = \u001b[34m0\u001b[39;49;00m; _face_swap_array1_dim0 <= -\u001b[34m1\u001b[39;49;00m + nelements; ++_face_swap_array1_dim0)\n",
+      "    \u001b[34mif\u001b[39;49;00m (-\u001b[34m1\u001b[39;49;00m + nnodes >= \u001b[34m0\u001b[39;49;00m)\n",
+      "    {\n",
+      "      acc__face_swap_array1_interp_reduce = \u001b[34m0.0\u001b[39;49;00m;\n",
+      "      \u001b[34mfor\u001b[39;49;00m (\u001b[36mint\u001b[39;49;00m _face_swap_array1_interp_reduce = \u001b[34m0\u001b[39;49;00m; _face_swap_array1_interp_reduce <= -\u001b[34m1\u001b[39;49;00m + nnodes; ++_face_swap_array1_interp_reduce)\n",
+      "        acc__face_swap_array1_interp_reduce = acc__face_swap_array1_interp_reduce + interp[_face_swap_array1_interp_reduce] * u[nnodes * ((_face_swap_array1_dim0 + \u001b[34m1\u001b[39;49;00m) % nelements + ((_face_swap_array1_dim0 + \u001b[34m1\u001b[39;49;00m) % nelements < \u001b[34m0\u001b[39;49;00m ? nelements : \u001b[34m0\u001b[39;49;00m)) + _face_swap_array1_interp_reduce];\n",
+      "      _face_swap[\u001b[34m1\u001b[39;49;00m + \u001b[34m2\u001b[39;49;00m * _face_swap_array1_dim0] = acc__face_swap_array1_interp_reduce;\n",
+      "    }\n",
+      "  \u001b[34mfor\u001b[39;49;00m (\u001b[36mint\u001b[39;49;00m _out_dim1 = \u001b[34m0\u001b[39;49;00m; _out_dim1 <= -\u001b[34m1\u001b[39;49;00m + nnodes; ++_out_dim1)\n",
+      "    \u001b[34mif\u001b[39;49;00m (-\u001b[34m1\u001b[39;49;00m + nelements >= \u001b[34m0\u001b[39;49;00m)\n",
+      "      \u001b[34mfor\u001b[39;49;00m (\u001b[36mint\u001b[39;49;00m _out_dim0 = \u001b[34m0\u001b[39;49;00m; _out_dim0 <= -\u001b[34m1\u001b[39;49;00m + nelements; ++_out_dim0)\n",
+      "      {\n",
+      "        acc__out_inv_mass_reduce = \u001b[34m0.0\u001b[39;49;00m;\n",
+      "        \u001b[34mfor\u001b[39;49;00m (\u001b[36mint\u001b[39;49;00m _out_inv_mass_reduce = \u001b[34m0\u001b[39;49;00m; _out_inv_mass_reduce <= -\u001b[34m1\u001b[39;49;00m + nnodes; ++_out_inv_mass_reduce)\n",
+      "        {\n",
+      "          acc__out_stiffness_reduce = \u001b[34m0.0\u001b[39;49;00m;\n",
+      "          acc__out_face_mass_reduce = \u001b[34m0.0\u001b[39;49;00m;\n",
+      "          \u001b[34mfor\u001b[39;49;00m (\u001b[36mint\u001b[39;49;00m _out_stiffness_reduce = \u001b[34m0\u001b[39;49;00m; _out_stiffness_reduce <= -\u001b[34m1\u001b[39;49;00m + nnodes; ++_out_stiffness_reduce)\n",
+      "            acc__out_stiffness_reduce = acc__out_stiffness_reduce + stiffness[nnodes * _out_inv_mass_reduce + _out_stiffness_reduce] * u[nnodes * _out_dim0 + _out_stiffness_reduce];\n",
+      "          \u001b[34mfor\u001b[39;49;00m (\u001b[36mint\u001b[39;49;00m _out_face_mass_reduce = \u001b[34m0\u001b[39;49;00m; _out_face_mass_reduce <= \u001b[34m1\u001b[39;49;00m; ++_out_face_mass_reduce)\n",
+      "          {\n",
+      "            acc__out_interp_reduce = \u001b[34m0.0\u001b[39;49;00m;\n",
+      "            acc__out_interp_reduce_0 = \u001b[34m0.0\u001b[39;49;00m;\n",
+      "            \u001b[34mfor\u001b[39;49;00m (\u001b[36mint\u001b[39;49;00m _out_interp_reduce_0 = \u001b[34m0\u001b[39;49;00m; _out_interp_reduce_0 <= -\u001b[34m1\u001b[39;49;00m + nnodes; ++_out_interp_reduce_0)\n",
+      "              acc__out_interp_reduce_0 = acc__out_interp_reduce_0 + interp[nnodes * _out_face_mass_reduce + _out_interp_reduce_0] * u[nnodes * _out_dim0 + _out_interp_reduce_0];\n",
+      "            \u001b[34mfor\u001b[39;49;00m (\u001b[36mint\u001b[39;49;00m _out_interp_reduce = \u001b[34m0\u001b[39;49;00m; _out_interp_reduce <= -\u001b[34m1\u001b[39;49;00m + nnodes; ++_out_interp_reduce)\n",
+      "              acc__out_interp_reduce = acc__out_interp_reduce + interp[nnodes * _out_face_mass_reduce + _out_interp_reduce] * u[nnodes * _out_dim0 + _out_interp_reduce];\n",
+      "            acc__out_face_mass_reduce = acc__out_face_mass_reduce + face_mass[_out_face_mass_reduce + \u001b[34m2\u001b[39;49;00m * _out_inv_mass_reduce] * (normals[\u001b[34m2\u001b[39;49;00m * _out_dim0 + _out_face_mass_reduce] * acc__out_interp_reduce + -\u001b[34m1.0\u001b[39;49;00m * ((acc__out_interp_reduce_0 + _face_swap[\u001b[34m2\u001b[39;49;00m * _out_dim0 + _out_face_mass_reduce]) / \u001b[34m2.0\u001b[39;49;00m) * normals[\u001b[34m2\u001b[39;49;00m * _out_dim0 + _out_face_mass_reduce]);\n",
+      "          }\n",
+      "          acc__out_inv_mass_reduce = acc__out_inv_mass_reduce + inv_mass[nnodes * _out_dim1 + _out_inv_mass_reduce] * (acc__out_face_mass_reduce + -\u001b[34m1.0\u001b[39;49;00m * acc__out_stiffness_reduce);\n",
+      "        }\n",
+      "        _out[nnodes * _out_dim0 + _out_dim1] = -\u001b[34m1.0\u001b[39;49;00m * acc__out_inv_mass_reduce;\n",
+      "      }\n",
+      "}\n",
+      "\n"
+     ]
+    },
+    {
+     "ename": "RuntimeError",
+     "evalue": "input argument 'interp' must be supplied",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mRuntimeError\u001b[0m                              Traceback (most recent call last)",
+      "\u001b[0;32m<ipython-input-69-4ac91224156c>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m      3\u001b[0m \u001b[0mlp_prog\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mprog\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mprogram\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      4\u001b[0m \u001b[0mlp_prog\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mset_options\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlp_prog\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mwrite_code\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 5\u001b[0;31m \u001b[0mlp_prog\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mqueue\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mu\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mu\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
+      "\u001b[0;32m~/src/loopy/loopy/program.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m    389\u001b[0m             \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_program_executor_cache\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpex\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    390\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 391\u001b[0;31m         \u001b[0;32mreturn\u001b[0m \u001b[0mpex\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    392\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    393\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0m__str__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32m~/src/loopy/loopy/target/pyopencl_execution.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, queue, **kwargs)\u001b[0m\n\u001b[1;32m    356\u001b[0m         return program_info.invoker(\n\u001b[1;32m    357\u001b[0m                 \u001b[0mprogram_info\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcl_kernels\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mqueue\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mallocator\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mwait_for\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 358\u001b[0;31m                 out_host, **kwargs)\n\u001b[0m\u001b[1;32m    359\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    360\u001b[0m \u001b[0;31m# }}}\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32m~/src/pytools/pytools/py_codegen.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m    197\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    198\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0m__call__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 199\u001b[0;31m         \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfunc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    200\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    201\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0m__getstate__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32m<generated code>\u001b[0m in \u001b[0;36minvoke_loopy_kernel_loopy_kernel\u001b[0;34m(_lpy_cl_kernels, queue, allocator, wait_for, out_host, nelements, nnodes, u, interp, normals, face_mass, stiffness, inv_mass, _out)\u001b[0m\n",
+      "\u001b[0;31mRuntimeError\u001b[0m: input argument 'interp' must be supplied"
+     ]
+    }
+   ],
+   "source": [
+    "u = np.sin(discr.nodes()).reshape(discr.nelements, discr.nnodes)\n",
+    "\n",
+    "lp_prog = prog.program\n",
+    "lp_prog = lp.set_options(lp_prog, write_code=True)\n",
+    "lp_prog(queue, u=u)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 70,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[<matplotlib.lines.Line2D at 0x7fe14cfe1990>,\n",
+       " <matplotlib.lines.Line2D at 0x7fe14cfe1bd0>,\n",
+       " <matplotlib.lines.Line2D at 0x7fe14cfe1d90>,\n",
+       " <matplotlib.lines.Line2D at 0x7fe14cfe1f50>]"
+      ]
+     },
+     "execution_count": 70,
+     "metadata": {},
+     "output_type": "execute_result"
+    },
+    {
+     "data": {
+      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYIAAAD4CAYAAADhNOGaAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAgAElEQVR4nOydd1gUZ9eH79kCLEU6SBURRAFBxYq9o8bejYmmx1RLTDQx3fTEFEsSY/Q19thib4hYsBdAikrv0nvdMt8fS5Akms8YDCpzX9deujNnZs6zMXNmfuc85xFEUURCQkJCoukia2wHJCQkJCQaFykQSEhISDRxpEAgISEh0cSRAoGEhIREE0cKBBISEhJNHEVjO3A32NjYiG5ubo3thoSEhMQDxcWLF/NEUbT98/YHMhC4ublx4cKFxnZDQkJC4oFCEISUW22XpCEJCQmJJo4UCCQkJCSaOFIgkJCQkGjiSIFAQkJCookjBQIJCQmJJk6DBAJBEFYJgpAjCELUbfYLgiB8JwhCvCAIkYIgdKy3b7ogCHG1n+kN4Y+EhISExJ3TUG8E/wOC/mb/UMCz9vMs8D2AIAhWwLtAV6AL8K4gCJYN5JOEhISExB3QIIFAFMXjQMHfmIwCfhH1nAEsBEFwAIYAh0VRLBBFsRA4zN8HlH/FldB4rhxLpapMfa8ucd+iq6igZP9+Ctavp+r6df5p+/EbxVVsu5jOujMp5JRW3SMvJSQkboVOpyM5OZn9u/ei1Wob/Pz/1YQyJyCt3vf02m232/4XBEF4Fv3bBK6urnflxKktP1FekEmocTda+HendRcH3P1tMVA9kPPq/l90NTWUnzhB8d59lIQEI1TV1O0rN1WQ3daOUr+WiB19MW/RClvk2Gt12NZUIRZlk5WeRElOKrqSLJqp8xkkFFCCCUt2jyClxViGtncjyKc5liYGjThKCYmHE1EUycjIICoqiqgrUZSVlyEXZbTz9MG5jVuDXuuBuQOKorgCWAHQqVOnu1pNZ8CwkUQEHyA5cz8J586QfLkrSmNv3NrZ4tHJDjc/G5QG8gb1+79GVKspP3OWkn37KA0ORldaSoWxnJNtdVxvb01rByeMY29gGVeMc2wW7ucz4ecwcszhXAuBK24CUS0EdCoRW60WcwMZFtaGWCttcDRpS4fSXD7MWk1O1i6WpIzgw9/60dXTkRH+jgzytsfMSNnYP4GExAOLKIpkZ2frb/5RURQVFSETZDhrrOgsuGFr14zmzo4Nft3/KhBkAC71vjvXbssA+v5pe+i9cqJZmildVYPpOHQIJ2O2kJNyAJlwgZTIriRc9kRppKSlnw2enexw9bZGrnwwiqpErZaKixf1N/+Dh9AWFqIzNiLCW8W+VjIKfBx50tKXF85tQpl4BQxB7GpLeU8bbhSZkpGspSa9ksCrZfSP1L925tqakOJlQYKHAZecqjkhFqBRF4IRTA2cxqtp1/kw7X/MU+5lZcZoFlzrwXyFEf297Bjh70j/NnaoHvCgKiHxX5Gbm0t0dDRRUVHk5eUhCAIuKnv81G1xk9lj2MaMo+fXcOFaIpZDWuDg6dWg1xcaaqlKQRDcgD2iKPreYt9w4CVgGPrE8HeiKHapTRZfBH6vIroEBIii+Hf5Bjp16iTeTa+h9KRUSo6l0eyqDqWTKWXeVYQd2khuciKmVnZYu/ajKNeV6gotBioF7u1t8Oxkj1MbS+Ty+ysoiKJIVWQkJfv2UbL/AJqcHASVisquPvzmlsdu2zTsLJx5tvVkRkTuQxl3kGqXnmxvPouDWSrOpJRSpdYhE6CdkzmBHjb0cLPEtyob7YVzlJ8+Q8XFi4hVVSCTYeTjjRDgxwG3EpZVH6CleUs+aTken4sbIPU0Ncb2BFtN5aMbXcgoEzE2kDPI254Rfo70am2DoUIKChIS9SksLKy7+d+4cQMAV0cX3LX2OKWqMFYaYdy1OdeKz3Fq90ZMLK3w7v04PSf1QxCEu7qmIAgXRVHs9JftDREIBEHYiP7J3gbIRl8JpAQQRfEHQe/1UvSJ4ArgCVEUL9Qe+yTwZu2pPhJFcfX/d727CQSiKLJ69WoyMzMZ2rE/DhcEdDU6zIe6kW2QxumtG8lJTsDcvjmeXYejrvYgKbKAmiotRqZKWnW0w7OTHQ4eFshkd/cf4d8iiiLV165RsncfJfv2oc7IQFAqMendm/RubiwzPktE2VWcTJ14zu85HqmRodw7G6rLSOn4BmMv+pJfoaG1vSmBrWwIbGVNV3drzFW3lnN0NTVURURQfvoM5WfPUhkRATodFS9M5nW7oxRUFfK8//M8ZeqJ4tgXkHoK0cyBZK9nWF3dh93RBRRWqDEzUhDk05wR/o4EtrJGcZ8FVQmJ/4qSkpK6m39GRgYAzs7OtG3RGucsU+Sx5QhKGabdHdF4ytj/8zdkJ8bRulsfKiu7kZuqZtzrATR3N7+r69/TQPBfc7dvBOXl5WzevJnU1FR6d+tJu0x7qq8XYehpgeU4T1ISIji1ZQM5SfqA0HnkBEws/Ui4nE9yRB4atQ4bF1MeeckfE3PDezCy26OrrCRj7muUhYSAXI5JYCDNhg0lvI0hy+P/R2xBLM6mzjzr9yyPOPVBefhtuLwOmvtxwOtDXg6uwMXSmBWPB+BhZ3ZXPmhLSsicv4CykBCMJ45led9q9qUdxM/Wj497fESLglQI/RRSwsC0OdrAVwkzf4Sd0YUcir5BabWGljYm/O+JzrSwNmngX0hC4v5FFEXCwsI4cuQIoijSvHlzfH198bJviex8CZVX8hCUckwDHTHp6ciVkwc5vv5/KAwM6DbuKaJOqqgqVTNghjceAXZ37YcUCGrRaDTs3r2biIgIfHx8GOjYjYoDqaCQYTnaA5WfDYmXznN66wayE+Mxt7On65hJeHTpTXJkIcc3X8fYTMnIV9tjbmvcwCO7NdqiItJmvkBlRAS2r76K+YTxnCgL54eIH4gtiMXFzIVn/Z5luPtwlGnnYcdzUJyO2GMOn1eP4vsTafTwsGb51ADMjf9dMlfUaslZvJiCn1dhEtid2FnDeT/6KzQ6Da91eo0JrScgJJ+sDQgnwdQeesyiyv9xjsSXsvC3K8hlMtY82Rkfx7t7qpGQeJDQ6XQcPnyY06dP4+3tTb9+/bDQGlMSknozAPRwxLSnExXVxRz8/ltSIi/j1j4A796PceLXDAyM5Ax7wQ+7Fs3+lS9SIKjH79E5ODgYJycnxg0ejWZPBjVppaj8bbEc1QpBpSDp8gVObdlAdmIczWzt6TpmIrYtOrPv+2gEGYx4uT22rnf3dH2nqLOzSXv6aWqSU3D48gsutFXwQ8QPXC24iouZC8/5Pcdw9+EodFo4+hGEfQeWblQ+spxXwgw4HJPNo11deW+kD8oGlGSKtm0n6733MHB2RrX4Q95L+5HTWafp5dSL9wPfx9bYFn4PCMknagPCqyS4TuCxX65QWqXhp+md6OZu3WA+SUjcb2i1Wnbu3ElkZCRdunRhYIfelIak6QOAof4NwLSnE3ITJVdPHefIyuVoNGr6THsKUfDh1PYEbF3MGDbTD1PLf69CSIHgFly9epVt27ahUqmYMmkKJlfVlBxJRW6qxHJCa4w8LRFFkaTwC5zeupEb8dexc2vFoGff5ODKeKorNQyf6YeT172ZDF2dmETq00+hKy6h+ZJvWVi5idD0UFzNXHnO/zmGtRyGQqaA7GjY/ixkR0HADLK6vs2TG2O5dqOEtx/xZkag210nl/6OivPnSX/5FURRxOm7b9jVLJHFFxejUqh4p/s7DGoxSG+YHAbHPoWk42Dpxo2x25m2JZ3Uggq+m9yBIN/mDe6bhERjU1NTw6+//kp8fDz9+/envcydkj1J+gDQwxGznk7IjJVUlZVxZNX3XA07RnOP1gx5fjZXQsuICcuiVQdbBjzh3WBl7VIguA1ZWVls3LiRyspKxo0bh7upEwWbr6HJrcS0hyPmQW4ISjmiKBJ3Nox9S77E1s2doS+9zYEV1ynOrWDwUz606nD3ut2tqLxyhbRnnwNBwHHF97ybt4ZDKYeYGzCXad7T9AFAp4XTyyDkQzAyh5FLuazqyjO/XKRarWXJ1A709WpYv/5MTWoqaTNfoCYlhebvvkPh4AAWnFhATH4MI1uNZH6X+ZgZ1L41JYbCpmlgZk/RpJ3M2JJCZHoRH49px+QudzdJUELifqSiooINGzaQkZHB8OHDaaNzomh7PEbe1liN90RWK9GmXAnnwPffUF5YQPfxU/AfOIZDP8eQcb2IgKEt6DrCHaEBi1OkQPA3lJaWsmnTJjIyMhg4cCCBXbpRciCFslOZKOxUWE30wsBZfzOLP3+GXYs/xsnLm2GvLOTQyqtkJ5XQZ6oXPr1uOSn6H1MWFkb6y6+gsLTEeeVPfJixkl0Ju3it02tM96nty1eUCjtm6nX4No/AiG/ZFV/Da1sisG9myKrpnfG0v7ey1e9oS0vJmD2H8pMnsZo+HcvXZvFT9M/8FPkTdsZ2fNTzIzo376w3TjkNa8eAdSsqpv7GzG1JHLuey7whXrzQt9U9eXORkPgvKS4uZt26dRQUFDBu3Djcamwo2HwNo9aWWD/mjaCQoa6p5uSGNVzavwtLR2eGvTQXQxNH9i6LpLSwiv6PtcWra8O/KUuB4P9BrVazc+dOoqKiaN++PY888giapFIKt1xHW6am2UBXzPq4IMgFYsOOsW/Jl7j5d2TYywsIXn2N1Oh8uo5yJyCoxb+6mZXs30/G629g6O6Oy4oVfJ70E5uubeIF/xeY2X4miCJEbIR9r+sPGPoZov8Uvj4Sz3dH4ujsZskP0wKwNv1vq5pEjYbsTz+jcN06TPv0wfGrL4mqTODNE2+SVprG496P83LHlzGUG0JCCGyYBM3boX50B/N2JfJbeCZP9mjJwuFtG608V0Li35KXl8fatWuprKxkypQpNK8wI399LIZu5tg84YOglJOdGM++pV9RkJFGh6AR9Jo6nRuJFRxYEYVcITBspt9dl4f+f0iB4A4QRZFjx44RGhqKq6srkyZNQiUYULgzgcqIXAxczbCa6IXCRsWVkEMc+vE7PLsEMvTleYSuv871s9n49Xem53jPu3qdK9iwgewPF6EK6IjzsmV8F/8zq6NWM8NnBnMC5iBUFMCeVyF2N7gGwpgfqDJ1Zu6WCPZGZjGuozMfj/Vt1MlbhRs3cmPRRxi6u+P8/fdo7C1ZfHExm69txsPCg097fYqXlRdc3Qebp4FrN3RTt/DhoWRWhyUzur0jX0zwb9DEtoTEf0FGRgbr169HEASmTZuGZakReWuiMXA0xeZpX1AKnN+5jVNb1mPczJwhL8zGza8DUcfSOb45Dsvmxgx/0Y9m1qp75qMUCP4BUVFR/Pbbb5iamjJ16lTs7OyoiMihcEcCgkLA7qUOKCwMubRvJ0fX/ETbXv0Ien4Wp3YkEnEkDc/O9gyY3ha54s5uZqIokrd0GXnLlmHarx9OXy/mp2trWBq+lImtJ7Kw20KEmnJYNQTyrkP/hdD9JXLK1DzzywUiM4p5I6gNz/V2vy+klbKwMDJmzUZQKnFeuhTjjh04kX6Cd069Q4W6gnXD1uFp6QlXtsK2p8FjAOKk9Sw/mc4XB6/R18uW5Y92xNjggWmFJdHESUhIYNOmTZiYmPDYY49hWqIgb1UUChsVts+0Q2as5MiqHwg/uAevwN4MeGomhioTTm6N58rRdFq0s2bwUz4YGN3bf/NSIPiHpKens2nTJtRqNePHj8fT0xN1TgU5y8JRWBth+7w/MgM5Z7ZvJmzzWvwGBjHgqRcIP5zG6R0JuHpbEfRcO5SGf/90Lmq13Fi0iKKNmzAfMwaHDz9g7bUNfHHhC0a4j2BRz0XIRODXx+DaPnh0C3gMJCqjmGd+uUBxpZqvJ7VniM/9VXlTnZhI2syZaDKzcPhoEeYjR3Kj/AZT9k7BUG7IpuGbsDCygEtrYddL+jzHhDVsupjJmzuu4O9iwarpnaXOphL3PVFRUWzfvh1bW1umTZuGYRHkrryCvJkBts/5ITc1IDL4AId/WkrA8NH0ffxpqis1HPopitSYAvwHuhA41uM/kURvFwik9+/b4OzszDPPPIOlpSUbNmzg7NmzKGxVWE1pgzqrnMKt+p7+XcdMpMuo8UQGH+D4ulV0GOxKv8fakBZbwM5vLv/t2ge6mhoy5r5G0cZNWD/9FA4ff8TWxB18ceELBrUYxAc9PkAmyPSll1f3wOBF4DGQg9E3mPDDaQRgy/Pd77sgAGDo7o7bpk2oOnQg8/U3yPn6G+xVdnzT7xtyK3KZe2wuap0aOj4GQZ/px/fbTCZ3cmb5owFEZ5Yw4cfTZBZVNvZQJCRuy7lz59i6dSvOzs7MmDEDwzIZuauikJkosX26HXJTA9Jjojiy6nvc2gfQe9oTFOdWsu2zC6RfLaTftDb0HO/Z6Hkx+XvvvdeoDtwNK1aseO/ZZ5+959cxMjKiXbt25ObmcubMGcrLy2nT1Re5oYKyk5kIMgEjdwtcff2pLC3l8v5dAHQM6oG1sylXQjNIDM/Fzc8Gwz+teaAtKyf9hRcoDw3F7vXXsX3xRfYm7eXdsHfp6dSTxX0Wo5QrIXoH7JsH7achDniH748lsGD7FXyczNnwTFda2pje89/hbpGpVJgPH44mL4/CX9ZSfT0Oj2GTcLR0ZW3MWoqqi+jt3BucO4FMCWe/h7IcPHqOo5ObFZvOpbIzPIM+XnZYSW8GEvcRoigSGhpKcHAwXl5eTJkyBVmJlryfriBTyLB91g+FpRHFOdlsXbQQUysbxi14n9zUKnZ9E466WsvwF/1p1fHelnf/mffffz/rvffeW/Hn7VIg+H9QKBR4e3uj0Wg4e/YsN27coOOw7mgLqikLy0TZ3ASlvQkt/TtSkp/LpX27UBoZ4dunEw4e5sScyOT6uWxcfaxRmelvZpqCAtKefIrKiEgcPvkYqylTOJJ6hPnH5xPQPIAl/ZdgqDCErAjYMBmcAmDiGj7aH8eSkHhG+Duy4rEAzFX3/81RkMsx7dcPmZkphWvXUn7mNF2mz6NKrGF97HpsVDb42PhAi0DQ1OiDQU0ZLp2G08fLlm2XMth8PpVu7tY0Nzdq7OFISKDT6di3bx+nT5+mffv2jB07Fko05P0UCTqwfdYPpY2KmqpKti1aSFVFGRPe/pjCGwK7l0RgYmHI6Nkd/nW7iLvhdoFAkobuAJlMxqBBgwgKCuL69eucPHkSy7GeGLiYUfDrNWqyyhFkMgY/9zKtu/fi+LpVhB/ah1NrS8a81hGtVmT7lxe5kVRMTXoGKVMfpTohAedlS7EYPZqwjDDmHZuHj7UPS/ovwUhhBGU5sHEqGFvBpHXsiy1g5ckkHu/egu8mt8dI+eC0dRYEAesZM3D6ejFVEZHkfPY5szrOoqdTTz45+wnnb5zXGw54B7o8B6eXQuin+Dias21md8yMlEz96QzHr+c27kAkmjwajYatW7dy4cIFevTowahRo6BcQ+7PV9BV67B5yhelnTGiTsf+pYvJS0vlkVffwEBlzeFVMVg2N2Hc6wFY2P83fcruFCkQ/AO6du2Kr68vR48eJSUjVT85xEhB/ppotGU1yGRyhr00B/eOnTny83Kijx3BxtmMcfMCMDRWsvOrS1x85m00BQW4rvoZs759uXDjArOOzsLd3J3lA5djojQBTbW+tLIiHyZvIKXahDe2RtLexYKFw73vi8qgu6FZUBBWTzxB4YYNlB86zOe9P8fZzJk5oXNIL00HQYCgT6HDNH1eJOxbWlibsPX57rSwNuGpNefZFZHZ2MOQaKJUV1ezfv16YmJiGDx4MIMGDUJXriZ3ZRS6UjU2T/pg4KiXak9t3Uj8+dP0eewpXH3bc+jnaLQaHUOe8cHI5P5bxU8KBP8AQRAYMWIElpaWbNu2jSq5GpvHvdGWqclfH4uo0SFXKBkxewGuvn4c/P5brp8Nw9xWxeiZXqjKsohwnojlktUYd+zIldwrvBTyEg6mDvw46EfMDc31E8b2zIG0szB6OdV27XhxwyUEAZZO7YDBHZak3q/YzZmNyt+frLcWYnijkKUDlqIVtbwc8jLl6nKQyWDEd+AzFg6/A+d+wq6ZEZuf60YHV0vmbA4nMr2osYch0cQQRZHdu3eTnJzM6NGjCQwMRFepIW9VFJqCKqyne2Poqpd6rp0+yZltG/HpO5COw0ZydncSWfHF9H3UC8vm92f79Qf7rtIIGBoaMmHCBCoqKti+fTsKRxOsxntSk1RC0a4ERFFEYWDAqHlv4+Dpxd5vvyDx8nlKvv6UdhHLkRsbciyknNjcqzwf/DwWhhb8NOgnrFW1XTjP/gDh66D36+A7lk/2XSUqo4QvJ/jjbHl/vU7eDYJSidPir0ChIGPWbFyMHPiyz5ckFiey4MQCdKIOZHIYuwJaD4V9r0H4RpoZKfnpsU7YmRny6qZwyqs1jT0UiSZEZGQkUVFR9O3bl/bt26Or1pK3Ogp1dgXWj7XFqJUFANlJCRxY/jWOrdsy8OkXSY0p4NKBFLx7OtK6y/1X3fc7DRIIBEEIEgThmiAI8YIgzL/F/q8FQQiv/VwXBKGo3j5tvX27GsKfe42DgwNDhw4lISGBsLAwjNvbYdbXhfJzNyg/kwWAgZGKMfPfxcalBbu+WERSyGGcn5lK/+k+5KSUsuynzRgpjFg5eCX2Jvb6E8cfgYNv6mvq+y5g/5Us/ncqmad6tmTwfVgierconZxw/ORjqmJiyPnscwIdA5nXaR5H046yLHyZ3kiuhAn/g5Z9YOcLELMTc2Mliye1Jzm/nPd3RzfqGCSaDgUFBezduxdXV1d69eqFqNaR/0s0NWmlWE1ug8rLCoDyokJ2frEIlVkzRs59k6oyHcGrY7B2MqHXRM9GHsXf868DgSAIcmAZMBTwBqYIguBd30YUxdmiKLYXRbE9sATYXm935e/7RFEc+W/9+a8ICAjAx8eHkJAQUlJSaDa4BUZtrSjanUBVvD7OGZmYMvKJ51FVVHHBw4ma/n0xaq0m2fEybVJ78lmrJTibOetPmBcPW58A27Yw5kdSC6t4fWsk/i4WvBHUpvEGeo8w698fqxkzKFy/npIDB3m07aOM9RzLisgVHEg6oDdSGsGUjeDcBbY+BdcP0c3dmhf7evDrhXT2RmY17iAkHnq0Wi3btm1DEATGjh2LoIP89bFUJxRjOaE1xu1sANCo1ez66mMqS0sYNW8hKjNzDv0chUatY8gzvigaqI30vaIh3gi6APGiKCaKolgDbAJG/Y39FGBjA1y3UamfL9i6dSsVlRVYTfJCYWtMwYZYNPmViGo1Be++R7cbxZja2LL9s/eYtelpzrrvxsRGSdSvhfoJZ1XFsHEyyBQwZSPVchUvbazNC0x58PMCt8NuzmyM/PzIWrgQdXo6b3V9iw52HXg77G2i82uf+A1M4NFfwd5bP7s66TivDvSkvYsFC7ZHkiFNOJO4h4SGhpKRkcGIESMwb2ZOweZrVF0twGK0ByYd9W/yoigSvHIZmddjCXphNvYtW3Hu97zA1Ps3L1CfhrjDOAFp9b6n1277C4IgtABaAiH1NhsJgnBBEIQzgiCMvt1FBEF4ttbuQm7u/VFGaGRkVJcv2LFjBxjIsHlc/zKUtyaGnKU/UBURSct332XcOx9TRiV+J+CbXp/zyLMdqCyt4ei6WMQtT0JhEkz8BSxb8Mm+q0SmF/PFBH9crB78vMDtEAwMcFq8GGQyMmbNRqGFxX0XY2Fkwashr5JXmac3NDKHaTvAsiVsehRlWRbfTm6PVicye1M4Wt2D1yZF4v4nOTmZEydO0L59e3x9fSk5ol9a0nxYS0y7OdTZXdq3i+jQYLqNm4JX956kRudz8UAK3j0c7kkr6XvBf/2oORnYKoqitt62FrW9L6YC3wiC0OpWB4qiuEIUxU6iKHaytbX9L3y9IxwcHAgKCiI+Pp6wsDAU1iqsHm2LJreCyggZzUaPodnQoezI3c/B9pkY1yjI2XcKW1czuo1qRWJ4HjFXBBj2Jbj15ECUPi/wZI+W92XriIbGwLk2XxAdTc7nX2CjsmFJ/yWU1JQw6+gsarQ1ekMTa71MpNPA7ldoYWXMh6N9OZdcwPeh8Y07CImHjsrKSrZv346VlRVDhw6lJqOM0qNpGHeww6y3c51dcsQljq39GY/O3QkcP4WywmoO/54XmNS6EUfwz2iIQJABuNT77ly77VZM5k+ykCiKGbV/JgKhQIcG8Ok/pVOnTn/IFyhtBGoS96Bo7odx4AwSixJZdnkZHfx602XEeKJDg0m8fJ72tmE4G4RzsvxZCp0nkppfwbytkfg7mzN/6MOXF7gdZgMGYDV9OoXr1lFy8BBtrNqwqMciInIj+OD0B9Q1RrRqCQPfh/hguLyOMR2cGOnvyNfBcVxKLWzcQUg8NPxeKlpWVsa4ceMwkCsp3HIdmYkCixHudXYFmRns+fYzbFxcGfrSHEQRDq+KfmDyAvVpiEBwHvAUBKGlIAgG6G/2f6n+EQShDWAJnK63zVIQBMPav9sAPYCYBvDpP+X3fIGFhQVbt24l6f33qY7eh6GHAeWnstm6Yw3GSmPe6vYW3SdMxdrZlcPLF1O9czYD251DYWTEwZXRvLL+IgBLp3Z8aPMCt8Nu7py6fEFNWhqD3QYz038mOxN28kvMLzcNOz8NLXrCwTcRSjJZNMYXB3MjZm0Kp7Tq9g3+JCTulMuXLxMTE0P//v1xcnKi5Gga6hvlWI65ucRkVXkZv33xITKZnFHz3sbASMW5PUlkxhU9MHmB+vzru40oihrgJeAgEAv8KopitCAIHwiCUL8KaDKwSfxj3+u2wAVBECKAo8Cnoig+cIEA9PmCiRMnUlFWxpGyMmxefhmbGZ0psKtg3PXefNhyITYqGxRKJUGPP0p5SQmheW0xeXQ5/R9vS356GRYJFXwx/uHOC9wOfb7gKxAEMmbPQVdTw/P+zzOoxSAWX1zMyYyTekOZDEYtrZOImhkq+GZSe9ILK3h3p1RSKvHvyMvLY//+/bi5uREYGPgHSUjlrZ/ro9Np2fvdFxRnZzFy7puY29mTGqPPC7R9gPIC9WmQx05RFORjtiYAACAASURBVPeJothaFMVWoih+VLvtHVEUd9WzeU8Uxfl/Ou6UKIrtRFH0r/3z54bwp7Gwrq6mfUQEWY6OXPNuS1JZMq9YfkSVkZrWRyzQllSDupLmZ96ks10O0bkmJF5L4Jpcy2UDDZ2rlfjI7v9GcvcKA2dnHD/+iKqoKHK++BKZIGNRj0V4Wnjy+rHXSSpO0hv+SSLq5GbFKwM82X45g53ht1MlJST+Ho1Gw7Zt21AoFIwZMwZBxy0loePr/0dy+EUGPDUT57a+lBdVE7w6BiuHBysvUJ+mpT/cQ8SaGjJem4dnRiZt3d05EhLCogOL0KpEbKf7IlZpyPslBnHHK5AVQffn38Xa2ZUD33/HW5vPke+hwqK5McH/i6GytKaxh9NomA0ciNX0xylcu5aSw4cxVhrzXf/vUMqVvBLyCsXVxXrDehIRxem81M+DTi0sWbgjirSCisYdhMQDydGjR8nKymLkyJGYm5vfUhKKPnaEi3t20CFoBH4DgtBpdRz6ORp1tZYhz/iifIDyAvWRAkEDkbtkKVVRUTh++AGjJk5EbizHJs6Gef7zsHNzxmqSF+r0MgojHGDAOyh8HmHAc69SUVxE5+wTfDctgCFP+1JdoSHkl1jEB3DluIbCbu5cjNq1I+vNt6hJT8fR1JHFfReTXpbO/BPz9b/NHySiV1HIBL6e1B6AWZvD0Wh1jTwKiQeJxMREwsLCCAgIoG3btreUhG4kxHF4xRJcff3p+/jTAJzfm0xmXBF9pnph5fBg5QXqIwWCBqD8zFnyV67EYsIEmg0eTGZVJiGWIah0KkovlaLT6VA1L8ZMuZUK7SCq7GcAsPqalgvmHfAsjkWdEo2Nsyndx7Qi+Uo+UcearsQhGBjg9PViADJmz0GsqSHAPoB5neZxMuMkexL36A3/JBG5WBmzaIwvF1MKWRIilZRK3Bnl5eXs2LEDGxsbhgwZgqjR/UUS0mm1HFqxBJVZMx6ZPR+ZXE5aTAEX9ifTNtCBNvXmFTyISIHgX6IpLCTzjTcwcHPDfsF8NDoNC8MWojZV02dgH+Li4jh96hTsnUsz1W4UVkqKdiZwKDKTVWFJtBk6Tl9F9OMSqsrL8OvvjKuPNWHb4snPLGvs4TUaBs7OOHz8EVVXrpD95ZcATG4zGT9bP7688OVtJaJR7Z0Y29GJJSFxXEguaMQRSDwIiKLIrl27KC8v15eKGhhQEpL6F0no8oE95CYn0u+J51CZmlFeVM3h1dH6vMDkBzMvUB8pEPwLRFHkxjvvoCkowPHLL5AZG7Mmeg1X8q7wZtc36RfYD29vb4KPBJOaeBVhwHwsxnihya/i8uartHMyZ8EIX4JemE15cRGha1YiCAIDprfFwEjO4Z+j0ai1/78jDynNBg3C8rHHKPxlLaXBwcgEGe90e4fi6mK+ufSN3qi+RLTrFRBFPhjli7OlMa9uCqe4Uioplbg9Fy9e5Nq1awwcOBAHBwe9JBT6R0moND+PsF/X0bJDJzy7BD40eYH6SIHgX1C0ZQulh4OxmzULlY8PCUUJLAtfxkDXgQS5BSEIAiMH98GCUrbKRlPhMwVZS3POG8NErYJlQd4YKuQ0b+VJl1HjiT4WTOKl8xg3M9CXlGaUc3p7QmMPs1Gxm/caRr6+ZNbmC7ysvHi07aNsvb6V8JxwvdHvElHCEbi8DlNDBd9Obs+NkioW/hbVpPMtErcnJyeHAwcO0KpVK7p161ZPEjL4Q5XQ0TUrELVaBjz5PIIgPDR5gfpIgeAuqU5MJPvjTzAJ7I7VEzPQ6DS8HfY2JkoT3ur2Vt0qYkZhXzBB3EM5xuzYuYtP98fyYUUpMqUc1bGMuptUt3FTsHFpweEVeonIrZ0Nfv2ciTyaTvKVvMYcaqMi+z1fIIp1+YIX2r+AvbE9H5z5ALWu9om/89Pg1qtOIurgasmcQa3ZHZHJ9ktNN98icWt+LxU1MDBg9OjRyGSyepKQR50klHjpPHFnT9Ft3GTM7ZqTFqvPC7R5CPIC9ZECwV2gq6khY+5ryIyMcPjkUwSZrE4SeqvrW9io9K1pybgI51fi2GUUg4cMIS4ujrNnTjMysAXWw1tSHV9EZYS+gZ5CqWTIzFl1EhFA97GtsHYyIeSXWCpKmm5JqYGLCw4fLaLqyhVyvlqMidKEBV0WEFcYx4bYDXojmQxGLvmDRPR8n1Z0bWnFOzujSM4rb9xBSNxXBAcHk52dzahRozAzM7ulJKSuruLIqh+wcnKh04gxlBdXc3iVPi/Q+yHIC9RHCgR3Qe7X31AdG4vDxx+htLf7gyQ0xG2I3kirgd2zwNQe+r+Ft39HbgjWBCgzmdndHpMuDihdzCjak4iuQv9U+2eJSKGUM+gpH2qqtBxZE4PYhLtsNhs8GMupUylYs4bKyEj6u/anj3MfloUvI6usdl2CP0hEa5HXlpTKZQKvbg5HLZWUSgBxcXGcOXOGzp074+XlVSsJXfuLJHRm+2ZKcrMZ9PSLyBVKwrbGU1OpZcjTD0deoD5SIPiHlJ0Mo2D1aiymTMasf//bSkKcXwk3IiHoEzAyZ+nReI5XOqNUyDl6JBhBJmA52gNduZriQyl15/+DRFRWhrWjKT3GeZAaXUDk0fRGGvX9ge2c2chtbbix6CMQRRZ0XYAoinx67tObRnUS0VtQnI6jhYpPx/kRkVbEN8HXG895ifuCsrIyfvvtN2xtbRk8eDBArSRU8QdJKC8thQu7t+PTZyDO3r5kxhURdz6bDkNcsXJ8OPIC9ZECwT9AU1BA5oL5GHi0wv711wFuLQmVZEHIImg1AHzGkJhbxqqTSQwLcKdvn95cvXqVhIQEDJxMMQ10pPxsFtWpJYBeIqqrIvpFLxH59nHCzc+GUzviyUsvbZSx3w/ITU2xmzuXqshIin/biZOpE8/7P09IWgihaaF6ozqJSFsnEQ1r58CkTi4sD03gTGJ+o45BovEQRZGdO3dSVVXF+PHjUSqVt5SE9AvNLMdAZUzvaU+g04kc33wdUytDOg5p0cijuDdIgeAfcOODD9EVFeP01VfIVKo6SWhQi0E3JSGAgwtAWwPDv0QEPtgTg6FCzutBXnTv3h1LS0v279+PVqul2eAWyM0MKNoRj6jVSz/27h50GTWhTiISBIH+j7XByFhJ8OoYdE1Y4jAfORKVvz85X32FtrSUx30ex8PCg0/OfkKFura1hFVLGHRTIgJ4Z4Q3btYmzNkcTkWNtPB9U+Ty5cvExcUxePBg7O3tbysJRR87QsbVaHo/+gTGzcyJOZFBfnoZPcZ5PnSS0O9IgeAOKT97jtIDB7Ce+TxGXl5/kITe7PrmTUkoLhiid0DveWDlTsjVHEKv5TJroCd2ZkYoFAqCgoLIy8vj7NmzyAwVmI9ohTqrnLLTmXXX6zZu8h8kIpWZAX2meJGfUU70iczbePnwI8hk2C9ciLaggLxly1HKlCzstpDM8kx+jPzxpmGnp/4gEZkYKvhivB+ZxVX8cCyx8QYg0ShUVVURHByMq6srXbp0AW4tCVWWlnBs3Socvbzx7TuQqjI1Z3Yl4uRlQauO98+CWA2NFAjuAFGrJfuTT1A6OmL95JPAbSQhdSXsmwvWntDjFarUWj7YE0MrWxMe7+5Wdz4vLy88PT0JDQ2ltLQUla81Rl6WlBxKQVNcDfxZIvoJgJbtbXDysuDc7iSqypvuRClVO18sxo+jYN06qhMSCLAPYIzHGH6J/oW4wji90S0kok5uVozwd+THYwmkF0qN6ZoSx48fp6KigqAg/fyeW0lCoO8sWlNRzsCnX0CQyTi7K5GaSi29Jra++bD3ECIFgjugaOs2qq9exW6evmT0tpLQia+gMBkeWQwKQ34+mURKfgXvjfT5y0IzQ4YMQaPRcOTIEQRBwGKUB6JOpHj3zQlkNyWiI3USUc8JramuUHN+T9J/NPr7E9vZs5GpVGR/9BGiKDI7YDamBqYsOrMInVgrnd1CIpo/tA2CAJ/uv9qI3kv8l+Tn53PmzBk6dOiAo6PjbSWh9KvRRB09RMDw0di6upGbVkr0iQza9XHC2sm0EUdw72mQQCAIQpAgCNcEQYgXBGH+LfbPEAQhVxCE8NrP0/X2TRcEIa72M70h/GlItKWl5H77LapOAZgFBel7CZ1cqK8S6lqvSij3Opz8BvwmQ8veZBVXsjQknsHe9vTy/OsrpY2NDd27dyc8PJy0tDQUVkY0G+BKZVQ+lVdv9sj5s0Rk42yKd09HrhzLoCCr6dbGK6yssH35ZcpPnaY0OBhLI0vmBMzhUs4ldsbvvGn4J4nIyULFs71bsScyi/NSL6ImwcGDB1EoFPTv3x+oJwmNvSkJaTUagn9ahpmNLd3HTUEURU5svo6hiZIuI1o2pvv/Cf86EAiCIAeWAUMBb2CKIAjetzDdLIpi+9rPytpjrYB3ga5AF+BdQRAs/61PDUne8u/RFhZiv2ABgiCwJnoNUflRvNX1LaxVta+Uogh754CBMQxeBMAn+66iFUXefuRWP4We3r17Y2pqyv79+9HpdJj1ckJhp6JoZzy6Gn2PoVtJRF1HuqM0lBO2Ne7eDv4+x3LqFAw9Pcj59DN0VVWM8hhFR7uOfHXxKwqratcwvoVE9Hwfd5o3M+KD3THomvDcjKZAQkIC169fp3fv3vqJY+mlekmoox2qtjcloYt7fyM/PZUBTz6P0siIuPPZZMUX0310Kwxrg8XDTEO8EXQB4kVRTBRFsQbYBIy6w2OHAIdFUSwQRbEQOAwENYBPDUJ1UhIFa9diPm7sH3oJ/UUSitwMySf0k5lMbTmbmM+uiEye7+3+t8tOGhoaMnjwYDIzMwkPD0dQyLAc7Ym2sJrSkLQ6uz9LRCozAzoPdyM1uqBJt58QFArs31qIOiOD/J9/RibIeLvb25TXlLP44uKbhn+SiIwNFCwY1oYrGcVsvdS052Y8zGi1Wg4cOIClpWVdL6GC33sJPXJTEirOyeb01o14dO5Gq4Cu1FRpOLUtHrsWZrQNfHjaSPwdDREInIC0et/Ta7f9mXGCIEQKgrBVEASXf3hso5Dz2efIDA2xmzULURT54PQHf5WEKgr0soNzZ+g4HY1Wx7u7onE0N2JmX4//9xrt2rXDxcWF4OBgKisrMXQ3xzjAntLj6aizb0o/dRLRymWoq6to19cZC3tjwrbGo9U03XJSk25dMQsKIn/FT6gzMvCw9GC6z3R+i/+NCzcu3DSsk4gWQnk+I/0d6ehqwecHrkmL3j+kXLhwgdzcXAYPHoxCoaD0RAaa7D9KQqIoErL6BwRBoN+MZwG4uD+F8uIaek1qjSB7eBPE9fmvksW7ATdRFP3QP/Wv+acnEAThWUEQLgiCcCE3N7fBHfwzZSfDKAsNxWbm8yhsbAhJDeFSziVe6fjKTUkI4Mj7UFkIj3wNMhkbz6Vy9UYpbw33RnUHNceCIDBs2DAqKioIDQ0FwHyoGzIjOYU74uua0imUSgY8OZOy/Dwu7duFXCGjx3gPirIruBLatJ9q7V+fB4JA9udfAPCc/3M4mTqx6Mwi1Nram7xMBsO/gpoyOPYZgiDw7ggf8sqqWXa0aXd4fRj5/f+nli1b0qZNG7SlNZQeTcPI2/oPklD8hTMkXjpP4ISpNLOxoyi7gvAjqbTp1pzm7uaNOIL/loYIBBmAS73vzrXb6hBFMV8UxeraryuBgDs9tt45Voii2EkUxU62tve2nlfUaMj+9BOUrq5YPv44aq2axRcX08q8FWM8xtw0TD0LF/8H3WZC83YUltfw5aHrdHe3Zli75nd8PQcHBwICAjh37hw5OTnITQ0wH9qSmuQSKi7m1Nk5e/vi0bkbZ3/bQnlRIS18rXH1tuL83uQmvc6x0tER62efofTgQcrPnEGlUPFm1zdJKE5gTUy9Zw5bLwiYDhd+hrx4/F0sGNvRiVUnk0jJb7qJ94eR0NBQqqqq6spFS46kImq0mA91q7OpqaokZPWP2Lq60WHoSABObo1DrpDRbUyrRvK8cWiIQHAe8BQEoaUgCAbAZGBXfQNBEOoLbSOB2Nq/HwQGC4JgWZskHly7rVEp3LSZmvgE7F+fh8zAgF+v/0pqaSpzOs1BIVPojbRq2DMbmjlB3wUAfHnoGmXVGt4b6fOPa4779++PoaEh+/fvRxRFjAPsMWjRjOJ9iWjrzRnoNfUJtOoaTm/diCAI9Jjgibpay9ndTbuc1PrJJ1E6OenLSdVqejv3ZqDrQH6M+JH00npvTH0XgMIIgt8F4I2gNijkAh/vi73NmSUeNHJycjh//jwBAQHY29ujzqmg/FwWJl0dUNrezNmd2rKBsvw8Bj7zInKFguQreaRcyafz8JaYmBs24gj+e/51IBBFUQO8hP4GHgv8KopitCAIHwiCMLLW7BVBEKIFQYgAXgFm1B5bAHyIPpicBz6o3dZoaAoLyV2yBOPu3TAdMICSmhJ+iPiBrg5d6eXU66bh2R8gJxqGfg6GpkRlFLPhXCqPdWuBV3Ozf3xdExMT+vfvT1JSErGxsfqmdGM80FVpKd5/8yZv5eiE38ChRB45QH56GlYOJrTr40TMiQzy0pvu0pYyIyPsF8ynOi6ewo2bAHijyxvIBBkfn/345uI0pnbQcxZc3QMpp7BvZsSL/Tw4GJ3Nqfimm3h/WBBFkYMHD2JoaEi/fv0AKN6fhKCU02yAa51dTnIil/btxG9AEI6t26JV6zj5axwW9sb49XNuLPcbjQbJEYiiuE8UxdaiKLYSRfGj2m3viKK4q/bvC0RR9BFF0V8UxX6iKF6td+wqURQ9aj+rG8Kff0Pe0mXoSkuxn68vF10ZuZLi6mJe6/Tazaf8ojQ4+gm0HgpthiOKIu/vjsbS2IDZA+++T/nvTzAHDx6kpqYGZXMTTHs5UXEhm+rk4jq77uOnoDQ04vgG/c/V+ZGWGBgrOLnlepNejct0wABMAgPJXbIETX4+zU2a82L7FzmRcYIjqUduGnZ7Ecwc9Ul+nY6nerbE2VLFB3ti0DThPk4PA9evXychIYE+ffpgYmJCVUIRVbEFmPVzQW5qAICo0xG8chlGpmb0nKqfuhQRkkZxbiW9JnkiVzS9ebZNb8R/Q3VcHIWbNmExaSJGXq3JKMtgXew6RrYaSRurNjcND8wHRBj2OQgCuyIyOZ9cyOtDvDD/FzXHcrmcoUOHUlxcTFhYGADNBrgitzDUJ45rb1LGzczpOmYiiRfPkRoViZGJkq4j3Mm4VkRSeNN9qhUEAfu33kRXWUnuN/o1jae2nYqXpRefnPuEcnVtHsDAGAa8A5mXIHo7Rko5bw5ry9UbpWw6n/Y3V5C4n9FoNBw8eBBra2u6dOmin6m/Lwm5uSFmPRzr7K6EHCIr7hp9H3sKlakZZYXVnN+XTEt/G1zrtZtoSkiBoBZRFMn+5FNkJibYvvIKAN9e+ha5IOelDi/dNLy6Ty8r9HkDLFwpr9bw8b5Y2jmZM6GTy23Ofue4ubnh6+vLyZMnKSwsRGYgx2JkKzTZFZSdvJlH7zB0BGY2thxb9zOiTodPL0esHE0I2xaHVt10n2oNW7XCato0irZuo/JKFAqZgne6v0NuRS7LwpfdNPSbBM3bQfD7oK5iqG9zurS0YvHh69KC9w8o586do6CggCFDhiCXy6mIyEWdUUazIDcEpb6Cr6K4iOMbVuPi3Y62vfTS0enazr89xns2pvuNihQIaik7Gkr5qVPYvvQiCktLruReYX/Sfh73eZzmJrUVQDXlsP91sG0L3V8EYOnReLJLqnlvpA/yBqo5HjRoEDKZjIMH9Xlzlbc1Rt7WlASnoimoAkBpYEivyY+Tk5RAbNgxZHIZPSd4UpJXRURI036qtXnpReTW1mQvWoSo0+Fn68eE1hNYH7ue2PzapLBMBoM/guJUOPcjgiDwziPeFFbU8N2Rpj1j+0GkrKyMY8eO4eHhQevWrRHVWkoOJKN0MsXY/2aV4bG1P6OuqmbA0y8gCAKZ8UVcP5dNh8GumNuqGnEEjYsUCACxpoaczz7DwN0dyyn6PiNfXvgSKyMrnvR98qbhsc+hOE0/Z0CuJCmvnJUnEhnb0YmAFg3XGcPc3JxevXrVLWADYDHSHQQoqteUrk2PPti7e3By4y+oa6pxaWuFm58NF/YlU15cfbvTP/TITU2xmzOHyogIinfqC9he6fgKFoYWf2xK594HPIfA8a+gPB9fJ3Mmd3Zhzalk4nOabuL9QeTo0aOo1WqGDNHP+C8Ny0RbXI35sJZ1k8LSYq4Qc+IoXUaNw9rJBZ1O30/I1NKQjkEP54Izd4oUCICCdeupSUnBfv4bCEolIWn6yWMvtn8RE2XtsnRFaXDme/CfAi26A/Bh7YIz84Pa/M3Z744/L2CjsDDCrL8rVbEFVCfqE8eCTEafaU9Smp/LpX36G16PcR5oNTrO7GzaPffNR4/CyN9Pv4BNWRnmhubMDphNZF4kh1IO3TQc9AHUlMLxzwGYO9gLlVLOR3tjGslziX/KjRs3uHTpEp07d8bW1hZtWe3ksbZWGLWyAPTS77G1qzCztqXLmIkAxJzMJC+tjMBxHg/tgjN3SpMPBJr8fPKWL8ekdy9Me/dGrVPz9cWvcTd3Z6zn2JuGoZ8CIvR7E4CQq9mEXM3hlQEe2DUzanC/lEpl3QI2586dA8A00BFZMwOKDyTVVQe5+PjRqlNXzv32KxUlxfryt/4uXD2dRU5KSYP79aAgyGQ0X7gQbX4+ecuWAzDCfQQeFh4subQEta42D2DXBjpO168xnZ+AjakhLw/w4Oi1XEKv5fzNFSTuB0RR5MCBAxgZGdG3b18A/eQxtRbzoTe7hl4/E0Z2YhyBEx9FaWBIVbmaMzsTcGptgUeAXSN5f//Q5ANB7rffoauqwn6+vnv2lmtbSClJYW6nuTcnj+VchYgN0PkZsHClWqPlg90xuNuaMCPw3rWobd26NR4eHoSGhlJWVobMQI75wBbUpJZSFXNz7d1eU2egrq7m9NYNAHQa5obKVMnJX+OadDmpql07zMeNpWDtWqoTE5HL5Lza8VVSS1PZEbfjpuGfJpnNCGyJm7UxH+6JQS2Vk97XxMbGkpycTL9+/VCpVKhzKyg/m4VJFweUdvrJY1qNhrDNv2Dt7Ip3b32CuG7BmUkP94Izd0qTDgRVsbEUbdmC1aNTMXR3p7SmlO8jvqdr8z9NHjvyARiYQq+5APx8Monk/AreHfHXBWcaEkEQCAoKQq1WExwcDIBxgD0KWxXFB5Pr1ji2dnLBb+BQIg7vpyAzHUOVgm6jWpGVUEz8hab9VGtXt4CNflJZH+c+dLTryPcR399c49jMHnrMgtjdkHIaA4WMt4Z7k5BbzrozKY07AInbolarOXToEHZ2dgQE6LvWFO9P1k8eG3hz8ljU0cMUZmXSa+p0ZDI5eemlRB/PwLcJLDhzpzTZQCCKItkff4Lc3BybF14AYOUV/eSxuZ3m3nxKSD0D1/ZCj1fAxJobxVUsDYlnkLc9fVrf+zVMbWxs6NatG+Hh4aSnpyPIBcyHuKHJqaTiUnadXeD4KSgNDTm+/n8AtAl0wMbFlFPb41HXrm3QFFFYW2P78kuUh4VRFhKCIAjMDphNXmUe62LX3TTs/iKYOcCht0AUGdjWjl6eNnx9+DoF5U23j9P9zJkzZygqKiIoKAi5XE51YhFVMfmY9b05eUxdXcXpbRtx9PLGvWMXRFHk+Kb/Y++8w6I60/7/eaYxtKE3aVIVVBRUBEVU7MbEVE3fZJNN2TSzm+3l3fLbd8ubbPqum56Y3mMSYwOxoKAoiF16l14HmHp+f5yRQROz2RUEnfleFxfDzH3OeQbx3PM8z/29PzbgzMrLHzjzXeWwiaBny1b69u8n4JGHUXp50dDbwJvH3uTKmCtJ8EuQgyQJtv0OPIIgTU4Wf/7qOGarxG+uOD9wZrg1b948PDw82LhxI1arFe0kPzThnnRvq0YyyTd5Ny9vUlfdQHlhPrXHDqNQCOaujqO3w0Dx1pqLNtaxKJ+bbkITG0PTn/+CdWCAaYHTmB8+n1ePvErnQKccpHGDrN9A/QE4+jFCCH6zMhG90cKTW0+N7htw6mvq6elh586dTJgwgejoaCSrROeXlSi9NHhm2M1jBzduQN/RTubNdyCEoKywmcayLtJWRaN1v/yBM99VDpkIrAYDzX/7Gy5xcXjfcAMAzxQ9gxCCh5Ifsgee2gw1e2HeT0Hjzv6qdj4rbuDezGgi/M4PnBluubi4sHjxYjvARgi8lo/H0mWkd0/jYFzKFavw8PNnx3rZZDYuzoeYlAAObqqmx+Y/cEQJtZrgX/0KU10d7a/KbTkeSX6EPnMfLx5+0R449UYImiInf9MA8UGe3DIrgrcKqjl5umd0Bu/UNyo7OxuLxcKSJUsA6D9jHltqN4/193Sz77MPiZkxi9CJiRgHzOR9VEZAhCcJQ5zGTjloImh/9TVM9fUE/fIXCJWKI61H+LLiS25PHGIes1pk1oBvNKR8D0mS+OMXxwjx0nL//IvfojYpKYnw8HCys7MxGo24RHujneBDd24t1n4zYDeZNVWUcSJvBwCzr41FkmDvJ47dc989PR3PJUtofeFFzC0txPrEclXMVbxz4h0ae23JVKGEJX+EzhrY9wIAjy6Kx1Or5g9fHHXojfexpPr6eoqLi0lLS8PPzw/JZKFrcxXqce64TbNXABV8+gGmgQEybrwdgOKtNeg7DcxdE4/CQYAz31UOlwhMTc20vvACHosW4p6efn7zWMn70HxMXi5Qqtl05DQldV08ujgeN43qoo9bCMHixYvR6/Xk5+cDoFs6HmnATM8Ou5M4IWM+geNj2PWubDLT+bsybXE4pfubOF3Rdb7TO4QCf/wjJKOR1n+uA+CBaQ8gEGe3nohZAHFLYOfj0NeOj7uGtYviyCtrY+uxpvOc2amLpTPlou7u7mRmZgI281inAa8V0YPmdK2eHQAAIABJREFUse7WZoo3f0FiZhb+4ZH09xgp3lZLTHIAITGOA5z5rnK4RNDy5JNgMhH0058CsL12OweaDvDDqT/EQ2OrIDANwPY/Qcg0SLwai1Xi8S0niQlw59rk0SNpRkREEB8fT15eHv39/WjGyfb5XpuLEmwms9vuoqe1haKvPgcgZWkkbl4adr13CsmBYe2ayEi8r7uOjg8+wFhXR7B7MDdNvIkN5Rso7RjSVuKMyWyHbDK7NS2S2EAP/rTxOAaz4268jwUdOXKE2tpaFi5ciFartZvHJvqijfUejNvzgVxKPXv1zQAc2FyN2Wgh9arobzyvo8uhEkH/4cN0ffopvt+7HU1ExKB5LMoriuvir7MHFr4st5JY9DtQKPj4YB3lLXoeWzIBlXJ0f2VZWVkYDAZ7d9Il45GsEt3Z9g3hiMlJRE9PpeAT2WSm0apIvyaG5uoeTu47PVpDHxPyf+CHCIWC1mefA+DuKXfjrnbnmYPP2IMCEyDldtj/IrSVo1Yq+M3KRKrb+ngtr2p0Bu4URqORrVu3EhwczLRp04Ah5rEV9gqg1tpqju3IYdrSlej8A+lpH+BIbj0T0kPwDXEfreGPaTlUImj+699Q+vvjd999AHx46kOquqv48fQh5rGBLnlZIHo+xCzAYLbw1LZSpoR6sWzyd8dPjpSCg4OZMmUK+fn59PT0oPLV4jErBH3haUwtfYNxmTffKZfOffgOABNSgwkcryP/k3LMDlxOqg4KwueWW+jasAFDaSneWm++P/n75NblcrDpoD1w/i9B6SJvHAPz4gPImhjIszlldDjLSUdF+fn5dHd3s3z5chQKhc08dhr3mcGD5jGA3e++gVqrZdbVciFI4ZeVSEjMvGL8KI187GtYEoEQYpkQ4qQQokwI8fNveP1HQohjQogSIUS2ECJyyGsWIUSx7WvDuccOp4J//zvG/fnPKD08ZPNY8T9JDU4lMyzTHrTnWehvl2cDwDsFNdR39vOTpRPGjANx/vz5WK1Wdu7cCYBnVjhCpaR7c9VgjF9YOEkLl1GyTTaZCYVg9rUx6LuMHN7xjVhoh5HfD+5G4eZG89NPA3BLwi0EuAbw1MGn7BvCnkEyyez4BtlLAvx8+UT0RjPrdjj2xvtoqK+vj7y8PCZMmEBkpHz76PqqCqFSoFtkbxhXf+IY5YUFpK66HldPHZ1NfRzfe5rJc0PR+Tlud9F/pwtOBEIIJfA8sBxIBG4SQpxbZF8EzJAkKQn4EPjbkNf6JUmaZvu6ihGUS0wMHnMzAHj58Mt0GDrONo/1nIa9z8Oka2FcMnqDmee2l5EW7cvcOP+RHNp/JD8/P5KTkzlw4AAdHR0oPTR4ZobSf6QNQ429v9DsG25GpdGw6+3XAAiN9yE80ZeDm6ox2iqNHFEqHx98v38nvduy6S8pwU3txn1T76OouYjc2lx74BmT2WbZZBYf5Mk100J5bU8VTd2OW447GtqzZw8Gg2EQP2mo6LKZx8JQetrIY5LErndew93bhxQbjL7g8wqUagXTh0Drnfq6hmNGkAqUSZJUIUmSEXgXWDU0QJKk7ZIknVm3yAdGFQra2NvI+mPruTL6ShL9huSsHX8DixGyfg3Aq3mVtPYa+emyiWNmNnBG8+bNQ6FQsH37dgA85oai8FDTvalq8FPtGZNZ2f586o4dASBtVTQDehPF2Y7NLPD93h0ofX1pfvJJAK6Ju4ZIXSTPFD2DxWpbOtO4y38L9YVwVO5NtHZRPBarxLM5TmbBxVJPTw/5+flMmTKF4OBg2Ty2sQKllwaPDHvxRsXBfdSfOEb69Tej1mppqemhrLCZaQvDcdNpRvEdjH0NRyIIBYbeVepsz51PdwFfDflZK4QoFELkCyGuPt9BQoh7bHGFLS0tFzTgbzSPtZXDwdflTpR+MXT2GfnXzgoWJQSREjF8rIHhkk6nIzU1lZKSEpqamlC4qNBlRWCo6MJwqmMwLmXFVbLJzEYyC4zUEZ0cQPG2GgZ6HZfEpfRwx//ee+jbm49+717UCjUPJT9EWWcZn1d8bg+cehMETZb3CswGIvzcWDMznHf31VLT1nfe8zs1fNq1axcWi2Wwu2h/SQumul50S8ajsLWPtlot7Hr7dXxCxjF5wWIA8j+rwMVNxbRFF04OvNx1UTeLhRC3AjOA/xvydKQkSTOAm4GnhBDf6NaSJOkFSZJmSJI0IyDgv+/xc7TtKF9UfMFtibcR4hFifyHn/4FSIyMogXU7Kug1mHls6X8Pox9pZWRk4OLiMjgrcE8NRumrpWtT1WCZqNpFS8aa2zhdXsqJvbsAmHVlNCaDhYObHbuhmveNN6IKCaH5SXlvYEnkEib5TeL54ucxWGxgn0GTWTXsk13ID2XFoVQInsp2tp4YaXV2dlJYWEhycrLNPGala1MV6hB33JLt5rHju3Jpq6thzprbUapUNJR2UnO0jZSlkbhcAEfcUTQciaAeGJpyw2zPnSUhxCLgV8BVkiQN4rMkSaq3fa8AcoHkYRjTN0qSJJ4ofAJfrS93Tb7L/kJDERz92LYmHERz9wCv7alk1dRxTAzWjdRwLlhubm7Mnj2bEydOyA3pVAq8lkRiatTTX2KfNSXOXUDA+Gh2v/M6ZqMR33HuTEgNpiS3Dn2n45LMFC4uBDzwQwZKSujNzh5sSHdaf5p3T7xrD4zJgthFMrymr51gLy3fmz2eT4rqOdXkbD0xksrNzUUIwbx58wDo3WMzj11hN4+ZjUby3n+ToOg44tPmIEkS+Z+V4+alYcqCUV2FvmQ0HIlgPxAnhIgSQmiAG4Gzqn+EEMnAv5CTQPOQ532EEC62x/7AHGDE0FA76naw//R+7p96v908BvK039UXZsvQ+mdySjFbJB5dPHZnA2eUlpaGm5sb2dnZALgmBaAOcadrSzWSWe6lLxQK5t92F90tzRRtkpc9Zq6MQrJIFG6sGq2hjwl5XX01mqgomp96CsliYVbILGaPm82Lh1+kxzjkJr/4j2DogZ3yZPa+eTG4a1T8fYtzVjBSamlp4dChQ8ycORMvLy8sehPd22u+Zh47tHUjPa0tZN4iN5arPtJGY1kXM1eMd3jy2HfVBScCSZLMwIPAZuA48L4kSUeFEH8QQpypAvo/wAP44Jwy0QSgUAhxCNgO/EWSpBFLBK8ceYXxuvFnm8fKt0NFLmQ+Blod1W163t1Xy5qZ4UT6jX3ziYuLC5mZmVRWVlJRUYFQCLyWR2FpH0BfYG9IFzF5KtEpMyn45H0GenvxCnAlMWMcx3Y30NXSP4rvYHQlVCoCHnkYY1k5XZ/LSXJtylq6DF28euRVe2BQIiTfKi8PdVTj667h7rlRbDp6mpK6zlEa/eWt7du3o1armTtXZoP0bK9FMljwGlIBZOjTk//xe0QmJRMxeSqSVaJgQwU6f62zsdx/oGHZI5AkaaMkSfGSJMVIkvQn23O/lSRpg+3xIkmSgs4tE5UkaY8kSVMkSZpq+/7ycIznfHpu4XM8Mf8J1ArbmqHVKs8GvMJhhrxU9NS2UlRKwcML40ZyKMOq6dOno9PpyM7ORpIkXOK8cYn2ojunFqvBXiY6Z81tGPr0FH7xMSCTzIRSsP+LytEa+piQ55IluCQm0Prsc0hGIwl+CSwfv5z1x9bT0jekMGHez0EoBltP3JURhY+bmseds4JhV2NjI8eOHSMtLQ13d3fMXQZ68xtwSwlCHWT/gLZ/w8cM9PYw9+Y7ACg72ExrbS+pV0ajHEFo1OUmh/pN6TQ64n2GLPcc+xQai2UOsVrLidPdfFpcz/dmjydoBDjEIyW1Ws38+fOpr6/nxIkTtjbVUVj1Jnp32bdrAsdHMyF9Lgc3bqCvqxN3bxeS5odxct9p2hp6R/EdjK6EQkHgo49iqq+n44MPAHgw+UHMVjPrDq2zB3qFwsy7ZGxpaymeWjX3z49h56kW8ivaznN2p/4b5eTkoNVqmT17NgA9OTUggW6hnTzW29HOgY2fMmF2JkFRMVgsVgo2VOAX6k78zKDRGvolKYdKBGfJYoKcP0JgIiStAeDxzafw0Ki4f97FbzN9oZo6dSp+fn7k5ORgtVrRhHviOsWfnp31WHrtLRFmr74Fs9HIvs/kG17K0kjULkr2fe7YswL3jAzcZsyg9Z/rsPb1EaGL4Pr46/mo9COquqrsgRk/ApUr5P4ZgNvTxxOkc+HxzSedbaqHSdXV1ZSWlpKRkYFWq8Xc1o9+fxPuM4NR+do/oOV/9C5Ws5mMNbcBcGJPI13N/cy6yr6R7NR3k+MmgoOvQ3sFLPwfUCg5WNPBtuNN3JMZjbfbpWc+USqVZGVl0dLSQklJCQC6JZFIZgs9OXabh++4MBIzsyjespGetla0HmqSF0dQUdRCU1X3+U5/2UsIQcCPHsXS2kr7ehlhee/Ue9EoNTxb9Kw90CMA0u6DIx/B6SNo1UoeyoqjsLqD3JMX5m9xSq7sy87OxsPDg9TUVEBuLIdCoMuyFyd2NNZTkr2JpEXL8A4OwWyysP/LKoKjdYxPGjtdAC4VOWYiMOrldd6IdIhfiiRJ/N+mk/h7aPh+xqXLMU1ISCAkJITc3FzMZjPqADfcZwTTW9CIuc2+IZx+/U1IVon8j+USyalZ4Wjd1RRsqBitoY8JuaWk4DFvHm0vv4ylqwt/V39uT7ydLdVbONp61B44+yFw8ZJblQOrZ4QT4evG41tOYnXgNt/DofLycmpqasjMzESj0WBq7qOvqBmP9BCUOpfBuN3vvYlKrSHt2hsBOLKjHn2ngbRVMWOuC8ClIMdMBPn/gN4mWPR7EILdZa3srWjjgQWxuLtcfOjMcEmhULBw4UI6Ozs5cOAAALpFEQiFoHur3TzmFRjElIVLObJ9K51Np9G4qkhZFkntsXbqT3ac7/QOoYBH12Lt7qbt5VcAuGPSHfi4+PDkwSftQa4+cjI4uRHqCtGoFKxdFMfRhm6+OuLYbb4vRGdmA97e3qSkpADQvbUaoVbiOd8+G2iqKOPU3l1MX3k17t4+GPvNHPiqmvAEH0InjL0uAJeCHC8R9LVD3jMwYQVEzJJnA5tPEurtys2zIv798WNcMTExREZGsnPnToxGI0qdCx5zQuk71IJxyIZw2jWrUSiU7P1QBnhMmReKu5eG/M8qHHqtWztxIrorrqB9/XrMLS14aDy4J+keChoL2NOwxx6Ydh+4+cmOdGDVtFDiAj14YutJzBbrKI3+0tbx48dpbGxk/vz5qFQqjPW99B9uxSNjHMohoPmdb7+Gq6eOGSuvBaA4u5YBvYm0qy+9vb2xIsdLBLueAGMvLPwtAJuPygjKRxbF4aK69M0nQggWLlyIXq+noKAAAM95YQitiq5NVYNxHr5+TFu20mbNr0WlUTLjiihOV3RRfcSxK2ACHn7oLKTl6gmrCfUI5akDT2GVbDd5F09547hiO1TtRqkQ/HjJBCpa9Hxc5Nhtvv8bWa1WcnJy8Pf3JykpCbDNBlxVeM61u4OrSoqoOVxM2rVrcHFzo7/XSPG2GmKSAwiMHLtdAMa6HCsRnIGST70ZAhNsCMpTo46gHG6di7RUuKrQLQjHcKqDgXK7+WnmVdehcnFhz/vy5mjCnBB0/loKNlQ4kZZDkJYapYYHpj3A8fbjbK7abA+ceZfcpjr7jyBJLJ0URFKYF09vK3UiLf9DlZSU0NrayoIFC1AoFBiquxk40Y5nZhgKV3m5VrJa2fX2a+gCgkhavAKAA5uqMRucCMoLlWMlgu1/BgQs+AUAHx+so6y5d0wgKIdbWVlZDAwMDCItPdLHofTSnN2mWufF9CtWcaogj6bKcpRKBalXRtNa20vZweZvO/1lr3ORliuiVhDnE8ezRc9istq6tqpdIfMnUJsPZdsQQvCTpROo7+zn3X2O3eb7P5HZbCY3N5eQkBASEhIA6N5ShcJDjccQd/CpgjyaK8uZs/oWVGq1HUGZFuxEUF6gLq+7379TeCrM/xl4hY05BOVw61ykpVDLJCdjbQ8Dx9sH42asvAatu8fgrCBuZhC+49zZ93klVgde6z4XaalUKHkk+RFqe2r5tOxTe2DybeAdIXtSJImMWH9mRfnybE4ZfUbHhf/8JyoqKqKzs5OsrCwUCgUDZZ0YyrvwnB9ubzNtsZD33pv4h0cyMUNuQFe4sUpGUK68dCv9xoocKxHMuBPm/hgYmwjK4da5SEu3lCBU/q50b6keXPpxcXNnxlXXUXFwPw2njqNQCGZdFU1nUx8n8h27AuYM0rLlGRlsnxmWSVJAEv869C97m2qVBub/AhoPwfHPB2cFrb0GXttTNXqDv0RkNBrZsWMHERERxMbGIkkS3VuqZOjMLHub+GM7c+horGf2mltRKJQygnJPoxNBOUxyrERg01hFUA63zkVaCqVAtygC02k9/YdbB+NSll2Jm5c3u99dD0DUVH8CIz3Z/2UlFpPjzgrOIC17tm6jv6QEIQQPJz9MU18TH5z8wB6YtAb842VfgdXCjPG+ZE0MZF1uOV39jgv/+S7av38/vb29LFy4ECEEAyfaMdb04LkwAqGWb08Ws4m9H71DUHQcsTPSACeCcrjlkIngDILyJ0vHHoJyuHUGaZmbmwvIbapVQW50b61GstjgNVots66+gdqjJVQfLkYIQdrVMfS2Gzi627ErYM5FWs4KmUVqcCovHn6RPpONUKZQyv2qWk7A4Q8B+PGSeLoHzLy407FNet+mgYEBdu/eTWxsLJGRkUhWie4t1Sj9tLhPt/cKOpy9he6WZjLW3IoQgpZaGUE5NSvMiaAcJjlcIrAjKAOZHnn5m0/OIC0PHTpEc3Oz3KZ6SSTm1n76ipoG45IWLcfDz5+899YjSRJhE30IneBN4cYqTAbHrYA5F2kJ8FDyQ7QPtPP2ibftgQmrIGgK5P4vWExMGufFyqQQXsmrpLXXceE/36a9e/fS399PVlYWAP1HWjE16tEtikTYijdMhgHyP3mP0ImTiJwqm8wKbAjK5MWXvu9nrMjhEoEdQTlhtIdy0XQGaZmTkwOANtEPdZgH3dtqBuE1Ko2G9OtupLH0JBUH98uzglUx9PeYKNnu2BUw5yItpwVOIzMsk1eOvEK30dafSaGQQfcdVVAkb7w/ujieAZOFf2wvH73Bj1Hp9Xr27t1LYmIi48aNk2cDW6tRBbrhNtWOoi3eshF9RzsZN96GEIKGsk6qjzgRlMMth0oElwqCcrj1NaSlEHgtGY+l04B+v31DeNK8RXgHhcizAquV4Ggvxk/xo2hLDQN6x13rPhdpCfKsoMfYwxtH37AHxi+FsJkyxcw0QEyAB9dPD+PN/GrqOx0X/vNN2r17NyaTiQULFgDQV9SMuaUfryWRg51Djf197PvsQ8ZPTSEsYbKMoPzUiaAcCQ1LIhBCLBNCnBRClAkhfv4Nr7sIId6zvV4ghBg/5LVf2J4/KYRYOhzjOZ+ezSm7ZBCUw61zkZYucd5oxutkeI1RXvpRqlSk33AzLdWVnCqQ/QezVkVj6DNTvLVm1MY+FnQu0nKi70SWRC5h/bH1tA/YynGFgKzfQHc9HJDpZmcAR89ml47W0Mecurq62LdvH0lJSQQEBCCZrXRvq0Yd6oF2kt9g3IGNnzHQ082c1bcCUHO03YmgHCFdcCIQQiiB54HlQCJwkxAi8Zywu4AOSZJigSeBv9qOTURmHE8ClgH/sJ1vRDTO25W75kZdEgjK4dbXkJZC4LV0PNYeI/p8O9Jy4pxM/MIiyHv/LawWC/5hnsTNCOTQ9jr6uo3fcoXLW0ORlt1ffgnAA9MeYMAywCuHX7EHRs+DqExbKxM9YT5u3Dwrgg8O1FHZqh+l0Y8t7dy5E0mSmD9/PgD6wiYsHQZ0SyIHizf6e3so/PwTYmemERwbL3fL/azciaAcIQ3HjCAVKJMkqUKSJCPwLrDqnJhVwOu2xx8CC4X8L74KeFeSJIMkSZVAme18I6L758fwi+UJI3X6Ma+vIS2jvHCJ96EntxbrgGx+UiiUzFl9Kx0NdRzfnQtA6pXRWExWDgzpVeSIOoO0bLEhLaO9o1kZvZJ3T75Lc98QJ3bWb0DfAgVyr6IHFsSiUSp4cqsTadne3k5RURHTp0/Hx8cHyWShO6cGTaQObby9eKNww0cYB/oHZwPlRS0ygnJllBNBOQIajt9oKDB0N7HO9tw3xthg912A33c8FgAhxD1CiEIhRGFLixMA8t9IrVYzb9486uvrOXVKvil5LYnE2memd0iZaGxqOkHRsez54G0sZhPeQW4kpAdzZGc9Pe0DozX8UZdQKAh85BFMtbV0fixzn++beh8Wq4UXSl6wB4anQtxSyHsa+jsJ8HThzjnj2XCogWMNjgv/AcjNzUWhUJCZmQlAb34j1m4jXkvtswF9ZwcHN33OxNmZ+EeMx2pDUPqEuBOXevl1ARgLumRSqyRJL0iSNEOSpBkBAQH//gCnvlHTpk3D19eX7OxsGWkZ5ok20Y+eXfVY++QNYSEEc9bcRndLE4dztgIw4wrZxu/ooHv3zExcU1Joff4fWAcGCPcM59q4a/mo9CPqe4d4LrJ+DQNdsPd5AO7NjMFTq+KJLSdHaeSjr6amJkpKSpg1axaenp5YDWZ6cmtxifPGJdp7MG7fpx9gMZmYfcPNAJwsaKKzqY+0q6JRODCC0tTcTNOf/4K5Y/iZIcORCOqB8CE/h9me+8YYIYQK8ALavuOxTg2jlEolCxYsoLm5maNHZeqW15JIJKOFnp11g3Hjp6YwbkIiBR+/i8lowNNXy5TMME7sbaTjtOOudQshCHx0LeaWFjrekn0E9yTdgwIF/yz+pz0wJAkSr5YhSPpWvNzU3DcvhuwTzRyobj/P2S9v5eTk4OLiwpw5cwDozWvAqjfjtWT8YEx3awuHtm5k0rxF+ISEYjFZ2f9FJYGRnkRNu3y7AHwXta1bR/tbb2Ht6Rn2cw9HItgPxAkhooQQGuTN3w3nxGwAvmd7fD2QI8ktMDcAN9qqiqKAOGDfMIzJqW/RpEmTCAoKYvv27VgsFtTB7rhODaA3rwFLj7whLIQg48bb6O1o59CWjQCkLItEqXGC7t1mzsQ9I4O2F1/E0ttLkHsQayau4fOKz6noGuIkXvBLMPXBbtmVfMfs8fh7aPjbJscD3dfV1XHy5Elmz56Nm5sb1j4TPTvr0Cb4ogn3HIw7g09Nv15GUB7d3UBP+4DDIyiNtbV0vP8B3jdcjyZi+I10F5wIbGv+DwKbgePA+5IkHRVC/EEIcZUt7GXATwhRBvwI+Lnt2KPA+8AxYBPwgCRJjmtjvUhSKBRkZWXR3t5OcXExALpFkUgWKz1DzGPhiVOITEpm36cfYOzvw02nYdrCcMoONNNSM/yfSi4lBaxdi6Wzk/bX5BqIuybfhYvShX8U/2NI0AS5D9H+l6C7EXcXFQ8uiKWgsp1dpa3nOfPlqezsbNzc3EhLk3sF9eyqRzJY0A2ZDXScbuDI9q0kLVqOzj8Qk8FC4VdVhMZ7E5Zw+XcB+Da1PPssQqXC/777R+T8w7JHIEnSRkmS4iVJipEk6U+2534rSdIG2+MBSZJukCQpVpKkVEmSKoYc+yfbcRMkSfpqOMbj1L9XfHw8YWFh7NixA5PJhNrfFbeUIBl032nfEJ6z5lb6e7o5uFGe5E1bHIGLu4r8zxzbLes6eRKeixfT/uqrmDs68HP149aEW9lctZmT7UP2Aeb9DKxm2WQG3DQrglBvV/5vs+PMCsrLy6msrCQzMxMXFxcsvUZ68+pxTQpAM4QjsPeDt1Gq1My6ZjUAJdtr6e82MsvBZwMDp07R/fkX+N56C+qgwBG5xiWzWezU8OoM0rK7u5vCwkJABt0D9OTYZwUhsROImZFG4RefMNDbi4uripSlkdQcbaeh1MFB9488jLWvj7YXXwLgjsl34Knx5Lmi5+xBvlGQcjscfAM6qnBRKVm7KI7D9V1scgDQ/RkgvZeXFzNmzACgJ7cOyWQd/HsDaK2t5njeDpKXrcTd2wdDn4miLTVETvEjJMZrtIY/JtTy9DMoPDzwu/vuEbuGMxE4sKKiooiOjmbXrl0YDAZU3lo8ZoWgLzyNudXeEmHO6lsw9PdR+IVcMpk0P0wG3X/q2KB7l9hYvK66io633sLU1IxOo+OOSXeQW5fLoZZD9sDMn4BQwI6/AXBtShixgR48vuXyB92fOHGChoaGQSC9uctAb34DbilBqAPcBuP2vP8WGq0rM6+6DoDibbUY+szMcnAEZX9xMb3Z2fjd9X2U3t7//oD/Us5E4ODKysqir6+P/Px8ADwXhCOUCrq3VQ/GBERGMSF9Lgc3bqCvq3MQdN9Y7gTd+z/0IJLVSus6uWLo1oRb8dX68mzRs/Yg3TiYeTccegdaTqFUCB5bEk/5ZQ66/yYgfU9ODUigW2ifDTRVlFG6bw/Tr7gaV08dfd1GirNriZ0RSMCQjWRHkyRJND/5FEo/P3xvu21Er+VMBA6usLAwJkyYwJ49e+jr60PpqcFj9jj6DrVgGlImOvuGWzCbjBR8KgNZEuaEoAtwJf8zBwfdh4Xhc8P1dH7wIcbaWtzUbtw1+S4KGgvY1zikAC7jUVC5ym2qgaWTgi970H1JSQktLS0sWLAApVKJua0f/f4m3FODUflqB+N2v7cerYcn06+4GoCDm6qxmKzMutKxZwN9e/fSV1CA/733onB3p3OgkxdKXrBzMIZRzkTgFFlZWRgMBjvoPjMMoVHStdU+K/AdF8qkeQs5tOVLulubUSoVzLoyira6XsoOODbo3u+++xAqFa3PyXsDayauIdAtkGeLnrUvnXkEQNr9cPQTaCg+C3T/dsHl19DPbDazfft2QkJCSEyUW491b62WKXkL7LOBuhNHqSo+wMyrrsPFzY2e9gEO76xjYnow3kFu5zv9Za8zswHVuBC8b1wDwEuHX+L54udp6G0Y9us5E4FTBAUFMWXiuq2IAAAgAElEQVTKFAoKCujp6UHprsZzbigDR9sw1tnLRNOvvxmEYM/7spEqbkYQfqHuFGyowHKZr3V/m9SBgfjccjNdGz7HUFqKi9KFe5PupbilmF31u+yBcx4GVx/I/j0AGbH+pEf78VxOGXrD5QW6P3DgAF1dXYMISmNDL32HWvCYMw6ljSomSRJ5767H3duH5GUrASj8UvaozLzCsYH0PVu3MnD4MAEPPIhCo6Gxt5F3TrzDldFXEusTO+zXcyYCpwA76H7XLvnG5ZERisJNRdcW+6xA5x9A8rIrObYzh9aaKoRCMGtVDF0t/ZzY03i+UzuE/O4+G3R/Tew1hHqE8lzRc1glW5LUesHcx6A8Byp2yLOCZRNo0xt5ZfflY9IzGo3s3LmTyMhIYmJiAOjeXIXQqvCcZ28kUH24mLrjR5h1zWrULloZSL/3NJPnhuI5ZOnI0SRZLLQ8/Qya6Gi8VslWrH8ckv0pD0x7YESu6UwETgF20H1hYSEdHR0obP9pDac6MFR1DcalXn0DGldXdr0rA1nGT/EjOFrH/i+rMBsvz7Xu76KzQPeHD6NWqvnhtB9yvP0426q32QNn3g26MNj2PyBJpET4sDgxiBd2VtChvzzafOfn56PX61m0aJEMpC/vZOBkB7oF4ShcVcCZ2cAbePoHMGXhMgD2fV6BUiUcHkjfteFzjOXlBDzyCEKloqyjjA3lG7hp4k2EeISMyDWdicCpQWVmZiKEYMeOHQC4p4eg8FTTtblqcK3b1cOTmVddR8WBfdSdODoIutd3Gji84/KtgPku8v3eHSh9fGh56mkAroi6gmivaJ4vfh6L1ZYk1Vq59URDERz7FIDHlkyg12hm3c5L36TX399PXl4e8fHxhIeHI0kSXZuqUHpp8Ei3cwTKCws4XV5K+nU3oVKraa3robSwmalZ4Q4NpLcajbQ++yzaSZPwXLIYgKeLnsZN5cbdU5w+Aqcugry8vAZB9y0tLSg0SnQLIjBWdmMo7RyMS1lxFe4+vux6+3UkSSI03ofwRF8ObqrG2H95rXX/J1J6uON3zz3o8/LQF+xDqVDywLQHqOiqYGPlRnvg1BshIAGy/wgWExOCPbl6Wiiv5VXR1H1pt/nOy8vDYDAMAukHjrZhqu2RgfRq+XYjWa3kvf8mPiHjmDRvIQAFGypxcVMxzcGB9J3vvY+poYGARx9FCEFRcxG5tbl8f/L38dY6fQROXSRlZGSgVqvZvn07AO6pwSi9XejaYp8VqF20pF93Ew0nj1FxUC6RTFsVzYDeRPG2y68C5j+Rz003ogoKouUpGXS/KHIRCb4J/KP4H5isNu6zQgkLfwvt5VC0HoBHF8VjsUo8cwkjLXt6esjPz2fKlCkEBwcjWSS6NlehCpTbl5zRyb27aK2pIv2GW1AolZyu6KKqpJVpiyPQujsukN6q19O6bh1uqam4z5mNJEk8deAp/F39uSXhlhG9tjMROHWW3N3dSU9P59ixYzQ0NCBUCnQLIzDV9TJwzN4+efKCxfiEhLLr7dexWi0ERuqISQmgeFst/T2Xx1r3fyOFVov//ffTX1RE744dKISCB5MfpK63jk9KP7EHTlgO4WmQ+1cw6onwc+Om1Aje219Lddul2eZ7586dWK1WO5D+QJMMpF8ahVDKvYKsFgt7Pngb//BIJqbPHQTSu3qqSXJwIH37+jextLUR8OhaeYm2bgcHmw9y/9T7cVOPbCmtMxE49TWlp6ej1WrJyckBwC0lCJW/K91bqwbNY0qViowbb6OtroZjO+XZw6yrojEbLRzYVH3eczuCvK+7FnV4OC1PP4NktTI3dC5TA6byr5J/YbAY5CAhYPHvofc05Muu5IeyYlEpxSWJtGxvb+fAgQOkpKTg6+uL1Wiha1u1jKBM9B2MO7ozm47GemavuRWhUFB3ooP6U51MXz4ejVY1iu9gdGXp7KTt5ZfxWLAAt+RkLFYLTx98mkhdJNfEXTPi13cmAqe+Jq1WS0ZGBmVlZVRX20xAiyIwne6jv8SOCY2bNYfgmDj2vP8WZqMRn2B3JqSHcGSHgyMt1WoCHn4Iw/Hj9GzejBCCh5MfprmvmfdPvm8PjEiD+OUy0rKvnUCdljtmR/HZoQZOnL60kJZfQ1DuaZARlMvGD3YONZtM7P3wHYJj4oidkTY4G/DwcWHy3G8k1DqM2l5+GWtvLwFr1wLwRcUXlHWW8VDyQ6gVtuUySYKekWlU6EwETn2jUlNT8fDwGATduyYFoA52o3trNZLNPCaEYO7Nd9DT1kLxli/l41ZGISENGoMcVboVK3CJi5NnBWYzqSGpzAqZxUuHX6LX2GsPXPhbMPTAricAuG9eNB4uKh7ffOkgLc8gKFNTU9HpdDJ0JrcO7URfXKLsnUNLtm2ip7WFOWtuQwhB5aFWmqt7mLkyCqXacW9FpuZm2te/iW7lSrQT4jFYDDxf/DyT/CaxJHKJPfD4Bnh6KtQfHPYxOO5v36lvlUajITMzk5qaGsrKyhAKgW5ZFOa2AfQF9k8lEZOnMn5qCgWfvI+hT4+nr5bJmaEc33uazqbh74lyqUgolQSsfQRjVRVdn30GwNqUtbQPtPPKkVfsgUGJMPUm2PcidNbi7abh3sxoth2/dJCWZxCUGRkZAHTvqEMymPFaNn4wxtCnZ+9H7xA+KYnIpGSsVomCDRV4B7kxMc2xgfRt69Yhmc0EPPQgAO+eeJdGfSNrp6+1cxjMRtj6P+AbDSFTh30MF5QIhBC+QoitQohS2/evYYSEENOEEHuFEEeFECVCiDVDXntNCFEphCi2fU27kPE4NbxKSUnB29ubnJwcJElCO8EHlxgvurdVYx2wl4lm3PQ9Bnp72L/hIwCmLxuPUq2gYEPF+U7tEPLIykKblETL889jNRqZ7D+ZFVEreOPYG5zWD5niL/il/D33LwDcOSfqkkFanougNHcZ6M1rwC05EHWwHTpT8OkHDPT2MO+2uxBCULq/ifYGPalXRqFQOu7n0UEE5fXXoYmIoMfYw4uHX2T2uNmkhaTZAwtfho5KWPxHuepsmHWh/wI/B7IlSYoDsm0/n6s+4HZJkiYBy4CnhBBDC2J/IknSNNtX8QWOx6lhlEqlYv78+TQ2NnL8+HGEEHitiMbab6Z7CNIyKCqGiXPmceDLz+jtaHciLW0SQhC49hHMDY10vifvDTyc8jCSJJ3dpto7HFJ/AIfehubjlxTS8lwEZffWapAkdIsjB2O6W5o5uPEzEucuICgqBovFyr7PK/AL8yA2ZWSIW5eKBhGU9/8QgFePvEqXoYu1KWvtQf0dsOOvEL0AYheOyDguNBGsAl63PX4duPrcAEmSTkmSVGp73AA0AwEXeF2nLpKSkpLw9/cnJycHq9WKJtQDt+RAevPqMQ/ZEJ6z+lasFjP5H70DDEVaOvaswC09HbdZs2hdtw5rXx+hHqHckngLn5d/zvG24/bAuT8GjQdk/wG4NJCW5yIoTc199B1owiMtBJXPkDbT776BQDBnjdxT/3heI92tA6StikYoHBhBefJsBGVLXwtvHn+T5eOXk+CXYA/c9QT0d8KSP8rVZiOgC00EQZIknek2dhoI+rZgIUQqoAGGeun/ZFsyelII4fItx94jhCgUQhS2tLScL8ypYdYZ0H1rayslJSUANuC4oGtL1WCcd3AISYuWUZK9mfaG+iFIyzYahriSHU1CCALWPoKlrY329W8CcPeUu/Fy8eKJwifsN3k3X7k76cmNUJM/5pGWZxCUOp1uEEHZtbkKoVHimWV3B58uO8Xx3blMX3k1Ov8AzEYLhV9WEhztReRkv9Ea/phQyzPPoHB3H0RQrju0DpPFxIPJD9qDOqqg4F8w7RYInjJiY/m3iUAIsU0IceQbvlYNjZPkv+jzfnQRQoQA64E7JelMO0Z+AUwEZgK+wM/Od7wkSS9IkjRDkqQZAQHOCcXFVEJCAiEhIeTm5mI2m1F5u+A5N5T+4haMtfaln7Rrb0Sl1pD3nuyWnTI/DDcvDfmflo/ZT7UXQ27JyXgsWEDbyy9j6e5Gp9Fx39T7KDhdcHab6rQfgkcQbPsdSNKYRlqei6A0VHczcLQNz8wwlDZ3sCRJ7HjzFVx1Xsy86noAjuysR99lJO3qaIcG0p+LoKzuruaj0o+4Pv56InRD2mxs+z0IJWT9akTH828TgSRJiyRJmvwNX58BTbYb/Jkb/TcSSoQQOuBL4FeSJOUPOXejJMsAvAqkDsebcmp4dQZ039nZycGDcuma57wwFO5qOjfaucXu3j5MX3kNp/J3c7q8FLVGyUwn0hKAgLWPYO3upu0VuWJodfxqInWR/L3w75itto13jTvM+xnU7IVTm8cs0nIognLq1Km2xnKVKDzUeAzxA5QXFlB3/Aizb7gFFzc3jP1mDnxVTXiCD6HxX6srcRidhaC8/XYAni16Fo1Sw71T77UH1u6Hox/D7IdANw6zxcqB6o4RGdOFLg1tAL5ne/w94LNzA4QQGuAT4A1Jkj4857UzSUQg7y8cucDxODVCiomJITIykh07dmAwGFBoVegWyw3phraemLHyGlw9dex6+zXAhrT01zo80lI7YQK6FStof/0NTE1NqJVqHk15lPKucj4pG9J6IuV28I2RZwVWy5hEWp6LoBw42YGxshvdwggUGrmixWI2s/OtV/EdF8aULLkWvmhrDQN6E7NWxYzm8Edd+j17zkJQHm09yuaqzdyeeDv+rv5ykCTBll+De6C8ZAi8VVDDdf/cQ1HN8CeDC00EfwEWCyFKgUW2nxFCzBBCvGSLWQ1kAnd8Q5noW0KIw8BhwB/4fxc4HqdGSEIIFi9ejF6vH4TXuM8MRhXgStdXlYMmMxc3N9KuXUPNkUNUlRShVCpIvTLaibQEAh5dC2YzLX//OwBZEVmkBKbwfNHz6E22/kJKNWT9GlqOQ8l7Yw5pORRBmZCQgGSV6N5UidJPi3uq3Q9Qkr2JjsZ6Mm+9E6VKRXdbP0Vba4ibEUjQeN0ovoPRlSRJtJyDoHzy4JP4uPhwx6Q77IHHP4fafLm02MWTDr2Rv289xewYP6aFD38X0gtKBJIktUmStFCSpDjbElK77flCSZLutj1+U5Ik9ZAS0cEyUUmSsiRJmmJbarpVkqTeb7ueU6OrsLAwkpKS2Lt3L+3t7QilAq/lUZhb+9Hvs29oJi1egS4giF1vvYZktRI/04m0BNCEh+N75510fbaB/kOHEELw2IzHaBtoO9tklng1hEyD7f8LpoExhbQciqBUKBT0FTdjOt2H15LxCJsfwNCnZ+8HbxOeOIXoFHm1d+/H5Qgg/drhxyxeSurZupWBI0cGEZR7GvZQ0FjAD5J+gIfGQw4yG2VwUUACJMuVVk9tO0XPgInfXpk4InsrjuvkcOq/0qJFi1AoFGzZsgUAbYIvLtFnm8xUajVz1txKc1U5J/fuciIth8jvnntQBvjT9L9/RpIkpgRMYXnUct44OsRkplDIDem6aqHw5TGDtDwXQSmZrXRvqUYd6oHrFP/BuH2ffkB/T/egeayhtJOyA80kL4lwIiiHICitkpWnDjzFOPdxrJmwxh5Y+Aq0V8jlokoVp5p6eLOghptnRTAxeGRmU85E4NR/JJ1Ox9y5czlx4gSVlZWyyeyKaKx6Mz25dpNZwpx5BESMJ++9N7GYTXak5ReVGAccG14T+OiP6D90iO4vvgDgkZRHsEgWnit6zh4YPV82EO18HAa6zkJatvYaRmXse/bsQa/XDwLpe/MbsXQa5MZyNj9Ad2szB86Yx6JjkawSuz8oxcPHheSlkf/mCpe3uj755CwE5ZaqLRxvP86DyQ+iUdqobP2dsOMv8r9/7CIkSeKPXxzDXaPkR4snjNjYnInAqf9Y6enpeHl5sWnTprNMZj276zF3yiYzoVCQcfP36Gxq5HD2FoQQzLk+Dn2XkYMO3qba6+pVaCdPpvnxJwZNZrcm3MqG8g2caD9hD1z0O+hvh7xnAPjZson0myz836aL35Cus7OT3bt3k5iYSEREBNYBMz3ba3CJ9UYbZ68A2v3uetk8dqPNPLa3kZaaHtKviUGtGf7WCJeKLN3dNP/9SVyTk/FcshiT1cQzRc8Q5xPHiqgV9sAz5rHFsnks50Qzu0pbeWRRPL7uI4fwdCYCp/5jqdVqlixZQlNT02A5qW5pJCDo3my/yUdNm0FY4mT2fvQOxoF+gqO9mDArmKJtNXQ2O3BDOoWCoF/+EnNTE20vyTUVdyfdjc5Fx+OFj9s9F+OmwaRrIf8f0HOa2EAP7pwznvcP1HKo9uKa9M4sBS5ZIlcA9eysw6o/u7FcU0UZx3dtJ+WKVej8AzH2m8n/rILgaB1xM7/Va3rZq+W557B0dBD0618hhOCjUx9R21PL2pS1KM/0DuqohoJ1MO1mCEnCaLby/748TnSAO7enj+xsypkInPqvlJiYSGRkJDk5OfT396Py1uKZEUpfUTPGOtlkJoRg7k130NfVyYEvZVB7+rUxKJUK8j4sG83hj7rcUpLRXXEFbS+/gqm+Hp1Gx/1T76egsYDd9bvtgVm/BosRdvwNgIcXxuHn7sL/bDiK9SKV41ZWVnLs2DEyMjLw9vbG0mOkd3c9rkn+aMI8AZt5bP3LuOq8SF11AwCFX1XR320kY3W8Q5vHDKWldLz1Nt6rV+M6aRJ9pj7WHVpHSmAKc0Pn2gOz/yCbxxbI5rE39lZR2arnN1ckoh7hxnzORODUfyUhBMuWLaOvr48dO3YA4DnfZjL7snLwU+24+InEzkyn8POP6evuwt3LhRlXjKeqpNXhTWaBj/0YhKD5CZlFsDp+NRGeETxR+ITdZOYXA9PvgIOvQ1s5nlo1P18+keLaTj46WDfiY7RYLHz11Vd4e3szZ84cALpzapDMkq3ViKyKg/uoPXaY2dffjIubG10tfRzKqWViWrDDl4ue/tP/ovDwIGDtIwCsP7aetoE2Hp3+qD1B1h2AIx/C7AfBK5TWXgNPbytlXnwACyaOfGM+ZyJw6r9WSEgIKSkp7Nu3j9bWVtlktigCY2UXA8ftJrOMG2/HNGCg4BO5A+fUrHC8g9zY9f4pLGbHLSdVh4Tgd9dddG/8ir7CQtlkNl02mX1a9qk9MPOnoNRAzh8BuDY5lOQIb/666STdA6YRHWNhYSHNzc0sXboUtVqNua0ffcFp3GcGofZ3BWTz2I43X8VnXBhTFi4FIO/DMhRKBWlXO7Z5rGfLVvry8wl4+CFUPj50DHTw6tFXyQrPYlqgzU41aB4LgDlysnhiyyn6TRZ+szLhW84+fHImAqcuSFlZWahUKjZv3gyAe+rXTWZ+YeFMmr+IQ1u+pKu5CaVKQcbqOLqa+zmUXfttp7/s5Xf3XaiCg+VyUquVhRELSQ5M5rmi5+gz2fZRPIMg/QE4+gk0FKFQCH5/1STa9Aae2VY6YmPT6/Vs376d6OhoJk6cCEDXFhu6dKF9zfpwzhY6GurIvEU2j9WdaKfyUCvTl0Xi7n3ePpKXvaz9/TT99S+4xMfjs0YuD32h5AX6zf08kvKIPfDEl1CzZ9A8dqyhm/f213BbeiSxgZ4XZazORODUBcnDw4N58+ZRWlpKaWmp3WTWcrbJbPYNNyOEgrz35Q6ckZP8GJ/kT+HGKvSdo1MOORakcHUl8LHHGDh2jK5PPjnLZPbq0VftgbMfBldfmVIlSSSFebNmRjiv7amirHlkmA/Z2dkYjUaWL1+OEAJjfS/9h1rwyAhFqZMrWAx9fez54C3CEicTMz0Vq8XK7g9K8fTTMm1R+IiM61JR20svY25olDeIVSrqe+t57+R7rIpZRbR3tBxkMcHW34L/BEi+HUmS+MMXR/FyVbN2YfxFG6szETh1wZo1axa+vr5s3rwZi8WCNsEXTdTZJjNPP39SVlzF8V3bqT9xDICMG+KwWiT2fOLYG8e6K1bgmpxM85NPYentJSkgiWXjl/Hakddo0jfJQVodzPspVO6Q2w8Ajy2dgKtGye8/Pzbs3V3r6+s5ePAgs2bNIiAgAMkq0flZGQp3FZ7zwgbj9m/4kP7uLubdKpvHju1uoK1ez5zrYlGpHbdc1FhXT9tLL6FbsRz3VNld/bd9f0MplPxw2g/tgYWvQHv5oHls89HT5Fe086PF8Xi5qS/aeJ2JwKkLlkqlYsmSJbS2trJ//36EEHhfEWUzmdk3NNOuvRFP/wC2vfQ8FrMZrwBXpi0O51RBE41ljs0sCPrlL7C0ttK2bh0wxGRWPMRkNvMHEDQFvvoZGHrw93DhR4vj2VXaypZjTcM2HqvVyldffYW7uzvz5s0DQF94GmNND14rolFoVQB0t7Zw4ItPSciYT3BMHAN6EwUbKhkX5010smO3im/+619BoSDwJz8BYHvNdnJqc7hv6n0Eu9t6MvV3ynjSqEyIW8KAycKfNh5nQpAnN6VGfMvZh1/ORODUsGjChAlER0eTm5tLX18fmjBP3KYFnGUyU2u1ZN15H6211RzcKDeqnb5sPB4+Lux879RFK4cci3KdMgWvq6+m/fU3MNbUEOYZxs0Tb+azss842W4zkClVsPJJ6GmE7X8G4Na0SOKDPPjjF8cYMA1Pd9KSkhLq6upYtGgRWq0WS6+Rrq+q0ER54TYELZn33nokJDJulFspF35ZxUCfiYzVcQ5dLqrfs4eerVvxv/ce1CEh9Jn6+PO+PxPrHcvtk263B+7+u4yhXPInEIKXd1dS297Pb69MRHWROc7ORODUsEgIwdKlSzEYDGzfvh0A3dLxgHSWySx2xixiZqSx58O36W5pRu2iZPZ1sbTW9nI8r2F0Bj9GFPDoo6BW0/Q32TPwg6QffN1kFj5TLict+Cc0HkKtVPC7KydR19HPCzsvHAs6MDDAtm3bCA0NZerUqQB0baxEMlrwuSZ28AbfVFHGsV3bSVmxCl1AIB2n9RzOrSNxzjgCwi/OBudYlGQycfpP/4va1mAQYF3JOhr1jfw67deoFbblno5qyF8HU2+CkCSauwd4fnsZixODmBPr/y1XGBk5E4FTw6agoCBmzJhBYWEhTU1NqHy0eM6xmczq7Y1ls+68B4Cc1/4FQOz0QMbFeZP/aQUD+pEthxzLUgcF4n/vvfRuy0afn4+Xixf3Jd1HfmM+eQ159sBF/yNvHH/xKFgtzI71Z8WUYP6RW0Zdx4U5tnfu3Elvby8rVqxAoVAwUN5J38FmPDPDUAe6Af+/vfMOj6ra+vC7Z5JJ7xXSCGmE3kILNXTpTcCGCirqp6JXioWi13rxKthFUFAQEER6CU1AIKFDEmpICEkI6b1O2d8fM5KIQfFGkkDO+zzzzJlz1pmzzp6Zs+b81t57VVYes7S1o/NI4+Cxg2vjMdOo6Dy8aY2Of7eTs2IFFZcv4/HKLFQWFlzMvcj3cd8zKnAUHTw6VBruMdUfjngdgP/suIBWb+C1+2qnu+jNKIFA4R+lT58+WFhYsGPHDqSU2PXxQWVtRv6Wykpm9q7udBv3IJePRRN/NMo4Anl8MOUlWo5sqrvZNesDzo9Owtzb29idVKdjfMh4fOx8fj/IzMoJBr4Dqcfh+FIAXjVdQN7dev4W7/zXZGZmEhUVRbt27fDy8kLqDOStj0ftbIl9RGUPoMSTx0iOO0O3sROxsLYhKTabpNhsOg7xx9r+zs2HU9/RZWWR9eln2PTogW2fPhikgbei3sJWY8uLHV6sNEw9DjFroKtx8NiZlDzWHk/h8XB/mrja1InvNQoEQghnIcROIcQl03O19eeEEPoqRWk2VlnvL4SIFkLECyFWm6qZKdzFWFtb07t3bxISErhw4YJpkJkf5Qn5lJ2vHGTWfvBwXH2bsOfbr6goK8XV25aWvbyJ3ZdCVkrDLUuhsrDAffp0yi9eJG/NmhuDzOLz4tkQX6UAYOv7oUkPY03bogy8nax5ulcgW2LSOBSf9bePK6Vk+/btmJub07dvX8A4n5AusxSnEQEIUw8gg17PvuXf4NTIi9b9BqPXGzi49hIO7la07uP9Z4e458n48CMM5eV4vPIKQgjWx6/nZMZJXurwEk6WpkujlBA52zh4rPs0Y3fRTWdxtdXwfxF1V6uhpncEs4DdUsogYLfpdXWUVilKM7zK+veBj6SUgUAuMLmG/ijUA8LCwnB1dSUyMhKdTodNZ0/MXK2MWrNpkJnazIx+U56lMDuTw2tXAtBpmD8W1uYcWH2xQRe7txvQH+uwMDIXfow+P59+vv1o69aWT09VGWQmhDFxrCuFHca5aZ7q1RRvJyvmbYr728XuL1y4wOXLl+nTpw+2trbosksp2JOMVStXLEOcb9jF7NlBTmryjcFjsb+kknu9hPCxQajNGq7AUHrmDPnr1uH8yMNYNPUnpyyHD49/SHv39owIHFFpeGErJB2E3q+AhR2bzqRxLCmX6QNDsLOsve6iN1PTT24EsMy0vAxj3eHbwlSnOAL4rY7x39pfof6iVqsZNGgQOTk5REdH/36Q2dHKQWZeIaG06juQ41vWk5mUiKWNOV1GNr1RyKShcqM7aUEBWZ9/bhxkFvYyWaVZLI1bWmnoGgTh0yDmR0j4BUtzNa8Pac7F9CK+j7r9qb61Wi07duzAzc2NsLAwpJTkbriMUAsch1Zq/sbBYz/gHdqSgI6dKS2q4OiWRHyaO9Oklcs/2AJ3F9Jg4Pq/30Lt5orr008D8OGxDymuKGZ2l9mohOkye2PwWDC0n0RphZ53t56jRWN7xnao28F3NQ0EHlLK30pOXQduNdespRDimBAiSgjx28XeBciTUv5WpSQF8KqhPwr1hMDAQIKCgm4kHy2bO6Pxt6cgMgl9UcUNux4PPIqljS07F3+GNBgIDW+Mm68dh36KR1teP4q11wWWoaE4jh1LzoofKE9IoI1bGwY2Gci3sd+SXFBlWo4eL4GTP2x+CbRlDGzhQY8gVz7ceZHs2yxgc/jwYXJzcxk8eGa71/YAACAASURBVDBqtZrSmCzKL+ZiP8APtUPlFBGH1/5ASX4evR56HCEERzYlUlGmp/vYht1dNP/n9ZTFxODx8suobW05ev0oGy5vYFKLSQQ6VZF7or6A7Hjo/yaozfhq/2XS8suYO6wFalXdtt9fBgIhxC4hRGw1jxFV7aTxXv5W9/N+UsqOwAPAAiHE356JSgjxpCmYHMvMzPy7uyvUAQMHDkSr1bJnzx6EEDiNCMRQridvw+UbNla2dvR6eDJpF88TszcSlUrQ4/4ginLLOb79St05Xw9we+F5VJaWpL/3HgAvd3wZM5UZsw/NxiBN0o+5FQz5wDg69eBChBDMHdac0go983f8dQGb/Px8Dhw4QGhoKE2bNsVQpiNvUwLmXrbYdml8wy71wjmOb91Am/6D8QwMJju1iLj9qbTs6YVz47pJcNYH9IWFZHz4IVZt22I/bBhavZa3ot7Cy9aLp9o8VWmYeRH2vAXNhkLwIK7llfLlvssMadWITv7Otz5ALfGXgcBUlL5lNY8NQLoQohGA6bna+3kpZarpOQH4BWgHZAOOQggzk5k3kPonfiySUnaUUnZ0c2vYoxbvFlxdXenUqRMnTpwgLS0Nc08b7Pv5UhqTRcmZymDevGcE3s1bcmDFUkry82gU6EhwZw9O7rxKfmbDLWBj5uKC6zPPULz/AEX79+Np48mMsBkcTz/OqvOrKg0D+xkL2Bz4L2RfJtDdjke7NWH1sWTOpPz5iO3IyEiklAwcaJw1tCAyCUNRBU4jAxFq479UbUU5O75YgL2rGz0ffAwpjeUnNVZmdBrqf8fO/24g69PP0Ofk4PH66wiVimVnl5GQn8CrnV/Fysw4OysGPWx4BjTWMORDEIL3t5/HIGHW4GZ1ewImaioNbQQmmZYnARtuNhBCOAkhLEzLrkA4cNZ0B7EXGPtn+yvc3fTq1Qtra2u2b99u7E7a0wdzb1vyNsTfkIiEEPSb/CwVZWXsX2GcaK3bqEDUahW/rmnY8xA5P/QgGj8/0t97H6nVMjJwJN29urPgxILfS0QD3wEzC9jyL5CS5/v9dQGbxMRE4uLibhScqUgtoujwNWy6NEJTZVDYwVXfk5uWysCpL6CxsibxdBYp53PpNKwplrZ1l+Csa8rj48lZsQLHsWOxatmC5MJkvjz9Jf18+9HTu2el4eHPIOUoDJ4Pdh4cT8phw6lrPNmjKT7O1nV3AlWoaSB4D+gvhLgE9DO9RgjRUQix2GQTChwTQpzGeOF/T0p51rRtJvCSECIeY85gSQ39UahnWFlZERERQVJSEmfPnkWoBc5jgzGU/V4icvH2IWz4GOL27SY57gw2jhZ0vM9UwCau4RawERoN7rNmUpGQQO7KlUbpp+tc1EL9e4nIvhFEzIaEvRD7E/aW5swcFMLJq3n8fPKPN9q/FZxxcHAgPDwcaZDk/nwJla05DgOb3LBLPX/2hiTk27INeq2Bgz/F49TIhpY9G//hfRsKxoIzb6OytsbtRWM30Hei30Et1MzsNLPSsKok1GosBoPkjU1n8bC34One9adWQ40CgZQyW0rZV0oZZJKQckzrj0kpp5iWD0kpW0kp25iel1TZP0FK2UlKGSilHCelbLjzEd/DtG/fHg8PD3bu3IlWq72lRNR59P04eHiya/Hn6HVa2vQ1FrD59cdLDbqAjW3v3tiEh5P56WfocnN/JxGtPL+y0jBsMjRqCztehdI8xrT3pq2PI+9uO0/hTQVsbi44UxydhjalCMehlZPKaSvK2fFlpSQEcHpvMgWZpXQfF4iqlufDqU8U7txJyeEo3J57DjNnZ3Zd3cWvqb/ybNtnKyeVq0YSWncylTMp+cwc1AwbC7M/P0gt0nA/SYVaQ6VSMWjQIPLy8jh8+DBAtRKRucaCvo8/Tc61FI5t+tlYwGZcEHnpxrKHDRUhBB6vzMJQXEzmxx8D3JCIFp5YWCkRqdQwbAEUZ8Ket24UsMkqKueTPZUSW9WCM6GhoegLK8jffgWLIEesWlfm34yS0LUbklBJQQXHtl6hSSsXfJs33O6ihrIyMt57H4ugIJwmTqBYW8x70e8R4hTCA6EPVBreJAkVl+v4z/bztPFxZGTb+tVBUgkECrWCv78/oaGh7N+/n8zMTKNENO6PEpF/2w4Ed+lO1E+ryEu/jl9LUwGbLVcozm+4N4wWgYE4TZxI3qrVlJim+p7bdS5m4qZeRI3bGaerProYUo/TxseR+zt6882vicRnGEds79mzh4qKCgYNGoQQgrzNCUi9AccRlZPKVUpC9+Hbso1xfqGVF9BrDYSPDaqrZqgXZC9egvbaNWOC2MyMT09+SmZpJnO6zsFMZfqXf5MkBPDutnNkFJYzd1hzVHXcXfRmlECgUGvcd999aDQa1q5di06nw9zDBvt+fn+QiHpPmoLKTM3ub75ASkn42ED0egOH113+k3e/93GbNg1zHx9SZ8xEn5+Pp40n08Om/1EiingNbD1g0zTQ65gxqBlW5mre2BRHamoqx48fp1OnTri7u1N2KZfS05nY9/a5UYNYW15WRRJ6FIBzB9NIOJlJ5xFNcfSoHwnOukCbmkr2119jN3gQNp07cS77HD+c/4FxweNo7dbaaFSNJBQZd53lUVd5smdT2vtWOxNPnaIEAoVaw87OjhEjRpCens6uXbuM63p6/0EisnN2JXz8w1w5dZxL0QdxdLemXT9fLkRfJ+1yfl2eQp2itrXB64P56DIzSZs3Dyll9RKRpQMMeheun4Gji3G1tWBa/2AOXMpk1bqN2NjY0Lt3b6TWOKmcmasVdr0rR7YeXP17SSj3ejEHfryIdzMn2vWr3YIp9Y309/8DQuAxfTp6g543D7+Jo4Ujz7d/vtLo8Ke/k4TSC8qY+dMZWnrZ8/KAkLpz/k9QAoFCrRISEkJYWBhRUVHEx8ffUiJqO2AI7v4B7F26iPKSEtqbCqHvX3WhQSeOrVq3xu355ynctp38dT/fWiJqMQoC+hrliYJrPNLVj3DHIgqz0+neuw+WlpYU/JKMLrsMx5EBCNM8QUZJaOMNSUivNRC5JA4zczV9JzVH1DNJozYpOnCAwshIY8GZxo1Ze3EtsdmxTA+bjoOFg9Eo8yLseft3vYRe+vEUZVoDCye0Q1NP52Oqn14p3NMMGDAANzc3fv75Z4qKiqqViFRqNf2nPEtRXi6HflyOxtKMnhOCyUouavASkcvkx7Hu1Inrb79NxZUr1UtEQhhHHOsrYPsr5OVk00wXz3WDHaviVVRklFD4SzLWbd2wDDRKFZWSkDs9HzL2EoramEBWchF9Hm6GrZPFrVy659FmZHBt5iw0gQE4P/YYWaVZLDyxkM6enRniP8RoVI0ktPjXBA7GZzN3WHMC3Gzr9iT+BCUQKNQ65ubmjBkzhrKyMjZs2GAaaPZHicgzMJi2A+7j5PbNpCfE07StG637eHN6TzIJpxruNCNCrabxf95HmJuT+vJ0ZEVF9RKRc1PoOZ2Ks5tZs/xbLDQaQrv2Y8PpNC5+H4cwV+EwpHJSuUpJ6Hk0llYkn8vh1M6rtOjpRdO2DXc0v9TrufbydAylpXgvWIDK0pL5R+dTpi/jtS6vVc6z9JskdN8HYOdBTEo+83dcYHBLT8aH1e2kcn+FEggU6gRPT08GDBjApUuXOHLkyC0lou4THsHawYFdiz/DYNDTbXQg7n527PnuHAVZpXV4BnWLuacnjf79JmWxsWR+8umtJaLw59lmMYKM/GJGjxjKcwNb838eTjhmllHa1RO1nbEESMr5uN9JQqVFFexaehYnT2vCx9bdPPn1gazPPqfkyBE858zBIjCQw9cOszVxK5NbTcbfwTTFRlVJqOUYSip0vLDqJK62Frw7ulW9n5RPCQQKdUanTp0ICgoiMjKS9PT0aiUiC2sbej8yheuXL3Fm53bU5ioGTGmJNEh2LI5r0PkC+wEDcBw3juzFiymOiq5WIjodd56T5b704AiBKT9BmY4JRYKLKgNPnblKUbkObXkZkV8uvCEJSSnZ8915yoq1DJjSAnONuo7PtO4oPnSIrC++wGHUKBxHjaRcX87b0W/jY+fDlFZTjEbVSEJvbjpLYnYxH97fFkfr+l9vSwkECnWGEIIRI0ZgaWnJTz/9hFarrVYiCunWE99WbTmwchnFebk4uFkR8UgoGVcKOPxzw84XeLwyC02TJlybORNdbi4jA0fSw6sHC08sJOZKDJs3b8bX15ferXzh4ELyN5yGEh32IwJIzCnm9Z9j+HXV7yWhuP2pXDmTRbdRgbh6N9xC9NqMDFKnz0AT0BTP2cbawt/EfENSQRKvd34dC7UpZ3KTJLQtJo1VR5N5ulcAXQPujoF3SiBQqFNsbW0ZOXIkGRkZ7Ny5s1qJyDgp3dPodVr2LF2ElJKA9u606uPN6d0NO1+gsram8Qfz0eXkcH3OHADmdp2LRmpYuXol5ubmjB07FvXAtyhXtab4dAm23RrRsbM30/oFEx11nBNVJKGca8X8ujYe3+bODbr0pNTruTZ9BobiYrw/+giVtTWJ+Yl8HfM1g5sMpptXN6Nh5oXfSULX8kqZtS6GNt4OvNg/uG5P4m+gBAKFOicoKIguXbpw5MgRLl68aJSI+v9eInJq5EXXMRO5ePgAJ7dvBiB8dCBuvkq+wKpFC9ynTaNw5y7y1qzBw8aD++X9aEo1OHZ0xN7eHr10IEc/GzUZ2JsbZaMnu3kzNG8/BWa2ePYfi06rJ3JJHBpLNRGTQht0V9Gsz7+gJDramBcICqKwopBpe6dhbW7N9LDpRiODHtZXSkJ6CS+uPoVOb+wqan4XzcV093iqcE/Tr18/PDw8WL9+PYWFhdj1+KNE1GnEWAI6duGX774m6cwp1OYqBj7RAmmQRC5p2PkC58cexaZbV9LfeZfju3aRczmHYu9iFqctJjkniezvz2LQaXBpG4sq6gM4s4bDPy7HsjSHY74DeH7tWfavjSc7tYiIR0KxcWi4XUWLDx8m6/PPcRg5EsfRo9Ab9Mw6MIurBVf5qPdHuFmbelAd/hRSj92QhL7cd5noxBzeGNGSJq53V7EeJRAo1AvMzMwYM2YMFRUVrF+/HilkpUS0Ph4pJUKl4r7/ewkXLx82L3iP3LRUHNys6fNwKOmJBRxe33DzBUKlotG771Hk4sy2ffvw8fbmpfEvYYYZJ5ftouJqIc7jQ9CMfRX8upOychYnthklodemDIO0Us7tS6VVby+atHKt69OpM3SZmca8QNOmeM6ZDcDCkwvZn7KfWZ1mEeYZZjS8SRI6lZzHRzsvMrR1I8a0r18Tyt0OSiBQqDe4u7szcOBALl++THR0dKVEFJtNaUwWABora0bOmA0qFev/82/KS4oJ7OBOq97enN6VTOLphpsvwNmJqEGDUGu19M7KopFdIz6wmk379GCutM3DqqUrmGnQjviaHdcCsTevoOfwwbR1s2e01opMlYE0P8u6Pos6Q+r1pE6fgaGoCO8FxrzApsub+Db2W+4Pvp/xzcYbDW+ShIoq9Lyw6iQe9pa8Par+dxWtDiUQKNQrOnbsSEhICDt37iQtLa1aicjB3ZPhL71CXnoaWz6ej8GgJ3yMMV+we9k5CrIbZr5g27ZtZBUXE2FpScXSZeSu+xXfo7acc7/KS9p/c7XgKgC/btpCXpk5A70TMV8/md1LYzHTQ2qoNXO2nOX89YI6PpO6IeuLLymJisJz9mwsgoI4k3mGeYfmEeYZxqzOsyoNb5KE5m6IIzmnhAUT2uJgdXdWbKtRIBBCOAshdgohLpme/zCtnhCijxDiVJVHmRBipGnbUiFEYpVtbWvij8LdjxCC4cOHY21tbexSqtf+QSIC8GneiojHppJ48hgHflj2+3xBAxxfcObMGU6cOEH37t0JmzkTi+ZhFB0qxczNklaPRWCmNmPOoTlcPRtjlIQGDMH3kQ85c8mTq2fzCB8TwDuPdsDeypxnV5yguFxX16dUqxRHRZH12Wc4jBiBw+hRpBenM23vNNys3fhvr/9irjJd4G+ShDaevsZPJ1L4v4ggwprUfRH6/5Wa3hHMAnZLKYOA3abXv0NKuVdK2VZK2RaIAEqAyCom03/bLqU8VUN/FO4BbGxsGD16NFlZWezYsaNaiQigTf/BtBkwhGOb1nF2/57f5QuiGlC+ICsri82bN+Pj40OfPn1AmmHZcSpSr0UbvxoPR09mdJrBqWsnWPfJ28aBYw8+SpZdbw4XP04Ti6O0NF+Hm50FC8e3JSGrmNnrY28E3XsdXWYmqS9PR+Pvj+ec2ZTry5m2dxrF2mI+ifgEJ0vT/9uqktDQj0jOLeW1n2No7+vI8xF39+jrmgaCEcAy0/IyYORf2I8FtkkpS2p4XIV7nKZNm9KtWzeOHz/OuXPnjBKRjx256+KpSCu+Yddn0hP4tGhN5KJPSLt0gcAO7rTs5cWpXckknsn6kyPcG2i1WtasWYNarWbs2LGoUJG98jyGYgNWIYUU7dlC7sqVDPMfyshLzdDnFNFk/CBUKg2RS+KwsLUkolMiYtdcuLiDboGuvNA3iHUnU1lzPKWuT++OI/V6UmcY8wJeCz5CWFsz99BcYrNjebfHuwQ5mYrwSAk7XrshCemsXHlx9SmQsHBCO8zuoq6i1VFT7z2klGmm5euAx1/YTwBW3rTubSHEGSHER0KIW/ZZE0I8KYQ4JoQ4lpnZgBOCDYiIiAgaNWrExo0bKSwuxGViM1QaFVlLYtBmGv9LqM3MGPbiLGydnNnwwVsU5mQRPjYQVx9bdi89S2FOWR2fxZ1l+/btpKenM2rUKBwcHMjfmkD5pTycRgbi+tT92PTsQfr7/2HHB+9il1BCQhvB7OT/smnFEXLTiun7aChW4z4Ez1awdjJkXuC5iCC6BbgwZ0MsF64X1vUp3lGyvvySksNReM5+HcvgYL6J/YatiVt5rt1zRPhGVBrufRuiv4Auz0DLMXy29zLHknJ5a1RLfJzv/kI9fxkIhBC7hBCx1TxGVLWTxvvIW95LCiEaAa2AHVVWvwI0A8IAZ2DmrfaXUi6SUnaUUnZ0c2u4MyE2JH7rUqrT6Vi3bh0qRw2uT7QCIOvrGHSmi7yVnT0jZ8yhoqyMDfPfRkodA59oicEg2fF1LHr9vZkviImJ4fjx44SHhxMcHEzx0esUHbyGbXhjbMI8EULQ6O23OeflxrkT0XQeMZY5LywmpKAD16JL8e1ua6w9rLGGiSvB3Ap+GI+6LJcFE9pia2HOsz+coKTi3swXFEdFk/XpZziMGI7D6NH8kvwLC08sZFCTQTzR6olKwwMfwv750P4RGPgOx6/msnD3RUa182JEPas9/L/yl4FAStlPStmymscGIN10gf/tQp/xJ291P/CzlFJb5b3TpJFy4FugU81OR+Few9XVlcGDB3PlyhUOHTqEuZs1rpNbYdAayFwcg77AWMfY1cePIc+/THpiPJFffoyDmxV9HmpGemIB0esT6vgs/nmysrLYtGkTPj4+REREUH4ln9z18VgEOeJwX+XU0tG7t5FoZ0GTzDwCElOx1TnS/dL95Nle50P1LK7kXzEaOnjDhBVQkAprHsXd2oyFE9pyObOI2evj6uYk7yC6rCxSp79sygvM4XLeZWbun0kz52a8Gf5mZRfQ6EWw+w1oNQ6GLqCgXMcLq07h5WTFmyNa1O1J/IPUVBraCEwyLU8CNvyJ7URukoWqBBGBMb8QW0N/FO5B2rVrR/PmzdmzZw+pqaloGtng9nhLDEVaYzAwdSsN6NCZ7uMf5vzBfRzZsJagjh607OnFyZ1XuXIP5QtuzgvIQi3Zy89h5mSJy8RmCLXxIhb9849E//wjrfoOpFvEYHK/X8GOjw5hqJAMe6o9BpWeKZFTSC1KNb6xTycY+hEk7oMdrxIe6MrzEUH8dCKFNceS6/CM/1mkXs+1GTMwFBTi9dFHFKi1PLfnOazMrPg44mOszIy1mzm5HLZNh5AhMPILUKmZvT6WtPwyFk5oh53l3dlVtDpqGgjeA/oLIS4B/UyvEUJ0FEIs/s1ICNEE8AH23bT/CiFEDBADuAJv1dAfhXsQIQTDhg3D1taWNWvWkJubi8bHDtdHm6PLKSdrSSyGUqN80WnkOJqF9+LXVd9x+Xg04eOM+YJdy+6dfMGOHTtu5AXsrGzJXnYWqTXg8khzVNbGi9OJbZv4ddV3NAvvRb8pz+D+8r+40vEx0q5LOve0o3VQMxb1X0SJroQpO6aQUWK6mW/3EHR5Fo58BceX8nzfILo2dWH2hlgupt8b+YKsr76i+NBhPGe/jjrIn3/t+xfpJeks6LMATxtPo1HsOtj4HAREwLhvQW3Ol/sus+HUNab1DaqXBehrQo0CgZQyW0rZV0oZZJKQckzrj0kpp1SxuyKl9JJSGm7aP0JK2cokNT0kpSyqiT8K9y5WVlaMHz+esrIylixZwvXr17Fo6ojrw6FoM0rI+jYWQ7keIQQDpj6Ph38AWz7+gLzrKQyc0hKDXhK5+O7PF8TGxnLs2DHCw8MJCgoid81FtNeLcX6gGebuxqRlzN5I9i79isCwLgx65kWQgn0/JpJo2wGf3KNYvj+VkqNHCXEO4ct+X5JTlsMTkU+QU5ZjPEj/N431jre8jDr5MAsntsXWwoxnV9z9+YLi6CNkffoZ9sOG4TBmDPOPzufI9SPM7TqXtu6mYUwXtsO6J8CnC4xfgVRreHfrOd7bdp4hrRvxTJ+7u6toddzdfZ4UGhReXl48/vjjCCH49ttvSUpKwjLEGZeJzahIKSR7WRxSq8dcY8GI6a+jsbRk/fx/Y2Gto89DzbieYKxfcLf2j8/MzGTjxo038gKFe5IpjcnCYbA/ViHGwUznD+0n8qtP8GvdjiEvzEQaBNu+iuXcoTTChjRh0IcPYe7uztXJUyjcvZvWbq35tO+npBalMnXnVAoqCkBtBmO/ASc/WP0w7voMFoxvR3xmEa/fxeMLtBkZpL78LzR+fjSaN5e1l9ay8vxKJjWfxIhAU9+XhF/gx0eMvageWI1Obcn0tWf4an8CD3fx4+MJ7VDfg7OyKoFA4a7C3d2dyZMnY2try/fff8+FCxewaumK07gQyhPzyV5xHqkzYOfsyoiXX6coJ5tNH71H07YutOzlxeldyexbefGuuzNISkrim2++qZyc71wuBTuTsG7vjm0PY8+Vy8ej2fbpf/EKac6Il19Dr4WNH5/iSkwWPScE02lYUzReXvitWI5Fs2akPPc8eT/9RJhnGAv6LOBS3iWe2fUMJdoSsHKEiatAr4WVE+nua2kcX3AiledWnqRMq6/jFvl7lF24yJUJEzAUFeO14CNOFJ7jnah3CPcK58UOLxqNrkbByongEggPraNUZcPU5cdZezyFaf2CeHNEi3syCIASCBTuQhwdHXn88cdxd3dn1apVnDx5Ept27jiODKTsfA45qy8g9ZJGQSEMePI5kuPOsHfZ1/QcH0z7gb7E7U9l8yenKSvW/vXB6gEnT55k2bJlWFtbM3nyZKxLzcn58QIaXzucRgUhhCDpzCk2ffQe7k2aMmrmXCrKBD//9yTpiQUMmNyCVr0ri8yYOTnh9+032HTtStprr5P19deENw5nfs/5xGbF8tye5yjTlYFrEIz7BjLOwvqpvBARwMxBzdgSk8b4RVFkFNwdOZfCvXtJmjgRdHr8vvuObC9bXvrlJbztvPlPz/+gVqnh2ilYMQ7sGsEj68nHjke+iWb3+Qz+PaIF0/oF35WTyd0u6nnz5tW1D3+bRYsWzXvyySfr2g2FOkSj0dCyZUtSU1OJiorCzMyMwG4tEBZqin69hj6vDMtQF9ybNEVbXsbJbRuxcXKi45DO2DlbEvNLCgknM/Ft7oylbf3s/WEwGNi1axc7d+7E39+fhx9+GBuVJVlfxyDUKtymtEZlbUbq+bOse38eTp6NGTv7bcqKVWz46CRFeeUMebo1/m3+OO5GaDTYDx5ERdJVcr/7DkNxMa3uewhvO2++P/s953PPM8BvAGrXINDYQvQXCARhfYYT2sieFVFXWX8qla4BLrjb1c8ZS6WU5CxdRtqrr2ERFITvsqXofTx5YucTFJQXsGTgEjxsPCDjHCwbDhb28NgW0nHiwcXRnEsrYOGEdozr6FPXp/KP8cYbb6TNmzdv0c3rlTsChbsWCwsLHnjgAVq0aMGuXbuIjIzEJrwx9v18KTmRQd5GYz6gxwOT8G/XkT3ffkVy3BlCuzVixLR2lBVpWfv+MVIu5Nb1qfyB8vJyVq9ezcGDBwkLC+PBBx/E0tyC7OXn0BdpcXmkOWp7DekJ8ax7bx52zq6Mfe3fFOXAuvnHqSjVM/LFdvg0v/VEaEKjofEH83F66CFyli4l7ZVXGOo7iNe7vG6cf//ALHQGHXR9Fto+CPveg7j1DGzhydqnuwIw9ovD7Ii7XlvNctvIigquz5lDxvvvY9e/P34rlqN2d+PVX1/lct5lPuj1AU0cmkD2ZfhuJKg18Mh6ErVOjPniEMk5JXz7aCeGtm5c16dSKyh3BAp3NSqVitDQUEpLS4mKiiIvL4+WAzuCDooPXkPqDFgGORHQoROXog9xZvd27Jxd8W/XjID27lyJyebM7mSs7TW4+9nX9ekAkJ+fz3fffcfVq1cZPHgwffr0wVBQQc7yc1QkFuA8PhjLYGeykpNY89brWFhbc/+cd8nPVLHpk1OYW6oZ+VK72yo8L4TApkcPhLkZOcu+o/RsHF3Gv4CtlQPfn/uetOI0+vj2QQT1h4R9cHQxWNjiHtKN4W29OJSQzZJfE7EwU9PRz6leyCe63FxSnn6Gwp27cJn6FJ5z55JnKOKVA6+wJ3kP08OmMzRgKOQlw7KhoCuDRzcTW+HBA19HodVLlk/pTJj/3Tub6K241R2BuBt7AHTs2FEeO3asrt1QqEdIKdm/fz979+4lODiYsWPHUrz1KsVRadj398O+mPcq1QAAFMdJREFUry8FWRls/eQDUs+fJbhzOP2eeBaVmTWRi2O5GpdDm74+dBsTiKoOE4IpKSmsXLkSnU7HuHHjCAgIoORUJnkb4sEAjsMDsOnoQe71a6yeOxOEYMK898lOUxO5JA5Hd2uGPdcWW6e/X2oyd/WPXH/jDazatMHni89ZlLSSz099zviQ8bzW+TVESQ5seAYubgf/XjDyc8qsG/HymtNsPpPGmPbevDO6JRZm6jvQMrdHeUICyVOfRpeWRqO338Jh+HB+Sf6FuYfmUlhRyAvtX+CR5o8gijLg28FQnAWTNnKoxJsnvz+Og5U530/uRFM32zo7hzuJEOK4lLLjH9YrgUDhXuLo0aNs2bIFHx8fJk6YSNmWZEpOZOAwpCl2PbwwGPQc2/QzB1cvx8rOjoFTX8CvdXsOro3nzN4U/Fq5MODxFmiszGrd95iYGNavX4+9vT0TJ07ExcaJvPXxlMZkoWlij/O4YMxcrCjIymDV3JnoyssZP+89rieq2ffDBTz87RnybBssbf73nEfBjkiuvfwymiZ+eC9axCepP7A0bimPtXiMFzu8iAA4sQy2vwoqMxjyX2TLMSzYHc/C3ZcIa+LElw91wMW29mseFx08SOq0FxEaDd6ffoJsGcz8o/P56dJPhDiF8E6Pdwh2CoaSHFg6BHKT4OGf2Zrvy7RVp2jias13j3fG06F+5jz+CZRAoNBgiIuLY926dbi4uPDgxAfRbUmlNDYbx1GB2HZuBEDGlQS2ffpfspKTaNN/ML0emsyFI9nsX3URJ09rhjzTGntXq1rxV0rJL7/8wr59+/D19WX8+PGoU8rJWXsJQ4kW+/5+2PX0RqgERbk5rJ43k9KCAsbOfouU8+ZEb0zAr6ULA59sibmm5v/Gi6OiSXn2WVQO9vgsXsz89BX8ePFHnm37LFPbTDUa5STAuqcg5Qi0GA1D/svGS2VMX3Mad3sLlkwKI9jjr6Wpf4qcH34g/e13sAgIwOeLz4kzz+SVA6+QWpTKYy0f49m2z6JRa6CsAL4bDuln4cEfWZ7hz+wNsbT3dWLJpI44Wmtqzee6QAkECg2KhIQEVq1ahZWVFQ8/8BBsTafsYi5OY4Oxbu+OEAJdRQUHf1zOsc0/4+jhyeBn/4Ve786Or2NRqQWDp7amUYDDHfVTq9Wyfv164uLiaNu2LfcNGExxZDLFUWmYeVgbC843NsoUxXm5rH3rdfIz0hnz6ptcPm1GzN4Ugjt7EPFIKOp/cE780rg4kp98CgwGvL76grfzV7MpYRMvdXiJR1s8aswF6HVwcAH88i7YuMGIzzhl0YEnvjtGWYWejx9oR58Q93/Mp+qQOh3p775H7ooV2Pbujft/3uWr+O9YEruERjaNeLv723Tw6GA0LsuHH8ZDylHk+OV8khLIhzsvEtHMnc8eaI/VPxBE6ztKIFBocFy7do3ly5cD8OD4iWh25FKekI+miT32/fywCHBACEHy2Ri2ffYhRdnZdB59P83Ch7Hty7MU5pYR8XAoIZ0974h/hYWFrFy5kmvXrtG/f386eLckb80ldNml2Hb3wmFAE4S5ipKCfI5tWsepHVswGPSMnD6Hi8c0XDqaTpt+PoSPDkTcgbxGRVISVydPQZeTQ6OPP+IN3Xp2Ju0k1DmUqW2m0senjzEgXDsFPz8Fmeeh05Okhc1i8g/G2sevD2nOY+FN7kgSWV9QQOqLL1F88CDOjz1GweQRvHZ4NudyzjE6aDQzwmZgY25jDADRX8Hhz6C8AMPoJbyREMyyw0mMbu/F+2NaY36XF5a5XZRAoNAgyc7O5vvvv6ekpITx4+7HI8uawr3J6AsqTAHBF4sARypKS9i7dBFx+3bj0TSQiMee58jmPFIv5tFhkB+dhzf9Ry+2aWlprFy5ktLSUkaPGk3jVGsK915FbW+B07hgLAMcKcnP4+imdZyK3IK+QktItx50GDqOI5vzSD6bQ9dRAbQb4HtHe+poMzJIfuJJyhMS8HzvHfaF6Fh0ZhHJhcmEOofyVJuniPCJQOjKYPebEPU5uARROvRzXjigIvJsOg909uWN4S3+0YttxdWrJE99moqrV/GYO4ctLcpYcHwBthpb5nadaywqU5YPUV9C1GfG5eDBaHvM4KVfBZtOX+OJHv68Mji0TjsH1DZKIFBosBQUFLB8+XKys7MZNmwYrUJbUnoig8JfktHnV6DxMwWEQEfijxwm8utP0ZWVET5hEgU5gZw7lE5AOzf6Ptocc4uaywfnzp1j3bp1WFlZMW7QKDR78tCmFGHd3h3H4QGUlRf9LgA0696LLqPHY6ZxYcfiODKTCuj9UDOah9dOH3d9QQHJzzxD6fETuM+Ygd2EcWxL3cVXZ74iuTCZZs7NmNp6Kn18+6BK3G+s61t4HUPPGcwvHcoX+5MID3Th8wc64GBd88F7JUePkvLc8yAllu/P5c3yn4i+Hk1vn97M6zoPF9QQ/aUxKJXlG6eR7jWDdNtmvLzmNAcuZfHK4GY81SvgH2iduwslECg0aEpLS/nhhx9ITk7G2tqa5s2b0zK0BS4ZGop+SUWfX24MCH190blJIr/6mMSTx/Bt1RafluM4viMbNx877nu69f/UNROMSeGDBw+ya9cuvBp7MTywN9o9Gag0KhxHBSF91Bzd+BOnd25Dr9US2r0XrfuPJivFjEvH0klPLEBtpmLAlBY0bVu7VfoMZWWkvvQvivbsQWVri12/ftgMHsg+z1wWnV1CUkESIU4hTG0zlQi39qi2z4Izq8GrA9uD3+S5yAJ8nKxZ8mgY/q42/7MfeT+tI23ePDQ+Plx6ZSxvJn+NXuqZ2Wkmo7z6IKK/hKgvoDwfmg0lL+xFNme6sen0NY5cyUElBO+ObsX999Bo4b+DEggUGjw6nY5Lly4RGxvLhQsX0Ol02NnZ0Ty0OYGqRticKMdQUIHG1w67vr5cunqEvd9/jdrMjDb9H+ZclLEXTONgR7xDnPBu5oxLY5s/lYxKSkq4cuUKCQkJJCYmkp2dTYuQULoXhaC/XIhliBMW/d05vmcDp3duR6/VEtylJ25NI0iLh9RLeSDBxduWoI7uBIV5YO9SO72ZbkYaDBQfPETB1q0U7tyJoagItaMjNgP6c66dMwsNO7lSdJVgp2CmtplK34J8VFv+BbpyrnR8ldFHmlFcoaeTvzPhga50C3ChRWOHP53ITV9URMnRo5RERVMcFUX5hQtounRi8f0ObM7cSzv3drzdcSY+sRtMAaAAbfAQ9no8xvIkBw7GZ6E3SALcbBjWpjEj2nrVKBDd7SiBQEGhCuXl5Vy8eJHY2Fji4+PR6/U4OjgS7NIE3xRbHAo0WPjaI9rbsnPTF6TFX8C/XTfsPQeTnlhOXnoJAFZ25ngFO+HdzPiwtFdz9epVEhMTSUhI4Pp14/QLGo0GPz8/mtp64XNSg9BLLCM8OJO0lzO7tqHX62gc0hlzq86kJ6mRBomjhzVBYR4EdXTHybN+XbwMFRUUHzhAwZatFO7diywtRe3mRna3EJY3vsJ+hzSCnIOZGjSefkdXokrYS6lfHz63m8b2q4JLGcbSIw5W5nRp6ky3AFfCA13wtzen7NRpiqMOU3I4itLYWNDrERoNVu3bk9nOl1nu+8nW5fFsi8d5LDcP9ZFFUF7Atcb9+VqMY8UVeyr0BrydrBjWpjHDWjcmtJFdvRj1XNfckUAghBgHzANCgU5SymqvzkKIQcBCQA0sllL+VsnMH1gFuADHgYellBV/dVwlECj8k5SWlnL+/HliY2NJSEhASomzjSP+Za74l7ji5u1BmlUSe3d9ixACW2cXrOycECpbKrQW5JeqKVWDwdKAVlOOFBKVUNHIxg0faw8aC2dcy2ygUIehRIe6sRWXLWM4tn8TBp0Oe4+2lFe0BemInYslQR09CApzx8XL9q64eBlKSijat4+CrVsp2rcfWVGB1sOJX5tJtgYUYh4cyFTbYPpHfYfKoAW7xmit3ckUziSU2nIlUYcuuRjXzBx8s7Mw1+sxqAR5/u6khbpw0V9DTKMKrmmzyCnLIcC+Ce9aBdPsxCpERREnbXsyL38op7XeeNhbMLR1Y4a1aUwbb4e7ov1qkzsVCEIBA/AV8HJ1gUAIoQYuAv2BFOAoMFFKeVYI8SOwTkq5SgjxJXBaSvnFXx1XCQQKd4ri4mLOnTtHbGwsV65cAcBF2NG0wp2mTl6UmuWQU5RPVnkJ2bKUHHUpeiERElylPY0NTjQyOOFpcEQlBeWGMipUOqSlxNzRgkKZw6/HV6PX6zCzaI5K0wk7Z3cCO3oQ1NED9yZ39z9XfWEhhbt3U7B1K8WHDoNOR4abOftCdKR0aEwvTw/E1QzUl7KxTyzDM1liafrrd8Ud4vwEMX6Cc76CMg3YG1Q4SQ2u5nZ427gSqNIwNm4vNrpiImUXPqwYSYZ1IPe18mRY68aENXFuUL2A/i53VBoSQvzCrQNBV2CelHKg6fUrpk3vAZmAp5RSd7Pdn6EEAoXaoKCggLNnzxIbE0NKqrHAu7lUoxXGoixOKlu8rT3wcfDEw8EJoTZQZiimqDyPzMzrpF9PpTA7m/LiPKShGON/JoFa0xxrl3BCOoUQFOZOowDHOzIOoK7R5eZSGLmT/K1bKDlyFCElJRZgXW7cnu1iTmqIC/nNG6Nv7oODvTUeBnCtKMciNxd1ZjoiPw3LsgzcyMVCGMtkbtZ35hv1OAJadGJYm8Z0C3DBrIGMA6gptwoEtTGhiheQXOV1CtAZoxyUJ6XUVVnvdas3EUI8CTwJ4Ovre2c8VVCogr29PV26dKFLly7k5uYSGxNLbkY2fgH+NA1sip3d7U2hoNcZuH45j8SYFMqKtAR3aoJ3iBOqe/ziZebkhNP4+3Eafz/ajAzyt20j79xp7Nt3wim8B6Fet/y5/w6d3kBMSh4nLlzmem4xnVqFsjLYtU4nt7vX+MtAIITYBVQ3tPI1KeWGf96l6pFSLgIWgfGOoLaOq6AA4OTkRI+ePf6nfdVmKrxCnPEKufemNb5dzN3dcZ00Cdf/YV8ztYp2fs6082u47Xen+ctAIKXsV8NjpAJVO+16m9ZlA45CCDPTXcFv6xUUFBQUapHauDc9CgQJIfyFEBpgArBRGpMTe4GxJrtJQK3dYSgoKCgoGKlRIBBCjBJCpABdgS1CiB2m9Y2FEFsBTP/2/w/YAZwDfpRSxpneYibwkhAiHmPOYElN/FFQUFBQ+PsoA8oUFBQUGgi36jV0b3dbUFBQUFD4S5RAoKCgoNDAUQKBgoKCQgNHCQQKCgoKDZy7MlkshMgEkv7H3V2BrH/QnX8axb+aofhXMxT/akZ9989PSvmHYhZ3ZSCoCUKIY9VlzesLin81Q/GvZij+1Yz67t+tUKQhBQUFhQaOEggUFBQUGjgNMRAsqmsH/gLFv5qh+FczFP9qRn33r1oaXI5AQUFBQeH3NMQ7AgUFBQWFKiiBQEFBQaGBc88GAiHEICHEBSFEvBBiVjXbLYQQq03bo4UQTWrRNx8hxF4hxFkhRJwQ4oVqbHoLIfKFEKdMjzm15Z/p+FeEEDGmY1dXglQIIT42td8ZIUT7WvQtpEq7nBJCFAghpt1kU6vtJ4T4RgiRIYSIrbLOWQixUwhxyfTsdIt9J5lsLgkhJtWif/OFEOdNn9/PQgjHW+z7p9+FO+jfPCFEapXP8L5b7Punv/U76N/qKr5dEUKcusW+d7z9aoyU8p57AGrgMtAU0ACngeY32TwDfGlangCsrkX/GgHtTct2wMVq/OsNbK7DNrwCuP7J9vuAbYAAugDRdfhZX8c4UKbO2g/oCbQHYqus+w8wy7Q8C3i/mv2cgQTTs5Np2amW/BsAmJmW36/Ov9v5LtxB/+ZhrIX+V5//n/7W75R/N23/LzCnrtqvpo979Y6gExAvpUyQUlYAq4ARN9mMAJaZltcCfYUQtVJBXEqZJqU8YVouxFin4fYKuNYfRgDfSSNRGKvNNaoDP/oCl6WU/+tI838EKeV+IOem1VW/Y8uAkdXsOhDYKaXMkVLmAjuBQbXhn5QyUlbWDI/CWCWwTrhF+90Ot/NbrzF/5p/punE/sPKfPm5tca8GAi8gucrrFP54ob1hY/ox5GMsjlOrmCSpdkB0NZu7CiFOCyG2CSFa1KpjIIFIIcRxIcST1Wy/nTauDSZw6x9gXbYfgIeUMs20fB3wqMamvrTj4xjv8Krjr74Ld5L/M0lX39xCWqsP7dcDSJdSXrrF9rpsv9viXg0EdwVCCFvgJ2CalLLgps0nMModbYBPgPW17F53KWV7YDDwrBCiZy0f/y8RxtKnw4E11Wyu6/b7HdKoEdTLvtpCiNcAHbDiFiZ19V34AggA2gJpGOWX+shE/vxuoN7/lu7VQJAK+FR57W1aV62NEMIMcACya8U74zHNMQaBFVLKdTdvl1IWSCmLTMtbAXMhhGtt+SelTDU9ZwA/Y7wFr8rttPGdZjBwQkqZfvOGum4/E+m/yWWm54xqbOq0HYUQjwJDgQdNweoP3MZ34Y4gpUyXUuqllAbg61sct67bzwwYDay+lU1dtd/f4V4NBEeBICGEv+lf4wRg4002G4HfemiMBfbc6ofwT2PSFJcA56SUH97CxvO3nIUQohPGz6pWApUQwkYIYffbMsakYuxNZhuBR0y9h7oA+VVkkNrilv/E6rL9qlD1OzYJ2FCNzQ5ggBDCySR9DDCtu+MIIQYBM4DhUsqSW9jcznfhTvlXNec06hbHvZ3f+p2kH3BeSplS3ca6bL+/RV1nq+/UA2OvlosYexS8Zlr3JsYvPYAlRkkhHjgCNK1F37pjlAnOAKdMj/uAqcBUk83/AXEYe0FEAd1q0b+mpuOeNvnwW/tV9U8An5naNwboWMufrw3GC7tDlXV11n4YA1IaoMWoU0/GmHPaDVwCdgHOJtuOwOIq+z5u+h7GA4/Von/xGPX1376Dv/Wiawxs/bPvQi35973pu3UG48W90c3+mV7/4bdeG/6Z1i/97TtXxbbW26+mD2WKCQUFBYUGzr0qDSkoKCgo3CZKIFBQUFBo4CiBQEFBQaGBowQCBQUFhQaOEggUFBQUGjhKIFBQUFBo4CiBQEFBQaGB8//AuuxsOUmz+wAAAABJRU5ErkJggg==\n",
+      "text/plain": [
+       "<Figure size 432x288 with 1 Axes>"
+      ]
+     },
+     "metadata": {
+      "needs_background": "light"
+     },
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "evt, (rhs,) = prog(u=u)\n",
+    "plt.plot(u)\n",
+    "plt.plot(rhs)"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -196,7 +690,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.8.2"
+   "version": "3.7.7"
   }
  },
  "nbformat": 4,
-- 
GitLab


From f579c554d69e6244cae14a4209c53d43a5d6aba7 Mon Sep 17 00:00:00 2001
From: Matt Wala <wala1@illinois.edu>
Date: Thu, 7 May 2020 21:43:51 -0500
Subject: [PATCH 46/46] Simplify the code generation of roll(), because loopy
 appears to have Python semantics for %

---
 arrayzy/array.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/arrayzy/array.py b/arrayzy/array.py
index 33f08f5..c72c500 100644
--- a/arrayzy/array.py
+++ b/arrayzy/array.py
@@ -614,10 +614,6 @@ class Context:
         """
         assert a.ndim == 1
         idx = (prim.Variable("_0") - shift) % a.shape[0]
-        # It's not immediately clear what loopy-generated code guarantees about
-        # the sign of the result of the % operator, so if the result is
-        # negative we shift it to be positive.
-        idx = idx + prim.If(prim.Comparison(idx, "<", 0), a.shape[0], 0)
         expr = a.to_loopy_expression((idx,))
         return ArrayExpression(self, a.shape, a.dtype, expr, a.reductions)
 
-- 
GitLab