From 7b1fa24de1b42a37a2ca80e9bd7cf5fe7fa70a65 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Thu, 9 Apr 2020 12:10:45 -0500 Subject: [PATCH 01/46] Add stub of test_array --- test/test_array.py | 48 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) create mode 100644 test/test_array.py diff --git a/test/test_array.py b/test/test_array.py new file mode 100644 index 0000000..98de63e --- /dev/null +++ b/test/test_array.py @@ -0,0 +1,48 @@ +#!/usr/bin/env python + +__copyright__ = "Copyright (C) 2020 Matt Wala" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + +import numpy as np +import numpy.linalg as la +import sys + +import pytest + +import pyopencl as cl +import pyopencl.array as cl_array +import pyopencl.cltypes as cltypes +import pyopencl.tools as cl_tools +from pyopencl.tools import ( # noqa + pytest_generate_tests_for_pyopencl as pytest_generate_tests) + + +if __name__ == "__main__": + # make sure that import failures get reported, instead of skipping the + # tests. + if len(sys.argv) > 1: + exec(sys.argv[1]) + else: + from pytest import main + main([__file__]) + +# vim: filetype=pyopencl:fdm=marker -- GitLab From 75bc91bc972d894a016d9bdfa5c236ebd947f308 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Thu, 9 Apr 2020 17:36:32 -0500 Subject: [PATCH 02/46] WIP: Basic symbolic array support --- arrayzy/__init__.py | 2 +- arrayzy/array.py | 164 ++++++++++++++++++++++++++++++++++++-------- test/test_array.py | 32 +++++++++ 3 files changed, 169 insertions(+), 29 deletions(-) diff --git a/arrayzy/__init__.py b/arrayzy/__init__.py index a2831ea..b7028a0 100644 --- a/arrayzy/__init__.py +++ b/arrayzy/__init__.py @@ -1 +1 @@ -from arrayzy.array import Array, Context, make_context +from arrayzy.array import Array, Context, make_context, make_sym diff --git a/arrayzy/array.py b/arrayzy/array.py index a365f31..c52c82e 100644 --- a/arrayzy/array.py +++ b/arrayzy/array.py @@ -20,8 +20,14 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. """ +import islpy as isl import loopy as lp +import numpy as np + +import pymbolic import pymbolic.primitives as prim +from pytools import memoize_method +from pyopencl.compyte.array import get_common_dtype class Context: @@ -44,7 +50,8 @@ class Context: *program* may not define any names starting with underscores. """ - def __init__(self, program, bindings, target): + def __init__(self, queue, program, bindings, target): + self.queue = queue self._program = program self.bindings = bindings self.target = target @@ -53,28 +60,33 @@ class Context: # update_program. @property def program(self): - self.program + return self._program + + @property + def parameters(self): + pass def update_program(self, program): - self.program = program + self._program = program def get_parameter(self, name): - if name in self.program.all_variable_names(): - if name not in self.program.all_params(): - # FIXME: ... data dependent control flow? - raise ValueError( - f"'{name}' is not a domain parameter " - "in this context") - - else: - return prim.Variable(name) - else: + if name not in self.program.all_variable_names(): self.update_program( self.program.copy( - arguments=self.program.args + [ + args=self.program.args + [ lp.ValueArg(name, dtype=self.program.index_dtype) ])) + """ + if name not in self.program.all_params(): + # FIXME: ... data dependent control flow? + raise ValueError( + f"'{name}' is not a domain parameter " + "in this context") + """ + + return prim.Variable(name) + class Target: pass @@ -104,9 +116,9 @@ def make_context(arg): raise ValueError(f"invalid argument type: {type(arg).__name__}") import loopy as lp - program = lp.make_kernel("{[]:}", [], target=target.get_loopy_target()) + program = lp.make_kernel("{:}", [], target=target.get_loopy_target()) - return Context(program, {}, target) + return Context(arg, program, {}, target) class Array: @@ -121,8 +133,11 @@ class Array: An instance of :class:`loopy.types.LoopyType` or *None* to indicate that the type of the array is not yet known. - .. attribute:: expression + .. attribute:: is_materialized + Whether this array is backed by actual storage. + + .. attribute:: expression """ def __init__(self, context, shape, dtype, expression): @@ -135,34 +150,127 @@ class Array: pass def eval(self, **kwargs): + _, (out,) = self._knl()(self.context.queue, **kwargs) + return out + + @property + def _dim_names(self): + return tuple(f"_{i}" for i in range(len(self.shape))) + + def __getitem__(self, indices): + # TODO pass + @property + def T(self): + def swap_last_two_dims(arr): + if len(arr) < 2: + return arr + arr_copy = list(arr) + arr_copy[-2], arr_copy[-1] = arr_copy[-1], arr_copy[-2] + return type(arr)(arr_copy) + + index_map = dict( + zip(self._dim_names, + map(prim.Variable, swap_last_two_dims(self._dim_names)))) + + expression = pymbolic.substitute(self.expression, index_map) + shape = swap_last_two_dims(self.shape) + + return Array(self.context, shape, self.dtype, expression) + + def __mul__(self, other): + if not np.isscalar(other): + raise TypeError("only scalar multiplication supported") + return Array( + self.context, + self.shape, + get_common_dtype(self, other, allow_double=True), + other * self.expression) + + __rmul__ = __mul__ + + @property + def ndim(self): + return len(self.shape) + + @memoize_method + def _knl(self): + knl = self.context.program + + out = lp.GlobalArg("_out", shape=self.shape, dtype=self.dtype, + order="C") + + # FIXME: Won't work for scalars. + out_inames = [f"_out_{i}" for i in range(self.ndim)] + + out_expr = pymbolic.substitute( + self.expression, + dict(zip(self._dim_names, map(prim.Variable, out_inames)))) + + # Build output domain. + params = [] + from loopy.symbolic import get_dependencies + for sdep in map(get_dependencies, self.shape): + for dep in sdep: + params.append(self.context.get_parameter(dep).name) + + dom = isl.BasicSet.universe( + isl.Space.create_from_names( + isl.DEFAULT_CONTEXT, + set=out_inames, + params=params)) + + from loopy.symbolic import aff_from_expr + affs = isl.affs_from_space(dom.space) + for iname, expr in zip(out_inames, self.shape): + dom &= affs[0].le_set(affs[iname]) + dom &= affs[iname].lt_set(aff_from_expr(dom.space, expr)) + dom, = dom.get_basic_sets() + + # Build output instruction. + from loopy.kernel.instruction import make_assignment + out_insn = make_assignment( + (prim.Variable("_out")[tuple(map(prim.Variable, out_inames))],), + out_expr, + id="_out", + within_inames=frozenset(out_inames)) + + return knl.copy( + domains=knl.domains + [dom], + instructions=knl.instructions + [out_insn], + args=knl.args + [out]) + def store(self, prefix="tmp"): + """Stores the array in a temporary.""" pass -def make_sym(context, name, shape, dtype=None): +def make_sym(context, name, shape, dtype=None, order="C"): if name in context.program.all_variable_names(): raise ValueError(f"name '{name}' already in use in context") - arg = lp.ArrayArg(name, shape=shape, dtype=dtype) - from loopy.symbolic import get_dependencies - for sdep in get_dependencies(si for si in arg.shape): - context.get_parameter(sdep) + arg = lp.GlobalArg(name, shape=shape, dtype=dtype, order=order) - context.update_program(context.program.copy( - arguments=context.program.arguments + [arg])) + shape = arg.shape + dtype = arg.dtype - # TODO make sure "name" is not taken + from loopy.symbolic import get_dependencies + for sdep in map(get_dependencies, arg.shape): + for dep in sdep: + context.get_parameter(dep) + + context.update_program( + context.program.copy( + args=context.program.args + [arg])) v_name = prim.Variable(name) - subscripts = tuple(prim.Variable(???) for i in range(len(shape))) + subscripts = tuple(prim.Variable(f"_{i}") for i in range(len(shape))) - return Array(context, expression=v_name[subscripts]) + return Array(context, shape, dtype, expression=v_name[subscripts]) def zeros(context, shape, dtype): pass - # vim: foldmethod=marker diff --git a/test/test_array.py b/test/test_array.py index 98de63e..bb39276 100644 --- a/test/test_array.py +++ b/test/test_array.py @@ -22,6 +22,8 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. """ +import arrayzy as az + import numpy as np import numpy.linalg as la import sys @@ -36,6 +38,36 @@ from pyopencl.tools import ( # noqa pytest_generate_tests_for_pyopencl as pytest_generate_tests) +def test_symbolic_array(ctx_getter): + cl_ctx = ctx_getter() + queue = cl.CommandQueue(cl_ctx) + ctx = az.make_context(queue) + x = az.make_sym(ctx, "x", shape="n", dtype=np.float64) + x_in = cl.array.to_device(queue, np.array([1., 2., 3., 4., 5.])) + assert (x.eval(x=x_in).get() == x_in.get()).all() + + +def test_transpose(ctx_getter): + cl_ctx = ctx_getter() + queue = cl.CommandQueue(cl_ctx) + ctx = az.make_context(queue) + x = az.make_sym(ctx, "x", shape="n,m", dtype=np.float64) + x_in = cl.array.to_device( + queue, + np.array([[1., 2., 3., 4., 5.], [6., 7., 8., 9., 10.]])) + assert (x.T.eval(x=x_in).get() == x_in.get().T).all() + + +def test_scalar_multiply(ctx_getter): + cl_ctx = ctx_getter() + queue = cl.CommandQueue(cl_ctx) + ctx = az.make_context(queue) + x = az.make_sym(ctx, "x", shape="n", dtype=np.float64) + x_in = np.array([1., 2., 3., 4., 5.]) + x_in = cl.array.to_device(queue, np.array([1., 2., 3., 4., 5.])) + assert ((2*x).eval(x=x_in).get() == (2*x_in).get()).all() + + if __name__ == "__main__": # make sure that import failures get reported, instead of skipping the # tests. -- GitLab From 53ad7902f64811f552f74320ea23a02c36286b4a Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Fri, 10 Apr 2020 16:24:24 -0500 Subject: [PATCH 03/46] Add a basic CI configuration --- .gitlab-ci.yml | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100644 .gitlab-ci.yml diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml new file mode 100644 index 0000000..b8bbe44 --- /dev/null +++ b/.gitlab-ci.yml @@ -0,0 +1,36 @@ +Python 3 POCL: + script: + - export PY_EXE=python3 + - export PYOPENCL_TEST=portable + - export EXTRA_INSTALL="pybind11 numpy mako" + - export LOOPY_NO_CACHE=1 + - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project.sh + - ". ./build-and-test-py-project.sh" + tags: + - python3 + - pocl + except: + - tags + artifacts: + reports: + junit: test/pytest.xml + +Pylint: + script: + - export PY_EXE=python3 + - EXTRA_INSTALL="pybind11 numpy mako" + - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/prepare-and-run-pylint.sh + - ". ./prepare-and-run-pylint.sh arrayzy test/test_*.py" + tags: + - python3 + except: + - tags + +Flake8: + script: + - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/prepare-and-run-flake8.sh + - ". ./prepare-and-run-flake8.sh arrayzy test" + tags: + - python3 + except: + - tags -- GitLab From 13043b741ae0fcc4be232c2e89be39ecc7415e09 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Fri, 10 Apr 2020 16:30:05 -0500 Subject: [PATCH 04/46] Add requirements.txt with pyopencl --- requirements.txt | 1 + 1 file changed, 1 insertion(+) create mode 100644 requirements.txt diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..7819c26 --- /dev/null +++ b/requirements.txt @@ -0,0 +1 @@ +git+https://github.com/inducer/pyopencl.git -- GitLab From 76b0a756988abf6b910b4ec83d5599e01e99d948 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Fri, 10 Apr 2020 16:45:03 -0500 Subject: [PATCH 05/46] Use loopy's type inference to get the type of a new array --- arrayzy/array.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/arrayzy/array.py b/arrayzy/array.py index c52c82e..f3a6d0a 100644 --- a/arrayzy/array.py +++ b/arrayzy/array.py @@ -27,7 +27,6 @@ import numpy as np import pymbolic import pymbolic.primitives as prim from pytools import memoize_method -from pyopencl.compyte.array import get_common_dtype class Context: @@ -62,6 +61,12 @@ class Context: def program(self): return self._program + def _infer_type(self, expr): + """Infer the type of an expression in the kernel being built.""" + from loopy.type_inference import TypeInferenceMapper + mapper = TypeInferenceMapper(self._program) + return mapper(expr) + @property def parameters(self): pass @@ -182,11 +187,12 @@ class Array: def __mul__(self, other): if not np.isscalar(other): raise TypeError("only scalar multiplication supported") + new_expr = other * self.expression return Array( self.context, self.shape, - get_common_dtype(self, other, allow_double=True), - other * self.expression) + self.context._infer_type(new_expr), + new_expr) __rmul__ = __mul__ -- GitLab From 82b44deeee40c5c117276a21137e319aca59d6c4 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Fri, 10 Apr 2020 16:54:04 -0500 Subject: [PATCH 06/46] Fix, test transpose for arbitrary dims --- arrayzy/array.py | 11 ++--------- test/test_array.py | 8 +++++--- 2 files changed, 7 insertions(+), 12 deletions(-) diff --git a/arrayzy/array.py b/arrayzy/array.py index f3a6d0a..11274a8 100644 --- a/arrayzy/array.py +++ b/arrayzy/array.py @@ -168,19 +168,12 @@ class Array: @property def T(self): - def swap_last_two_dims(arr): - if len(arr) < 2: - return arr - arr_copy = list(arr) - arr_copy[-2], arr_copy[-1] = arr_copy[-1], arr_copy[-2] - return type(arr)(arr_copy) - index_map = dict( zip(self._dim_names, - map(prim.Variable, swap_last_two_dims(self._dim_names)))) + map(prim.Variable, reversed(self._dim_names)))) expression = pymbolic.substitute(self.expression, index_map) - shape = swap_last_two_dims(self.shape) + shape = tuple(reversed(self.shape)) return Array(self.context, shape, self.dtype, expression) diff --git a/test/test_array.py b/test/test_array.py index bb39276..6a483bb 100644 --- a/test/test_array.py +++ b/test/test_array.py @@ -47,14 +47,16 @@ def test_symbolic_array(ctx_getter): assert (x.eval(x=x_in).get() == x_in.get()).all() -def test_transpose(ctx_getter): +@pytest.mark.parametrize("dim", (1, 2, 3)) +def test_transpose(ctx_getter, dim): cl_ctx = ctx_getter() queue = cl.CommandQueue(cl_ctx) ctx = az.make_context(queue) - x = az.make_sym(ctx, "x", shape="n,m", dtype=np.float64) + shape = ("l", "m", "n")[-dim:] + x = az.make_sym(ctx, "x", shape=shape, dtype=np.float64) x_in = cl.array.to_device( queue, - np.array([[1., 2., 3., 4., 5.], [6., 7., 8., 9., 10.]])) + np.arange(24, dtype=float).reshape((2, 3, -1)[-dim:])) assert (x.T.eval(x=x_in).get() == x_in.get().T).all() -- GitLab From 7ffe02ffaaea273c26bec49158053f200344807e Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Fri, 10 Apr 2020 16:56:03 -0500 Subject: [PATCH 07/46] flake8 fixes --- arrayzy/__init__.py | 2 +- arrayzy/array.py | 6 ++++-- test/test_array.py | 9 ++++----- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/arrayzy/__init__.py b/arrayzy/__init__.py index b7028a0..9d6dae8 100644 --- a/arrayzy/__init__.py +++ b/arrayzy/__init__.py @@ -1 +1 @@ -from arrayzy.array import Array, Context, make_context, make_sym +from arrayzy.array import Array, Context, make_context, make_sym # noqa diff --git a/arrayzy/array.py b/arrayzy/array.py index 11274a8..4dd2985 100644 --- a/arrayzy/array.py +++ b/arrayzy/array.py @@ -167,7 +167,7 @@ class Array: pass @property - def T(self): + def T(self): # noqa index_map = dict( zip(self._dim_names, map(prim.Variable, reversed(self._dim_names)))) @@ -230,7 +230,9 @@ class Array: # Build output instruction. from loopy.kernel.instruction import make_assignment out_insn = make_assignment( - (prim.Variable("_out")[tuple(map(prim.Variable, out_inames))],), + ( + prim.Variable("_out")[ + tuple(map(prim.Variable, out_inames))],), out_expr, id="_out", within_inames=frozenset(out_inames)) diff --git a/test/test_array.py b/test/test_array.py index 6a483bb..14090f4 100644 --- a/test/test_array.py +++ b/test/test_array.py @@ -25,15 +25,14 @@ THE SOFTWARE. import arrayzy as az import numpy as np -import numpy.linalg as la import sys -import pytest +import pytest # noqa import pyopencl as cl -import pyopencl.array as cl_array -import pyopencl.cltypes as cltypes -import pyopencl.tools as cl_tools +import pyopencl.array as cl_array # noqa +import pyopencl.cltypes as cltypes # noqa +import pyopencl.tools as cl_tools # noqa from pyopencl.tools import ( # noqa pytest_generate_tests_for_pyopencl as pytest_generate_tests) -- GitLab From 9ab172a95496ae77d3a57f39def303d5fd37170f Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Fri, 24 Apr 2020 12:11:33 -0500 Subject: [PATCH 08/46] Add initial advection demo --- experiments/advection.py | 474 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 474 insertions(+) create mode 100644 experiments/advection.py diff --git a/experiments/advection.py b/experiments/advection.py new file mode 100644 index 0000000..f0dc376 --- /dev/null +++ b/experiments/advection.py @@ -0,0 +1,474 @@ +import contextlib +import numpy as np +import numpy.linalg as la +import numpy.polynomial.legendre as leg +import pytest + + +__doc__ = """ +Notation convention for operator shapes +======================================= + +* m - number of elements in the discretization +* n - number of volume degrees of freedom per element +""" + +import functools + + +memoized = functools.lru_cache(maxsize=None) + + +def ortholegvander(x, deg): + """See numpy.polynomial.legendre.legvander(). Uses an orthonormal basis.""" + result = leg.legvander(x, deg) + factors = np.array([np.sqrt((2*n+1)/2) for n in range(0, 1 + deg)]) + return result * factors + + +def ortholegder(c): + """See numpy.polynomial.legendre.legder(). Uses an orthonormal basis.""" + fw_factors = np.array([np.sqrt((2*n+1)/2) for n in range(len(c))]) + derivs = leg.legder(c * fw_factors) + return derivs / fw_factors[:len(derivs)] + + +def ortholegval(x, c): + """See numpy.polynomial.legendre.legval(). Uses an orthonormal basis.""" + factors = np.array([np.sqrt((2*n+1)/2) for n in range(len(c))]) + return leg.legval(x, c * factors) + + +class DGDiscr1D(object): + """A one-dimensional Discontinuous Galerkin discretization.""" + + def __init__(self, left, right, nelements, nnodes): + """ + Inputs: + left - left endpoint + right - right endpoint + nelements - number of discretization panels + nnodes - number of degrees of freedom per panel + """ + self.left = left + self.right = right + self.nelements = nelements + self.nnodes = nnodes + + @property + @memoized + def ref_nodes(self): + """Return reference nodes for a single element. + + Signature: ->(n,) + """ + nodes, _ = leg.leggauss(self.nnodes) + return nodes + + @property + @memoized + def ref_weights(self): + """Return reference quadrature weights for a single element. + + Signature: ->(n,) + """ + _, weights = leg.leggauss(self.nnodes) + return weights + + def zeros(self): + """Return a zero solution. + + Signature: ->(n*m,) + """ + return np.zeros(self.nnodes * self.nelements) + + @property + def h(self): + """Return the element size. + + Signature: ->() + """ + return self.elements[0,1] - self.elements[0,0] + + def nodes(self): + """Return the vector of node coordinates. + + Signature: ->(n*m,) + """ + centers = (self.elements[:,0] + self.elements[:,1]) / 2 + radii = (self.elements[:,1] - self.elements[:,0]) / 2 + return ((self.ref_nodes[:,np.newaxis] * radii) + centers).T.ravel() + + @property + @memoized + def vdm(self): + """Return the elementwise Vandermonde (modal-to-nodal) matrix. + + Signature: ->(n, n) + """ + return ortholegvander(self.ref_nodes, self.nnodes - 1) + + @property + @memoized + def _ref_mass(self): + """Return the (volume) mass matrix for the reference element. + + Signature: ->(n, n) + """ + return la.inv(self.vdm @ self.vdm.T) + + @property + @memoized + def mass(self): + """Return the elementwise volume mass matrix. + + Signature: ->(n, n) + """ + h = (self.right - self.left) / self.nelements + return (h/2) * self._ref_mass + + @property + @memoized + def inv_mass(self): + """Return the inverse of the elementwise volume mass matrix. + + Signature: ->(n, n) + """ + return la.inv(self.mass) + + @property + @memoized + def face_mass(self): + """Return the face mass matrix. + + The face mass matrix combines the effects of applying the face mass + operator on each face and interpolating the output to the volume nodes. + + Signature: ->(n, 2) + """ + return self.interp.T + + @property + @memoized + def diff(self): + """Return the elementwise differentiation matrix. + + Signature: ->(n, n) + """ + VrT = [] + for row in np.eye(self.nnodes): + deriv = ortholegder(row) + VrT.append(ortholegval(self.ref_nodes, deriv)) + Vr = np.vstack(VrT).T + return Vr @ la.inv(self.vdm) + + @property + @memoized + def stiffness(self): + """Return the stiffness matrix. + + Signature: ->(n, n) + """ + return (self._ref_mass @ self.diff) + + @property + @memoized + def interp(self): + """Return the volume-to-face interpolation matrix. + + Signature: ->(2, n) + """ + return ortholegvander([-1, 1], self.nnodes - 1) @ la.inv(self.vdm) + + @property + @memoized + def elements(self): + """Return the list of elements, each given by their left/right boundaries. + + Signature: ->(m, 2) + """ + h = (self.right - self.left) / self.nelements + return np.array(list(zip( + np.linspace(self.left, self.right, self.nelements, endpoint=False), + np.linspace(h + self.left, self.right, self.nelements)))) + + @property + def dg_ops(self): + """Return a context manager yielding a DGOps1D instance. + """ + return contextlib.contextmanager(lambda: (yield DGOps1DRef(self))) + + @property + def normals(self): + """Return the face unit normals. + + Signature: ->(m, 2) + """ + result = np.zeros((self.nelements, 2)) + result[:,0] = -1 + result[:,1] = 1 + return result + + +def interpolate(discr, vec, nodes): + """Return an interpolated solution at *nodes*. + + Input: + discr - a DGDiscr1D instance + vec - vector of nodal values at degrees of freedom + nodes - vector of nodes to interpolate to + + Signature: (m*n,) -> (len(nodes),) + """ + elements = discr.elements + nelements = discr.nelements + nnodes = discr.nnodes + inv_vdm = la.inv(discr.vdm) + + sorter = np.argsort(nodes) + sorted_nodes = nodes[sorter] + result = [] + + indices = np.searchsorted(sorted_nodes, elements) + for i, (start, end) in enumerate(indices): + if i == 0: + start = 0 + elif i == nelements - 1: + end = len(nodes) + + center = (elements[i][0] + elements[i][1]) / 2 + radius = (elements[i][1] - elements[i][0]) / 2 + element_nodes = sorted_nodes[start:end] + mapped_nodes = (element_nodes - center) / radius + + modal_vals = inv_vdm @ vec[i * nnodes:(i + 1) * nnodes] + nodal_vals = ortholegvander(mapped_nodes, nnodes - 1) @ modal_vals + result.append(nodal_vals) + + result = np.hstack(result) + unsorter = np.arange(len(nodes))[sorter] + return result[unsorter] + + +def integrate(discr, soln): + """Return the integral of the solution. + + Signature: (n*m,) -> () + """ + soln = soln.reshape((discr.nelements, discr.nnodes)) + h = discr.elements[0][1] - discr.elements[0][0] + weights = discr.ref_weights * h / 2 + return np.sum(soln * weights) + + +def elementwise(mat, vec): + """Apply a matrix to rows of the input representing per-element + degrees of freedom. + + Inputs: + mat: Shape (a, b) + vec: Shape (c, b) + + Signature: (a, b), (c, b) -> (c, a) + """ + return np.einsum("ij,kj->ki", mat, vec) + + +class AbstractDGOps1D(object): + + def __init__(self, discr): + self.discr = discr + + def interp(self, vec): + """Apply elementwise volume-to-face interpolation. + + Signature: (m, n) -> (m, 2) + """ + raise NotImplementedError + + def inv_mass(self, vec): + """Apply the elementwise inverse mass matrix. + + Signature: (m, n) -> (m, n) + """ + raise NotImplementedError + + def stiffness(self, vec): + """Apply the elementwise stiffness matrix. + + Signature: (m, n) -> (m, n) + """ + raise NotImplementedError + + def face_mass(self, vec): + """Apply the elementwise face mass matrix. + + Signature: (m, 2) -> (m, n) + """ + raise NotImplementedError + + def face_swap(self, vec): + """Swap values at opposite faces. + + Signature: (m, 2) -> (m, 2) + """ + raise NotImplementedError + + +def elementwise(mat, vec): + """Apply a matrix to rows of the input representing per-element + degrees of freedom. + + Inputs: + mat: Shape (a, b) + vec: Shape (c, b) + + Signature: (a, b), (c, b) -> (c, a) + """ + return np.einsum("ij,kj->ki", mat, vec) + + +class DGOps1DRef(AbstractDGOps1D): + """A reference NumPy implementation of the AbstractDGOps1D interface.""" + + def interp(self, vec): + return elementwise(self.discr.interp, vec) + + def inv_mass(self, vec): + return elementwise(self.discr.inv_mass, vec) + + def stiffness(self, vec): + return elementwise(self.discr.stiffness, vec) + + def face_mass(self, vec): + return elementwise(self.discr.face_mass, vec) + + def face_swap(self, vec): + result = np.zeros_like(vec) + result[:,0] = np.roll(vec[:,1], +1) + result[:,1] = np.roll(vec[:,0], -1) + return result + + +class DGOps1D(AbstractDGOps1D): + pass + + +class AdvectionOperator(object): + """A class representing a DG advection operator.""" + + def __init__(self, discr, c, flux_type): + """ + Inputs: + discr: an instance of DGDiscr1D + c: advection speed parameter + flux_type: "upwind" or "central" + """ + self.discr = discr + self.c = c + assert flux_type in ("upwind", "central") + self.flux_type = flux_type + + def weak_flux(self, dg, vec): + """Apply the flux, weak form. + + Inputs: + dg: a DGOps1D instance + vec: vector of nodal values at the faces + + Signature: (m, 2) -> (m, 2) + """ + if self.flux_type == "central": + flux = (vec + dg.face_swap(vec)) / 2 + + elif self.flux_type == "upwind": + swp = dg.face_swap(vec) + if self.c >= 0: + flux = np.stack((vec[:,0], swp[:,1]), axis=1) + else: + flux = np.stack((swp[:,0], vec[:,1]), axis=1) + + flux *= self.c + flux *= self.discr.normals + + return flux + + def strong_flux(self, dg, vec): + """Apply the flux, strong form. + + Inputs: + dg: a DGOps1D instance + vec: vector of nodal values at the faces + + Signature: (m, 2) -> (m, 2) + """ + return self.c * self.discr.normals * vec - self.weak_flux(dg, vec) + + def apply(self, vec): + """Main operator implementation. + + Signature: (m, n) -> (m, n) + """ + with self.discr.dg_ops() as dg: + pt1 = dg.face_mass(self.strong_flux(dg, dg.interp(vec))) + pt2 = self.c * dg.stiffness(vec) + return -dg.inv_mass(pt1 - pt2) + + def __call__(self, vec): + """Apply the DG advection operator to the vector of degrees of freedom. + + Signature: (m*n,) -> (m*n,) + """ + vec = vec.reshape((self.discr.nelements, self.discr.nnodes)) + return self.apply(vec).reshape((-1,)) + + +def rk4(rhs, initial, t_initial, t_final, dt): + """RK4 integrator. + + Inputs: + - rhs: a callable that takes arguments (t, y) + - initial: initial value + - t_initial: initial time + - t_final: final time + - dt: step size + + Returns: + The solution computed at the final time. + """ + t = t_initial + sol = initial + + while t < t_final: + dt = min(dt, t_final - t) + s0 = rhs(t, sol) + s1 = rhs(t + dt/2, sol + dt/2 * s0) + s2 = rhs(t + dt/2, sol + dt/2 * s1) + s3 = rhs(t + dt, sol + dt * s2) + sol = sol + dt / 6 * (s0 + 2 * s1 + 2 * s2 + s3) + t += dt + + return sol + + +def test_rk4(): + assert np.isclose(rk4(lambda t, y: -y, 1, 0, 5, 0.01), np.exp(-5)) + + +@pytest.mark.parametrize("order", (3, 4, 5)) +@pytest.mark.parametrize("flux_type", ("central", "upwind")) +def test_advection_convergence(order, flux_type): + errors = [] + hs = [] + + for nelements in (8, 12, 16, 20): + discr = DGDiscr1D(0, 2*np.pi, nelements=nelements, nnodes=order) + u_initial = np.sin(discr.nodes()) + op = AdvectionOperator(discr, c=1, flux_type=flux_type) + u = rk4(lambda t, y: op(y), u_initial, t_initial=0, t_final=np.pi, dt=0.01) + u_ref = -u_initial + hs.append(discr.h) + errors.append(integrate(discr, (u - u_ref)**2)**0.5) + + eoc, _ = np.polyfit(np.log(hs), np.log(errors), 1) + assert eoc >= order - 0.1, eoc -- GitLab From 3a543c9a24dc7caf1a80043fb28c5cae37dfd0c7 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Sat, 25 Apr 2020 20:36:13 -0500 Subject: [PATCH 09/46] Work on refactoring --- arrayzy/__init__.py | 2 +- arrayzy/array.py | 267 ++++++++++---------------------------------- arrayzy/code.py | 203 +++++++++++++++++++++++++++++++++ test/test_array.py | 39 ++++--- 4 files changed, 292 insertions(+), 219 deletions(-) create mode 100644 arrayzy/code.py diff --git a/arrayzy/__init__.py b/arrayzy/__init__.py index 9d6dae8..af107be 100644 --- a/arrayzy/__init__.py +++ b/arrayzy/__init__.py @@ -1 +1 @@ -from arrayzy.array import Array, Context, make_context, make_sym # noqa +from arrayzy.code import Code, CodeBuilder diff --git a/arrayzy/array.py b/arrayzy/array.py index 4dd2985..adea782 100644 --- a/arrayzy/array.py +++ b/arrayzy/array.py @@ -29,108 +29,12 @@ import pymbolic.primitives as prim from pytools import memoize_method -class Context: - """ - .. attribute:: program - - A :mod:`loopy.LoopKernel` that defines names used in expressions - in :class:`Array` instances attached to this context. Names - defined once will never change their meaning, for the lifetime - of this object. - - .. attribute:: bindings - - .. attribute:: target - - A :class:`Target` for code generation and execution. - - .. note:: - - *program* may not define any names starting with underscores. - """ - - def __init__(self, queue, program, bindings, target): - self.queue = queue - self._program = program - self.bindings = bindings - self.target = target - - # The 'program' attribute is only supposed to be modified via - # update_program. - @property - def program(self): - return self._program - - def _infer_type(self, expr): - """Infer the type of an expression in the kernel being built.""" - from loopy.type_inference import TypeInferenceMapper - mapper = TypeInferenceMapper(self._program) - return mapper(expr) - - @property - def parameters(self): - pass - - def update_program(self, program): - self._program = program - - def get_parameter(self, name): - if name not in self.program.all_variable_names(): - self.update_program( - self.program.copy( - args=self.program.args + [ - lp.ValueArg(name, dtype=self.program.index_dtype) - ])) - - """ - if name not in self.program.all_params(): - # FIXME: ... data dependent control flow? - raise ValueError( - f"'{name}' is not a domain parameter " - "in this context") - """ - - return prim.Variable(name) - - -class Target: - pass - - -class PyOpenCLTarget: - def __init__(self, queue): - self.queue = queue - - def get_loopy_target(self): - import loopy as lp - return lp.PyOpenCLTarget(self.queue.device) - - -def make_context(arg): - import sys - - target = None - - # avoid expensive/failing import - if "pyopencl" in sys.modules: - import pyopencl as cl - if isinstance(arg, cl.CommandQueue): - target = PyOpenCLTarget(arg) - - if target is None: - raise ValueError(f"invalid argument type: {type(arg).__name__}") - - import loopy as lp - program = lp.make_kernel("{:}", [], target=target.get_loopy_target()) - - return Context(arg, program, {}, target) - - class Array: - """ - .. attribute:: context + """A multidimensional array. + + .. attribute:: code - The :class:`Context` with which this array is associated, to + The :class:`Code` with which this array is associated, to define the meanings of names used in expressions in this object. .. attribute:: dtype @@ -138,140 +42,93 @@ class Array: An instance of :class:`loopy.types.LoopyType` or *None* to indicate that the type of the array is not yet known. - .. attribute:: is_materialized + .. attribute:: shape - Whether this array is backed by actual storage. + .. attribute:: ndim - .. attribute:: expression - """ - - def __init__(self, context, shape, dtype, expression): - self.context = context - self.shape = shape - self.dtype = dtype - self.expression = expression + .. attribute:: T - def with_(self, **kwargs): - pass + .. automethod:: to_loopy_expression - def eval(self, **kwargs): - _, (out,) = self._knl()(self.context.queue, **kwargs) - return out - - @property - def _dim_names(self): - return tuple(f"_{i}" for i in range(len(self.shape))) + .. automethod:: get_dependencies + """ - def __getitem__(self, indices): - # TODO - pass + def __init__(self, code, shape, dtype): + self.code = code + self._dtype = dtype + self.shape = shape @property - def T(self): # noqa - index_map = dict( - zip(self._dim_names, - map(prim.Variable, reversed(self._dim_names)))) - - expression = pymbolic.substitute(self.expression, index_map) - shape = tuple(reversed(self.shape)) + def ndim(self): + return len(self.shape) - return Array(self.context, shape, self.dtype, expression) + def _infer_type(self, expr): + from loopy.type_inference import TypeInferenceMapper + mapper = TypeInferenceMapper(self.code.program) + return mapper(expr) def __mul__(self, other): if not np.isscalar(other): raise TypeError("only scalar multiplication supported") - new_expr = other * self.expression - return Array( - self.context, + + new_expr = other * self.to_loopy_expression(self.dims) + return ArrayExpression( + self.code, self.shape, - self.context._infer_type(new_expr), + self._infer_type(new_expr), new_expr) __rmul__ = __mul__ - + @property - def ndim(self): - return len(self.shape) - - @memoize_method - def _knl(self): - knl = self.context.program - - out = lp.GlobalArg("_out", shape=self.shape, dtype=self.dtype, - order="C") - - # FIXME: Won't work for scalars. - out_inames = [f"_out_{i}" for i in range(self.ndim)] - - out_expr = pymbolic.substitute( - self.expression, - dict(zip(self._dim_names, map(prim.Variable, out_inames)))) + def dtype(self): + # TODO: The dtype could get out of sync with the kernel if the kernel is + # updated. Should the dtype be updated as well? + return self._dtype - # Build output domain. - params = [] - from loopy.symbolic import get_dependencies - for sdep in map(get_dependencies, self.shape): - for dep in sdep: - params.append(self.context.get_parameter(dep).name) - - dom = isl.BasicSet.universe( - isl.Space.create_from_names( - isl.DEFAULT_CONTEXT, - set=out_inames, - params=params)) - - from loopy.symbolic import aff_from_expr - affs = isl.affs_from_space(dom.space) - for iname, expr in zip(out_inames, self.shape): - dom &= affs[0].le_set(affs[iname]) - dom &= affs[iname].lt_set(aff_from_expr(dom.space, expr)) - dom, = dom.get_basic_sets() - - # Build output instruction. - from loopy.kernel.instruction import make_assignment - out_insn = make_assignment( - ( - prim.Variable("_out")[ - tuple(map(prim.Variable, out_inames))],), - out_expr, - id="_out", - within_inames=frozenset(out_inames)) + @property + def dims(self): + return tuple(map(prim.Variable, self.dim_names)) - return knl.copy( - domains=knl.domains + [dom], - instructions=knl.instructions + [out_insn], - args=knl.args + [out]) + @property + def dim_names(self): + return tuple(f"_{i}" for i in range(len(self.shape))) - def store(self, prefix="tmp"): - """Stores the array in a temporary.""" - pass + @property + def T(self): + new_shape = tuple(reversed(self.shape)) + new_expr = self.to_loopy_expression(list(reversed(self.dims))) + return ArrayExpression(self.code, new_shape, self.dtype, new_expr) + def to_loopy_expression(self, dims): + raise NotImplementedError -def make_sym(context, name, shape, dtype=None, order="C"): - if name in context.program.all_variable_names(): - raise ValueError(f"name '{name}' already in use in context") - arg = lp.GlobalArg(name, shape=shape, dtype=dtype, order=order) +class ArrayExpression(Array): + """ + .. attribute:: expression + """ - shape = arg.shape - dtype = arg.dtype + def __init__(self, code, shape, dtype, expression): + super().__init__(code, shape, dtype) + self.expression = expression - from loopy.symbolic import get_dependencies - for sdep in map(get_dependencies, arg.shape): - for dep in sdep: - context.get_parameter(dep) + def to_loopy_expression(self, dims): + assignments = dict(zip(self.dim_names, tuple(dims))) + return pymbolic.substitute(self.expression, assignments) - context.update_program( - context.program.copy( - args=context.program.args + [arg])) - v_name = prim.Variable(name) - subscripts = tuple(prim.Variable(f"_{i}") for i in range(len(shape))) +class ArrayVariable(Array): + """ + .. attribute:: name + """ - return Array(context, shape, dtype, expression=v_name[subscripts]) + def __init__(self, code, shape, dtype, name): + super().__init__(code, shape, dtype) + self.name = name + def to_loopy_expression(self, dims): + return prim.Variable(self.name)[tuple(dims)] -def zeros(context, shape, dtype): - pass # vim: foldmethod=marker diff --git a/arrayzy/code.py b/arrayzy/code.py new file mode 100644 index 0000000..2e22011 --- /dev/null +++ b/arrayzy/code.py @@ -0,0 +1,203 @@ +import loopy as lp +import islpy as isl +import pymbolic.primitives as prim + +from arrayzy.array import ArrayVariable, ArrayExpression + + +dim_type = isl.dim_type + + + +class Target: + pass + + +class PyOpenCLTarget: + + def __init__(self, queue): + self.queue = queue + + def get_loopy_target(self): + import loopy as lp + return lp.PyOpenCLTarget(self.queue.device) + + +class Code: + """A representation of a program that carries out an array-valued + computation. + + .. attribute:: program + + A :mod:`loopy.LoopKernel` that defines names used in expressions + in :class:`Array` instances attached to this context. Names + defined once will never change their meaning, for the lifetime + of this object. + + .. attribute:: bindings + + A mapping from argument names to :class:`Array` values. + + .. attribute:: target + + A :class:`Target` for code generation and execution. + + .. note:: + + *program* may not define any names starting with underscores. + + """ + + def __init__(self, queue, program, bindings, target): + self.queue = queue + self._program = program + self.bindings = bindings + self.target = target + + # The 'program' attribute is only supposed to be modified via + # update_program. + @property + def program(self): + return self._program + + def update_program(self, program): + self._program = program + + def __call__(self, *args, **kwargs): + return self.program(self.queue, *args, **kwargs) + + def eval_one(self, *args, **kwargs): + _, (result,) = self(*args, **kwargs) + return result + + +class CodeBuilder: + + def __init__(self, arg): + import sys + + target = None + + # avoid expensive/failing import + if "pyopencl" in sys.modules: + import pyopencl as cl + if isinstance(arg, cl.CommandQueue): + target = PyOpenCLTarget(arg) + + if target is None: + raise ValueError(f"invalid argument type: {type(arg).__name__}") + + import loopy as lp + program = lp.make_kernel("{:}", [], target=target.get_loopy_target()) + self.code = Code(arg, program, {}, target) + + self.var_name_gen = self.program.get_var_name_generator() + self.insn_id_gen = self.program.get_instruction_id_generator() + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_value, traceback): + pass + + def build(self): + return self.code + + @property + def program(self): + return self.code.program + + @property + def update_program(self): + return self.code.update_program + + def _get_or_create_parameter(self, name): + if name in self.program.all_variable_names(): + if name not in self.program.all_params(): + # FIXME: May have been added, but not added to a domain. + # + # FIXME: ... data dependent control flow? + # raise ValueError( + # f"'{name}' is not a domain parameter " + # "in this context") + pass + else: + self.update_program( + self.program.copy( + args=self.program.args + [ + lp.ValueArg(name, dtype=self.program.index_dtype) + ])) + + return prim.Variable(name) + + def _get_dependencies(self, expr): + pass + + def argument(self, name, shape, dtype, order="C"): + if name in self.program.all_variable_names(): + raise ValueError(f"name '{name}' already in use in code") + + arg = lp.GlobalArg(name, shape=shape, dtype=dtype, order=order) + + shape = arg.shape + dtype = arg.dtype + + # Insert parameters from shape description. + from loopy.symbolic import get_dependencies + for sdep in map(get_dependencies, arg.shape): + for dep in sdep: + self._get_or_create_parameter(dep) + + # Add argument to program. + self.code.update_program( + self.program.copy(args=self.program.args + [arg])) + + return ArrayVariable(self.code, shape, dtype, name) + + def matmul(self, lhs, rhs): + pass + + def output(self, expr): + knl = self.program + name = self.var_name_gen("_out") + out = lp.GlobalArg(name, shape=expr.shape, dtype=expr.dtype, order="C") + out_inames = [self.var_name_gen(f"{name}_dim{d}") + for d in range(expr.ndim)] + out_expr = expr.to_loopy_expression(map(prim.Variable, out_inames)) + + # Build output domain. + param_names = [] + from loopy.symbolic import get_dependencies + for sdep in map(get_dependencies, expr.shape): + for dep in sdep: + param_names.append(self._get_or_create_parameter(dep).name) + + dom = isl.BasicSet.universe( + isl.Space.create_from_names( + isl.DEFAULT_CONTEXT, + set=out_inames, + params=param_names)) + + from loopy.symbolic import aff_from_expr + affs = isl.affs_from_space(dom.space) + for iname, dim in zip(out_inames, expr.shape): + dom &= affs[0].le_set(affs[iname]) + dom &= affs[iname].lt_set(aff_from_expr(dom.space, dim)) + dom, = dom.get_basic_sets() + + # Build output instruction. + from loopy.kernel.instruction import make_assignment + out_insn = make_assignment( + ( + prim.Variable(name)[ + tuple(map(prim.Variable, out_inames))],), + out_expr, + id=self.insn_id_gen(), + within_inames=frozenset(out_inames)) + + # TODO: dependencies + + self.update_program(knl.copy( + domains=knl.domains + [dom], + instructions=knl.instructions + [out_insn], + args=knl.args + [out])) diff --git a/test/test_array.py b/test/test_array.py index 14090f4..f7c263a 100644 --- a/test/test_array.py +++ b/test/test_array.py @@ -40,33 +40,46 @@ from pyopencl.tools import ( # noqa def test_symbolic_array(ctx_getter): cl_ctx = ctx_getter() queue = cl.CommandQueue(cl_ctx) - ctx = az.make_context(queue) - x = az.make_sym(ctx, "x", shape="n", dtype=np.float64) + + with az.CodeBuilder(queue) as cb: + x = cb.argument("x", shape="n", dtype=np.float64) + cb.output(x) + x_in = cl.array.to_device(queue, np.array([1., 2., 3., 4., 5.])) - assert (x.eval(x=x_in).get() == x_in.get()).all() + code = cb.build() + assert (code.eval_one(x=x_in).get() == x_in.get()).all() @pytest.mark.parametrize("dim", (1, 2, 3)) def test_transpose(ctx_getter, dim): cl_ctx = ctx_getter() queue = cl.CommandQueue(cl_ctx) - ctx = az.make_context(queue) - shape = ("l", "m", "n")[-dim:] - x = az.make_sym(ctx, "x", shape=shape, dtype=np.float64) + + with az.CodeBuilder(queue) as cb: + shape = ("l", "m", "n")[-dim:] + x = cb.argument("x", shape=shape, dtype=np.float64) + cb.output(x.T) + + x_in = cl.array.to_device( - queue, - np.arange(24, dtype=float).reshape((2, 3, -1)[-dim:])) - assert (x.T.eval(x=x_in).get() == x_in.get().T).all() + queue, + np.arange(24, dtype=float).reshape((2, 3, -1)[-dim:])) + code = cb.build() + assert (code.eval_one(x=x_in).get() == x_in.get().T).all() def test_scalar_multiply(ctx_getter): cl_ctx = ctx_getter() queue = cl.CommandQueue(cl_ctx) - ctx = az.make_context(queue) - x = az.make_sym(ctx, "x", shape="n", dtype=np.float64) - x_in = np.array([1., 2., 3., 4., 5.]) + + with az.CodeBuilder(queue) as cb: + x = cb.argument("x", shape="n", dtype=np.float64) + cb.output(2 * x) + x_in = cl.array.to_device(queue, np.array([1., 2., 3., 4., 5.])) - assert ((2*x).eval(x=x_in).get() == (2*x_in).get()).all() + code = cb.build() + print(code.program) + assert (code.eval_one(x=x_in).get() == (2*x_in).get()).all() if __name__ == "__main__": -- GitLab From 61b4dfc4cf9c430c0daa5a2f347758cfbf72a355 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Sat, 25 Apr 2020 21:56:52 -0500 Subject: [PATCH 10/46] Run pytest --- experiments/advection.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/experiments/advection.py b/experiments/advection.py index f0dc376..52d6eff 100644 --- a/experiments/advection.py +++ b/experiments/advection.py @@ -472,3 +472,8 @@ def test_advection_convergence(order, flux_type): eoc, _ = np.polyfit(np.log(hs), np.log(errors), 1) assert eoc >= order - 0.1, eoc + + +if __name__ == "__main__": + from pytest import main + main([__file__]) -- GitLab From 224d47553baf93477364a148a2caeb25d11c6c62 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Mon, 27 Apr 2020 21:32:32 -0500 Subject: [PATCH 11/46] WIP --- arrayzy/array.py | 148 ++++++++++++++++---- arrayzy/code.py | 296 ++++++++++++++++++++++++++++++--------- arrayzy/utils.py | 66 +++++++++ experiments/advection.py | 136 +++++++++++++++--- test/test_array.py | 94 ------------- test/test_linalg.py | 218 ++++++++++++++++++++++++++++ 6 files changed, 755 insertions(+), 203 deletions(-) create mode 100644 arrayzy/utils.py delete mode 100644 test/test_array.py create mode 100644 test/test_linalg.py diff --git a/arrayzy/array.py b/arrayzy/array.py index adea782..3158061 100644 --- a/arrayzy/array.py +++ b/arrayzy/array.py @@ -20,6 +20,8 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. """ +import operator + import islpy as isl import loopy as lp import numpy as np @@ -28,13 +30,16 @@ import pymbolic import pymbolic.primitives as prim from pytools import memoize_method +from functools import partialmethod +from arrayzy.utils import domain_for_shape + class Array: - """A multidimensional array. + """A representation of an array value in a computation. - .. attribute:: code + .. attribute:: ctx - The :class:`Code` with which this array is associated, to + The :class:`Context` with which this array is associated, to define the meanings of names used in expressions in this object. .. attribute:: dtype @@ -44,17 +49,35 @@ class Array: .. attribute:: shape + .. attribute:: name + + A name for this computation, or *None*. + .. attribute:: ndim - .. attribute:: T + The number of dimensions in :attr:`shape`. + + .. automethod:: T .. automethod:: to_loopy_expression - .. automethod:: get_dependencies + .. automethod:: get_domain + + .. automethod:: __mul__ + .. automethod:: __rmul__ + .. automethod:: __add__ + .. automethod:: __radd__ + .. automethod:: __sub__ + .. automethod:: __rsub__ + .. automethod:: __truediv__ + .. automethod:: __rtruediv__ + .. automethod:: __neg__ + + .. automethod:: __getitem__ """ - def __init__(self, code, shape, dtype): - self.code = code + def __init__(self, ctx, shape, dtype): + self.ctx = ctx self._dtype = dtype self.shape = shape @@ -64,22 +87,90 @@ class Array: def _infer_type(self, expr): from loopy.type_inference import TypeInferenceMapper - mapper = TypeInferenceMapper(self.code.program) + mapper = TypeInferenceMapper(self.ctx.program) return mapper(expr) - def __mul__(self, other): - if not np.isscalar(other): - raise TypeError("only scalar multiplication supported") - - new_expr = other * self.to_loopy_expression(self.dims) + def _unary_op(self, op): + new_expr = op(self.to_loopy_expression(self.dims)) return ArrayExpression( - self.code, + self.ctx, self.shape, - self._infer_type(new_expr), + self.dtype, new_expr) - __rmul__ = __mul__ + def _binary_op(self, op, other, reverse=False): + if np.isscalar(other): + args = (self.to_loopy_expression(self.dims), other) + if reverse: + args = tuple(reversed(args)) + + new_expr = op(*args) + return ArrayExpression( + self.ctx, + self.shape, + self._infer_type(new_expr), + new_expr) + + elif isinstance(other, Array): + args = ( + self.to_loopy_expression(self.dims), + other.to_loopy_expression(self.dims)) + if reverse: + args = tuple(reversed(args)) + + new_expr = op(*args) + return ArrayExpression( + self.ctx, + self.shape, + self._infer_type(new_expr), + new_expr) + + else: + raise ValueError + + __mul__ = partialmethod(_binary_op, operator.mul) + __rmul__ = partialmethod(__mul__, reverse=True) + + __add__ = partialmethod(_binary_op, operator.add) + __radd__ = partialmethod(__add__, reverse=True) + __sub__ = partialmethod(_binary_op, operator.sub) + __rsub__ = partialmethod(__sub__, reverse=True) + + __truediv__ = partialmethod(_binary_op, operator.truediv) + __rtruediv__ = partialmethod(__truediv__, reverse=True) + + __neg__ = partialmethod(_unary_op, operator.neg) + + def __getitem__(self, slice_spec): + from numbers import Integral + + if len(slice_spec) != self.ndim: + raise ValueError("incorrect slice shape") + + dims = [] + shape = [] + + for i, elem in enumerate(slice_spec): + if elem == slice(None, None, None): + d = len(shape) + dims.append(prim.Variable(f"_{d}")) + shape.append(self.shape[i]) + elif isinstance(elem, Integral): + # TODO: should assume that this is in-bounds + dims.append(elem) + else: + raise ValueError("not implemented") + # TODO: Not clear how to implement slices with upper bounds that may + # exceed the symbolic maximum along the dimension. What is the + # shape of the resulting array? + + return ArrayExpression( + self.ctx, + shape=tuple(shape), + dtype=self.dtype, + expression=self.to_loopy_expression(dims)) + @property def dtype(self): # TODO: The dtype could get out of sync with the kernel if the kernel is @@ -94,37 +185,46 @@ class Array: def dim_names(self): return tuple(f"_{i}" for i in range(len(self.shape))) + def get_domain(self, dim_names=None): + """Return the domain that defines the space of indices for this array. + """ + if dim_names is None: + dim_names = self.dim_names + return domain_for_shape(dim_names, self.shape) + @property def T(self): new_shape = tuple(reversed(self.shape)) new_expr = self.to_loopy_expression(list(reversed(self.dims))) - return ArrayExpression(self.code, new_shape, self.dtype, new_expr) + return ArrayExpression(self.ctx, new_shape, self.dtype, new_expr) def to_loopy_expression(self, dims): raise NotImplementedError class ArrayExpression(Array): - """ + """An array-valued expression. + .. attribute:: expression """ - def __init__(self, code, shape, dtype, expression): - super().__init__(code, shape, dtype) + def __init__(self, ctx, shape, dtype, expression): + super().__init__(ctx, shape, dtype) self.expression = expression - + def to_loopy_expression(self, dims): assignments = dict(zip(self.dim_names, tuple(dims))) return pymbolic.substitute(self.expression, assignments) class ArrayVariable(Array): - """ + """An array-valued variable. + .. attribute:: name """ - def __init__(self, code, shape, dtype, name): - super().__init__(code, shape, dtype) + def __init__(self, ctx, shape, dtype, name): + super().__init__(ctx, shape, dtype) self.name = name def to_loopy_expression(self, dims): diff --git a/arrayzy/code.py b/arrayzy/code.py index 2e22011..b7cb9d8 100644 --- a/arrayzy/code.py +++ b/arrayzy/code.py @@ -1,12 +1,11 @@ import loopy as lp +import loopy.symbolic as sym + import islpy as isl import pymbolic.primitives as prim from arrayzy.array import ArrayVariable, ArrayExpression - - -dim_type = isl.dim_type - +from arrayzy.utils import domain_for_shape class Target: @@ -24,19 +23,19 @@ class PyOpenCLTarget: class Code: - """A representation of a program that carries out an array-valued - computation. + """A representation of a program carrying out an array-valued computation. .. attribute:: program - A :mod:`loopy.LoopKernel` that defines names used in expressions - in :class:`Array` instances attached to this context. Names - defined once will never change their meaning, for the lifetime - of this object. + A :mod:`loopy.LoopKernel` that defines names used in expressions in + :class:`Array` instances attached to this code. Names defined once will + never change their meaning, for the lifetime of this object. .. attribute:: bindings - A mapping from argument names to :class:`Array` values. + A mapping from argument names to bound values. + + .. attribute:: bound_arguments .. attribute:: target @@ -45,7 +44,6 @@ class Code: .. note:: *program* may not define any names starting with underscores. - """ def __init__(self, queue, program, bindings, target): @@ -53,6 +51,7 @@ class Code: self._program = program self.bindings = bindings self.target = target + self.bound_arguments = {} # The 'program' attribute is only supposed to be modified via # update_program. @@ -63,15 +62,21 @@ class Code: def update_program(self, program): self._program = program - def __call__(self, *args, **kwargs): - return self.program(self.queue, *args, **kwargs) - def eval_one(self, *args, **kwargs): _, (result,) = self(*args, **kwargs) return result + def __call__(self, *args, **kwargs): + kwargs.update(self.bound_arguments) + return self.program(self.queue, *args, **kwargs) + class CodeBuilder: + """An interface for building up array computations. + + .. automethod:: __enter__ + .. automethod:: __exit__ + """ def __init__(self, arg): import sys @@ -88,11 +93,14 @@ class CodeBuilder: raise ValueError(f"invalid argument type: {type(arg).__name__}") import loopy as lp - program = lp.make_kernel("{:}", [], target=target.get_loopy_target()) + program = lp.make_kernel( + "{:}", [], target=target.get_loopy_target(), + lang_version=lp.MOST_RECENT_LANGUAGE_VERSION) self.code = Code(arg, program, {}, target) - + self.var_name_gen = self.program.get_var_name_generator() self.insn_id_gen = self.program.get_instruction_id_generator() + self._last_insn_id = None def __enter__(self): return self @@ -101,6 +109,8 @@ class CodeBuilder: pass def build(self): + """Return the generated :class:`Code`. + """ return self.code @property @@ -110,30 +120,85 @@ class CodeBuilder: @property def update_program(self): return self.code.update_program - + + # {{{ + + def _unify_types(self, *args): + if not all(args[0] == arg for arg in args[1:]): + raise NotImplementedError + return args[0] + def _get_or_create_parameter(self, name): + # Create a parameter variable for *name* if not present. + # Return the parameter as a pymbolic variable. + if name in self.program.all_variable_names(): if name not in self.program.all_params(): # FIXME: May have been added, but not added to a domain. - # - # FIXME: ... data dependent control flow? - # raise ValueError( - # f"'{name}' is not a domain parameter " - # "in this context") pass else: self.update_program( self.program.copy( args=self.program.args + [ - lp.ValueArg(name, dtype=self.program.index_dtype) + lp.ValueArg(name, dtype=self.program.index_dtype) ])) - + return prim.Variable(name) def _get_dependencies(self, expr): - pass + # FIXME: this needs to be implemented properly. + if not self._last_insn_id: + return frozenset([]) + return frozenset([self._last_insn_id]) + + def _assume_equal(self, lhs, rhs): + """Adds the assumption "lhs = rhs".""" + param_names = set() + for sdep in map(sym.get_dependencies, (lhs, rhs)): + param_names |= sdep + + # Build domain. + dom = isl.BasicSet.universe( + isl.Space.create_from_names( + isl.DEFAULT_CONTEXT, + set=(), + params=param_names)) + + aff_lhs = sym.aff_from_expr(dom.space, lhs) + aff_rhs = sym.aff_from_expr(dom.space, rhs) + + dom &= aff_lhs.eq_set(aff_rhs) + + # TODO: Throw an error if the assumption leads to a contradiction. + dom, = dom.get_basic_sets() + + self.update_program(lp.assume(self.program, dom)) + + def _make_array_assignment(self, name, dim_names, expr, within_inames): + from loopy.kernel.instruction import make_assignment + insn_id = self.insn_id_gen(f"_store{name}") + out_insn = make_assignment( + ( + prim.Variable(name)[ + tuple(map(prim.Variable, dim_names))],), + expr, + id=insn_id, + within_inames=frozenset(dim_names), + depends_on=self._get_dependencies(expr)) + + self.update_program(self.program.copy( + instructions=self.program.instructions + [out_insn])) + self._last_insn_id = insn_id + + # }}} + + # {{{ user interface def argument(self, name, shape, dtype, order="C"): + """Append an argument to the program. + + :returns: a :class:`ArrayVariable` + """ if name in self.program.all_variable_names(): raise ValueError(f"name '{name}' already in use in code") @@ -143,8 +208,7 @@ class CodeBuilder: dtype = arg.dtype # Insert parameters from shape description. - from loopy.symbolic import get_dependencies - for sdep in map(get_dependencies, arg.shape): + for sdep in map(sym.get_dependencies, arg.shape): for dep in sdep: self._get_or_create_parameter(dep) @@ -154,50 +218,152 @@ class CodeBuilder: return ArrayVariable(self.code, shape, dtype, name) - def matmul(self, lhs, rhs): - pass + def bind_argument(self, arg, val): + self.code.bound_arguments[arg.name] = val + + def roll(self, a, shift, name=None): + """Roll elements along the given axis. + + :returns: a :class:`Array` + """ + assert a.ndim == 1 + idx = (prim.Variable("_0") - shift) % a.shape[0] + # It's not immediately clear what loopy-generated code guarantees about + # the sign of the result of the % operator, so if the result is negative + # we shift it to be positive. + idx = idx + prim.If(prim.Comparison(idx, "<", 0), a.shape[0], 0) + expr = a.to_loopy_expression((idx,)) + return ArrayExpression(self.code, a.shape, a.dtype, expr) + + def stack(self, arrays, axis=0, name=None): + """Join a sequence of arrays along a new axis. + + :arg axis: the index of the axis + + :returns: a :class:`Array` + """ + for array in arrays[1:]: + if array.ndim != arrays[0].ndim: + raise ValueError("arrays must have same dimension") + for array_dim, array0_dim in zip(array.shape, arrays[0].shape): + self._assume_equal(array_dim, array0_dim) + + if name is None: + name = "_stack" + + name = self.var_name_gen(name) + + out_inames = [] + for i in range(arrays[0].ndim): + if i >= axis: + i += 1 + out_inames.append(f"{name}_dim{i}") + + domain = domain_for_shape(out_inames, arrays[0].shape) + + out_shape = list(arrays[0].shape) + out_shape.insert(axis, len(arrays)) + out_shape = tuple(out_shape) + + out_dtype = self._unify_types(*(a.dtype for a in arrays)) + + out = lp.TemporaryVariable( + name, dtype=out_dtype, + shape=out_shape, + address_space=lp.AddressSpace.GLOBAL) + + # Create an output instruction for each input array. + from loopy.kernel.instruction import make_assignment + out_insns = [] + for i in range(len(arrays)): + indices = list(map(prim.Variable, out_inames)) + expr = arrays[i].to_loopy_expression(indices) + indices.insert(axis, i) + indices = tuple(indices) + insn_id = self.insn_id_gen(f"{name}_copy{i}") + out_insn = make_assignment( + (prim.Variable(name)[indices],), + expr, + id=insn_id, + within_inames=frozenset(out_inames), + depends_on=self._get_dependencies(expr)) + self._last_insn_id = insn_id + out_insns.append(out_insn) + + new_tv = self.program.temporary_variables.copy() + new_tv[name] = out + + self.update_program(self.program.copy( + temporary_variables=new_tv, + domains=self.program.domains + [domain], + instructions=self.program.instructions + out_insns)) + + return ArrayVariable(self.code, out_shape, out_dtype, name) + + def matmul(self, a, b, name=None): + # Generate a temporay for the matrix-matrix multiplication. + self._assume_equal(a.shape[1], b.shape[0]) + if name is None: + name = "_matmul" + name = self.var_name_gen(name) + + out_shape = (a.shape[0], b.shape[1]) + out_inames = (f"{name}_dim0", f"{name}_dim1") + out_red_iname = self.var_name_gen(f"{name}_reduce") + out_dtype = self._unify_types(a.dtype, b.dtype) + + out = lp.TemporaryVariable( + name, dtype=out_dtype, + shape=(a.shape[0], b.shape[1]), + address_space=lp.AddressSpace.GLOBAL) + + import loopy.library.reduction as red + + a_inames = tuple(map(prim.Variable, (out_inames[0], out_red_iname))) + b_inames = tuple(map(prim.Variable, (out_red_iname, out_inames[1]))) + + expr = a.to_loopy_expression(a_inames) * b.to_loopy_expression(b_inames) + + out_expr = sym.Reduction( + operation=red.parse_reduction_op("sum"), + inames=(out_red_iname,), + expr=expr, + allow_simultaneous=False) + + domain = domain_for_shape( + out_inames + (out_red_iname,), + out_shape + (a.shape[1],)) + + new_tv = self.program.temporary_variables.copy() + new_tv[name] = out + + self.update_program(self.program.copy( + temporary_variables=new_tv, + domains=self.program.domains + [domain])) + + self._make_array_assignment( + name, dim_names=out_inames, expr=out_expr, + within_inames=frozenset(out_inames)) + + return ArrayVariable(self.code, out_shape, out_dtype, name) def output(self, expr): - knl = self.program name = self.var_name_gen("_out") out = lp.GlobalArg(name, shape=expr.shape, dtype=expr.dtype, order="C") - out_inames = [self.var_name_gen(f"{name}_dim{d}") - for d in range(expr.ndim)] + out_inames = [ + self.var_name_gen(f"{name}_dim{d}") for d in range(expr.ndim)] out_expr = expr.to_loopy_expression(map(prim.Variable, out_inames)) - # Build output domain. - param_names = [] - from loopy.symbolic import get_dependencies - for sdep in map(get_dependencies, expr.shape): - for dep in sdep: - param_names.append(self._get_or_create_parameter(dep).name) + domain = expr.get_domain(out_inames) - dom = isl.BasicSet.universe( - isl.Space.create_from_names( - isl.DEFAULT_CONTEXT, - set=out_inames, - params=param_names)) - - from loopy.symbolic import aff_from_expr - affs = isl.affs_from_space(dom.space) - for iname, dim in zip(out_inames, expr.shape): - dom &= affs[0].le_set(affs[iname]) - dom &= affs[iname].lt_set(aff_from_expr(dom.space, dim)) - dom, = dom.get_basic_sets() + self.update_program(self.program.copy( + args=self.program.args + [out], + domains=self.program.domains + [domain])) - # Build output instruction. - from loopy.kernel.instruction import make_assignment - out_insn = make_assignment( - ( - prim.Variable(name)[ - tuple(map(prim.Variable, out_inames))],), - out_expr, - id=self.insn_id_gen(), + self._make_array_assignment( + name, dim_names=out_inames, expr=out_expr, within_inames=frozenset(out_inames)) - # TODO: dependencies - - self.update_program(knl.copy( - domains=knl.domains + [dom], - instructions=knl.instructions + [out_insn], - args=knl.args + [out])) + # }}} + +Context = CodeBuilder diff --git a/arrayzy/utils.py b/arrayzy/utils.py new file mode 100644 index 0000000..54e3436 --- /dev/null +++ b/arrayzy/utils.py @@ -0,0 +1,66 @@ +__copyright__ = "Copyright (C) 2020 Matt Wala" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + + +import islpy as isl + + +def domain_for_shape(dim_names, shape): + """Return a :class:`isl.BasicSet` that expresses an appropriate index domain + for an array of (potentially symbolic) shape *shape*. + + :arg dim_names: A tuple of strings, the names of the axes. These become set + dimensions in the returned domain. + + :arg shape: A tuple of constant or quasi-affine :mod:`pymbolic` + expressions. The variables in these expressions become parameter + dimensions in the returned set. Must have the same length as + *dim_names*. + + :returns: a :class:`isl.BasicSet` + + """ + + # Collect parameters. + param_names = set() + from loopy.symbolic import get_dependencies + for sdep in map(get_dependencies, shape): + param_names |= sdep + + param_names = sorted(param_names) + + # Build domain. + dom = isl.BasicSet.universe( + isl.Space.create_from_names( + isl.DEFAULT_CONTEXT, + set=dim_names, + params=param_names)) + + # Add constraints. + from loopy.symbolic import aff_from_expr + affs = isl.affs_from_space(dom.space) + for iname, dim in zip(dim_names, shape): + dom &= affs[0].le_set(affs[iname]) + dom &= affs[iname].lt_set(aff_from_expr(dom.space, dim)) + dom, = dom.get_basic_sets() + + return dom diff --git a/experiments/advection.py b/experiments/advection.py index 52d6eff..56fe1c6 100644 --- a/experiments/advection.py +++ b/experiments/advection.py @@ -146,7 +146,7 @@ class DGDiscr1D(object): Signature: ->(n, 2) """ - return self.interp.T + return self.interp.T.copy() @property @memoized @@ -279,6 +279,18 @@ class AbstractDGOps1D(object): def __init__(self, discr): self.discr = discr + @property + def array_ops(self): + raise NotImplementedError + + @property + def normals(self): + """Return the vector of normals at the faces. + + Signature: ->(m, 2) + """ + raise NotImplementedError + def interp(self, vec): """Apply elementwise volume-to-face interpolation. @@ -331,6 +343,14 @@ def elementwise(mat, vec): class DGOps1DRef(AbstractDGOps1D): """A reference NumPy implementation of the AbstractDGOps1D interface.""" + @AbstractDGOps1D.array_ops.getter + def array_ops(self): + return np + + @AbstractDGOps1D.normals.getter + def normals(self): + return self.discr.normals + def interp(self, vec): return elementwise(self.discr.interp, vec) @@ -350,26 +370,75 @@ class DGOps1DRef(AbstractDGOps1D): return result +def matrix_getter(name, shape): + + def getter(self): + mat = self.cb.argument(name, shape, np.float64) + self.cb.bind_argument(mat, getattr(self.discr, name)) + return mat + + return property(memoized(getter)) + + class DGOps1D(AbstractDGOps1D): - pass + + @AbstractDGOps1D.array_ops.setter + def array_ops(self): + return self.cb + + def __init__(self, discr, cb): + self.discr = discr + self.cb = cb + + _normals = matrix_getter("normals", "(nelements, 2)") + _interp_mat = matrix_getter("interp", "(2, nnodes)") + _inv_mass_mat = matrix_getter("inv_mass", "(nnodes, nnodes)") + _stiffness_mat = matrix_getter("stiffness", "(nnodes, nnodes)") + _face_mass_mat = matrix_getter("face_mass", "(nnodes, 2)") + + @AbstractDGOps1D.normals.getter + def normals(self): + return self._normals + + def interp(self, vec): + return self.cb.matmul(self._interp_mat, vec.T, name="_interp").T + + def inv_mass(self, vec): + return self.cb.matmul(self._inv_mass_mat, vec.T, name="_inv_mass").T + + def stiffness(self, vec): + return self.cb.matmul(self._stiffness_mat, vec.T, name="_stiffness").T + + def face_mass(self, vec): + return self.cb.matmul(self._face_mass_mat, vec.T, name="_face_mass").T + + def face_swap(self, vec): + return self.cb.stack( + ( + self.cb.roll(vec[:,1], +1), + self.cb.roll(vec[:,0], -1)), + axis=1, + name="face_swap") class AdvectionOperator(object): """A class representing a DG advection operator.""" - def __init__(self, discr, c, flux_type): + def __init__(self, discr, c, flux_type, dg_ops): """ Inputs: discr: an instance of DGDiscr1D c: advection speed parameter flux_type: "upwind" or "central" + dg_ops: An instance of AbstractDGOps1D """ self.discr = discr self.c = c assert flux_type in ("upwind", "central") self.flux_type = flux_type + self.dg = dg_ops - def weak_flux(self, dg, vec): + def weak_flux(self, vec): """Apply the flux, weak form. Inputs: @@ -379,21 +448,20 @@ class AdvectionOperator(object): Signature: (m, 2) -> (m, 2) """ if self.flux_type == "central": - flux = (vec + dg.face_swap(vec)) / 2 + flux = (vec + self.dg.face_swap(vec)) / 2 elif self.flux_type == "upwind": - swp = dg.face_swap(vec) + swp = self.dg.face_swap(vec) if self.c >= 0: - flux = np.stack((vec[:,0], swp[:,1]), axis=1) + flux = self.dg.array_ops.stack((vec[:,0], swp[:,1]), axis=1) else: - flux = np.stack((swp[:,0], vec[:,1]), axis=1) + flux = self.dg.array_ops.stack((swp[:,0], vec[:,1]), axis=1) - flux *= self.c - flux *= self.discr.normals + flux = flux * self.c * self.dg.normals return flux - def strong_flux(self, dg, vec): + def strong_flux(self, vec): """Apply the flux, strong form. Inputs: @@ -402,17 +470,18 @@ class AdvectionOperator(object): Signature: (m, 2) -> (m, 2) """ - return self.c * self.discr.normals * vec - self.weak_flux(dg, vec) + return self.c * self.dg.normals * vec - self.weak_flux(vec) def apply(self, vec): """Main operator implementation. Signature: (m, n) -> (m, n) """ - with self.discr.dg_ops() as dg: - pt1 = dg.face_mass(self.strong_flux(dg, dg.interp(vec))) - pt2 = self.c * dg.stiffness(vec) - return -dg.inv_mass(pt1 - pt2) + dg = self.dg + pt1 = dg.face_mass(self.strong_flux(dg.interp(vec))) + print("pt1 shape", pt1.shape) + pt2 = self.c * dg.stiffness(vec) + return -dg.inv_mass(pt1 - pt2) def __call__(self, vec): """Apply the DG advection operator to the vector of degrees of freedom. @@ -457,14 +526,15 @@ def test_rk4(): @pytest.mark.parametrize("order", (3, 4, 5)) @pytest.mark.parametrize("flux_type", ("central", "upwind")) -def test_advection_convergence(order, flux_type): +def test_ref_advection_convergence(order, flux_type): errors = [] hs = [] for nelements in (8, 12, 16, 20): discr = DGDiscr1D(0, 2*np.pi, nelements=nelements, nnodes=order) u_initial = np.sin(discr.nodes()) - op = AdvectionOperator(discr, c=1, flux_type=flux_type) + op = AdvectionOperator( + discr, c=1, flux_type=flux_type, dg_ops=DGOps1DRef(discr)) u = rk4(lambda t, y: op(y), u_initial, t_initial=0, t_final=np.pi, dt=0.01) u_ref = -u_initial hs.append(discr.h) @@ -474,6 +544,32 @@ def test_advection_convergence(order, flux_type): assert eoc >= order - 0.1, eoc +def main(): + import arrayzy as az + import pyopencl as cl + cl_ctx = cl.create_some_context() + queue = cl.CommandQueue(cl_ctx) + + nelements = 20 + nnodes = 3 + + discr = DGDiscr1D(0, 2*np.pi, nelements=nelements, nnodes=nnodes) + + with az.CodeBuilder(queue) as cb: + u = cb.argument("u", shape="(nelements, nnodes)", dtype=np.float64) + op = AdvectionOperator( + discr, c=1, flux_type="central", dg_ops=DGOps1D(discr, cb)) + cb.output(op.apply(u)) + + code = cb.build() + print(code.program) + #import loopy as lp + #code.update_program(lp.fix_parameters(code.program, nnodes=nnodes, nelements=nelements)) + u = np.sin(discr.nodes()) + print(code.eval_one(u=u.reshape(nelements, nnodes))) + + if __name__ == "__main__": - from pytest import main - main([__file__]) + #from pytest import main + #main([__file__]) + main() diff --git a/test/test_array.py b/test/test_array.py deleted file mode 100644 index f7c263a..0000000 --- a/test/test_array.py +++ /dev/null @@ -1,94 +0,0 @@ -#!/usr/bin/env python - -__copyright__ = "Copyright (C) 2020 Matt Wala" - -__license__ = """ -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -""" - -import arrayzy as az - -import numpy as np -import sys - -import pytest # noqa - -import pyopencl as cl -import pyopencl.array as cl_array # noqa -import pyopencl.cltypes as cltypes # noqa -import pyopencl.tools as cl_tools # noqa -from pyopencl.tools import ( # noqa - pytest_generate_tests_for_pyopencl as pytest_generate_tests) - - -def test_symbolic_array(ctx_getter): - cl_ctx = ctx_getter() - queue = cl.CommandQueue(cl_ctx) - - with az.CodeBuilder(queue) as cb: - x = cb.argument("x", shape="n", dtype=np.float64) - cb.output(x) - - x_in = cl.array.to_device(queue, np.array([1., 2., 3., 4., 5.])) - code = cb.build() - assert (code.eval_one(x=x_in).get() == x_in.get()).all() - - -@pytest.mark.parametrize("dim", (1, 2, 3)) -def test_transpose(ctx_getter, dim): - cl_ctx = ctx_getter() - queue = cl.CommandQueue(cl_ctx) - - with az.CodeBuilder(queue) as cb: - shape = ("l", "m", "n")[-dim:] - x = cb.argument("x", shape=shape, dtype=np.float64) - cb.output(x.T) - - - x_in = cl.array.to_device( - queue, - np.arange(24, dtype=float).reshape((2, 3, -1)[-dim:])) - code = cb.build() - assert (code.eval_one(x=x_in).get() == x_in.get().T).all() - - -def test_scalar_multiply(ctx_getter): - cl_ctx = ctx_getter() - queue = cl.CommandQueue(cl_ctx) - - with az.CodeBuilder(queue) as cb: - x = cb.argument("x", shape="n", dtype=np.float64) - cb.output(2 * x) - - x_in = cl.array.to_device(queue, np.array([1., 2., 3., 4., 5.])) - code = cb.build() - print(code.program) - assert (code.eval_one(x=x_in).get() == (2*x_in).get()).all() - - -if __name__ == "__main__": - # make sure that import failures get reported, instead of skipping the - # tests. - if len(sys.argv) > 1: - exec(sys.argv[1]) - else: - from pytest import main - main([__file__]) - -# vim: filetype=pyopencl:fdm=marker diff --git a/test/test_linalg.py b/test/test_linalg.py new file mode 100644 index 0000000..7448476 --- /dev/null +++ b/test/test_linalg.py @@ -0,0 +1,218 @@ +#!/usr/bin/env python + +__copyright__ = "Copyright (C) 2020 Matt Wala" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + +import arrayzy as az + +import operator +import numpy as np +import sys + +import pytest # noqa + +import pyopencl as cl +import pyopencl.array as cl_array # noqa +import pyopencl.cltypes as cltypes # noqa +import pyopencl.tools as cl_tools # noqa +from pyopencl.tools import ( # noqa + pytest_generate_tests_for_pyopencl as pytest_generate_tests) + + +def test_symbolic_array(ctx_factory): + cl_ctx = ctx_factory() + queue = cl.CommandQueue(cl_ctx) + + with az.CodeBuilder(queue) as cb: + x = cb.argument("x", shape="n", dtype=np.float64) + cb.output(x) + + x_in = cl.array.to_device(queue, np.array([1., 2., 3., 4., 5.])) + code = cb.build() + assert (code.eval_one(x=x_in).get() == x_in.get()).all() + + +@pytest.mark.parametrize("dim", (1, 2, 3)) +def test_transpose(ctx_factory, dim): + cl_ctx = ctx_factory() + queue = cl.CommandQueue(cl_ctx) + + with az.CodeBuilder(queue) as cb: + shape = ("l", "m", "n")[-dim:] + x = cb.argument("x", shape=shape, dtype=np.float64) + cb.output(x.T) + + x_in = cl.array.to_device(queue, + np.arange(24, dtype=float).reshape((2, 3, -1)[-dim:])) + + code = cb.build() + assert (code.eval_one(x=x_in).get() == x_in.get().T).all() + + +@pytest.mark.parametrize("which", ("add", "sub", "mul", "truediv")) +@pytest.mark.parametrize("reverse", (False, True)) +def test_scalar_array_binary_arith(ctx_factory, which, reverse): + cl_ctx = ctx_factory() + queue = cl.CommandQueue(cl_ctx) + + op = getattr(operator, which) + if reverse: + op_orig = op + op = lambda x, y: op_orig(y, x) + + with az.CodeBuilder(queue) as cb: + x = cb.argument("x", shape="n", dtype=np.float64) + cb.output(op(2, x)) + + x_in = np.array([1., 2., 3., 4., 5.]) + code = cb.build() + assert (code.eval_one(x=x_in) == op(2, x_in)).all() + + +@pytest.mark.parametrize("which", ("add", "sub", "mul", "truediv")) +def test_array_array_binary_arith(ctx_factory, which): + cl_ctx = ctx_factory() + queue = cl.CommandQueue(cl_ctx) + + op = getattr(operator, which) + + with az.CodeBuilder(queue) as cb: + x = cb.argument("x", shape="n", dtype=np.float64) + y = cb.argument("y", shape="n", dtype=np.float64) + cb.output(op(x, y)) + + x_in = np.array([1., 2., 3., 4., 5.]) + y_in = np.array([6., 7., 8., 9., 19.]) + code = cb.build() + assert (code.eval_one(x=x_in, y=y_in) == op(x_in, y_in)).all() + + +@pytest.mark.parametrize("which", ("neg",)) +def test_unary_arith(ctx_factory, which): + cl_ctx = ctx_factory() + queue = cl.CommandQueue(cl_ctx) + + op = getattr(operator, which) + + with az.CodeBuilder(queue) as cb: + x = cb.argument("x", shape="n", dtype=np.float64) + cb.output(op(x)) + + x_in = np.array([1., 2., 3., 4., 5.]) + code = cb.build() + assert (code.eval_one(x=x_in) == op(x_in)).all() + + +def test_matmul(ctx_factory): + cl_ctx = ctx_factory() + queue = cl.CommandQueue(cl_ctx) + + with az.CodeBuilder(queue) as cb: + x = cb.argument("x", shape=("m, n"), dtype=np.float64) + y = cb.argument("y", shape=("n, k"), dtype=np.float64) + z = cb.matmul(x, y) + cb.output(z) + + x_in = cl.array.to_device(queue, + np.array([[1, 2, 3], [4, 5, 6]], dtype=float)) + y_in = cl.array.to_device(queue, + np.array([[2, 5], [5, 1], [1, 2]], dtype=float)) + + code = cb.build() + print(code.program) + assert ( + code.eval_one(x=x_in, y=y_in).get() + == x_in.get() @ y_in.get()).all() + + +@pytest.mark.parametrize("shift", (-1, 1, -20, 20)) +def test_roll(ctx_factory, shift): + cl_ctx = ctx_factory() + queue = cl.CommandQueue(cl_ctx) + + with az.CodeBuilder(queue) as cb: + x = cb.argument("x", shape=("n",), dtype=np.float64) + cb.output(cb.roll(x, shift)) + + x_in = cl.array.to_device(queue, np.array([0, 1, 2], dtype=float)) + + code = cb.build() + assert (code.eval_one(x=x_in).get() == np.roll(x_in.get(), shift)).all() + + +@pytest.mark.parametrize("input_dims", (1, 2, 3)) +def test_stack(ctx_factory, input_dims): + cl_ctx = ctx_factory() + queue = cl.CommandQueue(cl_ctx) + + for axis in range(input_dims): + shape = (2,) * input_dims + + with az.CodeBuilder(queue) as cb: + x = cb.argument("x", shape=shape, dtype=np.float64) + y = cb.argument("y", shape=shape, dtype=np.float64) + cb.output(cb.stack((x, y), axis=axis)) + + x_in = np.arange(2 ** input_dims, dtype=float).reshape(shape) + y_in = np.arange(1, 1 + 2 ** input_dims, dtype=float).reshape(shape) + + code = cb.build() + + ref = np.stack((x_in, y_in), axis) + + x_in = cl.array.to_device(queue, x_in) + y_in = cl.array.to_device(queue, y_in) + + assert (code.eval_one(x=x_in, y=y_in).get() == ref).all() + + +@pytest.mark.parametrize("input_dims", (2, 3)) +def test_slice_along_dim(ctx_factory, input_dims): + cl_ctx = ctx_factory() + queue = cl.CommandQueue(cl_ctx) + + for axis in range(input_dims): + shape = (2,) * input_dims + + slice_spec = [slice(None, None, None)] * input_dims + slice_spec[axis] = 1 + slice_spec = tuple(slice_spec) + + with az.CodeBuilder(queue) as cb: + x = cb.argument("x", shape=shape, dtype=np.float64) + cb.output(x[slice_spec]) + + x_in = np.arange(2 ** input_dims, dtype=float).reshape(shape) + code = cb.build() + assert (code.eval_one(x=x_in) == x_in[slice_spec]).all() + + +if __name__ == "__main__": + # make sure that import failures get reported, instead of skipping the + # tests. + if len(sys.argv) > 1: + exec(sys.argv[1]) + else: + from pytest import main + main([__file__]) + +# vim: filetype=pyopencl:fdm=marker -- GitLab From 1a4c0428bcea0f64d7f84b33acb8a7a5d3a1908a Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Mon, 27 Apr 2020 23:44:09 -0500 Subject: [PATCH 12/46] Make things work, sort of --- arrayzy/__init__.py | 2 +- arrayzy/array.py | 411 ++++++++++++++++++++++++++++++++++++++++---- arrayzy/code.py | 369 --------------------------------------- arrayzy/program.py | 78 +++++++++ doc/array.rst | 4 + doc/index.rst | 3 +- doc/program.rst | 4 + test/test_linalg.py | 140 ++++++++------- 8 files changed, 533 insertions(+), 478 deletions(-) delete mode 100644 arrayzy/code.py create mode 100644 arrayzy/program.py create mode 100644 doc/array.rst create mode 100644 doc/program.rst diff --git a/arrayzy/__init__.py b/arrayzy/__init__.py index af107be..989c2fe 100644 --- a/arrayzy/__init__.py +++ b/arrayzy/__init__.py @@ -1 +1 @@ -from arrayzy.code import Code, CodeBuilder +from arrayzy.array import Context diff --git a/arrayzy/array.py b/arrayzy/array.py index 3158061..bd44e26 100644 --- a/arrayzy/array.py +++ b/arrayzy/array.py @@ -21,48 +21,65 @@ THE SOFTWARE. """ import operator +from functools import partialmethod -import islpy as isl import loopy as lp +import islpy as isl import numpy as np - +import loopy.symbolic as sym import pymbolic import pymbolic.primitives as prim from pytools import memoize_method -from functools import partialmethod +from arrayzy.program import BoundProgram, PyOpenCLTarget from arrayzy.utils import domain_for_shape +__doc__ = """ +.. autoclass:: Array +.. autoclass:: ArrayExpression +.. autoclass:: ArrayVariable + +.. autoclass:: Context +""" + + class Array: - """A representation of an array value in a computation. + """A representation of an array computation in a :mod:`loopy` kernel. .. attribute:: ctx - The :class:`Context` with which this array is associated, to - define the meanings of names used in expressions in this object. + The :class:`Context` with which this array is associated, to define the + meanings of names used in expressions in this object. - .. attribute:: dtype + .. attribute:: shape - An instance of :class:`loopy.types.LoopyType` or *None* to indicate - that the type of the array is not yet known. + A tuple of :mod:`pymbolic` expressions or constants representing the + shape of the array. Only quasi-affine expressions are supported. - .. attribute:: shape + .. attribute:: dtype + + An instance of :class:`loopy.types.LoopyType` or *None* to indicate that + the type of the array is not yet known. .. attribute:: name - A name for this computation, or *None*. + A name for this computation, or *None*. This is not necessarily a + variable name, as there may be no defined storage for the array. .. attribute:: ndim The number of dimensions in :attr:`shape`. - .. automethod:: T + .. attribute:: T - .. automethod:: to_loopy_expression + .. attribute:: dims + .. attribute:: dim_names .. automethod:: get_domain + .. automethod:: to_loopy_expression + .. automethod:: __getitem__ .. automethod:: __mul__ .. automethod:: __rmul__ .. automethod:: __add__ @@ -73,23 +90,18 @@ class Array: .. automethod:: __rtruediv__ .. automethod:: __neg__ - .. automethod:: __getitem__ """ - def __init__(self, ctx, shape, dtype): + def __init__(self, ctx, shape, dtype, name): self.ctx = ctx self._dtype = dtype self.shape = shape + self.name = name @property def ndim(self): return len(self.shape) - def _infer_type(self, expr): - from loopy.type_inference import TypeInferenceMapper - mapper = TypeInferenceMapper(self.ctx.program) - return mapper(expr) - def _unary_op(self, op): new_expr = op(self.to_loopy_expression(self.dims)) return ArrayExpression( @@ -103,12 +115,12 @@ class Array: args = (self.to_loopy_expression(self.dims), other) if reverse: args = tuple(reversed(args)) - + new_expr = op(*args) return ArrayExpression( self.ctx, self.shape, - self._infer_type(new_expr), + self.ctx.unify_types(self.dtype, type(other)), new_expr) elif isinstance(other, Array): @@ -122,9 +134,9 @@ class Array: return ArrayExpression( self.ctx, self.shape, - self._infer_type(new_expr), + self.ctx.unify_types(self.dtype, other.dtype), new_expr) - + else: raise ValueError @@ -133,7 +145,7 @@ class Array: __add__ = partialmethod(_binary_op, operator.add) __radd__ = partialmethod(__add__, reverse=True) - + __sub__ = partialmethod(_binary_op, operator.sub) __rsub__ = partialmethod(__sub__, reverse=True) @@ -144,13 +156,13 @@ class Array: def __getitem__(self, slice_spec): from numbers import Integral - + if len(slice_spec) != self.ndim: raise ValueError("incorrect slice shape") dims = [] shape = [] - + for i, elem in enumerate(slice_spec): if elem == slice(None, None, None): d = len(shape) @@ -187,6 +199,8 @@ class Array: def get_domain(self, dim_names=None): """Return the domain that defines the space of indices for this array. + + :arg dim_names: if not *None*, the names of the dimensions of the array """ if dim_names is None: dim_names = self.dim_names @@ -194,6 +208,7 @@ class Array: @property def T(self): + """Return the transpose of the array of the array.""" new_shape = tuple(reversed(self.shape)) new_expr = self.to_loopy_expression(list(reversed(self.dims))) return ArrayExpression(self.ctx, new_shape, self.dtype, new_expr) @@ -206,12 +221,14 @@ class ArrayExpression(Array): """An array-valued expression. .. attribute:: expression + + A :mod:`pymbolic` expression for the array. """ - def __init__(self, ctx, shape, dtype, expression): - super().__init__(ctx, shape, dtype) + def __init__(self, ctx, shape, dtype, expression, name=None): + super().__init__(ctx, shape, dtype, name) self.expression = expression - + def to_loopy_expression(self, dims): assignments = dict(zip(self.dim_names, tuple(dims))) return pymbolic.substitute(self.expression, assignments) @@ -219,16 +236,342 @@ class ArrayExpression(Array): class ArrayVariable(Array): """An array-valued variable. - - .. attribute:: name """ def __init__(self, ctx, shape, dtype, name): - super().__init__(ctx, shape, dtype) - self.name = name + super().__init__(ctx, shape, dtype, name) def to_loopy_expression(self, dims): return prim.Variable(self.name)[tuple(dims)] +class Context: + """An interface for building up array computations. + + .. automethod:: __enter__ + .. automethod:: __exit__ + + .. attribute:: program + + A :class:`lp.LoopKernel` that defines names used in expressions in + :class:`Array` instances attached to this code. Names defined once will + never change their meaning, for the lifetime of this object. + + .. automethod:: build + .. automethod:: argument + .. automethod:: bind + .. automethod:: roll + .. automethod:: stack + .. automethod:: matmul + .. automethod:: output + """ + + def __init__(self, arg): + import sys + + self.target = None + self.queue = None + + # avoid expensive/failing import + if "pyopencl" in sys.modules: + import pyopencl as cl + if isinstance(arg, cl.CommandQueue): + target = PyOpenCLTarget(arg) + self.queue = arg + + if target is None: + raise ValueError(f"invalid argument type: {type(arg).__name__}") + + import loopy as lp + program = lp.make_kernel("{:}", [], target=target.get_loopy_target(), + lang_version=lp.MOST_RECENT_LANGUAGE_VERSION) + + self.bound_arguments = {} + self.program = program + self.var_name_gen = self.program.get_var_name_generator() + self.insn_id_gen = self.program.get_instruction_id_generator() + self._last_insn_id = None + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_value, traceback): + pass + + def build(self): + """Return a generated :class:`BoundProgram`. + """ + return BoundProgram(self.program, self.queue, + self.bound_arguments.copy(), self.target) + + @property + def program(self): + return self._program + + @program.setter + def program(self, program): + self._program = program + + # {{{ internal api + + def unify_types(self, *args): + # Unifies the list of types. + # TODO: does not handle None + result = args[0] + for arg in args[1:]: + result = ( + np.empty(0, dtype=result) + + np.empty(0, dtype=arg + )).dtype + return result + + def _get_or_create_parameter(self, name): + # Create a parameter variable for *name* if not present. + # Return the parameter as a pymbolic variable. + + if name in self.program.all_variable_names(): + if name not in self.program.all_params(): + # FIXME: May have been added, but not added to a domain. + pass + else: + self.program = self.program.copy( + args=self.program.args + [ + lp.ValueArg(name, dtype=self.program.index_dtype) + ]) + + return prim.Variable(name) + + def _get_dependencies(self, expr): + # Get the dependency set of an expression. + # FIXME: this needs to be implemented properly. + if not self._last_insn_id: + return frozenset([]) + return frozenset([self._last_insn_id]) + + def _assume_equal(self, lhs, rhs): + # Add the assumption *lhs = rhs* to the kernel. + param_names = set() + for sdep in map(sym.get_dependencies, (lhs, rhs)): + param_names |= sdep + + # Build domain. + dom = isl.BasicSet.universe( + isl.Space.create_from_names( + isl.DEFAULT_CONTEXT, + set=(), + params=param_names)) + + aff_lhs = sym.aff_from_expr(dom.space, lhs) + aff_rhs = sym.aff_from_expr(dom.space, rhs) + + dom &= aff_lhs.eq_set(aff_rhs) + + # TODO: Throw an error if the assumption leads to a contradiction. + dom, = dom.get_basic_sets() + + self.program = lp.assume(self.program, dom) + + def _make_array_assignment(self, name, dim_names, expr, within_inames): + # Add an instruction that assigns to an array. *dim_names* should be a + # list of strings which are the inames for the dimensions. + from loopy.kernel.instruction import make_assignment + insn_id = self.insn_id_gen(f"_store{name}") + out_insn = make_assignment( + ( + prim.Variable(name)[ + tuple(map(prim.Variable, dim_names))],), + expr, + id=insn_id, + within_inames=frozenset(within_inames), + depends_on=self._get_dependencies(expr)) + + self.program = self.program.copy( + instructions=self.program.instructions + [out_insn]) + self._last_insn_id = insn_id + + # }}} + + # {{{ user interface + + def argument(self, name, shape, dtype, order="C"): + """Append an argument to the program. + + :returns: a :class:`ArrayVariable` + """ + if name in self.program.all_variable_names(): + raise ValueError(f"name '{name}' already in use in code") + + arg = lp.GlobalArg(name, shape=shape, dtype=dtype, order=order) + + shape = arg.shape + dtype = arg.dtype + + # Insert parameters from shape description. + for sdep in map(sym.get_dependencies, arg.shape): + for dep in sdep: + self._get_or_create_parameter(dep) + + # Add argument to program. + self.program = self.program.copy(args=self.program.args + [arg]) + + return ArrayVariable(self, shape, dtype, name) + + def bind(self, arg, val): + """Bind an argument to a value. + + :arg arg: a :class:`ArrayVariable` representing an argument + """ + self.bound_arguments[arg.name] = val + + def roll(self, a, shift, name=None): + """Roll elements along the given axis. + + :returns: a :class:`Array` + """ + assert a.ndim == 1 + idx = (prim.Variable("_0") - shift) % a.shape[0] + # It's not immediately clear what loopy-generated code guarantees about + # the sign of the result of the % operator, so if the result is negative + # we shift it to be positive. + idx = idx + prim.If(prim.Comparison(idx, "<", 0), a.shape[0], 0) + expr = a.to_loopy_expression((idx,)) + return ArrayExpression(self, a.shape, a.dtype, expr) + + def stack(self, arrays, axis=0, name=None): + """Join a sequence of arrays along a new axis. + + :arg axis: the index of the axis + + :returns: a :class:`Array` + """ + for array in arrays[1:]: + if array.ndim != arrays[0].ndim: + raise ValueError("arrays must have same dimension") + for array_dim, array0_dim in zip(array.shape, arrays[0].shape): + self._assume_equal(array_dim, array0_dim) + + if name is None: + name = "_stack" + + name = self.var_name_gen(name) + + out_inames = [] + for i in range(arrays[0].ndim): + if i >= axis: + i += 1 + out_inames.append(f"{name}_dim{i}") + + domain = domain_for_shape(out_inames, arrays[0].shape) + + out_shape = list(arrays[0].shape) + out_shape.insert(axis, len(arrays)) + out_shape = tuple(out_shape) + + out_dtype = self.unify_types(*(a.dtype for a in arrays)) + + out = lp.TemporaryVariable( + name, dtype=out_dtype, + shape=out_shape, + address_space=lp.AddressSpace.GLOBAL) + + # Create an output instruction for each input array. + from loopy.kernel.instruction import make_assignment + out_insns = [] + for i in range(len(arrays)): + indices = list(map(prim.Variable, out_inames)) + expr = arrays[i].to_loopy_expression(indices) + indices.insert(axis, i) + indices = tuple(indices) + insn_id = self.insn_id_gen(f"{name}_copy{i}") + out_insn = make_assignment( + (prim.Variable(name)[indices],), + expr, + id=insn_id, + within_inames=frozenset(out_inames), + depends_on=self._get_dependencies(expr)) + self._last_insn_id = insn_id + out_insns.append(out_insn) + + new_tv = self.program.temporary_variables.copy() + new_tv[name] = out + + self.program = self.program.copy( + temporary_variables=new_tv, + domains=self.program.domains + [domain], + instructions=self.program.instructions + out_insns) + + return ArrayVariable(self, out_shape, out_dtype, name) + + def matmul(self, a, b, name=None): + """Multiply matrix *a* by *b*. + """ + # Generate a temporay for the matrix-matrix multiplication. + self._assume_equal(a.shape[1], b.shape[0]) + if name is None: + name = "_matmul" + name = self.var_name_gen(name) + + out_shape = (a.shape[0], b.shape[1]) + out_inames = (f"{name}_dim0", f"{name}_dim1") + out_red_iname = self.var_name_gen(f"{name}_reduce") + out_dtype = self.unify_types(a.dtype, b.dtype) + + out = lp.TemporaryVariable( + name, dtype=out_dtype, + shape=(a.shape[0], b.shape[1]), + address_space=lp.AddressSpace.GLOBAL) + + import loopy.library.reduction as red + + a_inames = tuple(map(prim.Variable, (out_inames[0], out_red_iname))) + b_inames = tuple(map(prim.Variable, (out_red_iname, out_inames[1]))) + + expr = a.to_loopy_expression(a_inames) * b.to_loopy_expression(b_inames) + + out_expr = sym.Reduction( + operation=red.parse_reduction_op("sum"), + inames=(out_red_iname,), + expr=expr, + allow_simultaneous=False) + + domain = domain_for_shape( + out_inames + (out_red_iname,), + out_shape + (a.shape[1],)) + + new_tv = self.program.temporary_variables.copy() + new_tv[name] = out + + self.program = self.program.copy( + temporary_variables=new_tv, + domains=self.program.domains + [domain]) + + self._make_array_assignment( + name, dim_names=out_inames, expr=out_expr, + within_inames=frozenset(out_inames)) + + return ArrayVariable(self, out_shape, out_dtype, name) + + def output(self, expr): + """Copy *expr* to an output variable. + + :returns: a :class:`Array` + """ + name = self.var_name_gen("_out") + out = lp.GlobalArg(name, shape=expr.shape, dtype=expr.dtype, order="C") + out_inames = [ + self.var_name_gen(f"{name}_dim{d}") for d in range(expr.ndim)] + out_expr = expr.to_loopy_expression(map(prim.Variable, out_inames)) + + domain = expr.get_domain(out_inames) + + self.program = self.program.copy( + args=self.program.args + [out], + domains=self.program.domains + [domain]) + + self._make_array_assignment( + name, dim_names=out_inames, expr=out_expr, + within_inames=frozenset(out_inames)) + + # }}} + # vim: foldmethod=marker diff --git a/arrayzy/code.py b/arrayzy/code.py deleted file mode 100644 index b7cb9d8..0000000 --- a/arrayzy/code.py +++ /dev/null @@ -1,369 +0,0 @@ -import loopy as lp -import loopy.symbolic as sym - -import islpy as isl -import pymbolic.primitives as prim - -from arrayzy.array import ArrayVariable, ArrayExpression -from arrayzy.utils import domain_for_shape - - -class Target: - pass - - -class PyOpenCLTarget: - - def __init__(self, queue): - self.queue = queue - - def get_loopy_target(self): - import loopy as lp - return lp.PyOpenCLTarget(self.queue.device) - - -class Code: - """A representation of a program carrying out an array-valued computation. - - .. attribute:: program - - A :mod:`loopy.LoopKernel` that defines names used in expressions in - :class:`Array` instances attached to this code. Names defined once will - never change their meaning, for the lifetime of this object. - - .. attribute:: bindings - - A mapping from argument names to bound values. - - .. attribute:: bound_arguments - - .. attribute:: target - - A :class:`Target` for code generation and execution. - - .. note:: - - *program* may not define any names starting with underscores. - """ - - def __init__(self, queue, program, bindings, target): - self.queue = queue - self._program = program - self.bindings = bindings - self.target = target - self.bound_arguments = {} - - # The 'program' attribute is only supposed to be modified via - # update_program. - @property - def program(self): - return self._program - - def update_program(self, program): - self._program = program - - def eval_one(self, *args, **kwargs): - _, (result,) = self(*args, **kwargs) - return result - - def __call__(self, *args, **kwargs): - kwargs.update(self.bound_arguments) - return self.program(self.queue, *args, **kwargs) - - -class CodeBuilder: - """An interface for building up array computations. - - .. automethod:: __enter__ - .. automethod:: __exit__ - """ - - def __init__(self, arg): - import sys - - target = None - - # avoid expensive/failing import - if "pyopencl" in sys.modules: - import pyopencl as cl - if isinstance(arg, cl.CommandQueue): - target = PyOpenCLTarget(arg) - - if target is None: - raise ValueError(f"invalid argument type: {type(arg).__name__}") - - import loopy as lp - program = lp.make_kernel( - "{:}", [], target=target.get_loopy_target(), - lang_version=lp.MOST_RECENT_LANGUAGE_VERSION) - self.code = Code(arg, program, {}, target) - - self.var_name_gen = self.program.get_var_name_generator() - self.insn_id_gen = self.program.get_instruction_id_generator() - self._last_insn_id = None - - def __enter__(self): - return self - - def __exit__(self, exc_type, exc_value, traceback): - pass - - def build(self): - """Return the generated :class:`Code`. - """ - return self.code - - @property - def program(self): - return self.code.program - - @property - def update_program(self): - return self.code.update_program - - # {{{ - - def _unify_types(self, *args): - if not all(args[0] == arg for arg in args[1:]): - raise NotImplementedError - return args[0] - - def _get_or_create_parameter(self, name): - # Create a parameter variable for *name* if not present. - # Return the parameter as a pymbolic variable. - - if name in self.program.all_variable_names(): - if name not in self.program.all_params(): - # FIXME: May have been added, but not added to a domain. - pass - else: - self.update_program( - self.program.copy( - args=self.program.args + [ - lp.ValueArg(name, dtype=self.program.index_dtype) - ])) - - return prim.Variable(name) - - def _get_dependencies(self, expr): - # FIXME: this needs to be implemented properly. - if not self._last_insn_id: - return frozenset([]) - return frozenset([self._last_insn_id]) - - def _assume_equal(self, lhs, rhs): - """Adds the assumption "lhs = rhs".""" - param_names = set() - for sdep in map(sym.get_dependencies, (lhs, rhs)): - param_names |= sdep - - # Build domain. - dom = isl.BasicSet.universe( - isl.Space.create_from_names( - isl.DEFAULT_CONTEXT, - set=(), - params=param_names)) - - aff_lhs = sym.aff_from_expr(dom.space, lhs) - aff_rhs = sym.aff_from_expr(dom.space, rhs) - - dom &= aff_lhs.eq_set(aff_rhs) - - # TODO: Throw an error if the assumption leads to a contradiction. - dom, = dom.get_basic_sets() - - self.update_program(lp.assume(self.program, dom)) - - def _make_array_assignment(self, name, dim_names, expr, within_inames): - from loopy.kernel.instruction import make_assignment - insn_id = self.insn_id_gen(f"_store{name}") - out_insn = make_assignment( - ( - prim.Variable(name)[ - tuple(map(prim.Variable, dim_names))],), - expr, - id=insn_id, - within_inames=frozenset(dim_names), - depends_on=self._get_dependencies(expr)) - - self.update_program(self.program.copy( - instructions=self.program.instructions + [out_insn])) - self._last_insn_id = insn_id - - # }}} - - # {{{ user interface - - def argument(self, name, shape, dtype, order="C"): - """Append an argument to the program. - - :returns: a :class:`ArrayVariable` - """ - if name in self.program.all_variable_names(): - raise ValueError(f"name '{name}' already in use in code") - - arg = lp.GlobalArg(name, shape=shape, dtype=dtype, order=order) - - shape = arg.shape - dtype = arg.dtype - - # Insert parameters from shape description. - for sdep in map(sym.get_dependencies, arg.shape): - for dep in sdep: - self._get_or_create_parameter(dep) - - # Add argument to program. - self.code.update_program( - self.program.copy(args=self.program.args + [arg])) - - return ArrayVariable(self.code, shape, dtype, name) - - def bind_argument(self, arg, val): - self.code.bound_arguments[arg.name] = val - - def roll(self, a, shift, name=None): - """Roll elements along the given axis. - - :returns: a :class:`Array` - """ - assert a.ndim == 1 - idx = (prim.Variable("_0") - shift) % a.shape[0] - # It's not immediately clear what loopy-generated code guarantees about - # the sign of the result of the % operator, so if the result is negative - # we shift it to be positive. - idx = idx + prim.If(prim.Comparison(idx, "<", 0), a.shape[0], 0) - expr = a.to_loopy_expression((idx,)) - return ArrayExpression(self.code, a.shape, a.dtype, expr) - - def stack(self, arrays, axis=0, name=None): - """Join a sequence of arrays along a new axis. - - :arg axis: the index of the axis - - :returns: a :class:`Array` - """ - for array in arrays[1:]: - if array.ndim != arrays[0].ndim: - raise ValueError("arrays must have same dimension") - for array_dim, array0_dim in zip(array.shape, arrays[0].shape): - self._assume_equal(array_dim, array0_dim) - - if name is None: - name = "_stack" - - name = self.var_name_gen(name) - - out_inames = [] - for i in range(arrays[0].ndim): - if i >= axis: - i += 1 - out_inames.append(f"{name}_dim{i}") - - domain = domain_for_shape(out_inames, arrays[0].shape) - - out_shape = list(arrays[0].shape) - out_shape.insert(axis, len(arrays)) - out_shape = tuple(out_shape) - - out_dtype = self._unify_types(*(a.dtype for a in arrays)) - - out = lp.TemporaryVariable( - name, dtype=out_dtype, - shape=out_shape, - address_space=lp.AddressSpace.GLOBAL) - - # Create an output instruction for each input array. - from loopy.kernel.instruction import make_assignment - out_insns = [] - for i in range(len(arrays)): - indices = list(map(prim.Variable, out_inames)) - expr = arrays[i].to_loopy_expression(indices) - indices.insert(axis, i) - indices = tuple(indices) - insn_id = self.insn_id_gen(f"{name}_copy{i}") - out_insn = make_assignment( - (prim.Variable(name)[indices],), - expr, - id=insn_id, - within_inames=frozenset(out_inames), - depends_on=self._get_dependencies(expr)) - self._last_insn_id = insn_id - out_insns.append(out_insn) - - new_tv = self.program.temporary_variables.copy() - new_tv[name] = out - - self.update_program(self.program.copy( - temporary_variables=new_tv, - domains=self.program.domains + [domain], - instructions=self.program.instructions + out_insns)) - - return ArrayVariable(self.code, out_shape, out_dtype, name) - - def matmul(self, a, b, name=None): - # Generate a temporay for the matrix-matrix multiplication. - self._assume_equal(a.shape[1], b.shape[0]) - if name is None: - name = "_matmul" - name = self.var_name_gen(name) - - out_shape = (a.shape[0], b.shape[1]) - out_inames = (f"{name}_dim0", f"{name}_dim1") - out_red_iname = self.var_name_gen(f"{name}_reduce") - out_dtype = self._unify_types(a.dtype, b.dtype) - - out = lp.TemporaryVariable( - name, dtype=out_dtype, - shape=(a.shape[0], b.shape[1]), - address_space=lp.AddressSpace.GLOBAL) - - import loopy.library.reduction as red - - a_inames = tuple(map(prim.Variable, (out_inames[0], out_red_iname))) - b_inames = tuple(map(prim.Variable, (out_red_iname, out_inames[1]))) - - expr = a.to_loopy_expression(a_inames) * b.to_loopy_expression(b_inames) - - out_expr = sym.Reduction( - operation=red.parse_reduction_op("sum"), - inames=(out_red_iname,), - expr=expr, - allow_simultaneous=False) - - domain = domain_for_shape( - out_inames + (out_red_iname,), - out_shape + (a.shape[1],)) - - new_tv = self.program.temporary_variables.copy() - new_tv[name] = out - - self.update_program(self.program.copy( - temporary_variables=new_tv, - domains=self.program.domains + [domain])) - - self._make_array_assignment( - name, dim_names=out_inames, expr=out_expr, - within_inames=frozenset(out_inames)) - - return ArrayVariable(self.code, out_shape, out_dtype, name) - - def output(self, expr): - name = self.var_name_gen("_out") - out = lp.GlobalArg(name, shape=expr.shape, dtype=expr.dtype, order="C") - out_inames = [ - self.var_name_gen(f"{name}_dim{d}") for d in range(expr.ndim)] - out_expr = expr.to_loopy_expression(map(prim.Variable, out_inames)) - - domain = expr.get_domain(out_inames) - - self.update_program(self.program.copy( - args=self.program.args + [out], - domains=self.program.domains + [domain])) - - self._make_array_assignment( - name, dim_names=out_inames, expr=out_expr, - within_inames=frozenset(out_inames)) - - # }}} - -Context = CodeBuilder diff --git a/arrayzy/program.py b/arrayzy/program.py new file mode 100644 index 0000000..b77b92d --- /dev/null +++ b/arrayzy/program.py @@ -0,0 +1,78 @@ +__copyright__ = """Copyright (C) 2020 Andreas Kloeckner""" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + + +import loopy as lp +from pytools import RecordWithoutPickling + + +class Target: + pass + + +class PyOpenCLTarget: + + def __init__(self, queue): + self.queue = queue + + def get_loopy_target(self): + import loopy as lp + return lp.PyOpenCLTarget(self.queue.device) + + +class BoundProgram(RecordWithoutPickling): + """A wrapper around a :mod:`loopy` kernel with additional context. + + .. attribute:: program + + The underlying :class:`lp.LoopKernel`. + + .. attribute:: queue + + If not *None*, a :mod:`pyopencl` command queue for the program. + + .. attribute:: bound_arguments + + A map from names to pre-bound kernel arguments. + + .. attribute:: target + + A :class:`Target` for code generation and execution. + + """ + + def __init__(self, program, queue, bound_arguments, target): + super().__init__(program=program, queue=queue, + bound_arguments=bound_arguments, target=target) + + def __call__(self, *args, **kwargs): + """Convenience function for launching a :mod:`pyopencl` computation.""" + if not self.queue: + raise ValueError("queue must be specified") + + updated_kwargs = self.bound_arguments.copy() + updated_kwargs.update(kwargs) + return self.program(self.queue, *args, **updated_kwargs) + + +# vim: foldmethod=marker + diff --git a/doc/array.rst b/doc/array.rst new file mode 100644 index 0000000..23fc13d --- /dev/null +++ b/doc/array.rst @@ -0,0 +1,4 @@ +Building array computations +=========================== + +.. automodule:: arrayzy.array diff --git a/doc/index.rst b/doc/index.rst index 298bef1..306b5cf 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -10,7 +10,8 @@ Welcome to Arrayzy's documentation! :maxdepth: 2 :caption: Contents: - + array + program Indices and tables ================== diff --git a/doc/program.rst b/doc/program.rst new file mode 100644 index 0000000..7c96f9f --- /dev/null +++ b/doc/program.rst @@ -0,0 +1,4 @@ +Generated programs +================== + +.. autoclass:: arrayzy.program.BoundProgram diff --git a/test/test_linalg.py b/test/test_linalg.py index 7448476..4172d4d 100644 --- a/test/test_linalg.py +++ b/test/test_linalg.py @@ -38,17 +38,23 @@ from pyopencl.tools import ( # noqa pytest_generate_tests_for_pyopencl as pytest_generate_tests) +def eval_one(prog, **kwargs): + # Evaluate and return the unique output. + _, (out,) = prog(**kwargs) + return out + + def test_symbolic_array(ctx_factory): cl_ctx = ctx_factory() queue = cl.CommandQueue(cl_ctx) - with az.CodeBuilder(queue) as cb: - x = cb.argument("x", shape="n", dtype=np.float64) - cb.output(x) + with az.Context(queue) as ctx: + x = ctx.argument("x", shape="n", dtype=np.float64) + ctx.output(x) - x_in = cl.array.to_device(queue, np.array([1., 2., 3., 4., 5.])) - code = cb.build() - assert (code.eval_one(x=x_in).get() == x_in.get()).all() + x = np.array([1., 2., 3., 4., 5.]) + prog = ctx.build() + assert (eval_one(prog, x=x) == x).all() @pytest.mark.parametrize("dim", (1, 2, 3)) @@ -56,16 +62,15 @@ def test_transpose(ctx_factory, dim): cl_ctx = ctx_factory() queue = cl.CommandQueue(cl_ctx) - with az.CodeBuilder(queue) as cb: + with az.Context(queue) as ctx: shape = ("l", "m", "n")[-dim:] - x = cb.argument("x", shape=shape, dtype=np.float64) - cb.output(x.T) + x = ctx.argument("x", shape=shape, dtype=np.float64) + ctx.output(x.T) - x_in = cl.array.to_device(queue, - np.arange(24, dtype=float).reshape((2, 3, -1)[-dim:])) + x = np.arange(24, dtype=np.float64).reshape((2, 3, -1)[-dim:]) - code = cb.build() - assert (code.eval_one(x=x_in).get() == x_in.get().T).all() + prog = ctx.build() + assert (eval_one(prog, x=x) == x.T).all() @pytest.mark.parametrize("which", ("add", "sub", "mul", "truediv")) @@ -79,13 +84,13 @@ def test_scalar_array_binary_arith(ctx_factory, which, reverse): op_orig = op op = lambda x, y: op_orig(y, x) - with az.CodeBuilder(queue) as cb: - x = cb.argument("x", shape="n", dtype=np.float64) - cb.output(op(2, x)) + with az.Context(queue) as ctx: + x = ctx.argument("x", shape="n", dtype=np.float64) + ctx.output(op(2, x)) - x_in = np.array([1., 2., 3., 4., 5.]) - code = cb.build() - assert (code.eval_one(x=x_in) == op(2, x_in)).all() + x = np.array([1., 2., 3., 4., 5.]) + prog = ctx.build() + assert (eval_one(prog, x=x) == op(2, x)).all() @pytest.mark.parametrize("which", ("add", "sub", "mul", "truediv")) @@ -95,15 +100,15 @@ def test_array_array_binary_arith(ctx_factory, which): op = getattr(operator, which) - with az.CodeBuilder(queue) as cb: - x = cb.argument("x", shape="n", dtype=np.float64) - y = cb.argument("y", shape="n", dtype=np.float64) - cb.output(op(x, y)) + with az.Context(queue) as ctx: + x = ctx.argument("x", shape="n", dtype=np.float64) + y = ctx.argument("y", shape="n", dtype=np.float64) + ctx.output(op(x, y)) - x_in = np.array([1., 2., 3., 4., 5.]) - y_in = np.array([6., 7., 8., 9., 19.]) - code = cb.build() - assert (code.eval_one(x=x_in, y=y_in) == op(x_in, y_in)).all() + x = np.array([1., 2., 3., 4., 5.]) + y = np.array([6., 7., 8., 9., 19.]) + prog = ctx.build() + assert (eval_one(prog, x=x, y=y) == op(x, y)).all() @pytest.mark.parametrize("which", ("neg",)) @@ -113,35 +118,30 @@ def test_unary_arith(ctx_factory, which): op = getattr(operator, which) - with az.CodeBuilder(queue) as cb: - x = cb.argument("x", shape="n", dtype=np.float64) - cb.output(op(x)) + with az.Context(queue) as ctx: + x = ctx.argument("x", shape="n", dtype=np.float64) + ctx.output(op(x)) - x_in = np.array([1., 2., 3., 4., 5.]) - code = cb.build() - assert (code.eval_one(x=x_in) == op(x_in)).all() + x = np.array([1., 2., 3., 4., 5.]) + prog = ctx.build() + assert (eval_one(prog, x=x) == op(x)).all() def test_matmul(ctx_factory): cl_ctx = ctx_factory() queue = cl.CommandQueue(cl_ctx) - with az.CodeBuilder(queue) as cb: - x = cb.argument("x", shape=("m, n"), dtype=np.float64) - y = cb.argument("y", shape=("n, k"), dtype=np.float64) - z = cb.matmul(x, y) - cb.output(z) + with az.Context(queue) as ctx: + x = ctx.argument("x", shape=("m, n"), dtype=np.float64) + y = ctx.argument("y", shape=("n, k"), dtype=np.float64) + z = ctx.matmul(x, y) + ctx.output(z) - x_in = cl.array.to_device(queue, - np.array([[1, 2, 3], [4, 5, 6]], dtype=float)) - y_in = cl.array.to_device(queue, - np.array([[2, 5], [5, 1], [1, 2]], dtype=float)) + x = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float64) + y = np.array([[2, 5], [5, 1], [1, 2]], dtype=np.float64) - code = cb.build() - print(code.program) - assert ( - code.eval_one(x=x_in, y=y_in).get() - == x_in.get() @ y_in.get()).all() + prog = ctx.build() + assert (eval_one(prog, x=x, y=y) == x @ y).all() @pytest.mark.parametrize("shift", (-1, 1, -20, 20)) @@ -149,14 +149,14 @@ def test_roll(ctx_factory, shift): cl_ctx = ctx_factory() queue = cl.CommandQueue(cl_ctx) - with az.CodeBuilder(queue) as cb: - x = cb.argument("x", shape=("n",), dtype=np.float64) - cb.output(cb.roll(x, shift)) + with az.Context(queue) as ctx: + x = ctx.argument("x", shape=("n",), dtype=np.float64) + ctx.output(ctx.roll(x, shift)) - x_in = cl.array.to_device(queue, np.array([0, 1, 2], dtype=float)) + x = np.array([0, 1, 2], dtype=np.float64) - code = cb.build() - assert (code.eval_one(x=x_in).get() == np.roll(x_in.get(), shift)).all() + prog = ctx.build() + assert (eval_one(prog, x=x) == np.roll(x, shift)).all() @pytest.mark.parametrize("input_dims", (1, 2, 3)) @@ -167,22 +167,16 @@ def test_stack(ctx_factory, input_dims): for axis in range(input_dims): shape = (2,) * input_dims - with az.CodeBuilder(queue) as cb: - x = cb.argument("x", shape=shape, dtype=np.float64) - y = cb.argument("y", shape=shape, dtype=np.float64) - cb.output(cb.stack((x, y), axis=axis)) - - x_in = np.arange(2 ** input_dims, dtype=float).reshape(shape) - y_in = np.arange(1, 1 + 2 ** input_dims, dtype=float).reshape(shape) - - code = cb.build() - - ref = np.stack((x_in, y_in), axis) + with az.Context(queue) as ctx: + x = ctx.argument("x", shape=shape, dtype=np.float64) + y = ctx.argument("y", shape=shape, dtype=np.float64) + ctx.output(ctx.stack((x, y), axis=axis)) - x_in = cl.array.to_device(queue, x_in) - y_in = cl.array.to_device(queue, y_in) + x = np.arange(2 ** input_dims, dtype=np.float64).reshape(shape) + y = np.arange(1, 1 + 2 ** input_dims, dtype=np.float64).reshape(shape) - assert (code.eval_one(x=x_in, y=y_in).get() == ref).all() + prog = ctx.build() + assert (eval_one(prog, x=x, y=y) == np.stack((x, y), axis)).all() @pytest.mark.parametrize("input_dims", (2, 3)) @@ -197,13 +191,13 @@ def test_slice_along_dim(ctx_factory, input_dims): slice_spec[axis] = 1 slice_spec = tuple(slice_spec) - with az.CodeBuilder(queue) as cb: - x = cb.argument("x", shape=shape, dtype=np.float64) - cb.output(x[slice_spec]) + with az.Context(queue) as ctx: + x = ctx.argument("x", shape=shape, dtype=np.float64) + ctx.output(x[slice_spec]) - x_in = np.arange(2 ** input_dims, dtype=float).reshape(shape) - code = cb.build() - assert (code.eval_one(x=x_in) == x_in[slice_spec]).all() + x = np.arange(2 ** input_dims, dtype=np.float64).reshape(shape) + prog = ctx.build() + assert (eval_one(prog, x=x) == x[slice_spec]).all() if __name__ == "__main__": -- GitLab From 756eaef59eb0516a24114a102d0a5ef08b3128be Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Mon, 27 Apr 2020 23:46:14 -0500 Subject: [PATCH 13/46] Modify advection demo --- experiments/advection.py | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/experiments/advection.py b/experiments/advection.py index 56fe1c6..e5e435f 100644 --- a/experiments/advection.py +++ b/experiments/advection.py @@ -374,7 +374,7 @@ def matrix_getter(name, shape): def getter(self): mat = self.cb.argument(name, shape, np.float64) - self.cb.bind_argument(mat, getattr(self.discr, name)) + self.cb.bind(mat, getattr(self.discr, name)) return mat return property(memoized(getter)) @@ -555,21 +555,18 @@ def main(): discr = DGDiscr1D(0, 2*np.pi, nelements=nelements, nnodes=nnodes) - with az.CodeBuilder(queue) as cb: + with az.Context(queue) as cb: u = cb.argument("u", shape="(nelements, nnodes)", dtype=np.float64) op = AdvectionOperator( discr, c=1, flux_type="central", dg_ops=DGOps1D(discr, cb)) cb.output(op.apply(u)) - code = cb.build() - print(code.program) - #import loopy as lp - #code.update_program(lp.fix_parameters(code.program, nnodes=nnodes, nelements=nelements)) + prog = cb.build() u = np.sin(discr.nodes()) - print(code.eval_one(u=u.reshape(nelements, nnodes))) + print(prog.program) + print(prog(u=u.reshape(nelements, nnodes))[1][0]) if __name__ == "__main__": - #from pytest import main - #main([__file__]) main() + -- GitLab From 0b128689c024998d053423f57196897ba9c99496 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Mon, 27 Apr 2020 23:51:12 -0500 Subject: [PATCH 14/46] Add loopy to requirements.txt --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index 7819c26..a9288c2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1 +1,2 @@ +git+https://github.com/inducer/loopy.git git+https://github.com/inducer/pyopencl.git -- GitLab From d5afc8042cee397ea86ad108d9b984c770777fc4 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Mon, 27 Apr 2020 23:54:31 -0500 Subject: [PATCH 15/46] Flake8 fixes --- arrayzy/array.py | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/arrayzy/array.py b/arrayzy/array.py index bd44e26..a2042aa 100644 --- a/arrayzy/array.py +++ b/arrayzy/array.py @@ -29,7 +29,6 @@ import numpy as np import loopy.symbolic as sym import pymbolic import pymbolic.primitives as prim -from pytools import memoize_method from arrayzy.program import BoundProgram, PyOpenCLTarget from arrayzy.utils import domain_for_shape @@ -59,8 +58,8 @@ class Array: .. attribute:: dtype - An instance of :class:`loopy.types.LoopyType` or *None* to indicate that - the type of the array is not yet known. + An instance of :class:`loopy.types.LoopyType` or *None* to indicate + that the type of the array is not yet known. .. attribute:: name @@ -173,8 +172,8 @@ class Array: dims.append(elem) else: raise ValueError("not implemented") - # TODO: Not clear how to implement slices with upper bounds that may - # exceed the symbolic maximum along the dimension. What is the + # TODO: Not clear how to implement slices with upper bounds that + # may exceed the symbolic maximum along the dimension. What is the # shape of the resulting array? return ArrayExpression( @@ -185,8 +184,8 @@ class Array: @property def dtype(self): - # TODO: The dtype could get out of sync with the kernel if the kernel is - # updated. Should the dtype be updated as well? + # TODO: The dtype could get out of sync with the kernel if the kernel + # is updated. Should the dtype be updated as well? return self._dtype @property @@ -319,10 +318,7 @@ class Context: # TODO: does not handle None result = args[0] for arg in args[1:]: - result = ( - np.empty(0, dtype=result) - + np.empty(0, dtype=arg - )).dtype + result = (np.empty(0, dtype=result) + np.empty(0, dtype=arg)).dtype return result def _get_or_create_parameter(self, name): @@ -431,8 +427,8 @@ class Context: assert a.ndim == 1 idx = (prim.Variable("_0") - shift) % a.shape[0] # It's not immediately clear what loopy-generated code guarantees about - # the sign of the result of the % operator, so if the result is negative - # we shift it to be positive. + # the sign of the result of the % operator, so if the result is + # negative we shift it to be positive. idx = idx + prim.If(prim.Comparison(idx, "<", 0), a.shape[0], 0) expr = a.to_loopy_expression((idx,)) return ArrayExpression(self, a.shape, a.dtype, expr) @@ -526,7 +522,9 @@ class Context: a_inames = tuple(map(prim.Variable, (out_inames[0], out_red_iname))) b_inames = tuple(map(prim.Variable, (out_red_iname, out_inames[1]))) - expr = a.to_loopy_expression(a_inames) * b.to_loopy_expression(b_inames) + expr = ( + a.to_loopy_expression(a_inames) + * b.to_loopy_expression(b_inames)) out_expr = sym.Reduction( operation=red.parse_reduction_op("sum"), -- GitLab From e084e44bd98a25feb64b67d08823d5fe93f9fd2b Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Mon, 27 Apr 2020 23:56:10 -0500 Subject: [PATCH 16/46] more flake8 fixes --- arrayzy/__init__.py | 2 +- arrayzy/program.py | 8 +++----- test/test_linalg.py | 7 ++++--- 3 files changed, 8 insertions(+), 9 deletions(-) diff --git a/arrayzy/__init__.py b/arrayzy/__init__.py index 989c2fe..088dc97 100644 --- a/arrayzy/__init__.py +++ b/arrayzy/__init__.py @@ -1 +1 @@ -from arrayzy.array import Context +from arrayzy.array import Context # noqa diff --git a/arrayzy/program.py b/arrayzy/program.py index b77b92d..0378ca4 100644 --- a/arrayzy/program.py +++ b/arrayzy/program.py @@ -21,7 +21,6 @@ THE SOFTWARE. """ -import loopy as lp from pytools import RecordWithoutPickling @@ -62,17 +61,16 @@ class BoundProgram(RecordWithoutPickling): def __init__(self, program, queue, bound_arguments, target): super().__init__(program=program, queue=queue, - bound_arguments=bound_arguments, target=target) - + bound_arguments=bound_arguments, target=target) + def __call__(self, *args, **kwargs): """Convenience function for launching a :mod:`pyopencl` computation.""" if not self.queue: raise ValueError("queue must be specified") - + updated_kwargs = self.bound_arguments.copy() updated_kwargs.update(kwargs) return self.program(self.queue, *args, **updated_kwargs) # vim: foldmethod=marker - diff --git a/test/test_linalg.py b/test/test_linalg.py index 4172d4d..1d2cd02 100644 --- a/test/test_linalg.py +++ b/test/test_linalg.py @@ -47,7 +47,7 @@ def eval_one(prog, **kwargs): def test_symbolic_array(ctx_factory): cl_ctx = ctx_factory() queue = cl.CommandQueue(cl_ctx) - + with az.Context(queue) as ctx: x = ctx.argument("x", shape="n", dtype=np.float64) ctx.output(x) @@ -82,7 +82,8 @@ def test_scalar_array_binary_arith(ctx_factory, which, reverse): op = getattr(operator, which) if reverse: op_orig = op - op = lambda x, y: op_orig(y, x) + def op(x, y): + return op_orig(y, x) with az.Context(queue) as ctx: x = ctx.argument("x", shape="n", dtype=np.float64) @@ -186,7 +187,7 @@ def test_slice_along_dim(ctx_factory, input_dims): for axis in range(input_dims): shape = (2,) * input_dims - + slice_spec = [slice(None, None, None)] * input_dims slice_spec[axis] = 1 slice_spec = tuple(slice_spec) -- GitLab From 56fa8da48f70afbea0868e8b9c66035872b6300a Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Mon, 27 Apr 2020 23:57:22 -0500 Subject: [PATCH 17/46] Even more flake8 fixes --- arrayzy/array.py | 2 +- test/test_linalg.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/arrayzy/array.py b/arrayzy/array.py index a2042aa..fe1a5ab 100644 --- a/arrayzy/array.py +++ b/arrayzy/array.py @@ -206,7 +206,7 @@ class Array: return domain_for_shape(dim_names, self.shape) @property - def T(self): + def T(self): # noqa """Return the transpose of the array of the array.""" new_shape = tuple(reversed(self.shape)) new_expr = self.to_loopy_expression(list(reversed(self.dims))) diff --git a/test/test_linalg.py b/test/test_linalg.py index 1d2cd02..5393f59 100644 --- a/test/test_linalg.py +++ b/test/test_linalg.py @@ -82,6 +82,7 @@ def test_scalar_array_binary_arith(ctx_factory, which, reverse): op = getattr(operator, which) if reverse: op_orig = op + def op(x, y): return op_orig(y, x) -- GitLab From 47e7c5df6628ec2f657de22db6559bb03a5fa228 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Mon, 27 Apr 2020 23:59:21 -0500 Subject: [PATCH 18/46] Try removing pyopencl from requirements.txt --- .gitlab-ci.yml | 5 ++--- requirements.txt | 1 - test/test_linalg.py | 2 +- 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index d2ad9be..5f733be 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -2,8 +2,7 @@ Python 3 POCL: script: - export PY_EXE=python3 - export PYOPENCL_TEST=portable - - export EXTRA_INSTALL="pybind11 numpy mako" - - export LOOPY_NO_CACHE=1 + - export EXTRA_INSTALL="pyopencl" - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project.sh - ". ./build-and-test-py-project.sh" tags: @@ -18,7 +17,7 @@ Python 3 POCL: Pylint: script: - export PY_EXE=python3 - - EXTRA_INSTALL="pybind11 numpy mako" + - EXTRA_INSTALL="pyopencl" - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/prepare-and-run-pylint.sh - ". ./prepare-and-run-pylint.sh arrayzy test/test_*.py" tags: diff --git a/requirements.txt b/requirements.txt index a9288c2..22617c7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1 @@ git+https://github.com/inducer/loopy.git -git+https://github.com/inducer/pyopencl.git diff --git a/test/test_linalg.py b/test/test_linalg.py index 5393f59..e3949ce 100644 --- a/test/test_linalg.py +++ b/test/test_linalg.py @@ -82,7 +82,7 @@ def test_scalar_array_binary_arith(ctx_factory, which, reverse): op = getattr(operator, which) if reverse: op_orig = op - + def op(x, y): return op_orig(y, x) -- GitLab From dc222823727cb4717e5db353bc5239f8cea5033f Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Tue, 28 Apr 2020 00:02:20 -0500 Subject: [PATCH 19/46] Placate pylint and flake8, possibly --- test/test_linalg.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/test/test_linalg.py b/test/test_linalg.py index e3949ce..b1df677 100644 --- a/test/test_linalg.py +++ b/test/test_linalg.py @@ -73,6 +73,12 @@ def test_transpose(ctx_factory, dim): assert (eval_one(prog, x=x) == x.T).all() +def reverse_args(f): + def wrapper(*args): + return f(*reversed(args)) + return wrapper + + @pytest.mark.parametrize("which", ("add", "sub", "mul", "truediv")) @pytest.mark.parametrize("reverse", (False, True)) def test_scalar_array_binary_arith(ctx_factory, which, reverse): @@ -81,10 +87,7 @@ def test_scalar_array_binary_arith(ctx_factory, which, reverse): op = getattr(operator, which) if reverse: - op_orig = op - - def op(x, y): - return op_orig(y, x) + op = reverse_args(op) with az.Context(queue) as ctx: x = ctx.argument("x", shape="n", dtype=np.float64) -- GitLab From 1fa8d487a830e6386dd0503d08730754e323c8bc Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Tue, 28 Apr 2020 00:11:43 -0500 Subject: [PATCH 20/46] Tweak advection demo --- experiments/advection.py | 33 ++++++++++++++++----------------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/experiments/advection.py b/experiments/advection.py index e5e435f..0b74490 100644 --- a/experiments/advection.py +++ b/experiments/advection.py @@ -373,8 +373,8 @@ class DGOps1DRef(AbstractDGOps1D): def matrix_getter(name, shape): def getter(self): - mat = self.cb.argument(name, shape, np.float64) - self.cb.bind(mat, getattr(self.discr, name)) + mat = self.ctx.argument(name, shape, np.float64) + self.ctx.bind(mat, getattr(self.discr, name)) return mat return property(memoized(getter)) @@ -384,11 +384,11 @@ class DGOps1D(AbstractDGOps1D): @AbstractDGOps1D.array_ops.setter def array_ops(self): - return self.cb + return self.ctx - def __init__(self, discr, cb): + def __init__(self, discr, ctx): self.discr = discr - self.cb = cb + self.ctx = ctx _normals = matrix_getter("normals", "(nelements, 2)") _interp_mat = matrix_getter("interp", "(2, nnodes)") @@ -401,24 +401,24 @@ class DGOps1D(AbstractDGOps1D): return self._normals def interp(self, vec): - return self.cb.matmul(self._interp_mat, vec.T, name="_interp").T + return self.ctx.matmul(self._interp_mat, vec.T, name="_interp").T def inv_mass(self, vec): - return self.cb.matmul(self._inv_mass_mat, vec.T, name="_inv_mass").T + return self.ctx.matmul(self._inv_mass_mat, vec.T, name="_inv_mass").T def stiffness(self, vec): - return self.cb.matmul(self._stiffness_mat, vec.T, name="_stiffness").T + return self.ctx.matmul(self._stiffness_mat, vec.T, name="_stiffness").T def face_mass(self, vec): - return self.cb.matmul(self._face_mass_mat, vec.T, name="_face_mass").T + return self.ctx.matmul(self._face_mass_mat, vec.T, name="_face_mass").T def face_swap(self, vec): - return self.cb.stack( + return self.ctx.stack( ( - self.cb.roll(vec[:,1], +1), - self.cb.roll(vec[:,0], -1)), + self.ctx.roll(vec[:,1], +1), + self.ctx.roll(vec[:,0], -1)), axis=1, - name="face_swap") + name="_face_swap") class AdvectionOperator(object): @@ -478,10 +478,9 @@ class AdvectionOperator(object): Signature: (m, n) -> (m, n) """ dg = self.dg - pt1 = dg.face_mass(self.strong_flux(dg.interp(vec))) - print("pt1 shape", pt1.shape) - pt2 = self.c * dg.stiffness(vec) - return -dg.inv_mass(pt1 - pt2) + return -dg.inv_mass( + dg.face_mass(self.strong_flux(dg.interp(vec))) + - self.c * dg.stiffness(vec)) def __call__(self, vec): """Apply the DG advection operator to the vector of degrees of freedom. -- GitLab From cd3a0a9830affe1c7783a7c8a3bc6da210169f1b Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Thu, 30 Apr 2020 13:40:41 -0500 Subject: [PATCH 21/46] Improve, document _get_or_create_parameter() --- arrayzy/array.py | 38 +++++++++++++++++++++++++------------- 1 file changed, 25 insertions(+), 13 deletions(-) diff --git a/arrayzy/array.py b/arrayzy/array.py index fe1a5ab..bf5112f 100644 --- a/arrayzy/array.py +++ b/arrayzy/array.py @@ -315,25 +315,37 @@ class Context: def unify_types(self, *args): # Unifies the list of types. - # TODO: does not handle None + if None in args: + # FIXME: This isn't completely precise. Should None + complex return + # complex? + return None result = args[0] for arg in args[1:]: result = (np.empty(0, dtype=result) + np.empty(0, dtype=arg)).dtype return result def _get_or_create_parameter(self, name): - # Create a parameter variable for *name* if not present. - # Return the parameter as a pymbolic variable. - - if name in self.program.all_variable_names(): - if name not in self.program.all_params(): - # FIXME: May have been added, but not added to a domain. - pass - else: - self.program = self.program.copy( - args=self.program.args + [ - lp.ValueArg(name, dtype=self.program.index_dtype) - ]) + # Create a parameter variable for *name* if not present (suitable for + # use as a domain parameter). Return the parameter as a pymbolic + # variable. + # + # We regard a variable as a parameter if it's an argument of type + # *self.program.index_dtype* or if it's an iname (the latter is for + # data-dependent control flow). + if not name in self._parameters: + # Resist the temptation to look through domain parameters, because a + # variable may be a parameter without a domain in the kernel yet. + if name not in self.program.all_inames(): + # Add an argument. + if name in self.program.all_variable_names(): + raise ValueError("could not create parameter: " + "name '%s' already in use" % name) + self.program = self.program.copy( + args=self.program.args + [ + lp.ValueArg(name, dtype=self.program.index_dtype) + ]) + self.var_name_gen.add_name(name) + self._parameters.add(name) return prim.Variable(name) -- GitLab From 9023708bc5c1c936eec14b0e77a2965a3782d513 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Thu, 30 Apr 2020 22:08:01 -0500 Subject: [PATCH 22/46] Add _parameters attribute --- arrayzy/array.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arrayzy/array.py b/arrayzy/array.py index bf5112f..bfe4642 100644 --- a/arrayzy/array.py +++ b/arrayzy/array.py @@ -290,6 +290,9 @@ class Context: self.var_name_gen = self.program.get_var_name_generator() self.insn_id_gen = self.program.get_instruction_id_generator() self._last_insn_id = None + # The set of names that can be used for domain in parameters. It should + # be updated through self._get_or_create_parameter(). + self._parameters = set() def __enter__(self): return self -- GitLab From 2506ec1e30b7afce9eeeca2af3f0ed1aadab7f74 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Thu, 30 Apr 2020 22:52:53 -0500 Subject: [PATCH 23/46] Various doc improvements --- arrayzy/array.py | 90 ++++++++++++++++++++++++++++++++++++++-------- arrayzy/program.py | 11 +++++- doc/program.rst | 2 +- 3 files changed, 86 insertions(+), 17 deletions(-) diff --git a/arrayzy/array.py b/arrayzy/array.py index bfe4642..bff1b12 100644 --- a/arrayzy/array.py +++ b/arrayzy/array.py @@ -35,17 +35,32 @@ from arrayzy.utils import domain_for_shape __doc__ = """ + +Array objects +------------- + +:class:`Array` is the basic abstraction that represents array +computations. Instances of this class are created and managed within a +:class:`Context`. + .. autoclass:: Array .. autoclass:: ArrayExpression .. autoclass:: ArrayVariable +Code building context +--------------------- + .. autoclass:: Context + """ class Array: """A representation of an array computation in a :mod:`loopy` kernel. + This class is abstract. It has at least two concrete subclasses, + :class:`ArrayExpression` and :class:`ArrayVariable`. + .. attribute:: ctx The :class:`Context` with which this array is associated, to define the @@ -70,14 +85,23 @@ class Array: The number of dimensions in :attr:`shape`. - .. attribute:: T + Code generation support: .. attribute:: dims + + A tuple of :mod:`pymbolic` variables representing the indices of the + array. These are named like *_0*, *_1*, .... + .. attribute:: dim_names + Same as :data:`dims` but a tuple of strings. + .. automethod:: get_domain .. automethod:: to_loopy_expression + Supported array operations: + + .. autoattribute:: T .. automethod:: __getitem__ .. automethod:: __mul__ .. automethod:: __rmul__ @@ -154,6 +178,12 @@ class Array: __neg__ = partialmethod(_unary_op, operator.neg) def __getitem__(self, slice_spec): + """Extract a region from an array. + + :arg slice_spec: The slice argument. + + :returns: an :class:`Array` + """ from numbers import Integral if len(slice_spec) != self.ndim: @@ -199,7 +229,10 @@ class Array: def get_domain(self, dim_names=None): """Return the domain that defines the space of indices for this array. - :arg dim_names: if not *None*, the names of the dimensions of the array + :arg dim_names: if not *None*, a tuple of strings, the names of the + dimensions of the array. Defaults to :data:`dim_names`. + + :returns: a :class:`islpy.BasicSet` """ if dim_names is None: dim_names = self.dim_names @@ -207,12 +240,23 @@ class Array: @property def T(self): # noqa - """Return the transpose of the array of the array.""" + """Return the transpose of the array. + + :returns: an :class:`Array` + """ new_shape = tuple(reversed(self.shape)) new_expr = self.to_loopy_expression(list(reversed(self.dims))) return ArrayExpression(self.ctx, new_shape, self.dtype, new_expr) def to_loopy_expression(self, dims): + """Create a :mod:`loopy` expression for the value at index *dims*. + + :arg dims: A tuple of :mod:`pymbolic` expressions representing the + desired index/indices. For instance if *dims = (Variable('a'), 1)*, + this will return the expression for the element at index *a, 1*. + + :returns: a :mod:`loopy` expression + """ raise NotImplementedError @@ -252,7 +296,7 @@ class Context: .. attribute:: program - A :class:`lp.LoopKernel` that defines names used in expressions in + A :class:`loopy.LoopKernel` that defines names used in expressions in :class:`Array` instances attached to this code. Names defined once will never change their meaning, for the lifetime of this object. @@ -301,7 +345,7 @@ class Context: pass def build(self): - """Return a generated :class:`BoundProgram`. + """Return a generated :class:`arrayzy.program.BoundProgram`. """ return BoundProgram(self.program, self.queue, self.bound_arguments.copy(), self.target) @@ -342,7 +386,7 @@ class Context: # Add an argument. if name in self.program.all_variable_names(): raise ValueError("could not create parameter: " - "name '%s' already in use" % name) + f"name '{name}' already in use") self.program = self.program.copy( args=self.program.args + [ lp.ValueArg(name, dtype=self.program.index_dtype) @@ -407,10 +451,10 @@ class Context: def argument(self, name, shape, dtype, order="C"): """Append an argument to the program. - :returns: a :class:`ArrayVariable` + :returns: an :class:`ArrayVariable` """ if name in self.program.all_variable_names(): - raise ValueError(f"name '{name}' already in use in code") + raise ValueError(f"name '{name}' already in use") arg = lp.GlobalArg(name, shape=shape, dtype=dtype, order=order) @@ -431,13 +475,17 @@ class Context: """Bind an argument to a value. :arg arg: a :class:`ArrayVariable` representing an argument + :arg val: a concrete array value appropriate for the code generation target """ self.bound_arguments[arg.name] = val def roll(self, a, shift, name=None): """Roll elements along the given axis. - :returns: a :class:`Array` + :arg a: a one-dimensional :class:`Array` + :arg shift: a scalar shift amount + + :returns: an :class:`Array` """ assert a.ndim == 1 idx = (prim.Variable("_0") - shift) % a.shape[0] @@ -451,9 +499,12 @@ class Context: def stack(self, arrays, axis=0, name=None): """Join a sequence of arrays along a new axis. - :arg axis: the index of the axis + :arg array: a finite sequence, each of whose elements is an + :class:`Array` + :arg axis: the position of the new axis, which will have length + *len(arrays)* - :returns: a :class:`Array` + :returns: an :class:`Array` """ for array in arrays[1:]: if array.ndim != arrays[0].ndim: @@ -515,6 +566,11 @@ class Context: def matmul(self, a, b, name=None): """Multiply matrix *a* by *b*. + + :arg a: an :class:`Array` of shape *(m, k)* + :arg b: an :class:`Array` of shape *(k, n)* + + :returns: an :class:`Array` of shape *(m, n)* """ # Generate a temporay for the matrix-matrix multiplication. self._assume_equal(a.shape[1], b.shape[0]) @@ -564,12 +620,16 @@ class Context: return ArrayVariable(self, out_shape, out_dtype, name) - def output(self, expr): - """Copy *expr* to an output variable. + def output(self, expr, name=None): + """Copy *expr* to a (new) output variable. - :returns: a :class:`Array` + :arg expr: an :class:`Array` + + :returns: an :class:`ArrayVariable` """ - name = self.var_name_gen("_out") + if name is None: + name = "_out" + name = self.var_name_gen(name) out = lp.GlobalArg(name, shape=expr.shape, dtype=expr.dtype, order="C") out_inames = [ self.var_name_gen(f"{name}_dim{d}") for d in range(expr.ndim)] diff --git a/arrayzy/program.py b/arrayzy/program.py index 0378ca4..3d46e35 100644 --- a/arrayzy/program.py +++ b/arrayzy/program.py @@ -24,11 +24,20 @@ THE SOFTWARE. from pytools import RecordWithoutPickling +__doc__ = """ +.. autoclass:: Target +.. autoclass:: PyOpenCLTarget +.. autoclass:: BoundProgram +""" + + class Target: + """An abstract code generation target.""" pass class PyOpenCLTarget: + """A :mod:`pyopencl` code generation target.""" def __init__(self, queue): self.queue = queue @@ -43,7 +52,7 @@ class BoundProgram(RecordWithoutPickling): .. attribute:: program - The underlying :class:`lp.LoopKernel`. + The underlying :class:`loopy.LoopKernel`. .. attribute:: queue diff --git a/doc/program.rst b/doc/program.rst index 7c96f9f..b0f4ff2 100644 --- a/doc/program.rst +++ b/doc/program.rst @@ -1,4 +1,4 @@ Generated programs ================== -.. autoclass:: arrayzy.program.BoundProgram +.. automodule:: arrayzy.program -- GitLab From afbf4059f57b8ff825888a55b63e423adcd39683 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Thu, 30 Apr 2020 22:56:12 -0500 Subject: [PATCH 24/46] flake8 fixes --- arrayzy/array.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/arrayzy/array.py b/arrayzy/array.py index bff1b12..6c7de16 100644 --- a/arrayzy/array.py +++ b/arrayzy/array.py @@ -363,8 +363,8 @@ class Context: def unify_types(self, *args): # Unifies the list of types. if None in args: - # FIXME: This isn't completely precise. Should None + complex return - # complex? + # FIXME: This isn't completely precise. Should None + complex + # return complex? return None result = args[0] for arg in args[1:]: @@ -379,14 +379,15 @@ class Context: # We regard a variable as a parameter if it's an argument of type # *self.program.index_dtype* or if it's an iname (the latter is for # data-dependent control flow). - if not name in self._parameters: - # Resist the temptation to look through domain parameters, because a - # variable may be a parameter without a domain in the kernel yet. + if name not in self._parameters: + # Resist the temptation to look through domain parameters, because + # a variable may be a parameter without a domain in the kernel yet. if name not in self.program.all_inames(): # Add an argument. if name in self.program.all_variable_names(): - raise ValueError("could not create parameter: " - f"name '{name}' already in use") + raise ValueError( + "could not create parameter: " + f"name '{name}' already in use") self.program = self.program.copy( args=self.program.args + [ lp.ValueArg(name, dtype=self.program.index_dtype) @@ -475,7 +476,9 @@ class Context: """Bind an argument to a value. :arg arg: a :class:`ArrayVariable` representing an argument - :arg val: a concrete array value appropriate for the code generation target + + :arg val: a concrete array value appropriate for the code generation + target """ self.bound_arguments[arg.name] = val -- GitLab From 75b805006cc8249590559f710c29df7279f89c43 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Thu, 30 Apr 2020 23:00:11 -0500 Subject: [PATCH 25/46] Try running without cache --- .gitlab-ci.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 5f733be..e34f3bc 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -3,6 +3,7 @@ Python 3 POCL: - export PY_EXE=python3 - export PYOPENCL_TEST=portable - export EXTRA_INSTALL="pyopencl" + - export LOOPY_NO_CACHE=1 - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project.sh - ". ./build-and-test-py-project.sh" tags: -- GitLab From 36b45848f994d4e694eff30335a29a7941e41bd6 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Thu, 30 Apr 2020 23:01:31 -0500 Subject: [PATCH 26/46] Revert "Try running without cache" This reverts commit 75b805006cc8249590559f710c29df7279f89c43. --- .gitlab-ci.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index e34f3bc..5f733be 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -3,7 +3,6 @@ Python 3 POCL: - export PY_EXE=python3 - export PYOPENCL_TEST=portable - export EXTRA_INSTALL="pyopencl" - - export LOOPY_NO_CACHE=1 - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project.sh - ". ./build-and-test-py-project.sh" tags: -- GitLab From 439553be3e2324a9fc10da1859af2b4f8a0196b0 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Thu, 30 Apr 2020 23:09:38 -0500 Subject: [PATCH 27/46] Experiment: comment out everything except the simplest test --- test/test_linalg.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test/test_linalg.py b/test/test_linalg.py index b1df677..ac9f7a8 100644 --- a/test/test_linalg.py +++ b/test/test_linalg.py @@ -57,6 +57,7 @@ def test_symbolic_array(ctx_factory): assert (eval_one(prog, x=x) == x).all() +""" @pytest.mark.parametrize("dim", (1, 2, 3)) def test_transpose(ctx_factory, dim): cl_ctx = ctx_factory() @@ -203,6 +204,7 @@ def test_slice_along_dim(ctx_factory, input_dims): x = np.arange(2 ** input_dims, dtype=np.float64).reshape(shape) prog = ctx.build() assert (eval_one(prog, x=x) == x[slice_spec]).all() +""" if __name__ == "__main__": -- GitLab From 1cf84331354ca63e9971a3e8a81ea6316b1914f5 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Thu, 30 Apr 2020 23:10:32 -0500 Subject: [PATCH 28/46] Revert "Experiment: comment out everything except the simplest test" This reverts commit 439553be3e2324a9fc10da1859af2b4f8a0196b0. --- test/test_linalg.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/test/test_linalg.py b/test/test_linalg.py index ac9f7a8..b1df677 100644 --- a/test/test_linalg.py +++ b/test/test_linalg.py @@ -57,7 +57,6 @@ def test_symbolic_array(ctx_factory): assert (eval_one(prog, x=x) == x).all() -""" @pytest.mark.parametrize("dim", (1, 2, 3)) def test_transpose(ctx_factory, dim): cl_ctx = ctx_factory() @@ -204,7 +203,6 @@ def test_slice_along_dim(ctx_factory, input_dims): x = np.arange(2 ** input_dims, dtype=np.float64).reshape(shape) prog = ctx.build() assert (eval_one(prog, x=x) == x[slice_spec]).all() -""" if __name__ == "__main__": -- GitLab From 9d4727099b99b60b8359c31df522f853a7c063e4 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Fri, 1 May 2020 00:17:50 -0500 Subject: [PATCH 29/46] Add convergence test for arrayzy generated code --- experiments/advection.py | 32 +++++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/experiments/advection.py b/experiments/advection.py index 0b74490..8f96229 100644 --- a/experiments/advection.py +++ b/experiments/advection.py @@ -382,7 +382,7 @@ def matrix_getter(name, shape): class DGOps1D(AbstractDGOps1D): - @AbstractDGOps1D.array_ops.setter + @AbstractDGOps1D.array_ops.getter def array_ops(self): return self.ctx @@ -543,6 +543,36 @@ def test_ref_advection_convergence(order, flux_type): assert eoc >= order - 0.1, eoc +@pytest.mark.parametrize("order", (3, 4, 5)) +@pytest.mark.parametrize("flux_type", ("central", "upwind")) +def test_advection_convergence(order, flux_type): + errors = [] + hs = [] + + import arrayzy as az + import pyopencl as cl + cl_ctx = cl.create_some_context() + queue = cl.CommandQueue(cl_ctx) + + for nelements in (8, 12, 16, 20): + discr = DGDiscr1D(0, 2*np.pi, nelements=nelements, nnodes=order) + u_initial = np.sin(discr.nodes()) + with az.Context(queue) as cb: + u = cb.argument("u", shape="(nelements, nnodes)", dtype=np.float64) + op = AdvectionOperator( + discr, c=1, flux_type=flux_type, dg_ops=DGOps1D(discr, cb)) + cb.output(op.apply(u)) + prog = cb.build() + u = rk4(lambda t, y: prog(u=y.reshape(nelements, order))[1][0].reshape(-1), + u_initial, t_initial=0, t_final=np.pi, dt=0.01) + u_ref = -u_initial + hs.append(discr.h) + errors.append(integrate(discr, (u - u_ref)**2)**0.5) + + eoc, _ = np.polyfit(np.log(hs), np.log(errors), 1) + assert eoc >= order - 0.1, eoc + + def main(): import arrayzy as az import pyopencl as cl -- GitLab From 54bc4e859e099886042553d619b5b05d23d99c93 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Fri, 1 May 2020 00:57:09 -0500 Subject: [PATCH 30/46] _Split DG demo bits off into a separate file to make them importable from a demo notebook --- .gitignore | 1 + .../Computing and DG with Lazy Arrays.ipynb | 204 +++++++++ experiments/advection.py | 432 +----------------- experiments/dg_tools.py | 420 +++++++++++++++++ 4 files changed, 634 insertions(+), 423 deletions(-) create mode 100644 experiments/Computing and DG with Lazy Arrays.ipynb create mode 100644 experiments/dg_tools.py diff --git a/.gitignore b/.gitignore index c11edd6..7a7df09 100644 --- a/.gitignore +++ b/.gitignore @@ -13,3 +13,4 @@ distribute*egg distribute*tar.gz .cache +.ipynb_checkpoints diff --git a/experiments/Computing and DG with Lazy Arrays.ipynb b/experiments/Computing and DG with Lazy Arrays.ipynb new file mode 100644 index 0000000..9f8fcef --- /dev/null +++ b/experiments/Computing and DG with Lazy Arrays.ipynb @@ -0,0 +1,204 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Computing and DG with Lazy Arrays" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import arrayzy as az\n", + "import pyopencl as cl\n", + "import loopy as lp" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Choose platform:\n", + "[0] \n", + "[1] \n" + ] + }, + { + "name": "stdin", + "output_type": "stream", + "text": [ + "Choice [0]: 1\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Set the environment variable PYOPENCL_CTX='1' to avoid being asked again.\n" + ] + } + ], + "source": [ + "cl_ctx = cl.create_some_context(interactive=True)\n", + "queue = cl.CommandQueue(cl_ctx)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## The Basic Mechanics" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "with az.Context(queue) as ctx:\n", + " A = ctx.argument(\"A\", shape=\"n, n\", dtype=np.float64)\n", + " ctx.output(2*A + 5)\n", + "\n", + "prog = ctx.build()" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "---------------------------------------------------------------------------\n", + "KERNEL: loopy_kernel\n", + "---------------------------------------------------------------------------\n", + "ARGUMENTS:\n", + "A: type: np:dtype('float64'), shape: (n, n), dim_tags: (N1:stride:n, N0:stride:1) aspace: global\n", + "_out: type: , shape: (n, n), dim_tags: (N1:stride:n, N0:stride:1) aspace: global\n", + "n: ValueArg, type: np:dtype('int32')\n", + "---------------------------------------------------------------------------\n", + "DOMAINS:\n", + "{ : }\n", + "[n] -> { [_out_dim0, _out_dim1] : 0 <= _out_dim0 < n and 0 <= _out_dim1 < n }\n", + "---------------------------------------------------------------------------\n", + "INAME IMPLEMENTATION TAGS:\n", + "_out_dim0: None\n", + "_out_dim1: None\n", + "---------------------------------------------------------------------------\n", + "INSTRUCTIONS:\n", + "for _out_dim1, _out_dim0\n", + " \u001b[36m_out[_out_dim0, _out_dim1]\u001b[0m = \u001b[35m2*A[_out_dim0, _out_dim1] + 5\u001b[0m {id=\u001b[32m_store_out\u001b[0m}\n", + "end _out_dim1, _out_dim0\n", + "---------------------------------------------------------------------------\n" + ] + } + ], + "source": [ + "print(prog.program)" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "#define lid(N) ((int) get_local_id(N))\n", + "#define gid(N) ((int) get_group_id(N))\n", + "#if __OPENCL_C_VERSION__ < 120\n", + "#pragma OPENCL EXTENSION cl_khr_fp64: enable\n", + "#endif\n", + "\n", + "__kernel void __attribute__ ((reqd_work_group_size(1, 1, 1))) loopy_kernel(int const n, __global double const *__restrict__ A, __global double *__restrict__ _out)\n", + "{\n", + " for (int _out_dim1 = 0; _out_dim1 <= -1 + n; ++_out_dim1)\n", + " for (int _out_dim0 = 0; _out_dim0 <= -1 + n; ++_out_dim0)\n", + " _out[n * _out_dim0 + _out_dim1] = 2.0 * A[n * _out_dim0 + _out_dim1] + 5.0;\n", + "}\n" + ] + } + ], + "source": [ + "print(lp.generate_code_v2(prog.program).device_code())" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [], + "source": [ + "A = np.random.randn(5, 5)\n", + "evt, (result,) = prog(A=A)" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[0., 0., 0., 0., 0.],\n", + " [0., 0., 0., 0., 0.],\n", + " [0., 0., 0., 0., 0.],\n", + " [0., 0., 0., 0., 0.],\n", + " [0., 0., 0., 0., 0.]])" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "result - (2*A+5)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.7" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/experiments/advection.py b/experiments/advection.py index 8f96229..de6c7ca 100644 --- a/experiments/advection.py +++ b/experiments/advection.py @@ -1,426 +1,11 @@ -import contextlib import numpy as np -import numpy.linalg as la -import numpy.polynomial.legendre as leg import pytest - - -__doc__ = """ -Notation convention for operator shapes -======================================= - -* m - number of elements in the discretization -* n - number of volume degrees of freedom per element -""" - import functools - +from dg_tools import DGDiscr1D, integrate, DGOps1D, DGOps1DRef memoized = functools.lru_cache(maxsize=None) -def ortholegvander(x, deg): - """See numpy.polynomial.legendre.legvander(). Uses an orthonormal basis.""" - result = leg.legvander(x, deg) - factors = np.array([np.sqrt((2*n+1)/2) for n in range(0, 1 + deg)]) - return result * factors - - -def ortholegder(c): - """See numpy.polynomial.legendre.legder(). Uses an orthonormal basis.""" - fw_factors = np.array([np.sqrt((2*n+1)/2) for n in range(len(c))]) - derivs = leg.legder(c * fw_factors) - return derivs / fw_factors[:len(derivs)] - - -def ortholegval(x, c): - """See numpy.polynomial.legendre.legval(). Uses an orthonormal basis.""" - factors = np.array([np.sqrt((2*n+1)/2) for n in range(len(c))]) - return leg.legval(x, c * factors) - - -class DGDiscr1D(object): - """A one-dimensional Discontinuous Galerkin discretization.""" - - def __init__(self, left, right, nelements, nnodes): - """ - Inputs: - left - left endpoint - right - right endpoint - nelements - number of discretization panels - nnodes - number of degrees of freedom per panel - """ - self.left = left - self.right = right - self.nelements = nelements - self.nnodes = nnodes - - @property - @memoized - def ref_nodes(self): - """Return reference nodes for a single element. - - Signature: ->(n,) - """ - nodes, _ = leg.leggauss(self.nnodes) - return nodes - - @property - @memoized - def ref_weights(self): - """Return reference quadrature weights for a single element. - - Signature: ->(n,) - """ - _, weights = leg.leggauss(self.nnodes) - return weights - - def zeros(self): - """Return a zero solution. - - Signature: ->(n*m,) - """ - return np.zeros(self.nnodes * self.nelements) - - @property - def h(self): - """Return the element size. - - Signature: ->() - """ - return self.elements[0,1] - self.elements[0,0] - - def nodes(self): - """Return the vector of node coordinates. - - Signature: ->(n*m,) - """ - centers = (self.elements[:,0] + self.elements[:,1]) / 2 - radii = (self.elements[:,1] - self.elements[:,0]) / 2 - return ((self.ref_nodes[:,np.newaxis] * radii) + centers).T.ravel() - - @property - @memoized - def vdm(self): - """Return the elementwise Vandermonde (modal-to-nodal) matrix. - - Signature: ->(n, n) - """ - return ortholegvander(self.ref_nodes, self.nnodes - 1) - - @property - @memoized - def _ref_mass(self): - """Return the (volume) mass matrix for the reference element. - - Signature: ->(n, n) - """ - return la.inv(self.vdm @ self.vdm.T) - - @property - @memoized - def mass(self): - """Return the elementwise volume mass matrix. - - Signature: ->(n, n) - """ - h = (self.right - self.left) / self.nelements - return (h/2) * self._ref_mass - - @property - @memoized - def inv_mass(self): - """Return the inverse of the elementwise volume mass matrix. - - Signature: ->(n, n) - """ - return la.inv(self.mass) - - @property - @memoized - def face_mass(self): - """Return the face mass matrix. - - The face mass matrix combines the effects of applying the face mass - operator on each face and interpolating the output to the volume nodes. - - Signature: ->(n, 2) - """ - return self.interp.T.copy() - - @property - @memoized - def diff(self): - """Return the elementwise differentiation matrix. - - Signature: ->(n, n) - """ - VrT = [] - for row in np.eye(self.nnodes): - deriv = ortholegder(row) - VrT.append(ortholegval(self.ref_nodes, deriv)) - Vr = np.vstack(VrT).T - return Vr @ la.inv(self.vdm) - - @property - @memoized - def stiffness(self): - """Return the stiffness matrix. - - Signature: ->(n, n) - """ - return (self._ref_mass @ self.diff) - - @property - @memoized - def interp(self): - """Return the volume-to-face interpolation matrix. - - Signature: ->(2, n) - """ - return ortholegvander([-1, 1], self.nnodes - 1) @ la.inv(self.vdm) - - @property - @memoized - def elements(self): - """Return the list of elements, each given by their left/right boundaries. - - Signature: ->(m, 2) - """ - h = (self.right - self.left) / self.nelements - return np.array(list(zip( - np.linspace(self.left, self.right, self.nelements, endpoint=False), - np.linspace(h + self.left, self.right, self.nelements)))) - - @property - def dg_ops(self): - """Return a context manager yielding a DGOps1D instance. - """ - return contextlib.contextmanager(lambda: (yield DGOps1DRef(self))) - - @property - def normals(self): - """Return the face unit normals. - - Signature: ->(m, 2) - """ - result = np.zeros((self.nelements, 2)) - result[:,0] = -1 - result[:,1] = 1 - return result - - -def interpolate(discr, vec, nodes): - """Return an interpolated solution at *nodes*. - - Input: - discr - a DGDiscr1D instance - vec - vector of nodal values at degrees of freedom - nodes - vector of nodes to interpolate to - - Signature: (m*n,) -> (len(nodes),) - """ - elements = discr.elements - nelements = discr.nelements - nnodes = discr.nnodes - inv_vdm = la.inv(discr.vdm) - - sorter = np.argsort(nodes) - sorted_nodes = nodes[sorter] - result = [] - - indices = np.searchsorted(sorted_nodes, elements) - for i, (start, end) in enumerate(indices): - if i == 0: - start = 0 - elif i == nelements - 1: - end = len(nodes) - - center = (elements[i][0] + elements[i][1]) / 2 - radius = (elements[i][1] - elements[i][0]) / 2 - element_nodes = sorted_nodes[start:end] - mapped_nodes = (element_nodes - center) / radius - - modal_vals = inv_vdm @ vec[i * nnodes:(i + 1) * nnodes] - nodal_vals = ortholegvander(mapped_nodes, nnodes - 1) @ modal_vals - result.append(nodal_vals) - - result = np.hstack(result) - unsorter = np.arange(len(nodes))[sorter] - return result[unsorter] - - -def integrate(discr, soln): - """Return the integral of the solution. - - Signature: (n*m,) -> () - """ - soln = soln.reshape((discr.nelements, discr.nnodes)) - h = discr.elements[0][1] - discr.elements[0][0] - weights = discr.ref_weights * h / 2 - return np.sum(soln * weights) - - -def elementwise(mat, vec): - """Apply a matrix to rows of the input representing per-element - degrees of freedom. - - Inputs: - mat: Shape (a, b) - vec: Shape (c, b) - - Signature: (a, b), (c, b) -> (c, a) - """ - return np.einsum("ij,kj->ki", mat, vec) - - -class AbstractDGOps1D(object): - - def __init__(self, discr): - self.discr = discr - - @property - def array_ops(self): - raise NotImplementedError - - @property - def normals(self): - """Return the vector of normals at the faces. - - Signature: ->(m, 2) - """ - raise NotImplementedError - - def interp(self, vec): - """Apply elementwise volume-to-face interpolation. - - Signature: (m, n) -> (m, 2) - """ - raise NotImplementedError - - def inv_mass(self, vec): - """Apply the elementwise inverse mass matrix. - - Signature: (m, n) -> (m, n) - """ - raise NotImplementedError - - def stiffness(self, vec): - """Apply the elementwise stiffness matrix. - - Signature: (m, n) -> (m, n) - """ - raise NotImplementedError - - def face_mass(self, vec): - """Apply the elementwise face mass matrix. - - Signature: (m, 2) -> (m, n) - """ - raise NotImplementedError - - def face_swap(self, vec): - """Swap values at opposite faces. - - Signature: (m, 2) -> (m, 2) - """ - raise NotImplementedError - - -def elementwise(mat, vec): - """Apply a matrix to rows of the input representing per-element - degrees of freedom. - - Inputs: - mat: Shape (a, b) - vec: Shape (c, b) - - Signature: (a, b), (c, b) -> (c, a) - """ - return np.einsum("ij,kj->ki", mat, vec) - - -class DGOps1DRef(AbstractDGOps1D): - """A reference NumPy implementation of the AbstractDGOps1D interface.""" - - @AbstractDGOps1D.array_ops.getter - def array_ops(self): - return np - - @AbstractDGOps1D.normals.getter - def normals(self): - return self.discr.normals - - def interp(self, vec): - return elementwise(self.discr.interp, vec) - - def inv_mass(self, vec): - return elementwise(self.discr.inv_mass, vec) - - def stiffness(self, vec): - return elementwise(self.discr.stiffness, vec) - - def face_mass(self, vec): - return elementwise(self.discr.face_mass, vec) - - def face_swap(self, vec): - result = np.zeros_like(vec) - result[:,0] = np.roll(vec[:,1], +1) - result[:,1] = np.roll(vec[:,0], -1) - return result - - -def matrix_getter(name, shape): - - def getter(self): - mat = self.ctx.argument(name, shape, np.float64) - self.ctx.bind(mat, getattr(self.discr, name)) - return mat - - return property(memoized(getter)) - - -class DGOps1D(AbstractDGOps1D): - - @AbstractDGOps1D.array_ops.getter - def array_ops(self): - return self.ctx - - def __init__(self, discr, ctx): - self.discr = discr - self.ctx = ctx - - _normals = matrix_getter("normals", "(nelements, 2)") - _interp_mat = matrix_getter("interp", "(2, nnodes)") - _inv_mass_mat = matrix_getter("inv_mass", "(nnodes, nnodes)") - _stiffness_mat = matrix_getter("stiffness", "(nnodes, nnodes)") - _face_mass_mat = matrix_getter("face_mass", "(nnodes, 2)") - - @AbstractDGOps1D.normals.getter - def normals(self): - return self._normals - - def interp(self, vec): - return self.ctx.matmul(self._interp_mat, vec.T, name="_interp").T - - def inv_mass(self, vec): - return self.ctx.matmul(self._inv_mass_mat, vec.T, name="_inv_mass").T - - def stiffness(self, vec): - return self.ctx.matmul(self._stiffness_mat, vec.T, name="_stiffness").T - - def face_mass(self, vec): - return self.ctx.matmul(self._face_mass_mat, vec.T, name="_face_mass").T - - def face_swap(self, vec): - return self.ctx.stack( - ( - self.ctx.roll(vec[:,1], +1), - self.ctx.roll(vec[:,0], -1)), - axis=1, - name="_face_swap") - - class AdvectionOperator(object): """A class representing a DG advection operator.""" @@ -453,9 +38,9 @@ class AdvectionOperator(object): elif self.flux_type == "upwind": swp = self.dg.face_swap(vec) if self.c >= 0: - flux = self.dg.array_ops.stack((vec[:,0], swp[:,1]), axis=1) + flux = self.dg.array_ops.stack((vec[:, 0], swp[:, 1]), axis=1) else: - flux = self.dg.array_ops.stack((swp[:,0], vec[:,1]), axis=1) + flux = self.dg.array_ops.stack((swp[:, 0], vec[:, 1]), axis=1) flux = flux * self.c * self.dg.normals @@ -534,7 +119,8 @@ def test_ref_advection_convergence(order, flux_type): u_initial = np.sin(discr.nodes()) op = AdvectionOperator( discr, c=1, flux_type=flux_type, dg_ops=DGOps1DRef(discr)) - u = rk4(lambda t, y: op(y), u_initial, t_initial=0, t_final=np.pi, dt=0.01) + u = rk4(lambda t, y: op(y), u_initial, t_initial=0, t_final=np.pi, + dt=0.01) u_ref = -u_initial hs.append(discr.h) errors.append(integrate(discr, (u - u_ref)**2)**0.5) @@ -563,8 +149,9 @@ def test_advection_convergence(order, flux_type): discr, c=1, flux_type=flux_type, dg_ops=DGOps1D(discr, cb)) cb.output(op.apply(u)) prog = cb.build() - u = rk4(lambda t, y: prog(u=y.reshape(nelements, order))[1][0].reshape(-1), - u_initial, t_initial=0, t_final=np.pi, dt=0.01) + u = rk4(lambda t, y: prog( + u=y.reshape(nelements, order))[1][0].reshape(-1), + u_initial, t_initial=0, t_final=np.pi, dt=0.01) u_ref = -u_initial hs.append(discr.h) errors.append(integrate(discr, (u - u_ref)**2)**0.5) @@ -583,7 +170,7 @@ def main(): nnodes = 3 discr = DGDiscr1D(0, 2*np.pi, nelements=nelements, nnodes=nnodes) - + with az.Context(queue) as cb: u = cb.argument("u", shape="(nelements, nnodes)", dtype=np.float64) op = AdvectionOperator( @@ -598,4 +185,3 @@ def main(): if __name__ == "__main__": main() - diff --git a/experiments/dg_tools.py b/experiments/dg_tools.py new file mode 100644 index 0000000..3a3fca3 --- /dev/null +++ b/experiments/dg_tools.py @@ -0,0 +1,420 @@ +import numpy as np +import numpy.polynomial.legendre as leg +import numpy.linalg as la +import contextlib + +__doc__ = """ +Notation convention for operator shapes +======================================= + +* m - number of elements in the discretization +* n - number of volume degrees of freedom per element +""" + +import functools + + +memoized = functools.lru_cache(maxsize=None) + + +def ortholegvander(x, deg): + """See numpy.polynomial.legendre.legvander(). Uses an orthonormal basis.""" + result = leg.legvander(x, deg) + factors = np.array([np.sqrt((2*n+1)/2) for n in range(0, 1 + deg)]) + return result * factors + + +def ortholegder(c): + """See numpy.polynomial.legendre.legder(). Uses an orthonormal basis.""" + fw_factors = np.array([np.sqrt((2*n+1)/2) for n in range(len(c))]) + derivs = leg.legder(c * fw_factors) + return derivs / fw_factors[:len(derivs)] + + +def ortholegval(x, c): + """See numpy.polynomial.legendre.legval(). Uses an orthonormal basis.""" + factors = np.array([np.sqrt((2*n+1)/2) for n in range(len(c))]) + return leg.legval(x, c * factors) + + +class DGDiscr1D(object): + """A one-dimensional Discontinuous Galerkin discretization.""" + + def __init__(self, left, right, nelements, nnodes): + """ + Inputs: + left - left endpoint + right - right endpoint + nelements - number of discretization panels + nnodes - number of degrees of freedom per panel + """ + self.left = left + self.right = right + self.nelements = nelements + self.nnodes = nnodes + + @property + @memoized + def ref_nodes(self): + """Return reference nodes for a single element. + + Signature: ->(n,) + """ + nodes, _ = leg.leggauss(self.nnodes) + return nodes + + @property + @memoized + def ref_weights(self): + """Return reference quadrature weights for a single element. + + Signature: ->(n,) + """ + _, weights = leg.leggauss(self.nnodes) + return weights + + def zeros(self): + """Return a zero solution. + + Signature: ->(n*m,) + """ + return np.zeros(self.nnodes * self.nelements) + + @property + def h(self): + """Return the element size. + + Signature: ->() + """ + return self.elements[0,1] - self.elements[0,0] + + def nodes(self): + """Return the vector of node coordinates. + + Signature: ->(n*m,) + """ + centers = (self.elements[:,0] + self.elements[:,1]) / 2 + radii = (self.elements[:,1] - self.elements[:,0]) / 2 + return ((self.ref_nodes[:,np.newaxis] * radii) + centers).T.ravel() + + @property + @memoized + def vdm(self): + """Return the elementwise Vandermonde (modal-to-nodal) matrix. + + Signature: ->(n, n) + """ + return ortholegvander(self.ref_nodes, self.nnodes - 1) + + @property + @memoized + def _ref_mass(self): + """Return the (volume) mass matrix for the reference element. + + Signature: ->(n, n) + """ + return la.inv(self.vdm @ self.vdm.T) + + @property + @memoized + def mass(self): + """Return the elementwise volume mass matrix. + + Signature: ->(n, n) + """ + h = (self.right - self.left) / self.nelements + return (h/2) * self._ref_mass + + @property + @memoized + def inv_mass(self): + """Return the inverse of the elementwise volume mass matrix. + + Signature: ->(n, n) + """ + return la.inv(self.mass) + + @property + @memoized + def face_mass(self): + """Return the face mass matrix. + + The face mass matrix combines the effects of applying the face mass + operator on each face and interpolating the output to the volume nodes. + + Signature: ->(n, 2) + """ + return self.interp.T.copy() + + @property + @memoized + def diff(self): + """Return the elementwise differentiation matrix. + + Signature: ->(n, n) + """ + VrT = [] + for row in np.eye(self.nnodes): + deriv = ortholegder(row) + VrT.append(ortholegval(self.ref_nodes, deriv)) + Vr = np.vstack(VrT).T + return Vr @ la.inv(self.vdm) + + @property + @memoized + def stiffness(self): + """Return the stiffness matrix. + + Signature: ->(n, n) + """ + return (self._ref_mass @ self.diff) + + @property + @memoized + def interp(self): + """Return the volume-to-face interpolation matrix. + + Signature: ->(2, n) + """ + return ortholegvander([-1, 1], self.nnodes - 1) @ la.inv(self.vdm) + + @property + @memoized + def elements(self): + """Return the list of elements, each given by their left/right boundaries. + + Signature: ->(m, 2) + """ + h = (self.right - self.left) / self.nelements + return np.array(list(zip( + np.linspace(self.left, self.right, self.nelements, endpoint=False), + np.linspace(h + self.left, self.right, self.nelements)))) + + @property + def dg_ops(self): + """Return a context manager yielding a DGOps1D instance. + """ + return contextlib.contextmanager(lambda: (yield DGOps1DRef(self))) + + @property + def normals(self): + """Return the face unit normals. + + Signature: ->(m, 2) + """ + result = np.zeros((self.nelements, 2)) + result[:,0] = -1 + result[:,1] = 1 + return result + + +def interpolate(discr, vec, nodes): + """Return an interpolated solution at *nodes*. + + Input: + discr - a DGDiscr1D instance + vec - vector of nodal values at degrees of freedom + nodes - vector of nodes to interpolate to + + Signature: (m*n,) -> (len(nodes),) + """ + elements = discr.elements + nelements = discr.nelements + nnodes = discr.nnodes + inv_vdm = la.inv(discr.vdm) + + sorter = np.argsort(nodes) + sorted_nodes = nodes[sorter] + result = [] + + indices = np.searchsorted(sorted_nodes, elements) + for i, (start, end) in enumerate(indices): + if i == 0: + start = 0 + elif i == nelements - 1: + end = len(nodes) + + center = (elements[i][0] + elements[i][1]) / 2 + radius = (elements[i][1] - elements[i][0]) / 2 + element_nodes = sorted_nodes[start:end] + mapped_nodes = (element_nodes - center) / radius + + modal_vals = inv_vdm @ vec[i * nnodes:(i + 1) * nnodes] + nodal_vals = ortholegvander(mapped_nodes, nnodes - 1) @ modal_vals + result.append(nodal_vals) + + result = np.hstack(result) + unsorter = np.arange(len(nodes))[sorter] + return result[unsorter] + + +def integrate(discr, soln): + """Return the integral of the solution. + + Signature: (n*m,) -> () + """ + soln = soln.reshape((discr.nelements, discr.nnodes)) + h = discr.elements[0][1] - discr.elements[0][0] + weights = discr.ref_weights * h / 2 + return np.sum(soln * weights) + + +def elementwise(mat, vec): + """Apply a matrix to rows of the input representing per-element + degrees of freedom. + + Inputs: + mat: Shape (a, b) + vec: Shape (c, b) + + Signature: (a, b), (c, b) -> (c, a) + """ + return np.einsum("ij,kj->ki", mat, vec) + + +class AbstractDGOps1D(object): + def __init__(self, discr): + self.discr = discr + + @property + def array_ops(self): + raise NotImplementedError + + @property + def normals(self): + """Return the vector of normals at the faces. + + Signature: ->(m, 2) + """ + raise NotImplementedError + + def interp(self, vec): + """Apply elementwise volume-to-face interpolation. + + Signature: (m, n) -> (m, 2) + """ + raise NotImplementedError + + def inv_mass(self, vec): + """Apply the elementwise inverse mass matrix. + + Signature: (m, n) -> (m, n) + """ + raise NotImplementedError + + def stiffness(self, vec): + """Apply the elementwise stiffness matrix. + + Signature: (m, n) -> (m, n) + """ + raise NotImplementedError + + def face_mass(self, vec): + """Apply the elementwise face mass matrix. + + Signature: (m, 2) -> (m, n) + """ + raise NotImplementedError + + def face_swap(self, vec): + """Swap values at opposite faces. + + Signature: (m, 2) -> (m, 2) + """ + raise NotImplementedError + + +def elementwise(mat, vec): + """Apply a matrix to rows of the input representing per-element + degrees of freedom. + + Inputs: + mat: Shape (a, b) + vec: Shape (c, b) + + Signature: (a, b), (c, b) -> (c, a) + """ + return np.einsum("ij,kj->ki", mat, vec) + + +class DGOps1DRef(AbstractDGOps1D): + """A reference NumPy implementation of the AbstractDGOps1D interface.""" + + @AbstractDGOps1D.array_ops.getter + def array_ops(self): + return np + + @AbstractDGOps1D.normals.getter + def normals(self): + return self.discr.normals + + def interp(self, vec): + return elementwise(self.discr.interp, vec) + + def inv_mass(self, vec): + return elementwise(self.discr.inv_mass, vec) + + def stiffness(self, vec): + return elementwise(self.discr.stiffness, vec) + + def face_mass(self, vec): + return elementwise(self.discr.face_mass, vec) + + def face_swap(self, vec): + result = np.zeros_like(vec) + result[:, 0] = np.roll(vec[:, 1], +1) + result[:, 1] = np.roll(vec[:, 0], -1) + return result + + +def matrix_getter(name, shape): + + def getter(self): + mat = self.ctx.argument(name, shape, np.float64) + self.ctx.bind(mat, getattr(self.discr, name)) + return mat + + return property(memoized(getter)) + + +class DGOps1D(AbstractDGOps1D): + + @AbstractDGOps1D.array_ops.getter + def array_ops(self): + return self.ctx + + def __init__(self, discr, ctx): + self.discr = discr + self.ctx = ctx + + _normals = matrix_getter("normals", "(nelements, 2)") + _interp_mat = matrix_getter("interp", "(2, nnodes)") + _inv_mass_mat = matrix_getter("inv_mass", "(nnodes, nnodes)") + _stiffness_mat = matrix_getter("stiffness", "(nnodes, nnodes)") + _face_mass_mat = matrix_getter("face_mass", "(nnodes, 2)") + + @AbstractDGOps1D.normals.getter + def normals(self): + return self._normals + + def interp(self, vec): + return self.ctx.matmul(self._interp_mat, vec.T, name="_interp").T + + def inv_mass(self, vec): + return self.ctx.matmul(self._inv_mass_mat, vec.T, name="_inv_mass").T + + def stiffness(self, vec): + return self.ctx.matmul(self._stiffness_mat, vec.T, name="_stiffness").T + + def face_mass(self, vec): + return self.ctx.matmul(self._face_mass_mat, vec.T, name="_face_mass").T + + def face_swap(self, vec): + return self.ctx.stack( + ( + self.ctx.roll(vec[:, 1], +1), + self.ctx.roll(vec[:, 0], -1)), + axis=1, + name="_face_swap") + + -- GitLab From 406fb101140c37c98c8cab5924f57732d65af6eb Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Fri, 1 May 2020 01:16:15 -0500 Subject: [PATCH 31/46] Implement Array.store() --- arrayzy/array.py | 53 ++++++++++++++----- .../Computing and DG with Lazy Arrays.ipynb | 2 +- test/test_linalg.py | 14 +++++ 3 files changed, 55 insertions(+), 14 deletions(-) diff --git a/arrayzy/array.py b/arrayzy/array.py index 6c7de16..9370369 100644 --- a/arrayzy/array.py +++ b/arrayzy/array.py @@ -112,6 +112,7 @@ class Array: .. automethod:: __truediv__ .. automethod:: __rtruediv__ .. automethod:: __neg__ + .. automethod:: store """ @@ -259,6 +260,10 @@ class Array: """ raise NotImplementedError + def store(self, name=None): + """Store this array into a variable, if not already stored.""" + raise NotImplementedError + class ArrayExpression(Array): """An array-valued expression. @@ -276,6 +281,19 @@ class ArrayExpression(Array): assignments = dict(zip(self.dim_names, tuple(dims))) return pymbolic.substitute(self.expression, assignments) + def store(self, name=None): + if name is None: + name = "_temp" + name = self.ctx.var_name_gen(name) + out = lp.TemporaryVariable( + name, dtype=self.dtype, + shape=self.shape, + address_space=lp.AddressSpace.GLOBAL) + new_tv = self.ctx.program.temporary_variables.copy() + new_tv[name] = out + self.ctx.program = self.ctx.program.copy(temporary_variables=new_tv) + return self.ctx.copy_expr(self, name) + class ArrayVariable(Array): """An array-valued variable. @@ -287,6 +305,9 @@ class ArrayVariable(Array): def to_loopy_expression(self, dims): return prim.Variable(self.name)[tuple(dims)] + def store(self, name=None): + return self + class Context: """An interface for building up array computations. @@ -445,6 +466,23 @@ class Context: instructions=self.program.instructions + [out_insn]) self._last_insn_id = insn_id + def copy_expr(self, expr, name): + # Copy *expr* to a variable named *name*. + out_inames = [ + self.var_name_gen(f"{name}_dim{d}") for d in range(expr.ndim)] + out_expr = expr.to_loopy_expression(map(prim.Variable, out_inames)) + + domain = expr.get_domain(out_inames) + + self.program = self.program.copy( + domains=self.program.domains + [domain]) + + self._make_array_assignment( + name, dim_names=out_inames, expr=out_expr, + within_inames=frozenset(out_inames)) + + return ArrayVariable(self, expr.shape, expr.dtype, name) + # }}} # {{{ user interface @@ -634,19 +672,8 @@ class Context: name = "_out" name = self.var_name_gen(name) out = lp.GlobalArg(name, shape=expr.shape, dtype=expr.dtype, order="C") - out_inames = [ - self.var_name_gen(f"{name}_dim{d}") for d in range(expr.ndim)] - out_expr = expr.to_loopy_expression(map(prim.Variable, out_inames)) - - domain = expr.get_domain(out_inames) - - self.program = self.program.copy( - args=self.program.args + [out], - domains=self.program.domains + [domain]) - - self._make_array_assignment( - name, dim_names=out_inames, expr=out_expr, - within_inames=frozenset(out_inames)) + self.program = self.program.copy(args=self.program.args + [out]) + return self.copy_expr(expr, name) # }}} diff --git a/experiments/Computing and DG with Lazy Arrays.ipynb b/experiments/Computing and DG with Lazy Arrays.ipynb index 9f8fcef..733083e 100644 --- a/experiments/Computing and DG with Lazy Arrays.ipynb +++ b/experiments/Computing and DG with Lazy Arrays.ipynb @@ -196,7 +196,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.7" + "version": "3.8.2" } }, "nbformat": 4, diff --git a/test/test_linalg.py b/test/test_linalg.py index b1df677..68c4ca6 100644 --- a/test/test_linalg.py +++ b/test/test_linalg.py @@ -57,6 +57,20 @@ def test_symbolic_array(ctx_factory): assert (eval_one(prog, x=x) == x).all() +def test_store(ctx_factory): + cl_ctx = ctx_factory() + queue = cl.CommandQueue(cl_ctx) + + with az.Context(queue) as ctx: + x = ctx.argument("x", shape="n", dtype=np.float64) + y = (x + 1).store() + ctx.output(y * y) + + x = np.array([1., 2., 3., 4., 5.]) + prog = ctx.build() + assert (eval_one(prog, x=x) == (x + 1) * (x + 1)).all() + + @pytest.mark.parametrize("dim", (1, 2, 3)) def test_transpose(ctx_factory, dim): cl_ctx = ctx_factory() -- GitLab From bb789e2435317f21c550fd2825d5538a79af2b6c Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Fri, 1 May 2020 01:49:10 -0500 Subject: [PATCH 32/46] Migrate to kernel_callables_v3-edit0.2 loopy branch --- arrayzy/array.py | 71 ++++++++++++++++++++++++++++++------------------ requirements.txt | 2 +- 2 files changed, 46 insertions(+), 27 deletions(-) diff --git a/arrayzy/array.py b/arrayzy/array.py index 9370369..c7886d3 100644 --- a/arrayzy/array.py +++ b/arrayzy/array.py @@ -289,9 +289,13 @@ class ArrayExpression(Array): name, dtype=self.dtype, shape=self.shape, address_space=lp.AddressSpace.GLOBAL) - new_tv = self.ctx.program.temporary_variables.copy() + + root_knl = self.ctx.program.root_kernel + new_tv = root_knl.temporary_variables.copy() new_tv[name] = out - self.ctx.program = self.ctx.program.copy(temporary_variables=new_tv) + self.ctx.program = self.ctx.program.with_kernel( + root_knl.copy(temporary_variables=new_tv)) + return self.ctx.copy_expr(self, name) @@ -352,8 +356,9 @@ class Context: self.bound_arguments = {} self.program = program - self.var_name_gen = self.program.get_var_name_generator() - self.insn_id_gen = self.program.get_instruction_id_generator() + self.var_name_gen = self.program.root_kernel.get_var_name_generator() + self.insn_id_gen = ( + self.program.root_kernel.get_instruction_id_generator()) self._last_insn_id = None # The set of names that can be used for domain in parameters. It should # be updated through self._get_or_create_parameter(). @@ -401,18 +406,21 @@ class Context: # *self.program.index_dtype* or if it's an iname (the latter is for # data-dependent control flow). if name not in self._parameters: + root_knl = self.program.root_kernel # Resist the temptation to look through domain parameters, because # a variable may be a parameter without a domain in the kernel yet. - if name not in self.program.all_inames(): + if name not in root_knl.all_inames(): # Add an argument. - if name in self.program.all_variable_names(): + if name in root_knl.all_variable_names(): raise ValueError( "could not create parameter: " f"name '{name}' already in use") - self.program = self.program.copy( - args=self.program.args + [ - lp.ValueArg(name, dtype=self.program.index_dtype) - ]) + self.program = self.program.with_kernel( + root_knl.copy( + args=self.program.args + [ + lp.ValueArg( + name, dtype=root_knl.index_dtype) + ])) self.var_name_gen.add_name(name) self._parameters.add(name) @@ -462,8 +470,9 @@ class Context: within_inames=frozenset(within_inames), depends_on=self._get_dependencies(expr)) - self.program = self.program.copy( - instructions=self.program.instructions + [out_insn]) + self.program = self.program.with_kernel( + self.program.root_kernel.copy( + instructions=self.program.root_kernel.instructions + [out_insn])) self._last_insn_id = insn_id def copy_expr(self, expr, name): @@ -474,8 +483,9 @@ class Context: domain = expr.get_domain(out_inames) - self.program = self.program.copy( - domains=self.program.domains + [domain]) + self.program = self.program.with_kernel( + self.program.root_kernel.copy( + domains=self.program.root_kernel.domains + [domain])) self._make_array_assignment( name, dim_names=out_inames, expr=out_expr, @@ -492,7 +502,7 @@ class Context: :returns: an :class:`ArrayVariable` """ - if name in self.program.all_variable_names(): + if name in self.program.root_kernel.all_variable_names(): raise ValueError(f"name '{name}' already in use") arg = lp.GlobalArg(name, shape=shape, dtype=dtype, order=order) @@ -506,7 +516,8 @@ class Context: self._get_or_create_parameter(dep) # Add argument to program. - self.program = self.program.copy(args=self.program.args + [arg]) + self.program = self.program.with_kernel( + self.program.root_kernel.copy(args=self.program.args + [arg])) return ArrayVariable(self, shape, dtype, name) @@ -595,13 +606,15 @@ class Context: self._last_insn_id = insn_id out_insns.append(out_insn) - new_tv = self.program.temporary_variables.copy() + root_knl = self.program.root_kernel + new_tv = root_knl.temporary_variables.copy() new_tv[name] = out - self.program = self.program.copy( - temporary_variables=new_tv, - domains=self.program.domains + [domain], - instructions=self.program.instructions + out_insns) + self.program = self.program.with_kernel( + root_knl.copy( + temporary_variables=new_tv, + domains=root_knl.domains + [domain], + instructions=root_knl.instructions + out_insns)) return ArrayVariable(self, out_shape, out_dtype, name) @@ -648,12 +661,14 @@ class Context: out_inames + (out_red_iname,), out_shape + (a.shape[1],)) - new_tv = self.program.temporary_variables.copy() + root_knl = self.program.root_kernel + new_tv = root_knl.temporary_variables.copy() new_tv[name] = out - self.program = self.program.copy( - temporary_variables=new_tv, - domains=self.program.domains + [domain]) + self.program = self.program.with_kernel( + root_knl.copy( + temporary_variables=new_tv, + domains=root_knl.domains + [domain])) self._make_array_assignment( name, dim_names=out_inames, expr=out_expr, @@ -661,6 +676,9 @@ class Context: return ArrayVariable(self, out_shape, out_dtype, name) + def call_kernel(self, knl, **kwargs): + pass + def output(self, expr, name=None): """Copy *expr* to a (new) output variable. @@ -672,7 +690,8 @@ class Context: name = "_out" name = self.var_name_gen(name) out = lp.GlobalArg(name, shape=expr.shape, dtype=expr.dtype, order="C") - self.program = self.program.copy(args=self.program.args + [out]) + self.program = self.program.with_kernel( + self.program.root_kernel.copy(args=self.program.args + [out])) return self.copy_expr(expr, name) # }}} diff --git a/requirements.txt b/requirements.txt index 22617c7..7ca0048 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1 +1 @@ -git+https://github.com/inducer/loopy.git +git+https://github.com/inducer/loopy.git@kernel_callables_v3-edit0.2 -- GitLab From aaec9806501945bd901545545d47f2fd5970b471 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Fri, 1 May 2020 03:35:18 -0500 Subject: [PATCH 33/46] Support reductions in expressions. Rewrite matmul to be an expression. --- arrayzy/array.py | 197 +++++++++++++++++++++++++++----------------- arrayzy/utils.py | 57 ++++++++++++- test/test_linalg.py | 36 ++++++++ 3 files changed, 213 insertions(+), 77 deletions(-) diff --git a/arrayzy/array.py b/arrayzy/array.py index c7886d3..042b5a5 100644 --- a/arrayzy/array.py +++ b/arrayzy/array.py @@ -27,11 +27,10 @@ import loopy as lp import islpy as isl import numpy as np import loopy.symbolic as sym -import pymbolic import pymbolic.primitives as prim from arrayzy.program import BoundProgram, PyOpenCLTarget -from arrayzy.utils import domain_for_shape +from arrayzy.utils import domain_for_shape, substitute __doc__ = """ @@ -96,6 +95,11 @@ class Array: Same as :data:`dims` but a tuple of strings. + .. attribute:: reduction_dim_names + + A tuple of strings representing the names of reduction inames present + in the array. + .. automethod:: get_domain .. automethod:: to_loopy_expression @@ -121,6 +125,8 @@ class Array: self._dtype = dtype self.shape = shape self.name = name + # Maps redution inames to (left, right) bounds. + self.reductions = {} @property def ndim(self): @@ -132,7 +138,8 @@ class Array: self.ctx, self.shape, self.dtype, - new_expr) + new_expr, + self.reductions.copy()) def _binary_op(self, op, other, reverse=False): if np.isscalar(other): @@ -145,7 +152,8 @@ class Array: self.ctx, self.shape, self.ctx.unify_types(self.dtype, type(other)), - new_expr) + new_expr, + self.reductions.copy()) elif isinstance(other, Array): args = ( @@ -154,12 +162,16 @@ class Array: if reverse: args = tuple(reversed(args)) + new_reductions = self.reductions.copy() + new_reductions.update(other.reductions) + new_expr = op(*args) return ArrayExpression( self.ctx, self.shape, self.ctx.unify_types(self.dtype, other.dtype), - new_expr) + new_expr, + new_reductions) else: raise ValueError @@ -211,7 +223,8 @@ class Array: self.ctx, shape=tuple(shape), dtype=self.dtype, - expression=self.to_loopy_expression(dims)) + expression=self.to_loopy_expression(dims), + reductions=self.reductions.copy()) @property def dtype(self): @@ -227,17 +240,29 @@ class Array: def dim_names(self): return tuple(f"_{i}" for i in range(len(self.shape))) - def get_domain(self, dim_names=None): + def get_domain(self, dim_names=None, reduction_dim_names=None): """Return the domain that defines the space of indices for this array. :arg dim_names: if not *None*, a tuple of strings, the names of the dimensions of the array. Defaults to :data:`dim_names`. + :arg reduction_dim_names: if not *None*, a tuple of strings, the names + of the reduction dimensions of the array. Defaults to + :data:`reduction_dim_names`. :returns: a :class:`islpy.BasicSet` + """ if dim_names is None: dim_names = self.dim_names - return domain_for_shape(dim_names, self.shape) + if reduction_dim_names is not None: + # Rename reductions. + reductions = dict( + (new_rname, self.reductions[rname]) + for new_rname, rname + in zip(reduction_dim_names, self.reduction_dim_names)) + else: + reductions = self.reductions + return domain_for_shape(dim_names, self.shape, reductions) @property def T(self): # noqa @@ -247,15 +272,19 @@ class Array: """ new_shape = tuple(reversed(self.shape)) new_expr = self.to_loopy_expression(list(reversed(self.dims))) - return ArrayExpression(self.ctx, new_shape, self.dtype, new_expr) + return ArrayExpression(self.ctx, new_shape, self.dtype, new_expr, + self.reductions.copy()) - def to_loopy_expression(self, dims): + def to_loopy_expression(self, dims, reduction_names=None): """Create a :mod:`loopy` expression for the value at index *dims*. :arg dims: A tuple of :mod:`pymbolic` expressions representing the desired index/indices. For instance if *dims = (Variable('a'), 1)*, this will return the expression for the element at index *a, 1*. + :arg reduction_names: If not *None*, a tuple that renames reduction + inames in *self* in the order of :attr:`reduction_dim_names`. + :returns: a :mod:`loopy` expression """ raise NotImplementedError @@ -264,6 +293,10 @@ class Array: """Store this array into a variable, if not already stored.""" raise NotImplementedError + @property + def reduction_dim_names(self): + return tuple(self.reductions) + class ArrayExpression(Array): """An array-valued expression. @@ -271,15 +304,24 @@ class ArrayExpression(Array): .. attribute:: expression A :mod:`pymbolic` expression for the array. + + .. attribute:: reductions + + A mapping from reduction inames in :attr:`expression` to bounds. """ - def __init__(self, ctx, shape, dtype, expression, name=None): + def __init__(self, ctx, shape, dtype, expression, reductions, name=None): super().__init__(ctx, shape, dtype, name) self.expression = expression + self.reductions = reductions - def to_loopy_expression(self, dims): + def to_loopy_expression(self, dims, reduction_names=None): assignments = dict(zip(self.dim_names, tuple(dims))) - return pymbolic.substitute(self.expression, assignments) + if reduction_names is not None: + assignments.update(dict( + zip(self.reduction_dim_names, + map(prim.Variable, reduction_names)))) + return substitute(self.expression, assignments) def store(self, name=None): if name is None: @@ -306,7 +348,7 @@ class ArrayVariable(Array): def __init__(self, ctx, shape, dtype, name): super().__init__(ctx, shape, dtype, name) - def to_loopy_expression(self, dims): + def to_loopy_expression(self, dims, reduction_names=None): return prim.Variable(self.name)[tuple(dims)] def store(self, name=None): @@ -470,18 +512,24 @@ class Context: within_inames=frozenset(within_inames), depends_on=self._get_dependencies(expr)) + new_insns = self.program.root_kernel.instructions + [out_insn] self.program = self.program.with_kernel( - self.program.root_kernel.copy( - instructions=self.program.root_kernel.instructions + [out_insn])) + self.program.root_kernel.copy(instructions=new_insns)) + self._last_insn_id = insn_id def copy_expr(self, expr, name): # Copy *expr* to a variable named *name*. out_inames = [ self.var_name_gen(f"{name}_dim{d}") for d in range(expr.ndim)] - out_expr = expr.to_loopy_expression(map(prim.Variable, out_inames)) + out_red_inames = [ + self.var_name_gen(f"{name}{rname}") for rname + in expr.reduction_dim_names] - domain = expr.get_domain(out_inames) + out_expr = expr.to_loopy_expression( + map(prim.Variable, out_inames), + out_red_inames) + domain = expr.get_domain(out_inames, out_red_inames) self.program = self.program.with_kernel( self.program.root_kernel.copy( @@ -546,7 +594,7 @@ class Context: # negative we shift it to be positive. idx = idx + prim.If(prim.Comparison(idx, "<", 0), a.shape[0], 0) expr = a.to_loopy_expression((idx,)) - return ArrayExpression(self, a.shape, a.dtype, expr) + return ArrayExpression(self, a.shape, a.dtype, expr, a.reductions) def stack(self, arrays, axis=0, name=None): """Join a sequence of arrays along a new axis. @@ -564,18 +612,10 @@ class Context: for array_dim, array0_dim in zip(array.shape, arrays[0].shape): self._assume_equal(array_dim, array0_dim) - if name is None: - name = "_stack" - - name = self.var_name_gen(name) - - out_inames = [] - for i in range(arrays[0].ndim): - if i >= axis: - i += 1 - out_inames.append(f"{name}_dim{i}") + base_name = name - domain = domain_for_shape(out_inames, arrays[0].shape) + if base_name is None: + base_name = "_stack" out_shape = list(arrays[0].shape) out_shape.insert(axis, len(arrays)) @@ -583,40 +623,62 @@ class Context: out_dtype = self.unify_types(*(a.dtype for a in arrays)) + out_name = self.var_name_gen(base_name) + out = lp.TemporaryVariable( - name, dtype=out_dtype, + out_name, + dtype=out_dtype, shape=out_shape, address_space=lp.AddressSpace.GLOBAL) # Create an output instruction for each input array. from loopy.kernel.instruction import make_assignment - out_insns = [] - for i in range(len(arrays)): + new_insns = [] + new_domains = [] + + for i, arr in enumerate(arrays): + name = self.var_name_gen(f"{base_name}_array{i}") + out_inames = [] + for j in range(arr.ndim): + if j >= axis: + j += 1 + out_inames.append(self.var_name_gen(f"{name}_dim{j}")) + + out_red_inames = [ + self.var_name_gen(f"{name}{rname}") + for rname in arr.reduction_dim_names] + reductions = dict( + (new_rname, arr.reductions[rname]) + for new_rname, rname + in zip(out_red_inames, arr.reduction_dim_names)) + domain = domain_for_shape(out_inames, arr.shape, reductions) + new_domains.append(domain) + indices = list(map(prim.Variable, out_inames)) - expr = arrays[i].to_loopy_expression(indices) + expr = arr.to_loopy_expression(indices, out_red_inames) indices.insert(axis, i) indices = tuple(indices) - insn_id = self.insn_id_gen(f"{name}_copy{i}") + insn_id = self.insn_id_gen(f"{name}_array{i}") out_insn = make_assignment( - (prim.Variable(name)[indices],), + (prim.Variable(out_name)[indices],), expr, id=insn_id, within_inames=frozenset(out_inames), depends_on=self._get_dependencies(expr)) self._last_insn_id = insn_id - out_insns.append(out_insn) + new_insns.append(out_insn) root_knl = self.program.root_kernel new_tv = root_knl.temporary_variables.copy() - new_tv[name] = out + new_tv[out_name] = out self.program = self.program.with_kernel( root_knl.copy( temporary_variables=new_tv, - domains=root_knl.domains + [domain], - instructions=root_knl.instructions + out_insns)) + domains=root_knl.domains + new_domains, + instructions=root_knl.instructions + new_insns)) - return ArrayVariable(self, out_shape, out_dtype, name) + return ArrayVariable(self, out_shape, out_dtype, out_name) def matmul(self, a, b, name=None): """Multiply matrix *a* by *b*. @@ -632,49 +694,34 @@ class Context: name = "_matmul" name = self.var_name_gen(name) - out_shape = (a.shape[0], b.shape[1]) - out_inames = (f"{name}_dim0", f"{name}_dim1") - out_red_iname = self.var_name_gen(f"{name}_reduce") - out_dtype = self.unify_types(a.dtype, b.dtype) - - out = lp.TemporaryVariable( - name, dtype=out_dtype, - shape=(a.shape[0], b.shape[1]), - address_space=lp.AddressSpace.GLOBAL) - - import loopy.library.reduction as red + red_iname = self.var_name_gen(f"{name}_reduce") + red_domain = (0, a.shape[1]) + dtype = self.unify_types(a.dtype, b.dtype) - a_inames = tuple(map(prim.Variable, (out_inames[0], out_red_iname))) - b_inames = tuple(map(prim.Variable, (out_red_iname, out_inames[1]))) + a_inames = tuple(map(prim.Variable, ("_0", red_iname))) + b_inames = tuple(map(prim.Variable, (red_iname, "_1"))) - expr = ( + inner_expr = ( a.to_loopy_expression(a_inames) * b.to_loopy_expression(b_inames)) - out_expr = sym.Reduction( + import loopy.library.reduction as red + expr = sym.Reduction( operation=red.parse_reduction_op("sum"), - inames=(out_red_iname,), - expr=expr, + inames=(red_iname,), + expr=inner_expr, allow_simultaneous=False) - domain = domain_for_shape( - out_inames + (out_red_iname,), - out_shape + (a.shape[1],)) - - root_knl = self.program.root_kernel - new_tv = root_knl.temporary_variables.copy() - new_tv[name] = out - - self.program = self.program.with_kernel( - root_knl.copy( - temporary_variables=new_tv, - domains=root_knl.domains + [domain])) - - self._make_array_assignment( - name, dim_names=out_inames, expr=out_expr, - within_inames=frozenset(out_inames)) + reductions = a.reductions.copy() + reductions.update(b.reductions) + reductions[red_iname] = red_domain - return ArrayVariable(self, out_shape, out_dtype, name) + return ArrayExpression( + self, + shape=(a.shape[0], b.shape[1]), + dtype=dtype, + expression=expr, + reductions=reductions) def call_kernel(self, knl, **kwargs): pass diff --git a/arrayzy/utils.py b/arrayzy/utils.py index 54e3436..c2bf016 100644 --- a/arrayzy/utils.py +++ b/arrayzy/utils.py @@ -21,10 +21,47 @@ THE SOFTWARE. """ +import loopy as lp import islpy as isl +import pymbolic.primitives as prim +import loopy.symbolic as sym -def domain_for_shape(dim_names, shape): + +from pymbolic.mapper.substitutor import SubstitutionMapper + + +class LoopyAwareSubstitutionMapper(SubstitutionMapper): + + def map_reduction(self, expr): + new_inames = [] + for iname in expr.inames: + new_iname = self.subst_func(iname) + if new_iname is None: + new_iname = prim.Variable(iname) + else: + if not isinstance(new_iname, prim.Variable): + raise ValueError( + f"reduction iname {iname} can only be renamed" + " to another iname") + new_inames.append(new_iname.name) + new_inames = tuple(new_inames) + new_expr = self.rec(expr.expr) + + return sym.Reduction(expr.operation, new_inames, new_expr, + expr.allow_simultaneous) + + +def substitute(expression, variable_assignments={}, **kwargs): + variable_assignments = variable_assignments.copy() + variable_assignments.update(kwargs) + from pymbolic.mapper.substitutor import make_subst_func + print("ASSIGNMENTS", variable_assignments) + return LoopyAwareSubstitutionMapper( + make_subst_func(variable_assignments))(expression) + + +def domain_for_shape(dim_names, shape, reductions=None): """Return a :class:`isl.BasicSet` that expresses an appropriate index domain for an array of (potentially symbolic) shape *shape*. @@ -36,9 +73,13 @@ def domain_for_shape(dim_names, shape): dimensions in the returned set. Must have the same length as *dim_names*. + :arg reductions: A map from reduction inames to (lower, upper) bounds. + :returns: a :class:`isl.BasicSet` """ + if reductions is None: + reductions = {} # Collect parameters. param_names = set() @@ -46,21 +87,33 @@ def domain_for_shape(dim_names, shape): for sdep in map(get_dependencies, shape): param_names |= sdep + for bounds in reductions.values(): + for sdep in map(get_dependencies, bounds): + # FIXME: Assumes that reduction bounds are not data-dependent. + param_names |= sdep + + set_names = sorted(tuple(dim_names) + tuple(reductions)) param_names = sorted(param_names) # Build domain. dom = isl.BasicSet.universe( isl.Space.create_from_names( isl.DEFAULT_CONTEXT, - set=dim_names, + set=set_names, params=param_names)) # Add constraints. from loopy.symbolic import aff_from_expr affs = isl.affs_from_space(dom.space) + for iname, dim in zip(dim_names, shape): dom &= affs[0].le_set(affs[iname]) dom &= affs[iname].lt_set(aff_from_expr(dom.space, dim)) + + for iname, (left, right) in reductions.items(): + dom &= aff_from_expr(dom.space, left).le_set(affs[iname]) + dom &= affs[iname].lt_set(aff_from_expr(dom.space, right)) + dom, = dom.get_basic_sets() return dom diff --git a/test/test_linalg.py b/test/test_linalg.py index 68c4ca6..821c10a 100644 --- a/test/test_linalg.py +++ b/test/test_linalg.py @@ -163,6 +163,42 @@ def test_matmul(ctx_factory): assert (eval_one(prog, x=x, y=y) == x @ y).all() +def test_matmul_addition(ctx_factory): + cl_ctx = ctx_factory() + queue = cl.CommandQueue(cl_ctx) + + with az.Context(queue) as ctx: + x = ctx.argument("x", shape=("n, n"), dtype=np.float64) + y = ctx.argument("y", shape=("n, n"), dtype=np.float64) + z = ctx.argument("z", shape=("n, n"), dtype=np.float64) + ctx.output(ctx.matmul(x, y) + ctx.matmul(x, z)) + + x = np.array([[1, 2], [3, 4]], dtype=np.float64) + y = np.array([[5, 6], [7, 8]], dtype=np.float64) + z = np.array([[9, 10], [11, 12]], dtype=np.float64) + + prog = ctx.build() + assert (eval_one(prog, x=x, y=y, z=z) == x @ y + x @ z).all() + + +def test_matmul_stack(ctx_factory): + cl_ctx = ctx_factory() + queue = cl.CommandQueue(cl_ctx) + + with az.Context(queue) as ctx: + x = ctx.argument("x", shape=("m, n"), dtype=np.float64) + y = ctx.argument("y", shape=("n, k"), dtype=np.float64) + z = ctx.matmul(x, y) + w = ctx.matmul(x, 2 * y) + ctx.output(ctx.stack((z, w))) + + x = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float64) + y = np.array([[2, 5], [5, 1], [1, 2]], dtype=np.float64) + + prog = ctx.build() + assert (eval_one(prog, x=x, y=y) == np.array([x @ y, x @ (2 * y)])).all() + + @pytest.mark.parametrize("shift", (-1, 1, -20, 20)) def test_roll(ctx_factory, shift): cl_ctx = ctx_factory() -- GitLab From 67d0637faa5e9308d45ef85083ca7ff56a39bf61 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Fri, 1 May 2020 03:41:24 -0500 Subject: [PATCH 34/46] Revert "Support reductions in expressions. Rewrite matmul to be an expression." This reverts commit aaec9806501945bd901545545d47f2fd5970b471. --- arrayzy/array.py | 197 +++++++++++++++++--------------------------- arrayzy/utils.py | 57 +------------ test/test_linalg.py | 36 -------- 3 files changed, 77 insertions(+), 213 deletions(-) diff --git a/arrayzy/array.py b/arrayzy/array.py index 042b5a5..c7886d3 100644 --- a/arrayzy/array.py +++ b/arrayzy/array.py @@ -27,10 +27,11 @@ import loopy as lp import islpy as isl import numpy as np import loopy.symbolic as sym +import pymbolic import pymbolic.primitives as prim from arrayzy.program import BoundProgram, PyOpenCLTarget -from arrayzy.utils import domain_for_shape, substitute +from arrayzy.utils import domain_for_shape __doc__ = """ @@ -95,11 +96,6 @@ class Array: Same as :data:`dims` but a tuple of strings. - .. attribute:: reduction_dim_names - - A tuple of strings representing the names of reduction inames present - in the array. - .. automethod:: get_domain .. automethod:: to_loopy_expression @@ -125,8 +121,6 @@ class Array: self._dtype = dtype self.shape = shape self.name = name - # Maps redution inames to (left, right) bounds. - self.reductions = {} @property def ndim(self): @@ -138,8 +132,7 @@ class Array: self.ctx, self.shape, self.dtype, - new_expr, - self.reductions.copy()) + new_expr) def _binary_op(self, op, other, reverse=False): if np.isscalar(other): @@ -152,8 +145,7 @@ class Array: self.ctx, self.shape, self.ctx.unify_types(self.dtype, type(other)), - new_expr, - self.reductions.copy()) + new_expr) elif isinstance(other, Array): args = ( @@ -162,16 +154,12 @@ class Array: if reverse: args = tuple(reversed(args)) - new_reductions = self.reductions.copy() - new_reductions.update(other.reductions) - new_expr = op(*args) return ArrayExpression( self.ctx, self.shape, self.ctx.unify_types(self.dtype, other.dtype), - new_expr, - new_reductions) + new_expr) else: raise ValueError @@ -223,8 +211,7 @@ class Array: self.ctx, shape=tuple(shape), dtype=self.dtype, - expression=self.to_loopy_expression(dims), - reductions=self.reductions.copy()) + expression=self.to_loopy_expression(dims)) @property def dtype(self): @@ -240,29 +227,17 @@ class Array: def dim_names(self): return tuple(f"_{i}" for i in range(len(self.shape))) - def get_domain(self, dim_names=None, reduction_dim_names=None): + def get_domain(self, dim_names=None): """Return the domain that defines the space of indices for this array. :arg dim_names: if not *None*, a tuple of strings, the names of the dimensions of the array. Defaults to :data:`dim_names`. - :arg reduction_dim_names: if not *None*, a tuple of strings, the names - of the reduction dimensions of the array. Defaults to - :data:`reduction_dim_names`. :returns: a :class:`islpy.BasicSet` - """ if dim_names is None: dim_names = self.dim_names - if reduction_dim_names is not None: - # Rename reductions. - reductions = dict( - (new_rname, self.reductions[rname]) - for new_rname, rname - in zip(reduction_dim_names, self.reduction_dim_names)) - else: - reductions = self.reductions - return domain_for_shape(dim_names, self.shape, reductions) + return domain_for_shape(dim_names, self.shape) @property def T(self): # noqa @@ -272,19 +247,15 @@ class Array: """ new_shape = tuple(reversed(self.shape)) new_expr = self.to_loopy_expression(list(reversed(self.dims))) - return ArrayExpression(self.ctx, new_shape, self.dtype, new_expr, - self.reductions.copy()) + return ArrayExpression(self.ctx, new_shape, self.dtype, new_expr) - def to_loopy_expression(self, dims, reduction_names=None): + def to_loopy_expression(self, dims): """Create a :mod:`loopy` expression for the value at index *dims*. :arg dims: A tuple of :mod:`pymbolic` expressions representing the desired index/indices. For instance if *dims = (Variable('a'), 1)*, this will return the expression for the element at index *a, 1*. - :arg reduction_names: If not *None*, a tuple that renames reduction - inames in *self* in the order of :attr:`reduction_dim_names`. - :returns: a :mod:`loopy` expression """ raise NotImplementedError @@ -293,10 +264,6 @@ class Array: """Store this array into a variable, if not already stored.""" raise NotImplementedError - @property - def reduction_dim_names(self): - return tuple(self.reductions) - class ArrayExpression(Array): """An array-valued expression. @@ -304,24 +271,15 @@ class ArrayExpression(Array): .. attribute:: expression A :mod:`pymbolic` expression for the array. - - .. attribute:: reductions - - A mapping from reduction inames in :attr:`expression` to bounds. """ - def __init__(self, ctx, shape, dtype, expression, reductions, name=None): + def __init__(self, ctx, shape, dtype, expression, name=None): super().__init__(ctx, shape, dtype, name) self.expression = expression - self.reductions = reductions - def to_loopy_expression(self, dims, reduction_names=None): + def to_loopy_expression(self, dims): assignments = dict(zip(self.dim_names, tuple(dims))) - if reduction_names is not None: - assignments.update(dict( - zip(self.reduction_dim_names, - map(prim.Variable, reduction_names)))) - return substitute(self.expression, assignments) + return pymbolic.substitute(self.expression, assignments) def store(self, name=None): if name is None: @@ -348,7 +306,7 @@ class ArrayVariable(Array): def __init__(self, ctx, shape, dtype, name): super().__init__(ctx, shape, dtype, name) - def to_loopy_expression(self, dims, reduction_names=None): + def to_loopy_expression(self, dims): return prim.Variable(self.name)[tuple(dims)] def store(self, name=None): @@ -512,24 +470,18 @@ class Context: within_inames=frozenset(within_inames), depends_on=self._get_dependencies(expr)) - new_insns = self.program.root_kernel.instructions + [out_insn] self.program = self.program.with_kernel( - self.program.root_kernel.copy(instructions=new_insns)) - + self.program.root_kernel.copy( + instructions=self.program.root_kernel.instructions + [out_insn])) self._last_insn_id = insn_id def copy_expr(self, expr, name): # Copy *expr* to a variable named *name*. out_inames = [ self.var_name_gen(f"{name}_dim{d}") for d in range(expr.ndim)] - out_red_inames = [ - self.var_name_gen(f"{name}{rname}") for rname - in expr.reduction_dim_names] + out_expr = expr.to_loopy_expression(map(prim.Variable, out_inames)) - out_expr = expr.to_loopy_expression( - map(prim.Variable, out_inames), - out_red_inames) - domain = expr.get_domain(out_inames, out_red_inames) + domain = expr.get_domain(out_inames) self.program = self.program.with_kernel( self.program.root_kernel.copy( @@ -594,7 +546,7 @@ class Context: # negative we shift it to be positive. idx = idx + prim.If(prim.Comparison(idx, "<", 0), a.shape[0], 0) expr = a.to_loopy_expression((idx,)) - return ArrayExpression(self, a.shape, a.dtype, expr, a.reductions) + return ArrayExpression(self, a.shape, a.dtype, expr) def stack(self, arrays, axis=0, name=None): """Join a sequence of arrays along a new axis. @@ -612,10 +564,18 @@ class Context: for array_dim, array0_dim in zip(array.shape, arrays[0].shape): self._assume_equal(array_dim, array0_dim) - base_name = name + if name is None: + name = "_stack" + + name = self.var_name_gen(name) + + out_inames = [] + for i in range(arrays[0].ndim): + if i >= axis: + i += 1 + out_inames.append(f"{name}_dim{i}") - if base_name is None: - base_name = "_stack" + domain = domain_for_shape(out_inames, arrays[0].shape) out_shape = list(arrays[0].shape) out_shape.insert(axis, len(arrays)) @@ -623,62 +583,40 @@ class Context: out_dtype = self.unify_types(*(a.dtype for a in arrays)) - out_name = self.var_name_gen(base_name) - out = lp.TemporaryVariable( - out_name, - dtype=out_dtype, + name, dtype=out_dtype, shape=out_shape, address_space=lp.AddressSpace.GLOBAL) # Create an output instruction for each input array. from loopy.kernel.instruction import make_assignment - new_insns = [] - new_domains = [] - - for i, arr in enumerate(arrays): - name = self.var_name_gen(f"{base_name}_array{i}") - out_inames = [] - for j in range(arr.ndim): - if j >= axis: - j += 1 - out_inames.append(self.var_name_gen(f"{name}_dim{j}")) - - out_red_inames = [ - self.var_name_gen(f"{name}{rname}") - for rname in arr.reduction_dim_names] - reductions = dict( - (new_rname, arr.reductions[rname]) - for new_rname, rname - in zip(out_red_inames, arr.reduction_dim_names)) - domain = domain_for_shape(out_inames, arr.shape, reductions) - new_domains.append(domain) - + out_insns = [] + for i in range(len(arrays)): indices = list(map(prim.Variable, out_inames)) - expr = arr.to_loopy_expression(indices, out_red_inames) + expr = arrays[i].to_loopy_expression(indices) indices.insert(axis, i) indices = tuple(indices) - insn_id = self.insn_id_gen(f"{name}_array{i}") + insn_id = self.insn_id_gen(f"{name}_copy{i}") out_insn = make_assignment( - (prim.Variable(out_name)[indices],), + (prim.Variable(name)[indices],), expr, id=insn_id, within_inames=frozenset(out_inames), depends_on=self._get_dependencies(expr)) self._last_insn_id = insn_id - new_insns.append(out_insn) + out_insns.append(out_insn) root_knl = self.program.root_kernel new_tv = root_knl.temporary_variables.copy() - new_tv[out_name] = out + new_tv[name] = out self.program = self.program.with_kernel( root_knl.copy( temporary_variables=new_tv, - domains=root_knl.domains + new_domains, - instructions=root_knl.instructions + new_insns)) + domains=root_knl.domains + [domain], + instructions=root_knl.instructions + out_insns)) - return ArrayVariable(self, out_shape, out_dtype, out_name) + return ArrayVariable(self, out_shape, out_dtype, name) def matmul(self, a, b, name=None): """Multiply matrix *a* by *b*. @@ -694,34 +632,49 @@ class Context: name = "_matmul" name = self.var_name_gen(name) - red_iname = self.var_name_gen(f"{name}_reduce") - red_domain = (0, a.shape[1]) - dtype = self.unify_types(a.dtype, b.dtype) + out_shape = (a.shape[0], b.shape[1]) + out_inames = (f"{name}_dim0", f"{name}_dim1") + out_red_iname = self.var_name_gen(f"{name}_reduce") + out_dtype = self.unify_types(a.dtype, b.dtype) + + out = lp.TemporaryVariable( + name, dtype=out_dtype, + shape=(a.shape[0], b.shape[1]), + address_space=lp.AddressSpace.GLOBAL) + + import loopy.library.reduction as red - a_inames = tuple(map(prim.Variable, ("_0", red_iname))) - b_inames = tuple(map(prim.Variable, (red_iname, "_1"))) + a_inames = tuple(map(prim.Variable, (out_inames[0], out_red_iname))) + b_inames = tuple(map(prim.Variable, (out_red_iname, out_inames[1]))) - inner_expr = ( + expr = ( a.to_loopy_expression(a_inames) * b.to_loopy_expression(b_inames)) - import loopy.library.reduction as red - expr = sym.Reduction( + out_expr = sym.Reduction( operation=red.parse_reduction_op("sum"), - inames=(red_iname,), - expr=inner_expr, + inames=(out_red_iname,), + expr=expr, allow_simultaneous=False) - reductions = a.reductions.copy() - reductions.update(b.reductions) - reductions[red_iname] = red_domain + domain = domain_for_shape( + out_inames + (out_red_iname,), + out_shape + (a.shape[1],)) - return ArrayExpression( - self, - shape=(a.shape[0], b.shape[1]), - dtype=dtype, - expression=expr, - reductions=reductions) + root_knl = self.program.root_kernel + new_tv = root_knl.temporary_variables.copy() + new_tv[name] = out + + self.program = self.program.with_kernel( + root_knl.copy( + temporary_variables=new_tv, + domains=root_knl.domains + [domain])) + + self._make_array_assignment( + name, dim_names=out_inames, expr=out_expr, + within_inames=frozenset(out_inames)) + + return ArrayVariable(self, out_shape, out_dtype, name) def call_kernel(self, knl, **kwargs): pass diff --git a/arrayzy/utils.py b/arrayzy/utils.py index c2bf016..54e3436 100644 --- a/arrayzy/utils.py +++ b/arrayzy/utils.py @@ -21,47 +21,10 @@ THE SOFTWARE. """ -import loopy as lp import islpy as isl -import pymbolic.primitives as prim -import loopy.symbolic as sym - -from pymbolic.mapper.substitutor import SubstitutionMapper - - -class LoopyAwareSubstitutionMapper(SubstitutionMapper): - - def map_reduction(self, expr): - new_inames = [] - for iname in expr.inames: - new_iname = self.subst_func(iname) - if new_iname is None: - new_iname = prim.Variable(iname) - else: - if not isinstance(new_iname, prim.Variable): - raise ValueError( - f"reduction iname {iname} can only be renamed" - " to another iname") - new_inames.append(new_iname.name) - new_inames = tuple(new_inames) - new_expr = self.rec(expr.expr) - - return sym.Reduction(expr.operation, new_inames, new_expr, - expr.allow_simultaneous) - - -def substitute(expression, variable_assignments={}, **kwargs): - variable_assignments = variable_assignments.copy() - variable_assignments.update(kwargs) - from pymbolic.mapper.substitutor import make_subst_func - print("ASSIGNMENTS", variable_assignments) - return LoopyAwareSubstitutionMapper( - make_subst_func(variable_assignments))(expression) - - -def domain_for_shape(dim_names, shape, reductions=None): +def domain_for_shape(dim_names, shape): """Return a :class:`isl.BasicSet` that expresses an appropriate index domain for an array of (potentially symbolic) shape *shape*. @@ -73,13 +36,9 @@ def domain_for_shape(dim_names, shape, reductions=None): dimensions in the returned set. Must have the same length as *dim_names*. - :arg reductions: A map from reduction inames to (lower, upper) bounds. - :returns: a :class:`isl.BasicSet` """ - if reductions is None: - reductions = {} # Collect parameters. param_names = set() @@ -87,33 +46,21 @@ def domain_for_shape(dim_names, shape, reductions=None): for sdep in map(get_dependencies, shape): param_names |= sdep - for bounds in reductions.values(): - for sdep in map(get_dependencies, bounds): - # FIXME: Assumes that reduction bounds are not data-dependent. - param_names |= sdep - - set_names = sorted(tuple(dim_names) + tuple(reductions)) param_names = sorted(param_names) # Build domain. dom = isl.BasicSet.universe( isl.Space.create_from_names( isl.DEFAULT_CONTEXT, - set=set_names, + set=dim_names, params=param_names)) # Add constraints. from loopy.symbolic import aff_from_expr affs = isl.affs_from_space(dom.space) - for iname, dim in zip(dim_names, shape): dom &= affs[0].le_set(affs[iname]) dom &= affs[iname].lt_set(aff_from_expr(dom.space, dim)) - - for iname, (left, right) in reductions.items(): - dom &= aff_from_expr(dom.space, left).le_set(affs[iname]) - dom &= affs[iname].lt_set(aff_from_expr(dom.space, right)) - dom, = dom.get_basic_sets() return dom diff --git a/test/test_linalg.py b/test/test_linalg.py index 821c10a..68c4ca6 100644 --- a/test/test_linalg.py +++ b/test/test_linalg.py @@ -163,42 +163,6 @@ def test_matmul(ctx_factory): assert (eval_one(prog, x=x, y=y) == x @ y).all() -def test_matmul_addition(ctx_factory): - cl_ctx = ctx_factory() - queue = cl.CommandQueue(cl_ctx) - - with az.Context(queue) as ctx: - x = ctx.argument("x", shape=("n, n"), dtype=np.float64) - y = ctx.argument("y", shape=("n, n"), dtype=np.float64) - z = ctx.argument("z", shape=("n, n"), dtype=np.float64) - ctx.output(ctx.matmul(x, y) + ctx.matmul(x, z)) - - x = np.array([[1, 2], [3, 4]], dtype=np.float64) - y = np.array([[5, 6], [7, 8]], dtype=np.float64) - z = np.array([[9, 10], [11, 12]], dtype=np.float64) - - prog = ctx.build() - assert (eval_one(prog, x=x, y=y, z=z) == x @ y + x @ z).all() - - -def test_matmul_stack(ctx_factory): - cl_ctx = ctx_factory() - queue = cl.CommandQueue(cl_ctx) - - with az.Context(queue) as ctx: - x = ctx.argument("x", shape=("m, n"), dtype=np.float64) - y = ctx.argument("y", shape=("n, k"), dtype=np.float64) - z = ctx.matmul(x, y) - w = ctx.matmul(x, 2 * y) - ctx.output(ctx.stack((z, w))) - - x = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float64) - y = np.array([[2, 5], [5, 1], [1, 2]], dtype=np.float64) - - prog = ctx.build() - assert (eval_one(prog, x=x, y=y) == np.array([x @ y, x @ (2 * y)])).all() - - @pytest.mark.parametrize("shift", (-1, 1, -20, 20)) def test_roll(ctx_factory, shift): cl_ctx = ctx_factory() -- GitLab From 91f4874468e08bde012edc8aef971dc2dd80507b Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Fri, 1 May 2020 03:46:55 -0500 Subject: [PATCH 35/46] Revert "Revert "Support reductions in expressions. Rewrite matmul to be an expression."" This reverts commit 67d0637faa5e9308d45ef85083ca7ff56a39bf61. --- arrayzy/array.py | 197 +++++++++++++++++++++++++++----------------- arrayzy/utils.py | 57 ++++++++++++- test/test_linalg.py | 36 ++++++++ 3 files changed, 213 insertions(+), 77 deletions(-) diff --git a/arrayzy/array.py b/arrayzy/array.py index c7886d3..042b5a5 100644 --- a/arrayzy/array.py +++ b/arrayzy/array.py @@ -27,11 +27,10 @@ import loopy as lp import islpy as isl import numpy as np import loopy.symbolic as sym -import pymbolic import pymbolic.primitives as prim from arrayzy.program import BoundProgram, PyOpenCLTarget -from arrayzy.utils import domain_for_shape +from arrayzy.utils import domain_for_shape, substitute __doc__ = """ @@ -96,6 +95,11 @@ class Array: Same as :data:`dims` but a tuple of strings. + .. attribute:: reduction_dim_names + + A tuple of strings representing the names of reduction inames present + in the array. + .. automethod:: get_domain .. automethod:: to_loopy_expression @@ -121,6 +125,8 @@ class Array: self._dtype = dtype self.shape = shape self.name = name + # Maps redution inames to (left, right) bounds. + self.reductions = {} @property def ndim(self): @@ -132,7 +138,8 @@ class Array: self.ctx, self.shape, self.dtype, - new_expr) + new_expr, + self.reductions.copy()) def _binary_op(self, op, other, reverse=False): if np.isscalar(other): @@ -145,7 +152,8 @@ class Array: self.ctx, self.shape, self.ctx.unify_types(self.dtype, type(other)), - new_expr) + new_expr, + self.reductions.copy()) elif isinstance(other, Array): args = ( @@ -154,12 +162,16 @@ class Array: if reverse: args = tuple(reversed(args)) + new_reductions = self.reductions.copy() + new_reductions.update(other.reductions) + new_expr = op(*args) return ArrayExpression( self.ctx, self.shape, self.ctx.unify_types(self.dtype, other.dtype), - new_expr) + new_expr, + new_reductions) else: raise ValueError @@ -211,7 +223,8 @@ class Array: self.ctx, shape=tuple(shape), dtype=self.dtype, - expression=self.to_loopy_expression(dims)) + expression=self.to_loopy_expression(dims), + reductions=self.reductions.copy()) @property def dtype(self): @@ -227,17 +240,29 @@ class Array: def dim_names(self): return tuple(f"_{i}" for i in range(len(self.shape))) - def get_domain(self, dim_names=None): + def get_domain(self, dim_names=None, reduction_dim_names=None): """Return the domain that defines the space of indices for this array. :arg dim_names: if not *None*, a tuple of strings, the names of the dimensions of the array. Defaults to :data:`dim_names`. + :arg reduction_dim_names: if not *None*, a tuple of strings, the names + of the reduction dimensions of the array. Defaults to + :data:`reduction_dim_names`. :returns: a :class:`islpy.BasicSet` + """ if dim_names is None: dim_names = self.dim_names - return domain_for_shape(dim_names, self.shape) + if reduction_dim_names is not None: + # Rename reductions. + reductions = dict( + (new_rname, self.reductions[rname]) + for new_rname, rname + in zip(reduction_dim_names, self.reduction_dim_names)) + else: + reductions = self.reductions + return domain_for_shape(dim_names, self.shape, reductions) @property def T(self): # noqa @@ -247,15 +272,19 @@ class Array: """ new_shape = tuple(reversed(self.shape)) new_expr = self.to_loopy_expression(list(reversed(self.dims))) - return ArrayExpression(self.ctx, new_shape, self.dtype, new_expr) + return ArrayExpression(self.ctx, new_shape, self.dtype, new_expr, + self.reductions.copy()) - def to_loopy_expression(self, dims): + def to_loopy_expression(self, dims, reduction_names=None): """Create a :mod:`loopy` expression for the value at index *dims*. :arg dims: A tuple of :mod:`pymbolic` expressions representing the desired index/indices. For instance if *dims = (Variable('a'), 1)*, this will return the expression for the element at index *a, 1*. + :arg reduction_names: If not *None*, a tuple that renames reduction + inames in *self* in the order of :attr:`reduction_dim_names`. + :returns: a :mod:`loopy` expression """ raise NotImplementedError @@ -264,6 +293,10 @@ class Array: """Store this array into a variable, if not already stored.""" raise NotImplementedError + @property + def reduction_dim_names(self): + return tuple(self.reductions) + class ArrayExpression(Array): """An array-valued expression. @@ -271,15 +304,24 @@ class ArrayExpression(Array): .. attribute:: expression A :mod:`pymbolic` expression for the array. + + .. attribute:: reductions + + A mapping from reduction inames in :attr:`expression` to bounds. """ - def __init__(self, ctx, shape, dtype, expression, name=None): + def __init__(self, ctx, shape, dtype, expression, reductions, name=None): super().__init__(ctx, shape, dtype, name) self.expression = expression + self.reductions = reductions - def to_loopy_expression(self, dims): + def to_loopy_expression(self, dims, reduction_names=None): assignments = dict(zip(self.dim_names, tuple(dims))) - return pymbolic.substitute(self.expression, assignments) + if reduction_names is not None: + assignments.update(dict( + zip(self.reduction_dim_names, + map(prim.Variable, reduction_names)))) + return substitute(self.expression, assignments) def store(self, name=None): if name is None: @@ -306,7 +348,7 @@ class ArrayVariable(Array): def __init__(self, ctx, shape, dtype, name): super().__init__(ctx, shape, dtype, name) - def to_loopy_expression(self, dims): + def to_loopy_expression(self, dims, reduction_names=None): return prim.Variable(self.name)[tuple(dims)] def store(self, name=None): @@ -470,18 +512,24 @@ class Context: within_inames=frozenset(within_inames), depends_on=self._get_dependencies(expr)) + new_insns = self.program.root_kernel.instructions + [out_insn] self.program = self.program.with_kernel( - self.program.root_kernel.copy( - instructions=self.program.root_kernel.instructions + [out_insn])) + self.program.root_kernel.copy(instructions=new_insns)) + self._last_insn_id = insn_id def copy_expr(self, expr, name): # Copy *expr* to a variable named *name*. out_inames = [ self.var_name_gen(f"{name}_dim{d}") for d in range(expr.ndim)] - out_expr = expr.to_loopy_expression(map(prim.Variable, out_inames)) + out_red_inames = [ + self.var_name_gen(f"{name}{rname}") for rname + in expr.reduction_dim_names] - domain = expr.get_domain(out_inames) + out_expr = expr.to_loopy_expression( + map(prim.Variable, out_inames), + out_red_inames) + domain = expr.get_domain(out_inames, out_red_inames) self.program = self.program.with_kernel( self.program.root_kernel.copy( @@ -546,7 +594,7 @@ class Context: # negative we shift it to be positive. idx = idx + prim.If(prim.Comparison(idx, "<", 0), a.shape[0], 0) expr = a.to_loopy_expression((idx,)) - return ArrayExpression(self, a.shape, a.dtype, expr) + return ArrayExpression(self, a.shape, a.dtype, expr, a.reductions) def stack(self, arrays, axis=0, name=None): """Join a sequence of arrays along a new axis. @@ -564,18 +612,10 @@ class Context: for array_dim, array0_dim in zip(array.shape, arrays[0].shape): self._assume_equal(array_dim, array0_dim) - if name is None: - name = "_stack" - - name = self.var_name_gen(name) - - out_inames = [] - for i in range(arrays[0].ndim): - if i >= axis: - i += 1 - out_inames.append(f"{name}_dim{i}") + base_name = name - domain = domain_for_shape(out_inames, arrays[0].shape) + if base_name is None: + base_name = "_stack" out_shape = list(arrays[0].shape) out_shape.insert(axis, len(arrays)) @@ -583,40 +623,62 @@ class Context: out_dtype = self.unify_types(*(a.dtype for a in arrays)) + out_name = self.var_name_gen(base_name) + out = lp.TemporaryVariable( - name, dtype=out_dtype, + out_name, + dtype=out_dtype, shape=out_shape, address_space=lp.AddressSpace.GLOBAL) # Create an output instruction for each input array. from loopy.kernel.instruction import make_assignment - out_insns = [] - for i in range(len(arrays)): + new_insns = [] + new_domains = [] + + for i, arr in enumerate(arrays): + name = self.var_name_gen(f"{base_name}_array{i}") + out_inames = [] + for j in range(arr.ndim): + if j >= axis: + j += 1 + out_inames.append(self.var_name_gen(f"{name}_dim{j}")) + + out_red_inames = [ + self.var_name_gen(f"{name}{rname}") + for rname in arr.reduction_dim_names] + reductions = dict( + (new_rname, arr.reductions[rname]) + for new_rname, rname + in zip(out_red_inames, arr.reduction_dim_names)) + domain = domain_for_shape(out_inames, arr.shape, reductions) + new_domains.append(domain) + indices = list(map(prim.Variable, out_inames)) - expr = arrays[i].to_loopy_expression(indices) + expr = arr.to_loopy_expression(indices, out_red_inames) indices.insert(axis, i) indices = tuple(indices) - insn_id = self.insn_id_gen(f"{name}_copy{i}") + insn_id = self.insn_id_gen(f"{name}_array{i}") out_insn = make_assignment( - (prim.Variable(name)[indices],), + (prim.Variable(out_name)[indices],), expr, id=insn_id, within_inames=frozenset(out_inames), depends_on=self._get_dependencies(expr)) self._last_insn_id = insn_id - out_insns.append(out_insn) + new_insns.append(out_insn) root_knl = self.program.root_kernel new_tv = root_knl.temporary_variables.copy() - new_tv[name] = out + new_tv[out_name] = out self.program = self.program.with_kernel( root_knl.copy( temporary_variables=new_tv, - domains=root_knl.domains + [domain], - instructions=root_knl.instructions + out_insns)) + domains=root_knl.domains + new_domains, + instructions=root_knl.instructions + new_insns)) - return ArrayVariable(self, out_shape, out_dtype, name) + return ArrayVariable(self, out_shape, out_dtype, out_name) def matmul(self, a, b, name=None): """Multiply matrix *a* by *b*. @@ -632,49 +694,34 @@ class Context: name = "_matmul" name = self.var_name_gen(name) - out_shape = (a.shape[0], b.shape[1]) - out_inames = (f"{name}_dim0", f"{name}_dim1") - out_red_iname = self.var_name_gen(f"{name}_reduce") - out_dtype = self.unify_types(a.dtype, b.dtype) - - out = lp.TemporaryVariable( - name, dtype=out_dtype, - shape=(a.shape[0], b.shape[1]), - address_space=lp.AddressSpace.GLOBAL) - - import loopy.library.reduction as red + red_iname = self.var_name_gen(f"{name}_reduce") + red_domain = (0, a.shape[1]) + dtype = self.unify_types(a.dtype, b.dtype) - a_inames = tuple(map(prim.Variable, (out_inames[0], out_red_iname))) - b_inames = tuple(map(prim.Variable, (out_red_iname, out_inames[1]))) + a_inames = tuple(map(prim.Variable, ("_0", red_iname))) + b_inames = tuple(map(prim.Variable, (red_iname, "_1"))) - expr = ( + inner_expr = ( a.to_loopy_expression(a_inames) * b.to_loopy_expression(b_inames)) - out_expr = sym.Reduction( + import loopy.library.reduction as red + expr = sym.Reduction( operation=red.parse_reduction_op("sum"), - inames=(out_red_iname,), - expr=expr, + inames=(red_iname,), + expr=inner_expr, allow_simultaneous=False) - domain = domain_for_shape( - out_inames + (out_red_iname,), - out_shape + (a.shape[1],)) - - root_knl = self.program.root_kernel - new_tv = root_knl.temporary_variables.copy() - new_tv[name] = out - - self.program = self.program.with_kernel( - root_knl.copy( - temporary_variables=new_tv, - domains=root_knl.domains + [domain])) - - self._make_array_assignment( - name, dim_names=out_inames, expr=out_expr, - within_inames=frozenset(out_inames)) + reductions = a.reductions.copy() + reductions.update(b.reductions) + reductions[red_iname] = red_domain - return ArrayVariable(self, out_shape, out_dtype, name) + return ArrayExpression( + self, + shape=(a.shape[0], b.shape[1]), + dtype=dtype, + expression=expr, + reductions=reductions) def call_kernel(self, knl, **kwargs): pass diff --git a/arrayzy/utils.py b/arrayzy/utils.py index 54e3436..c2bf016 100644 --- a/arrayzy/utils.py +++ b/arrayzy/utils.py @@ -21,10 +21,47 @@ THE SOFTWARE. """ +import loopy as lp import islpy as isl +import pymbolic.primitives as prim +import loopy.symbolic as sym -def domain_for_shape(dim_names, shape): + +from pymbolic.mapper.substitutor import SubstitutionMapper + + +class LoopyAwareSubstitutionMapper(SubstitutionMapper): + + def map_reduction(self, expr): + new_inames = [] + for iname in expr.inames: + new_iname = self.subst_func(iname) + if new_iname is None: + new_iname = prim.Variable(iname) + else: + if not isinstance(new_iname, prim.Variable): + raise ValueError( + f"reduction iname {iname} can only be renamed" + " to another iname") + new_inames.append(new_iname.name) + new_inames = tuple(new_inames) + new_expr = self.rec(expr.expr) + + return sym.Reduction(expr.operation, new_inames, new_expr, + expr.allow_simultaneous) + + +def substitute(expression, variable_assignments={}, **kwargs): + variable_assignments = variable_assignments.copy() + variable_assignments.update(kwargs) + from pymbolic.mapper.substitutor import make_subst_func + print("ASSIGNMENTS", variable_assignments) + return LoopyAwareSubstitutionMapper( + make_subst_func(variable_assignments))(expression) + + +def domain_for_shape(dim_names, shape, reductions=None): """Return a :class:`isl.BasicSet` that expresses an appropriate index domain for an array of (potentially symbolic) shape *shape*. @@ -36,9 +73,13 @@ def domain_for_shape(dim_names, shape): dimensions in the returned set. Must have the same length as *dim_names*. + :arg reductions: A map from reduction inames to (lower, upper) bounds. + :returns: a :class:`isl.BasicSet` """ + if reductions is None: + reductions = {} # Collect parameters. param_names = set() @@ -46,21 +87,33 @@ def domain_for_shape(dim_names, shape): for sdep in map(get_dependencies, shape): param_names |= sdep + for bounds in reductions.values(): + for sdep in map(get_dependencies, bounds): + # FIXME: Assumes that reduction bounds are not data-dependent. + param_names |= sdep + + set_names = sorted(tuple(dim_names) + tuple(reductions)) param_names = sorted(param_names) # Build domain. dom = isl.BasicSet.universe( isl.Space.create_from_names( isl.DEFAULT_CONTEXT, - set=dim_names, + set=set_names, params=param_names)) # Add constraints. from loopy.symbolic import aff_from_expr affs = isl.affs_from_space(dom.space) + for iname, dim in zip(dim_names, shape): dom &= affs[0].le_set(affs[iname]) dom &= affs[iname].lt_set(aff_from_expr(dom.space, dim)) + + for iname, (left, right) in reductions.items(): + dom &= aff_from_expr(dom.space, left).le_set(affs[iname]) + dom &= affs[iname].lt_set(aff_from_expr(dom.space, right)) + dom, = dom.get_basic_sets() return dom diff --git a/test/test_linalg.py b/test/test_linalg.py index 68c4ca6..821c10a 100644 --- a/test/test_linalg.py +++ b/test/test_linalg.py @@ -163,6 +163,42 @@ def test_matmul(ctx_factory): assert (eval_one(prog, x=x, y=y) == x @ y).all() +def test_matmul_addition(ctx_factory): + cl_ctx = ctx_factory() + queue = cl.CommandQueue(cl_ctx) + + with az.Context(queue) as ctx: + x = ctx.argument("x", shape=("n, n"), dtype=np.float64) + y = ctx.argument("y", shape=("n, n"), dtype=np.float64) + z = ctx.argument("z", shape=("n, n"), dtype=np.float64) + ctx.output(ctx.matmul(x, y) + ctx.matmul(x, z)) + + x = np.array([[1, 2], [3, 4]], dtype=np.float64) + y = np.array([[5, 6], [7, 8]], dtype=np.float64) + z = np.array([[9, 10], [11, 12]], dtype=np.float64) + + prog = ctx.build() + assert (eval_one(prog, x=x, y=y, z=z) == x @ y + x @ z).all() + + +def test_matmul_stack(ctx_factory): + cl_ctx = ctx_factory() + queue = cl.CommandQueue(cl_ctx) + + with az.Context(queue) as ctx: + x = ctx.argument("x", shape=("m, n"), dtype=np.float64) + y = ctx.argument("y", shape=("n, k"), dtype=np.float64) + z = ctx.matmul(x, y) + w = ctx.matmul(x, 2 * y) + ctx.output(ctx.stack((z, w))) + + x = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float64) + y = np.array([[2, 5], [5, 1], [1, 2]], dtype=np.float64) + + prog = ctx.build() + assert (eval_one(prog, x=x, y=y) == np.array([x @ y, x @ (2 * y)])).all() + + @pytest.mark.parametrize("shift", (-1, 1, -20, 20)) def test_roll(ctx_factory, shift): cl_ctx = ctx_factory() -- GitLab From 5072c5f5ca367ac52d70039a6466fa519c7741f1 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Fri, 1 May 2020 04:00:50 -0500 Subject: [PATCH 36/46] Ensure reductions are disjoint when creating binary operations --- arrayzy/array.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/arrayzy/array.py b/arrayzy/array.py index 042b5a5..1395356 100644 --- a/arrayzy/array.py +++ b/arrayzy/array.py @@ -156,6 +156,8 @@ class Array: self.reductions.copy()) elif isinstance(other, Array): + other = self.ctx.ensure_disjoint_reductions( + other, self.reduction_dim_names) args = ( self.to_loopy_expression(self.dims), other.to_loopy_expression(self.dims)) @@ -428,6 +430,29 @@ class Context: # {{{ internal api + def ensure_disjoint_reductions(self, expr, disjoint_from): + # Renames the reductions in *expr* not to conflict with the ones in the + # list *disjoint_from*. + common_reductions = set(expr.reductions) & set(disjoint_from) + if not common_reductions: + return expr + + new_red_names = [] + for iname in expr.reduction_dim_names: + if iname not in common_reductions: + new_red_names.append(iname) + else: + new_red_names.append(self.var_name_gen(iname)) + + new_expr = expr.to_loopy_expression(expr.dims, new_red_names) + new_reductions = dict( + (new_rname, expr.reductions[rname]) + for new_rname, rname + in zip(new_red_names, expr.reduction_dim_names)) + + return ArrayExpression(self, expr.shape, expr.dtype, new_expr, + new_reductions, expr.name) + def unify_types(self, *args): # Unifies the list of types. if None in args: @@ -694,6 +719,7 @@ class Context: name = "_matmul" name = self.var_name_gen(name) + b = self.ensure_disjoint_reductions(b, a.reduction_dim_names) red_iname = self.var_name_gen(f"{name}_reduce") red_domain = (0, a.shape[1]) dtype = self.unify_types(a.dtype, b.dtype) -- GitLab From 3bff3657bed936d969b2b04dd2c394f8fc32590c Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Fri, 1 May 2020 04:05:36 -0500 Subject: [PATCH 37/46] Fix insn id naming --- arrayzy/array.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arrayzy/array.py b/arrayzy/array.py index 1395356..ff928d2 100644 --- a/arrayzy/array.py +++ b/arrayzy/array.py @@ -683,7 +683,7 @@ class Context: expr = arr.to_loopy_expression(indices, out_red_inames) indices.insert(axis, i) indices = tuple(indices) - insn_id = self.insn_id_gen(f"{name}_array{i}") + insn_id = self.insn_id_gen(f"{name}") out_insn = make_assignment( (prim.Variable(out_name)[indices],), expr, -- GitLab From 991355e2a4e814632912fa7918f36faeae69e96e Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Fri, 1 May 2020 04:23:18 -0500 Subject: [PATCH 38/46] flake8 fixes --- arrayzy/utils.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arrayzy/utils.py b/arrayzy/utils.py index c2bf016..cb30cd5 100644 --- a/arrayzy/utils.py +++ b/arrayzy/utils.py @@ -21,7 +21,6 @@ THE SOFTWARE. """ -import loopy as lp import islpy as isl import pymbolic.primitives as prim @@ -49,7 +48,7 @@ class LoopyAwareSubstitutionMapper(SubstitutionMapper): new_expr = self.rec(expr.expr) return sym.Reduction(expr.operation, new_inames, new_expr, - expr.allow_simultaneous) + expr.allow_simultaneous) def substitute(expression, variable_assignments={}, **kwargs): -- GitLab From 67883e5b4c22b7399227818330aa769995dd15b8 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Fri, 1 May 2020 04:24:33 -0500 Subject: [PATCH 39/46] Add flake8 config --- setup.cfg | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 setup.cfg diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000..a0d9574 --- /dev/null +++ b/setup.cfg @@ -0,0 +1,6 @@ +[flake8] +ignore = E126,E127,E128,E123,E226,E241,E242,E265,N802,W503,E402,N814,N817,W504 +max-line-length=85 +exclude= + loopy/target/c/compyte/ndarray, + loopy/target/c/compyte/array.py -- GitLab From ff03278ad2034586179778b5ac4400135043c29a Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Fri, 1 May 2020 04:25:49 -0500 Subject: [PATCH 40/46] Placate flake8 --- arrayzy/array.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arrayzy/array.py b/arrayzy/array.py index c7886d3..16c2d10 100644 --- a/arrayzy/array.py +++ b/arrayzy/array.py @@ -472,7 +472,8 @@ class Context: self.program = self.program.with_kernel( self.program.root_kernel.copy( - instructions=self.program.root_kernel.instructions + [out_insn])) + instructions=( + self.program.root_kernel.instructions + [out_insn]))) self._last_insn_id = insn_id def copy_expr(self, expr, name): -- GitLab From 0b38369d9d1a74b948c3f498f511a17f8fe20d9b Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Fri, 1 May 2020 04:27:02 -0500 Subject: [PATCH 41/46] ctx.output(): If given a temp var, make an output --- arrayzy/array.py | 37 ++++++++++++++++++++++++++++++------- test/test_linalg.py | 1 + 2 files changed, 31 insertions(+), 7 deletions(-) diff --git a/arrayzy/array.py b/arrayzy/array.py index 16c2d10..750a1b7 100644 --- a/arrayzy/array.py +++ b/arrayzy/array.py @@ -687,13 +687,36 @@ class Context: :returns: an :class:`ArrayVariable` """ - if name is None: - name = "_out" - name = self.var_name_gen(name) - out = lp.GlobalArg(name, shape=expr.shape, dtype=expr.dtype, order="C") - self.program = self.program.with_kernel( - self.program.root_kernel.copy(args=self.program.args + [out])) - return self.copy_expr(expr, name) + if isinstance(expr, ArrayVariable): + # must already exist, make sure it's usable + root_knl = self.program.root_kernel + + if expr.name in root_knl.temporary_variables: + tv = root_knl.temporary_variables[expr.name] + new_tvs = root_knl.temporary_variables.copy() + new_args = root_knl.args + [ + lp.GlobalArg( + expr.name, shape=tv.shape, dtype=tv.dtype, + dim_tags=tv.dim_tags, + is_output=True)] + + del new_tvs[expr.name] + + self.program = self.program.with_kernel( + root_knl.copy( + temporary_variables=new_tvs, + args=new_args)) + + else: + if name is None: + name = "_out" + name = self.var_name_gen(name) + out = lp.GlobalArg( + name, shape=expr.shape, dtype=expr.dtype, order="C", + is_output=True) + self.program = self.program.with_kernel( + self.program.root_kernel.copy(args=self.program.args + [out])) + return self.copy_expr(expr, name) # }}} diff --git a/test/test_linalg.py b/test/test_linalg.py index 68c4ca6..948beb7 100644 --- a/test/test_linalg.py +++ b/test/test_linalg.py @@ -45,6 +45,7 @@ def eval_one(prog, **kwargs): def test_symbolic_array(ctx_factory): + pytest.xfail("input is also an output--what should we do?") cl_ctx = ctx_factory() queue = cl.CommandQueue(cl_ctx) -- GitLab From 992d012535d531672f61cc329bd5b908be9aa9a7 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Fri, 1 May 2020 04:28:08 -0500 Subject: [PATCH 42/46] Implement Context.call_kernel --- arrayzy/array.py | 97 ++++++++++++++++++++++++++++++++++++++++++++- test/test_kernel.py | 96 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 192 insertions(+), 1 deletion(-) create mode 100644 test/test_kernel.py diff --git a/arrayzy/array.py b/arrayzy/array.py index 750a1b7..97fd30f 100644 --- a/arrayzy/array.py +++ b/arrayzy/array.py @@ -678,7 +678,102 @@ class Context: return ArrayVariable(self, out_shape, out_dtype, name) def call_kernel(self, knl, **kwargs): - pass + # FIXME: Avoid name clash between existing and new kernel + + root_knl = self.program.root_kernel + + new_tvs = root_knl.temporary_variables.copy() + new_domains = root_knl.domains[:] + + from pymbolic.primitives import Variable, Call + from loopy.symbolic import SubArrayRef + + retvals = [] + lhs_exprs = [] + rhs_args = [] + for arg in knl.args: + if isinstance(arg, lp.ValueArg): + passed_arg = kwargs[arg.name] + if isinstance(passed_arg, str): + from loopy.symbolic import parse + passed_arg = parse(passed_arg) + rhs_args.append(passed_arg) + else: + if arg.is_output: + arg_name_in_caller = self.var_name_gen(arg.name) + # FIXME: We're not nearly copying all attributes + new_tvs[arg_name_in_caller] = lp.TemporaryVariable( + name=arg_name_in_caller, + # FIXME: Callee and caller parameter names in + # shapes here match purely by accident. + shape=arg.shape, + dtype=arg.dtype) + # FIXME: Callee and caller parameter names in shapes here + # match purely by accident. + arg_shape = arg.shape + + result_inames = tuple( + self.var_name_gen(f"{arg_name_in_caller}_i{i}") + for i in range(len(arg_shape))) + new_domains.append( + domain_for_shape(result_inames, arg.shape)) + retvals.append( + ArrayVariable( + self, arg.shape, arg.dtype, arg_name_in_caller)) + + else: + # FIXME: Allow passing slices + passed_arg = kwargs[arg.name] + if not isinstance(passed_arg, ArrayVariable): + raise ValueError( + f"argument '{arg.name}': passed value " + f"'{passed_arg}' is not a variable. " + "Call .store() to make it one.") + + arg_name_in_caller = passed_arg.name + arg_shape = passed_arg.shape + + array_ref_inames = tuple( + self.var_name_gen("ai") for i in arg_shape) + new_domains.append( + domain_for_shape(array_ref_inames, arg.shape)) + array_ref_inames_vars = tuple( + Variable(iname) for iname in array_ref_inames) + + sar = SubArrayRef( + array_ref_inames_vars, + Variable(arg_name_in_caller)[array_ref_inames_vars]) + + if arg.is_output: + lhs_exprs.append(sar) + else: + rhs_args.append(sar) + + rhs_args = tuple(rhs_args) + from loopy.kernel.instruction import make_assignment + new_insns = root_knl.instructions + [ + make_assignment( + tuple(lhs_exprs), + Call( + Variable(knl.name), + rhs_args), + id=self.insn_id_gen(f"_call_{knl.name}"), + depends_on=self._get_dependencies(rhs_args) + ) + ] + + program = self.program.with_kernel(root_knl.copy( + instructions=new_insns, + temporary_variables=new_tvs, + domains=new_domains, + )) + + self.program = lp.register_callable_kernel(program, knl) + + if len(retvals) == 1: + return retvals[0] + else: + return retvals def output(self, expr, name=None): """Copy *expr* to a (new) output variable. diff --git a/test/test_kernel.py b/test/test_kernel.py new file mode 100644 index 0000000..5ad8cd4 --- /dev/null +++ b/test/test_kernel.py @@ -0,0 +1,96 @@ +#!/usr/bin/env python + +__copyright__ = "Copyright (C) 2020 Matt Wala" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + +import sys +import arrayzy as az +import loopy as lp +import numpy as np +import numpy.linalg as la + +import pyopencl as cl +from pyopencl.tools import ( # noqa + pytest_generate_tests_for_pyopencl as pytest_generate_tests) + + +def test_call_kernel(ctx_factory): + cl_ctx = ctx_factory() + queue = cl.CommandQueue(cl_ctx) + + with az.Context(queue) as ctx: + a = ctx.argument("a", shape="m,k", dtype=np.float64) + b = ctx.argument("b", shape="k,n", dtype=np.float64) + + two_a = (2*a).store() + + fortran_src = """ + subroutine dgemm(m,n,k,a,b,c) + implicit none + real*8 a(m,k),b(k,n),c(m,n),mysum + integer m,n,k,i,j,ell + + do j = 1,n + do i = 1,m + mysum = 0 + do ell = 1,k + mysum = mysum + b(ell,j)*a(i,ell) + end do + c(i,j) = mysum + end do + end do + end subroutine + """ + + dgemm_knl = lp.parse_fortran(fortran_src)["dgemm"] + + c = ctx.call_kernel( + dgemm_knl, a=two_a, b=b, + + # FIXME: Avoid having to pass these + m="m", n="n", k="k") + + # FIXME: Loopy global temporaries would allow this: + # ctx.output(5*c) + + ctx.output(c) + + prog = ctx.build().program + + print(prog) + a_c = np.random.randn(10, 5) + b_c = np.random.randn(5, 20) + + prog = lp.set_options(prog, write_code=True) + evt, (c_c,) = prog(queue, a=a_c, b=b_c) + + assert la.norm(2*a_c@b_c - c_c) < 1e-13 + + +if __name__ == "__main__": + if len(sys.argv) > 1: + exec(sys.argv[1]) + else: + from pytest import main + main([__file__]) + +# vim: foldmethod=marker -- GitLab From 5fd12a818bfb1e5f44f161a321a19f9580db3232 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Fri, 1 May 2020 04:34:39 -0500 Subject: [PATCH 43/46] Add fparser to requirements --- requirements.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/requirements.txt b/requirements.txt index 7ca0048..ee195dd 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1 +1,3 @@ +git+https://github.com/inducer/f2py + git+https://github.com/inducer/loopy.git@kernel_callables_v3-edit0.2 -- GitLab From 885c20a9090d5ea88bf90b0e3b25d17e6a1eed43 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Fri, 1 May 2020 04:37:00 -0500 Subject: [PATCH 44/46] Remove a debugging print statement --- arrayzy/utils.py | 1 - 1 file changed, 1 deletion(-) diff --git a/arrayzy/utils.py b/arrayzy/utils.py index cb30cd5..1af1e8e 100644 --- a/arrayzy/utils.py +++ b/arrayzy/utils.py @@ -55,7 +55,6 @@ def substitute(expression, variable_assignments={}, **kwargs): variable_assignments = variable_assignments.copy() variable_assignments.update(kwargs) from pymbolic.mapper.substitutor import make_subst_func - print("ASSIGNMENTS", variable_assignments) return LoopyAwareSubstitutionMapper( make_subst_func(variable_assignments))(expression) -- GitLab From 8f003a58378d39a5be362eae2d6bf73454abcfaf Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Fri, 1 May 2020 05:06:16 -0500 Subject: [PATCH 45/46] Add demo notebook --- .../Computing and DG with Lazy Arrays.ipynb | 520 +++++++++++++++++- 1 file changed, 507 insertions(+), 13 deletions(-) diff --git a/experiments/Computing and DG with Lazy Arrays.ipynb b/experiments/Computing and DG with Lazy Arrays.ipynb index 733083e..56ce522 100644 --- a/experiments/Computing and DG with Lazy Arrays.ipynb +++ b/experiments/Computing and DG with Lazy Arrays.ipynb @@ -9,19 +9,21 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 55, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", + "import numpy.linalg as la\n", "import arrayzy as az\n", "import pyopencl as cl\n", - "import loopy as lp" + "import loopy as lp\n", + "import matplotlib.pyplot as plt" ] }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 56, "metadata": {}, "outputs": [ { @@ -29,8 +31,8 @@ "output_type": "stream", "text": [ "Choose platform:\n", - "[0] \n", - "[1] \n" + "[0] \n", + "[1] \n" ] }, { @@ -62,9 +64,18 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 57, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ASSIGNMENTS {'_0': Variable('_0'), '_1': Variable('_1')}\n", + "ASSIGNMENTS {'_0': Variable('_out_dim0'), '_1': Variable('_out_dim1')}\n" + ] + } + ], "source": [ "with az.Context(queue) as ctx:\n", " A = ctx.argument(\"A\", shape=\"n, n\", dtype=np.float64)\n", @@ -75,7 +86,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 58, "metadata": {}, "outputs": [ { @@ -112,7 +123,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 59, "metadata": {}, "outputs": [ { @@ -140,7 +151,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 60, "metadata": {}, "outputs": [], "source": [ @@ -150,7 +161,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 61, "metadata": {}, "outputs": [ { @@ -163,7 +174,7 @@ " [0., 0., 0., 0., 0.]])" ] }, - "execution_count": 26, + "execution_count": 61, "metadata": {}, "output_type": "execute_result" } @@ -172,6 +183,489 @@ "result - (2*A+5)" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Incorporating a Fortran Kernel" + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/andreas/src/loopy/loopy/kernel/creation.py:2402: LoopyWarning: 'lang_version' was not passed to make_kernel(). To avoid this warning, pass lang_version=(2018, 2) in this invocation. (Or say 'from loopy.version import LOOPY_USE_LANGUAGE_VERSION_2018_2' in the global scope of the calling frame.)\n", + " return make_kernel(*args, **kwargs)\n" + ] + } + ], + "source": [ + "fortran_src = \"\"\"\n", + " subroutine dgemm(m,n,k,a,b,c)\n", + " implicit none\n", + " real*8 a(m,k),b(k,n),c(m,n),mysum\n", + " integer m,n,k,i,j,ell\n", + "\n", + " do j = 1,n\n", + " do i = 1,m\n", + " mysum = 0\n", + " do ell = 1,k\n", + " mysum = mysum + b(ell,j)*a(i,ell)\n", + " end do\n", + " c(i,j) = mysum\n", + " end do\n", + " end do\n", + " end subroutine\n", + " \"\"\"\n", + "\n", + "dgemm_knl = lp.parse_fortran(fortran_src)[\"dgemm\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ASSIGNMENTS {'_0': Variable('_temp_dim0'), '_1': Variable('_temp_dim1')}\n", + "---------------------------------------------------------------------------\n", + "KERNEL: loopy_kernel\n", + "---------------------------------------------------------------------------\n", + "ARGUMENTS:\n", + "a: type: np:dtype('float64'), shape: (m, k), dim_tags: (N1:stride:k, N0:stride:1) aspace: global\n", + "b: type: np:dtype('float64'), shape: (k, n), dim_tags: (N1:stride:n, N0:stride:1) aspace: global\n", + "c: type: np:dtype('float64'), shape: (m, n), dim_tags: (N1:stride:n, N0:stride:1) aspace: global\n", + "k: ValueArg, type: np:dtype('int32')\n", + "m: ValueArg, type: np:dtype('int32')\n", + "n: ValueArg, type: np:dtype('int32')\n", + "---------------------------------------------------------------------------\n", + "DOMAINS:\n", + "{ : }\n", + "[k, m] -> { [_temp_dim0, _temp_dim1] : 0 <= _temp_dim0 < m and 0 <= _temp_dim1 < k }\n", + "[k, m] -> { [ai, ai_0] : 0 <= ai < m and 0 <= ai_0 < k }\n", + "[k, n] -> { [ai_1, ai_2] : 0 <= ai_1 < k and 0 <= ai_2 < n }\n", + "[m, n] -> { [c_i0, c_i1] : 0 <= c_i0 < m and 0 <= c_i1 < n }\n", + "[m, n] -> { [ai_3, ai_4] : 0 <= ai_3 < m and 0 <= ai_4 < n }\n", + "---------------------------------------------------------------------------\n", + "INAME IMPLEMENTATION TAGS:\n", + "_temp_dim0: None\n", + "_temp_dim1: None\n", + "ai: None\n", + "ai_0: None\n", + "ai_1: None\n", + "ai_2: None\n", + "ai_3: None\n", + "ai_4: None\n", + "c_i0: None\n", + "c_i1: None\n", + "---------------------------------------------------------------------------\n", + "TEMPORARIES:\n", + "_temp: type: np:dtype('float64'), shape: (m, k), dim_tags: (N1:stride:k, N0:stride:1) scope:global\n", + "---------------------------------------------------------------------------\n", + "INSTRUCTIONS:\n", + " for _temp_dim1, _temp_dim0\n", + "↱ \u001b[36m_temp[_temp_dim0, _temp_dim1]\u001b[0m = \u001b[35m2*a[_temp_dim0, _temp_dim1]\u001b[0m {id=\u001b[32m_store_temp\u001b[0m}\n", + "│ end _temp_dim1, _temp_dim0\n", + "└ \u001b[36m[ai_3,ai_4]: c[ai_3, ai_4]\u001b[0m = \u001b[35mdgemm(m, n, k, [ai,ai_0]: _temp[ai, ai_0], [ai_1,ai_2]: b[ai_1, ai_2])\u001b[0m {id=\u001b[32m_call_dgemm\u001b[0m}\n", + "---------------------------------------------------------------------------\n", + "---------------------------------------------------------------------------\n", + "KERNEL: dgemm\n", + "---------------------------------------------------------------------------\n", + "ARGUMENTS:\n", + "a: type: np:dtype('float64'), shape: (m, k), dim_tags: (N0:stride:1, N1:stride:m) aspace: global\n", + "b: type: np:dtype('float64'), shape: (k, n), dim_tags: (N0:stride:1, N1:stride:k) aspace: global\n", + "c: type: np:dtype('float64'), shape: (m, n), dim_tags: (N0:stride:1, N1:stride:m) aspace: global\n", + "k: ValueArg, type: np:dtype('int32')\n", + "m: ValueArg, type: np:dtype('int32')\n", + "n: ValueArg, type: np:dtype('int32')\n", + "---------------------------------------------------------------------------\n", + "DOMAINS:\n", + "[n, m] -> { [j, i] : 0 <= j < n and 0 <= i < m }\n", + "[k] -> { [ell] : 0 <= ell < k }\n", + "---------------------------------------------------------------------------\n", + "INAME IMPLEMENTATION TAGS:\n", + "ell: None\n", + "i: None\n", + "j: None\n", + "---------------------------------------------------------------------------\n", + "TEMPORARIES:\n", + "mysum: type: np:dtype('float64'), shape: () scope:auto\n", + "---------------------------------------------------------------------------\n", + "INSTRUCTIONS:\n", + " for j, i\n", + "↱ \u001b[36mmysum\u001b[0m = \u001b[35m0\u001b[0m {id=\u001b[32minsn0\u001b[0m}\n", + "│ for ell\n", + "└↱ \u001b[36mmysum\u001b[0m = \u001b[35mmysum + b[ell, j]*a[i, ell]\u001b[0m {id=\u001b[32minsn1\u001b[0m}\n", + " │ end ell\n", + " └ \u001b[36mc[i, j]\u001b[0m = \u001b[35mmysum\u001b[0m {id=\u001b[32minsn2\u001b[0m}\n", + " end j, i\n", + "---------------------------------------------------------------------------\n" + ] + } + ], + "source": [ + "with az.Context(queue) as ctx:\n", + " a = ctx.argument(\"a\", shape=\"m,k\", dtype=np.float64)\n", + " b = ctx.argument(\"b\", shape=\"k,n\", dtype=np.float64)\n", + "\n", + " two_a = (2*a).store()\n", + "\n", + " c = ctx.call_kernel(\n", + " dgemm_knl, a=two_a, b=b,\n", + "\n", + " # FIXME: Avoid having to pass these\n", + " m=\"m\", n=\"n\", k=\"k\")\n", + "\n", + " ctx.output(c)\n", + "\n", + " prog = ctx.build().program\n", + "\n", + "print(prog)" + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[36m#\u001b[39;49;00m\u001b[36mdefine lid(N) ((int) get_local_id(N))\u001b[39;49;00m\u001b[36m\u001b[39;49;00m\n", + "\u001b[36m#\u001b[39;49;00m\u001b[36mdefine gid(N) ((int) get_group_id(N))\u001b[39;49;00m\u001b[36m\u001b[39;49;00m\n", + "\u001b[36m#\u001b[39;49;00m\u001b[36mif __OPENCL_C_VERSION__ < 120\u001b[39;49;00m\u001b[36m\u001b[39;49;00m\n", + "\u001b[36m#\u001b[39;49;00m\u001b[36mpragma OPENCL EXTENSION cl_khr_fp64: enable\u001b[39;49;00m\u001b[36m\u001b[39;49;00m\n", + "\u001b[36m#\u001b[39;49;00m\u001b[36mendif\u001b[39;49;00m\u001b[36m\u001b[39;49;00m\n", + "\n", + "\u001b[34mstatic\u001b[39;49;00m \u001b[36mvoid\u001b[39;49;00m \u001b[32mdgemm\u001b[39;49;00m(\u001b[36mint\u001b[39;49;00m \u001b[34mconst\u001b[39;49;00m m, \u001b[36mint\u001b[39;49;00m \u001b[34mconst\u001b[39;49;00m n, \u001b[36mint\u001b[39;49;00m \u001b[34mconst\u001b[39;49;00m k, __global \u001b[36mdouble\u001b[39;49;00m \u001b[34mconst\u001b[39;49;00m *__restrict__ a, __global \u001b[36mdouble\u001b[39;49;00m \u001b[34mconst\u001b[39;49;00m *__restrict__ b, __global \u001b[36mdouble\u001b[39;49;00m *__restrict__ c);\n", + "\u001b[34mstatic\u001b[39;49;00m \u001b[36mvoid\u001b[39;49;00m \u001b[32mdgemm\u001b[39;49;00m(\u001b[36mint\u001b[39;49;00m \u001b[34mconst\u001b[39;49;00m m, \u001b[36mint\u001b[39;49;00m \u001b[34mconst\u001b[39;49;00m n, \u001b[36mint\u001b[39;49;00m \u001b[34mconst\u001b[39;49;00m k, __global \u001b[36mdouble\u001b[39;49;00m \u001b[34mconst\u001b[39;49;00m *__restrict__ a, __global \u001b[36mdouble\u001b[39;49;00m \u001b[34mconst\u001b[39;49;00m *__restrict__ b, __global \u001b[36mdouble\u001b[39;49;00m *__restrict__ c)\n", + "{\n", + " \u001b[36mdouble\u001b[39;49;00m mysum;\n", + "\n", + " \u001b[34mfor\u001b[39;49;00m (\u001b[36mint\u001b[39;49;00m j = \u001b[34m0\u001b[39;49;00m; j <= -\u001b[34m1\u001b[39;49;00m + n; ++j)\n", + " \u001b[34mif\u001b[39;49;00m (-\u001b[34m1\u001b[39;49;00m + m >= \u001b[34m0\u001b[39;49;00m)\n", + " \u001b[34mfor\u001b[39;49;00m (\u001b[36mint\u001b[39;49;00m i = \u001b[34m0\u001b[39;49;00m; i <= -\u001b[34m1\u001b[39;49;00m + m; ++i)\n", + " {\n", + " mysum = \u001b[34m0.0\u001b[39;49;00m;\n", + " \u001b[34mfor\u001b[39;49;00m (\u001b[36mint\u001b[39;49;00m ell = \u001b[34m0\u001b[39;49;00m; ell <= -\u001b[34m1\u001b[39;49;00m + k; ++ell)\n", + " mysum = mysum + b[n * ell + j] * a[k * i + ell];\n", + " c[n * i + j] = mysum;\n", + " }\n", + "}\n", + "__kernel \u001b[36mvoid\u001b[39;49;00m \u001b[32m__attribute__\u001b[39;49;00m ((reqd_work_group_size(\u001b[34m1\u001b[39;49;00m, \u001b[34m1\u001b[39;49;00m, \u001b[34m1\u001b[39;49;00m))) loopy_kernel(\u001b[36mint\u001b[39;49;00m \u001b[34mconst\u001b[39;49;00m m, \u001b[36mint\u001b[39;49;00m \u001b[34mconst\u001b[39;49;00m k, __global \u001b[36mdouble\u001b[39;49;00m \u001b[34mconst\u001b[39;49;00m *__restrict__ a, \u001b[36mint\u001b[39;49;00m \u001b[34mconst\u001b[39;49;00m n, __global \u001b[36mdouble\u001b[39;49;00m \u001b[34mconst\u001b[39;49;00m *__restrict__ b, __global \u001b[36mdouble\u001b[39;49;00m *__restrict__ c, __global \u001b[36mdouble\u001b[39;49;00m *__restrict__ _temp)\n", + "{\n", + " \u001b[34mfor\u001b[39;49;00m (\u001b[36mint\u001b[39;49;00m _temp_dim1 = \u001b[34m0\u001b[39;49;00m; _temp_dim1 <= -\u001b[34m1\u001b[39;49;00m + k; ++_temp_dim1)\n", + " \u001b[34mif\u001b[39;49;00m (-\u001b[34m1\u001b[39;49;00m + m >= \u001b[34m0\u001b[39;49;00m)\n", + " \u001b[34mfor\u001b[39;49;00m (\u001b[36mint\u001b[39;49;00m _temp_dim0 = \u001b[34m0\u001b[39;49;00m; _temp_dim0 <= -\u001b[34m1\u001b[39;49;00m + m; ++_temp_dim0)\n", + " _temp[k * _temp_dim0 + _temp_dim1] = \u001b[34m2.0\u001b[39;49;00m * a[k * _temp_dim0 + _temp_dim1];\n", + " dgemm(m, n, k, &(_temp[\u001b[34m0\u001b[39;49;00m]), &(b[\u001b[34m0\u001b[39;49;00m]), &(c[\u001b[34m0\u001b[39;49;00m]));\n", + "}\n", + "\n" + ] + } + ], + "source": [ + "a_c = np.random.randn(10, 5)\n", + "b_c = np.random.randn(5, 20)\n", + "\n", + "prog = lp.set_options(prog, write_code=True)\n", + "evt, (c_c,) = prog(queue, a=a_c, b=b_c)\n", + "\n", + "assert la.norm(2*a_c@b_c - c_c) < 1e-13" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Running DG" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "metadata": {}, + "outputs": [], + "source": [ + "c = 1\n", + "\n", + "from dg_tools import DGDiscr1D, DGOps1D\n", + "discr = DGDiscr1D(0, 2*np.pi, nelements=20, nnodes=4)" + ] + }, + { + "cell_type": "code", + "execution_count": 66, + "metadata": {}, + "outputs": [], + "source": [ + "def weak_flux( vec):\n", + " return (vec + dg.face_swap(vec)) / 2 * c * dg.normals\n", + "\n", + "def strong_flux(vec):\n", + " return c * dg.normals * vec - weak_flux(vec)\n", + "\n", + "def advec_op(vec):\n", + " return -dg.inv_mass(\n", + " dg.face_mass(strong_flux(dg.interp(vec)))\n", + " - c * dg.stiffness(vec))" + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ASSIGNMENTS {'_0': Variable('_interp_reduce'), '_1': Variable('_1')}\n", + "ASSIGNMENTS {'_0': Variable('_1'), '_1': Variable('_0')}\n", + "ASSIGNMENTS {'_0': Variable('_0'), '_1': Variable('_1')}\n", + "ASSIGNMENTS {'_0': Variable('_0'), '_1': Variable('_1')}\n", + "ASSIGNMENTS {'_0': Variable('_0'), '_1': 1}\n", + "ASSIGNMENTS {'_0': Sum((Remainder(Sum((Variable('_0'), -1)), Variable('nelements')), If(Comparison(Remainder(Sum((..., -1)), Variable('nelements')), '<', 0), Variable('nelements'), 0)))}\n", + "ASSIGNMENTS {'_0': Variable('_0'), '_1': 0}\n", + "ASSIGNMENTS {'_0': Sum((Remainder(Sum((Variable('_0'), 1)), Variable('nelements')), If(Comparison(Remainder(Sum((..., 1)), Variable('nelements')), '<', 0), Variable('nelements'), 0)))}\n", + "ASSIGNMENTS {'_0': Variable('_face_swap_array0_dim0'), '_interp_reduce': Variable('_face_swap_array0_interp_reduce')}\n", + "ASSIGNMENTS {'_0': Variable('_face_swap_array1_dim0'), '_interp_reduce': Variable('_face_swap_array1_interp_reduce')}\n", + "ASSIGNMENTS {'_0': Variable('_0'), '_1': Variable('_1')}\n", + "ASSIGNMENTS {'_0': Variable('_0'), '_1': Variable('_1')}\n", + "ASSIGNMENTS {'_0': Variable('_0'), '_1': Variable('_1')}\n", + "ASSIGNMENTS {'_0': Variable('_0'), '_1': Variable('_1')}\n", + "ASSIGNMENTS {'_0': Variable('_0'), '_1': Variable('_1'), '_interp_reduce': Variable('_interp_reduce_0')}\n", + "ASSIGNMENTS {'_0': Variable('_0'), '_1': Variable('_1')}\n", + "ASSIGNMENTS {'_0': Variable('_0'), '_1': Variable('_1')}\n", + "ASSIGNMENTS {'_0': Variable('_1'), '_1': Variable('_0')}\n", + "ASSIGNMENTS {'_0': Variable('_face_mass_reduce'), '_1': Variable('_1')}\n", + "ASSIGNMENTS {'_0': Variable('_1'), '_1': Variable('_0')}\n", + "ASSIGNMENTS {'_0': Variable('_stiffness_reduce'), '_1': Variable('_1')}\n", + "ASSIGNMENTS {'_0': Variable('_1'), '_1': Variable('_0')}\n", + "ASSIGNMENTS {'_0': Variable('_0'), '_1': Variable('_1')}\n", + "ASSIGNMENTS {'_0': Variable('_0'), '_1': Variable('_1')}\n", + "ASSIGNMENTS {'_0': Variable('_0'), '_1': Variable('_1')}\n", + "ASSIGNMENTS {'_0': Variable('_1'), '_1': Variable('_0')}\n", + "ASSIGNMENTS {'_0': Variable('_inv_mass_reduce'), '_1': Variable('_1')}\n", + "ASSIGNMENTS {'_0': Variable('_1'), '_1': Variable('_0')}\n", + "ASSIGNMENTS {'_0': Variable('_0'), '_1': Variable('_1')}\n", + "ASSIGNMENTS {'_0': Variable('_out_dim0'), '_1': Variable('_out_dim1'), '_interp_reduce': Variable('_out_interp_reduce'), '_interp_reduce_0': Variable('_out_interp_reduce_0'), '_face_mass_reduce': Variable('_out_face_mass_reduce'), '_stiffness_reduce': Variable('_out_stiffness_reduce'), '_inv_mass_reduce': Variable('_out_inv_mass_reduce')}\n", + "---------------------------------------------------------------------------\n", + "KERNEL: loopy_kernel\n", + "---------------------------------------------------------------------------\n", + "ARGUMENTS:\n", + "_out: type: , shape: (nelements, nnodes), dim_tags: (N1:stride:nnodes, N0:stride:1) aspace: global\n", + "face_mass: type: np:dtype('float64'), shape: (nnodes, 2), dim_tags: (N1:stride:2, N0:stride:1) aspace: global\n", + "interp: type: np:dtype('float64'), shape: (2, nnodes), dim_tags: (N1:stride:nnodes, N0:stride:1) aspace: global\n", + "inv_mass: type: np:dtype('float64'), shape: (nnodes, nnodes), dim_tags: (N1:stride:nnodes, N0:stride:1) aspace: global\n", + "nelements: ValueArg, type: np:dtype('int32')\n", + "nnodes: ValueArg, type: np:dtype('int32')\n", + "normals: type: np:dtype('float64'), shape: (nelements, 2), dim_tags: (N1:stride:2, N0:stride:1) aspace: global\n", + "stiffness: type: np:dtype('float64'), shape: (nnodes, nnodes), dim_tags: (N1:stride:nnodes, N0:stride:1) aspace: global\n", + "u: type: np:dtype('float64'), shape: (nelements, nnodes), dim_tags: (N1:stride:nnodes, N0:stride:1) aspace: global\n", + "---------------------------------------------------------------------------\n", + "DOMAINS:\n", + "{ : }\n", + "[nelements, nnodes] -> { [_face_swap_array0_dim0, _face_swap_array0_interp_reduce] : 0 <= _face_swap_array0_dim0 < nelements and 0 <= _face_swap_array0_interp_reduce < nnodes }\n", + "[nelements, nnodes] -> { [_face_swap_array1_dim0, _face_swap_array1_interp_reduce] : 0 <= _face_swap_array1_dim0 < nelements and 0 <= _face_swap_array1_interp_reduce < nnodes }\n", + "[nelements, nnodes] -> { [_out_dim0, _out_dim1, _out_face_mass_reduce, _out_interp_reduce, _out_interp_reduce_0, _out_inv_mass_reduce, _out_stiffness_reduce] : 0 <= _out_dim0 < nelements and 0 <= _out_dim1 < nnodes and 0 <= _out_face_mass_reduce <= 1 and 0 <= _out_interp_reduce < nnodes and 0 <= _out_interp_reduce_0 < nnodes and 0 <= _out_inv_mass_reduce < nnodes and 0 <= _out_stiffness_reduce < nnodes }\n", + "---------------------------------------------------------------------------\n", + "INAME IMPLEMENTATION TAGS:\n", + "_face_swap_array0_dim0: None\n", + "_face_swap_array0_interp_reduce: None\n", + "_face_swap_array1_dim0: None\n", + "_face_swap_array1_interp_reduce: None\n", + "_out_dim0: None\n", + "_out_dim1: None\n", + "_out_face_mass_reduce: None\n", + "_out_interp_reduce: None\n", + "_out_interp_reduce_0: None\n", + "_out_inv_mass_reduce: None\n", + "_out_stiffness_reduce: None\n", + "---------------------------------------------------------------------------\n", + "TEMPORARIES:\n", + "_face_swap: type: , shape: (nelements, 2), dim_tags: (N1:stride:2, N0:stride:1) scope:global\n", + "---------------------------------------------------------------------------\n", + "INSTRUCTIONS:\n", + " for _face_swap_array0_dim0\n", + "↱ \u001b[36m_face_swap[_face_swap_array0_dim0, 0]\u001b[0m = \u001b[35mreduce(sum, [_face_swap_array0_interp_reduce], interp[1, _face_swap_array0_interp_reduce]*u[(_face_swap_array0_dim0 + -1) % nelements + (nelements if (_face_swap_array0_dim0 + -1) % nelements < 0 else 0), _face_swap_array0_interp_reduce])\u001b[0m {id=\u001b[32m_face_swap_array0\u001b[0m}\n", + "│ end _face_swap_array0_dim0\n", + "│ for _face_swap_array1_dim0\n", + "└↱ \u001b[36m_face_swap[_face_swap_array1_dim0, 1]\u001b[0m = \u001b[35mreduce(sum, [_face_swap_array1_interp_reduce], interp[0, _face_swap_array1_interp_reduce]*u[(_face_swap_array1_dim0 + 1) % nelements + (nelements if (_face_swap_array1_dim0 + 1) % nelements < 0 else 0), _face_swap_array1_interp_reduce])\u001b[0m {id=\u001b[32m_face_swap_array1\u001b[0m}\n", + " │ end _face_swap_array1_dim0\n", + " │ for _out_dim1, _out_dim0\n", + " └ \u001b[36m_out[_out_dim0, _out_dim1]\u001b[0m = \u001b[35m(-1)*reduce(sum, [_out_inv_mass_reduce], inv_mass[_out_dim1, _out_inv_mass_reduce]*(reduce(sum, [_out_face_mass_reduce], face_mass[_out_inv_mass_reduce, _out_face_mass_reduce]*(normals[_out_dim0, _out_face_mass_reduce]*reduce(sum, [_out_interp_reduce], interp[_out_face_mass_reduce, _out_interp_reduce]*u[_out_dim0, _out_interp_reduce]) + (-1)*((reduce(sum, [_out_interp_reduce_0], interp[_out_face_mass_reduce, _out_interp_reduce_0]*u[_out_dim0, _out_interp_reduce_0]) + _face_swap[_out_dim0, _out_face_mass_reduce]) / 2)*normals[_out_dim0, _out_face_mass_reduce])) + (-1)*reduce(sum, [_out_stiffness_reduce], stiffness[_out_inv_mass_reduce, _out_stiffness_reduce]*u[_out_dim0, _out_stiffness_reduce])))\u001b[0m {id=\u001b[32m_store_out\u001b[0m}\n", + " end _out_dim1, _out_dim0\n", + "---------------------------------------------------------------------------\n" + ] + } + ], + "source": [ + "with az.Context(queue) as ctx:\n", + " dg = DGOps1D(discr, ctx)\n", + " u = ctx.argument(\"u\", shape=\"(nelements, nnodes)\", dtype=np.float64)\n", + " ctx.output(advec_op(u))\n", + " \n", + "prog = ctx.build()\n", + "print(prog.program)" + ] + }, + { + "cell_type": "code", + "execution_count": 69, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[36m#\u001b[39;49;00m\u001b[36mdefine lid(N) ((int) get_local_id(N))\u001b[39;49;00m\u001b[36m\u001b[39;49;00m\n", + "\u001b[36m#\u001b[39;49;00m\u001b[36mdefine gid(N) ((int) get_group_id(N))\u001b[39;49;00m\u001b[36m\u001b[39;49;00m\n", + "\u001b[36m#\u001b[39;49;00m\u001b[36mif __OPENCL_C_VERSION__ < 120\u001b[39;49;00m\u001b[36m\u001b[39;49;00m\n", + "\u001b[36m#\u001b[39;49;00m\u001b[36mpragma OPENCL EXTENSION cl_khr_fp64: enable\u001b[39;49;00m\u001b[36m\u001b[39;49;00m\n", + "\u001b[36m#\u001b[39;49;00m\u001b[36mendif\u001b[39;49;00m\u001b[36m\u001b[39;49;00m\n", + "\u001b[36m#\u001b[39;49;00m\u001b[36mdefine LOOPY_CALL_WITH_INTEGER_TYPES(MACRO_NAME) \\\u001b[39;49;00m\u001b[36m\u001b[39;49;00m\n", + "\u001b[36m MACRO_NAME(int8, char) \\\u001b[39;49;00m\u001b[36m\u001b[39;49;00m\n", + "\u001b[36m MACRO_NAME(int16, short) \\\u001b[39;49;00m\u001b[36m\u001b[39;49;00m\n", + "\u001b[36m MACRO_NAME(int32, int) \\\u001b[39;49;00m\u001b[36m\u001b[39;49;00m\n", + "\u001b[36m MACRO_NAME(int64, long)\u001b[39;49;00m\u001b[36m\u001b[39;49;00m\n", + "\u001b[36m#\u001b[39;49;00m\u001b[36mdefine LOOPY_DEFINE_MOD_POS_B(SUFFIX, TYPE) \\\u001b[39;49;00m\u001b[36m\u001b[39;49;00m\n", + "\u001b[36m inline TYPE loopy_mod_pos_b_##SUFFIX(TYPE a, TYPE b) \\\u001b[39;49;00m\u001b[36m\u001b[39;49;00m\n", + "\u001b[36m { \\\u001b[39;49;00m\u001b[36m\u001b[39;49;00m\n", + "\u001b[36m TYPE result = a%b; \\\u001b[39;49;00m\u001b[36m\u001b[39;49;00m\n", + "\u001b[36m if (result < 0) \\\u001b[39;49;00m\u001b[36m\u001b[39;49;00m\n", + "\u001b[36m result += b; \\\u001b[39;49;00m\u001b[36m\u001b[39;49;00m\n", + "\u001b[36m return result; \\\u001b[39;49;00m\u001b[36m\u001b[39;49;00m\n", + "\u001b[36m }\u001b[39;49;00m\u001b[36m\u001b[39;49;00m\n", + "LOOPY_CALL_WITH_INTEGER_TYPES(LOOPY_DEFINE_MOD_POS_B)\n", + "\u001b[36m#\u001b[39;49;00m\u001b[36mundef LOOPY_DEFINE_MOD_POS_B\u001b[39;49;00m\u001b[36m\u001b[39;49;00m\n", + "\u001b[36m#\u001b[39;49;00m\u001b[36mundef LOOPY_CALL_WITH_INTEGER_TYPES\u001b[39;49;00m\u001b[36m\u001b[39;49;00m\n", + "\n", + "__kernel \u001b[36mvoid\u001b[39;49;00m __attribute__ ((reqd_work_group_size(\u001b[34m1\u001b[39;49;00m, \u001b[34m1\u001b[39;49;00m, \u001b[34m1\u001b[39;49;00m))) loopy_kernel(\u001b[36mint\u001b[39;49;00m \u001b[34mconst\u001b[39;49;00m nelements, \u001b[36mint\u001b[39;49;00m \u001b[34mconst\u001b[39;49;00m nnodes, __global \u001b[36mdouble\u001b[39;49;00m \u001b[34mconst\u001b[39;49;00m *__restrict__ u, __global \u001b[36mdouble\u001b[39;49;00m \u001b[34mconst\u001b[39;49;00m *__restrict__ interp, __global \u001b[36mdouble\u001b[39;49;00m \u001b[34mconst\u001b[39;49;00m *__restrict__ normals, __global \u001b[36mdouble\u001b[39;49;00m \u001b[34mconst\u001b[39;49;00m *__restrict__ face_mass, __global \u001b[36mdouble\u001b[39;49;00m \u001b[34mconst\u001b[39;49;00m *__restrict__ stiffness, __global \u001b[36mdouble\u001b[39;49;00m \u001b[34mconst\u001b[39;49;00m *__restrict__ inv_mass, __global \u001b[36mdouble\u001b[39;49;00m *__restrict__ _out, __global \u001b[36mdouble\u001b[39;49;00m *__restrict__ _face_swap)\n", + "{\n", + " \u001b[36mdouble\u001b[39;49;00m acc__face_swap_array0_interp_reduce;\n", + " \u001b[36mdouble\u001b[39;49;00m acc__face_swap_array1_interp_reduce;\n", + " \u001b[36mdouble\u001b[39;49;00m acc__out_face_mass_reduce;\n", + " \u001b[36mdouble\u001b[39;49;00m acc__out_interp_reduce;\n", + " \u001b[36mdouble\u001b[39;49;00m acc__out_interp_reduce_0;\n", + " \u001b[36mdouble\u001b[39;49;00m acc__out_inv_mass_reduce;\n", + " \u001b[36mdouble\u001b[39;49;00m acc__out_stiffness_reduce;\n", + "\n", + " \u001b[34mfor\u001b[39;49;00m (\u001b[36mint\u001b[39;49;00m _face_swap_array0_dim0 = \u001b[34m0\u001b[39;49;00m; _face_swap_array0_dim0 <= -\u001b[34m1\u001b[39;49;00m + nelements; ++_face_swap_array0_dim0)\n", + " \u001b[34mif\u001b[39;49;00m (-\u001b[34m1\u001b[39;49;00m + nnodes >= \u001b[34m0\u001b[39;49;00m)\n", + " {\n", + " acc__face_swap_array0_interp_reduce = \u001b[34m0.0\u001b[39;49;00m;\n", + " \u001b[34mfor\u001b[39;49;00m (\u001b[36mint\u001b[39;49;00m _face_swap_array0_interp_reduce = \u001b[34m0\u001b[39;49;00m; _face_swap_array0_interp_reduce <= -\u001b[34m1\u001b[39;49;00m + nnodes; ++_face_swap_array0_interp_reduce)\n", + " acc__face_swap_array0_interp_reduce = acc__face_swap_array0_interp_reduce + interp[_face_swap_array0_interp_reduce + nnodes] * u[nnodes * (loopy_mod_pos_b_int32(_face_swap_array0_dim0 + -\u001b[34m1\u001b[39;49;00m, nelements) + (loopy_mod_pos_b_int32(_face_swap_array0_dim0 + -\u001b[34m1\u001b[39;49;00m, nelements) < \u001b[34m0\u001b[39;49;00m ? nelements : \u001b[34m0\u001b[39;49;00m)) + _face_swap_array0_interp_reduce];\n", + " _face_swap[\u001b[34m2\u001b[39;49;00m * _face_swap_array0_dim0] = acc__face_swap_array0_interp_reduce;\n", + " }\n", + " \u001b[34mfor\u001b[39;49;00m (\u001b[36mint\u001b[39;49;00m _face_swap_array1_dim0 = \u001b[34m0\u001b[39;49;00m; _face_swap_array1_dim0 <= -\u001b[34m1\u001b[39;49;00m + nelements; ++_face_swap_array1_dim0)\n", + " \u001b[34mif\u001b[39;49;00m (-\u001b[34m1\u001b[39;49;00m + nnodes >= \u001b[34m0\u001b[39;49;00m)\n", + " {\n", + " acc__face_swap_array1_interp_reduce = \u001b[34m0.0\u001b[39;49;00m;\n", + " \u001b[34mfor\u001b[39;49;00m (\u001b[36mint\u001b[39;49;00m _face_swap_array1_interp_reduce = \u001b[34m0\u001b[39;49;00m; _face_swap_array1_interp_reduce <= -\u001b[34m1\u001b[39;49;00m + nnodes; ++_face_swap_array1_interp_reduce)\n", + " acc__face_swap_array1_interp_reduce = acc__face_swap_array1_interp_reduce + interp[_face_swap_array1_interp_reduce] * u[nnodes * ((_face_swap_array1_dim0 + \u001b[34m1\u001b[39;49;00m) % nelements + ((_face_swap_array1_dim0 + \u001b[34m1\u001b[39;49;00m) % nelements < \u001b[34m0\u001b[39;49;00m ? nelements : \u001b[34m0\u001b[39;49;00m)) + _face_swap_array1_interp_reduce];\n", + " _face_swap[\u001b[34m1\u001b[39;49;00m + \u001b[34m2\u001b[39;49;00m * _face_swap_array1_dim0] = acc__face_swap_array1_interp_reduce;\n", + " }\n", + " \u001b[34mfor\u001b[39;49;00m (\u001b[36mint\u001b[39;49;00m _out_dim1 = \u001b[34m0\u001b[39;49;00m; _out_dim1 <= -\u001b[34m1\u001b[39;49;00m + nnodes; ++_out_dim1)\n", + " \u001b[34mif\u001b[39;49;00m (-\u001b[34m1\u001b[39;49;00m + nelements >= \u001b[34m0\u001b[39;49;00m)\n", + " \u001b[34mfor\u001b[39;49;00m (\u001b[36mint\u001b[39;49;00m _out_dim0 = \u001b[34m0\u001b[39;49;00m; _out_dim0 <= -\u001b[34m1\u001b[39;49;00m + nelements; ++_out_dim0)\n", + " {\n", + " acc__out_inv_mass_reduce = \u001b[34m0.0\u001b[39;49;00m;\n", + " \u001b[34mfor\u001b[39;49;00m (\u001b[36mint\u001b[39;49;00m _out_inv_mass_reduce = \u001b[34m0\u001b[39;49;00m; _out_inv_mass_reduce <= -\u001b[34m1\u001b[39;49;00m + nnodes; ++_out_inv_mass_reduce)\n", + " {\n", + " acc__out_stiffness_reduce = \u001b[34m0.0\u001b[39;49;00m;\n", + " acc__out_face_mass_reduce = \u001b[34m0.0\u001b[39;49;00m;\n", + " \u001b[34mfor\u001b[39;49;00m (\u001b[36mint\u001b[39;49;00m _out_stiffness_reduce = \u001b[34m0\u001b[39;49;00m; _out_stiffness_reduce <= -\u001b[34m1\u001b[39;49;00m + nnodes; ++_out_stiffness_reduce)\n", + " acc__out_stiffness_reduce = acc__out_stiffness_reduce + stiffness[nnodes * _out_inv_mass_reduce + _out_stiffness_reduce] * u[nnodes * _out_dim0 + _out_stiffness_reduce];\n", + " \u001b[34mfor\u001b[39;49;00m (\u001b[36mint\u001b[39;49;00m _out_face_mass_reduce = \u001b[34m0\u001b[39;49;00m; _out_face_mass_reduce <= \u001b[34m1\u001b[39;49;00m; ++_out_face_mass_reduce)\n", + " {\n", + " acc__out_interp_reduce = \u001b[34m0.0\u001b[39;49;00m;\n", + " acc__out_interp_reduce_0 = \u001b[34m0.0\u001b[39;49;00m;\n", + " \u001b[34mfor\u001b[39;49;00m (\u001b[36mint\u001b[39;49;00m _out_interp_reduce_0 = \u001b[34m0\u001b[39;49;00m; _out_interp_reduce_0 <= -\u001b[34m1\u001b[39;49;00m + nnodes; ++_out_interp_reduce_0)\n", + " acc__out_interp_reduce_0 = acc__out_interp_reduce_0 + interp[nnodes * _out_face_mass_reduce + _out_interp_reduce_0] * u[nnodes * _out_dim0 + _out_interp_reduce_0];\n", + " \u001b[34mfor\u001b[39;49;00m (\u001b[36mint\u001b[39;49;00m _out_interp_reduce = \u001b[34m0\u001b[39;49;00m; _out_interp_reduce <= -\u001b[34m1\u001b[39;49;00m + nnodes; ++_out_interp_reduce)\n", + " acc__out_interp_reduce = acc__out_interp_reduce + interp[nnodes * _out_face_mass_reduce + _out_interp_reduce] * u[nnodes * _out_dim0 + _out_interp_reduce];\n", + " acc__out_face_mass_reduce = acc__out_face_mass_reduce + face_mass[_out_face_mass_reduce + \u001b[34m2\u001b[39;49;00m * _out_inv_mass_reduce] * (normals[\u001b[34m2\u001b[39;49;00m * _out_dim0 + _out_face_mass_reduce] * acc__out_interp_reduce + -\u001b[34m1.0\u001b[39;49;00m * ((acc__out_interp_reduce_0 + _face_swap[\u001b[34m2\u001b[39;49;00m * _out_dim0 + _out_face_mass_reduce]) / \u001b[34m2.0\u001b[39;49;00m) * normals[\u001b[34m2\u001b[39;49;00m * _out_dim0 + _out_face_mass_reduce]);\n", + " }\n", + " acc__out_inv_mass_reduce = acc__out_inv_mass_reduce + inv_mass[nnodes * _out_dim1 + _out_inv_mass_reduce] * (acc__out_face_mass_reduce + -\u001b[34m1.0\u001b[39;49;00m * acc__out_stiffness_reduce);\n", + " }\n", + " _out[nnodes * _out_dim0 + _out_dim1] = -\u001b[34m1.0\u001b[39;49;00m * acc__out_inv_mass_reduce;\n", + " }\n", + "}\n", + "\n" + ] + }, + { + "ename": "RuntimeError", + "evalue": "input argument 'interp' must be supplied", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mRuntimeError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mlp_prog\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mprog\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mprogram\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0mlp_prog\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mset_options\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlp_prog\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mwrite_code\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 5\u001b[0;31m \u001b[0mlp_prog\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mqueue\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mu\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mu\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32m~/src/loopy/loopy/program.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 389\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_program_executor_cache\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpex\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 390\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 391\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mpex\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 392\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 393\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m__str__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/src/loopy/loopy/target/pyopencl_execution.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, queue, **kwargs)\u001b[0m\n\u001b[1;32m 356\u001b[0m return program_info.invoker(\n\u001b[1;32m 357\u001b[0m \u001b[0mprogram_info\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcl_kernels\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mqueue\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mallocator\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mwait_for\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 358\u001b[0;31m out_host, **kwargs)\n\u001b[0m\u001b[1;32m 359\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 360\u001b[0m \u001b[0;31m# }}}\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/src/pytools/pytools/py_codegen.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 197\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 198\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m__call__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 199\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfunc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 200\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 201\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m__getstate__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m\u001b[0m in \u001b[0;36minvoke_loopy_kernel_loopy_kernel\u001b[0;34m(_lpy_cl_kernels, queue, allocator, wait_for, out_host, nelements, nnodes, u, interp, normals, face_mass, stiffness, inv_mass, _out)\u001b[0m\n", + "\u001b[0;31mRuntimeError\u001b[0m: input argument 'interp' must be supplied" + ] + } + ], + "source": [ + "u = np.sin(discr.nodes()).reshape(discr.nelements, discr.nnodes)\n", + "\n", + "lp_prog = prog.program\n", + "lp_prog = lp.set_options(lp_prog, write_code=True)\n", + "lp_prog(queue, u=u)" + ] + }, + { + "cell_type": "code", + "execution_count": 70, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[,\n", + " ,\n", + " ,\n", + " ]" + ] + }, + "execution_count": 70, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYIAAAD4CAYAAADhNOGaAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAgAElEQVR4nOydd1gUZ9eH79kCLEU6SBURRAFBxYq9o8bejYmmx1RLTDQx3fTEFEsSY/Q19thib4hYsBdAikrv0nvdMt8fS5Akms8YDCpzX9deujNnZs6zMXNmfuc85xFEUURCQkJCoukia2wHJCQkJCQaFykQSEhISDRxpEAgISEh0cSRAoGEhIREE0cKBBISEhJNHEVjO3A32NjYiG5ubo3thoSEhMQDxcWLF/NEUbT98/YHMhC4ublx4cKFxnZDQkJC4oFCEISUW22XpCEJCQmJJo4UCCQkJCSaOFIgkJCQkGjiSIFAQkJCookjBQIJCQmJJk6DBAJBEFYJgpAjCELUbfYLgiB8JwhCvCAIkYIgdKy3b7ogCHG1n+kN4Y+EhISExJ3TUG8E/wOC/mb/UMCz9vMs8D2AIAhWwLtAV6AL8K4gCJYN5JOEhISExB3QIIFAFMXjQMHfmIwCfhH1nAEsBEFwAIYAh0VRLBBFsRA4zN8HlH/FldB4rhxLpapMfa8ucd+iq6igZP9+Ctavp+r6df5p+/EbxVVsu5jOujMp5JRW3SMvJSQkboVOpyM5OZn9u/ei1Wob/Pz/1YQyJyCt3vf02m232/4XBEF4Fv3bBK6urnflxKktP1FekEmocTda+HendRcH3P1tMVA9kPPq/l90NTWUnzhB8d59lIQEI1TV1O0rN1WQ3daOUr+WiB19MW/RClvk2Gt12NZUIRZlk5WeRElOKrqSLJqp8xkkFFCCCUt2jyClxViGtncjyKc5liYGjThKCYmHE1EUycjIICoqiqgrUZSVlyEXZbTz9MG5jVuDXuuBuQOKorgCWAHQqVOnu1pNZ8CwkUQEHyA5cz8J586QfLkrSmNv3NrZ4tHJDjc/G5QG8gb1+79GVKspP3OWkn37KA0ORldaSoWxnJNtdVxvb01rByeMY29gGVeMc2wW7ucz4ecwcszhXAuBK24CUS0EdCoRW60WcwMZFtaGWCttcDRpS4fSXD7MWk1O1i6WpIzgw9/60dXTkRH+jgzytsfMSNnYP4GExAOLKIpkZ2frb/5RURQVFSETZDhrrOgsuGFr14zmzo4Nft3/KhBkAC71vjvXbssA+v5pe+i9cqJZmildVYPpOHQIJ2O2kJNyAJlwgZTIriRc9kRppKSlnw2enexw9bZGrnwwiqpErZaKixf1N/+Dh9AWFqIzNiLCW8W+VjIKfBx50tKXF85tQpl4BQxB7GpLeU8bbhSZkpGspSa9ksCrZfSP1L925tqakOJlQYKHAZecqjkhFqBRF4IRTA2cxqtp1/kw7X/MU+5lZcZoFlzrwXyFEf297Bjh70j/NnaoHvCgKiHxX5Gbm0t0dDRRUVHk5eUhCAIuKnv81G1xk9lj2MaMo+fXcOFaIpZDWuDg6dWg1xcaaqlKQRDcgD2iKPreYt9w4CVgGPrE8HeiKHapTRZfBH6vIroEBIii+Hf5Bjp16iTeTa+h9KRUSo6l0eyqDqWTKWXeVYQd2khuciKmVnZYu/ajKNeV6gotBioF7u1t8Oxkj1MbS+Ty+ysoiKJIVWQkJfv2UbL/AJqcHASVisquPvzmlsdu2zTsLJx5tvVkRkTuQxl3kGqXnmxvPouDWSrOpJRSpdYhE6CdkzmBHjb0cLPEtyob7YVzlJ8+Q8XFi4hVVSCTYeTjjRDgxwG3EpZVH6CleUs+aTken4sbIPU0Ncb2BFtN5aMbXcgoEzE2kDPI254Rfo70am2DoUIKChIS9SksLKy7+d+4cQMAV0cX3LX2OKWqMFYaYdy1OdeKz3Fq90ZMLK3w7v04PSf1QxCEu7qmIAgXRVHs9JftDREIBEHYiP7J3gbIRl8JpAQQRfEHQe/1UvSJ4ArgCVEUL9Qe+yTwZu2pPhJFcfX/d727CQSiKLJ69WoyMzMZ2rE/DhcEdDU6zIe6kW2QxumtG8lJTsDcvjmeXYejrvYgKbKAmiotRqZKWnW0w7OTHQ4eFshkd/cf4d8iiiLV165RsncfJfv2oc7IQFAqMendm/RubiwzPktE2VWcTJ14zu85HqmRodw7G6rLSOn4BmMv+pJfoaG1vSmBrWwIbGVNV3drzFW3lnN0NTVURURQfvoM5WfPUhkRATodFS9M5nW7oxRUFfK8//M8ZeqJ4tgXkHoK0cyBZK9nWF3dh93RBRRWqDEzUhDk05wR/o4EtrJGcZ8FVQmJ/4qSkpK6m39GRgYAzs7OtG3RGucsU+Sx5QhKGabdHdF4ytj/8zdkJ8bRulsfKiu7kZuqZtzrATR3N7+r69/TQPBfc7dvBOXl5WzevJnU1FR6d+tJu0x7qq8XYehpgeU4T1ISIji1ZQM5SfqA0HnkBEws/Ui4nE9yRB4atQ4bF1MeeckfE3PDezCy26OrrCRj7muUhYSAXI5JYCDNhg0lvI0hy+P/R2xBLM6mzjzr9yyPOPVBefhtuLwOmvtxwOtDXg6uwMXSmBWPB+BhZ3ZXPmhLSsicv4CykBCMJ45led9q9qUdxM/Wj497fESLglQI/RRSwsC0OdrAVwkzf4Sd0YUcir5BabWGljYm/O+JzrSwNmngX0hC4v5FFEXCwsI4cuQIoijSvHlzfH198bJviex8CZVX8hCUckwDHTHp6ciVkwc5vv5/KAwM6DbuKaJOqqgqVTNghjceAXZ37YcUCGrRaDTs3r2biIgIfHx8GOjYjYoDqaCQYTnaA5WfDYmXznN66wayE+Mxt7On65hJeHTpTXJkIcc3X8fYTMnIV9tjbmvcwCO7NdqiItJmvkBlRAS2r76K+YTxnCgL54eIH4gtiMXFzIVn/Z5luPtwlGnnYcdzUJyO2GMOn1eP4vsTafTwsGb51ADMjf9dMlfUaslZvJiCn1dhEtid2FnDeT/6KzQ6Da91eo0JrScgJJ+sDQgnwdQeesyiyv9xjsSXsvC3K8hlMtY82Rkfx7t7qpGQeJDQ6XQcPnyY06dP4+3tTb9+/bDQGlMSknozAPRwxLSnExXVxRz8/ltSIi/j1j4A796PceLXDAyM5Ax7wQ+7Fs3+lS9SIKjH79E5ODgYJycnxg0ejWZPBjVppaj8bbEc1QpBpSDp8gVObdlAdmIczWzt6TpmIrYtOrPv+2gEGYx4uT22rnf3dH2nqLOzSXv6aWqSU3D48gsutFXwQ8QPXC24iouZC8/5Pcdw9+EodFo4+hGEfQeWblQ+spxXwgw4HJPNo11deW+kD8oGlGSKtm0n6733MHB2RrX4Q95L+5HTWafp5dSL9wPfx9bYFn4PCMknagPCqyS4TuCxX65QWqXhp+md6OZu3WA+SUjcb2i1Wnbu3ElkZCRdunRhYIfelIak6QOAof4NwLSnE3ITJVdPHefIyuVoNGr6THsKUfDh1PYEbF3MGDbTD1PLf69CSIHgFly9epVt27ahUqmYMmkKJlfVlBxJRW6qxHJCa4w8LRFFkaTwC5zeupEb8dexc2vFoGff5ODKeKorNQyf6YeT172ZDF2dmETq00+hKy6h+ZJvWVi5idD0UFzNXHnO/zmGtRyGQqaA7GjY/ixkR0HADLK6vs2TG2O5dqOEtx/xZkag210nl/6OivPnSX/5FURRxOm7b9jVLJHFFxejUqh4p/s7DGoxSG+YHAbHPoWk42Dpxo2x25m2JZ3Uggq+m9yBIN/mDe6bhERjU1NTw6+//kp8fDz9+/envcydkj1J+gDQwxGznk7IjJVUlZVxZNX3XA07RnOP1gx5fjZXQsuICcuiVQdbBjzh3WBl7VIguA1ZWVls3LiRyspKxo0bh7upEwWbr6HJrcS0hyPmQW4ISjmiKBJ3Nox9S77E1s2doS+9zYEV1ynOrWDwUz606nD3ut2tqLxyhbRnnwNBwHHF97ybt4ZDKYeYGzCXad7T9AFAp4XTyyDkQzAyh5FLuazqyjO/XKRarWXJ1A709WpYv/5MTWoqaTNfoCYlhebvvkPh4AAWnFhATH4MI1uNZH6X+ZgZ1L41JYbCpmlgZk/RpJ3M2JJCZHoRH49px+QudzdJUELifqSiooINGzaQkZHB8OHDaaNzomh7PEbe1liN90RWK9GmXAnnwPffUF5YQPfxU/AfOIZDP8eQcb2IgKEt6DrCHaEBi1OkQPA3lJaWsmnTJjIyMhg4cCCBXbpRciCFslOZKOxUWE30wsBZfzOLP3+GXYs/xsnLm2GvLOTQyqtkJ5XQZ6oXPr1uOSn6H1MWFkb6y6+gsLTEeeVPfJixkl0Ju3it02tM96nty1eUCjtm6nX4No/AiG/ZFV/Da1sisG9myKrpnfG0v7ey1e9oS0vJmD2H8pMnsZo+HcvXZvFT9M/8FPkTdsZ2fNTzIzo376w3TjkNa8eAdSsqpv7GzG1JHLuey7whXrzQt9U9eXORkPgvKS4uZt26dRQUFDBu3Djcamwo2HwNo9aWWD/mjaCQoa6p5uSGNVzavwtLR2eGvTQXQxNH9i6LpLSwiv6PtcWra8O/KUuB4P9BrVazc+dOoqKiaN++PY888giapFIKt1xHW6am2UBXzPq4IMgFYsOOsW/Jl7j5d2TYywsIXn2N1Oh8uo5yJyCoxb+6mZXs30/G629g6O6Oy4oVfJ70E5uubeIF/xeY2X4miCJEbIR9r+sPGPoZov8Uvj4Sz3dH4ujsZskP0wKwNv1vq5pEjYbsTz+jcN06TPv0wfGrL4mqTODNE2+SVprG496P83LHlzGUG0JCCGyYBM3boX50B/N2JfJbeCZP9mjJwuFtG608V0Li35KXl8fatWuprKxkypQpNK8wI399LIZu5tg84YOglJOdGM++pV9RkJFGh6AR9Jo6nRuJFRxYEYVcITBspt9dl4f+f0iB4A4QRZFjx44RGhqKq6srkyZNQiUYULgzgcqIXAxczbCa6IXCRsWVkEMc+vE7PLsEMvTleYSuv871s9n49Xem53jPu3qdK9iwgewPF6EK6IjzsmV8F/8zq6NWM8NnBnMC5iBUFMCeVyF2N7gGwpgfqDJ1Zu6WCPZGZjGuozMfj/Vt1MlbhRs3cmPRRxi6u+P8/fdo7C1ZfHExm69txsPCg097fYqXlRdc3Qebp4FrN3RTt/DhoWRWhyUzur0jX0zwb9DEtoTEf0FGRgbr169HEASmTZuGZakReWuiMXA0xeZpX1AKnN+5jVNb1mPczJwhL8zGza8DUcfSOb45Dsvmxgx/0Y9m1qp75qMUCP4BUVFR/Pbbb5iamjJ16lTs7OyoiMihcEcCgkLA7qUOKCwMubRvJ0fX/ETbXv0Ien4Wp3YkEnEkDc/O9gyY3ha54s5uZqIokrd0GXnLlmHarx9OXy/mp2trWBq+lImtJ7Kw20KEmnJYNQTyrkP/hdD9JXLK1DzzywUiM4p5I6gNz/V2vy+klbKwMDJmzUZQKnFeuhTjjh04kX6Cd069Q4W6gnXD1uFp6QlXtsK2p8FjAOKk9Sw/mc4XB6/R18uW5Y92xNjggWmFJdHESUhIYNOmTZiYmPDYY49hWqIgb1UUChsVts+0Q2as5MiqHwg/uAevwN4MeGomhioTTm6N58rRdFq0s2bwUz4YGN3bf/NSIPiHpKens2nTJtRqNePHj8fT0xN1TgU5y8JRWBth+7w/MgM5Z7ZvJmzzWvwGBjHgqRcIP5zG6R0JuHpbEfRcO5SGf/90Lmq13Fi0iKKNmzAfMwaHDz9g7bUNfHHhC0a4j2BRz0XIRODXx+DaPnh0C3gMJCqjmGd+uUBxpZqvJ7VniM/9VXlTnZhI2syZaDKzcPhoEeYjR3Kj/AZT9k7BUG7IpuGbsDCygEtrYddL+jzHhDVsupjJmzuu4O9iwarpnaXOphL3PVFRUWzfvh1bW1umTZuGYRHkrryCvJkBts/5ITc1IDL4AId/WkrA8NH0ffxpqis1HPopitSYAvwHuhA41uM/kURvFwik9+/b4OzszDPPPIOlpSUbNmzg7NmzKGxVWE1pgzqrnMKt+p7+XcdMpMuo8UQGH+D4ulV0GOxKv8fakBZbwM5vLv/t2ge6mhoy5r5G0cZNWD/9FA4ff8TWxB18ceELBrUYxAc9PkAmyPSll1f3wOBF4DGQg9E3mPDDaQRgy/Pd77sgAGDo7o7bpk2oOnQg8/U3yPn6G+xVdnzT7xtyK3KZe2wuap0aOj4GQZ/px/fbTCZ3cmb5owFEZ5Yw4cfTZBZVNvZQJCRuy7lz59i6dSvOzs7MmDEDwzIZuauikJkosX26HXJTA9Jjojiy6nvc2gfQe9oTFOdWsu2zC6RfLaTftDb0HO/Z6Hkx+XvvvdeoDtwNK1aseO/ZZ5+959cxMjKiXbt25ObmcubMGcrLy2nT1Re5oYKyk5kIMgEjdwtcff2pLC3l8v5dAHQM6oG1sylXQjNIDM/Fzc8Gwz+teaAtKyf9hRcoDw3F7vXXsX3xRfYm7eXdsHfp6dSTxX0Wo5QrIXoH7JsH7achDniH748lsGD7FXyczNnwTFda2pje89/hbpGpVJgPH44mL4/CX9ZSfT0Oj2GTcLR0ZW3MWoqqi+jt3BucO4FMCWe/h7IcPHqOo5ObFZvOpbIzPIM+XnZYSW8GEvcRoigSGhpKcHAwXl5eTJkyBVmJlryfriBTyLB91g+FpRHFOdlsXbQQUysbxi14n9zUKnZ9E466WsvwF/1p1fHelnf/mffffz/rvffeW/Hn7VIg+H9QKBR4e3uj0Wg4e/YsN27coOOw7mgLqikLy0TZ3ASlvQkt/TtSkp/LpX27UBoZ4dunEw4e5sScyOT6uWxcfaxRmelvZpqCAtKefIrKiEgcPvkYqylTOJJ6hPnH5xPQPIAl/ZdgqDCErAjYMBmcAmDiGj7aH8eSkHhG+Duy4rEAzFX3/81RkMsx7dcPmZkphWvXUn7mNF2mz6NKrGF97HpsVDb42PhAi0DQ1OiDQU0ZLp2G08fLlm2XMth8PpVu7tY0Nzdq7OFISKDT6di3bx+nT5+mffv2jB07Fko05P0UCTqwfdYPpY2KmqpKti1aSFVFGRPe/pjCGwK7l0RgYmHI6Nkd/nW7iLvhdoFAkobuAJlMxqBBgwgKCuL69eucPHkSy7GeGLiYUfDrNWqyyhFkMgY/9zKtu/fi+LpVhB/ah1NrS8a81hGtVmT7lxe5kVRMTXoGKVMfpTohAedlS7EYPZqwjDDmHZuHj7UPS/ovwUhhBGU5sHEqGFvBpHXsiy1g5ckkHu/egu8mt8dI+eC0dRYEAesZM3D6ejFVEZHkfPY5szrOoqdTTz45+wnnb5zXGw54B7o8B6eXQuin+Dias21md8yMlEz96QzHr+c27kAkmjwajYatW7dy4cIFevTowahRo6BcQ+7PV9BV67B5yhelnTGiTsf+pYvJS0vlkVffwEBlzeFVMVg2N2Hc6wFY2P83fcruFCkQ/AO6du2Kr68vR48eJSUjVT85xEhB/ppotGU1yGRyhr00B/eOnTny83Kijx3BxtmMcfMCMDRWsvOrS1x85m00BQW4rvoZs759uXDjArOOzsLd3J3lA5djojQBTbW+tLIiHyZvIKXahDe2RtLexYKFw73vi8qgu6FZUBBWTzxB4YYNlB86zOe9P8fZzJk5oXNIL00HQYCgT6HDNH1eJOxbWlibsPX57rSwNuGpNefZFZHZ2MOQaKJUV1ezfv16YmJiGDx4MIMGDUJXriZ3ZRS6UjU2T/pg4KiXak9t3Uj8+dP0eewpXH3bc+jnaLQaHUOe8cHI5P5bxU8KBP8AQRAYMWIElpaWbNu2jSq5GpvHvdGWqclfH4uo0SFXKBkxewGuvn4c/P5brp8Nw9xWxeiZXqjKsohwnojlktUYd+zIldwrvBTyEg6mDvw46EfMDc31E8b2zIG0szB6OdV27XhxwyUEAZZO7YDBHZak3q/YzZmNyt+frLcWYnijkKUDlqIVtbwc8jLl6nKQyWDEd+AzFg6/A+d+wq6ZEZuf60YHV0vmbA4nMr2osYch0cQQRZHdu3eTnJzM6NGjCQwMRFepIW9VFJqCKqyne2Poqpd6rp0+yZltG/HpO5COw0ZydncSWfHF9H3UC8vm92f79Qf7rtIIGBoaMmHCBCoqKti+fTsKRxOsxntSk1RC0a4ERFFEYWDAqHlv4+Dpxd5vvyDx8nlKvv6UdhHLkRsbciyknNjcqzwf/DwWhhb8NOgnrFW1XTjP/gDh66D36+A7lk/2XSUqo4QvJ/jjbHl/vU7eDYJSidPir0ChIGPWbFyMHPiyz5ckFiey4MQCdKIOZHIYuwJaD4V9r0H4RpoZKfnpsU7YmRny6qZwyqs1jT0UiSZEZGQkUVFR9O3bl/bt26Or1pK3Ogp1dgXWj7XFqJUFANlJCRxY/jWOrdsy8OkXSY0p4NKBFLx7OtK6y/1X3fc7DRIIBEEIEgThmiAI8YIgzL/F/q8FQQiv/VwXBKGo3j5tvX27GsKfe42DgwNDhw4lISGBsLAwjNvbYdbXhfJzNyg/kwWAgZGKMfPfxcalBbu+WERSyGGcn5lK/+k+5KSUsuynzRgpjFg5eCX2Jvb6E8cfgYNv6mvq+y5g/5Us/ncqmad6tmTwfVgierconZxw/ORjqmJiyPnscwIdA5nXaR5H046yLHyZ3kiuhAn/g5Z9YOcLELMTc2Mliye1Jzm/nPd3RzfqGCSaDgUFBezduxdXV1d69eqFqNaR/0s0NWmlWE1ug8rLCoDyokJ2frEIlVkzRs59k6oyHcGrY7B2MqHXRM9GHsXf868DgSAIcmAZMBTwBqYIguBd30YUxdmiKLYXRbE9sATYXm935e/7RFEc+W/9+a8ICAjAx8eHkJAQUlJSaDa4BUZtrSjanUBVvD7OGZmYMvKJ51FVVHHBw4ma/n0xaq0m2fEybVJ78lmrJTibOetPmBcPW58A27Yw5kdSC6t4fWsk/i4WvBHUpvEGeo8w698fqxkzKFy/npIDB3m07aOM9RzLisgVHEg6oDdSGsGUjeDcBbY+BdcP0c3dmhf7evDrhXT2RmY17iAkHnq0Wi3btm1DEATGjh2LoIP89bFUJxRjOaE1xu1sANCo1ez66mMqS0sYNW8hKjNzDv0chUatY8gzvigaqI30vaIh3gi6APGiKCaKolgDbAJG/Y39FGBjA1y3UamfL9i6dSsVlRVYTfJCYWtMwYZYNPmViGo1Be++R7cbxZja2LL9s/eYtelpzrrvxsRGSdSvhfoJZ1XFsHEyyBQwZSPVchUvbazNC0x58PMCt8NuzmyM/PzIWrgQdXo6b3V9iw52HXg77G2i82uf+A1M4NFfwd5bP7s66TivDvSkvYsFC7ZHkiFNOJO4h4SGhpKRkcGIESMwb2ZOweZrVF0twGK0ByYd9W/yoigSvHIZmddjCXphNvYtW3Hu97zA1Ps3L1CfhrjDOAFp9b6n1277C4IgtABaAiH1NhsJgnBBEIQzgiCMvt1FBEF4ttbuQm7u/VFGaGRkVJcv2LFjBxjIsHlc/zKUtyaGnKU/UBURSct332XcOx9TRiV+J+CbXp/zyLMdqCyt4ei6WMQtT0JhEkz8BSxb8Mm+q0SmF/PFBH9crB78vMDtEAwMcFq8GGQyMmbNRqGFxX0XY2Fkwashr5JXmac3NDKHaTvAsiVsehRlWRbfTm6PVicye1M4Wt2D1yZF4v4nOTmZEydO0L59e3x9fSk5ol9a0nxYS0y7OdTZXdq3i+jQYLqNm4JX956kRudz8UAK3j0c7kkr6XvBf/2oORnYKoqitt62FrW9L6YC3wiC0OpWB4qiuEIUxU6iKHaytbX9L3y9IxwcHAgKCiI+Pp6wsDAU1iqsHm2LJreCyggZzUaPodnQoezI3c/B9pkY1yjI2XcKW1czuo1qRWJ4HjFXBBj2Jbj15ECUPi/wZI+W92XriIbGwLk2XxAdTc7nX2CjsmFJ/yWU1JQw6+gsarQ1ekMTa71MpNPA7ldoYWXMh6N9OZdcwPeh8Y07CImHjsrKSrZv346VlRVDhw6lJqOM0qNpGHeww6y3c51dcsQljq39GY/O3QkcP4WywmoO/54XmNS6EUfwz2iIQJABuNT77ly77VZM5k+ykCiKGbV/JgKhQIcG8Ok/pVOnTn/IFyhtBGoS96Bo7odx4AwSixJZdnkZHfx602XEeKJDg0m8fJ72tmE4G4RzsvxZCp0nkppfwbytkfg7mzN/6MOXF7gdZgMGYDV9OoXr1lFy8BBtrNqwqMciInIj+OD0B9Q1RrRqCQPfh/hguLyOMR2cGOnvyNfBcVxKLWzcQUg8NPxeKlpWVsa4ceMwkCsp3HIdmYkCixHudXYFmRns+fYzbFxcGfrSHEQRDq+KfmDyAvVpiEBwHvAUBKGlIAgG6G/2f6n+EQShDWAJnK63zVIQBMPav9sAPYCYBvDpP+X3fIGFhQVbt24l6f33qY7eh6GHAeWnstm6Yw3GSmPe6vYW3SdMxdrZlcPLF1O9czYD251DYWTEwZXRvLL+IgBLp3Z8aPMCt8Nu7py6fEFNWhqD3QYz038mOxN28kvMLzcNOz8NLXrCwTcRSjJZNMYXB3MjZm0Kp7Tq9g3+JCTulMuXLxMTE0P//v1xcnKi5Gga6hvlWI65ucRkVXkZv33xITKZnFHz3sbASMW5PUlkxhU9MHmB+vzru40oihrgJeAgEAv8KopitCAIHwiCUL8KaDKwSfxj3+u2wAVBECKAo8Cnoig+cIEA9PmCiRMnUlFWxpGyMmxefhmbGZ0psKtg3PXefNhyITYqGxRKJUGPP0p5SQmheW0xeXQ5/R9vS356GRYJFXwx/uHOC9wOfb7gKxAEMmbPQVdTw/P+zzOoxSAWX1zMyYyTekOZDEYtrZOImhkq+GZSe9ILK3h3p1RSKvHvyMvLY//+/bi5uREYGPgHSUjlrZ/ro9Np2fvdFxRnZzFy7puY29mTGqPPC7R9gPIC9WmQx05RFORjtiYAACAASURBVPeJothaFMVWoih+VLvtHVEUd9WzeU8Uxfl/Ou6UKIrtRFH0r/3z54bwp7Gwrq6mfUQEWY6OXPNuS1JZMq9YfkSVkZrWRyzQllSDupLmZ96ks10O0bkmJF5L4Jpcy2UDDZ2rlfjI7v9GcvcKA2dnHD/+iKqoKHK++BKZIGNRj0V4Wnjy+rHXSSpO0hv+SSLq5GbFKwM82X45g53ht1MlJST+Ho1Gw7Zt21AoFIwZMwZBxy0loePr/0dy+EUGPDUT57a+lBdVE7w6BiuHBysvUJ+mpT/cQ8SaGjJem4dnRiZt3d05EhLCogOL0KpEbKf7IlZpyPslBnHHK5AVQffn38Xa2ZUD33/HW5vPke+hwqK5McH/i6GytKaxh9NomA0ciNX0xylcu5aSw4cxVhrzXf/vUMqVvBLyCsXVxXrDehIRxem81M+DTi0sWbgjirSCisYdhMQDydGjR8nKymLkyJGYm5vfUhKKPnaEi3t20CFoBH4DgtBpdRz6ORp1tZYhz/iifIDyAvWRAkEDkbtkKVVRUTh++AGjJk5EbizHJs6Gef7zsHNzxmqSF+r0MgojHGDAOyh8HmHAc69SUVxE5+wTfDctgCFP+1JdoSHkl1jEB3DluIbCbu5cjNq1I+vNt6hJT8fR1JHFfReTXpbO/BPz9b/NHySiV1HIBL6e1B6AWZvD0Wh1jTwKiQeJxMREwsLCCAgIoG3btreUhG4kxHF4xRJcff3p+/jTAJzfm0xmXBF9pnph5fBg5QXqIwWCBqD8zFnyV67EYsIEmg0eTGZVJiGWIah0KkovlaLT6VA1L8ZMuZUK7SCq7GcAsPqalgvmHfAsjkWdEo2Nsyndx7Qi+Uo+UcearsQhGBjg9PViADJmz0GsqSHAPoB5neZxMuMkexL36A3/JBG5WBmzaIwvF1MKWRIilZRK3Bnl5eXs2LEDGxsbhgwZgqjR/UUS0mm1HFqxBJVZMx6ZPR+ZXE5aTAEX9ifTNtCBNvXmFTyISIHgX6IpLCTzjTcwcHPDfsF8NDoNC8MWojZV02dgH+Li4jh96hTsnUsz1W4UVkqKdiZwKDKTVWFJtBk6Tl9F9OMSqsrL8OvvjKuPNWHb4snPLGvs4TUaBs7OOHz8EVVXrpD95ZcATG4zGT9bP7688OVtJaJR7Z0Y29GJJSFxXEguaMQRSDwIiKLIrl27KC8v15eKGhhQEpL6F0no8oE95CYn0u+J51CZmlFeVM3h1dH6vMDkBzMvUB8pEPwLRFHkxjvvoCkowPHLL5AZG7Mmeg1X8q7wZtc36RfYD29vb4KPBJOaeBVhwHwsxnihya/i8uartHMyZ8EIX4JemE15cRGha1YiCAIDprfFwEjO4Z+j0ai1/78jDynNBg3C8rHHKPxlLaXBwcgEGe90e4fi6mK+ufSN3qi+RLTrFRBFPhjli7OlMa9uCqe4Uioplbg9Fy9e5Nq1awwcOBAHBwe9JBT6R0moND+PsF/X0bJDJzy7BD40eYH6SIHgX1C0ZQulh4OxmzULlY8PCUUJLAtfxkDXgQS5BSEIAiMH98GCUrbKRlPhMwVZS3POG8NErYJlQd4YKuQ0b+VJl1HjiT4WTOKl8xg3M9CXlGaUc3p7QmMPs1Gxm/caRr6+ZNbmC7ysvHi07aNsvb6V8JxwvdHvElHCEbi8DlNDBd9Obs+NkioW/hbVpPMtErcnJyeHAwcO0KpVK7p161ZPEjL4Q5XQ0TUrELVaBjz5PIIgPDR5gfpIgeAuqU5MJPvjTzAJ7I7VEzPQ6DS8HfY2JkoT3ur2Vt0qYkZhXzBB3EM5xuzYuYtP98fyYUUpMqUc1bGMuptUt3FTsHFpweEVeonIrZ0Nfv2ciTyaTvKVvMYcaqMi+z1fIIp1+YIX2r+AvbE9H5z5ALWu9om/89Pg1qtOIurgasmcQa3ZHZHJ9ktNN98icWt+LxU1MDBg9OjRyGSyepKQR50klHjpPHFnT9Ft3GTM7ZqTFqvPC7R5CPIC9ZECwV2gq6khY+5ryIyMcPjkUwSZrE4SeqvrW9io9K1pybgI51fi2GUUg4cMIS4ujrNnTjMysAXWw1tSHV9EZYS+gZ5CqWTIzFl1EhFA97GtsHYyIeSXWCpKmm5JqYGLCw4fLaLqyhVyvlqMidKEBV0WEFcYx4bYDXojmQxGLvmDRPR8n1Z0bWnFOzujSM4rb9xBSNxXBAcHk52dzahRozAzM7ulJKSuruLIqh+wcnKh04gxlBdXc3iVPi/Q+yHIC9RHCgR3Qe7X31AdG4vDxx+htLf7gyQ0xG2I3kirgd2zwNQe+r+Ft39HbgjWBCgzmdndHpMuDihdzCjak4iuQv9U+2eJSKGUM+gpH2qqtBxZE4PYhLtsNhs8GMupUylYs4bKyEj6u/anj3MfloUvI6usdl2CP0hEa5HXlpTKZQKvbg5HLZWUSgBxcXGcOXOGzp074+XlVSsJXfuLJHRm+2ZKcrMZ9PSLyBVKwrbGU1OpZcjTD0deoD5SIPiHlJ0Mo2D1aiymTMasf//bSkKcXwk3IiHoEzAyZ+nReI5XOqNUyDl6JBhBJmA52gNduZriQyl15/+DRFRWhrWjKT3GeZAaXUDk0fRGGvX9ge2c2chtbbix6CMQRRZ0XYAoinx67tObRnUS0VtQnI6jhYpPx/kRkVbEN8HXG895ifuCsrIyfvvtN2xtbRk8eDBArSRU8QdJKC8thQu7t+PTZyDO3r5kxhURdz6bDkNcsXJ8OPIC9ZECwT9AU1BA5oL5GHi0wv711wFuLQmVZEHIImg1AHzGkJhbxqqTSQwLcKdvn95cvXqVhIQEDJxMMQ10pPxsFtWpJYBeIqqrIvpFLxH59nHCzc+GUzviyUsvbZSx3w/ITU2xmzuXqshIin/biZOpE8/7P09IWgihaaF6ozqJSFsnEQ1r58CkTi4sD03gTGJ+o45BovEQRZGdO3dSVVXF+PHjUSqVt5SE9AvNLMdAZUzvaU+g04kc33wdUytDOg5p0cijuDdIgeAfcOODD9EVFeP01VfIVKo6SWhQi0E3JSGAgwtAWwPDv0QEPtgTg6FCzutBXnTv3h1LS0v279+PVqul2eAWyM0MKNoRj6jVSz/27h50GTWhTiISBIH+j7XByFhJ8OoYdE1Y4jAfORKVvz85X32FtrSUx30ex8PCg0/OfkKFura1hFVLGHRTIgJ4Z4Q3btYmzNkcTkWNtPB9U+Ty5cvExcUxePBg7O3tbysJRR87QsbVaHo/+gTGzcyJOZFBfnoZPcZ5PnSS0O9IgeAOKT97jtIDB7Ce+TxGXl5/kITe7PrmTUkoLhiid0DveWDlTsjVHEKv5TJroCd2ZkYoFAqCgoLIy8vj7NmzyAwVmI9ohTqrnLLTmXXX6zZu8h8kIpWZAX2meJGfUU70iczbePnwI8hk2C9ciLaggLxly1HKlCzstpDM8kx+jPzxpmGnp/4gEZkYKvhivB+ZxVX8cCyx8QYg0ShUVVURHByMq6srXbp0AW4tCVWWlnBs3Socvbzx7TuQqjI1Z3Yl4uRlQauO98+CWA2NFAjuAFGrJfuTT1A6OmL95JPAbSQhdSXsmwvWntDjFarUWj7YE0MrWxMe7+5Wdz4vLy88PT0JDQ2ltLQUla81Rl6WlBxKQVNcDfxZIvoJgJbtbXDysuDc7iSqypvuRClVO18sxo+jYN06qhMSCLAPYIzHGH6J/oW4wji90S0kok5uVozwd+THYwmkF0qN6ZoSx48fp6KigqAg/fyeW0lCoO8sWlNRzsCnX0CQyTi7K5GaSi29Jra++bD3ECIFgjugaOs2qq9exW6evmT0tpLQia+gMBkeWQwKQ34+mURKfgXvjfT5y0IzQ4YMQaPRcOTIEQRBwGKUB6JOpHj3zQlkNyWiI3USUc8JramuUHN+T9J/NPr7E9vZs5GpVGR/9BGiKDI7YDamBqYsOrMInVgrnd1CIpo/tA2CAJ/uv9qI3kv8l+Tn53PmzBk6dOiAo6PjbSWh9KvRRB09RMDw0di6upGbVkr0iQza9XHC2sm0EUdw72mQQCAIQpAgCNcEQYgXBGH+LfbPEAQhVxCE8NrP0/X2TRcEIa72M70h/GlItKWl5H77LapOAZgFBel7CZ1cqK8S6lqvSij3Opz8BvwmQ8veZBVXsjQknsHe9vTy/OsrpY2NDd27dyc8PJy0tDQUVkY0G+BKZVQ+lVdv9sj5s0Rk42yKd09HrhzLoCCr6dbGK6yssH35ZcpPnaY0OBhLI0vmBMzhUs4ldsbvvGn4J4nIyULFs71bsScyi/NSL6ImwcGDB1EoFPTv3x+oJwmNvSkJaTUagn9ahpmNLd3HTUEURU5svo6hiZIuI1o2pvv/Cf86EAiCIAeWAUMBb2CKIAjetzDdLIpi+9rPytpjrYB3ga5AF+BdQRAs/61PDUne8u/RFhZiv2ABgiCwJnoNUflRvNX1LaxVta+Uogh754CBMQxeBMAn+66iFUXefuRWP4We3r17Y2pqyv79+9HpdJj1ckJhp6JoZzy6Gn2PoVtJRF1HuqM0lBO2Ne7eDv4+x3LqFAw9Pcj59DN0VVWM8hhFR7uOfHXxKwqratcwvoVE9Hwfd5o3M+KD3THomvDcjKZAQkIC169fp3fv3vqJY+mlekmoox2qtjcloYt7fyM/PZUBTz6P0siIuPPZZMUX0310Kwxrg8XDTEO8EXQB4kVRTBRFsQbYBIy6w2OHAIdFUSwQRbEQOAwENYBPDUJ1UhIFa9diPm7sH3oJ/UUSitwMySf0k5lMbTmbmM+uiEye7+3+t8tOGhoaMnjwYDIzMwkPD0dQyLAc7Ym2sJrSkLQ6uz9LRCozAzoPdyM1uqBJt58QFArs31qIOiOD/J9/RibIeLvb25TXlLP44uKbhn+SiIwNFCwY1oYrGcVsvdS052Y8zGi1Wg4cOIClpWVdL6GC33sJPXJTEirOyeb01o14dO5Gq4Cu1FRpOLUtHrsWZrQNfHjaSPwdDREInIC0et/Ta7f9mXGCIEQKgrBVEASXf3hso5Dz2efIDA2xmzULURT54PQHf5WEKgr0soNzZ+g4HY1Wx7u7onE0N2JmX4//9xrt2rXDxcWF4OBgKisrMXQ3xzjAntLj6aizb0o/dRLRymWoq6to19cZC3tjwrbGo9U03XJSk25dMQsKIn/FT6gzMvCw9GC6z3R+i/+NCzcu3DSsk4gWQnk+I/0d6ehqwecHrkmL3j+kXLhwgdzcXAYPHoxCoaD0RAaa7D9KQqIoErL6BwRBoN+MZwG4uD+F8uIaek1qjSB7eBPE9fmvksW7ATdRFP3QP/Wv+acnEAThWUEQLgiCcCE3N7fBHfwzZSfDKAsNxWbm8yhsbAhJDeFSziVe6fjKTUkI4Mj7UFkIj3wNMhkbz6Vy9UYpbw33RnUHNceCIDBs2DAqKioIDQ0FwHyoGzIjOYU74uua0imUSgY8OZOy/Dwu7duFXCGjx3gPirIruBLatJ9q7V+fB4JA9udfAPCc/3M4mTqx6Mwi1Nram7xMBsO/gpoyOPYZgiDw7ggf8sqqWXa0aXd4fRj5/f+nli1b0qZNG7SlNZQeTcPI2/oPklD8hTMkXjpP4ISpNLOxoyi7gvAjqbTp1pzm7uaNOIL/loYIBBmAS73vzrXb6hBFMV8UxeraryuBgDs9tt45Voii2EkUxU62tve2nlfUaMj+9BOUrq5YPv44aq2axRcX08q8FWM8xtw0TD0LF/8H3WZC83YUltfw5aHrdHe3Zli75nd8PQcHBwICAjh37hw5OTnITQ0wH9qSmuQSKi7m1Nk5e/vi0bkbZ3/bQnlRIS18rXH1tuL83uQmvc6x0tER62efofTgQcrPnEGlUPFm1zdJKE5gTUy9Zw5bLwiYDhd+hrx4/F0sGNvRiVUnk0jJb7qJ94eR0NBQqqqq6spFS46kImq0mA91q7OpqaokZPWP2Lq60WHoSABObo1DrpDRbUyrRvK8cWiIQHAe8BQEoaUgCAbAZGBXfQNBEOoLbSOB2Nq/HwQGC4JgWZskHly7rVEp3LSZmvgE7F+fh8zAgF+v/0pqaSpzOs1BIVPojbRq2DMbmjlB3wUAfHnoGmXVGt4b6fOPa4779++PoaEh+/fvRxRFjAPsMWjRjOJ9iWjrzRnoNfUJtOoaTm/diCAI9Jjgibpay9ndTbuc1PrJJ1E6OenLSdVqejv3ZqDrQH6M+JH00npvTH0XgMIIgt8F4I2gNijkAh/vi73NmSUeNHJycjh//jwBAQHY29ujzqmg/FwWJl0dUNrezNmd2rKBsvw8Bj7zInKFguQreaRcyafz8JaYmBs24gj+e/51IBBFUQO8hP4GHgv8KopitCAIHwiCMLLW7BVBEKIFQYgAXgFm1B5bAHyIPpicBz6o3dZoaAoLyV2yBOPu3TAdMICSmhJ+iPiBrg5d6eXU66bh2R8gJxqGfg6GpkRlFLPhXCqPdWuBV3Ozf3xdExMT+vfvT1JSErGxsfqmdGM80FVpKd5/8yZv5eiE38ChRB45QH56GlYOJrTr40TMiQzy0pvu0pYyIyPsF8ynOi6ewo2bAHijyxvIBBkfn/345uI0pnbQcxZc3QMpp7BvZsSL/Tw4GJ3Nqfimm3h/WBBFkYMHD2JoaEi/fv0AKN6fhKCU02yAa51dTnIil/btxG9AEI6t26JV6zj5axwW9sb49XNuLPcbjQbJEYiiuE8UxdaiKLYSRfGj2m3viKK4q/bvC0RR9BFF0V8UxX6iKF6td+wqURQ9aj+rG8Kff0Pe0mXoSkuxn68vF10ZuZLi6mJe6/Tazaf8ojQ4+gm0HgpthiOKIu/vjsbS2IDZA+++T/nvTzAHDx6kpqYGZXMTTHs5UXEhm+rk4jq77uOnoDQ04vgG/c/V+ZGWGBgrOLnlepNejct0wABMAgPJXbIETX4+zU2a82L7FzmRcYIjqUduGnZ7Ecwc9Ul+nY6nerbE2VLFB3ti0DThPk4PA9evXychIYE+ffpgYmJCVUIRVbEFmPVzQW5qAICo0xG8chlGpmb0nKqfuhQRkkZxbiW9JnkiVzS9ebZNb8R/Q3VcHIWbNmExaSJGXq3JKMtgXew6RrYaSRurNjcND8wHRBj2OQgCuyIyOZ9cyOtDvDD/FzXHcrmcoUOHUlxcTFhYGADNBrgitzDUJ45rb1LGzczpOmYiiRfPkRoViZGJkq4j3Mm4VkRSeNN9qhUEAfu33kRXWUnuN/o1jae2nYqXpRefnPuEcnVtHsDAGAa8A5mXIHo7Rko5bw5ry9UbpWw6n/Y3V5C4n9FoNBw8eBBra2u6dOmin6m/Lwm5uSFmPRzr7K6EHCIr7hp9H3sKlakZZYXVnN+XTEt/G1zrtZtoSkiBoBZRFMn+5FNkJibYvvIKAN9e+ha5IOelDi/dNLy6Ty8r9HkDLFwpr9bw8b5Y2jmZM6GTy23Ofue4ubnh6+vLyZMnKSwsRGYgx2JkKzTZFZSdvJlH7zB0BGY2thxb9zOiTodPL0esHE0I2xaHVt10n2oNW7XCato0irZuo/JKFAqZgne6v0NuRS7LwpfdNPSbBM3bQfD7oK5iqG9zurS0YvHh69KC9w8o586do6CggCFDhiCXy6mIyEWdUUazIDcEpb6Cr6K4iOMbVuPi3Y62vfTS0enazr89xns2pvuNihQIaik7Gkr5qVPYvvQiCktLruReYX/Sfh73eZzmJrUVQDXlsP91sG0L3V8EYOnReLJLqnlvpA/yBqo5HjRoEDKZjIMH9Xlzlbc1Rt7WlASnoimoAkBpYEivyY+Tk5RAbNgxZHIZPSd4UpJXRURI036qtXnpReTW1mQvWoSo0+Fn68eE1hNYH7ue2PzapLBMBoM/guJUOPcjgiDwziPeFFbU8N2Rpj1j+0GkrKyMY8eO4eHhQevWrRHVWkoOJKN0MsXY/2aV4bG1P6OuqmbA0y8gCAKZ8UVcP5dNh8GumNuqGnEEjYsUCACxpoaczz7DwN0dyyn6PiNfXvgSKyMrnvR98qbhsc+hOE0/Z0CuJCmvnJUnEhnb0YmAFg3XGcPc3JxevXrVLWADYDHSHQQoqteUrk2PPti7e3By4y+oa6pxaWuFm58NF/YlU15cfbvTP/TITU2xmzOHyogIinfqC9he6fgKFoYWf2xK594HPIfA8a+gPB9fJ3Mmd3Zhzalk4nOabuL9QeTo0aOo1WqGDNHP+C8Ny0RbXI35sJZ1k8LSYq4Qc+IoXUaNw9rJBZ1O30/I1NKQjkEP54Izd4oUCICCdeupSUnBfv4bCEolIWn6yWMvtn8RE2XtsnRFaXDme/CfAi26A/Bh7YIz84Pa/M3Z744/L2CjsDDCrL8rVbEFVCfqE8eCTEafaU9Smp/LpX36G16PcR5oNTrO7GzaPffNR4/CyN9Pv4BNWRnmhubMDphNZF4kh1IO3TQc9AHUlMLxzwGYO9gLlVLOR3tjGslziX/KjRs3uHTpEp07d8bW1hZtWe3ksbZWGLWyAPTS77G1qzCztqXLmIkAxJzMJC+tjMBxHg/tgjN3SpMPBJr8fPKWL8ekdy9Me/dGrVPz9cWvcTd3Z6zn2JuGoZ8CIvR7E4CQq9mEXM3hlQEe2DUzanC/lEpl3QI2586dA8A00BFZMwOKDyTVVQe5+PjRqlNXzv32KxUlxfryt/4uXD2dRU5KSYP79aAgyGQ0X7gQbX4+ecuWAzDCfQQeFh4subQEta42D2DXBjpO168xnZ+AjakhLw/w4Oi1XEKv5fzNFSTuB0RR5MCBAxgZGdG3b18A/eQxtRbzoTe7hl4/E0Z2YhyBEx9FaWBIVbmaMzsTcGptgUeAXSN5f//Q5ANB7rffoauqwn6+vnv2lmtbSClJYW6nuTcnj+VchYgN0PkZsHClWqPlg90xuNuaMCPw3rWobd26NR4eHoSGhlJWVobMQI75wBbUpJZSFXNz7d1eU2egrq7m9NYNAHQa5obKVMnJX+OadDmpql07zMeNpWDtWqoTE5HL5Lza8VVSS1PZEbfjpuGfJpnNCGyJm7UxH+6JQS2Vk97XxMbGkpycTL9+/VCpVKhzKyg/m4VJFweUdvrJY1qNhrDNv2Dt7Ip3b32CuG7BmUkP94Izd0qTDgRVsbEUbdmC1aNTMXR3p7SmlO8jvqdr8z9NHjvyARiYQq+5APx8Monk/AreHfHXBWcaEkEQCAoKQq1WExwcDIBxgD0KWxXFB5Pr1ji2dnLBb+BQIg7vpyAzHUOVgm6jWpGVUEz8hab9VGtXt4CNflJZH+c+dLTryPcR399c49jMHnrMgtjdkHIaA4WMt4Z7k5BbzrozKY07AInbolarOXToEHZ2dgQE6LvWFO9P1k8eG3hz8ljU0cMUZmXSa+p0ZDI5eemlRB/PwLcJLDhzpzTZQCCKItkff4Lc3BybF14AYOUV/eSxuZ3m3nxKSD0D1/ZCj1fAxJobxVUsDYlnkLc9fVrf+zVMbWxs6NatG+Hh4aSnpyPIBcyHuKHJqaTiUnadXeD4KSgNDTm+/n8AtAl0wMbFlFPb41HXrm3QFFFYW2P78kuUh4VRFhKCIAjMDphNXmUe62LX3TTs/iKYOcCht0AUGdjWjl6eNnx9+DoF5U23j9P9zJkzZygqKiIoKAi5XE51YhFVMfmY9b05eUxdXcXpbRtx9PLGvWMXRFHk+Kb/Y++8w6I60/7/eaYxtKE3aVIVVBRUBEVU7MbEVE3fZJNN2TSzm+3l3fLbd8ubbPqum56Y3mMSYwOxoKAoiF16l14HmHp+f5yRQROz2RUEnfleFxfDzH3OeQbx3PM8z/29PzbgzMrLHzjzXeWwiaBny1b69u8n4JGHUXp50dDbwJvH3uTKmCtJ8EuQgyQJtv0OPIIgTU4Wf/7qOGarxG+uOD9wZrg1b948PDw82LhxI1arFe0kPzThnnRvq0YyyTd5Ny9vUlfdQHlhPrXHDqNQCOaujqO3w0Dx1pqLNtaxKJ+bbkITG0PTn/+CdWCAaYHTmB8+n1ePvErnQKccpHGDrN9A/QE4+jFCCH6zMhG90cKTW0+N7htw6mvq6elh586dTJgwgejoaCSrROeXlSi9NHhm2M1jBzduQN/RTubNdyCEoKywmcayLtJWRaN1v/yBM99VDpkIrAYDzX/7Gy5xcXjfcAMAzxQ9gxCCh5Ifsgee2gw1e2HeT0Hjzv6qdj4rbuDezGgi/M4PnBluubi4sHjxYjvARgi8lo/H0mWkd0/jYFzKFavw8PNnx3rZZDYuzoeYlAAObqqmx+Y/cEQJtZrgX/0KU10d7a/KbTkeSX6EPnMfLx5+0R449UYImiInf9MA8UGe3DIrgrcKqjl5umd0Bu/UNyo7OxuLxcKSJUsA6D9jHltqN4/193Sz77MPiZkxi9CJiRgHzOR9VEZAhCcJQ5zGTjloImh/9TVM9fUE/fIXCJWKI61H+LLiS25PHGIes1pk1oBvNKR8D0mS+OMXxwjx0nL//IvfojYpKYnw8HCys7MxGo24RHujneBDd24t1n4zYDeZNVWUcSJvBwCzr41FkmDvJ47dc989PR3PJUtofeFFzC0txPrEclXMVbxz4h0ae23JVKGEJX+EzhrY9wIAjy6Kx1Or5g9fHHXojfexpPr6eoqLi0lLS8PPzw/JZKFrcxXqce64TbNXABV8+gGmgQEybrwdgOKtNeg7DcxdE4/CQYAz31UOlwhMTc20vvACHosW4p6efn7zWMn70HxMXi5Qqtl05DQldV08ujgeN43qoo9bCMHixYvR6/Xk5+cDoFs6HmnATM8Ou5M4IWM+geNj2PWubDLT+bsybXE4pfubOF3Rdb7TO4QCf/wjJKOR1n+uA+CBaQ8gEGe3nohZAHFLYOfj0NeOj7uGtYviyCtrY+uxpvOc2amLpTPlou7u7mRmZgI281inAa8V0YPmdK2eHQAAIABJREFUse7WZoo3f0FiZhb+4ZH09xgp3lZLTHIAITGOA5z5rnK4RNDy5JNgMhH0058CsL12OweaDvDDqT/EQ2OrIDANwPY/Qcg0SLwai1Xi8S0niQlw59rk0SNpRkREEB8fT15eHv39/WjGyfb5XpuLEmwms9vuoqe1haKvPgcgZWkkbl4adr13CsmBYe2ayEi8r7uOjg8+wFhXR7B7MDdNvIkN5Rso7RjSVuKMyWyHbDK7NS2S2EAP/rTxOAaz4268jwUdOXKE2tpaFi5ciFartZvHJvqijfUejNvzgVxKPXv1zQAc2FyN2Wgh9arobzyvo8uhEkH/4cN0ffopvt+7HU1ExKB5LMoriuvir7MHFr4st5JY9DtQKPj4YB3lLXoeWzIBlXJ0f2VZWVkYDAZ7d9Il45GsEt3Z9g3hiMlJRE9PpeAT2WSm0apIvyaG5uoeTu47PVpDHxPyf+CHCIWC1mefA+DuKXfjrnbnmYPP2IMCEyDldtj/IrSVo1Yq+M3KRKrb+ngtr2p0Bu4URqORrVu3EhwczLRp04Ah5rEV9gqg1tpqju3IYdrSlej8A+lpH+BIbj0T0kPwDXEfreGPaTlUImj+699Q+vvjd999AHx46kOquqv48fQh5rGBLnlZIHo+xCzAYLbw1LZSpoR6sWzyd8dPjpSCg4OZMmUK+fn59PT0oPLV4jErBH3haUwtfYNxmTffKZfOffgOABNSgwkcryP/k3LMDlxOqg4KwueWW+jasAFDaSneWm++P/n75NblcrDpoD1w/i9B6SJvHAPz4gPImhjIszlldDjLSUdF+fn5dHd3s3z5chQKhc08dhr3mcGD5jGA3e++gVqrZdbVciFI4ZeVSEjMvGL8KI187GtYEoEQYpkQ4qQQokwI8fNveP1HQohjQogSIUS2ECJyyGsWIUSx7WvDuccOp4J//zvG/fnPKD08ZPNY8T9JDU4lMyzTHrTnWehvl2cDwDsFNdR39vOTpRPGjANx/vz5WK1Wdu7cCYBnVjhCpaR7c9VgjF9YOEkLl1GyTTaZCYVg9rUx6LuMHN7xjVhoh5HfD+5G4eZG89NPA3BLwi0EuAbw1MGn7BvCnkEyyez4BtlLAvx8+UT0RjPrdjj2xvtoqK+vj7y8PCZMmEBkpHz76PqqCqFSoFtkbxhXf+IY5YUFpK66HldPHZ1NfRzfe5rJc0PR+Tlud9F/pwtOBEIIJfA8sBxIBG4SQpxbZF8EzJAkKQn4EPjbkNf6JUmaZvu6ihGUS0wMHnMzAHj58Mt0GDrONo/1nIa9z8Oka2FcMnqDmee2l5EW7cvcOP+RHNp/JD8/P5KTkzlw4AAdHR0oPTR4ZobSf6QNQ429v9DsG25GpdGw6+3XAAiN9yE80ZeDm6ox2iqNHFEqHx98v38nvduy6S8pwU3txn1T76OouYjc2lx74BmT2WbZZBYf5Mk100J5bU8VTd2OW447GtqzZw8Gg2EQP2mo6LKZx8JQetrIY5LErndew93bhxQbjL7g8wqUagXTh0Drnfq6hmNGkAqUSZJUIUmSEXgXWDU0QJKk7ZIknVm3yAdGFQra2NvI+mPruTL6ShL9huSsHX8DixGyfg3Aq3mVtPYa+emyiWNmNnBG8+bNQ6FQsH37dgA85oai8FDTvalq8FPtGZNZ2f586o4dASBtVTQDehPF2Y7NLPD93h0ofX1pfvJJAK6Ju4ZIXSTPFD2DxWpbOtO4y38L9YVwVO5NtHZRPBarxLM5TmbBxVJPTw/5+flMmTKF4OBg2Ty2sQKllwaPDHvxRsXBfdSfOEb69Tej1mppqemhrLCZaQvDcdNpRvEdjH0NRyIIBYbeVepsz51PdwFfDflZK4QoFELkCyGuPt9BQoh7bHGFLS0tFzTgbzSPtZXDwdflTpR+MXT2GfnXzgoWJQSREjF8rIHhkk6nIzU1lZKSEpqamlC4qNBlRWCo6MJwqmMwLmXFVbLJzEYyC4zUEZ0cQPG2GgZ6HZfEpfRwx//ee+jbm49+717UCjUPJT9EWWcZn1d8bg+cehMETZb3CswGIvzcWDMznHf31VLT1nfe8zs1fNq1axcWi2Wwu2h/SQumul50S8ajsLWPtlot7Hr7dXxCxjF5wWIA8j+rwMVNxbRFF04OvNx1UTeLhRC3AjOA/xvydKQkSTOAm4GnhBDf6NaSJOkFSZJmSJI0IyDgv+/xc7TtKF9UfMFtibcR4hFifyHn/4FSIyMogXU7Kug1mHls6X8Pox9pZWRk4OLiMjgrcE8NRumrpWtT1WCZqNpFS8aa2zhdXsqJvbsAmHVlNCaDhYObHbuhmveNN6IKCaH5SXlvYEnkEib5TeL54ucxWGxgn0GTWTXsk13ID2XFoVQInsp2tp4YaXV2dlJYWEhycrLNPGala1MV6hB33JLt5rHju3Jpq6thzprbUapUNJR2UnO0jZSlkbhcAEfcUTQciaAeGJpyw2zPnSUhxCLgV8BVkiQN4rMkSaq3fa8AcoHkYRjTN0qSJJ4ofAJfrS93Tb7L/kJDERz92LYmHERz9wCv7alk1dRxTAzWjdRwLlhubm7Mnj2bEydOyA3pVAq8lkRiatTTX2KfNSXOXUDA+Gh2v/M6ZqMR33HuTEgNpiS3Dn2n45LMFC4uBDzwQwZKSujNzh5sSHdaf5p3T7xrD4zJgthFMrymr51gLy3fmz2eT4rqOdXkbD0xksrNzUUIwbx58wDo3WMzj11hN4+ZjUby3n+ToOg44tPmIEkS+Z+V4+alYcqCUV2FvmQ0HIlgPxAnhIgSQmiAG4Gzqn+EEMnAv5CTQPOQ532EEC62x/7AHGDE0FA76naw//R+7p96v908BvK039UXZsvQ+mdySjFbJB5dPHZnA2eUlpaGm5sb2dnZALgmBaAOcadrSzWSWe6lLxQK5t92F90tzRRtkpc9Zq6MQrJIFG6sGq2hjwl5XX01mqgomp96CsliYVbILGaPm82Lh1+kxzjkJr/4j2DogZ3yZPa+eTG4a1T8fYtzVjBSamlp4dChQ8ycORMvLy8sehPd22u+Zh47tHUjPa0tZN4iN5arPtJGY1kXM1eMd3jy2HfVBScCSZLMwIPAZuA48L4kSUeFEH8QQpypAvo/wAP44Jwy0QSgUAhxCNgO/EWSpBFLBK8ceYXxuvFnm8fKt0NFLmQ+Blod1W163t1Xy5qZ4UT6jX3ziYuLC5mZmVRWVlJRUYFQCLyWR2FpH0BfYG9IFzF5KtEpMyn45H0GenvxCnAlMWMcx3Y30NXSP4rvYHQlVCoCHnkYY1k5XZ/LSXJtylq6DF28euRVe2BQIiTfKi8PdVTj667h7rlRbDp6mpK6zlEa/eWt7du3o1armTtXZoP0bK9FMljwGlIBZOjTk//xe0QmJRMxeSqSVaJgQwU6f62zsdx/oGHZI5AkaaMkSfGSJMVIkvQn23O/lSRpg+3xIkmSgs4tE5UkaY8kSVMkSZpq+/7ycIznfHpu4XM8Mf8J1ArbmqHVKs8GvMJhhrxU9NS2UlRKwcML40ZyKMOq6dOno9PpyM7ORpIkXOK8cYn2ojunFqvBXiY6Z81tGPr0FH7xMSCTzIRSsP+LytEa+piQ55IluCQm0Prsc0hGIwl+CSwfv5z1x9bT0jekMGHez0EoBltP3JURhY+bmseds4JhV2NjI8eOHSMtLQ13d3fMXQZ68xtwSwlCHWT/gLZ/w8cM9PYw9+Y7ACg72ExrbS+pV0ajHEFo1OUmh/pN6TQ64n2GLPcc+xQai2UOsVrLidPdfFpcz/dmjydoBDjEIyW1Ws38+fOpr6/nxIkTtjbVUVj1Jnp32bdrAsdHMyF9Lgc3bqCvqxN3bxeS5odxct9p2hp6R/EdjK6EQkHgo49iqq+n44MPAHgw+UHMVjPrDq2zB3qFwsy7ZGxpaymeWjX3z49h56kW8ivaznN2p/4b5eTkoNVqmT17NgA9OTUggW6hnTzW29HOgY2fMmF2JkFRMVgsVgo2VOAX6k78zKDRGvolKYdKBGfJYoKcP0JgIiStAeDxzafw0Ki4f97FbzN9oZo6dSp+fn7k5ORgtVrRhHviOsWfnp31WHrtLRFmr74Fs9HIvs/kG17K0kjULkr2fe7YswL3jAzcZsyg9Z/rsPb1EaGL4Pr46/mo9COquqrsgRk/ApUr5P4ZgNvTxxOkc+HxzSedbaqHSdXV1ZSWlpKRkYFWq8Xc1o9+fxPuM4NR+do/oOV/9C5Ws5mMNbcBcGJPI13N/cy6yr6R7NR3k+MmgoOvQ3sFLPwfUCg5WNPBtuNN3JMZjbfbpWc+USqVZGVl0dLSQklJCQC6JZFIZgs9OXabh++4MBIzsyjespGetla0HmqSF0dQUdRCU1X3+U5/2UsIQcCPHsXS2kr7ehlhee/Ue9EoNTxb9Kw90CMA0u6DIx/B6SNo1UoeyoqjsLqD3JMX5m9xSq7sy87OxsPDg9TUVEBuLIdCoMuyFyd2NNZTkr2JpEXL8A4OwWyysP/LKoKjdYxPGjtdAC4VOWYiMOrldd6IdIhfiiRJ/N+mk/h7aPh+xqXLMU1ISCAkJITc3FzMZjPqADfcZwTTW9CIuc2+IZx+/U1IVon8j+USyalZ4Wjd1RRsqBitoY8JuaWk4DFvHm0vv4ylqwt/V39uT7ydLdVbONp61B44+yFw8ZJblQOrZ4QT4evG41tOYnXgNt/DofLycmpqasjMzESj0WBq7qOvqBmP9BCUOpfBuN3vvYlKrSHt2hsBOLKjHn2ngbRVMWOuC8ClIMdMBPn/gN4mWPR7EILdZa3srWjjgQWxuLtcfOjMcEmhULBw4UI6Ozs5cOAAALpFEQiFoHur3TzmFRjElIVLObJ9K51Np9G4qkhZFkntsXbqT3ac7/QOoYBH12Lt7qbt5VcAuGPSHfi4+PDkwSftQa4+cjI4uRHqCtGoFKxdFMfRhm6+OuLYbb4vRGdmA97e3qSkpADQvbUaoVbiOd8+G2iqKOPU3l1MX3k17t4+GPvNHPiqmvAEH0InjL0uAJeCHC8R9LVD3jMwYQVEzJJnA5tPEurtys2zIv798WNcMTExREZGsnPnToxGI0qdCx5zQuk71IJxyIZw2jWrUSiU7P1QBnhMmReKu5eG/M8qHHqtWztxIrorrqB9/XrMLS14aDy4J+keChoL2NOwxx6Ydh+4+cmOdGDVtFDiAj14YutJzBbrKI3+0tbx48dpbGxk/vz5qFQqjPW99B9uxSNjHMohoPmdb7+Gq6eOGSuvBaA4u5YBvYm0qy+9vb2xIsdLBLueAGMvLPwtAJuPygjKRxbF4aK69M0nQggWLlyIXq+noKAAAM95YQitiq5NVYNxHr5+TFu20mbNr0WlUTLjiihOV3RRfcSxK2ACHn7oLKTl6gmrCfUI5akDT2GVbDd5F09547hiO1TtRqkQ/HjJBCpa9Hxc5Nhtvv8bWa1WcnJy8Pf3JykpCbDNBlxVeM61u4OrSoqoOVxM2rVrcHFzo7/XSPG2GmKSAwiMHLtdAMa6HCsRnIGST70ZAhNsCMpTo46gHG6di7RUuKrQLQjHcKqDgXK7+WnmVdehcnFhz/vy5mjCnBB0/loKNlQ4kZZDkJYapYYHpj3A8fbjbK7abA+ceZfcpjr7jyBJLJ0URFKYF09vK3UiLf9DlZSU0NrayoIFC1AoFBiquxk40Y5nZhgKV3m5VrJa2fX2a+gCgkhavAKAA5uqMRucCMoLlWMlgu1/BgQs+AUAHx+so6y5d0wgKIdbWVlZDAwMDCItPdLHofTSnN2mWufF9CtWcaogj6bKcpRKBalXRtNa20vZweZvO/1lr3ORliuiVhDnE8ezRc9istq6tqpdIfMnUJsPZdsQQvCTpROo7+zn3X2O3eb7P5HZbCY3N5eQkBASEhIA6N5ShcJDjccQd/CpgjyaK8uZs/oWVGq1HUGZFuxEUF6gLq+7379TeCrM/xl4hY05BOVw61ykpVDLJCdjbQ8Dx9sH42asvAatu8fgrCBuZhC+49zZ93klVgde6z4XaalUKHkk+RFqe2r5tOxTe2DybeAdIXtSJImMWH9mRfnybE4ZfUbHhf/8JyoqKqKzs5OsrCwUCgUDZZ0YyrvwnB9ubzNtsZD33pv4h0cyMUNuQFe4sUpGUK68dCv9xoocKxHMuBPm/hgYmwjK4da5SEu3lCBU/q50b6keXPpxcXNnxlXXUXFwPw2njqNQCGZdFU1nUx8n8h27AuYM0rLlGRlsnxmWSVJAEv869C97m2qVBub/AhoPwfHPB2cFrb0GXttTNXqDv0RkNBrZsWMHERERxMbGIkkS3VuqZOjMLHub+GM7c+horGf2mltRKJQygnJPoxNBOUxyrERg01hFUA63zkVaCqVAtygC02k9/YdbB+NSll2Jm5c3u99dD0DUVH8CIz3Z/2UlFpPjzgrOIC17tm6jv6QEIQQPJz9MU18TH5z8wB6YtAb842VfgdXCjPG+ZE0MZF1uOV39jgv/+S7av38/vb29LFy4ECEEAyfaMdb04LkwAqGWb08Ws4m9H71DUHQcsTPSACeCcrjlkIngDILyJ0vHHoJyuHUGaZmbmwvIbapVQW50b61GstjgNVots66+gdqjJVQfLkYIQdrVMfS2Gzi627ErYM5FWs4KmUVqcCovHn6RPpONUKZQyv2qWk7A4Q8B+PGSeLoHzLy407FNet+mgYEBdu/eTWxsLJGRkUhWie4t1Sj9tLhPt/cKOpy9he6WZjLW3IoQgpZaGUE5NSvMiaAcJjlcIrAjKAOZHnn5m0/OIC0PHTpEc3Oz3KZ6SSTm1n76ipoG45IWLcfDz5+899YjSRJhE30IneBN4cYqTAbHrYA5F2kJ8FDyQ7QPtPP2ibftgQmrIGgK5P4vWExMGufFyqQQXsmrpLXXceE/36a9e/fS399PVlYWAP1HWjE16tEtikTYijdMhgHyP3mP0ImTiJwqm8wKbAjK5MWXvu9nrMjhEoEdQTlhtIdy0XQGaZmTkwOANtEPdZgH3dtqBuE1Ko2G9OtupLH0JBUH98uzglUx9PeYKNnu2BUw5yItpwVOIzMsk1eOvEK30dafSaGQQfcdVVAkb7w/ujieAZOFf2wvH73Bj1Hp9Xr27t1LYmIi48aNk2cDW6tRBbrhNtWOoi3eshF9RzsZN96GEIKGsk6qjzgRlMMth0oElwqCcrj1NaSlEHgtGY+l04B+v31DeNK8RXgHhcizAquV4Ggvxk/xo2hLDQN6x13rPhdpCfKsoMfYwxtH37AHxi+FsJkyxcw0QEyAB9dPD+PN/GrqOx0X/vNN2r17NyaTiQULFgDQV9SMuaUfryWRg51Djf197PvsQ8ZPTSEsYbKMoPzUiaAcCQ1LIhBCLBNCnBRClAkhfv4Nr7sIId6zvV4ghBg/5LVf2J4/KYRYOhzjOZ+ezSm7ZBCUw61zkZYucd5oxutkeI1RXvpRqlSk33AzLdWVnCqQ/QezVkVj6DNTvLVm1MY+FnQu0nKi70SWRC5h/bH1tA/YynGFgKzfQHc9HJDpZmcAR89ml47W0Mecurq62LdvH0lJSQQEBCCZrXRvq0Yd6oF2kt9g3IGNnzHQ082c1bcCUHO03YmgHCFdcCIQQiiB54HlQCJwkxAi8Zywu4AOSZJigSeBv9qOTURmHE8ClgH/sJ1vRDTO25W75kZdEgjK4dbXkJZC4LV0PNYeI/p8O9Jy4pxM/MIiyHv/LawWC/5hnsTNCOTQ9jr6uo3fcoXLW0ORlt1ffgnAA9MeYMAywCuHX7EHRs+DqExbKxM9YT5u3Dwrgg8O1FHZqh+l0Y8t7dy5E0mSmD9/PgD6wiYsHQZ0SyIHizf6e3so/PwTYmemERwbL3fL/azciaAcIQ3HjCAVKJMkqUKSJCPwLrDqnJhVwOu2xx8CC4X8L74KeFeSJIMkSZVAme18I6L758fwi+UJI3X6Ma+vIS2jvHCJ96EntxbrgGx+UiiUzFl9Kx0NdRzfnQtA6pXRWExWDgzpVeSIOoO0bLEhLaO9o1kZvZJ3T75Lc98QJ3bWb0DfAgVyr6IHFsSiUSp4cqsTadne3k5RURHTp0/Hx8cHyWShO6cGTaQObby9eKNww0cYB/oHZwPlRS0ygnJllBNBOQIajt9oKDB0N7HO9tw3xthg912A33c8FgAhxD1CiEIhRGFLixMA8t9IrVYzb9486uvrOXVKvil5LYnE2memd0iZaGxqOkHRsez54G0sZhPeQW4kpAdzZGc9Pe0DozX8UZdQKAh85BFMtbV0fixzn++beh8Wq4UXSl6wB4anQtxSyHsa+jsJ8HThzjnj2XCogWMNjgv/AcjNzUWhUJCZmQlAb34j1m4jXkvtswF9ZwcHN33OxNmZ+EeMx2pDUPqEuBOXevl1ARgLumRSqyRJL0iSNEOSpBkBAQH//gCnvlHTpk3D19eX7OxsGWkZ5ok20Y+eXfVY++QNYSEEc9bcRndLE4dztgIw4wrZxu/ooHv3zExcU1Joff4fWAcGCPcM59q4a/mo9CPqe4d4LrJ+DQNdsPd5AO7NjMFTq+KJLSdHaeSjr6amJkpKSpg1axaenp5YDWZ6cmtxifPGJdp7MG7fpx9gMZmYfcPNAJwsaKKzqY+0q6JRODCC0tTcTNOf/4K5Y/iZIcORCOqB8CE/h9me+8YYIYQK8ALavuOxTg2jlEolCxYsoLm5maNHZeqW15JIJKOFnp11g3Hjp6YwbkIiBR+/i8lowNNXy5TMME7sbaTjtOOudQshCHx0LeaWFjrekn0E9yTdgwIF/yz+pz0wJAkSr5YhSPpWvNzU3DcvhuwTzRyobj/P2S9v5eTk4OLiwpw5cwDozWvAqjfjtWT8YEx3awuHtm5k0rxF+ISEYjFZ2f9FJYGRnkRNu3y7AHwXta1bR/tbb2Ht6Rn2cw9HItgPxAkhooQQGuTN3w3nxGwAvmd7fD2QI8ktMDcAN9qqiqKAOGDfMIzJqW/RpEmTCAoKYvv27VgsFtTB7rhODaA3rwFLj7whLIQg48bb6O1o59CWjQCkLItEqXGC7t1mzsQ9I4O2F1/E0ttLkHsQayau4fOKz6noGuIkXvBLMPXBbtmVfMfs8fh7aPjbJscD3dfV1XHy5Elmz56Nm5sb1j4TPTvr0Cb4ogn3HIw7g09Nv15GUB7d3UBP+4DDIyiNtbV0vP8B3jdcjyZi+I10F5wIbGv+DwKbgePA+5IkHRVC/EEIcZUt7GXATwhRBvwI+Lnt2KPA+8AxYBPwgCRJjmtjvUhSKBRkZWXR3t5OcXExALpFkUgWKz1DzGPhiVOITEpm36cfYOzvw02nYdrCcMoONNNSM/yfSi4lBaxdi6Wzk/bX5BqIuybfhYvShX8U/2NI0AS5D9H+l6C7EXcXFQ8uiKWgsp1dpa3nOfPlqezsbNzc3EhLk3sF9eyqRzJY0A2ZDXScbuDI9q0kLVqOzj8Qk8FC4VdVhMZ7E5Zw+XcB+Da1PPssQqXC/777R+T8w7JHIEnSRkmS4iVJipEk6U+2534rSdIG2+MBSZJukCQpVpKkVEmSKoYc+yfbcRMkSfpqOMbj1L9XfHw8YWFh7NixA5PJhNrfFbeUIBl032nfEJ6z5lb6e7o5uFGe5E1bHIGLu4r8zxzbLes6eRKeixfT/uqrmDs68HP149aEW9lctZmT7UP2Aeb9DKxm2WQG3DQrglBvV/5vs+PMCsrLy6msrCQzMxMXFxcsvUZ68+pxTQpAM4QjsPeDt1Gq1My6ZjUAJdtr6e82MsvBZwMDp07R/fkX+N56C+qgwBG5xiWzWezU8OoM0rK7u5vCwkJABt0D9OTYZwUhsROImZFG4RefMNDbi4uripSlkdQcbaeh1MFB9488jLWvj7YXXwLgjsl34Knx5Lmi5+xBvlGQcjscfAM6qnBRKVm7KI7D9V1scgDQ/RkgvZeXFzNmzACgJ7cOyWQd/HsDaK2t5njeDpKXrcTd2wdDn4miLTVETvEjJMZrtIY/JtTy9DMoPDzwu/vuEbuGMxE4sKKiooiOjmbXrl0YDAZU3lo8ZoWgLzyNudXeEmHO6lsw9PdR+IVcMpk0P0wG3X/q2KB7l9hYvK66io633sLU1IxOo+OOSXeQW5fLoZZD9sDMn4BQwI6/AXBtShixgR48vuXyB92fOHGChoaGQSC9uctAb34DbilBqAPcBuP2vP8WGq0rM6+6DoDibbUY+szMcnAEZX9xMb3Z2fjd9X2U3t7//oD/Us5E4ODKysqir6+P/Px8ADwXhCOUCrq3VQ/GBERGMSF9Lgc3bqCvq3MQdN9Y7gTd+z/0IJLVSus6uWLo1oRb8dX68mzRs/Yg3TiYeTccegdaTqFUCB5bEk/5ZQ66/yYgfU9ODUigW2ifDTRVlFG6bw/Tr7gaV08dfd1GirNriZ0RSMCQjWRHkyRJND/5FEo/P3xvu21Er+VMBA6usLAwJkyYwJ49e+jr60PpqcFj9jj6DrVgGlImOvuGWzCbjBR8KgNZEuaEoAtwJf8zBwfdh4Xhc8P1dH7wIcbaWtzUbtw1+S4KGgvY1zikAC7jUVC5ym2qgaWTgi970H1JSQktLS0sWLAApVKJua0f/f4m3FODUflqB+N2v7cerYcn06+4GoCDm6qxmKzMutKxZwN9e/fSV1CA/733onB3p3OgkxdKXrBzMIZRzkTgFFlZWRgMBjvoPjMMoVHStdU+K/AdF8qkeQs5tOVLulubUSoVzLoyira6XsoOODbo3u+++xAqFa3PyXsDayauIdAtkGeLnrUvnXkEQNr9cPQTaCg+C3T/dsHl19DPbDazfft2QkJCSEyUW491b62WKXkL7LOBuhNHqSo+wMyrrsPFzY2e9gEO76xjYnow3kFu5zv9Za8zswHVuBC8b1wDwEuHX+L54udp6G0Y9us5E4FTBAUFMWXiuq2IAAAgAElEQVTKFAoKCujp6UHprsZzbigDR9sw1tnLRNOvvxmEYM/7spEqbkYQfqHuFGyowHKZr3V/m9SBgfjccjNdGz7HUFqKi9KFe5PupbilmF31u+yBcx4GVx/I/j0AGbH+pEf78VxOGXrD5QW6P3DgAF1dXYMISmNDL32HWvCYMw6ljSomSRJ5767H3duH5GUrASj8UvaozLzCsYH0PVu3MnD4MAEPPIhCo6Gxt5F3TrzDldFXEusTO+zXcyYCpwA76H7XLvnG5ZERisJNRdcW+6xA5x9A8rIrObYzh9aaKoRCMGtVDF0t/ZzY03i+UzuE/O4+G3R/Tew1hHqE8lzRc1glW5LUesHcx6A8Byp2yLOCZRNo0xt5ZfflY9IzGo3s3LmTyMhIYmJiAOjeXIXQqvCcZ28kUH24mLrjR5h1zWrULloZSL/3NJPnhuI5ZOnI0SRZLLQ8/Qya6Gi8VslWrH8ckv0pD0x7YESu6UwETgF20H1hYSEdHR0obP9pDac6MFR1DcalXn0DGldXdr0rA1nGT/EjOFrH/i+rMBsvz7Xu76KzQPeHD6NWqvnhtB9yvP0426q32QNn3g26MNj2PyBJpET4sDgxiBd2VtChvzzafOfn56PX61m0aJEMpC/vZOBkB7oF4ShcVcCZ2cAbePoHMGXhMgD2fV6BUiUcHkjfteFzjOXlBDzyCEKloqyjjA3lG7hp4k2EeISMyDWdicCpQWVmZiKEYMeOHQC4p4eg8FTTtblqcK3b1cOTmVddR8WBfdSdODoIutd3Gji84/KtgPku8v3eHSh9fGh56mkAroi6gmivaJ4vfh6L1ZYk1Vq59URDERz7FIDHlkyg12hm3c5L36TX399PXl4e8fHxhIeHI0kSXZuqUHpp8Ei3cwTKCws4XV5K+nU3oVKraa3robSwmalZ4Q4NpLcajbQ++yzaSZPwXLIYgKeLnsZN5cbdU5w+Aqcugry8vAZB9y0tLSg0SnQLIjBWdmMo7RyMS1lxFe4+vux6+3UkSSI03ofwRF8ObqrG2H95rXX/J1J6uON3zz3o8/LQF+xDqVDywLQHqOiqYGPlRnvg1BshIAGy/wgWExOCPbl6Wiiv5VXR1H1pt/nOy8vDYDAMAukHjrZhqu2RgfRq+XYjWa3kvf8mPiHjmDRvIQAFGypxcVMxzcGB9J3vvY+poYGARx9FCEFRcxG5tbl8f/L38dY6fQROXSRlZGSgVqvZvn07AO6pwSi9XejaYp8VqF20pF93Ew0nj1FxUC6RTFsVzYDeRPG2y68C5j+Rz003ogoKouUpGXS/KHIRCb4J/KP4H5isNu6zQgkLfwvt5VC0HoBHF8VjsUo8cwkjLXt6esjPz2fKlCkEBwcjWSS6NlehCpTbl5zRyb27aK2pIv2GW1AolZyu6KKqpJVpiyPQujsukN6q19O6bh1uqam4z5mNJEk8deAp/F39uSXhlhG9tjMROHWW3N3dSU9P59ixYzQ0NCBUCnQLIzDV9TJwzN4+efKCxfiEhLLr7dexWi0ERuqISQmgeFst/T2Xx1r3fyOFVov//ffTX1RE744dKISCB5MfpK63jk9KP7EHTlgO4WmQ+1cw6onwc+Om1Aje219Lddul2eZ7586dWK1WO5D+QJMMpF8ahVDKvYKsFgt7Pngb//BIJqbPHQTSu3qqSXJwIH37+jextLUR8OhaeYm2bgcHmw9y/9T7cVOPbCmtMxE49TWlp6ej1WrJyckBwC0lCJW/K91bqwbNY0qViowbb6OtroZjO+XZw6yrojEbLRzYVH3eczuCvK+7FnV4OC1PP4NktTI3dC5TA6byr5J/YbAY5CAhYPHvofc05Muu5IeyYlEpxSWJtGxvb+fAgQOkpKTg6+uL1Wiha1u1jKBM9B2MO7ozm47GemavuRWhUFB3ooP6U51MXz4ejVY1iu9gdGXp7KTt5ZfxWLAAt+RkLFYLTx98mkhdJNfEXTPi13cmAqe+Jq1WS0ZGBmVlZVRX20xAiyIwne6jv8SOCY2bNYfgmDj2vP8WZqMRn2B3JqSHcGSHgyMt1WoCHn4Iw/Hj9GzejBCCh5MfprmvmfdPvm8PjEiD+OUy0rKvnUCdljtmR/HZoQZOnL60kJZfQ1DuaZARlMvGD3YONZtM7P3wHYJj4oidkTY4G/DwcWHy3G8k1DqM2l5+GWtvLwFr1wLwRcUXlHWW8VDyQ6gVtuUySYKekWlU6EwETn2jUlNT8fDwGATduyYFoA52o3trNZLNPCaEYO7Nd9DT1kLxli/l41ZGISENGoMcVboVK3CJi5NnBWYzqSGpzAqZxUuHX6LX2GsPXPhbMPTAricAuG9eNB4uKh7ffOkgLc8gKFNTU9HpdDJ0JrcO7URfXKLsnUNLtm2ip7WFOWtuQwhB5aFWmqt7mLkyCqXacW9FpuZm2te/iW7lSrQT4jFYDDxf/DyT/CaxJHKJPfD4Bnh6KtQfHPYxOO5v36lvlUajITMzk5qaGsrKyhAKgW5ZFOa2AfQF9k8lEZOnMn5qCgWfvI+hT4+nr5bJmaEc33uazqbh74lyqUgolQSsfQRjVRVdn30GwNqUtbQPtPPKkVfsgUGJMPUm2PcidNbi7abh3sxoth2/dJCWZxCUGRkZAHTvqEMymPFaNn4wxtCnZ+9H7xA+KYnIpGSsVomCDRV4B7kxMc2xgfRt69Yhmc0EPPQgAO+eeJdGfSNrp6+1cxjMRtj6P+AbDSFTh30MF5QIhBC+QoitQohS2/evYYSEENOEEHuFEEeFECVCiDVDXntNCFEphCi2fU27kPE4NbxKSUnB29ubnJwcJElCO8EHlxgvurdVYx2wl4lm3PQ9Bnp72L/hIwCmLxuPUq2gYEPF+U7tEPLIykKblETL889jNRqZ7D+ZFVEreOPYG5zWD5niL/il/D33LwDcOSfqkkFanougNHcZ6M1rwC05EHWwHTpT8OkHDPT2MO+2uxBCULq/ifYGPalXRqFQOu7n0UEE5fXXoYmIoMfYw4uHX2T2uNmkhaTZAwtfho5KWPxHuepsmHWh/wI/B7IlSYoDsm0/n6s+4HZJkiYBy4CnhBBDC2J/IknSNNtX8QWOx6lhlEqlYv78+TQ2NnL8+HGEEHitiMbab6Z7CNIyKCqGiXPmceDLz+jtaHciLW0SQhC49hHMDY10vifvDTyc8jCSJJ3dpto7HFJ/AIfehubjlxTS8lwEZffWapAkdIsjB2O6W5o5uPEzEucuICgqBovFyr7PK/AL8yA2ZWSIW5eKBhGU9/8QgFePvEqXoYu1KWvtQf0dsOOvEL0AYheOyDguNBGsAl63PX4duPrcAEmSTkmSVGp73AA0AwEXeF2nLpKSkpLw9/cnJycHq9WKJtQDt+RAevPqMQ/ZEJ6z+lasFjP5H70DDEVaOvaswC09HbdZs2hdtw5rXx+hHqHckngLn5d/zvG24/bAuT8GjQdk/wG4NJCW5yIoTc199B1owiMtBJXPkDbT776BQDBnjdxT/3heI92tA6StikYoHBhBefJsBGVLXwtvHn+T5eOXk+CXYA/c9QT0d8KSP8rVZiOgC00EQZIknek2dhoI+rZgIUQqoAGGeun/ZFsyelII4fItx94jhCgUQhS2tLScL8ypYdYZ0H1rayslJSUANuC4oGtL1WCcd3AISYuWUZK9mfaG+iFIyzYahriSHU1CCALWPoKlrY329W8CcPeUu/Fy8eKJwifsN3k3X7k76cmNUJM/5pGWZxCUOp1uEEHZtbkKoVHimWV3B58uO8Xx3blMX3k1Ov8AzEYLhV9WEhztReRkv9Ea/phQyzPPoHB3H0RQrju0DpPFxIPJD9qDOqqg4F8w7RYInjJiY/m3iUAIsU0IceQbvlYNjZPkv+jzfnQRQoQA64E7JelMO0Z+AUwEZgK+wM/Od7wkSS9IkjRDkqQZAQHOCcXFVEJCAiEhIeTm5mI2m1F5u+A5N5T+4haMtfaln7Rrb0Sl1pD3nuyWnTI/DDcvDfmflo/ZT7UXQ27JyXgsWEDbyy9j6e5Gp9Fx39T7KDhdcHab6rQfgkcQbPsdSNKYRlqei6A0VHczcLQNz8wwlDZ3sCRJ7HjzFVx1Xsy86noAjuysR99lJO3qaIcG0p+LoKzuruaj0o+4Pv56InRD2mxs+z0IJWT9akTH828TgSRJiyRJmvwNX58BTbYb/Jkb/TcSSoQQOuBL4FeSJOUPOXejJMsAvAqkDsebcmp4dQZ039nZycGDcuma57wwFO5qOjfaucXu3j5MX3kNp/J3c7q8FLVGyUwn0hKAgLWPYO3upu0VuWJodfxqInWR/L3w75itto13jTvM+xnU7IVTm8cs0nIognLq1Km2xnKVKDzUeAzxA5QXFlB3/Aizb7gFFzc3jP1mDnxVTXiCD6HxX6srcRidhaC8/XYAni16Fo1Sw71T77UH1u6Hox/D7IdANw6zxcqB6o4RGdOFLg1tAL5ne/w94LNzA4QQGuAT4A1Jkj4857UzSUQg7y8cucDxODVCiomJITIykh07dmAwGFBoVegWyw3phraemLHyGlw9dex6+zXAhrT01zo80lI7YQK6FStof/0NTE1NqJVqHk15lPKucj4pG9J6IuV28I2RZwVWy5hEWp6LoBw42YGxshvdwggUGrmixWI2s/OtV/EdF8aULLkWvmhrDQN6E7NWxYzm8Edd+j17zkJQHm09yuaqzdyeeDv+rv5ykCTBll+De6C8ZAi8VVDDdf/cQ1HN8CeDC00EfwEWCyFKgUW2nxFCzBBCvGSLWQ1kAnd8Q5noW0KIw8BhwB/4fxc4HqdGSEIIFi9ejF6vH4TXuM8MRhXgStdXlYMmMxc3N9KuXUPNkUNUlRShVCpIvTLaibQEAh5dC2YzLX//OwBZEVmkBKbwfNHz6E22/kJKNWT9GlqOQ8l7Yw5pORRBmZCQgGSV6N5UidJPi3uq3Q9Qkr2JjsZ6Mm+9E6VKRXdbP0Vba4ibEUjQeN0ovoPRlSRJtJyDoHzy4JP4uPhwx6Q77IHHP4fafLm02MWTDr2Rv289xewYP6aFD38X0gtKBJIktUmStFCSpDjbElK77flCSZLutj1+U5Ik9ZAS0cEyUUmSsiRJmmJbarpVkqTeb7ueU6OrsLAwkpKS2Lt3L+3t7QilAq/lUZhb+9Hvs29oJi1egS4giF1vvYZktRI/04m0BNCEh+N75510fbaB/kOHEELw2IzHaBtoO9tklng1hEyD7f8LpoExhbQciqBUKBT0FTdjOt2H15LxCJsfwNCnZ+8HbxOeOIXoFHm1d+/H5Qgg/drhxyxeSurZupWBI0cGEZR7GvZQ0FjAD5J+gIfGQw4yG2VwUUACJMuVVk9tO0XPgInfXpk4InsrjuvkcOq/0qJFi1AoFGzZsgUAbYIvLtFnm8xUajVz1txKc1U5J/fuciIth8jvnntQBvjT9L9/RpIkpgRMYXnUct44OsRkplDIDem6aqHw5TGDtDwXQSmZrXRvqUYd6oHrFP/BuH2ffkB/T/egeayhtJOyA80kL4lwIiiHICitkpWnDjzFOPdxrJmwxh5Y+Aq0V8jlokoVp5p6eLOghptnRTAxeGRmU85E4NR/JJ1Ox9y5czlx4gSVlZWyyeyKaKx6Mz25dpNZwpx5BESMJ++9N7GYTXak5ReVGAccG14T+OiP6D90iO4vvgDgkZRHsEgWnit6zh4YPV82EO18HAa6zkJatvYaRmXse/bsQa/XDwLpe/MbsXQa5MZyNj9Ad2szB86Yx6JjkawSuz8oxcPHheSlkf/mCpe3uj755CwE5ZaqLRxvP86DyQ+iUdqobP2dsOMv8r9/7CIkSeKPXxzDXaPkR4snjNjYnInAqf9Y6enpeHl5sWnTprNMZj276zF3yiYzoVCQcfP36Gxq5HD2FoQQzLk+Dn2XkYMO3qba6+pVaCdPpvnxJwZNZrcm3MqG8g2caD9hD1z0O+hvh7xnAPjZson0myz836aL35Cus7OT3bt3k5iYSEREBNYBMz3ba3CJ9UYbZ68A2v3uetk8dqPNPLa3kZaaHtKviUGtGf7WCJeKLN3dNP/9SVyTk/FcshiT1cQzRc8Q5xPHiqgV9sAz5rHFsnks50Qzu0pbeWRRPL7uI4fwdCYCp/5jqdVqlixZQlNT02A5qW5pJCDo3my/yUdNm0FY4mT2fvQOxoF+gqO9mDArmKJtNXQ2O3BDOoWCoF/+EnNTE20vyTUVdyfdjc5Fx+OFj9s9F+OmwaRrIf8f0HOa2EAP7pwznvcP1HKo9uKa9M4sBS5ZIlcA9eysw6o/u7FcU0UZx3dtJ+WKVej8AzH2m8n/rILgaB1xM7/Va3rZq+W557B0dBD0618hhOCjUx9R21PL2pS1KM/0DuqohoJ1MO1mCEnCaLby/748TnSAO7enj+xsypkInPqvlJiYSGRkJDk5OfT396Py1uKZEUpfUTPGOtlkJoRg7k130NfVyYEvZVB7+rUxKJUK8j4sG83hj7rcUpLRXXEFbS+/gqm+Hp1Gx/1T76egsYDd9bvtgVm/BosRdvwNgIcXxuHn7sL/bDiK9SKV41ZWVnLs2DEyMjLw9vbG0mOkd3c9rkn+aMI8AZt5bP3LuOq8SF11AwCFX1XR320kY3W8Q5vHDKWldLz1Nt6rV+M6aRJ9pj7WHVpHSmAKc0Pn2gOz/yCbxxbI5rE39lZR2arnN1ckoh7hxnzORODUfyUhBMuWLaOvr48dO3YA4DnfZjL7snLwU+24+InEzkyn8POP6evuwt3LhRlXjKeqpNXhTWaBj/0YhKD5CZlFsDp+NRGeETxR+ITdZOYXA9PvgIOvQ1s5nlo1P18+keLaTj46WDfiY7RYLHz11Vd4e3szZ84cALpzapDMkq3ViKyKg/uoPXaY2dffjIubG10tfRzKqWViWrDDl4ue/tP/ovDwIGDtIwCsP7aetoE2Hp3+qD1B1h2AIx/C7AfBK5TWXgNPbytlXnwACyaOfGM+ZyJw6r9WSEgIKSkp7Nu3j9bWVtlktigCY2UXA8ftJrOMG2/HNGCg4BO5A+fUrHC8g9zY9f4pLGbHLSdVh4Tgd9dddG/8ir7CQtlkNl02mX1a9qk9MPOnoNRAzh8BuDY5lOQIb/666STdA6YRHWNhYSHNzc0sXboUtVqNua0ffcFp3GcGofZ3BWTz2I43X8VnXBhTFi4FIO/DMhRKBWlXO7Z5rGfLVvry8wl4+CFUPj50DHTw6tFXyQrPYlqgzU41aB4LgDlysnhiyyn6TRZ+szLhW84+fHImAqcuSFlZWahUKjZv3gyAe+rXTWZ+YeFMmr+IQ1u+pKu5CaVKQcbqOLqa+zmUXfttp7/s5Xf3XaiCg+VyUquVhRELSQ5M5rmi5+gz2fZRPIMg/QE4+gk0FKFQCH5/1STa9Aae2VY6YmPT6/Vs376d6OhoJk6cCEDXFhu6dKF9zfpwzhY6GurIvEU2j9WdaKfyUCvTl0Xi7n3ePpKXvaz9/TT99S+4xMfjs0YuD32h5AX6zf08kvKIPfDEl1CzZ9A8dqyhm/f213BbeiSxgZ4XZazORODUBcnDw4N58+ZRWlpKaWmp3WTWcrbJbPYNNyOEgrz35Q6ckZP8GJ/kT+HGKvSdo1MOORakcHUl8LHHGDh2jK5PPjnLZPbq0VftgbMfBldfmVIlSSSFebNmRjiv7amirHlkmA/Z2dkYjUaWL1+OEAJjfS/9h1rwyAhFqZMrWAx9fez54C3CEicTMz0Vq8XK7g9K8fTTMm1R+IiM61JR20svY25olDeIVSrqe+t57+R7rIpZRbR3tBxkMcHW34L/BEi+HUmS+MMXR/FyVbN2YfxFG6szETh1wZo1axa+vr5s3rwZi8WCNsEXTdTZJjNPP39SVlzF8V3bqT9xDICMG+KwWiT2fOLYG8e6K1bgmpxM85NPYentJSkgiWXjl/Hakddo0jfJQVodzPspVO6Q2w8Ajy2dgKtGye8/Pzbs3V3r6+s5ePAgs2bNIiAgAMkq0flZGQp3FZ7zwgbj9m/4kP7uLubdKpvHju1uoK1ez5zrYlGpHbdc1FhXT9tLL6FbsRz3VNld/bd9f0MplPxw2g/tgYWvQHv5oHls89HT5Fe086PF8Xi5qS/aeJ2JwKkLlkqlYsmSJbS2trJ//36EEHhfEWUzmdk3NNOuvRFP/wC2vfQ8FrMZrwBXpi0O51RBE41ljs0sCPrlL7C0ttK2bh0wxGRWPMRkNvMHEDQFvvoZGHrw93DhR4vj2VXaypZjTcM2HqvVyldffYW7uzvz5s0DQF94GmNND14rolFoVQB0t7Zw4ItPSciYT3BMHAN6EwUbKhkX5010smO3im/+619BoSDwJz8BYHvNdnJqc7hv6n0Eu9t6MvV3ynjSqEyIW8KAycKfNh5nQpAnN6VGfMvZh1/ORODUsGjChAlER0eTm5tLX18fmjBP3KYFnGUyU2u1ZN15H6211RzcKDeqnb5sPB4+Lux879RFK4cci3KdMgWvq6+m/fU3MNbUEOYZxs0Tb+azss842W4zkClVsPJJ6GmE7X8G4Na0SOKDPPjjF8cYMA1Pd9KSkhLq6upYtGgRWq0WS6+Rrq+q0ER54TYELZn33nokJDJulFspF35ZxUCfiYzVcQ5dLqrfs4eerVvxv/ce1CEh9Jn6+PO+PxPrHcvtk263B+7+u4yhXPInEIKXd1dS297Pb69MRHWROc7ORODUsEgIwdKlSzEYDGzfvh0A3dLxgHSWySx2xixiZqSx58O36W5pRu2iZPZ1sbTW9nI8r2F0Bj9GFPDoo6BW0/Q32TPwg6QffN1kFj5TLict+Cc0HkKtVPC7KydR19HPCzsvHAs6MDDAtm3bCA0NZerUqQB0baxEMlrwuSZ28AbfVFHGsV3bSVmxCl1AIB2n9RzOrSNxzjgCwi/OBudYlGQycfpP/4va1mAQYF3JOhr1jfw67deoFbblno5qyF8HU2+CkCSauwd4fnsZixODmBPr/y1XGBk5E4FTw6agoCBmzJhBYWEhTU1NqHy0eM6xmczq7Y1ls+68B4Cc1/4FQOz0QMbFeZP/aQUD+pEthxzLUgcF4n/vvfRuy0afn4+Xixf3Jd1HfmM+eQ159sBF/yNvHH/xKFgtzI71Z8WUYP6RW0Zdx4U5tnfu3Elvby8rVqxAoVAwUN5J38FmPDPDUAe6Af+/vfMOj6ra+vC7Z5JJ7xXSCGmE3kILNXTpTcCGCirqp6JXioWi13rxKthFUFAQEER6CU1AIKFDEmpICEkI6b1O2d8fM5KIQfFGkkDO+zzzzJlz1pmzzp6Zs+b81t57VVYes7S1o/NI4+Cxg2vjMdOo6Dy8aY2Of7eTs2IFFZcv4/HKLFQWFlzMvcj3cd8zKnAUHTw6VBruMdUfjngdgP/suIBWb+C1+2qnu+jNKIFA4R+lT58+WFhYsGPHDqSU2PXxQWVtRv6Wykpm9q7udBv3IJePRRN/NMo4Anl8MOUlWo5sqrvZNesDzo9Owtzb29idVKdjfMh4fOx8fj/IzMoJBr4Dqcfh+FIAXjVdQN7dev4W7/zXZGZmEhUVRbt27fDy8kLqDOStj0ftbIl9RGUPoMSTx0iOO0O3sROxsLYhKTabpNhsOg7xx9r+zs2HU9/RZWWR9eln2PTogW2fPhikgbei3sJWY8uLHV6sNEw9DjFroKtx8NiZlDzWHk/h8XB/mrja1InvNQoEQghnIcROIcQl03O19eeEEPoqRWk2VlnvL4SIFkLECyFWm6qZKdzFWFtb07t3bxISErhw4YJpkJkf5Qn5lJ2vHGTWfvBwXH2bsOfbr6goK8XV25aWvbyJ3ZdCVkrDLUuhsrDAffp0yi9eJG/NmhuDzOLz4tkQX6UAYOv7oUkPY03bogy8nax5ulcgW2LSOBSf9bePK6Vk+/btmJub07dvX8A4n5AusxSnEQEIUw8gg17PvuXf4NTIi9b9BqPXGzi49hIO7la07uP9Z4e458n48CMM5eV4vPIKQgjWx6/nZMZJXurwEk6WpkujlBA52zh4rPs0Y3fRTWdxtdXwfxF1V6uhpncEs4DdUsogYLfpdXWUVilKM7zK+veBj6SUgUAuMLmG/ijUA8LCwnB1dSUyMhKdTodNZ0/MXK2MWrNpkJnazIx+U56lMDuTw2tXAtBpmD8W1uYcWH2xQRe7txvQH+uwMDIXfow+P59+vv1o69aWT09VGWQmhDFxrCuFHca5aZ7q1RRvJyvmbYr728XuL1y4wOXLl+nTpw+2trbosksp2JOMVStXLEOcb9jF7NlBTmryjcFjsb+kknu9hPCxQajNGq7AUHrmDPnr1uH8yMNYNPUnpyyHD49/SHv39owIHFFpeGErJB2E3q+AhR2bzqRxLCmX6QNDsLOsve6iN1PTT24EsMy0vAxj3eHbwlSnOAL4rY7x39pfof6iVqsZNGgQOTk5REdH/36Q2dHKQWZeIaG06juQ41vWk5mUiKWNOV1GNr1RyKShcqM7aUEBWZ9/bhxkFvYyWaVZLI1bWmnoGgTh0yDmR0j4BUtzNa8Pac7F9CK+j7r9qb61Wi07duzAzc2NsLAwpJTkbriMUAsch1Zq/sbBYz/gHdqSgI6dKS2q4OiWRHyaO9Oklcs/2AJ3F9Jg4Pq/30Lt5orr008D8OGxDymuKGZ2l9mohOkye2PwWDC0n0RphZ53t56jRWN7xnao28F3NQ0EHlLK30pOXQduNdespRDimBAiSgjx28XeBciTUv5WpSQF8KqhPwr1hMDAQIKCgm4kHy2bO6Pxt6cgMgl9UcUNux4PPIqljS07F3+GNBgIDW+Mm68dh36KR1teP4q11wWWoaE4jh1LzoofKE9IoI1bGwY2Gci3sd+SXFBlWo4eL4GTP2x+CbRlDGzhQY8gVz7ceZHs2yxgc/jwYXJzcxk8eGa71/YAACAASURBVDBqtZrSmCzKL+ZiP8APtUPlFBGH1/5ASX4evR56HCEERzYlUlGmp/vYht1dNP/n9ZTFxODx8suobW05ev0oGy5vYFKLSQQ6VZF7or6A7Hjo/yaozfhq/2XS8suYO6wFalXdtt9fBgIhxC4hRGw1jxFV7aTxXv5W9/N+UsqOwAPAAiHE356JSgjxpCmYHMvMzPy7uyvUAQMHDkSr1bJnzx6EEDiNCMRQridvw+UbNla2dvR6eDJpF88TszcSlUrQ4/4ginLLOb79St05Xw9we+F5VJaWpL/3HgAvd3wZM5UZsw/NxiBN0o+5FQz5wDg69eBChBDMHdac0go983f8dQGb/Px8Dhw4QGhoKE2bNsVQpiNvUwLmXrbYdml8wy71wjmOb91Am/6D8QwMJju1iLj9qbTs6YVz47pJcNYH9IWFZHz4IVZt22I/bBhavZa3ot7Cy9aLp9o8VWmYeRH2vAXNhkLwIK7llfLlvssMadWITv7Otz5ALfGXgcBUlL5lNY8NQLoQohGA6bna+3kpZarpOQH4BWgHZAOOQggzk5k3kPonfiySUnaUUnZ0c2vYoxbvFlxdXenUqRMnTpwgLS0Nc08b7Pv5UhqTRcmZymDevGcE3s1bcmDFUkry82gU6EhwZw9O7rxKfmbDLWBj5uKC6zPPULz/AEX79+Np48mMsBkcTz/OqvOrKg0D+xkL2Bz4L2RfJtDdjke7NWH1sWTOpPz5iO3IyEiklAwcaJw1tCAyCUNRBU4jAxFq479UbUU5O75YgL2rGz0ffAwpjeUnNVZmdBrqf8fO/24g69PP0Ofk4PH66wiVimVnl5GQn8CrnV/Fysw4OysGPWx4BjTWMORDEIL3t5/HIGHW4GZ1ewImaioNbQQmmZYnARtuNhBCOAkhLEzLrkA4cNZ0B7EXGPtn+yvc3fTq1Qtra2u2b99u7E7a0wdzb1vyNsTfkIiEEPSb/CwVZWXsX2GcaK3bqEDUahW/rmnY8xA5P/QgGj8/0t97H6nVMjJwJN29urPgxILfS0QD3wEzC9jyL5CS5/v9dQGbxMRE4uLibhScqUgtoujwNWy6NEJTZVDYwVXfk5uWysCpL6CxsibxdBYp53PpNKwplrZ1l+Csa8rj48lZsQLHsWOxatmC5MJkvjz9Jf18+9HTu2el4eHPIOUoDJ4Pdh4cT8phw6lrPNmjKT7O1nV3AlWoaSB4D+gvhLgE9DO9RgjRUQix2GQTChwTQpzGeOF/T0p51rRtJvCSECIeY85gSQ39UahnWFlZERERQVJSEmfPnkWoBc5jgzGU/V4icvH2IWz4GOL27SY57gw2jhZ0vM9UwCau4RawERoN7rNmUpGQQO7KlUbpp+tc1EL9e4nIvhFEzIaEvRD7E/aW5swcFMLJq3n8fPKPN9q/FZxxcHAgPDwcaZDk/nwJla05DgOb3LBLPX/2hiTk27INeq2Bgz/F49TIhpY9G//hfRsKxoIzb6OytsbtRWM30Hei30Et1MzsNLPSsKok1GosBoPkjU1n8bC34One9adWQ40CgZQyW0rZV0oZZJKQckzrj0kpp5iWD0kpW0kp25iel1TZP0FK2UlKGSilHCelbLjzEd/DtG/fHg8PD3bu3IlWq72lRNR59P04eHiya/Hn6HVa2vQ1FrD59cdLDbqAjW3v3tiEh5P56WfocnN/JxGtPL+y0jBsMjRqCztehdI8xrT3pq2PI+9uO0/hTQVsbi44UxydhjalCMehlZPKaSvK2fFlpSQEcHpvMgWZpXQfF4iqlufDqU8U7txJyeEo3J57DjNnZ3Zd3cWvqb/ybNtnKyeVq0YSWncylTMp+cwc1AwbC7M/P0gt0nA/SYVaQ6VSMWjQIPLy8jh8+DBAtRKRucaCvo8/Tc61FI5t+tlYwGZcEHnpxrKHDRUhBB6vzMJQXEzmxx8D3JCIFp5YWCkRqdQwbAEUZ8Ket24UsMkqKueTPZUSW9WCM6GhoegLK8jffgWLIEesWlfm34yS0LUbklBJQQXHtl6hSSsXfJs33O6ihrIyMt57H4ugIJwmTqBYW8x70e8R4hTCA6EPVBreJAkVl+v4z/bztPFxZGTb+tVBUgkECrWCv78/oaGh7N+/n8zMTKNENO6PEpF/2w4Ed+lO1E+ryEu/jl9LUwGbLVcozm+4N4wWgYE4TZxI3qrVlJim+p7bdS5m4qZeRI3bGaerProYUo/TxseR+zt6882vicRnGEds79mzh4qKCgYNGoQQgrzNCUi9AccRlZPKVUpC9+Hbso1xfqGVF9BrDYSPDaqrZqgXZC9egvbaNWOC2MyMT09+SmZpJnO6zsFMZfqXf5MkBPDutnNkFJYzd1hzVHXcXfRmlECgUGvcd999aDQa1q5di06nw9zDBvt+fn+QiHpPmoLKTM3ub75ASkn42ED0egOH113+k3e/93GbNg1zHx9SZ8xEn5+Pp40n08Om/1EiingNbD1g0zTQ65gxqBlW5mre2BRHamoqx48fp1OnTri7u1N2KZfS05nY9/a5UYNYW15WRRJ6FIBzB9NIOJlJ5xFNcfSoHwnOukCbmkr2119jN3gQNp07cS77HD+c/4FxweNo7dbaaFSNJBQZd53lUVd5smdT2vtWOxNPnaIEAoVaw87OjhEjRpCens6uXbuM63p6/0EisnN2JXz8w1w5dZxL0QdxdLemXT9fLkRfJ+1yfl2eQp2itrXB64P56DIzSZs3Dyll9RKRpQMMeheun4Gji3G1tWBa/2AOXMpk1bqN2NjY0Lt3b6TWOKmcmasVdr0rR7YeXP17SSj3ejEHfryIdzMn2vWr3YIp9Y309/8DQuAxfTp6g543D7+Jo4Ujz7d/vtLo8Ke/k4TSC8qY+dMZWnrZ8/KAkLpz/k9QAoFCrRISEkJYWBhRUVHEx8ffUiJqO2AI7v4B7F26iPKSEtqbCqHvX3WhQSeOrVq3xu355ynctp38dT/fWiJqMQoC+hrliYJrPNLVj3DHIgqz0+neuw+WlpYU/JKMLrsMx5EBCNM8QUZJaOMNSUivNRC5JA4zczV9JzVH1DNJozYpOnCAwshIY8GZxo1Ze3EtsdmxTA+bjoOFg9Eo8yLseft3vYRe+vEUZVoDCye0Q1NP52Oqn14p3NMMGDAANzc3fv75Z4qKiqqViFRqNf2nPEtRXi6HflyOxtKMnhOCyUouavASkcvkx7Hu1Inrb79NxZUr1UtEQhhHHOsrYPsr5OVk00wXz3WDHaviVVRklFD4SzLWbd2wDDRKFZWSkDs9HzL2EoramEBWchF9Hm6GrZPFrVy659FmZHBt5iw0gQE4P/YYWaVZLDyxkM6enRniP8RoVI0ktPjXBA7GZzN3WHMC3Gzr9iT+BCUQKNQ65ubmjBkzhrKyMjZs2GAaaPZHicgzMJi2A+7j5PbNpCfE07StG637eHN6TzIJpxruNCNCrabxf95HmJuT+vJ0ZEVF9RKRc1PoOZ2Ks5tZs/xbLDQaQrv2Y8PpNC5+H4cwV+EwpHJSuUpJ6Hk0llYkn8vh1M6rtOjpRdO2DXc0v9TrufbydAylpXgvWIDK0pL5R+dTpi/jtS6vVc6z9JskdN8HYOdBTEo+83dcYHBLT8aH1e2kcn+FEggU6gRPT08GDBjApUuXOHLkyC0lou4THsHawYFdiz/DYNDTbXQg7n527PnuHAVZpXV4BnWLuacnjf79JmWxsWR+8umtJaLw59lmMYKM/GJGjxjKcwNb838eTjhmllHa1RO1nbEESMr5uN9JQqVFFexaehYnT2vCx9bdPPn1gazPPqfkyBE858zBIjCQw9cOszVxK5NbTcbfwTTFRlVJqOUYSip0vLDqJK62Frw7ulW9n5RPCQQKdUanTp0ICgoiMjKS9PT0aiUiC2sbej8yheuXL3Fm53bU5ioGTGmJNEh2LI5r0PkC+wEDcBw3juzFiymOiq5WIjodd56T5b704AiBKT9BmY4JRYKLKgNPnblKUbkObXkZkV8uvCEJSSnZ8915yoq1DJjSAnONuo7PtO4oPnSIrC++wGHUKBxHjaRcX87b0W/jY+fDlFZTjEbVSEJvbjpLYnYxH97fFkfr+l9vSwkECnWGEIIRI0ZgaWnJTz/9hFarrVYiCunWE99WbTmwchnFebk4uFkR8UgoGVcKOPxzw84XeLwyC02TJlybORNdbi4jA0fSw6sHC08sJOZKDJs3b8bX15ferXzh4ELyN5yGEh32IwJIzCnm9Z9j+HXV7yWhuP2pXDmTRbdRgbh6N9xC9NqMDFKnz0AT0BTP2cbawt/EfENSQRKvd34dC7UpZ3KTJLQtJo1VR5N5ulcAXQPujoF3SiBQqFNsbW0ZOXIkGRkZ7Ny5s1qJyDgp3dPodVr2LF2ElJKA9u606uPN6d0NO1+gsram8Qfz0eXkcH3OHADmdp2LRmpYuXol5ubmjB07FvXAtyhXtab4dAm23RrRsbM30/oFEx11nBNVJKGca8X8ujYe3+bODbr0pNTruTZ9BobiYrw/+giVtTWJ+Yl8HfM1g5sMpptXN6Nh5oXfSULX8kqZtS6GNt4OvNg/uG5P4m+gBAKFOicoKIguXbpw5MgRLl68aJSI+v9eInJq5EXXMRO5ePgAJ7dvBiB8dCBuvkq+wKpFC9ynTaNw5y7y1qzBw8aD++X9aEo1OHZ0xN7eHr10IEc/GzUZ2JsbZaMnu3kzNG8/BWa2ePYfi06rJ3JJHBpLNRGTQht0V9Gsz7+gJDramBcICqKwopBpe6dhbW7N9LDpRiODHtZXSkJ6CS+uPoVOb+wqan4XzcV093iqcE/Tr18/PDw8WL9+PYWFhdj1+KNE1GnEWAI6duGX774m6cwp1OYqBj7RAmmQRC5p2PkC58cexaZbV9LfeZfju3aRczmHYu9iFqctJjkniezvz2LQaXBpG4sq6gM4s4bDPy7HsjSHY74DeH7tWfavjSc7tYiIR0KxcWi4XUWLDx8m6/PPcRg5EsfRo9Ab9Mw6MIurBVf5qPdHuFmbelAd/hRSj92QhL7cd5noxBzeGNGSJq53V7EeJRAo1AvMzMwYM2YMFRUVrF+/HilkpUS0Ph4pJUKl4r7/ewkXLx82L3iP3LRUHNys6fNwKOmJBRxe33DzBUKlotG771Hk4sy2ffvw8fbmpfEvYYYZJ5ftouJqIc7jQ9CMfRX8upOychYnthklodemDIO0Us7tS6VVby+atHKt69OpM3SZmca8QNOmeM6ZDcDCkwvZn7KfWZ1mEeYZZjS8SRI6lZzHRzsvMrR1I8a0r18Tyt0OSiBQqDe4u7szcOBALl++THR0dKVEFJtNaUwWABora0bOmA0qFev/82/KS4oJ7OBOq97enN6VTOLphpsvwNmJqEGDUGu19M7KopFdIz6wmk379GCutM3DqqUrmGnQjviaHdcCsTevoOfwwbR1s2e01opMlYE0P8u6Pos6Q+r1pE6fgaGoCO8FxrzApsub+Db2W+4Pvp/xzcYbDW+ShIoq9Lyw6iQe9pa8Par+dxWtDiUQKNQrOnbsSEhICDt37iQtLa1aicjB3ZPhL71CXnoaWz6ej8GgJ3yMMV+we9k5CrIbZr5g27ZtZBUXE2FpScXSZeSu+xXfo7acc7/KS9p/c7XgKgC/btpCXpk5A70TMV8/md1LYzHTQ2qoNXO2nOX89YI6PpO6IeuLLymJisJz9mwsgoI4k3mGeYfmEeYZxqzOsyoNb5KE5m6IIzmnhAUT2uJgdXdWbKtRIBBCOAshdgohLpme/zCtnhCijxDiVJVHmRBipGnbUiFEYpVtbWvij8LdjxCC4cOHY21tbexSqtf+QSIC8GneiojHppJ48hgHflj2+3xBAxxfcObMGU6cOEH37t0JmzkTi+ZhFB0qxczNklaPRWCmNmPOoTlcPRtjlIQGDMH3kQ85c8mTq2fzCB8TwDuPdsDeypxnV5yguFxX16dUqxRHRZH12Wc4jBiBw+hRpBenM23vNNys3fhvr/9irjJd4G+ShDaevsZPJ1L4v4ggwprUfRH6/5Wa3hHMAnZLKYOA3abXv0NKuVdK2VZK2RaIAEqAyCom03/bLqU8VUN/FO4BbGxsGD16NFlZWezYsaNaiQigTf/BtBkwhGOb1nF2/57f5QuiGlC+ICsri82bN+Pj40OfPn1AmmHZcSpSr0UbvxoPR09mdJrBqWsnWPfJ28aBYw8+SpZdbw4XP04Ti6O0NF+Hm50FC8e3JSGrmNnrY28E3XsdXWYmqS9PR+Pvj+ec2ZTry5m2dxrF2mI+ifgEJ0vT/9uqktDQj0jOLeW1n2No7+vI8xF39+jrmgaCEcAy0/IyYORf2I8FtkkpS2p4XIV7nKZNm9KtWzeOHz/OuXPnjBKRjx256+KpSCu+Yddn0hP4tGhN5KJPSLt0gcAO7rTs5cWpXckknsn6kyPcG2i1WtasWYNarWbs2LGoUJG98jyGYgNWIYUU7dlC7sqVDPMfyshLzdDnFNFk/CBUKg2RS+KwsLUkolMiYtdcuLiDboGuvNA3iHUnU1lzPKWuT++OI/V6UmcY8wJeCz5CWFsz99BcYrNjebfHuwQ5mYrwSAk7XrshCemsXHlx9SmQsHBCO8zuoq6i1VFT7z2klGmm5euAx1/YTwBW3rTubSHEGSHER0KIW/ZZE0I8KYQ4JoQ4lpnZgBOCDYiIiAgaNWrExo0bKSwuxGViM1QaFVlLYtBmGv9LqM3MGPbiLGydnNnwwVsU5mQRPjYQVx9bdi89S2FOWR2fxZ1l+/btpKenM2rUKBwcHMjfmkD5pTycRgbi+tT92PTsQfr7/2HHB+9il1BCQhvB7OT/smnFEXLTiun7aChW4z4Ez1awdjJkXuC5iCC6BbgwZ0MsF64X1vUp3lGyvvySksNReM5+HcvgYL6J/YatiVt5rt1zRPhGVBrufRuiv4Auz0DLMXy29zLHknJ5a1RLfJzv/kI9fxkIhBC7hBCx1TxGVLWTxvvIW95LCiEaAa2AHVVWvwI0A8IAZ2DmrfaXUi6SUnaUUnZ0c2u4MyE2JH7rUqrT6Vi3bh0qRw2uT7QCIOvrGHSmi7yVnT0jZ8yhoqyMDfPfRkodA59oicEg2fF1LHr9vZkviImJ4fjx44SHhxMcHEzx0esUHbyGbXhjbMI8EULQ6O23OeflxrkT0XQeMZY5LywmpKAD16JL8e1ua6w9rLGGiSvB3Ap+GI+6LJcFE9pia2HOsz+coKTi3swXFEdFk/XpZziMGI7D6NH8kvwLC08sZFCTQTzR6olKwwMfwv750P4RGPgOx6/msnD3RUa182JEPas9/L/yl4FAStlPStmymscGIN10gf/tQp/xJ291P/CzlFJb5b3TpJFy4FugU81OR+Few9XVlcGDB3PlyhUOHTqEuZs1rpNbYdAayFwcg77AWMfY1cePIc+/THpiPJFffoyDmxV9HmpGemIB0esT6vgs/nmysrLYtGkTPj4+REREUH4ln9z18VgEOeJwX+XU0tG7t5FoZ0GTzDwCElOx1TnS/dL95Nle50P1LK7kXzEaOnjDhBVQkAprHsXd2oyFE9pyObOI2evj6uYk7yC6rCxSp79sygvM4XLeZWbun0kz52a8Gf5mZRfQ6EWw+w1oNQ6GLqCgXMcLq07h5WTFmyNa1O1J/IPUVBraCEwyLU8CNvyJ7URukoWqBBGBMb8QW0N/FO5B2rVrR/PmzdmzZw+pqaloGtng9nhLDEVaYzAwdSsN6NCZ7uMf5vzBfRzZsJagjh607OnFyZ1XuXIP5QtuzgvIQi3Zy89h5mSJy8RmCLXxIhb9849E//wjrfoOpFvEYHK/X8GOjw5hqJAMe6o9BpWeKZFTSC1KNb6xTycY+hEk7oMdrxIe6MrzEUH8dCKFNceS6/CM/1mkXs+1GTMwFBTi9dFHFKi1PLfnOazMrPg44mOszIy1mzm5HLZNh5AhMPILUKmZvT6WtPwyFk5oh53l3dlVtDpqGgjeA/oLIS4B/UyvEUJ0FEIs/s1ICNEE8AH23bT/CiFEDBADuAJv1dAfhXsQIQTDhg3D1taWNWvWkJubi8bHDtdHm6PLKSdrSSyGUqN80WnkOJqF9+LXVd9x+Xg04eOM+YJdy+6dfMGOHTtu5AXsrGzJXnYWqTXg8khzVNbGi9OJbZv4ddV3NAvvRb8pz+D+8r+40vEx0q5LOve0o3VQMxb1X0SJroQpO6aQUWK6mW/3EHR5Fo58BceX8nzfILo2dWH2hlgupt8b+YKsr76i+NBhPGe/jjrIn3/t+xfpJeks6LMATxtPo1HsOtj4HAREwLhvQW3Ol/sus+HUNab1DaqXBehrQo0CgZQyW0rZV0oZZJKQckzrj0kpp1SxuyKl9JJSGm7aP0JK2cokNT0kpSyqiT8K9y5WVlaMHz+esrIylixZwvXr17Fo6ojrw6FoM0rI+jYWQ7keIQQDpj6Ph38AWz7+gLzrKQyc0hKDXhK5+O7PF8TGxnLs2DHCw8MJCgoid81FtNeLcX6gGebuxqRlzN5I9i79isCwLgx65kWQgn0/JpJo2wGf3KNYvj+VkqNHCXEO4ct+X5JTlsMTkU+QU5ZjPEj/N431jre8jDr5MAsntsXWwoxnV9z9+YLi6CNkffoZ9sOG4TBmDPOPzufI9SPM7TqXtu6mYUwXtsO6J8CnC4xfgVRreHfrOd7bdp4hrRvxTJ+7u6toddzdfZ4UGhReXl48/vjjCCH49ttvSUpKwjLEGZeJzahIKSR7WRxSq8dcY8GI6a+jsbRk/fx/Y2Gto89DzbieYKxfcLf2j8/MzGTjxo038gKFe5IpjcnCYbA/ViHGwUznD+0n8qtP8GvdjiEvzEQaBNu+iuXcoTTChjRh0IcPYe7uztXJUyjcvZvWbq35tO+npBalMnXnVAoqCkBtBmO/ASc/WP0w7voMFoxvR3xmEa/fxeMLtBkZpL78LzR+fjSaN5e1l9ay8vxKJjWfxIhAU9+XhF/gx0eMvageWI1Obcn0tWf4an8CD3fx4+MJ7VDfg7OyKoFA4a7C3d2dyZMnY2try/fff8+FCxewaumK07gQyhPzyV5xHqkzYOfsyoiXX6coJ5tNH71H07YutOzlxeldyexbefGuuzNISkrim2++qZyc71wuBTuTsG7vjm0PY8+Vy8ej2fbpf/EKac6Il19Dr4WNH5/iSkwWPScE02lYUzReXvitWI5Fs2akPPc8eT/9RJhnGAv6LOBS3iWe2fUMJdoSsHKEiatAr4WVE+nua2kcX3AiledWnqRMq6/jFvl7lF24yJUJEzAUFeO14CNOFJ7jnah3CPcK58UOLxqNrkbByongEggPraNUZcPU5cdZezyFaf2CeHNEi3syCIASCBTuQhwdHXn88cdxd3dn1apVnDx5Ept27jiODKTsfA45qy8g9ZJGQSEMePI5kuPOsHfZ1/QcH0z7gb7E7U9l8yenKSvW/vXB6gEnT55k2bJlWFtbM3nyZKxLzcn58QIaXzucRgUhhCDpzCk2ffQe7k2aMmrmXCrKBD//9yTpiQUMmNyCVr0ri8yYOTnh9+032HTtStprr5P19deENw5nfs/5xGbF8tye5yjTlYFrEIz7BjLOwvqpvBARwMxBzdgSk8b4RVFkFNwdOZfCvXtJmjgRdHr8vvuObC9bXvrlJbztvPlPz/+gVqnh2ilYMQ7sGsEj68nHjke+iWb3+Qz+PaIF0/oF35WTyd0u6nnz5tW1D3+bRYsWzXvyySfr2g2FOkSj0dCyZUtSU1OJiorCzMyMwG4tEBZqin69hj6vDMtQF9ybNEVbXsbJbRuxcXKi45DO2DlbEvNLCgknM/Ft7oylbf3s/WEwGNi1axc7d+7E39+fhx9+GBuVJVlfxyDUKtymtEZlbUbq+bOse38eTp6NGTv7bcqKVWz46CRFeeUMebo1/m3+OO5GaDTYDx5ERdJVcr/7DkNxMa3uewhvO2++P/s953PPM8BvAGrXINDYQvQXCARhfYYT2sieFVFXWX8qla4BLrjb1c8ZS6WU5CxdRtqrr2ERFITvsqXofTx5YucTFJQXsGTgEjxsPCDjHCwbDhb28NgW0nHiwcXRnEsrYOGEdozr6FPXp/KP8cYbb6TNmzdv0c3rlTsChbsWCwsLHnjgAVq0aMGuXbuIjIzEJrwx9v18KTmRQd5GYz6gxwOT8G/XkT3ffkVy3BlCuzVixLR2lBVpWfv+MVIu5Nb1qfyB8vJyVq9ezcGDBwkLC+PBBx/E0tyC7OXn0BdpcXmkOWp7DekJ8ax7bx52zq6Mfe3fFOXAuvnHqSjVM/LFdvg0v/VEaEKjofEH83F66CFyli4l7ZVXGOo7iNe7vG6cf//ALHQGHXR9Fto+CPveg7j1DGzhydqnuwIw9ovD7Ii7XlvNctvIigquz5lDxvvvY9e/P34rlqN2d+PVX1/lct5lPuj1AU0cmkD2ZfhuJKg18Mh6ErVOjPniEMk5JXz7aCeGtm5c16dSKyh3BAp3NSqVitDQUEpLS4mKiiIvL4+WAzuCDooPXkPqDFgGORHQoROXog9xZvd27Jxd8W/XjID27lyJyebM7mSs7TW4+9nX9ekAkJ+fz3fffcfVq1cZPHgwffr0wVBQQc7yc1QkFuA8PhjLYGeykpNY89brWFhbc/+cd8nPVLHpk1OYW6oZ+VK72yo8L4TApkcPhLkZOcu+o/RsHF3Gv4CtlQPfn/uetOI0+vj2QQT1h4R9cHQxWNjiHtKN4W29OJSQzZJfE7EwU9PRz6leyCe63FxSnn6Gwp27cJn6FJ5z55JnKOKVA6+wJ3kP08OmMzRgKOQlw7KhoCuDRzcTW+HBA19HodVLlk/pTJj/3Tub6K241R2BuBt7AHTs2FEeO3asrt1QqEdIKdm/fz979+4lODiYsWPHUrz1KsVRadj398O+mPcq1QAAFMdJREFUry8FWRls/eQDUs+fJbhzOP2eeBaVmTWRi2O5GpdDm74+dBsTiKoOE4IpKSmsXLkSnU7HuHHjCAgIoORUJnkb4sEAjsMDsOnoQe71a6yeOxOEYMK898lOUxO5JA5Hd2uGPdcWW6e/X2oyd/WPXH/jDazatMHni89ZlLSSz099zviQ8bzW+TVESQ5seAYubgf/XjDyc8qsG/HymtNsPpPGmPbevDO6JRZm6jvQMrdHeUICyVOfRpeWRqO338Jh+HB+Sf6FuYfmUlhRyAvtX+CR5o8gijLg28FQnAWTNnKoxJsnvz+Og5U530/uRFM32zo7hzuJEOK4lLLjH9YrgUDhXuLo0aNs2bIFHx8fJk6YSNmWZEpOZOAwpCl2PbwwGPQc2/QzB1cvx8rOjoFTX8CvdXsOro3nzN4U/Fq5MODxFmiszGrd95iYGNavX4+9vT0TJ07ExcaJvPXxlMZkoWlij/O4YMxcrCjIymDV3JnoyssZP+89rieq2ffDBTz87RnybBssbf73nEfBjkiuvfwymiZ+eC9axCepP7A0bimPtXiMFzu8iAA4sQy2vwoqMxjyX2TLMSzYHc/C3ZcIa+LElw91wMW29mseFx08SOq0FxEaDd6ffoJsGcz8o/P56dJPhDiF8E6Pdwh2CoaSHFg6BHKT4OGf2Zrvy7RVp2jias13j3fG06F+5jz+CZRAoNBgiIuLY926dbi4uPDgxAfRbUmlNDYbx1GB2HZuBEDGlQS2ffpfspKTaNN/ML0emsyFI9nsX3URJ09rhjzTGntXq1rxV0rJL7/8wr59+/D19WX8+PGoU8rJWXsJQ4kW+/5+2PX0RqgERbk5rJ43k9KCAsbOfouU8+ZEb0zAr6ULA59sibmm5v/Gi6OiSXn2WVQO9vgsXsz89BX8ePFHnm37LFPbTDUa5STAuqcg5Qi0GA1D/svGS2VMX3Mad3sLlkwKI9jjr6Wpf4qcH34g/e13sAgIwOeLz4kzz+SVA6+QWpTKYy0f49m2z6JRa6CsAL4bDuln4cEfWZ7hz+wNsbT3dWLJpI44Wmtqzee6QAkECg2KhIQEVq1ahZWVFQ8/8BBsTafsYi5OY4Oxbu+OEAJdRQUHf1zOsc0/4+jhyeBn/4Ve786Or2NRqQWDp7amUYDDHfVTq9Wyfv164uLiaNu2LfcNGExxZDLFUWmYeVgbC843NsoUxXm5rH3rdfIz0hnz6ptcPm1GzN4Ugjt7EPFIKOp/cE780rg4kp98CgwGvL76grfzV7MpYRMvdXiJR1s8aswF6HVwcAH88i7YuMGIzzhl0YEnvjtGWYWejx9oR58Q93/Mp+qQOh3p775H7ooV2Pbujft/3uWr+O9YEruERjaNeLv723Tw6GA0LsuHH8ZDylHk+OV8khLIhzsvEtHMnc8eaI/VPxBE6ztKIFBocFy7do3ly5cD8OD4iWh25FKekI+miT32/fywCHBACEHy2Ri2ffYhRdnZdB59P83Ch7Hty7MU5pYR8XAoIZ0974h/hYWFrFy5kmvXrtG/f386eLckb80ldNml2Hb3wmFAE4S5ipKCfI5tWsepHVswGPSMnD6Hi8c0XDqaTpt+PoSPDkTcgbxGRVISVydPQZeTQ6OPP+IN3Xp2Ju0k1DmUqW2m0senjzEgXDsFPz8Fmeeh05Okhc1i8g/G2sevD2nOY+FN7kgSWV9QQOqLL1F88CDOjz1GweQRvHZ4NudyzjE6aDQzwmZgY25jDADRX8Hhz6C8AMPoJbyREMyyw0mMbu/F+2NaY36XF5a5XZRAoNAgyc7O5vvvv6ekpITx4+7HI8uawr3J6AsqTAHBF4sARypKS9i7dBFx+3bj0TSQiMee58jmPFIv5tFhkB+dhzf9Ry+2aWlprFy5ktLSUkaPGk3jVGsK915FbW+B07hgLAMcKcnP4+imdZyK3IK+QktItx50GDqOI5vzSD6bQ9dRAbQb4HtHe+poMzJIfuJJyhMS8HzvHfaF6Fh0ZhHJhcmEOofyVJuniPCJQOjKYPebEPU5uARROvRzXjigIvJsOg909uWN4S3+0YttxdWrJE99moqrV/GYO4ctLcpYcHwBthpb5nadaywqU5YPUV9C1GfG5eDBaHvM4KVfBZtOX+OJHv68Mji0TjsH1DZKIFBosBQUFLB8+XKys7MZNmwYrUJbUnoig8JfktHnV6DxMwWEQEfijxwm8utP0ZWVET5hEgU5gZw7lE5AOzf6Ptocc4uaywfnzp1j3bp1WFlZMW7QKDR78tCmFGHd3h3H4QGUlRf9LgA0696LLqPHY6ZxYcfiODKTCuj9UDOah9dOH3d9QQHJzzxD6fETuM+Ygd2EcWxL3cVXZ74iuTCZZs7NmNp6Kn18+6BK3G+s61t4HUPPGcwvHcoX+5MID3Th8wc64GBd88F7JUePkvLc8yAllu/P5c3yn4i+Hk1vn97M6zoPF9QQ/aUxKJXlG6eR7jWDdNtmvLzmNAcuZfHK4GY81SvgH2iduwslECg0aEpLS/nhhx9ITk7G2tqa5s2b0zK0BS4ZGop+SUWfX24MCH190blJIr/6mMSTx/Bt1RafluM4viMbNx877nu69f/UNROMSeGDBw+ya9cuvBp7MTywN9o9Gag0KhxHBSF91Bzd+BOnd25Dr9US2r0XrfuPJivFjEvH0klPLEBtpmLAlBY0bVu7VfoMZWWkvvQvivbsQWVri12/ftgMHsg+z1wWnV1CUkESIU4hTG0zlQi39qi2z4Izq8GrA9uD3+S5yAJ8nKxZ8mgY/q42/7MfeT+tI23ePDQ+Plx6ZSxvJn+NXuqZ2Wkmo7z6IKK/hKgvoDwfmg0lL+xFNme6sen0NY5cyUElBO+ObsX999Bo4b+DEggUGjw6nY5Lly4RGxvLhQsX0Ol02NnZ0Ty0OYGqRticKMdQUIHG1w67vr5cunqEvd9/jdrMjDb9H+ZclLEXTONgR7xDnPBu5oxLY5s/lYxKSkq4cuUKCQkJJCYmkp2dTYuQULoXhaC/XIhliBMW/d05vmcDp3duR6/VEtylJ25NI0iLh9RLeSDBxduWoI7uBIV5YO9SO72ZbkYaDBQfPETB1q0U7tyJoagItaMjNgP6c66dMwsNO7lSdJVgp2CmtplK34J8VFv+BbpyrnR8ldFHmlFcoaeTvzPhga50C3ChRWOHP53ITV9URMnRo5RERVMcFUX5hQtounRi8f0ObM7cSzv3drzdcSY+sRtMAaAAbfAQ9no8xvIkBw7GZ6E3SALcbBjWpjEj2nrVKBDd7SiBQEGhCuXl5Vy8eJHY2Fji4+PR6/U4OjgS7NIE3xRbHAo0WPjaI9rbsnPTF6TFX8C/XTfsPQeTnlhOXnoJAFZ25ngFO+HdzPiwtFdz9epVEhMTSUhI4Pp14/QLGo0GPz8/mtp64XNSg9BLLCM8OJO0lzO7tqHX62gc0hlzq86kJ6mRBomjhzVBYR4EdXTHybN+XbwMFRUUHzhAwZatFO7diywtRe3mRna3EJY3vsJ+hzSCnIOZGjSefkdXokrYS6lfHz63m8b2q4JLGcbSIw5W5nRp6ky3AFfCA13wtzen7NRpiqMOU3I4itLYWNDrERoNVu3bk9nOl1nu+8nW5fFsi8d5LDcP9ZFFUF7Atcb9+VqMY8UVeyr0BrydrBjWpjHDWjcmtJFdvRj1XNfckUAghBgHzANCgU5SymqvzkKIQcBCQA0sllL+VsnMH1gFuADHgYellBV/dVwlECj8k5SWlnL+/HliY2NJSEhASomzjSP+Za74l7ji5u1BmlUSe3d9ixACW2cXrOycECpbKrQW5JeqKVWDwdKAVlOOFBKVUNHIxg0faw8aC2dcy2ygUIehRIe6sRWXLWM4tn8TBp0Oe4+2lFe0BemInYslQR09CApzx8XL9q64eBlKSijat4+CrVsp2rcfWVGB1sOJX5tJtgYUYh4cyFTbYPpHfYfKoAW7xmit3ckUziSU2nIlUYcuuRjXzBx8s7Mw1+sxqAR5/u6khbpw0V9DTKMKrmmzyCnLIcC+Ce9aBdPsxCpERREnbXsyL38op7XeeNhbMLR1Y4a1aUwbb4e7ov1qkzsVCEIBA/AV8HJ1gUAIoQYuAv2BFOAoMFFKeVYI8SOwTkq5SgjxJXBaSvnFXx1XCQQKd4ri4mLOnTtHbGwsV65cAcBF2NG0wp2mTl6UmuWQU5RPVnkJ2bKUHHUpeiERElylPY0NTjQyOOFpcEQlBeWGMipUOqSlxNzRgkKZw6/HV6PX6zCzaI5K0wk7Z3cCO3oQ1NED9yZ39z9XfWEhhbt3U7B1K8WHDoNOR4abOftCdKR0aEwvTw/E1QzUl7KxTyzDM1liafrrd8Ud4vwEMX6Cc76CMg3YG1Q4SQ2u5nZ427gSqNIwNm4vNrpiImUXPqwYSYZ1IPe18mRY68aENXFuUL2A/i53VBoSQvzCrQNBV2CelHKg6fUrpk3vAZmAp5RSd7Pdn6EEAoXaoKCggLNnzxIbE0NKqrHAu7lUoxXGoixOKlu8rT3wcfDEw8EJoTZQZiimqDyPzMzrpF9PpTA7m/LiPKShGON/JoFa0xxrl3BCOoUQFOZOowDHOzIOoK7R5eZSGLmT/K1bKDlyFCElJRZgXW7cnu1iTmqIC/nNG6Nv7oODvTUeBnCtKMciNxd1ZjoiPw3LsgzcyMVCGMtkbtZ35hv1OAJadGJYm8Z0C3DBrIGMA6gptwoEtTGhiheQXOV1CtAZoxyUJ6XUVVnvdas3EUI8CTwJ4Ovre2c8VVCogr29PV26dKFLly7k5uYSGxNLbkY2fgH+NA1sip3d7U2hoNcZuH45j8SYFMqKtAR3aoJ3iBOqe/ziZebkhNP4+3Eafz/ajAzyt20j79xp7Nt3wim8B6Fet/y5/w6d3kBMSh4nLlzmem4xnVqFsjLYtU4nt7vX+MtAIITYBVQ3tPI1KeWGf96l6pFSLgIWgfGOoLaOq6AA4OTkRI+ePf6nfdVmKrxCnPEKufemNb5dzN3dcZ00Cdf/YV8ztYp2fs6082u47Xen+ctAIKXsV8NjpAJVO+16m9ZlA45CCDPTXcFv6xUUFBQUapHauDc9CgQJIfyFEBpgArBRGpMTe4GxJrtJQK3dYSgoKCgoGKlRIBBCjBJCpABdgS1CiB2m9Y2FEFsBTP/2/w/YAZwDfpRSxpneYibwkhAiHmPOYElN/FFQUFBQ+PsoA8oUFBQUGgi36jV0b3dbUFBQUFD4S5RAoKCgoNDAUQKBgoKCQgNHCQQKCgoKDZy7MlkshMgEkv7H3V2BrH/QnX8axb+aofhXMxT/akZ9989PSvmHYhZ3ZSCoCUKIY9VlzesLin81Q/GvZij+1Yz67t+tUKQhBQUFhQaOEggUFBQUGjgNMRAsqmsH/gLFv5qh+FczFP9qRn33r1oaXI5AQUFBQeH3NMQ7AgUFBQWFKiiBQEFBQaGBc88GAiHEICHEBSFEvBBiVjXbLYQQq03bo4UQTWrRNx8hxF4hxFkhRJwQ4oVqbHoLIfKFEKdMjzm15Z/p+FeEEDGmY1dXglQIIT42td8ZIUT7WvQtpEq7nBJCFAghpt1kU6vtJ4T4RgiRIYSIrbLOWQixUwhxyfTsdIt9J5lsLgkhJtWif/OFEOdNn9/PQgjHW+z7p9+FO+jfPCFEapXP8L5b7Punv/U76N/qKr5dEUKcusW+d7z9aoyU8p57AGrgMtAU0ACngeY32TwDfGlangCsrkX/GgHtTct2wMVq/OsNbK7DNrwCuP7J9vuAbYAAugDRdfhZX8c4UKbO2g/oCbQHYqus+w8wy7Q8C3i/mv2cgQTTs5Np2amW/BsAmJmW36/Ov9v5LtxB/+ZhrIX+V5//n/7W75R/N23/LzCnrtqvpo979Y6gExAvpUyQUlYAq4ARN9mMAJaZltcCfYUQtVJBXEqZJqU8YVouxFin4fYKuNYfRgDfSSNRGKvNNaoDP/oCl6WU/+tI838EKeV+IOem1VW/Y8uAkdXsOhDYKaXMkVLmAjuBQbXhn5QyUlbWDI/CWCWwTrhF+90Ot/NbrzF/5p/punE/sPKfPm5tca8GAi8gucrrFP54ob1hY/ox5GMsjlOrmCSpdkB0NZu7CiFOCyG2CSFa1KpjIIFIIcRxIcST1Wy/nTauDSZw6x9gXbYfgIeUMs20fB3wqMamvrTj4xjv8Krjr74Ld5L/M0lX39xCWqsP7dcDSJdSXrrF9rpsv9viXg0EdwVCCFvgJ2CalLLgps0nMModbYBPgPW17F53KWV7YDDwrBCiZy0f/y8RxtKnw4E11Wyu6/b7HdKoEdTLvtpCiNcAHbDiFiZ19V34AggA2gJpGOWX+shE/vxuoN7/lu7VQJAK+FR57W1aV62NEMIMcACya8U74zHNMQaBFVLKdTdvl1IWSCmLTMtbAXMhhGtt+SelTDU9ZwA/Y7wFr8rttPGdZjBwQkqZfvOGum4/E+m/yWWm54xqbOq0HYUQjwJDgQdNweoP3MZ34Y4gpUyXUuqllAbg61sct67bzwwYDay+lU1dtd/f4V4NBEeBICGEv+lf4wRg4002G4HfemiMBfbc6ofwT2PSFJcA56SUH97CxvO3nIUQohPGz6pWApUQwkYIYffbMsakYuxNZhuBR0y9h7oA+VVkkNrilv/E6rL9qlD1OzYJ2FCNzQ5ggBDCySR9DDCtu+MIIQYBM4DhUsqSW9jcznfhTvlXNec06hbHvZ3f+p2kH3BeSplS3ca6bL+/RV1nq+/UA2OvlosYexS8Zlr3JsYvPYAlRkkhHjgCNK1F37pjlAnOAKdMj/uAqcBUk83/AXEYe0FEAd1q0b+mpuOeNvnwW/tV9U8An5naNwboWMufrw3GC7tDlXV11n4YA1IaoMWoU0/GmHPaDVwCdgHOJtuOwOIq+z5u+h7GA4/Von/xGPX1376Dv/Wiawxs/bPvQi35973pu3UG48W90c3+mV7/4bdeG/6Z1i/97TtXxbbW26+mD2WKCQUFBYUGzr0qDSkoKCgo3CZKIFBQUFBo4CiBQEFBQaGBowQCBQUFhQaOEggUFBQUGjhKIFBQUFBo4CiBQEFBQaGB8//AuuxsOUmz+wAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "evt, (rhs,) = prog(u=u)\n", + "plt.plot(u)\n", + "plt.plot(rhs)" + ] + }, { "cell_type": "code", "execution_count": null, @@ -196,7 +690,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.2" + "version": "3.7.7" } }, "nbformat": 4, -- GitLab From f579c554d69e6244cae14a4209c53d43a5d6aba7 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Thu, 7 May 2020 21:43:51 -0500 Subject: [PATCH 46/46] Simplify the code generation of roll(), because loopy appears to have Python semantics for % --- arrayzy/array.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/arrayzy/array.py b/arrayzy/array.py index 33f08f5..c72c500 100644 --- a/arrayzy/array.py +++ b/arrayzy/array.py @@ -614,10 +614,6 @@ class Context: """ assert a.ndim == 1 idx = (prim.Variable("_0") - shift) % a.shape[0] - # It's not immediately clear what loopy-generated code guarantees about - # the sign of the result of the % operator, so if the result is - # negative we shift it to be positive. - idx = idx + prim.If(prim.Comparison(idx, "<", 0), a.shape[0], 0) expr = a.to_loopy_expression((idx,)) return ArrayExpression(self, a.shape, a.dtype, expr, a.reductions) -- GitLab