From 4356f2e8813d7a967009bd3d80cc5b96f2c2614b Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Mon, 25 May 2020 13:38:14 -0500 Subject: [PATCH 01/15] Begin design doc, flesh out tagging --- doc/design.rst | 70 ++++++++++++++++- pytato/__init__.py | 5 +- pytato/array.py | 187 ++++++++++++++++++++++++++++++++++++++------- setup.py | 1 - 4 files changed, 230 insertions(+), 33 deletions(-) diff --git a/doc/design.rst b/doc/design.rst index 7837eaf..1feecc3 100644 --- a/doc/design.rst +++ b/doc/design.rst @@ -1,8 +1,70 @@ Design Decisions in Pytato ========================== -- There is one (for now) computation :class:`pytato.N -- Shapes and dtypes are computed eagerly. -- Array data is computed eagerly. -- Results of array computations may *beomc +.. currentmodule:: pytato +- Results of computations are either implement the :class:`~Array` + interface or are a :class:`~DictOfNamedArrays`. + The former are referred to as an :term:`array expression`. The union type + of both of them is referred to as an *array result*. (FIXME? name) + +- There is one (for now) computation :class:`~Namespace` per + computation that defines the computational "environment". + Operations involving array expressions not using the same + namespace are prohibited. + +- Names in the :class:`~Namespace` are under user control + and unique. I.e. new names in the :class:`~Namespace` outside + the reserved sub-namespace of identifiers beginning with + ``_pt`` are not generated automatically without explicit user requests. + +- :attr:`Array.shape` and :attr:`Array.dtype` are evaluated eagerly. + +- Array data is computed lazily, i.e. a representation of the desired + computation is built, but computation/code generation is not carried + out until instructed by the user. Evaluation/computation + is never triggered implicitly. + +- Results of array computations that are scalar (i.e. an :attr:`Array.shape` of `()`) + and have an integral :attr:`Array.dtype` (i.e. 
``dtype.kind == "i"``) may be used in + shapes once they have been assigned a name. + + For some computations such as fancy indexing:: + + A[A > 0] + + it may be necessary to automatically generate names, in this + case to describe the shape of the index array used to realize + the access``A[A>0]``. These will be drawn from the reserved namespace + ``_pt_shp``. Users may control the naming of these counts + by assigning the tag :attr:`pytato.array.CountNamed`, like so:: + + B = A[(A > 0).tagged(CountNamed("mycount"))] + +Glossary +-------- + +.. glossary:: + + array expression + An object implementing the :clas:`~Array` interface + + array result + An :class:`array expression` or an instance of + :class:`~DictOfNamedArrays`. + + identifier + Any string matching the regular expression + ``[a-zA-z_][a-zA-Z0-9_]+`` that does not + start with ``_pt``, ``_lp``, or a double underscore. + +Reserved Identifiers +-------------------- + +Identifiers beginning with ``_pt`` are reserved for internal use +by :module:`pytato`. Any such internal use must be drawn from one +of the following sub-regions, identified by their identifier +prefixes: + +- ``_pt_shp``: Used to automatically generate identifiers used + in data-dependent shapes. diff --git a/pytato/__init__.py b/pytato/__init__.py index 5f047b9..de2ea92 100644 --- a/pytato/__init__.py +++ b/pytato/__init__.py @@ -25,7 +25,8 @@ THE SOFTWARE. """ from pytato.array import ( - DottedName, Namespace, Array, DictOfNamedArrays, + DottedName, Namespace, Array, DictOfNamedArrays, Tag ) -__all__ = ("DottedName", "Namespace", "Array", "DictOfNamedArrays") +__all__ = ("DottedName", "Namespace", "Array", "DictOfNamedArrays", + "Tag") diff --git a/pytato/array.py b/pytato/array.py index 918b53e..de78540 100644 --- a/pytato/array.py +++ b/pytato/array.py @@ -24,21 +24,23 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. """ -__doc__ = """ +# {{{ docs +__doc__ = """ +.. 
currentmodule:: pytato -Expression trees based on this package are picklable -as long as no non-picklable data -(e.g. :class:`pyopencl.array.Array`) -is referenced from :class:`DataWrapper`. +.. note:: + Expression trees based on this package are picklable + as long as no non-picklable data + (e.g. :class:`pyopencl.array.Array`) + is referenced from :class:`DataWrapper`. Array Interface --------------- -.. currentmodule:: pytato - .. autoclass :: Namespace .. autoclass :: Array +.. autoclass :: Tag .. autoclass :: DictOfNamedArrays Supporting Functionality @@ -57,35 +59,54 @@ Built-in Expression Nodes .. autoclass:: LoopyFunction """ +# }}} + import collections.abc +from dataclasses import dataclass from pytools import single_valued, is_single_valued +# {{{ dotted name + class DottedName: """ .. attribute:: name_parts A tuple of strings, each of which is a valid - Python identifier. + Python identifier. No name part may start with + a double underscore. The name (at least morally) exists in the name space defined by the Python module system. It need not necessarily identify an importable object. + + .. automethod:: from_class """ def __init__(self, name_parts): self.name_parts = name_parts + @classmethod + def from_class(cls, argcls): + name_parts = tuple(".".split(argcls.__module__) + [argcls.__name__]) + if not all(not npart.startswith("__") for npart in name_parts): + raise ValueError(f"some name parts of {'.'.join(name_parts)} " + "start with double underscores") + return cls(name_parts) + +# }}} + + +# {{{ namespace class Namespace: # Possible future extension: .parent attribute """ .. attribute:: symbol_table - A mapping from strings that must be valid - C identifiers to objects implementing the - :class:`Array` interface. + A mapping from :term:`identifier` strings + to :term:`array expression`s. 
""" def __init__(self): @@ -96,6 +117,46 @@ class Namespace: raise ValueError(f"'{name}' is already assigned") self.symbol_table[name] = value +# }}} + + +# {{{ tag + +tag_dataclass = dataclass(init=True, eq=True, frozen=True) + + +@tag_dataclass +class Tag: + """ + Generic metadata, applied to, among other things, + instances of :class:`Array`. + + .. attribute:: tag_name + + A fully qualified :class:`DottedName` that reflects + the class name of the tag. + + Instances of this type must be immutable, hashable, + picklable, and have a reasonably concise :meth:`__repr__` + of the form ``dotted.name(attr1=value1, attr2=value2)``. + Positional arguments are not allowed. + """ + + @property + def tag_name(self): + return DottedName.from_class(type(self)) + + +class UniqueTag(Tag): + """ + Only one instance of this type of tag may be assigned + to a single tagged object. + """ + +# }}} + + +# {{{ array inteface class Array: """ @@ -138,8 +199,7 @@ class Array: .. attribute:: tags A :class:`dict` mapping :class:`DottedName` instances - to an argument object, whose structure is defined - by the tag. + to instances of the :class:`Tag` interface. Motivation: `RDF `__ @@ -160,6 +220,10 @@ class Array: This mirrors the tagging scheme that :mod:`loopy` is headed towards. + .. automethod:: named + .. automethod:: tagged + .. automethod:: without_tag + Derived attributes: .. attribute:: ndim @@ -192,32 +256,79 @@ class Array: def shape(self): raise NotImplementedError + def named(self, name): + self.namespace.assign_name(name, self) + return self.copy(name=name) + @property def ndim(self): return len(self.shape) - def with_tag(self, dotted_name, args=None): + def tagged(self, tag: Tag): """ - Returns a copy of *self* tagged with *dotted_name* - and arguments *args* - If a tag *dotted_name* is already present, it is - replaced in the returned object. + Returns a copy of *self* tagged with *tag*. 
+ If *tag* is a :class:`UniqueTag` and other + tags of this type are already present, an error + is raised. """ - if args is None: - pass + pass def without_tag(self, dotted_name): pass - def with_name(self, name): - self.namespace.assign_name(name, self) - return self.copy(name=name) - # TODO: - # - tags # - codegen interface - # - naming +# }}} + + +# {{{ pre-defined tag: ImplementAs + +@tag_dataclass +class ImplementationStrategy(Tag): + pass + + +@tag_dataclass +class ImplStored(ImplementationStrategy): + pass + + +@tag_dataclass +class ImplInlined(ImplementationStrategy): + pass + + +@tag_dataclass +class ImplDefault(ImplementationStrategy): + pass + + +@tag_dataclass +class ImplementAs(UniqueTag): + """ + .. attribute:: strategy + """ + + strategy: ImplementationStrategy + +# }}} + + +# {{{ pre-defined tag: CountNamed + +@tag_dataclass +class CountNamed(UniqueTag): + """ + .. attribute:: name + """ + + name: str + +# }}} + + +# {{{ dict of named arrays class DictOfNamedArrays(collections.abc.Mapping): """A container that maps valid Python identifiers @@ -260,6 +371,10 @@ class DictOfNamedArrays(collections.abc.Mapping): def __len__(self): return len(self._data) +# }}} + + +# {{{ index lambda class IndexLambda(Array): """ @@ -285,11 +400,19 @@ class IndexLambda(Array): :attr:`index_expr`. """ +# }}} + + +# {{{ einsum class Einsum(Array): """ """ +# }}} + + +# {{{ data wrapper class DataWrapper(Array): # TODO: Name? @@ -305,6 +428,10 @@ class DataWrapper(Array): """ +# }}} + + +# {{{ placeholder class Placeholder(Array): """ @@ -335,6 +462,10 @@ class Placeholder(Array): self._shape = shape +# }}} + + +# {{{ loopy function class LoopyFunction(DictOfNamedArrays): """ @@ -344,3 +475,7 @@ class LoopyFunction(DictOfNamedArrays): and one that's obtained by importing a dotted name. 
""" + +# }}} + +# vim: foldmethod=marker diff --git a/setup.py b/setup.py index c7d57e4..a0f3d05 100644 --- a/setup.py +++ b/setup.py @@ -23,7 +23,6 @@ setup(name="pytato", 'Programming Language :: Python', 'Programming Language :: Python', 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.6', 'Programming Language :: Python :: 3.7', 'Programming Language :: Python :: 3.8', 'Topic :: Scientific/Engineering', -- GitLab From 7e39c1cfe6e1ddd0d73861bb67aadd1de8b1622c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andreas=20Kl=C3=B6ckner?= Date: Tue, 26 May 2020 00:38:46 +0200 Subject: [PATCH 02/15] Apply suggestion to pytato/array.py --- pytato/array.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytato/array.py b/pytato/array.py index de78540..1a80349 100644 --- a/pytato/array.py +++ b/pytato/array.py @@ -89,7 +89,7 @@ class DottedName: @classmethod def from_class(cls, argcls): - name_parts = tuple(".".split(argcls.__module__) + [argcls.__name__]) + name_parts = tuple(argcls.__module__.split(".")) + [argcls.__name__]) if not all(not npart.startswith("__") for npart in name_parts): raise ValueError(f"some name parts of {'.'.join(name_parts)} " "start with double underscores") -- GitLab From eaa3f38327704e0fe369e0d66e379c40e20a546e Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Mon, 25 May 2020 17:55:16 -0500 Subject: [PATCH 03/15] Fix paren typo --- pytato/array.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytato/array.py b/pytato/array.py index 1a80349..ca8a0dc 100644 --- a/pytato/array.py +++ b/pytato/array.py @@ -89,7 +89,7 @@ class DottedName: @classmethod def from_class(cls, argcls): - name_parts = tuple(argcls.__module__.split(".")) + [argcls.__name__]) + name_parts = tuple(argcls.__module__.split(".") + [argcls.__name__]) if not all(not npart.startswith("__") for npart in name_parts): raise ValueError(f"some name parts of {'.'.join(name_parts)} " "start with double underscores") -- 
GitLab From a6c70b97b37e5b5195bf9850fa7d749eb70de38d Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Wed, 27 May 2020 13:06:49 -0500 Subject: [PATCH 04/15] Naming, documentation, design progress --- doc/design.rst | 104 +++++++++++++++++++++------ pytato/array.py | 188 +++++++++++++++++++++++++++++++++--------------- 2 files changed, 213 insertions(+), 79 deletions(-) diff --git a/doc/design.rst b/doc/design.rst index 1feecc3..5a9efa7 100644 --- a/doc/design.rst +++ b/doc/design.rst @@ -3,11 +3,43 @@ Design Decisions in Pytato .. currentmodule:: pytato +TODO +---- + +- reduction inames +- finish trawling the design doc +- expression nodes in index lambda + - what pymbolic expression nodes are OK + - reductions + - function identifier scoping + - piecewise def (use ISL?) + +Computation and Results +----------------------- + - Results of computations are either implement the :class:`~Array` interface or are a :class:`~DictOfNamedArrays`. The former are referred to as an :term:`array expression`. The union type of both of them is referred to as an *array result*. (FIXME? name) +- Array data is computed lazily, i.e., a representation of the desired + computation is built, but computation/code generation is not carried + out until instructed by the user. Evaluation/computation + is never triggered implicitly. + +- :class:`IndexLambda` is the main means by which element-wise + expressions, expressions involving reductions, and + prefix sums/scans are expressed. No expression nodes should + be created for array expressions that are expressible without + loss of information as a :class:`IndexLambda`. :class:`IndexLambda` + allows anything for which :mod:`numpy` might use a + :class:`numpy.ufunc`, but for example :func:`numpy.reshape` + is not expressible without loss of information and therefore + realized as its own node, :class:`Reshape`. 
+ +Naming +------ + - There is one (for now) computation :class:`~Namespace` per computation that defines the computational "environment". Operations involving array expressions not using the same @@ -15,15 +47,14 @@ Design Decisions in Pytato - Names in the :class:`~Namespace` are under user control and unique. I.e. new names in the :class:`~Namespace` outside - the reserved sub-namespace of identifiers beginning with - ``_pt`` are not generated automatically without explicit user requests. + that are not a :ref:`reserved_identifier` + not generated automatically without explicit user input. -- :attr:`Array.shape` and :attr:`Array.dtype` are evaluated eagerly. +- The (array) value associated with a name is immutable once evaluated. + In-place slice assignment may be simulated by returning a new + node realizing a "partial replacement". -- Array data is computed lazily, i.e. a representation of the desired - computation is built, but computation/code generation is not carried - out until instructed by the user. Evaluation/computation - is never triggered implicitly. +- :attr:`Array.shape` and :attr:`Array.dtype` are evaluated eagerly. - Results of array computations that are scalar (i.e. an :attr:`Array.shape` of `()`) and have an integral :attr:`Array.dtype` (i.e. ``dtype.kind == "i"``) may be used in @@ -41,8 +72,44 @@ Design Decisions in Pytato B = A[(A > 0).tagged(CountNamed("mycount"))] +- :class:`Placeholder` expressions, like all array expressions, + are considered read-only. When computation begins, the same + actual memory may be supplied for multiple :term:`placeholder name`s, + i.e. those arrays may alias. + + .. note:: + + This does not preclude the arrays being declared with + C's ``*restrict`` qualifier in generated code, as they + do not alias any data that is being modified. + +.. _reserved_identifier: + +Reserved Identifiers +-------------------- + +- Identifiers beginning with ``_pt_`` are reserved for internal use + by :module:`pytato`. 
Any such internal use must be drawn from one + of the following sub-regions, identified by their identifier + prefixes: + + - ``_pt_shp``: Used to automatically generate identifiers used + in data-dependent shapes. + +- Identifiers used in index lambdas are also reserved. These include: + + - Identifiers matching the regular expression ``_[0-9]+``. They are used + as index ("iname") placeholders. + + - Identifiers matching the regular expression ``_r[0-9]+``. They are used + as reduction indices. + + - Identifiers matching the regular expression ``_in[0-9]+``. They are used + as automatically generated names (if required) in + :attr:`IndexLambda.bindings`. + Glossary --------- +======== .. glossary:: @@ -50,21 +117,18 @@ Glossary An object implementing the :clas:`~Array` interface array result - An :class:`array expression` or an instance of + An :term:`array expression` or an instance of :class:`~DictOfNamedArrays`. identifier - Any string matching the regular expression - ``[a-zA-z_][a-zA-Z0-9_]+`` that does not - start with ``_pt``, ``_lp``, or a double underscore. + Any string for which :meth:`str.isidentifier` returns + *True*. See also :ref:`reserved_identifier`. -Reserved Identifiers --------------------- + namespace name + The name by which an :term:`array expression` is known + in a :class:`Namespace`. -Identifiers beginning with ``_pt`` are reserved for internal use -by :module:`pytato`. Any such internal use must be drawn from one -of the following sub-regions, identified by their identifier -prefixes: + placeholder name + See :attr:`Placeholder.name`. -- ``_pt_shp``: Used to automatically generate identifiers used - in data-dependent shapes. +.. vim: shiftwidth=4 diff --git a/pytato/array.py b/pytato/array.py index ca8a0dc..23df2f6 100644 --- a/pytato/array.py +++ b/pytato/array.py @@ -54,6 +54,7 @@ Built-in Expression Nodes .. autoclass:: IndexLambda .. autoclass:: Einsum +.. autoclass:: Reshape .. autoclass:: DataWrapper .. autoclass:: Placeholder .. 
autoclass:: LoopyFunction @@ -65,6 +66,7 @@ Built-in Expression Nodes import collections.abc from dataclasses import dataclass from pytools import single_valued, is_single_valued +import pymbolic.primitives as prim # {{{ dotted name @@ -103,26 +105,49 @@ class DottedName: class Namespace: # Possible future extension: .parent attribute """ - .. attribute:: symbol_table + Represents a mapping from :term:`identifier` strings to :term:`array expression`s + or *None*, where *None* indicates that the name may not be used. + (:class:`Placeholder` instances register their names in this way to + avoid ambiguity.) - A mapping from :term:`identifier` strings - to :term:`array expression`s. + .. automethod:: assign + .. automethod:: ref """ def __init__(self): - self.symbol_table = {} + self._symbol_table = {} def assign(self, name, value): + """ + :returns: *name* + """ if name in self.symbol_table: raise ValueError(f"'{name}' is already assigned") - self.symbol_table[name] = value + self._symbol_table[name] = value + + return name + + def ref(self, name): + """ + :returns: A :term:`array expression` referring to *name*. + """ + + value = self.symbol_table[name] + + v = prim.Variable(name) + ituple = tuple("_%d" % i for i in range(len(value.shape))) + + return IndexLambda( + self.namespace, + name, index_expr=v[ituple], shape=value.shape, dtype=value.dtype) + # }}} # {{{ tag -tag_dataclass = dataclass(init=True, eq=True, frozen=True) +tag_dataclass = dataclass(init=True, eq=True, frozen=True, repr=True) @tag_dataclass @@ -140,6 +165,11 @@ class Tag: picklable, and have a reasonably concise :meth:`__repr__` of the form ``dotted.name(attr1=value1, attr2=value2)``. Positional arguments are not allowed. + + .. note:: + + This mirrors the tagging scheme that :mod:`loopy` + is headed towards. """ @property @@ -169,13 +199,6 @@ class Array: The interface seeks to maximize :mod:`numpy` compatibility, though not at all costs. - All these are abstract: - - .. 
attribute:: name - - A name in :attr:`namespace` that has been assigned - to this expression. May be (and typically is) *None*. - .. attribute:: namespace A (mutable) instance of :class:`Namespace` containing the @@ -184,13 +207,18 @@ class Array: .. attribute:: shape - Identifiers (:class:`pymbolic.Variable`) refer to - names from :attr:`namespace`. - A tuple of integers or :mod:`pymbolic` expressions. + Identifiers (:class:`pymbolic.Variable`) refer to names from + :attr:`namespace`. A tuple of integers or :mod:`pymbolic` expressions. Shape may be (at most affinely) symbolic in these identifiers. - # FIXME: -> https://gitlab.tiker.net/inducer/pytato/-/issues/1 + .. note:: + + Affine-ness is mainly required by code generation for + :class:`IndexLambda`, but :class:`IndexLambda` is used to produce + references to named arrays. Since any array that needs to be + referenced in this way needs to obey this restriction anyway, + a decision was made to requir the same of *all* array expressions. .. attribute:: dtype @@ -206,20 +234,6 @@ class Array: triples (subject: implicitly the array being tagged, predicate: the tag, object: the arg). - For example:: - - # tag - DottedName("our_array_thing.impl_mode"): - - # argument - DottedName( - "our_array_thing.loopy_target.subst_rule") - - .. note:: - - This mirrors the tagging scheme that :mod:`loopy` - is headed towards. - .. automethod:: named .. automethod:: tagged .. automethod:: without_tag @@ -238,15 +252,11 @@ class Array: purposefully so. 
""" - def __init__(self, namespace, name, tags=None): + def __init__(self, namespace, tags=None): if tags is None: tags = {} - if name is not None: - namespace.assign(name, self) - self.namespace = namespace - self.name = name self.tags = tags def copy(self, **kwargs): @@ -257,8 +267,7 @@ class Array: raise NotImplementedError def named(self, name): - self.namespace.assign_name(name, self) - return self.copy(name=name) + return self.namespace.ref(self.namespace.assign(name, self)) @property def ndim(self): @@ -381,24 +390,52 @@ class IndexLambda(Array): .. attribute:: index_expr A scalar-valued :mod:`pymbolic` expression such as - ``a[_1] + b[_2, _1]`` depending on TODO + ``a[_1] + b[_2, _1]``. Identifiers in the expression are resolved, in - order, by lookups in :attr:`inputs`, then in + order, by lookups in :attr:`bindings`, then in :attr:`namespace`. Scalar functions in this expression must - be identified by a dotted name - (e.g. ``our_array_thing.c99.sin``). + be identified by a dotted name representing + a Python object (e.g. ``pytato.c99.sin``). - .. attribute:: binding + .. attribute:: bindings A :class:`dict` mapping strings that are valid Python identifiers to objects implementing the :class:`Array` interface, making array expressions available for use in :attr:`index_expr`. + + .. 
automethod:: is_reference """ + def __init__(self, namespace, name, index_expr, shape, bindings=None, tags=None): + if bindings is None: + bindings = {} + + super().__init__(namespace, name, tags=tags) + + self.shape = shape + self.index_expr = index_expr + self.bindings = bindings + + def is_reference(self): + if isinstance(self.index_expr, prim.Subscript): + assert isinstance(self.index_expr.aggregate, prim.Variable) + name = self.index_expr.aggregate.name + index = self.index_expr.index + elif isinstance(self.index_expr, prim.Variable): + name = self.index_expr.aggregate.name + index = () + else: + return False + + if name not in self.namespace: + assert name in self.bindings + return False + + return index == tuple("_%d" % i for i in range(len(self.shape))) # }}} @@ -412,6 +449,15 @@ class Einsum(Array): # }}} +# {{{ reshape + +class Reshape(Array): + """ + """ + +# }}} + + # {{{ data wrapper class DataWrapper(Array): @@ -420,14 +466,31 @@ class DataWrapper(Array): Takes concrete array data and packages it to be compatible with the :class:`Array` interface. - A way - .. attrib + .. attribute:: data A concrete array (containing data), given as, for example, a :class:`numpy.ndarray`, or a :class:`pyopencl.array.Array`. + This must offer ``shape`` and ``dtype`` attributes but is + otherwise considered opaque. At evaluation time, its + type must be understood by the appropriate execution backend. + Starting with the construction of the :class:`DataWrapper`, + may not be updated in-place. """ + def __init__(self, namespace, data, tags=None): + super().__init__(namespace, tags) + + self.data = data + + @property + def shape(self): + self.data.shape + + @property + def dtype(self): + self.data.dtype + # }}} @@ -438,30 +501,37 @@ class Placeholder(Array): A named placeholder for an array whose concrete value is supplied by the user during evaluation. + .. attribute:: name + + The name by which a value is supplied + for the placeholder once computation begins. + .. 
note:: - A symbolically represented - A symbolic - On - is required, and :attr:`shape` is given as data. + :attr:`name` is not a :term:`namespace name`. In fact, + it is prohibited from being one. (This has to be the case: Suppose a + :class:`Placeholder` is :meth:`~Array.tagged`, would the namespace name + refer to the tagged or the untagged version?) """ - @property - def shape(self): - # Matt added this to make Pylint happy. - # Not tied to this, open for discussion about how to implement this. - return self._shape - def __init__(self, namespace, name, shape, tags=None): if name is None: raise ValueError("Placeholder instances must have a name") - super().__init__( - namespace=namespace, - name=name, - tags=tags) + # Reserve the name, prevent others from using it. + namespace.assign(name, None) + + super().__init__(namespace=namespace, tags=tags) + + self.name = name self._shape = shape + @property + def shape(self): + # Matt added this to make Pylint happy. + # Not tied to this, open for discussion about how to implement this. 
+ return self._shape + # }}} -- GitLab From a9d262709863c927d3ad082a068d9cc7a18cf413 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Wed, 27 May 2020 13:17:23 -0500 Subject: [PATCH 05/15] Fix IndexLambda.is_reference, pylint fixes --- pytato/array.py | 48 ++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 40 insertions(+), 8 deletions(-) diff --git a/pytato/array.py b/pytato/array.py index 23df2f6..49084df 100644 --- a/pytato/array.py +++ b/pytato/array.py @@ -117,11 +117,17 @@ class Namespace: def __init__(self): self._symbol_table = {} + def __contains__(self, name): + return name in self._symbol_table + + def __getattr__(self, name): + return self._symbol_table[name] + def assign(self, name, value): """ :returns: *name* """ - if name in self.symbol_table: + if name in self._symbol_table: raise ValueError(f"'{name}' is already assigned") self._symbol_table[name] = value @@ -132,14 +138,14 @@ class Namespace: :returns: A :term:`array expression` referring to *name*. """ - value = self.symbol_table[name] + value = self._symbol_table[name] v = prim.Variable(name) ituple = tuple("_%d" % i for i in range(len(value.shape))) return IndexLambda( - self.namespace, - name, index_expr=v[ituple], shape=value.shape, dtype=value.dtype) + self, name, index_expr=v[ituple], shape=value.shape, + dtype=value.dtype) # }}} @@ -410,17 +416,33 @@ class IndexLambda(Array): .. 
automethod:: is_reference """ - def __init__(self, namespace, name, index_expr, shape, bindings=None, tags=None): + + # TODO: write make_index_lambda() that does dtype inference + + def __init__(self, namespace, name, index_expr, shape, dtype, bindings=None, + tags=None): if bindings is None: bindings = {} super().__init__(namespace, name, tags=tags) - self.shape = shape + self._shape = shape + self._dtype = dtype self.index_expr = index_expr self.bindings = bindings + @property + def shape(self): + return self._shape + + @property + def dtype(self): + return self._dtype + def is_reference(self): + # FIXME: Do we want a specific 'reference' node to make all this + # checking unnecessary? + if isinstance(self.index_expr, prim.Subscript): assert isinstance(self.index_expr.aggregate, prim.Variable) name = self.index_expr.aggregate.name @@ -431,11 +453,21 @@ class IndexLambda(Array): else: return False - if name not in self.namespace: + if index != tuple("_%d" % i for i in range(len(self.shape))): + return False + + try: + val = self.namespace[name] + except KeyError: assert name in self.bindings return False - return index == tuple("_%d" % i for i in range(len(self.shape))) + if self.shape != val.shape: + return False + if self.dtype != val.dtype: + return False + + return True # }}} -- GitLab From 8d3bb541dcf8e6081da47b7e9e3d1d1fe028822a Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Wed, 27 May 2020 23:41:42 -0500 Subject: [PATCH 06/15] Respond to design doc review comments --- doc/design.rst | 81 ++++++++++++++++++++++++++++++------------------- pytato/array.py | 78 +++++++++++++++++++++++++++-------------------- 2 files changed, 95 insertions(+), 64 deletions(-) diff --git a/doc/design.rst b/doc/design.rst index 5a9efa7..3fdcb17 100644 --- a/doc/design.rst +++ b/doc/design.rst @@ -19,62 +19,81 @@ Computation and Results - Results of computations are either implement the :class:`~Array` interface or are a :class:`~DictOfNamedArrays`. 
- The former are referred to as an :term:`array expression`. The union type - of both of them is referred to as an *array result*. (FIXME? name) + The former are referred to as :term:`array expression`\ s. The union type + of both is referred to as an :term:`array result`. - Array data is computed lazily, i.e., a representation of the desired computation is built, but computation/code generation is not carried out until instructed by the user. Evaluation/computation is never triggered implicitly. -- :class:`IndexLambda` is the main means by which element-wise - expressions, expressions involving reductions, and - prefix sums/scans are expressed. No expression nodes should - be created for array expressions that are expressible without - loss of information as a :class:`IndexLambda`. :class:`IndexLambda` - allows anything for which :mod:`numpy` might use a - :class:`numpy.ufunc`, but for example :func:`numpy.reshape` - is not expressible without loss of information and therefore - realized as its own node, :class:`Reshape`. +- :attr:`Array.dtype` is evaluated eagerly. + +- :attr:`Array.shape` is evaluated as eagerly as possible, however + data-dependent name references in shapes are allowed. (This implies + that the number of array axes must be statically known.) + + Consider the the example of fancy indexing:: + + A[A > 0] + + Here, the length of the resulting array depends on the data contained + in *A* and cannot be statically determined at code generation time. + + In the case of data-dependent shapes, the shape is expressed in terms of + scalar (i.e. an :attr:`Array.shape` of `()`) values + with an integral :attr:`Array.dtype` (i.e. ``dtype.kind == "i"``) + referenced by name from the :attr:`Array.namespace`. Such a name + marks the boundary between eager and lazy evaluation. 
+ +- :class:`IndexLambda` is used to express the following functionality: + + - Broadcasting (equivalently, outer products) + - Slicing + - Arithmetic + - Element-wise function application, similar :class:`numpy.ufunc` + - Reductions + - Prefix sums/scans + + No new expression node types should be created for operations that + are well-expressed by :class:`IndexLambda`. An operation is well-expressed + if it is possible to (reasonably equivalently) recover the operation + (and its inputs and ordering with respect to other preceding/following + operations) by examining :attr:`IndexLambda.expr`. + + FIXME: This is not sharp. I'm not sure how to make it sharp. Naming ------ -- There is one (for now) computation :class:`~Namespace` per - computation that defines the computational "environment". - Operations involving array expressions not using the same - namespace are prohibited. +- There is one (for now) :class:`~Namespace` per computation that defines the + computational "environment". Operations involving array expressions not + using the same namespace are prohibited. -- Names in the :class:`~Namespace` are under user control - and unique. I.e. new names in the :class:`~Namespace` outside - that are not a :ref:`reserved_identifier` - not generated automatically without explicit user input. +- Names in the :class:`~Namespace` are under user control and unique. I.e. + new names in the :class:`~Namespace` that are not a + :ref:`reserved_identifier` are not generated automatically without explicit + user input. - The (array) value associated with a name is immutable once evaluated. In-place slice assignment may be simulated by returning a new node realizing a "partial replacement". -- :attr:`Array.shape` and :attr:`Array.dtype` are evaluated eagerly. - -- Results of array computations that are scalar (i.e. an :attr:`Array.shape` of `()`) - and have an integral :attr:`Array.dtype` (i.e. ``dtype.kind == "i"``) may be used in - shapes once they have been assigned a name. 
- - For some computations such as fancy indexing:: +- For arrays with data-dependent shapes, such as fancy indexing:: A[A > 0] it may be necessary to automatically generate names, in this case to describe the shape of the index array used to realize - the access``A[A>0]``. These will be drawn from the reserved namespace + the access ``A[A>0]``. These will be drawn from the reserved namespace ``_pt_shp``. Users may control the naming of these counts by assigning the tag :attr:`pytato.array.CountNamed`, like so:: - B = A[(A > 0).tagged(CountNamed("mycount"))] + A[(A > 0).tagged(CountNamed("mycount"))] - :class:`Placeholder` expressions, like all array expressions, are considered read-only. When computation begins, the same - actual memory may be supplied for multiple :term:`placeholder name`s, + actual memory may be supplied for multiple :term:`placeholder name`\ s, i.e. those arrays may alias. .. note:: @@ -89,7 +108,7 @@ Reserved Identifiers -------------------- - Identifiers beginning with ``_pt_`` are reserved for internal use - by :module:`pytato`. Any such internal use must be drawn from one + by :mod:`pytato`. Any such internal use must be drawn from one of the following sub-regions, identified by their identifier prefixes: @@ -114,7 +133,7 @@ Glossary .. glossary:: array expression - An object implementing the :clas:`~Array` interface + An object implementing the :class:`~Array` interface array result An :term:`array expression` or an instance of diff --git a/pytato/array.py b/pytato/array.py index 49084df..492733d 100644 --- a/pytato/array.py +++ b/pytato/array.py @@ -33,7 +33,7 @@ __doc__ = """ Expression trees based on this package are picklable as long as no non-picklable data (e.g. :class:`pyopencl.array.Array`) - is referenced from :class:`DataWrapper`. + is referenced from :class:`~pytato.array.DataWrapper`. Array Interface --------------- @@ -41,6 +41,7 @@ Array Interface .. autoclass :: Namespace .. autoclass :: Array .. autoclass :: Tag +.. 
autoclass :: UniqueTag .. autoclass :: DictOfNamedArrays Supporting Functionality @@ -48,9 +49,16 @@ Supporting Functionality .. autoclass :: DottedName +.. currentmodule:: pytato.array + +Pre-Defined Tags +---------------- + +.. autoclass:: ImplementAs +.. autoclass:: CountNamed + Built-in Expression Nodes ------------------------- -.. currentmodule:: pytato.array .. autoclass:: IndexLambda .. autoclass:: Einsum @@ -104,12 +112,15 @@ class DottedName: class Namespace: # Possible future extension: .parent attribute - """ - Represents a mapping from :term:`identifier` strings to :term:`array expression`s + r""" + Represents a mapping from :term:`identifier` strings to :term:`array expression`\ s or *None*, where *None* indicates that the name may not be used. (:class:`Placeholder` instances register their names in this way to avoid ambiguity.) + .. automethod:: __contains__ + .. automethod:: __getitem__ + .. automethod:: __iter__ .. automethod:: assign .. automethod:: ref """ @@ -120,9 +131,12 @@ class Namespace: def __contains__(self, name): return name in self._symbol_table - def __getattr__(self, name): + def __getitem__(self, name): return self._symbol_table[name] + def __iter__(self): + return iter(self._symbol_table) + def assign(self, name, value): """ :returns: *name* @@ -135,16 +149,17 @@ class Namespace: def ref(self, name): """ - :returns: A :term:`array expression` referring to *name*. + :returns: An :term:`array expression` referring to *name*. 
""" value = self._symbol_table[name] - v = prim.Variable(name) - ituple = tuple("_%d" % i for i in range(len(value.shape))) + var_ref = prim.Variable(name) + if value.shape: + var_ref = var_ref[tuple("_%d" % i for i in range(len(value.shape)))] return IndexLambda( - self, name, index_expr=v[ituple], shape=value.shape, + self, name, expr=var_ref, shape=value.shape, dtype=value.dtype) @@ -196,14 +211,19 @@ class UniqueTag(Tag): class Array: """ - A base class (abstract interface + - supplemental functionality) for lazily - evaluating array expressions. + A base class (abstract interface + supplemental functionality) for lazily + evaluating array expressions. The interface seeks to maximize :mod:`numpy` + compatibility, though not at all costs. + + Objects of this type are hashable and support structural equality + comparison (and are therefore immutable). .. note:: - The interface seeks to maximize :mod:`numpy` - compatibility, though not at all costs. + Hashability and equality testing *does* break :mod:`numpy` + compatibility, purposefully so. + + FIXME: Point out our equivalent for :mod:`numpy`'s ``==``. .. attribute:: namespace @@ -248,14 +268,6 @@ class Array: .. attribute:: ndim - Objects of this type are hashable and support - structural equality comparison (and are therefore - immutable). - - .. note:: - - This *does* break :mod:`numpy` compatibility, - purposefully so. """ def __init__(self, namespace, tags=None): @@ -393,7 +405,7 @@ class DictOfNamedArrays(collections.abc.Mapping): class IndexLambda(Array): """ - .. attribute:: index_expr + .. attribute:: expr A scalar-valued :mod:`pymbolic` expression such as ``a[_1] + b[_2, _1]``. @@ -412,14 +424,14 @@ class IndexLambda(Array): Python identifiers to objects implementing the :class:`Array` interface, making array expressions available for use in - :attr:`index_expr`. + :attr:`expr`. .. 
automethod:: is_reference """ # TODO: write make_index_lambda() that does dtype inference - def __init__(self, namespace, name, index_expr, shape, dtype, bindings=None, + def __init__(self, namespace, name, expr, shape, dtype, bindings=None, tags=None): if bindings is None: bindings = {} @@ -428,7 +440,7 @@ class IndexLambda(Array): self._shape = shape self._dtype = dtype - self.index_expr = index_expr + self.expr = expr self.bindings = bindings @property @@ -443,12 +455,12 @@ class IndexLambda(Array): # FIXME: Do we want a specific 'reference' node to make all this # checking unnecessary? - if isinstance(self.index_expr, prim.Subscript): - assert isinstance(self.index_expr.aggregate, prim.Variable) - name = self.index_expr.aggregate.name - index = self.index_expr.index - elif isinstance(self.index_expr, prim.Variable): - name = self.index_expr.aggregate.name + if isinstance(self.expr, prim.Subscript): + assert isinstance(self.expr.aggregate, prim.Variable) + name = self.expr.aggregate.name + index = self.expr.index + elif isinstance(self.expr, prim.Variable): + name = self.expr.aggregate.name index = () else: return False @@ -507,7 +519,7 @@ class DataWrapper(Array): type must be understood by the appropriate execution backend. Starting with the construction of the :class:`DataWrapper`, - may not be updated in-place. + this array may not be updated in-place. 
""" def __init__(self, namespace, data, tags=None): -- GitLab From eed1cf5a7fb1ba240b315246628e90abdb8b087d Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Wed, 27 May 2020 23:45:31 -0500 Subject: [PATCH 07/15] Placate flake8 (line length) --- pytato/array.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pytato/array.py b/pytato/array.py index 492733d..920743e 100644 --- a/pytato/array.py +++ b/pytato/array.py @@ -113,10 +113,10 @@ class DottedName: class Namespace: # Possible future extension: .parent attribute r""" - Represents a mapping from :term:`identifier` strings to :term:`array expression`\ s - or *None*, where *None* indicates that the name may not be used. - (:class:`Placeholder` instances register their names in this way to - avoid ambiguity.) + Represents a mapping from :term:`identifier` strings to + :term:`array expression`\ s or *None*, where *None* indicates that the name + may not be used. (:class:`Placeholder` instances register their names in + this way to avoid ambiguity.) .. automethod:: __contains__ .. 
automethod:: __getitem__ -- GitLab From 2b9f48397876fe4fae1415346c700617b7a74c57 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Thu, 28 May 2020 16:28:10 -0500 Subject: [PATCH 08/15] Remove name arg from IndexLambda constructor --- pytato/array.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pytato/array.py b/pytato/array.py index 920743e..e28d336 100644 --- a/pytato/array.py +++ b/pytato/array.py @@ -431,12 +431,12 @@ class IndexLambda(Array): # TODO: write make_index_lambda() that does dtype inference - def __init__(self, namespace, name, expr, shape, dtype, bindings=None, + def __init__(self, namespace, expr, shape, dtype, bindings=None, tags=None): if bindings is None: bindings = {} - super().__init__(namespace, name, tags=tags) + super().__init__(namespace, tags=tags) self._shape = shape self._dtype = dtype -- GitLab From 7edd84f5a9a97ef539df19a6f79993b5a0432bbf Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Thu, 28 May 2020 16:29:07 -0500 Subject: [PATCH 09/15] Gitignore sphinx output --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index 7a7df09..d97d175 100644 --- a/.gitignore +++ b/.gitignore @@ -14,3 +14,5 @@ distribute*tar.gz .cache .ipynb_checkpoints + +doc/_build -- GitLab From 56a11e15e63d257288cd293f6c8ea8f106c1a7c8 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Thu, 28 May 2020 16:29:38 -0500 Subject: [PATCH 10/15] Gitlab CI: be specific about which pocl device to use --- .gitlab-ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 510eba8..b1931d2 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -1,7 +1,7 @@ Python 3 POCL: script: - export PY_EXE=python3 - - export PYOPENCL_TEST=portable + - export PYOPENCL_TEST=portable:pthread - export EXTRA_INSTALL="pyopencl" - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project.sh - ". 
./build-and-test-py-project.sh" -- GitLab From bb1170c46e312c5faae5cd5030309c2c1d4c3e66 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Thu, 28 May 2020 16:32:31 -0500 Subject: [PATCH 11/15] Fix call to IndexLambda constructor in Namespace.ref --- pytato/array.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytato/array.py b/pytato/array.py index e28d336..582530d 100644 --- a/pytato/array.py +++ b/pytato/array.py @@ -159,7 +159,7 @@ class Namespace: var_ref = var_ref[tuple("_%d" % i for i in range(len(value.shape)))] return IndexLambda( - self, name, expr=var_ref, shape=value.shape, + self, expr=var_ref, shape=value.shape, dtype=value.dtype) -- GitLab From 570a8204a7302e0bbd27bf917a4c0cf7359d889e Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Fri, 29 May 2020 10:28:34 -0500 Subject: [PATCH 12/15] Design doc: Rephrase section on when to use what node --- doc/design.rst | 34 ++++++++++++++++++---------------- pytato/array.py | 1 - 2 files changed, 18 insertions(+), 17 deletions(-) diff --git a/doc/design.rst b/doc/design.rst index 3fdcb17..ea0031e 100644 --- a/doc/design.rst +++ b/doc/design.rst @@ -46,22 +46,24 @@ Computation and Results referenced by name from the :attr:`Array.namespace`. Such a name marks the boundary between eager and lazy evaluation. -- :class:`IndexLambda` is used to express the following functionality: - - - Broadcasting (equivalently, outer products) - - Slicing - - Arithmetic - - Element-wise function application, similar :class:`numpy.ufunc` - - Reductions - - Prefix sums/scans - - No new expression node types should be created for operations that - are well-expressed by :class:`IndexLambda`. An operation is well-expressed - if it is possible to (reasonably equivalently) recover the operation - (and its inputs and ordering with respect to other preceding/following - operations) by examining :attr:`IndexLambda.expr`. - - FIXME: This is not sharp. I'm not sure how to make it sharp. 
+- There is (deliberate) overlap in what various expression nodes can + express, e.g. + + - Array reshaping can be expressed as a :class:`pytato.array.Reshape` + or as an :class:`pytato.array.IndexLambda` + + - Linear algebra operations can be expressed via :class:`pytato.array.Einsum` + or as an :class:`pytato.array.IndexLambda` + + Expression capture (the "frontend") should use the "highest-level" + (most abstract) node type available that captures the user-intended + operation. Lowering transformations (e.g. during code generation) may + then convert these operations to a less abstract, more uniform + representation. + + Operations that introduce nontrivial mappings on indices (e.g. reshape, + strided slice, roll) are identified as potential candidates for being captured + in their own high-level node vs. as an :class:`pytato.array.IndexLambda`. Naming ------ diff --git a/pytato/array.py b/pytato/array.py index 582530d..78fda78 100644 --- a/pytato/array.py +++ b/pytato/array.py @@ -162,7 +162,6 @@ class Namespace: self, expr=var_ref, shape=value.shape, dtype=value.dtype) - # }}} -- GitLab From f5996d2c4cc87e06619c3131c7a7b0aee5117ba5 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Fri, 29 May 2020 10:38:31 -0500 Subject: [PATCH 13/15] Grammar fixes --- doc/design.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/design.rst b/doc/design.rst index ea0031e..988f8f8 100644 --- a/doc/design.rst +++ b/doc/design.rst @@ -17,7 +17,7 @@ TODO Computation and Results ----------------------- -- Results of computations are either implement the :class:`~Array` +- Results of computations either implement the :class:`~Array` interface or are a :class:`~DictOfNamedArrays`. The former are referred to as :term:`array expression`\ s. The union type of both is referred to as an :term:`array result`. @@ -33,7 +33,7 @@ Computation and Results data-dependent name references in shapes are allowed. 
(This implies that the number of array axes must be statically known.) - Consider the the example of fancy indexing:: + Consider the example of fancy indexing:: A[A > 0] @@ -41,8 +41,8 @@ Computation and Results in *A* and cannot be statically determined at code generation time. In the case of data-dependent shapes, the shape is expressed in terms of - scalar (i.e. an :attr:`Array.shape` of `()`) values - with an integral :attr:`Array.dtype` (i.e. ``dtype.kind == "i"``) + scalar (i.e. having an :attr:`Array.shape` of `()`) values + with an integral :attr:`Array.dtype` (i.e. having ``dtype.kind == "i"``) referenced by name from the :attr:`Array.namespace`. Such a name marks the boundary between eager and lazy evaluation. -- GitLab From 78084c7378dd74b1319eeb96cc5251591483654e Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Fri, 29 May 2020 10:38:46 -0500 Subject: [PATCH 14/15] Design doc: Namespace clarifications --- doc/design.rst | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/doc/design.rst b/doc/design.rst index 988f8f8..2dffdf1 100644 --- a/doc/design.rst +++ b/doc/design.rst @@ -68,9 +68,11 @@ Computation and Results Naming ------ -- There is one (for now) :class:`~Namespace` per computation that defines the - computational "environment". Operations involving array expressions not - using the same namespace are prohibited. +- There is (for now) one :class:`~Namespace` per computation "universe" that defines + the computational "environment", by mapping :term:`identifier`\ s to :term:`array expression`\ s + (note: :term:`DictOfNamedArrays` instances may not be named, but their constituent + parts can, by using :class:`AttributeLookup`). + Operations involving array expressions not using the same namespace are prohibited. - Names in the :class:`~Namespace` are under user control and unique. I.e. 
new names in the :class:`~Namespace` that are not a -- GitLab From 0f7a45ab17e68c0838e7a113c7ec5b9e10e1ac54 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andreas=20Kl=C3=B6ckner?= Date: Fri, 29 May 2020 17:46:24 +0200 Subject: [PATCH 15/15] Apply suggestion to doc/design.rst --- doc/design.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/design.rst b/doc/design.rst index 2dffdf1..2309491 100644 --- a/doc/design.rst +++ b/doc/design.rst @@ -70,7 +70,7 @@ Naming - There is (for now) one :class:`~Namespace` per computation "universe" that defines the computational "environment", by mapping :term:`identifier`\ s to :term:`array expression`\ s - (note: :term:`DictOfNamedArrays` instances may not be named, but their constituent + (note: :class:`DictOfNamedArrays` instances may not be named, but their constituent parts can, by using :class:`AttributeLookup`). Operations involving array expressions not using the same namespace are prohibited. -- GitLab