diff --git a/.gitignore b/.gitignore index 7a7df094eaf4bac78ca41b3bb411b0218aa71c4b..d97d175d3a129309a5a2be0dd04239c02fee31ed 100644 --- a/.gitignore +++ b/.gitignore @@ -14,3 +14,5 @@ distribute*tar.gz .cache .ipynb_checkpoints + +doc/_build diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 510eba806fc6f27ed09dcee146feb931fed0c3de..b1931d272744bdcf1c62eeab073de0adfd674f53 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -1,7 +1,7 @@ Python 3 POCL: script: - export PY_EXE=python3 - - export PYOPENCL_TEST=portable + - export PYOPENCL_TEST=portable:pthread - export EXTRA_INSTALL="pyopencl" - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project.sh - ". ./build-and-test-py-project.sh" diff --git a/doc/design.rst b/doc/design.rst index 7837eaf5032cf9287c0f44d139fc44d5b3f637a6..2309491944833a1f3b4396281daf91e51096c985 100644 --- a/doc/design.rst +++ b/doc/design.rst @@ -1,8 +1,157 @@ Design Decisions in Pytato ========================== -- There is one (for now) computation :class:`pytato.N -- Shapes and dtypes are computed eagerly. -- Array data is computed eagerly. -- Results of array computations may *beomc +.. currentmodule:: pytato +TODO +---- + +- reduction inames +- finish trawling the design doc +- expression nodes in index lambda + - what pymbolic expression nodes are OK + - reductions + - function identifier scoping + - piecewise def (use ISL?) + +Computation and Results +----------------------- + +- Results of computations either implement the :class:`~Array` + interface or are a :class:`~DictOfNamedArrays`. + The former are referred to as :term:`array expression`\ s. The union type + of both is referred to as an :term:`array result`. + +- Array data is computed lazily, i.e., a representation of the desired + computation is built, but computation/code generation is not carried + out until instructed by the user. Evaluation/computation + is never triggered implicitly. + +- :attr:`Array.dtype` is evaluated eagerly. 
+ +- :attr:`Array.shape` is evaluated as eagerly as possible, however + data-dependent name references in shapes are allowed. (This implies + that the number of array axes must be statically known.) + + Consider the example of fancy indexing:: + + A[A > 0] + + Here, the length of the resulting array depends on the data contained + in *A* and cannot be statically determined at code generation time. + + In the case of data-dependent shapes, the shape is expressed in terms of + scalar (i.e. having a :attr:`Array.shape` of `()`) values + with an integral :attr:`Array.dtype` (i.e. having ``dtype.kind == "i"``) + referenced by name from the :attr:`Array.namespace`. Such a name + marks the boundary between eager and lazy evaluation. + +- There is (deliberate) overlap in what various expression nodes can + express, e.g. + + - Array reshaping can be expressed as a :class:`pytato.array.Reshape` + or as an :class:`pytato.array.IndexLambda` + + - Linear algebra operations can be expressed via :class:`pytato.array.Einsum` + or as an :class:`pytato.array.IndexLambda` + + Expression capture (the "frontend") should use the "highest-level" + (most abstract) node type available that captures the user-intended + operation. Lowering transformations (e.g. during code generation) may + then convert these operations to a less abstract, more uniform + representation. + + Operations that introduce nontrivial mappings on indices (e.g. reshape, + strided slice, roll) are identified as potential candidates for being captured + in their own high-level node vs. as an :class:`pytato.array.IndexLambda`. + +Naming +------ + +- There is (for now) one :class:`~Namespace` per computation "universe" that defines + the computational "environment", by mapping :term:`identifier`\ s to :term:`array expression`\ s + (note: :class:`DictOfNamedArrays` instances may not be named, but their constituent + parts can, by using :class:`AttributeLookup`). 
+ Operations involving array expressions not using the same namespace are prohibited. + +- Names in the :class:`~Namespace` are under user control and unique. I.e. + new names in the :class:`~Namespace` that are not a + :ref:`reserved_identifier` are not generated automatically without explicit + user input. + +- The (array) value associated with a name is immutable once evaluated. + In-place slice assignment may be simulated by returning a new + node realizing a "partial replacement". + +- For arrays with data-dependent shapes, such as fancy indexing:: + + A[A > 0] + + it may be necessary to automatically generate names, in this + case to describe the shape of the index array used to realize + the access ``A[A>0]``. These will be drawn from the reserved namespace + ``_pt_shp``. Users may control the naming of these counts + by assigning the tag :attr:`pytato.array.CountNamed`, like so:: + + A[(A > 0).tagged(CountNamed("mycount"))] + +- :class:`Placeholder` expressions, like all array expressions, + are considered read-only. When computation begins, the same + actual memory may be supplied for multiple :term:`placeholder name`\ s, + i.e. those arrays may alias. + + .. note:: + + This does not preclude the arrays being declared with + C's ``*restrict`` qualifier in generated code, as they + do not alias any data that is being modified. + +.. _reserved_identifier: + +Reserved Identifiers +-------------------- + +- Identifiers beginning with ``_pt_`` are reserved for internal use + by :mod:`pytato`. Any such internal use must be drawn from one + of the following sub-regions, identified by their identifier + prefixes: + + - ``_pt_shp``: Used to automatically generate identifiers used + in data-dependent shapes. + +- Identifiers used in index lambdas are also reserved. These include: + + - Identifiers matching the regular expression ``_[0-9]+``. They are used + as index ("iname") placeholders. + + - Identifiers matching the regular expression ``_r[0-9]+``. 
They are used + as reduction indices. + + - Identifiers matching the regular expression ``_in[0-9]+``. They are used + as automatically generated names (if required) in + :attr:`IndexLambda.bindings`. + +Glossary +======== + +.. glossary:: + + array expression + An object implementing the :class:`~Array` interface + + array result + An :term:`array expression` or an instance of + :class:`~DictOfNamedArrays`. + + identifier + Any string for which :meth:`str.isidentifier` returns + *True*. See also :ref:`reserved_identifier`. + + namespace name + The name by which an :term:`array expression` is known + in a :class:`Namespace`. + + placeholder name + See :attr:`Placeholder.name`. + +.. vim: shiftwidth=4 diff --git a/pytato/__init__.py b/pytato/__init__.py index 5f047b9d1238f8ddfed2a4ac5ffa2c6d8745487b..de2ea921b3668ea0c972f35e00796314756d0f22 100644 --- a/pytato/__init__.py +++ b/pytato/__init__.py @@ -25,7 +25,8 @@ THE SOFTWARE. """ from pytato.array import ( - DottedName, Namespace, Array, DictOfNamedArrays, + DottedName, Namespace, Array, DictOfNamedArrays, Tag ) -__all__ = ("DottedName", "Namespace", "Array", "DictOfNamedArrays") +__all__ = ("DottedName", "Namespace", "Array", "DictOfNamedArrays", + "Tag") diff --git a/pytato/array.py b/pytato/array.py index 918b53ebfc6261c68dd8a6a4f41ad68a560a41ce..78fda781db5cb0a7d35433914b42bd65ec59a390 100644 --- a/pytato/array.py +++ b/pytato/array.py @@ -24,21 +24,24 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. """ -__doc__ = """ +# {{{ docs +__doc__ = """ +.. currentmodule:: pytato -Expression trees based on this package are picklable -as long as no non-picklable data -(e.g. :class:`pyopencl.array.Array`) -is referenced from :class:`DataWrapper`. +.. note:: + Expression trees based on this package are picklable + as long as no non-picklable data + (e.g. :class:`pyopencl.array.Array`) + is referenced from :class:`~pytato.array.DataWrapper`. Array Interface --------------- -.. 
currentmodule:: pytato - .. autoclass :: Namespace .. autoclass :: Array +.. autoclass :: Tag +.. autoclass :: UniqueTag .. autoclass :: DictOfNamedArrays Supporting Functionality @@ -46,74 +49,180 @@ Supporting Functionality .. autoclass :: DottedName +.. currentmodule:: pytato.array + +Pre-Defined Tags +---------------- + +.. autoclass:: ImplementAs +.. autoclass:: CountNamed + Built-in Expression Nodes ------------------------- -.. currentmodule:: pytato.array .. autoclass:: IndexLambda .. autoclass:: Einsum +.. autoclass:: Reshape .. autoclass:: DataWrapper .. autoclass:: Placeholder .. autoclass:: LoopyFunction """ +# }}} + import collections.abc +from dataclasses import dataclass from pytools import single_valued, is_single_valued +import pymbolic.primitives as prim + +# {{{ dotted name class DottedName: """ .. attribute:: name_parts A tuple of strings, each of which is a valid - Python identifier. + Python identifier. No name part may start with + a double underscore. The name (at least morally) exists in the name space defined by the Python module system. It need not necessarily identify an importable object. + + .. automethod:: from_class """ def __init__(self, name_parts): self.name_parts = name_parts + @classmethod + def from_class(cls, argcls): + name_parts = tuple(argcls.__module__.split(".") + [argcls.__name__]) + if not all(not npart.startswith("__") for npart in name_parts): + raise ValueError(f"some name parts of {'.'.join(name_parts)} " + "start with double underscores") + return cls(name_parts) + +# }}} + + +# {{{ namespace class Namespace: # Possible future extension: .parent attribute - """ - .. attribute:: symbol_table - - A mapping from strings that must be valid - C identifiers to objects implementing the - :class:`Array` interface. + r""" + Represents a mapping from :term:`identifier` strings to + :term:`array expression`\ s or *None*, where *None* indicates that the name + may not be used. 
(:class:`Placeholder` instances register their names in + this way to avoid ambiguity.) + + .. automethod:: __contains__ + .. automethod:: __getitem__ + .. automethod:: __iter__ + .. automethod:: assign + .. automethod:: ref """ def __init__(self): - self.symbol_table = {} + self._symbol_table = {} + + def __contains__(self, name): + return name in self._symbol_table + + def __getitem__(self, name): + return self._symbol_table[name] + + def __iter__(self): + return iter(self._symbol_table) def assign(self, name, value): - if name in self.symbol_table: + """ + :returns: *name* + """ + if name in self._symbol_table: raise ValueError(f"'{name}' is already assigned") - self.symbol_table[name] = value + self._symbol_table[name] = value + return name -class Array: + def ref(self, name): + """ + :returns: An :term:`array expression` referring to *name*. + """ + + value = self._symbol_table[name] + + var_ref = prim.Variable(name) + if value.shape: + var_ref = var_ref[tuple("_%d" % i for i in range(len(value.shape)))] + + return IndexLambda( + self, expr=var_ref, shape=value.shape, + dtype=value.dtype) + +# }}} + + +# {{{ tag + +tag_dataclass = dataclass(init=True, eq=True, frozen=True, repr=True) + + +@tag_dataclass +class Tag: """ - A base class (abstract interface + - supplemental functionality) for lazily - evaluating array expressions. + Generic metadata, applied to, among other things, + instances of :class:`Array`. - .. note:: + .. attribute:: tag_name - The interface seeks to maximize :mod:`numpy` - compatibility, though not at all costs. + A fully qualified :class:`DottedName` that reflects + the class name of the tag. - All these are abstract: + Instances of this type must be immutable, hashable, + picklable, and have a reasonably concise :meth:`__repr__` + of the form ``dotted.name(attr1=value1, attr2=value2)``. + Positional arguments are not allowed. - .. attribute:: name + .. note:: + + This mirrors the tagging scheme that :mod:`loopy` + is headed towards. 
+ """ + + @property + def tag_name(self): + return DottedName.from_class(type(self)) + + +class UniqueTag(Tag): + """ + Only one instance of this type of tag may be assigned + to a single tagged object. + """ + +# }}} + + +# {{{ array interface + +class Array: + """ + A base class (abstract interface + supplemental functionality) for lazily + evaluating array expressions. The interface seeks to maximize :mod:`numpy` + compatibility, though not at all costs. + + Objects of this type are hashable and support structural equality + comparison (and are therefore immutable). + + .. note:: + + Hashability and equality testing *does* break :mod:`numpy` + compatibility, purposefully so. - A name in :attr:`namespace` that has been assigned - to this expression. May be (and typically is) *None*. + FIXME: Point out our equivalent for :mod:`numpy`'s ``==``. .. attribute:: namespace @@ -123,13 +232,18 @@ class Array: .. attribute:: shape - Identifiers (:class:`pymbolic.Variable`) refer to - names from :attr:`namespace`. - A tuple of integers or :mod:`pymbolic` expressions. + Identifiers (:class:`pymbolic.Variable`) refer to names from + :attr:`namespace`. A tuple of integers or :mod:`pymbolic` expressions. Shape may be (at most affinely) symbolic in these identifiers. - # FIXME: -> https://gitlab.tiker.net/inducer/pytato/-/issues/1 + .. note:: + + Affine-ness is mainly required by code generation for + :class:`IndexLambda`, but :class:`IndexLambda` is used to produce + references to named arrays. Since any array that needs to be + referenced in this way needs to obey this restriction anyway, + a decision was made to require the same of *all* array expressions. .. attribute:: dtype @@ -138,51 +252,28 @@ class Array: .. attribute:: tags A :class:`dict` mapping :class:`DottedName` instances - to an argument object, whose structure is defined - by the tag. + to instances of the :class:`Tag` interface.
Motivation: `RDF `__ triples (subject: implicitly the array being tagged, predicate: the tag, object: the arg). - For example:: - - # tag - DottedName("our_array_thing.impl_mode"): - - # argument - DottedName( - "our_array_thing.loopy_target.subst_rule") - - .. note:: - - This mirrors the tagging scheme that :mod:`loopy` - is headed towards. + .. automethod:: named + .. automethod:: tagged + .. automethod:: without_tag Derived attributes: .. attribute:: ndim - Objects of this type are hashable and support - structural equality comparison (and are therefore - immutable). - - .. note:: - - This *does* break :mod:`numpy` compatibility, - purposefully so. """ - def __init__(self, namespace, name, tags=None): + def __init__(self, namespace, tags=None): if tags is None: tags = {} - if name is not None: - namespace.assign(name, self) - self.namespace = namespace - self.name = name self.tags = tags def copy(self, **kwargs): @@ -192,33 +283,79 @@ class Array: def shape(self): raise NotImplementedError + def named(self, name): + return self.namespace.ref(self.namespace.assign(name, self)) + @property def ndim(self): return len(self.shape) - def with_tag(self, dotted_name, args=None): + def tagged(self, tag: Tag): """ - Returns a copy of *self* tagged with *dotted_name* - and arguments *args* - If a tag *dotted_name* is already present, it is - replaced in the returned object. + Returns a copy of *self* tagged with *tag*. + If *tag* is a :class:`UniqueTag` and other + tags of this type are already present, an error + is raised. 
""" - if args is None: - pass + pass def without_tag(self, dotted_name): pass - def with_name(self, name): - self.namespace.assign_name(name, self) - return self.copy(name=name) - # TODO: - # - tags # - codegen interface - # - naming + +# }}} + + +# {{{ pre-defined tag: ImplementAs + +@tag_dataclass +class ImplementationStrategy(Tag): + pass + + +@tag_dataclass +class ImplStored(ImplementationStrategy): + pass +@tag_dataclass +class ImplInlined(ImplementationStrategy): + pass + + +@tag_dataclass +class ImplDefault(ImplementationStrategy): + pass + + +@tag_dataclass +class ImplementAs(UniqueTag): + """ + .. attribute:: strategy + """ + + strategy: ImplementationStrategy + +# }}} + + +# {{{ pre-defined tag: CountNamed + +@tag_dataclass +class CountNamed(UniqueTag): + """ + .. attribute:: name + """ + + name: str + +# }}} + + +# {{{ dict of named arrays + class DictOfNamedArrays(collections.abc.Mapping): """A container that maps valid Python identifiers to instances of :class:`Array`. May occur as a result @@ -260,36 +397,111 @@ class DictOfNamedArrays(collections.abc.Mapping): def __len__(self): return len(self._data) +# }}} + + +# {{{ index lambda class IndexLambda(Array): """ - .. attribute:: index_expr + .. attribute:: expr A scalar-valued :mod:`pymbolic` expression such as - ``a[_1] + b[_2, _1]`` depending on TODO + ``a[_1] + b[_2, _1]``. Identifiers in the expression are resolved, in - order, by lookups in :attr:`inputs`, then in + order, by lookups in :attr:`bindings`, then in :attr:`namespace`. Scalar functions in this expression must - be identified by a dotted name - (e.g. ``our_array_thing.c99.sin``). + be identified by a dotted name representing + a Python object (e.g. ``pytato.c99.sin``). - .. attribute:: binding + .. attribute:: bindings A :class:`dict` mapping strings that are valid Python identifiers to objects implementing the :class:`Array` interface, making array expressions available for use in - :attr:`index_expr`. + :attr:`expr`. + + .. 
automethod:: is_reference """ + # TODO: write make_index_lambda() that does dtype inference + + def __init__(self, namespace, expr, shape, dtype, bindings=None, + tags=None): + if bindings is None: + bindings = {} + + super().__init__(namespace, tags=tags) + + self._shape = shape + self._dtype = dtype + self.expr = expr + self.bindings = bindings + + @property + def shape(self): + return self._shape + + @property + def dtype(self): + return self._dtype + + def is_reference(self): + # FIXME: Do we want a specific 'reference' node to make all this + # checking unnecessary? + + if isinstance(self.expr, prim.Subscript): + assert isinstance(self.expr.aggregate, prim.Variable) + name = self.expr.aggregate.name + index = self.expr.index + elif isinstance(self.expr, prim.Variable): + name = self.expr.name + index = () + else: + return False + + if index != tuple("_%d" % i for i in range(len(self.shape))): + return False + + try: + val = self.namespace[name] + except KeyError: + assert name in self.bindings + return False + + if self.shape != val.shape: + return False + if self.dtype != val.dtype: + return False + + return True + +# }}} + + +# {{{ einsum class Einsum(Array): """ """ +# }}} + + +# {{{ reshape + +class Reshape(Array): + """ + """ + +# }}} + + +# {{{ data wrapper class DataWrapper(Array): # TODO: Name? @@ -297,44 +509,76 @@ class DataWrapper(Array): Takes concrete array data and packages it to be compatible with the :class:`Array` interface. - A way - .. attrib + .. attribute:: data A concrete array (containing data), given as, for example, a :class:`numpy.ndarray`, or a :class:`pyopencl.array.Array`. + This must offer ``shape`` and ``dtype`` attributes but is + otherwise considered opaque. At evaluation time, its + type must be understood by the appropriate execution backend. + Starting with the construction of the :class:`DataWrapper`, + this array may not be updated in-place.
""" + def __init__(self, namespace, data, tags=None): + super().__init__(namespace, tags) + + self.data = data + + @property + def shape(self): + return self.data.shape + + @property + def dtype(self): + return self.data.dtype + +# }}} + + +# {{{ placeholder class Placeholder(Array): """ A named placeholder for an array whose concrete value is supplied by the user during evaluation. + .. attribute:: name + + The name by which a value is supplied + for the placeholder once computation begins. + .. note:: - A symbolically represented - A symbolic - On - is required, and :attr:`shape` is given as data. + :attr:`name` is not a :term:`namespace name`. In fact, + it is prohibited from being one. (This has to be the case: Suppose a + :class:`Placeholder` is :meth:`~Array.tagged`, would the namespace name + refer to the tagged or the untagged version?) """ + def __init__(self, namespace, name, shape, tags=None): + if name is None: + raise ValueError("Placeholder instances must have a name") + + # Reserve the name, prevent others from using it. + namespace.assign(name, None) + + super().__init__(namespace=namespace, tags=tags) + + self.name = name + self._shape = shape + @property def shape(self): # Matt added this to make Pylint happy. # Not tied to this, open for discussion about how to implement this. return self._shape - def __init__(self, namespace, name, shape, tags=None): - if name is None: - raise ValueError("Placeholder instances must have a name") - super().__init__( - namespace=namespace, - name=name, - tags=tags) +# }}} - self._shape = shape +# {{{ loopy function class LoopyFunction(DictOfNamedArrays): """ @@ -344,3 +588,7 @@ class LoopyFunction(DictOfNamedArrays): and one that's obtained by importing a dotted name.
""" + +# }}} + +# vim: foldmethod=marker diff --git a/setup.py b/setup.py index c7d57e46918732720ce87672cf4f78c0895a24f3..a0f3d056b619010d5b68cdb4178c4d63e2d7f524 100644 --- a/setup.py +++ b/setup.py @@ -23,7 +23,6 @@ setup(name="pytato", 'Programming Language :: Python', 'Programming Language :: Python', 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.6', 'Programming Language :: Python :: 3.7', 'Programming Language :: Python :: 3.8', 'Topic :: Scientific/Engineering',