diff --git a/arraycontext/.pytest.py.swm b/arraycontext/.pytest.py.swm new file mode 100644 index 0000000000000000000000000000000000000000..76d766eb6afded79da5bfdcbeed45a1d8c874513 Binary files /dev/null and b/arraycontext/.pytest.py.swm differ diff --git a/arraycontext/.pytest.py.swn b/arraycontext/.pytest.py.swn new file mode 100644 index 0000000000000000000000000000000000000000..f96e886cfe2870ff96eadde22407c174a8facfd3 Binary files /dev/null and b/arraycontext/.pytest.py.swn differ diff --git a/arraycontext/__init__.py b/arraycontext/__init__.py index 76242ef45afa83aca5d734ac39a59c77e8f58ea8..1d286603a6f7fec0cdb806e63cb1805f69383615 100644 --- a/arraycontext/__init__.py +++ b/arraycontext/__init__.py @@ -60,9 +60,11 @@ from .container.traversal import ( from .impl.pyopencl import PyOpenCLArrayContext from .impl.pytato import PytatoPyOpenCLArrayContext +from .impl.pycuda import PyCUDAArrayContext from .pytest import ( PytestPyOpenCLArrayContextFactory, + PytestPyCUDAArrayContextFactory, pytest_generate_tests_for_array_contexts, pytest_generate_tests_for_pyopencl_array_context) @@ -91,11 +93,12 @@ __all__ = ( "thaw", "freeze", "from_numpy", "to_numpy", - "PyOpenCLArrayContext", "PytatoPyOpenCLArrayContext", + "PyOpenCLArrayContext", "PytatoPyOpenCLArrayContext","PyCUDAArrayContext", "make_loopy_program", "PytestPyOpenCLArrayContextFactory", + "PyCUDAArrayContextFactory", "pytest_generate_tests_for_array_contexts", "pytest_generate_tests_for_pyopencl_array_context" ) diff --git a/arraycontext/fake_numpy.py b/arraycontext/fake_numpy.py index cdb95348c6ca912bb39b01428aa7a0a96ecbfdb2..fdfc3c6dc38855f40ffdf7f08f2fe75548e1fbb9 100644 --- a/arraycontext/fake_numpy.py +++ b/arraycontext/fake_numpy.py @@ -145,31 +145,6 @@ class BaseFakeNumpyNamespace: _c_to_numpy_arc_functions = {c_name: numpy_name for numpy_name, c_name in _numpy_to_c_arc_functions.items()} - def __getattr__(self, name): - def loopy_implemented_elwise_func(*args): - actx = self._array_context - prg = _get_scalar_func_loopy_program(actx, - c_name, nargs=len(args), naxes=len(args[0].shape)) - outputs = actx.call_loopy(prg, - **{"inp%d" % i: arg for i, arg in enumerate(args)}) - return outputs["out"] - - if name in self._c_to_numpy_arc_functions: - from warnings import warn - warn(f"'{name}' in ArrayContext.np is deprecated. " - "Use '{c_to_numpy_arc_functions[name]}' as in numpy. " - "The old name will stop working in 2021.", - DeprecationWarning, stacklevel=3) - - # normalize to C names anyway - c_name = self._numpy_to_c_arc_functions.get(name, name) - - # limit which functions we try to hand off to loopy - if name in self._numpy_math_functions: - return multimapped_over_array_containers(loopy_implemented_elwise_func) - else: - raise AttributeError(name) - def _new_like(self, ary, alloc_like): from numbers import Number diff --git a/arraycontext/impl/pycuda/__init__.py b/arraycontext/impl/pycuda/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..b6a06111b96dcdb9b888bb57816da0ee21048e0e --- /dev/null +++ b/arraycontext/impl/pycuda/__init__.py @@ -0,0 +1,119 @@ +""" +.. currentmodule:: arraycontext +.. autoclass:: PyCUDAArrayContext +""" + +__copyright__ = """ +Copyright (C) 2021 University of Illinois Board of Trustees +""" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + +from warnings import warn +from typing import Dict, List, Sequence, Optional, Union, TYPE_CHECKING + +import numpy as np + +from pytools.tag import Tag + +from arraycontext.context import ArrayContext + + +if TYPE_CHECKING: + import pycuda + + +# {{{ PyCUDAArrayContext + +class PyCUDAArrayContext(ArrayContext): + """ + A :class:`ArrayContext` that uses :class:`pycuda.gpuarray.GPUArray` instances + for its base array class. + + .. attribute:: allocator + + A PyCUDA memory allocator. Can also be `None` (default) or `False` to + use the default allocator. + + .. automethod:: __init__ + """ + + def __init__(self, allocator=None): + import pycuda + super().__init__() + if allocator == None: + self.allocator = pycuda.driver.mem_alloc + from warnings import warn + warn("Allocator is None") + else: + self.allocator = allocator + + def _get_fake_numpy_namespace(self): + from arraycontext.impl.pycuda.fake_numpy import PyCUDAFakeNumpyNamespace + return PyCUDAFakeNumpyNamespace(self) + + # {{{ ArrayContext interface + + def empty(self, shape, dtype): + import pycuda.gpuarray as gpuarray + return gpuarray.empty(shape=shape, dtype=dtype, + allocator=self.allocator) + + def zeros(self, shape, dtype): + import pycuda.gpuarray as gpuarray + return gpuarray.zeros(shape=shape, dtype=dtype, + allocator=self.allocator) + + def from_numpy(self, array: np.ndarray): + import pycuda.gpuarray as gpuarray + return gpuarray.to_gpu(array, allocator=self.allocator) + + def to_numpy(self, array): + import pycuda.gpuarray as gpuarray + return array.get() + + def call_loopy(self, t_unit, **kwargs): + raise NotImplementedError('Waiting for loopy to be more capable') + + def freeze(self, array): + return array + + def thaw(self, array): + return array + + # }}} + + def clone(self): + return type(self)(self.allocator) + + def tag(self, array): + return array + + def tag_axis(self, array): + return array + + @property + def permits_inplace_modification(self): + return True + +# }}} + +# vim: foldmethod=marker diff --git a/arraycontext/impl/pycuda/fake_numpy.py b/arraycontext/impl/pycuda/fake_numpy.py new file mode 100644 index 0000000000000000000000000000000000000000..ce9ceb2167b81bc8b67f173923fafb8a6e61977c --- /dev/null +++ b/arraycontext/impl/pycuda/fake_numpy.py @@ -0,0 +1,152 @@ +""" +.. currentmodule:: arraycontext +.. autoclass:: PyCUDAArrayContext +""" +__copyright__ = """ +Copyright (C) 2021 University of Illinois Board of Trustees +""" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + +from functools import partial, reduce +import operator + +from arraycontext.fake_numpy import \ + BaseFakeNumpyNamespace, BaseFakeNumpyLinalgNamespace +from arraycontext.container.traversal import ( + rec_multimap_array_container, rec_map_array_container, + rec_map_reduce_array_container, + ) + +import pycuda + +try: + import pycuda.gpuarray as gpuarray +except ImportError: + pass + + +# {{{ fake numpy + +class PyCUDAFakeNumpyNamespace(BaseFakeNumpyNamespace): + def _get_fake_numpy_linalg_namespace(self): + return _PyCUDAFakeNumpyLinalgNamespace(self._array_context) + + def __getattr__(self, name): + print(name) + pycuda_funcs = ["abs", "sin", "cos", "tan", "arcsin", "arccos", "arctan", + "sinh", "cosh", "tanh", "exp", "log", "log10", "isnan", + "sqrt", "exp"] + if name in pycuda_funcs: + from functools import partial + return partial(rec_map_array_container, getattr(pycuda, name)) + + return super().__getattr__(name) + + # {{{ comparisons + + # FIXME: This should be documentation, not a comment. + # These are here mainly because some arrays may choose to interpret + # equality comparison as a binary predicate of structural identity, + # i.e. more like "are you two equal", and not like numpy semantics. + # These operations provide access to numpy-style comparisons in that + # case. + + def equal(self, x, y): + return rec_multimap_array_container(operator.eq, x, y) + + def not_equal(self, x, y): + return rec_multimap_array_container(operator.ne, x, y) + + def greater(self, x, y): + return rec_multimap_array_container(operator.gt, x, y) + + def greater_equal(self, x, y): + return rec_multimap_array_container(operator.ge, x, y) + + def less(self, x, y): + return rec_multimap_array_container(operator.lt, x, y) + + def less_equal(self, x, y): + return rec_multimap_array_container(operator.le, x, y) + + # }}} + + def maximum(self, x, y): + return rec_multimap_array_container(gpuarray.maximum,x, y) + + def minimum(self, x, y): + return rec_multimap_array_container(gpuarray.minimum,x, y) + + def where(self, criterion, then, else_): + def where_inner(inner_crit, inner_then, inner_else): + if isinstance(inner_crit, bool): + return inner_then if inner_crit else inner_else + return gpuarray.if_positive(inner_crit != 0, inner_then, inner_else) + + return rec_multimap_array_container(where_inner, criterion, then, else_) + + def sum(self, a, dtype=None): + def _gpuarray_sum(ary): + if dtype not in [ary.dtype, None]: + raise NotImplementedError + + return gpuarray.sum(ary) + + return rec_map_reduce_array_container(sum, _gpuarray_sum, a) + + def min(self, a): + return rec_map_reduce_array_container( + partial(reduce, partial(gpuarray.minimum)),partial(gpuarray.min),a) + + def max(self, a): + return rec_map_reduce_array_container( + partial(reduce, partial(gpuarray.maximum)), partial(gpuarray.max), a) + + def stack(self, arrays, axis=0): + return rec_multimap_array_container( + lambda *args: gpuarray.stack(arrays=args, axis=axis), + *arrays) + + def reshape(self, a, newshape): + return gpuarray.reshape(a, newshape) + + def concatenate(self, arrays, axis=0): + return gpuarray.concatenate( + arrays, axis, + self._array_context.allocator + ) + + def ravel(self, a, order="C"): + return gpuarray.reshape(a,-1,order=order) + +# }}} + + +# {{{ fake np.linalg + +class _PyCUDAFakeNumpyLinalgNamespace(BaseFakeNumpyLinalgNamespace): + pass + +# }}} + + +# vim: foldmethod=marker diff --git a/arraycontext/impl/pyopencl/fake_numpy.py b/arraycontext/impl/pyopencl/fake_numpy.py index 01054bac6b90d2960f3ddc6ee25cd13fc1d91d4d..a1f74ea7f024181c5fd4e82ebba75c5ef283c987 100644 --- a/arraycontext/impl/pyopencl/fake_numpy.py +++ b/arraycontext/impl/pyopencl/fake_numpy.py @@ -31,6 +31,8 @@ import operator from arraycontext.fake_numpy import \ BaseFakeNumpyNamespace, BaseFakeNumpyLinalgNamespace +from arraycontext.loopy import \ + LoopyBasedFakeNumpyNamespace from arraycontext.container.traversal import ( rec_multimap_array_container, rec_map_array_container, rec_map_reduce_array_container, @@ -45,7 +47,7 @@ except ImportError: # {{{ fake numpy -class PyOpenCLFakeNumpyNamespace(BaseFakeNumpyNamespace): +class PyOpenCLFakeNumpyNamespace(LoopyBasedFakeNumpyNamespace): def _get_fake_numpy_linalg_namespace(self): return _PyOpenCLFakeNumpyLinalgNamespace(self._array_context) @@ -58,6 +60,17 @@ class PyOpenCLFakeNumpyNamespace(BaseFakeNumpyNamespace): # These operations provide access to numpy-style comparisons in that # case. + def __getattr__(self, name): + print(name) + cl_funcs = ["abs", "sin", "cos", "tan", "arcsin", "arccos", "arctan", + "sinh", "cosh", "tanh", "exp", "log", "log10", "isnan", + "sqrt", "exp"] + if name in cl_funcs: + from functools import partial + return partial(rec_map_array_container, getattr(cl, name)) + + return super().__getattr__(name) + def equal(self, x, y): return rec_multimap_array_container(operator.eq, x, y) diff --git a/arraycontext/loopy.py b/arraycontext/loopy.py index f4c97754d731961baaaf0191f70dcfeca287b688..0efb3bbb77974f794fb899df2c0c3670c45e03ed 100644 --- a/arraycontext/loopy.py +++ b/arraycontext/loopy.py @@ -27,7 +27,7 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. """ -import loopy as lp +import arraycontext.fake_numpy import BaseFakeNumpyNamespace from loopy.version import MOST_RECENT_LANGUAGE_VERSION @@ -70,5 +70,47 @@ def get_default_entrypoint(t_unit): # }}} +class LoopyBasedFakeNumpyNamespace(BaseFakeNumpyNamespace): + _numpy_to_c_arc_functions = { + "arcsin": "asin", + "arccos": "acos", + "arctan": "atan", + "arctan2": "atan2", -# vim: foldmethod=marker + "arcsinh": "asinh", + "arccosh": "acosh", + "arctanh": "atanh", + } + + _c_to_numpy_arc_functions = {c_name: numpy_name + for numpy_name, c_name in _numpy_to_c_arc_functions.items()} + + def __getattr__(self, name): + def loopy_implemented_elwise_func(*args): + actx = self._array_context + prg = _get_scalar_func_loopy_program(actx, + c_name, nargs=len(args), naxes=len(args[0].shape)) + outputs = actx.call_loopy(prg, + **{"inp%d" % i: arg for i, arg in enumerate(args)}) + return outputs["out"] + + if name in self._c_to_numpy_arc_functions: + from warnings import warn + warn(f"'{name}' in ArrayContext.np is deprecated. " + "Use '{c_to_numpy_arc_functions[name]}' as in numpy. " + "The old name will stop working in 2022.", + DeprecationWarning, stacklevel=3) + + # normalize to C names anyway + c_name = self._numpy_to_c_arc_functions.get(name, name) + + # limit which functions we try to hand off to loopy + if name in self._numpy_math_functions: + return multimapped_over_array_containers(loopy_implemented_elwise_func) + else: + raise AttributeError(name) + + + + +vim: foldmethod=marker diff --git a/arraycontext/pytest.py b/arraycontext/pytest.py index e93a8b38bd8528d8719dfe818bd58c56214a3c66..9b6ebfb3f2f598a2dd3c8e81834e7f62409c1196 100644 --- a/arraycontext/pytest.py +++ b/arraycontext/pytest.py @@ -2,6 +2,7 @@ .. currentmodule:: arraycontext .. autoclass:: PytestPyOpenCLArrayContextFactory +.. autoclass:: PytestPyCUDAArrayContextFactory .. autofunction:: pytest_generate_tests_for_array_contexts .. autofunction:: pytest_generate_tests_for_pyopencl_array_context @@ -34,6 +35,7 @@ THE SOFTWARE. from typing import Any, Callable, Dict, Sequence, Type, Union import pyopencl as cl +import pycuda from arraycontext.context import ArrayContext @@ -70,6 +72,22 @@ class PytestPyOpenCLArrayContextFactory: raise NotImplementedError +class PytestPyCUDAArrayContextFactory: + """ + .. automethod:: __init__ + .. automethod:: __call__ + """ + + def __init__(self, allocator): + """ + :arg allocator: a :class:`gpuarray.allocator`. + """ + self.allocator = allocator + + def __call__(self) -> ArrayContext: + raise NotImplementedError + + class _PytestPyOpenCLArrayContextFactoryWithClass(PytestPyOpenCLArrayContextFactory): force_device_scalars = True @@ -126,6 +144,75 @@ class _PytestPytatoPyOpenCLArrayContextFactory( self.device.platform.name.strip())) +class _PytestPyCUDAArrayContextFactory( + PytestPyCUDAArrayContextFactory): + + @property + def actx_class(self): + from arraycontext import PyCUDAArrayContext + return PyCUDAArrayContext + + def __call__(self): + def make_default_context(ctx_maker=None): + if ctx_maker is None: + + def ctx_maker(dev): + return dev.make_context() + + ndevices = cuda.Device.count() + if ndevices == 0: + raise RuntimeError( + "No CUDA enabled device found. " "Please check your installation." + ) + + # Is CUDA_DEVICE set? + import os + + devn = os.environ.get("CUDA_DEVICE") + + # Is $HOME/.cuda_device set ? + if devn is None: + try: + homedir = os.environ.get("HOME") + assert homedir is not None + devn = open(os.path.join(homedir, ".cuda_device")).read().strip() + except Exception: + pass + + # If either CUDA_DEVICE or $HOME/.cuda_device is set, try to use it + if devn is not None: + try: + devn = int(devn) + except TypeError: + raise TypeError( + "CUDA device number (CUDA_DEVICE or ~/.cuda_device)" + " must be an integer" + ) + + dev = cuda.Device(devn) + return ctx_maker(dev) + + # Otherwise, try to use any available device + else: + for devn in range(ndevices): + dev = cuda.Device(devn) + try: + return ctx_maker(dev) + except cuda.Error: + pass + + raise RuntimeError( + "make_default_context() wasn't able to create a context " + "on any of the %d detected devices" % ndevices + ) + + import pycuda.driver as cuda + actx_class = self.actx_class(None) + cuda.init() + ctx = make_default_context() + return actx_class + + _ARRAY_CONTEXT_FACTORY_REGISTRY: \ Dict[str, Type[PytestPyOpenCLArrayContextFactory]] = { "pyopencl": _PytestPyOpenCLArrayContextFactoryWithClass, diff --git a/test/test_arraycontext.py b/test/test_arraycontext.py index 9e855b06943046f1853f75f3e466d472fc0faddf..c87793f8bcd893f045fbf53720930043d36f857b 100644 --- a/test/test_arraycontext.py +++ b/test/test_arraycontext.py @@ -34,12 +34,14 @@ from arraycontext import ( FirstAxisIsElementsTag, PyOpenCLArrayContext, PytatoPyOpenCLArrayContext, - ArrayContainer,) + PyCUDAArrayContext, + ArrayContainer) from arraycontext import ( # noqa: F401 pytest_generate_tests_for_array_contexts, ) from arraycontext.pytest import (_PytestPyOpenCLArrayContextFactoryWithClass, - _PytestPytatoPyOpenCLArrayContextFactory) + _PytestPytatoPyOpenCLArrayContextFactory, + _PytestPyCUDAArrayContextFactory) import logging @@ -66,6 +68,15 @@ class _PytatoPyOpenCLArrayContextForTests(PytatoPyOpenCLArrayContext): def transform_loopy_program(self, t_unit): return t_unit +class _PyCUDAArrayContextForTests(PyCUDAArrayContext): + """Like :class:`PyCUDAArrayContext`, but applies no program + transformations whatsoever. Only to be used for testing internal to + :mod:`arraycontext`. + """ + + def transform_loopy_program(self, t_unit): + return t_unit + class _PyOpenCLArrayContextWithHostScalarsForTestsFactory( _PytestPyOpenCLArrayContextFactoryWithClass): @@ -82,13 +93,22 @@ class _PytatoPyOpenCLArrayContextForTestsFactory( actx_class = _PytatoPyOpenCLArrayContextForTests +class _PyCUDAArrayContextForTestsFactory( + _PytestPyCUDAArrayContextFactory): + actx_class = _PyCUDAArrayContextForTests + + +#pytest_generate_tests = pytest_generate_tests_for_array_contexts([ + #_PyOpenCLArrayContextForTestsFactory, + #_PyOpenCLArrayContextWithHostScalarsForTestsFactory, + #_PytatoPyOpenCLArrayContextForTestsFactory, + #_PyCUDAArrayContextForTestsFactory, + #]) + pytest_generate_tests = pytest_generate_tests_for_array_contexts([ - _PyOpenCLArrayContextForTestsFactory, - _PyOpenCLArrayContextWithHostScalarsForTestsFactory, - _PytatoPyOpenCLArrayContextForTestsFactory, + _PyCUDAArrayContextForTestsFactory ]) - def _acf(): import pyopencl as cl @@ -305,6 +325,8 @@ def test_array_context_np_workalike(actx_factory, sym_name, n_args, dtype): ]) def test_array_context_np_like(actx_factory, sym_name, n_args, dtype): actx = actx_factory() + if not hasattr(actx.np, sym_name): + pytest.skip(f"'{sym_name}' not implemented on '{type(actx).__name__}'") ndofs = 512 args = [randn(ndofs, dtype) for i in range(n_args)]